def generate_readme_rst() -> bool:
    """Convert README.md to README.rst for PyPI, installing pandoc on demand.

    The conversion is attempted once. If it fails with ``OSError`` (the error
    type this function has always treated as "pandoc is missing"), pandoc is
    downloaded and the conversion is retried exactly once. Installer files
    matching ``pandoc-*`` in the working directory are removed afterwards.

    Returns:
        True if README.rst was generated, False otherwise. The result of the
        retry is reported to the caller instead of being discarded.
    """

    def convert() -> None:
        pandoc.convert_file(
            "README.md", "rst", outputfile="README.rst", extra_args=["--verbose"]
        )

    def remove_installers() -> None:
        print("Cleaning up installer...")
        leftovers = []
        for installer in glob.glob("pandoc-*", recursive=False):
            try:
                os.remove(installer)
            except OSError:
                # Keep going so one stubborn file does not block the others.
                leftovers.append(installer)
        if leftovers:
            print(f"Unable to automatically clean up pandoc installer{'s' if len(leftovers) != 1 else ''}: {leftovers}.")

    try:
        convert()
        return True
    except OSError:
        print("Pandoc wasn't found, attempting to install...")
    except Exception:
        print("An unknown error occured while attempting to generate README.rst")
        return False

    # Single bounded retry: the previous self-recursion could loop forever if
    # the conversion kept failing after a successful download.
    try:
        from pypandoc.pandoc_download import download_pandoc
        download_pandoc()
        print("Pandoc was installed, attempting to generate README.rst again...")
        convert()
    except Exception:
        print("An unknown error occured while attempting to generate README.rst")
        return False
    finally:
        remove_installers()
    return True
def get_long_description():
    """Return the long description, preferring a generated README.rst.

    README.md (next to this file) is converted to README.rst with pypandoc
    when possible. Pandoc is only downloaded if the first conversion attempt
    fails with ``OSError``, instead of on every call. Any failure along the
    way produces a warning and falls back to whichever README already exists.

    :return: contents of README.rst if that file exists, else of README.md
    """
    here = os.path.dirname(__file__)
    rst_path = os.path.join(here, 'README.rst')
    md_path = os.path.join(here, 'README.md')

    try:
        # Imported here to avoid creating a dependency in the setup.py
        # if the .rst file already exists.
        # noinspection PyUnresolvedReferences
        from pypandoc import convert_file
    except ImportError:
        warnings.warn(
            "Module pypandoc not installed. Using markdown formatting.")
    else:
        try:
            try:
                # pandoc, you rock...
                rst_content = convert_file(md_path, 'rst')
            except OSError:
                # Treated as "pandoc binary missing": fetch it, retry once.
                # noinspection PyUnresolvedReferences
                from pypandoc.pandoc_download import download_pandoc
                download_pandoc()
                rst_content = convert_file(md_path, 'rst')
        except Exception as exc:
            # Best effort only: a broken pandoc setup must not abort setup.py.
            warnings.warn(
                "Unable to convert README.md to reStructuredText ({}). "
                "Using the existing README instead.".format(exc))
        else:
            with open(rst_path, 'w') as rst_file:
                rst_file.write(rst_content)

    if os.path.isfile(rst_path):
        with open(rst_path) as rst_file:
            return rst_file.read()

    # It will be messy, but it's better than nothing...
    with open(md_path) as md_file:
        return md_file.read()
def ensure_pandoc_installed():
    """Make sure pandoc is available, downloading it when the check fails.

    The pandoc path check runs first; only if it raises ``OSError`` is pandoc
    downloaded, after which the check is repeated.

    :raises OSError: if the path check still fails after the download
    """
    try:
        _ensure_pandoc_path()
    except OSError:
        # First check failed: fetch pandoc, then let a second failure propagate.
        download_pandoc()
        _ensure_pandoc_path()
    else:
        return None
def prepare(cases, advocates, documents, court_specific_documents, dest, now):
    """Build the dated export bundle and its metadata files in *dest*.

    CSV files and a README (Markdown plus pandoc-rendered HTML) are staged in
    a temporary directory, zipped into *dest*, and described by a
    ``*.meta.json`` file and a ``latest.json`` pointer.

    The staging directory is now removed when the function exits, including
    on error; it used to be left behind. The README files are written as
    UTF-8, the encoding pandoc reads and writes.

    :param cases: case records, passed to ``output.save_csv``
    :param advocates: advocate records, passed to ``output.save_csv``
    :param documents: document records, passed to ``output.save_csv``
    :param court_specific_documents: mapping of court name to that court's
        documents; each entry is saved as ``documents_<court name>``
    :param dest: output directory, created if missing
    :param now: object with ``strftime`` used to date-stamp the outputs
    """
    date_stamp = now.strftime('%Y-%m-%d')
    datafile_name = 'oadvokatech.ospravedlnosti.cz-{}.zip'.format(date_stamp)
    metafile_name = 'oadvokatech.ospravedlnosti.cz-{}.meta.json'.format(
        date_stamp)

    os.makedirs(dest, exist_ok=True)

    with tempfile.TemporaryDirectory() as tempdir:
        output.save_csv(cases, 'cases', output_dir=tempdir)
        output.save_csv(advocates, 'advocates', output_dir=tempdir)
        output.save_csv(documents, 'documents', output_dir=tempdir)
        for court_name, court_documents in court_specific_documents.items():
            output.save_csv(court_documents,
                            'documents_{}'.format(court_name),
                            output_dir=tempdir)

        readme = load_readme_content(cases, advocates, documents, now)
        readme_md_path = os.path.join(tempdir, 'README.md')
        with open(readme_md_path, 'w', encoding='utf-8') as f:
            f.write(readme)

        download_pandoc(version='1.19.1')
        css_path = os.path.join(
            os.path.dirname(os.path.dirname(__file__)),
            'resources', 'pandoc.css')
        readme_html = pypandoc.convert_file(
            readme_md_path,
            to='html5',
            extra_args=['-s', '-S', '-H', css_path])
        with open(os.path.join(tempdir, 'README.html'), 'w',
                  encoding='utf-8') as f:
            f.write(readme_html)

        with open(os.path.join(dest, metafile_name), 'w') as f:
            json.dump(
                {
                    'advocates': len(advocates),
                    'cases': len(cases),
                    'documents': len(documents),
                    'exported': now.strftime('%Y-%m-%d %H:%M:%S'),
                },
                f,
                indent=4,
                sort_keys=True)

        csv_names = [
            os.path.basename(csv_path)
            for csv_path in glob(os.path.join(tempdir, '*.csv'))
        ]
        with zipfile.ZipFile(os.path.join(dest, datafile_name), 'w',
                             zipfile.ZIP_DEFLATED) as zp:
            for fn in ['README.md', 'README.html'] + csv_names:
                print('adding', fn)
                zp.write(os.path.join(tempdir, fn), fn)

    # Written last so the pointer only appears once the archive and metadata exist.
    with open(os.path.join(dest, 'latest.json'), 'w') as f:
        json.dump({
            'data': datafile_name,
            'meta': metafile_name,
        }, f, indent=4, sort_keys=True)
def _write_readme_rst(convert_file, md_path, rst_path):
    """Convert *md_path* to reStructuredText and write it to *rst_path*."""
    # pandoc, you rock...
    rst_content = convert_file(md_path, 'rst')
    with open(rst_path, 'w', encoding='utf-8') as rst_file:
        # Rewrite line by line so every line ends with exactly one '\n'.
        rst_file.writelines(
            line + '\n' for line in rst_content.splitlines(keepends=False))


def get_long_description():
    """Load the long description from the README file.

    In the process, convert the README from .md to .rst using Pandoc, if
    possible. The conversion is best effort: if pypandoc is missing, if
    pandoc cannot be downloaded, or if the conversion still fails after the
    download, a warning is issued and the existing README is used.

    :return: contents of README.rst if that file exists, else of README.md
    """
    here = os.path.dirname(__file__)
    rst_path = os.path.join(here, 'README.rst')
    md_path = os.path.join(here, 'README.md')

    try:
        # Imported here to avoid creating a dependency in the setup.py
        # if the .rst file already exists.
        # noinspection PyUnresolvedReferences,PyPackageRequirements
        from pypandoc import convert_file
    except ImportError:
        warnings.warn(
            "Module pypandoc not installed. Unable to generate README.rst.")
    else:
        # First, try to convert assuming Pandoc is already installed. If that
        # fails, try to download & install it, and then convert again.
        # noinspection PyBroadException
        try:
            _write_readme_rst(convert_file, md_path, rst_path)
        except Exception:
            try:
                # noinspection PyUnresolvedReferences,PyPackageRequirements
                from pypandoc.pandoc_download import download_pandoc
                download_pandoc()
            except Exception:
                warnings.warn(
                    "Unable to download & install pandoc. Unable to generate README.rst."
                )
            else:
                # The retry used to be unguarded and could abort the caller
                # even though a Markdown fallback is available below.
                try:
                    _write_readme_rst(convert_file, md_path, rst_path)
                except Exception as exc:
                    warnings.warn(
                        "Unable to generate README.rst: {}".format(exc))

    if os.path.isfile(rst_path):
        with open(rst_path, encoding='utf-8') as rst_file:
            return rst_file.read()

    # It will be messy, but it's better than nothing...
    with open(md_path, encoding='utf-8') as md_file:
        return md_file.read()
def main():
    """Command-line entry point: look up a privacy-policy URL for each URL.

    URLs come from the ``<url>`` arguments and, optionally, from the file
    named by ``--urls`` (one per line; blank lines and ``#`` comments are
    ignored). Lookups run in a process pool when ``--jobs`` is above 1.
    URLs for which nothing was found are printed to stdout; progress
    messages go to stderr.
    """
    logging.basicConfig(level=logging.ERROR)

    # Download pandoc if needed
    try:
        # Check if Pandoc is available. Any failure of this probe triggers a
        # download; the previous ``except Error`` named an exception that is
        # not defined in the visible code.
        pypandoc.convert_text('#Test', 'rst', format='md')
    except Exception:
        # Download pandoc
        download_pandoc()

    args = docopt.docopt(__doc__)
    jobs = int(args['--jobs'])

    # Gather every url
    urls = list(args['<url>'])
    from_file = args.get('--urls')
    if from_file is not None:
        with open(from_file) as urls_file:
            urls.extend(urls_file)

    # Remove comments and empty lines. Strip first so that indented
    # comment lines are recognised as comments too.
    stripped_urls = (url.strip() for url in urls)
    urls = {url for url in stripped_urls if url and not url.startswith('#')}

    # Fetch data
    if not urls:
        return

    print("Processing %s urls" % len(urls), file=sys.stderr)
    print("Number of jobs: %s" % jobs, file=sys.stderr)
    print('-' * 15, file=sys.stderr)
    print("Privacy Bot")
    if jobs > 1:
        # The context manager shuts the worker processes down once the
        # blocking map() has returned its complete result list.
        with multiprocessing.Pool(jobs) as pool:
            print('Created Pool', file=sys.stderr)
            policies = pool.map(get_privacy_policy_url, urls)
    else:
        policies = map(get_privacy_policy_url, urls)
    print('Map done', file=sys.stderr)

    for url, result in zip(urls, policies):
        if not result:
            print('Not found', url)
    print('-' * 15, file=sys.stderr)
def download_pandoc():
    """Download the latest pandoc into ``path`` and tidy up afterwards.

    The module's own ``_handle_linux`` is installed on ``pandoc_download``
    before delegating to its ``download_pandoc``. Whether or not the download
    succeeds, the file at ``filename`` is deleted if it exists (presumably
    the downloaded installer; verify against the module-level definition).

    :return: whatever ``pandoc_download.download_pandoc`` returns
    """
    try:
        # Swap in our Linux handler before the real download runs.
        pandoc_download._handle_linux = _handle_linux
        outcome = pandoc_download.download_pandoc(
            targetfolder=path,
            version="latest",
        )
        return outcome
    finally:
        leftover_exists = os.path.isfile(filename)
        if leftover_exists:
            os.remove(filename)
def download_pandoc():
    """Download pandoc 1.19.1 into ``path``, cleaning up after a failure.

    If the download raises, the file at ``filename`` is removed when present
    (presumably a partially downloaded installer; verify against the
    module-level definition) and the original exception is re-raised.

    :return: whatever ``pandoc_download.download_pandoc`` returns
    """
    try:
        return pandoc_download.download_pandoc(targetfolder=path,
                                               version="1.19.1")
    except Exception:
        if os.path.isfile(filename):
            os.remove(filename)
        # Bare raise re-raises the active exception with its traceback intact.
        raise
def download_pandoc():
    """Download the latest pandoc into ``path``, cleaning up after a failure.

    The module's own ``_handle_linux`` is installed on ``pandoc_download``
    before delegating to its ``download_pandoc``. If anything raises, the
    file at ``filename`` is removed when present (presumably a partially
    downloaded installer; verify against the module-level definition) and
    the original exception is re-raised.

    :return: whatever ``pandoc_download.download_pandoc`` returns
    """
    try:
        pandoc_download._handle_linux = _handle_linux
        return pandoc_download.download_pandoc(targetfolder=path,
                                               version="latest")
    except Exception:
        if os.path.isfile(filename):
            os.remove(filename)
        # Bare raise re-raises the active exception with its traceback intact.
        raise
def run(self):
    """Download pandoc into the ``pypandoc/files`` folder beside this file."""
    # Imported lazily so pypandoc is only needed when this actually runs.
    from pypandoc.pandoc_download import download_pandoc

    here = os.path.dirname(os.path.realpath(__file__))
    download_pandoc(targetfolder=os.path.join(here, "pypandoc", "files"))
def _find_policy_headless(headless_scraper, url):
    """Search for a privacy policy for *url* through the headless browser."""
    print('Going headless with: ', url)
    for purl in iter_protocols(url):
        for link in headless_scraper.found_links(purl):
            if fetch_privacy_policy(url, link):
                # First hit ends the scan of this protocol variant only.
                break


def main():
    """Command-line entry point: find privacy policies and export them.

    URLs come from the ``<url>`` arguments and, optionally, from the file
    named by ``--urls`` (one per line; blank lines and ``#`` comments are
    ignored). Each URL is first looked up with ``get_privacy_policy_url``,
    in a process pool when ``--jobs`` is above 1. URLs without a result are
    retried through the headless browser. The de-duplicated contents of the
    module-level ``DF`` are finally written to ``DATA.csv``.
    """
    logging.basicConfig(level=logging.ERROR)

    # Download pandoc if needed
    try:
        # Check if Pandoc is available; any failure of the probe triggers a
        # download.
        pypandoc.convert_text('#Test', 'rst', format='md')
    except Exception:
        # Download pandoc
        download_pandoc()

    args = docopt.docopt(__doc__)
    jobs = int(args['--jobs'])

    # Gather every url
    urls = list(args['<url>'])
    from_file = args.get('--urls')
    if from_file is not None:
        with open(from_file) as urls_file:
            urls.extend(urls_file)

    # Remove comments and empty lines. Strip first so that indented
    # comment lines are recognised as comments too.
    stripped_urls = (url.strip() for url in urls)
    urls = {url for url in stripped_urls if url and not url.startswith('#')}

    # Instance of the headless browser scraper
    headless_scraper = HeadlessPrivacyScraper()
    try:
        # Fetch data
        if urls:
            print("Processing %s urls" % len(urls), file=sys.stderr)
            print("Number of jobs: %s" % jobs, file=sys.stderr)
            print('-' * 15, file=sys.stderr)
            print("Initiating Privacy Bot")
            if jobs > 1:
                # The context manager shuts the workers down once the
                # blocking map() has returned its complete result list.
                with multiprocessing.Pool(jobs) as pool:
                    print('Created Pool', file=sys.stderr)
                    policies = pool.map(get_privacy_policy_url, urls)
            else:
                policies = map(get_privacy_policy_url, urls)
            print('Map done', file=sys.stderr)

            for url, result in zip(urls, policies):
                print('RESULT: ', result)
                if result:
                    # NOTE(review): this stops processing at the first URL
                    # that has a result, leaving the remaining URLs
                    # unhandled. Kept as-is; confirm whether ``continue``
                    # was intended.
                    break
                print('Not found', url)
                _find_policy_headless(headless_scraper, url)
    finally:
        # Always release the browser, even when a lookup raised.
        print("Quiting headless browser")
        headless_scraper.quit_driver()

    if urls:
        # drop_duplicates() returns a new frame (assuming DF is a pandas
        # DataFrame; confirm), so the result must be the one that is written.
        # The previous bare call discarded it.
        DF.drop_duplicates().to_csv(
            "DATA.csv", sep=',', encoding='utf-8', index=False)
        print("done")
        print('-' * 15, file=sys.stderr)
# expects an installed pypandoc: pip install pypandoc from pypandoc.pandoc_download import download_pandoc # see the documentation how to customize the installation path # but be aware that you then need to include it in the `PATH` download_pandoc()
#!/usr/bin/env python # http://stackoverflow.com/questions/9810603/adding-install-requires-to-setup-py-when-making-a-python-package try: from setuptools import setup except ImportError: from distutils.core import setup try: import pypandoc try: LONG_DESCRIPTION = pypandoc.convert('README.md', 'rst') except: # Catch all exceptions because FileNotFoundError is only in 3.x from pypandoc.pandoc_download import download_pandoc download_pandoc() LONG_DESCRIPTION = pypandoc.convert('README.md', 'rst') except ImportError: with open('README.md', 'r') as f: LONG_DESCRIPTION = f.read() setup( name='molml', version='0.8.0', description='An interface between molecules and machine learning', long_description=LONG_DESCRIPTION, author='Chris Collins', author_email='*****@*****.**', url='https://github.com/crcollins/molml/', license='MIT', packages=['molml'],
def run(self):
    """Fetch pandoc and place it under ``pypandoc/files`` next to this file."""
    from pypandoc.pandoc_download import download_pandoc

    # Resolve symlinks first so the target sits beside the real source file.
    source_dir = os.path.dirname(os.path.realpath(__file__))
    files_dir = os.path.join(source_dir, "pypandoc", "files")
    download_pandoc(targetfolder=files_dir)