Esempio n. 1
0
def generate_readme_rst() -> bool:
    """Convert README.md to README.rst for PyPI, installing pandoc if needed.

    The conversion is attempted once. If it fails because no pandoc binary
    was found, pandoc is downloaded through pypandoc and the conversion is
    retried exactly once. Any ``pandoc-*`` installer files left in the
    current directory are then removed on a best-effort basis.

    Returns:
        True if README.rst was generated, False otherwise.
    """

    def convert() -> None:
        pandoc.convert_file("README.md", "rst", outputfile="README.rst", extra_args=["--verbose"])

    try:
        convert()
        return True
    except OSError as e:
        # Only a missing pandoc binary justifies an install attempt; any
        # other OSError would simply fail again after the download.
        if "no pandoc was found" not in str(e).lower():
            print(f"Unable to generate README.rst: {e}")
            return False
    except Exception:
        print("An unknown error occured while attempting to generate README.rst")
        return False

    print("Pandoc wasn't found, attempting to install...")
    try:
        from pypandoc.pandoc_download import download_pandoc
        download_pandoc()

        print("Pandoc was installed, attempting to generate README.rst again...")
        # Retry once without recursing, so a broken install cannot loop
        # forever, and so the outcome of the retry decides the return value.
        convert()
    except Exception:
        print("An unknown error occured while attempting to generate README.rst")
        return False
    finally:
        print("Cleaning up installer...")
        # Collected outside the inner try so the name is always bound when
        # the failure message below is formatted.
        pandoc_installers = glob.glob("pandoc-*", recursive=False)
        try:
            for installer in pandoc_installers:
                os.remove(installer)
        except OSError:
            print(f"Unable to automatically clean up pandoc installer{'s' if (len(pandoc_installers) != 1) else ''}: {pandoc_installers}.")

    return True
Esempio n. 2
0
def get_long_description():
    """Return the package's long description, preferring reStructuredText.

    When pypandoc can be imported, pandoc is downloaded and README.md is
    converted to README.rst next to this file. The function then returns the
    contents of README.rst if it exists, otherwise the raw README.md.
    """
    here = os.path.dirname(__file__)
    rst_path = os.path.join(here, 'README.rst')
    md_path = os.path.join(here, 'README.md')

    try:
        # Imported lazily so that setup.py does not depend on pypandoc
        # when a README.rst is already present.

        # noinspection PyUnresolvedReferences
        from pypandoc.pandoc_download import download_pandoc

        download_pandoc()

        # noinspection PyUnresolvedReferences
        from pypandoc import convert_file
    except ImportError:
        warnings.warn(
            "Module pypandoc not installed. Using markdown formatting.")
    else:
        converted = convert_file(md_path, 'rst')
        with open(rst_path, 'w') as rst_out:
            rst_out.write(converted)

    # Markdown is a messy fallback, but better than no description at all.
    chosen_path = rst_path if os.path.isfile(rst_path) else md_path
    with open(chosen_path) as readme_file:
        return readme_file.read()
Esempio n. 3
0
def ensure_pandoc_installed():
    """Try to install pandoc if it isn't installed.

    Runs ``_ensure_pandoc_path()``; if that raises ``OSError``, calls
    ``download_pandoc()`` and runs the check a second time.

    :raises OSError: if the check still fails after the download attempt
    """
    try:
        _ensure_pandoc_path()
    except OSError:
        # First check failed: download pandoc, then verify again. An OSError
        # from this second check is not caught and propagates to the caller.
        download_pandoc()
        _ensure_pandoc_path()
Esempio n. 4
0
def prepare(cases, advocates, documents, court_specific_documents, dest, now):
    """Export the dataset into ``dest`` as a dated ZIP plus JSON metadata.

    CSV files and a README (Markdown and pandoc-rendered HTML) are staged in
    a temporary directory, zipped into ``dest``, and described by a
    ``*.meta.json`` file. ``latest.json`` in ``dest`` is then pointed at the
    new archive and metadata file.

    :param cases: collection of cases; saved as ``cases`` CSV and counted
    :param advocates: collection of advocates; saved as CSV and counted
    :param documents: collection of documents; saved as CSV and counted
    :param court_specific_documents: mapping of court name to that court's
        documents; each entry is saved as ``documents_<court name>``
    :param dest: output directory, created if it does not exist
    :param now: export timestamp; must provide ``strftime`` (presumably a
        ``datetime`` -- confirm against the caller)
    """
    os.makedirs(dest, exist_ok=True)

    date_stamp = now.strftime('%Y-%m-%d')
    datafile_name = 'oadvokatech.ospravedlnosti.cz-{}.zip'.format(date_stamp)
    metafile_name = 'oadvokatech.ospravedlnosti.cz-{}.meta.json'.format(
        date_stamp)

    # TemporaryDirectory removes the staging area even when a step fails;
    # the previous mkdtemp() directory was never deleted.
    with tempfile.TemporaryDirectory() as tempdir:
        output.save_csv(cases, 'cases', output_dir=tempdir)
        output.save_csv(advocates, 'advocates', output_dir=tempdir)
        output.save_csv(documents, 'documents', output_dir=tempdir)
        for court_name, court_documents in court_specific_documents.items():
            output.save_csv(court_documents,
                            'documents_{}'.format(court_name),
                            output_dir=tempdir)

        readme = load_readme_content(cases, advocates, documents, now)
        readme_md_path = os.path.join(tempdir, 'README.md')
        # pandoc reads its input as UTF-8, so do not rely on the locale
        # encoding when writing the Markdown source.
        with open(readme_md_path, 'w', encoding='utf-8') as f:
            f.write(readme)

        download_pandoc(version='1.19.1')
        readme_html = pypandoc.convert_file(
            readme_md_path,
            to='html5',
            extra_args=[
                '-s', '-S', '-H',
                os.path.join(os.path.dirname(os.path.dirname(__file__)),
                             'resources', 'pandoc.css')
            ])
        # pandoc's output is UTF-8 as well; write it back unchanged.
        with open(os.path.join(tempdir, 'README.html'), 'w',
                  encoding='utf-8') as f:
            f.write(readme_html)

        with open(os.path.join(dest, metafile_name), 'w') as f:
            json.dump(
                {
                    'advocates': len(advocates),
                    'cases': len(cases),
                    'documents': len(documents),
                    'exported': now.strftime('%Y-%m-%d %H:%M:%S'),
                },
                f,
                indent=4,
                sort_keys=True)

        # Sorted so the archive's member order does not depend on the
        # filesystem's directory listing order.
        csv_names = sorted(
            os.path.basename(csv_path)
            for csv_path in glob(os.path.join(tempdir, '*.csv')))
        with zipfile.ZipFile(os.path.join(dest, datafile_name), 'w',
                             zipfile.ZIP_DEFLATED) as zp:
            for fn in ['README.md', 'README.html'] + csv_names:
                print('adding', fn)
                zp.write(os.path.join(tempdir, fn), fn)

    # Written last, so latest.json is only updated after a complete export.
    with open(os.path.join(dest, 'latest.json'), 'w') as f:
        json.dump({
            'data': datafile_name,
            'meta': metafile_name,
        },
                  f,
                  indent=4,
                  sort_keys=True)
Esempio n. 5
0
def get_long_description():
    """Return the long description text, regenerating README.rst if possible.

    README.md is converted to README.rst with pypandoc when that module can
    be imported. If the first conversion fails, pandoc is downloaded and the
    conversion is tried once more. The result is the content of README.rst
    when that file exists, otherwise the content of README.md.
    """
    base_dir = os.path.dirname(__file__)
    rst_path = os.path.join(base_dir, 'README.rst')
    md_path = os.path.join(base_dir, 'README.md')

    def _regenerate_rst(convert):
        # Convert the Markdown README and write it out line by line, each
        # line terminated with '\n'.
        converted = convert(md_path, 'rst')
        with open(rst_path, 'w') as rst_out:
            rst_out.writelines(
                row + '\n' for row in converted.splitlines(keepends=False))

    try:
        # Imported lazily so that setup.py does not depend on pypandoc
        # when a README.rst is already present.

        # noinspection PyUnresolvedReferences,PyPackageRequirements
        from pypandoc import convert_file
    except ImportError:
        warnings.warn(
            "Module pypandoc not installed. Unable to generate README.rst.")
    else:
        # Optimistically assume pandoc is installed; only when the first
        # attempt fails is pandoc downloaded and the conversion repeated.
        # noinspection PyBroadException
        try:
            _regenerate_rst(convert_file)
        except Exception:
            try:
                # noinspection PyUnresolvedReferences,PyPackageRequirements
                from pypandoc.pandoc_download import download_pandoc

                download_pandoc()
            except FileNotFoundError:
                warnings.warn(
                    "Unable to download & install pandoc. Unable to generate README.rst."
                )
            else:
                _regenerate_rst(convert_file)

    # Markdown is a messy fallback, but better than no description at all.
    chosen_path = rst_path if os.path.isfile(rst_path) else md_path
    with open(chosen_path) as readme_file:
        return readme_file.read()
Esempio n. 6
0
def main():
    """Look up the privacy-policy URL of every site given on the command line.

    Makes sure pandoc is usable (downloading it when pypandoc cannot find
    it), gathers URLs from the ``<url>`` arguments and the optional
    ``--urls`` file, and drops comment and blank lines. It then runs
    ``get_privacy_policy_url`` over them, in a process pool of ``--jobs``
    workers when more than one job is requested, and prints every URL for
    which nothing was found.
    """
    logging.basicConfig(level=logging.ERROR)

    # Download pandoc if needed
    try:
        # Check if Pandoc is available
        pypandoc.convert_text('#Test', 'rst', format='md')
    except OSError:
        # pypandoc raises OSError when no pandoc binary can be found.
        download_pandoc()

    args = docopt.docopt(__doc__)
    jobs = int(args['--jobs'])

    # Gather every urls
    urls = args['<url>']
    from_file = args.get('--urls')
    if from_file is not None:
        with open(from_file) as urls_file:
            urls.extend(urls_file)

    # Remove comments and empty lines
    urls = set(
        url.strip() for url in urls
        if not url.startswith('#') and url.strip()
    )

    # Fetch data
    if not urls:
        return

    print("Processing %s urls" % len(urls), file=sys.stderr)
    print("Number of jobs: %s" % jobs, file=sys.stderr)
    print('-' * 15, file=sys.stderr)
    print("Privacy Bot")

    if jobs > 1:
        pool = multiprocessing.Pool(jobs)
        try:
            print('Created Pool', file=sys.stderr)
            policies = pool.map(get_privacy_policy_url, urls)
        finally:
            # Release the worker processes whether or not the map succeeded.
            pool.close()
            pool.join()
    else:
        # Evaluated eagerly so that 'Map done' below is accurate.
        policies = [get_privacy_policy_url(url) for url in urls]

    print('Map done', file=sys.stderr)
    # The set is not modified between the map and this loop, so both
    # iterate it in the same order and each URL pairs with its own result.
    for url, result in zip(urls, policies):
        if not result:
            print('Not found', url)

    print('-' * 15, file=sys.stderr)
Esempio n. 7
0
 def download_pandoc():
     """Download the latest pandoc into ``path`` using a patched Linux handler.

     ``path``, ``filename`` and ``_handle_linux`` come from the enclosing
     scope. Returns whatever ``pandoc_download.download_pandoc`` returns.
     """
     try:
         # Replace the library's Linux handler with the local implementation
         # before starting the download.
         pandoc_download._handle_linux = _handle_linux
         return pandoc_download.download_pandoc(targetfolder=path, version="latest")
     finally:
         # Runs on success and on failure: remove ``filename`` if it exists.
         if os.path.isfile(filename):
             os.remove(filename)
Esempio n. 8
0
 def download_pandoc():
     """Download pandoc 1.19.1 into ``path``; on failure delete ``filename``.

     ``path`` and ``filename`` come from the enclosing scope. Any exception
     from the download is re-raised after the cleanup.
     """
     try:
         outcome = pandoc_download.download_pandoc(
             targetfolder=path, version="1.19.1")
     except Exception as exc:
         # Remove the file left behind by the failed download, if any.
         leftover_present = os.path.isfile(filename)
         if leftover_present:
             os.remove(filename)
         raise exc
     return outcome
Esempio n. 9
0
 def download_pandoc():
     """Download the latest pandoc into ``path`` with a patched Linux handler.

     ``path``, ``filename`` and ``_handle_linux`` come from the enclosing
     scope. If anything fails, ``filename`` is deleted when present and the
     exception is re-raised.
     """
     try:
         # Install the local Linux handler before starting the download.
         setattr(pandoc_download, "_handle_linux", _handle_linux)
         outcome = pandoc_download.download_pandoc(
             targetfolder=path, version="latest")
     except Exception as exc:
         # Remove the file left behind by the failed download, if any.
         leftover_present = os.path.isfile(filename)
         if leftover_present:
             os.remove(filename)
         raise exc
     return outcome
Esempio n. 10
0
 def run(self):
     """Download pandoc into the ``pypandoc/files`` folder beside this file."""
     from pypandoc.pandoc_download import download_pandoc

     # Resolve symlinks first so the folder is relative to the real file.
     script_dir = os.path.dirname(os.path.realpath(__file__))
     download_pandoc(
         targetfolder=os.path.join(script_dir, "pypandoc", "files"))
Esempio n. 11
0
def main():
    """Find privacy policies for the given URLs, with a headless fallback.

    Makes sure pandoc is usable (downloading it if the probe conversion
    fails), gathers URLs from the ``<url>`` arguments and the optional
    ``--urls`` file, and runs ``get_privacy_policy_url`` over them, in a
    process pool of ``--jobs`` workers when more than one job is requested.
    URLs without a result are retried through ``HeadlessPrivacyScraper``.
    Finally the module-level ``DF`` is written, de-duplicated, to
    ``DATA.csv``.
    """
    logging.basicConfig(level=logging.ERROR)

    # Download pandoc if needed
    try:
        # Check if Pandoc is available
        pypandoc.convert_text('#Test', 'rst', format='md')
    except Exception:
        # Download pandoc
        download_pandoc()

    args = docopt.docopt(__doc__)
    jobs = int(args['--jobs'])

    # Gather every urls
    urls = args['<url>']
    from_file = args.get('--urls')
    if from_file is not None:
        with open(from_file) as urls_file:
            urls.extend(urls_file)

    # Remove comments and empty lines
    urls = set(url.strip() for url in urls
               if not url.startswith('#') and url.strip())

    # Nothing to fetch: return before starting a browser that would never
    # be shut down.
    if not urls:
        return

    # instance of Headless Browser Scrapper
    headless_scraper = HeadlessPrivacyScraper()
    try:
        print("Processing %s urls" % len(urls), file=sys.stderr)
        print("Number of jobs: %s" % jobs, file=sys.stderr)
        print('-' * 15, file=sys.stderr)
        print("Initiating Privacy Bot")

        if jobs > 1:
            pool = multiprocessing.Pool(jobs)
            try:
                print('Created Pool', file=sys.stderr)
                policies = pool.map(get_privacy_policy_url, urls)
            finally:
                # Release the worker processes whether or not the map
                # succeeded.
                pool.close()
                pool.join()
        else:
            policies = map(get_privacy_policy_url, urls)

        print('Map done', file=sys.stderr)
        for url, result in zip(urls, policies):
            print('RESULT: ', result)
            if result:
                # NOTE(review): this stops at the first URL that has a
                # result, so later URLs never reach the headless fallback.
                # Confirm whether `continue` was intended.
                break
            else:
                print('Not found', url)

                print('Going headless with: ', url)
                for purl in iter_protocols(url):
                    links = headless_scraper.found_links(purl)
                    # NOTE(review): on Python 3 this map is lazy and never
                    # consumed, so it has no effect. Confirm it can be
                    # removed.
                    policies = map(get_privacy_policy_url, links)
                    for link in links:
                        if fetch_privacy_policy(url, link):
                            break
    finally:
        # Always shut the browser down, even if scraping raised.
        print("Quiting headless browser")
        headless_scraper.quit_driver()

    # drop_duplicates() returns a new frame; calling it without using the
    # result left the duplicates in the exported CSV.
    DF.drop_duplicates().to_csv(
        "DATA.csv", sep=',', encoding='utf-8', index=False)
    print("done")

    print('-' * 15, file=sys.stderr)
# Requires pypandoc to be installed first: pip install pypandoc
from pypandoc.pandoc_download import download_pandoc
# See the pypandoc documentation for how to customize the installation path;
# be aware that a custom location must then be added to `PATH`.
download_pandoc()
Esempio n. 13
0
#!/usr/bin/env python

# http://stackoverflow.com/questions/9810603/adding-install-requires-to-setup-py-when-making-a-python-package
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

# Build the long description: convert README.md to reStructuredText with
# pypandoc when it is importable, otherwise fall back to the raw Markdown.
try:
    import pypandoc
    try:
        LONG_DESCRIPTION = pypandoc.convert('README.md', 'rst')
    except Exception:
        # Kept deliberately broad because FileNotFoundError exists only on
        # Python 3. Unlike a bare ``except:`` this still lets
        # KeyboardInterrupt and SystemExit through.
        from pypandoc.pandoc_download import download_pandoc
        download_pandoc()
        LONG_DESCRIPTION = pypandoc.convert('README.md', 'rst')
except ImportError:
    with open('README.md', 'r') as f:
        LONG_DESCRIPTION = f.read()

setup(
    name='molml',
    version='0.8.0',
    description='An interface between molecules and machine learning',
    long_description=LONG_DESCRIPTION,
    author='Chris Collins',
    author_email='*****@*****.**',
    url='https://github.com/crcollins/molml/',
    license='MIT',
    packages=['molml'],
Esempio n. 14
0
 def run(self):
     """Download pandoc into the ``pypandoc/files`` folder beside this file."""
     # Imported here rather than at module level, so pypandoc is only
     # required when this command actually runs.
     from pypandoc.pandoc_download import download_pandoc
     # realpath resolves symlinks, so the folder is relative to the real file.
     targetfolder = os.path.join(
         os.path.dirname(os.path.realpath(__file__)), "pypandoc", "files")
     download_pandoc(targetfolder=targetfolder)