Example #1
0
def write_eda_notebook(m):
    """Fetch the EDA notebook template from Github and write a per-resource
    copy into the notebooks/ directory, then set the resource name cell."""

    url = "https://raw.githubusercontent.com/Metatab/exploratory-data-analysis/master/eda.ipynb"

    resource = m.get_resource()

    # A resource is required; show the candidates and bail out.
    if not resource:
        warn('Must specify a resource. Select one of:')
        list_rr(m.doc)
        sys.exit(0)

    response = requests.get(url, allow_redirects=True)
    response.raise_for_status()

    template_stem = splitext(basename(url))[0]
    nb_path = Path('notebooks/{}-{}.ipynb'.format(template_stem, resource.name))

    ensure_dir(nb_path.parent)

    # Refuse to clobber an existing notebook.
    if nb_path.exists():
        err("Notebook {} already exists".format(nb_path))

    with nb_path.open('wb') as out:
        out.write(response.content)

    prt('Wrote {}'.format(nb_path))

    # Point the notebook's resource_name cell at the selected resource.
    with edit_notebook(nb_path) as nb:
        set_cell_source(nb, 'resource_name',
                        "resource_name='{}'".format(resource.name))
Example #2
0
    def _load_file(self, filename, contents):
        """Write *contents* (bytes) to *filename* below the package path,
        creating parent directories as needed; bytecode caches are skipped."""

        # Never materialize Python bytecode caches into the package.
        if "__pycache__" in filename:
            return

        dest = join(self.package_path.path, filename)
        ensure_dir(dirname(dest))

        with open(dest, 'wb') as out:
            out.write(contents)
Example #3
0
def convert_notebook(nb_path):
    """Convert a Jupyter notebook into a Metatab source package.

    Executes the notebook, writes the generated metatab file and exported
    documentation into the executor's output directory, copies any Python
    library directories the notebook used, and writes the final CSV.

    :param nb_path: Filesystem path to the notebook to convert.
    """
    prt('Convert notebook to Metatab source package')

    if not exists(nb_path):
        err("Notebook path does not exist: '{}' ".format(nb_path))

    c = Config()

    pe = NotebookExecutor(config=c, log=logger)

    prt('Running the notebook')
    output, resources = pe.from_filename(nb_path)

    fw = FilesWriter()
    fw.build_directory = pe.output_dir

    fw.write(output, resources, notebook_name=DEFAULT_METATAB_FILE)

    de = DocumentationExporter(config=c,
                               log=logger,
                               metadata=doc_metadata(pe.doc))

    prt('Exporting documentation')
    output, resources = de.from_filename(nb_path)

    fw.build_directory = join(pe.output_dir, 'docs')
    fw.write(output, resources, notebook_name='notebook')

    new_mt_file = join(pe.output_dir, DEFAULT_METATAB_FILE)

    doc = MetapackDoc(new_mt_file)

    de.update_metatab(doc, resources)

    for lib_dir in pe.lib_dirs:
        # str.lstrip('./') strips any run of the *characters* '.' and '/',
        # which would mangle a name like '.lib' into 'ib'. Drop leading
        # relative segments explicitly instead.
        lib_dir = normpath(lib_dir)
        while lib_dir.startswith('./') or lib_dir.startswith('../'):
            lib_dir = lib_dir.split('/', 1)[1]

        doc['Resources'].new_term("Root.PythonLib", lib_dir)

        path = abspath(lib_dir)
        dest = join(pe.output_dir, lib_dir)

        # shutil.copytree() creates `dest` itself (including parents, via
        # os.makedirs) and raises FileExistsError if it already exists, so
        # pre-creating it with ensure_dir() would make the copy fail.
        copytree(path, dest)

    doc.write_csv()

    # Reset the input to use the new data

    prt('Running with new package file: {}'.format(new_mt_file))
Example #4
0
    def mt_materialize(self, line):
        """Materialize a single dataframe to a CSV file.

        Writes the named dataframe from the IPython user namespace to
        ``<dir>/<df_name>.csv`` and prints a JSON record with the dataframe
        name and the output path.
        """

        from json import dumps
        from rowgenerators import get_cache
        from rowgenerators.generator.python import PandasDataframeSource
        from metapack.util import ensure_dir
        import numpy as np
        import csv
        from os.path import join

        args = parse_argstring(self.mt_materialize, line)

        dr = args.dir.strip("'")

        ensure_dir(dr)

        try:
            cache = self.mt_doc._cache
        except KeyError:
            cache = get_cache()

        path = join(dr, args.df_name + ".csv")

        source_df = self.shell.user_ns[args.df_name]
        try:
            df = fill_categorical_na(source_df.copy())
        except ValueError:
            # For categorical columns, the fill value must be one of the
            # categories, and '' probably isn't. Fall back to an unfilled
            # copy; previously this branch was `pass`, leaving `df`
            # undefined and raising NameError below.
            df = source_df.copy()

        if len(df.index.names) == 1 and df.index.names[
                0] is None and df.index.dtype != np.dtype('O'):
            # Simple index; ignore it.
            df.to_csv(path, index=False)
        else:
            # PandasDataFrameSource has some more complex handling of multi-level indices
            gen = PandasDataframeSource(parse_app_url(path), df, cache=cache)

            with open(path, 'w') as f:
                w = csv.writer(f)
                w.writerows(gen)

        print(dumps({
            'df_name': args.df_name,
            'path': path,
        }, indent=4))
Example #5
0
def write_notebook(m):
    """Fetch the package notebook template from Github and save it under
    notebooks/ using the stem of the requested notebook name."""

    url = "https://raw.githubusercontent.com/Metatab/notebook-templates/master/package-notebook.ipynb"

    response = requests.get(url, allow_redirects=True)
    response.raise_for_status()

    requested = Path(m.args.new_notebook)
    nb_path = f'notebooks/{requested.stem}.ipynb'

    ensure_dir(dirname(nb_path))

    # Refuse to clobber an existing notebook.
    if exists(nb_path):
        err("Notebook {} already exists".format(nb_path))

    with open(nb_path, 'wb') as out:
        out.write(response.content)

    prt('Wrote {}'.format(nb_path))
Example #6
0
def new_cmd(args):
    """Create a new Metapack package from command-line arguments.

    Builds a metatab document (from a template, or from a Jupyter notebook
    template when --jupyter is given), fills in Root terms from the args and
    an optional defaults file, and writes either a single CSV or a full
    source-package directory.

    :param args: argparse namespace with the `metapack new` options.
    """
    from metapack import MetapackDoc
    from metapack.util import make_metatab_file, datetime_now, ensure_dir
    from metapack.cli.core import write_doc, prt, err
    from os.path import exists, join, expanduser
    from metatab import DEFAULT_METATAB_FILE
    from os import getenv

    # Resolve the defaults (config) file: explicit flag, then environment,
    # then the conventional dotfile in the home directory.
    if args.config:
        config_file = args.config
    elif getenv("METAPACK_CONFIG"):
        # Previously this branch tested METAPACK_CONFIG but read
        # METAPACK_DEFAULTS, so the tested value was silently dropped.
        config_file = getenv("METAPACK_CONFIG")
    elif exists(expanduser('~/.metapack-defaults.csv')):
        # The old condition was `elif expanduser(...)`, which is always
        # truthy (expanduser returns a string) and named a different file
        # ('-default' vs '-defaults'); test existence of one consistent path.
        config_file = expanduser('~/.metapack-defaults.csv')
    else:
        config_file = None

    if config_file and exists(config_file):
        prt(f"Using defaults file '{config_file}'")
        config = MetapackDoc(config_file)
    else:
        config = MetapackDoc()

    if args.jupyter:
        import tempfile

        with tempfile.NamedTemporaryFile(suffix='.ipynb', delete=False) as fp:

            r = requests.get(TEMPLATE_NOTEBOOK, allow_redirects=True)
            r.raise_for_status()

            fp.write(r.content)
            nb_path = Path(fp.name)

        doc = MetapackDoc(str(nb_path))

    else:

        doc = make_metatab_file(args.template)

    doc['Root']['Created'] = datetime_now()

    origin = args.origin or config.get_value('Root.Origin')

    if not origin:
        err("Must specify a value for origin, either on command line or in defaults file"
            )

    # For each Root term, prefer the command-line value, falling back to the
    # defaults file; `et` is an empty-term fallback so assignment is safe.
    (doc['Root'].find_first('Root.Origin') or et).value = origin
    (doc['Root'].find_first('Root.Dataset') or et).value = args.dataset
    (doc['Root'].find_first('Root.Space')
     or et).value = args.space or config.get_value('Root.Space')
    (doc['Root'].find_first('Root.Time')
     or et).value = args.time or config.get_value('Root.Time')
    (doc['Root'].find_first('Root.Grain')
     or et).value = args.grain or config.get_value('Root.Grain')
    (doc['Root'].find_first('Root.Variant')
     or et).value = args.variant or config.get_value('Root.Variant')

    v = doc['Root'].get_or_new_term('Root.Version')
    v.get_or_new_child(
        'Version.Major'
    ).value = args.revision or config.get_value('Root.Version')
    v.get_or_new_child('Version.Minor').value = 1
    v.get_or_new_child('Version.Patch').value = 1

    # Copy contacts in
    if 'Contacts' in config:
        for c in config['Contacts']:
            doc['Contacts'].add_term(c)

    if args.title:
        doc['Root'].find_first('Root.Title').value = args.title.strip()

    nv_name = doc.as_version(None)

    if args.example:
        doc['Resources'].new_term(
            'Root.Datafile',
            'http://public.source.civicknowledge.com/example.com/sources/random-names.csv',
            name='random_names')

        doc['Documentation'].new_term('Root.Homepage',
                                      'http://metatab.org',
                                      title='Metatab Home Page')

    doc.ensure_identifier()
    doc.update_name(create_term=True)

    # Default to False so a missing attribute (metatab_jupyter not
    # installed) doesn't raise; bare getattr() defeated that purpose.
    if getattr(args, 'jupyter', False):

        from metapack_jupyter.convert import write_metatab_notebook
        from metapack_jupyter.core import edit_notebook, set_cell_source, get_cell_source

        new_nb_path = Path(f'{nv_name}.ipynb')

        doc['Resources'].new_term(
            'Root.Datafile',
            './' + str(new_nb_path) + "#df",
            name='local_dataframe',
            description='Example of using a local Dataframe')

        if new_nb_path.exists():
            # The old message reported the temp file (nb_path) as a
            # "Directory"; report the actual conflicting notebook.
            err(f"Notebook {new_nb_path} already exists")

        copyfile(nb_path, new_nb_path)

        write_metatab_notebook(doc, new_nb_path)

        with edit_notebook(new_nb_path) as nb:
            init = get_cell_source(nb, 'init')
            init += f"\nthis_package_name = '{str(new_nb_path.name)}'"
            set_cell_source(nb, 'init', init)

        nb_path.unlink()
    else:

        doc['Documentation'].new_term('Root.Documentation',
                                      'file:README.md',
                                      title='README')

        if exists(nv_name):
            err(f"Directory {nv_name} already exists")

        if args.csv:
            fn = doc.nonver_name + '.csv'
            write_doc(doc, fn)
            prt(f"Writing to {fn}")

        else:
            ensure_dir(nv_name)

            pylib_dir = join(nv_name, 'pylib')
            ensure_dir(pylib_dir)
            # Seed pylib/__init__.py from the packaged pylib module source.
            with open(join(pylib_dir, '__init__.py'),
                      'w') as f_out, open(pylib.__file__) as f_in:
                f_out.write(f_in.read())

            if args.example:
                doc['Resources'].new_term('Root.Datafile',
                                          'python:pylib#row_generator',
                                          name='row_generator')

            prt(f"Writing to '{nv_name}'")

            write_doc(doc, join(nv_name, DEFAULT_METATAB_FILE))

            add_missing_files(nv_name)

            if args.title:
                readme = '# {}\n'.format(args.title)
            else:
                readme = '# {}\n'.format(doc.get_value('Root.Name'))

            with open(join(nv_name, 'README.md'), 'w') as f:
                f.write(readme)