def write_eda_notebook(m):
    # Get the EDA notebook file from Github
    url = "https://raw.githubusercontent.com/Metatab/exploratory-data-analysis/master/eda.ipynb"

    resource = m.get_resource()

    if not resource:
        warn('Must specify a resource. Select one of:')
        list_rr(m.doc)
        sys.exit(0)

    r = requests.get(url, allow_redirects=True)
    r.raise_for_status()

    nb_path = Path('notebooks/{}-{}.ipynb'.format(splitext(basename(url))[0], resource.name))

    ensure_dir(nb_path.parent)

    if nb_path.exists():
        err("Notebook {} already exists".format(nb_path))

    with nb_path.open('wb') as f:
        f.write(r.content)

    prt('Wrote {}'.format(nb_path))

    with edit_notebook(nb_path) as nb:
        set_cell_source(nb, 'resource_name', "resource_name='{}'".format(resource.name))

def convert_documentation(nb_path):
    """Run only the document conversion portion of the notebook conversion.

    The final document will not be complete.
    """
    with open(nb_path) as f:
        nb = nbformat.reads(f.read(), as_version=4)

    doc = ExtractInlineMetatabDoc(package_url="metapack+file:" + dirname(nb_path)).run(nb)

    package_name = doc.as_version(None)

    output_dir = join(getcwd(), package_name)

    de = DocumentationExporter(config=Config(), log=logger, metadata=doc_metadata(doc))

    prt('Converting documentation')
    output, resources = de.from_filename(nb_path)

    fw = FilesWriter()
    fw.build_directory = join(output_dir, 'docs')
    fw.write(output, resources, notebook_name='notebook')

    prt("Wrote documentation to {}".format(fw.build_directory))

def maybe_trial_build(m):
    """Update the metadata for a trial build, then restore it."""
    from shutil import copyfile

    if not m.args.trial:
        yield False, m.mt_file
        return

    if not m.doc._has_semver():
        raise MetapackError("To use trial builds, the package must have a semantic version")

    prt('Building a trial')

    mt_file = Path(m.mt_file.fspath).parent.joinpath('trial.csv')

    copyfile(m.mt_file.fspath, mt_file)

    doc = MetapackDoc(mt_file)

    version = doc['Root'].find_first('Root.Version')
    vb = version.get_or_new_child('Version.Build')
    vb.value = 'trial'

    try:
        doc.update_name()
        doc.write()
        yield True, parse_app_url(str(mt_file), downloader)
    finally:
        mt_file.unlink()

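# A minimal usage sketch (an assumption, not from the source): because
# maybe_trial_build() yields exactly once and cleans up in a ``finally``
# block, it appears designed to be used as a context manager, e.g. wrapped
# with contextlib.contextmanager:
#
#   from contextlib import contextmanager
#
#   with contextmanager(maybe_trial_build)(m) as (is_trial, mt_file):
#       # mt_file is the 'trial.csv' copy when is_trial is True;
#       # otherwise it is the original metadata file.
#       run_build(mt_file)  # hypothetical consumer
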
def compare_hashes(m):
    hp = Path(m.package_root.fspath, '.hashes.yaml')

    if not hp.exists():
        print("!!! NO HASHES: ", hp)
        return

    hashes = yaml.safe_load(hp.read_text())

    pm = last_build_marker_path(m)

    diffs = 0

    if pm.exists():
        p = open_package(pm.read_text())

        for r in p.resources():
            h1 = r.raw_row_generator.hash
            h2 = hashes['last_hashes'].get(r.name)

            if h1 != h2:
                diffs += 1

    prt(f"{diffs} diffs")

    if diffs:
        sys.exit(1)
    else:
        sys.exit(0)

def _process_metapack_resource(doc, r, force):
    remote_resource = r.resolved_url.resource

    if not remote_resource:
        warn('Metatab resource could not be resolved from {}'.format(r.resolved_url))
        return

    remote_st = remote_resource.schema_term

    schema_term = r.schema_term

    if schema_term:
        prt("Updating table '{}'".format(r.schema_name))

        # Remove the existing columns so they can be added back later,
        # possibly in a new order
        for child in list(schema_term.children):
            schema_term.remove_child(child)
    else:
        prt("Adding table '{}'".format(r.schema_name))
        schema_term = doc['Schema'].new_term('Table', r.schema_name)

    for c in remote_st.children:
        schema_term.add_child(c)

def row_generator(resource, doc, env, *args, **kwargs):
    """An example row generator function.

    Reference this function in a Metatab file as the value of a Datafile term:

        Datafile: python:pylib#row_generator

    The function must yield rows, with the first row being headers and
    subsequent rows being data.

    :param resource: The Datafile term being processed
    :param doc: The Metatab document that contains the term being processed
    :param env: A dict of environmental values
    :param args: Positional arguments passed to the generator
    :param kwargs: Keyword arguments passed to the generator
    :return:

    The env argument is a dict with these environmental keys:

    * CACHE_DIR
    * RESOURCE_NAME
    * RESOLVED_URL
    * WORKING_DIR
    * METATAB_DOC
    * METATAB_WORKING_DIR
    * METATAB_PACKAGE

    It also contains key/value pairs for all of the properties of the resource.
    """
    import csv
    from io import StringIO

    import requests

    from metapack.cli.core import prt

    ref = doc.reference('url_template')

    for i, year in enumerate(range(2000, 2019)):
        url = ref.resolved_url.interpolate({'year': year})
        prt(url)

        r = requests.get(url)
        r.raise_for_status()

        f = StringIO(r.text)
        reader = csv.reader(f, delimiter=',')

        for j, row in enumerate(reader):
            if j == 0:  # First row of the file
                if i == 0:  # Yield the header, but only from the first file
                    yield row
                continue

            yield row

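# A hypothetical smoke test for the generator above (not from the source),
# assuming ``doc`` is a MetapackDoc whose References section defines a term
# named 'url_template' with a '{year}' placeholder in its URL:
#
#   rows = list(row_generator(None, doc, {}))
#   header, data = rows[0], rows[1:]
#   assert all(len(row) == len(header) for row in data)
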
def process_schemas(mt_file, resource=None, cache=None, clean=False,
                    report_found=True, force=False, min_rows=5000,
                    allow_codes=True):
    from metapack import MetapackDoc, MetapackResourceUrl, MetapackDocumentUrl

    if isinstance(mt_file, MetapackDoc):
        doc = mt_file
        write_doc_to_file = False
    else:
        doc = MetapackDoc(mt_file)
        write_doc_to_file = True

    try:
        if clean:
            doc['Schema'].clean()
        else:
            doc['Schema']
    except KeyError:
        doc.new_section('Schema', ['DataType', 'AltName', 'Description'])

    schemas_processed = 0

    for r in doc['Resources'].find('Root.Resource'):

        if resource and r.name != resource:
            continue

        schema_term = r.schema_term

        col_count = len(list(r.columns()))
        datatype_count = sum(1 for c in r.columns() if c['datatype'])

        if schema_term and col_count == datatype_count and force is False:
            if report_found:
                prt("Found table for '{}'; skipping".format(r.schema_name))
            continue

        if col_count != datatype_count:
            prt("Found table for '{}', but {} columns don't have datatypes"
                .format(r.schema_name, col_count - datatype_count))

        schemas_processed += 1

        rr = r.resolved_url

        rmtree(get_materialized_data_cache(doc), ignore_errors=True)

        if isinstance(rr, MetapackDocumentUrl):
            warn("'{}' is a MetapackDocumentUrl; skipping".format(r.name))
        elif isinstance(rr, MetapackResourceUrl):
            _process_metapack_resource(doc, r, force)
        else:
            _process_normal_resource(doc, r, force, skip_start=min_rows,
                                     allow_codes=allow_codes)

    if write_doc_to_file and schemas_processed:
        write_doc(doc, mt_file)

def touch_metadata(m):
    import os

    p = m.filesystem_package

    t = p.package_build_time()

    os.utime(m.doc.ref.fspath, (t, t))

    prt(f"Set times for '{m.doc.ref.fspath}' to {t}")

def run_colmap_test(args):
    m = MetapackCliMemo(args, downloader)

    r = m.get_resource()

    if not r:
        prt('Select a resource to run:')
        list_rr(m.doc)
        sys.exit(0)

    cm = get_col_map(r)

    print(cm)

def index_packages(m):
    from metapack.cli.index import walk_packages
    from metapack.index import SearchIndex, search_index_file

    idx = SearchIndex(search_index_file())

    entries = []

    for p in walk_packages(None, parse_app_url(str(m.package_root.fspath))):
        prt("Indexing:", p.ref)
        idx.add_package(p)
        entries.append(p.name)

    idx.write()

    prt("Indexed", len(entries), "entries")

def run_colmap_new(args):
    m = MetapackCliMemo(args, downloader)

    resources = get_resources(m)

    if not resources:
        err(f"No resources found with colmap name '{m.args.colmap_name}'")

    # Collect all of the headers into a list of headers, and the union of
    # all of them in col_index
    col_index = []
    headers = []

    for r in resources:
        h = r.headers
        col_index += [alt_col_name(c) for c in h if alt_col_name(c) not in col_index]
        headers.append(h)

    # Create lists, of the same length as the index, of the source column
    # names, at the same position as the alt_col_name is in the col_index
    data = [col_index]

    for header in headers:
        new_row = [None] * len(col_index)

        for c in header:
            new_row[col_index.index(alt_col_name(c))] = c

        data.append(new_row)

    t = [['index'] + [r.name for r in resources]] + list(zip(*data))  # zip transposes rows into columns

    path = Path(f"colmap-{m.args.colmap_name}.csv")

    if m.args.print:
        from tabulate import tabulate
        prt(tabulate(t[1:], headers=t[0]))
    else:
        if path.exists() and not m.args.force:
            err(f"Col map file '{str(path)}' already exists. Use -f to overwrite")
        else:
            with path.open('w') as f:
                csv.writer(f).writerows(t)
            prt(f"Wrote {str(path)}")

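# A small worked example (not from the source) of the transpose step above.
# Each row of ``data`` holds one resource's column names aligned to
# ``col_index``, and zip(*data) turns those rows into one output row per column:
#
#   data = [['id', 'name'],   # col_index
#           ['ID', 'Name'],   # headers from resource A
#           ['id', None]]     # headers from resource B
#   list(zip(*data))
#   # -> [('id', 'ID', 'id'), ('name', 'Name', None)]
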
def add_single_resource(doc, ref, cache, seen_names):
    from metatab.util import slugify

    t = doc.find_first('Root.Datafile', value=ref)

    if t:
        prt("Datafile exists for '{}', deleting".format(ref))
        doc.remove_term(t)
    else:
        prt("Adding {}".format(ref))

    term_name = classify_url(ref)

    path, name = extract_path_name(ref)

    # If the name already exists, try to create a new one.
    # 20 attempts ought to be enough.
    if name in seen_names:
        base_name = re.sub(r'-?\d+$', '', name)

        for i in range(1, 20):
            name = "{}-{}".format(base_name, i)
            if name not in seen_names:
                break

    seen_names.add(name)

    encoding = start_line = None
    header_lines = []

    if not name:
        from hashlib import sha1
        name = sha1(slugify(path).encode('ascii')).hexdigest()[:12]

        # xlrd gets grouchy if the name doesn't start with a char
        try:
            int(name[0])
            name = 'a' + name[1:]
        except Exception:
            pass

    return doc['Resources'].new_term(term_name, ref,
                                     name=name,
                                     startline=start_line,
                                     headerlines=','.join(str(e) for e in header_lines),
                                     encoding=encoding)

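# The name de-duplication above, worked through (illustrative, not from the
# source): if 'acs-2015' is already in seen_names, the trailing digits are
# stripped,
#
#   re.sub(r'-?\d+$', '', 'acs-2015')  # -> 'acs'
#
# and the candidates 'acs-1', 'acs-2', ... are tried until one is unseen.
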
def add_missing_files(package_dir):
    import metapack_build.support as support_dir

    src_dir = Path(support_dir.__file__).parent

    for src, dest in [('gitignore', '.gitignore'),
                      ('tox.ini', 'tox.ini'),
                      ('requirements.txt', 'requirements.txt'),
                      ('tasks.py', 'tasks.py')]:

        dest_p = Path(package_dir).joinpath(dest)

        if not dest_p.exists():
            copyfile(src_dir.joinpath(src), dest_p)
            prt('Creating file:', dest)
        else:
            prt('File exists:', dest)

def run_url_add(args):
    """Add a resource entry, downloading and intuiting the file, and replacing
    entries with the same reference."""

    m = MetapackCliMemo(args, downloader)

    update_name(m.mt_file, fail_on_missing=False, report_unchanged=False)

    if isinstance(m.mt_file, MetapackDoc):
        doc = m.mt_file
    else:
        doc = MetapackDoc(m.mt_file)

    if 'Resources' not in doc:
        doc.new_section('Resources')

    doc['Resources'].args = [
        e for e in set(doc['Resources'].args +
                       ['Name', 'StartLine', 'HeaderLines', 'Encoding']) if e
    ]

    seen_names = set()

    u = parse_app_url(args.url)

    # The web and file URLs don't list the same way.
    if u.proto == 'file':
        entries = u.list()
    else:
        entries = [ssu for su in u.list() for ssu in su.list()]

    errors = []

    for e in entries:
        if not add_single_resource(doc, e, cache=m.cache, seen_names=seen_names):
            errors.append(e)

    if errors:
        prt()
        warn("Found, but failed to add, these URLs:")
        for e in errors:
            print('  ', e)

    write_doc(doc)

def update_promote(m):
    sections = m.args.promote

    if not sections or sections == 'RR':
        sections = ['resources', 'references']
    elif sections == '*':
        sections = [e.lower() for e in m.doc.sections.keys()]
    else:
        sections = [sections.lower()]

    for section_name, section in m.doc.sections.items():
        if section_name.lower() in sections:
            for arg in promote_terms(section):
                prt("Move {} to header in section {}".format(arg, section_name))

    write_doc(m.doc)

def move_alt_names(m):
    doc = m.doc

    for t in doc['Schema'].find('Root.Table'):
        moved = 0

        for c in t.find('Table.Column'):
            altname = c.get('AltName')

            if altname:
                if not c.get('Description'):
                    c.description = (c.name or '').replace('\n', ' ')

                c.name = altname.value
                c.remove_child(altname)
                moved += 1

        prt("Moved {} names in '{}'".format(moved, t.name))

    write_doc(doc)

def write_notebook(m):
    # Get the package notebook template from Github
    url = "https://raw.githubusercontent.com/Metatab/notebook-templates/master/package-notebook.ipynb"

    r = requests.get(url, allow_redirects=True)
    r.raise_for_status()

    p = Path(m.args.new_notebook)

    nb_path = f'notebooks/{p.stem}.ipynb'

    ensure_dir(dirname(nb_path))

    if exists(nb_path):
        err("Notebook {} already exists".format(nb_path))

    with open(nb_path, 'wb') as f:
        f.write(r.content)

    prt('Wrote {}'.format(nb_path))

def custom_update(doc, args):
    from metapack.cli.core import prt

    source = doc.reference('source')

    r = iter(source)

    descriptions = next(r)
    header = next(r)

    desc_map = {k: v for k, v in zip(header, descriptions)}

    changes = 0

    for c in doc['Schema'].find('Table.Column'):
        desc = desc_map.get(c.name)

        if desc and c.get_value('Description') != desc:
            prt(f"Setting {c.name} description to: {desc}")
            c['Description'] = desc
            changes += 1

        if c.get_value('Datatype') == 'unknown':
            c['Datatype'] = 'number'
            changes += 1

    if changes:
        prt(f'Writing {changes} changes')
        doc.write()
    else:
        prt('No changes')

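# The layout custom_update() assumes for the 'source' reference, inferred
# from the code (descriptions come from the first row, headers from the
# second; this is an assumption, not documented in the source):
#
#   row 1: 'Unique identifier', 'Person name', ...
#   row 2: 'id', 'name', ...
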
def _exec_build(p, package_root, force, nv_name, extant_url_f, post_f):
    from metapack import MetapackUrl

    if force:
        reason = 'Forcing build'
        should_build = True
    elif p.is_older_than_metadata():
        reason = 'Metadata is younger than package'
        should_build = True
    elif not p.exists():
        reason = "Package doesn't exist"
        should_build = True
    else:
        reason = 'Metadata is older than package'
        should_build = False

    if should_build:
        prt("Building {} package ({})".format(p.type_code, reason))
        url = p.save()
        prt("Package ( type: {} ) saved to: {}".format(p.type_code, url))
        created = True
    else:
        prt("Not building {} package ({})".format(p.type_code, reason))

    if not should_build and p.exists():
        created = False
        url = extant_url_f(p)

    post_f()

    if nv_name:
        p.move_to_nv_name()

    return p, MetapackUrl(url, downloader=package_root.downloader), created

def convert_notebook(nb_path):
    prt('Convert notebook to Metatab source package')

    if not exists(nb_path):
        err("Notebook path does not exist: '{}'".format(nb_path))

    c = Config()

    pe = NotebookExecutor(config=c, log=logger)

    prt('Running the notebook')

    output, resources = pe.from_filename(nb_path)

    fw = FilesWriter()
    fw.build_directory = pe.output_dir
    fw.write(output, resources, notebook_name=DEFAULT_METATAB_FILE)

    de = DocumentationExporter(config=c, log=logger, metadata=doc_metadata(pe.doc))

    prt('Exporting documentation')
    output, resources = de.from_filename(nb_path)

    fw.build_directory = join(pe.output_dir, 'docs')
    fw.write(output, resources, notebook_name='notebook')

    new_mt_file = join(pe.output_dir, DEFAULT_METATAB_FILE)

    doc = MetapackDoc(new_mt_file)

    de.update_metatab(doc, resources)

    for lib_dir in pe.lib_dirs:
        lib_dir = normpath(lib_dir).lstrip('./')

        doc['Resources'].new_term("Root.PythonLib", lib_dir)

        path = abspath(lib_dir)
        dest = join(pe.output_dir, lib_dir)

        ensure_dir(dest)
        copytree(path, join(pe.output_dir, lib_dir))

    doc.write_csv()

    # Reset the input to use the new data
    prt('Running with new package file: {}'.format(new_mt_file))

def convert_hugo(nb_path, hugo_path):
    from os import environ
    from os.path import abspath

    # Total hack. Would like the -H to be allowed to have no arg, and then
    # use the env var, but I don't know how to do that. This is the case
    # where the user types '-H nb_path', so just go with it.
    if hugo_path and not nb_path:
        nb_path = hugo_path
        hugo_path = environ.get('METAPACK_HUGO_DIR')

    if not hugo_path:
        err("Must specify value for -H or the METAPACK_HUGO_DIR environment var")

    if not exists(nb_path):
        err("Notebook path does not exist: '{}'".format(nb_path))

    c = Config()

    c.HugoExporter.hugo_dir = abspath(hugo_path)  # Exports assume rel path is rel to notebook

    he = HugoExporter(config=c, log=logger)

    output, resources = he.from_filename(nb_path)

    prt('Writing Notebook to Hugo Markdown')

    prt('    Writing ', resources['unique_key'] + resources['output_extension'])

    for k, v in resources['outputs'].items():
        prt('    Writing ', k)

    fw = FilesWriter()
    fw.write(output, resources, notebook_name=resources['unique_key'])

def run_init_cmd(m):
    from git.exc import GitCommandError

    g = Github(get_token())

    remote_r = get_or_new_github_repo(g, m)
    local_r = get_or_init_local_repo(m)

    try:
        origin = local_r.remote('origin')
    except (ValueError, GitCommandError) as e:
        print(e)
        origin = local_r.create_remote('origin', remote_r.clone_url)

    # local_r.create_head('master', origin.refs.master)

    local_r.git.push('--set-upstream', 'origin', 'master')

    add_giturl(m.doc, force=True)
    write_doc(m.doc)

    prt(f'Initialized local and remote {origin.refs.master} at {remote_r.clone_url}')

def build(m):
    raise NotImplementedError()

    def mp(*args, **kwargs):
        pass

    name = m.doc.name

    lb_file = m.package_root.fspath.joinpath('.last_build')

    if m.args.result:
        prt = print
    else:
        from metapack.cli.core import prt

    if lb_file.exists():
        # Run a test build
        ft_args = ['build', '-FT']

        if m.args.no_cache:
            ft_args = ['-n'] + ft_args

        mp(ft_args, do_cli_init=False)

        tb_path = m.package_root.fspath.joinpath('.trial_build').read_text()
        lb_path = lb_file.read_text()

        tdoc = MetapackDoc(tb_path)
        ldoc = MetapackDoc(lb_path)

        diff_hashes = 0

        for t_r in tdoc.resources():
            l_r = ldoc.resource(t_r.name)

            h1 = t_r.raw_row_generator.hash
            h2 = l_r.raw_row_generator.hash

            if h1 != h2:
                diff_hashes += 1

        if diff_hashes == 0:
            prt(f'👍 {name}: Hashes unchanged; will not rebuild')
            return

        prt(f'🛠 {name}: Hashes changed. Marked for rebuilding')

        Path(m.mt_file.fspath).touch()

        if m.args.increment:
            m.doc.update_name(mod_version='+')
            m.doc.write()
    else:
        prt(f'🛠 {name}: No previous build')

def show_credentials(profile):
    import boto3

    session = boto3.Session(profile_name=profile)

    if profile:
        cred_line = " 'eval $(metasync -C -p {} )'".format(profile)
    else:
        cred_line = " 'eval $(metasync -C)'"

    prt("export AWS_ACCESS_KEY_ID={}".format(session.get_credentials().access_key))
    prt("export AWS_SECRET_ACCESS_KEY={}".format(session.get_credentials().secret_key))
    prt("# Run {} to configure credentials in a shell".format(cred_line))

def run_url_scrape(args):
    m = MetapackCliMemo(args, downloader)

    from metapack.util import scrape_urls_from_web_page

    doc = m.doc
    url = m.args.url

    doc['resources'].new_term('DownloadPage', url)

    d = scrape_urls_from_web_page(url)

    if d.get('error'):
        err(d.get('error'))

    new_resources = 0
    new_documentation = 0

    if not args.no_resources:
        for k, v in d['sources'].items():
            u = parse_app_url(v['url'])

            t = doc['Resources'].new_term('DataFile', v['url'],
                                          name=u.fspath.stem,
                                          description=v.get('description'))

            new_resources += 1

            if args.verbose:
                prt(t, t.props)

    if not args.no_docs:
        for k, v in d['external_documentation'].items():
            term_name = classify_url(v['url'])

            u = parse_app_url(v['url'])

            t = doc['Documentation'].new_term(term_name, v['url'],
                                              name=u.fspath.stem,
                                              description=v.get('description'))

            new_documentation += 1

            if args.verbose:
                prt(t, t.props)

    prt("Added {} resource and {} documentation terms".format(new_resources, new_documentation))

    if not args.dry_run:
        write_doc(doc)

def _build_cmd(args):
    from rowgenerators.rowpipe.exceptions import TooManyCastingErrors

    downloader.set_callback(build_downloader_callback)

    m = MetapackCliMemo(args, downloader)

    if m.args.profile:
        from metatab.s3 import set_s3_profile
        set_s3_profile(m.args.profile)

    if m.args.clean_cache:
        clean_cache('metapack')

    try:
        changes = metatab_derived_handler(m)
        prt(f"{changes} changes")
    except TooManyCastingErrors as e:
        prt('Casting Errors:')
        for error in e.errors:
            prt(error)
        if m.args.exceptions:
            raise e
        else:
            err(e)
    except Exception as e:
        if m.args.exceptions:
            raise e
        else:
            err(e)

    clean_cache(m.cache)

    return changes

def _process_normal_resource(doc, r, force, skip_start=5000, allow_codes=True):
    """Process a resource that requires reading the file; not a Metatab resource."""
    from itertools import islice

    from requests.exceptions import ConnectionError
    from rowgenerators.exceptions import SourceError, SchemaError
    from rowgenerators.source import SelectiveRowGenerator
    from tableintuit import TypeIntuiter

    schema_term = r.schema_term

    try:
        if force:
            rg = r.raw_row_generator
        else:
            rg = r.row_generator
    except SchemaError:
        rg = r.raw_row_generator
        warn("Failed to build row processor table; using raw row generator")

    # Take only the first 250K rows, and then skip through them.
    # For 250,000 rows, the routine will analyze about 10K.
    slice = skip_iterator(islice(rg, 250000), skip_start=skip_start)

    headers, start, end = r._get_start_end_header()

    si = SelectiveRowGenerator(slice, header_lines=headers, start=start, end=end)

    try:
        ti = TypeIntuiter().run(si)
    except SourceError as e:
        warn("Failed to process resource '{}'; {}".format(r.name, e))
        return
    except ConnectionError as e:
        warn("Failed to download resource '{}'; {}".format(r.name, e))
        return
    except UnicodeDecodeError as e:
        warn("Text encoding error for resource '{}'; {}".format(r.name, e))
        return

    if schema_term:
        prt("Updating table '{}'".format(r.schema_name))

        # Record the existing columns so their properties can be restored later
        orig_columns = {e['name'].lower() if e['name'] else '': e for e in r.schema_columns or {}}

        # Remove the existing columns so they can be added back later,
        # possibly in a new order
        for child in list(schema_term.children):
            schema_term.remove_child(child)
    else:
        prt("Adding table '{}'".format(r.schema_name))
        schema_term = doc['Schema'].new_term('Table', r.schema_name)
        orig_columns = {}

    for i, c in enumerate(ti.to_rows()):
        raw_alt_name = alt_col_name(c['header'], i)
        alt_name = raw_alt_name if raw_alt_name != c['header'] else ''

        kwargs = {}

        if alt_name:
            kwargs['AltName'] = alt_name

        datatype = type_map.get(c['resolved_type'], c['resolved_type'])

        # If the field has codes, it is probably an integer with a few
        # strings mixed in
        if c['has_codes'] and not allow_codes:
            datatype = 'text' if c['unicode'] else 'string'

        schema_term.new_child('Column', c['header'],
                              datatype=datatype,
                              # description=get_col_value(c['header'].lower(), 'description'),
                              has_codes='T' if c['has_codes'] else '',
                              **kwargs)

    update_resource_properties(r, orig_columns=orig_columns, force=force)

    return ti

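# ``skip_iterator`` is referenced above but not defined in this section. A
# minimal sketch of what such a sampler might look like, assuming it passes
# the first ``skip_start`` rows through and then samples the remainder at a
# fixed stride (the stride value here is illustrative, not the actual
# implementation):

def skip_iterator_sketch(rows, skip_start=5000, stride=25):
    """Yield the first skip_start rows unchanged, then every stride-th row."""
    for i, row in enumerate(rows):
        if i < skip_start or i % stride == 0:
            yield row
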
def update_resource_properties(r, orig_columns={}, force=False):
    """Get descriptions and other properties from this, or upstream, packages,
    and add them to the schema.
    """
    added = []

    schema_term = r.schema_term

    if not schema_term:
        warn("No schema term for ", r.name)
        return

    prt("Processing schema {}".format(r.name))

    rg = r.row_generator

    # Get column information from the schema or, if it is a package
    # reference, from the upstream schema
    upstream_columns = {e['name'].lower() if e['name'] else '': e for e in r.columns() or {}}

    # Just from the local schema
    schema_columns = {e['name'].lower() if e['name'] else '': e for e in r.schema_columns or {}}

    # Ask the generator if it can provide column descriptions and types
    generator_columns = {e['name'].lower() if e['name'] else '': e for e in rg.columns or {}}

    def get_col_value(col_name, value_name):
        """Return the value from the last source in the list that defines it."""
        v = None

        if not col_name:
            return None

        for d in [generator_columns, upstream_columns, orig_columns, schema_columns]:
            v_ = d.get(col_name.lower(), {}).get(value_name)
            if v_:
                v = v_

        return v

    # Look for new properties
    extra_properties = set()

    for d in [generator_columns, upstream_columns, orig_columns, schema_columns]:
        for k, v in d.items():
            for kk, vv in v.items():
                extra_properties.add(kk)

    # Remove the properties that are already accounted for
    extra_properties = extra_properties - {'pos', 'header', 'name', ''}

    # Add any extra properties, such as from upstream packages, to the schema
    for ep in extra_properties:
        r.doc['Schema'].add_arg(ep)

    for c in schema_term.find('Table.Column'):
        for ep in extra_properties:
            t = c.get_or_new_child(ep)
            v = get_col_value(c.name, ep)
            if v:
                t.value = v
                added.append((c.name, ep, v))

    prt('Updated schema for {}. Set {} properties'.format(r.name, len(added)))

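# A tiny worked example (not from the source) of the precedence implemented
# by get_col_value() above: the loop keeps the last truthy value it sees, so
# sources later in the list override earlier ones, and local schema values
# win over generator-provided values:
#
#   generator_columns = {'id': {'description': 'from generator'}}
#   schema_columns    = {'id': {'description': 'from schema'}}
#   # get_col_value('id', 'description') -> 'from schema'
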
def add_single_resource(doc, ref, cache, seen_names):
    from metatab.util import slugify

    t = doc.find_first('Root.Datafile', value=ref)

    if t:
        prt("Datafile exists for '{}', deleting".format(ref))
        doc.remove_term(t)

    term_name = classify_url(ref)

    path, name = extract_path_name(ref)

    # If the name already exists, try to create a new one.
    # 20 attempts ought to be enough.
    if name in seen_names:
        base_name = re.sub(r'-?\d+$', '', name)

        for i in range(1, 20):
            name = "{}-{}".format(base_name, i)
            if name not in seen_names:
                break

    seen_names.add(name)

    encoding = start_line = None
    header_lines = []

    try:
        encoding, ri = run_row_intuit(path, cache)
        start_line = ri.start_line or None
        header_lines = ri.header_lines
    except RowIntuitError as e:
        warn("Failed to intuit '{}'; {}".format(ref, e))
    except RowGeneratorError as e:
        warn("Can't generate rows for: '{}'; {}".format(ref, e))
        return None
    except SourceError as e:
        warn("Source Error: '{}'; {}".format(ref, e))
        return None
    except Exception as e:
        warn("Error: '{}'; {}".format(ref, e))
        raise

    if not name:
        from hashlib import sha1
        name = sha1(slugify(path).encode('ascii')).hexdigest()[:12]

        # xlrd gets grouchy if the name doesn't start with a char
        try:
            int(name[0])
            name = 'a' + name[1:]
        except Exception:
            pass

    prt("Added {}, url: {}".format(name, ref))

    return doc['Resources'].new_term(term_name, ref,
                                     name=name,
                                     startline=start_line,
                                     headerlines=','.join(str(e) for e in header_lines))

def new_cmd(args):
    from os import getenv
    from os.path import exists, join, expanduser

    from metapack import MetapackDoc
    from metapack.cli.core import write_doc, prt, err
    from metapack.util import make_metatab_file, datetime_now, ensure_dir
    from metatab import DEFAULT_METATAB_FILE

    if args.config:
        config_file = args.config
    elif getenv("METAPACK_CONFIG"):
        config_file = getenv("METAPACK_CONFIG")
    elif exists(expanduser('~/.metapack-defaults.csv')):
        config_file = expanduser('~/.metapack-defaults.csv')
    else:
        config_file = None

    if config_file and exists(config_file):
        prt(f"Using defaults file '{config_file}'")
        config = MetapackDoc(config_file)
    else:
        config = MetapackDoc()

    if args.jupyter:
        import tempfile

        with tempfile.NamedTemporaryFile(suffix='.ipynb', delete=False) as fp:
            r = requests.get(TEMPLATE_NOTEBOOK, allow_redirects=True)
            r.raise_for_status()
            fp.write(r.content)
            nb_path = Path(fp.name)

        doc = MetapackDoc(str(nb_path))
    else:
        doc = make_metatab_file(args.template)

    doc['Root']['Created'] = datetime_now()

    origin = args.origin or config.get_value('Root.Origin')

    if not origin:
        err("Must specify a value for origin, either on the command line or in the defaults file")

    (doc['Root'].find_first('Root.Origin') or et).value = origin
    (doc['Root'].find_first('Root.Dataset') or et).value = args.dataset
    (doc['Root'].find_first('Root.Space') or et).value = args.space or config.get_value('Root.Space')
    (doc['Root'].find_first('Root.Time') or et).value = args.time or config.get_value('Root.Time')
    (doc['Root'].find_first('Root.Grain') or et).value = args.grain or config.get_value('Root.Grain')
    (doc['Root'].find_first('Root.Variant') or et).value = args.variant or config.get_value('Root.Variant')

    v = doc['Root'].get_or_new_term('Root.Version')
    v.get_or_new_child('Version.Major').value = args.revision or config.get_value('Root.Version')
    v.get_or_new_child('Version.Minor').value = 1
    v.get_or_new_child('Version.Patch').value = 1

    # Copy contacts in
    if 'Contacts' in config:
        for c in config['Contacts']:
            doc['Contacts'].add_term(c)

    if args.title:
        doc['Root'].find_first('Root.Title').value = args.title.strip()

    nv_name = doc.as_version(None)

    if args.example:
        doc['Resources'].new_term(
            'Root.Datafile',
            'http://public.source.civicknowledge.com/example.com/sources/random-names.csv',
            name='random_names')

        doc['Documentation'].new_term('Root.Homepage', 'http://metatab.org',
                                      title='Metatab Home Page')

    doc.ensure_identifier()
    doc.update_name(create_term=True)

    if getattr(args, 'jupyter'):
        # Imported here b/c maybe metapack_jupyter is not installed
        from metapack_jupyter.convert import write_metatab_notebook
        from metapack_jupyter.core import edit_notebook, set_cell_source, get_cell_source

        new_nb_path = Path(f'{nv_name}.ipynb')

        doc['Resources'].new_term(
            'Root.Datafile',
            './' + str(new_nb_path) + "#df",
            name='local_dataframe',
            description='Example of using a local Dataframe')

        if new_nb_path.exists():
            err(f"Notebook {new_nb_path} already exists")

        copyfile(nb_path, new_nb_path)

        write_metatab_notebook(doc, new_nb_path)

        with edit_notebook(new_nb_path) as nb:
            init = get_cell_source(nb, 'init')
            init += f"\nthis_package_name = '{str(new_nb_path.name)}'"
            set_cell_source(nb, 'init', init)

        nb_path.unlink()
    else:
        doc['Documentation'].new_term('Root.Documentation', 'file:README.md', title='README')

        if exists(nv_name):
            err(f"Directory {nv_name} already exists")

        if args.csv:
            fn = doc.nonver_name + '.csv'
            write_doc(doc, fn)
            prt(f"Writing to {fn}")
        else:
            ensure_dir(nv_name)

            pylib_dir = join(nv_name, 'pylib')
            ensure_dir(pylib_dir)

            with open(join(pylib_dir, '__init__.py'), 'w') as f_out, open(pylib.__file__) as f_in:
                f_out.write(f_in.read())

            if args.example:
                doc['Resources'].new_term('Root.Datafile',
                                          'python:pylib#row_generator',
                                          name='row_generator')

            prt(f"Writing to '{nv_name}'")

            write_doc(doc, join(nv_name, DEFAULT_METATAB_FILE))

            add_missing_files(nv_name)

            if args.title:
                readme = '# {}\n'.format(args.title)
            else:
                readme = '# {}\n'.format(doc.get_value('Root.Name'))

            with open(join(nv_name, 'README.md'), 'w') as f:
                f.write(readme)