def add_resource(mt_file, ref, cache):
    """Add a resources entry, downloading and intuiting the file,
    replacing entries with the same reference"""

    if isinstance(mt_file, MetapackDoc):
        doc = mt_file
    else:
        doc = MetapackDoc(mt_file)

    if 'Resources' not in doc:
        doc.new_section('Resources')

    # Ensure the Resources section has the columns that intuited entries need
    doc['Resources'].args = [
        e for e in set(doc['Resources'].args +
                       ['Name', 'StartLine', 'HeaderLines', 'Encoding']) if e
    ]

    seen_names = set()

    u = parse_app_url(ref)

    # The web and file URLs don't list the same.
    if u.proto == 'file':
        entries = u.list()
    else:
        entries = [ssu for su in u.list() for ssu in su.list()]

    for e in entries:
        add_single_resource(doc, e, cache=cache, seen_names=seen_names)

    write_doc(doc, mt_file)
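
# A minimal usage sketch for add_resource(). The metadata file name, the URL,
# and the cache setup are assumptions for illustration; in the metapack CLI the
# cache normally comes from the module-level Downloader.
def _example_add_resource():
    from metapack import Downloader  # assumption: Downloader is exported at the package level

    cache = Downloader.get_instance().cache  # assumption: singleton accessor provides the cache
    add_resource('metadata.csv', 'http://example.com/data/', cache=cache)
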
def process_schemas(mt_file, resource=None, cache=None, clean=False, report_found=True,
                    force=False, min_rows=5000, allow_codes=True):
    from metapack import MetapackDoc, MetapackResourceUrl, MetapackDocumentUrl

    if isinstance(mt_file, MetapackDoc):
        doc = mt_file
        write_doc_to_file = False
    else:
        doc = MetapackDoc(mt_file)
        write_doc_to_file = True

    try:
        if clean:
            doc['Schema'].clean()
        else:
            doc['Schema']  # Raises KeyError if the section does not exist
    except KeyError:
        doc.new_section('Schema', ['DataType', 'AltName', 'Description'])

    schemas_processed = 0

    for r in doc['Resources'].find('Root.Resource'):

        if resource and r.name != resource:
            continue

        schema_term = r.schema_term

        col_count = len(list(r.columns()))
        datatype_count = sum(1 for c in r.columns() if c['datatype'])

        # Skip resources that already have a complete schema, unless forced
        if schema_term and col_count == datatype_count and force is False:
            if report_found:
                prt("Found table for '{}'; skipping".format(r.schema_name))
            continue

        if col_count != datatype_count:
            prt("Found table for '{}'; but {} columns don't have datatypes"
                .format(r.schema_name, col_count - datatype_count))

        schemas_processed += 1

        rr = r.resolved_url

        rmtree(get_materialized_data_cache(doc), ignore_errors=True)

        if isinstance(rr, MetapackDocumentUrl):
            warn("{} is a MetapackDocumentUrl; skipping".format(r.name))
        elif isinstance(rr, MetapackResourceUrl):
            _process_metapack_resource(doc, r, force)
        else:
            _process_normal_resource(doc, r, force, skip_start=min_rows,
                                     allow_codes=allow_codes)

    if write_doc_to_file and schemas_processed:
        write_doc(doc, mt_file)
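
# A minimal usage sketch for process_schemas(). The file name and resource name
# are placeholders; the flag semantics follow the code above: clean=True rebuilds
# the Schema section from scratch, and force=True regenerates schemas even for
# resources whose columns already all have datatypes.
def _example_process_schemas():
    # Rebuild every schema in the package, discarding existing ones.
    process_schemas('metadata.csv', clean=True, force=True)

    # Re-infer the schema for a single resource, leaving the others untouched.
    process_schemas('metadata.csv', resource='my_resource', force=True)
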
def run_url_add(args):
    """Add a resources entry, downloading and intuiting the file,
    replacing entries with the same reference"""

    m = MetapackCliMemo(args, downloader)

    update_name(m.mt_file, fail_on_missing=False, report_unchanged=False)

    if isinstance(m.mt_file, MetapackDoc):
        doc = m.mt_file
    else:
        doc = MetapackDoc(m.mt_file)

    if 'Resources' not in doc:
        doc.new_section('Resources')

    # Ensure the Resources section has the columns that intuited entries need
    doc['Resources'].args = [
        e for e in set(doc['Resources'].args +
                       ['Name', 'StartLine', 'HeaderLines', 'Encoding']) if e
    ]

    seen_names = set()

    u = parse_app_url(args.url)

    # The web and file URLs don't list the same.
    if u.proto == 'file':
        entries = u.list()
    else:
        entries = [ssu for su in u.list() for ssu in su.list()]

    errors = []

    for e in entries:
        if not add_single_resource(doc, e, cache=m.cache, seen_names=seen_names):
            errors.append(e)

    if errors:
        prt()
        warn("Found, but failed to add these urls:")
        for e in errors:
            print(' ', e)

    write_doc(doc)
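
# A sketch of how run_url_add() might be invoked outside the CLI; the parser
# layout is an assumption, since the real command is built through metapack's
# subcommand plugin machinery, and MetapackCliMemo also reads package-location
# arguments from args, so a bare namespace like this may need more attributes.
def _example_run_url_add():
    import argparse

    parser = argparse.ArgumentParser(prog='mp url add')
    parser.add_argument('url', help='URL of a file, directory or archive to add as resources')

    run_url_add(parser.parse_args(['http://example.com/data/archive.zip']))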