Exemple #1
0
def add_resource(mt_file, ref, cache):
    """Add resource entries for ``ref`` to a Metapack document, then write it.

    The URL is expanded into its listable entries and each one is handed to
    ``add_single_resource``, which is expected to download and intuit the
    file and replace entries with the same reference.

    :param mt_file: A ``MetapackDoc`` instance, or a reference from which a
        ``MetapackDoc`` can be constructed.
    :param ref: URL (or path) of the resource to add; parsed with
        ``parse_app_url``.
    :param cache: Cache object forwarded to ``add_single_resource``.
    """

    doc = mt_file if isinstance(mt_file, MetapackDoc) else MetapackDoc(mt_file)

    if 'Resources' not in doc:
        doc.new_section('Resources')

    # Merge the required header args into the existing ones. A set() would
    # de-duplicate too, but it scrambles the column order from run to run, so
    # keep first-seen order: existing args first, then any missing ones.
    required_args = ['Name', 'StartLine', 'HeaderLines', 'Encoding']
    merged_args = []
    for arg in list(doc['Resources'].args) + required_args:
        if arg and arg not in merged_args:
            merged_args.append(arg)
    doc['Resources'].args = merged_args

    # Shared across all entries and passed to add_single_resource.
    seen_names = set()

    u = parse_app_url(ref)

    # The web and file URLs don't list the same: file URLs are listed once,
    # everything else is listed two levels deep and flattened.
    if u.proto == 'file':
        entries = u.list()
    else:
        entries = [ssu for su in u.list() for ssu in su.list()]

    for e in entries:
        add_single_resource(doc, e, cache=cache, seen_names=seen_names)

    write_doc(doc, mt_file)
Exemple #2
0
def process_schemas(mt_file, resource=None, cache=None, clean=False, report_found=True, force=False, min_rows=5000,
                    allow_codes=True):
    """Create or update the ``Schema`` section entries for a document's resources.

    Resources that already have a schema term with a datatype on every column
    are skipped unless ``force`` is set.

    :param mt_file: A ``MetapackDoc`` instance, or a reference from which a
        ``MetapackDoc`` can be constructed. The document is only written back
        when it was constructed here and at least one schema was processed.
    :param resource: If given, only the resource with this name is processed.
    :param cache: Not used by this function.
    :param clean: If true, call ``clean()`` on the existing ``Schema`` section.
    :param report_found: If true, print a message for each skipped resource
        that already has a complete schema.
    :param force: If not exactly ``False``, process resources even when they
        already have a complete schema.
    :param min_rows: Passed as ``skip_start`` to ``_process_normal_resource``.
    :param allow_codes: Passed through to ``_process_normal_resource``.
    """
    from metapack import MetapackDoc, MetapackResourceUrl, MetapackDocumentUrl

    if isinstance(mt_file, MetapackDoc):
        doc = mt_file
        write_doc_to_file = False
    else:
        doc = MetapackDoc(mt_file)
        write_doc_to_file = True

    try:
        if clean:
            doc['Schema'].clean()
        else:
            # Bare lookup: only here to raise KeyError when the section is missing.
            doc['Schema']

    except KeyError:
        doc.new_section('Schema', ['DataType', 'AltName', 'Description'])

    schemas_processed = 0

    for r in doc['Resources'].find('Root.Resource'):

        if resource and r.name != resource:
            continue

        schema_term = r.schema_term

        # Enumerate the columns once and reuse them for both counts.
        columns = list(r.columns())
        col_count = len(columns)
        datatype_count = sum(1 for c in columns if c['datatype'])

        if schema_term and col_count == datatype_count and force is False:
            if report_found:
                prt("Found table for '{}'; skipping".format(r.schema_name))
            continue

        if col_count != datatype_count:
            prt("Found table for '{}'; but {} columns don't have datatypes"
                .format(r.schema_name, col_count - datatype_count))

        schemas_processed += 1

        rr = r.resolved_url

        # Discard the materialized data cache before (re)processing the resource.
        rmtree(get_materialized_data_cache(doc), ignore_errors=True)

        if isinstance(rr, MetapackDocumentUrl):
            # Format the message here; passing the name as a separate argument
            # left the '{}' placeholder unfilled.
            warn('{} is a MetapackDocumentUrl; skipping'.format(r.name))
        elif isinstance(rr, MetapackResourceUrl):
            _process_metapack_resource(doc, r, force)
        else:
            _process_normal_resource(doc, r, force, skip_start=min_rows, allow_codes=allow_codes)

    if write_doc_to_file and schemas_processed:
        write_doc(doc, mt_file)
Exemple #3
0
def run_url_add(args):
    """Add resource entries for ``args.url`` to the document, then write it.

    The URL is expanded into its listable entries and each one is handed to
    ``add_single_resource``, which is expected to download and intuit the
    file and replace entries with the same reference. Entries that could not
    be added are reported at the end.

    :param args: Parsed command-line arguments; ``args.url`` is the URL to
        add, and the whole object is used to build a ``MetapackCliMemo``.
    """

    m = MetapackCliMemo(args, downloader)

    update_name(m.mt_file, fail_on_missing=False, report_unchanged=False)

    doc = m.mt_file if isinstance(m.mt_file, MetapackDoc) else MetapackDoc(m.mt_file)

    if 'Resources' not in doc:
        doc.new_section('Resources')

    # Merge the required header args into the existing ones. A set() would
    # de-duplicate too, but it scrambles the column order from run to run, so
    # keep first-seen order: existing args first, then any missing ones.
    required_args = ['Name', 'StartLine', 'HeaderLines', 'Encoding']
    merged_args = []
    for arg in list(doc['Resources'].args) + required_args:
        if arg and arg not in merged_args:
            merged_args.append(arg)
    doc['Resources'].args = merged_args

    # Shared across all entries and passed to add_single_resource.
    seen_names = set()

    u = parse_app_url(args.url)

    # The web and file URLs don't list the same: file URLs are listed once,
    # everything else is listed two levels deep and flattened.
    if u.proto == 'file':
        entries = u.list()
    else:
        entries = [ssu for su in u.list() for ssu in su.list()]

    # Collect the entries for which add_single_resource returned a falsy value.
    errors = []

    for e in entries:
        if not add_single_resource(
                doc, e, cache=m.cache, seen_names=seen_names):
            errors.append(e)

    if errors:
        prt()
        warn("Found, but failed to add these urls:")
        for e in errors:
            print('    ', e)

    write_doc(doc)