Ejemplo n.º 1
0
def metatab_admin_handler(m):
    """Run the administration options selected on the command line.

    Each option is checked independently, so several may run in one call:
    enumerate, html, markdown, clean_cache and name, in that order. The
    ``name`` option terminates the process with exit status 0 after printing.

    :param m: CLI memo object; ``m.args``, ``m.cache`` and ``m.mt_file`` are read.
    """
    opts = m.args

    if opts.enumerate:
        from metatab.util import enumerate_contents

        # The enumeration is fully materialised before any row is printed.
        found = list(enumerate_contents(opts.enumerate, m.cache, callback=prt))

        for spec in found:
            prt(classify_url(spec.url), spec.target_format, spec.url, spec.target_segment)

    if opts.html:
        from metatab.html import html as render_html

        prt(render_html(MetatabDoc(m.mt_file)))

    if opts.markdown:
        from metatab.html import markdown as render_markdown

        prt(render_markdown(MetatabDoc(m.mt_file)))

    if opts.clean_cache:
        clean_cache('metapack')

    if opts.name:
        package_doc = MetatabDoc(m.mt_file)
        prt(package_doc.find_first_value("Root.Name"))
        exit(0)
Ejemplo n.º 2
0
        def __init__(self, raw_args):
            """Parse the command line and resolve the metatab file it refers to.

            :param raw_args: full argument vector; the first element is skipped
                before parsing (presumably the program name -- confirm with
                the caller).
            """
            self.cwd = getcwd()
            self.raw_args = raw_args
            self.args = parser.parse_args(self.raw_args[1:])

            self.cache = get_cache('metapack')

            # Separate cache used for loading packages that have just been
            # written to S3; it is cleaned every time a memo is built.
            self.tmp_cache = get_cache('temp')
            clean_cache(self.tmp_cache)

            if self.args.all_s3:
                # The all_s3 option supplies the s3 value and switches on
                # every package format.
                self.args.s3 = self.args.all_s3
                for flag in ('excel', 'zip', 'csv', 'fs'):
                    setattr(self.args, flag, True)

            # Fall back to the default metatab file in the working directory.
            self.mtfile_arg = self.args.metatabfile or join(self.cwd, DEFAULT_METATAB_FILE)

            self.mtfile_url = Url(self.mtfile_arg)
            self.resource = self.mtfile_url.parts.fragment

            rebuilt_url = self.mtfile_url.rebuild_url(False, False)
            self.package_url, self.mt_file = resolve_package_metadata_url(rebuilt_url)

            # Asking for CSV also turns on the fs flag.
            self.args.fs = self.args.csv or self.args.fs
Ejemplo n.º 3
0
def get_cache(clean=False):
    """Return the 'metapack' cache, optionally cleaning it first.

    :param clean: when truthy, ``clean_cache`` is applied to the cache before
        it is returned.
    :return: whatever ``rowgenerators.util.get_cache('metapack')`` returns.
    """
    # Aliased on import so the helpers do not shadow this function's own name.
    from rowgenerators.util import clean_cache as purge_cache
    from rowgenerators.util import get_cache as open_cache

    metapack_cache = open_cache('metapack')

    if clean:
        purge_cache(metapack_cache)

    return metapack_cache
Ejemplo n.º 4
0
def _build_cmd(args):
    """Build derived packages for a metatab file and report the change count.

    Errors raised by the build are re-raised when the ``exceptions`` option is
    set; otherwise they are handed to ``err()``. Previously a stray bare
    ``raise`` in the generic handler made that choice unreachable, so the
    option was ignored for everything except casting errors.

    :param args: command-line arguments, passed through to ``MetapackCliMemo``.
    :return: the value returned by ``metatab_derived_handler``, or ``None`` if
        the build failed and ``err()`` returned control.
    """
    from rowgenerators.rowpipe.exceptions import TooManyCastingErrors

    downloader.set_callback(build_downloader_callback)

    m = MetapackCliMemo(args, downloader)

    if m.args.profile:
        from metatab.s3 import set_s3_profile
        set_s3_profile(m.args.profile)

    if m.args.clean_cache:
        # NOTE(review): a cache name is passed here but a cache object is
        # passed at the end of this function -- confirm clean_cache accepts both.
        clean_cache('metapack')

    # Bound up front so the final return never hits an unbound local when a
    # handler below reports the error without leaving the function.
    changes = None

    try:
        changes = metatab_derived_handler(m)
        prt(f"{changes} changes")

    except TooManyCastingErrors as e:
        prt('Casting Errors:')
        for error in e.errors:
            prt(error)
        if m.args.exceptions:
            raise
        err(e)

    except Exception as e:
        if m.args.exceptions:
            raise
        err(e)

    clean_cache(m.cache)

    return changes
Ejemplo n.º 5
0
def metapack():
    """Entry point for the ``metapack`` command line program.

    Builds the argument parser, parses ``sys.argv[1:]``, wraps the result in a
    ``MetapackCliMemo`` and then runs the build, derived, query and admin
    handlers in that order. An exception raised by a handler is re-raised when
    ``--exceptions`` was given and passed to ``err()`` otherwise. The cache is
    cleaned after the handlers finish.
    """
    import argparse

    parser = argparse.ArgumentParser(
        prog='metapack',
        description='Create and manipulate metatab data packages, version {}'.format(_meta.__version__))

    parser.add_argument('metatabfile', nargs='?',
                        help="Path or URL to a metatab file. If not provided, defaults to 'metadata.csv' ")

    parser.add_argument('-p', '--profile', help="Name of a BOTO or AWS credentials profile", required=False)

    parser.add_argument('--exceptions', default=False, action='store_true',
                        help='Show full stack trace for some unhandled exceptions')

    parser.set_defaults(handler=None)

    ##
    ## Build Group

    build_group = parser.add_argument_group('Building Metatab Files', 'Build and manage a metatab file for a package')

    build_group.add_argument('-c', '--create', action='store', nargs='?', default=False,
                             help="Create a new metatab file, from named template. With no argument, uses the "
                                  "'metatab' template ")

    # The fragments below are joined by implicit concatenation, so each one
    # that is followed by another must end with a space.
    build_group.add_argument('-a', '--add', default=False,
                             help='Add a file or url to the resources. With a directory add a data files in the directory. '
                                  'If given a URL to a web page, will add all links that point to CSV, Excel Files and '
                                  'data files in ZIP files. (Caution: it will download and cache all of these files. )')

    # build_group.add_argument('-S', '--scrape',
    #                help='Similar to --add, but scrape a web page for links to data files, documentation '
    #                     'and web pages and add the links as resources ')

    # build_group.add_argument('-r', '--resources', default=False, action='store_true',
    #                    help='Rebuild the resources, intuiting rows and encodings from the URLs')

    build_group.add_argument('-s', '--schemas', default=False, action='store_true',
                             help='Rebuild the schemas for files referenced in the resource section')

    build_group.add_argument('-d', '--datapackage', action='store_true', default=False,
                             help="Write a datapackage.json file adjacent to the metatab file")

    build_group.add_argument('-u', '--update', action='store_true', default=False,
                             help="Update the Name from the Datasetname, Origin and Version terms")

    build_group.add_argument('-F', '--force', action='store_true', default=False,
                             help='Force some operations, like updating the name and building packages')

    ##
    ## Derived Package Group

    derived_group = parser.add_argument_group('Derived Packages', 'Generate other types of packages')

    derived_group.add_argument('-e', '--excel', action='store_true', default=False,
                               help='Create an excel archive from a metatab file')

    derived_group.add_argument('-z', '--zip', action='store_true', default=False,
                               help='Create a zip archive from a metatab file')

    derived_group.add_argument('-f', '--filesystem', action='store_true', default=False,
                               help='Create a filesystem archive from a metatab file')

    derived_group.add_argument('-v', '--csv', action='store_true', default=False,
                               help='Create a CSV archive from a metatab file')

    ##
    ## QueryPackage Group

    query_group = parser.add_argument_group('Query', 'Return information and data from a package')

    query_group.add_argument('-r', '--resource', default=False, action='store_true',
                             help='If the URL has no fragment, dump the resources listed in the metatab file.'
                                  ' With a fragment, dump a resource as a CSV')

    query_group.add_argument('-H', '--head', default=False, action='store_true',
                             help="Dump the first 20 lines of a resource ")

    ##
    ## Administration Group

    admin_group = parser.add_argument_group('Administration', 'Information and administration')

    admin_group.add_argument('--clean-cache', default=False, action='store_true',
                             help="Clean the download cache")

    admin_group.add_argument('-C', '--clean', default=False, action='store_true',
                             help="For some operations, like updating schemas, clear the section of existing terms first")

    admin_group.add_argument('-i', '--info', default=False, action='store_true',
                             help="Show configuration information")

    admin_group.add_argument('-n', '--name', default=False, action='store_true',
                             help="Print the name of the package")

    admin_group.add_argument('-E', '--enumerate',
                             help='Enumerate the resources referenced from a URL. Does not alter the Metatab file')

    admin_group.add_argument('--html', default=False, action='store_true',
                             help='Generate HTML documentation')

    admin_group.add_argument('--markdown', default=False, action='store_true',
                             help='Generate Markdown documentation')

    # cmd = parser.add_subparsers(title='Plugin Commands', help='Additional command supplied by plugins')
    # load_plugins(cmd)

    class MetapackCliMemo(object):
        """Bundle the parsed arguments with the values derived from them.

        Holds the working directory, the cache, the metatab file argument and
        its parsed URL, the URL fragment (the resource name) and the resolved
        package and metadata URLs, so each handler receives a single object.
        """

        def __init__(self, args):
            self.cwd = getcwd()
            self.args = args
            self.cache = get_cache('metapack')

            if args.metatabfile and args.metatabfile.startswith('#'):
                # It's just a fragment, so attach it to the default metatab file
                args.metatabfile = join(self.cwd, DEFAULT_METATAB_FILE) + args.metatabfile

            self.mtfile_arg = args.metatabfile if args.metatabfile else join(self.cwd, DEFAULT_METATAB_FILE)

            self.mtfile_url = Url(self.mtfile_arg)

            self.resource = self.mtfile_url.parts.fragment

            self.package_url, self.mt_file = resolve_package_metadata_url(self.mtfile_url.rebuild_url(False, False))

    m = MetapackCliMemo(parser.parse_args(sys.argv[1:]))

    if m.args.info:
        metatab_info(m.cache)
        exit(0)

    if m.args.profile:
        from metatab.s3 import set_s3_profile
        set_s3_profile(m.args.profile)

    try:
        # Every handler runs; each one inspects m.args and acts only on the
        # options that belong to it.
        for handler in (metatab_build_handler, metatab_derived_handler, metatab_query_handler, metatab_admin_handler):
            handler(m)
    except Exception as e:
        if m.args.exceptions:
            # Bare raise keeps the original traceback intact.
            raise
        else:
            err(e)

    clean_cache(m.cache)
Ejemplo n.º 6
0
def metatab():
    """Entry point for the ``metatab`` command line program.

    Parses ``sys.argv[1:]`` and performs the single action selected from the
    required mutually exclusive group: show info, create a file, dump
    resources, or print the document as terms, JSON, YAML or a schema.
    Most paths end by calling ``exit(0)``.
    """
    import argparse
    parser = argparse.ArgumentParser(
        prog='metatab',
        description='Metatab file parser, version {}'.format(_meta.__version__))

    parser.add_argument('-C', '--clean-cache', default=False, action='store_true',
                        help="Clean the download cache")

    g = parser.add_mutually_exclusive_group(required=True)

    g.add_argument('-i', '--info', default=False, action='store_true',
                   help="Show configuration information")

    g.add_argument('-c', '--create', action='store', nargs='?', default=False,
                   help="Create a new metatab file, from named template. With no argument, uses the 'metatab' template ")

    g.add_argument('-t', '--terms', default=False, action='store_true',
                   help='Parse a file and print out the stream of terms, before interpretation')

    # NOTE(review): --interp is accepted but no branch below acts on it, so
    # selecting it prints nothing -- confirm whether a handler is missing.
    g.add_argument('-I', '--interp', default=False, action='store_true',
                   help='Parse a file and print out the stream of terms, after interpretation')

    g.add_argument('-j', '--json', default=False, action='store_true',
                   help='Parse a file and print out a JSON representation')

    g.add_argument('-y', '--yaml', default=False, action='store_true',
                   help='Parse a file and print out a YAML representation')

    g.add_argument('-R', '--resource', default=False, action='store_true',
                   help='If the URL has no fragment, dump the resources listed in the metatab file. With a fragment, dump a resource as a CSV')

    g.add_argument('-H', '--head', default=False, action='store_true',
                   help="Dump the first 20 lines of a resource ")

    g.add_argument('-S', '--schema',
                   help='Dump the schema for one named resource')

    parser.add_argument('-d', '--show-declaration', default=False, action='store_true',
                        help='Parse a declaration file and print out declaration dict. Use -j or -y for the format')

    parser.add_argument('-D', '--declare', help='Parse and incorporate a declaration before parsing the file.' +
                                                ' (Adds the declaration to the start of the file as the first term. )')

    parser.add_argument('file', nargs='?', default=DEFAULT_METATAB_FILE, help='Path to a Metatab file')

    args = parser.parse_args(sys.argv[1:])

    # Specifying only a fragment would otherwise replace the default metadata
    # file name, so attach the fragment to the default name instead.
    if args.file.startswith('#'):
        args.file = DEFAULT_METATAB_FILE + args.file

    cache = get_cache('metapack')

    if args.info:
        prt('Version  : {}'.format(_meta.__version__))
        prt('Cache dir: {}'.format(str(cache.getsyspath('/'))))
        exit(0)

    if args.clean_cache:
        clean_cache(cache)

    # --create uses nargs='?', so "given without a value" is None; only the
    # default False means the option was not requested.
    if args.create is not False:
        new_metatab_file(args.file, args.create)
        exit(0)

    if args.resource or args.head:

        limit = 20 if args.head else None

        u = Url(args.file)
        resource = u.parts.fragment
        metadata_url = u.rebuild_url(False, False)

        package_url, metadata_url = resolve_package_metadata_url(metadata_url)

        try:
            doc = MetatabDoc(metadata_url, cache=cache)
        except OSError as e:
            err("Failed to open Metatab doc: {}".format(e))
            return  # Never reached

        if resource:
            dump_resource(doc, resource, limit)
        else:
            dump_resources(doc)

        exit(0)

    if args.show_declaration:

        doc = MetatabDoc()
        doc.load_declarations([args.file])

        print(json.dumps({
            'terms': doc.decl_terms,
            'sections': doc.decl_sections
        }, indent=4))
        exit(0)
    else:

        package_url, metadata_url = resolve_package_metadata_url(args.file)
        try:
            doc = MetatabDoc(metadata_url, cache=cache)
        except IOError as e:
            # Report the failure the same way as the resource branch above; a
            # stray bare ``raise`` used to make this message unreachable.
            err("Failed to open '{}': {}".format(metadata_url, e))
            return  # Guard: ``doc`` is unbound if err() ever returns

    if args.terms:
        for t in doc._term_parser:
            print(t)

    elif args.json:
        print(json.dumps(doc.as_dict(), indent=4))

    elif args.yaml:
        import yaml
        print(yaml.safe_dump(doc.as_dict(), default_flow_style=False, indent=4))

    elif args.schema:
        dump_schema(doc, args.schema)

    exit(0)