Example #1
        def __init__(self, raw_args):
            self.cwd = getcwd()

            self.raw_args = raw_args

            self.args = parser.parse_args(self.raw_args[1:])

            self.cache = get_cache('metapack')

            # This one is for loading packages that have just been
            # written to S3.
            self.tmp_cache = get_cache('temp')
            clean_cache(self.tmp_cache)

            if self.args.all_s3:
                self.args.s3 = self.args.all_s3
                self.args.excel = True
                self.args.zip = True
                self.args.csv = True
                self.args.fs = True

            self.mtfile_arg = self.args.metatabfile if self.args.metatabfile else join(
                self.cwd, DEFAULT_METATAB_FILE)

            self.mtfile_url = Url(self.mtfile_arg)
            self.resource = self.mtfile_url.parts.fragment

            self.package_url, self.mt_file = resolve_package_metadata_url(
                self.mtfile_url.rebuild_url(False, False))

            self.args.fs = self.args.csv or self.args.fs
Example #2
    def __init__(self, url=None, downloader=None, **kwargs):
        kwargs['proto'] = 'metatab'

        u = Url(url, **kwargs)

        assert downloader

        # If there is no file with an extension in the path, assume that this
        # is a filesystem package, and that the path should have DEFAULT_METATAB_FILE appended
        if file_ext(basename(
                u.path)) not in ('zip', 'xlsx') + self.simple_file_formats:
            u.path = join(u.path, DEFAULT_METATAB_FILE)

        super().__init__(str(u), downloader=downloader, **kwargs)

        self.scheme_extension = 'metatab'

        if basename(self.path) == DEFAULT_METATAB_FILE:
            frag = ''
        elif self.resource_format in self.simple_file_formats:
            frag = ''
        elif self.resource_format == 'xlsx':
            frag = 'meta'
        elif self.resource_format == 'zip':
            frag = DEFAULT_METATAB_FILE
        else:
            # Defensive default so frag is always bound
            frag = ''

        self.fragment = [frag, None]
Example #3
def update_dist(doc, old_dists, v):

    # This isn't quite correct, because it will try to remove the .csv format
    # distributions twice, since both <name>.csv and <name>/metadata.csv have the same format.
    # (That's why there's a try/except.) But it is effective.

    name = doc.find_first_value("Root.Name")

    for d in old_dists:

        if Url(d.value).resource_format == Url(
                v).resource_format and name not in d.value:
            try:
                doc.remove_term(d)
            except ValueError:
                pass

    t = doc.find_first('Root.Distribution', v)

    if not t:
        doc['Root'].new_term('Root.Distribution', v)

        return True
    else:
        return False
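
A minimal usage sketch (the distribution URL is illustrative; doc is an open MetatabDoc and prt is the same print helper used elsewhere in these examples), mirroring how Example #19 below calls update_dist:

    old_dists = list(doc.find('Root.Distribution'))
    if update_dist(doc, old_dists, 'http://example.com/dataset/mypackage.csv'):
        prt('Added new distribution')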
Example #4
        def __init__(self, args):
            self.cwd = getcwd()
            self.args = args
            self.cache = get_cache('metapack')

            self.mtfile_arg = args.metatabfile if args.metatabfile else join(
                self.cwd, DEFAULT_METATAB_FILE)

            self.mtfile_url = Url(self.mtfile_arg)
            self.resource = self.mtfile_url.parts.fragment

            self.package_url, self.mt_file = resolve_package_metadata_url(
                self.mtfile_url.rebuild_url(False, False))
Example #5
def write_doc(doc, mt_file):
    """
    Write a Metatab doc to a CSV file, and update the Modified time
    :param doc:
    :param mt_file:
    :return:
    """

    doc['Root']['Modified'] = datetime_now()

    doc['Root'].sort_by_term(order=[
        'Root.Declare', 'Root.Title', 'Root.Description', 'Root.Identifier',
        'Root.Name', 'Root.Dataset', 'Root.Origin', 'Root.Time', 'Root.Space',
        'Root.Grain', 'Root.Version', 'Root.Group', 'Root.Tag', 'Root.Keyword',
        'Root.Subject', 'Root.Created', 'Root.Modified', 'Root.Issued',
        'Root.Access', 'Root.Distribution'
    ])

    import subprocess
    out = subprocess.run(['git', 'remote', 'show', 'origin'],
                         stdout=subprocess.PIPE).stdout.decode('utf-8')

    fetchline = next((l.split() for l in out.splitlines() if 'Fetch' in l), None)

    if fetchline:
        t = doc['Root'].get_or_new_term('GitUrl')
        t.value = fetchline[-1]

    u = Url(mt_file)

    if u.scheme == 'file':
        doc.write_csv(mt_file)
        return True
    else:
        return False
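
A minimal usage sketch, assuming a metadata.csv in the working directory; write_doc returns True only when the target resolves to the file scheme:

    doc = MetatabDoc('metadata.csv', cache=get_cache('metapack'))
    if write_doc(doc, 'metadata.csv'):
        prt('Wrote metadata.csv')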
Example #6
    def test_ipy(self):
        from rowgenerators import SourceSpec, Url, RowGenerator, get_cache

        from rowgenerators.fetch import download_and_cache

        urls = ('ipynb+file:foobar.ipynb',
                'ipynb+http://example.com/foobar.ipynb', 'ipynb:foobar.ipynb')

        for url in urls:
            u = Url(url)
            print(u, u.path, u.resource_url)

            s = SourceSpec(url)
            print(s, s.proto, s.scheme, s.resource_url, s.target_file,
                  s.target_format)
            self.assertIn(s.scheme, ('file', 'http'))
            self.assertEqual('ipynb', s.proto)

        gen = RowGenerator(cache=get_cache(),
                           url='ipynb:scripts/Py3Notebook.ipynb#lst',
                           working_dir=test_data(),
                           generator_args={'mult': lambda x: x * 3})

        rows = gen.generator.execute()

        print(len(rows))
Example #7
def get_lib_module_dict(doc):
    """Load the 'lib' directory as a python module, so it can be used to provide functions
    for rowpipe transforms"""

    from os.path import dirname, abspath, join, isdir
    from importlib import import_module
    import sys

    u = Url(doc.ref)
    if u.proto == 'file':

        doc_dir = dirname(abspath(u.parts.path))

        # Add the dir with the metatab file to the system path
        sys.path.append(doc_dir)

        if not isdir(join(doc_dir, 'lib')):
            return {}

        try:
            m = import_module("lib")
            return {k: v for k, v in m.__dict__.items() if k in m.__all__}
        except ImportError as e:
            err("Failed to import python module form 'lib' directory: ",
                str(e))

    else:
        return {}
Example #8
    def __new__(cls,
                ref=None,
                cache=None,
                callback=None,
                env=None,
                save_url=None,
                acl=None):

        if cls == Package:

            if isinstance(ref, Url):
                b = Bunch(ref.dict)
            else:
                b = Bunch(Url(ref).dict)

            if b.resource_format in ('xls', 'xlsx'):
                return super(Package, cls).__new__(ExcelPackage)
            elif b.resource_format == 'zip':
                return super(Package, cls).__new__(ZipPackage)
            elif b.proto == 'gs':
                return super(Package, cls).__new__(GooglePackage)
            elif b.proto == 's3':
                return super(Package, cls).__new__(S3Package)
            elif b.resource_format == 'csv' or b.target_format == 'csv':
                return super(Package, cls).__new__(CsvPackage)
            else:
                raise PackageError(
                    "Can't determine package type for ref '{}' ".format(ref))

        else:
            return super(Package, cls).__new__(cls)
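
A sketch of how the dispatch above resolves for a few illustrative references (the URLs are hypothetical; the subclasses are the ones named in the code):

    Package('http://example.com/mypackage.xlsx')   # resource_format 'xlsx' -> ExcelPackage
    Package('http://example.com/mypackage.zip')    # resource_format 'zip'  -> ZipPackage
    Package('s3://my-bucket/mypackage/')           # proto 's3'             -> S3Package
    Package('metadata.csv')                        # resource_format 'csv'  -> CsvPackage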
Example #9
def resolve_package_metadata_url(ref):
    """Re-write a url to a resource to include the likely refernce to the
    internal Metatab metadata"""

    du = Url(ref)

    if du.resource_format == 'zip':
        package_url = reparse_url(ref, fragment=False)
        metadata_url = reparse_url(ref, fragment=DEFAULT_METATAB_FILE)

    elif du.target_format == 'xlsx' or du.target_format == 'xls':
        package_url = reparse_url(ref, fragment=False)
        metadata_url = reparse_url(ref, fragment='meta')

    elif du.resource_file == DEFAULT_METATAB_FILE:
        metadata_url = reparse_url(ref)
        package_url = reparse_url(ref,
                                  path=dirname(parse_url_to_dict(ref)['path']),
                                  fragment=False) + '/'

    elif du.target_format == 'csv':
        package_url = reparse_url(ref, fragment=False)
        metadata_url = reparse_url(ref)

    elif du.proto == 'file':
        p = parse_url_to_dict(ref)

        if isfile(p['path']):
            metadata_url = reparse_url(ref)
            package_url = reparse_url(ref,
                                      path=dirname(p['path']),
                                      fragment=False)
        else:

            p['path'] = join(p['path'], DEFAULT_METATAB_FILE)
            package_url = reparse_url(ref,
                                      fragment=False,
                                      path=p['path'].rstrip('/') + '/')
            metadata_url = unparse_url_dict(p)

        # Make all of the paths absolute. Saves a lot of headaches later.
        package_url = reparse_url(package_url,
                                  path=abspath(
                                      parse_url_to_dict(package_url)['path']))
        metadata_url = reparse_url(
            metadata_url,
            path=abspath(parse_url_to_dict(metadata_url)['path']))

    else:
        metadata_url = join(ref, DEFAULT_METATAB_FILE)
        package_url = reparse_url(ref, fragment=False)

    # raise PackageError("Can't determine package URLs for '{}'".format(ref))

    return package_url, metadata_url
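
An illustrative split, assuming DEFAULT_METATAB_FILE is 'metadata.csv' as elsewhere in these examples (the URL is hypothetical):

    package_url, metadata_url = resolve_package_metadata_url('http://example.com/mypackage.zip')
    # package_url  -> 'http://example.com/mypackage.zip'
    # metadata_url -> 'http://example.com/mypackage.zip#metadata.csv'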
Example #10
    def _resolved_url(self):
        """Return a URL that properly combines the base_url and a possibly relative
        resource url"""

        from rowgenerators.generators import PROTO_TO_SOURCE_MAP

        if self.base_url:
            u = Url(self.base_url)

            # S3 security is a pain
            if u.proto == 's3' and self.doc.find_first_value(
                    "Root.Access") != 'private':
                print("!!!!!!", type(u))

        else:
            u = Url(self.doc.package_url)  # Url(self.doc.ref)

        if not self._self_url:
            return None

        nu = u.component_url(self._self_url)

        # For some URLs, we need to put the proto back on.
        su = Url(self._self_url)

        if su.proto in PROTO_TO_SOURCE_MAP().keys():
            nu = reparse_url(nu, scheme_extension=su.proto)

        assert nu
        return nu
Example #11
        def set_mt_arg(self, metatabfile):

            self.mtfile_arg = metatabfile if metatabfile else join(
                self.cwd, DEFAULT_METATAB_FILE)

            self.mtfile_url = Url(self.mtfile_arg)
            self.resource = self.mtfile_url.parts.fragment

            self.package_url, self.mt_file = resolve_package_metadata_url(
                self.mtfile_url.rebuild_url(False, False))

            self.api_key = self.args.api or getenv('METAKAN_API_KEY')

            self.ckan_url = self.args.ckan or getenv('METAKAN_CKAN_URL')

            if not self.ckan_url:
                err("Set the --ckan option or the METAKAN_CKAN_URL env var to set the URL of a ckan instance"
                    )

            if not self.api_key:
                err("Set the --api option METAKAN_API_KEY env var  with the API key to a CKAN instance"
                    )
Example #12
    def write_csv(self, path=None):
        from rowgenerators import Url

        self.cleanse()

        if path is None:
            path = self.ref

        u = Url(path)

        if u.scheme != 'file':
            raise MetatabError("Can't write file to URL '{}'".format(path))

        with open(u.parts.path, 'wb') as f:
            f.write(self.as_csv())
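
A usage sketch, assuming doc is a MetatabDoc backed by a local metadata.csv; a non-file URL raises MetatabError:

    doc.write_csv('metadata.csv')   # write to an explicit local path
    doc.write_csv()                 # with no argument, writes back to doc.ref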
Example #13
    class MetapackCliMemo(object):
        def __init__(self, args):
            self.cwd = getcwd()
            self.args = args
            self.cache = get_cache('metapack')

            if args.metatabfile and args.metatabfile.startswith('#'):
                # It's just a fragment; use the default metatab file
                args.metatabfile = join(self.cwd, DEFAULT_METATAB_FILE) + args.metatabfile

            self.mtfile_arg = args.metatabfile if args.metatabfile else join(self.cwd, DEFAULT_METATAB_FILE)

            self.mtfile_url = Url(self.mtfile_arg)

            self.resource = self.mtfile_url.parts.fragment

            self.package_url, self.mt_file = resolve_package_metadata_url(self.mtfile_url.rebuild_url(False, False))
Example #14
def get_resource_urls(doc):

    resources = {}

    for dist in doc.find("Root.Distribution"):

        package_url, metadata_url = resolve_package_metadata_url(dist.value)

        u = Url(package_url)

        if u.resource_format == 'zip':
            prt("Skipping ZIP package ", package_url)

        elif u.resource_format == 'xlsx':
            resources[basename(package_url)] = package_url
            prt("Adding XLS package ", package_url)
            pass

        elif u.resource_format == 'csv':

            resources[basename(package_url)] = package_url

            prt("Adding CSV package {}".format(basename(package_url)))

            try:
                p = open_package(package_url)
            except (IOError, MetatabError) as e:
                err("Failed to open package '{}' from reference '{}': {}".
                    format(package_url, dist.value, e))

            for r in p.resources():

                mimetype = mimetypes.guess_type(r.resolved_url)[0]

                try:
                    ext = mimetypes.guess_extension(mimetype)[1:]
                except Exception:
                    ext = None

                # '.csv': Data.world currently gets the format from the name, not the URL
                resources[r.name + '.csv'] = r.resolved_url
                prt("Adding CSV resource {}".format(r.name))
        else:
            prt('Skipping {}'.format(package_url))

    return resources
Example #15
    def extract_path_name(ref):

        du = Url(ref)

        if du.proto == 'file':
            path = abspath(ref)
            name = basename(splitext(path)[0])
            ref = "file://" + path
        else:
            path = ref

            if du.target_segment:
                try:
                    int(du.target_segment)
                    name = du.target_file + text_type(du.target_segment)

                except ValueError:
                    name = du.target_segment

            else:
                name = splitext(du.target_file)[0]

        return ref, path, name
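
An illustrative call with a hypothetical local CSV; the reference is rewritten to a file:// URL and the name is derived from the file's base name:

    ref, path, name = extract_path_name('data/income.csv')
    # ref  -> 'file://' + the absolute path to data/income.csv
    # name -> 'income'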
Example #16
    def __init__(self, url=None, downloader=None, **kwargs):

        self._state = None
        self._record_type = None
        self._year = None
        self._release = None

        if url:
            u = Url(url).remove_netloc().relify_path()

            for p, v in zip(self.part_names, u.path.split('/')):
                setattr(self, p, v)

        for k, v in kwargs.items():
            if "_" + k in self.part_names:
                setattr(self, "_" + k, v)

        m = self._test_parts()
        if m:
            raise PumsUrlError('Parsing error: ' + '; '.join(m))

        urls_s = f"pums:{self._state}/{self._record_type}/{self._year}/{self._release}"

        super().__init__(urls_s, downloader, **kwargs)
Example #17
    class MetapackCliMemo(object):
        def __init__(self, args):
            self.cwd = getcwd()
            self.args = args
            self.cache = get_cache('metapack')

            self.set_mt_arg(args.metatabfile)

        def set_mt_arg(self, metatabfile):

            self.mtfile_arg = metatabfile if metatabfile else join(
                self.cwd, DEFAULT_METATAB_FILE)

            self.mtfile_url = Url(self.mtfile_arg)
            self.resource = self.mtfile_url.parts.fragment

            self.package_url, self.mt_file = resolve_package_metadata_url(
                self.mtfile_url.rebuild_url(False, False))

            self.api_key = self.args.api or getenv('METAKAN_API_KEY')

            self.ckan_url = self.args.ckan or getenv('METAKAN_CKAN_URL')

            if not self.ckan_url:
                err("Set the --ckan option or the METAKAN_CKAN_URL env var to set the URL of a ckan instance"
                    )

            if not self.api_key:
                err("Set the --api option METAKAN_API_KEY env var  with the API key to a CKAN instance"
                    )

        def update_mt_arg(self, metatabfile):
            """Return a new memo with a new metatabfile argument"""
            o = MetapackCliMemo(self.args)
            o.set_mt_arg(metatabfile)
            return o
Example #18
def metatab_build_handler(m):
    if m.args.create is not False:

        template = m.args.create if m.args.create else 'metatab'

        if not exists(m.mt_file):

            doc = make_metatab_file(template)

            doc['Root']['Identifier'] = six.text_type(uuid4())

            doc['Root']['Created'] = datetime_now()

            write_doc(doc, m.mt_file)

            prt('Created', m.mt_file)
        else:
            err('File', m.mt_file, 'already exists')

    if m.args.add:
        update_name(m.mt_file, fail_on_missing=False, report_unchanged=False)

        add_resource(m.mt_file, m.args.add, cache=m.cache)

    if False:  # m.args.resources:
        update_name(m.mt_file, fail_on_missing=False, report_unchanged=False)

        doc = MetatabDoc(m.mt_file)

        try:
            doc['Schema'].clean()
        except KeyError:
            pass

        for t in list(doc['Resources']):  # w/o list(), will iterate over new terms

            if not t.term_is('root.datafile'):
                continue

            if t.as_dict().get('url'):
                add_resource(doc, t.as_dict()['url'], m.cache)

            else:
                warn("Entry '{}' on row {} is missing a url; skipping".format(t.join, t.row))

        write_doc(doc, m.mt_file)

    if m.args.schemas:
        update_name(m.mt_file, fail_on_missing=False, report_unchanged=False)

        process_schemas(m.mt_file, cache=m.cache, clean=m.args.clean)

    if m.args.datapackage:
        update_name(m.mt_file, fail_on_missing=False, report_unchanged=False)

        from metatab.datapackage import convert_to_datapackage

        doc = MetatabDoc(m.mt_file)

        u = Url(m.mt_file)

        if u.proto == 'file':
            dpj_file = join(dirname(abspath(u.parts.path)), 'datapackage.json')
        else:
            dpj_file = join(getcwd(), 'datapackage.json')

        try:
            with open(dpj_file, 'w') as f:
                f.write(json.dumps(convert_to_datapackage(doc), indent=4))
        except ConversionError as e:
            err(e)

    if m.mtfile_url.scheme == 'file' and m.args.update:
        update_name(m.mt_file, fail_on_missing=True, force=m.args.force)
Example #19
def send_to_ckan(m):

    from ckanapi import RemoteCKAN, NotFound
    try:
        doc = MetatabDoc(m.mt_file, cache=m.cache)
    except (IOError, MetatabError) as e:
        err("Failed to open metatab '{}': {}".format(m.mt_file, e))

    c = RemoteCKAN(m.ckan_url, apikey=m.api_key)

    ckanid = doc.find_first_value('Root.Ckanid')
    identifier = doc.find_first_value('Root.Identifier')
    name = doc.find_first('Root.Name')

    ckan_name = name.value.replace('.', '-')

    id_name = ckanid or ckan_name

    try:
        pkg = c.action.package_show(name_or_id=id_name)
        prt("Updating CKAN dataset for '{}'".format(ckan_name))
    except NotFound:
        pkg = c.action.package_create(name=ckan_name, package_id=identifier)
        prt("Adding CKAN dataset for '{}'".format(ckan_name))

    pkg['title'] = doc.find_first_value('Root.Title')

    if not pkg['title']:
        pkg['title'] = doc.find_first_value('Root.Description')

    try:
        pkg['notes'] = doc.markdown  #doc.find_first_value('Root.Description')
    except OSError as e:
        warn(e)

    pkg['version'] = name.properties.get('version')

    pkg['groups'] = [{'name': g.value} for g in doc['Root'].find('Root.Group')]

    pkg['tags'] = [{'name': g.value} for g in doc['Root'].find('Root.Tag')]

    def get_org(name):

        if not name:
            return None

        try:
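            # Note: the organization lookup here is an unfinished stub; as written it always returns None.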
            return
        except NotFound:
            return None

    org_name = name.get('Origin', doc['Root'].find_first_value('Root.CkanOrg'))

    if org_name:
        org_name_slug = org_name.value.replace('.', '-')
        try:

            owner_org = c.action.organization_show(id=org_name_slug).get('id')
            pkg['owner_org'] = owner_org
        except NotFound:
            warn("Didn't find org for '{}'; not setting organization ".format(
                org_name_slug))
            org_name_slug = None
    else:
        org_name_slug = None

    extras = {}

    for t in doc.find('*.*', section='Root'):
        if not t.term_is('Root.Distribution'):
            extras[t.qualified_term] = t.value

    for t in name.children:
        extras[t.qualified_term] = t.value

    pkg['extras'] = [{'key': k, 'value': v} for k, v in extras.items()]

    resources = []

    for dist in doc.find("Root.Distribution"):

        package_url, metadata_url = resolve_package_metadata_url(dist.value)

        u = Url(package_url)

        if u.resource_format == 'zip':
            d = dict(url=package_url,
                     name=basename(package_url),
                     format='ZIP',
                     mimetype=mimetypes.guess_type(package_url)[0],
                     description='ZIP version of package')
            resources.append(d)
            prt("Adding ZIP package ", d['name'])

        elif u.resource_format == 'xlsx':
            d = dict(url=package_url,
                     name=basename(package_url),
                     format='XLSX',
                     mimetype=mimetypes.guess_type(package_url)[0],
                     description='Excel version of package')
            resources.append(d)
            prt("Adding XLS package ", d['name'])

        elif u.resource_format == 'csv':

            d = dict(url=package_url,
                     name=basename(package_url),
                     format='csv',
                     mimetype=mimetypes.guess_type(metadata_url)[0],
                     description='CSV Package Metadata in Metatab format')

            resources.append(d)
            prt("Adding {} package {}".format(d['format'], d['name']))

            try:
                p = open_package(package_url)
            except (IOError, MetatabError) as e:
                err("Failed to open package '{}' from reference '{}': {}".
                    format(package_url, dist.value, e))

            for r in p.resources():

                mimetype = mimetypes.guess_type(r.resolved_url)[0]

                try:
                    ext = mimetypes.guess_extension(mimetype)[1:]
                except Exception:
                    ext = None

                d = dict(name=r.name,
                         format=ext,
                         url=r.resolved_url,
                         mimetype=mimetype,
                         description=r.markdown)

                resources.append(d)
                prt("Adding {} resource {}".format(d['format'], d['name']))

    pkg['resources'] = resources

    c.action.package_update(**pkg)

    pkg = c.action.package_show(name_or_id=ckan_name)

    update_dist(doc, [], join(m.ckan_url, 'dataset', ckan_name))

    ##
    ## Add a term with CKAN info.

    doc['Root'].get_or_new_term('CkanId', pkg['id'])

    if org_name_slug is None and pkg.get('organization'):
        doc['Root'].get_or_new_term('CkanOrg', (pkg.get('organization')
                                                or {}).get('name'))

    groups = doc['Root'].find('Group')
    for g in groups:
        doc.remove_term(g)

    for group in pkg.get('groups', []):
        doc['Root'].new_term('Group', group['name'])

    write_doc(doc, m.mt_file)
Example #20
def metatab():
    import argparse
    parser = argparse.ArgumentParser(
        prog='metatab',
        description='Metatab file parser, version {}'.format(_meta.__version__))

    parser.add_argument('-C', '--clean-cache', default=False, action='store_true',
                        help="Clean the download cache")

    g = parser.add_mutually_exclusive_group(required=True)

    g.add_argument('-i', '--info', default=False, action='store_true',
                   help="Show configuration information")

    g.add_argument('-c', '--create', action='store', nargs='?', default=False,
                   help="Create a new metatab file, from named template. With no argument, uses the 'metatab' template ")

    g.add_argument('-t', '--terms', default=False, action='store_true',
                   help='Parse a file and print out the stream of terms, before interpretation')

    g.add_argument('-I', '--interp', default=False, action='store_true',
                   help='Parse a file and print out the stream of terms, after interpretation')

    g.add_argument('-j', '--json', default=False, action='store_true',
                   help='Parse a file and print out a JSON representation')

    g.add_argument('-y', '--yaml', default=False, action='store_true',
                   help='Parse a file and print out a YAML representation')

    g.add_argument('-R', '--resource', default=False, action='store_true',
                   help='If the URL has no fragment, dump the resources listed in the metatab file. With a fragment, dump a resource as a CSV')

    g.add_argument('-H', '--head', default=False, action='store_true',
                   help="Dump the first 20 lines of a resoruce ")

    g.add_argument('-S', '--schema',
                   help='Dump the schema for one named resource')

    parser.add_argument('-d', '--show-declaration', default=False, action='store_true',
                        help='Parse a declaration file and print out declaration dict. Use -j or -y for the format')

    parser.add_argument('-D', '--declare', help='Parse and incorporate a declaration before parsing the file.' +
                                                ' (Adds the declaration to the start of the file as the first term. )')

    parser.add_argument('file', nargs='?', default=DEFAULT_METATAB_FILE, help='Path to a Metatab file')

    args = parser.parse_args(sys.argv[1:])

    # Specifying a fragment screws up setting the default metadata file name
    if args.file.startswith('#'):
        args.file = DEFAULT_METATAB_FILE + args.file

    cache = get_cache('metapack')

    if args.info:
        prt('Version  : {}'.format(_meta.__version__))
        prt('Cache dir: {}'.format(str(cache.getsyspath('/'))))
        exit(0)

    if args.clean_cache:
        clean_cache(cache)

    if args.create is not False:
        new_metatab_file(args.file, args.create)
        exit(0)

    if args.resource or args.head:

        limit = 20 if args.head else None

        u = Url(args.file)
        resource = u.parts.fragment
        metadata_url = u.rebuild_url(False, False)

        package_url, metadata_url = resolve_package_metadata_url(metadata_url)

        try:
            doc = MetatabDoc(metadata_url, cache=cache)
        except OSError as e:
            err("Failed to open Metatab doc: {}".format(e))
            return # Never reached

        if resource:
            dump_resource(doc, resource, limit)
        else:
            dump_resources(doc)


        exit(0)

    if args.show_declaration:

        doc = MetatabDoc()
        doc.load_declarations([args.file])

        print(json.dumps({
            'terms': doc.decl_terms,
            'sections': doc.decl_sections
        }, indent=4))
        exit(0)
    else:

        package_url, metadata_url = resolve_package_metadata_url(args.file)
        try:
            doc = MetatabDoc(metadata_url, cache=cache)
        except IOError as e:
            raise
            err("Failed to open '{}': {}".format(metadata_url, e))

    if args.terms:
        for t in doc._term_parser:
            print(t)

    elif args.json:
        print(json.dumps(doc.as_dict(), indent=4))


    elif args.yaml:
        import yaml
        print(yaml.safe_dump(doc.as_dict(), default_flow_style=False, indent=4))


    elif args.schema:
        dump_schema(doc, args.schema)

    exit(0)
Example #21
    def doc_dir(self):

        from os.path import abspath

        u = Url(self.ref)
        return abspath(dirname(u.parts.path))
Example #22
    def __init__(self,
                 ref=None,
                 decl=None,
                 package_url=None,
                 cache=None,
                 clean_cache=False):

        self._cache = cache if cache else get_cache()

        self.decl_terms = {}
        self.decl_sections = {}

        self.terms = []
        self.sections = OrderedDict()
        self.errors = []
        self.package_url = package_url

        #if Url(self.package_url).proto == 'file':
        #    path = abspath(parse_url_to_dict(self.package_url)['path'])
        #    self.package_url = reparse_url(self.package_url, path = path)

        if decl is None:
            self.decls = []
        elif not isinstance(decl, MutableSequence):
            self.decls = [decl]
        else:
            self.decls = decl

        self.load_declarations(self.decls)

        if ref:
            self._ref = ref
            self.root = None
            self._term_parser = TermParser(self._ref, doc=self)
            try:
                self.load_terms(self._term_parser)
            except SourceError as e:
                raise MetatabError(
                    "Failed to load terms for document '{}': {}".format(
                        self._ref, e))

            u = Url(self._ref)
            if u.scheme == 'file':
                try:
                    self._mtime = getmtime(u.parts.path)
                except (FileNotFoundError, OSError):
                    self._mtime = 0
            else:
                self._mtime = 0

        else:
            self._ref = None
            self._term_parser = None
            self.root = SectionTerm('Root',
                                    term='Root',
                                    doc=self,
                                    row=0,
                                    col=0,
                                    file_name=None,
                                    parent=None)
            self.add_section(self.root)
            self._mtime = time()