예제 #1
0
    def test_build_notebook_package(self):
        """Build a filesystem package from a notebook-based source package.

        Skipped when the optional build dependencies (pandas et al.) are
        not installed.
        """
        try:
            from metapack import MetapackDocumentUrl, get_cache
            from metapack_build.build import make_filesystem_package

            m = MetapackDocumentUrl(test_data(
                'packages/example.com/example.com-notebook/metadata.csv'),
                                    downloader=downloader)

            # process_schemas(m)

            doc = MetapackDoc(m)

            r = doc.resource('basic_a')

            self.assertEqual(2501, len(list(r)))

            package_dir = m.package_url.join_dir(PACKAGE_PREFIX)

            _, fs_url, created = make_filesystem_package(
                m, package_dir, get_cache(), {}, False, False, False)

            print(fs_url)
        except ImportError:
            # BUG FIX: the original called unittest.skip(...), which only
            # *builds* a decorator and never marks this test as skipped, so
            # the test silently passed. self.skipTest() raises SkipTest.
            self.skipTest("Pandas not installed")
예제 #2
0
    def test_build_geo_package(self):
        """Build a filesystem package containing a geographic resource and
        check that geometry values come back as ShapeValue instances."""

        from rowgenerators.valuetype import ShapeValue

        source_url = MetapackUrl(
            test_data(
                'packages/sangis.org/sangis.org-census_regions/metadata.csv'),
            downloader=downloader)

        pkg_dir = source_url.package_url.join_dir(PACKAGE_PREFIX)

        _, fs_url, created = make_filesystem_package(
            source_url, pkg_dir, downloader.cache, {}, True)

        print(fs_url)

        built_doc = MetapackDoc(fs_url)

        resource = built_doc.resource('sra')

        rows = [row for row in resource.iterdict]

        # 41 Sub-Regional Areas expected, with parsed geometry objects.
        self.assertEqual(41, len(rows))
        self.assertIsInstance(rows[1]['geometry'], ShapeValue)
예제 #3
0
    def test_fixed_resource(self):
        """A fixed-width resource should resolve to a FixedSource generator
        and yield the expected rows."""
        from itertools import islice
        from rowgenerators.generator.fixed import FixedSource

        pkg_url = MetapackUrl(
            test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'),
            downloader=downloader)

        doc = MetapackDoc(pkg_url)

        resource = doc.resource('simple-fixed')

        # Both the raw and the resolved url carry the fixed+ protocol.
        self.assertEqual(
            'fixed+http://public.source.civicknowledge.com/example.com/sources/simple-example.txt',
            str(resource.url))
        self.assertEqual(
            'fixed+http://public.source.civicknowledge.com/example.com/sources/simple-example.txt',
            str(resource.resolved_url))

        gen = resource.row_generator

        print(resource.row_processor_table())

        self.assertIsInstance(gen, FixedSource)

        first_rows = [row for row in islice(resource, 10)]

        print('----')
        for row in first_rows:
            print(row)

        self.assertEqual('f02d53a3-6bbc-4095-a889-c4dde0ccf5',
                         first_rows[5][1])
예제 #4
0
    def test_iterate_doc(self):
        """Round-trip a document through the database and read it back.

        NOTE(review): the doubly-nested mm.session() near the end appears to
        exercise nested-session support — confirm that is intentional.
        """

        doc = MetapackDoc(test_data('example1.csv'))

        # Start from a clean database file.
        if exists(test_database_path):
            remove(test_database_path)

        db = Database('sqlite:///' + test_database_path)

        mm = MetatabManager(db)

        mm.add_doc(doc)

        with mm.session() as s:
            db_doc = mm.document(identifier=doc.get_value('Root.Identifier'))

            mt_doc = db_doc.mt_doc

            self.assertEqual(23, len(mt_doc.terms))

            # The two datafile urls should resolve back unchanged.
            self.assertEqual([
                'http://example.com/example1.csv',
                'http://example.com/example2.csv'
            ], [str(r.resolved_url) for r in mt_doc.find("Root.Datafile")])

        # The same lookup should also work one session level down.
        with mm.session():
            with mm.session():
                db_doc = mm.document(
                    identifier=doc.get_value('Root.Identifier'))

                mt_doc = db_doc.mt_doc

                self.assertEqual(23, len(mt_doc.terms))
예제 #5
0
def add_resource(mt_file, ref, cache):
    """Add a Resources entry for ``ref``, downloading and intuiting the
    file, replacing any existing entry with the same reference."""

    # Accept either an open document or a reference to one.
    doc = mt_file if isinstance(mt_file, MetapackDoc) else MetapackDoc(mt_file)

    if 'Resources' not in doc:
        doc.new_section('Resources')

    # Ensure the standard argument columns are present, dropping empties.
    standard_args = ['Name', 'StartLine', 'HeaderLines', 'Encoding']
    doc['Resources'].args = [
        arg for arg in set(doc['Resources'].args + standard_args) if arg
    ]

    seen_names = set()

    url = parse_app_url(ref)

    # The web and file URLs don't list the same.
    if url.proto == 'file':
        entries = url.list()
    else:
        entries = [inner for outer in url.list() for inner in outer.list()]

    for entry in entries:
        add_single_resource(doc, entry, cache=cache, seen_names=seen_names)

    write_doc(doc, mt_file)
예제 #6
0
    def load(self, url, load_all_resources=False):
        """Load a package and possibly one or all resources, from a url.

        Returns a tuple of (db_doc, resources), where ``resources`` is the
        list of resource records that were actually loaded.
        """

        u = parse_app_url(url)

        d = MetapackDoc(u.clear_fragment())

        db_doc = self.document(name=d.get_value('Root.Name'))

        # First sighting: add the document, then re-query for the DB record.
        if not db_doc:
            self.add_doc(d)
            db_doc = self.document(name=d.get_value('Root.Name'))
            assert db_doc

        resources = []

        if load_all_resources:

            for r in self.resources(db_doc):
                self.load_resource(r)
                resources.append(r)

        elif u.target_file:

            r = self.resource(db_doc, u.target_file)

            self.load_resource(r)

            # BUG FIX: the original appended the whole document ``d`` here,
            # so the returned list mixed a document in with resources.
            # Append the loaded resource, matching the branch above.
            resources.append(r)

        return (db_doc, resources)
예제 #7
0
    def test_build_simple_package(self):
        """Build filesystem, Excel, ZIP and CSV packages from one source
        package and check the resource names and urls of each."""

        cli_init()

        cache = Downloader().cache

        m = MetapackUrl(test_data(
            'packages/example.com/example.com-simple_example-2017-us'),
                        downloader=downloader)

        # Removed a redundant `package_dir = package_dir` self-assignment.
        package_dir = m.package_url.join_dir(PACKAGE_PREFIX)

        _, fs_url, created = make_filesystem_package(m, package_dir, cache, {},
                                                     False)

        fs_doc = MetapackDoc(fs_url, cache=downloader.cache)

        fs_doc.resource('random-names')

        # Excel

        _, url, created = make_excel_package(fs_url, package_dir, cache, {},
                                             False)

        self.assertEqual(['random-names', 'renter_cost', 'unicode-latin1'],
                         [r.name for r in url.doc.resources()])

        self.assertEqual(['random-names', 'renter_cost', 'unicode-latin1'],
                         [r.url for r in url.doc.resources()])

        # ZIP

        _, url, created = make_zip_package(fs_url, package_dir, cache, {},
                                           False)

        self.assertEqual(['random-names', 'renter_cost', 'unicode-latin1'],
                         [r.name for r in url.doc.resources()])

        self.assertEqual([
            'data/random-names.csv', 'data/renter_cost.csv',
            'data/unicode-latin1.csv'
        ], [r.url for r in url.doc.resources()])

        #  CSV

        _, url, created = make_csv_package(fs_url, package_dir, cache, {},
                                           False)

        self.assertEqual(['random-names', 'renter_cost', 'unicode-latin1'],
                         [r.name for r in url.doc.resources()])

        # Compare only the url tails; the prefix varies with the build dir.
        self.assertEqual([
            'com-simple_example-2017-us-2/data/random-names.csv',
            '.com-simple_example-2017-us-2/data/renter_cost.csv',
            'm-simple_example-2017-us-2/data/unicode-latin1.csv'
        ], [str(r.url)[-50:] for r in url.doc.resources()])
예제 #8
0
def process_schemas(mt_file, resource=None, cache=None, clean=False, report_found=True, force=False, min_rows=5000,
                    allow_codes=True):
    """Build or update the Schema section for resources in a document.

    Resources whose schema is already complete (every column has a datatype)
    are skipped unless ``force`` is set. The document is written back to
    ``mt_file`` only when it was passed as a reference (not as an open
    MetapackDoc) and at least one schema was processed.
    """
    from metapack import MetapackDoc, MetapackResourceUrl, MetapackDocumentUrl

    # Accept either an open document or a reference to one; only a
    # referenced file is written back at the end.
    if isinstance(mt_file, MetapackDoc):
        doc = mt_file
        write_doc_to_file = False
    else:
        doc = MetapackDoc(mt_file)
        write_doc_to_file = True

    try:
        if clean:
            doc['Schema'].clean()
        else:
            doc['Schema']  # Raises KeyError when the section is missing.

    except KeyError:
        doc.new_section('Schema', ['DataType', 'AltName', 'Description'])

    schemas_processed = 0

    for r in doc['Resources'].find('Root.Resource'):

        # When a specific resource was requested, skip all others.
        if resource and r.name != resource:
            continue

        schema_term = r.schema_term

        col_count = len(list(r.columns()))
        datatype_count = sum(1 for c in r.columns() if c['datatype'])

        # A complete schema is left alone unless the caller forces a rebuild.
        if schema_term and col_count == datatype_count and force is False:
            if report_found:
                prt("Found table for '{}'; skipping".format(r.schema_name))
            continue

        if col_count != datatype_count:
            prt("Found table for '{}'; but {} columns don't have datatypes"
                .format(r.schema_name, col_count - datatype_count))

        schemas_processed += 1

        rr = r.resolved_url

        # Stale materialized data would skew type intuition; clear it first.
        rmtree(get_materialized_data_cache(doc), ignore_errors=True)

        if isinstance(rr, MetapackDocumentUrl):
            warn('{} is a MetapackDocumentUrl; skipping', r.name)
        elif isinstance(rr, MetapackResourceUrl):
            _process_metapack_resource(doc, r, force)
        else:
            _process_normal_resource(doc, r, force, skip_start=min_rows, allow_codes=allow_codes)

    if write_doc_to_file and schemas_processed:
        write_doc(doc, mt_file)
예제 #9
0
def convert_notebook(nb_path):
    """Execute a notebook and convert it into a Metatab source package.

    Runs the notebook, writes its output as the package metadata file,
    exports documentation into a docs/ subdirectory, copies any Python lib
    directories into the package, and writes the final metadata CSV.
    """
    prt('Convert notebook to Metatab source package')

    if not exists(nb_path):
        err("Notebook path does not exist: '{}' ".format(nb_path))

    c = Config()

    # Execute the notebook; its outputs become the package contents.
    pe = NotebookExecutor(config=c, log=logger)

    prt('Running the notebook')
    output, resources = pe.from_filename(nb_path)

    fw = FilesWriter()
    fw.build_directory = pe.output_dir

    fw.write(output, resources, notebook_name=DEFAULT_METATAB_FILE)

    de = DocumentationExporter(config=c,
                               log=logger,
                               metadata=doc_metadata(pe.doc))

    prt('Exporting documentation')
    output, resources = de.from_filename(nb_path)

    # Documentation is written alongside the package in docs/.
    fw.build_directory = join(pe.output_dir, 'docs')
    fw.write(output, resources, notebook_name='notebook')

    new_mt_file = join(pe.output_dir, DEFAULT_METATAB_FILE)

    doc = MetapackDoc(new_mt_file)

    de.update_metatab(doc, resources)

    for lib_dir in pe.lib_dirs:
        # NOTE(review): lstrip('./') strips any leading '.' and '/' chars,
        # so a directory whose name itself starts with '.' would be mangled
        # — confirm lib_dirs never contains such names.
        lib_dir = normpath(lib_dir).lstrip('./')

        doc['Resources'].new_term("Root.PythonLib", lib_dir)

        path = abspath(lib_dir)
        dest = join(pe.output_dir, lib_dir)

        ensure_dir(dest)
        copytree(path, join(pe.output_dir, lib_dir))

    doc.write_csv()

    # Reset the input to use the new data

    prt('Running with new package file: {}'.format(new_mt_file))
예제 #10
0
    def mt_doc(self):
        """Return the current metatab document, which must be created with either %%metatab
        or %mt_load_package.

        Lazily creates an inline document in the shell's user namespace on
        first access, with empty Resources and References sections.
        """

        if MT_DOC_VAR not in self.shell.user_ns:

            package_url = MetapackPackageUrl(
                "metapack+file:" + os.getcwd() + '/',
                downloader=Downloader.get_instance())

            self.shell.user_ns[MT_DOC_VAR] = \
                MetapackDoc(TextRowGenerator("Declare: metatab-latest\n"), package_url=package_url)

            inline_doc = self.shell.user_ns[MT_DOC_VAR]

            if 'Resources' not in inline_doc:
                inline_doc.new_section('Resources', ['Name', 'Description'])
            # BUG FIX: the original re-tested 'Resources' here (copy-paste),
            # so the References section was never created.
            if 'References' not in inline_doc:
                inline_doc.new_section('References', ['Name', 'Description'])

            # Give all of the sections their standard args, to make the CSV versions of the doc
            # prettier
            for name, s in inline_doc.sections.items():
                try:
                    s.args = inline_doc.decl_sections[name.lower()]['args']
                except KeyError:
                    pass

        return self.shell.user_ns[MT_DOC_VAR]
예제 #11
0
    def test_load_one_resource(self):
        """Add a package document to the database and load one resource."""

        # Start from a clean database file.
        if exists(test_database_path):
            remove(test_database_path)

        db = Database('sqlite:///' + test_database_path)

        mm = MetatabManager(db)

        mm.add_doc(
            MetapackDoc(
                'http://library.metatab.org/example.com-full-2017-us-1.csv'))

        ident = next(d.identifier for d in mm.documents())
        doc = mm.document(identifier=ident)

        self.assertIsNotNone(doc)

        resources = [r.name for r in mm.resources(doc)]

        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual([
            'renter_cost', 'simple-example-altnames', 'simple-example',
            'unicode-latin1', 'unicode-utf8', 'renter_cost_excel07',
            'renter_cost_excel97', 'renter_cost-2', 'random-names',
            'random-names-fs', 'random-names-csv', 'random-names-xlsx',
            'random-names-zip', 'sra', 'rowgen', 'simple-fixed'
        ], resources)

        r = mm.resource(doc, 'random-names')

        mm.load_resource(r)
예제 #12
0
    def test_petl(self):
        """A resource should be convertible to a petl table."""
        from petl import look

        pkg_url = MetapackUrl(
            test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'),
            downloader=downloader)

        doc = MetapackDoc(pkg_url)

        resource = doc.resource('simple-example')

        # Force the url to be downloaded and resolved to a local target.
        resource.resolved_url.get_resource().get_target()

        table = resource.petl()

        print(look(table))
예제 #13
0
    def test_gen_line_rows(self):
        """A line-oriented doc url should parse as a MetapackDocumentUrl
        whose generator is a TextRowGenerator."""
        from metatab import parse_app_url
        from metapack import MetapackDocumentUrl
        from metatab.rowgenerators import TextRowGenerator

        url = parse_app_url(test_data('line', 'line-oriented-doc.txt'),
                            proto='metapack')

        # The url, its resource, and its target are all document urls.
        self.assertIsInstance(url, MetapackDocumentUrl)
        self.assertIsInstance(url.get_resource(), MetapackDocumentUrl)
        target = url.get_resource().get_target()
        self.assertIsInstance(target, MetapackDocumentUrl)

        self.assertIsInstance(url.generator, TextRowGenerator)

        doc = MetapackDoc(url)
        self.assertEqual('47bc1089-7584-41f0-b804-602ec42f1249',
                         doc.get_value('Root.Identifier'))
예제 #14
0
def run_url_add(args):
    """Add a resources entry, downloading and intuiting the file, replacing
    entries with the same reference.

    Reports, but does not abort on, urls that could not be added.
    """

    m = MetapackCliMemo(args, downloader)

    update_name(m.mt_file, fail_on_missing=False, report_unchanged=False)

    # Accept either an open document or a reference to one.
    if isinstance(m.mt_file, MetapackDoc):
        doc = m.mt_file
    else:
        doc = MetapackDoc(m.mt_file)

    if 'Resources' not in doc:
        doc.new_section('Resources')

    # Ensure the standard argument columns exist, dropping empty entries.
    doc['Resources'].args = [
        e for e in set(doc['Resources'].args +
                       ['Name', 'StartLine', 'HeaderLines', 'Encoding']) if e
    ]

    seen_names = set()

    u = parse_app_url(args.url)

    # The web and file URLs don't list the same.

    if u.proto == 'file':
        entries = u.list()
    else:
        entries = [ssu for su in u.list() for ssu in su.list()]

    errors = []

    for e in entries:
        if not add_single_resource(
                doc, e, cache=m.cache, seen_names=seen_names):
            errors.append(e)

    if errors:
        prt()
        warn("Found, but failed to add these urls:")
        for e in errors:
            print('    ', e)

    write_doc(doc)
예제 #15
0
    def test_read_geo_packages(self):
        """Read census and geographic references from a line-oriented doc
        and verify dataframe/geoframe conversion."""

        import warnings
        from requests.exceptions import HTTPError

        warnings.simplefilter("ignore")

        try:
            from publicdata.census.dataframe import CensusDataFrame
        except ImportError:
            return unittest.skip("Public data isn't installed")

        with open(test_data('line', 'line-oriented-doc.txt')) as f:
            body = f.read()

        doc = MetapackDoc(
            TextRowGenerator("Declare: metatab-latest\n" + body))

        census_ref = doc.reference('B09020')

        try:
            df = census_ref.dataframe()
        except HTTPError:  # The Census reporter URLs fail sometimes.
            return unittest.skip("Census Reporter vanished")

        self.assertIsInstance(df, CensusDataFrame)

        # San Diego SRA regions: all simple polygons.
        sra_frame = doc.reference('sra_geo').geoframe()

        self.assertEqual(41, len(sra_frame.geometry.geom_type))
        self.assertEqual({'Polygon'}, set(sra_frame.geometry.geom_type))

        # Rhode Island tracts: mix of polygons and multipolygons.
        tracts_frame = doc.reference('ri_tracts').geoframe()

        self.assertEqual(244, len(tracts_frame.geometry.geom_type))

        geom_types = sorted(list(set(tracts_frame.geometry.geom_type)))
        print(geom_types)

        self.assertEqual(['MultiPolygon', 'Polygon'], geom_types)

        print(tracts_frame.head())
예제 #16
0
    def test_multiple_docs(self):
        """Add several documents and look them up by ref and by name."""

        # Start from a clean database file.
        if exists(test_database_path):
            remove(test_database_path)

        db = Database('sqlite:///' + test_database_path)

        mm = MetatabManager(db)

        with mm.session():  # # add_doc session are nested
            mm.add_doc(MetapackDoc(test_data('example1.csv')))
            mm.add_doc(MetapackDoc(test_data('example.com-full-2017-us.csv')))
            mm.add_doc(
                MetapackDoc(
                    'http://library.metatab.org/example.com-simple_example-2017-us-2.csv'
                ))

        with mm.session():
            self.assertEqual([
                'cfcba102-9d8f-11e7-8adb-3c0754078006',
                '316821b9-9082-4c9e-8662-db50d9d91135',
                '96cd659b-94ad-46ae-9c18-4018caa64355'
            ], [d.identifier for d in mm.documents()])

        # NOTE: the deprecated assertEquals alias was replaced with
        # assertEqual throughout this test.
        doc = mm.document(ref="file:" + test_data('example1.csv'))
        self.assertEqual('cfcba102-9d8f-11e7-8adb-3c0754078006',
                         doc.identifier)

        doc = mm.document(
            ref=
            'metapack+http://library.metatab.org/example.com-simple_example-2017-us-2.csv'
        )
        self.assertEqual('96cd659b-94ad-46ae-9c18-4018caa64355',
                         doc.identifier)

        doc = mm.document(
            ref=
            'http://library.metatab.org/example.com-simple_example-2017-us-2.csv'
        )
        self.assertEqual('96cd659b-94ad-46ae-9c18-4018caa64355',
                         doc.identifier)

        doc = mm.document(name=doc.name)
        self.assertEqual('96cd659b-94ad-46ae-9c18-4018caa64355',
                         doc.identifier)
0
    def test_line_doc(self):
        """Parse a line-oriented doc, check reference dataframes, and put a
        Python library from a referenced package onto sys.path."""

        from os.path import splitext, basename
        import sys

        with open(test_data('line', 'line-oriented-doc.txt')) as f:
            text = f.read()

        doc = MetapackDoc(TextRowGenerator("Declare: metatab-latest\n" + text))

        # process_schemas(doc)

        r = doc.reference('tracts')

        self.assertEqual(628, len(list(r)))

        tracts = r.dataframe()

        # Sum of longitudes, truncated to int — a cheap content checksum.
        self.assertEqual(-73427, tracts.lon.sum().astype(int))

        tracts = r.read_csv()

        self.assertEqual(-73427, tracts.lon.sum().astype(int))

        r.dataframe()

        # Test loading a Python Library from a package.

        ref = doc.reference('incv')

        self.assertIsNotNone(ref)

        ref_resource = parse_app_url(
            ref.url).inner.clear_fragment().get_resource()

        # The path has to be a Metatab ZIP archive, and the root directory must be the same as
        # the name of the path

        pkg_name, _ = splitext(basename(ref_resource.path))

        lib_path = ref_resource.join(pkg_name).path

        # Side effect: makes the package's library importable for later tests.
        if lib_path not in sys.path:
            sys.path.insert(0, lib_path)
def update_package(md):
    """Apply tag metadata from ``md`` to an existing package's metadata.csv,
    doing nothing when the package directory does not exist."""

    pkg_name = md['name']

    pkg_root = Path('packages').joinpath(pkg_name)

    if not pkg_root.exists():
        return

    doc = MetapackDoc(str(pkg_root.joinpath('metadata.csv')))

    print(doc.name, doc.ref)

    # Record each tag as a Root.Tag term on the document.
    for tag in md['tags']:
        term = doc['Root'].new_term('Root.Tag', tag['name'])
        print(term)

    #print(json.dumps(md, indent=4))

    doc.write()
예제 #19
0
    def test_line_oriented(self):
        """Parse a line-oriented document and verify term counts; check
        that references are Resource terms of the Reference class."""

        doc = MetapackDoc(
            TextRowGenerator(test_data('line', 'line-oriented-doc.txt')))

        self.assertEqual('47bc1089-7584-41f0-b804-602ec42f1249',
                         doc.get_value('Root.Identifier'))
        self.assertEqual(153, len(doc.terms))

        refs_section = doc['References']

        self.assertEqual(6, len(list(refs_section)))

        self.assertEqual(6, len(list(refs_section.find('Root.Reference'))))

        # References are Resources, so Root.Resource finds them too.
        resource_terms = list(refs_section.find('Root.Resource'))
        self.assertEqual(6, len(resource_terms))

        self.assertIsInstance(resource_terms[0], Reference)
예제 #20
0
    def __init__(self, args):
        """Collect CLI state: resolve the metatab file, package urls, the
        S3 target url and bucket, and the document's access ACL."""

        self.cwd = getcwd()

        self.args = args

        self.downloader = Downloader.get_instance()

        self.cache = self.downloader.cache

        # Default to DEFAULT_METATAB_FILE in the current directory when no
        # metatab file argument was given.
        self.mtfile_arg = self.args.metatabfile if self.args.metatabfile else join(
            self.cwd, DEFAULT_METATAB_FILE)

        self.mtfile_url = MetapackUrl(self.mtfile_arg,
                                      downloader=self.downloader)

        self.resource = self.mtfile_url.target_file

        self.package_url = self.mtfile_url.package_url
        self.mt_file = self.mtfile_url.metadata_url

        self.package_root = self.package_url.join(PACKAGE_PREFIX)

        # Fall back to the document's Root.S3 term when no --s3 argument.
        if not self.args.s3:
            doc = MetapackDoc(self.mt_file)
            self.args.s3 = doc['Root'].find_first_value('Root.S3')

        self.s3_url = parse_app_url(self.args.s3)

        # A bare bucket name is promoted to a full s3:// url.
        if self.s3_url and not self.s3_url.scheme == 's3':
            self.s3_url = parse_app_url("s3://{}".format(self.args.s3))

        self.doc = MetapackDoc(self.mt_file)

        access_value = self.doc.find_first_value('Root.Access')

        # Anything other than an explicit 'private' is published readable.
        self.acl = 'private' if access_value == 'private' else 'public-read'

        self.bucket = S3Bucket(
            self.s3_url, acl=self.acl,
            profile=self.args.profile) if self.s3_url else None
예제 #21
0
def maybe_trial_build(m):
    '''Update the metadata for a trial build, then restore it.

    Generator: yields a single (is_trial, mt_file_url) pair. When --trial
    was not requested, yields (False, m.mt_file) unchanged. Otherwise it
    writes a trial.csv copy of the metadata with Version.Build set to
    'trial', yields it, and deletes the copy afterwards.

    NOTE: the docstring was originally placed after the import statement,
    so it was not the function's __doc__; it has been moved first.
    '''
    from shutil import copyfile

    if not m.args.trial:
        yield False, m.mt_file
        return

    # Trial builds rely on the Version.Build component of a semantic version.
    if not m.doc._has_semver():
        raise MetapackError(
            "To use trial builds, package must have a semantic version ")

    prt('Building a trial')

    mt_file = Path(m.mt_file.fspath).parent.joinpath('trial.csv')

    copyfile(m.mt_file.fspath, mt_file)

    doc = MetapackDoc(mt_file)
    version = doc['Root'].find_first('Root.Version')
    vb = version.get_or_new_child('Version.Build')
    vb.value = 'trial'

    try:
        doc.update_name()
        doc.write()

        yield True, parse_app_url(str(mt_file), downloader)
    finally:
        # Always remove the temporary trial metadata, even on error.
        mt_file.unlink()
예제 #22
0
    def test_program_resource(self):
        """Exercise a program+file resource that generates rows by running
        a script. Currently disabled via the early return."""

        return  # Actually, completely broken right now

        # --- Unreachable: kept for when program resources are fixed. ---
        m = MetapackUrl(test_data(
            'packages/example.com/example.com-full-2017-us/metadata.csv'),
                        downloader=downloader)

        doc = MetapackDoc(m)

        r = doc.resource('rowgen')

        self.assertEqual('program+file:scripts/rowgen.py', str(r.url))

        print(r.resolved_url)

        g = r.row_generator

        print(type(g))

        for row in r:
            print(row)
예제 #23
0
    def run(self, nb):
        """Preprocess notebook ``nb`` against a fresh metatab document and
        return the resulting document."""

        assert str(self.package_url)

        # Start from an empty document bound to this package's url.
        blank_doc = MetapackDoc(TextRowGenerator("Declare: metatab-latest\n"),
                                package_url=parse_app_url(self.package_url))
        self.doc = blank_doc

        self.preprocess(nb, {})

        # Fold any extra (section, term, value) triples into the document.
        for section, term, value in self.extra_terms:
            self.doc[section].get_or_new_term(term, value)

        return self.doc
예제 #24
0
    def test_create_and_delete_tables(self):
        """Documents and terms round-trip through the database, duplicates
        are rejected, and deleting a document cascades to its terms."""
        from os import remove
        from os.path import exists

        doc = MetapackDoc(test_data('example1.csv'))

        # Start from a clean database file.
        if exists(test_database_path):
            remove(test_database_path)

        db = Database('sqlite:///' + test_database_path)
        db.create_tables()

        mm = MetatabManager(db)
        mm.add_doc(doc)

        def assert_counts(n_docs, n_terms):
            # One document row plus its term rows, counted in a session.
            with mm.session() as s:
                self.assertEqual(n_docs, len(list(s.query(Document))))
                self.assertEqual(n_terms, len(list(s.query(Term))))

        assert_counts(1, 154)

        # Adding the same document again violates a uniqueness constraint.
        with self.assertRaises(IntegrityError):
            mm.add_doc(doc)

        assert_counts(1, 154)

        with mm.session() as s:
            db_doc = mm.document(identifier=doc.get_value('Root.Identifier'))
            self.assertIsNotNone(db_doc)
            s.delete(db_doc)

        assert_counts(0, 0)
예제 #25
0
def convert_metatab_notebook(m):
    """Set the package's Root.Readme term from the generated README.

    Everything after the early ``return`` is dead code from an older
    CSV<->notebook conversion implementation, kept for reference; as
    written it would fail (``source`` is None when its suffix is read).
    """
    m.doc['Documentation'].get_or_new_term('Root.Readme').value = get_readme(m)

    return

    source = None  # Path(source)

    if source.suffix == '.csv':
        dest = source.with_suffix('.ipynb')
        doc = MetapackDoc(source)
        doc.ensure_identifier()
        doc.update_name(create_term=True)
        # _write_metatab_notebook(doc, dest)

    elif source.suffix == '.ipynb':
        dest = source.with_suffix('.csv')

        doc = None  # extract_notebook_metatab(source)
        doc.ensure_identifier()
        doc.update_name(create_term=True)
        write_doc(doc, dest)

    else:
        err("Source file must be either .ipynb or .csv")
예제 #26
0
    def test_line_doc_parts(self):
        """Assemble one document from several line-oriented fragments and
        verify the combined term counts and term classes."""

        doc = MetapackDoc(TextRowGenerator("Declare: metatab-latest"))

        fragments = (
            'line/line-oriented-doc-root.txt',
            'line/line-oriented-doc-contacts.txt',
            'line/line-oriented-doc-datafiles.txt',
            'line/line-oriented-doc-references-1.txt',
            'line/line-oriented-doc-references-2.txt',
            'line/line-oriented-doc-bib.txt',
        )

        # Load each fragment's terms into the same document.
        for fragment in fragments:
            with open(test_data(fragment)) as f:
                fragment_text = f.read()

            parser = TermParser(TextRowGenerator(fragment_text),
                                resolver=doc.resolver,
                                doc=doc)
            doc.load_terms(parser)

        self.assertEqual('47bc1089-7584-41f0-b804-602ec42f1249',
                         doc.get_value('Root.Identifier'))
        self.assertEqual(157, len(doc.terms))

        references = doc['References']

        self.assertEqual(5, len(list(references)))
        self.assertEqual(5, len(list(references.find('Root.Reference'))))

        # References are Resources, so Root.Resource finds them as well.
        ref_resources = list(references.find('Root.Resource'))
        self.assertEqual(5, len(ref_resources))
        self.assertIsInstance(ref_resources[0], Reference)

        resources = doc['Resources']

        self.assertEqual(5, len(list(resources)))
        self.assertEqual(5, len(list(resources.find('Root.Datafile'))))

        res_terms = list(resources.find('Root.Resource'))
        self.assertEqual(5, len(res_terms))
        self.assertIsInstance(res_terms[0], Resource)

        doc._repr_html_()  # Check no exceptions
예제 #27
0
def build(m):
    """Conditionally rebuild a package when its resource hashes change.

    Disabled: raises NotImplementedError immediately. The body below is
    dead code retained as a sketch of the intended implementation — run a
    trial build, compare per-resource row-generator hashes against the
    last build, and mark the package for rebuilding when any differ.
    """

    raise NotImplementedError()

    # --- Unreachable from here on. ---

    def mp(*args):
        # Placeholder for invoking the mp CLI programmatically.
        pass

    name = m.doc.name

    lb_file = m.package_root.fspath.joinpath('.last_build')

    if m.args.result:
        prt = print
    else:
        from metapack.cli.core import prt

    if lb_file.exists():
        # Run a test build
        ft_args = ['build', '-FT']
        if m.args.no_cache:
            ft_args = ['-n'] + ft_args
        mp(ft_args, do_cli_init=False)

        tb_path = m.package_root.fspath.joinpath('.trial_build').read_text()
        lb_path = lb_file.read_text()

        tdoc = MetapackDoc(tb_path)
        ldoc = MetapackDoc(lb_path)

        diff_hashes = 0

        # Compare per-resource content hashes between trial and last build.
        for t_r in tdoc.resources():
            l_r = ldoc.resource(t_r.name)

            h1 = t_r.raw_row_generator.hash
            h2 = l_r.raw_row_generator.hash

            if h1 != h2:
                diff_hashes += 1

        if diff_hashes == 0:
            prt(f'👍 {name}: Hashes Unchanged: will not rebuild')
            return

        prt(f'🛠 {name}: Hashes changed. Marked for rebuilding')
        Path(m.mt_file.fspath).touch()

        if m.args.increment:
            m.doc.update_name(mod_version='+')
            m.doc.write()
    else:
        prt(f'🛠 {name}: No previous build')
예제 #28
0
    def preprocess_cell(self, cell, resources, index):
        """If this cell holds the final metatab output, rebuild self.doc
        from the cell's text outputs; otherwise pass the cell through."""
        from metatab.rowgenerators import TextRowGenerator

        is_final_metatab = cell['metadata'].get('mt_final_metatab')

        if is_final_metatab and cell['outputs']:
            combined = ''.join(output['text'] for output in cell['outputs'])

            self.doc = MetapackDoc(TextRowGenerator(combined))

            # Give all of the sections their standard args, to make the CSV
            # versions of the doc prettier
            for section_name, section in self.doc.sections.items():
                try:
                    section.args = \
                        self.doc.decl_sections[section_name.lower()]['args']
                except KeyError:
                    pass

        return cell, resources
예제 #29
0
def extract_notebook_metatab(nb_path: Path):
    """Extract the metatab lines from a notebook and return a Metapack doc."""

    from metatab.rowgenerators import TextRowGenerator
    import nbformat

    with nb_path.open() as f:
        nb = nbformat.read(f, as_version=4)

    # Stitch the tagged cells together under a Declare header.
    parts = ['Declare: metatab-latest']
    for tag in ['metadata', 'resources', 'schema']:
        parts.append(get_cell_source(nb, tag))

    doc = MetapackDoc(TextRowGenerator('\n'.join(parts)))

    # Title cell is a markdown heading; strip the leading '#' markers.
    title = get_cell_source(nb, 'Title').strip('#').strip()
    doc['Root'].get_or_new_term('Root.Title').value = title

    doc['Root'].get_or_new_term('Root.Description').value = \
        get_cell_source(nb, 'Description')

    doc['Documentation'].get_or_new_term('Root.Readme').value = \
        get_cell_source(nb, 'readme')

    return doc
예제 #30
0
def write_hashes(m):
    """Record per-resource row-generator hashes for the last and trial
    builds into .hashes.yaml in the package root.

    Hash failures for individual resources are warned about and skipped;
    a missing last-build metatab file is silently ignored.
    """
    from metapack.exc import MetatabFileNotFound
    pm = last_build_marker_path(m)

    hashes = {}

    if pm.exists():
        hashes['last_package'] = pm.read_text()

        try:
            p = MetapackDoc(hashes['last_package'])
            hashes['last_hashes'] = {}
            for r in p.resources():
                try:
                    hashes['last_hashes'][r.name] = r.raw_row_generator.hash
                except Exception as e:
                    # The original had separate AttributeError and Exception
                    # handlers with identical bodies; one handler suffices.
                    warn(
                        f"Failed to generate hash for {r.name}, rrg={type(r)}: {type(e)} {e}"
                    )

        except MetatabFileNotFound:
            pass

    tm = trial_build_marker_path(m)

    if tm.exists():
        hashes['trial_package'] = tm.read_text()

        p = MetapackDoc(hashes['trial_package'])

        hashes['trial_hashes'] = {
            r.name: r.raw_row_generator.hash
            for r in p.resources()
        }

    hp = Path(m.package_root.fspath, '.hashes.yaml')

    hp.write_text(yaml.safe_dump(hashes))