Beispiel #1
0
    def test_datapackage_declare(self):
        """Validate a Metatab document as a datapackage descriptor.

        The ``datapackage_ex2.csv`` fixture is dumped as JSON to a temporary
        file, which is then loaded and validated with ``datapackage``. On
        failure the JSON is printed before the exception propagates. Finally
        the ``example1.csv`` fixture is converted with
        ``convert_to_datapackage`` and the result is printed.
        """
        from tempfile import NamedTemporaryFile
        import datapackage
        from os import unlink

        doc = MetatabDoc(test_data('datapackage_ex2.csv'))

        d = doc.as_dict()

        # delete=False: the file is reopened by name below, so it has to
        # outlive this ``with`` block. It is removed in the ``finally``.
        with NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
            json.dump(d, f, indent=4)

        try:
            dp = datapackage.DataPackage(f.name)
            dp.validate()
        except Exception:
            # Show the descriptor that failed, then let the error propagate.
            with open(f.name) as f2:
                print(f2.read())
            raise
        finally:
            unlink(f.name)

        doc = MetatabDoc(test_data('example1.csv'))

        from metatab.datapackage import convert_to_datapackage

        print(json.dumps(convert_to_datapackage(doc), indent=4))
Beispiel #2
0
    def test_serializer(self):
        """Exploratory serializer walk-through; currently switched off.

        The unconditional ``return`` right below means none of the remaining
        statements are executed.
        """

        return

        document = MetatabDoc(test_data('schema.csv'))
        as_dict = document.as_dict()

        serializer = Serializer()
        serializer.load_declarations(as_dict)

        by_section = defaultdict(list)

        for entry in serializer.semiflatten(as_dict):
            print(entry)

        return

        for entry in sorted(serializer.serialize(as_dict)):
            key_path, value = entry[0], entry[1]
            has_int = any(isinstance(part, int) for part in key_path)
            named_parts = tuple(
                part for part in key_path if not isinstance(part, int))
            print(named_parts)

            # Only the last two non-integer key parts form the term name.
            term_name = '.'.join(named_parts[-2:])
            term = Term(term_name, value, row=0, col=0, file_name=None)
            declared = serializer.decl['terms'].get(term.join(), {})

            by_section[declared.get('section', 'Root')].append(term)

        return

        for section_name, section_terms in by_section.items():
            print("=====", section_name)
            for term in section_terms:
                print(term)
Beispiel #3
0
    def test_headers(self):
        """Compare the root dicts of the two ``example1`` fixtures.

        ``example1-headers.csv`` and ``example1.csv`` must produce root
        dicts that ``compare_dict`` accepts as equal.
        """

        d1 = MetatabDoc(test_data('example1-headers.csv')).root.as_dict()
        d2 = MetatabDoc(test_data('example1.csv')).root.as_dict()

        self.compare_dict(d1, d2)
Beispiel #4
0
    def test_sections(self):
        """Add a section to an empty document and print the outcome."""

        doc = MetatabDoc()

        section_args = "A B C".split()
        doc.new_section("SectionOne", section_args)

        print(doc.sections)
        print(doc.as_dict())
Beispiel #5
0
    def test_includes(self):
        """Check the notes and include terms of the ``include1.csv`` fixture."""

        doc = MetatabDoc(test_data('include1.csv'))
        d = doc.as_dict()

        for t in doc['root'].terms:
            print(t)

        print(d)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(
            ['Include File 1', 'Include File 2', 'Include File 3'], d['note'])

        # Each nested include must appear in at least one 'include' entry.
        for included in ('include2.csv', 'include3.csv'):
            self.assertTrue(any(included in e for e in d['include']))
Beispiel #6
0
def scrape_page(mt_file, url):
    """Add terms for the links scraped from ``url`` to a Metatab file.

    Loads ``mt_file``, records ``url`` as a ``DownloadPage`` term, adds a
    ``DataFile`` term for every scraped source and a classified term for
    every external documentation link, then writes the document back to
    ``mt_file``.
    """
    from .util import scrape_urls_from_web_page

    doc = MetatabDoc(mt_file)
    doc['resources'].new_term('DownloadPage', url)

    scraped = scrape_urls_from_web_page(url)

    for source in scraped['sources'].values():
        doc['Resources'].new_term(
            'DataFile', source['url'],
            description=source.get('description'))

    for link in scraped['external_documentation'].values():
        # The term name depends on what kind of URL this is.
        term_name = classify_url(link['url'])
        doc['Documentation'].new_term(
            term_name, link['url'],
            description=link.get('description'))

    doc.write_csv(mt_file)
Beispiel #7
0
        def errs(fn):
            """Parse ``fn`` expecting an IncludeError; return the parser errors.

            The return value is whatever ``errors_as_dict()`` gives for the
            parser after the expected failure.
            """

            with self.assertRaises(IncludeError):
                document = MetatabDoc()
                parser = TermParser(CsvPathRowGenerator(fn), doc=document)
                # Drain the parser; the error is raised while iterating.
                list(parser)

            return parser.errors_as_dict()
Beispiel #8
0
    def test_children(self):
        """Check that every ``parent`` term carries its two child properties."""

        doc = MetatabDoc(test_data('children.csv'))

        for t in doc.terms:
            print(t)

        import json

        d = doc.as_dict()
        print(json.dumps(d, indent=4))

        expected = {
            'prop1': 'prop1',
            'prop2': 'prop2',
            '@value': 'parent'
        }

        for t in d['parent']:
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(expected, t)
Beispiel #9
0
def rewrite_resource_format(mt_file):
    """Move table schemas from the ``schema`` section under their resources.

    If the document loaded from ``mt_file`` has a ``schema`` section, that
    section is removed and every resource gets a ``Schema`` child. The
    child holds one ``column`` child per column of the table whose value
    equals the resource's ``name``, and each column's other properties
    become children of that column. The document is always written back
    to ``mt_file``.
    """
    doc = MetatabDoc(mt_file)

    if 'schema' in doc:
        table_schemas = {t.value: t.as_dict() for t in doc['schema']}
        del doc['schema']

        for resource in doc['resources']:
            s = resource.new_child('Schema', '')

            # A resource without a matching table (or a table without
            # columns) gets an empty Schema child instead of a KeyError.
            table = table_schemas.get(resource.find_value('name'), {})

            for column in table.get('column', []):
                c = s.new_child('column', column['name'])

                for k, v in column.items():
                    if k != 'name':
                        c.new_child(k, v)

    doc.write_csv(mt_file)
Beispiel #10
0
    def test_new_parser(self):
        """Print the terms and declared terms of the ``short.csv`` fixture."""
        import json

        doc = MetatabDoc(test_data('short.csv'))

        for term in doc.terms:
            print(term)

        declared = doc.decl_terms
        print(json.dumps(declared, indent=4))
Beispiel #11
0
def make_metatab_file(template='metatab'):
    """Return a ``MetatabDoc`` loaded from a bundled template file.

    ``template`` is the template's base name: ``.csv`` is appended and the
    file is looked up in the directory of the ``metatab.templates`` package.
    """
    from os.path import dirname
    from rowgenerators.util import fs_join as join
    import metatab.templates
    from metatab.doc import MetatabDoc

    templates_dir = dirname(metatab.templates.__file__)
    file_name = template + '.csv'

    return MetatabDoc(join(templates_dir, file_name))
Beispiel #12
0
    def test_datapackage_convert(self):
        """Convert ``example1.csv`` to a datapackage descriptor and validate it."""
        import datapackage
        from metatab.datapackage import convert_to_datapackage

        source_doc = MetatabDoc(test_data('example1.csv'))
        descriptor = convert_to_datapackage(source_doc)

        print(json.dumps(descriptor, indent=4))

        package = datapackage.DataPackage(descriptor)
        package.validate()
Beispiel #13
0
    def test_basic(self):
        """Load ``example1.csv`` via ``load_csv`` and iterate terms and rows."""

        # Use the bundled fixture. A developer-machine absolute path used
        # to override this, which made the test fail everywhere else.
        fn = test_data('example1.csv')

        doc = MetatabDoc().load_csv(fn)

        # Consume the term iterator; the values themselves are not inspected.
        list(doc.terms)

        for row in doc:
            print(row)
Beispiel #14
0
    def test_parse_everything(self):
        """Compare parsed fixtures against their stored JSON renderings.

        For each fixture, the document's dict form is compared with the
        JSON file of the same base name under the ``json`` test-data
        directory. If that JSON file does not exist yet, it is first
        written from the current output.
        """

        file_names = [
            'example1.csv', 'example2.csv', 'example1-web.csv', 'include1.csv',
            'include2.csv', 'include3.csv', 'children.csv', 'children2.csv',
            'datapackage_ex1.csv', 'datapackage_ex1_web.csv',
            'datapackage_ex2.csv', 'issue1.csv'
        ]

        # NOTE(review): the full list is immediately narrowed to a single
        # fixture. This looks like a debugging leftover; confirm before
        # restoring the full list.
        file_names = ['example1.csv']

        for fn in file_names:

            print('Testing ', fn)

            path = test_data(fn)

            json_path = test_data('json', fn.replace('.csv', '.json'))

            d = MetatabDoc(path).as_dict()

            if not exists(json_path):
                with open(json_path, 'w') as f:
                    print("Writing", json_path)
                    json.dump(d, f, indent=4)

            with open(json_path) as f:
                d2 = json.load(f)

            self.compare_dict(d, d2)
Beispiel #15
0
    def test_versions(self):
        """Check ``Root.Version`` before and after ``as_version`` changes."""

        doc = MetatabDoc(test_data('example1.csv'))

        self.assertEqual('201404', doc.find_first_value('Root.Version'))

        # (argument passed to as_version, expected resulting version)
        cases = (
            ('+5', '201409'),
            ('-5', '201399'),
            ('foobar', 'foobar'),
        )

        for version_arg, expected in cases:
            versioned = doc.as_version(version_arg)
            self.assertEqual(expected,
                             versioned.find_first_value('Root.Version'))
Beispiel #16
0
    def test_sections(self):
        """Check section order in ``example1.csv`` and removal of a section."""

        doc = MetatabDoc(test_data('example1.csv'))

        self.assertEqual(
            ['root', u'resources', u'contacts', u'notes', u'schema'],
            list(doc.sections.keys()))

        del doc['Resources']

        self.assertEqual(['root', u'contacts', u'notes', u'schema'],
                         list(doc.sections.keys()))

        notes = list(doc['notes'])

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(2, len(notes))

        for sname, s in doc.sections.items():
            print(sname, s.value)
Beispiel #17
0
    def test_generic_row_generation(self):
        """Load documents from two URL forms and check one value in each.

        NOTE(review): both URLs point at remote resources, so this test
        presumably needs network access. Confirm before running offline.
        """

        url = 'gs://14_nfiTtSiMSjDes6BSiLU-Gsqy8DIdUxpMaH6DswcVQ'

        doc = MetatabDoc(url)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual('Registered Voters, By County',
                         doc.find_first('root.title').value)

        url = 'http://assets.metatab.org/examples/example-package.xls#meta'

        doc = MetatabDoc(url)

        self.assertEqual('17289303-73fa-437b-97da-2e1ed2cd01fd',
                         doc.find_first('root.identifier').value)
Beispiel #18
0
    def test_declarations(self):
        """Check the declared terms and sections reached by three routes.

        1. ``decl_terms`` of a document loaded from ``example1.csv``.
        2. A document built from a single ``Declare`` row.
        3. The term parser's ``declare_dict`` for ``example1.csv``.
        """

        # Both declaration routes below must expose exactly these sections.
        expected_sections = {
            'contacts', 'declaredterms', 'declaredsections', 'root',
            'resources', 'schemas', 'sources', 'documentation', 'data'
        }

        doc = MetatabDoc(test_data('example1.csv'))

        d = {k: v for k, v in doc.decl_terms.items() if 'homepage' in k}

        self.assertEqual(17, len(d))

        self.assertIn("homepage.mediatype", d.keys())
        self.assertIn("homepage.hash", d.keys())
        self.assertIn("homepage.title", d.keys())

        # Direct use of function

        ti = TermParser(
            CsvPathRowGenerator(declaration_path('metatab-latest')), False)
        ti.install_declare_terms()

        fn = test_data(
            'example1.csv')  # Not actually used. Sets base directory

        doc = MetatabDoc(
            MetatabRowGenerator([['Declare', 'metatab-latest']], fn))

        terms = doc.decl_terms

        self.assertIn('root.homepage', terms.keys())
        self.assertIn('documentation.description', terms.keys())
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(247, len(terms.keys()))

        sections = doc.decl_sections

        self.assertEqual(expected_sections, set(sections.keys()))

        # Use the Declare term

        fn = test_data('example1.csv')
        doc = MetatabDoc(CsvPathRowGenerator(fn))

        d = doc._term_parser.declare_dict

        self.assertEqual({'terms', 'synonyms', 'sections'}, set(d.keys()))

        terms = d['terms']

        self.assertIn('root.homepage', terms.keys())
        self.assertIn('documentation.description', terms.keys())
        self.assertEqual(247, len(terms.keys()))

        sections = d['sections']

        self.assertEqual(expected_sections, set(sections.keys()))

        self.assertEqual(['Email', 'Organization', 'Tel', 'Url'],
                         sections['contacts']['args'])
        self.assertEqual(['TermValueName', 'ChildPropertyType', 'Section'],
                         sections['declaredterms']['args'])
        self.assertEqual(['DataType', 'ValueType', 'Description'],
                         sections['schemas']['args'])
Beispiel #19
0
    def test_find(self):
        """Check that ``find_first`` returns the ``Root.Identifier`` term."""

        doc = MetatabDoc(test_data('example1.csv'))

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual('cdph.ca.gov-hci-registered_voters-county',
                         doc.find_first('Root.Identifier').value)
Beispiel #20
0
from collections import defaultdict

# Fetch the library and the three ACS schema partitions named below.
l = get_library()
table_meta_p = l.partition(
    'census.gov-acs_geofile-schemas-2009e-table_meta-2014-5')
column_meta_p = l.partition(
    'census.gov-acs_geofile-schemas-2009e-column_meta-2014-5')

sequence_p = l.partition(
    'census.gov-acs_geofile-schemas-2009e-table_sequence-2014-5')
# Map table_id -> (sequence_number, start, table_cells), keeping only the
# rows whose ``start`` is truthy.
sequences = {
    row.table_id: (row.sequence_number, row.start, row.table_cells)
    for row in sequence_p if row.start
}

# Build the root metadata document: a 'Root' section with the declaration,
# title, release and year terms plus two Include terms.
root_doc = MetatabDoc()
root = root_doc.new_section('Root')

root.new_term('Declare', 'http://assets.metatab.org/census.csv')
root.new_term('Title', 'American Community Survey, 5 Year, 2009-2014')
root.new_term('Release', 5)
root.new_term('Year', 2014)
# These name the two files the root document includes.
# NOTE(review): presumably produced later in this script; not visible here.
root.new_term('Include', 'acs20145-sources.csv')
root.new_term('Include', 'acs20145-schema.csv')

root_doc.write_csv('acs20145-metadata.csv')

# Start a separate document for the sources, with a 'Sources' section whose
# argument list is ['geography', 'state'].
src_doc = MetatabDoc()
source_sec = src_doc.new_section('Sources', ['geography', 'state'])

from censuslib import ACS09TableRowGenerator as TableRowGenerator
Beispiel #21
0
    def test_descendents(self):
        """Check the total number of terms yielded by ``all_terms``."""

        doc = MetatabDoc(test_data('example1.csv'))

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(144, len(list(doc.all_terms)))
Beispiel #22
0
    def test_update_name(self):
        """Exercise ``update_name`` as name-related terms are removed.

        For each of the ``name.csv`` and ``name2.csv`` fixtures: the first
        update must produce the expected name, then the dataset term, the
        name value and the identifier value are removed or cleared in turn
        and the message reported by ``update_name`` is checked each time.
        """

        for fn in ('name.csv', 'name2.csv'):

            doc = MetatabDoc(test_data(fn))

            updates = doc.update_name()

            name = doc.find_first_value("Root.Name")

            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual('example.com-foobar-2017-ca-people-1', name)
            self.assertEqual(['Changed Name'], updates)

            # The dataset term is either a root term or a child of
            # Root.Name; remove whichever is present.
            try:
                doc.remove_term(doc.find_first('Root.Dataset'))
            except ValueError:
                nv = doc.find_first('Root.Name')
                nv.remove_child(nv.find_first('Name.Dataset'))

            updates = doc.update_name()

            self.assertIn("No Root.Dataset, so can't update the name", updates)

            doc.find_first('Root.Name').value = None

            updates = doc.update_name()

            self.assertIn('Setting the name to the identifier', updates)

            doc.find_first('Root.Name').value = None
            doc.find_first('Root.Identifier').value = None

            updates = doc.update_name()

            self.assertIn(
                'Failed to find DatasetName term or Identity term. Giving up',
                updates)

            self.assertIsNone(doc.get_value('Root.Name'))