def test_build_simple_package(self):
    """Build the simple example package in several formats and check it.

    A filesystem package is built first; Excel, ZIP and CSV packages are
    then built from the filesystem package, and the resource names and
    resource URLs reported by each resulting document are asserted.
    """
    cli_init()

    cache = Downloader().cache

    m = MetapackUrl(
        test_data('packages/example.com/example.com-simple_example-2017-us'),
        downloader=downloader)

    package_dir = m.package_url.join_dir(PACKAGE_PREFIX)

    # Every format is expected to expose the same three resources.
    expected_names = ['random-names', 'renter_cost', 'unicode-latin1']

    _, fs_url, _ = make_filesystem_package(m, package_dir, cache, {}, False)

    fs_doc = MetapackDoc(fs_url, cache=downloader.cache)

    # The return value is not used; presumably a smoke check that the
    # resource can be looked up in the freshly built package.
    fs_doc.resource('random-names')

    # Excel

    _, url, _ = make_excel_package(fs_url, package_dir, cache, {}, False)

    self.assertEqual(expected_names,
                     [r.name for r in url.doc.resources()])

    # For the Excel package the resource URLs are asserted to equal the names.
    self.assertEqual(expected_names,
                     [r.url for r in url.doc.resources()])

    # ZIP

    _, url, _ = make_zip_package(fs_url, package_dir, cache, {}, False)

    self.assertEqual(expected_names,
                     [r.name for r in url.doc.resources()])

    self.assertEqual(
        [
            'data/random-names.csv',
            'data/renter_cost.csv',
            'data/unicode-latin1.csv',
        ],
        [r.url for r in url.doc.resources()])

    # CSV

    _, url, _ = make_csv_package(fs_url, package_dir, cache, {}, False)

    self.assertEqual(expected_names,
                     [r.name for r in url.doc.resources()])

    # Only the last 50 characters of each URL are compared, which is why
    # the expected strings start at different points in the package name.
    self.assertEqual(
        [
            'com-simple_example-2017-us-2/data/random-names.csv',
            '.com-simple_example-2017-us-2/data/renter_cost.csv',
            'm-simple_example-2017-us-2/data/unicode-latin1.csv',
        ],
        [str(r.url)[-50:] for r in url.doc.resources()])
def test_fixed_resource(self):
    """Check the 'simple-fixed' resource of the full example package.

    Asserts the resource URL and resolved URL, that its row generator is a
    ``FixedSource``, and one cell of the sixth of the first ten rows.
    """
    from itertools import islice

    from rowgenerators.generator.fixed import FixedSource

    metadata_url = MetapackUrl(
        test_data('packages/example.com/example.com-full-2017-us/metadata.csv'),
        downloader=downloader)

    resource = MetapackDoc(metadata_url).resource('simple-fixed')

    # Both the declared and the resolved URL are expected to be this string.
    expected_url = ('fixed+http://public.source.civicknowledge.com'
                    '/example.com/sources/simple-example.txt')

    self.assertEqual(expected_url, str(resource.url))
    self.assertEqual(expected_url, str(resource.resolved_url))

    generator = resource.row_generator

    print(resource.row_processor_table())

    self.assertIsInstance(generator, FixedSource)

    # Take only the first ten rows of the resource.
    first_rows = list(islice(resource, 10))

    print('----')

    for fixed_row in first_rows:
        print(fixed_row)

    self.assertEqual('f02d53a3-6bbc-4095-a889-c4dde0ccf5', first_rows[5][1])
def test_build_geo_package(self):
    """Build a filesystem package from the census-regions package.

    Asserts that the 'sra' resource of the built package yields 41 rows and
    that the 'geometry' value of the second row is a ``ShapeValue``.
    """
    from rowgenerators.valuetype import ShapeValue

    source_url = MetapackUrl(
        test_data('packages/sangis.org/sangis.org-census_regions/metadata.csv'),
        downloader=downloader)

    build_dir = source_url.package_url.join_dir(PACKAGE_PREFIX)

    _, fs_url, created = make_filesystem_package(
        source_url, build_dir, downloader.cache, {}, True)

    print(fs_url)

    built_doc = MetapackDoc(fs_url)
    sra_rows = list(built_doc.resource('sra').iterdict)

    self.assertEqual(41, len(sra_rows))

    self.assertIsInstance(sra_rows[1]['geometry'], ShapeValue)
def test_build_notebook_package(self): try: from metapack import MetapackDocumentUrl, get_cache from metapack_build.build import make_filesystem_package m = MetapackDocumentUrl(test_data( 'packages/example.com/example.com-notebook/metadata.csv'), downloader=downloader) # process_schemas(m) doc = MetapackDoc(m) r = doc.resource('basic_a') self.assertEqual(2501, len(list(r))) package_dir = m.package_url.join_dir(PACKAGE_PREFIX) _, fs_url, created = make_filesystem_package( m, package_dir, get_cache(), {}, False, False, False) print(fs_url) except ImportError: unittest.skip("Pandas not installed") return
def build(m):
    """Mark a package for rebuilding when its source data changed (disabled).

    The function raises ``NotImplementedError`` as its first statement, so
    every statement after it is currently unreachable. The unreachable code
    compares, resource by resource, the ``raw_row_generator.hash`` of the
    document named in ``.trial_build`` with that of the document named in
    ``.last_build``. If no hash differs it reports that and returns;
    otherwise it touches ``m.mt_file`` and, when ``m.args.increment`` is set,
    bumps the document name version and writes the document.

    :param m: object providing ``doc``, ``package_root``, ``args`` and
        ``mt_file`` (these are the attributes read below).
    :raises NotImplementedError: always.
    """

    raise NotImplementedError()

    # ---- Everything below is unreachable while the raise above is present ----

    # Placeholder for the command runner; it does nothing. It accepts only
    # positional arguments, so the call below that passes the keyword
    # ``do_cli_init`` would raise TypeError if this code were ever reached.
    def mp(*args):
        pass

    name = m.doc.name

    # File whose text is later used as the reference of the previous build.
    lb_file = m.package_root.fspath.joinpath('.last_build')

    # Choose the output function: plain print when m.args.result is set,
    # otherwise the prt function from metapack.cli.core.
    if m.args.result:
        prt = print
    else:
        from metapack.cli.core import prt

    if lb_file.exists():
        # Run a test build
        ft_args = ['build', '-FT']
        if m.args.no_cache:
            ft_args = ['-n'] + ft_args

        mp(ft_args, do_cli_init=False)

        # Both files are read as text and handed to MetapackDoc.
        tb_path = m.package_root.fspath.joinpath('.trial_build').read_text()
        lb_path = lb_file.read_text()

        tdoc = MetapackDoc(tb_path)
        ldoc = MetapackDoc(lb_path)

        # Count the resources whose raw row generator hash differs between
        # the trial build and the last build.
        diff_hashes = 0

        for t_r in tdoc.resources():
            l_r = ldoc.resource(t_r.name)

            h1 = t_r.raw_row_generator.hash
            h2 = l_r.raw_row_generator.hash

            if h1 != h2:
                diff_hashes += 1

        if diff_hashes == 0:
            prt(f'👍 {name}: Hashes Unchanged: will not rebuild')
            return

        prt(f'🛠 {name}: Hashes changed. Marked for rebuilding')

        # Update the modification time of the file at m.mt_file.
        Path(m.mt_file.fspath).touch()

        if m.args.increment:
            m.doc.update_name(mod_version='+')
            m.doc.write()
    else:
        prt(f'🛠 {name}: No previous build')
def test_petl(self):
    """Call ``petl()`` on the 'simple-example' resource and print a preview.

    The test makes no assertions; it only exercises the calls.
    """
    from petl import look

    metadata_url = MetapackUrl(
        test_data('packages/example.com/example.com-full-2017-us/metadata.csv'),
        downloader=downloader)

    resource = MetapackDoc(metadata_url).resource('simple-example')

    # Resolve the resource and its target first; the results are not used.
    fetched = resource.resolved_url.get_resource()
    fetched.get_target()

    table = resource.petl()

    print(look(table))
def test_program_resource(self): return # Actually, completely broken right now m = MetapackUrl(test_data( 'packages/example.com/example.com-full-2017-us/metadata.csv'), downloader=downloader) doc = MetapackDoc(m) r = doc.resource('rowgen') self.assertEqual('program+file:scripts/rowgen.py', str(r.url)) print(r.resolved_url) g = r.row_generator print(type(g)) for row in r: print(row)