def test_sync_csv_package(self):
    """Build a CSV package from a remote metadata URL and print its resources.

    The package is written under the ``_packages`` directory of the
    simple-example test package, with the resource root set to the directory
    of the remote metadata URL. Nothing is asserted; the name and URL of each
    resource in the saved package's metadata are printed.
    """
    from metapack_build.package import CsvPackageBuilder

    packages_path = test_data(
        'packages/example.com/example.com-simple_example-2017-us/_packages')
    package_root = MetapackPackageUrl(packages_path, downloader=downloader)

    source_url = 'http://library.metatab.org/example.com-simple_example-2017-us-2/metadata.csv'
    remote = MetapackUrl(source_url, downloader=downloader)

    # Resolve the remote resource and its target up front; the result is unused.
    remote.get_resource().get_target()

    builder = CsvPackageBuilder(
        remote,
        package_root,
        resource_root=remote.dirname().as_type(MetapackPackageUrl),
    )
    saved_url = builder.save()

    saved_doc = saved_url.metadata_url.doc
    for resource in saved_doc.resources():
        print(resource.name, resource.url)
def test_metapack_resources(self):
    """Open the metab_reuse example package and print what it resolves to.

    Prints the package's resources, then whether the target of the package
    URL's resource exists. Nothing is asserted.
    """
    cli_init()

    metadata_path = test_data(
        'packages/example.com/example.com-metab_reuse/metadata.csv')
    pkg_url = MetapackUrl(metadata_path, downloader=downloader)

    resources = pkg_url.doc.resources()
    print(resources)

    target = pkg_url.get_resource().get_target()
    print(target.exists())
def test_fixed_resource(self):
    """Check the 'simple-fixed' resource of the full example package.

    Verifies that both the declared and the resolved URL are the expected
    ``fixed+http`` URL, that the row generator is a ``FixedSource``, and that
    the second column of the sixth row read has the expected value.
    """
    from itertools import islice

    from rowgenerators.generator.fixed import FixedSource

    # The declared URL and the resolved URL are expected to be the same string.
    expected_url = 'fixed+http://public.source.civicknowledge.com/example.com/sources/simple-example.txt'

    metadata_path = test_data(
        'packages/example.com/example.com-full-2017-us/metadata.csv')
    doc = MetapackDoc(MetapackUrl(metadata_path, downloader=downloader))
    resource = doc.resource('simple-fixed')

    self.assertEqual(expected_url, str(resource.url))
    self.assertEqual(expected_url, str(resource.resolved_url))

    generator = resource.row_generator
    print(resource.row_processor_table())
    self.assertIsInstance(generator, FixedSource)

    # Only the first ten rows are read.
    first_rows = list(islice(resource, 10))

    print('----')
    for row in first_rows:
        print(row)

    self.assertEqual('f02d53a3-6bbc-4095-a889-c4dde0ccf5', first_rows[5][1])
def test_build_geo_package(self):
    """Build the census_regions filesystem package and check its 'sra' resource.

    Expects 41 rows from the resource, and a ``ShapeValue`` in the
    ``geometry`` field of the second row.
    """
    from rowgenerators.valuetype import ShapeValue

    metadata_path = test_data(
        'packages/sangis.org/sangis.org-census_regions/metadata.csv')
    source = MetapackUrl(metadata_path, downloader=downloader)
    package_dir = source.package_url.join_dir(PACKAGE_PREFIX)

    _, fs_url, created = make_filesystem_package(
        source, package_dir, downloader.cache, {}, True)
    print(fs_url)

    built_doc = MetapackDoc(fs_url)
    sra = built_doc.resource('sra')
    records = list(sra.iterdict)

    self.assertEqual(41, len(records))
    self.assertIsInstance(records[1]['geometry'], ShapeValue)
def test_build_s3_package(self):
    """Build an S3 CSV package from the built example data package.

    The source is the metadata file of the already-built
    ``example.com-example_data_package-2017-us-1`` package, and the
    destination is ``s3://test.library.civicknowledge.com/metatab``. Nothing
    is asserted; the resulting URL and the ``created`` flag are printed.
    """
    from metapack_build.build import make_s3_csv_package

    cache = Downloader().cache

    # Locate the built package through test_data() instead of an absolute path
    # that only exists on one developer's machine.
    # NOTE(review): assumes test_data() resolves relative to the repository's
    # test-data directory, as the other tests here use it -- confirm.
    fs_url = MetapackUrl(
        test_data('packages/example.com/example-package/_packages/'
                  'example.com-example_data_package-2017-us-1/metadata.csv'),
        downloader=downloader)

    package_dir = parse_app_url(
        's3://test.library.civicknowledge.com/metatab', downloader=downloader)

    _, url, created = make_s3_csv_package(fs_url, package_dir, cache, {}, False)

    print(url)
    print(created)
def _exec_build(p, package_root, force, nv_name, extant_url_f, post_f):
    """Build package ``p`` when required and return it with its URL.

    A build happens when ``force`` is truthy, when
    ``p.is_older_than_metadata()`` is true, or when ``p`` does not exist; the
    checks are evaluated in that order and stop at the first match. Otherwise
    the existing package is reused and its URL comes from ``extant_url_f(p)``.

    :param p: package builder; must provide ``is_older_than_metadata()``,
        ``exists()``, ``save()``, ``type_code`` and ``move_to_nv_name()``.
    :param package_root: object whose ``downloader`` is attached to the
        returned URL.
    :param force: build even if the package looks up to date.
    :param nv_name: when truthy, call ``p.move_to_nv_name()`` at the end.
    :param extant_url_f: callable given ``p``; supplies the URL when no build
        is performed.
    :param post_f: zero-argument callable invoked after the build decision
        has been carried out.
    :return: tuple ``(p, MetapackUrl, created)`` where ``created`` is True
        only when the package was built by this call.
    """
    from metapack import MetapackUrl

    if force:
        should_build, reason = True, 'Forcing build'
    elif p.is_older_than_metadata():
        should_build, reason = True, 'Metadata is younger than package'
    elif not p.exists():
        should_build, reason = True, "Package doesn't exist"
    else:
        should_build, reason = False, 'Metadata is older than package'

    if should_build:
        prt("Building {} package ({})".format(p.type_code, reason))
        url = p.save()
        prt("Package ( type: {} ) saved to: {}".format(p.type_code, url))
        created = True
    else:
        prt("Not building {} package ({})".format(p.type_code, reason))
        # Existence is checked again before reusing the extant package.
        if p.exists():
            created = False
            url = extant_url_f(p)

    post_f()

    if nv_name:
        p.move_to_nv_name()

    return p, MetapackUrl(url, downloader=package_root.downloader), created
def test_build_simple_package(self):
    """Build the simple example as filesystem, Excel, ZIP and CSV packages.

    For each of the Excel, ZIP and CSV packages, the resource names and the
    resource URLs recorded in the built package's metadata are compared with
    the expected values.
    """
    cli_init()

    cache = Downloader().cache

    source = MetapackUrl(
        test_data('packages/example.com/example.com-simple_example-2017-us'),
        downloader=downloader)
    package_dir = source.package_url.join_dir(PACKAGE_PREFIX)

    _, fs_url, created = make_filesystem_package(
        source, package_dir, cache, {}, False)

    fs_doc = MetapackDoc(fs_url, cache=downloader.cache)
    fs_doc.resource('random-names')

    # Every package type is expected to expose the same resource names.
    expected_names = ['random-names', 'renter_cost', 'unicode-latin1']

    # Excel
    _, url, created = make_excel_package(fs_url, package_dir, cache, {}, False)

    self.assertEqual(expected_names, [r.name for r in url.doc.resources()])
    # The Excel package's resource URLs are expected to equal the names.
    self.assertEqual(expected_names, [r.url for r in url.doc.resources()])

    # ZIP
    _, url, created = make_zip_package(fs_url, package_dir, cache, {}, False)

    self.assertEqual(expected_names, [r.name for r in url.doc.resources()])
    expected_zip_urls = [
        'data/random-names.csv',
        'data/renter_cost.csv',
        'data/unicode-latin1.csv',
    ]
    self.assertEqual(expected_zip_urls, [r.url for r in url.doc.resources()])

    # CSV
    _, url, created = make_csv_package(fs_url, package_dir, cache, {}, False)

    self.assertEqual(expected_names, [r.name for r in url.doc.resources()])
    # Only the last 50 characters of each URL are compared.
    expected_csv_tails = [
        'com-simple_example-2017-us-2/data/random-names.csv',
        '.com-simple_example-2017-us-2/data/renter_cost.csv',
        'm-simple_example-2017-us-2/data/unicode-latin1.csv',
    ]
    self.assertEqual(expected_csv_tails,
                     [str(r.url)[-50:] for r in url.doc.resources()])
def __init__(self, bucket, source_package, package_root=None, dist_urls=None, callback=None, env=None):
    """Set up a package builder that targets an S3 bucket.

    :param bucket: bucket object; its ``private_access_url()`` and
        ``access_url()`` are called with ``self.cache_path`` to produce
        distribution URLs.
    :param source_package: source package; its ``access_url`` and
        ``private_access_url`` are used to derive the resource roots.
    :param package_root: passed through to the parent constructor.
    :param dist_urls: optional iterable of distribution URLs. It is copied,
        never modified. ``None`` (the default) means no initial URLs.
    :param callback: passed through to the parent constructor.
    :param env: passed through to the parent constructor.
    """
    from metapack.package import Downloader

    self.source_package = source_package
    self.bucket = bucket

    u = MetapackUrl(source_package.access_url, downloader=Downloader.get_instance())
    resource_root = u.dirname().as_type(MetapackPackageUrl)

    pu = MetapackUrl(source_package.private_access_url, downloader=Downloader.get_instance())
    self.private_resource_root = pu.dirname().as_type(MetapackPackageUrl)

    super().__init__(u, package_root, resource_root, callback, env)

    # A None sentinel replaces the former mutable ``[]`` default; the input is
    # copied so the caller's sequence is never altered.
    self.dist_urls = list(dist_urls) if dist_urls is not None else []

    # For the S3: url for the S3 package
    self.dist_urls.append(self.bucket.private_access_url(self.cache_path))

    self.dist_urls.append(self.bucket.access_url(self.cache_path))

    self.set_distributions(self.dist_urls)
def test_build_transform_package(self):
    """Build the transforms example as a filesystem package and print its URL.

    Nothing is asserted; the test only exercises the build.
    """
    metadata_path = test_data(
        'packages/example.com/example.com-transforms/metadata.csv')
    source = MetapackUrl(metadata_path, downloader=downloader)

    package_dir = source.package_url.join_dir(PACKAGE_PREFIX)

    _, fs_url, created = make_filesystem_package(
        source, package_dir, downloader.cache, {}, False)

    print(fs_url)
def test_petl(self):
    """Load the 'simple-example' resource as a PETL table and print a preview.

    Nothing is asserted; the output of ``petl.look`` is printed.
    """
    from petl import look

    metadata_path = test_data(
        'packages/example.com/example.com-full-2017-us/metadata.csv')
    doc = MetapackDoc(MetapackUrl(metadata_path, downloader=downloader))

    resource = doc.resource('simple-example')

    # Resolve the resource's target before building the table; result unused.
    resource.resolved_url.get_resource().get_target()

    table = resource.petl()
    print(look(table))
def test_build_package(self):
    """Build the full example as a filesystem package and print ``created``.

    If building raises ``ImportError``, the test is reported as skipped with
    the error message as the reason.
    """
    try:
        cli_init()

        m = MetapackUrl(
            test_data('packages/example.com/example.com-full-2017-us/metadata.csv'),
            downloader=downloader)

        package_dir = m.package_url.join_dir(PACKAGE_PREFIX)

        cache = Downloader().cache

        _, fs_url, created = make_filesystem_package(
            m, package_dir, cache, {}, False)
    except ImportError as e:
        # unittest.skip() only returns a decorator, so calling it here never
        # skipped anything; skipTest() raises SkipTest and marks this test
        # as skipped.
        self.skipTest(str(e))

    print(created)
def test_resolve_packages(self):
    """Print the metadata URL resolved for several package URL forms.

    Covers ZIP, Excel, CSV and directory-style package URLs, first with an
    ``http://example.com/d/`` prefix and then with a ``file:/d/`` prefix.
    Nothing is asserted.
    """
    package_names = (
        'package.zip',
        'package.xlsx',
        'package.csv',
        'package/metadata.csv',
    )

    # All HTTP forms come first, then all file forms.
    http_urls = ["http://example.com/d/{}".format(name) for name in package_names]
    file_urls = ["file:/d/{}".format(name) for name in package_names]

    for url_str in http_urls + file_urls:
        # A new Downloader is created for every URL.
        resolved = MetapackUrl(url_str, downloader=Downloader())
        print(resolved.metadata_url)
def __init__(self, args):
    """Collect the state derived from parsed command-line arguments.

    Resolves the metatab file (defaulting to ``DEFAULT_METATAB_FILE`` in the
    current directory), derives the package and metadata URLs, determines the
    S3 URL, and creates an ``S3Bucket`` when an S3 URL is available.

    :param args: argument object; ``metatabfile``, ``s3`` and ``profile`` are
        read. ``args.s3`` is overwritten with the document's ``Root.S3``
        value when it is not already set.
    """
    self.cwd = getcwd()
    self.args = args

    self.downloader = Downloader.get_instance()
    self.cache = self.downloader.cache

    if self.args.metatabfile:
        self.mtfile_arg = self.args.metatabfile
    else:
        self.mtfile_arg = join(self.cwd, DEFAULT_METATAB_FILE)

    self.mtfile_url = MetapackUrl(self.mtfile_arg, downloader=self.downloader)

    self.resource = self.mtfile_url.target_file
    self.package_url = self.mtfile_url.package_url
    self.mt_file = self.mtfile_url.metadata_url
    self.package_root = self.package_url.join(PACKAGE_PREFIX)

    # Fall back to the S3 location recorded in the metadata document.
    if not self.args.s3:
        doc = MetapackDoc(self.mt_file)
        self.args.s3 = doc['Root'].find_first_value('Root.S3')

    self.s3_url = parse_app_url(self.args.s3)

    # A value whose parsed scheme is not 's3' is re-parsed with an s3:// prefix.
    if self.s3_url:
        if self.s3_url.scheme != 's3':
            self.s3_url = parse_app_url("s3://{}".format(self.args.s3))

    self.doc = MetapackDoc(self.mt_file)

    access_value = self.doc.find_first_value('Root.Access')
    if access_value == 'private':
        self.acl = 'private'
    else:
        self.acl = 'public-read'

    if self.s3_url:
        self.bucket = S3Bucket(self.s3_url, acl=self.acl, profile=self.args.profile)
    else:
        self.bucket = None
def test_program_resource(self):
    """Check the 'rowgen' program resource of the full example package.

    Currently disabled: the test is reported as skipped rather than silently
    returning, so the test run shows that this check is not being exercised.
    The body below is kept for when the resource works again.
    """
    # skipTest() raises SkipTest, so nothing after this line runs.
    self.skipTest('Actually, completely broken right now')

    m = MetapackUrl(
        test_data('packages/example.com/example.com-full-2017-us/metadata.csv'),
        downloader=downloader)

    doc = MetapackDoc(m)

    r = doc.resource('rowgen')

    self.assertEqual('program+file:scripts/rowgen.py', str(r.url))

    print(r.resolved_url)

    g = r.row_generator

    print(type(g))

    for row in r:
        print(row)