예제 #1
0
    def test_sync_csv_package(self):
        """Build a CSV package from a remote metadata file and print its resources.

        The package is written under the ``_packages`` directory of the local
        ``simple_example`` test package; each resource's name and URL in the
        saved package's metadata is printed.
        """
        from metapack_build.package import CsvPackageBuilder

        packages_path = test_data(
            'packages/example.com/example.com-simple_example-2017-us/_packages')
        package_root = MetapackPackageUrl(packages_path, downloader=downloader)

        source_url = 'http://library.metatab.org/example.com-simple_example-2017-us-2/metadata.csv'
        remote = MetapackUrl(source_url, downloader=downloader)

        # Resolve the remote resource and its target before building; the
        # returned value itself is not used.
        remote.get_resource().get_target()

        builder = CsvPackageBuilder(
            remote,
            package_root,
            resource_root=remote.dirname().as_type(MetapackPackageUrl),
        )
        saved_url = builder.save()

        for resource in saved_url.metadata_url.doc.resources():
            print(resource.name, resource.url)
예제 #2
0
    def test_metapack_resources(self):
        """Print the resources of the ``metab_reuse`` package and whether its target exists."""
        cli_init()

        metadata_path = test_data(
            'packages/example.com/example.com-metab_reuse/metadata.csv')
        url = MetapackUrl(metadata_path, downloader=downloader)

        print(url.doc.resources())

        target = url.get_resource().get_target()
        print(target.exists())
예제 #3
0
    def test_fixed_resource(self):
        """Check the ``simple-fixed`` resource of the ``full`` example package.

        Verifies the resource URL and resolved URL, that its row generator is
        a ``FixedSource``, and the value of one cell among the first ten rows.
        """
        from itertools import islice
        from rowgenerators.generator.fixed import FixedSource

        expected_url = 'fixed+http://public.source.civicknowledge.com/example.com/sources/simple-example.txt'

        metadata = MetapackUrl(
            test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'),
            downloader=downloader,
        )
        resource = MetapackDoc(metadata).resource('simple-fixed')

        # Both the declared and the resolved URL are expected to be the same.
        self.assertEqual(expected_url, str(resource.url))
        self.assertEqual(expected_url, str(resource.resolved_url))

        generator = resource.row_generator

        print(resource.row_processor_table())

        self.assertIsInstance(generator, FixedSource)

        # Only the first ten rows yielded by the resource are needed.
        first_rows = list(islice(resource, 10))

        print('----')
        for row in first_rows:
            print(row)

        self.assertEqual('f02d53a3-6bbc-4095-a889-c4dde0ccf5',
                         first_rows[5][1])
예제 #4
0
    def test_build_geo_package(self):
        """Build the ``census_regions`` filesystem package and check its ``sra`` resource.

        Expects 41 rows from ``iterdict`` and a ``ShapeValue`` in the
        ``geometry`` field of the second row.
        """
        from rowgenerators.valuetype import ShapeValue

        metadata = MetapackUrl(
            test_data(
                'packages/sangis.org/sangis.org-census_regions/metadata.csv'),
            downloader=downloader,
        )
        target_dir = metadata.package_url.join_dir(PACKAGE_PREFIX)

        # Only the package URL (second element of the result) is used here.
        _, fs_url, _ = make_filesystem_package(
            metadata, target_dir, downloader.cache, {}, True)

        print(fs_url)

        resource = MetapackDoc(fs_url).resource('sra')
        records = list(resource.iterdict)

        self.assertEqual(41, len(records))
        self.assertIsInstance(records[1]['geometry'], ShapeValue)
예제 #5
0
    def test_build_s3_package(self):
        """Build an S3 CSV package from an existing filesystem package.

        Prints the resulting package URL and the ``created`` flag returned by
        ``make_s3_csv_package``.
        """
        from metapack_build.build import make_s3_csv_package

        cache = Downloader().cache

        # NOTE(review): this is an absolute, machine-specific path; the test
        # presumably only works where that volume exists -- confirm.
        source_package = MetapackUrl(
            '/Volumes/Storage/proj/virt-proj/metapack/metapack/test-data/packages/example.com/'
            'example-package/_packages/example.com-example_data_package-2017-us-1/metadata.csv',
            downloader=downloader)

        s3_root = parse_app_url(
            's3://test.library.civicknowledge.com/metatab',
            downloader=downloader)

        _, url, created = make_s3_csv_package(
            source_package, s3_root, cache, {}, False)

        print(url)
        print(created)
예제 #6
0
def _exec_build(p, package_root, force, nv_name, extant_url_f, post_f):
    """Build package ``p`` when required and return it with its URL.

    A build happens when ``force`` is true, when
    ``p.is_older_than_metadata()`` is true, or when ``p.exists()`` is false
    (checked in that order). Otherwise the URL of the existing package is
    obtained from ``extant_url_f``.

    Args:
        p: Package builder. Must provide ``is_older_than_metadata()``,
            ``exists()``, ``save()``, ``move_to_nv_name()`` and ``type_code``.
        package_root: Object whose ``downloader`` attribute is passed to the
            returned ``MetapackUrl``.
        force: When true, build unconditionally.
        nv_name: When true, ``p.move_to_nv_name()`` is called before
            returning.
        extant_url_f: Callable taking ``p`` and returning the URL of the
            already-existing package.
        post_f: Zero-argument callable invoked after the build decision has
            been carried out.

    Returns:
        A ``(p, url, created)`` tuple, where ``url`` is a ``MetapackUrl`` and
        ``created`` tells whether the package was built by this call.
    """
    from metapack import MetapackUrl

    # Decide whether to build, remembering why for the progress messages.
    if force:
        should_build, reason = True, 'Forcing build'
    elif p.is_older_than_metadata():
        should_build, reason = True, 'Metadata is younger than package'
    elif not p.exists():
        should_build, reason = True, "Package doesn't exist"
    else:
        should_build, reason = False, 'Metadata is older than package'

    if should_build:
        prt("Building {} package ({})".format(p.type_code, reason))
        url = p.save()
        prt("Package ( type: {} ) saved to: {}".format(p.type_code, url))
        created = True
    else:
        prt("Not building {} package ({})".format(p.type_code, reason))
        # ``url`` and ``created`` are only bound here while the package is
        # still reported as existing.
        if p.exists():
            created = False
            url = extant_url_f(p)

    post_f()

    if nv_name:
        p.move_to_nv_name()

    result_url = MetapackUrl(url, downloader=package_root.downloader)
    return p, result_url, created
예제 #7
0
    def test_build_simple_package(self):
        """Build filesystem, Excel, ZIP and CSV packages from ``simple_example``.

        For each derived package the resource names are checked, along with
        the resource URLs (for CSV, only the last 50 characters of each URL).
        """
        cli_init()

        cache = Downloader().cache

        source = MetapackUrl(
            test_data(
                'packages/example.com/example.com-simple_example-2017-us'),
            downloader=downloader,
        )
        package_dir = source.package_url.join_dir(PACKAGE_PREFIX)

        _, fs_url, _ = make_filesystem_package(
            source, package_dir, cache, {}, False)

        # Looking up the resource is the check; the result is not used.
        fs_doc = MetapackDoc(fs_url, cache=downloader.cache)
        fs_doc.resource('random-names')

        expected_names = ['random-names', 'renter_cost', 'unicode-latin1']

        # Excel: resource URLs are expected to equal the resource names.
        _, url, _ = make_excel_package(fs_url, package_dir, cache, {}, False)

        self.assertEqual(expected_names,
                         [res.name for res in url.doc.resources()])
        self.assertEqual(['random-names', 'renter_cost', 'unicode-latin1'],
                         [res.url for res in url.doc.resources()])

        # ZIP: resource URLs are relative paths under ``data/``.
        _, url, _ = make_zip_package(fs_url, package_dir, cache, {}, False)

        self.assertEqual(expected_names,
                         [res.name for res in url.doc.resources()])
        expected_zip_urls = [
            'data/random-names.csv',
            'data/renter_cost.csv',
            'data/unicode-latin1.csv',
        ]
        self.assertEqual(expected_zip_urls,
                         [res.url for res in url.doc.resources()])

        # CSV: compare only the trailing 50 characters of each URL.
        _, url, _ = make_csv_package(fs_url, package_dir, cache, {}, False)

        self.assertEqual(expected_names,
                         [res.name for res in url.doc.resources()])
        expected_csv_tails = [
            'com-simple_example-2017-us-2/data/random-names.csv',
            '.com-simple_example-2017-us-2/data/renter_cost.csv',
            'm-simple_example-2017-us-2/data/unicode-latin1.csv',
        ]
        self.assertEqual(expected_csv_tails,
                         [str(res.url)[-50:] for res in url.doc.resources()])
예제 #8
0
    def __init__(self,
                 bucket,
                 source_package,
                 package_root=None,
                 dist_urls=None,
                 callback=None,
                 env=None):
        """Set up a package builder that targets an S3 bucket.

        Args:
            bucket: Bucket object; must provide ``access_url(path)`` and
                ``private_access_url(path)``.
            source_package: Package to build from; must provide
                ``access_url`` and ``private_access_url`` attributes.
            package_root: Passed through to the parent constructor.
            dist_urls: Optional iterable of distribution URLs. It is copied,
                never modified. ``None`` (the default) means no extra URLs.
            callback: Passed through to the parent constructor.
            env: Passed through to the parent constructor.
        """
        from metapack.package import Downloader

        self.source_package = source_package
        self.bucket = bucket

        u = MetapackUrl(source_package.access_url,
                        downloader=Downloader.get_instance())

        resource_root = u.dirname().as_type(MetapackPackageUrl)

        pu = MetapackUrl(source_package.private_access_url,
                         downloader=Downloader.get_instance())

        self.private_resource_root = pu.dirname().as_type(MetapackPackageUrl)

        super().__init__(u, package_root, resource_root, callback, env)

        # Copy so the caller's sequence is never altered; a ``None`` sentinel
        # replaces the shared mutable default list.
        self.dist_urls = [] if dist_urls is None else list(dist_urls)

        self.dist_urls.append(self.bucket.private_access_url(
            self.cache_path))  # For the S3: url for the S3 package
        self.dist_urls.append(self.bucket.access_url(self.cache_path))

        self.set_distributions(self.dist_urls)
예제 #9
0
    def test_build_transform_package(self):
        """Build the ``example.com-transforms`` filesystem package and print its URL."""
        source = MetapackUrl(
            test_data(
                'packages/example.com/example.com-transforms/metadata.csv'),
            downloader=downloader,
        )
        target_dir = source.package_url.join_dir(PACKAGE_PREFIX)

        # Only the package URL (second element of the result) is used here.
        _, fs_url, _ = make_filesystem_package(
            source, target_dir, downloader.cache, {}, False)

        print(fs_url)
예제 #10
0
    def test_petl(self):
        """Load the ``simple-example`` resource as a PETL table and print a preview."""
        from petl import look

        metadata = MetapackUrl(
            test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'),
            downloader=downloader,
        )
        resource = MetapackDoc(metadata).resource('simple-example')

        # Resolve the resource's target first; the returned value is unused.
        resource.resolved_url.get_resource().get_target()

        table = resource.petl()
        print(look(table))
예제 #11
0
    def test_build_package(self):
        """Build the ``full`` example filesystem package and print the ``created`` flag.

        The test is reported as skipped when building raises ``ImportError``.
        """
        try:
            cli_init()

            m = MetapackUrl(test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'),
                            downloader=downloader)

            package_dir = m.package_url.join_dir(PACKAGE_PREFIX)

            cache = Downloader().cache

            _, fs_url, created = make_filesystem_package(
                m, package_dir, cache, {}, False)
        except ImportError as e:
            # ``unittest.skip()`` only builds a decorator, so calling it here
            # had no effect and the test passed silently. ``skipTest()``
            # raises ``SkipTest``, which records the test as skipped.
            self.skipTest(str(e))

        print(created)
예제 #12
0
    def test_resolve_packages(self):
        """Print the metadata URL resolved for several package URL forms.

        Covers ZIP, Excel, CSV and directory-style packages, first over HTTP
        and then as ``file:`` URLs.
        """
        def http_url(v):
            return "http://example.com/d/{}".format(v)

        def file_url(v):
            return "file:/d/{}".format(v)

        package_names = (
            'package.zip',
            'package.xlsx',
            'package.csv',
            'package/metadata.csv',
        )

        # All HTTP URLs first, then all file URLs, each in package_names order.
        candidates = [
            make_url(name)
            for make_url in (http_url, file_url)
            for name in package_names
        ]

        for url_string in candidates:
            resolved = MetapackUrl(url_string, downloader=Downloader())

            print(resolved.metadata_url)
예제 #13
0
    def __init__(self, args):
        """Collect the paths, URLs, document and bucket derived from ``args``.

        Args:
            args: Parsed arguments; ``metatabfile``, ``s3`` and ``profile`` are
                read. ``args.s3`` is overwritten with the document's
                ``Root.S3`` value when it is initially falsy.
        """
        self.cwd = getcwd()
        self.args = args

        self.downloader = Downloader.get_instance()
        self.cache = self.downloader.cache

        # Fall back to the default metatab file in the working directory.
        if self.args.metatabfile:
            self.mtfile_arg = self.args.metatabfile
        else:
            self.mtfile_arg = join(self.cwd, DEFAULT_METATAB_FILE)

        self.mtfile_url = MetapackUrl(self.mtfile_arg,
                                      downloader=self.downloader)

        self.resource = self.mtfile_url.target_file
        self.package_url = self.mtfile_url.package_url
        self.mt_file = self.mtfile_url.metadata_url
        self.package_root = self.package_url.join(PACKAGE_PREFIX)

        if not self.args.s3:
            # NOTE(review): the document is loaded here and again below for
            # ``self.doc``; a single load may suffice -- confirm.
            root_section = MetapackDoc(self.mt_file)['Root']
            self.args.s3 = root_section.find_first_value('Root.S3')

        self.s3_url = parse_app_url(self.args.s3)

        # Re-parse with an explicit ``s3://`` prefix when the scheme differs.
        if self.s3_url and self.s3_url.scheme != 's3':
            self.s3_url = parse_app_url("s3://{}".format(self.args.s3))

        self.doc = MetapackDoc(self.mt_file)

        # Anything other than an explicit 'private' access is 'public-read'.
        if self.doc.find_first_value('Root.Access') == 'private':
            self.acl = 'private'
        else:
            self.acl = 'public-read'

        if self.s3_url:
            self.bucket = S3Bucket(self.s3_url,
                                   acl=self.acl,
                                   profile=self.args.profile)
        else:
            self.bucket = None
예제 #14
0
    def test_program_resource(self):

        return  # Actually, completely broken right now

        m = MetapackUrl(test_data(
            'packages/example.com/example.com-full-2017-us/metadata.csv'),
                        downloader=downloader)

        doc = MetapackDoc(m)

        r = doc.resource('rowgen')

        self.assertEqual('program+file:scripts/rowgen.py', str(r.url))

        print(r.resolved_url)

        g = r.row_generator

        print(type(g))

        for row in r:
            print(row)