def test_build_s3_package(self):
    """Build an S3 CSV package from a filesystem package and print the result.

    The source metadata URL and the ``s3://`` destination are both
    hard-coded below; the test makes no assertions, it only prints the
    URL and the ``created`` flag returned by ``make_s3_csv_package``.
    """
    from metapack_build.build import make_s3_csv_package

    build_cache = Downloader().cache

    # NOTE(review): absolute path on a specific machine -- this test can only
    # find its input where that directory exists.
    source_url = MetapackUrl(
        '/Volumes/Storage/proj/virt-proj/metapack/metapack/test-data/packages/example.com/'
        'example-package/_packages/example.com-example_data_package-2017-us-1/metadata.csv',
        downloader=downloader)

    s3_destination = parse_app_url(
        's3://test.library.civicknowledge.com/metatab',
        downloader=downloader)

    build_result = make_s3_csv_package(
        source_url, s3_destination, build_cache, {}, False)
    _, url, created = build_result

    print(url)
    print(created)
def test_build_simple_package(self):
    """Build the simple example package in each format and check its resources.

    A filesystem package is built first; the Excel, ZIP and CSV packages
    are then built from it. For each one the resource names and the
    resource URLs are compared against the expected values.
    """
    expected_names = ['random-names', 'renter_cost', 'unicode-latin1']

    def resource_names(package_url):
        # Names of the resources in the built package's document.
        return [res.name for res in package_url.doc.resources()]

    def resource_urls(package_url):
        # Resource URLs exactly as stored in the built package's document.
        return [res.url for res in package_url.doc.resources()]

    cli_init()

    build_cache = Downloader().cache

    source = MetapackUrl(
        test_data('packages/example.com/example.com-simple_example-2017-us'),
        downloader=downloader)

    package_dir = source.package_url.join_dir(PACKAGE_PREFIX)

    _, fs_url, _ = make_filesystem_package(
        source, package_dir, build_cache, {}, False)

    # Opening the filesystem package and fetching one resource by name
    # checks that the built package is readable.
    filesystem_doc = MetapackDoc(fs_url, cache=downloader.cache)
    filesystem_doc.resource('random-names')

    # Excel: the resource URLs are expected to equal the resource names.
    _, excel_url, _ = make_excel_package(
        fs_url, package_dir, build_cache, {}, False)
    self.assertEqual(expected_names, resource_names(excel_url))
    self.assertEqual(expected_names, resource_urls(excel_url))

    # ZIP: the resource URLs are relative paths under data/.
    _, zip_url, _ = make_zip_package(
        fs_url, package_dir, build_cache, {}, False)
    self.assertEqual(expected_names, resource_names(zip_url))
    self.assertEqual(
        [
            'data/random-names.csv',
            'data/renter_cost.csv',
            'data/unicode-latin1.csv',
        ],
        resource_urls(zip_url))

    # CSV: only the last 50 characters of each URL are compared.
    _, csv_url, _ = make_csv_package(
        fs_url, package_dir, build_cache, {}, False)
    self.assertEqual(expected_names, resource_names(csv_url))
    url_tails = [str(res.url)[-50:] for res in csv_url.doc.resources()]
    self.assertEqual(
        [
            'com-simple_example-2017-us-2/data/random-names.csv',
            '.com-simple_example-2017-us-2/data/renter_cost.csv',
            'm-simple_example-2017-us-2/data/unicode-latin1.csv',
        ],
        url_tails)
def open_package(name, downloader=None):
    """Open the test package called *name* from the ``packages`` test-data area.

    :param name: Package name, passed to ``test_data('packages', name)``.
    :param downloader: Downloader to hand to ``metapack.open_package``; a
        new ``Downloader()`` is created when this is ``None``.
    :return: Whatever ``metapack.open_package`` returns for that location.
    """
    package_location = test_data('packages', name)

    # Imported here rather than at module level, as in the original code.
    from metapack import open_package as op, Downloader

    active_downloader = Downloader() if downloader is None else downloader

    return op(package_location, active_downloader)
def new_cmd(args):
    """Write the notebook selected by the command-line flags.

    The flags are checked in the order ``eda``, ``new_notebook``,
    ``metatab``; the writer for the first one that is set is called and
    the rest are ignored. Nothing is written when none is set.

    :param args: Parsed command-line arguments, wrapped in a ``MetapackCliMemo``.
    """
    memo = MetapackCliMemo(args, Downloader.get_instance())
    flags = memo.args

    if flags.eda:
        write_eda_notebook(memo)
        return

    if flags.new_notebook:
        write_notebook(memo)
        return

    if flags.metatab:
        write_metatab_notebook(memo)
def test_build_package(self):
    """Build a filesystem package from the full example and print the result.

    If building raises ``ImportError`` the test is reported as skipped,
    with the exception text as the reason. Otherwise the ``created``
    flag returned by ``make_filesystem_package`` is printed.
    """
    try:
        cli_init()

        m = MetapackUrl(
            test_data('packages/example.com/example.com-full-2017-us/metadata.csv'),
            downloader=downloader)

        package_dir = m.package_url.join_dir(PACKAGE_PREFIX)

        cache = Downloader().cache

        _, fs_url, created = make_filesystem_package(
            m, package_dir, cache, {}, False)
    except ImportError as e:
        # unittest.skip() only builds a decorator; calling it here and
        # discarding the result made the test pass silently. skipTest()
        # raises SkipTest, so the runner records the test as skipped.
        self.skipTest(str(e))
    else:
        print(created)
def test_resolve_packages(self):
    """Create a MetapackUrl for each package form and print its metadata URL.

    The same four package paths are tried first under a web prefix and
    then under a ``file:`` prefix. The test makes no assertions.
    """
    prefixes = ("http://example.com/d/{}", "file:/d/{}")
    package_paths = (
        'package.zip',
        'package.xlsx',
        'package.csv',
        'package/metadata.csv',
    )

    # All web URLs first, then all file URLs, each in package_paths order.
    url_strings = [
        prefix.format(path)
        for prefix in prefixes
        for path in package_paths
    ]

    for url_string in url_strings:
        package_url = MetapackUrl(url_string, downloader=Downloader())
        print(package_url.metadata_url)
def test_resolve_resource_urls(self):
    """Test how resources are resolved in packages.

    - A name, for excel and CSV packages
    - a path, for ZIP and filesystem packages
    - a web url, for any kind of package

    Each row of ``packages.csv`` supplies a package URL, a target file,
    the expected resolved URL, and flags saying whether resolving or
    generating rows is expected to fail.
    """
    with open(test_data('packages.csv')) as cases_file:
        # Numbering starts at 2 -- presumably so the number reported on
        # failure matches the line in the CSV file, whose first line is
        # the header consumed by DictReader.
        for line_no, case in enumerate(DictReader(cases_file), 2):
            package_url = MetapackPackageUrl(case['url'], downloader=Downloader())

            try:
                resolved = package_url.resolve_url(case['target_file'])
                self.assertFalse(bool(case['resolve_error']))
            except ResourceError:
                # A resolve failure is fine only if the row expects one.
                self.assertTrue(bool(case['resolve_error']))
                continue
            except DownloadError:
                raise

            # Containment rather than equality: the resolved URL can hold
            # a local filesystem path, which depends on where the test runs.
            resolved_text = str(resolved)
            self.assertTrue(
                case['resolved_url'] in resolved_text,
                (line_no, case['resolved_url'], str(resolved)))

            try:
                row_source = get_generator(resolved.get_resource().get_target())
                self.assertEqual(101, len(list(row_source)))
                self.assertFalse(bool(case['generate_error']))
            except DownloadError:
                # Checked before RowGeneratorError so a download failure
                # is never mistaken for an expected generation error.
                raise
            except RowGeneratorError:
                self.assertTrue(bool(case['generate_error']))
# Copyright (c) 2017 Civic Knowledge. This file is licensed under the terms of the # MIT License, included in this distribution as LICENSE """ """ from os import remove from os.path import exists import argparse from metapack.cli.core import prt, err, warn from metapack.cli.core import get_config as _get_config from metapack.cli.core import MetapackCliMemo as _MetapackCliMemo, add_giturl, write_doc from metapack import Downloader from github import Github downloader = Downloader.get_instance() class ArgumentError(Exception): pass class MetapackCliMemo(_MetapackCliMemo): def __init__(self, args, downloader): super().__init__(args, downloader) def github(subparsers): """ Using this function requires a Github token to be set in the ~/.metapack.yaml file:
from metatab.rowgenerators import TextRowGenerator from rowgenerators import get_generator, parse_app_url from rowgenerators.exceptions import DownloadError, RowGeneratorError from tabulate import tabulate from metapack import (Downloader, MetapackDoc, MetapackPackageUrl, MetapackUrl, ResourceError) from metapack.cli.core import cli_init from metapack.constants import PACKAGE_PREFIX from metapack_build.build import (make_csv_package, make_excel_package, make_filesystem_package, make_zip_package) from support import open_package, test_data warnings.filterwarnings("ignore", category=DeprecationWarning) downloader = Downloader() def ds_hash(r): import hashlib m = hashlib.md5() for row in r: for col in row: m.update(str(col).encode('utf8')) return m.hexdigest() class TestPackages(unittest.TestCase):