def test_socrata(self):
    """Check that a Socrata CSV download URL parses to a ``WebUrl``.

    The resource URL of the parsed object must render back to the
    original string, query string included.
    """
    u_s = 'https://data.lacounty.gov/api/views/8rdv-6nb6/rows.csv?accessType=DOWNLOAD'

    parsed = parse_app_url(u_s)
    reparsed = parse_app_url(u_s)

    self.assertIsInstance(reparsed, WebUrl)
    self.assertEqual(
        'https://data.lacounty.gov/api/views/8rdv-6nb6/rows.csv?accessType=DOWNLOAD',
        str(parsed.resource_url),
    )
def test_windows_urls(self):
    """Check that Windows drive-letter paths render as ``file:`` URLs."""
    # (input path, expected string form of the parsed URL)
    cases = (
        ('w:/metatab36/metatab-py/metatab/templates/metatab.csv',
         'file:w:/metatab36/metatab-py/metatab/templates/metatab.csv'),
        ('N:/Desktop/metadata.csv#renter_cost',
         'file:N:/Desktop/metadata.csv#renter_cost'),
    )

    for url, expected in cases:
        self.assertEqual(expected, str(parse_app_url(url)))
def test_component(self):
    """Check URL joining against every row of ``components.csv``.

    For each row the base URL's class name and the string results of
    ``join_dir``, ``join`` and ``join_target`` with the component URL are
    compared to the expected columns. The base URL is used as the
    failure message so a failing row can be identified.
    """
    with open(data_path('components.csv')) as f:
        for row in DictReader(f):
            base = parse_app_url(row['base_url'])
            component = parse_app_url(row['component_url'])

            class_name = base.__class__.__name__
            self.assertEqual(row['class'], class_name, row['base_url'])

            joined_dir = str(base.join_dir(component))
            self.assertEqual(row['join_dir'], joined_dir, row['base_url'])

            joined = str(base.join(component))
            self.assertEqual(row['join'], joined, row['base_url'])

            joined_target = str(base.join_target(component))
            self.assertEqual(str(row['join_target']), joined_target, row['base_url'])
def test_download(self):
    """Test all three stages of a collection of downloadable URLS.

    The stages are the parsed URL, the object returned by
    ``get_resource()`` and the object returned by ``get_target()``; the
    class name and format of each are compared to ``sources.csv``.
    """
    dldr = Downloader()

    with open(data_path('sources.csv')) as f:
        for row in DictReader(f):
            name = row['name']

            # NOTE(review): only the 'mz_no_zip' row is exercised; this
            # looks like a leftover debugging filter -- confirm.
            if name != 'mz_no_zip' or not row['resource_class']:
                continue

            # Stage 1: the parsed URL.
            url = parse_app_url(row['url'], downloader=dldr)
            self.assertEqual(row['url_class'], url.__class__.__name__, name)
            self.assertEqual(row['resource_format'], url.resource_format, name)

            # Stage 2: the resource.
            resource = url.get_resource()
            self.assertEqual(row['resource_class'], resource.__class__.__name__, name)
            self.assertEqual(row['resource_format'], resource.resource_format, name)

            # Stage 3: the target, which must exist on disk.
            target = resource.get_target()
            self.assertEqual(row['target_class'], target.__class__.__name__, name)
            self.assertEqual(row['target_format'], target.target_format, name)
            self.assertTrue(exists(target.path))
def get_target(self):
    """Extract the target file from the archive and return a URL to it.

    The member chosen by ``ZipUrl.get_file_from_zip`` is copied out of the
    zip file at ``self.path`` into a file under ``self.zip_dir``, and the
    extracted path is handed to ``parse_app_url``.

    Returns:
        The result of ``parse_app_url`` for the extracted file, carrying
        over the fragment query (minus resource info), the target
        segment and the scheme extension.

    Raises:
        AssertionError: if ``self.zip_dir`` is falsy.
    """
    assert self.zip_dir

    # Open the archive in a ``with`` block so the underlying file handle
    # is closed even if extraction fails part-way through.
    with ZipFile(self.path) as zf:
        self._target_file = ZipUrl.get_file_from_zip(self)

        target_path = join(self.zip_dir, self.target_file)
        ensure_dir(dirname(target_path))

        with io.open(target_path, 'wb') as f, zf.open(self.target_file) as flo:
            copy_file_or_flo(flo, f)

    # Clear out the resource info so we don't get a ZipUrl
    fq = self.fragment_query
    for key in ('resource_format', 'resource_file'):
        if key in fq:
            del fq[key]

    return parse_app_url(
        target_path,
        fragment_query=fq,
        fragment=[self.target_segment, None],
        scheme_extension=self.scheme_extension,
    )
def test_xsx_zip_fragment(self):
    """Check that a ``file;sheet`` fragment on a zip URL parses into two parts."""
    address = 'http://public.source.civicknowledge.com/example.com/sources/test_data.zip#renter_cost_excel07.xlsx;Sheet1'
    parsed = parse_app_url(address)

    expected = ['renter_cost_excel07.xlsx', 'Sheet1']
    self.assertEqual(expected, parsed.fragment)
def test_list(self):
    """Check that listing the ``appurl`` package directory yields more than ten entries."""
    import appurl

    u = parse_app_url(dirname(appurl.__file__))

    # assertGreater reports both values on failure, unlike assertTrue(a > b).
    self.assertGreater(len(list(u.list())), 10)

    # NOTE: a listing of the remote test_data.zip used to follow an
    # unconditional ``return`` here; it was unreachable and has been dropped.
def test_downloaded_resource_type(self):
    """Check that the resource obtained from a remote ``.zip`` URL is a ``ZipUrl``."""
    address = 'http://public.source.civicknowledge.com/example.com/sources/test_data.zip'

    resource = parse_app_url(address).get_resource()

    self.assertIsInstance(resource, ZipUrl)
def test_parse_file_urls(self):
    """Check the path and string form of several ``file:`` URL spellings."""
    # Each entry: (input URL, expected .path, expected str() of the parsed URL)
    cases = [
        ('file:foo/bar/baz', 'foo/bar/baz', 'file:foo/bar/baz'),
        ('file:/foo/bar/baz', '/foo/bar/baz', 'file:/foo/bar/baz'),
        ('file://example.com/foo/bar/baz', '/foo/bar/baz', 'file://example.com/foo/bar/baz'),
        ('file:///foo/bar/baz', '/foo/bar/baz', 'file:/foo/bar/baz'),
    ]

    for given, expected_path, expected_str in cases:
        parsed = parse_app_url(given)
        self.assertEqual(expected_path, parsed.path)
        self.assertEqual(expected_str, str(parsed))
def test_spec_resource_format(self):
    """Check that ``resource_format`` given in the fragment is honoured.

    The URL path ends in ``.foo`` while the fragment sets
    ``resource_format=zip``; the test expects a zip resource and a CSV
    target.
    """
    us = 'http://public.source.civicknowledge.com/example.com/sources/test_data.foo#simple-example.csv&resource_format=zip'
    url = parse_app_url(us)

    self.assertEqual('zip', url.resource_format)
    self.assertEqual('csv', url.target_format)

    resource = url.get_resource()
    self.assertIsInstance(resource, ZipUrl)

    target = resource.get_target()
    self.assertIsInstance(target, CsvFileUrl)
def test_excel_renter07(self):
    """Check resource and target of a zip URL with ``target_format=xlsx``.

    Both the resource and the target are expected to use the ``file``
    protocol and to exist.
    """
    address = 'http://public.source.civicknowledge.com/example.com/sources/renter_cost_excel07.zip#target_format=xlsx'
    url = parse_app_url(address)

    resource = url.get_resource()
    self.assertEqual('file', resource.proto)
    self.assertTrue(resource.exists())

    self.assertEqual('renter_cost_excel07.zip', url.target_file)

    target = resource.get_target()
    self.assertEqual('file', target.proto)
    self.assertTrue(target.exists())
def test_csv_no_csv(self):
    """Check that ``target_format`` in the fragment applies to a non-``.csv`` file.

    The path ends in ``.foo`` and the fragment sets ``target_format=csv``;
    the formats are checked on the URL and on the objects derived from it.
    """
    address = 'http://public.source.civicknowledge.com/example.com/sources/simple-example.foo#&target_format=csv'
    url = parse_app_url(address)

    self.assertIsInstance(url, WebUrl)
    self.assertEqual('foo', url.resource_format)
    self.assertEqual('csv', url.target_format)

    resource = url.get_resource()
    self.assertEqual('foo', resource.resource_format)
    self.assertEqual('csv', resource.target_format)

    # NOTE(review): this calls get_resource() on the resource rather than
    # get_target() -- confirm that is intended.
    again = resource.get_resource()
    self.assertEqual('csv', again.target_format)
def test_url_classes(self):
    """Check parsed URL components against the rows of ``url_classes.csv``.

    Rows with an empty ``class`` column are skipped. The input URL is
    used as the failure message for every assertion.
    """
    from appurl import match_url_classes

    with open(data_path('url_classes.csv')) as f:
        for row in DictReader(f):
            if e_class_missing := (not row['class']):
                continue

            source = row['in_url']
            parsed = parse_app_url(source)

            self.assertEqual(row['url'], str(parsed), source)
            self.assertEqual(row['resource_url'], str(parsed.resource_url), source)
            self.assertEqual(row['resource_file'], parsed.resource_file, source)
            # The CSV holds an empty string where there is no target file.
            self.assertEqual(row['target_file'], parsed.target_file or '', source)
def test_targets(self):
    """Check that a joined target fragment survives ``get_resource`` and ``get_target``."""
    base = parse_app_url('http://library.metatab.org/example.com-example_data_package-2017-us-1.xlsx')

    with_target = base.join_target('random-names')
    self.assertEqual(
        'http://library.metatab.org/example.com-example_data_package-2017-us-1.xlsx#random-names',
        str(with_target),
    )

    # Only the tail is compared for the resource and the target.
    resource = with_target.get_resource()
    resource_ok = str(resource).endswith(
        'library.metatab.org/example.com-example_data_package-2017-us-1.xlsx#random-names')
    self.assertTrue(resource_ok)

    target = resource.get_target()
    target_ok = str(target).endswith(
        'library.metatab.org/example.com-example_data_package-2017-us-1.xlsx#random-names')
    self.assertTrue(target_ok)
def test_mz_with_zip_xl(self):
    """Check URL, resource and target for an Excel file inside a remote zip."""
    address = 'http://public.source.civicknowledge.com/example.com/sources/test_data.zip#renter_cost_excel07.xlsx'
    url = parse_app_url(address)

    # The parsed URL itself.
    self.assertIsInstance(url, WebUrl)
    self.assertEqual('zip', url.resource_format)
    self.assertEqual('xlsx', url.target_format)

    # The resource: expected to be a zip with the ``file`` protocol.
    resource = url.get_resource()
    self.assertIsInstance(resource, ZipUrl)
    self.assertEqual('zip', resource.resource_format)
    self.assertEqual('file', resource.proto)
    self.assertTrue(resource.exists())

    # The target: expected to be an xlsx with the ``file`` protocol.
    target = resource.get_target()
    self.assertEqual('xlsx', target.target_format)
    self.assertEqual('file', target.proto)
    self.assertTrue(target.exists())
def test_xlsx_fragment(self):
    """Check that a single-part fragment is stored as ``[value, None]``."""
    parsed = parse_app_url('http://example.com/renter_cost_excel07.xlsx#2')

    stored_fragment = parsed.dict['_fragment']

    self.assertEqual(['2', None], stored_fragment)
def test_parse_s3(self):
    """Check that an ``s3://`` URL with only a bucket name parses to ``S3Url``."""
    from appurl.web.s3 import S3Url

    bucket_url = 's3://library.metatab.org'
    parsed = parse_app_url(bucket_url)

    self.assertIsInstance(parsed, S3Url)
def test_entry_points(self):
    """Check which URL class ``parse_app_url`` picks for s3, http and file zip URLs."""
    s3_url = parse_app_url('s3://bucket.com/foo/bar/baz.zip')
    self.assertIsInstance(s3_url, S3Url)

    web_url = parse_app_url('http://bucket.com/foo/bar/baz.zip')
    self.assertIsInstance(web_url, WebUrl)

    file_url = parse_app_url('file://bucket.com/foo/bar/baz.zip')
    self.assertIsInstance(file_url, ZipUrl)
def test_join_target_xls(self):
    """Check that ``join_target`` on an xlsx file URL appends the target as a fragment."""
    # NOTE(review): a second method with this same name is defined later;
    # if both live in the same class this one is shadowed -- confirm.
    u = parse_app_url('file:/a/file.xlsx')

    jt = u.join_target('target')

    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual('file:/a/file.xlsx#target', str(jt))
def test_join_target_xls(self):
    """Check that an xlsx file URL with a fragment parses to ``ExcelFileUrl``."""
    # NOTE(review): an earlier method shares this name; if both live in
    # the same class this definition replaces it -- confirm.
    from appurl.file.excel import ExcelFileUrl

    parsed = parse_app_url('file:/a/file.xlsx#foobnar')

    self.assertIsInstance(parsed, ExcelFileUrl)