def test_data_dictionary(self, _, __):
    """The generated datapackage.json should include the column schema
    (Data Dictionary) for a CSV resource."""
    # Mock the resource's CSV; let solr traffic pass through for reindexing.
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        body='Date,Price\n1/6/2017,4.00\n2/6/2017,4.12'
    )
    responses.add_passthru('http://127.0.0.1:8983/solr')
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'format': 'csv',
    }])

    update_zip(dataset['id'])

    dataset = helpers.call_action(u'package_show', id=dataset['id'])
    matching = [r for r in dataset['resources']
                if r['name'] == u'All resource data']
    assert_equal(len(matching), 1)
    zip_resource = matching[0]
    assert_equal(zip_resource['url_type'], 'upload')
    uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
    filepath = uploader.get_path(zip_resource[u'id'])
    csv_filename_in_zip = '{}.csv'.format(dataset['resources'][0]['id'])
    with fake_open(filepath, 'rb') as stream:
        with zipfile.ZipFile(stream) as zf:
            assert_equal(zf.namelist(),
                         [csv_filename_in_zip, 'datapackage.json'])
            datapackage_json = zf.read('datapackage.json')
            assert datapackage_json.startswith('{\n "description"')
            datapackage = json.loads(datapackage_json)
            # The schema derived from the CSV header should be recorded.
            eq(datapackage['resources'][0][u'schema'],
               {'fields': [{'type': 'string', 'name': u'Date'},
                           {'type': 'string', 'name': u'Price'}]})
def zip_filepath(dataset):
    """Return the on-disk path of the uploaded 'All resource data' zip
    attached to the given dataset dict."""
    pkg = helpers.call_action(u'package_show', id=dataset['id'])
    zip_resource = [r for r in pkg['resources']
                    if r['name'] == u'All resource data'][0]
    uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
    return uploader.get_path(zip_resource[u'id'])
def file_remove_s3(resource_id, resource_url):
    """Best-effort deletion of a resource's uploaded file from S3.

    The S3 key is derived from the munged resource URL/filename. Any
    failure (S3 down, key missing, bad config) is logged and swallowed
    so that the caller's deletion flow is never blocked.
    """
    try:
        uploader = S3ResourceUploader({})
        munged_resource_name = munge.munge_filename(resource_url)
        filepath = uploader.get_path(resource_id, munged_resource_name)
        uploader.clear_key(filepath)
    # Fix: `except Exception, e` is Python-2-only syntax; `as e` works on
    # both Python 2.6+ and Python 3. Broad catch is deliberate: deletion
    # here is best-effort.
    except Exception as e:
        # Lazy %-style args defer formatting; message bytes unchanged.
        log.warning('Couldn\'t delete file from S3%s', e)
def test_uploaded_resource(self, _):
    """An uploaded (url_type=upload) file should land in the zip under a
    filename derived from the resource name, and be listed in
    datapackage.json."""
    responses.add_passthru('http://127.0.0.1:8983/solr')
    csv_content = u'Test,csv'
    # The uploaded file is served back through the resource download URL.
    responses.add(
        responses.GET,
        re.compile(r'http://test.ckan.net/dataset/.*/download/.*'),
        body=csv_content
    )
    dataset = factories.Dataset()

    # add a resource which is an uploaded file
    with tempfile.NamedTemporaryFile() as fp:
        fp.write(csv_content)
        fp.seek(0)
        registry = ckanapi.LocalCKAN()
        registry.action.resource_create(
            package_id=dataset[u'id'],
            url=u'dummy-value',
            upload=fp,
            name=u'Rainfall',
            format=u'CSV'
        )

    update_zip(dataset['id'])

    dataset = helpers.call_action(u'package_show', id=dataset['id'])
    zip_resource = [r for r in dataset['resources']
                    if r['name'] == u'All resource data'][0]
    uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
    filepath = uploader.get_path(zip_resource[u'id'])
    csv_filename_in_zip = u'rainfall.csv'
    with fake_open(filepath, 'rb') as stream:
        with zipfile.ZipFile(stream) as zf:
            # Check uploaded file
            assert_equal(zf.namelist(),
                         [csv_filename_in_zip, 'datapackage.json'])
            assert_equal(zf.read(csv_filename_in_zip), 'Test,csv')
            # Check datapackage.json
            datapackage = json.loads(zf.read('datapackage.json'))
            eq(datapackage[u'resources'], [{
                u'format': u'CSV',
                u'name': u'rainfall',
                u'path': csv_filename_in_zip,
                u'sources': [{u'path': dataset['resources'][0]['url'],
                              u'title': u'Rainfall'}],
                u'title': u'Rainfall',
            }])
def test_simple(self, _):
    """Happy path: a linked CSV resource is downloaded into the zip and
    described in datapackage.json."""
    # Mock the resource URL; solr requests go through for reindexing.
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        body='a,b,c'
    )
    responses.add_passthru('http://127.0.0.1:8983/solr')
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'format': 'csv',
    }])

    update_zip(dataset['id'])

    dataset = helpers.call_action(u'package_show', id=dataset['id'])
    matching = [r for r in dataset['resources']
                if r['name'] == u'All resource data']
    assert_equal(len(matching), 1)
    zip_resource = matching[0]
    assert_equal(zip_resource['url_type'], 'upload')
    uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
    filepath = uploader.get_path(zip_resource[u'id'])
    csv_filename_in_zip = '{}.csv'.format(dataset['resources'][0]['id'])
    with fake_open(filepath, 'rb') as stream:
        with zipfile.ZipFile(stream) as zf:
            assert_equal(zf.namelist(),
                         [csv_filename_in_zip, 'datapackage.json'])
            assert_equal(zf.read(csv_filename_in_zip), 'a,b,c')
            datapackage_json = zf.read('datapackage.json')
            assert datapackage_json.startswith('{\n "description"')
            datapackage = json.loads(datapackage_json)
            eq(datapackage[u'name'][:12], u'test_dataset')
            eq(datapackage[u'title'], u'Test Dataset')
            eq(datapackage[u'description'], u'Just another test dataset.')
            eq(datapackage[u'resources'], [{
                u'format': u'CSV',
                u'name': dataset['resources'][0]['id'],
                u'path': csv_filename_in_zip,
                u'sources': [{u'path': u'https://example.com/data.csv',
                              u'title': None}],
            }])
def test_resource_url_with_404_error(self, _):
    """When the resource URL 404s, its data is left out of the zip and the
    datapackage entry falls back to the external URL as its path."""
    responses.add_passthru('http://127.0.0.1:8983/solr')
    # The resource URL returns a 404 rather than data.
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        status=404
    )
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'name': 'rainfall',
        'format': 'csv',
    }])

    update_zip(dataset['id'])

    dataset = helpers.call_action(u'package_show', id=dataset['id'])
    zip_resource = [r for r in dataset['resources']
                    if r['name'] == u'All resource data'][0]
    uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
    filepath = uploader.get_path(zip_resource[u'id'])
    with fake_open(filepath, 'rb') as stream:
        with zipfile.ZipFile(stream) as zf:
            # Zip doesn't contain the data, just the json file
            assert_equal(zf.namelist(), ['datapackage.json'])
            # Check datapackage.json
            datapackage = json.loads(zf.read('datapackage.json'))
            eq(datapackage[u'resources'], [{
                u'format': u'CSV',
                u'name': u'rainfall',
                # path is to the URL - an 'external resource'
                u'path': 'https://example.com/data.csv',
                u'title': u'rainfall',
            }])