def test_data_dictionary(self, _, __):
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        body='Date,Price\n1/6/2017,4.00\n2/6/2017,4.12')
    responses.add_passthru('http://127.0.0.1:8983/solr')
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'format': 'csv',
    }])

    update_zip(dataset['id'])

    dataset = helpers.call_action(u'package_show', id=dataset['id'])
    zip_resources = [res for res in dataset['resources']
                     if res['name'] == u'All resource data']
    assert_equal(len(zip_resources), 1)
    zip_resource = zip_resources[0]
    assert_equal(zip_resource['url_type'], 'upload')
    uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
    filepath = uploader.get_path(zip_resource[u'id'])
    csv_filename_in_zip = '{}.csv'.format(dataset['resources'][0]['id'])
    with fake_open(filepath, 'rb') as f:
        with zipfile.ZipFile(f) as zip_:
            assert_equal(zip_.namelist(),
                         [csv_filename_in_zip, 'datapackage.json'])
            datapackage_json = zip_.read('datapackage.json')
            assert datapackage_json.startswith('{\n "description"')
            datapackage = json.loads(datapackage_json)
            eq(datapackage['resources'][0][u'schema'],
               {'fields': [{'type': 'string', 'name': u'Date'},
                           {'type': 'string', 'name': u'Price'}]})

def test_uploaded_resource(self, _):
    responses.add_passthru('http://127.0.0.1:8983/solr')
    csv_content = u'Test,csv'
    responses.add(
        responses.GET,
        re.compile(r'http://test.ckan.net/dataset/.*/download/.*'),
        body=csv_content)
    dataset = factories.Dataset()
    # add a resource which is an uploaded file
    with tempfile.NamedTemporaryFile() as fp:
        fp.write(csv_content)
        fp.seek(0)
        registry = ckanapi.LocalCKAN()
        resource = dict(
            package_id=dataset[u'id'],
            url=u'dummy-value',
            upload=fp,
            name=u'Rainfall',
            format=u'CSV',
        )
        registry.action.resource_create(**resource)

    update_zip(dataset['id'])

    dataset = helpers.call_action(u'package_show', id=dataset['id'])
    zip_resources = [res for res in dataset['resources']
                     if res['name'] == u'All resource data']
    zip_resource = zip_resources[0]
    uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
    filepath = uploader.get_path(zip_resource[u'id'])
    csv_filename_in_zip = u'rainfall.csv'
    with fake_open(filepath, 'rb') as f:
        with zipfile.ZipFile(f) as zip_:
            # Check uploaded file
            assert_equal(zip_.namelist(),
                         [csv_filename_in_zip, 'datapackage.json'])
            assert_equal(zip_.read(csv_filename_in_zip), 'Test,csv')
            # Check datapackage.json
            datapackage_json = zip_.read('datapackage.json')
            datapackage = json.loads(datapackage_json)
            eq(datapackage[u'resources'], [{
                u'format': u'CSV',
                u'name': u'rainfall',
                u'path': csv_filename_in_zip,
                u'sources': [{u'path': dataset['resources'][0]['url'],
                              u'title': u'Rainfall'}],
                u'title': u'Rainfall',
            }])

def test_update_twice_skipping_second_time(self, _):
    # i.e. testing skip_if_no_changes=True
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        body='a,b,c')
    responses.add_passthru('http://127.0.0.1:8983/solr')
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'format': 'csv',
    }])
    update_zip(dataset['id'])

    with mock.patch('ckanext.downloadall.tasks.write_zip') as write_zip_:
        update_zip(dataset['id'], skip_if_no_changes=True)

    # nothing's changed, so it shouldn't rewrite the zip
    assert not write_zip_.called

def test_dont_skip_if_no_changes(self, _):
    # i.e. testing skip_if_no_changes=False
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        body='a,b,c')
    responses.add_passthru('http://127.0.0.1:8983/solr')
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'format': 'csv',
    }])
    update_zip(dataset['id'])

    with mock.patch('ckanext.downloadall.tasks.write_zip') as write_zip_:
        update_zip(dataset['id'], skip_if_no_changes=False)

    # ensure zip would be rewritten in this case - not letting it skip
    assert write_zip_.called

def test_deleting_resource_causes_zip_to_update(self, _):
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        body='a,b,c')
    responses.add_passthru('http://127.0.0.1:8983/solr')
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'format': 'csv',
    }])
    update_zip(dataset['id'])

    dataset = helpers.call_action(u'package_patch', id=dataset['id'],
                                  resources=[])

    with mock.patch('ckanext.downloadall.tasks.write_zip') as write_zip_:
        update_zip(dataset['id'], skip_if_no_changes=True)

    # the resources changed, so the zip should be rewritten even with
    # skip_if_no_changes=True
    assert write_zip_.called

def test_simple(self, _):
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        body='a,b,c')
    responses.add_passthru('http://127.0.0.1:8983/solr')
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'format': 'csv',
    }])

    update_zip(dataset['id'])

    dataset = helpers.call_action(u'package_show', id=dataset['id'])
    zip_resources = [res for res in dataset['resources']
                     if res['name'] == u'All resource data']
    assert_equal(len(zip_resources), 1)
    zip_resource = zip_resources[0]
    assert_equal(zip_resource['url_type'], 'upload')
    uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
    filepath = uploader.get_path(zip_resource[u'id'])
    csv_filename_in_zip = '{}.csv'.format(dataset['resources'][0]['id'])
    with fake_open(filepath, 'rb') as f:
        with zipfile.ZipFile(f) as zip_:
            assert_equal(zip_.namelist(),
                         [csv_filename_in_zip, 'datapackage.json'])
            assert_equal(zip_.read(csv_filename_in_zip), 'a,b,c')
            datapackage_json = zip_.read('datapackage.json')
            assert datapackage_json.startswith('{\n "description"')
            datapackage = json.loads(datapackage_json)
            eq(datapackage[u'name'][:12], u'test_dataset')
            eq(datapackage[u'title'], u'Test Dataset')
            eq(datapackage[u'description'], u'Just another test dataset.')
            eq(datapackage[u'resources'], [{
                u'format': u'CSV',
                u'name': dataset['resources'][0]['id'],
                u'path': csv_filename_in_zip,
                u'sources': [{u'path': u'https://example.com/data.csv',
                              u'title': None}],
            }])

def test_update_twice(self, _):
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        body='a,b,c')
    responses.add_passthru('http://127.0.0.1:8983/solr')
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'format': 'csv',
    }])
    update_zip(dataset['id'])

    update_zip(dataset['id'], skip_if_no_changes=False)

    # ensure a second zip hasn't been added
    dataset = helpers.call_action(u'package_show', id=dataset['id'])
    zip_resources = [res for res in dataset['resources']
                     if res['name'] == u'All resource data']
    assert_equal(len(zip_resources), 1)

def test_resource_url_with_404_error(self, _):
    responses.add_passthru('http://127.0.0.1:8983/solr')
    responses.add(
        responses.GET,
        'https://example.com/data.csv',
        status=404)
    dataset = factories.Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'name': 'rainfall',
        'format': 'csv',
    }])

    update_zip(dataset['id'])

    dataset = helpers.call_action(u'package_show', id=dataset['id'])
    zip_resources = [res for res in dataset['resources']
                     if res['name'] == u'All resource data']
    zip_resource = zip_resources[0]
    uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
    filepath = uploader.get_path(zip_resource[u'id'])
    with fake_open(filepath, 'rb') as f:
        with zipfile.ZipFile(f) as zip_:
            # Zip doesn't contain the data, just the json file
            assert_equal(zip_.namelist(), ['datapackage.json'])
            # Check datapackage.json
            datapackage_json = zip_.read('datapackage.json')
            datapackage = json.loads(datapackage_json)
            eq(datapackage[u'resources'], [{
                u'format': u'CSV',
                u'name': u'rainfall',
                # path is to the URL - an 'external resource'
                u'path': 'https://example.com/data.csv',
                u'title': u'rainfall',
            }])
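

# ---------------------------------------------------------------------------
# A minimal sketch of the module-level setup these tests assume. This is an
# illustrative reconstruction, not the extension's actual test header: the
# exact import paths for `update_zip`, `fake_open` and `eq` depend on the
# ckanext-downloadall source tree. The mock.patch target used above suggests
# `update_zip` lives in `ckanext.downloadall.tasks`; the other names are
# assumptions, flagged inline.
#
# import json
# import re
# import tempfile
# import zipfile
#
# import mock          # stdlib `unittest.mock` on Python 3
# import responses     # HTTP-mocking library intercepting the example.com URLs
# import ckanapi
# from nose.tools import assert_equal
#
# import ckan.lib.uploader
# from ckan.tests import factories, helpers
#
# from ckanext.downloadall.tasks import update_zip  # task under test (assumed path)
#
# eq = assert_equal  # shorthand alias used in the assertions (assumed)
# fake_open = open   # assumed: a helper that reads from the (possibly mocked)
#                    # resource storage; plain `open` if storage is on disk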