def test_datapackage_only_requires_some_fields_to_be_valid(self):
    """A minimal descriptor converts; an empty descriptor raises KeyError."""
    empty_package = datapackage.DataPackage({})
    minimal_resource = {
        'name': 'the-resource',
        'path': 'http://example.com/some-data.csv'
    }
    minimal_package = datapackage.DataPackage({
        'name': 'gdp',
        'resources': [minimal_resource]
    })

    # The minimal package must convert without raising.
    converter.datapackage_to_dataset(minimal_package)

    with nose.tools.assert_raises(KeyError):
        converter.datapackage_to_dataset(empty_package)
def test_name_is_lowercased(self):
    """The dataset 'name' must equal the lower-cased descriptor name."""
    mixed_case = {'name': 'ThEnAmE'}
    self.datapackage.descriptor.update(mixed_case)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    expected_name = self.datapackage.descriptor['name'].lower()
    nose.tools.assert_equals(dataset['name'], expected_name)
def test_datapackage_extras(self):
    """Custom descriptor keys are expected as key/value pairs in 'extras'.

    The expected values show list and dict entries as JSON strings, while
    the unicode and integer entries are passed through unchanged.
    """
    custom_fields = {
        'title_cn': u'國內生產總值',
        'years': [2015, 2016],
        'last_year': 2016,
        'location': {'country': 'China'},
    }
    self.datapackage.descriptor.update(custom_fields)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    expected_extras = [
        {'key': 'profile', 'value': u'data-package'},
        {'key': 'title_cn', 'value': u'國內生產總值'},
        {'key': 'years', 'value': '[2015, 2016]'},
        {'key': 'last_year', 'value': 2016},
        {'key': 'location', 'value': '{"country": "China"}'},
    ]
    # Order-insensitive comparison of the extras list.
    nose.tools.assert_items_equal(dataset.get('extras'), expected_extras)
def test_datapackage_description(self):
    """The descriptor's 'description' is expected as the dataset 'notes'."""
    self.datapackage.descriptor.update({
        'description': 'Country, regional and world GDP in current USD.'
    })
    result = converter.datapackage_to_dataset(self.datapackage)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result.get('notes'),
                     self.datapackage.descriptor['description'])
def test_datapackage_description(self):
    """Check that 'description' is carried over to the dataset's 'notes'."""
    description_field = {
        'description': 'Country, regional and world GDP in current USD.'
    }
    self.datapackage.descriptor.update(description_field)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    expected_notes = self.datapackage.descriptor['description']
    nose.tools.assert_equals(dataset.get('notes'), expected_notes)
def handle_datapackage(self, datapackage, parameters, stats):
    '''Create or update a ckan dataset from datapackage and parameters.

    The descriptor is converted to a CKAN dataset dict and merged over a set
    of default core properties. Any ``dataset-properties`` found in
    ``parameters`` are then merged on top, and the result is POSTed to
    ``package_create``. If CKAN answers that the URL is already in use and
    ``parameters['overwrite_existing']`` is truthy, ``package_update`` is
    attempted with the same payload.

    Side effects: the converted resources are stored on
    ``self.__dataset_resources`` and, on success, the id returned by CKAN is
    stored on ``self.__dataset_id``.

    :param datapackage: descriptor handed to ``datapackage_lib.DataPackage``
    :param parameters: mapping read for ``dataset-properties`` and
        ``overwrite_existing``
    :param stats: not used by this method
    :raises Exception: if CKAN still reports an error after the
        create/update attempt(s)
    '''
    # core dataset properties
    dataset = {
        'title': '',
        'version': '',
        'state': 'active',
        'url': '',
        'notes': '',
        'license_id': '',
        'author': '',
        'author_email': '',
        'maintainer': '',
        'maintainer_email': '',
        'owner_org': None,
        'private': False
    }

    dp = datapackage_lib.DataPackage(datapackage)
    dataset.update(converter.datapackage_to_dataset(dp))

    # Resources are kept aside and removed from the dataset payload.
    self.__dataset_resources = dataset.get('resources', [])
    if self.__dataset_resources:
        del dataset['resources']

    # Merge dataset-properties from parameters into dataset.
    dataset_props_from_params = parameters.get('dataset-properties')
    if dataset_props_from_params:
        dataset.update(dataset_props_from_params)

    package_create_url = '{}/package_create'.format(self.__base_endpoint)
    response = make_ckan_request(package_create_url,
                                 method='POST',
                                 json=dataset,
                                 api_key=self.__ckan_api_key)

    ckan_error = get_ckan_error(response)
    if ckan_error \
       and parameters.get('overwrite_existing') \
       and 'That URL is already in use.' in ckan_error.get('name', []):
        package_update_url = \
            '{}/package_update'.format(self.__base_endpoint)

        log.info('CKAN dataset with url already exists. '
                 'Attempting package_update.')
        response = make_ckan_request(package_update_url,
                                     method='POST',
                                     json=dataset,
                                     api_key=self.__ckan_api_key)
        ckan_error = get_ckan_error(response)

    if ckan_error:
        message = 'CKAN returned an error: ' + json.dumps(ckan_error)
        # No exception is being handled here, so log.exception would append
        # a meaningless empty traceback; log.error is the right level.
        log.error(message)
        # Carry the CKAN error in the exception so callers can see it.
        raise Exception(message)

    if response['success']:
        self.__dataset_id = response['result']['id']
def test_datapackage_author_as_dict(self):
    """A dict 'author' is expected as 'maintainer'/'maintainer_email'."""
    # FIXME: Add author.web
    author = {
        'name': 'John Smith',
        'email': '*****@*****.**',
    }
    self.datapackage.descriptor.update({'author': author})

    dataset = converter.datapackage_to_dataset(self.datapackage)

    maintainer = dataset.get('maintainer')
    nose.tools.assert_equals(maintainer, author['name'])
    maintainer_email = dataset.get('maintainer_email')
    nose.tools.assert_equals(maintainer_email, author['email'])
def test_resource_description(self):
    """The first resource's 'description' must survive the conversion."""
    extra_fields = {'description': 'GDPs list'}
    self.datapackage.resources[0].descriptor.update(extra_fields)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    first_resource = dataset.get('resources')[0]
    nose.tools.assert_equals(first_resource.get('description'),
                             extra_fields['description'])
def test_resource_hash(self):
    """The first resource's 'hash' must survive the conversion."""
    extra_fields = {'hash': 'e785c0883d7a104330e69aee73d4f235'}
    self.datapackage.resources[0].descriptor.update(extra_fields)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    first_resource = dataset.get('resources')[0]
    nose.tools.assert_equals(first_resource.get('hash'),
                             extra_fields['hash'])
def test_resource_description(self):
    """Check that a resource 'description' is copied to the CKAN resource."""
    description = 'GDPs list'
    self.datapackage.resources[0].descriptor.update({
        'description': description,
    })

    converted = converter.datapackage_to_dataset(self.datapackage)

    actual = converted.get('resources')[0].get('description')
    nose.tools.assert_equals(actual, description)
def test_resource_format(self):
    """The first resource's 'format' must survive the conversion."""
    extra_fields = {'format': 'CSV'}
    self.datapackage.resources[0].descriptor.update(extra_fields)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    first_resource = dataset.get('resources')[0]
    nose.tools.assert_equals(first_resource.get('format'),
                             extra_fields['format'])
def test_resource_hash(self):
    """Check that a resource 'hash' is copied to the CKAN resource."""
    digest = 'e785c0883d7a104330e69aee73d4f235'
    self.datapackage.resources[0].descriptor.update({'hash': digest})

    converted = converter.datapackage_to_dataset(self.datapackage)

    actual = converted.get('resources')[0].get('hash')
    nose.tools.assert_equals(actual, digest)
def test_resource_format(self):
    """Check that a resource 'format' is copied to the CKAN resource."""
    file_format = 'CSV'
    self.datapackage.resources[0].descriptor.update({'format': file_format})

    converted = converter.datapackage_to_dataset(self.datapackage)

    actual = converted.get('resources')[0].get('format')
    nose.tools.assert_equals(actual, file_format)
def test_datapackage_only_requires_some_fields_to_be_valid(self):
    """Name plus one named resource is enough; an empty descriptor is not."""
    without_fields = datapackage.DataPackage({})
    with_required_fields = datapackage.DataPackage({
        'name': 'gdp',
        'resources': [
            {'name': 'the-resource',
             'path': 'http://example.com/some-data.csv'},
        ],
    })

    # Passing simply means no exception is raised here.
    converter.datapackage_to_dataset(with_required_fields)

    convert = converter.datapackage_to_dataset
    nose.tools.assert_raises(KeyError, convert, without_fields)
def test_resource_name_is_used_if_theres_no_title(self):
    """With a ``None`` title, the resource 'name' is expected to be kept."""
    resource = {
        'name': 'gdp',
        'title': None,
    }
    self.datapackage.resources[0].descriptor.update(resource)
    result = converter.datapackage_to_dataset(self.datapackage)
    # Compare with the input descriptor. The previous version rebound
    # `resource` to the converted resource and so compared the result with
    # itself, which could never fail.
    nose.tools.assert_equals(result.get('resources')[0].get('name'),
                             resource['name'])
def test_datapackage_author_as_string_without_email(self):
    """A plain-name 'author' string is expected as the 'maintainer'."""
    # FIXME: Add author.web
    author = {
        'name': 'John Smith'
    }
    self.datapackage.descriptor.update({
        'author': author['name']
    })
    result = converter.datapackage_to_dataset(self.datapackage)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result.get('maintainer'), author['name'])
def test_resource_title_is_used_as_name(self):
    """When a resource has a title, it is expected as the CKAN 'name'."""
    descriptor_fields = {
        'name': 'gdp',
        'title': 'Gross domestic product',
    }
    self.datapackage.resources[0].descriptor.update(descriptor_fields)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    first_resource = dataset.get('resources')[0]
    nose.tools.assert_equals(first_resource.get('name'),
                             descriptor_fields['title'])
def test_datapackage_author_as_string_without_email(self):
    """An 'author' string holding only a name becomes the 'maintainer'."""
    # FIXME: Add author.web
    author = {'name': 'John Smith'}
    author_field = {'author': author['name']}
    self.datapackage.descriptor.update(author_field)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    maintainer = dataset.get('maintainer')
    nose.tools.assert_equals(maintainer, author['name'])
def test_resource_name_is_used_if_theres_no_title(self):
    """A resource whose title is ``None`` is expected to keep its 'name'."""
    resource = {
        'name': 'gdp',
        'title': None,
    }
    self.datapackage.resources[0].descriptor.update(resource)
    result = converter.datapackage_to_dataset(self.datapackage)
    # The expected value comes from the input descriptor. Previously
    # `resource` was overwritten with the converted resource, so the
    # assertion compared the output with itself and always passed.
    nose.tools.assert_equals(
        result.get('resources')[0].get('name'), resource['name'])
def test_datapackage_author_as_unicode(self):
    """A unicode 'author' string is expected as the 'maintainer'."""
    # FIXME: Add author.web
    author = {'name': u'John Smith'}
    author_field = {'author': author['name']}
    self.datapackage.descriptor.update(author_field)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    maintainer = dataset.get('maintainer')
    nose.tools.assert_equals(maintainer, author['name'])
def test_resource_title_is_used_as_name(self):
    """Check that the resource 'title' ends up as the CKAN resource 'name'."""
    title = 'Gross domestic product'
    self.datapackage.resources[0].descriptor.update({
        'name': 'gdp',
        'title': title,
    })

    converted = converter.datapackage_to_dataset(self.datapackage)

    actual_name = converted.get('resources')[0].get('name')
    nose.tools.assert_equals(actual_name, title)
def test_datapackage_license_as_dict(self):
    """A dict 'license' maps to 'license_id', 'license_title', 'license_url'."""
    license_info = {
        'type': 'cc-zero',
        'title': 'Creative Commons CC Zero License (cc-zero)',
        'url': 'http://opendefinition.org/licenses/cc-zero/',
    }
    self.datapackage.descriptor.update({'license': license_info})

    dataset = converter.datapackage_to_dataset(self.datapackage)

    license_id = dataset.get('license_id')
    nose.tools.assert_equals(license_id, license_info['type'])
    license_title = dataset.get('license_title')
    nose.tools.assert_equals(license_title, license_info['title'])
    license_url = dataset.get('license_url')
    nose.tools.assert_equals(license_url, license_info['url'])
def test_resource_path_is_set_to_its_local_data_path(self):
    """The converted resource 'path' must equal the resource's ``source``."""
    local_resource = {'path': 'test-data/datetimes.csv'}
    package = datapackage.DataPackage({
        'name': 'datetimes',
        'resources': [local_resource],
    })

    dataset = converter.datapackage_to_dataset(package)

    converted_path = dataset.get('resources')[0].get('path')
    nose.tools.assert_equals(converted_path, package.resources[0].source)
def test_datapackage_keywords(self):
    """Keywords are expected as 'tags' entries with cleaned-up names."""
    keywords = [
        'economy!!!', 'world bank',
    ]
    self.datapackage.descriptor.update({
        'keywords': keywords
    })
    result = converter.datapackage_to_dataset(self.datapackage)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result.get('tags'), [
        {'name': 'economy'},
        {'name': 'world-bank'},
    ])
def test_datapackage_sources(self):
    """The first source is expected as 'author', 'author_email' and 'url'."""
    first_source = {
        'name': 'World Bank and OECD',
        'email': '*****@*****.**',
        'web': 'http://data.worldbank.org/indicator/NY.GDP.MKTP.CD',
    }
    sources = [first_source]
    self.datapackage.descriptor.update({'sources': sources})

    dataset = converter.datapackage_to_dataset(self.datapackage)

    author = dataset.get('author')
    nose.tools.assert_equals(author, sources[0]['name'])
    author_email = dataset.get('author_email')
    nose.tools.assert_equals(author_email, sources[0]['email'])
    url = dataset.get('url')
    nose.tools.assert_equals(url, sources[0]['web'])
def test_datapackage_name_title_and_version(self):
    """'name', 'title' and 'version' must match the package's ``to_dict()``."""
    core_fields = {
        'name': 'gdp',
        'title': 'Countries GDP',
        'version': '1.0',
    }
    self.datapackage.descriptor.update(core_fields)

    dataset = converter.datapackage_to_dataset(self.datapackage)
    expected = self.datapackage.to_dict()

    nose.tools.assert_equals(dataset['name'], expected['name'])
    nose.tools.assert_equals(dataset['title'], expected['title'])
    nose.tools.assert_equals(dataset['version'], expected['version'])
def test_datapackage_name_title_and_version(self):
    """'name', 'title' and 'version' are expected unchanged in the dataset."""
    self.datapackage.descriptor.update({
        'name': 'gdp',
        'title': 'Countries GDP',
        'version': '1.0',
    })
    result = converter.datapackage_to_dataset(self.datapackage)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result['name'],
                     self.datapackage.descriptor['name'])
    self.assertEqual(result['title'],
                     self.datapackage.descriptor['title'])
    self.assertEqual(result['version'],
                     self.datapackage.descriptor['version'])
def test_datapackage_keywords(self):
    """Check the 'tags' produced for the 'economy!!!'/'world bank' keywords."""
    keywords = ['economy!!!', 'world bank']
    self.datapackage.descriptor.update({'keywords': keywords})

    dataset = converter.datapackage_to_dataset(self.datapackage)

    expected_tags = [
        {'name': 'economy'},
        {'name': 'world-bank'},
    ]
    nose.tools.assert_equals(dataset.get('tags'), expected_tags)
def test_resource_path_is_set_to_its_local_data_path(self):
    """Check the converted 'path' against the resource's ``source``."""
    descriptor = {
        'name': 'datetimes',
        'resources': [
            {'path': 'test-data/datetimes.csv'},
        ],
    }
    local_package = datapackage.DataPackage(descriptor)

    converted = converter.datapackage_to_dataset(local_package)

    first_resource = converted.get('resources')[0]
    nose.tools.assert_equals(first_resource.get('path'),
                             local_package.resources[0].source)
def test_datapackage_author_as_dict(self):
    """Check 'maintainer' and 'maintainer_email' for a dict-style author."""
    # FIXME: Add author.web
    author = {'name': 'John Smith', 'email': '*****@*****.**'}
    author_field = {'author': author}
    self.datapackage.descriptor.update(author_field)

    converted = converter.datapackage_to_dataset(self.datapackage)

    nose.tools.assert_equals(converted.get('maintainer'),
                             author['name'])
    nose.tools.assert_equals(converted.get('maintainer_email'),
                             author['email'])
def test_datapackage_license_as_dict(self):
    """Check the three license fields produced from a dict-style license."""
    license_descriptor = {
        'type': 'cc-zero',
        'title': 'Creative Commons CC Zero License (cc-zero)',
        'url': 'http://opendefinition.org/licenses/cc-zero/',
    }
    license_field = {'license': license_descriptor}
    self.datapackage.descriptor.update(license_field)

    converted = converter.datapackage_to_dataset(self.datapackage)

    nose.tools.assert_equals(converted.get('license_id'),
                             license_descriptor['type'])
    nose.tools.assert_equals(converted.get('license_title'),
                             license_descriptor['title'])
    nose.tools.assert_equals(converted.get('license_url'),
                             license_descriptor['url'])
def test_datapackage_author_as_string(self):
    """A 'Name <email>' author string is expected to be split in two."""
    # FIXME: Add author.web
    author = {
        'name': 'John Smith',
        'email': '*****@*****.**'
    }
    self.datapackage.descriptor.update({
        'author': '{name} <{email}>'.format(name=author['name'],
                                            email=author['email'])
    })
    result = converter.datapackage_to_dataset(self.datapackage)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result.get('maintainer'), author['name'])
    self.assertEqual(result.get('maintainer_email'), author['email'])
def test_resource_schema(self):
    """The first resource's 'schema' is expected unchanged after conversion."""
    schema = {
        'fields': [
            {'name': 'id', 'type': 'integer'},
            {'name': 'title', 'type': 'string'},
        ]
    }
    resource = {
        'schema': schema
    }
    self.datapackage.resources[0].descriptor.update(resource)
    result = converter.datapackage_to_dataset(self.datapackage)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result.get('resources')[0].get('schema'),
                     resource['schema'])
def test_datapackage_extras(self):
    """Check the 'extras' list built from non-standard descriptor keys.

    Per the expected values, list and dict entries come back as JSON
    strings; the comparison ignores ordering.
    """
    non_standard = {
        'title_cn': u'國內生產總值',
        'years': [2015, 2016],
        'last_year': 2016,
        'location': {'country': 'China'},
    }
    self.datapackage.descriptor.update(non_standard)

    converted = converter.datapackage_to_dataset(self.datapackage)
    actual_extras = converted.get('extras')

    nose.tools.assert_items_equal(actual_extras, [
        {'key': 'profile', 'value': u'data-package'},
        {'key': 'title_cn', 'value': u'國內生產總值'},
        {'key': 'years', 'value': '[2015, 2016]'},
        {'key': 'last_year', 'value': 2016},
        {'key': 'location', 'value': '{"country": "China"}'},
    ])
def test_resource_url(self, mock_requests):
    """A remote resource 'path' is expected as the CKAN resource 'url'.

    :param mock_requests: HTTP mocking fixture; only its ``register_uri``
        method is used here, to stub the GET request for the resource URL
    """
    url = 'http://www.somewhere.com/data.csv'
    datapackage_dict = {
        'name': 'gdp',
        'title': 'Countries GDP',
        'version': '1.0',
        'resources': [
            {'path': url}
        ],
    }
    mock_requests.register_uri('GET', url, body='')
    dp = datapackage.DataPackage(datapackage_dict)
    result = converter.datapackage_to_dataset(dp)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result.get('resources')[0].get('url'),
                     datapackage_dict['resources'][0]['path'])
def test_resource_url_is_set_to_its_remote_data_path(self):
    """With a remote base path, 'url' must equal the resource's ``source``."""
    remote_url = 'http://www.somewhere.com/data.csv'
    descriptor = {
        'name': 'gdp',
        'title': 'Countries GDP',
        'version': '1.0',
        'resources': [
            {'path': 'data.csv'},
        ],
    }
    # Stub the GET request for the remote file with an empty body.
    httpretty.register_uri(httpretty.GET, remote_url, body='')
    package = datapackage.DataPackage(
        descriptor, base_path='http://www.somewhere.com')

    dataset = converter.datapackage_to_dataset(package)

    converted_url = dataset.get('resources')[0].get('url')
    nose.tools.assert_equals(converted_url, package.resources[0].source)
def test_resource_url_is_set_to_its_remote_data_path(self):
    """Check the converted 'url' against the remote resource's ``source``."""
    data_url = 'http://www.somewhere.com/data.csv'
    relative_resource = {'path': 'data.csv'}
    package_descriptor = {
        'name': 'gdp',
        'title': 'Countries GDP',
        'version': '1.0',
        'resources': [relative_resource],
    }
    # Register an empty response for the remote data file.
    httpretty.register_uri(httpretty.GET, data_url, body='')
    remote_package = datapackage.DataPackage(
        package_descriptor,
        base_path='http://www.somewhere.com',
    )

    converted = converter.datapackage_to_dataset(remote_package)

    first_resource = converted.get('resources')[0]
    nose.tools.assert_equals(first_resource.get('url'),
                             remote_package.resources[0].source)
def test_datapackage_sources(self):
    """Check 'author', 'author_email' and 'url' against the first source."""
    sources = [{
        'name': 'World Bank and OECD',
        'email': '*****@*****.**',
        'web': 'http://data.worldbank.org/indicator/NY.GDP.MKTP.CD',
    }]
    sources_field = {'sources': sources}
    self.datapackage.descriptor.update(sources_field)

    converted = converter.datapackage_to_dataset(self.datapackage)

    nose.tools.assert_equals(converted.get('author'),
                             sources[0]['name'])
    nose.tools.assert_equals(converted.get('author_email'),
                             sources[0]['email'])
    nose.tools.assert_equals(converted.get('url'),
                             sources[0]['web'])
def test_datapackage_license_as_unicode(self):
    """A unicode 'license' string is expected as the 'license_id'."""
    license_field = {'license': u'cc-zero'}
    self.datapackage.descriptor.update(license_field)

    dataset = converter.datapackage_to_dataset(self.datapackage)

    license_id = dataset.get('license_id')
    nose.tools.assert_equals(license_id, 'cc-zero')
def test_datapackage_license_as_unicode(self):
    """Check 'license_id' when the license is given as a unicode string."""
    self.datapackage.descriptor.update({'license': u'cc-zero'})

    converted = converter.datapackage_to_dataset(self.datapackage)
    actual_license = converted.get('license_id')

    nose.tools.assert_equals(actual_license, 'cc-zero')
def test_basic_datapackage_in_setup_is_valid(self):
    """Converting ``self.datapackage`` as-is must not raise."""
    fixture_package = self.datapackage
    converter.datapackage_to_dataset(fixture_package)
def package_create_from_datapackage(context, data_dict):
    '''Create a new dataset (package) from a Data Package file.

    :param url: url of the datapackage (optional if `upload` is defined)
    :type url: string
    :param upload: the uploaded datapackage (optional if `url` is defined)
    :type upload: cgi.FieldStorage
    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'`` (optional, default:
        datapackage's name concatenated with a random string to avoid
        name collisions)
    :type name: string
    :param private: the visibility of the new dataset
    :type private: bool
    :param owner_org: the id of the dataset's owning organization, see
        :py:func:`~ckan.logic.action.get.organization_list` or
        :py:func:`~ckan.logic.action.get.organization_list_for_user` for
        available values (optional)
    :type owner_org: string
    '''
    url = data_dict.get('url')
    upload = data_dict.get('upload')

    # Guard clause: at least one datapackage source is required.
    if not url and not _upload_attribute_is_valid(upload):
        errors = {'url': ['you must define either a url or upload attribute']}
        raise toolkit.ValidationError(errors)

    package = _load_and_validate_datapackage(url=url, upload=upload)
    dataset_dict = converter.datapackage_to_dataset(package)

    # Caller-supplied values override what came from the datapackage.
    owner_org = data_dict.get('owner_org')
    if owner_org:
        dataset_dict['owner_org'] = owner_org

    private = data_dict.get('private')
    if private:
        dataset_dict['private'] = toolkit.asbool(private)

    name = data_dict.get('name')
    if name:
        dataset_dict['name'] = name

    # Resources are created separately, after the dataset itself exists.
    resources = dataset_dict.get('resources', [])
    if resources:
        del dataset_dict['resources']

    # Create as draft by default so if there's any issue on creating the
    # resources and we're unable to purge the dataset, at least it's not shown.
    dataset_dict['state'] = 'draft'
    res = _package_create_with_unique_name(context, dataset_dict, name)

    dataset_id = res['id']
    id_dict = {'id': dataset_id}

    if resources:
        try:
            _create_resources(dataset_id, context, resources)
            show_package = toolkit.get_action('package_show')
            res = show_package(context, id_dict)
        except Exception as create_error:
            # Try to remove the half-created dataset, then re-raise the
            # original error (chained to the delete error if that failed too).
            try:
                delete_package = toolkit.get_action('package_delete')
                delete_package(context, id_dict)
            except Exception as delete_error:
                six.raise_from(create_error, delete_error)
            else:
                raise create_error

    # Everything succeeded, so publish the dataset.
    res['state'] = 'active'
    update_package = toolkit.get_action('package_update')
    return update_package(context, res)
def test_datapackage_license_as_string(self):
    """A plain-string 'license' is expected as the 'license_id'."""
    self.datapackage.descriptor.update({
        'license': 'cc-zero'
    })
    result = converter.datapackage_to_dataset(self.datapackage)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result.get('license_id'), 'cc-zero')