def test_datapackage_only_requires_some_fields_to_be_valid(self):
        # A descriptor with just a name and one resource must convert
        # cleanly, while an empty descriptor must raise KeyError.
        empty_package = datapackage.DataPackage({})
        minimal_descriptor = {
            'name': 'gdp',
            'resources': [
                {
                    'name': 'the-resource',
                    'path': 'http://example.com/some-data.csv',
                },
            ],
        }
        minimal_package = datapackage.DataPackage(minimal_descriptor)

        converter.datapackage_to_dataset(minimal_package)
        nose.tools.assert_raises(
            KeyError, converter.datapackage_to_dataset, empty_package)
 def test_name_is_lowercased(self):
     # A mixed-case descriptor name must come out lowercased in the dataset.
     mixed_case = {'name': 'ThEnAmE'}
     self.datapackage.descriptor.update(mixed_case)
     dataset = converter.datapackage_to_dataset(self.datapackage)
     expected_name = self.datapackage.descriptor['name'].lower()
     nose.tools.assert_equals(dataset['name'], expected_name)
 def test_datapackage_extras(self):
     # Descriptor keys set here are expected back under 'extras' as
     # key/value pairs; list and dict values are expected as JSON strings.
     # The 'profile' extra is not set in this test (presumably it comes
     # from the fixture or the converter -- confirm against setUp).
     extra_fields = {
         'title_cn': u'國內生產總值',
         'years': [2015, 2016],
         'last_year': 2016,
         'location': {'country': 'China'},
     }
     self.datapackage.descriptor.update(extra_fields)
     dataset = converter.datapackage_to_dataset(self.datapackage)
     expected_pairs = [
         ('profile', u'data-package'),
         ('title_cn', u'國內生產總值'),
         ('years', '[2015, 2016]'),
         ('last_year', 2016),
         ('location', '{"country": "China"}'),
     ]
     expected_extras = [
         {'key': key, 'value': value} for key, value in expected_pairs
     ]
     nose.tools.assert_items_equal(dataset.get('extras'), expected_extras)
 def test_datapackage_description(self):
     # The descriptor's 'description' is expected under the dataset's
     # 'notes' key.
     self.datapackage.descriptor.update({
         'description': 'Country, regional and world GDP in current USD.'
     })
     result = converter.datapackage_to_dataset(self.datapackage)
     # assertEqual is used because the assertEquals alias is deprecated
     # and no longer exists on Python 3.12+.
     self.assertEqual(result.get('notes'),
                      self.datapackage.descriptor['description'])
예제 #5
0
 def test_name_is_lowercased(self):
     # The dataset name should equal the descriptor name converted to
     # lowercase.
     self.datapackage.descriptor.update(dict(name='ThEnAmE'))
     converted = converter.datapackage_to_dataset(self.datapackage)
     converted_name = converted['name']
     original_name = self.datapackage.descriptor['name']
     nose.tools.assert_equals(converted_name, original_name.lower())
예제 #6
0
 def test_datapackage_description(self):
     # The descriptor's 'description' should surface as dataset 'notes'.
     description = 'Country, regional and world GDP in current USD.'
     self.datapackage.descriptor.update({'description': description})
     dataset = converter.datapackage_to_dataset(self.datapackage)
     actual_notes = dataset.get('notes')
     expected_notes = self.datapackage.descriptor['description']
     nose.tools.assert_equals(actual_notes, expected_notes)
예제 #7
0
    def handle_datapackage(self, datapackage, parameters, stats):
        '''Create or update a ckan dataset from datapackage and parameters.

        The datapackage is converted to a CKAN dataset dict layered over a
        set of default core properties, then overridden by any
        ``dataset-properties`` given in ``parameters`` and posted to
        ``package_create``. When CKAN answers with 'That URL is already in
        use.' for the name and ``overwrite_existing`` is set in
        ``parameters``, the same payload is posted to ``package_update``.

        Converted resources are stripped from the payload and kept on the
        instance; on success the returned dataset id is stored as well.

        :param datapackage: descriptor handed to ``datapackage_lib.DataPackage``
        :param parameters: mapping read for ``dataset-properties`` and
            ``overwrite_existing``
        :param stats: not used by this method
        :raises Exception: if CKAN still reports an error after the request(s)
        '''

        # core dataset properties
        dataset = {
            'title': '',
            'version': '',
            'state': 'active',
            'url': '',
            'notes': '',
            'license_id': '',
            'author': '',
            'author_email': '',
            'maintainer': '',
            'maintainer_email': '',
            'owner_org': None,
            'private': False
        }

        dp = datapackage_lib.DataPackage(datapackage)
        dataset.update(converter.datapackage_to_dataset(dp))

        # Resources are kept aside on the instance rather than sent with
        # the dataset payload.
        self.__dataset_resources = dataset.get('resources', [])
        if self.__dataset_resources:
            del dataset['resources']

        # Merge dataset-properties from parameters into dataset.
        dataset_props_from_params = parameters.get('dataset-properties')
        if dataset_props_from_params:
            dataset.update(dataset_props_from_params)

        package_create_url = '{}/package_create'.format(self.__base_endpoint)

        response = make_ckan_request(package_create_url,
                                     method='POST',
                                     json=dataset,
                                     api_key=self.__ckan_api_key)

        ckan_error = get_ckan_error(response)
        if ckan_error \
           and parameters.get('overwrite_existing') \
           and 'That URL is already in use.' in ckan_error.get('name', []):

            package_update_url = \
                '{}/package_update'.format(self.__base_endpoint)

            log.info('CKAN dataset with url already exists. '
                     'Attempting package_update.')
            response = make_ckan_request(package_update_url,
                                         method='POST',
                                         json=dataset,
                                         api_key=self.__ckan_api_key)
            ckan_error = get_ckan_error(response)

        if ckan_error:
            # log.error rather than log.exception: no exception is being
            # handled here, so there is no traceback to attach.
            message = 'CKAN returned an error: ' + json.dumps(ckan_error)
            log.error(message)
            raise Exception(message)

        if response['success']:
            self.__dataset_id = response['result']['id']
 def test_datapackage_author_as_dict(self):
     # FIXME: Add author.web
     # A dict-shaped author should populate the dataset's maintainer
     # name and email.
     author = dict(name='John Smith', email='*****@*****.**')
     self.datapackage.descriptor.update(dict(author=author))
     dataset = converter.datapackage_to_dataset(self.datapackage)
     expectations = (
         ('maintainer', author['name']),
         ('maintainer_email', author['email']),
     )
     for field, expected in expectations:
         nose.tools.assert_equals(dataset.get(field), expected)
    def test_resource_description(self):
        # A resource's 'description' should be carried over unchanged.
        description = 'GDPs list'
        first_resource = self.datapackage.resources[0]
        first_resource.descriptor.update({'description': description})

        dataset = converter.datapackage_to_dataset(self.datapackage)
        converted = dataset.get('resources')[0]
        nose.tools.assert_equals(converted.get('description'), description)
    def test_resource_hash(self):
        # A resource's 'hash' should be carried over unchanged.
        expected_hash = 'e785c0883d7a104330e69aee73d4f235'
        first_resource = self.datapackage.resources[0]
        first_resource.descriptor.update({'hash': expected_hash})

        dataset = converter.datapackage_to_dataset(self.datapackage)
        converted = dataset.get('resources')[0]
        nose.tools.assert_equals(converted.get('hash'), expected_hash)
예제 #11
0
    def test_resource_description(self):
        # The converted resource keeps the description set on the
        # descriptor.
        changes = dict(description='GDPs list')
        self.datapackage.resources[0].descriptor.update(changes)

        converted_resources = converter.datapackage_to_dataset(
            self.datapackage).get('resources')
        actual = converted_resources[0].get('description')
        nose.tools.assert_equals(actual, changes['description'])
    def test_resource_format(self):
        # A resource's 'format' should be carried over unchanged.
        expected_format = 'CSV'
        first_resource = self.datapackage.resources[0]
        first_resource.descriptor.update({'format': expected_format})

        dataset = converter.datapackage_to_dataset(self.datapackage)
        converted = dataset.get('resources')[0]
        nose.tools.assert_equals(converted.get('format'), expected_format)
예제 #13
0
    def test_resource_hash(self):
        # The converted resource keeps the hash set on the descriptor.
        changes = dict(hash='e785c0883d7a104330e69aee73d4f235')
        self.datapackage.resources[0].descriptor.update(changes)

        converted_resources = converter.datapackage_to_dataset(
            self.datapackage).get('resources')
        actual = converted_resources[0].get('hash')
        nose.tools.assert_equals(actual, changes['hash'])
예제 #14
0
    def test_resource_format(self):
        # The converted resource keeps the format set on the descriptor.
        changes = dict(format='CSV')
        self.datapackage.resources[0].descriptor.update(changes)

        converted_resources = converter.datapackage_to_dataset(
            self.datapackage).get('resources')
        actual = converted_resources[0].get('format')
        nose.tools.assert_equals(actual, changes['format'])
예제 #15
0
    def test_datapackage_only_requires_some_fields_to_be_valid(self):
        # Converting an empty descriptor raises KeyError; a name plus a
        # single named resource with a path is enough to convert.
        without_fields = datapackage.DataPackage({})
        only_resource = dict(
            name='the-resource',
            path='http://example.com/some-data.csv',
        )
        with_required_fields = datapackage.DataPackage(
            dict(name='gdp', resources=[only_resource]))

        convert = converter.datapackage_to_dataset
        convert(with_required_fields)
        nose.tools.assert_raises(KeyError, convert, without_fields)
예제 #16
0
 def test_resource_name_is_used_if_theres_no_title(self):
     # With the title unset, the converted resource is expected to keep
     # the descriptor's own name.
     resource = {
         'name': 'gdp',
         'title': None,
     }
     self.datapackage.resources[0].descriptor.update(resource)
     result = converter.datapackage_to_dataset(self.datapackage)
     # Compare against the input descriptor, not the converted resource:
     # comparing the result with itself could never fail.
     nose.tools.assert_equals(result.get('resources')[0].get('name'),
                              resource['name'])
예제 #17
0
 def test_datapackage_author_as_string_without_email(self):
     # FIXME: Add author.web
     # A plain-string author (no email part) is expected as the
     # dataset's maintainer.
     author = {
         'name': 'John Smith'
     }
     self.datapackage.descriptor.update({
         'author': author['name']
     })
     result = converter.datapackage_to_dataset(self.datapackage)
     # assertEqual is used because the assertEquals alias is deprecated
     # and no longer exists on Python 3.12+.
     self.assertEqual(result.get('maintainer'), author['name'])
예제 #18
0
    def test_resource_title_is_used_as_name(self):
        # When a resource has both a name and a title, the converted
        # resource's name should be the title.
        title = 'Gross domestic product'
        first_resource = self.datapackage.resources[0]
        first_resource.descriptor.update({'name': 'gdp', 'title': title})

        dataset = converter.datapackage_to_dataset(self.datapackage)
        converted = dataset.get('resources')[0]
        nose.tools.assert_equals(converted.get('name'), title)
예제 #19
0
 def test_datapackage_author_as_string_without_email(self):
     # FIXME: Add author.web
     # An author given as a bare name string should become the
     # dataset's maintainer.
     author_name = 'John Smith'
     self.datapackage.descriptor.update({'author': author_name})
     dataset = converter.datapackage_to_dataset(self.datapackage)
     maintainer = dataset.get('maintainer')
     nose.tools.assert_equals(maintainer, author_name)
 def test_resource_name_is_used_if_theres_no_title(self):
     # With the title unset, the converted resource is expected to keep
     # the descriptor's own name.
     resource = {
         'name': 'gdp',
         'title': None,
     }
     self.datapackage.resources[0].descriptor.update(resource)
     result = converter.datapackage_to_dataset(self.datapackage)
     # Compare against the input descriptor, not the converted resource:
     # comparing the result with itself could never fail.
     nose.tools.assert_equals(
         result.get('resources')[0].get('name'), resource['name'])
 def test_datapackage_author_as_unicode(self):
     # FIXME: Add author.web
     # An author given as a unicode string should become the dataset's
     # maintainer.
     author_name = u'John Smith'
     self.datapackage.descriptor.update({'author': author_name})
     dataset = converter.datapackage_to_dataset(self.datapackage)
     maintainer = dataset.get('maintainer')
     nose.tools.assert_equals(maintainer, author_name)
    def test_resource_title_is_used_as_name(self):
        # The resource title, when present, is expected as the converted
        # resource's name.
        changes = dict(name='gdp', title='Gross domestic product')
        self.datapackage.resources[0].descriptor.update(changes)

        converted_resources = converter.datapackage_to_dataset(
            self.datapackage).get('resources')
        actual_name = converted_resources[0].get('name')
        nose.tools.assert_equals(actual_name, changes['title'])
 def test_datapackage_license_as_dict(self):
     # A dict license should map type/title/url onto the dataset's
     # license_id/license_title/license_url.
     license_info = dict(
         type='cc-zero',
         title='Creative Commons CC Zero License (cc-zero)',
         url='http://opendefinition.org/licenses/cc-zero/',
     )
     self.datapackage.descriptor.update({'license': license_info})
     dataset = converter.datapackage_to_dataset(self.datapackage)
     field_map = (
         ('license_id', 'type'),
         ('license_title', 'title'),
         ('license_url', 'url'),
     )
     for dataset_key, license_key in field_map:
         nose.tools.assert_equals(dataset.get(dataset_key),
                                  license_info[license_key])
    def test_resource_path_is_set_to_its_local_data_path(self):
        # For a resource with a local path, the converted resource's
        # 'path' should equal the resource's source.
        descriptor = {
            'name': 'datetimes',
            'resources': [{'path': 'test-data/datetimes.csv'}],
        }
        package = datapackage.DataPackage(descriptor)

        dataset = converter.datapackage_to_dataset(package)
        converted = dataset.get('resources')[0]
        nose.tools.assert_equals(converted.get('path'),
                                 package.resources[0].source)
예제 #25
0
 def test_datapackage_keywords(self):
     # Keywords are expected as dataset tags; the expected names below
     # have the punctuation dropped and the space replaced by a hyphen.
     keywords = [
         'economy!!!', 'world bank',
     ]
     self.datapackage.descriptor.update({
         'keywords': keywords
     })
     result = converter.datapackage_to_dataset(self.datapackage)
     # assertEqual is used because the assertEquals alias is deprecated
     # and no longer exists on Python 3.12+.
     self.assertEqual(result.get('tags'), [
         {'name': 'economy'},
         {'name': 'world-bank'},
     ])
 def test_datapackage_sources(self):
     # The first source should provide the dataset's author,
     # author_email and url.
     first_source = dict(
         name='World Bank and OECD',
         email='*****@*****.**',
         web='http://data.worldbank.org/indicator/NY.GDP.MKTP.CD',
     )
     self.datapackage.descriptor.update({'sources': [first_source]})
     dataset = converter.datapackage_to_dataset(self.datapackage)
     field_map = (
         ('author', 'name'),
         ('author_email', 'email'),
         ('url', 'web'),
     )
     for dataset_key, source_key in field_map:
         nose.tools.assert_equals(dataset.get(dataset_key),
                                  first_source[source_key])
예제 #27
0
 def test_datapackage_name_title_and_version(self):
     # name, title and version in the dataset should match the values
     # in the package's dict form.
     self.datapackage.descriptor.update(
         dict(name='gdp', title='Countries GDP', version='1.0'))
     dataset = converter.datapackage_to_dataset(self.datapackage)
     expected = self.datapackage.to_dict()
     for field in ('name', 'title', 'version'):
         nose.tools.assert_equals(dataset[field], expected[field])
예제 #28
0
 def test_datapackage_name_title_and_version(self):
     # name, title and version in the dataset should match the
     # descriptor's values.
     self.datapackage.descriptor.update({
         'name': 'gdp',
         'title': 'Countries GDP',
         'version': '1.0',
     })
     result = converter.datapackage_to_dataset(self.datapackage)
     # assertEqual is used because the assertEquals alias is deprecated
     # and no longer exists on Python 3.12+.
     self.assertEqual(result['name'], self.datapackage.descriptor['name'])
     self.assertEqual(result['title'],
                      self.datapackage.descriptor['title'])
     self.assertEqual(result['version'],
                      self.datapackage.descriptor['version'])
 def test_datapackage_name_title_and_version(self):
     # The converted dataset should carry the same name, title and
     # version as the package's dict representation.
     core_fields = {
         'name': 'gdp',
         'title': 'Countries GDP',
         'version': '1.0',
     }
     self.datapackage.descriptor.update(core_fields)
     converted = converter.datapackage_to_dataset(self.datapackage)
     package_dict = self.datapackage.to_dict()
     nose.tools.assert_equals(converted['name'], package_dict['name'])
     nose.tools.assert_equals(converted['title'], package_dict['title'])
     nose.tools.assert_equals(
         converted['version'], package_dict['version'])
예제 #30
0
 def test_datapackage_keywords(self):
     # Keywords should come back as tags; the expected names have the
     # punctuation dropped and the space replaced by a hyphen.
     self.datapackage.descriptor.update(
         {'keywords': ['economy!!!', 'world bank']})
     dataset = converter.datapackage_to_dataset(self.datapackage)
     expected_tags = [
         {'name': tag_name} for tag_name in ('economy', 'world-bank')
     ]
     nose.tools.assert_equals(dataset.get('tags'), expected_tags)
예제 #31
0
    def test_resource_path_is_set_to_its_local_data_path(self):
        # The converted resource's 'path' is expected to equal the
        # source of the package's resource.
        local_resource = dict(path='test-data/datetimes.csv')
        package = datapackage.DataPackage(
            dict(name='datetimes', resources=[local_resource]))

        converted_resources = converter.datapackage_to_dataset(
            package).get('resources')
        actual_path = converted_resources[0].get('path')
        nose.tools.assert_equals(actual_path, package.resources[0].source)
예제 #32
0
 def test_datapackage_author_as_dict(self):
     # FIXME: Add author.web
     # The author's name and email are expected as the dataset's
     # maintainer and maintainer_email.
     author_name = 'John Smith'
     author_email = '*****@*****.**'
     self.datapackage.descriptor.update({
         'author': {'name': author_name, 'email': author_email},
     })
     dataset = converter.datapackage_to_dataset(self.datapackage)
     nose.tools.assert_equals(dataset.get('maintainer'), author_name)
     nose.tools.assert_equals(
         dataset.get('maintainer_email'), author_email)
예제 #33
0
 def test_datapackage_license_as_dict(self):
     # The license's type, title and url are expected as the dataset's
     # license_id, license_title and license_url.
     license_type = 'cc-zero'
     license_title = 'Creative Commons CC Zero License (cc-zero)'
     license_url = 'http://opendefinition.org/licenses/cc-zero/'
     self.datapackage.descriptor.update({
         'license': {
             'type': license_type,
             'title': license_title,
             'url': license_url,
         },
     })
     dataset = converter.datapackage_to_dataset(self.datapackage)
     nose.tools.assert_equals(dataset.get('license_id'), license_type)
     nose.tools.assert_equals(dataset.get('license_title'), license_title)
     nose.tools.assert_equals(dataset.get('license_url'), license_url)
예제 #34
0
 def test_datapackage_author_as_string(self):
     # FIXME: Add author.web
     # An author written as 'name <email>' is expected to be split into
     # the dataset's maintainer and maintainer_email.
     author = {
         'name': 'John Smith',
         'email': '*****@*****.**'
     }
     self.datapackage.descriptor.update({
         'author': '{name} <{email}>'.format(name=author['name'],
                                             email=author['email'])
     })
     result = converter.datapackage_to_dataset(self.datapackage)
     # assertEqual is used because the assertEquals alias is deprecated
     # and no longer exists on Python 3.12+.
     self.assertEqual(result.get('maintainer'), author['name'])
     self.assertEqual(result.get('maintainer_email'), author['email'])
예제 #35
0
    def test_resource_schema(self):
        # A resource's 'schema' is expected on the converted resource
        # unchanged.
        schema = {
            'fields': [
                {'name': 'id', 'type': 'integer'},
                {'name': 'title', 'type': 'string'},
            ]
        }
        resource = {
            'schema': schema
        }

        self.datapackage.resources[0].descriptor.update(resource)
        result = converter.datapackage_to_dataset(self.datapackage)
        # assertEqual is used because the assertEquals alias is
        # deprecated and no longer exists on Python 3.12+.
        self.assertEqual(result.get('resources')[0].get('schema'),
                         resource['schema'])
예제 #36
0
 def test_datapackage_extras(self):
     # Additional descriptor keys are expected under 'extras'; the list
     # and dict values are expected serialized as JSON strings. The
     # 'profile' entry is not set here (presumably supplied by the
     # fixture or the converter -- confirm against setUp).
     title_cn = u'國內生產總值'
     self.datapackage.descriptor.update(dict(
         title_cn=title_cn,
         years=[2015, 2016],
         last_year=2016,
         location={'country': 'China'},
     ))
     dataset = converter.datapackage_to_dataset(self.datapackage)

     def extra(key, value):
         # Build one expected 'extras' entry.
         return {'key': key, 'value': value}

     expected_extras = [
         extra('profile', u'data-package'),
         extra('title_cn', title_cn),
         extra('years', '[2015, 2016]'),
         extra('last_year', 2016),
         extra('location', '{"country": "China"}'),
     ]
     nose.tools.assert_items_equal(dataset.get('extras'), expected_extras)
예제 #37
0
    def test_resource_url(self, mock_requests):
        # A resource whose path is a URL is expected to expose that URL
        # as the converted resource's 'url'. The URL is registered on
        # mock_requests with an empty body.
        url = 'http://www.somewhere.com/data.csv'
        datapackage_dict = {
            'name': 'gdp',
            'title': 'Countries GDP',
            'version': '1.0',
            'resources': [
                {'path': url}
            ],
        }
        mock_requests.register_uri('GET', url, body='')

        dp = datapackage.DataPackage(datapackage_dict)
        result = converter.datapackage_to_dataset(dp)
        # assertEqual is used because the assertEquals alias is
        # deprecated and no longer exists on Python 3.12+.
        self.assertEqual(result.get('resources')[0].get('url'),
                         datapackage_dict['resources'][0]['path'])
예제 #38
0
 def test_resource_url_is_set_to_its_remote_data_path(self):
     # With a remote base_path, the converted resource's 'url' should
     # equal the resource's source. The full URL is registered with
     # httpretty with an empty body.
     remote_url = 'http://www.somewhere.com/data.csv'
     relative_resource = {'path': 'data.csv'}
     descriptor = dict(
         name='gdp',
         title='Countries GDP',
         version='1.0',
         resources=[relative_resource],
     )
     httpretty.register_uri(httpretty.GET, remote_url, body='')
     package = datapackage.DataPackage(
         descriptor, base_path='http://www.somewhere.com')
     dataset = converter.datapackage_to_dataset(package)
     converted = dataset.get('resources')[0]
     nose.tools.assert_equals(converted.get('url'),
                              package.resources[0].source)
 def test_resource_url_is_set_to_its_remote_data_path(self):
     # A relative resource path combined with a remote base_path is
     # expected to yield a 'url' equal to the resource's source.
     registered_url = 'http://www.somewhere.com/data.csv'
     base_path = 'http://www.somewhere.com'
     package_descriptor = {
         'name': 'gdp',
         'title': 'Countries GDP',
         'version': '1.0',
         'resources': [
             {'path': 'data.csv'},
         ],
     }
     httpretty.register_uri(httpretty.GET, registered_url, body='')
     package = datapackage.DataPackage(package_descriptor,
                                       base_path=base_path)
     converted_resources = converter.datapackage_to_dataset(
         package).get('resources')
     actual_url = converted_resources[0].get('url')
     nose.tools.assert_equals(actual_url, package.resources[0].source)
예제 #40
0
 def test_datapackage_sources(self):
     # The first source's name, email and web are expected as the
     # dataset's author, author_email and url.
     source_name = 'World Bank and OECD'
     source_email = '*****@*****.**'
     source_web = 'http://data.worldbank.org/indicator/NY.GDP.MKTP.CD'
     self.datapackage.descriptor.update({
         'sources': [
             {'name': source_name, 'email': source_email, 'web': source_web},
         ],
     })
     dataset = converter.datapackage_to_dataset(self.datapackage)
     nose.tools.assert_equals(dataset.get('author'), source_name)
     nose.tools.assert_equals(dataset.get('author_email'), source_email)
     nose.tools.assert_equals(dataset.get('url'), source_web)
 def test_datapackage_license_as_unicode(self):
     # A unicode license string should become the dataset's license_id.
     license_update = {'license': u'cc-zero'}
     self.datapackage.descriptor.update(license_update)
     dataset = converter.datapackage_to_dataset(self.datapackage)
     license_id = dataset.get('license_id')
     nose.tools.assert_equals(license_id, 'cc-zero')
예제 #42
0
 def test_datapackage_license_as_unicode(self):
     # The dataset's license_id is expected to equal the license given
     # as a unicode string.
     self.datapackage.descriptor.update(dict(license=u'cc-zero'))
     converted = converter.datapackage_to_dataset(self.datapackage)
     nose.tools.assert_equals(converted.get('license_id'), 'cc-zero')
 def test_basic_datapackage_in_setup_is_valid(self):
     # Smoke test: converting the fixture package must not raise.
     fixture_package = self.datapackage
     converter.datapackage_to_dataset(fixture_package)
예제 #44
0
def package_create_from_datapackage(context, data_dict):
    '''Create a new dataset (package) from a Data Package file.

    The dataset is first created in ``draft`` state, its resources are
    created, and it is then switched to ``active`` through
    ``package_update``. If creating the resources fails, deleting the
    dataset is attempted before the error is re-raised.

    :param url: url of the datapackage (optional if `upload` is defined)
    :type url: string
    :param upload: the uploaded datapackage (optional if `url` is defined)
    :type upload: cgi.FieldStorage
    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'`` (optional, default:
        datapackage's name concatenated with a random string to avoid
        name collisions)
    :type name: string
    :param private: the visibility of the new dataset
    :type private: bool
    :param owner_org: the id of the dataset's owning organization, see
        :py:func:`~ckan.logic.action.get.organization_list` or
        :py:func:`~ckan.logic.action.get.organization_list_for_user` for
        available values (optional)
    :type owner_org: string
    '''
    source_url = data_dict.get('url')
    uploaded = data_dict.get('upload')
    if not (source_url or _upload_attribute_is_valid(uploaded)):
        raise toolkit.ValidationError(
            {'url': ['you must define either a url or upload attribute']})

    package = _load_and_validate_datapackage(url=source_url, upload=uploaded)
    dataset = converter.datapackage_to_dataset(package)

    # Caller-supplied values take precedence over the converted ones.
    organization = data_dict.get('owner_org')
    if organization:
        dataset['owner_org'] = organization

    visibility = data_dict.get('private')
    if visibility:
        dataset['private'] = toolkit.asbool(visibility)

    name = data_dict.get('name')
    if name:
        dataset['name'] = name

    # Resources are created separately, after the dataset itself exists.
    pending_resources = dataset.get('resources', [])
    if pending_resources:
        dataset.pop('resources')

    # Create as draft by default so if there's any issue on creating the
    # resources and we're unable to purge the dataset, at least it's not shown.
    dataset['state'] = 'draft'
    created = _package_create_with_unique_name(context, dataset, name)
    dataset_id = created['id']

    if pending_resources:
        try:
            _create_resources(dataset_id, context, pending_resources)
            show_package = toolkit.get_action('package_show')
            created = show_package(context, {'id': dataset_id})
        except Exception as creation_error:
            # Try to remove the half-built dataset, then surface the
            # original failure either way.
            try:
                delete_package = toolkit.get_action('package_delete')
                delete_package(context, {'id': dataset_id})
            except Exception as cleanup_error:
                six.raise_from(creation_error, cleanup_error)
            else:
                raise creation_error

    created['state'] = 'active'
    update_package = toolkit.get_action('package_update')
    return update_package(context, created)
예제 #45
0
 def test_basic_datapackage_in_setup_is_valid(self):
     # The package built in setup is expected to convert without
     # raising; the return value is deliberately ignored.
     convert = converter.datapackage_to_dataset
     convert(self.datapackage)
예제 #46
0
 def test_datapackage_license_as_string(self):
     # A plain-string license is expected as the dataset's license_id.
     self.datapackage.descriptor.update({
         'license': 'cc-zero'
     })
     result = converter.datapackage_to_dataset(self.datapackage)
     # assertEqual is used because the assertEquals alias is deprecated
     # and no longer exists on Python 3.12+.
     self.assertEqual(result.get('license_id'), 'cc-zero')