Ejemplo n.º 1
0
    def test_dataset_to_resource(self):
        """Check that dataset_to_resource adds 'resources' and can be applied twice."""
        dataset = copy.deepcopy(self.test_data)
        # The fixture must start out without any resources.
        assert 'resources' not in dataset

        # First pass creates the key; the second pass runs on the already
        # converted dataset and must leave the key in place.
        for _attempt in range(2):
            utils.dataset_to_resource(dataset)
            assert 'resources' in dataset
Ejemplo n.º 2
0
    def test_dataset_to_resource_invalid(self):
        """Check that 'resources' is still created when download fields are missing."""
        dataset = copy.deepcopy(self.test_data)
        # Strip the download related fields from the fixture copy.
        for field in ('direct_download_URL', 'checksum', 'mimetype'):
            dataset.pop(field)
        assert 'resources' not in dataset

        utils.dataset_to_resource(dataset)
        # dataset_to_resource can handle missing data, so resources is created
        assert 'resources' in dataset
Ejemplo n.º 3
0
def package_update(context, data_dict):
    '''
    Update a dataset and refresh its search index entry right away.

    Wraps ckan's own package_update and afterwards pushes the updated package
    to the 'package' search index, so the change does not have to wait for a
    rebuild of the search index.

    :type context: dict
    :param context: context; ``context['user']`` is looked up as a user and
        ``context['schema']`` may be overwritten here
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    :returns: the dictionary returned by ckan's package_update
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    show_context = {
        'model': model,
        'ignore_auth': True,
        'validate': True,
        'extras_as_string': True,
    }

    # NOTE(review): assumes context['user'] resolves to an existing user;
    # presumably model.User.get returns None otherwise - confirm.
    acting_user = model.User.get(context['user'])
    if acting_user.name != "harvest":
        _remove_extras_from_data_dict(data_dict)

    stored_package = package_show(show_context, data_dict)

    if 'resources' in data_dict:
        data_dict['accept-terms'] = 'yes'  # This is not needed when adding a resource
    else:
        # When this is reached, we are updating a dataset, not creating a new resource
        data_dict['resources'] = stored_package.get('resources', [])
        data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(data_dict)
    _add_ida_download_url(data_dict)

    use_private_schema = (asbool(data_dict.get('private'))
                          and not data_dict.get('persist_schema'))
    if use_private_schema:
        context['schema'] = Schemas.private_package_schema()

    # 'persist_schema' is only a control flag and is dropped before the update.
    data_dict.pop('persist_schema', False)

    # The harvest source schema is applied last, so it overrides the private one.
    if stored_package.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    updated_package = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    reindex_context = {
        'model': model,
        'ignore_auth': True,
        'validate': False,
        'extras_as_string': True,
    }
    indexable_package = ckan.logic.action.get.package_show(reindex_context, updated_package)
    # update_dict calls index_package, so it would basically be the same
    index_for('package').update_dict(indexable_package)

    return updated_package
Ejemplo n.º 4
0
    def test_resource_handling_2(self):
        """Round-trip a dataset through both converters and compare resource fields."""
        dataset = copy.deepcopy(self.test_data3)
        utils.resource_to_dataset(dataset)
        assert 'direct_download_URL' in dataset
        assert 'resources' in dataset

        dataset['availability'] = 'direct_download'

        # Convert forth and back; 'resources' has to be present after each step.
        for convert in (utils.dataset_to_resource, utils.resource_to_dataset):
            convert(dataset)
            assert 'resources' in dataset

        # Dataset level fields must match the first resource of the fixture.
        field_pairs = (
            ('algorithm', 'algorithm'),
            ('checksum', 'hash'),
            ('mimetype', 'mimetype'),
            ('direct_download_URL', 'url'),
        )
        for dataset_key, resource_key in field_pairs:
            assert dataset.get(dataset_key) == self.test_data3['resources'][0][resource_key]
Ejemplo n.º 5
0
def package_create(context, data_dict):
    """
    Create a new dataset and index it immediately.

    Wraps ckan's own package_create and afterwards indexes the new package,
    so that it emerges in search results instantly instead of during the
    next timed reindexing.

    :param context: context; ``context['user']`` is looked up as a user and
        ``context['schema']`` may be overwritten here
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    :returns: the dictionary returned by ckan's package_create
    """
    # NOTE(review): assumes context['user'] resolves to an existing user;
    # presumably model.User.get returns None otherwise - confirm.
    creator = model.User.get(context['user'])

    # Harvest sources may only be added by sysadmins.
    if data_dict.get('type') == 'harvest':
        if not creator.sysadmin:
            ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    created_by_harvester = creator.name == "harvest"

    if not created_by_harvester:
        _remove_extras_from_data_dict(data_dict)

    data_dict = utils.dataset_to_resource(data_dict)

    if not created_by_harvester:
        _handle_package_id_on_create(data_dict)
    _handle_pids(data_dict)
    _add_ida_download_url(data_dict)

    use_private_schema = (asbool(data_dict.get('private'))
                          and not data_dict.get('persist_schema'))
    if use_private_schema:
        context['schema'] = Schemas.private_package_schema()

    # 'persist_schema' is only a control flag and is dropped before creation.
    data_dict.pop('persist_schema', False)

    # The harvest source schema is applied last, so it overrides the private one.
    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    created_package = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], created_package['id'])

    index_context = {
        'model': model,
        'ignore_auth': True,
        'validate': False,
        'extras_as_string': False,
    }
    indexable_package = ckan.logic.action.get.package_show(index_context, created_package)
    index_for('package').index_package(indexable_package)
    return created_package
Ejemplo n.º 6
0
def package_create(context, data_dict):
    """
    Create a new dataset and index it immediately.

    Wraps ckan's own package_create and afterwards indexes the new package,
    so that it emerges in search results instantly instead of during the
    next timed reindexing.

    :param context: context; ``context['user']`` is looked up as a user and
        ``context['schema']`` may be overwritten here
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    :returns: the dictionary returned by ckan's package_create
    """
    # NOTE(review): assumes context['user'] resolves to an existing user;
    # presumably model.User.get returns None otherwise - confirm.
    requester = model.User.get(context['user'])

    # Harvest sources may only be added by sysadmins.
    if data_dict.get('type') == 'harvest':
        if not requester.sysadmin:
            ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    data_dict = utils.dataset_to_resource(data_dict)
    _handle_pids(context, data_dict)
    _add_ida_download_url(context, data_dict)

    is_private = asbool(data_dict.get('private'))
    if is_private and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    # 'persist_schema' is only a control flag and is dropped before creation.
    data_dict.pop('persist_schema', False)

    # The harvest source schema is applied last, so it overrides the private one.
    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    new_package = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], new_package['id'])

    show_context = {
        'model': model,
        'ignore_auth': True,
        'validate': False,
        'extras_as_string': False,
    }
    package_for_index = ckan.logic.action.get.package_show(show_context, new_package)
    index_for('package').index_package(package_for_index)
    return new_package
Ejemplo n.º 7
0
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index,
    so that this newly added package emerges in search results instantly instead of
    during the next timed reindexing.

    Packages of type 'harvest' may only be created by sysadmins; any other
    request with that type is aborted with HTTP 401.

    :param context: context; ``context['user']`` is looked up as a user and
        ``context['schema']`` is overwritten for harvest sources
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    :returns: the dictionary returned by ckan's package_create
    """
    user = model.User.get(context['user'])

    # The authorisation check is deliberately NOT wrapped in a try/except:
    # a KeyError raised while evaluating the check or while aborting must not
    # be swallowed, otherwise creation would silently continue. Only the
    # absence of the 'type' key itself is tolerated. A package without a type
    # cannot equal 'harvest', so the sysadmin check does not apply to it.
    if 'type' not in data_dict:
        log.debug("Tried to check the package type, but it wasn't present!")
    elif data_dict['type'] == 'harvest' and not user.sysadmin:
        ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)

    _add_ida_download_url(context, data_dict)

    # Harvest sources are validated with their own create schema.
    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    pkg_dict1 = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], pkg_dict1['id'])

    # A separate context is used to read the package back for indexing.
    index_context = {'model': model, 'ignore_auth': True, 'validate': False,
                     'extras_as_string': False}
    pkg_dict = ckan.logic.action.get.package_show(index_context, pkg_dict1)
    index = index_for('package')
    index.index_package(pkg_dict)
    return pkg_dict1
Ejemplo n.º 8
0
def package_update(context, data_dict):
    '''
    Update a dataset and refresh its search index entry right away.

    Wraps ckan's own package_update and afterwards pushes the updated package
    to the 'package' search index, so the change does not have to wait for a
    rebuild of the search index.

    :type context: dict
    :param context: context; ``context['schema']`` may be overwritten here
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    :returns: the dictionary returned by ckan's package_update
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    show_context = {
        'model': model,
        'ignore_auth': True,
        'validate': True,
        'extras_as_string': True,
    }
    stored_package = package_show(show_context, data_dict)
    stored_resources = stored_package.get('resources', [])

    if 'resources' in data_dict:
        data_dict['accept-terms'] = 'yes'  # This is not needed when adding a resource
    else:
        # When this is reached, we are updating a dataset, not creating a new resource
        data_dict['resources'] = stored_resources
        data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)
    _add_ida_download_url(context, data_dict)

    # TODO: generate a new version PID (provider 'kata', type 'version') when
    # the data version has changed; this was sketched earlier but is not active.

    use_private_schema = (asbool(data_dict.get('private'))
                          and not data_dict.get('persist_schema'))
    if use_private_schema:
        context['schema'] = Schemas.private_package_schema()

    # 'persist_schema' is only a control flag and is dropped before the update.
    data_dict.pop('persist_schema', False)

    # The harvest source schema is applied last, so it overrides the private one.
    if stored_package.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    updated_package = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    reindex_context = {
        'model': model,
        'ignore_auth': True,
        'validate': False,
        'extras_as_string': True,
    }
    indexable_package = ckan.logic.action.get.package_show(reindex_context, updated_package)
    # update_dict calls index_package, so it would basically be the same
    index_for('package').update_dict(indexable_package)

    return updated_package
Ejemplo n.º 9
0
def package_update(context, data_dict):
    '''
    Update a dataset and refresh its search index entry right away.

    Wraps ckan's own package_update and afterwards pushes the updated package
    to the 'package' search index, so the change does not have to wait for a
    rebuild of the search index.

    :type context: dict
    :param context: context; ``context['schema']`` is overwritten for
        harvest sources
    :type data_dict: dict
    :param data_dict: dataset as dictionary; its 'extras' key is removed here

    :rtype: dictionary
    :returns: the dictionary returned by ckan's package_update
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    show_context = {
        'model': model,
        'ignore_auth': True,
        'validate': True,
        'extras_as_string': True,
    }
    stored_package = package_show(show_context, data_dict)
    stored_resources = stored_package.get('resources', [])

    if 'resources' not in data_dict:
        # When this is reached, we are updating a dataset, not creating a new resource
        data_dict['resources'] = stored_resources
        data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)
    _add_ida_download_url(context, data_dict)

    # TODO: generate a new version PID (provider 'kata', type 'version') when
    # the data version has changed; this was sketched earlier but is not active.

    # Dropping 'extras' works around extras fields being cleared when a
    # resource is added. Per the original author's note, some fields stay in
    # extras after the show schema conversions and make package_update() drop
    # all other fields; for updates via UI or API the conversion to extras
    # happens inside package_update(), so popping here should have no effect.
    # TODO: MIKKO: pop the additional extras in the converters instead of here.
    # NOTE(review): the earlier note also mentioned an 'allow_partial_update'
    # context flag that is not set anywhere in this function - confirm whether
    # it is still relevant.
    data_dict.pop('extras', None)

    # TODO: JUHO: Apply correct schema depending on dataset. A more robust way
    # would be to find the harvest source of the dataset through model.Package
    # and pick the schema by harvester type (eg. DDI) rather than by name prefix.

    if stored_package.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    updated_package = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    reindex_context = {
        'model': model,
        'ignore_auth': True,
        'validate': False,
        'extras_as_string': True,
    }
    indexable_package = ckan.logic.action.get.package_show(reindex_context, updated_package)
    # update_dict calls index_package, so it would basically be the same
    index_for('package').update_dict(indexable_package)
    return updated_package