def test_dataset_to_resource(self):
    """dataset_to_resource must create 'resources' and stay idempotent."""
    data_dict = copy.deepcopy(self.test_data)
    assert 'resources' not in data_dict

    # First conversion must create the resources list.
    utils.dataset_to_resource(data_dict)
    assert 'resources' in data_dict

    # Converting again must not remove it (idempotent behaviour).
    utils.dataset_to_resource(data_dict)
    assert 'resources' in data_dict
def test_dataset_to_resource_invalid(self):
    """dataset_to_resource must tolerate missing resource fields."""
    data_dict = copy.deepcopy(self.test_data)
    for key in ('direct_download_URL', 'checksum', 'mimetype'):
        data_dict.pop(key)
    assert 'resources' not in data_dict

    # dataset_to_resource can handle missing data, so resources is created
    utils.dataset_to_resource(data_dict)
    assert 'resources' in data_dict
def package_update(context, data_dict):
    '''
    Updates the dataset.

    Extends ckan's similar method to instantly re-index the SOLR index.
    Otherwise the changes would only be added during a re-index (a rebuild
    of search index, to be specific).

    :type context: dict
    :param context: context
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    package_context = {'model': model, 'ignore_auth': True, 'validate': True,
                       'extras_as_string': True}
    user = model.User.get(context['user'])

    # Harvester updates keep their extras untouched; everything else is cleaned.
    if user.name != "harvest":
        _remove_extras_from_data_dict(data_dict)

    package_data = package_show(package_context, data_dict)

    if 'resources' not in data_dict:
        # When this is reached, we are updating a dataset, not creating a
        # new resource: carry the stored resources over so they survive.
        old_resources = package_data.get('resources', [])
        data_dict['resources'] = old_resources
        data_dict = utils.dataset_to_resource(data_dict)
    else:
        data_dict['accept-terms'] = 'yes'  # This is not needed when adding a resource

    _handle_pids(data_dict)
    _add_ida_download_url(data_dict)

    # Private datasets use a relaxed schema unless explicitly persisted.
    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    data_dict.pop('persist_schema', False)

    if package_data.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    pkg_dict1 = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    # Re-fetch without validation and push straight into the search index.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': True}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    # update_dict calls index_package, so it would basically be the same
    index.update_dict(pkg_dict)

    return pkg_dict1
def test_resource_handling_2(self):
    """Round-tripping resource_to_dataset / dataset_to_resource keeps resource data."""
    data_dict = copy.deepcopy(self.test_data3)

    utils.resource_to_dataset(data_dict)
    assert 'direct_download_URL' in data_dict
    assert 'resources' in data_dict

    data_dict['availability'] = 'direct_download'
    utils.dataset_to_resource(data_dict)
    assert 'resources' in data_dict

    utils.resource_to_dataset(data_dict)
    assert 'resources' in data_dict

    # The flattened fields must still match the original resource values.
    original = self.test_data3['resources'][0]
    assert data_dict.get('algorithm') == original['algorithm']
    assert data_dict.get('checksum') == original['hash']
    assert data_dict.get('mimetype') == original['mimetype']
    assert data_dict.get('direct_download_URL') == original['url']
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index,
    so that this newly added package emerges in search results instantly instead of
    during the next timed reindexing.

    :param context: context
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    """
    user = model.User.get(context['user'])

    # Only sysadmins may register harvest sources.
    if data_dict.get('type') == 'harvest' and not user.sysadmin:
        ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    # Harvester-created packages keep their extras as-is.
    if user.name != "harvest":
        _remove_extras_from_data_dict(data_dict)

    data_dict = utils.dataset_to_resource(data_dict)

    if user.name != "harvest":
        _handle_package_id_on_create(data_dict)

    _handle_pids(data_dict)
    _add_ida_download_url(data_dict)

    # Private datasets use a relaxed schema unless explicitly persisted.
    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    data_dict.pop('persist_schema', False)

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    pkg_dict1 = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], pkg_dict1['id'])

    # Re-fetch without validation and index immediately.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': False}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    index.index_package(pkg_dict)

    return pkg_dict1
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index,
    so that this newly added package emerges in search results instantly instead of
    during the next timed reindexing.

    :param context: context
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    """
    user = model.User.get(context['user'])

    # Harvest sources may only be added by sysadmins.
    if data_dict.get('type') == 'harvest' and not user.sysadmin:
        ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)
    _add_ida_download_url(context, data_dict)

    # Private datasets use a relaxed schema unless explicitly persisted.
    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()
    data_dict.pop('persist_schema', False)

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    pkg_dict1 = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], pkg_dict1['id'])

    # Re-fetch without validation and push into the search index right away.
    show_context = {'model': model, 'ignore_auth': True, 'validate': False,
                    'extras_as_string': False}
    context = show_context
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    index.index_package(pkg_dict)

    return pkg_dict1
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index,
    so that this newly added package emerges in search results instantly instead of
    during the next timed reindexing.

    :param context: context
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    """
    user = model.User.get(context['user'])

    # Use .get() instead of the former try/except KeyError with a bare
    # 'pass': a missing 'type' can never be a harvest source, so the
    # sysadmin check is not silently bypassed by an exception path.
    pkg_type = data_dict.get('type')
    if pkg_type is None:
        log.debug("Tried to check the package type, but it wasn't present!")
    elif pkg_type == 'harvest' and not user.sysadmin:
        ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)
    _add_ida_download_url(context, data_dict)

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    pkg_dict1 = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], pkg_dict1['id'])

    # Re-fetch without validation and index immediately.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': False}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    index.index_package(pkg_dict)

    return pkg_dict1
def package_update(context, data_dict):
    '''
    Updates the dataset.

    Extends ckan's similar method to instantly re-index the SOLR index.
    Otherwise the changes would only be added during a re-index (a rebuild
    of search index, to be specific).

    :type context: dict
    :param context: context
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    package_context = {'model': model, 'ignore_auth': True, 'validate': True,
                       'extras_as_string': True}
    package_data = package_show(package_context, data_dict)
    # package_data = ckan.logic.action.get.package_show(package_context, data_dict)
    old_resources = package_data.get('resources', [])

    if 'resources' not in data_dict:
        # When this is reached, we are updating a dataset, not creating a new resource
        data_dict['resources'] = old_resources
        data_dict = utils.dataset_to_resource(data_dict)
    else:
        data_dict['accept-terms'] = 'yes'  # This is not needed when adding a resource

    _handle_pids(context, data_dict)
    _add_ida_download_url(context, data_dict)

    # # Check if data version has changed and if so, generate a new version_PID
    # if not data_dict['version'] == temp_pkg_dict['version']:
    #     data_dict['pids'].append(
    #         {
    #             u'provider': u'kata',
    #             u'id': utils.generate_pid(),
    #             u'type': u'version',
    #         })

    # Private datasets use a relaxed schema unless explicitly persisted.
    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()
    data_dict.pop('persist_schema', False)

    if package_data.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    pkg_dict1 = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    # Re-fetch without validation and refresh the search index entry.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': True}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    # update_dict calls index_package, so it would basically be the same
    index.update_dict(pkg_dict)

    return pkg_dict1
def package_update(context, data_dict):
    '''
    Updates the dataset.

    Extends ckan's similar method to instantly re-index the SOLR index.
    Otherwise the changes would only be added during a re-index (a rebuild
    of search index, to be specific).

    :type context: dict
    :param context: context
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    package_context = {'model': model, 'ignore_auth': True, 'validate': True,
                       'extras_as_string': True}
    package_data = package_show(package_context, data_dict)
    # package_data = ckan.logic.action.get.package_show(package_context, data_dict)
    old_resources = package_data.get('resources', [])

    if 'resources' not in data_dict:
        # When this is reached, we are updating a dataset, not creating a new resource
        data_dict['resources'] = old_resources
        data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)
    _add_ida_download_url(context, data_dict)

    # # Check if data version has changed and if so, generate a new version_PID
    # if not data_dict['version'] == temp_pkg_dict['version']:
    #     data_dict['pids'].append(
    #         {
    #             u'provider': u'kata',
    #             u'id': utils.generate_pid(),
    #             u'type': u'version',
    #         })

    # This fixes extras fields being cleared when adding a resource. This is be
    # because the extras are not properly cleared in show_package_schema
    # conversions. Some fields stay in extras and they cause all other fields
    # to be dropped in package_update(). When updating a dataset via UI or API,
    # the conversion to extras occur in package_update() and popping extras
    # here should have no effect.
    data_dict.pop('extras', None)
    # TODO: MIKKO: Get rid of popping extras here and rather pop the additional
    # extras in converters so we could remove the popping and the above
    # "context['allow_partial_update'] = True" which causes the extras to be
    # processed in a way that nothing gets added to extras from the converters
    # and everything not initially present in extras gets removed.

    # TODO: JUHO: Apply correct schema depending on dataset
    # This is quick resolution. More robust way would be to check through
    # model.Package to which harvest source the dataset belongs and then get
    # the type of the harvester (eg. DDI)
    # if data_dict['name'].startswith('FSD'):
    #     context['schema'] = schemas.update_package_schema_ddi()
    if package_data.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    pkg_dict1 = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    # Re-fetch without validation and refresh the search index entry.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': True}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    # update_dict calls index_package, so it would basically be the same
    index.update_dict(pkg_dict)

    return pkg_dict1