def package_update(context, data_dict):
    '''
    Updates the dataset.

    Extends ckan's similar method to instantly re-index the SOLR index.
    Otherwise the changes would only be added during a re-index (a rebuild of
    search index, to be specific).

    :type context: dict
    :param context: context
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    package_context = {'model': model, 'ignore_auth': True, 'validate': True,
                       'extras_as_string': True}

    user = model.User.get(context['user'])
    # NOTE(review): model.User.get may return None for an unknown user, which
    # would raise AttributeError below — confirm callers always pass a valid
    # user name.
    if user.name != "harvest":
        _remove_extras_from_data_dict(data_dict)

    package_data = package_show(package_context, data_dict)

    if 'resources' not in data_dict:
        # When this is reached, we are updating a dataset, not creating a new
        # resource.
        old_resources = package_data.get('resources', [])
        data_dict['resources'] = old_resources
        data_dict = utils.dataset_to_resource(data_dict)
    else:
        # This is not needed when adding a resource.
        data_dict['accept-terms'] = 'yes'

    _handle_pids(data_dict)

    _add_ida_download_url(data_dict)

    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    data_dict.pop('persist_schema', False)

    if package_data.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    pkg_dict1 = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    # Re-fetch without validation and push straight into the search index so
    # the change is visible immediately instead of after the next re-index.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': True}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    # update_dict calls index_package, so it would basically be the same
    index.update_dict(pkg_dict)

    return pkg_dict1
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index,
    so that this newly added package emerges in search results instantly
    instead of during the next timed reindexing.

    :param context: context
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    """
    user = model.User.get(context['user'])
    if data_dict.get('type') == 'harvest' and not user.sysadmin:
        ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    # Harvesters bring their own extras; only strip them for ordinary users.
    if user.name != "harvest":
        _remove_extras_from_data_dict(data_dict)

    data_dict = utils.dataset_to_resource(data_dict)

    if user.name != 'harvest':
        _handle_package_id_on_create(data_dict)
    _handle_pids(data_dict)

    _add_ida_download_url(data_dict)

    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    data_dict.pop('persist_schema', False)

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    pkg_dict1 = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], pkg_dict1['id'])

    # Re-fetch without validation and index immediately so the new package
    # shows up in search results right away.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': False}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    index.index_package(pkg_dict)

    return pkg_dict1
def package_show(context, data_dict):
    '''
    Return the metadata of a dataset (package) and its resources.

    Called before showing the dataset in some interface (browser, API),
    or when adding package to Solr index (no validation / conversions then).

    :param id: the id or name of the dataset
    :type id: string

    :rtype: dictionary
    '''
    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_show_package_schema()

    if not data_dict.get('id') and not data_dict.get('name'):
        # Get package by data PIDs
        data_dict['id'] = utils.get_package_id_by_data_pids(data_dict)

    pkg_dict1 = ckan.logic.action.get.package_show(context, data_dict)
    pkg_dict1 = utils.resource_to_dataset(pkg_dict1)

    # Remove empty agents that come from padding the agent list in converters.
    # A list comprehension keeps the Python 2 behaviour (filter() returned a
    # list there) and is also correct on Python 3.
    if 'agent' in pkg_dict1:
        pkg_dict1['agent'] = [agent for agent in pkg_dict1.get('agent', []) if agent]

    # Normally logic function should not catch the raised errors
    # but here it is needed so action package_show won't catch it instead
    # Hiding information from API calls
    try:
        check_access('package_update', context)
    except NotAuthorized:
        pkg_dict1 = utils.hide_sensitive_fields(pkg_dict1)

    pkg = Package.get(pkg_dict1['id'])
    if 'erelated' in pkg.extras:
        erelated = pkg.extras['erelated']
        if erelated:
            for value in erelated.split(';'):
                # .count() avoids materializing every matching row just to
                # test existence (the original used len(query.all()) == 0).
                if Session.query(Related).filter(Related.title == value).count() == 0:
                    data_dict = {'title': value,
                                 'type': _("Paper"),
                                 'dataset_id': pkg.id}
                    related_create(context, data_dict)

    # Update package.title to match package.extras.title_0
    extras_title = pkg.extras.get(u'title_0')
    if extras_title and extras_title != pkg.title:
        repo.new_revision()
        pkg.title = pkg.extras[u'title_0']
        pkg.save()
        rebuild(pkg.id)  # Rebuild solr-index for this dataset

    return pkg_dict1
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index,
    so that this newly added package emerges in search results instantly
    instead of during the next timed reindexing.

    :param context: context
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    """
    user = model.User.get(context['user'])
    # Only sysadmins may register harvest sources.
    if data_dict.get('type') == 'harvest' and not user.sysadmin:
        ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))

    data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)
    _add_ida_download_url(context, data_dict)

    # Private datasets get a reduced schema unless the caller explicitly asked
    # to keep the current one; the flag is consumed either way.
    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()
    data_dict.pop('persist_schema', False)

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    created = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], created['id'])

    # Fetch the freshly created package without validation and push it into
    # the search index right away.
    show_context = {'model': model,
                    'ignore_auth': True,
                    'validate': False,
                    'extras_as_string': False}
    fresh_pkg = ckan.logic.action.get.package_show(show_context, created)
    index_for('package').index_package(fresh_pkg)

    return created
def package_show(context, data_dict):
    '''
    Return the metadata of a dataset (package) and its resources.

    Called before showing the dataset in some interface (browser, API),
    or when adding package to Solr index (no validation / conversions then).

    :param id: the id or name of the dataset
    :type id: string

    :rtype: dictionary
    '''
    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_show_package_schema()

    # Disable package retrieval directly from Solr as contact.email is not there.
    context['use_cache'] = False

    if not data_dict.get('id') and not data_dict.get('name'):
        # Get package by data PIDs
        data_dict['id'] = utils.get_package_id_by_primary_pid(data_dict)

    pkg_dict1 = ckan.logic.action.get.package_show(context, data_dict)
    pkg_dict1 = utils.resource_to_dataset(pkg_dict1)

    # Remove empty agents that come from padding the agent list in converters.
    # A list comprehension keeps the Python 2 behaviour (filter() returned a
    # list there) and is also correct on Python 3.
    if 'agent' in pkg_dict1:
        pkg_dict1['agent'] = [agent for agent in pkg_dict1.get('agent', []) if agent]

    # Normally logic function should not catch the raised errors
    # but here it is needed so action package_show won't catch it instead
    # Hiding information from API calls
    try:
        check_access('package_update', context)
    except NotAuthorized:
        pkg_dict1 = utils.hide_sensitive_fields(pkg_dict1)

    pkg = Package.get(pkg_dict1['id'])
    if 'erelated' in pkg.extras:
        erelated = pkg.extras['erelated']
        if erelated:
            for value in erelated.split(';'):
                # .count() avoids materializing every matching row just to
                # test existence (the original used len(query.all()) == 0).
                if Session.query(Related).filter(Related.title == value).count() == 0:
                    data_dict = {'title': value,
                                 'type': _("Paper"),
                                 'dataset_id': pkg.id}
                    related_create(context, data_dict)

    return pkg_dict1
def package_create(context, data_dict):
    """
    Creates a new dataset.

    Extends ckan's similar method to instantly reindex the SOLR index,
    so that this newly added package emerges in search results instantly
    instead of during the next timed reindexing.

    :param context: context
    :param data_dict: data dictionary (package data)

    :rtype: dictionary
    """
    user = model.User.get(context['user'])

    # Explicit membership check instead of try/except KeyError: the broad
    # except could also have masked unrelated KeyErrors raised after the
    # subscript succeeded. Behaviour (including the debug log) is unchanged.
    if 'type' in data_dict:
        if data_dict['type'] == 'harvest' and not user.sysadmin:
            ckan.lib.base.abort(401, _('Unauthorized to add a harvest source'))
    else:
        log.debug("Tried to check the package type, but it wasn't present!")
        # TODO: JUHO: Dubious to let pass without checking user.sysadmin

    data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)

    _add_ida_download_url(context, data_dict)

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_create_package_schema()

    pkg_dict1 = ckan.logic.action.create.package_create(context, data_dict)

    # Logging for production use
    _log_action('Package', 'create', context['user'], pkg_dict1['id'])

    # Re-fetch without validation and index immediately so the new package
    # shows up in search results right away.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': False}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    index.index_package(pkg_dict)

    return pkg_dict1
def index(self):
    '''
    Simplified index function compared to CKAN's original one.

    :return: render home/index.html
    '''
    try:
        # Run an empty-rows package search purely to collect counts and facets
        # for the front page.
        search_context = {'model': model,
                          'session': model.Session,
                          'user': c.user or c.author,
                          'schema': kata_schemas.package_search_schema()}
        search_params = {
            'q': '*:*',
            'facet.field': g.facets,
            'facet.limit': -1,
            'rows': 0,
            'start': 0,
            'sort': 'title_string desc',
            'fq': 'capacity:"public" +dataset_type:dataset',
        }
        results = logic.get_action('package_search')(search_context,
                                                     search_params)

        c.package_count = results['count']
        c.facets = results['facets']
        maintain.deprecate_context_item('facets',
                                        'Use `c.search_facets` instead.')
        c.search_facets = results['search_facets']
        c.num_tags = len(c.facets.get('tags'))
        c.num_discipline = len(c.facets.get('extras_discipline'))
    except search.SearchError:
        # Fall back to an empty front page when Solr is unavailable.
        c.package_count = 0
        c.groups = []
        c.num_tags = 0
        c.num_discipline = 0

    return render("home/index.html", cache_force=True)
def get_package_ratings_for_data_dict(data_dict):
    '''
    Create a metadata rating (1-5) for given data_dict.

    This is the same as :meth:`get_package_ratings` but can be used
    for getting metadata ratings e.g. for search results where
    only raw data_dicts are available rather than already-converted
    package dicts.

    :param data_dict: A CKAN data_dict
    '''
    # Importing here prevents circular import
    from ckanext.kata.schemas import Schemas

    show_context = {
        'model': model,
        'schema': Schemas.show_package_schema(),
    }
    try:
        pkg_dict = get_action('package_show')(show_context, data_dict)
    except ValidationError:
        # Unconvertible package: rate it zero stars.
        return (0, u'○○○○○')

    return get_package_ratings(pkg_dict)
def package_update(context, data_dict):
    '''
    Updates the dataset.

    Extends ckan's similar method to instantly re-index the SOLR index.
    Otherwise the changes would only be added during a re-index (a rebuild of
    search index, to be specific).

    :type context: dict
    :param context: context
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    package_context = {'model': model, 'ignore_auth': True, 'validate': True,
                       'extras_as_string': True}
    package_data = package_show(package_context, data_dict)

    old_resources = package_data.get('resources', [])

    if 'resources' not in data_dict:
        # When this is reached, we are updating a dataset, not creating a new
        # resource.
        data_dict['resources'] = old_resources
        data_dict = utils.dataset_to_resource(data_dict)
    else:
        # This is not needed when adding a resource.
        data_dict['accept-terms'] = 'yes'

    _handle_pids(context, data_dict)

    _add_ida_download_url(context, data_dict)

    # TODO: Generate a new version_PID when the data version has changed
    # (compare data_dict['version'] against the stored package's version).

    if asbool(data_dict.get('private')) and not data_dict.get('persist_schema'):
        context['schema'] = Schemas.private_package_schema()

    data_dict.pop('persist_schema', False)

    if package_data.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    pkg_dict1 = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    # Re-fetch without validation and push straight into the search index so
    # the change is visible immediately instead of after the next re-index.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': True}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    # update_dict calls index_package, so it would basically be the same
    index.update_dict(pkg_dict)

    return pkg_dict1
def package_update(context, data_dict):
    '''
    Updates the dataset.

    Extends ckan's similar method to instantly re-index the SOLR index.
    Otherwise the changes would only be added during a re-index (a rebuild of
    search index, to be specific).

    :type context: dict
    :param context: context
    :type data_dict: dict
    :param data_dict: dataset as dictionary

    :rtype: dictionary
    '''
    # Get all resources here since we get only 'dataset' resources from WUI.
    package_context = {'model': model, 'ignore_auth': True, 'validate': True,
                       'extras_as_string': True}
    package_data = package_show(package_context, data_dict)

    old_resources = package_data.get('resources', [])

    if 'resources' not in data_dict:
        # When this is reached, we are updating a dataset, not creating a new
        # resource.
        data_dict['resources'] = old_resources
        data_dict = utils.dataset_to_resource(data_dict)

    _handle_pids(context, data_dict)

    _add_ida_download_url(context, data_dict)

    # TODO: Generate a new version_PID when the data version has changed
    # (compare data_dict['version'] against the stored package's version).

    # This fixes extras fields being cleared when adding a resource. This is
    # because the extras are not properly cleared in show_package_schema
    # conversions. Some fields stay in extras and they cause all other fields
    # to be dropped in package_update(). When updating a dataset via UI or
    # API, the conversion to extras occurs in package_update() and popping
    # extras here should have no effect.
    data_dict.pop('extras', None)
    # TODO: MIKKO: Get rid of popping extras here and rather pop the
    # additional extras in converters so nothing gets added to extras from the
    # converters and everything not initially present in extras gets removed.

    # TODO: JUHO: Apply correct schema depending on dataset. This is a quick
    # resolution; a more robust way would be to check through model.Package
    # which harvest source the dataset belongs to and then get the type of
    # the harvester (eg. DDI).
    if package_data.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_update_package_schema()

    pkg_dict1 = ckan.logic.action.update.package_update(context, data_dict)

    # Logging for production use
    _log_action('Package', 'update', context['user'], data_dict['id'])

    # Re-fetch without validation and push straight into the search index so
    # the change is visible immediately instead of after the next re-index.
    context = {'model': model, 'ignore_auth': True, 'validate': False,
               'extras_as_string': True}
    pkg_dict = ckan.logic.action.get.package_show(context, pkg_dict1)
    index = index_for('package')
    # update_dict calls index_package, so it would basically be the same
    index.update_dict(pkg_dict)

    return pkg_dict1