def validate_publisher_create(data_dict, context):
    """Dry-run validation of an organization-creation data dict.

    Runs the organization group plugin's create schema over ``data_dict``
    and then rolls the session back, so nothing is ever persisted.

    :param data_dict: the candidate organization dict.
    :param context: standard CKAN action context (must carry ``session``).
    :returns: the validation-error dict, or an empty dict when valid.
    """
    session = context['session']
    plugin = lib_plugins.lookup_group_plugin('organization')
    try:
        schema = plugin.form_to_db_schema_options({
            'type': 'create',
            'api': 'api_version' in context,
            'context': context,
        })
    except AttributeError:
        # Plugin does not support schema options; use the plain form schema.
        schema = plugin.form_to_db_schema()
    _data, errors = lib_plugins.plugin_validate(
        plugin, context, data_dict, schema, 'organization_create')
    # Validation may have touched the session; discard any pending changes.
    session.rollback()
    return errors or {}
def dge_organization_publisher(context, data_dict=None):
    """Return the validated organization dict for the id in ``data_dict``.

    Looks up the group, dictizes it and runs the ``organization_show``
    validation. Best-effort by design: any failure (missing id, unknown
    group, group that is not an organization, validation error) yields an
    empty dict instead of propagating.

    :param context: standard CKAN action context (must carry ``model``).
    :param data_dict: dict containing the organization ``id``.
    :returns: the validated organization dict, or ``{}`` on any failure.
    """
    try:
        model = context['model']
        id = logic.get_or_bust(data_dict, 'id')
        group = model.Group.get(id)
        context['group'] = group
        if group is None:
            raise NotFound
        if not group.is_organization:
            raise NotFound
        group_dict = model_dictize.group_dictize(
            group,
            context,
            packages_field='dataset_count',
            include_tags=False,
            include_extras=True,
            include_groups=False,
            include_users=False,
        )
        group_plugin = lib_plugins.lookup_group_plugin(group_dict['type'])
        schema = logic.schema.default_show_group_schema()
        group_dict, errors = lib_plugins.plugin_validate(
            group_plugin, context, group_dict, schema, 'organization_show')
        return group_dict
    except Exception:
        # FIX: was a bare ``except:`` which also swallowed SystemExit and
        # KeyboardInterrupt. Keep the best-effort contract (return {}) but
        # only catch real errors.
        return {}
def bd_dataset_validate(context, data_dict):
    """Validate ``data_dict`` as if it were a brand-new package.

    Ids are stripped and the name is suffixed so validation cannot collide
    with existing packages. Auth is bypassed (``ignore_auth``).

    :returns: dict with ``success`` (bool), ``data`` and ``errors``.
        The historical misspelled key ``sucess`` is kept for callers that
        still read it.
    """
    context["ignore_auth"] = True
    if "type" not in data_dict:
        package_plugin = lib_plugins.lookup_package_plugin()
        try:
            # use first type as default if user didn't provide type
            package_type = package_plugin.package_types()[0]
        except (AttributeError, IndexError):
            # in case a 'dataset' plugin was registered w/o fallback
            package_type = "dataset"
        data_dict["type"] = package_type
    package_plugin = lib_plugins.lookup_package_plugin(data_dict["type"])
    # Strip ids so the dict validates as a create, not an update.
    data_dict.pop("id", None)
    # FIX: tolerate a missing "resources" key instead of raising KeyError.
    for r in data_dict.get("resources", []):
        r.pop("id", None)
    if "name" in data_dict:
        # add a char so that name doesn't collide with existing package
        data_dict["name"] += "_"
    schema = context.get("schema") or package_plugin.create_package_schema()
    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, "package_create"
    )
    ok = not errors
    # FIX: "sucess" was misspelled; keep it for backward compatibility and
    # expose the correctly spelled "success" alongside it.
    return {"sucess": ok, "success": ok, "data": data, "errors": errors}
def package_validate(context, data_dict):
    """Validate a package dict, as a create or an update.

    If ``data_dict['id']`` resolves to an existing package the dict is
    validated with the update schema, otherwise with the create schema.

    :raises ValidationError: when the dict fails validation.
    :returns: the validated data dict (without the internal
        ``groups_list`` key).
    """
    model = context['model']
    pkg_id = data_dict.get("id")
    pkg = model.Package.get(pkg_id) if pkg_id else None
    if pkg is not None:
        # Existing package: validate against its real type and id.
        action = 'package_update'
        pkg_type = pkg.type
        context["package"] = pkg
        data_dict["id"] = pkg.id
    else:
        action = 'package_create'
        pkg_type = data_dict.get('type', 'dataset')
    logic.check_access(action, context, data_dict)
    plugin = lib_plugins.lookup_package_plugin(pkg_type)
    if 'schema' in context:
        schema = context['schema']
    elif action == 'package_create':
        schema = plugin.create_package_schema()
    else:
        schema = plugin.update_package_schema()
    data, errors = lib_plugins.plugin_validate(
        plugin, context, data_dict, schema, action)
    if errors:
        raise ValidationError(errors)
    # Internal helper key; callers should never see it.
    data.pop('groups_list', None)
    return data
def package_list_show_for_reindex(context, dataset_ids):
    '''
    Wraps the default package_show and adds additional information to the
    resources: resource size (for uploaded files) and resource revision
    timestamp.

    :param context: standard CKAN action context; ``session`` is set here.
    :param dataset_ids: iterable of package ids to dictize for reindexing.
    :returns: list of (validated) package dicts.
    '''
    model = context['model']
    context['session'] = model.Session
    dataset_dicts = []
    # Single bulk query instead of one Package.get() per id.
    all_datasets = model.Session.query(model.Package).filter(model.Package.id.in_(dataset_ids)).all()
    for pkg in all_datasets:
        # log.info('Package {}'.format(pkg.id))
        # NOTE(review): a query result row is never None, so this guard is
        # effectively dead — kept as inherited from upstream package_show.
        if pkg is None:
            raise NotFound
        context['package'] = pkg
        context['reindexing'] = True
        # NOTE(review): package_dict is always None here, so the dictize
        # always runs; pattern kept from the upstream code this wraps.
        package_dict = None
        if not package_dict:
            package_dict = model_dictize.package_dictize(pkg, context)
        package_dict_validated = False
        # Plugin hooks run in the same order as core package_show:
        # before_view (view contexts only), read, then validation, after_show.
        if context.get('for_view'):
            for item in plugins.PluginImplementations(plugins.IPackageController):
                package_dict = item.before_view(package_dict)
        for item in plugins.PluginImplementations(plugins.IPackageController):
            item.read(pkg)
        # for item in plugins.PluginImplementations(plugins.IResourceController):
        #     for resource_dict in package_dict['resources']:
        #         item.before_show(resource_dict)
        if not package_dict_validated:
            package_plugin = lib_plugins.lookup_package_plugin(
                package_dict['type'])
            if 'schema' in context:
                schema = context['schema']
            else:
                schema = package_plugin.show_package_schema()
            # Validation can be switched off via context['validate'] = False.
            if schema and context.get('validate', True):
                package_dict, errors = lib_plugins.plugin_validate(
                    package_plugin, context, package_dict, schema,
                    'package_show')
        for item in plugins.PluginImplementations(plugins.IPackageController):
            item.after_show(context, package_dict)
        additional_hdx_package_show_processing(context, package_dict, just_for_reindexing=True)
        dataset_dicts.append(package_dict)
    return dataset_dicts
def get_dataset_validation_error_or_none(pkg_dict, context):
    """Validate *pkg_dict* against the regular dataset update schema.

    Deposited datasets are converted to their regular-dataset shape first.
    The stock ``owner_org`` error is dropped in favour of a dedicated
    ``owner_org_dest`` check.

    :returns: a ValidationError instance describing the problems, or
        ``None`` when the dict is valid.
    """
    # Convert a deposited dataset to its regular form before validating.
    if pkg_dict.get('type') == 'deposited-dataset':
        pkg_dict = convert_deposited_dataset_to_regular_dataset(pkg_dict)
    plugin = lib_plugins.lookup_package_plugin('dataset')
    data, errors = lib_plugins.plugin_validate(
        plugin, context, pkg_dict, plugin.update_package_schema(),
        'package_update')
    # Replace the generic owner_org error with the destination-org check.
    errors.pop('owner_org', None)
    if data.get('owner_org') == 'unknown':
        errors['owner_org_dest'] = ['Missing Value']
    if errors:
        return ValidationError(errors)
    return None
def group_or_org_plugin_dictize(context, group_dict, include_followers, is_org):
    """Run plugin read hooks and show-validation over a group/org dict.

    :param group_dict: an already-dictized group or organization.
    :param include_followers: when true, look up and attach
        ``num_followers`` via the ``group_follower_count`` action.
    :param is_org: selects organization vs group plugin interfaces and the
        validation action name.
    :returns: the validated group dict.
    """
    plugins = p
    if is_org:
        plugin_type = plugins.IOrganizationController
    else:
        plugin_type = plugins.IGroupController
    for item in plugins.PluginImplementations(plugin_type):
        # NOTE(review): ``group`` is never defined in this function — this
        # raises NameError as soon as any plugin implements the interface
        # (it only "works" while the implementations list is empty).
        # Upstream group_dictize passes the model Group object here; the
        # intended value is probably context['group'] — TODO confirm and fix.
        item.read(group)
    group_plugin = lib_plugins.lookup_group_plugin(group_dict['type'])
    try:
        # NOTE(review): this schema is computed but never used — it is
        # unconditionally overwritten by default_show_group_schema() below.
        schema = group_plugin.db_to_form_schema_options({
            'type': 'show',
            'api': 'api_version' in context,
            'context': context
        })
    except AttributeError:
        schema = group_plugin.db_to_form_schema()
    if include_followers:
        model = context['model']
        group_dict['num_followers'] = get_action('group_follower_count')(
            {
                'model': model,
                'session': model.Session
            }, {
                'id': group_dict['id']
            })
    else:
        group_dict['num_followers'] = 0
    # Normalise falsy display fields to None (e.g. empty string, 0).
    if not group_dict.get('display_name'):
        group_dict['display_name'] = None
    if not group_dict.get('package_count'):
        group_dict['package_count'] = None
    schema = default_show_group_schema()
    group_dict, errors = lib_plugins.plugin_validate(
        group_plugin, context, group_dict, schema,
        'organization_show' if is_org else 'group_show')
    return group_dict
def package_create(fb, context, data_dict):
    """Create a package after validating it, swapping in a custom
    owner_org validator.

    Mirrors core CKAN package_create but replaces ``default_oov`` with
    ``uds_oov`` in the owner_org schema chain.

    :param fb: NOTE(review): unused in this body — presumably a bound
        first argument from how this override is registered; confirm
        against the caller before removing.
    :param context: standard CKAN action context.
    :param data_dict: the package dict to create.
    :returns: the created package dict, or just its id when
        ``return_id_only`` is set in the context.
    :raises ValidationError: when the dict fails schema validation.
    """
    model = context['model']
    user = context['user']
    package_type = data_dict.get('type')
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.create_package_schema()
    # We modify the schema here to replace owner_org_validator by our own
    if 'owner_org' in schema:
        schema['owner_org'] = [uds_oov if f is default_oov else f
                               for f in schema['owner_org']]
    check_access('package_create', context, data_dict)
    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)
    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_create')
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'), data.get('name'), data_dict)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")
    admins = []
    if user:
        # Python 2 code path: ``user`` is a byte string here.
        user_obj = model.User.by_name(user.decode('utf8'))
        if user_obj:
            admins = [user_obj]
            data['creator_user_id'] = user_obj.id
    pkg = model_save.package_dict_save(data, context)
    model.setup_default_user_roles(pkg, admins)
    # Needed to let extensions know the package id
    model.Session.flush()
    data['id'] = pkg.id
    # Attach the owner org with auth bypassed and commit deferred so the
    # whole create remains a single transaction.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    get_action('package_owner_org_update')(context_org_update,
                                           {'id': pkg.id,
                                            'organization_id': pkg.owner_org})
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)
        item.after_create(context, data)
    if not context.get('defer_commit'):
        model.repo.commit()
    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)
    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)
    return_id_only = context.get('return_id_only', False)
    output = context['id'] if return_id_only \
        else get_action('package_show')(context, {'id': context['id']})
    return output
def package_update(context, data_dict):
    """Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    It is recommended to call
    :py:func:`ckan.logic.action.get.package_show`, make the desired changes to
    the result, and then call ``package_update()`` with it.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    For further parameters see
    :py:func:`~ckan.logic.action.create.package_create`.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if ``'return_package_dict'`` is ``True`` in
        the context, which is the default. Otherwise returns just the dataset
        id)
    :rtype: dictionary
    """
    model = context["model"]
    user = context["user"]
    name_or_id = data_dict.get("id") or data_dict["name"]
    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_("Package was not found."))
    context["package"] = pkg
    # Pin id and type to the stored package so validation cannot change them.
    data_dict["id"] = pkg.id
    data_dict["type"] = pkg.type
    _check_access("package_update", context, data_dict)
    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if "schema" in context:
        schema = context["schema"]
    else:
        schema = package_plugin.update_package_schema()
    if "api_version" not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, "check_data_dict", None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    data, errors = lib_plugins.plugin_validate(package_plugin, context,
                                               data_dict, schema,
                                               "package_update")
    log.debug(
        "package_update validate_errs=%r user=%s package=%s data=%r",
        errors,
        context.get("user"),
        context.get("package").name if context.get("package") else "",
        data,
    )
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)
    rev = model.repo.new_revision()
    rev.author = user
    if "message" in context:
        rev.message = context["message"]
    else:
        rev.message = _(u"REST API: Update object %s") % data.get("name")
    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)
    pkg = model_save.package_dict_save(data, context)
    # Attach the owner org with auth bypassed, commit deferred and no extra
    # revision, keeping the update a single transaction.
    context_org_update = context.copy()
    context_org_update["ignore_auth"] = True
    context_org_update["defer_commit"] = True
    context_org_update["add_revision"] = False
    _get_action("package_owner_org_update")(context_org_update,
                                            {"id": pkg.id,
                                             "organization_id": pkg.owner_org})
    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get("resources"):
        # NOTE(review): assumes pkg.resources preserves the order of
        # data["resources"] after save — TODO confirm against model_save.
        for index, resource in enumerate(data["resources"]):
            resource["id"] = pkg.resources[index].id
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)
    if not context.get("defer_commit"):
        model.repo.commit()
    log.debug("Updated object %s" % pkg.name)
    return_id_only = context.get("return_id_only", False)
    # Make sure that a user provided schema is not used on package_show
    context.pop("schema", None)
    # we could update the dataset so we should still be able to read it.
    context["ignore_auth"] = True
    output = data_dict["id"] if return_id_only else _get_action(
        "package_show")(context, {"id": data_dict["id"]})
    return output
package_type = 'dataset' # in case a 'dataset' plugin was registered w/o fallback package_plugin = lib_plugins.lookup_package_plugin(package_type) package_dict['type'] = package_type else: package_plugin = lib_plugins.lookup_package_plugin(package_dict['type']) errors = {} # if package has been previously imported try: existing_package_dict = self._find_existing_package(package_dict) if 'metadata_modified' not in package_dict or \ package_dict['metadata_modified'] > existing_package_dict.get('metadata_modified'): schema = package_plugin.update_package_schema() data, errors = lib_plugins.plugin_validate( package_plugin, base_context, package_dict, schema, 'package_update') except NotFound: schema = package_plugin.create_package_schema() data, errors = lib_plugins.plugin_validate( package_plugin, base_context, package_dict, schema, 'package_create') if errors: raise ValidationError(errors) result = self._create_or_update_package( package_dict, harvest_object, package_dict_form='package_show') if result: log.info("Updating organization %s last_harvested date and harvester name %s",
def _group_or_org_update(context, data_dict, is_org=False):
    """Shared implementation of group_update and organization_update.

    Validates ``data_dict`` with the group plugin's update schema, saves the
    group, runs plugin edit hooks, records a change/delete activity and
    finalises any image upload.

    :param is_org: selects org-specific auth, plugin interface, validation
        action and activity type.
    :returns: the dictized, saved group.
    :raises NotFound: when the id does not resolve to a group.
    :raises ValidationError: when validation fails (session rolled back).
    """
    model = context['model']
    user = context['user']
    session = context['session']
    id = _get_or_bust(data_dict, 'id')
    group = model.Group.get(id)
    context["group"] = group
    if group is None:
        raise NotFound('Group was not found.')
    # Force the stored type so validation cannot switch group <-> org.
    data_dict['type'] = group.type
    # get the schema
    group_plugin = lib_plugins.lookup_group_plugin(group.type)
    try:
        schema = group_plugin.form_to_db_schema_options({
            'type': 'update',
            'api': 'api_version' in context,
            'context': context
        })
    except AttributeError:
        schema = group_plugin.form_to_db_schema()
    # Stage the image upload; the actual file write happens after commit
    # checks below via upload.upload().
    upload = uploader.get_uploader('group', group.image_url)
    upload.update_data_dict(data_dict, 'image_url', 'image_upload',
                            'clear_upload')
    if is_org:
        _check_access('organization_update', context, data_dict)
    else:
        _check_access('group_update', context, data_dict)
    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)
    data, errors = lib_plugins.plugin_validate(
        group_plugin, context, data_dict, schema,
        'organization_update' if is_org else 'group_update')
    log.debug('group_update validate_errs=%r user=%s group=%s data_dict=%r',
              errors, context.get('user'),
              context.get('group').name if context.get('group') else '',
              data_dict)
    if errors:
        session.rollback()
        raise ValidationError(errors)
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")
    group = model_save.group_dict_save(data, context,
                                       prevent_packages_update=is_org)
    if is_org:
        plugin_type = plugins.IOrganizationController
    else:
        plugin_type = plugins.IGroupController
    for item in plugins.PluginImplementations(plugin_type):
        item.edit(group)
    if is_org:
        activity_type = 'changed organization'
    else:
        activity_type = 'changed group'
    activity_dict = {
        # Python 2 code path: ``user`` is a byte string here.
        'user_id': model.User.by_name(user.decode('utf8')).id,
        'object_id': group.id,
        'activity_type': activity_type,
    }
    # Handle 'deleted' groups.
    # When the user marks a group as deleted this comes through here as
    # a 'changed' group activity. We detect this and change it to a 'deleted'
    # activity.
    if group.state == u'deleted':
        if session.query(ckan.model.Activity).filter_by(
                object_id=group.id, activity_type='deleted').all():
            # A 'deleted group' activity for this group has already been
            # emitted.
            # FIXME: What if the group was deleted and then activated again?
            activity_dict = None
        else:
            # We will emit a 'deleted group' activity.
            activity_dict['activity_type'] = 'deleted group'
    if activity_dict is not None:
        activity_dict['data'] = {
            'group': dictization.table_dictize(group, context)
        }
        activity_create_context = {
            'model': model,
            'user': user,
            'defer_commit': True,
            'ignore_auth': True,
            'session': session
        }
        _get_action('activity_create')(activity_create_context, activity_dict)
    # TODO: Also create an activity detail recording what exactly changed
    # in the group.
    upload.upload(uploader.get_max_image_size())
    if not context.get('defer_commit'):
        model.repo.commit()
    return model_dictize.group_dictize(group, context)
def package_list_show_for_reindex(context, dataset_ids):
    '''
    Wraps the default package_show and adds additional information to the
    resources: resource size (for uploaded files) and resource revision
    timestamp.

    For ``dataset``-typed packages it also computes HDX-specific fields:
    download/pageview counters (from jql caches), quickcharts/geodata flags
    and showcase counts.

    :param context: standard CKAN action context; ``session`` is set here.
    :param dataset_ids: iterable of package ids to dictize for reindexing.
    :returns: list of (validated) package dicts.
    '''
    model = context['model']
    context['session'] = model.Session
    dataset_dicts = []
    # Single bulk query instead of one Package.get() per id.
    all_datasets = model.Session.query(model.Package).filter(
        model.Package.id.in_(dataset_ids)).all()
    for pkg in all_datasets:
        # log.info('Package {}'.format(pkg.id))
        # NOTE(review): a query result row is never None, so this guard is
        # effectively dead — kept as inherited from upstream package_show.
        if pkg is None:
            raise NotFound
        context['package'] = pkg
        context['reindexing'] = True
        # NOTE(review): package_dict is always None here, so the dictize
        # always runs; pattern kept from the upstream code this wraps.
        package_dict = None
        if not package_dict:
            package_dict = model_dictize.package_dictize(pkg, context)
        package_dict_validated = False
        # Plugin hooks run in the same order as core package_show.
        if context.get('for_view'):
            for item in plugins.PluginImplementations(
                    plugins.IPackageController):
                package_dict = item.before_view(package_dict)
        for item in plugins.PluginImplementations(plugins.IPackageController):
            item.read(pkg)
        # for item in plugins.PluginImplementations(plugins.IResourceController):
        #     for resource_dict in package_dict['resources']:
        #         item.before_show(resource_dict)
        if not package_dict_validated:
            package_plugin = lib_plugins.lookup_package_plugin(
                package_dict['type'])
            if 'schema' in context:
                schema = context['schema']
            else:
                schema = package_plugin.show_package_schema()
            if schema and context.get('validate', True):
                package_dict, errors = lib_plugins.plugin_validate(
                    package_plugin, context, package_dict, schema,
                    'package_show')
        for item in plugins.PluginImplementations(plugins.IPackageController):
            item.after_show(context, package_dict)
        # added because showcase schema validation is generating
        # "ckan.lib.navl.dictization_functions.Missing"
        if 'tracking_summary' in package_dict and not package_dict.get(
                'tracking_summary'):
            del package_dict['tracking_summary']
        if package_dict.get(
                'type'
        ) == 'dataset':  # this shouldn't be executed from showcases
            for resource_dict in package_dict.get('resources', []):
                # Fill in expensive per-resource fields only when the
                # helper says they are missing/stale for this context.
                if _should_manually_load_property_value(
                        context, resource_dict, 'size'):
                    resource_dict['size'] = _get_resource_filesize(
                        resource_dict)
                if _should_manually_load_property_value(
                        context, resource_dict, 'revision_last_updated'):
                    resource_dict[
                        'revision_last_updated'] = _get_resource_revison_timestamp(
                        resource_dict)
                if _should_manually_load_property_value(
                        context, resource_dict, 'hdx_rel_url'):
                    resource_dict[
                        'hdx_rel_url'] = _get_resource_hdx_relative_url(
                        resource_dict)
            # downloads_list = (res['tracking_summary']['total'] for res in package_dict.get('resources', []) if
            #                   res.get('tracking_summary', {}).get('total'))
            # package_dict['total_res_downloads'] = sum(downloads_list)
            if _should_manually_load_property_value(context, package_dict,
                                                    'total_res_downloads'):
                # Counter comes from the cached jql (analytics) lookup.
                total_res_downloads = jql.downloads_per_dataset_all_cached(
                ).get(package_dict['id'], 0)
                log.debug('Dataset {} has {} downloads'.format(
                    package_dict['id'], total_res_downloads))
                package_dict['total_res_downloads'] = total_res_downloads
            if _should_manually_load_property_value(context, package_dict,
                                                    'pageviews_last_14_days'):
                pageviews_last_14_days = jql.pageviews_per_dataset_last_14_days_cached(
                ).get(package_dict['id'], 0)
                log.debug(
                    'Dataset {} has {} page views in the last 14 days'.format(
                        package_dict['id'], pageviews_last_14_days))
                package_dict['pageviews_last_14_days'] = pageviews_last_14_days
            if _should_manually_load_property_value(context, package_dict,
                                                    'has_quickcharts'):
                package_dict['has_quickcharts'] = False
                # NOTE(review): the break below only exits the views loop,
                # so remaining resources are still queried even after the
                # flag is already True — presumably acceptable; confirm.
                for resource_dict in package_dict.get('resources', []):
                    resource_views = get_action('resource_view_list')(
                        context, {
                            'id': resource_dict['id']
                        }) or []
                    for view in resource_views:
                        if view.get("view_type") == 'hdx_hxl_preview':
                            package_dict['has_quickcharts'] = True
                            break
            if _should_manually_load_property_value(context, package_dict,
                                                    'has_geodata'):
                package_dict['has_geodata'] = False
                for resource_dict in package_dict.get('resources', []):
                    if resource_dict.get('format') in GEODATA_FORMATS:
                        package_dict['has_geodata'] = True
                        break
            if _should_manually_load_property_value(context, package_dict,
                                                    'has_showcases'):
                package_dict['has_showcases'] = False
                package_dict['num_of_showcases'] = 0
                num_of_showcases = len(
                    hdx_get_package_showcase_id_list(
                        context, {'package_id': package_dict['id']}))
                if num_of_showcases > 0:
                    package_dict['has_showcases'] = True
                    package_dict['num_of_showcases'] = num_of_showcases
        dataset_dicts.append(package_dict)
    return dataset_dicts
def index_package(self, pkg_dict, defer_commit=False):
    """Flatten a package dict and send it to the Solr search index.

    Python 2 code (``unicode``, ``iteritems``, ``except X, e`` syntax).
    Mutates a working copy of ``pkg_dict`` heavily: extras, tags, groups,
    resources and relationships are folded into flat indexable fields.

    :param pkg_dict: the package dict to index; ``None`` is a no-op.
    :param defer_commit: when true, do not ask Solr to commit.
    :raises SearchIndexError: when Solr rejects the document.
    """
    if pkg_dict is None:
        return
    # Snapshot of the raw dict before any mutation below.
    data_dict_json = json.dumps(pkg_dict)
    if config.get("ckan.cache_validated_datasets", True):
        package_plugin = lib_plugins.lookup_package_plugin(pkg_dict.get("type"))
        schema = package_plugin.show_package_schema()
        validated_pkg_dict, errors = lib_plugins.plugin_validate(
            package_plugin, {"model": model, "session": model.Session},
            pkg_dict, schema, "package_show"
        )
        pkg_dict["validated_data_dict"] = json.dumps(
            validated_pkg_dict,
            cls=ckan.lib.navl.dictization_functions.MissingNullEncoder
        )
    pkg_dict["data_dict"] = data_dict_json
    # add to string field for sorting
    title = pkg_dict.get("title")
    if title:
        pkg_dict["title_string"] = title
    # Non-active packages are removed from the index instead.
    if (not pkg_dict.get("state")) or ("active" not in pkg_dict.get("state")):
        return self.delete_package(pkg_dict)
    index_fields = RESERVED_FIELDS + pkg_dict.keys()
    # include the extras in the main namespace
    extras = pkg_dict.get("extras", [])
    for extra in extras:
        key, value = extra["key"], extra["value"]
        if isinstance(value, (tuple, list)):
            value = " ".join(map(unicode, value))
        key = "".join([c for c in key if c in KEY_CHARS])
        pkg_dict["extras_" + key] = value
        if key not in index_fields:
            pkg_dict[key] = value
    pkg_dict.pop("extras", None)
    # add tags, removing vocab tags from 'tags' list and adding them as
    # vocab_<tag name> so that they can be used in facets
    non_vocab_tag_names = []
    tags = pkg_dict.pop("tags", [])
    context = {"model": model}
    for tag in tags:
        if tag.get("vocabulary_id"):
            data = {"id": tag["vocabulary_id"]}
            vocab = logic.get_action("vocabulary_show")(context, data)
            key = u"vocab_%s" % vocab["name"]
            if key in pkg_dict:
                pkg_dict[key].append(tag["name"])
            else:
                pkg_dict[key] = [tag["name"]]
        else:
            non_vocab_tag_names.append(tag["name"])
    pkg_dict["tags"] = non_vocab_tag_names
    # add groups
    groups = pkg_dict.pop("groups", [])
    # we use the capacity to make things private in the search index
    if pkg_dict["private"]:
        pkg_dict["capacity"] = "private"
    else:
        pkg_dict["capacity"] = "public"
    pkg_dict["groups"] = [group["name"] for group in groups]
    # if there is an owner_org we want to add this to groups for index
    # purposes
    if pkg_dict.get("organization"):
        pkg_dict["organization"] = pkg_dict["organization"]["name"]
    else:
        pkg_dict["organization"] = None
    # tracking
    tracking_summary = pkg_dict.pop("tracking_summary", None)
    if tracking_summary:
        pkg_dict["views_total"] = tracking_summary["total"]
        pkg_dict["views_recent"] = tracking_summary["recent"]
    resource_fields = [
        ("name", "res_name"),
        ("description", "res_description"),
        ("format", "res_format"),
        ("url", "res_url"),
        ("resource_type", "res_type"),
    ]
    resource_extras = [(e, "res_extras_" + e)
                       for e in model.Resource.get_extra_columns()]
    # flatten the structure for indexing:
    for resource in pkg_dict.get("resources", []):
        for (okey, nkey) in resource_fields + resource_extras:
            pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u"")]
    pkg_dict.pop("resources", None)
    # Collect relationship names keyed by relationship type; object-side
    # relationships are flipped to their reverse type first.
    rel_dict = collections.defaultdict(list)
    subjects = pkg_dict.pop("relationships_as_subject", [])
    objects = pkg_dict.pop("relationships_as_object", [])
    for rel in objects:
        type = model.PackageRelationship.forward_to_reverse_type(rel["type"])
        rel_dict[type].append(model.Package.get(rel["subject_package_id"]).name)
    for rel in subjects:
        type = rel["type"]
        rel_dict[type].append(model.Package.get(rel["object_package_id"]).name)
    for key, value in rel_dict.iteritems():
        if key not in pkg_dict:
            pkg_dict[key] = value
    pkg_dict[TYPE_FIELD] = PACKAGE_TYPE
    # Save dataset type
    pkg_dict["dataset_type"] = pkg_dict["type"]
    # clean the dict fixing keys and dates
    # FIXME where are we getting these dirty keys from? can we not just
    # fix them in the correct place or is this something that always will
    # be needed? For my data not changing the keys seems to not cause a
    # problem.
    new_dict = {}
    bogus_date = datetime.datetime(1, 1, 1)
    for key, value in pkg_dict.items():
        key = key.encode("ascii", "ignore")
        if key.endswith("_date"):
            try:
                date = parse(value, default=bogus_date)
                if date != bogus_date:
                    value = date.isoformat() + "Z"
                else:
                    # The date field was empty, so dateutil filled it with
                    # the default bogus date
                    value = None
            except ValueError:
                # Unparseable date: drop the field entirely.
                continue
        new_dict[key] = value
    pkg_dict = new_dict
    for k in ("title", "notes", "title_string"):
        if k in pkg_dict and pkg_dict[k]:
            pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])
    # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
    # with UTC time (i.e trailing Z)
    # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
    pkg_dict["metadata_created"] += "Z"
    pkg_dict["metadata_modified"] += "Z"
    # mark this CKAN instance as data source:
    pkg_dict["site_id"] = config.get("ckan.site_id")
    # Strip a selection of the fields.
    # These fields are possible candidates for sorting search results on,
    # so we strip leading spaces because solr will sort " " before "a" or "A".
    for field_name in ["title"]:
        try:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()
        except KeyError:
            pass
    # add a unique index_id to avoid conflicts
    import hashlib
    pkg_dict["index_id"] = hashlib.md5(
        "%s%s" % (pkg_dict["id"], config.get("ckan.site_id"))).hexdigest()
    for item in PluginImplementations(IPackageController):
        pkg_dict = item.before_index(pkg_dict)
    assert pkg_dict, "Plugin must return non empty package dict on index"
    # send to solr:
    try:
        conn = make_connection()
        commit = not defer_commit
        if not asbool(config.get("ckan.search.solr_commit", "true")):
            commit = False
        conn.add_many([pkg_dict], _commit=commit)
    except solr.core.SolrException, e:
        msg = "Solr returned an error: {0} {1} - {2}".format(
            e.httpcode, e.reason, e.body[:1000]  # limit huge responses
        )
        raise SearchIndexError(msg)
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
        context, which is the default. Otherwise returns just the dataset id)
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']
    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id
    # FIXME: first modifications to package_updade begin here:
    # tag strings are reconstructed because validators are stripping
    # tags passed and only taking taks as tag_string values
    # image upload support has also been added here
    old_data = get_action('package_show')(context, {'id': pkg.id})
    '''
    Constructing the tag_string from the given tags.
    There must be at least one tag, otherwise the tag_string will be empty
    and a validation error will be raised.
    '''
    if not data_dict.get('tag_string'):
        data_dict['tag_string'] = ', '.join(
            h.dict_list_reduce(data_dict.get('tags', {}), 'name'))
    # Backfill every field the caller omitted from the stored package
    # (Python 2: dict.iteritems).
    for key, value in old_data.iteritems():
        if key not in data_dict:
            data_dict[key] = value
    # data_dict['resources'] = data_dict.get('resources', old_data.get('resources'))
    # iso_topic_cat = data_dict.get('iso_topic_string', [])
    # if isinstance(iso_topic_cat, basestring):
    #     iso_topic_cat = [iso_topic_cat]
    #
    # data_dict['iso_topic_string'] = ','.join(iso_topic_cat)
    # Set the package last modified date
    data_dict['record_last_modified'] = str(datetime.date.today())
    # If the Created Date has not yet been set, then set it
    if data_dict['edc_state'] == 'DRAFT' and not data_dict.get(
            'record_create_date'):
        data_dict['record_create_date'] = str(datetime.date.today())
    # If the Publish Date has not yet been set, then set it
    if data_dict['edc_state'] == 'PUBLISHED' and not data_dict.get(
            'record_publish_date'):
        data_dict['record_publish_date'] = str(datetime.date.today())
    # If the Archive Date has not yet been set, then set it
    if data_dict['edc_state'] == 'ARCHIVED' and not data_dict.get(
            'record_archive_date'):
        data_dict['record_archive_date'] = str(datetime.date.today())
    _check_access('package_update', context, data_dict)
    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()
    image_url = old_data.get('image_url', None)
    upload = uploader.Upload('edc', image_url)
    upload.update_data_dict(data_dict, 'image_url', 'image_upload',
                            'clear_upload')
    # Adding image display url for the uploaded image
    image_url = data_dict.get('image_url')
    data_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # Locally uploaded file: build a fully-qualified static URL.
        image_url = munge.munge_filename(image_url)
        data_dict['image_display_url'] = h.url_for_static(
            'uploads/edc/%s' % data_dict.get('image_url'), qualified=True)
    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    # FIXME: modifications to package_update end here^
    data, errors = lib_plugins.plugin_validate(package_plugin, context,
                                               data_dict, schema,
                                               'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")
    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)
    pkg = model_save.package_dict_save(data, context)
    # Attach owner org with auth bypassed and commit deferred.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)
    upload.upload(uploader.get_max_image_size())
    # TODO the next two blocks are copied from ckan/ckan/logic/action/update.py
    # This codebase is currently hard to maintain because large chunks of the
    # CKAN action API and the CKAN controllers are simply overriden. This is
    # probably worse than just forking CKAN would have been, because in that
    # case at least we could track changes. - @deniszgonjanin
    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id
    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {
                'model': context['model'],
                'user': context['user'],
                'ignore_auth': True
            }, {
                'package': data
            })
    if not context.get('defer_commit'):
        model.repo.commit()
    log.debug('Updated object %s' % pkg.name)
    return_id_only = context.get('return_id_only', False)
    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)
    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})
    '''
    Send state change notifications if required;
    Added by Khalegh Mamakani
    Using a thread to run the job in the background so that package_update
    will not wait for notifications sending.
    '''
    old_state = old_data.get('edc_state')
    # NOTE(review): the action context is rebuilt here from the pylons
    # request globals (c, g) — this makes package_update web-request-only;
    # confirm it is never called from background jobs.
    context = {
        'model': model,
        'session': model.Session,
        'user': c.user or c.author,
        'auth_user_obj': c.userobj
    }
    dataset_url = config.get('ckan.site_url') + h.url_for(
        controller='package', action="read", id=data_dict['name'])
    import threading
    notify_thread = threading.Thread(target=check_record_state,
                                     args=(context, old_state, data_dict,
                                           g.site_title, g.site_url,
                                           dataset_url))
    notify_thread.start()
    return output
def _group_or_org_update(context, data_dict, is_org=False):
    """Update a group or an organization (shared implementation).

    Validates ``data_dict`` against the group plugin's update schema, saves
    the group, runs plugin ``edit`` hooks, records a 'changed'/'deleted'
    activity, stores any uploaded image and commits (unless
    ``defer_commit`` is set in the context).

    :param context: standard action context; ``model``, ``user`` and
        ``session`` are required keys
    :param data_dict: group fields; ``id`` is required
    :param is_org: True when updating an organization rather than a group
    :returns: the dictized, updated group
    :raises NotFound: if no group matches ``id``
    :raises ValidationError: if schema validation fails
    """
    model = context["model"]
    user = context["user"]
    session = context["session"]
    id = _get_or_bust(data_dict, "id")
    group = model.Group.get(id)
    context["group"] = group
    if group is None:
        raise NotFound("Group was not found.")
    # the group type is immutable on update; force it back to the stored value
    data_dict["type"] = group.type

    # get the schema
    group_plugin = lib_plugins.lookup_group_plugin(group.type)
    try:
        schema = group_plugin.form_to_db_schema_options(
            {"type": "update", "api": "api_version" in context, "context": context}
        )
    except AttributeError:
        # plugin does not provide schema options; fall back to plain schema
        schema = group_plugin.form_to_db_schema()

    upload = uploader.get_uploader("group", group.image_url)
    upload.update_data_dict(data_dict, "image_url", "image_upload", "clear_upload")

    if is_org:
        _check_access("organization_update", context, data_dict)
    else:
        _check_access("group_update", context, data_dict)

    if "api_version" not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        group_plugin, context, data_dict, schema,
        "organization_update" if is_org else "group_update"
    )
    log.debug(
        "group_update validate_errs=%r user=%s group=%s data_dict=%r",
        errors,
        context.get("user"),
        context.get("group").name if context.get("group") else "",
        data_dict,
    )
    if errors:
        session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if "message" in context:
        rev.message = context["message"]
    else:
        rev.message = _(u"REST API: Update object %s") % data.get("name")

    # prevent_packages_update: organizations must not modify their package
    # list through this call
    group = model_save.group_dict_save(data, context, prevent_packages_update=is_org)

    if is_org:
        plugin_type = plugins.IOrganizationController
    else:
        plugin_type = plugins.IGroupController
    for item in plugins.PluginImplementations(plugin_type):
        item.edit(group)

    if is_org:
        activity_type = "changed organization"
    else:
        activity_type = "changed group"
    activity_dict = {
        # NOTE(review): user.decode("utf8") implies Python 2 byte-string user
        # names -- confirm before running under Python 3
        "user_id": model.User.by_name(user.decode("utf8")).id,
        "object_id": group.id,
        "activity_type": activity_type,
    }
    # Handle 'deleted' groups.
    # When the user marks a group as deleted this comes through here as
    # a 'changed' group activity. We detect this and change it to a 'deleted'
    # activity.
    if group.state == u"deleted":
        if session.query(ckan.model.Activity).filter_by(object_id=group.id, activity_type="deleted").all():
            # A 'deleted group' activity for this group has already been
            # emitted.
            # FIXME: What if the group was deleted and then activated again?
            activity_dict = None
        else:
            # We will emit a 'deleted group' activity.
            activity_dict["activity_type"] = "deleted group"
    if activity_dict is not None:
        activity_dict["data"] = {"group": dictization.table_dictize(group, context)}
        activity_create_context = {
            "model": model,
            "user": user,
            "defer_commit": True,  # committed with the group update below
            "ignore_auth": True,
            "session": session,
        }
        _get_action("activity_create")(activity_create_context, activity_dict)
        # TODO: Also create an activity detail recording what exactly changed
        # in the group.

    upload.upload(uploader.get_max_image_size())
    if not context.get("defer_commit"):
        model.repo.commit()
    return model_dictize.group_dictize(group, context)
def package_update(context, data_dict):
    """Update a dataset (package), with a customized owner_org validator.

    Mirrors core ``package_update`` but swaps ``default_oov`` for ``uds_oov``
    in the ``owner_org`` schema entry, and bumps ``metadata_modified``
    directly on the row to avoid creating an extra revision.

    :param context: standard action context (``model`` and ``user`` required)
    :param data_dict: dataset fields; ``id`` or ``name`` is required
    :returns: the updated dataset dict, or just its id when
        ``return_id_only`` is set in the context
    :raises NotFound: if no package matches ``id``/``name``
    :raises ValidationError: if schema validation fails
    """
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']
    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()
    # We modify the schema here to replace owner_org_validator by our own
    if 'owner_org' in schema:
        schema['owner_org'] = [uds_oov if f is default_oov else f
                               for f in schema['owner_org']]

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    get_action('package_owner_org_update')(context_org_update,
                                           {'id': pkg.id,
                                            'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()
    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else get_action('package_show')(context, {'id': data_dict['id']})
    return output
def hdx_group_or_org_create(context, data_dict, is_org=False):
    """Create a group or organization with HDX's multi-image handling.

    Overriding default so that orgs can have multiple images: a square and a
    rectangular one, serialized into the ``customization`` JSON field of
    ``data_dict`` before validation.

    Fixes over the previous version:
    - ``upload1``/``upload2`` were only bound inside conditional branches;
      when no image was uploaded the later ``upload1.upload(...)`` raised a
      ``NameError`` that a bare ``except:`` silently swallowed. They are now
      initialised to ``None`` and only used when set.
    - bare ``except:`` clauses narrowed to ``except Exception`` and upload
      failures are logged instead of silently dropped.
    - ``!= None`` comparisons replaced with identity-safe membership tests.

    :param context: standard action context (``model``, ``user``, ``session``)
    :param data_dict: group fields; may carry ``image_sq_upload`` /
        ``image_rect_upload`` file uploads and a JSON ``customization`` field
    :param is_org: True to create an organization rather than a group
    :returns: the dictized, newly created group
    :raises ValidationError: if schema validation fails
    """
    model = context['model']
    user = context['user']
    session = context['session']
    data_dict['is_organization'] = is_org

    if is_org:
        check_access('organization_create', context, data_dict)
    else:
        check_access('group_create', context, data_dict)

    # get the schema
    group_type = data_dict.get('type')
    group_plugin = lib_plugins.lookup_group_plugin(group_type)
    try:
        schema = group_plugin.form_to_db_schema_options({
            'type': 'create', 'api': 'api_version' in context,
            'context': context})
    except AttributeError:
        schema = group_plugin.form_to_db_schema()

    # On create there is no previous image to fall back to.
    customization = {'image_sq': '', 'image_rect': ''}

    try:
        data_dict['customization'] = json.loads(data_dict['customization'])
    except Exception:
        # missing key or invalid JSON simply means "no customization yet"
        data_dict['customization'] = {}

    # Initialise both uploaders so the upload step below can never hit an
    # unbound name.
    upload1 = None
    upload2 = None
    if data_dict.get('image_sq_upload') not in ('', None):
        # If old image was uploaded remove it
        if customization['image_sq']:
            remove_image(customization['image_sq'])
        upload1 = uploader.Upload('group', customization['image_sq'])
        upload1.update_data_dict(data_dict, 'image_sq',
                                 'image_sq_upload', 'clear_upload')
    if data_dict.get('image_rect_upload') not in ('', None):
        if customization['image_rect']:
            remove_image(customization['image_rect'])
        upload2 = uploader.Upload('group', customization['image_rect'])
        upload2.update_data_dict(data_dict, 'image_rect',
                                 'image_rect_upload', 'clear_upload')

    # NOTE(review): return value unused; kept in case get_storage_path()
    # primes uploader configuration as a side effect -- confirm and drop.
    storage_path = uploader.get_storage_path()

    # Rearrange things the way we need them: prefer the freshly supplied
    # image name, fall back to the previous value, default to ''.
    for img_key in ('image_sq', 'image_rect'):
        try:
            if data_dict[img_key] not in ('', None):
                data_dict['customization'][img_key] = data_dict[img_key]
            else:
                data_dict['customization'][img_key] = customization[img_key]
        except KeyError:
            data_dict['customization'][img_key] = ''

    data_dict['customization'] = json.dumps(data_dict['customization'])

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        group_plugin, context, data_dict, schema,
        'organization_create' if is_org else 'group_create')
    log.debug('group_create validate_errs=%r user=%s group=%s data_dict=%r',
              errors, context.get('user'), data_dict.get('name'), data_dict)
    if errors:
        session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")

    group = model_save.group_dict_save(data, context)

    # Needed to let extensions know the group id
    session.flush()

    if is_org:
        plugin_type = plugins.IOrganizationController
    else:
        plugin_type = plugins.IGroupController
    for item in plugins.PluginImplementations(plugin_type):
        item.create(group)

    if is_org:
        activity_type = 'new organization'
    else:
        activity_type = 'new group'
    user_id = model.User.by_name(user.decode('utf8')).id
    activity_dict = {
        'user_id': user_id,
        'object_id': group.id,
        'activity_type': activity_type,
    }
    activity_dict['data'] = {
        'group': ckan.lib.dictization.table_dictize(group, context)
    }
    activity_create_context = {
        'model': model,
        'user': user,
        'defer_commit': True,
        'ignore_auth': True,
        'session': session
    }
    logic.get_action('activity_create')(activity_create_context,
                                        activity_dict)

    # Only attempt uploads that were actually prepared; log failures rather
    # than swallowing every exception silently.
    for prepared_upload in (upload1, upload2):
        if prepared_upload is None:
            continue
        try:
            prepared_upload.upload(uploader.get_max_image_size())
        except Exception:
            log.exception('Image upload failed for group %s',
                          data_dict.get('name'))

    if not context.get('defer_commit'):
        model.repo.commit()
    context["group"] = group
    context["id"] = group.id

    # creator of group/org becomes an admin
    # this needs to be after the repo.commit or else revisions break
    member_dict = {
        'id': group.id,
        'object': user_id,
        'object_type': 'user',
        'capacity': 'admin',
    }
    member_create_context = {
        'model': model,
        'user': user,
        'ignore_auth': True,  # we are not a member of the group at this point
        'session': session
    }
    logic.get_action('member_create')(member_create_context, member_dict)
    log.debug('Created object %s' % group.name)
    return model_dictize.group_dictize(group, context)
def _group_or_org_create(context, data_dict, is_org=False):
    """Create a group or an organization (shared implementation).

    Validates ``data_dict`` against the group plugin's create schema, saves
    the group, runs plugin ``create`` hooks, records a 'new group/org'
    activity, stores any uploaded image, commits (unless ``defer_commit``)
    and makes the creating user an admin member.

    :param context: standard action context; ``model``, ``user`` and
        ``session`` are required keys
    :param data_dict: group fields
    :param is_org: True to create an organization rather than a group
    :returns: the dictized, newly created group
    :raises ValidationError: if schema validation fails
    """
    model = context["model"]
    user = context["user"]
    session = context["session"]
    data_dict["is_organization"] = is_org

    upload = uploader.Upload("group")
    upload.update_data_dict(data_dict, "image_url", "image_upload", "clear_upload")

    # get the schema
    group_type = data_dict.get("type")
    group_plugin = lib_plugins.lookup_group_plugin(group_type)
    try:
        schema = group_plugin.form_to_db_schema_options(
            {"type": "create", "api": "api_version" in context, "context": context}
        )
    except AttributeError:
        schema = group_plugin.form_to_db_schema()

    if "api_version" not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        group_plugin, context, data_dict, schema,
        "organization_create" if is_org else "group_create"
    )
    log.debug(
        "group_create validate_errs=%r user=%s group=%s data_dict=%r",
        errors,
        context.get("user"),
        data_dict.get("name"),
        data_dict,
    )
    if errors:
        session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if "message" in context:
        rev.message = context["message"]
    else:
        rev.message = _(u"REST API: Create object %s") % data.get("name")

    group = model_save.group_dict_save(data, context)

    # NOTE(review): user.decode("utf8") implies Python 2 byte-string user
    # names -- confirm before running under Python 3
    if user:
        admins = [model.User.by_name(user.decode("utf8"))]
    else:
        admins = []
    model.setup_default_user_roles(group, admins)

    # Needed to let extensions know the group id
    session.flush()

    if is_org:
        plugin_type = plugins.IOrganizationController
    else:
        plugin_type = plugins.IGroupController
    for item in plugins.PluginImplementations(plugin_type):
        item.create(group)

    if is_org:
        activity_type = "new organization"
    else:
        activity_type = "new group"
    user_id = model.User.by_name(user.decode("utf8")).id
    activity_dict = {"user_id": user_id, "object_id": group.id, "activity_type": activity_type}
    activity_dict["data"] = {"group": ckan.lib.dictization.table_dictize(group, context)}
    activity_create_context = {
        "model": model,
        "user": user,
        "defer_commit": True,  # committed together with the group below
        "ignore_auth": True,
        "session": session,
    }
    logic.get_action("activity_create")(activity_create_context, activity_dict)

    upload.upload(uploader.get_max_image_size())
    if not context.get("defer_commit"):
        model.repo.commit()
    context["group"] = group
    context["id"] = group.id

    # creator of group/org becomes an admin
    # this needs to be after the repo.commit or else revisions break
    member_dict = {"id": group.id, "object": user_id, "object_type": "user", "capacity": "admin"}
    member_create_context = {
        "model": model,
        "user": user,
        "ignore_auth": True,  # we are not a member of the group at this point
        "session": session,
    }
    logic.get_action("member_create")(member_create_context, member_dict)
    log.debug("Created object %s" % group.name)
    return model_dictize.group_dictize(group, context)
def package_create(fb, context, data_dict):
    """Create a dataset (package), with a customized owner_org validator.

    Mirrors core ``package_create`` but swaps ``default_oov`` for ``uds_oov``
    in the ``owner_org`` schema entry.

    :param fb: not referenced in this body -- presumably a bound first
        argument from how this action is registered; TODO confirm with caller
    :param context: standard action context (``model`` and ``user`` required)
    :param data_dict: dataset fields
    :returns: the created dataset dict, or just its id when
        ``return_id_only`` is set in the context
    :raises ValidationError: if schema validation fails
    """
    model = context['model']
    user = context['user']
    package_type = data_dict.get('type')
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.create_package_schema()
    # We modify the schema here to replace owner_org_validator by our own
    if 'owner_org' in schema:
        schema['owner_org'] = [
            uds_oov if f is default_oov else f for f in schema['owner_org']
        ]

    check_access('package_create', context, data_dict)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(package_plugin, context,
                                               data_dict, schema,
                                               'package_create')
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'), data.get('name'), data_dict)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")

    admins = []
    if user:
        user_obj = model.User.by_name(user.decode('utf8'))
        if user_obj:
            admins = [user_obj]
            data['creator_user_id'] = user_obj.id

    pkg = model_save.package_dict_save(data, context)

    model.setup_default_user_roles(pkg, admins)
    # Needed to let extensions know the package id
    model.Session.flush()
    data['id'] = pkg.id

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)
        item.after_create(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    return_id_only = context.get('return_id_only', False)
    output = context['id'] if return_id_only \
        else get_action('package_show')(context, {'id': context['id']})
    return output
def package_create(context, data_dict):
    '''Create a new dataset (package).

    You must be authorized to create new datasets. If you specify any groups
    for the new dataset, you must also be authorized to edit these groups.

    Plugins may change the parameters of this function depending on the value
    of the ``type`` parameter, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'``
    :type name: string
    :param title: the title of the dataset (optional, default: same as
        ``name``)
    :type title: string
    :param private: If ``True`` creates a private dataset
    :type private: bool
    :param author: the name of the dataset's author (optional)
    :type author: string
    :param author_email: the email address of the dataset's author (optional)
    :type author_email: string
    :param maintainer: the name of the dataset's maintainer (optional)
    :type maintainer: string
    :param maintainer_email: the email address of the dataset's maintainer
        (optional)
    :type maintainer_email: string
    :param license_id: the id of the dataset's license, see
        :py:func:`~ckan.logic.action.get.license_list` for available values
        (optional)
    :type license_id: license id string
    :param notes: a description of the dataset (optional)
    :type notes: string
    :param url: a URL for the dataset's source (optional)
    :type url: string
    :param version: (optional)
    :type version: string, no longer than 100 characters
    :param state: the current state of the dataset, e.g. ``'active'`` or
        ``'deleted'``, only active datasets show up in search results and
        other lists of datasets, this parameter will be ignored if you are not
        authorized to change the state of the dataset (optional, default:
        ``'active'``)
    :type state: string
    :param type: the type of the dataset (optional),
        :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugins associate
        themselves with different dataset types and provide custom dataset
        handling behaviour for these types
    :type type: string
    :param resources: the dataset's resources, see
        :py:func:`resource_create` for the format of resource dictionaries
        (optional)
    :type resources: list of resource dictionaries
    :param tags: the dataset's tags, see :py:func:`tag_create` for the format
        of tag dictionaries (optional)
    :type tags: list of tag dictionaries
    :param extras: the dataset's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to datasets, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string)
    :type extras: list of dataset extra dictionaries
    :param relationships_as_object: see
        :py:func:`package_relationship_create` for the format of relationship
        dictionaries (optional)
    :type relationships_as_object: list of relationship dictionaries
    :param relationships_as_subject: see
        :py:func:`package_relationship_create` for the format of relationship
        dictionaries (optional)
    :type relationships_as_subject: list of relationship dictionaries
    :param groups: the groups to which the dataset belongs (optional), each
        group dictionary should have one or more of the following keys which
        identify an existing group: ``'id'`` (the id of the group, string),
        or ``'name'`` (the name of the group, string), to see which groups
        exist call :py:func:`~ckan.logic.action.get.group_list`
    :type groups: list of dictionaries
    :param owner_org: the id of the dataset's owning organization, see
        :py:func:`~ckan.logic.action.get.organization_list` or
        :py:func:`~ckan.logic.action.get.organization_list_for_user` for
        available values (optional)
    :type owner_org: string

    :returns: the newly created dataset (unless 'return_id_only' is set to
        True in the context, in which case just the dataset id will be
        returned)
    :rtype: dictionary

    '''
    # HDX-specific: normalise batch-mode flags on the context before anything
    # else so the batch code injection below sees consistent state.
    process_batch_mode(context, data_dict)

    model = context['model']
    user = context['user']

    if 'type' not in data_dict:
        package_plugin = lib_plugins.lookup_package_plugin()
        try:
            # use first type as default if user didn't provide type
            package_type = package_plugin.package_types()[0]
        except (AttributeError, IndexError):
            package_type = 'dataset'
            # in case a 'dataset' plugin was registered w/o fallback
            package_plugin = lib_plugins.lookup_package_plugin(package_type)
        data_dict['type'] = package_type
    else:
        package_plugin = lib_plugins.lookup_package_plugin(data_dict['type'])

    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.create_package_schema()

    _check_access('package_create', context, data_dict)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)

    # Inject a code representing the batch within which this dataset was modified
    if context.get(BATCH_MODE) != BATCH_MODE_DONT_GROUP:
        data_dict['batch'] = get_batch_or_generate(data_dict.get('owner_org'))

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_create')

    if 'tags' in data:
        data['tags'] = helpers.get_tag_vocabulary(data['tags'])
    if 'groups' in data:
        # HDX-specific: denormalise group membership into a solr_additions
        # extra used by the search index
        additions = {
            'key': 'solr_additions',
            'value': helpers.build_additions(data['groups'])
        }
        if not 'extras' in data:
            data['extras'] = []
        data['extras'].append(additions)

    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'), data.get('name'), data_dict)
    if errors:
        model.Session.rollback()
        raise logic.ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")

    if user:
        user_obj = model.User.by_name(user.decode('utf8'))
        if user_obj:
            data['creator_user_id'] = user_obj.id

    # Replace model_save.package_dict_save() call with our wrapped version to be able to save groups
    # pkg = model_save.package_dict_save(data, context)
    from ckanext.hdx_package.actions.update import modified_save
    pkg = modified_save(context, data)
    #pkg = model_save.package_dict_save(data, context)

    # Needed to let extensions know the package and resources ids
    model.Session.flush()
    data['id'] = pkg.id
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    context_org_update['add_revision'] = False
    _get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)
        item.after_create(context, data)

    # Make sure that a user provided schema is not used in create_views
    # and on package_show
    context.pop('schema', None)

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {
                'model': context['model'],
                'user': context['user'],
                'ignore_auth': True
            },
            {
                'package': data
            })

    if not context.get('defer_commit'):
        model.repo.commit()

    # need to let rest api create
    context["package"] = pkg
    # this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    output = context['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': context['id']})

    return output
def _group_or_org_create(context, data_dict, is_org=False):
    """Create a group or an organization (shared implementation).

    Same flow as the double-quoted variant elsewhere in this file: validate,
    save, run plugin ``create`` hooks, record a 'new group/org' activity,
    store the uploaded image, commit (unless ``defer_commit``) and add the
    creating user as an admin member.

    :param context: standard action context (``model``, ``user``, ``session``)
    :param data_dict: group fields
    :param is_org: True to create an organization rather than a group
    :returns: the dictized, newly created group
    :raises ValidationError: if schema validation fails
    """
    model = context['model']
    user = context['user']
    session = context['session']
    data_dict['is_organization'] = is_org

    upload = uploader.Upload('group')
    upload.update_data_dict(data_dict, 'image_url',
                            'image_upload', 'clear_upload')
    # get the schema
    group_type = data_dict.get('type')
    group_plugin = lib_plugins.lookup_group_plugin(group_type)
    try:
        schema = group_plugin.form_to_db_schema_options({
            'type': 'create', 'api': 'api_version' in context,
            'context': context})
    except AttributeError:
        schema = group_plugin.form_to_db_schema()

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        group_plugin, context, data_dict, schema,
        'organization_create' if is_org else 'group_create')
    log.debug('group_create validate_errs=%r user=%s group=%s data_dict=%r',
              errors, context.get('user'), data_dict.get('name'), data_dict)

    if errors:
        session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user

    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")

    group = model_save.group_dict_save(data, context)

    # NOTE(review): user.decode('utf8') implies Python 2 byte-string user
    # names -- confirm before running under Python 3
    if user:
        admins = [model.User.by_name(user.decode('utf8'))]
    else:
        admins = []
    model.setup_default_user_roles(group, admins)
    # Needed to let extensions know the group id
    session.flush()

    if is_org:
        plugin_type = plugins.IOrganizationController
    else:
        plugin_type = plugins.IGroupController

    for item in plugins.PluginImplementations(plugin_type):
        item.create(group)

    if is_org:
        activity_type = 'new organization'
    else:
        activity_type = 'new group'

    user_id = model.User.by_name(user.decode('utf8')).id

    activity_dict = {
        'user_id': user_id,
        'object_id': group.id,
        'activity_type': activity_type,
    }
    activity_dict['data'] = {
        'group': ckan.lib.dictization.table_dictize(group, context)
    }
    activity_create_context = {
        'model': model,
        'user': user,
        'defer_commit': True,  # committed together with the group below
        'ignore_auth': True,
        'session': session
    }
    logic.get_action('activity_create')(activity_create_context,
                                        activity_dict)

    upload.upload(uploader.get_max_image_size())
    if not context.get('defer_commit'):
        model.repo.commit()
    context["group"] = group
    context["id"] = group.id

    # creator of group/org becomes an admin
    # this needs to be after the repo.commit or else revisions break
    member_dict = {
        'id': group.id,
        'object': user_id,
        'object_type': 'user',
        'capacity': 'admin',
    }
    member_create_context = {
        'model': model,
        'user': user,
        'ignore_auth': True,  # we are not a member of the group at this point
        'session': session
    }
    logic.get_action('member_create')(member_create_context, member_dict)

    log.debug('Created object %s' % group.name)
    return model_dictize.group_dictize(group, context)
def _group_or_org_update( context: Context, data_dict: DataDict, is_org: bool = False): model = context['model'] session = context['session'] id = _get_or_bust(data_dict, 'id') group = model.Group.get(id) if group is None: raise NotFound('Group was not found.') context["group"] = group data_dict['type'] = group.type # get the schema group_plugin = lib_plugins.lookup_group_plugin(group.type) try: schema = group_plugin.form_to_db_schema_options({'type': 'update', 'api': 'api_version' in context, 'context': context}) except AttributeError: schema = group_plugin.form_to_db_schema() upload = uploader.get_uploader('group') upload.update_data_dict(data_dict, 'image_url', 'image_upload', 'clear_upload') if is_org: _check_access('organization_update', context, data_dict) else: _check_access('group_update', context, data_dict) if 'api_version' not in context: # old plugins do not support passing the schema so we need # to ensure they still work try: group_plugin.check_data_dict(data_dict, schema) except TypeError: group_plugin.check_data_dict(data_dict) data, errors = lib_plugins.plugin_validate( group_plugin, context, data_dict, schema, 'organization_update' if is_org else 'group_update') group = context.get('group') log.debug('group_update validate_errs=%r user=%s group=%s data_dict=%r', errors, context.get('user'), group.name if group else '', data_dict) if errors: session.rollback() raise ValidationError(errors) contains_packages = 'packages' in data_dict group = model_save.group_dict_save( data, context, prevent_packages_update=is_org or not contains_packages ) if is_org: plugin_type = plugins.IOrganizationController else: plugin_type = plugins.IGroupController for item in plugins.PluginImplementations(plugin_type): item.edit(group) upload.upload(uploader.get_max_image_size()) if not context.get('defer_commit'): model.repo.commit() return model_dictize.group_dictize(group, context)
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
        context, which is the default. Otherwise returns just the dataset id)
    :rtype: dictionary

    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()
    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})
    return output
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    .. note:: Update methods may delete parameters not explicitly provided in
        the data_dict. If you want to edit only a specific attribute use
        `package_patch` instead. It is recommended to call
        :py:func:`ckan.logic.action.get.package_show`, make the desired
        changes to the result, and then call ``package_update()`` with it.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    For further parameters see
    :py:func:`~ckan.logic.action.create.package_create`.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if ``'return_package_dict'`` is ``True`` in
              the context, which is the default. Otherwise returns just the
              dataset id)
    :rtype: dictionary

    '''
    model = context['model']
    session = context['session']
    name_or_id = data_dict.get('id') or data_dict.get('name')
    if name_or_id is None:
        raise ValidationError({'id': _('Missing value')})

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg

    # immutable fields: the id and type of an existing dataset cannot be
    # changed through this action, so overwrite whatever the caller sent.
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type

    _check_access('package_update', context, data_dict)

    user = context['user']
    # get the schema: an explicit schema in the context overrides the
    # dataset-type plugin's update schema.
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    # Collect one uploader per resource (same order as the resources list)
    # so the actual upload can happen after the new resource ids are known.
    resource_uploads = []
    for resource in data_dict.get('resources', []):
        # file uploads/clearing
        upload = uploader.get_resource_uploader(resource)

        # Only fill in mimetype/size if the caller did not supply them.
        if 'mimetype' not in resource:
            if hasattr(upload, 'mimetype'):
                resource['mimetype'] = upload.mimetype

        if 'size' not in resource and 'url_type' in resource:
            if hasattr(upload, 'filesize'):
                resource['size'] = upload.filesize

        resource_uploads.append(upload)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    # Validation failure aborts the whole update and discards pending changes.
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # Update the owning organization via the dedicated action; auth was
    # already checked above and the commit is deferred to the end.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    for index, (resource, upload) in enumerate(
            zip(data.get('resources', []), resource_uploads)):
        resource['id'] = pkg.resources[index].id
        upload.upload(resource['id'], uploader.get_max_resource_size())

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_dataset_update(context, data)

    # Create activity (only for public datasets).
    if not pkg.private:
        user_obj = model.User.by_name(user)
        if user_obj:
            user_id = user_obj.id
        else:
            user_id = 'not logged in'

        activity = pkg.activity_stream_item('changed', user_id)
        session.add(activity)

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    return output
def index_package(self, pkg_dict, defer_commit=False):
    '''Build a Solr document from a package dict and send it to the index.

    Mutates a working copy of ``pkg_dict`` heavily (flattens extras, tags,
    groups, resources and relationships into top-level fields), then posts
    the document to Solr. Deleted/stateless packages are removed from the
    index instead. Python 2 code (``unicode``, ``iteritems``,
    ``except ..., e`` syntax).

    :param pkg_dict: the dataset dict to index (as from package_show);
        ``None`` is a no-op
    :param defer_commit: if True, do not ask Solr to commit immediately
    :raises SearchIndexError: if Solr rejects the document
    '''
    if pkg_dict is None:
        return

    # tracking summary values will be stale, never store them
    tracking_summary = pkg_dict.pop('tracking_summary', None)
    for r in pkg_dict.get('resources', []):
        r.pop('tracking_summary', None)

    # Snapshot the raw dict before any of the mutations below.
    data_dict_json = json.dumps(pkg_dict)

    if config.get('ckan.cache_validated_datasets', True):
        package_plugin = lib_plugins.lookup_package_plugin(
            pkg_dict.get('type'))
        schema = package_plugin.show_package_schema()
        validated_pkg_dict, errors = lib_plugins.plugin_validate(
            package_plugin, {'model': model, 'session': model.Session},
            pkg_dict, schema, 'package_show')
        # Cache the schema-validated form so package_show can skip
        # re-validation when serving from the search index.
        pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict,
            cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

    pkg_dict['data_dict'] = data_dict_json

    # add to string field for sorting
    title = pkg_dict.get('title')
    if title:
        pkg_dict['title_string'] = title

    # delete the package if there is no state, or the state is `deleted`
    if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
        return self.delete_package(pkg_dict)

    # Python 2: dict.keys() returns a list, so '+' concatenation works here.
    index_fields = RESERVED_FIELDS + pkg_dict.keys()

    # include the extras in the main namespace
    extras = pkg_dict.get('extras', [])
    for extra in extras:
        key, value = extra['key'], extra['value']
        if isinstance(value, (tuple, list)):
            value = " ".join(map(unicode, value))
        # Strip characters Solr field names cannot contain.
        key = ''.join([c for c in key if c in KEY_CHARS])
        pkg_dict['extras_' + key] = value
        # Only promote the bare key if it doesn't clash with a core field.
        if key not in index_fields:
            pkg_dict[key] = value
    pkg_dict.pop('extras', None)

    # add tags, removing vocab tags from 'tags' list and adding them as
    # vocab_<tag name> so that they can be used in facets
    non_vocab_tag_names = []
    tags = pkg_dict.pop('tags', [])
    context = {'model': model}

    for tag in tags:
        if tag.get('vocabulary_id'):
            data = {'id': tag['vocabulary_id']}
            vocab = logic.get_action('vocabulary_show')(context, data)
            key = u'vocab_%s' % vocab['name']
            if key in pkg_dict:
                pkg_dict[key].append(tag['name'])
            else:
                pkg_dict[key] = [tag['name']]
        else:
            non_vocab_tag_names.append(tag['name'])

    pkg_dict['tags'] = non_vocab_tag_names

    # add groups
    groups = pkg_dict.pop('groups', [])

    # we use the capacity to make things private in the search index
    if pkg_dict['private']:
        pkg_dict['capacity'] = 'private'
    else:
        pkg_dict['capacity'] = 'public'

    pkg_dict['groups'] = [group['name'] for group in groups]

    # if there is an owner_org we want to add this to groups for index
    # purposes
    if pkg_dict.get('organization'):
        # Flatten the org dict to just its name.
        pkg_dict['organization'] = pkg_dict['organization']['name']
    else:
        pkg_dict['organization'] = None

    # tracking
    if not tracking_summary:
        tracking_summary = model.TrackingSummary.get_for_package(
            pkg_dict['id'])
    pkg_dict['views_total'] = tracking_summary['total']
    pkg_dict['views_recent'] = tracking_summary['recent']

    resource_fields = [('name', 'res_name'),
                       ('description', 'res_description'),
                       ('format', 'res_format'),
                       ('url', 'res_url'),
                       ('resource_type', 'res_type')]
    resource_extras = [(e, 'res_extras_' + e) for e
                       in model.Resource.get_extra_columns()]
    # flatten the structure for indexing:
    for resource in pkg_dict.get('resources', []):
        for (okey, nkey) in resource_fields + resource_extras:
            pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
    pkg_dict.pop('resources', None)

    # Index relationship targets by relationship type (both directions).
    rel_dict = collections.defaultdict(list)
    subjects = pkg_dict.pop("relationships_as_subject", [])
    objects = pkg_dict.pop("relationships_as_object", [])
    for rel in objects:
        # NOTE: 'type' shadows the builtin here (kept as-is).
        type = model.PackageRelationship.forward_to_reverse_type(rel['type'])
        rel_dict[type].append(model.Package.get(rel['subject_package_id']).name)
    for rel in subjects:
        type = rel['type']
        rel_dict[type].append(model.Package.get(rel['object_package_id']).name)
    for key, value in rel_dict.iteritems():
        if key not in pkg_dict:
            pkg_dict[key] = value

    pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

    # Save dataset type
    pkg_dict['dataset_type'] = pkg_dict['type']

    # clean the dict fixing keys and dates
    # FIXME where are we getting these dirty keys from? can we not just
    # fix them in the correct place or is this something that always will
    # be needed? For my data not changing the keys seems to not cause a
    # problem.
    new_dict = {}
    bogus_date = datetime.datetime(1, 1, 1)
    for key, value in pkg_dict.items():
        key = key.encode('ascii', 'ignore')
        if key.endswith('_date'):
            try:
                # dateutil fills missing components from the sentinel default,
                # so a result equal to the sentinel means the field was empty.
                date = parse(value, default=bogus_date)
                if date != bogus_date:
                    value = date.isoformat() + 'Z'
                else:
                    # The date field was empty, so dateutil filled it with
                    # the default bogus date
                    value = None
            except ValueError:
                # Unparseable date: drop the field entirely.
                continue
        new_dict[key] = value
    pkg_dict = new_dict

    for k in ('title', 'notes', 'title_string'):
        if k in pkg_dict and pkg_dict[k]:
            pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

    # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
    # with UTC time (i.e trailing Z)
    # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
    pkg_dict['metadata_created'] += 'Z'
    pkg_dict['metadata_modified'] += 'Z'

    # mark this CKAN instance as data source:
    pkg_dict['site_id'] = config.get('ckan.site_id')

    # Strip a selection of the fields.
    # These fields are possible candidates for sorting search results on,
    # so we strip leading spaces because solr will sort " " before "a" or "A".
    for field_name in ['title']:
        try:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()
        except KeyError:
            pass

    # add a unique index_id to avoid conflicts
    import hashlib
    pkg_dict['index_id'] = hashlib.md5('%s%s' % (pkg_dict['id'],config.get('ckan.site_id'))).hexdigest()

    for item in PluginImplementations(IPackageController):
        pkg_dict = item.before_index(pkg_dict)

    assert pkg_dict, 'Plugin must return non empty package dict on index'

    # send to solr:
    try:
        conn = make_connection()
        commit = not defer_commit
        if not asbool(config.get('ckan.search.solr_commit', 'true')):
            commit = False
        conn.add(docs=[pkg_dict], commit=commit)
    except pysolr.SolrError, e:
        msg = 'Solr returned an error: {0}'.format(
            e[:1000]  # limit huge responses
        )
        raise SearchIndexError(msg)
def package_show(context, data_dict):
    '''Return the metadata of a dataset (package) and its resources.

    :param id: the id or name of the dataset
    :type id: string
    :param use_default_schema: use default package schema instead of
        a custom schema defined with an IDatasetForm plugin (default: False)
    :type use_default_schema: bool
    :param include_tracking: add tracking information to dataset and
        resources (default: False)
    :type include_tracking: bool
    :rtype: dictionary

    '''
    model = context['model']
    context['session'] = model.Session
    name_or_id = data_dict.get("id") or _get_or_bust(data_dict, 'name_or_id')

    pkg = model.Package.get(name_or_id)

    if pkg is None:
        raise NotFound

    context['package'] = pkg

    _check_access('package_show', context, data_dict)

    if data_dict.get('use_default_schema', False):
        context['schema'] = ckan.logic.schema.default_show_package_schema()
    include_tracking = asbool(data_dict.get('include_tracking', False))

    package_dict = None
    # The search-index cache cannot be used when a historical revision is
    # requested, since the index only holds the current version.
    use_cache = (context.get('use_cache', True)
                 and not 'revision_id' in context
                 and not 'revision_date' in context)
    if use_cache:
        try:
            search_result = search.show(name_or_id)
        except (search.SearchError, socket.error):
            pass
        else:
            use_validated_cache = 'schema' not in context
            if use_validated_cache and 'validated_data_dict' in search_result:
                # The index stored a pre-validated dict, so the schema
                # validation step below can be skipped entirely.
                package_json = search_result['validated_data_dict']
                package_dict = json.loads(package_json)
                package_dict_validated = True
            else:
                package_dict = json.loads(search_result['data_dict'])
                package_dict_validated = False
            metadata_modified = pkg.metadata_modified.isoformat()
            search_metadata_modified = search_result['metadata_modified']
            # solr stores less precise datetime,
            # truncate to 22 characters to get good enough match
            if metadata_modified[:22] != search_metadata_modified[:22]:
                # Index is stale for this dataset; fall back to the DB.
                package_dict = None

    if not package_dict:
        package_dict = model_dictize.package_dictize(pkg, context)
        package_dict_validated = False

    if include_tracking:
        # page-view tracking summary data
        package_dict['tracking_summary'] = (
            model.TrackingSummary.get_for_package(package_dict['id']))

        for resource_dict in package_dict['resources']:
            _add_tracking_summary_to_resource_dict(resource_dict, model)

    if context.get('for_view'):
        for item in plugins.PluginImplementations(plugins.IPackageController):
            package_dict = item.before_view(package_dict)

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.read(pkg)

    for item in plugins.PluginImplementations(plugins.IResourceController):
        for resource_dict in package_dict['resources']:
            item.before_show(resource_dict)

    if not package_dict_validated:
        package_plugin = lib_plugins.lookup_package_plugin(
            package_dict['type'])
        if 'schema' in context:
            schema = context['schema']
        else:
            schema = package_plugin.show_package_schema()
        if schema and context.get('validate', True):
            package_dict, errors = lib_plugins.plugin_validate(
                package_plugin, context, package_dict, schema,
                'package_show')

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.after_show(context, package_dict)

    return package_dict
# Inject a code representing the batch within which this dataset was modified # KEEP_OLD - keep the code before this update # DONT_GROUP - don't use any code if context.get('batch_mode') == 'KEEP_OLD': try: batch_extras = pkg._extras.get('batch') if batch_extras and batch_extras.state == 'active': data_dict['batch'] = batch_extras.value except Exception, e: log.info(str(e)) elif context.get('batch_mode') != 'DONT_GROUP': data_dict['batch'] = get_batch_or_generate(data_dict.get('owner_org')) data, errors = lib_plugins.plugin_validate(package_plugin, context, data_dict, schema, 'package_update') log.debug('package_update validate_errs=%r user=%s package=%s data=%r', errors, context.get('user'), context.get('package').name if context.get('package') else '', data) if errors: model.Session.rollback() raise logic.ValidationError(errors) rev = model.repo.new_revision() rev.author = user if 'message' in context: rev.message = context['message'] else:
def package_create(context, data_dict):
    '''Create a new dataset (package).

    You must be authorized to create new datasets. If you specify any groups
    for the new dataset, you must also be authorized to edit these groups.

    Plugins may change the parameters of this function depending on the value
    of the ``type`` parameter, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'``
    :type name: string
    :param title: the title of the dataset (optional, default: same as
        ``name``)
    :type title: string
    :param author: the name of the dataset's author (optional)
    :type author: string
    :param author_email: the email address of the dataset's author (optional)
    :type author_email: string
    :param maintainer: the name of the dataset's maintainer (optional)
    :type maintainer: string
    :param maintainer_email: the email address of the dataset's maintainer
        (optional)
    :type maintainer_email: string
    :param license_id: the id of the dataset's license, see
        :py:func:`~ckan.logic.action.get.license_list` for available values
        (optional)
    :type license_id: license id string
    :param notes: a description of the dataset (optional)
    :type notes: string
    :param url: a URL for the dataset's source (optional)
    :type url: string
    :param version: (optional)
    :type version: string, no longer than 100 characters
    :param state: the current state of the dataset, e.g. ``'active'`` or
        ``'deleted'``, only active datasets show up in search results and
        other lists of datasets, this parameter will be ignored if you are not
        authorized to change the state of the dataset (optional, default:
        ``'active'``)
    :type state: string
    :param type: the type of the dataset (optional),
        :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugins
        associate themselves with different dataset types and provide custom
        dataset handling behaviour for these types
    :type type: string
    :param resources: the dataset's resources, see
        :py:func:`resource_create` for the format of resource dictionaries
        (optional)
    :type resources: list of resource dictionaries
    :param tags: the dataset's tags, see :py:func:`tag_create` for the format
        of tag dictionaries (optional)
    :type tags: list of tag dictionaries
    :param extras: the dataset's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to datasets, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string)
    :type extras: list of dataset extra dictionaries
    :param relationships_as_object: see :py:func:`package_relationship_create`
        for the format of relationship dictionaries (optional)
    :type relationships_as_object: list of relationship dictionaries
    :param relationships_as_subject: see
        :py:func:`package_relationship_create` for the format of relationship
        dictionaries (optional)
    :type relationships_as_subject: list of relationship dictionaries
    :param groups: the groups to which the dataset belongs (optional), each
        group dictionary should have one or more of the following keys which
        identify an existing group:
        ``'id'`` (the id of the group, string), ``'name'`` (the name of the
        group, string), ``'title'`` (the title of the group, string), to see
        which groups exist call
        :py:func:`~ckan.logic.action.get.group_list`
    :type groups: list of dictionaries
    :param owner_org: the id of the dataset's owning organization, see
        :py:func:`~ckan.logic.action.get.organization_list` or
        :py:func:`~ckan.logic.action.get.organization_list_for_user` for
        available values (optional)
    :type owner_org: string

    :returns: the newly created dataset (unless 'return_id_only' is set to
              True in the context, in which case just the dataset id will
              be returned)
    :rtype: dictionary

    '''
    model = context['model']
    user = context['user']

    # The 'type' field selects which IDatasetForm plugin (and thus schema)
    # handles this dataset; None falls back to the default dataset plugin.
    package_type = data_dict.get('type')
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.create_package_schema()

    _check_access('package_create', context, data_dict)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_create')
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'), data.get('name'), data_dict)

    # Validation failure aborts the create and discards pending changes.
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")

    admins = []
    if user:
        # Python 2: usernames arrive as byte strings here; decode for lookup.
        user_obj = model.User.by_name(user.decode('utf8'))
        if user_obj:
            admins = [user_obj]
            data['creator_user_id'] = user_obj.id

    pkg = model_save.package_dict_save(data, context)

    model.setup_default_user_roles(pkg, admins)
    # Needed to let extensions know the package id
    model.Session.flush()
    data['id'] = pkg.id

    # Assign the owning organization via the dedicated action; auth was
    # already checked and the commit is deferred to the end.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)

        item.after_create(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    return_id_only = context.get('return_id_only', False)

    output = context['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': context['id']})

    return output
package_type = 'dataset' # in case a 'dataset' plugin was registered w/o fallback package_plugin = lib_plugins.lookup_package_plugin(package_type) package_dict['type'] = package_type else: package_plugin = lib_plugins.lookup_package_plugin(package_dict['type']) errors = {} # if package has been previously imported try: existing_package_dict = self._find_existing_package(package_dict) if 'metadata_modified' not in package_dict or \ package_dict['metadata_modified'] > existing_package_dict.get('metadata_modified'): schema = package_plugin.update_package_schema() data, errors = lib_plugins.plugin_validate( package_plugin, base_context, package_dict, schema, 'package_update') except NotFound: schema = package_plugin.create_package_schema() data, errors = lib_plugins.plugin_validate( package_plugin, base_context, package_dict, schema, 'package_create') if errors: raise ValidationError(errors) result = self._create_or_update_package( package_dict, harvest_object, package_dict_form='package_show') return result except ValidationError, e:
def blacklist_package_update(context, data_dict):
    '''Update a dataset, additionally storing a ``black_list`` field.

    Behaves like the core ``package_update`` action but extends the update
    schema so that ``black_list`` is accepted and converted to a package
    extra.

    :param id: the name or id of the dataset to update
    :type id: string
    :param black_list: value to store as the 'black_list' extra (optional)

    :returns: the updated dataset, or just its id if ``'return_id_only'``
              is True in the context
    :rtype: dictionary or string
    :raises NotFound: if no dataset matches the given name/id
    :raises logic.ValidationError: if the data fails schema validation
    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    # Normalise to canonical id; 'type' is immutable for an existing dataset.
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type

    # get the schema: explicit context schema wins over the plugin's.
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    # Accept 'black_list' and persist it as a package extra.
    schema.update({
        'black_list': [toolkit.get_validator('ignore_missing'),
                       toolkit.get_converter('convert_to_extras')]
    })

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')

    # FIX: previously validation errors were silently discarded and the
    # update proceeded with possibly-invalid data. Abort and roll back,
    # matching the behaviour of every other *_update action in this file.
    if errors:
        model.Session.rollback()
        raise logic.ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # Update the owning organization; auth is bypassed because the caller
    # already passed through this action, and the commit is deferred.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    get_action('package_owner_org_update')(context_org_update,
                                           {'id': pkg.id,
                                            'organization_id': pkg.owner_org})

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': data})

    if not context.get('defer_commit'):
        model.repo.commit()

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else get_action('package_show')(context, {'id': data_dict['id']})

    return output
def package_show(context, data_dict):
    """Return the metadata of a dataset (package) and its resources.

    IMPORTANT: This monkeypatches ckan core's `package_show` method, and
    bypasses the `use_cache` part of that function. It was disabled for some
    reason, and needs testing before switching it back on.

    :param id: the id or name of the dataset
    :type id: string
    :param use_default_schema: use default package schema instead of
        a custom schema defined with an IDatasetForm plugin (default: False)
    :type use_default_schema: bool
    :param include_tracking: add tracking information to dataset and
        resources (default: False)
    :type include_tracking: bool
    :rtype: dictionary
    """
    model = context['model']
    context['session'] = model.Session
    name_or_id = data_dict.get("id") or _get_or_bust(data_dict, 'name_or_id')

    pkg = model.Package.get(name_or_id)

    if pkg is None:
        raise logic.NotFound

    context['package'] = pkg

    _check_access('package_show', context, data_dict)

    if data_dict.get('use_default_schema', False):
        context['schema'] = ckan.logic.schema.default_show_package_schema()
    include_tracking = asbool(data_dict.get('include_tracking', False))

    package_dict = None

    # NOTE(review): package_dict is always None here — this conditional is a
    # remnant of the removed search-cache branch (see docstring) and always
    # takes the dictize path.
    if not package_dict:
        package_dict = model_dictize.package_dictize(pkg, context)
        package_dict_validated = False

    if include_tracking:
        # page-view tracking summary data
        package_dict['tracking_summary'] = (
            model.TrackingSummary.get_for_package(package_dict['id']))

        for resource_dict in package_dict['resources']:
            _add_tracking_summary_to_resource_dict(resource_dict, model)

    if context.get('for_view'):
        for item in plugins.PluginImplementations(plugins.IPackageController):
            package_dict = item.before_view(package_dict)

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.read(pkg)

    for item in plugins.PluginImplementations(plugins.IResourceController):
        for resource_dict in package_dict['resources']:
            item.before_show(resource_dict)

    if not package_dict_validated:
        package_plugin = lib_plugins.lookup_package_plugin(
            package_dict['type'])
        if 'schema' in context:
            schema = context['schema']
        else:
            schema = package_plugin.show_package_schema()
        if schema and context.get('validate', True):
            package_dict, errors = lib_plugins.plugin_validate(
                package_plugin, context, package_dict, schema,
                'package_show')

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.after_show(context, package_dict)

    return package_dict
def _group_or_org_update(context, data_dict, is_org=False):
    '''Shared implementation for ``group_update`` and ``organization_update``.

    Validates ``data_dict`` against the group plugin's schema, saves the
    group, records an activity (handling deletion specially), processes any
    image upload, and commits unless ``defer_commit`` is set in the context.

    :param is_org: True when updating an organization, False for a group
    :raises NotFound: if no group matches data_dict['id']
    :raises ValidationError: if the data fails schema validation
    :returns: the updated group, dictized
    '''
    model = context['model']
    user = context['user']
    session = context['session']
    id = _get_or_bust(data_dict, 'id')

    group = model.Group.get(id)
    context["group"] = group
    if group is None:
        raise NotFound('Group was not found.')

    # get the schema: newer plugins expose form_to_db_schema_options();
    # fall back to the plain schema for older ones.
    group_plugin = lib_plugins.lookup_group_plugin(group.type)
    try:
        schema = group_plugin.form_to_db_schema_options({'type':'update',
                                               'api':'api_version' in context,
                                               'context': context})
    except AttributeError:
        schema = group_plugin.form_to_db_schema()

    upload = uploader.Upload('group', group.image_url)
    upload.update_data_dict(data_dict, 'image_url',
                            'image_upload', 'clear_upload')

    if is_org:
        _check_access('organization_update', context, data_dict)
    else:
        _check_access('group_update', context, data_dict)

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        group_plugin, context, data_dict, schema,
        'organization_update' if is_org else 'group_update')
    log.debug('group_update validate_errs=%r user=%s group=%s data_dict=%r',
              errors, context.get('user'),
              context.get('group').name if context.get('group') else '',
              data_dict)

    # Validation failure aborts the update and discards pending changes.
    if errors:
        session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user

    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # when editing an org we do not want to update the packages if using the
    # new templates.
    if ((not is_org)
            and not converters.asbool(
                config.get('ckan.legacy_templates', False))
            and 'api_version' not in context):
        context['prevent_packages_update'] = True

    group = model_save.group_dict_save(data, context)

    if is_org:
        plugin_type = plugins.IOrganizationController
    else:
        plugin_type = plugins.IGroupController

    for item in plugins.PluginImplementations(plugin_type):
        item.edit(group)

    if is_org:
        activity_type = 'changed organization'
    else:
        activity_type = 'changed group'

    # NOTE(review): Python 2 byte-string username; by_name() returning None
    # would raise AttributeError here — presumably the user always exists at
    # this point (auth was checked above), but worth confirming.
    activity_dict = {
            'user_id': model.User.by_name(user.decode('utf8')).id,
            'object_id': group.id,
            'activity_type': activity_type,
            }
    # Handle 'deleted' groups.
    # When the user marks a group as deleted this comes through here as
    # a 'changed' group activity. We detect this and change it to a 'deleted'
    # activity.
    if group.state == u'deleted':
        if session.query(ckan.model.Activity).filter_by(
                object_id=group.id, activity_type='deleted').all():
            # A 'deleted group' activity for this group has already been
            # emitted.
            # FIXME: What if the group was deleted and then activated again?
            activity_dict = None
        else:
            # We will emit a 'deleted group' activity.
            activity_dict['activity_type'] = 'deleted group'
    if activity_dict is not None:
        activity_dict['data'] = {
                'group': dictization.table_dictize(group, context)
                }
        activity_create_context = {
            'model': model,
            'user': user,
            'defer_commit': True,
            'ignore_auth': True,
            'session': session
        }
        # DGU checks if activity streams are enabled first, to avoid Auth Audit
        # issue #1421
        if converters.asbool(
                config.get('ckan.activity_streams_enabled', 'true')):
            _get_action('activity_create')(activity_create_context,
                                           activity_dict)
        # TODO: Also create an activity detail recording what exactly changed
        # in the group.

    upload.upload(uploader.get_max_image_size())
    if not context.get('defer_commit'):
        model.repo.commit()

    return model_dictize.group_dictize(group, context)
def package_update(context, data_dict):
    '''Update a dataset (package) — BCDC/EDC customised variant.

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    In addition to core CKAN behaviour this override:

    * merges missing keys from the stored dataset into ``data_dict``
      (so partial updates don't wipe fields),
    * rebuilds ``tag_string`` from ``tags`` because validators strip tags
      and only honour ``tag_string``,
    * stamps ``record_last_modified`` / ``record_create_date`` /
      ``record_publish_date`` / ``record_archive_date`` based on
      ``edc_state``,
    * handles dataset image upload and computes ``image_display_url``,
    * spawns a background thread to send state-change notifications.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
        context, which is the default. Otherwise returns just the dataset id)
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    # FIXME: first modifications to package_updade begin here:
    # tag strings are reconstructed because validators are stripping
    # tags passed and only taking taks as tag_string values
    # image upload support has also been added here
    old_data = get_action('package_show')(context, {'id': pkg.id})

    '''
    Constructing the tag_string from the given tags.
    There must be at least one tag, otherwise the tag_string will be empty
    and a validation error will be raised.
    '''
    if not data_dict.get('tag_string'):
        data_dict['tag_string'] = ', '.join(
            h.dict_list_reduce(data_dict.get('tags', {}), 'name'))

    # Carry over any stored field the caller did not supply, so a partial
    # data_dict doesn't erase existing metadata.
    for key, value in old_data.iteritems():
        if key not in data_dict:
            data_dict[key] = value

    #data_dict['resources'] = data_dict.get('resources', old_data.get('resources'))

    # iso_topic_cat = data_dict.get('iso_topic_string', [])
    # if isinstance(iso_topic_cat, basestring):
    #     iso_topic_cat = [iso_topic_cat]
    #
    # data_dict['iso_topic_string'] = ','.join(iso_topic_cat)

    #Set the package last modified date
    data_dict['record_last_modified'] = str(datetime.date.today())

    # NOTE(review): data_dict['edc_state'] is read unguarded below — assumes
    # edc_state is always present after the old_data merge; confirm the
    # schema guarantees it.
    # If the Created Date has not yet been set, then set it
    if data_dict['edc_state'] == 'DRAFT' \
            and not data_dict.get('record_create_date'):
        data_dict['record_create_date'] = str(datetime.date.today())

    # If the Publish Date has not yet been set, then set it
    if data_dict['edc_state'] == 'PUBLISHED' \
            and not data_dict.get('record_publish_date'):
        data_dict['record_publish_date'] = str(datetime.date.today())

    # If the Archive Date has not yet been set, then set it
    if data_dict['edc_state'] == 'ARCHIVED' \
            and not data_dict.get('record_archive_date'):
        data_dict['record_archive_date'] = str(datetime.date.today())

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    image_url = old_data.get('image_url', None)

    upload = uploader.Upload('edc', image_url)
    upload.update_data_dict(data_dict, 'image_url',
                            'image_upload', 'clear_upload')

    #Adding image display url for the uploaded image
    image_url = data_dict.get('image_url')
    data_dict['image_display_url'] = image_url

    # A non-http value means a locally uploaded file: point the display url
    # at the site's uploads area instead.
    if image_url and not image_url.startswith('http'):
        image_url = munge.munge_filename(image_url)
        data_dict['image_display_url'] = h.url_for_static(
            'uploads/edc/%s' % data_dict.get('image_url'), qualified=True)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    # FIXME: modifications to package_update end here^

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)

    upload.upload(uploader.get_max_image_size())

    #TODO the next two blocks are copied from ckan/ckan/logic/action/update.py
    # This codebase is currently hard to maintain because large chunks of the
    # CKAN action API and the CKAN controllers are simply overriden. This is
    # probably worse than just forking CKAN would have been, because in that
    # case at least we could track changes. - @deniszgonjanin

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': data})

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    '''
    Send state change notifications if required; Added by Khalegh Mamakani
    Using a thread to run the job in the background so that package_update
    will not wait for notifications sending.
    '''
    old_state = old_data.get('edc_state')

    # NOTE(review): rebinds the local `context` (the action context above is
    # no longer needed); reads pylons-style request globals c/g, so this
    # path assumes a web-request context — confirm before calling from a
    # background job.
    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj}

    dataset_url = config.get('ckan.site_url') + \
        h.url_for(controller='package', action="read",
                  id = data_dict['name'])

    import threading

    notify_thread = threading.Thread(
        target=check_record_state,
        args=(context, old_state, data_dict,
              g.site_title, g.site_url, dataset_url))
    notify_thread.start()

    return output
def package_update(context, data_dict):
    '''Update a dataset (package) — near-verbatim copy of the core action.

    NOTE(review): this is a second ``package_update`` definition; if both
    live in the same module this one shadows the earlier variant at import
    time — confirm which one is meant to be registered.

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    It is recommended to call
    :py:func:`ckan.logic.action.get.package_show`, make the desired changes to
    the result, and then call ``package_update()`` with it.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    For further parameters see
    :py:func:`~ckan.logic.action.create.package_create`.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if ``'return_package_dict'`` is ``True`` in
        the context, which is the default. Otherwise returns just the dataset
        id)
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg

    # immutable fields: normalise id and pin the type to the stored package
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type

    _check_access('package_update', context, data_dict)

    # get the schema; a caller-supplied context['schema'] wins
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(package_plugin, context,
                                               data_dict, schema,
                                               'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    context_org_update['add_revision'] = False
    _get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    return output
def index_package(self, pkg_dict, defer_commit=False):
    '''Flatten a package dict and send it to the Solr search index.

    Fix over the previous revision: ``ckan.cache_validated_datasets`` is now
    parsed with ``asbool``. ``config.get()`` returns a string, so the old
    truthiness test made any set value (including ``'false'``) enable the
    cache; the ``ckan.search.solr_commit`` option below already used
    ``asbool`` correctly.

    :param pkg_dict: dictized package (as produced by package_show); a
        ``None`` value is a no-op. A package with no state, or a deleted
        state, is removed from the index instead.
    :param defer_commit: when True, do not ask Solr to commit the add
    :raises SearchIndexError: on Solr errors or connection failure
    '''
    if pkg_dict is None:
        return

    # tracking summary values will be stale, never store them
    tracking_summary = pkg_dict.pop('tracking_summary', None)
    for r in pkg_dict.get('resources', []):
        r.pop('tracking_summary', None)

    data_dict_json = json.dumps(pkg_dict)

    # FIX: parse the option as a boolean; previously the raw config string
    # was tested for truthiness, so 'false' still enabled caching.
    if asbool(config.get('ckan.cache_validated_datasets', True)):
        package_plugin = lib_plugins.lookup_package_plugin(
            pkg_dict.get('type'))
        schema = package_plugin.show_package_schema()
        validated_pkg_dict, errors = lib_plugins.plugin_validate(
            package_plugin, {'model': model, 'session': model.Session},
            pkg_dict, schema, 'package_show')
        pkg_dict['validated_data_dict'] = json.dumps(
            validated_pkg_dict,
            cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

    pkg_dict['data_dict'] = data_dict_json

    # add to string field for sorting
    title = pkg_dict.get('title')
    if title:
        pkg_dict['title_string'] = title

    # delete the package if there is no state, or the state is `deleted`
    if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
        return self.delete_package(pkg_dict)

    index_fields = RESERVED_FIELDS + list(pkg_dict.keys())

    # include the extras in the main namespace
    extras = pkg_dict.get('extras', [])
    for extra in extras:
        key, value = extra['key'], extra['value']
        if isinstance(value, (tuple, list)):
            value = " ".join(map(text_type, value))
        key = ''.join([c for c in key if c in KEY_CHARS])
        pkg_dict['extras_' + key] = value
        if key not in index_fields:
            pkg_dict[key] = value
    pkg_dict.pop('extras', None)

    # add tags, removing vocab tags from 'tags' list and adding them as
    # vocab_<tag name> so that they can be used in facets
    non_vocab_tag_names = []
    tags = pkg_dict.pop('tags', [])
    context = {'model': model}

    for tag in tags:
        if tag.get('vocabulary_id'):
            data = {'id': tag['vocabulary_id']}
            vocab = logic.get_action('vocabulary_show')(context, data)
            key = u'vocab_%s' % vocab['name']
            if key in pkg_dict:
                pkg_dict[key].append(tag['name'])
            else:
                pkg_dict[key] = [tag['name']]
        else:
            non_vocab_tag_names.append(tag['name'])

    pkg_dict['tags'] = non_vocab_tag_names

    # add groups
    groups = pkg_dict.pop('groups', [])

    # we use the capacity to make things private in the search index
    if pkg_dict['private']:
        pkg_dict['capacity'] = 'private'
    else:
        pkg_dict['capacity'] = 'public'

    pkg_dict['groups'] = [group['name'] for group in groups]

    # if there is an owner_org we want to add this to groups for index
    # purposes
    if pkg_dict.get('organization'):
        pkg_dict['organization'] = pkg_dict['organization']['name']
    else:
        pkg_dict['organization'] = None

    # tracking
    if not tracking_summary:
        tracking_summary = model.TrackingSummary.get_for_package(
            pkg_dict['id'])
    pkg_dict['views_total'] = tracking_summary['total']
    pkg_dict['views_recent'] = tracking_summary['recent']

    resource_fields = [('name', 'res_name'),
                       ('description', 'res_description'),
                       ('format', 'res_format'),
                       ('url', 'res_url'),
                       ('resource_type', 'res_type')]
    resource_extras = [(e, 'res_extras_' + e) for e
                       in model.Resource.get_extra_columns()]
    # flatten the structure for indexing:
    for resource in pkg_dict.get('resources', []):
        for (okey, nkey) in resource_fields + resource_extras:
            pkg_dict[nkey] = pkg_dict.get(nkey, []) + \
                [resource.get(okey, u'')]
    pkg_dict.pop('resources', None)

    # index relationships under their (reversed, for objects) type names
    rel_dict = collections.defaultdict(list)
    subjects = pkg_dict.pop("relationships_as_subject", [])
    objects = pkg_dict.pop("relationships_as_object", [])
    for rel in objects:
        type = model.PackageRelationship.forward_to_reverse_type(
            rel['type'])
        rel_dict[type].append(
            model.Package.get(rel['subject_package_id']).name)
    for rel in subjects:
        type = rel['type']
        rel_dict[type].append(
            model.Package.get(rel['object_package_id']).name)
    for key, value in six.iteritems(rel_dict):
        if key not in pkg_dict:
            pkg_dict[key] = value

    pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

    # Save dataset type
    pkg_dict['dataset_type'] = pkg_dict['type']

    # clean the dict fixing keys and dates
    # FIXME where are we getting these dirty keys from?  can we not just
    # fix them in the correct place or is this something that always will
    # be needed?  For my data not changing the keys seems to not cause a
    # problem.
    new_dict = {}
    bogus_date = datetime.datetime(1, 1, 1)
    for key, value in pkg_dict.items():
        key = six.ensure_str(key)
        if key.endswith('_date'):
            try:
                date = parse(value, default=bogus_date)
                if date != bogus_date:
                    value = date.isoformat() + 'Z'
                else:
                    # The date field was empty, so dateutil filled it with
                    # the default bogus date
                    value = None
            except (ValueError, IndexError):
                # unparseable date: drop the key entirely
                continue
        new_dict[key] = value
    pkg_dict = new_dict

    for k in ('title', 'notes', 'title_string'):
        if k in pkg_dict and pkg_dict[k]:
            pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

    # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
    # with UTC time (i.e trailing Z)
    # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
    pkg_dict['metadata_created'] += 'Z'
    pkg_dict['metadata_modified'] += 'Z'

    # mark this CKAN instance as data source:
    pkg_dict['site_id'] = config.get('ckan.site_id')

    # Strip a selection of the fields.
    # These fields are possible candidates for sorting search results on,
    # so we strip leading spaces because solr will sort " " before "a" or "A".
    for field_name in ['title']:
        try:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()
        except KeyError:
            pass

    # add a unique index_id to avoid conflicts
    import hashlib
    pkg_dict['index_id'] = hashlib.md5(
        six.b('%s%s' % (pkg_dict['id'],
                        config.get('ckan.site_id')))).hexdigest()

    for item in PluginImplementations(IPackageController):
        pkg_dict = item.before_index(pkg_dict)

    assert pkg_dict, 'Plugin must return non empty package dict on index'

    # permission labels determine visibility in search, can't be set
    # in original dataset or before_index plugins
    labels = lib_plugins.get_permission_labels()
    dataset = model.Package.get(pkg_dict['id'])
    pkg_dict['permission_labels'] = labels.get_dataset_labels(
        dataset) if dataset else []  # TestPackageSearchIndex-workaround

    # send to solr:
    try:
        conn = make_connection()
        commit = not defer_commit
        if not asbool(config.get('ckan.search.solr_commit', 'true')):
            commit = False
        conn.add(docs=[pkg_dict], commit=commit)
    except pysolr.SolrError as e:
        msg = 'Solr returned an error: {0}'.format(
            e.args[0][:1000]  # limit huge responses
        )
        raise SearchIndexError(msg)
    except socket.error as e:
        err = 'Could not connect to Solr using {0}: {1}'.format(
            conn.url, str(e))
        log.error(err)
        raise SearchIndexError(err)

    commit_debug_msg = 'Not committed yet' if defer_commit else 'Committed'
    log.debug('Updated index for %s [%s]' % (
        pkg_dict.get('name'), commit_debug_msg))
def _group_or_org_create(context, data_dict, is_org=False):
    '''Shared implementation for ``group_create`` / ``organization_create``.

    Validates ``data_dict`` against the group plugin's schema, saves the
    group in a new revision, fires ``IGroupController`` /
    ``IOrganizationController`` ``create()`` hooks, records a "new group"
    activity, processes any image upload, commits (unless
    ``context['defer_commit']`` is set) and finally makes the creating user
    an admin member of the new group.

    Fix over the previous revision: the creating user is looked up once
    via ``model.User.by_name`` instead of twice (the result was needed both
    for the default admins list and for the activity's ``user_id``).

    :param context: standard CKAN action context; must contain ``model``,
        ``user`` and ``session``
    :param data_dict: group fields; ``type`` selects the group plugin
    :param is_org: True when creating an organization rather than a group
    :returns: the new group, dictized
    :raises ValidationError: if schema validation fails (session is
        rolled back first)
    '''
    model = context['model']
    user = context['user']
    session = context['session']
    data_dict['is_organization'] = is_org

    # Prepare the (possibly uploaded) group image before validation.
    upload = uploader.Upload('group')
    upload.update_data_dict(data_dict, 'image_url',
                            'image_upload', 'clear_upload')
    # get the schema; prefer the options-aware hook, fall back for old
    # plugins that only implement form_to_db_schema()
    group_plugin = lib_plugins.lookup_group_plugin(
        group_type=data_dict.get('type'))
    try:
        schema = group_plugin.form_to_db_schema_options({
            'type': 'create', 'api': 'api_version' in context,
            'context': context})
    except AttributeError:
        schema = group_plugin.form_to_db_schema()

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        group_plugin, context, data_dict, schema,
        'organization_create' if is_org else 'group_create')
    log.debug('group_create validate_errs=%r user=%s group=%s data_dict=%r',
              errors, context.get('user'), data_dict.get('name'), data_dict)

    if errors:
        session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user

    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")

    group = model_save.group_dict_save(data, context)

    # FIX: single user lookup (the same query used to be issued again below
    # for the activity's user_id). SQLAlchemy's identity map makes the two
    # results identical within one session, so behaviour is unchanged.
    user_obj = model.User.by_name(user.decode('utf8'))
    if user:
        admins = [user_obj]
    else:
        admins = []
    model.setup_default_user_roles(group, admins)

    # Needed to let extensions know the group id
    session.flush()

    if is_org:
        plugin_type = plugins.IOrganizationController
    else:
        plugin_type = plugins.IGroupController

    for item in plugins.PluginImplementations(plugin_type):
        item.create(group)

    if is_org:
        activity_type = 'new organization'
    else:
        activity_type = 'new group'

    user_id = user_obj.id

    activity_dict = {
        'user_id': user_id,
        'object_id': group.id,
        'activity_type': activity_type,
    }
    activity_dict['data'] = {
        'group': ckan.lib.dictization.table_dictize(group, context)
    }
    activity_create_context = {
        'model': model,
        'user': user,
        'defer_commit': True,  # commit happens (or not) below
        'ignore_auth': True,
        'session': session
    }
    logic.get_action('activity_create')(activity_create_context,
                                        activity_dict)

    upload.upload(uploader.get_max_image_size())
    if not context.get('defer_commit'):
        model.repo.commit()
    context["group"] = group
    context["id"] = group.id

    # creator of group/org becomes an admin
    # this needs to be after the repo.commit or else revisions break
    member_dict = {
        'id': group.id,
        'object': user_id,
        'object_type': 'user',
        'capacity': 'admin',
    }
    member_create_context = {
        'model': model,
        'user': user,
        'ignore_auth': True,  # we are not a member of the group at this point
        'session': session
    }
    logic.get_action('member_create')(member_create_context, member_dict)

    log.debug('Created object %s' % group.name)
    return model_dictize.group_dictize(group, context)