def bd_dataset_validate(context, data_dict):
    """Validate *data_dict* as a brand-new package without creating it.

    Ids are stripped (package and resources) and the name is suffixed so the
    dict validates as a new, non-colliding dataset.

    :returns: dict with ``success`` (bool), validated ``data`` and ``errors``.
        The historical misspelled key ``sucess`` is also kept so existing
        callers keep working; prefer ``success``.
    """
    context["ignore_auth"] = True
    if "type" not in data_dict:
        package_plugin = lib_plugins.lookup_package_plugin()
        try:
            # use first type as default if user didn't provide type
            package_type = package_plugin.package_types()[0]
        except (AttributeError, IndexError):
            # in case a 'dataset' plugin was registered w/o fallback
            package_type = "dataset"
        data_dict["type"] = package_type

    package_plugin = lib_plugins.lookup_package_plugin(data_dict["type"])

    # Strip ids so validation treats this as a brand-new package.
    data_dict.pop("id", None)
    # Tolerate a missing 'resources' key (the old code raised KeyError here).
    for resource in data_dict.get("resources", []):
        resource.pop("id", None)
    if "name" in data_dict:
        # add a char so that name doesn't collide with existing package
        data_dict["name"] += "_"

    schema = context.get("schema") or package_plugin.create_package_schema()
    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, "package_create"
    )
    ok = not errors
    # 'sucess' retained for backward compatibility with existing callers.
    return {"success": ok, "sucess": ok, "data": data, "errors": errors}
def _resource_form(self, package_type):
    """Return the resource form for *package_type*.

    Falls back to the default package plugin for plugins that do not
    inherit from DefaultDatasetPlugin and so may not implement
    ``resource_form`` (or may return None from it).
    """
    plugin = lookup_package_plugin(package_type)
    rendered = plugin.resource_form() if hasattr(plugin, 'resource_form') else None
    if rendered is None:
        # Plugin missing the hook, or the hook yielded nothing: use default.
        rendered = lookup_package_plugin().resource_form()
    return rendered
def package_update(next_action, context, data_dict):
    """Chained ``package_update`` that swaps in our owner_org validator.

    :param next_action: the core action being wrapped
    :param context: action context
    :param data_dict: package dict being updated
    """
    model = context['model']
    ref = data_dict.get('id') or data_dict.get('name')
    pkg = model.Package.get(ref)
    if pkg is None:
        raise toolkit.ObjectNotFound(toolkit._('Package was not found.'))

    toolkit.check_access('package_update', context, data_dict)

    # Resolve the update schema from the package-type plugin unless the
    # caller supplied one explicitly.
    plugin = lib_plugins.lookup_package_plugin(pkg.type)
    schema = context['schema'] if 'schema' in context \
        else plugin.update_package_schema()

    # Replace the default owner_org validator with our own implementation.
    if 'owner_org' in schema:
        schema['owner_org'] = [
            owner_org_validator if v is default_owner_org_validator else v
            for v in schema['owner_org']
        ]
    context['schema'] = schema

    return next_action(context, data_dict)
def package_show(context, data_dict):
    """Show a package, validating through the package-type plugin's
    db_to_form schema unless ``context['validate']`` is falsy."""
    model = context['model']
    context['session'] = model.Session

    ref = data_dict.get("id") or data_dict['name_or_id']
    pkg = model.Package.get(ref)
    if pkg is None:
        raise NotFound
    context['package'] = pkg

    check_access('package_show', context, data_dict)

    package_dict = model_dictize.package_dictize(pkg, context)
    for controller in plugins.PluginImplementations(plugins.IPackageController):
        controller.read(pkg)

    type_plugin = lib_plugins.lookup_package_plugin(package_dict['type'])
    schema = type_plugin.db_to_form_schema()
    if schema and context.get('validate', True):
        package_dict, errors = validate(package_dict, schema, context=context)

    return package_dict
def datasets_validation_report(context, data_dict):
    """Report datasets that fail validation against the 'dataset' update schema.

    :returns: dict with ``count`` (total datasets found by search) and
        ``datasets`` (a ``{id, name, errors}`` entry per invalid dataset).
    """
    toolkit.check_access('datasets_validation_report', context, data_dict)

    # Paginate the search: the previous implementation fetched a single page
    # of 1000 rows and silently ignored any datasets beyond it.
    page_size = 1000
    datasets = []
    count = 0
    start = 0
    while True:
        search_params = {
            'q': '*:*',
            'include_private': True,
            'rows': page_size,
            'start': start,
        }
        query = toolkit.get_action('package_search')(
            {'ignore_auth': True}, search_params)
        count = query['count']
        results = query['results']
        if not results:
            break
        datasets.extend(results)
        start += len(results)
        if start >= count:
            break

    out = {
        'count': count,
        'datasets': [],
    }

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin('dataset')
    schema = package_plugin.update_package_schema()

    context = {
        'model': model,
        'session': model.Session,
        'user': toolkit.c.user,
    }

    for dataset in datasets:
        data, errors = package_plugin.validate(
            context, dataset, schema, 'package_update')
        if errors:
            out['datasets'].append({
                'id': dataset['id'],
                'name': dataset['name'],
                'errors': errors,
            })

    return out
def package_show(context, data_dict):
    """Show a package; prefers the plugin's ``db_to_form_schema_options``
    hook (passing call type / api info) and falls back to
    ``db_to_form_schema`` for older plugins."""
    model = context['model']
    context['session'] = model.Session

    ref = data_dict.get("id") or data_dict['name_or_id']
    pkg = model.Package.get(ref)
    if pkg is None:
        raise NotFound
    context['package'] = pkg

    check_access('package_show', context, data_dict)

    package_dict = model_dictize.package_dictize(pkg, context)
    for controller in plugins.PluginImplementations(plugins.IPackageController):
        controller.read(pkg)

    package_plugin = lib_plugins.lookup_package_plugin(package_dict['type'])
    try:
        # Newer hook: lets the plugin vary the schema by call type.
        schema = package_plugin.db_to_form_schema_options({
            'type': 'show',
            'api': 'api_version' in context,
            'context': context,
        })
    except AttributeError:
        # Older plugins only implement db_to_form_schema.
        schema = package_plugin.db_to_form_schema()

    if schema and context.get('validate', True):
        package_dict, errors = validate(package_dict, schema, context=context)

    return package_dict
def dataset_is_valid(package):
    """
    Check if given dataset is valid. Uses the update schema from the
    package-type plugin. Return True if the dataset is valid.
    """
    plugin = plugins.lookup_package_plugin(package['type'])
    ctx = {'model': model, 'session': model.Session, 'user': c.user}
    _, errors = validate(package, plugin.update_package_schema(), ctx)
    return not errors
def package_create(context, data_dict):
    """Create a package from ``data_dict`` and return it via ``package_show``.

    Validates against the package-type plugin's form-to-db schema, writes a
    new revision, saves the package, sets up default user roles and notifies
    IPackageController plugins.

    :raises ValidationError: if schema validation fails
    """
    model = context["model"]
    user = context["user"]
    # Reset the scoped session and stash the action context on it so that
    # model-level code can see it.
    model.Session.remove()
    model.Session()._context = context

    package_type = data_dict.get("type")
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    try:
        # Newer plugin hook: schema may vary by call type / api usage.
        schema = package_plugin.form_to_db_schema_options(
            {"type": "create", "api": "api_version" in context, "context": context}
        )
    except AttributeError:
        # Older plugins only implement the plain hook.
        schema = package_plugin.form_to_db_schema()

    check_access("package_create", context, data_dict)

    if "api_version" not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            package_plugin.check_data_dict(data_dict)

    data, errors = validate(data_dict, schema, context)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors, error_summary(errors))

    rev = model.repo.new_revision()
    rev.author = user
    if "message" in context:
        rev.message = context["message"]
    else:
        rev.message = _(u"REST API: Create object %s") % data.get("name")

    pkg = model_save.package_dict_save(data, context)
    admins = []
    if user:
        # NOTE(review): assumes ``user`` is a byte string — confirm this
        # module runs under Python 2, where str.decode exists.
        admins = [model.User.by_name(user.decode("utf8"))]

    model.setup_default_user_roles(pkg, admins)
    # Needed to let extensions know the package id
    model.Session.flush()

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)

    if not context.get("defer_commit"):
        model.repo.commit()

    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug("Created object %s" % str(pkg.name))
    return get_action("package_show")(context, {"id": context["id"]})
def package_validate(context, data_dict):
    """Validate *data_dict* as a package create (no existing package) or
    update (package found); returns validated data or raises
    ValidationError."""
    model = context['model']
    pkg_id = data_dict.get("id")
    pkg = model.Package.get(pkg_id) if pkg_id else None

    if pkg is None:
        action = 'package_create'
        pkg_type = data_dict.get('type', 'dataset')
    else:
        action = 'package_update'
        pkg_type = pkg.type
        context["package"] = pkg
        data_dict["id"] = pkg.id

    logic.check_access(action, context, data_dict)

    package_plugin = lib_plugins.lookup_package_plugin(pkg_type)
    if 'schema' in context:
        schema = context['schema']
    elif action == 'package_create':
        schema = package_plugin.create_package_schema()
    else:
        schema = package_plugin.update_package_schema()

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, action)
    if errors:
        raise ValidationError(errors)

    # groups_list is an internal artefact; never expose it to callers.
    data.pop('groups_list', None)
    return data
def package_update_validate(context, data_dict):
    """Validate *data_dict* against the package's update schema without
    saving anything; raises NotFound / ValidationError."""
    model = context['model']
    user = context['user']
    pkg_id = data_dict["id"]

    # Reset the scoped session and attach the action context to it.
    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(pkg_id)
    context["package"] = pkg
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    data_dict["id"] = pkg.id

    # Resolve the form-to-db schema from the package-type plugin.
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    try:
        schema = package_plugin.form_to_db_schema_options({
            'type': 'update',
            'api': 'api_version' in context,
            'context': context,
        })
    except AttributeError:
        schema = package_plugin.form_to_db_schema()

    check_access('package_update', context, data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, error_summary(errors))
    return data
def package_show(context, data_dict):
    """Show a package, validating with the plugin's db_to_form schema
    whenever one is provided (no ``validate`` flag in this variant)."""
    model = context['model']
    context['session'] = model.Session

    ref = data_dict.get("id") or data_dict['name_or_id']
    pkg = model.Package.get(ref)
    if pkg is None:
        raise NotFound
    context['package'] = pkg

    check_access('package_show', context, data_dict)

    package_dict = model_dictize.package_dictize(pkg, context)
    for controller in plugins.PluginImplementations(plugins.IPackageController):
        controller.read(pkg)

    type_plugin = lib_plugins.lookup_package_plugin(package_dict['type'])
    schema = type_plugin.db_to_form_schema()
    if schema:
        package_dict, errors = validate(package_dict, schema, context=context)

    return package_dict
def package_update_validate(context, data_dict):
    """Run update-schema validation for an existing package and return the
    validated data; nothing is persisted."""
    model = context['model']
    user = context['user']
    pkg_id = _get_or_bust(data_dict, "id")

    pkg = model.Package.get(pkg_id)
    context["package"] = pkg
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    data_dict["id"] = pkg.id

    # Resolve the form-to-db schema from the package-type plugin.
    plugin = lib_plugins.lookup_package_plugin(pkg.type)
    try:
        schema = plugin.form_to_db_schema_options({
            'type': 'update',
            'api': 'api_version' in context,
            'context': context,
        })
    except AttributeError:
        schema = plugin.form_to_db_schema()

    _check_access('package_update', context, data_dict)

    data, errors = _validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)
    return data
def package_update_validate(context, data_dict):
    """Validate a package-update request against the plugin schema and
    return the validated dict; raises NotFound / ValidationError."""
    model = context["model"]
    user = context["user"]
    package_id = _get_or_bust(data_dict, "id")

    pkg = model.Package.get(package_id)
    context["package"] = pkg
    if pkg is None:
        raise NotFound(_("Package was not found."))
    data_dict["id"] = pkg.id

    # Resolve the form-to-db schema; the AttributeError fallback supports
    # older plugins without form_to_db_schema_options.
    plugin = lib_plugins.lookup_package_plugin(pkg.type)
    try:
        schema = plugin.form_to_db_schema_options(
            {"type": "update", "api": "api_version" in context, "context": context}
        )
    except AttributeError:
        schema = plugin.form_to_db_schema()

    _check_access("package_update", context, data_dict)

    data, errors = _validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)
    return data
def package_list_show_for_reindex(context, dataset_ids):
    '''
    Wraps the default package_show and adds additional information to the resources:
    resource size (for uploaded files) and resource revision timestamp

    :param dataset_ids: ids of the packages to dictize for reindexing
    :returns: list of (optionally validated) package dicts
    '''
    model = context['model']
    context['session'] = model.Session
    dataset_dicts = []
    all_datasets = model.Session.query(model.Package) \
        .filter(model.Package.id.in_(dataset_ids)).all()

    for pkg in all_datasets:
        if pkg is None:
            raise NotFound
        context['package'] = pkg
        context['reindexing'] = True

        # Dead code removed: the original set ``package_dict = None`` and
        # guarded on ``if not package_dict`` (always true), and kept a
        # ``package_dict_validated`` flag that was never set to True.
        package_dict = model_dictize.package_dictize(pkg, context)

        if context.get('for_view'):
            for item in plugins.PluginImplementations(plugins.IPackageController):
                package_dict = item.before_view(package_dict)

        for item in plugins.PluginImplementations(plugins.IPackageController):
            item.read(pkg)

        # Validate through the package-type plugin's show schema unless the
        # caller disabled validation.
        package_plugin = lib_plugins.lookup_package_plugin(package_dict['type'])
        if 'schema' in context:
            schema = context['schema']
        else:
            schema = package_plugin.show_package_schema()
        if schema and context.get('validate', True):
            package_dict, errors = lib_plugins.plugin_validate(
                package_plugin, context, package_dict, schema, 'package_show')

        for item in plugins.PluginImplementations(plugins.IPackageController):
            item.after_show(context, package_dict)

        additional_hdx_package_show_processing(
            context, package_dict, just_for_reindexing=True)
        dataset_dicts.append(package_dict)

    return dataset_dicts
def _get_pkg_template(template_type, package_type=None):
    """Fetch the template named *template_type* from the package-type
    plugin, passing ``package_type`` only when the hook accepts an
    argument."""
    plugin = lookup_package_plugin(package_type)
    template_method = getattr(plugin, template_type)
    accepts_arg = bool(inspect.signature(template_method).parameters)
    return template_method(package_type) if accepts_arg else template_method()
def package_update(context, data_dict):
    """Update an existing package from ``data_dict``.

    Validates against the package-type plugin's form-to-db schema, writes a
    new revision, saves the package, notifies IPackageController plugins and
    returns the updated package via ``package_show``.

    :raises NotFound: if no package matches the given id/name
    :raises ValidationError: if schema validation fails
    """
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    # Reset the scoped session and stash the action context on it so that
    # model-level code can see it.
    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    try:
        # Newer plugin hook: schema may vary by call type / api usage.
        schema = package_plugin.form_to_db_schema_options({
            'type': 'update',
            'api': 'api_version' in context,
            'context': context
        })
    except AttributeError:
        # Older plugins only implement the plain hook.
        schema = package_plugin.form_to_db_schema()

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            package_plugin.check_data_dict(data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, error_summary(errors))

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    pkg = model_save.package_dict_save(data, context)

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
    if not context.get('defer_commit'):
        model.repo.commit()
    return get_action('package_show')(context, data_dict)
def dataset_is_valid(package):
    """
    Check if given dataset is valid. Uses the update schema from the
    package-type plugin. Return True if the dataset is valid.
    """
    # Terms acceptance is forced so it never surfaces as a validation error.
    package['accept-terms'] = u'True'
    plugin = plugins.lookup_package_plugin(package['type'])
    ctx = {'model': model, 'session': model.Session, 'user': c.user}
    _, errors = validate(package, plugin.update_package_schema(), ctx)
    return not errors
def _get_pkg_template(template_type, package_type=None):
    """Return the template provided by the package-type plugin.

    Newer plugins accept the package type as an argument; older ones take no
    arguments.  The hook's signature is inspected instead of catching
    TypeError, so TypeErrors raised *inside* the hook are no longer silently
    retried, and matching CPython's error-message wording
    ("takes 1" / "takes exactly 1") is avoided.
    """
    import inspect

    pkg_plugin = lookup_package_plugin(package_type)
    method = getattr(pkg_plugin, template_type)
    if inspect.signature(method).parameters:
        return method(package_type)
    return method()
def package_create_validate(context, data_dict):
    """Validate *data_dict* against the default create schema without
    persisting; returns validated data or raises ValidationError."""
    model = context['model']
    schema = lib_plugins.lookup_package_plugin().form_to_db_schema()

    _check_access('package_create', context, data_dict)

    data, errors = _validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)
    return data
def package_update(context, data_dict):
    """Update an existing package from ``data_dict``.

    Validates against the package-type plugin's form-to-db schema, writes a
    new revision, saves the package, notifies IPackageController plugins and
    returns the updated package via ``package_show``.

    :raises NotFound: if no package matches the given id/name
    :raises ValidationError: if schema validation fails
    """
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    # Reset the scoped session and stash the action context on it so that
    # model-level code can see it.
    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    try:
        # Newer plugin hook: schema may vary by call type / api usage.
        schema = package_plugin.form_to_db_schema_options({'type': 'update',
                                                           'api': 'api_version' in context,
                                                           'context': context})
    except AttributeError:
        # Older plugins only implement the plain hook.
        schema = package_plugin.form_to_db_schema()

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            package_plugin.check_data_dict(data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, error_summary(errors))

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    pkg = model_save.package_dict_save(data, context)

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
    if not context.get('defer_commit'):
        model.repo.commit()
    return get_action('package_show')(context, data_dict)
def test_create_dataset(self, opts, *args):
    '''An example that creates a dataset using the action api.

    NOTE(review): Python 2 code (print statement, ``unicode`` builtin) —
    will not run under Python 3 without porting.
    '''
    self._fake_request_context()

    # Create a context for action api calls
    context = {
        'model': model,
        'session': model.Session,
        'user': self.site_user.get('name'),
        'ignore_auth': True,
        'api_version': '3',
        'allow_partial_update': False
    }

    # Decide how to handle package identifiers
    if opts.identifier:
        # Create a dataset reusing an existing UUID
        # Note Override the catalog-wide schema inside this context
        # (the default schema would reject a caller-supplied id).
        from ckan.lib.plugins import lookup_package_plugin
        sch1 = lookup_package_plugin().create_package_schema()
        sch1['id'] = [unicode]
        context['schema'] = sch1
    else:
        # Generate a new UUID; use package_create's default behavior
        pass

    # Create an api request body
    pkg_dict = {
        'title': opts.title,
        'name': opts.name,
        'notes': opts.description,
        'license_id': 'cc-zero',
        'dataset_type': u'inspire',
        'owner_org': opts.owner_org,
        'inspire': {
            'title': opts.title,
            'abstract': opts.description,
            'topic_category': ["economy"],
        }
    }

    # If reusing an identifier, add the relevant keys
    if opts.identifier:
        pkg_dict['id'] = opts.identifier
        pkg_dict['inspire']['identifier'] = opts.identifier

    # Perform action
    pkg = get_action('package_create')(context, pkg_dict)
    print 'Created dataset with: id=%(id)s name=%(name)s:' % (pkg)
def test_create_dataset(self, opts, *args):
    '''An example that creates a dataset using the action api.

    NOTE(review): Python 2 code (print statement, ``unicode`` builtin) —
    will not run under Python 3 without porting.
    '''
    self._fake_request_context()

    # Create a context for action api calls
    context = {
        'model': model,
        'session': model.Session,
        'user': self.site_user.get('name'),
        'ignore_auth': True,
        'api_version': '3',
        'allow_partial_update': False
    }

    # Decide how to handle package identifiers
    if opts.identifier:
        # Create a dataset reusing an existing UUID
        # Note Override the catalog-wide schema inside this context
        # (the default schema would reject a caller-supplied id).
        from ckan.lib.plugins import lookup_package_plugin
        sch1 = lookup_package_plugin().create_package_schema()
        sch1['id'] = [unicode]
        context['schema'] = sch1
    else:
        # Generate a new UUID; use package_create's default behavior
        pass

    # Create an api request body
    pkg_dict = {
        'title': opts.title,
        'name': opts.name,
        'notes': opts.description,
        'license_id': 'cc-zero',
        'dataset_type': u'inspire',
        'owner_org': opts.owner_org,
        'inspire': {
            'title': opts.title,
            'abstract': opts.description,
            'topic_category': ["economy"],
        }
    }

    # If reusing an identifier, add the relevant keys
    if opts.identifier:
        pkg_dict['id'] = opts.identifier
        pkg_dict['inspire']['identifier'] = opts.identifier

    # Perform action
    pkg = get_action('package_create')(context, pkg_dict);
    print 'Created dataset with: id=%(id)s name=%(name)s:' %(pkg)
def resource_update(context, data_dict):
    '''Update a resource.

    To update a resource you must be authorized to update the dataset that the resource belongs to.

    For further parameters see ``resource_create()``.

    :param id: the id of the resource to update
    :type id: string

    :returns: the updated resource
    :rtype: string

    '''
    model = context['model']
    user = context['user']
    resource_id = _get_or_bust(data_dict, "id")

    resource = model.Resource.get(resource_id)
    context["resource"] = resource
    if not resource:
        logging.error('Could not find resource ' + resource_id)
        raise NotFound(_('Resource was not found.'))

    _check_access('resource_update', context, data_dict)

    # Schema: caller-supplied, or the 'resources' sub-schema of the
    # owning package's update schema.
    if 'schema' in context:
        schema = context['schema']
    else:
        pkg_plugin = lib_plugins.lookup_package_plugin(
            resource.resource_group.package.type)
        schema = pkg_plugin.update_package_schema()['resources']

    data, errors = _validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    rev.message = context['message'] if 'message' in context \
        else _(u'REST API: Update object %s') % data.get("name", "")

    resource = model_save.resource_dict_save(data, context)
    if not context.get('defer_commit'):
        model.repo.commit()
    return model_dictize.resource_dictize(resource, context)
def package_create_validate(context, data_dict):
    """Validate a package-create request against the default schema; no
    data is saved."""
    model = context["model"]
    schema = lib_plugins.lookup_package_plugin().form_to_db_schema()

    # Reset the scoped session and attach the action context to it.
    model.Session.remove()
    model.Session()._context = context

    check_access("package_create", context, data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, error_summary(errors))
    return data
def get_dataset_validation_error_or_none(pkg_dict, context):
    """Validate *pkg_dict* as a regular dataset and return a
    ValidationError describing any problems, or None if valid."""
    # Deposited datasets are validated in their converted (regular) form.
    if pkg_dict.get('type') == 'deposited-dataset':
        pkg_dict = convert_deposited_dataset_to_regular_dataset(pkg_dict)

    plugin = lib_plugins.lookup_package_plugin('dataset')
    schema = plugin.update_package_schema()
    data, errors = lib_plugins.plugin_validate(
        plugin, context, pkg_dict, schema, 'package_update')

    # owner_org errors are reported against owner_org_dest instead.
    errors.pop('owner_org', None)
    if data.get('owner_org') == 'unknown':
        errors['owner_org_dest'] = ['Missing Value']

    return ValidationError(errors) if errors else None
def package_create(next_action, context, data_dict):
    """Chained ``package_create`` that swaps the default owner_org
    validator for our own before delegating.

    :param next_action: the core action being wrapped
    :param context: action context
    :param data_dict: package dict being created
    """
    plugin = lib_plugins.lookup_package_plugin(data_dict.get('type'))
    schema = context['schema'] if 'schema' in context \
        else plugin.create_package_schema()

    # Replace the default owner_org validator with our own implementation.
    if 'owner_org' in schema:
        schema['owner_org'] = [
            owner_org_validator if v is default_owner_org_validator else v
            for v in schema['owner_org']
        ]
    context['schema'] = schema

    return next_action(context, data_dict)
def _comments_template(self, package_type):
    """Return the comments template from the plugin for *package_type*."""
    plugin = lookup_package_plugin(package_type)
    return plugin.comments_template()
def index_package(self, pkg_dict, defer_commit=False):
    """Prepare ``pkg_dict`` for search indexing and send it to Solr.

    Flattens extras/tags/groups/resources/relationships into indexable
    fields, optionally caches a validated copy of the data dict, normalizes
    dates for Solr, applies IPackageController.before_index hooks and posts
    the document.  Deleted/stateless packages are removed from the index
    instead.

    :param pkg_dict: dictized package (None is ignored)
    :param defer_commit: when True, do not ask Solr to commit immediately
    :raises SearchIndexError: on Solr or connection errors
    """
    if pkg_dict is None:
        return
    # tracking summary values will be stale, never store them
    tracking_summary = pkg_dict.pop('tracking_summary', None)
    for r in pkg_dict.get('resources', []):
        r.pop('tracking_summary', None)
    # Snapshot of the raw dict, stored alongside the index document.
    data_dict_json = json.dumps(pkg_dict)
    if config.get('ckan.cache_validated_datasets', True):
        package_plugin = lib_plugins.lookup_package_plugin(
            pkg_dict.get('type'))
        schema = package_plugin.show_package_schema()
        validated_pkg_dict, errors = lib_plugins.plugin_validate(
            package_plugin, {
                'model': model,
                'session': model.Session
            }, pkg_dict, schema, 'package_show')
        pkg_dict['validated_data_dict'] = json.dumps(
            validated_pkg_dict,
            cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)
    pkg_dict['data_dict'] = data_dict_json
    # add to string field for sorting
    title = pkg_dict.get('title')
    if title:
        pkg_dict['title_string'] = title
    # delete the package if there is no state, or the state is `deleted`
    if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
        return self.delete_package(pkg_dict)
    index_fields = RESERVED_FIELDS + list(pkg_dict.keys())
    # include the extras in the main namespace
    extras = pkg_dict.get('extras', [])
    for extra in extras:
        key, value = extra['key'], extra['value']
        if isinstance(value, (tuple, list)):
            value = " ".join(map(text_type, value))
        # keep only characters Solr field names allow
        key = ''.join([c for c in key if c in KEY_CHARS])
        pkg_dict['extras_' + key] = value
        if key not in index_fields:
            pkg_dict[key] = value
    pkg_dict.pop('extras', None)
    # add tags, removing vocab tags from 'tags' list and adding them as
    # vocab_<tag name> so that they can be used in facets
    non_vocab_tag_names = []
    tags = pkg_dict.pop('tags', [])
    context = {'model': model}
    for tag in tags:
        if tag.get('vocabulary_id'):
            data = {'id': tag['vocabulary_id']}
            vocab = logic.get_action('vocabulary_show')(context, data)
            key = u'vocab_%s' % vocab['name']
            if key in pkg_dict:
                pkg_dict[key].append(tag['name'])
            else:
                pkg_dict[key] = [tag['name']]
        else:
            non_vocab_tag_names.append(tag['name'])
    pkg_dict['tags'] = non_vocab_tag_names
    # add groups
    groups = pkg_dict.pop('groups', [])
    # we use the capacity to make things private in the search index
    if pkg_dict['private']:
        pkg_dict['capacity'] = 'private'
    else:
        pkg_dict['capacity'] = 'public'
    pkg_dict['groups'] = [group['name'] for group in groups]
    # if there is an owner_org we want to add this to groups for index
    # purposes
    if pkg_dict.get('organization'):
        pkg_dict['organization'] = pkg_dict['organization']['name']
    else:
        pkg_dict['organization'] = None
    # tracking
    if not tracking_summary:
        tracking_summary = model.TrackingSummary.get_for_package(
            pkg_dict['id'])
    pkg_dict['views_total'] = tracking_summary['total']
    pkg_dict['views_recent'] = tracking_summary['recent']
    resource_fields = [('name', 'res_name'),
                       ('description', 'res_description'),
                       ('format', 'res_format'),
                       ('url', 'res_url'),
                       ('resource_type', 'res_type')]
    resource_extras = [(e, 'res_extras_' + e) for e
                       in model.Resource.get_extra_columns()]
    # flatten the structure for indexing:
    for resource in pkg_dict.get('resources', []):
        for (okey, nkey) in resource_fields + resource_extras:
            pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
    pkg_dict.pop('resources', None)
    # collect relationship names keyed by relationship type
    rel_dict = collections.defaultdict(list)
    subjects = pkg_dict.pop("relationships_as_subject", [])
    objects = pkg_dict.pop("relationships_as_object", [])
    for rel in objects:
        type = model.PackageRelationship.forward_to_reverse_type(
            rel['type'])
        rel_dict[type].append(
            model.Package.get(rel['subject_package_id']).name)
    for rel in subjects:
        type = rel['type']
        rel_dict[type].append(
            model.Package.get(rel['object_package_id']).name)
    for key, value in six.iteritems(rel_dict):
        if key not in pkg_dict:
            pkg_dict[key] = value
    pkg_dict[TYPE_FIELD] = PACKAGE_TYPE
    # Save dataset type
    pkg_dict['dataset_type'] = pkg_dict['type']
    # clean the dict fixing keys and dates
    # FIXME where are we getting these dirty keys from? can we not just
    # fix them in the correct place or is this something that always will
    # be needed? For my data not changing the keys seems to not cause a
    # problem.
    new_dict = {}
    bogus_date = datetime.datetime(1, 1, 1)
    for key, value in pkg_dict.items():
        key = six.ensure_str(key)
        if key.endswith('_date'):
            try:
                date = parse(value, default=bogus_date)
                if date != bogus_date:
                    value = date.isoformat() + 'Z'
                else:
                    # The date field was empty, so dateutil filled it with
                    # the default bogus date
                    value = None
            except (ValueError, IndexError):
                # unparseable date value: drop the field entirely
                continue
        new_dict[key] = value
    pkg_dict = new_dict
    for k in ('title', 'notes', 'title_string'):
        if k in pkg_dict and pkg_dict[k]:
            pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])
    # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
    # with UTC time (i.e trailing Z)
    # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
    pkg_dict['metadata_created'] += 'Z'
    pkg_dict['metadata_modified'] += 'Z'
    # mark this CKAN instance as data source:
    pkg_dict['site_id'] = config.get('ckan.site_id')
    # Strip a selection of the fields.
    # These fields are possible candidates for sorting search results on,
    # so we strip leading spaces because solr will sort " " before "a" or "A".
    for field_name in ['title']:
        try:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()
        except KeyError:
            pass
    # add a unique index_id to avoid conflicts
    import hashlib
    pkg_dict['index_id'] = hashlib.md5(
        six.b('%s%s' % (pkg_dict['id'],
                        config.get('ckan.site_id')))).hexdigest()
    for item in PluginImplementations(IPackageController):
        pkg_dict = item.before_index(pkg_dict)
    assert pkg_dict, 'Plugin must return non empty package dict on index'
    # permission labels determine visibility in search, can't be set
    # in original dataset or before_index plugins
    labels = lib_plugins.get_permission_labels()
    dataset = model.Package.get(pkg_dict['id'])
    pkg_dict['permission_labels'] = labels.get_dataset_labels(
        dataset) if dataset else []  # TestPackageSearchIndex-workaround
    # send to solr:
    try:
        conn = make_connection()
        commit = not defer_commit
        if not asbool(config.get('ckan.search.solr_commit', 'true')):
            commit = False
        conn.add(docs=[pkg_dict], commit=commit)
    except pysolr.SolrError as e:
        msg = 'Solr returned an error: {0}'.format(
            e.args[0][:1000]  # limit huge responses
        )
        raise SearchIndexError(msg)
    except socket.error as e:
        err = 'Could not connect to Solr using {0}: {1}'.format(
            conn.url, str(e))
        log.error(err)
        raise SearchIndexError(err)
    commit_debug_msg = 'Not committed yet' if defer_commit else 'Committed'
    log.debug('Updated index for %s [%s]' % (pkg_dict.get('name'),
                                             commit_debug_msg))
def index_package(self, pkg_dict, defer_commit=False):
    """Flatten a package dict and send it to the Solr search index.

    Transforms the nested CKAN package dict into the flat field layout
    Solr expects (extras, tags, groups, resources, relationships), then
    posts the document via a pysolr connection.

    NOTE(review): this block uses Python 2-only constructs (``unicode``,
    ``dict.iteritems``, ``except pysolr.SolrError, e``) and will not parse
    under Python 3.

    :param pkg_dict: validated package dict to index; ``None`` is a no-op
    :param defer_commit: when True, do not ask Solr to commit immediately
    :raises SearchIndexError: when Solr rejects the document
    """
    if pkg_dict is None:
        return
    # tracking summary values will be stale, never store them
    tracking_summary = pkg_dict.pop('tracking_summary', None)
    for r in pkg_dict.get('resources', []):
        r.pop('tracking_summary', None)
    # serialise the original dict before it is mutated below
    data_dict_json = json.dumps(pkg_dict)
    if config.get('ckan.cache_validated_datasets', True):
        # also store the dict as it comes out of package_show validation,
        # so search results can skip re-validation
        package_plugin = lib_plugins.lookup_package_plugin(
            pkg_dict.get('type'))
        schema = package_plugin.show_package_schema()
        validated_pkg_dict, errors = lib_plugins.plugin_validate(
            package_plugin, {'model': model, 'session': model.Session},
            pkg_dict, schema, 'package_show')
        pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict,
            cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)
    pkg_dict['data_dict'] = data_dict_json
    # add to string field for sorting
    title = pkg_dict.get('title')
    if title:
        pkg_dict['title_string'] = title
    # delete the package if there is no state, or the state is `deleted`
    if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
        return self.delete_package(pkg_dict)
    # NOTE(review): Python 2 only — dict.keys() returns a view in py3 and
    # cannot be added to a list with `+`.
    index_fields = RESERVED_FIELDS + pkg_dict.keys()
    # include the extras in the main namespace
    extras = pkg_dict.get('extras', [])
    for extra in extras:
        key, value = extra['key'], extra['value']
        if isinstance(value, (tuple, list)):
            value = " ".join(map(unicode, value))
        # strip characters Solr field names cannot contain
        key = ''.join([c for c in key if c in KEY_CHARS])
        pkg_dict['extras_' + key] = value
        if key not in index_fields:
            pkg_dict[key] = value
    pkg_dict.pop('extras', None)
    # add tags, removing vocab tags from 'tags' list and adding them as
    # vocab_<tag name> so that they can be used in facets
    non_vocab_tag_names = []
    tags = pkg_dict.pop('tags', [])
    context = {'model': model}
    for tag in tags:
        if tag.get('vocabulary_id'):
            data = {'id': tag['vocabulary_id']}
            vocab = logic.get_action('vocabulary_show')(context, data)
            key = u'vocab_%s' % vocab['name']
            if key in pkg_dict:
                pkg_dict[key].append(tag['name'])
            else:
                pkg_dict[key] = [tag['name']]
        else:
            non_vocab_tag_names.append(tag['name'])
    pkg_dict['tags'] = non_vocab_tag_names
    # add groups
    groups = pkg_dict.pop('groups', [])
    # we use the capacity to make things private in the search index
    if pkg_dict['private']:
        pkg_dict['capacity'] = 'private'
    else:
        pkg_dict['capacity'] = 'public'
    pkg_dict['groups'] = [group['name'] for group in groups]
    # if there is an owner_org we want to add this to groups for index
    # purposes
    if pkg_dict.get('organization'):
        pkg_dict['organization'] = pkg_dict['organization']['name']
    else:
        pkg_dict['organization'] = None
    # tracking: fall back to a fresh DB lookup when the dict had none
    if not tracking_summary:
        tracking_summary = model.TrackingSummary.get_for_package(
            pkg_dict['id'])
    pkg_dict['views_total'] = tracking_summary['total']
    pkg_dict['views_recent'] = tracking_summary['recent']
    resource_fields = [('name', 'res_name'),
                       ('description', 'res_description'),
                       ('format', 'res_format'),
                       ('url', 'res_url'),
                       ('resource_type', 'res_type')]
    resource_extras = [(e, 'res_extras_' + e) for e
                       in model.Resource.get_extra_columns()]
    # flatten the structure for indexing:
    for resource in pkg_dict.get('resources', []):
        for (okey, nkey) in resource_fields + resource_extras:
            pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
    pkg_dict.pop('resources', None)
    # index relationships under their type name (reverse type for objects)
    rel_dict = collections.defaultdict(list)
    subjects = pkg_dict.pop("relationships_as_subject", [])
    objects = pkg_dict.pop("relationships_as_object", [])
    for rel in objects:
        type = model.PackageRelationship.forward_to_reverse_type(rel['type'])
        rel_dict[type].append(model.Package.get(rel['subject_package_id']).name)
    for rel in subjects:
        type = rel['type']
        rel_dict[type].append(model.Package.get(rel['object_package_id']).name)
    for key, value in rel_dict.iteritems():
        if key not in pkg_dict:
            pkg_dict[key] = value
    pkg_dict[TYPE_FIELD] = PACKAGE_TYPE
    # Save dataset type
    pkg_dict['dataset_type'] = pkg_dict['type']
    # clean the dict fixing keys and dates
    # FIXME where are we getting these dirty keys from? can we not just
    # fix them in the correct place or is this something that always will
    # be needed? For my data not changing the keys seems to not cause a
    # problem.
    new_dict = {}
    bogus_date = datetime.datetime(1, 1, 1)
    for key, value in pkg_dict.items():
        key = key.encode('ascii', 'ignore')
        if key.endswith('_date'):
            try:
                date = parse(value, default=bogus_date)
                if date != bogus_date:
                    value = date.isoformat() + 'Z'
                else:
                    # The date field was empty, so dateutil filled it with
                    # the default bogus date
                    value = None
            except ValueError:
                # unparseable date values are dropped entirely
                continue
        new_dict[key] = value
    pkg_dict = new_dict
    for k in ('title', 'notes', 'title_string'):
        if k in pkg_dict and pkg_dict[k]:
            pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])
    # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
    # with UTC time (i.e trailing Z)
    # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
    pkg_dict['metadata_created'] += 'Z'
    pkg_dict['metadata_modified'] += 'Z'
    # mark this CKAN instance as data source:
    pkg_dict['site_id'] = config.get('ckan.site_id')
    # Strip a selection of the fields.
    # These fields are possible candidates for sorting search results on,
    # so we strip leading spaces because solr will sort " " before "a" or "A".
    for field_name in ['title']:
        try:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()
        except KeyError:
            pass
    # add a unique index_id to avoid conflicts
    import hashlib
    pkg_dict['index_id'] = hashlib.md5('%s%s' % (pkg_dict['id'],config.get('ckan.site_id'))).hexdigest()
    # give plugins a last chance to modify the indexed document
    for item in PluginImplementations(IPackageController):
        pkg_dict = item.before_index(pkg_dict)
    assert pkg_dict, 'Plugin must return non empty package dict on index'
    # send to solr:
    try:
        conn = make_connection()
        commit = not defer_commit
        if not asbool(config.get('ckan.search.solr_commit', 'true')):
            commit = False
        conn.add(docs=[pkg_dict], commit=commit)
    except pysolr.SolrError, e:
        msg = 'Solr returned an error: {0}'.format(
            e[:1000]  # limit huge responses
        )
        raise SearchIndexError(msg)
def _get_pkg_template(template_type, package_type=None):
    """Resolve *template_type* for the given package type's plugin.

    Looks up the IDatasetForm plugin registered for ``package_type`` and
    invokes the accessor named ``template_type`` on it (e.g.
    ``'read_template'``), returning whatever that accessor returns.
    """
    plugin = lookup_package_plugin(package_type)
    template_accessor = getattr(plugin, template_type)
    return template_accessor()
def package_update(context, data_dict):
    """Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
        context, which is the default. Otherwise returns just the dataset id)
    :rtype: dictionary
    """
    model = context["model"]
    user = context["user"]
    # the dataset may be addressed by either id or name
    name_or_id = data_dict.get("id") or data_dict["name"]
    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_("Package was not found."))
    context["package"] = pkg
    # normalise to the canonical id before validation and package_show
    data_dict["id"] = pkg.id
    _check_access("package_update", context, data_dict)
    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    try:
        schema = package_plugin.form_to_db_schema_options(
            {"type": "update", "api": "api_version" in context, "context": context}
        )
    except AttributeError:
        # plugin predates form_to_db_schema_options
        schema = package_plugin.form_to_db_schema()
    if "api_version" not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            package_plugin.check_data_dict(data_dict)
    data, errors = _validate(data_dict, schema, context)
    log.debug(
        "package_update validate_errs=%r user=%s package=%s data=%r",
        errors,
        context.get("user"),
        context.get("package").name if context.get("package") else "",
        data,
    )
    if errors:
        # discard any partial changes made during validation
        model.Session.rollback()
        raise ValidationError(errors)
    # record a revision for this change
    rev = model.repo.new_revision()
    rev.author = user
    if "message" in context:
        rev.message = context["message"]
    else:
        rev.message = _(u"REST API: Update object %s") % data.get("name")
    pkg = model_save.package_dict_save(data, context)
    # owner_org is updated with auth checks bypassed, since the main
    # package_update access check already passed
    context_no_auth = context.copy()
    context_no_auth["ignore_auth"] = True
    _get_action("package_owner_org_update")(context_no_auth, {"id": pkg.id, "organization_id": pkg.owner_org})
    # notify plugins about the edit
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
    if not context.get("defer_commit"):
        model.repo.commit()
    log.debug("Updated object %s" % str(pkg.name))
    return_id_only = context.get("return_id_only", False)
    output = data_dict["id"] if return_id_only else _get_action("package_show")(context, {"id": data_dict["id"]})
    return output
def package_create(context, data_dict):
    '''Create a new dataset (package).

    You must be authorized to create new datasets. If you specify any groups
    for the new dataset, you must also be authorized to edit these groups.

    Plugins may change the parameters of this function depending on the value
    of the ``type`` parameter, see the ``IDatasetForm`` plugin interface.

    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'``
    :type name: string
    :param title: the title of the dataset (optional, default: same as
        ``name``)
    :type title: string
    :param author: the name of the dataset's author (optional)
    :type author: string
    :param author_email: the email address of the dataset's author (optional)
    :type author_email: string
    :param maintainer: the name of the dataset's maintainer (optional)
    :type maintainer: string
    :param maintainer_email: the email address of the dataset's maintainer
        (optional)
    :type maintainer_email: string
    :param license_id: the id of the dataset's license, see ``license_list()``
        for available values (optional)
    :type license_id: license id string
    :param notes: a description of the dataset (optional)
    :type notes: string
    :param url: a URL for the dataset's source (optional)
    :type url: string
    :param version: (optional)
    :type version: string, no longer than 100 characters
    :param state: the current state of the dataset, e.g. ``'active'`` or
        ``'deleted'``, only active datasets show up in search results and
        other lists of datasets, this parameter will be ignored if you are not
        authorized to change the state of the dataset (optional, default:
        ``'active'``)
    :type state: string
    :param type: the type of the dataset (optional), ``IDatasetForm`` plugins
        associate themselves with different dataset types and provide custom
        dataset handling behaviour for these types
    :type type: string
    :param resources: the dataset's resources, see ``resource_create()``
        for the format of resource dictionaries (optional)
    :type resources: list of resource dictionaries
    :param tags: the dataset's tags, see ``tag_create()`` for the format
        of tag dictionaries (optional)
    :type tags: list of tag dictionaries
    :param extras: the dataset's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to datasets, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string), and optionally ``'deleted'``
    :type extras: list of dataset extra dictionaries
    :param relationships_as_object: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_object: list of relationship dictionaries
    :param relationships_as_subject: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_subject: list of relationship dictionaries
    :param groups: the groups to which the dataset belongs (optional), each
        group dictionary should have one or more of the following keys which
        identify an existing group: ``'id'`` (the id of the group, string),
        ``'name'`` (the name of the group, string), ``'title'`` (the title of
        the group, string), to see which groups exist call ``group_list()``
    :type groups: list of dictionaries

    :returns: the newly created dataset (unless 'return_id_only' is set to
        True in the context, in which case just the dataset id will be
        returned)
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']
    package_type = data_dict.get('type')
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    try:
        schema = package_plugin.form_to_db_schema_options({'type':'create',
            'api':'api_version' in context,
            'context': context})
    except AttributeError:
        # plugin predates form_to_db_schema_options
        schema = package_plugin.form_to_db_schema()
    _check_access('package_create', context, data_dict)
    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            package_plugin.check_data_dict(data_dict)
    data, errors = _validate(data_dict, schema, context)
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              data.get('name'), data_dict)
    if errors:
        # discard any partial changes made during validation
        model.Session.rollback()
        raise ValidationError(errors)
    # record a revision for this change
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")
    pkg = model_save.package_dict_save(data, context)
    # the creating user becomes an admin of the new package
    # NOTE(review): user.decode('utf8') assumes Python 2 byte strings
    admins = []
    if user:
        admins = [model.User.by_name(user.decode('utf8'))]
    model.setup_default_user_roles(pkg, admins)
    # Needed to let extensions know the package id
    model.Session.flush()
    data['id'] = pkg.id
    # owner_org update runs with auth bypassed and commit deferred so it
    # participates in this action's transaction
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})
    # notify plugins about the new package
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)
        item.after_create(context, data)
    if not context.get('defer_commit'):
        model.repo.commit()
    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % str(pkg.name))
    return_id_only = context.get('return_id_only', False)
    output = context['id'] if return_id_only \
        else _get_action('package_show')(context, {'id':context['id']})
    return output
def blacklist_package_update(context, data_dict):
    """Update a dataset while also accepting a ``black_list`` extra field.

    Variant of CKAN's ``package_update`` that injects a ``black_list``
    key into the update schema (stored as a package extra) and bumps
    ``metadata_modified`` directly on the row, bypassing revisioning for
    that column.

    :param id: the name or id of the dataset to update (or ``name``)
    :returns: the updated dataset, or just its id when ``return_id_only``
        is set in the context
    """
    model = context['model']
    user = context['user']
    # the dataset may be addressed by either id or name
    name_or_id = data_dict.get("id") or data_dict['name']
    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type
    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()
    # accept the extra `black_list` field and store it as a package extra
    schema.update({
        'black_list': [toolkit.get_validator('ignore_missing'),
                       toolkit.get_converter('convert_to_extras')]
    })
    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    # NOTE(review): unlike core package_update, `errors` is not checked
    # here before saving — confirm whether skipping ValidationError is
    # intentional for this action.
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")
    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)
    pkg = model_save.package_dict_save(data, context)
    # owner_org update runs with auth bypassed and commit deferred so it
    # participates in this action's transaction
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    get_action('package_owner_org_update')(context_org_update,
                                           {'id': pkg.id,
                                            'organization_id': pkg.owner_org})
    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id
    # notify plugins about the edit
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)
    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': data})
    if not context.get('defer_commit'):
        model.repo.commit()
    return_id_only = context.get('return_id_only', False)
    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)
    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else get_action('package_show')(context, {'id': data_dict['id']})
    return output
def dataset_import(context, data_dict):
    '''Import a dataset from a given XML source.

    This action, depending also on the value of its flags, can raise one of:
      * actions.Invalid: received invalid input
      * actions.IdentifierConflict: a package with the same identifier already
        exists
      * actions.NameConflict: a package with the same name already exists
      * toolkit.ValidationError: validation fails while trying to create a
        package

    :param source: This is either a string representing a (local or external)
        URL or a file-like object.
    :type q: string or file-like
    :param dtype: the dataset-type i.e. the schema of imported metadata
    :type dtype: string
    :param owner_org: the machine-name for the owner organization
    :type owner_org: string
    :param continue_on_errors: hint on what to do when validation fails
    :type continue_on_errors: boolean
    :param rename_if_conflict: hint on what to do when a name conflict is
        encountered
    :type rename_if_conflict: boolean

    :rtype: basic info for the newly created package
    '''
    # Read parameters
    try:
        source = data_dict['source']
    except KeyError:
        raise Invalid({'source': 'The `source` parameter is required'})
    dtype = data_dict.get('dtype', 'inspire')
    try:
        owner_org = data_dict['owner_org']
    except KeyError:
        raise Invalid({'owner_org': 'The `owner_org` parameter is required.\n'
            'Hint: Use `organization_list_for_user` to retrieve a valid list.'})
    allow_rename = data_dict.get('rename_if_conflict', False)
    allow_validation_errors = data_dict.get('continue_on_errors', False)
    # Fetch raw XML data
    xmldata = None
    if isinstance(source, basestring):
        # Assume source is a URL.
        # FIX: also accept https:// — previously such URLs were wrongly
        # treated as site-relative paths and prefixed with ckan.site_url.
        if not source.startswith(('http://', 'https://')):
            source = pylons.config['ckan.site_url'] + source.strip('/')
        source = urlparse.urlparse(source)
        r1 = requests.get(source.geturl())
        if not r1.ok:
            raise Invalid({'source': _('Cannot fetch metadata from source URL')})
        elif not r1.headers['content-type'] in ['application/xml', 'text/xml']:
            raise Invalid({'source': _('The source does not contain XML data')})
        else:
            xmldata = r1.content
    else:
        # Assume source is a file-like object
        try:
            xmldata = source.read()
        except Exception:
            # FIX: narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            raise Invalid({'source': _('Cannot read from source')})
    # Parse XML data as metadata of `dtype` schema
    obj = make_metadata(dtype)
    try:
        obj = xml_serializer_for(obj).loads(xmldata)
    except AssertionError as ex:
        # assertion failures indicate programming errors: re-raise as-is
        raise ex
    except Exception as ex:
        # Map all parse exceptions to Invalid
        log.info('Failed to parse XML metadata: %s', ex)
        raise Invalid({'source': _('The given XML file is malformed: %s') % (ex)})
    # Prepare package dict
    pkg_dict = {'version': '1.0'}
    pkg_dict.update(obj.deduce_fields())
    pkg_dict.update({
        'owner_org': owner_org,
        'type': 'dataset',
        'dataset_type': dtype,
        dtype: obj.to_dict(flat=False),
    })
    # If an identifier is passed, check that this is not already present.
    # Note This is no guarantee that the identifier will be available when
    # `package_create` is actually invoked.
    identifier = pkg_dict.get('id')
    if identifier and _check_package_id_exists(context, identifier):
        raise IdentifierConflict({
            'id': _('A package identified as %s already exists') % (identifier)})
    # Find and assign a machine-name for this package
    # Note We just find the 1st available name. As noted before, this is no
    # guarantee that will be available when `package_create` is invoked.
    basename = pkg_dict['name']
    max_num_probes = 10 if allow_rename else 1
    name = _find_a_package_name(context, basename, max_num_probes)
    if not name:
        raise NameConflict({
            'name': _('The package name %r is not available') % (basename)})
    else:
        pkg_dict['name'] = name
        # make the title reflect the rename suffix, if any
        pkg_dict['title'] += ' ' + name[len(basename):]
    # Create/Update package
    schema1, validation_errors, error_message = None, None, None
    if identifier:
        # Must override catalog-wide schema for actions in this context
        schema1 = lookup_package_plugin().create_package_schema()
        schema1['id'] = [unicode]
    ctx = _make_context(context)
    if schema1:
        ctx['schema'] = schema1
    try:
        pkg_dict = _get_action('package_create')(ctx, data_dict=pkg_dict)
    except toolkit.ValidationError as ex:
        if 'name' in ex.error_dict:
            # The name is probably taken, re-raise exception
            raise ex
        elif allow_validation_errors:
            # Save errors and retry with a different context
            validation_errors = ex.error_dict
            error_message = ex.message or _('The dataset contains invalid metadata')
            ctx = _make_context(context, skip_validation=True)
            if schema1:
                ctx['schema'] = schema1
            pkg_dict = _get_action('package_create')(ctx, data_dict=pkg_dict)
            log.warn('Forced to create an invalid package as %r ' % (name))
        else:
            raise ex
    assert name == pkg_dict['name']
    assert (not identifier) or (identifier == pkg_dict['id'])
    return {
        # Provide basic package fields
        'id': pkg_dict['id'],
        'name': name,
        'title': pkg_dict['title'],
        'state': pkg_dict.get('state'),
        # Provide details on validation (meaningfull if allow_validation_errors)
        'validation': {
            'message': error_message,
            'errors': validation_errors,
        },
    }
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset
    :rtype: dictionary

    '''
    model = context['model']
    user = context['user']
    # the dataset may be addressed by either id or name
    name_or_id = data_dict.get("id") or data_dict['name']
    # NOTE(review): the session is discarded and recreated here, and the
    # action context is attached to the new session — presumably so
    # session event hooks can see it; confirm against the session
    # extension that reads `_context`.
    model.Session.remove()
    model.Session()._context = context
    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id
    _check_access('package_update', context, data_dict)
    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    try:
        schema = package_plugin.form_to_db_schema_options({
            'type': 'update',
            'api': 'api_version' in context,
            'context': context
        })
    except AttributeError:
        # plugin predates form_to_db_schema_options
        schema = package_plugin.form_to_db_schema()
    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            package_plugin.check_data_dict(data_dict)
    data, errors = _validate(data_dict, schema, context)
    if errors:
        # discard any partial changes made during validation
        model.Session.rollback()
        raise ValidationError(errors, _error_summary(errors))
    # record a revision for this change
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")
    pkg = model_save.package_dict_save(data, context)
    # notify plugins about the edit
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
    if not context.get('defer_commit'):
        model.repo.commit()
    # always return the full updated dataset dict
    return _get_action('package_show')(context, data_dict)
def package_create(fb, context, data_dict):
    """Create a new dataset, substituting a custom owner_org validator.

    Chained variant of CKAN's ``package_create`` that replaces the default
    ``owner_org_validator`` in the schema with this extension's own
    (``uds_oov``).

    :param fb: NOTE(review): presumably the fallback/next action in a
        chained-action setup; it is never called in this body — confirm
        whether the core action should be delegated to instead.
    :param context: standard CKAN action context
    :param data_dict: dataset fields, see core ``package_create``
    :returns: the created dataset dict, or just its id when
        ``return_id_only`` is set in the context
    """
    model = context['model']
    user = context['user']
    package_type = data_dict.get('type')
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.create_package_schema()
    # We modify the schema here to replace owner_org_validator by our own
    if 'owner_org' in schema:
        schema['owner_org'] = [uds_oov if f is default_oov else f
                               for f in schema['owner_org']]
    check_access('package_create', context, data_dict)
    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)
    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_create')
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'), data.get('name'), data_dict)
    if errors:
        # discard any partial changes made during validation
        model.Session.rollback()
        raise ValidationError(errors)
    # record a revision for this change
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")
    # the creating user becomes an admin and the recorded creator
    # NOTE(review): user.decode('utf8') assumes Python 2 byte strings
    admins = []
    if user:
        user_obj = model.User.by_name(user.decode('utf8'))
        if user_obj:
            admins = [user_obj]
            data['creator_user_id'] = user_obj.id
    pkg = model_save.package_dict_save(data, context)
    model.setup_default_user_roles(pkg, admins)
    # Needed to let extensions know the package id
    model.Session.flush()
    data['id'] = pkg.id
    # owner_org update runs with auth bypassed and commit deferred so it
    # participates in this action's transaction
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    get_action('package_owner_org_update')(context_org_update,
                                           {'id': pkg.id,
                                            'organization_id': pkg.owner_org})
    # notify plugins about the new package
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)
        item.after_create(context, data)
    if not context.get('defer_commit'):
        model.repo.commit()
    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)
    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)
    return_id_only = context.get('return_id_only', False)
    output = context['id'] if return_id_only \
        else get_action('package_show')(context, {'id': context['id']})
    return output
def _setup_template_variables(self, context, data_dict, package_type=None):
    """Delegate template-variable setup to the package-type's plugin."""
    plugin = lookup_package_plugin(package_type)
    return plugin.setup_template_variables(context, data_dict)
def _new_template(self, package_type):
    """Return the 'new dataset' template path for *package_type*."""
    plugin = lookup_package_plugin(package_type)
    return plugin.new_template()
def dataset_import(context, data_dict):
    '''Import a dataset from a given XML source.

    This action, depending also on the value of its flags, can raise one of:
      * actions.Invalid: received invalid input
      * actions.IdentifierConflict: a package with the same identifier already
        exists
      * actions.NameConflict: a package with the same name already exists
      * toolkit.ValidationError: validation fails while trying to create a
        package

    :param source: This is either a string representing a (local or external)
        URL or a file-like object.
    :type q: string or file-like
    :param dtype: the dataset-type i.e. the schema of imported metadata
    :type dtype: string
    :param owner_org: the machine-name for the owner organization
    :type owner_org: string
    :param continue_on_errors: hint on what to do when validation fails
    :type continue_on_errors: boolean
    :param rename_if_conflict: hint on what to do when a name conflict is
        encountered
    :type rename_if_conflict: boolean

    :rtype: basic info for the newly created package
    '''
    # Read parameters
    try:
        source = data_dict['source']
    except KeyError:
        raise Invalid({'source': 'The `source` parameter is required'})
    dtype = data_dict.get('dtype', 'datacite')
    try:
        owner_org = data_dict['owner_org']
    except KeyError:
        raise Invalid({
            'owner_org': 'The `owner_org` parameter is required.\n'
            'Hint: Use `organization_list_for_user` to retrieve a valid list.'
        })
    allow_rename = data_dict.get('rename_if_conflict', False)
    allow_validation_errors = data_dict.get('continue_on_errors', False)
    log.debug('dtype: %s, source %s, source type: %s',
              dtype, source, type(source))
    # Fetch raw XML data
    xmldata = None
    if isinstance(source, basestring):
        # Assume source is a URL
        # NOTE(review): only http:// is recognised — an https:// source
        # would be wrongly prefixed with ckan.site_url; confirm and extend
        # the check to ('http://', 'https://') if external https sources
        # are expected.
        if not source.startswith('http://'):
            source = pylons.config['ckan.site_url'] + source.strip('/')
        source = urlparse.urlparse(source)
        r1 = requests.get(source.geturl())
        if not r1.ok:
            raise Invalid(
                {'source': _('Cannot fetch metadata from source URL')})
        elif not r1.headers['content-type'] in ['application/xml', 'text/xml']:
            raise Invalid(
                {'source': _('The source does not contain XML data')})
        else:
            xmldata = r1.content
    else:
        # Assume source is a file-like object
        try:
            log.debug('source is %s', source)
            xmldata = source.read()
            log.debug('xmldata is %s', xmldata)
        except:
            # NOTE(review): bare except also swallows KeyboardInterrupt /
            # SystemExit; consider `except Exception:`.
            raise Invalid({'source': _('Cannot read from source')})
    # Parse XML data as metadata of `dtype` schema
    obj = make_metadata(dtype)
    log.debug('obj is: %s', obj)
    try:
        obj = xml_serializer_for(obj).loads(xmldata)
    except AssertionError as ex:
        # assertion failures indicate programming errors: re-raise as-is
        raise ex
    except Exception as ex:
        # Map all parse exceptions to Invalid
        log.info('Failed to parse XML metadata: %s', ex)
        raise Invalid(
            {'source': _('The given XML file is malformed: %s') % (ex)})
    # Prepare package dict
    log.debug('updated obj is: %s', obj)
    pkg_dict = {'version': '1.0'}
    pkg_dict.update(obj.deduce_fields())
    pkg_dict.update({
        'owner_org': owner_org,
        'type': 'dataset',
        'dataset_type': dtype,
        dtype: obj.to_dict(flat=False),
    })
    log.debug('pkg_dict: %s', pkg_dict)
    # If an identifier is passed, check that this is not already present.
    # Note This is no guarantee that the identifier will be available when
    # `package_create` is actually invoked.
    identifier = pkg_dict.get('id')
    if identifier and _check_package_id_exists(context, identifier):
        raise IdentifierConflict({
            'id': _('A package identified as %s already exists') % (identifier)
        })
    # Find and assign a machine-name for this package
    # Note We just find the 1st available name. As noted before, this is no
    # guarantee that will be available when `package_create` is invoked.
    basename = pkg_dict['name']
    max_num_probes = 10 if allow_rename else 1
    name = _find_a_package_name(context, basename, max_num_probes)
    if not name:
        raise NameConflict(
            {'name': _('The package name %r is not available') % (basename)})
    else:
        pkg_dict['name'] = name
        # make the title reflect the rename suffix, if any
        pkg_dict['title'] += ' ' + name[len(basename):]
    # add core fields description and subject
    # NOTE(review): the 'datacite' key is hard-coded here although `dtype`
    # is configurable — a non-datacite dtype would raise KeyError; confirm
    # whether this action is datacite-only.
    pkg_dict['notes'] = pkg_dict['datacite']['abstract']
    pkg_dict['closed_tag'] = pkg_dict['datacite']['subject_closed']
    #log.debug('abstract %s', pkg_dict['datacite']['abstract'])
    # Create/Update package
    schema1, validation_errors, error_message = None, None, None
    if identifier:
        # Must override catalog-wide schema for actions in this context
        schema1 = lookup_package_plugin().create_package_schema()
        schema1['id'] = [unicode]
    ctx = _make_context(context)
    if schema1:
        ctx['schema'] = schema1
    try:
        pkg_dict = _get_action('package_create')(ctx, data_dict=pkg_dict)
    except toolkit.ValidationError as ex:
        if 'name' in ex.error_dict:
            # The name is probably taken, re-raise exception
            raise ex
        elif allow_validation_errors:
            # Save errors and retry with a different context
            validation_errors = ex.error_dict
            error_message = ex.message or _(
                'The dataset contains invalid metadata')
            ctx = _make_context(context, skip_validation=True)
            if schema1:
                ctx['schema'] = schema1
            pkg_dict = _get_action('package_create')(ctx, data_dict=pkg_dict)
            log.warn('Forced to create an invalid package as %r ' % (name))
        else:
            raise ex
    assert name == pkg_dict['name']
    assert (not identifier) or (identifier == pkg_dict['id'])
    return {
        # Provide basic package fields
        'id': pkg_dict['id'],
        'name': name,
        'title': pkg_dict['title'],
        'state': pkg_dict.get('state'),
        # Provide details on validation (meaningfull if allow_validation_errors)
        'validation': {
            'message': error_message,
            'errors': validation_errors,
        },
    }
def _history_template(self, package_type):
    """Return the history-page template for *package_type*.

    Delegates to whichever package plugin is registered for that type.
    """
    plugin = lookup_package_plugin(package_type)
    return plugin.history_template()
def import_stage(self, harvest_object):
    """Import one harvested DCAT dataset into CKAN.

    Depending on the harvest object's ``status`` extra and on whether a
    dataset with the same guid already exists, this deletes, updates or
    creates the corresponding package via the action API.

    :param harvest_object: the harvest object being imported; ``content``
        is expected to hold the dataset dict as a JSON string.
    :returns: ``True`` on success, ``False`` after saving an object error,
        or the string ``'unchanged'`` when the dataset was skipped.
    """
    log.debug('In DCATRDFHarvester import_stage')

    status = self._get_object_extra(harvest_object, 'status')
    if status == 'delete':
        # Delete package
        context = {
            'model': model,
            'session': model.Session,
            'user': self._get_user_name(),
            'ignore_auth': True
        }

        p.toolkit.get_action('package_delete')(
            context, {'id': harvest_object.package_id})
        log.info('Deleted package {0} with guid {1}'.format(
            harvest_object.package_id, harvest_object.guid))

        return True

    if harvest_object.content is None:
        self._save_object_error(
            'Empty content for object {0}'.format(harvest_object.id),
            harvest_object, 'Import')
        return False

    try:
        dataset = json.loads(harvest_object.content)
    except ValueError:
        self._save_object_error(
            'Could not parse content for object {0}'.format(
                harvest_object.id),
            harvest_object, 'Import')
        return False

    # Get the last harvested object (if any)
    previous_object = model.Session.query(HarvestObject) \
        .filter(HarvestObject.guid == harvest_object.guid) \
        .filter(
            HarvestObject.current == True  # noqa
        ).first()

    # Flag previous object as not current anymore
    if previous_object:
        previous_object.current = False
        previous_object.add()

    # Flag this object as the current one
    harvest_object.current = True
    harvest_object.add()

    context = {
        'user': self._get_user_name(),
        'return_id_only': True,
        'ignore_auth': True,
    }

    # Let harvester subclasses adjust the dataset dict before import
    dataset = self.modify_package_dict(dataset, {}, harvest_object)

    # Check if a dataset with the same guid exists
    existing_dataset = self._get_existing_dataset(harvest_object.guid)

    try:
        package_plugin = lib_plugins.lookup_package_plugin(
            dataset.get('type', None))
        if existing_dataset:
            package_schema = package_plugin.update_package_schema()
            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                package_schema = harvester.update_package_schema_for_update(
                    package_schema)
            context['schema'] = package_schema

            # Don't change the dataset name even if the title has changed
            dataset['name'] = existing_dataset['name']
            dataset['id'] = existing_dataset['id']

            harvester_tmp_dict = {}

            # check if resources already exist based on their URI
            existing_resources = existing_dataset.get('resources')
            resource_mapping = {
                r.get('uri'): r.get('id')
                for r in existing_resources if r.get('uri')
            }
            for resource in dataset.get('resources'):
                res_uri = resource.get('uri')
                if res_uri and res_uri in resource_mapping:
                    resource['id'] = resource_mapping[res_uri]

            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                harvester.before_update(harvest_object, dataset,
                                        harvester_tmp_dict)

            try:
                if dataset:
                    # Save reference to the package on the object
                    harvest_object.package_id = dataset['id']
                    harvest_object.add()

                    p.toolkit.get_action('package_update')(context, dataset)
                else:
                    log.info('Ignoring dataset %s' % existing_dataset['name'])
                    return 'unchanged'
            except p.toolkit.ValidationError as e:
                self._save_object_error(
                    'Update validation Error: %s' % str(e.error_summary),
                    harvest_object, 'Import')
                return False

            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                err = harvester.after_update(harvest_object, dataset,
                                             harvester_tmp_dict)

                if err:
                    self._save_object_error(
                        'RDFHarvester plugin error: %s' % err,
                        harvest_object, 'Import')
                    return False

            log.info('Updated dataset %s' % dataset['name'])

        else:
            package_schema = package_plugin.create_package_schema()
            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                package_schema = harvester.update_package_schema_for_create(
                    package_schema)
            context['schema'] = package_schema

            # We need to explicitly provide a package ID
            dataset['id'] = str(uuid.uuid4())
            package_schema['id'] = [str]

            harvester_tmp_dict = {}

            name = dataset['name']
            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                harvester.before_create(harvest_object, dataset,
                                        harvester_tmp_dict)

            try:
                if dataset:
                    # Save reference to the package on the object
                    harvest_object.package_id = dataset['id']
                    harvest_object.add()

                    # Defer constraints and flush so the dataset can be
                    # indexed with the harvest object id (on the after_show
                    # hook from the harvester plugin)
                    model.Session.execute(
                        'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED'
                    )
                    model.Session.flush()

                    p.toolkit.get_action('package_create')(context, dataset)
                else:
                    log.info('Ignoring dataset %s' % name)
                    return 'unchanged'
            except p.toolkit.ValidationError as e:
                self._save_object_error(
                    'Create validation Error: %s' % str(e.error_summary),
                    harvest_object, 'Import')
                return False

            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                err = harvester.after_create(harvest_object, dataset,
                                             harvester_tmp_dict)

                if err:
                    self._save_object_error(
                        'RDFHarvester plugin error: %s' % err,
                        harvest_object, 'Import')
                    return False

            log.info('Created dataset %s' % dataset['name'])

    except Exception as e:
        # Catch-all so a single bad dataset doesn't abort the whole job;
        # the traceback is saved as an object error for later inspection
        self._save_object_error(
            'Error importing dataset %s: %r / %s' %
            (dataset.get('name', ''), e, traceback.format_exc()),
            harvest_object, 'Import')
        return False

    finally:
        # Commit regardless of outcome so the current/previous-object flags
        # set above are persisted
        model.Session.commit()

    return True
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
              context, which is the default. Otherwise returns just the
              dataset id)
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    # FIXME: first modifications to package_updade begin here:
    # tag strings are reconstructed because validators are stripping
    # tags passed and only taking taks as tag_string values
    # image upload support has also been added here
    old_data = get_action('package_show')(context, {'id': pkg.id})

    '''
    Constructing the tag_string from the given tags.
    There must be at least one tag, otherwise the tag_string will be empty
    and a validation error will be raised.
    '''
    if not data_dict.get('tag_string'):
        data_dict['tag_string'] = ', '.join(
            h.dict_list_reduce(data_dict.get('tags', {}), 'name'))

    # Carry over every stored field the caller did not supply, so a partial
    # data_dict does not wipe existing metadata
    for key, value in old_data.iteritems():
        if key not in data_dict:
            data_dict[key] = value

    # data_dict['resources'] = data_dict.get('resources',
    #                                        old_data.get('resources'))

    # iso_topic_cat = data_dict.get('iso_topic_string', [])
    # if isinstance(iso_topic_cat, basestring):
    #     iso_topic_cat = [iso_topic_cat]
    #
    # data_dict['iso_topic_string'] = ','.join(iso_topic_cat)

    # Set the package last modified date
    data_dict['record_last_modified'] = str(datetime.date.today())

    # If the Created Date has not yet been set, then set it
    # NOTE(review): 'edc_state' is accessed without .get(); presumably it is
    # always present after the old_data merge above — confirm
    if data_dict['edc_state'] == 'DRAFT' and not data_dict.get(
            'record_create_date'):
        data_dict['record_create_date'] = str(datetime.date.today())

    # If the Publish Date has not yet been set, then set it
    if data_dict['edc_state'] == 'PUBLISHED' and not data_dict.get(
            'record_publish_date'):
        data_dict['record_publish_date'] = str(datetime.date.today())

    # If the Archive Date has not yet been set, then set it
    if data_dict['edc_state'] == 'ARCHIVED' and not data_dict.get(
            'record_archive_date'):
        data_dict['record_archive_date'] = str(datetime.date.today())

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    image_url = old_data.get('image_url', None)

    upload = uploader.Upload('edc', image_url)
    upload.update_data_dict(data_dict, 'image_url', 'image_upload',
                            'clear_upload')

    # Adding image display url for the uploaded image
    image_url = data_dict.get('image_url')
    data_dict['image_display_url'] = image_url

    # A non-http URL means the image was uploaded locally, so point the
    # display URL at the local uploads directory instead
    if image_url and not image_url.startswith('http'):
        image_url = munge.munge_filename(image_url)
        data_dict['image_display_url'] = h.url_for_static(
            'uploads/edc/%s' % data_dict.get('image_url'), qualified=True)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    # FIXME: modifications to package_update end here^

    data, errors = lib_plugins.plugin_validate(package_plugin, context,
                                               data_dict, schema,
                                               'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    upload.upload(uploader.get_max_image_size())

    # TODO the next two blocks are copied from ckan/ckan/logic/action/update.py
    # This codebase is currently hard to maintain because large chunks of the
    # CKAN action API and the CKAN controllers are simply overriden. This is
    # probably worse than just forking CKAN would have been, because in that
    # case at least we could track changes. - @deniszgonjanin

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {
                'model': context['model'],
                'user': context['user'],
                'ignore_auth': True
            }, {
                'package': data
            })

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    '''
    Send state change notifications if required;
    Added by Khalegh Mamakani
    Using a thread to run the job in the background so that package_update
    will not wait for notifications sending.
    '''
    old_state = old_data.get('edc_state')

    context = {
        'model': model,
        'session': model.Session,
        'user': c.user or c.author,
        'auth_user_obj': c.userobj
    }

    dataset_url = config.get('ckan.site_url') + h.url_for(
        controller='package', action="read", id=data_dict['name'])

    import threading

    notify_thread = threading.Thread(target=check_record_state,
                                     args=(context, old_state, data_dict,
                                           g.site_title, g.site_url,
                                           dataset_url))
    notify_thread.start()

    return output
def _db_to_form_schema(self, package_type=None):
    """Interface hook that converts data from the database into a format
    suitable for the form (optional).

    Delegates to the package plugin registered for *package_type*.
    """
    plugin = lookup_package_plugin(package_type)
    return plugin.db_to_form_schema()
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
              context, which is the default. Otherwise returns just the
              dataset id)
    :rtype: dictionary
    '''
    # NOTE(review): this is a near-duplicate of the other EDC package_update
    # override in this file; the main difference is that this one validates
    # with _validate() instead of lib_plugins.plugin_validate().
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    # FIXME: first modifications to package_updade begin here:
    # tag strings are reconstructed because validators are stripping
    # tags passed and only taking taks as tag_string values
    # image upload support has also been added here
    old_data = get_action('package_show')(context, {'id': pkg.id})

    '''
    Constructing the tag_string from the given tags.
    There must be at least one tag, otherwise the tag_string will be empty
    and a validation error will be raised.
    '''
    if not data_dict.get('tag_string'):
        data_dict['tag_string'] = ', '.join(
            h.dict_list_reduce(data_dict.get('tags', {}), 'name'))

    # Carry over every stored field the caller did not supply, so a partial
    # data_dict does not wipe existing metadata
    for key, value in old_data.iteritems():
        if key not in data_dict:
            data_dict[key] = value

    # data_dict['resources'] = data_dict.get('resources',
    #                                        old_data.get('resources'))

    # iso_topic_cat = data_dict.get('iso_topic_string', [])
    # if isinstance(iso_topic_cat, basestring):
    #     iso_topic_cat = [iso_topic_cat]
    #
    # data_dict['iso_topic_string'] = ','.join(iso_topic_cat)

    # Set the package last modified date
    data_dict['record_last_modified'] = str(datetime.date.today())

    # If the Created Date has not yet been set, then set it
    if data_dict['edc_state'] == 'DRAFT' and not data_dict.get(
            'record_create_date'):
        data_dict['record_create_date'] = str(datetime.date.today())

    # If the Publish Date has not yet been set, then set it
    if data_dict['edc_state'] == 'PUBLISHED' and not data_dict.get(
            'record_publish_date'):
        data_dict['record_publish_date'] = str(datetime.date.today())

    # If the Archive Date has not yet been set, then set it
    if data_dict['edc_state'] == 'ARCHIVED' and not data_dict.get(
            'record_archive_date'):
        data_dict['record_archive_date'] = str(datetime.date.today())

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    image_url = old_data.get('image_url', None)

    upload = uploader.Upload('edc', image_url)
    upload.update_data_dict(data_dict, 'image_url', 'image_upload',
                            'clear_upload')

    # Adding image display url for the uploaded image
    image_url = data_dict.get('image_url')
    data_dict['image_display_url'] = image_url

    # A non-http URL means the image was uploaded locally, so point the
    # display URL at the local uploads directory instead
    if image_url and not image_url.startswith('http'):
        image_url = munge.munge_filename(image_url)
        data_dict['image_display_url'] = h.url_for_static(
            'uploads/edc/%s' % data_dict.get('image_url'), qualified=True)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    # FIXME: modifications to package_update end here^

    data, errors = _validate(data_dict, schema, context)
    # log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
    #           errors, context.get('user'),
    #           context.get('package').name if context.get('package') else '',
    #           data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    upload.upload(uploader.get_max_image_size())

    # TODO the next two blocks are copied from ckan/ckan/logic/action/update.py
    # This codebase is currently hard to maintain because large chunks of the
    # CKAN action API and the CKAN controllers are simply overriden. This is
    # probably worse than just forking CKAN would have been, because in that
    # case at least we could track changes. - @deniszgonjanin

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': data})

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    '''
    Send state change notifications if required;
    Added by Khalegh Mamakani
    Using a thread to run the job in the background so that package_update
    will not wait for notifications sending.
    '''
    old_state = old_data.get('edc_state')

    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj}

    dataset_url = config.get('ckan.site_url') + h.url_for(
        controller='package', action="read", id=data_dict['name'])

    import threading

    notify_thread = threading.Thread(target=check_record_state,
                                     args=(context, old_state, data_dict,
                                           g.site_title, g.site_url,
                                           dataset_url))
    notify_thread.start()

    return output
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
              context, which is the default. Otherwise returns just the
              dataset id)
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    return output
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    .. note:: Update methods may delete parameters not explicitly provided in
        the data_dict. If you want to edit only a specific attribute use
        `package_patch` instead. It is recommended to call
        :py:func:`ckan.logic.action.get.package_show`, make the desired
        changes to the result, and then call ``package_update()`` with it.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    For further parameters see
    :py:func:`~ckan.logic.action.create.package_create`.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if ``'return_package_dict'`` is ``True`` in
              the context, which is the default. Otherwise returns just the
              dataset id)
    :rtype: dictionary
    '''
    model = context['model']
    session = context['session']
    name_or_id = data_dict.get('id') or data_dict.get('name')
    if name_or_id is None:
        raise ValidationError({'id': _('Missing value')})

    pkg = model.Package.get(name_or_id)

    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg

    # immutable fields
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type

    _check_access('package_update', context, data_dict)

    user = context['user']
    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    # Collect one uploader per resource now, so each upload can be performed
    # after the resources have been given their final ids below
    resource_uploads = []
    for resource in data_dict.get('resources', []):
        # file uploads/clearing
        upload = uploader.get_resource_uploader(resource)

        if 'mimetype' not in resource:
            if hasattr(upload, 'mimetype'):
                resource['mimetype'] = upload.mimetype

        if 'size' not in resource and 'url_type' in resource:
            if hasattr(upload, 'filesize'):
                resource['size'] = upload.filesize

        resource_uploads.append(upload)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    for index, (resource, upload) in enumerate(
            zip(data.get('resources', []), resource_uploads)):
        resource['id'] = pkg.resources[index].id

        upload.upload(resource['id'], uploader.get_max_resource_size())

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_dataset_update(context, data)

    # Create activity
    if not pkg.private:
        user_obj = model.User.by_name(user)
        if user_obj:
            user_id = user_obj.id
        else:
            user_id = 'not logged in'

        activity = pkg.activity_stream_item('changed', user_id)
        session.add(activity)

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    return output
def _form_to_db_schema(self, package_type=None):
    """Return the schema for converting form data into the database
    representation, as provided by the plugin for *package_type*.
    """
    plugin = lookup_package_plugin(package_type)
    return plugin.form_to_db_schema()
def index_package(self, pkg_dict, defer_commit=False):
    """Flatten a package dict into a Solr document and send it to the index.

    Extras, resources, tags and relationships are folded into top-level
    fields, keys and ``*_date`` values are cleaned into a Solr-friendly
    form, and the document is posted via ``make_connection()``.  Packages
    whose state is not active are handed to ``self.delete_package``
    instead of being indexed.

    NOTE: this is Python 2 code (``unicode``, ``iteritems``,
    ``except X, e`` syntax).

    :param pkg_dict: package dict to index, or None (no-op)
    :param defer_commit: when True, do not request an immediate Solr commit
    :raises SearchIndexError: if Solr rejects the document
    """
    if pkg_dict is None:
        return

    # Keep a pristine JSON copy of the incoming dict before it is mutated
    data_dict_json = json.dumps(pkg_dict)

    if config.get("ckan.cache_validated_datasets", True):
        package_plugin = lib_plugins.lookup_package_plugin(
            pkg_dict.get("type"))
        schema = package_plugin.show_package_schema()
        validated_pkg_dict, errors = lib_plugins.plugin_validate(
            package_plugin, {"model": model, "session": model.Session},
            pkg_dict, schema, "package_show")
        pkg_dict["validated_data_dict"] = json.dumps(
            validated_pkg_dict,
            cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

    pkg_dict["data_dict"] = data_dict_json

    # add to string field for sorting
    title = pkg_dict.get("title")
    if title:
        pkg_dict["title_string"] = title

    # Non-active packages are removed from the index rather than updated
    if (not pkg_dict.get("state")) or ("active" not in pkg_dict.get("state")):
        return self.delete_package(pkg_dict)

    index_fields = RESERVED_FIELDS + pkg_dict.keys()

    # include the extras in the main namespace
    extras = pkg_dict.get("extras", [])
    for extra in extras:
        key, value = extra["key"], extra["value"]
        if isinstance(value, (tuple, list)):
            value = " ".join(map(unicode, value))
        # strip characters Solr field names cannot contain
        key = "".join([c for c in key if c in KEY_CHARS])
        pkg_dict["extras_" + key] = value
        if key not in index_fields:
            pkg_dict[key] = value
    pkg_dict.pop("extras", None)

    # add tags, removing vocab tags from 'tags' list and adding them as
    # vocab_<tag name> so that they can be used in facets
    non_vocab_tag_names = []
    tags = pkg_dict.pop("tags", [])
    context = {"model": model}

    for tag in tags:
        if tag.get("vocabulary_id"):
            data = {"id": tag["vocabulary_id"]}
            vocab = logic.get_action("vocabulary_show")(context, data)
            key = u"vocab_%s" % vocab["name"]
            if key in pkg_dict:
                pkg_dict[key].append(tag["name"])
            else:
                pkg_dict[key] = [tag["name"]]
        else:
            non_vocab_tag_names.append(tag["name"])

    pkg_dict["tags"] = non_vocab_tag_names

    # add groups
    groups = pkg_dict.pop("groups", [])

    # we use the capacity to make things private in the search index
    if pkg_dict["private"]:
        pkg_dict["capacity"] = "private"
    else:
        pkg_dict["capacity"] = "public"

    pkg_dict["groups"] = [group["name"] for group in groups]

    # if there is an owner_org we want to add this to groups for index
    # purposes
    if pkg_dict.get("organization"):
        pkg_dict["organization"] = pkg_dict["organization"]["name"]
    else:
        pkg_dict["organization"] = None

    # tracking
    tracking_summary = pkg_dict.pop("tracking_summary", None)
    if tracking_summary:
        pkg_dict["views_total"] = tracking_summary["total"]
        pkg_dict["views_recent"] = tracking_summary["recent"]

    resource_fields = [
        ("name", "res_name"),
        ("description", "res_description"),
        ("format", "res_format"),
        ("url", "res_url"),
        ("resource_type", "res_type"),
    ]
    resource_extras = [(e, "res_extras_" + e)
                       for e in model.Resource.get_extra_columns()]
    # flatten the structure for indexing:
    for resource in pkg_dict.get("resources", []):
        for (okey, nkey) in resource_fields + resource_extras:
            pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u"")]
    pkg_dict.pop("resources", None)

    # Index relationships under the relationship type name, resolving the
    # other package's name in each direction
    rel_dict = collections.defaultdict(list)
    subjects = pkg_dict.pop("relationships_as_subject", [])
    objects = pkg_dict.pop("relationships_as_object", [])
    for rel in objects:
        type = model.PackageRelationship.forward_to_reverse_type(rel["type"])
        rel_dict[type].append(
            model.Package.get(rel["subject_package_id"]).name)
    for rel in subjects:
        type = rel["type"]
        rel_dict[type].append(
            model.Package.get(rel["object_package_id"]).name)
    for key, value in rel_dict.iteritems():
        if key not in pkg_dict:
            pkg_dict[key] = value

    pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

    # Save dataset type
    pkg_dict["dataset_type"] = pkg_dict["type"]

    # clean the dict fixing keys and dates
    # FIXME where are we getting these dirty keys from?  can we not just
    # fix them in the correct place or is this something that always will
    # be needed?  For my data not changing the keys seems to not cause a
    # problem.
    new_dict = {}
    bogus_date = datetime.datetime(1, 1, 1)
    for key, value in pkg_dict.items():
        key = key.encode("ascii", "ignore")
        if key.endswith("_date"):
            try:
                date = parse(value, default=bogus_date)
                if date != bogus_date:
                    value = date.isoformat() + "Z"
                else:
                    # The date field was empty, so dateutil filled it with
                    # the default bogus date
                    value = None
            except ValueError:
                # unparseable date: drop the field from the document
                continue
        new_dict[key] = value
    pkg_dict = new_dict

    for k in ("title", "notes", "title_string"):
        if k in pkg_dict and pkg_dict[k]:
            pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

    # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
    # with UTC time (i.e trailing Z)
    # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
    pkg_dict["metadata_created"] += "Z"
    pkg_dict["metadata_modified"] += "Z"

    # mark this CKAN instance as data source:
    pkg_dict["site_id"] = config.get("ckan.site_id")

    # Strip a selection of the fields.
    # These fields are possible candidates for sorting search results on,
    # so we strip leading spaces because solr will sort " " before "a" or "A".
    for field_name in ["title"]:
        try:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()
        except KeyError:
            pass

    # add a unique index_id to avoid conflicts
    import hashlib
    pkg_dict["index_id"] = hashlib.md5(
        "%s%s" % (pkg_dict["id"], config.get("ckan.site_id"))).hexdigest()

    for item in PluginImplementations(IPackageController):
        pkg_dict = item.before_index(pkg_dict)

    assert pkg_dict, "Plugin must return non empty package dict on index"

    # send to solr:
    try:
        conn = make_connection()
        commit = not defer_commit
        if not asbool(config.get("ckan.search.solr_commit", "true")):
            commit = False
        conn.add_many([pkg_dict], _commit=commit)
    except solr.core.SolrException, e:
        msg = "Solr returned an error: {0} {1} - {2}".format(
            e.httpcode, e.reason, e.body[:1000]  # limit huge responses
        )
        raise SearchIndexError(msg)
def package_create(context, data_dict):
    '''Create a new dataset (package).

    You must be authorized to create new datasets. If you specify any groups
    for the new dataset, you must also be authorized to edit these groups.

    Plugins may change the parameters of this function depending on the value
    of the ``type`` parameter, see the ``IDatasetForm`` plugin interface.

    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'``
    :type name: string
    :param title: the title of the dataset (optional, default: same as
        ``name``)
    :type title: string
    :param author: the name of the dataset's author (optional)
    :type author: string
    :param author_email: the email address of the dataset's author (optional)
    :type author_email: string
    :param maintainer: the name of the dataset's maintainer (optional)
    :type maintainer: string
    :param maintainer_email: the email address of the dataset's maintainer
        (optional)
    :type maintainer_email: string
    :param license_id: the id of the dataset's license, see ``license_list()``
        for available values (optional)
    :type license_id: license id string
    :param notes: a description of the dataset (optional)
    :type notes: string
    :param url: a URL for the dataset's source (optional)
    :type url: string
    :param version: (optional)
    :type version: string, no longer than 100 characters
    :param state: the current state of the dataset, e.g. ``'active'`` or
        ``'deleted'``, only active datasets show up in search results and
        other lists of datasets, this parameter will be ignored if you are not
        authorized to change the state of the dataset (optional, default:
        ``'active'``)
    :type state: string
    :param type: the type of the dataset (optional), ``IDatasetForm`` plugins
        associate themselves with different dataset types and provide custom
        dataset handling behaviour for these types
    :type type: string
    :param resources: the dataset's resources, see ``resource_create()``
        for the format of resource dictionaries (optional)
    :type resources: list of resource dictionaries
    :param tags: the dataset's tags, see ``tag_create()`` for the format
        of tag dictionaries (optional)
    :type tags: list of tag dictionaries
    :param extras: the dataset's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to datasets, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string), and optionally ``'deleted'``
    :type extras: list of dataset extra dictionaries
    :param relationships_as_object: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_object: list of relationship dictionaries
    :param relationships_as_subject: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_subject: list of relationship dictionaries
    :param groups: the groups to which the dataset belongs (optional), each
        group dictionary should have one or more of the following keys which
        identify an existing group: ``'id'`` (the id of the group, string),
        ``'name'`` (the name of the group, string), ``'title'`` (the title of
        the group, string), to see which groups exist call ``group_list()``
    :type groups: list of dictionaries
    :param owner_org: the id of the dataset's owning organization, see
        ``organization_list()`` or ``organization_list_for_user`` for
        available values (optional)
    :type owner_org: string

    :returns: the newly created dataset (unless 'return_id_only' is set to
        True in the context, in which case just the dataset id will be
        returned)
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']

    package_type = data_dict.get('type')
    # Resolve the IDatasetForm plugin registered for this dataset type; a
    # caller-supplied schema in the context overrides the plugin's schema.
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.create_package_schema()

    _check_access('package_create', context, data_dict)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              data.get('name'), data_dict)

    if errors:
        # Nothing has been saved yet, but roll back any pending session state.
        model.Session.rollback()
        raise ValidationError(errors)

    # Create a revision to record this change (pre-CKAN-2.5 revisioning model).
    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")

    admins = []
    if user:
        # Python 2: the username arrives as a byte string; decode before the
        # model lookup.
        user_obj = model.User.by_name(user.decode('utf8'))
        if user_obj:
            admins = [user_obj]
            data['creator_user_id'] = user_obj.id

    pkg = model_save.package_dict_save(data, context)

    model.setup_default_user_roles(pkg, admins)
    # Needed to let extensions know the package id
    model.Session.flush()
    data['id'] = pkg.id

    # Assign the owning organization with auth checks bypassed and the commit
    # deferred so it lands in the same transaction as the package itself.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    # Notify IPackageController plugins (both legacy create() and
    # after_create() hooks) before the final commit.
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)
        item.after_create(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    return_id_only = context.get('return_id_only', False)

    output = context['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': context['id']})

    return output
return 'unchanged' except p.toolkit.ValidationError, e: self._save_object_error('Update validation Error: %s' % str(e.error_summary), harvest_object, 'Import') return False for harvester in p.PluginImplementations(IDCATRDFHarvester): err = harvester.after_update(harvest_object, dataset, harvester_tmp_dict) if err: self._save_object_error('RDFHarvester plugin error: %s' % err, harvest_object, 'Import') return False log.info('Updated dataset %s' % dataset['name']) else: package_plugin = lib_plugins.lookup_package_plugin(dataset.get('type', None)) package_schema = package_plugin.create_package_schema() context['schema'] = package_schema # We need to explicitly provide a package ID dataset['id'] = unicode(uuid.uuid4()) package_schema['id'] = [unicode] harvester_tmp_dict = {} name = dataset['name'] for harvester in p.PluginImplementations(IDCATRDFHarvester): harvester.before_create(harvest_object, dataset, harvester_tmp_dict) try:
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    It is recommended to call
    :py:func:`ckan.logic.action.get.package_show`, make the desired changes to
    the result, and then call ``package_update()`` with it.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    For further parameters see
    :py:func:`~ckan.logic.action.create.package_create`.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if ``'return_package_dict'`` is ``True`` in
        the context, which is the default. Otherwise returns just the dataset
        id)
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    # Normalise to the canonical id and type so that validation and the save
    # below always operate on the stored values, not what the caller sent.
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    #avoid revisioning by updating directly
    # Bulk-update the timestamp at the SQL level (bypasses the session's
    # revisioned attribute tracking), then refresh the ORM object so it sees
    # the new value.
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # Owner-org assignment runs with auth bypassed, commit deferred, and no
    # extra revision, so it folds into this update's transaction.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    context_org_update['add_revision'] = False
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        # Resources keep their ordinal position on save, so ids can be copied
        # back positionally from the freshly-saved package.
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    return output
def _setup_template_variables(context, data_dict, package_type=None):
    """Delegate template-variable setup to the plugin for *package_type*."""
    plugin = lookup_package_plugin(package_type)
    return plugin.setup_template_variables(context, data_dict)
def _check_data_dict(self, data_dict, package_type=None):
    """Check that the returned data is complete.

    Mostly a guard against spammers submitting only part of the form.
    """
    plugin = lookup_package_plugin(package_type)
    return plugin.check_data_dict(data_dict)
def _search_template(self, package_type):
    """Return the search template path from the plugin for *package_type*."""
    plugin = lookup_package_plugin(package_type)
    return plugin.search_template()
return False for harvester in p.PluginImplementations(IDCATRDFHarvester): err = harvester.after_update(harvest_object, dataset, harvester_tmp_dict) if err: self._save_object_error( 'RDFHarvester plugin error: %s' % err, harvest_object, 'Import') return False log.info('Updated dataset %s' % dataset['name']) else: package_plugin = lib_plugins.lookup_package_plugin( dataset.get('type', None)) package_schema = package_plugin.create_package_schema() context['schema'] = package_schema # We need to explicitly provide a package ID dataset['id'] = unicode(uuid.uuid4()) package_schema['id'] = [unicode] harvester_tmp_dict = {} name = dataset['name'] for harvester in p.PluginImplementations(IDCATRDFHarvester): harvester.before_create(harvest_object, dataset, harvester_tmp_dict)
def package_create(context, data_dict):
    """Create a new dataset (package).

    You must be authorized to create new datasets. If you specify any groups
    for the new dataset, you must also be authorized to edit these groups.

    Plugins may change the parameters of this function depending on the value
    of the ``type`` parameter, see the ``IDatasetForm`` plugin interface.

    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'``
    :type name: string
    :param title: the title of the dataset (optional, default: same as
        ``name``)
    :type title: string
    :param author: the name of the dataset's author (optional)
    :type author: string
    :param author_email: the email address of the dataset's author (optional)
    :type author_email: string
    :param maintainer: the name of the dataset's maintainer (optional)
    :type maintainer: string
    :param maintainer_email: the email address of the dataset's maintainer
        (optional)
    :type maintainer_email: string
    :param license_id: the id of the dataset's license, see ``license_list()``
        for available values (optional)
    :type license_id: license id string
    :param notes: a description of the dataset (optional)
    :type notes: string
    :param url: a URL for the dataset's source (optional)
    :type url: string
    :param version: (optional)
    :type version: string, no longer than 100 characters
    :param state: the current state of the dataset, e.g. ``'active'`` or
        ``'deleted'``, only active datasets show up in search results and
        other lists of datasets, this parameter will be ignored if you are not
        authorized to change the state of the dataset (optional, default:
        ``'active'``)
    :type state: string
    :param type: the type of the dataset (optional), ``IDatasetForm`` plugins
        associate themselves with different dataset types and provide custom
        dataset handling behaviour for these types
    :type type: string
    :param resources: the dataset's resources, see ``resource_create()``
        for the format of resource dictionaries (optional)
    :type resources: list of resource dictionaries
    :param tags: the dataset's tags, see ``tag_create()`` for the format
        of tag dictionaries (optional)
    :type tags: list of tag dictionaries
    :param extras: the dataset's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to datasets, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string)
    :type extras: list of dataset extra dictionaries
    :param relationships_as_object: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_object: list of relationship dictionaries
    :param relationships_as_subject: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_subject: list of relationship dictionaries
    :param groups: the groups to which the dataset belongs (optional), each
        group dictionary should have one or more of the following keys which
        identify an existing group: ``'id'`` (the id of the group, string),
        ``'name'`` (the name of the group, string), ``'title'`` (the title of
        the group, string), to see which groups exist call ``group_list()``
    :type groups: list of dictionaries
    :param owner_org: the id of the dataset's owning organization, see
        ``organization_list()`` or ``organization_list_for_user`` for
        available values (optional)
    :type owner_org: string

    :returns: the newly created dataset (unless 'return_id_only' is set to
        True in the context, in which case just the dataset id will be
        returned)
    :rtype: dictionary
    """
    model = context["model"]
    user = context["user"]

    package_type = data_dict.get("type")
    # Resolve the IDatasetForm plugin registered for this dataset type; a
    # caller-supplied schema in the context overrides the plugin's schema.
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    if "schema" in context:
        schema = context["schema"]
    else:
        schema = package_plugin.create_package_schema()

    _check_access("package_create", context, data_dict)

    if "api_version" not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, "check_data_dict", None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)

    if "tags" in data:
        # Map free tags onto their vocabulary form for indexing/storage.
        data["tags"] = get_tag_vocabulary(data["tags"])
    if "groups" in data:
        # BUG FIX: the validated dict is not guaranteed to contain an
        # "extras" key (it is only present when the caller supplied extras),
        # so appending to data["extras"] directly could raise KeyError.
        # setdefault keeps the existing list when present and creates an
        # empty one otherwise — behaviour is unchanged for all inputs that
        # previously worked.
        data.setdefault("extras", []).append(
            {"key": "solr_additions",
             "value": build_additions(data["groups"])})

    log.debug(
        "package_create validate_errs=%r user=%s package=%s data=%r",
        errors,
        context.get("user"),
        data.get("name"),
        data_dict,
    )

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    # Create a revision to record this change (pre-CKAN-2.5 revisioning model).
    rev = model.repo.new_revision()
    rev.author = user
    if "message" in context:
        rev.message = context["message"]
    else:
        rev.message = _(u"REST API: Create object %s") % data.get("name")

    admins = []
    if user:
        # Python 2: the username arrives as a byte string; decode before the
        # model lookup.
        user_obj = model.User.by_name(user.decode("utf8"))
        if user_obj:
            admins = [user_obj]
            data["creator_user_id"] = user_obj.id

    pkg = model_save.package_dict_save(data, context)

    model.setup_default_user_roles(pkg, admins)
    # Needed to let extensions know the package id
    model.Session.flush()
    data["id"] = pkg.id

    # Assign the owning organization with auth checks bypassed and the commit
    # deferred so it lands in the same transaction as the package itself.
    context_org_update = context.copy()
    context_org_update["ignore_auth"] = True
    context_org_update["defer_commit"] = True
    _get_action("package_owner_org_update")(context_org_update,
                                            {"id": pkg.id,
                                             "organization_id": pkg.owner_org})

    # Notify IPackageController plugins (both legacy create() and
    # after_create() hooks) before the final commit.
    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)
        item.after_create(context, data)

    if not context.get("defer_commit"):
        model.repo.commit()

    # need to let rest api create
    context["package"] = pkg
    # this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug("Created object %s" % pkg.name)

    # Make sure that a user provided schema is not used on package_show
    context.pop("schema", None)

    return_id_only = context.get("return_id_only", False)

    output = context["id"] if return_id_only \
        else _get_action("package_show")(context, {"id": context["id"]})

    return output
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
        context, which is the default. Otherwise returns just the dataset id)
    :rtype: dictionary
    '''
    log.info('context: %s', context)
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']
    log.info('package update: %s', data_dict)
    pkg = model.Package.get(name_or_id)

    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    # Merge the caller's partial update onto the full current dataset dict,
    # so unspecified fields keep their stored values through validation.
    pkg_dict = _get_action("package_show")(context, {'id': pkg.id})
    pkg_dict.update(data_dict)

    _check_access('package_update', context, data_dict)
    # NOTE(review): presumably fires a change notification to subscribers —
    # semantics of notify_package_update are defined elsewhere; confirm.
    _get_action('notify_package_update')(context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    #we cannot modify model but we can ensure no modification via GUI/API
    # Strip contact fields so they can never be changed through this action.
    delete_keys = ['author_email', 'maintainer', 'maintainer_email']
    for key in delete_keys:
        if key in pkg_dict:
            del pkg_dict[key]

    # Validate the merged dict (current values + caller's changes).
    data, errors = _validate(pkg_dict, schema, context)
    log.debug('package_update validate_errs=%r user=%s package=%s',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '')

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    #avoid revisioning by updating directly
    # Bulk-update the timestamp at the SQL level (bypasses the session's
    # revisioned attribute tracking), then refresh the ORM object so it sees
    # the new value.
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # Owner-org assignment runs with auth bypassed and commit deferred so it
    # folds into this update's transaction.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    #action will be audited in resource_create or resource_update action
    if not context.get('defer_audit', None):
        audit_helper(data_dict, output, 'package_update')
    # NOTE(review): looks like a project-specific locking scheme — the lock is
    # released here after the update completes; confirm against package_lock.
    _get_action('package_unlock')(context, {'id': pkg.id})
    return output
def _read_template(self, package_type):
    """Return the read template path from the plugin for *package_type*."""
    plugin = lookup_package_plugin(package_type)
    return plugin.read_template()