Example #1
0
def bd_dataset_validate(context, data_dict):
    """Validate ``data_dict`` against the ``package_create`` schema.

    The dataset is validated as if it were new: its ``id`` and the ``id`` of
    every resource are dropped and ``_`` is appended to ``name``.

    Both arguments are modified in place: ``context`` gets ``ignore_auth``
    set to ``True``; ``data_dict`` is changed as described above and gains a
    default ``type`` when it has none.

    :param context: action context; a truthy ``schema`` entry overrides the
        plugin's create schema
    :param data_dict: the dataset dict to validate; ``resources`` is optional
    :returns: a dict with the keys ``sucess`` (bool, ``True`` when there are
        no errors; the spelling is part of the existing return format),
        ``data`` and ``errors``
    """
    context["ignore_auth"] = True

    if "type" not in data_dict:
        package_plugin = lib_plugins.lookup_package_plugin()
        try:
            # use first type as default if user didn't provide type
            package_type = package_plugin.package_types()[0]
        except (AttributeError, IndexError):
            # in case a 'dataset' plugin was registered w/o fallback
            package_type = "dataset"
        data_dict["type"] = package_type
    package_plugin = lib_plugins.lookup_package_plugin(data_dict["type"])

    data_dict.pop("id", None)
    # A dataset may carry no resources at all (missing key or None); that
    # must not abort validation with a KeyError/TypeError.
    for resource in data_dict.get("resources") or []:
        resource.pop("id", None)
    if "name" in data_dict:
        # add a char so that name doesn't collide with existing package
        data_dict["name"] += "_"

    schema = context.get("schema") or package_plugin.create_package_schema()
    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, "package_create"
    )

    return {"sucess": not errors, "data": data, "errors": errors}
Example #2
0
 def _resource_form(self, package_type):
     """Return the resource form for ``package_type``.

     Falls back to the default package plugin's ``resource_form()`` when the
     type-specific plugin has no ``resource_form`` attribute or its
     ``resource_form()`` returns ``None``.
     """
     # backwards compatibility with plugins not inheriting from
     # DefaultDatasetPlugin and not implementing resource_form
     specific_plugin = lookup_package_plugin(package_type)
     if not hasattr(specific_plugin, 'resource_form'):
         return lookup_package_plugin().resource_form()
     form = specific_plugin.resource_form()
     if form is None:
         return lookup_package_plugin().resource_form()
     return form
def package_update(next_action, context, data_dict):
    '''Chained ``package_update`` that swaps in our ``owner_org`` validator.

    Looks the package up by ``id`` (or ``name``), checks access, stores in
    ``context['schema']`` a schema in which ``default_owner_org_validator``
    is replaced by ``owner_org_validator``, then delegates to
    ``next_action``.

    :param next_action: next action in the chain, called as
        ``next_action(context, data_dict)``
    :param context: action context; must contain ``model``; an existing
        ``schema`` entry is used instead of the plugin's update schema
    :param data_dict: identifies the package through ``id`` or ``name``
    :returns: whatever ``next_action`` returns
    :raises toolkit.ObjectNotFound: if the package does not exist

    '''
    model = context['model']
    package_ref = data_dict.get('id') or data_dict.get('name')

    package = model.Package.get(package_ref)
    if package is None:
        raise toolkit.ObjectNotFound(toolkit._('Package was not found.'))

    toolkit.check_access('package_update', context, data_dict)

    # Pick the schema: an explicit one from the context wins.
    plugin = lib_plugins.lookup_package_plugin(package.type)
    schema = (context['schema'] if 'schema' in context
              else plugin.update_package_schema())

    # Replace the stock owner_org validator by our own, keeping the order of
    # all the other validators.
    if 'owner_org' in schema:
        patched = []
        for validator in schema['owner_org']:
            if validator is default_owner_org_validator:
                validator = owner_org_validator
            patched.append(validator)
        schema['owner_org'] = patched
    context['schema'] = schema

    return next_action(context, data_dict)
Example #4
0
def package_show(context, data_dict):
    '''Return the dictized package identified by ``id`` or ``name_or_id``.

    ``context['session']`` and ``context['package']`` are set as a side
    effect, and every ``IPackageController`` plugin has ``read()`` called
    with the package.  The result is run through the plugin's
    ``db_to_form_schema()`` unless that schema is falsy or
    ``context['validate']`` is falsy.

    :param context: action context; must contain ``model``
    :param data_dict: dict carrying ``id`` or ``name_or_id``
    :returns: the package dict
    :raises NotFound: if the package does not exist
    '''
    model = context['model']
    context['session'] = model.Session

    package_ref = data_dict.get("id") or data_dict['name_or_id']
    package = model.Package.get(package_ref)
    if package is None:
        raise NotFound
    context['package'] = package

    check_access('package_show', context, data_dict)

    result = model_dictize.package_dictize(package, context)

    for observer in plugins.PluginImplementations(plugins.IPackageController):
        observer.read(package)

    plugin = lib_plugins.lookup_package_plugin(result['type'])
    schema = plugin.db_to_form_schema()

    if not schema or not context.get('validate', True):
        return result

    result, _errors = validate(result, schema, context=context)
    return result
Example #5
0
def datasets_validation_report(context, data_dict):
    '''Validate every dataset against the ``dataset`` update schema.

    Datasets (private ones included) are fetched through ``package_search``
    page by page, so the report covers all of them rather than only the
    first page of results.

    :param context: action context; used for the access check only
    :param data_dict: action parameters; used for the access check only
    :returns: ``{'count': <total reported by the search>, 'datasets': [...]}``
        where ``datasets`` holds one ``{'id', 'name', 'errors'}`` dict for
        each dataset that failed validation
    '''
    toolkit.check_access('datasets_validation_report', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin('dataset')
    schema = package_plugin.update_package_schema()

    # Validation runs with its own context; it is given a separate name so
    # the caller's ``context`` argument is not shadowed.
    validation_context = {
        'model': model,
        'session': model.Session,
        'user': toolkit.c.user
    }

    package_search = toolkit.get_action('package_search')
    page_size = 1000

    out = {
        'count': 0,
        'datasets': [],
    }

    start = 0
    while True:
        # A fresh search context per call, as the action may modify it.
        query = package_search({
            'ignore_auth': True
        }, {
            'q': '*:*',
            'include_private': True,
            'rows': page_size,
            'start': start,
        })
        out['count'] = query['count']
        datasets = query['results']
        if not datasets:
            break

        for dataset in datasets:
            data, errors = package_plugin.validate(
                validation_context, dataset, schema, 'package_update')
            if errors:
                out['datasets'].append({
                    'id': dataset['id'],
                    'name': dataset['name'],
                    'errors': errors,
                })

        start += len(datasets)
        if start >= query['count']:
            break

    return out
Example #6
0
File: get.py Project: slmnhq/ckan
def package_show(context, data_dict):
    '''Return the dictized (and, by default, validated) package.

    The package is looked up by ``data_dict['id']`` or, when that is missing
    or empty, by ``data_dict['name_or_id']``.  The schema comes from the
    package plugin's ``db_to_form_schema_options``; when that call raises
    ``AttributeError`` the plugin's ``db_to_form_schema()`` is used instead.

    :param context: action context; must contain ``model``.  ``session`` and
        ``package`` are set on it; a falsy ``validate`` entry skips
        validation
    :param data_dict: dict carrying ``id`` or ``name_or_id``
    :returns: the package dict
    :raises NotFound: if the package does not exist
    '''
    model = context['model']
    context['session'] = model.Session

    package_ref = data_dict.get("id") or data_dict['name_or_id']
    package = model.Package.get(package_ref)
    if package is None:
        raise NotFound
    context['package'] = package

    check_access('package_show', context, data_dict)

    result = model_dictize.package_dictize(package, context)

    for observer in plugins.PluginImplementations(plugins.IPackageController):
        observer.read(package)

    plugin = lib_plugins.lookup_package_plugin(result['type'])
    schema_options = {
        'type': 'show',
        'api': 'api_version' in context,
        'context': context,
    }
    try:
        schema = plugin.db_to_form_schema_options(schema_options)
    except AttributeError:
        schema = plugin.db_to_form_schema()

    if not schema or not context.get('validate', True):
        return result

    result, _errors = validate(result, schema, context=context)
    return result
Example #7
0
def dataset_is_valid(package):
    """Tell whether ``package`` passes its plugin's update schema.

    The plugin is looked up through ``package['type']``; validation runs
    with the current user (``c.user``).

    Returns ``True`` when validation reports no errors.
    """
    plugin = plugins.lookup_package_plugin(package['type'])
    schema = plugin.update_package_schema()
    validation_context = {
        'model': model,
        'session': model.Session,
        'user': c.user,
    }
    _data, errors = validate(package, schema, validation_context)
    return not errors
Example #8
0
def package_create(context, data_dict):
    """Create a package from ``data_dict`` and return it via ``package_show``.

    Steps, in order: reset the session, pick the create schema from the
    package plugin, check access, run the plugin's ``check_data_dict`` (only
    when ``api_version`` is not in the context), validate, record a
    revision, save, set up default roles, notify ``IPackageController``
    plugins and commit unless ``context['defer_commit']`` is truthy.

    ``context['package']`` and ``context['id']`` are set to the new package
    and its id.

    Raises ``ValidationError`` (after rolling the session back) when
    validation reports errors.
    """
    model = context["model"]
    user = context["user"]
    model.Session.remove()
    model.Session()._context = context

    plugin = lib_plugins.lookup_package_plugin(data_dict.get("type"))
    schema_options = {
        "type": "create",
        "api": "api_version" in context,
        "context": context,
    }
    try:
        schema = plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        schema = plugin.form_to_db_schema()

    check_access("package_create", context, data_dict)

    if "api_version" not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            plugin.check_data_dict(data_dict, schema)
        except TypeError:
            plugin.check_data_dict(data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, error_summary(errors))

    revision = model.repo.new_revision()
    revision.author = user
    revision.message = (
        context["message"]
        if "message" in context
        else _(u"REST API: Create object %s") % data.get("name")
    )

    package = model_save.package_dict_save(data, context)
    admins = [model.User.by_name(user.decode("utf8"))] if user else []
    model.setup_default_user_roles(package, admins)

    # Needed to let extensions know the package id
    model.Session.flush()

    for observer in plugins.PluginImplementations(plugins.IPackageController):
        observer.create(package)

    if not context.get("defer_commit"):
        model.repo.commit()

    ## need to let rest api create
    context["package"] = package
    ## this is added so that the rest controller can make a new location
    context["id"] = package.id
    log.debug("Created object %s" % str(package.name))
    return get_action("package_show")(context, {"id": context["id"]})
Example #9
0
def package_validate(context, data_dict):
    '''Validate ``data_dict`` as a package create or update.

    When ``data_dict['id']`` refers to an existing package the data is
    validated as a ``package_update`` of it (and ``context['package']`` and
    ``data_dict['id']`` are set accordingly); otherwise it is validated as a
    ``package_create`` of type ``data_dict['type']`` (default ``dataset``).

    :param context: action context; must contain ``model``; an existing
        ``schema`` entry overrides the plugin's schema
    :param data_dict: the package data to validate
    :returns: the validated data, without its ``groups_list`` key
    :raises ValidationError: if validation reports errors
    '''
    model = context['model']
    dataset_id = data_dict.get("id")

    existing = model.Package.get(dataset_id) if dataset_id else None

    if existing is not None:
        action = 'package_update'
        package_type = existing.type
        context["package"] = existing
        data_dict["id"] = existing.id
    else:
        action = 'package_create'
        package_type = data_dict.get('type', 'dataset')

    logic.check_access(action, context, data_dict)
    package_plugin = lib_plugins.lookup_package_plugin(package_type)

    if 'schema' in context:
        schema = context['schema']
    elif action == 'package_create':
        schema = package_plugin.create_package_schema()
    else:
        schema = package_plugin.update_package_schema()

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, action)
    if errors:
        raise ValidationError(errors)

    data.pop('groups_list', None)
    return data
Example #10
0
def package_update_validate(context, data_dict):
    '''Validate ``data_dict`` as an update of an existing package.

    The session is reset first.  ``context['package']`` is set to the
    looked-up package and ``data_dict['id']`` is replaced by the package's
    id.

    :param context: action context; must contain ``model`` and ``user``
    :param data_dict: package data; must contain ``id``
    :returns: the validated data
    :raises NotFound: if the package does not exist
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    model = context['model']
    # Not used below, but the lookup still requires the key to be present.
    user = context['user']

    package_ref = data_dict["id"]
    model.Session.remove()
    model.Session()._context = context

    package = model.Package.get(package_ref)
    context["package"] = package
    if package is None:
        raise NotFound(_('Package was not found.'))
    data_dict["id"] = package.id

    # get the schema
    plugin = lib_plugins.lookup_package_plugin(package.type)
    schema_options = {
        'type': 'update',
        'api': 'api_version' in context,
        'context': context,
    }
    try:
        schema = plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        schema = plugin.form_to_db_schema()

    check_access('package_update', context, data_dict)

    data, errors = validate(data_dict, schema, context)
    if not errors:
        return data

    model.Session.rollback()
    raise ValidationError(errors, error_summary(errors))
Example #11
0
def package_show(context, data_dict):
    '''Return the dictized package identified by ``id`` or ``name_or_id``.

    ``context['session']`` and ``context['package']`` are set as a side
    effect, and every ``IPackageController`` plugin has ``read()`` called
    with the package.  When the plugin's ``db_to_form_schema()`` is truthy
    the package dict is validated against it.

    :param context: action context; must contain ``model``
    :param data_dict: dict carrying ``id`` or ``name_or_id``
    :returns: the package dict
    :raises NotFound: if the package does not exist
    '''
    model = context['model']
    context['session'] = model.Session

    package_ref = data_dict.get("id") or data_dict['name_or_id']
    package = model.Package.get(package_ref)
    if package is None:
        raise NotFound
    context['package'] = package

    check_access('package_show', context, data_dict)

    result = model_dictize.package_dictize(package, context)

    for observer in plugins.PluginImplementations(plugins.IPackageController):
        observer.read(package)

    plugin = lib_plugins.lookup_package_plugin(result['type'])
    schema = plugin.db_to_form_schema()
    if not schema:
        return result

    result, _errors = validate(result, schema, context=context)
    return result
Example #12
0
def package_update_validate(context, data_dict):
    '''Validate ``data_dict`` as an update of an existing package.

    ``context['package']`` is set to the looked-up package and
    ``data_dict['id']`` is replaced by the package's id.

    :param context: action context; must contain ``model`` and ``user``
    :param data_dict: package data; ``id`` is fetched with ``_get_or_bust``
    :returns: the validated data
    :raises NotFound: if the package does not exist
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    model = context['model']
    # Not used below, but the lookup still requires the key to be present.
    user = context['user']

    package_ref = _get_or_bust(data_dict, "id")

    package = model.Package.get(package_ref)
    context["package"] = package
    if package is None:
        raise NotFound(_('Package was not found.'))
    data_dict["id"] = package.id

    # get the schema
    plugin = lib_plugins.lookup_package_plugin(package.type)
    schema_options = {
        'type': 'update',
        'api': 'api_version' in context,
        'context': context,
    }
    try:
        schema = plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        schema = plugin.form_to_db_schema()

    _check_access('package_update', context, data_dict)

    data, errors = _validate(data_dict, schema, context)
    if not errors:
        return data

    model.Session.rollback()
    raise ValidationError(errors)
Example #13
0
def package_update_validate(context, data_dict):
    """Validate ``data_dict`` as an update of an existing package.

    ``context["package"]`` is set to the looked-up package and
    ``data_dict["id"]`` is replaced by the package's id.

    Returns the validated data.  Raises ``NotFound`` when the package does
    not exist and ``ValidationError`` (after a session rollback) when
    validation reports errors.
    """
    model = context["model"]
    # Not used below, but the lookup still requires the key to be present.
    user = context["user"]

    package_ref = _get_or_bust(data_dict, "id")

    package = model.Package.get(package_ref)
    context["package"] = package
    if package is None:
        raise NotFound(_("Package was not found."))
    data_dict["id"] = package.id

    # get the schema
    plugin = lib_plugins.lookup_package_plugin(package.type)
    schema_options = {
        "type": "update",
        "api": "api_version" in context,
        "context": context,
    }
    try:
        schema = plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        schema = plugin.form_to_db_schema()

    _check_access("package_update", context, data_dict)

    data, errors = _validate(data_dict, schema, context)
    if not errors:
        return data

    model.Session.rollback()
    raise ValidationError(errors)
Example #14
0
def package_list_show_for_reindex(context, dataset_ids):
    '''
    Build ``package_show``-style dicts for many datasets at once, for
    reindexing.

    All packages whose id is in ``dataset_ids`` are loaded with one query;
    ids that match no package are simply absent from the result.  Each
    package is dictized, passed through the ``IPackageController`` hooks
    (``before_view`` only when ``context['for_view']`` is truthy, then
    ``read`` and ``after_show``), validated against the show schema and
    finally handed to ``additional_hdx_package_show_processing`` (which,
    per the original description of this function, adds the resource size
    for uploaded files and the resource revision timestamp).

    :param context: action context; must contain ``model``.  ``session``,
        ``package`` and ``reindexing`` are set on it.  An existing ``schema``
        entry overrides the plugin's show schema and a falsy ``validate``
        entry skips validation.
    :param dataset_ids: ids of the packages to load
    :returns: list of package dicts
    '''

    model = context['model']
    context['session'] = model.Session

    # Nothing to look up: skip the query altogether.
    if not dataset_ids:
        return []

    dataset_dicts = []
    all_datasets = model.Session.query(model.Package).filter(
        model.Package.id.in_(dataset_ids)).all()

    for pkg in all_datasets:
        context['package'] = pkg
        context['reindexing'] = True

        package_dict = model_dictize.package_dictize(pkg, context)

        if context.get('for_view'):
            for item in plugins.PluginImplementations(
                    plugins.IPackageController):
                package_dict = item.before_view(package_dict)

        for item in plugins.PluginImplementations(plugins.IPackageController):
            item.read(pkg)

        # NOTE: the IResourceController ``before_show`` hooks are not called
        # here.

        package_plugin = lib_plugins.lookup_package_plugin(
            package_dict['type'])
        if 'schema' in context:
            schema = context['schema']
        else:
            schema = package_plugin.show_package_schema()
        if schema and context.get('validate', True):
            package_dict, errors = lib_plugins.plugin_validate(
                package_plugin, context, package_dict, schema,
                'package_show')

        for item in plugins.PluginImplementations(plugins.IPackageController):
            item.after_show(context, package_dict)

        additional_hdx_package_show_processing(
            context, package_dict, just_for_reindexing=True)

        dataset_dicts.append(package_dict)
    return dataset_dicts
Example #15
0
def _get_pkg_template(template_type, package_type=None):
    """Call the ``template_type`` method of the plugin for ``package_type``.

    The method is passed ``package_type`` when its signature has any
    parameter, and is called without arguments otherwise.
    """
    plugin = lookup_package_plugin(package_type)
    template_method = getattr(plugin, template_type)
    if not inspect.signature(template_method).parameters:
        return template_method()
    return template_method(package_type)
Example #16
0
def package_update(context, data_dict):
    '''Update an existing package and return it via ``package_show``.

    Steps, in order: reset the session, look the package up by ``id`` (or
    ``name``), check access, pick the update schema from the package
    plugin, run the plugin's ``check_data_dict`` (only when ``api_version``
    is not in the context), validate, record a revision, save, notify
    ``IPackageController`` plugins and commit unless
    ``context['defer_commit']`` is truthy.

    ``context['package']`` is set to the looked-up package and
    ``data_dict['id']`` to its id.

    :raises NotFound: if the package does not exist
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    model = context['model']
    user = context['user']
    package_ref = data_dict.get("id") or data_dict['name']
    model.Session.remove()
    model.Session()._context = context

    package = model.Package.get(package_ref)
    if package is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = package
    data_dict["id"] = package.id

    check_access('package_update', context, data_dict)

    # get the schema
    plugin = lib_plugins.lookup_package_plugin(package.type)
    schema_options = {
        'type': 'update',
        'api': 'api_version' in context,
        'context': context,
    }
    try:
        schema = plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        schema = plugin.form_to_db_schema()

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            plugin.check_data_dict(data_dict, schema)
        except TypeError:
            plugin.check_data_dict(data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, error_summary(errors))

    revision = model.repo.new_revision()
    revision.author = user
    revision.message = (
        context['message'] if 'message' in context
        else _(u'REST API: Update object %s') % data.get("name"))

    package = model_save.package_dict_save(data, context)

    for observer in plugins.PluginImplementations(plugins.IPackageController):
        observer.edit(package)

    if not context.get('defer_commit'):
        model.repo.commit()
    return get_action('package_show')(context, data_dict)
Example #17
0
def dataset_is_valid(package):
    """ Check if given dataset is valid. Uses schema from plugin.

        The dataset is validated with ``accept-terms`` set to ``u'True'``.
        That key is set on a shallow copy, so the dict passed in by the
        caller is not modified by this check.

        Return true if dataset is valid.
    """
    candidate = dict(package)
    candidate['accept-terms'] = u'True'
    package_plugin = plugins.lookup_package_plugin(candidate['type'])
    _, errors = validate(candidate, package_plugin.update_package_schema(),
                         {'model': model, 'session': model.Session, 'user': c.user})
    return not errors
Example #18
0
def _get_pkg_template(template_type, package_type=None):
    """Call the ``template_type`` method of the plugin for ``package_type``.

    The method is first called with ``package_type``.  If that raises a
    ``TypeError`` whose message contains ``takes 1`` or ``takes exactly 1``,
    the method is called again without arguments; any other ``TypeError``
    is re-raised.
    """
    plugin = lookup_package_plugin(package_type)
    template_method = getattr(plugin, template_type)
    try:
        return template_method(package_type)
    except TypeError as err:
        message = str(err)
        arity_mismatch = (
            u'takes 1' in message or u'takes exactly 1' in message)
        if not arity_mismatch:
            raise
        return template_method()
Example #19
0
def package_create_validate(context, data_dict):
    '''Validate ``data_dict`` against the default plugin's create schema.

    :param context: action context; must contain ``model``
    :param data_dict: the package data to validate
    :returns: the validated data
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    model = context['model']
    default_plugin = lib_plugins.lookup_package_plugin()
    schema = default_plugin.form_to_db_schema()

    _check_access('package_create', context, data_dict)

    data, errors = _validate(data_dict, schema, context)
    if not errors:
        return data

    model.Session.rollback()
    raise ValidationError(errors)
Example #20
0
def package_update(context, data_dict):
    '''Update an existing package and return it via ``package_show``.

    Steps, in order: reset the session, look the package up by ``id`` (or
    ``name``), check access, pick the update schema from the package
    plugin, run the plugin's ``check_data_dict`` (only when ``api_version``
    is not in the context), validate, record a revision, save, notify
    ``IPackageController`` plugins and commit unless
    ``context['defer_commit']`` is truthy.

    ``context['package']`` is set to the looked-up package and
    ``data_dict['id']`` to its id.

    :raises NotFound: if the package does not exist
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    model = context['model']
    user = context['user']
    package_ref = data_dict.get("id") or data_dict['name']
    model.Session.remove()
    model.Session()._context = context

    package = model.Package.get(package_ref)
    if package is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = package
    data_dict["id"] = package.id

    check_access('package_update', context, data_dict)

    # get the schema
    plugin = lib_plugins.lookup_package_plugin(package.type)
    schema_options = {
        'type': 'update',
        'api': 'api_version' in context,
        'context': context,
    }
    try:
        schema = plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        schema = plugin.form_to_db_schema()

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            plugin.check_data_dict(data_dict, schema)
        except TypeError:
            plugin.check_data_dict(data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, error_summary(errors))

    revision = model.repo.new_revision()
    revision.author = user
    revision.message = (
        context['message'] if 'message' in context
        else _(u'REST API: Update object %s') % data.get("name"))

    package = model_save.package_dict_save(data, context)

    for observer in plugins.PluginImplementations(plugins.IPackageController):
        observer.edit(package)

    if not context.get('defer_commit'):
        model.repo.commit()
    return get_action('package_show')(context, data_dict)
Example #21
0
def package_create_validate(context, data_dict):
    '''Validate ``data_dict`` against the default plugin's create schema.

    :param context: action context; must contain ``model``
    :param data_dict: the package data to validate
    :returns: the validated data
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    model = context['model']
    default_plugin = lib_plugins.lookup_package_plugin()
    schema = default_plugin.form_to_db_schema()

    _check_access('package_create', context, data_dict)

    data, errors = _validate(data_dict, schema, context)
    if not errors:
        return data

    model.Session.rollback()
    raise ValidationError(errors)
Example #22
0
    def test_create_dataset(self, opts, *args):
        '''An example that creates a dataset using the action api.

        ``opts`` supplies ``title``, ``name``, ``description``,
        ``owner_org`` and ``identifier``.  When ``opts.identifier`` is
        truthy it is reused as the dataset id, which requires overriding
        the create schema for this call.
        '''

        self._fake_request_context()

        # Create a context for action api calls
        context = {
            'model': model,
            'session': model.Session,
            'user': self.site_user.get('name'),
            'ignore_auth': True,
            'api_version': '3',
            'allow_partial_update': False
        }

        # Decide how to handle package identifiers.  Without an identifier
        # a new UUID is generated by package_create's default behavior.
        if opts.identifier:
            # Create a dataset reusing an existing UUID
            # Note Override the catalog-wide schema inside this context
            from ckan.lib.plugins import lookup_package_plugin
            sch1 = lookup_package_plugin().create_package_schema()
            # type(u'') is `unicode` on Python 2 and `str` on Python 3
            sch1['id'] = [type(u'')]
            context['schema'] = sch1

        # Create an api request body
        pkg_dict = {
            'title': opts.title,
            'name': opts.name,
            'notes': opts.description,
            'license_id': 'cc-zero',
            'dataset_type': u'inspire',
            'owner_org': opts.owner_org,
            'inspire': {
                'title': opts.title,
                'abstract': opts.description,
                'topic_category': ["economy"],
            }
        }

        # If reusing an identifier, add the relevant keys
        if opts.identifier:
            pkg_dict['id'] = opts.identifier
            pkg_dict['inspire']['identifier'] = opts.identifier

        # Perform action
        pkg = get_action('package_create')(context, pkg_dict)
        # Parenthesized single argument: valid as a statement on Python 2
        # and as a function call on Python 3.
        print('Created dataset with: id=%(id)s name=%(name)s:' % pkg)
    def test_create_dataset(self, opts, *args):
        '''An example that creates a dataset using the action api.

        ``opts`` supplies ``title``, ``name``, ``description``,
        ``owner_org`` and ``identifier``.  When ``opts.identifier`` is
        truthy it is reused as the dataset id, which requires overriding
        the create schema for this call.
        '''

        self._fake_request_context()

        # Create a context for action api calls
        context = {
            'model': model,
            'session': model.Session,
            'user': self.site_user.get('name'),
            'ignore_auth': True,
            'api_version': '3',
            'allow_partial_update': False
        }

        # Decide how to handle package identifiers.  Without an identifier
        # a new UUID is generated by package_create's default behavior.
        if opts.identifier:
            # Create a dataset reusing an existing UUID
            # Note Override the catalog-wide schema inside this context
            from ckan.lib.plugins import lookup_package_plugin
            sch1 = lookup_package_plugin().create_package_schema()
            # type(u'') is `unicode` on Python 2 and `str` on Python 3
            sch1['id'] = [type(u'')]
            context['schema'] = sch1

        # Create an api request body
        pkg_dict = {
            'title': opts.title,
            'name': opts.name,
            'notes': opts.description,
            'license_id': 'cc-zero',
            'dataset_type': u'inspire',
            'owner_org': opts.owner_org,
            'inspire': {
                'title': opts.title,
                'abstract': opts.description,
                'topic_category': ["economy"],
            }
        }

        # If reusing an identifier, add the relevant keys
        if opts.identifier:
            pkg_dict['id'] = opts.identifier
            pkg_dict['inspire']['identifier'] = opts.identifier

        # Perform action
        pkg = get_action('package_create')(context, pkg_dict)
        # Parenthesized single argument: valid as a statement on Python 2
        # and as a function call on Python 3.
        print('Created dataset with: id=%(id)s name=%(name)s:' % pkg)
Example #24
0
def resource_update(context, data_dict):
    '''Update a resource.

    To update a resource you must be authorized to update the dataset that the
    resource belongs to.

    For further parameters see ``resource_create()``.

    The data is validated against ``context['schema']`` when present,
    otherwise against the ``resources`` part of the owning package plugin's
    update schema.  The change is committed unless
    ``context['defer_commit']`` is truthy.

    :param id: the id of the resource to update
    :type id: string

    :returns: the updated resource, as produced by
        ``model_dictize.resource_dictize``
    :rtype: dictionary

    :raises NotFound: if the resource does not exist
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)

    '''
    model = context['model']
    user = context['user']
    id = _get_or_bust(data_dict, "id")

    resource = model.Resource.get(id)
    context["resource"] = resource

    if not resource:
        # %-style arguments: a non-string id must not turn the NotFound
        # below into a TypeError raised while building the log message.
        logging.error('Could not find resource %s', id)
        raise NotFound(_('Resource was not found.'))

    _check_access('resource_update', context, data_dict)

    if 'schema' in context:
        schema = context['schema']
    else:
        package_plugin = lib_plugins.lookup_package_plugin(
            resource.resource_group.package.type)
        schema = package_plugin.update_package_schema()['resources']

    data, errors = _validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name", "")

    resource = model_save.resource_dict_save(data, context)
    if not context.get('defer_commit'):
        model.repo.commit()
    return model_dictize.resource_dictize(resource, context)
Example #25
0
def resource_update(context, data_dict):
    '''Update a resource.

    To update a resource you must be authorized to update the dataset that the
    resource belongs to.

    For further parameters see ``resource_create()``.

    The data is validated against ``context['schema']`` when present,
    otherwise against the ``resources`` part of the owning package plugin's
    update schema.  The change is committed unless
    ``context['defer_commit']`` is truthy.

    :param id: the id of the resource to update
    :type id: string

    :returns: the updated resource, as produced by
        ``model_dictize.resource_dictize``
    :rtype: dictionary

    :raises NotFound: if the resource does not exist
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)

    '''
    model = context['model']
    user = context['user']
    id = _get_or_bust(data_dict, "id")

    resource = model.Resource.get(id)
    context["resource"] = resource

    if not resource:
        # %-style arguments: a non-string id must not turn the NotFound
        # below into a TypeError raised while building the log message.
        logging.error('Could not find resource %s', id)
        raise NotFound(_('Resource was not found.'))

    _check_access('resource_update', context, data_dict)

    if 'schema' in context:
        schema = context['schema']
    else:
        package_plugin = lib_plugins.lookup_package_plugin(
            resource.resource_group.package.type)
        schema = package_plugin.update_package_schema()['resources']

    data, errors = _validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name", "")

    resource = model_save.resource_dict_save(data, context)
    if not context.get('defer_commit'):
        model.repo.commit()
    return model_dictize.resource_dictize(resource, context)
Example #26
0
def package_create_validate(context, data_dict):
    """Validate ``data_dict`` against the default plugin's create schema.

    The session is reset before the access check.  Returns the validated
    data; raises ``ValidationError`` (after a session rollback) when
    validation reports errors.
    """
    model = context["model"]
    default_plugin = lib_plugins.lookup_package_plugin()
    schema = default_plugin.form_to_db_schema()
    model.Session.remove()
    model.Session()._context = context

    check_access("package_create", context, data_dict)

    data, errors = validate(data_dict, schema, context)
    if not errors:
        return data

    model.Session.rollback()
    raise ValidationError(errors, error_summary(errors))
Example #27
0
def get_dataset_validation_error_or_none(pkg_dict, context):
    """Validate ``pkg_dict`` as a regular dataset update.

    A dict of type ``deposited-dataset`` is first converted with
    ``convert_deposited_dataset_to_regular_dataset``.  Errors on
    ``owner_org`` are discarded; instead, an ``owner_org`` value of
    ``'unknown'`` is reported as a ``Missing Value`` error under
    ``owner_org_dest``.

    Returns a ``ValidationError`` instance (it is returned, not raised)
    when there are errors, ``None`` otherwise.
    """
    # Convert dataset
    is_deposited = pkg_dict.get('type') == 'deposited-dataset'
    if is_deposited:
        pkg_dict = convert_deposited_dataset_to_regular_dataset(pkg_dict)

    # Validate dataset
    plugin = lib_plugins.lookup_package_plugin('dataset')
    update_schema = plugin.update_package_schema()
    data, errors = lib_plugins.plugin_validate(
        plugin, context, pkg_dict, update_schema, 'package_update')

    errors.pop('owner_org', None)
    if data.get('owner_org') == 'unknown':
        errors['owner_org_dest'] = ['Missing Value']

    if errors:
        return ValidationError(errors)
    return None
Example #28
0
def package_create(next_action, context, data_dict):
    '''Swap the owner_org validator in the schema, then delegate.

    The schema is taken from ``context['schema']`` when present, otherwise
    from the create schema of the plugin for ``data_dict['type']``. Every
    occurrence of ``default_owner_org_validator`` in the schema's
    ``owner_org`` list is replaced by ``owner_org_validator``, and the
    resulting schema is stored back into ``context['schema']``.

    :param next_action: the action to call once the schema is prepared
    :param context: action context; its ``'schema'`` entry is (re)written
    :param data_dict: the package data, passed through unchanged
    :returns: whatever ``next_action(context, data_dict)`` returns

    '''
    plugin = lib_plugins.lookup_package_plugin(data_dict.get('type'))
    schema = (context['schema'] if 'schema' in context
              else plugin.create_package_schema())

    # We modify the schema here to replace owner_org_validator by our own
    if 'owner_org' in schema:
        swapped = []
        for validator in schema['owner_org']:
            if validator is default_owner_org_validator:
                validator = owner_org_validator
            swapped.append(validator)
        schema['owner_org'] = swapped

    context['schema'] = schema
    return next_action(context, data_dict)
Example #29
0
 def _comments_template(self, package_type):
     """Return ``comments_template()`` of the plugin for ``package_type``."""
     plugin = lookup_package_plugin(package_type)
     return plugin.comments_template()
Example #30
0
    def index_package(self, pkg_dict, defer_commit=False):
        """Turn ``pkg_dict`` into a flat document and add it to Solr.

        ``pkg_dict`` is modified in place along the way (keys are popped,
        added and rewritten) before being replaced by a cleaned copy, which
        is what is finally sent.

        :param pkg_dict: dataset dictionary to index; ``None`` is a no-op
        :param defer_commit: when true, the document is added without asking
            Solr to commit. The commit is also skipped when the
            ``ckan.search.solr_commit`` config option is false.
        :returns: ``None``, or the result of ``self.delete_package`` when
            the dataset has no ``state`` or its state contains ``'deleted'``
        :raises SearchIndexError: if Solr reports an error or a socket error
            occurs
        :raises AssertionError: if a ``before_index`` plugin hook returns an
            empty value
        """
        if pkg_dict is None:
            return

        # tracking summary values will be stale, never store them
        tracking_summary = pkg_dict.pop('tracking_summary', None)
        for r in pkg_dict.get('resources', []):
            r.pop('tracking_summary', None)

        # Serialised before any of the index-specific keys below are added.
        data_dict_json = json.dumps(pkg_dict)

        if config.get('ckan.cache_validated_datasets', True):
            package_plugin = lib_plugins.lookup_package_plugin(
                pkg_dict.get('type'))

            schema = package_plugin.show_package_schema()
            # Validation errors are not inspected; only the validated dict
            # is stored.
            validated_pkg_dict, errors = lib_plugins.plugin_validate(
                package_plugin, {
                    'model': model,
                    'session': model.Session
                }, pkg_dict, schema, 'package_show')
            pkg_dict['validated_data_dict'] = json.dumps(
                validated_pkg_dict,
                cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

        pkg_dict['data_dict'] = data_dict_json

        # add to string field for sorting
        title = pkg_dict.get('title')
        if title:
            pkg_dict['title_string'] = title

        # delete the package if there is no state, or the state is `deleted`
        if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
            return self.delete_package(pkg_dict)

        # Snapshot of the keys present now; extras may not overwrite these.
        index_fields = RESERVED_FIELDS + list(pkg_dict.keys())

        # include the extras in the main namespace
        extras = pkg_dict.get('extras', [])
        for extra in extras:
            key, value = extra['key'], extra['value']
            if isinstance(value, (tuple, list)):
                value = " ".join(map(text_type, value))
            # keep only the characters allowed by KEY_CHARS
            key = ''.join([c for c in key if c in KEY_CHARS])
            pkg_dict['extras_' + key] = value
            if key not in index_fields:
                pkg_dict[key] = value
        pkg_dict.pop('extras', None)

        # add tags, removing vocab tags from 'tags' list and adding them as
        # vocab_<tag name> so that they can be used in facets
        non_vocab_tag_names = []
        tags = pkg_dict.pop('tags', [])
        context = {'model': model}

        for tag in tags:
            if tag.get('vocabulary_id'):
                data = {'id': tag['vocabulary_id']}
                vocab = logic.get_action('vocabulary_show')(context, data)
                key = u'vocab_%s' % vocab['name']
                if key in pkg_dict:
                    pkg_dict[key].append(tag['name'])
                else:
                    pkg_dict[key] = [tag['name']]
            else:
                non_vocab_tag_names.append(tag['name'])

        pkg_dict['tags'] = non_vocab_tag_names

        # add groups
        groups = pkg_dict.pop('groups', [])

        # we use the capacity to make things private in the search index
        if pkg_dict['private']:
            pkg_dict['capacity'] = 'private'
        else:
            pkg_dict['capacity'] = 'public'

        pkg_dict['groups'] = [group['name'] for group in groups]

        # if there is an owner_org we want to add this to groups for index
        # purposes
        if pkg_dict.get('organization'):
            pkg_dict['organization'] = pkg_dict['organization']['name']
        else:
            pkg_dict['organization'] = None

        # tracking
        if not tracking_summary:
            tracking_summary = model.TrackingSummary.get_for_package(
                pkg_dict['id'])
        pkg_dict['views_total'] = tracking_summary['total']
        pkg_dict['views_recent'] = tracking_summary['recent']

        resource_fields = [('name', 'res_name'),
                           ('description', 'res_description'),
                           ('format', 'res_format'), ('url', 'res_url'),
                           ('resource_type', 'res_type')]
        resource_extras = [(e, 'res_extras_' + e)
                           for e in model.Resource.get_extra_columns()]
        # flatten the structure for indexing:
        # each res_* key becomes a list with one entry per resource
        # (u'' when the resource lacks that field)
        for resource in pkg_dict.get('resources', []):
            for (okey, nkey) in resource_fields + resource_extras:
                pkg_dict[nkey] = pkg_dict.get(nkey,
                                              []) + [resource.get(okey, u'')]
        pkg_dict.pop('resources', None)

        # Group related package names by relationship type; relationships
        # where this package is the object are stored under the reversed type.
        rel_dict = collections.defaultdict(list)
        subjects = pkg_dict.pop("relationships_as_subject", [])
        objects = pkg_dict.pop("relationships_as_object", [])
        for rel in objects:
            type = model.PackageRelationship.forward_to_reverse_type(
                rel['type'])
            rel_dict[type].append(
                model.Package.get(rel['subject_package_id']).name)
        for rel in subjects:
            type = rel['type']
            rel_dict[type].append(
                model.Package.get(rel['object_package_id']).name)
        for key, value in six.iteritems(rel_dict):
            # never overwrite an existing field with a relationship list
            if key not in pkg_dict:
                pkg_dict[key] = value

        pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

        # Save dataset type
        pkg_dict['dataset_type'] = pkg_dict['type']

        # clean the dict fixing keys and dates
        # FIXME where are we getting these dirty keys from?  can we not just
        # fix them in the correct place or is this something that always will
        # be needed?  For my data not changing the keys seems to not cause a
        # problem.
        new_dict = {}
        bogus_date = datetime.datetime(1, 1, 1)
        for key, value in pkg_dict.items():
            key = six.ensure_str(key)
            if key.endswith('_date'):
                try:
                    date = parse(value, default=bogus_date)
                    if date != bogus_date:
                        value = date.isoformat() + 'Z'
                    else:
                        # The date field was empty, so dateutil filled it with
                        # the default bogus date
                        value = None
                except (ValueError, IndexError):
                    # unparseable value: the field is dropped from the
                    # document altogether
                    continue
            new_dict[key] = value
        pkg_dict = new_dict

        for k in ('title', 'notes', 'title_string'):
            if k in pkg_dict and pkg_dict[k]:
                pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

        # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
        # with UTC time (i.e trailing Z)
        # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
        pkg_dict['metadata_created'] += 'Z'
        pkg_dict['metadata_modified'] += 'Z'

        # mark this CKAN instance as data source:
        pkg_dict['site_id'] = config.get('ckan.site_id')

        # Strip a selection of the fields.
        # These fields are possible candidates for sorting search results on,
        # so we strip leading spaces because solr will sort " " before "a" or "A".
        for field_name in ['title']:
            # NOTE(review): pkg_dict is a plain dict here and .get() does not
            # raise KeyError, so the except branch looks unreachable.
            try:
                value = pkg_dict.get(field_name)
                if value:
                    pkg_dict[field_name] = value.lstrip()
            except KeyError:
                pass

        # add a unique index_id to avoid conflicts
        import hashlib
        pkg_dict['index_id'] = hashlib.md5(
            six.b('%s%s' %
                  (pkg_dict['id'], config.get('ckan.site_id')))).hexdigest()

        # Each plugin receives the dict returned by the previous one.
        for item in PluginImplementations(IPackageController):
            pkg_dict = item.before_index(pkg_dict)

        assert pkg_dict, 'Plugin must return non empty package dict on index'

        # permission labels determine visibility in search, can't be set
        # in original dataset or before_index plugins
        labels = lib_plugins.get_permission_labels()
        dataset = model.Package.get(pkg_dict['id'])
        pkg_dict['permission_labels'] = labels.get_dataset_labels(
            dataset) if dataset else []  # TestPackageSearchIndex-workaround

        # send to solr:
        try:
            conn = make_connection()
            commit = not defer_commit
            if not asbool(config.get('ckan.search.solr_commit', 'true')):
                commit = False
            conn.add(docs=[pkg_dict], commit=commit)
        except pysolr.SolrError as e:
            msg = 'Solr returned an error: {0}'.format(
                e.args[0][:1000]  # limit huge responses
            )
            raise SearchIndexError(msg)
        except socket.error as e:
            # NOTE(review): if make_connection() itself raised socket.error,
            # `conn` would be unbound here - confirm it cannot.
            err = 'Could not connect to Solr using {0}: {1}'.format(
                conn.url, str(e))
            log.error(err)
            raise SearchIndexError(err)

        commit_debug_msg = 'Not committed yet' if defer_commit else 'Committed'
        log.debug('Updated index for %s [%s]' %
                  (pkg_dict.get('name'), commit_debug_msg))
Example #31
0
    def index_package(self, pkg_dict, defer_commit=False):
        """Turn ``pkg_dict`` into a flat document and add it to Solr.

        ``pkg_dict`` is modified in place along the way (keys are popped,
        added and rewritten) before being replaced by a cleaned copy, which
        is what is finally sent.

        NOTE(review): this variant uses Python 2 only constructs
        (``unicode``, ``iteritems``, ``except X, e``).

        :param pkg_dict: dataset dictionary to index; ``None`` is a no-op
        :param defer_commit: when true, the document is added without asking
            Solr to commit. The commit is also skipped when the
            ``ckan.search.solr_commit`` config option is false.
        :returns: ``None``, or the result of ``self.delete_package`` when
            the dataset has no ``state`` or its state contains ``'deleted'``
        :raises SearchIndexError: if Solr reports an error
        :raises AssertionError: if a ``before_index`` plugin hook returns an
            empty value
        """
        if pkg_dict is None:
            return

        # tracking summary values will be stale, never store them
        tracking_summary = pkg_dict.pop('tracking_summary', None)
        for r in pkg_dict.get('resources', []):
            r.pop('tracking_summary', None)

        # Serialised before any of the index-specific keys below are added.
        data_dict_json = json.dumps(pkg_dict)

        if config.get('ckan.cache_validated_datasets', True):
            package_plugin = lib_plugins.lookup_package_plugin(
                pkg_dict.get('type'))

            schema = package_plugin.show_package_schema()
            # Validation errors are not inspected; only the validated dict
            # is stored.
            validated_pkg_dict, errors = lib_plugins.plugin_validate(
                package_plugin, {'model': model, 'session': model.Session},
                pkg_dict, schema, 'package_show')
            pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict,
                cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

        pkg_dict['data_dict'] = data_dict_json

        # add to string field for sorting
        title = pkg_dict.get('title')
        if title:
            pkg_dict['title_string'] = title

        # delete the package if there is no state, or the state is `deleted`
        if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
            return self.delete_package(pkg_dict)

        # Snapshot of the keys present now; extras may not overwrite these.
        index_fields = RESERVED_FIELDS + pkg_dict.keys()

        # include the extras in the main namespace
        extras = pkg_dict.get('extras', [])
        for extra in extras:
            key, value = extra['key'], extra['value']
            if isinstance(value, (tuple, list)):
                value = " ".join(map(unicode, value))
            # keep only the characters allowed by KEY_CHARS
            key = ''.join([c for c in key if c in KEY_CHARS])
            pkg_dict['extras_' + key] = value
            if key not in index_fields:
                pkg_dict[key] = value
        pkg_dict.pop('extras', None)

        # add tags, removing vocab tags from 'tags' list and adding them as
        # vocab_<tag name> so that they can be used in facets
        non_vocab_tag_names = []
        tags = pkg_dict.pop('tags', [])
        context = {'model': model}

        for tag in tags:
            if tag.get('vocabulary_id'):
                data = {'id': tag['vocabulary_id']}
                vocab = logic.get_action('vocabulary_show')(context, data)
                key = u'vocab_%s' % vocab['name']
                if key in pkg_dict:
                    pkg_dict[key].append(tag['name'])
                else:
                    pkg_dict[key] = [tag['name']]
            else:
                non_vocab_tag_names.append(tag['name'])

        pkg_dict['tags'] = non_vocab_tag_names

        # add groups
        groups = pkg_dict.pop('groups', [])

        # we use the capacity to make things private in the search index
        if pkg_dict['private']:
            pkg_dict['capacity'] = 'private'
        else:
            pkg_dict['capacity'] = 'public'

        pkg_dict['groups'] = [group['name'] for group in groups]

        # if there is an owner_org we want to add this to groups for index
        # purposes
        if pkg_dict.get('organization'):
           pkg_dict['organization'] = pkg_dict['organization']['name']
        else:
           pkg_dict['organization'] = None

        # tracking
        if not tracking_summary:
            tracking_summary = model.TrackingSummary.get_for_package(
                pkg_dict['id'])
        pkg_dict['views_total'] = tracking_summary['total']
        pkg_dict['views_recent'] = tracking_summary['recent']

        resource_fields = [('name', 'res_name'),
                           ('description', 'res_description'),
                           ('format', 'res_format'),
                           ('url', 'res_url'),
                           ('resource_type', 'res_type')]
        resource_extras = [(e, 'res_extras_' + e) for e
                            in model.Resource.get_extra_columns()]
        # flatten the structure for indexing:
        # each res_* key becomes a list with one entry per resource
        # (u'' when the resource lacks that field)
        for resource in pkg_dict.get('resources', []):
            for (okey, nkey) in resource_fields + resource_extras:
                pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
        pkg_dict.pop('resources', None)

        # Group related package names by relationship type; relationships
        # where this package is the object are stored under the reversed type.
        rel_dict = collections.defaultdict(list)
        subjects = pkg_dict.pop("relationships_as_subject", [])
        objects = pkg_dict.pop("relationships_as_object", [])
        for rel in objects:
            type = model.PackageRelationship.forward_to_reverse_type(rel['type'])
            rel_dict[type].append(model.Package.get(rel['subject_package_id']).name)
        for rel in subjects:
            type = rel['type']
            rel_dict[type].append(model.Package.get(rel['object_package_id']).name)
        for key, value in rel_dict.iteritems():
            # never overwrite an existing field with a relationship list
            if key not in pkg_dict:
                pkg_dict[key] = value

        pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

        # Save dataset type
        pkg_dict['dataset_type'] = pkg_dict['type']

        # clean the dict fixing keys and dates
        # FIXME where are we getting these dirty keys from?  can we not just
        # fix them in the correct place or is this something that always will
        # be needed?  For my data not changing the keys seems to not cause a
        # problem.
        new_dict = {}
        bogus_date = datetime.datetime(1, 1, 1)
        for key, value in pkg_dict.items():
            # non-ASCII characters are silently dropped from the key
            key = key.encode('ascii', 'ignore')
            if key.endswith('_date'):
                try:
                    date = parse(value, default=bogus_date)
                    if date != bogus_date:
                        value = date.isoformat() + 'Z'
                    else:
                        # The date field was empty, so dateutil filled it with
                        # the default bogus date
                        value = None
                except ValueError:
                    # unparseable value: the field is dropped from the
                    # document altogether
                    continue
            new_dict[key] = value
        pkg_dict = new_dict

        for k in ('title', 'notes', 'title_string'):
            if k in pkg_dict and pkg_dict[k]:
                pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

        # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
        # with UTC time (i.e trailing Z)
        # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
        pkg_dict['metadata_created'] += 'Z'
        pkg_dict['metadata_modified'] += 'Z'

        # mark this CKAN instance as data source:
        pkg_dict['site_id'] = config.get('ckan.site_id')

        # Strip a selection of the fields.
        # These fields are possible candidates for sorting search results on,
        # so we strip leading spaces because solr will sort " " before "a" or "A".
        for field_name in ['title']:
            # NOTE(review): pkg_dict is a plain dict here and .get() does not
            # raise KeyError, so the except branch looks unreachable.
            try:
                value = pkg_dict.get(field_name)
                if value:
                    pkg_dict[field_name] = value.lstrip()
            except KeyError:
                pass

        # add a unique index_id to avoid conflicts
        import hashlib
        pkg_dict['index_id'] = hashlib.md5('%s%s' % (pkg_dict['id'],config.get('ckan.site_id'))).hexdigest()

        # Each plugin receives the dict returned by the previous one.
        for item in PluginImplementations(IPackageController):
            pkg_dict = item.before_index(pkg_dict)

        assert pkg_dict, 'Plugin must return non empty package dict on index'

        # send to solr:
        try:
            conn = make_connection()
            commit = not defer_commit
            if not asbool(config.get('ckan.search.solr_commit', 'true')):
                commit = False
            conn.add(docs=[pkg_dict], commit=commit)
        except pysolr.SolrError, e:
            msg = 'Solr returned an error: {0}'.format(
                e[:1000] # limit huge responses
            )
            raise SearchIndexError(msg)
Example #32
0
def _get_pkg_template(template_type, package_type=None):
    """Call the plugin method named ``template_type`` and return its result.

    The plugin is the one ``lookup_package_plugin`` returns for
    ``package_type``.
    """
    template_getter = getattr(lookup_package_plugin(package_type),
                              template_type)
    return template_getter()
Example #33
0
def package_update(context, data_dict):
    """Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update (``name`` is used
        when ``id`` is missing or empty)
    :type id: string

    :returns: the updated dataset as returned by ``package_show``, or just
              the dataset id when ``'return_id_only'`` is true in the context
    :rtype: dictionary

    """
    model = context["model"]
    user = context["user"]

    pkg = model.Package.get(data_dict.get("id") or data_dict["name"])
    if pkg is None:
        raise NotFound(_("Package was not found."))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    _check_access("package_update", context, data_dict)

    # Ask the plugin for its schema; plugins without
    # form_to_db_schema_options() fall back to form_to_db_schema().
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    schema_options = {
        "type": "update",
        "api": "api_version" in context,
        "context": context,
    }
    try:
        schema = package_plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        schema = package_plugin.form_to_db_schema()

    if "api_version" not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            package_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)

    logged_pkg = context.get("package")
    log.debug(
        "package_update validate_errs=%r user=%s package=%s data=%r",
        errors,
        context.get("user"),
        logged_pkg.name if logged_pkg else "",
        data,
    )

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    revision = model.repo.new_revision()
    revision.author = user
    revision.message = (
        context["message"]
        if "message" in context
        else _(u"REST API: Update object %s") % data.get("name")
    )

    pkg = model_save.package_dict_save(data, context)

    # The owner-org update runs on a copy of the context with auth disabled.
    context_no_auth = context.copy()
    context_no_auth["ignore_auth"] = True
    owner_org_update = _get_action("package_owner_org_update")
    owner_org_update(
        context_no_auth, {"id": pkg.id, "organization_id": pkg.owner_org}
    )

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
    if not context.get("defer_commit"):
        model.repo.commit()

    log.debug("Updated object %s" % str(pkg.name))

    if context.get("return_id_only", False):
        return data_dict["id"]
    return _get_action("package_show")(context, {"id": data_dict["id"]})
Example #34
0
def package_create(context, data_dict):
    '''Create a new dataset (package).

    You must be authorized to create new datasets. If you specify any groups
    for the new dataset, you must also be authorized to edit these groups.

    Plugins may change the parameters of this function depending on the value
    of the ``type`` parameter, see the ``IDatasetForm`` plugin interface.

    On success the new package object is stored in ``context['package']``
    and its id in ``context['id']``.

    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'``
    :type name: string
    :param title: the title of the dataset (optional, default: same as
        ``name``)
    :type title: string
    :param author: the name of the dataset's author (optional)
    :type author: string
    :param author_email: the email address of the dataset's author (optional)
    :type author_email: string
    :param maintainer: the name of the dataset's maintainer (optional)
    :type maintainer: string
    :param maintainer_email: the email address of the dataset's maintainer
        (optional)
    :type maintainer_email: string
    :param license_id: the id of the dataset's license, see ``license_list()``
        for available values (optional)
    :type license_id: license id string
    :param notes: a description of the dataset (optional)
    :type notes: string
    :param url: a URL for the dataset's source (optional)
    :type url: string
    :param version: (optional)
    :type version: string, no longer than 100 characters
    :param state: the current state of the dataset, e.g. ``'active'`` or
        ``'deleted'``, only active datasets show up in search results and
        other lists of datasets, this parameter will be ignored if you are not
        authorized to change the state of the dataset (optional, default:
        ``'active'``)
    :type state: string
    :param type: the type of the dataset (optional), ``IDatasetForm`` plugins
        associate themselves with different dataset types and provide custom
        dataset handling behaviour for these types
    :type type: string
    :param resources: the dataset's resources, see ``resource_create()``
        for the format of resource dictionaries (optional)
    :type resources: list of resource dictionaries
    :param tags: the dataset's tags, see ``tag_create()`` for the format
        of tag dictionaries (optional)
    :type tags: list of tag dictionaries
    :param extras: the dataset's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to datasets, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string), and optionally ``'deleted'``
    :type extras: list of dataset extra dictionaries
    :param relationships_as_object: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_object: list of relationship dictionaries
    :param relationships_as_subject: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_subject: list of relationship dictionaries
    :param groups: the groups to which the dataset belongs (optional), each
        group dictionary should have one or more of the following keys which
        identify an existing group:
        ``'id'`` (the id of the group, string), ``'name'`` (the name of the
        group, string), ``'title'`` (the title of the group, string), to see
        which groups exist call ``group_list()``
    :type groups: list of dictionaries

    :returns: the newly created dataset (unless 'return_id_only' is set to True
              in the context, in which case just the dataset id will be returned)
    :rtype: dictionary

    '''
    model = context['model']
    user = context['user']

    # Ask the plugin for its schema; plugins without
    # form_to_db_schema_options() fall back to form_to_db_schema().
    package_plugin = lib_plugins.lookup_package_plugin(data_dict.get('type'))
    schema_options = {
        'type': 'create',
        'api': 'api_version' in context,
        'context': context,
    }
    try:
        schema = package_plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        schema = package_plugin.form_to_db_schema()

    _check_access('package_create', context, data_dict)

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            package_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'), data.get('name'), data_dict)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    revision = model.repo.new_revision()
    revision.author = user
    revision.message = (
        context['message'] if 'message' in context
        else _(u'REST API: Create object %s') % data.get("name"))

    pkg = model_save.package_dict_save(data, context)

    # The creating user (if any) is passed as the admin for the default roles.
    admins = [model.User.by_name(user.decode('utf8'))] if user else []
    model.setup_default_user_roles(pkg, admins)

    # Needed to let extensions know the package id
    model.Session.flush()
    data['id'] = pkg.id

    # The owner-org update runs on a copy of the context, with auth disabled
    # and its commit deferred.
    context_org_update = context.copy()
    context_org_update.update(ignore_auth=True, defer_commit=True)
    owner_org_update = _get_action('package_owner_org_update')
    owner_org_update(context_org_update,
                     {'id': pkg.id, 'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)
        item.after_create(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % str(pkg.name))

    if context.get('return_id_only', False):
        return context['id']
    return _get_action('package_show')(context, {'id': context['id']})
Example #35
0
def blacklist_package_update(context, data_dict):
    '''Update a dataset while accepting an extra ``black_list`` field.

    The dataset is looked up by ``data_dict['id']``, falling back to
    ``data_dict['name']``. Unless the caller supplied ``context['schema']``,
    the plugin's update schema is extended with an optional ``black_list``
    field that is converted to an extra. The ``metadata_modified`` column is
    written directly through a query before the package dict is saved.

    Side effects on the arguments: ``data_dict['id']`` and
    ``data_dict['type']`` are overwritten from the stored package,
    ``context['package']`` is set, ``context['schema']`` is removed and
    ``context['ignore_auth']`` is set to ``True``.

    :param context: action context; must contain ``'model'`` and ``'user'``
    :param data_dict: the dataset fields to save
    :returns: the dataset id when ``context['return_id_only']`` is truthy,
        otherwise the result of ``package_show``
    :raises NotFound: if no package matches the given id or name
    :raises KeyError: if ``data_dict`` has neither a truthy ``id`` nor a
        ``name`` key
    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)

    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()
        # NOTE(review): these lines were tab-indented, which Python 2 reads
        # as belonging to this ``else`` branch (and Python 3 rejects with
        # TabError). That nesting is kept, so a caller-supplied schema is
        # NOT extended with ``black_list`` - confirm this is intended.
        schema.update({
            'black_list': [toolkit.get_validator('ignore_missing'),
                           toolkit.get_converter('convert_to_extras')]
        })

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    # NOTE(review): ``errors`` is never inspected, so the update proceeds
    # even when validation reports problems - confirm this is deliberate.
    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # The owner-org update runs on a copy of the context, with auth disabled
    # and its commit deferred.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': data})

    if not context.get('defer_commit'):
        model.repo.commit()

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
            else get_action('package_show')(context, {'id': data_dict['id']})

    return output
def dataset_import(context, data_dict):
    '''Import a dataset from a given XML source.

    The XML is fetched (or read), parsed as metadata of the `dtype` schema and
    passed to the `package_create` action.

    This action, depending also on the value of its flags, can raise one of:

      * actions.Invalid: received invalid input
      * actions.IdentifierConflict: a package with the same identifier already exists
      * actions.NameConflict: a package with the same name already exists
      * toolkit.ValidationError: validation fails while trying to create a package

    :param source: This is either a string representing a (local or external) URL
        or a file-like object.
    :type source: string or file-like

    :param dtype: the dataset-type i.e. the schema of imported metadata
        (defaults to 'inspire')
    :type dtype: string

    :param owner_org: the machine-name for the owner organization
    :type owner_org: string

    :param continue_on_errors: hint on what to do when validation fails
    :type continue_on_errors: boolean

    :param rename_if_conflict: hint on what to do when a name conflict is encountered
    :type rename_if_conflict: boolean

    :rtype: basic info for the newly created package
    '''

    # Read parameters

    try:
        source = data_dict['source']
    except KeyError:
        raise Invalid({'source': 'The `source` parameter is required'})

    dtype = data_dict.get('dtype', 'inspire')

    try:
        owner_org = data_dict['owner_org']
    except KeyError:
        raise Invalid({'owner_org':
            'The `owner_org` parameter is required.\n'
            'Hint: Use `organization_list_for_user` to retrieve a valid list.'})

    allow_rename = data_dict.get('rename_if_conflict', False)
    allow_validation_errors = data_dict.get('continue_on_errors', False)

    # Fetch raw XML data

    xmldata = None

    if isinstance(source, basestring):
        # Assume source is a URL. Anything that is not an absolute http(s)
        # URL is treated as a path relative to the site URL.
        if not source.startswith(('http://', 'https://')):
            site_url = pylons.config['ckan.site_url']
            # Join with exactly one slash, whether or not the configured
            # site URL carries a trailing one
            source = site_url.rstrip('/') + '/' + source.strip('/')
        source = urlparse.urlparse(source)
        r1 = requests.get(source.geturl())
        if not r1.ok:
            raise Invalid({'source': _('Cannot fetch metadata from source URL')})
        # Compare the bare media type only: the header may carry parameters
        # (e.g. "; charset=utf-8") or may be missing altogether
        content_type = r1.headers.get('content-type', '')
        media_type = content_type.split(';')[0].strip().lower()
        if media_type not in ('application/xml', 'text/xml'):
            raise Invalid({'source': _('The source does not contain XML data')})
        xmldata = r1.content
    else:
        # Assume source is a file-like object
        try:
            xmldata = source.read()
        except Exception:
            raise Invalid({'source': _('Cannot read from source')})

    # Parse XML data as metadata of `dtype` schema

    obj = make_metadata(dtype)
    try:
        obj = xml_serializer_for(obj).loads(xmldata)
    except AssertionError:
        # Let assertion failures through untouched (keeps the traceback)
        raise
    except Exception as ex:
        # Map all parse exceptions to Invalid
        log.info('Failed to parse XML metadata: %s', ex)
        raise Invalid({'source': _('The given XML file is malformed: %s') % (ex)})

    # Prepare package dict

    pkg_dict = {'version': '1.0'}
    pkg_dict.update(obj.deduce_fields())
    pkg_dict.update({
        'owner_org': owner_org,
        'type': 'dataset',
        'dataset_type': dtype,
        dtype: obj.to_dict(flat=False),
    })

    # If an identifier is passed, check that this is not already present.
    # Note This is no guarantee that the identifier will be available when
    # `package_create` is actually invoked.

    identifier = pkg_dict.get('id')
    if identifier and _check_package_id_exists(context, identifier):
        raise IdentifierConflict({
           'id':  _('A package identified as %s already exists') % (identifier)})

    # Find and assign a machine-name for this package
    # Note We just find the 1st available name. As noted before, this is no
    # guarantee that will be available when `package_create` is invoked.

    basename = pkg_dict['name']
    max_num_probes = 10 if allow_rename else 1
    name = _find_a_package_name(context, basename, max_num_probes)
    if not name:
        raise NameConflict({
            'name': _('The package name %r is not available') % (basename)})

    pkg_dict['name'] = name
    # Mirror a rename in the title; when the base name was free there is no
    # suffix and the title must stay as it is (no trailing blank)
    name_suffix = name[len(basename):]
    if name_suffix:
        pkg_dict['title'] += ' ' + name_suffix

    # Create/Update package

    schema1, validation_errors, error_message = None, None, None

    if identifier:
        # Must override catalog-wide schema for actions in this context
        schema1 = lookup_package_plugin().create_package_schema()
        schema1['id'] = [unicode]

    ctx = _make_context(context)
    if schema1:
        ctx['schema'] = schema1

    try:
        pkg_dict = _get_action('package_create')(ctx, data_dict=pkg_dict)
    except toolkit.ValidationError as ex:
        if 'name' in ex.error_dict:
            # The name is probably taken, re-raise exception
            raise
        elif allow_validation_errors:
            # Save errors and retry with a different context
            validation_errors = ex.error_dict
            error_message = ex.message or _('The dataset contains invalid metadata')
            ctx = _make_context(context, skip_validation=True)
            if schema1:
                ctx['schema'] = schema1
            pkg_dict = _get_action('package_create')(ctx, data_dict=pkg_dict)
            log.warning('Forced to create an invalid package as %r', name)
        else:
            raise

    # Sanity checks on what `package_create` handed back
    assert name == pkg_dict['name']
    assert (not identifier) or (identifier == pkg_dict['id'])

    return {
        # Provide basic package fields
        'id': pkg_dict['id'],
        'name': name,
        'title': pkg_dict['title'],
        'state': pkg_dict.get('state'),
        # Provide details on validation (meaningful if allow_validation_errors)
        'validation': {
            'message': error_message,
            'errors': validation_errors,
        },
    }
Example #37
0
File: update.py Project: arkka/ckan
def package_update(context, data_dict):
    '''Update an existing dataset (package) and return its new state.

    The caller must be authorized to edit the dataset and the groups that it
    belongs to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface. For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset
    :rtype: dictionary

    '''
    model, user = context['model'], context['user']
    # The name is consulted only when no (truthy) id was supplied
    name_or_id = data_dict.get("id") or data_dict['name']

    # Reset the session, then attach this context to it
    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    _check_access('package_update', context, data_dict)

    # Resolve the schema; plugins without form_to_db_schema_options() are
    # asked for their plain form_to_db_schema() instead
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    try:
        schema_options = {
            'type': 'update',
            'api': 'api_version' in context,
            'context': context,
        }
        schema = package_plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        schema = package_plugin.form_to_db_schema()

    called_via_api = 'api_version' in context
    if not called_via_api:
        # Old plugins do not accept the schema argument, so retry without it
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            package_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, _error_summary(errors))

    revision = model.repo.new_revision()
    revision.author = user
    revision.message = (
        context['message'] if 'message' in context
        else _(u'REST API: Update object %s') % data.get("name"))

    pkg = model_save.package_dict_save(data, context)

    # Let every IPackageController implementation know about the edit
    for observer in plugins.PluginImplementations(plugins.IPackageController):
        observer.edit(pkg)

    commit_deferred = context.get('defer_commit')
    if not commit_deferred:
        model.repo.commit()

    show_package = _get_action('package_show')
    return show_package(context, data_dict)
Example #38
0
def package_create(fb, context, data_dict):
    '''Create a dataset, using our own owner_org validator in the schema.

    :param fb: not used by this implementation
    :param context: action context; ``model`` and ``user`` are required,
        while ``schema``, ``message``, ``defer_commit``, ``api_version`` and
        ``return_id_only`` are honoured when present
    :param data_dict: the dataset fields to validate and save

    :returns: the new dataset id if ``return_id_only`` is set in the context,
        otherwise the result of ``package_show`` for the new dataset
    :raises ValidationError: if validating ``data_dict`` yields errors
    '''
    model, user = context['model'], context['user']

    package_plugin = lib_plugins.lookup_package_plugin(data_dict.get('type'))
    schema = (context['schema'] if 'schema' in context
              else package_plugin.create_package_schema())
    # Swap the default owner_org validator for ours, keep all the others
    if 'owner_org' in schema:
        schema['owner_org'] = [
            uds_oov if validator is default_oov else validator
            for validator in schema['owner_org']]

    check_access('package_create', context, data_dict)

    if 'api_version' not in context:
        # check_data_dict() is deprecated: it is invoked only for plugins
        # that still provide it.
        legacy_check = getattr(package_plugin, 'check_data_dict', None)
        if legacy_check:
            try:
                legacy_check(data_dict, schema)
            except TypeError:
                # Old plugins do not accept the schema argument
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_create')
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              data.get('name'), data_dict)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    revision = model.repo.new_revision()
    revision.author = user
    revision.message = (
        context['message'] if 'message' in context
        else _(u'REST API: Create object %s') % data.get("name"))

    # The creating user (if it resolves to a user object) becomes admin
    admins = []
    creator = model.User.by_name(user.decode('utf8')) if user else None
    if creator:
        admins.append(creator)
        data['creator_user_id'] = creator.id

    pkg = model_save.package_dict_save(data, context)

    model.setup_default_user_roles(pkg, admins)
    # Flush so that extensions get to see the package id
    model.Session.flush()
    data['id'] = pkg.id

    # Owner organization is updated without auth checks and without
    # committing on its own
    org_update_context = context.copy()
    org_update_context.update({'ignore_auth': True, 'defer_commit': True})
    owner_org_update = get_action('package_owner_org_update')
    owner_org_update(org_update_context,
                     {'id': pkg.id, 'organization_id': pkg.owner_org})

    for observer in plugins.PluginImplementations(plugins.IPackageController):
        observer.create(pkg)
        observer.after_create(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    # The rest api needs the package, and its id to build a new location
    context["package"] = pkg
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    return_id_only = context.get('return_id_only', False)
    if return_id_only:
        return context['id']
    return get_action('package_show')(context, {'id': context['id']})
Example #39
0
 def _setup_template_variables(self, context, data_dict, package_type=None):
     """Delegate to setup_template_variables() of the plugin looked up
     for `package_type`."""
     plugin = lookup_package_plugin(package_type)
     return plugin.setup_template_variables(context, data_dict)
Example #40
0
 def _new_template(self, package_type):
     """Return new_template() of the plugin looked up for `package_type`."""
     plugin = lookup_package_plugin(package_type)
     return plugin.new_template()
Example #41
0
def dataset_import(context, data_dict):
    '''Import a dataset from a given XML source.

    The XML is fetched (or read), parsed as metadata of the `dtype` schema and
    passed to the `package_create` action.

    This action, depending also on the value of its flags, can raise one of:

      * actions.Invalid: received invalid input
      * actions.IdentifierConflict: a package with the same identifier already exists
      * actions.NameConflict: a package with the same name already exists
      * toolkit.ValidationError: validation fails while trying to create a package

    :param source: This is either a string representing a (local or external) URL
        or a file-like object.
    :type source: string or file-like

    :param dtype: the dataset-type i.e. the schema of imported metadata
        (defaults to 'datacite')
    :type dtype: string

    :param owner_org: the machine-name for the owner organization
    :type owner_org: string

    :param continue_on_errors: hint on what to do when validation fails
    :type continue_on_errors: boolean

    :param rename_if_conflict: hint on what to do when a name conflict is encountered
    :type rename_if_conflict: boolean

    :rtype: basic info for the newly created package
    '''

    # Read parameters
    try:
        source = data_dict['source']
    except KeyError:
        raise Invalid({'source': 'The `source` parameter is required'})

    dtype = data_dict.get('dtype', 'datacite')

    try:
        owner_org = data_dict['owner_org']
    except KeyError:
        raise Invalid({
            'owner_org':
            'The `owner_org` parameter is required.\n'
            'Hint: Use `organization_list_for_user` to retrieve a valid list.'
        })

    allow_rename = data_dict.get('rename_if_conflict', False)
    allow_validation_errors = data_dict.get('continue_on_errors', False)
    log.debug('dtype: %s, source %s, source type: %s', dtype, source,
              type(source))
    # Fetch raw XML data

    xmldata = None

    if isinstance(source, basestring):
        # Assume source is a URL. Anything that is not an absolute http(s)
        # URL is treated as a path relative to the site URL.
        if not source.startswith(('http://', 'https://')):
            site_url = pylons.config['ckan.site_url']
            # Join with exactly one slash, whether or not the configured
            # site URL carries a trailing one
            source = site_url.rstrip('/') + '/' + source.strip('/')
        source = urlparse.urlparse(source)
        r1 = requests.get(source.geturl())
        if not r1.ok:
            raise Invalid(
                {'source': _('Cannot fetch metadata from source URL')})
        # Compare the bare media type only: the header may carry parameters
        # (e.g. "; charset=utf-8") or may be missing altogether
        content_type = r1.headers.get('content-type', '')
        media_type = content_type.split(';')[0].strip().lower()
        if media_type not in ('application/xml', 'text/xml'):
            raise Invalid(
                {'source': _('The source does not contain XML data')})
        xmldata = r1.content
    else:
        # Assume source is a file-like object
        try:
            log.debug('source is %s', source)
            xmldata = source.read()
            log.debug('xmldata is %s', xmldata)
        except Exception:
            raise Invalid({'source': _('Cannot read from source')})

    # Parse XML data as metadata of `dtype` schema

    obj = make_metadata(dtype)
    log.debug('obj is: %s', obj)
    try:
        obj = xml_serializer_for(obj).loads(xmldata)
    except AssertionError:
        # Let assertion failures through untouched (keeps the traceback)
        raise
    except Exception as ex:
        # Map all parse exceptions to Invalid
        log.info('Failed to parse XML metadata: %s', ex)
        raise Invalid(
            {'source': _('The given XML file is malformed: %s') % (ex)})

    # Prepare package dict
    log.debug('updated obj is: %s', obj)

    pkg_dict = {'version': '1.0'}
    pkg_dict.update(obj.deduce_fields())
    pkg_dict.update({
        'owner_org': owner_org,
        'type': 'dataset',
        'dataset_type': dtype,
        dtype: obj.to_dict(flat=False),
    })
    log.debug('pkg_dict: %s', pkg_dict)
    # If an identifier is passed, check that this is not already present.
    # Note This is no guarantee that the identifier will be available when
    # `package_create` is actually invoked.

    identifier = pkg_dict.get('id')
    if identifier and _check_package_id_exists(context, identifier):
        raise IdentifierConflict({
            'id':
            _('A package identified as %s already exists') % (identifier)
        })

    # Find and assign a machine-name for this package
    # Note We just find the 1st available name. As noted before, this is no
    # guarantee that will be available when `package_create` is invoked.

    basename = pkg_dict['name']
    max_num_probes = 10 if allow_rename else 1
    name = _find_a_package_name(context, basename, max_num_probes)
    if not name:
        raise NameConflict(
            {'name': _('The package name %r is not available') % (basename)})

    pkg_dict['name'] = name
    # Mirror a rename in the title; when the base name was free there is no
    # suffix and the title must stay as it is (no trailing blank)
    name_suffix = name[len(basename):]
    if name_suffix:
        pkg_dict['title'] += ' ' + name_suffix

    # Add core fields description and subject. They are copied from the
    # 'datacite' entry, which is absent when another `dtype` was imported.
    datacite_md = pkg_dict.get('datacite')
    if datacite_md is not None:
        pkg_dict['notes'] = datacite_md['abstract']
        pkg_dict['closed_tag'] = datacite_md['subject_closed']

    # Create/Update package

    schema1, validation_errors, error_message = None, None, None

    if identifier:
        # Must override catalog-wide schema for actions in this context
        schema1 = lookup_package_plugin().create_package_schema()
        schema1['id'] = [unicode]

    ctx = _make_context(context)
    if schema1:
        ctx['schema'] = schema1

    try:
        pkg_dict = _get_action('package_create')(ctx, data_dict=pkg_dict)
    except toolkit.ValidationError as ex:
        if 'name' in ex.error_dict:
            # The name is probably taken, re-raise exception
            raise
        elif allow_validation_errors:
            # Save errors and retry with a different context
            validation_errors = ex.error_dict
            error_message = ex.message or _(
                'The dataset contains invalid metadata')
            ctx = _make_context(context, skip_validation=True)
            if schema1:
                ctx['schema'] = schema1
            pkg_dict = _get_action('package_create')(ctx, data_dict=pkg_dict)
            log.warning('Forced to create an invalid package as %r', name)
        else:
            raise

    # Sanity checks on what `package_create` handed back
    assert name == pkg_dict['name']
    assert (not identifier) or (identifier == pkg_dict['id'])

    return {
        # Provide basic package fields
        'id': pkg_dict['id'],
        'name': name,
        'title': pkg_dict['title'],
        'state': pkg_dict.get('state'),
        # Provide details on validation (meaningful if allow_validation_errors)
        'validation': {
            'message': error_message,
            'errors': validation_errors,
        },
    }
Example #42
0
 def _history_template(self, package_type):
     """Return history_template() of the plugin looked up for
     `package_type`."""
     plugin = lookup_package_plugin(package_type)
     return plugin.history_template()
Example #43
0
    def import_stage(self, harvest_object):
        """Create, update or delete the dataset carried by `harvest_object`.

        The object's `status` extra selects deletion; otherwise the JSON in
        `harvest_object.content` is turned into a `package_update` (when a
        dataset with the same guid exists) or a `package_create` call, with
        the IDCATRDFHarvester hooks invoked around it.

        :param harvest_object: the harvest object to import
        :returns: True on success (including deletion), False when an error
            was recorded through `_save_object_error`, or the string
            'unchanged' when the dataset dict is empty at the point where it
            would be saved
        """

        log.debug('In DCATRDFHarvester import_stage')

        status = self._get_object_extra(harvest_object, 'status')
        if status == 'delete':
            # Delete package
            context = {
                'model': model,
                'session': model.Session,
                'user': self._get_user_name(),
                'ignore_auth': True
            }

            p.toolkit.get_action('package_delete')(
                context, {
                    'id': harvest_object.package_id
                })
            log.info('Deleted package {0} with guid {1}'.format(
                harvest_object.package_id, harvest_object.guid))
            return True

        if harvest_object.content is None:
            self._save_object_error(
                'Empty content for object {0}'.format(harvest_object.id),
                harvest_object, 'Import')
            return False

        try:
            dataset = json.loads(harvest_object.content)
        except ValueError:
            self._save_object_error(
                'Could not parse content for object {0}'.format(
                    harvest_object.id), harvest_object, 'Import')
            return False

        # Get the last harvested object (if any)
        previous_object = model.Session.query(HarvestObject) \
                                       .filter(HarvestObject.guid == harvest_object.guid) \
                                       .filter(
            HarvestObject.current == True  # noqa
        ).first()

        # Flag previous object as not current anymore
        if previous_object:
            previous_object.current = False
            previous_object.add()

        # Flag this object as the current one
        harvest_object.current = True
        harvest_object.add()

        context = {
            'user': self._get_user_name(),
            'return_id_only': True,
            'ignore_auth': True,
        }

        dataset = self.modify_package_dict(dataset, {}, harvest_object)

        # Check if a dataset with the same guid exists
        existing_dataset = self._get_existing_dataset(harvest_object.guid)

        # Whatever happens below (including the early returns), the session
        # is committed by the `finally` clause
        try:
            package_plugin = lib_plugins.lookup_package_plugin(
                dataset.get('type', None))
            if existing_dataset:
                package_schema = package_plugin.update_package_schema()
                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    package_schema = harvester.update_package_schema_for_update(
                        package_schema)
                context['schema'] = package_schema

                # Don't change the dataset name even if the title has
                dataset['name'] = existing_dataset['name']
                dataset['id'] = existing_dataset['id']

                harvester_tmp_dict = {}

                # check if resources already exist based on their URI.
                # Either side may lack a 'resources' entry; that counts as
                # "no resources" instead of failing the whole import.
                existing_resources = existing_dataset.get('resources') or []
                resource_mapping = {
                    r.get('uri'): r.get('id')
                    for r in existing_resources if r.get('uri')
                }
                for resource in dataset.get('resources') or []:
                    res_uri = resource.get('uri')
                    if res_uri and res_uri in resource_mapping:
                        resource['id'] = resource_mapping[res_uri]

                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    harvester.before_update(harvest_object, dataset,
                                            harvester_tmp_dict)

                try:
                    if dataset:
                        # Save reference to the package on the object
                        harvest_object.package_id = dataset['id']
                        harvest_object.add()

                        p.toolkit.get_action('package_update')(context,
                                                               dataset)
                    else:
                        log.info('Ignoring dataset %s' %
                                 existing_dataset['name'])
                        return 'unchanged'
                except p.toolkit.ValidationError as e:
                    self._save_object_error(
                        'Update validation Error: %s' % str(e.error_summary),
                        harvest_object, 'Import')
                    return False

                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    err = harvester.after_update(harvest_object, dataset,
                                                 harvester_tmp_dict)

                    if err:
                        self._save_object_error(
                            'RDFHarvester plugin error: %s' % err,
                            harvest_object, 'Import')
                        return False

                log.info('Updated dataset %s' % dataset['name'])

            else:
                package_schema = package_plugin.create_package_schema()
                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    package_schema = harvester.update_package_schema_for_create(
                        package_schema)
                context['schema'] = package_schema

                # We need to explicitly provide a package ID
                dataset['id'] = str(uuid.uuid4())
                package_schema['id'] = [str]

                harvester_tmp_dict = {}

                # Remember the name now: the hooks below may alter `dataset`
                name = dataset['name']
                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    harvester.before_create(harvest_object, dataset,
                                            harvester_tmp_dict)

                try:
                    if dataset:
                        # Save reference to the package on the object
                        harvest_object.package_id = dataset['id']
                        harvest_object.add()

                        # Defer constraints and flush so the dataset can be indexed with
                        # the harvest object id (on the after_show hook from the harvester
                        # plugin)
                        model.Session.execute(
                            'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED'
                        )
                        model.Session.flush()

                        p.toolkit.get_action('package_create')(context,
                                                               dataset)
                    else:
                        log.info('Ignoring dataset %s' % name)
                        return 'unchanged'
                except p.toolkit.ValidationError as e:
                    self._save_object_error(
                        'Create validation Error: %s' % str(e.error_summary),
                        harvest_object, 'Import')
                    return False

                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    err = harvester.after_create(harvest_object, dataset,
                                                 harvester_tmp_dict)

                    if err:
                        self._save_object_error(
                            'RDFHarvester plugin error: %s' % err,
                            harvest_object, 'Import')
                        return False

                log.info('Created dataset %s' % dataset['name'])

        except Exception as e:
            # Any other failure is recorded on the harvest object together
            # with the traceback
            self._save_object_error(
                'Error importing dataset %s: %r / %s' %
                (dataset.get('name', ''), e, traceback.format_exc()),
                harvest_object, 'Import')
            return False

        finally:
            model.Session.commit()

        return True
Example #44
0
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    On top of the plain update this variant fills keys missing from
    ``data_dict`` with the values of the stored dataset, stamps the record
    dates matching ``edc_state``, handles the image upload and finally starts
    a background thread running ``check_record_state``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset, or just the dataset id if
              'return_id_only' is set in the context
    :rtype: dictionary

    :raises NotFound: if no package matches the given id or name
    :raises ValidationError: if validating ``data_dict`` yields errors

    '''

    model = context['model']
    user = context['user']
    # The name is consulted only when no (truthy) id was supplied
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)

    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    # FIXME: first modifications to package_update begin here:
    # tag strings are reconstructed because validators are stripping
    # tags passed and only taking tags as tag_string values
    # image upload support has also been added here
    old_data = get_action('package_show')(context, {'id': pkg.id})
    '''
    Constructing the tag_string from the given tags.
    There must be at least one tag, otherwise the tag_string will be empty and a validation error
    will be raised.
    '''
    if not data_dict.get('tag_string'):
        data_dict['tag_string'] = ', '.join(
            h.dict_list_reduce(data_dict.get('tags', {}), 'name'))

    # Keys the caller did not send keep the value of the stored dataset
    for key, value in old_data.iteritems():
        if key not in data_dict:
            data_dict[key] = value

    # data_dict['resources'] = data_dict.get('resources', old_data.get('resources'))


#     iso_topic_cat = data_dict.get('iso_topic_string', [])
#     if isinstance(iso_topic_cat, basestring):
#         iso_topic_cat = [iso_topic_cat]
#
#     data_dict['iso_topic_string'] = ','.join(iso_topic_cat)

    # Set the package last modified date
    data_dict['record_last_modified'] = str(datetime.date.today())

    # If the Created Date has not yet been set, then set it
    if data_dict['edc_state'] == 'DRAFT' and not data_dict.get(
            'record_create_date'):
        data_dict['record_create_date'] = str(datetime.date.today())

    # If the Publish Date has not yet been set, then set it
    if data_dict['edc_state'] == 'PUBLISHED' and not data_dict.get(
            'record_publish_date'):
        data_dict['record_publish_date'] = str(datetime.date.today())

    # If the Archive Date has not yet been set, then set it
    if data_dict['edc_state'] == 'ARCHIVED' and not data_dict.get(
            'record_archive_date'):
        data_dict['record_archive_date'] = str(datetime.date.today())

    _check_access('package_update', context, data_dict)

    # get the schema (a schema supplied through the context wins)
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    image_url = old_data.get('image_url', None)

    # The upload is prepared here and carried out further down, after the
    # plugin hooks have run
    upload = uploader.Upload('edc', image_url)
    upload.update_data_dict(data_dict, 'image_url', 'image_upload',
                            'clear_upload')

    # Adding image display url for the uploaded image
    image_url = data_dict.get('image_url')
    data_dict['image_display_url'] = image_url

    if image_url and not image_url.startswith('http'):
        # NOTE(review): the munged value is not used afterwards, the display
        # url is built from data_dict['image_url'] - confirm this is intended
        image_url = munge.munge_filename(image_url)
        data_dict['image_display_url'] = h.url_for_static(
            'uploads/edc/%s' % data_dict.get('image_url'), qualified=True)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    # FIXME: modifications to package_update end here^

    data, errors = lib_plugins.plugin_validate(package_plugin, context,
                                               data_dict, schema,
                                               'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # Owner organization is updated with a copy of the context that skips
    # auth checks and defers the commit
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    upload.upload(uploader.get_max_image_size())

    # TODO the next two blocks are copied from ckan/ckan/logic/action/update.py
    # This codebase is currently hard to maintain because large chunks of the
    # CKAN action API and the CKAN controllers are simply overridden. This is
    # probably worse than just forking CKAN would have been, because in that
    # case at least we could track changes. - @deniszgonjanin

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {
                'model': context['model'],
                'user': context['user'],
                'ignore_auth': True
            }, {
                'package': data
            })

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})
    '''
    Send state change notifications if required; Added by Khalegh Mamakani
    Using a thread to run the job in the background so that package_update will not wait for notifications sending.
    '''

    old_state = old_data.get('edc_state')

    # From here on `context` is a new dict built for the notification job;
    # the caller's context object is no longer referenced by this name
    context = {
        'model': model,
        'session': model.Session,
        'user': c.user or c.author,
        'auth_user_obj': c.userobj
    }

    dataset_url = config.get('ckan.site_url') + h.url_for(
        controller='package', action="read", id=data_dict['name'])
    import threading

    # The thread is started and not joined: the result is returned right away
    notify_thread = threading.Thread(target=check_record_state,
                                     args=(context, old_state, data_dict,
                                           g.site_title, g.site_url,
                                           dataset_url))
    notify_thread.start()

    return output
Example #45
0
 def _db_to_form_schema(self, package_type=None):
     """Return the (optional) schema used to convert data coming from the
     database into a format suitable for the form, as provided by the
     plugin looked up for `package_type`."""
     plugin = lookup_package_plugin(package_type)
     return plugin.db_to_form_schema()
Example #46
0
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    Keys missing from ``data_dict`` are filled in from the stored dataset,
    ``record_last_modified`` is set to today's date and the date field that
    belongs to the current ``edc_state`` is set when it is still empty. Once
    the update is done a background thread runs ``check_record_state`` so
    that sending notifications does not delay the caller.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (unless ``'return_id_only'`` is set to
              ``True`` in the context, in which case just the dataset id is
              returned)
    :rtype: dictionary

    :raises ValidationError: if neither ``id`` nor ``name`` is given or the
        data does not validate against the schema
    :raises NotFound: if no dataset matches the given name or id

    '''
    model = context['model']
    user = context['user']

    # Report a missing identifier as a validation problem rather than
    # letting a bare KeyError escape.
    name_or_id = data_dict.get('id') or data_dict.get('name')
    if name_or_id is None:
        raise ValidationError({'id': _('Missing value')})

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    # FIXME: first modifications to package_update begin here:
    # tag strings are reconstructed because validators are stripping
    # tags passed and only taking tags as tag_string values
    # image upload support has also been added here
    old_data = get_action('package_show')(context, {'id': pkg.id})

    # Construct the tag_string from the given tags. There must be at least
    # one tag, otherwise the tag_string will be empty and a validation error
    # will be raised.
    if not data_dict.get('tag_string'):
        data_dict['tag_string'] = ', '.join(
            h.dict_list_reduce(data_dict.get('tags', []), 'name'))

    # Anything the caller did not send keeps its stored value.
    for key, value in old_data.items():
        data_dict.setdefault(key, value)

    # Set the package last modified date
    today = str(datetime.date.today())
    data_dict['record_last_modified'] = today

    # Stamp the date that belongs to the current state, but only the first
    # time (i.e. while the field is still empty). A record without any
    # edc_state is left for the validators to reject.
    edc_state = data_dict.get('edc_state')
    state_date_fields = (
        ('DRAFT', 'record_create_date'),
        ('PUBLISHED', 'record_publish_date'),
        ('ARCHIVED', 'record_archive_date'),
    )
    for state, date_field in state_date_fields:
        if edc_state == state and not data_dict.get(date_field):
            data_dict[date_field] = today

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    upload = uploader.Upload('edc', old_data.get('image_url', None))
    upload.update_data_dict(
        data_dict, 'image_url', 'image_upload', 'clear_upload')

    # Adding image display url for the uploaded image
    image_url = data_dict.get('image_url')
    data_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        data_dict['image_display_url'] = h.url_for_static(
            'uploads/edc/%s' % image_url, qualified=True)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    # FIXME: modifications to package_update end here^

    data, errors = _validate(data_dict, schema, context)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    upload.upload(uploader.get_max_image_size())

    # TODO the next two blocks are copied from ckan/ckan/logic/action/update.py
    # This codebase is currently hard to maintain because large chunks of the
    # CKAN action API and the CKAN controllers are simply overridden. This is
    # probably worse than just forking CKAN would have been, because in that
    # case at least we could track changes. - @deniszgonjanin

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': data})

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s', pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    # Send state change notifications if required; Added by Khalegh Mamakani.
    # A thread runs the job in the background so that package_update does
    # not wait for the notifications to be sent. The thread gets its own
    # context dict instead of the caller's one.
    old_state = old_data.get('edc_state')

    notify_context = {'model': model, 'session': model.Session,
                      'user': c.user or c.author, 'auth_user_obj': c.userobj}

    dataset_url = config.get('ckan.site_url') + h.url_for(
        controller='package', action="read", id=data_dict['name'])
    import threading

    notify_thread = threading.Thread(
        target=check_record_state,
        args=(notify_context, old_state, data_dict,
              g.site_title, g.site_url, dataset_url))
    notify_thread.start()

    return output
Example #47
0
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (unless ``'return_id_only'`` is set to
              ``True`` in the context, in which case just the dataset id is
              returned)
    :rtype: dictionary

    :raises ValidationError: if neither ``id`` nor ``name`` is given or the
        data does not validate against the schema
    :raises NotFound: if no dataset matches the given name or id

    '''
    model = context['model']
    user = context['user']

    # Report a missing identifier as a validation problem rather than
    # letting a bare KeyError escape.
    name_or_id = data_dict.get('id') or data_dict.get('name')
    if name_or_id is None:
        raise ValidationError({'id': _('Missing value')})

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # The owner organisation is synchronised with auth checks disabled and
    # without committing; the commit (if any) happens once, further down.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update,
                                            {'id': pkg.id,
                                             'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s', pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    return output
Example #48
0
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    .. note:: Update methods may delete parameters not explicitly provided in the
        data_dict. If you want to edit only a specific attribute use `package_patch`
        instead.

    It is recommended to call
    :py:func:`ckan.logic.action.get.package_show`, make the desired changes to
    the result, and then call ``package_update()`` with it.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    For further parameters see
    :py:func:`~ckan.logic.action.create.package_create`.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (unless ``'return_id_only'`` is set to
              ``True`` in the context, in which case just the dataset id is
              returned)
    :rtype: dictionary

    '''
    model = context['model']
    session = context['session']

    identifier = data_dict.get('id') or data_dict.get('name')
    if identifier is None:
        raise ValidationError({'id': _('Missing value')})

    pkg = model.Package.get(identifier)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg

    # The id and type of an existing dataset cannot be changed by an update.
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type

    _check_access('package_update', context, data_dict)

    user = context['user']

    # A schema supplied through the context wins over the plugin's own one.
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    schema = (context['schema'] if 'schema' in context
              else package_plugin.update_package_schema())

    # check_data_dict() is deprecated; it is only called outside the
    # versioned API and only when the plugin still provides it.
    if 'api_version' not in context and \
            getattr(package_plugin, 'check_data_dict', None):
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            # Plugins predating the schema argument take the dict only.
            package_plugin.check_data_dict(data_dict)

    # One uploader per resource; mimetype/size are filled in from the
    # uploader when the caller did not provide them.
    resource_uploads = []
    for res in data_dict.get('resources', []):
        res_upload = uploader.get_resource_uploader(res)

        if 'mimetype' not in res and hasattr(res_upload, 'mimetype'):
            res['mimetype'] = res_upload.mimetype

        if 'size' not in res and 'url_type' in res \
                and hasattr(res_upload, 'filesize'):
            res['size'] = res_upload.filesize

        resource_uploads.append(res_upload)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')
    context_pkg = context.get('package')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context_pkg.name if context_pkg else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    # Bump metadata_modified with a direct UPDATE so no revision is created.
    modified_stamp = {"metadata_modified": datetime.datetime.utcnow()}
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        modified_stamp)
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    org_context = context.copy()
    org_context['ignore_auth'] = True
    org_context['defer_commit'] = True
    org_update = _get_action('package_owner_org_update')
    org_update(org_context,
               {'id': pkg.id, 'organization_id': pkg.owner_org})

    # Flush so the new resources have ids that extensions can rely on.
    model.Session.flush()
    validated_resources = data.get('resources', [])
    for position, (res, res_upload) in enumerate(
            zip(validated_resources, resource_uploads)):
        res['id'] = pkg.resources[position].id

        res_upload.upload(res['id'], uploader.get_max_resource_size())

    for observer in plugins.PluginImplementations(
            plugins.IPackageController):
        observer.edit(pkg)

        observer.after_dataset_update(context, data)

    # Record a 'changed' activity for public datasets only.
    if not pkg.private:
        acting_user = model.User.by_name(user)
        actor_id = acting_user.id if acting_user else 'not logged in'

        session.add(pkg.activity_stream_item('changed', actor_id))

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    id_only = context.get('return_id_only', False)

    # A schema passed in by the caller must not leak into package_show.
    context.pop('schema', None)

    # Whoever was allowed to update the dataset may also read it back.
    context['ignore_auth'] = True
    if id_only:
        return data_dict['id']
    return _get_action('package_show')(context, {'id': data_dict['id']})
Example #49
0
 def _form_to_db_schema(self, package_type=None):
     """Return the form-to-db schema of the plugin handling *package_type*.

     The plugin is resolved with ``lookup_package_plugin`` and the result of
     its ``form_to_db_schema()`` call is handed back unchanged.
     """
     plugin = lookup_package_plugin(package_type)
     return plugin.form_to_db_schema()
Example #50
0
File: index.py Project: banzsh/ckan
    def index_package(self, pkg_dict, defer_commit=False):
        """Flatten a package dict and send it to the Solr index.

        ``pkg_dict`` is modified in place for most of the method (extras,
        tags, groups, resources and relationships are flattened into
        top-level keys) and is then rebuilt as a new dict before being handed
        to the ``IPackageController.before_index`` plugins and to Solr.

        :param pkg_dict: the package dictionary to index; ``None`` is a no-op
        :param defer_commit: when true the Solr add is sent without a commit

        :returns: ``None``, or the result of ``self.delete_package`` when the
            package state is missing or does not contain ``"active"``
        :raises SearchIndexError: if Solr raises ``SolrException``
        :raises AssertionError: if a plugin returns an empty dict from
            ``before_index``
        """
        if pkg_dict is None:
            return

        # Serialised before any of the in-place changes below, so the stored
        # "data_dict" reflects the dict exactly as it was passed in.
        data_dict_json = json.dumps(pkg_dict)

        if config.get("ckan.cache_validated_datasets", True):
            package_plugin = lib_plugins.lookup_package_plugin(pkg_dict.get("type"))

            schema = package_plugin.show_package_schema()
            # NOTE(review): the validation errors are not inspected here.
            validated_pkg_dict, errors = lib_plugins.plugin_validate(
                package_plugin, {"model": model, "session": model.Session}, pkg_dict, schema, "package_show"
            )
            pkg_dict["validated_data_dict"] = json.dumps(
                validated_pkg_dict, cls=ckan.lib.navl.dictization_functions.MissingNullEncoder
            )

        pkg_dict["data_dict"] = data_dict_json

        # add to string field for sorting
        title = pkg_dict.get("title")
        if title:
            pkg_dict["title_string"] = title

        # Packages without a state, or whose state does not contain "active",
        # are removed from the index instead of being (re)indexed.
        if (not pkg_dict.get("state")) or ("active" not in pkg_dict.get("state")):
            return self.delete_package(pkg_dict)

        # Snapshot of the field names taken BEFORE the extras are flattened;
        # it decides below which extras may also be promoted to a bare key.
        # NOTE(review): presumably RESERVED_FIELDS is a list, in which case
        # this concatenation relies on Python 2's keys() returning a list.
        index_fields = RESERVED_FIELDS + pkg_dict.keys()

        # include the extras in the main namespace
        extras = pkg_dict.get("extras", [])
        for extra in extras:
            key, value = extra["key"], extra["value"]
            if isinstance(value, (tuple, list)):
                value = " ".join(map(unicode, value))
            # Drop every character of the key that is not in KEY_CHARS.
            key = "".join([c for c in key if c in KEY_CHARS])
            pkg_dict["extras_" + key] = value
            if key not in index_fields:
                pkg_dict[key] = value
        pkg_dict.pop("extras", None)

        # add tags, removing vocab tags from 'tags' list and adding them as
        # vocab_<tag name> so that they can be used in facets
        non_vocab_tag_names = []
        tags = pkg_dict.pop("tags", [])
        context = {"model": model}

        for tag in tags:
            if tag.get("vocabulary_id"):
                data = {"id": tag["vocabulary_id"]}
                # One vocabulary_show action call per vocabulary tag.
                vocab = logic.get_action("vocabulary_show")(context, data)
                key = u"vocab_%s" % vocab["name"]
                if key in pkg_dict:
                    pkg_dict[key].append(tag["name"])
                else:
                    pkg_dict[key] = [tag["name"]]
            else:
                non_vocab_tag_names.append(tag["name"])

        pkg_dict["tags"] = non_vocab_tag_names

        # add groups
        groups = pkg_dict.pop("groups", [])

        # we use the capacity to make things private in the search index
        # ("private" is read with [] and so must be present in pkg_dict)
        if pkg_dict["private"]:
            pkg_dict["capacity"] = "private"
        else:
            pkg_dict["capacity"] = "public"

        pkg_dict["groups"] = [group["name"] for group in groups]

        # if there is an owner_org we want to add this to groups for index
        # purposes
        # (the organization dict is replaced by just its name, or by None)
        if pkg_dict.get("organization"):
            pkg_dict["organization"] = pkg_dict["organization"]["name"]
        else:
            pkg_dict["organization"] = None

        # tracking
        tracking_summary = pkg_dict.pop("tracking_summary", None)
        if tracking_summary:
            pkg_dict["views_total"] = tracking_summary["total"]
            pkg_dict["views_recent"] = tracking_summary["recent"]

        # (resource key, index key) pairs; each index key collects one value
        # per resource, with u"" standing in for a missing resource key.
        resource_fields = [
            ("name", "res_name"),
            ("description", "res_description"),
            ("format", "res_format"),
            ("url", "res_url"),
            ("resource_type", "res_type"),
        ]
        resource_extras = [(e, "res_extras_" + e) for e in model.Resource.get_extra_columns()]
        # flatten the structure for indexing:
        for resource in pkg_dict.get("resources", []):
            for (okey, nkey) in resource_fields + resource_extras:
                pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u"")]
        pkg_dict.pop("resources", None)

        # Relationships are grouped by type into lists of package names.
        # Relationships in which this package is the object are stored under
        # the reverse type and name the subject package.
        rel_dict = collections.defaultdict(list)
        subjects = pkg_dict.pop("relationships_as_subject", [])
        objects = pkg_dict.pop("relationships_as_object", [])
        for rel in objects:
            type = model.PackageRelationship.forward_to_reverse_type(rel["type"])
            rel_dict[type].append(model.Package.get(rel["subject_package_id"]).name)
        for rel in subjects:
            type = rel["type"]
            rel_dict[type].append(model.Package.get(rel["object_package_id"]).name)
        # Existing keys are never overwritten by a relationship type.
        for key, value in rel_dict.iteritems():
            if key not in pkg_dict:
                pkg_dict[key] = value

        pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

        # Save dataset type
        pkg_dict["dataset_type"] = pkg_dict["type"]

        # clean the dict fixing keys and dates
        # FIXME where are we getting these dirty keys from?  can we not just
        # fix them in the correct place or is this something that always will
        # be needed?  For my data not changing the keys seems to not cause a
        # problem.
        new_dict = {}
        bogus_date = datetime.datetime(1, 1, 1)
        for key, value in pkg_dict.items():
            # Non-ASCII characters are dropped from the key.
            key = key.encode("ascii", "ignore")
            if key.endswith("_date"):
                try:
                    date = parse(value, default=bogus_date)
                    if date != bogus_date:
                        value = date.isoformat() + "Z"
                    else:
                        # The date field was empty, so dateutil filled it with
                        # the default bogus date
                        value = None
                except ValueError:
                    # An unparseable *_date value is left out of the index
                    # document altogether.
                    continue
            new_dict[key] = value
        pkg_dict = new_dict

        for k in ("title", "notes", "title_string"):
            if k in pkg_dict and pkg_dict[k]:
                pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

        # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
        # with UTC time (i.e trailing Z)
        # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
        # (both keys are read with [] and so must be present at this point)
        pkg_dict["metadata_created"] += "Z"
        pkg_dict["metadata_modified"] += "Z"

        # mark this CKAN instance as data source:
        pkg_dict["site_id"] = config.get("ckan.site_id")

        # Strip a selection of the fields.
        # These fields are possible candidates for sorting search results on,
        # so we strip leading spaces because solr will sort " " before "a" or "A".
        for field_name in ["title"]:
            try:
                value = pkg_dict.get(field_name)
                if value:
                    pkg_dict[field_name] = value.lstrip()
            except KeyError:
                pass

        # add a unique index_id to avoid conflicts
        import hashlib

        # MD5 hex digest of the package id followed by the site id.
        pkg_dict["index_id"] = hashlib.md5("%s%s" % (pkg_dict["id"], config.get("ckan.site_id"))).hexdigest()

        # Each plugin receives the dict returned by the previous one.
        for item in PluginImplementations(IPackageController):
            pkg_dict = item.before_index(pkg_dict)

        assert pkg_dict, "Plugin must return non empty package dict on index"

        # send to solr:
        try:
            conn = make_connection()
            # Commit unless the caller deferred it or the
            # ckan.search.solr_commit setting evaluates to false.
            commit = not defer_commit
            if not asbool(config.get("ckan.search.solr_commit", "true")):
                commit = False
            conn.add_many([pkg_dict], _commit=commit)
        except solr.core.SolrException, e:
            msg = "Solr returned an error: {0} {1} - {2}".format(
                e.httpcode, e.reason, e.body[:1000]  # limit huge responses
            )
            raise SearchIndexError(msg)
Example #51
0
def package_create(context, data_dict):
    '''Create a new dataset (package).

    You must be authorized to create new datasets. If you specify any groups
    for the new dataset, you must also be authorized to edit these groups.

    Plugins may change the parameters of this function depending on the value
    of the ``type`` parameter, see the ``IDatasetForm`` plugin interface.

    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'``
    :type name: string
    :param title: the title of the dataset (optional, default: same as
        ``name``)
    :type title: string
    :param author: the name of the dataset's author (optional)
    :type author: string
    :param author_email: the email address of the dataset's author (optional)
    :type author_email: string
    :param maintainer: the name of the dataset's maintainer (optional)
    :type maintainer: string
    :param maintainer_email: the email address of the dataset's maintainer
        (optional)
    :type maintainer_email: string
    :param license_id: the id of the dataset's license, see ``license_list()``
        for available values (optional)
    :type license_id: license id string
    :param notes: a description of the dataset (optional)
    :type notes: string
    :param url: a URL for the dataset's source (optional)
    :type url: string
    :param version: (optional)
    :type version: string, no longer than 100 characters
    :param state: the current state of the dataset, e.g. ``'active'`` or
        ``'deleted'``, only active datasets show up in search results and
        other lists of datasets, this parameter will be ignored if you are not
        authorized to change the state of the dataset (optional, default:
        ``'active'``)
    :type state: string
    :param type: the type of the dataset (optional), ``IDatasetForm`` plugins
        associate themselves with different dataset types and provide custom
        dataset handling behaviour for these types
    :type type: string
    :param resources: the dataset's resources, see ``resource_create()``
        for the format of resource dictionaries (optional)
    :type resources: list of resource dictionaries
    :param tags: the dataset's tags, see ``tag_create()`` for the format
        of tag dictionaries (optional)
    :type tags: list of tag dictionaries
    :param extras: the dataset's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to datasets, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string), and optionally ``'deleted'``
    :type extras: list of dataset extra dictionaries
    :param relationships_as_object: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_object: list of relationship dictionaries
    :param relationships_as_subject: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_subject: list of relationship dictionaries
    :param groups: the groups to which the dataset belongs (optional), each
        group dictionary should have one or more of the following keys which
        identify an existing group:
        ``'id'`` (the id of the group, string), ``'name'`` (the name of the
        group, string), ``'title'`` (the title of the group, string), to see
        which groups exist call ``group_list()``
    :type groups: list of dictionaries
    :param owner_org: the id of the dataset's owning organization, see
        ``organization_list()`` or ``organization_list_for_user`` for
        available values (optional)
    :type owner_org: string

    :returns: the newly created dataset (unless 'return_id_only' is set to True
              in the context, in which case just the dataset id will be returned)
    :rtype: dictionary

    '''
    model = context['model']
    user = context['user']

    # A schema supplied through the context wins over the plugin's own one.
    package_plugin = lib_plugins.lookup_package_plugin(data_dict.get('type'))
    schema = (context['schema'] if 'schema' in context
              else package_plugin.create_package_schema())

    _check_access('package_create', context, data_dict)

    # check_data_dict() is deprecated; it is only called outside the
    # versioned API and only when the plugin still provides it.
    if 'api_version' not in context:
        legacy_check = getattr(package_plugin, 'check_data_dict', None)
        if legacy_check:
            try:
                legacy_check(data_dict, schema)
            except TypeError:
                # Plugins predating the schema argument take the dict only.
                package_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'), data.get('name'), data_dict)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    rev.message = (
        context['message'] if 'message' in context
        else _(u'REST API: Create object %s') % data.get("name"))

    # The creating user, when it resolves to a User object, becomes the
    # initial admin and is recorded as the dataset's creator.
    creator = model.User.by_name(user.decode('utf8')) if user else None
    if creator:
        admins = [creator]
        data['creator_user_id'] = creator.id
    else:
        admins = []

    pkg = model_save.package_dict_save(data, context)

    model.setup_default_user_roles(pkg, admins)
    # Flush so that extensions can see the id of the new package.
    model.Session.flush()
    data['id'] = pkg.id

    org_context = context.copy()
    org_context['ignore_auth'] = True
    org_context['defer_commit'] = True
    org_update = _get_action('package_owner_org_update')
    org_update(org_context,
               {'id': pkg.id, 'organization_id': pkg.owner_org})

    for observer in plugins.PluginImplementations(
            plugins.IPackageController):
        observer.create(pkg)

        observer.after_create(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    # The REST API reads the package back from the context ...
    context["package"] = pkg
    # ... and uses the id to build the location of the new object.
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)

    # A schema passed in by the caller must not leak into package_show.
    context.pop('schema', None)

    if context.get('return_id_only', False):
        return context['id']
    return _get_action('package_show')(context, {'id': context['id']})
Example #52
0
                        return 'unchanged'
                except p.toolkit.ValidationError, e:
                    self._save_object_error('Update validation Error: %s' % str(e.error_summary), harvest_object, 'Import')
                    return False

                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    err = harvester.after_update(harvest_object, dataset, harvester_tmp_dict)

                    if err:
                        self._save_object_error('RDFHarvester plugin error: %s' % err, harvest_object, 'Import')
                        return False

                log.info('Updated dataset %s' % dataset['name'])

            else:
                package_plugin = lib_plugins.lookup_package_plugin(dataset.get('type', None))

                package_schema = package_plugin.create_package_schema()
                context['schema'] = package_schema

                # We need to explicitly provide a package ID
                dataset['id'] = unicode(uuid.uuid4())
                package_schema['id'] = [unicode]

                harvester_tmp_dict = {}

                name = dataset['name']
                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    harvester.before_create(harvest_object, dataset, harvester_tmp_dict)

                try:
Example #53
0
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    It is recommended to call
    :py:func:`ckan.logic.action.get.package_show`, make the desired changes to
    the result, and then call ``package_update()`` with it.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    For further parameters see
    :py:func:`~ckan.logic.action.create.package_create`.

    The following ``context`` keys are read: ``model`` and ``user``
    (required), ``schema``, ``api_version``, ``message``, ``defer_commit``
    and ``return_id_only`` (all optional). On return ``context['package']``
    holds the looked-up package, ``context['schema']`` has been removed and
    ``context['ignore_auth']`` has been set to ``True``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset, or just the dataset id if
              ``'return_id_only'`` is ``True`` in the context (it defaults
              to ``False``)
    :rtype: dictionary

    :raises ValidationError: if neither ``id`` nor ``name`` is supplied, or
        if the data does not pass schema validation
    :raises NotFound: if no dataset matches the given name or id

    '''
    model = context['model']
    user = context['user']

    # Either key identifies the dataset. A request that carries neither is
    # reported as a validation problem rather than leaking a bare KeyError.
    try:
        name_or_id = data_dict.get("id") or data_dict['name']
    except KeyError:
        raise ValidationError({'id': _('Missing value')})

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    # Normalise the incoming dict to the stored id and type so that the
    # auth check and the schema validation both see the canonical values.
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type

    _check_access('package_update', context, data_dict)

    # get the schema (a caller-supplied one takes precedence)
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(package_plugin, context,
                                               data_dict, schema,
                                               'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # Run the owner-org update on a copy of the context so the flags set
    # here do not leak into the caller's context.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    context_org_update['add_revision'] = False
    _get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        # Copies ids positionally; this relies on pkg.resources being in
        # the same order as data['resources'].
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    # Pass the name as a logging argument so formatting is deferred until
    # the record is actually emitted.
    log.debug('Updated object %s', pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
            else _get_action('package_show')(context, {'id': data_dict['id']})

    return output
Example #54
0
def _setup_template_variables(context, data_dict, package_type=None):
    """Delegate template-variable setup to the plugin for ``package_type``.

    Looks up the package plugin registered for ``package_type`` (``None``
    is passed straight through to the lookup) and returns whatever that
    plugin's ``setup_template_variables(context, data_dict)`` returns.
    """
    plugin = lookup_package_plugin(package_type)
    return plugin.setup_template_variables(context, data_dict)
Example #55
0
 def _check_data_dict(self, data_dict, package_type=None):
     """Run the package plugin's ``check_data_dict`` on ``data_dict``.

     The plugin is looked up by ``package_type`` and the result of its
     ``check_data_dict(data_dict)`` call is returned unchanged. According
     to the original author the check mainly exists to catch spammers who
     submit only part of the form.
     """
     plugin = lookup_package_plugin(package_type)
     return plugin.check_data_dict(data_dict)
Example #56
0
 def _search_template(self, package_type):
     """Return ``search_template()`` of the plugin for ``package_type``."""
     plugin = lookup_package_plugin(package_type)
     return plugin.search_template()
Example #57
0
                return False

            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                err = harvester.after_update(harvest_object, dataset,
                                             harvester_tmp_dict)

                if err:
                    self._save_object_error(
                        'RDFHarvester plugin error: %s' % err, harvest_object,
                        'Import')
                    return False

            log.info('Updated dataset %s' % dataset['name'])

        else:
            package_plugin = lib_plugins.lookup_package_plugin(
                dataset.get('type', None))

            package_schema = package_plugin.create_package_schema()
            context['schema'] = package_schema

            # We need to explicitly provide a package ID
            dataset['id'] = unicode(uuid.uuid4())
            package_schema['id'] = [unicode]

            harvester_tmp_dict = {}

            name = dataset['name']
            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                harvester.before_create(harvest_object, dataset,
                                        harvester_tmp_dict)
Example #58
0
def package_create(context, data_dict):
    """Create a new dataset (package).

    You must be authorized to create new datasets. If you specify any groups
    for the new dataset, you must also be authorized to edit these groups.

    Plugins may change the parameters of this function depending on the value
    of the ``type`` parameter, see the ``IDatasetForm`` plugin interface.

    The following ``context`` keys are read: ``model`` and ``user``
    (required), ``schema``, ``api_version``, ``message``, ``defer_commit``
    and ``return_id_only`` (all optional). On return ``context['package']``
    and ``context['id']`` refer to the new package and ``context['schema']``
    has been removed.

    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'``
    :type name: string
    :param title: the title of the dataset (optional, default: same as
        ``name``)
    :type title: string
    :param author: the name of the dataset's author (optional)
    :type author: string
    :param author_email: the email address of the dataset's author (optional)
    :type author_email: string
    :param maintainer: the name of the dataset's maintainer (optional)
    :type maintainer: string
    :param maintainer_email: the email address of the dataset's maintainer
        (optional)
    :type maintainer_email: string
    :param license_id: the id of the dataset's license, see ``license_list()``
        for available values (optional)
    :type license_id: license id string
    :param notes: a description of the dataset (optional)
    :type notes: string
    :param url: a URL for the dataset's source (optional)
    :type url: string
    :param version: (optional)
    :type version: string, no longer than 100 characters
    :param state: the current state of the dataset, e.g. ``'active'`` or
        ``'deleted'``, only active datasets show up in search results and
        other lists of datasets, this parameter will be ignored if you are not
        authorized to change the state of the dataset (optional, default:
        ``'active'``)
    :type state: string
    :param type: the type of the dataset (optional), ``IDatasetForm`` plugins
        associate themselves with different dataset types and provide custom
        dataset handling behaviour for these types
    :type type: string
    :param resources: the dataset's resources, see ``resource_create()``
        for the format of resource dictionaries (optional)
    :type resources: list of resource dictionaries
    :param tags: the dataset's tags, see ``tag_create()`` for the format
        of tag dictionaries (optional)
    :type tags: list of tag dictionaries
    :param extras: the dataset's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to datasets, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string)
    :type extras: list of dataset extra dictionaries
    :param relationships_as_object: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_object: list of relationship dictionaries
    :param relationships_as_subject: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_subject: list of relationship dictionaries
    :param groups: the groups to which the dataset belongs (optional), each
        group dictionary should have one or more of the following keys which
        identify an existing group:
        ``'id'`` (the id of the group, string), ``'name'`` (the name of the
        group, string), ``'title'`` (the title of the group, string), to see
        which groups exist call ``group_list()``
    :type groups: list of dictionaries
    :param owner_org: the id of the dataset's owning organization, see
        ``organization_list()`` or ``organization_list_for_user`` for
        available values (optional)
    :type owner_org: string

    :returns: the newly created dataset (unless 'return_id_only' is set to True
              in the context, in which case just the dataset id will be returned)
    :rtype: dictionary

    :raises ValidationError: if the data does not pass schema validation

    """
    model = context["model"]
    user = context["user"]

    # Resolve the plugin for the requested dataset type; a schema supplied
    # by the caller through the context takes precedence over the plugin's.
    package_type = data_dict.get("type")
    package_plugin = lib_plugins.lookup_package_plugin(package_type)
    if "schema" in context:
        schema = context["schema"]
    else:
        schema = package_plugin.create_package_schema()

    _check_access("package_create", context, data_dict)

    if "api_version" not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, "check_data_dict", None)
        if check_data_dict:
            try:
                check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work
                package_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)
    # NOTE(review): this post-processing runs before the error check below,
    # so the helpers also see data that failed validation - confirm they
    # tolerate that.
    if "tags" in data:
        data["tags"] = get_tag_vocabulary(data["tags"])
    if "groups" in data:
        # The validated data may carry groups without any extras; create the
        # list on demand instead of failing with a KeyError.
        data.setdefault("extras", []).append(
            {"key": "solr_additions", "value": build_additions(data["groups"])}
        )

    log.debug(
        "package_create validate_errs=%r user=%s package=%s data=%r",
        errors,
        context.get("user"),
        data.get("name"),
        data_dict,
    )

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if "message" in context:
        rev.message = context["message"]
    else:
        rev.message = _(u"REST API: Create object %s") % data.get("name")

    # Record the creating user, when one can be resolved, both as the
    # initial admin and as creator_user_id on the saved data.
    admins = []
    if user:
        # NOTE(review): .decode() implies ``user`` is a byte string here
        # (Python 2) - confirm before porting.
        user_obj = model.User.by_name(user.decode("utf8"))
        if user_obj:
            admins = [user_obj]
            data["creator_user_id"] = user_obj.id

    pkg = model_save.package_dict_save(data, context)
    model.setup_default_user_roles(pkg, admins)
    # Needed to let extensions know the package id
    model.Session.flush()
    data["id"] = pkg.id

    # Run the owner-org update on a copy of the context so the flags set
    # here do not leak into the caller's context.
    context_org_update = context.copy()
    context_org_update["ignore_auth"] = True
    context_org_update["defer_commit"] = True
    _get_action("package_owner_org_update")(context_org_update, {"id": pkg.id, "organization_id": pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.create(pkg)

        item.after_create(context, data)

    if not context.get("defer_commit"):
        model.repo.commit()

    # need to let rest api create
    context["package"] = pkg
    # this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    # Pass the name as a logging argument so formatting is deferred until
    # the record is actually emitted.
    log.debug("Created object %s", pkg.name)

    # Make sure that a user provided schema is not used on package_show
    context.pop("schema", None)

    return_id_only = context.get("return_id_only", False)

    output = context["id"] if return_id_only else _get_action("package_show")(context, {"id": context["id"]})

    return output
Example #59
0
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the ``IDatasetForm`` plugin
    interface.

    For further parameters see ``package_create()``.

    The supplied ``data_dict`` is merged over the dataset's current
    ``package_show`` output, so keys that are not supplied keep their stored
    values. ``author_email``, ``maintainer`` and ``maintainer_email`` are
    stripped from the merged dict before validation.

    The following ``context`` keys are read: ``model`` and ``user``
    (required), ``schema``, ``api_version``, ``message``, ``defer_commit``,
    ``return_id_only`` and ``defer_audit`` (all optional). On return
    ``context['package']`` holds the looked-up package, ``context['schema']``
    has been removed and ``context['ignore_auth']`` has been set to ``True``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset, or just the dataset id if
              'return_id_only' is True in the context (it defaults to
              False)
    :rtype: dictionary

    :raises ValidationError: if neither ``id`` nor ``name`` is supplied, or
        if the merged data does not pass schema validation
    :raises NotFound: if no dataset matches the given name or id

    '''
    log.info('context: %s', context)
    model = context['model']
    user = context['user']

    # Either key identifies the dataset. A request that carries neither is
    # reported as a validation problem rather than leaking a bare KeyError.
    try:
        name_or_id = data_dict.get("id") or data_dict['name']
    except KeyError:
        raise ValidationError({'id': _('Missing value')})

    log.info('package update: %s', data_dict)
    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id
    # Start from the stored representation and overlay the caller's changes
    # so that this behaves like a partial update.
    pkg_dict = _get_action("package_show")(context, {'id': pkg.id})
    pkg_dict.update(data_dict)
    _check_access('package_update', context, data_dict)
    # NOTE(review): notify_package_update is an action defined outside this
    # block; its effect cannot be determined from here.
    _get_action('notify_package_update')(context, data_dict)

    # get the schema (a caller-supplied one takes precedence)
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    #we cannot modify model but we can ensure no modification via GUI/API
    delete_keys = ['author_email', 'maintainer', 'maintainer_email']
    for key in delete_keys:
        pkg_dict.pop(key, None)

    data, errors = _validate(pkg_dict, schema, context)
    log.debug('package_update validate_errs=%r user=%s package=%s', errors,
              context.get('user'),
              context.get('package').name if context.get('package') else '')

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # Run the owner-org update on a copy of the context so the flags set
    # here do not leak into the caller's context.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    # Pass the name as a logging argument so formatting is deferred until
    # the record is actually emitted.
    log.debug('Updated object %s', pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
            else _get_action('package_show')(context, {'id': data_dict['id']})
    #action will be audited in resource_create or resource_update action
    if not context.get('defer_audit', None):
        audit_helper(data_dict, output, 'package_update')

    # NOTE(review): the unlock is only reached on the success path; an
    # exception raised earlier skips it - confirm that is intended.
    _get_action('package_unlock')(context, {'id': pkg.id})

    return output
Example #60
0
 def _read_template(self, package_type):
     """Return ``read_template()`` of the plugin for ``package_type``."""
     plugin = lookup_package_plugin(package_type)
     return plugin.read_template()