Example #1
0
def load_environment(global_conf, app_conf):
    """Configure the Pylons environment via the ``pylons.config``
    object.

    The steps run in a fixed order: patch ``PylonsApp.find_controller``,
    initialise the Pylons config and paths, load CKAN plugins and run their
    ``IConfigurer`` hooks, resolve ``ckan.site_id`` / ``ckan.host``, set up
    and check SOLR, build the routes map, app globals and template helpers,
    create the Genshi template loader, create the SQLAlchemy engine and
    initialise the model, and finally run the ``IConfigurable`` hooks.

    :param global_conf: global configuration mapping; its ``'__file__'``
        entry is exported as the ``CKAN_CONFIG`` environment variable
    :param app_conf: application configuration, passed through to
        ``config.init_app``
    """

    ######  Pylons monkey-patch
    # this must be run at a time when the env is semi-setup, thus inlined here.
    # Required by the deliverance plugin and iATI
    from pylons.wsgiapp import PylonsApp
    import pkg_resources
    # Keep the stock implementation so the patched version can delegate to it.
    find_controller_generic = PylonsApp.find_controller

    # This is from pylons 1.0 source, will monkey-patch into 0.9.7
    def find_controller(self, controller):
        # Already resolved once: serve it from the per-app cache.
        if controller in self.controller_classes:
            return self.controller_classes[controller]
        # Check to see if its a dotted name
        if '.' in controller or ':' in controller:
            # Resolve "package.module:Class" style names through
            # pkg_resources and cache the result for later lookups.
            mycontroller = pkg_resources.EntryPoint.parse(
                'x=%s' % controller).load(False)
            self.controller_classes[controller] = mycontroller
            return mycontroller
        # Plain names fall back to the original Pylons lookup.
        return find_controller_generic(self, controller)

    PylonsApp.find_controller = find_controller
    ###### END evil monkey-patch

    # Publish the value of global_conf['__file__'] (presumably the path of
    # the loaded config file - TODO confirm) through the environment.
    os.environ['CKAN_CONFIG'] = global_conf['__file__']

    # Pylons paths
    # ``root`` is the parent of the directory that contains this module.
    root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    paths = dict(root=root,
                 controllers=os.path.join(root, 'controllers'),
                 static_files=os.path.join(root, 'public'),
                 templates=[os.path.join(root, 'templates')])

    # Initialize config with the basic options

    config.init_app(global_conf, app_conf, package='ckan', paths=paths)

    # load all CKAN plugins
    p.load_all(config)

    for plugin in p.PluginImplementations(p.IConfigurer):
        # must do update in place as this does not work:
        # config = plugin.update_config(config)
        plugin.update_config(config)

    # This is set up before globals are initialized
    # The CKAN_SITE_ID environment variable, when set and non-empty,
    # overrides any ``ckan.site_id`` from the config.
    site_id = os.environ.get('CKAN_SITE_ID')
    if site_id:
        config['ckan.site_id'] = site_id

    # ``ckan.host`` is always set to the network location of ``ckan.site_url``.
    site_url = config.get('ckan.site_url', '')
    ckan_host = config['ckan.host'] = urlparse(site_url).netloc
    if config.get('ckan.site_id') is None:
        # No explicit site id: derive it from the host, without the port.
        if ':' in ckan_host:
            # NOTE(review): this unpacks exactly two parts, so a netloc
            # with more than one colon would raise ValueError - confirm
            # whether such site URLs need to be supported.
            ckan_host, port = ckan_host.split(':')
        # NOTE(review): ``assert`` is skipped when Python runs with -O, in
        # which case an empty site id would be stored silently.
        assert ckan_host, 'You need to configure ckan.site_url or ' \
                          'ckan.site_id for SOLR search-index rebuild to work.'
        config['ckan.site_id'] = ckan_host

    # Init SOLR settings and check if the schema is compatible
    #from ckan.lib.search import SolrSettings, check_solr_schema_version
    search.SolrSettings.init(config.get('solr_url'), config.get('solr_user'),
                             config.get('solr_password'))
    search.check_solr_schema_version()

    config['routes.map'] = routing.make_map()
    config['pylons.app_globals'] = app_globals.Globals()

    # add helper functions
    # ``ckan.restrict_template_vars`` defaults to true when not configured.
    restrict_helpers = asbool(config.get('ckan.restrict_template_vars',
                                         'true'))
    helpers = _Helpers(h, restrict_helpers)
    config['pylons.h'] = helpers

    ## redo template setup to use genshi.search_path (so remove std template setup)
    template_paths = [paths['templates'][0]]
    # ``extra_template_paths`` is a comma-separated list of directories.
    extra_template_paths = config.get('extra_template_paths', '')
    if extra_template_paths:
        # must be first for them to override defaults
        template_paths = extra_template_paths.split(',') + template_paths

    # Translator (i18n)
    translator = Translator(pylons.translator)

    # Loader callback: hook the translator into every template it loads.
    def template_loaded(template):
        translator.setup(template)

    # Markdown ignores the logger config, so to get rid of excessive
    # markdown debug messages in the log, set it to the level of the
    # root logger.
    logging.getLogger("MARKDOWN").setLevel(logging.getLogger().level)

    # Create the Genshi TemplateLoader
    # The loader is stored on the app globals created above.
    config['pylons.app_globals'].genshi_loader = TemplateLoader(
        template_paths, auto_reload=True, callback=template_loaded)

    # CONFIGURATION OPTIONS HERE (note: all config options will override
    # any Pylons config options)

    # Setup the SQLAlchemy database engine
    # Suppress a couple of sqlalchemy warnings
    warnings.filterwarnings(
        'ignore', '^Unicode type received non-unicode bind param value',
        sqlalchemy.exc.SAWarning)
    warnings.filterwarnings(
        'ignore', "^Did not recognize type 'BIGINT' of column 'size'",
        sqlalchemy.exc.SAWarning)
    warnings.filterwarnings(
        'ignore',
        "^Did not recognize type 'tsvector' of column 'search_vector'",
        sqlalchemy.exc.SAWarning)

    # The CKAN_DB environment variable, when set and non-empty, overrides
    # the configured ``sqlalchemy.url``.
    ckan_db = os.environ.get('CKAN_DB')

    if ckan_db:
        config['sqlalchemy.url'] = ckan_db
    engine = sqlalchemy.engine_from_config(config, 'sqlalchemy.')

    # Only bind the model when no engine has been attached to it yet.
    if not model.meta.engine:
        model.init_model(engine)

    # Runs last, after the config has been fully populated above.
    for plugin in p.PluginImplementations(p.IConfigurable):
        plugin.configure(config)
Example #2
0
    def search(self):
        """Render the dataset search page.

        Reads the query (``q``), ``page``, ``sort`` and the remaining
        facet / ``ext_`` parameters from ``request.params``, runs the
        ``package_search`` action, and stores the results, facets and
        URL-building helpers on the template context ``c`` before rendering
        the search template.

        Aborts with 403 when ``site_read`` access or the search itself is
        not authorized, and with 400 on a ``SearchQueryError`` or on a
        non-integer ``_<facet>_limit`` parameter. A ``SearchError`` is
        logged and rendered as an empty result page with ``c.query_error``
        set to ``True``.
        """
        from ckan.lib.search import SearchError, SearchQueryError

        package_type = self._guess_package_type()

        try:
            context = {
                'model': model,
                'user': c.user,
                'auth_user_obj': c.userobj
            }
            check_access('site_read', context)
        except NotAuthorized:
            abort(403, _('Not authorized to see this page'))

        # unicode format (decoded from utf8)
        q = c.q = request.params.get('q', u'')
        c.query_error = False
        page = h.get_page_number(request.params)

        # Page size; reused below as ``rows`` and ``items_per_page``.
        limit = int(config.get('ckan.datasets_per_page', 20))

        # most search operations should reset the page counter:
        params_nopage = [(k, v) for k, v in request.params.items()
                         if k != 'page']

        # Exposed on ``c``: builds a search URL with the given parameters
        # added.
        def drill_down_url(alternative_url=None, **by):
            return h.add_url_param(alternative_url=alternative_url,
                                   controller='package',
                                   action='search',
                                   new_params=by)

        c.drill_down_url = drill_down_url

        # Exposed on ``c``: builds a search URL with one parameter removed
        # (or replaced).
        def remove_field(key, value=None, replace=None):
            return h.remove_url_param(key,
                                      value=value,
                                      replace=replace,
                                      controller='package',
                                      action='search',
                                      alternative_url=package_type)

        c.remove_field = remove_field

        sort_by = request.params.get('sort', None)
        # Current parameters without 'page' and 'sort'; base for sort links.
        params_nosort = [(k, v) for k, v in params_nopage if k != 'sort']

        def _sort_by(fields):
            """
            Sort by the given list of fields.
            Each entry in the list is a 2-tuple: (fieldname, sort_order)
            eg - [('metadata_modified', 'desc'), ('name', 'asc')]
            If fields is empty, then the default ordering is used.
            """
            # Copy so repeated calls do not accumulate 'sort' entries.
            params = params_nosort[:]

            if fields:
                sort_string = ', '.join('%s %s' % f for f in fields)
                params.append(('sort', sort_string))
            return search_url(params, package_type)

        c.sort_by = _sort_by
        if not sort_by:
            c.sort_by_fields = []
        else:
            # Keep only the field name of each "field order" pair.
            c.sort_by_fields = [
                field.split()[0] for field in sort_by.split(',')
            ]

        # URL callback handed to h.Page below. ``q`` is accepted but not
        # used in the body.
        def pager_url(q=None, page=None):
            params = list(params_nopage)
            params.append(('page', page))
            return search_url(params, package_type)

        c.search_url_params = urlencode(_encode_params(params_nopage))

        try:
            c.fields = []
            # c.fields_grouped will contain a dict of params containing
            # a list of values eg {'tags':['tag1', 'tag2']}
            c.fields_grouped = {}
            search_extras = {}
            fq = ''
            # Every non-empty parameter other than q/page/sort that does not
            # start with '_' is either a facet filter (appended to ``fq``)
            # or, when prefixed with 'ext_', a search extra.
            for (param, value) in request.params.items():
                if param not in ['q', 'page', 'sort'] \
                        and len(value) and not param.startswith('_'):
                    if not param.startswith('ext_'):
                        c.fields.append((param, value))
                        # NOTE(review): param and value are interpolated
                        # into the filter query as-is; confirm that
                        # escaping is handled downstream.
                        fq += ' +%s:"%s"' % (param, value)
                        if param not in c.fields_grouped:
                            c.fields_grouped[param] = [value]
                        else:
                            c.fields_grouped[param].append(value)
                    else:
                        search_extras[param] = value

            # Rebinds ``context`` for the search action; this is also the
            # context passed to _setup_template_variables at the end.
            context = {
                'model': model,
                'session': model.Session,
                'user': c.user,
                'for_view': True,
                'auth_user_obj': c.userobj
            }

            # Unless changed via config options, don't show other dataset
            # types any search page. Potential alternatives are do show them
            # on the default search page (dataset) or on one other search page
            search_all_type = config.get('ckan.search.show_all_types',
                                         'dataset')
            search_all = False

            try:
                # If the "type" is set to True or False, convert to bool
                # and we know that no type was specified, so use traditional
                # behaviour of applying this only to dataset type
                search_all = asbool(search_all_type)
                search_all_type = 'dataset'
            # Otherwise we treat as a string representing a type
            except ValueError:
                search_all = True

            if not package_type:
                package_type = 'dataset'

            # The type filter is skipped only when show-all is enabled and
            # this page is the one configured to show all types.
            if not search_all or package_type != search_all_type:
                # Only show datasets of this particular type
                fq += ' +dataset_type:{type}'.format(type=package_type)

            facets = OrderedDict()

            default_facet_titles = {
                'publisher_name': _('Data Contributors'),
                'groups': _('Groups'),
                'tags': _('Tags'),
                'res_format': _('Formats'),
                'license_id': _('Licenses'),
                'region': _('Geography'),
            }

            # Facets without a default title use their own name as title.
            for facet in h.facets():
                if facet in default_facet_titles:
                    facets[facet] = default_facet_titles[facet]
                else:
                    facets[facet] = facet

            # Facet titles
            # Each IFacets plugin receives the current mapping and returns
            # the mapping to use from then on.
            for plugin in p.PluginImplementations(p.IFacets):
                facets = plugin.dataset_facets(facets, package_type)

            c.facet_titles = facets

            data_dict = {
                'q':
                q,
                'fq':
                fq.strip(),
                'facet.field':
                facets.keys(),
                'rows':
                limit,
                'start': (page - 1) * limit,
                'sort':
                sort_by,
                'extras':
                search_extras,
                'include_private':
                asbool(config.get('ckan.search.default_include_private',
                                  True)),
            }

            query = get_action('package_search')(context, data_dict)
            c.sort_by_selected = query['sort']

            c.page = h.Page(collection=query['results'],
                            page=page,
                            url=pager_url,
                            item_count=query['count'],
                            items_per_page=limit)
            c.search_facets = query['search_facets']
            c.page.items = query['results']
        except SearchQueryError as se:
            # User's search parameters are invalid, in such a way that is not
            # achievable with the web interface, so return a proper error to
            # discourage spiders which are the main cause of this.
            log.info('Dataset search query rejected: %r', se.args)
            abort(
                400,
                _('Invalid search query: {error_message}').format(
                    error_message=str(se)))
        except SearchError as se:
            # May be bad input from the user, but may also be more serious like
            # bad code causing a SOLR syntax error, or a problem connecting to
            # SOLR
            log.error('Dataset search error: %r', se.args)
            c.query_error = True
            c.search_facets = {}
            c.page = h.Page(collection=[])
        except NotAuthorized:
            abort(403, _('Not authorized to see this page'))

        # Per-facet display limits, taken from ``_<facet>_limit`` request
        # parameters with ``search.facets.default`` (10) as fallback.
        # Note that ``limit`` is rebound here; the page size is no longer
        # needed at this point.
        c.search_facets_limits = {}
        for facet in c.search_facets.keys():
            try:
                limit = int(
                    request.params.get(
                        '_%s_limit' % facet,
                        int(config.get('search.facets.default', 10))))
            except ValueError:
                abort(
                    400,
                    _('Parameter "{parameter_name}" is not '
                      'an integer').format(parameter_name='_%s_limit' % facet))
            c.search_facets_limits[facet] = limit

        self._setup_template_variables(context, {}, package_type=package_type)

        return render(self._search_template(package_type),
                      extra_vars={'dataset_type': package_type})
Example #3
0
def resource_update(context, data_dict):
    '''Update a resource.

    To update a resource you must be authorized to update the dataset that the
    resource belongs to.

    For further parameters see
    :py:func:`~ckan.logic.action.create.resource_create`.

    The resource is updated by replacing its entry in the parent dataset's
    ``resources`` list and calling ``package_update`` on the whole dataset.
    ``IResourceController`` plugins are notified before and after the update,
    and default resource views are re-created when the format has changed.

    :param id: the id of the resource to update
    :type id: string

    :returns: the updated resource, as returned by ``resource_show``
    :rtype: dictionary

    :raises NotFound: if no resource with the given id exists, or it is not
        listed among the resources of its dataset
    :raises ValidationError: if ``package_update`` rejects the data; only the
        errors of this resource are reported when they can be isolated

    '''
    model = context['model']
    # Not used directly, but kept so that a context without 'user' still
    # fails early with KeyError as before.
    user = context['user']
    resource_id = _get_or_bust(data_dict, "id")

    if not data_dict.get('url'):
        data_dict['url'] = ''

    resource = model.Resource.get(resource_id)
    # The existence check must come before any attribute access: reading
    # ``resource.format`` on a missing resource would raise AttributeError
    # instead of the intended NotFound.
    if not resource:
        log.debug('Could not find resource %s', resource_id)
        raise NotFound(_('Resource was not found.'))

    old_resource_format = resource.format

    # The resource object is only put in the context for the auth check.
    context["resource"] = resource
    _check_access('resource_update', context, data_dict)
    del context["resource"]

    package_id = resource.package.id
    pkg_dict = _get_action('package_show')(dict(context, return_type='dict'), {
        'id': package_id
    })

    # Locate the position of this resource inside the dataset dict.
    for n, res_dict in enumerate(pkg_dict['resources']):
        if res_dict['id'] == resource_id:
            break
    else:
        log.error('Could not find resource %s after all', resource_id)
        raise NotFound(_('Resource was not found.'))

    # Persist the datastore_active extra if already present and not provided
    if ('datastore_active' in resource.extras
            and 'datastore_active' not in data_dict):
        data_dict['datastore_active'] = resource.extras['datastore_active']

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.before_update(context, pkg_dict['resources'][n], data_dict)

    pkg_dict['resources'][n] = data_dict

    try:
        context['use_cache'] = False
        updated_pkg_dict = _get_action('package_update')(context, pkg_dict)
    except ValidationError as e:
        # Report only the errors for this resource when they can be
        # isolated; otherwise fall back to the complete error dict.
        try:
            resource_errors = e.error_dict['resources'][n]
        except (KeyError, IndexError):
            resource_errors = e.error_dict
        raise ValidationError(resource_errors)

    resource = _get_action('resource_show')(context, {'id': resource_id})

    # A changed format may call for different default views.
    if old_resource_format != resource['format']:
        _get_action('resource_create_default_resource_views')(
            {
                'model': context['model'],
                'user': context['user'],
                'ignore_auth': True
            }, {
                'package': updated_pkg_dict,
                'resource': resource
            })

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.after_update(context, resource)

    return resource
Example #4
0
def package_update(fb, context, data_dict):
    '''Update a dataset, validating ``owner_org`` with ``uds_oov``.

    Looks the dataset up by ``data_dict['id']`` (or ``data_dict['name']``),
    checks ``package_update`` access, validates the data against the update
    schema (with ``default_oov`` replaced by ``uds_oov`` in the
    ``owner_org`` validators), saves it under a new revision, updates the
    owner organization and notifies ``IPackageController`` plugins.

    :param fb: not used in this function
    :param context: action context; ``model`` and ``user`` are required,
        ``schema``, ``message``, ``api_version``, ``defer_commit`` and
        ``return_id_only`` are honoured when present
    :param data_dict: the dataset data; ``id`` is overwritten with the id
        of the dataset that was found

    :returns: the dataset id when ``return_id_only`` is set in the context,
        otherwise the result of ``package_show``
    :raises NotFound: if the dataset does not exist
    :raises ValidationError: if validation produced errors
    '''
    model = context['model']
    user = context['user']

    pkg = model.Package.get(data_dict.get("id") or data_dict['name'])
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    check_access('package_update', context, data_dict)

    # A schema supplied through the context wins over the plugin's schema.
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    schema = (context['schema'] if 'schema' in context
              else package_plugin.update_package_schema())

    # We modify the schema here to replace owner_org_validator by our own
    if 'owner_org' in schema:
        patched_validators = []
        for validator in schema['owner_org']:
            if validator is default_oov:
                validator = uds_oov
            patched_validators.append(validator)
        schema['owner_org'] = patched_validators

    # check_data_dict() is deprecated: it is only called outside the API
    # and only when the plugin still provides it.
    if 'api_version' not in context and getattr(
            package_plugin, 'check_data_dict', None):
        try:
            package_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            # Old plugins do not support passing the schema so we need
            # to ensure they still work.
            package_plugin.check_data_dict(data_dict)

    data, errors = lib_plugins.plugin_validate(
        package_plugin, context, data_dict, schema, 'package_update')

    context_pkg = context.get('package')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context_pkg.name if context_pkg else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    rev.message = (context['message'] if 'message' in context
                   else _(u'REST API: Update object %s') % data.get("name"))

    # avoid revisioning by updating directly
    touch_query = model.Session.query(model.Package).filter_by(id=pkg.id)
    touch_query.update({"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # The owner organization is updated without auth checks and without
    # committing on its own.
    org_update_context = context.copy()
    org_update_context['ignore_auth'] = True
    org_update_context['defer_commit'] = True
    org_update_payload = {'id': pkg.id, 'organization_id': pkg.owner_org}
    get_action('package_owner_org_update')(org_update_context,
                                           org_update_payload)

    for controller_plugin in plugins.PluginImplementations(
            plugins.IPackageController):
        controller_plugin.edit(pkg)

        controller_plugin.after_update(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    if id_only:
        return data_dict['id']
    return get_action('package_show')(context, {'id': data_dict['id']})
Example #5
0
def _update_facet_titles(facets, group_type):
    '''Pass ``facets`` through every ``IFacets`` plugin for a group type.

    Each plugin's ``group_facets`` receives the mapping returned by the
    previous plugin (with ``None`` as its third argument), and the result of
    the last plugin is returned. Without plugins ``facets`` is returned
    unchanged.
    '''
    facet_plugins = plugins.PluginImplementations(plugins.IFacets)
    for facet_plugin in facet_plugins:
        facets = facet_plugin.group_facets(facets, group_type, None)
    return facets
Example #6
0
def _group_or_org_create(context, data_dict, is_org=False):
    '''Create a group or, when ``is_org`` is true, an organization.

    Validates ``data_dict`` against the group plugin's schema, saves the
    group under a new revision, optionally links it as a child of
    ``context['parent']``, notifies the matching controller plugins, records
    a "new group" / "new organization" activity and finally makes the
    creating user an admin member.

    :param context: action context; ``model``, ``user`` and ``session`` are
        required, ``parent``, ``message``, ``api_version`` and
        ``defer_commit`` are honoured when present. ``group`` and ``id`` are
        set on it before returning.
    :param data_dict: the group data; ``is_organization`` is set on it
    :param is_org: whether an organization rather than a group is created

    :returns: the dictized group
    :raises ValidationError: if validation produced errors
    '''
    model = context['model']
    user = context['user']
    session = context['session']
    parent = context.get('parent', None)
    data_dict['is_organization'] = is_org

    # get the schema
    group_plugin = lib_plugins.lookup_group_plugin(
        group_type=data_dict.get('type'))
    schema_options = {
        'type': 'create',
        'api': 'api_version' in context,
        'context': context,
    }
    try:
        schema = group_plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        # Plugins without the *_options variant only offer the plain one.
        schema = group_plugin.form_to_db_schema()

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)
    log.debug('group_create validate_errs=%r user=%s group=%s data_dict=%r',
              errors, context.get('user'), data_dict.get('name'), data_dict)

    if errors:
        session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    rev.message = (context['message'] if 'message' in context
                   else _(u'REST API: Create object %s') % data.get("name"))

    group = model_save.group_dict_save(data, context)

    # Link the new group below its parent, if one was given and exists.
    parent_group = model.Group.get(parent) if parent else None
    if parent_group:
        session.add(model.Member(group=parent_group,
                                 table_id=group.id,
                                 table_name='group'))
        log.debug('Group %s is made child of group %s', group.name,
                  parent_group.name)

    admins = [model.User.by_name(user.decode('utf8'))] if user else []
    model.setup_default_user_roles(group, admins)
    # Needed to let extensions know the group id
    session.flush()

    if is_org:
        plugin_type = plugins.IOrganizationController
        activity_type = 'new organization'
    else:
        plugin_type = plugins.IGroupController
        activity_type = 'new group'

    for controller_plugin in plugins.PluginImplementations(plugin_type):
        controller_plugin.create(group)

    user_id = model.User.by_name(user.decode('utf8')).id

    activity_dict = {
        'user_id': user_id,
        'object_id': group.id,
        'activity_type': activity_type,
        'data': {
            'group': ckan.lib.dictization.table_dictize(group, context)
        },
    }
    activity_create_context = {
        'model': model,
        'user': user,
        'defer_commit': True,
        'ignore_auth': True,
        'session': session
    }
    logic.get_action('activity_create')(activity_create_context, activity_dict)

    if not context.get('defer_commit'):
        model.repo.commit()
    context["group"] = group
    context["id"] = group.id

    # creator of group/org becomes an admin
    # this needs to be after the repo.commit or else revisions break
    member_create_context = {
        'model': model,
        'user': user,
        'ignore_auth': True,  # we are not a member of the group at this point
        'session': session
    }
    member_dict = {
        'id': group.id,
        'object': user_id,
        'object_type': 'user',
        'capacity': 'admin',
    }
    logic.get_action('member_create')(member_create_context, member_dict)

    log.debug('Created object %s' % group.name)
    return model_dictize.group_dictize(group, context)
Example #7
0
def datapusher_submit(context, data_dict):
    ''' Submit a job to the datapusher. The datapusher is a service that
    imports tabular data into the datastore.

    :param resource_id: The resource id of the resource that the data
        should be imported in. The resource's URL will be used to get the data.
    :type resource_id: string
    :param set_url_type: If set to True, the ``url_type`` of the resource will
        be set to ``datastore`` and the resource URL will automatically point
        to the :ref:`datastore dump <dump>` URL. (optional, default: False)
    :type set_url_type: bool
    :param ignore_hash: If set to True, the datapusher will reload the file
        even if it hasn't changed. (optional, default: False)
    :type ignore_hash: bool

    Returns ``True`` if the job has been submitted and ``False`` if the job
    has not been submitted: the resource does not exist, an ``IDataPusher``
    plugin rejected the upload, or a non-stale pending task already exists.

    Raises ``ValidationError`` if ``data_dict`` does not validate or if the
    DataPusher cannot be reached; in the latter case the task status is set
    to ``error`` first. An HTTP error status from the DataPusher propagates
    as the exception raised by ``raise_for_status()``.

    :rtype: bool
    '''
    schema = context.get('schema', dpschema.datapusher_submit_schema())
    data_dict, errors = _validate(data_dict, schema, context)
    if errors:
        raise p.toolkit.ValidationError(errors)

    res_id = data_dict['resource_id']

    p.toolkit.check_access('datapusher_submit', context, data_dict)

    try:
        resource_dict = p.toolkit.get_action('resource_show')(context, {
            'id': res_id,
        })
    except logic.NotFound:
        return False

    datapusher_url = config.get('ckan.datapusher.url')

    site_url = h.url_for('/', qualified=True)
    callback_url = h.url_for('/api/3/action/datapusher_hook', qualified=True)

    user = p.toolkit.get_action('user_show')(context, {'id': context['user']})

    # Any single plugin can veto the upload.
    for plugin in p.PluginImplementations(interfaces.IDataPusher):
        upload = plugin.can_upload(res_id)
        if not upload:
            msg = "Plugin {0} rejected resource {1}"\
                .format(plugin.__class__.__name__, res_id)
            log.info(msg)
            return False

    task = {
        'entity_id': res_id,
        'entity_type': 'resource',
        'task_type': 'datapusher',
        'last_updated': str(datetime.datetime.utcnow()),
        'state': 'submitting',
        'key': 'datapusher',
        'value': '{}',
        'error': '{}',
    }
    try:
        existing_task = p.toolkit.get_action('task_status_show')(context, {
            'entity_id': res_id,
            'task_type': 'datapusher',
            'key': 'datapusher'
        })
        assume_task_stale_after = datetime.timedelta(seconds=int(
            config.get('ckan.datapusher.assume_task_stale_after', 3600)))
        if existing_task.get('state') == 'pending':
            updated = datetime.datetime.strptime(
                existing_task['last_updated'], '%Y-%m-%dT%H:%M:%S.%f')
            time_since_last_updated = datetime.datetime.utcnow() - updated
            if time_since_last_updated > assume_task_stale_after:
                # it's been a while since the job was last updated - it's more
                # likely something went wrong with it and the state wasn't
                # updated than its still in progress. Let it be restarted.
                log.info('A pending task was found %r, but it was last '
                         'updated %s ago, so it is assumed stale and will '
                         'be resubmitted',
                         existing_task['id'], time_since_last_updated)
            else:
                log.info('A pending task was found %s for this resource, so '
                         'skipping this duplicate task', existing_task['id'])
                return False

        # Reuse the existing task record instead of creating a new one.
        task['id'] = existing_task['id']
    except logic.NotFound:
        # No task recorded yet for this resource: a new one is created.
        pass

    context['ignore_auth'] = True
    p.toolkit.get_action('task_status_update')(context, task)

    try:
        r = requests.post(
            urlparse.urljoin(datapusher_url, 'job'),
            headers={
                'Content-Type': 'application/json'
            },
            data=json.dumps({
                'api_key': user['apikey'],
                'job_type': 'push_to_datastore',
                'result_url': callback_url,
                'metadata': {
                    'ignore_hash': data_dict.get('ignore_hash', False),
                    'ckan_url': site_url,
                    'resource_id': res_id,
                    'set_url_type': data_dict.get('set_url_type', False),
                    'task_created': task['last_updated'],
                    'original_url': resource_dict.get('url'),
                }
            }))
        r.raise_for_status()
    except requests.exceptions.ConnectionError as e:
        # Record the failure on the task before reporting it to the caller.
        error = {'message': 'Could not connect to DataPusher.',
                 'details': str(e)}
        task['error'] = json.dumps(error)
        task['state'] = 'error'
        # Must be a plain string like the initial value above; a trailing
        # comma here would store a 1-tuple instead.
        task['last_updated'] = str(datetime.datetime.utcnow())
        p.toolkit.get_action('task_status_update')(context, task)
        raise p.toolkit.ValidationError(error)

    # The job was accepted by the DataPusher.
    return True
Example #8
0
    def test_after_upload_called(self):
        """Each completed ``datapusher_hook`` call adds one ``after_upload``.

        After the first "complete" hook the ``after_upload_calls`` counters
        of all ``IDataPusher`` plugins must sum to 1, after the second to 2.
        """
        dataset = factories.Dataset()
        resource = factories.Resource(package_id=dataset["id"])
        resource_id = resource["id"]

        def after_upload_total():
            # Sum of the call counters over every IDataPusher plugin.
            return sum(
                plugin.after_upload_calls
                for plugin in p.PluginImplementations(interfaces.IDataPusher))

        def send_complete_hook():
            # Report the job for this resource as complete.
            helpers.call_action(
                "datapusher_hook",
                context={},
                status="complete",
                metadata={"resource_id": resource_id},
            )

        # Push data directly to the DataStore for the resource to be marked as
        # `datastore_active=True`, so the grid view can be created
        helpers.call_action(
            "datastore_create",
            resource_id=resource_id,
            fields=[{"id": "a", "type": "text"}, {"id": "b", "type": "text"}],
            records=[{"a": "1", "b": "2"}],
            force=True,
        )

        # Create a task for `datapusher_hook` to update
        helpers.call_action(
            "task_status_update",
            context={},
            entity_id=resource_id,
            entity_type="resource",
            task_type="datapusher",
            key="datapusher",
            value='{"job_id": "my_id", "job_key":"my_key"}',
            last_updated=str(datetime.datetime.now()),
            state="pending",
        )

        # Call datapusher_hook with a status of complete to trigger the
        # default views creation
        send_complete_hook()
        total = after_upload_total()
        assert total == 1, total

        # A second completed hook must be counted as well.
        send_complete_hook()
        total = after_upload_total()
        assert total == 2, total
Example #9
0
def get_action(action):
    '''Return the named :py:mod:`ckan.logic.action` function.

    For example ``get_action('package_create')`` will normally return the
    :py:func:`ckan.logic.action.create.package_create()` function.

    For documentation of the available action functions, see
    :ref:`api-reference`.

    You should always use ``get_action()`` instead of importing an action
    function directly, because :py:class:`~ckan.plugins.interfaces.IActions`
    plugins can override action functions, causing ``get_action()`` to return a
    plugin-provided function instead of the default one.

    Usage::

        import ckan.plugins.toolkit as toolkit

        # Call the package_create action function:
        toolkit.get_action('package_create')(context, data_dict)

    As the context parameter passed to an action function is commonly::

        context = {'model': ckan.model, 'session': ckan.model.Session,
                   'user': pylons.c.user or pylons.c.author}

    an action function returned by ``get_action()`` will automatically add
    these parameters to the context if they are not defined.  This is
    especially useful for plugins as they should not really be importing parts
    of ckan eg :py:mod:`ckan.model` and as such do not have access to ``model``
    or ``model.Session``.

    If a ``context`` of ``None`` is passed to the action function then the
    default context dict will be created.

    The first call builds the module-level ``_actions`` registry (core
    actions, then plugin overrides, then the auth-audit wrappers); later
    calls are plain dictionary lookups.

    :param action: name of the action function to return,
        eg. ``'package_create'``
    :type action: string

    :returns: the named action function
    :rtype: callable

    :raises KeyError: if no action function is registered under ``action``
        (raised both on the first, registry-building call and on every
        later call)

    '''
    # clean the action names
    action = new_authz.clean_action_name(action)

    if _actions:
        # The registry has already been built: just look the action up.
        if action not in _actions:
            raise KeyError("Action '%s' not found" % action)
        return _actions[action]

    # Otherwise look in all the plugins to resolve all possible
    # First get the default ones in the ckan/logic/action directory
    # Rather than writing them out in full will use __import__
    # to load anything from ckan.logic.action that looks like it might
    # be an action
    for action_module_name in ['get', 'create', 'update', 'delete']:
        module_path = 'ckan.logic.action.' + action_module_name
        # __import__ returns the top-level package, so walk down the dotted
        # path to reach the actual submodule.
        module = __import__(module_path)
        for part in module_path.split('.')[1:]:
            module = getattr(module, part)
        for k, v in module.__dict__.items():
            if not k.startswith('_'):
                # Only load functions from the action module or already
                # replaced functions.
                if (hasattr(v, '__call__') and
                        (v.__module__ == module_path or
                         hasattr(v, '__replaced'))):
                    k = new_authz.clean_action_name(k)
                    _actions[k] = v

                    # Whitelist all actions defined in logic/action/get.py as
                    # being side-effect free.
                    if action_module_name == 'get' and \
                       not hasattr(v, 'side_effect_free'):
                        v.side_effect_free = True

    # Then overwrite them with any specific ones in the plugins:
    resolved_action_plugins = {}
    fetched_actions = {}
    for plugin in p.PluginImplementations(p.IActions):
        for name, action_function in plugin.get_actions().items():
            name = new_authz.clean_action_name(name)
            # Two plugins may not provide the same action.
            if name in resolved_action_plugins:
                raise Exception('The action %r is already implemented in %r' %
                                (name, resolved_action_plugins[name]))
            log.debug(
                'Action function {0} from plugin {1} was inserted'.format(
                    name, plugin.name))
            resolved_action_plugins[name] = plugin.name
            # Extensions are exempted from the auth audit for now
            # This needs to be resolved later
            action_function.auth_audit_exempt = True
            fetched_actions[name] = action_function
    # Use the updated ones in preference to the originals.
    _actions.update(fetched_actions)

    def make_wrapped(_action, action_name):
        # Build the wrapper that pre-populates the context and performs the
        # auth audit around a single action function.
        def wrapped(context=None, data_dict=None, **kw):
            if kw:
                log.critical('%s was passed extra keywords %r' %
                             (_action.__name__, kw))

            context = _prepopulate_context(context)

            # Auth Auditing
            # store this action name in the auth audit so we can see if
            # check access was called on the function we store the id of
            # the action incase the action is wrapped inside an action
            # of the same name.  this happens in the datastore
            context.setdefault('__auth_audit', [])
            context['__auth_audit'].append((action_name, id(_action)))

            # check_access(action_name, context, data_dict=None)
            result = _action(context, data_dict, **kw)
            try:
                audit = context['__auth_audit'][-1]
                # If our own entry is still on top of the stack, nothing
                # removed it while the action ran.
                if audit[0] == action_name and audit[1] == id(_action):
                    if action_name not in new_authz.auth_functions_list():
                        log.debug('No auth function for %s' % action_name)
                    elif not getattr(_action, 'auth_audit_exempt', False):
                        raise Exception(
                            'Action function {0} did not call its auth function'
                            .format(action_name))
                    # remove from audit stack
                    context['__auth_audit'].pop()
            except IndexError:
                # The audit stack is already empty: nothing to verify.
                pass
            return result

        return wrapped

    def replaced_action(action_name):
        # Build the stand-in that is written back into the defining module:
        # it logs a warning and delegates to the registered action.
        def warn(context, data_dict):
            log.critical('Action `%s` is being called directly '
                         'all action calls should be accessed via '
                         'logic.get_action' % action_name)
            return get_action(action_name)(context, data_dict)

        return warn

    # wrap the functions
    # Iterate over a snapshot because entries are rebound inside the loop.
    for action_name, _action in list(_actions.items()):

        # If we have been called multiple times for example during tests then
        # we need to make sure that we do not rewrap the actions.
        if hasattr(_action, '__replaced'):
            _actions[action_name] = _action.__replaced
            continue

        fn = make_wrapped(_action, action_name)
        # we need to mirror the docstring
        fn.__doc__ = _action.__doc__
        # we need to retain the side effect free behaviour
        if getattr(_action, 'side_effect_free', False):
            fn.side_effect_free = True
        _actions[action_name] = fn

        # Store our wrapped function so it is available.  This is to prevent
        # rewrapping of actions
        module = sys.modules[_action.__module__]
        r = replaced_action(action_name)
        r.__replaced = fn
        module.__dict__[action_name] = r

    # Fail the same way as the cached path above instead of silently
    # returning None for an unknown action on the first call.
    if action not in _actions:
        raise KeyError("Action '%s' not found" % action)
    return _actions[action]
Example #10
0
    def gather_stage(self, harvest_job):
        '''Download and parse the RDF source and create harvest objects.

        For each page of the source the content is fetched, passed through
        the ``before_download`` / ``after_download`` hooks of every
        ``IDCATRDFHarvester`` plugin, parsed with ``RDFParser`` and turned
        into one saved ``HarvestObject`` per dataset that has a guid.

        :param harvest_job: the job being gathered; its ``source.url``,
            ``source.config`` and ``source.id`` are read here

        :returns: an empty list whenever gathering is aborted (a plugin
            cleared the URL, no content was obtained, the RDF could not be
            parsed, or processing a dataset raised)

        NOTE(review): in the code visible here ``next_page_url`` is only ever
        reassigned by the ``before_download`` hooks and nothing is returned
        after the loop, so ``guids_in_source`` / ``object_ids`` are collected
        but not used -- presumably the remainder of the method lives
        elsewhere; confirm before relying on the return value.
        '''

        log.debug('In DCATRDFHarvester gather_stage')

        # An explicit RDF serialization may be set in the source config
        # (stored as a JSON string).
        rdf_format = None
        if harvest_job.source.config:
            rdf_format = json.loads(
                harvest_job.source.config).get("rdf_format")

        # Get file contents of first page
        next_page_url = harvest_job.source.url

        guids_in_source = []
        object_ids = []
        last_content_hash = None

        while next_page_url:
            # Let plugins rewrite (or veto, by returning a falsy URL) the
            # URL that is about to be downloaded.
            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                next_page_url, before_download_errors = harvester.before_download(
                    next_page_url, harvest_job)

                for error_msg in before_download_errors:
                    self._save_gather_error(error_msg, harvest_job)

                if not next_page_url:
                    return []

            content, rdf_format = self._get_content_and_type(
                next_page_url, harvest_job, 1, content_type=rdf_format)

            content_hash = hashlib.md5()
            if content:
                content_hash.update(content)

            # The stored hash is only assigned while it is still unset, so
            # every later page is compared against the first stored one.
            if last_content_hash:
                if content_hash.digest() == last_content_hash.digest():
                    log.warning(
                        'Remote content was the same even when using a paginated URL, skipping'
                    )
                    break
            else:
                last_content_hash = content_hash

            # TODO: store content?
            # Let plugins post-process (or discard) the downloaded content.
            for harvester in p.PluginImplementations(IDCATRDFHarvester):
                content, after_download_errors = harvester.after_download(
                    content, harvest_job)

                for error_msg in after_download_errors:
                    self._save_gather_error(error_msg, harvest_job)

            if not content:
                return []

            # TODO: profiles conf
            parser = RDFParser()

            try:
                parser.parse(content, _format=rdf_format)
            except RDFParserException as e:
                self._save_gather_error(
                    'Error parsing the RDF file: {0}'.format(e), harvest_job)
                return []

            try:

                source_dataset = model.Package.get(harvest_job.source.id)

                for dataset in parser.datasets():
                    if not dataset.get('name'):
                        dataset['name'] = self._gen_new_name(dataset['title'])

                    # Unless already set by the parser, get the owner organization (if any)
                    # from the harvest source dataset
                    if not dataset.get('owner_org'):
                        if source_dataset.owner_org:
                            dataset['owner_org'] = source_dataset.owner_org

                    # Try to get a unique identifier for the harvested dataset
                    guid = self._get_guid(dataset,
                                          source_url=source_dataset.url)

                    if not guid:
                        # Without a guid the dataset is reported and skipped.
                        self._save_gather_error(
                            'Could not get a unique identifier for dataset: {0}'
                            .format(dataset), harvest_job)
                        continue

                    dataset['extras'].append({'key': 'guid', 'value': guid})
                    guids_in_source.append(guid)

                    obj = HarvestObject(guid=guid,
                                        job=harvest_job,
                                        content=json.dumps(dataset))

                    obj.save()
                    object_ids.append(obj.id)
            except Exception as e:
                # Any failure while processing datasets aborts the whole
                # gather stage; the traceback is kept in the gather error.
                self._save_gather_error(
                    'Error when processsing dataset: %r / %s' %
                    (e, traceback.format_exc()), harvest_job)
                return []
Example #11
0
    def import_stage(self, harvest_object):

        log.debug('In DCATRDFHarvester import_stage')

        status = self._get_object_extra(harvest_object, 'status')
        if status == 'delete':
            # Delete package
            context = {
                'model': model,
                'session': model.Session,
                'user': self._get_user_name(),
                'ignore_auth': True
            }

            p.toolkit.get_action('package_delete')(
                context, {
                    'id': harvest_object.package_id
                })
            log.info('Deleted package {0} with guid {1}'.format(
                harvest_object.package_id, harvest_object.guid))
            return True

        if harvest_object.content is None:
            self._save_object_error(
                'Empty content for object {0}'.format(harvest_object.id),
                harvest_object, 'Import')
            return False

        try:
            dataset = json.loads(harvest_object.content)
        except ValueError:
            self._save_object_error(
                'Could not parse content for object {0}'.format(
                    harvest_object.id), harvest_object, 'Import')
            return False

        # Get the last harvested object (if any)
        previous_object = model.Session.query(HarvestObject) \
                                       .filter(HarvestObject.guid==harvest_object.guid) \
                                       .filter(HarvestObject.current==True) \
                                       .first()

        # Flag previous object as not current anymore
        if previous_object:
            previous_object.current = False
            previous_object.add()

        # Flag this object as the current one
        harvest_object.current = True
        harvest_object.add()

        context = {
            'user': self._get_user_name(),
            'return_id_only': True,
            'ignore_auth': True,
        }

        dataset = self.modify_package_dict(dataset, {}, harvest_object)

        # Check if a dataset with the same guid exists
        existing_dataset = self._get_existing_dataset(harvest_object.guid)

        try:
            if existing_dataset:
                # Don't change the dataset name even if the title has
                dataset['name'] = existing_dataset['name']
                dataset['id'] = existing_dataset['id']

                harvester_tmp_dict = {}

                # check if resources already exist based on their URI
                existing_resources = existing_dataset.get('resources')
                resource_mapping = {
                    r.get('uri'): r.get('id')
                    for r in existing_resources if r.get('uri')
                }
                for resource in dataset.get('resources'):
                    res_uri = resource.get('uri')
                    if res_uri and res_uri in resource_mapping:
                        resource['id'] = resource_mapping[res_uri]

                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    harvester.before_update(harvest_object, dataset,
                                            harvester_tmp_dict)

                try:
                    if dataset:
                        # Save reference to the package on the object
                        harvest_object.package_id = dataset['id']
                        harvest_object.add()

                        p.toolkit.get_action('package_update')(context,
                                                               dataset)
                    else:
                        log.info('Ignoring dataset %s' %
                                 existing_dataset['name'])
                        return 'unchanged'
                except p.toolkit.ValidationError, e:
                    self._save_object_error(
                        'Update validation Error: %s' % str(e.error_summary),
                        harvest_object, 'Import')
                    return False

                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    err = harvester.after_update(harvest_object, dataset,
                                                 harvester_tmp_dict)

                    if err:
                        self._save_object_error(
                            'RDFHarvester plugin error: %s' % err,
                            harvest_object, 'Import')
                        return False

                log.info('Updated dataset %s' % dataset['name'])

            else:
Example #12
0
            else:
                package_plugin = lib_plugins.lookup_package_plugin(
                    dataset.get('type', None))

                package_schema = package_plugin.create_package_schema()
                context['schema'] = package_schema

                # We need to explicitly provide a package ID
                dataset['id'] = unicode(uuid.uuid4())
                package_schema['id'] = [unicode]

                harvester_tmp_dict = {}

                name = dataset['name']
                for harvester in p.PluginImplementations(IDCATRDFHarvester):
                    harvester.before_create(harvest_object, dataset,
                                            harvester_tmp_dict)

                try:
                    if dataset:
                        # Save reference to the package on the object
                        harvest_object.package_id = dataset['id']
                        harvest_object.add()

                        # Defer constraints and flush so the dataset can be indexed with
                        # the harvest object id (on the after_show hook from the harvester
                        # plugin)
                        model.Session.execute(
                            'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED'
                        )
Example #13
0
def make_map():
    """Create, configure and return the routes Mapper.

    Routes are registered in this order, which matters because it decides
    both matching precedence and the ``_ckan_core`` flag set on each route:

    1. routes added by ``IRoutes`` plugins in ``before_map`` (flagged
       ``_ckan_core = False`` unless they already carry the attribute);
    2. the routes connected directly in this function (flagged
       ``_ckan_core = True``);
    3. routes added by ``IRoutes`` plugins in ``after_map`` (flagged
       ``_ckan_core = False``);
    4. the favicon / trailing-slash redirects and the catch-all template
       route.

    :returns: the configured mapper (the object handed back by the last
        ``after_map`` plugin hook, if any plugin implements ``IRoutes``)
    """
    # import controllers here rather than at root level because
    # pylons config is initialised by this point.

    # Helpers to reduce code clutter
    # NOTE(review): of these, only OPTIONS is referenced in this function
    # as shown here.
    GET = dict(method=['GET'])
    PUT = dict(method=['PUT'])
    POST = dict(method=['POST'])
    DELETE = dict(method=['DELETE'])
    GET_POST = dict(method=['GET', 'POST'])
    PUT_POST = dict(method=['PUT', 'POST'])
    PUT_POST_DELETE = dict(method=['PUT', 'POST', 'DELETE'])
    OPTIONS = dict(method=['OPTIONS'])

    import ckan.lib.plugins as lib_plugins
    lib_plugins.reset_package_plugins()

    map = Mapper(directory=config['pylons.paths']['controllers'],
                 always_scan=config['debug'])
    map.minimization = False
    map.explicit = True

    # CUSTOM ROUTES HERE
    # Each plugin receives the mapper and must hand a mapper back.
    for plugin in p.PluginImplementations(p.IRoutes):
        map = plugin.before_map(map)

    # The ErrorController route (handles 404/500 error pages); it should
    # likely stay at the top, ensuring it can always be resolved.
    map.connect('/error/{action}', controller='error', ckan_core=True)
    map.connect('/error/{action}/{id}', controller='error', ckan_core=True)

    # Any URL requested with the OPTIONS method goes to home.cors_options.
    map.connect('*url',
                controller='home',
                action='cors_options',
                conditions=OPTIONS,
                ckan_core=True)

    # Mark all routes added from extensions on the `before_map` extension point
    # as non-core
    for route in map.matchlist:
        if not hasattr(route, '_ckan_core'):
            route._ckan_core = False

    # CKAN API versioned.
    register_list = [
        'package', 'dataset', 'resource', 'tag', 'group', 'revision',
        'licenses', 'rating', 'user', 'activity'
    ]
    # NOTE(review): register_list_str is not referenced again in this
    # function as shown here.
    register_list_str = '|'.join(register_list)

    # /api ver 1, 2, 3 or none
    with SubMapper(map,
                   controller='api',
                   path_prefix='/api{ver:/1|/2|/3|}',
                   ver='/1') as m:
        m.connect('/search/{register}', action='search')

    # /api/util ver 1, 2 or none
    with SubMapper(map,
                   controller='api',
                   path_prefix='/api{ver:/1|/2|}',
                   ver='/1') as m:
        m.connect('/util/dataset/munge_name', action='munge_package_name')
        m.connect('/util/dataset/munge_title_to_name',
                  action='munge_title_to_package_name')
        m.connect('/util/tag/munge', action='munge_tag')

    ###########
    ## /END API
    ###########

    # Redirect the /package(s) URLs to their /dataset equivalents.
    map.redirect('/packages', '/dataset')
    map.redirect('/packages/{url:.*}', '/dataset/{url}')
    map.redirect('/package', '/dataset')
    map.redirect('/package/{url:.*}', '/dataset/{url}')

    # Dataset routes. The fixed-prefix routes are connected before the
    # generic '/dataset/{id}' route below.
    with SubMapper(map, controller='package') as m:
        m.connect('search',
                  '/dataset',
                  action='search',
                  highlight_actions='index search')
        m.connect('dataset_new', '/dataset/new', action='new')
        m.connect('/dataset/{action}',
                  requirements=dict(
                      action='|'.join(['list', 'autocomplete', 'search'])))

        m.connect('/dataset/{action}/{id}/{revision}',
                  action='read_ajax',
                  requirements=dict(action='|'.join([
                      'read',
                      'edit',
                      'history',
                  ])))
        m.connect('/dataset/{action}/{id}',
                  requirements=dict(action='|'.join([
                      'new_resource',
                      'history',
                      'read_ajax',
                      'history_ajax',
                      'follow',
                      'activity',
                      'groups',
                      'unfollow',
                      'delete',
                      'api_data',
                  ])))
        m.connect('dataset_edit',
                  '/dataset/edit/{id}',
                  action='edit',
                  ckan_icon='pencil-square-o')
        m.connect('dataset_followers',
                  '/dataset/followers/{id}',
                  action='followers',
                  ckan_icon='users')
        m.connect('dataset_activity',
                  '/dataset/activity/{id}',
                  action='activity',
                  ckan_icon='clock-o')
        m.connect('/dataset/activity/{id}/{offset}', action='activity')
        m.connect('dataset_groups',
                  '/dataset/groups/{id}',
                  action='groups',
                  ckan_icon='users')
        m.connect('dataset_resources',
                  '/dataset/resources/{id}',
                  action='resources',
                  ckan_icon='bars')
        m.connect('dataset_read',
                  '/dataset/{id}',
                  action='read',
                  ckan_icon='sitemap')
        m.connect('/dataset/{id}/resource/{resource_id}',
                  action='resource_read')
        m.connect('/dataset/{id}/resource_delete/{resource_id}',
                  action='resource_delete')
        m.connect('resource_edit',
                  '/dataset/{id}/resource_edit/{resource_id}',
                  action='resource_edit',
                  ckan_icon='pencil-square-o')
        m.connect('/dataset/{id}/resource/{resource_id}/download',
                  action='resource_download')
        m.connect('/dataset/{id}/resource/{resource_id}/download/{filename}',
                  action='resource_download')
        m.connect('/dataset/{id}/resource/{resource_id}/embed',
                  action='resource_embedded_dataviewer')
        m.connect('/dataset/{id}/resource/{resource_id}/viewer',
                  action='resource_embedded_dataviewer',
                  width="960",
                  height="800")
        m.connect('/dataset/{id}/resource/{resource_id}/preview',
                  action='resource_datapreview')
        m.connect('views',
                  '/dataset/{id}/resource/{resource_id}/views',
                  action='resource_views',
                  ckan_icon='bars')
        # 'new_view' and 'edit_view' both dispatch to the edit_view action;
        # only the latter carries a view_id.
        m.connect('new_view',
                  '/dataset/{id}/resource/{resource_id}/new_view',
                  action='edit_view',
                  ckan_icon='pencil-square-o')
        m.connect('edit_view',
                  '/dataset/{id}/resource/{resource_id}/edit_view/{view_id}',
                  action='edit_view',
                  ckan_icon='pencil-square-o')
        m.connect('resource_view',
                  '/dataset/{id}/resource/{resource_id}/view/{view_id}',
                  action='resource_view')
        m.connect('/dataset/{id}/resource/{resource_id}/view/',
                  action='resource_view')

    # group
    map.redirect('/groups', '/group')
    map.redirect('/groups/{url:.*}', '/group/{url}')

    # These named routes are used for custom group forms which will use the
    # names below based on the group.type ('group' is the default type)
    with SubMapper(map, controller='group') as m:
        m.connect('group_index',
                  '/group',
                  action='index',
                  highlight_actions='index search')
        m.connect('group_list', '/group/list', action='list')
        m.connect('group_new', '/group/new', action='new')

        # One named route per action: 'group_<action>' at
        # '/group/<action>/{id}'.
        for action in [
                'edit',
                'delete',
                'member_new',
                'member_delete',
                'history',
                'followers',
                'follow',
                'unfollow',
                'admins',
                'activity',
        ]:
            m.connect('group_' + action,
                      '/group/' + action + '/{id}',
                      action=action)

        # NOTE: several connect() calls below end with a trailing comma,
        # which makes the statement a one-element tuple expression; the
        # call itself still runs.
        m.connect('group_about',
                  '/group/about/{id}',
                  action='about',
                  ckan_icon='info-circle'),
        # 'group_edit' was already connected by the loop above; this call
        # connects the same name and path again, now with an icon.
        m.connect('group_edit',
                  '/group/edit/{id}',
                  action='edit',
                  ckan_icon='pencil-square-o')
        m.connect('group_members',
                  '/group/members/{id}',
                  action='members',
                  ckan_icon='users'),
        m.connect('group_activity',
                  '/group/activity/{id}/{offset}',
                  action='activity',
                  ckan_icon='clock-o'),
        m.connect('group_read',
                  '/group/{id}',
                  action='read',
                  ckan_icon='sitemap')

    # organizations these basically end up being the same as groups
    with SubMapper(map, controller='organization') as m:
        # The same path is connected under two route names.
        m.connect('organizations_index', '/organization', action='index')
        m.connect('organization_index', '/organization', action='index')
        m.connect('organization_new', '/organization/new', action='new')
        for action in [
                'delete', 'admins', 'member_new', 'member_delete', 'history'
        ]:
            m.connect('organization_' + action,
                      '/organization/' + action + '/{id}',
                      action=action)

        m.connect('organization_activity',
                  '/organization/activity/{id}/{offset}',
                  action='activity',
                  ckan_icon='clock-o')
        # NOTE(review): 'organization_read' is connected twice (here without
        # an icon and again below with ckan_icon='sitemap'), and this first
        # '/organization/{id}' route precedes the about/edit/members routes.
        m.connect('organization_read', '/organization/{id}', action='read')
        m.connect('organization_about',
                  '/organization/about/{id}',
                  action='about',
                  ckan_icon='info-circle')
        m.connect('organization_read',
                  '/organization/{id}',
                  action='read',
                  ckan_icon='sitemap')
        m.connect('organization_edit',
                  '/organization/edit/{id}',
                  action='edit',
                  ckan_icon='pencil-square-o')
        m.connect('organization_members',
                  '/organization/members/{id}',
                  action='members',
                  ckan_icon='users')
        m.connect('organization_bulk_process',
                  '/organization/bulk_process/{id}',
                  action='bulk_process',
                  ckan_icon='sitemap')
    # Hand the mapper to the package and group plugin registries.
    lib_plugins.register_package_plugins(map)
    lib_plugins.register_group_plugins(map)

    # tags
    map.redirect('/tags', '/tag')
    map.redirect('/tags/{url:.*}', '/tag/{url}')
    map.redirect('/tag/read/{url:.*}',
                 '/tag/{url}',
                 _redirect_code='301 Moved Permanently')
    map.connect('/tag', controller='tag', action='index')
    map.connect('/tag/{id}', controller='tag', action='read')
    # users
    map.redirect('/users/{url:.*}', '/user/{url}')

    # revisions
    with SubMapper(map, controller='revision') as m:
        m.connect('/revision', action='index')
        m.connect('/revision/edit/{id}', action='edit')
        m.connect('/revision/diff/{id}', action='diff')
        m.connect('/revision/list', action='list')
        m.connect('/revision/{id}', action='read')

    # storage; the controller is given as a 'module:Class' dotted name
    with SubMapper(
            map, controller='ckan.controllers.storage:StorageController') as m:
        m.connect('storage_file', '/storage/f/{label:.*}', action='file')

    with SubMapper(map, controller='util') as m:
        m.connect('/i18n/strings_{lang}.js', action='i18n_js_strings')
        m.connect('/util/redirect', action='redirect')
        m.connect('/testing/primer', action='primer')
        m.connect('/testing/markup', action='markup')

    # robots.txt
    map.connect('/(robots.txt)', controller='template', action='view')

    # Mark all unmarked routes added up until now as core routes
    for route in map.matchlist:
        if not hasattr(route, '_ckan_core'):
            route._ckan_core = True

    for plugin in p.PluginImplementations(p.IRoutes):
        map = plugin.after_map(map)

    # Mark all routes added from extensions on the `after_map` extension point
    # as non-core
    for route in map.matchlist:
        if not hasattr(route, '_ckan_core'):
            route._ckan_core = False

    # sometimes we get requests for favicon.ico we should redirect to
    # the real favicon location.
    map.redirect('/favicon.ico', config.get('ckan.favicon'))

    # Strip a trailing slash with a permanent redirect, then let the
    # template controller handle every URL that is still unmatched.
    map.redirect('/*(url)/', '/{url}', _redirect_code='301 Moved Permanently')
    map.connect('/*url', controller='template', action='view', ckan_core=True)

    return map
Example #14
0
def get_action(action):
    '''Return the named :py:mod:`ckan.logic.action` function.

    For example ``get_action('package_create')`` will normally return the
    :py:func:`ckan.logic.action.create.package_create()` function.

    For documentation of the available action functions, see
    :ref:`api-reference`.

    You should always use ``get_action()`` instead of importing an action
    function directly, because :py:class:`~ckan.plugins.interfaces.IActions`
    plugins can override action functions, causing ``get_action()`` to return a
    plugin-provided function instead of the default one.

    Usage::

        import ckan.plugins.toolkit as toolkit

        # Call the package_create action function:
        toolkit.get_action('package_create')(context, data_dict)

    As the context parameter passed to an action function is commonly::

        context = {'model': ckan.model, 'session': ckan.model.Session,
                   'user': pylons.c.user}

    an action function returned by ``get_action()`` will automatically add
    these parameters to the context if they are not defined.  This is
    especially useful for plugins as they should not really be importing parts
    of ckan eg :py:mod:`ckan.model` and as such do not have access to ``model``
    or ``model.Session``.

    If a ``context`` of ``None`` is passed to the action function then the
    default context dict will be created.

    .. note::

        Many action functions modify the context dict. It can therefore
        not be reused for multiple calls of the same or different action
        functions.

    The first call (while the module-level ``_actions`` cache is empty)
    collects the core actions, applies plugin overrides and chained actions,
    wraps every action with the auth-audit wrapper and stores the result in
    ``_actions``.  Later calls are plain cache lookups.

    :param action: name of the action function to return,
        eg. ``'package_create'``
    :type action: string

    :returns: the named action function
    :rtype: callable

    :raises KeyError: if no action with that name exists (raised both on the
        cache-building call and on cached lookups)
    :raises NameConflict: if two plugins provide a non-chained action with
        the same name
    :raises NotFound: if a plugin provides a chained action for a name that
        neither core nor any plugin implements

    '''

    if _actions:
        if action not in _actions:
            raise KeyError("Action '%s' not found" % action)
        return _actions.get(action)
    # Otherwise look in all the plugins to resolve all possible First
    # get the default ones in the ckan/logic/action directory Rather
    # than writing them out in full will use importlib.import_module
    # to load anything from ckan.logic.action that looks like it might
    # be an action
    for action_module_name in ['get', 'create', 'update', 'delete', 'patch']:
        module = importlib.import_module('.' + action_module_name,
                                         'ckan.logic.action')
        for k, v in authz.get_local_functions(module):
            _actions[k] = v
            # Whitelist all actions defined in logic/action/get.py as
            # being side-effect free.
            if action_module_name == 'get' and \
               not hasattr(v, 'side_effect_free'):
                v.side_effect_free = True

    # Then overwrite them with any specific ones in the plugins:
    resolved_action_plugins = {}
    fetched_actions = {}
    chained_actions = defaultdict(list)
    for plugin in p.PluginImplementations(p.IActions):
        for name, action_function in plugin.get_actions().items():
            if _is_chained_action(action_function):
                chained_actions[name].append(action_function)
            elif name in resolved_action_plugins:
                raise NameConflict(
                    'The action %r is already implemented in %r' %
                    (name, resolved_action_plugins[name]))
            else:
                resolved_action_plugins[name] = plugin.name
                # Extensions are exempted from the auth audit for now
                # This needs to be resolved later
                action_function.auth_audit_exempt = True
                fetched_actions[name] = action_function
    for name, func_list in six.iteritems(chained_actions):
        if name not in fetched_actions and name not in _actions:
            # nothing to override from plugins or core
            raise NotFound('The action %r is not found for chained action' %
                           (name))
        for func in reversed(func_list):
            # try other plugins first, fall back to core
            prev_func = fetched_actions.get(name, _actions.get(name))
            new_func = functools.partial(func, prev_func)
            # persisting attributes to the new partial function
            for attribute, value in six.iteritems(func.__dict__):
                setattr(new_func, attribute, value)
            fetched_actions[name] = new_func

    # Use the updated ones in preference to the originals.
    _actions.update(fetched_actions)

    def make_wrapped(_action, action_name):
        # Factory so that each wrapper closes over its own action/name pair.
        def wrapped(context=None, data_dict=None, **kw):
            if kw:
                # Chained actions are functools.partial objects, which have
                # no __name__; fall back to the registered action name.
                log.critical('%s was passed extra keywords %r',
                             getattr(_action, '__name__', action_name), kw)

            context = _prepopulate_context(context)

            # Auth Auditing - checks that the action function did call
            # check_access (unless there is no accompanying auth function).
            # We push the action name and id onto the __auth_audit stack
            # before calling the action, and check_access removes it.
            # (We need the id of the action in case the action is wrapped
            # inside an action of the same name, which happens in the
            # datastore)
            context.setdefault('__auth_audit', [])
            context['__auth_audit'].append((action_name, id(_action)))

            # check_access(action_name, context, data_dict=None)
            result = _action(context, data_dict, **kw)
            try:
                audit = context['__auth_audit'][-1]
                if audit[0] == action_name and audit[1] == id(_action):
                    if action_name not in authz.auth_functions_list():
                        log.debug('No auth function for %s' % action_name)
                    elif not getattr(_action, 'auth_audit_exempt', False):
                        raise Exception(
                            'Action function {0} did not call its '
                            'auth function'.format(action_name))
                    # remove from audit stack
                    context['__auth_audit'].pop()
            except IndexError:
                # The audit stack is already empty: nothing left to check.
                pass

            return result

        return wrapped

    # wrap the functions (iterate over a snapshot because the cache entries
    # are replaced while looping)
    for action_name, _action in list(_actions.items()):
        fn = make_wrapped(_action, action_name)
        # we need to mirror the docstring
        fn.__doc__ = _action.__doc__
        # we need to retain the side effect free behaviour
        if getattr(_action, 'side_effect_free', False):
            fn.side_effect_free = True
        _actions[action_name] = fn

    # Behave exactly like the cached path above: an unknown name is an
    # error, not a silent ``None``.
    if action not in _actions:
        raise KeyError("Action '%s' not found" % action)
    return _actions[action]
Example #15
0
def update_config():
    ''' This code needs to be run when the config is changed to take those
    changes into account. It is called whenever a plugin is loaded as the
    plugin might have changed the config values (for instance it might
    change ckan.site_url)

    The steps are order dependent: plugins update the config first, then
    environment variables are applied, the site URL / timezone settings are
    validated, and only afterwards are search, routing, helpers, templates,
    the Jinja2 environment and SQLAlchemy (re)initialised.

    :raises RuntimeError: if ``ckan.site_url`` is empty or does not start
        with ``http``
    :raises CkanConfigurationException: if ``ckan.display_timezone`` or
        ``ckan.base_templates_folder`` hold an unsupported value
    '''

    for plugin in p.PluginImplementations(p.IConfigurer):
        # must do update in place as this does not work:
        # config = plugin.update_config(config)
        plugin.update_config(config)

    # Set whitelisted env vars on config object
    # This is set up before globals are initialized

    ckan_db = os.environ.get('CKAN_DB', None)
    if ckan_db:
        msg = 'Setting CKAN_DB as an env var is deprecated and will be' \
            ' removed in a future release. Use CKAN_SQLALCHEMY_URL instead.'
        # Logger.warn is a deprecated alias of Logger.warning
        log.warning(msg)
        config['sqlalchemy.url'] = ckan_db

    # Non-empty environment variables take precedence over the config values
    for option in CONFIG_FROM_ENV_VARS:
        from_env = os.environ.get(CONFIG_FROM_ENV_VARS[option], None)
        if from_env:
            config[option] = from_env

    root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    site_url = config.get('ckan.site_url', '')
    if not site_url:
        raise RuntimeError(
            'ckan.site_url is not configured and it must have a value.'
            ' Please amend your .ini file.')
    if not site_url.lower().startswith('http'):
        raise RuntimeError(
            'ckan.site_url should be a full URL, including the schema '
            '(http or https)')

    display_timezone = config.get('ckan.display_timezone', '')
    if (display_timezone and
            display_timezone != 'server' and
            display_timezone not in pytz.all_timezones):
        raise CkanConfigurationException(
            "ckan.display_timezone is not 'server' or a valid timezone"
        )

    # Remove trailing slash(es) from site_url if present
    config['ckan.site_url'] = config['ckan.site_url'].rstrip('/')

    ckan_host = config['ckan.host'] = urlparse(site_url).netloc
    if config.get('ckan.site_id') is None:
        # Default the site id to the host name without the port
        if ':' in ckan_host:
            ckan_host, port = ckan_host.split(':')
        assert ckan_host, 'You need to configure ckan.site_url or ' \
                          'ckan.site_id for SOLR search-index rebuild to work.'
        config['ckan.site_id'] = ckan_host

    # ensure that a favicon has been set
    favicon = config.get('ckan.favicon', '/base/images/ckan.ico')
    config['ckan.favicon'] = favicon

    # Init SOLR settings and check if the schema is compatible
    # from ckan.lib.search import SolrSettings, check_solr_schema_version

    # lib.search is imported here as we need the config enabled and parsed
    search.SolrSettings.init(config.get('solr_url'),
                             config.get('solr_user'),
                             config.get('solr_password'))
    search.check_solr_schema_version()

    routes_map = routing.make_map()

    lib_plugins.reset_package_plugins()
    lib_plugins.register_package_plugins()
    lib_plugins.reset_group_plugins()
    lib_plugins.register_group_plugins()

    config['routes.map'] = routes_map
    # The RoutesMiddleware needs its mapper updating if it exists
    if 'routes.middleware' in config:
        config['routes.middleware'].mapper = routes_map
    # routes.named_routes is a CKAN thing
    config['routes.named_routes'] = routing.named_routes
    config['pylons.app_globals'] = app_globals.app_globals
    # initialise the globals
    app_globals.app_globals._init()

    helpers.load_plugin_helpers()
    config['pylons.h'] = helpers.helper_functions

    # Templates and CSS loading from configuration
    valid_base_templates_folder_names = ['templates']
    templates = config.get('ckan.base_templates_folder', 'templates')
    config['ckan.base_templates_folder'] = templates

    if templates not in valid_base_templates_folder_names:
        raise CkanConfigurationException(
            'You provided an invalid value for ckan.base_templates_folder. '
            'Possible values are: "templates".'
        )

    jinja2_templates_path = os.path.join(root, templates)
    log.info('Loading templates from %s', jinja2_templates_path)
    template_paths = [jinja2_templates_path]

    extra_template_paths = config.get('extra_template_paths', '')
    if extra_template_paths:
        # must be first for them to override defaults
        template_paths = extra_template_paths.split(',') + template_paths
    config['computed_template_paths'] = template_paths

    # Set the default language for validation messages from formencode
    # to what is set as the default locale in the config
    default_lang = config.get('ckan.locale_default', 'en')
    formencode.api.set_stdtranslation(domain="FormEncode",
                                      languages=[default_lang])

    # Markdown ignores the logger config, so to get rid of excessive
    # markdown debug messages in the log, set it to the level of the
    # root logger.
    logging.getLogger("MARKDOWN").setLevel(logging.getLogger().level)

    # Create Jinja2 environment
    env = jinja_extensions.Environment(
        **jinja_extensions.get_jinja_env_options())
    env.install_gettext_callables(_, ungettext, newstyle=True)
    # custom filters
    env.filters['empty_and_escape'] = jinja_extensions.empty_and_escape
    config['pylons.app_globals'].jinja_env = env

    # CONFIGURATION OPTIONS HERE (note: all config options will override
    # any Pylons config options)

    # Initialize SQLAlchemy
    engine = sqlalchemy.engine_from_config(config)
    model.init_model(engine)

    for plugin in p.PluginImplementations(p.IConfigurable):
        plugin.configure(config)

    # reset the template cache - we do this here so that when we load the
    # environment it is clean
    render.reset_template_info_cache()

    # clear other caches
    logic.clear_actions_cache()
    logic.clear_validators_cache()
    authz.clear_auth_functions_cache()

    # Here we create the site user if they are not already in the database
    try:
        logic.get_action('get_site_user')({'ignore_auth': True}, None)
    except (sqlalchemy.exc.ProgrammingError,
            sqlalchemy.exc.OperationalError,
            sqlalchemy.exc.InternalError):
        # The database is not initialised.  This is a bit dirty.  This occurs
        # when running tests (ProgrammingError for Postgres, OperationalError
        # for SQLite; Travis hits InternalError).
        pass

    # Close current session and open database connections to ensure a
    # clean environment even if an error occurs later on
    model.Session.remove()
    model.Session.bind.dispose()
Example #16
0
            default_facet_titles = {
                    'organization': _('Organizations'),
                    'groups': _('Groups'),
                    'tags': _('Tags'),
                    'res_format': _('Formats'),
                    'license_id': _('License'),
                    }

            for facet in g.facets:
                if facet in default_facet_titles:
                    facets[facet] = default_facet_titles[facet]
                else:
                    facets[facet] = facet

            # Facet titles
            for plugin in p.PluginImplementations(p.IFacets):
                facets = plugin.dataset_facets(facets, package_type)

            c.facet_titles = facets

            data_dict = {
                'q': q,
                'fq': fq.strip(),
                'facet.field': facets.keys(),
                'rows': limit,
                'start': (page - 1) * limit,
                'sort': sort_by,
                'extras': search_extras
            }

            query = get_action('package_search')(context, data_dict)
Example #17
0
def package_create(context, data_dict):
    '''Create a new dataset (package).

    The caller must be authorized to create datasets and, when groups are
    given for the new dataset, to edit each of those groups as well.

    Depending on the value of the ``type`` parameter, plugins implementing
    the ``IDatasetForm`` interface may change the parameters this function
    accepts.

    :param name: the name of the new dataset, must be between 2 and 100
        characters long and contain only lowercase alphanumeric characters,
        ``-`` and ``_``, e.g. ``'warandpeace'``
    :type name: string
    :param title: the title of the dataset (optional, default: same as
        ``name``)
    :type title: string
    :param author: the name of the dataset's author (optional)
    :type author: string
    :param author_email: the email address of the dataset's author (optional)
    :type author_email: string
    :param maintainer: the name of the dataset's maintainer (optional)
    :type maintainer: string
    :param maintainer_email: the email address of the dataset's maintainer
        (optional)
    :type maintainer_email: string
    :param license_id: the id of the dataset's license, see ``license_list()``
        for available values (optional)
    :type license_id: license id string
    :param notes: a description of the dataset (optional)
    :type notes: string
    :param url: a URL for the dataset's source (optional)
    :type url: string
    :param version: (optional)
    :type version: string, no longer than 100 characters
    :param state: the current state of the dataset, e.g. ``'active'`` or
        ``'deleted'``, only active datasets show up in search results and
        other lists of datasets, this parameter will be ignored if you are not
        authorized to change the state of the dataset (optional, default:
        ``'active'``)
    :type state: string
    :param type: the type of the dataset (optional), ``IDatasetForm`` plugins
        associate themselves with different dataset types and provide custom
        dataset handling behaviour for these types
    :type type: string
    :param resources: the dataset's resources, see ``resource_create()``
        for the format of resource dictionaries (optional)
    :type resources: list of resource dictionaries
    :param tags: the dataset's tags, see ``tag_create()`` for the format
        of tag dictionaries (optional)
    :type tags: list of tag dictionaries
    :param extras: the dataset's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to datasets, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string), and optionally ``'deleted'``
    :type extras: list of dataset extra dictionaries
    :param relationships_as_object: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_object: list of relationship dictionaries
    :param relationships_as_subject: see ``package_relationship_create()`` for
        the format of relationship dictionaries (optional)
    :type relationships_as_subject: list of relationship dictionaries
    :param groups: the groups to which the dataset belongs (optional), each
        group dictionary should have one or more of the following keys which
        identify an existing group:
        ``'id'`` (the id of the group, string), ``'name'`` (the name of the
        group, string), ``'title'`` (the title of the group, string), to see
        which groups exist call ``group_list()``
    :type groups: list of dictionaries
    :param owner_org: the id of the dataset's owning organization, see
        ``organization_list()`` or ``organization_list_for_user`` for
        available values (optional)
    :type owner_org: string

    :returns: the newly created dataset (unless 'return_id_only' is set to True
              in the context, in which case just the dataset id will be returned)
    :rtype: dictionary

    '''
    model = context['model']
    user = context['user']

    package_plugin = lib_plugins.lookup_package_plugin(data_dict.get('type'))
    # A schema supplied through the context wins over the plugin's own.
    schema = (context['schema'] if 'schema' in context
              else package_plugin.create_package_schema())

    _check_access('package_create', context, data_dict)

    if 'api_version' not in context:
        # check_data_dict() is deprecated: it is only invoked when the
        # package plugin still provides it.
        legacy_check = getattr(package_plugin, 'check_data_dict', None)
        if legacy_check:
            try:
                legacy_check(data_dict, schema)
            except TypeError:
                # Older plugins take the data dict only (no schema argument),
                # so retry with the old call signature.
                package_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)
    log.debug('package_create validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'), data.get('name'), data_dict)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    revision = model.repo.new_revision()
    revision.author = user
    revision.message = (
        context['message'] if 'message' in context
        else _(u'REST API: Create object %s') % data.get("name"))

    # The creating user, when it can be resolved, is recorded as creator and
    # passed on as the admin for the default user roles.
    creator = model.User.by_name(user.decode('utf8')) if user else None
    admins = []
    if creator:
        admins.append(creator)
        data['creator_user_id'] = creator.id

    pkg = model_save.package_dict_save(data, context)

    model.setup_default_user_roles(pkg, admins)
    # Flush so that the package id exists before extensions are notified
    model.Session.flush()
    data['id'] = pkg.id

    # The owner organization is set through a context copy that skips the
    # auth check and defers the commit.
    org_context = context.copy()
    org_context.update(ignore_auth=True, defer_commit=True)
    _get_action('package_owner_org_update')(
        org_context,
        {'id': pkg.id, 'organization_id': pkg.owner_org})

    for observer in plugins.PluginImplementations(plugins.IPackageController):
        observer.create(pkg)

        observer.after_create(context, data)

    if not context.get('defer_commit'):
        model.repo.commit()

    ## need to let rest api create
    context["package"] = pkg
    ## this is added so that the rest controller can make a new location
    context["id"] = pkg.id
    log.debug('Created object %s' % pkg.name)

    # A user provided schema must not leak into the package_show call below
    context.pop('schema', None)

    if context.get('return_id_only', False):
        return context['id']
    return _get_action('package_show')(context, {'id': context['id']})
Example #18
0
    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        updated_pkg_dict = _get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except ValidationError, e:
        errors = e.error_dict['resources'][n]
        raise ValidationError(errors)

    upload.upload(id, uploader.get_max_resource_size())
    model.repo.commit()

    resource = _get_action('resource_show')(context, {'id': id})

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.after_update(context, resource)

    return resource


def resource_view_update(context, data_dict):
    '''Update a resource view.

    To update a resource_view you must be authorized to update the resource
    that the resource_view belongs to.

    For further parameters see ``resource_view_create()``.

    :param id: the id of the resource_view to update
    :type id: string
Example #19
0
def datapusher_hook(context, data_dict):
    ''' Record a status change of a datapusher job.

    Typically called by the datapusher service whenever the state of one of
    its jobs changes. The matching ``datapusher`` task status is updated with
    the new state. When the job is ``complete``, ``IDataPusher`` plugins are
    notified, the default resource views are created, and the resource is
    resubmitted if its file or URL changed since the job was created.

    :param metadata: metadata produced by the datapusher service, must have
       a resource_id property.
    :type metadata: dict
    :param status: status of the job from the datapusher service
    :type status: string
    '''

    metadata, status = _get_or_bust(data_dict, ['metadata', 'status'])
    res_id = _get_or_bust(metadata, 'resource_id')

    # The auth check gets metadata rather than data_dict because metadata
    # is what carries the resource id.
    p.toolkit.check_access('datapusher_submit', context, metadata)

    task_query = {
        'entity_id': res_id,
        'task_type': 'datapusher',
        'key': 'datapusher'
    }
    task = p.toolkit.get_action('task_status_show')(context, task_query)
    task['state'] = status
    task['last_updated'] = str(datetime.datetime.utcnow())

    resubmit = False

    if status == 'complete':
        # Default views are created if necessary (only those that require
        # the data to be in the DataStore)
        resource_dict = p.toolkit.get_action('resource_show')(
            context, {'id': res_id})
        dataset_dict = p.toolkit.get_action('package_show')(
            context, {'id': resource_dict['package_id']})

        for listener in p.PluginImplementations(interfaces.IDataPusher):
            listener.after_upload(context, resource_dict, dataset_dict)

        view_request = {
            'resource': resource_dict,
            'package': dataset_dict,
            'create_datastore_views': True,
        }
        logic.get_action('resource_create_default_resource_views')(
            context, view_request)

        last_modified = resource_dict.get('last_modified')
        task_created = metadata.get('task_created')
        current_url = resource_dict.get('url')
        original_url = metadata.get('original_url')

        if last_modified and task_created:
            # Has the uploaded file been modified in the meantime?
            try:
                modified_at = parse_date(last_modified)
                created_at = parse_date(task_created)
            except ValueError:
                # Unparseable timestamps: leave resubmit untouched
                pass
            else:
                if modified_at > created_at:
                    log.debug('Uploaded file more recent: {0} > {1}'.format(
                        modified_at, created_at))
                    resubmit = True
        elif current_url and original_url and current_url != original_url:
            # Has the URL of the file been modified in the meantime?
            log.debug('URLs are different: {0} != {1}'.format(
                current_url, original_url))
            resubmit = True

    context['ignore_auth'] = True
    p.toolkit.get_action('task_status_update')(context, task)

    if not resubmit:
        return

    log.debug('Resource {0} has been modified, '
              'resubmitting to DataPusher'.format(res_id))
    p.toolkit.get_action('datapusher_submit')(
        context, {'resource_id': res_id})
Example #20
0
def resource_update(context, data_dict):
    '''Update a resource.

    To update a resource you must be authorized to update the dataset that the
    resource belongs to.

    For further parameters see
    :py:func:`~ckan.logic.action.create.resource_create`.

    The resource is updated by replacing its entry in the owning dataset's
    ``resources`` list and calling ``package_update`` on the whole dataset.

    :param id: the id of the resource to update
    :type id: string

    :returns: the updated resource
    :rtype: string

    :raises NotFound: if the resource does not exist or is not listed in its
        dataset
    :raises ValidationError: if ``package_update`` rejects the data; only
        the errors for this resource are reported when they can be isolated,
        otherwise the complete error dict is passed on

    '''
    model = context['model']
    user = context['user']
    id = _get_or_bust(data_dict, "id")
    if not data_dict.get('url'):
        data_dict['url'] = ''

    resource = model.Resource.get(id)
    # The resource is put in the context only for the duration of the
    # access check below.
    context["resource"] = resource

    if not resource:
        log.debug('Could not find resource %s', id)
        raise NotFound(_('Resource was not found.'))

    _check_access('resource_update', context, data_dict)
    del context["resource"]

    package_id = resource.package.id
    pkg_dict = _get_action('package_show')(dict(context, return_type='dict'), {
        'id': package_id
    })

    # Find the position of this resource inside the dataset dict; ``n`` is
    # used below to swap in the new data and to pick out its errors.
    for n, res in enumerate(pkg_dict['resources']):
        if res['id'] == id:
            break
    else:
        log.error('Could not find resource %s after all', id)
        raise NotFound(_('Resource was not found.'))

    # Persist the datastore_active extra if already present and not provided
    if ('datastore_active' in resource.extras
            and 'datastore_active' not in data_dict):
        data_dict['datastore_active'] = resource.extras['datastore_active']

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.before_update(context, pkg_dict['resources'][n], data_dict)

    upload = uploader.get_resource_uploader(data_dict)

    if 'mimetype' not in data_dict:
        if hasattr(upload, 'mimetype'):
            data_dict['mimetype'] = upload.mimetype

    if 'size' not in data_dict and 'url_type' in data_dict:
        if hasattr(upload, 'filesize'):
            data_dict['size'] = upload.filesize

    pkg_dict['resources'][n] = data_dict

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        _get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except ValidationError as e:
        try:
            errors = e.error_dict['resources'][n]
        except (KeyError, IndexError):
            # The failure is not (only) about this resource, e.g. a
            # dataset-level field is invalid; report the full error dict
            # instead of masking it with a KeyError/IndexError.
            errors = e.error_dict
        raise ValidationError(errors)
Example #21
0
            module = __import__(module_path)
        except ImportError, e:
            log.debug('No auth module for action "%s"' % auth_module_name)
            continue

        for part in module_path.split('.')[1:]:
            module = getattr(module, part)

        for key, v in module.__dict__.items():
            if not key.startswith('_'):
                AuthFunctions._functions[key] = v

    # Then overwrite them with any specific ones in the plugins:
    resolved_auth_function_plugins = {}
    fetched_auth_functions = {}
    for plugin in p.PluginImplementations(p.IAuthFunctions):
        for name, auth_function in plugin.get_auth_functions().items():
            if name in resolved_auth_function_plugins:
                raise Exception(
                    'The auth function %r is already implemented in %r' %
                    (name, resolved_auth_function_plugins[name]))
            log.debug('Auth function %r was inserted', plugin.name)
            resolved_auth_function_plugins[name] = plugin.name
            fetched_auth_functions[name] = auth_function
    # Use the updated ones in preference to the originals.
    AuthFunctions._functions.update(fetched_auth_functions)
    return AuthFunctions._functions.get(action)


CONFIG_PERMISSIONS_DEFAULTS = {
    # permission and default
Example #22
0
def output_feed(results, feed_title, feed_description, feed_link, feed_url,
                navigation_urls, feed_guid):
    """Render dataset dicts as an Atom feed and wrap it in a response.

    :param results: iterable of dataset dicts; each must provide ``id`` and
        ``name`` and may provide ``title``, ``notes``, ``tags``, ``author``,
        ``author_email``, ``metadata_modified`` and ``metadata_created``
    :param feed_title: title passed to the feed class
    :param feed_description: description passed to the feed class
    :param feed_link: link passed to the feed class
    :param feed_url: passed to the feed class as ``feed_url``
    :param navigation_urls: mapping with the keys ``previous``, ``next``,
        ``first`` and ``last``
    :param feed_guid: passed to the feed class as ``feed_guid``
    :returns: a 200 response whose body is the UTF-8 feed and whose
        ``Content-Type`` is ``application/atom+xml``
    """
    # Configured feed author first; the site id is only consulted when the
    # author setting is empty.
    author_name = config.get(u'ckan.feeds.author_name', u'').strip()
    if not author_name:
        author_name = config.get(u'ckan.site_id', u'').strip()

    # TODO: language
    # Every IFeed plugin exposing get_feed_class() is asked in turn, so the
    # last such plugin decides the class.
    feed_class = None
    for feed_plugin in plugins.PluginImplementations(plugins.IFeed):
        if hasattr(feed_plugin, u'get_feed_class'):
            feed_class = feed_plugin.get_feed_class()
    feed_class = feed_class or _FixedAtom1Feed

    feed = feed_class(
        feed_title,
        feed_link,
        feed_description,
        language=u'en',
        author_name=author_name,
        feed_guid=feed_guid,
        feed_url=feed_url,
        previous_page=navigation_urls[u'previous'],
        next_page=navigation_urls[u'next'],
        first_page=navigation_urls[u'first'],
        last_page=navigation_urls[u'last'],
    )

    for dataset in results:
        # As with the feed class, the last plugin providing extra item
        # fields replaces whatever earlier plugins returned.
        extra_fields = {}
        for feed_plugin in plugins.PluginImplementations(plugins.IFeed):
            if hasattr(feed_plugin, u'get_item_additional_fields'):
                extra_fields = feed_plugin.get_item_additional_fields(dataset)

        item_title = dataset.get(u'title', u'')
        item_link = h.url_for(u'api.action',
                              logic_function=u'package_read',
                              id=dataset['id'],
                              ver=3,
                              _external=True)
        item_description = dataset.get(u'notes', u'')
        item_updated = h.date_str_to_datetime(
            dataset.get(u'metadata_modified'))
        item_published = h.date_str_to_datetime(
            dataset.get(u'metadata_created'))
        item_unique_id = _create_atom_id(u'/dataset/%s' % dataset['id'])
        item_author_name = dataset.get(u'author', u'')
        item_author_email = dataset.get(u'author_email', u'')
        item_categories = [tag['name'] for tag in dataset.get(u'tags', [])]

        # The enclosure points at the JSON representation of the dataset;
        # its length is the size of the serialised dict.
        enclosure_url = h.url_for(u'api.action',
                                  logic_function=u'package_show',
                                  id=dataset['name'],
                                  ver=3,
                                  _external=True)
        enclosure_length = text_type(len(json.dumps(dataset)))
        item_enclosure = webhelpers.feedgenerator.Enclosure(
            enclosure_url, enclosure_length, u'application/json')

        feed.add_item(
            title=item_title,
            link=item_link,
            description=item_description,
            updated=item_updated,
            published=item_published,
            unique_id=item_unique_id,
            author_name=item_author_name,
            author_email=item_author_email,
            categories=item_categories,
            enclosure=item_enclosure,
            **extra_fields)

    response = make_response(feed.writeString(u'utf-8'), 200)
    response.headers['Content-Type'] = u'application/atom+xml'
    return response
Example #23
0
    def _read(self, id, limit, group_type):
        '''Run the dataset search for a group page and publish it on ``c``.

        This is common code used by both read and bulk_process.

        Reads ``c.group_dict``, ``c.group``, ``c.action`` and
        ``request.params``; on success sets ``c.q``,
        ``c.description_formatted``, ``c.drill_down_url``,
        ``c.remove_field``, ``c.fields``, ``c.facet_titles``, ``c.page``,
        ``c.facets``, ``c.search_facets``, ``c.search_facets_limits`` and
        ``c.sort_by_selected``, and stores the result count in
        ``c.group_dict['package_count']``.

        If the search raises ``search.SearchError`` the error is logged and
        ``c.query_error``, an empty ``c.facets`` and an empty ``c.page`` are
        set instead.

        :param id: group identifier used when building pager URLs
        :param limit: number of datasets per page
        :param group_type: group type used to pick the schema, the
            controller and the facet hook
        '''
        context = {'model': model, 'session': model.Session,
                   'user': c.user,
                   'schema': self._db_to_form_schema(group_type=group_type),
                   'for_view': True, 'extras_as_string': True}

        q = c.q = request.params.get('q', '')
        # Search within group
        if c.group_dict.get('is_organization'):
            fq = 'owner_org:"%s"' % c.group_dict.get('id')
        else:
            fq = 'groups:"%s"' % c.group_dict.get('name')

        c.description_formatted = \
            h.render_markdown(c.group_dict.get('description'))

        context['return_query'] = True

        page = h.get_page_number(request.params)

        # most search operations should reset the page counter:
        params_nopage = [(k, v) for k, v in request.params.items()
                         if k != 'page']
        sort_by = request.params.get('sort', None)

        def search_url(params):
            # Build the URL of this group's read/bulk_process page with the
            # given (key, value) pairs as the query string.
            controller = lookup_group_controller(group_type)
            action = 'bulk_process' if c.action == 'bulk_process' else 'read'
            url = h.url_for(controller=controller, action=action, id=id)
            # Values are encoded to UTF-8 byte strings (or str()-ed) before
            # being passed to urlencode.
            params = [(k, v.encode('utf-8') if isinstance(v, basestring)
            else str(v)) for k, v in params]
            return url + u'?' + urlencode(params)

        def drill_down_url(**by):
            # URL of the current search with the given parameters added.
            return h.add_url_param(alternative_url=None,
                                   controller='group', action='read',
                                   extras=dict(id=c.group_dict.get('name')),
                                   new_params=by)

        c.drill_down_url = drill_down_url

        def remove_field(key, value=None, replace=None):
            # URL of the current search with the given parameter removed
            # (or replaced).
            return h.remove_url_param(key, value=value, replace=replace,
                                      controller='group', action='read',
                                      extras=dict(id=c.group_dict.get('name')))

        c.remove_field = remove_field

        def pager_url(q=None, page=None):
            # URL for another page of the same search; ``q`` is accepted but
            # not used.
            params = list(params_nopage)
            params.append(('page', page))
            return search_url(params)

        try:

            # Set the custom default sort parameter here
            # Might need a rewrite when ckan is updated
            if q and not sort_by:
                sort_by = 'score desc, metadata_modified desc'
            elif not q and not sort_by:
                sort_by = 'date_released desc'

            # Split the remaining request parameters: "ext_" ones are passed
            # to the search as extras, the others become field filters that
            # are appended to the query string. Parameters starting with "_"
            # and empty values are ignored.
            c.fields = []
            search_extras = {}
            for (param, value) in request.params.items():
                if not param in ['q', 'page', 'sort'] \
                        and len(value) and not param.startswith('_'):
                    if not param.startswith('ext_'):
                        c.fields.append((param, value))
                        q += ' %s: "%s"' % (param, value)
                    else:
                        search_extras[param] = value

            # Private datasets are only included when the current group is
            # among the organizations returned by
            # h.organizations_available('read').
            include_private = False
            user_member_of_orgs = [org['id'] for org
                                   in h.organizations_available('read')]

            if (c.group and c.group.id in user_member_of_orgs):
                include_private = True

            facets = OrderedDict()

            default_facet_titles = {'organization': _('Organizations'),
                                    'groups': _('Groups'),
                                    'tags': _('Tags'),
                                    'res_format': _('Formats'),
                                    'license_id': _('Licenses')}

            # Facets without a default title are labelled with their own name.
            for facet in h.facets():
                if facet in default_facet_titles:
                    facets[facet] = default_facet_titles[facet]
                else:
                    facets[facet] = facet

            # Facet titles
            for plugin in p.PluginImplementations(p.IFacets):
                if group_type == 'organization':
                    facets = plugin.organization_facets(
                        facets, group_type, None)
                else:
                    facets = plugin.group_facets(
                        facets, group_type, None)

            # The 'capacity' facet is dropped unless this is an organization
            # page and organizations_available('read') returned something.
            if 'capacity' in facets and (group_type != 'organization' or
                                             not user_member_of_orgs):
                del facets['capacity']

            c.facet_titles = facets

            data_dict = {
                'q': q,
                'fq': fq,
                'include_private': include_private,
                'facet.field': facets.keys(),
                'rows': limit,
                'sort': sort_by,
                'start': (page - 1) * limit,
                'extras': search_extras
            }

            # package_search is called with a copy of the context that omits
            # the 'schema' key.
            context_ = dict((k, v) for (k, v) in context.items()
                            if k != 'schema')
            query = get_action('package_search')(context_, data_dict)

            c.page = h.Page(
                collection=query['results'],
                page=page,
                url=pager_url,
                item_count=query['count'],
                items_per_page=limit
            )

            c.group_dict['package_count'] = query['count']
            c.facets = query['facets']

            c.search_facets = query['search_facets']
            c.search_facets_limits = {}
            # NOTE: this loop rebinds the ``limit`` parameter; the page size
            # has already been consumed above, so only the facet limits are
            # affected.
            for facet in c.facets.keys():
                limit = int(request.params.get('_%s_limit' % facet,
                                               int(config.get('search.facets.default', 10))))
                c.search_facets_limits[facet] = limit
            c.page.items = query['results']

            c.sort_by_selected = sort_by

        except search.SearchError, se:
            # Degrade to an empty result page rather than failing the request.
            log.error('Group search error: %r', se.args)
            c.query_error = True
            c.facets = {}
            c.page = h.Page(collection=[])
Example #24
0
def group_dictize(group, context,
                  include_groups=True,
                  include_tags=True,
                  include_users=True,
                  include_extras=True,
                  packages_field='datasets',
                  **kw):
    '''
    Turns a Group object and related into a dictionary. The related objects
    like tags are included unless you specify it in the params.

    ``context['with_capacity']`` is set to ``True`` while the related
    objects are dictized and reset to ``False`` afterwards.

    :param group: the Group object to dictize
    :param context: the context dict; ``dataset_counts``, ``limits``,
        ``user`` and ``for_view`` are read from it when present
    :param include_groups: add the sub-groups under ``groups``
    :param include_tags: add the group's tags under ``tags``
    :param include_users: add the group's users under ``users``
    :param include_extras: add the group's extras under ``extras``
    :param packages_field: determines the format of the `packages` field - can
        be `datasets`, `dataset_count` or None.
    :param kw: extra key/values merged into the resulting dictionary
    :returns: the dictized group, including ``display_name`` and
        ``image_display_url``
    :rtype: dictionary
    '''
    assert packages_field in ('datasets', 'dataset_count', None)
    if packages_field == 'dataset_count':
        dataset_counts = context.get('dataset_counts', None)

    result_dict = d.table_dictize(group, context)
    result_dict.update(kw)

    result_dict['display_name'] = group.title or group.name

    if include_extras:
        result_dict['extras'] = extras_dict_dictize(
            group._extras, context)

    context['with_capacity'] = True

    if packages_field:
        def get_packages_for_this_group(group_, just_the_count=False):
            # Ask SOLR for the list of packages for this org/group
            q = {
                'facet': 'false',
                'rows': 0,
            }

            if group_.is_organization:
                q['fq'] = '+owner_org:"{0}"'.format(group_.id)
            else:
                q['fq'] = '+groups:"{0}"'.format(group_.name)

            # Allow members of organizations to see private datasets.
            if group_.is_organization:
                is_group_member = (context.get('user') and
                    authz.has_user_permission_for_group_or_org(
                        group_.id, context.get('user'), 'read'))
                if is_group_member:
                    q['include_private'] = True

            if not just_the_count:
                # package_search limits 'rows' anyway, so this is only if you
                # want even fewer
                try:
                    packages_limit = context['limits']['packages']
                except KeyError:
                    del q['rows']  # leave it to package_search to limit it
                else:
                    q['rows'] = packages_limit

            # package_search must not see a caller-supplied schema.
            search_context = dict((k, v) for (k, v) in context.items()
                                  if k != 'schema')
            search_results = logic.get_action('package_search')(search_context,
                                                                q)
            return search_results['count'], search_results['results']

        if packages_field == 'datasets':
            package_count, packages = get_packages_for_this_group(group)
            result_dict['packages'] = packages
        else:
            if dataset_counts is None:
                package_count, packages = get_packages_for_this_group(
                    group, just_the_count=True)
            else:
                # Use the pre-calculated package_counts passed in.
                facets = dataset_counts
                if group.is_organization:
                    package_count = facets['owner_org'].get(group.id, 0)
                else:
                    package_count = facets['groups'].get(group.name, 0)

        result_dict['package_count'] = package_count

    if include_tags:
        # group tags are not creatable via the API yet, but that was(/is) a
        # future intention (see kindly's commit 5c8df894 on 2011/12/23)
        result_dict['tags'] = tag_list_dictize(
            _get_members(context, group, 'tags'),
            context)

    if include_groups:
        # these sub-groups won't have tags or extras for speed
        result_dict['groups'] = group_list_dictize(
            _get_members(context, group, 'groups'),
            context, include_groups=True)

    if include_users:
        result_dict['users'] = user_list_dictize(
            _get_members(context, group, 'users'),
            context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # Munging here should not have an effect; it is only done in case
        # of a potential vulnerability from dodgy API input. The munged
        # value must be the one used to build the URL, otherwise the
        # sanitising step is dead code.
        image_url = munge.munge_filename_legacy(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % image_url,
            qualified=True
        )
    return result_dict
Example #25
0
def _group_or_org_delete(context, data_dict, is_org=False):
    '''Delete a group or an organization.

    You must be authorized to delete the group.

    For an organization that still has non-deleted datasets, the delete is
    refused unless the ``ckan.auth.create_unowned_dataset`` permission is
    enabled, in which case those datasets have their ``owner_org`` cleared.
    The group's active Member rows are deleted along with the group, and
    the matching controller plugins are notified before the commit.

    :param id: the name or id of the group
    :type id: string
    :param is_org: treat the group as an organization (selects the auth
        function, the dataset check and the plugin interface)
    :type is_org: bool

    :raises NotFound: if no group matches ``id``
    :raises ValidationError: if an organization still has datasets and
        unowned datasets are not allowed

    '''
    from sqlalchemy import or_

    model = context['model']
    user = context['user']
    name_or_id = _get_or_bust(data_dict, 'id')

    group = model.Group.get(name_or_id)
    context['group'] = group
    if group is None:
        raise NotFound('Group was not found.')

    revisioned_details = 'Group: %s' % group.name

    if is_org:
        _check_access('organization_delete', context, data_dict)
    else:
        _check_access('group_delete', context, data_dict)

    # An organization delete will not occur while any of its datasets is
    # still not deleted, unless unowned datasets are permitted.
    if is_org:
        datasets = model.Session.query(model.Package) \
                        .filter_by(owner_org=group.id) \
                        .filter(model.Package.state != 'deleted') \
                        .count()
        if datasets:
            if not authz.check_config_permission(
                    'ckan.auth.create_unowned_dataset'):
                raise ValidationError(
                    _('Organization cannot be deleted while it '
                      'still has datasets'))

            pkg_table = model.package_table
            # using Core SQLA instead of the ORM should be faster
            model.Session.execute(pkg_table.update().where(
                sqla.and_(
                    pkg_table.c.owner_org == group.id,
                    pkg_table.c.state != 'deleted')).values(owner_org=None))

    rev = model.repo.new_revision()
    rev.author = user
    rev.message = _(u'REST API: Delete %s') % revisioned_details

    # The group's Member objects are deleted
    # (including hierarchy connections to parent and children groups).
    # The caller may have passed the group's *name*, so the resolved
    # group.id is used for matching rather than the raw input.
    group_id = group.id
    for member in model.Session.query(model.Member).\
            filter(or_(model.Member.table_id == group_id,
                       model.Member.group_id == group_id)).\
            filter(model.Member.state == 'active').all():
        member.delete()

    group.delete()

    if is_org:
        plugin_type = plugins.IOrganizationController
    else:
        plugin_type = plugins.IGroupController

    for item in plugins.PluginImplementations(plugin_type):
        item.delete(group)

    model.repo.commit()
Example #26
0
def get_plugins_waiting_on_ipipe():
    """Return the ``name`` of every plugin implementing ``IPipe``."""
    observer_names = []
    for ipipe_plugin in p.PluginImplementations(archiver_interfaces.IPipe):
        observer_names.append(ipipe_plugin.name)
    return observer_names
Example #27
0
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.

    It is recommended to call
    :py:func:`ckan.logic.action.get.package_show`, make the desired changes to
    the result, and then call ``package_update()`` with it.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    For further parameters see
    :py:func:`~ckan.logic.action.create.package_create`.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset, unless ``'return_id_only'`` is ``True`` in
              the context, in which case just the dataset id is returned
              (the default is to return the dataset)
    :rtype: dictionary

    :raises ValidationError: if neither ``id`` nor ``name`` is given, or if
        validating ``data_dict`` against the schema produces errors
    :raises NotFound: if no dataset matches the given name or id

    '''
    model = context['model']
    session = context['session']
    name_or_id = data_dict.get('id') or data_dict.get('name')
    if name_or_id is None:
        raise ValidationError({'id': _('Missing value')})

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg

    # immutable fields
    data_dict["id"] = pkg.id
    data_dict['type'] = pkg.type

    _check_access('package_update', context, data_dict)

    user = context['user']
    # get the schema
    # A schema supplied in the context takes precedence over the plugin's.
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method we'll
        # do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)

    # One uploader per resource, kept in the same order as the resources so
    # that it can be paired with the saved resources further down.
    resource_uploads = []
    for resource in data_dict.get('resources', []):
        # file uploads/clearing
        upload = uploader.get_resource_uploader(resource)

        # Fill in mimetype/size from the uploader only when the caller did
        # not supply them.
        if 'mimetype' not in resource:
            if hasattr(upload, 'mimetype'):
                resource['mimetype'] = upload.mimetype

        if 'size' not in resource and 'url_type' in resource:
            if hasattr(upload, 'filesize'):
                resource['size'] = upload.filesize

        resource_uploads.append(upload)

    data, errors = lib_plugins.plugin_validate(package_plugin, context,
                                               data_dict, schema,
                                               'package_update')
    log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
              errors, context.get('user'),
              context.get('package').name if context.get('package') else '',
              data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    #avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    # The owner organization is updated through its own action, using a
    # copy of the context with auth checks skipped and the commit deferred.
    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(context_org_update, {
        'id': pkg.id,
        'organization_id': pkg.owner_org
    })

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    # Resources are matched to the saved ones (and to their uploaders) by
    # position.
    for index, (resource, upload) in enumerate(
            zip(data.get('resources', []), resource_uploads)):
        resource['id'] = pkg.resources[index].id

        upload.upload(resource['id'], uploader.get_max_resource_size())

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)

        item.after_update(context, data)

    # Create activity
    # No activity is recorded for private datasets.
    if not pkg.private:
        user_obj = model.User.by_name(user)
        if user_obj:
            user_id = user_obj.id
        else:
            user_id = 'not logged in'

        activity = pkg.activity_stream_item('changed', user_id)
        session.add(activity)

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
            else _get_action('package_show')(context, {'id': data_dict['id']})

    return output
Example #28
0
def register_group_plugins(map):
    """
    Register the various IGroupForm instances.

    Sets up the mappings between group types and the registered IGroupForm
    instances and connects the per-type routes on ``map``. Once a default
    group plugin has been set, further calls return immediately without
    doing anything.

    :param map: the routes mapper on which the group routes are connected
    :raises ValueError: if more than one plugin declares itself the
        fallback, or if a group type is claimed by more than one plugin
    """
    global _default_group_plugin

    # Repeated calls are not expected in a normal deployment but can happen
    # when running unit tests, so they must be harmless.
    if _default_group_plugin is not None:
        return

    # Create the mappings and register the fallback behaviour if one is found.
    for plugin in plugins.PluginImplementations(plugins.IGroupForm):
        if plugin.is_fallback():
            if _default_group_plugin is not None:
                raise ValueError("More than one fallback IGroupForm has been "
                                 "registered")
            _default_group_plugin = plugin

        # Plugins that do not provide group_controller() get 'group'.
        try:
            group_controller = plugin.group_controller()
        except AttributeError:
            group_controller = 'group'

        for group_type in plugin.group_types():
            # Create the routes based on group_type here, this will
            # allow us to have top level objects that are actually
            # Groups, but first we need to make sure we are not
            # clobbering an existing domain

            # Our version of routes doesn't allow the environ to be
            # passed into the match call and so we have to set it on the
            # map instead. This looks like a threading problem waiting
            # to happen but it is executed sequentially from inside the
            # routing setup

            # (route name template, path template, extra connect() keywords),
            # listed in the order in which the routes must be connected.
            route_specs = (
                ('%s_index', '/%s', dict(action='index')),
                ('%s_new', '/%s/new', dict(action='new')),
                ('%s_read', '/%s/{id}', dict(action='read')),
                ('%s_action', '/%s/{action}/{id}',
                 dict(requirements=dict(action='|'.join(
                     ['edit', 'authz', 'history', 'member_new',
                      'member_delete', 'followers', 'follow',
                      'unfollow', 'admins', 'activity'])))),
                ('%s_edit', '/%s/edit/{id}',
                 dict(action='edit', ckan_icon='edit')),
                ('%s_members', '/%s/members/{id}',
                 dict(action='members', ckan_icon='group')),
                ('%s_activity', '/%s/activity/{id}/{offset}',
                 dict(action='activity', ckan_icon='time')),
            )
            for name_template, path_template, route_kwargs in route_specs:
                map.connect(name_template % group_type,
                            path_template % group_type,
                            controller=group_controller,
                            **route_kwargs)

            if group_type in _group_plugins:
                raise ValueError("An existing IGroupForm is "
                                 "already associated with the group type "
                                 "'%s'" % group_type)
            _group_plugins[group_type] = plugin
            _group_controllers[group_type] = group_controller

            if group_controller == 'group':
                # Tell the default group controller that it is allowed to
                # handle other group_types.
                # Import it here to avoid circular imports.
                from ckan.controllers.group import GroupController
                GroupController.add_group_type(group_type)

    # Setup the fallback behaviour if one hasn't been defined.
    if _default_group_plugin is None:
        _default_group_plugin = DefaultGroupForm()
    for builtin_type in ('group', 'organization'):
        if builtin_type not in _group_controllers:
            _group_controllers[builtin_type] = builtin_type
Example #29
0
def _group_or_org_update(context, data_dict, is_org=False):
    '''Update a group or an organization and record the activity.

    :param context: must provide ``model``, ``user`` and ``session``;
        ``api_version`` and ``defer_commit`` are honoured when present
    :param data_dict: the new group data; must contain ``id``
    :param is_org: treat the group as an organization (selects the auth
        function, the validation action name, the plugin interface and the
        activity type)
    :returns: the dictized, updated group
    :raises NotFound: if no group matches ``id``
    :raises ValidationError: if validation of ``data_dict`` yields errors
    '''
    model = context['model']
    user = context['user']
    session = context['session']
    group_ref = _get_or_bust(data_dict, 'id')

    group = model.Group.get(group_ref)
    context["group"] = group
    if group is None:
        raise NotFound('Group was not found.')

    # The group type cannot be changed by an update.
    data_dict['type'] = group.type

    # get the schema
    group_plugin = lib_plugins.lookup_group_plugin(group.type)
    schema_options = {
        'type': 'update',
        'api': 'api_version' in context,
        'context': context,
    }
    try:
        schema = group_plugin.form_to_db_schema_options(schema_options)
    except AttributeError:
        # Plugins without form_to_db_schema_options() fall back to the
        # option-less schema.
        schema = group_plugin.form_to_db_schema()

    upload = uploader.get_uploader('group', group.image_url)
    upload.update_data_dict(data_dict, 'image_url', 'image_upload',
                            'clear_upload')

    auth_function = 'organization_update' if is_org else 'group_update'
    _check_access(auth_function, context, data_dict)

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)

    validation_action = 'organization_update' if is_org else 'group_update'
    data, errors = lib_plugins.plugin_validate(
        group_plugin, context, data_dict, schema, validation_action)
    log.debug('group_update validate_errs=%r user=%s group=%s data_dict=%r',
              errors, context.get('user'),
              context.get('group').name if context.get('group') else '',
              data_dict)

    if errors:
        session.rollback()
        raise ValidationError(errors)

    # Organizations never update their packages here; groups only do so
    # when the caller actually sent a 'packages' key.
    contains_packages = 'packages' in data_dict
    prevent_packages_update = is_org or not contains_packages
    group = model_save.group_dict_save(
        data, context, prevent_packages_update=prevent_packages_update)

    controller_interface = (plugins.IOrganizationController if is_org
                            else plugins.IGroupController)
    for item in plugins.PluginImplementations(controller_interface):
        item.edit(group)

    activity_dict = {
        'user_id': model.User.by_name(six.ensure_text(user)).id,
        'object_id': group.id,
        'activity_type': ('changed organization' if is_org
                          else 'changed group'),
    }
    # Handle 'deleted' groups.
    # When the user marks a group as deleted this comes through here as
    # a 'changed' group activity. We detect this and change it to a 'deleted'
    # activity.
    if group.state == u'deleted':
        previous_deletions = session.query(ckan.model.Activity).filter_by(
            object_id=group.id, activity_type='deleted').all()
        if previous_deletions:
            # A 'deleted group' activity for this group has already been
            # emitted.
            # FIXME: What if the group was deleted and then activated again?
            activity_dict = None
        elif is_org:
            # We will emit a 'deleted organization' activity.
            activity_dict['activity_type'] = 'deleted organization'
        else:
            # We will emit a 'deleted group' activity.
            activity_dict['activity_type'] = 'deleted group'

    if activity_dict is not None:
        activity_dict['data'] = {
            'group': dictization.table_dictize(group, context)
        }
        activity_create_context = {
            'model': model,
            'user': user,
            'defer_commit': True,
            'ignore_auth': True,
            'session': session
        }
        _get_action('activity_create')(activity_create_context, activity_dict)
        # TODO: Also create an activity detail recording what exactly changed
        # in the group.

    upload.upload(uploader.get_max_image_size())

    if not context.get('defer_commit'):
        model.repo.commit()

    return model_dictize.group_dictize(group, context)
Example #30
0
def group_create(context, data_dict):
    '''Create a new group.

    You must be authorized to create groups.

    Plugins may change the parameters of this function depending on the value
    of the ``type`` parameter, see the ``IGroupForm`` plugin interface.

    :param name: the name of the group, a string between 2 and 100 characters
        long, containing only lowercase alphanumeric characters, ``-`` and
        ``_``
    :type name: string
    :param id: the id of the group (optional)
    :type id: string
    :param title: the title of the group (optional)
    :type title: string
    :param description: the description of the group (optional)
    :type description: string
    :param image_url: the URL to an image to be displayed on the group's page
        (optional)
    :type image_url: string
    :param type: the type of the group (optional), ``IGroupForm`` plugins
        associate themselves with different group types and provide custom
        group handling behaviour for these types
    :type type: string
    :param state: the current state of the group, e.g. ``'active'`` or
        ``'deleted'``, only active groups show up in search results and
        other lists of groups, this parameter will be ignored if you are not
        authorized to change the state of the group (optional, default:
        ``'active'``)
    :type state: string
    :param approval_status: (optional)
    :type approval_status: string
    :param extras: the group's extras (optional), extras are arbitrary
        (key: value) metadata items that can be added to groups, each extra
        dictionary should have keys ``'key'`` (a string), ``'value'`` (a
        string), and optionally ``'deleted'``
    :type extras: list of dataset extra dictionaries
    :param packages: the datasets (packages) that belong to the group, a list
        of dictionaries each with keys ``'name'`` (string, the id or name of
        the dataset) and optionally ``'title'`` (string, the title of the
        dataset)
    :type packages: list of dictionaries
    :param groups: the groups that belong to the group, a list of dictionaries
        each with key ``'name'`` (string, the id or name of the group) and
        optionally ``'capacity'`` (string, the capacity in which the group is
        a member of the group)
    :type groups: list of dictionaries
    :param users: the users that belong to the group, a list of dictionaries
        each with key ``'name'`` (string, the id or name of the user) and
        optionally ``'capacity'`` (string, the capacity in which the user is
        a member of the group)
    :type users: list of dictionaries

    :returns: the newly created group
    :rtype: dictionary

    :raises ValidationError: if ``data_dict`` does not validate against the
        group plugin's schema (the session is rolled back first)

    The ``context`` dictionary must provide ``'model'``, ``'user'`` and
    ``'session'``. The optional keys ``'parent'``, ``'message'``,
    ``'api_version'`` and ``'defer_commit'`` are also read. On success the
    keys ``'group'`` and ``'id'`` are written back into ``context``.

    '''
    model = context['model']
    user = context['user']
    session = context['session']
    parent = context.get('parent', None)

    _check_access('group_create', context, data_dict)

    # get the schema
    group_plugin = lib_plugins.lookup_group_plugin(
        group_type=data_dict.get('type'))
    try:
        schema = group_plugin.form_to_db_schema_options({
            'type': 'create',
            'api': 'api_version' in context,
            'context': context,
        })
    except AttributeError:
        # Plugins without form_to_db_schema_options fall back to the plain
        # schema accessor.
        schema = group_plugin.form_to_db_schema()

    if 'api_version' not in context:
        # old plugins do not support passing the schema so we need
        # to ensure they still work
        try:
            group_plugin.check_data_dict(data_dict, schema)
        except TypeError:
            group_plugin.check_data_dict(data_dict)

    data, errors = _validate(data_dict, schema, context)
    log.debug('group_create validate_errs=%r user=%s group=%s data_dict=%r',
              errors, context.get('user'), data_dict.get('name'), data_dict)

    if errors:
        session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user

    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Create object %s') % data.get("name")

    group = model_save.group_dict_save(data, context)

    if parent:
        parent_group = model.Group.get(parent)
        if parent_group:
            # Record the new group as a child (member) of the parent group.
            member = model.Member(group=parent_group,
                                  table_id=group.id,
                                  table_name='group')
            session.add(member)
            log.debug('Group %s is made child of group %s', group.name,
                      parent_group.name)

    # Only byte strings need decoding. Calling ``.decode`` on a text string
    # fails outright on Python 3 and, on Python 2, breaks for non-ASCII
    # names because of the implicit ASCII round-trip.
    user_name = user.decode('utf8') if isinstance(user, bytes) else user

    if user:
        admins = [model.User.by_name(user_name)]
    else:
        admins = []
    model.setup_default_user_roles(group, admins)
    # Needed to let extensions know the group id
    session.flush()

    for item in plugins.PluginImplementations(plugins.IGroupController):
        item.create(group)

    # NOTE(review): the activity needs a user id, so this lookup is not
    # guarded by ``if user`` -- presumably an empty or unknown user makes
    # ``by_name`` return None and this line fail; confirm callers always
    # supply a valid user.
    activity_dict = {
        'user_id': model.User.by_name(user_name).id,
        'object_id': group.id,
        'activity_type': 'new group',
    }
    activity_dict['data'] = {
        'group': ckan.lib.dictization.table_dictize(group, context)
    }
    # The activity is created with defer_commit so that it is committed
    # together with the group below (or by the caller).
    activity_create_context = {
        'model': model,
        'user': user,
        'defer_commit': True,
        'session': session
    }
    logic.get_action('activity_create')(activity_create_context,
                                        activity_dict,
                                        ignore_auth=True)

    if not context.get('defer_commit'):
        model.repo.commit()
    context["group"] = group
    context["id"] = group.id
    # Lazy logger arguments: no eager formatting and no forced ``str()``
    # conversion of a possibly non-ASCII name.
    log.debug('Created object %s', group.name)
    return model_dictize.group_dictize(group, context)