Beispiel #1
0
def set_cors_headers_for_response(response):
    u'''
    Add CORS (Access-Control-Allow-*) headers to ``response`` when allowed.

    Headers are only added when the request carries an ``Origin`` header and
    either ``ckan.cors.origin_allow_all`` is true (every origin is allowed
    via ``*``) or the request origin is listed in the whitespace separated
    ``ckan.cors.origin_whitelist`` option (that origin is echoed back).

    :param response: response object whose ``headers`` mapping is updated
    :returns: the same ``response`` object
    '''
    origin = request.headers.get(u'Origin')
    if not origin:
        # No Origin header, so there is nothing to allow.
        return response

    cors_origin_allowed = None
    if asbool(config.get(u'ckan.cors.origin_allow_all')):
        cors_origin_allowed = b'*'
    else:
        # The whitelist is consulted even when origin_allow_all is not
        # configured at all; it must not depend on that option being set.
        whitelist = (config.get(u'ckan.cors.origin_whitelist') or u'').split()
        if origin in whitelist:
            # set var to the origin to allow it.
            cors_origin_allowed = origin

    if cors_origin_allowed is not None:
        response.headers[b'Access-Control-Allow-Origin'] = \
            cors_origin_allowed
        response.headers[b'Access-Control-Allow-Methods'] = \
            b'POST, PUT, GET, DELETE, OPTIONS'
        response.headers[b'Access-Control-Allow-Headers'] = \
            b'X-CKAN-API-KEY, Authorization, Content-Type'

    return response
Beispiel #2
0
def check_config_permission(permission):
    '''Look up the effective value of an auth permission.

    ``permission`` is a string identifying the auth permission (eg
    `anon_create_dataset`); a `ckan.auth.` prefix is accepted and ignored.

    Only the keys of CONFIG_PERMISSIONS_DEFAULTS are known permissions; any
    other name yields False. The default for a known permission can be
    overridden in the config file under `ckan.auth.<permission>`.

    Returns the permission value converted with ``asbool``, except for
    `roles_that_cascade_to_sub_groups`, which is returned as a list of
    strings.

    '''
    name = permission.replace('ckan.auth.', '')
    if name not in CONFIG_PERMISSIONS_DEFAULTS:
        return False

    fallback = CONFIG_PERMISSIONS_DEFAULTS.get(name)
    setting = config.get('ckan.auth.' + name, fallback)

    if name != 'roles_that_cascade_to_sub_groups':
        return asbool(setting)

    # This permission is configured as a space separated list of strings
    return setting.split() if setting else []
Beispiel #3
0
def process_app_global(
        key: str, value: str) -> tuple[str, Union[bool, int, str, list[str]]]:
    '''
    Adjust a key, value pair before it is set on the app_globals (g) object.

    The key is always mapped through ``get_globals_key``. When
    app_globals_from_config_details has an entry for the original key, that
    entry may rename the key (``name``), supply a fallback for an empty
    value (``default``) and request a conversion (``type`` of ``bool``,
    ``int`` or ``split``).

    Returns the (possibly renamed) key and the (possibly converted) value.
    '''
    options = app_globals_from_config_details.get(key)
    key = get_globals_key(key)
    if not options:
        return key, value

    key = options.get('name', key)
    raw = value or options.get('default', '')

    converters = {'bool': asbool, 'int': int, 'split': aslist}
    convert = converters.get(options.get('type'))
    new_value: Any = raw if convert is None else convert(raw)
    return key, new_value
Beispiel #4
0
    def get(self, id=None, data=None, errors=None, error_summary=None):
        u'''Render the user edit page.

        :param id: identifier of the user to edit; it is passed through
            ``self._prepare`` before use
        :param data: form data to display; falls back to the data returned
            by the ``user_show`` action
        :param errors: validation errors to display, if any
        :param error_summary: summary of the validation errors, if any
        :returns: the rendered ``user/edit.html`` template
        '''
        context, id = self._prepare(id)
        data_dict = {u'id': id}
        try:
            old_data = logic.get_action(u'user_show')(context, data_dict)

            g.display_name = old_data.get(u'display_name')
            g.user_name = old_data.get(u'name')

            data = data or old_data

        except logic.NotAuthorized:
            base.abort(403, _(u'Unauthorized to edit user %s') % u'')
        except logic.NotFound:
            base.abort(404, _(u'User not found'))

        # Named ``form_vars`` so the ``vars`` builtin is not shadowed.
        form_vars = {
            u'data': data,
            u'errors': errors or {},
            u'error_summary': error_summary
        }

        extra_vars = _extra_template_variables({
            u'model': model,
            u'session': model.Session,
            u'user': g.user
        }, data_dict)

        extra_vars[u'show_email_notifications'] = asbool(
            config.get(u'ckan.activity_streams_email_notifications'))
        # The form snippet sees both the form data and the page variables;
        # the page itself additionally receives the rendered form.
        form_vars.update(extra_vars)
        extra_vars[u'form'] = base.render(
            edit_user_form, extra_vars=form_vars)

        return base.render(u'user/edit.html', extra_vars)
Beispiel #5
0
def sysadmin():
    u'''Grant or revoke sysadmin status for the user named in the form.

    Reads ``username`` and ``status`` from the submitted form, applies the
    change through the ``user_patch`` action, flashes a confirmation message
    and redirects to ``admin.index``.

    Aborts with 403 when the action is not authorized and with 404 when the
    user does not exist.
    '''
    username = request.form.get(u'username')
    status = asbool(request.form.get(u'status'))

    try:
        context = {
            u'model': model,
            u'session': model.Session,
            u'user': g.user,
            u'auth_user_obj': g.userobj,
        }
        data_dict = {u'id': username, u'sysadmin': status}
        user = logic.get_action(u'user_patch')(context, data_dict)
    except logic.NotAuthorized:
        return base.abort(403,
                          _(u'Not authorized to promote user to sysadmin'))
    except logic.NotFound:
        return base.abort(404, _(u'User not found'))

    # Translate the template first and fill in the name afterwards;
    # formatting before the lookup would never match a catalogue entry.
    if status:
        message = _(u'Promoted {} to sysadmin')
    else:
        message = _(u'Revoked sysadmin permission from {}')
    h.flash_success(message.format(user[u'display_name']))
    return h.redirect_to(u'admin.index')
Beispiel #6
0
def render_snippet(*template_names, **kw):
    ''' Helper function for rendering snippets. Rendered html has
    comment tags added to show the template used when the ``debug`` config
    option is on. NOTE: unlike other render functions this takes a list of
    keywords instead of a dict for the extra template variables.

    :param template_names: the template to render, optionally with fallback
        values, for when the template can't be found. For each, specify the
        relative path to the template inside the registered tpl_dir.
    :type template_names: str
    :param kw: extra template variables to supply to the template
    :type kw: named arguments of any type that are supported by the template
    :raises TemplateNotFound: if none of the given templates exists, if a
        template nested inside one of them is missing, or if no template
        name was given at all
    '''

    last_exc = None
    for template_name in template_names:
        try:
            output = render(template_name, extra_vars=kw)
        except TemplateNotFound as exc:
            if exc.name != template_name:
                # a nested template doesn't exist - don't fallback
                raise
            # the specified template doesn't exist - try the next
            # fallback, but store the exception in case it was
            # last one
            last_exc = exc
            continue
        if asbool(config.get('debug')):
            output = (
                '\n<!-- Snippet %s start -->\n%s\n<!-- Snippet %s end -->'
                '\n' % (template_name, output, template_name))
        return h.literal(output)

    if last_exc is None:
        # No names were supplied. TemplateNotFound needs a ``name``
        # argument, so raising the bare class would fail with a TypeError.
        raise TemplateNotFound(
            None, message='No snippet template names were given')
    raise last_exc
Beispiel #7
0
def webassets_init():
    u'''Create the module-level webassets ``env`` and register core assets.

    The environment is stored in the global ``env``. Its directory comes
    from ``get_webassets_path()``, its debug flag from the ``debug`` config
    option, and its URL is ``/webassets/``. The ``base`` directory inside the
    configured public folder is added as a public path and its ``vendor``,
    ``javascript``, ``datapreview`` and ``css`` sub-directories are
    registered as libraries.
    '''
    global env

    static_path = get_webassets_path()
    public = config.get(u'ckan.base_public_folder')

    # The public folder is resolved relative to the parent of this
    # module's directory.
    here = os.path.dirname(__file__)
    public_folder = os.path.abspath(os.path.join(here, u'..', public))
    base_path = os.path.join(public_folder, u'base')

    env = Environment()
    env.directory = static_path
    env.debug = asbool(config.get(u'debug', False))
    env.url = u'/webassets/'

    add_public_path(base_path, u'/base/')
    logger.debug(u'Base path {0}'.format(base_path))

    # (library name, sub-directory of the base path)
    libraries = (
        (u'vendor', u'vendor'),
        (u'base', u'javascript'),
        (u'datapreview', u'datapreview'),
        (u'css', u'css'),
    )
    for library, folder in libraries:
        create_library(library, os.path.join(base_path, folder))
Beispiel #8
0
def user_show(context, data_dict):
    '''Auth check for reading a user's details.

    User details are readable by anyone unless the
    ``ckan.auth.public_user_details`` option is false, in which case the
    decision is delegated to ``restrict_anon``. Some properties, such as
    the API key, are stripped at the action level when not logged in.
    '''
    details_public = asbool(config.get('ckan.auth.public_user_details', True))
    if details_public:
        return {'success': True}
    return restrict_anon(context)
Beispiel #9
0
    def read(self, id=None):
        '''Render the user profile page (``user/read.html``).

        :param id: identifier of the user whose profile is shown
        '''
        context = dict(
            model=model,
            session=model.Session,
            user=c.user,
            auth_user_obj=c.userobj,
            for_view=True,
        )
        data_dict = dict(
            id=id,
            user_obj=c.userobj,
            include_datasets=True,
            include_num_followers=True,
        )

        self._setup_template_variables(context, data_dict)

        # Only the legacy templates show the user's activity stream on the
        # profile page; the new templates do not.
        uses_legacy_templates = asbool(
            config.get('ckan.legacy_templates', False))
        if uses_legacy_templates:
            activity_list_html = get_action('user_activity_list_html')
            c.user_activity_stream = activity_list_html(
                context, {'id': c.user_dict['id']})

        return render('user/read.html')
Beispiel #10
0
    def edit(self, id=None, data=None, errors=None, error_summary=None):
        '''Show the user edit form, or save it when it was submitted.

        :param id: user to edit; defaults to the logged-in user
        :param data: form data to display instead of the stored user data
        :param errors: validation errors to display, if any
        :param error_summary: summary of the validation errors, if any
        :returns: the result of ``self._save_edit`` for a submitted form,
            otherwise the rendered ``user/edit.html`` template
        '''
        context = {
            'save': 'save' in request.params,
            'schema': self._edit_form_to_db_schema(),
            'model': model,
            'session': model.Session,
            'user': c.user,
            'auth_user_obj': c.userobj,
        }

        if id is None:
            if not c.userobj:
                abort(400, _('No user specified'))
            else:
                id = c.userobj.id
        data_dict = {'id': id}

        try:
            check_access('user_update', context, data_dict)
        except NotAuthorized:
            abort(403, _('Unauthorized to edit a user.'))

        # A submitted form with no data handed in by the caller is a save.
        if context['save'] and not data and request.method == 'POST':
            return self._save_edit(id, context)

        try:
            stored = get_action('user_show')(context, data_dict)

            form_schema = self._db_to_edit_form_schema()
            if form_schema:
                stored, errors = dictization_functions.validate(
                    stored, form_schema, context)

            c.display_name = stored.get('display_name')
            c.user_name = stored.get('name')

            data = data or stored

        except NotAuthorized:
            abort(403, _('Unauthorized to edit user %s') % '')
        except NotFound:
            abort(404, _('User not found'))

        # Only sysadmins and the user themselves may see the edit form.
        user_obj = context.get('user_obj')
        if not authz.is_sysadmin(c.user) and c.user != user_obj.name:
            abort(403, _('User %s not authorized to edit %s') %
                  (str(c.user), id))

        form_vars = {
            'data': data,
            'errors': errors or {},
            'error_summary': error_summary,
        }

        template_context = {
            'model': model,
            'session': model.Session,
            'user': c.user,
        }
        self._setup_template_variables(template_context, data_dict)

        c.is_myself = True
        c.show_email_notifications = asbool(
            config.get('ckan.activity_streams_email_notifications'))
        c.form = render(self.edit_user_form, extra_vars=form_vars)

        return render('user/edit.html')
Beispiel #11
0
def _allow_caching(cache_force=None):
    '''Decide whether the current page may be cached.

    An explicit ``cache_force`` value wins. Otherwise caching is refused as
    soon as one of the checks below applies. When caching is not allowed,
    ``__no_cache__`` is set in the request environ so that any further
    rendering is not cached either.

    :param cache_force: explicit decision, or None to decide from the
        request and the config
    '''
    if cache_force is not None:
        # Force cache or not if explicit.
        allow_cache = cache_force
    else:
        allow_cache = not (
            # Logged in users, flash messages etc.
            _is_valid_session_cookie_data()
            # Tests etc.
            or 'REMOTE_USER' in request.environ
            # A non-cachable template was used while rendering this.
            or request.environ.get('__no_cache__')
            # __no_cache__ was passed in the query string.
            or request.params.get('__no_cache__')
            # Caching is not enabled in config.
            or not asbool(config.get('ckan.cache_enabled', False))
        )

    if not allow_cache:
        # Prevent any further rendering from being cached.
        request.environ['__no_cache__'] = True
Beispiel #12
0
def user_list(context, data_dict):
    '''Auth check for listing users.

    The list is visible by default. Requests that pass an ``email``
    parameter are refused here, since only sysadmins may specify it. When
    ``ckan.auth.public_user_details`` is false the decision is delegated to
    ``restrict_anon``.
    '''
    if data_dict.get('email'):
        return {'success': False}

    details_public = asbool(config.get('ckan.auth.public_user_details', True))
    if details_public:
        return {'success': True}
    return restrict_anon(context)
Beispiel #13
0
def group_show(context, data_dict):
    '''Auth check for reading a group.

    An active group is readable by anyone when user details are public
    (``ckan.auth.public_user_details``), or when the request neither asks
    for ``include_users`` nor has ``object_type`` set to ``user``. In every
    other case the user needs the ``read`` permission on the group.
    '''
    user = context.get('user')
    group = get_group_object(context, data_dict)

    if group.state == 'active':
        if asbool(config.get('ckan.auth.public_user_details', True)):
            return {'success': True}
        wants_users = asbool(data_dict.get('include_users', False))
        if not wants_users and data_dict.get('object_type', None) != 'user':
            return {'success': True}

    if authz.has_user_permission_for_group_or_org(group.id, user, 'read'):
        return {'success': True}

    denied = _('User %s not authorized to read group %s') % (user, group.id)
    return {'success': False, 'msg': denied}
Beispiel #14
0
 def delete_package(self, pkg_dict):
     """Delete a package from the search index.

     The delete query matches documents of the package type on this site
     whose ``id`` or ``name`` equals ``pkg_dict['id']``. Whether the delete
     is committed immediately is controlled by ``ckan.search.solr_commit``.

     :raises SearchIndexError: if the delete request fails for any reason
     """
     conn = make_connection()
     identifier = pkg_dict.get('id')
     query_values = (TYPE_FIELD, PACKAGE_TYPE, identifier, identifier,
                     config.get('ckan.site_id'))
     query = "+%s:%s AND +(id:\"%s\" OR name:\"%s\") AND +site_id:\"%s\"" % \
             query_values
     try:
         conn.delete(
             q=query,
             commit=asbool(config.get('ckan.search.solr_commit', 'true')))
     except Exception as err:
         log.exception(err)
         raise SearchIndexError(err)
Beispiel #15
0
def recut():
    """
    Recreate setup.py so that we can edit keywords
    Remove unnecessary code examples

    NOTE: this function lives in a file that is itself rendered through
    jinja2 (see the ``{% raw %}`` block and the ``{{ cookiecutter | jsonify }}``
    expression below), so it is not valid Python until it has been rendered.

    The generated ``setup.py`` is written to the current working directory.
    """
    # template location
    try:
        # cutting cookie from directory with template
        temp_dir = find.find_template('..')
    except NonTemplatedInputDirException as e:
        # template coming from Github
        # Hooks are passed through jinja2. raw will
        # Make sure `cookiecutter.project` isn't replaced
        {% raw %}
        temp_dir = os.path.join(config['cookiecutters_dir'],
                                'cookiecutter-ckan-extension',
                                '{{cookiecutter.project}}')
        {% endraw %}

    # Location for resulting file
    destination = os.getcwd()
    # name of template
    setup_template = 'setup.py'

    # get context
    # (filled in by jinja2 with the cookiecutter variables when the hook is
    # rendered)
    context = {{ cookiecutter | jsonify }}

    # Process keywords
    # Drop any existing 'ckan'/'CKAN' entries and the listed filler words
    # (presumably the words of the template's placeholder keyword text -
    # TODO confirm), then put a single 'CKAN' keyword first.
    keywords = context['keywords'].strip().split()
    keywords = [keyword for keyword in keywords
                if keyword not in ('ckan', 'CKAN', 'A', 'space',
                                   'seperated', 'list', 'of', 'keywords')]
    keywords.insert(0, 'CKAN')
    keywords = u' '.join(keywords)
    context['keywords'] = keywords

    # Double check 'project_shortname' and 'plugin_class_name'
    # The short name is the project name without its first 8 characters
    # (presumably the 'ckanext-' prefix - TODO confirm), with dashes replaced
    # by underscores.
    short_name = context['project'][8:].replace('-','_')
    if context['project_shortname'] != short_name:
        context['project_shortname'] = short_name

    # The plugin class name is the title-cased short name without
    # underscores, followed by 'Plugin'.
    plugin_class_name = '{}Plugin'.format(context['project_shortname']
                        .title().replace('_', ''))
    if context['plugin_class_name'] != plugin_class_name:
        context['plugin_class_name'] = plugin_class_name
    # Recut cookie
    env = StrictEnvironment()
    env.loader = jinja2.FileSystemLoader(temp_dir)
    gen.generate_file(project_dir=destination,
                      infile=setup_template,
                      context={'cookiecutter': context},
                      env=env)
    # Code examples are removed unless 'include_examples' is truthy
    if not asbool(context['include_examples']):
        remove_code_examples(os.path.join(destination, 'ckanext', short_name))
Beispiel #16
0
    def __init__(self, data):
        '''Wrap the given licence ``data`` dict.

        ``data`` is modified in place and kept as ``self._data``: the legacy
        ``is_okd_compliant`` / ``is_osi_compliant`` flags are replaced by
        ``od_conformance`` / ``osd_conformance`` (``'approved'`` or ``''``)
        and an ISO formatted ``date_created`` string is parsed into a
        ``datetime.datetime``.
        '''
        # convert old keys if necessary
        legacy_keys = (
            ('is_okd_compliant', 'od_conformance'),
            ('is_osi_compliant', 'osd_conformance'),
        )
        for old_key, new_key in legacy_keys:
            if old_key in data:
                compliant = asbool(data[old_key])
                data[new_key] = 'approved' if compliant else ''
                del data[old_key]

        self._data = data
        for key, value in self._data.items():
            if key == 'date_created':
                # Parse ISO formatted datetime: every run of digits is one
                # positional argument of the datetime constructor.
                fields = [int(item) for item in re.split(r'[^\d]', value)]
                self._data[key] = datetime.datetime(*fields)
            elif isinstance(value, str):
                # String values are stored back unchanged.
                self._data[key] = value
Beispiel #17
0
    def __init__(self, data):
        '''Wrap the given licence ``data`` dict.

        ``data`` is modified in place and kept as ``self._data``: the legacy
        ``is_okd_compliant`` / ``is_osi_compliant`` flags are replaced by
        ``od_conformance`` / ``osd_conformance`` (``'approved'`` or ``''``),
        an ISO formatted ``date_created`` string is parsed into a
        ``datetime.datetime`` and byte strings are decoded as UTF-8.
        '''
        # convert old keys if necessary
        if 'is_okd_compliant' in data:
            data['od_conformance'] = 'approved' \
                if asbool(data['is_okd_compliant']) else ''
            del data['is_okd_compliant']
        if 'is_osi_compliant' in data:
            data['osd_conformance'] = 'approved' \
                if asbool(data['is_osi_compliant']) else ''
            del data['is_osi_compliant']

        self._data = data
        for (key, value) in self._data.items():
            if key == 'date_created':
                # Parse ISO formatted datetime.
                value = datetime.datetime(
                    *map(int, re.split(r'[^\d]', value)))
                self._data[key] = value
            elif isinstance(value, bytes):
                # Decode byte strings to text. Checking for ``bytes`` rather
                # than ``str`` keeps this working on Python 3, where ``str``
                # has no ``decode`` method.
                self._data[key] = value.decode('utf8')
Beispiel #18
0
def package_create(context, data_dict):
    """Auth check for creating a dataset.

    Datasets may only be created through the API, and only when the
    original ``package_create`` auth function agrees. If the admin has
    disabled public datasets via
    ``ckanext.dcor_schemas.allow_public_datasets`` (e.g. for DCOR-med),
    creating a non-private dataset is refused as well.
    """
    # Note that we did not decorate this function with
    # @logic.auth_allow_anonymous_access. This effectively
    # disables dataset creation via the web interface.
    # However, we make sure that the API is used with the following:
    if 'api_version' not in context:
        return {
            "success": False,
            "msg": "Creating datasets is only possible via the API. "
                   "Please use DCOR-Aid for uploading data!"
        }

    # original auth function
    upstream = logic.auth.create.package_create(context, data_dict)
    if not upstream["success"]:
        return upstream

    if not data_dict:
        return {"success": True}

    allow_public = asbool(
        config.get("ckanext.dcor_schemas.allow_public_datasets", "true"))
    must_be_private = not allow_public
    # A dataset without a 'private' entry is public unless datasets have
    # to be private.
    is_private = asbool(data_dict.get('private', must_be_private))
    if is_private or not must_be_private:
        return {"success": True}

    return {
        "success": False,
        "msg": "Creating public datasets has been disabled via "
               "the configuration option 'ckanext.dcor_schemas."
               "allow_public_datasets = false'!"
    }
Beispiel #19
0
    def error_handler(e):
        u'''Log the exception ``e`` and render the error page for it.

        HTTP exceptions are logged at debug level with a traceback when the
        ``debug`` (or ``DEBUG``) config option is on and at info level
        otherwise; the page is rendered with their own code, description and
        name. Any other exception is logged as an error with a traceback and
        rendered as a 500 page.

        :returns: a ``(rendered page, status code)`` tuple
        '''
        debug = asbool(config.get('debug', config.get('DEBUG', False)))
        if isinstance(e, HTTPException):
            if debug:
                # exc_info=True makes logging collect the current exception
                log.debug(e, exc_info=True)
            else:
                log.info(e)
            extra_vars = {
                u'code': e.code,
                u'content': e.description,
                u'name': e.name
            }

            return base.render(u'error_document_template.html',
                               extra_vars), e.code
        log.error(e, exc_info=True)
        extra_vars = {u'code': [500], u'content': u'Internal server error'}
        return base.render(u'error_document_template.html', extra_vars), 500
Beispiel #20
0
def load_all():
    '''
    Unload everything, then load the plugins named in the 'ckan.plugins'
    config directive together with the system plugins.

    The synchronous search plugin is added as well, unless it is already
    listed or 'ckan.search.automatic_indexing' is turned off.
    '''
    # Clear any loaded plugins
    unload_all()

    configured = config.get('ckan.plugins', '').split()
    plugins = configured + find_system_plugins()

    if 'synchronous_search' not in plugins:
        if asbool(config.get('ckan.search.automatic_indexing', True)):
            log.debug('Loading the synchronous search plugin')
            plugins.append('synchronous_search')

    load(*plugins)
def build_subject(subject_default='Contact/Question from visitor',
                  timestamp_default=False):
    '''
    Build the subject line of the contact email from the config, falling
    back to the given defaults.

    :param subject_default: the str to use (translated) if
        ckanext.contact.subject isn't specified
    :param timestamp_default: the bool to use if
        ckanext.contact.add_timestamp_to_subject isn't specified
    :return: the subject line, followed by the current UTC time in square
        brackets when timestamps are enabled
    '''
    get_option = toolkit.config.get
    subject = get_option('ckanext.contact.subject',
                         toolkit._(subject_default))
    add_timestamp = asbool(
        get_option('ckanext.contact.add_timestamp_to_subject',
                   timestamp_default))
    if not add_timestamp:
        return subject

    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S %Z')
    return f'{subject} [{timestamp}]'
Beispiel #22
0
 def notify(self, entity, operation):
     """Forward a change of a package to the search index dispatcher.

     Nothing happens for entities that are not packages or when
     ``ckan.search.automatic_indexing`` is turned off.

     :param entity: the domain object that changed
     :param operation: the ``DomainObjectOperation`` that was performed
     """
     if (not isinstance(entity, model.Package) or
             not asbool(config.get('ckan.search.automatic_indexing', True))):
         return
     if operation == model.domain_object.DomainObjectOperation.deleted:
         # For deletions only the package id is dispatched.
         pkg_dict = {'id': entity.id}
     else:
         # Every other operation dispatches the full package dict.
         pkg_dict = logic.get_action('package_show')(
             {'model': model, 'ignore_auth': True, 'validate': False,
              'use_cache': False},
             {'id': entity.id})
     dispatch_by_operation(entity.__class__.__name__, pkg_dict, operation)
Beispiel #23
0
def prepare_from_flask_request():
    '''Describe the current request as a dict.

    The dict holds the https flag, host, port, path and copies of the GET
    and POST data. When ``ckan.saml_use_root_path`` is on, the configured
    ``ckan.root_path`` (with its ``/{{LANG}}`` placeholder removed) is put
    in front of the request path.

    NOTE(review): ``use_https`` is not defined inside this function; it has
    to be provided by the enclosing module.
    '''
    url_data = urlparse(request.url)
    script_name = request.path

    if asbool(config.get('ckan.saml_use_root_path', False)):
        # request.path no longer contains the root path, so add it back
        root_path = config.get('ckan.root_path', None)
        if root_path:
            script_name = re.sub('/{{LANG}}', '', root_path) + script_name

    request_data = {
        'https': use_https,
        'http_host': request.host,
        'server_port': url_data.port,
        'script_name': script_name,
        'get_data': request.args.copy(),
        'post_data': request.form.copy()
    }
    return request_data
Beispiel #24
0
 def validate(self):
     """Check every option and store its converted value back in place.

     Options listed in ``BOOLEAN_OPTIONS`` are converted with ``asbool``
     and those in ``INTEGER_OPTIONS`` with ``int``; all other supported
     options are kept as they are.

     :raises SearchQueryError: if a value cannot be converted or the
         option is listed in ``UNSUPPORTED_OPTIONS``
     """
     for option, raw in self.items():
         checked = raw
         if option in self.BOOLEAN_OPTIONS:
             try:
                 checked = asbool(raw)
             except ValueError:
                 raise SearchQueryError(
                     'Value for search option %r must be True or False (1 or 0) but received %r'
                     % (option, raw))
         elif option in self.INTEGER_OPTIONS:
             try:
                 checked = int(raw)
             except ValueError:
                 raise SearchQueryError(
                     'Value for search option %r must be an integer but received %r'
                     % (option, raw))
         elif option in self.UNSUPPORTED_OPTIONS:
             raise SearchQueryError('Search option %r is not supported' %
                                    option)
         self[option] = checked
def send_email_notifications(context, data_dict):
    '''Send any pending activity stream notification emails to users.

    You must provide a sysadmin's API key in the Authorization header of the
    request, or call this action from the command-line via a `paster post ...`
    command.

    :raises ValidationError: if ckan.activity_streams_email_notifications
        is not enabled in the config

    '''
    # paste.command_request is set when this function has been called by a
    # `paster post ...` command rather than a real HTTP request; in that
    # case the authorization check is skipped.
    from_paster_command = request.environ.get('paste.command_request')
    if not from_paster_command:
        _check_access('send_email_notifications', context, data_dict)

    notifications_enabled = converters.asbool(
        config.get('ckan.activity_streams_email_notifications'))
    if not notifications_enabled:
        raise ValidationError('ckan.activity_streams_email_notifications'
                              ' is not enabled in config')

    email_notifications.get_and_send_notifications_for_all_users()
Beispiel #26
0
    def logged_in(self):
        '''Handle the request that follows a login attempt.

        A local ``came_from`` URL takes priority and is redirected to.
        Otherwise a logged-in user gets the result of ``self.me()``, and a
        failed login is reported either as a flash message plus a redirect
        to the login page (legacy templates) or by rendering the login page
        with the error.
        '''
        # redirect if needed
        came_from = request.params.get('came_from', '')
        if h.url_is_local(came_from):
            return h.redirect_to(str(came_from))

        if not c.user:
            err = _('Login failed. Bad username or password.')
            if not asbool(config.get('ckan.legacy_templates', 'false')):
                return self.login(error=err)
            h.flash_error(err)
            h.redirect_to(controller='user',
                          action='login', came_from=came_from)
            # Nothing is returned on this path.
            return None

        # The user is looked up; the returned dict itself is not used.
        get_action('user_show')(None, {'id': c.user})
        return self.me()
Beispiel #27
0
def process_app_global(key, value):
    '''
    Adjust a key, value pair before it is set on the app_globals (g) object.

    The key is always mapped through ``get_globals_key``. An entry for the
    original key in app_globals_from_config_details may rename the key
    (``name``), supply a fallback for an empty value (``default``) and
    request a conversion (``type`` of ``bool``, ``int`` or ``split``).

    Returns the resulting (key, value) tuple.
    '''
    options = app_globals_from_config_details.get(key)
    key = get_globals_key(key)
    if not options:
        # No special handling configured for this key.
        return key, value

    if 'name' in options:
        key = options['name']
    value = value or options.get('default', '')

    data_type = options.get('type')
    if data_type == 'bool':
        return key, asbool(value)
    if data_type == 'int':
        return key, int(value)
    if data_type == 'split':
        return key, aslist(value)
    return key, value
Beispiel #28
0
def _setup_error_mail_handler(app):
    '''Attach an SMTP handler for error mails to ``app.logger``.

    The mail server, credentials and STARTTLS use are read from the
    ``smtp.*`` config options, the sender from ``error_email_from`` and the
    recipient from ``email_to``. A filter adds the URL, method, remote
    address and headers of the current request to every log record so that
    the mail format can refer to them.
    '''

    class ContextualFilter(logging.Filter):
        '''Annotates log records with details of the current request.'''

        def filter(self, log_record):
            log_record.url = request.path
            log_record.method = request.method
            log_record.ip = request.environ.get("REMOTE_ADDR")
            log_record.headers = request.headers
            # Never drop a record; this filter only adds attributes.
            return True

    mail_format = '''
Time:               %(asctime)s
URL:                %(url)s
Method:             %(method)s
IP:                 %(ip)s
Headers:            %(headers)s

'''

    smtp_server = config.get('smtp.server', 'localhost')
    if ':' in smtp_server:
        # "host:port" is handed over as a tuple of its parts
        mailhost = tuple(smtp_server.split(':'))
    else:
        mailhost = smtp_server

    smtp_user = config.get('smtp.user')
    credentials = None
    if smtp_user:
        credentials = (smtp_user, config.get('smtp.password'))

    # An empty tuple asks for a secure connection; None for none.
    secure = () if asbool(config.get('smtp.starttls')) else None

    mail_handler = SMTPHandler(
        mailhost=mailhost,
        fromaddr=config.get('error_email_from'),
        toaddrs=[config.get('email_to')],
        subject='Application Error',
        credentials=credentials,
        secure=secure
    )
    mail_handler.setFormatter(logging.Formatter(mail_format))

    app.logger.addFilter(ContextualFilter())
    app.logger.addHandler(mail_handler)
Beispiel #29
0
def search(package_type):
    u'''
    Render the dataset search page for ``package_type``.

    Reads the query, sort, page and facet limit parameters from
    ``request.args``, runs the ``package_search`` action and renders the
    search template with the collected variables.

    Aborts with 403 when ``site_read`` is not authorized and with 400 when
    the search query is rejected or a facet limit parameter is not an
    integer. Other search errors are logged and rendered as an empty page
    with ``query_error`` set.
    '''
    extra_vars = {}

    try:
        context = {
            u'model': model,
            u'user': g.user,
            u'auth_user_obj': g.userobj
        }
        check_access(u'site_read', context)
    except NotAuthorized:
        base.abort(403, _(u'Not authorized to see this page'))

    # unicode format (decoded from utf8)
    q = request.args.get(u'q', u'')
    extra_vars[u'q'] = q
    extra_vars['query_error'] = False

    page = h.get_page_number(request.args)
    per_page = int(config.get(u'ckan.datasets_per_page', 20))

    # most search operations should reset the page counter:
    args_without_page = [
        (name, val) for name, val in request.args.items() if name != u'page'
    ]

    extra_vars[u'drill_down_url'] = drill_down_url
    extra_vars[u'remove_field'] = partial(remove_field, package_type)

    sort_by = request.args.get(u'sort', None)
    args_without_sort = [
        (name, val) for name, val in args_without_page if name != u'sort'
    ]
    extra_vars[u'sort_by'] = partial(
        _sort_by, args_without_sort, package_type)

    # Only the field names are kept, the sort direction is dropped.
    extra_vars[u'sort_by_fields'] = (
        [field.split()[0] for field in sort_by.split(u',')]
        if sort_by else []
    )

    pager_url = partial(_pager_url, args_without_page, package_type)

    extra_vars[u'search_url_params'] = urlencode(
        _encode_params(args_without_page))

    details = _get_search_details()
    extra_vars[u'fields'] = details[u'fields']
    extra_vars[u'fields_grouped'] = details[u'fields_grouped']
    fq = details[u'fq']
    search_extras = details[u'search_extras']

    context = {
        u'model': model,
        u'session': model.Session,
        u'user': g.user,
        u'for_view': True,
        u'auth_user_obj': g.userobj
    }

    # Unless changed via config options, don't show other dataset
    # types on any search page. Potential alternatives are to show them
    # on the default search page (dataset) or on one other search page
    show_all_setting = config.get(u'ckan.search.show_all_types', u'dataset')
    try:
        # If the setting is a True/False string no type was specified, so
        # use the traditional behaviour of applying it only to the dataset
        # type
        search_all = asbool(show_all_setting)
    except ValueError:
        # Otherwise the setting is a string representing a type
        search_all = True
        search_all_type = show_all_setting
    else:
        search_all_type = u'dataset'

    if not (search_all and package_type == search_all_type):
        # Only show datasets of this particular type
        fq += u' +dataset_type:{type}'.format(type=package_type)

    org_label = h.humanize_entity_type(
        u'organization',
        h.default_group_type(u'organization'),
        u'facet label') or _(u'Organizations')

    group_label = h.humanize_entity_type(
        u'group',
        h.default_group_type(u'group'),
        u'facet label') or _(u'Groups')

    default_facet_titles = {
        u'organization': org_label,
        u'groups': group_label,
        u'tags': _(u'Tags'),
        u'res_format': _(u'Formats'),
        u'license_id': _(u'Licenses'),
    }

    # Facets without a default title are labelled with their own name.
    facets = OrderedDict(
        (name, default_facet_titles.get(name, name)) for name in h.facets()
    )

    # Facet titles
    for plugin in plugins.PluginImplementations(plugins.IFacets):
        facets = plugin.dataset_facets(facets, package_type)

    extra_vars[u'facet_titles'] = facets

    include_private = asbool(
        config.get(u'ckan.search.default_include_private', True)
    )
    data_dict = {
        u'q': q,
        u'fq': fq.strip(),
        u'facet.field': list(facets.keys()),
        u'rows': per_page,
        u'start': (page - 1) * per_page,
        u'sort': sort_by,
        u'extras': search_extras,
        u'include_private': include_private,
    }

    try:
        query = get_action(u'package_search')(context, data_dict)

        extra_vars[u'sort_by_selected'] = query[u'sort']

        result_page = h.Page(
            collection=query[u'results'],
            page=page,
            url=pager_url,
            item_count=query[u'count'],
            items_per_page=per_page
        )
        extra_vars[u'page'] = result_page
        extra_vars[u'search_facets'] = query[u'search_facets']
        result_page.items = query[u'results']
    except SearchQueryError as err:
        # User's search parameters are invalid, in such a way that is not
        # achievable with the web interface, so return a proper error to
        # discourage spiders which are the main cause of this.
        log.info(u'Dataset search query rejected: %r', err.args)
        base.abort(
            400,
            _(u'Invalid search query: {error_message}')
            .format(error_message=str(err))
        )
    except SearchError as err:
        # May be bad input from the user, but may also be more serious like
        # bad code causing a SOLR syntax error, or a problem connecting to
        # SOLR
        log.error(u'Dataset search error: %r', err.args)
        extra_vars[u'query_error'] = True
        extra_vars[u'search_facets'] = {}
        extra_vars[u'page'] = h.Page(collection=[])

    # FIXME: try to avoid using global variables
    g.search_facets_limits = {}
    for facet in extra_vars[u'search_facets'].keys():
        limit_param = u'_%s_limit' % facet
        try:
            facet_limit = int(
                request.args.get(
                    limit_param,
                    int(config.get(u'search.facets.default', 10))
                )
            )
        except ValueError:
            base.abort(
                400,
                _(u'Parameter u"{parameter_name}" is not '
                  u'an integer').format(parameter_name=limit_param)
            )

        g.search_facets_limits[facet] = facet_limit

    _setup_template_variables(context, {}, package_type=package_type)

    extra_vars[u'dataset_type'] = package_type

    # TODO: remove
    for name, val in six.iteritems(extra_vars):
        setattr(g, name, val)

    template = _get_pkg_template(u'search_template', package_type)
    return base.render(template, extra_vars)
Beispiel #30
0
    def index_package(self, pkg_dict, defer_commit=False):
        '''
        Flatten a dictized package and add it to the Solr index.

        :param pkg_dict: the dictized package. Nothing happens when it is
            None. A package with an empty state, or whose state contains
            ``deleted``, is handed to ``self.delete_package`` instead of
            being indexed. The dict is modified in place while it is being
            flattened.
        :param defer_commit: when true the document is added without asking
            Solr to commit. A false ``ckan.search.solr_commit`` setting
            disables the commit as well.
        :returns: None, or the result of ``self.delete_package`` for
            packages that are removed from the index.
        :raises SearchIndexError: if Solr returns an error or cannot be
            reached.
        '''
        if pkg_dict is None:
            return

        # tracking summary values will be stale, never store them
        tracking_summary = pkg_dict.pop('tracking_summary', None)
        for r in pkg_dict.get('resources', []):
            r.pop('tracking_summary', None)

        # Serialised before any of the flattening below touches the dict.
        data_dict_json = json.dumps(pkg_dict)

        # asbool() so that a textual "false" coming from the config file
        # really disables the cache (a non-empty string is always truthy).
        if asbool(config.get('ckan.cache_validated_datasets', True)):
            package_plugin = lib_plugins.lookup_package_plugin(
                pkg_dict.get('type'))

            schema = package_plugin.show_package_schema()
            validated_pkg_dict, errors = lib_plugins.plugin_validate(
                package_plugin, {
                    'model': model,
                    'session': model.Session
                }, pkg_dict, schema, 'package_show')
            pkg_dict['validated_data_dict'] = json.dumps(
                validated_pkg_dict,
                cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

        pkg_dict['data_dict'] = data_dict_json

        # add to string field for sorting
        title = pkg_dict.get('title')
        if title:
            pkg_dict['title_string'] = title

        # delete the package if there is no state, or the state is `deleted`
        state = pkg_dict.get('state')
        if not state or 'deleted' in state:
            return self.delete_package(pkg_dict)

        index_fields = RESERVED_FIELDS + list(pkg_dict.keys())

        # include the extras in the main namespace
        extras = pkg_dict.get('extras', [])
        for extra in extras:
            key, value = extra['key'], extra['value']
            if isinstance(value, (tuple, list)):
                value = " ".join(map(text_type, value))
            key = ''.join([c for c in key if c in KEY_CHARS])
            pkg_dict['extras_' + key] = value
            # never let an extra overwrite a reserved or existing field
            if key not in index_fields:
                pkg_dict[key] = value
        pkg_dict.pop('extras', None)

        # add tags, removing vocab tags from 'tags' list and adding them as
        # vocab_<tag name> so that they can be used in facets
        non_vocab_tag_names = []
        tags = pkg_dict.pop('tags', [])
        context = {'model': model}

        for tag in tags:
            if tag.get('vocabulary_id'):
                data = {'id': tag['vocabulary_id']}
                vocab = logic.get_action('vocabulary_show')(context, data)
                key = u'vocab_%s' % vocab['name']
                pkg_dict.setdefault(key, []).append(tag['name'])
            else:
                non_vocab_tag_names.append(tag['name'])

        pkg_dict['tags'] = non_vocab_tag_names

        # add groups
        groups = pkg_dict.pop('groups', [])

        # we use the capacity to make things private in the search index
        if pkg_dict['private']:
            pkg_dict['capacity'] = 'private'
        else:
            pkg_dict['capacity'] = 'public'

        pkg_dict['groups'] = [group['name'] for group in groups]

        # the organization dict is replaced by its name (or None) for the
        # index
        if pkg_dict.get('organization'):
            pkg_dict['organization'] = pkg_dict['organization']['name']
        else:
            pkg_dict['organization'] = None

        # tracking
        if not tracking_summary:
            tracking_summary = model.TrackingSummary.get_for_package(
                pkg_dict['id'])
        pkg_dict['views_total'] = tracking_summary['total']
        pkg_dict['views_recent'] = tracking_summary['recent']

        resource_fields = [('name', 'res_name'),
                           ('description', 'res_description'),
                           ('format', 'res_format'), ('url', 'res_url'),
                           ('resource_type', 'res_type')]
        resource_extras = [(e, 'res_extras_' + e)
                           for e in model.Resource.get_extra_columns()]
        # flatten the structure for indexing:
        for resource in pkg_dict.get('resources', []):
            for (okey, nkey) in resource_fields + resource_extras:
                pkg_dict[nkey] = pkg_dict.get(nkey,
                                              []) + [resource.get(okey, u'')]
        pkg_dict.pop('resources', None)

        # Relationships are indexed as <relationship type>: [package names].
        # Relationships where this package is the object are stored under
        # the reverse type.
        rel_dict = collections.defaultdict(list)
        subjects = pkg_dict.pop("relationships_as_subject", [])
        objects = pkg_dict.pop("relationships_as_object", [])
        for rel in objects:
            rel_type = model.PackageRelationship.forward_to_reverse_type(
                rel['type'])
            rel_dict[rel_type].append(
                model.Package.get(rel['subject_package_id']).name)
        for rel in subjects:
            rel_type = rel['type']
            rel_dict[rel_type].append(
                model.Package.get(rel['object_package_id']).name)
        for key, value in six.iteritems(rel_dict):
            if key not in pkg_dict:
                pkg_dict[key] = value

        pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

        # Save dataset type
        pkg_dict['dataset_type'] = pkg_dict['type']

        # clean the dict fixing keys and dates
        # FIXME where are we getting these dirty keys from?  can we not just
        # fix them in the correct place or is this something that always will
        # be needed?  For my data not changing the keys seems to not cause a
        # problem.
        new_dict = {}
        bogus_date = datetime.datetime(1, 1, 1)
        for key, value in pkg_dict.items():
            key = six.ensure_str(key)
            if key.endswith('_date'):
                try:
                    date = parse(value, default=bogus_date)
                    if date != bogus_date:
                        value = date.isoformat() + 'Z'
                    else:
                        # The date field was empty, so dateutil filled it with
                        # the default bogus date
                        value = None
                except (ValueError, IndexError):
                    # a date that cannot be parsed is left out of the index
                    continue
            new_dict[key] = value
        pkg_dict = new_dict

        for k in ('title', 'notes', 'title_string'):
            if k in pkg_dict and pkg_dict[k]:
                pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

        # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
        # with UTC time (i.e trailing Z)
        # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
        pkg_dict['metadata_created'] += 'Z'
        pkg_dict['metadata_modified'] += 'Z'

        # mark this CKAN instance as data source:
        pkg_dict['site_id'] = config.get('ckan.site_id')

        # Strip a selection of the fields.
        # These fields are possible candidates for sorting search results on,
        # so we strip leading spaces because solr will sort " " before "a" or "A".
        for field_name in ['title']:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()

        # add a unique index_id to avoid conflicts
        import hashlib
        pkg_dict['index_id'] = hashlib.md5(
            six.b('%s%s' %
                  (pkg_dict['id'], config.get('ckan.site_id')))).hexdigest()

        for item in PluginImplementations(IPackageController):
            pkg_dict = item.before_index(pkg_dict)

        assert pkg_dict, 'Plugin must return non empty package dict on index'

        # permission labels determine visibility in search, can't be set
        # in original dataset or before_index plugins
        labels = lib_plugins.get_permission_labels()
        dataset = model.Package.get(pkg_dict['id'])
        pkg_dict['permission_labels'] = labels.get_dataset_labels(
            dataset) if dataset else []  # TestPackageSearchIndex-workaround

        # send to solr:
        # `conn` is bound up front so the socket.error handler below can
        # still build its message if make_connection() itself failed.
        conn = None
        try:
            conn = make_connection()
            commit = not defer_commit
            if not asbool(config.get('ckan.search.solr_commit', 'true')):
                commit = False
            conn.add(docs=[pkg_dict], commit=commit)
        except pysolr.SolrError as e:
            msg = 'Solr returned an error: {0}'.format(
                e.args[0][:1000]  # limit huge responses
            )
            raise SearchIndexError(msg)
        except socket.error as e:
            err = 'Could not connect to Solr using {0}: {1}'.format(
                getattr(conn, 'url', None), str(e))
            log.error(err)
            raise SearchIndexError(err)

        commit_debug_msg = 'Not committed yet' if defer_commit else 'Committed'
        log.debug('Updated index for %s [%s]',
                  pkg_dict.get('name'), commit_debug_msg)