def set_cors_headers_for_response(response):
    u'''
    Add Access-Control-Allow-* headers to ``response`` when CORS applies.

    Nothing is added unless ``ckan.cors.origin_allow_all`` has a truthy
    config value and the request carries an ``Origin`` header. When both
    hold, the allowed origin is ``*`` if ``origin_allow_all`` parses as
    true; otherwise it is the request's own Origin, provided that origin is
    listed in the space-separated ``ckan.cors.origin_whitelist``.

    :param response: the response whose ``headers`` mapping is updated
    :returns: the same ``response`` object
    '''
    allow_all_setting = config.get(u'ckan.cors.origin_allow_all')
    if not allow_all_setting:
        return response

    origin = request.headers.get(u'Origin')
    if not origin:
        return response

    if asbool(allow_all_setting):
        allowed_origin = b'*'
    else:
        whitelist = config.get(u'ckan.cors.origin_whitelist')
        if not whitelist or origin not in whitelist.split(u' '):
            return response
        # Echo the request's own origin back to allow it.
        allowed_origin = origin

    cors_headers = (
        (b'Access-Control-Allow-Origin', allowed_origin),
        (b'Access-Control-Allow-Methods',
         b'POST, PUT, GET, DELETE, OPTIONS'),
        (b'Access-Control-Allow-Headers',
         b'X-CKAN-API-KEY, Authorization, Content-Type'),
    )
    for header_name, header_value in cors_headers:
        response.headers[header_name] = header_value
    return response
def check_config_permission(permission):
    '''Return the configured value for an auth permission.

    ``permission`` is a string identifying the auth permission (eg
    `anon_create_dataset`), optionally prefixed with `ckan.auth.`.

    The recognised permissions are the keys of
    CONFIG_PERMISSIONS_DEFAULTS. They can be overridden in the config file
    by prefixing them with `ckan.auth.`.

    Returns False for an unknown permission. Otherwise returns the
    permission value converted with ``asbool``, except for
    `roles_that_cascade_to_sub_groups`, which is returned as a list of
    strings.
    '''
    name = permission.replace('ckan.auth.', '')
    if name not in CONFIG_PERMISSIONS_DEFAULTS:
        return False

    configured = config.get(
        'ckan.auth.' + name, CONFIG_PERMISSIONS_DEFAULTS.get(name))

    if name != 'roles_that_cascade_to_sub_groups':
        return asbool(configured)

    # This permission is stored as a whitespace separated list of strings.
    if not configured:
        return []
    return configured.split()
def process_app_global(
        key: str, value: str) -> tuple[str, Union[bool, int, str, list[str]]]:
    '''
    Adjust a key/value pair that is about to be set on app_globals (g).

    The entry for ``key`` in app_globals_from_config_details (if any) can
    rename the key, supply a default for an empty value and select a
    conversion (``bool``, ``int`` or ``split``). Without such an entry the
    value is returned untouched under the key produced by get_globals_key.
    '''
    options = app_globals_from_config_details.get(key)
    key = get_globals_key(key)
    if not options:
        return key, value

    key = options.get('name', key)
    value = value or options.get('default', '')

    data_type = options.get('type')
    if data_type == 'bool':
        return key, asbool(value)
    if data_type == 'int':
        return key, int(value)
    if data_type == 'split':
        return key, aslist(value)
    return key, value
def get(self, id=None, data=None, errors=None, error_summary=None):
    '''Render the user edit page.

    The current user data is loaded through the ``user_show`` action and
    used as form data unless ``data`` is passed in. Aborts with 403 when
    the action raises NotAuthorized and with 404 when it raises NotFound.
    '''
    context, id = self._prepare(id)
    data_dict = {u'id': id}

    try:
        stored = logic.get_action(u'user_show')(context, data_dict)
        g.display_name = stored.get(u'display_name')
        g.user_name = stored.get(u'name')
        data = data or stored
    except logic.NotAuthorized:
        base.abort(403, _(u'Unauthorized to edit user %s') % u'')
    except logic.NotFound:
        base.abort(404, _(u'User not found'))

    form_vars = {
        u'data': data,
        u'errors': errors or {},
        u'error_summary': error_summary
    }

    template_context = {
        u'model': model,
        u'session': model.Session,
        u'user': g.user
    }
    extra_vars = _extra_template_variables(template_context, data_dict)
    extra_vars[u'show_email_notifications'] = asbool(
        config.get(u'ckan.activity_streams_email_notifications'))

    # The form snippet sees the form data plus the page-level variables.
    form_vars.update(extra_vars)
    extra_vars[u'form'] = base.render(edit_user_form, extra_vars=form_vars)

    return base.render(u'user/edit.html', extra_vars)
def sysadmin():
    '''Grant or revoke sysadmin status for the user named in the form.

    Reads ``username`` and ``status`` from the submitted form, applies the
    change through the ``user_patch`` action, flashes a confirmation and
    redirects to ``admin.index``. Aborts with 403 when the action raises
    NotAuthorized and with 404 when it raises NotFound.
    '''
    username = request.form.get(u'username')
    status = asbool(request.form.get(u'status'))

    try:
        context = {
            u'model': model,
            u'session': model.Session,
            u'user': g.user,
            u'auth_user_obj': g.userobj,
        }
        data_dict = {u'id': username, u'sysadmin': status}
        user = logic.get_action(u'user_patch')(context, data_dict)
    except logic.NotAuthorized:
        return base.abort(
            403, _(u'Not authorized to promote user to sysadmin'))
    except logic.NotFound:
        return base.abort(404, _(u'User not found'))

    # Translate the message template first and fill in the name afterwards:
    # formatting before calling _() would look up the already-personalised
    # string, which can never match a catalog entry.
    if status:
        message = _(u'Promoted {} to sysadmin')
    else:
        message = _(u'Revoked sysadmin permission from {}')
    h.flash_success(message.format(user[u'display_name']))

    return h.redirect_to(u'admin.index')
def render_snippet(*template_names, **kw):
    ''' Render the first snippet template that can be found.

    When the ``debug`` config option is on, the rendered html is wrapped in
    comment tags naming the template used.

    NOTE: unlike other render functions this takes keyword arguments
    instead of a dict for the extra template variables.

    :param template_names: the template to render, optionally followed by
        fallback templates to try when the previous one can't be found.
        Each is a path relative to the registered tpl_dir.
    :type template_names: str
    :param kw: extra template variables to supply to the template
    :type kw: named arguments of any type that are supported by the
        template
    '''
    last_exc = None
    for template_name in template_names:
        try:
            rendered = render(template_name, extra_vars=kw)
            if asbool(config.get('debug')):
                rendered = (
                    '\n<!-- Snippet %s start -->\n%s\n<!-- Snippet %s end -->\n'
                    % (template_name, rendered, template_name))
            return h.literal(rendered)
        except TemplateNotFound as exc:
            if exc.name != template_name:
                # a nested template doesn't exist - don't fallback
                raise exc
            # the requested template itself is missing - remember the
            # error in case this was the last candidate and move on
            last_exc = exc

    # Only reached when no candidate could be rendered.
    raise last_exc or TemplateNotFound
def webassets_init():
    '''Create the module-level webassets ``env`` and register core assets.

    The environment is pointed at the webassets path, its debug flag
    follows the ``debug`` config option, and the ``base`` directory of the
    configured public folder is registered together with its ``vendor``,
    ``javascript``, ``datapreview`` and ``css`` libraries.
    '''
    global env

    static_path = get_webassets_path()

    public = config.get(u'ckan.base_public_folder')
    here = os.path.dirname(__file__)
    public_folder = os.path.abspath(os.path.join(here, u'..', public))
    base_path = os.path.join(public_folder, u'base')

    env = Environment()
    env.directory = static_path
    env.debug = asbool(config.get(u'debug', False))
    env.url = u'/webassets/'

    add_public_path(base_path, u'/base/')
    logger.debug(u'Base path {0}'.format(base_path))

    # (library name, sub-directory of the base folder)
    libraries = (
        (u'vendor', u'vendor'),
        (u'base', u'javascript'),
        (u'datapreview', u'datapreview'),
        (u'css', u'css'),
    )
    for library_name, subdir in libraries:
        create_library(library_name, os.path.join(base_path, subdir))
def user_show(context, data_dict):
    '''Auth check for ``user_show``.

    Succeeds for everyone while ``ckan.auth.public_user_details`` is on
    (the default); otherwise the decision is delegated to restrict_anon.
    '''
    # By default, user details can be read by anyone, but some properties
    # like the API key are stripped at the action level if not logged in.
    if asbool(config.get('ckan.auth.public_user_details', True)):
        return {'success': True}
    return restrict_anon(context)
def read(self, id=None):
    '''Render the user profile page (``user/read.html``).'''
    context = dict(
        model=model,
        session=model.Session,
        user=c.user,
        auth_user_obj=c.userobj,
        for_view=True,
    )
    data_dict = dict(
        id=id,
        user_obj=c.userobj,
        include_datasets=True,
        include_num_followers=True,
    )

    self._setup_template_variables(context, data_dict)

    # The legacy templates have the user's activity stream on the user
    # profile page, new templates do not.
    legacy_templates = asbool(config.get('ckan.legacy_templates', False))
    if legacy_templates:
        activity_html = get_action('user_activity_list_html')
        c.user_activity_stream = activity_html(
            context, {'id': c.user_dict['id']})

    return render('user/read.html')
def edit(self, id=None, data=None, errors=None, error_summary=None):
    '''Show the user edit form, or save it when it was submitted.

    Without an ``id`` the logged-in user is edited; a 400 is raised when
    nobody is logged in either. A POST carrying ``save`` and no ``data`` is
    handed to ``_save_edit``. Otherwise the stored user is loaded and the
    form rendered. Aborts with 403 when access is denied and with 404 when
    the user does not exist.
    '''
    context = {
        'save': 'save' in request.params,
        'schema': self._edit_form_to_db_schema(),
        'model': model,
        'session': model.Session,
        'user': c.user,
        'auth_user_obj': c.userobj,
    }

    if id is None:
        if not c.userobj:
            abort(400, _('No user specified'))
        else:
            id = c.userobj.id
    data_dict = {'id': id}

    try:
        check_access('user_update', context, data_dict)
    except NotAuthorized:
        abort(403, _('Unauthorized to edit a user.'))

    is_form_submission = (
        context['save'] and not data and request.method == 'POST')
    if is_form_submission:
        return self._save_edit(id, context)

    try:
        stored = get_action('user_show')(context, data_dict)

        form_schema = self._db_to_edit_form_schema()
        if form_schema:
            stored, errors = dictization_functions.validate(
                stored, form_schema, context)

        c.display_name = stored.get('display_name')
        c.user_name = stored.get('name')

        data = data or stored
    except NotAuthorized:
        abort(403, _('Unauthorized to edit user %s') % '')
    except NotFound:
        abort(404, _('User not found'))

    user_obj = context.get('user_obj')

    # Only sysadmins and the user themselves may go any further.
    if not authz.is_sysadmin(c.user) and c.user != user_obj.name:
        abort(403,
              _('User %s not authorized to edit %s') % (str(c.user), id))

    form_vars = {
        'data': data,
        'errors': errors or {},
        'error_summary': error_summary,
    }

    template_context = {
        'model': model,
        'session': model.Session,
        'user': c.user,
    }
    self._setup_template_variables(template_context, data_dict)

    c.is_myself = True
    c.show_email_notifications = asbool(
        config.get('ckan.activity_streams_email_notifications'))
    c.form = render(self.edit_user_form, extra_vars=form_vars)

    return render('user/edit.html')
def _allow_caching(cache_force=None):
    '''Decide whether the current page may be cached.

    An explicit ``cache_force`` wins. Otherwise caching is refused as soon
    as one of the checks below matches. When caching is not allowed, the
    request environ is flagged with ``__no_cache__``.
    '''
    if cache_force is not None:
        # Force cache or not if explicit.
        allow_cache = cache_force
    else:
        allow_cache = not (
            # Do not cache pages for logged in users/flash messages etc.
            _is_valid_session_cookie_data()
            # Tests etc.
            or 'REMOTE_USER' in request.environ
            # A non-cachable template was used in this request.
            or request.environ.get('__no_cache__')
            # The __no_cache__ param was set in the query string.
            or request.params.get('__no_cache__')
            # Caching is not enabled in config.
            or not asbool(config.get('ckan.cache_enabled', False))
        )

    if not allow_cache:
        # Prevent any further rendering from being cached.
        request.environ['__no_cache__'] = True
def user_list(context, data_dict):
    '''Auth check for ``user_list``.

    Fails when an ``email`` filter is supplied. Otherwise it succeeds for
    everyone while ``ckan.auth.public_user_details`` is on (the default),
    and defers to restrict_anon when it is off.
    '''
    email_filter = data_dict.get('email')
    if email_filter:
        # only sysadmins can specify the 'email' parameter
        return {'success': False}

    # Users list is visible by default
    if asbool(config.get('ckan.auth.public_user_details', True)):
        return {'success': True}
    return restrict_anon(context)
def group_show(context, data_dict):
    '''Auth check for ``group_show``.

    An active group is readable by anyone when user details are public, or
    when the request neither asks for the member list (``include_users``)
    nor has ``object_type`` set to ``user``. In every other case the user
    needs the ``read`` permission on the group.
    '''
    user = context.get('user')
    group = get_group_object(context, data_dict)

    if group.state == 'active':
        if asbool(config.get('ckan.auth.public_user_details', True)):
            return {'success': True}
        wants_users = asbool(data_dict.get('include_users', False))
        if not wants_users and data_dict.get('object_type', None) != 'user':
            return {'success': True}

    if authz.has_user_permission_for_group_or_org(group.id, user, 'read'):
        return {'success': True}

    denied = _('User %s not authorized to read group %s') % (user, group.id)
    return {'success': False, 'msg': denied}
def delete_package(self, pkg_dict):
    '''Delete the Solr document(s) matching ``pkg_dict['id']``.

    The id is matched against both the ``id`` and the ``name`` field and
    the match is limited to this site's ``ckan.site_id``. Any failure is
    logged and re-raised as SearchIndexError.
    '''
    conn = make_connection()

    pkg_id = pkg_dict.get('id')
    site_id = config.get('ckan.site_id')
    query_template = '+%s:%s AND +(id:"%s" OR name:"%s") AND +site_id:"%s"'
    query = query_template % (
        TYPE_FIELD, PACKAGE_TYPE, pkg_id, pkg_id, site_id)

    try:
        should_commit = asbool(config.get('ckan.search.solr_commit', 'true'))
        conn.delete(q=query, commit=should_commit)
    except Exception as e:
        log.exception(e)
        raise SearchIndexError(e)
def recut(): """ Recreate setup.py so that we can edit keywords Remove unnecessary code examples """ # template location try: # cutting cookie from directory with template temp_dir = find.find_template('..') except NonTemplatedInputDirException as e: # template coming from Github # Hooks are passed through jinja2. raw will # Make sure `cookiecutter.project` isn't replaced {% raw %} temp_dir = os.path.join(config['cookiecutters_dir'], 'cookiecutter-ckan-extension', '{{cookiecutter.project}}') {% endraw %} # Location for resulting file destination = os.getcwd() # name of template setup_template = 'setup.py' # get context context = {{ cookiecutter | jsonify }} # Process keywords keywords = context['keywords'].strip().split() keywords = [keyword for keyword in keywords if keyword not in ('ckan', 'CKAN', 'A', 'space', 'seperated', 'list', 'of', 'keywords')] keywords.insert(0, 'CKAN') keywords = u' '.join(keywords) context['keywords'] = keywords # Double check 'project_shortname' and 'plugin_class_name' short_name = context['project'][8:].replace('-','_') if context['project_shortname'] != short_name: context['project_shortname'] = short_name plugin_class_name = '{}Plugin'.format(context['project_shortname'] .title().replace('_', '')) if context['plugin_class_name'] != plugin_class_name: context['plugin_class_name'] = plugin_class_name # Recut cookie env = StrictEnvironment() env.loader = jinja2.FileSystemLoader(temp_dir) gen.generate_file(project_dir=destination, infile=setup_template, context={'cookiecutter': context}, env=env) if not asbool(context['include_examples']): remove_code_examples(os.path.join(destination, 'ckanext', short_name))
def __init__(self, data):
    '''Store ``data``, normalising legacy keys and ``date_created``.

    NOTE: ``data`` is modified in place and kept as ``self._data``.
    '''
    # convert old keys if necessary
    legacy_flags = (
        ('is_okd_compliant', 'od_conformance'),
        ('is_osi_compliant', 'osd_conformance'),
    )
    for old_key, new_key in legacy_flags:
        if old_key in data:
            data[new_key] = 'approved' if asbool(data[old_key]) else ''
            del data[old_key]

    self._data = data
    for key, value in self._data.items():
        if key == 'date_created':
            # Parse ISO formatted datetime: every non-digit character acts
            # as a separator between the datetime components.
            components = [int(item) for item in re.split(r'[^\d]', value)]
            self._data[key] = datetime.datetime(*components)
        elif isinstance(value, str):
            # Strings are stored back unchanged.
            self._data[key] = value
def __init__(self, data):
    '''Store ``data``, normalising legacy keys, dates and strings.

    The legacy ``is_okd_compliant`` / ``is_osi_compliant`` flags are
    replaced by ``od_conformance`` / ``osd_conformance``, ``date_created``
    is parsed into a ``datetime`` and ``str`` values are decoded from
    UTF-8.

    NOTE: ``data`` is modified in place and kept as ``self._data``.
    '''
    # convert old keys if necessary
    if 'is_okd_compliant' in data:
        data['od_conformance'] = 'approved' \
            if asbool(data['is_okd_compliant']) else ''
        del data['is_okd_compliant']
    if 'is_osi_compliant' in data:
        data['osd_conformance'] = 'approved' \
            if asbool(data['is_osi_compliant']) else ''
        del data['is_osi_compliant']

    self._data = data
    for (key, value) in self._data.items():
        if key == 'date_created':
            # Parse ISO formatted datetime. The pattern is a raw string so
            # that ``\d`` reaches the regex engine instead of being an
            # invalid string escape.
            value = datetime.datetime(*map(int, re.split(r'[^\d]', value)))
            self._data[key] = value
        elif isinstance(value, str):
            # Convert str to unicode (keeps Pylons and SQLAlchemy happy).
            value = value.decode('utf8')
            self._data[key] = value
def package_create(context, data_dict):
    """Auth check for ``package_create``.

    Dataset creation is refused unless the call comes through the API
    (``api_version`` present in ``context``). After the stock CKAN check
    passes, public datasets are additionally refused when
    ``ckanext.dcor_schemas.allow_public_datasets`` is disabled.
    """
    # Note that we did not decorate this function with
    # @logic.auth_allow_anonymous_access. This effectively
    # disables dataset creation via the web interface.
    # However, we make sure that the API is used with the following:
    if 'api_version' not in context:
        return {
            "success": False,
            "msg": "Creating datasets is only possible via the API. "
                   "Please use DCOR-Aid for uploading data!"
        }

    # original auth function
    stock_result = logic.auth.create.package_create(context, data_dict)
    if not stock_result["success"]:
        return stock_result

    if not data_dict:
        return {"success": True}

    # Use our own configuration option to determine whether the
    # admin has disabled public datasets (e.g. for DCOR-med).
    public_allowed = asbool(
        config.get("ckanext.dcor_schemas.allow_public_datasets", "true"))
    must_be_private = not public_allowed
    # public if not has to be private
    is_private = asbool(data_dict.get('private', must_be_private))
    if must_be_private and not is_private:
        return {
            "success": False,
            "msg": "Creating public datasets has been disabled via "
                   "the configuration option 'ckanext.dcor_schemas."
                   "allow_public_datasets = false'!"
        }

    return {"success": True}
def error_handler(e):
    '''Log ``e`` and render the error document for it.

    HTTP exceptions are logged at debug level (with exception info) when
    the ``debug``/``DEBUG`` config option is on, and at info level
    otherwise; they are rendered with their own code, description and
    name. Anything else is logged as an error and rendered as a 500.

    :returns: a ``(rendered page, status code)`` tuple
    '''
    debug = asbool(config.get('debug', config.get('DEBUG', False)))
    template = u'error_document_template.html'

    if not isinstance(e, HTTPException):
        log.error(e, exc_info=sys.exc_info)
        server_error_vars = {
            u'code': [500],
            u'content': u'Internal server error'
        }
        return base.render(template, server_error_vars), 500

    if debug:
        log.debug(e, exc_info=sys.exc_info)
    else:
        log.info(e)

    http_error_vars = {
        u'code': e.code,
        u'content': e.description,
        u'name': e.name
    }
    return base.render(template, http_error_vars), e.code
def load_all():
    '''
    Unload everything, then load the plugins named in the 'ckan.plugins'
    config directive together with the system plugins.
    '''
    # Clear any loaded plugins
    unload_all()

    configured = config.get('ckan.plugins', '').split()
    plugins = configured + find_system_plugins()

    # Add the synchronous search plugin, unless already loaded or
    # explicitly disabled
    if 'synchronous_search' not in plugins:
        if asbool(config.get('ckan.search.automatic_indexing', True)):
            log.debug('Loading the synchronous search plugin')
            plugins.append('synchronous_search')

    load(*plugins)
def build_subject(subject_default='Contact/Question from visitor',
                  timestamp_default=False):
    '''
    Build the subject line for the contact email from the config, falling
    back to the given defaults.

    :param subject_default: the default str to use if
        ckanext.contact.subject isn't specified
    :param timestamp_default: the default bool to use if
        add_timestamp_to_subject isn't specified
    :return: the subject line, with a UTC timestamp appended in square
        brackets when timestamps are enabled
    '''
    get_option = toolkit.config.get
    subject = get_option('ckanext.contact.subject',
                         toolkit._(subject_default))

    add_timestamp = asbool(get_option(
        'ckanext.contact.add_timestamp_to_subject', timestamp_default))
    if not add_timestamp:
        return subject

    now_utc = datetime.now(timezone.utc)
    timestamp = now_utc.strftime('%Y-%m-%d %H:%M:%S %Z')
    return f'{subject} [{timestamp}]'
def notify(self, entity, operation):
    '''Forward a Package change to the synchronous search index.

    Nothing happens for entities that are not ``model.Package`` instances
    or when ``ckan.search.automatic_indexing`` is disabled. A deleted
    package is dispatched with just its id; any other operation is
    dispatched with the full ``package_show`` dict (auth ignored, not
    validated, cache bypassed).
    '''
    if not isinstance(entity, model.Package):
        return
    if not asbool(config.get('ckan.search.automatic_indexing', True)):
        return

    # An operation either is or is not "deleted", so two branches cover
    # every case; the former third "discarded" branch could never run.
    if operation == model.domain_object.DomainObjectOperation.deleted:
        pkg_dict = {'id': entity.id}
    else:
        pkg_dict = logic.get_action('package_show')(
            {'model': model, 'ignore_auth': True, 'validate': False,
             'use_cache': False},
            {'id': entity.id})

    dispatch_by_operation(entity.__class__.__name__, pkg_dict, operation)
def prepare_from_flask_request():
    '''Describe the current Flask request as a plain dict.

    When ``ckan.saml_use_root_path`` is enabled and ``ckan.root_path`` is
    set, the root path (with its ``/{{LANG}}`` placeholder removed) is
    prepended to the script name.
    '''
    script_name = request.path

    if asbool(config.get('ckan.saml_use_root_path', False)):
        # FIX FOR ROOT_PATH REMOVED IN request.path
        root_path = config.get('ckan.root_path', None)
        if root_path:
            script_name = re.sub('/{{LANG}}', '', root_path) + script_name

    parsed_url = urlparse(request.url)
    return {
        'https': use_https,
        'http_host': request.host,
        'server_port': parsed_url.port,
        'script_name': script_name,
        'get_data': request.args.copy(),
        'post_data': request.form.copy()
    }
def validate(self):
    '''Check and normalise every search option held in this mapping.

    Boolean options are converted with ``asbool`` and integer options with
    ``int``; the converted value is stored back under the same key.

    :raises SearchQueryError: when a value cannot be converted or the
        option is listed in UNSUPPORTED_OPTIONS
    '''
    for option, raw in self.items():
        normalised = raw
        if option in self.BOOLEAN_OPTIONS:
            try:
                normalised = asbool(raw)
            except ValueError:
                raise SearchQueryError(
                    'Value for search option %r must be True or False (1 or 0) but received %r'
                    % (option, raw))
        elif option in self.INTEGER_OPTIONS:
            try:
                normalised = int(raw)
            except ValueError:
                raise SearchQueryError(
                    'Value for search option %r must be an integer but received %r'
                    % (option, raw))
        elif option in self.UNSUPPORTED_OPTIONS:
            raise SearchQueryError(
                'Search option %r is not supported' % option)
        self[option] = normalised
def send_email_notifications(context, data_dict):
    '''Send any pending activity stream notification emails to users.

    You must provide a sysadmin's API key in the Authorization header of
    the request, or call this action from the command-line via a
    `paster post ...` command.

    :raises ValidationError: when
        ``ckan.activity_streams_email_notifications`` is not enabled
    '''
    # If paste.command_request is True then this function has been called
    # by a `paster post ...` command not a real HTTP request, so skip the
    # authorization.
    from_command_line = request.environ.get('paste.command_request')
    if not from_command_line:
        _check_access('send_email_notifications', context, data_dict)

    notifications_enabled = converters.asbool(
        config.get('ckan.activity_streams_email_notifications'))
    if not notifications_enabled:
        raise ValidationError(
            'ckan.activity_streams_email_notifications is not enabled '
            'in config')

    email_notifications.get_and_send_notifications_for_all_users()
def logged_in(self):
    '''Finish a login attempt.

    A local ``came_from`` URL is redirected to first. A logged-in user is
    then sent on via ``self.me()``. A failed login either flashes the
    error and redirects to the login page (legacy templates) or re-renders
    the login form with the error.
    '''
    # redirect if needed
    came_from = request.params.get('came_from', '')
    if h.url_is_local(came_from):
        return h.redirect_to(str(came_from))

    if c.user:
        # The result is not used; the action is still called and any
        # error it raises propagates.
        get_action('user_show')(None, {'id': c.user})
        return self.me()

    err = _('Login failed. Bad username or password.')
    if not asbool(config.get('ckan.legacy_templates', 'false')):
        return self.login(error=err)

    h.flash_error(err)
    h.redirect_to(controller='user', action='login', came_from=came_from)
def process_app_global(key, value):
    '''
    Adjust a key/value pair that is about to be set on app_globals (g).

    The entry for ``key`` in app_globals_from_config_details (if any) can
    rename the key, supply a default for an empty value and select a
    conversion (``bool``, ``int`` or ``split``).
    '''
    options = app_globals_from_config_details.get(key)
    key = get_globals_key(key)
    if not options:
        return key, value

    key = options.get('name', key)
    value = value or options.get('default', '')

    data_type = options.get('type')
    if data_type == 'bool':
        return key, asbool(value)
    if data_type == 'int':
        return key, int(value)
    if data_type == 'split':
        return key, aslist(value)
    return key, value
def _setup_error_mail_handler(app):
    '''Attach an SMTP handler to ``app.logger``.

    The handler is built from the ``smtp.*``, ``error_email_from`` and
    ``email_to`` config options. A filter adds request details (url,
    method, ip, headers) to each record for the mail body.
    '''

    class ContextualFilter(logging.Filter):
        def filter(self, log_record):
            # Attach the request details that the mail format refers to.
            log_record.url = request.path
            log_record.method = request.method
            log_record.ip = request.environ.get("REMOTE_ADDR")
            log_record.headers = request.headers
            return True

    smtp_server = config.get('smtp.server', 'localhost')
    if ':' in smtp_server:
        mailhost = tuple(smtp_server.split(':'))
    else:
        mailhost = smtp_server

    smtp_user = config.get('smtp.user')
    credentials = (
        (smtp_user, config.get('smtp.password')) if smtp_user else None)

    # An empty tuple asks SMTPHandler for TLS without a key or cert file.
    secure = () if asbool(config.get('smtp.starttls')) else None

    mail_handler = SMTPHandler(
        mailhost=mailhost,
        fromaddr=config.get('error_email_from'),
        toaddrs=[config.get('email_to')],
        subject='Application Error',
        credentials=credentials,
        secure=secure
    )

    mail_format = ''' Time: %(asctime)s URL: %(url)s Method: %(method)s IP: %(ip)s Headers: %(headers)s '''
    mail_handler.setFormatter(logging.Formatter(mail_format))

    app.logger.addFilter(ContextualFilter())
    app.logger.addHandler(mail_handler)
def search(package_type):
    '''Render the dataset search page for ``package_type``.

    The query, page, sort order and facet limits come from
    ``request.args``; the search itself is run through the
    ``package_search`` action. Every entry of ``extra_vars`` is also
    copied onto ``g`` before rendering.

    Aborts with 403 when ``site_read`` is not authorized and with 400 when
    the search query or a ``_<facet>_limit`` parameter is invalid. A
    SearchError is logged and the page is rendered with ``query_error``
    set and an empty result page.

    :param package_type: the dataset type being searched
    :returns: the rendered search template
    '''
    extra_vars = {}

    try:
        context = {
            u'model': model,
            u'user': g.user,
            u'auth_user_obj': g.userobj
        }
        check_access(u'site_read', context)
    except NotAuthorized:
        base.abort(403, _(u'Not authorized to see this page'))

    # unicode format (decoded from utf8)
    extra_vars[u'q'] = q = request.args.get(u'q', u'')

    extra_vars['query_error'] = False
    page = h.get_page_number(request.args)

    limit = int(config.get(u'ckan.datasets_per_page', 20))

    # most search operations should reset the page counter:
    params_nopage = [(k, v) for k, v in request.args.items() if k != u'page']

    extra_vars[u'drill_down_url'] = drill_down_url
    extra_vars[u'remove_field'] = partial(remove_field, package_type)

    sort_by = request.args.get(u'sort', None)
    params_nosort = [(k, v) for k, v in params_nopage if k != u'sort']

    extra_vars[u'sort_by'] = partial(_sort_by, params_nosort, package_type)

    # Keep only the field name of each comma separated sort clause.
    if not sort_by:
        sort_by_fields = []
    else:
        sort_by_fields = [field.split()[0] for field in sort_by.split(u',')]
    extra_vars[u'sort_by_fields'] = sort_by_fields

    pager_url = partial(_pager_url, params_nopage, package_type)

    search_url_params = urlencode(_encode_params(params_nopage))
    extra_vars[u'search_url_params'] = search_url_params

    details = _get_search_details()
    extra_vars[u'fields'] = details[u'fields']
    extra_vars[u'fields_grouped'] = details[u'fields_grouped']
    fq = details[u'fq']
    search_extras = details[u'search_extras']

    # Fresh context for the search itself (replaces the auth-check one).
    context = {
        u'model': model,
        u'session': model.Session,
        u'user': g.user,
        u'for_view': True,
        u'auth_user_obj': g.userobj
    }

    # Unless changed via config options, don't show other dataset
    # types on any search page. Potential alternatives are to show them
    # on the default search page (dataset) or on one other search page
    search_all_type = config.get(u'ckan.search.show_all_types', u'dataset')
    search_all = False

    try:
        # If the "type" is set to True or False, convert to bool
        # and we know that no type was specified, so use traditional
        # behaviour of applying this only to dataset type
        search_all = asbool(search_all_type)
        search_all_type = u'dataset'
    # Otherwise we treat as a string representing a type
    except ValueError:
        search_all = True

    if not search_all or package_type != search_all_type:
        # Only show datasets of this particular type
        fq += u' +dataset_type:{type}'.format(type=package_type)

    facets = OrderedDict()

    org_label = h.humanize_entity_type(
        u'organization',
        h.default_group_type(u'organization'),
        u'facet label') or _(u'Organizations')

    group_label = h.humanize_entity_type(
        u'group',
        h.default_group_type(u'group'),
        u'facet label') or _(u'Groups')

    default_facet_titles = {
        u'organization': org_label,
        u'groups': group_label,
        u'tags': _(u'Tags'),
        u'res_format': _(u'Formats'),
        u'license_id': _(u'Licenses'),
    }

    # Facets without a default title are labelled with their own name.
    for facet in h.facets():
        if facet in default_facet_titles:
            facets[facet] = default_facet_titles[facet]
        else:
            facets[facet] = facet

    # Facet titles
    for plugin in plugins.PluginImplementations(plugins.IFacets):
        facets = plugin.dataset_facets(facets, package_type)

    extra_vars[u'facet_titles'] = facets
    data_dict = {
        u'q': q,
        u'fq': fq.strip(),
        u'facet.field': list(facets.keys()),
        u'rows': limit,
        u'start': (page - 1) * limit,
        u'sort': sort_by,
        u'extras': search_extras,
        u'include_private': asbool(
            config.get(u'ckan.search.default_include_private', True)
        ),
    }
    try:
        query = get_action(u'package_search')(context, data_dict)

        extra_vars[u'sort_by_selected'] = query[u'sort']

        extra_vars[u'page'] = h.Page(
            collection=query[u'results'],
            page=page,
            url=pager_url,
            item_count=query[u'count'],
            items_per_page=limit
        )
        extra_vars[u'search_facets'] = query[u'search_facets']
        extra_vars[u'page'].items = query[u'results']
    except SearchQueryError as se:
        # User's search parameters are invalid, in such a way that is not
        # achievable with the web interface, so return a proper error to
        # discourage spiders which are the main cause of this.
        log.info(u'Dataset search query rejected: %r', se.args)
        base.abort(
            400,
            _(u'Invalid search query: {error_message}')
            .format(error_message=str(se))
        )
    except SearchError as se:
        # May be bad input from the user, but may also be more serious like
        # bad code causing a SOLR syntax error, or a problem connecting to
        # SOLR
        log.error(u'Dataset search error: %r', se.args)
        extra_vars[u'query_error'] = True
        extra_vars[u'search_facets'] = {}
        extra_vars[u'page'] = h.Page(collection=[])

    # FIXME: try to avoid using global variables
    g.search_facets_limits = {}
    # NOTE: ``limit`` is reused here for the per-facet limit; the page
    # size it held earlier is no longer needed at this point.
    for facet in extra_vars[u'search_facets'].keys():
        try:
            limit = int(
                request.args.get(
                    u'_%s_limit' % facet,
                    int(config.get(u'search.facets.default', 10))
                )
            )
        except ValueError:
            base.abort(
                400,
                _(u'Parameter u"{parameter_name}" is not '
                  u'an integer').format(parameter_name=u'_%s_limit' % facet)
            )

        g.search_facets_limits[facet] = limit

    _setup_template_variables(context, {}, package_type=package_type)

    extra_vars[u'dataset_type'] = package_type

    # TODO: remove
    for key, value in six.iteritems(extra_vars):
        setattr(g, key, value)

    return base.render(
        _get_pkg_template(u'search_template', package_type), extra_vars
    )
def index_package(self, pkg_dict, defer_commit=False):
    '''Turn ``pkg_dict`` into a flat Solr document and add it to the index.

    ``pkg_dict`` is modified in place while the document is being built
    (extras, tags, groups, resources and relationships are popped and
    flattened). A package without a state, or whose state contains
    ``deleted``, is removed from the index through ``delete_package``
    instead of being added.

    :param pkg_dict: the package dict to index; ``None`` is ignored
    :param defer_commit: when true the document is added without a commit;
        a commit is also skipped when ``ckan.search.solr_commit`` is off
    :raises SearchIndexError: when Solr reports an error or cannot be
        reached
    '''
    if pkg_dict is None:
        return

    # tracking summary values will be stale, never store them
    tracking_summary = pkg_dict.pop('tracking_summary', None)
    for r in pkg_dict.get('resources', []):
        r.pop('tracking_summary', None)

    data_dict_json = json.dumps(pkg_dict)

    # Parse the option with asbool so that a value such as "false" coming
    # from the config file really disables the cache; a plain truthiness
    # test would treat any non-empty string as enabled.
    if asbool(config.get('ckan.cache_validated_datasets', True)):
        package_plugin = lib_plugins.lookup_package_plugin(
            pkg_dict.get('type'))

        schema = package_plugin.show_package_schema()
        validated_pkg_dict, errors = lib_plugins.plugin_validate(
            package_plugin,
            {'model': model, 'session': model.Session},
            pkg_dict, schema, 'package_show')
        pkg_dict['validated_data_dict'] = json.dumps(
            validated_pkg_dict,
            cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

    pkg_dict['data_dict'] = data_dict_json

    # add to string field for sorting
    title = pkg_dict.get('title')
    if title:
        pkg_dict['title_string'] = title

    # delete the package if there is no state, or the state is `deleted`
    if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
        return self.delete_package(pkg_dict)

    index_fields = RESERVED_FIELDS + list(pkg_dict.keys())

    # include the extras in the main namespace
    extras = pkg_dict.get('extras', [])
    for extra in extras:
        key, value = extra['key'], extra['value']
        if isinstance(value, (tuple, list)):
            value = " ".join(map(text_type, value))
        key = ''.join([c for c in key if c in KEY_CHARS])
        pkg_dict['extras_' + key] = value
        # never let an extra overwrite a reserved or existing field
        if key not in index_fields:
            pkg_dict[key] = value
    pkg_dict.pop('extras', None)

    # add tags, removing vocab tags from 'tags' list and adding them as
    # vocab_<tag name> so that they can be used in facets
    non_vocab_tag_names = []
    tags = pkg_dict.pop('tags', [])
    context = {'model': model}

    for tag in tags:
        if tag.get('vocabulary_id'):
            data = {'id': tag['vocabulary_id']}
            vocab = logic.get_action('vocabulary_show')(context, data)
            key = u'vocab_%s' % vocab['name']
            if key in pkg_dict:
                pkg_dict[key].append(tag['name'])
            else:
                pkg_dict[key] = [tag['name']]
        else:
            non_vocab_tag_names.append(tag['name'])

    pkg_dict['tags'] = non_vocab_tag_names

    # add groups
    groups = pkg_dict.pop('groups', [])

    # we use the capacity to make things private in the search index
    if pkg_dict['private']:
        pkg_dict['capacity'] = 'private'
    else:
        pkg_dict['capacity'] = 'public'

    pkg_dict['groups'] = [group['name'] for group in groups]

    # if there is an owner_org we want to add this to groups for index
    # purposes
    if pkg_dict.get('organization'):
        pkg_dict['organization'] = pkg_dict['organization']['name']
    else:
        pkg_dict['organization'] = None

    # tracking
    if not tracking_summary:
        tracking_summary = model.TrackingSummary.get_for_package(
            pkg_dict['id'])
    pkg_dict['views_total'] = tracking_summary['total']
    pkg_dict['views_recent'] = tracking_summary['recent']

    resource_fields = [('name', 'res_name'),
                       ('description', 'res_description'),
                       ('format', 'res_format'),
                       ('url', 'res_url'),
                       ('resource_type', 'res_type')]
    resource_extras = [(e, 'res_extras_' + e) for e
                       in model.Resource.get_extra_columns()]
    # flatten the structure for indexing:
    for resource in pkg_dict.get('resources', []):
        for (okey, nkey) in resource_fields + resource_extras:
            pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
    pkg_dict.pop('resources', None)

    # relationships are indexed as <relationship type>: [package names]
    rel_dict = collections.defaultdict(list)
    subjects = pkg_dict.pop("relationships_as_subject", [])
    objects = pkg_dict.pop("relationships_as_object", [])
    for rel in objects:
        type = model.PackageRelationship.forward_to_reverse_type(
            rel['type'])
        rel_dict[type].append(
            model.Package.get(rel['subject_package_id']).name)
    for rel in subjects:
        type = rel['type']
        rel_dict[type].append(
            model.Package.get(rel['object_package_id']).name)
    for key, value in six.iteritems(rel_dict):
        if key not in pkg_dict:
            pkg_dict[key] = value

    pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

    # Save dataset type
    pkg_dict['dataset_type'] = pkg_dict['type']

    # clean the dict fixing keys and dates
    # FIXME where are we getting these dirty keys from?  can we not just
    # fix them in the correct place or is this something that always will
    # be needed?  For my data not changing the keys seems to not cause a
    # problem.
    new_dict = {}
    bogus_date = datetime.datetime(1, 1, 1)
    for key, value in pkg_dict.items():
        key = six.ensure_str(key)
        if key.endswith('_date'):
            try:
                date = parse(value, default=bogus_date)
                if date != bogus_date:
                    value = date.isoformat() + 'Z'
                else:
                    # The date field was empty, so dateutil filled it with
                    # the default bogus date
                    value = None
            except (ValueError, IndexError):
                # unparseable dates are left out of the document
                continue
        new_dict[key] = value
    pkg_dict = new_dict

    for k in ('title', 'notes', 'title_string'):
        if k in pkg_dict and pkg_dict[k]:
            pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

    # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
    # with UTC time (i.e trailing Z)
    # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
    pkg_dict['metadata_created'] += 'Z'
    pkg_dict['metadata_modified'] += 'Z'

    # mark this CKAN instance as data source:
    pkg_dict['site_id'] = config.get('ckan.site_id')

    # Strip a selection of the fields.
    # These fields are possible candidates for sorting search results on,
    # so we strip leading spaces because solr will sort " " before "a" or "A".
    for field_name in ['title']:
        try:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()
        except KeyError:
            pass

    # add a unique index_id to avoid conflicts
    import hashlib
    pkg_dict['index_id'] = hashlib.md5(
        six.b('%s%s' % (pkg_dict['id'],
                        config.get('ckan.site_id')))).hexdigest()

    for item in PluginImplementations(IPackageController):
        pkg_dict = item.before_index(pkg_dict)

    assert pkg_dict, 'Plugin must return non empty package dict on index'

    # permission labels determine visibility in search, can't be set
    # in original dataset or before_index plugins
    labels = lib_plugins.get_permission_labels()
    dataset = model.Package.get(pkg_dict['id'])
    pkg_dict['permission_labels'] = labels.get_dataset_labels(
        dataset) if dataset else []  # TestPackageSearchIndex-workaround

    # send to solr:
    try:
        conn = make_connection()
        commit = not defer_commit
        if not asbool(config.get('ckan.search.solr_commit', 'true')):
            commit = False
        conn.add(docs=[pkg_dict], commit=commit)
    except pysolr.SolrError as e:
        msg = 'Solr returned an error: {0}'.format(
            e.args[0][:1000]  # limit huge responses
        )
        raise SearchIndexError(msg)
    except socket.error as e:
        err = 'Could not connect to Solr using {0}: {1}'.format(
            conn.url, str(e))
        log.error(err)
        raise SearchIndexError(err)

    commit_debug_msg = 'Not committed yet' if defer_commit else 'Committed'
    log.debug('Updated index for %s [%s]' % (pkg_dict.get('name'),
                                             commit_debug_msg))