def _keywords(self, subject, predicate):
    """Collect keyword literals for a subject, grouped by language.

    Returns a dict mapping language code -> list of unicode keyword
    strings. Every language from get_langs() is present (possibly with
    an empty list). Keywords tagged with a language outside that set
    (or untagged, language is None) are collected under their own key
    instead of raising a KeyError, matching the setdefault-based
    sibling implementation of this method.
    """
    keywords = {}
    # initialize the keywords with empty lists for all languages
    for lang in get_langs():
        keywords[lang] = []
    for keyword_node in self.g.objects(subject, predicate):
        lang = keyword_node.language
        # setdefault guards against literals tagged with a language
        # not in get_langs(); the previous direct index raised KeyError
        keywords.setdefault(lang, []).append(unicode(keyword_node))
    return keywords
def before_search(self, search_params):
    """Adjust search parameters before the query is sent to Solr.

    - Searches language-specific fields and boosts hits in the
      current request language.
    - Restricts results to dataset_type:dataset unless the query
      already filters on a type.
    - Strips ": " sequences from the free-text query to avoid false
      negatives from Solr field-query parsing.
    """
    # search in correct language-specific field and boost
    # results in current language
    lang_set = sh.get_langs()
    try:
        current_lang = toolkit.request.environ['CKAN_LANG']
    except TypeError as err:
        if err.message == ('No object (name: request) has been registered '
                           'for this thread'):
            # This happens when this code gets called as part of a paster
            # command rather then as part of an HTTP request.
            current_lang = toolkit.config.get('ckan.locale_default')
        else:
            raise
    # fallback to default locale if locale not in suported langs
    if current_lang not in lang_set:
        current_lang = toolkit.config.get('ckan.locale_default', 'en')
    # treat current lang differenly so remove from set
    lang_set.remove(current_lang)
    # add default query field(s)
    query_fields = 'text'
    # weight current lang more highly
    query_fields += ' title_%s^8 text_%s^4' % (current_lang, current_lang)
    for lang in lang_set:
        query_fields += ' title_%s^2 text_%s' % (lang, lang)
    search_params['qf'] = query_fields

    # Unless the query is already being filtered by any type (either
    # positively, or negatively), reduce to only display 'dataset' type.
    # This is done because by standard all types are displayed; this
    # leads to strange situations where e.g. harvest sources are shown
    # on organization pages.
    # TODO: fix issue https://github.com/ckan/ckan/issues/2803
    # in CKAN core
    fq = search_params.get('fq', '')
    if 'dataset_type:' not in fq:
        search_params.update({'fq': "%s +dataset_type:dataset" % fq})

    # remove colon followed by a space from q to avoid false negatives.
    # NOTE: raw string so "\s" is the regex whitespace class, not an
    # invalid string escape (SyntaxError in Python >= 3.12)
    q = search_params.get('q', '')
    search_params['q'] = re.sub(r":\s", " ", q)
    return search_params
def _keywords(self, subject, predicate):
    """Return munged keywords per language for the given subject.

    Every supported language gets at least an empty list; literals
    tagged with other languages (or none) are grouped under their own
    language key.
    """
    # start with an empty keyword list for every supported language
    keywords = dict((lang, []) for lang in get_langs())
    for node in self.g.objects(subject, predicate):
        node_lang = node.language
        if node_lang not in keywords:
            keywords[node_lang] = []
        keywords[node_lang].append(munge_tag(unicode(node)))
    return keywords
def before_index(self, search_data):
    """Enrich the Solr index document for a dataset before indexing.

    Adds resource-derived fields (res_name, res_description, res_format,
    res_rights), flattened title/description, and per-language fields
    (title_<lang>, description_<lang>, keywords_<lang>, text_<lang>)
    built from the validated data dict. Returns search_data unchanged
    for unsupported package types.
    """
    if not self.is_supported_package_type(search_data):
        return search_data
    extract_title = LangToString('title')
    # the full validated dataset dict is serialized into the index doc
    validated_dict = json.loads(search_data['validated_data_dict'])
    # log.debug(pprint.pformat(validated_dict))
    search_data['res_name'] = [extract_title(r) for r in validated_dict[u'resources']]  # noqa
    search_data['res_description'] = [LangToString('description')(r) for r in validated_dict[u'resources']]  # noqa
    search_data['res_format'] = self._prepare_formats_for_index(validated_dict[u'resources'])  # noqa
    search_data['res_rights'] = [simplify_terms_of_use(r['rights']) for r in validated_dict[u'resources']]  # noqa
    search_data['title_string'] = extract_title(validated_dict)
    search_data['description'] = LangToString('description')(validated_dict)  # noqa
    # political_level is optional on the organization
    if 'political_level' in validated_dict[u'organization']:
        search_data['political_level'] = validated_dict[u'organization'][u'political_level']  # noqa
    try:
        # index language-specific values (or it's fallback)
        text_field_items = {}
        for lang_code in get_langs():
            search_data['title_' + lang_code] = get_localized_value(
                validated_dict['title'], lang_code
            )
            search_data['title_string_' + lang_code] = munge_title_to_name(
                get_localized_value(validated_dict['title'], lang_code)
            )
            search_data['description_' + lang_code] = get_localized_value(
                validated_dict['description'], lang_code
            )
            search_data['keywords_' + lang_code] = get_localized_value(
                validated_dict['keywords'], lang_code
            )
            # text_<lang> aggregates description, keywords and all
            # resource titles/descriptions that exist in this language
            text_field_items['text_' + lang_code] = [get_localized_value(validated_dict['description'], lang_code)]  # noqa
            text_field_items['text_' + lang_code].extend(search_data['keywords_' + lang_code])  # noqa
            text_field_items['text_' + lang_code].extend([r['title'][lang_code] for r in validated_dict['resources'] if r['title'][lang_code]])  # noqa
            text_field_items['text_' + lang_code].extend([r['description'][lang_code] for r in validated_dict['resources'] if r['description'][lang_code]])  # noqa
        # flatten values for text_* fields
        for key, value in text_field_items.iteritems():
            search_data[key] = ' '.join(value)
    except KeyError:
        # NOTE(review): any missing key aborts the whole language loop
        # silently, leaving partial language fields — presumably a
        # deliberate best-effort; confirm before tightening
        pass
    # log.debug(pprint.pformat(search_data))
    return search_data
def before_search(self, search_params):
    """Adjust search parameters before the query is sent to Solr.

    Boosts language-specific fields for the current request language
    and limits results to the 'dataset' type unless the query already
    filters on a type.
    """
    # search in correct language-specific field and boost
    # results in current language
    supported_langs = get_langs()
    try:
        current_lang = pylons.request.environ['CKAN_LANG']
    except TypeError as err:
        registered_msg = ('No object (name: request) has been registered '
                          'for this thread')
        if err.message != registered_msg:
            raise
        # This happens when this code gets called as part of a paster
        # command rather then as part of an HTTP request.
        current_lang = pylons.config.get('ckan.locale_default')
    # fallback to default locale if locale not in suported langs
    if current_lang not in supported_langs:
        current_lang = pylons.config.get('ckan.locale_default', 'en')
    # treat current lang differenly so remove from set
    supported_langs.remove(current_lang)
    # weight current lang more highly
    query_fields = 'title_%s^8 text_%s^4' % (current_lang, current_lang)
    for lang in supported_langs:
        query_fields += ' title_%s^2 text_%s' % (lang, lang)
    search_params['qf'] = query_fields

    # Unless the query is already being filtered by any type (either
    # positively, or negatively), reduce to only display 'dataset' type.
    # This is done because by standard all types are displayed; this
    # leads to strange situations where e.g. harvest sources are shown
    # on organization pages.
    # TODO: fix issue https://github.com/ckan/ckan/issues/2803
    # in CKAN core
    fq = search_params.get('fq', '')
    if 'dataset_type:' not in fq:
        search_params['fq'] = "%s +dataset_type:dataset" % fq
    return search_params
def before_index(self, search_data):
    """Enrich the Solr index document for a dataset before indexing.

    Adds resource-derived fields (res_name, res_description, res_format,
    res_rights), flattened title/description, and per-language fields
    (title_<lang>, description_<lang>, keywords_<lang>, text_<lang>)
    built from the validated data dict. Returns search_data unchanged
    for unsupported package types.
    """
    if not self.is_supported_package_type(search_data):
        return search_data
    extract_title = LangToString('title')
    # the full validated dataset dict is serialized into the index doc
    validated_dict = json.loads(search_data['validated_data_dict'])
    search_data['res_name'] = [extract_title(r) for r in validated_dict[u'resources']]  # noqa
    search_data['res_description'] = [LangToString('description')(r) for r in validated_dict[u'resources']]  # noqa
    search_data['res_format'] = self._prepare_formats_for_index(validated_dict[u'resources'])  # noqa
    search_data['res_rights'] = [simplify_terms_of_use(r['rights']) for r in validated_dict[u'resources']]  # noqa
    search_data['title_string'] = extract_title(validated_dict)
    search_data['description'] = LangToString('description')(validated_dict)  # noqa
    # political_level is optional on the organization
    if 'political_level' in validated_dict[u'organization']:
        search_data['political_level'] = validated_dict[u'organization'][u'political_level']  # noqa
    try:
        # index language-specific values (or it's fallback)
        text_field_items = {}
        for lang_code in get_langs():
            search_data['title_' + lang_code] = get_localized_value(
                validated_dict['title'], lang_code
            )
            search_data['title_string_' + lang_code] = munge_title_to_name(
                get_localized_value(validated_dict['title'], lang_code)
            )
            search_data['description_' + lang_code] = get_localized_value(
                validated_dict['description'], lang_code
            )
            search_data['keywords_' + lang_code] = get_localized_value(
                validated_dict['keywords'], lang_code
            )
            # text_<lang> aggregates description, keywords and all
            # resource titles/descriptions that exist in this language
            text_field_items['text_' + lang_code] = [get_localized_value(validated_dict['description'], lang_code)]  # noqa
            text_field_items['text_' + lang_code].extend(search_data['keywords_' + lang_code])  # noqa
            text_field_items['text_' + lang_code].extend([r['title'][lang_code] for r in validated_dict['resources'] if r['title'][lang_code]])  # noqa
            text_field_items['text_' + lang_code].extend([r['description'][lang_code] for r in validated_dict['resources'] if r['description'][lang_code]])  # noqa
        # flatten values for text_* fields
        for key, value in text_field_items.iteritems():
            search_data[key] = ' '.join(value)
    except KeyError:
        # NOTE(review): any missing key aborts the whole language loop
        # silently, leaving partial language fields — presumably a
        # deliberate best-effort; confirm before tightening
        pass
    return search_data
def _object_value(self, subject, predicate, multilang=False):
    '''
    Given a subject and a predicate, returns the value of the object

    Both subject and predicate must be rdflib URIRef or BNode objects

    If found, the unicode representation is returned, else None
    '''
    default_lang = 'de'
    translations = {}
    for obj in self.g.objects(subject, predicate):
        if not multilang:
            # single-value lookup: the first object wins
            return unicode(obj)
        # untagged literals are filed under the default language
        lang = obj.language if obj.language else default_lang
        translations[lang] = unicode(obj)
    if not multilang:
        # no object found in single-value mode
        return None
    # when translation does not exist, create an empty one
    for lang in get_langs():
        translations.setdefault(lang, '')
    return translations
def before_index(self, search_data):
    """Enrich the Solr index document for a dataset before indexing.

    Adds per-language resource names/descriptions and group names,
    resource formats and rights, identifier, contact points,
    publishers, see-also relations, and per-language dataset fields,
    then cleans the document for the suggest context. Returns
    search_data unchanged for unsupported package types.
    """
    if not self.is_supported_package_type(search_data):
        return search_data
    extract_title = LangToString('title')
    # the full validated dataset dict is serialized into the index doc
    validated_dict = json.loads(search_data['validated_data_dict'])
    search_data['res_name'] = [
        extract_title(r) for r in validated_dict[u'resources']
    ]  # noqa
    # per-language resource titles
    search_data['res_name_en'] = [
        sh.get_localized_value(r['title'], 'en')
        for r in validated_dict[u'resources']
    ]  # noqa
    search_data['res_name_de'] = [
        sh.get_localized_value(r['title'], 'de')
        for r in validated_dict[u'resources']
    ]  # noqa
    search_data['res_name_fr'] = [
        sh.get_localized_value(r['title'], 'fr')
        for r in validated_dict[u'resources']
    ]  # noqa
    search_data['res_name_it'] = [
        sh.get_localized_value(r['title'], 'it')
        for r in validated_dict[u'resources']
    ]  # noqa
    # per-language resource descriptions
    search_data['res_description_en'] = [
        sh.get_localized_value(r['description'], 'en')
        for r in validated_dict[u'resources']
    ]  # noqa
    search_data['res_description_de'] = [
        sh.get_localized_value(r['description'], 'de')
        for r in validated_dict[u'resources']
    ]  # noqa
    search_data['res_description_fr'] = [
        sh.get_localized_value(r['description'], 'fr')
        for r in validated_dict[u'resources']
    ]  # noqa
    search_data['res_description_it'] = [
        sh.get_localized_value(r['description'], 'it')
        for r in validated_dict[u'resources']
    ]  # noqa
    # per-language group display names
    search_data['groups_en'] = [
        sh.get_localized_value(g['display_name'], 'en')
        for g in validated_dict[u'groups']
    ]  # noqa
    search_data['groups_de'] = [
        sh.get_localized_value(g['display_name'], 'de')
        for g in validated_dict[u'groups']
    ]  # noqa
    search_data['groups_fr'] = [
        sh.get_localized_value(g['display_name'], 'fr')
        for g in validated_dict[u'groups']
    ]  # noqa
    search_data['groups_it'] = [
        sh.get_localized_value(g['display_name'], 'it')
        for g in validated_dict[u'groups']
    ]  # noqa
    search_data['res_description'] = [
        LangToString('description')(r)
        for r in validated_dict[u'resources']
    ]  # noqa
    search_data['res_format'] = self._prepare_formats_for_index(
        validated_dict[u'resources'])  # noqa
    search_data['res_rights'] = [
        sh.simplify_terms_of_use(r['rights'])
        for r in validated_dict[u'resources']
    ]  # noqa
    search_data['title_string'] = extract_title(validated_dict)
    search_data['description'] = LangToString('description')(
        validated_dict)  # noqa
    # political_level is optional on the organization
    if 'political_level' in validated_dict[u'organization']:
        search_data['political_level'] = validated_dict[u'organization'][
            u'political_level']  # noqa
    search_data['identifier'] = validated_dict.get('identifier')
    search_data['contact_points'] = [
        c['name'] for c in validated_dict.get('contact_points', [])
    ]  # noqa
    search_data['publishers'] = [
        p['label'] for p in validated_dict.get('publishers', [])
    ]  # noqa
    # TODO: Remove the try-except-block.
    # This fixes the index while we have 'wrong' relations on
    # datasets harvested with an old version of ckanext-geocat
    try:
        search_data['see_alsos'] = [
            d['dataset_identifier']
            for d in validated_dict.get('see_alsos', [])
        ]  # noqa
    except TypeError:
        # old harvests stored plain identifiers instead of dicts
        search_data['see_alsos'] = [
            d for d in validated_dict.get('see_alsos', [])
        ]  # noqa
    # make sure we're not dealing with NoneType
    if search_data['metadata_created'] is None:
        search_data['metadata_created'] = ''
    if search_data['metadata_modified'] is None:
        search_data['metadata_modified'] = ''
    try:
        # index language-specific values (or it's fallback)
        for lang_code in sh.get_langs():
            search_data['title_' + lang_code] = sh.get_localized_value(
                validated_dict['title'], lang_code)
            search_data['title_string_' + lang_code] = munge_title_to_name(
                sh.get_localized_value(validated_dict['title'], lang_code))
            search_data['description_' + lang_code] = sh.get_localized_value(  # noqa
                validated_dict['description'], lang_code)
            search_data['keywords_' + lang_code] = sh.get_localized_value(
                validated_dict['keywords'], lang_code)
            search_data['organization_' + lang_code] = sh.get_localized_value(  # noqa
                validated_dict['organization']['title'], lang_code)
    except KeyError:
        # NOTE(review): a missing key aborts the language loop silently,
        # leaving partial language fields — presumably deliberate
        # best-effort; confirm before tightening
        pass
    # clean terms for suggest context
    search_data = self._prepare_suggest_context(
        search_data, validated_dict)
    return search_data