def before_index(self, pkg_dict):
    """Add sort and format fields to a dataset dict before it is indexed.

    Sets ``title_sort``, fills in or normalises ``modified_date`` and maps
    every ``res_format`` entry through ``helpers.resource_mapping()``.

    :param pkg_dict: dataset dict about to be indexed; modified in place.
    :returns: the same ``pkg_dict`` instance.
    """
    title = pkg_dict.get("title", pkg_dict.get("name"))
    # Strip accents first and, if the results are equivalent, let the
    # original title decide. Joining both forms with a space avoids having
    # to do a real two-level sort.
    pkg_dict["title_sort"] = (
        unicode_sort.strip_accents(title) + " " + title
    ).translate(UNICODE_SORT)

    # 'modified_date' is used to sort datasets by last update; fall back to
    # 'metadata_modified' when it is absent.
    if "modified_date" not in pkg_dict:
        pkg_dict["modified_date"] = pkg_dict["metadata_modified"]
    else:
        # Solr is picky with dates and only accepts ISO dates with UTC time
        # (i.e. a trailing Z).
        pkg_dict["modified_date"] = (
            helpers.ecportal_date_to_iso(pkg_dict["modified_date"]) + "Z"
        )

    res_formats = pkg_dict.get("res_format", [])
    # Fetch the mapping once instead of once (or twice) per format; skip the
    # call entirely when there is nothing to map.
    mapping = helpers.resource_mapping() if res_formats else {}
    pkg_dict["res_format"] = [
        mapping[res_format][1] if res_format in mapping else res_format
        for res_format in res_formats
    ]
    return pkg_dict
def sort_translations(key):
    """Return the sort key for a translation entry.

    The display name is read from ``key[1]``.
    """
    label = key[1]
    accent_free = unicode_sort.strip_accents(label)
    # Accent-stripped form first, original form second: when the stripped
    # forms are equal the original decides, which avoids a real two-level
    # sort.
    combined = accent_free + ' ' + label
    return combined.translate(UNICODE_SORT)
def sort_group(key):
    """Return the sort key for a group.

    ``key`` is either a string, used directly as the display name, or an
    object whose ``get('display_name', '')`` supplies it.
    """
    label = key if isinstance(key, basestring) else key.get('display_name', '')
    # Accent-stripped form first, original form second: when the stripped
    # forms are equal the original decides, which avoids a real two-level
    # sort.
    accent_free = unicode_sort.strip_accents(label)
    return (accent_free + ' ' + label).translate(UNICODE_SORT)
def sort_organization(key):
    """Return the sort key for an organization.

    ``key`` is either a string, used directly as the display name, or an
    object whose ``get('display_name', '')`` supplies it.
    """
    if not isinstance(key, basestring):
        name = key.get('display_name', '')
    else:
        name = key
    # Sort on the accent-stripped name, then on the original name as a
    # tie-breaker; concatenating with a space avoids a real two-level sort.
    sortable = unicode_sort.strip_accents(name) + ' ' + name
    return sortable.translate(UNICODE_SORT)
def before_index(self, pkg_dict):
    """Build the sort fields indexed for a DCAT dataset.

    :param pkg_dict: dataset object exposing ``schema`` and
        ``schema_catalog_record`` attributes (not a plain dict, despite the
        name).
    :returns: a new dict with ``title_sort`` and, when a non-empty date
        could be determined, ``modified_date``.
    """
    import datetime

    title = ui_util._get_translated_term_from_dcat_object(
        pkg_dict.schema, 'title_dcterms', 'en')
    # Strip accents first and, if the results are equivalent, let the
    # original title decide. Joining both forms with a space avoids having
    # to do a real two-level sort.
    result_dict = {
        'title_sort': (unicode_sort.strip_accents(title) + ' ' + title)
        .translate(UNICODE_SORT),
    }

    # 'modified_date' is used to sort datasets by their last update date.
    try:
        modified_date = helpers.ecportal_date_to_iso(
            ui_util._get_translated_term_from_dcat_object(
                pkg_dict.schema_catalog_record, 'modified_dcterms', 'en'))
    except Exception:
        # Catch Exception rather than BaseException so SystemExit and
        # KeyboardInterrupt still propagate; keep the traceback in the log.
        log.error(
            "[Indexation before index] [Failed] []use current date as fallback",
            exc_info=True
        )
        modified_date = datetime.datetime.now().isoformat()

    # Check for an empty value before running the regex: re.search() raises
    # TypeError on None, which previously made the "omit the field" path
    # unreachable for None.
    if not modified_date:
        return result_dict

    if not re.search(r".*T.*Z$", modified_date):
        # Solr is picky with dates and only accepts ISO dates with UTC time
        # (i.e. a trailing Z).
        modified_date = helpers.ecportal_date_to_iso(modified_date) + 'Z'
    result_dict['modified_date'] = modified_date

    # NOTE: the res_format mapping through helpers.resource_mapping() is
    # disabled in this implementation, so no 'res_format' key is emitted.
    return result_dict
def before_index(self, search_data):
    """Add per-language title, sort and full-text fields before indexing.

    Same code as in the ckanext multilingual plugin except for the language
    codes and where marked as an EC change.

    :param search_data: dict about to be indexed; modified in place.
    :returns: the same ``search_data`` with ``title_<lang>``,
        ``title_string_<lang>`` and ``text_<lang>`` entries added.
    """
    default_lang = search_data.get(
        "lang_code", pylons.config.get("ckan.locale_default", "en"))

    # Translate the title.
    title = search_data.get("title")
    search_data["title_" + default_lang] = title
    title_translations = p.toolkit.get_action("term_translation_show")(
        {"model": model}, {"terms": [title], "lang_codes": LANGS}
    )
    for translation in title_translations:
        title_field = "title_" + translation["lang_code"]
        search_data[title_field] = translation["term_translation"]

    # EC change: add a sort-order field for every language.
    for lang in LANGS:
        # Fall back to the untranslated title when no translation exists.
        title_value = search_data.get("title_" + lang) or title
        # Strip accents first and, if the results are equivalent, let the
        # original title decide. Joining both forms with a space avoids
        # having to do a real two-level sort.
        sortable_title = (
            unicode_sort.strip_accents(title_value) + " " + title_value)
        search_data["title_string_" + lang] = (
            sortable_title.translate(UNICODE_SORT))

    # Translate the rest. Only string terms are collected: a non-string
    # item inside a list value would otherwise end up in all_terms and make
    # the " ".join() below raise TypeError.
    all_terms = []
    for key, value in search_data.iteritems():
        if key in KEYS_TO_IGNORE or key.startswith("title"):
            continue
        if not isinstance(value, list):
            value = [value]
        all_terms.extend(
            item for item in value if isinstance(item, basestring))

    field_translations = p.toolkit.get_action("term_translation_show")(
        {"model": model}, {"terms": all_terms, "lang_codes": LANGS}
    )

    text_field_items = dict(("text_" + lang, []) for lang in LANGS)
    text_field_items["text_" + default_lang].extend(all_terms)

    for translation in sorted(field_translations):
        lang_field = "text_" + translation["lang_code"]
        text_field_items[lang_field].append(translation["term_translation"])

    for key, value in text_field_items.iteritems():
        search_data[key] = " ".join(value)

    return search_data
def before_index(self, search_data):
    """Build the per-language title, sort and full-text index fields.

    :param DatasetDcatApOp search_data: dataset being indexed.
    :returns: a new dict holding ``title_<lang>`` (only where the
        translated title differs from the English one),
        ``title_string_<lang>`` and ``text_<lang>`` entries.
    """
    result_dict = {}

    # The option is split as a whitespace-separated string, so the default
    # must be a string too (a list default has no .split()). split() without
    # an argument also avoids empty language codes on repeated spaces.
    offered_locales = config.get('ckan.locales_offered', 'en')
    if isinstance(offered_locales, basestring):
        offered_locales = offered_locales.split()

    # Translate the title.
    title = ui_util._get_translated_term_from_dcat_object(
        search_data.schema, 'title_dcterms', 'en')
    for lang in offered_locales:
        translated_title = ui_util._get_translated_term_from_dcat_object(
            search_data.schema, 'title_dcterms', lang)
        if translated_title != title:
            result_dict['title_' + lang] = translated_title

    # EC change: add a sort-order field for every language.
    for lang in LanguagesConstants.LANGUAGES:
        # Fall back to the English title when no distinct translation exists.
        title_value = result_dict.get('title_' + lang) or title
        # Strip accents first and, if the results are equivalent, let the
        # original title decide. Joining both forms with a space avoids
        # having to do a real two-level sort.
        sortable_title = (
            unicode_sort.strip_accents(title_value) + ' ' + title_value)
        result_dict['title_string_' + lang] = (
            sortable_title.translate(UNICODE_SORT))

    # NOTE: the legacy full-text translation based on the
    # 'term_translation_show' action is disabled; the per-language text
    # comes from create_multi_lang_full_text().
    full_text_by_lang = search_data.create_multi_lang_full_text()
    for lang in LanguagesConstants.LANGUAGES:
        result_dict['text_{0}'.format(lang)] = full_text_by_lang.get(
            lang, '').replace('----', '')

    return result_dict