Beispiel #1
0
    def before_index(self, pkg_dict):
        """Add sort, date and format fields to ``pkg_dict`` before indexing.

        The dictionary is modified in place:

        * ``title_sort`` is built from the title (or the name when there is
          no usable title).
        * ``modified_date`` is filled from ``metadata_modified`` when absent,
          otherwise converted with ``helpers.ecportal_date_to_iso`` and
          suffixed with ``Z``.
        * every entry of ``res_format`` found in
          ``helpers.resource_mapping()`` is replaced by element ``[1]`` of
          its mapping entry.

        :param pkg_dict: the package dictionary about to be indexed.
        :returns: the same ``pkg_dict`` object.
        :raises KeyError: if neither ``modified_date`` nor
            ``metadata_modified`` is present.
        """
        title = pkg_dict.get("title", pkg_dict.get("name"))
        if title is None:
            # A present-but-None title would make the concatenation below
            # fail; fall back to the name, then to an empty unicode string.
            title = pkg_dict.get("name") or u""

        # Strip accents first and, if equivalent, fall through to comparing
        # the original title. Joining both parts with spaces avoids having to
        # do a real two-level sort.
        sort_key = unicode_sort.strip_accents(title) + "   " + title
        pkg_dict["title_sort"] = sort_key.translate(UNICODE_SORT)

        # Set the 'modified_date' field to the value of 'metadata_modified'
        # if it is not present (this field is used to sort datasets according
        # to their last update date).
        if "modified_date" not in pkg_dict:
            pkg_dict["modified_date"] = pkg_dict["metadata_modified"]
        else:
            # SOLR is quite picky with dates and only accepts ISO dates with
            # UTC time (i.e. a trailing Z).
            pkg_dict["modified_date"] = helpers.ecportal_date_to_iso(pkg_dict["modified_date"]) + "Z"

        formats = pkg_dict.get("res_format", [])
        # Fetch the mapping once instead of up to twice per format, and only
        # when there is something to map.
        mapping = helpers.resource_mapping() if formats else {}
        pkg_dict["res_format"] = [
            mapping[fmt][1] if fmt in mapping else fmt for fmt in formats
        ]

        return pkg_dict
Beispiel #2
0
 def sort_translations(key):
     """Return the sort key for a translation entry.

     The second element of ``key`` is taken as the display name. The
     result is the accent-stripped name, three spaces, and the original
     name, passed through the ``UNICODE_SORT`` translation table.
     """
     label = key[1]
     # The accent-free form comes first so it dominates the comparison; the
     # original form after the spaces only breaks ties. This avoids having
     # to do a real two-level sort.
     accentless = unicode_sort.strip_accents(label)
     combined = accentless + '   ' + label
     return combined.translate(UNICODE_SORT)
Beispiel #3
0
def sort_group(key):
    """Return the sort key for a group.

    ``key`` is either the display name itself (a string) or a mapping
    whose ``'display_name'`` entry is used (defaulting to ``''``). The
    result is the accent-stripped name, three spaces, and the original
    name, passed through the ``UNICODE_SORT`` translation table.
    """
    label = key if isinstance(key, basestring) else key.get('display_name', '')
    # The accent-free form comes first so it dominates the comparison; the
    # original form after the spaces only breaks ties. This avoids having to
    # do a real two-level sort.
    accentless = unicode_sort.strip_accents(label)
    combined = accentless + '   ' + label
    return combined.translate(UNICODE_SORT)
Beispiel #4
0
def sort_organization(key):
    """Return the sort key for an organization.

    ``key`` is either the display name itself (a string) or a mapping
    whose ``'display_name'`` entry is used (defaulting to ``''``). The
    result is the accent-stripped name, three spaces, and the original
    name, passed through the ``UNICODE_SORT`` translation table.
    """
    label = key if isinstance(key, basestring) else key.get('display_name', '')
    # The accent-free form comes first so it dominates the comparison; the
    # original form after the spaces only breaks ties. This avoids having to
    # do a real two-level sort.
    accentless = unicode_sort.strip_accents(label)
    combined = accentless + '   ' + label
    return combined.translate(UNICODE_SORT)
Beispiel #5
0
    def before_index(self, pkg_dict):
        """Build the sort and date fields to index for a dataset object.

        :param pkg_dict: dataset object whose ``schema`` and
            ``schema_catalog_record`` attributes are read through
            ``ui_util._get_translated_term_from_dcat_object``.
        :returns: a new dict containing ``title_sort`` and, when a non-empty
            date is available, ``modified_date``.
        """
        import datetime

        title = ui_util._get_translated_term_from_dcat_object(
            pkg_dict.schema, 'title_dcterms', 'en')
        # Strip accents first and, if equivalent, fall through to comparing
        # the original title. Joining both parts with spaces avoids having to
        # do a real two-level sort.
        result_dict = {}
        result_dict['title_sort'] = (unicode_sort.strip_accents(title) +
                                     '   ' + title).translate(UNICODE_SORT)

        # 'modified_date' is used to sort datasets by their last update date.
        # Start from the current time and replace it with the catalog
        # record's modification date when that can be read.
        modified_date = datetime.datetime.now().isoformat()
        try:
            modified_date = helpers.ecportal_date_to_iso(
                ui_util._get_translated_term_from_dcat_object(
                    pkg_dict.schema_catalog_record, 'modified_dcterms',
                    'en'))
        except Exception:
            # Best effort: keep the current-time fallback, but record the
            # failure with its traceback. Exception (not BaseException) so
            # KeyboardInterrupt / SystemExit still propagate.
            log.exception(
                "[Indexation before index] [Failed] []use current date as fallback"
            )

        if not modified_date:
            # No usable date (empty or None): leave the field out. Checking
            # this first avoids calling re.search on None.
            return result_dict

        if not re.search(r".*T.*Z$", modified_date):
            # SOLR is quite picky with dates and only accepts ISO dates with
            # UTC time (i.e. a trailing Z).
            modified_date = helpers.ecportal_date_to_iso(modified_date) + 'Z'
        result_dict['modified_date'] = modified_date

        # NOTE: remapping of 'res_format' through helpers.resource_mapping()
        # is currently disabled for this indexer.

        return result_dict
Beispiel #6
0
    def before_index(self, search_data):
        """Add per-language title, sort and full-text fields before indexing.

        Same code as in ckanext multilingual except for the language codes
        and where marked. ``search_data`` is modified in place:

        * ``title_<lang>`` for the default language and every translation
          returned by ``term_translation_show``;
        * ``title_string_<lang>`` (sortable title) for every entry of
          ``LANGS``;
        * ``text_<lang>`` (space-joined terms) for every entry of ``LANGS``.

        :param search_data: dict of the data about to be indexed.
        :returns: the same ``search_data`` object.
        """
        default_lang = search_data.get("lang_code", pylons.config.get("ckan.locale_default", "en"))

        # translate title
        title = search_data.get("title")
        search_data["title_" + default_lang] = title
        title_translations = p.toolkit.get_action("term_translation_show")(
            {"model": model}, {"terms": [title], "lang_codes": LANGS}
        )

        for translation in title_translations:
            title_field = "title_" + translation["lang_code"]
            search_data[title_field] = translation["term_translation"]

        # EC change: add sort order field.
        for lang in LANGS:
            # Fall back to the untranslated title when there is no
            # translation for this language.
            title_value = search_data.get("title_" + lang) or title

            # Strip accents first and, if equivalent, fall through to
            # comparing the original title. Joining both parts with spaces
            # avoids having to do a real two-level sort.
            sortable_title = unicode_sort.strip_accents(title_value) + "   " + title_value
            search_data["title_string_" + lang] = sortable_title.translate(UNICODE_SORT)

        ##########################################

        # translate rest
        all_terms = []
        for key, value in search_data.iteritems():
            if key in KEYS_TO_IGNORE or key.startswith("title"):
                continue
            if not isinstance(value, list):
                value = [value]
            # Only string items are translatable terms; non-string list
            # members would otherwise break the " ".join() below.
            all_terms.extend(item for item in value if isinstance(item, basestring))

        field_translations = p.toolkit.get_action("term_translation_show")(
            {"model": model}, {"terms": all_terms, "lang_codes": LANGS}
        )

        text_field_items = dict(("text_" + lang, []) for lang in LANGS)

        # The untranslated terms go to the default language's text field.
        text_field_items["text_" + default_lang].extend(all_terms)

        for translation in sorted(field_translations):
            lang_field = "text_" + translation["lang_code"]
            text_field_items[lang_field].append(translation["term_translation"])

        for key, value in text_field_items.iteritems():
            search_data[key] = " ".join(value)

        return search_data
Beispiel #7
0
    def before_index(self, search_data):
        '''
        Build the per-language title, sort and full-text fields to index.

        Same code as in ckanext multilingual except for the language codes
        and where marked.

        :param DatasetDcatApOp search_data:
        :return: a new dict with ``title_<lang>`` (only where the translated
            title differs from the English one), ``title_string_<lang>`` and
            ``text_<lang>`` entries.
        '''
        result_dict = {}

        # 'ckan.locales_offered' is read as a whitespace-separated string.
        # The default must be a string too, otherwise an unset option would
        # fail on .split(); a list value is also accepted as-is.
        offered = config.get('ckan.locales_offered', 'en')
        if hasattr(offered, 'split'):
            # split() without an argument ignores repeated spaces, so no
            # empty language codes are produced.
            language_list = offered.split()
        else:
            language_list = list(offered)

        # translate title
        title = ui_util._get_translated_term_from_dcat_object(
            search_data.schema, 'title_dcterms', 'en')

        for lang in language_list:
            translated_title = ui_util._get_translated_term_from_dcat_object(
                search_data.schema, 'title_dcterms', lang)
            if translated_title != title:
                result_dict['title_' + lang] = translated_title

        # EC change: add sort order field.
        for lang in LanguagesConstants.LANGUAGES:
            # Fall back to the English title when there is no distinct
            # translation for this language.
            title_value = result_dict.get('title_' + lang) or title

            # Strip accents first and, if equivalent, fall through to
            # comparing the original title. Joining both parts with spaces
            # avoids having to do a real two-level sort.
            sortable_title = \
                unicode_sort.strip_accents(title_value) + '   ' + title_value
            result_dict['title_string_' + lang] = \
                sortable_title.translate(UNICODE_SORT)

        ##########################################

        # TODO: translate rest (the term_translation_show based
        # implementation from ckanext multilingual is not used here).
        full_text_by_lang = search_data.create_multi_lang_full_text()
        for lang in LanguagesConstants.LANGUAGES:
            result_dict['text_{0}'.format(lang)] = full_text_by_lang.get(
                lang, '').replace('----', '')

        return result_dict