def _keywords(self, subject, predicate):
        """Collect language-tagged keywords for (subject, predicate).

        Returns a dict mapping a language code to the list of keyword
        strings found in that language; every language returned by
        get_langs() is present, possibly with an empty list.
        """
        keywords = {}

        # pre-populate so every configured language has an entry
        for lang in get_langs():
            keywords[lang] = []

        for keyword_node in self.g.objects(subject, predicate):
            # setdefault guards against literals tagged with a language
            # outside get_langs(), which previously raised KeyError
            keywords.setdefault(keyword_node.language, []).append(
                unicode(keyword_node))

        return keywords
Exemple #2
0
    def before_search(self, search_params):
        """
        Adjust search parameters before they are sent to Solr.

        Searches in the correct language-specific fields, boosts results
        in the current language, and restricts results to the 'dataset'
        type unless the query already filters on a type.
        """
        # copy, so we never mutate the list object returned by
        # sh.get_langs() (removing from a shared list would corrupt it)
        lang_set = list(sh.get_langs())
        try:
            current_lang = toolkit.request.environ['CKAN_LANG']
        except TypeError as err:
            if err.message == ('No object (name: request) has been registered '
                               'for this thread'):
                # This happens when this code gets called as part of a paster
                # command rather than as part of an HTTP request.
                current_lang = toolkit.config.get('ckan.locale_default')
            else:
                raise

        # fallback to default locale if locale not in supported langs
        if current_lang not in lang_set:
            current_lang = toolkit.config.get('ckan.locale_default', 'en')
        # the current language gets special boosts, so remove it from the set
        lang_set.remove(current_lang)

        # add default query field(s)
        query_fields = 'text'

        # weight current lang more highly
        query_fields += ' title_%s^8 text_%s^4' % (current_lang, current_lang)

        for lang in lang_set:
            query_fields += ' title_%s^2 text_%s' % (lang, lang)

        search_params['qf'] = query_fields

        # Unless the query is already being filtered by any type
        # (either positively, or negatively), reduce to only display
        # 'dataset' type.
        # This is done because by standard all types are displayed, this
        # leads to strange situations where e.g. harvest sources are shown
        # on organization pages.
        # TODO: fix issue https://github.com/ckan/ckan/issues/2803 in CKAN core
        fq = search_params.get('fq', '')
        if 'dataset_type:' not in fq:
            search_params.update({'fq': "%s +dataset_type:dataset" % fq})

        # remove colon followed by whitespace from q to avoid false negatives
        # (raw string: ":\s" was an invalid string escape, not a regex escape)
        q = search_params.get('q', '')
        search_params['q'] = re.sub(r":\s", " ", q)

        return search_params
Exemple #3
0
    def _keywords(self, subject, predicate):
        """Group keyword literals of (subject, predicate) by language.

        :returns: dict mapping language code -> list of keyword strings;
                  every language from get_langs() is present (possibly
                  with an empty list).
        """
        # one (initially empty) list per configured language
        keywords = {lang: [] for lang in get_langs()}

        for keyword_node in self.g.objects(subject, predicate):
            # setdefault avoids the KeyError previously raised for
            # language tags that are not part of get_langs()
            keywords.setdefault(keyword_node.language, []).append(
                unicode(keyword_node))

        return keywords
Exemple #4
0
    def _keywords(self, subject, predicate):
        """Return munged, language-grouped keywords for (subject, predicate)."""
        # start with an (empty) keyword list for every configured language
        keywords = dict((lang, []) for lang in get_langs())

        for node in self.g.objects(subject, predicate):
            munged = munge_tag(unicode(node))
            # setdefault tolerates language tags outside get_langs()
            keywords.setdefault(node.language, []).append(munged)

        return keywords
Exemple #5
0
    def before_index(self, search_data):
        """Enrich the search document with multilingual fields before indexing."""
        if not self.is_supported_package_type(search_data):
            return search_data

        title_of = LangToString('title')
        description_of = LangToString('description')
        validated_dict = json.loads(search_data['validated_data_dict'])
        resources = validated_dict[u'resources']

        # log.debug(pprint.pformat(validated_dict))

        search_data['res_name'] = [title_of(r) for r in resources]  # noqa
        search_data['res_description'] = [description_of(r) for r in resources]  # noqa
        search_data['res_format'] = self._prepare_formats_for_index(resources)  # noqa
        search_data['res_rights'] = [simplify_terms_of_use(r['rights']) for r in resources]  # noqa
        search_data['title_string'] = title_of(validated_dict)
        search_data['description'] = description_of(validated_dict)  # noqa
        if 'political_level' in validated_dict[u'organization']:
            search_data['political_level'] = validated_dict[u'organization'][u'political_level']  # noqa

        try:
            # index language-specific values (or their fallback)
            text_field_items = {}
            for lang_code in get_langs():
                localized_title = get_localized_value(
                    validated_dict['title'], lang_code)
                search_data['title_' + lang_code] = localized_title
                search_data['title_string_' + lang_code] = \
                    munge_title_to_name(
                        get_localized_value(validated_dict['title'], lang_code))
                search_data['description_' + lang_code] = get_localized_value(
                    validated_dict['description'], lang_code)
                search_data['keywords_' + lang_code] = get_localized_value(
                    validated_dict['keywords'], lang_code)

                # full-text field: description + keywords + resource texts
                parts = [get_localized_value(
                    validated_dict['description'], lang_code)]  # noqa
                parts.extend(search_data['keywords_' + lang_code])
                parts.extend(r['title'][lang_code]
                             for r in resources if r['title'][lang_code])
                parts.extend(r['description'][lang_code]
                             for r in resources if r['description'][lang_code])
                text_field_items['text_' + lang_code] = parts

            # flatten values for text_* fields
            for key, value in text_field_items.iteritems():
                search_data[key] = ' '.join(value)

        except KeyError:
            pass

        # log.debug(pprint.pformat(search_data))
        return search_data
    def before_search(self, search_params):
        """
        Adjust search parameters before they are sent to Solr.

        Searches in the correct language-specific fields, boosts results
        in the current language, and restricts results to the 'dataset'
        type unless the query already filters on a type.
        """
        # copy, so we never mutate the list object returned by
        # get_langs() (removing from a shared list would corrupt it)
        lang_set = list(get_langs())
        try:
            current_lang = pylons.request.environ['CKAN_LANG']
        except TypeError as err:
            if err.message == ('No object (name: request) has been registered '
                               'for this thread'):
                # This happens when this code gets called as part of a paster
                # command rather than as part of an HTTP request.
                current_lang = pylons.config.get('ckan.locale_default')
            else:
                raise

        # fallback to default locale if locale not in supported langs
        if current_lang not in lang_set:
            current_lang = pylons.config.get('ckan.locale_default', 'en')
        # the current language gets special boosts, so remove it from the set
        lang_set.remove(current_lang)

        # weight current lang more highly
        query_fields = 'title_%s^8 text_%s^4' % (current_lang, current_lang)

        for lang in lang_set:
            query_fields += ' title_%s^2 text_%s' % (lang, lang)

        search_params['qf'] = query_fields

        # Unless the query is already being filtered by any type
        # (either positively, or negatively), reduce to only display
        # 'dataset' type.
        # This is done because by standard all types are displayed, this
        # leads to strange situations where e.g. harvest sources are shown
        # on organization pages.
        # TODO: fix issue https://github.com/ckan/ckan/issues/2803 in CKAN core
        fq = search_params.get('fq', '')
        if 'dataset_type:' not in fq:
            search_params.update({'fq': "%s +dataset_type:dataset" % fq})

        return search_params
Exemple #7
0
    def before_index(self, search_data):
        """Extend the search document with multilingual index fields."""
        if not self.is_supported_package_type(search_data):
            return search_data

        validated_dict = json.loads(search_data['validated_data_dict'])
        resources = validated_dict[u'resources']
        extract_title = LangToString('title')
        extract_description = LangToString('description')

        search_data['res_name'] = [extract_title(r) for r in resources]  # noqa
        search_data['res_description'] = [extract_description(r) for r in resources]  # noqa
        search_data['res_format'] = self._prepare_formats_for_index(resources)  # noqa
        search_data['res_rights'] = [simplify_terms_of_use(r['rights']) for r in resources]  # noqa
        search_data['title_string'] = extract_title(validated_dict)
        search_data['description'] = extract_description(validated_dict)  # noqa
        org = validated_dict[u'organization']
        if 'political_level' in org:
            search_data['political_level'] = org[u'political_level']  # noqa

        try:
            # index language-specific values (or their fallback)
            text_field_items = {}
            for lang_code in get_langs():
                search_data['title_' + lang_code] = get_localized_value(
                    validated_dict['title'], lang_code)
                search_data['title_string_' + lang_code] = \
                    munge_title_to_name(
                        get_localized_value(validated_dict['title'], lang_code))
                search_data['description_' + lang_code] = get_localized_value(
                    validated_dict['description'], lang_code)
                keywords = get_localized_value(
                    validated_dict['keywords'], lang_code)
                search_data['keywords_' + lang_code] = keywords

                # full-text field: description + keywords + resource texts
                parts = [get_localized_value(
                    validated_dict['description'], lang_code)]  # noqa
                parts.extend(keywords)
                parts.extend(r['title'][lang_code]
                             for r in resources if r['title'][lang_code])
                parts.extend(r['description'][lang_code]
                             for r in resources if r['description'][lang_code])
                text_field_items['text_' + lang_code] = parts

            # flatten values for text_* fields
            for key, value in text_field_items.iteritems():
                search_data[key] = ' '.join(value)

        except KeyError:
            pass

        return search_data
Exemple #8
0
    def _object_value(self, subject, predicate, multilang=False):
        '''
        Given a subject and a predicate, returns the value of the object

        Both subject and predicate must be rdflib URIRef or BNode objects

        If found, the unicode representation is returned, else None.
        With multilang=True a dict is returned that maps every language
        from get_langs() to its translation ('' when missing); literals
        without a language tag are stored under the default language.
        '''
        default_lang = 'de'
        if not multilang:
            for o in self.g.objects(subject, predicate):
                return unicode(o)
            # fixed: previously fell through and returned {} here,
            # contradicting the documented "else None" contract
            return None
        lang_dict = {}
        for o in self.g.objects(subject, predicate):
            # untagged literals are treated as the default language
            lang_dict[o.language or default_lang] = unicode(o)
        # when a translation does not exist, create an empty one
        for lang in get_langs():
            lang_dict.setdefault(lang, '')
        return lang_dict
    def _object_value(self, subject, predicate, multilang=False):
        '''
        Given a subject and a predicate, returns the value of the object

        Both subject and predicate must be rdflib URIRef or BNode objects

        If found, the unicode representation is returned, else None.
        With multilang=True a dict is returned instead, mapping every
        language from get_langs() to its translation ('' when missing);
        literals without a language tag count as the default language.
        '''
        default_lang = 'de'
        lang_dict = {}
        for o in self.g.objects(subject, predicate):
            if multilang and o.language:
                lang_dict[o.language] = unicode(o)
            elif multilang:
                # untagged literal: assume the default language
                lang_dict[default_lang] = unicode(o)
            else:
                return unicode(o)
        if multilang:
            # when translation does not exist, create an empty one
            for lang in get_langs():
                if lang not in lang_dict:
                    lang_dict[lang] = ''
            return lang_dict
        # fixed: previously returned {} on a non-multilang miss,
        # contradicting the documented "else None" contract
        return None
Exemple #10
0
    def before_index(self, search_data):
        """Add multilingual and derived fields to the search document."""
        if not self.is_supported_package_type(search_data):
            return search_data

        extract_title = LangToString('title')
        validated_dict = json.loads(search_data['validated_data_dict'])
        resources = validated_dict[u'resources']
        groups = validated_dict[u'groups']

        search_data['res_name'] = [extract_title(r) for r in resources]  # noqa
        # per-language copies of resource names/descriptions and group names
        for lang in ('en', 'de', 'fr', 'it'):
            search_data['res_name_' + lang] = [
                sh.get_localized_value(r['title'], lang)
                for r in resources
            ]  # noqa
            search_data['res_description_' + lang] = [
                sh.get_localized_value(r['description'], lang)
                for r in resources
            ]  # noqa
            search_data['groups_' + lang] = [
                sh.get_localized_value(g['display_name'], lang)
                for g in groups
            ]  # noqa
        search_data['res_description'] = [
            LangToString('description')(r) for r in resources
        ]  # noqa
        search_data['res_format'] = self._prepare_formats_for_index(resources)  # noqa
        search_data['res_rights'] = [
            sh.simplify_terms_of_use(r['rights']) for r in resources
        ]  # noqa
        search_data['title_string'] = extract_title(validated_dict)
        search_data['description'] = LangToString('description')(
            validated_dict)  # noqa
        organization = validated_dict[u'organization']
        if 'political_level' in organization:
            search_data['political_level'] = organization[u'political_level']  # noqa

        search_data['identifier'] = validated_dict.get('identifier')
        search_data['contact_points'] = [
            c['name'] for c in validated_dict.get('contact_points', [])
        ]  # noqa
        search_data['publishers'] = [
            p['label'] for p in validated_dict.get('publishers', [])
        ]  # noqa

        # TODO: Remove the try-except-block.
        # This fixes the index while we have 'wrong' relations on
        # datasets harvested with an old version of ckanext-geocat
        try:
            search_data['see_alsos'] = [
                d['dataset_identifier']
                for d in validated_dict.get('see_alsos', [])
            ]  # noqa
        except TypeError:
            search_data['see_alsos'] = list(
                validated_dict.get('see_alsos', []))  # noqa

        # make sure we're not dealing with NoneType in the date fields
        for date_field in ('metadata_created', 'metadata_modified'):
            if search_data[date_field] is None:
                search_data[date_field] = ''

        try:
            # index language-specific values (or their fallback)
            for lang_code in sh.get_langs():
                search_data['title_' + lang_code] = sh.get_localized_value(
                    validated_dict['title'], lang_code)
                search_data['title_string_' + lang_code] = munge_title_to_name(
                    sh.get_localized_value(validated_dict['title'], lang_code))
                search_data['description_' + lang_code] = \
                    sh.get_localized_value(
                        validated_dict['description'], lang_code)  # noqa
                search_data['keywords_' + lang_code] = sh.get_localized_value(
                    validated_dict['keywords'], lang_code)
                search_data['organization_' + lang_code] = \
                    sh.get_localized_value(
                        validated_dict['organization']['title'],
                        lang_code)  # noqa

        except KeyError:
            pass

        # clean terms for suggest context
        return self._prepare_suggest_context(search_data, validated_dict)