Exemple #1
0
    def search(self,
               plugin_api,
               query,
               offset=0,
               limit=None,
               filter_dict=None):
        """
        Return one page of permitted corpora matching a query and filters.

        A corpus is included only if all of the following hold: it carries
        every keyword parsed from the query, each query substring occurs
        (case-insensitively) in its name or description, its size passes
        self.matches_size() and it passes the corparch's custom_filter().
        Corpora whose info is a BrokenCorpusInfo are skipped.

        Args:
            plugin_api: object providing at least `user_dict` and `user_lang`
            query: the raw query string; False is treated as an empty query
            offset: index of the first row to return (None means 0)
            limit: maximum number of rows; None means the corparch's
                max_page_size
            filter_dict: optional mapping with 'minSize', 'maxSize' and
                'favOnly' entries; None is treated as 'no filters'

        Returns:
            a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
            'current_keywords' and 'filters'
        """
        if filter_dict is None:
            # the declared default must behave as 'no filters' instead of
            # failing on the first .get() call
            filter_dict = {}
        if query is False:  # False means 'use default values'
            query = ''
        ans = {'rows': []}
        permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
        used_keywords = set()
        all_keywords_map = dict(
            self._corparch.all_keywords(plugin_api.user_lang))
        if filter_dict.get('minSize'):
            min_size = l10n.desimplify_num(filter_dict.get('minSize'),
                                           strict=False)
        else:
            min_size = 0
        if filter_dict.get('maxSize'):
            max_size = l10n.desimplify_num(filter_dict.get('maxSize'),
                                           strict=False)
        else:
            max_size = None
        if filter_dict.get('favOnly'):
            favourite_only = bool(int(filter_dict.get('favOnly')))
        else:
            favourite_only = False

        offset = 0 if offset is None else int(offset)
        if limit is None:
            limit = int(self._corparch.max_page_size)
        else:
            limit = int(limit)

        user_items = self._corparch.user_items.get_user_items(plugin_api)

        def fav_id(corpus_id):
            # ident of the first single-corpus user item for the corpus
            for item in user_items:
                if item.is_single_corpus and item.main_corpus_id == corpus_id:
                    return item.ident
            return None

        query_substrs, query_keywords = parse_query(self._tag_prefix, query)

        normalized_query_substrs = [s.lower() for s in query_substrs]
        for corp in self._corparch.get_list(plugin_api, permitted_corpora):
            full_data = self._corparch.get_corpus_info(plugin_api.user_lang,
                                                       corp['id'])
            if isinstance(full_data, BrokenCorpusInfo):
                continue
            if favourite_only and fav_id(corp['id']) is None:
                continue

            keywords = [k for k, _ in full_data.metadata.keywords]
            found_in = []
            tests = [k in keywords for k in query_keywords]
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            tests.append(
                self._corparch.custom_filter(self._plugin_api, full_data,
                                             permitted_corpora))

            if not self.matches_all(tests):
                continue

            corp['size_info'] = l10n.simplify_num(
                corp['size']) if corp['size'] else None
            corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
            corp['found_in'] = found_in
            corp['fav_id'] = fav_id(corp['id'])
            # because of client-side fav/feat/search items compatibility
            corp['corpus_id'] = corp['id']
            ans['rows'].append(corp)
            used_keywords.update(keywords)
            if not self.should_fetch_next(ans, offset, limit):
                break
        ans['rows'], ans['nextOffset'] = self.cut_result(
            self.sort(plugin_api, ans['rows']), offset, limit)
        ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
        ans['query'] = query
        ans['current_keywords'] = query_keywords
        ans['filters'] = dict(filter_dict)
        return ans
Exemple #2
0
    def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
        """
        Return one page of permitted corpora matching a query and size filters.

        A corpus is included only if it carries every keyword parsed from
        the query, each query substring occurs (case-insensitively) in its
        name or description, its size passes self.matches_size() and it
        passes the corparch's custom_filter(). Corpora whose info is
        a BrokenCorpusInfo are skipped.

        Args:
            plugin_api: object providing at least `user_dict` and `user_lang`
            query: the raw query string; False is treated as an empty query
            offset: index of the first row to return (None means 0)
            limit: maximum number of rows; None means the corparch's
                max_page_size
            filter_dict: optional mapping with 'minSize' and 'maxSize'
                entries; None is treated as 'no filters'

        Returns:
            a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
            'current_keywords' and 'filters'
        """
        if filter_dict is None:
            # make the declared default usable (it would fail on .get())
            filter_dict = {}
        if query is False:  # False means 'use default values'
            query = ''
        ans = {'rows': []}
        permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
        used_keywords = set()
        all_keywords_map = dict(self._corparch.all_keywords(plugin_api.user_lang))
        if filter_dict.get('minSize'):
            min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
        else:
            min_size = 0
        if filter_dict.get('maxSize'):
            max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
        else:
            max_size = None

        offset = 0 if offset is None else int(offset)
        if limit is None:
            limit = int(self._corparch.max_page_size)
        else:
            limit = int(limit)

        user_items = self._corparch.user_items.get_user_items(plugin_api)

        def fav_id(corpus_id):
            # ident of the first single-corpus user item for the corpus
            for item in user_items:
                if item.is_single_corpus and item.main_corpus_id == corpus_id:
                    return item.ident
            return None

        query_substrs, query_keywords = parse_query(self._tag_prefix, query)

        normalized_query_substrs = [s.lower() for s in query_substrs]
        for corp in self._corparch.get_list(plugin_api, permitted_corpora):
            full_data = self._corparch.get_corpus_info(plugin_api.user_lang, corp['id'])
            if isinstance(full_data, BrokenCorpusInfo):
                continue

            keywords = list(full_data['metadata']['keywords'].keys())
            found_in = []
            tests = [k in keywords for k in query_keywords]
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            tests.append(self._corparch.custom_filter(
                self._plugin_api, full_data, permitted_corpora))

            if not self.matches_all(tests):
                continue

            corp['size_info'] = l10n.simplify_num(corp['size']) if corp['size'] else None
            corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
            corp['found_in'] = found_in
            corp['fav_id'] = fav_id(corp['id'])
            # because of client-side fav/feat/search items compatibility
            corp['corpus_id'] = corp['id']
            ans['rows'].append(corp)
            used_keywords.update(keywords)
            if not self.should_fetch_next(ans, offset, limit):
                break
        ans['rows'], ans['nextOffset'] = self.cut_result(
            self.sort(plugin_api, ans['rows']), offset, limit)
        ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
        ans['query'] = query
        ans['current_keywords'] = query_keywords
        ans['filters'] = dict(filter_dict)
        return ans
Exemple #3
0
    def search(self, user_id, query, offset=0, limit=None, filter_dict=None):
        """
        Return one page of the user's permitted corpora matching a query.

        A corpus is included only if it carries every keyword parsed from
        the query, each query substring occurs (case-insensitively) in its
        name or description, it has a size within the requested bounds and
        it passes self.custom_filter(). Corpora whose info is
        a BrokenCorpusInfo are skipped. Matching rows are sorted by name
        before the requested page is cut out.

        Args:
            user_id: passed to the auth plug-in and to the user items
                storage to obtain permitted corpora and user items
            query: the raw query string
            offset: index of the first row to return (None means 0)
            limit: maximum number of rows; None means self._max_page_size
            filter_dict: optional mapping with 'minSize' and 'maxSize'
                entries; None is treated as 'no filters'

        Returns:
            a dict with keys 'rows', 'nextOffset', 'keywords', 'query'
            and 'filters'
        """
        if filter_dict is None:
            # make the declared default usable (it would fail on .get())
            filter_dict = {}
        ans = {"rows": []}
        permitted_corpora = self._auth.permitted_corpora(user_id)
        user_items = self._user_items.get_user_items(user_id)
        used_keywords = set()
        all_keywords_map = dict(self.all_keywords)
        if filter_dict.get("minSize"):
            min_size = l10n.desimplify_num(filter_dict.get("minSize"), strict=False)
        else:
            min_size = 0
        if filter_dict.get("maxSize"):
            max_size = l10n.desimplify_num(filter_dict.get("maxSize"), strict=False)
        else:
            max_size = None
        corplist = self.get_list(permitted_corpora)

        offset = 0 if offset is None else int(offset)
        if limit is None:
            limit = self._max_page_size

        def cut_result(res):
            # returns the requested page along with the offset of the next
            # page (None if there are no more rows or no limit is set)
            if limit is None:
                return res, None
            right_lim = offset + int(limit)
            new_res = res[offset:right_lim]
            if right_lim >= len(res):
                right_lim = None
            return new_res, right_lim

        def is_fav(corpus_id):
            return any(isinstance(item, CorpusItem) and item.corpus_id == corpus_id
                       for item in user_items)

        def matches_size(d):
            # items without any size information never match
            item_size = d.get("size", None)
            return (
                item_size is not None
                and (not min_size or int(item_size) >= int(min_size))
                and (not max_size or int(item_size) <= int(max_size))
            )

        def corp_cmp_key(c):
            # items without a name are sorted as if the name was empty
            name = c.get("name")
            return name if name is not None else ""

        query_substrs, query_keywords = self._parse_query(query)
        normalized_query_substrs = [s.lower() for s in query_substrs]

        for corp in corplist:
            full_data = self.get_corpus_info(corp["id"], self.getlocal("lang"))
            if isinstance(full_data, BrokenCorpusInfo):
                continue

            keywords = list(full_data["metadata"]["keywords"].keys())
            found_in = []
            hits = [k in keywords for k in query_keywords]
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp["name"].lower():
                    hits.append(True)
                elif s in (corp["desc"].lower() if corp["desc"] else ""):
                    hits.append(True)
                    found_in.append(_("description"))
                else:
                    hits.append(False)
            hits.append(matches_size(corp))
            hits.append(self.custom_filter(full_data, permitted_corpora))

            # the builtin all() replaces the former reduce()-based fold
            if not all(hits):
                continue

            corp["raw_size"] = l10n.simplify_num(corp["size"]) if corp["size"] else None
            corp["keywords"] = [(k, all_keywords_map[k]) for k in keywords]
            corp["found_in"] = found_in
            corp["user_item"] = is_fav(corp["id"])
            self.customize_search_result_item(corp, full_data)
            ans["rows"].append(corp)
            used_keywords.update(keywords)

        ans["rows"], ans["nextOffset"] = cut_result(
            l10n.sort(ans["rows"], loc=self._lang(), key=corp_cmp_key))
        ans["keywords"] = l10n.sort(used_keywords, loc=self._lang())
        ans["query"] = query
        ans["filters"] = dict(filter_dict)
        return ans
Exemple #4
0
    def search(self,
               plugin_api,
               query,
               offset=0,
               limit=None,
               filter_dict=None):
        """
        Return one page of corpora matching a query and size filters.

        The actual filtering is delegated to the corparch's list_corpora()
        which is asked for one extra row (limit + 1) so it is possible to
        tell whether a next page exists.

        Args:
            plugin_api: object providing at least `user_lang`
            query: the raw query string; False is treated as an empty query
            offset: index of the first row to return (None means 0)
            limit: maximum number of rows; None means the corparch's
                max_page_size
            filter_dict: optional mapping with 'minSize' and 'maxSize'
                entries; None is treated as 'no filters'

        Returns:
            a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
            'current_keywords' and 'filters'
        """
        if filter_dict is None:
            # make the declared default usable (it would fail on .get())
            filter_dict = {}
        if query is False:  # False means 'use default values'
            query = ''
        if filter_dict.get('minSize'):
            min_size = l10n.desimplify_num(filter_dict.get('minSize'),
                                           strict=False)
        else:
            min_size = 0
        if filter_dict.get('maxSize'):
            max_size = l10n.desimplify_num(filter_dict.get('maxSize'),
                                           strict=False)
        else:
            max_size = None

        offset = 0 if offset is None else int(offset)
        if limit is None:
            limit = int(self._corparch.max_page_size)
        else:
            limit = int(limit)

        user_items = self._corparch.user_items.get_user_items(plugin_api)

        def fav_id(corpus_id):
            # ident of the first single-corpus user item for the corpus
            for item in user_items:
                if item.is_single_corpus and item.main_corpus_id == corpus_id:
                    return item.ident
            return None

        def get_found_in(corp, phrases):
            if not phrases:
                return []
            # a missing description is handled as an empty one
            desc = corp.description.lower() if corp.description else ''
            if any(phrase in desc for phrase in phrases):
                return [_('description')]
            return []

        query_substrs, query_keywords = parse_query(self._tag_prefix, query)
        normalized_query_substrs = [s.lower() for s in query_substrs]
        used_keywords = set()
        rows = list(self._corparch.list_corpora(
            plugin_api,
            substrs=normalized_query_substrs,
            min_size=min_size,
            max_size=max_size,
            offset=offset,
            limit=limit + 1,
            keywords=query_keywords).values())
        ans = []
        for i, corp in enumerate(rows):
            used_keywords.update(corp.keywords)
            corp.keywords = self._corparch.get_l10n_keywords(
                corp.keywords, plugin_api.user_lang)
            corp.fav_id = fav_id(corp.id)
            corp.found_in = get_found_in(corp, normalized_query_substrs)
            ans.append(corp.to_dict())
            if i == limit - 1:
                # the extra row only signals that a next page exists
                break
        # the next page starts right after the rows returned here, i.e.
        # it must be relative to the current offset
        return dict(rows=ans,
                    nextOffset=offset + limit if len(rows) > limit else None,
                    keywords=l10n.sort(used_keywords,
                                       loc=plugin_api.user_lang),
                    query=query,
                    current_keywords=query_keywords,
                    filters=dict(filter_dict))
Exemple #5
0
    def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
        """
        Return one page of corpora matching a query and filters.

        The actual filtering is delegated to the corparch's list_corpora()
        which is asked for one extra row (limit + 1) so it is possible to
        tell whether a next page exists.

        Args:
            plugin_api: object providing at least `user_lang`
            query: the raw query string; False is treated as an empty query
            offset: index of the first row to return (None means 0)
            limit: maximum number of rows; None means the corparch's
                max_page_size
            filter_dict: optional mapping with 'minSize', 'maxSize',
                'requestable' and 'favOnly' entries; None is treated
                as 'no filters'

        Returns:
            a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
            'current_keywords' and 'filters'
        """
        if filter_dict is None:
            # make the declared default usable (it would fail on .get())
            filter_dict = {}
        if query is False:  # False means 'use default values'
            query = ''
        if filter_dict.get('minSize'):
            min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
        else:
            min_size = 0
        if filter_dict.get('maxSize'):
            max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
        else:
            max_size = None
        if filter_dict.get('requestable'):
            requestable = bool(int(filter_dict.get('requestable')))
        else:
            requestable = False
        if filter_dict.get('favOnly'):
            favourites_only = bool(int(filter_dict.get('favOnly')))
        else:
            favourites_only = False

        offset = 0 if offset is None else int(offset)
        if limit is None:
            limit = int(self._corparch.max_page_size)
        else:
            limit = int(limit)

        user_items = self._corparch.user_items.get_user_items(plugin_api)
        # maps a corpus ID to the ident of the respective favourite item
        favourite_corpora = {
            item.main_corpus_id: item.ident for item in user_items if item.is_single_corpus}

        def get_found_in(corp, phrases):
            if not phrases:
                return []
            # name and description do not depend on the tested phrase
            name = corp.name.lower() if corp.name is not None else ''
            desc = corp.description.lower() if corp.description is not None else ''
            for phrase in phrases:
                phrase = phrase.lower()
                # a match in the name takes precedence over the description
                if phrase not in name and phrase in desc:
                    return ['defaultCorparch__found_in_desc']
            return []

        query_substrs, query_keywords = parse_query(self._tag_prefix, query)
        normalized_query_substrs = [s.lower() for s in query_substrs]
        used_keywords = set()
        favourites = tuple(favourite_corpora.keys()) if favourites_only else ()
        rows = list(self._corparch.list_corpora(
            plugin_api, substrs=normalized_query_substrs,
            min_size=min_size, max_size=max_size, requestable=requestable,
            offset=offset, limit=limit + 1, keywords=query_keywords,
            favourites=favourites).values())
        ans = []
        for i, corp in enumerate(rows):
            used_keywords.update(corp.keywords)
            corp.keywords = self._corparch.get_l10n_keywords(corp.keywords, plugin_api.user_lang)
            corp.fav_id = favourite_corpora.get(corp.id, None)
            corp.found_in = get_found_in(corp, normalized_query_substrs)
            ans.append(corp.to_dict())
            if i == limit - 1:
                # the extra row only signals that a next page exists
                break
        return dict(rows=ans,
                    nextOffset=offset + limit if len(rows) > limit else None,
                    keywords=l10n.sort(used_keywords, loc=plugin_api.user_lang),
                    query=query,
                    current_keywords=query_keywords,
                    filters=dict(filter_dict))
Exemple #6
0
    def search(self,
               plugin_api,
               query,
               offset=0,
               limit=None,
               filter_dict=None):
        """
        Return one page of corpora matching a query, keywords and filters.

        Keywords are resolved as follows: if the 'keyword' entries of
        filter_dict map to some external keywords, these (along with
        self.default_label) are used and query substrings are dropped.
        Otherwise, keywords parsed from the query are used and stored
        to the session; when the query is False and yields no keywords,
        the keywords stored in the session are used instead. The query
        is then rebuilt from the resulting substrings and keywords.

        Args:
            plugin_api: object providing at least `user_dict`, `user_lang`
                and `session`
            query: the raw query string; False means 'use stored keywords'
            offset: index of the first row to return (None means 0)
            limit: maximum number of rows; None means the corparch's
                max_page_size
            filter_dict: optional multi-value mapping (getlist() is called
                on it) with 'keyword', 'minSize', 'maxSize' and 'sortBySize'
                entries; None is treated as 'no filters'

        Returns:
            a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
            'current_keywords' and 'filters'
        """
        if filter_dict is None:
            # make the declared default usable (it would fail on .getlist())
            filter_dict = {}
            external_keywords = []
        else:
            external_keywords = filter_dict.getlist('keyword')
        external_keywords = self._corparch.map_external_keywords(
            external_keywords, plugin_api.user_lang)
        if external_keywords:
            query_substrs = []
            query_keywords = external_keywords + [self.default_label]
        else:
            if self.SESSION_KEYWORDS_KEY not in plugin_api.session:
                plugin_api.session[self.SESSION_KEYWORDS_KEY] = [
                    self.default_label
                ]
            initial_query = query
            if query is False:
                query = ''
            query_substrs, query_keywords = parse_query(
                self._tag_prefix, query)
            if not query_keywords and initial_query is False:
                query_keywords = plugin_api.session[self.SESSION_KEYWORDS_KEY]
            else:
                plugin_api.session[self.SESSION_KEYWORDS_KEY] = query_keywords
        query = ' '.join(query_substrs) \
                + ' ' + ' '.join('%s%s' % (self._tag_prefix, s) for s in query_keywords)

        ans = {'rows': []}
        permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)

        if filter_dict.get('minSize'):
            min_size = l10n.desimplify_num(filter_dict.get('minSize'),
                                           strict=False)
        else:
            min_size = 0
        if filter_dict.get('maxSize'):
            max_size = l10n.desimplify_num(filter_dict.get('maxSize'),
                                           strict=False)
        else:
            max_size = None

        sorting_field = filter_dict.get('sortBySize', 'name')

        offset = 0 if offset is None else int(offset)
        if limit is None:
            limit = int(self._corparch.max_page_size)
        else:
            limit = int(limit)

        user_items = self._corparch.user_items.get_user_items(plugin_api)

        def fav_id(corpus_id):
            # ident of the first single-corpus user item for the corpus
            for item in user_items:
                if item.is_single_corpus and item.main_corpus_id == corpus_id:
                    return item.ident
            return None

        # the rebuilt query is parsed again to obtain the final values
        query_substrs, query_keywords = parse_query(self._tag_prefix, query)
        all_keywords_map = dict(
            self._corparch.all_keywords(plugin_api.user_lang))
        normalized_query_substrs = [s.lower() for s in query_substrs]
        used_keywords = set()

        for corp in self._corparch.get_list(plugin_api):
            full_data = self._corparch.get_corpus_info(plugin_api.user_lang,
                                                       corp['id'])
            if isinstance(full_data, BrokenCorpusInfo):
                continue

            keywords = [k for k, _ in full_data.metadata.keywords]
            found_in = []
            tests = [k in keywords for k in query_keywords]
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            tests.append(
                self._corparch.custom_filter(self._plugin_api, full_data,
                                             permitted_corpora))

            if not all(tests):
                continue

            corp['size_info'] = l10n.simplify_num(
                corp['size']) if corp['size'] else None
            corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
            corp['found_in'] = found_in
            corp['fav_id'] = fav_id(corp['id'])
            # because of client-side fav/feat/search items compatibility
            corp['corpus_id'] = corp['id']
            corp['pmltq'] = full_data['pmltq']
            corp['repo'] = full_data['web']
            corp['access'] = full_data['access']
            corp['tokenConnect'] = full_data['token_connect']['providers']
            ans['rows'].append(corp)
            used_keywords.update(keywords)
            if not self.should_fetch_next(ans, offset, limit):
                break

        ans['rows'], ans['nextOffset'] = self.cut_result(
            self.sort(plugin_api, ans['rows'], field=sorting_field), offset,
            limit)
        ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
        ans['query'] = query
        ans['current_keywords'] = query_keywords
        ans['filters'] = dict(filter_dict)
        return ans
Exemple #7
0
    def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
        """
        Return one page of corpora matching a query and filters.

        The actual filtering is delegated to the corparch's list_corpora()
        which is asked for one extra row (limit + 1) so it is possible to
        tell whether a next page exists.

        Args:
            plugin_api: object providing at least `user_lang`
            query: the raw query string; False is treated as an empty query
            offset: index of the first row to return (None means 0)
            limit: maximum number of rows; None means the corparch's
                max_page_size
            filter_dict: optional mapping with 'minSize', 'maxSize' and
                'requestable' entries; None is treated as 'no filters'

        Returns:
            a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
            'current_keywords' and 'filters'
        """
        if filter_dict is None:
            # make the declared default usable (it would fail on .get())
            filter_dict = {}
        if query is False:  # False means 'use default values'
            query = ''
        if filter_dict.get('minSize'):
            min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
        else:
            min_size = 0
        if filter_dict.get('maxSize'):
            max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
        else:
            max_size = None
        if filter_dict.get('requestable'):
            requestable = bool(int(filter_dict.get('requestable')))
        else:
            requestable = False

        offset = 0 if offset is None else int(offset)
        if limit is None:
            limit = int(self._corparch.max_page_size)
        else:
            limit = int(limit)

        user_items = self._corparch.user_items.get_user_items(plugin_api)

        def fav_id(corpus_id):
            # ident of the first single-corpus user item for the corpus
            for item in user_items:
                if item.is_single_corpus and item.main_corpus_id == corpus_id:
                    return item.ident
            return None

        def get_found_in(corp, phrases):
            if not phrases:
                return []
            # a missing name or description is handled as an empty string
            name = corp.name.lower() if corp.name is not None else ''
            desc = corp.description.lower() if corp.description is not None else ''
            for phrase in phrases:
                phrase = phrase.lower()
                # a match in the name takes precedence over the description
                if phrase not in name and phrase in desc:
                    return ['defaultCorparch__found_in_desc']
            return []

        query_substrs, query_keywords = parse_query(self._tag_prefix, query)
        normalized_query_substrs = [s.lower() for s in query_substrs]
        used_keywords = set()
        rows = self._corparch.list_corpora(plugin_api, substrs=normalized_query_substrs,
                                           min_size=min_size, max_size=max_size, requestable=requestable,
                                           offset=offset, limit=limit + 1, keywords=query_keywords).values()
        ans = []
        for i, corp in enumerate(rows):
            used_keywords.update(corp.keywords)
            corp.keywords = self._corparch.get_l10n_keywords(corp.keywords, plugin_api.user_lang)
            corp.fav_id = fav_id(corp.id)
            corp.found_in = get_found_in(corp, normalized_query_substrs)
            ans.append(corp.to_dict())
            if i == limit - 1:
                # the extra row only signals that a next page exists
                break
        return dict(rows=ans,
                    nextOffset=offset + limit if len(rows) > limit else None,
                    keywords=l10n.sort(used_keywords, loc=plugin_api.user_lang),
                    query=query,
                    current_keywords=query_keywords,
                    filters=dict(filter_dict))