def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
    """
    Search the corpus archive for corpora matching a query and filter values.

    Arguments:
    plugin_api -- plug-in API object (provides user_dict, user_lang, session, ...)
    query -- a search query string; the value False means 'use default values'
    offset -- pagination offset (None is treated as 0)
    limit -- max. number of result rows (None means the configured max. page size)
    filter_dict -- request filter values; keys used here: minSize, maxSize, favOnly

    Returns a dict with keys: rows, nextOffset, keywords, query,
    current_keywords, filters.
    """
    if query is False:  # False means 'use default values'
        query = ''
    ans = {'rows': []}
    permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
    used_keywords = set()
    all_keywords_map = dict(self._corparch.all_keywords(plugin_api.user_lang))
    # size filters arrive in a human-readable form (e.g. '10M') and must be expanded
    if filter_dict.get('minSize'):
        min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
    else:
        min_size = 0
    if filter_dict.get('maxSize'):
        max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
    else:
        max_size = None
    if filter_dict.get('favOnly'):
        favourite_only = bool(int(filter_dict.get('favOnly')))
    else:
        favourite_only = False
    offset = 0 if offset is None else int(offset)
    limit = int(self._corparch.max_page_size) if limit is None else int(limit)
    user_items = self._corparch.user_items.get_user_items(plugin_api)

    def fav_id(corpus_id):
        # return the user's favourite-item identifier for a corpus, or None
        for item in user_items:
            if item.is_single_corpus and item.main_corpus_id == corpus_id:
                return item.ident
        return None

    query_substrs, query_keywords = parse_query(self._tag_prefix, query)
    normalized_query_substrs = [s.lower() for s in query_substrs]
    for corp in self._corparch.get_list(plugin_api, permitted_corpora):
        full_data = self._corparch.get_corpus_info(plugin_api.user_lang, corp['id'])
        if not isinstance(full_data, BrokenCorpusInfo):
            if favourite_only and fav_id(corp['id']) is None:
                continue
            keywords = [k for k, _ in full_data.metadata.keywords]
            tests = []
            found_in = []
            tests.extend(k in keywords for k in query_keywords)
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            # NOTE(review): custom_filter is passed self._plugin_api while the rest
            # of the method uses the 'plugin_api' argument — confirm this is intended
            tests.append(
                self._corparch.custom_filter(self._plugin_api, full_data, permitted_corpora))
            if self.matches_all(tests):
                # removed a no-op self-assignment (corp['size'] = corp['size'])
                corp['size_info'] = l10n.simplify_num(corp['size']) if corp['size'] else None
                corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
                corp['found_in'] = found_in
                corp['fav_id'] = fav_id(corp['id'])
                # because of client-side fav/feat/search items compatibility
                corp['corpus_id'] = corp['id']
                ans['rows'].append(corp)
                used_keywords.update(keywords)
                if not self.should_fetch_next(ans, offset, limit):
                    break
    ans['rows'], ans['nextOffset'] = self.cut_result(
        self.sort(plugin_api, ans['rows']), offset, limit)
    ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
    ans['query'] = query
    ans['current_keywords'] = query_keywords
    ans['filters'] = dict(filter_dict)
    return ans
def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
    """
    Search the corpus archive for corpora matching a query and filter values.

    Arguments:
    plugin_api -- plug-in API object (provides user_dict, user_lang, ...)
    query -- a search query string; the value False means 'use default values'
    offset -- pagination offset (None is treated as 0)
    limit -- max. number of result rows (None means the configured max. page size)
    filter_dict -- request filter values; keys used here: minSize, maxSize

    Returns a dict with keys: rows, nextOffset, keywords, query,
    current_keywords, filters.
    """
    if query is False:  # False means 'use default values'
        query = ''
    ans = {'rows': []}
    permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
    used_keywords = set()
    all_keywords_map = dict(self._corparch.all_keywords(plugin_api.user_lang))
    # size filters arrive in a human-readable form (e.g. '10M') and must be expanded
    if filter_dict.get('minSize'):
        min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
    else:
        min_size = 0
    if filter_dict.get('maxSize'):
        max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
    else:
        max_size = None
    offset = 0 if offset is None else int(offset)
    limit = int(self._corparch.max_page_size) if limit is None else int(limit)
    user_items = self._corparch.user_items.get_user_items(plugin_api)

    def fav_id(corpus_id):
        # return the user's favourite-item identifier for a corpus, or None
        for item in user_items:
            if item.is_single_corpus and item.main_corpus_id == corpus_id:
                return item.ident
        return None

    query_substrs, query_keywords = parse_query(self._tag_prefix, query)
    normalized_query_substrs = [s.lower() for s in query_substrs]
    for corp in self._corparch.get_list(plugin_api, permitted_corpora):
        full_data = self._corparch.get_corpus_info(plugin_api.user_lang, corp['id'])
        if not isinstance(full_data, BrokenCorpusInfo):
            keywords = list(full_data['metadata']['keywords'])
            tests = []
            found_in = []
            tests.extend(k in keywords for k in query_keywords)
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            tests.append(self._corparch.custom_filter(
                self._plugin_api, full_data, permitted_corpora))
            if self.matches_all(tests):
                # removed a no-op self-assignment (corp['size'] = corp['size'])
                corp['size_info'] = l10n.simplify_num(corp['size']) if corp['size'] else None
                corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
                corp['found_in'] = found_in
                corp['fav_id'] = fav_id(corp['id'])
                # because of client-side fav/feat/search items compatibility
                corp['corpus_id'] = corp['id']
                ans['rows'].append(corp)
                used_keywords.update(keywords)
                if not self.should_fetch_next(ans, offset, limit):
                    break
    ans['rows'], ans['nextOffset'] = self.cut_result(
        self.sort(plugin_api, ans['rows']), offset, limit)
    ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
    ans['query'] = query
    ans['current_keywords'] = query_keywords
    ans['filters'] = dict(filter_dict)
    return ans
def search(self, user_id, query, offset=0, limit=None, filter_dict=None):
    """
    Search the corpus list for corpora matching a query and filter values.

    Arguments:
    user_id -- database ID of the user performing the search
    query -- a search query string
    offset -- pagination offset (None is treated as 0)
    limit -- max. number of result rows (None means the configured max. page size)
    filter_dict -- request filter values; keys used here: minSize, maxSize

    Returns a dict with keys: rows, nextOffset, keywords, query, filters.
    """
    ans = {"rows": []}
    permitted_corpora = self._auth.permitted_corpora(user_id)
    user_items = self._user_items.get_user_items(user_id)
    used_keywords = set()
    all_keywords_map = dict(self.all_keywords)
    # size filters arrive in a human-readable form (e.g. '10M') and must be expanded
    if filter_dict.get("minSize"):
        min_size = l10n.desimplify_num(filter_dict.get("minSize"), strict=False)
    else:
        min_size = 0
    if filter_dict.get("maxSize"):
        max_size = l10n.desimplify_num(filter_dict.get("maxSize"), strict=False)
    else:
        max_size = None
    corplist = self.get_list(permitted_corpora)
    if offset is None:
        offset = 0
    else:
        offset = int(offset)
    if limit is None:
        limit = self._max_page_size

    def cut_result(res):
        # slice out the requested page; the second returned value is the
        # offset of the next page or None if there is no more data
        if limit is not None:
            right_lim = offset + int(limit)
            new_res = res[offset:right_lim]
            if right_lim >= len(res):
                right_lim = None
        else:
            right_lim = None
            new_res = res
        return new_res, right_lim

    def is_fav(corpus_id):
        # True if the corpus is among the user's favourite items
        return any(isinstance(item, CorpusItem) and item.corpus_id == corpus_id
                   for item in user_items)

    query_substrs, query_keywords = self._parse_query(query)

    def matches_size(d):
        # True if the corpus size fits the requested [min_size, max_size] range
        item_size = d.get("size", None)
        return (item_size is not None
                and (not min_size or int(item_size) >= int(min_size))
                and (not max_size or int(item_size) <= int(max_size)))

    normalized_query_substrs = [s.lower() for s in query_substrs]
    for corp in corplist:
        full_data = self.get_corpus_info(corp["id"], self.getlocal("lang"))
        if not isinstance(full_data, BrokenCorpusInfo):
            keywords = list(full_data["metadata"]["keywords"].keys())
            hits = []
            found_in = []
            hits.extend(k in keywords for k in query_keywords)
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp["name"].lower():
                    hits.append(True)
                elif s in (corp["desc"].lower() if corp["desc"] else ""):
                    hits.append(True)
                    found_in.append(_("description"))
                else:
                    hits.append(False)
            hits.append(matches_size(corp))
            hits.append(self.custom_filter(full_data, permitted_corpora))
            # all() replaces the former reduce(lambda t1, t2: t1 and t2, ...)
            if all(hits):
                corp["raw_size"] = l10n.simplify_num(corp["size"]) if corp["size"] else None
                corp["keywords"] = [(k, all_keywords_map[k]) for k in keywords]
                corp["found_in"] = found_in
                corp["user_item"] = is_fav(corp["id"])
                self.customize_search_result_item(corp, full_data)
                ans["rows"].append(corp)
                used_keywords.update(keywords)

    def corp_cmp_key(c):
        # sort corpora by name; unnamed items sort first via empty string
        return c.get("name") if c.get("name") is not None else ""

    ans["rows"], ans["nextOffset"] = cut_result(
        l10n.sort(ans["rows"], loc=self._lang(), key=corp_cmp_key))
    ans["keywords"] = l10n.sort(used_keywords, loc=self._lang())
    ans["query"] = query
    ans["filters"] = dict(filter_dict)
    return ans
def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
    """
    Search the corpus archive for corpora matching a query and filter values.

    Arguments:
    plugin_api -- plug-in API object (provides user_lang, ...)
    query -- a search query string; the value False means 'use default values'
    offset -- pagination offset (None is treated as 0)
    limit -- max. number of result rows (None means the configured max. page size)
    filter_dict -- request filter values; keys used here: minSize, maxSize

    Returns a dict with keys: rows, nextOffset, keywords, query,
    current_keywords, filters.
    """
    if query is False:  # False means 'use default values'
        query = ''
    # size filters arrive in a human-readable form (e.g. '10M') and must be expanded
    if filter_dict.get('minSize'):
        min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
    else:
        min_size = 0
    if filter_dict.get('maxSize'):
        max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
    else:
        max_size = None
    offset = 0 if offset is None else int(offset)
    limit = int(self._corparch.max_page_size) if limit is None else int(limit)
    user_items = self._corparch.user_items.get_user_items(plugin_api)

    def fav_id(corpus_id):
        # return the user's favourite-item identifier for a corpus, or None
        for item in user_items:
            if item.is_single_corpus and item.main_corpus_id == corpus_id:
                return item.ident
        return None

    def get_found_in(corp, phrases):
        # report (at most once) that a phrase matched the corpus description
        ans = []
        for phrase in phrases:
            if phrase in corp.description.lower():
                ans.append(_('description'))
                break
        return ans

    query_substrs, query_keywords = parse_query(self._tag_prefix, query)
    normalized_query_substrs = [s.lower() for s in query_substrs]
    used_keywords = set()
    # fetch one extra row to be able to detect whether a next page exists
    rows = self._corparch.list_corpora(plugin_api, substrs=normalized_query_substrs,
                                       min_size=min_size, max_size=max_size, offset=offset,
                                       limit=limit + 1, keywords=query_keywords).values()
    ans = []
    for i, corp in enumerate(rows):
        used_keywords.update(corp.keywords)
        corp.keywords = self._corparch.get_l10n_keywords(corp.keywords, plugin_api.user_lang)
        corp.fav_id = fav_id(corp.id)
        corp.found_in = get_found_in(corp, normalized_query_substrs)
        ans.append(corp.to_dict())
        if i == limit - 1:
            break
    # bug fix: the next page starts at offset + limit; the former value
    # (limit + 1) was correct only for the very first page
    return dict(rows=ans,
                nextOffset=offset + limit if len(rows) > limit else None,
                keywords=l10n.sort(used_keywords, loc=plugin_api.user_lang),
                query=query,
                current_keywords=query_keywords,
                filters=dict(filter_dict))
def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
    """
    Search the corpus archive for corpora matching a query and filter values.

    Arguments:
    plugin_api -- plug-in API object (provides user_lang, ...)
    query -- a search query string; the value False means 'use default values'
    offset -- pagination offset (None is treated as 0)
    limit -- max. number of result rows (None means the configured max. page size)
    filter_dict -- request filter values; keys used here: minSize, maxSize,
                   requestable, favOnly

    Returns a dict with keys: rows, nextOffset, keywords, query,
    current_keywords, filters.
    """
    if query is False:  # False means 'use default values'
        query = ''
    # expand human-readable size filters (e.g. '10M') into plain numbers
    raw_min = filter_dict.get('minSize')
    min_size = l10n.desimplify_num(raw_min, strict=False) if raw_min else 0
    raw_max = filter_dict.get('maxSize')
    max_size = l10n.desimplify_num(raw_max, strict=False) if raw_max else None
    raw_req = filter_dict.get('requestable')
    requestable = bool(int(raw_req)) if raw_req else False
    raw_fav = filter_dict.get('favOnly')
    favourites_only = bool(int(raw_fav)) if raw_fav else False
    offset = int(offset) if offset is not None else 0
    limit = int(limit) if limit is not None else int(self._corparch.max_page_size)
    user_items = self._corparch.user_items.get_user_items(plugin_api)
    # map: corpus id -> favourite item identifier (single-corpus items only)
    favourite_corpora = {item.main_corpus_id: item.ident
                         for item in user_items if item.is_single_corpus}

    def get_found_in(corp, phrases):
        # report (at most once) that a phrase matched the description but
        # not the name — the name match takes precedence
        corp_name = corp.name.lower() if corp.name is not None else ''
        corp_desc = corp.description.lower() if corp.description is not None else ''
        for phrase in phrases:
            p = phrase.lower()
            if p in corp_desc and p not in corp_name:
                return ['defaultCorparch__found_in_desc']
        return []

    query_substrs, query_keywords = parse_query(self._tag_prefix, query)
    normalized_query_substrs = [s.lower() for s in query_substrs]
    used_keywords = set()
    # fetch one extra row so we can tell whether a next page exists
    fetched = list(
        self._corparch.list_corpora(
            plugin_api, substrs=normalized_query_substrs, min_size=min_size,
            max_size=max_size, requestable=requestable, offset=offset,
            limit=limit + 1, keywords=query_keywords,
            favourites=tuple(favourite_corpora.keys()) if favourites_only else ()
        ).values())
    result_rows = []
    for idx, corp in enumerate(fetched):
        used_keywords.update(corp.keywords)
        corp.keywords = self._corparch.get_l10n_keywords(corp.keywords, plugin_api.user_lang)
        corp.fav_id = favourite_corpora.get(corp.id, None)
        corp.found_in = get_found_in(corp, normalized_query_substrs)
        result_rows.append(corp.to_dict())
        if idx == limit - 1:  # keep at most 'limit' rows; the extra one only signals paging
            break
    return dict(rows=result_rows,
                nextOffset=offset + limit if len(fetched) > limit else None,
                keywords=l10n.sort(used_keywords, loc=plugin_api.user_lang),
                query=query,
                current_keywords=query_keywords,
                filters=dict(filter_dict))
def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
    """
    Search the corpus archive for corpora matching a query and filter values.

    Keyword selection comes from three sources (in priority order):
    1) 'keyword' values from filter_dict (external links),
    2) keywords parsed from the query string,
    3) keywords remembered in the user session (when no query was submitted).

    Arguments:
    plugin_api -- plug-in API object (provides user_lang, session, user_dict, ...)
    query -- a search query string; the value False means 'use default values'
    offset -- pagination offset (None is treated as 0)
    limit -- max. number of result rows (None means the configured max. page size)
    filter_dict -- request filter values; keys used here: keyword, minSize,
                   maxSize, sortBySize

    Returns a dict with keys: rows, nextOffset, keywords, query,
    current_keywords, filters.
    """
    external_keywords = filter_dict.getlist('keyword')
    external_keywords = self._corparch.map_external_keywords(
        external_keywords, plugin_api.user_lang)
    if len(external_keywords) != 0:
        # keywords coming via URL override both the query and the session
        query_substrs = []
        query_keywords = external_keywords + [self.default_label]
    else:
        if self.SESSION_KEYWORDS_KEY not in plugin_api.session:
            plugin_api.session[self.SESSION_KEYWORDS_KEY] = [
                self.default_label
            ]
        initial_query = query
        if query is False:  # False means 'use default values'
            query = ''
        query_substrs, query_keywords = parse_query(
            self._tag_prefix, query)
        if len(query_keywords) == 0 and initial_query is False:
            # no explicit query -> reuse keywords remembered in the session
            query_keywords = plugin_api.session[self.SESSION_KEYWORDS_KEY]
        else:
            # remember the submitted keywords for subsequent requests
            plugin_api.session[self.SESSION_KEYWORDS_KEY] = query_keywords
    # rebuild the query string from the resolved substrings and keywords
    query = ' '.join(query_substrs) \
        + ' ' + ' '.join('%s%s' % (self._tag_prefix, s) for s in query_keywords)
    ans = {'rows': []}
    permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
    # size filters arrive in a human-readable form (e.g. '10M') and must be expanded
    if filter_dict.get('minSize'):
        min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
    else:
        min_size = 0
    if filter_dict.get('maxSize'):
        max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
    else:
        max_size = None
    sorting_field = filter_dict.get('sortBySize', 'name')
    if offset is None:
        offset = 0
    else:
        offset = int(offset)
    if limit is None:
        limit = int(self._corparch.max_page_size)
    else:
        limit = int(limit)
    user_items = self._corparch.user_items.get_user_items(plugin_api)

    def fav_id(corpus_id):
        # return the user's favourite-item identifier for a corpus, or None
        for item in user_items:
            if item.is_single_corpus and item.main_corpus_id == corpus_id:
                return item.ident
        return None

    # re-parse the reconstructed query so substrings/keywords reflect it
    query_substrs, query_keywords = parse_query(self._tag_prefix, query)
    all_keywords_map = dict(
        self._corparch.all_keywords(plugin_api.user_lang))
    normalized_query_substrs = [s.lower() for s in query_substrs]
    used_keywords = set()
    for corp in self._corparch.get_list(plugin_api):
        full_data = self._corparch.get_corpus_info(plugin_api.user_lang, corp['id'])
        if not isinstance(full_data, BrokenCorpusInfo):
            keywords = [k for k, _ in full_data.metadata.keywords]
            tests = []
            found_in = []
            tests.extend([k in keywords for k in query_keywords])
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            # NOTE(review): custom_filter receives self._plugin_api while the
            # rest of the method uses the 'plugin_api' argument — confirm intended
            tests.append(
                self._corparch.custom_filter(self._plugin_api, full_data, permitted_corpora))
            if all(test for test in tests):
                corp['size'] = corp['size']
                corp['size_info'] = l10n.simplify_num(
                    corp['size']) if corp['size'] else None
                corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
                corp['found_in'] = found_in
                corp['fav_id'] = fav_id(corp['id'])
                # because of client-side fav/feat/search items compatibility
                corp['corpus_id'] = corp['id']
                # extra attributes exposed to the client by this variant
                corp['pmltq'] = full_data['pmltq']
                corp['repo'] = full_data['web']
                corp['access'] = full_data['access']
                corp['tokenConnect'] = full_data['token_connect'][
                    'providers']
                ans['rows'].append(corp)
                used_keywords.update(keywords)
                # stop early once enough rows for the requested page were collected
                if not self.should_fetch_next(ans, offset, limit):
                    break
    ans['rows'], ans['nextOffset'] = self.cut_result(
        self.sort(plugin_api, ans['rows'], field=sorting_field), offset, limit)
    ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
    ans['query'] = query
    ans['current_keywords'] = query_keywords
    ans['filters'] = dict(filter_dict)
    return ans
def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
    """
    Search the corpus archive for corpora matching a query and filter values.

    Arguments:
    plugin_api -- plug-in API object (provides user_lang, ...)
    query -- a search query string; the value False means 'use default values'
    offset -- pagination offset (None is treated as 0)
    limit -- max. number of result rows (None means the configured max. page size)
    filter_dict -- request filter values; keys used here: minSize, maxSize,
                   requestable

    Returns a dict with keys: rows, nextOffset, keywords, query,
    current_keywords, filters.
    """
    if query is False:  # False means 'use default values'
        query = ''
    # size filters arrive in a human-readable form (e.g. '10M') and must be expanded
    if filter_dict.get('minSize'):
        min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
    else:
        min_size = 0
    if filter_dict.get('maxSize'):
        max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
    else:
        max_size = None
    if filter_dict.get('requestable'):
        requestable = bool(int(filter_dict.get('requestable')))
    else:
        requestable = False
    offset = 0 if offset is None else int(offset)
    limit = int(self._corparch.max_page_size) if limit is None else int(limit)
    user_items = self._corparch.user_items.get_user_items(plugin_api)

    def fav_id(corpus_id):
        # return the user's favourite-item identifier for a corpus, or None
        for item in user_items:
            if item.is_single_corpus and item.main_corpus_id == corpus_id:
                return item.ident
        return None

    def get_found_in(corp, phrases):
        # report (at most once) that a phrase matched the description but
        # not the name; guard against None name/description (robustness fix,
        # consistent with the sibling implementation of this method)
        ans = []
        for phrase in phrases:
            phrase = phrase.lower()
            name = corp.name.lower() if corp.name is not None else ''
            desc = corp.description.lower() if corp.description is not None else ''
            if phrase not in name and phrase in desc:
                ans.append('defaultCorparch__found_in_desc')
                break
        return ans

    query_substrs, query_keywords = parse_query(self._tag_prefix, query)
    normalized_query_substrs = [s.lower() for s in query_substrs]
    used_keywords = set()
    # fetch one extra row to be able to detect whether a next page exists
    rows = self._corparch.list_corpora(plugin_api, substrs=normalized_query_substrs,
                                       min_size=min_size, max_size=max_size,
                                       requestable=requestable, offset=offset,
                                       limit=limit + 1, keywords=query_keywords).values()
    ans = []
    for i, corp in enumerate(rows):
        used_keywords.update(corp.keywords)
        corp.keywords = self._corparch.get_l10n_keywords(corp.keywords, plugin_api.user_lang)
        corp.fav_id = fav_id(corp.id)
        corp.found_in = get_found_in(corp, normalized_query_substrs)
        ans.append(corp.to_dict())
        if i == limit - 1:
            break
    return dict(rows=ans,
                nextOffset=offset + limit if len(rows) > limit else None,
                keywords=l10n.sort(used_keywords, loc=plugin_api.user_lang),
                query=query,
                current_keywords=query_keywords,
                filters=dict(filter_dict))