def search(self, query: str, frequencies=False):
    """
    Get the stored ``doc_i`` values of the documents matching the query.

    :param query: The whoosh query string
    :param frequencies: If true, return pairs of (doc_i, frequency) rather than only doc_i
    :return: list of ``doc_i`` values, or a list of ``(doc_i, frequency)`` tuples
             (frequency is a float) if frequencies is True
    """
    with self.index.searcher(weighting=scoring.Frequency) as searcher:
        # One parser is enough: it is bound to the "text" field and the index
        # schema, neither of which changes between the parse calls below.
        parser = QueryParser("text", self.index.schema)

        if not frequencies:
            parsed = parser.parse(query)
            hits = searcher.search(parsed, limit=None, scored=False, sortedby=None)
            return [hit['doc_i'] for hit in hits]

        # Original author's note: searcher.search counts all individual
        # occurrences of the terms in a phrase ("term1 term2") once the phrase
        # occurs at least once, so frequencies are computed by walking the
        # matchers and their spans manually instead.
        # (A searcher.search setting may exist that fixes this; not found yet.)
        totals = defaultdict(float)
        reader = searcher.reader()  # loop-invariant, so fetched once

        # divide_query is defined elsewhere; it is iterated here as a sequence
        # of query strings that are each parsed and matched on their own.
        for subquery in divide_query(query):
            matcher = parser.parse(subquery).matcher(searcher)
            # Walk every document the matcher is positioned on until exhausted.
            while matcher.is_active():
                doc_i = reader.stored_fields(matcher.id())['doc_i']
                # boostdict is defined elsewhere; presumably maps spans to
                # their boost -- verify against its definition.
                boosts = boostdict(matcher)
                for span in matcher.spans():
                    # A span without an entry in boosts counts as 1.
                    totals[doc_i] += boosts[span] if span in boosts else 1
                matcher.next()

        # Sub-queries hitting the same doc_i have accumulated into one entry.
        return list(totals.items())