def query(self, **kwargs):
    """Intersect the results of applying each keyword's value to its entry.

    Every keyword names an entry of this object, looked up as ``self[key]``.
    That entry's ``apply`` is called with the keyword's value and the
    per-entry results are folded together pairwise.

    Folding rules:

    * If both operands of a fold step are truthy they are combined with
      ``intersection``.
    * If either operand is falsy the running result becomes the empty
      tuple ``()``, which keeps every later step empty as well.
    * With exactly one keyword there is nothing to fold, so that entry's
      result is passed through unchanged.
    * ``reduce`` is called without an initial value, so calling ``query``
      with no keywords raises ``TypeError``.

    Returns a ``ResultSet`` constructed from this object and the folded
    result.
    """
    def _intersect(left, right):
        # An empty operand makes the whole intersection empty; skip the call.
        if left and right:
            return intersection(left, right)
        return ()

    # Lazy on purpose: each entry is looked up and applied only as the fold
    # consumes it.
    results = (self[key].apply(value) for key, value in kwargs.items())
    return ResultSet(self, reduce(_intersect, results))
def apply(self, cache, context=None):
    """Evaluate every term and intersect their results.

    Each term in ``self.terms`` is evaluated with
    ``term.cached_apply(cache, context)``.

    * A falsy result from any term is returned immediately; the remaining
      terms are not evaluated.
    * If only one result was collected it is returned as is.
    * Otherwise the results are ordered by length, smallest first, and
      combined one after another.  When ``self.weighted`` is truthy the
      second item of the pair returned by ``weightedIntersection`` is kept;
      otherwise ``intersection`` is used.  The combination stops as soon as
      the running result is falsy.
    """
    collected = []
    for term in self.terms:
        partial = term.cached_apply(cache, context)
        if not partial:
            # One empty operand makes the whole conjunction empty.
            return partial
        collected.append(partial)

    if len(collected) == 1:
        return collected[0]

    # Start from the smallest set so every intersection works on as few
    # elements as possible.
    collected.sort(key=len)
    combined = collected.pop(0)
    for other in collected:
        if self.weighted:
            _weight, combined = weightedIntersection(combined, other)
        else:
            combined = intersection(combined, other)
        if not combined:
            # Nothing left to intersect with.
            return combined
    return combined
def _reindex_doc(self, docid, text):
    """Re-index ``docid`` against ``text``, touching only what changed.

    The word ids currently recorded for the document are compared with the
    word ids the lexicon produces for ``text``.  Word ids that disappeared
    are removed via ``_del_wordinfo``, new ones are added via
    ``_add_wordinfo``, and word ids present on both sides are rewritten
    only when their score differs.  Finally the document weight and the
    encoded word-id list are stored for ``docid``.

    Returns the number of word ids produced for ``text``.
    """
    # Goal: modify as few of the per-word docid->score maps as possible.
    previous_wids = self.get_words(docid)
    # The previous document weight is returned too but is not needed here.
    previous_scores, _previous_weight = self._get_frequencies(previous_wids)

    current_wids = self._lexicon.sourceToWordIds(text)
    current_scores, current_weight = self._get_frequencies(current_wids)

    previous_set = IFTreeSet(previous_scores.keys())
    current_set = IFTreeSet(current_scores.keys())

    # Partition the word ids into: kept, removed, and newly introduced.
    shared = intersection(previous_set, current_set)
    removed = difference(previous_set, shared)
    introduced = difference(current_set, shared)
    # The full sets are not used again once the partitions exist.
    del previous_set, current_set

    for wid in removed.keys():
        self._del_wordinfo(wid, docid)

    for wid in introduced.keys():
        self._add_wordinfo(wid, current_scores[wid], docid)

    for wid in shared.keys():
        # Original author's note: with the Okapi indexer this comparison
        # only fires for words whose counts changed; with the cosine
        # indexer it may fire for every word id, because W(d) probably
        # changed and W(d) is divided into every score.
        score = current_scores[wid]
        if previous_scores[wid] != score:
            self._add_wordinfo(wid, score, docid)

    self._docweight[docid] = current_weight
    self._docwords[docid] = widcode.encode(current_wids)
    return len(current_wids)
def apply(self, query):
    """see IIndexSearch.apply

    The expected query is a 2-tuple ``(q_start, q_end)`` of
    ``datetime.datetime`` values.

    Use cases, relative to the searched range::

        search:          q_start|--------------------|q_end

        1)      i_start|----------------------------------|i_end
        2)                   i_start|--------------------------|i_end
        3)   i_start|-----------------|i_end
        4)                      i_start|-----|i_end

    1) both ends outside the range, 2) start inside, 3) end inside,
    4) both inside (already covered by 2 and 3).

    Returns the ``multiunion`` of the three partial results.

    Raises ``TypeError`` when ``query`` is not a tuple of length two.
    """
    # Check the type first: calling len() on an unsized value (e.g. an int)
    # would raise the interpreter's own TypeError instead of this message.
    if not isinstance(query, tuple) or len(query) != 2:
        raise TypeError("two-length tuple expected", query)
    q_start, q_end = query

    # 1) Objects with both ends outside the range: they start before
    #    q_start *and* end after q_end.
    starting_before = self._i_start.apply((None, q_start))
    ending_after = self._i_end.apply((q_end, None))
    res_both_outside = intersection(starting_before, ending_after)

    # 2) Objects whose start lies inside the range.
    res_start_inside = self._i_start.apply((q_start, q_end))

    # 3) Objects whose end lies inside the range.
    res_end_inside = self._i_end.apply((q_start, q_end))

    # 4) Objects with both ends inside the range are already found by
    #    2) and 3), so no extra lookup is needed.

    return multiunion([res_both_outside, res_start_inside, res_end_inside])
def intersection(self, *args):
    """Return ``BTrees.IFBTree.intersection`` applied to ``args``.

    The BTrees function is imported inside the method, at call time, and
    all positional arguments are forwarded to it unchanged.
    """
    from BTrees.IFBTree import intersection as _if_intersection

    return _if_intersection(*args)
def intersect(self, r1, r2):
    """Intersect two result sets, treating ``None`` for ``r1`` as "no limit".

    When ``r1`` is ``None`` the other operand ``r2`` is returned untouched,
    whatever its value (including an empty set or ``None``).  Otherwise the
    result of ``intersection(r1, r2)`` is returned.

    A real conditional expression is used instead of ``x and a or b``: the
    latter falls through to the ``intersection`` call whenever ``r2`` is
    falsy, even though ``r1 is None`` already decided the outcome.
    NOTE(review): this assumes ``intersection(None, r2)`` returns ``r2``, as
    the BTrees set operations do; confirm which ``intersection`` is in scope.
    """
    return r2 if r1 is None else intersection(r1, r2)
def query(self, **kwargs):
    """Intersect the results of applying each keyword's value to its entry.

    Every keyword names an entry of this object, looked up as ``self[key]``.
    That entry's ``apply`` is called with the keyword's value and the
    per-entry results are folded together pairwise.

    Folding rules:

    * If both operands of a fold step are truthy they are combined with
      ``intersection``.
    * If either operand is falsy the running result becomes the empty
      tuple ``()``, which keeps every later step empty as well.
    * With exactly one keyword there is nothing to fold, so that entry's
      result is passed through unchanged.
    * ``reduce`` is called without an initial value, so calling ``query``
      with no keywords raises ``TypeError``.

    Returns a ``ResultSet`` constructed from this object and the folded
    result.
    """
    def _intersect(left, right):
        # An empty operand makes the whole intersection empty; skip the call.
        if left and right:
            return intersection(left, right)
        return ()

    # Lazy on purpose: each entry is looked up and applied only as the fold
    # consumes it.
    results = (self[key].apply(value) for key, value in kwargs.items())
    return ResultSet(self, reduce(_intersect, results))