def testPairs(self):
    """Check pairwise weighted union/intersection over trees and buckets.

    Every ordered pair drawn from two IFBTrees and their IFBucket copies is
    combined under several weight pairs, in both argument orders, and the
    result is compared with a value computed directly from the mappings.
    """
    tree_a = IFBTree([(1, 10), (3, 30), (7, 70)])
    tree_b = IFBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
    allkeys = [1, 3, 5, 7, 9]
    bucket_a = IFBucket(tree_a)
    bucket_b = IFBucket(tree_b)

    operands = (tree_a, tree_b, bucket_a, bucket_b)
    weight_pairs = ((0, 0), (1, 10), (10, 1), (2, 3))

    for x in operands:
        # Sanity check: the fixture only uses keys listed in allkeys.
        for key in x.keys():
            self.assertEqual(key in allkeys, 1)

        for y in operands:
            for w1, w2 in weight_pairs:
                forward = [(x, w1), (y, w2)]
                backward = [(y, w2), (x, w1)]

                # Union: a key counts if either side has it; a missing
                # side contributes 0.
                expected = sorted(
                    (key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                    for key in allkeys
                    if x.has_key(key) or y.has_key(key)
                )
                for pairs in (forward, backward):
                    got = mass_weightedUnion(pairs)
                    self.assertEqual(expected, list(got.items()))

                # Intersection: a key counts only if both sides have it.
                expected = sorted(
                    (key, x[key] * w1 + y[key] * w2)
                    for key in allkeys
                    if x.has_key(key) and y.has_key(key)
                )
                for pairs in (forward, backward):
                    got = mass_weightedIntersection(pairs)
                    self.assertEqual(expected, list(got.items()))
def executeQuery(self, index):
    """Evaluate every child node and return the union of their results.

    Each child result is given weight 1. The combined mapping is built by
    mass_weightedUnion using ``index.family``.
    """
    child_results = (
        child.executeQuery(index) for child in self.getValue()
    )
    # A None result technically means "matches every document", but it is
    # dropped here as if it matched nothing, so that
    #     real_word OR stop_word
    # behaves like plain real_word.
    weighted = [(hits, 1) for hits in child_results if hits is not None]
    return mass_weightedUnion(weighted, index.family)
def executeQuery(self, index):
    """Evaluate every child node and return the union of their results.

    Each non-None child result is paired with weight 1 and the pairs are
    handed to mass_weightedUnion.
    """
    weighted = []
    for child in self.getValue():
        hits = child.executeQuery(index)
        if hits is None:
            # None technically means "matches every document", but it is
            # treated as matching nothing, so that
            #     real_word OR stop_word
            # behaves like plain real_word.
            continue
        weighted.append((hits, 1))
    return mass_weightedUnion(weighted)
def testMany(self):
    """Check weighted union/intersection over many shuffled IFBTrees.

    Builds N trees of decreasing size that all share one common key, gives
    tree ``i`` the weight ``i + 1``, shuffles the (tree, weight) pairs and
    compares both set operations with values computed directly.
    """
    import random

    N = 15  # number of IFBTrees to feed in
    # Stored in every tree below, so the intersection is never empty.
    commonkey = N * 1000
    allkeys = {commonkey}
    L = []
    for i in range(N):
        t = IFBTree()
        t[commonkey] = i
        for j in range(N - i):
            key = i + j
            allkeys.add(key)
            t[key] = N * i + j
        L.append((t, i + 1))
    random.shuffle(L)
    # sorted() accepts any iterable; the former ``keys()`` + ``.sort()``
    # pair fails on Python 3, where dict.keys() is a view without sort().
    allkeys = sorted(allkeys)

    # Test the union: every tree holding the key adds value * weight.
    expected = []
    for key in allkeys:
        total = sum(t[key] * w for t, w in L if key in t)
        expected.append((key, total))
    got = mass_weightedUnion(L)
    self.assertEqual(expected, list(got.items()))

    # Test the intersection: only keys present in every tree survive.
    expected = []
    for key in allkeys:
        if all(key in t for t, _ in L):
            total = sum(t[key] * w for t, w in L)
            expected.append((key, total))
    got = mass_weightedIntersection(L)
    self.assertEqual(expected, list(got.items()))
def apply(self, query_list, threshold, start=0, count=None):
    """Combine the per-term document sets and filter them by a threshold.

    For every term of ``query_list`` found in ``self._wids_dict`` the
    matching entry of ``self.index._wordinfo`` is collected together with
    the term weight. The collected pairs are merged with
    mass_weightedUnion, and the merged result is filtered through
    ``byValue`` with a minimum of ``sqrt(sum(weight ** 2)) * threshold``.

    ``start`` and ``count`` are accepted for interface compatibility but
    are not used here.

    Returns whatever ``byValue`` of the merged result returns.
    """
    wids_dict = self._wids_dict
    wordinfo = self.index._wordinfo

    weighted_docs = []
    squared_weights = 0
    for term in query_list:
        wid, weight = wids_dict.get(term, (None, None))
        if wid is None:
            # Term has no entry in the weight table; ignore it.
            continue
        weighted_docs.append((wordinfo[wid], weight))
        # ``**`` accepts any numeric weight; calling float.__pow__ through
        # the type rejected int weights with a TypeError.
        squared_weights += weight ** 2

    results = mass_weightedUnion(weighted_docs)
    query_weight = math.sqrt(squared_weights)
    return results.byValue(query_weight * threshold)
def apply(self, query_list, threshold, start=0, count=None):
    """Merge the document sets of the known query terms and filter them.

    Terms missing from ``self._wids_dict`` are skipped. For the others,
    the ``self.index._wordinfo`` entry for the word id is paired with the
    term weight. All pairs are merged by mass_weightedUnion. The query
    weight is the square root of the sum of squared term weights, and the
    merged result is cut off with ``byValue(query_weight * threshold)``.

    ``start`` and ``count`` are part of the signature but unused here.

    Returns the value produced by ``byValue`` on the merged result.
    """
    wids_dict = self._wids_dict
    wordinfo = self.index._wordinfo

    pairs = []
    sum_of_squares = 0
    for term in query_list:
        wid, weight = wids_dict.get(term, (None, None))
        if wid is None:
            continue
        docs = wordinfo[wid]
        pairs.append((docs, weight))
        # Plain exponentiation works for int and float weights alike; the
        # earlier unbound float.__pow__ call raised TypeError on ints.
        sum_of_squares += weight ** 2

    merged = mass_weightedUnion(pairs)
    query_weight = math.sqrt(sum_of_squares)
    return merged.byValue(query_weight * threshold)
def executeQuery(self, index):
    """Evaluate an AND node, honouring negated ("NOT") children.

    Results of the plain children are intersected; results of the operands
    of "NOT" children are unioned and then subtracted from that
    intersection. Every result carries weight 1.
    """
    required = []
    excluded = []
    for child in self.getValue():
        if child.nodeType() != "NOT":
            hits = child.executeQuery(index)
            # None technically matches every document, so it cannot narrow
            # the intersection and is simply left out.
            if hits is not None:
                required.append((hits, 1))
            continue

        hits = child.getValue().executeQuery(index)
        # None technically matches every document, but it is treated as if
        # it matched none, so that
        #     real_word AND NOT stop_word
        # acts like plain real_word.
        if hits is not None:
            excluded.append((hits, 1))

    matches = mass_weightedIntersection(required, index.family)
    if not excluded:
        return matches
    unwanted = mass_weightedUnion(excluded, index.family)
    return index.family.IF.difference(matches, unwanted)
def executeQuery(self, index):
    """Evaluate an AND node, subtracting the matches of "NOT" children.

    Plain children are combined with mass_weightedIntersection; the
    operands of "NOT" children are combined with mass_weightedUnion and
    removed from the intersection via ``difference``. All weights are 1.
    """
    wanted = []
    unwanted = []
    for child in self.getValue():
        negated = child.nodeType() == "NOT"
        if negated:
            hits = child.getValue().executeQuery(index)
        else:
            hits = child.executeQuery(index)

        # A None result technically matches every document. For a plain
        # child it therefore need not be included; for a negated child it
        # is treated as matching nothing, so that
        #     real_word AND NOT stop_word
        # acts like plain real_word. Either way it is skipped.
        if hits is None:
            continue

        target = unwanted if negated else wanted
        target.append((hits, 1))

    result = mass_weightedIntersection(wanted)
    if unwanted:
        result = difference(result, mass_weightedUnion(unwanted))
    return result
def search_glob(self, pattern):
    """Return the weighted union of the results for a glob pattern.

    The pattern is expanded to word ids by the lexicon, the ids are
    filtered through ``_remove_oov_wids``, and the per-word results from
    ``_search_wids`` are merged with mass_weightedUnion.
    """
    expanded = self._lexicon.globToWordIds(pattern)
    known = self._remove_oov_wids(expanded)
    per_word = self._search_wids(known)
    return mass_weightedUnion(per_word)
def search(self, term):
    """Return the weighted union of the results for one term.

    Returns None when the lexicon yields no word ids for ``term``; callers
    are expected to read that as "all documents match".
    """
    wids = self._lexicon.termToWordIds(term)
    if wids:
        known = self._remove_oov_wids(wids)
        return mass_weightedUnion(self._search_wids(known))
    return None  # All docs match
def _callFUT(self, L, family=_marker):
    """Call mass_weightedUnion, passing ``family`` only when it was given.

    Leaving ``family`` at the ``_marker`` sentinel exercises the function's
    own default for that argument.
    """
    from zope.index.text.setops import mass_weightedUnion

    args = (L,) if family is _marker else (L, family)
    return mass_weightedUnion(*args)
def search_glob(self, pattern):
    """Return the weighted union of the results for a glob pattern.

    Word ids matching the pattern come from the lexicon and are filtered
    through ``_remove_oov_wids``; the per-word results are then merged by
    mass_weightedUnion using ``self.family``.
    """
    known = self._remove_oov_wids(self._lexicon.globToWordIds(pattern))
    per_word = self._search_wids(known)
    return mass_weightedUnion(per_word, self.family)
def search(self, term):
    """Return the weighted union of the results for one term.

    Returns None when the lexicon yields no word ids for ``term``, which
    stands for "all documents match". Otherwise the ids are filtered
    through ``_remove_oov_wids`` and the per-word results are merged by
    mass_weightedUnion using ``self.family``.
    """
    wids = self._lexicon.termToWordIds(term)
    if wids:
        known = self._remove_oov_wids(wids)
        return mass_weightedUnion(self._search_wids(known), self.family)
    return None  # All docs match
def testEmptyLists(self):
    """Both mass operations return an empty result for an empty input."""
    empty_intersection = mass_weightedIntersection([])
    self.assertEqual(len(empty_intersection), 0)

    empty_union = mass_weightedUnion([])
    self.assertEqual(len(empty_union), 0)