def testPairs(self):
    # Combine every ordered pair drawn from two IIBTrees and the IIBuckets
    # built from them, under several weight pairs, and compare each result
    # with a value computed key by key from the inputs.
    t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
    t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
    allkeys = [1, 3, 5, 7, 9]
    b1 = IIBucket(t1)
    b2 = IIBucket(t2)
    weight_pairs = ((0, 0), (1, 10), (10, 1), (2, 3))
    for x in t1, t2, b1, b2:
        # Sanity check: the inputs hold no key outside allkeys.
        for key in x.keys():
            self.assertEqual(key in allkeys, 1)
        for y in t1, t2, b1, b2:
            for w1, w2 in weight_pairs:
                # Both operand orders must yield the same result.
                orderings = ([(x, w1), (y, w2)], [(y, w2), (x, w1)])

                # Union: a key missing from one side contributes 0.
                expected = [
                    (key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                    for key in allkeys
                    if x.has_key(key) or y.has_key(key)
                ]
                expected.sort()
                for operands in orderings:
                    got = mass_weightedUnion(operands)
                    self.assertEqual(expected, list(got.items()))

                # Intersection: only keys present on both sides survive.
                expected = [
                    (key, x[key] * w1 + y[key] * w2)
                    for key in allkeys
                    if x.has_key(key) and y.has_key(key)
                ]
                expected.sort()
                for operands in orderings:
                    got = mass_weightedIntersection(operands)
                    self.assertEqual(expected, list(got.items()))
def executeQuery(self, index):
    """Run every child node's query against *index* and union the results.

    Each non-None child result is given weight 1 and the list is handed
    to mass_weightedUnion, whose result is returned.
    """
    child_results = [node.executeQuery(index) for node in self.getValue()]
    # A None result technically matches every doc, but it is dropped here
    # as if it matched none, so that
    #     real_word OR stop_word
    # acts like plain real_word.
    weighted = [
        (result, 1) for result in child_results if result is not None
    ]
    return mass_weightedUnion(weighted)
def testMany(self):
    # Feed N overlapping IIBTrees, in shuffled order and with distinct
    # weights, to the mass set operations and compare the results with
    # values computed key by key from the inputs.
    import random
    N = 15  # number of IIBTrees to feed in
    L = []
    commonkey = N * 1000  # stored in every tree
    allkeys = {commonkey: 1}
    for i in range(N):
        t = IIBTree()
        t[commonkey] = i
        for j in range(N - i):
            key = i + j
            allkeys[key] = 1
            t[key] = N * i + j
        L.append((t, i + 1))
    random.shuffle(L)
    # sorted() accepts the dict directly and returns a sorted list of its
    # keys; calling .sort() on the result of keys() only works when keys()
    # returns a list, which is not the case on Python 3.
    allkeys = sorted(allkeys)

    # Test the union: each key sums the weighted values of the trees
    # that contain it.
    expected = [
        (key, sum(t[key] * w for t, w in L if t.has_key(key)))
        for key in allkeys
    ]
    got = mass_weightedUnion(L)
    self.assertEqual(expected, list(got.items()))

    # Test the intersection: a key counts only if every tree contains it.
    expected = [
        (key, sum(t[key] * w for t, w in L))
        for key in allkeys
        if all(t.has_key(key) for t, w in L)
    ]
    got = mass_weightedIntersection(L)
    self.assertEqual(expected, list(got.items()))
def testMany(self):
    # Build N IIBTrees with overlapping key ranges and distinct weights,
    # shuffle them, and check the mass set operations against results
    # accumulated by hand.
    import random
    N = 15  # number of IIBTrees to feed in
    L = []
    commonkey = N * 1000  # stored in every tree
    allkeys = {commonkey: 1}
    for i in range(N):
        t = IIBTree()
        t[commonkey] = i
        for j in range(N - i):
            key = i + j
            allkeys[key] = 1
            t[key] = N * i + j
        L.append((t, i + 1))
    random.shuffle(L)
    # Use sorted() rather than keys() followed by .sort(): on Python 3
    # keys() returns a view object that has no sort() method.
    allkeys = sorted(allkeys)

    # Test the union.
    expected = []
    for key in allkeys:
        total = 0  # named to avoid shadowing the builtin sum()
        for t, w in L:
            if t.has_key(key):
                total += t[key] * w
        expected.append((key, total))
    got = mass_weightedUnion(L)
    self.assertEqual(expected, list(got.items()))

    # Test the intersection.
    expected = []
    for key in allkeys:
        total = 0
        for t, w in L:
            if not t.has_key(key):
                break
            total += t[key] * w
        else:
            # We didn't break out of the loop so it's in the intersection.
            expected.append((key, total))
    got = mass_weightedIntersection(L)
    self.assertEqual(expected, list(got.items()))
def executeQuery(self, index):
    """Intersect the child query results, then subtract any NOT results.

    Children whose nodeType() is "NOT" have their wrapped node queried
    instead; those results are unioned and removed from the intersection
    of the remaining children's results via difference().
    """
    required = []
    excluded = []
    for subnode in self.getValue():
        if subnode.nodeType() == "NOT":
            # Query the node wrapped by the NOT.  A None result
            # technically matches every doc, but it is treated as if it
            # matched none, so that
            #     real_word AND NOT stop_word
            # acts like plain real_word.
            target, destination = subnode.getValue(), excluded
        else:
            # A None result technically matches every doc, so it needn't
            # be included in the intersection.
            target, destination = subnode, required
        outcome = target.executeQuery(index)
        if outcome is not None:
            destination.append((outcome, 1))

    matches = mass_weightedIntersection(required)
    if not excluded:
        return matches
    unwanted = mass_weightedUnion(excluded)
    return difference(matches, unwanted)
def testEmptyLists(self):
    # With no inputs at all, both operations must return something empty.
    empty_intersection = mass_weightedIntersection([])
    self.assertEqual(len(empty_intersection), 0)
    empty_union = mass_weightedUnion([])
    self.assertEqual(len(empty_union), 0)
def search_glob(self, pattern):
    """Return the weighted union of the results for a glob *pattern*.

    The lexicon maps the pattern to word ids, which are filtered through
    _remove_oov_wids() before being searched.
    """
    matching_wids = self._lexicon.globToWordIds(pattern)
    usable_wids = self._remove_oov_wids(matching_wids)
    per_word_results = self._search_wids(usable_wids)
    return mass_weightedUnion(per_word_results)
def search(self, term):
    """Return the weighted union of the results for *term*.

    Returns None when the lexicon yields no word ids for the term, which
    callers are expected to read as "all docs match".
    """
    term_wids = self._lexicon.termToWordIds(term)
    if not term_wids:
        return None  # All docs match
    usable_wids = self._remove_oov_wids(term_wids)
    per_word_results = self._search_wids(usable_wids)
    return mass_weightedUnion(per_word_results)