Ejemplo n.º 1
0
    def testPairs(self):
        # Exercise mass_weightedUnion and mass_weightedIntersection on every
        # ordered pairing of two IIBTrees and the IIBuckets built from them,
        # under several weight combinations, checking each result against a
        # brute-force computation over the known key universe.
        t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
        t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
        allkeys = [1, 3, 5, 7, 9]
        b1 = IIBucket(t1)
        b2 = IIBucket(t2)
        mappings = (t1, t2, b1, b2)
        weightings = ((0, 0), (1, 10), (10, 1), (2, 3))
        for left in mappings:
            # Sanity check: no mapping holds a key outside allkeys.
            for key in left.keys():
                self.assertEqual(key in allkeys, 1)
            for right in mappings:
                for wleft, wright in weightings:
                    # Union: a key present on either side counts, and a
                    # missing side contributes 0 to the weighted sum.
                    union_expected = [
                        (key,
                         left.get(key, 0) * wleft + right.get(key, 0) * wright)
                        for key in allkeys
                        if left.has_key(key) or right.has_key(key)]
                    union_expected.sort()
                    # Both argument orders are expected to give the same
                    # answer.
                    union_got = mass_weightedUnion(
                        [(left, wleft), (right, wright)])
                    self.assertEqual(union_expected, list(union_got.items()))
                    union_got = mass_weightedUnion(
                        [(right, wright), (left, wleft)])
                    self.assertEqual(union_expected, list(union_got.items()))

                    # Intersection: only keys present on both sides count.
                    inter_expected = [
                        (key, left[key] * wleft + right[key] * wright)
                        for key in allkeys
                        if left.has_key(key) and right.has_key(key)]
                    inter_expected.sort()
                    inter_got = mass_weightedIntersection(
                        [(left, wleft), (right, wright)])
                    self.assertEqual(inter_expected, list(inter_got.items()))
                    inter_got = mass_weightedIntersection(
                        [(right, wright), (left, wleft)])
                    self.assertEqual(inter_expected, list(inter_got.items()))
Ejemplo n.º 2
0
    def testPairs(self):
        # Pairwise check of the two mass set operations: every ordered pair
        # drawn from two IIBTrees and two IIBuckets built from them is
        # combined under several weight pairs, and the result is compared
        # with an answer computed by hand from the full list of keys.
        t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
        t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
        allkeys = [1, 3, 5, 7, 9]
        b1 = IIBucket(t1)
        b2 = IIBucket(t2)
        inputs = (t1, t2, b1, b2)
        for first in inputs:
            # Every key stored in an input must be one of the known keys.
            for k in first.keys():
                self.assertEqual(k in allkeys, 1)
            for second in inputs:
                for wt1, wt2 in ((0, 0), (1, 10), (10, 1), (2, 3)):
                    # Test the union: skip keys that neither input holds;
                    # an absent key contributes a value of 0.
                    want = []
                    for k in allkeys:
                        if not (first.has_key(k) or second.has_key(k)):
                            continue
                        score = first.get(k, 0) * wt1 + second.get(k, 0) * wt2
                        want.append((k, score))
                    want.sort()
                    # The same answer is expected for either argument order.
                    outcome = mass_weightedUnion([(first, wt1), (second, wt2)])
                    self.assertEqual(want, list(outcome.items()))
                    outcome = mass_weightedUnion([(second, wt2), (first, wt1)])
                    self.assertEqual(want, list(outcome.items()))

                    # Test the intersection: skip keys missing from either
                    # input.
                    want = []
                    for k in allkeys:
                        if not (first.has_key(k) and second.has_key(k)):
                            continue
                        score = first[k] * wt1 + second[k] * wt2
                        want.append((k, score))
                    want.sort()
                    outcome = mass_weightedIntersection(
                        [(first, wt1), (second, wt2)])
                    self.assertEqual(want, list(outcome.items()))
                    outcome = mass_weightedIntersection(
                        [(second, wt2), (first, wt1)])
                    self.assertEqual(want, list(outcome.items()))
Ejemplo n.º 3
0
 def search_phrase(self, phrase):
     # Return an IIBTree mapping docid -> weight for the documents whose
     # stored word sequence contains the encoded word ids of `phrase`.
     wids = self._lexicon.termToWordIds(phrase)
     known_wids = self._remove_oov_wids(wids)
     if len(wids) != len(known_wids):
         # Some word id was dropped as out-of-vocabulary, so the phrase
         # cannot occur in any document.
         return IIBTree()
     per_word_scores = self._search_wids(wids)
     candidates = mass_weightedIntersection(per_word_scores)
     if not candidates:
         return candidates
     # The intersection only selects candidate documents; each one is
     # kept only if the encoded wid sequence is found in its docwords.
     needle = WidCode.encode(wids)
     matches = IIBTree()
     for docid, weight in candidates.items():
         if self._docwords[docid].find(needle) < 0:
             continue
         matches[docid] = weight
     return matches
Ejemplo n.º 4
0
 def search_phrase(self, phrase):
     # Phrase search: the intersection of the per-word results gives the
     # candidate documents, which are then filtered down to those whose
     # docwords entry contains the encoded word-id sequence.  The result
     # maps docid -> weight.
     wids = self._lexicon.termToWordIds(phrase)
     in_vocab = self._remove_oov_wids(wids)
     if len(in_vocab) != len(wids):
         # At least one wid was OOV:  can't possibly find it.
         return IIBTree()
     hits = mass_weightedIntersection(self._search_wids(wids))
     if not hits:
         # Nothing survived the intersection; hand back the empty result.
         return hits
     encoded = WidCode.encode(wids)
     found = IIBTree()
     for docid, weight in hits.items():
         position = self._docwords[docid].find(encoded)
         if position >= 0:
             found[docid] = weight
     return found
Ejemplo n.º 5
0
    def testMany(self):
        # Feed N weighted IIBTrees with overlapping key ranges (plus one key
        # common to all of them) to mass_weightedUnion and
        # mass_weightedIntersection, in random order, and compare each result
        # against a brute-force computation.
        import random
        N = 15  # number of IIBTrees to feed in
        L = []
        commonkey = N * 1000
        allkeys = {commonkey: 1}  # dict used as a set of every key inserted
        for i in range(N):
            t = IIBTree()
            t[commonkey] = i
            for j in range(N - i):
                key = i + j
                allkeys[key] = 1
                t[key] = N * i + j
            L.append((t, i + 1))
        random.shuffle(L)
        # sorted() accepts any iterable, so this works both where dict.keys()
        # is a list and where it is a view without a sort() method.
        allkeys = sorted(allkeys)

        # Test the union: every key counts, summing over the trees holding it.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if t.has_key(key):
                    total += t[key] * w
            expected.append((key, total))
        got = mass_weightedUnion(L)
        self.assertEqual(expected, list(got.items()))

        # Test the intersection: a key counts only if every tree holds it.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if not t.has_key(key):
                    break
                total += t[key] * w
            else:
                # We didn't break out of the loop so it's in the intersection.
                expected.append((key, total))
        got = mass_weightedIntersection(L)
        self.assertEqual(expected, list(got.items()))
Ejemplo n.º 6
0
    def testMany(self):
        # Feed N weighted IIBTrees with overlapping key ranges (plus one key
        # common to all of them) to mass_weightedUnion and
        # mass_weightedIntersection, in random order, and compare each result
        # against a brute-force computation.
        import random
        N = 15  # number of IIBTrees to feed in
        L = []
        commonkey = N * 1000
        allkeys = {commonkey: 1}  # dict used as a set of every key inserted
        for i in range(N):
            t = IIBTree()
            t[commonkey] = i
            for j in range(N - i):
                key = i + j
                allkeys[key] = 1
                t[key] = N * i + j
            L.append((t, i + 1))
        random.shuffle(L)
        # sorted() accepts any iterable, so this works both where dict.keys()
        # is a list and where it is a view without a sort() method.
        allkeys = sorted(allkeys)

        # Test the union: every key counts, summing over the trees holding it.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if t.has_key(key):
                    total += t[key] * w
            expected.append((key, total))
        got = mass_weightedUnion(L)
        self.assertEqual(expected, list(got.items()))

        # Test the intersection: a key counts only if every tree holds it.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if not t.has_key(key):
                    break
                total += t[key] * w
            else:
                # We didn't break out of the loop so it's in the intersection.
                expected.append((key, total))
        got = mass_weightedIntersection(L)
        self.assertEqual(expected, list(got.items()))
Ejemplo n.º 7
0
 def executeQuery(self, index):
     # Evaluate this node's subnodes against `index` and combine them:
     # intersect the results of the ordinary subnodes, then subtract the
     # union of the results of any "NOT" subnodes.
     wanted = []
     unwanted = []
     for child in self.getValue():
         negated = child.nodeType() == "NOT"
         if negated:
             # For a NOT subnode, the query that runs is the node's value.
             found = child.getValue().executeQuery(index)
         else:
             found = child.executeQuery(index)
         if found is None:
             # None technically means "matches every doc".  For an ordinary
             # subnode that adds nothing to an intersection, so it is left
             # out.  For a NOT subnode we deliberately treat it as matching
             # no doc instead, so that
             #     real_word AND NOT stop_word
             # acts like plain real_word.
             continue
         if negated:
             unwanted.append((found, 1))
         else:
             wanted.append((found, 1))
     combined = mass_weightedIntersection(wanted)
     if unwanted:
         excluded = mass_weightedUnion(unwanted)
         combined = difference(combined, excluded)
     return combined
Ejemplo n.º 8
0
 def executeQuery(self, index):
     # Run each subnode's query on `index`.  Results of ordinary subnodes
     # are intersected; results of "NOT" subnodes are unioned and then
     # removed from that intersection.  Every result is given weight 1.
     required = []
     excluded = []
     for node in self.getValue():
         if node.nodeType() != "NOT":
             target, bucket = node, required
         else:
             # For a NOT subnode, the query to run is the node's value.
             target, bucket = node.getValue(), excluded
         matches = target.executeQuery(index)
         # A None result technically matches every doc.  Among the required
         # results it therefore needn't be included.  Among the excluded
         # ones we treat it as if it matched none (we want
         #     real_word AND NOT stop_word
         # to act like plain real_word).  Either way it is skipped.
         if matches is not None:
             bucket.append((matches, 1))
     answer = mass_weightedIntersection(required)
     if not excluded:
         return answer
     to_remove = mass_weightedUnion(excluded)
     return difference(answer, to_remove)
Ejemplo n.º 9
0
 def testEmptyLists(self):
     # Given an empty input list, each mass operation must return a result
     # whose length is zero.
     for mass_op in (mass_weightedIntersection, mass_weightedUnion):
         self.assertEqual(len(mass_op([])), 0)
Ejemplo n.º 10
0
 def testEmptyLists(self):
     # With no (mapping, weight) pairs at all, each mass operation must
     # hand back something of length zero.
     empty_intersection = mass_weightedIntersection([])
     self.assertEqual(len(empty_intersection), 0)
     empty_union = mass_weightedUnion([])
     self.assertEqual(len(empty_union), 0)