Example #1
0
    def testPairs(self):
        """Compare two-operand weighted union/intersection with brute force.

        Every pairing of two IFBTrees and the IFBuckets built from them is
        checked with several weight pairs, passing the operands in both
        orders to each mass operation.
        """
        t1 = IFBTree([(1, 10), (3, 30), (7, 70)])
        t2 = IFBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
        allkeys = [1, 3, 5, 7, 9]
        b1 = IFBucket(t1)
        b2 = IFBucket(t2)
        operands = (t1, t2, b1, b2)
        weight_pairs = ((0, 0), (1, 10), (10, 1), (2, 3))
        for x in operands:
            # Sanity check: no operand holds a key outside allkeys.
            for key in x.keys():
                self.assertEqual(key in allkeys, 1)
            for y in operands:
                for w1, w2 in weight_pairs:
                    # Union: a key present on either side contributes, with
                    # a missing side counting as 0.
                    expected = sorted(
                        (key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                        for key in allkeys
                        if x.has_key(key) or y.has_key(key))
                    for weighted in ([(x, w1), (y, w2)], [(y, w2), (x, w1)]):
                        got = mass_weightedUnion(weighted)
                        self.assertEqual(expected, list(got.items()))

                    # Intersection: only keys present on both sides survive.
                    expected = sorted(
                        (key, x[key] * w1 + y[key] * w2)
                        for key in allkeys
                        if x.has_key(key) and y.has_key(key))
                    for weighted in ([(x, w1), (y, w2)], [(y, w2), (x, w1)]):
                        got = mass_weightedIntersection(weighted)
                        self.assertEqual(expected, list(got.items()))
Example #2
0
 def executeQuery(self, index):
     """Run every child node against *index* and union their results.

     Each non-None child result is paired with weight 1 and the pairs are
     handed to mass_weightedUnion together with index.family.
     """
     child_results = (node.executeQuery(index) for node in self.getValue())
     # A None result technically matches every doc, but it is treated as
     # matching none, so that
     #     real_word OR stop_word
     # acts like plain real_word.
     weighted = [(result, 1) for result in child_results
                 if result is not None]
     return mass_weightedUnion(weighted, index.family)
Example #3
0
 def executeQuery(self, index):
     """Run every child node against *index* and union their results.

     Each non-None child result is paired with weight 1 and the pairs are
     handed to mass_weightedUnion.
     """
     pairs = []
     for child in self.getValue():
         outcome = child.executeQuery(index)
         if outcome is None:
             # Technically None matches every doc, but it is treated as
             # matching none, so that
             #     real_word OR stop_word
             # acts like plain real_word.
             continue
         pairs.append((outcome, 1))
     return mass_weightedUnion(pairs)
Example #4
0
    def testMany(self):
        """Check mass union/intersection over many weighted IFBTrees.

        Builds N trees that all share one common key and otherwise hold
        overlapping ranges of small keys, shuffles them, and compares the
        results of mass_weightedUnion and mass_weightedIntersection with
        brute-force sums computed here.
        """
        import random
        N = 15  # number of IFBTrees to feed in
        L = []
        commonkey = N * 1000
        allkeys = {commonkey: 1}
        for i in range(N):
            t = IFBTree()
            t[commonkey] = i
            for j in range(N-i):
                key = i + j
                allkeys[key] = 1
                t[key] = N*i + j
            L.append((t, i+1))
        random.shuffle(L)
        # dict.keys() is a view without .sort() on Python 3; sorted() yields
        # the same ordered list of keys on both Python 2 and 3.
        allkeys = sorted(allkeys)

        # Test the union.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if t.has_key(key):
                    total += t[key] * w
            expected.append((key, total))
        # print 'union', expected
        got = mass_weightedUnion(L)
        self.assertEqual(expected, list(got.items()))

        # Test the intersection.
        expected = []
        for key in allkeys:
            total = 0
            for t, w in L:
                if t.has_key(key):
                    total += t[key] * w
                else:
                    break
            else:
                # We didn't break out of the loop so it's in the intersection.
                expected.append((key, total))
        # print 'intersection', expected
        got = mass_weightedIntersection(L)
        self.assertEqual(expected, list(got.items()))
    def apply(self, query_list, threshold, start=0, count=None):
        """Union the documents of the known query terms and filter by value.

        Each term in *query_list* is looked up in self._wids_dict; terms
        whose word id is None (including terms absent from the dict) are
        skipped.  The remaining (docs, weight) pairs go to
        mass_weightedUnion, and the result is filtered with byValue using
        sqrt(sum of squared weights) * threshold.

        *start* and *count* are accepted but not used by this method.
        """
        wid_lookup = self._wids_dict
        wordinfo = self.index._wordinfo
        float_pow = float.__pow__

        weighted_docs = []
        squared_weights = 0
        for term in query_list:
            wid, weight = wid_lookup.get(term, (None, None))
            if wid is not None:
                weighted_docs.append((wordinfo[wid], weight))
                squared_weights += float_pow(weight, 2)

        merged = mass_weightedUnion(weighted_docs)

        cutoff = math.sqrt(squared_weights) * threshold
        return merged.byValue(cutoff)
Example #6
0
    def apply(self, query_list, threshold, start=0, count=None):
        """Union the documents of the known query terms and filter by value.

        Terms whose word id (from self._wids_dict) is None are ignored.
        The (docs, weight) pairs of the other terms are combined with
        mass_weightedUnion and the result is cut with byValue at
        sqrt(sum of squared weights) * threshold.

        *start* and *count* are accepted but not used by this method.
        """
        known_terms = self._wids_dict
        docs_by_wid = self.index._wordinfo
        power = float.__pow__

        pairs = []
        norm_squared = 0
        for term in query_list:
            wid, weight = known_terms.get(term, (None, None))
            if wid is None:
                # No usable word id for this term; nothing to contribute.
                continue
            pairs.append((docs_by_wid[wid], weight))
            norm_squared += power(weight, 2)

        union = mass_weightedUnion(pairs)

        query_norm = math.sqrt(norm_squared)
        return union.byValue(query_norm * threshold)
Example #7
0
 def executeQuery(self, index):
     """Intersect the results of the child nodes, minus the NOT children.

     Results of ordinary children are intersected with
     mass_weightedIntersection.  Results of the operands of "NOT"
     children are unioned with mass_weightedUnion and removed from the
     intersection via index.family.IF.difference.  Every result is given
     weight 1.
     """
     positives = []
     negatives = []
     for child in self.getValue():
         if child.nodeType() != "NOT":
             found = child.executeQuery(index)
             # A None result technically matches every doc, so it needn't
             # be included in the intersection.
             if found is not None:
                 positives.append((found, 1))
         else:
             found = child.getValue().executeQuery(index)
             # A None result technically matches every doc, but it is
             # treated as matching none, so that
             #     real_word AND NOT stop_word
             # acts like plain real_word.
             if found is not None:
                 negatives.append((found, 1))
     matches = mass_weightedIntersection(positives, index.family)
     if not negatives:
         return matches
     excluded = mass_weightedUnion(negatives, index.family)
     return index.family.IF.difference(matches, excluded)
Example #8
0
 def executeQuery(self, index):
     """Intersect the results of the child nodes, minus the NOT children.

     Results of ordinary children are intersected with
     mass_weightedIntersection.  Results of the operands of "NOT"
     children are unioned with mass_weightedUnion and removed from the
     intersection with difference().  Every result is given weight 1.
     """
     wanted = []
     unwanted = []
     for subnode in self.getValue():
         negated = subnode.nodeType() == "NOT"
         if negated:
             result = subnode.getValue().executeQuery(index)
         else:
             result = subnode.executeQuery(index)
         if result is None:
             # None technically matches every doc.  For a plain child it
             # therefore needn't take part in the intersection; for a NOT
             # child it is treated as matching none, so that
             #     real_word AND NOT stop_word
             # acts like plain real_word.
             continue
         target = unwanted if negated else wanted
         target.append((result, 1))
     matches = mass_weightedIntersection(wanted)
     if unwanted:
         excluded = mass_weightedUnion(unwanted)
         matches = difference(matches, excluded)
     return matches
Example #9
0
 def search_glob(self, pattern):
     """Return the mass weighted union of the results for a glob pattern.

     The pattern is turned into word ids by the lexicon, the ids are
     passed through self._remove_oov_wids, and the per-word results from
     self._search_wids are combined with mass_weightedUnion.
     """
     glob_wids = self._lexicon.globToWordIds(pattern)
     kept_wids = self._remove_oov_wids(glob_wids)
     per_word = self._search_wids(kept_wids)
     return mass_weightedUnion(per_word)
Example #10
0
 def search(self, term):
     """Return the mass weighted union of the results for *term*.

     Returns None when the lexicon yields no word ids for the term, which
     callers are expected to read as "all docs match".
     """
     term_wids = self._lexicon.termToWordIds(term)
     if term_wids:
         kept_wids = self._remove_oov_wids(term_wids)
         return mass_weightedUnion(self._search_wids(kept_wids))
     return None # All docs match
Example #11
0
 def _callFUT(self, L, family=_marker):
     """Call mass_weightedUnion, passing *family* only when it was given."""
     from zope.index.text.setops import mass_weightedUnion
     if family is not _marker:
         return mass_weightedUnion(L, family)
     # No family supplied: use the function's own default for it.
     return mass_weightedUnion(L)
Example #12
0
 def search_glob(self, pattern):
     """Return the mass weighted union of the results for a glob pattern.

     The pattern is turned into word ids by the lexicon, the ids are
     passed through self._remove_oov_wids, and the per-word results from
     self._search_wids are combined with mass_weightedUnion using
     self.family.
     """
     glob_wids = self._lexicon.globToWordIds(pattern)
     kept_wids = self._remove_oov_wids(glob_wids)
     per_word = self._search_wids(kept_wids)
     return mass_weightedUnion(per_word, self.family)
Example #13
0
 def search(self, term):
     """Return the mass weighted union of the results for *term*.

     Returns None when the lexicon yields no word ids for the term, which
     callers are expected to read as "all docs match".  Otherwise the
     per-word results are combined using self.family.
     """
     term_wids = self._lexicon.termToWordIds(term)
     if term_wids:
         kept_wids = self._remove_oov_wids(term_wids)
         per_word = self._search_wids(kept_wids)
         return mass_weightedUnion(per_word, self.family)
     return None  # All docs match
Example #14
0
 def testEmptyLists(self):
     """Assert that both mass operations give a length-0 result for []."""
     for operation in (mass_weightedIntersection, mass_weightedUnion):
         result = operation([])
         self.assertEqual(len(result), 0)
Example #15
0
 def _callFUT(self, L, family=_marker):
     """Call mass_weightedUnion, passing *family* only when it was given."""
     from zope.index.text.setops import mass_weightedUnion
     # Leave family out entirely when the caller did not supply one.
     args = (L,) if family is _marker else (L, family)
     return mass_weightedUnion(*args)