def testMany(self):
        import random
        inputs = [(-i, i) for i in range(50)]

        reversed_inputs = inputs[:]
        reversed_inputs.reverse()

        # Test the N-best for a variety of n (1, 6, 11, ... 50).
        for n in range(1, len(inputs) + 1, 5):
            expected = inputs[-n:]
            expected.reverse()

            random_inputs = inputs[:]
            random.shuffle(random_inputs)

            for source in inputs, reversed_inputs, random_inputs:
                # Try feeding them one at a time.
                nb = NBest(n)
                for item, score in source:
                    nb.add(item, score)
                self.assertEqual(len(nb), n)
                self.assertEqual(nb.capacity(), n)
                self.assertEqual(nb.getbest(), expected)

                # And again in one gulp.
                nb = NBest(n)
                nb.addmany(source)
                self.assertEqual(len(nb), n)
                self.assertEqual(nb.capacity(), n)
                self.assertEqual(nb.getbest(), expected)

                for i in range(1, n + 1):
                    self.assertEqual(nb.pop_smallest(), expected[-i])
                self.assertRaises(IndexError, nb.pop_smallest)
    def testConstructor(self):
        self.assertRaises(ValueError, NBest, 0)
        self.assertRaises(ValueError, NBest, -1)

        for n in range(1, 11):
            nb = NBest(n)
            self.assertEqual(len(nb), 0)
            self.assertEqual(nb.capacity(), n)
 def query(self, query, nbest=10):
     # returns a total hit count and a mapping from docids to scores
     parser = QueryParser(self.lexicon)
     tree = parser.parseQuery(query)
     results = tree.executeQuery(self.index)
     if results is None:
         return [], 0
     chooser = NBest(nbest)
     chooser.addmany(results.items())
     return chooser.getbest(), len(results)
Exemple #4
0
def main(rt):
    index = rt["index"]
    files = rt["files"]
    times = {}
    ITERS = range(50)
    for i in range(11):
        for q in QUERIES:
            terms = q.split()
            for c in " OR ", " AND ":
                query = c.join(terms)
                t0 = clock()
                if TEXTINDEX:
                    if c == " OR ":
                        op = Or
                    else:
                        op = And
                    _q = " ".join(terms)
                    for _ in ITERS:
                        b = index.query(_q, op).bucket()
                        num = len(b)
                        chooser = NBest(10)
                        chooser.addmany(b.items())
                        results = chooser.getbest()

                else:
                    try:
                        for _ in ITERS:
                            results, num = index.query(query)
                    except:
                        continue
                t1 = clock()
                print "<p>Query: \"%s\"" % query
                print "<br>Num results: %d" % num
                print "<br>time.clock(): %s" % (t1 - t0)
                key = query
                if i == 0:
                    print "<ol>"
                    for docid, score in results:
                        url = path2url(files[docid])
                        fmt = '<li><a href="%s">%s</A> score = %s'
                        print fmt % (url, url, score)
                    print "</ol>"
                    continue
                l = times.setdefault(key, [])
                l.append(t1 - t0)

    l = times.keys()
    l.sort()
    print "<hr>"
    for k in l:
        v = times[k]
        print "<p>Query: \"%s\"" % k
        print "<br>Min time: %s" % min(v)
        print "<br>All times: %s" % " ".join(map(str, v))
Exemple #5
0
    def query(self, query, nbest=10):
        """Return pair (mapping from docids to scores, num results).

        The num results is the total number of results before trimming
        to the nbest results.
        """
        tree = QueryParser(self.getLexicon()).parseQuery(query)
        results = tree.executeQuery(self.index)
        if results is None:
            return [], 0
        chooser = NBest(nbest)
        chooser.addmany(results.items())
        return chooser.getbest(), len(results)
Exemple #6
0
def mass_weightedUnion(l_):
    "A list of (mapping, weight) pairs -> their weightedUnion IIBucket."
    if len(l_) < 2:
        return _trivial(l_)
    # Balance unions as closely as possible, smallest to largest.
    merge = NBest(len(l_))
    for x, weight in l_:
        merge.add((x, weight), len(x))
    while len(merge) > 1:
        # Merge the two smallest so far, and add back to the queue.
        (x, wx), dummy = merge.pop_smallest()
        (y, wy), dummy = merge.pop_smallest()
        dummy, z = weightedUnion(x, y, wx, wy)
        merge.add((z, 1), len(z))
    (result, weight), dummy = merge.pop_smallest()
    return result
    def testOne(self):
        nb = NBest(1)
        nb.add('a', 0)
        self.assertEqual(nb.getbest(), [('a', 0)])

        nb.add('b', 1)
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [('b', 1)])

        nb.add('c', -1)
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [('b', 1)])

        nb.addmany([('d', 3), ('e', -6), ('f', 5), ('g', 4)])
        self.assertEqual(len(nb), 1)
        self.assertEqual(nb.capacity(), 1)
        self.assertEqual(nb.getbest(), [('f', 5)])