def query(rt, query_str, profiler):
    """Run ``query_str`` against the index in ``rt`` and print the hits.

    Args:
        rt: Mapping that holds the text index under ``"index"`` and the
            indexed documents under ``"documents"``.
        query_str: Query text; handed to ``idx.query`` and ``QueryParser``.
        profiler: ``None`` to call ``idx.query`` directly, otherwise an
            object whose ``runcall`` method is used to execute the query.

    Prints the query, the number of hits returned next to the second value
    reported by ``idx.query``, the elapsed time in milliseconds, and one
    line per hit with its raw score and that score as a percentage of the
    query weight.  When ``VERBOSE`` is true, up to ``CONTEXT`` leading
    lines of each hit's ``text`` are printed as well.

    Each print passes a single parenthesised expression, which produces the
    same text as the equivalent print statement.
    """
    idx = rt["index"]
    docs = rt["documents"]

    start = time.clock()
    if profiler is None:
        results, num_results = idx.query(query_str, BEST)
    else:
        if WARM_CACHE:
            print("Warming the cache...")
            idx.query(query_str, BEST)
        # Restart the timer so the optional warm-up run is not measured.
        start = time.clock()
        results, num_results = profiler.runcall(idx.query, query_str, BEST)
    elapsed = time.clock() - start

    print("query: %s" % (query_str,))
    print("# results: %s of %s in %.2f ms"
          % (len(results), num_results, elapsed * 1000))

    tree = QueryParser(idx.lexicon).parseQuery(query_str)
    qw = idx.index.query_weight(tree.terms())

    for docid, score in results:
        scaled = 100.0 * score / qw
        print("docid %7d score %6d scaled %5.2f%%" % (docid, score, scaled))
        if VERBOSE:
            msg = docs[docid]
            # split() only appends an unsplit remainder when the text has
            # at least CONTEXT newlines.  Slicing drops that remainder but,
            # unlike an unconditional ``del ctx[-1]``, keeps the last real
            # line of a message shorter than CONTEXT lines.
            ctx = msg.text.split("\n", CONTEXT)[:CONTEXT]
            print("-" * 60)
            print("message:")
            for line in ctx:
                print(line)
            print("-" * 60)
def query(rt, query_str, profiler):
    """Run ``query_str`` against the index in ``rt`` and print the hits.

    Args:
        rt: Mapping that holds the text index under ``"index"`` and the
            indexed documents under ``"documents"``.
        query_str: Query text; handed to ``idx.query`` and ``QueryParser``.
        profiler: ``None`` to call ``idx.query`` directly, otherwise an
            object whose ``runcall`` method is used to execute the query.

    Prints the query, the number of hits returned next to the second value
    reported by ``idx.query``, the elapsed time in milliseconds, and one
    line per hit with its raw score and that score as a percentage of the
    query weight.  When ``VERBOSE`` is true, up to ``CONTEXT`` leading
    lines of each hit's ``text`` are printed as well.

    Each print passes a single parenthesised expression, which produces the
    same text as the equivalent print statement.
    """
    idx = rt["index"]
    docs = rt["documents"]

    start = time.clock()
    if profiler is None:
        results, num_results = idx.query(query_str, BEST)
    else:
        if WARM_CACHE:
            print("Warming the cache...")
            idx.query(query_str, BEST)
        # Restart the timer so the optional warm-up run is not measured.
        start = time.clock()
        results, num_results = profiler.runcall(idx.query, query_str, BEST)
    elapsed = time.clock() - start

    print("query: %s" % (query_str,))
    print("# results: %s of %s in %.2f ms"
          % (len(results), num_results, elapsed * 1000))

    tree = QueryParser(idx.lexicon).parseQuery(query_str)
    qw = idx.index.query_weight(tree.terms())

    for docid, score in results:
        scaled = 100.0 * score / qw
        print("docid %7d score %6d scaled %5.2f%%" % (docid, score, scaled))
        if VERBOSE:
            msg = docs[docid]
            # split() only appends an unsplit remainder when the text has
            # at least CONTEXT newlines.  Slicing drops that remainder but,
            # unlike an unconditional ``del ctx[-1]``, keeps the last real
            # line of a message shorter than CONTEXT lines.
            ctx = msg.text.split("\n", CONTEXT)[:CONTEXT]
            print("-" * 60)
            print("message:")
            for line in ctx:
                print(line)
            print("-" * 60)
def _ranking_queries(self):
    """Check query weights and per-document scores for a fixed query set.

    Each case pairs a raw query string with its expected query weight and
    the expected ``(docid, score)`` hits.  The weight computed by
    ``self.index`` and the hits returned by ``self.zc_index`` are compared
    against those expectations with ``eq``.
    """
    cases = [
        ('eat', 1.95,
         [(6, 0.71)]),
        ('porridge', 1.10,
         [(1, 0.61), (2, 0.58), (5, 0.71)]),
        ('hot OR porridge', 1.77,
         [(1, 0.66), (2, 0.36), (4, 0.36), (5, 0.44)]),
        ('eat OR nine OR day OR old OR porridge', 3.55,
         [(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]),
    ]

    for raw, expected_wq, expected_hits in cases:
        parsed = QueryParser(self.lexicon).parseQuery(raw)
        wq = self.index.query_weight(parsed.terms())
        eq(wq, scaled_int(expected_wq))

        hits, _total = self.zc_index.query(raw)
        self.assertEqual(len(hits), len(expected_hits))

        # Key the expected scores by docid for easy checking.
        expected_by_doc = dict(
            (doc, scaled_int(score)) for doc, score in expected_hits)

        for doc, score in hits:
            # Express the returned score relative to the query weight
            # before comparing it with the expectation.
            normalized = scaled_int(float(score / SCALE_FACTOR) / wq)
            self.assertTrue(0 <= normalized <= SCALE_FACTOR)
            eq(expected_by_doc[doc], normalized)