def query_index(self, record, resultset=None):
    """Run the text query carried by ``record`` against this index.

    The entries of ``record.keys`` are joined with single spaces into one
    query string, which is parsed by a ``QueryParser`` built on this
    index's lexicon and then executed against ``self.index``.

    ``resultset`` is accepted for interface compatibility but is not used
    by this implementation.

    Returns ``None`` when ``record.keys`` is ``None`` or joins to an empty
    query string; otherwise returns whatever ``executeQuery`` produces.
    """
    keys = record.keys
    if keys is None:
        # No query terms at all: treat it like the empty-query case below
        # instead of letting ' '.join(None) raise TypeError.
        return None

    query_str = ' '.join(keys)
    if not query_str:
        return None

    tree = QueryParser(self.getLexicon()).parseQuery(query_str)
    return tree.executeQuery(self.index)
def query(rt, query_str, profiler): idx = rt["index"] docs = rt["documents"] start = time.clock() if profiler is None: results, num_results = idx.query(query_str, BEST) else: if WARM_CACHE: print "Warming the cache..." idx.query(query_str, BEST) start = time.clock() results, num_results = profiler.runcall(idx.query, query_str, BEST) elapsed = time.clock() - start print "query:", query_str print "# results:", len(results), "of", num_results, \ "in %.2f ms" % (elapsed * 1000) tree = QueryParser(idx.lexicon).parseQuery(query_str) qw = idx.index.query_weight(tree.terms()) for docid, score in results: scaled = 100.0 * score / qw print "docid %7d score %6d scaled %5.2f%%" % (docid, score, scaled) if VERBOSE: msg = docs[docid] ctx = msg.text.split("\n", CONTEXT) del ctx[-1] print "-" * 60 print "message:" for l in ctx: print l print "-" * 60
class TestQueryParserBase(TestCase):
    """Shared fixture and assertion helpers for QueryParser tests.

    Defines no test methods itself; it provides ``expect``, ``failure``
    and ``compareParseTrees`` for use by tests built on this fixture.
    """

    def setUp(self):
        """Create a lexicon with a plain splitter and a parser using it."""
        from Products.ZCTextIndex.QueryParser import QueryParser
        from Products.ZCTextIndex.Lexicon import Lexicon
        from Products.ZCTextIndex.Lexicon import Splitter
        self.lexicon = Lexicon(Splitter())
        self.parser = QueryParser(self.lexicon)

    def expect(self, input, output, expected_ignored=None):
        """Assert that parsing ``input`` yields the tree ``output``.

        ``expected_ignored`` is the list the parser is expected to report
        through ``getIgnored()``; it defaults to an empty list.  The same
        expectations are then checked against ``parseQueryEx()``.
        """
        # A fresh list per call instead of a shared mutable default.
        if expected_ignored is None:
            expected_ignored = []

        tree = self.parser.parseQuery(input)
        ignored = self.parser.getIgnored()
        self.compareParseTrees(tree, output)
        self.assertEqual(ignored, expected_ignored)

        # Check that parseQueryEx() == (parseQuery(), getIgnored())
        ex_tree, ex_ignored = self.parser.parseQueryEx(input)
        self.compareParseTrees(ex_tree, tree)
        self.assertEqual(ex_ignored, expected_ignored)

    def failure(self, input):
        """Assert that both parser entry points reject ``input``."""
        from Products.ZCTextIndex.ParseTree import ParseError
        self.assertRaises(ParseError, self.parser.parseQuery, input)
        self.assertRaises(ParseError, self.parser.parseQueryEx, input)

    def compareParseTrees(self, got, expected, msg=None):
        """Recursively assert that two parse trees are equivalent.

        Nodes must have the same class and the node type string that
        matches that class.  Phrase, glob and atom nodes are compared by
        value; NOT nodes by their single child; AND/OR nodes child by
        child.  ``msg`` defaults to ``repr(got)`` of the outermost tree.
        """
        from Products.ZCTextIndex.ParseTree import AndNode
        from Products.ZCTextIndex.ParseTree import AtomNode
        from Products.ZCTextIndex.ParseTree import GlobNode
        from Products.ZCTextIndex.ParseTree import NotNode
        from Products.ZCTextIndex.ParseTree import OrNode
        from Products.ZCTextIndex.ParseTree import ParseTreeNode
        from Products.ZCTextIndex.ParseTree import PhraseNode

        if msg is None:
            msg = repr(got)

        self.assertIsInstance(got, ParseTreeNode, msg)
        self.assertEqual(got.__class__, expected.__class__, msg)

        # NOTE(review): the phrase and glob checks come before the atom
        # check, as in the original -- presumably because those classes
        # derive from AtomNode; confirm against ParseTree.
        if isinstance(got, PhraseNode):
            self.assertEqual(got.nodeType(), "PHRASE", msg)
            self.assertEqual(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, GlobNode):
            self.assertEqual(got.nodeType(), "GLOB", msg)
            self.assertEqual(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, AtomNode):
            self.assertEqual(got.nodeType(), "ATOM", msg)
            self.assertEqual(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, NotNode):
            self.assertEqual(got.nodeType(), "NOT", msg)
            self.compareParseTrees(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, (AndNode, OrNode)):
            node_type = "AND" if isinstance(got, AndNode) else "OR"
            self.assertEqual(got.nodeType(), node_type, msg)
            got_children = got.getValue()
            expected_children = expected.getValue()
            self.assertEqual(len(got_children), len(expected_children), msg)
            for got_child, expected_child in zip(got_children,
                                                 expected_children):
                self.compareParseTrees(got_child, expected_child, msg)
def query(self, query, nbest=10):
    """Parse and execute ``query``; return ``(best, total)``.

    ``best`` is what ``NBest(nbest)`` selects from the result items and
    ``total`` is the number of results before that selection.  When the
    query execution yields None, ``([], 0)`` is returned.
    """
    parse_tree = QueryParser(self.lexicon).parseQuery(query)
    hits = parse_tree.executeQuery(self.index)
    if hits is None:
        return [], 0

    best = NBest(nbest)
    best.addmany(hits.items())
    return best.getbest(), len(hits)
def query(self, query, nbest=10):
    """Return the pair (best results, total number of results).

    The query is parsed against this index's lexicon and executed on
    ``self.index``.  The first element is what ``NBest(nbest)`` keeps from
    the result items; the second is the result count before trimming.
    When the query execution yields None, ``([], 0)`` is returned.
    """
    parser = QueryParser(self.getLexicon())
    hits = parser.parseQuery(query).executeQuery(self.index)
    if hits is None:
        return [], 0

    best = NBest(nbest)
    best.addmany(hits.items())
    return best.getbest(), len(hits)
def setUp(self):
    """Build a lexicon that drops stop words, and a parser on top of it."""
    from Products.ZCTextIndex.QueryParser import QueryParser
    from Products.ZCTextIndex.Lexicon import Lexicon, Splitter

    # Only 'stop' is a stopword (but 'and' is still an operator)
    lexicon = Lexicon(Splitter(), FakeStopWordRemover())
    self.lexicon = lexicon
    self.parser = QueryParser(lexicon)
class TestQueryEngine(TestCase):
    """Execute parsed queries against a FauxIndex and check the results."""

    def setUp(self):
        """Create the lexicon, the parser bound to it, and the index."""
        lexicon = Lexicon(Splitter())
        self.lexicon = lexicon
        self.parser = QueryParser(lexicon)
        self.index = FauxIndex()

    def compareSet(self, set, dict):
        """Assert that the items of ``set`` equal the plain dict ``dict``."""
        # Copy the items into a plain dict so equality is a dict comparison.
        actual = {key: value for key, value in set.items()}
        self.assertEqual(actual, dict)

    def compareQuery(self, query, dict):
        """Parse and execute ``query``; compare its results to ``dict``."""
        parse_tree = self.parser.parseQuery(query)
        self.compareSet(parse_tree.executeQuery(self.index), dict)

    def testExecuteQuery(self):
        """Check the results of AND / OR / NOT combinations."""
        cases = [
            ("foo AND bar", {1: 2}),
            ("foo OR bar", {1: 2, 2: 1, 3: 1}),
            ("foo AND NOT bar", {3: 1}),
            ("foo AND foo AND foo", {1: 3, 3: 3}),
            ("foo OR foo OR foo", {1: 3, 3: 3}),
            ("ham AND NOT foo AND NOT bar", {4: 1}),
            ("ham OR foo OR bar", {1: 3, 2: 2, 3: 2, 4: 1}),
            ("ham AND foo AND bar", {1: 3}),
        ]
        for query, expected in cases:
            self.compareQuery(query, expected)

    def testInvalidQuery(self):
        """Executing a tree whose root is a NOT node raises QueryError."""
        from Products.ZCTextIndex.ParseTree import NotNode, AtomNode
        negated = NotNode(AtomNode("foo"))
        self.assertRaises(QueryError, negated.executeQuery, self.index)
class TestQueryEngine(TestCase):
    """Query-execution tests run against a FauxIndex."""

    def setUp(self):
        """Prepare a lexicon, a parser using that lexicon, and an index."""
        lexicon = Lexicon(Splitter())
        self.lexicon = lexicon
        self.parser = QueryParser(lexicon)
        self.index = FauxIndex()

    def compareSet(self, set, dict):
        """Assert that ``set``'s items match the expected mapping ``dict``."""
        # Materialise the items as a plain dict before comparing.
        found = {docid: score for docid, score in set.items()}
        self.assertEqual(found, dict)

    def compareQuery(self, query, dict):
        """Run ``query`` through the parser and index, then compare."""
        outcome = self.parser.parseQuery(query).executeQuery(self.index)
        self.compareSet(outcome, dict)

    def testExecuteQuery(self):
        """Each query must produce exactly the expected docid -> score map."""
        expectations = (
            ("foo AND bar", {1: 2}),
            ("foo OR bar", {1: 2, 2: 1, 3: 1}),
            ("foo AND NOT bar", {3: 1}),
            ("foo AND foo AND foo", {1: 3, 3: 3}),
            ("foo OR foo OR foo", {1: 3, 3: 3}),
            ("ham AND NOT foo AND NOT bar", {4: 1}),
            ("ham OR foo OR bar", {1: 3, 2: 2, 3: 2, 4: 1}),
            ("ham AND foo AND bar", {1: 3}),
        )
        for text, wanted in expectations:
            self.compareQuery(text, wanted)

    def testInvalidQuery(self):
        """A tree consisting only of a NOT node must raise QueryError."""
        from Products.ZCTextIndex.ParseTree import NotNode, AtomNode
        not_only = NotNode(AtomNode("foo"))
        self.assertRaises(QueryError, not_only.executeQuery, self.index)
def _apply_index(self, request, cid=''):
    """Apply the query found in ``request``, a mapping holding the query.

    On success, returns a 2-tuple: the result of executing the parsed
    query against ``self.index``, and a 1-tuple holding this index's id.

    Returns None when the parsed request carries no keys for this index,
    or when the keys join to an empty query string.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    keys = record.keys
    if keys is None:
        return None

    query_str = ' '.join(keys)
    if not query_str:
        return None

    parser = QueryParser(self.getLexicon())
    results = parser.parseQuery(query_str).executeQuery(self.index)
    return results, (self.id,)
def _apply_index(self, request, cid=''):
    """Run the query contained in the ``request`` mapping on this index.

    Returns a pair on success: the value produced by executing the parsed
    query against ``self.index``, and a one-element tuple with this
    index's id.

    Returns None if the request holds no keys for this index or if the
    joined keys form an empty query string.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    terms = record.keys
    if terms is None:
        return None

    query_str = ' '.join(terms)
    if not query_str:
        return None

    parse_tree = QueryParser(self.getLexicon()).parseQuery(query_str)
    return parse_tree.executeQuery(self.index), (self.id,)
def setUp(self):
    """Create the lexicon, the ZCTextIndex under test, and sample docs.

    The lexicon pipeline is a splitter, a case normalizer and a stop-word
    remover.  The index is built with ``self.IndexFactory`` and finds the
    lexicon through a ``LexiconHolder``.  ``add_docs()`` runs last.
    """
    lexicon = PLexicon('lexicon', '',
                       Splitter(), CaseNormalizer(), StopWordRemover())
    self.lexicon = lexicon
    holder = LexiconHolder(lexicon)
    self.zc_index = ZCTextIndex(
        'name', None, holder, self.IndexFactory, 'text', 'lexicon')
    self.parser = QueryParser(lexicon)
    self.index = self.zc_index.index
    self.add_docs()
def _ranking_queries(self):
    """Check query weights and document scores for a fixed set of queries.

    For each query, the index's query weight is compared with the
    expected scaled value, the number of hits is checked, and each hit's
    score (normalised by the query weight) is compared with the expected
    score for that document.  Comparisons use the ``eq`` helper.
    """
    queries = ['eat', 'porridge', 'hot OR porridge',
               'eat OR nine OR day OR old OR porridge']
    wqs = [1.95, 1.10, 1.77, 3.55]
    results = [[(6, 0.71)],
               [(1, 0.61), (2, 0.58), (5, 0.71)],
               [(1, 0.66), (2, 0.36), (4, 0.36), (5, 0.44)],
               [(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]]

    for raw, expected_wq, expected_hits in zip(queries, wqs, results):
        tree = QueryParser(self.lexicon).parseQuery(raw)
        wq = self.index.query_weight(tree.terms())
        eq(wq, scaled_int(expected_wq))

        hits, _ = self.zc_index.query(raw)
        self.assertEqual(len(hits), len(expected_hits))

        # Index the expected scores by document for easy checking.
        expected_by_doc = {doc: scaled_int(score)
                           for doc, score in expected_hits}
        for doc, score in hits:
            score = scaled_int(float(score / SCALE_FACTOR) / wq)
            self.assertTrue(0 <= score <= SCALE_FACTOR)
            eq(expected_by_doc[doc], score)
def zctidx_ApplyIndexWithSynonymous(self, request, cid=''):
    """Apply the query found in ``request``, a mapping holding the query.

    On success, returns a 2-tuple: the result of executing the query tree
    against ``self.index``, and a 1-tuple holding this index's id.

    Returns None when the parsed request carries no keys for this index,
    or when the keys join to an empty query string.

    If this index's id is listed in INDEX_SEARCH_GLOSSARY, the query tree
    is first rewritten by ``replaceWordsQuery`` using the term items of
    the general glossaries; words returned by ``__getNOTWords`` for the
    tree are passed along as excluded.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    keys = record.keys
    if keys is None:
        return None

    query_str = ' '.join(keys)
    if not query_str:
        return None

    parse = QueryParser(self.getLexicon()).parseQuery
    tree = parse(query_str)

    if self.getId() in INDEX_SEARCH_GLOSSARY:
        gtool = getToolByName(self, PLONEGLOSSARY_TOOL)
        uids = gtool.getGeneralGlossaryUIDs()
        term_items = gtool._getGlossaryTermItems(uids)

        # Get atoms from the query and build the related term query.
        excluded = dict.fromkeys(__getNOTWords(tree), True)
        tree = replaceWordsQuery(tree, parse, gtool, term_items, excluded)

    return tree.executeQuery(self.index), (self.id,)
def setUp(self):
    """Create a lexicon with a plain splitter and a parser bound to it."""
    lexicon = Lexicon(Splitter())
    self.lexicon = lexicon
    self.parser = QueryParser(lexicon)
def setUp(self):
    """Create a lexicon with a plain splitter and a parser bound to it.

    The ZCTextIndex classes are imported here rather than at module level.
    """
    from Products.ZCTextIndex.QueryParser import QueryParser
    from Products.ZCTextIndex.Lexicon import Lexicon, Splitter

    lexicon = Lexicon(Splitter())
    self.lexicon = lexicon
    self.parser = QueryParser(lexicon)
def query_weight(self, query):
    """Return the underlying index's query weight for ``query``.

    The query string is parsed against ``self.lexicon`` and the terms of
    the resulting tree are handed to ``self.index.query_weight``.
    """
    terms = QueryParser(self.lexicon).parseQuery(query).terms()
    return self.index.query_weight(terms)
def setUp(self):
    """Build a lexicon that drops stop words, and a parser on top of it."""
    # Only 'stop' is a stopword (but 'and' is still an operator)
    lexicon = Lexicon(Splitter(), FakeStopWordRemover())
    self.lexicon = lexicon
    self.parser = QueryParser(lexicon)
def setUp(self):
    """Create the lexicon, a parser bound to it, and a FauxIndex."""
    lexicon = Lexicon(Splitter())
    self.lexicon = lexicon
    self.parser = QueryParser(lexicon)
    self.index = FauxIndex()