Exemple #1
0
 def query_index(self, record, resultset=None):
     """Execute the text query carried by ``record`` against this index.

     The entries of ``record.keys`` are joined with single spaces into one
     query string, which is parsed using the lexicon from
     ``self.getLexicon()`` and executed against ``self.index``.
     ``resultset`` is accepted but not used by this method.

     Returns None when the joined query string is empty, otherwise
     whatever ``executeQuery`` returns.
     """
     text = ' '.join(record.keys)
     if text:
         parser = QueryParser(self.getLexicon())
         return parser.parseQuery(text).executeQuery(self.index)
     return None
def query(rt, query_str, profiler):
    idx = rt["index"]
    docs = rt["documents"]

    start = time.clock()
    if profiler is None:
        results, num_results = idx.query(query_str, BEST)
    else:
        if WARM_CACHE:
            print "Warming the cache..."
            idx.query(query_str, BEST)
        start = time.clock()
        results, num_results = profiler.runcall(idx.query, query_str, BEST)
    elapsed = time.clock() - start

    print "query:", query_str
    print "# results:", len(results), "of", num_results, \
          "in %.2f ms" % (elapsed * 1000)

    tree = QueryParser(idx.lexicon).parseQuery(query_str)
    qw = idx.index.query_weight(tree.terms())

    for docid, score in results:
        scaled = 100.0 * score / qw
        print "docid %7d score %6d scaled %5.2f%%" % (docid, score, scaled)
        if VERBOSE:
            msg = docs[docid]
            ctx = msg.text.split("\n", CONTEXT)
            del ctx[-1]
            print "-" * 60
            print "message:"
            for l in ctx:
                print l
            print "-" * 60
def query(rt, query_str, profiler):
    idx = rt["index"]
    docs = rt["documents"]

    start = time.clock()
    if profiler is None:
        results, num_results = idx.query(query_str, BEST)
    else:
        if WARM_CACHE:
            print "Warming the cache..."
            idx.query(query_str, BEST)
        start = time.clock()
        results, num_results = profiler.runcall(idx.query, query_str, BEST)
    elapsed = time.clock() - start

    print "query:", query_str
    print "# results:", len(results), "of", num_results, \
          "in %.2f ms" % (elapsed * 1000)

    tree = QueryParser(idx.lexicon).parseQuery(query_str)
    qw = idx.index.query_weight(tree.terms())

    for docid, score in results:
        scaled = 100.0 * score / qw
        print "docid %7d score %6d scaled %5.2f%%" % (docid, score, scaled)
        if VERBOSE:
            msg = docs[docid]
            ctx = msg.text.split("\n", CONTEXT)
            del ctx[-1]
            print "-" * 60
            print "message:"
            for l in ctx:
                print l
            print "-" * 60
class TestQueryParserBase(TestCase):
    """Shared fixture and assertion helpers for QueryParser tests.

    Provides a parser bound to a fresh lexicon plus helpers to assert
    that a query parses to an expected tree (``expect``), that it is
    rejected (``failure``), and that two parse trees are structurally
    equal (``compareParseTrees``).
    """

    def setUp(self):
        # Imports are local so that merely importing this module does not
        # require the ZCTextIndex package to be importable.
        from Products.ZCTextIndex.QueryParser import QueryParser
        from Products.ZCTextIndex.Lexicon import Lexicon
        from Products.ZCTextIndex.Lexicon import Splitter
        self.lexicon = Lexicon(Splitter())
        self.parser = QueryParser(self.lexicon)

    def expect(self, input, output, expected_ignored=None):
        """Assert that ``input`` parses to the tree ``output``.

        ``expected_ignored`` is the list the parser is expected to report
        through ``getIgnored()``; omitting it (or passing None) means an
        empty list.  The same expectations are then checked against
        ``parseQueryEx()``, which must return the tree and the ignored
        list together.
        """
        # A None sentinel avoids sharing one mutable list between calls.
        if expected_ignored is None:
            expected_ignored = []
        tree = self.parser.parseQuery(input)
        ignored = self.parser.getIgnored()
        self.compareParseTrees(tree, output)
        self.assertEqual(ignored, expected_ignored)
        # Check that parseQueryEx() == (parseQuery(), getIgnored())
        ex_tree, ex_ignored = self.parser.parseQueryEx(input)
        self.compareParseTrees(ex_tree, tree)
        self.assertEqual(ex_ignored, expected_ignored)

    def failure(self, input):
        """Assert that both parser entry points raise ParseError."""
        from Products.ZCTextIndex.ParseTree import ParseError
        self.assertRaises(ParseError, self.parser.parseQuery, input)
        self.assertRaises(ParseError, self.parser.parseQueryEx, input)

    def compareParseTrees(self, got, expected, msg=None):
        """Recursively assert that ``got`` and ``expected`` are equal trees.

        Both nodes must have the same class.  Phrase, glob and atom nodes
        are compared by node type and value, a NOT node by its single
        child, and AND/OR nodes child by child.  ``msg`` is attached to
        the failing assertion and defaults to ``repr(got)`` of the
        top-level node.
        """
        from Products.ZCTextIndex.ParseTree import AndNode
        from Products.ZCTextIndex.ParseTree import AtomNode
        from Products.ZCTextIndex.ParseTree import GlobNode
        from Products.ZCTextIndex.ParseTree import NotNode
        from Products.ZCTextIndex.ParseTree import OrNode
        from Products.ZCTextIndex.ParseTree import ParseTreeNode
        from Products.ZCTextIndex.ParseTree import PhraseNode
        if msg is None:
            msg = repr(got)
        self.assertTrue(isinstance(got, ParseTreeNode), msg)
        self.assertEqual(got.__class__, expected.__class__, msg)
        # NOTE(review): PhraseNode and GlobNode are tested before AtomNode,
        # presumably because they derive from it -- keep this order.
        if isinstance(got, PhraseNode):
            self.assertEqual(got.nodeType(), "PHRASE", msg)
            self.assertEqual(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, GlobNode):
            self.assertEqual(got.nodeType(), "GLOB", msg)
            self.assertEqual(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, AtomNode):
            self.assertEqual(got.nodeType(), "ATOM", msg)
            self.assertEqual(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, NotNode):
            self.assertEqual(got.nodeType(), "NOT", msg)
            self.compareParseTrees(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, (AndNode, OrNode)):
            node_type = "AND" if isinstance(got, AndNode) else "OR"
            self.assertEqual(got.nodeType(), node_type, msg)
            got_kids = got.getValue()
            want_kids = expected.getValue()
            # Lengths are asserted first, so zip() cannot silently drop
            # a trailing child.
            self.assertEqual(len(got_kids), len(want_kids), msg)
            for got_kid, want_kid in zip(got_kids, want_kids):
                self.compareParseTrees(got_kid, want_kid, msg)
 def query(self, query, nbest=10):
     """Run ``query`` and return the pair ``(best, total)``.

     ``best`` is what ``NBest(nbest).getbest()`` yields after being fed
     every item of the result set (docid/score pairs); ``total`` is the
     size of the whole result set before trimming.  When executing the
     query gives no result set, ``([], 0)`` is returned.
     """
     tree = QueryParser(self.lexicon).parseQuery(query)
     hits = tree.executeQuery(self.index)
     if hits is not None:
         ranker = NBest(nbest)
         ranker.addmany(hits.items())
         return ranker.getbest(), len(hits)
     return [], 0
 def query(self, query, nbest=10):
     """Run ``query`` and return the pair ``(best, total)``.

     ``best`` is what ``NBest(nbest).getbest()`` yields after being fed
     every item of the result set (docid/score pairs); ``total`` is the
     size of the whole result set before trimming.  When executing the
     query gives no result set, ``([], 0)`` is returned.
     """
     tree = QueryParser(self.lexicon).parseQuery(query)
     hits = tree.executeQuery(self.index)
     if hits is not None:
         ranker = NBest(nbest)
         ranker.addmany(hits.items())
         return ranker.getbest(), len(hits)
     return [], 0
Exemple #7
0
    def query(self, query, nbest=10):
        """Execute ``query`` and return ``(best, total)``.

        ``best`` holds the results kept by ``NBest(nbest)`` after it has
        seen every docid/score item; ``total`` is the number of results
        before trimming to the ``nbest`` best ones.  If executing the
        query yields None, ``([], 0)`` is returned.
        """
        parser = QueryParser(self.getLexicon())
        hits = parser.parseQuery(query).executeQuery(self.index)
        if hits is not None:
            ranker = NBest(nbest)
            ranker.addmany(hits.items())
            return ranker.getbest(), len(hits)
        return [], 0
Exemple #8
0
    def query(self, query, nbest=10):
        """Execute ``query`` and return ``(best, total)``.

        ``best`` holds the results kept by ``NBest(nbest)`` after it has
        seen every docid/score item; ``total`` is the number of results
        before trimming to the ``nbest`` best ones.  If executing the
        query yields None, ``([], 0)`` is returned.
        """
        parser = QueryParser(self.getLexicon())
        hits = parser.parseQuery(query).executeQuery(self.index)
        if hits is not None:
            ranker = NBest(nbest)
            ranker.addmany(hits.items())
            return ranker.getbest(), len(hits)
        return [], 0
 def setUp(self):
     """Build a lexicon that uses the fake stop-word remover, and its parser."""
     from Products.ZCTextIndex.QueryParser import QueryParser
     from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
     # Only 'stop' is a stopword (but 'and' is still an operator)
     pipeline = (Splitter(), FakeStopWordRemover())
     lexicon = Lexicon(*pipeline)
     self.lexicon = lexicon
     self.parser = QueryParser(lexicon)
Exemple #10
0
class TestQueryEngine(TestCase):
    """Execute parsed queries against a FauxIndex and check the scores."""

    def setUp(self):
        # Lexicon, parser and fake index shared by the helpers below.
        lexicon = Lexicon(Splitter())
        self.lexicon = lexicon
        self.parser = QueryParser(lexicon)
        self.index = FauxIndex()

    def compareSet(self, set, dict):
        """Assert that the items of ``set`` equal the plain dict ``dict``."""
        # Copy the items into a plain dict before comparing.
        found = {key: value for key, value in set.items()}
        self.assertEqual(found, dict)

    def compareQuery(self, query, dict):
        """Parse and execute ``query``; its results must equal ``dict``."""
        results = self.parser.parseQuery(query).executeQuery(self.index)
        self.compareSet(results, dict)

    def testExecuteQuery(self):
        # Each case pairs a query with the expected {docid: score} mapping.
        cases = [
            ("foo AND bar", {1: 2}),
            ("foo OR bar", {1: 2, 2: 1, 3: 1}),
            ("foo AND NOT bar", {3: 1}),
            ("foo AND foo AND foo", {1: 3, 3: 3}),
            ("foo OR foo OR foo", {1: 3, 3: 3}),
            ("ham AND NOT foo AND NOT bar", {4: 1}),
            ("ham OR foo OR bar", {1: 3, 2: 2, 3: 2, 4: 1}),
            ("ham AND foo AND bar", {1: 3}),
        ]
        for text, expected in cases:
            self.compareQuery(text, expected)

    def testInvalidQuery(self):
        # Executing a tree whose root is a bare NOT must raise QueryError.
        from Products.ZCTextIndex.ParseTree import NotNode, AtomNode
        bare_not = NotNode(AtomNode("foo"))
        self.assertRaises(QueryError, bare_not.executeQuery, self.index)
class TestQueryEngine(TestCase):
    """Execute parsed queries against a FauxIndex and check the scores."""

    def setUp(self):
        # Lexicon, parser and fake index shared by the helpers below.
        lexicon = Lexicon(Splitter())
        self.lexicon = lexicon
        self.parser = QueryParser(lexicon)
        self.index = FauxIndex()

    def compareSet(self, set, dict):
        """Assert that the items of ``set`` equal the plain dict ``dict``."""
        # Copy the items into a plain dict before comparing.
        found = {key: value for key, value in set.items()}
        self.assertEqual(found, dict)

    def compareQuery(self, query, dict):
        """Parse and execute ``query``; its results must equal ``dict``."""
        results = self.parser.parseQuery(query).executeQuery(self.index)
        self.compareSet(results, dict)

    def testExecuteQuery(self):
        # Each case pairs a query with the expected {docid: score} mapping.
        cases = [
            ("foo AND bar", {1: 2}),
            ("foo OR bar", {1: 2, 2: 1, 3: 1}),
            ("foo AND NOT bar", {3: 1}),
            ("foo AND foo AND foo", {1: 3, 3: 3}),
            ("foo OR foo OR foo", {1: 3, 3: 3}),
            ("ham AND NOT foo AND NOT bar", {4: 1}),
            ("ham OR foo OR bar", {1: 3, 2: 2, 3: 2, 4: 1}),
            ("ham AND foo AND bar", {1: 3}),
        ]
        for text, expected in cases:
            self.compareQuery(text, expected)

    def testInvalidQuery(self):
        # Executing a tree whose root is a bare NOT must raise QueryError.
        from Products.ZCTextIndex.ParseTree import NotNode, AtomNode
        bare_not = NotNode(AtomNode("foo"))
        self.assertRaises(QueryError, bare_not.executeQuery, self.index)
Exemple #12
0
    def _apply_index(self, request, cid=''):
        """Run the query found in ``request`` against this index.

        ``request`` is a mapping containing the query; it is handed to
        ``parseIndexRequest`` together with this index's id and query
        options.  ``cid`` is accepted but not used by this method.

        On success returns the pair ``(results, (self.id,))``: the result
        set produced by executing the parsed query, and a one-element
        tuple naming this index.

        Returns None when the parsed request has no keys, or when the
        keys join to an empty query string.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        keys = record.keys
        if keys is None:
            return None
        text = ' '.join(keys)
        if not text:
            return None
        parser = QueryParser(self.getLexicon())
        results = parser.parseQuery(text).executeQuery(self.index)
        return results, (self.id,)
Exemple #13
0
    def _apply_index(self, request, cid=''):
        """Run the query found in ``request`` against this index.

        ``request`` is a mapping containing the query; it is handed to
        ``parseIndexRequest`` together with this index's id and query
        options.  ``cid`` is accepted but not used by this method.

        On success returns the pair ``(results, (self.id,))``: the result
        set produced by executing the parsed query, and a one-element
        tuple naming this index.

        Returns None when the parsed request has no keys, or when the
        keys join to an empty query string.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        keys = record.keys
        if keys is None:
            return None
        text = ' '.join(keys)
        if not text:
            return None
        parser = QueryParser(self.getLexicon())
        results = parser.parseQuery(text).executeQuery(self.index)
        return results, (self.id,)
Exemple #14
0
    def setUp(self):
        """Create the lexicon, text index and parser, then add the documents."""
        pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
        lexicon = PLexicon('lexicon', '', *pipeline)
        self.lexicon = lexicon
        # The holder is passed to ZCTextIndex as its ``caller`` argument.
        holder = LexiconHolder(lexicon)

        zc_index = ZCTextIndex(
            'name', None, holder, self.IndexFactory, 'text', 'lexicon')
        self.zc_index = zc_index
        self.parser = QueryParser(lexicon)
        self.index = zc_index.index
        self.add_docs()
 def _ranking_queries(self):
     """Check query weights and per-document scores for fixed queries.

     For every case the query weight computed by ``self.index`` is
     compared (via ``eq``) with the expected scaled weight, the number of
     hits from ``self.zc_index.query`` must match the expected number,
     and each hit's score, normalised by ``SCALE_FACTOR`` and the query
     weight, must lie in ``[0, SCALE_FACTOR]`` and match the expected
     score for that document.
     """
     # (raw query, expected query weight, expected (docid, score) pairs)
     cases = [
         ('eat', 1.95,
          [(6, 0.71)]),
         ('porridge', 1.10,
          [(1, 0.61), (2, 0.58), (5, 0.71)]),
         ('hot OR porridge', 1.77,
          [(1, 0.66), (2, 0.36), (4, 0.36), (5, 0.44)]),
         ('eat OR nine OR day OR old OR porridge', 3.55,
          [(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]),
     ]
     for raw, expected_wq, expected_hits in cases:
         tree = QueryParser(self.lexicon).parseQuery(raw)
         wq = self.index.query_weight(tree.terms())
         eq(wq, scaled_int(expected_wq))
         hits, _total = self.zc_index.query(raw)
         self.assertEqual(len(hits), len(expected_hits))
         # convert the expected results to a dict for easy checking
         expected = {}
         for doc, score in expected_hits:
             expected[doc] = scaled_int(score)
         for doc, score in hits:
             score = scaled_int(float(score / SCALE_FACTOR) / wq)
             self.assertTrue(0 <= score <= SCALE_FACTOR)
             eq(expected[doc], score)
Exemple #16
0
def zctidx_ApplyIndexWithSynonymous(self, request, cid=''):
    """Run the query found in ``request``, optionally glossary-expanded.

    ``request`` is a mapping containing the query; it is handed to
    ``parseIndexRequest`` together with this index's id and query
    options.  ``cid`` is accepted but not used by this function.

    On success returns the pair ``(results, (self.id,))``: the result set
    produced by executing the query tree, and a one-element tuple naming
    this index.  Returns None when the parsed request has no keys or the
    keys join to an empty query string.

    If ``self.getId()`` is listed in ``INDEX_SEARCH_GLOSSARY``, the parsed
    tree is first rewritten by ``replaceWordsQuery`` using the term items
    of the general glossaries, so that the search also looks for terms
    and their variants found there.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    keys = record.keys
    if keys is None:
        return None
    text = ' '.join(keys)
    if not text:
        return None

    parse = QueryParser(self.getLexicon()).parseQuery
    tree = parse(text)

    if self.getId() in INDEX_SEARCH_GLOSSARY:
        gtool = getToolByName(self, PLONEGLOSSARY_TOOL)
        uids = gtool.getGeneralGlossaryUIDs()
        term_items = gtool._getGlossaryTermItems(uids)

        # Words collected by __getNOTWords are handed over as the
        # ``excluded`` mapping (word -> True).
        excluded = dict.fromkeys(__getNOTWords(tree), True)

        tree = replaceWordsQuery(tree, parse, gtool, term_items, excluded)

    return tree.executeQuery(self.index), (self.id,)
 def setUp(self):
     """Build the lexicon and the query parser that wraps it."""
     lexicon = Lexicon(Splitter())
     self.lexicon = lexicon
     self.parser = QueryParser(lexicon)
 def setUp(self):
     """Build the lexicon and the query parser that wraps it."""
     from Products.ZCTextIndex.QueryParser import QueryParser
     from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
     lexicon = Lexicon(Splitter())
     self.lexicon = lexicon
     self.parser = QueryParser(lexicon)
 def query_weight(self, query):
     """Return the index's query weight for the terms of ``query``.

     ``query`` is parsed with this object's lexicon; the terms of the
     resulting tree are passed to ``self.index.query_weight``.
     """
     terms = QueryParser(self.lexicon).parseQuery(query).terms()
     return self.index.query_weight(terms)
Exemple #20
0
 def setUp(self):
     """Build a lexicon that uses the fake stop-word remover, and its parser."""
     # Only 'stop' is a stopword (but 'and' is still an operator)
     pipeline = (Splitter(), FakeStopWordRemover())
     lexicon = Lexicon(*pipeline)
     self.lexicon = lexicon
     self.parser = QueryParser(lexicon)
 def query_weight(self, query):
     """Return the index's query weight for the terms of ``query``.

     ``query`` is parsed with this object's lexicon; the terms of the
     resulting tree are passed to ``self.index.query_weight``.
     """
     terms = QueryParser(self.lexicon).parseQuery(query).terms()
     return self.index.query_weight(terms)
Exemple #22
0
 def setUp(self):
     """Build the lexicon and the query parser that wraps it."""
     lexicon = Lexicon(Splitter())
     self.lexicon = lexicon
     self.parser = QueryParser(lexicon)
 def setUp(self):
     """Build the lexicon, its query parser and a fake index."""
     lexicon = Lexicon(Splitter())
     self.lexicon = lexicon
     self.parser = QueryParser(lexicon)
     self.index = FauxIndex()
Exemple #24
0
 def setUp(self):
     """Build the lexicon, its query parser and a fake index."""
     lexicon = Lexicon(Splitter())
     self.lexicon = lexicon
     self.parser = QueryParser(lexicon)
     self.index = FauxIndex()