コード例 #1
0
 def query_index(self, record, resultset=None):
     """Parse the query held in ``record.keys`` and run it on the index.

     The entries of ``record.keys`` are joined with single spaces to form
     the query string.  Returns None when that string is empty; otherwise
     returns whatever ``executeQuery`` produces for ``self.index``.
     ``resultset`` is accepted but not used here.
     """
     text = ' '.join(record.keys)
     if text:
         parser = QueryParser(self.getLexicon())
         return parser.parseQuery(text).executeQuery(self.index)
     return None
コード例 #2
0
def query(rt, query_str, profiler):
    """Run query_str against an index, time it, and print the results.

    rt is a mapping; its "index" and "documents" entries are used.
    When profiler is None the query is executed directly; otherwise it is
    executed through profiler.runcall().  The query text, the number of
    returned and total results, and the elapsed time are printed, followed
    by one line per result with its score scaled by the query weight.
    When VERBOSE is true, leading lines of each matching message's text are
    printed as well.  Nothing is returned.
    """
    idx = rt["index"]
    docs = rt["documents"]

    start = time.clock()
    if profiler is None:
        # NOTE(review): BEST is a module-level value passed as the second
        # argument to idx.query -- presumably the number of best hits; confirm.
        results, num_results = idx.query(query_str, BEST)
    else:
        if WARM_CACHE:
            # Untimed run before the measured one; its result is discarded.
            print "Warming the cache..."
            idx.query(query_str, BEST)
        # Restart the timer so the optional warm-up run is not measured.
        start = time.clock()
        results, num_results = profiler.runcall(idx.query, query_str, BEST)
    elapsed = time.clock() - start

    print "query:", query_str
    # elapsed is multiplied by 1000 to report milliseconds.
    print "# results:", len(results), "of", num_results, \
          "in %.2f ms" % (elapsed * 1000)

    # Re-parse the query to obtain its terms; the resulting query weight is
    # the denominator used to express each score as a percentage.
    tree = QueryParser(idx.lexicon).parseQuery(query_str)
    qw = idx.index.query_weight(tree.terms())

    for docid, score in results:
        # NOTE(review): raises ZeroDivisionError if qw is 0 -- confirm that
        # query_weight cannot return 0 for a query that produced results.
        scaled = 100.0 * score / qw
        print "docid %7d score %6d scaled %5.2f%%" % (docid, score, scaled)
        if VERBOSE:
            msg = docs[docid]
            # Splitting with maxsplit=CONTEXT yields at most CONTEXT + 1
            # pieces; the last piece is dropped.  When the text has more than
            # CONTEXT newlines that piece is the unsplit remainder; otherwise
            # it is the final line of the text.
            ctx = msg.text.split("\n", CONTEXT)
            del ctx[-1]
            print "-" * 60
            print "message:"
            for l in ctx:
                print l
            print "-" * 60
コード例 #3
0
def query(rt, query_str, profiler):
    """Execute query_str on an index and print timing plus scored hits.

    rt is a mapping from which the "index" and "documents" entries are
    read.  With profiler set to None the index is queried directly;
    otherwise the call goes through profiler.runcall().  Output consists of
    the query text, the count of returned and total results, the elapsed
    time, and a line per result showing docid, score and the score as a
    percentage of the query weight.  With VERBOSE true, the first lines of
    each matching message's text are printed too.  Returns nothing.
    """
    idx = rt["index"]
    docs = rt["documents"]

    start = time.clock()
    if profiler is None:
        # NOTE(review): BEST comes from module scope and is handed to
        # idx.query as its second argument -- presumably a hit limit; confirm.
        results, num_results = idx.query(query_str, BEST)
    else:
        if WARM_CACHE:
            # One unmeasured run first; the return value is thrown away.
            print "Warming the cache..."
            idx.query(query_str, BEST)
        # Reset the start time so only the profiled call is measured.
        start = time.clock()
        results, num_results = profiler.runcall(idx.query, query_str, BEST)
    elapsed = time.clock() - start

    print "query:", query_str
    # The elapsed value is scaled by 1000 so it is shown in milliseconds.
    print "# results:", len(results), "of", num_results, \
          "in %.2f ms" % (elapsed * 1000)

    # The query is parsed a second time just to get its terms, from which
    # the query weight used for scaling is computed.
    tree = QueryParser(idx.lexicon).parseQuery(query_str)
    qw = idx.index.query_weight(tree.terms())

    for docid, score in results:
        # NOTE(review): a query weight of 0 would raise ZeroDivisionError
        # here -- verify whether that can happen.
        scaled = 100.0 * score / qw
        print "docid %7d score %6d scaled %5.2f%%" % (docid, score, scaled)
        if VERBOSE:
            msg = docs[docid]
            # split() with maxsplit=CONTEXT gives at most CONTEXT + 1 pieces
            # and the final piece is removed: the unsplit tail for long
            # texts, or the last real line for texts with fewer newlines.
            ctx = msg.text.split("\n", CONTEXT)
            del ctx[-1]
            print "-" * 60
            print "message:"
            for l in ctx:
                print l
            print "-" * 60
コード例 #4
0
class TestQueryParserBase(TestCase):
    """Shared fixture and assertion helpers for QueryParser tests.

    ``setUp`` builds a lexicon and a parser; ``expect`` and ``failure``
    check a single query string; ``compareParseTrees`` compares two parse
    trees node by node.
    """

    def setUp(self):
        """Create a lexicon using only a Splitter, and a parser bound to it."""
        from Products.ZCTextIndex.QueryParser import QueryParser
        from Products.ZCTextIndex.Lexicon import Lexicon
        from Products.ZCTextIndex.Lexicon import Splitter
        self.lexicon = Lexicon(Splitter())
        self.parser = QueryParser(self.lexicon)

    def expect(self, input, output, expected_ignored=None):
        """Assert that parsing ``input`` yields the tree ``output``.

        ``expected_ignored`` is the list the parser should report from
        ``getIgnored()``; it defaults to an empty list.  Both
        ``parseQuery()`` and ``parseQueryEx()`` are checked.
        """
        # A None default avoids sharing one list object between calls.
        if expected_ignored is None:
            expected_ignored = []
        tree = self.parser.parseQuery(input)
        ignored = self.parser.getIgnored()
        self.compareParseTrees(tree, output)
        self.assertEqual(ignored, expected_ignored)
        # Check that parseQueryEx() == (parseQuery(), getIgnored())
        ex_tree, ex_ignored = self.parser.parseQueryEx(input)
        self.compareParseTrees(ex_tree, tree)
        self.assertEqual(ex_ignored, expected_ignored)

    def failure(self, input):
        """Assert that both parse entry points raise ParseError for ``input``."""
        from Products.ZCTextIndex.ParseTree import ParseError
        self.assertRaises(ParseError, self.parser.parseQuery, input)
        self.assertRaises(ParseError, self.parser.parseQueryEx, input)

    def compareParseTrees(self, got, expected, msg=None):
        """Recursively assert that parse tree ``got`` equals ``expected``.

        Nodes must have the same class, the node type string matching that
        class, and equal values; NOT, AND and OR nodes are compared by
        recursing into their children.  ``msg`` is attached to the
        assertions and defaults to ``repr(got)`` of the outermost tree.
        """
        from Products.ZCTextIndex.ParseTree import AndNode
        from Products.ZCTextIndex.ParseTree import AtomNode
        from Products.ZCTextIndex.ParseTree import GlobNode
        from Products.ZCTextIndex.ParseTree import NotNode
        from Products.ZCTextIndex.ParseTree import OrNode
        from Products.ZCTextIndex.ParseTree import ParseTreeNode
        from Products.ZCTextIndex.ParseTree import PhraseNode
        if msg is None:
            msg = repr(got)
        self.assertTrue(isinstance(got, ParseTreeNode), msg)
        self.assertEqual(got.__class__, expected.__class__, msg)
        # PhraseNode and GlobNode are tested before AtomNode, preserving the
        # original branch order.
        if isinstance(got, PhraseNode):
            self.assertEqual(got.nodeType(), "PHRASE", msg)
            self.assertEqual(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, GlobNode):
            self.assertEqual(got.nodeType(), "GLOB", msg)
            self.assertEqual(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, AtomNode):
            self.assertEqual(got.nodeType(), "ATOM", msg)
            self.assertEqual(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, NotNode):
            # msg is passed here too, so a failure identifies the tree just
            # like every other branch does.
            self.assertEqual(got.nodeType(), "NOT", msg)
            self.compareParseTrees(got.getValue(), expected.getValue(), msg)
        elif isinstance(got, (AndNode, OrNode)):
            node_type = "AND" if isinstance(got, AndNode) else "OR"
            self.assertEqual(got.nodeType(), node_type, msg)
            got_children = got.getValue()
            expected_children = expected.getValue()
            self.assertEqual(len(got_children), len(expected_children), msg)
            # Lengths are equal at this point, so zip() pairs every child.
            for got_child, expected_child in zip(got_children,
                                                 expected_children):
                self.compareParseTrees(got_child, expected_child, msg)
コード例 #5
0
 def query(self, query, nbest=10):
     """Run a text query and return ``(best, total)``.

     ``best`` is what ``NBest(nbest).getbest()`` gives after being fed all
     items of the result mapping; ``total`` is the number of results before
     trimming.  ``([], 0)`` is returned when ``executeQuery`` yields None.
     """
     hits = QueryParser(self.lexicon).parseQuery(query).executeQuery(self.index)
     if hits is None:
         return [], 0
     ranked = NBest(nbest)
     ranked.addmany(hits.items())
     return ranked.getbest(), len(hits)
コード例 #6
0
 def query(self, query, nbest=10):
     """Execute ``query`` and return the best hits plus the total hit count.

     The first element of the returned pair comes from
     ``NBest(nbest).getbest()``; the second is ``len()`` of the full result
     mapping.  When ``executeQuery`` returns None the pair is ``([], 0)``.
     """
     tree = QueryParser(self.lexicon).parseQuery(query)
     matches = tree.executeQuery(self.index)
     if matches is not None:
         picker = NBest(nbest)
         picker.addmany(matches.items())
         return picker.getbest(), len(matches)
     return [], 0
コード例 #7
0
ファイル: ZCTextIndex.py プロジェクト: bendavis78/zope
    def query(self, query, nbest=10):
        """Run a text query and return ``(best, total)``.

        ``best`` is the output of ``NBest(nbest).getbest()`` after it has
        been given every item of the result mapping.  ``total`` is the
        number of results before trimming to ``nbest``.  When
        ``executeQuery`` returns None, ``([], 0)`` is returned.
        """
        parser = QueryParser(self.getLexicon())
        hits = parser.parseQuery(query).executeQuery(self.index)
        if hits is None:
            return [], 0
        ranked = NBest(nbest)
        ranked.addmany(hits.items())
        return ranked.getbest(), len(hits)
コード例 #8
0
ファイル: ZCTextIndex.py プロジェクト: wpjunior/proled
    def query(self, query, nbest=10):
        """Return the pair (nbest-trimmed results, untrimmed result count).

        The query string is parsed with the index's lexicon and executed
        against ``self.index``.  If that execution returns None the result
        is ``([], 0)``.
        """
        lexicon = self.getLexicon()
        tree = QueryParser(lexicon).parseQuery(query)
        matches = tree.executeQuery(self.index)
        if matches is not None:
            picker = NBest(nbest)
            picker.addmany(matches.items())
            return picker.getbest(), len(matches)
        return [], 0
コード例 #9
0
 def setUp(self):
     """Build a lexicon with a fake stop-word remover and a parser on it."""
     from Products.ZCTextIndex.QueryParser import QueryParser
     from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
     # 'stop' is the only stopword here; 'and' still acts as an operator.
     lexicon = self.lexicon = Lexicon(Splitter(), FakeStopWordRemover())
     self.parser = QueryParser(lexicon)
コード例 #10
0
class TestQueryEngine(TestCase):
    """Execute parsed queries against a FauxIndex and check the results."""

    def setUp(self):
        """Create the lexicon, a parser bound to it, and the fake index."""
        lexicon = self.lexicon = Lexicon(Splitter())
        self.parser = QueryParser(lexicon)
        self.index = FauxIndex()

    def compareSet(self, set, dict):
        """Assert that the items of ``set`` form a mapping equal to ``dict``."""
        actual = {key: value for key, value in set.items()}
        self.assertEqual(actual, dict)

    def compareQuery(self, query, dict):
        """Parse and execute ``query``; its result must match ``dict``."""
        parsed = self.parser.parseQuery(query)
        outcome = parsed.executeQuery(self.index)
        self.compareSet(outcome, dict)

    def testExecuteQuery(self):
        """Check the result mapping of several AND / OR / NOT queries."""
        cases = [
            ("foo AND bar", {1: 2}),
            ("foo OR bar", {1: 2, 2: 1, 3: 1}),
            ("foo AND NOT bar", {3: 1}),
            ("foo AND foo AND foo", {1: 3, 3: 3}),
            ("foo OR foo OR foo", {1: 3, 3: 3}),
            ("ham AND NOT foo AND NOT bar", {4: 1}),
            ("ham OR foo OR bar", {1: 3, 2: 2, 3: 2, 4: 1}),
            ("ham AND foo AND bar", {1: 3}),
        ]
        for text, expected in cases:
            self.compareQuery(text, expected)

    def testInvalidQuery(self):
        """Executing a tree whose root is a NotNode must raise QueryError."""
        from Products.ZCTextIndex.ParseTree import NotNode, AtomNode
        bare_not = NotNode(AtomNode("foo"))
        self.assertRaises(QueryError, bare_not.executeQuery, self.index)
コード例 #11
0
class TestQueryEngine(TestCase):
    """Tests that run parse trees against a FauxIndex instance."""

    def setUp(self):
        """Prepare a lexicon, a query parser using it, and a FauxIndex."""
        self.lexicon = Lexicon(Splitter())
        self.parser = QueryParser(self.lexicon)
        self.index = FauxIndex()

    def compareSet(self, set, dict):
        """Copy the items of ``set`` into a plain mapping and compare it
        with ``dict``."""
        copied = {}
        copied.update(set.items())
        self.assertEqual(copied, dict)

    def compareQuery(self, query, dict):
        """Run ``query`` through the parser and the index, then compare the
        outcome with ``dict``."""
        result = self.parser.parseQuery(query).executeQuery(self.index)
        self.compareSet(result, dict)

    def testExecuteQuery(self):
        """Each query string must produce exactly the listed mapping."""
        check = self.compareQuery
        check("foo AND bar", {1: 2})
        check("foo OR bar", {1: 2, 2: 1, 3: 1})
        check("foo AND NOT bar", {3: 1})
        check("foo AND foo AND foo", {1: 3, 3: 3})
        check("foo OR foo OR foo", {1: 3, 3: 3})
        check("ham AND NOT foo AND NOT bar", {4: 1})
        check("ham OR foo OR bar", {1: 3, 2: 2, 3: 2, 4: 1})
        check("ham AND foo AND bar", {1: 3})

    def testInvalidQuery(self):
        """A NotNode at the root of the tree must raise QueryError when
        executed."""
        from Products.ZCTextIndex.ParseTree import NotNode, AtomNode
        negated = NotNode(AtomNode("foo"))
        self.assertRaises(QueryError, negated.executeQuery, self.index)
コード例 #12
0
ファイル: ZCTextIndex.py プロジェクト: wpjunior/proled
    def _apply_index(self, request, cid=''):
        """Run the query contained in ``request`` (a mapping) on this index.

        On success a pair is returned: the result set produced by executing
        the parsed query, and a one-element tuple holding this index's id.

        None is returned when the request holds no keys for this index or
        when the joined query string is empty.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None
        text = ' '.join(record.keys)
        if text:
            parser = QueryParser(self.getLexicon())
            hits = parser.parseQuery(text).executeQuery(self.index)
            return hits, (self.id,)
        return None
コード例 #13
0
ファイル: ZCTextIndex.py プロジェクト: bendavis78/zope
    def _apply_index(self, request, cid=''):
        """Apply the query found in ``request``, a mapping, to this index.

        Returns ``(results, (self.id,))`` on success, where ``results`` is
        whatever executing the parsed query against ``self.index`` yields.

        Returns None if the parsed request has no keys, or if joining the
        keys with spaces gives an empty string.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None
        joined = ' '.join(record.keys)
        if not joined:
            return None
        lexicon = self.getLexicon()
        parsed = QueryParser(lexicon).parseQuery(joined)
        return parsed.executeQuery(self.index), (self.id,)
コード例 #14
0
    def setUp(self):
        """Build the lexicon, the ZCTextIndex under test, and a parser, then
        load the documents via ``add_docs()``."""
        lexicon = PLexicon(
            'lexicon', '', Splitter(), CaseNormalizer(), StopWordRemover())
        self.lexicon = lexicon
        holder = LexiconHolder(lexicon)

        zc_index = ZCTextIndex(
            'name', None, holder, self.IndexFactory, 'text', 'lexicon')
        self.zc_index = zc_index
        self.parser = QueryParser(lexicon)
        self.index = zc_index.index
        self.add_docs()
コード例 #15
0
 def _ranking_queries(self):
     """Check query weights and scaled scores for a fixed set of queries.

     Each case is (query text, expected query weight, expected
     (docid, score) pairs).  Expected weights and scores are converted with
     ``scaled_int`` before being compared.
     """
     cases = [
         ('eat', 1.95,
          [(6, 0.71)]),
         ('porridge', 1.10,
          [(1, 0.61), (2, 0.58), (5, 0.71)]),
         ('hot OR porridge', 1.77,
          [(1, 0.66), (2, 0.36), (4, 0.36), (5, 0.44)]),
         ('eat OR nine OR day OR old OR porridge', 3.55,
          [(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]),
     ]
     for raw, expected_wq, expected_hits in cases:
         tree = QueryParser(self.lexicon).parseQuery(raw)
         wq = self.index.query_weight(tree.terms())
         eq(wq, scaled_int(expected_wq))
         hits, _total = self.zc_index.query(raw)
         self.assertEqual(len(hits), len(expected_hits))
         # Index the expected scores by docid for easy lookup.
         wanted = {doc: scaled_int(score) for doc, score in expected_hits}
         for doc, score in hits:
             normalized = scaled_int(float(score / SCALE_FACTOR) / wq)
             self.assertTrue(0 <= normalized <= SCALE_FACTOR)
             eq(wanted[doc], normalized)
コード例 #16
0
def zctidx_ApplyIndexWithSynonymous(self, request, cid=''):
    """Run the query contained in ``request`` (a mapping) on this index.

    On success returns a pair: the result set from executing the query
    tree, and a one-element tuple holding this index's id.

    Returns None when the request has no keys for this index or the joined
    query string is empty.

    When this index's id is listed in
    PloneGlossary.config.INDEX_SEARCH_GLOSSARY, the query tree is first
    rewritten by ``replaceWordsQuery`` so that it also looks for terms and
    their variants found in the general glossaries.  The words reported by
    ``__getNOTWords`` for the tree are passed along as excluded.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None
    text = ' '.join(record.keys)
    if not text:
        return None

    parse = QueryParser(self.getLexicon()).parseQuery
    tree = parse(text)

    if self.getId() in INDEX_SEARCH_GLOSSARY:
        glossary_tool = getToolByName(self, PLONEGLOSSARY_TOOL)
        uids = glossary_tool.getGeneralGlossaryUIDs()
        term_items = glossary_tool._getGlossaryTermItems(uids)

        # Words under NOT in the query are handed over as excluded.
        not_words = dict.fromkeys(__getNOTWords(tree), True)

        tree = replaceWordsQuery(
            tree, parse, glossary_tool, term_items, not_words)

    return tree.executeQuery(self.index), (self.id,)
コード例 #17
0
 def setUp(self):
     """Create a lexicon and a query parser bound to it."""
     lexicon = self.lexicon = Lexicon(Splitter())
     self.parser = QueryParser(lexicon)
コード例 #18
0
 def setUp(self):
     """Create a Splitter-only lexicon and a parser that uses it."""
     from Products.ZCTextIndex.QueryParser import QueryParser
     from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
     lexicon = self.lexicon = Lexicon(Splitter())
     self.parser = QueryParser(lexicon)
コード例 #19
0
 def query_weight(self, query):
     """Return the index's query weight for the terms of ``query``.

     The query string is parsed with this object's lexicon and the terms of
     the resulting tree are passed to ``self.index.query_weight``.
     """
     terms = QueryParser(self.lexicon).parseQuery(query).terms()
     return self.index.query_weight(terms)
コード例 #20
0
ファイル: testQueryParser.py プロジェクト: bendavis78/zope
 def setUp(self):
     """Build a lexicon with a fake stop-word remover and a parser on it."""
     # 'stop' is the only stopword here; 'and' still acts as an operator.
     lexicon = self.lexicon = Lexicon(Splitter(), FakeStopWordRemover())
     self.parser = QueryParser(lexicon)
コード例 #21
0
 def query_weight(self, query):
     """Parse ``query`` and return ``self.index.query_weight`` of its terms."""
     tree = QueryParser(self.lexicon).parseQuery(query)
     query_terms = tree.terms()
     weight = self.index.query_weight(query_terms)
     return weight
コード例 #22
0
ファイル: testQueryParser.py プロジェクト: bendavis78/zope
 def setUp(self):
     """Set up the lexicon and the parser used by the tests."""
     splitter = Splitter()
     self.lexicon = Lexicon(splitter)
     self.parser = QueryParser(self.lexicon)
コード例 #23
0
 def setUp(self):
     """Create the lexicon, a parser bound to it, and a FauxIndex."""
     lexicon = self.lexicon = Lexicon(Splitter())
     self.parser = QueryParser(lexicon)
     self.index = FauxIndex()
コード例 #24
0
 def setUp(self):
     """Prepare a lexicon, a query parser using it, and a FauxIndex."""
     splitter = Splitter()
     self.lexicon = Lexicon(splitter)
     self.parser = QueryParser(self.lexicon)
     self.index = FauxIndex()