Esempio n. 1
0
    def query(self, q, ranked=True):
        """Return the `Document` instances matching the query string `q`.

        The query is parsed with `Query.parse`, run against the inverted
        index named by `self.spec.invindex_name`, and every hit is decoded
        with `TfIdf.undemux` into a `(uri, scores)` pair.  Each document is
        fetched from `self.docset`, given a `Score` built from `scores`, and
        has its `excerpt` attribute replaced by the excerpt for this query.

        Documents are returned in the order the index client yields them;
        this method does not sort them.

        If `ranked` is true, per-URI ranks are looked up in the `ScoreDB` at
        `self.spec.scoredb_path` and stored under the `'pagerank'` key of
        each document's score.

        Returns an empty list when the index yields no hits.

        Raises `Exception` when `ranked` is true and the score database
        returns no ranks at all.
        """
        qq = Query.parse(q)
        hits = self.discodex_client.query(self.spec.invindex_name, qq)
        # Build a real list: the result is truth-tested and then iterated
        # twice below.  A lazy `map` object (Python 3) is always truthy and
        # would be exhausted after the first pass.
        res = [TfIdf.undemux(hit) for hit in hits]
        if not res:
            return []

        pageranks = None
        if ranked:
            scoredb = ScoreDB(self.spec.scoredb_path)
            uris = [entry[0] for entry in res]
            pageranks = dict(scoredb.rank(uris))
            if not pageranks:
                raise Exception("no ranks available")

        docs = []
        for uri, scores in res:
            doc = self.docset.get(uri)
            doc.score = Score(**scores)
            # `pageranks` is either None (unranked) or a non-empty dict here.
            # NOTE(review): a URI missing from the rank table would raise
            # KeyError on the lookup below -- confirm `rank` covers every URI.
            if pageranks is not None:
                doc.score['pagerank'] = pageranks[uri]
            # Replaces the `excerpt` attribute on this instance with the
            # computed excerpt value.
            # NOTE(review): if `docset.get` can hand back the same instance
            # again, the next call would find a non-callable here -- confirm.
            doc.excerpt = doc.excerpt(qq)
            docs.append(doc)
        return docs
Esempio n. 2
0
 def test_excerpt_lowercases(self):
     # A lower-case query term ('welcome') must still produce the excerpt
     # that contains the capitalised 'Welcome' from the fixture document.
     qq = Query.parse('welcome')
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual('Welcome to example', fixtures.example.excerpt(qq, radius=20))
Esempio n. 3
0
 def test_excerpt(self):
     # Checks the excerpt around the match for 'example' at two radii:
     # radius=11 yields the whole phrase, radius=1 yields the match
     # prefixed with '... '.
     qq = Query.parse('example')
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual('Welcome to example', fixtures.example.excerpt(qq, radius=11))
     self.assertEqual('... example', fixtures.example.excerpt(qq, radius=1))
Esempio n. 4
0
 def test_eliminates_stopwords_when_stemming(self):
     # 'welcome & a' must format as 'welcom&a|~a': 'welcome' is stemmed and
     # the 'a' term is emitted as the clause 'a|~a'.
     # NOTE(review): presumably 'a|~a' is an always-true clause used to
     # neutralise the stopword -- confirm against Query's implementation.
     qq = Query.parse('welcome & a')
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual('welcom&a|~a', qq.format())
Esempio n. 5
0
 def test_stems(self):
     # Parsed query terms are formatted in stemmed form, both for a single
     # term and for a conjunction of terms.
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual('welcom', Query.parse('welcome').format())
     self.assertEqual('welcom&univers', Query.parse('welcome & university').format())
Esempio n. 6
0
 def test_non_negated_literals(self):
     # Only the literals that are not prefixed with '~' are reported; the
     # comparison goes through sets, so order is not checked.
     qq = Query.parse('abcd & ~wxyz & efgh')
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(set(['abcd', 'efgh']), set(qq.non_negated_literals()))