def query(self, q, ranked=True):
    """Return a list of `Document` instances matching the query `q`.

    Args:
        q: Query text; it is parsed with `Query.parse` before being sent
            to the inverted index.
        ranked: When true (the default), a pagerank is looked up for every
            matching URI in the score database and stored on each
            document's score under the ``'pagerank'`` key.

    Returns:
        A list of documents, in the order the index returned the hits.
        Each document has `score` set and `excerpt` replaced by the
        excerpt computed for this query. An empty list when nothing
        matches.

    Raises:
        Exception: If `ranked` is true and the score database returns no
            ranks at all for the matching URIs.
    """
    qq = Query.parse(q)
    hits = self.discodex_client.query(self.spec.invindex_name, qq)

    # Materialise the results as a list: they are tested for emptiness and
    # then walked twice (once for the URIs, once to build the documents).
    # A lazy `map` object (Python 3) is always truthy and can be consumed
    # only once, which would silently produce an empty result.
    res = [TfIdf.undemux(hit) for hit in hits]
    if not res:
        return []

    pageranks = None
    if ranked:
        scoredb = ScoreDB(self.spec.scoredb_path)
        uris = [uri for uri, _ in res]
        pageranks = dict(scoredb.rank(uris))
        if not pageranks:
            raise Exception("no ranks available")

    docs = []
    for uri, scores in res:
        doc = self.docset.get(uri)
        doc.score = Score(**scores)
        if pageranks:
            doc.score['pagerank'] = pageranks[uri]
        # NOTE(review): this rebinds `excerpt` from a callable to its
        # result on the document object. If `docset.get` hands back shared
        # or cached instances, a later query would fail here -- confirm.
        doc.excerpt = doc.excerpt(qq)
        docs.append(doc)
    return docs
def test_excerpt_lowercases(self):
    """The lowercase query 'welcome' yields the excerpt 'Welcome to example'."""
    qq = Query.parse('welcome')
    # assertEqual, not the deprecated alias assertEquals (removed in
    # Python 3.12).
    self.assertEqual('Welcome to example',
                     fixtures.example.excerpt(qq, radius=20))
def test_excerpt(self):
    """Check the excerpt for 'example' at a wide and a narrow radius.

    With radius=11 the expected excerpt is the full 'Welcome to example';
    with radius=1 it is '... example'.
    """
    qq = Query.parse('example')
    # assertEqual, not the deprecated alias assertEquals (removed in
    # Python 3.12).
    self.assertEqual('Welcome to example',
                     fixtures.example.excerpt(qq, radius=11))
    self.assertEqual('... example',
                     fixtures.example.excerpt(qq, radius=1))
def test_eliminates_stopwords_when_stemming(self):
    """Parsing 'welcome & a' formats back as 'welcom&a|~a'.

    NOTE(review): presumably the stopword 'a' is neutralised by turning it
    into the always-true clause ``a|~a`` -- confirm against the parser.
    """
    qq = Query.parse('welcome & a')
    # assertEqual, not the deprecated alias assertEquals (removed in
    # Python 3.12).
    self.assertEqual('welcom&a|~a', qq.format())
def test_stems(self):
    """Query terms are formatted in stemmed form, alone and in a conjunction."""
    # assertEqual, not the deprecated alias assertEquals (removed in
    # Python 3.12).
    self.assertEqual('welcom', Query.parse('welcome').format())
    self.assertEqual('welcom&univers',
                     Query.parse('welcome & university').format())
def test_non_negated_literals(self):
    """`non_negated_literals` returns only the terms not prefixed with '~'."""
    qq = Query.parse('abcd & ~wxyz & efgh')
    # Compared as sets, so the order of the returned literals is not
    # checked. assertEqual replaces the deprecated alias assertEquals
    # (removed in Python 3.12).
    self.assertEqual(set(['abcd', 'efgh']),
                     set(qq.non_negated_literals()))