def test_sequence_complex():
    """Check that a sequence query mixing a group, fuzzy term and prefix matches.

    Builds a two-document in-memory index, swaps the phrase plugin for the
    sequence plugin (plus fuzzy-term support), and asserts that the query
    ``"(he OR she OR we~) would*"~3`` yields at least one scored hit.
    """
    # The analyzer is built with stoplist=None, i.e. without a stop-word list.
    analyzer = analysis.StandardAnalyzer(stoplist=None)
    schema = fields.Schema(
        title=fields.TEXT(stored=True),
        path=fields.ID(stored=True),
        content=fields.TEXT(stored=True, phrase=True, analyzer=analyzer),
    )
    index = RamStorage().create_index(schema)

    documents = (
        dict(
            title=u"First document",
            path=u"/a",
            content=u"This is the first document we've added!",
        ),
        dict(
            title=u"Second document",
            path=u"/b",
            content=(u"In truth, he said, I would like to combine "
                     u"logical operators with proximity-based "
                     u"search in Whoosh!"),
        ),
    )
    with index.writer() as writer:
        for document in documents:
            writer.add_document(**document)

    with index.searcher() as searcher:
        parser = qparser.QueryParser("content", index.schema)
        # Quoted text is handled by SequencePlugin instead of PhrasePlugin.
        parser.remove_plugin_class(plugins.PhrasePlugin)
        for plugin in (plugins.SequencePlugin(), plugins.FuzzyTermPlugin()):
            parser.add_plugin(plugin)

        parsed = parser.parse(u'"(he OR she OR we~) would*"~3')
        hits = searcher.search(parsed)
        assert hits.scored_length()
def test_sequence_plugin():
    """Exercise SequencePlugin parsing of quoted spans on a schema-less parser.

    Covers four inputs: a sequence containing a fuzzy term and a nested
    group, an input whose opening quote is never closed, two separate
    sequences (one made of ranges), and a sequence with an explicit slop.
    """
    parser = default.QueryParser("f", None)
    parser.remove_plugin_class(plugins.PhrasePlugin)
    parser.add_plugin(plugins.FuzzyTermPlugin())
    parser.add_plugin(plugins.SequencePlugin())

    # 1. Sequence with a fuzzy term and a parenthesised OR group inside.
    parsed = parser.parse(u('alfa "bravo charlie~2 (delta OR echo)" foxtrot'))
    expected_text = (
        "(f:alfa AND (f:bravo NEAR f:charlie~2 NEAR (f:delta OR f:echo)) AND f:foxtrot)"
    )
    assert parsed.__unicode__() == expected_text
    assert parsed[1].__class__ == query.Sequence

    # 2. The quote is opened but not closed; the assertions expect the four
    #    pieces to be addressable as individual sub-queries.
    parsed = parser.parse(u('alfa "bravo charlie~2 d?lt*'))
    for position, word in enumerate(("alfa", "bravo")):
        assert parsed[position].text == word
    for position, expected_class in ((2, query.FuzzyTerm), (3, query.Wildcard)):
        assert parsed[position].__class__ == expected_class

    # 3. Two independent sequences, the second one built from range queries.
    parsed = parser.parse(
        u('alfa "bravo charlie~2" d?lt* "[a TO z] [0 TO 9]" echo'))
    expected_text = (
        "(f:alfa AND (f:bravo NEAR f:charlie~2) AND f:d?lt* AND (f:[a TO z] NEAR f:[0 TO 9]) AND f:echo)"
    )
    assert parsed.__unicode__() == expected_text
    assert parsed[0].text == "alfa"
    top_level_classes = (
        (1, query.Sequence),
        (2, query.Wildcard),
        (3, query.Sequence),
    )
    for position, expected_class in top_level_classes:
        assert parsed[position].__class__ == expected_class
    range_sequence = parsed[3]
    for position in (0, 1):
        assert range_sequence[position].__class__ == query.TermRange
    assert parsed[4].text == "echo"

    # 4. A trailing ~2 on the quoted span sets the sequence slop, while the
    #    ~3 on the inner term sets that fuzzy term's maximum distance.
    parsed = parser.parse(u('alfa "bravo charlie~3"~2 delta'))
    sequence = parsed[1]
    assert sequence.__class__ == query.Sequence
    assert sequence.slop == 2
    fuzzy = sequence[1]
    assert fuzzy.__class__ == query.FuzzyTerm
    assert fuzzy.maxdist == 3
def search(self, queryEntered, page):
    """Search the index and return one page of results as parallel lists.

    Args:
        queryEntered: Query string, parsed against the title, plot, actors,
            director and genre fields.
        page: Page number passed through to ``search_page``.

    Returns:
        A 9-tuple ``(title, plot, poster, tomato_score, year, actors,
        director, genre, pagecount)``. The first eight items are lists with
        one entry per hit on the requested page (20 hits per page); the last
        is ``results.pagecount`` capped at 23.
    """
    # Fields are read from each hit in this order.
    read_order = ('title', 'plot', 'poster', 'tomato_score',
                  'year', 'director', 'actors', 'genre')
    # The returned tuple lists actors before director.
    return_order = ('title', 'plot', 'poster', 'tomato_score',
                    'year', 'actors', 'director', 'genre')
    columns = {name: [] for name in read_order}

    # Plain BM25F weighting is used; hits are ordered by tomato_score with
    # reverse=True rather than by relevance score.
    with self.indexer.searcher(weighting=BM25F()) as searcher:
        # Terms are built as FuzzyTerm via termclass; only the sequence
        # plugin is added on top of the parser defaults.
        parser = MultifieldParser(
            ['title', 'plot', 'actors', 'director', 'genre'],
            schema=self.indexer.schema,
            termclass=FuzzyTerm,
        )
        parser.add_plugin(plugins.SequencePlugin())
        parsed_query = parser.parse(queryEntered)

        results = searcher.search_page(
            parsed_query, page, 20,
            sortedby={'tomato_score'}, reverse=True,
        )
        for hit in results:
            for name in read_order:
                columns[name].append(hit[name])

        page_total = min(results.pagecount, 23)
        return tuple(columns[name] for name in return_order) + (page_total,)
def test_sequence_andmaybe():
    """Check that a quoted sequence works as the right side of ANDMAYBE.

    Parses ``Dahmen ANDMAYBE "Besov Spaces"`` with the sequence plugin in
    place of the phrase plugin and asserts the result is an ``AndMaybe`` of
    a plain term and a two-term ``Sequence``.
    """
    parser = default.QueryParser("f", None)
    parser.remove_plugin_class(plugins.PhrasePlugin)
    parser.add_plugins([plugins.FuzzyTermPlugin(), plugins.SequencePlugin()])

    parsed = parser.parse(u('Dahmen ANDMAYBE "Besov Spaces"'))
    assert isinstance(parsed, query.AndMaybe)

    # Left operand: the bare term.
    assert parsed[0] == query.Term("f", u("Dahmen"))

    # Right operand: the quoted words as a Sequence of terms.
    quoted_terms = [query.Term("f", u(word)) for word in ("Besov", "Spaces")]
    assert parsed[1] == query.Sequence(quoted_terms)