def _mk_parser(self):
    """Create the query parser and store it on ``self.parser``.

    The parser targets the ``"meta"`` field of ``self.idx_obj.schema``.
    Fuzzy-term syntax is enabled, and the stock phrase plugin is replaced
    by the sequence plugin.
    """
    from whoosh import qparser as qparse

    # use whoosh default query parser for now
    query_parser = qparse.QueryParser("meta", schema=self.idx_obj.schema)

    # Keep the plugin order: fuzzy terms first, then phrase -> sequence.
    query_parser.add_plugin(qparse.FuzzyTermPlugin())
    query_parser.remove_plugin_class(qparse.PhrasePlugin)
    query_parser.add_plugin(qparse.SequencePlugin())

    # Publish the parser only once it is fully configured.
    self.parser = query_parser
def search(self, string=None, fields=None):
    """Yield every hit for ``string`` searched as one quoted sequence.

    The text is wrapped in double quotes before parsing. The parser has the
    stock phrase plugin removed and the fuzzy-term and sequence plugins
    added, and combines clauses with ``OrGroup``. Scoring uses ``BM25F``.

    Args:
        string: Text to search for. ``None`` or an empty string yields
            nothing instead of searching for the literal word ``"None"``.
        fields: Names of the fields to search. Defaults to
            ``["title", "content"]``.

    Yields:
        Each hit returned by the searcher, with no limit on the count. The
        searcher stays open while the generator is being consumed and is
        closed when iteration finishes.
    """
    if string is None or string == "":
        # Formatting None into the query would search for the text "None".
        return
    if fields is None:
        # Build the default per call rather than sharing one mutable list.
        fields = ["title", "content"]

    query_parser = qparser.MultifieldParser(
        fields, self.ix.schema, group=qparser.OrGroup
    )
    query_parser.remove_plugin_class(qparser.PhrasePlugin)
    query_parser.add_plugin(qparser.FuzzyTermPlugin())
    query_parser.add_plugin(qparser.SequencePlugin())

    with self.ix.searcher(weighting=scoring.BM25F) as searcher:
        pattern = query_parser.parse(u'"{}"'.format(string))
        for result in searcher.search(pattern, limit=None):
            yield result
def basic_search(query, query_parse, group=default_group, facet=default_facet,
                 index=default_index):
    """Parse ``query`` against one field and return all matching results.

    Args:
        query: Query string to parse.
        query_parse: Name of the field the parser searches by default.
        group: Grouping class handed to the ``QueryParser``.
        facet: Passed to the searcher as ``sortedby``.
        index: Index that supplies both the schema and the searcher.

    Returns:
        The results object returned by ``searcher.search``.
    """
    # NOTE(review): the searcher is left open on purpose here because the
    # returned results come from it -- confirm callers release it when done.
    searcher = index.searcher()
    parser = QueryParser(query_parse, index.schema, group=group)

    # Configure the plugins BEFORE parsing; changing them after parse() has
    # already run has no effect on the query that was produced.
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())
    parser.add_plugin(qparser.FuzzyTermPlugin())
    myquery = parser.parse(query)

    # limit caps the number of search results (the default is 10); None
    # lifts the cap. See the official Whoosh documentation for details.
    results = searcher.search(myquery, limit=None, sortedby=facet)
    print(results)
    return results
def index_search(self, search_query):
    """Return the ``id`` of every document whose ``name`` fuzzily matches.

    The query text is tokenised with ``my_analyzer`` and every token gets a
    trailing ``~`` so the fuzzy-term plugin treats it as a fuzzy term. The
    index is opened from the ``"index"`` directory and scored by frequency.

    Args:
        search_query: Raw query text.

    Returns:
        A list with the ``id`` field of each hit. The list is empty when
        the text contains ``'/'`` or produces no tokens.
    """
    # Queries containing a slash are rejected up front.
    if '/' in search_query:
        return []

    tokens = [token.text for token in my_analyzer(search_query)]
    if not tokens:
        # Nothing searchable is left; avoid sending a bare "~" to the parser.
        return []
    fuzzy_query = '~ '.join(tokens) + '~'

    ix = index.open_dir("index")
    with ix.searcher(weighting=scoring.Frequency) as s:
        og = qparser.OrGroup.factory(0.8)
        qp = qparser.QueryParser("name", schema=ix.schema,
                                 termclass=MyFuzzyTerm, group=og)
        qp.add_plugin(qparser.FuzzyTermPlugin())
        qp.add_plugin(qparser.SequencePlugin())
        q = qp.parse(fuzzy_query)
        results = s.search(q, terms=True, limit=None)
        # Materialise the ids while the searcher is still open.
        return [res['id'] for res in results]
def question_tokens_to_query(keywords):
    """
    From a list of keywords and their synonyms, build a whoosh query.

    Each entry of ``keywords`` is a group of alternative spellings for one
    keyword. A group becomes ``(a OR b OR c)``; the groups are separated by
    spaces, which leaves their combination to the parser's default grouping.
    Empty groups are skipped so they cannot produce an unbalanced ``)``.

    Args:
        keywords: Iterable of synonym groups, each a sequence of strings.

    Returns:
        The query object produced by parsing the generated string against
        the ``title`` and ``content`` fields of the index at ``index_dir``.
    """
    # Build query from keywords
    groups = [
        "(" + " OR ".join(synonyms) + ")"
        for synonyms in keywords
        if synonyms
    ]
    query_str = " ".join(groups)

    # From query string build whoosh-defined query
    ix = index.open_dir(index_dir)
    parser = qparser.MultifieldParser(["title", "content"], ix.schema)
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())  # For complex phrase queries
    # Allow terms that do not have to match exactly
    parser.add_plugin(qparser.FuzzyTermPlugin())
    return parser.parse(query_str)
# Command-line arguments: <search text> <mode> [<distance>]
_string = sys.argv[1]
_mode = sys.argv[2]
normal = _mode == "normal"
# The distance argument is only read when the mode is not "normal".
_distance = 0 if normal else int(sys.argv[3])

with ix.searcher() as searcher:
    parser = MultifieldParser(
        ["title", "sub_title", "author", "content"],
        schema=ix.schema,
    )
    # Replace the stock phrase plugin with the sequence plugin.
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())

    if not normal:
        # proximity: quote the text and append a slop derived from the distance
        distance = _distance
        proximty_query = "\"" + _string + "\"" + '~' + str((1 + distance) * 3)
        query = parser.parse(proximty_query)
    else:
        # normal mode: parse the text exactly as given
        string = _string
        query = parser.parse(string)

    # Marker written to stdout before the search is run.
    sys.stdout.buffer.write(">>>>>>OUTPUT start<<<<<<".encode('utf-8'))
    results = searcher.search(query, limit=20)
    results.fragmenter.maxchars = 100  # Show more context before and after
q_d = MultifieldParser(["title", "content", "extension", "url"], i_d.schema, group=og) q_e = MultifieldParser(["title", "content", "extension", "url"], i_e.schema, group=og) q_f = MultifieldParser(["title", "content", "extension", "url"], i_f.schema, group=og) elif operator == 4: #print ("in oper 4") og = qparser.OrGroup.factory(0.9) q_a = MultifieldParser(["title", "content", "tags", "extension", "url"], i_a.schema, group=og) q_a.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?")) q_a.add_plugin(qparser.FuzzyTermPlugin()) q_b = MultifieldParser(["title", "content", "extension", "url"], i_b.schema, group=og) q_b.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?")) q_b.add_plugin(qparser.FuzzyTermPlugin()) q_c = MultifieldParser(["title", "content", "extension", "url"], i_c.schema, group=og) q_c.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?")) q_c.add_plugin(qparser.FuzzyTermPlugin()) q_d = MultifieldParser(["title", "content", "extension", "url", "url"], i_d.schema, group=og) q_d.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?"))