Exemple #1
0
    def _mk_parser(self):
        """Create the query parser for the "meta" field and keep it on ``self.parser``.

        The parser is bound to the schema of ``self.idx_obj``. Fuzzy-term
        support is added, and the phrase plugin is swapped for the sequence
        plugin.
        """
        from whoosh.qparser import (
            FuzzyTermPlugin,
            PhrasePlugin,
            QueryParser,
            SequencePlugin,
        )

        # Whoosh's stock query parser is sufficient for now.
        meta_parser = QueryParser("meta", schema=self.idx_obj.schema)
        meta_parser.add_plugin(FuzzyTermPlugin())
        meta_parser.remove_plugin_class(PhrasePlugin)
        meta_parser.add_plugin(SequencePlugin())
        self.parser = meta_parser
    def search(self, string=None, fields=None):
        """Yield every hit for ``string``, searched as a quoted sequence.

        Args:
            string: Text to look for. ``None`` or an empty string yields no
                results.
            fields: Names of the schema fields to search. Defaults to
                ``["title", "content"]`` when omitted.

        Yields:
            Each result returned by ``searcher.search`` with no limit. The
            searcher is opened in a ``with`` block around the loop, so it
            stays open while the generator is being consumed.
        """
        if not string:
            # Nothing to look for. Without this guard a missing string would
            # be formatted into the query as the literal text "None".
            return
        if fields is None:
            # Built per call so no list object is shared between calls.
            fields = ["title", "content"]

        query_parser = qparser.MultifieldParser(fields,
                                                self.ix.schema,
                                                group=qparser.OrGroup)
        # Quoted text is handled by the sequence plugin, not the phrase plugin.
        query_parser.remove_plugin_class(qparser.PhrasePlugin)
        query_parser.add_plugin(qparser.FuzzyTermPlugin())
        query_parser.add_plugin(qparser.SequencePlugin())

        with self.ix.searcher(weighting=scoring.BM25F) as searcher:
            # Wrap the whole input in double quotes so it parses as one sequence.
            pattern = query_parser.parse(u'"{}"'.format(string))
            for result in searcher.search(pattern, limit=None):
                yield result
Exemple #3
0
def basic_search(query,
                 query_parse,
                 group=default_group,
                 facet=default_facet,
                 index=default_index):
    """Parse ``query`` and return every matching result from ``index``.

    Args:
        query: Query string to parse.
        query_parse: First argument handed to ``QueryParser`` (the name of
            the field searched by default).
        group: Passed to ``QueryParser`` as ``group``.
        facet: Passed to ``searcher.search`` as ``sortedby``.
        index: Index object that supplies the searcher and the schema.

    Returns:
        The results object returned by ``searcher.search``. The searcher is
        not closed here.
    """
    searcher = index.searcher()
    parser = QueryParser(query_parse, index.schema, group=group)
    # Configure the plugins BEFORE parsing: a query that has already been
    # parsed is not affected by plugins added or removed afterwards.
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())
    parser.add_plugin(qparser.FuzzyTermPlugin())
    myquery = parser.parse(query)
    # ``limit`` caps the number of search results (the library default is 10);
    # None removes the cap.
    results = searcher.search(myquery, limit=None, sortedby=facet)
    print(results)
    return results
Exemple #4
0
 def index_search(self, search_query):
     """Fuzzy-search the "name" field of the on-disk index and return hit ids.

     Any query containing '/' is rejected up front with an empty list.
     Otherwise the query is tokenized with ``my_analyzer``, a '~' is
     appended to every token, and the resulting string is parsed and run
     against the index stored in the "index" directory.

     Returns:
         A list with the 'id' value of every hit.
     """
     if '/' in search_query:
         return []

     # Tokens are joined with '~ ' and a final '~' is appended, so each one
     # carries the fuzzy-term marker.
     token_texts = [token.text for token in my_analyzer(search_query)]
     fuzzy_query = '~ '.join(token_texts) + '~'

     ix = index.open_dir("index")
     or_group = qparser.OrGroup.factory(0.8)
     name_parser = qparser.QueryParser(
         "name", schema=ix.schema, termclass=MyFuzzyTerm, group=or_group)
     name_parser.add_plugin(qparser.FuzzyTermPlugin())
     name_parser.add_plugin(qparser.SequencePlugin())
     parsed = name_parser.parse(fuzzy_query)

     with ix.searcher(weighting=scoring.Frequency) as s:
         hits = s.search(parsed, terms=True, limit=None)
         return [hit['id'] for hit in hits]
Exemple #5
0
def question_tokens_to_query(keywords):
    """Build a Whoosh query from groups of keywords and their synonyms.

    Args:
        keywords: Iterable of groups, each group being a sequence of strings
            (a keyword together with its synonyms). Empty groups are ignored.

    Returns:
        The query object produced by parsing the generated query string
        against the "title" and "content" fields of the index in
        ``index_dir``.
    """
    # Terms inside a group are alternatives, so they are OR-ed together and
    # wrapped in parentheses. Empty groups are skipped because they would
    # otherwise leave an unbalanced ")" in the query string.
    grouped_terms = [
        "(" + " OR ".join(group) + ")" for group in keywords if group
    ]
    query_str = " ".join(grouped_terms)

    # Turn the query string into a Whoosh query object.
    ix = index.open_dir(index_dir)
    parser = qparser.MultifieldParser(["title", "content"], ix.schema)
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())  # for complex phrase queries
    parser.add_plugin(qparser.FuzzyTermPlugin())  # terms need not match exactly
    query = parser.parse(query_str)

    return query
# Command line: <search string> <mode> [<distance>]
_string = sys.argv[1]
_mode = sys.argv[2]
normal = _mode == "normal"

# The distance argument is only read when the mode is not "normal".
_distance = 0 if normal else int(sys.argv[3])

with ix.searcher() as searcher:
    parser = MultifieldParser(
        ["title", "sub_title", "author", "content"],
        schema=ix.schema,
    )
    # Quoted input is handled by the sequence plugin instead of the phrase
    # plugin.
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())

    if not normal:
        # Proximity search: quote the whole input and append a slop value
        # derived from the requested distance.
        distance = _distance
        proximty_query = '"{}"~{}'.format(_string, (1 + distance) * 3)
        query = parser.parse(proximty_query)
    else:
        string = _string
        query = parser.parse(string)

    sys.stdout.buffer.write(">>>>>>OUTPUT start<<<<<<".encode('utf-8'))
    results = searcher.search(query, limit=20)
    results.fragmenter.maxchars = 100
    # Show more context before and after
Exemple #7
0
    q_d = MultifieldParser(["title", "content", "extension", "url"],
                           i_d.schema,
                           group=og)
    q_e = MultifieldParser(["title", "content", "extension", "url"],
                           i_e.schema,
                           group=og)
    q_f = MultifieldParser(["title", "content", "extension", "url"],
                           i_f.schema,
                           group=og)
elif operator == 4:
    #print ("in oper 4")
    og = qparser.OrGroup.factory(0.9)
    q_a = MultifieldParser(["title", "content", "tags", "extension", "url"],
                           i_a.schema,
                           group=og)
    q_a.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?"))
    q_a.add_plugin(qparser.FuzzyTermPlugin())
    q_b = MultifieldParser(["title", "content", "extension", "url"],
                           i_b.schema,
                           group=og)
    q_b.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?"))
    q_b.add_plugin(qparser.FuzzyTermPlugin())
    q_c = MultifieldParser(["title", "content", "extension", "url"],
                           i_c.schema,
                           group=og)
    q_c.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?"))
    q_c.add_plugin(qparser.FuzzyTermPlugin())
    q_d = MultifieldParser(["title", "content", "extension", "url", "url"],
                           i_d.schema,
                           group=og)
    q_d.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?"))