Example #1
0
def _search_query(search_string, index_dir, search_field):
    """Search the Whoosh index stored at *index_dir*.

    Args:
        search_string: raw query text (prefix and fuzzy syntax enabled).
        index_dir: filesystem path of the Whoosh index directory.
        search_field: default field name the query terms apply to.

    Returns:
        A list of up to 20 hits, each converted to a plain dict.
    """
    search_index = index.open_dir(index_dir)
    # BUG FIX: the original leaked the searcher (and its open file handles);
    # use it as a context manager so it is always closed.
    with search_index.searcher() as searcher:
        query_parser = qparser.QueryParser(search_field, schema=search_index.schema)
        query_parser.add_plugin(qparser.PrefixPlugin())
        query_parser.add_plugin(qparser.FuzzyTermPlugin())
        results = searcher.search(query_parser.parse(search_string), limit=20)
        # Materialize hits before the searcher closes; Results is lazy.
        return [dict(result) for result in results]
Example #2
0
def search_index(words):
    """Return (id, duanluo) pairs for documents matching any of *words*.

    Each word is searched independently (up to 10 hits per word) against
    the 'duanluo' field using OR-grouping and prefix matching.
    """
    matches = []
    with ix.searcher() as searcher:
        parser = QueryParser('duanluo', schema=ix.schema, group=qparser.OrGroup)
        parser.remove_plugin_class(qparser.WildcardPlugin)
        parser.add_plugin(qparser.PrefixPlugin())

        for word in words:
            hits = searcher.search(parser.parse(u'{}'.format(word)), limit=10)
            matches.extend((hit['id'], hit['duanluo']) for hit in hits)
    return matches
Example #3
0
def search_index(words):
    """Collect 'section' values of documents matching any of *words*.

    Searches each word separately (10 hits max per word) with OR-grouping;
    prefix matching is enabled in place of wildcard queries.
    """
    sections = []
    with ix.searcher() as searcher:
        parser = QueryParser('section', schema=ix.schema, group=qparser.OrGroup)

        # Swap wildcard support for prefix matching.
        parser.remove_plugin_class(qparser.WildcardPlugin)
        parser.add_plugin(qparser.PrefixPlugin())

        for word in words:
            query = parser.parse(u'{}'.format(word))
            for hit in searcher.search(query, limit=10):
                sections.append(hit['section'])
    return sections
Example #4
0
    def _search(self, query_string, search_model, user, global_and_search=False):
        """Run *query_string* against the index for *search_model*.

        Builds a Whoosh query string from the request parameters ('q'
        searches every model field; otherwise each key/value pair becomes a
        field clause), executes it, and returns the matching queryset,
        capped by the backend's result limit.
        """
        index = self.get_index(search_model=search_model)

        id_list = []
        with index.searcher() as searcher:
            clauses = []
            if 'q' in query_string:
                # Emulate full field set search
                value = query_string['q']
                for search_field in self.get_search_model_fields(search_model=search_model):
                    clauses.append(
                        '{}:({})'.format(search_field.get_full_name(), value)
                    )
            else:
                clauses = [
                    '{}:({})'.format(key, value)
                    for key, value in query_string.items() if value
                ]

            # AND all clauses for a global-AND search, otherwise OR them.
            joiner = ' AND ' if global_and_search else ' OR '
            search_string = joiner.join(clauses)

            logger.debug('search_string: %s', search_string)

            parser = qparser.QueryParser(fieldname='_', schema=index.schema)
            parser.remove_plugin_class(cls=qparser.WildcardPlugin)
            parser.add_plugin(pin=qparser.PrefixPlugin())

            results = searcher.search(
                q=parser.parse(text=search_string),
                limit=setting_results_limit.value
            )

            logger.debug('results: %s', results)

            id_list = [result['id'] for result in results]

        queryset = search_model.get_queryset().filter(id__in=id_list).distinct()

        return SearchBackend.limit_queryset(queryset=queryset)
Example #5
0
def search_index(words):
    """Return 'section' values related to any of the query *words*.

    Each word is searched on its own (10 hits max) against the 'section'
    field; all matching section texts are accumulated into one list.
    """
    related_sections = []
    with ix.searcher() as searcher:

        # OrGroup: a document matches if it contains ANY of the query
        # terms, rather than requiring all of them.
        parser = QueryParser('section', schema=ix.schema, group=qparser.OrGroup)

        # The next two lines enable prefix-style wildcard queries,
        # e.g. "窗前*月光".
        parser.remove_plugin_class(qparser.WildcardPlugin)
        parser.add_plugin(qparser.PrefixPlugin())

        for word in words:
            query = parser.parse(u'%s' % word)
            # limit caps how many hits come back per word.
            for hit in searcher.search(query, limit=10):
                related_sections.append(hit['section'])
    return related_sections
Example #6
0
def search_index(words):
    """Return related (pid, part) pairs for each query word.

    For every word, searches the 'part' field and keeps all hits except the
    top one (assumed to be the source paragraph itself). The per-word hit
    cap is a random value between 3 and 7.
    """
    related_parts = []
    with ix.searcher() as searcher:
        # OrGroup: match any query term instead of requiring all of them.
        parser = QueryParser('part', schema=ix.schema, group=qparser.OrGroup)

        # Enable prefix-style wildcard queries such as "窗前*月光".
        parser.remove_plugin_class(qparser.WildcardPlugin)
        parser.add_plugin(qparser.PrefixPlugin())

        # Random per-word result cap, 3..7 inclusive.
        limit = random.randint(3, 7)

        for word in words:
            query = parser.parse(u'%s' % word)
            for position, hit in enumerate(searcher.search(query, limit=limit)):
                # Skip the first hit — presumably the paragraph itself.
                if position > 0:
                    related_parts.append((hit['pid'], hit['part']))
    return related_parts
Example #7
0
def base_query():
    """Handle a search form POST to /index.

    The query sentence may end with a field marker — "$姓名$" (name),
    "$学院$" (college) or "$网页$" (page content) — restricting the search
    to a single field; otherwise all fields are searched at once. Results
    are highlighted, stored in the session, and the caller is redirected to
    the display page.

    Returns:
        ("没有查询到相关内容!", 404) when nothing matched, otherwise a JSON
        payload containing the display-page URL.
    """
    assert request.path == '/index'
    query_sentence = str(dict(request.form)["query"][0])
    logging.info("Query sentence: %s" % query_sentence)

    def _drop_suffix(sentence, suffix):
        # BUG FIX: the original used str.strip(suffix), which removes any of
        # the suffix's *characters* from both ends of the string and could
        # mangle queries that legitimately start or end with those
        # characters. Remove exactly the trailing marker instead.
        return sentence[:-len(suffix)]

    res = []
    with ix.searcher() as searcher:
        # Whether the 学院 (college) field is highlighted in results.
        # NOTE(review): this is never set to False, so the non-highlighted
        # path is currently dead; kept for parity with the original.
        highlight_xy = True

        # Default: multi-field search across all relevant fields.
        query = qparser.MultifieldParser(
            ["content", "title", "mtext", "xueyuan"], ix.schema)
        if query_sentence.endswith("$姓名$"):
            # Restrict to the name/title field.
            query = qparser.SimpleParser("title", ix.schema)
            query_sentence = _drop_suffix(query_sentence, "$姓名$")
        elif query_sentence.endswith("$学院$"):
            # Restrict to the college field.
            query = qparser.SimpleParser("xueyuan", ix.schema)
            query_sentence = _drop_suffix(query_sentence, "$学院$")
        elif query_sentence.endswith("$网页$"):
            # Restrict to the page-content field.
            query = qparser.SimpleParser("content", ix.schema)
            query_sentence = _drop_suffix(query_sentence, "$网页$")

        # Enable the extra query syntaxes: wildcards, prefixes, boolean
        # operators, regexes and quoted phrases.
        query.add_plugin(qparser.WildcardPlugin)
        query.add_plugin(qparser.PrefixPlugin())
        query.add_plugin(qparser.OperatorsPlugin)
        query.add_plugin(qparser.RegexPlugin)
        query.add_plugin(qparser.PhrasePlugin)

        # Parse into an executable query object.
        q = query.parse(query_sentence)
        logging.info("Query parse result: %s" % str(q))
        print(q)
        result = searcher.search(q, limit=20)

        # Highlighting setup: larger fragments, <em class="match"> tags.
        my_cf = highlight.ContextFragmenter(maxchars=200, surround=30)
        hf = highlight.HtmlFormatter(tagname='em',
                                     classname='match',
                                     termclass='term')
        hi = highlight.Highlighter(fragmenter=my_cf, formatter=hf)

        def _make_entry(hit, picpath):
            # Build one result entry; the four original branches differed
            # only in the 'xueyuan' highlighting and the 'picpath' value.
            if highlight_xy:
                xueyuan = Markup(hi.highlight_hit(hit, "xueyuan"))
            else:
                xueyuan = hit["xueyuan"]
            return {
                "title": hit['title'],
                "xueyuan": xueyuan,
                "url": hit["url"],
                'shotpath': hit['shotpath'],
                "content": Markup(hi.highlight_hit(hit, "content")),
                "parenturl": hit["parenturl"],
                "picpath": picpath,
                "pagerank": scores[url_dict[hit["url"]]],
            }

        for hit in result:
            print(hit["picpath"])
            print(hit["title"])
            print(escape(hi.highlight_hit(hit, "content")))
            if hit['picpath'] == '#':
                res.append(_make_entry(hit, '#'))
            else:
                res.append(_make_entry(
                    hit,
                    "images/%s/%s" % (hit['picpath'].split('/')[-3],
                                      hit['picpath'].split('/')[-1])))
        print(len(result))
        print(res)
    count = len(result)

    if count == 0:
        logging.warning("%d,没有查询到相关内容!" % 404)
        return "没有查询到相关内容!", 404
    else:
        # Log the response summary.
        log = "Response: "
        for item in res:
            log = log + " (name:%s,url:%s) " % (item["title"], item["url"])
        logging.info(log)

        mysession["data"] = res  # pass the result list to the display view
        return jsonify({"url": "/display/%d&%s" % (count, query_sentence)})