Example #1
# Flask view that scores a user-supplied keyword against an uploaded document
# using Familia's InferenceEngineWrapper. input_doc_str, get_model_dir,
# get_lda_conf and json_format are helpers defined elsewhere in the project.
from flask import request  # assumed: request.form implies a Flask app

from familia_wrapper import InferenceEngineWrapper


def doc_keywords():
    category = request.form['category']
    word = request.form['word'].encode('utf-8').strip()
    in_type = request.form['type']
    f_text = input_doc_str(in_type)
    inference_engine_wrapper = InferenceEngineWrapper(get_model_dir(category),
                                                      get_lda_conf())
    seg_list = inference_engine_wrapper.tokenize(f_text)
    items = inference_engine_wrapper.cal_keywords_similarity(
        word, ' '.join(seg_list))

    return json_format(items)
Example #2
# Variant of doc_keywords(): instead of a user-supplied keyword, jieba
# extracts candidate tags from the document and each tag is scored against
# the tokenized document. Imports and project helpers as in Example #1.
import jieba.analyse
from flask import request

from familia_wrapper import InferenceEngineWrapper


def doc_keywords_plus():
    category = request.form['category']
    #word = request.form['word'].encode('utf-8').strip()
    in_type = request.form['type']
    f_text = input_doc_str(in_type)
    inference_engine_wrapper = InferenceEngineWrapper(get_model_dir(category),
                                                      get_lda_conf())
    seg_list = inference_engine_wrapper.tokenize(f_text)
    items = {}
    for x, w in jieba.analyse.extract_tags(f_text, withWeight=True):
        result = inference_engine_wrapper.cal_keywords_similarity(
            x.encode('utf-8').strip(), ' '.join(seg_list))
        items.update(result)

    return json_format(items)
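For reference, the same pattern can be run outside a web handler. The sketch below combines Example #2's jieba tag extraction with the command-line setup of Examples #3 and #4; it assumes Python 3, that familia_wrapper and jieba are installed, and that cal_keywords_similarity returns keyword/score pairs with numeric scores, as iterated in Examples #3 and #4.

import sys

import jieba.analyse
from familia_wrapper import InferenceEngineWrapper

if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: python {} model_dir conf_file\n".format(sys.argv[0]))
        sys.exit(-1)
    engine = InferenceEngineWrapper(sys.argv[1], sys.argv[2])
    doc = input("Enter Document: ").strip()
    seg_list = engine.tokenize(doc)
    scores = {}
    # Score each jieba-extracted tag against the tokenized document.
    for tag, weight in jieba.analyse.extract_tags(doc, withWeight=True):
        result = engine.cal_keywords_similarity(tag, ' '.join(seg_list))
        scores.update(result)
    # Print keywords ordered by similarity, highest first.
    for keyword, score in sorted(scores.items(), key=lambda kv: kv[1], reverse=True):
        print(keyword + '\t' + str(score))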
Example #3
# Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import sys
from familia_wrapper import InferenceEngineWrapper

if sys.version_info < (3, 0):
    input = raw_input

if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.stderr.write("Usage:python {} {} {}.\n".format(
            sys.argv[0], "model_dir", "conf_file"))
        exit(-1)

    # Get the command-line arguments
    model_dir = sys.argv[1]
    conf_file = sys.argv[2]
    # Create the InferenceEngineWrapper object
    inference_engine_wrapper = InferenceEngineWrapper(model_dir, conf_file)
    while True:
        # Read the keywords and the document
        words = input("Enter Keywords: ").strip()
        doc = input("Enter Document: ").strip()
        seg_list = inference_engine_wrapper.tokenize(doc)
        items = inference_engine_wrapper.cal_keywords_similarity(
            words, ' '.join(seg_list))
        # Print the results
        print('----------------------------')
        for item in items:
            print(item[0] + '\t' + str(item[1]))
Example #4
# Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import sys
from familia_wrapper import InferenceEngineWrapper

if sys.version_info < (3, 0):
    input = raw_input

if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.stderr.write("Usage:python {} {} {}.\n".format(
            sys.argv[0], "model_dir", "conf_file"))
        exit(-1)

    # Get the command-line arguments
    model_dir = sys.argv[1]
    conf_file = sys.argv[2]
    # Create the InferenceEngineWrapper object
    inference_engine_wrapper = InferenceEngineWrapper(model_dir, conf_file)
    while True:
        # Read the keywords and the document
        words = input("Enter Keywords: ").strip()
        doc = input("Enter Document: ").strip()
        items = inference_engine_wrapper.cal_keywords_similarity(words, doc)
        # Print the results
        print('----------------------------')
        for item in items:
            print(item[0] + '\t' + str(item[1]))