def lda_infer():
    category = request.form['category']
    in_type = request.form['type']
    f_text = input_doc_str(in_type)
    inference_engine_wrapper = InferenceEngineWrapper(get_model_dir(category),
                                                      get_lda_conf())
    seg_list = inference_engine_wrapper.tokenize(f_text)
    topic_dist = inference_engine_wrapper.lda_infer(seg_list)

    return json_format(topic_dist)
def doc_topic_word_lda():
    category = request.form['category']
    in_type = request.form['type']
    f_text = input_doc_str(in_type)
    inference_engine_wrapper = InferenceEngineWrapper(get_model_dir(category),
                                                      get_lda_conf())
    seg_list = inference_engine_wrapper.tokenize(f_text)
    topic_dist = inference_engine_wrapper.lda_infer(seg_list)

    result = {}
    for key, value in dict(topic_dist).items():
        twe_wrapper = TopicalWordEmbeddingsWrapper(get_model_dir(category),
                                                   get_emb_file(category))
        result_dict = dict(
            twe_wrapper.nearest_words_around_topic(int(key), get_count()))
        result[value] = result_dict

    return json.dumps(result)
Beispiel #3
0
    result.append(ent)
    return result


if __name__ == '__main__':
    path = '/media/iiip/数据/duanduan/data/validation.csv'
    documents = read_whole_file(path)
    if len(sys.argv) < 3:
        sys.stderr.write("Usage:python {} {} {}\n".format(
            sys.argv[0], "model_dir", "conf_file"))
        exit(-1)
    # 获取参数
    model_dir = sys.argv[1]
    conf_file = sys.argv[2]
    # 创建InferenceEngineWrapper对象
    inference_engine_wrapper = InferenceEngineWrapper(model_dir, conf_file)
    topic_result = {}
    for key in documents:
        print key
        seg_list = inference_engine_wrapper.tokenize(documents[key])
        # 进行推断
        topic_dist = inference_engine_wrapper.lda_infer(seg_list)
        topic_result[key] = cal_topic(topic_dist)
    file = open(path.replace(".csv", "_topic.csv"), 'w')
    writer = csv.writer(file)
    for each in topic_result:
        writer.writerow([each, topic_result[each][0], topic_result[each][1]])
    file.close()
    # return topic_result

Beispiel #4
0
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Author: [email protected]

import sys
from familia_wrapper import InferenceEngineWrapper

if sys.version_info < (3,0):
    input = raw_input

if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.stderr.write("Usage:python {} {} {}\n".format(
            sys.argv[0], "model_dir", "conf_file"))
        exit(-1)
    # 获取参数
    model_dir = sys.argv[1]
    conf_file = sys.argv[2]
    # 创建InferenceEngineWrapper对象
    inference_engine_wrapper = InferenceEngineWrapper(model_dir, conf_file)
    while True:
        input_text = input("Enter Document: ")
        # 分词
        seg_list = inference_engine_wrapper.tokenize(input_text)
        # 进行推断
        topic_dist = inference_engine_wrapper.lda_infer(seg_list)
        # 打印结果
        print("Document Topic Distribution:")
        print(topic_dist)