def slda_infer():
    """Run SLDA topic inference on the submitted document.

    Reads the 'category' and 'type' fields from ``request.form``, obtains the
    document text via ``input_doc_str``, tokenizes it, groups the tokens into
    consecutive chunks of five (the last chunk may be shorter), and passes
    those chunks to the engine's ``slda_infer``.

    Returns:
        The result of ``json_format`` applied to the inferred topic
        distribution.
    """
    form = request.form
    category = form['category']
    doc_text = input_doc_str(form['type'])

    engine = InferenceEngineWrapper(get_model_dir(category), get_slda_conf())
    tokens = engine.tokenize(doc_text)

    # Every run of five tokens is treated as one "sentence".
    chunks = [tokens[start:start + 5] for start in range(0, len(tokens), 5)]

    return json_format(engine.slda_infer(chunks))
def doc_topic_word_slda():
    """Infer SLDA topics for the submitted document and list words near each.

    Reads the 'category' and 'type' fields from ``request.form``, obtains the
    document text via ``input_doc_str``, tokenizes it, groups the tokens into
    consecutive chunks of five (the last chunk may be shorter), and runs the
    engine's ``slda_infer`` on those chunks. For every entry of the resulting
    distribution it looks up ``get_count()`` nearest words around that topic.

    Returns:
        A JSON string produced by ``json.dumps``. It is ``"{}"`` when the
        inferred distribution is empty.

    NOTE(review): the result is keyed by the *value* paired with each topic
    key (presumably the topic probability; confirm), so two topics with an
    equal value overwrite each other. Kept as-is for compatibility.
    """
    category = request.form['category']
    in_type = request.form['type']
    f_text = input_doc_str(in_type)

    inference_engine_wrapper = InferenceEngineWrapper(
        get_model_dir(category), get_slda_conf())
    seg_list = inference_engine_wrapper.tokenize(f_text)

    # Every run of five tokens is treated as one "sentence".
    sentences = [seg_list[index:index + 5]
                 for index in range(0, len(seg_list), 5)]
    topic_dist = dict(inference_engine_wrapper.slda_infer(sentences))

    result = {}
    if not topic_dist:
        # Nothing to look up; skip building the embeddings wrapper entirely.
        return json.dumps(result)

    # The wrapper depends only on the category, so build it once instead of
    # once per topic.
    twe_wrapper = TopicalWordEmbeddingsWrapper(
        get_model_dir(category), get_emb_file(category))

    for key, value in topic_dist.items():
        result[value] = dict(
            twe_wrapper.nearest_words_around_topic(int(key), get_count()))

    return json.dumps(result)
import sys

from familia_wrapper import InferenceEngineWrapper

# On Python 2, rebind input to raw_input so the prompt below reads a plain
# line of text on both major versions.
if sys.version_info < (3, 0):
    input = raw_input

if __name__ == '__main__':
    # Interactive SLDA demo: read documents from the prompt and print the
    # topic distribution inferred for each one.
    if len(sys.argv) < 3:
        sys.stderr.write("Usage:python {} {} {}\n".format(
            sys.argv[0], "model_dir", "conf_file"))
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. when running with python -S).
        sys.exit(-1)

    # Read the command-line arguments.
    model_dir = sys.argv[1]
    conf_file = sys.argv[2]

    # Create the InferenceEngineWrapper object.
    inference_engine_wrapper = InferenceEngineWrapper(model_dir, conf_file)

    while True:
        try:
            input_text = input("Enter Document: ")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: leave the prompt loop without a
            # traceback.
            sys.stdout.write("\n")
            break

        # Tokenize the document.
        seg_list = inference_engine_wrapper.tokenize(input_text.strip())

        # Build the sentence structure: every 5 tokens form one sentence.
        sentences = [seg_list[index:index + 5]
                     for index in range(0, len(seg_list), 5)]

        # Run inference.
        topic_dist = inference_engine_wrapper.slda_infer(sentences)

        # Print the result.
        print("Document Topic Distribution:")
        print(topic_dist)
import sys

from familia_wrapper import InferenceEngineWrapper

# On Python 2, rebind input to raw_input so the prompt below reads a plain
# line of text on both major versions.
if sys.version_info < (3, 0):
    input = raw_input

if __name__ == '__main__':
    # Interactive SLDA demo: read documents from the prompt and print the
    # topic distribution inferred for each one.
    if len(sys.argv) < 3:
        sys.stderr.write("Usage:python {} {} {}\n".format(
            sys.argv[0], "model_dir", "conf_file"))
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. when running with python -S).
        sys.exit(-1)

    # Read the command-line arguments.
    model_dir = sys.argv[1]
    conf_file = sys.argv[2]

    # Create the InferenceEngineWrapper object.
    inference_engine_wrapper = InferenceEngineWrapper(model_dir, conf_file)

    while True:
        try:
            input_text = input("Enter Document: ")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: leave the prompt loop without a
            # traceback.
            sys.stdout.write("\n")
            break

        # Tokenize the document.
        seg_list = inference_engine_wrapper.tokenize(input_text.strip())

        # Build the sentence structure: every 5 tokens form one sentence.
        sentences = [seg_list[index:index + 5]
                     for index in range(0, len(seg_list), 5)]

        # Run inference.
        topic_dist = inference_engine_wrapper.slda_infer(sentences)

        # Print the result.
        print("Document Topic Distribution:")
        print(topic_dist)