def _trim_to_chinese(text):
    """Strip non-Chinese characters from both ends of *text*.

    Trailing characters are dropped first, then leading ones.  Trimming at
    either end stops as soon as the boundary character lies in the range
    U+4E00..U+9FFF or only a single character remains, so a non-empty
    input never becomes empty.

    Indices are moved instead of re-slicing the string on every step, which
    keeps the work linear in the length of *text*.
    """
    end = len(text)
    while end > 1 and not (u'\u4e00' <= text[end - 1] <= u'\u9fff'):
        end -= 1
    start = 0
    while end - start > 1 and not (u'\u4e00' <= text[start] <= u'\u9fff'):
        start += 1
    return text[start:end]


def _clean_line(raw):
    """Remove spaces, newlines and emoji from *raw*, then trim its ends."""
    return _trim_to_chinese(remove_emoji(raw.replace(" ", "").replace("\n", "")))


def task(param):
    """Score post/response pairs and split them by match score.

    Args:
        param: Mapping with the keys ``'id'`` (used in the knowledge-base
            file name and in progress messages), ``'qlines'`` and
            ``'alines'`` (iterables of strings that are paired with
            ``zip``).

    Returns:
        A dict with the keys ``'qaResult'`` (pairs whose score is above
        0.7), ``'noqaResult'`` (all other scored pairs),
        ``'sentence_number'`` (pairs that passed the filters) and
        ``'knowledge_number'`` (pairs counted into ``'qaResult'``).

    Pairs are filtered with the module-level ``minlen`` and ``dos`` values,
    which are defined outside this block.
    """
    print('加载模型中...')
    LtpModel = ltpModel()
    W2vModel = w2vModel(LtpModel)
    TripleModel = tripleModel(LtpModel)
    QAModel = qaModel('qa_data\\weibokb' + str(param['id']) + '.json',
                      LtpModel, TripleModel, W2vModel)
    qaResult = []
    noqaResult = []
    sentence_number = 0
    knowledge_number = 0
    for postLine, responseLine in zip(param['qlines'], param['alines']):
        question = _clean_line(postLine)
        if len(question) < minlen:
            # The response is not cleaned at all when the post is too short.
            continue
        answer = _clean_line(responseLine)
        if len(answer) < minlen or abs(len(question) - len(answer)) >= dos:
            continue
        # Drop pairs that contain mentions or links.
        if ('@' in question or '@' in answer
                or 'http' in question or 'http' in answer):
            continue
        try:
            score, _ = QAModel.getMatchScore(question, answer)
            if score > 0.7:
                knowledge_number += 1
                qaResult.append(question + '\t' + answer + '\n')
            else:
                noqaResult.append(question + '\t' + answer + '\n')
        except Exception:
            # Best effort: a failing pair is reported and still counted.
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            traceback.print_exc()
        sentence_number += 1
        if sentence_number % 100 == 0:
            try:
                print('process:' + str(param['id']) + ' done' + str(sentence_number))
            except Exception:
                # The original guards this print as well; keep it non-fatal.
                traceback.print_exc()
    # NOTE(review): the source had lost its indentation; saving once after
    # the loop is the assumed placement -- confirm against the original file.
    QAModel.saveKB()
    return {
        'qaResult': qaResult,
        'noqaResult': noqaResult,
        'sentence_number': sentence_number,
        'knowledge_number': knowledge_number,
    }
def task(param):
    """Score tab-separated question/answer lines and split them by score.

    Args:
        param: Mapping with the keys ``'id'`` (used in the knowledge-base
            file name and in progress messages; the progress messages
            format it with ``%d``) and ``'lines'`` (iterable of strings,
            each expected to hold a question and an answer separated by a
            single tab).

    Returns:
        A dict with the keys ``'qaResult'`` (pairs whose score is above
        0.7), ``'noqaResult'`` (all other scored pairs),
        ``'sentence_number'`` (lines that had exactly two fields) and
        ``'knowledge_number'`` (pairs counted into ``'qaResult'``).
    """
    print('加载模型中...')
    NlpModel = nlpModel()
    W2vModel = w2vModel()
    TripleModel = tripleModel(NlpModel)
    QAModel = qaModel('qa_data\\rkb' + str(param['id']) + '.json', TripleModel, W2vModel)
    qaResult = []
    noqaResult = []
    sentence_number = 0
    knowledge_number = 0
    for line in param['lines']:
        try:
            qaList = line.strip().split('\t')
            if len(qaList) != 2:
                # Malformed lines are skipped and not counted.
                continue
            question, answer = qaList
            score, _ = QAModel.getMatchScore(question, answer)
            if score > 0.7:
                knowledge_number += 1
                qaResult.append(question + '\t' + answer + '\n')
            else:
                noqaResult.append(question + '\t' + answer + '\n')
        except Exception:
            # Best effort: report the failing line and keep going.
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print('process:' + str(param['id']) + ' error ' + line)
            traceback.print_exc()
        sentence_number += 1
        if sentence_number % 100 == 0:
            print('process:%d done%d qa%d noqa%d'
                  % (param['id'], sentence_number, len(qaResult), len(noqaResult)))
    print('process:%d finish%d qa%d noqa%d'
          % (param['id'], sentence_number, len(qaResult), len(noqaResult)))
    # NOTE(review): the source had lost its indentation; the final report
    # and save are assumed to run once after the loop -- confirm.
    QAModel.saveKB()
    return {
        'qaResult': qaResult,
        'noqaResult': noqaResult,
        'sentence_number': sentence_number,
        'knowledge_number': knowledge_number,
    }
# -*- coding: utf-8 -*- import sys import traceback from nlpModel import nlpModel from qaModel import qaModel from w2vModel import w2vModel from tripleModel import tripleModel modelname = 'beam1.gm' print('加载模型中...') NlpModel = nlpModel() W2vModel = w2vModel() TripleModel = tripleModel(NlpModel) QAModel = qaModel('qa_data/kb.json', TripleModel, W2vModel) print('开始读取文件') scoreNums = [0] * 11 scoreSum = 0 with open('Twitter.100w.test.key', 'r') as keyFile, open('Twitter.100w.test.beam1.gm.output', 'r') as valueFile,\ open('Twitter.100w.rule.'+modelname+'.score','w') as out_file: id = 1 for key, value in zip(keyFile, valueFile): key = key.strip() value = value.strip() try:
def __init__(self, model_dir, tokenize):
    """Build the model, the extractor wrapped around it, and the w2v helper.

    Args:
        model_dir: Passed unchanged to ``load_pretrained_rnn``.
        tokenize: Forwarded to ``Trained_oie`` as its ``tokenize`` keyword.
    """
    pretrained = load_pretrained_rnn(model_dir)
    self.model = pretrained
    # The extractor receives the same model object that is stored above.
    self.oie = Trained_oie(pretrained, tokenize=tokenize)
    self.w2v = w2vModel()
import traceback from ltpModel import ltpModel from qaModel import qaModel from w2vModel import w2vModel from tripleModel import tripleModel import myLog #win10 1709版本控制台存在bug,需要引入这个包防止print意外报错 import win_unicode_console win_unicode_console.enable() logger = myLog.getLogging('baiduqalog.txt') #log文件 logger.info('加载模型中...') LtpModel = ltpModel() W2vModel = w2vModel(LtpModel) TripleModel = tripleModel(LtpModel) QAModel = qaModel('qa_data\\baiduqakb.json', LtpModel, TripleModel, W2vModel) logger.info('开始读取文件') #读取文件 in_file_name = "test_file\\baiduqa.txt" out_file_name = "test_file\\baiduqa_output.txt" if len(sys.argv) > 1: in_file_name = sys.argv[1] if len(sys.argv) > 2: out_file_name = sys.argv[2] #默认一行记录一问一答,用tab分隔开 with open(in_file_name, 'r',