def init_config(self):
    """Create the classifier, load the question templates and build the template helper.

    Sets three attributes on the instance:

    * ``self.classify_model`` - a ``Question_classify`` instance (the original
      comment says constructing it trains the classifier).
    * ``self.question_mode_dict`` - maps the integer template id to the
      template text, read from
      ``./data/question/question_classification.txt`` (one ``id:template``
      entry per line).
    * ``self.questiontemplate`` - a ``QuestionTemplate`` instance.

    Vocabulary loading used to live here as commented-out code; it is not
    used and has been dropped.

    Raises:
        OSError: if the template file cannot be opened.
        ValueError: if a non-blank line has no ``:`` separator or its id is
            not an integer.
    """
    # Create (train) the classifier.
    self.classify_model = Question_classify()

    # Read the question templates.
    question_modes = {}
    with open("./data/question/question_classification.txt", "r", encoding="utf-8") as f:
        for one_mode in f:
            # A UTF-8 BOM is not whitespace, so strip it explicitly;
            # otherwise int() fails on the first id of a BOM-prefixed file.
            line = str(one_mode).strip().lstrip("\ufeff").strip()
            if not line:
                # Blank (e.g. trailing) lines carry no template.
                continue
            # Split on the first ":" only so template text may contain colons.
            mode_id, mode_str = line.split(":", 1)
            question_modes[int(mode_id)] = mode_str.strip()
    self.question_mode_dict = question_modes

    # Create the question-template object.
    self.questiontemplate = QuestionTemplate()
def __init__(self):
    """Initialise the question processor.

    Sets three attributes on the instance:

    * ``self.classify_model`` - a ``Question_classify`` instance (the original
      comment says constructing it trains the classifier).
    * ``self.question_mode_dict`` - maps the integer template id to the
      template text, read from ``./questions/question_classification.txt``
      (one ``id:template`` entry per line).
    * ``self.questiontemplate`` - a ``QuestionTemplate`` instance.

    Raises:
        OSError: if the template file cannot be opened.
        ValueError: if a non-blank line has no ``:`` separator or its id is
            not an integer.
    """
    # Create (train) the classifier.
    self.classify_model = Question_classify()

    # Read the question templates.
    question_modes = {}
    with open("./questions/question_classification.txt", "r", encoding="utf-8") as f:
        for one_mode in f:
            # A UTF-8 BOM is not whitespace, so strip it explicitly;
            # otherwise int() fails on the first id of a BOM-prefixed file.
            line = str(one_mode).strip().lstrip("\ufeff").strip()
            if not line:
                # Blank (e.g. trailing) lines carry no template.
                continue
            # Split on the first ":" only so template text may contain colons.
            mode_id, mode_str = line.split(":", 1)
            question_modes[int(mode_id)] = mode_str.strip()
    self.question_mode_dict = question_modes

    # Create the question-template object.
    self.questiontemplate = QuestionTemplate()
def init_config(self):
    """Create the classifier, load the question templates and build the template helper.

    Sets three attributes on the instance:

    * ``self.classify_model`` - a ``Question_classify`` instance (the original
      comment says constructing it trains the classifier).
    * ``self.question_mode_dict`` - maps the integer template id to the
      template text, read from
      ``./data/question/question_classification.txt`` (one ``id:template``
      entry per line).
    * ``self.questiontemplate`` - a ``QuestionTemplate`` instance.

    Raises:
        OSError: if the template file cannot be opened.
        ValueError: if a non-blank line has no ``:`` separator or its id is
            not an integer.
    """
    # Create (train) the classifier.
    self.classify_model = Question_classify()

    # Read the question templates.
    question_modes = {}
    with open("./data/question/question_classification.txt", "r", encoding="utf-8") as f:
        for one_mode in f:
            # Remove surrounding whitespace as well as the UTF-8 BOM: stripping
            # only the BOM leaves blank lines as "\n", which cannot be unpacked.
            line = str(one_mode).strip().lstrip("\ufeff").strip()
            if not line:
                # Blank (e.g. trailing) lines carry no template.
                continue
            # Split on the first ":" only so template text may contain colons.
            mode_id, mode_str = line.split(":", 1)
            question_modes[int(mode_id)] = mode_str.strip()
    self.question_mode_dict = question_modes

    # Create the question-template object.
    self.questiontemplate = QuestionTemplate()
class Question():
    """Answer a natural-language question via template classification.

    Pipeline (see ``question_process``): clean the raw text, POS-tag it with
    jieba, replace selected tagged words by their tag to obtain an abstract
    question, let the classifier choose a template id, then hand the tagged
    words and the template to ``QuestionTemplate`` to obtain the answer.
    """

    # File with one "id:template" entry per line.
    _TEMPLATE_FILE = "./questions/question_classification.txt"
    # Custom jieba dictionary.
    _USERDICT_FILE = "./questions/userdict3.txt"
    # Whitespace/punctuation removed from the question before segmentation.
    # Raw string: the non-raw form relies on invalid escapes such as "\s".
    _PUNCTUATION_PATTERN = r"[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+"
    # POS flags whose words are replaced by the flag itself when abstracting.
    _ABSTRACT_FLAGS = ('nr', 'nm', 'ng')
    # Answer returned when the template lookup fails.
    _FALLBACK_ANSWER = "我也不知道啊!"

    def __init__(self):
        """Create the classifier, load the templates and build the template helper.

        Raises:
            OSError: if the template file cannot be opened.
            ValueError: if the template file contains a malformed line.
        """
        # Create (train, per the original comment) the classifier.
        self.classify_model = Question_classify()
        # Read the question templates.
        with open(self._TEMPLATE_FILE, "r", encoding="utf-8") as f:
            self.question_mode_dict = self._parse_question_modes(f)
        # Create the question-template object.
        self.questiontemplate = QuestionTemplate()

    @staticmethod
    def _parse_question_modes(lines):
        """Parse ``id:template`` lines into a ``{int id: template text}`` dict.

        Blank lines are skipped, a leading UTF-8 BOM is ignored, and only the
        first ``:`` separates the id from the template text.

        Raises:
            ValueError: if a non-blank line has no ``:`` or a non-integer id.
        """
        question_modes = {}
        for raw_line in lines:
            # The BOM is not whitespace, so it needs its own strip.
            line = str(raw_line).strip().lstrip("\ufeff").strip()
            if not line:
                continue
            mode_id, mode_str = line.split(":", 1)
            question_modes[int(mode_id)] = mode_str.strip()
        return question_modes

    def question_process(self, question):
        """Run the whole pipeline for ``question`` and return the answer."""
        # Receive the question.
        self.raw_question = str(question).strip()
        # POS-tag the question.
        self.pos_quesiton = self.question_posseg()
        # Determine the question template.
        self.question_template_id_str = self.get_question_template()
        # Query for the answer (graph database, per the original comment).
        self.answer = self.query_template()
        return self.answer

    def question_posseg(self):
        """Clean ``self.raw_question`` and POS-tag it with jieba.

        Stores ``self.clean_question``, ``self.question_word`` and
        ``self.question_flag`` (parallel lists) and returns a list of
        ``"word/flag"`` strings.
        """
        # Load the user dictionary once per instance instead of on every
        # question; re-reading the same file does not change the result.
        if not getattr(self, "_userdict_loaded", False):
            jieba.load_userdict(self._USERDICT_FILE)
            self._userdict_loaded = True

        clean_question = re.sub(self._PUNCTUATION_PATTERN, "", self.raw_question)
        self.clean_question = clean_question

        result = []
        question_word, question_flag = [], []
        for w in jieba.posseg.cut(str(clean_question)):
            result.append(f"{w.word}/{w.flag}")
            # Words and flags are appended in lockstep, so the two lists
            # always have equal length.
            question_word.append(str(w.word).strip())
            question_flag.append(str(w.flag).strip())
        self.question_word = question_word
        self.question_flag = question_flag
        print(result)
        return result

    def get_question_template(self):
        """Abstract the question, classify it and return ``"<id>\\t<template>"``.

        Every word whose flag is in ``_ABSTRACT_FLAGS`` is replaced by the
        flag, and the flag gets an ``"ed"`` suffix to mark it as handled.

        Raises:
            KeyError: if the classifier returns an id with no template.
        """
        # Abstract the question in a single pass.
        for ix, flag in enumerate(self.question_flag):
            if flag in self._ABSTRACT_FLAGS:
                self.question_word[ix] = flag
                self.question_flag[ix] = flag + "ed"
        # Turn the abstracted question into one string.
        str_question = "".join(self.question_word)
        print("抽象问题为:", str_question)
        # Ask the classifier for the template number.
        question_template_num = self.classify_model.predict(str_question)
        print("使用模板编号:", question_template_num)
        question_template = self.question_mode_dict[question_template_num]
        print("问题模板:", question_template)
        return str(question_template_num) + "\t" + question_template

    def query_template(self):
        """Ask the template object for the answer; fall back to a fixed reply.

        The original comment says the template object builds and runs the
        CQL query based on the template content.
        """
        try:
            answer = self.questiontemplate.get_question_answer(
                self.pos_quesiton, self.question_template_id_str)
        except Exception:
            # Best effort: any lookup failure yields the fallback answer, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            answer = self._FALLBACK_ANSWER
        return answer
class Question():
    """Answer an English question via template classification.

    Pipeline (see ``question_process``): clean the raw text, run spaCy over
    it, replace selected named entities by their label to obtain an abstract
    question, let the classifier choose a template id, then hand the tagged
    words and the template to ``QuestionTemplate`` to obtain the answer.
    """

    # File with one "id:template" entry per line.
    _TEMPLATE_FILE = "./data/question/question_classification.txt"
    # Name of the spaCy pipeline used for tagging.
    _SPACY_MODEL = "en_core_web_sm"
    # Whitespace/punctuation replaced by a blank before tagging.
    # Raw string: the non-raw form relies on invalid escapes such as "\s".
    _PUNCTUATION_PATTERN = r"[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+"
    # Entity labels whose text is replaced by the label when abstracting.
    _ABSTRACT_FLAGS = ('PERSON', 'WORK_OF_ART')
    # Answer returned when the template lookup fails.
    _FALLBACK_ANSWER = "idk!"

    def __init__(self):
        """Initialise the instance by delegating to ``init_config``."""
        self.init_config()

    def init_config(self):
        """Create the classifier, load the templates and build the template helper.

        Vocabulary loading used to live here as commented-out code; it is not
        used and has been dropped.

        Raises:
            OSError: if the template file cannot be opened.
            ValueError: if the template file contains a malformed line.
        """
        # Create (train, per the original comment) the classifier.
        self.classify_model = Question_classify()
        # Read the question templates.
        with open(self._TEMPLATE_FILE, "r", encoding="utf-8") as f:
            self.question_mode_dict = self._parse_question_modes(f)
        # Create the question-template object.
        self.questiontemplate = QuestionTemplate()

    @staticmethod
    def _parse_question_modes(lines):
        """Parse ``id:template`` lines into a ``{int id: template text}`` dict.

        Blank lines are skipped, a leading UTF-8 BOM is ignored, and only the
        first ``:`` separates the id from the template text.

        Raises:
            ValueError: if a non-blank line has no ``:`` or a non-integer id.
        """
        question_modes = {}
        for raw_line in lines:
            # The BOM is not whitespace, so it needs its own strip.
            line = str(raw_line).strip().lstrip("\ufeff").strip()
            if not line:
                continue
            mode_id, mode_str = line.split(":", 1)
            question_modes[int(mode_id)] = mode_str.strip()
        return question_modes

    def question_process(self, question):
        """Run the whole pipeline for ``question`` and return the answer."""
        # Receive the question.
        self.raw_question = question
        # Tag the question.
        self.pos_quesiton = self.question_posseg()
        # Determine the question template.
        self.question_template_id_str = self.get_question_template()
        # Query for the answer (graph database, per the original comment).
        self.answer = self.query_template()
        return self.answer

    def question_posseg(self):
        """Clean ``self.raw_question`` and tag it with spaCy.

        Stores ``self.clean_question``, ``self.question_word`` and
        ``self.question_flag`` (parallel lists) and returns a list of
        ``"text/tag"`` strings: first one entry per named entity (tagged with
        its entity label), then one entry per token (tagged with its POS).
        """
        clean_question = re.sub(self._PUNCTUATION_PATTERN, " ", self.raw_question)
        print(clean_question)
        self.clean_question = clean_question

        # Loading a spaCy pipeline is expensive; do it once per instance
        # instead of once per question.
        nlp = getattr(self, "_nlp", None)
        if nlp is None:
            nlp = self._nlp = spacy.load(self._SPACY_MODEL)
        question_seged = nlp(clean_question)

        result = []
        question_word, question_flag = [], []
        # NOTE(review): entity text is recorded here and again token by token
        # below, so it appears twice in the abstract question. Kept as is in
        # case the classifier was trained on that format - confirm.
        for ent in question_seged.ents:
            result.append(f"{ent}/{ent.label_}")
            question_word.append(str(ent).strip())
            question_flag.append(str(ent.label_).strip())
        for token in question_seged:
            result.append(f"{token}/{token.pos_}")
            question_word.append(str(token).strip())
            question_flag.append(str(token.pos_).strip())
        self.question_word = question_word
        self.question_flag = question_flag
        print(result)
        print("return answer!........................")
        return result

    def get_question_template(self):
        """Abstract the question, classify it and return ``"<id>\\t<template>"``.

        Every word whose flag is in ``_ABSTRACT_FLAGS`` is replaced by the
        flag, and the flag gets an ``"ed"`` suffix to mark it as handled.

        Raises:
            KeyError: if the classifier returns an id with no template.
        """
        # Abstract the question in a single pass.
        for ix, flag in enumerate(self.question_flag):
            if flag in self._ABSTRACT_FLAGS:
                self.question_word[ix] = flag
                self.question_flag[ix] = flag + "ed"
        # Turn the abstracted question into one string.
        str_question = " ".join(self.question_word)
        print("Question can be simplified as:", str_question)
        # Ask the classifier for the template number.
        question_template_num = self.classify_model.predict(str_question)
        print("Question match template No.", question_template_num)
        question_template = self.question_mode_dict[question_template_num]
        print("The template is:", question_template)
        return str(question_template_num) + "\t" + question_template

    def query_template(self):
        """Ask the template object for the answer; fall back to a fixed reply.

        The original comment says the template object builds and runs the
        CQL query based on the template content.
        """
        try:
            answer = self.questiontemplate.get_question_answer(
                self.pos_quesiton, self.question_template_id_str)
        except Exception:
            # Best effort: any lookup failure yields the fallback answer, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            answer = self._FALLBACK_ANSWER
        return answer
class Question():
    """Answer a natural-language question via template classification.

    Pipeline (see ``question_process``): clean the raw text, POS-tag it with
    jieba, replace every ``nr``-tagged word by the tag to obtain an abstract
    question, let the classifier choose a template id, then hand the tagged
    words and the template to ``QuestionTemplate`` to obtain the answer.
    """

    # File with one "id:template" entry per line.
    _TEMPLATE_FILE = "./data/question/question_classification.txt"
    # Custom jieba dictionary.
    _USERDICT_FILE = "./data/vocab_jieba.txt"
    # Whitespace/punctuation removed from the question before segmentation.
    # Raw string: the non-raw form relies on invalid escapes such as "\s".
    _PUNCTUATION_PATTERN = r"[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+"
    # Answer returned when the template lookup fails.
    _FALLBACK_ANSWER = "我也还不知道??!"

    def __init__(self):
        """Initialise the instance by delegating to ``init_config``."""
        self.init_config()

    def init_config(self):
        """Create the classifier, load the templates and build the template helper.

        Vocabulary loading used to live here as commented-out code; it is not
        used and has been dropped.

        Raises:
            OSError: if the template file cannot be opened.
            ValueError: if the template file contains a malformed line.
        """
        # Create (train, per the original comment) the classifier.
        self.classify_model = Question_classify()
        # Read the question templates.
        with open(self._TEMPLATE_FILE, "r", encoding="utf-8") as f:
            self.question_mode_dict = self._parse_question_modes(f)
        # Create the question-template object.
        self.questiontemplate = QuestionTemplate()

    @staticmethod
    def _parse_question_modes(lines):
        """Parse ``id:template`` lines into a ``{int id: template text}`` dict.

        Blank lines are skipped, a leading UTF-8 BOM is ignored, and only the
        first ``:`` separates the id from the template text.

        Raises:
            ValueError: if a non-blank line has no ``:`` or a non-integer id.
        """
        question_modes = {}
        for raw_line in lines:
            # The BOM is not whitespace, so it needs its own strip.
            line = str(raw_line).strip().lstrip("\ufeff").strip()
            if not line:
                continue
            mode_id, mode_str = line.split(":", 1)
            question_modes[int(mode_id)] = mode_str.strip()
        return question_modes

    def question_process(self, question):
        """Run the whole pipeline for ``question`` and return the answer."""
        # Receive the question.
        self.raw_question = str(question).strip()
        # POS-tag the question.
        self.pos_quesiton = self.question_posseg()
        # Determine the question template.
        self.question_template_id_str = self.get_question_template()
        # Query for the answer (graph database, per the original comment).
        self.answer = self.query_template()
        return self.answer

    def question_posseg(self):
        """Clean ``self.raw_question`` and POS-tag it with jieba.

        Stores ``self.clean_question``, ``self.question_word`` and
        ``self.question_flag`` (parallel lists) and returns a list of
        ``"word/flag"`` strings.
        """
        # Load the user dictionary once per instance instead of on every
        # question; re-reading the same file does not change the result.
        if not getattr(self, "_userdict_loaded", False):
            jieba.load_userdict(self._USERDICT_FILE)
            self._userdict_loaded = True

        clean_question = re.sub(self._PUNCTUATION_PATTERN, "", self.raw_question)
        self.clean_question = clean_question

        result = []
        question_word, question_flag = [], []
        for w in jieba.posseg.cut(str(clean_question)):
            result.append(f"{w.word}/{w.flag}")
            # Words and flags are appended in lockstep, so the two lists
            # always have equal length.
            question_word.append(str(w.word).strip())
            question_flag.append(str(w.flag).strip())
        self.question_word = question_word
        self.question_flag = question_flag
        print(result)
        return result

    def get_question_template(self):
        """Abstract the question, classify it and return ``"<id>\\t<template>"``.

        Each ``nr``-tagged word (disease and drug names, per the original
        comment) is replaced by the symbol ``nr``; the symbol stays in the
        question text used for intent recognition, so the concrete subject
        is abstracted away and the same template fits any such sentence.

        Raises:
            KeyError: if the classifier returns an id with no template.
        """
        # Single pass over the flags: unlike a ``while 'nr' in flags`` loop
        # this terminates regardless of whether the flag is rewritten.
        for ix, flag in enumerate(self.question_flag):
            if flag == 'nr':
                self.question_word[ix] = 'nr'
                # Mark the position as handled.
                self.question_flag[ix] = 'nred'
        # Turn the abstracted question into one string.
        str_question = "".join(self.question_word)
        print("抽象问题为:", str_question)
        # Ask the classifier for the template number.
        question_template_num = self.classify_model.predict(str_question)
        print("使用模板编号:", question_template_num)
        question_template = self.question_mode_dict[question_template_num]
        print("问题模板:", question_template)
        return str(question_template_num) + "\t" + question_template

    def query_template(self):
        """Ask the template object for the answer; fall back to a fixed reply.

        The original comment says the template object builds and runs the
        CQL query based on the template content.
        """
        try:
            print('开始处理self.questiontemplate.get_question_answer')
            answer = self.questiontemplate.get_question_answer(
                self.pos_quesiton, self.question_template_id_str)
        except Exception:
            # Best effort: any lookup failure yields the fallback answer, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            answer = self._FALLBACK_ANSWER
        return answer