class PreProcessResponse(BaseResponse):
    """Response wrapper for the preprocess endpoint.

    Validates the request, then returns the synonym-normalised sentence
    together with its word segmentation.
    """

    name = rcategory.preprocess.value
    _preprocess_check_class = PreprocessCheck()
    _preprocess_class = Preprocess()

    def __init__(self):
        super(PreProcessResponse, self).__init__()

    def _preprocess(self, str_in):
        # The shared preprocessor yields (normalised sentence, segmentation).
        normalised, segmented = self._preprocess_class.process(str_in)
        return {'str_synonym': normalised, 'words_segment': segmented}

    def _check(self, input):
        # Delegate validation; returns (ok, message).
        return self._preprocess_check_class.check(input)

    def _response(self, input):
        """Build the response dict: status 1 + data on success, status 0 + msg otherwise."""
        result = copy.deepcopy(self._data_return)
        ok, msg = self._check(input)
        if not ok:
            result['status'] = 0
            result['msg'] = msg
            return result
        result['status'] = 1
        result['data'] = self._preprocess(input.question)
        return result
class Action2(ActionBase):
    """Inherit only the domain slot from context."""

    name = PatternClassificationMapNames.only_inherit_domain.value
    # _slots_maintain_class = SlotsMaintain()
    _prounoun_class = PronounDeal()
    _preprocess_class = Preprocess()

    def __init__(self):
        super(Action2, self).__init__()

    def action(self, sessionid_in):
        # If the current sentence lacks a domain, inherit it from context;
        # a pronoun also triggers domain inheritance; a sentence that already
        # carries a domain inherits nothing else.
        pronoun_words, fill_words, _cur_entities = self._prounoun_class.replace_words(
            sessionid_in)
        domain_only = {cname.domain.value: fill_words[cname.domain.value]}
        return pronoun_words, domain_only
class InteractionBase(object):
    """Abstract base for dialogue-interaction handlers.

    Subclasses decide whether an interaction is needed for the current turn
    and, if so, carry it out.
    """

    _entity_class = Entity()
    _preprocess_class = Preprocess()

    def __init__(self):
        pass

    @abc.abstractmethod
    def judge_interaction(self, sessionid_in, label2entities_cur_dict_in, str_in):
        """Decide whether an interaction is required for this turn."""
        pass

    @abc.abstractmethod
    def deal_interaction(self, sessionid_in, label2entities_cur_dict_in, str_in):
        """Perform the interaction for this turn."""
        pass
class RewriteResponse(BaseResponse):
    """Response wrapper for the rewrite endpoint.

    Validates the request, registers the raw question on the session user,
    then rewrites the question using dialogue context.
    """

    name = rcategory.rewrite.value
    _rewrite_check_class = RewriteCheck()
    _preprocess_class = Preprocess()
    _nerdeal_class = NerDeal()
    _usermanager = usermanager

    def __init__(self):
        super(RewriteResponse, self).__init__()

    def _rewrite(self, sessionid_in):
        # Context rewrite returns six values; only four are surfaced here.
        (words_with_ctx, fill_words, cur_entities,
         flag_inter, abbrev_str, abbrev2std_list) = self._nerdeal_class.replace_with_context(
            sessionid_in)
        return {
            'words_with_context': words_with_ctx,
            'words_with_context_notin_curstr': fill_words,
            'label2entities_cur_dict': cur_entities,
            'abbrev2stds_dict': abbrev2std_list
        }

    def _check(self, input):
        # Delegate validation; returns (ok, message).
        return self._rewrite_check_class.check(input)

    def _response(self, input):
        """Build the response dict: status 1 + rewrite data on success, status 0 + msg otherwise."""
        result = copy.deepcopy(self._data_return)
        ok, msg = self._check(input)
        if not ok:
            result['status'] = 0
            result['msg'] = msg
            return result
        result['status'] = 1
        # Ensure a user object exists for this session, then store the raw
        # question (which refreshes the derived synonym fields).
        user_cur = self._usermanager.create(input.jdpin, input.session)
        usermanager.update(input.session, str_raw=input.question)
        result['data'] = self._rewrite(sessionid_in=input.session)
        result['data']['str_synonym'] = user_cur['str_synonym']
        return result
class UserManager(object, metaclass=Singleton):
    """Singleton registry mapping session ids to User objects."""

    _preprocess_class = Preprocess()
    _users_containers = {}

    def __init__(self):
        pass

    def create(self, jdpin_in, session_id_in):
        """Return the session's user, registering a fresh one if absent."""
        if session_id_in not in self._users_containers:
            fresh = User()
            fresh['jdpin_id'] = jdpin_in
            fresh['session_id'] = session_id_in
            self._users_containers[session_id_in] = fresh
        return self._users_containers[session_id_in]

    def update(self, session_id, **kwargs):
        """Set the given fields on the session's user.

        Setting `str_raw` also refreshes the derived synonym/segmentation
        fields; setting `str_synonym` refreshes its segmentation.
        """
        target = self._users_containers[session_id]
        for field, value in kwargs.items():
            if field in target.__dict__:
                target[field] = value
            if field == 'str_raw':
                synonym_str, synonym_cut = self._preprocess_class.process(value)
                target['str_synonym'] = synonym_str
                target['str_synonym_cut'] = synonym_cut
                target['str_raw_cut'] = jieba.lcut(value)
            if field == 'str_synonym':
                target['str_synonym'] = value
                target['str_synonym_cut'] = jieba.lcut(value)

    def delete(self, session_id):
        """Drop the session's user, if present."""
        if session_id in self._users_containers:
            self._users_containers.pop(session_id)

    def get_user(self, session_id_in):
        """Return the session's user; raises KeyError if unknown."""
        return self._users_containers[session_id_in]
class Action3(ActionBase):
    """Normal inheritance action for the common sentence-pattern class."""
    name = PatternClassificationMapNames.common_classification.value
    _prounoun_class = PronounDeal()
    # _slots_maintain_class = SlotsMaintain()
    _similarmodel_class = SimilarModelSents()
    _preprocess_class = Preprocess()
    _depend_analysis_class = DependAnalysis()

    def __init__(self):
        super(Action3, self).__init__()

    def action_accurate(self, str_in, words_list_in, jdpin_in):
        # Exact path: pronoun replacement only, no fuzzy recall.
        src_prounoun_word, dst_word_to_fufill, label2entities_cur_dict = self._prounoun_class.replace_words(
            str_in, words_list_in, jdpin_in)
        return src_prounoun_word, dst_word_to_fufill, str_in

    def action_with_fuzzy(self, sessionid_in):
        # Synonym-sentence replacement: first inspect the entity words of the
        # current sentence, then decide whether to fall back to fuzzy matching.
        user_cur = self._usermanager.get_user(sessionid_in)
        src_prounoun_word, dst_word_to_fufill, label2entities_cur_dict = \
            self._prounoun_class.replace_words(sessionid_in)
        property_value = label2entities_cur_dict[cname.property.value]
        domain_list = label2entities_cur_dict[cname.domain.value]
        if not property_value:
            # No property slot found: strip the domain word (if any) and run
            # similarity recall on the remainder.
            if domain_list:
                domain_str = domain_list[0]
                str_no_domain = user_cur['str_synonym'].replace(domain_str, '')
            else:
                domain_str = ''
                str_no_domain = user_cur['str_synonym']
            strs_set, scores_set = self._similarmodel_class.similar_threshold(
                str_no_domain)
            if strs_set:
                # print('recall similarity===>',words_set)
                mylog.info('similar_threshold str_no_domain\t\t{}'.format(
                    str(str_no_domain)))
                mylog.info('similar_threshold words_set\t\t{}'.format(
                    str(strs_set)))
                mylog.info('similar_threshold scores_set\t\t{}'.format(
                    str(scores_set)))
                # Take the best recalled sentence, re-attach the domain word,
                # re-normalise it, store it on the user, and redo pronoun
                # replacement on the updated session state.
                str_similar = strs_set[0]
                # str_similar = ''.join(words_similar)
                str_similar_with_domain = domain_str + str_similar
                str_synonym, str_synonym_words = self._preprocess_class.process(
                    str_similar_with_domain)
                self._usermanager.update(sessionid_in,
                                         str_synonym=str_synonym,
                                         str_synonym_cut=str_synonym_words)
                src_prounoun_word, dst_word_to_fufill, label2entities_cur_dict = self._prounoun_class.replace_words(
                    sessionid_in)
            else:
                # if '是多久' in str_no_domain: #
                print('str_no_domain==>',str_no_domain)
                # No recall: if dependency analysis judges the sentence
                # complete, keep only the domain slot for filling.
                if self._depend_analysis_class.judge_complete(str_no_domain):
                    dst_word_to_fufill = {
                        cname.domain.value: dst_word_to_fufill[cname.domain.value]
                    }
                else:
                    pass
        else:
            pass
        return src_prounoun_word, dst_word_to_fufill

    def action(self, sessionid_in):
        # sentence_synonym, words_jieba = self._preprocess_class.process(str_in)
        return self.action_with_fuzzy(sessionid_in)
if __name__ == "__main__":
    # Manual smoke test: seed a few QA pairs into the chat-record store.
    # NOTE(review): the script appears truncated — the last `sententc`
    # assignment is never used; confirm against the full file.
    # setUserInfo('1234', '安行万里的投保年龄', '123')
    # setUserInfo('1234', '安行万里的保障地区', '123')
    # setUserInfo('1234', '安行万里的保险事故通知有哪些', '123')
    # setUserInfo('1234', '123', '123')
    # r=getUserInfo('1234')
    # print(r)
    import sys
    sys.path.append('../../')
    # from online.nlu.slots_maintain import SlotsMaintain
    from online.nlu.preprocess import Preprocess
    from online.nlu.query_rewrite import NerDeal
    preprocess_class = Preprocess()
    nerdeal_class = NerDeal()
    # slots_maintain_class = SlotsMaintain()
    jdpin = '12345'
    sententc = '安行万里的投保年龄'
    answer = '没问题'
    setQAPairs(jdpin, sententc, answer, 1)
    # print(rds.keys())
    sententc = '安行万里的保障地区'
    answer = '没问题'
    setQAPairs(jdpin, sententc, answer, 1)
    sententc = '安行万里的保险事故通知有哪些'
    answer = '没问题'
    setQAPairs(jdpin, sententc, answer, 1)
    sententc = '目的地填写错误怎么办'
class SimilarModelBase(object):
    """Base class for sentence-similarity models.

    Loads a stopword list and builds n-gram representations of a
    standard-sentence -> similar-sentences map.
    """

    # NOTE(review): Python-2 style metaclass declaration — has no effect on
    # Python 3, so @abc.abstractmethod below is not enforced; confirm intent
    # before changing, since the abstract name is also name-mangled.
    __metaclass__ = ABCMeta
    _preprocess_class = Preprocess()
    _ngram_num = 1

    def __init__(self):
        self._preload()

    def _preload(self):
        # Cache the stopword list once per instance.
        self._stopwords = self._preload_stopwords()

    def _preload_stopwords(self):
        """Load and flatten the stopword list from the configured file.

        Returns:
            list: flattened stopwords.
        """
        stopwords_list = readfile_line2list(PathUtil().stopwords_filepath)
        return list(itertools.chain.from_iterable(stopwords_list))

    @abc.abstractmethod
    def __load_similar_sents(self):
        """Load the synonym-sentence map; subclasses must override."""
        raise ValueError('要继承复写')

    def _filter_stopwords(self, words_in):
        """Pass the word list through unchanged.

        Stopword filtering is currently DISABLED: the original body returned
        the input immediately, leaving an unreachable loop that referenced an
        undefined `words_ret` (a NameError had it ever run). The dead code is
        removed here; the pass-through behavior is unchanged.
        """
        return words_in

    def _get_words_freq(self, sentences_map_in):
        """Count word frequencies over all keys and values of the map.

        Args:
            sentences_map_in: mapping of standard sentence -> list of
                similar sentences.

        Returns:
            list of (word, count) tuples sorted by count, descending.
        """
        words_freq_dict = collections.defaultdict(int)
        for key, values in sentences_map_in.items():
            for word_iter in self._preprocess_class.cut_words(key):
                words_freq_dict[word_iter] += 1
            for value_iter in values:
                for word_iter in self._preprocess_class.cut_words(value_iter):
                    words_freq_dict[word_iter] += 1
        return sorted(words_freq_dict.items(),
                      key=lambda x: int(x[1]),
                      reverse=True)

    def _get_n_gram_row(self, sentence_in, n):
        """Cut one sentence, filter stopwords, build its n-gram combinations.

        Returns:
            (combinations, filtered_words): list of n-word tuples and the
            filtered word list they were built from.
        """
        words_key_iter = self._preprocess_class.cut_words(sentence_in)
        words_key_filtered = self._filter_stopwords(words_key_iter)
        words_key_combinate = list(
            itertools.combinations(words_key_filtered, r=n))
        return words_key_combinate, words_key_filtered

    def _get_n_gram(self, sentences_map_in, n):
        """Build n-gram combinations for every sentence group in the map.

        Returns:
            words_std_and_similar_list: per-group list of n-gram lists
                (standard sentence first, then each similar sentence),
            lengths: per-group total count of filtered words,
            sentences: per-group [standard, *similars] sentence lists.
        """
        words_std_and_similar_list = list()
        lengths = []
        sentences = list()
        for key, values in sentences_map_in.items():
            length_raw_words = 0
            words_std_and_similar_list_iter = list()
            words_key_combinate, words_key_filtered = self._get_n_gram_row(
                key, n)
            words_std_and_similar_list_iter.append(words_key_combinate)
            length_raw_words += len(words_key_filtered)
            for value_iter in values:
                words_value_combinate, words_value_filtered = self._get_n_gram_row(
                    value_iter, n)
                length_raw_words += len(words_value_filtered)
                words_std_and_similar_list_iter.append(words_value_combinate)
            words_std_and_similar_list.append(words_std_and_similar_list_iter)
            lengths.append(length_raw_words)
            sentences.append([key, *values])
        return words_std_and_similar_list, lengths, sentences
class NerDeal(object):
    """Context-aware query rewriting.

    Classifies the sentence pattern, resolves pronouns and slots from
    dialogue history, and triggers fuzzy interaction when needed.
    """
    _prounoun_class = PronounDeal()
    _sentenceclassification_class = SentenceClassification()
    _fuzzy_interaction_class = FuzzyInterAction()
    _preprocess_class = Preprocess()
    _usermanager_class = usermanager
    _domain_classification_class = DomainClassification()
    _classification2action_class = Classification2Action()

    def __init__(self):
        pass

    def _pattern_classification(self, sentence_in):
        # Sentence-pattern classification delegate.
        return self._sentenceclassification_class.classification(sentence_in)

    def replace_words_list(self, words_list_in, src_prounoun_word,
                           dst_word_to_fufill):
        # Replace each pronoun in the word list with one entity popped from
        # the context slots, then append any leftover context entities at the
        # end. Both inputs are deep-copied so callers' data is untouched.
        words_list_out = copy.deepcopy(words_list_in)
        dst_word_out = copy.deepcopy(dst_word_to_fufill)
        if src_prounoun_word:
            for src_word in src_prounoun_word:
                index_pronoun = words_list_out.index(src_word)
                # Use the first non-empty slot label; pop consumes the entity.
                for label, words_iter in dst_word_out.items():
                    if words_iter:
                        entity_to_be_repl = words_iter.pop()
                        words_list_out[index_pronoun] = entity_to_be_repl
                        break
                    else:
                        pass
        for key, value in dst_word_out.items():
            words_list_out.extend(value)
        return words_list_out

    def _get_last_questionbak(self, jdpin_in):
        # Backup variant: read the last QA pair from the chat-record store.
        chatRecordList = getQAPairs(jdpin_in)
        if chatRecordList:
            last_question = chatRecordList[-1].question
            last_answer = chatRecordList[-1].answer
        else:
            last_question, last_answer = '', ''
        return last_question, last_answer

    def _get_entity_from_last_answer(self, str_in):
        # Mine entities from a single answer string across all domains.
        return self._prounoun_class.get_entity_from_strs([str_in],
                                                         dcnames.alldomain.value)

    def _deal_last_question(self, session_id_in):
        # If the previous turn was a product query, mine entities from its
        # answer and attach them to the latest slot record.
        last_question, last_answer, last_sentence_pattern_classification = _get_last_question(session_id_in)
        classification = self._pattern_classification(last_question)
        if classification == classification_name.query_product.value:
            label2entities_cur_dict, words_timestap = self._get_entity_from_last_answer(last_answer)
            # self._slots_maintain_class.update_slots(label2entities_cur_dict, session_id_in)
            usermanager.get_user(session_id_in).update_slots(
                -1, **{usname.slots_answer.value: label2entities_cur_dict})
        else:
            pass

    def _classification_sentence_pattern(self, sessionid_in, str_in):
        # Classify the sentence pattern; for the bottom no-inherit bucket,
        # retry on the synonym-normalised string. In the online environment a
        # KG-provided pattern (if any) overrides the model result.
        user_cur = usermanager.get_user(sessionid_in)
        classification = self._pattern_classification(str_in)
        if classification == classification_name.bottom_no_inherit.value:
            str_synonym, _ = self._preprocess_class.process(str_in)
            classification = self._pattern_classification(str_synonym)
        if _environ == 'online':
            sentence_pattern_classification_from_kg = _get_sentence_pattern_from_kg(
                sessionid_in, user_cur['str_raw'])
            if sentence_pattern_classification_from_kg:
                # user_cur['sentence_pattern_classification'] = sentence_pattern_classification_from_kg
                classification = sentence_pattern_classification_from_kg
            else:
                pass
        return classification

    def _get_prounoun_and_dst_words(self, sessionid_in):
        # Dispatch to the action chosen for the session's classification.
        return self._classification2action_class.act(sessionid_in)

    def replace_with_context_only_question(self, sessionid_in):
        """Rewrite the current question using dialogue history.

        Extracts entities from past turns and, depending on whether the
        current string contains a pronoun, decides whether entities are
        appended or substituted.

        Args:
            sessionid_in: session id; the user's raw/synonym strings and
                their segmentations are read from the session user.

        Returns:
            words_list_out: word list after context completion.
            dst_word_to_fufill: slots extracted from dialogue history.
            label2entities_cur_dict: slots extracted from the current
                utterance, ready to be stored.
        """
        user_cur = self._usermanager_class.get_user(sessionid_in)
        sentence_classification = self._classification_sentence_pattern(
            sessionid_in, user_cur['str_raw'])
        mylog.info('sentence_classification:\t\t{}\tstr_raw\t{}'.format(
            sentence_classification, user_cur['str_raw']))
        self._usermanager_class.update(
            sessionid_in,
            sentence_pattern_classification=sentence_classification)
        src_prounoun_word, dst_word_to_fufill = self._get_prounoun_and_dst_words(sessionid_in)
        label2entities_cur_dict, words_timestap = self._prounoun_class.get_entity_from_strs(
            [user_cur['str_synonym']], user_cur['domain_classification'])
        words_list_out = self.replace_words_list(user_cur['str_synonym_cut'],
                                                 src_prounoun_word,
                                                 dst_word_to_fufill)
        usermanager.update(sessionid_in,
                           **{'dst_word_to_fufill': dst_word_to_fufill,
                              'label2entities_cur_dict': label2entities_cur_dict})
        return words_list_out, dst_word_to_fufill, label2entities_cur_dict

    def replace_with_context(self, sessionid_in):
        """Full rewrite pipeline for one turn.

        Mines the previous answer, classifies the domain, rewrites the
        question with context, runs the fuzzy-interaction check, and persists
        the slot record for this turn.

        Returns:
            (words_list_out, dst_word_to_fufill, label2entities_cur_dict,
             flag_inter, abbrev_str, abbrev2std_list)
        """
        self._deal_last_question(sessionid_in)  # todo 添加分类
        user_cur = usermanager.get_user(sessionid_in)
        # todo 添加当前槽位切换
        domain_classification = self._domain_classification_class.classify(
            user_cur['str_synonym'])
        mylog.info('domain_classification:\t\t{}'.format(domain_classification))
        usermanager.update(sessionid_in,
                           domain_classification=domain_classification)
        # usermanager.get_user(sessionid_in).update_slots(index=-1,sentence_pattern_classification=sentence_pattern_classification_from_kg)
        # self._usermanager_class.update(sessionid_in,{'str_raw':str_in,'str_synonym':str_synonym,
        # 'domain_classification':domain_classification,'str_synonym_cut':str_synonym_cut})
        words_list_out, dst_word_to_fufill, label2entities_cur_dict = self.replace_with_context_only_question(
            sessionid_in)
        # print('words_list_out, dst_word_to_fufill, label2entities_cur_dict==>',words_list_out, dst_word_to_fufill, label2entities_cur_dict)
        flag_inter, abbrev_str, abbrev2std_list = self._fuzzy_interaction_class.deal_fuzzy_interaction(
            sessionid_in, label2entities_cur_dict, user_cur['str_raw'],
            words_list_out)
        # Persist this turn's slot record on the session user.
        usermanager.get_user(sessionid_in).insert_slots(
            **{'question': user_cur['str_synonym'],
               'slots_question': label2entities_cur_dict,
               'domain_classification': user_cur['domain_classification'],
               'sentence_pattern_classification': user_cur['sentence_pattern_classification']})
        # Reset the recommendation-pattern flag for this user in redis.
        rds.set(user_cur['jdpin_id'] + _flag_recomm_sent_pattern, 0)
        return words_list_out, dst_word_to_fufill, label2entities_cur_dict, flag_inter, abbrev_str, abbrev2std_list