def _add_weather_db_config(cls, loader):
    """
    Add the database configuration for weather question answering

    Inserts the weather standard question, its answer and the NLP intent
    configuration. When the weather standard question is already stored, only
    an info message is logged and nothing is inserted.

    @param {object} loader - object whose `logger` attribute is used for logging (may be None)
    """
    log = loader.logger

    # Skip the whole setup when the weather standard question already exists
    existing = StdQuestion.select().where(
        (StdQuestion.milvus_id == -1)
        & (StdQuestion.q_type == 'context')
        & (StdQuestion.question == '天气问答')
    ).count()
    if existing > 0:
        if log is not None:
            log.info('Weather config already exists!')
        return

    # Standard question record
    question_fields = dict(
        q_type='context',
        milvus_id=-1,
        collection=WEATHER_COLLECTION,
        partition=WEATHER_PARTITION,
        question='天气问答',
    )
    weather_q = StdQuestion.create(**question_fields)

    # Answer record linked to the standard question
    ask_param = "['ApiToolAsk', 'weather', '%s', '%s', {}, True]" % (
        WEATHER_COLLECTION, WEATHER_PARTITION
    )
    Answer.create(
        std_question_id=weather_q.id,
        a_type='ask',
        type_param=ask_param,
        replace_pre_def='N',
        answer=WEATHER_ERROR,
    )

    # NLP intent recognition record
    intent_fields = dict(
        action='天气查询',
        match_collection='',
        match_partition='',
        collection=WEATHER_COLLECTION,
        partition=WEATHER_PARTITION,
        std_question_id=weather_q.id,
        order_num=0,
        exact_match_words='[]',
        exact_ignorecase='N',
        match_words="['天气', '今天天气']",
        ignorecase='N',
        word_scale=0.0,
        info="['InitInfo', 'get_wordclass_list', {'condition': [{'key': 'time', 'class': ['t']}, {'key': 'addr', 'class': ['ns']}]}]",
        check="['InitCheck', 'reject_by_nest', {'next': {'天气': ['真好', '不错', '真差']}, }]",
    )
    NlpPurposConfigDict.create(**intent_fields)
def import_config(cls, qa_manager: QAManager, logger: Logger):
    """
    Add the standard configuration of the leave message plugin
    (removal of already existing records is not handled)

    Creates, in this order: the plugin's standard question, its answer, the
    NLP intent configuration and the upload file configuration.

    @param {QAManager} qa_manager - data manager object (not used by this method)
    @param {Logger} logger - logger object, None disables logging
    """
    def _report(message):
        # Logging is optional: callers may pass logger=None
        if logger is not None:
            logger.info(message)

    # Standard question and the answer bound to it
    plugin_q = StdQuestion.create(
        tag='leave_message_direct_action',
        q_type='context',
        milvus_id=-1,
        collection=LEAVE_MESSAGE_PLUGIN_COLLECTION,
        partition=LEAVE_MESSAGE_PLUGIN_PARTITION,
        question='留言插件通用处理',
    )
    Answer.create(
        std_question_id=plugin_q.id,
        a_type='ask',
        type_param="['LeaveMessagePlugin', 'save_msg', '', '', {}, True]",
        replace_pre_def='N',
        answer='留言插件通用处理',
    )
    _report('create leave message plugin std question config success!')

    # NLP intent parameters; list/dict style settings are stored as their str() form
    nlp_cfg = LEAVE_MESSAGE_PLUGIN_NLP_CONFIG
    intent_fields = dict(
        action='leave_message',
        match_collection='',
        match_partition='',
        collection=LEAVE_MESSAGE_PLUGIN_COLLECTION,
        partition=LEAVE_MESSAGE_PLUGIN_PARTITION,
        std_question_id=plugin_q.id,
        order_num=nlp_cfg['order_num'],
        exact_match_words=str(nlp_cfg['exact_match_words']),
        exact_ignorecase=nlp_cfg['exact_ignorecase'],
        match_words=str(nlp_cfg['match_words']),
        ignorecase=nlp_cfg['ignorecase'],
        word_scale=nlp_cfg['word_scale'],
        info=str(nlp_cfg['info']),
        check=str(nlp_cfg['check']),
    )
    NlpPurposConfigDict.create(**intent_fields)
    _report('create leave message plugin nlp config success!')

    # Upload file parameters are copied one-to-one from the plugin constant
    upload_cfg = LEAVE_MESSAGE_PLUGIN_UPLOAD_FILE_CONFIG
    upload_keys = (
        'upload_type', 'exts', 'size', 'save_path', 'url', 'rename', 'after', 'remark'
    )
    upload_fields = {key: upload_cfg[key] for key in upload_keys}
    UploadFileConfig.create(**upload_fields)
    _report('create leave message plugin upload file config success!')
def import_config(cls, qa_manager: QAManager, logger: Logger):
    """
    Add the standard configuration of the form plugin
    (removal of already existing records is not handled)

    Creates the plugin's standard question and answer, loads the form
    configurations offered by the extension modules found under
    FORM_PLUGIN_SEARCH_PATH into the global 'FORM_PLUGIN_CONFIG' dictionary,
    and finally creates one NLP intent configuration per registered form type.

    @param {QAManager} qa_manager - data manager object (not used by this method)
    @param {Logger} logger - logger object, None disables logging
    """
    # Make sure both shared global dictionaries exist
    FORM_PLUGIN_CONFIG = RunTool.get_global_var('FORM_PLUGIN_CONFIG')
    if FORM_PLUGIN_CONFIG is None:
        FORM_PLUGIN_CONFIG = dict()
        RunTool.set_global_var('FORM_PLUGIN_CONFIG', FORM_PLUGIN_CONFIG)

    # Only initialised here; this method does not read it afterwards
    FORM_PLUGIN_SELF_TABLE = RunTool.get_global_var('FORM_PLUGIN_SELF_TABLE')
    if FORM_PLUGIN_SELF_TABLE is None:
        FORM_PLUGIN_SELF_TABLE = dict()
        RunTool.set_global_var('FORM_PLUGIN_SELF_TABLE', FORM_PLUGIN_SELF_TABLE)

    # Insert the standard question
    _std_q = StdQuestion.create(
        tag='form_direct_action',
        q_type='context',
        milvus_id=-1,
        collection=FORM_PLUGIN_COLLECTION,
        partition=FORM_PLUGIN_PARTITION,
        question='表单插件通用处理'
    )

    # Insert the answer of the standard question
    Answer.create(
        std_question_id=_std_q.id,
        a_type='job',
        type_param="['FormPlugin', 'operate', {}]",
        replace_pre_def='N',
        answer='表单插件通用处理'
    )

    if logger is not None:
        logger.info('create form plugin std question config success!')

    # Load the extension plugins
    if FORM_PLUGIN_SEARCH_PATH is not None:
        _path = os.path.join(os.path.dirname(__file__), FORM_PLUGIN_SEARCH_PATH)
        _file_list = FileTool.get_filelist(
            path=_path, regex_str=r'.*\.py$', is_fullname=False
        )
        for _file in _file_list:
            if _file == '__init__.py':
                continue

            # Import the module (file name without the '.py' suffix)
            _module = ImportTool.import_module(
                _file[0:-3], extend_path=_path, is_force=True
            )
            for _, _class in inspect.getmembers(_module, inspect.isclass):
                if _module.__name__ != _class.__module__:
                    # Class was imported into the module, not defined by it
                    continue

                _get_form_type = getattr(_class, 'get_form_type', None)
                if _get_form_type is None or not callable(_get_form_type):
                    # Not a standard plugin class
                    continue

                _form_type = _get_form_type()

                # A class without a callable get_form_config cannot provide a
                # configuration; skip it instead of failing with a TypeError
                # that would abort the import of every remaining plugin
                _get_form_config = getattr(_class, 'get_form_config', None)
                if _get_form_config is None or not callable(_get_form_config):
                    if logger is not None:
                        logger.warning(
                            'form plugin [%s] has no get_form_config, skipped!' % _form_type
                        )
                    continue

                # Register the configuration
                FORM_PLUGIN_CONFIG[_form_type] = _get_form_config()

    # Create the NLP intent configuration of every registered form type
    for _form_type, _config in FORM_PLUGIN_CONFIG.items():
        NlpPurposConfigDict.create(
            action=_form_type,
            match_collection='',
            match_partition='',
            collection=FORM_PLUGIN_COLLECTION,
            partition=FORM_PLUGIN_PARTITION,
            std_question_id=_std_q.id,
            order_num=_config['order_num'],
            exact_match_words=str(_config['exact_match_words']),
            exact_ignorecase=_config['exact_ignorecase'],
            match_words=str(_config['match_words']),
            ignorecase=_config['ignorecase'],
            word_scale=_config['word_scale'],
            info=str(_config['info']),
            check=str(_config['check'])
        )

        if logger is not None:
            logger.info('create form plugin [%s] success!' % _form_type)
def _import_answers_by_xls(self, excel_io, milvus: mv.Milvus, bert: BertClient, std_question_id_mapping: dict):
    """
    Import the rows of the 'Answers' sheet as Answer records

    The sheet is read in batches of `self.excel_batch_num` rows. Every
    `{$id=...$}` placeholder inside `type_param`, as well as the row's
    `std_question_id`, is translated through std_question_id_mapping; values
    without a mapping entry are kept unchanged. A row that fails to import is
    logged and skipped, the remaining rows are still processed.

    @param {object} excel_io - IO object of pd.io.excel.ExcelFile
    @param {Milvus} milvus - Milvus connection object (not used by this method)
    @param {BertClient} bert - bert service connection object (not used by this method)
    @param {dict} std_question_id_mapping - mapping dictionary of standard question ids
    """
    try:
        # Read the header row only
        _df_header = pd.read_excel(
            excel_io, sheet_name='Answers', nrows=0, engine=self.excel_engine
        )
    except Exception:
        # Best effort: when the sheet cannot be read there is nothing to import
        _df_header = None

    if _df_header is None:
        return

    def replace_var_fun(m):
        # Replacement callback: translate '{$id=<old id>$}' to the mapped id
        _match_str = m.group(0)
        if _match_str.startswith('{$id='):
            # Strip the leading '{$id=' and the trailing '$}'
            _id: str = _match_str[5: -2]
            if _id.isdigit():
                # Numeric ids are looked up as int
                _new_id = std_question_id_mapping.get(int(_id), _id)
            else:
                # Everything else is looked up as string
                _new_id = std_question_id_mapping.get(_id, _id)

            return str(_new_id)

        # Other placeholders are left untouched
        return _match_str

    # Compiled once for all rows. The flag has to be given here (or as the
    # `flags=` keyword): the 4th positional argument of re.sub is `count`,
    # which would limit the number of replaced placeholders.
    _var_pattern = re.compile(r'\{\$.+?\$\}', re.M)

    _skiprows = 1  # number of rows to skip, starting after the header row
    _columns = dict(enumerate(_df_header.columns.tolist()))
    while True:
        # Batches are read without header, so columns are numbered 0..n-1
        _df = pd.read_excel(
            excel_io, sheet_name='Answers', nrows=self.excel_batch_num,
            header=None, skiprows=_skiprows, engine=self.excel_engine
        )
        _skiprows += self.excel_batch_num
        if not _df.shape[0]:
            # No more data
            break

        # Give the numbered columns their header names
        _df.rename(columns=_columns, inplace=True)
        for _index, _row in _df.iterrows():
            # Add the answer row by row; _index is the row index, _row the data
            try:
                _std_question_id = std_question_id_mapping.get(
                    _row['std_question_id'], _row['std_question_id']
                )
                _type_param = _var_pattern.sub(
                    replace_var_fun, str(_row['type_param'])
                )
                Answer.create(
                    std_question_id=_std_question_id,
                    a_type=_row['a_type'],
                    type_param=_type_param,
                    replace_pre_def=_row['replace_pre_def'],
                    answer=_row['answer']
                )
            except Exception:
                # .get() keeps the handler itself from raising when the
                # failure was caused by a missing column
                self._log_error('imported answer [id: %s] [%s] error: %s' % (
                    str(_row.get('std_question_id')), _row.get('answer'),
                    traceback.format_exc()
                ))

        self._log_debug('imported answers[%d]: %s' % (_skiprows, str(_df)))
def add_std_question(self, question: str, collection: str = 'chat', q_type: str = 'ask', partition: str = None, answer: str = None, a_type: str = 'text', replace_pre_def: str = 'N', a_type_param: str = '') -> int:
    """
    Add a standard question

    The question vector is stored in Milvus first; afterwards the question
    (and its answer, when one is given) is written to the answer database
    inside one transaction.

    @param {str} question - the standard question
    @param {str} collection='chat' - question category, 'chat' by default, custom categories are allowed
    @param {str} q_type='ask' - question type
        ask - question/answer type (the question maps to an answer)
        context - scene type (the question maps to a context scene)
    @param {str} partition=None - scene the question belongs to, used when q_type is context
    @param {str} answer=None - answer of the standard question
    @param {string} a_type='text' - answer type
        text - text answer
    @param {str} replace_pre_def='N' - whether predefined strings of the answer are replaced
    @param {string} a_type_param='' - extended parameter of the answer type

    @returns {int} - id of the created question record

    @throws {AttributeError} - when q_type is 'ask' and no answer is given
    """
    # Basic validation
    if answer is None and q_type == 'ask':
        raise AttributeError('parameter answer should be not None!')

    with self.get_bert_client() as bert_conn, self.get_milvus() as milvus_conn:
        # Encode the question and normalise the resulting vectors
        encoded = bert_conn.encode([question, ])
        vec_list = self.normaliz_vec(encoded.tolist())
        self._log_debug('get question vectors: %s' % str(len(vec_list)))

        # The collection has to exist before anything is stored in Milvus
        self._add_collection(collection, milvus_conn)

        # An empty partition is treated like "no partition"
        if partition is None or partition == '':
            partition = None
        else:
            self._add_partition(collection, partition, milvus_conn)

        new_milvus_id = self._add_milvus_question(
            vec_list[0], collection, partition, milvus_conn
        )

    # The database stores '' where no partition is used
    if partition is None:
        db_partition = ''
    else:
        db_partition = partition

    # Write question and answer in one transaction
    with self.database.atomic() as txn:
        question_row = StdQuestion.create(
            q_type=q_type,
            milvus_id=new_milvus_id,
            collection=collection,
            partition=db_partition,
            question=question
        )

        if answer is not None:
            Answer.create(
                std_question_id=question_row.id,
                a_type=a_type,
                replace_pre_def=replace_pre_def,
                type_param=a_type_param,
                answer=answer
            )

        # Commit the transaction
        txn.commit()

    self._log_debug('insert question: %s' % str(question_row))
    return question_row.id