def chat_record_predict(db_namespace='f_fuwu_dw', doc2vec_dm_model=None,
                        doc2vec_dbow_model=None, args=None, *models):
    """Read chat records from MySQL and score them with the sentiment models.

    :param db_namespace: database (namespace) name
    :param doc2vec_dm_model: trained doc2vec DM model
    :param doc2vec_dbow_model: trained doc2vec DBOW model
    :param args: parsed command-line args (uses ``args.prediction_date``)
    :param models: regression models used for scoring
    """
    # Build the query time range, e.g. 2017-12-26 00:00:00 - 2017-12-26 23:59:59
    predict_date = args.prediction_date
    start_predict_date = predict_date + ' 00:00:00'
    end_predict_date = predict_date + ' 23:59:59'
    if not os.path.exists(chat_query_sql):
        logger.error('The sql of chat not exist!')
        sys.exit(0)
    # Query the day's chat records.
    query_mysql_object = dbops.Mysql(db_namespace)
    query_sql_content = dbops.get_sql_content(chat_query_sql)
    query_sql_content = query_sql_content % (start_predict_date,
                                             end_predict_date)
    result = query_mysql_object.query(query_sql_content)
    query_mysql_object.close()
    # Bundle each record with the models/args so each worker is self-contained.
    result_with_parms = [(r, doc2vec_dm_model, doc2vec_dbow_model, args,
                          models) for r in result]
    # Score the records in parallel; close AND join the pool so all workers
    # finish (and their DB inserts complete) before this function returns.
    pool = Pool(2)
    try:
        pool.map(chat_record_neg_analyze, result_with_parms)
    finally:
        pool.close()
        pool.join()
def speech_record_predict(db_namespace='f_fuwu_dm', doc2vec_dm_model=None,
                          doc2vec_dbow_model=None, args=None, *models):
    """Read speech (call) records from MySQL and score them with the models.

    :param db_namespace: database (namespace) name
    :param doc2vec_dm_model: trained doc2vec DM model
    :param doc2vec_dbow_model: trained doc2vec DBOW model
    :param args: parsed command-line args (uses ``args.prediction_date``)
    :param models: regression models used for scoring
    """
    # Query date (the SQL template takes it twice).
    predict_date = args.prediction_date
    if not os.path.exists(speech_query_with_emotion_sql):
        logger.error('The sql of speech not exist!')
        sys.exit(0)
    # Query the day's speech records (emotion-aware query).
    query_mysql_object = dbops.Mysql(db_namespace)
    query_sql_content = dbops.get_sql_content(speech_query_with_emotion_sql)
    query_sql_content = query_sql_content % (predict_date, predict_date)
    result = query_mysql_object.query(query_sql_content)
    query_mysql_object.close()
    # Bundle each record with the models/args so each worker is self-contained.
    result_with_parms = [(r, doc2vec_dm_model, doc2vec_dbow_model, args,
                          models) for r in result]
    # Score the records in parallel; close AND join the pool so all workers
    # finish (and their DB inserts complete) before this function returns.
    pool = Pool(10)
    try:
        pool.map(speech_record_neg_analyze, result_with_parms)
    finally:
        pool.close()
        pool.join()
logger = log.get_logger()


def parse_args():
    """Parse command-line arguments.

    :return: parsed argument namespace; ``prediction_date`` defaults to
        yesterday in ``YYYY-MM-DD`` format.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--prediction_date', type=str,
        default=(datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime("%Y-%m-%d"))
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    cur_path = os.getcwd()
    # <two dirs above cwd>/sql/qualitycheck/join_insert.sql
    join_insert_sql = os.path.join(
        os.path.dirname(os.path.dirname(cur_path)), 'sql', 'qualitycheck',
        'join_insert.sql')
    if not os.path.exists(join_insert_sql):
        logger.error('The sql of join insert not exist!')
        sys.exit(0)
    insert_mysql_object = dbops.Mysql('f_fuwu_dm')
    insert_sql_content = dbops.get_sql_content(join_insert_sql)
    # The template takes the prediction date four times.
    insert_sql_content = insert_sql_content % (
        args.prediction_date, args.prediction_date, args.prediction_date,
        args.prediction_date)
    try:
        insert_mysql_object.execute(insert_sql_content)
    except Exception as e:
        # Log (not print) so failures reach the configured log destination.
        logger.error(e)
    finally:
        insert_mysql_object.close()
# NOTE(review): this chunk starts mid-statement — the assignment whose
# right-hand side ends here (presumably issue_info_sql, given the read below)
# begins outside this view.
) + os.sep + 'sql' + os.sep + 'qualitycheck' + os.sep + 'query_flowcheck_gongdan_info.sql'
# Absolute paths to the quality-check SQL templates, rooted two directories
# above the current working directory.
filter_item_sql = os.path.dirname(
    os.path.dirname(cur_path)
) + os.sep + 'sql' + os.sep + 'qualitycheck' + os.sep + 'query_flowcheck_filter_item.sql'
chat_speech_sql = os.path.dirname(
    os.path.dirname(cur_path)
) + os.sep + 'sql' + os.sep + 'qualitycheck' + os.sep + 'query_flowcheck_chat_speech.sql'
flowcheck_result_insert_sql = os.path.dirname(
    os.path.dirname(cur_path)
) + os.sep + 'sql' + os.sep + 'qualitycheck' + os.sep + 'flowcheck_result_insert.sql'
flowcheck_join_insert_sql = os.path.dirname(
    os.path.dirname(cur_path)
) + os.sep + 'sql' + os.sep + 'qualitycheck' + os.sep + 'flowcheck_join_insert.sql'
# Read the SQL template contents.
issue_info_sql_content = dbops.get_sql_content(issue_info_sql)
filter_item_sql_content = dbops.get_sql_content(filter_item_sql)
chat_speech_sql_content = dbops.get_sql_content(chat_speech_sql)
# Fetch the filter conditions from the database.
query_mysql_object = dbops.Mysql("f_fuwu_dw")
filter_item_records = query_mysql_object.query(filter_item_sql_content)
query_mysql_object.close()
filter_items = {}
for record in filter_item_records:
    size = len(record)
    # Each filter record is expected to carry exactly 3 columns.
    if size != 3:
        logger.error("The data format is err!")
        sys.exit(0)
    try:
# NOTE(review): chunk ends mid-statement — the try body continues past this view.
def speech_record_neg_analyze(result_with_parms):
    """Score one speech (call) record for negative sentiment and persist hits.

    :param result_with_parms: tuple of
        (record, doc2vec_dm_model, doc2vec_dbow_model, args, models)
    """
    if len(result_with_parms) != 5:
        logger.error("The result with params format is err!")
        sys.exit(0)
    record, doc2vec_dm_model, doc2vec_dbow_model, args, models = \
        result_with_parms
    # The emotion-aware speech query returns exactly 13 columns.
    if len(record) != 13:
        logger.error("The data format is err!")
        sys.exit(0)
    # Columns used below (the rest — call sid, order number, handling score,
    # recording address, caller/dialed role, operator, start/stop time — are
    # intentionally ignored here):
    task_no = record[0]       # speech task number
    score_qingxu = record[3]  # emotion score
    result = record[11]       # call transcript (JSON)
    v_date = record[12].strftime('%Y-%m-%d')  # computation date
    try:
        speech_records = json.loads(result)
    except Exception as e:
        speech_records = None
        print(e)
    if speech_records:
        speech_contents = parser.speech_record_parser(speech_records)
        # (score, content) of the worst utterance found so far.
        speech_highest_content_score = (0.0, '')
        for speechContent in speech_contents:
            # Clean the utterance, then score it with the models.
            precess_content = data_processor.single_content_clean(
                speechContent, args)
            neg_prob = predict_prob(precess_content, doc2vec_dm_model,
                                    doc2vec_dbow_model, *models)
            # Blend model score with the emotion score (80/20 weighting).
            neg_prob = neg_prob * 0.8 + float(score_qingxu) * 0.2
            if precess_content and neg_prob >= args.speech_filter_score \
                    and neg_prob > speech_highest_content_score[0] \
                    and data_processor.has_sensitive_word(precess_content,
                                                          sensitive_word):
                speech_highest_content_score = (neg_prob, speechContent)
        # Fix: test the selected content, not the tuple — a non-empty tuple is
        # always truthy, so the old guard could insert an empty hit whenever
        # the threshold was <= 0. Mirrors the chat analyzer's check.
        if speech_highest_content_score[1] and \
                speech_highest_content_score[0] >= args.speech_filter_score:
            score = speech_highest_content_score[0]       # record total score
            problem_result = speech_highest_content_score[1]  # offending text
            source = 2  # 1 = chat, 2 = speech
            # Persist the hit.
            insert_mysql_object = dbops.Mysql("f_fuwu_dm")
            insert_sql_content = dbops.get_sql_content(result_insert_sql)
            insert_sql_content = insert_sql_content % (
                v_date, source, task_no, ' '.join(problem_result), score)
            try:
                insert_mysql_object.execute(insert_sql_content)
            except Exception as e:
                print(e)
            finally:
                insert_mysql_object.close()
def chat_record_neg_analyze(result_with_parms):
    """Score one chat record for negative sentiment and persist hits.

    :param result_with_parms: tuple of
        (record, doc2vec_dm_model, doc2vec_dbow_model, args, models)
    """
    if len(result_with_parms) != 5:
        logger.error("The result with params format is err!")
        sys.exit(0)
    record, doc2vec_dm_model, doc2vec_dbow_model, args, models = \
        result_with_parms
    # The chat query returns exactly 10 columns.
    if len(record) != 10:
        logger.error("The data format is err!")
        sys.exit(0)
    try:
        chat_records = json.loads(record[2])  # chat messages (JSON list)
    except Exception as e:
        chat_records = None
        print(e)
    if chat_records:
        create_time = record[1]  # creation time
        session_id = record[3]   # session id of this chat
        user_id = record[5]      # user id
        agent_id = record[6]     # customer-service agent id
        # record[4] duration, record[7] begin time, record[8] end time unused.
        user_say, kefu_say = parser.chat_record_parser(
            chat_records, user_id, agent_id)  # user / agent utterances

        def _highest_neg(contents):
            # (score, content) of the worst utterance above the threshold
            # that also contains a sensitive word; (0.0, '') if none.
            best = (0.0, '')
            for content in contents:
                precess_content = list(jieba.cut(content, cut_all=False))
                precess_content = data_processor.single_content_clean(
                    precess_content, args)
                neg_prob = predict_prob(precess_content, doc2vec_dm_model,
                                        doc2vec_dbow_model, *models)
                if precess_content and neg_prob > args.chat_filter_score \
                        and neg_prob > best[0] \
                        and data_processor.has_sensitive_word(
                            precess_content, sensitive_word):
                    best = (neg_prob, content)
            return best

        user_say_highest_score = _highest_neg(user_say)
        kefu_say_highest_score = _highest_neg(kefu_say)
        # Keep whichever side (user or agent) has the higher negative score.
        if user_say_highest_score[0] > kefu_say_highest_score[0] \
                and user_say_highest_score[1]:
            score_content = user_say_highest_score
        elif user_say_highest_score[0] <= kefu_say_highest_score[0] \
                and kefu_say_highest_score[1]:
            # Bug fix: the original assigned user_say_highest_score here,
            # silently discarding the agent's higher-scoring utterance.
            score_content = kefu_say_highest_score
        else:
            score_content = None
        if score_content and score_content[0] >= args.chat_filter_score:
            score = score_content[0]           # record total score
            problem_result = score_content[1]  # offending content
            source = 1  # 1 = chat, 2 = speech
            # Persist the hit.
            insert_mysql_object = dbops.Mysql("f_fuwu_dm")
            insert_sql_content = dbops.get_sql_content(result_insert_sql)
            insert_sql_content = insert_sql_content % (
                create_time, source, session_id, problem_result, score)
            try:
                insert_mysql_object.execute(insert_sql_content)
            except Exception as e:
                print(e)
            finally:
                insert_mysql_object.close()