def get_src_dialog(batch_id, dialogue_id, turn_id):
    """
    Look up the ``src_dialog`` of one turn of a paraphrasing dialogue.

    The dialogue is read from the cache first and, when the cache returns
    ``None``, from the ``TBL_PARAPHRASING`` collection. Both lookups are
    scoped to the login name of the current session.

    :param batch_id: batch id used in the cache key and the DB query
    :param dialogue_id: value matched against ``dialogue.dialogue_id``
    :param turn_id: value compared with each turn's ``turn_id``
    :return: ``src_dialog`` of the first matching turn, or ``None`` when the
        dialogue or the turn cannot be found
    """
    login_name = comm_util.get_session_login_name()

    # Try the cache first.
    cache_dialogue_key = cache_util.generate_dialogue_key(
        login_name, batch_id, dialogue_id)
    dialogue = cache_util.get(cache_dialogue_key)

    # Fall back to the database when the cache has no entry.
    if dialogue is None:
        dialogue = db[TBL_PARAPHRASING].find_one({
            "login_name": login_name,
            "batch_id": batch_id,
            'dialogue.dialogue_id': dialogue_id,
        })

    # Unknown dialogue: report "not found" instead of failing on None.
    if dialogue is None:
        return None

    for turn_item in dialogue['dialogue']['turn_list']:
        if turn_item['turn_id'] == turn_id:
            return turn_item['src_dialog']

    # No turn with the requested id.
    return None
def handle_dialogues_main(batch_id):
    """
    Handle the dialogues of one annotating batch for the current user.

    GET  - return every dialogue of the batch, keyed by its dialogue id.
    POST - create a new, empty dialogue in the batch and return its id.

    :param batch_id: batch whose dialogues are listed or extended
    :return: JSON response; a mapping ``dialogue_id -> dialogue`` for GET,
        ``{"id": <new dialogue id>}`` for POST
    """
    login_name = comm_util.get_session_login_name()
    responseObject = {}

    if request.method == "GET":
        dialogues = db[TBL_ANNOTATING].find({
            "login_name": login_name,
            "batch_id": batch_id
        })
        for item in dialogues:
            dialogue = item["dialogue"]
            responseObject[dialogue["dialogue_id"]] = dialogue

        # NOTE(review): nothing in this function binds ``id``, so unless the
        # module defines a global of that name this compares the builtin
        # ``id`` function with a string and the branch never runs. Confirm
        # the intended condition before relying on the conversion.
        if id == 'MultiWOZ':
            responseObject = DataConvertor.to_dataset(responseObject, data_set=id)

    elif request.method == "POST":
        # The request body is not used when creating a dialogue; the call is
        # kept so request parsing behaves exactly as before.
        request.get_json()

        # Reuse the metadata name of a dialogue already in this batch.
        existing_dialogue = db[TBL_ANNOTATING].find_one({
            "login_name": login_name,
            "batch_id": batch_id
        })
        if existing_dialogue:
            metadata_name = existing_dialogue["metadata_name"]
        else:
            metadata_name = 'MULTIWOZ'

        new_dialogue_id = 'Dialogue_NEW' + comm_util.get_timestamp_key()
        new_dialogue = {
            "login_name": login_name,
            "batch_id": batch_id,
            "metadata_name": metadata_name,
            "data_version": CURRENT_VERSION,
            "dialogue": {
                "dialogue_id": new_dialogue_id,
                "activated": False,
                "status": "PROCESSING",
                "turns": []
            }
        }
        # The document has no _id yet, so this is a plain insert.
        # insert_one replaces Collection.save, which PyMongo 4 removed.
        db[TBL_ANNOTATING].insert_one(new_dialogue)

        responseObject = {"id": new_dialogue_id}

    return jsonify(responseObject)
def merge_dialog(batch_id, dialogue_id, para_turn_dict):
    """
    Overlay submitted paraphrases onto the turns of a stored dialogue.

    The dialogue is loaded from ``TBL_PARAPHRASING`` for the current session's
    login name. For each turn whose ``turn_id`` (as a string) is a key of
    ``para_turn_dict``, the turn's ``para_dialog`` is overwritten with the
    submitted one. Nothing is written back to the database here.

    :param batch_id: batch id used in the DB query
    :param dialogue_id: value matched against ``dialogue.dialogue_id``
    :param para_turn_dict: mapping of ``str(turn_id)`` to an object holding
        a ``para_dialog`` entry
    :return: the dialogue's turn list with the paraphrases merged in
    """
    query = {
        "login_name": comm_util.get_session_login_name(),
        "batch_id": batch_id,
        'dialogue.dialogue_id': dialogue_id,
    }
    stored = db[TBL_PARAPHRASING].find_one(query)

    turns = stored["dialogue"]["turn_list"]
    for turn in turns:
        key = str(turn["turn_id"])
        if key in para_turn_dict:
            turn['para_dialog'] = para_turn_dict[key]['para_dialog']
    return turns
def handle_batch_list_by_category():
    """
    GET - batch metadata of the current user for one category.

    The category comes from the ``batch_category`` query parameter and is
    used as the field name inside the user's ``TBL_BATCHSTATUS`` document.

    :return: JSON list of the category's batch entries sorted by
        ``batch_id``; an empty list when the user has no batch-status
        document or the document has no entry for the category
    """
    batch_category = request.args.get('batch_category')
    login_name = comm_util.get_session_login_name()
    batch_info = db[TBL_BATCHSTATUS].find_one({'login_name': login_name})

    if batch_info is None:
        batch_list = []
    else:
        # A missing or unknown category is treated like "no batches"
        # instead of failing with a KeyError.
        batch_list = batch_info.get(batch_category) or []

    batch_list = sorted(batch_list, key=lambda item: item['batch_id'])
    return jsonify(batch_list)
def handle_dialogue_status():
    """
    Update the status of one annotating dialogue and refresh batch progress.

    Reads ``batch_id``, ``dialogue_id`` and ``currentStatus`` from the JSON
    body, stores the status on the matching dialogue, then recomputes the
    share of ``FINISHED`` dialogues in the batch and writes it to the
    matching ``annotating`` entry of the user's ``TBL_BATCHSTATUS`` document.

    :return: JSON ``{"status": "success"}``
    """
    data = request.get_json()
    batch_id = data['batch_id']
    dialogue_id = data['dialogue_id']
    currentStatus = data['currentStatus']

    login_name = comm_util.get_session_login_name()
    db[TBL_ANNOTATING].update_one(
        {
            'login_name': login_name,
            'batch_id': batch_id,
            'dialogue.dialogue_id': dialogue_id
        },
        {"$set": {"dialogue.status": currentStatus}})

    # Count finished vs. total dialogues of the batch.
    dialogue_list = db[TBL_ANNOTATING].find({
        'login_name': login_name,
        "batch_id": batch_id
    })
    dialog_finish_num = 0
    dialog_total_num = 0
    for item in dialogue_list:
        dialog_total_num += 1
        if item["dialogue"]['status'] == 'FINISHED':
            dialog_finish_num += 1

    # An empty batch counts as 0% rather than dividing by zero.
    if dialog_total_num:
        finished_ratio = dialog_finish_num / dialog_total_num
    else:
        finished_ratio = 0.0
    batch_progress = format(finished_ratio, '.0%')

    db[TBL_BATCHSTATUS].update_one(
        {'login_name': login_name},
        {"$set": {"annotating.$[elem].batch_progress": batch_progress}},
        array_filters=[{"elem.batch_id": batch_id}])

    responseObject = {
        "status": "success",
    }
    return jsonify(responseObject)
def handle_batch_description_post():
    """
    Store a new description for one batch of the current user.

    Reads ``batch_category``, ``batch_id`` and ``batch_description`` from the
    JSON body and sets ``batch_description`` on the element of the
    ``<batch_category>`` array (in the user's ``TBL_BATCHSTATUS`` document)
    whose ``batch_id`` matches.

    :return: JSON ``{"code": 200, "msg": <localized message>}``
    """
    payload = request.get_json()
    batch_category = payload['batch_category']
    batch_id = payload['batch_id']
    batch_description = payload['batch_description']

    user_filter = {'login_name': comm_util.get_session_login_name()}
    # Positional filtered update: only the array element selected by
    # ``array_filters`` is modified.
    description_field = batch_category + ".$[elem].batch_description"
    db[TBL_BATCHSTATUS].update_one(
        user_filter,
        {"$set": {description_field: batch_description}},
        array_filters=[{"elem.batch_id": batch_id}])

    return jsonify({"code": 200, "msg": gettext(u'SuccessfullyUpdate.')})
def handle_paraphrasing_dialogues_main(batch_id):
    """
    GET - summary of every paraphrasing dialogue in a batch.

    Dialogues of the current user in ``TBL_PARAPHRASING`` are read sorted by
    ``dialogue.dialogue_id``. Each one is reduced to its id, its number of
    turns and the ``finished`` counter stored in its ``para_info``.

    :param batch_id: batch whose dialogues are summarized
    :return: JSON mapping ``dialogue_id -> {dialogue_id, turn_len, finished}``
    """
    query = {
        "login_name": comm_util.get_session_login_name(),
        "batch_id": batch_id,
    }
    cursor = db[TBL_PARAPHRASING].find(query).sort("dialogue.dialogue_id")

    summaries = {
        doc["dialogue"]["dialogue_id"]: {
            "dialogue_id": doc["dialogue"]["dialogue_id"],
            "turn_len": len(doc["dialogue"]["turn_list"]),
            "finished": doc["dialogue"]["para_info"]["finished"],
        }
        for doc in cursor
    }
    return jsonify(summaries)
def handle_dialogues_metadata(batch_id):
    """
    GET - metadata of every annotating dialogue in a batch.

    Dialogues of the current user in ``TBL_ANNOTATING`` are read sorted by
    ``dialogue.dialogue_id`` and reduced to id, turn count and status.

    :param batch_id: batch whose dialogues are listed
    :return: JSON list of ``{"id", "num_turns", "status"}`` objects
    """
    query = {
        'login_name': comm_util.get_session_login_name(),
        "batch_id": batch_id,
    }
    cursor = db[TBL_ANNOTATING].find(query).sort("dialogue.dialogue_id")

    def summarize(doc):
        # Keys are read in the same order as the fields appear in the result.
        entry = doc["dialogue"]
        dialogue_id = entry['dialogue_id']
        turns = entry['turns']
        return {
            "id": dialogue_id,
            "num_turns": len(turns),
            "status": entry['status'],
        }

    return jsonify([summarize(doc) for doc in cursor])
def handle_dialogues_metadata_put(dialog_id):
    """
    PUT - rename an annotating dialogue of the current user.

    The new id is read from the ``id`` field of the JSON body and written to
    ``dialogue.dialogue_id`` of the document currently carrying ``dialog_id``.

    :param dialog_id: current dialogue id
    :return: JSON ``{"status": "success"}``, or a ``failure`` object with a
        message when no such dialogue exists
    """
    data = request.get_json()
    new_dialogue_id = data["id"]

    login_name = comm_util.get_session_login_name()
    dialogue = db[TBL_ANNOTATING].find_one({
        "login_name": login_name,
        'dialogue.dialogue_id': dialog_id
    })
    if not dialogue:
        responseObject = {
            "status": "failure",
            "msg": "对话未找到, dialogue_id:=%s" % str(dialog_id)
        }
        return jsonify(responseObject)

    # Rename in place, addressing the document by its _id. A separate
    # "delete the old id" step is deliberately avoided: once the document is
    # renamed, such a delete would either remove the document itself (when
    # the new id equals the old one) or an unrelated document that happens
    # to share the old id.
    db[TBL_ANNOTATING].update_one(
        {"_id": dialogue["_id"]},
        {"$set": {"dialogue.dialogue_id": new_dialogue_id}})

    responseObject = {"status": "success"}
    return jsonify(responseObject)
def handle_paraphrasing_dialogues_detail(batch_id, dialogue_id):
    """
    Handle one paraphrasing dialogue of the current user.

    GET    - return the dialogue (each task that has a ``任务描述`` entry also
             gets it copied to ``TaskDescription``).
    PUT    - validate the submitted paraphrases turn by turn, store them with
             their validation result and update the dialogue's ``para_info``.
    DELETE - remove the dialogue.

    After PUT and DELETE the share of ``FINISHED`` dialogues in the batch is
    recomputed and written to the matching ``paraphrasing`` entry of the
    user's ``TBL_BATCHSTATUS`` document.

    :param batch_id: batch the dialogue belongs to
    :param dialogue_id: value matched against ``dialogue.dialogue_id``
    :return: JSON response; the dialogue for GET, ``{"status": "SUCCESS"}``
        for PUT/DELETE, or a ``failure`` object when the dialogue is missing
    """
    login_name = comm_util.get_session_login_name()
    dialogue_filter = {
        'login_name': login_name,
        "batch_id": batch_id,
        "dialogue.dialogue_id": dialogue_id
    }
    dialogue = db[TBL_PARAPHRASING].find_one(dialogue_filter)

    # Build the cache key for this dialogue.
    cache_dialogue_key = cache_util.generate_dialogue_key(
        login_name, batch_id, dialogue_id)

    if not dialogue:
        responseObject = {
            "status": "failure",
            "msg": "对话未找到, batch_id:=%s, dialogue_id:=%s" % (str(batch_id), str(dialogue_id))
        }
        return jsonify(responseObject)

    # Keep the freshly loaded dialogue in the cache.
    cache_util.put(cache_dialogue_key, dialogue)

    if request.method == "GET":
        responseObject = dialogue["dialogue"]
        for task_item in responseObject["task_list"]:
            if '任务描述' in task_item:
                task_item['TaskDescription'] = task_item['任务描述']
        return jsonify(responseObject)

    if request.method == "PUT":
        para_turn_dict = request.get_json()

        turn_finished_count = 0
        turn_list = dialogue["dialogue"]["turn_list"]
        for turn_item in turn_list:
            turn_id = turn_item["turn_id"]
            turn_key = str(turn_id)
            # Only turns present in the submitted payload are processed.
            if turn_key not in para_turn_dict:
                continue

            src_dialogue = turn_item['src_dialog']
            para_dialogue = para_turn_dict[turn_key]['para_dialog']

            # Validate this turn's paraphrase against its source.
            turn_validate_result = paraphrasing_validator.validate_turn(
                src_dialogue, para_dialogue, turn_item['role'])
            if turn_validate_result["status"] == 'SUCCESS':
                turn_finished_count += 1
                para_info = {
                    "status": turn_validate_result["status"],
                    "desc": "复述已完成"
                }
            else:
                para_info = {
                    "status": turn_validate_result["status"],
                    "desc": turn_validate_result["error"]
                }

            # Store paraphrase and validation result on the matching turn.
            db[TBL_PARAPHRASING].update_one(
                dialogue_filter,
                {"$set": {"dialogue.turn_list.$[elem].para_dialog": para_dialogue,
                          "dialogue.turn_list.$[elem].para_info": para_info}},
                array_filters=[{"elem.turn_id": turn_id}])

        # Dialogue-level summary.
        if len(turn_list) == turn_finished_count:
            main_para_status = "FINISHED"
        else:
            main_para_status = "PROCESSING"
        main_para_info = {
            "total": len(turn_list),
            "finished": turn_finished_count,
            "status": main_para_status
        }
        db[TBL_PARAPHRASING].update_one(
            dialogue_filter,
            {"$set": {"dialogue.activated": True,
                      "dialogue.para_info": main_para_info}})

        # The cached copy is now outdated.
        cache_util.pop(cache_dialogue_key)
        responseObject = {"status": "SUCCESS"}

    elif request.method == "DELETE":
        db[TBL_PARAPHRASING].delete_one(dialogue_filter)
        # Drop the entry cached above so a deleted dialogue is not served
        # from the cache afterwards.
        cache_util.pop(cache_dialogue_key)
        responseObject = {"status": "SUCCESS"}

    # Recompute the share of finished dialogues in the batch.
    dialogue_finished_count = 0
    dialogue_total_count = 0
    dialogue_list = db[TBL_PARAPHRASING].find(
        {"login_name": login_name, "batch_id": batch_id})
    for item in dialogue_list:
        dialogue_total_count += 1
        if item['dialogue']['para_info']['status'] == 'FINISHED':
            dialogue_finished_count += 1

    # A batch without dialogues (e.g. after deleting its last one) counts as
    # 0% rather than dividing by zero.
    if dialogue_total_count:
        finished_ratio = dialogue_finished_count / dialogue_total_count
    else:
        finished_ratio = 0.0
    batch_progress = format(finished_ratio, '.0%')

    # Write the progress to the batch's entry in the batch-status document.
    db[TBL_BATCHSTATUS].update_one(
        {'login_name': login_name},
        {"$set": {"paraphrasing.$[elem].batch_progress": batch_progress}},
        array_filters=[{"elem.batch_id": batch_id}])

    return jsonify(responseObject)
def handle_dialogues_detail(batch_id, dialogue_id):
    """
    Handle one annotating dialogue of the current user.

    GET    - return the dialogue's turns together with its metadata name and
             the metadata value resolved for that name.
    PUT    - validate the submitted turn list, attach the validation results
             to each turn and store the list as the dialogue's new content.
    DELETE - remove the dialogue.

    :param batch_id: batch the dialogue belongs to
    :param dialogue_id: value matched against ``dialogue.dialogue_id``
    :return: JSON response; a ``failure`` object when the dialogue is missing
        or has no ``metadata_name``
    """
    login_name = comm_util.get_session_login_name()
    dialogue_filter = {
        'login_name': login_name,
        "batch_id": batch_id,
        "dialogue.dialogue_id": dialogue_id
    }
    dialogue = db[TBL_ANNOTATING].find_one(dialogue_filter)

    if not dialogue:
        responseObject = {
            "status": "failure",
            "msg": "对话未找到, batch_id:=%s, dialogue_id:=%s" % (str(batch_id), str(dialogue_id))
        }
        return jsonify(responseObject)

    if 'metadata_name' not in dialogue:
        responseObject = {
            "status": "failure",
            "msg": "MetadataName缺失, batch_id:=%s, dialogue_id:=%s" % (str(batch_id), str(dialogue_id))
        }
        return jsonify(responseObject)

    # Resolve the metadata referenced by the dialogue (helper defined
    # elsewhere in the project).
    metadata_name = dialogue['metadata_name']
    metadata_value = get_current_metadata_value(metadata_name)

    if request.method == "GET":
        item = dialogue["dialogue"]
        responseObject = {
            "dialogue": item["turns"],
            "metadata_name": metadata_name,
            "metadata": metadata_value
        }

    elif request.method == "PUT":
        turn_list = request.get_json()
        for turn in turn_list:
            usr_validation, sys_validation = annotating_validator.validate_turn(
                turn, metadata_value)
            # Keep the validation results on the turn itself.
            turn['usr']['turn_info'] = usr_validation
            turn['sys']['turn_info'] = sys_validation

        new_dialogue = {
            'login_name': login_name,
            "batch_id": batch_id,
            "metadata_name": metadata_name,
            DATA_VERSION: CURRENT_VERSION,
            "dialogue": {
                "dialogue_id": dialogue_id,
                "activated": True,
                "status": "PROCESSING",
                "turns": turn_list
            }
        }
        # Swap the stored document in a single operation, so a failed write
        # cannot leave the dialogue deleted but not re-created. upsert=True
        # still stores the dialogue if it vanished since the lookup above.
        db[TBL_ANNOTATING].replace_one(dialogue_filter, new_dialogue, upsert=True)
        responseObject = {"status": "success"}

    elif request.method == "DELETE":
        db[TBL_ANNOTATING].delete_one(dialogue_filter)
        responseObject = {"status": "success"}

    return jsonify(responseObject)