def one_autofinetune(args, schema_labels, predict_data, predict_sents, id):
    """Train/evaluate one sequence-labeling task under PaddleHub AutoDL Finetuner,
    optionally run prediction, then archive the best checkpoint and report the
    final score back to the tuner via ``hub.report_final_result``.

    Args:
        args: parsed command-line namespace; fields read here include
            do_train, do_predict, do_model, add_crf, max_seq_len,
            predict_data, checkpoint_dir, saved_params_dir.
        schema_labels: label set passed through to ``get_task``.
        predict_data: iterable of raw prediction inputs (one text per item).
        predict_sents: list of dicts paired 1:1 with ``predict_data``;
            each gets a "labels" key written into it.
        id: run identifier used in log lines and output file names.
            NOTE(review): parameter name shadows the ``id`` builtin.
    """
    # Build the PaddleHub task and data reader for this run.
    seq_label_task, reader = get_task(args, schema_labels, id)
    # Load PaddleHub pretrained model (ERNIE Tiny / RoBERTa large).
    # More pretrained models:
    # https://www.paddlepaddle.org.cn/hublist?filter=en_category&value=SemanticModel
    # model_name = "ernie_tiny"
    # PaddleHub Finetune API: trains, evaluates and saves the model automatically.
    if args.do_train:
        print("start finetune and eval process")
        seq_label_task.finetune_and_eval()
        write_log('./work/log/' + args.do_model + '.txt', args,
                  str(seq_label_task.best_score))
    if args.do_predict:
        print("start predict process")
        ret = []  # NOTE(review): dead assignment — re-initialized below before use
        # Invert the reader's label map: int id -> label string.
        id2label = {val: key for key, val in reader.label_map.items()}
        input_data = [[d] for d in predict_data]
        # NOTE(review): ``input_data[1:]`` drops the first example; the sibling
        # ``one()`` variant predicts on the full list — confirm this is intended
        # (e.g. header-row skip) and not an off-by-one.
        run_states = seq_label_task.predict(data=input_data[1:])
        results = []
        for batch_states in run_states:
            batch_results = batch_states.run_results
            # batch_results[0]: flat label-id predictions; [1]: per-example lengths.
            batch_infers = batch_results[0].reshape([-1]).astype(
                np.int32).tolist()
            seq_lens = batch_results[1].reshape([-1]).astype(np.int32).tolist()
            current_id = 0
            for length in seq_lens:
                seq_infers = batch_infers[current_id:current_id + length]
                # [1:-1] strips the [CLS]/[SEP] positions from the decoded labels.
                seq_result = list(map(id2label.get, seq_infers[1:-1]))
                # CRF output is packed to true lengths; otherwise rows are
                # padded to max_seq_len, so advance by that stride instead.
                current_id += length if args.add_crf else args.max_seq_len
                results.append(seq_result)
        ret = []
        for sent, r_label in zip(predict_sents, results):
            sent["labels"] = r_label
            ret.append(json.dumps(sent, ensure_ascii=False))
        write_by_lines(
            "{}.{}.{}.pred".format(args.predict_data, args.do_model, id), ret)
    # (Removed commented-out eval snippet: finetune_and_eval / eval /
    # _calculate_metrics experiments.)
    # Move ckpt/best_model to the defined saved-parameters directory, then
    # drop the whole checkpoint dir to free disk for the next tuner trial.
    best_model_dir = os.path.join(args.checkpoint_dir, "best_model")
    if is_path_valid(args.saved_params_dir) and os.path.exists(best_model_dir):
        shutil.copytree(best_model_dir, args.saved_params_dir)
        shutil.rmtree(args.checkpoint_dir)
    write_log('./work/log/' + args.do_model + '.txt', args, id +
              ',' + str(seq_label_task.best_score))
    print(seq_label_task.best_score)
    # Hand the metric to PaddleHub AutoDL Finetuner for hyperparameter search.
    hub.report_final_result(seq_label_task.best_score)
def one(args, schema_labels, id):
    """Create the sequence-labeling task for run *id* and fine-tune it if requested.

    When ``args.do_train`` is false this is a no-op beyond task construction.
    The best evaluation score is appended to ``./work/log/<do_model>.txt``.
    """
    task, _reader = get_task(args, schema_labels, id)
    if not args.do_train:
        return
    print("start finetune and eval process")
    # Trains, evaluates and checkpoints automatically (PaddleHub Finetune API).
    task.finetune_and_eval()
    log_path = './work/log/' + args.do_model + '.txt'
    write_log(log_path, args, id + ',' + str(task.best_score))
def one(args, schema_labels, predict_data, predict_sents, id): seq_label_task, reader = get_task(args, schema_labels, id) # 加载PaddleHub 预训练模型ERNIE Tiny/RoBERTa large # 更多预训练模型 https://www.paddlepaddle.org.cn/hublist?filter=en_category&value=SemanticModel # model_name = "ernie_tiny" # PaddleHub Finetune API # 将自动训练、评测并保存模型 if args.do_train: print("start finetune and eval process") seq_label_task.finetune_and_eval() write_log('./work/log/' + args.do_model + '.txt', args, id + ',' + str(seq_label_task.best_score)) if args.do_predict: print("start predict process") ret = [] id2label = {val: key for key, val in reader.label_map.items()} input_data = [[d] for d in predict_data] # print(input_data[:10]) run_states = seq_label_task.predict(data=input_data) results = [] for batch_states in run_states: batch_results = batch_states.run_results # print('batch_infers',batch_results ) batch_infers = batch_results[0].reshape([-1]).astype( np.int32).tolist() seq_lens = batch_results[1].reshape([-1]).astype(np.int32).tolist() current_id = 0 for length in seq_lens: seq_infers = batch_infers[current_id:current_id + length] seq_result = list(map(id2label.get, seq_infers[1:-1])) current_id += length if args.add_crf else args.max_seq_len results.append(seq_result) ret = [] for sent, input, r_label in zip(predict_sents, input_data, results): sent["input"] = input sent["labels"] = r_label ret.append(json.dumps(sent, ensure_ascii=False)) write_by_lines( "{}.{}.{}.pred".format(output_predict_data_path, args.do_model, id), ret) get_submit_postprocess(args, id) get_submit_postprocess(args, id, check=True)
def one(args, schema_labels, predict_data, predict_sents, id):
    """Train and/or predict a run, branching on ``args.do_model``:
    sequence labeling for ``'role'``, multi-label classification for
    ``'mcls'`` / ``'mcls_onlysentence'``.

    NOTE(review): third redefinition of ``one`` in this file — only the
    last definition is live after import; confirm the earlier ones are
    intentionally superseded.

    Args:
        args: namespace with do_train, do_predict, do_model, add_crf,
            max_seq_len.
        schema_labels: label set forwarded to ``get_task``.
        predict_data: raw prediction inputs.
        predict_sents: dicts mutated in place ("input"/"labels" keys).
        id: run identifier (shadows the ``id`` builtin).
    """
    seq_label_task, reader = get_task(args, schema_labels, id)
    # Load PaddleHub pretrained model (ERNIE Tiny / RoBERTa large).
    # More pretrained models:
    # https://www.paddlepaddle.org.cn/hublist?filter=en_category&value=SemanticModel
    # model_name = "ernie_tiny"
    # PaddleHub Finetune API: trains, evaluates and saves automatically.
    if args.do_train:
        print("start finetune and eval process")
        seq_label_task.finetune_and_eval()
        write_log('./work/log/' + args.do_model + '.txt', args, id +
                  ',' + str(seq_label_task.best_score))
    # --- sequence-labeling prediction path (role extraction) ---
    if args.do_model == 'role' and args.do_predict:
        print("start predict process")
        ret = []  # NOTE(review): dead assignment — re-initialized below
        id2label = {val: key for key, val in reader.label_map.items()}
        input_data = [[d] for d in predict_data]
        # print(input_data[:10])
        run_states = seq_label_task.predict(data=input_data)
        results = []
        for batch_states in run_states:
            batch_results = batch_states.run_results
            # batch_results[0]: flat predicted label ids; [1]: sequence lengths.
            batch_infers = batch_results[0].reshape([-1]).astype(
                np.int32).tolist()
            # print(batch_results)
            seq_lens = batch_results[1].reshape([-1]).astype(np.int32).tolist()
            current_id = 0
            for length in seq_lens:
                seq_infers = batch_infers[current_id:current_id + length]
                # Strip [CLS]/[SEP] positions before decoding.
                seq_result = list(map(id2label.get, seq_infers[1:-1]))
                # CRF output is length-packed; otherwise padded to max_seq_len.
                current_id += length if args.add_crf else args.max_seq_len
                results.append(seq_result)
        ret = []
        for sent, input, r_label in zip(predict_sents, input_data, results):
            # NOTE(review): loop variable ``input`` shadows the builtin.
            sent["input"] = input
            sent["labels"] = r_label
            ret.append(json.dumps(sent, ensure_ascii=False))
        # ``output_predict_data_path`` is a module-level global (not visible here).
        write_by_lines(
            "{}.{}.{}.pred".format(output_predict_data_path, args.do_model,
                                   id), ret)
        get_submit_postprocess(args, id)
        get_submit_postprocess(args, id, check=True)
    # --- multi-label classification prediction path ---
    if args.do_model in ['mcls', "mcls_onlysentence"] and args.do_predict:
        input_data = predict_data
        # return_result=True yields decoded dicts (label -> 0/1) per example.
        result = seq_label_task.predict(data=input_data, return_result=True)
        ret = []
        submit = []
        for s, r in zip(predict_sents, result):
            s['labels'] = []
            # print(r)
            for r0 in r:
                # print(r0)
                for k, v in r0.items():
                    # print(k,v)
                    # Keep only positively-predicted labels.
                    if (v == 1):
                        s['labels'].append(k)
                        if (args.do_model == 'mcls_onlysentence'):
                            # Sentence-level submission: id<TAB>label
                            submit.append('\t'.join([str(s["id"]), k]))
                        else:
                            # Entity-level submission: id<TAB>label<TAB>entity
                            submit.append('\t'.join(
                                [str(s["id"]), k, s["entity"]]))
            ret.append(json.dumps(s, ensure_ascii=False))
        # ``output_predict_data_path`` / ``output_path`` are module-level globals.
        write_by_lines(
            "{}.{}.{}.40-55.pred".format(output_predict_data_path,
                                         args.do_model, id), ret)
        write_by_lines(
            "{}{}.{}.40-55.ucas_valid_result.csv".format(
                output_path, args.do_model, id), submit)