def pred_tet(path_hyper_parameter=path_hyper_parameters, path_test=None, rate=1.0):
    """Report classification accuracy on the test/validation corpus.

    Args:
        path_hyper_parameter: path to the saved hyper-parameter json.
        path_test: optional external test-data path; overrides the stored one.
        rate: fraction of the corpus to evaluate, in (0.0, 1.0].
    """
    hyper_parameters = load_json(path_hyper_parameter)
    if path_test:  # allow an externally supplied test-data location
        hyper_parameters['data']['val_data'] = path_test
    time_start = time.time()
    # graph init
    graph = Graph(hyper_parameters)
    print("graph init ok!")
    graph.load_model()
    print("graph load ok!")
    ra_ed = graph.word_embedding
    # data preprocessing
    pt = PreprocessText()
    y, x = read_and_process(hyper_parameters['data']['val_data'])
    # keep only the requested fraction of the corpus
    len_rate = int(len(y) * rate)
    # NOTE(review): slicing from index 1 drops the first sample — presumably a
    # header row; confirm against read_and_process before changing.
    x = x[1:len_rate]
    y = y[1:len_rate]
    y_pred = []
    count = 0
    for x_one in x:
        count += 1
        ques_embed = ra_ed.sentence2idx(x_one)
        # bert/albert tokenizers return (ids, segments); xlnet returns three
        # parts; each part must be wrapped into a batch of one
        if hyper_parameters['embedding_type'] in ['bert', 'albert']:
            x_val = [np.array([ques_embed[0]]), np.array([ques_embed[1]])]
        elif hyper_parameters['embedding_type'] == 'xlnet':
            x_val = [np.array([ques_embed[0]]),
                     np.array([ques_embed[1]]),
                     np.array([ques_embed[2]])]
        else:
            x_val = ques_embed
        # predict
        pred = graph.predict(x_val)
        pre = pt.prereocess_idx(pred[0])
        label_pred = pre[0][0][0]
        if count % 1000 == 0:  # lightweight progress signal
            print(label_pred)
        y_pred.append(label_pred)
    print("data pred ok!")
    # map string labels to int ids for sklearn
    index_y = [pt.l2i_i2l['l2i'][i] for i in y]
    index_pred = [pt.l2i_i2l['l2i'][i] for i in y_pred]
    target_names = [pt.l2i_i2l['i2l'][str(i)]
                    for i in list(set((index_pred + index_y)))]
    # evaluation report
    report_predict = classification_report(index_y, index_pred,
                                           target_names=target_names,
                                           digits=9)
    print(report_predict)
    print("耗时:" + str(time.time() - time_start))
def pred_input(str_input, path_hyper_parameter=path_hyper_parameters):
    """Predict multi-label classes for a single input sentence.

    FIX vs. original: 'albert' embeddings are now batched the same way as
    'bert', consistent with the sibling predictors — previously an albert
    config fell through to the raw-token branch and fed unbatched input to
    graph.predict.

    Args:
        str_input: raw sentence to classify.
        path_hyper_parameter: path to the saved hyper-parameter json.

    Returns:
        list of (label, score) pairs whose score is >= 0.73.
    """
    # load hyper-parameters
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessTextMulti()
    # model init and load
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding
    ques = str_input
    # str to token
    ques_embed = ra_ed.sentence2idx(ques)
    if hyper_parameters['embedding_type'] in ['bert', 'albert']:
        # (ids, segments) — wrap each part into a batch of one
        x_val_1 = np.array([ques_embed[0]])
        x_val_2 = np.array([ques_embed[1]])
        x_val = [x_val_1, x_val_2]
    else:
        x_val = ques_embed
    # predict
    pred = graph.predict(x_val)
    print(pred)
    # map ids back to labels with scores
    pre = pt.prereocess_idx(pred[0])
    ls_nulti = []
    for ls in pre[0]:
        if ls[1] >= 0.73:  # keep labels above the confidence threshold
            ls_nulti.append(ls)
    print(str_input)
    print(pre[0])
    print(ls_nulti)
    return ls_nulti
def pred_input(path_hyper_parameter=path_hyper_parameters):
    """Interactively predict multi-label classes from console input.

    Loads the trained graph once, runs a warm-up prediction on a fixed
    sentence, then loops forever reading sentences from stdin and printing
    the labels whose score is >= 0.5.

    FIX vs. original: the embed/predict/filter code was duplicated between
    the warm-up and the interactive loop; it is factored into local helpers.
    """
    # load hyper-parameters
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessTextMulti(path_model_dir)
    # model init and load
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding

    def _to_x_val(ques_embed):
        # bert/albert return (ids, segments); wrap each part into a batch of one
        if hyper_parameters['embedding_type'] in ['bert', 'albert']:
            return [np.array([ques_embed[0]]), np.array([ques_embed[1]])]
        return ques_embed

    def _predict(x_val):
        # predict, map ids back to labels, keep scores >= 0.5
        pred = graph.predict(x_val)
        pre = pt.prereocess_idx(pred[0])
        ls_nulti = [ls for ls in pre[0] if ls[1] >= 0.5]
        return pred, pre, ls_nulti

    # warm-up prediction on a fixed sentence
    ques = '我要打王者荣耀'
    ques_embed = ra_ed.sentence2idx(ques)
    pred, pre, ls_nulti = _predict(_to_x_val(ques_embed))
    print(pred)
    print(pre[0])
    print(ls_nulti)
    while True:
        print("请输入: ")
        ques = input()
        ques_embed = ra_ed.sentence2idx(ques)
        print(ques_embed)
        _, pre, ls_nulti = _predict(_to_x_val(ques_embed))
        print(pre[0])
        print(ls_nulti)
def pred_input(path_hyper_parameter=path_hyper_parameters):
    """Interactively predict a single-label class from console input.

    Loads the trained graph once, runs a warm-up prediction on a fixed
    sentence, then loops forever reading sentences from stdin and printing
    the decoded prediction.

    FIX vs. original: the embed/predict/decode code was duplicated between
    the warm-up and the interactive loop; it is factored into a local helper.
    """
    # load hyper-parameters
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessText()
    # model init and load
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding

    def _predict(ques):
        # str to token; bert returns (ids, segments) which must be batched
        ques_embed = ra_ed.sentence2idx(ques)
        if hyper_parameters['embedding_type'] == 'bert':
            x_val = [np.array([ques_embed[0]]), np.array([ques_embed[1]])]
        else:
            x_val = ques_embed
        pred = graph.predict(x_val)
        return ques_embed, pt.prereocess_idx(pred[0])

    # warm-up prediction on a fixed sentence
    _, pre = _predict('我要打王者荣耀')
    print(pre)
    while True:
        print("请输入: ")
        ques = input()
        ques_embed, pre = _predict(ques)
        print(ques_embed)
        print(pre)
def evaluate(path_hyper_parameter=path_hyper_parameters, rate=1.0):
    """Evaluate multi-label prediction on the validation data.

    Prints overall accuracy, accuracy restricted to non-"无" samples, and an
    IoU ratio, then appends the scores to the log.

    Fixes vs. original:
      * the albert batching branch assigned through an intermediate that the
        following bert check then shadowed — the two branches are merged;
      * empty/degenerate validation sets no longer raise ZeroDivisionError
        (or NameError on `threshold`); metrics are simply skipped.

    Args:
        path_hyper_parameter: path to the saved hyper-parameter json.
        rate: fraction of the validation corpus to use, in (0.0, 1.0].
    """
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessTextMulti()
    # model init and load
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding
    # get validation data
    ques_list, val_list, que, val = pt.preprocess_label_ques_to_idx(
        hyper_parameters['embedding_type'],
        hyper_parameters['data']['val_data'],
        ra_ed, rate=rate, shuffle=True)
    print(len(ques_list))
    print("que:", len(que))
    ques_embed_list = []
    count = 0
    acc_count = 0
    not_none_count = 0
    not_none_acc_count = 0
    sum_iou = 0
    sum_all_iou = 0
    threshold = 0.44  # minimum score for non-top labels
    for index, _ in enumerate(que):
        ques_embed = ra_ed.sentence2idx(que[index])
        # bert/albert return (ids, segments); wrap each part into a batch of one
        if hyper_parameters['embedding_type'] in ['bert', 'albert']:
            x_val = [np.array([ques_embed[0]]), np.array([ques_embed[1]])]
        else:
            x_val = ques_embed
        ques_embed_list.append(x_val)
        # predict and map ids back to (label, score) pairs
        pred = graph.predict(x_val)
        pre = pt.prereocess_idx(pred[0])
        ls_nulti = []
        top_threshold = 0
        for i, ls in enumerate(pre[0]):
            if i == 0 or ls[1] > threshold:
                # always keep the top-ranked label; others must clear threshold
                ls_nulti.append(ls)
                top_threshold = ls[1]
            elif abs(ls[1] - top_threshold) < top_threshold / 4.0:
                # also keep labels scoring close to the last accepted one
                ls_nulti.append(ls)
        res = cal_acc(ls_nulti, val[index].split(","))
        res_iou, res_all_iou = cal_iou(ls_nulti, val[index].split(","))
        sum_iou += res_iou
        sum_all_iou += res_all_iou
        if res:
            if val[index] != "无":
                not_none_acc_count += 1
            acc_count += 1
        else:
            # dump misclassified samples for inspection
            print("原句 ", index, que[index])
            print("真实分类 ", index, val[index])
            print("pre ", pre)
            print("iou ", res_iou)
        count += 1
        if val[index] != "无":
            not_none_count += 1
    # guard against empty/degenerate validation sets (division by zero)
    if count and not_none_count and sum_all_iou:
        print("acc: ", acc_count / count)
        print("not none acc: ", not_none_acc_count / not_none_count)
        print("average iou: ", sum_iou / sum_all_iou)
        # log
        append_log(hyper_parameters, acc_count / count,
                   not_none_acc_count / not_none_count, threshold)
'lr': 1e-4, 'l2': 1e-9, 'activate_classify': 'softmax', # 还可以填'random'、 'bert' or 'word2vec" 'embedding_type': 'random', 'is_training': False, 'model_path': path_model_fast_text_baiduqa_2019,}, 'embedding':{ 'embedding_type': 'random', 'corpus_path': path_embedding_random_char, 'level_type': 'char', 'embed_size': 300, 'len_max': 50,}, } # ns = np.array([1,2,3,4]) # print(type(ns)) pt = PreprocessText graph = Graph(hyper_parameters) graph.load_model() ra_ed = graph.word_embedding ques = '你好呀' ques_embed = ra_ed.sentence2idx(ques) pred = graph.predict(np.array([ques_embed])) pre = pt.prereocess_idx(pred[0]) print(pre) while True: print("请输入: ") ques = input() ques_embed = ra_ed.sentence2idx(ques) pred = graph.predict(np.array([ques_embed])) pre = pt.prereocess_idx(pred[0]) print(pre)
def evaluate(path_hyper_parameter=path_hyper_parameters, rate=1.0):
    """Evaluate multi-label prediction on the test data, with a per-label
    confusion table (Recall/Precision/TP/FP/TN/FN).

    Scope labels ('多发', '散发', '无') are mutually exclusive: only the
    highest-ranked one is kept as predicted. All other labels are predicted
    when their score exceeds `threshold`.

    Fixes vs. original:
      * in the scope-label branch the FP and FN counters were swapped
        (predicted-but-wrong was logged as FN, missed-but-true as FP); all
        four counters now follow the standard convention the non-scope
        branch already used;
      * `import prettytable as pt` shadowed the preprocessor `pt`; the
        module is now imported under its own name.

    Args:
        path_hyper_parameter: path to the saved hyper-parameter json.
        rate: fraction of the test corpus to use, in (0.0, 1.0].
    """
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessTextMulti()
    # model init and load
    graph = Graph(hyper_parameters)
    graph.load_model()
    ra_ed = graph.word_embedding
    print(hyper_parameters['data']['test_data'])
    # init confusion table
    dict_all = initConfusion()
    # get test data
    ques_list, val_list, que, val = pt.preprocess_label_ques_to_idx(
        hyper_parameters['embedding_type'],
        hyper_parameters['data']['test_data'],
        ra_ed, rate=rate, shuffle=True)
    print(len(ques_list))
    print("que:", len(que))
    ques_embed_list = []
    count = 0
    acc_count = 0
    not_none_count = 0
    not_none_acc_count = 0
    sum_iou = 0
    density_correct = 0
    sum_all_iou = 0
    threshold = 0.60  # minimum score for non-scope labels
    scope_labels = ['多发', '散发', '无']  # mutually exclusive scope labels
    for index, _ in enumerate(que):
        ques_embed = ra_ed.sentence2idx(que[index])
        # bert/albert return (ids, segments); wrap each part into a batch of one
        if hyper_parameters['embedding_type'] in ['bert', 'albert']:
            x_val = [np.array([ques_embed[0]]), np.array([ques_embed[1]])]
        else:
            x_val = ques_embed
        ques_embed_list.append(x_val)
        # predict and map ids back to (label, score) pairs
        pred = graph.predict(x_val)
        pre = pt.prereocess_idx(pred[0])
        truth = val[index].split(",")
        ls_nulti = []
        has_scope = False
        for ls in pre[0]:
            if ls[0] in scope_labels:
                # keep only the first (highest-ranked) scope label
                predicted = not has_scope
                if predicted:
                    has_scope = True
                    ls_nulti.append(ls)
            else:
                predicted = ls[1] > threshold
                if predicted:
                    ls_nulti.append(ls)
            # standard confusion update (FIX: FP/FN were swapped for scope
            # labels in the original)
            if predicted and ls[0] in truth:
                dict_all[ls[0]]['TP'] += 1
            elif predicted:
                dict_all[ls[0]]['FP'] += 1
            elif ls[0] in truth:
                dict_all[ls[0]]['FN'] += 1
            else:
                dict_all[ls[0]]['TN'] += 1
        res = cal_acc(ls_nulti, truth)
        res_iou = cal_iou(ls_nulti, truth)
        sum_iou += res_iou
        if checkDensity(ls_nulti, truth):
            density_correct += 1
        if res:
            acc_count += 1
        else:
            # dump misclassified samples for inspection
            print("原句 ", index, que[index])
            print("真实分类 ", index, val[index])
            print("pre ", pre)
            print("iou ", res_iou)
            print(ls_nulti)
        count += 1
        if val[index] != "无":
            not_none_count += 1
    print("acc: ", acc_count / count)
    print("average iou: ", sum_iou / count)
    print('density correct', density_correct / count)
    print(dict_all)
    import prettytable
    tb = prettytable.PrettyTable()
    tb.field_names = [" ", "Recall", "Precision", "TP", "FP", "TN", "FN"]
    for item in dict_all:
        tp = dict_all[item]['TP']
        fp = dict_all[item]['FP']
        tn = dict_all[item]['TN']
        fn = dict_all[item]['FN']
        # convention: recall/precision default to 1 when undefined
        recall = 1 if tp + fn == 0 else tp / (tp + fn)
        precision = 1 if tp + fp == 0 else tp / (tp + fp)
        tb.add_row([item, recall, precision, tp, fp, tn, fn])
    print(tb)
    # log (NOTE(review): not_none_acc_count is never incremented in this
    # variant, so the logged not-none accuracy is always 0 — confirm intent)
    append_log(hyper_parameters, acc_count / count,
               not_none_acc_count / not_none_count, threshold)