import numpy as np  # used below by test_step_for_cqa

# Alphabet, ConfusionMatrix, DICT_INDEX_TO_LABEL, config, FLAGS, model, sess,
# global_step, num_classes, train_data_dir, timestamp and get_rank_score_by_file
# are assumed to be provided by the surrounding module.


def Evaluation_all(gold_label, predict_label):
    """Computes overall accuracy and macro P/R/F1 over the 20-label set."""
    binary_alphabet = Alphabet()
    for i in range(20):
        binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

    cm = ConfusionMatrix(binary_alphabet)
    cm.add_list(predict_label, gold_label)

    macro_p, macro_r, macro_f1 = cm.get_average_prf()
    overall_accuracy = cm.get_accuracy()
    return overall_accuracy, macro_p, macro_r, macro_f1
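# A minimal usage sketch for Evaluation_all (the two-example label lists are
# hypothetical stand-ins; real labels come from DICT_INDEX_TO_LABEL):
#
#     gold = [DICT_INDEX_TO_LABEL[0], DICT_INDEX_TO_LABEL[1]]
#     pred = [DICT_INDEX_TO_LABEL[0], DICT_INDEX_TO_LABEL[0]]
#     acc, p, r, f1 = Evaluation_all(gold, pred)
#     print("acc=%.4f macro-F1=%.4f" % (acc, f1))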
def Evaluation_lst(gold_label, predict_label, print_all=False):
    binary_alphabet = Alphabet()
    for i in range(20):
        binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

    cm = ConfusionMatrix(binary_alphabet)
    cm.add_list(predict_label, gold_label)
    if print_all:
        cm.print_out()

    overall_accuracy = cm.get_accuracy()
    return overall_accuracy
def Evalation_list(gold_label, predict_label, print_all=False):
    """Computes overall accuracy for binary (0/1) labels."""
    binary_alphabet = Alphabet()
    for i in range(2):
        binary_alphabet.add(str(i))

    cm = ConfusionMatrix(binary_alphabet)
    # The alphabet is keyed by strings, so cast int labels before adding.
    predict_label = list(map(str, predict_label))
    gold_label = list(map(str, gold_label))
    cm.add_list(predict_label, gold_label)
    if print_all:
        cm.print_out()

    overall_accuracy = cm.get_accuracy()
    return overall_accuracy
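# A minimal usage sketch for the binary variant (the int labels below are
# hypothetical; note the function accepts ints and casts them internally):
#
#     gold = [0, 1, 1, 0]
#     pred = [0, 1, 0, 0]
#     acc = Evalation_list(gold, pred, print_all=True)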
def Evaluation(gold_file_path, predict_file_path):
    """Reads tab-separated gold/predict files (label id in the first column)
    and reports accuracy plus macro P/R/F1 over the 20-label set."""
    with open(gold_file_path) as gold_file, open(predict_file_path) as predict_file:
        gold_list = [int(line.strip().split('\t')[0]) for line in gold_file]
        predicted_list = [int(line.strip().split('\t')[0]) for line in predict_file]

    # Map numeric ids back to category names before scoring.
    predict_labels = [config.id2category[int(predict)] for predict in predicted_list]
    gold_labels = [config.id2category[int(gold)] for gold in gold_list]

    binary_alphabet = Alphabet()
    for i in range(20):
        binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

    cm = ConfusionMatrix(binary_alphabet)
    cm.add_list(predict_labels, gold_labels)
    confusion_matrix(gold_list, predicted_list)  # side effect only; return value unused
    cm.print_summary()

    macro_p, macro_r, macro_f1 = cm.get_average_prf()
    overall_accuracy = cm.get_accuracy()
    return overall_accuracy, macro_p, macro_r, macro_f1
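# Both input files carry the integer label id in the first tab-separated
# column; everything after the first tab is ignored. A hypothetical gold-file
# line such as:
#
#     3\tsome text the evaluator never reads
#
# is read simply as label id 3.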
def Evaluation(gold_file_path, predict_file_path):
    """Variant of the file-based evaluation over an 18-label set; labels are
    read as raw strings rather than mapped through config.id2category."""
    with open(gold_file_path) as gold_file, open(predict_file_path) as predict_file:
        gold_list = [line.strip().split('\t')[0] for line in gold_file]
        # The predict file uses "\t#\t" as its column separator.
        predicted_list = [line.strip().split('\t#\t')[0] for line in predict_file]

    binary_alphabet = Alphabet()
    for i in range(18):
        binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

    cm = ConfusionMatrix(binary_alphabet)
    cm.add_list(predicted_list, gold_list)
    cm.print_out()

    macro_p, macro_r, macro_f1 = cm.get_average_prf()
    overall_accuracy = cm.get_accuracy()
    return overall_accuracy, macro_p, macro_r, macro_f1
def test_step_for_cqa(s1_all, s2_all, y_all, tag):
    """Evaluates the model on a dev/test set and returns MAP, MRR and accuracy."""
    golds = []
    preds = []
    softmax_scores = []
    n = len(s1_all)
    batch_size = FLAGS.batch_size
    start_index = 0
    while start_index < n:
        if start_index + batch_size <= n:
            # Full batch.
            s1_batch = s1_all[start_index:start_index + batch_size]
            s2_batch = s2_all[start_index:start_index + batch_size]
            y_batch = y_all[start_index:start_index + batch_size]
            feed_dict = {
                model.input_s1: s1_batch,
                model.input_s2: s2_batch,
                model.input_y: y_batch,
                model.dropout_keep_prob: 1.0
            }
            step, loss, accuracy, curr_softmax_scores, curr_predictions, curr_golds = sess.run(
                [global_step, model.loss, model.accuracy,
                 model.softmax_scores, model.predictions, model.golds],
                feed_dict)
            golds += list(curr_golds)
            preds += list(curr_predictions)
            softmax_scores += list(curr_softmax_scores)
        else:
            # Pad the final partial batch up to batch_size with examples from
            # the front of the set, then keep only the real outputs.
            left_num = n - start_index
            s1_batch = np.concatenate(
                (s1_all[start_index:], s1_all[:batch_size - left_num]), axis=0)
            s2_batch = np.concatenate(
                (s2_all[start_index:], s2_all[:batch_size - left_num]), axis=0)
            y_batch = np.concatenate(
                (y_all[start_index:], y_all[:batch_size - left_num]), axis=0)
            feed_dict = {
                model.input_s1: s1_batch,
                model.input_s2: s2_batch,
                model.input_y: y_batch,
                model.dropout_keep_prob: 1.0
            }
            step, loss, accuracy, curr_softmax_scores, curr_predictions, curr_golds = sess.run(
                [global_step, model.loss, model.accuracy,
                 model.softmax_scores, model.predictions, model.golds],
                feed_dict)
            golds += list(curr_golds[:left_num])
            preds += list(curr_predictions[:left_num])
            softmax_scores += list(curr_softmax_scores[:left_num])
            break
        start_index += batch_size

    # Accuracy via the confusion matrix (labels must be strings).
    alphabet = Alphabet()
    for i in range(num_classes):
        alphabet.add(str(i))
    confusionMatrix = ConfusionMatrix(alphabet)
    predictions = list(map(str, preds))
    golds = list(map(str, golds))
    confusionMatrix.add_list(predictions, golds)

    # cQA bookkeeping: subtask A for question-answer (QA) data,
    # subtask B for question-question (QQ) data.
    id_file = ""
    if tag == "dev":
        id_file = train_data_dir + "/dev/id"
    if tag == "test":
        id_file = train_data_dir + "/test/id"
    subtask = ""
    if train_data_dir.split("/")[-1] == "QA":
        subtask = "A"
    if train_data_dir.split("/")[-1] == "QQ":
        subtask = "B"

    # Write one "<prediction>\t<positive-class probability>" line per example.
    pred_file = train_data_dir + "/result.%s.txt" % (timestamp)
    with open(pred_file, "w") as fw:
        for i, s in enumerate(softmax_scores):
            fw.write("%d\t%.4f\n" % (preds[i], s[num_classes - 1]))

    print(pred_file, id_file, tag, subtask)
    map_score, mrr_score = get_rank_score_by_file(pred_file, id_file, tag, subtask)
    return map_score, mrr_score, confusionMatrix.get_accuracy()
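# A minimal usage sketch inside a training loop (the dev arrays s1_dev/s2_dev/
# y_dev, current_step, and the FLAGS.evaluate_every cadence are hypothetical;
# only test_step_for_cqa itself comes from this module):
#
#     if current_step % FLAGS.evaluate_every == 0:
#         map_score, mrr_score, acc = test_step_for_cqa(
#             s1_dev, s2_dev, y_dev, "dev")
#         print("dev MAP=%.4f MRR=%.4f acc=%.4f" % (map_score, mrr_score, acc))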