Example #1
def Evaluation_all(gold_label, predict_label):
    # Despite its name, the alphabet holds all 20 class labels.
    binary_alphabet = Alphabet()
    for i in range(20):
        binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

    cm = ConfusionMatrix(binary_alphabet)
    cm.add_list(predict_label, gold_label)
    macro_p, macro_r, macro_f1 = cm.get_average_prf()
    overall_accuracy = cm.get_accuracy()
    return overall_accuracy, macro_p, macro_r, macro_f1
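
A hypothetical call, assuming DICT_INDEX_TO_LABEL maps the ids 0-19 to label names (the label ids and data below are invented for illustration):

gold = [DICT_INDEX_TO_LABEL[i] for i in [0, 1, 2, 1]]
pred = [DICT_INDEX_TO_LABEL[i] for i in [0, 2, 2, 1]]
acc, p, r, f1 = Evaluation_all(gold, pred)
print("acc=%.4f macro-P=%.4f macro-R=%.4f macro-F1=%.4f" % (acc, p, r, f1))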
Example #2
def Evaluation_lst(gold_label, predict_label, print_all=False):
    binary_alphabet = Alphabet()
    for i in range(20):
        binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

    cm = ConfusionMatrix(binary_alphabet)
    cm.add_list(predict_label, gold_label)

    if print_all:
        cm.print_out()
    overall_accuracy = cm.get_accuracy()
    return overall_accuracy
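
Every example on this page leans on the same small project API. Since it is not shown here, the snippet below is a minimal self-contained sketch of what Alphabet and ConfusionMatrix plausibly look like; only the method names come from the examples above, and the internals are assumptions made so the examples can be run in isolation:

from collections import defaultdict

class Alphabet:
    """Keeps an ordered set of label strings (assumed interface)."""
    def __init__(self):
        self.labels = []

    def add(self, label):
        if label not in self.labels:
            self.labels.append(label)

class ConfusionMatrix:
    def __init__(self, alphabet):
        self.alphabet = alphabet
        self.counts = defaultdict(int)  # (gold, predicted) -> count

    def add_list(self, predict_labels, gold_labels):
        for p, g in zip(predict_labels, gold_labels):
            self.counts[(g, p)] += 1

    def get_accuracy(self):
        total = sum(self.counts.values())
        correct = sum(c for (g, p), c in self.counts.items() if g == p)
        return correct / total if total else 0.0

    def get_average_prf(self):
        # Macro-averaged precision/recall/F1 over the whole alphabet.
        ps, rs, fs = [], [], []
        for label in self.alphabet.labels:
            tp = self.counts[(label, label)]
            fp = sum(c for (g, p), c in self.counts.items() if p == label and g != label)
            fn = sum(c for (g, p), c in self.counts.items() if g == label and p != label)
            prec = tp / (tp + fp) if tp + fp else 0.0
            rec = tp / (tp + fn) if tp + fn else 0.0
            f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
            ps.append(prec)
            rs.append(rec)
            fs.append(f1)
        n = len(self.alphabet.labels)
        return sum(ps) / n, sum(rs) / n, sum(fs) / n

    def print_out(self):
        # Crude per-pair dump; the real class presumably prints a full matrix.
        for (g, p), c in sorted(self.counts.items()):
            print("gold=%s pred=%s count=%d" % (g, p, c))

    # Example #4 calls print_summary; treat it as an alias here.
    print_summary = print_out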
Example #3
def Evalation_list(gold_label, predict_label, print_all=False):
    # Binary task: the alphabet holds the string ids "0" and "1".
    binary_alphabet = Alphabet()
    for i in range(2):
        binary_alphabet.add(str(i))

    cm = ConfusionMatrix(binary_alphabet)
    # Labels may arrive as ints; the matrix expects the string ids.
    predict_label = list(map(str, predict_label))
    gold_label = list(map(str, gold_label))
    cm.add_list(predict_label, gold_label)

    if print_all:
        cm.print_out()
    overall_accuracy = cm.get_accuracy()
    return overall_accuracy
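
Because the function stringifies both lists itself, it can be called directly with integer 0/1 labels. A hypothetical smoke test (data invented for illustration):

acc = Evalation_list([0, 1, 1, 0], [0, 1, 0, 0], print_all=True)
print("binary accuracy: %.4f" % acc)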
Example #4
def Evaluation(gold_file_path, predict_file_path):
    with open(gold_file_path) as gold_file, open(predict_file_path) as predict_file:
        # The first tab-separated field of every line is an integer label id.
        gold_list = [int(line.strip().split('\t')[0]) for line in gold_file]
        predicted_list = [int(line.strip().split('\t')[0]) for line in predict_file]
        # Map ids back to category names (config.id2category is project-specific).
        predict_labels = [config.id2category[predict] for predict in predicted_list]
        gold_labels = [config.id2category[gold] for gold in gold_list]
        binary_alphabet = Alphabet()
        for i in range(20):
            binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

        cm = ConfusionMatrix(binary_alphabet)
        cm.add_list(predict_labels, gold_labels)

        # sklearn-style confusion_matrix call; its return value is discarded here.
        confusion_matrix(gold_list, predicted_list)
        cm.print_summary()
        macro_p, macro_r, macro_f1 = cm.get_average_prf()
        overall_accuracy = cm.get_accuracy()
        return overall_accuracy, macro_p, macro_r, macro_f1
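
Both input files are plain text with the integer label id in the first tab-separated field. A hypothetical smoke test that writes two such files and scores them (paths, ids, and second columns are invented for illustration, and config.id2category must cover the ids used):

with open("gold.txt", "w") as f:
    f.write("3\tq1\n0\tq2\n")
with open("pred.txt", "w") as f:
    f.write("3\tq1\n1\tq2\n")
acc, p, r, f1 = Evaluation("gold.txt", "pred.txt")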
Example #5
def Evaluation(gold_file_path, predict_file_path):
    with open(gold_file_path) as gold_file, open(predict_file_path) as predict_file:

        # Gold file: the label is the first tab-separated field.
        gold_list = [line.strip().split('\t')[0] for line in gold_file]
        # Prediction file: the label precedes a literal "\t#\t" separator.
        predicted_list = [line.strip().split('\t#\t')[0] for line in predict_file]

        # Despite its name, the alphabet holds all 18 class labels.
        binary_alphabet = Alphabet()
        for i in range(18):
            binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

        cm = ConfusionMatrix(binary_alphabet)
        cm.add_list(predicted_list, gold_list)

        cm.print_out()
        macro_p, macro_r, macro_f1 = cm.get_average_prf()
        overall_accuracy = cm.get_accuracy()
        return overall_accuracy, macro_p, macro_r, macro_f1

def test_step_for_cqa(s1_all, s2_all, y_all, tag):
    """
    Evaluates the model on a dev/test set.

    Relies on names from the enclosing training script: sess, model,
    global_step, FLAGS, num_classes, train_data_dir and timestamp.
    """
    golds = []
    preds = []
    softmax_scores = []

    n = len(s1_all)
    batch_size = FLAGS.batch_size
    start_index = 0
    while start_index < n:
        if start_index + batch_size <= n:
            s1_batch = s1_all[start_index:start_index + batch_size]
            s2_batch = s2_all[start_index:start_index + batch_size]
            y_batch = y_all[start_index:start_index + batch_size]

            feed_dict = {
                model.input_s1: s1_batch,
                model.input_s2: s2_batch,
                model.input_y: y_batch,
                model.dropout_keep_prob: 1.0
            }

            step, loss, accuracy, curr_softmax_scores, curr_predictions, curr_golds = sess.run(
                [
                    global_step, model.loss, model.accuracy,
                    model.softmax_scores, model.predictions,
                    model.golds
                ], feed_dict)

            golds += list(curr_golds)
            preds += list(curr_predictions)
            softmax_scores += list(curr_softmax_scores)

        else:
            left_num = n - start_index
            # Pad the final short batch by wrapping around to the start.
            s1_batch = np.concatenate(
                (s1_all[start_index:], s1_all[:batch_size - left_num]),
                axis=0)
            s2_batch = np.concatenate(
                (s2_all[start_index:], s2_all[:batch_size - left_num]),
                axis=0)
            y_batch = np.concatenate(
                (y_all[start_index:], y_all[:batch_size - left_num]),
                axis=0)

            feed_dict = {
                model.input_s1: s1_batch,
                model.input_s2: s2_batch,
                model.input_y: y_batch,
                model.dropout_keep_prob: 1.0
            }
            step, loss, accuracy, curr_softmax_scores, curr_predictions, curr_golds = sess.run(
                [
                    global_step, model.loss, model.accuracy,
                    model.softmax_scores, model.predictions,
                    model.golds
                ], feed_dict)

            # Keep only the real examples, discarding the padding.
            golds += list(curr_golds[:left_num])
            preds += list(curr_predictions[:left_num])
            softmax_scores += list(curr_softmax_scores[:left_num])

            break

        start_index += batch_size

    alphabet = Alphabet()
    for i in range(num_classes):
        alphabet.add(str(i))
    confusionMatrix = ConfusionMatrix(alphabet)
    predictions = list(map(str, preds))
    golds = list(map(str, golds))
    confusionMatrix.add_list(predictions, golds)

    id_file = ""
    if tag == "dev":
        id_file = train_data_dir + "/dev/id"
    if tag == "test":
        id_file = train_data_dir + "/test/id"

    subtask = ""
    if train_data_dir.split("/")[-1] == "QA":
        subtask = "A"
    if train_data_dir.split("/")[-1] == "QQ":
        subtask = "B"

    # One "prediction \t positive-class score" line per example.
    pred_file = train_data_dir + "/result.%s.txt" % (timestamp)
    with open(pred_file, "w") as fw:
        for i, s in enumerate(softmax_scores):
            fw.write("%d\t%.4f\n" % (preds[i], s[num_classes - 1]))

    print(pred_file, id_file, tag, subtask)
    map_score, mrr_score = get_rank_score_by_file(
        pred_file, id_file, tag, subtask)

    return map_score, mrr_score, confusionMatrix.get_accuracy()
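
The final-batch handling above pads a short last batch by wrapping around to the start of the data, runs the model on the padded batch, and then keeps only the first left_num outputs. A minimal self-contained sketch of that pattern (the function name and data are invented for illustration):

import numpy as np

def batches_with_wraparound(data, batch_size):
    """Yield (batch, n_valid) pairs. The last batch is padded by
    wrapping around to the start; n_valid says how many of its rows
    are real examples (the rest are padding to discard)."""
    n = len(data)
    start = 0
    while start < n:
        if start + batch_size <= n:
            yield data[start:start + batch_size], batch_size
        else:
            left = n - start
            padded = np.concatenate((data[start:], data[:batch_size - left]), axis=0)
            yield padded, left
            break
        start += batch_size

# Hypothetical usage: summing each row stands in for a model call.
data = np.arange(10).reshape(5, 2)
outputs = []
for batch, n_valid in batches_with_wraparound(data, batch_size=2):
    outputs += list(batch.sum(axis=1)[:n_valid])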