Example #1
def compute_binary_eval_metric(gold_list, predicted_list, matching_fn):
    """Compute binary evaluation metric

    """
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    for gold_span in gold_list:
        found_match = False
        for i, predicted_span in enumerate(predicted_list):
            if matching_fn(gold_span, predicted_span) and \
               not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            cm.add('no', 'yes')
    # Predicted span that does not match with any
    for matched in matched_predicted:
        if not matched:
            cm.add('yes', 'no')
    return cm
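The Alphabet and ConfusionMatrix classes are external scorer utilities assumed by all of these examples; cm.add(predicted, gold) records one decision and cm.get_prf(label) (used in Examples #16 and #17) returns precision, recall, and F1 for a single label. A minimal, hypothetical driver for Example #1, assuming spans are plain token-index tuples and using an exact-match predicate:

def spans_exact_match(gold_span, predicted_span):
    # Hypothetical matcher: treat spans as comparable objects and require equality.
    return gold_span == predicted_span

gold_spans = [(1, 2, 3), (7, 8)]
predicted_spans = [(1, 2, 3), (10, 11)]
cm = compute_binary_eval_metric(gold_spans, predicted_spans, spans_exact_match)
precision, recall, f1 = cm.get_prf('yes')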
Example #2
def evaluate_sense(gold_list, predicted_list):
	"""Evaluate sense classifier

	The label 'no' is for the relations that are missed by the system
	because the arguments don't match any of the gold relations.
	"""
	sense_alphabet = Alphabet()
	for relation in gold_list:
		sense_alphabet.add(relation['Sense'][0])
	sense_alphabet.add('no')
	sense_cm = ConfusionMatrix(sense_alphabet)
	gold_to_predicted_map, predicted_to_gold_map = \
			_link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

	for i, gold_relation in enumerate(gold_list):
		if i in gold_to_predicted_map:
			predicted_sense = gold_to_predicted_map[i]['Sense'][0]
			if predicted_sense in gold_relation['Sense']:
				sense_cm.add(predicted_sense, predicted_sense)
			else:
				if not sense_cm.alphabet.has_label(predicted_sense):
					predicted_sense = 'no'
				sense_cm.add(predicted_sense, gold_relation['Sense'][0])
		else:
			sense_cm.add('no', gold_relation['Sense'][0])

	for i, predicted_relation in enumerate(predicted_list):
		if i not in predicted_to_gold_map:
			predicted_sense = predicted_relation['Sense'][0]
			if not sense_cm.alphabet.has_label(predicted_sense):
				predicted_sense = 'no'
			sense_cm.add(predicted_sense, 'no')
	return sense_cm
Example #3
def Evaluation_all(gold_label, predict_label):
    binary_alphabet = Alphabet()
    for i in range(20):
        binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

    cm = ConfusionMatrix(binary_alphabet)
    cm.add_list(predict_label, gold_label)
    macro_p, macro_r, macro_f1 = cm.get_average_prf()
    overall_accuracy = cm.get_accuracy()
    return overall_accuracy, macro_p, macro_r, macro_f1
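DICT_INDEX_TO_LABEL is a module-level index-to-label mapping that Examples #3, #4, #7 and #8 assume is already defined; its real contents are not shown here. A hypothetical placeholder and call, only to illustrate the expected shapes:

# Hypothetical placeholder mapping; the real one carries 20 task-specific label strings.
DICT_INDEX_TO_LABEL = {i: 'class_%d' % i for i in range(20)}

gold = ['class_0', 'class_1', 'class_1', 'class_2']
pred = ['class_0', 'class_1', 'class_0', 'class_2']
accuracy, macro_p, macro_r, macro_f1 = Evaluation_all(gold, pred)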
Example #4
def Evaluation_lst(gold_label, predict_label, print_all=False):
    binary_alphabet = Alphabet()
    for i in range(20):
        binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

    cm = ConfusionMatrix(binary_alphabet)
    cm.add_list(predict_label, gold_label)

    if print_all:
        cm.print_out()
    overall_accuracy = cm.get_accuracy()
    return overall_accuracy
Example #5
def Evalation_list(gold_label, predict_label, print_all=False):
    binary_alphabet = Alphabet()
    for i in range(2):
        binary_alphabet.add(str(i))

    cm = ConfusionMatrix(binary_alphabet)
    predict_label = list(map(str, predict_label))
    gold_label = list(map(str, gold_label))
    cm.add_list(predict_label, gold_label)

    if print_all:
        cm.print_out()
    overall_accuracy = cm.get_accuracy()
    return overall_accuracy
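Because Example #5 maps both label lists to strings before filling the matrix, it can be called directly with integer class ids. A small hypothetical call:

gold = [0, 1, 1, 0, 1]
pred = [0, 1, 0, 0, 1]
accuracy = Evalation_list(gold, pred, print_all=True)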
Example #6
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    #for g_relation, _ in relation_pairs:
    #    if g_relation is not None:
    #        sense = g_relation['Sense'][0]
    #        if sense in valid_senses:
    #            sense_alphabet.add(sense)
    for sense in valid_senses:
        sense_alphabet.add(sense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation['Sense'][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation['Sense'][0]
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
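In Example #6, relation_pairs is expected to be a list of (gold_relation, predicted_relation) tuples in which either element may be None (a missed gold relation or a spurious prediction), and each relation carries its label under the 'Sense' key. A small hypothetical input with illustrative sense labels might look like this:

valid_senses = {'Comparison.Contrast', 'Expansion.Conjunction'}
relation_pairs = [
    ({'Sense': ['Comparison.Contrast']}, {'Sense': ['Comparison.Contrast']}),    # correct sense
    ({'Sense': ['Expansion.Conjunction']}, {'Sense': ['Comparison.Contrast']}),  # wrong sense
    ({'Sense': ['Comparison.Contrast']}, None),                                  # missed relation
    (None, {'Sense': ['Expansion.Conjunction']}),                                # spurious prediction
]
sense_cm = evaluate_sense(relation_pairs, valid_senses)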
Example #7
def Evaluation(gold_file_path, predict_file_path):
    with open(gold_file_path) as gold_file, open(predict_file_path) as predict_file:
        gold_list = [int(line.strip().split('\t')[0]) for line in gold_file]
        predicted_list = [int(line.strip().split("\t")[0]) for line in predict_file]
        predict_labels = [config.id2category[int(predict)] for predict in predicted_list]
        gold_labels = [config.id2category[int(gold)] for gold in gold_list]
        binary_alphabet = Alphabet()
        for i in range(20):
            binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

        cm = ConfusionMatrix(binary_alphabet)
        cm.add_list(predict_labels, gold_labels)

        confusion_matrix(gold_list, predicted_list)
        cm.print_summary()
        macro_p, macro_r, macro_f1 = cm.get_average_prf()
        overall_accuracy = cm.get_accuracy()
        return overall_accuracy, macro_p, macro_r, macro_f1
Example #8
def Evaluation(gold_file_path, predict_file_path):
    with open(gold_file_path) as gold_file, open(predict_file_path) as predict_file:

        gold_list = [line.strip().split('\t')[0] for line in gold_file]
        predicted_list = [line.strip().split("\t#\t")[0] for line in predict_file]

        binary_alphabet = Alphabet()
        for i in range(18):
            binary_alphabet.add(DICT_INDEX_TO_LABEL[i])

        cm = ConfusionMatrix(binary_alphabet)
        cm.add_list(predicted_list, gold_list)

        cm.print_out()
        macro_p, macro_r, macro_f1 = cm.get_average_prf()
        overall_accuracy = cm.get_accuracy()
        return overall_accuracy, macro_p, macro_r, macro_f1
Example #9
def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations
    that are missed by the system
    because the arguments don't match any of the gold relations.
    """
    print("In function: evaluate_sense")
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        sense = relation['Sense'][0]
        if sense in valid_senses:
            sense_alphabet.add(sense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
            _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
Example #10
def compute_binary_eval_metric(gold_list, predicted_list, matching_fn):
    """Compute binary evaluation metric

    """
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    for gold_span in gold_list:
        found_match = False
        for i, predicted_span in enumerate(predicted_list):
            if matching_fn(gold_span,
                           predicted_span) and not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            cm.add('no', 'yes')
    # Predicted span that does not match with any
    for matched in matched_predicted:
        if not matched:
            cm.add('yes', 'no')
    return cm
Example #11
def evaluate_sense(gold_list, predicted_list):
	"""Evaluate sense classifier

	The label 'no' is for the relations that are missed by the system
	because the arguments don't match any of the gold relations.
	"""
	sense_alphabet = Alphabet()
	for relation in gold_list:
		sense_alphabet.add(relation['Sense'][0])
	sense_alphabet.add('no')
	sense_cm = ConfusionMatrix(sense_alphabet)
	gold_to_predicted_map, predicted_to_gold_map = \
			_link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

	for i, gold_relation in enumerate(gold_list):
		if i in gold_to_predicted_map:
			predicted_sense = gold_to_predicted_map[i]['Sense'][0]
			if predicted_sense in gold_relation['Sense']:
				sense_cm.add(predicted_sense, predicted_sense)
			else:
				if not sense_cm.alphabet.has_label(predicted_sense):
					predicted_sense = 'no'
				sense_cm.add(predicted_sense, gold_relation['Sense'][0])
		else:
			sense_cm.add('no', gold_relation['Sense'][0])

	for i, predicted_relation in enumerate(predicted_list):
		if i not in predicted_to_gold_map:
			predicted_sense = predicted_relation['Sense'][0]
			if not sense_cm.alphabet.has_label(predicted_sense):
				predicted_sense = 'no'
			sense_cm.add(predicted_sense, 'no')
	return sense_cm
Example #12
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    for g_relation, _ in relation_pairs:
        if g_relation is not None:
            sense = g_relation["Sense"][0]
            if sense in valid_senses:
                sense_alphabet.add(sense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation["Sense"][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation["Sense"][0]
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
Example #13
def compute_span_exact_match_metric(gold_list, predicted_list, verbose=False):
    """Compute binary evaluation metric

    """
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    predicted = defaultdict(list)
    for i, pspan in enumerate(predicted_list):
        predicted[pspan].append(i)
    empty_list = []
    key = indices = None
    for gold in gold_list:
        found_match = False
        indices = predicted.get(gold, empty_list)
        for i in indices:
            if not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            if verbose:
                print('Span:')
                print('<<<\t{:s}'.format(gold).encode(ENCODING))
                print()
            cm.add('no', 'yes')
    # Predicted span that does not match with any
    for matched, pred in zip(matched_predicted, predicted_list):
        if not matched:
            if verbose:
                print('Span:')
                print('>>>\t{:s}'.format(pred).encode(ENCODING))
                print()
            cm.add('yes', 'no')
    return cm
Example #14
def Evalution(gold_file_path, pred_file_path):
    gold_authorIdPaperId_to_label = {}
    pred_authorIdPaperId_to_label = {}

    gold_data = util.read_dict_from_csv(gold_file_path)
    for item in gold_data:
        AuthorId = item["AuthorId"]
        # positive samples
        for paperId in item["ConfirmedPaperIds"].split(" "):
            gold_authorIdPaperId_to_label[(AuthorId, paperId)] = "1"
        # negative samples
        for paperId in item["DeletedPaperIds"].split(" "):
            gold_authorIdPaperId_to_label[(AuthorId, paperId)] = "0"

    pred_data = util.read_dict_from_csv(pred_file_path)
    for item in pred_data:
        AuthorId = item["AuthorId"]
        # positive samples
        for paperId in item["ConfirmedPaperIds"].split(" "):
            pred_authorIdPaperId_to_label[(AuthorId, paperId)] = "1"
        # negative samples
        for paperId in item["DeletedPaperIds"].split(" "):
            pred_authorIdPaperId_to_label[(AuthorId, paperId)] = "0"

    # evaluation
    alphabet = Alphabet()
    alphabet.add("0")
    alphabet.add("1")

    cm = ConfusionMatrix(alphabet)
    for AuthorId, paperId in gold_authorIdPaperId_to_label:
        gold = gold_authorIdPaperId_to_label[(AuthorId, paperId)]
        pred = pred_authorIdPaperId_to_label[(AuthorId, paperId)]
        cm.add(pred, gold)

    return cm
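Example #14 assumes util.read_dict_from_csv returns one dict per row with AuthorId, ConfirmedPaperIds and DeletedPaperIds columns, and it implicitly requires every gold (AuthorId, paperId) pair to also appear in the prediction file. A hypothetical follow-up on the returned matrix, reusing methods seen in the other examples:

cm = Evalution('gold.csv', 'pred.csv')  # hypothetical file paths
cm.print_out()
precision, recall, f1 = cm.get_prf("1")
accuracy = cm.get_accuracy()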
Example #15
def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations
    that are missed by the system
    because the arguments don't match any of the gold relations.
    """
    print("In function: evaluate_sense")
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        sense = relation['Sense'][0]
        if sense in valid_senses:
            sense_alphabet.add(sense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
            _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
Example #16
def evaluate(gold_file, pred_file):

    with codecs.open(gold_file, encoding="utf-8") as fin_gold, codecs.open(pred_file, encoding="utf-8") as fin_pred:

        dict_P_to_url_label = {}
        for line in fin_gold:
            P, url, label, _ = line.strip().split("\t")
            if P not in dict_P_to_url_label:
                dict_P_to_url_label[P] = set()
            dict_P_to_url_label[P].add((url.strip(), label))

        #
        predict_set = set()
        for line in fin_pred:
            url, s, p, o, confidence = line.strip().split("\t")
            predict_set.add((url.strip(), p))

        alphabet = Alphabet()
        alphabet.add("0")
        alphabet.add("1")

        # evaluation

        marco_p, marco_r, marco_f = 0, 0, 0
        N = 0

        for P in sorted(dict_P_to_url_label.keys()):

            confusionMatrix = ConfusionMatrix(alphabet)

            recall_error_cases = []
            precision_error_cases= []

            for url, label in dict_P_to_url_label[P]:

                pred = "0"
                if (url, P) in predict_set:
                    pred = "1"

                if label != pred:

                    if label == "1" and pred == "0":
                        recall_error_cases.append("%s\t%s->%s" % (url, label, pred))

                    if label == "0" and pred == "1":
                        precision_error_cases.append("%s\t%s->%s" % (url, label, pred))

                confusionMatrix.add(pred, label)

            print "==" * 40
            print P
            print
            confusionMatrix.print_out()
            p, r, f = confusionMatrix.get_prf("1")
            marco_p += p
            marco_r += r
            marco_f += f
            N += 1

            print "\n==>recall error cases:"
            print "\n".join(recall_error_cases)
            print "\n==>precision error cases:"
            print "\n".join(precision_error_cases)

    print "**" * 40
    print "marco, P: %f; R: %f; F1: %f" % (marco_p / N, marco_r / N, marco_f / N)
Example #17
def evaluate(gold_file, pred_file):

    with codecs.open(gold_file, encoding="utf-8") as fin_gold, codecs.open(
            pred_file, encoding="utf-8") as fin_pred:

        dict_P_to_url_label = {}
        for line in fin_gold:
            P, url, label, _ = line.strip().split("\t")
            if P not in dict_P_to_url_label:
                dict_P_to_url_label[P] = set()
            dict_P_to_url_label[P].add((url.strip(), label))

        #
        predict_set = set()
        for line in fin_pred:
            url, s, p, o, confidence = line.strip().split("\t")
            predict_set.add((url.strip(), p))

        alphabet = Alphabet()
        alphabet.add("0")
        alphabet.add("1")

        # evaluation

        marco_p, marco_r, marco_f = 0, 0, 0
        N = 0

        for P in sorted(dict_P_to_url_label.keys()):

            confusionMatrix = ConfusionMatrix(alphabet)

            recall_error_cases = []
            precision_error_cases = []

            for url, label in dict_P_to_url_label[P]:

                pred = "0"
                if (url, P) in predict_set:
                    pred = "1"

                if label != pred:

                    if label == "1" and pred == "0":
                        recall_error_cases.append("%s\t%s->%s" %
                                                  (url, label, pred))

                    if label == "0" and pred == "1":
                        precision_error_cases.append("%s\t%s->%s" %
                                                     (url, label, pred))

                confusionMatrix.add(pred, label)

            print "==" * 40
            print P
            print
            confusionMatrix.print_out()
            p, r, f = confusionMatrix.get_prf("1")
            marco_p += p
            marco_r += r
            marco_f += f
            N += 1

            print "\n==>recall error cases:"
            print "\n".join(recall_error_cases)
            print "\n==>precision error cases:"
            print "\n".join(precision_error_cases)

    print "**" * 40
    print "marco, P: %f; R: %f; F1: %f" % (marco_p / N, marco_r / N,
                                           marco_f / N)
Example #18
def evaluate_sense(gold_list, predicted_list, verbose=False):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations
    that are missed by the system
    because the arguments don't match any of the gold relations.

    """
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)

    isense = None
    for relation in gold_list:
        isense = relation['Sense'][0]
        if isense in valid_senses:
            sense_alphabet.add(isense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list,
                             spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    if verbose:
                        print('Sense:')
                        print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                        print('>>>\t{:s}'.format(predicted_sense).encode(
                            ENCODING))
                        print('Arg1:\t{:s}'.format(
                            gold_relation['Arg1']['RawText']).encode(ENCODING))
                        print('Arg2:\t{:s}'.format(
                            gold_relation['Arg2']['RawText']).encode(ENCODING))
                        print()
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                if verbose:
                    print('Sense:')
                    print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                    print('>>>\t{:s}'.format(
                        ConfusionMatrix.NEGATIVE_CLASS).encode(
                        ENCODING))
                    print('Arg1:\t{:s}'.format(
                        gold_relation['Arg1']['RawText']).encode(ENCODING))
                    print('Arg2:\t{:s}'.format(
                        gold_relation['Arg2']['RawText']).encode(ENCODING))
                    print()
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            if verbose:
                print('Sense:')
                print('<<<\t{:s}'.format(
                    ConfusionMatrix.NEGATIVE_CLASS).encode(ENCODING))
                print('>>>\t{:s}'.format(predicted_sense).encode(ENCODING))
                print('Arg1:\t{:s}'.format(
                    predicted_relation['Arg1']['RawText']).encode(ENCODING))
                print('Arg2:\t{:s}'.format(
                    predicted_relation['Arg2']['RawText']).encode(ENCODING))
                print()
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
Example #19
        def test_step_for_cqa(s1_all, s2_all, y_all, tag):
            """
            Evaluates model on a dev/test set
            """
            golds = []
            preds = []
            softmax_scores = []

            n = len(s1_all)
            batch_size = FLAGS.batch_size
            start_index = 0
            while start_index < n:
                if start_index + batch_size <= n:
                    s1_batch = s1_all[start_index:start_index + batch_size]
                    s2_batch = s2_all[start_index:start_index + batch_size]
                    y_batch = y_all[start_index:start_index + batch_size]

                    feed_dict = {
                        model.input_s1: s1_batch,
                        model.input_s2: s2_batch,
                        model.input_y: y_batch,
                        model.dropout_keep_prob: 1.0
                    }

                    step, loss, accuracy, curr_softmax_scores, curr_predictions, curr_golds = sess.run(
                        [
                            global_step, model.loss, model.accuracy,
                            model.softmax_scores, model.predictions,
                            model.golds
                        ], feed_dict)

                    golds += list(curr_golds)
                    preds += list(curr_predictions)
                    softmax_scores += list(curr_softmax_scores)

                else:
                    left_num = n - start_index
                    # pad the last partial batch up to batch_size
                    s1_batch = np.concatenate(
                        (s1_all[start_index:], s1_all[:batch_size - left_num]),
                        axis=0)
                    s2_batch = np.concatenate(
                        (s2_all[start_index:], s2_all[:batch_size - left_num]),
                        axis=0)
                    y_batch = np.concatenate(
                        (y_all[start_index:], y_all[:batch_size - left_num]),
                        axis=0)

                    feed_dict = {
                        model.input_s1: s1_batch,
                        model.input_s2: s2_batch,
                        model.input_y: y_batch,
                        model.dropout_keep_prob: 1.0
                    }
                    step, loss, accuracy, curr_softmax_scores, curr_predictions, curr_golds = sess.run(
                        [
                            global_step, model.loss, model.accuracy,
                            model.softmax_scores, model.predictions,
                            model.golds
                        ], feed_dict)

                    golds += list(curr_golds[:left_num])
                    preds += list(curr_predictions[:left_num])
                    softmax_scores += list(curr_softmax_scores[:left_num])

                    break

                start_index += batch_size

            alphabet = Alphabet()
            for i in range(num_classes):
                alphabet.add(str(i))
            confusionMatrix = ConfusionMatrix(alphabet)
            predictions = list(map(str, preds))
            golds = list(map(str, golds))
            confusionMatrix.add_list(predictions, golds)

            id_file = ""
            if tag == "dev":
                id_file = train_data_dir + "/dev/id"
            if tag == "test":
                id_file = train_data_dir + "/test/id"

            subtask = ""
            if train_data_dir.split("/")[-1] == "QA":
                subtask = "A"
            if train_data_dir.split("/")[-1] == "QQ":
                subtask = "B"

            pred_file = train_data_dir + "/result.%s.txt" % (timestamp)
            with open(pred_file, "w") as fw:
                for i, s in enumerate(softmax_scores):
                    fw.write("%d\t%.4f\n" % (preds[i], s[num_classes - 1]))

            print(pred_file, id_file, tag, subtask)
            map_score, mrr_score = get_rank_score_by_file(
                pred_file, id_file, tag, subtask)

            return map_score, mrr_score, confusionMatrix.get_accuracy()
Example #20
        def test_step(s1_all, s2_all, y_all):
            """
            Evaluates model on a dev set
            """
            golds = []
            preds = []

            n = len(s1_all)
            batch_size = FLAGS.main_batch_size
            start_index = 0
            while start_index < n:
                if start_index + batch_size <= n:
                    s1_batch = s1_all[start_index: start_index + batch_size]
                    s2_batch = s2_all[start_index: start_index + batch_size]
                    y_batch = y_all[start_index: start_index + batch_size]

                    feed_dict = {

                        model.main_input_s1: s1_batch,
                        model.main_input_s2: s2_batch,
                        model.main_input_y: y_batch,

                        model.dropout_keep_prob: 1.0,
                        model.is_train: False
                    }
                    step, curr_predictions, curr_golds = sess.run(
                        [global_step,
                         model.main_predictions, model.main_golds], feed_dict)

                    golds += list(curr_golds)
                    preds += list(curr_predictions)

                else:
                    left_num = n - start_index
                    # pad the last partial batch up to batch_size
                    s1_batch = np.concatenate((s1_all[start_index:], s1_all[:batch_size - left_num]), axis=0)
                    s2_batch = np.concatenate((s2_all[start_index:], s2_all[:batch_size - left_num]), axis=0)
                    y_batch = np.concatenate((y_all[start_index:], y_all[:batch_size - left_num]), axis=0)

                    feed_dict = {
                        model.main_input_s1: s1_batch,
                        model.main_input_s2: s2_batch,
                        model.main_input_y: y_batch,

                        model.dropout_keep_prob: 1.0,
                        model.is_train: False
                    }

                    step, curr_predictions, curr_golds = sess.run(
                        [global_step,
                         model.main_predictions, model.main_golds], feed_dict)

                    golds += list(curr_golds[:left_num])
                    preds += list(curr_predictions[:left_num])

                    break

                start_index += batch_size

            alphabet = Alphabet()
            for i in range(main_num_classes):
                alphabet.add(str(i))
            confusionMatrix = ConfusionMatrix(alphabet)
            preds = list(map(str, preds))
            golds = list(map(str, golds))
            confusionMatrix.add_list(preds, golds)

            return confusionMatrix
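Example #20 returns the raw ConfusionMatrix rather than a score, so the caller decides what to report. A hypothetical follow-up, called from inside the same training scope and assuming s1_dev, s2_dev and y_dev are the already-vectorized dev arrays fed to the model:

confusion_matrix_dev = test_step(s1_dev, s2_dev, y_dev)
confusion_matrix_dev.print_out()
dev_accuracy = confusion_matrix_dev.get_accuracy()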