Beispiel #1
0
def evaluate(gold_file, pred_file):

    with codecs.open(gold_file, encoding="utf-8") as fin_gold, codecs.open(
            pred_file, encoding="utf-8") as fin_pred:

        dict_P_to_url_label = {}
        for line in fin_gold:
            P, url, label, _ = line.strip().split("\t")
            if P not in dict_P_to_url_label:
                dict_P_to_url_label[P] = set()
            dict_P_to_url_label[P].add((url.strip(), label))

        #
        predict_set = set()
        for line in fin_pred:
            url, s, p, o, confidence = line.strip().split("\t")
            predict_set.add((url.strip(), p))

        alphabet = Alphabet()
        alphabet.add("0")
        alphabet.add("1")

        # 评估

        marco_p, marco_r, marco_f = 0, 0, 0
        N = 0

        for P in sorted(dict_P_to_url_label.keys()):

            confusionMatrix = ConfusionMatrix(alphabet)

            recall_error_cases = []
            precision_error_cases = []

            for url, label in dict_P_to_url_label[P]:

                pred = "0"
                if (url, P) in predict_set:
                    pred = "1"

                if label != pred:

                    if label == "1" and pred == "0":
                        recall_error_cases.append("%s\t%s->%s" %
                                                  (url, label, pred))

                    if label == "0" and pred == "1":
                        precision_error_cases.append("%s\t%s->%s" %
                                                     (url, label, pred))

                confusionMatrix.add(pred, label)

            print "==" * 40
            print P
            print
            confusionMatrix.print_out()
            p, r, f = confusionMatrix.get_prf("1")
            marco_p += p
            marco_r += r
            marco_f += f
            N += 1

            print "\n==>recall error cases:"
            print "\n".join(recall_error_cases)
            print "\n==>precision error cases:"
            print "\n".join(precision_error_cases)

    print "**" * 40
    print "marco, P: %f; R: %f; F1: %f" % (marco_p / N, marco_r / N,
                                           marco_f / N)
Beispiel #2
0
def evaluate(gold_file, pred_file):

    with codecs.open(gold_file, encoding="utf-8") as fin_gold, codecs.open(pred_file, encoding="utf-8") as fin_pred:

        dict_P_to_url_label = {}
        for line in fin_gold:
            P, url, label, _ = line.strip().split("\t")
            if P not in dict_P_to_url_label:
                dict_P_to_url_label[P] = set()
            dict_P_to_url_label[P].add((url.strip(), label))

        #
        predict_set = set()
        for line in fin_pred:
            url, s, p, o, confidence = line.strip().split("\t")
            predict_set.add((url.strip(), p))

        alphabet = Alphabet()
        alphabet.add("0")
        alphabet.add("1")

        # 评估

        marco_p, marco_r, marco_f = 0, 0, 0
        N = 0

        for P in sorted(dict_P_to_url_label.keys()):

            confusionMatrix = ConfusionMatrix(alphabet)

            recall_error_cases = []
            precision_error_cases= []

            for url, label in dict_P_to_url_label[P]:

                pred = "0"
                if (url, P) in predict_set:
                    pred = "1"

                if label != pred:

                    if label == "1" and pred == "0":
                        recall_error_cases.append("%s\t%s->%s" % (url, label, pred))

                    if label == "0" and pred == "1":
                        precision_error_cases.append("%s\t%s->%s" % (url, label, pred))

                confusionMatrix.add(pred, label)

            print "==" * 40
            print P
            print
            confusionMatrix.print_out()
            p, r, f = confusionMatrix.get_prf("1")
            marco_p += p
            marco_r += r
            marco_f += f
            N += 1

            print "\n==>recall error cases:"
            print "\n".join(recall_error_cases)
            print "\n==>precision error cases:"
            print "\n".join(precision_error_cases)

    print "**" * 40
    print "marco, P: %f; R: %f; F1: %f" % (marco_p / N, marco_r / N, marco_f / N)