Example #1
File: scorer.py Project: soiliml/conll15st
def evaluate_sense(gold_list, predicted_list):
	"""Evaluate sense classifier

	The label 'no' is for the relations that are missed by the system
	because the arguments don't match any of the gold relations.
	"""
	sense_alphabet = Alphabet()
	for relation in gold_list:
		sense_alphabet.add(relation['Sense'][0])
	sense_alphabet.add('no')
	sense_cm = ConfusionMatrix(sense_alphabet)
	gold_to_predicted_map, predicted_to_gold_map = \
			_link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

	# Each gold relation is either aligned with a predicted relation (scored
	# by sense) or counted as a miss ('no' on the predicted axis).
	for i, gold_relation in enumerate(gold_list):
		if i in gold_to_predicted_map:
			predicted_sense = gold_to_predicted_map[i]['Sense'][0]
			if predicted_sense in gold_relation['Sense']:
				sense_cm.add(predicted_sense, predicted_sense)
			else:
				if not sense_cm.alphabet.has_label(predicted_sense):
					predicted_sense = 'no'
				sense_cm.add(predicted_sense, gold_relation['Sense'][0])
		else:
			sense_cm.add('no', gold_relation['Sense'][0])

	# Predicted relations that aligned with no gold relation are false alarms.
	for i, predicted_relation in enumerate(predicted_list):
		if i not in predicted_to_gold_map:
			predicted_sense = predicted_relation['Sense'][0]
			if not sense_cm.alphabet.has_label(predicted_sense):
				predicted_sense = 'no'
			sense_cm.add(predicted_sense, 'no')
	return sense_cm
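
These snippets all come from CoNLL shared-task scoring code and lean on the project's Alphabet and ConfusionMatrix classes, plus helpers such as _link_gold_predicted and spans_exact_matching that are not shown here. To run the examples standalone, a minimal stand-in for the two classes can be sketched from the API the snippets actually call (add, has_label, growing, NEGATIVE_CLASS, get_prf, print_out); this is an inferred sketch, not the official implementation:

# Minimal stand-in for the scorer's confusion-matrix module, inferred
# from the calls the examples make.  NEGATIVE_CLASS's value is assumed.
class Alphabet(object):
    def __init__(self):
        self._labels = []
        self.growing = True          # when False, add() ignores new labels

    def add(self, label):
        if self.growing and label not in self._labels:
            self._labels.append(label)

    def has_label(self, label):
        return label in self._labels


class ConfusionMatrix(object):
    NEGATIVE_CLASS = '__NEGATIVE__'  # assumed sentinel label

    def __init__(self, alphabet):
        self.alphabet = alphabet
        self._counts = {}            # (predicted, gold) -> count

    def add(self, predicted, gold):
        key = (predicted, gold)
        self._counts[key] = self._counts.get(key, 0) + 1

    def get_prf(self, label):
        tp = self._counts.get((label, label), 0)
        fp = sum(c for (p, g), c in self._counts.items()
                 if p == label and g != label)
        fn = sum(c for (p, g), c in self._counts.items()
                 if g == label and p != label)
        precision = tp / float(tp + fp) if tp + fp else 0.0
        recall = tp / float(tp + fn) if tp + fn else 0.0
        f1 = (2 * precision * recall / (precision + recall)
              if precision + recall else 0.0)
        return precision, recall, f1

    def print_out(self):
        for (p, g), c in sorted(self._counts.items()):
            print('predicted=%s  gold=%s  count=%d' % (p, g, c))

The stand-in deliberately counts raw (predicted, gold) pairs and only uses the alphabet for has_label() queries, which is all the examples below require.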
Example #2
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    # Build the label set from the valid senses only; anything else is
    # scored against the negative class below.
    for sense in valid_senses:
        sense_alphabet.add(sense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    # Freeze the alphabet so labels seen later cannot extend it.
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation['Sense'][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation['Sense'][0]
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
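
A toy invocation of this pair-based variant (relation dicts reduced to the single 'Sense' field the function reads; data made up, runnable against the stand-in classes sketched under Example #1):

gold = {'Sense': ['Comparison.Contrast']}
pred = {'Sense': ['Expansion.Conjunction']}
relation_pairs = [
    (gold, pred),    # aligned pair whose senses disagree
    (gold, None),    # gold relation the system missed entirely
    (None, pred),    # spurious relation the system produced
]
valid_senses = {'Comparison.Contrast', 'Expansion.Conjunction'}
cm = evaluate_sense(relation_pairs, valid_senses)
print(cm.get_prf('Comparison.Contrast'))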
Example #3
def compute_binary_eval_metric(gold_list, predicted_list, matching_fn):
    """Compute binary evaluation metric

    """
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    for gold_span in gold_list:
        found_match = False
        for i, predicted_span in enumerate(predicted_list):
            if matching_fn(gold_span,
                           predicted_span) and not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            cm.add('no', 'yes')
    # Predicted spans that did not match any gold span
    for matched in matched_predicted:
        if not matched:
            cm.add('yes', 'no')
    return cm
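
The matching here is greedy first-match: each predicted span is consumed at most once, and the scan is O(|gold| x |predicted|). A possible call with made-up spans, using sets of token indices and plain equality as the matcher:

gold_spans = [{1, 2, 3}, {7, 8}]
predicted_spans = [{1, 2, 3}, {10, 11}]
cm = compute_binary_eval_metric(gold_spans, predicted_spans,
                                matching_fn=lambda g, p: g == p)
# one hit ({1, 2, 3}), one missed gold span ({7, 8}),
# one unmatched prediction ({10, 11})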
Example #4
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    # Collect the gold senses that actually occur among the valid senses;
    # these become the matrix labels.
    for g_relation, _ in relation_pairs:
        if g_relation is not None:
            sense = g_relation["Sense"][0]
            if sense in valid_senses:
                sense_alphabet.add(sense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation["Sense"][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation["Sense"][0]
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
Example #5
def Evalution(gold_file_path, pred_file_path):
    gold_authorIdPaperId_to_label = {}
    pred_authorIdPaperId_to_label = {}

    gold_data = util.read_dict_from_csv(gold_file_path)
    for item in gold_data:
        AuthorId = item["AuthorId"]
        # positive examples
        for paperId in item["ConfirmedPaperIds"].split(" "):
            gold_authorIdPaperId_to_label[(AuthorId, paperId)] = "1"
        # negative examples
        for paperId in item["DeletedPaperIds"].split(" "):
            gold_authorIdPaperId_to_label[(AuthorId, paperId)] = "0"

    pred_data = util.read_dict_from_csv(pred_file_path)
    for item in pred_data:
        AuthorId = item["AuthorId"]
        # positive examples
        for paperId in item["ConfirmedPaperIds"].split(" "):
            pred_authorIdPaperId_to_label[(AuthorId, paperId)] = "1"
        # negative examples
        for paperId in item["DeletedPaperIds"].split(" "):
            pred_authorIdPaperId_to_label[(AuthorId, paperId)] = "0"

    # evaluation
    alphabet = Alphabet()
    alphabet.add("0")
    alphabet.add("1")

    cm = ConfusionMatrix(alphabet)
    for AuthorId, paperId in gold_authorIdPaperId_to_label:
        gold = gold_authorIdPaperId_to_label[(AuthorId, paperId)]
        pred = pred_authorIdPaperId_to_label[(AuthorId, paperId)]
        cm.add(pred, gold)

    return cm
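
One caveat: the lookup pred_authorIdPaperId_to_label[(AuthorId, paperId)] raises KeyError if the prediction file omits a gold pair. If that can happen, a defensive variant (hypothetical, treating a missing pair as the negative label) would be:

# hypothetical: default a pair absent from the predictions to "0"
pred = pred_authorIdPaperId_to_label.get((AuthorId, paperId), "0")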
Example #6
def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations
    that are missed by the system
    because the arguments don't match any of the gold relations.
    """
    print("In function: evaluate_sense")
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        sense = relation['Sense'][0]
        if sense in valid_senses:
            sense_alphabet.add(sense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
            _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
Example #7
def compute_span_exact_match_metric(gold_list, predicted_list, verbose=False):
    """Compute binary evaluation metric

    """
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    predicted = defaultdict(list)
    for i, pspan in enumerate(predicted_list):
        predicted[pspan].append(i)
    empty_list = []
    for gold in gold_list:
        found_match = False
        indices = predicted.get(gold, empty_list)
        for i in indices:
            if not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            if verbose:
                print('Span:')
                print('<<<\t{:s}'.format(gold).encode(ENCODING))
                print()
            cm.add('no', 'yes')
    # Predicted spans that did not match any gold span
    for matched, pred in zip(matched_predicted, predicted_list):
        if not matched:
            if verbose:
                print('Span:')
                print('>>>\t{:s}'.format(pred).encode(ENCODING))
                print()
            cm.add('yes', 'no')
    return cm
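
This exact-match variant replaces the nested scan of Example #3 with a dict keyed by span, so each gold span is resolved in O(1); spans must therefore be hashable (tuples work, lists do not). A toy call, assuming (begin, end) tuple spans:

gold_spans = [(0, 4), (10, 14)]
predicted_spans = [(0, 4), (20, 24)]
cm = compute_span_exact_match_metric(gold_spans, predicted_spans)
# verbose=True would also print each unmatched span, though the '{:s}'
# format used there expects string spans.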
Example #8
def evaluate_sense(gold_list, predicted_list, verbose=False):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations

    that are missed by the system
    because the arguments don't match any of the gold relations.

    """
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)

    isense = None
    for relation in gold_list:
        isense = relation['Sense'][0]
        if isense in valid_senses:
            sense_alphabet.add(isense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list,
                             spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    if verbose:
                        print('Sense:')
                        print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                        print('>>>\t{:s}'.format(predicted_sense).encode(
                            ENCODING))
                        print('Arg1:\t{:s}'.format(
                            gold_relation['Arg1']['RawText']).encode(ENCODING))
                        print('Arg2:\t{:s}'.format(
                            gold_relation['Arg2']['RawText']).encode(ENCODING))
                        print()
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                if verbose:
                    print('Sense:')
                    print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                    print('>>>\t{:s}'.format(
                        ConfusionMatrix.NEGATIVE_CLASS).encode(
                        ENCODING))
                    print('Arg1:\t{:s}'.format(
                        gold_relation['Arg1']['RawText']).encode(ENCODING))
                    print('Arg2:\t{:s}'.format(
                        gold_relation['Arg2']['RawText']).encode(ENCODING))
                    print()
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            if verbose:
                # A false alarm: the predicted sense against an empty gold side.
                print('Sense:')
                print('<<<\t{:s}'.format(
                    ConfusionMatrix.NEGATIVE_CLASS).encode(ENCODING))
                print('>>>\t{:s}'.format(predicted_sense).encode(ENCODING))
                print('Arg1:\t{:s}'.format(
                    predicted_relation['Arg1']['RawText']).encode(ENCODING))
                print('Arg2:\t{:s}'.format(
                    predicted_relation['Arg2']['RawText']).encode(ENCODING))
                print()
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
Example #9
def compute_binary_eval_metric(predicted_list, gold_list, binary_alphabet):
    cm = ConfusionMatrix(binary_alphabet)
    for (predicted_span, gold_span) in zip(predicted_list, gold_list):
        cm.add(predicted_span, gold_span)
    return cm
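
Unlike the matching-based variants above, this overload assumes the two lists are already aligned position by position (zip silently truncates to the shorter list) and that every label is in binary_alphabet. A minimal call against the stand-in classes:

alphabet = Alphabet()
alphabet.add('yes')
alphabet.add('no')
cm = compute_binary_eval_metric(['yes', 'no', 'yes'],   # predicted
                                ['yes', 'yes', 'yes'],  # gold
                                alphabet)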
Example #10
def evaluate(gold_file, pred_file):

    with codecs.open(gold_file, encoding="utf-8") as fin_gold, codecs.open(
            pred_file, encoding="utf-8") as fin_pred:

        dict_P_to_url_label = {}
        for line in fin_gold:
            P, url, label, _ = line.strip().split("\t")
            if P not in dict_P_to_url_label:
                dict_P_to_url_label[P] = set()
            dict_P_to_url_label[P].add((url.strip(), label))

        # collect the predicted (url, property) pairs
        predict_set = set()
        for line in fin_pred:
            url, s, p, o, confidence = line.strip().split("\t")
            predict_set.add((url.strip(), p))

        alphabet = Alphabet()
        alphabet.add("0")
        alphabet.add("1")

        # evaluation

        macro_p, macro_r, macro_f = 0, 0, 0
        N = 0

        for P in sorted(dict_P_to_url_label.keys()):

            confusionMatrix = ConfusionMatrix(alphabet)

            recall_error_cases = []
            precision_error_cases = []

            for url, label in dict_P_to_url_label[P]:

                pred = "0"
                if (url, P) in predict_set:
                    pred = "1"

                if label != pred:

                    if label == "1" and pred == "0":
                        recall_error_cases.append("%s\t%s->%s" %
                                                  (url, label, pred))

                    if label == "0" and pred == "1":
                        precision_error_cases.append("%s\t%s->%s" %
                                                     (url, label, pred))

                confusionMatrix.add(pred, label)

            print "==" * 40
            print P
            print
            confusionMatrix.print_out()
            p, r, f = confusionMatrix.get_prf("1")
            marco_p += p
            marco_r += r
            marco_f += f
            N += 1

            print "\n==>recall error cases:"
            print "\n".join(recall_error_cases)
            print "\n==>precision error cases:"
            print "\n".join(precision_error_cases)

    print "**" * 40
    print "marco, P: %f; R: %f; F1: %f" % (marco_p / N, marco_r / N,
                                           marco_f / N)
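
The closing figures are macro averages: precision, recall, and F1 are computed per property P and then averaged over the N properties, so each property counts equally no matter how many (url, label) pairs it contributes. Illustrated on made-up per-property scores:

per_property = [(0.9, 0.8, 0.84), (0.5, 1.0, 0.67)]  # (P, R, F) per property
N = len(per_property)
macro_p = sum(p for p, _, _ in per_property) / N
macro_r = sum(r for _, r, _ in per_property) / N
macro_f = sum(f for _, _, f in per_property) / N
print("macro, P: %f; R: %f; F1: %f" % (macro_p, macro_r, macro_f))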