def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label 'no' is for the relations that are missed by the system
    because the arguments don't match any of the gold relations.
    """
    sense_alphabet = Alphabet()
    for relation in gold_list:
        sense_alphabet.add(relation['Sense'][0])
    sense_alphabet.add('no')
    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        if i in gold_to_predicted_map:
            predicted_sense = gold_to_predicted_map[i]['Sense'][0]
            if predicted_sense in gold_relation['Sense']:
                sense_cm.add(predicted_sense, predicted_sense)
            else:
                if not sense_cm.alphabet.has_label(predicted_sense):
                    predicted_sense = 'no'
                sense_cm.add(predicted_sense, gold_relation['Sense'][0])
        else:
            sense_cm.add('no', gold_relation['Sense'][0])

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = 'no'
            sense_cm.add(predicted_sense, 'no')
    return sense_cm
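# All snippets in this section depend on Alphabet and ConfusionMatrix helpers
# that are defined elsewhere in the codebase. The following is a minimal,
# hypothetical sketch of the interface they appear to expose (add, has_label,
# growing, NEGATIVE_CLASS, get_prf, print_out), written only so the examples
# below can run; the real implementations may well differ.
from collections import defaultdict


class Alphabet(object):
    def __init__(self):
        self._labels = set()
        self.growing = True  # when False, add() ignores unseen labels

    def add(self, label):
        if self.growing:
            self._labels.add(label)

    def has_label(self, label):
        return label in self._labels


class ConfusionMatrix(object):
    NEGATIVE_CLASS = '__NONE__'

    def __init__(self, alphabet):
        self.alphabet = alphabet
        self.counts = defaultdict(int)  # (predicted, gold) -> count

    def _normalize(self, label):
        # Assumption: labels missing from a frozen alphabet are folded into
        # the negative class rather than raising an error.
        if not self.alphabet.has_label(label):
            if self.alphabet.growing:
                self.alphabet.add(label)
            else:
                return self.NEGATIVE_CLASS
        return label

    def add(self, predicted, gold):
        self.counts[(self._normalize(predicted), self._normalize(gold))] += 1

    def get_prf(self, label):
        tp = self.counts[(label, label)]
        fp = sum(c for (p, _), c in self.counts.items() if p == label) - tp
        fn = sum(c for (_, g), c in self.counts.items() if g == label) - tp
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        return precision, recall, f1

    def print_out(self):
        return '\n'.join('%s -> %s: %d' % (p, g, c)
                         for (p, g), c in sorted(self.counts.items()))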
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    # for g_relation, _ in relation_pairs:
    #     if g_relation is not None:
    #         sense = g_relation['Sense'][0]
    #         if sense in valid_senses:
    #             sense_alphabet.add(sense)
    for sense in valid_senses:
        sense_alphabet.add(sense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation['Sense'][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation['Sense'][0]
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
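# A small smoke test of the pair-based evaluate_sense just defined, assuming
# the Alphabet/ConfusionMatrix sketch above. The relations and senses are
# hypothetical; real relations carry more fields than 'Sense'.
pairs = [
    ({'Sense': ['Comparison.Contrast']}, {'Sense': ['Comparison.Contrast']}),  # matched, correct
    ({'Sense': ['Expansion.Conjunction']}, None),                              # missed gold relation
    (None, {'Sense': ['Expansion.Conjunction']}),                              # spurious prediction
]
cm = evaluate_sense(pairs, valid_senses={'Comparison.Contrast',
                                         'Expansion.Conjunction'})
print(cm.print_out())
print(cm.get_prf('Comparison.Contrast'))  # (1.0, 1.0, 1.0)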
def compute_binary_eval_metric(gold_list, predicted_list, matching_fn):
    """Compute binary evaluation metric."""
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    for gold_span in gold_list:
        found_match = False
        for i, predicted_span in enumerate(predicted_list):
            if matching_fn(gold_span, predicted_span) and not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            cm.add('no', 'yes')
    # Predicted spans that did not match any gold span
    for matched in matched_predicted:
        if not matched:
            cm.add('yes', 'no')
    return cm
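# Example use of the greedy binary matcher above, assuming the helper sketch.
# Spans are whatever matching_fn understands; plain tuples with an exact
# equality matcher here (hypothetical data).
gold_spans = [(0, 5), (10, 15)]
predicted_spans = [(0, 5), (20, 25)]
cm = compute_binary_eval_metric(gold_spans, predicted_spans,
                                matching_fn=lambda g, p: g == p)
print(cm.get_prf('yes'))  # (0.5, 0.5, 0.5): one hit, one miss, one false alarm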
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    for g_relation, _ in relation_pairs:
        if g_relation is not None:
            sense = g_relation["Sense"][0]
            if sense in valid_senses:
                sense_alphabet.add(sense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation["Sense"][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation["Sense"][0]
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
def Evaluation(gold_file_path, pred_file_path):
    gold_authorIdPaperId_to_label = {}
    pred_authorIdPaperId_to_label = {}

    gold_data = util.read_dict_from_csv(gold_file_path)
    for item in gold_data:
        AuthorId = item["AuthorId"]
        # positive samples
        for paperId in item["ConfirmedPaperIds"].split(" "):
            gold_authorIdPaperId_to_label[(AuthorId, paperId)] = "1"
        # negative samples
        for paperId in item["DeletedPaperIds"].split(" "):
            gold_authorIdPaperId_to_label[(AuthorId, paperId)] = "0"

    pred_data = util.read_dict_from_csv(pred_file_path)
    for item in pred_data:
        AuthorId = item["AuthorId"]
        # positive samples
        for paperId in item["ConfirmedPaperIds"].split(" "):
            pred_authorIdPaperId_to_label[(AuthorId, paperId)] = "1"
        # negative samples
        for paperId in item["DeletedPaperIds"].split(" "):
            pred_authorIdPaperId_to_label[(AuthorId, paperId)] = "0"

    # evaluation: every gold (author, paper) pair is assumed to also appear
    # in the prediction file, otherwise the lookup below raises KeyError
    alphabet = Alphabet()
    alphabet.add("0")
    alphabet.add("1")
    cm = ConfusionMatrix(alphabet)
    for AuthorId, paperId in gold_authorIdPaperId_to_label:
        gold = gold_authorIdPaperId_to_label[(AuthorId, paperId)]
        pred = pred_authorIdPaperId_to_label[(AuthorId, paperId)]
        cm.add(pred, gold)
    return cm
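# The scorer above expects util.read_dict_from_csv to yield one dict per CSV
# row with AuthorId, ConfirmedPaperIds and DeletedPaperIds columns, the paper
# ids space-separated within a cell. A stand-in reader with that contract,
# assuming a plain CSV file with a header row (the real util module may
# behave differently):
import csv

def read_dict_from_csv(path):
    with open(path, newline='', encoding='utf-8') as fin:
        return list(csv.DictReader(fin))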
def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations that are
    missed by the system because the arguments don't match any of the gold
    relations.
    """
    print("In function: evaluate_sense")
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        sense = relation['Sense'][0]
        if sense in valid_senses:
            sense_alphabet.add(sense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
def compute_span_exact_match_metric(gold_list, predicted_list, verbose=False):
    """Compute binary evaluation metric over exactly matching spans."""
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    # Index predicted spans by value so each gold span is matched in O(1)
    # instead of a linear scan over all predictions.
    predicted = defaultdict(list)
    for i, pspan in enumerate(predicted_list):
        predicted[pspan].append(i)
    empty_list = []
    for gold in gold_list:
        found_match = False
        for i in predicted.get(gold, empty_list):
            if not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            if verbose:
                # ENCODING is assumed to be a module-level constant.
                print('Span:')
                print('<<<\t{:s}'.format(gold).encode(ENCODING))
                print()
            cm.add('no', 'yes')
    # Predicted spans that did not match any gold span
    for matched, pred in zip(matched_predicted, predicted_list):
        if not matched:
            if verbose:
                print('Span:')
                print('>>>\t{:s}'.format(pred).encode(ENCODING))
                print()
            cm.add('yes', 'no')
    return cm
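# Exact-match demo, assuming the helper sketch above: spans must be hashable
# since they are used as dict keys, so plain strings work (hypothetical data).
gold = ['the cat sat', 'on the mat']
pred = ['the cat sat', 'under the mat']
cm = compute_span_exact_match_metric(gold, pred)
print(cm.get_prf('yes'))  # (0.5, 0.5, 0.5)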
def evaluate_sense(gold_list, predicted_list, verbose=False):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations that are
    missed by the system because the arguments don't match any of the gold
    relations.
    """
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        isense = relation['Sense'][0]
        if isense in valid_senses:
            sense_alphabet.add(isense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense not in valid_senses:
            continue
        if i in gold_to_predicted_map:
            predicted_sense = gold_to_predicted_map[i]['Sense'][0]
            if predicted_sense in gold_relation['Sense']:
                sense_cm.add(predicted_sense, predicted_sense)
            else:
                if not sense_cm.alphabet.has_label(predicted_sense):
                    predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                if verbose:
                    print('Sense:')
                    print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                    print('>>>\t{:s}'.format(predicted_sense).encode(ENCODING))
                    print('Arg1:\t{:s}'.format(
                        gold_relation['Arg1']['RawText']).encode(ENCODING))
                    print('Arg2:\t{:s}'.format(
                        gold_relation['Arg2']['RawText']).encode(ENCODING))
                    print()
                sense_cm.add(predicted_sense, gold_sense)
        else:
            if verbose:
                print('Sense:')
                print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                print('>>>\t{:s}'.format(
                    ConfusionMatrix.NEGATIVE_CLASS).encode(ENCODING))
                print('Arg1:\t{:s}'.format(
                    gold_relation['Arg1']['RawText']).encode(ENCODING))
                print('Arg2:\t{:s}'.format(
                    gold_relation['Arg2']['RawText']).encode(ENCODING))
                print()
            sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            if verbose:
                # Spurious prediction: report the predicted relation itself.
                print('Sense:')
                print('<<<\t{:s}'.format(
                    ConfusionMatrix.NEGATIVE_CLASS).encode(ENCODING))
                print('>>>\t{:s}'.format(predicted_sense).encode(ENCODING))
                print('Arg1:\t{:s}'.format(
                    predicted_relation['Arg1']['RawText']).encode(ENCODING))
                print('Arg2:\t{:s}'.format(
                    predicted_relation['Arg2']['RawText']).encode(ENCODING))
                print()
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
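# _link_gold_predicted and spans_exact_matching are used by the evaluate_sense
# variants above but defined elsewhere. A minimal sketch under the contract
# implied by the call sites: greedily pair each gold relation with the first
# unused predicted relation whose argument spans match, returning
# index-to-relation maps in both directions. The 'TokenList' field in
# spans_exact_matching is an assumption about the relation format.
def spans_exact_matching(gold_relation, predicted_relation):
    return (gold_relation['Arg1']['TokenList'] == predicted_relation['Arg1']['TokenList']
            and gold_relation['Arg2']['TokenList'] == predicted_relation['Arg2']['TokenList'])


def _link_gold_predicted(gold_list, predicted_list, matching_fn):
    gold_to_predicted_map = {}   # gold index -> predicted relation
    predicted_to_gold_map = {}   # predicted index -> gold relation
    used = set()
    for gi, gold_relation in enumerate(gold_list):
        for pi, predicted_relation in enumerate(predicted_list):
            if pi not in used and matching_fn(gold_relation, predicted_relation):
                gold_to_predicted_map[gi] = predicted_relation
                predicted_to_gold_map[pi] = gold_relation
                used.add(pi)
                break
    return gold_to_predicted_map, predicted_to_gold_map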
def compute_binary_eval_metric(predicted_list, gold_list, binary_alphabet):
    cm = ConfusionMatrix(binary_alphabet)
    for predicted_span, gold_span in zip(predicted_list, gold_list):
        cm.add(predicted_span, gold_span)
    return cm
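# Unlike the span matchers above, this variant assumes predicted_list and
# gold_list are already aligned one-to-one (e.g. per-instance labels), so it
# just zips them into the matrix. Hypothetical labels:
alpha = Alphabet()
alpha.add('yes')
alpha.add('no')
cm = compute_binary_eval_metric(['yes', 'no', 'yes'], ['yes', 'yes', 'yes'], alpha)
print(cm.get_prf('yes'))  # (1.0, 0.666..., 0.8)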
def evaluate(gold_file, pred_file):
    with codecs.open(gold_file, encoding="utf-8") as fin_gold, \
            codecs.open(pred_file, encoding="utf-8") as fin_pred:
        # gold: property, url, label, extra (tab separated)
        dict_P_to_url_label = {}
        for line in fin_gold:
            P, url, label, _ = line.strip().split("\t")
            if P not in dict_P_to_url_label:
                dict_P_to_url_label[P] = set()
            dict_P_to_url_label[P].add((url.strip(), label))

        # pred: url, s, p, o, confidence (tab separated)
        predict_set = set()
        for line in fin_pred:
            url, s, p, o, confidence = line.strip().split("\t")
            predict_set.add((url.strip(), p))

        alphabet = Alphabet()
        alphabet.add("0")
        alphabet.add("1")

        # evaluation
        macro_p, macro_r, macro_f = 0, 0, 0
        N = 0
        for P in sorted(dict_P_to_url_label.keys()):
            confusionMatrix = ConfusionMatrix(alphabet)
            recall_error_cases = []
            precision_error_cases = []
            for url, label in dict_P_to_url_label[P]:
                pred = "0"
                if (url, P) in predict_set:
                    pred = "1"
                if label != pred:
                    if label == "1" and pred == "0":
                        recall_error_cases.append("%s\t%s->%s" % (url, label, pred))
                    if label == "0" and pred == "1":
                        precision_error_cases.append("%s\t%s->%s" % (url, label, pred))
                confusionMatrix.add(pred, label)
            print("==" * 40)
            print(P)
            print(confusionMatrix.print_out())
            p, r, f = confusionMatrix.get_prf("1")
            macro_p += p
            macro_r += r
            macro_f += f
            N += 1
            print("\n==>recall error cases:")
            print("\n".join(recall_error_cases))
            print("\n==>precision error cases:")
            print("\n".join(precision_error_cases))
        print("**" * 40)
        print("macro, P: %f; R: %f; F1: %f" % (macro_p / N, macro_r / N, macro_f / N))
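# A self-contained toy run of evaluate() on files matching the formats it
# parses: gold lines are "property<TAB>url<TAB>label<TAB>extra", prediction
# lines are "url<TAB>s<TAB>p<TAB>o<TAB>confidence". Paths and values are
# hypothetical.
import codecs

with codecs.open('gold.tsv', 'w', encoding='utf-8') as f:
    f.write('birthPlace\thttp://a\t1\t-\n')
    f.write('birthPlace\thttp://b\t0\t-\n')
with codecs.open('pred.tsv', 'w', encoding='utf-8') as f:
    f.write('http://a\ts\tbirthPlace\to\t0.9\n')

evaluate('gold.tsv', 'pred.tsv')  # perfect P/R/F1 on this toy property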