Example #1
def evaluate(key_lines, sys_lines, metrics, NP_only, remove_nested,
             keep_singletons, min_span):
    doc_coref_infos = get_coref_infos(key_lines, sys_lines, NP_only,
                                      remove_nested, keep_singletons, min_span)

    output_scores = {}
    conll = 0
    conll_subparts_num = 0

    for name, metric in metrics:
        recall, precision, f1 = evaluator.evaluate_documents(doc_coref_infos,
                                                             metric,
                                                             beta=1)
        if name in ["muc", "bcub", "ceafe"]:
            conll += f1
            conll_subparts_num += 1
        output_scores.update({
            f"{name}/recall": recall,
            f"{name}/precision": precision,
            f"{name}/f1": f1
        })

        # Use a single lazily formatted message; passing extra strings as
        # positional args to logger.info would break the %-formatting.
        logger.info(
            "%s Recall: %.2f  Precision: %.2f  F1: %.2f",
            name.ljust(10),
            recall * 100,
            precision * 100,
            f1 * 100,
        )

    if conll_subparts_num == 3:
        conll = (conll / 3) * 100
        logger.info("CoNLL score: %.2f" % conll)
        output_scores["conll_score"] = conll

    return output_scores
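A minimal calling sketch for the variant above. It assumes the surrounding module provides get_coref_infos and logger, that the evaluator module follows the coval layout (the import path is an assumption), and that the input files are CoNLL-format key/system annotations; the file names and flag values are illustrative only.

from coval.eval import evaluator  # assumed import path for the coval scorer

# (name, metric) pairs as in the other examples; "muc", "bcub" and "ceafe"
# feed the aggregated CoNLL score.
metrics = [
    ("mentions", evaluator.mentions),
    ("muc", evaluator.muc),
    ("bcub", evaluator.b_cubed),
    ("ceafe", evaluator.ceafe),
    ("lea", evaluator.lea),
]

# Hypothetical CoNLL-format input files read into line lists.
with open("key.conll") as f:
    key_lines = f.readlines()
with open("sys.conll") as f:
    sys_lines = f.readlines()

scores = evaluate(key_lines, sys_lines, metrics,
                  NP_only=False, remove_nested=False,
                  keep_singletons=True, min_span=False)
print(scores["muc/f1"], scores["conll_score"])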
Example #2
def evaluate(key_directory, sys_directory, metrics, keep_singletons,
             keep_non_referring, use_MIN):

    doc_coref_infos, doc_non_referring_infos = reader.get_coref_infos(
        key_directory, sys_directory, keep_singletons, keep_non_referring,
        use_MIN)

    conll = 0
    conll_subparts_num = 0

    for name, metric in metrics:
        recall, precision, f1 = evaluator.evaluate_documents(doc_coref_infos,
                                                             metric,
                                                             beta=1)
        if name in ["muc", "bcub", "ceafe"]:
            conll += f1
            conll_subparts_num += 1

        print(name)
        print('Recall: %.2f' % (recall * 100),
              ' Precision: %.2f' % (precision * 100), ' F1: %.2f' % (f1 * 100))

    if conll_subparts_num == 3:
        conll = (conll / 3) * 100
        print('CoNLL score: %.2f' % conll)

    if keep_non_referring:
        recall, precision, f1 = evaluate_non_referrings(
            doc_non_referring_infos)
        print('============================================')
        print('Non-referring markable identification scores:')
        print('Recall: %.2f' % (recall * 100),
              ' Precision: %.2f' % (precision * 100), ' F1: %.2f' % (f1 * 100))
Example #3
def evaluate(key_file, sys_file, metrics, keep_singletons,
             keep_split_antecedent, keep_bridging, keep_non_referring,
             only_split_antecedent, evaluate_discourse_deixis, use_MIN):

    doc_coref_infos, doc_non_referring_infos, doc_bridging_infos = reader.get_coref_infos(
        key_file, sys_file, keep_singletons, keep_split_antecedent,
        keep_bridging, keep_non_referring, evaluate_discourse_deixis, use_MIN)

    conll = 0
    conll_subparts_num = 0

    for name, metric in metrics:
        recall, precision, f1 = evaluator.evaluate_documents(
            doc_coref_infos,
            metric,
            beta=1,
            only_split_antecedent=only_split_antecedent)
        if name in ["muc", "bcub", "ceafe"]:
            conll += f1
            conll_subparts_num += 1

        print(name)
        print('Recall: %.2f' % (recall * 100),
              ' Precision: %.2f' % (precision * 100), ' F1: %.2f' % (f1 * 100))

    if conll_subparts_num == 3:
        conll = (conll / 3) * 100
        print('CoNLL score: %.2f' % conll)

    if keep_non_referring:
        recall, precision, f1 = evaluate_non_referrings(
            doc_non_referring_infos)
        print('============================================')
        print('Non-referring markable identification scores:')
        print('Recall: %.2f' % (recall * 100),
              ' Precision: %.2f' % (precision * 100), ' F1: %.2f' % (f1 * 100))
    if keep_bridging:
        score_ar, score_fbm, score_fbe = evaluator.evaluate_bridgings(
            doc_bridging_infos)
        recall_ar, precision_ar, f1_ar = score_ar
        recall_fbm, precision_fbm, f1_fbm = score_fbm
        recall_fbe, precision_fbe, f1_fbe = score_fbe

        print('============================================')
        print('Bridging anaphora recognition scores:')
        print('Recall: %.2f' % (recall_ar * 100),
              ' Precision: %.2f' % (precision_ar * 100),
              ' F1: %.2f' % (f1_ar * 100))
        print('Full bridging scores (Markable Level):')
        print('Recall: %.2f' % (recall_fbm * 100),
              ' Precision: %.2f' % (precision_fbm * 100),
              ' F1: %.2f' % (f1_fbm * 100))
        print('Full bridging scores (Entity Level):')
        print('Recall: %.2f' % (recall_fbe * 100),
              ' Precision: %.2f' % (precision_fbe * 100),
              ' F1: %.2f' % (f1_fbe * 100))
Example #4
def coref_evaluate(key_file, sys_file, args):
    metrics = [('mentions', evaluator.mentions), ('muc', evaluator.muc),
               ('bcub', evaluator.b_cubed), ('ceafe', evaluator.ceafe),
               ('lea', evaluator.lea)]
    NP_only, remove_nested, keep_singletons, min_span = False, False, True, False

    doc_coref_infos = reader.get_coref_infos(key_file,
                                             sys_file,
                                             NP_only,
                                             remove_nested,
                                             keep_singletons,
                                             min_span,
                                             mode=args.mode)

    conll = 0
    conll_subparts_num = 0
    results = {}
    for name, metric in metrics:
        try:
            recall, precision, f1 = evaluator.evaluate_documents(
                doc_coref_infos, metric, beta=1)
        except Exception:
            # Sentinel values marking a metric that failed to compute.
            recall = precision = f1 = -10

        results[name] = {
            'recall': recall * 100,
            'precision': precision * 100,
            'f1': f1 * 100
        }
        if args.mode == 'testing':
            print(name.ljust(10), 'Recall: %.2f' % (recall * 100),
                  ' Precision: %.2f' % (precision * 100),
                  ' F1: %.2f' % (f1 * 100))

    for key in ['recall', 'precision', 'f1']:
        results['avg_{}'.format(key)] = (results["muc"][key] +
                                         results["bcub"][key] +
                                         results["ceafe"][key]) / 3
    return results
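A short usage sketch for this variant. Only args.mode is read from the namespace, so an argparse.Namespace with an illustrative mode value is enough; the file paths are hypothetical.

import argparse

args = argparse.Namespace(mode="testing")  # illustrative mode value
results = coref_evaluate("key.conll", "sys.conll", args)

# The avg_* entries are the mean of the MUC, B-cubed and CEAFe scores.
print("avg F1: %.2f" % results["avg_f1"])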
Example #5
def evaluate(key_file, sys_file, metrics, NP_only, remove_nested,
             keep_singletons, min_span):
    doc_coref_infos = reader.get_coref_infos(key_file, sys_file, NP_only,
                                             remove_nested, keep_singletons,
                                             min_span)

    conll = 0
    conll_subparts_num = 0

    for name, metric in metrics:
        recall, precision, f1 = evaluator.evaluate_documents(doc_coref_infos,
                                                             metric,
                                                             beta=1)
        if name in ["muc", "bcub", "ceafe"]:
            conll += f1
            conll_subparts_num += 1

        print(name.ljust(10), 'Recall: %.2f' % (recall * 100),
              ' Precision: %.2f' % (precision * 100), ' F1: %.2f' % (f1 * 100))

    if conll_subparts_num == 3:
        conll = (conll / 3) * 100
        print('CoNLL score: %.2f' % conll)
Example #6
def evaluate(key_file, sys_file, metrics, NP_only, remove_nested,
             keep_singletons, min_span):
    doc_coref_infos = reader.get_coref_infos(key_file, sys_file, NP_only,
                                             remove_nested, keep_singletons,
                                             min_span)

    conll = 0
    conll_subparts_num = 0

    print('             recall  precision         F1')
    for name, metric in metrics:
        recall, precision, f1 = evaluator.evaluate_documents(doc_coref_infos,
                                                             metric,
                                                             beta=1)
        if name in ('muc', 'bcub', 'ceafe'):
            conll += f1
            conll_subparts_num += 1
        print('%s     %6.2f     %6.2f     %6.2f' %
              (name.ljust(8), recall * 100, precision * 100, f1 * 100))

    if conll_subparts_num == 3:
        conll = (conll / 3) * 100
        print('CoNLL score: %6.2f' % conll)
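For clarity, the CoNLL score that every variant reports is simply the unweighted mean of the MUC, B-cubed and CEAFe F1 values, scaled to a percentage. A self-contained sketch with made-up F1 values:

def conll_score(muc_f1, bcub_f1, ceafe_f1):
    # Unweighted mean of the three official CoNLL-2012 metrics, in percent.
    return (muc_f1 + bcub_f1 + ceafe_f1) / 3 * 100

print("CoNLL score: %.2f" % conll_score(0.71, 0.63, 0.58))  # illustrative values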