def evaluate_predicate_mention(test_graphs, prop_ex, nom_file):
    """
    Average the predicate mention score over all test graphs.
    :param test_graphs: the graphs for the test sets (iterated twice: once to predict, once to score)
    :param prop_ex: the proposition extraction object
    :param nom_file: forwarded as the third positional argument of predict_predicate_mention
                     (presumably a nominalization resource -- confirm against that function)
    :return the mean, over the test graphs, of the first value returned by
            compute_predicate_mention_agreement for each (test graph, predicted graph) pair
    """
    # Phase 1: build a predicted graph for every gold graph.
    predicted = []
    for gold_graph in test_graphs:
        predicted.append(predict_predicate_mention(gold_graph, prop_ex, nom_file))

    # Phase 2: score each prediction against its gold graph; only the first
    # element of the agreement result (the score) is kept.
    scores = []
    for gold_graph, predicted_graph in zip(test_graphs, predicted):
        agreement = compute_predicate_mention_agreement(gold_graph, predicted_graph)
        scores.append(agreement[0])

    return np.mean(scores)
def evaluate_predicate_mention_verbal(test_graphs, prop_ex):
    """
    Calculate the average predicate mention metric on the verbal propositions in test graphs
    :param test_graphs: the graphs for the test sets
    :param prop_ex: the proposition extraction object
    :return the average predicate mention metric on the verbal propositions in test graphs
    """
    # Materialize the filtered graphs as a list: they are iterated twice below
    # (once to predict, once to score). A bare map() is a one-shot iterator on
    # Python 3, which would leave the scoring pass empty and make np.mean
    # return nan.
    verbal_graphs = [filter_verbal(test_graph) for test_graph in test_graphs]

    # Predict on the verbal-only graphs with the non-verbal component disabled.
    pred_graphs = [
        predict_predicate_mention(verbal_graph,
                                  prop_ex,
                                  apply_non_verbal=False)
        for verbal_graph in verbal_graphs
    ]

    # Keep only the first element (the score) of each agreement result.
    return np.mean([
        compute_predicate_mention_agreement(test_graph, pred_graph)[0]
        for test_graph, pred_graph in zip(verbal_graphs, pred_graphs)
    ])
Example #3
0
def evaluate_predicate_mention_non_verbal(test_graphs, prop_ex, nom_file):
    """
    Calculate the average predicate mention metric on the non-verbal propositions in test graphs
    :param test_graphs: the graphs for the test sets
    :param prop_ex: the proposition extraction object
    :param nom_file: forwarded to predict_predicate_mention as its nom_file keyword argument
    :return the average predicate mention metric on the non-verbal propositions in test graphs
    """
    # Materialize the filtered graphs as a list: they are iterated twice below
    # (once to predict, once to score). A bare map() is a one-shot iterator on
    # Python 3, which would leave the scoring pass empty and make np.mean
    # return nan.
    non_verbal_graphs = [filter_non_verbal(test_graph) for test_graph in test_graphs]

    # Predict on the non-verbal-only graphs with the verbal component disabled.
    pred_graphs = [
        predict_predicate_mention(non_verbal_graph,
                                  prop_ex,
                                  apply_verbal=False,
                                  nom_file=nom_file)
        for non_verbal_graph in non_verbal_graphs
    ]

    # Keep only the first element (the score) of each agreement result.
    return np.mean([
        compute_predicate_mention_agreement(
            test_graph, pred_graph, for_inter_annotator_agreement=False)[0]
        for test_graph, pred_graph in zip(non_verbal_graphs, pred_graphs)
    ])
Example #4
0
def compute_agreement(annotator1_file, annotator2_file):
    """
    Receives two annotation files about the same story, each annotated by a different annotator,
    and computes the task-level agreement:
    1) Entity mentions
    2) Entity coreference
    3) Predicate mentions (overall, verbal and non-verbal)
    4) Predicate coreference
    5) Argument mention within predicate chains
    6) Argument coreference
    7) Entailment graph
    Each step after the first runs on the "consensual" graphs returned by the previous step.
    Every score is also printed to standard output.
    :param annotator1_file The path for the first graph
    :param annotator2_file The path for the second graph
    :return a list of 19 scores, in this order:
            entity mention score;
            entity coreference MUC, B^3, CEAF_C, MELA;
            predicate mention score (overall, verbal, non-verbal);
            predicate coreference MUC, B^3, CEAF_C, MELA;
            argument mention score;
            argument coreference MUC, B^3, CEAF_C, MELA;
            entailment graph F1 for entities, entailment graph F1 for propositions
    """

    # Load the annotation files to OKR objects
    graph1 = load_graph_from_file(annotator1_file)
    graph2 = load_graph_from_file(annotator2_file)

    # NOTE: print is called with a single parenthesized argument throughout so
    # that the output is identical under Python 2 and the code also parses
    # under Python 3.

    # Compute agreement for entity mentions and update the graphs to contain only annotations
    # in which both annotators agreed on the entity mentions
    ent_mention_score, consensual_graph1, consensual_graph2 = compute_entity_mention_agreement(graph1, graph2)
    print('Entity mentions: %.3f' % ent_mention_score)

    # Compute agreement for entity coreference and update the graphs to contain only annotations
    # in which both annotators agreed on the entity clusters
    ent_muc, ent_b_cube, ent_ceaf_c, ent_conll_f1, consensual_graph1, consensual_graph2 = \
        compute_entity_coref_agreement(consensual_graph1, consensual_graph2)
    print('Entity coreference: MUC=%.3f, B^3=%.3f, CEAF_C=%.3f, MELA=%.3f' % (ent_muc, ent_b_cube, ent_ceaf_c, ent_conll_f1))

    # Compute agreement for predicate mentions and update the graphs to contain only annotations
    # in which both annotators agreed on the predicate mentions
    # For analysis purposes, compute also verbal and non-verbal.
    # The verbal / non-verbal scores are computed first, before the overall call
    # below rebinds consensual_graph1 / consensual_graph2.
    pred_mention_non_verbal_score = compute_predicate_mention_agreement_non_verbal(consensual_graph1, consensual_graph2)

    pred_mention_verbal_score = compute_predicate_mention_agreement_verbal(consensual_graph1, consensual_graph2)

    pred_mention_score, consensual_graph1, consensual_graph2 = compute_predicate_mention_agreement(consensual_graph1,
                                                                                      consensual_graph2)

    print('Predicate mentions: %.3f, verbal: %.3f, non-verbal: %.3f' % (pred_mention_score,
                                                                        pred_mention_verbal_score,
                                                                        pred_mention_non_verbal_score))

    # Compute agreement for predicate coreference and update the graphs to contain only annotations
    # in which both annotators agreed on the predicate clusters.
    # optimal_alignment is kept for the argument coreference step below.
    pred_muc, pred_b_cube, pred_ceaf_c, pred_conll_f1, consensual_graph1, consensual_graph2, optimal_alignment = \
        compute_predicate_coref_agreement(consensual_graph1, consensual_graph2)
    print('Predicate coreference: MUC=%.3f, B^3=%.3f, CEAF_C=%.3f, MELA=%.3f' % (pred_muc, pred_b_cube, pred_ceaf_c, pred_conll_f1))

    # Compute agreement for argument mention within predicate chains and update the graphs to contain only annotations
    # in which both annotators agreed on the argument mentions
    arg_mention_score, consensual_graph1, consensual_graph2 = compute_argument_mention_agreement(consensual_graph1,
                                                                                                 consensual_graph2)
    print('Argument mentions: %.3f' % arg_mention_score)

    # Compute coreference scores for alignment between arguments of the same propositions:
    arg_muc, arg_b_cube, arg_ceaf_c, arg_conll_f1, consensual_graph1, consensual_graph2 = \
        compute_argument_coref_agreement(consensual_graph1, consensual_graph2, optimal_alignment)
    print('Argument coreference: MUC=%.3f, B^3=%.3f, CEAF_C=%.3f, MELA=%.3f' % (arg_muc, arg_b_cube, arg_ceaf_c, arg_conll_f1))

    # Compute agreement for the entailment graph and update the graphs to contain only annotations
    # in which both annotators agreed on the edges (propositions, arguments and entities).
    # arguments_kappa is unpacked but neither printed nor returned.
    entities_f1, arguments_kappa, propositions_f1, consensual_graph1, consensual_graph2 = \
        compute_entailment_graph_agreement(consensual_graph1, consensual_graph2)
    print('Entailment graph F1: entities=%.3f, propositions=%.3f' % (entities_f1, propositions_f1))

    return [ent_mention_score, ent_muc, ent_b_cube, ent_ceaf_c, ent_conll_f1,
            pred_mention_score, pred_mention_verbal_score, pred_mention_non_verbal_score,
            pred_muc, pred_b_cube, pred_ceaf_c, pred_conll_f1,
            arg_mention_score, arg_muc, arg_b_cube, arg_ceaf_c, arg_conll_f1,
            entities_f1,  propositions_f1]