def evaluate_predicate_coref(test_graphs):
    """
    Receives the OKR test graphs and evaluates them for predicate coreference
    :param test_graphs: the OKR test graphs
    :return: the coreference scores: MUC, B-CUBED, CEAF and MELA (CoNLL F1).
    """
    parser = spacy_wrapper()
    per_graph_scores = []

    for graph in test_graphs:

        # Collect the proposition mentions along with their head lemma and
        # head POS tag, skipping placeholder mentions (indices == [-1])
        mentions_with_heads = []
        for prop in graph.propositions.values():
            for mention in prop.mentions.values():
                if mention.indices == [-1]:
                    continue
                head_lemma, head_pos = get_mention_head(mention, parser, graph)
                mentions_with_heads.append((mention, head_lemma, head_pos))

        # Cluster the mentions, then strip off the head info so each cluster
        # is just a set of mention objects
        raw_clusters = cluster_mentions(mentions_with_heads, score)
        clusters = [set(mention for mention, _, _ in cluster)
                    for cluster in raw_clusters]

        # Evaluate the predicted clustering against this gold graph
        graph_scores, _ = eval_clusters(clusters, graph)
        per_graph_scores.append(graph_scores)

    return np.mean(per_graph_scores, axis=0).tolist()
def evaluate_argument_coref(test_graphs):
    """
    Receives the OKR test graphs and evaluates them for argument coreference
    :param test_graphs: the OKR test graphs
    :return: the coreference scores: MUC, B-CUBED, CEAF and MELA (CoNLL F1).
    """
    scores = []

    for graph in test_graphs:
        arg_clustering = {}

        # .items() behaves identically to the former .iteritems() here and is
        # forward-compatible with Python 3
        for prop_id, prop in graph.propositions.items():

            # Gather every argument mention across all mentions of this
            # proposition
            all_args = [arg
                        for mention in prop.mentions.values()
                        for arg in mention.argument_mentions.values()]

            # PEP 8: a named function instead of a lambda bound to a name;
            # also avoids shadowing the module-level `score` used by the
            # sibling evaluation functions
            def arg_score(mention, cluster):
                return same_entity(cluster, mention, graph)

            clusters = cluster_mentions(all_args, arg_score)
            clusters = [set(str(mention) for mention in cluster)
                        for cluster in clusters]
            arg_clustering[prop_id] = clusters

        # Evaluate
        curr_scores = eval_clusters(graph, arg_clustering)
        scores.append(curr_scores)

    scores = np.mean(scores, axis=0).tolist()
    return scores
def visually_analyse_baseline_entity_coref( gold_annotations_dir='../../data/baseline/dev'): for file_name in os.listdir(gold_annotations_dir): graph = load_graph_from_file(gold_annotations_dir + '/' + file_name) entities = [(str(mention), unicode(mention.terms)) for entity in graph.entities.values() for mention in entity.mentions.values()] predicted_clusters = cluster_mentions(entities, score) sentence_wise_predicted_mentions = defaultdict(list) sentence_wise_gold_mentions = defaultdict(list) for entity in graph.entities.values(): for mention in entity.mentions.values(): sentence_wise_gold_mentions[mention.sentence_id].append({ "indices": mention.indices, "coref": entity.id, 'text': mention.terms }) # for entity_id, entity in enumerate(predicted_clusters): # for mention_id, mention_terms in entity: # indices = map(int, mention_id.split('[')[1].rstrip(']').split(', ')) # sentence_wise_predicted_mentions[int(mention_id.split('[')[0])].append({"indices":indices,"coref":entity_id, 'text':mention_terms}) print '###' + file_name + '\n' for sentence_id, sentence in enumerate(graph.sentences.values()): print 'Sentence: ', ' '.join(sentence) # print 'Predicted entities: ', ', '.join([element['text'] for element in sentence_wise_predicted_mentions[sentence_id+1]]) print 'Gold entities: ', ', '.join([ element['text'] for element in sentence_wise_gold_mentions[sentence_id + 1] ]) print ' ' print('\n The Gold clusters:') for entity in graph.entities.values(): if len(entity.mentions.values()) != 1: print 'cluster_id: ', entity.id, ', '.join( [mention.terms for mention in entity.mentions.values()]) print ' ' print 'The predicted clusters:' for entity_id, entity in enumerate(predicted_clusters): if len(entity) != 1: print 'cluster_id: ', entity_id, ', '.join( [mention_terms for _, mention_terms in entity]) print ' ' print '**********'
def parse_sentence_wise_corenlp_coref_and_cluster_further(
        input_dir='./coref_sentence_wise/test',
        gold_annotations_folder='../../../data/baseline/test'):
    """
    Parse the output xml file annotated by coreNLP which was supplied files
    sentencewise, use the mentions as the starting state for the baseline
    entity coreference resolver and evaluate the final clusters.

    :param input_dir: directory of per-sentence coreNLP xml outputs
    :param gold_annotations_folder: directory of gold OKR xml files
    :return: the average coreference scores over all gold files
             (bug fix: previously computed and printed but never returned)
    """
    scores = []
    for gold_file in os.listdir(gold_annotations_folder):
        if re.match(r'(.+)\.xml', gold_file) is None:
            continue
        okr_graph = load_graph_from_file(
            gold_annotations_folder + '/' + gold_file)
        clusters = []
        entities = [(str(mention), unicode(mention.terms))
                    for entity in okr_graph.entities.values()
                    for mention in entity.mentions.values()]
        gold_entity_mentions = [str(mention)
                                for entity in okr_graph.entities.values()
                                for mention in entity.mentions.values()]

        # Hoisted: this pattern was previously built and matched twice per
        # annotated file; re.escape guards stems containing regex
        # metacharacters without changing which well-formed names match.
        sent_file_pattern = re.compile(
            re.escape(gold_file[:-4]) + r'\.txt\.sent(\d+)\.txt\.xml')

        for annotated_file in os.listdir(input_dir):
            match = sent_file_pattern.match(annotated_file)
            if match is None:
                continue
            sent_number = int(match.group(1))
            tree = ET.parse(input_dir + '/' + annotated_file)
            document = tree.getroot()[0]
            coref_node = document.find('coreference')
            if coref_node is None:
                continue
            for coref_chain in coref_node:
                cluster = []
                for mention in coref_chain:
                    # coreNLP token offsets are 1-based with an exclusive
                    # end; convert to the graph's 0-based indices
                    start = int(mention[1].text) - 1
                    end = int(mention[2].text) - 1
                    # list() keeps str(indices) stable on Python 3 as well
                    indices = list(range(start, end))
                    text = mention[4].text
                    mention_string = str(sent_number) + str(indices)
                    # Filter from the clusters the mentions which are not
                    # in gold
                    if mention_string in gold_entity_mentions:
                        cluster.append((mention_string, text))
                if len(cluster) != 0:
                    clusters.append(set(cluster))

        clusters_hybrid = cluster_mentions(entities, score, clusters,
                                           merge_initially=True)
        clusters_hybrid = [set([item[0] for item in cluster])
                           for cluster in clusters_hybrid]
        curr_scores = eval_clusters(clusters_hybrid, okr_graph)
        scores.append(curr_scores)
        print(scores)

    scores = np.mean(scores, axis=0).tolist()
    print(scores)
    return scores
def get_argument_and_entity_clusters(graph):
    """
    Extract entities and argument mentions from the graph, take their union
    and cluster using the baseline entity coreference algorithm.
    :param graph: the OKR object
    :return: list of clusters, each a set of mention-id strings
    """
    entity_items = [(str(mention), unicode(mention.terms))
                    for entity in graph.entities.values()
                    for mention in entity.mentions.values()]

    # Argument mentions of non-placeholder proposition mentions, with their
    # surface terms resolved from the sentence
    argument_items = [(str(mention),
                       unicode(mention_string_to_terms(
                           graph, mention, prop_mention.sentence_id)))
                      for prop in graph.propositions.values()
                      for prop_mention in prop.mentions.values()
                      for mention in prop_mention.argument_mentions.values()
                      if prop_mention.indices != [-1]]

    combined = list(set(entity_items) | set(argument_items))
    clusters = cluster_mentions(combined, argument_score)
    return [set(mention_id for mention_id, _ in cluster)
            for cluster in clusters]
def parse_and_cluster_and_evaluate_corenlp_coref_cross_doc(
        input_dir='CoreNLP_coref_anno/test',
        gold_annotations_folder='../../../data/baseline/test'):
    """
    Parse the output xml file annotated by coreNLP (cross-document), use
    baseline system to cluster further and evaluate the accuracy of mentions
    and coreference resolution with gold annotations.

    :param input_dir: directory of cross-document coreNLP xml outputs
    :param gold_annotations_folder: directory of gold OKR xml files
    :return: the average coreference scores over all files
             (bug fix: previously computed and printed but never returned)
    """
    scores = []
    for xml_file in os.listdir(input_dir):
        name_match = re.match(r'(.+)\.xml', xml_file)
        if name_match is None:
            continue
        clusters = []
        # NOTE(review): group(1)[:-4] presumably strips an inner extension
        # (e.g. '.txt') from the stem before appending '.xml' -- confirm
        # against the actual file naming scheme.
        okr_graph = load_graph_from_file(
            gold_annotations_folder + '/' +
            name_match.group(1)[:-4] + '.xml')
        entities = [(str(mention), unicode(mention.terms))
                    for entity in okr_graph.entities.values()
                    for mention in entity.mentions.values()]
        gold_entity_mentions = [str(mention)
                                for entity in okr_graph.entities.values()
                                for mention in entity.mentions.values()]

        tree = ET.parse(input_dir + '/' + xml_file)
        document = tree.getroot()[0]
        coref_node = document.find('coreference')
        # Robustness (matches the sentence-wise variant): skip documents
        # without a coreference node instead of crashing
        if coref_node is not None:
            for coref_chain in coref_node:
                cluster = []
                for mention in coref_chain:
                    sent_num = int(mention[0].text)
                    # coreNLP token offsets are 1-based with an exclusive
                    # end; convert to the graph's 0-based indices
                    start = int(mention[1].text) - 1
                    end = int(mention[2].text) - 1
                    # list() keeps str(indices) stable on Python 3 as well
                    indices = list(range(start, end))
                    text = mention[4].text
                    mention_string = str(sent_num) + str(indices)
                    # Keep only mentions which also appear in gold
                    if mention_string in gold_entity_mentions:
                        cluster.append((mention_string, text))
                if len(cluster) != 0:
                    clusters.append(set(cluster))

        clusters_hybrid = cluster_mentions(entities, score, clusters)
        clusters_hybrid = [set([item[0] for item in cluster])
                           for cluster in clusters_hybrid]
        curr_scores = eval_clusters(clusters_hybrid, okr_graph)
        scores.append(curr_scores)
        print(scores)

    scores = np.mean(scores, axis=0).tolist()
    print(scores)
    return scores
def evaluate_entity_coref(test_graphs):
    """
    Receives the OKR test graphs and evaluates them for entity coreference
    :param test_graphs: the OKR test graphs
    :return: the coreference scores: MUC, B-CUBED, CEAF and MELA (CoNLL F1).
    """
    all_scores = []

    for graph in test_graphs:
        # Pair each entity mention id with its (unicode) surface terms
        mention_items = []
        for entity in graph.entities.values():
            for mention in entity.mentions.values():
                mention_items.append((str(mention), unicode(mention.terms)))

        # Cluster and keep only the mention ids in each cluster
        predicted = cluster_mentions(mention_items, score)
        predicted = [set(mention_id for mention_id, _ in cluster)
                     for cluster in predicted]

        all_scores.append(eval_clusters(predicted, graph))

    return np.mean(all_scores, axis=0).tolist()
def create_report_for_prop_coref( gold_annotations_dir='../../data/baseline/dev'): """ Qualitatively analyse the output from baseline for predicate coreference system. This method is similar to analyse_baseline_prop_coref() but only displays the distribution of the gold and predicted clusters for which the system has made mistakes in clustering (two kinds of mistakes - incorrect merges and missed merges). Only sample representatives of a cluster are displayed. """ parser = spacy_wrapper() for file_name in os.listdir(gold_annotations_dir): graph = load_graph_from_file(gold_annotations_dir + '/' + file_name) prop_mentions = [] for prop in graph.propositions.values(): for mention in prop.mentions.values(): if mention.indices == [-1]: continue head_lemma, head_pos = get_mention_head(mention, parser, graph) prop_mentions.append((mention, head_lemma, head_pos)) predicted_clusters = cluster_mentions(prop_mentions, score) sentence_wise_predicted_mentions = defaultdict(list) sentence_wise_gold_mentions = defaultdict(list) # Mappings mention_to_gold_pred = {} mention_to_predicted_pred = {} # Gold distribution of each predicted prop gold_dist = {} predicted_dist = {} for prop in graph.propositions.values(): for mention in prop.mentions.values(): sentence_wise_gold_mentions[mention.sentence_id].append({ "indices": mention.indices, "coref": prop.id, 'text': mention.terms, 'arguments': mention.argument_mentions.values() }) mention_to_gold_pred[str(mention)] = prop.id for prop_id, prop in enumerate(predicted_clusters): gold_dist[prop_id] = defaultdict(list) for mention, mention_head, _ in prop: sentence_wise_predicted_mentions[mention.sentence_id].append({ "indices": mention.indices, "coref": prop_id, 'text': mention.terms }) mention_to_predicted_pred[str(mention)] = prop_id gold_pred = mention_to_gold_pred[str(mention)] gold_dist[prop_id][gold_pred].append((mention, mention_head)) for prop in graph.propositions.values(): predicted_dist[prop.id] = defaultdict(list) for mention in 
prop.mentions.values(): if (str(mention) in mention_to_predicted_pred.keys()): predicted_predicate = mention_to_predicted_pred[str( mention)] predicted_dist[prop.id][predicted_predicate].append( mention) else: predicted_dist[prop.id][-1].append(mention) print '###' + file_name + '\n' print('Mentions which should have been clustered: ') for prop_id in predicted_dist.keys(): if (len(predicted_dist[prop_id].keys()) != 1): print 'gold_cluster_id: ', prop_id mentions_to_print = [ predicted_dist[prop_id][predicted_pred_id][0] for predicted_pred_id in predicted_dist[prop_id].keys() ] print(', '.join([ mention.terms + '(' + str(mention.sentence_id) + ')' + '{' + ', '.join([ ' '.join([ graph.sentences[mention.sentence_id][int(id)] for id in str(argument).rstrip(']').split('[') [1].split(', ') ]) for argument in mention.argument_mentions.values() ]) + '}' for mention in mentions_to_print ])) print("\nMentions which should not have been clustered: ") for prop_id in gold_dist.keys(): if (len(gold_dist[prop_id]) != 1): print 'predicted_cluster_id: ', prop_id mentions_to_print = [ gold_dist[prop_id][gold_pred_id][0][0] for gold_pred_id in gold_dist[prop_id].keys() ] print(', '.join([ mention.terms + '(' + str(mention.sentence_id) + ')' + '{' + ', '.join([ ' '.join([ graph.sentences[mention.sentence_id][int(id)] for id in str(argument).rstrip(']').split('[') [1].split(', ') ]) for argument in mention.argument_mentions.values() ]) + '}' for mention in mentions_to_print ])) print '**********'
def analyse_baseline_prop_coref(gold_annotations_dir='../../data/baseline/dev', verbose=False): """ Qualitatively analyse the output from baseline for predicate coreference system. This method outputs all sentences, then list of gold predicate clusters and the list of predicted predicate clusters. """ parser = spacy_wrapper() for file_name in os.listdir(gold_annotations_dir): graph = load_graph_from_file(gold_annotations_dir + '/' + file_name) prop_mentions = [] for prop in graph.propositions.values(): for mention in prop.mentions.values(): if mention.indices == [-1]: continue head_lemma, head_pos = get_mention_head(mention, parser, graph) prop_mentions.append((mention, head_lemma, head_pos)) predicted_clusters = cluster_mentions(prop_mentions, score) sentence_wise_predicted_mentions = defaultdict(list) sentence_wise_gold_mentions = defaultdict(list) # Mappings mention_to_gold_pred = {} mention_to_predicted_pred = {} # gold_dist -> Gold distribution of each predicted proposition cluster # predicted_dist -> Predicted distribution of each gold proposition cluster gold_dist = {} predicted_dist = {} for prop in graph.propositions.values(): for mention in prop.mentions.values(): sentence_wise_gold_mentions[mention.sentence_id].append({ "indices": mention.indices, "coref": prop.id, 'text': mention.terms, 'arguments': mention.argument_mentions.values() }) mention_to_gold_pred[str(mention)] = prop.id for prop_id, prop in enumerate(predicted_clusters): gold_dist[prop_id] = defaultdict(list) for mention, mention_head, _ in prop: sentence_wise_predicted_mentions[mention.sentence_id].append({ "indices": mention.indices, "coref": prop_id, 'text': mention.terms }) mention_to_predicted_pred[str(mention)] = prop_id gold_pred = mention_to_gold_pred[str(mention)] gold_dist[prop_id][gold_pred].append((mention, mention_head)) for prop in graph.propositions.values(): predicted_dist[prop.id] = defaultdict(list) for mention in prop.mentions.values(): if (str(mention) in 
mention_to_predicted_pred.keys()): predicted_predicate = mention_to_predicted_pred[str( mention)] predicted_dist[prop.id][predicted_predicate].append( mention) else: predicted_dist[prop.id][-1].append(mention) print '###' + file_name + '\n' for sentence_id, sentence in enumerate(graph.sentences.values()): print 'Sentence: ', str(sentence_id + 1), ' ', ' '.join(sentence) print 'Gold predicates: ', ', '.join([ element['text'] + '(' + str(element['coref']) + ')' + '{' + ', '.join([str(argument) for argument in element["arguments"]]) + '}' for element in sentence_wise_gold_mentions[sentence_id + 1] ]) print 'Predicted predicates: ', ', '.join([ element['text'] + '(' + str(element['coref']) + ')' for element in sentence_wise_predicted_mentions[sentence_id + 1] ]) print ' ' print('\n The Gold clusters:') for prop_id in predicted_dist.keys(): print 'cluster_id: ', prop_id for predicted_pred_id in predicted_dist[prop_id].keys(): print "\npredicted id:", predicted_pred_id, ':', ', '.join([ mention.terms + '(' + str(mention.sentence_id) + ')' for mention in predicted_dist[prop_id][predicted_pred_id] ]) print 'The predicted clusters:' for prop_id in gold_dist.keys(): print 'cluster_id: ', prop_id for gold_pred_id in gold_dist[prop_id].keys(): print "\ngold id:", gold_pred_id, ':', ', '.join([ mention.terms + '(' + mention_head + ', ' + str(mention.sentence_id) + ')' for mention, mention_head in gold_dist[prop_id][gold_pred_id] ]) print '**********'
def generate_list_of_errors(gold_annotations_dir='../../data/baseline/dev'): for file_name in os.listdir(gold_annotations_dir): graph = load_graph_from_file(gold_annotations_dir + '/' + file_name) entities = [(str(mention), unicode(mention.terms)) for entity in graph.entities.values() for mention in entity.mentions.values()] predicted_clusters = cluster_mentions(entities, score) sentence_wise_predicted_mentions = defaultdict(list) sentence_wise_gold_mentions = defaultdict(list) # Mappings mention_to_gold_entity = {} mention_to_predicted_entity = {} # Gold distribution of each predicted prop gold_dist_of_predicted_entities = {} predicted_dist_of_gold_entities = {} for entity in graph.entities.values(): for mention in entity.mentions.values(): sentence_wise_gold_mentions[mention.sentence_id].append({ "indices": mention.indices, "coref": entity.id, 'text': mention.terms }) mention_to_gold_entity[str(mention)] = entity.id for entity_id, entity in enumerate(predicted_clusters): gold_dist_of_predicted_entities[entity_id] = defaultdict(list) for mention_id, mention_terms in entity: sentence_wise_predicted_mentions[mention_id].append({ "coref": entity_id, 'text': mention_terms }) mention_to_predicted_entity[mention_id] = entity_id gold_entity = mention_to_gold_entity[mention_id] gold_dist_of_predicted_entities[entity_id][gold_entity].append( (mention_id, mention_terms)) for entity in graph.entities.values(): predicted_dist_of_gold_entities[entity.id] = defaultdict(list) for mention in entity.mentions.values(): if (str(mention) in mention_to_predicted_entity.keys()): predicted_entity = mention_to_predicted_entity[str( mention)] predicted_dist_of_gold_entities[ entity.id][predicted_entity].append(mention) else: predicted_dist_of_gold_entities[entity.id][-1].append( mention) print '###' + file_name + '\n' print('Mentions which should have been clustered: ') for entity_id in predicted_dist_of_gold_entities.keys(): if (len(predicted_dist_of_gold_entities[entity_id].keys()) != 
1): print 'gold_cluster_id: ', entity_id mentions_to_print = [ predicted_dist_of_gold_entities[entity_id] [predicted_entity_id][0] for predicted_entity_id in predicted_dist_of_gold_entities[entity_id].keys() ] # print mentions_to_print if ((mentions_to_print != None)): print(', '.join( [mention.terms for mention in mentions_to_print])) print("\nMentions which should not have been clustered: ") for entity_id in gold_dist_of_predicted_entities.keys(): if (len(gold_dist_of_predicted_entities[entity_id]) != 1): print 'predicted_cluster_id: ', entity_id mentions_to_print = [ gold_dist_of_predicted_entities[entity_id][gold_entity_id] [0] for gold_entity_id in gold_dist_of_predicted_entities[entity_id].keys() ] if ((mentions_to_print != None)): print(', '.join([ mention_terms for _, mention_terms in mentions_to_print ])) # for sentence_id, sentence in enumerate(graph.sentences.values()): # print 'Sentence: ', ' '.join(sentence) # print 'Gold entities: ', ', '.join([element['text'] for element in sentence_wise_gold_mentions[sentence_id+1]]) # print ' ' # print('\n The Gold clusters:') # for entity in graph.entities.values(): # if len(entity.mentions.values())!=1: # print'cluster_id: ', entity.id , ', '.join([mention.terms for mention in entity.mentions.values()]) # print ' ' # print 'The predicted clusters:' # for entity_id, entity in enumerate(predicted_clusters): # if len(entity)!=1: # print 'cluster_id: ', entity_id, ', '.join([mention_terms for _, mention_terms in entity]) # print ' ' print '**********'