Example #1
def evaluate_predicate_coref(test_graphs):
    """
    Receives the OKR test graphs and evaluates them for predicate coreference
    :param test_graphs: the OKR test graphs
    :return: the coreference scores: MUC, B-CUBED, CEAF and MELA (CoNLL F1).
    """
    parser = spacy_wrapper()

    scores = []

    for graph in test_graphs:

        # Cluster the mentions
        prop_mentions = []
        for prop in graph.propositions.values():
            for mention in prop.mentions.values():

                if mention.indices == [-1]:
                    continue

                head_lemma, head_pos = get_mention_head(mention, parser, graph)
                prop_mentions.append((mention, head_lemma, head_pos))

        clusters = cluster_mentions(prop_mentions, score)
        clusters = [set([item[0] for item in cluster]) for cluster in clusters]

        # Evaluate
        curr_scores, _ = eval_clusters(clusters, graph)
        scores.append(curr_scores)

    scores = np.mean(scores, axis=0).tolist()

    return scores
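The evaluate_* functions in this file (this one and the entity/argument variants below) share the same calling convention. The driver below is only a sketch: the directory path and the helper name run_predicate_coref_eval are hypothetical, it assumes load_graph_from_file (used in the later examples) loads one OKR graph per annotation file, and it assumes the returned list is ordered MUC, B-CUBED, CEAF, CoNLL F1 as the docstring states.

def run_predicate_coref_eval(test_dir='../../data/baseline/test'):  # hypothetical path
    # Load one OKR graph per gold annotation file and evaluate predicate coreference
    test_graphs = [load_graph_from_file(os.path.join(test_dir, f))
                   for f in sorted(os.listdir(test_dir))]
    muc, b_cubed, ceaf, conll_f1 = evaluate_predicate_coref(test_graphs)
    print 'MUC: %.3f  B-CUBED: %.3f  CEAF: %.3f  CoNLL F1: %.3f' % (muc, b_cubed, ceaf, conll_f1)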
Example #2
def evaluate_argument_coref(test_graphs):
    """
    Receives the OKR test graphs and evaluates them for argument coreference
    :param test_graphs: the OKR test graphs
    :return: the coreference scores: MUC, B-CUBED, CEAF and MELA (CoNLL F1).
    """
    scores = []

    for graph in test_graphs:

        arg_clustering = {}
        for prop_id, prop in graph.propositions.iteritems():

            # Cluster the arguments
            all_args = [
                arg for mention in prop.mentions.values()
                for arg in mention.argument_mentions.values()
            ]
            score = lambda mention, cluster: same_entity(
                cluster, mention, graph)
            clusters = cluster_mentions(all_args, score)
            clusters = [
                set([str(mention) for mention in cluster])
                for cluster in clusters
            ]
            arg_clustering[prop_id] = clusters

        # Evaluate
        curr_scores = eval_clusters(graph, arg_clustering)
        scores.append(curr_scores)

    scores = np.mean(scores, axis=0).tolist()

    return scores
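The score lambda inside evaluate_argument_coref above pins down the interface that cluster_mentions expects: a callable score(mention, cluster) that decides whether a mention should join an existing cluster. The repository's cluster_mentions is not shown on this page; the following is only a minimal sketch of a greedy clustering loop against that signature, assuming a truthy score triggers a merge.

def greedy_cluster(mentions, score):
    # Not the repo's cluster_mentions -- an illustrative greedy loop only:
    # add each mention to the first cluster the score accepts, otherwise open a new one.
    clusters = []
    for mention in mentions:
        for cluster in clusters:
            if score(mention, cluster):
                cluster.add(mention)
                break
        else:
            clusters.append(set([mention]))
    return clusters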
Example #3
def visually_analyse_baseline_entity_coref(
        gold_annotations_dir='../../data/baseline/dev'):
    """
    Qualitatively analyse the baseline entity coreference output: for each gold annotation file,
    print the sentences with their gold entity mentions, followed by the gold and predicted clusters.
    """

    for file_name in os.listdir(gold_annotations_dir):

        graph = load_graph_from_file(gold_annotations_dir + '/' + file_name)

        entities = [(str(mention), unicode(mention.terms))
                    for entity in graph.entities.values()
                    for mention in entity.mentions.values()]
        predicted_clusters = cluster_mentions(entities, score)

        sentence_wise_predicted_mentions = defaultdict(list)
        sentence_wise_gold_mentions = defaultdict(list)

        for entity in graph.entities.values():
            for mention in entity.mentions.values():
                sentence_wise_gold_mentions[mention.sentence_id].append({
                    "indices": mention.indices,
                    "coref": entity.id,
                    'text': mention.terms
                })

        # for entity_id, entity in enumerate(predicted_clusters):
        # 	for mention_id, mention_terms in entity:
        # 		indices = map(int, mention_id.split('[')[1].rstrip(']').split(', '))
        # 		sentence_wise_predicted_mentions[int(mention_id.split('[')[0])].append({"indices":indices,"coref":entity_id, 'text':mention_terms})

        print '###' + file_name + '\n'
        for sentence_id, sentence in enumerate(graph.sentences.values()):
            print 'Sentence: ', ' '.join(sentence)
            # print 'Predicted entities: ', ', '.join([element['text'] for element in sentence_wise_predicted_mentions[sentence_id+1]])
            print 'Gold entities: ', ', '.join([
                element['text']
                for element in sentence_wise_gold_mentions[sentence_id + 1]
            ])
            print ' '

        print('\n The Gold clusters:')
        for entity in graph.entities.values():
            if len(entity.mentions.values()) != 1:
                print 'cluster_id: ', entity.id, ', '.join(
                    [mention.terms for mention in entity.mentions.values()])
        print ' '

        print 'The predicted clusters:'
        for entity_id, entity in enumerate(predicted_clusters):
            if len(entity) != 1:
                print 'cluster_id: ', entity_id, ', '.join(
                    [mention_terms for _, mention_terms in entity])
        print ' '

        print '**********'
Example #4
def parse_sentence_wise_corenlp_coref_and_cluster_further(input_dir='./coref_sentence_wise/test',
                                                          gold_annotations_folder='../../../data/baseline/test'):
    """
    Parse the XML output produced by CoreNLP on the sentence-wise input files, use its mentions as the
    initial state for the baseline entity coreference resolver, and evaluate the final clusters.
    """
    scores = []

    for file in os.listdir(gold_annotations_folder):
        if re.match(r'(.+)\.xml', file) is not None:
            okr_graph = load_graph_from_file(gold_annotations_folder + '/' + file)

            clusters = []
            entities = [(str(mention), unicode(mention.terms))
                        for entity in okr_graph.entities.values()
                        for mention in entity.mentions.values()]
            gold_entity_mentions = [str(mention)
                                    for entity in okr_graph.entities.values()
                                    for mention in entity.mentions.values()]

            for annotated_file in os.listdir(input_dir):

                match = re.match(file[:-4] + r'\.txt\.sent(\d+)\.txt\.xml', annotated_file)
                if match is not None:
                    sent_number = int(match.group(1))
                    sentence_tokens = okr_graph.sentences[sent_number]

                    tree = ET.parse(input_dir + '/' + annotated_file)
                    document = tree.getroot()[0]
                    coref_node = document.find('coreference')
                    if coref_node is not None:
                        for coref_id, coref_chain in enumerate(coref_node):
                            cluster = []
                            for mention in coref_chain:
                                start = int(mention[1].text) - 1
                                end = int(mention[2].text) - 1
                                indices = range(start, end)
                                text = mention[4].text
                                mention_string = str(sent_number) + str(indices)

                                # Filter out mentions which are not in the gold standard
                                if mention_string in gold_entity_mentions:
                                    cluster.append((mention_string, text))
                            if len(cluster) != 0:
                                clusters.append(set(cluster))

            clusters_hybrid = cluster_mentions(entities, score, clusters, merge_initially=True)
            clusters_hybrid = [set([item[0] for item in cluster]) for cluster in clusters_hybrid]

            curr_scores = eval_clusters(clusters_hybrid, okr_graph)
            scores.append(curr_scores)

    print(scores)
    scores = np.mean(scores, axis=0).tolist()
    print(scores)
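Both CoreNLP parsing helpers (this one and the cross-document variant below) read each mention element positionally: mention[0] is the sentence number, mention[1] and mention[2] are the start/end token offsets (the code treats them as 1-based with an exclusive end), and mention[4] is the surface text. The self-contained snippet below only illustrates that element order on a made-up toy document; ET is the xml.etree.ElementTree alias already used in these functions.

SAMPLE_COREF_XML = """<root><document>
  <coreference>
    <coreference>
      <mention representative="true">
        <sentence>1</sentence><start>1</start><end>3</end><head>2</head><text>the prime minister</text>
      </mention>
    </coreference>
  </coreference>
</document></root>"""

document = ET.fromstring(SAMPLE_COREF_XML)[0]
for coref_chain in document.find('coreference'):
    for mention in coref_chain:
        # prints: 1 0 2 the prime minister  (sentence, 0-based start, 0-based end, text)
        print mention[0].text, int(mention[1].text) - 1, int(mention[2].text) - 1, mention[4].text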
Example #5
def get_argument_and_entity_clusters(graph):
    """
    Extract entity and argument mentions from the graph, take their union and cluster them using the
    baseline entity coreference algorithm.
    :param graph: the OKR graph object
    """
    entities = [(str(mention), unicode(mention.terms))
                for entity in graph.entities.values()
                for mention in entity.mentions.values()]

    arguments = [(str(mention), unicode(mention_string_to_terms(graph, mention, prop_mention.sentence_id)))
                 for prop in graph.propositions.values()
                 for prop_mention in prop.mentions.values()
                 for mention in prop_mention.argument_mentions.values()
                 if prop_mention.indices != [-1]]

    args_and_entities_union = list(set(entities).union(set(arguments)))
    args_and_entities_clusters = cluster_mentions(args_and_entities_union, argument_score)
    args_and_entities_clusters = [set([item[0] for item in cluster]) for cluster in args_and_entities_clusters]
    return args_and_entities_clusters
Example #6
def parse_and_cluster_and_evaluate_corenlp_coref_cross_doc(input_dir='CoreNLP_coref_anno/test',
                                                           gold_annotations_folder='../../../data/baseline/test'):
    """
    Parse the XML output produced by CoreNLP (cross-document), use the baseline system to cluster further,
    and evaluate mention and coreference accuracy against the gold annotations.
    """
    scores = []

    for file in os.listdir(input_dir):
        if re.match(r'(.+)\.xml', file) is not None:
            clusters = []
            okr_graph = load_graph_from_file(
                gold_annotations_folder + '/' + re.match(r'(.+)\.xml', file).group(1)[:-4] + '.xml')
            entities = [(str(mention), unicode(mention.terms))
                        for entity in okr_graph.entities.values()
                        for mention in entity.mentions.values()]
            gold_entity_mentions = [str(mention)
                                    for entity in okr_graph.entities.values()
                                    for mention in entity.mentions.values()]

            tree = ET.parse(input_dir + '/' + file)
            document = tree.getroot()[0]
            coref_node = document.find('coreference')

            for coref_id, coref_chain in enumerate(coref_node):
                cluster = []
                for mention in coref_chain:
                    sent_num = int(mention[0].text)
                    start = int(mention[1].text) - 1
                    end = int(mention[2].text) - 1
                    indices = range(start, end)
                    text = mention[4].text
                    mention_string = str(sent_num) + str(indices)

                    # Keep only mentions that also appear in the gold standard
                    if mention_string in gold_entity_mentions:
                        cluster.append((mention_string, text))

                if len(cluster) != 0:
                    clusters.append(set(cluster))

            clusters_hybrid = cluster_mentions(entities, score, clusters)
            clusters_hybrid = [set([item[0] for item in cluster]) for cluster in clusters_hybrid]

            curr_scores = eval_clusters(clusters_hybrid, okr_graph)
            scores.append(curr_scores)

    print(scores)
    scores = np.mean(scores, axis=0).tolist()
    print(scores)
Example #7
def evaluate_entity_coref(test_graphs):
    """
    Receives the OKR test graphs and evaluates them for entity coreference
    :param test_graphs: the OKR test graphs
    :return: the coreference scores: MUC, B-CUBED, CEAF and MELA (CoNLL F1).
    """
    scores = []

    for graph in test_graphs:

        # Cluster the entities
        entities = [(str(mention), unicode(mention.terms))
                    for entity in graph.entities.values()
                    for mention in entity.mentions.values()]
        clusters = cluster_mentions(entities, score)
        clusters = [set([item[0] for item in cluster]) for cluster in clusters]

        # Evaluate
        curr_scores = eval_clusters(clusters, graph)
        scores.append(curr_scores)

    scores = np.mean(scores, axis=0).tolist()

    return scores
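In the entity examples, each item handed to cluster_mentions is a (mention_id, terms) pair, so the module-level score used here presumably compares a candidate's terms with those already in a cluster. The function below is just one plausible lexical-overlap heuristic written against that tuple shape; it is not the repository's score.

def lexical_overlap_score(item, cluster):
    # Hypothetical stand-in for the baseline `score`: merge when the mention shares
    # at least one token with any mention already in the cluster.
    _, terms = item
    tokens = set(terms.lower().split())
    cluster_tokens = set(tok for _, cluster_terms in cluster for tok in cluster_terms.lower().split())
    return len(tokens & cluster_tokens) > 0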
Example #8
def create_report_for_prop_coref(
        gold_annotations_dir='../../data/baseline/dev'):
    """
	Qualitatively analyse the output from baseline for predicate coreference system. This method is similar to 
	analyse_baseline_prop_coref() but only displays the distribution of the gold and predicted clusters for which 
	the system has made mistakes in clustering (two kinds of mistakes - incorrect merges and missed merges). Only sample
	representatives of a cluster are displayed. 
	"""
    parser = spacy_wrapper()
    for file_name in os.listdir(gold_annotations_dir):

        graph = load_graph_from_file(gold_annotations_dir + '/' + file_name)

        prop_mentions = []

        for prop in graph.propositions.values():
            for mention in prop.mentions.values():

                if mention.indices == [-1]:
                    continue
                head_lemma, head_pos = get_mention_head(mention, parser, graph)
                prop_mentions.append((mention, head_lemma, head_pos))

        predicted_clusters = cluster_mentions(prop_mentions, score)

        sentence_wise_predicted_mentions = defaultdict(list)
        sentence_wise_gold_mentions = defaultdict(list)

        # Mappings
        mention_to_gold_pred = {}
        mention_to_predicted_pred = {}

        # Gold distribution of each predicted prop
        gold_dist = {}
        predicted_dist = {}

        for prop in graph.propositions.values():
            for mention in prop.mentions.values():
                sentence_wise_gold_mentions[mention.sentence_id].append({
                    "indices": mention.indices,
                    "coref": prop.id,
                    'text': mention.terms,
                    'arguments': mention.argument_mentions.values()
                })
                mention_to_gold_pred[str(mention)] = prop.id

        for prop_id, prop in enumerate(predicted_clusters):
            gold_dist[prop_id] = defaultdict(list)
            for mention, mention_head, _ in prop:
                sentence_wise_predicted_mentions[mention.sentence_id].append({
                    "indices": mention.indices,
                    "coref": prop_id,
                    'text': mention.terms
                })
                mention_to_predicted_pred[str(mention)] = prop_id
                gold_pred = mention_to_gold_pred[str(mention)]
                gold_dist[prop_id][gold_pred].append((mention, mention_head))

        for prop in graph.propositions.values():
            predicted_dist[prop.id] = defaultdict(list)
            for mention in prop.mentions.values():
                if str(mention) in mention_to_predicted_pred:
                    predicted_predicate = mention_to_predicted_pred[str(mention)]
                    predicted_dist[prop.id][predicted_predicate].append(mention)
                else:
                    predicted_dist[prop.id][-1].append(mention)

        print '###' + file_name + '\n'

        print('Mentions which should have been clustered: ')
        for prop_id in predicted_dist.keys():

            if len(predicted_dist[prop_id].keys()) != 1:
                print 'gold_cluster_id: ', prop_id
                mentions_to_print = [
                    predicted_dist[prop_id][predicted_pred_id][0]
                    for predicted_pred_id in predicted_dist[prop_id].keys()
                ]
                print(', '.join([
                    mention.terms + '(' + str(mention.sentence_id) + ')' + '{' +
                    ', '.join([
                        ' '.join([graph.sentences[mention.sentence_id][int(id)]
                                  for id in str(argument).rstrip(']').split('[')[1].split(', ')])
                        for argument in mention.argument_mentions.values()
                    ]) + '}'
                    for mention in mentions_to_print
                ]))

        print("\nMentions which should not have been clustered: ")
        for prop_id in gold_dist.keys():
            if len(gold_dist[prop_id]) != 1:
                print 'predicted_cluster_id: ', prop_id
                mentions_to_print = [
                    gold_dist[prop_id][gold_pred_id][0][0]
                    for gold_pred_id in gold_dist[prop_id].keys()
                ]
                print(', '.join([
                    mention.terms + '(' + str(mention.sentence_id) + ')' + '{' +
                    ', '.join([
                        ' '.join([graph.sentences[mention.sentence_id][int(id)]
                                  for id in str(argument).rstrip(']').split('[')[1].split(', ')])
                        for argument in mention.argument_mentions.values()
                    ]) + '}'
                    for mention in mentions_to_print
                ]))

        print '**********'
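The two dictionaries built above act as the rows and columns of a cluster confusion matrix: predicted_dist (keyed by gold proposition id) shows how each gold cluster was split across predictions, and gold_dist (keyed by predicted cluster id) shows how each prediction mixes gold clusters. A small helper sketch, not part of the original code, that turns them into error counts:

def count_cluster_errors(predicted_dist, gold_dist):
    # Gold clusters spread over more than one predicted cluster -> missed merges;
    # predicted clusters containing more than one gold cluster -> incorrect merges.
    missed_merges = sum(1 for dist in predicted_dist.values() if len(dist) > 1)
    incorrect_merges = sum(1 for dist in gold_dist.values() if len(dist) > 1)
    return missed_merges, incorrect_merges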
Example #9
def analyse_baseline_prop_coref(gold_annotations_dir='../../data/baseline/dev',
                                verbose=False):
    """
	Qualitatively analyse the output from baseline for predicate coreference system. This
	method outputs all sentences, then list of gold predicate clusters and the list of predicted predicate clusters.
	"""
    parser = spacy_wrapper()
    for file_name in os.listdir(gold_annotations_dir):

        graph = load_graph_from_file(gold_annotations_dir + '/' + file_name)

        prop_mentions = []

        for prop in graph.propositions.values():
            for mention in prop.mentions.values():

                if mention.indices == [-1]:
                    continue
                head_lemma, head_pos = get_mention_head(mention, parser, graph)
                prop_mentions.append((mention, head_lemma, head_pos))

        predicted_clusters = cluster_mentions(prop_mentions, score)

        sentence_wise_predicted_mentions = defaultdict(list)
        sentence_wise_gold_mentions = defaultdict(list)

        # Mappings
        mention_to_gold_pred = {}
        mention_to_predicted_pred = {}

        # gold_dist -> Gold distribution of each predicted proposition cluster
        # predicted_dist -> Predicted distribution of each gold proposition cluster

        gold_dist = {}
        predicted_dist = {}

        for prop in graph.propositions.values():
            for mention in prop.mentions.values():
                sentence_wise_gold_mentions[mention.sentence_id].append({
                    "indices": mention.indices,
                    "coref": prop.id,
                    'text': mention.terms,
                    'arguments': mention.argument_mentions.values()
                })
                mention_to_gold_pred[str(mention)] = prop.id

        for prop_id, prop in enumerate(predicted_clusters):
            gold_dist[prop_id] = defaultdict(list)
            for mention, mention_head, _ in prop:
                sentence_wise_predicted_mentions[mention.sentence_id].append({
                    "indices": mention.indices,
                    "coref": prop_id,
                    'text': mention.terms
                })
                mention_to_predicted_pred[str(mention)] = prop_id
                gold_pred = mention_to_gold_pred[str(mention)]
                gold_dist[prop_id][gold_pred].append((mention, mention_head))

        for prop in graph.propositions.values():
            predicted_dist[prop.id] = defaultdict(list)
            for mention in prop.mentions.values():
                if str(mention) in mention_to_predicted_pred:
                    predicted_predicate = mention_to_predicted_pred[str(mention)]
                    predicted_dist[prop.id][predicted_predicate].append(mention)
                else:
                    predicted_dist[prop.id][-1].append(mention)

        print '###' + file_name + '\n'

        for sentence_id, sentence in enumerate(graph.sentences.values()):
            print 'Sentence: ', str(sentence_id + 1), ' ', ' '.join(sentence)
            print 'Gold predicates: ', ', '.join([
                element['text'] + '(' + str(element['coref']) + ')' + '{' +
                ', '.join([str(argument)
                           for argument in element["arguments"]]) + '}'
                for element in sentence_wise_gold_mentions[sentence_id + 1]
            ])
            print 'Predicted predicates: ', ', '.join([
                element['text'] + '(' + str(element['coref']) + ')'
                for element in sentence_wise_predicted_mentions[sentence_id + 1]
            ])
            print ' '

        print('\n The Gold clusters:')

        for prop_id in predicted_dist.keys():
            print 'cluster_id: ', prop_id
            for predicted_pred_id in predicted_dist[prop_id].keys():
                print "\npredicted id:", predicted_pred_id, ':', ', '.join([
                    mention.terms + '(' + str(mention.sentence_id) + ')'
                    for mention in predicted_dist[prop_id][predicted_pred_id]
                ])

        print 'The predicted clusters:'

        for prop_id in gold_dist.keys():
            print 'cluster_id: ', prop_id
            for gold_pred_id in gold_dist[prop_id].keys():
                print "\ngold id:", gold_pred_id, ':', ', '.join([
                    mention.terms + '(' + mention_head + ', ' +
                    str(mention.sentence_id) + ')' for mention, mention_head in
                    gold_dist[prop_id][gold_pred_id]
                ])

        print '**********'
Example #10
def generate_list_of_errors(gold_annotations_dir='../../data/baseline/dev'):
    """
    For each gold annotation file, list the entity coreference errors made by the baseline:
    gold clusters whose mentions were split across predicted clusters (missed merges) and
    predicted clusters that mix mentions from different gold clusters (incorrect merges).
    """
    for file_name in os.listdir(gold_annotations_dir):

        graph = load_graph_from_file(gold_annotations_dir + '/' + file_name)

        entities = [(str(mention), unicode(mention.terms))
                    for entity in graph.entities.values()
                    for mention in entity.mentions.values()]
        predicted_clusters = cluster_mentions(entities, score)

        sentence_wise_predicted_mentions = defaultdict(list)
        sentence_wise_gold_mentions = defaultdict(list)

        # Mappings
        mention_to_gold_entity = {}
        mention_to_predicted_entity = {}

        # Gold distribution of each predicted prop
        gold_dist_of_predicted_entities = {}
        predicted_dist_of_gold_entities = {}

        for entity in graph.entities.values():
            for mention in entity.mentions.values():
                sentence_wise_gold_mentions[mention.sentence_id].append({
                    "indices": mention.indices,
                    "coref": entity.id,
                    'text': mention.terms
                })
                mention_to_gold_entity[str(mention)] = entity.id

        for entity_id, entity in enumerate(predicted_clusters):
            gold_dist_of_predicted_entities[entity_id] = defaultdict(list)
            for mention_id, mention_terms in entity:
                sentence_wise_predicted_mentions[mention_id].append({
                    "coref": entity_id,
                    'text': mention_terms
                })

                mention_to_predicted_entity[mention_id] = entity_id
                gold_entity = mention_to_gold_entity[mention_id]
                gold_dist_of_predicted_entities[entity_id][gold_entity].append((mention_id, mention_terms))

        for entity in graph.entities.values():
            predicted_dist_of_gold_entities[entity.id] = defaultdict(list)
            for mention in entity.mentions.values():
                if str(mention) in mention_to_predicted_entity:
                    predicted_entity = mention_to_predicted_entity[str(mention)]
                    predicted_dist_of_gold_entities[entity.id][predicted_entity].append(mention)
                else:
                    predicted_dist_of_gold_entities[entity.id][-1].append(mention)

        print '###' + file_name + '\n'

        print('Mentions which should have been clustered: ')
        for entity_id in predicted_dist_of_gold_entities.keys():

            if len(predicted_dist_of_gold_entities[entity_id].keys()) != 1:
                print 'gold_cluster_id: ', entity_id
                mentions_to_print = [
                    predicted_dist_of_gold_entities[entity_id][predicted_entity_id][0]
                    for predicted_entity_id in predicted_dist_of_gold_entities[entity_id].keys()
                ]
                # print mentions_to_print
                if mentions_to_print is not None:
                    print(', '.join([mention.terms for mention in mentions_to_print]))

        print("\nMentions which should not have been clustered: ")
        for entity_id in gold_dist_of_predicted_entities.keys():
            if len(gold_dist_of_predicted_entities[entity_id]) != 1:
                print 'predicted_cluster_id: ', entity_id
                mentions_to_print = [
                    gold_dist_of_predicted_entities[entity_id][gold_entity_id][0]
                    for gold_entity_id in gold_dist_of_predicted_entities[entity_id].keys()
                ]
                if mentions_to_print is not None:
                    print(', '.join([mention_terms for _, mention_terms in mentions_to_print]))

        # for sentence_id, sentence in enumerate(graph.sentences.values()):
        # 	print 'Sentence: ', ' '.join(sentence)
        # 	print 'Gold entities: ', ', '.join([element['text'] for element in sentence_wise_gold_mentions[sentence_id+1]])
        # 	print ' '

        # print('\n The Gold clusters:')
        # for entity in graph.entities.values():
        # 	if len(entity.mentions.values())!=1:
        # 		print'cluster_id: ', entity.id , ', '.join([mention.terms for mention in entity.mentions.values()])
        # print ' '

        # print 'The predicted clusters:'
        # for entity_id, entity in enumerate(predicted_clusters):
        # 	if len(entity)!=1:
        # 		print 'cluster_id: ', entity_id, ', '.join([mention_terms for _, mention_terms in entity])
        # print ' '

        print '**********'