Beispiel #1
0
 def contains(self, mention, types):
     """Report whether ``mention`` sufficiently overlaps a stored region.

     For every cluster type in ``types`` the regions stored under the key
     ``<document_id>:<keyframe_or_document_element_id>:<cluster_type>`` are
     turned into region objects, given a ``modality`` and a ``boundary``,
     and compared with ``mention``.

     Args:
         mention: object exposing ``get`` for ``document_id``,
             ``document_element_id`` and ``keyframe_id``.
         types: iterable of cluster types to look up.

     Returns:
         True as soon as ``get_intersection_over_union(mention, region)``
         is at least 0.8 for some region, otherwise False.
     """
     # These values do not depend on the cluster type, so read them once.
     regions = self.get('regions')
     document_id = mention.get('document_id')
     document_element_id = mention.get('document_element_id')
     keyframe_id = mention.get('keyframe_id')
     # A keyframe id, when present, takes precedence over the element id.
     doce_or_kf_id = keyframe_id if keyframe_id else document_element_id
     for cluster_type in types:
         key = '{docid}:{doce_or_kf_id}:{cluster_type}'.format(
             docid=document_id,
             doce_or_kf_id=doce_or_kf_id,
             cluster_type=cluster_type)
         if key not in regions:
             continue
         for span_string in regions.get(key):
             # Stored spans are relative; prefix them to get a full span string.
             fq_span_string = '{docid}:{doce_or_kf_id}:{span_string}'.format(
                 docid=document_id,
                 doce_or_kf_id=doce_or_kf_id,
                 span_string=span_string)
             region = spanstring_to_object(self.get('logger'), fq_span_string)
             region.set(
                 'modality',
                 self.get('document_mappings').get(
                     'modality', region.get('document_element_id')))
             # Boundaries are looked up per keyframe when the region has a
             # keyframe id, otherwise per modality and document element.
             region_keyframe_id = region.get('keyframe_id')
             if region_keyframe_id:
                 boundaries_key = 'keyframe'
                 boundary_id = region_keyframe_id
             else:
                 boundaries_key = region.get('modality')
                 boundary_id = region.get('document_element_id')
             region.set(
                 'boundary',
                 self.get('document_boundaries').get(boundaries_key).get(
                     boundary_id))
             if get_intersection_over_union(mention, region) >= 0.8:
                 # One sufficiently overlapping region settles the answer.
                 return True
     return False
Beispiel #2
0
 def get_entity_and_event_similarity(self, gold_cluster, system_cluster):
     """Count the aligned mention pairs between a gold and a system cluster.

     The clusters are only compared when
     ``self.get('number_of_matching_types', ...)`` is truthy for their
     ``top_level_types``. Each gold/system mention pair is scored with
     ``get_intersection_over_union``; scores below 0.8 are treated as 0.
     A one-to-one alignment is then taken from ``Munkres().compute`` on the
     matrix built by ``get_cost_matrix``.

     Args:
         gold_cluster: object exposing ``get`` for ``top_level_types`` and
             ``mentions`` (a mapping whose values are mention objects).
         system_cluster: same shape as ``gold_cluster``.

     Returns:
         The number of aligned pairs whose thresholded score is above 0;
         0 when the types do not match or either cluster has no mentions.
     """
     if not self.get('number_of_matching_types',
                     gold_cluster.get('top_level_types'),
                     system_cluster.get('top_level_types')):
         return 0
     mentions = {
         'gold': list(gold_cluster.get('mentions').values()),
         'system': list(system_cluster.get('mentions').values())
     }
     # Without mentions on both sides no pair can be aligned, so skip
     # building a cost matrix and running the assignment on empty input.
     if not mentions['gold'] or not mentions['system']:
         return 0
     # Map mention IDs to matrix indices and back, per side.
     mappings = {}
     for filetype, filetype_mentions in mentions.items():
         id_to_index = {}
         index_to_id = {}
         for index, mention in enumerate(filetype_mentions):
             mention_id = mention.get('ID')
             id_to_index[mention_id] = index
             index_to_id[index] = mention_id
         mappings[filetype] = {
             'id_to_index': id_to_index,
             'index_to_id': index_to_id
         }
     similarities = {}
     for gold_mention in mentions['gold']:
         scores = similarities.setdefault(gold_mention.get('ID'), {})
         for system_mention in mentions['system']:
             iou = get_intersection_over_union(gold_mention, system_mention)
             # Overlaps below the threshold do not count as a match at all.
             scores[system_mention.get('ID')] = 0 if iou < 0.8 else iou
     cost_matrix = get_cost_matrix(similarities, mappings)
     similarity = 0
     for gold_mention_index, system_mention_index in Munkres().compute(
             cost_matrix):
         gold_mention_id = mappings['gold']['index_to_id'][
             gold_mention_index]
         system_mention_id = mappings['system']['index_to_id'][
             system_mention_index]
         if similarities[gold_mention_id][system_mention_id] > 0:
             # Lenient scoring: every aligned pair counts as 1, regardless
             # of how large the overlap is.
             similarity += 1
     return similarity
Beispiel #3
0
 def contains_strict(self, mention, types, metatype):
     """Tell whether ``mention`` overlaps any stored region of exactly one of ``types``.

     Regions are looked up under the key
     ``<document_id>:<keyframe_or_document_element_id>:<cluster_type>``;
     ``metatype`` is accepted but not used here.

     Returns:
         True when ``get_intersection_over_union(mention, region)`` is
         greater than 0 for some region, otherwise False.
     """
     element_id = mention.get('document_element_id')
     kf_id = mention.get('keyframe_id')
     # The keyframe id wins over the document element id when it is set.
     anchor_id = kf_id or element_id
     for cluster_type in types:
         known_regions = self.get('regions')
         lookup_key = '{docid}:{doce_or_kf_id}:{cluster_type}'.format(
             docid=mention.get('document_id'),
             doce_or_kf_id=anchor_id,
             cluster_type=cluster_type)
         if lookup_key in known_regions:
             for span_string in known_regions.get(lookup_key):
                 # Prefix the stored span to obtain a fully qualified span string.
                 qualified_span = '{docid}:{doce_or_kf_id}:{span_string}'.format(
                     docid=mention.get('document_id'),
                     doce_or_kf_id=anchor_id,
                     span_string=span_string)
                 parsed_span = spanstring_to_object(self.logger, qualified_span)
                 candidate = augment_mention_object(
                     parsed_span,
                     self.get('document_mappings'),
                     self.get('document_boundaries'))
                 if get_intersection_over_union(mention, candidate) > 0:
                     return True
     return False
Beispiel #4
0
 def is_predicate_justification_correct(self, system_predicate_justifications, gold_predicate_justifications):
     """Check the top system predicate justifications against the gold ones.

     The system justifications are ordered by
     ``predicate_justification_confidence`` (highest first) and only the
     first two are examined. Each is compared with every gold
     justification through ``get_intersection_over_union``.

     Returns:
         True when any examined system justification has an overlap
         greater than 0 with any gold justification, otherwise False.
     """
     gold_responses = self.get('gold_responses')
     document_mappings = gold_responses.get('document_mappings')
     document_boundaries = self.get('gold_responses').get('document_boundaries')
     ranked = sorted(
         system_predicate_justifications.values(),
         key=lambda pj: pj.get('predicate_justification_confidence'),
         reverse=True)
     is_correct = False
     # Only the two most confident system justifications are considered.
     for system_pj in ranked[:2]:
         system_span = system_pj.get('predicate_justification')
         system_mention = augment_mention_object(
             spanstring_to_object(self.logger, system_span),
             document_mappings,
             document_boundaries)
         for gold_pj in gold_predicate_justifications.values():
             gold_span = gold_pj.get('predicate_justification')
             gold_mention = augment_mention_object(
                 spanstring_to_object(self.logger, gold_span),
                 document_mappings,
                 document_boundaries)
             if get_intersection_over_union(system_mention, gold_mention) > 0:
                 is_correct = True
     return is_correct
Beispiel #5
0
 def contains_relaxed(self, mention, types, metatype):
     """Tell whether ``mention`` overlaps a stored region of a compatible type.

     Unlike an exact key lookup, every region key
     (``<document_id>:<keyframe_or_document_element_id>:<cluster_type>``)
     is considered, and a key qualifies when its document id and
     keyframe/element id equal the mention's and its top-level type (via
     ``get_top_level_type``) equals that of one of ``types``.

     Args:
         mention: object exposing ``get`` for ``document_id``,
             ``document_element_id`` and ``keyframe_id``.
         types: iterable of cluster types to test.
         metatype: metatype passed to ``get_top_level_type`` for ``types``.

     Returns:
         True when ``get_intersection_over_union(mention, region)`` is
         greater than 0 for some qualifying region, otherwise False.
     """
     document_id = mention.get('document_id')
     document_element_id = mention.get('document_element_id')
     keyframe_id = mention.get('keyframe_id')
     doce_or_kf_id = keyframe_id if keyframe_id else document_element_id
     for cluster_type in types:
         top_level_type = get_top_level_type(cluster_type, metatype)
         regions = self.get('regions')
         for key in regions:
             document_id_, doce_or_kf_id_, cluster_type_ = key.split(':')
             # Apply the cheap string comparisons first so the ontology
             # lookup only runs for keys in the mention's own document
             # element or keyframe.
             if document_id != document_id_:
                 continue
             if doce_or_kf_id != doce_or_kf_id_:
                 continue
             metatype_ = self.get('ontology_type_mappings').get('type_to_metatype', cluster_type_)
             if top_level_type != get_top_level_type(cluster_type_, metatype_):
                 continue
             for stored_region in regions.get(key):
                 # Stored regions are stringified, then prefixed to form a
                 # fully qualified span string.
                 fq_region_string = '{docid}:{doce_or_kf_id}:{region_string}'.format(
                     docid=document_id_,
                     doce_or_kf_id=doce_or_kf_id_,
                     region_string=str(stored_region))
                 region = augment_mention_object(
                     spanstring_to_object(self.logger, fq_region_string),
                     self.get('document_mappings'),
                     self.get('document_boundaries'))
                 if get_intersection_over_union(mention, region) > 0:
                     return True
     return False
Beispiel #6
0
 def get_entity_and_event_similarity(self, gold_cluster, system_cluster):
     """Score the mention alignment between a gold and a system cluster.

     The clusters are only compared when
     ``self.get('number_of_matching_types', ...)`` is truthy for their
     ``top_level_types``. Each gold/system mention pair is scored with
     ``get_intersection_over_union``; scores below
     ``self.get('threshold', modality, language)`` (looked up from the gold
     mention's document element) are treated as 0. A one-to-one alignment
     is then taken from ``Munkres().compute`` on the matrix built by
     ``get_cost_matrix``.

     Args:
         gold_cluster: object exposing ``get`` for ``top_level_types`` and
             ``mentions`` (a mapping whose values are mention objects).
         system_cluster: same shape as ``gold_cluster``.

     Returns:
         With ``self.get('weighted') == 'no'`` the number of aligned pairs
         with a score above 0; with ``'yes'`` the sum of those scores.
         Any other ``weighted`` value contributes nothing. 0 when the
         types do not match or either cluster has no mentions.
     """
     if not self.get('number_of_matching_types',
                     gold_cluster.get('top_level_types'),
                     system_cluster.get('top_level_types')):
         return 0
     mentions = {
         'gold': list(gold_cluster.get('mentions').values()),
         'system': list(system_cluster.get('mentions').values())
     }
     # Without mentions on both sides no pair can be aligned, so skip
     # building a cost matrix and running the assignment on empty input.
     if not mentions['gold'] or not mentions['system']:
         return 0
     # Map mention IDs to matrix indices and back, per side.
     mappings = {}
     for filetype, filetype_mentions in mentions.items():
         id_to_index = {}
         index_to_id = {}
         for index, mention in enumerate(filetype_mentions):
             mention_id = mention.get('ID')
             id_to_index[mention_id] = index
             index_to_id[index] = mention_id
         mappings[filetype] = {
             'id_to_index': id_to_index,
             'index_to_id': index_to_id
         }
     similarities = {}
     for gold_mention in mentions['gold']:
         document_element_id = gold_mention.get('document_element_id')
         modality = self.get('document_mappings').get(
             'modality', document_element_id)
         language = self.get('document_mappings').get(
             'language', document_element_id)
         # The threshold depends only on the gold mention, so look it up
         # once per gold mention rather than once per pair.
         threshold = self.get('threshold', modality, language)
         scores = similarities.setdefault(gold_mention.get('ID'), {})
         for system_mention in mentions['system']:
             iou = get_intersection_over_union(gold_mention, system_mention)
             # Overlaps below the threshold do not count as a match at all.
             scores[system_mention.get('ID')] = 0 if iou < threshold else iou
     cost_matrix = get_cost_matrix(similarities, mappings)
     similarity = 0
     for gold_mention_index, system_mention_index in Munkres().compute(
             cost_matrix):
         gold_mention_id = mappings['gold']['index_to_id'][
             gold_mention_index]
         system_mention_id = mappings['system']['index_to_id'][
             system_mention_index]
         score = similarities[gold_mention_id][system_mention_id]
         if score > 0:
             weighted = self.get('weighted')
             if weighted == 'no':
                 # Unweighted: every aligned pair counts as one mention.
                 similarity += 1
             elif weighted == 'yes':
                 # Weighted: add up the amount of overlap.
                 similarity += score
     return similarity