def contains(self, mention, types):
    """Return whether ``mention`` sufficiently overlaps a stored region.

    For each cluster type in ``types`` the regions stored under the key
    ``<document_id>:<keyframe_id or document_element_id>:<cluster_type>``
    are turned into region objects via ``spanstring_to_object``, given a
    ``modality`` and a ``boundary``, and compared with ``mention``.

    Args:
        mention: Object whose ``get`` answers ``document_id``,
            ``document_element_id`` and ``keyframe_id``.
        types: Iterable of cluster types to look up.

    Returns:
        True as soon as one region has an intersection-over-union of at
        least 0.8 with ``mention``; False when no region does.
    """
    # Everything below depends only on ``self`` or ``mention``, so it is
    # looked up once instead of on every pass through the loops.
    document_id = mention.get('document_id')
    document_element_id = mention.get('document_element_id')
    keyframe_id = mention.get('keyframe_id')
    doce_or_kf_id = keyframe_id if keyframe_id else document_element_id
    regions = self.get('regions')
    document_mappings = self.get('document_mappings')
    document_boundaries = self.get('document_boundaries')
    logger = self.get('logger')

    for cluster_type in types:
        key = '{docid}:{doce_or_kf_id}:{cluster_type}'.format(
            docid=document_id,
            doce_or_kf_id=doce_or_kf_id,
            cluster_type=cluster_type)
        if key not in regions:
            continue
        for span_string in regions.get(key):
            # Stored spans are relative; prefix them with the document and
            # element/keyframe id before parsing.
            fq_span_string = '{docid}:{doce_or_kf_id}:{span_string}'.format(
                docid=document_id,
                doce_or_kf_id=doce_or_kf_id,
                span_string=span_string)
            region = spanstring_to_object(logger, fq_span_string)
            region.set(
                'modality',
                document_mappings.get('modality',
                                      region.get('document_element_id')))
            # Keyframe regions use the 'keyframe' boundaries; all other
            # regions use the boundaries of their modality.
            region_keyframe_id = region.get('keyframe_id')
            if region_keyframe_id:
                boundaries_key = 'keyframe'
                boundary_owner_id = region_keyframe_id
            else:
                boundaries_key = region.get('modality')
                boundary_owner_id = region.get('document_element_id')
            region.set(
                'boundary',
                document_boundaries.get(boundaries_key).get(boundary_owner_id))
            if get_intersection_over_union(mention, region) >= 0.8:
                # One sufficiently overlapping region settles the answer.
                return True
    return False
def get_entity_and_event_similarity(self, gold_cluster, system_cluster):
    """Count the aligned mention pairs of a gold and a system cluster.

    When ``self.get('number_of_matching_types', ...)`` is truthy for the
    clusters' ``top_level_types``, every gold mention is compared with
    every system mention by intersection-over-union (values below 0.8 are
    treated as 0). The resulting scores are passed to ``get_cost_matrix``
    and the pairs returned by ``Munkres().compute`` are inspected.

    Args:
        gold_cluster: Object whose ``get`` answers ``top_level_types`` and
            ``mentions`` (a mapping whose values are mention objects).
        system_cluster: Same shape as ``gold_cluster``.

    Returns:
        The number of aligned pairs whose retained score is above 0;
        0 when the types do not match or either cluster has no mentions.
    """
    if not self.get('number_of_matching_types',
                    gold_cluster.get('top_level_types'),
                    system_cluster.get('top_level_types')):
        return 0

    mentions = {
        'gold': list(gold_cluster.get('mentions').values()),
        'system': list(system_cluster.get('mentions').values()),
    }
    # Nothing can be aligned when one side is empty; returning here also
    # keeps an empty matrix away from the assignment solver.
    if not mentions['gold'] or not mentions['system']:
        return 0

    # Two-way mapping between mention IDs and matrix indices, per side.
    mappings = {}
    for filetype, filetype_mentions in mentions.items():
        id_to_index = {}
        index_to_id = {}
        for index, mention in enumerate(filetype_mentions):
            mention_id = mention.get('ID')
            id_to_index[mention_id] = index
            index_to_id[index] = mention_id
        mappings[filetype] = {
            'id_to_index': id_to_index,
            'index_to_id': index_to_id,
        }

    similarities = {}
    for gold_mention in mentions['gold']:
        scores = similarities.setdefault(gold_mention.get('ID'), {})
        for system_mention in mentions['system']:
            iou = get_intersection_over_union(gold_mention, system_mention)
            # Overlaps below the 0.8 cut-off do not count as a match.
            scores[system_mention.get('ID')] = 0 if iou < 0.8 else iou

    cost_matrix = get_cost_matrix(similarities, mappings)
    gold_ids = mappings['gold']['index_to_id']
    system_ids = mappings['system']['index_to_id']

    similarity = 0
    for gold_index, system_index in Munkres().compute(cost_matrix):
        score = similarities[gold_ids[gold_index]][system_ids[system_index]]
        if score > 0:
            # Lenient similarity: each aligned pair counts as 1. The
            # alternative would be to add up the amount of overlap.
            similarity += 1
    return similarity
def contains_strict(self, mention, types, metatype):
    """Tell whether ``mention`` overlaps a region stored for one of ``types``.

    Regions are looked up under the exact key
    ``<document_id>:<keyframe_id or document_element_id>:<cluster_type>``.

    Args:
        mention: Object whose ``get`` answers ``document_id``,
            ``document_element_id`` and ``keyframe_id``.
        types: Iterable of cluster types to look up.
        metatype: Accepted but not used by this method.

    Returns:
        True on the first region whose intersection-over-union with
        ``mention`` is above 0; False when there is none.
    """
    element_id = mention.get('document_element_id')
    frame_id = mention.get('keyframe_id')
    # A keyframe id, when present, takes precedence over the element id.
    anchor_id = frame_id or element_id

    for wanted_type in types:
        lookup_key = '{docid}:{doce_or_kf_id}:{cluster_type}'.format(
            docid=mention.get('document_id'),
            doce_or_kf_id=anchor_id,
            cluster_type=wanted_type)
        if lookup_key in self.get('regions'):
            for raw_span in self.get('regions').get(lookup_key):
                # Prefix the stored span with document and anchor ids.
                qualified_span = '{docid}:{doce_or_kf_id}:{span_string}'.format(
                    docid=mention.get('document_id'),
                    doce_or_kf_id=anchor_id,
                    span_string=raw_span)
                parsed_span = spanstring_to_object(self.logger, qualified_span)
                candidate = augment_mention_object(
                    parsed_span,
                    self.get('document_mappings'),
                    self.get('document_boundaries'))
                overlap = get_intersection_over_union(mention, candidate)
                if overlap > 0:
                    return True
    return False
def is_predicate_justification_correct(self, system_predicate_justifications, gold_predicate_justifications):
    """Check whether a top system justification overlaps a gold one.

    The system justifications are ordered by descending
    ``predicate_justification_confidence`` and at most the first two are
    examined. Each is compared with every gold justification.

    Args:
        system_predicate_justifications: Mapping whose values answer
            ``predicate_justification`` and
            ``predicate_justification_confidence`` through ``get``.
        gold_predicate_justifications: Mapping whose values answer
            ``predicate_justification`` through ``get``.

    Returns:
        True if any examined system justification has an
        intersection-over-union above 0 with some gold justification,
        otherwise False.
    """
    mappings = self.get('gold_responses').get('document_mappings')
    boundaries = self.get('gold_responses').get('document_boundaries')

    def confidence_of(justification):
        return justification.get('predicate_justification_confidence')

    ranked = sorted(system_predicate_justifications.values(),
                    key=confidence_of,
                    reverse=True)

    found_overlap = False
    # Only the two most confident system justifications are considered.
    for system_entry in ranked[:2]:
        system_span = system_entry.get('predicate_justification')
        system_region = augment_mention_object(
            spanstring_to_object(self.logger, system_span),
            mappings,
            boundaries)
        for gold_entry in gold_predicate_justifications.values():
            gold_span = gold_entry.get('predicate_justification')
            gold_region = augment_mention_object(
                spanstring_to_object(self.logger, gold_span),
                mappings,
                boundaries)
            if get_intersection_over_union(system_region, gold_region) > 0:
                found_overlap = True
    return found_overlap
def contains_relaxed(self, mention, types, metatype):
    """Tell whether ``mention`` overlaps a region of a matching top-level type.

    Every stored region key (``<document_id>:<element or keyframe id>:<type>``)
    is compared with the mention's document id, its keyframe-or-element id
    and the top-level type of each entry in ``types``. Regions under
    matching keys are then tested against ``mention``.

    Args:
        mention: Object whose ``get`` answers ``document_id``,
            ``document_element_id`` and ``keyframe_id``.
        types: Iterable of cluster types.
        metatype: Passed to ``get_top_level_type`` together with each
            entry of ``types``.

    Returns:
        True on the first region whose intersection-over-union with
        ``mention`` is above 0; False when there is none.
    """
    mention_doc_id = mention.get('document_id')
    element_id = mention.get('document_element_id')
    frame_id = mention.get('keyframe_id')
    # A keyframe id, when present, takes precedence over the element id.
    mention_anchor_id = frame_id or element_id

    for wanted_type in types:
        wanted_top_level = get_top_level_type(wanted_type, metatype)
        for region_key in self.get('regions'):
            key_doc_id, key_anchor_id, key_type = region_key.split(':')
            key_metatype = self.get('ontology_type_mappings').get(
                'type_to_metatype', key_type)
            key_top_level = get_top_level_type(key_type, key_metatype)
            if (mention_doc_id != key_doc_id
                    or mention_anchor_id != key_anchor_id
                    or wanted_top_level != key_top_level):
                continue
            # Stringify all regions of this key before testing any of them.
            region_strings = [s.__str__() for s in self.get('regions').get(region_key)]
            for region_string in region_strings:
                qualified = '{docid}:{doce_or_kf_id}:{region_string}'.format(
                    docid=key_doc_id,
                    doce_or_kf_id=key_anchor_id,
                    region_string=region_string)
                parsed = spanstring_to_object(self.logger, qualified)
                candidate = augment_mention_object(
                    parsed,
                    self.get('document_mappings'),
                    self.get('document_boundaries'))
                if get_intersection_over_union(mention, candidate) > 0:
                    return True
    return False
def get_entity_and_event_similarity(self, gold_cluster, system_cluster):
    """Score the mention alignment between a gold and a system cluster.

    When ``self.get('number_of_matching_types', ...)`` is truthy for the
    clusters' ``top_level_types``, every gold mention is compared with
    every system mention by intersection-over-union. Values below
    ``self.get('threshold', modality, language)`` are treated as 0, where
    modality and language are looked up for the gold mention's document
    element. The scores are passed to ``get_cost_matrix`` and the pairs
    returned by ``Munkres().compute`` are accumulated.

    Args:
        gold_cluster: Object whose ``get`` answers ``top_level_types`` and
            ``mentions`` (a mapping whose values are mention objects).
        system_cluster: Same shape as ``gold_cluster``.

    Returns:
        With ``self.get('weighted') == 'no'`` the number of aligned pairs
        with a retained score above 0; with ``'yes'`` the sum of those
        scores. Any other ``weighted`` value adds nothing. 0 is returned
        when the types do not match or either cluster has no mentions.
    """
    if not self.get('number_of_matching_types',
                    gold_cluster.get('top_level_types'),
                    system_cluster.get('top_level_types')):
        return 0

    mentions = {
        'gold': list(gold_cluster.get('mentions').values()),
        'system': list(system_cluster.get('mentions').values()),
    }
    # Nothing can be aligned when one side is empty; returning here also
    # keeps an empty matrix away from the assignment solver.
    if not mentions['gold'] or not mentions['system']:
        return 0

    # Two-way mapping between mention IDs and matrix indices, per side.
    mappings = {}
    for filetype, filetype_mentions in mentions.items():
        id_to_index = {}
        index_to_id = {}
        for index, mention in enumerate(filetype_mentions):
            mention_id = mention.get('ID')
            id_to_index[mention_id] = index
            index_to_id[index] = mention_id
        mappings[filetype] = {
            'id_to_index': id_to_index,
            'index_to_id': index_to_id,
        }

    document_mappings = self.get('document_mappings')
    similarities = {}
    for gold_mention in mentions['gold']:
        document_element_id = gold_mention.get('document_element_id')
        modality = document_mappings.get('modality', document_element_id)
        language = document_mappings.get('language', document_element_id)
        # The threshold depends only on the gold mention, so it is looked
        # up once here rather than once per system mention.
        threshold = self.get('threshold', modality, language)
        scores = similarities.setdefault(gold_mention.get('ID'), {})
        for system_mention in mentions['system']:
            iou = get_intersection_over_union(gold_mention, system_mention)
            scores[system_mention.get('ID')] = 0 if iou < threshold else iou

    cost_matrix = get_cost_matrix(similarities, mappings)
    gold_ids = mappings['gold']['index_to_id']
    system_ids = mappings['system']['index_to_id']
    weighted = self.get('weighted')

    similarity = 0
    for gold_index, system_index in Munkres().compute(cost_matrix):
        score = similarities[gold_ids[gold_index]][system_ids[system_index]]
        if score > 0:
            # Lenient similarity computation.
            if weighted == 'no':
                # Total mentions: each aligned pair counts as 1.
                similarity += 1
            elif weighted == 'yes':
                # Total IOU: add up the amount of overlap.
                similarity += score
    return similarity