def generate_document_id(self, responses, entry):
    """Work out the document ID for ``entry`` and store it on the entry.

    The ID is taken from the first of these that yields a truthy value:

    1. ``entry.get('kb_document_id')``, used directly;
    2. the ``document_id`` of the span object built from
       ``object_informative_justification_span_text``;
    3. the ``document_id`` of the span object built from
       ``predicate_justification_span_text``;
    4. the ``document_id`` of the span object built from
       ``mention_span_text``.

    If none of the fields is set, ``None`` is stored.

    Args:
        responses: Not used by this method.
        entry: Object providing ``get`` and ``set``; it receives the result
            under the key ``'document_id'``.
    """
    # Span-bearing fields, in order of precedence.
    span_fields = (
        'object_informative_justification_span_text',
        'predicate_justification_span_text',
        'mention_span_text',
    )

    document_id = None
    kb_document_id = entry.get('kb_document_id')
    if kb_document_id:
        document_id = kb_document_id
    else:
        for field_name in span_fields:
            span_text = entry.get(field_name)
            if not span_text:
                continue
            span_object = spanstring_to_object(entry.get('logger'), span_text)
            document_id = span_object.get('document_id')
            break

    entry.set('document_id', document_id)
def contains(self, mention, types):
    """Return whether ``mention`` sufficiently overlaps a stored region.

    For every cluster type in ``types`` the regions are looked up in
    ``self.get('regions')`` under the key
    ``<document_id>:<keyframe_id or document_element_id>:<cluster_type>``.
    Each stored span is expanded into a fully qualified span string, turned
    into an object with ``spanstring_to_object``, given a ``modality`` and a
    ``boundary``, and compared to ``mention``.

    The search stops at the first region that qualifies.

    Args:
        mention: Object providing ``get`` for ``document_id``,
            ``document_element_id`` and ``keyframe_id``; it is handed
            unchanged to ``get_intersection_over_union``.
        types: Iterable of cluster types to look up.

    Returns:
        True if some region has an intersection-over-union with ``mention``
        of at least 0.8, otherwise False.
    """
    min_overlap = 0.8

    # None of these depend on the cluster type or the region, so read them once.
    document_id = mention.get('document_id')
    document_element_id = mention.get('document_element_id')
    keyframe_id = mention.get('keyframe_id')
    # A keyframe ID, when present, takes precedence over the element ID.
    doce_or_kf_id = keyframe_id if keyframe_id else document_element_id

    regions = self.get('regions')
    logger = self.get('logger')
    document_mappings = self.get('document_mappings')
    document_boundaries = self.get('document_boundaries')

    for cluster_type in types:
        key = '{docid}:{doce_or_kf_id}:{cluster_type}'.format(
            docid=document_id,
            doce_or_kf_id=doce_or_kf_id,
            cluster_type=cluster_type)
        if key not in regions:
            continue

        for span_string in regions.get(key):
            fq_span_string = '{docid}:{doce_or_kf_id}:{span_string}'.format(
                docid=document_id,
                doce_or_kf_id=doce_or_kf_id,
                span_string=span_string)
            region = spanstring_to_object(logger, fq_span_string)
            region.set(
                'modality',
                document_mappings.get('modality', region.get('document_element_id')))

            # Boundaries are keyed by 'keyframe' for keyframe regions and by
            # the region's modality otherwise.
            region_keyframe_id = region.get('keyframe_id')
            if region_keyframe_id:
                boundaries_key = 'keyframe'
                boundary_owner_id = region_keyframe_id
            else:
                boundaries_key = region.get('modality')
                boundary_owner_id = region.get('document_element_id')
            region.set(
                'boundary',
                document_boundaries.get(boundaries_key).get(boundary_owner_id))

            if get_intersection_over_union(mention, region) >= min_overlap:
                # One qualifying region settles the answer.
                return True

    return False
def is_predicate_justification_correct(self, system_predicate_justifications, gold_predicate_justifications):
    """Check the top system predicate justifications against the gold ones.

    System justifications are ranked by ``predicate_justification_confidence``
    (highest first) and only the first two are examined. A system
    justification counts as matching when its span has an
    intersection-over-union greater than zero with any gold justification
    span.

    Args:
        system_predicate_justifications: Mapping whose values provide
            ``get('predicate_justification')`` and
            ``get('predicate_justification_confidence')``.
        gold_predicate_justifications: Mapping whose values provide
            ``get('predicate_justification')``.

    Returns:
        True if any examined system justification overlaps a gold
        justification, otherwise False.
    """
    gold_responses = self.get('gold_responses')
    document_mappings = gold_responses.get('document_mappings')
    document_boundaries = self.get('gold_responses').get('document_boundaries')

    # NOTE(review): the source formatting was collapsed; this reconstruction
    # assumes the cap of two applies to system justifications (outer loop).
    top_k = 2
    ranked = sorted(
        system_predicate_justifications.values(),
        key=lambda pj: pj.get('predicate_justification_confidence'),
        reverse=True)

    is_correct = False
    for system_justification in ranked[:top_k]:
        system_span = system_justification.get('predicate_justification')
        system_mention = augment_mention_object(
            spanstring_to_object(self.logger, system_span),
            document_mappings,
            document_boundaries)
        for gold_justification in gold_predicate_justifications.values():
            gold_span = gold_justification.get('predicate_justification')
            gold_mention = augment_mention_object(
                spanstring_to_object(self.logger, gold_span),
                document_mappings,
                document_boundaries)
            if get_intersection_over_union(system_mention, gold_mention) > 0:
                is_correct = True
    return is_correct
def add_mention(self, span_string, t_cv, cm_cv, j_cv, where):
    """Build a mention from ``span_string`` and register it in ``mentions``.

    The span string is converted with ``spanstring_to_object`` and passed
    through ``augment_mention_object`` together with this object's
    ``document_mappings`` and ``document_boundaries``. The resulting mention
    is then tagged and added to ``self.get('mentions')`` keyed by its ``ID``.

    Args:
        span_string: Span string; also stored as the mention's ``ID`` and
            ``span_string``.
        t_cv: Value stored on the mention as ``t_cv``.
        cm_cv: Value stored on the mention as ``cm_cv``.
        j_cv: Value stored on the mention as ``j_cv``.
        where: Forwarded to ``spanstring_to_object``.
    """
    parsed_span = spanstring_to_object(self.get('logger'), span_string, where)
    mention = augment_mention_object(
        parsed_span,
        self.get('document_mappings'),
        self.get('document_boundaries'))

    attributes = (
        ('ID', span_string),
        ('span_string', span_string),
        ('t_cv', t_cv),
        ('cm_cv', cm_cv),
        ('j_cv', j_cv),
    )
    for name, value in attributes:
        mention.set(name, value)

    self.get('mentions').add(key=mention.get('ID'), value=mention)
def add_mention(self, span_string, t_cv, cm_cv, j_cv, where):
    """Build a mention from ``span_string`` and register it in ``mentions``.

    The span string is converted with ``spanstring_to_object``, tagged with
    the supplied values, and given a ``modality`` and a ``boundary`` before
    being added to ``self.get('mentions')`` keyed by its ``ID``.

    Args:
        span_string: Span string; also stored as the mention's ``ID`` and
            ``span_string``.
        t_cv: Value stored on the mention as ``t_cv``.
        cm_cv: Value stored on the mention as ``cm_cv``.
        j_cv: Value stored on the mention as ``j_cv``.
        where: Forwarded to ``spanstring_to_object``.
    """
    mention = spanstring_to_object(self.get('logger'), span_string, where)

    attributes = (
        ('ID', span_string),
        ('span_string', span_string),
        ('t_cv', t_cv),
        ('cm_cv', cm_cv),
        ('j_cv', j_cv),
    )
    for name, value in attributes:
        mention.set(name, value)

    modality = self.get('document_mappings').get(
        'modality', mention.get('document_element_id'))
    mention.set('modality', modality)

    # Boundaries are keyed by 'keyframe' for keyframe mentions and by the
    # mention's modality otherwise.
    keyframe_id = mention.get('keyframe_id')
    if keyframe_id:
        boundaries_key = 'keyframe'
        boundary_owner_id = keyframe_id
    else:
        boundaries_key = mention.get('modality')
        boundary_owner_id = mention.get('document_element_id')
    boundary = self.get('document_boundaries').get(boundaries_key).get(boundary_owner_id)
    mention.set('boundary', boundary)

    self.get('mentions').add(key=mention.get('ID'), value=mention)
def get_mention(entry, document_mappings, document_boundaries):
    """Create a mention object from the fields of ``entry``.

    The mention is built from ``entry.get('mention_span_text')`` with
    ``spanstring_to_object``, tagged with the entry's confidence values, and
    given a ``modality`` and a ``boundary``.

    Args:
        entry: Object providing ``get`` for ``logger``, ``where``,
            ``mention_span_text``, ``type_statement_confidence``,
            ``cluster_membership_confidence`` and
            ``mention_type_justification_confidence``.
        document_mappings: Queried as
            ``get('modality', <document_element_id>)``.
        document_boundaries: Queried as
            ``get(<boundaries_key>).get(<keyframe or element ID>)``.

    Returns:
        The populated mention object.
    """
    span_string = entry.get('mention_span_text')
    mention = spanstring_to_object(entry.get('logger'), span_string, entry.get('where'))

    mention.set('ID', span_string)
    mention.set('span_string', span_string)
    # Confidence values: entry field name -> mention attribute name.
    confidence_fields = (
        ('t_cv', 'type_statement_confidence'),
        ('cm_cv', 'cluster_membership_confidence'),
        ('j_cv', 'mention_type_justification_confidence'),
    )
    for attribute, entry_field in confidence_fields:
        mention.set(attribute, entry.get(entry_field))

    mention.set(
        'modality',
        document_mappings.get('modality', mention.get('document_element_id')))

    # Boundaries are keyed by 'keyframe' for keyframe mentions and by the
    # mention's modality otherwise.
    keyframe_id = mention.get('keyframe_id')
    if keyframe_id:
        boundaries_key = 'keyframe'
        boundary_owner_id = keyframe_id
    else:
        boundaries_key = mention.get('modality')
        boundary_owner_id = mention.get('document_element_id')
    mention.set(
        'boundary',
        document_boundaries.get(boundaries_key).get(boundary_owner_id))

    return mention
def contains_strict(self, mention, types, metatype):
    """Return whether ``mention`` overlaps a region stored under one of ``types``.

    Regions are looked up in ``self.get('regions')`` under the exact key
    ``<document_id>:<keyframe_id or document_element_id>:<cluster_type>``.
    Each stored span is expanded to a fully qualified span string, converted
    with ``spanstring_to_object`` and ``augment_mention_object``, and compared
    to ``mention``.

    Args:
        mention: Object providing ``get`` for ``document_id``,
            ``document_element_id`` and ``keyframe_id``.
        types: Iterable of cluster types to look up.
        metatype: Not used by this method.

    Returns:
        True as soon as a region has an intersection-over-union with
        ``mention`` greater than zero; False if none does.
    """
    key_template = '{docid}:{doce_or_kf_id}:{cluster_type}'
    span_template = '{docid}:{doce_or_kf_id}:{span_string}'

    element_id = mention.get('document_element_id')
    keyframe_id = mention.get('keyframe_id')
    # A keyframe ID, when present, takes precedence over the element ID.
    anchor_id = keyframe_id or element_id
    document_id = mention.get('document_id')
    regions = self.get('regions')

    for cluster_type in types:
        region_key = key_template.format(
            docid=document_id, doce_or_kf_id=anchor_id, cluster_type=cluster_type)
        if region_key in regions:
            for span_string in regions.get(region_key):
                fq_span_string = span_template.format(
                    docid=document_id, doce_or_kf_id=anchor_id, span_string=span_string)
                region = augment_mention_object(
                    spanstring_to_object(self.logger, fq_span_string),
                    self.get('document_mappings'),
                    self.get('document_boundaries'))
                if get_intersection_over_union(mention, region) > 0:
                    return True
    return False
def contains_relaxed(self, mention, types, metatype):
    """Return whether ``mention`` overlaps a region of a matching top-level type.

    Unlike an exact key lookup, every key in ``self.get('regions')`` is
    considered. A key of the form ``<document_id>:<element or keyframe
    ID>:<cluster_type>`` is a candidate when its document ID and element or
    keyframe ID equal the mention's, and when the top-level type of its
    cluster type equals the top-level type of one of ``types`` (both obtained
    from ``get_top_level_type``).

    Args:
        mention: Object providing ``get`` for ``document_id``,
            ``document_element_id`` and ``keyframe_id``.
        types: Iterable of cluster types of the mention.
        metatype: Metatype passed to ``get_top_level_type`` together with
            each entry of ``types``.

    Returns:
        True as soon as a candidate region has an intersection-over-union
        with ``mention`` greater than zero; False if none does.

    Raises:
        ValueError: If a region key does not split into exactly three
            ``:``-separated parts.
    """
    document_id = mention.get('document_id')
    document_element_id = mention.get('document_element_id')
    keyframe_id = mention.get('keyframe_id')
    # A keyframe ID, when present, takes precedence over the element ID.
    doce_or_kf_id = keyframe_id if keyframe_id else document_element_id

    regions = self.get('regions')
    ontology_type_mappings = self.get('ontology_type_mappings')

    for cluster_type in types:
        top_level_type = get_top_level_type(cluster_type, metatype)
        for key in regions:
            key_document_id, key_doce_or_kf_id, key_cluster_type = key.split(':')

            # Cheap equality filters first, so the ontology lookups below run
            # only for keys belonging to the mention's document element.
            if document_id != key_document_id:
                continue
            if doce_or_kf_id != key_doce_or_kf_id:
                continue

            key_metatype = ontology_type_mappings.get('type_to_metatype', key_cluster_type)
            if top_level_type != get_top_level_type(key_cluster_type, key_metatype):
                continue

            for stored_region in regions.get(key):
                fq_region_string = '{docid}:{doce_or_kf_id}:{region_string}'.format(
                    docid=key_document_id,
                    doce_or_kf_id=key_doce_or_kf_id,
                    region_string=str(stored_region))
                region = augment_mention_object(
                    spanstring_to_object(self.logger, fq_region_string),
                    self.get('document_mappings'),
                    self.get('document_boundaries'))
                if get_intersection_over_union(mention, region) > 0:
                    return True
    return False