Example #1
0
    def _get_string_match(self, doc: Document, e1: Entity, e2: Entity):
        """Build lemma-based string-match features for an entity pair.

        A feature is emitted only when its name is a key of
        ``self.classifier_converters``; the computed value is then used to
        index that converter.

        Features:
            head_str_match: the lemmas at the two head tokens (as returned
                by ``find_span_head_token``) are equal.
            exact_str_match: both spans have the same set of lemmas, so
                order and repetition are ignored.
            partial_str_match: the spans share at least one lemma.
            ordered_exact_str_match: the two lemma slices compare equal.
            ordered_partial_str_match: the result of
                ``_get_ordered_partial_match`` tried in both argument
                orders (combined with ``or``, not cast to bool).

        Returns:
            dict mapping each emitted feature name to its converted value.
        """
        converters = self.classifier_converters
        lemmas = doc.token_features['lemmas']
        features = {}

        if 'head_str_match' in converters:
            first_head = find_span_head_token(doc, e1)
            second_head = find_span_head_token(doc, e2)
            heads_equal = lemmas[first_head] == lemmas[second_head]
            features['head_str_match'] = converters['head_str_match'][
                heads_equal]

        first_lemmas = lemmas[e1.start_token:e1.end_token]
        second_lemmas = lemmas[e2.start_token:e2.end_token]

        # Each check is wrapped in a callable so it only runs when the
        # corresponding feature is actually requested.
        checks = (
            ('exact_str_match',
             lambda: set(first_lemmas) == set(second_lemmas)),
            ('partial_str_match',
             lambda: bool(set(first_lemmas) & set(second_lemmas))),
            ('ordered_exact_str_match',
             lambda: first_lemmas == second_lemmas),
            ('ordered_partial_str_match',
             lambda: (
                 self._get_ordered_partial_match(first_lemmas, second_lemmas)
                 or self._get_ordered_partial_match(second_lemmas,
                                                    first_lemmas))),
        )
        for feature_name, compute in checks:
            if feature_name in converters:
                features[feature_name] = converters[feature_name][compute()]
        return features
Example #2
0
 def _get_context_entities(doc: Document, e1: Entity,
                           e2: Entity) -> List[Entity]:
     """Return the document entities spanning from ``e1`` to ``e2``.

     An entity is kept when it starts at or after ``e1.start_token`` and
     ends at or before ``e2.end_token``. The kept entities are ordered by
     the value ``find_span_head_token`` returns for them; entities with
     equal heads keep their relative order from ``doc.entities``.
     """
     heads_and_entities = [
         (find_span_head_token(doc, candidate), candidate)
         for candidate in doc.entities
         if (e1.start_token <= candidate.start_token
             and candidate.end_token <= e2.end_token)
     ]
     # Sort on the head only; the entities themselves are never compared.
     heads_and_entities.sort(key=lambda pair: pair[0])
     return [candidate for _, candidate in heads_and_entities]
Example #3
0
    def _get_ne_types(self, doc: Document, e1: Entity, e2: Entity):
        """Return converted named-entity types of both entities' head tokens.

        The presence of ``'head_ne_types_0'`` in
        ``self.classifier_converters`` switches the whole feature group on;
        when it is absent an empty dict is returned. Otherwise the result
        holds ``'head_ne_types_0'`` for ``e1`` and ``'head_ne_types_1'`` for
        ``e2``, each obtained by indexing the matching converter with
        ``ne_type_for_token(doc, head)``.
        """
        converters = self.classifier_converters
        if 'head_ne_types_0' not in converters:
            return {}

        features = {}
        named_entities = (('head_ne_types_0', e1), ('head_ne_types_1', e2))
        for feature_name, span in named_entities:
            head_token = find_span_head_token(doc, span)
            converter = converters[feature_name]
            features[feature_name] = converter[ne_type_for_token(
                doc, head_token)]
        return features
Example #4
0
    def _get_agreement(self, doc: Document, e1: Entity, e2: Entity):
        """Compare the ``feats`` token features of the two entity heads.

        For every ``key`` in ``self.agreement_types`` whose feature name
        ``key + '_agreement'`` is in ``self.classifier_converters``, one of
        three labels is chosen and passed through that converter:

        * ``'unknown'``      - the key is missing from at least one head,
        * ``'agreement'``    - both heads have the key with equal values,
        * ``'disagreement'`` - both heads have the key with differing values.

        Returns:
            dict mapping each emitted feature name to its converted label.
        """
        first_head = find_span_head_token(doc, e1)
        second_head = find_span_head_token(doc, e2)

        first_feats = doc.token_features['feats'][first_head]
        second_feats = doc.token_features['feats'][second_head]

        features = {}
        for key in self.agreement_types:
            feature_name = key + '_agreement'
            if feature_name not in self.classifier_converters:
                continue

            if key not in first_feats or key not in second_feats:
                label = 'unknown'
            elif first_feats[key] == second_feats[key]:
                label = 'agreement'
            else:
                label = 'disagreement'
            features[feature_name] = self.classifier_converters[
                feature_name][label]
        return features
    def extract_features(self, doc: Document, entity: Entity) -> dict:
        """Collect encoder features for a single entity.

        Starts from the features the shared extractor produces for the
        entity's token span, then adds entity-level features. Each optional
        feature is added only when its name is a key of the relevant
        converter mapping (``self.entity_converters`` or
        ``self.continuous_converters``).

        Returns:
            The feature dict, in which the shared extractor's ``'seq_len'``
            entry has been renamed to ``'entity_seq_len'``.
        """
        result = self.shared_fe.extract_features_from_doc(
            doc, entity.start_token, entity.end_token)
        entity_converters = self.entity_converters

        if 'encoder_entity_types' in entity_converters:
            type_converter = entity_converters['encoder_entity_types']
            result['encoder_entity_types'] = type_converter[entity.type]

        head_token = find_span_head_token(doc, entity)

        # Categorical token features, read at the head token.
        for feat_name in self.feats_list:
            key = "encoder_" + feat_name
            if key not in entity_converters:
                continue
            head_value = doc.token_features[feat_name][head_token]
            result[key] = entity_converters[key][head_value]

        # Flags: does any token inside the span carry a tag other than 'O'?
        for speech_type in self.speech_types:
            key = "encoder_" + speech_type
            if key not in entity_converters:
                continue
            span_tags = doc.token_features[speech_type][
                entity.start_token:entity.end_token]
            is_tagged = any(tag != 'O' for tag in span_tags)
            result[key] = entity_converters[key][is_tagged]

        continuous_converters = self.continuous_converters
        if 'head_we_in_encoder' in continuous_converters:
            lowered_head = doc.tokens[head_token].lower()
            result['head_we_in_encoder'] = continuous_converters[
                'head_we_in_encoder'][lowered_head]
        for key in self.identity_head_features:
            if key in continuous_converters:
                head_value = doc.token_features[key][head_token]
                result[key] = continuous_converters[key][head_value]

        if 'encoder_entity_ne' in entity_converters:
            ne_type = ne_type_for_token(doc, head_token)
            result['encoder_entity_ne'] = entity_converters[
                'encoder_entity_ne'][ne_type]

        # Rename the shared extractor's length entry.
        result['entity_seq_len'] = result.pop('seq_len')

        return result
Example #6
0
    def _fix_entity_types(docs):
        ret = []
        for doc in docs:
            new_entities = []
            entity_mapping = {}
            new_rels = []
            for entity in doc.entities:
                head = find_span_head_token(doc, entity)
                if doc.token_features['pos'][head] == 'PRON':
                    e_type = 'pron'
                else:
                    e_type = 'noun'
                new_entity = entity.with_type(e_type)
                entity_mapping[entity] = new_entity
                new_entities.append(new_entity)
            for rel in doc.relations:
                new_rels.append(
                    Relation(entity_mapping[rel.first_entity],
                             entity_mapping[rel.second_entity], rel.type))

            ret.append(
                doc.without_relations().without_entities().with_entities(
                    new_entities).with_relations(new_rels))
        return ret