예제 #1
0
 def rule0_func(self, sent_data):
     """Tell whether the entity pair is typed as (person, place).

     Args:
         sent_data: A 7-item sequence; its last two items are the texts of
             the first and second entity. Each text is resolved to an
             entity object with ``ent_text_to_ent``.

     Returns:
         True when the first entity's ``label_`` is in ``self.persons_ents``
         and the second entity's ``label_`` is in ``self.places_ents``;
         False otherwise.
     """
     # Only the two entity texts are needed; the unpack still enforces that
     # sent_data holds exactly seven items.
     _sid, _tokens, _text, _chunks, _ents, first_text, second_text = sent_data
     first = ent_text_to_ent(first_text, sent_data)
     second = ent_text_to_ent(second_text, sent_data)
     return (
         first.label_ in self.persons_ents
         and second.label_ in self.places_ents
     )
예제 #2
0
def remove_non_relevant_samples(data):
    """Keep only the samples whose entity pair is typed (person, place).

    Args:
        data: An iterable of ``(sent_data, y)`` pairs, where ``sent_data`` is
            a 7-item sequence whose last two items are the entity texts.

    Returns:
        A new list of ``(sent_data, y)`` tuples, in input order, for which
        the first entity's ``label_`` is in the module-level ``person_ents``
        and the second entity's ``label_`` is in ``places_ents``.
    """
    kept = []
    for sample in data:
        sent_data, y = sample
        # The unpack enforces the 7-item shape; only the entity texts are used.
        _sid, _tokens, _text, _chunks, _ents, first_text, second_text = sent_data
        first = ent_text_to_ent(first_text, sent_data)
        second = ent_text_to_ent(second_text, sent_data)
        if first.label_ not in person_ents:
            continue
        if second.label_ in places_ents:
            kept.append((sent_data, y))
    return kept
예제 #3
0
 def rule1_func(self, sent_data):
     """Tell whether a (place, person) pair is joined by a possessive "'s".

     Args:
         sent_data: A 7-item sequence; its last two items are the texts of
             the first and second entity. Each text is resolved to an
             entity object with ``ent_text_to_ent``.

     Returns:
         True only when all of the following hold, checked in this order:
         the first entity's ``label_`` is in ``self.places_ents``, the second
         entity's ``label_`` is in ``self.persons_ents``, ``"'s"`` is among
         the words returned by ``words_between_ents``, and ``ents_between``
         reports ``0`` for the pair. False otherwise.
     """
     # The unpack enforces the 7-item shape; only the entity texts are used.
     _sid, _tokens, _text, _chunks, _ents, first_text, second_text = sent_data
     first = ent_text_to_ent(first_text, sent_data)
     second = ent_text_to_ent(second_text, sent_data)

     # Guard clauses mirror the original short-circuit order.
     if first.label_ not in self.places_ents or second.label_ not in self.persons_ents:
         return False
     if "'s" not in words_between_ents(sent_data):
         return False
     nothing_in_between = ents_between(first, second, sent_data) == 0
     return True if nothing_in_between else False
예제 #4
0
def words_between_ents(sent_data):
    """Collect the token texts lying between the first and second entity.

    Args:
        sent_data: A 7-item sequence; the second item is the iterable of
            sentence tokens (each exposing ``.text``) and the last two items
            are the entity texts, resolved with ``ent_text_to_ent``.
            NOTE(review): ``.start``/``.end`` are presumably token offsets
            into that same token sequence; confirm against the caller.

    Returns:
        The texts at positions ``first.end`` up to (not including)
        ``second.start``. An empty list when the first entity starts after
        the second one.
    """
    _sid, tokens, _text, _chunks, _ents, first_text, second_text = sent_data
    token_texts = [tok.text for tok in tokens]
    first = ent_text_to_ent(first_text, sent_data)
    second = ent_text_to_ent(second_text, sent_data)

    # Only the "first entity precedes second" ordering yields any words.
    if first.start > second.start:
        return []
    # Explicit indexing (not slicing) so an out-of-range position still raises.
    return [token_texts[pos] for pos in range(first.end, second.start)]