def __init__(self, lang):
        """Attach the German or English extractors to this instance.

        When ``lang == LANG.DE`` the ``de_lang()`` variant of each extractor
        is used; every other value selects the ``en_lang()`` variant.
        """
        german = lang == LANG.DE

        path_extractor = ShortestPathRE()
        self.spre = path_extractor.de_lang() if german else path_extractor.en_lang()

        pattern_extractor = PatternBasedRE()
        self.pbre = pattern_extractor.de_lang() if german else pattern_extractor.en_lang()

        # spaCy is the active entity extractor; the FlairEntityExtractor
        # alternative stays disabled.
        entity_extractor = SpacyEntityExtractor()
        self.entity_extractor = entity_extractor.de_lang() if german else entity_extractor.en_lang()
class RelationExtractor:
    """Extract relations from free text, one sentence at a time.

    Sentences with person entities are handled by the shortest-path
    extractor (``self.spre``); all other sentences are handled by the
    pattern-based extractor (``self.pbre``).
    """

    def __init__(self, lang):
        """Build the language-specific extractors.

        Args:
            lang: ``LANG.DE`` selects the German variants; any other value
                falls back to the English variants.
        """
        # spaCy is the active entity extractor; the FlairEntityExtractor
        # alternative stays disabled.
        if lang == LANG.DE:
            self.spre = ShortestPathRE().de_lang()
            self.pbre = PatternBasedRE().de_lang()
            self.entity_extractor = SpacyEntityExtractor().de_lang()
        else:
            self.spre = ShortestPathRE().en_lang()
            self.pbre = PatternBasedRE().en_lang()
            self.entity_extractor = SpacyEntityExtractor().en_lang()

    def extract_relations(self, text, plot_graph=False, validate=False, out_val_file=None):
        """Return the relations found in every sentence of ``text``.

        Args:
            text: Input text; it is split into sentences with
                ``sent_tokenize``.
            plot_graph: Forwarded to the shortest-path extractor.
            validate: When true, append one ``"<relations>; <sentence>"``
                line per sentence to ``out_val_file``.
            out_val_file: Path of the validation file, opened in append
                mode as UTF-8. Must be provided when ``validate`` is true.

        Returns:
            A list with the relations of all sentences, in sentence order.
            Empty when ``text`` yields no sentences or no relations.
        """
        extracted_relations = []

        for sentence in sent_tokenize(text):
            entities, per_entities = self.entity_extractor.extract_entities(sentence)
            logger.debug(f'Extracted entities: {entities}')

            # Shortest path relation extraction
            if len(per_entities) > 0:  # PER-PER or USR-PER
                sentence_relations = self.spre.extract_sp_relation(entities, per_entities, sentence, plot_graph)
            # Pattern based relation extraction
            else:  # USR-REL
                sentence_relations = self.pbre.extract_rel(sentence)

            if validate:
                # The validation log stays per sentence so each line pairs a
                # sentence with exactly the relations extracted from it.
                with open(out_val_file, 'a', encoding='utf-8') as f:
                    f.write(f'{sentence_relations}; {sentence}\n')

            # Accumulate instead of overwriting: previously only the last
            # sentence's relations survived the loop.
            # NOTE(review): assumes both extractors return a list of
            # relations (or an empty/None value) - confirm for
            # extract_sp_relation.
            if sentence_relations:
                extracted_relations.extend(sentence_relations)

        return extracted_relations
Example #3
0
def test_extract_rel_8():
    """English: a sentence mentioning 'my uncle' yields an uncle-of relation."""
    utterance = 'sadly i am on call with my uncle in the hospital so i never let mine die'
    # Instantiate before selecting the language, as every other caller does;
    # this works whether en_lang is an instance method or a classmethod.
    pbre = PatternBasedRE().en_lang()
    result = pbre.extract_rel(utterance, plot_tree=False)
    assert result == [('uncle-of', 'USER')]
Example #4
0
def test_extract_rel_6():
    """English: 'my wife and kids' is expected to yield only a wife-of relation."""
    extractor = PatternBasedRE().en_lang()
    relations = extractor.extract_rel(
        'i miss my wife and kids so much',
        plot_tree=False,
    )
    assert relations == [('wife-of', 'USER')]
Example #5
0
def test_extract_rel_7():
    """English: 'my dad' is expected to be reported as a father-of relation."""
    utterance = 'no , my dad taught me good music and good work ethics.'
    # Instantiate before selecting the language, as every other caller does;
    # this works whether en_lang is an instance method or a classmethod.
    pbre = PatternBasedRE().en_lang()
    result = pbre.extract_rel(utterance, plot_tree=False)
    assert result == [('father-of', 'USER')]
Example #6
0
def test_extract_rel_5():
    """German: 'Ich habe einen Bruder' is expected to yield a brother-of relation."""
    extractor = PatternBasedRE().de_lang()
    relations = extractor.extract_rel('Ich habe einen Bruder', plot_tree=False)
    assert relations == [('brother-of', 'USER')]
Example #7
0
def test_extract_rel_1():
    """English: 'I have older brother ...' is expected to yield a brother-of relation."""
    extractor = PatternBasedRE().en_lang()
    relations = extractor.extract_rel(
        'I have older brother who lives in Berlin',
        plot_tree=False,
    )
    assert relations == [('brother-of', 'USER')]
Example #8
0
def test_extract_rel_4():
    """English: 'My little sister Lisa ...' is expected to yield a sister-of relation.

    Runs the extractor with ``plot_tree=True``.
    """
    extractor = PatternBasedRE().en_lang()
    relations = extractor.extract_rel(
        'My little sister Lisa is moving to London',
        plot_tree=True,
    )
    assert relations == [('sister-of', 'USER')]
Example #9
0
def test_extract_rel_3():
    """English: 'I have one brother' is expected to yield a brother-of relation.

    Runs the extractor with ``plot_tree=True``.
    """
    extractor = PatternBasedRE().en_lang()
    relations = extractor.extract_rel('I have one brother', plot_tree=True)
    assert relations == [('brother-of', 'USER')]
Example #10
0
def test_extract_rel_2():
    """English: 'I have two sisters' is expected to yield a single sister-of relation.

    Runs the extractor with ``plot_tree=True``.
    """
    extractor = PatternBasedRE().en_lang()
    relations = extractor.extract_rel('I have two sisters', plot_tree=True)
    assert relations == [('sister-of', 'USER')]