# NOTE: the classes below are assumed to come from the library under test;
# 'pipeline' is a placeholder module path — adjust it to the real package.
from pipeline import (
    ChunkerPipe, Context, ContextPipeline, Merger, PhraseChunker,
    RegexPhrase, Scope, ScopeDirection, ScopeOverlapExtract, Scoper,
    SpacyIntegration, StrictDependencyRelationshipExtract, Tag, TagExtract,
    TokenAttributesPipe, TokenTaggerPipe, TreeChunker,
)


def test_extractor_1():
    context = Context(
        'Tom and Bob declined cancer treatment.',
        ['Tom', 'and', 'Bob', 'declined', 'cancer', 'treatment', '.'])
    context.add('entities', ['B-A', '', 'B-A', '', 'B-B', 'B-C', ''])
    spacy = SpacyIntegration('en_core_web_lg')
    extract = StrictDependencyRelationshipExtract(
        spacy=spacy,
        attribute='entities',
        relationships=[
            ('A', 'has_B', 'B'),
            ('B', 'has_C', 'C'),
        ])

    relationships = extract.evaluate(context)

    # One chain of triples per 'A' entity: Tom and Bob each link to
    # 'cancer', which in turn links to 'treatment'.
    assert len(relationships) == 2

    kb_triples = relationships[0]
    assert kb_triples[0].subj == 'Tom'
    assert kb_triples[0].obj == 'cancer'
    assert kb_triples[1].subj == 'cancer'
    assert kb_triples[1].obj == 'treatment'

    kb_triples = relationships[1]
    assert kb_triples[0].subj == 'Bob'
    assert kb_triples[0].obj == 'cancer'
    assert kb_triples[1].subj == 'cancer'
    assert kb_triples[1].obj == 'treatment'
def test_extractor_2():
    context = Context('Tom declined cancer treatment.',
                      ['Tom', 'declined', 'cancer', 'treatment', '.'])
    context.add('scope1', ['', 'S1', 'S1', '', ''])
    context.add('scope2', ['', '', 'S2', 'S2', ''])

    relationships = ScopeOverlapExtract('scope1', 'scope2').evaluate(context)

    # The scopes overlap on 'cancer', so the union of both spans is tagged.
    assert relationships == ['', 'REL', 'REL', 'REL', '']
def test_chunker_single_term_phrase():
    chunker = TreeChunker("NP: {<DT>?<JJ>*<NN>}", attribute='pos')
    context = Context(
        'learn php from guru99',
        ['learn', 'php', 'from', 'guru99'])
    context.add('pos', ['JJ', 'NN', 'IN', 'NN'])

    chunks = chunker.tag(context)

    assert chunks == ['B-NP', 'I-NP', '', 'B-NP']
def test_chunker_single_term_phrase_not_pos():
    chunker = TreeChunker("AB: {<A><B>}", attribute='outer')
    context = Context(
        'learn php from guru99',
        ['learn', 'php', 'from', 'guru99'])
    context.add('outer', ['A', 'B', '', ''])

    chunks = chunker.tag(context)

    assert chunks == ['B-AB', 'I-AB', '', '']
def test_extractor():
    context = Context('We are not going to New York.',
                      ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'])
    context.add('negative', ['', '', 'NEG', 'NEG', '', '', '', ''])
    context.add('negative_scope',
                ['', '', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG'])
    context.add('entities', ['', '', '', '', '', 'B-LOC', 'I-LOC', ''])

    negative_extracts = TagExtract('negative', ['NEG']).evaluate(context)
    negative_scope_extracts = TagExtract(
        'negative_scope', ['NEG']).evaluate(context)
    location_extracts = TagExtract('entities', ['LOC']).evaluate(context)

    assert negative_extracts == {
        'NEG': [
            [(2, 'not'), (3, 'going')],
        ]
    }
    assert negative_scope_extracts == {
        'NEG': [
            [(2, 'not'), (3, 'going'), (4, 'to'),
             (5, 'New'), (6, 'York'), (7, '.')],
        ]
    }
    assert location_extracts == {'LOC': [[(5, 'New'), (6, 'York')]]}
def test_context_cache():
    context = Context('We are not going to New York.',
                      ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'],
                      actor='Tom', line=4, sent_number=3)

    # Extra keyword arguments are stored on the context's cache.
    assert context.cache['actor'] == 'Tom'
    assert context.cache['line'] == 4
    assert context.cache['sent_number'] == 3
def test_combine():
    context = Context(
        'We are not going to New York.',
        ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'])
    context.add('entities', ['', '', '', '', '', 'LOC', 'LOC', ''])
    context.add('negatives', ['', '', 'NEG', '', '', '', '', ''])

    data = Merger.combine(context, ['tokens', 'entities', 'negatives'])

    # Empty tags are dropped, so each token dict carries only the
    # attributes actually set at that position.
    assert data == [
        {'tokens': 'We'},
        {'tokens': 'are'},
        {'tokens': 'not', 'negatives': 'NEG'},
        {'tokens': 'going'},
        {'tokens': 'to'},
        {'tokens': 'New', 'entities': 'LOC'},
        {'tokens': 'York', 'entities': 'LOC'},
        {'tokens': '.'},
    ]
def test_extracting_multiple_pipes():
    pipeline = ContextPipeline(pipes=[
        # Adds token-level attributes: pos, spacings, start/end positions.
        TokenAttributesPipe(),
        # Tags 'New York' as a LOC entity in IOB2 format.
        ChunkerPipe(
            'regex_ner',
            PhraseChunker(tags=[
                Tag('LOC', phrase=RegexPhrase(r'\b(New York)\b')),
            ]),
            order=2),
        # Tags 'not' as a negation trigger, without IOB2 prefixes.
        ChunkerPipe(
            'neg',
            PhraseChunker(
                tags=[
                    Tag('FRW-NEG', phrase=RegexPhrase(r'\b(not)\b')),
                ],
                apply_iob2=False,
            ),
            order=3),
        # Propagates the negation tag rightward from the trigger.
        TokenTaggerPipe(
            'neg_scope',
            focus='neg',
            tagger=Scoper(scopes=[
                Scope(
                    applied_tag='FRW-NEG',
                    scope_direction=ScopeDirection.RIGHT,
                    order=1,
                )
            ]),
            order=4),
    ])
    context = Context('We are not going to New York.',
                      ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'])

    context = pipeline.execute(context)

    assert len(context.keys) == 8
    assert context.get('tokens') == [
        'We', 'are', 'not', 'going', 'to', 'New', 'York', '.'
    ]
    assert context.get('pos') == [
        'PRP', 'VBP', 'RB', 'VBG', 'TO', 'NNP', 'NNP', '.'
    ]
    assert context.get('spacings') == [
        'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'no', 'no'
    ]
    assert context.get('start_positions') == [
        '0', '3', '7', '11', '17', '20', '24', '28'
    ]
    assert context.get('end_positions') == [
        '1', '5', '9', '15', '18', '22', '27', '28'
    ]
    assert context.get('regex_ner') == [
        '', '', '', '', '', 'B-LOC', 'I-LOC', ''
    ]
    assert context.get('neg') == ['', '', 'FRW-NEG', '', '', '', '', '']
    assert context.get('neg_scope') == [
        '', '', 'FRW-NEG', 'FRW-NEG', 'FRW-NEG',
        'FRW-NEG', 'FRW-NEG', 'FRW-NEG',
    ]
def test_context_add_and_get():
    context = Context('We are not going to New York.',
                      ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'])
    context.add('entities', ['', '', '', '', '', 'LOC', 'LOC', ''])
    context.add('negatives', ['', '', 'NEG', '', '', '', '', ''])
    context.add('negative_scope',
                ['', '', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG'])

    # The built-in 'tokens' key plus the three added attributes.
    assert len(context.keys) == 4
    assert len(context.get('tokens')) == 8
    assert len(context.get('entities')) == 8
    assert len(context.get('negatives')) == 8
    assert len(context.get('negative_scope')) == 8