def test_extractor_1():
    """Dependency extraction builds one triple chain per 'A' entity.

    Both 'Tom' and 'Bob' (tagged B-A) should yield the same two-hop chain:
    subject -> 'cancer' (B-B) -> 'treatment' (B-C).
    """
    ctx = Context(
        'Tom and Bob declined cancer treatment.',
        ['Tom', 'and', 'Bob', 'declined', 'cancer', 'treatment', '.'])
    ctx.add('entities', ['B-A', '', 'B-A', '', 'B-B', 'B-C', ''])

    extractor = StrictDependencyRelationshipExtract(
        spacy=SpacyIntegration('en_core_web_lg'),
        attribute='entities',
        relationships=[
            ('A', 'has_B', 'B'),
            ('B', 'has_C', 'C'),
        ])

    results = extractor.evaluate(ctx)
    assert len(results) == 2

    # One triple set per 'A' subject; the second hop is identical for both.
    for triples, subject in zip(results, ('Tom', 'Bob')):
        assert triples[0].subj == subject
        assert triples[0].obj == 'cancer'
        assert triples[1].subj == 'cancer'
        assert triples[1].obj == 'treatment'
# Esempio n. 2
def test_extractor_2():
    """Overlapping scope annotations merge into a single contiguous REL span."""
    ctx = Context('Tom declined cancer treatment.',
                  ['Tom', 'declined', 'cancer', 'treatment', '.'])
    ctx.add('scope1', ['', 'S1', 'S1', '', ''])
    ctx.add('scope2', ['', '', 'S2', 'S2', ''])

    extractor = ScopeOverlapExtract('scope1', 'scope2')
    # scope1 and scope2 share token 2, so the union 1..3 becomes REL.
    assert extractor.evaluate(ctx) == ['', 'REL', 'REL', 'REL', '']
# Esempio n. 3
def test_chunker_single_term_phrase():
    """The NP grammar also tags a bare single-noun phrase ('guru99')."""
    ctx = Context(
        'learn php from guru99',
        ['learn', 'php', 'from', 'guru99'],
    )
    ctx.add('pos', ['JJ', 'NN', 'IN', 'NN'])

    tagger = TreeChunker("NP: {<DT>?<JJ>*<NN>}", attribute='pos')
    assert tagger.tag(ctx) == ['B-NP', 'I-NP', '', 'B-NP']
# Esempio n. 4
def test_chunker_single_term_phrase_not_pos():
    """TreeChunker can chunk over any attribute, not just POS tags."""
    ctx = Context(
        'learn php from guru99',
        ['learn', 'php', 'from', 'guru99'],
    )
    ctx.add('outer', ['A', 'B', '', ''])

    tagger = TreeChunker("AB: {<A><B>}", attribute='outer')
    assert tagger.tag(ctx) == ['B-AB', 'I-AB', '', '']
# Esempio n. 5
def test_extractor():
    """TagExtract groups contiguous matching tags into (index, token) runs.

    IOB prefixes on 'entities' (B-LOC / I-LOC) are stripped when matching
    the bare 'LOC' tag.
    """
    ctx = Context('We are not going to New York.',
                  ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'])
    ctx.add('negative', ['', '', 'NEG', 'NEG', '', '', '', ''])
    ctx.add('negative_scope',
            ['', '', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG'])
    ctx.add('entities', ['', '', '', '', '', 'B-LOC', 'I-LOC', ''])

    assert TagExtract('negative', ['NEG']).evaluate(ctx) == {
        'NEG': [[(2, 'not'), (3, 'going')]],
    }
    assert TagExtract('negative_scope', ['NEG']).evaluate(ctx) == {
        'NEG': [[(2, 'not'), (3, 'going'), (4, 'to'), (5, 'New'),
                 (6, 'York'), (7, '.')]],
    }
    assert TagExtract('entities', ['LOC']).evaluate(ctx) == {
        'LOC': [[(5, 'New'), (6, 'York')]],
    }
# Esempio n. 6
def test_context_cache():
    """Extra keyword arguments to Context land in its cache dict unchanged."""
    ctx = Context('We are not going to New York.',
                  ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'],
                  actor='Tom',
                  line=4,
                  sent_number=3)
    for key, value in (('actor', 'Tom'), ('line', 4), ('sent_number', 3)):
        assert ctx.cache[key] == value
# Esempio n. 7
def test_combine():
    """Merger.combine zips attributes per token, dropping empty values."""
    ctx = Context(
        'We are not going to New York.',
        ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'],
    )
    ctx.add('entities', ['', '', '', '', '', 'LOC', 'LOC', ''])
    ctx.add('negatives', ['', '', 'NEG', '', '', '', '', ''])

    merged = Merger.combine(ctx, ['tokens', 'entities', 'negatives'])

    # Empty-string attribute values are omitted from each token's dict.
    expected = [
        {'tokens': 'We'},
        {'tokens': 'are'},
        {'tokens': 'not', 'negatives': 'NEG'},
        {'tokens': 'going'},
        {'tokens': 'to'},
        {'tokens': 'New', 'entities': 'LOC'},
        {'tokens': 'York', 'entities': 'LOC'},
        {'tokens': '.'},
    ]
    assert merged == expected
# Esempio n. 8
def test_extracting_multiple_pipes():
    """Pipes execute in `order` and each adds its attribute to the context.

    TokenAttributesPipe contributes tokens/pos/spacings/positions, the two
    ChunkerPipes add regex-based NER and negation tags, and TokenTaggerPipe
    propagates FRW-NEG rightwards from the trigger token.
    """
    loc_chunker = PhraseChunker(tags=[
        Tag('LOC', phrase=RegexPhrase(r'\b(New York)\b')),
    ])
    neg_chunker = PhraseChunker(
        tags=[
            Tag('FRW-NEG', phrase=RegexPhrase(r'\b(not)\b')),
        ],
        apply_iob2=False,
    )
    neg_scoper = Scoper(scopes=[
        Scope(
            applied_tag='FRW-NEG',
            scope_direction=ScopeDirection.RIGHT,
            order=1,
        ),
    ])

    pipeline = ContextPipeline(pipes=[
        TokenAttributesPipe(),
        ChunkerPipe('regex_ner', loc_chunker, order=2),
        ChunkerPipe('neg', neg_chunker, order=3),
        TokenTaggerPipe('neg_scope', focus='neg', tagger=neg_scoper, order=4),
    ])

    ctx = pipeline.execute(
        Context('We are not going to New York.',
                ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.']))

    assert len(ctx.keys) == 8
    expected = {
        'tokens': ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'],
        'pos': ['PRP', 'VBP', 'RB', 'VBG', 'TO', 'NNP', 'NNP', '.'],
        'spacings': ['yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'no', 'no'],
        'start_positions': ['0', '3', '7', '11', '17', '20', '24', '28'],
        'end_positions': ['1', '5', '9', '15', '18', '22', '27', '28'],
        'regex_ner': ['', '', '', '', '', 'B-LOC', 'I-LOC', ''],
        'neg': ['', '', 'FRW-NEG', '', '', '', '', ''],
        'neg_scope': ['', '', 'FRW-NEG', 'FRW-NEG', 'FRW-NEG', 'FRW-NEG',
                      'FRW-NEG', 'FRW-NEG'],
    }
    for key, values in expected.items():
        assert ctx.get(key) == values
# Esempio n. 9
def test_context_add_and_get():
    """Added attributes are tracked in `keys` and retrievable token-aligned."""
    ctx = Context('We are not going to New York.',
                  ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'])
    ctx.add('entities', ['', '', '', '', '', 'LOC', 'LOC', ''])
    ctx.add('negatives', ['', '', 'NEG', '', '', '', '', ''])
    ctx.add('negative_scope',
            ['', '', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG'])

    # 'tokens' plus the three added attributes.
    assert len(ctx.keys) == 4
    for attribute in ('tokens', 'entities', 'negatives', 'negative_scope'):
        assert len(ctx.get(attribute)) == 8