Example #1
0
def test_context_add_and_get():
    """Add three annotation layers to a Context and read each one back.

    The test checks that the context then reports four keys and that every
    layer fetched with ``get`` (including ``'tokens'``) has eight entries,
    one per token of the sentence.
    """
    sentence = 'We are not going to New York.'
    tokens = ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.']
    # (key, values) pairs, added in exactly this order.
    extra_layers = (
        ('entities', ['', '', '', '', '', 'LOC', 'LOC', '']),
        ('negatives', ['', '', 'NEG', '', '', '', '', '']),
        ('negative_scope',
         ['', '', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG']),
    )

    context = Context(sentence, tokens)
    for layer_name, layer_values in extra_layers:
        context.add(layer_name, layer_values)

    assert len(context.keys) == 4
    for layer_name in ('tokens', 'entities', 'negatives', 'negative_scope'):
        assert len(context.get(layer_name)) == 8
Example #2
0
def test_extracting_multiple_pipes():
    """Run a four-pipe ContextPipeline over one sentence and verify its output.

    The pipeline combines a token-attributes pipe, two regex phrase chunkers
    (``'regex_ner'`` and ``'neg'``) and a token tagger (``'neg_scope'``) that
    focuses on the ``'neg'`` layer. After execution the context is expected
    to expose eight keys, each compared against a hard-coded list below.
    """
    # Build the pipes one by one, in the same order they are listed in the
    # pipeline.
    token_attributes = TokenAttributesPipe()

    location_tags = [Tag('LOC', phrase=RegexPhrase(r'\b(New York)\b'))]
    location_chunker = ChunkerPipe(
        'regex_ner',
        PhraseChunker(tags=location_tags),
        order=2,
    )

    negation_tags = [Tag('FRW-NEG', phrase=RegexPhrase(r'\b(not)\b'))]
    negation_chunker = ChunkerPipe(
        'neg',
        PhraseChunker(tags=negation_tags, apply_iob2=False),
        order=3,
    )

    rightward_scope = Scope(
        applied_tag='FRW-NEG',
        scope_direction=ScopeDirection.RIGHT,
        order=1,
    )
    negation_scoper = TokenTaggerPipe(
        'neg_scope',
        focus='neg',
        tagger=Scoper(scopes=[rightward_scope]),
        order=4,
    )

    pipeline = ContextPipeline(pipes=[
        token_attributes,
        location_chunker,
        negation_chunker,
        negation_scoper,
    ])

    sentence = 'We are not going to New York.'
    tokens = ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.']
    context = pipeline.execute(Context(sentence, tokens))

    # (key, expected values) pairs, checked in this order.
    expected_layers = (
        ('tokens', ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.']),
        ('pos', ['PRP', 'VBP', 'RB', 'VBG', 'TO', 'NNP', 'NNP', '.']),
        ('spacings',
         ['yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'no', 'no']),
        ('start_positions',
         ['0', '3', '7', '11', '17', '20', '24', '28']),
        ('end_positions',
         ['1', '5', '9', '15', '18', '22', '27', '28']),
        ('regex_ner', ['', '', '', '', '', 'B-LOC', 'I-LOC', '']),
        ('neg', ['', '', 'FRW-NEG', '', '', '', '', '']),
        # The expected scope runs from the 'not' token to the final token.
        ('neg_scope', ['', ''] + ['FRW-NEG'] * 6),
    )

    assert len(context.keys) == 8
    for layer_name, expected_values in expected_layers:
        assert context.get(layer_name) == expected_values