Example #1
def test_forward_scope_should_move_right():
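    """A RIGHT-directed scope spreads its tag forward from the cue token to the end of the sequence."""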
    scopes = [Scope('NEG', scope_direction=ScopeDirection.RIGHT, order=1)]
    scoper = Scoper(scopes)

    tokens = ['', '', 'NEG', '', '', '', '', '']

    scope_tags = scoper.tag(tokens)
    assert scope_tags == ['', '', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG']
Example #2
def test_backward_scope_should_move_left():
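    """A LEFT-directed scope spreads its tag backward from the cue token to the start of the sequence."""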
    scopes = [Scope('NEG', scope_direction=ScopeDirection.LEFT, order=1)]
    scoper = Scoper(scopes)

    tokens = ['', '', 'NEG', '', '', '', '', '']

    scope_tags = scoper.tag(tokens)
    assert scope_tags == ['NEG', 'NEG', 'NEG', '', '', '', '', '']
Example #3
def test_forward_scope_should_move_both_directions():
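    """A TWOWAY scope spreads its tag in both directions, here covering the whole sequence."""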
    scopes = [
        Scope('NEG', scope_direction=ScopeDirection.TWOWAY, order=1),
    ]
    scoper = Scoper(scopes)

    tokens = ['', '', 'NEG', '', '', '', '', '']

    scope_tags = scoper.tag(tokens)
    assert scope_tags == [
        'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG', 'NEG'
    ]
Example #4
def test_backward_scope_should_stop():
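    """A backward scope stops spreading just before a token carrying one of the stop_when tags."""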
    scopes = [
        Scope('NEG',
              scope_direction=ScopeDirection.LEFT,
              order=1,
              stop_when=['TRANS'])
    ]
    scoper = Scoper(scopes)

    tokens = ['TRANS', 'PHRASE', 'NEG', '', '', '', '', '']

    scope_tags = scoper.tag(tokens)
    assert scope_tags == ['', 'NEG', 'NEG', '', '', '', '', '']
Example #5
def test_should_exit_after_max_travel_distance_hit():
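    """A scope stops spreading once it has travelled max_travel_distance tokens from the cue, even before hitting a stop_when tag."""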
    scopes = [
        Scope('NEG',
              scope_direction=ScopeDirection.LEFT,
              order=1,
              stop_when=['TRANS'],
              max_travel_distance=2)
    ]
    scoper = Scoper(scopes)

    tokens = ['TRANS', 'PHRASE', '', '', '', '', 'NEG', '']

    scope_tags = scoper.tag(tokens)
    assert scope_tags == ['', '', '', '', 'NEG', 'NEG', 'NEG', '']
Example #6
def test_extracting_multiple_pipes():
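    """Run a full ContextPipeline (token attributes, regex NER chunking, negation-cue
    chunking, scope tagging) and verify every key it writes to the context."""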
    pipeline = ContextPipeline(pipes=[
        TokenAttributesPipe(),
        ChunkerPipe('regex_ner',
                    PhraseChunker(tags=[
                        Tag('LOC', phrase=RegexPhrase(r'\b(New York)\b')),
                    ]),
                    order=2),
        ChunkerPipe('neg',
                    PhraseChunker(
                        tags=[
                            Tag('FRW-NEG', phrase=RegexPhrase(r'\b(not)\b')),
                        ],
                        apply_iob2=False,
                    ),
                    order=3),
        TokenTaggerPipe(
            'neg_scope',
            focus='neg',
            tagger=Scoper(scopes=[
                Scope(
                    applied_tag='FRW-NEG',
                    scope_direction=ScopeDirection.RIGHT,
                    order=1,
                )
            ]),
            order=4,
        )
    ])

    context = Context('We are not going to New York.',
                      ['We', 'are', 'not', 'going', 'to', 'New', 'York', '.'])
    context = pipeline.execute(context)

    assert len(context.keys) == 8
    assert context.get('tokens') == [
        'We', 'are', 'not', 'going', 'to', 'New', 'York', '.'
    ]
    assert context.get('pos') == [
        'PRP', 'VBP', 'RB', 'VBG', 'TO', 'NNP', 'NNP', '.'
    ]
    assert context.get('spacings') == [
        'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'no', 'no'
    ]
    assert context.get('start_positions') == [
        '0', '3', '7', '11', '17', '20', '24', '28'
    ]
    assert context.get('end_positions') == [
        '1', '5', '9', '15', '18', '22', '27', '28'
    ]
    assert context.get('regex_ner') == [
        '', '', '', '', '', 'B-LOC', 'I-LOC', ''
    ]
    assert context.get('neg') == ['', '', 'FRW-NEG', '', '', '', '', '']
    assert context.get('neg_scope') == [
        '', '', 'FRW-NEG', 'FRW-NEG', 'FRW-NEG', 'FRW-NEG', 'FRW-NEG',
        'FRW-NEG'
    ]