Ejemplo n.º 1
0
def test_text_case_sensitivity():
    """Check plain-text matching with and without ``case_sensitive``."""
    sample = 'hello Hello HELLO'

    # Default settings: the lowercase pattern is expected to hit all casings.
    relaxed = Annotator('Hello', patterns=('hello', ))
    relaxed_hits = list(relaxed.annotate(sample))
    assert relaxed_hits == [
        Annotation('hello', (0, 5), type='Hello'),
        Annotation('Hello', (6, 11), type='Hello'),
        Annotation('HELLO', (12, 17), type='Hello'),
    ]

    # With case_sensitive enabled only the exact casing is expected to match.
    strict = Annotator(
        'AlternativeHello',
        patterns=('Hello', ),
        settings={'case_sensitive': True},
    )
    strict_hits = list(strict.annotate(sample))
    assert strict_hits == [
        Annotation('Hello', (6, 11), type='AlternativeHello'),
    ]
Ejemplo n.º 2
0
def test_eq():
    """Check that annotations compare by value rather than by identity."""
    greeting = Annotation('hello', (0, 5), type='Greeting', score=1.0)
    greeting_twin = Annotation('hello', (0, 5), type='Greeting', score=1.0)
    place = Annotation('world', (0, 5), type='Place', score=1.0)

    # Two separately built but identical annotations: equal, yet distinct.
    assert greeting == greeting_twin
    assert greeting is not greeting_twin

    # Same span but different text/type must not compare equal.
    assert greeting != place
Ejemplo n.º 3
0
def test_ordering():
    """Check the positions an AnnotationList reports for its members."""
    hello = Annotation('hello', (0, 5))
    hell = Annotation('hell', (0, 4))
    world = Annotation('world', (6, 11))

    # Feed the items out of order: two at construction, one via append.
    ordered = AnnotationList([world, hello])
    ordered.append(hell)

    # Expected order: 'hello' first, then 'hell', then 'world'.
    for position, item in enumerate((hello, hell, world)):
        assert ordered.index(item) == position
Ejemplo n.º 4
0
def test_disambiguate():
    """Check ``disambiguate`` with and without ``discard_others``."""
    winner = Annotation('hello', (0, 5), score=1.1)
    loser = Annotation('hell', (0, 4), score=1.0)
    world = Annotation('world', (6, 11), score=1.1)

    candidates = AnnotationList([winner, loser, world])

    # A plain call returns the selection and leaves the list untouched.
    selection = candidates.disambiguate()
    assert selection == (winner, world)
    assert loser in candidates

    # With discard_others=True the unselected annotation is removed.
    candidates.disambiguate(discard_others=True)
    assert loser not in candidates
Ejemplo n.º 5
0
def test_cells():
    """Check how ``AnnotationList.cells`` groups the given annotations."""
    hello = Annotation('hello', (0, 5), type='Greeting')
    hell = Annotation('hell', (0, 4), type='Place')
    world_bang = Annotation('world!', (6, 12), type='Place')
    world = Annotation('world', (6, 11), type='Place')
    bang = Annotation('!', (11, 12), type='Symbol')

    grouped = AnnotationList([hello, hell, world_bang, world, bang]).cells

    # Three cells are expected: both starting at 0, both starting at 6,
    # and the lone '!' on its own.
    assert grouped == [
        [hello, hell],
        [world_bang, world],
        [bang],
    ]
Ejemplo n.º 6
0
def test_regex_case_sensitivity():
    """Check that a regex pattern matches all casings with default settings."""
    sample = 'HELLO Bello cello'

    # The pattern is wrapped in slashes, as in the other regex-style patterns.
    matcher = Annotator('Thing', patterns='/[hbc]ello/')
    hits = list(matcher.annotate(sample))

    # The lowercase character class is expected to match every word.
    assert hits == [
        Annotation('HELLO', (0, 5), type='Thing'),
        Annotation('Bello', (6, 11), type='Thing'),
        Annotation('cello', (12, 17), type='Thing'),
    ]
Ejemplo n.º 7
0
def test_representation():
    """Check that dict patterns attach the mapped value as ``data``."""
    sample = 'hello world'

    # Keys are the patterns; values are expected back as annotation data.
    pattern_to_data = {
        'hello': 'Hello',
        'world': 'World',
    }
    matcher = Annotator('HelloWorld', patterns=pattern_to_data)
    hits = list(matcher.annotate(sample))

    assert hits == [
        Annotation('hello', (0, 5), type='HelloWorld', data='Hello'),
        Annotation('world', (6, 11), type='HelloWorld', data='World'),
    ]
Ejemplo n.º 8
0
def test_agent():
    """Check that an Agent annotates multi-line text line by line."""
    source = '''
        hello
        world
    '''

    pipeline = Agent()
    pipeline.add_annotator(
        Annotator('HelloWorld', patterns=('hello', 'world')),
    )
    annotated_text = pipeline.annotate(source)

    # The literal above is expected to yield four lines.
    assert len(annotated_text.lines) == 4

    # Each word is expected as the first annotation of the first cell of
    # its line, with span (0, 5).
    for line_no, word in ((1, 'hello'), (2, 'world')):
        assert annotated_text.lines[line_no].cells[0][0] == \
            Annotation(word, (0, 5), type='HelloWorld')
Ejemplo n.º 9
0
def test_agent_create_annotator():
    """Check that ``Agent.create_annotator`` registers a working annotator."""
    sample = 'hello world'

    pipeline = Agent()
    # Build the annotator through the agent instead of passing one in.
    pipeline.create_annotator('Hello', patterns='hello')
    result = pipeline.annotate(sample)

    # Only 'hello' should be annotated on the single line.
    first_line = result.lines[0]
    assert first_line.annotations == [
        Annotation('hello', (0, 5), type='Hello'),
    ]
Ejemplo n.º 10
0
def test_filter_and_boost():
    """Check that ``filter`` selects by type and ``boost`` hits only those."""
    greeting = Annotation('hello', (0, 5), type='Greeting', score=1.0)
    hell_place = Annotation('hell', (0, 4), type='Place', score=1.0)
    world_place = Annotation('world', (6, 11), type='Place', score=1.0)

    pool = AnnotationList([greeting, hell_place, world_place])

    def current_scores():
        # Snapshot of the three scores, in creation order.
        return (greeting.score, hell_place.score, world_place.score)

    assert pool.filter(type='Greeting') == [greeting]
    assert pool.filter(type='Place') == [hell_place, world_place]

    # Boosting a filtered view must only touch the matching annotation.
    pool.filter(type='Greeting').boost(1.25)
    assert current_scores() == (1.25, 1.0, 1.0)

    # Filtering within the first cell narrows the boost to 'hell'.
    first_cell = pool.cells[0]
    first_cell.filter(type='Place').boost(1.50)
    assert current_scores() == (1.25, 1.50, 1.0)

    # Boosting the whole second cell affects only 'world'.
    second_cell = pool.cells[1]
    second_cell.boost(1.75)
    assert current_scores() == (1.25, 1.50, 1.75)
Ejemplo n.º 11
0
def test_line_add(A, B, L1, L2, T1, T2):
    """Check that ``T1 + T2`` builds a new object and leaves inputs intact.

    The arguments are supplied from outside this function (presumably
    pytest fixtures -- confirm against the surrounding module).
    """
    combined = T1 + T2

    # The operands must be unchanged by the addition.
    assert A.span == (0, 5)
    assert B.span == (0, 5)
    assert T1.annotations == L1 == [A]
    assert T2.annotations == L2 == [B]
    assert T1.text == 'hello'
    assert T2.text == 'world'

    # The result joins the texts with a space and holds the second
    # annotation at the shifted span (6, 11).
    shifted_world = Annotation('world', (6, 11))
    assert combined.text == 'hello world'
    assert combined.annotations == [A, shifted_world]
Ejemplo n.º 12
0
def test_tokens():
    """Check that ``{name}`` placeholders in patterns expand via ``tokens``."""
    sample = 'hello, world!'

    # 'sep' is itself built from the 'comma' and 'dot' tokens.
    token_table = {
        'comma': ',',
        'dot': '.',
        'sep': '/({comma}|{dot})? /',
    }
    matcher = Annotator(
        'Thing',
        tokens=token_table,
        patterns='hello{sep}world!',
    )
    hits = list(matcher.annotate(sample))

    # The whole sample is expected to match as one annotation.
    assert hits == [
        Annotation('hello, world!', (0, 13), type='Thing'),
    ]
Ejemplo n.º 13
0
def test_combinations():
    """Check the set of tuples produced by ``AnnotationList.combinations``."""
    hello = Annotation('hello', (0, 5))
    hell = Annotation('hell', (0, 4))
    hello_world = Annotation('hello world', (0, 11))
    world = Annotation('world', (6, 11))
    worl = Annotation('worl', (6, 10))
    world_bang = Annotation('world!', (6, 12))
    bang = Annotation('!', (11, 12))

    pool = AnnotationList(
        [hello, hell, hello_world, world, worl, world_bang, bang],
    )

    # Compare as a set so the order of the combinations does not matter.
    produced = set(pool.combinations())
    assert produced == {
        (hello, world, bang),
        (hello, worl, bang),
        (hello, world_bang),
        (hell, world, bang),
        (hell, worl, bang),
        (hell, world_bang),
        (hello_world, bang),
    }
Ejemplo n.º 14
0
def A():
    """Return a fresh 'hello' annotation with span (0, 5)."""
    text, span = 'hello', (0, 5)
    return Annotation(text, span)
Ejemplo n.º 15
0
def B():
    """Return a fresh 'world' annotation with span (0, 5)."""
    text, span = 'world', (0, 5)
    return Annotation(text, span)