def test_text_case_sensitivity():
    """Check plain-text matching with and without ``case_sensitive``.

    With default settings the lowercase pattern is expected to match all
    three casings; with ``case_sensitive`` enabled only the exact casing
    of the pattern is expected to match.
    """
    sample = 'hello Hello HELLO'

    # Default settings: every casing variant should be reported.
    relaxed = Annotator('Hello', patterns=('hello', ))
    relaxed_hits = list(relaxed.annotate(sample))
    expected_relaxed = [
        Annotation('hello', (0, 5), type='Hello'),
        Annotation('Hello', (6, 11), type='Hello'),
        Annotation('HELLO', (12, 17), type='Hello'),
    ]
    assert relaxed_hits == expected_relaxed

    # Case-sensitive settings: only the capitalised word should remain.
    strict = Annotator(
        'AlternativeHello',
        patterns=('Hello', ),
        settings={'case_sensitive': True},
    )
    strict_hits = list(strict.annotate(sample))
    expected_strict = [
        Annotation('Hello', (6, 11), type='AlternativeHello'),
    ]
    assert strict_hits == expected_strict
def test_eq():
    """Check that annotations compare by value rather than identity."""
    first = Annotation('hello', (0, 5), type='Greeting', score=1.0)
    twin = Annotation('hello', (0, 5), type='Greeting', score=1.0)
    other = Annotation('world', (0, 5), type='Place', score=1.0)

    # Two separately built but identical annotations are equal ...
    assert first == twin
    # ... while still being distinct objects.
    assert first is not twin
    # Differing text/type must break equality.
    assert first != other
def test_ordering():
    """Check the positions AnnotationList assigns regardless of insert order."""
    hello = Annotation('hello', (0, 5))
    hell = Annotation('hell', (0, 4))
    world = Annotation('world', (6, 11))

    # Deliberately feed the items out of order, then append one more.
    ordered = AnnotationList([world, hello])
    ordered.append(hell)

    assert ordered.index(hello) == 0
    assert ordered.index(hell) == 1
    assert ordered.index(world) == 2
def test_disambiguate():
    """Check disambiguate() and its ``discard_others`` flag.

    The lower-scored of the two overlapping annotations is expected to be
    left out of the result, and removed from the list only when
    ``discard_others=True`` is passed.
    """
    long_hello = Annotation('hello', (0, 5), score=1.1)
    short_hell = Annotation('hell', (0, 4), score=1.0)
    world = Annotation('world', (6, 11), score=1.1)
    candidates = AnnotationList([long_hello, short_hell, world])

    winners = candidates.disambiguate()
    assert winners == (long_hello, world)
    # A plain call must leave the losing annotation in the list.
    assert short_hell in candidates

    candidates.disambiguate(discard_others=True)
    assert short_hell not in candidates
def test_cells():
    """Check how AnnotationList.cells groups the annotations."""
    hello = Annotation('hello', (0, 5), type='Greeting')
    hell = Annotation('hell', (0, 4), type='Place')
    world_bang = Annotation('world!', (6, 12), type='Place')
    world = Annotation('world', (6, 11), type='Place')
    bang = Annotation('!', (11, 12), type='Symbol')

    grouped = AnnotationList([hello, hell, world_bang, world, bang])

    expected_cells = [
        [hello, hell],
        [world_bang, world],
        [bang],
    ]
    assert grouped.cells == expected_cells
def test_regex_case_sensitivity():
    """Check that a ``/.../`` regex pattern matches all casings by default."""
    sample = 'HELLO Bello cello'
    matcher = Annotator('Thing', patterns='/[hbc]ello/')

    found = list(matcher.annotate(sample))

    expected = [
        Annotation('HELLO', (0, 5), type='Thing'),
        Annotation('Bello', (6, 11), type='Thing'),
        Annotation('cello', (12, 17), type='Thing'),
    ]
    assert found == expected
def test_representation():
    """Check that dict patterns attach their mapped value as ``data``."""
    sample = 'hello world'
    # Keys are the patterns; values are expected back as Annotation.data.
    pattern_map = {
        'hello': 'Hello',
        'world': 'World',
    }
    matcher = Annotator('HelloWorld', patterns=pattern_map)

    found = list(matcher.annotate(sample))

    expected = [
        Annotation('hello', (0, 5), type='HelloWorld', data='Hello'),
        Annotation('world', (6, 11), type='HelloWorld', data='World'),
    ]
    assert found == expected
def test_agent():
    """Check that an Agent annotates multi-line text one line at a time.

    The input has a leading and a trailing newline, so four lines are
    expected: empty, 'hello', 'world', empty. Spans are asserted relative
    to the start of each line, not to the start of the whole text.
    """
    # Written with explicit escapes so the line structure the assertions
    # rely on cannot be lost to whitespace reformatting.
    text = '\nhello\nworld\n'

    agent = Agent()
    agent.add_annotator(Annotator(
        'HelloWorld',
        patterns=('hello', 'world'),
    ))
    annotated_text = agent.annotate(text)

    assert len(annotated_text.lines) == 4
    # First annotation of the first cell on each non-empty line.
    assert annotated_text.lines[1].cells[0][0] == \
        Annotation('hello', (0, 5), type='HelloWorld')
    assert annotated_text.lines[2].cells[0][0] == \
        Annotation('world', (0, 5), type='HelloWorld')
def test_agent_create_annotator():
    """Check that Agent.create_annotator registers a working annotator."""
    sample = 'hello world'

    agent = Agent()
    agent.create_annotator('Hello', patterns='hello')
    result = agent.annotate(sample)

    first_line = result.lines[0]
    expected = [
        Annotation('hello', (0, 5), type='Hello'),
    ]
    assert first_line.annotations == expected
def test_filter_and_boost():
    """Check filter() selection and that boost() only touches what it selects.

    Scores are re-checked after every boost so that a change leaking onto
    an unselected annotation is caught at the step that caused it.
    """
    greeting = Annotation('hello', (0, 5), type='Greeting', score=1.0)
    near_place = Annotation('hell', (0, 4), type='Place', score=1.0)
    far_place = Annotation('world', (6, 11), type='Place', score=1.0)
    pool = AnnotationList([greeting, near_place, far_place])

    assert pool.filter(type='Greeting') == [greeting]
    assert pool.filter(type='Place') == [near_place, far_place]

    # Boost through a list-level filter.
    pool.filter(type='Greeting').boost(1.25)
    assert (greeting.score, near_place.score, far_place.score) == \
        (1.25, 1.0, 1.0)

    # Boost through a filter applied to a single cell.
    pool.cells[0].filter(type='Place').boost(1.50)
    assert (greeting.score, near_place.score, far_place.score) == \
        (1.25, 1.50, 1.0)

    # Boost a whole cell without filtering.
    pool.cells[1].boost(1.75)
    assert (greeting.score, near_place.score, far_place.score) == \
        (1.25, 1.50, 1.75)
def test_line_add(A, B, L1, L2, T1, T2):
    """Check that ``T1 + T2`` builds a merged object and leaves inputs alone.

    All six arguments are supplied by the test runner; L1, L2, T1 and T2
    are defined outside this view (presumably as fixtures - confirm).
    """
    combined = T1 + T2

    # The operands must come out of the addition unchanged.
    assert A.span == (0, 5)
    assert B.span == (0, 5)
    assert T1.annotations == L1 == [A]
    assert T2.annotations == L2 == [B]
    assert T1.text == 'hello'
    assert T2.text == 'world'

    # The result joins both texts; the second annotation is expected at
    # its new position in the combined text.
    shifted_world = Annotation('world', (6, 11))
    assert combined.text == 'hello world'
    assert combined.annotations == [A, shifted_world]
def test_tokens():
    """Check that ``{name}`` placeholders in patterns expand to tokens.

    The ``sep`` token itself refers to the ``comma`` and ``dot`` tokens,
    so the test also covers a token that is built from other tokens.
    """
    sample = 'hello, world!'
    token_defs = {
        'comma': ',',
        'dot': '.',
        'sep': '/({comma}|{dot})? /',
    }
    matcher = Annotator(
        'Thing',
        tokens=token_defs,
        patterns='hello{sep}world!',
    )

    found = list(matcher.annotate(sample))

    expected = [
        Annotation('hello, world!', (0, 13), type='Thing'),
    ]
    assert found == expected
def test_combinations():
    """Check the set of tuples produced by AnnotationList.combinations()."""
    hello = Annotation('hello', (0, 5))
    hell = Annotation('hell', (0, 4))
    hello_world = Annotation('hello world', (0, 11))
    world = Annotation('world', (6, 11))
    worl = Annotation('worl', (6, 10))
    world_bang = Annotation('world!', (6, 12))
    bang = Annotation('!', (11, 12))

    pool = AnnotationList(
        [hello, hell, hello_world, world, worl, world_bang, bang]
    )

    # Compared as a set, so the order in which tuples are yielded is
    # not part of what this test checks.
    expected = {
        (hello, world, bang),
        (hello, worl, bang),
        (hello, world_bang),
        (hell, world, bang),
        (hell, worl, bang),
        (hell, world_bang),
        (hello_world, bang),
    }
    assert set(pool.combinations()) == expected
def A():
    """Build the 'hello' annotation covering span (0, 5).

    NOTE(review): test_line_add takes a parameter named ``A``, so this is
    presumably registered as a pytest fixture - confirm the decorator.
    """
    hello = Annotation('hello', (0, 5))
    return hello
def B():
    """Build the 'world' annotation covering span (0, 5).

    NOTE(review): test_line_add takes a parameter named ``B``, so this is
    presumably registered as a pytest fixture - confirm the decorator.
    """
    world = Annotation('world', (0, 5))
    return world