Example #1
0
def test_select(tokens, sentences):
    """Check that ``select`` hands back the added annotations per type name.

    The ``tokens`` and ``sentences`` fixtures are added to a fresh ``Cas`` in
    one call; selecting by ``"cassis.Token"`` and ``"cassis.Sentence"`` must
    then yield lists equal to the respective fixture.
    """
    document = Cas()
    document.add_annotations(tokens + sentences)

    # Run both selections first, then compare against the fixtures.
    selected = {
        type_name: list(document.select(type_name))
        for type_name in ("cassis.Token", "cassis.Sentence")
    }

    assert selected["cassis.Token"] == tokens
    assert selected["cassis.Sentence"] == sentences
Example #2
0
def test_select(small_typesystem_xml, tokens, sentences):
    """Check ``select`` by type name on a ``Cas`` built with a loaded type system.

    The type system is loaded from the ``small_typesystem_xml`` fixture, the
    token and sentence fixtures are added together, and each type name must
    select a list equal to its fixture.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)

    everything = tokens + sentences
    cas.add_annotations(everything)

    token_result = [*cas.select("cassis.Token")]
    sentence_result = [*cas.select("cassis.Sentence")]

    assert token_result == tokens
    assert sentence_result == sentences
Example #3
0
def test_select(tokens, sentences):
    """Check ``select`` on a ``Cas`` that received its annotations at construction.

    Tokens and sentences are passed through the ``annotations`` keyword of
    ``Cas``; selecting by each type name must yield a list equal to the
    matching fixture.
    """
    cas = Cas(annotations=tokens + sentences)

    selected_tokens = [*cas.select('cassis.Token')]
    selected_sentences = [*cas.select('cassis.Sentence')]

    assert selected_tokens == tokens
    assert selected_sentences == sentences
Example #4
0
def test_removing_of_existing_fs_works(small_typesystem_xml, tokens,
                                       sentences):
    """Check that removed annotations no longer show up in ``select``.

    Tokens and sentences are added, then removed in two stages. After the
    tokens are removed only the sentences may be selected under
    ``"uima.tcas.Annotation"``; after the sentences are removed as well,
    nothing may be left.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens + sentences)

    def remaining():
        # Whatever is currently selectable under the Annotation type name.
        return set(cas.select("uima.tcas.Annotation"))

    # Stage 1: drop every token; the sentences must survive.
    for token in tokens:
        cas.remove_annotation(token)
    assert remaining() == set(sentences)

    # Stage 2: drop every sentence; the selection must now be empty.
    for sentence in sentences:
        cas.remove_annotation(sentence)
    assert remaining() == set()
Example #5
0
def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
    """Check that selecting ``"uima.tcas.Annotation"`` yields tokens and sentences.

    NOTE(review): presumably both fixture types derive from
    ``uima.tcas.Annotation`` in the small type system; confirm against the
    fixture definition.
    """
    expected = tokens + sentences

    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(expected)

    # Compared as sets, so the order of the selection is not part of this test.
    selected = set(cas.select("uima.tcas.Annotation"))

    assert selected == set(expected)
Example #6
0
def test_annotations_are_ordered_correctly(tokens):
    """Check that ``select`` returns tokens in fixture order after shuffled insertion.

    The tokens are added one by one in random order; selecting
    ``"cassis.Token"`` must still produce a list equal to the ``tokens``
    fixture (assumed to already be in the expected order).
    """
    # Work on a copy so that shuffling never reorders the ``tokens`` fixture,
    # which serves as the expected result below.
    annotations = list(tokens)
    # ``random.shuffle`` shuffles in place and returns ``None``. It has to be
    # applied to the very list iterated below; shuffling a temporary copy
    # would leave the insertion order untouched and make the test vacuous.
    random.shuffle(annotations)

    cas = Cas()
    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens
Example #7
0
def test_removing_removes_from_view(small_typesystem_xml, tokens, sentences):
    """Check that removing annotations via the ``Cas`` leaves a second view intact.

    The same annotations are added both through ``cas`` and through a view
    created as ``"testView"``. After every annotation is removed through
    ``cas``, selecting on ``cas`` must be empty while the view must still
    contain all of them.
    """
    everything = tokens + sentences
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    view = cas.create_view("testView")

    # Populate the cas first, then the additional view, with identical content.
    for target in (cas, view):
        target.add_annotations(everything)

    for annotation in everything:
        cas.remove_annotation(annotation)

    left_in_cas = set(cas.select("uima.tcas.Annotation"))
    assert left_in_cas == set()

    left_in_view = set(view.select("uima.tcas.Annotation"))
    assert left_in_view == set(everything)
Example #8
0
def test_annotations_are_ordered_correctly(small_typesystem_xml, tokens):
    """Check that ``select`` returns tokens in fixture order after shuffled insertion.

    A ``Cas`` is built from the type system loaded from
    ``small_typesystem_xml``. The tokens are added one by one in random
    order; selecting ``"cassis.Token"`` must still produce a list equal to
    the ``tokens`` fixture (assumed to already be in the expected order).
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)

    # Work on a copy so that shuffling never reorders the ``tokens`` fixture,
    # which serves as the expected result below.
    annotations = list(tokens)
    # ``random.shuffle`` shuffles in place and returns ``None``. It has to be
    # applied to the very list iterated below; shuffling a temporary copy
    # would leave the insertion order untouched and make the test vacuous.
    random.shuffle(annotations)

    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens
Example #9
0
def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """Check that every token added without an ``xmiID`` has one when selected.

    Six tokens are created from the ``cassis.Token`` type without passing an
    ``xmiID`` and are added one at a time; afterwards every selected token
    must report an ``xmiID`` that is not ``None``. The ``tokens`` fixture is
    requested but not used by the body.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)
    TokenType = typesystem.get_type("cassis.Token")

    # (begin, end, id, pos) for each token to create.
    token_specs = (
        (0, 3, "0", "NNP"),
        (4, 10, "1", "VBD"),
        (11, 14, "2", "IN"),
        (15, 18, "3", "DT"),
        (19, 24, "4", "NN"),
        (25, 26, "5", "."),
    )
    new_tokens = [
        TokenType(begin=begin, end=end, id=token_id, pos=pos)
        for begin, end, token_id, pos in token_specs
    ]
    for new_token in new_tokens:
        cas.add_annotation(new_token)

    # Collect the id of every selected token before checking them.
    assigned_ids = [selected.xmiID for selected in cas.select(TokenType.name)]
    assert all(xmi_id is not None for xmi_id in assigned_ids)
Example #10
0
def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """Check that every token added without an ``xmiID`` has one when selected.

    Six tokens are created from the ``cassis.Token`` type with ``sofa=1`` and
    no ``xmiID`` and are added one at a time to an empty ``Cas``; afterwards
    every selected token must report an ``xmiID`` that is not ``None``. The
    ``tokens`` fixture is requested but not used by the body.
    """
    cas = Cas()
    typesystem = load_typesystem(small_typesystem_xml)
    TokenType = typesystem.get_type('cassis.Token')

    # Named differently from the ``tokens`` fixture parameter so the fixture
    # is not silently shadowed.
    new_tokens = [
        TokenType(sofa=1, begin=0, end=3, id='0', pos='NNP'),
        TokenType(sofa=1, begin=4, end=10, id='1', pos='VBD'),
        TokenType(sofa=1, begin=11, end=14, id='2', pos='IN'),
        TokenType(sofa=1, begin=15, end=18, id='3', pos='DT'),
        TokenType(sofa=1, begin=19, end=24, id='4', pos='NN'),
        TokenType(sofa=1, begin=25, end=26, id='5', pos='.'),
    ]
    for token in new_tokens:
        cas.add_annotation(token)

    actual_tokens = list(cas.select(TokenType.name))
    # Identity check: ``!= None`` would dispatch to a custom ``__ne__`` and
    # could report a wrong answer; ``is not None`` cannot be overridden.
    assert all(token.xmiID is not None for token in actual_tokens)
Example #11
0
def test_add_annotation(small_typesystem_xml):
    """Check that tokens added one at a time are returned unchanged by ``select``.

    The ``Cas`` is given the sofa string ``"Joe waited for the train ."`` and
    six ``cassis.Token`` instances are added individually. Selecting by the
    token type's name must yield a list equal to the list that was added.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    TokenType = typesystem.get_type("cassis.Token")
    cas = Cas(typesystem)
    cas.sofa_string = "Joe waited for the train ."

    # (begin, end, id, pos) for each token to create.
    token_specs = (
        (0, 3, "0", "NNP"),
        (4, 10, "1", "VBD"),
        (11, 14, "2", "IN"),
        (15, 18, "3", "DT"),
        (19, 24, "4", "NN"),
        (25, 26, "5", "."),
    )
    expected_tokens = [
        TokenType(begin=begin, end=end, id=token_id, pos=pos)
        for begin, end, token_id, pos in token_specs
    ]
    for expected_token in expected_tokens:
        cas.add_annotation(expected_token)

    selected_tokens = [*cas.select(TokenType.name)]
    assert selected_tokens == expected_tokens
Example #12
0
def test_add_annotation(small_typesystem_xml):
    """Check that tokens added one at a time are returned unchanged by ``select``.

    The ``Cas`` is built around a single ``Sofa`` (``sofaNum=1``) and six
    ``cassis.Token`` instances with explicit ``xmiID`` values are added
    individually. Selecting by the token type's name must yield a list equal
    to the list that was added.
    """
    cas = Cas(sofas=[Sofa(sofaNum=1, sofaString='Joe waited for the train .')])
    typesystem = load_typesystem(small_typesystem_xml)
    TokenType = typesystem.get_type('cassis.Token')

    # (xmiID, begin, end, id, pos) for each token; all of them use sofa 1.
    token_specs = (
        (13, 0, 3, '0', 'NNP'),
        (19, 4, 10, '1', 'VBD'),
        (25, 11, 14, '2', 'IN'),
        (31, 15, 18, '3', 'DT'),
        (37, 19, 24, '4', 'NN'),
        (43, 25, 26, '5', '.'),
    )
    expected_tokens = [
        TokenType(xmiID=xmi_id, sofa=1, begin=begin, end=end, id=token_id, pos=pos)
        for xmi_id, begin, end, token_id, pos in token_specs
    ]
    for expected_token in expected_tokens:
        cas.add_annotation(expected_token)

    selected_tokens = [*cas.select(TokenType.name)]
    assert selected_tokens == expected_tokens