def test_leniency_type_not_in_typeystem_lenient(small_typesystem_xml):
    """A CAS created with ``lenient=True`` accepts the token without raising.

    The token type comes from the typesystem loaded from the fixture XML,
    while the CAS itself is constructed without that typesystem. The test
    has no explicit assertion: it passes as long as ``add_annotation``
    completes without an exception.
    """
    token_type = load_typesystem(small_typesystem_xml).get_type("cassis.Token")
    foreign_token = token_type(begin=0, end=3, id="0", pos="NNP")

    lenient_cas = Cas(lenient=True)

    # Must not raise.
    lenient_cas.add_annotation(foreign_token)
def test_leniency_type_not_in_typeystem_not_lenient(small_typesystem_xml):
    """A default (non-lenient) CAS rejects the token with a ``RuntimeError``.

    The token type comes from the typesystem loaded from the fixture XML,
    while the CAS is constructed with no arguments. ``add_annotation`` is
    expected to raise a ``RuntimeError`` whose message matches
    "Typesystem of CAS does not contain type".
    """
    token_type = load_typesystem(small_typesystem_xml).get_type("cassis.Token")
    foreign_token = token_type(begin=0, end=3, id="0", pos="NNP")

    strict_cas = Cas()

    expected_message = "Typesystem of CAS does not contain type"
    with pytest.raises(RuntimeError, match=expected_message):
        strict_cas.add_annotation(foreign_token)
def test_annotations_are_ordered_correctly(tokens):
    """Annotations added in random order are selected in fixture order.

    The tokens from the ``tokens`` fixture are inserted into a fresh CAS in
    shuffled order; ``select("cassis.Token")`` must then yield them in the
    same order as the fixture list.
    """
    # Work on a copy so the fixture list (the expected order) stays untouched.
    annotations = list(tokens)
    # Shuffle the copy itself. The previous code shuffled a throwaway
    # ``list(annotations)``, so the insertion order was never randomised.
    random.shuffle(annotations)

    cas = Cas()
    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens
def test_annotations_are_ordered_correctly(small_typesystem_xml, tokens):
    """Annotations added in random order are selected in fixture order.

    A CAS is created with the typesystem loaded from the fixture XML. The
    tokens from the ``tokens`` fixture are inserted in shuffled order;
    ``select("cassis.Token")`` must then yield them in the same order as
    the fixture list.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)

    # Work on a copy so the fixture list (the expected order) stays untouched.
    annotations = list(tokens)
    # Shuffle the copy itself. The previous code shuffled a throwaway
    # ``list(annotations)``, so the insertion order was never randomised.
    random.shuffle(annotations)

    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens
def test_select_covered_overlapping(small_typesystem_xml, tokens, sentences):
    """``select_covered`` returns both of two annotations with equal offsets.

    A new type ``test.Annotation`` is created in the CAS typesystem and two
    instances spanning 0-5 are added next to a sentence spanning 0-10.
    Selecting ``test.Annotation`` covered by that sentence must yield both
    instances, in the order they were added.
    """
    cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
    cas_typesystem = cas.typesystem
    overlap_type = cas_typesystem.create_type("test.Annotation")
    sentence_type = cas_typesystem.get_type("cassis.Sentence")

    covering_sentence = sentence_type(begin=0, end=10)
    # Two distinct annotations with exactly the same span.
    expected = [overlap_type(begin=0, end=5) for _ in range(2)]

    cas.add_annotation(covering_sentence)
    cas.add_annotations(expected)

    covered = list(cas.select_covered("test.Annotation", covering_sentence))

    assert covered == expected
def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """Every annotation selected after being added has a non-``None`` xmiID.

    Six tokens are created without an ``xmiID`` and added to a CAS built
    with the typesystem loaded from the fixture XML. All of them must be
    returned by ``select`` and each must carry an ``xmiID``.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)
    TokenType = typesystem.get_type("cassis.Token")

    # Named differently from the ``tokens`` fixture parameter so the fixture
    # is not shadowed.
    new_tokens = [
        TokenType(begin=0, end=3, id="0", pos="NNP"),
        TokenType(begin=4, end=10, id="1", pos="VBD"),
        TokenType(begin=11, end=14, id="2", pos="IN"),
        TokenType(begin=15, end=18, id="3", pos="DT"),
        TokenType(begin=19, end=24, id="4", pos="NN"),
        TokenType(begin=25, end=26, id="5", pos="."),
    ]
    for token in new_tokens:
        cas.add_annotation(token)

    actual_tokens = list(cas.select(TokenType.name))

    # ``all()`` over an empty iterable is True, so check that the tokens were
    # actually selected before checking their ids.
    assert len(actual_tokens) == len(new_tokens)
    assert all(token.xmiID is not None for token in actual_tokens)
def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """Every annotation selected after being added has a non-``None`` xmiID.

    Six tokens are created with ``sofa=1`` but without an ``xmiID`` and
    added to a CAS constructed with no arguments. All of them must be
    returned by ``select`` and each must carry an ``xmiID``.
    """
    cas = Cas()
    typesystem = load_typesystem(small_typesystem_xml)
    TokenType = typesystem.get_type('cassis.Token')

    # Named differently from the ``tokens`` fixture parameter so the fixture
    # is not shadowed.
    new_tokens = [
        TokenType(sofa=1, begin=0, end=3, id='0', pos='NNP'),
        TokenType(sofa=1, begin=4, end=10, id='1', pos='VBD'),
        TokenType(sofa=1, begin=11, end=14, id='2', pos='IN'),
        TokenType(sofa=1, begin=15, end=18, id='3', pos='DT'),
        TokenType(sofa=1, begin=19, end=24, id='4', pos='NN'),
        TokenType(sofa=1, begin=25, end=26, id='5', pos='.'),
    ]
    for token in new_tokens:
        cas.add_annotation(token)

    actual_tokens = list(cas.select(TokenType.name))

    # ``all()`` over an empty iterable is True, so check that the tokens were
    # actually selected before checking their ids.
    assert len(actual_tokens) == len(new_tokens)
    # Identity comparison is the correct way to test against None.
    assert all(token.xmiID is not None for token in actual_tokens)
def test_add_annotation(small_typesystem_xml):
    """Tokens added to a CAS are returned by ``select`` in the same order.

    The CAS is built with the typesystem loaded from the fixture XML and its
    sofa string is set to the sentence the token offsets refer to.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    token_type = typesystem.get_type("cassis.Token")

    cas = Cas(typesystem)
    cas.sofa_string = "Joe waited for the train ."

    # (begin, end, id, pos) for each token of the sofa string.
    token_specs = (
        (0, 3, "0", "NNP"),
        (4, 10, "1", "VBD"),
        (11, 14, "2", "IN"),
        (15, 18, "3", "DT"),
        (19, 24, "4", "NN"),
        (25, 26, "5", "."),
    )
    expected = [
        token_type(begin=begin, end=end, id=token_id, pos=pos)
        for begin, end, token_id, pos in token_specs
    ]

    for annotation in expected:
        cas.add_annotation(annotation)

    selected = list(cas.select(token_type.name))

    assert selected == expected
def test_add_annotation(small_typesystem_xml):
    """Tokens added to a CAS are returned by ``select`` in the same order.

    The CAS is built from a single ``Sofa`` (``sofaNum=1``) holding the
    sentence; each token is created with an explicit ``xmiID`` and
    ``sofa=1``.
    """
    cas = Cas(sofas=[Sofa(sofaNum=1, sofaString='Joe waited for the train .')])
    token_type = load_typesystem(small_typesystem_xml).get_type('cassis.Token')

    # (xmiID, begin, end, id, pos) for each token of the sofa string.
    token_specs = (
        (13, 0, 3, '0', 'NNP'),
        (19, 4, 10, '1', 'VBD'),
        (25, 11, 14, '2', 'IN'),
        (31, 15, 18, '3', 'DT'),
        (37, 19, 24, '4', 'NN'),
        (43, 25, 26, '5', '.'),
    )
    expected = [
        token_type(xmiID=xmi_id, sofa=1, begin=begin, end=end, id=token_id, pos=pos)
        for xmi_id, begin, end, token_id, pos in token_specs
    ]

    for annotation in expected:
        cas.add_annotation(annotation)

    selected = list(cas.select(token_type.name))

    assert selected == expected