def test_get_view_throws_if_view_does_not_exist():
    """Requesting a view that was never created must raise a KeyError."""
    cas = Cas()

    # Fixed: the `message=` kwarg of pytest.raises was removed in pytest 4.0;
    # `match=` takes a regex, so the literal brackets must be escaped so they
    # are not parsed as a character class.
    with pytest.raises(KeyError, match=r"There is no view with name \[testView\] in this CAS!"):
        cas.get_view("testView")
def test_default_typesystem_is_not_shared():
    """Each CAS must own its default typesystem, not share a global one.

    If the default typesystem were shared, creating the same type name in a
    second CAS would clash with the first.
    See https://github.com/dkpro/dkpro-cassis/issues/67
    """
    cas1 = Cas()
    cas2 = Cas()

    t1 = cas1.typesystem.create_type(name="test.Type")
    t2 = cas2.typesystem.create_type(name="test.Type")

    # Fixed: the original test had no assertions and left t1/t2 unused, so it
    # only relied on create_type not raising. Make the independence explicit.
    assert cas1.typesystem is not cas2.typesystem
    assert t1 is not t2
def test_initial_view_is_created():
    """A freshly constructed CAS already contains the initial view."""
    cas = Cas()

    initial_view = cas.get_view("_InitialView")
    initial_sofa = initial_view.get_sofa()

    attr.validate(initial_sofa)
    assert initial_sofa.sofaID == "_InitialView"
def test_FeatureStructure_get_covered_text_tokens(tokens):
    """Each token's get_covered_text() returns its surface string."""
    cas = Cas()
    cas.sofa_string = "Joe waited for the train . The train was late ."

    expected = ["Joe", "waited", "for", "the", "train", ".",
                "The", "train", "was", "late", "."]
    observed = [t.get_covered_text() for t in tokens]

    assert observed == expected
def test_get_covered_text_sentences(sentences):
    """cas.get_covered_text(sentence) returns the sentence's substring of the sofa."""
    cas = Cas()
    cas.sofa_string = "Joe waited for the train . The train was late ."

    expected = ["Joe waited for the train .", "The train was late ."]
    observed = [cas.get_covered_text(s) for s in sentences]

    assert observed == expected
def test_create_view_creates_view():
    """create_view returns a view whose sofa carries the requested name."""
    cas = Cas()

    new_view = cas.create_view("testView")
    new_sofa = new_view.get_sofa()

    attr.validate(new_sofa)
    assert new_sofa.sofaID == "testView"
def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
    """Selecting the base annotation type yields instances of all its subtypes."""
    cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
    everything = tokens + sentences
    cas.add_annotations(everything)

    selected = set(cas.select("uima.tcas.Annotation"))

    assert selected == set(everything)
def test_leniency_type_not_in_typeystem_lenient(small_typesystem_xml):
    """A lenient CAS accepts annotations of types absent from its own typesystem."""
    token_type = load_typesystem(small_typesystem_xml).get_type("cassis.Token")
    annotation = token_type(begin=0, end=3, id="0", pos="NNP")

    lenient_cas = Cas(lenient=True)

    # Must not raise even though the lenient CAS does not know "cassis.Token".
    lenient_cas.add_annotation(annotation)
def deserialize(self, source: Union[IO, str], typesystem: TypeSystem) -> Cas:
    """Parse an XMI document into a new :class:`Cas`.

    Streams the XML with ``etree.iterparse`` so processed elements can be
    freed as we go. Sofa and View elements are collected first; every other
    element is treated as an annotation. Views are then rebuilt on a fresh
    CAS and populated with the annotations they reference by xmi:id.

    Args:
        source: A file-like object or path/string accepted by ``etree.iterparse``.
        typesystem: Typesystem used to instantiate the parsed annotations.

    Returns:
        The deserialized CAS.

    Raises:
        RuntimeError: If the number of sofas and views does not match.
    """
    # namespaces
    NS_XMI = "{http://www.omg.org/XMI}"
    NS_CAS = "{http:///uima/cas.ecore}"

    TAG_XMI = NS_XMI + "XMI"
    TAG_CAS_NULL = NS_CAS + "NULL"
    TAG_CAS_SOFA = NS_CAS + "Sofa"
    TAG_CAS_VIEW = NS_CAS + "View"

    sofas = []           # parsed Sofa elements, in document order
    views = {}           # sofa xmi:id -> proto view (member id list)
    annotations = {}     # annotation xmi:id -> feature structure

    # Only "end" events: each element is complete when we see it.
    context = etree.iterparse(source, events=("end",))
    for event, elem in context:
        assert event == "end"

        if elem.tag == TAG_XMI:
            # Ignore the closing 'xmi:XMI' tag
            pass
        elif elem.tag == TAG_CAS_NULL:
            pass
        elif elem.tag == TAG_CAS_SOFA:
            sofa = self._parse_sofa(elem)
            sofas.append(sofa)
        elif elem.tag == TAG_CAS_VIEW:
            proto_view = self._parse_view(elem)
            views[proto_view.sofa] = proto_view
        else:
            # Anything that is not XMI/NULL/Sofa/View is an annotation.
            annotation = self._parse_annotation(typesystem, elem)
            annotations[annotation.xmiID] = annotation

        # Free already processed elements from memory
        self._clear_elem(elem)

    if len(sofas) != len(views):
        raise RuntimeError("Number of views and sofas is not equal!")

    cas = Cas()
    for sofa in sofas:
        # Each sofa has exactly one view (checked above); match them by xmi:id.
        proto_view = views[sofa.xmiID]

        if sofa.sofaID == "_InitialView":
            # The initial view already exists on a fresh CAS; do not create it.
            view = cas.get_view("_InitialView")
        else:
            view = cas.create_view(sofa.sofaID)

        view.sofa_string = sofa.sofaString
        view.sofa_mime = sofa.mimeType

        # Re-attach the view's member annotations, resolved by xmi:id.
        for member_id in proto_view.members:
            annotation = annotations[member_id]
            view.add_annotation(annotation)

    return cas
def test_leniency_type_not_in_typeystem_not_lenient(small_typesystem_xml):
    """A strict CAS rejects annotations of types absent from its typesystem."""
    token_type = load_typesystem(small_typesystem_xml).get_type("cassis.Token")
    annotation = token_type(begin=0, end=3, id="0", pos="NNP")

    strict_cas = Cas()

    with pytest.raises(RuntimeError, match="Typesystem of CAS does not contain type"):
        strict_cas.add_annotation(annotation)
def test_get_covered_text_sentences(sentences):
    """get_covered_text maps each sentence to its substring of the sofa."""
    document_text = 'Joe waited for the train . The train was late .'
    cas = Cas(annotations=sentences,
              sofas=[Sofa(sofaNum=1, sofaString=document_text)])

    observed = [cas.get_covered_text(s) for s in sentences]

    assert observed == ['Joe waited for the train .', 'The train was late .']
def test_select(tokens, sentences):
    """select() returns exactly the annotations of the requested type, in order."""
    cas = Cas(annotations=tokens + sentences)

    assert list(cas.select('cassis.Token')) == tokens
    assert list(cas.select('cassis.Sentence')) == sentences
def test_removing_throws_if_fs_in_other_view(small_typesystem_xml, tokens, sentences):
    """Removing an annotation from a view it was never added to must fail."""
    cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
    cas.add_annotations(tokens)

    other_view = cas.create_view("testView")

    # tokens[0] lives in the initial view only, not in `testView`.
    with pytest.raises(ValueError):
        other_view.remove_annotation(tokens[0])
def test_annotations_are_ordered_correctly(tokens):
    """Annotations added in random order are returned sorted by position."""
    shuffled = list(tokens)
    # Fixed: the original called `random.shuffle(list(annotations))`, which
    # shuffled a throwaway copy — the insertion order was never randomized,
    # so the test could not catch an ordering bug.
    random.shuffle(shuffled)

    cas = Cas()
    for token in shuffled:
        cas.add_annotation(token)

    assert list(cas.select("cassis.Token")) == tokens
def test_get_covered_text_tokens(tokens):
    """get_covered_text maps each token to its substring of the sofa."""
    document_text = 'Joe waited for the train . The train was late .'
    cas = Cas(annotations=tokens,
              sofas=[Sofa(sofaNum=1, sofaString=document_text)])

    observed = [cas.get_covered_text(t) for t in tokens]

    assert observed == ['Joe', 'waited', 'for', 'the', 'train', '.',
                        'The', 'train', 'was', 'late', '.']
def test_annotations_are_ordered_correctly(small_typesystem_xml, tokens):
    """Annotations added in random order come back sorted by position."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)

    shuffled = list(tokens)
    # Fixed: previously `random.shuffle(list(annotations))` shuffled a
    # temporary copy, leaving the actual insertion order unrandomized.
    random.shuffle(shuffled)

    for token in shuffled:
        cas.add_annotation(token)

    assert list(cas.select("cassis.Token")) == tokens
def test_select_covered(tokens, sentences):
    """select_covered returns exactly the tokens lying inside each sentence."""
    cas = Cas(annotations=tokens + sentences)
    sentence_one, sentence_two = sentences

    covered_one = list(cas.select_covered('cassis.Token', sentence_one))
    covered_two = list(cas.select_covered('cassis.Token', sentence_two))

    # The first six tokens form the first sentence, the rest the second.
    assert covered_one == tokens[:6]
    assert covered_two == tokens[6:]
def _get_or_create_view(self, cas: Cas, view_name: str, fs_id: Optional[int] = None, sofa_num: Optional[int] = None) -> Cas:
    """Return the view named `view_name`, creating it unless it is the default view."""
    if view_name != NAME_DEFAULT_SOFA:
        # Non-default views do not exist yet on a fresh CAS.
        return cas.create_view(view_name, xmiID=fs_id, sofaNum=sofa_num)

    default_view = cas.get_view(NAME_DEFAULT_SOFA)
    # We need to make sure that the sofa gets the real xmi, see #155
    if fs_id is not None:
        default_view.get_sofa().xmiID = fs_id
    return default_view
def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) -> Union[str, None]:
    """Serialize `cas` to XMI, writing to `sink` or returning a string.

    Args:
        sink: File-like object or path to write to; if ``None``, the XMI is
            returned as a string instead of being written anywhere.
        cas: The CAS to serialize.
        pretty_print: Whether to indent the emitted XML.

    Returns:
        The XMI document as a ``str`` when ``sink`` is ``None``, else ``None``.
    """
    xmi_attrs = {"{http://www.omg.org/XMI}version": "2.0"}

    root = etree.Element(etree.QName(self._nsmap["xmi"], "XMI"), nsmap=self._nsmap, **xmi_attrs)

    self._serialize_cas_null(root)

    # Find all fs, even the ones that are not directly added to a sofa
    # (sorted by xmiID so the output element order is deterministic).
    for fs in sorted(cas._find_all_fs(), key=lambda a: a.xmiID):
        self._serialize_feature_structure(cas, root, fs)

    for sofa in cas.sofas:
        self._serialize_sofa(root, sofa)

    for view in cas.views:
        self._serialize_view(root, view)

    doc = etree.ElementTree(root)
    # Drop namespace declarations that ended up unused in the tree.
    etree.cleanup_namespaces(doc, top_nsmap=self._nsmap)

    # No sink given: render into an in-memory buffer and return the text.
    return_str = sink is None
    if return_str:
        sink = BytesIO()

    doc.write(sink, xml_declaration=True, pretty_print=pretty_print, encoding="UTF-8")

    if return_str:
        return sink.getvalue().decode("utf-8")
    return None
def test_removing_of_existing_fs_works(small_typesystem_xml, tokens, sentences):
    """Removing annotations one by one eventually empties the CAS."""
    cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
    cas.add_annotations(tokens + sentences)

    # After dropping every token, only the sentences remain.
    for token in tokens:
        cas.remove_annotation(token)
    assert set(cas.select("uima.tcas.Annotation")) == set(sentences)

    # After dropping the sentences as well, nothing remains.
    for sentence in sentences:
        cas.remove_annotation(sentence)
    assert set(cas.select("uima.tcas.Annotation")) == set()
def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """add_annotation assigns an xmiID to annotations created without one."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)
    TokenType = typesystem.get_type("cassis.Token")

    # Rebinds the `tokens` parameter: these fresh instances carry no xmiID yet.
    tokens = [
        TokenType(begin=0, end=3, id="0", pos="NNP"),
        TokenType(begin=4, end=10, id="1", pos="VBD"),
        TokenType(begin=11, end=14, id="2", pos="IN"),
        TokenType(begin=15, end=18, id="3", pos="DT"),
        TokenType(begin=19, end=24, id="4", pos="NN"),
        TokenType(begin=25, end=26, id="5", pos="."),
    ]
    for token in tokens:
        cas.add_annotation(token)

    selected = list(cas.select(TokenType.name))
    assert all([token.xmiID is not None for token in selected])
def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """add_annotation assigns an xmiID to annotations created without one."""
    cas = Cas()
    typesystem = load_typesystem(small_typesystem_xml)
    TokenType = typesystem.get_type('cassis.Token')

    # Rebinds the `tokens` parameter: these fresh instances carry no xmiID yet.
    tokens = [
        TokenType(sofa=1, begin=0, end=3, id='0', pos='NNP'),
        TokenType(sofa=1, begin=4, end=10, id='1', pos='VBD'),
        TokenType(sofa=1, begin=11, end=14, id='2', pos='IN'),
        TokenType(sofa=1, begin=15, end=18, id='3', pos='DT'),
        TokenType(sofa=1, begin=19, end=24, id='4', pos='NN'),
        TokenType(sofa=1, begin=25, end=26, id='5', pos='.'),
    ]
    for token in tokens:
        cas.add_annotation(token)

    actual_tokens = list(cas.select(TokenType.name))
    # Fixed: compare against None with identity (`is not None`), not `!= None`.
    assert all([token.xmiID is not None for token in actual_tokens])
def test_add_annotation(small_typesystem_xml):
    """Annotations added with explicit xmiIDs are returned unchanged by select."""
    cas = Cas(sofas=[Sofa(sofaNum=1, sofaString='Joe waited for the train .')])
    TokenType = load_typesystem(small_typesystem_xml).get_type('cassis.Token')

    expected_tokens = [
        TokenType(xmiID=13, sofa=1, begin=0, end=3, id='0', pos='NNP'),
        TokenType(xmiID=19, sofa=1, begin=4, end=10, id='1', pos='VBD'),
        TokenType(xmiID=25, sofa=1, begin=11, end=14, id='2', pos='IN'),
        TokenType(xmiID=31, sofa=1, begin=15, end=18, id='3', pos='DT'),
        TokenType(xmiID=37, sofa=1, begin=19, end=24, id='4', pos='NN'),
        TokenType(xmiID=43, sofa=1, begin=25, end=26, id='5', pos='.'),
    ]
    for annotation in expected_tokens:
        cas.add_annotation(annotation)

    assert list(cas.select(TokenType.name)) == expected_tokens
def test_removing_removes_from_view(small_typesystem_xml, tokens, sentences):
    """Removal in one view must not affect another view holding the same FS."""
    everything = tokens + sentences
    cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
    extra_view = cas.create_view("testView")

    cas.add_annotations(everything)
    extra_view.add_annotations(everything)

    # Remove from the initial view only.
    for fs in everything:
        cas.remove_annotation(fs)

    assert set(cas.select("uima.tcas.Annotation")) == set()
    assert set(extra_view.select("uima.tcas.Annotation")) == set(everything)
def test_select_covering_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
    """select_covering yields covering annotations of the type and its subtypes."""
    typesystem = load_typesystem(small_typesystem_xml)
    SubSentenceType = typesystem.create_type("cassis.SubSentence", supertypeName="cassis.Sentence")
    cas = Cas(typesystem=typesystem)

    sentence_one, sentence_two = sentences
    # Subtype instances spanning exactly the same offsets as the sentences.
    sub_one = SubSentenceType(begin=sentence_one.begin, end=sentence_one.end)
    sub_two = SubSentenceType(begin=sentence_two.begin, end=sentence_two.end)
    cas.add_annotations(tokens + sentences + [sub_one, sub_two])

    for token in tokens[:6]:
        covering = set(cas.select_covering("cassis.Sentence", token))
        assert covering == {sentence_one, sub_one}
    for token in tokens[6:]:
        covering = set(cas.select_covering("cassis.Sentence", token))
        assert covering == {sentence_two, sub_two}
def test_select_covered_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
    """select_covered yields covered annotations of the type and its subtypes."""
    typesystem = load_typesystem(small_typesystem_xml)
    SubTokenType = typesystem.create_type("cassis.SubToken", supertypeName="cassis.Token")

    # Subtype instances inside the first and second sentence respectively.
    sub_a = SubTokenType(begin=tokens[2].begin, end=tokens[3].end)
    sub_b = SubTokenType(begin=tokens[8].begin, end=tokens[8].end)

    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens + sentences + [sub_a, sub_b])

    sentence_one, sentence_two = sentences
    covered_one = list(cas.select_covered("cassis.Token", sentence_one))
    covered_two = list(cas.select_covered("cassis.Token", sentence_two))

    assert set(covered_one) == set(tokens[:6] + [sub_a])
    assert set(covered_two) == set(tokens[6:] + [sub_b])
def test_create_view_throws_if_view_already_exists():
    """Creating a view twice with the same name must raise a ValueError."""
    cas = Cas()
    cas.create_view("testView")

    # Fixed: the `message=` kwarg of pytest.raises was removed in pytest 4.0;
    # `match=` takes a regex, so the literal brackets must be escaped so they
    # are not parsed as a character class.
    with pytest.raises(ValueError, match=r"A view with name \[testView\] already exists!"):
        cas.create_view("testView")
def test_select_only_returns_annotations_of_current_view(tokens, sentences, small_typesystem_xml):
    """select_all on a view yields only that view's own annotations."""
    cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
    cas.add_annotations(tokens)

    second_view = cas.create_view("testView")
    second_view.add_annotations(sentences)

    assert list(cas.get_view("_InitialView").select_all()) == tokens
    assert list(cas.get_view("testView").select_all()) == sentences
def test_select(tokens, sentences):
    """select() returns exactly the annotations of the requested type, in order."""
    cas = Cas()
    cas.add_annotations(tokens + sentences)

    assert list(cas.select("cassis.Token")) == tokens
    assert list(cas.select("cassis.Sentence")) == sentences
def test_select(small_typesystem_xml, tokens, sentences):
    """select() returns exactly the annotations of the requested type, in order."""
    cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
    cas.add_annotations(tokens + sentences)

    assert list(cas.select("cassis.Token")) == tokens
    assert list(cas.select("cassis.Sentence")) == sentences