コード例 #1
0
ファイル: test_cas.py プロジェクト: malteos/dkpro-cassis
def test_get_view_throws_if_view_does_not_exist():
    """Asking a fresh CAS for the view ``testView`` must raise ``KeyError``."""
    cas = Cas()

    # ``match`` is a regular expression searched in the text of the raised
    # error, so the literal square brackets have to be escaped.  The former
    # ``message`` keyword never inspected the exception (it was only the
    # failure text when nothing was raised) and was removed in pytest 5.
    with pytest.raises(KeyError, match=r"There is no view with name \[testView\] in this CAS!"):
        cas.get_view("testView")
コード例 #2
0
ファイル: test_cas.py プロジェクト: hatzel/dkpro-cassis
def test_default_typesystem_is_not_shared():
    """Two separately created CAS objects can each define ``test.Type``."""
    # https://github.com/dkpro/dkpro-cassis/issues/67
    first_cas, second_cas = Cas(), Cas()

    # There is no explicit assertion: the test passes as long as neither
    # ``create_type`` call raises.
    first_type = first_cas.typesystem.create_type(name="test.Type")
    second_type = second_cas.typesystem.create_type(name="test.Type")
コード例 #3
0
ファイル: test_cas.py プロジェクト: malteos/dkpro-cassis
def test_initial_view_is_created():
    """A new CAS can be asked for ``_InitialView`` and its sofa carries that ID."""
    cas = Cas()
    initial_sofa = cas.get_view("_InitialView").get_sofa()

    attr.validate(initial_sofa)
    assert initial_sofa.sofaID == "_InitialView"
コード例 #4
0
def test_FeatureStructure_get_covered_text_tokens(tokens):
    """Calling ``get_covered_text()`` on each token yields the expected words."""
    document = Cas()
    document.sofa_string = "Joe waited for the train . The train was late ."

    expected_text = ["Joe", "waited", "for", "the", "train", ".", "The", "train", "was", "late", "."]

    # Collect the covered text token by token, in fixture order.
    covered_words = []
    for token in tokens:
        covered_words.append(token.get_covered_text())

    assert covered_words == expected_text
コード例 #5
0
ファイル: test_cas.py プロジェクト: malteos/dkpro-cassis
def test_get_covered_text_sentences(sentences):
    """``Cas.get_covered_text`` returns the text of each sentence annotation."""
    document = Cas()
    document.sofa_string = "Joe waited for the train . The train was late ."

    expected_text = ["Joe waited for the train .", "The train was late ."]

    # Collect the covered text sentence by sentence, in fixture order.
    covered_sentences = []
    for sentence in sentences:
        covered_sentences.append(document.get_covered_text(sentence))

    assert covered_sentences == expected_text
コード例 #6
0
ファイル: test_cas.py プロジェクト: malteos/dkpro-cassis
def test_create_view_creates_view():
    """``create_view`` returns a view whose sofa carries the requested name."""
    cas = Cas()
    created_sofa = cas.create_view("testView").get_sofa()

    attr.validate(created_sofa)
    assert created_sofa.sofaID == "testView"
コード例 #7
0
def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
    """Selecting ``uima.tcas.Annotation`` returns every token and sentence added."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)

    everything = tokens + sentences
    cas.add_annotations(everything)

    # Compared as sets, so the order in which ``select`` yields is irrelevant.
    selected = set(cas.select("uima.tcas.Annotation"))

    assert selected == set(everything)
コード例 #8
0
ファイル: test_cas.py プロジェクト: hatzel/dkpro-cassis
def test_leniency_type_not_in_typeystem_lenient(small_typesystem_xml):
    """A CAS built with ``lenient=True`` accepts a token of a separately loaded type.

    The CAS is created without the loaded typesystem; the test passes as long
    as ``add_annotation`` does not raise.
    """
    token_type = load_typesystem(small_typesystem_xml).get_type("cassis.Token")
    foreign_token = token_type(begin=0, end=3, id="0", pos="NNP")

    lenient_cas = Cas(lenient=True)
    lenient_cas.add_annotation(foreign_token)
コード例 #9
0
    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem):
        """Build a ``Cas`` from the XMI document readable from ``source``.

        The document is parsed incrementally on element "end" events. Sofa and
        view elements are collected separately; every other element (apart
        from the XMI root and the CAS ``NULL`` element) is handed to
        ``_parse_annotation`` together with ``typesystem``. Afterwards each
        sofa is paired with the parsed view stored under the sofa's ``xmiID``
        and the annotations listed as members of that view are added to it.

        Note that the returned ``Cas`` is constructed with no arguments;
        ``typesystem`` is only used while parsing annotations.

        Raises:
            RuntimeError: if the number of sofas differs from the number of
                views collected.
        """
        # Fully qualified tag names have the form "{namespace}local-name".
        xmi_namespace = "{http://www.omg.org/XMI}"
        cas_namespace = "{http:///uima/cas.ecore}"

        root_tag = xmi_namespace + "XMI"
        null_tag = cas_namespace + "NULL"
        sofa_tag = cas_namespace + "Sofa"
        view_tag = cas_namespace + "View"

        # The closing 'xmi:XMI' tag and the 'cas:NULL' element are ignored.
        ignored_tags = (root_tag, null_tag)

        parsed_sofas = []
        views_by_sofa = {}
        annotations_by_id = {}

        for event, element in etree.iterparse(source, events=("end",)):
            assert event == "end"

            tag = element.tag
            if tag == sofa_tag:
                parsed_sofas.append(self._parse_sofa(element))
            elif tag == view_tag:
                parsed_view = self._parse_view(element)
                views_by_sofa[parsed_view.sofa] = parsed_view
            elif tag not in ignored_tags:
                parsed_annotation = self._parse_annotation(typesystem, element)
                annotations_by_id[parsed_annotation.xmiID] = parsed_annotation

            # Free already processed elements from memory
            self._clear_elem(element)

        if len(parsed_sofas) != len(views_by_sofa):
            raise RuntimeError("Number of views and sofas is not equal!")

        cas = Cas()
        for sofa in parsed_sofas:
            parsed_view = views_by_sofa[sofa.xmiID]

            # The initial view is fetched from the new CAS; any other view is created.
            is_initial_view = sofa.sofaID == "_InitialView"
            view = cas.get_view("_InitialView") if is_initial_view else cas.create_view(sofa.sofaID)

            view.sofa_string = sofa.sofaString
            view.sofa_mime = sofa.mimeType

            for member_id in parsed_view.members:
                view.add_annotation(annotations_by_id[member_id])

        return cas
コード例 #10
0
def test_leniency_type_not_in_typeystem_not_lenient(small_typesystem_xml):
    """A CAS built with default arguments rejects a token of a separately loaded type.

    The CAS is created without the loaded typesystem, and ``add_annotation``
    is expected to raise ``RuntimeError``.
    """
    token_type = load_typesystem(small_typesystem_xml).get_type("cassis.Token")
    foreign_token = token_type(begin=0, end=3, id="0", pos="NNP")

    strict_cas = Cas()
    with pytest.raises(RuntimeError, match="Typesystem of CAS does not contain type"):
        strict_cas.add_annotation(foreign_token)
コード例 #11
0
def test_get_covered_text_sentences(sentences):
    """``Cas.get_covered_text`` returns the text of each sentence annotation."""
    document_sofa = Sofa(
        sofaNum=1,
        sofaString='Joe waited for the train . The train was late .',
    )
    cas = Cas(annotations=sentences, sofas=[document_sofa])

    expected_text = ['Joe waited for the train .', 'The train was late .']

    # Collect the covered text sentence by sentence, in fixture order.
    covered_sentences = []
    for sentence in sentences:
        covered_sentences.append(cas.get_covered_text(sentence))

    assert covered_sentences == expected_text
コード例 #12
0
def test_select(tokens, sentences):
    """``select`` returns exactly the annotations of the requested type, in order."""
    cas = Cas(annotations=tokens + sentences)

    selected_tokens = [*cas.select('cassis.Token')]
    selected_sentences = [*cas.select('cassis.Sentence')]

    assert selected_tokens == tokens
    assert selected_sentences == sentences
コード例 #13
0
ファイル: test_cas.py プロジェクト: hatzel/dkpro-cassis
def test_removing_throws_if_fs_in_other_view(small_typesystem_xml, tokens,
                                             sentences):
    """Removing a token through a view it was not added to raises ``ValueError``."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens)

    # The tokens were added through ``cas`` only, never through this view.
    other_view = cas.create_view("testView")
    first_token = tokens[0]

    with pytest.raises(ValueError):
        other_view.remove_annotation(first_token)
コード例 #14
0
ファイル: test_cas.py プロジェクト: malteos/dkpro-cassis
def test_annotations_are_ordered_correctly(tokens):
    """Tokens added in shuffled order are returned by ``select`` in fixture order."""
    annotations = list(tokens)
    # Shuffle the copy itself.  ``random.shuffle(list(annotations))`` only
    # reordered a throwaway list, so the tokens were always inserted in their
    # original order and the ordering was never really exercised.
    random.shuffle(annotations)

    cas = Cas()
    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens
コード例 #15
0
def test_get_covered_text_tokens(tokens):
    """``Cas.get_covered_text`` returns the word covered by each token."""
    document_sofa = Sofa(
        sofaNum=1,
        sofaString='Joe waited for the train . The train was late .',
    )
    cas = Cas(annotations=tokens, sofas=[document_sofa])

    expected_text = [
        'Joe', 'waited', 'for', 'the', 'train', '.',
        'The', 'train', 'was', 'late', '.',
    ]

    # Collect the covered text token by token, in fixture order.
    covered_words = []
    for token in tokens:
        covered_words.append(cas.get_covered_text(token))

    assert covered_words == expected_text
コード例 #16
0
ファイル: test_cas.py プロジェクト: hatzel/dkpro-cassis
def test_annotations_are_ordered_correctly(small_typesystem_xml, tokens):
    """Tokens added in shuffled order are returned by ``select`` in fixture order."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)

    annotations = list(tokens)
    # Shuffle the copy itself.  ``random.shuffle(list(annotations))`` only
    # reordered a throwaway list, so the tokens were always inserted in their
    # original order and the ordering was never really exercised.
    random.shuffle(annotations)

    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens
コード例 #17
0
def test_select_covered(tokens, sentences):
    """``select_covered`` returns the tokens lying inside each sentence.

    The first six fixture tokens are expected for the first sentence and the
    remaining ones for the second.
    """
    cas = Cas(annotations=tokens + sentences)
    # Unpacking also requires the fixture to hold exactly two sentences.
    first_sentence, second_sentence = sentences
    expected_first, expected_second = tokens[:6], tokens[6:]

    covered_by_first = [*cas.select_covered('cassis.Token', first_sentence)]
    covered_by_second = [*cas.select_covered('cassis.Token', second_sentence)]

    assert covered_by_first == expected_first
    assert covered_by_second == expected_second
コード例 #18
0
    def _get_or_create_view(self,
                            cas: Cas,
                            view_name: str,
                            fs_id: Optional[int] = None,
                            sofa_num: Optional[int] = None) -> Cas:
        """Return the view of ``cas`` named ``view_name``, creating it if it is not the default.

        For any name other than ``NAME_DEFAULT_SOFA`` a new view is created,
        with ``fs_id`` and ``sofa_num`` passed on as ``xmiID`` and ``sofaNum``.
        For the default name the view is fetched from ``cas``; ``sofa_num``
        is not used in that case, and ``fs_id`` (when given) is written to
        the ``xmiID`` of that view's sofa.
        """
        if view_name != NAME_DEFAULT_SOFA:
            return cas.create_view(view_name, xmiID=fs_id, sofaNum=sofa_num)

        default_view = cas.get_view(NAME_DEFAULT_SOFA)

        # We need to make sure that the sofa gets the real xmi, see #155
        if fs_id is not None:
            default_view.get_sofa().xmiID = fs_id

        return default_view
コード例 #19
0
    def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) -> Union[str, None]:
        """Write ``cas`` as an XMI document.

        Feature structures are emitted in ascending ``xmiID`` order, followed
        by the sofas and then the views of ``cas``.

        Args:
            sink: Target handed to the tree's ``write`` call, or ``None`` to
                get the document back as a string.
            cas: The CAS to serialize.
            pretty_print: Forwarded to the tree's ``write`` call.

        Returns:
            The UTF-8 decoded document when ``sink`` is ``None``, otherwise ``None``.
        """
        root_attributes = {"{http://www.omg.org/XMI}version": "2.0"}
        root_name = etree.QName(self._nsmap["xmi"], "XMI")
        xmi_root = etree.Element(root_name, nsmap=self._nsmap, **root_attributes)

        self._serialize_cas_null(xmi_root)

        # Find all fs, even the ones that are not directly added to a sofa
        feature_structures = list(cas._find_all_fs())
        feature_structures.sort(key=lambda item: item.xmiID)
        for feature_structure in feature_structures:
            self._serialize_feature_structure(cas, xmi_root, feature_structure)

        for sofa in cas.sofas:
            self._serialize_sofa(xmi_root, sofa)

        for view in cas.views:
            self._serialize_view(xmi_root, view)

        tree = etree.ElementTree(xmi_root)
        etree.cleanup_namespaces(tree, top_nsmap=self._nsmap)

        if sink is not None:
            tree.write(sink, xml_declaration=True, pretty_print=pretty_print, encoding="UTF-8")
            return None

        # No sink given: render into memory and hand the text back.
        buffer = BytesIO()
        tree.write(buffer, xml_declaration=True, pretty_print=pretty_print, encoding="UTF-8")
        return buffer.getvalue().decode("utf-8")
コード例 #20
0
ファイル: test_cas.py プロジェクト: hatzel/dkpro-cassis
def test_removing_of_existing_fs_works(small_typesystem_xml, tokens,
                                       sentences):
    """Removed annotations no longer show up in ``select``.

    After the tokens are removed only the sentences remain; after the
    sentences are removed as well nothing remains.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens + sentences)

    # Stage 1: drop every token.
    for token in tokens:
        cas.remove_annotation(token)

    remaining = set(cas.select("uima.tcas.Annotation"))
    assert remaining == set(sentences)

    # Stage 2: drop every sentence as well.
    for sentence in sentences:
        cas.remove_annotation(sentence)

    remaining = set(cas.select("uima.tcas.Annotation"))
    assert remaining == set()
コード例 #21
0
ファイル: test_cas.py プロジェクト: malteos/dkpro-cassis
def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """Tokens created without an ``xmiID`` have one once selected from the CAS."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)
    TokenType = typesystem.get_type("cassis.Token")

    # (begin, end, id, pos) of the tokens to add; none of them is given an xmiID.
    token_specs = [
        (0, 3, "0", "NNP"),
        (4, 10, "1", "VBD"),
        (11, 14, "2", "IN"),
        (15, 18, "3", "DT"),
        (19, 24, "4", "NN"),
        (25, 26, "5", "."),
    ]
    new_tokens = [
        TokenType(begin=begin, end=end, id=token_id, pos=pos)
        for begin, end, token_id, pos in token_specs
    ]
    for new_token in new_tokens:
        cas.add_annotation(new_token)

    selected_tokens = list(cas.select(TokenType.name))
    has_id = [selected.xmiID is not None for selected in selected_tokens]
    assert all(has_id)
コード例 #22
0
def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """Tokens created without an ``xmiID`` have one once selected from the CAS."""
    cas = Cas()
    typesystem = load_typesystem(small_typesystem_xml)
    TokenType = typesystem.get_type('cassis.Token')

    # Deliberately built without ``xmiID``.  The local list shadows the
    # ``tokens`` fixture, which stays in the signature for compatibility.
    tokens = [
        TokenType(sofa=1, begin=0, end=3, id='0', pos='NNP'),
        TokenType(sofa=1, begin=4, end=10, id='1', pos='VBD'),
        TokenType(sofa=1, begin=11, end=14, id='2', pos='IN'),
        TokenType(sofa=1, begin=15, end=18, id='3', pos='DT'),
        TokenType(sofa=1, begin=19, end=24, id='4', pos='NN'),
        TokenType(sofa=1, begin=25, end=26, id='5', pos='.'),
    ]
    for token in tokens:
        cas.add_annotation(token)

    actual_tokens = list(cas.select(TokenType.name))
    # Identity check: ``!= None`` goes through ``__ne__`` and is not the
    # idiomatic (or fully reliable) way to test for ``None``.
    assert all(token.xmiID is not None for token in actual_tokens)
コード例 #23
0
def test_add_annotation(small_typesystem_xml):
    """Tokens added one by one are returned by ``select`` unchanged and in order."""
    document_sofa = Sofa(sofaNum=1, sofaString='Joe waited for the train .')
    cas = Cas(sofas=[document_sofa])
    typesystem = load_typesystem(small_typesystem_xml)
    TokenType = typesystem.get_type('cassis.Token')

    # (xmiID, begin, end, id, pos) of the tokens to add.
    token_specs = [
        (13, 0, 3, '0', 'NNP'),
        (19, 4, 10, '1', 'VBD'),
        (25, 11, 14, '2', 'IN'),
        (31, 15, 18, '3', 'DT'),
        (37, 19, 24, '4', 'NN'),
        (43, 25, 26, '5', '.'),
    ]
    expected_tokens = [
        TokenType(xmiID=xmi_id, sofa=1, begin=begin, end=end, id=token_id, pos=pos)
        for xmi_id, begin, end, token_id, pos in token_specs
    ]
    for expected_token in expected_tokens:
        cas.add_annotation(expected_token)

    selected_tokens = list(cas.select(TokenType.name))
    assert selected_tokens == expected_tokens
コード例 #24
0
ファイル: test_cas.py プロジェクト: hatzel/dkpro-cassis
def test_removing_removes_from_view(small_typesystem_xml, tokens, sentences):
    """Removing annotations through ``cas`` leaves those added to ``testView`` in place."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    test_view = cas.create_view("testView")
    everything = tokens + sentences

    # The same annotations are added through both the CAS and the extra view.
    cas.add_annotations(everything)
    test_view.add_annotations(everything)

    for feature_structure in everything:
        cas.remove_annotation(feature_structure)

    assert set(cas.select("uima.tcas.Annotation")) == set()
    assert set(test_view.select("uima.tcas.Annotation")) == set(everything)
コード例 #25
0
ファイル: test_cas.py プロジェクト: hatzel/dkpro-cassis
def test_select_covering_also_returns_parent_instances(small_typesystem_xml,
                                                       tokens, sentences):
    """``select_covering("cassis.Sentence", ...)`` also yields ``cassis.SubSentence`` instances.

    A ``cassis.SubSentence`` type with supertype ``cassis.Sentence`` is
    created and one instance per sentence is added with that sentence's
    offsets. Each token must then be covered by both its sentence and the
    matching sub-sentence.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    SubSentenceType = typesystem.create_type("cassis.SubSentence",
                                             supertypeName="cassis.Sentence")

    cas = Cas(typesystem=typesystem)

    # Unpacking also requires the fixture to hold exactly two sentences.
    first_sentence, second_sentence = sentences
    subsentence1 = SubSentenceType(begin=first_sentence.begin,
                                   end=first_sentence.end)
    subsentence2 = SubSentenceType(begin=second_sentence.begin,
                                   end=second_sentence.end)
    cas.add_annotations(tokens + sentences + [subsentence1, subsentence2])

    # The first six fixture tokens are expected in the first sentence, the rest in the second.
    expected_by_token = [(token, {first_sentence, subsentence1}) for token in tokens[:6]]
    expected_by_token += [(token, {second_sentence, subsentence2}) for token in tokens[6:]]

    for token, expected_covering in expected_by_token:
        covering = set(cas.select_covering("cassis.Sentence", token))

        assert covering == expected_covering
コード例 #26
0
ファイル: test_cas.py プロジェクト: hatzel/dkpro-cassis
def test_select_covered_also_returns_parent_instances(small_typesystem_xml,
                                                      tokens, sentences):
    """``select_covered("cassis.Token", ...)`` also yields ``cassis.SubToken`` instances.

    A ``cassis.SubToken`` type with supertype ``cassis.Token`` is created and
    two instances are added, one expected in each sentence. The result is
    compared as a set, so order is irrelevant.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    SubTokenType = typesystem.create_type("cassis.SubToken",
                                          supertypeName="cassis.Token")

    # One sub-token spans tokens 2..3, the other has the offsets of token 8.
    subtoken1 = SubTokenType(begin=tokens[2].begin, end=tokens[3].end)
    subtoken2 = SubTokenType(begin=tokens[8].begin, end=tokens[8].end)

    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens + sentences + [subtoken1, subtoken2])

    # Unpacking also requires the fixture to hold exactly two sentences.
    first_sentence, second_sentence = sentences
    expected_in_first = {*tokens[:6], subtoken1}
    expected_in_second = {*tokens[6:], subtoken2}

    covered_by_first = list(cas.select_covered("cassis.Token", first_sentence))
    covered_by_second = list(cas.select_covered("cassis.Token", second_sentence))

    assert set(covered_by_first) == expected_in_first
    assert set(covered_by_second) == expected_in_second
コード例 #27
0
ファイル: test_cas.py プロジェクト: malteos/dkpro-cassis
def test_create_view_throws_if_view_already_exists():
    """Creating the view ``testView`` a second time must raise ``ValueError``."""
    cas = Cas()
    cas.create_view("testView")

    # ``match`` is a regular expression searched in the text of the raised
    # error, so the literal square brackets have to be escaped.  The former
    # ``message`` keyword never inspected the exception (it was only the
    # failure text when nothing was raised) and was removed in pytest 5.
    with pytest.raises(ValueError, match=r"A view with name \[testView\] already exists!"):
        cas.create_view("testView")
コード例 #28
0
def test_select_only_returns_annotations_of_current_view(tokens, sentences, small_typesystem_xml):
    """``select_all`` on a view yields only what was added through that view.

    Tokens are added through ``cas`` and sentences through ``testView``;
    ``_InitialView`` must return the tokens and ``testView`` the sentences.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens)
    cas.create_view("testView").add_annotations(sentences)

    in_initial_view = [*cas.get_view("_InitialView").select_all()]
    in_test_view = [*cas.get_view("testView").select_all()]

    assert tokens == in_initial_view
    assert sentences == in_test_view
コード例 #29
0
ファイル: test_cas.py プロジェクト: malteos/dkpro-cassis
def test_select(tokens, sentences):
    """``select`` returns exactly the annotations of the requested type, in order."""
    cas = Cas()
    everything = tokens + sentences
    cas.add_annotations(everything)

    selected_tokens = [*cas.select("cassis.Token")]
    selected_sentences = [*cas.select("cassis.Sentence")]

    assert selected_tokens == tokens
    assert selected_sentences == sentences
コード例 #30
0
ファイル: test_cas.py プロジェクト: hatzel/dkpro-cassis
def test_select(small_typesystem_xml, tokens, sentences):
    """``select`` returns exactly the annotations of the requested type, in order."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    everything = tokens + sentences
    cas.add_annotations(everything)

    selected_tokens = [*cas.select("cassis.Token")]
    selected_sentences = [*cas.select("cassis.Sentence")]

    assert selected_tokens == tokens
    assert selected_sentences == sentences