Ejemplo n.º 1
0
def test_deserializing_from_string():
    """Smoke test: ``load_typesystem`` accepts an XML document passed as ``str``.

    NOTE(review): the payload below is a CAS XMI document rather than a type
    system descriptor -- confirm that this is the intended input.
    """
    xmi_document = '''<?xml version="1.0" encoding="UTF-8"?>
    <xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore"
             xmlns:cassis="http:///cassis.ecore" xmi:version="2.0">
        <cas:NULL xmi:id="0"/>
        <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="47" language="x-unspecified"/>
        <cassis:Sentence xmi:id="79" sofa="1" begin="0" end="26" id="0"/>
        <cassis:Sentence xmi:id="84" sofa="1" begin="27" end="47" id="1"/>
        <cas:Sofa xmi:id="1" sofaNum="1" sofaID="mySofa" mimeType="text/plain"
                  sofaString="Joe waited for the train . The train was late ."/>
        <cas:View sofa="1" members="8 13 19 25 31 37 43 49 55 61 67 73 79 84"/>
    </xmi:XMI>    
    '''
    load_typesystem(xmi_document)
Ejemplo n.º 2
0
def test_deserializing_small_typesystem(small_typesystem_xml):
    """The small type system loads into three types equal to the expected ones."""
    loaded = load_typesystem(small_typesystem_xml)

    assert len(loaded) == 3

    # DocumentAnnotation carries a single string feature.
    expected_document_annotation = Type(
        'uima.tcas.DocumentAnnotation',
        '',
        'uima.tcas.Annotation',
        [Feature('language', '', 'uima.cas.String')],
    )
    actual_document_annotation = loaded.get_type('uima.tcas.DocumentAnnotation')
    assert actual_document_annotation == expected_document_annotation

    # Token carries an integer id and a string part-of-speech feature.
    expected_token = Type(
        'cassis.Token',
        '',
        'uima.tcas.Annotation',
        [
            Feature('id', '', 'uima.cas.Integer'),
            Feature('pos', '', 'uima.cas.String'),
        ],
    )
    actual_token = loaded.get_type('cassis.Token')
    assert actual_token == expected_token

    # Sentence only carries an integer id.
    expected_sentence = Type(
        'cassis.Sentence',
        '',
        'uima.tcas.Annotation',
        [Feature('id', '', 'uima.cas.Integer')],
    )
    actual_sentence = loaded.get_type('cassis.Sentence')
    assert actual_sentence == expected_sentence
Ejemplo n.º 3
0
def test_deserializing_small_typesystem(small_typesystem_xml):
    """Check names, supertypes and feature ranges of the small type system."""
    loaded = load_typesystem(small_typesystem_xml)

    assert len(list(loaded.get_types())) == 2

    # (type name, [(feature name, feature range type), ...]) in check order.
    expectations = [
        ("uima.tcas.DocumentAnnotation", [("language", "uima.cas.String")]),
        ("cassis.Token", [("id", "uima.cas.Integer"),
                          ("pos", "uima.cas.String")]),
        ("cassis.Sentence", [("id", "uima.cas.Integer")]),
    ]

    for type_name, expected_features in expectations:
        current_type = loaded.get_type(type_name)
        assert current_type.name == type_name
        assert current_type.supertypeName == "uima.tcas.Annotation"

        for feature_name, range_name in expected_features:
            feature = current_type.get_feature(feature_name)
            assert feature.name == feature_name
            assert feature.rangeTypeName == range_name
Ejemplo n.º 4
0
    def test_send_single_cas_from_python_to_ruta(self, notebook):
        """Load a CAS in the SoS kernel, ``%get`` it from Ruta, compare sofa letters."""

        def letters_only(text):
            # Special characters are ignored in the comparison.
            return [ch for ch in text if ch.isalpha()]

        # Resolve the test resources.
        ts_path = os.path.join(TEST_RESOURCE_DIR, "TypeSystem.xml")
        xmi_path = os.path.join(TEST_RESOURCE_DIR, "example.xmi")

        # Build a local reference CAS to compare against.
        with open(ts_path, 'rb') as ts_handle:
            reference_ts = cassis.load_typesystem(ts_handle)
        with open(xmi_path, 'rb') as xmi_handle:
            reference_cas = cassis.load_cas_from_xmi(xmi_handle, typesystem=reference_ts)

        # Code executed in a SoS notebook cell; it loads the same CAS there.
        sos_cell_source = f"""
        import cassis
        with open("{ts_path}", 'rb') as f:
            typesystem = cassis.load_typesystem(f)
        with open("{xmi_path}", 'rb') as f:
            cas_var = cassis.load_cas_from_xmi(f, typesystem=typesystem)
        """
        notebook.call(sos_cell_source, kernel=SOS_KERNEL_NAME)

        # Pull the variable into the Ruta kernel and capture what it displays.
        notebook.call("%get cas_var", kernel=RUTA_KERNEL_NAME)
        displayed_sofa = notebook.check_output("%displayMode RUTA_COLORING", kernel=RUTA_KERNEL_NAME)

        reference_sofa = reference_cas.sofa_string

        assert letters_only(displayed_sofa) == letters_only(reference_sofa)
Ejemplo n.º 5
0
    def __init__(
        self,
        args='object',
        xmi_string=None,
        text=None,
        cas_path=None,
        type_system_path='../pydkpro/typesystems/temp_TypeSytems.xml',
        token_type='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'
    ):
        """Set up the type system and the CAS held by this object.

        The CAS source is chosen in this order of precedence:

        1. ``xmi_string`` -- parsed with the DKPro Core type system.
        2. ``cas_path`` -- an XMI file parsed with the DKPro Core type system.
        3. ``args`` when it is a ``cassis.TypeSystem`` -- an empty CAS is
           created on top of it.
        4. Otherwise an empty ``text/plain`` CAS with an empty sofa string is
           created from the type system found at ``type_system_path``.

        :param args: optional ``cassis.TypeSystem`` for a new CAS (see above).
        :param xmi_string: XMI document to load the CAS from.
        :param text: value stored as ``self.text``; defaults to a short
            tokenized example sentence.
        :param cas_path: path of an XMI file to load the CAS from.
        :param type_system_path: path of the type system XML file that is
            always loaded into ``self.typesystem``.
        :param token_type: value stored as ``self.token_type``.
        """
        # A list literal as default would be shared between all instances.
        if text is None:
            text = [
                'Backgammon', 'is', 'one', 'of', 'the', 'oldest', 'known',
                'board', 'games', '.'
            ]

        self.args = args
        self.text = text
        self.cas_path = cas_path
        self.type_system_path = type_system_path
        self.token_type = token_type
        self.token_list = []
        with open(self.type_system_path, 'rb') as f:
            self.typesystem = load_typesystem(f)

        # One chain instead of two independent ``if`` statements: previously
        # the trailing ``else`` replaced a CAS loaded from ``cas_path`` (or
        # built from ``args``) with an empty one whenever ``xmi_string`` was
        # not given.
        if xmi_string:
            self.cas = load_cas_from_xmi(
                xmi_string, typesystem=load_dkpro_core_typesystem())
        elif cas_path:
            with open(self.cas_path, 'rb') as f:
                self.cas = load_cas_from_xmi(
                    f, typesystem=load_dkpro_core_typesystem())
        elif isinstance(self.args, cassis.TypeSystem):
            self.cas = cs(typesystem=self.args)
        else:
            self.cas = cs(typesystem=self.typesystem)
            self.cas.sofa_mime = "text/plain"
            self.cas.sofa_string = ""
Ejemplo n.º 6
0
def test_type_can_retrieve_children(typesystem_with_inheritance_xml):
    """``cassis.Child`` reports ``cassis.GrandChild`` as its only child."""
    loaded = load_typesystem(typesystem_with_inheritance_xml)
    child_type = loaded.get_type("cassis.Child")

    child_names = []
    for direct_child in child_type.children:
        child_names.append(direct_child.name)

    assert child_names == ["cassis.GrandChild"]
Ejemplo n.º 7
0
def test_serializing_typesystem_to_file(tmpdir, typesystem_xml):
    """Writing a loaded type system to a path yields XML equal to the input."""
    target_path = str(tmpdir.join("typesystem.xml"))

    loaded = load_typesystem(typesystem_xml)
    loaded.to_xml(target_path)

    # Read the written file back in binary mode and compare it to the source.
    with open(target_path, "rb") as written:
        assert_xml_equal(written, typesystem_xml)
def convert_stuff():
    """Load the Obama user-study CAS with its type system and featurize it."""
    study_dir = PATH_GENERATED + "/userstudy/obama"

    with open(study_dir + "/TypeSystem.xml", "rb") as ts_handle:
        obama_typesystem = load_typesystem(ts_handle)

    with open(study_dir + "/Wikipedia-Obama.xmi", "rb") as xmi_handle:
        obama_cas = load_cas_from_xmi(xmi_handle, obama_typesystem)

    featurize_cas(obama_cas)
Ejemplo n.º 9
0
def test_type_can_create_instance_with_deeply_inherited_fields(
        typesystem_with_inheritance_xml):
    """Features of distant ancestors appear in ``_inherited_features``.

    Regression test for https://github.com/dkpro/dkpro-cassis/issues/97
    """
    loaded = load_typesystem(typesystem_with_inheritance_xml)
    deepest_type = loaded.get_type("cassis.GrandGrandGrandChild")

    for inherited_name in ("parentFeature", "childFeature"):
        assert inherited_name in deepest_type._inherited_features
Ejemplo n.º 10
0
def test_that_typesystem_with_redefined_documentation_annotation_works(
    typesystem_with_redefined_documentannotation_xml, ):
    """A type system redefining DocumentAnnotation survives a load/serialize round trip."""
    source_xml = typesystem_with_redefined_documentannotation_xml

    round_tripped_xml = load_typesystem(source_xml).to_xml()

    assert_xml_equal(round_tripped_xml, source_xml)
Ejemplo n.º 11
0
def test_serializing_small_typesystem_to_file(tmpdir, small_typesystem_xml):
    """Serializing into an open binary file reproduces the source XML."""
    target = tmpdir.join('typesystem.xml')
    loaded = load_typesystem(small_typesystem_xml)

    with open(target, 'wb') as sink:
        loaded.to_xml(sink)

    with open(target, 'rb') as written:
        written_bytes = written.read()
        expected_bytes = small_typesystem_xml.encode('utf-8')
        assert_xml_equal(written_bytes, expected_bytes)
Ejemplo n.º 12
0
def test_is_instance_of(child_name: str, parent_name: str, expected: bool):
    """``is_instance_of`` returns the expected verdict for the given type pair."""
    # Fixtures and parametrization cannot be combined here, so the type
    # system file is read by hand.
    typesystem_file = os.path.join(FIXTURE_DIR, "typesystems",
                                   "important_dkpro_types.xml")

    with open(typesystem_file, "r") as handle:
        xml_text = handle.read()
    loaded = load_typesystem(xml_text)

    verdict = loaded.is_instance_of(child_name, parent_name)
    assert verdict == expected
Ejemplo n.º 13
0
def test_type_can_retrieve_descendants(typesystem_with_inheritance_xml):
    """``descendants`` of ``cassis.Child`` lists the type itself and everything below it."""
    loaded = load_typesystem(typesystem_with_inheritance_xml)
    child_type = loaded.get_type("cassis.Child")

    expected_names = [
        "cassis.Child",
        "cassis.GrandChild",
        "cassis.GrandGrandChild",
        "cassis.GrandGrandGrandChild",
    ]
    actual_names = []
    for descendant in child_type.descendants:
        actual_names.append(descendant.name)

    assert actual_names == expected_names
    def documents(self) -> List["TrainingDocument"]:
        """Parse the stored JSON documents into ``TrainingDocument`` objects."""
        # Parsing happens on access: sometimes, when already training, the
        # documents are not needed at all.
        shared_typesystem = load_typesystem(self._typesystem_xml)
        return [
            TrainingDocument(
                load_cas_from_xmi(entry["xmi"], shared_typesystem),
                entry["documentId"],
                entry["userId"],
            )
            for entry in self._documents_json
        ]
Ejemplo n.º 15
0
def rebuilt2xmi(ci,
                output_dir,
                typesystem_path,
                iiif_mappings,
                pct_coordinates=False) -> str:
    """
    Converts a rebuilt ContentItem into Apache UIMA/XMI format.

    The resulting file will be named after the content item's ID, adding
    the `.xmi` extension.

    :param ci: the content item to be converted
    :type ci: `impresso_commons.classes.ContentItem`
    :param output_dir: the path to the output directory
    :type output_dir: str
    :param typesystem_path: TypeSystem file containing definitions of
        annotation layers.
    :type typesystem_path: str
    :param iiif_mappings: forwarded to `compute_image_links` as `iiif_links`
    :param pct_coordinates: forwarded to `compute_image_links` as `pct`
    :return: the path of the written XMI file
    :rtype: str
    """
    sentence_type_name = 'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence'
    image_link_type_name = 'webanno.custom.ImpressoImages'

    with open(typesystem_path, "rb") as ts_handle:
        typesystem = load_typesystem(ts_handle)

    cas = Cas(typesystem=typesystem)
    cas.sofa_string = ci.fulltext
    cas.sofa_mime = 'text/plain'

    Sentence = typesystem.get_type(sentence_type_name)
    ImageLink = typesystem.get_type(image_link_type_name)

    # One sentence annotation per line: each one spans from the previous
    # break offset (0 for the first line) up to the current break offset.
    previous_break = 0
    for current_break in ci.lines:
        cas.add_annotation(Sentence(begin=previous_break, end=current_break))
        previous_break = current_break

    # Attach the IIIF links to the text spans they belong to.
    image_links = compute_image_links(ci,
                                      iiif_links=iiif_mappings,
                                      pct=pct_coordinates)
    for link, link_begin, link_end in image_links:
        cas.add_annotation(
            ImageLink(begin=link_begin, end=link_end, link=link))

    xmi_path = os.path.join(output_dir, f'{ci.id}.xmi')
    cas.to_xmi(xmi_path, pretty_print=True)
    return xmi_path
def parse_prediction_request(json_object: JsonDict) -> PredictionRequest:
    """Build a ``PredictionRequest`` from the decoded JSON request body.

    Reads ``metadata`` (``layer``, ``feature``, ``projectId``), ``typeSystem``
    and ``document`` (``xmi``, ``documentId``, ``userId``) from ``json_object``.
    """
    request_metadata = json_object["metadata"]
    request_document = json_object["document"]

    layer, feature, project_id = (
        request_metadata[key] for key in ("layer", "feature", "projectId")
    )

    # The CAS can only be parsed once its type system is available.
    typesystem = load_typesystem(json_object["typeSystem"])
    cas = load_cas_from_xmi(request_document["xmi"], typesystem)

    document_id, user_id = (
        request_document[key] for key in ("documentId", "userId")
    )

    return PredictionRequest(
        cas, layer, feature, project_id, document_id, user_id
    )
Ejemplo n.º 17
0
def load_isaac_ts() -> TypeSystem:
    """Return the DKPro Core type system merged with the packaged ISAAC one."""
    dkpro_typesystem = load_dkpro_core_typesystem()

    # Resource loading as described in https://stackoverflow.com/a/20885799
    try:
        import importlib.resources as pkg_resources
    except ImportError:
        # Fall back to the `importlib_resources` backport for PY<37.
        import importlib_resources as pkg_resources

    from . import resources

    with pkg_resources.open_binary(resources, ISAAC_TYPESYSTEM_FILE) as handle:
        isaac_typesystem = load_typesystem(handle)

    return merge_typesystems(dkpro_typesystem, isaac_typesystem)
Ejemplo n.º 18
0
    def put_vars(self, items, to_kernel=None):
        """
        Functionality to transfer CAS objects from the IRuta kernel to the SoS (Python) kernel.
        This function is called when a user invokes the line magic %put or %with.

        :param items: list holding exactly one variable name.
        :param to_kernel: accepted for interface compatibility; not used here.
        :return: dict mapping the variable name to the loaded CAS.
        :raises Exception: if ``items`` does not hold exactly one name.
        """

        if len(items) != 1:
            raise Exception(
                "%put takes exactly one variable name as argument. ")
        var_name = items[0]

        # The context manager removes the directory and everything in it even
        # if the Ruta call or the cassis parsing below raises.
        with tempfile.TemporaryDirectory() as temp_directory:

            def reserve_path(suffix):
                # Create an empty placeholder file and close it right away:
                # only its path is needed, Ruta writes the actual content.
                placeholder = tempfile.NamedTemporaryFile(suffix=suffix,
                                                          dir=temp_directory,
                                                          delete=False)
                placeholder.close()
                # Forward slashes are used for the paths handed to Ruta.
                return os.path.normpath(placeholder.name).replace('\\', "/")

            temp_typesystem_file_path = reserve_path(".xml")
            temp_xmi_file_path = reserve_path(".xmi")

            # Step 1: Writing CAS and TypeSystem to disk with Ruta
            cmd_transfer_var = f"%displayMode NONE\n" \
                               f"%saveTypeSystem {temp_typesystem_file_path}\n" \
                               f"%saveCas {temp_xmi_file_path}"

            env.log_to_file('KERNEL', f'Executing "{cmd_transfer_var}"')
            self.ruta_kernel.run_cell(
                cmd_transfer_var,
                silent=True,
                store_history=False,
                on_error='Failed to write UIMA CAS to disk.')

            # Step 2: Reading CAS and TypeSystem from disk with python/cassis.
            # The files are reopened so that the content written by Ruta is
            # read, independent of how Ruta replaced the placeholders.
            with open(temp_typesystem_file_path, 'rb') as typesystem_file:
                typesystem = cassis.load_typesystem(typesystem_file)
            with open(temp_xmi_file_path, 'rb') as xmi_file:
                cas = cassis.load_cas_from_xmi(xmi_file,
                                               typesystem=typesystem)

        return {var_name: cas}
Ejemplo n.º 19
0
 def file_to_cas(self, filepath):
     """Convert a text file to XMI with the bundled Java tool and load it.

     The tool's stdout and stderr go to a log file. The resulting type
     system and CAS are stored in ``self.typesystem`` and ``self.cas``, and
     the intermediate ``<filepath>.xmi`` file is deleted afterwards.

     :param filepath: path of the text file to convert.
     :return: ``self``.
     """
     # TODO below code is implemented for pydkpro purpose only
     in_text = filepath
     xmi_path = in_text + '.xmi'
     ts_xml = 'pydkpro/typesystems/temp_TypeSytems_textToXMI.xml'
     log_path = 'pydkpro/test_data/textToXMI.log'
     # Pass the arguments as a list: formatting them into one string and
     # splitting it again breaks paths that contain whitespace or quotes,
     # and silently drops the directory argument when it is empty.
     # NOTE(review): '.' replaces an empty dirname -- confirm the Java tool
     # treats it as the current directory.
     cmd = [
         "java", "-jar",
         "pydkpro/pydkpro-0.0.1-SNAPSHOT-standalone_textXMI.jar",
         in_text,
         os.path.dirname(in_text) or '.',
         ts_xml,
     ]
     # Remove a stale result so that it is never mistaken for fresh output.
     if os.path.exists(xmi_path):
         os.remove(xmi_path)
     with codecs.open(log_path, 'w', 'utf-8') as f:
         p = subprocess.Popen(cmd, stdout=f, stderr=f)
         p.wait()
     try:
         with open(ts_xml, 'rb') as f:
             self.typesystem = load_typesystem(f)
         with open(xmi_path, 'rb') as f:
             self.cas = load_cas_from_xmi(f, typesystem=self.typesystem)
     finally:
         # Clean up the intermediate file even when parsing fails.
         if os.path.exists(xmi_path):
             os.remove(xmi_path)
     return self
Ejemplo n.º 20
0
def test_that_merging_incompatible_typesystem_throws(
        name, rangeTypeName, elementType, multipleReferencesAllowed):
    """Merging a conflicting feature raises ``ValueError`` naming that feature."""
    with open(typesystem_merge_base_path(), "r") as handle:
        base_typesystem = load_typesystem(handle.read())

    # Second type system with one type carrying the parametrized feature.
    other_typesystem = TypeSystem()
    conflicting_type = other_typesystem.create_type(
        "test.ArraysAndListsWithElementTypes", supertypeName="uima.cas.TOP")
    other_typesystem.add_feature(
        type_=conflicting_type,
        name=name,
        rangeTypeName=rangeTypeName,
        elementType=elementType,
        multipleReferencesAllowed=multipleReferencesAllowed,
    )

    # The error message is expected to mention the feature name in brackets.
    expected_message = rf".*\[{name}\].*"

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        with pytest.raises(ValueError, match=expected_message):
            merge_typesystems(base_typesystem, other_typesystem)
Ejemplo n.º 21
0
def test_deserializing_small_typesystem(small_typesystem_xml):
    """Check types, feature ranges and multiple-reference flags after loading."""
    loaded = load_typesystem(small_typesystem_xml)

    # The XML declares two types; DocumentAnnotation is defined implicitly.
    assert len(list(loaded.get_types())) == 3

    # (type name, [(feature name, range type, multipleReferencesAllowed)]).
    # ``None`` as the flag means the flag is not checked for that feature.
    expectations = [
        ("uima.tcas.DocumentAnnotation",
         [("language", "uima.cas.String", None)]),
        ("cassis.Token",
         [("id", "uima.cas.Integer", None),
          ("pos", "uima.cas.String", True)]),
        ("cassis.Sentence",
         [("id", "uima.cas.Integer", False)]),
    ]

    for type_name, expected_features in expectations:
        current_type = loaded.get_type(type_name)
        assert current_type.name == type_name
        assert current_type.supertypeName == "uima.tcas.Annotation"

        for feature_name, range_name, multi_refs in expected_features:
            feature = current_type.get_feature(feature_name)
            assert feature.name == feature_name
            assert feature.rangeTypeName == range_name
            if multi_refs is not None:
                assert feature.multipleReferencesAllowed is multi_refs
Ejemplo n.º 22
0
def test_that_merging_compatible_typesystem_works(name, rangeTypeName,
                                                  elementType,
                                                  multipleReferencesAllowed):
    """Merging a compatible feature definition keeps the type in the result."""
    merged_type_name = "test.ArraysAndListsWithElementTypes"

    with open(typesystem_merge_base_path(), "r") as handle:
        base_typesystem = load_typesystem(handle.read())

    # Second type system with one type carrying the parametrized feature.
    other_typesystem = TypeSystem()
    compatible_type = other_typesystem.create_type(
        merged_type_name, supertypeName="uima.cas.TOP")
    other_typesystem.add_feature(
        type_=compatible_type,
        name=name,
        rangeTypeName=rangeTypeName,
        elementType=elementType,
        multipleReferencesAllowed=multipleReferencesAllowed,
    )

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        merged = merge_typesystems(base_typesystem, other_typesystem)

    assert merged.contains_type(merged_type_name)
Ejemplo n.º 23
0
    def test_send_single_cas_from_ruta_to_python(self, notebook):
        """Load a CAS in Ruta, ``%put`` it to the SoS kernel and compare the sofa."""
        # Resolve the test resources.
        ts_path = os.path.join(TEST_RESOURCE_DIR, "TypeSystem.xml")
        xmi_path = os.path.join(TEST_RESOURCE_DIR, "example.xmi")

        # Build a local reference CAS to compare against.
        with open(ts_path, 'rb') as ts_handle:
            reference_ts = cassis.load_typesystem(ts_handle)
        with open(xmi_path, 'rb') as xmi_handle:
            reference_cas = cassis.load_cas_from_xmi(xmi_handle, typesystem=reference_ts)

        # Load the same CAS and type system inside the Ruta kernel.
        ruta_setup = "\n".join([
            "%displayMode NONE",
            f"%loadCas {xmi_path}",
            f"%loadTypeSystem {ts_path}",
        ])
        notebook.call(ruta_setup, kernel=RUTA_KERNEL_NAME)

        # Hand the CAS over to the SoS kernel.
        notebook.call("%put modified_cas", kernel=RUTA_KERNEL_NAME)

        # The sofa printed by the SoS kernel must equal the stripped reference.
        printed_sofa = notebook.check_output("print(modified_cas.sofa_string)", kernel=SOS_KERNEL_NAME)
        assert printed_sofa == reference_cas.sofa_string.strip()
Ejemplo n.º 24
0
def test_serializing_small_typesystem_to_string(small_typesystem_xml):
    """``to_xml()`` without a target reproduces the UTF-8 encoded source XML."""
    serialized = load_typesystem(small_typesystem_xml).to_xml()
    expected = small_typesystem_xml.encode('utf-8')

    assert_xml_equal(serialized, expected)
Ejemplo n.º 25
0
def test_deserializing_from_file(typesystem_path):
    """Smoke test: ``load_typesystem`` accepts an open binary file object."""
    with open(typesystem_path, "rb") as typesystem_file:
        load_typesystem(typesystem_file)
Ejemplo n.º 26
0
 def __init__(self):
     """Load the DKPro Core type system file into ``self.typesystem``."""
     dkpro_types_path = '../pydkpro/typesystems/dkpro-core-types.xml'
     with open(dkpro_types_path, 'rb') as typesystem_file:
         self.typesystem = load_typesystem(typesystem_file)
Ejemplo n.º 27
0
def test_deserializing_from_string(typesystem_xml):
    """Smoke test: ``load_typesystem`` accepts the XML fixture directly."""
    xml_source = typesystem_xml
    load_typesystem(xml_source)
Ejemplo n.º 28
0
def test_serializing_typesystem_to_string(typesystem_xml):
    """A load/``to_xml()`` round trip yields XML equal to the input."""
    round_tripped_xml = load_typesystem(typesystem_xml).to_xml()

    assert_xml_equal(round_tripped_xml, typesystem_xml)
Ejemplo n.º 29
0
def test_that_typesystem_with_child_redefining_type_same_warns():
    """Loading the "redefined same" inheritance fixture emits a ``UserWarning``."""
    fixture_file = os.path.join(
        FIXTURE_DIR,
        "typesystems",
        "typesystem_with_inheritance_redefined_same.xml",
    )
    with pytest.warns(UserWarning), open(fixture_file, "rb") as handle:
        load_typesystem(handle)
Ejemplo n.º 30
0
def test_that_typesystem_with_child_redefining_type_differently_throws():
    """Loading the "redefined different" inheritance fixture raises ``ValueError``."""
    fixture_file = os.path.join(
        FIXTURE_DIR,
        "typesystems",
        "typesystem_with_inheritance_redefined_different.xml",
    )
    with pytest.raises(ValueError), open(fixture_file, "rb") as handle:
        load_typesystem(handle)