Esempio n. 1
0
    def __init__(
        self,
        args='object',
        xmi_string=None,
        text=None,
        cas_path=None,
        type_system_path='../pydkpro/typesystems/temp_TypeSytems.xml',
        token_type='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'
    ):
        """Set up the wrapper and build or load its CAS.

        The CAS source is chosen with the following precedence:

        1. ``xmi_string`` -- deserialized with the DKPro Core type system.
        2. ``cas_path`` -- XMI file deserialized with the DKPro Core type
           system.
        3. otherwise a fresh, empty plain-text CAS is created, using ``args``
           as its type system when ``args`` is a ``cassis.TypeSystem`` and the
           type system loaded from ``type_system_path`` in every other case.

        Args:
            args: Either a ``cassis.TypeSystem`` to create the CAS from, or
                any other value (ignored for CAS creation).
            xmi_string: XMI content handed to ``load_cas_from_xmi``.
            text: Token list stored on ``self.text``. When omitted, a new
                sample sentence list is created for this instance.
            cas_path: Path of an XMI file to load the CAS from.
            type_system_path: Path of the type system XML file; it is always
                opened and loaded into ``self.typesystem``.
            token_type: Fully qualified name stored on ``self.token_type``.
        """
        # Build the default per call: a list literal in the signature would
        # be one object shared (and mutable) across every instance.
        if text is None:
            text = [
                'Backgammon', 'is', 'one', 'of', 'the', 'oldest', 'known',
                'board', 'games', '.'
            ]
        self.args = args
        self.text = text
        self.cas_path = cas_path
        self.type_system_path = type_system_path
        self.token_type = token_type
        self.token_list = []
        with open(self.type_system_path, 'rb') as f:
            self.typesystem = load_typesystem(f)

        # One if/elif chain: previously a separate trailing if/else replaced
        # whatever the cas_path / TypeSystem branches had just created.
        if xmi_string:
            self.cas = load_cas_from_xmi(
                xmi_string, typesystem=load_dkpro_core_typesystem())
        elif cas_path:
            with open(self.cas_path, 'rb') as f:
                self.cas = load_cas_from_xmi(
                    f, typesystem=load_dkpro_core_typesystem())
        else:
            if isinstance(self.args, cassis.TypeSystem):
                new_cas_typesystem = self.args
            else:
                new_cas_typesystem = self.typesystem
            self.cas = cs(typesystem=new_cas_typesystem)
            self.cas.sofa_mime = "text/plain"
            self.cas.sofa_string = ""
Esempio n. 2
0
def test_xmi_deserialization_performance():
    """Deserialize ``randomized_cas_xmi`` ``iterations`` times and print the elapsed time."""
    start = timer()
    for _ in range(iterations):
        load_cas_from_xmi(randomized_cas_xmi, typesystem)
    end = timer()

    report = f"XMI: Deserializing {iterations} CASes with {generator.size} each took {end - start} seconds ({len(randomized_cas_xmi_bytes)} bytes each)"
    print(report)
Esempio n. 3
0
    def test_send_single_cas_from_python_to_ruta(self, notebook):
        """Load a CAS in the SoS kernel, pull it into Ruta with ``%get`` and compare the sofa text."""
        # Locate the test fixtures.
        typesytem_file = os.path.join(TEST_RESOURCE_DIR, "TypeSystem.xml")
        cas_file = os.path.join(TEST_RESOURCE_DIR, "example.xmi")

        # Build a reference CAS locally so there is something to compare against.
        with open(typesytem_file, 'rb') as typesystem_stream:
            typesystem = cassis.load_typesystem(typesystem_stream)
        with open(cas_file, 'rb') as xmi_stream:
            reference_cas = cassis.load_cas_from_xmi(xmi_stream, typesystem=typesystem)

        # Have a SoS notebook cell load the same CAS into the variable `cas_var`.
        cas_init_expr = f"""
        import cassis
        with open("{typesytem_file}", 'rb') as f:
            typesystem = cassis.load_typesystem(f)
        with open("{cas_file}", 'rb') as f:
            cas_var = cassis.load_cas_from_xmi(f, typesystem=typesystem)
        """
        notebook.call(cas_init_expr, kernel=SOS_KERNEL_NAME)

        # Fetch the variable from a Ruta cell and capture what Ruta displays.
        notebook.call("%get cas_var", kernel=RUTA_KERNEL_NAME)
        actual_sofa = notebook.check_output("%displayMode RUTA_COLORING", kernel=RUTA_KERNEL_NAME)
        expected_sofa = reference_cas.sofa_string

        # Only alphabetic characters are compared; everything else is ignored.
        actual_letters = [ch for ch in actual_sofa if ch.isalpha()]
        expected_letters = [ch for ch in expected_sofa if ch.isalpha()]
        assert actual_letters == expected_letters
def convert_stuff():
    """Load the Obama user-study type system and CAS from ``PATH_GENERATED`` and featurize the CAS."""
    typesystem_path = PATH_GENERATED + "/userstudy/obama/TypeSystem.xml"
    with open(typesystem_path, "rb") as typesystem_stream:
        typesystem = load_typesystem(typesystem_stream)

    xmi_path = PATH_GENERATED + "/userstudy/obama/Wikipedia-Obama.xmi"
    with open(xmi_path, "rb") as xmi_stream:
        obama_cas = load_cas_from_xmi(xmi_stream, typesystem)

    featurize_cas(obama_cas)
    def documents(self) -> List["TrainingDocument"]:
        """Build the training documents from the stored type system XML and document JSON.

        Parsing happens on every call rather than up front, because callers
        that are already training sometimes never need the parsed documents.
        """
        typesystem = load_typesystem(self._typesystem_xml)
        return [
            TrainingDocument(
                load_cas_from_xmi(entry["xmi"], typesystem),
                entry["documentId"],
                entry["userId"],
            )
            for entry in self._documents_json
        ]
def parse_prediction_request(json_object: JsonDict) -> PredictionRequest:
    """Turn a decoded prediction request payload into a ``PredictionRequest``.

    Reads ``layer``, ``feature`` and ``projectId`` from ``metadata``, the
    type system from ``typeSystem``, and the XMI, ``documentId`` and
    ``userId`` from ``document``.
    """
    meta = json_object["metadata"]
    doc = json_object["document"]

    layer, feature, project_id = meta["layer"], meta["feature"], meta["projectId"]

    typesystem = load_typesystem(json_object["typeSystem"])
    return PredictionRequest(
        load_cas_from_xmi(doc["xmi"], typesystem),
        layer,
        feature,
        project_id,
        doc["documentId"],
        doc["userId"],
    )
Esempio n. 7
0
    def put_vars(self, items, to_kernel=None):
        """
        Functionality to transfer CAS objects from the IRuta kernel to the SoS (Python) kernel.
        This function is called when a user invokes the line magic %put or %with.

        The Ruta kernel is asked to write its type system and CAS to temporary
        files, which are then read back with cassis.

        :param items: list holding exactly one variable name.
        :param to_kernel: not used by this implementation.
        :return: dict mapping the variable name to the loaded cassis CAS.
        :raises Exception: if ``items`` does not contain exactly one name.
        """

        if len(items) != 1:
            raise Exception(
                "%put takes exactly one variable name as argument. ")
        var_name = items[0]

        # Context managers guarantee that both file handles are closed and the
        # directory is removed even when the Ruta call or the cassis loaders
        # raise; previously the clean-up only ran on the success path.
        with tempfile.TemporaryDirectory() as temp_directory_name:
            with tempfile.NamedTemporaryFile(
                    suffix=".xml", dir=temp_directory_name,
                    delete=False) as temp_typesystem_file, \
                    tempfile.NamedTemporaryFile(
                        suffix=".xmi", dir=temp_directory_name,
                        delete=False) as temp_xmi_file:
                # Forward slashes so the paths can be embedded in the Ruta
                # magics regardless of platform.
                temp_typesystem_file_path = os.path.normpath(
                    temp_typesystem_file.name).replace('\\', "/")
                temp_xmi_file_path = os.path.normpath(
                    temp_xmi_file.name).replace('\\', "/")

                # Step 1: Writing CAS and TypeSystem to disk with Ruta
                cmd_transfer_var = f"%displayMode NONE\n" \
                                   f"%saveTypeSystem {temp_typesystem_file_path}\n" \
                                   f"%saveCas {temp_xmi_file_path}"

                env.log_to_file('KERNEL', f'Executing "{cmd_transfer_var}"')
                self.ruta_kernel.run_cell(
                    cmd_transfer_var,
                    silent=True,
                    store_history=False,
                    on_error='Failed to write UIMA CAS to disk.')

                # Step 2: Reading CAS and TypeSystem from disk with python/cassis
                typesystem = cassis.load_typesystem(temp_typesystem_file)
                cas = cassis.load_cas_from_xmi(temp_xmi_file,
                                               typesystem=typesystem)

        # Step 3: temp files and directory were released by the with-blocks.
        return {var_name: cas}
Esempio n. 8
0
 def file_to_cas(self, filepath):
     """Convert a text file to XMI with the bundled jar and load the result.

     Runs the pydkpro text-to-XMI jar on ``filepath``, logging the process
     output to ``pydkpro/test_data/textToXMI.log``, then loads the type
     system into ``self.typesystem`` and the generated ``<filepath>.xmi``
     into ``self.cas``. The generated XMI file is removed afterwards.

     :param filepath: path of the input text file.
     :return: ``self``.
     """
     # TODO below code is implemented for pydkpro purpose only
     in_text = filepath
     xmi_path = in_text + '.xmi'
     ts_xml = 'pydkpro/typesystems/temp_TypeSytems_textToXMI.xml'
     log_path = 'pydkpro/test_data/textToXMI.log'
     # Pass the arguments as a list instead of splitting an interpolated
     # string: shlex.split would break paths containing whitespace or
     # backslashes and silently drop an empty directory argument.
     # NOTE(review): '.' is used for a bare file name, assuming the jar
     # treats this argument as the output directory -- confirm.
     cmd = [
         "java", "-jar",
         "pydkpro/pydkpro-0.0.1-SNAPSHOT-standalone_textXMI.jar",
         in_text,
         os.path.dirname(in_text) or '.',
         ts_xml,
     ]
     # Remove a stale result so an old file is never loaded by mistake.
     if os.path.exists(xmi_path):
         os.remove(xmi_path)
     with codecs.open(log_path, 'w', 'utf-8') as f:
         p = subprocess.Popen(cmd, stdout=f, stderr=f)
         p.wait()
     try:
         with open(ts_xml, 'rb') as f:
             self.typesystem = load_typesystem(f)
         with open(xmi_path, 'rb') as f:
             self.cas = load_cas_from_xmi(f, typesystem=self.typesystem)
     finally:
         # Clean up the generated file even when loading fails; the existence
         # check keeps a missing file from masking the original error.
         if os.path.exists(xmi_path):
             os.remove(xmi_path)
     return self
Esempio n. 9
0
    def test_send_single_cas_from_ruta_to_python(self, notebook):
        """Load a CAS in Ruta, push it to the SoS kernel with ``%put`` and compare the sofa text."""
        # Locate the test fixtures.
        typesystem_file = os.path.join(TEST_RESOURCE_DIR, "TypeSystem.xml")
        cas_file = os.path.join(TEST_RESOURCE_DIR, "example.xmi")

        # Build a reference CAS locally so there is something to compare against.
        with open(typesystem_file, 'rb') as typesystem_stream:
            typesystem = cassis.load_typesystem(typesystem_stream)
        with open(cas_file, 'rb') as xmi_stream:
            reference_cas = cassis.load_cas_from_xmi(xmi_stream, typesystem=typesystem)

        # Load the CAS and its type system inside the Ruta kernel.
        cas_init_expr = (
            f"%displayMode NONE\n"
            f"%loadCas {cas_file}\n"
            f"%loadTypeSystem {typesystem_file}"
        )
        notebook.call(cas_init_expr, kernel=RUTA_KERNEL_NAME)

        # Hand the CAS over to the SoS kernel.
        notebook.call("%put modified_cas", kernel=RUTA_KERNEL_NAME)

        # The variable printed in the SoS kernel must match the reference sofa.
        actual_sofa = notebook.check_output("print(modified_cas.sofa_string)", kernel=SOS_KERNEL_NAME)
        expected_sofa = reference_cas.sofa_string.strip()
        assert actual_sofa == expected_sofa
Esempio n. 10
0
 def from_xmi(self, xmi_string):
     """Replace ``self.cas`` with the CAS parsed from ``xmi_string`` using ``self.typesystem``.

     :return: ``self``.
     """
     parsed_cas = load_cas_from_xmi(xmi_string, typesystem=self.typesystem)
     self.cas = parsed_cas
     return self
Esempio n. 11
0
 def load_from_dkpro_xmi(self, xmi_string):
     """Replace ``self.cas`` with the CAS parsed from ``xmi_string`` using the DKPro Core type system.

     :return: ``self``.
     """
     dkpro_typesystem = load_dkpro_core_typesystem()
     self.cas = load_cas_from_xmi(xmi_string, typesystem=dkpro_typesystem)
     return self