def __init__(self,
             args='object',
             xmi_string=None,
             text=None,
             cas_path=None,
             type_system_path='../pydkpro/typesystems/temp_TypeSytems.xml',
             token_type='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'):
    """Store the configuration and create the initial CAS.

    The type system at ``type_system_path`` is always loaded into
    ``self.typesystem``. The CAS itself is taken from the first source
    that applies:

    1. ``xmi_string`` - deserialized with the DKPro Core type system.
    2. ``cas_path`` - XMI file deserialized with the DKPro Core type system.
    3. ``args`` being a ``cassis.TypeSystem`` - empty CAS on that type system.
    4. otherwise - empty CAS on ``self.typesystem`` with MIME type
       ``text/plain`` and an empty sofa string.

    Args:
        args: Stored on the instance; only consulted as a type system when
            it is a ``cassis.TypeSystem``.
        xmi_string: XMI content to deserialize, or ``None``.
        text: Value stored as ``self.text``. ``None`` selects a fresh copy
            of the default example token list.
        cas_path: Path of an XMI file to deserialize, or ``None``.
        type_system_path: Path of the type system XML file to load.
        token_type: Stored on the instance as ``self.token_type``.
    """
    # A list literal in the signature would be shared by every instance that
    # relies on the default, so the default is materialized per call instead.
    if text is None:
        text = [
            'Backgammon', 'is', 'one', 'of', 'the', 'oldest', 'known',
            'board', 'games', '.'
        ]

    self.args = args
    self.text = text
    self.cas_path = cas_path
    self.type_system_path = type_system_path
    self.token_type = token_type
    self.token_list = []

    with open(self.type_system_path, 'rb') as f:
        self.typesystem = load_typesystem(f)

    # One precedence chain: previously a CAS built from ``cas_path`` or from a
    # TypeSystem passed as ``args`` was replaced again by the xmi/default
    # branch that followed it.
    if xmi_string:
        self.cas = load_cas_from_xmi(
            xmi_string, typesystem=load_dkpro_core_typesystem())
    elif cas_path:
        with open(self.cas_path, 'rb') as f:
            self.cas = load_cas_from_xmi(
                f, typesystem=load_dkpro_core_typesystem())
    elif isinstance(self.args, cassis.TypeSystem):
        self.cas = cs(typesystem=self.args)
    else:
        self.cas = cs(typesystem=self.typesystem)
        self.cas.sofa_mime = "text/plain"
        self.cas.sofa_string = ""
def test_xmi_deserialization_performance():
    """Time repeated XMI deserialization and print a one-line report.

    Uses names defined outside this function: ``timer``, ``iterations``,
    ``randomized_cas_xmi``, ``randomized_cas_xmi_bytes``, ``typesystem``
    and ``generator``.
    """
    start = timer()

    # The loop counter is not needed; only the number of repetitions matters.
    for _ in range(iterations):
        load_cas_from_xmi(randomized_cas_xmi, typesystem)

    end = timer()

    print(
        f"XMI: Deserializing {iterations} CASes with {generator.size} each took {end - start} seconds ({len(randomized_cas_xmi_bytes)} bytes each)"
    )
def test_send_single_cas_from_python_to_ruta(self, notebook):
    """Load a CAS in an SoS cell, fetch it from a Ruta cell, compare sofas.

    The comparison only looks at alphabetic characters of the Ruta output
    and of the locally loaded CAS's sofa string.
    """
    # Step 1: Get the file paths
    typesystem_file = os.path.join(TEST_RESOURCE_DIR, "TypeSystem.xml")
    cas_file = os.path.join(TEST_RESOURCE_DIR, "example.xmi")

    # Step 2: Get a (local) python instance of the cas for comparison
    with open(typesystem_file, 'rb') as f:
        typesystem = cassis.load_typesystem(f)
    with open(cas_file, 'rb') as f:
        cas = cassis.load_cas_from_xmi(f, typesystem=typesystem)

    # Step 3: Send a command to a SoS notebook cell that loads the cas in
    # that cell. The paths are embedded with ``!r`` so they become valid
    # Python string literals even when they contain backslashes or quotes,
    # and the snippet is assembled line by line so its indentation does not
    # depend on how this test method is indented.
    cas_init_expr = "\n".join([
        "import cassis",
        f"with open({typesystem_file!r}, 'rb') as f:",
        "    typesystem = cassis.load_typesystem(f)",
        f"with open({cas_file!r}, 'rb') as f:",
        "    cas_var = cassis.load_cas_from_xmi(f, typesystem=typesystem)",
        "",
    ])
    notebook.call(cas_init_expr, kernel=SOS_KERNEL_NAME)

    # Step 4: Execute `%get cas_var` in a Ruta cell and capture the output
    # of the following display command.
    notebook.call("%get cas_var", kernel=RUTA_KERNEL_NAME)
    actual_sofa = notebook.check_output("%displayMode RUTA_COLORING",
                                        kernel=RUTA_KERNEL_NAME)
    expected_sofa = cas.sofa_string

    # Step 5: Compare results. Ignore special characters.
    assert [c for c in actual_sofa if c.isalpha()] == \
        [c for c in expected_sofa if c.isalpha()]
def convert_stuff():
    """Load the CAS stored under ``userstudy/obama`` and featurize it.

    Reads ``TypeSystem.xml`` and ``Wikipedia-Obama.xmi`` below
    ``PATH_GENERATED`` and hands the resulting CAS to ``featurize_cas``.
    """
    obama_dir = PATH_GENERATED + "/userstudy/obama"

    with open(obama_dir + "/TypeSystem.xml", "rb") as typesystem_file:
        typesystem = load_typesystem(typesystem_file)

    with open(obama_dir + "/Wikipedia-Obama.xmi", "rb") as xmi_file:
        cas = load_cas_from_xmi(xmi_file, typesystem)

    featurize_cas(cas)
def documents(self) -> List["TrainingDocument"]:
    """Parse the stored documents into ``TrainingDocument`` objects.

    The type system and every document's XMI are parsed on each call.

    Returns:
        One ``TrainingDocument`` per entry of ``self._documents_json``,
        in the same order.
    """
    # We parse this lazily as sometimes when already training, we just do
    # not need to parse it at all.
    typesystem = load_typesystem(self._typesystem_xml)

    return [
        TrainingDocument(
            load_cas_from_xmi(entry["xmi"], typesystem),
            entry["documentId"],
            entry["userId"],
        )
        for entry in self._documents_json
    ]
def parse_prediction_request(json_object: JsonDict) -> PredictionRequest:
    """Build a ``PredictionRequest`` from a decoded JSON request body.

    Reads ``metadata`` (``layer``, ``feature``, ``projectId``),
    ``typeSystem`` and ``document`` (``xmi``, ``documentId``, ``userId``)
    from ``json_object``; the document's XMI is parsed into a CAS using the
    request's type system.

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    metadata, document = json_object["metadata"], json_object["document"]
    layer, feature, project_id = (
        metadata["layer"],
        metadata["feature"],
        metadata["projectId"],
    )

    typesystem = load_typesystem(json_object["typeSystem"])
    cas = load_cas_from_xmi(document["xmi"], typesystem)

    return PredictionRequest(
        cas,
        layer,
        feature,
        project_id,
        document["documentId"],
        document["userId"],
    )
def put_vars(self, items, to_kernel=None):
    """
    Functionality to transfer CAS objects from the IRuta kernel to the SoS (Python) kernel.
    This function is called when a user invokes the line magic %put or %with.

    The Ruta kernel is asked to write its type system and CAS to temporary
    files, which are then read back with cassis.

    Args:
        items: Sequence holding exactly one variable name.
        to_kernel: Not used by this implementation.

    Returns:
        A dict mapping the variable name to the loaded CAS.

    Raises:
        Exception: If ``items`` does not contain exactly one name.
    """
    if len(items) != 1:
        raise Exception(
            "%put takes exactly one variable name as argument. ")
    var_name = items[0]

    # The context manager removes the directory and both files even when the
    # Ruta call or the cassis parsing raises.
    with tempfile.TemporaryDirectory() as temp_directory:
        # Create the files only to reserve unique names; the handles are
        # closed right away so nothing holds them open while Ruta writes.
        with tempfile.NamedTemporaryFile(suffix=".xml",
                                         dir=temp_directory,
                                         delete=False) as reserved:
            temp_typesystem_file_path = os.path.normpath(
                reserved.name).replace('\\', "/")
        with tempfile.NamedTemporaryFile(suffix=".xmi",
                                         dir=temp_directory,
                                         delete=False) as reserved:
            temp_xmi_file_path = os.path.normpath(reserved.name).replace(
                '\\', "/")

        # Step 1: Writing CAS and TypeSystem to disk with Ruta
        cmd_transfer_var = f"%displayMode NONE\n" \
                           f"%saveTypeSystem {temp_typesystem_file_path}\n" \
                           f"%saveCas {temp_xmi_file_path}"
        env.log_to_file('KERNEL', f'Executing "{cmd_transfer_var}"')
        self.ruta_kernel.run_cell(
            cmd_transfer_var,
            silent=True,
            store_history=False,
            on_error='Failed to write UIMA CAS to disk.')

        # Step 2: Reading CAS and TypeSystem from disk with python/cassis,
        # through fresh read-only handles opened after Ruta has written.
        with open(temp_typesystem_file_path, 'rb') as typesystem_file:
            typesystem = cassis.load_typesystem(typesystem_file)
        with open(temp_xmi_file_path, 'rb') as xmi_file:
            cas = cassis.load_cas_from_xmi(xmi_file, typesystem=typesystem)

    # Step 3: Clean-up of the temp files happened when the block above exited.
    return {var_name: cas}
def file_to_cas(self, filepath):
    """Convert a text file to a CAS by running the text-to-XMI Java jar.

    Runs ``java -jar`` on the standalone jar with the input file, its
    directory and the type system path as arguments, writing the process
    output to a log file. Afterwards the type system and ``<filepath>.xmi``
    are loaded into ``self.typesystem`` and ``self.cas`` and the XMI file
    is deleted.

    Args:
        filepath: Path of the input text file.

    Returns:
        ``self``.

    Raises:
        FileNotFoundError: If ``<filepath>.xmi`` does not exist after the
            Java process has finished (its exit status is not checked).
    """
    # TODO below code is implemented for pydkpro purpose only
    in_text = filepath
    ts_xml = 'pydkpro/typesystems/temp_TypeSytems_textToXMI.xml'
    log_path = 'pydkpro/test_data/textToXMI.log'
    xmi_path = in_text + '.xmi'

    # Build the argument vector directly instead of formatting a string and
    # splitting it again: paths containing spaces, quotes or backslashes
    # then reach the Java process unchanged. An empty dirname (bare file
    # name) is passed as '.' so that it is not lost as an argument.
    cmd = [
        "java",
        "-jar",
        "pydkpro/pydkpro-0.0.1-SNAPSHOT-standalone_textXMI.jar",
        in_text,
        os.path.dirname(in_text) or '.',
        ts_xml,
    ]

    # Remove a stale result so that a failed run cannot be mistaken for a
    # successful one.
    if os.path.exists(xmi_path):
        os.remove(xmi_path)

    with codecs.open(log_path, 'w', 'utf-8') as f:
        subprocess.run(cmd, stdout=f, stderr=f)

    with open(ts_xml, 'rb') as f:
        self.typesystem = load_typesystem(f)
    with open(xmi_path, 'rb') as f:
        self.cas = load_cas_from_xmi(f, typesystem=self.typesystem)
    os.remove(xmi_path)
    return self
def test_send_single_cas_from_ruta_to_python(self, notebook):
    """Load a CAS in Ruta, ``%put`` it to the SoS kernel, compare sofas.

    The sofa string printed in the SoS kernel must equal the stripped sofa
    string of the same CAS loaded locally with cassis.
    """
    # Step 1: Get the file paths
    typesystem_file = os.path.join(TEST_RESOURCE_DIR, "TypeSystem.xml")
    cas_file = os.path.join(TEST_RESOURCE_DIR, "example.xmi")

    # Step 2: Get a (local) python instance of the cas for comparison
    with open(typesystem_file, 'rb') as ts_handle:
        typesystem = cassis.load_typesystem(ts_handle)
    with open(cas_file, 'rb') as xmi_handle:
        reference_cas = cassis.load_cas_from_xmi(xmi_handle,
                                                 typesystem=typesystem)

    # Step 3: Load CAS into Ruta
    ruta_setup = "\n".join((
        "%displayMode NONE",
        f"%loadCas {cas_file}",
        f"%loadTypeSystem {typesystem_file}",
    ))
    notebook.call(ruta_setup, kernel=RUTA_KERNEL_NAME)

    # Step 4: Send files to SoS Kernel with %put
    notebook.call("%put modified_cas", kernel=RUTA_KERNEL_NAME)

    # Step 5: Check variable content
    printed_sofa = notebook.check_output("print(modified_cas.sofa_string)",
                                         kernel=SOS_KERNEL_NAME)
    assert printed_sofa == reference_cas.sofa_string.strip()
def from_xmi(self, xmi_string):
    """Replace ``self.cas`` with a CAS parsed from ``xmi_string``.

    The XMI is deserialized against ``self.typesystem``.

    Returns:
        ``self``, so calls can be chained.
    """
    parsed_cas = load_cas_from_xmi(xmi_string, typesystem=self.typesystem)
    self.cas = parsed_cas
    return self
def load_from_dkpro_xmi(self, xmi_string):
    """Replace ``self.cas`` with a CAS parsed from ``xmi_string``.

    The XMI is deserialized against the type system returned by
    ``load_dkpro_core_typesystem()``.

    Returns:
        ``self``, so calls can be chained.
    """
    dkpro_typesystem = load_dkpro_core_typesystem()
    self.cas = load_cas_from_xmi(xmi_string, typesystem=dkpro_typesystem)
    return self