def exec(self, process: ProcessContainer, args):
    """Execute a process

    Parameters
    ----------
    process
        Metadata of the process
    args
        List of arguments
    """
    token = None
    config = ConfigAccess.instance().config['runner']
    if 'token' in config:
        token = config['token']
    client = ag.Client(token)
    # exec the process: replace local paths with bare filenames in the
    # parameter string and collect the input files to upload
    params = ' '.join(args[1:])
    files = []
    for input_ in process.inputs:
        if input_.is_data:
            filename = ntpath.basename(input_.value)
            params = params.replace(input_.value, filename)
            files.append(input_.value)
    for output in process.outputs:
        if output.is_data:
            filename = ntpath.basename(output.value)
            params = params.replace(output.value, filename)
    try:
        out_dict = client.run_job(process.id, files=files, params=params)
    except ag.StatusError as e:
        print('API status error:', e.status_code)
        print('API status error:', e.msg)
        return
    # get the outputs: download each declared output next to its local path
    job_id = out_dict['id']
    for output in process.outputs:
        output_filename = ntpath.basename(output.value)
        output_dir = os.path.dirname(os.path.abspath(output.value))
        url = out_dict[str(job_id)][output_filename]
        client.download_file(file_url=url, outdir=output_dir, force=True)
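# A minimal usage sketch (hypothetical names and paths; `service` is an
# instance of the class above and `process` a ProcessContainer whose
# inputs/outputs expose `is_data` and `value`):
#
#   args = ['ndsafir', '-i', '/data/img.tif', '-o', '/data/img_denoised.tif']
#   service.exec(process, args)
#
# The method strips local directories from the parameter string, uploads the
# input files, runs the job through the remote `ag` client, then downloads
# each declared output next to the local path stored in `output.value`.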
def _import_dir_bioformat(self, raw_dataset_uri, dir_uri, filter_, author,
                          format_, date, directory_tag_key):
    fiji_exe = ConfigAccess.instance().get('fiji')
    raw_dataset_uri = os.path.abspath(raw_dataset_uri)
    data_dir_path = os.path.dirname(raw_dataset_uri)
    cmd = (f'{fiji_exe} --headless -macro bioimageit_convert.ijm '
           f'"folder,{dir_uri},{data_dir_path},false,{filter_},{author},'
           f'{date},{directory_tag_key}"')
    print("import bioformat cmd:", cmd)
    if platform.system() == 'Windows':
        subprocess.run(cmd, check=True)
    else:
        subprocess.run(cmd, shell=True, executable='/bin/bash', check=True)
    self._add_to_rawdataset_biobormat(data_dir_path, raw_dataset_uri)
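# A sketch of the command this method assembles, with hypothetical paths and
# arguments (Fiji executable, data folder, filter, author, date, tag key):
#
#   /opt/fiji/ImageJ-linux64 --headless -macro bioimageit_convert.ijm \
#       "folder,/data/raw,/data/experiment,false,*.tif,jdoe,2021-05-12,position"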
def run_merged(self):
    """Run the process that merges txt number inputs

    This is the main function that runs the process on the experiment data

    Raises
    ------
    RunnerExecError
    """
    for observer in self._observers:
        observer.notify({'progress': 0, 'message': 'start'})
    # 1- Query all the input data and verify that the sizes
    # are equal, if not raise an exception
    input_data, data_count = self._query_inputs()
    # 2- Create the ProcessedDataSet
    processed_dataset = self.experiment.create_processed_dataset(
        self._output_dataset
    )
    # 3- Create run
    run = Run()
    run.metadata.process_name = self.process.metadata.fullname()
    run.metadata.process_uri = self.process.uri
    for t in range(len(self._inputs_names)):
        run.metadata.inputs.append(
            RunInputContainer(
                self._inputs_names[t],
                self._inputs_datasets[t],
                self._inputs_query[t],
                self._inputs_origin_output_name[t],
            )
        )
    for i in range(0, len(self._process_params), 2):
        run.metadata.parameters.append(
            RunParameterContainer(
                self._process_params[i], self._process_params[i + 1]
            )
        )
    processed_dataset.add_run(run)
    # 4- merge inputs: read the number stored in each queried file
    inputs_values = [[] for _ in range(len(self._inputs_names))]
    for n in range(len(self._inputs_names)):
        for i in range(data_count):
            data_info = RawData(input_data[n][i])
            if data_info.metadata.format == "numbercsv":
                with open(data_info.metadata.uri, 'r') as file:
                    value = file.read().replace('\n', '').replace(' ', '')
                inputs_values[n].append(value)
            else:
                raise RunnerExecError(
                    'run merge can use only number datatype')
    # 5- save the merged values in tmp files in the processed dataset dir
    tmp_inputs_files = [''] * len(self._inputs_names)
    processed_data_dir = processed_dataset.md_uri.replace(
        "processeddataset.md.json", ""
    )
    for n in range(len(self._inputs_names)):
        tmp_inputs_files[n] = os.path.join(
            processed_data_dir, self._inputs_names[n] + '.csv'
        )
        with open(tmp_inputs_files[n], 'w') as f:
            f.write(",".join(str(value) for value in inputs_values[n]))
    # 6- create input metadata for output .md.json
    inputs_metadata = []
    for n in range(len(tmp_inputs_files)):
        inp_metadata = ProcessedDataInputContainer()
        inp_metadata.name = self._inputs_names[n]
        inp_metadata.uri = tmp_inputs_files[n]
        inp_metadata.type = 'txt'
        inputs_metadata.append(inp_metadata)
    # 7- run process on generated files
    args = []
    # 7.1- inputs
    for n in range(len(self._inputs_names)):
        args.append(self._inputs_names[n])
        args.append(tmp_inputs_files[n])
    # 7.2- params
    for param in self._process_params:
        args.append(param)
    # 7.3- outputs
    for output in self.process.metadata.outputs:
        extension = '.' + FormatsAccess.instance().get(output.type).extension
        # args
        args.append(output.name)
        output_file_name = output.name
        args.append(os.path.join(processed_data_dir,
                                 output_file_name + extension))
        # output metadata
        processed_data = ProcessedData()
        processed_data.metadata.name = output.name
        processed_data.metadata.author = \
            ConfigAccess.instance().get('user')['name']
        processed_data.metadata.date = format_date('now')
        processed_data.metadata.format = output.type
        processed_data.metadata.run_uri = run.md_uri
        processed_data.metadata.inputs = inputs_metadata
        processed_data.metadata.output = {
            'name': output.name,
            'label': output.description,
        }
        # save the metadata and create its md_uri and uri
        processed_dataset.create_data(processed_data)
    # 8- exec
    runner = Runner(self.process)
    runner.exec(*args)
    # notify observers
    for observer in self._observers:
        observer.notify({'progress': 100, 'message': 'done'})
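# A sketch of the temporary file layout run_merged produces for an input named
# 'count' whose three queried files contain 12, 7 and 31 (hypothetical values):
#
#   <processed_data_dir>/count.csv  ->  "12,7,31"
#
# i.e. one comma-separated line per input, written next to the processed
# dataset metadata and handed to the process as a single file argument.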
def run_sequence(self):
    """Run the process in a sequence

    This is the main function that runs the process on the experiment data

    Raises
    ------
    RunnerExecError
    """
    # 1- Query all the input data and verify that the sizes
    # are equal, if not raise an exception
    input_data, data_count = self._query_inputs()
    # 2- Create the ProcessedDataSet
    processed_dataset = self.experiment.create_processed_dataset(
        self._output_dataset
    )
    # 3- Create run
    run = Run()
    run.metadata.process_name = self.process.metadata.fullname()
    run.metadata.process_uri = self.process.uri
    for t in range(len(self._inputs_names)):
        run.metadata.inputs.append(
            RunInputContainer(
                self._inputs_names[t],
                self._inputs_datasets[t],
                self._inputs_query[t],
                self._inputs_origin_output_name[t],
            )
        )
    for i in range(0, len(self._process_params), 2):
        run.metadata.parameters.append(
            RunParameterContainer(
                self._process_params[i], self._process_params[i + 1]
            )
        )
    processed_dataset.add_run(run)
    # 4- loop over the input data
    for i in range(data_count):
        data_info_zero = RawData(input_data[0][i].uri())
        # 4.0- notify observers
        for observer in self._observers:
            notification = dict()
            notification['progress'] = int(100 * i / data_count)
            notification['message'] = "Process " + \
                data_info_zero.metadata.name
            observer.notify(notification)
        # 4.1- parse IO
        args = []
        # get the input arguments
        inputs_metadata = []
        for n in range(len(self._inputs_names)):
            args.append(self._inputs_names[n])
            data_info = RawData(input_data[n][i].uri())
            # input data can be a ProcessedData but
            # we only read the common metadata
            args.append(data_info.metadata.uri)
            inp_metadata = ProcessedDataInputContainer()
            inp_metadata.name = self._inputs_names[n]
            inp_metadata.uri = input_data[n][i].uri()
            inp_metadata.type = data_info.metadata.type
            inputs_metadata.append(inp_metadata)
        # get the params arguments
        for param in self._process_params:
            args.append(param)
        # setup outputs
        for output in self.process.metadata.outputs:
            # output metadata
            processed_data = ProcessedData()
            processed_data.metadata.name = (
                data_info_zero.metadata.name + "_" + output.name
            )
            processed_data.metadata.author = \
                ConfigAccess.instance().get('user')['name']
            processed_data.metadata.date = format_date('now')
            processed_data.metadata.format = output.type
            processed_data.metadata.run_uri = run.md_uri
            processed_data.metadata.inputs = inputs_metadata
            processed_data.metadata.output = {
                'name': output.name,
                'label': output.description,
            }
            # save the metadata and create its md_uri and uri
            processed_dataset.create_data(processed_data)
            # args
            args.append(output.name)
            args.append(processed_data.metadata.uri)
        # 4.2- exec
        runner = Runner(self.process)
        runner.exec(*args)
    # 5- notify observers
    for observer in self._observers:
        observer.notify({'progress': 100, 'message': 'done'})
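# The args list handed to Runner.exec alternates names and values, e.g. for a
# hypothetical denoising tool on the i-th data item:
#
#   ['i', '/exp/raw/img001.tif',          # one (name, uri) pair per input
#    'sigma', '2',                        # flattened parameter (name, value) pairs
#    'o', '/exp/processed/img001_o.tif']  # one (name, uri) pair per output
#
# runner.exec(*args) then forwards this flat list to the backend runner service.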
def __init__(self):
    self.workspace_dir = ConfigAccess.instance().config['workspace']
    config = ConfigAccess.instance().config['metadata']
    self.service = metadataServices.get(config["service"], **config)
def __init__(self, md_uri: str = ''):
    self.md_uri = md_uri
    self.metadata = None  # RunContainer()
    config = ConfigAccess.instance().config['metadata']
    self.service = metadataServices.get(config["service"], **config)
    self.read()
def __init__(self):
    super().__init__()
    self.service_name = 'LocalRunnerService'
    self.conda_dir = ConfigAccess.instance().get('runner')['conda_dir']
def test_access(self):
    ConfigAccess(self.config_local_file)
    content = ConfigAccess.instance().config
    self.assertTrue(_check_content(content))
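# A sketch of the config structure these classes read, assembled only from the
# keys referenced in the snippets above (all values are hypothetical
# placeholders):
config_sketch = {
    'workspace': '/home/jdoe/workspace',
    'user': {'name': 'jdoe'},
    'fiji': '/opt/fiji/ImageJ-linux64',
    'metadata': {'service': 'LOCAL'},
    'process': {
        'service': 'LOCAL',
        'xml_dirs': ['/opt/tools/xml'],
        'tools': '/opt/tools/tools.json',
    },
    'runner': {
        'conda_dir': '/opt/miniconda3',
        # 'token': ...  # optional, used only by the remote runner
    },
}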
def __init__(self):
    super().__init__()
    config = ConfigAccess.instance().config['process']
    self.xml_dir = config['xml_dirs'][0]
    self.tools_file = config['tools']
def __init__(self):
    config = ConfigAccess.instance().config['process']
    self.service = processServices.get(config['service'], **config)
def __init__(self, uri: str):
    self.uri = uri
    config = ConfigAccess.instance().config['process']
    self.service = processServices.get(config['service'], **config)
    self.metadata = self.service.read_process(self.uri)
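# `metadataServices.get(...)` and `processServices.get(...)` above follow a
# config-driven factory pattern: the 'service' entry in the config selects the
# backend implementation. A minimal sketch of such a factory (hypothetical;
# the real builders live elsewhere in the code base):
class ServiceFactory:
    def __init__(self):
        self._builders = {}

    def register(self, name, builder):
        # map a config 'service' name to a callable building that backend
        self._builders[name] = builder

    def get(self, name, **config):
        # instantiate the backend selected by the config, forwarding the
        # remaining config entries to its builder
        if name not in self._builders:
            raise ValueError(f'Unknown service: {name}')
        return self._builders[name](**config)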