def example_barrier(self):
    jobs = [
        self.job_sleep(2), self.job_sleep(2), self.job_sleep(2),
        self.job_sleep(2), self.job_sleep(2), self.job_sleep(2),
        self.job_sleep(2)
    ]
    job_names = [
        'step1.1', 'step1.2',
        'step2.1.1', 'step2.1.2', 'step2.2.1', 'step2.2.2',
        'step3'
    ]
    barriers = [
        BarrierJob(name='barrier1'),
        BarrierJob(name='barrier2.1'),
        BarrierJob(name='barrier2.2'),
        BarrierJob(name='barrier3')
    ]
    for j, n in zip(jobs, job_names):
        j.name = n
    dependencies = [(jobs[0], barriers[0]),
                    (jobs[1], barriers[0]),
                    (barriers[0], barriers[1]),
                    (barriers[0], barriers[2]),
                    (barriers[1], jobs[2]),
                    (barriers[1], jobs[3]),
                    (barriers[2], jobs[4]),
                    (barriers[2], jobs[5]),
                    (jobs[2], barriers[3]),
                    (jobs[3], barriers[3]),
                    (jobs[4], barriers[3]),
                    (jobs[5], barriers[3]),
                    (barriers[3], jobs[6])]
    workflow = Workflow(jobs + barriers, dependencies)
    return workflow
def SimpleJobExample(configuration_item_name, userid, userpw=None):
    """Dummy workflow to test the install.

    Parameters
    ----------
    configuration_item_name: str
        the name of the configuration item (ex. "Gabriel")
    userid: str
        user name on the server side
    userpw: str (optional)
        user password to log in to the server using ssh. To authenticate
        with the "id_rsa.pub" public key instead, leave userpw to None;
        copy the key to the server first with ssh-copy-id name@server.
    """
    job_1 = Job(command=["sleep", "5"], name="job 1")
    job_2 = Job(command=["sleep", "5"], name="job 2")
    job_3 = Job(command=["sleep", "5"], name="job 3")
    job_4 = Job(command=["sleep", "5"], name="job 4")

    jobs = [job_1, job_2, job_3, job_4]
    dependencies = [(job_1, job_2),
                    (job_1, job_3),
                    (job_2, job_4),
                    (job_3, job_4)]

    workflow = Workflow(jobs=jobs, dependencies=dependencies)

    controller = WorkflowController(configuration_item_name, userid, userpw)
    controller.submit_workflow(workflow=workflow,
                               name="TestConnectionExample")
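# A minimal usage sketch for SimpleJobExample, assuming a computing
# resource named "Gabriel" is declared in the soma-workflow configuration
# and the public key is already installed on the server, so no password
# is needed ("myuser" is a hypothetical login):
SimpleJobExample("Gabriel", "myuser")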
def example_dynamic_outputs_with_loo(self):
    # small leave-one-out
    # jobs
    job1 = self.job_list_with_outputs2()
    job2_train = self.job_reduce_cat(14)
    job2_train.name = 'train'
    job2_test = self.job8_with_output()
    job2_test.name = 'test'
    # building the workflow
    jobs = [job1, job2_train, job2_test]
    dependencies = []
    links = {
        job2_train: {
            'inputs': [(job1, 'outputs', ('list_all_but_one', 2))]
        },
        job2_test: {
            'input': [(job1, 'outputs', ('list_to_sequence', 2))]
        },
    }
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies,
                        root_group=[job1, job2_train, job2_test],
                        name=function_name, param_links=links)
    return workflow
def example_dynamic_outputs_with_loo_jobs(self):
    # small leave-one-out
    # jobs
    job1 = self.job_list_with_outputs2()
    loo_job = LeaveOneOutJob(
        referenced_input_files=job1.referenced_output_files,
        param_dict={'index': 2})
    job2_train = self.job_reduce_cat(14)
    job2_train.name = 'train'
    job2_test = self.job8_with_output()
    job2_test.name = 'test'
    # building the workflow
    jobs = [job1, loo_job, job2_train, job2_test]
    dependencies = []
    links = {
        loo_job: {
            'inputs': [(job1, 'outputs')]
        },
        job2_train: {
            'inputs': [(loo_job, 'train')]
        },
        job2_test: {
            'input': [(loo_job, 'test')]
        },
    }
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies,
                        root_group=[job1, loo_job, job2_train, job2_test],
                        name=function_name, param_links=links)
    return workflow
def example_multiple(self):
    workflow1 = self.example_simple()
    workflow2 = self.example_simple_exception1()
    workflow3 = self.example_simple_exception2()

    jobs = workflow1.jobs
    jobs.extend(workflow2.jobs)
    jobs.extend(workflow3.jobs)

    dependencies = list(workflow1.dependencies)
    dependencies.extend(workflow2.dependencies)
    dependencies.extend(workflow3.dependencies)

    param_links = dict(workflow1.param_links)
    param_links.update(workflow2.param_links)
    param_links.update(workflow3.param_links)

    group1 = Group(name="simple example",
                   elements=workflow1.root_group)
    group2 = Group(name="simple with exception in Job1",
                   elements=workflow2.root_group)
    group3 = Group(name="simple with exception in Job3",
                   elements=workflow3.root_group)

    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies,
                        root_group=[group1, group2, group3],
                        name=function_name)
    return workflow
def example_dynamic_outputs(self):
    # jobs
    job1 = self.job1_with_outputs1()
    job2 = self.job2_with_outputs1()
    job3 = self.job3()
    job4 = self.job4()
    # building the workflow
    jobs = [job1, job2, job3, job4]
    dependencies = [(job1, job2),
                    (job1, job3),
                    (job2, job4),
                    (job3, job4)]
    group_1 = Group(name='group_1', elements=[job2, job3])
    group_2 = Group(name='group_2', elements=[job1, group_1])
    links = {
        job2: {
            'filePathIn1': [(job1, 'filePathOut1')]
        },
        job3: {
            'filePathIn': [(job1, 'filePathOut2')]
        },
        job4: {
            'file1': [(job2, 'filePathOut')]
        },
    }
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies,
                        root_group=[group_2, job4],
                        name=function_name, param_links=links)
    return workflow
def example_dynamic_outputs_with_cv(self):
    # small 4-fold cross-validation
    # jobs
    job1 = self.job_list_with_outputs2()
    job2_train = self.job_reduce_cat(16)
    job2_train.name = 'train'
    job2_test = self.job_reduce_cat(17)
    job2_test.name = 'test'
    # building the workflow
    jobs = [job1, job2_train, job2_test]
    dependencies = []
    links = {
        job2_train: {
            'inputs': [(job1, 'outputs', ('list_cv_train_fold', 1, 4))]
        },
        job2_test: {
            'inputs': [(job1, 'outputs', ('list_cv_test_fold', 1, 4))]
        },
    }
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies,
                        root_group=[job1, job2_train, job2_test],
                        name=function_name, param_links=links)
    return workflow
def _create_workflow(self, subject_ids):
    study_config = self._study
    workflow = Workflow(
        name='Morphologist UI - %s' % study_config.study_name,
        jobs=[])
    workflow.root_group = []
    initial_vol_format = study_config.volumes_format

    priority = (len(subject_ids) - 1) * 100
    for subject_id in subject_ids:
        analysis = self._study.analyses[subject_id]
        subject = self._study.subjects[subject_id]
        analysis.set_parameters(subject)
        # analysis.propagate_parameters()
        pipeline = analysis.pipeline
        pipeline.enable_all_pipeline_steps()
        # force highest priority normalization method
        # FIXME: specific knowledge of Morphologist should not be used here.
        pipeline.Normalization_select_Normalization_pipeline \
            = 'NormalizeSPM'
        pipeline_tools.disable_runtime_steps_with_existing_outputs(
            pipeline)

        missing = pipeline_tools.nodes_with_missing_inputs(pipeline)
        if missing:
            self.check_missing_models(pipeline, missing)
            print('MISSING INPUTS IN NODES:', missing)
            raise MissingInputFileError("subject: %s" % subject_id)

        wf = pipeline_workflow.workflow_from_pipeline(
            pipeline, study_config=study_config, jobs_priority=priority)
        njobs = len([j for j in wf.jobs if isinstance(j, Job)])
        if njobs != 0:
            priority -= 100
        workflow.jobs += wf.jobs
        workflow.dependencies += wf.dependencies
        group = Group(wf.root_group,
                      name='Morphologist %s' % str(subject))
        group.user_storage = subject_id
        workflow.root_group.append(group)  # += wf.root_group
        workflow.groups += [group] + wf.groups
    return workflow
def example_special_command(self):
    # jobs
    test_command_job = self.job_test_command_1()
    # building the workflow
    jobs = [test_command_job]
    dependencies = []
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies, name=function_name)
    return workflow
def example_n_jobs(self, nb=300, time=60):
    jobs = []
    for i in range(0, nb):
        job = self.job_sleep(time)
        jobs.append(job)
    dependencies = []
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies, name=function_name)
    return workflow
def example_special_transfer(self):
    # jobs
    test_dir_contents = self.job_test_dir_contents()
    test_multi_file_format = self.job_test_multi_file_format()
    # building the workflow
    jobs = [test_dir_contents, test_multi_file_format]
    dependencies = []
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies, name=function_name)
    return workflow
def example_native_spec_pbs(self):
    # jobs
    job1 = self.job1(option="-l walltime=5:00:00,pmem=16gb")
    job2 = self.job1(option="-l walltime=5:00:0")
    job3 = self.job1()
    # building the workflow
    jobs = [job1, job2, job3]
    workflow = Workflow(jobs, dependencies=[],
                        name="jobs with native spec for pbs")
    return workflow
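# A hedged sketch of the same effect without a job factory: PBS directives
# can be attached directly to a Job through its native_specification
# parameter (the sleep command is a placeholder):
job = Job(command=["sleep", "60"], name="pbs job",
          native_specification="-l walltime=5:00:00,pmem=16gb")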
import os.path as osp

from soma_workflow.client import Job, Workflow, Helper


def create_workflow(inp, out, names=None, verbose=False):
    if not osp.isfile(inp):
        raise Exception('File not found %s' % inp)
    with open(inp) as f:
        commands = [e.rstrip('\n').split(' ') for e in f]
    if verbose:
        print(commands)
    names = ['job_%s' % i for i in range(len(commands))] \
        if names is None else names
    jobs = [Job(command=cmd, name=name)
            for cmd, name in zip(commands, names)]
    workflow = Workflow(jobs=jobs, dependencies=[])
    Helper.serialize(out, workflow)
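# Usage sketch for create_workflow; "commands.txt" and "jobs.workflow" are
# hypothetical paths, and the input file is expected to hold one shell
# command per line (e.g. "sleep 5"). The serialized workflow can then be
# opened with soma_workflow_gui:
create_workflow("commands.txt", "jobs.workflow", verbose=True)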
def export_to_gui(self, soma_workflow_dirpath, **Xy):
    '''
    Example
    -------
    see the directory of "examples/run_somaworkflow_gui.py" in epac
    '''
    try:
        from soma_workflow.client import Job, Workflow
        from soma_workflow.client import Helper, FileTransfer
    except ImportError:
        errmsg = "No soma-workflow is found. "\
            "Please verify your soma-workflow "\
            "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    if not os.path.exists(soma_workflow_dirpath):
        os.makedirs(soma_workflow_dirpath)
    tmp_work_dir_path = soma_workflow_dirpath
    cur_work_dir = os.getcwd()
    os.chdir(tmp_work_dir_path)
    ft_working_directory = FileTransfer(is_input=True,
                                        client_path=tmp_work_dir_path,
                                        name="working directory")

    ## Save the database and tree to working directory
    ## ===============================================
    # np.savez(os.path.join(tmp_work_dir_path,
    #          SomaWorkflowEngine.dataset_relative_path), **Xy)
    db_size = estimate_dataset_size(**Xy)
    db_size = int(db_size / (1024 * 1024))  # convert it into megabytes
    save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
    store = StoreFs(dirpath=os.path.join(
        tmp_work_dir_path,
        SomaWorkflowEngine.tree_root_relative_path))
    self.tree_root.save_tree(store=store)

    ## Subtree job allocation on disk
    ## ==============================
    node_input = NodesInput(self.tree_root.get_key())
    split_node_input = SplitNodesInput(self.tree_root,
                                       num_processes=self.num_processes)
    nodesinput_list = split_node_input.split(node_input)
    keysfile_list = self._save_job_list(tmp_work_dir_path,
                                        nodesinput_list)

    ## Build soma-workflow
    ## ===================
    jobs = self._create_jobs(keysfile_list,
                             is_run_local=False,
                             ft_working_directory=ft_working_directory)
    soma_workflow = Workflow(jobs=jobs)
    if soma_workflow_dirpath and soma_workflow_dirpath != "":
        out_soma_workflow_file = os.path.join(
            soma_workflow_dirpath,
            SomaWorkflowEngine.open_me_by_soma_workflow_gui)
        Helper.serialize(out_soma_workflow_file, soma_workflow)
    os.chdir(cur_work_dir)
def create_somaWF(liste_python_files):
    jobs = []
    for file_python in liste_python_files:
        file_name = os.path.basename(file_python)
        job_1 = Job(command=["python", file_python], name=file_name)
        jobs.append(job_1)
    # jobs = [job_1]
    dependencies = []
    workflow = Workflow(jobs=jobs, dependencies=dependencies)

    # save the workflow into a file
    somaWF_name = os.path.join(path_script, "soma_WF_JOBS")
    Helper.serialize(somaWF_name, workflow)
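# Usage sketch for create_somaWF, assuming path_script is defined at module
# level; the script names are hypothetical. The serialized "soma_WF_JOBS"
# file can then be opened with soma_workflow_gui:
create_somaWF(["analysis_a.py", "analysis_b.py"])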
def example_serial_jobs(self, nb=5):
    jobs = []
    dependencies = []
    previous_job = self.job_sleep(2)
    jobs.append(previous_job)
    for i in range(0, nb):
        job = self.job_sleep(2)
        jobs.append(job)
        dependencies.append((previous_job, job))
        previous_job = job
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies, name=function_name)
    return workflow
def example_fake_pipelineT1(self, n_iter=100):
    jobs = []
    dependencies = []
    root_group = []
    for i in range(0, n_iter):
        job1 = self.job_sleep(2)
        job1.name = "Brain extraction"
        jobs.append(job1)

        job11 = self.job_sleep(1)
        job11.name = "test 1"
        jobs.append(job11)
        job12 = self.job_sleep(1)
        job12.name = "test 2"
        jobs.append(job12)
        job13 = self.job_sleep(1)
        job13.name = "test 3"
        jobs.append(job13)

        job2 = self.job_sleep(2)
        job2.name = "Gray/white segmentation"
        jobs.append(job2)
        job3 = self.job_sleep(2)
        job3.name = "Left hemisphere sulci recognition"
        jobs.append(job3)
        job4 = self.job_sleep(2)
        job4.name = "Right hemisphere sulci recognition"
        jobs.append(job4)

        # dependencies.append((job1, job2))
        dependencies.append((job1, job11))
        dependencies.append((job11, job12))
        dependencies.append((job12, job13))
        dependencies.append((job13, job2))
        dependencies.append((job2, job3))
        dependencies.append((job2, job4))

        group_sulci = Group(name="Sulci recognition",
                            elements=[job3, job4])
        group_subject = Group(
            name="sulci recognition -- subject " + repr(i),
            elements=[job1, job11, job12, job13, job2, group_sulci])
        root_group.append(group_subject)
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies, root_group,
                        name=function_name)
    return workflow
def example_dynamic_outputs_with_mapreduce_jobs(self):
    # small map/reduce using MapJob / ReduceJob
    # jobs
    job1 = self.job_list_with_outputs()
    job2_0 = self.job8_with_output()
    job2_0.name = 'job2_0'
    job2_1 = self.job8_with_output()
    job2_1.name = 'job2_1'
    job3 = self.job_reduce_cat()
    map_job = MapJob(referenced_input_files=job1.referenced_output_files,
                     name='map')
    reduce_job = ReduceJob()
    # building the workflow
    jobs = [job1, job2_0, job2_1, job3, map_job, reduce_job]
    dependencies = []
    group_1 = Group(name='group_1', elements=[job2_0, job2_1])
    links = {
        map_job: {
            'inputs': [(job1, 'outputs')]
        },
        job2_0: {
            'input': [(map_job, 'output_0')]
        },
        job2_1: {
            'input': [(map_job, 'output_1')]
        },
        reduce_job: {
            'input_0': [(job2_0, 'output')],
            'input_1': [(job2_1, 'output')],
            'lengths': [(map_job, 'lengths')]
        },
        job3: {
            'inputs': [(reduce_job, 'outputs')]
        },
    }
    function_name = inspect.stack()[0][3]
    workflow = Workflow(
        jobs, dependencies,
        root_group=[job1, map_job, group_1, reduce_job, job3],
        name=function_name, param_links=links)
    return workflow
def example_simple_exception2(self):
    # jobs
    job1 = self.job1()
    job2 = self.job2()
    job4 = self.job4()
    job3 = self.job3_exception()

    jobs = [job1, job2, job3, job4]
    dependencies = [(job1, job2),
                    (job1, job3),
                    (job2, job4),
                    (job3, job4)]
    group_1 = Group(name='group_1', elements=[job2, job3])
    group_2 = Group(name='group_2', elements=[job1, group_1])
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies,
                        root_group=[group_2, job4],
                        name=function_name)
    return workflow
def example_dynamic_outputs_with_cv_jobs(self):
    # small 4-fold cross-validation
    # jobs
    job1 = self.job_list_with_outputs2()
    cv_job = CrossValidationFoldJob(
        referenced_input_files=job1.referenced_output_files,
        param_dict={'nfolds': 4, 'fold': 1})
    job2_train = self.job_reduce_cat(16)
    job2_train.name = 'train'
    job2_test = self.job_reduce_cat(17)
    job2_test.name = 'test'
    # building the workflow
    jobs = [job1, cv_job, job2_train, job2_test]
    dependencies = []
    links = {
        cv_job: {
            'inputs': [(job1, 'outputs')]
        },
        job2_train: {
            'inputs': [(cv_job, 'train')]
        },
        job2_test: {
            'inputs': [(cv_job, 'test')]
        },
    }
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies,
                        root_group=[job1, cv_job, job2_train, job2_test],
                        name=function_name, param_links=links)
    return workflow
def example_n_jobs_with_dependencies(self, nb=500, time=60):
    dependencies = []
    jobs = []
    intermed_job1 = self.job_sleep(2)
    jobs.append(intermed_job1)
    intermed_job2 = self.job_sleep(2)
    jobs.append(intermed_job2)

    elem_group1 = []
    for i in range(0, nb):
        job = self.job_sleep(time)
        jobs.append(job)
        elem_group1.append(job)
        dependencies.append((job, intermed_job1))
    group1 = Group(name="Group 1", elements=elem_group1)

    elem_group2 = []
    for i in range(0, nb):
        job = self.job_sleep(time)
        jobs.append(job)
        elem_group2.append(job)
        dependencies.append((intermed_job1, job))
        dependencies.append((job, intermed_job2))
    group2 = Group(name="Group 2", elements=elem_group2)

    elem_group3 = []
    for i in range(0, nb):
        job = self.job_sleep(time)
        jobs.append(job)
        elem_group3.append(job)
        dependencies.append((intermed_job2, job))
    group3 = Group(name="Group 3", elements=elem_group3)

    root_group = [group1, intermed_job1, group2, intermed_job2, group3]
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies, root_group,
                        name=function_name)
    return workflow
def save_wf(wf, output_file, mode="soma-workflow"):
    """Save the workflow in a file.

    Supports a simple JSON commands list ("cmd-list") or soma-workflow.

    Parameters
    ----------
    wf : tuple (cmd-dict, dependencies)
        Workflow to save.
    output_file : str
        Filename for the workflow.
    mode : str in ["soma-workflow", "cmd-list"], optional
        (default="soma-workflow") format to save the workflow.
    """
    cmd = wf[0]
    dep_orig = wf[1]
    if mode == "soma-workflow":
        from soma_workflow.client import Job, Workflow, Helper
        for k, v in cmd.items():
            cmd[k] = Job(command=v, name=k)
        dep = [(cmd[a], cmd[b]) for a, b in dep_orig]
        # order the jobs by name to get a deterministic job list
        jobs = [cmd[k] for k in sorted(cmd)]
        workflow = Workflow(jobs=jobs, dependencies=dep)
        Helper.serialize(output_file, workflow)
        return workflow
    elif mode == "cmd-list":
        import json
        for k, v in cmd.items():
            cmd[k] = " ".join(v)
        with open(output_file, 'w') as fd:
            json.dump(dict(cmd=cmd, dep=dep_orig), fd, indent=True)
        return cmd
    else:
        raise TypeError("Invalid workflow mode '{}'".format(mode))
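# Usage sketch for save_wf: a two-command workflow where job "b" depends
# on job "a" (the echo commands and file names are hypothetical):
wf = ({"a": ["echo", "a"], "b": ["echo", "b"]}, [("a", "b")])
save_wf(wf, "wf.json", mode="cmd-list")
save_wf(wf, "wf.workflow", mode="soma-workflow")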
def example_dynamic_outputs_with_mapreduce(self):
    # small map/reduce
    # jobs
    job1 = self.job_list_with_outputs()
    job2_0 = self.job8_with_output()
    job2_0.name = 'job2_0'
    job2_1 = self.job8_with_output()
    job2_1.name = 'job2_1'
    job3 = self.job_reduce_cat()
    # building the workflow
    jobs = [job1, job2_0, job2_1, job3]
    dependencies = []
    group_1 = Group(name='group_1', elements=[job2_0, job2_1])
    links = {
        job2_0: {
            'input': [(job1, 'outputs', ('list_to_sequence', 0))]
        },
        job2_1: {
            'input': [(job1, 'outputs', ('list_to_sequence', 1))]
        },
        job3: {
            'inputs': [(job2_0, 'output', ('sequence_to_list', 0)),
                       (job2_1, 'output', ('sequence_to_list', 1))]
        },
    }
    function_name = inspect.stack()[0][3]
    workflow = Workflow(jobs, dependencies,
                        root_group=[job1, group_1, job3],
                        name=function_name, param_links=links)
    return workflow
            dependencies.append((fit, transform))
            inner_fold_jobs.append(transform)  # Just for grouping

            # Predict task
            predict_cmd = predict_command(param_transform_files,
                                          param_prediction_file)
            job_name = common_job_name + "/predict"
            predict = Job(command=predict_cmd, name=job_name)
            jobs.append(predict)
            dependencies.append((transform, predict))
            inner_fold_jobs.append(predict)  # Just for grouping

            # Set dependencies of cleaning job
            dependencies.append((predict, clean_job))
        # End loop on params

        # Group all jobs of this fold in a group
        group_elements.append(
            Group(elements=inner_fold_jobs,
                  name="Outer fold {out}/Inner fold {inn}".format(
                      out=outer_fold_index,
                      inn=inner_fold_index)))
    # End inner loop
# End outer loop

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=group_elements,
                    name=WF_NAME)

# save the workflow into a file
Helper.serialize(os.path.join(OUT_DIR, WF_NAME), workflow)
f.close()

# Creation of the FileTransfer object to transfer the working directory
my_working_directory = FileTransfer(is_input=True,
                                    client_path="/tmp/my_working_directory",
                                    name="working directory")

# Jobs and Workflow
job1 = Job(command=["cp", "myfile1", "copy_of_myfile1"],
           name="job1",
           referenced_input_files=[my_working_directory],
           referenced_output_files=[my_working_directory],
           working_directory=my_working_directory)

job2 = Job(command=["cp", "myfile2", "copy_of_myfile2"],
           name="job2",
           referenced_input_files=[my_working_directory],
           referenced_output_files=[my_working_directory],
           working_directory=my_working_directory)

workflow = Workflow(jobs=[job1, job2],
                    dependencies=[])

# Submit the workflow
print("password? ")
password = getpass.getpass()
controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="working directory transfer example")
def morphologist_all(t1file, sid, outdir, study="morphologist",
                     waittime=10, somaworkflow=False,
                     spmexec="/i2bm/local/spm8-standalone/run_spm8.sh",
                     spmdir="/i2bm/local/spm8-standalone"):
    """ Performs all the Morphologist steps.

    Steps:

    1- Ensure image orientation and reorient it if needed (Prepare Subject
       for Anatomical Pipeline).
    2- Computation of a brain mask (Brain Mask Segmentation).
    3- Computation of a mask for each hemisphere (Split Brain Mask).
    4- A grey/white classification of each hemisphere to perform "Voxel
       Based Morphometry" (Grey White Classification) and spherical
       triangulation of cortical hemispheres (Grey White Surface).
    5- Spherical triangulation of the external interface of the cortex of
       one or two hemispheres (Get Spherical Hemi Surface).
    6- Computation of a graph representing the cortical fold topography
       (Cortical Fold Graph).
    7- Automatic identification of the cortical sulci (Automatic Sulci
       Recognition), located in the "sulci" toolbox.

    The execution is performed with soma_workflow, which has to be
    installed in the bv_env environment. To check the workflow submission,
    use the 'soma_workflow_gui' command.

    If the input 't1file' does not have the expected extension, an
    Exception is raised. If $outdir/$study/$sid has already been created,
    an Exception is raised.

    Parameters
    ----------
    t1file: str (mandatory)
        the path to a ".nii.gz" anatomical T1 weighted file.
    sid: str (mandatory)
        a subject identifier.
    outdir: str (mandatory)
        the morphologist output files will be written in
        $outdir/$study/$sid.
    study: str (mandatory)
        the name of the study.
    waittime: float (optional, default 10)
        a delay (in seconds) used to check the workflow status.
    somaworkflow: bool (optional, default False)
        if True use soma-workflow for the execution.
    spmexec: str (optional)
        the path to the standalone SPM execution file.
    spmdir: str (optional)
        the standalone SPM directory.

    Returns
    -------
    wffile: str
        a file containing the submitted workflow.
    wfid: int
        the submitted workflow identifier.
    wfstatus: str
        the submitted workflow status after 'waittime' seconds.
    """
    # Check roughly the input file extension
    if not t1file.endswith(".nii.gz"):
        raise Exception("'{0}' is not a COMPRESSED NIFTI file.".format(
            t1file))

    # Create a configuration for the morphologist study
    study_config = StudyConfig(
        modules=StudyConfig.default_modules + ["FomConfig",
                                               "BrainVISAConfig"])
    study_dict = {
        "name": "morphologist_fom",
        "input_directory": outdir,
        "output_directory": outdir,
        "input_fom": "morphologist-auto-nonoverlap-1.0",
        "output_fom": "morphologist-auto-nonoverlap-1.0",
        "shared_fom": "shared-brainvisa-1.0",
        "spm_directory": spmdir,
        "use_soma_workflow": True,
        "use_fom": True,
        "spm_standalone": True,
        "use_matlab": False,
        "volumes_format": "NIFTI gz",
        "meshes_format": "GIFTI",
        "use_spm": True,
        "spm_exec": spmexec,
        "study_config.somaworkflow_computing_resource": "localhost",
        "somaworkflow_computing_resources_config": {
            "localhost": {}
        }
    }
    study_config.set_study_configuration(study_dict)

    # Create the morphologist pipeline
    pipeline = get_process_instance(
        "morphologist.capsul.morphologist.Morphologist")
    morphologist_pipeline = process_with_fom.ProcessWithFom(
        pipeline, study_config)
    morphologist_pipeline.attributes = dict(
        (trait_name, getattr(morphologist_pipeline, trait_name))
        for trait_name in morphologist_pipeline.user_traits())
    morphologist_pipeline.attributes["center"] = "morphologist"
    morphologist_pipeline.attributes["subject"] = sid
    morphologist_pipeline.create_completion()

    # Create morphologist expected tree
    # ToDo: use ImportT1 from axon
    subjectdir = os.path.join(outdir, study, sid)
    if os.path.isdir(subjectdir):
        raise Exception("Folder '{0}' already created.".format(subjectdir))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition",
        "default_analysis", "folds", "3.1", "default_session_auto"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition", "registration"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition", "segmentation",
        "mesh"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition", "tmp"))

    # Copy T1 file in the morphologist expected location
    destfile = os.path.join(subjectdir, "t1mri", "default_acquisition",
                            sid + ".nii.gz")
    shutil.copy(t1file, destfile)

    # Create source_referential morphologist expected file
    source_referential = {"uuid": str(soma.uuid.Uuid())}
    referential_file = os.path.join(
        subjectdir, "t1mri", "default_acquisition", "registration",
        "RawT1-{0}_default_acquisition.referential".format(sid))
    attributes = "attributes = {0}".format(json.dumps(source_referential))
    with open(referential_file, "w") as openfile:
        openfile.write(attributes)

    # Create a workflow from the morphologist pipeline
    workflow = Workflow(name="{0} {1}".format(study, sid), jobs=[])
    workflow.root_group = []

    # Create the workflow
    wf = pipeline_workflow.workflow_from_pipeline(
        morphologist_pipeline.process, study_config=study_config)
    workflow.add_workflow(wf, as_group="{0}_{1}".format(study, sid))
    wffile = os.path.join(subjectdir, "{0}.wf".format(study))
    pickle.dump(workflow, open(wffile, "wb"))

    # Execute the workflow with soma-workflow
    if somaworkflow:
        controller = WorkflowController()
        wfid = controller.submit_workflow(
            workflow=workflow, name="{0}_{1}".format(study, sid))

        # Return the workflow status after execution
        while True:
            time.sleep(waittime)
            wfstatus = controller.workflow_status(wfid)
            if wfstatus not in [
                    "workflow_not_started", "workflow_in_progress"]:
                break

    # Execute the workflow with subprocess
    else:
        # -> construct the ordered list of commands to be executed
        workflow_repr = workflow.to_dict()
        graph = Graph()
        for job in workflow_repr["jobs"]:
            graph.add_node(GraphNode(job, None))
        for link in workflow_repr["dependencies"]:
            graph.add_link(link[0], link[1])
        ordered_nodes = [str(node[0]) for node in graph.topological_sort()]
        commands = []
        jobs = workflow_repr["serialized_jobs"]
        temporaries = workflow_repr["serialized_temporary_paths"]
        barriers = workflow_repr["serialized_barriers"]
        for index in ordered_nodes:
            if index in jobs:
                commands.append(jobs[index]["command"])
            elif index in barriers:
                continue
            else:
                raise Exception("Unexpected node in workflow.")

        # -> Go through all commands
        tmpmap = {}
        for cmd in commands:
            # -> deal with temporary files
            for index, item in enumerate(cmd):
                if not isinstance(item, str):
                    if str(item) not in tmpmap:
                        if str(item) in temporaries:
                            struct = temporaries[str(item)]
                            name = cmd[2].split(";")[1].split()[-1]
                            tmppath = os.path.join(
                                subjectdir, "t1mri",
                                "default_acquisition", "tmp",
                                str(item) + name + struct["suffix"])
                            tmpmap[str(item)] = tmppath
                        else:
                            raise MorphologistError(
                                "Can't complete command '{0}'.".format(
                                    cmd))
                    cmd[index] = tmpmap[str(item)]

            # -> execute the command
            worker = MorphologistWrapper(cmd)
            worker()
            if worker.exitcode != 0:
                raise MorphologistRuntimeError(
                    " ".join(worker.cmd), worker.stderr)
        wfstatus = "Done"
        wfid = "subprocess"

    return wffile, wfid, wfstatus
#    f.write("Content of my file \n")
#    f.close()

# FileTransfer creation for input files
myfile = FileTransfer(is_input=True,
                      client_path="/tmp/soma_workflow_examples/myfile",
                      name="myfile")

# FileTransfer creation for output files
copy_of_myfile = FileTransfer(
    is_input=False,
    client_path="/tmp/soma_workflow_examples/copy_of_myfile",
    name="copy of my file")

# Job and Workflow
copy_job = Job(command=["cp", myfile, copy_of_myfile],
               name="copy",
               referenced_input_files=[myfile],
               referenced_output_files=[copy_of_myfile])

workflow = Workflow(jobs=[copy_job],
                    dependencies=[])

login = '******'
password = '******'
controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="simple transfer")
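# A hedged follow-up sketch: when a workflow references FileTransfer
# objects, the input files still have to be pushed to the server before
# execution and the outputs pulled back afterwards. Capturing the id
# returned by submit_workflow makes that possible (Helper comes from
# soma_workflow.client):
from soma_workflow.client import Helper

wf_id = controller.submit_workflow(workflow=workflow,
                                   name="simple transfer")
Helper.transfer_input_files(wf_id, controller)
Helper.wait_workflow(wf_id, controller)
Helper.transfer_output_files(wf_id, controller)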
from soma_workflow.client import Job, Workflow, WorkflowController

job_1 = Job(command=["sleep", "60"], name="job 1")
job_2 = Job(command=["sleep", "60"], name="job 2")
job_3 = Job(command=["sleep", "60"], name="job 3")
job_4 = Job(command=["sleep", "60"], name="job 4")

jobs = [job_1, job_2, job_3, job_4]
dependencies = [(job_1, job_2),
                (job_1, job_3),
                (job_2, job_4),
                (job_3, job_4)]

workflow = Workflow(jobs=jobs, dependencies=dependencies)

controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="simple example")
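# A minimal monitoring sketch, assuming the controller above: capture the
# workflow id, block until the run finishes, then print the final status
# (Helper comes from soma_workflow.client):
from soma_workflow.client import Helper

wf_id = controller.submit_workflow(workflow=workflow,
                                   name="simple example")
Helper.wait_workflow(wf_id, controller)
print(controller.workflow_status(wf_id))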
jobs = []
dependencies = []
group_elements = []

first_job = Job(command=["sleep", "10"], name="first job")
last_job = Job(command=["sleep", "10"], name="last job")

jobs.append(first_job)
jobs.append(last_job)

for i in range(0, 30):
    job = Job(command=["sleep", "60"], name="job " + repr(i))
    jobs.append(job)
    dependencies.append((first_job, job))
    dependencies.append((job, last_job))
    group_elements.append(job)

thirty_jobs_group = Group(elements=group_elements,
                          name="my 30 jobs")

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=[first_job, thirty_jobs_group, last_job])

login = '******'
password = '******'
controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="Simple workflow with group")
                             listRunPerSubj[s])
    body = body + ", " + str(EEGbadlist[s]) + ", " + ')'
    jobname = subj
    for c in cond:
        jobname = jobname + '_' + str(c)
    ListJobName.append(jobname)

    # write jobs in a dedicated folder
    name_file = []
    name_file = os.path.join(
        path_script, ("JOBS_PREPROC/Preproc_STC1" + jobname + ".py"))
    Listfile.append(name_file)
    with open(name_file, 'w') as python_file:
        python_file.write(body)

jobs = []
for i in range(len(Listfile)):
    JobVar = Job(command=['python', Listfile[i]], name=ListJobName[i],
                 native_specification='-l walltime=4:00:00 -l nodes=1:ppn=8')
    jobs.append(JobVar)
WfVar = Workflow(jobs=jobs, dependencies=[])

# save the workflow into a file
somaWF_name = os.path.join(path_script,
                           "SOMA_WFs/soma_WF_PREPROC_allsub_allcond")
Helper.serialize(somaWF_name, WfVar)
#####################################################################
def run(self, **Xy):
    '''Run soma-workflow without gui

    Example
    -------
    >>> from sklearn import datasets
    >>> from epac.map_reduce.engine import SomaWorkflowEngine
    >>> from epac.tests.wfexamples2test import WFExample2
    >>> ## Build dataset
    >>> ## =============
    >>> X, y = datasets.make_classification(n_samples=10,
    ...                                     n_features=20,
    ...                                     n_informative=5,
    ...                                     random_state=1)
    >>> Xy = {'X': X, 'y': y}
    >>> ## Build epac tree
    >>> ## ===============
    >>> tree_root_node = WFExample2().get_workflow()
    >>> ## Build SomaWorkflowEngine and run function for each node
    >>> ## =======================================================
    >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
    ...                                 function_name="transform",
    ...                                 num_processes=3,
    ...                                 remove_finished_wf=False)
    >>> tree_root_node = sfw_engine.run(**Xy)
    >>> ## Run reduce process
    >>> ## ==================
    >>> tree_root_node.reduce()
    ResultSet(
    [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6  0.6],
      'y/test/score_recall_mean/pval': [ 0.5],
      'y/test/score_recall/pval': [ 0.   0.5],
      'y/test/score_accuracy/pval': [ 0.],
      'y/test/score_f1/pval': [ 0.   0.5],
      'y/test/score_precision/pval': [ 0.5  0. ],
      'y/test/score_precision': [ 0.6  0.6],
      'y/test/score_recall': [ 0.6  0.6],
      'y/test/score_accuracy': 0.6,
      'y/test/score_recall_mean': 0.6},
     {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6  0.6],
      'y/test/score_recall_mean/pval': [ 0.5],
      'y/test/score_recall/pval': [ 0.   0.5],
      'y/test/score_accuracy/pval': [ 0.],
      'y/test/score_f1/pval': [ 0.   0.5],
      'y/test/score_precision/pval': [ 0.5  0. ],
      'y/test/score_precision': [ 0.6  0.6],
      'y/test/score_recall': [ 0.6  0.6],
      'y/test/score_accuracy': 0.6,
      'y/test/score_recall_mean': 0.6}])
    '''
    try:
        from soma_workflow.client import Job, Workflow
        from soma_workflow.client import Helper, FileTransfer
        from soma_workflow.client import WorkflowController
    except ImportError:
        errmsg = "No soma-workflow is found. "\
            "Please verify your soma-workflow "\
            "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    tmp_work_dir_path = tempfile.mkdtemp()
    cur_work_dir = os.getcwd()
    os.chdir(tmp_work_dir_path)
    is_run_local = False
    if not self.resource_id or self.resource_id == "":
        self.resource_id = socket.gethostname()
        is_run_local = True
    # print("is_run_local=", is_run_local)
    if not is_run_local:
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
    else:
        ft_working_directory = tmp_work_dir_path

    ## Save the database and tree to working directory
    ## ===============================================
    # np.savez(os.path.join(tmp_work_dir_path,
    #          SomaWorkflowEngine.dataset_relative_path), **Xy)
    save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
    store = StoreFs(dirpath=os.path.join(
        tmp_work_dir_path,
        SomaWorkflowEngine.tree_root_relative_path))
    self.tree_root.save_tree(store=store)

    ## Subtree job allocation on disk
    ## ==============================
    node_input = NodesInput(self.tree_root.get_key())
    split_node_input = SplitNodesInput(self.tree_root,
                                       num_processes=self.num_processes)
    nodesinput_list = split_node_input.split(node_input)
    keysfile_list = save_job_list(tmp_work_dir_path,
                                  nodesinput_list)

    ## Build soma-workflow
    ## ===================
    jobs = self._create_jobs(keysfile_list,
                             is_run_local,
                             ft_working_directory)
    soma_workflow = Workflow(jobs=jobs)
    controller = WorkflowController(self.resource_id,
                                    self.login,
                                    self.pw)

    ## run soma-workflow
    ## =================
    wf_id = controller.submit_workflow(workflow=soma_workflow,
                                       name="epac workflow",
                                       queue=self.queue)
    Helper.transfer_input_files(wf_id, controller)
    Helper.wait_workflow(wf_id, controller)
    Helper.transfer_output_files(wf_id, controller)
    self.engine_info = self.get_engine_info(controller, wf_id)
    if self.remove_finished_wf:
        controller.delete_workflow(wf_id)

    ## read result tree
    ## ================
    self.tree_root = store.load()
    os.chdir(cur_work_dir)
    if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
        shutil.rmtree(tmp_work_dir_path)
    return self.tree_root