def create_workflow(inp, out, names=None, verbose=False):
    if not osp.isfile(inp):
        raise Exception('File not found %s' % inp)
    commands = [e.rstrip('\n').split(' ') for e in open(inp).readlines()]
    if verbose:
        print commands
    names = ['job_%s' % i for i in xrange(len(commands))] if names is None else names
    jobs = [Job(command=cmd, name=name) for cmd, name in zip(commands, names)]
    workflow = Workflow(jobs=jobs, dependencies=[])
    Helper.serialize(out, workflow)
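# Hypothetical usage of create_workflow above (an added sketch, not from the
# original source): 'commands.txt' is a placeholder file with one shell command
# per line; the serialized workflow can be re-loaded with Helper.unserialize or
# opened in soma_workflow_gui.
from soma_workflow.client import Helper

create_workflow('commands.txt', 'commands.somawf', verbose=True)
workflow = Helper.unserialize('commands.somawf')
print len(workflow.jobs)  # Python 2 print, matching the example above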
def export_to_gui(self, soma_workflow_dirpath, **Xy):
    '''
    Example
    -------
    see the directory of "examples/run_somaworkflow_gui.py" in epac
    '''
    try:
        from soma_workflow.client import Job, Workflow
        from soma_workflow.client import Helper, FileTransfer
    except ImportError:
        errmsg = "No soma-workflow is found. "\
                 "Please verify your soma-worklow"\
                 "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    if not os.path.exists(soma_workflow_dirpath):
        os.makedirs(soma_workflow_dirpath)
    tmp_work_dir_path = soma_workflow_dirpath
    cur_work_dir = os.getcwd()
    os.chdir(tmp_work_dir_path)
    ft_working_directory = FileTransfer(is_input=True,
                                        client_path=tmp_work_dir_path,
                                        name="working directory")

    ## Save the database and tree to working directory
    ## ===============================================
    # np.savez(os.path.join(tmp_work_dir_path,
    #          SomaWorkflowEngine.dataset_relative_path), **Xy)
    db_size = estimate_dataset_size(**Xy)
    db_size = int(db_size / (1024 * 1024))  # convert it into mega byte
    save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
    store = StoreFs(dirpath=os.path.join(
        tmp_work_dir_path,
        SomaWorkflowEngine.tree_root_relative_path))
    self.tree_root.save_tree(store=store)

    ## Subtree job allocation on disk
    ## ==============================
    node_input = NodesInput(self.tree_root.get_key())
    split_node_input = SplitNodesInput(self.tree_root,
                                       num_processes=self.num_processes)
    nodesinput_list = split_node_input.split(node_input)
    keysfile_list = self._save_job_list(tmp_work_dir_path, nodesinput_list)

    ## Build soma-workflow
    ## ===================
    jobs = self._create_jobs(keysfile_list,
                             is_run_local=False,
                             ft_working_directory=ft_working_directory)
    soma_workflow = Workflow(jobs=jobs)
    if soma_workflow_dirpath and soma_workflow_dirpath != "":
        out_soma_workflow_file = os.path.join(
            soma_workflow_dirpath,
            SomaWorkflowEngine.open_me_by_soma_workflow_gui)
        Helper.serialize(out_soma_workflow_file, soma_workflow)
    os.chdir(cur_work_dir)
def create_somaWF(liste_python_files):
    jobs = []
    for file_python in liste_python_files:
        file_name = os.path.basename(file_python)
        job_1 = Job(command=["python", file_python], name=file_name)
        jobs.append(job_1)
    # jobs = [job_1]
    dependencies = []
    workflow = Workflow(jobs=jobs, dependencies=dependencies)
    # save the workflow into a file
    somaWF_name = os.path.join(path_script, "soma_WF_JOBS")
    Helper.serialize(somaWF_name, workflow)
def test_serialization(self):
    simple_wf_examples = workflow_local.WorkflowExamplesLocal()
    tr_wf_examples = workflow_transfer.WorkflowExamplesTransfer()
    srp_wf_examples = workflow_shared.WorkflowExamplesShared()
    self.temporaries += [simple_wf_examples.output_dir,
                         tr_wf_examples.output_dir,
                         srp_wf_examples.output_dir]
    workflows = []
    workflows.append(("multiple", simple_wf_examples.example_multiple()))
    workflows.append(("special_command",
                      simple_wf_examples.example_special_command()))
    workflows.append(("mutiple_transfer",
                      tr_wf_examples.example_multiple()))
    workflows.append(("special_command_transfer",
                      tr_wf_examples.example_special_command()))
    workflows.append(("special_transfer",
                      tr_wf_examples.example_special_transfer()))
    workflows.append(("mutiple_srp", srp_wf_examples.example_multiple()))
    workflows.append(("special_command_srp",
                      srp_wf_examples.example_special_command()))
    for workflow_name, workflow in workflows:
        print("Testing", workflow_name)
        file_path = tempfile.mkstemp(prefix="json_",
                                     suffix=workflow_name + ".wf")
        os.close(file_path[0])
        file_path = file_path[1]
        Helper.serialize(file_path, workflow)
        new_workflow = Helper.unserialize(file_path)
        self.assertTrue(new_workflow.attributs_equal(workflow),
                        "Serialization failed for workflow %s" % workflow_name)
        try:
            os.remove(file_path)
        except IOError:
            pass
def test_serialization(self):
    directory = "/tmp/"
    simple_wf_examples = workflow_local.WorkflowExamplesLocal()
    tr_wf_examples = workflow_transfer.WorkflowExamplesTransfer()
    srp_wf_examples = workflow_shared.WorkflowExamplesShared()
    workflows = []
    workflows.append(("multiple", simple_wf_examples.example_multiple()))
    workflows.append(("special_command",
                      simple_wf_examples.example_special_command()))
    workflows.append(("mutiple_transfer",
                      tr_wf_examples.example_multiple()))
    workflows.append(("special_command_transfer",
                      tr_wf_examples.example_special_command()))
    workflows.append(("special_transfer",
                      tr_wf_examples.example_special_transfer()))
    workflows.append(("mutiple_srp", srp_wf_examples.example_multiple()))
    workflows.append(("special_command_srp",
                      srp_wf_examples.example_special_command()))
    for workflow_name, workflow in workflows:
        print("Testing", workflow_name)
        file_path = os.path.join(directory, "json_" + workflow_name + ".wf")
        Helper.serialize(file_path, workflow)
        new_workflow = Helper.unserialize(file_path)
        self.assertTrue(new_workflow.attributs_equal(workflow),
                        "Serialization failed for workflow %s" % workflow_name)
        try:
            os.remove(file_path)
        except IOError:
            pass
def test_serialization(self):
    directory = "/tmp/"
    simple_wf_examples = workflow_local.WorkflowExamplesLocal()
    tr_wf_examples = workflow_transfer.WorkflowExamplesTransfer()
    srp_wf_examples = workflow_shared.WorkflowExamplesShared()
    workflows = []
    workflows.append(("multiple", simple_wf_examples.example_multiple()))
    workflows.append(("special_command",
                      simple_wf_examples.example_special_command()))
    workflows.append(("mutiple_transfer",
                      tr_wf_examples.example_multiple()))
    workflows.append(("special_command_transfer",
                      tr_wf_examples.example_special_command()))
    workflows.append(("special_transfer",
                      tr_wf_examples.example_special_transfer()))
    workflows.append(("mutiple_srp", srp_wf_examples.example_multiple()))
    workflows.append(("special_command_srp",
                      srp_wf_examples.example_special_command()))
    for workflow_name, workflow in workflows:
        print "Testing", workflow_name
        file_path = os.path.join(directory, "json_" + workflow_name + ".wf")
        Helper.serialize(file_path, workflow)
        new_workflow = Helper.unserialize(file_path)
        self.assertTrue(new_workflow.attributs_equal(workflow),
                        "Serialization failed for workflow %s" % workflow_name)
        try:
            os.remove(file_path)
        except IOError:
            pass
def save_wf(wf, output_file, mode="soma-workflow"):
    """Save the workflow in a file.

    Supports a simple JSON command list ("cmd-list") or soma-workflow.

    Parameters
    ----------
    wf : tuple (cmd-dict, dependencies),
        Workflow to save.
    output_file : str,
        filename for the workflow.
    mode : str in ["soma-workflow", "cmd-list"], optional (default="soma-workflow")
        format to save the workflow.
    """
    cmd = wf[0]
    dep_orig = wf[1]
    if mode == "soma-workflow":
        from soma_workflow.client import Job, Workflow, Helper
        for k, v in cmd.iteritems():
            cmd[k] = Job(command=v, name=k)
        dep = [(cmd[a], cmd[b]) for a, b in dep_orig]
        jobs = np.asarray(cmd.values())[np.argsort(cmd.keys())]
        workflow = Workflow(jobs=jobs.tolist(), dependencies=dep)
        Helper.serialize(output_file, workflow)
        return workflow
    elif mode == "cmd-list":
        import json
        for k, v in cmd.iteritems():
            cmd[k] = " ".join(v)
        with open(output_file, 'w') as fd:
            json.dump(dict(cmd=cmd, dep=dep_orig), fd, indent=True)
        return cmd
    else:
        raise TypeError("Invalid workflow mode '{}'".format(mode))
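# Hypothetical usage of save_wf above (an added sketch, not from the original
# source): a two-job workflow where "job_b" depends on "job_a". The command
# lists and the output filename are placeholders.
commands = {"job_a": ["python", "prepare.py"],
            "job_b": ["python", "analyse.py"]}
dependencies = [("job_a", "job_b")]
save_wf((commands, dependencies), "my_workflow.somawf", mode="soma-workflow")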
                  ],
                  name="Merging all the r2 and distribution respectively together.",
                  working_directory=scripts_path)
jobs.append(job_merge)

# Plotting the maps
job_final = Job(command=["python", "create_maps.py",
                         "--input", derivatives_path,
                         "--parameters", parameters_path,
                         "--subject", args.subject,
                         "--fmri_data", fmri_path],
                name="Creating the maps.",
                working_directory=scripts_path)
jobs.append(job_final)
dependencies.append((job_merge, job_final))

workflow = Workflow(jobs=jobs, dependencies=dependencies)

Helper.serialize(os.path.join(inputs_path, 'delete.somawf'), workflow)

### Submit the workflow to computing resource (configured in the client-server mode)
controller = WorkflowController("DSV_cluster_{}".format(login), login,
                                password)  # "DSV_cluster_ap259944", login, password

workflow_id = controller.submit_workflow(workflow=workflow, name="Ridge - LPP")

print("Finished !!!")
                                           listRunPerSubj[s])
    body = body + ", " + str(EEGbadlist[s]) + ", " + ')'
    jobname = subj
    for c in cond:
        jobname = jobname + '_' + str(c)
    ListJobName.append(jobname)

    # write jobs in a dedicated folder
    name_file = []
    name_file = os.path.join(path_script,
                             ("JOBS_PREPROC/Preproc_STC1" + jobname + ".py"))
    Listfile.append(name_file)
    with open(name_file, 'w') as python_file:
        python_file.write(body)

jobs = []
for i in range(len(Listfile)):
    JobVar = Job(command=['python', Listfile[i]],
                 name=ListJobName[i],
                 native_specification='-l walltime=4:00:00 -l nodes=1:ppn=8')
    jobs.append(JobVar)

WfVar = Workflow(jobs=jobs, dependencies=[])

# save the workflow into a file
somaWF_name = os.path.join(path_script,
                           "SOMA_WFs/soma_WF_PREPROC_allsub_allcond")
Helper.serialize(somaWF_name, WfVar)

#####################################################################
    # use a transparent and complete job name referring to arguments of interest
    tmpname = "_dSPM_" + ListSubj[subj]
    nametag.append(tmpname)

    # write jobs in a dedicated folder
    name_file = []
    name_file = os.path.join(path_script,
                             ("JOBS_SOURCE/Source_" + tmpname + ".py"))
    Listfile.append(name_file)
    python_file = open(name_file, "w")
    python_file.write(body)
    python_file.close()

name_file1 = os.path.join(path_script, ("JOBS_SOURCE/source_distTEVT.py"))
with open(name_file1, 'w') as python_file:
    python_file.write(everybody)

jobs = []
for i in range(len(Listfile)):
    # job_1 = Job(command=["python", Listfile[i]], name=nametag[i],
    #             native_specification="-l walltime=1:00:00, -l nodes=1:ppn=1")
    job_1 = Job(command=["python", Listfile[i]], name=nametag[i])
    jobs.append(job_1)

workflow = Workflow(jobs=jobs, dependencies=[])

# save the workflow into a file
somaWF_name = os.path.join(path_script, "SOMA_WFs/soma_WF_SOURCE")
Helper.serialize(somaWF_name, workflow)

#####################################################################
            dependencies.append((fit, transform))
            inner_fold_jobs.append(transform)  # Just for grouping

            # Predict task
            predict_cmd = predict_command(param_transform_files,
                                          param_prediction_file)
            job_name = common_job_name + "/predict"
            predict = Job(command=predict_cmd, name=job_name)
            jobs.append(predict)
            dependencies.append((transform, predict))
            inner_fold_jobs.append(predict)  # Just for grouping

            # Set dependencies of cleaning job
            dependencies.append((predict, clean_job))
        # End loop on params

        # Group all jobs of this fold in a group
        group_elements.append(
            Group(elements=inner_fold_jobs,
                  name="Outer fold {out}/Inner fold {inn}".format(
                      out=outer_fold_index,
                      inn=inner_fold_index)))
    # End inner loop
# End outer loop

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=group_elements,
                    name=WF_NAME)

# save the workflow into a file
Helper.serialize(os.path.join(OUT_DIR, WF_NAME), workflow)
jobs += group_score + group_significativity + group_merge
jobs.append(job_final)

scores = Group(elements=group_score,
               name="group where test scores are calculated")
significativity = Group(
    elements=group_significativity,
    name="group where distributions are calculated for significance")
merge = Group(elements=group_merge, name="group where we merge results")

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=[scores, significativity, merge, job_final])

Helper.serialize(
    os.path.join(inputs_path, 'optimized_cluster_part_2.somawf'), workflow)

### Submit the workflow to computing resource (configured in the client-server mode)
controller = WorkflowController("DSV_cluster_{}".format(login), login,
                                password)  # "DSV_cluster_ap259944", login, password

workflow_id = controller.submit_workflow(workflow=workflow,
                                         name="Cluster optimized part 2")

print("Finished !!!")
    with open(name_file, 'w') as python_file:
        python_file.write(body)

###############################################################################
# create workflow
jobs = []
for i in range(len(Listfile)):
    JobVar = Job(command=['python', Listfile[i]],
                 name=ListJobName[i],
                 native_specification='-l walltime=4:00:00, -l nodes=1:ppn=2')
    jobs.append(JobVar)

# define dependencies (tuples of two jobs): the second job will be executed
# after the first. Here, plot the grand average after having written the evoked
# data for each subject (a worked example of the indexing follows this snippet).
n = len(ListSubject)
dependencies = [(jobs[c * n + s + c], jobs[(c + 1) * n + c])
                for s, subject in enumerate(ListSubject)
                for c, condcouple in enumerate(ListCondition)]

# save the workflow into a file
WfVar = Workflow(jobs=jobs, dependencies=dependencies)
somaWF_name = os.path.join(wdir, 'somawf/workflows/DEMO_WF')
Helper.serialize(somaWF_name, WfVar)
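# Illustration of the dependency indexing above (an added sketch, assuming the
# job list holds, for each condition, n per-subject jobs followed by one
# grand-average job). With n = 2 subjects and 2 conditions the list reads
#   [subj0_c0, subj1_c0, GA_c0, subj0_c1, subj1_c1, GA_c1]
# and the comprehension pairs every subject job with its condition's
# grand-average job:
#   c = 0 -> (jobs[0], jobs[2]), (jobs[1], jobs[2])
#   c = 1 -> (jobs[3], jobs[5]), (jobs[4], jobs[5])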
2. copy all the spm batch files in the directory containing the following
   python script; then run

       python create_workflow_spm12batches.py

   This must generate a file called 'spm12_batches.somawf'

3. Launch soma_workflow_gui on the command line in the same directory;
   open the spm12_batches.somawf file and press 'submit'

Note: use your own PC if it has many cores.
"""

import os, sys, glob
from soma_workflow.client import Job, Workflow, Helper

if len(sys.argv) == 1:
    spm12_batches = glob.glob("*.mat")
else:
    spm12_batches = sys.argv[1:]

jobs = []
for b in spm12_batches:
    jobs.append(Job(command=["spm12", "run", os.path.abspath(b)], name=b))

workflow = Workflow(jobs)

Helper.serialize('spm12_batches.somawf', workflow)

print '''Now, you can open 'spm12_batches.somawf' in soma_workflow_gui and submit it'''
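# A possible alternative to the GUI step above (an added sketch, not part of
# the original script): re-load the serialized file and submit it directly.
# "my_resource", "my_login" and "my_password" are placeholders for a computing
# resource already configured for soma-workflow.
from soma_workflow.client import Helper, WorkflowController

workflow = Helper.unserialize('spm12_batches.somawf')
controller = WorkflowController("my_resource", "my_login", "my_password")
workflow_id = controller.submit_workflow(workflow=workflow, name="spm12 batches")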
        outer = cmd[5]
        key = outer + design
        if not dep_dic.has_key(key):
            dep_dic[key] = []
        cur_job = Job(command=cmd, name="Reduce %s outer %s" % (design, outer))
        red_jobs[key] = cur_job
    elif cmd[1].endswith("multiblox_comparison.outer.reducer.R"):
        print "multiblox_comparison.outer.reducer"
        cur_job = Job(command=cmd, name="Final Reduce")
        final_red = cur_job
    elif cmd[1].endswith("coxnet.mapper.R"):
        print "coxnet mapper"
        cur_job = Job(command=cmd, name="Coxnet")
        glmnet_job = cur_job
        pass
    else:
        raise Exception("Unknown task, abort...")
    jobs.append(cur_job)

dependencies = []
for k, j in red_jobs.items():
    parent_list = dep_dic[k]
    for p in parent_list:
        dependencies.append((p, j))
    dependencies.append((j, final_red))
dependencies.append((glmnet_job, final_red))

workflow = Workflow(jobs=jobs, dependencies=dependencies)

# save the workflow into a file
Helper.serialize(workflow_file, workflow)
"python", "create_maps.py", "--input", derivatives_path, "--parameters", parameters_path, "--subject", args.subject, "--fmri_data", fmri_path ], name="Creating the maps.", working_directory=scripts_path) jobs.append(job_final) dependencies.append((job_merge, job_final)) cv_alphas = Group(elements=group_cv_alphas, name="CV on alphas") significativity = Group(elements=group_significativity, name="Fit of the models with best alphas") workflow = Workflow( jobs=jobs, dependencies=dependencies, root_group=[job_0, cv_alphas, significativity, job_merge, job_final]) Helper.serialize(os.path.join(inputs_path, 'cluster_jobs.somawf'), workflow) ### Submit the workflow to computing resource (configured in the client-server mode) controller = WorkflowController( "DSV_cluster_{}".format(login), login, password) #"DSV_cluster_ap259944", login, password workflow_id = controller.submit_workflow(workflow=workflow, name="Ridge - LPP") print("Finished !!!")
def export(self, script_path):
    try:
        from soma_workflow.client import Job
        from soma_workflow.client import Group
        from soma_workflow.client import Workflow
        from soma_workflow.client import SharedResourcePath
        from soma_workflow.client import Helper
    except ImportError:
        errmsg = "No soma-workflow is found. "\
                 "Please verify your soma-worklow"\
                 "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    # dataset on remote machine
    dataset_dir = SharedResourcePath(
        relative_path=self.dataset_relative_path,
        namespace=self.namespace,
        uuid=self.uuid)
    # Tree on remote machine
    epac_tree_dir = SharedResourcePath(
        relative_path=self.tree_relative_path,
        namespace=self.namespace,
        uuid=self.uuid)
    # Reduce output on remote machine
    out_dir = SharedResourcePath(relative_path=self.output_relative_path,
                                 namespace=self.namespace,
                                 uuid=self.uuid)
    # workflow file for soma-workflow
    soma_workflow_file = script_path
    # iterate all key jobs
    job_paths = []
    for root, _, files in os.walk(
            os.path.join(self.root, self.jobs_relative_path)):
        for f in files:
            _, ext = os.path.splitext(f)
            if ext == ".job":
                job_paths.append(f)
    # Building mapper tasks
    dependencies = []
    map_jobs = []
    for i in range(len(job_paths)):
        job_relative_path = os.path.join(self.jobs_relative_path,
                                         job_paths[i])
        key_path = SharedResourcePath(relative_path=job_relative_path,
                                      namespace=self.namespace,
                                      uuid=self.uuid)
        map_cmd = []
        map_cmd.append("epac_mapper")
        map_cmd.append("--datasets")
        map_cmd.append(dataset_dir)
        map_cmd.append("--keysfile")
        map_cmd.append(key_path)
        map_cmd.append("--treedir")
        map_cmd.append(epac_tree_dir)
        map_job = Job(command=map_cmd,
                      name="map_step",
                      referenced_input_files=[],
                      referenced_output_files=[])
        map_jobs.append(map_job)
    group_map_jobs = Group(elements=map_jobs, name="all map jobs")
    # Building reduce step
    reduce_cmd = []
    reduce_cmd.append("epac_reducer")
    reduce_cmd.append("--treedir")
    reduce_cmd.append(epac_tree_dir)
    reduce_cmd.append("--outdir")
    reduce_cmd.append(out_dir)
    reduce_job = Job(command=reduce_cmd,
                     name="reduce_step",
                     referenced_input_files=[],
                     referenced_output_files=[])
    for map_job in map_jobs:
        dependencies.append((map_job, reduce_job))
    jobs = map_jobs + [reduce_job]
    # Build workflow and save into disk
    workflow = Workflow(jobs=jobs,
                        dependencies=dependencies,
                        root_group=[group_map_jobs, reduce_job])
    Helper.serialize(soma_workflow_file, workflow)
def export(self, workflow_dir, num_processes):
    '''
    Parameters
    ----------
    workflow_dir: string
        the directory to export the workflow to
    num_processes: integer
        the number of processes you want to run
    '''
    try:
        from soma_workflow.client import Job
        from soma_workflow.client import Group
        from soma_workflow.client import Workflow
        from soma_workflow.client import Helper
    except ImportError:
        errmsg = "No soma-workflow is found. "\
                 "Please verify your soma-worklow"\
                 "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    self.workflow_dir = workflow_dir
    soma_workflow_file = os.path.join(self.workflow_dir, "soma_workflow")
    if not os.path.exists(self.workflow_dir):
        os.makedirs(self.workflow_dir)
    tree_root = load_tree(self.epac_tree_dir_path)
    keysfile_list = export_jobs(tree_root, num_processes, workflow_dir)
    # Building mapper tasks
    dependencies = []
    map_jobs = []
    for i in range(len(keysfile_list)):
        key_path = os.path.join(workflow_dir, keysfile_list[i])
        map_cmd = []
        map_cmd.append("epac_mapper")
        map_cmd.append("--datasets")
        map_cmd.append(self.dataset_dir_path)
        map_cmd.append("--keysfile")
        map_cmd.append(key_path)
        map_cmd.append("--treedir")
        map_cmd.append(self.epac_tree_dir_path)
        map_job = Job(command=map_cmd,
                      name="map_step",
                      referenced_input_files=[],
                      referenced_output_files=[])
        map_jobs.append(map_job)
    group_map_jobs = Group(elements=map_jobs, name="all map jobs")
    # Building reduce task
    reduce_cmd = []
    reduce_cmd.append("epac_reducer")
    reduce_cmd.append("--treedir")
    reduce_cmd.append(self.epac_tree_dir_path)
    reduce_cmd.append("--outdir")
    reduce_cmd.append(self.out_dir_path)
    reduce_job = Job(command=reduce_cmd,
                     name="reduce_step",
                     referenced_input_files=[],
                     referenced_output_files=[])
    for map_job in map_jobs:
        dependencies.append((map_job, reduce_job))
    jobs = map_jobs + [reduce_job]
    # Build workflow and save into disk
    workflow = Workflow(jobs=jobs,
                        dependencies=dependencies,
                        root_group=[group_map_jobs, reduce_job])
    Helper.serialize(soma_workflow_file, workflow)
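# A possible follow-up to export() above (an added sketch, not part of the
# class): load the serialized workflow and submit it to a computing resource.
# 'exporter', the resource name and the credentials are hypothetical
# placeholders; Helper.wait_workflow is assumed available in the installed
# soma-workflow version.
import os
from soma_workflow.client import Helper, WorkflowController

exporter.export("/tmp/epac_workflow", num_processes=4)
workflow = Helper.unserialize(os.path.join("/tmp/epac_workflow", "soma_workflow"))
controller = WorkflowController("my_cluster", "my_login", "my_password")
workflow_id = controller.submit_workflow(workflow=workflow, name="epac map-reduce")
Helper.wait_workflow(workflow_id, controller)  # block until the workflow finishes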