def SimpleJobExample(configuration_item_name, userid, userpw=None):
    """Dummy workflow to test the install.

    Builds a four-job "diamond" workflow (one head job, two parallel
    middle jobs, one tail job) and submits it to the computing resource.

    Parameters
    ----------
    configuration_item_name: str
        the name of the configuration item (ex. "Gabriel")
    userid: str
        user name on the server side
    userpw: str (optional)
        user password to login the server using ssh. If you want to use
        "id_rsa.pub", just leave userpw to None. To copy the public key
        on the server use ssh-copy-id -i name@server.
    """
    # Four identical placeholder jobs; each just sleeps for 5 seconds.
    diamond_jobs = [Job(command=["sleep", "5"], name="job 1"),
                    Job(command=["sleep", "5"], name="job 2"),
                    Job(command=["sleep", "5"], name="job 3"),
                    Job(command=["sleep", "5"], name="job 4")]
    head, left, right, tail = diamond_jobs

    # Diamond dependency graph: head fans out to left/right, which both
    # have to finish before tail starts.
    edges = [(head, left), (head, right), (left, tail), (right, tail)]

    controller = WorkflowController(configuration_item_name, userid, userpw)
    controller.submit_workflow(
        workflow=Workflow(jobs=diamond_jobs, dependencies=edges),
        name="TestConnectionExample")
def setup_connection(cls, resource_id, login, password):
    """Open a workflow controller for *resource_id* and cache the
    connection parameters, timeouts and a JobExamples factory on the
    class for use by the test cases.
    """
    # Remember the credentials so individual tests can re-use them.
    cls.resource_id = resource_id
    cls.login = login
    cls.password = password

    # Timeout values handed to JobExamples below (same values as the
    # original setup; their exact semantics are defined by JobExamples).
    cls.transfer_timeout = -24
    cls.jobs_timeout = 1

    cls.wf_ctrl = WorkflowController(resource_id, login, password)
    cls.job_examples = JobExamples(cls.wf_ctrl, 'python',
                                   cls.transfer_timeout,
                                   cls.jobs_timeout)
# Fan-out / fan-in workflow: "first job" must run before thirty parallel
# jobs, all of which must finish before "last job".
first_job = Job(command=["sleep", "10"], name="first job")
last_job = Job(command=["sleep", "10"], name="last job")
jobs.append(first_job)
jobs.append(last_job)

group_elements = []
for i in range(30):
    job = Job(command=["sleep", "60"], name="job " + repr(i))
    group_elements.append(job)
    jobs.append(job)
    # Wire the fan-out and fan-in edges for this job.
    dependencies.append((first_job, job))
    dependencies.append((job, last_job))

# Display the thirty parallel jobs as a single group in the GUI.
thirty_jobs_group = Group(elements=group_elements,
                          name="my 30 jobs")

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=[first_job, thirty_jobs_group, last_job])

login = '******'
password = '******'
controller = WorkflowController("DSV_cluster", login, password)
controller.submit_workflow(workflow=workflow,
                           name="Simple workflow with group")
def morphologist_all(t1file, sid, outdir, study="morphologist", waittime=10,
                     somaworkflow=False,
                     spmexec="/i2bm/local/spm8-standalone/run_spm8.sh",
                     spmdir="/i2bm/local/spm8-standalone"):
    """ Performs all the Morphologist steps.

    Steps:

    1- Ensure image orientation and reorient it if needed (Prepare Subject
       for Anatomical Pipeline).
    2- Computation of a brain mask (Brain Mask Segmentation).
    3- Computation of a mask for each hemisphere (Split Brain Mask).
    4- A grey/white classification of each hemisphere to perform "Voxel
       Based Morphometry" (Grey White Classification) and spherical
       triangulation of cortical hemispheres (Grey White Surface).
    5- Spherical triangulation of the external interface of the cortex of
       one or two hemispheres (Get Spherical Hemi Surface).
    6- Computation of a graph representing the cortical fold topography
       (Cortical Fold Graph).
    7- Automatic identification of the cortical sulci (Automatic Sulci
       Recognition), located in the "sulci" toolbox.

    The execution is performed with soma_workflow that has to be installed
    in the bv_env environment.

    To check the workflow submission, use the 'soma_workflow_gui' command.

    If the input 't1file' does not have the expected extension, an
    Exception will be raised.
    If the $outdir/$study/$sid has already been created, an Exception will
    be raised.

    Parameters
    ----------
    t1file: str (mandatory)
        the path to a ".nii.gz" anatomical T1 weighted file.
    sid: str (mandatory)
        a subject identifier.
    outdir: str (mandatory)
        the morphologist output files will be written in
        $outdir/$study/$sid.
    study: str (mandatory)
        the name of the study.
    waittime: float (optional, default 10)
        a delay (in seconds) used to check the workflow status.
    somaworkflow: bool (optional, default False)
        if True use somaworkflow for the execution.
    spmexec: str (optional)
        the path to the standalone SPM execution file.
    spmdir: str (optional)
        the standalone SPM directory.

    Returns
    -------
    wffile: str
        a file containing the submitted workflow.
    wfid: int
        the submitted workflow identifier.
    wfstatus: str
        the submitted workflow status after 'waittime' seconds.
    """
    # Check roughly the input file extension
    if not t1file.endswith(".nii.gz"):
        raise Exception("'{0}' is not a COMPRESSED NIFTI file.".format(
            t1file))

    # Create a configuration for the morphologist study
    study_config = StudyConfig(
        modules=StudyConfig.default_modules + ["FomConfig",
                                               "BrainVISAConfig"])
    study_dict = {
        "name": "morphologist_fom",
        "input_directory": outdir,
        "output_directory": outdir,
        "input_fom": "morphologist-auto-nonoverlap-1.0",
        "output_fom": "morphologist-auto-nonoverlap-1.0",
        "shared_fom": "shared-brainvisa-1.0",
        "spm_directory": spmdir,
        "use_soma_workflow": True,
        "use_fom": True,
        "spm_standalone": True,
        "use_matlab": False,
        "volumes_format": "NIFTI gz",
        "meshes_format": "GIFTI",
        "use_spm": True,
        "spm_exec": spmexec,
        "study_config.somaworkflow_computing_resource": "localhost",
        "somaworkflow_computing_resources_config": {
            "localhost": {
            }
        }
    }
    study_config.set_study_configuration(study_dict)

    # Create the morphologist pipeline
    pipeline = get_process_instance(
        "morphologist.capsul.morphologist.Morphologist")
    morphologist_pipeline = process_with_fom.ProcessWithFom(
        pipeline, study_config)
    # Snapshot the current trait values, then override the attributes the
    # FOM completion needs (center/subject) before generating paths.
    morphologist_pipeline.attributes = dict(
        (trait_name, getattr(morphologist_pipeline, trait_name))
        for trait_name in morphologist_pipeline.user_traits())
    morphologist_pipeline.attributes["center"] = "morphologist"
    morphologist_pipeline.attributes["subject"] = sid
    morphologist_pipeline.create_completion()

    # Create morphologist expected tree
    # ToDo: use ImportT1 from axon
    subjectdir = os.path.join(outdir, study, sid)
    if os.path.isdir(subjectdir):
        raise Exception("Folder '{0}' already created.".format(subjectdir))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition",
        "default_analysis", "folds", "3.1", "default_session_auto"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition", "registration"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition", "segmentation", "mesh"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition", "tmp"))

    # Copy T1 file in the morphologist expected location
    destfile = os.path.join(subjectdir, "t1mri", "default_acquisition",
                            sid + ".nii.gz")
    shutil.copy(t1file, destfile)

    # Create source_referential morphologist expected file
    source_referential = {"uuid": str(soma.uuid.Uuid())}
    referential_file = os.path.join(
        subjectdir, "t1mri", "default_acquisition", "registration",
        "RawT1-{0}_default_acquisition.referential".format(sid))
    attributes = "attributes = {0}".format(json.dumps(source_referential))
    with open(referential_file, "w") as openfile:
        openfile.write(attributes)

    # Create a workflow from the morphologist pipeline
    workflow = Workflow(name="{0} {1}".format(study, sid),
                        jobs=[])
    workflow.root_group = []

    # Create the workflow
    wf = pipeline_workflow.workflow_from_pipeline(
        morphologist_pipeline.process, study_config=study_config)
    workflow.add_workflow(wf, as_group="{0}_{1}".format(study, sid))
    wffile = os.path.join(subjectdir, "{0}.wf".format(study))
    # NOTE(review): the file handle is never closed and text mode "w" is
    # used for pickling — "wb" inside a `with` block would be safer.
    pickle.dump(workflow, open(wffile, "w"))

    # Execute the workflow with somaworkflow
    if somaworkflow:
        controller = WorkflowController()
        wfid = controller.submit_workflow(
            workflow=workflow, name="{0}_{1}".format(study, sid))

        # Return the workflow status after execution: poll every
        # 'waittime' seconds until it leaves the running states.
        while True:
            time.sleep(waittime)
            wfstatus = controller.workflow_status(wfid)
            if wfstatus not in [
                    "worklflow_not_started", "workflow_in_progress"]:
                break

    # Execute the workflow with subprocess
    else:
        # -> construct the ordered list of commands to be executed
        workflow_repr = workflow.to_dict()
        graph = Graph()
        for job in workflow_repr["jobs"]:
            graph.add_node(GraphNode(job, None))
        for link in workflow_repr["dependencies"]:
            graph.add_link(link[0], link[1])
        # Topological order guarantees dependencies run before dependents.
        ordered_nodes = [str(node[0]) for node in graph.topological_sort()]
        commands = []
        jobs = workflow_repr["serialized_jobs"]
        temporaries = workflow_repr["serialized_temporary_paths"]
        barriers = workflow_repr["serialized_barriers"]
        for index in ordered_nodes:
            if index in jobs:
                commands.append(jobs[index]["command"])
            elif index in barriers:
                # Barriers carry no command; they only order jobs.
                continue
            else:
                raise Exception("Unexpected node in workflow.")

        # -> Go through all commands
        tmpmap = {}
        for cmd in commands:
            # -> deal with temporary files: non-string command items are
            # soma-workflow temporary-path placeholders that must be
            # resolved to concrete paths in the subject "tmp" directory.
            # (basestring => this code targets Python 2.)
            for index, item in enumerate(cmd):
                if not isinstance(item, basestring):
                    if str(item) not in tmpmap:
                        if str(item) in temporaries:
                            struct = temporaries[str(item)]
                            # Heuristic: recover the output name from the
                            # third command element — assumes a
                            # "...; ... <name>" shape. TODO confirm.
                            name = cmd[2].split(";")[1].split()[-1]
                            tmppath = os.path.join(
                                subjectdir, "t1mri", "default_acquisition",
                                "tmp",
                                str(item) + name + struct["suffix"])
                            tmpmap[str(item)] = tmppath
                        else:
                            raise MorphologistError(
                                "Can't complete command '{0}'.".format(
                                    cmd))
                    cmd[index] = tmpmap[str(item)]

            # -> execute the command
            worker = MorphologistWrapper(cmd)
            worker()
            if worker.exitcode != 0:
                raise MorphologistRuntimeError(
                    " ".join(worker.cmd), worker.stderr)
        wfstatus = "Done"
        wfid = "subprocess"

    return wffile, wfid, wfstatus
def run(self, **Xy):
    '''Run soma-workflow without gui

    Example
    -------

    >>> from sklearn import datasets
    >>> from epac.map_reduce.engine import SomaWorkflowEngine
    >>> from epac.tests.wfexamples2test import WFExample2
    >>> ## Build dataset
    >>> ## =============
    >>> X, y = datasets.make_classification(n_samples=10,
    ...                                     n_features=20,
    ...                                     n_informative=5,
    ...                                     random_state=1)
    >>> Xy = {'X':X, 'y':y}
    >>> ## Build epac tree
    >>> ## ===============
    >>> tree_root_node = WFExample2().get_workflow()
    >>> ## Build SomaWorkflowEngine and run function for each node
    >>> ## =======================================================
    >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
    ...                                 function_name="transform",
    ...                                 num_processes=3,
    ...                                 remove_finished_wf=False)
    >>> tree_root_node = sfw_engine.run(**Xy)
    light mode
    >>> ## Run reduce process
    >>> ## ==================
    >>> tree_root_node.reduce()
    ResultSet(
    [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6  0.6],
      'y/test/score_recall_mean/pval': [ 0.5],
      'y/test/score_recall/pval': [ 0.   0.5],
      'y/test/score_accuracy/pval': [ 0.],
      'y/test/score_f1/pval': [ 0.   0.5],
      'y/test/score_precision/pval': [ 0.5  0. ],
      'y/test/score_precision': [ 0.6  0.6],
      'y/test/score_recall': [ 0.6  0.6],
      'y/test/score_accuracy': 0.6,
      'y/test/score_recall_mean': 0.6},
     {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6  0.6],
      'y/test/score_recall_mean/pval': [ 0.5],
      'y/test/score_recall/pval': [ 0.   0.5],
      'y/test/score_accuracy/pval': [ 0.],
      'y/test/score_f1/pval': [ 0.   0.5],
      'y/test/score_precision/pval': [ 0.5  0. ],
      'y/test/score_precision': [ 0.6  0.6],
      'y/test/score_recall': [ 0.6  0.6],
      'y/test/score_accuracy': 0.6,
      'y/test/score_recall_mean': 0.6}])
    '''
    # soma-workflow is an optional dependency: fail with an explicit
    # message (on both streams) rather than an anonymous ImportError.
    try:
        from soma_workflow.client import Job, Workflow
        from soma_workflow.client import Helper, FileTransfer
        from soma_workflow.client import WorkflowController
    except ImportError:
        errmsg = "No soma-workflow is found. "\
            "Please verify your soma-worklow"\
            "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    # Work in a fresh temporary directory; the original cwd is restored
    # at the end of the run (process-wide side effect while running).
    tmp_work_dir_path = tempfile.mkdtemp()
    cur_work_dir = os.getcwd()
    os.chdir(tmp_work_dir_path)
    is_run_local = False
    if not self.resource_id or self.resource_id == "":
        # No resource configured: fall back to running on this machine.
        self.resource_id = socket.gethostname()
        is_run_local = True
    # print "is_run_local=", is_run_local
    if not is_run_local:
        # Remote run: the working directory must be transferred.
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
    else:
        # Local run: jobs can use the path directly.
        ft_working_directory = tmp_work_dir_path

    ## Save the database and tree to working directory
    ## ===============================================
    # np.savez(os.path.join(tmp_work_dir_path,
    # SomaWorkflowEngine.dataset_relative_path), **Xy)
    save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
    store = StoreFs(dirpath=os.path.join(
        tmp_work_dir_path,
        SomaWorkflowEngine.tree_root_relative_path))
    self.tree_root.save_tree(store=store)

    ## Subtree job allocation on disk
    ## ==============================
    node_input = NodesInput(self.tree_root.get_key())
    split_node_input = SplitNodesInput(self.tree_root,
                                       num_processes=self.num_processes)
    # One key file per process; each job works on its own subtree.
    nodesinput_list = split_node_input.split(node_input)
    keysfile_list = save_job_list(tmp_work_dir_path,
                                  nodesinput_list)

    ## Build soma-workflow
    ## ===================
    jobs = self._create_jobs(keysfile_list,
                             is_run_local,
                             ft_working_directory)
    soma_workflow = Workflow(jobs=jobs)
    controller = WorkflowController(self.resource_id,
                                    self.login,
                                    self.pw)
    ## run soma-workflow
    ## =================
    wf_id = controller.submit_workflow(workflow=soma_workflow,
                                       name="epac workflow",
                                       queue=self.queue)
    Helper.transfer_input_files(wf_id, controller)
    # Blocks until the whole workflow has finished.
    Helper.wait_workflow(wf_id, controller)
    Helper.transfer_output_files(wf_id, controller)
    self.engine_info = self.get_engine_info(controller, wf_id)
    if self.remove_finished_wf:
        controller.delete_workflow(wf_id)
    ## read result tree
    ## ================
    self.tree_root = store.load()
    os.chdir(cur_work_dir)
    if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
        shutil.rmtree(tmp_work_dir_path)
    return self.tree_root
def run_test(cls, debug=False, interactive=False, **kwargs):
    """Run this test class against every configured computing resource.

    For each resource found in the soma-workflow configuration file,
    open a WorkflowController, run all ``test_*`` methods of the class
    for every allowed (mode, file system) configuration, then tear the
    controller down. In interactive mode the user is asked which
    resources to test; the answers are cached on WorkflowTest for
    subsequent test classes.

    Raises RuntimeError if any test errors or fails.
    kwargs: 'keep_temporary' (bool) keeps the temporary LIGHT_MODE
    database/transfer files for inspection.
    """
    sys.stdout.write("********* soma-workflow tests: %s *********\n"
                     % cls.__name__)

    config_file_path = Configuration.search_config_path()
    resource_ids = Configuration.get_configured_resources(
        config_file_path)
    # Resources selected in a previous interactive run (cached on the
    # shared WorkflowTest base), if any.
    enabled_resources = getattr(WorkflowTest, 'enabled_resources', None)
    enable_resources = []
    if not hasattr(WorkflowTest, 'resource_pass'):
        WorkflowTest.resource_pass = {}

    for resource_id in resource_ids:
        sys.stdout.write("============ Resource : " + resource_id
                         + " =================== \n")
        config = Configuration.load_from_file(resource_id,
                                              config_file_path)
        # Non-interactive runs only exercise LIGHT_MODE resources unless
        # an explicit enabled list was recorded earlier.
        if not interactive \
                and ((enabled_resources is None
                      and config.get_mode() != LIGHT_MODE)
                     or (enabled_resources is not None
                         and resource_id not in enabled_resources)):
            sys.stdout.write('Resource %s is not tested in '
                             'non-interactive mode\n' % resource_id)
            continue
        # skip login/password ask
        if interactive:
            if enabled_resources is None:
                # First interactive pass: ask the user per resource.
                sys.stdout.write("Do you want to test the resource "
                                 "%s (Y/n) ? " % resource_id)
                sys.stdout.flush()
                test_resource = sys.stdin.readline()
                if test_resource.strip() in ['no', 'n', 'N', 'No', 'NO']:
                    # Skip the resource
                    sys.stdout.write('Resource %s is not tested \n'
                                     % resource_id)
                    sys.stdout.flush()
                    continue
                enable_resources.append(resource_id)
                (login, password) = get_user_id(resource_id, config)
                # Cache credentials for later test classes.
                WorkflowTest.resource_pass[resource_id] = (login,
                                                           password)
            else:
                if resource_id not in enabled_resources:
                    continue
                (login, password) = WorkflowTest.resource_pass[
                    resource_id]
        else:
            (login, password) = get_user_id(resource_id, config,
                                            interactive=interactive)

        if config.get_mode() == LIGHT_MODE:
            # use a temporary sqlite database in soma-workflow to avoid
            # concurrent access problems
            tmpdb = tempfile.mkstemp('.db', prefix='swf_')
            os.close(tmpdb[0])
            os.unlink(tmpdb[1])
            # and so on for transfers / stdio files directory
            tmptrans = tempfile.mkdtemp(prefix='swf_')
            config._database_file = tmpdb[1]
            config._transfered_file_dir = tmptrans

        wf_controller = None
        try:
            with suppress_stdout(debug):
                wf_controller = WorkflowController(resource_id, login,
                                                   password,
                                                   config=config)
                cls.setup_wf_controller(wf_controller)
            # Keep only the configurations matching this resource's mode.
            allowed_config = cls.allowed_config[:]
            for configuration in cls.allowed_config:
                if config.get_mode() != configuration[0]:
                    allowed_config.remove(configuration)
            if len(allowed_config) == 0:
                sys.stdout.write(
                    "No tests available for the resource %s \n"
                    % resource_id)

            for configuration in allowed_config:
                (mode, file_system) = configuration
                sys.stdout.write(
                    "\n---------------------------------------\n")
                sys.stdout.write("Mode : " + mode + '\n')
                sys.stdout.write("File system : " + file_system + '\n')
                cls.setup_path_management(file_system)
                # Shared-path cases need a path translation setup; skip
                # them when the configuration does not provide one.
                if file_system in (cls.SHARED_RESOURCE_PATH,
                                   cls.SHARED_TRANSFER) \
                        and not config.get_path_translation():
                    sys.stdout.write(
                        "Paths translation unavailable - not testing "
                        "this case\n")
                    sys.stdout.flush()
                    continue

                # Collect every test_* method of the class.
                suite_list = []
                list_tests = []
                for test in dir(cls):
                    prefix = "test_"
                    if len(test) < len(prefix):
                        continue
                    if test[0:len(prefix)] == prefix:
                        list_tests.append(test)

                suite_list.append(
                    unittest.TestSuite(list(map(cls, list_tests))))
                alltests = unittest.TestSuite(suite_list)
                with suppress_stdout(debug):
                    res = unittest.TextTestRunner(
                        verbosity=2).run(alltests)
                sys.stdout.flush()
                sys.stdout.write("after test\n")
                if len(res.errors) != 0 or len(res.failures) != 0:
                    raise RuntimeError("tests failed.")
        finally:
            # Always stop the engine and clean temporary files, even if
            # a test raised.
            sys.stdout.write("del wf_controller")
            if wf_controller:
                wf_controller.stop_engine()
            del wf_controller
            cls.setup_wf_controller(None)
            # del WorkflowController
            sys.stdout.write("deleted.")
            if config.get_mode() == LIGHT_MODE:
                if not kwargs.get('keep_temporary', False):
                    if os.path.exists(config._database_file):
                        os.unlink(config._database_file)
                    if os.path.exists(config._database_file
                                      + '-journal'):
                        os.unlink(config._database_file + '-journal')
                    shutil.rmtree(config._transfered_file_dir)
                else:
                    print('temporary files kept:')
                    print('databse file:', config._database_file)
                    print('transfers:', config._transfered_file_dir)

    # Remember the interactive selection for the next test class.
    if interactive and enabled_resources is None:
        print('set enabled_resources')
        WorkflowTest.enabled_resources = enable_resources
# Register every job with the workflow: the three job groups plus the
# final merge job.
jobs.extend(group_score + group_significativity + group_merge)
jobs.append(job_final)

# Logical grouping of the jobs as displayed by the soma-workflow GUI.
scores = Group(elements=group_score,
               name="group where test scores are calculated")
significativity = Group(
    elements=group_significativity,
    name="group where distributions are calculated for significance")
merge = Group(elements=group_merge,
              name="group where we merge results")

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=[scores, significativity, merge, job_final])

# Keep a serialized copy of the workflow next to the inputs.
Helper.serialize(
    os.path.join(inputs_path, 'optimized_cluster_part_2.somawf'),
    workflow)

### Submit the workflow to computing resource (configured in the client-server mode)
controller = WorkflowController(
    "DSV_cluster_{}".format(login), login, password)

workflow_id = controller.submit_workflow(
    workflow=workflow,
    name="Cluster optimized part 2")

print("Finished !!!")
def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply on each item.
                           **this function must be importable on the remote
                           side**
        *largs* (list of tuple): each item in the list is a tuple
                                 containing all positional argument values of
                                 the function
        *lkwargs* (list of dict): each item in the list is a dict
                                  containing all named arguments of the
                                  function mapped to their value.

        *mode* (str): indicates how execution is distributed. Choices are:

            - "serial": single-thread loop on the local machine
            - "local" : use joblib to run tasks in parallel.
                        The number of simultaneous jobs is defined in
                        the configuration section ['parallel-local']['nb_procs']
                        see ~/.pyhrf/config.cfg
            - "remote_cluster: use somaworkflow to run tasks in parallel.
                               The connection setup has to be defined
                               in the configuration section ['parallel-cluster']
                               of ~/.pyhrf/config.cfg.
            - "local_with_dumps": testing purpose only, run each task serially
                                  as a subprocess.

    Returns:
         a list of results

    Raises:
         RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2): \
            return a + b
    >>> remote_map(foo, [(2,),(3,)], [{'b':5}, {'b':7}])
    [7, 10]
    """
    # Normalise largs/lkwargs so that they are two equal-length lists.
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    # Fill in each call's kwargs with the function's declared defaults.
    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)
    # print 'all_args:', all_args

    fmodule = func.__module__
    # Fully qualified function name, used by the subprocess/remote modes
    # to re-import the function on the other side.
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Can not import joblib. It is '
                              'required to enable parallel '
                              'processing on a local machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0
        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()
        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs)
                 for args, kwargs in all_args)

    elif mode == 'local_with_dumps':
        # Each task: pickle its args, run a python subprocess that
        # executes the function, then unpickle the output file.
        results = []
        for i, params in enumerate(all_args):
            # print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            # print 'call subprocess ...'
            subprocess.call(['python', '-c', cfunc % (fmodule, fname),
                             params_fn, output_fn])
            # print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            # One soma-workflow job per task; transfers declare which
            # files must be shipped to/from the cluster.
            job = Job(command=['pyhrf_exec_pyfunc', fmodule, fname,
                               param_file, output_file],
                      name="rmap, item %d" % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        # Tasks are independent: no dependencies between jobs.
        workflow = Workflow(jobs=all_jobs, dependencies=[])

        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout)
            o = cPickle.load(fout)
            print 'file cPickle loaded:', o
            fout.close()
            os.remove(fnout)
            # A pickled Exception means the remote task failed.
            if isinstance(o, Exception):
                raise RemoteException('Task %d failed' % i, o)
                # NOTE(review): the nested check below is unreachable
                # (placed after the raise) — presumably a leftover errno
                # 17 (EEXIST) tolerance; confirm intent before removing.
                if o.errno != 17:
                    raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results
def remote_map_marshal(func, largs=None, lkwargs=None, mode='local'):
    """Variant of remote_map that ships *func* itself by marshalling its
    code to a file, so the function does not need to be importable on the
    executing side.

    Modes: 'local' (plain loop), 'local_with_dumps' (serial
    subprocesses), 'remote_cluster' (soma-workflow). Returns the list of
    per-task results.
    """
    # Normalise largs/lkwargs so that they are two equal-length lists.
    if largs is None:
        if lkwargs is not None:
            largs = [[]] * len(lkwargs)
        else:
            largs = []

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    # Fill in each call's kwargs with the function's declared defaults.
    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    if mode == 'local':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local_with_dumps':
        # Marshal the function once; each task gets its own pickled
        # params file and a subprocess run.
        func_fn = './func.marshal'
        dump_func(func, func_fn)
        results = []
        for i, params in enumerate(all_args):
            print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            print 'call subprocess ...'
            subprocess.call(['python', '-c', cfunc_marshal, params_fn,
                             func_fn, output_fn])
            print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")
        func_fn = op.join(data_dir, 'func.marshal')
        dump_func(func, func_fn)
        # The marshalled function is a shared input of every job.
        func_file = FileTransfer(is_input=True,
                                 client_path=func_fn,
                                 name="func_file")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=['python', '-c', cfunc, param_file,
                               func_file, output_file],
                      name="rmap, item %d" % i,
                      referenced_input_files=[func_file, param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        # Tasks are independent: no dependencies between jobs.
        workflow = Workflow(jobs=all_jobs, dependencies=[])

        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")
        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        # Collect the per-task outputs in submission order.
        results = []
        for i in xrange(len(all_args)):
            fout = open(op.join(data_dir, 'output_%d.pck' % i))
            results.append(cPickle.load(fout))
            fout.close()
        return results
def run_soma_workflow(treatments, exec_cmd, tmp_local_dirs, server_id,
                      remote_host, remote_user, remote_pathes,
                      local_result_pathes, label_for_cluster,
                      wait_ending=False):
    """Dispatch treatments using soma-workflow.

    Parameters
    ----------
    treatments
        it is a dict mapping a treatment name to a treatment object
    exec_cmd
        it is the command to run on each ROI data.
    tmp_local_dirs
        it is a dict mapping a treatment name to a local tmp dir (used to
        store a temporary configuration file)
    server_id
        it is the server ID as expected by WorkflowController
    remote_host
        it is the remote machine where treatments are treated in parallel
    remote_user
        it is used to log in remote_host
    remote_pathes
        it is a dict mapping a treatment name to an existing remote dir
        which will be used to store ROI data and result files
    local_result_pathes
        it is a dict mapping a treatment name to a local path where final
        results will be sorted (host will send it there by scp)
    label_for_cluster
        it is the base name used to label workflows and sub jobs
    """
    import getpass
    from socket import gethostname

    local_user = getpass.getuser()
    local_host = gethostname()

    # Aggregate jobs/dependencies/groups over all treatments.
    all_nodes = []
    all_deps = []
    all_groups = []
    split_jobs = []
    for t_id, treatment in treatments.iteritems():

        tmp_local_dir = tmp_local_dirs[t_id]
        remote_path = remote_pathes[t_id]
        local_result_path = local_result_pathes[t_id]

        # sj: the data-splitting job, n/d/g: nodes, deps and group for
        # this treatment.
        sj, n, d, g = prepare_treatment_jobs(
            treatment, tmp_local_dir, local_result_path, local_user,
            local_host, remote_host, remote_user, remote_path,
            label_for_cluster + '-' + str(t_id))
        all_nodes.extend(n)
        all_deps.extend(d)
        all_groups.append(g)
        split_jobs.append(sj)

    # Jobs for data splitting should be done sequentially.
    # If they're done in parallel, they may flood the remote file system
    for isj in xrange(len(split_jobs)):
        if isj + 1 < len(split_jobs):
            all_deps.append((split_jobs[isj], split_jobs[isj + 1]))

    # # Be sure that all splitting jobs are done first:
    # # Is there a better way ?
    # for n in all_nodes:
    #     for sjob in split_jobs:
    #         all_deps.append((sjob,n))
    # Does not seem to work well -> maybe to many deps ?

    workflow = Workflow(all_nodes + split_jobs, all_deps,
                        root_group=all_groups)

    # f = open('/tmp/workflow.pck','w')
    # cPickle.dump(workflow, f)
    # f.close()

    logger.info('Open connection ...')
    connection = WorkflowController(server_id, remote_user)

    logger.info('Submit workflow ...')
    wf_id = connection.submit_workflow(
        workflow=workflow,
        # expiration_date="",
        # queue="run32",
        name=label_for_cluster + '-' + local_user)
    #wf = connection.workflow(wf_id)

    if wait_ending:  # wait for result
        logger.info('Wait for workflow to end and make outputs ...')
        Helper.wait_workflow(wf_id, connection)

        # Check that every treatment produced its local result file.
        for t_id, local_result_path in local_result_pathes.iteritems():
            treatment = treatments[t_id]
            rfilename = treatment.result_dump_file
            if rfilename is None:
                rfilename = 'result.pck'
            local_result_file = op.join(local_result_path,
                                        op.basename(rfilename))

            if not op.exists(local_result_file):
                raise Exception('Local result does not exist "%s"'
                                % local_result_file)

        # NOTE(review): relies on the loop variables above keeping their
        # last-iteration values — i.e. only the last treatment's result
        # is loaded and returned.
        if treatment.analyser.outFile is not None:
            # return result only for last treatment ...
            print 'Load result from %s ...' % local_result_file
            if splitext(local_result_file)[1] == '.gz':
                import gzip
                fresult = gzip.open(local_result_file)
            else:
                fresult = open(local_result_file)
            results = cPickle.load(fresult)
            fresult.close()
            # print 'Make outputs ...'
            #treatment.output(results, dump=False)
            logger.info('Cleaning tmp dirs ...')
            for tmp_dir in tmp_local_dirs.itervalues():
                shutil.rmtree(tmp_dir)

            return results
    else:
        logger.info('Cleaning tmp dirs ...')
        for tmp_dir in tmp_local_dirs.itervalues():
            shutil.rmtree(tmp_dir)

        logger.info('Workflow sent, returning ...')
        return []
# Assemble a two-step estimator: scaling followed by logistic regression.
p = [("scaler", s1), ("logit", s2)]
est = Pipeline(p)

# Load the iris dataset.
X, y = iris.get_data()

# Serialize the estimator and a stratified 5-fold CV scheme to JSON.
method_conf = JSONify_estimator(est, out="./est.json")
cv_conf = JSONify_cv(StratifiedKFold,
                     cv_kwargs={"n_folds": 5},
                     score_func=f1_score,
                     stratified=True,
                     out="./cv.json")

# Write the (compressed) dataset file used by the jobs.
dataset = build_dataset(X, y, method_conf, cv_conf, ".", compress=1)

# Internal workflow representation, then the soma-workflow version of it.
wfi = create_wf(dataset['folds'], cv_conf, method_conf, ".", verbose=True)
wf = save_wf(wfi, "./workflow.json", mode="soma-workflow")

# Submit to soma-workflow.
controler = WorkflowController()
wf_id = controler.submit_workflow(workflow=wf, name="first example")

# Poll every 2 seconds until the workflow completes.
while True:
    if controler.workflow_status(wf_id) == 'workflow_done':
        break
    time.sleep(2)

# Display the aggregated result produced by the workflow.
print(joblib.load('./final_res.pkl'))
from __future__ import print_function import time import os from soma_workflow.client import Job, Workflow, WorkflowController, Helper, FileTransfer from soma_workflow.configuration import Configuration # from soma_workflow.connection import RemoteConnection user = '******' try: import pwd user = pwd.getpwuid(os.getuid()).pw_name except Exception: pass controller = WorkflowController("Gabriel", user) # FileTransfer creation for input files file1 = FileTransfer(is_input=True, client_path="%s/create_file.py" % Configuration.get_home_dir(), name="script") file2 = FileTransfer(is_input=True, client_path="%s/output_file" % Configuration.get_home_dir(), name="file created on the server") # Job and Workflow run_script = Job(command=["python", file1, file2], name="copy",
echo %s """ % test_bash_script fileout.write(filecontent) fileout.close() os.chdir(cur_work_dir) job1 = Job(command=[u"touch", test_filepath], name="epac_job_test", working_directory=tmp_work_dir_path) job2 = Job(command=["%s/readfile" % cur_file_dir, test_bash_script], name="epac_job_test", working_directory=tmp_work_dir_path) soma_workflow = Workflow(jobs=[job1, job2]) resource_id = socket.gethostname() controller = WorkflowController(resource_id, "", "") ## run soma-workflow ## ================= wf_id = controller.submit_workflow(workflow=soma_workflow, name="epac workflow") Helper.wait_workflow(wf_id, controller) nb_failed_jobs = len(Helper.list_failed_jobs(wf_id, controller)) if nb_failed_jobs > 0: raise ValueError("Soma-workflow error, cannot use working directory") if not os.path.isfile(os.path.join(tmp_work_dir_path, test_filepath)): raise ValueError("Soma-workflow cannot define working directory") else: print("OK for creating new file in working directory")
"--output_r2", os.path.join(args.output, 'r2'), "--output_distribution", os.path.join(args.output, 'distribution'), "--x", args.x, "--y", args.y, "--shuffling", shuffling, "--n_permutations", args.nb_permutations, "--alpha_percentile", args.alpha_percentile ], name="job {} - alpha {}".format(run, alpha), working_directory=scripts_path) group_significativity.append(job) jobs.append(job) distribution_voxels = Group(elements=group_significativity, name="Voxel wise fitting of the models") workflow2 = Workflow(jobs=jobs, root_group=[distribution_voxels]) ### Submit the workflow to computing resource (configured in the client-server mode) controller2 = WorkflowController( "DSV_cluster_ap259944", args.login, args.password) #"DSV_cluster_ap259944", args.login, args.password workflow_id2 = controller2.submit_workflow(workflow=workflow2, name="Voxel-wise computations") # You may use the gui or manually transfer the files: manual = True if manual: Helper.transfer_input_files(workflow_id2, controller2) Helper.wait_workflow(workflow_id2, controller2) Helper.transfer_output_files(workflow_id2, controller2)