import os
import os.path as op
import itertools
import logging
import shutil
import subprocess
import time
import cPickle

from tempfile import mkdtemp

import pyhrf

# soma-workflow is only needed for the cluster execution modes.
try:
    from soma_workflow.client import (Job, Workflow, WorkflowController,
                                      Helper, FileTransfer)
except ImportError:
    pass

logger = logging.getLogger(__name__)

# Helpers used below (merge_default_kwargs, available_cpu_count,
# RemoteException, the `cfunc` code template, exec_t, remote_copy and
# remote_mkdir) are defined elsewhere in pyhrf.


def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply to each item.
            **This function must be importable on the remote side.**
        *largs* (list of tuple): each item in the list is a tuple
            containing all positional argument values of the function.
        *lkwargs* (list of dict): each item in the list is a dict
            containing all named arguments of the function mapped to
            their value.
        *mode* (str): indicates how execution is distributed.
            Choices are:

            - "serial": single-threaded loop on the local machine
            - "local": use joblib to run tasks in parallel. The number
              of simultaneous jobs is defined in the configuration
              section ['parallel-local']['nb_procs'],
              see ~/.pyhrf/config.cfg
            - "remote_cluster": use soma-workflow to run tasks in
              parallel. The connection setup has to be defined in the
              configuration section ['parallel-cluster'] of
              ~/.pyhrf/config.cfg.
            - "local_with_dumps": for testing purposes only; runs each
              task serially as a subprocess.

    Returns:
        a list of results

    Raises:
        RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2):
    ...     return a + b
    >>> remote_map(foo, [(2,), (3,)], [{'b': 5}, {'b': 7}])
    [7, 10]
    """
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    fmodule = func.__module__
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Cannot import joblib. It is required to '
                              'enable parallel processing on a local '
                              'machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0

        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()

        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs)
                 for args, kwargs in all_args)
    elif mode == 'local_with_dumps':
        results = []
        for i, params in enumerate(all_args):
            # Dump the (args, kwargs) pair and run the task as a
            # subprocess; `cfunc` is the `python -c` code template.
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            subprocess.call(['python', '-c', cfunc % (fmodule, fname),
                             params_fn, output_fn])
            fout = open(output_fn, 'rb')
            results.append(cPickle.load(fout))
            fout.close()
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files.
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=['pyhrf_exec_pyfunc', fmodule, fname,
                               param_file, output_file],
                      name='rmap, item %d' % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])

        # Submit the workflow.
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name='remote_map')
        Helper.transfer_input_files(wf_id, controller)
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout, 'rb')
            o = cPickle.load(fout)
            logger.debug('Loaded pickled output: %s', o)
            fout.close()
            os.remove(fnout)
            if isinstance(o, Exception):
                # errno 17 (EEXIST) is tolerated; any other exception
                # coming back from a task aborts the whole map.
                if getattr(o, 'errno', None) != 17:
                    raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results
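
# ---------------------------------------------------------------------
# The cluster jobs above invoke the `pyhrf_exec_pyfunc` helper script,
# whose actual implementation lives elsewhere in pyhrf. The sketch
# below is a hypothetical illustration of the protocol such a worker
# has to follow: load the pickled (args, kwargs) pair, resolve the
# target function from its module, run it, and pickle either the
# result or the raised exception so that remote_map can turn it into
# a RemoteException. Names here are illustrative, not pyhrf's own code.
def _exec_pyfunc_sketch(module_name, func_dotted_name, params_fn,
                        output_fn):
    import importlib

    fparams = open(params_fn, 'rb')
    args, kwargs = cPickle.load(fparams)
    fparams.close()
    try:
        module = importlib.import_module(module_name)
        func = getattr(module, func_dotted_name.split('.')[-1])
        output = func(*args, **kwargs)
    except Exception, e:
        # Ship the exception back to the caller, which decides
        # whether to re-raise it.
        output = e
    fout = open(output_fn, 'wb')
    cPickle.dump(output, fout)
    fout.close()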
# `run` below is a method excerpt: `self` is the enclosing treatment
# object, which provides execute(), split(), output_dir, analyser,
# result_dump_file, make_outputs and output().
def run(self, parallel=None, n_jobs=None):
    """
    Run the analysis: load data, run estimation, output results.
    """
    if parallel is None:
        result = self.execute()
    elif parallel == 'local':
        cfg_parallel = pyhrf.cfg['parallel-local']
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Cannot import joblib. It is required to '
                              'enable parallel processing on a local '
                              'machine.')

        # Map the logging level to a joblib verbosity level.
        effective_level = logger.getEffectiveLevel()
        if effective_level == logging.DEBUG:
            parallel_verb = 11
        elif effective_level == logging.INFO:
            parallel_verb = 2
        else:
            parallel_verb = 0

        if n_jobs is None:
            if cfg_parallel['nb_procs']:
                n_jobs = cfg_parallel['nb_procs']
            else:
                n_jobs = available_cpu_count()

        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        result = p(delayed(exec_t)(t)
                   for t in self.split(output_dir=None))
        # Join the list of lists:
        result = list(itertools.chain.from_iterable(result))
    elif parallel == 'LAN':
        from pyhrf import grid
        cfg_parallel = pyhrf.cfg['parallel-LAN']
        remoteUser = cfg_parallel['user']

        # 1. Some checks on the input/output directory. At the end,
        # results are retrieved directly from remoteDir, which
        # therefore has to be readable.
        remoteDir = cfg_parallel['remote_path']
        if remoteDir is None or not op.exists(remoteDir):
            raise Exception('Remote directory is not readable (%s). '
                            'Consider mounting it with sshfs.'
                            % remoteDir)

        # Check whether remoteDir is writeable, so that data does not
        # need to be uploaded via ssh.
        remote_writeable = False
        if os.access(remoteDir, os.W_OK):
            remote_writeable = True
            tmpDir = remoteDir
        else:
            logger.info('Remote dir is not writeable -> using a tmp '
                        'dir to store the split data, then uploading.')
            tmpDir = pyhrf.get_tmp_path()  # local staging dir

        # 2. Split the ROI data.
        logger.info('Path to store sub treatments: %s', tmpDir)
        treatments_dump_files = []
        self.split(dump_sub_results=True, output_dir=tmpDir,
                   make_sub_outputs=False,
                   output_file_list=treatments_dump_files)

        # 3. Copy data to the remote directory.
        if not remote_writeable:
            host = cfg_parallel['remote_host']
            logger.info('Uploading data to %s ...', remoteDir)
            remote_input_files = remote_copy(treatments_dump_files,
                                             host, remoteUser, remoteDir)

        # 4. Create the job list.
        nice = cfg_parallel['niceness']
        tasks_list = []
        for f in treatments_dump_files:
            f = op.join(remoteDir, op.basename(f))
            tasks_list.append('nice -n %d %s -v%d -t "%s"'
                              % (nice, 'pyhrf_jde_estim',
                                 logger.getEffectiveLevel(), f))

        mode = 'dispatch'
        tasks = grid.read_tasks(';'.join(tasks_list), mode)
        timeslot = grid.read_timeslot('allday')
        hosts = grid.read_hosts(cfg_parallel['hosts'])
        if self.output_dir is not None:
            brokenfile = op.join(self.output_dir,
                                 'pyhrf-broken_cmd.batch')
            logfile = op.join(self.output_dir, 'pyhrf-parallel.log')
            logger.info('Log file for process dispatching: %s', logfile)
        else:
            brokenfile = None
            logfile = None

        # 5. Launch the tasks.
        logger.info('Dispatching processes ...')
        try:
            grid.run_grid(mode, hosts, 'rsa', tasks, timeslot,
                          brokenfile, logfile, user=remoteUser)
            grid.kill_threads()
        except KeyboardInterrupt:
            grid.quit(None, None)

        # Retry any commands recorded as broken during the first pass.
        if brokenfile is not None and \
                len(open(brokenfile).readlines()) > 0:
            logger.info('There are some broken commands, '
                        'trying again ...')
            try:
                tasks = grid.read_tasks(brokenfile, mode)
                grid.run_grid(mode, hosts, 'rsa', tasks, timeslot,
                              brokenfile, logfile, user=remoteUser)
                grid.kill_threads()
            except KeyboardInterrupt:
                grid.quit(None, None)
        # TODO: check that everything went fine; grab the results back
        # via scp when remoteDir is not readable locally.

        # 6. Merge all results and create outputs.
        result = []
        nb_treatments = len(treatments_dump_files)
        remote_result_files = [op.join(remoteDir, 'result_%04d.pck' % i)
                               for i in range(nb_treatments)]
        logger.info('remote_result_files: %s', str(remote_result_files))
        nres = len(filter(op.exists, remote_result_files))
        if nres == nb_treatments:
            logger.info('Grabbing results ...')
            for fnresult in remote_result_files:
                fresult = open(fnresult, 'rb')
                result.append(cPickle.load(fresult)[0])
                fresult.close()
        else:
            logger.error('Found only %d result files (expected %d). '
                         'Something went wrong, check the log files.',
                         nres, nb_treatments)
        if not remote_writeable:
            logger.info('Cleaning tmp dir (%s)...', tmpDir)
            shutil.rmtree(tmpDir)

            logger.info('Cleaning up remote dir (%s) through ssh ...',
                        remoteDir)
            cmd = 'ssh %s@%s rm -f "%s" "%s"' \
                % (remoteUser, host, ' '.join(remote_result_files),
                   ' '.join(remote_input_files))
            logger.info(cmd)
            os.system(cmd)
        else:
            if 0:  # cleanup of the writeable remoteDir is disabled
                logger.info('Cleaning up remote dir (%s)...', remoteDir)
                for f in os.listdir(remoteDir):
                    os.remove(op.join(remoteDir, f))
    elif parallel == 'cluster':
        from pyhrf.parallel import run_soma_workflow
        cfg = pyhrf.cfg['parallel-cluster']
        # Create a temporary remote path:
        date_now = time.strftime('%c').replace(' ', '_').replace(':', '_')
        remote_path = op.join(cfg['remote_path'], date_now)
        logger.info('Create tmp remote dir: %s', remote_path)
        remote_mkdir(cfg['server'], cfg['user'], remote_path)
        t_name = 'default_treatment'
        tmp_dir = pyhrf.get_tmp_path()
        label_for_cluster = self.analyser.get_label()
        if self.output_dir is None:
            out_dir = pyhrf.get_tmp_path()
        else:
            out_dir = self.output_dir
        result = run_soma_workflow({t_name: self}, 'pyhrf_jde_estim',
                                   {t_name: tmp_dir}, cfg['server_id'],
                                   cfg['server'], cfg['user'],
                                   {t_name: remote_path},
                                   {t_name: op.abspath(out_dir)},
                                   label_for_cluster, wait_ending=True)
    else:
        raise Exception('Parallel mode "%s" not available' % parallel)

    logger.info('Retrieved %d results', len(result))
    return self.output(result, (self.result_dump_file is not None),
                       self.make_outputs)
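
# ---------------------------------------------------------------------
# Usage sketch (hypothetical): `run` is a method of a treatment object
# such as pyhrf's FMRITreatment. Assuming a treatment has been built
# (the constructor arguments below are illustrative), a local parallel
# run would look like:
#
#     treatment = FMRITreatment(fmri_data, analyser,
#                               output_dir='./results')
#     outputs = treatment.run(parallel='local', n_jobs=4)
#
# The 'LAN' and 'cluster' modes additionally require the
# ['parallel-LAN'] / ['parallel-cluster'] sections of
# ~/.pyhrf/config.cfg to be filled in.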