def _init(self, *args, **kwargs):
    # A small helper function which will do the following:
    # * get a client from the cache in a thread safe manner, or
    #   create a new one from scratch
    # * store the input arguments as class members
    # * create a LoadBalancedView from the client
    # * create a lock to regulate access to the view
    # * return the view.
    from ipyparallel import Client

    # Turn the arguments into something that might be hashable.
    args_key = (args, tuple(sorted([(k, kwargs[k]) for k in kwargs])))
    if _hashable(args_key):
        with _client_cache_lock:
            if args_key in _client_cache:
                rc = _client_cache[args_key]
            else:
                _client_cache[args_key] = Client(*args, **kwargs)
                rc = _client_cache[args_key]
    else:
        # If the arguments are not hashable, just create a brand new
        # client.
        rc = Client(*args, **kwargs)

    # Save the init arguments.
    self._args = args
    self._kwargs = kwargs

    # NOTE: we need to regulate access to the view because,
    # while run_evolve() is running in a separate thread, we
    # could be doing other things involving the view (e.g.,
    # asking extra_info()). Thus, create the lock here.
    self._view_lock = _Lock()

    return rc.load_balanced_view()
def __init__(self, mod, engine='multiproc'):
    self.engine = engine
    if engine == 'multiproc':
        print('parallel engine: multiproc')
    elif engine == 'ipcluster':
        print('parallel engine: ipcluster')
        from ipyparallel import Client
        try:
            rc = Client()
            self.rc = rc
        except OSError as ex:
            raise OSError(
                str(ex)
                + '\nPARSCANNER: Requires a running IPython cluster. See "ipcluster --help".\n'
            )
        dv = rc[:]  # direct view
        lv = rc.load_balanced_view()
        self.dv = dv
        self.lv = lv
        dv.execute('from pysces.PyscesParScan import Analyze, setModValue')
    else:
        raise UserWarning(engine + " is not a valid parallel engine!")
    from ipyparallel.serialize import codeutil
    self.GenDict = {}
    self.GenOrder = []
    self.ScanSpace = []
    self.mod = mod
    self.SteadyStateResults = []
    self.UserOutputList = []
    self.UserOutputResults = []
    self.scanT = TimerBox()
def run_jobs_on_ipythoncluster(worker, task_list, shutdown_ipengines_after_done=False):
    t0 = time.time()
    rc = Client(CLUSTER_CLIENT_JSON)
    lview = rc.load_balanced_view()
    cnt_nodes = len(lview.targets or rc.ids)
    print("\t# nodes in use: {}".format(cnt_nodes))
    lview.block = False
    print("\t# of tasks: {}".format(len(task_list)))
    print("\tsubmitting...", end='')
    job = lview.map_async(worker, task_list)
    print("done.")
    try:
        job.wait_interactive()
    except KeyboardInterrupt:
        # handle "Ctrl-C"
        if ask("\nAbort all submitted jobs?") == 'Y':
            lview.abort()
            print("Aborted, all submitted jobs are cancelled.")
        else:
            print("Aborted, but your jobs are still running on the cluster.")
        return
    if len(job.result()) != len(task_list):
        print("WARNING:\t# of results returned ({}) != # of tasks ({}).".format(len(job.result()), len(task_list)))
    print("\ttotal time: {}".format(timesofar(t0)))
    if shutdown_ipengines_after_done:
        print("\tshutting down all ipengine nodes...", end='')
        lview.shutdown()
        print('Done.')
    return job.result()
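# `ask` and `timesofar` are helpers from the surrounding module and are not
# shown above; minimal stand-ins (assumptions, only so the snippet can run in
# isolation) could look like this:
import time

def ask(prompt, options='YN'):
    # prompt until the user answers with one of the allowed letters
    ans = ''
    while ans not in list(options):
        ans = input(prompt + ' [{}] '.format('/'.join(options))).strip().upper()
    return ans

def timesofar(t0):
    # elapsed time since t0 as a human-readable string
    return '{:.1f}s'.format(time.time() - t0)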
def start_ipcluster(ipcluster_exe, nengines, profile, max_retries=50):
    """
    Start a new IPython parallel cluster (daemon) with a number of
    `nengines` and using `profile`.
    """
    from ipyparallel import Client

    ipcluster = None
    rc = None
    dview = None
    lview = None

    ipcluster = os.system('{} start -n={} --profile={} --daemon'.format(
        ipcluster_exe, nengines, profile))

    # retry until ipcluster is ready
    time.sleep(3)
    rc = Client(profile=profile)
    retries = 0
    while True:
        if retries > max_retries:
            stop_ipcluster(ipcluster_exe, profile)
            raise Exception("impossible to access (all) engines "
                            "of the IPython parallel cluster")
        if len(rc.ids) < nengines:
            retries += 1
            time.sleep(1)
            continue
        else:
            break

    dview = rc[:]
    lview = rc.load_balanced_view()

    return ipcluster, rc, dview, lview
def create_optimizer(args):
    '''returns configured bluepyopt.optimisations.DEAPOptimisation'''
    if args.ipyparallel or os.getenv('L5PCBENCHMARK_USEIPYP'):
        from ipyparallel import Client
        rc = Client(profile=os.getenv('IPYTHON_PROFILE'))

        logger.debug('Using ipyparallel with %d engines', len(rc))

        lview = rc.load_balanced_view()

        def mapper(func, it):
            start_time = datetime.now()
            ret = lview.map_sync(func, it)
            logger.debug('Generation took %s', datetime.now() - start_time)
            return ret

        map_function = mapper
    else:
        map_function = None

    evaluator = l5pc_evaluator.create()
    seed = os.getenv('BLUEPYOPT_SEED', args.seed)
    opt = bluepyopt.optimisations.DEAPOptimisation(
        evaluator=evaluator,
        map_function=map_function,
        seed=seed)

    return opt
def Main():
    filter_obj = {
        'dendrite_type': 'spiny',
        'structure_layer_name': '5',
        'structure_area_abbrev': 'VISp'
    }
    ctc = CellTypesCache()
    cells = ctc.get_cells(species=['Mus musculus'])
    cells_df = pd.DataFrame(cells)
    for filt_key, filt_val in filter_obj.items():
        cells_df = cells_df.loc[cells_df[filt_key] == filt_val, :]
    cell_ids = list(cells_df['id'].values)

    rc = Client(profile=os.getenv('IPYTHON_PROFILE'))
    logger.debug('Using ipyparallel with %d engines', len(rc))
    lview = rc.load_balanced_view()
    func = partial(get_fi_data, ctc)
    filter_fi_data = lview.map_sync(func, cell_ids)
    filter_fi_data = [data for data in filter_fi_data if data is not None]

    file_name = 'fi_data.pkl'
    with open(file_name, 'wb') as fh:
        pickle.dump(filter_fi_data, fh)

    plot_fi_data(filter_fi_data)
    rc.shutdown(hub=True)
def _make_ipyparallel_view(client_args, client_kwargs, view_args, view_kwargs):
    # Small helper to create an ipyparallel view.
    from ipyparallel import Client
    rc = Client(*client_args, **client_kwargs)
    rc[:].use_cloudpickle()
    return rc.load_balanced_view(*view_args, **view_kwargs)
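# Hypothetical usage of the helper above (the profile name is an assumption):
# positional and keyword arguments are forwarded verbatim to Client() and to
# load_balanced_view(), and cloudpickle lets the view ship closures.
view = _make_ipyparallel_view((), {'profile': 'default'}, (), {})
doubled = view.map_sync(lambda x: 2 * x, range(8))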
def simulate_general(runner, results_filename):
    """
    Function with the general code to simulate the MIMO schemes.
    """
    # xxxxxxxxxx Print the simulation parameters xxxxxxxxxxxxxxxxxxxxxxxxxx
    pprint(runner.params.parameters)
    print("MIMO Scheme: {0}".format(runner.mimo_object.__class__.__name__))
    # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

    # xxxxx Replace any parameter mention in results_filename xxxxxxxxxxxxx
    runner.set_results_filename(results_filename)
    # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

    # xxxxxxxxxx Perform the simulation xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    # The simulation will be run either in parallel or serially depending
    # on whether the IPython engines are running or not.
    run_in_parallel = True
    # noinspection PyBroadException,PyBroadException
    try:
        # If we can get an IPython view that means that the IPython engines
        # are running. In that case we will perform the simulation in
        # parallel
        from ipyparallel import Client
        cl = Client()

        # We create a direct view to run code on all engines
        dview = cl.direct_view()

        # Reset the engines so that we don't have variables there from last
        # computations
        dview.execute('%reset')
        dview.execute('import sys')
        # We use block=True to ensure that all engines have modified their
        # path to include the folder with the simulator before we create
        # the load balanced view in the following.
        dview.execute('sys.path.append("{0}")'.format(parent_dir), block=True)

        # But for the actual simulation we are better using a load balanced
        # view
        lview = cl.load_balanced_view()
    except Exception:  # pylint: disable=W0703
        # If we can't get an IPython view then we will perform the
        # simulation serially
        run_in_parallel = False

    if run_in_parallel is True:
        print("-----> Simulation will be run in Parallel")
        runner.simulate_in_parallel(lview)
    else:
        print("-----> Simulation will be run serially")
        runner.simulate()
    # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

    print("Completed iterations: {0}".format(runner.runned_reps))
    print("Elapsed Time: {0}".format(runner.elapsed_time))
    print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")

    return runner.results, runner.results_filename
def do_parallel(filelist):
    rc = Client()
    print('# of engines : %d' % len(rc.ids))
    print('# of job : %d' % len(filelist))
    lv = rc.load_balanced_view()
    result = lv.map_async(singlejob, filelist)
    result.wait_interactive()
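# do_parallel() above discards the mapped results once the interactive wait
# finishes; a small variant (a sketch, not from the original source) that
# also collects the per-file return values:
def do_parallel_collect(filelist):
    rc = Client()
    lv = rc.load_balanced_view()
    result = lv.map_async(singlejob, filelist)
    result.wait_interactive()
    return result.get()  # one return value per entry of filelist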
def ipython_partition(images, direct, n):
    c = Client()
    partitions = [images[i:i + n] for i in range(0, len(images), n)]
    if direct:
        dview = c[:]
        dview.block = False
        num_clients = len(c.ids)

        start = time.time()
        ret1 = [
            c[i % num_clients].apply_sync(local_par_hog, partitions[i])
            for i in range(len(partitions))
        ]
        end = time.time() - start
        ret1 = list(itertools.chain.from_iterable(ret1))
        print("HOG IPYTHON DIRECT APP: %d images -> %f" % (len(ret1), end))

        start = time.time()
        ret2 = dview.map_sync(local_par_hog, partitions)
        end = time.time() - start
        ret2 = list(itertools.chain.from_iterable(ret2))
        print("HOG IPYTHON DIRECT MAP: %d images -> %f" % (len(ret2), end))

        start = time.time()
        rets = [
            c[i % num_clients].apply_async(local_par_hog, partitions[i])
            for i in range(len(partitions))
        ]
        ret3 = [r.get() for r in rets]
        end = time.time() - start
        ret3 = list(itertools.chain.from_iterable(ret3))
        print("HOG IPYTHON DIRECT ASY: %d images -> %f" % (len(ret3), end))
        return ret3
    else:
        dview = c.load_balanced_view()
        dview.block = False

        start = time.time()
        ret1 = [dview.apply_sync(local_par_hog, i) for i in partitions]
        end = time.time() - start
        ret1 = list(itertools.chain.from_iterable(ret1))
        print("HOG IPYTHON LBV APP: %d images -> %f" % (len(ret1), end))

        start = time.time()
        ret2 = dview.map_sync(local_par_hog, partitions)
        end = time.time() - start
        ret2 = list(itertools.chain.from_iterable(ret2))
        print("HOG IPYTHON LBV MAP: %d images -> %f" % (len(ret2), end))

        start = time.time()
        rets = [dview.apply_async(local_par_hog, i) for i in partitions]
        ret3 = [r.get() for r in rets]
        end = time.time() - start
        ret3 = list(itertools.chain.from_iterable(ret3))
        print("HOG IPYTHON LBV ASY: %d images -> %f" % (len(ret3), end))
        return ret3
def setup_parallel(dbname):
    c = Client()
    dview = c.direct_view()
    dview.push({'dbname': str(dbname)})
    # dview.push({'remove_duplicates_from_image_name_data':
    #             remove_duplicates_from_image_name_data,
    #             'get_temp_fname': get_temp_fname,
    #             'dbname': dbname})
    lbview = c.load_balanced_view()
    return lbview
def setup_parallel(parallel):
    if parallel:
        pickleutil.use_dill()
        # can_map.pop(FunctionType, None)
        # serialize.pickle = pickle
        print("Running in parallel")
        rc = Client()
        rc[:].use_dill()
        lview = rc.load_balanced_view()
        lview.block = True
    else:
        lview = None
    return lview
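# Usage sketch under the setup above (the worker function is an assumption):
# with dill registered on the engines, locally defined functions can be
# shipped, and block=True makes lview.map return results directly.
def simulate_one(seed):
    import random
    random.seed(seed)
    return random.random()

lview = setup_parallel(parallel=True)
if lview is not None:
    outputs = lview.map(simulate_one, range(10))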
class DistributedSpider(object):

    # Time to wait between polling for task results.
    pollingDelay = 0.5

    def __init__(self, site):
        self.client = Client()
        self.view = self.client.load_balanced_view()
        self.mux = self.client[:]

        self.allLinks = []
        self.linksWorking = {}
        self.linksDone = {}

        self.site = site

    def visitLink(self, url):
        if url not in self.allLinks:
            self.allLinks.append(url)
            if url.startswith(self.site):
                print(' ', url)
                self.linksWorking[url] = self.view.apply(fetchAndParse, url)

    def onVisitDone(self, links, url):
        print(url + ':')
        self.linksDone[url] = None
        del self.linksWorking[url]
        for link in links:
            self.visitLink(link)

    def run(self):
        self.visitLink(self.site)
        while self.linksWorking:
            print(len(self.linksWorking), 'pending...')
            self.synchronize()
            time.sleep(self.pollingDelay)

    def synchronize(self):
        for url, ar in list(self.linksWorking.items()):
            # Calling ar.get(0) with a zero timeout raises TimeoutError if
            # the task is not done yet. This provides a simple way of polling.
            try:
                links = ar.get(0)
            except error.TimeoutError:
                continue
            except Exception as e:
                self.linksDone[url] = None
                del self.linksWorking[url]
                print('%s: %s' % (url, e))
            else:
                self.onVisitDone(links, url)
def download_and_calibrate_parallel(list_of_ids, n=None):
    """Download and calibrate in parallel.

    Parameters
    ----------
    list_of_ids : list
        container with img_ids to process
    n : int
        Number of cores for the parallel processing. Default: n_cores_system//2
    """
    setup_cluster(n_cores=n)
    c = Client()
    lbview = c.load_balanced_view()
    # wait for the asynchronous map to complete before stopping the cluster
    results = lbview.map_async(download_and_calibrate, list_of_ids)
    results.wait_interactive()
    subprocess.Popen(["ipcluster", "stop", "--quiet"])
def main():
    from ipyparallel import Client
    rc = Client(profile='mpi')
    workers = rc.load_balanced_view()

    jobs = []
    batches = chunkify(vid_name_list, n_job)
    batch_inds = chunkify(range(len(vid_name_list)), n_job)
    for i in range(n_job):
        print(i)
        jobs.append(
            workers.apply_async(
                extract_job,
                *(batch_inds[i], batches[i], input_basedir, output_basedir)))

    for i in range(n_job):
        print(jobs[i].get(), i, len(jobs))
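# chunkify() is referenced above but not shown; a minimal stand-in
# (an assumption about its behavior) that splits a sequence into n roughly
# equal contiguous batches:
def chunkify(seq, n):
    seq = list(seq)
    k, m = divmod(len(seq), n)
    return [seq[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
            for i in range(n)]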
def map(self, parallel_task, args):
    from ipyparallel import Client, TimeoutError

    chunksize = 1
    if self.max_tasks > 0 and len(args) > self.max_tasks:
        chunksize = len(args) // self.max_tasks
        if chunksize * self.max_tasks < len(args):
            chunksize += 1
    client = None
    try:
        client = Client()
    except TimeoutError:
        raise RuntimeError(
            'Cannot connect to the ipyparallel client. Is it running?')

    ar = None
    try:
        client[:].use_cloudpickle()
        lbv = client.load_balanced_view()
        ar = lbv.map_async(IppFunctionWrapper(parallel_task, self.timeout),
                           args,
                           chunksize=chunksize)
        try:
            r = []
            for k, z in enumerate(
                    tqdm(ar, desc="(IPYPARALLEL)", total=len(args))):
                if z[0] == -1:
                    logger.error(z[1])
                    engine = ar.engine_id[k]
                    client.abort(ar)
                    client.close()
                    raise RuntimeError(
                        'remote failure (task %d of %d on engine %d)' %
                        (k + 1, len(ar), engine))
                elif z[0] == 0:
                    r.append(z[1])
        except KeyboardInterrupt:
            client.abort(ar)
            raise
    finally:
        # always close the client to release resources
        if ar:
            client.abort(ar)
        if client:
            client.close()
    return r
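# The chunk sizing at the top of map() above is a ceiling division in
# disguise: it caps the number of submitted chunks at max_tasks. An
# equivalent formulation, shown only for clarity:
import math

def compute_chunksize(n_args, max_tasks):
    if max_tasks > 0 and n_args > max_tasks:
        return math.ceil(n_args / max_tasks)
    return 1

assert compute_chunksize(10, 3) == 4  # 10 items in at most 3 chunks
assert compute_chunksize(9, 3) == 3
assert compute_chunksize(5, 0) == 1   # max_tasks <= 0 disables chunking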
def process_trajectories(*processors, postprocessor, ipyparallel=None):
    trajectories = []
    for proc in processors:
        for info in proc.get_infos():
            trajectories += [Trajectory(info, proc, postprocessor)]

    if ipyparallel is not None:
        from ipyparallel import Client
        rc = Client(profile=ipyparallel)
        lbv = rc.load_balanced_view()
        # temp_flags already applies the retries flag to tasks submitted
        # inside this block, so it is not repeated as a map_async kwarg
        with lbv.temp_flags(retries=10):
            lbv.map_async(_process_trajectory, trajectories)
    else:
        with Pool(processes=os.cpu_count() - 1) as pool:
            pool.map(_process_trajectory, trajectories, chunksize=1)

    log.info("Done!")
class IPClusterEnsemble(SurveyEnsemble):
    """
    Parallelized survey ensemble based on IPython parallel (ipcluster)

    Args:
        \*\*specs:
            user specified values

    Attributes:

    Notes:

    """

    def __init__(self, **specs):

        SurveyEnsemble.__init__(self, **specs)

        # access the cluster
        self.rc = Client()
        self.dview = self.rc[:]
        self.dview.block = True
        with self.dview.sync_imports():
            import EXOSIMS, EXOSIMS.util.get_module
        r1 = self.dview.execute("SurveySim = EXOSIMS.util.get_module.get_module('%s', 'SurveySimulation')" % specs['modules']['SurveySimulation'])
        self.dview.push(dict(specs=specs))
        r2 = self.dview.execute("sim = SurveySim(**specs)")
        self.lview = self.rc.load_balanced_view()

    def run_ensemble(self, run_one, N=10):
        t1 = time.time()
        async_res = []
        for j in range(N):
            ar = self.lview.apply_async(run_one)
            async_res.append(ar)
        print("Submitted tasks: ", len(async_res))
        self.rc.wait(async_res)
        t2 = time.time()
        print("Completed in %d sec" % (t2 - t1))

        res = [ar.get() for ar in async_res]

        return res
def run_ipyparallel(func, tasks, *args, **kwargs):
    from ipyparallel import Client

    profile = kwargs.get("profile", get_conda_env())
    url_file = kwargs.get("url_file", None)
    if not profile and not url_file:
        raise ValueError(
            "you must create an ipython profile first, try using 'fxdayu create_profile' in commands"
        )
    client = Client(url_file=url_file, profile=profile)
    _dview = client[:]
    _lview = client.load_balanced_view()
    delayed = [
        _lview.apply_async(func, task, *args, **kwargs) for task in tasks
    ]
    _lview.wait(delayed)
    results = [ar.get() for ar in delayed]
    return results
def main():
    parser = OptionParser()
    parser.set_defaults(n=100)
    parser.set_defaults(tmin=1e-3)
    parser.set_defaults(tmax=1)
    parser.set_defaults(profile='default')

    parser.add_option("-n", type='int', dest='n',
                      help='the number of tasks to run')
    parser.add_option("-t", type='float', dest='tmin',
                      help='the minimum task length in seconds')
    parser.add_option("-T", type='float', dest='tmax',
                      help='the maximum task length in seconds')
    parser.add_option("-p", '--profile', type='str', dest='profile',
                      help="the cluster profile [default: 'default']")

    (opts, args) = parser.parse_args()
    assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin"

    rc = Client()
    view = rc.load_balanced_view()
    print(view)
    rc.block = True
    nengines = len(rc.ids)
    with rc[:].sync_imports():
        from IPython.utils.timing import time

    # the jobs should take a random time within a range
    times = [random.random() * (opts.tmax - opts.tmin) + opts.tmin
             for i in range(opts.n)]
    stime = sum(times)

    print("executing %i tasks, totalling %.1f secs on %i engines" % (opts.n, stime, nengines))
    time.sleep(1)
    start = time.time()
    amr = view.map(time.sleep, times)
    amr.get()
    stop = time.time()

    ptime = stop - start
    scale = stime / ptime

    print("executed %.1f secs in %.1f secs" % (stime, ptime))
    print("%.3fx parallel performance on %i engines" % (scale, nengines))
    print("%.1f%% of theoretical max" % (100 * scale / nengines))
def run(dest, results_path, parallel, seed):
    np.random.seed(seed)

    # load the responses
    old_store_pth = os.path.abspath(os.path.join(
        results_path, 'model_fall_responses_raw.h5'))
    old_store = pd.HDFStore(old_store_pth, mode='r')

    # open up the store for saving
    store = pd.HDFStore(dest, mode='w')

    # create the ipython parallel client
    if parallel:
        rc = Client()
        lview = rc.load_balanced_view()

    # start the tasks
    results = []
    for key in old_store.keys():
        if key.split('/')[-1] == 'param_ref':
            store.append(key, old_store[key])
            continue

        args = [key, old_store[key]]
        if parallel:
            result = lview.apply(model_fall_responses, *args)
        else:
            result = model_fall_responses(*args)
        results.append(result)

    # collect and save results
    while len(results) > 0:
        result = results.pop(0)
        if parallel:
            key, responses = result.get()
            result.display_outputs()
        else:
            key, responses = result
        store.append(key, responses)

    store.close()
    old_store.close()
def analyzer_map(parallel=True):
    '''returns a map function, backed by ipyparallel when parallel is True'''
    if parallel:
        from ipyparallel import Client
        rc = Client(profile=os.getenv('IPYTHON_PROFILE'))
        logger.debug('Using ipyparallel with %d engines', len(rc))
        lview = rc.load_balanced_view()

        def mapper(func, it):
            ret = lview.map_sync(func, it)
            return ret

        map_function = mapper
    else:
        map_function = None

    return map_function
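# Illustrative use of the factory above (the worker function is an
# assumption, and parallel=True requires a running ipcluster): the returned
# mapper has the same call signature as the built-in map(), so it can be
# handed to code that expects one.
def _square(x):
    return x * x

mapper = analyzer_map(parallel=True)
if mapper is not None:
    squares = mapper(_square, range(16))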
def __init__(self, mod, engine='multiproc'):
    """
    Instantiate the parallel scanner class with a PySCeS model instance
    and an optional 'engine' argument specifying the parallel engine:
    'multiproc' -- multiprocessing (default)
    'ipcluster' -- IPython cluster
    """
    self.engine = engine
    if engine == 'multiproc':
        print('parallel engine: multiproc')
    elif engine == 'ipcluster':
        print('parallel engine: ipcluster')
        try:
            from ipyparallel import Client
        except ImportError as ex:
            print('\n', ex)
            raise ImportError(
                'PARSCANNER: Requires IPython and ipyparallel version >=4.0 (http://ipython.org) and 0MQ (http://zero.mq).'
            )
        try:
            rc = Client()
            self.rc = rc
        except OSError as ex:
            raise OSError(
                str(ex)
                + '\nPARSCANNER: Requires a running IPython cluster. See "ipcluster --help".\n'
            )
        dv = rc[:]  # direct view
        lv = rc.load_balanced_view()
        self.dv = dv
        self.lv = lv
        dv.execute('from pysces.PyscesParScan import Analyze, setModValue')
    else:
        raise UserWarning(engine + " is not a valid parallel engine!")

    self.GenDict = {}
    self.GenOrder = []
    self.ScanSpace = []
    self.mod = mod
    self.SteadyStateResults = []
    self.UserOutputList = []
    self.UserOutputResults = []
    self.scanT = TimerBox()
class ParaExec:
    def __init__(self, addpath=None):
        self.client = Client()
        self.load_balanced_view = self.client.load_balanced_view()
        if len(self.client.ids) == 0:
            print('# of engines : single mode')
        else:
            print('# of engines : %d' % len(self.client.ids))

    def do_parallel(self, func, arglist):
        print('# of job : %d' % len(arglist))
        rs = self.load_balanced_view.map_async(func, arglist)
        rs.wait_interactive()
        print(rs.result())

    def do_single(self, func, arglist):
        print('# of job : %d' % len(arglist))
        for arg in arglist:
            func(arg)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('directory',
                        help="Provide the directory of the HDF files "
                             "that shall be converted to csv here.")
    args = parser.parse_args()
    root = os.path.abspath(args.directory)
    fnames = glob.glob(os.path.join(root, '*.hdf'))
    logging.info('Found %i files to convert.', len(fnames))
    c = Client()
    lbview = c.load_balanced_view()
    results = lbview.map_async(process_fname, fnames)
    # progress display
    while not results.ready():
        print("{:.1f} %".format(100 * results.progress / len(fnames)))
        sys.stdout.flush()
        time.sleep(10)
    logging.info('Conversion done.')
def _perform_evolution(self, algo, pop):
    try:
        from ipyparallel import Client

        # Create client
        rc = Client()
        # Create Load-balanced view
        lbview = rc.load_balanced_view()
        # Run the task; apply_async always returns an AsyncResult, so the
        # return value can be fetched with get() below
        ar = lbview.apply_async(_maptask_target, args=(algo, pop))
        # Get retval
        retval = ar.get()
        if isinstance(retval, BaseException):
            raise retval
        return retval
    except BaseException as e:
        print('Exception caught during evolution:')
        print(e)
        raise RuntimeError()
def _init(self, *args, **kwargs):
    # A small helper function which will do the following:
    # * get a client from the cache in a thread safe manner, or
    #   create a new one from scratch
    # * store the input arguments as class members
    # * create a LoadBalancedView from the client
    # * create a lock to regulate access to the view
    # * return the view.
    from ipyparallel import Client

    # Turn the arguments into something that might be hashable.
    # Make sure the kwargs are sorted so that two sets of identical
    # kwargs will be recognized as equal also if the keys are stored
    # in different order.
    args_key = (args, tuple(sorted([(k, kwargs[k]) for k in kwargs])))
    if _hashable(args_key):
        with _client_cache_lock:
            # Try to see if a client constructed with the same
            # arguments already exists in the cache.
            rc = _client_cache.get(args_key)
            if rc is None:
                # No cached client exists. Create a new client
                # and store it in the cache.
                rc = Client(*args, **kwargs)
                _client_cache[args_key] = rc
    else:
        # If the arguments are not hashable, just create a brand new
        # client.
        rc = Client(*args, **kwargs)

    # Save the init arguments.
    self._args = args
    self._kwargs = kwargs

    # NOTE: we need to regulate access to the view because,
    # while run_evolve() is running in a separate thread, we
    # could be doing other things involving the view (e.g.,
    # asking extra_info()). Thus, create the lock here.
    self._view_lock = _Lock()

    return rc.load_balanced_view()
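# For context, a minimal sketch of the module-level plumbing that _init()
# relies on (these are assumptions about the original module, which is not
# shown here): a cache of clients keyed by constructor arguments, a lock
# guarding it, and a probe for hashability.
from threading import Lock as _Lock

_client_cache = {}
_client_cache_lock = _Lock()

def _hashable(obj):
    # Usable as a dict key only if hash() accepts it.
    try:
        hash(obj)
    except TypeError:
        return False
    return True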
def bench_ipp_seq(tasks, workers, task_duration, warmup=True):
    from ipyparallel import Client
    rc = Client()
    # dview = rc[:]
    dview = rc.load_balanced_view()

    if warmup:
        dview.map_sync(sleep_worker, [task_duration for i in range(tasks)])

    dview.block = True

    def run(tasks):
        objs = [
            dview.apply_async(sleep_worker, task_duration)
            for i in range(tasks)
        ]
        for task in objs:
            task.get()

    res, elapsed = timeit(run, tasks)
    return elapsed
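# `sleep_worker` and `timeit` are helpers from the surrounding benchmark and
# are not shown; minimal stand-ins (assumptions) so the function above can be
# exercised in isolation:
def sleep_worker(seconds):
    import time
    time.sleep(seconds)
    return seconds

def timeit(fn, *args):
    import time
    t0 = time.time()
    res = fn(*args)
    return res, time.time() - t0

# elapsed = bench_ipp_seq(tasks=100, workers=4, task_duration=0.01)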
def par_value(n):
    """
    Parallel option valuation

    Parameters
    ==========
    n: int
        number of option valuations/strikes
    """
    import numpy as np
    from ipyparallel import Client

    c = Client(profile="default")
    view = c.load_balanced_view()

    strikes = np.linspace(80, 20, n)
    option_values = []
    for strike in strikes:
        values = view.apply_async(bsm_mcs_valuation, strike)
        option_values.append(values)
    c.wait(option_values)

    return strikes, option_values
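# Follow-up sketch: c.wait() only blocks until the AsyncResult objects are
# done; each Monte Carlo valuation still has to be unwrapped with .get():
strikes, option_values = par_value(20)
estimates = [ar.get() for ar in option_values]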
def run_jobs_on_ipythoncluster(worker, task_list, shutdown_ipengines_after_done=False):
    t0 = time.time()
    rc = Client(config.CLUSTER_CLIENT_JSON)
    lview = rc.load_balanced_view()
    cnt_nodes = len(lview.targets or rc.ids)
    print("\t# nodes in use: {}".format(cnt_nodes))
    lview.block = False
    # move to app path
    lview.map(os.chdir, [config.APP_PATH] * cnt_nodes)
    print("\t# of tasks: {}".format(len(task_list)))
    print("\tsubmitting...", end='')
    job = lview.map_async(worker, task_list)
    print("done.")
    try:
        job.wait_interactive()
    except KeyboardInterrupt:
        # handle "Ctrl-C"
        if ask("\nAbort all submitted jobs?") == 'Y':
            lview.abort()
            print("Aborted, all submitted jobs are cancelled.")
        else:
            print("Aborted, but your jobs are still running on the cluster.")
        return
    if len(job.result()) != len(task_list):
        print(
            "WARNING:\t# of results returned ({}) != # of tasks ({}).".format(
                len(job.result()), len(task_list)))
    print("\ttotal time: {}".format(timesofar(t0)))
    if shutdown_ipengines_after_done:
        print("\tshutting down all ipengine nodes...", end='')
        lview.shutdown()
        print('Done.')
    return job.result()
class ParaExec:
    """ Parallelized execution of a function with ipyparallel """

    def __init__(self):
        self.client = Client()
        self.load_balanced_view = self.client.load_balanced_view()
        if len(self.client.ids) == 0:
            print('# of engines : single mode')
        else:
            print('# of engines : %d' % len(self.client.ids))

    def do_parallel(self, func, arg_list):
        print('# of job : %d' % len(arg_list))
        rs = self.load_balanced_view.map_async(func, arg_list)
        rs.wait_interactive()
        # print(rs.result())

    def do_single(self, func, arg_list):
        print('# of job : %d' % len(arg_list))
        for arg in arg_list:
            func(arg)
def Main():
    args = parser.parse_args()
    cty_type = args.cty
    cty_list = args.cty_list

    data_path = os.path.join(os.path.dirname(man_opt.__file__), os.pardir,
                             'assets', 'aggregated_data')
    mouse_data_filename = os.path.join(data_path, 'Mouse_class_data.csv')
    mouse_datatype_filename = os.path.join(data_path, 'Mouse_class_datatype.csv')
    me_ttype_map_path = os.path.join(data_path, 'me_ttype.pkl')

    sdk_data_filename = os.path.join(data_path, 'sdk.csv')
    sdk_datatype_filename = os.path.join(data_path, 'sdk_datatype.csv')
    sdk_data = man_utils.read_csv_with_dtype(sdk_data_filename,
                                             sdk_datatype_filename)

    if cty_type == 'ttype':
        mouse_data = man_utils.read_csv_with_dtype(mouse_data_filename,
                                                   mouse_datatype_filename)
        me_ttype_map = utility.load_pickle(me_ttype_map_path)
        metype_cluster = mouse_data.loc[
            mouse_data.hof_index == 0, ['Cell_id', 'Dendrite_type', 'me_type']]
        sdk_me = pd.merge(sdk_data, metype_cluster, how='left',
                          on='Cell_id').dropna(how='any', subset=['me_type'])
        sdk_me['ttype'] = sdk_me['me_type'].apply(lambda x: me_ttype_map[x])
        cell_df = sdk_me.loc[sdk_me.ttype.isin(cty_list), ]
    elif cty_type == 'Cre_line':
        cell_df = sdk_data.loc[sdk_data.line_name.isin(cty_list), ]

    cell_ids = cell_df.Cell_id.unique().tolist()

    rc = Client(profile=os.getenv('IPYTHON_PROFILE'))
    logger.debug('Using ipyparallel with %d engines', len(rc))
    lview = rc.load_balanced_view()
    lview.map_sync(get_efeatures, cell_ids)
def parallel_init(wdir, profile=None, variables=None):
    '''Initialize ipyparallel cluster

    Args:
        wdir: The working directory of the project
        profile: The ipyparallel profile to use. If None, use the default.
        variables: A dictionary of variables that will be set in each engine.

    Returns:
        A balanced view and a direct view object.
    '''
    c = Client(profile=profile)
    directview = c[:]
    if variables is not None:
        directview.push(variables)
    v = c.load_balanced_view()
    n_engines = len(c.ids)
    print('[*] {0} parallel engines available'.format(n_engines))
    directview.map_sync(os.chdir, [wdir] * n_engines)
    print('[*] Finished setting working directories')
    return v, directview
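# Hypothetical call of the initializer above; the working directory and the
# pushed variable name are assumptions for illustration.
import os

lview, dview = parallel_init(os.getcwd(), variables={'n_iter': 100})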
def execute_in_parallel(func, iterable):
    """Use IPyparallel's load_balanced_view to execute in parallel.

    Function will create a Client() object, a load_balanced_view and a
    notebook widget based progressbar automatically. The processing will
    be performed using the `map_async` method of the load_balanced_view
    object.

    Parameters
    ----------
    func : function
        Function to call in parallel.
    iterable : iterable
        Iterable container that will be used as input for `func`.

    Returns
    -------
    The results object from executing `lbview.map_async`.
    """
    c = Client()
    lbview = c.load_balanced_view()
    results = lbview.map_async(func, iterable)
    display_multi_progress(results, iterable)
    return results
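# Usage sketch (the worker function is an assumption): because
# execute_in_parallel returns the AsyncMapResult itself, the caller decides
# when to block for the values.
def _double(x):
    return 2 * x

results = execute_in_parallel(_double, range(100))
values = results.get()  # blocks until every engine has finished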
class ParallelOptimizer(Optimizer):
    def __init__(self, url_file=None, profile=None, settings=None):
        super(ParallelOptimizer, self).__init__(settings)
        self.settings = settings if settings else {}
        self._client = Client(url_file=url_file, profile=profile)
        self._dview = self._client[:]
        self._lview = self._client.load_balanced_view()
        self._code = ""

    def open(self, filename):
        with open(filename) as f:
            self._code = f.read()

    def __getitem__(self, item):
        return self.settings[item]

    def __setitem__(self, item, value):
        self.settings[item] = value

    @staticmethod
    def run_trader(settings, code, param, runtime_meta):
        trader = Trader()
        for k, v in settings.items():
            trader[k].kwargs.update(v)
        symbols, frequency, start, end, ticker_type, save = runtime_meta
        trader.back_test(code, symbols, frequency, start, end,
                         ticker_type, param, save, raw_code=True)
        op_dict = trader.output("strategy_summary", "risk_indicator")
        for p in op_dict.values():
            param.update(p)
        return param

    def run(self, symbols, frequency, start=None, end=None, ticker_type=None,
            sort=u"夏普比率", ascending=False, save=False, **paras):
        runtime_meta = (symbols, frequency, start, end, ticker_type, save)
        task = list(self.exhaustion(**paras))
        ars = []
        for item in task:
            ars.append(
                self._lview.apply_async(self.run_trader, self.settings,
                                        self._code, item, runtime_meta))
        self._lview.wait(ars)
        tmp = [ar.get() for ar in ars]
        result = pd.DataFrame(tmp).sort_values(by=sort, ascending=ascending)
        return result
    return(testSeed(seed_values[i]))


################################################################################
# Set up engines
################################################################################

arrayid = int(os.environ['SLURM_ARRAY_TASK_ID'])
mycluster = "cluster-" + str(arrayid)

c = Client(profile=mycluster)
c.ids

dview = c[:]
dview.block = True

lview = c.load_balanced_view()
lview.block = True

dview.execute('import sys')
dview.execute("sys.path.append('../../modules')")
dview.execute('from sample import permute_indices, fykd')
dview.execute('from scipy.misc import comb')
dview.execute('import numpy as np')
mydict = dict(seed_values=seed_values,
              testSeed=testSeed,
              prob_derangement=prob_derangement,
              check_derangement=check_derangement,
              sequential_derangement_test=sequential_derangement_test)
dview.push(mydict)

################################################################################
# Please check my own question at stack overflow http://stackoverflow.com/questions/34210522/how-to-run-a-custom-script-in-parallel-with-django/34302741#34302741
# pip install ipyparallel -- necessary to run the cluster
# create a file on ~/.ipython/profile_default/startup/00-load_django.py and paste this gist to it: https://gist.github.com/hassek/49956bce9f44d3853b42
# start cluster: ipcluster start -n X --profile=tomtom
# where X is the number of parallel engines desired; a good number is your machine's cores + 1, so if you have 4 cores, 5 is a good number
# you can now run any functions in parallel from ipython!
# It is very important to notice that anything that is returned should be able to be pickled.
# i.e. list(Receipt.objects.all()) will return but Receipt.objects.all() will raise an error

from ipyparallel import Client

rc = Client()
lview = rc.load_balanced_view()


@lview.parallel()
def fix(last_user):
    from mongoengine.base.datastructures import BaseDict
    from receipt.models import Receipt

    fixed_receipts = []
    for rep in Receipt.objects.filter(user_id__gte=last_user,
                                      user_id__lt=last_user + 1000,
                                      tracking__type=3):
        if type(rep.tracking) == BaseDict:
            if '0' in rep.tracking:
                rep.tracking = []
                rep.save()
                fixed_receipts.append((rep.id, "FIXED"))
    return fixed_receipts


res = fix.map(range(0, 10000, 1000))  # This will execute 10 tasks
res.wait_interactive()  # this will show the progress

from datetime import datetime; from shipment.models import Shipment
def _merge_ipython_cluster(self, step=100000):
    '''Do the merging on ipython cluster.'''
    from ipyparallel import Client, require
    from config import CLUSTER_CLIENT_JSON

    t0 = time.time()
    src_collection_list = [collection for collection in self._build_config['sources']
                           if collection not in ['entrez_gene', 'ensembl_gene']]

    self.target.drop()
    self.target.prepare()
    geneid_set = self.make_genedoc_root()

    idmapping_gridfs_d = self._save_idmapping_gridfs()

    logging.info(timesofar(t0))

    rc = Client(CLUSTER_CLIENT_JSON)
    lview = rc.load_balanced_view()
    logging.info("\t# nodes in use: {}".format(len(lview.targets or rc.ids)))
    lview.block = False

    kwargs = {}
    target_collection = self.target.target_collection
    kwargs['server'], kwargs['port'] = target_collection.database.client.address
    kwargs['src_db'] = self.src.name
    kwargs['target_db'] = target_collection.database.name
    kwargs['target_collection_name'] = target_collection.name
    kwargs['limit'] = step

    @require('pymongo', 'time', 'types')
    def worker(kwargs):
        server = kwargs['server']
        port = kwargs['port']
        src_db = kwargs['src_db']
        target_db = kwargs['target_db']
        target_collection_name = kwargs['target_collection_name']
        src_collection = kwargs['src_collection']
        skip = kwargs['skip']
        limit = kwargs['limit']

        def load_from_gridfs(filename, db):
            import gzip
            import pickle
            import gridfs
            fs = gridfs.GridFS(db)
            fobj = fs.get(filename)
            gzfobj = gzip.GzipFile(fileobj=fobj)
            try:
                object = pickle.load(gzfobj)
            finally:
                gzfobj.close()
                fobj.close()
            return object

        def alwayslist(value):
            if value is None:
                return []
            if isinstance(value, (list, tuple)):
                return value
            else:
                return [value]

        conn = pymongo.MongoClient(server, port)
        src = conn[src_db]
        target_collection = conn[target_db][target_collection_name]

        idmapping_gridfs_name = kwargs.get('idmapping_gridfs_name', None)
        if idmapping_gridfs_name:
            idmapping_d = load_from_gridfs(idmapping_gridfs_name, src)
        else:
            idmapping_d = None

        cur = src[src_collection].find(skip=skip, limit=limit, timeout=False)
        cur.batch_size(1000)
        try:
            for doc in cur:
                _id = doc['_id']
                if idmapping_d:
                    _id = idmapping_d.get(_id, None) or _id
                # there could be cases where idmapping returns multiple entrez_gene ids.
                for __id in alwayslist(_id):
                    __id = str(__id)
                    doc.pop('_id', None)
                    doc.pop('taxid', None)
                    target_collection.update({'_id': __id}, doc, manipulate=False, upsert=False)
                    #target_collection.update({'_id': __id}, {'$set': doc},
        finally:
            cur.close()

    t0 = time.time()
    task_list = []
    for src_collection in src_collection_list:
        _kwargs = copy.copy(kwargs)
        _kwargs['src_collection'] = src_collection
        id_type = self.src_master[src_collection].get('id_type', None)
        if id_type:
            idmapping_gridfs_name = idmapping_gridfs_d[id_type]
            _kwargs['idmapping_gridfs_name'] = idmapping_gridfs_name
        cnt = self.src[src_collection].count()
        for s in range(0, cnt, step):
            __kwargs = copy.copy(_kwargs)
            __kwargs['skip'] = s
            task_list.append(__kwargs)

    logging.info("\t# of tasks: {}".format(len(task_list)))
    logging.info("\tsubmitting...")
    job = lview.map_async(worker, task_list)
    logging.info("done.")
    job.wait_interactive()
    logging.info("\t# of results returned: {}".format(len(job.result())))
    logging.info("\ttotal time: {}".format(timesofar(t0)))

    if self.shutdown_ipengines_after_done:
        logging.info("\tshutting down all ipengine nodes...")
        lview.shutdown()
        logging.info('Done.')
class ParallelMagic(Magic):
    client = None
    view = None
    view_load_balanced = None
    module_name = None
    class_name = None
    kernel_name = None
    ids = None
    retval = None
    retry = False

    @option(
        '-k', '--kernel_name', action='store', default="default",
        help='arbitrary name given to reference kernel'
    )
    @option(
        '-i', '--ids', action='store', default=None,
        help='the machine ids to use from the cluster'
    )
    def line_parallel(self, module_name, class_name, kernel_name="default", ids=None):
        """
        %parallel MODULE CLASS [-k NAME] [-i [...]] - construct an interface to the cluster.

        Example:

            %parallel bash_kernel BashKernel
            %parallel bash_kernel BashKernel -k bash
            %parallel bash_kernel BashKernel --i [0,2:5,9,...]

        cluster_size and cluster_rank variables are set upon
        initialization of the remote node (if the kernel
        supports %set).

        Use %px or %%px to send code to the cluster.
        """
        try:
            from ipyparallel import Client
        except ImportError:
            from IPython.parallel import Client

        count = 1
        while count <= 5:
            try:
                self.client = Client()
                break
            except:
                print("Waiting on cluster to start...")
                time.sleep(2)
                count += 1
        if count == 6:
            raise Exception("Cluster was not started.")

        if ids is None:
            count = 1
            while count <= 5:
                try:
                    self.view = self.client[:]
                    break
                except:
                    print("Waiting for engines...")
                    time.sleep(2)
                    count += 1
            if count == 6:
                raise Exception("Engines were not started.")
        else:
            # ids[:] = slice(None, None, None)
            # ids[1:3] = slice(1, 3, None)
            # ids[1:3:1] = slice(1, 3, 1)
            # ids[1, 2, ...] = [1, 2, Ellipsis]
            # ids[1, 2:4, ...] = [1, slice(2, 4, None), Ellipsis]
            try:
                ids_slice = eval("slicer%s" % ids)  # slicer[0,...,7]
            except:
                ids_slice = slicer[:]
            if isinstance(ids_slice, (slice, int)):
                count = 1
                while count <= 5:
                    try:
                        self.view = self.client[ids_slice]
                        break
                    except:
                        print("Waiting for engines...")
                        time.sleep(2)
                        count += 1
                if count == 6:
                    raise Exception("Engines were not started.")
            else:  # tuple of indexes/slices
                # FIXME: if so, handle Ellipsis
                view = None
                for item in ids_slice:
                    count = 1
                    while count <= 5:
                        try:
                            client = self.client[item]
                            if view:
                                ## FIXME: can't do this:
                                view.append(client)
                            else:
                                view = client
                            break
                        except:
                            print("Waiting on cluster to start...")
                            time.sleep(2)
                            count += 1
                    if count == 6:
                        raise Exception("Cluster was not started.")
                self.view = view

        self.view_load_balanced = self.client.load_balanced_view()
        self.module_name = module_name
        self.class_name = class_name
        self.kernel_name = kernel_name
        self.view.execute("""
import os
for key, value in %(env)s.items():
    os.environ[key] = value
try:
    kernels
except:
    kernels = {}
from %(module_name)s import %(class_name)s
kernels['%(kernel_name)s'] = %(class_name)s()
## FIXME: kernels['%(kernel_name)s'].kernel = kernel
""" % {"module_name": module_name,
       "class_name": class_name,
       "kernel_name": kernel_name,
       "env": str(self.kernel.env)},
                          block=True)

        self.view["kernels['%s'].set_variable(\"cluster_size\", %s)" % (
            kernel_name, len(self.client))]
        self.client[:].scatter('cluster_rank', self.client.ids, flatten=True)
        self.view["kernels['%s'].set_variable(\"cluster_rank\", cluster_rank)" % (
            kernel_name)]
        self.retval = None

    @option(
        '-k', '--kernel_name', action='store', default=None,
        help='kernel name given to use for execution'
    )
    @option(
        '-e', '--evaluate', action='store_true', default=False,
        help=('evaluate code in the current kernel, too. The current ' +
              'kernel should be of the same language as the cluster.')
    )
    def line_px(self, expression, kernel_name=None, evaluate=False):
        """
        %px EXPRESSION - send EXPRESSION to the cluster.

        Example:

            %px sys.version
            %px -k scheme (define x 42)
            %px x
            %px cluster_rank

        cluster_size and cluster_rank variables are set upon
        initialization of the remote node (if the kernel
        supports %set).

        Use %parallel to initialize the cluster.
        """
        expression = str(expression)
        if kernel_name is None:
            kernel_name = self.kernel_name

        if self.retry:
            count = 1
            while count <= 5:
                try:
                    self.retval = self.view["kernels['%s'].do_execute_direct(\"%s\")" % (
                        kernel_name, self._clean_code(expression))]
                    break
                except:
                    print("Waiting on cluster clients to start...")
                    time.sleep(2)
                    count += 1
            if count == 6:
                raise Exception("Cluster clients have not started.")
            self.retry = False
        else:
            try:
                self.retval = self.view["kernels['%s'].do_execute_direct(\"%s\")" % (
                    kernel_name, self._clean_code(expression))]
            except Exception as e:
                self.retval = str(e)

        if evaluate:
            self.code = expression

    def _clean_code(self, expr):
        return expr.strip().replace('"', '\\"').replace("\n", "\\n")

    ## px --kernel NAME
    @option(
        '-k', '--kernel_name', action='store', default=None,
        help='kernel name given to use for execution'
    )
    @option(
        '-e', '--evaluate', action='store_true', default=False,
        help=('evaluate code in the current kernel, too. The current ' +
              'kernel should be of the same language as the cluster.')
    )
    def cell_px(self, kernel_name=None, evaluate=False):
        """
        %%px - send cell to the cluster.

        Example:

            %%px (define x 42)

        Use %parallel to initialize the cluster.
        """
        if kernel_name is None:
            kernel_name = self.kernel_name

        self.retval = self.view["kernels['%s'].do_execute_direct(\"%s\")" % (
            kernel_name, self._clean_code(self.code))]
        self.evaluate = evaluate

    def line_pmap(self, function_name, args, kernel_name=None):
        """
        %pmap FUNCTION [ARGS1,ARGS2,...] - ("parallel map") call a FUNCTION on args

        This line magic will apply a function name to all of the
        arguments given one at a time using a dynamic load balancing scheduler.

        Currently, the args are provided as a Python expression (with no spaces).

        You must first setup a cluster using the %parallel magic.

        Examples:

            %pmap function-name-in-language range(10)
            %pmap function-name-in-language [1,2,3,4]
            %pmap run_experiment range(1,100,5)
            %pmap run_experiment ["test1","test2","test3"]
            %pmap f [(1,4,7),(2,3,5),(7,2,2)]

        The function name must be a function that is available on all
        nodes in the cluster. For example, you could:

            %%px (define myfunc (lambda (n) (+ n 1)))

        to define myfunc on all machines (use %%px -e to also
        define it in the running notebook or console). Then you
        can apply it to a list of arguments:

            %pmap myfunc range(100)

        The load balancer will run myfunc on the next available node
        in the cluster.

        Note: not all languages may support running a function via this magic.
        """
        if kernel_name is None:
            kernel_name = self.kernel_name
        # To make sure we can find `kernels`:
        try:
            from ipyparallel.util import interactive
        except ImportError:
            from IPython.parallel.util import interactive
        f = interactive(lambda arg, kname=kernel_name, fname=function_name:
                        kernels[kname].do_function_direct(fname, arg))
        self.retval = self.view_load_balanced.map_async(f, eval(args))

    def post_process(self, retval):
        try:
            ## any will crash on numpy arrays
            if isinstance(self.retval, list) and not any(self.retval):
                return None
        except:
            pass
        return self.retval
def parallel_map(task, values, task_args=None, task_kwargs=None,
                 client=None, view=None, progress_bar=None,
                 show_scheduling=False, **kwargs):
    """
    Call the function ``task`` for each value in ``values`` using a cluster
    of IPython engines. The function ``task`` should have the signature
    ``task(value, *args, **kwargs)``.

    The ``client`` and ``view`` are the IPython.parallel client and
    load-balanced view that will be used in the parfor execution. If these
    are ``None``, new instances will be created.

    Parameters
    ----------
    task: a Python function
        The function that is to be called for each value in ``values``.
    values: array / list
        The list or array of values for which the ``task`` function is to be
        evaluated.
    task_args: list / dictionary
        The optional additional arguments to the ``task`` function.
    task_kwargs: list / dictionary
        The optional additional keyword arguments to the ``task`` function.
    client: IPython.parallel.Client
        The IPython.parallel Client instance that will be used in the
        parfor execution.
    view: a IPython.parallel.Client view
        The view that is to be used in scheduling the tasks on the IPython
        cluster. Preferably a load-balanced view, which is obtained from the
        IPython.parallel.Client instance client by calling,
        view = client.load_balanced_view().
    show_scheduling: bool {False, True}, default False
        Display a graph showing how the tasks (the evaluation of ``task``
        for the values in ``values``) were scheduled on the IPython engine
        cluster.
    progress_bar: bool {False, True}, default False
        Display an HTML-based progress bar during the execution of the
        parfor loop.

    Returns
    -------
    result : list
        The result list contains the value of
        ``task(value, task_args, task_kwargs)`` for each value in ``values``.
    """
    submitted = datetime.datetime.now()

    if task_args is None:
        task_args = tuple()
    if task_kwargs is None:
        task_kwargs = {}

    if client is None:
        client = Client()

    # make sure qutip is available at engines
    dview = client[:]
    dview.block = True
    dview.execute("from qutip import *")

    if view is None:
        view = client.load_balanced_view()

    ar_list = [view.apply_async(task, value, *task_args, **task_kwargs)
               for value in values]

    if progress_bar is None:
        view.wait(ar_list)
    else:
        if progress_bar is True:
            progress_bar = HTMLProgressBar()
        n = len(ar_list)
        progress_bar.start(n)
        while True:
            n_finished = sum([ar.progress for ar in ar_list])
            progress_bar.update(n_finished)
            if view.wait(ar_list, timeout=0.5):
                progress_bar.update(n)
                break
        progress_bar.finished()

    if show_scheduling:
        metadata = [[ar.engine_id,
                     (ar.started - submitted).total_seconds(),
                     (ar.completed - submitted).total_seconds()]
                    for ar in ar_list]
        _visualize_parfor_data(metadata)

    return [ar.get() for ar in ar_list]
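# Illustrative call of parallel_map above (the task function is an
# assumption; a running cluster with qutip importable on the engines is
# required):
def task(g, offset=0.0):
    return g ** 2 + offset

result = parallel_map(task, [0.1, 0.2, 0.5], task_kwargs={'offset': 1.0})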
def _simulate_do_what_i_mean_single_runner(runner, folder=None,
                                           block=True):  # pragma: no cover
    """
    This will either call the `simulate` method or the
    `simulate_in_parallel` method as appropriate.

    If the 'parameters variation index' was specified in the command line,
    then the `simulate` method will be called with that index. If not,
    then the `simulate` method will be called without any index or, if
    there is an ipython cluster running, the `simulate_in_parallel` method
    will be called.

    Parameters
    ----------
    runner : SimulationRunner
        The SimulationRunner object for which either the 'simulate' or the
        'simulate_in_parallel' method will be called.
    folder : str, optional
        Folder to be added to the python path. This should be the main
        pyphysim folder
    block : bool, optional
        Passed to the simulate_in_parallel method when the simulation is
        performed in parallel. If this is false, you need to call the
        method 'wait_parallel_simulation' of the runner object at some
        point.
    """
    if runner.command_line_args.index is not None:
        # Perform the simulation (serially) for the desired index
        msg = "Simulation will be run for the parameters variation: {0}"
        print(msg.format(runner.command_line_args.index))
        runner.simulate(runner.command_line_args.index)
    else:
        run_in_parallel = True
        try:
            # If we can get an IPython view that means that the IPython
            # engines are running. In that case we will perform the
            # simulation in parallel
            from ipyparallel import Client
            # cl = Client(profile="ssh")
            cl = Client(profile="default")

            if folder is not None:
                _add_folder_to_ipython_engines_path(cl, folder)

            # For the actual simulation we are better using a
            # load_balanced_view
            lview = cl.load_balanced_view()
        except (IOError, ImportError):
            # If we can't get an IPython view then we will perform the
            # simulation serially
            run_in_parallel = False
            lview = None

        if run_in_parallel is True:
            print("Simulation will be run in Parallel")
            # Remove the " - SNR: {SNR}" string in the progressbar message,
            # since when the simulation is performed in parallel we get a
            # single progressbar for all the simulation.
            runner.progressbar_message = 'Elapsed Time: {{elapsed_time}}'
            runner.simulate_in_parallel(lview, wait=block)
        else:
            print("Simulation will be run serially")
            runner.simulate()
class ClusterLab(epyc.Lab): """A :class:`Lab` running on an ``ipyparallel`` compute cluster. Experiments are submitted to engines in the cluster for execution in parallel, with the experiments being performed asynchronously to allow for disconnection and subsequent retrieval of results. Combined with a persistent :class:`LabNotebook`, this allows for fully decoupled access to an on-going computational experiment with piecewise retrieval of results. This class requires a cluster to already be set up and running, configured for persistent access, with access to the necessary code and libraries, and with appropriate security information available to the client. """ # Tuning parameters WaitingTime = 30 #: Waiting time for checking for job completion. Lower values increase network traffic. def __init__( self, notebook = None, url_file = None, profile = None, profile_dir = None, ipython_dir = None, context = None, debug = False, sshserver = None, sshkey = None, password = None, paramiko = None, timeout = 10, cluster_id = None, use_dill = False, **extra_args ): """Create an empty lab attached to the given cluster. Most of the arguments are as expected by the ``ipyparallel.Client`` class, and are used to create the underlying connection to the cluster. The connection is opened immediately, meaning the cluster must be up and accessible when creating a lab to use it. :param notebook: the notebook used to results (defaults to an empty :class:`LabNotebook`) :param url_file: file containing connection information for accessing cluster :param profile: name of the IPython profile to use :param profile_dir: directory containing the profile's connection information :param ipython_dir: directory containing profile directories :param context: ZMQ context :param debug: whether to issue debugging information (defaults to False) :param sshserver: username and machine for ssh connections :param sshkey: file containing ssh key :param password: ssh password :param paramiko: True to use paramiko for ssh (defaults to False) :param timeout: timeout in seconds for ssh connection (defaults to 10s) :param cluster_id: string added to runtime files to prevent collisions :param use_dill: whether to use Dill as pickler (defaults to False)""" super(epyc.ClusterLab, self).__init__(notebook) # record all the connection arguments for later self._arguments = dict(url_file = url_file, profile = profile, profile_dir = profile_dir, ipython_dir = ipython_dir, context = context, debug = debug, sshserver = sshserver, sshkey = sshkey, password = password, paramiko = paramiko, timeout = timeout, cluster_id = cluster_id, **extra_args) self._client = None # connect to the cluster self.open() # use Dill if requested if use_dill: self.use_dill() def open( self ): """Connect to the cluster.""" if self._client is None: self._client = Client(**self._arguments) def close( self ): """Close down the connection to the cluster.""" if self._client is not None: self._client.close() self._client = None def numberOfEngines( self ): """Return the number of engines available to this lab. :returns: the number of engines""" return len(self.engines()) def engines( self ): """Return a list of the available engines. :returns: a list of engines""" self.open() return self._client[:] def use_dill( self ): """Make the cluster use Dill as pickler for transferring results. This isn't generally needed, but is sometimes useful for particularly complex experiments such as those involving closures. 
(Or, to put it another way, if you find yourself tempted to use this method, consider re-structuring your experiment code.)""" self.open() with self.sync_imports(quiet = True): import dill self._client.direct_view().use_dill() def sync_imports( self, quiet = False ): """Return a context manager to control imports onto all the engines in the underlying cluster. This method is used within a ``with`` statement. Any imports should be done with no experiments running, otherwise the method will block until the cluster is quiet. Generally imports will be one of the first things done when connecting to a cluster. (But be careful not to accidentally try to re-import if re-connecting to a running cluster.) :param quiet: if True, suppresses messages (defaults to False) :returns: a context manager""" self.open() return self._client[:].sync_imports(quiet = quiet) def _mixup( self, ps ): """Private method to mix up a list of values in-place using a Fisher-Yates shuffle (see https://en.wikipedia.org/wiki/Fisher-Yates_shuffle). :param ps: the array :returns: the array, shuffled in-place""" for i in range(len(ps) - 1, 0, -1): j = int(numpy.random.random() * i) temp = ps[i] ps[i] = ps[j] ps[j] = temp return ps def runExperiment( self, e ): """Run the experiment across the parameter space in parallel using all the engines in the cluster. This method returns immediately. The experiments are run asynchronously, with the points in the parameter space being explored randomly so that intermediate retrievals of results are more representative of the overall result. Put another way, for a lot of experiments the results available will converge towards a final answer, so we can plot them and see the answer emerge. :param e: the experiment""" # create the parameter space space = self.parameterSpace() # only proceed if there's work to do if len(space) > 0: nb = self.notebook() # randomise the order of the parameter space so that we evaluate across # the space as we go along to try to make intermediate (incomplete) result # sets more representative of the overall result set ps = self._mixup(space) try: # connect to the cluster self.open() # submit an experiment at each point in the parameter space to the cluster view = self._client.load_balanced_view() jobs = [] for p in ps: jobs.extend((view.apply_async((lambda p: e.set(p).run()), p)).msg_ids) # there seems to be a race condition in submitting jobs, # whereby jobs get dropped if they're submitted too quickly time.sleep(0.01) # record the mesage ids of all the jobs as submitted but not yet completed psjs = zip(ps, jobs) for (p, j) in psjs: nb.addPendingResult(p, j) finally: # commit our pending results in the notebook nb.commit() self.close() def updateResults( self ): """Update our results within any pending results that have completed since we last retrieved results from the cluster. 
        :returns: the number of pending results completed at this call"""

        # we do all the tests for pending results against the notebook directly,
        # as the corresponding methods on self call this method themselves
        nb = self.notebook()

        # look for pending results if we're waiting for any
        n = 0
        if nb.numberOfPendingResults() > 0:
            # we have results to get
            self.open()
            for j in set(nb.pendingResults()):
                # query the status of a job
                status = self._client.result_status(j, status_only = False)

                # add all completed jobs to the notebook
                if j in status['completed']:
                    r = status[j]

                    # update the result in the notebook, cancelling the pending
                    # result as well. Values come back from Client.result_status()
                    # in varying degrees of list-nesting, which
                    # LabNotebook.addResult() handles itself
                    nb.addResult(r, j)

                    # commit changes to the notebook
                    nb.commit()

                    # purge the completed job from the cluster
                    self._client.purge_hub_results(j)

                    # record that we retrieved the results for the given job
                    n = n + 1
        return n

    def numberOfResults( self ):
        """Return the number of results we have available at the moment.

        :returns: the number of results"""
        self.updateResults()
        return self.notebook().numberOfResults()

    def numberOfPendingResults( self ):
        """Return the number of results we are waiting for.

        :returns: the number of pending results"""
        self.updateResults()
        return self.notebook().numberOfPendingResults()

    def _availableResultsFraction( self ):
        """Private method to return the fraction of results available, as a
        real number between 0 and 1. This does not update the results fetched
        from the cluster.

        :returns: the fraction of available results"""
        tr = self.notebook().numberOfResults() + self.notebook().numberOfPendingResults()
        if tr == 0:
            return 0
        else:
            return (self.notebook().numberOfResults() + 0.0) / tr

    def readyFraction( self ):
        """Test what fraction of results are available. This will change over
        time as the results come in.

        :returns: the fraction from 0 to 1"""
        self.updateResults()
        return self._availableResultsFraction()

    def ready( self ):
        """Test whether all the results are available. This will change over
        time as the results come in.

        :returns: True if all the results are available"""
        return (self.readyFraction() == 1)

    def wait( self, timeout = -1 ):
        """Wait for all pending results to be finished. If timeout is set,
        return after this many seconds regardless.

        :param timeout: timeout period in seconds (defaults to forever)
        :returns: True if all the results completed"""

        # we can't use ipyparallel.Client.wait() for this, because that
        # method only works for cases where the Client object is the one that
        # submitted the jobs to the cluster hub, and therefore has the
        # necessary data structures to perform synchronisation. This isn't the
        # case for us, as one of the main goals of epyc is to support disconnected
        # operation, which implies a different Client object retrieving results
        # than the one that submitted the jobs in the first place. This is
        # unfortunate, but understandable given the typical use cases for
        # Client objects.
        #
        # Instead, we have to poll somewhat busily. The ClusterLab.WaitingTime
        # class variable sets the latency for waiting: we repeatedly sleep for
        # this amount of time before updating the results. The latency value
        # essentially controls how busy this process is: given that most
        # simulations are expected to be long, a latency in the tens of seconds
        # feels about right as a default
        if self.numberOfPendingResults() > 0:
            # we've got pending results, wait for them
            timeWaited = 0
            while (timeout < 0) or (timeWaited < timeout):
                if self.numberOfPendingResults() == 0:
                    # no pending jobs left, we're complete
                    return True
                else:
                    # not done yet, calculate the waiting period
                    if timeout == -1:
                        # wait for the default waiting period
                        dt = self.WaitingTime
                    else:
                        # wait for the default waiting period or until the end
                        # of the timeout, whichever comes first
                        if (timeout - timeWaited) < self.WaitingTime:
                            dt = timeout - timeWaited
                        else:
                            dt = self.WaitingTime

                    # sleep for a while
                    time.sleep(dt)
                    timeWaited = timeWaited + dt

            # if we get here, the timeout expired, so do a final check
            # and then exit
            return (self.numberOfPendingResults() == 0)
        else:
            # no pending results, so we already have them all
            return True

    def pendingResults( self ):
        """Return the list of job ids for any pending results.

        :returns: a list of job ids"""
        return self.notebook().pendingResults()

    def pendingResultsFor( self, params ):
        """Return a list of job ids for any results pending for experiments
        at the given point in the parameter space.

        :param params: the experimental parameters
        :returns: a list of job ids"""
        return self.notebook().pendingResultsFor(params)

    def _abortJobs( self, js ):
        """Private method to abort a set of jobs.

        :param js: the job ids to be aborted"""
        self.open()
        self._client.abort(jobs = js)
        self.close()

    def cancelPendingResultsFor( self, params ):
        """Cancel any results pending for experiments at the given point in
        the parameter space.

        :param params: the experimental parameters"""

        # grab the result job ids
        jobs = self.pendingResultsFor(params)

        if len(jobs) > 0:
            # abort in the cluster
            self._abortJobs(jobs)

            # cancel in the notebook
            self.notebook().cancelPendingResultsFor(params)

    def cancelAllPendingResults( self ):
        """Cancel all pending results."""

        # grab all the pending job ids
        jobs = self.pendingResults()

        if len(jobs) > 0:
            # abort in the cluster
            self._abortJobs(jobs)

            # cancel in the notebook
            self.notebook().cancelAllPendingResults()
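# A minimal sketch of driving the ClusterLab above. Hypothetical usage: it
# assumes an ipcluster is already running under the named profile and that
# epyc's documented Experiment/JSONLabNotebook API is available;
# DoublingExperiment and the file name are illustrative only.
import epyc

class DoublingExperiment(epyc.Experiment):
    """Trivial experiment that doubles its single parameter."""

    def do( self, params ):
        return dict(y = 2 * params['x'])

nb = epyc.JSONLabNotebook('doubling.json', create = True)   # persistent notebook
lab = ClusterLab(notebook = nb, profile = 'default')
lab['x'] = range(100)                                       # the parameter space
lab.runExperiment(DoublingExperiment())                     # submits and returns at once
lab.wait(timeout = 600)                                     # polls as in wait() above
print('fraction of results ready:', lab.readyFraction())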
def __init__(self, temperature, eDensity, wavelength, filter=(chfilters.gaussianR, 1000.), label=None, elementList = None, ionList = None, minAbund=None, keepIons=0, doLines=1, doContinuum=1, allLines = 1, em=None, abundanceName=0, verbose=0, timeout=0.1):
    #
    wavelength = np.atleast_1d(wavelength)
    if wavelength.size < 2:
        print(' wavelength must have at least two values, current length %3i'%(wavelength.size))
        return

    t1 = datetime.now()
    #
    rcAll = Client()
    #
    all_engines = rcAll[:]
    lbvAll = rcAll.load_balanced_view()
    #
    #
    # creates Intensity dict from first ion calculated
    #
    setupIntensity = 0
    #
    self.Defaults = chdata.Defaults
    #
    self.Temperature = np.asarray(temperature, 'float64')
    self.EDensity = np.asarray(eDensity, 'float64')
    self.NEDens = self.EDensity.size
    ndens = self.EDensity.size
    ntemp = self.Temperature.size
    tst1 = ndens == ntemp
    tst1a = ndens != ntemp
    tst2 = ntemp > 1
    tst3 = ndens > 1
    tst4 = ndens > 1 and ntemp > 1
    if tst1 and ntemp == 1:
        self.NTempDen = 1
    elif tst1a and (tst2 or tst3) and not tst4:
        self.NTempDen = ntemp*ndens
        if ntemp == self.NTempDen and ndens != self.NTempDen:
            self.EDensity = np.ones_like(self.Temperature)*self.EDensity
        elif ndens == self.NTempDen and ntemp != self.NTempDen:
            self.Temperature = np.ones_like(self.EDensity)*self.Temperature
    elif tst1 and tst4:
        self.NTempDen = ntemp
    if verbose:
        print('NTempDen: %5i'%(self.NTempDen))
    #
    #
    if em is None:
        em = np.ones(self.NTempDen, 'float64')
        ylabel = r'erg cm$^{-2}$ s$^{-1}$ sr$^{-1} \AA^{-1}$ ($\int\,$ N$_e\,$N$_H\,$d${\it l}$)$^{-1}$'
    elif isinstance(em, float):
        em = np.ones(self.NTempDen, 'float64')*em
        ylabel = r'erg cm$^{-2}$ s$^{-1}$ sr$^{-1} \AA^{-1}$ $'
    elif isinstance(em, (list, tuple, np.ndarray)):
        em = np.asarray(em, 'float64')
        ylabel = r'erg cm$^{-2}$ s$^{-1}$ sr$^{-1} \AA^{-1}$ $'
    self.Em = em
    if verbose:
        print('len of self.Em %5i'%(len(self.Em)))
    #
    #
    if self.Em.any():
        ylabel = r'erg cm$^{-2}$ s$^{-1}$ sr$^{-1} \AA^{-1}$ $'
    else:
        ylabel = r'erg cm$^{-2}$ s$^{-1}$ sr$^{-1} \AA^{-1}$ ($\int\,$ N$_e\,$N$_H\,$d${\it l}$)$^{-1}$'
    #
    xlabel = 'Wavelength ('+self.Defaults['wavelength'] +')'
    #
    self.AllLines = allLines
    #
    if not abundanceName:
        self.AbundanceName = self.Defaults['abundfile']
    else:
        if abundanceName in chdata.Abundance:
            self.AbundanceName = abundanceName
        else:
            abundChoices = list(chdata.Abundance.keys())
            abundChoice = chgui.gui.selectorDialog(abundChoices, label='Select Abundance name')
            abundChoice_idx = abundChoice.selectedIndex
            self.AbundanceName = abundChoices[abundChoice_idx[0]]
            abundanceName = self.AbundanceName
            print(' Abundance chosen: %s '%(self.AbundanceName))
    #
    #
    abundAll = chdata.Abundance[self.AbundanceName]['abundance']
    self.AbundAll = abundAll
    self.MinAbund = minAbund
    #
    #ionInfo = chio.masterListInfo()
    wavelength = np.asarray(wavelength)
    nWvl = wavelength.size
    self.Wavelength = wavelength
    #
    #
    freeFree = np.zeros((self.NTempDen, nWvl), 'float64').squeeze()
    freeBound = np.zeros((self.NTempDen, nWvl), 'float64').squeeze()
    twoPhoton = np.zeros((self.NTempDen, nWvl), 'float64').squeeze()
    lineSpectrum = np.zeros((self.NTempDen, nWvl), 'float64').squeeze()
    #
    #
    allInpt = []
    #
    if keepIons:
        self.IonInstances = {}
        self.FbInstances = {}
        self.FfInstances = {}
    #
    # ionGate creates the self.Todo list
    #
    self.ionGate(elementList = elementList, ionList = ionList, minAbund=minAbund,
                 doLines=doLines, doContinuum=doContinuum, verbose = verbose)
    #
    for akey in sorted(self.Todo.keys()):
        zStuff = util.convertName(akey)
        Z = zStuff['Z']
        abundance = chdata.Abundance[self.AbundanceName]['abundance'][Z - 1]
        if verbose:
            print(' %5i %5s abundance = %10.2e '%(Z, const.El[Z-1], abundance))
        if verbose:
            print(' doing ion %s for the following processes %s'%(akey, self.Todo[akey]))
        if 'ff' in self.Todo[akey]:
            allInpt.append([akey, 'ff', temperature, wavelength, abundance, em])
        if 'fb' in self.Todo[akey]:
            allInpt.append([akey, 'fb', temperature, wavelength, abundance, em])
        if 'line' in self.Todo[akey]:
            allInpt.append([akey, 'line', temperature, eDensity, wavelength, filter, allLines, abundance, em, doContinuum])
    #
    result = lbvAll.map_sync(doAll, allInpt)
    if verbose:
        print(' got all ff, fb, line results')
    ionsCalculated = []
    #
    for ijk in range(len(result)):
        out = result[ijk]
        if not isinstance(out, list):
            print(' a problem has occurred - this can be caused by')
            print(' running Python3 and not using ipcluster3')
            return
        ionS = out[0]
        if verbose:
            print(' collecting calculation for %s'%(ionS))
        ionsCalculated.append(ionS)
        calcType = out[1]
        if verbose:
            print(' processing %s results'%(calcType))
        #
        if calcType == 'ff':
            thisFf = out[2]
            if keepIons:
                self.FfInstances[ionS] = thisFf
            freeFree += thisFf
        elif calcType == 'fb':
            thisFb = out[2]
            if verbose:
                print(' fb ion = %s'%(ionS))
            if hasattr(thisFb, 'FreeBound'):
                if 'errorMessage' not in sorted(thisFb.keys()):
                    if keepIons:
                        self.FbInstances[ionS] = thisFb
                    freeBound += thisFb['rate']
                else:
                    print(thisFb['errorMessage'])
        elif calcType == 'line':
            thisIon = out[2]
            if 'errorMessage' not in sorted(thisIon.Intensity.keys()):
                if keepIons:
                    self.IonInstances[ionS] = thisIon
                thisIntensity = thisIon.Intensity
##                self.IonInstances.append(copy.deepcopy(thisIon))
                if setupIntensity:
                    for akey in sorted(self.Intensity.keys()):
                        self.Intensity[akey] = np.hstack((self.Intensity[akey], thisIntensity[akey]))
                else:
                    setupIntensity = 1
                    self.Intensity = thisIntensity
                #
                lineSpectrum += thisIon.Spectrum['intensity']
                # check for two-photon emission
                if len(out) == 4:
                    tp = out[3]
                    if self.NTempDen == 1:
                        twoPhoton += tp['intensity']
                    else:
                        for iTempDen in range(self.NTempDen):
                            twoPhoton[iTempDen] += tp['rate'][iTempDen]
            else:
                if 'errorMessage' in sorted(thisIon.Intensity.keys()):
                    print(thisIon.Intensity['errorMessage'])
    #
    #
    self.IonsCalculated = ionsCalculated
    #
    #
    self.FreeFree = {'wavelength':wavelength, 'intensity':freeFree.squeeze()}
    self.FreeBound = {'wavelength':wavelength, 'intensity':freeBound.squeeze()}
    self.LineSpectrum = {'wavelength':wavelength, 'intensity':lineSpectrum.squeeze()}
    self.TwoPhoton = {'wavelength':wavelength, 'intensity':twoPhoton.squeeze()}
    #
    total = freeFree + freeBound + lineSpectrum + twoPhoton
    #
    t2 = datetime.now()
    dt = t2 - t1
    print(' elapsed seconds = %12.3e'%(dt.seconds))
    rcAll.purge_results('all')
    #
    if self.NTempDen == 1:
        integrated = total
    else:
        integrated = total.sum(axis=0)
    #
    if isinstance(label, str):
        if hasattr(self, 'Spectrum'):
            print(' hasattr = true')
            self.Spectrum[label] = {'wavelength':wavelength, 'intensity':total.squeeze(),
                'filter':filter[0].__name__, 'width':filter[1], 'integrated':integrated,
                'em':em, 'Abundance':self.AbundanceName, 'xlabel':xlabel, 'ylabel':ylabel}
        else:
            self.Spectrum = {label:{'wavelength':wavelength, 'intensity':total.squeeze(),
                'filter':filter[0].__name__, 'width':filter[1], 'integrated':integrated,
                'em':em, 'Abundance':self.AbundanceName, 'xlabel':xlabel, 'ylabel':ylabel}}
    else:
        self.Spectrum = {'wavelength':wavelength, 'intensity':total.squeeze(),
            'filter':filter[0].__name__, 'width':filter[1], 'integrated':integrated,
            'em':em, 'Abundance':self.AbundanceName, 'xlabel':xlabel, 'ylabel':ylabel}
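# The fan-out above reduces to a simple pattern: build one task list, hand it
# to a load-balanced view with map_sync(), and purge the hub afterwards. A
# minimal sketch, with 'square' standing in for the real per-ion worker
# (doAll) and a running ipcluster assumed:
from ipyparallel import Client

def square(task):
    # stand-in worker; in the code above each task is one [ion, calcType, ...] bundle
    return task * task

rc = Client()
lbv = rc.load_balanced_view()
results = lbv.map_sync(square, range(16))   # blocks until every task has returned
rc.purge_results('all')                     # drop cached results from the hub, as above
print(results)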
def create_average_image(fname):
    # compute and cache the mean image of one memory-mapped movie; the imports
    # are inside the function so that they run on whichever engine executes it
    import os
    if not os.path.exists(fname[:-5]+'_avg_image.npy'):
        import ca_source_extraction as cse
        import numpy as np
        Yr, dims, T = cse.utilities.load_memmap(fname)
        img = np.mean(Yr, -1)
        img = np.reshape(img, dims, order='F')
        np.save(fname[:-5]+'_avg_image.npy', np.array(img))
        return img
    return None
#%%
#%% create average images so that one could look at them
b_dview = c.load_balanced_view(targets=list(range(1, len(c), 3)))
images = b_dview.map_sync(create_average_image, names_map)
np.save('all_averages.npy', np.array(images))
#%% in order to maximally parallelize, we pass portions of work to different workers
pars = []
import re
for bf in base_folders:
    fls = glob.glob(os.path.join(bf, 'images/*.mmap'))
    try:
        fls.sort(key=lambda fn: int(re.findall('_[0-9]{1,5}_d1_', fn)[0][1:-4]))
    except Exception:
        fls.sort()
    print(fls)
    base_name_ = 'TOTAL_'
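# Illustrative detail from the snippet above: a load-balanced view can be
# restricted to a subset of engines with 'targets', here every third engine
# starting from engine 1. A minimal sketch assuming a running ipcluster:
from ipyparallel import Client

c = Client()
subset = list(range(1, len(c), 3))            # engines 1, 4, 7, ...
view = c.load_balanced_view(targets=subset)   # only these engines receive work
print('balancing over engines:', subset)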
class IPClusterEnsemble(SurveyEnsemble):
    """Parallelized survey ensemble based on IPython parallel (ipcluster)
    """

    def __init__(self, **specs):

        SurveyEnsemble.__init__(self, **specs)

        self.verb = specs.get('verbose', True)

        # access the cluster
        self.rc = Client()
        self.dview = self.rc[:]
        self.dview.block = True
        with self.dview.sync_imports():
            import EXOSIMS, EXOSIMS.util.get_module, \
                os, os.path, time, random, pickle, traceback, numpy
        if 'logger' in specs:
            specs.pop('logger')
        if 'seed' in specs:
            specs.pop('seed')
        self.dview.push(dict(specs=specs))
        self.vprint("Building SurveySimulation object on all workers.")
        res = self.dview.execute("SS = EXOSIMS.util.get_module.get_module(specs['modules'] \
            ['SurveySimulation'], 'SurveySimulation')(**specs)")
        res2 = self.dview.execute("SS.reset_sim()")

        self.vprint("Created SurveySimulation objects on %d engines."%len(self.rc.ids))
        #for row in res.stdout:
        #    self.vprint(row)

        self.lview = self.rc.load_balanced_view()

        self.maxNumEngines = len(self.rc.ids)

    def run_ensemble(self, sim, nb_run_sim, run_one=None, genNewPlanets=True,
                     rewindPlanets=True, kwargs={}):
        """
        Args:
            sim:
        """
        hangingRunsOccurred = False  # keeps track of whether hanging runs have occurred
        t1 = time.time()
        async_res = []
        for j in range(nb_run_sim):
            ar = self.lview.apply_async(run_one, genNewPlanets=genNewPlanets,
                                        rewindPlanets=rewindPlanets, **kwargs)
            async_res.append(ar)
        print("Submitted %d tasks."%len(async_res))
        engine_pids = self.rc[:].apply(os.getpid).get_dict()
        #ar2 = self.lview.apply_async(os.getpid)
        #pids = ar2.get_dict()
        print('engine_pids')
        print(engine_pids)
        runStartTime = time.time()  # job starting time
        avg_time_per_run = 0.
        tmplenoutstandingset = nb_run_sim
        tLastRunFinished = time.time()
        ar = self.rc._asyncresult_from_jobs(async_res)
        while not ar.ready():
            ar.wait(10.)
            clear_output(wait=True)
            if ar.progress > 0:
                timeleft = ar.elapsed/ar.progress * (nb_run_sim - ar.progress)
                if timeleft > 3600.:
                    timeleftstr = "%2.2f hours"%(timeleft/3600.)
                elif timeleft > 60.:
                    timeleftstr = "%2.2f minutes"%(timeleft/60.)
                else:
                    timeleftstr = "%2.2f seconds"%timeleft
            else:
                timeleftstr = "who knows"

            # Terminate hanging runs
            outstandingset = self.rc.outstanding  # msg_ids submitted but whose results have not been received
            if len(outstandingset) > 0 and len(outstandingset) < nb_run_sim:  # at least 1 run still going and we have not just started
                avg_time_per_run = (time.time() - runStartTime)/float(nb_run_sim - len(outstandingset))  # average time per run
                if len(outstandingset) < tmplenoutstandingset:  # the scheduler has finished a run
                    tmplenoutstandingset = len(outstandingset)  # update this; should decrease by ~1 or the number of cores
                    tLastRunFinished = time.time()  # the last time a simulation finished (right now)
                    #self.vprint("tmplenoutstandingset %d, tLastRunFinished %0.6f"%(tmplenoutstandingset,tLastRunFinished))
                if time.time() - tLastRunFinished > avg_time_per_run*(1. + self.maxNumEngines*2.)*4.:
                    #nb_run_sim = len(self.rc.outstanding)
                    #restartRuns = True
                    self.vprint('Aborting ' + str(len(self.rc.outstanding)) + ' outstanding jobs')
                    #runningPIDS = os.listdir('/proc')  # get all running pids
                    self.vprint('queue_status')
                    self.vprint(str(self.rc.queue_status()))
                    self.rc.abort()
                    ar.wait(20)
                    runningPIDS = [int(tpid) for tpid in os.listdir('/proc') if tpid.isdigit()]
                    #[self.rc.queue_status()[eind] for eind in np.arange(self.maxNumEngines) if self.rc.queue_status()[eind]['tasks']>0]
                    for engineInd in [eind for eind in np.arange(self.maxNumEngines) if self.rc.queue_status()[eind]['tasks'] > 0]:
                        os.kill(engine_pids[engineInd], 15)
                        time.sleep(20)
                    # for pid in [engine_pids[eind] for eind in np.arange(len(engine_pids))]:
                    #     if pid in runningPIDS:
                    #         os.kill(pid, 9)  # send kill command to stop this worker
                    stopIPClusterCommand = subprocess.Popen(['ipcluster', 'stop'])
                    stopIPClusterCommand.wait()
                    time.sleep(60)  # doing this instead of waiting for ipcluster to terminate
                    stopIPClusterCommand = subprocess.Popen(['ipcluster', 'stop'])
                    stopIPClusterCommand.wait()
                    time.sleep(60)  # doing this instead of waiting for ipcluster to terminate
                    hangingRunsOccurred = True  # keeps track of whether hanging runs have occurred
                    break
                    #stopIPClusterCommand.wait()  # waits for process to terminate
                    #call(["ipcluster","stop"])  # send command to stop ipcluster
                    #self.rc.abort(jobs=self.rc.outstanding.copy().pop())
                    #self.rc.abort()  # by default should abort all outstanding jobs...
                    # it is possible that this will not stop the jobs running
                    #ar.wait(100)
                    #self.rc.purge_everything()  # purge all results if outstanding, because rc.abort() didn't seem to do the job right
                    tLastRunFinished = time.time()  # the last time a simulation was restarted (right now)

            print("%4i/%i tasks finished after %4i s. About %s to go." % (ar.progress, nb_run_sim,
                                                                          ar.elapsed, timeleftstr), end="")
            sys.stdout.flush()
            #numRunStarts += 1  # increment number of run restarts

        t2 = time.time()
        print("\nCompleted in %d sec" % (t2 - t1))

        if hangingRunsOccurred:  # hanging runs have occurred
            res = [1]
        else:
            res = [ar.get() for ar in async_res]

        return res
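# The watchdog loop above boils down to: poll a combined AsyncResult, reset a
# timer whenever progress advances, and abort everything if nothing completes
# for too long. A minimal sketch with an illustrative worker and threshold;
# _asyncresult_from_jobs is the same private Client helper used above:
import time
from ipyparallel import Client

rc = Client()
lview = rc.load_balanced_view()
ars = [lview.apply_async(lambda x: x + 1, i) for i in range(8)]
ar = rc._asyncresult_from_jobs(ars)          # one handle over all submissions

last_progress, t_last = 0, time.time()
while not ar.ready():
    ar.wait(5.)
    if ar.progress > last_progress:          # something finished: reset the clock
        last_progress, t_last = ar.progress, time.time()
    elif time.time() - t_last > 600.:        # nothing for 10 minutes: assume hung
        rc.abort()                           # abort all outstanding jobs
        break
print('%d/%d tasks finished' % (ar.progress, len(ars)))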
import os

import parcel_model
import numpy
import xray

dv['exec_run'] = exec_run
dv['smax_nact_calc'] = smax_nact_calc
dv['mode_dict'] = mode_dict

# Correct save_dir for network mount
SAVE_DIR = os.path.join("/net/legion", SAVE_DIR)
dv['SAVE_DIR'] = SAVE_DIR
dv['MAX_ITER'] = MAX_ITER
dv['PARALLEL'] = PARALLEL
dv['RANDOM'] = RANDOM

view = client.load_balanced_view()
print(" parcel model calculations will be done in parallel")

data = xray.open_dataset(input_fn,
                         decode_times=False)  # we don't care about datetimes

# Subset data fields
data = data.sel(lev=LEVEL_SUB, lat=LAT_SUB)
data = data.isel(time=TIME_ISUB)

# Alias for extracting the data underlying an xray data set, given its key
def d(field, ravel=False):
    data_array = data[field].data
    if ravel:
        return data_array.ravel()
    return data_array
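# The pattern above: broadcast shared constants to every engine through a
# direct view, then distribute the actual work through a load-balanced view.
# A minimal sketch with illustrative names, assuming a running ipcluster:
from ipyparallel import Client

client = Client()
dv = client[:]                    # direct view: same assignment on every engine
dv['SCALE'] = 10                  # analogous to SAVE_DIR, MAX_ITER, etc. above

def scaled(x):
    return x * SCALE              # resolved from the globals pushed to the engine

view = client.load_balanced_view()
print(view.map_sync(scaled, range(8)))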