def run_process_by_pathos_nonblockingmap(pos_arr, dc_arr):
    this_function_name = inspect.currentframe().f_code.co_name
    print("Begin {}...".format(this_function_name))
    pool = ProcessPool(nodes=8)
    # do a non-blocking map, then extract the results from the iterator
    results = pool.imap(run_once, pos_arr, dc_arr)
    list(results)
def fit(self, X, y):
    """Fit estimator.

    Args:
        X : array-like
            The data to fit.
        y : array-like
            The target variable.
    """
    self.X = np.asarray(X)
    self.y = np.asarray(y).reshape(y.shape[0], )
    self._base_score = cross_val_score(self.clf, self.X, y=self.y,
                                       scoring=self.metric, cv=self.cv).mean()
    print("Base score: {}\n".format(self._base_score))
    self._best_score = self._base_score
    population = self._create_population()
    gen = 0
    total_time = 0
    for i in trange(self.max_iter, desc='Generation', leave=False):
        self.generation_plot.append(population.tolist())
        p = ProcessPool(nodes=multiprocessing.cpu_count())
        start = timer()
        self._individuals = p.map(self._score_ind, population)
        total_time = total_time + timer() - start
        self._Generations.append(self._individuals)
        best = sorted(self._individuals, key=lambda tup: tup.score, reverse=True)[0]
        self._best_individuals.append(
            self._BestIndividual(gen, best.params, best.score))
        if gen == 0:
            self._best_score = self._best_individuals[gen]
        if best.score > self._best_score.score:
            self._best_score = self._best_individuals[gen]
        self._gen_score.append(
            self._Generation(
                gen,
                sum([tup[1] for tup in self._individuals]) / len(self._individuals),
                self._best_individuals[gen]))
        population = self._create_next_generation(self._individuals)
        self._individuals = []
        gen += 1
    else:
        print('gen: {}'.format(gen))
        print('avg time per gen: {0:0.1f}'.format(total_time / gen))
def transform(self, X, columns=None):
    """
    Given X, create features of fitted studies
    :param X: Dataset with features used to create fitted studies
    :return:
    """
    # Remove trailing identifier in column list if present
    if columns is not None:
        columns = [re.sub(r'_[0-9]+$', '', s) for s in columns]
    X.columns = X.columns.str.lower()  # columns must be lower case
    pool = ProcessPool(nodes=self.n_jobs)  # Number of jobs
    self.result = []
    # Iterate fitted studies and calculate TA with fitted parameter set
    for ind in self.fitted:
        # Create field if no columns or is in columns list
        if columns is None or ind.res_y.name in columns:
            self.result.append(pool.apipe(ind.transform, X))
    # Blocking wait for asynchronous results
    self.result = [res.get() for res in self.result]
    # Combine results into dataframe to return
    res = pd.concat(self.result, axis=1)
    return res
def cluster_search(self):
    _log('CLUSTER SEARCH')
    target_kwargs = dict(self.cfg)

    def target(infile):
        return _process_clusters(infile, **target_kwargs)

    if self.cfg.ncpu > 1:
        pool = ProcessPool(nodes=min(self.cfg.ncpu, len(self.infiles)))
        all_out_arrays = pool.map(target, self.infiles)
        _log('CLUSTER REDUCTION')
        all_out_arrays = reduce(lambda a, b: a + b, all_out_arrays)
        _log('CLUSTER OUTPUT')
        with h5py.File(self.cfg.outfile, 'a', libver=libver) as f:
            for out_array in all_out_arrays:
                self._write_cluster(f, out_array)
    else:
        for infile in self.infiles:
            all_out_arrays = target(infile)
            _log('CLUSTER OUTPUT (PARTIAL)')
            with h5py.File(self.cfg.outfile, 'a', libver=libver) as f:
                for out_array in all_out_arrays:
                    self._write_cluster(f, out_array)
    return
def map_list_in_chunks(l, f, extra_data):
    '''
    A wrapper around ProcessPool.uimap that processes a list in chunks.
    Differs from `map_list_as_chunks` in that this method calls `f` once for each item in `l`.

    uimap already chunks, but if you have extra data to pass in it will pickle it for every item.
    This function passes in the extra data to each chunk, which significantly saves on pickling.
    https://stackoverflow.com/questions/53604048/iterating-the-results-of-a-multiprocessing-list-is-consuming-large-amounts-of-me

    Parameters
    ----------
    l : list
      the list
    f : function
      the function to process each item
      takes two parameters: item, extra_data
    extra_data : object
      the extra data to pass to each f
    '''
    cpus = cpu_count()
    chunk_length = max(1, int(len(l) / cpus))
    chunks = [l[x:x + chunk_length] for x in range(0, len(l), chunk_length)]
    pool = Pool(nodes=cpus)
    f_dumps = cloudpickle.dumps(f)
    tuples = [(chunk, f_dumps, extra_data) for chunk in chunks]
    mapped_chunks = pool.map(_process_chunk, tuples)
    return (item for chunk in mapped_chunks for item in chunk)
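# Hypothetical usage sketch for map_list_in_chunks; names here are illustrative
# and it assumes the module-level _process_chunk helper referenced above
# unpickles `f` and applies it to every item of its chunk.
def _add_offset(item, extra_data):
    # toy per-item worker; extra_data is pickled once per chunk, not per item
    return item + extra_data['offset']

if __name__ == '__main__':
    shifted = list(map_list_in_chunks(list(range(10)), _add_offset, {'offset': 5}))
    # expected, under the assumption above: [5, 6, ..., 14]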
def Scrap_landing(allowed_domains, start_urls):
    pool = ProcessPool(nodes=4)

    def f_runner(spider):
        ScrapperSpider.allowed_domains = [allowed_domains]
        ScrapperSpider.start_urls = [start_urls]
        from twisted.internet import reactor
        from scrapy.settings import Settings
        import Scrapper.settings as my_settings
        from scrapy.crawler import CrawlerProcess, CrawlerRunner
        crawler_settings = Settings()
        crawler_settings.setmodule(my_settings)
        runner = CrawlerRunner(settings=crawler_settings)
        deferred = runner.crawl(spider)
        deferred.addBoth(lambda _: reactor.stop())
        reactor.run()

    #ScrapperSpider.allowed_domains = [allowed_domains]
    #ScrapperSpider.start_urls = [start_urls]
    #print("\nstart URLS:{}".format(ScrapperSpider.start_urls))
    results = pool.amap(f_runner, [ScrapperSpider])
    t = 0
    while not results.ready():
        time.sleep(5)
        print(".", end=' ')
        t = t + 5
        if t == 30:
            print("\nProcess limited to 30 seconds...EXITING\n")
            return None
    pool.clear()
def AllocateParticlesInBins(B, num_nodes, step_data):  # seeds not being used at the moment
    magnitude = 1000.0
    particle_data = []
    phi_data = []
    psi_data = []
    target_ids = []
    B_copies = [B]
    magnitude_copies = []
    for j in range(B.length()):
        angles = BinMidpoints(B, j)
        phi_data.append(angles[0])
        psi_data.append(angles[1])
        target_ids.append(j)
        B_copies.append(B)
        magnitude_copies.append(magnitude)
    pool = ProcessPool(nodes=num_nodes)
    particle = AlanineDipeptideSimulation()
    #positionsset = pool.map(particle.move_particle_to_bin, phi_data, psi_data, step_data, seeds, B_copies, target_ids)
    positionsset = pool.map(particle.move_particle_to_bin_minenergy, phi_data, psi_data, magnitude_copies)
    #positions = particle.move_particle_to_bin(1, 1, step_data[0], seeds[0])
    return positionsset
def getdates(year, par=False):
    '''Use gsutil to read files for a specific year'''
    __bucket__ = 'earthenginepartners-hansen'
    __location__ = 'gs://%s/GLADalert/%d' % (__bucket__, year)
    dates = os.popen('gsutil ls %s' % __location__).read().split()
    print('number of dates: ', len(dates))
    ret = []
    if par:
        from pathos.multiprocessing import ProcessPool
        pool = ProcessPool(nodes=25)

        def pl(i):
            return os.popen('gsutil ls %s' % i).read().split()

        dates = pool.imap(pl, dates)
    else:
        # reassign so the serial path actually lists each date prefix
        dates = (os.popen('gsutil ls %s' % i).read().split() for i in dates)
    for i in dates:
        ret.extend(i)
    return ret
def map_list_as_chunks(l, f, extra_data, cpus=None, max_chunk_size=None):
    '''
    A wrapper around `pathos.multiprocessing.ProcessPool.uimap` that processes a list in chunks.
    Differs from `map_list_in_chunks` in that this method calls `f` once for each chunk.

    uimap already chunks, but if you have extra data to pass in it will pickle it for every item.
    This function passes in the extra data to each chunk, which significantly saves on pickling.
    https://stackoverflow.com/questions/53604048/iterating-the-results-of-a-multiprocessing-list-is-consuming-large-amounts-of-me

    Parameters
    ----------
    l : list
      the list
    f : function
      the function to process each chunk
      takes two parameters: chunk, extra_data
    extra_data : object
      the extra data to pass to each f
    cpus : int
      the number of cores to use to split the chunks across
    max_chunk_size : int
      the maximum size for each chunk
    '''
    cpus = cpu_count() if cpus is None else cpus
    max_chunk_size = float('inf') if max_chunk_size is None else max_chunk_size
    chunk_length = min(max_chunk_size, max(1, ceil(len(l) / cpus)))
    chunks = [l[x:x + chunk_length] for x in range(0, len(l), chunk_length)]
    pool = Pool(nodes=cpus)
    f_dumps = cloudpickle.dumps(f)
    tuples = [(chunk, f_dumps, extra_data) for chunk in chunks]
    return pool.map(_process_whole_chunk, tuples)
def make_predictions_by_t_local_map_general(model_name, in_dir, timepoints,
                                            allowed_gpus=[0], chunk_size=(200, 150, 150)):
    """ Make predictions for all timepoints, using all local gpus

    Args:
        model_name: name of model to use, will be looked up
        in_dir: absolute path to data dir
        timepoints: list of timepoints to process
        chunk_size: size of chunks to process volume in
        allowed_gpus: CUDA ids of GPUs to use; the job will be parallelized across GPUs
    """
    n_gpus = len(allowed_gpus)
    split_timepoints = np.array_split(timepoints, n_gpus)
    devices = ['/gpu:{}'.format(idx) for idx in allowed_gpus]
    starred_args = list(zip(split_timepoints,
                            [in_dir] * n_gpus,
                            [model_name] * n_gpus,
                            [chunk_size] * n_gpus,
                            devices))

    def _star_helper(args):
        return _local_predict_helper_general(*args)

    print("Creating pool")
    pool = Pool(n_gpus)
    print("Dispatching jobs")
    pool.map(_star_helper, starred_args)
def build_coarse_model_voronoi(B, num_samples_per_bin, num_nodes, num_steps):
    n_bins = B.length()
    pool = ProcessPool(nodes=num_nodes)
    num_steps = [num_steps for x in range(num_samples_per_bin)]
    seed_data = [x for x in range(num_samples_per_bin)]
    Transitions = []
    particle = AlanineDipeptideSimulation()
    particle.Bins = B
    particle.temperature = 1000
    T = np.zeros((n_bins, n_bins))
    for j in range(n_bins):
        particle.positions = B.Ω[j]
        Transitions.append(pool.map(particle.sample_voronoi, num_steps, seed_data))
    for j in range(len(Transitions)):
        T[Transitions[j][0], Transitions[j][1]] = T[Transitions[j][0], Transitions[j][1]] + 1
    for j in range(n_bins):
        if sum(T[j, :]) == 0:
            # print('No transitions in row', j, flush=True)
            T[j, j] = 1
        T[j, :] = T[j, :] / sum(T[j, :])
    return T
def pareto(callback, budgets: List[float], heuristic, runtime, verbose=True, **kwargs):
    def safe_callback(rt):
        result = 'pass'
        t = time.time()
        try:
            callback(rt)
        except MemoryError:
            result = 'fail (OOM)'
        except RematExceededError:
            result = 'fail (thrashed)'
        except:
            import traceback
            traceback.print_exc()
            print(flush=True)
            raise
        total_time = time.time() - t
        rt.meta['total_time'] = total_time
        if verbose:
            print('  budget {} finished in {} seconds: {}'.format(
                rt.budget, total_time, result), flush=True)
        rt._prepickle()
        return rt

    if verbose:
        print('running pareto trial for budgets: {}'.format(budgets), flush=True)
    p = Pool()
    runtimes = list(map(lambda b: runtime(b, heuristic, **kwargs), budgets))
    runtimes = p.map(safe_callback, runtimes)
    return runtimes
def make_cache():
    from grid2viz.src.manager import (
        scenarios,
        agents,
        make_episode_without_decorate,
        n_cores,
        retrieve_episode_from_disk,
        save_in_ram_cache,
        cache_dir,
    )
    from pathos.multiprocessing import ProcessPool

    if not os.path.exists(cache_dir):
        print("Starting Multiprocessing for reading the best agent of each scenario")

        # TODO: not every agent has necessarily been run on exactly the same scenarios.
        # Avoid an error if an agent has not been run on a given scenario.
        agent_scenario_list = [(agent, scenario)
                               for agent in agents
                               for scenario in scenarios]

        agents_data = []
        if n_cores == 1:  # no multiprocessing, useful for debugging if needed
            i = 0
            for agent_scenario in agent_scenario_list:
                agents_data.append(
                    make_episode_without_decorate(agent_scenario[0], agent_scenario[1]))
                i += 1
        else:
            pool = ProcessPool(n_cores)
            agents_data = list(
                pool.imap(
                    make_episode_without_decorate,
                    [agent_scenario[0] for agent_scenario in agent_scenario_list],  # agents
                    [agent_scenario[1] for agent_scenario in agent_scenario_list],  # scenarios
                )
            )  # we go over all scenarios for each agent
            pool.close()
            print("Multiprocessing done")

        #####
        # saving data on disk
        i = 0
        for agent_scenario in agent_scenario_list:
            print(i)
            agent = agent_scenario[0]
            episode_name = agent_scenario[1]
            agent_episode = agents_data[i]
            if agent_episode is not None:
                episode_data = retrieve_episode_from_disk(
                    agent_episode.episode_name, agent_episode.agent)
                agent_episode.decorate(episode_data)
                save_in_ram_cache(agent_episode.episode_name,
                                  agent_episode.agent, agent_episode)
            i += 1
def get_circle_fill_objs(self, start_camera_pos, n_cameras):
    max_n_threads = 40
    n_threads = min(n_cameras, max_n_threads)
    cx, cy = start_camera_pos[0], start_camera_pos[1]
    cz = self.center[2]
    r = np.abs(start_camera_pos[2] - cz)

    def f(camera_i):
        theta = camera_i / n_cameras * (2 * np.pi)
        x = r * np.sin(theta) + cx
        z = r * np.cos(theta) + cz
        camera_pos = np.array([x, cy, z])
        deg = theta / np.pi * 180
        deg_str = '{}'.format(int(deg))
        meshlab_R = self.get_meshlab_R(camera_pos, np.array([cx, cy, cz]))
        self.write_meshlab_camera(
            join(self.frames_dir, 'meshlab_camera_{}.txt'.format(deg_str)),
            camera_pos, meshlab_R)
        self.get_fill_obj(camera_pos, postfix=deg_str, frame=camera_i, check=False)

    pool = ProcessPool(nodes=n_threads)
    pool.map(f, range(n_cameras))
def start_cache(self):
    pool = Pool(self.WORKERS)
    for status in pool.map(self.parallel_cache, self.nodes):
        if "ERROR" in status:
            STREAM.error(status)
        else:
            STREAM.success(status)
def make_predictions_by_t_local_map(model_name, chunk_size=(300, 150, 150),
                                    allowed_gpus=list(range(8)), timepoints=None):
    """ Make predictions for all timepoints, using all local gpus """
    from division_detection.vol_preprocessing import VOL_DIR_H5
    # fetch the number of timepoints
    num_vols = len(os.listdir(VOL_DIR_H5))
    if timepoints is None:
        timepoints = np.arange(3, num_vols - 4)
    n_gpus = len(allowed_gpus)
    split_timepoints = np.array_split(timepoints, n_gpus)
    devices = ['/gpu:{}'.format(idx) for idx in allowed_gpus]
    starred_args = list(zip(split_timepoints,
                            [model_name] * n_gpus,
                            [chunk_size] * n_gpus,
                            devices))

    def _star_helper(args):
        return _predict_local_helper(*args)

    print("Creating pool")
    pool = Pool(n_gpus)
    print("Dispatching jobs")
    pool.map(_star_helper, starred_args)
def interloper_search(self):
    _log('INTERLOPER SEARCH')
    # must not put 'self' in the function, so copy the dict
    target_kwargs = dict(self.cfg)

    def target(infile):
        return _process_interlopers(infile, **target_kwargs)

    if self.cfg.ncpu > 1:
        pool = ProcessPool(ncpus=min(self.cfg.ncpu, len(self.infiles)))
        all_out_arrays = pool.map(target, self.infiles)
    else:
        all_out_arrays = list()
        for infile in self.infiles:
            all_out_arrays.append(target(infile))
    _log('INTERLOPER REDUCTION')
    all_out_arrays = np.vstack(all_out_arrays)
    unique_keys = np.unique(all_out_arrays['is_near'])
    _log('INTERLOPER OUTPUT')
    with h5py.File(self.cfg.outfile, 'a', libver=libver) as f:
        for ik, cluster_id in enumerate(unique_keys):
            if ik % 1000 == 0:
                _log(' ', ik, '/', unique_keys.size)
            interlopers = all_out_arrays[all_out_arrays['is_near'] == cluster_id]
            self._write_interlopers(f, cluster_id, interlopers)
    return
def CreateMovie(saveFrame, nFrames, fps, test='Ronchi', fixedLight=True, fringe=stepFringe, nX=1001):
    '''Generate all the frames, create the video, then delete the individual frames.
    '''
    # file name for the final animation
    if fixedLight:
        name = test + "_fixedlight"
    else:
        name = test + "_samelightgrating"

    print("Generate all frames")
    f = lambda iFrame: saveFrame(iFrame, './figures/_tmp%05d.jpg' % iFrame,
                                 test=test, fixedLight=fixedLight)
    pool = ProcessPool(nodes=3)
    pool.map(f, range(nFrames))

    print("Resize images")
    # resize the images to have even pixel sizes on both dimensions, important for ffmpeg
    # this command preserves the aspect ratio, rescales the image to fill HD as much as possible
    # without cropping, then pads the rest with white
    for iFrame in range(nFrames):
        fname = './figures/_tmp%05d.jpg' % iFrame
        #os.system("convert "+fname+" -resize 1280x720 -gravity center -extent 1280x720 -background white "+fname)
        os.system("convert "+fname+" -resize 1000x1000 -gravity center -extent 1000x1000 -background white "+fname)

    # delete old animation
    os.system("rm ./figures/"+name+".mp4")

    print("Create new animation")
    #os.system("ffmpeg -r "+str(fps)+" -i ./figures/_tmp%05d.jpg -s 1280x720 -vcodec libx264 -pix_fmt yuv420p ./figures/ronchi.mp4")
    os.system("ffmpeg -r "+str(fps)+" -i ./figures/_tmp%05d.jpg -s 1000x1000 -vcodec libx264 -pix_fmt yuv420p ./figures/"+name+".mp4")

    # delete images
    os.system("rm ./figures/_tmp*.jpg")
def _calculate_powder(self):
    """
    Calculates powder data (a_tensors, b_tensors according to aCLIMAX manual).
    """
    # define container for powder data
    powder = AbinsModules.PowderData(num_atoms=self._num_atoms)

    # make sure dictionary keys are in the same order on each machine
    k_indices = sorted(self._frequencies.keys())
    b_tensors = {}
    a_tensors = {}

    if PATHOS_FOUND:
        threads = AbinsModules.AbinsParameters.threads
        p_local = ProcessPool(nodes=threads)
        tensors = p_local.map(self._calculate_powder_k, k_indices)
    else:
        tensors = [self._calculate_powder_k(k=k) for k in k_indices]

    for indx, k in enumerate(k_indices):
        a_tensors[k] = tensors[indx][0]
        b_tensors[k] = tensors[indx][1]

    # fill powder object with powder data
    powder.set(dict(b_tensors=b_tensors, a_tensors=a_tensors))

    return powder
def get_grid_fill_objs(self, size):
    max_n_threads = 40
    n_rows, n_cols = size
    n_cameras = n_rows * n_cols
    # print('n_cameras', n_cameras)
    n_threads = min(n_cameras, max_n_threads)
    cam_path = join(self.sample_dir, 'camera.txt')
    cams = np.loadtxt(cam_path)

    def f(camera_i):
        row_i = camera_i // n_cols
        col_i = camera_i % n_cols
        y = row_i / (n_rows - 1)
        x = col_i / (n_cols - 1)
        camera_pos = (1 - x) * (1 - y) * cams[3] + x * (1 - y) * cams[2] \
            + (1 - x) * y * cams[1] + x * y * cams[0]
        self.write_meshlab_camera(
            join(self.keys_dir, 'meshlab_camera_{}.txt'.format(camera_i)),
            camera_pos, np.eye(3))
        self.get_fill_obj(camera_pos, postfix=str(camera_i), frame=camera_i, check=False)

    pool = ProcessPool(nodes=n_threads)
    pool.map(f, range(n_cameras))
def _parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs: Any) -> Generator:
    """Returns a generator for a parallel map with a progress bar.

    Arguments:
        ordered(bool): True for an ordered map, false for an unordered map.
        function(Callable): The function to apply to each element of the given Iterables.
        iterables(Tuple[Iterable]): One or more Iterables containing the data to be mapped.

    Returns:
        A generator which will apply the function to each element of the given Iterables
        in parallel in order with a progress bar.
    """
    # Extract num_cpus
    num_cpus = kwargs.pop('num_cpus', None)

    # Determine num_cpus
    if num_cpus is None:
        num_cpus = cpu_count()
    elif type(num_cpus) == float:
        num_cpus = int(round(num_cpus * cpu_count()))

    # Determine length of tqdm (equal to length of shortest iterable)
    length = min(len(iterable) for iterable in iterables if isinstance(iterable, Sized))

    # Create parallel generator
    map_type = 'imap' if ordered else 'uimap'
    pool = Pool(num_cpus)
    map_func = getattr(pool, map_type)

    for item in tqdm(map_func(function, *iterables), total=length, **kwargs):
        yield item

    pool.clear()
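# Hypothetical usage sketch for _parallel above (illustrative values; it is an
# internal helper, so it is only callable from within the same module):
if __name__ == '__main__':
    # Ordered parallel map over one iterable. A float num_cpus is interpreted
    # as a fraction of the available cores; leftover kwargs (e.g. desc) are
    # forwarded to tqdm.
    squares = list(_parallel(True, lambda x: x * x, range(100),
                             num_cpus=0.5, desc='squaring'))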
def obs_temp_p(self, dtobj):
    '''get observed temperature in Amsterdam, in parallel'''
    self.dtobjP = dtobj
    pool = Pool()
    obs = pool.map(self.obs_temp, self.filelist)
    self.obs = [ob for ob in obs if ob is not None]
def _parallel_final_score(self, smiles: List[str]) -> FinalSummary:
    molecules, valid_indices = self._smiles_to_mols(smiles)
    component_smiles_pairs = [[component, molecules, valid_indices, smiles]
                              for component in self.scoring_components]
    pool = ProcessPool(nodes=len(self.scoring_components))
    mapped_pool = pool.map(parallel_run, component_smiles_pairs)
    pool.clear()
    return self._score_summary(mapped_pool, smiles, valid_indices)
def para_data_allo_1(Theta, cpu_num, rng, d_struct, Data_struct):
    time.sleep(1)

    pub = Data_struct.pub_info[0, ]
    print(" id: {} is dealing with the auction with {} bidders ".format(
        threading.get_ident(), pub[2]))

    JJ = d_struct["JJ"]  # number of auctions in the data || maximum length of an auction

    TT, T_end = Data_struct.data_act.shape
    TT = int(TT)
    T_end = int(T_end)

    '''
    take the grid generation outside
    '''
    # num of bidders in the auction
    N = int(pub[2])
    # setup the env info structure
    info_flag = pub[3]

    # setup the env info structure
    Env = ENV(N, Theta)
    if info_flag == 0:
        para = Env.Uninform()
    else:
        para = Env.Info_ID()

    [x_signal, w_x] = signal_DGP(para, rng, N, JJ)

    results = []
    func = partial(para_fun, para, info_flag, rng, T_end, int(JJ * N), x_signal, w_x)

    pool = ProcessPool(nodes=cpu_num)
    # pool = ProcessPoolExecutor(max_workers=cpu_num)

    start = time.time()
    results = pool.map(func,
                       zip(range(0, TT), Data_struct.data_act,
                           Data_struct.data_state, Data_struct.pub_info))
    MoM = np.nanmean(list(results))
    end = time.time()

    print('time expenditure for the auction estimation under N = {}'.format(N))
    print(end - start)

    return MoM
def runIteration(self, task, pop, fpop, xb, fxb, A, A_f, B, B_f, D, D_f, **dparams):
    r"""Core function of GreyWolfOptimizer algorithm.

    Args:
        task (Task): Optimization task.
        pop (numpy.ndarray): Current population.
        fpop (numpy.ndarray): Current population's function/fitness values.
        xb (numpy.ndarray):
        fxb (float):
        A (numpy.ndarray):
        A_f (float):
        B (numpy.ndarray):
        B_f (float):
        D (numpy.ndarray):
        D_f (float):
        **dparams (Dict[str, Any]): Additional arguments.

    Returns:
        Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray, float, Dict[str, Any]]:
            1. New population
            2. New population fitness/function values
            3. Additional arguments:
                * A (): TODO
    """
    def eval_task(args):
        i, w = args
        A1, C1 = 2 * a * self.rand(task.D) - a, 2 * self.rand(task.D)
        X1 = A - A1 * fabs(C1 * A - w)
        A2, C2 = 2 * a * self.rand(task.D) - a, 2 * self.rand(task.D)
        X2 = B - A2 * fabs(C2 * B - w)
        A3, C3 = 2 * a * self.rand(task.D) - a, 2 * self.rand(task.D)
        X3 = D - A3 * fabs(C3 * D - w)
        pop = task.repair((X1 + X2 + X3) / 3, self.Rand)
        fpop = task.eval(pop)  # evaluate the repaired candidate
        return i, pop, fpop

    a = 2 - task.Evals * (2 / task.nFES)
    pool = ProcessPool(nodes=self.nodes)
    pool.clear()
    results = pool.map(eval_task, [[i, w] for i, w in enumerate(pop)])
    for i, _pop, _fpop in results:
        pop[i] = _pop
        fpop[i] = _fpop
    for i, f in enumerate(fpop):
        if f < A_f:
            A, A_f = pop[i].copy(), f
        elif A_f < f < B_f:
            B, B_f = pop[i].copy(), f
        elif B_f < f < D_f:
            D, D_f = pop[i].copy(), f
    xb, fxb = self.getBest(A, A_f, xb, fxb)
    return pop, fpop, xb, fxb, {'A': A, 'A_f': A_f, 'B': B, 'B_f': B_f, 'D': D, 'D_f': D_f}
def run_vt(self):
    cams = self.load_cam()

    def f(cam_i):
        cam_pos = cams[cam_i]
        self.get_fill_obj(cam_pos, '{}'.format(cam_i), cam_i, check=False)

    n_cams = len(cams)
    n_threads = n_cams
    pool = ProcessPool(nodes=n_threads)
    pool.map(f, range(n_cams))
def run_asymptotics(base, ns, heuristic, bound, runtime, releases=True, **kwargs):
    config = {
        'ns': ns,
        'heuristic': str(heuristic),
        'heuristic_features': list(heuristic.FEATURES),
        'memory': str(bound),
        'releases': releases,
        'runtime': runtime.ID,
        'runtime_features': list(runtime.FEATURES),
        'kwargs': kwargs
    }

    p = Pool()
    print('generating asymptotics data for config: {}...'.format(
        json.dumps(config, indent=2)))

    args = []
    for n in ns:
        args.append([n, bound(n), heuristic, runtime, releases, kwargs])

    t = time.time()
    rts = p.map(run, *zip(*args))
    t = time.time() - t

    succ_ns, succ_rts = chop_failures(ns, rts)
    print('  - succeeded between n={} and n={}'.format(succ_ns[0], succ_ns[-1]))
    print('  done, took {} seconds.'.format(t))

    results = {
        'layers': succ_ns,
        'computes': list(map(lambda rt: rt.telemetry.summary['remat_compute'], rts)),
        'had_OOM': ns[0] != succ_ns[0],
        'had_thrash': ns[-1] != succ_ns[-1]
    }

    date_str = datetime.now().strftime('%Y%m%d-%H%M%S-%f')
    base_mod = ASYMPTOTICS_MOD + '/' + base
    out_file = '{}-{}-{}.json'.format(date_str, heuristic.ID, bound.ID)
    util.ensure_output_path(base_mod)
    out_path = util.get_output_path(base_mod, out_file)
    with open(out_path, 'w') as out_f:
        out_f.write(json.dumps({'config': config, 'results': results}, indent=2))
    print('-> done, saved to "{}"'.format(out_path))
def transform(self, X):
    X.columns = X.columns.str.lower()  # columns must be lower case
    pool = ProcessPool(nodes=self.n_jobs)
    self.result = []
    for ind in self.fitted:
        self.result.append(pool.apipe(ind.transform, X))
    self.result = [res.get() for res in self.result]
    res = pd.concat(self.result, axis=1)
    return res
def sample(self, horizon, act=None, nodes=8):
    act = [None] * len(horizon) if act is None else act
    seeds = [i for i in range(len(horizon))]
    pool = ProcessPool(nodes=nodes)
    res = pool.map(self._sample, horizon, act, seeds)
    pool.clear()
    state, obs = list(map(list, zip(*res)))
    return state, obs
def fit(self, X, y, trials=5, indicators=indicators, ranges=ranges,
        tune_series=tune_series, tune_params=tune_params, tune_column=tune_column):
    self.fitted = []
    X.columns = X.columns.str.lower()  # columns must be lower case
    pool = ProcessPool(nodes=self.n_jobs)

    for low, high in ranges:
        if low <= 1:
            raise ValueError("Range low must be > 1")
        if high >= len(X):
            raise ValueError(
                f"Range high:{high} must be < length of X:{len(X)}")
        for ind in indicators:
            idx = 0
            if ":" in ind:
                idx = int(ind.split(":")[1])
                ind = ind.split(":")[0]
            fn = f"{ind}("

            if ind[0:3] == "tta":
                usage = eval(f"{ind}.__doc__").split(")")[0].split("(")[1]
                params = re.sub(r'[^0-9a-zA-Z_\s]', '', usage).split()
            else:
                sig = inspect.signature(eval(ind))
                params = sig.parameters.values()

            for param in params:
                param = re.split(':|=', str(param))[0].strip()
                if param == "open_":
                    param = "open"
                if param == "real":
                    fn += f"X.close, "
                elif param == "ohlc":
                    fn += f"X, "
                elif param == "ohlcv":
                    fn += f"X, "
                elif param in tune_series:
                    fn += f"X.{param}, "
                elif param in tune_params:
                    fn += f"{param}=trial.suggest_int('{param}', {low}, {high}), "
            fn += ")"

            self.fitted.append(
                pool.apipe(Optimize(function=fn, n_trials=trials).fit, X, y,
                           idx=idx, verbose=self.verbose))

    self.fitted = [fit.get() for fit in self.fitted]  # Get results of jobs
def run(self):
    # from pathos.multiprocessing import Pool
    from pathos.multiprocessing import ProcessPool as Pool
    args = self._interpna_setup()
    pool = Pool(processes=self.ncpus)
    out = pool.map(self._interpna, args[:400])
    pool.close()
    lons = self._lonpc
    # stack em and roll the axis so time is dim0
    dat = np.rollaxis(np.dstack(out), -1)
    if self._rotated == True:  # rotate it back
        dat, lons = self.rotate(dat, lons, to_pacific=False)
    # place back into a new xarray.Dataset object for further processing
    # function to make a new xarray.Dataset object with the mdata we need?
    # ds = self.ds
    # var = ds[ self.variable ]
    # setattr( var, 'data', dat )
    # self.ds = ds
    print('ds interpolated updated into self.ds')
    return dat
class BatchRunnerMP(BatchRunner):
    """ Child class of BatchRunner, extended with multiprocessing support. """

    def __init__(self, model_cls, nr_processes=2, **kwargs):
        """ Create a new BatchRunnerMP for a given model with the given parameters.

        Args:
            model_cls: The class of model to batch-run.
            nr_processes: the number of separate processes the BatchRunner
                should start, all running in parallel.
            kwargs: the kwargs required for the parent BatchRunner class
        """
        if not pathos_support:
            raise MPSupport
        super().__init__(model_cls, **kwargs)
        self.pool = ProcessPool(nodes=nr_processes)

    def run_all(self):
        """
        Run the model at all parameter combinations and store results,
        overrides run_all from BatchRunner.
        """
        run_count = count()
        total_iterations, all_kwargs, all_param_values = self._make_model_args()

        # register the process pool and init a queue
        job_queue = []
        with tqdm(total_iterations, disable=not self.display_progress) as pbar:
            for i, kwargs in enumerate(all_kwargs):
                param_values = all_param_values[i]
                for _ in range(self.iterations):
                    # make a new process and add it to the queue
                    job_queue.append(self.pool.uimap(self.run_iteration,
                                                     (kwargs,),
                                                     (param_values,),
                                                     (next(run_count),)))
            # empty the queue
            results = []
            for task in job_queue:
                for model_vars, agent_vars in list(task):
                    results.append((model_vars, agent_vars))
                pbar.update()

            # store the results
            for model_vars, agent_vars in results:
                if self.model_reporters:
                    for model_key, model_val in model_vars.items():
                        self.model_vars[model_key] = model_val
                if self.agent_reporters:
                    for agent_key, reports in agent_vars.items():
                        self.agent_vars[agent_key] = reports
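# Hypothetical usage sketch for BatchRunnerMP, written against the (older) mesa
# BatchRunner constructor arguments; `MyModel`, its `density` parameter, and the
# reporter are illustrative only:
#
#   batch_run = BatchRunnerMP(MyModel,
#                             nr_processes=4,
#                             variable_parameters={'density': [0.1, 0.5, 0.9]},
#                             iterations=10,
#                             model_reporters={'n_agents':
#                                              lambda m: m.schedule.get_agent_count()})
#   batch_run.run_all()
#   model_df = batch_run.get_model_vars_dataframe()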
def undirect(self, ncores=4):
    '''
    Remove directional links between species by finding the net weight of the jacobian
    '''
    dct = {}
    specs = self.spec.columns
    iterate = []
    for i in specs:
        for j in specs:
            if i == j:
                break
            iterate.append(list(set([i, j])))

    self = self.jacsp.compute()

    def net(d):
        ret = []
        for n in d:
            total = []
            try:
                total.append(self['%s->%s' % (n[0], n[1])])
            except:
                pass
            try:
                total.append(-self['%s->%s' % (n[1], n[0])])
            except:
                pass
            if len(total) > 0:
                ret.append(['->'.join(n), sum(total)])
        return ret

    dct = ProcessPool(nodes=ncores).amap(net, np.array_split(iterate, ncores))
    while not dct.ready():
        time.sleep(5)
        print(".")
    dct = dct.get()
    return dict([i for j in dct for i in j])
# from pathos.multiprocessing import ProcessingPool as Pool
# import pathos.multiprocessing as mp
from pathos.multiprocessing import ProcessPool as Pool
from ActiveShapeModelsBetter import ASMB, Point, Shape
import dill

if __name__ == "__main__":
    asm = ASMB([0, 1], 10)
    asm.addShape(Shape([Point(100, 200), Point(200, 440), Point(400, 300)]))
    p = Pool()
    p.map(Point.rotate, asm.allShapes, [[-1, 1], [1, -1]])
def spaceConvNumbaThreadedOuter2(self):
    """ `Block` threading example """

    def divider(arr_dims, coreNum=1):
        """ Get a bunch of iterable ranges;
        Example input: [[[0, 24], [15, 25]]]"""
        if (coreNum == 1):
            return arr_dims
        elif (coreNum < 1):
            raise ValueError(
                'partitioner expected a positive number of cores, got %d'
                % coreNum)
        elif (coreNum % 2):
            raise ValueError(
                'partitioner expected an even number of cores, got %d'
                % coreNum)

        total = []

        # Split each coordinate in arr_dims in _half_
        for arr_dim in arr_dims:
            dY = arr_dim[0][1] - arr_dim[0][0]
            dX = arr_dim[1][1] - arr_dim[1][0]

            if ((coreNum,)*2 > (dY, dX)):
                coreNum = max(dY, dX)
                coreNum -= 1 if (coreNum % 2 and coreNum > 1) else 0

            new_c1, new_c2, = [], []

            if (dY >= dX):
                # Subimage height is greater than its width
                half = dY // 2
                new_c1.append([arr_dim[0][0], arr_dim[0][0] + half])
                new_c1.append(arr_dim[1])

                new_c2.append([arr_dim[0][0] + half, arr_dim[0][1]])
                new_c2.append(arr_dim[1])
            else:
                # Subimage width is greater than its height
                half = dX // 2
                new_c1.append(arr_dim[0])
                new_c1.append([arr_dim[1][0], half])

                new_c2.append(arr_dim[0])
                new_c2.append([arr_dim[1][0] + half, arr_dim[1][1]])

            total.append(new_c1), total.append(new_c2)

        # If the number of cores is 1, we get back the total; Else,
        # we split each in total, etc.; it's turtles all the way down
        return divider(total, coreNum // 2)

    def numer(start, finish):
        count = start
        iteration = 0
        while count < finish:
            yield iteration, count
            iteration += 1
            count += 1

    @checkarrays
    @jit
    def dotJit(subarray, kernel):
        total = 0.0
        for i in xrange(subarray.shape[0]):
            for j in xrange(subarray.shape[1]):
                total += subarray[i][j] * kernel[i][j]
        return total

    def outer(subset):
        a, b, = subset
        ai, bi, = map(sub, *reversed(zip(*subset)))
        temp = np.zeros((ai, bi))

        for ind, i in numer(*a):
            for jnd, j in numer(*b):
                temp[ind, jnd] = dotJit(
                    self.array[i:i+self.__rangeKX_, j:j+self.__rangeKY_],
                    self.kernel)

        return temp, a, b

    # ProcessPool auto-detects processors, but my function above
    # only accepts an even number; I'm still working on it.
    # Otherwise I wouldn't mess with cpu_count()
    cores = cpu_count()
    cores -= 1 if (cores % 2 == 1 and cores > 1) else 0

    # Get partitioning indices and the usable number of cores
    shape = [[[0, self.__rangeX_ - 1], [0, self.__rangeY_ - 1]]]
    partitions = divider(shape, cores)

    # Map partitions to threads and process
    pool = ProcessPool(nodes=cores)
    results = pool.map(outer, partitions)
    #pool.close()
    #pool.join()

    for ind, res in enumerate(results):
        X, Y, = results[ind][1:]
        self.__arr_[slice(*X), slice(*Y)] += results[ind][0]

    return self.__arr_