def align_dataset(config_file: str):
    """Align every image of a dataset and print summary counters.

    Loads an ``AlignConfig`` from ``config_file``, creates the configured
    output directory, runs ``align_person`` on every entry returned by
    ``get_dataset`` (in a process pool when more than one process is
    requested, serially otherwise), calls ``transform_to_lfw_format`` on the
    output directory and finally prints the module-level counters.

    Args:
        config_file: Path handed to ``AlignConfig``.
    """
    config = AlignConfig(config_file)
    output_dir = os.path.expanduser(config.output_dir)
    os.makedirs(output_dir, exist_ok=True)
    dataset = get_dataset(config.input_dir)
    num_images = sum(len(i) for i in dataset)
    TIMER.max_value = num_images
    TIMER.start()

    # os.cpu_count() may return None; min(int, None) would raise TypeError.
    available_cpus = os.cpu_count() or 1
    num_processes = cast(int, min(config.num_processes, available_cpus))
    if num_processes == -1:
        # -1 means "use every available CPU".
        num_processes = available_cpus

    if num_processes > 1:
        process_pool = ProcessPool(num_processes)
        try:
            process_pool.imap(align_person,
                              zip(dataset, [config] * len(dataset)))
            process_pool.close()
            process_pool.join()
        finally:
            # pathos caches pools by configuration; without clear() a later
            # ProcessPool(num_processes) would be handed this closed pool.
            process_pool.clear()
    else:
        for person in dataset:
            align_person((person, config))

    transform_to_lfw_format(output_dir, num_processes)
    TIMER.finish()
    print('Total number of images: %d' % int(NUM_IMAGES_TOTAL.value))
    print('Number of faces found and aligned: %d' % int(NUM_SUCESSFUL.value))
    print('Number of unsuccessful: %d' % int(NUM_UNSECESSFUL.value))
def pare_multi_process(urls_and_people):
    """Run ``safe_pare_matches_and_download`` over (urls, person) pairs in a pool.

    Args:
        urls_and_people: Iterable of ``(urls, person)`` pairs. An empty
            iterable is accepted and results in no work being scheduled.
    """
    print("[INFO] Paring and downlaoding all image urls with {} processes".
          format(NUM_PROCESSES))
    pairs = list(urls_and_people)
    # zip(*[]) yields nothing, so unpacking it would raise ValueError.
    if pairs:
        urls, person = zip(*pairs)
        pare_pool = ProcessPool(NUM_PROCESSES)
        try:
            pare_pool.imap(safe_pare_matches_and_download, urls, person)
            pare_pool.close()
            pare_pool.join()
        finally:
            # Drop the cached (now closed) pool so the next
            # ProcessPool(NUM_PROCESSES) starts a fresh one.
            pare_pool.clear()
    print("[INFO] Done paring and downlaoding all image urls")
def make_cache():
    """Build the episode of every (agent, scenario) pair and cache it in RAM.

    Each pair is passed to ``make_episode_without_decorate`` (serially when
    ``n_cores == 1``, through a process pool otherwise). Every non-``None``
    result is then decorated with the data returned by
    ``retrieve_episode_from_disk`` and stored with ``save_in_ram_cache``.
    """
    from grid2viz.src.manager import (
        scenarios,
        agents,
        make_episode_without_decorate,
        n_cores,
        retrieve_episode_from_disk,
        save_in_ram_cache,
        cache_dir,
    )
    from pathos.multiprocessing import ProcessPool

    if not os.path.exists(cache_dir):
        print(
            "Starting Multiprocessing for reading the best agent of each scenario"
        )

    # TODO: not every agent has necessarily been run on exactly the same
    # scenarios; avoid an error when an agent was not run on a scenario.
    agent_scenario_list = [(agent, scenario) for agent in agents
                           for scenario in scenarios]

    if n_cores == 1:  # no multiprocessing; useful for debugging if needed
        agents_data = [
            make_episode_without_decorate(agent, scenario)
            for agent, scenario in agent_scenario_list
        ]
    else:
        pool = ProcessPool(n_cores)
        try:
            # Go over all agents and, for each agent, all scenarios.
            agents_data = list(
                pool.imap(
                    make_episode_without_decorate,
                    [agent for agent, _ in agent_scenario_list],
                    [scenario for _, scenario in agent_scenario_list],
                ))
        finally:
            pool.close()
            pool.join()
            # pathos caches pools; clear() prevents a later
            # ProcessPool(n_cores) from receiving this closed pool.
            pool.clear()
        print("Multiprocessing done")

    # Decorate each episode and keep it in the RAM cache.
    for i, agent_episode in enumerate(agents_data):
        print(i)
        if agent_episode is None:
            continue
        episode_data = retrieve_episode_from_disk(
            agent_episode.episode_name, agent_episode.agent)
        agent_episode.decorate(episode_data)
        save_in_ram_cache(agent_episode.episode_name, agent_episode.agent,
                          agent_episode)
def normalize(self) -> None:
    """Normalize the ensemble, reusing saved results when allowed.

    Unless ``self.regenerate`` is set, a successful ``self.load()`` ends the
    call. Otherwise ``self.step`` is mapped over every (index, nld, gsf)
    triple in a process pool, the results are stored in ``self.res`` and
    ``self.save()`` is called.
    """
    if not self.regenerate:
        try:
            self.load()
        except FileNotFoundError:
            pass  # nothing saved yet: fall through and compute
        else:
            return

    separate_set = (self.normalizer_nld is not None
                    and self.normalizer_gsf is not None)
    simultan_set = self.normalizer_simultan is not None
    assert xor(separate_set, simultan_set), (
        "Either 'normalizer_nld' and 'normalizer_gsf' must be set, or "
        "normalizer_simultan")

    gsfs = self.extractor.gsf
    nlds = self.extractor.nld

    self.LOG.info(f"Start normalization with {self.nprocesses} cpus")
    workers = ProcessPool(nodes=self.nprocesses)
    n_members = len(nlds)
    pending = workers.imap(self.step, range(n_members), nlds, gsfs)
    self.res = list(tqdm(pending, total=n_members))
    workers.close()
    workers.join()
    workers.clear()

    self.save()
def getdates(year, par=False):
    """Use gsutil to list the files of every date folder of a given year.

    Runs ``gsutil ls`` on the year's GLADalert location, then ``gsutil ls``
    on each entry that was returned, and concatenates all listings.

    Args:
        year: Year, formatted with ``%d`` into the bucket location.
        par: When true, the per-date listings are run through a pathos
            ``ProcessPool``; otherwise they run one after another.

    Returns:
        Flat list of the whitespace-separated entries printed by gsutil.
    """
    bucket = 'earthenginepartners-hansen'
    location = 'gs://%s/GLADalert/%d' % (bucket, year)

    def list_path(path):
        # The with-block closes the pipe instead of leaving it to the GC.
        with os.popen('gsutil ls %s' % path) as pipe:
            return pipe.read().split()

    dates = list_path(location)
    print('number of dates: ', len(dates))

    if par:
        from pathos.multiprocessing import ProcessPool
        pool = ProcessPool(nodes=25)
        try:
            listings = list(pool.imap(list_path, dates))
        finally:
            pool.close()
            pool.join()
            pool.clear()
    else:
        # The original built this generator and discarded it, so the loop
        # below extended the result with the characters of each path string.
        listings = [list_path(date) for date in dates]

    ret = []
    for listing in listings:
        ret.extend(listing)
    return ret
def run_process_by_pathos_nonblockingmap(pos_arr, dc_arr):
    """Apply ``run_once`` to paired items of both arrays via ``ProcessPool.imap``.

    The results are consumed and discarded; nothing is returned.
    """
    own_name = inspect.currentframe().f_code.co_name
    print("Begin {}...".format(own_name))
    workers = ProcessPool(nodes=8)
    # imap is non-blocking; iterating the result waits for every task.
    for _ in workers.imap(run_once, pos_arr, dc_arr):
        pass
def start(self):
    """Run ``self.sort`` on every truthy entry of the input directory listing.

    A tqdm progress bar is shown only when neither ``self.verbose`` nor
    ``self.dry_run`` is set.
    """
    candidates = dicom_list(self.input_directory, load=False)
    pool = ProcessPool(self.concurrency)

    # Materialise the entries up front so the total count is known.
    dcm_files = list(filter(None, candidates))
    pending = pool.imap(self.sort, dcm_files)

    # Without per-file output a progress bar is the only feedback.
    if not (self.verbose or self.dry_run):
        pending = tqdm.tqdm(pending, total=len(dcm_files))

    # Drain the iterator so every task is waited for.
    for _ in pending:
        pass
def multiprocessing_imap(
    func: Callable,
    iterable: Iterable[Any],
    *,
    n_workers: int = -1,
    progressbar: bool = True,
    args=(),
    **kwargs
) -> List[Any]:
    """Apply ``func`` to every element of ``iterable``, keeping input order.

    Args:
        func: Function to apply to each element in iterable.
        iterable: Input iterable; it is fully materialised first.
        n_workers: Amount of workers (processes) to spawn. A value of 1, or
            an input of exactly one element, runs everything in-process.
        progressbar: Whether to wrap the results in a tqdm progress bar.
        args: Additional positional arguments bound to func before the element.
        kwargs: Additional keyword arguments bound to func.

    Returns:
        Results in same order as input iterable.
    """
    items = list(iterable)  # also exhausts generators
    total = len(items)
    bound = partial(func, *args, **kwargs)

    pool = None
    if total == 1 or n_workers == 1:
        # A pool would only add overhead here.
        results = map(bound, items)
    else:
        n_workers = _choose_n_workers(total, n_workers)
        logger.debug("Starting ProcessPool with %d workers", n_workers)
        pool = ProcessPool(n_workers)
        results = pool.imap(bound, items)

    if progressbar:
        results = tqdm(results, total=total)

    try:
        return list(results)
    finally:
        if pool:
            logger.debug("Closing ProcessPool")
            pool.clear()
def main(args):
    """Reconstruct (and optionally mask) a map for every input STAR file.

    Args:
        args: Parsed arguments; this function reads ``input``, ``output``,
            ``apix``, ``sym``, ``ctf``, ``mask``, ``delete_unmasked`` and
            ``nproc``.

    Returns:
        1 when fewer than two inputs were given or when any job failed,
        0 otherwise.
    """
    if len(args.input) < 2:
        print("Please name at least one STAR file and an output directory")
        return 1

    if args.apix is None:
        print("Using pixel size computed from STAR files")

    def do_job(star):
        """Process one STAR file; return 0 on success and 1 on failure."""
        try:
            mrc = os.path.join(
                args.output,
                os.path.basename(star).replace(".star", ".mrc"))
            print("Starting reconstruction of %s" % star)
            do_reconstruct(star, mrc, args.apix, args.sym, args.ctf)
            print("Wrote %s reconstruction to %s" % (star, mrc))
            if args.mask is not None:
                masked_mrc = mrc.replace(".mrc", "_masked.mrc")
                do_mask(mrc, masked_mrc, args.mask)
                print("Wrote masked map %s" % masked_mrc)
                if args.delete_unmasked:
                    delete_unmasked(mrc, masked_mrc)
                    print("Overwrote %s with %s" % (mrc, masked_mrc))
        except Exception as e:
            # Keep the remaining jobs running, but report the cause and
            # signal the failure to the caller.
            print("Failed on %s: %s" % (star, e))
            return 1
        return 0

    pool = Pool(nodes=args.nproc)
    try:
        codes = list(pool.imap(do_job, args.input))
    finally:
        pool.close()
        pool.join()
        pool.terminate()

    return 1 if any(codes) else 0
def compute_seq_distances(sequences, affinity=sequence_distance,
                          nb_jobs=NB_THREADS):
    """ compute the symmetric matrix of distances between all sequences

    Only pairs with ``i <= j`` are evaluated; each value is mirrored.

    :param [] sequences: list of all sequences
    :param func affinity: function specify the sample affinity
    :param int nb_jobs: number jobs running in parallel
    :return ndarray:

    >>> ss = [['a', 'b', 'a', 'c'], ['a', 'a', 'b', 'a'], ['b', None, 'b', 'a']]
    >>> compute_seq_distances(ss, affinity=sequence_distance)
    array([[0.  , 0.25, 0.5 ],
           [0.25, 0.  , 0.25],
           [0.5 , 0.25, 0.  ]])
    >>> ss = [['hi', 'there', 'how', 'are', 'you'],
    ...       ['hi', 'how', 'are', 'you'],
    ...       ['hi', 'are', 'you', 'there']]
    >>> compute_seq_distances(ss)
    array([[0. , 0.2, 0.6],
           [0.2, 0. , 0.5],
           [0.6, 0.5, 0. ]])
    """
    nb_seq = len(sequences)
    pairs = [(i, j) for i in range(nb_seq) for j in range(i, nb_seq)]
    # Lazily attach the two sequences to each index pair.
    tasks = ((pair, (sequences[pair[0]], sequences[pair[1]]))
             for pair in pairs)
    dists = np.zeros((nb_seq, nb_seq))
    _wrap_dist = partial(wrap_distance, similar_distance=affinity)

    pool = ProcessPool(nb_jobs)
    for pair, value in pool.imap(_wrap_dist, tasks):
        row, col = pair[0], pair[1]
        dists[row, col] = value
        dists[col, row] = value
    pool.close()
    pool.join()
    pool.clear()
    return dists
def multiprocess(func: Callable, all_urls: List[List[str]], people: List[str],
                 total_count: int,
                 info: str) -> Tuple[List[List[str]], List[str]]:
    """Apply ``func(urls, person)`` to every pair and collect both outputs.

    Args:
        func: Callable returning a ``(filtered_urls, person)`` pair.
        all_urls: One list of urls per person.
        people: Person names, aligned with ``all_urls``.
        total_count: Maximum value given to the global progress bar.
        info: Label used in the log lines and the progress bar.

    Returns:
        Two lists: the filtered urls and the people, both as returned by
        ``func``. Both are empty when there was nothing to process.
    """
    global TIMER
    print("[INFO] {} with {} processes".format(info, NUM_PROCESSES))
    widgets_match = [
        '{}: '.format(info),
        pb.Percentage(), ' ',
        pb.Bar(marker=pb.RotatingMarker()), ' ',
        pb.ETA()
    ]
    TIMER = pb.ProgressBar(widgets=widgets_match, maxval=total_count).start()

    if NUM_PROCESSES > 1:
        process_pool = ProcessPool(NUM_PROCESSES)
        try:
            results = list(process_pool.imap(func, all_urls, people))
        finally:
            process_pool.close()
            process_pool.join()
            # Drop the cached pool so the next call gets a usable one.
            process_pool.clear()
    else:
        results = [func(urls, person)
                   for urls, person in zip(all_urls, people)]

    if results:
        # Both branches return the people reported by func; the serial
        # branch used to drop them and hand back the input list instead.
        filtered_urls, people = (list(column) for column in zip(*results))
    else:
        # zip(*[]) yields nothing, so unpacking it would raise ValueError.
        filtered_urls, people = [], []

    print("[INFO] Done {}".format(info))
    return filtered_urls, people
'''
Variation pathos, local method
Windows OS: Hangs/ multiprocess error with newer version of pathos
Mac OS:
Linux: Debian (unclear because windows app but operated in same manner)
Cloud-based:
    Repl.it: Works
    Ideone.com: Fails-multiprocess error
'''
from multiprocess import freeze_support
from pathos.multiprocessing import ProcessPool


def f(vars):
    """Raise the first element of ``vars`` to the power of the second."""
    base, exponent = vars[0], vars[1]
    return base**exponent


if __name__ == "__main__":
    freeze_support()
    workers = ProcessPool(4)
    powers = workers.imap(f, [(1, 5), (2, 8), (3, 9)])
    # imap is lazy; list() waits for and collects every result.
    print(list(powers))
def _cmpt_lim_phsrand_obj_vals(self, phs_red_rate, idxs_sclr):
    """Evaluate the objective values over a range of perturbation ratios.

    Builds evenly spaced perturbation ratios between the configured lower
    and upper bounds, evaluates each one with
    ``self._cmpt_lim_phsrand_obj_vals_single`` (in a process pool when more
    than one CPU is to be used), stops as soon as the selected statistic of
    a result reaches the configured upper bound times ``ubd_sclr``, stores
    the evaluated ratios and values on the instance and then calls
    ``self._set_lim_phsrand_ptrb_ratio()`` and
    ``self._plot_lim_phsrand_obj_vals()``.

    Args:
        phs_red_rate: Not used by this method.
        idxs_sclr: Not used by this method.
    """
    beg_tm = default_timer()

    # Both arguments are deliberately ignored.
    _ = phs_red_rate
    _ = idxs_sclr

    self._sett_lim_phsrand_dir.mkdir(exist_ok=True)

    ptrb_ratios = np.linspace(self._sett_lim_phsrand_ptrb_lbd,
                              self._sett_lim_phsrand_ptrb_ubd,
                              self._sett_lim_phsrand_n_ptrb_vals,
                              endpoint=True)

    # One row per perturbation ratio; only evaluated rows are filled in.
    ptrb_obj_vals = np.empty((self._sett_lim_phsrand_n_ptrb_vals,
                              self._sett_lim_phsrand_iters_per_atpt))

    n_cpus = min(self._sett_lim_phsrand_n_ptrb_vals, self._sett_misc_n_cpus)

    # The search stops once the statistic reaches obj_ubd * ubd_sclr.
    ubd_sclr = 1.2

    search_attempts = 0
    ress = []

    # Name of a numpy function that reduces one result to a scalar.
    sel_stat_ftn = getattr(np, self._alg_lim_phsrand_sel_stat)

    if self._vb:
        print('Attempt,', 'Perturb ratio,', ' Minimum,', ' Mean,',
              ' Maximum')

    if n_cpus > 1:
        self._lock = Manager().Lock()

        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        # Ratios are evaluated in batches of n_cpus so the stopping
        # criterion can be checked between batches.
        for i in range(0, self._sett_lim_phsrand_n_ptrb_vals, n_cpus):

            end_idx = min(self._sett_lim_phsrand_n_ptrb_vals, n_cpus + i)

            assert i < end_idx, 'This was not supposed to happen!'

            search_attempts += end_idx - i

            # Don't use ret_mp_idxs, it will be inefficient.
            args_gen = ((j, ptrb_ratios[j]) for j in range(i, end_idx))

            ptrb_obj_vals_iter = (list(
                mp_pool.imap(self._cmpt_lim_phsrand_obj_vals_single,
                             args_gen)))

            ress.extend(ptrb_obj_vals_iter)

            if np.any([
                    sel_stat_ftn(ptrb_obj_vals_iter[k][1]) >=
                (self._sett_lim_phsrand_obj_ubd * ubd_sclr)
                    for k in range(len(ptrb_obj_vals_iter))
            ]):

                break

        mp_pool.close()
        mp_pool.join()

        self._lock = None

        mp_pool = None

    else:
        self._lock = Lock()

        for j in range(self._sett_lim_phsrand_n_ptrb_vals):
            search_attempts += 1

            ress.append(
                self._cmpt_lim_phsrand_obj_vals_single(
                    (j, ptrb_ratios[j])))

            if (sel_stat_ftn(ress[-1][1]) >=
                (self._sett_lim_phsrand_obj_ubd * ubd_sclr)):

                break

        self._lock = None

    # Each result is (index, values); place the values in their row.
    take_idxs = []
    for res in ress:
        take_idxs.append(res[0])
        ptrb_obj_vals[take_idxs[-1], :] = res[1]

    take_idxs.sort()
    take_idxs = np.array(take_idxs)

    # Keep only the ratios that were actually evaluated.
    ptrb_ratios = ptrb_ratios[take_idxs]
    ptrb_obj_vals = ptrb_obj_vals[take_idxs]

    res = ress = None

    assert np.all(
        np.isfinite(ptrb_ratios)), ('Invalid values in ptrb_ratios!')

    assert np.all(
        ptrb_ratios >= 0), ('Values less than zero in ptrb_ratios!')

    assert np.all(
        np.isfinite(ptrb_obj_vals)), ('Invalid values in ptrb_obj_vals!')

    assert np.all(
        ptrb_obj_vals >= 0), ('Values less than zero in ptrb_obj_vals!')

    self._alg_lim_phsrand_ptrb_ratios = ptrb_ratios
    self._alg_lim_phsrand_ptrb_obj_vals = ptrb_obj_vals

    self._set_lim_phsrand_ptrb_ratio()

    self._plot_lim_phsrand_obj_vals()

    end_tm = default_timer()

    if self._vb:
        print(f'Found perturbation ratio of '
              f'{self._alg_lim_phsrand_ptrb_ratio:5.3E} in '
              f'{end_tm - beg_tm:0.1f} '
              f'seconds using {search_attempts} attempts.')
    return
def run(self,
        params,
        runfunc=None,
        parallel=True,
        n_thread=8,
        hide_progress=False):
    """ Dispatch every element of params to a run function and collect the results.

    run() is the flexible entry point for running batches of simulations.
    Each element of params (generally a dict encoding what one simulation
    needs) is handed to a function that returns a neural simulation result
    (e.g., an instantaneous firing rate response). By default that function
    is the simulate() method bound to this object; passing a custom
    "runfunc" overrides it. The intended use of runfunc is to wrap
    simulate() with functions that extend or alter it, so that knowing the
    Simulator still tells the user which simulation runs at the lowest
    level. Examples of this pattern are in the acceptance and replication
    tests in the test suite.

    The output has the same type (list or array) and shape as the input
    params, and each element corresponds to the matching input element.
    Parallelization is only supported between elements of params: a single
    element is never split across workers.

    Args:
        params (list, ndarray): a list or ndarray whose elements are passed to
            runfunc. A Parameters object is unwrapped to its underlying data.
        runfunc (func): function that accepts one element of params and returns
            simulation results. If None is passed, the simulate() method bound
            to this object is used instead.
        parallel (bool): flag to control if we run the simulation in parallel.
            If true, elements of params are dispatched through a ProcessPool
            from the pathos.multiprocessing library. Some functionality (e.g.,
            warnings) does not work in parallel, so turning parallel mode off
            can help troubleshooting. Randomization also needs care: some
            variants of random number generation/seeding may return the same
            (nominally random) result in several workers.
        n_thread (int): number of pool workers, ignored if parallel is false
        hide_progress (bool): flag to disable the tqdm progress bar

    Returns:
        results (list, ndarray): list or ndarray of results

    Raises:
        TypeError: if params is neither a list nor an ndarray.
    """
    # Without an explicit run function, simulate() does the work
    if runfunc is None:
        runfunc = self.simulate

    # A Parameters object is only a shell around the data we need
    if type(params) is Parameters:
        params = params.params

    # Pick the mapping primitive once; the pool is created before params is
    # validated, as the element-wise work is the same in both modes
    if parallel:
        p = ProcessPool(n_thread)
        mapper = p.imap
    else:
        mapper = map

    if type(params) is list:
        return list(
            mapper(runfunc,
                   tqdm(params, disable=hide_progress, total=len(params))))

    if type(params) is np.ndarray:
        # Arrays are flattened for dispatch and restored afterwards
        old_size = params.shape
        flat = np.reshape(params, (params.size, ))
        flat_results = list_to_array(
            list(
                mapper(runfunc,
                       tqdm(flat, disable=hide_progress, total=len(flat)))))
        return np.reshape(flat_results, old_size)

    raise TypeError('params should be a list or an array')
def _find_path_internal(self) -> None:
    """Run one OnlineLSTM runner per kernel name and replay the best trace.

    A runner is created for every entry of ``self.kernel_names`` on a deep
    copy of the grid. In the non-threaded branch each runner's
    ``find_path()`` is called in turn and the visited positions are merged
    into the total path. Afterwards the runner that reached the goal with
    the shortest trace is replayed; if none reached the goal, the runner
    with the longest trace is replayed instead. The index of the chosen
    kernel is stored in ``self.kernel_call_idx``.
    """
    # One (index, runner) pair per kernel name; every runner gets its own
    # deep copy of the grid.
    kernels: List[Tuple[int, AlgorithmRunner]] = list(
        map(lambda kernel: (kernel[0],
                            self._services.algorithm.get_new_runner(
                                copy.deepcopy(self._get_grid()),
                                OnlineLSTM,
                                ([], {"max_it": self._max_it,
                                      "load_name": kernel[1]}),
                                BasicTesting,
                                with_animations=True)),
            enumerate(self.kernel_names)))

    # TODO: HANGS here

    if self._threaded:
        # A previous multiprocessing.Process based start()/join()
        # implementation used to live here (disabled).
        i = 0
        print("Parallel")
        # NOTE(review): joblib normally expects `delayed(func)(args)` items;
        # here the method is called eagerly with `self` passed explicitly.
        # Confirm the intent.
        Parallel(n_jobs=9)(self.kernels_cal(self,_) for _ in kernels)
        p = ProcessPool(10)
        sc = p.map(self.kernels_cal, kernels)
        pool = ProcessPool(nodes = 10)
        # NOTE(review): `num` is not defined in this method; verify it
        # exists at module level. `sc` and `returned_results` are not read
        # again below.
        returned_results = pool.imap(self.test_multip,num)
    else:
        # TODO: Figure out why it hangs
        # Iterates through the kernels one after another.
        for k in kernels:
            # k is a tuple of (index, runner).
            self.__active_kernel = k[1]

            # HANGS HERE!!! This takes 0.2 seconds
            k[1].find_path()
            # Merge the positions visited by this runner into the total path.
            self.__total_path = self.__total_path.union(
                set(map(lambda el: el.position, k[1].map.trace)))
        self.__active_kernel = None

    # check if any found path and if they did take smallest dist
    best_kernels: List[Tuple[int, AlgorithmRunner]] = []

    for kernel in kernels:
        if kernel[1].map.is_goal_reached(kernel[1].map.agent.position):
            best_kernels.append(kernel)

    # take smallest dist kernel if any
    dist: float = float("inf")
    best_kernel: Tuple[int, AlgorithmRunner] = None

    for kernel in best_kernels:
        if dist > len(kernel[1].map.trace):
            dist = len(kernel[1].map.trace)
            best_kernel = kernel

    if best_kernel:
        best_kernel[1].map.replay_trace(self.__replay)
    else:
        # pick the one with furthest progress
        dist = -1
        best_kernel = None
        for kernel in kernels:
            if dist < len(kernel[1].map.trace):
                dist = len(kernel[1].map.trace)
                best_kernel = kernel

        best_kernel[1].map.replay_trace(self.__replay)

    self.kernel_call_idx = best_kernel[0]
# Compile the critic with plain SGD and a mean-squared-error loss.
critic.compile(optimizer=tf.keras.optimizers.SGD(0.0001), loss='mse')
cg = CellGroup()
for gen in range(GEN_ENDED_AT + 1, GEN_ENDS_AT + 1):
    print(f'Generation {gen}')
    print('Running Games...')
    # The weights are pickled once and the same bytes are sent to every game.
    weights = pickle.dumps(critic.get_weights())
    cs = list()
    with tqdm.tqdm(total=GAME_PER_GEN) as pbar:
        # Run GAME_PER_GEN games on the pool; each result is appended to cs.
        for i, dat in enumerate(
                pool.imap(run_game, itertools.repeat(weights,
                                                     GAME_PER_GEN))):
            cs.extend(dat)
            pbar.update()
    print('Running Games Complete.')
    print('Processing Data...')
    # Drop one entry from the group before adding when it already holds
    # NUM_REPLAY_BUF entries.
    if len(cg.cs) == NUM_REPLAY_BUF:
        cg.pop()
    cg.add(cs)
    total = cg.cl + 1
    dat = MyDataset(cg.s, cg.ss, cg.r, cg.a, total,
                    [(*NUM_GRID, NUM_CHANNEL), (1, ), (1, ), (1, )]).new()
if __name__ == "__main__": def process_updates(x): from API_TTERMS import getconf2, query, testingKWT # tid = x[0] tid = 428 testingKWT(tid, '144.167.35.89') conf = getconf2() q_trackers = f"select tid from trackers where userid = '*****@*****.**' or YEAR(date_created) in (2019,2020)" # q_trackers = f"select t.tid from trackers t left join tracker_keyword tk on t.tid = tk.tid where t.tid is null or tk.tid is null or tk.status_percentage < 100 or tk.status != 1 or tk.status_percentage is null or tk.status is null" tracker_result = query(conf, q_trackers) if parallel: process_pool = ProcessPool(num_processes) for record in tqdm(process_pool.imap(process_updates, tracker_result), desc="Terms", ascii=True, file=sys.stdout, total=len(tracker_result)): pass process_pool.close() print("Joining pool") process_pool.join() print("Clearing pool") process_pool.clear() print("Finished!") else: for x in tqdm(tracker_result, desc="Terms", ascii=True,
# cursor.execute("select tid from trackers where tid not in (select tid from tracker_keyword)") records = cursor.fetchall() connection.close() if parallel_main: # pool = Pool(int(6)) # pool.map(process_updates, records) def process_updates(x): from Utils.functions import clean_text, getconf2, updateStatus, getTopKWS, single_process, testingKWT num_processes = 24 update__status = True parallel = False tid = x['tid'] print(tid) # tid = 424 testingKWT(tid, '144.167.35.89', parallel, update__status, num_processes) process_pool = ProcessPool(num_processes_main) pbar = tqdm(process_pool.imap(process_updates, records), desc="Terms_", ascii=True, file=sys.stdout, total=len(records)) for x in pbar: pbar.update(1) else: for x in tqdm(records, desc="Terms", ascii=True, file=sys.stdout): print(x) process_updates(x)
def generate(self, number: int, method: str = 'poisson',
             regenerate: bool = False) -> None:
    """Generates an ensemble of matrices and estimates standard deviation

    ``self.step`` is run ``number`` times in a process pool, each time with
    its own seed spawned from ``self.seed``. The stacked results are split
    into raw, unfolded and first generation ensembles, and the standard
    deviation of each is computed along the ensemble axis and saved.

    Args:
        number: The number of perturbed matrices to generate.
        method: The stochastic method to use to generate the perturbations
            Can be 'gaussian' or 'poisson'.
        regenerate: Whether to use already generated files (False) or
            generate them all anew (True).
    """
    assert self.raw is not None, "Set the raw matrix"
    assert self.unfolder is not None, "Set unfolder"
    assert self.first_generation_method is not None, \
        "Set first generation method"

    self.size = number
    self.regenerate = regenerate

    LOG.info(f"Start normalization with {self.nprocesses} cpus")
    workers = ProcessPool(nodes=self.nprocesses)
    seed_sequence = np.random.SeedSequence(self.seed)
    pending = workers.imap(self.step, range(number),
                           seed_sequence.spawn(number), repeat(method))
    ensembles = np.array(list(tqdm(pending, total=number)))
    workers.close()
    workers.join()
    workers.clear()

    # Axis 1 holds, in order: raw, unfolded and first generation matrices.
    raw_ensemble, unfolded_ensemble, firstgen_ensemble = (
        ensembles[:, kind, :, :] for kind in range(3))

    # TODO Move this to a save step
    self.raw.save(self.path / 'raw.npy')
    # saving for firstgen is in step due to pickling
    self.firstgen = Matrix(path=self.path / 'firstgen.npy')

    # Standard deviation over the ensemble members, one matrix per stage.
    raw_std = Matrix(np.std(raw_ensemble, axis=0),
                     self.raw.Eg, self.raw.Ex, state='std')
    raw_std.save(self.path / "raw_std.npy")

    unfolded_std = Matrix(np.std(unfolded_ensemble, axis=0),
                          self.raw.Eg, self.raw.Ex, state='std')
    unfolded_std.save(self.path / "unfolded_std.npy")

    firstgen_std = Matrix(np.std(firstgen_ensemble, axis=0),
                          self.firstgen.Eg, self.firstgen.Ex, state='std')
    firstgen_std.save(self.path / "firstgen_std.npy")

    self.std_raw = raw_std
    self.std_unfolded = unfolded_std
    self.std_firstgen = firstgen_std

    self.raw_ensemble = raw_ensemble
    self.unfolded_ensemble = unfolded_ensemble
    self.firstgen_ensemble = firstgen_ensemble
def load_scenario_cards(url):
    """
    Create and display html cards with scenario's kpi for
    the 15 first scenarios using cache file.

    Cards are only built when the last segment of ``url`` is empty, "/" or
    "episodes". When ``cache_dir`` does not exist yet, the best agent's
    episode of every scenario is first built in a process pool, decorated
    and stored in the RAM cache.

    Returns the list of card columns (empty for any other page).
    """
    cards_list = []
    cards_count = 0
    # Minimal plot layout shared by both graphs of every card.
    episode_graph_layout = {
        "autosize": True,
        "showlegend": False,
        "xaxis": {
            "showticklabels": False
        },
        "yaxis": {
            "showticklabels": False
        },
        "margin": {
            "l": 0,
            "r": 0,
            "t": 0,
            "b": 0
        },
    }

    # Only the last path segment decides whether this is the episode page.
    url_split = url.split("/")
    url_split = url_split[len(url_split) - 1]
    is_episode_page = url_split == "/" or url_split == "" or url_split == "episodes"
    start_time = time.time()
    # NOTE(review): cards_count is always 0 at this point, so the
    # `cards_count < 15` test never limits the number of cards; confirm
    # whether a cap of 15 was intended inside the loop.
    if cards_count < 15 and is_episode_page:
        sorted_scenarios = list(sorted(scenarios))

        if not os.path.exists(cache_dir):
            print(
                "Starting Multiprocessing for reading the best agent of each scenario"
            )
            pool = ProcessPool(n_cores)
            best_agents_data = list(
                pool.imap(
                    make_episode_without_decorate,
                    [
                        best_agents[scenario]["agent"]
                        for scenario in sorted_scenarios
                    ],
                    sorted_scenarios,
                ))
            # NOTE(review): the pool is closed but neither joined nor
            # cleared; pathos caches pools, so a later ProcessPool(n_cores)
            # may receive this closed pool. Confirm.
            pool.close()
            print("Multiprocessing done")

            # Decorate every freshly built episode and keep it in RAM.
            for i, scenario in enumerate(sorted_scenarios):
                best_agent_episode = best_agents_data[i]
                episode_data = retrieve_episode_from_disk(
                    best_agent_episode.episode_name, best_agent_episode.agent)
                best_agent_episode.decorate(episode_data)
                save_in_ram_cache(
                    best_agent_episode.episode_name,
                    best_agent_episode.agent,
                    best_agent_episode,
                )

        # One card per scenario, built from its best agent's episode.
        for i, scenario in enumerate(sorted_scenarios):
            best_agent_episode = make_episode(
                best_agents[scenario]["agent"], scenario)
            prod_share = EpisodeTrace.get_prod_share_trace(
                best_agent_episode)
            consumption = best_agent_episode.profile_traces
            cards_list.append(
                dbc.Col(
                    id=f"card_{scenario}",
                    lg=4,
                    width=12,
                    children=[
                        dbc.Card(
                            className="mb-3",
                            children=[
                                dbc.CardBody([
                                    html.H5(
                                        className="card-title",
                                        children="Scenario {0}".format(
                                            scenario),
                                    ),
                                    # Row of four KPI columns.
                                    dbc.Row(children=[
                                        dbc.Col(
                                            className="mb-4",
                                            children=[
                                                html.P(
                                                    className=
                                                    "border-bottom h3 mb-0 text-right",
                                                    children=best_agents[
                                                        scenario]
                                                    ["out_of"],
                                                ),
                                                html.P(
                                                    className="text-muted",
                                                    children=
                                                    "Agents on Scenario",
                                                ),
                                            ],
                                        ),
                                        dbc.Col(
                                            className="mb-4",
                                            children=[
                                                html.P(
                                                    className=
                                                    "border-bottom h3 mb-0 text-right",
                                                    children="{}/{}".
                                                    format(
                                                        best_agents[
                                                            scenario]
                                                        ["value"],
                                                        meta_json[scenario]
                                                        ["chronics_max_timestep"],
                                                    ),
                                                ),
                                                html.P(
                                                    className="text-muted",
                                                    children=
                                                    "Agent's Survival",
                                                ),
                                            ],
                                        ),
                                        dbc.Col(
                                            className="mb-4",
                                            children=[
                                                html.P(
                                                    className=
                                                    "border-bottom h3 mb-0 text-right",
                                                    children=
                                                    f'{round(best_agents[scenario]["cum_reward"]):,}',
                                                ),
                                                html.P(
                                                    className="text-muted",
                                                    children=
                                                    "Cumulative Reward",
                                                ),
                                            ],
                                        ),
                                        dbc.Col(
                                            className="mb-4",
                                            children=[
                                                html.P(
                                                    className=
                                                    "border-bottom h3 mb-0 text-right",
                                                    children="{} min".
                                                    format(
                                                        round(
                                                            best_agent_episode
                                                            .
                                                            total_maintenance_duration
                                                        )),
                                                ),
                                                html.P(
                                                    className="text-muted",
                                                    children=
                                                    "Total Maintenance Duration",
                                                ),
                                            ],
                                        ),
                                    ]),
                                    # Row with the two graphs of the card.
                                    dbc.Row(
                                        className="align-items-center",
                                        children=[
                                            dbc.Col(
                                                lg=4,
                                                width=12,
                                                children=[
                                                    html.H5(
                                                        "Production Share",
                                                        className=
                                                        "text-center",
                                                    ),
                                                    dcc.Graph(
                                                        style={
                                                            "height": "150px"
                                                        },
                                                        figure=go.Figure(
                                                            layout=
                                                            episode_graph_layout,
                                                            data=prod_share,
                                                        ),
                                                    ),
                                                ],
                                            ),
                                            dbc.Col(
                                                lg=8,
                                                width=12,
                                                children=[
                                                    html.H5(
                                                        "Consumption Profile",
                                                        className=
                                                        "text-center",
                                                    ),
                                                    dcc.Graph(
                                                        style={
                                                            "height": "150px"
                                                        },
                                                        figure=go.Figure(
                                                            layout=
                                                            episode_graph_layout,
                                                            data=
                                                            consumption,
                                                        ),
                                                    ),
                                                ],
                                            ),
                                        ],
                                    ),
                                ]),
                                # The button id is the scenario name itself.
                                dbc.CardFooter(
                                    dbc.Button(
                                        "Open",
                                        id=scenario,
                                        key=scenario,
                                        className="btn-block",
                                        style={
                                            "background-color": "#2196F3"
                                        },
                                    )),
                            ],
                        )
                    ],
                ))
            cards_count += 1
        print(
            "Initial loading time for the best agent of all scenarios = {:.1f} seconds"
            .format(time.time() - start_time))
    return cards_list
import time

from pathos.multiprocessing import ProcessPool

# Demonstrates the blocking, asynchronous and iterator based map variants of
# a pathos ProcessPool, plus its single-call "pipe" variants.
# Every message uses the print() call form: the bare Python 2 print
# statements that were mixed in here are a SyntaxError on Python 3.

# instantiate and configure the worker pool
pool = ProcessPool(nodes=3)

print("- Do a blocking (=synchronous) map on the chosen function")
print(pool.map(pow, [1, 2, 3, 4], [5, 6, 7, 8]))

print("- Do a non-blocking (=asynchronous) map, then get the results")
results = pool.amap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
# Poll until the asynchronous result is available.
while not results.ready():
    time.sleep(1)
    print(".")
print(results.get())

print("- Do a non-blocking (=asynchronous) map, then extract the results from the iterator")
results = pool.imap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
print("...")
print(list(results))

print("- Do one item at a time, using a pipe")
print(pool.pipe(pow, 1, 5))
print(pool.pipe(pow, 2, 6))

print("- Do one item at a time, using a non-blocking (=asynchronous) pipe")
result1 = pool.apipe(pow, 1, 5)
result2 = pool.apipe(pow, 2, 6)
print(result1.get())
print(result2.get())
'''
Variation pathos, local method, iterated imap
Windows OS: Hangs/ multiprocess error with newer version of pathos
Mac OS:
Linux: Debian Hangs
Cloud-based:
    Repl.it: Works
    Ideone.com: Fails-multiprocess error
'''
from multiprocess import freeze_support
from pathos.multiprocessing import ProcessPool


def f(vars):
    """Raise the first element of ``vars`` to the power of the second."""
    base, exponent = vars[0], vars[1]
    return base**exponent


if __name__ == "__main__":
    freeze_support()
    workers = ProcessPool(4)
    print("start")
    # Results are printed one by one as the iterator yields them.
    powers = workers.imap(f, [(1, 5), (2, 8), (3, 9)])
    for value in powers:
        print(value)
def testingKWT(self, tid, ip, parallel, update__status, num_processes):
    """Compute the keyword terms of one tracker, optionally in parallel.

    Reads the tracker row for ``tid``, extracts the blog site ids from the
    parenthesised part of its stored query, fetches the top terms with
    ``getTopKWS`` (retrying once on failure) and runs ``single_process`` on
    one ``(term, blog_ids, tid)`` tuple per term.

    Args:
        tid: Tracker id.
        ip: Not used by this method.
        parallel: Run ``single_process`` through a ``ProcessPool`` when true.
        update__status: In parallel mode, report progress with
            ``updateStatus`` after every finished term.
        num_processes: Pool size used in parallel mode.
    """
    conf = getconf2()
    s = SqlFuncs(conf)

    # Get blogsites in tracker
    # NOTE(review): the connection is not closed anywhere in this method.
    connection = s.get_connection(conf)
    with connection.cursor() as cursor:
        # NOTE(review): tid is interpolated straight into the SQL text;
        # consider a parameterised query if tid can come from outside.
        cursor.execute(f"""select * from trackers where tid = {tid}""")
        records = cursor.fetchall()
        if records:
            query = records[0]['query']
            if 'blogsite_id in (' in query:
                # Text between the first "(" and the first ")".
                blog_ids = query[query.find("(") + 1:query.find(")")]
                if blog_ids and 'NaN' not in blog_ids:
                    # Drop a trailing comma left in the id list.
                    blog_ids = blog_ids[:-1] if ',' == blog_ids[
                        -1] else blog_ids
                    cursor.execute(
                        f"""select post from blogposts where blogsite_id in ({blog_ids})"""
                    )
                    # NOTE(review): these rows are not read again below.
                    records = cursor.fetchall()

                    # Get top terms from blog_ids
                    try:
                        terms_result = getTopKWS(blog_ids)
                    except Exception as e:
                        print(e)
                        # One retry; an empty result is used if it fails too.
                        try:
                            print('Retrying...')
                            terms_result = getTopKWS(blog_ids)
                            print('success')
                        except Exception as e:
                            terms_result = []
                            print(e)

                    # Count terms and group by year
                    if terms_result:
                        data_ = []
                        for term in terms_result:
                            PARAMS = term, blog_ids, tid
                            data_.append(PARAMS)

                        if parallel:
                            print("starting multi-process")
                            process_pool = ProcessPool(num_processes)
                            pbar = tqdm(process_pool.imap(
                                single_process, data_),
                                        desc="Terms",
                                        ascii=True,
                                        file=sys.stdout,
                                        total=len(data_))
                            for x in pbar:
                                # NOTE(review): iterating a tqdm object
                                # already advances the bar; the explicit
                                # update(1) looks like double counting.
                                pbar.update(1)

                                # Update status on DB
                                if update__status:
                                    status = round(
                                        (pbar.last_print_n / len(data_)) *
                                        100)
                                    # Any value from 90 to 99 is reported
                                    # as finished.
                                    if status <= 99 and status >= 90:
                                        status = 100

                                    updateStatus(status, tid)

                            process_pool.close()
                            print("Joining pool")
                            process_pool.join()
                            print("Clearing pool")
                            process_pool.clear()
                            print("Finished!")
                        else:
                            for x in data_:
                                single_process(x)