def plot_cats_qsims(dbs_dir, n_cpus=1):
    '''Plot discharge simulations for every catchment for every kfold using
    its prm_vecs.

    Every ``cat_*.hdf5`` file directly inside ``dbs_dir`` is handed to
    ``plot_cat_qsims``, either in worker processes or serially.

    Parameters
    ----------
    dbs_dir : str
        Directory that is searched for ``cat_*.hdf5`` files.
    n_cpus : int
        Upper bound on worker processes; capped at the number of files found.

    Raises
    ------
    AssertionError
        If no matching file is found.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))
    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        try:
            print(list(mp_pool.uimap(plot_cat_qsims, cats_dbs)))

        finally:
            # Release the workers even when a task raises; clear() also
            # drops the pool from the pathos cache.
            mp_pool.close()
            mp_pool.join()
            mp_pool.clear()

    else:
        for cat_db in cats_dbs:
            plot_cat_qsims(cat_db)

    return
def plot_cats_vars_errors(dbs_dir, err_var_labs, n_cpus):
    '''Call ``plot_cat_vars_errors`` for every catchment database.

    Every ``cat_*.hdf5`` file directly inside ``dbs_dir`` is paired with
    ``err_var_labs`` and passed, as a single ``(path, err_var_labs)`` tuple,
    to ``plot_cat_vars_errors``.

    Parameters
    ----------
    dbs_dir : str
        Directory that is searched for ``cat_*.hdf5`` files.
    err_var_labs : object
        Forwarded unchanged to ``plot_cat_vars_errors``.
    n_cpus : int
        Upper bound on worker processes; capped at the number of files found.

    Raises
    ------
    AssertionError
        If no matching file is found.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))
    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    cats_paths = [(cat_db, err_var_labs) for cat_db in cats_dbs]

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        try:
            print(list(mp_pool.uimap(plot_cat_vars_errors, cats_paths)))

        finally:
            # Release the workers even when a task raises; clear() also
            # drops the pool from the pathos cache.
            mp_pool.close()
            mp_pool.join()
            mp_pool.clear()

    else:
        for cat_paths in cats_paths:
            plot_cat_vars_errors(cat_paths)

    return
def plot_cats_prm_vecs(dbs_dir, n_cpus):
    '''Plot final parameter set from kfold for every catchment along with
    objective function value distribution.

    Every ``cat_*.hdf5`` file directly inside ``dbs_dir`` is handed to
    ``plot_cat_prm_vecs``, either in worker processes or serially.

    Parameters
    ----------
    dbs_dir : str
        Directory that is searched for ``cat_*.hdf5`` files.
    n_cpus : int
        Upper bound on worker processes; capped at the number of files found.

    Raises
    ------
    AssertionError
        If no matching file is found.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))
    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        try:
            print(list(mp_pool.uimap(plot_cat_prm_vecs, cats_dbs)))

        finally:
            # Release the workers even when a task raises; clear() also
            # drops the pool from the pathos cache.
            mp_pool.close()
            mp_pool.join()
            mp_pool.clear()

    else:
        for cat_db in cats_dbs:
            plot_cat_prm_vecs(cat_db)

    return
def plot_cats_hbv_sim(dbs_dir,
                      water_bal_step_size,
                      full_flag=False,
                      wat_bal_flag=False,
                      show_warm_up_steps_flag=False,
                      n_cpus=1):
    '''Plot hbv simulations for every catchment for every kfold.

    Every ``cat_*.hdf5`` file directly inside ``dbs_dir`` is passed to
    ``plot_cat_hbv_sim`` as ``(path, const_args)``, where ``const_args`` is
    ``(water_bal_step_size, full_flag, wat_bal_flag,
    show_warm_up_steps_flag)``.

    Parameters
    ----------
    dbs_dir : str
        Directory that is searched for ``cat_*.hdf5`` files.
    water_bal_step_size, full_flag, wat_bal_flag, show_warm_up_steps_flag
        Forwarded unchanged to ``plot_cat_hbv_sim``.
    n_cpus : int
        Upper bound on worker processes; capped at the number of files found.

    Raises
    ------
    AssertionError
        If no matching file is found.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))
    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    const_args = (water_bal_step_size,
                  full_flag,
                  wat_bal_flag,
                  show_warm_up_steps_flag)

    plot_args_list = [(cat_db, const_args) for cat_db in cats_dbs]

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        try:
            print(list(mp_pool.uimap(plot_cat_hbv_sim, plot_args_list)))

        finally:
            # Release the workers even when a task raises; clear() also
            # drops the pool from the pathos cache.
            mp_pool.close()
            mp_pool.join()
            mp_pool.clear()

    else:
        for plot_args in plot_args_list:
            plot_cat_hbv_sim(plot_args)

    return
def plot_cats_kfold_effs(dbs_dir, hgs_db_path, compare_ann_cyc_flag, n_cpus):
    '''Plot the k-fold efficiency results.

    Every ``cat_*.hdf5`` file directly inside ``dbs_dir`` is passed to
    ``plot_cat_kfold_effs`` as ``(path, const_args)``, where ``const_args``
    is ``(compare_ann_cyc_flag, hgs_db_path)`` (note the order).

    Parameters
    ----------
    dbs_dir : str
        Directory that is searched for ``cat_*.hdf5`` files.
    hgs_db_path, compare_ann_cyc_flag
        Forwarded unchanged to ``plot_cat_kfold_effs``.
    n_cpus : int
        Upper bound on worker processes; capped at the number of files found.

    Raises
    ------
    AssertionError
        If no matching file is found.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))
    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    const_args = (compare_ann_cyc_flag, hgs_db_path)

    cats_paths = [(cat_db, const_args) for cat_db in cats_dbs]

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        try:
            print(list(mp_pool.uimap(plot_cat_kfold_effs, cats_paths)))

        finally:
            # Release the workers even when a task raises; clear() also
            # drops the pool from the pathos cache.
            mp_pool.close()
            mp_pool.join()
            mp_pool.clear()

    else:
        for cat_paths in cats_paths:
            plot_cat_kfold_effs(cat_paths)

    return
def plot_cats_best_prms_1d(dbs_dir, n_cpus):
    '''Plot every best kfold parameter set for all catchments.

    Every ``cat_*.hdf5`` file directly inside ``dbs_dir`` is handed to
    ``plot_cat_best_prms_1d``, either in worker processes or serially.

    Parameters
    ----------
    dbs_dir : str
        Directory that is searched for ``cat_*.hdf5`` files.
    n_cpus : int
        Upper bound on worker processes; capped at the number of files found.

    Raises
    ------
    AssertionError
        If no matching file is found.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))
    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        try:
            print(list(mp_pool.uimap(plot_cat_best_prms_1d, cats_dbs)))

        finally:
            # Release the workers even when a task raises; clear() also
            # drops the pool from the pathos cache.
            mp_pool.close()
            mp_pool.join()
            mp_pool.clear()

    else:
        for cat_db in cats_dbs:
            plot_cat_best_prms_1d(cat_db)

    return
def main():
    """Remove the previous row-count log and publish the enabled topics.

    Each enabled sender is submitted asynchronously to a process pool with its
    topic and Kafka key; the function then waits for the pool to drain.
    The results of ``amap`` are not collected, so an exception raised inside
    a sender is not re-raised here.
    """
    # NOTE(review): the backslash separator is hard-coded, so this only names
    # a file inside a ``log`` directory on Windows - confirm that is intended.
    log_path = ('log\\' + GeneralConfig.ENV + '_' +
                GeneralConfig.TOPIC_PREFIX + '_' +
                GeneralConfig.ROWCOUNT_LOG_FILE)
    if os.path.exists(log_path):
        os.remove(log_path)

    # Leave one core free, but always use at least one worker.
    pool = ProcessPool(nodes=cpu_count() - 1 or 1)
    try:
        # Senders that are currently disabled are kept here for reference.
        # pool.amap(send_aftermarket_part, [AftermarketPartConfig.TOPIC], [AftermarketPartConfig.KAFKA_KEY])
        # pool.amap(send_description, [DescriptionConfig.TOPIC], [DescriptionConfig.KAFKA_KEY])
        # pool.amap(send_engineering_part, [EngineeringPartConfig.TOPIC], [EngineeringPartConfig.KAFKA_KEY])
        # pool.amap(send_engineering_part_function, [EngineeringPartFunctionConfig.TOPIC], [EngineeringPartFunctionConfig.KAFKA_KEY])
        # pool.amap(send_engineering_part_usage, [EngineeringPartUsageConfig.TOPIC], [EngineeringPartUsageConfig.KAFKA_KEY])
        # pool.amap(send_feature, [FeatureConfig.TOPIC], [FeatureConfig.KAFKA_KEY])
        # pool.amap(send_feature_family, [FeatureFamilyConfig.TOPIC], [FeatureFamilyConfig.KAFKA_KEY])
        pool.amap(send_hierarchy, [HierarchyConfig.TOPIC], [HierarchyConfig.KAFKA_KEY])
        # pool.amap(send_hierarchy_illustration, [HierarchyIllustrationConfig.TOPIC], [HierarchyIllustrationConfig.KAFKA_KEY])
        # pool.amap(send_hierarchy_usage, [HierarchyUsageConfig.TOPIC], [HierarchyUsageConfig.KAFKA_KEY])
        # pool.amap(send_section_callout, [SectionCalloutConfig.TOPIC], [SectionCalloutConfig.KAFKA_KEY])
        # pool.amap(send_section_part_usage, [SectionPartUsageConfig.TOPIC], [SectionPartUsageConfig.KAFKA_KEY])
        # pool.amap(send_supersession, [SupersessionConfig.TOPIC], [SupersessionConfig.KAFKA_KEY])
        # pool.amap(send_intray, [IntrayConfig.TOPIC], [IntrayConfig.KAFKA_KEY])
        # pool.amap(send_vin, [VinConfig.TOPIC], [VinConfig.KAFKA_KEY])
    finally:
        pool.close()
        pool.join()
        # pathos caches pools; clear() drops the closed one so that a later
        # ProcessPool with the same node count does not get it back.
        pool.clear()
def _prep_anomaly_bjs_mp(anoms_arr, bjs_arr, n_cpus, fig_out_dir):
    """Plot anomaly/bjs CDFs for column chunks of the inputs in parallel.

    The columns of ``anoms_arr`` and ``bjs_arr`` are cut into ``n_cpus``
    chunks using the boundaries returned by ``ret_mp_idxs`` and every chunk
    is passed to ``Anomaly._plot_anomaly_bjs_cdf`` in a worker process.
    Any exception raised while mapping is printed, not propagated.

    Parameters
    ----------
    anoms_arr, bjs_arr : array-like
        Two-dimensional arrays of identical shape (asserted).
    n_cpus : int
        Number of worker processes and of column chunks.
    fig_out_dir : object
        Forwarded unchanged to every worker call.
    """
    assert anoms_arr.shape == bjs_arr.shape

    # presumably ret_mp_idxs returns n_cpus + 1 ascending column boundaries;
    # consecutive pairs then delimit one chunk each - confirm in ret_mp_idxs.
    _idxs = ret_mp_idxs(anoms_arr.shape[1], n_cpus)
    _idxs_list = [_idxs[i:i + 2] for i in range(n_cpus)]

    _anoms_gen = (anoms_arr[:, beg:end] for beg, end in _idxs_list)
    _bjs_gen = (bjs_arr[:, beg:end] for beg, end in _idxs_list)

    mp_pool = ProcessPool(n_cpus)
    mp_pool.restart(True)

    try:
        print(
            list(
                mp_pool.uimap(Anomaly._plot_anomaly_bjs_cdf,
                              _idxs_list,
                              _anoms_gen,
                              _bjs_gen,
                              [fig_out_dir] * n_cpus)))

    except Exception as msg:
        print('Error in _plot_anomaly_bjs_cdf:', msg)

    finally:
        # Same shutdown on success and on failure, so no closed pool is left
        # behind in the pathos cache.
        mp_pool.close()
        mp_pool.join()
        mp_pool.clear()

    return
def main_jagcat():
    """Remove the previous row-count log and publish all JagCat topics.

    Each sender is submitted asynchronously to a process pool together with
    the ``TOPIC`` and ``KAFKA_KEY`` of its config class; the function then
    waits for the pool to drain. The results of ``amap`` are not collected,
    so an exception raised inside a sender is not re-raised here.
    """
    # NOTE(review): the backslash separator is hard-coded, so this only names
    # a file inside a ``log`` directory on Windows - confirm that is intended.
    log_path = ('log\\' + GeneralConfig.ENV + '_' +
                GeneralConfig.TOPIC_PREFIX + '_' +
                GeneralConfig.ROWCOUNT_LOG_FILE)
    if os.path.exists(log_path):
        os.remove(log_path)

    # Submission order is preserved from top to bottom.
    jobs = (
        (send_part_meta, PartMetaConfig),
        (send_intray, IntrayConfig),
        (send_description, DescriptionConfig),
        (send_feature, FeatureConfig),
        (send_feature_family, FeatureFamilyConfig),
        (send_hierarchy, HierarchyConfig),
        (send_hierarchy_illustration, HierarchyIllustrationConfig),
        (send_hierarchy_usage, HierarchyUsageConfig),
        (send_section_callout, SectionCalloutConfig),
        (send_section_part_usage, SectionPartUsageConfig),
        (send_vin, VinConfig),
    )

    # Leave one core free, but always use at least one worker.
    pool = ProcessPool(nodes=cpu_count() - 1 or 1)
    try:
        for sender, config in jobs:
            pool.amap(sender, [config.TOPIC], [config.KAFKA_KEY])
    finally:
        pool.close()
        pool.join()
        # pathos caches pools; clear() drops the closed one so that a later
        # ProcessPool with the same node count does not get it back.
        pool.clear()
def align_dataset(config_file: str):
    """Align every image of the dataset described by ``config_file``.

    The config supplies ``input_dir``, ``output_dir`` and ``num_processes``.
    Each dataset entry is handed to ``align_person`` as ``(person, config)``,
    in a process pool when more than one process is requested, otherwise
    serially. Afterwards the output directory is passed to
    ``transform_to_lfw_format`` and the global counters are printed.

    Args:
        config_file: Path given to ``AlignConfig``.
    """
    config = AlignConfig(config_file)
    output_dir = os.path.expanduser(config.output_dir)
    os.makedirs(output_dir, exist_ok=True)

    dataset = get_dataset(config.input_dir)
    TIMER.max_value = sum(len(person) for person in dataset)
    TIMER.start()

    # os.cpu_count() may return None; fall back to one core instead of
    # failing inside min().
    cpu_total = os.cpu_count() or 1
    num_processes = min(config.num_processes, cpu_total)
    if num_processes == -1:
        # -1 requests "use every available core".
        num_processes = cpu_total

    if num_processes > 1:
        process_pool = ProcessPool(num_processes)
        try:
            # The result iterator is deliberately not consumed; close/join
            # below waits for the submitted work to finish.
            process_pool.imap(align_person,
                              ((person, config) for person in dataset))
        finally:
            process_pool.close()
            process_pool.join()
            # Drop the closed pool from the pathos cache.
            process_pool.clear()
    else:
        for person in dataset:
            align_person((person, config))

    transform_to_lfw_format(output_dir, num_processes)
    TIMER.finish()
    print('Total number of images: %d' % int(NUM_IMAGES_TOTAL.value))
    print('Number of faces found and aligned: %d' % int(NUM_SUCESSFUL.value))
    print('Number of unsuccessful: %d' % int(NUM_UNSECESSFUL.value))
def normalize(self) -> None:
    """ Normalize ensemble

    Unless ``self.regenerate`` is set, a previously saved result is loaded
    and the method returns immediately; a missing file falls through to a
    fresh computation. Otherwise ``self.step`` is mapped over the extractor's
    nld/gsf pairs in a process pool, the results are stored in ``self.res``
    and ``self.save()`` is called.

    Raises:
        AssertionError: unless exactly one of "both ``normalizer_nld`` and
            ``normalizer_gsf``" or "``normalizer_simultan``" is configured.
    """
    if not self.regenerate:
        try:
            self.load()
        except FileNotFoundError:
            # Nothing saved yet: compute from scratch below.
            pass
        else:
            return

    assert xor(
        (self.normalizer_nld is not None
         and self.normalizer_gsf is not None),
        self.normalizer_simultan is not None), (
            "Either 'normalizer_nld' and 'normalizer_gsf' must be set, or "
            "normalizer_simultan")

    gsfs = self.extractor.gsf
    nlds = self.extractor.nld

    self.LOG.info(f"Start normalization with {self.nprocesses} cpus")
    workers = ProcessPool(nodes=self.nprocesses)
    n_members = len(nlds)
    # imap is lazy; tqdm drives it and reports progress per finished member.
    pending = pool_results = workers.imap(self.step, range(n_members),
                                          nlds, gsfs)
    self.res = list(tqdm(pending, total=n_members))
    workers.close()
    workers.join()
    workers.clear()

    self.save()
def make_cache():
    """Build every (agent, scenario) episode and store it in the RAM cache.

    Episodes are created with ``make_episode_without_decorate``, serially when
    ``n_cores == 1`` and in a process pool otherwise. Every episode that is
    not ``None`` is then decorated with the data returned by
    ``retrieve_episode_from_disk`` and handed to ``save_in_ram_cache``.
    """
    from grid2viz.src.manager import (
        scenarios,
        agents,
        make_episode_without_decorate,
        n_cores,
        retrieve_episode_from_disk,
        save_in_ram_cache,
        cache_dir,
    )

    from pathos.multiprocessing import ProcessPool

    if not os.path.exists(cache_dir):
        print(
            "Starting Multiprocessing for reading the best agent of each scenario"
        )

    # TODO: not every agent has necessarily been run on exactly the same
    # scenarios. Avoid an error when an agent has not run on a scenario.
    agent_scenario_list = [(agent, scenario)
                           for agent in agents
                           for scenario in scenarios]

    if n_cores == 1:  # no multiprocessing; useful for debugging
        agents_data = [
            make_episode_without_decorate(agent, scenario)
            for agent, scenario in agent_scenario_list
        ]
    else:
        pool = ProcessPool(n_cores)
        try:
            # One task per (agent, scenario) pair, in list order.
            agents_data = list(
                pool.imap(
                    make_episode_without_decorate,
                    [agent for agent, _ in agent_scenario_list],
                    [scenario for _, scenario in agent_scenario_list],
                ))
        finally:
            pool.close()
            pool.join()
            # Drop the closed pool from the pathos cache.
            pool.clear()
        print("Multiprocessing done")

    #####
    # saving data on disk
    for i, agent_episode in enumerate(agents_data):
        print(i)
        if agent_episode is not None:
            episode_data = retrieve_episode_from_disk(
                agent_episode.episode_name, agent_episode.agent)
            agent_episode.decorate(episode_data)
            save_in_ram_cache(agent_episode.episode_name,
                              agent_episode.agent, agent_episode)
def correction_factor(p, number_of_runs, method, X, y, n_jobs=None):
    """Return mean and standard deviation of a per-run correction factor.

    Each run falsifies ``y`` with ``falsify(y, p, random_state=run)``, lets
    ``method.fit_transform(X, y_f)`` correct the labels and evaluates
    ``(n_equal - (1 - p) * N) / (p * N)``, where ``n_equal`` counts corrected
    labels equal to ``y`` and ``N = X.shape[0]``.

    Args:
        p: Passed to ``falsify``; also used in the formula above, so it must
            be non-zero.
        number_of_runs: Number of runs; the run index is the random state.
        method: Object providing ``fit_transform(X, y)``.
        X: Samples; only ``X.shape[0]`` is read here.
        y: Reference labels.
        n_jobs: Worker count; ``-1`` uses ``cpu_count()``, ``None`` uses 1.

    Returns:
        Tuple ``(mean, std)`` over all runs.
    """
    # Setup parallel job
    if n_jobs == -1:
        n_jobs = cpu_count()
    elif n_jobs is None:
        n_jobs = 1

    pool = Pool(n_jobs, maxtasksperchild=1000)

    def run(seed):
        # Artificially falsify
        y_f = falsify(y, p, random_state=seed)

        # Correct labels
        y_corrected = method.fit_transform(X, y_f)

        N = X.shape[0]
        return ((y == y_corrected).sum() - (1 - p) * N) / (p * N)

    try:
        factor = np.array(pool.map(run, range(number_of_runs)))
    finally:
        # Close the pool again, also when a run raises.
        pool.close()
        pool.join()
        pool.clear()

    return np.mean(factor), np.std(factor)
def pare_multi_process(urls_and_people):
    """Run ``safe_pare_matches_and_download`` for every (urls, person) pair.

    The pairs are split into two parallel sequences and mapped over a pool of
    ``NUM_PROCESSES`` workers. The result iterator is not consumed; the
    function waits for the pool to drain instead. An empty input is a no-op
    apart from the two log lines.

    Args:
        urls_and_people: Iterable of ``(urls, person)`` pairs.
    """
    print("[INFO] Paring and downlaoding all image urls with {} processes".
          format(NUM_PROCESSES))

    pairs = list(urls_and_people)
    if pairs:  # zip(*[]) yields nothing to unpack, so skip empty input
        urls, person = zip(*pairs)
        pare_pool = ProcessPool(NUM_PROCESSES)
        try:
            pare_pool.imap(safe_pare_matches_and_download, urls, person)
        finally:
            pare_pool.close()
            pare_pool.join()
            # Drop the closed pool from the pathos cache.
            pare_pool.clear()

    print("[INFO] Done paring and downlaoding all image urls")
def _prepare_eigen(self):
    """
    Calculate eigenvalues and eigenvectors for all k-points and store them.

    Note that convention 2 is used here, where the phase factor is
    e^(ik.R), not e^(ik.(R+rj-ri)).

    Fills ``self.evals``, ``self.evecs``, ``self.H0`` (the k-average of H)
    and, for a non-orthogonal basis, ``self.S``. When ``self._use_cache`` is
    set, the arrays are written to memory-mapped files under
    ``self.cache_path`` and the attributes are deleted afterwards.
    """
    nkpts = len(self.kpts)
    nbasis = self.nbasis
    per_k_shape = (nkpts, nbasis, nbasis)

    self.nkpts = nkpts
    self.evals = np.zeros((nkpts, nbasis), dtype=float)
    self.H0 = np.zeros((nbasis, nbasis), dtype=complex)
    self.evecs = np.zeros(per_k_shape, dtype=complex)
    H = np.zeros(per_k_shape, dtype=complex)
    if self.is_orthogonal:
        self.S = None
    else:
        self.S = np.zeros(per_k_shape, dtype=complex)

    if self.nproc == 1:
        # NOTE(review): the serial path relies on HSE_k's default convention,
        # the parallel path passes 2 explicitly - confirm the default is 2.
        results = map(self.tbmodel.HSE_k, self.kpts)
    else:
        executor = ProcessPool(nodes=self.nproc)
        results = executor.map(self.tbmodel.HSE_k, self.kpts, [2] * nkpts)
        executor.close()
        executor.join()
        executor.clear()

    for ik, result in enumerate(results):
        hk, sk, evals_k, evecs_k = result
        H[ik] = hk
        if not self.is_orthogonal:
            self.S[ik] = sk
        self.evals[ik] = evals_k
        self.evecs[ik] = evecs_k
        self.H0 += H[ik] / self.nkpts

    # Keep only the states inside the window around the Fermi energy.
    self.evals, self.evecs = self._reduce_eigens(self.evals,
                                                 self.evecs,
                                                 emin=self.efermi - 10.0,
                                                 emax=self.efermi + 10.1)

    if self._use_cache:
        in_memory_evecs = self.evecs
        self.evecs_shape = in_memory_evecs.shape
        self.evecs = np.memmap(os.path.join(self.cache_path, 'evecs.dat'),
                               mode='w+',
                               shape=self.evecs_shape,
                               dtype=complex)
        self.evecs[:, :, :] = in_memory_evecs[:, :, :]

        if self.is_orthogonal:
            self.S = None
        else:
            in_memory_S = self.S
            self.S = np.memmap(os.path.join(self.cache_path, 'S.dat'),
                               mode='w+',
                               shape=(nkpts, self.nbasis, self.nbasis),
                               dtype=complex)
            self.S[:] = in_memory_S[:]

        # The data now lives in the cache files; drop the attributes.
        del self.evecs
        if not self.is_orthogonal:
            del self.S
def parallelize_simulations(simulation_execs: List[Callable],
                            var_dict_list: List[VarDictType],
                            states_lists: List[StatesListsType],
                            configs_structs: List[ConfigsType],
                            env_processes_list: List[EnvProcessesType],
                            Ts: List[range],
                            SimIDs,
                            Ns: List[int],
                            ExpIDs: List[int],
                            SubsetIDs,
                            SubsetWindows,
                            configured_n):
    """Run the configured simulations in a process pool of thread pools.

    The per-configuration arguments are zipped into one tuple per
    configuration, cut into ``sim_count`` equally sized consecutive groups
    (``sim_count`` being the highest multiplicity of any value in
    ``SimIDs``), and each group is dispatched to a worker process that runs
    its members in a thread pool. ``ExpIDs`` is accepted but not used.

    Returns the flattened results of all groups.
    """
    print('Execution Mode: parallelized')

    params = list(
        zip(simulation_execs, var_dict_list, states_lists, configs_structs,
            env_processes_list, Ts, SimIDs, Ns, SubsetIDs, SubsetWindows))

    len_configs_structs = len(configs_structs)

    sim_count = max(Counter(SimIDs).values())
    highest_divisor = int(len_configs_structs / sim_count)

    # Consecutive, equally sized [start, stop) windows, one per group.
    windows = [(group * highest_divisor, (group + 1) * highest_divisor)
               for group in range(sim_count)]
    new_params = [params[start:stop] for start, stop in windows]
    new_configs_structs = [configs_structs[start:stop]
                           for start, stop in windows]

    def threaded_executor(params):
        def run_single(t):
            # t[0] is the simulation callable, t[1:10] are its arguments.
            return t[0](*t[1:10], configured_n)

        tp = TPool()
        if len_configs_structs > 1:
            results = tp.map(run_single, params)
        else:
            results = run_single(params[0])
        tp.close()
        return results

    pp = PPool()
    per_group = list(pp.map(lambda params: threaded_executor(params),
                            new_params))
    results = flatten(per_group)
    pp.close()
    pp.join()
    pp.clear()
    # pp.restart()

    return results
def accuracy(p, number_of_runs, method, X, y, clf=None, n_jobs=None):
    '''
    Compare classifier accuracy on corrected versus falsified labels.

    Each run falsifies ``y`` with ``falsify(y, p, random_state=run)``, lets
    ``method.fit_transform(X, y_f)`` correct the labels, and scores ``clf``
    over 10 folds (built by ``kfold``) once for the corrected and once for
    the falsified labels.

    Args:
        p: Passed to ``falsify``.
        number_of_runs: Number of runs; the run index is the random state.
        method: Object providing ``fit_transform(X, y)``.
        X, y: Samples and reference labels.
        clf: Classifier with ``fit``/``score``; defaults to
            ``NNC(n_neighbors=1)``.
        n_jobs: Worker count; ``-1`` uses ``cpu_count()``, ``None`` uses 1.

    Returns:
        (array{mean_acc_corrected, mean_acc_false},
         array{stddev_acc_corrected, stddev_acc_false})
    '''
    # Setup parallel job
    if n_jobs == -1:
        n_jobs = cpu_count()
    elif n_jobs is None:
        n_jobs = 1

    pool = Pool(n_jobs, maxtasksperchild=1000)

    if clf is None:
        clf = NNC(n_neighbors=1)

    def run(seed):
        # Artificially falsify
        y_f = falsify(y, p, random_state=seed)

        # Correct labels
        y_corrected = method.fit_transform(X, y_f)

        # Set up 10-fold-Cross validation
        train_corr, test_corr = kfold(10, X, y, y_corrected)
        train_f, test_f = kfold(10, X, y, y_f)

        # Row 0: corrected labels, row 1: falsified labels.
        score = np.zeros((2, 10))

        # Calc scores
        for fold in range(10):
            train_X, train_y = train_corr[fold]
            test_X, test_y = test_corr[fold]
            clf.fit(train_X, train_y)
            score[0, fold] = clf.score(test_X, test_y)

            train_X, train_y = train_f[fold]
            test_X, test_y = test_f[fold]
            clf.fit(train_X, train_y)
            score[1, fold] = clf.score(test_X, test_y)

        # Average
        return np.mean(score, axis=1).tolist()

    try:
        acc = np.array(pool.map(run, range(number_of_runs)))
    finally:
        # Close the pool again, also when a run raises.
        pool.close()
        pool.join()
        pool.clear()

    return np.mean(acc, axis=0), np.std(acc, axis=0)
def MyProcessPool(nodes=None):
    """Yield a pool suited to ``nodes`` and release it afterwards.

    With ``nodes`` equal to ``None`` or greater than 1 a ``ProcessPool`` is
    yielded and closed, joined and cleared once the generator is resumed or
    closed. Otherwise a ``PseudoPool`` is yielded and nothing is cleaned up.
    """
    wants_processes = nodes is None or nodes > 1
    if not wants_processes:
        #print("Using PseudoPool!")
        yield PseudoPool()
        return

    pool = ProcessPool(nodes)
    try:
        yield pool
    finally:
        pool.close()
        pool.join()
        pool.clear()
def calculate_all(self):
    """
    The top level.

    Evaluates ``self.get_AijR_rhoR`` on every point of ``self.contour.path``
    (in a process pool when ``self.np > 1``), collects the per-point A_ij(R)
    entries and rho(R) values, then integrates them and derives the atomic
    densities and J tensors.
    """
    print("Green's function Calculation started.")

    bar = progressbar.ProgressBar(
        maxval=self.contour.npoints,
        widgets=[
            ' [',
            progressbar.Timer(),
            '] ',
            progressbar.Bar(),
            ' (',
            progressbar.ETA(),
            ') ',
        ])
    bar.start()

    rhoRs = []
    AijRs = {}

    use_pool = self.np > 1
    if use_pool:
        executor = ProcessPool(nodes=self.np)
        results = executor.map(self.get_AijR_rhoR, self.contour.path)
    else:
        results = map(self.get_AijR_rhoR, self.contour.path)

    for i, result in enumerate(results):
        bar.update(i)
        Aij_point, rho_point = result[0], result[1]
        for R in self.R_ijatom_dict:
            for (iatom, jatom) in self.R_ijatom_dict[R]:
                key = (R, iatom, jatom)
                # One list per (R, i, j), one entry per contour point.
                AijRs.setdefault(key, []).append(Aij_point[key])
        rhoRs.append(rho_point)

    if use_pool:
        executor.close()
        executor.join()
        executor.clear()

    #self.save_AijRs(AijRs)
    self.integrate(rhoRs, AijRs)
    self.get_rho_atom()
    self.A_to_Jtensor()
    bar.finish()
def process_executor(params):
    """Run every parameter tuple in ``params`` and return the result(s).

    Each tuple ``t`` is executed as ``t[0](t[1], ..., t[9], configured_n)``.
    With more than one configuration (``len_configs_structs > 1``, taken from
    the enclosing scope) the tuples are mapped over a process pool and the
    mapped results are returned; otherwise only ``params[0]`` is executed
    in-process and its bare result is returned.
    """
    def run_single(t):
        return t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9],
                    configured_n)

    if not len_configs_structs > 1:
        return run_single(params[0])

    pp = PPool(processes=len_configs_structs)
    results = pp.map(run_single, params)
    pp.close()
    pp.join()
    pp.clear()
    return results
def threaded_contents_to_text(
        content_series,
        processes=None,
        none_content='raise',
):
    """Parallel version of the content_to_text method.

    Takes a series whose index holds the uid of the products and whose values
    are the contents (as bytes) of the documents. Each value is wrapped in a
    ``BytesIO`` and decoded by ``PDFDecoder.content_to_text`` in a process
    pool.

    Args:
        content_series: pandas Series of document contents.
        processes: Number of processes to launch. If omitted (or falsy), it
            defaults to the number of cpu cores on the machine.
        none_content: 'raise' (default) or 'to_empty'; forwarded to
            ``PDFDecoder.content_to_text``.

    Returns:
        pandas Series of the decoded texts, indexed like ``content_series``.
    """
    processer = partial(
        PDFDecoder.content_to_text,
        none_content=none_content,
    )
    processes = processes if processes else cpu_count()
    print(f'Launching {processes} processes.')
    in_ds = content_series.apply(BytesIO)

    # Pool with a context manager does not seem to work due to issue 38501 of
    # the standard python library. It hangs when running tests through pytest
    # see: https://bugs.python.org/issue38501
    # The block below should be tested again whenever this issue is closed:
    #
    # with Pool(nodes=processes) as pool:
    #     tuples = (list(in_ds.index),
    #               pool.map(processer, in_ds))
    #
    # Temporary solution, to be removed once the tests mentioned above are
    # successful: close the pool after execution or exception. The pool is
    # created before the ``try`` so that a failing constructor is not masked
    # by an unbound ``pool`` in the cleanup.
    pool = Pool(nodes=processes)
    try:
        pool.restart(force=True)
        uids = list(in_ds.index)
        texts = pool.map(processer, in_ds)
    finally:
        pool.close()
    # End of block

    ds = pd.Series(texts, index=uids)
    return ds
def threaded_texts_to_blocks(text_series,
                             processes=None,
                             split_func=lambda x: x.split('\n\n'),
                             return_type='along_index'):
    """Parallel version of the text_to_blocks_series method.

    Takes a series whose index holds the uid of the products and whose values
    are the documents. Every (value, index) pair is passed to
    ``PDFDecoder.text_to_blocks_series`` in a process pool and the resulting
    objects are concatenated along axis 0.

    Args:
        text_series: pandas Series of documents.
        processes: Number of processes to launch. If omitted (or falsy), it
            defaults to the number of cpu cores on the machine.
        split_func: Callable that splits one text into blocks; the default
            splits on blank lines.
        return_type: Forwarded to ``text_to_blocks_series``; see that
            function for the accepted values (default here: 'along_index').

    Returns:
        The concatenation (``pd.concat``) of all per-document results.
    """
    processer = partial(PDFDecoder.text_to_blocks_series,
                        split_func=split_func,
                        return_type=return_type)
    processes = processes if processes else cpu_count()
    print(f'Launching {processes} processes.')

    # Pool with a context manager does not seem to work due to issue 38501 of
    # the standard python library. It hangs when running tests through pytest
    # see: https://bugs.python.org/issue38501
    # The block below should be tested again whenever this issue is closed:
    #
    # with Pool(nodes=processes) as pool:
    #     ds_list = pool.map(processer, text_series, text_series.index)
    #
    # Temporary solution, to be removed once the tests mentioned above are
    # successful: close the pool after execution or exception. The pool is
    # created before the ``try`` so that a failing constructor is not masked
    # by an unbound ``pool`` in the cleanup.
    pool = Pool(nodes=processes)
    try:
        pool.restart(force=True)
        ds_list = pool.map(processer, text_series, text_series.index)
    finally:
        pool.close()
    # End of block

    ds = pd.concat(ds_list, axis=0)
    return ds
def avaliacao(self, populacao):
    """Evaluate the objective function for every row of ``populacao``.

    Row ``k`` (``populacao[k, :]``) is passed to ``self.funcao_objetivo`` in
    a process pool with one node per CPU. The pool is closed, joined and
    cleared and ``shutdown()`` is called before returning.

    Returns the objective values as an array, in row order.
    """
    def steps(k):
        # Objective value of the k-th individual.
        return self.funcao_objetivo(populacao[k, :])

    row_indices = range(len(populacao))

    pool = ProcessPool(nodes=cpu_count())
    pesos = array(pool.map(steps, row_indices))
    pool.close()
    pool.join()
    pool.clear()
    shutdown()
    return pesos
def apply_data(self, data: 'Data', method: Callable) -> 'Data':
    """Applies 'method' to 'data' across several cores.

    'data.data' is split along axis 0 into one chunk per CPU, 'method' is
    mapped over the chunks in a pool, and the vertically stacked results are
    written back to 'data.data'.

    Args:
        data ('Data'): instance whose 'data' attribute holds the values to
            process.
        method (Callable): callable method or function to apply to each
            chunk.

    Returns:
        'Data': the same instance, with 'method' applied.

    """
    chunks = np.array_split(data.data, mp.cpu_count(), axis=0)
    workers = Pool()
    processed_chunks = workers.map(method, chunks)
    data.data = np.vstack(processed_chunks)
    workers.close()
    workers.join()
    workers.clear()
    return data
def main(args):
    """Process every dataset ``.tsv`` file found for ``args.source_dir``.

    With ``args.follow`` the ``.tsv`` files directly in ``args.source_dir``
    are used; otherwise ``validated.tsv`` is first split by ``dataset_split``
    and the ``.tsv`` files in the directory it returns are used. Each file is
    handed to ``each_tsv`` together with the clips directory and
    ``args.target_dir``, serially when ``args.num_process == 1`` and in a
    pool otherwise.
    """
    log.info('----------start processing---------')
    clips_dir = os.path.join(args.source_dir, 'clips')

    if args.follow:
        datasets_file = glob(args.source_dir + '/*.tsv')
    else:
        validated_tsv = args.source_dir + '/validated.tsv'
        new_path = dataset_split(validated_tsv, args.source_dir)
        datasets_file = glob(new_path + '/*.tsv')

    if args.num_process == 1:
        for tsv in tqdm(datasets_file):
            each_tsv(tsv, clips_dir, args.target_dir)
    else:
        # One constant argument per file so the three lists map in lockstep.
        targets = [args.target_dir] * len(datasets_file)
        clips_dirs = [clips_dir] * len(datasets_file)

        pool = Pool(args.num_process)
        try:
            pool.map(each_tsv, datasets_file, clips_dirs, targets)
        finally:
            pool.close()
            pool.join()
            # Drop the closed pool from the pathos cache.
            pool.clear()
def run( self, max_tasks=400 ):
    """Interpolate the prepared slices in parallel and return them stacked.

    ``self._interpna`` is mapped over the arguments produced by
    ``self._interpna_setup()``; the outputs are stacked with ``np.dstack``
    and the last axis is rolled to the front. If ``self._rotated`` is set
    the result is rotated back with ``self.rotate``.

    Parameters
    ----------
    max_tasks : int or None
        Only the first ``max_tasks`` argument sets are processed. The default
        (400) keeps the previously hard-coded limit; pass ``None`` to process
        all of them.
        NOTE(review): the limit looks like a debugging leftover - confirm.

    Returns
    -------
    The stacked (and possibly rotated back) array.
    """
    # from pathos.multiprocessing import Pool
    from pathos.multiprocessing import ProcessPool as Pool

    args = self._interpna_setup( )
    pool = Pool( processes=self.ncpus )
    try:
        out = pool.map( self._interpna, args[:max_tasks] )
    finally:
        pool.close()
        pool.join()
        # Drop the closed pool from the pathos cache.
        pool.clear()

    lons = self._lonpc
    # stack em and roll-its axis so time is dim0
    dat = np.rollaxis( np.dstack( out ), -1 )
    if self._rotated == True:  # noqa: E712 - comparison kept as written
        # rotate it back
        dat, lons = self.rotate( dat, lons, to_pacific=False )

    # place back into a new xarray.Dataset object for further processing
    # function to make a new xarray.Dataset object with the mdata we need?
    # ds = self.ds
    # var = ds[ self.variable ]
    # setattr( var, 'data', dat )
    # self.ds = ds
    print( 'ds interpolated updated into self.ds' )
    return dat
def calculate_all(self):
    """
    The top level.

    Evaluates ``self.get_AijR_rhoR`` on every point of ``self.contour.path``
    (serially when ``self.np == 1``, otherwise in a process pool), appends
    the per-point densities and the per-(R, i, j) orbital/J entries to the
    instance lists, then integrates and derives the atomic densities and
    J tensors.
    """
    print("Green's function Calculation started.")

    widgets = [
        ' [',
        progressbar.Timer(),
        '] ',
        progressbar.Bar(),
        ' (',
        progressbar.ETA(),
        ') ',
    ]
    bar = progressbar.ProgressBar(maxval=len(self.contour.path),
                                  widgets=widgets)
    bar.start()

    # Track the pool itself so cleanup runs exactly when one was created,
    # whatever value self.np has, and also when a point fails.
    pool = None
    try:
        if self.np == 1:
            results = map(self.get_AijR_rhoR, self.contour.path)
        else:
            pool = ProcessPool(nodes=self.np)
            results = pool.map(self.get_AijR_rhoR, self.contour.path)

        for i, result in enumerate(results):
            bar.update(i)
            rup, rdn, Jorb_list, JJ_list = result
            self.rho_up_list.append(rup)
            self.rho_dn_list.append(rdn)
            for R in self.R_ijatom_dict:
                for (iatom, jatom) in self.R_ijatom_dict[R]:
                    key = (R, iatom, jatom)
                    self.Jorb_list[key].append(Jorb_list[key])
                    self.JJ_list[key].append(JJ_list[key])
    finally:
        if pool is not None:
            pool.close()
            pool.join()
            pool.clear()

    self.integrate()
    self.get_rho_atom()
    self.A_to_Jtensor()
    bar.finish()
def main(args):
    """Reconstruct one map per STAR file in ``args.input``, in parallel.

    For every STAR file a ``.mrc`` map is written into ``args.output`` by
    ``do_reconstruct``; with ``args.mask`` set a masked copy is written too,
    and with ``args.delete_unmasked`` it then replaces the unmasked map.
    A failure on one file is reported on stdout and does not stop the others.

    Returns 1 when fewer than two inputs are given, otherwise 0.
    """
    if len(args.input) < 2:
        print("Please name at least one STAR file and an output directory")
        return 1

    if args.apix is None:
        print("Using pixel size computed from STAR files")

    def do_job(star):
        try:
            mrc_name = os.path.basename(star).replace(".star", ".mrc")
            mrc = os.path.join(args.output, mrc_name)
            print("Starting reconstruction of %s" % star)
            do_reconstruct(star, mrc, args.apix, args.sym, args.ctf)
            print("Wrote %s reconstruction to %s" % (star, mrc))
            if args.mask is not None:
                masked_mrc = mrc.replace(".mrc", "_masked.mrc")
                do_mask(mrc, masked_mrc, args.mask)
                print("Wrote masked map %s" % masked_mrc)
                if args.delete_unmasked:
                    delete_unmasked(mrc, masked_mrc)
                    print("Overwrote %s with %s" % (mrc, masked_mrc))
        except Exception as e:
            # Best effort: report the file and carry on with the rest.
            print("Failed on %s" % star)
        return 0

    pool = Pool(nodes=args.nproc)
    #pool.apipe(do_job, args.input)
    # imap is lazy; materialising it waits for every job to finish.
    codes = list(pool.imap(do_job, args.input))
    pool.close()
    pool.join()
    pool.terminate()
    return 0
def compute_seq_distances(sequences, affinity=sequence_distance,
                          nb_jobs=NB_THREADS):
    """ compute the symmetric matrix of all pairwise distances

    Every pair ``(i, j)`` with ``i <= j`` is evaluated once in a process pool
    and the value is mirrored into both halves of the matrix.

    :param [] sequences: list of all sequences
    :param func affinity: function specify the sample affinity
    :param int nb_jobs: number jobs running in parallel
    :return ndarray:

    >>> ss = [['a', 'b', 'a', 'c'], ['a', 'a', 'b', 'a'], ['b', None, 'b', 'a']]
    >>> compute_seq_distances(ss, affinity=sequence_distance)
    array([[0.  , 0.25, 0.5 ],
           [0.25, 0.  , 0.25],
           [0.5 , 0.25, 0.  ]])
    >>> ss = [['hi', 'there', 'how', 'are', 'you'],
    ...       ['hi', 'how', 'are', 'you'],
    ...       ['hi', 'are', 'you', 'there']]
    >>> compute_seq_distances(ss)
    array([[0. , 0.2, 0.6],
           [0.2, 0. , 0.5],
           [0.6, 0.5, 0. ]])
    """
    nb_seqs = len(sequences)
    # Upper triangle including the diagonal.
    pairs = [(row, col)
             for row in range(nb_seqs)
             for col in range(row, nb_seqs)]
    tasks = (((row, col), (sequences[row], sequences[col]))
             for row, col in pairs)
    dists = np.zeros((nb_seqs, nb_seqs))
    _wrap_dist = partial(wrap_distance, similar_distance=affinity)

    pool = ProcessPool(nb_jobs)
    for (row, col), dist in pool.imap(_wrap_dist, tasks):
        dists[row, col] = dist
        dists[col, row] = dist
    pool.close()
    pool.join()
    pool.clear()
    return dists
def multiprocess(func: Callable, all_urls: List[List[str]], people: List[str],
                 total_count: int,
                 info: str) -> Tuple[List[List[str]], List[str]]:
    """Apply ``func`` to every ``(urls, person)`` pair and collect the results.

    A fresh progress bar is stored in the global ``TIMER`` before the work
    starts. With ``NUM_PROCESSES > 1`` the pairs are mapped over a process
    pool, otherwise they are processed serially. In both cases the URLs and
    the person returned by ``func`` are what is reported back.

    Args:
        func: Callable taking ``(urls, person)`` and returning a
            ``(filtered_urls, person)`` pair.
        all_urls: One list of URLs per person.
        people: Person names, aligned with ``all_urls``.
        total_count: Maximum value of the progress bar.
        info: Label used in the log lines and the progress bar.

    Returns:
        ``(filtered_urls, people)`` as two lists aligned by position; two
        empty lists when there is nothing to process.
    """
    global TIMER
    print("[INFO] {} with {} processes".format(info, NUM_PROCESSES))

    widgets_match = ['{}: '.format(info), pb.Percentage(), ' ',
                     pb.Bar(marker=pb.RotatingMarker()), ' ', pb.ETA()]
    TIMER = pb.ProgressBar(widgets=widgets_match, maxval=total_count).start()

    if NUM_PROCESSES > 1:
        process_pool = ProcessPool(NUM_PROCESSES)
        try:
            results = list(process_pool.imap(func, all_urls, people))
        finally:
            process_pool.close()
            process_pool.join()
            # Drop the closed pool from the pathos cache.
            process_pool.clear()
    else:
        results = [func(urls, person)
                   for urls, person in zip(all_urls, people)]

    print("[INFO] Done {}".format(info))

    if not results:  # zip(*[]) yields nothing to unpack
        return [], []

    filtered_urls, kept_people = zip(*results)
    return list(filtered_urls), list(kept_people)