def plot_cats_qsims(dbs_dir, n_cpus=1):
    '''Plot discharge simulations for every catchment for every kfold
    using its prm_vecs.'''
    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    plot_gen = (cat_db for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_qsims, plot_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for plot_args in plot_gen:
            plot_cat_qsims(plot_args)

    return
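# The plot_cats_* helpers in this collection all repeat one dispatch pattern:
# glob the per-catchment 'cat_*.hdf5' databases, then fan a plotting callable
# out over a pathos ProcessPool, falling back to a serial loop. A minimal
# sketch of that shared pattern, assuming only pathos and the standard
# library (`plot_fn` is a placeholder for any of the plot_cat_* callables):

import os
from glob import glob

from pathos.multiprocessing import ProcessPool


def _dispatch_cat_plots(plot_fn, dbs_dir, n_cpus=1):
    """Run `plot_fn` once per catchment database, in parallel when possible."""
    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))
    assert cats_dbs

    n_cpus = min(len(cats_dbs), n_cpus)
    if n_cpus > 1:
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)  # revive workers if the pool was stopped earlier

        # uimap yields results in completion order; list() drains the iterator
        # so every job finishes before the pool is torn down.
        print(list(mp_pool.uimap(plot_fn, cats_dbs)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()
    else:
        for cat_db in cats_dbs:
            plot_fn(cat_db)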
def main_jagcat():
    log_file = ('log\\' + GeneralConfig.ENV + '_' + GeneralConfig.TOPIC_PREFIX +
                '_' + GeneralConfig.ROWCOUNT_LOG_FILE)
    if os.path.exists(log_file):
        os.remove(log_file)

    pool = ProcessPool(nodes=cpu_count() - 1 or 1)

    pool.amap(send_part_meta, [PartMetaConfig.TOPIC], [PartMetaConfig.KAFKA_KEY])
    pool.amap(send_intray, [IntrayConfig.TOPIC], [IntrayConfig.KAFKA_KEY])
    pool.amap(send_description, [DescriptionConfig.TOPIC], [DescriptionConfig.KAFKA_KEY])
    pool.amap(send_feature, [FeatureConfig.TOPIC], [FeatureConfig.KAFKA_KEY])
    pool.amap(send_feature_family, [FeatureFamilyConfig.TOPIC], [FeatureFamilyConfig.KAFKA_KEY])
    pool.amap(send_hierarchy, [HierarchyConfig.TOPIC], [HierarchyConfig.KAFKA_KEY])
    pool.amap(send_hierarchy_illustration, [HierarchyIllustrationConfig.TOPIC], [HierarchyIllustrationConfig.KAFKA_KEY])
    pool.amap(send_hierarchy_usage, [HierarchyUsageConfig.TOPIC], [HierarchyUsageConfig.KAFKA_KEY])
    pool.amap(send_section_callout, [SectionCalloutConfig.TOPIC], [SectionCalloutConfig.KAFKA_KEY])
    pool.amap(send_section_part_usage, [SectionPartUsageConfig.TOPIC], [SectionPartUsageConfig.KAFKA_KEY])
    pool.amap(send_vin, [VinConfig.TOPIC], [VinConfig.KAFKA_KEY])

    pool.close()
    pool.join()
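# pathos amap returns an asynchronous result handle; close()/join() above only
# waits for the workers, while handle.get() also re-raises anything a worker
# raised. A self-contained sketch of that difference (toy function, not part
# of the original pipeline):

from pathos.multiprocessing import ProcessPool


def _square(x):
    return x * x


def _demo_amap():
    pool = ProcessPool(nodes=2)
    handle = pool.amap(_square, [1, 2, 3])
    pool.close()
    pool.join()
    return handle.get()  # [1, 4, 9]; a worker exception surfaces here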
def plot_cats_vars_errors(dbs_dir, err_var_labs, n_cpus):
    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    cats_paths_gen = ((cat_db, err_var_labs) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_vars_errors, cats_paths_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_paths in cats_paths_gen:
            plot_cat_vars_errors(cat_paths)

    return
def align_dataset(config_file: str):
    """Aligns an image dataset."""
    config = AlignConfig(config_file)
    output_dir = os.path.expanduser(config.output_dir)
    os.makedirs(output_dir, exist_ok=True)

    dataset = get_dataset(config.input_dir)
    num_images = sum(len(i) for i in dataset)
    TIMER.max_value = num_images
    TIMER.start()

    # Resolve the requested process count first, then clamp to available CPUs.
    num_processes = config.num_processes
    if num_processes == -1:
        num_processes = os.cpu_count()
    num_processes = cast(int, min(num_processes, os.cpu_count()))

    if num_processes > 1:
        process_pool = ProcessPool(num_processes)
        process_pool.imap(align_person, zip(dataset, [config] * len(dataset)))
        process_pool.close()
        process_pool.join()
    else:
        for person in dataset:
            align_person((person, config))

    transform_to_lfw_format(output_dir, num_processes)
    TIMER.finish()
    print('Total number of images: %d' % int(NUM_IMAGES_TOTAL.value))
    print('Number of faces found and aligned: %d' % int(NUM_SUCESSFUL.value))
    print('Number of unsuccessful: %d' % int(NUM_UNSECESSFUL.value))
def plot_cats_hbv_sim(dbs_dir,
                      water_bal_step_size,
                      full_flag=False,
                      wat_bal_flag=False,
                      show_warm_up_steps_flag=False,
                      n_cpus=1):
    '''Plot hbv simulations for every catchment for every kfold.'''
    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    const_args = (water_bal_step_size, full_flag, wat_bal_flag,
                  show_warm_up_steps_flag)
    plot_gen = ((cat_db, const_args) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_hbv_sim, plot_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for plot_args in plot_gen:
            plot_cat_hbv_sim(plot_args)

    return
def plot_cats_best_prms_1d(dbs_dir, n_cpus):
    '''Plot every best kfold parameter set for all catchments.'''
    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    cats_paths_gen = (cat_db for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_best_prms_1d, cats_paths_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_paths in cats_paths_gen:
            plot_cat_best_prms_1d(cat_paths)

    return
def plot_cats_prm_vecs(dbs_dir, n_cpus):
    '''Plot the final parameter set from kfold for every catchment along
    with the objective function value distribution.
    '''
    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    opt_res_gen = (cat_db for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_prm_vecs, opt_res_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for opt_res in opt_res_gen:
            plot_cat_prm_vecs(opt_res)

    return
def _prep_anomaly_bjs_mp(anoms_arr, bjs_arr, n_cpus, fig_out_dir):
    assert anoms_arr.shape == bjs_arr.shape

    _idxs = ret_mp_idxs(anoms_arr.shape[1], n_cpus)
    _idxs_list = [_idxs[i:i + 2] for i in range(n_cpus)]

    _anoms_gen = ((anoms_arr[:, _idxs_list[i][0]:_idxs_list[i][1]])
                  for i in range(n_cpus))
    _bjs_gen = ((bjs_arr[:, _idxs_list[i][0]:_idxs_list[i][1]])
                for i in range(n_cpus))

    mp_pool = ProcessPool(n_cpus)
    mp_pool.restart(True)

    try:
        print(
            list(
                mp_pool.uimap(Anomaly._plot_anomaly_bjs_cdf,
                              _idxs_list,
                              _anoms_gen,
                              _bjs_gen,
                              [fig_out_dir] * n_cpus)))

        # mirror the shutdown sequence of the other helpers so the pool is
        # also released on the success path
        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    except Exception as msg:
        mp_pool.close()
        mp_pool.join()
        print('Error in _plot_anomaly_bjs_cdf:', msg)

    return
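# `ret_mp_idxs` is used above but not defined in this collection. From its use
# it must return n_cpus + 1 increasing column boundaries covering shape[1];
# a plausible sketch (an assumption, not the original implementation):

import numpy as np


def ret_mp_idxs(n_vals, n_cpus):
    """Split n_vals items into n_cpus near-equal contiguous index ranges."""
    idxs = np.linspace(0, n_vals, n_cpus + 1, endpoint=True, dtype=np.int64)
    return np.unique(idxs)  # guards against duplicates when n_vals < n_cpus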
def plot_cats_kfold_effs(dbs_dir, hgs_db_path, compare_ann_cyc_flag, n_cpus):
    '''Plot the k-fold efficiency results.'''
    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    const_args = (compare_ann_cyc_flag, hgs_db_path)
    cats_paths_gen = ((cat_db, const_args) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_kfold_effs, cats_paths_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_paths in cats_paths_gen:
            plot_cat_kfold_effs(cat_paths)

    return
def normalize(self) -> None:
    """ Normalize ensemble """
    if not self.regenerate:
        try:
            self.load()
            return
        except FileNotFoundError:
            pass

    assert xor((self.normalizer_nld is not None
                and self.normalizer_gsf is not None),
               self.normalizer_simultan is not None), \
        "Either 'normalizer_nld' and 'normalizer_gsf' must be set, or " \
        "normalizer_simultan"

    gsfs = self.extractor.gsf
    nlds = self.extractor.nld

    self.LOG.info(f"Start normalization with {self.nprocesses} cpus")
    pool = ProcessPool(nodes=self.nprocesses)
    N = len(nlds)
    iterator = pool.imap(self.step, range(N), nlds, gsfs)
    self.res = list(tqdm(iterator, total=N))
    pool.close()
    pool.join()
    pool.clear()

    self.save()
def correction_factor(p, number_of_runs, method, X, y, n_jobs=None):
    # Setup parallel job
    if n_jobs == -1:
        n_jobs = cpu_count()
    elif n_jobs is None:
        n_jobs = 1
    pool = Pool(n_jobs, maxtasksperchild=1000)

    def run(_):
        # Artificially falsify
        y_f = falsify(y, p, random_state=_)
        # Correct labels
        y_corrected = method.fit_transform(X, y_f)
        N = X.shape[0]
        return ((y == y_corrected).sum() - (1 - p) * N) / (p * N)

    factor = np.array(pool.map(run, range(number_of_runs)))

    # Close the pool again
    pool.close()
    pool.join()
    pool.clear()

    return np.mean(factor), np.std(factor)
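# `falsify` is referenced above (and in `accuracy` below) but not defined in
# this collection; from context it corrupts a fraction p of the labels at
# random. A plausible sketch, assuming at least two distinct classes in y
# (an assumption about the original helper, not its actual code):

import numpy as np


def falsify(y, p, random_state=None):
    """Return a copy of y with a fraction p of labels flipped to wrong ones."""
    rng = np.random.RandomState(random_state)
    y_f = np.asarray(y).copy()
    labels = np.unique(y_f)
    idx = rng.choice(len(y_f), size=int(round(p * len(y_f))), replace=False)
    for i in idx:
        wrong = labels[labels != y_f[i]]  # assumes >= 2 classes
        y_f[i] = rng.choice(wrong)
    return y_f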
def main():
    log_file = ('log\\' + GeneralConfig.ENV + '_' + GeneralConfig.TOPIC_PREFIX +
                '_' + GeneralConfig.ROWCOUNT_LOG_FILE)
    if os.path.exists(log_file):
        os.remove(log_file)

    pool = ProcessPool(nodes=cpu_count() - 1 or 1)

    # pool.amap(send_aftermarket_part, [AftermarketPartConfig.TOPIC], [AftermarketPartConfig.KAFKA_KEY])
    # pool.amap(send_description, [DescriptionConfig.TOPIC], [DescriptionConfig.KAFKA_KEY])
    # pool.amap(send_engineering_part, [EngineeringPartConfig.TOPIC], [EngineeringPartConfig.KAFKA_KEY])
    # pool.amap(send_engineering_part_function, [EngineeringPartFunctionConfig.TOPIC], [EngineeringPartFunctionConfig.KAFKA_KEY])
    # pool.amap(send_engineering_part_usage, [EngineeringPartUsageConfig.TOPIC], [EngineeringPartUsageConfig.KAFKA_KEY])
    # pool.amap(send_feature, [FeatureConfig.TOPIC], [FeatureConfig.KAFKA_KEY])
    # pool.amap(send_feature_family, [FeatureFamilyConfig.TOPIC], [FeatureFamilyConfig.KAFKA_KEY])
    pool.amap(send_hierarchy, [HierarchyConfig.TOPIC], [HierarchyConfig.KAFKA_KEY])
    # pool.amap(send_hierarchy_illustration, [HierarchyIllustrationConfig.TOPIC], [HierarchyIllustrationConfig.KAFKA_KEY])
    # pool.amap(send_hierarchy_usage, [HierarchyUsageConfig.TOPIC], [HierarchyUsageConfig.KAFKA_KEY])
    # pool.amap(send_section_callout, [SectionCalloutConfig.TOPIC], [SectionCalloutConfig.KAFKA_KEY])
    # pool.amap(send_section_part_usage, [SectionPartUsageConfig.TOPIC], [SectionPartUsageConfig.KAFKA_KEY])
    # pool.amap(send_supersession, [SupersessionConfig.TOPIC], [SupersessionConfig.KAFKA_KEY])
    # pool.amap(send_intray, [IntrayConfig.TOPIC], [IntrayConfig.KAFKA_KEY])
    # pool.amap(send_vin, [VinConfig.TOPIC], [VinConfig.KAFKA_KEY])

    pool.close()
    pool.join()
def _prepare_eigen(self):
    """
    Calculate eigenvalues and eigenvectors for all kpts and save them.
    Note that convention 2 is used here, where the phase factor is
    e^(ik.R), not e^(ik.(R+rj-ri)).
    """
    nkpts = len(self.kpts)
    self.evals = np.zeros((nkpts, self.nbasis), dtype=float)
    self.nkpts = nkpts
    self.H0 = np.zeros((self.nbasis, self.nbasis), dtype=complex)
    self.evecs = np.zeros((nkpts, self.nbasis, self.nbasis), dtype=complex)
    H = np.zeros((nkpts, self.nbasis, self.nbasis), dtype=complex)
    if not self.is_orthogonal:
        self.S = np.zeros((nkpts, self.nbasis, self.nbasis), dtype=complex)
    else:
        self.S = None

    if self.nproc == 1:
        results = map(self.tbmodel.HSE_k, self.kpts)
    else:
        executor = ProcessPool(nodes=self.nproc)
        results = executor.map(self.tbmodel.HSE_k, self.kpts,
                               [2] * len(self.kpts))
        executor.close()
        executor.join()
        executor.clear()

    for ik, result in enumerate(results):
        if self.is_orthogonal:
            H[ik], _, self.evals[ik], self.evecs[ik] = result
        else:
            H[ik], self.S[ik], self.evals[ik], self.evecs[ik] = result
        self.H0 += H[ik] / self.nkpts

    self.evals, self.evecs = self._reduce_eigens(self.evals,
                                                 self.evecs,
                                                 emin=self.efermi - 10.0,
                                                 emax=self.efermi + 10.1)
    if self._use_cache:
        evecs = self.evecs
        self.evecs_shape = self.evecs.shape
        self.evecs = np.memmap(os.path.join(self.cache_path, 'evecs.dat'),
                               mode='w+',
                               shape=self.evecs.shape,
                               dtype=complex)
        self.evecs[:, :, :] = evecs[:, :, :]
        if self.is_orthogonal:
            self.S = None
        else:
            S = self.S
            self.S = np.memmap(os.path.join(self.cache_path, 'S.dat'),
                               mode='w+',
                               shape=(nkpts, self.nbasis, self.nbasis),
                               dtype=complex)
            self.S[:] = S[:]
        # dropping the memmap references flushes them to disk; they are
        # re-opened from the cache files when needed
        del self.evecs
        if not self.is_orthogonal:
            del self.S
def pare_multi_process(urls_and_people):
    print("[INFO] Paring and downloading all image urls with {} processes"
          .format(NUM_PROCESSES))
    urls, person = zip(*urls_and_people)
    pare_pool = ProcessPool(NUM_PROCESSES)
    pare_pool.imap(safe_pare_matches_and_download, urls, person)
    pare_pool.close()
    pare_pool.join()
    print("[INFO] Done paring and downloading all image urls")
def accuracy(p, number_of_runs, method, X, y, clf=None, n_jobs=None):
    '''
    Returns:
        (array{mean_acc_corrected, mean_acc_false},
         array{stddev_acc_corrected, stddev_acc_false})
    '''
    # Setup parallel job
    if n_jobs == -1:
        n_jobs = cpu_count()
    elif n_jobs is None:
        n_jobs = 1
    pool = Pool(n_jobs, maxtasksperchild=1000)

    if clf is None:
        clf = NNC(n_neighbors=1)

    def run(_):
        # Artificially falsify
        y_f = falsify(y, p, random_state=_)
        # Correct labels
        y_corrected = method.fit_transform(X, y_f)
        # Set up 10-fold cross-validation
        train_corr, test_corr = kfold(10, X, y, y_corrected)
        train_f, test_f = kfold(10, X, y, y_f)
        score = np.zeros((2, 10))
        # Calc scores
        for fold in range(10):
            train_X, train_y = train_corr[fold]
            test_X, test_y = test_corr[fold]
            clf.fit(train_X, train_y)
            score[0, fold] = clf.score(test_X, test_y)

            train_X, train_y = train_f[fold]
            test_X, test_y = test_f[fold]
            clf.fit(train_X, train_y)
            score[1, fold] = clf.score(test_X, test_y)
        # Average
        return np.mean(score, axis=1).tolist()

    acc = np.array(pool.map(run, range(number_of_runs)))

    # Close the pool again
    pool.close()
    pool.join()
    pool.clear()

    return np.mean(acc, axis=0), np.std(acc, axis=0)
def parallelize_simulations(simulation_execs: List[Callable],
                            var_dict_list: List[VarDictType],
                            states_lists: List[StatesListsType],
                            configs_structs: List[ConfigsType],
                            env_processes_list: List[EnvProcessesType],
                            Ts: List[range],
                            SimIDs,
                            Ns: List[int],
                            ExpIDs: List[int],
                            SubsetIDs,
                            SubsetWindows,
                            configured_n):
    print('Execution Mode: parallelized')

    params = list(
        zip(simulation_execs, var_dict_list, states_lists, configs_structs,
            env_processes_list, Ts, SimIDs, Ns, SubsetIDs, SubsetWindows))

    len_configs_structs = len(configs_structs)
    unique_runs = Counter(SimIDs)
    sim_count = max(unique_runs.values())
    highest_divisor = int(len_configs_structs / sim_count)

    new_configs_structs, new_params = [], []
    for count in range(sim_count):
        if count == 0:
            new_params.append(params[count:highest_divisor])
            new_configs_structs.append(configs_structs[count:highest_divisor])
        elif count > 0:
            new_params.append(
                params[count * highest_divisor:(count + 1) * highest_divisor])
            new_configs_structs.append(
                configs_structs[count * highest_divisor:(count + 1) *
                                highest_divisor])

    def threaded_executor(params):
        tp = TPool()
        if len_configs_structs > 1:
            results = tp.map(
                lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8],
                               t[9], configured_n), params)
        else:
            t = params[0]
            results = t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8],
                           t[9], configured_n)
        tp.close()
        return results

    pp = PPool()
    results = flatten(
        list(pp.map(lambda params: threaded_executor(params), new_params)))
    pp.close()
    pp.join()
    pp.clear()
    # pp.restart()

    return results
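# `flatten` above is not defined in this collection; the nested pools hand
# back a list of per-simulation result lists, so a one-level flatten
# suffices. A minimal sketch (an assumption about the original helper):

def flatten(nested):
    """Flatten one level of nesting: [[a, b], [c]] -> [a, b, c]."""
    return [item for sub in nested for item in sub]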
# MyProcessPool yields, so it is meant to be used as a context manager; the
# contextmanager decorator (assumed to come from the original module's
# imports) makes `with MyProcessPool(n) as p:` work.
from contextlib import contextmanager


@contextmanager
def MyProcessPool(nodes=None):
    if nodes is None or nodes > 1:
        p = ProcessPool(nodes)
        try:
            yield p
        finally:
            p.close()
            p.join()
            p.clear()
    else:
        # print("Using PseudoPool!")
        yield PseudoPool()
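# `PseudoPool` is referenced above but not defined here. For the serial
# branch to be a drop-in replacement it only needs the map-style API that
# callers of MyProcessPool use. A minimal sketch (an assumption about the
# original class):

class PseudoPool:
    """Serial stand-in for ProcessPool when multiprocessing is not wanted."""

    def map(self, func, *iterables):
        return list(map(func, *iterables))

    def imap(self, func, *iterables):
        return map(func, *iterables)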
def calculate_all(self):
    """
    The top level.
    """
    print("Green's function Calculation started.")
    widgets = [
        ' [',
        progressbar.Timer(),
        '] ',
        progressbar.Bar(),
        ' (',
        progressbar.ETA(),
        ') ',
    ]
    bar = progressbar.ProgressBar(maxval=self.contour.npoints,
                                  widgets=widgets)
    bar.start()
    rhoRs = []
    GRs = []
    AijRs = {}

    if self.np > 1:
        executor = ProcessPool(nodes=self.np)
        results = executor.map(self.get_AijR_rhoR, self.contour.path)
    else:
        results = map(self.get_AijR_rhoR, self.contour.path)

    for i, result in enumerate(results):
        bar.update(i)
        for iR, R in enumerate(self.R_ijatom_dict):
            for (iatom, jatom) in self.R_ijatom_dict[R]:
                if (R, iatom, jatom) in AijRs:
                    AijRs[(R, iatom, jatom)].append(result[0][R, iatom, jatom])
                else:
                    AijRs[(R, iatom, jatom)] = []
                    AijRs[(R, iatom, jatom)].append(result[0][R, iatom, jatom])
        rhoRs.append(result[1])

    if self.np > 1:
        executor.close()
        executor.join()
        executor.clear()

    # self.save_AijRs(AijRs)
    self.integrate(rhoRs, AijRs)
    self.get_rho_atom()
    self.A_to_Jtensor()
    bar.finish()
def process_executor(params):
    if len_configs_structs > 1:
        pp = PPool(processes=len_configs_structs)
        results = pp.map(
            lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8],
                           t[9], configured_n), params)
        pp.close()
        pp.join()
        pp.clear()
    else:
        t = params[0]
        results = t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9],
                       configured_n)
    return results
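# process_executor above maps a lambda over the pool; this works because
# pathos serializes with dill, whereas the stdlib multiprocessing.Pool raises
# a PicklingError on lambdas. A self-contained check of that difference:

from pathos.pools import ProcessPool


def _demo_lambda_map():
    pool = ProcessPool(nodes=2)
    out = pool.map(lambda t: t[0] + t[1], [(1, 2), (3, 4)])  # -> [3, 7]
    pool.close()
    pool.join()
    pool.clear()
    return out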
def update_hash_dict(self):
    if self.num_proc is None:
        self.num_proc = cpu_count() - 1

    # check current hash_dict
    current_files = set(self.image_filenames)
    cache_files = self.hash_dict.keys()
    lost_set = cache_files - current_files
    target_files = list(current_files - cache_files)

    if len(lost_set) + len(target_files) > 0:
        try:
            if len(self.hash_dict) == 0:
                spinner = Spinner(
                    prefix="Calculating image hashes (hash-bits={} num-proc={})..."
                    .format(self.hash_bits, self.num_proc))
            else:
                spinner = Spinner(
                    prefix="Updating image hashes (hash-bits={} num-proc={})..."
                    .format(self.hash_bits, self.num_proc))
            spinner.start()

            # del lost_set from hash_dict
            for f in lost_set:
                del self.hash_dict[f]

            if six.PY2:
                from pathos.multiprocessing import ProcessPool as Pool
            elif six.PY3:
                from multiprocessing import Pool
            pool = Pool(self.num_proc)
            hashes = pool.map(self.gen_hash, target_files)
            for filename, hash_value in zip(target_files, hashes):
                self.hash_dict[filename] = hash_value
            spinner.stop()
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            spinner.stop()
            sys.exit(1)
        return True
    else:
        return False
def apply_data(self, data: 'Data', method: Callable) -> 'Data':
    """Applies 'method' to 'data' across several cores.

    Args:
        data ('Data'): instance with a stored pandas DataFrame.
        method (Callable): callable method or function to apply to 'data'.

    Returns:
        'Data': with 'method' applied.

    """
    dfs = np.array_split(data.data, mp.cpu_count(), axis=0)
    pool = Pool()
    data.data = np.vstack(pool.map(method, dfs))
    pool.close()
    pool.join()
    pool.clear()
    return data
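# Minimal standalone demo of the split/map/stack idea behind apply_data
# (plain numpy arrays here; the Data wrapper above is project-specific):

import numpy as np
from pathos.pools import ProcessPool


def _double(chunk):
    return chunk * 2


def _demo_parallel_apply():
    arr = np.arange(12).reshape(6, 2)
    chunks = np.array_split(arr, 3, axis=0)  # three row-wise chunks
    pool = ProcessPool(nodes=3)
    out = np.vstack(pool.map(_double, chunks))  # same values as arr * 2
    pool.close()
    pool.join()
    pool.clear()
    return out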
def avaliacao(self, populacao):
    n = len(populacao)

    def steps(k):
        individuo = populacao[k, :]
        obj = self.funcao_objetivo(individuo)
        return obj

    ncpu = cpu_count()
    pool = ProcessPool(nodes=ncpu)
    pesos = array(pool.map(steps, range(n)))
    pool.close()
    pool.join()
    pool.clear()
    shutdown()
    return pesos
def main(args):
    log.info('----------start processing---------')
    clips_dir = os.path.join(args.source_dir, 'clips')

    if args.follow:
        datasets_file = glob(args.source_dir + '/*.tsv')
    else:
        validated_tsv = args.source_dir + '/validated.tsv'
        new_path = dataset_split(validated_tsv, args.source_dir)
        datasets_file = glob(new_path + '/*.tsv')

    if args.num_process == 1:
        for tsv in tqdm(datasets_file):
            each_tsv(tsv, clips_dir, args.target_dir)
    else:
        targets = [args.target_dir] * len(datasets_file)
        clips_dirs = [clips_dir] * len(datasets_file)
        pool = Pool(args.num_process)
        pool.map(each_tsv, datasets_file, clips_dirs, targets)
        pool.close()
        pool.join()
def calculate_all(self):
    """
    The top level.
    """
    print("Green's function Calculation started.")
    widgets = [
        ' [',
        progressbar.Timer(),
        '] ',
        progressbar.Bar(),
        ' (',
        progressbar.ETA(),
        ') ',
    ]
    bar = progressbar.ProgressBar(maxval=len(self.contour.path),
                                  widgets=widgets)
    bar.start()

    if self.np == 1:
        results = map(self.get_AijR_rhoR, self.contour.path)
    else:
        pool = ProcessPool(nodes=self.np)
        results = pool.map(self.get_AijR_rhoR, self.contour.path)

    for i, result in enumerate(results):
        bar.update(i)
        rup, rdn, Jorb_list, JJ_list = result
        self.rho_up_list.append(rup)
        self.rho_dn_list.append(rdn)
        for iR, R in enumerate(self.R_ijatom_dict):
            for (iatom, jatom) in self.R_ijatom_dict[R]:
                key = (R, iatom, jatom)
                self.Jorb_list[key].append(Jorb_list[key])
                self.JJ_list[key].append(JJ_list[key])

    if self.np > 1:
        pool.close()
        pool.join()
        pool.clear()

    self.integrate()
    self.get_rho_atom()
    self.A_to_Jtensor()
    bar.finish()
def make_hash_list(self):
    if self.num_proc is None:
        self.num_proc = cpu_count() - 1
    try:
        spinner = Spinner(
            prefix="Calculating image hashes (hash-bits={} num-proc={})..."
            .format(self.hash_bits, self.num_proc))
        spinner.start()
        if six.PY2:
            from pathos.multiprocessing import ProcessPool as Pool
        elif six.PY3:
            from multiprocessing import Pool
        pool = Pool(self.num_proc)
        self.cache = pool.map(self.gen_hash, self.image_filenames)
        spinner.stop()
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        spinner.stop()
        sys.exit(1)
def main(args):
    if len(args.input) < 2:
        print("Please name at least one STAR file and an output directory")
        return 1

    if args.apix is None:
        print("Using pixel size computed from STAR files")

    def do_job(star):
        try:
            mrc = os.path.join(args.output,
                               os.path.basename(star).replace(".star", ".mrc"))
            print("Starting reconstruction of %s" % star)
            do_reconstruct(star, mrc, args.apix, args.sym, args.ctf)
            print("Wrote %s reconstruction to %s" % (star, mrc))
            if args.mask is not None:
                masked_mrc = mrc.replace(".mrc", "_masked.mrc")
                do_mask(mrc, masked_mrc, args.mask)
                print("Wrote masked map %s" % masked_mrc)
            if args.mask is not None and args.delete_unmasked:
                delete_unmasked(mrc, masked_mrc)
                print("Overwrote %s with %s" % (mrc, masked_mrc))
        except Exception as e:
            print("Failed on %s: %s" % (star, e))
        return 0

    pool = Pool(nodes=args.nproc)
    # pool.apipe(do_job, args.input)
    results = pool.imap(do_job, args.input)
    codes = list(results)

    if pool is not None:
        pool.close()
        pool.join()
        pool.terminate()
    return 0
def compute_seq_distances(sequences, affinity=sequence_distance,
                          nb_jobs=NB_THREADS):
    """compute matrix of all distances

    :param [] sequences: list of all sequences
    :param func affinity: function specifying the sample affinity
    :param int nb_jobs: number of jobs running in parallel
    :return ndarray:

    >>> ss = [['a', 'b', 'a', 'c'],
    ...       ['a', 'a', 'b', 'a'],
    ...       ['b', None, 'b', 'a']]
    >>> compute_seq_distances(ss, affinity=sequence_distance)
    array([[0.  , 0.25, 0.5 ],
           [0.25, 0.  , 0.25],
           [0.5 , 0.25, 0.  ]])
    >>> ss = [['hi', 'there', 'how', 'are', 'you'],
    ...       ['hi', 'how', 'are', 'you'],
    ...       ['hi', 'are', 'you', 'there']]
    >>> compute_seq_distances(ss)
    array([[0. , 0.2, 0.6],
           [0.2, 0. , 0.5],
           [0.6, 0.5, 0. ]])
    """
    idxs = [(i, j) for i in range(len(sequences))
            for j in range(i, len(sequences))]
    idx_lt = (((i, j), (sequences[i], sequences[j])) for i, j in idxs)
    dists = np.zeros((len(sequences), len(sequences)))

    _wrap_dist = partial(wrap_distance, similar_distance=affinity)
    pool = ProcessPool(nb_jobs)
    for idx, d in pool.imap(_wrap_dist, idx_lt):
        dists[idx[0], idx[1]] = d
        dists[idx[1], idx[0]] = d
    pool.close()
    pool.join()
    pool.clear()
    return dists
def plot_cats_prm_vecs_evo(dbs_dir,
                           save_obj_flag,
                           save_png_flag,
                           save_gif_flag,
                           anim_secs,
                           n_cpus=1):
    '''Plot the evolution of parameter vectors and convex hull for every
    catchment for every kfold.
    '''
    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    opt_res_gen = ((cat_db, save_obj_flag, save_png_flag, save_gif_flag,
                    anim_secs) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_prm_vecs_evo, opt_res_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for opt_res in opt_res_gen:
            plot_cat_prm_vecs_evo(opt_res)

    return
def multiprocess(func: Callable, all_urls: List[List[str]],
                 people: List[str], total_count: int,
                 info: str) -> Tuple[List[List[str]], List[str]]:
    print("[INFO] {} with {} processes".format(info, NUM_PROCESSES))
    global TIMER
    widgets_match = ['{}: '.format(info), pb.Percentage(), ' ',
                     pb.Bar(marker=pb.RotatingMarker()), ' ', pb.ETA()]
    TIMER = pb.ProgressBar(widgets=widgets_match, maxval=total_count).start()

    if NUM_PROCESSES > 1:
        process_pool = ProcessPool(NUM_PROCESSES)
        urls_and_people = process_pool.imap(func, all_urls, people)
        process_pool.close()
        process_pool.join()
        filtered_urls, people = zip(*urls_and_people)
    else:
        # collect the updated person names too, mirroring the parallel branch
        filtered_urls, new_people = [], []
        for urls, person in zip(all_urls, people):
            filtered, person = func(urls, person)
            filtered_urls.append(filtered)
            new_people.append(person)
        people = new_people

    print("[INFO] Done {}".format(info))
    return filtered_urls, people
def data_generator(annotation_lines, input_shape, anchors, nb_classes,
                   batch_size=1, augment=True, max_boxes=20, jitter=0.3,
                   img_scaling=1.2, resize_img=True, allow_rnd_shift=True,
                   color_hue=0.1, color_sat=1.5, color_val=1.5,
                   flip_horizontal=True, flip_vertical=False,
                   bbox_overlap=0.95, nb_threads=1):
    """data generator for fit_generator

    :param list(str) annotation_lines:
    :param int batch_size:
    :param ndarray anchors:
    :param int nb_classes:
    :param tuple(int,int) input_shape: CNN input size
    :param bool augment: perform augmentation
    :param int max_boxes: maximal number of training bounding boxes
    :param float jitter:
    :param float color_hue: range of change of HSV color HUE
    :param float color_sat: range of change of HSV color SAT
    :param float color_val: range of change of HSV color value
    :param float img_scaling: upper image scaling
    :param bool flip_horizontal: allow random horizontal flip of image/boxes
    :param bool flip_vertical: allow random vertical flip of image/boxes
    :param bool resize_img: resize image to fit fully to CNN
    :param bool allow_rnd_shift: allow shifting image, not only centered crop
    :param float bbox_overlap: in case the image is cut, drop all boxes with
        lower overlap than this threshold
    :param float|int nb_threads: nb threads running in parallel
    :return:

    >>> np.random.seed(0)
    >>> path_img = os.path.join(update_path('model_data'), 'bike-car-dog.jpg')
    >>> line = path_img + ' 100,150,200,250,0 300,50,400,200,1'
    >>> anchors = get_anchors(os.path.join(update_path('model_data'),
    ...                                    'yolo_anchors.csv'))
    >>> gen = data_generator([line], (416, 416), anchors, 3, nb_threads=2)
    >>> batch = next(gen)
    >>> len(batch)
    2
    >>> [b.shape for b in batch[0]]
    [(1, 416, 416, 3), (1, 13, 13, 3, 8), (1, 26, 26, 3, 8), (1, 52, 52, 3, 8)]
    >>> gen = data_generator([line], (416, 416), anchors, 3, augment=False)
    >>> batch = next(gen)
    >>> len(batch)
    2
    >>> [b.shape for b in batch[0]]
    [(1, 416, 416, 3), (1, 13, 13, 3, 8), (1, 26, 26, 3, 8), (1, 52, 52, 3, 8)]
    """
    nb_lines = len(annotation_lines)
    circ_i = 0
    if nb_lines == 0 or batch_size <= 0:
        return None

    color_hue = abs(color_hue)
    color_sat = color_sat if color_sat > 1 else 1. / color_sat
    color_val = color_val if color_val > 1 else 1. / color_val

    nb_threads = nb_workers(nb_threads)
    pool = ProcessPool(nb_threads) if nb_threads > 1 else None
    _wrap_rand_data = partial(
        get_augmented_data,
        input_shape=input_shape,
        augment=augment,
        max_boxes=max_boxes,
        jitter=jitter,
        resize_img=resize_img,
        img_scaling=img_scaling,
        allow_rnd_shift=allow_rnd_shift,
        hue=color_hue,
        sat=color_sat,
        val=color_val,
        flip_horizontal=flip_horizontal,
        flip_vertical=flip_vertical,
        bbox_overlap=bbox_overlap,
    )

    while True:
        if circ_i < batch_size:
            # shuffle while you are starting a new cycle
            np.random.shuffle(annotation_lines)

        batch_image_data = []
        batch_box_data = []
        # create the list of lines to be loaded in batch
        annot_lines = annotation_lines[circ_i:circ_i + batch_size]
        batch_offset = (circ_i + batch_size) - nb_lines
        # check if the loaded batch has sufficient size
        if batch_offset > 0:
            annot_lines += annotation_lines[:batch_offset]

        # multiprocessing loading of batch data
        map_process = pool.imap if pool else map
        for image, box in map_process(_wrap_rand_data, annot_lines):
            batch_image_data.append(image)
            batch_box_data.append(box)

        circ_i = (circ_i + batch_size) % nb_lines

        batch_image_data = np.array(batch_image_data)
        batch_box_data = np.array(batch_box_data)
        y_true = preprocess_true_boxes(batch_box_data, input_shape, anchors,
                                       nb_classes)
        batch = [batch_image_data, *y_true], np.zeros(batch_size)
        yield batch
        gc.collect()

    if pool:
        pool.close()
        pool.join()
        pool.clear()
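# `nb_workers` is used above but not defined in this collection; from context
# it resolves a float/int request to a usable worker count. A plausible
# sketch (an assumption about the original helper, including the treatment of
# floats as a fraction of available CPUs):

import multiprocessing


def nb_workers(nb_threads):
    """Resolve a worker request; floats are taken as a fraction of all CPUs."""
    n_cpu = multiprocessing.cpu_count()
    if isinstance(nb_threads, float):
        nb_threads = int(round(nb_threads * n_cpu))
    return max(1, min(int(nb_threads), n_cpu))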