def run_multiprocess_dataless(self, func, *args):
    pool = mp.Pool(processes=self.num_of_processes)
    funclist = []
    if len(args) > 0:
        f = pool.apply_async(func, args)
        funclist.append(f)
    else:
        f = pool.apply_async(func)
        funclist.append(f)
    for idx, f in enumerate(funclist):
        print(f)
        print(f.get())
    pool.close()
    pool.join()
def ip_convolution(wav, flux, chip_limits, R, fwhm_lim=5.0, numProcs=None):
    """Spectral convolution which allows non-equidistant step values.

    Parameters
    ----------
    wav: Wavelength
    flux: Flux of spectrum
    chip_limits: List[float, float]
        Wavelength limits of region to return after convolution.
    R: Resolution to convolve to.
    fwhm_lim: Number of FWHM of convolution kernel to use as edge buffer.
    numProcs: int
        Number of processes to use. Default=None selects cpu_count - 1.
    """
    # Turn into numpy arrays
    wav = np.asarray(wav, dtype="float64")
    flux = np.asarray(flux, dtype="float64")

    wav_chip, flux_chip = wav_selector(wav, flux, chip_limits[0], chip_limits[1])

    # We need to calculate the fwhm at this value in order to set the starting
    # point for the convolution
    fwhm_min = wav_chip[0] / R  # fwhm at the extremes of vector
    fwhm_max = wav_chip[-1] / R

    # Wide wavelength bin for the resolution_convolution
    wav_min = wav_chip[0] - fwhm_lim * fwhm_min
    wav_max = wav_chip[-1] + fwhm_lim * fwhm_max
    wav_ext, flux_ext = wav_selector(wav, flux, wav_min, wav_max)

    # Multiprocessing part
    if numProcs is None:
        numProcs = mprocess.cpu_count() - 1

    mprocPool = mprocess.Pool(processes=numProcs)
    args_generator = [[wav, R, wav_ext, flux_ext, fwhm_lim] for wav in wav_chip]
    flux_conv_res = np.array(mprocPool.map(wrapper_fast_convolve, args_generator))
    mprocPool.close()

    return wav_chip, flux_conv_res
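# Hedged usage sketch (added for illustration, not part of the original module): this
# assumes wav_selector and wrapper_fast_convolve are defined alongside ip_convolution
# and that numpy is imported as np. It convolves a synthetic absorption line down to
# R = 50000 over a narrow chip; the wavelength values and chip limits are arbitrary.
def _example_ip_convolution():
    wav = np.linspace(2100.0, 2160.0, 60000)
    flux = 1.0 - 0.5 * np.exp(-0.5 * ((wav - 2130.0) / 0.05) ** 2)
    wav_chip, flux_conv = ip_convolution(
        wav, flux, chip_limits=[2110.0, 2150.0], R=50000, fwhm_lim=5.0, numProcs=2)
    return wav_chip, flux_conv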
def explain(self):
    """Use SHAP values to explain the features' contributions to the prediction
    of a gene's responsiveness.
    """
    with mp.Pool(processes=self.k_folds) as pool:
        mp_results = {}
        for k, y_te in enumerate(self.cv_results['preds']):
            n_tfs_te = len(y_te['tf'].unique())
            n_tfs_tr = len(self.tfs) - n_tfs_te

            y_te['tf:gene'] = y_te['tf'] + ':' + y_te['gene']
            te_tg_pairs = y_te['tf:gene'].values
            te_idx = [
                self.tg_pairs.index(tg_pair) for tg_pair in te_tg_pairs
            ]
            tr_idx = sorted(set(range(len(self.tg_pairs))) - set(te_idx))

            logger.info('Explaining {} genes in fold {}'.format(len(te_idx), k))

            tf_X_tr, tf_X_te = self.tf_X[tr_idx], self.tf_X[te_idx]
            tf_X_tr, tf_X_te = standardize_feat_mtx(tf_X_tr, tf_X_te, 'zscore')
            nontf_X, _ = standardize_feat_mtx(self.nontf_X, None, 'zscore')

            X_tr = np.hstack(
                [tf_X_tr, np.vstack([nontf_X for i in range(n_tfs_tr)])])
            X_te = np.hstack(
                [tf_X_te, np.vstack([nontf_X for i in range(n_tfs_te)])])

            bg_idx = np.random.choice(range(X_tr.shape[0]), BG_GENE_NUM,
                                      replace=False)
            mp_results[k] = pool.apply_async(
                calculate_tree_shap,
                args=(
                    self.cv_results['models'][k],
                    X_te,
                    te_tg_pairs,
                    X_tr[bg_idx],
                ))

        self.shap_vals = [
            mp_results[k].get() for k in sorted(mp_results.keys())
        ]
def matchscan(items, fun):
    p = mp.Pool()
    count = len(items)
    pairs = itertools.combinations_with_replacement(items, 2)
    # Keep only off-diagonal pairs; the original condition `x[0] != x[1] is not None`
    # was a chained comparison and almost certainly meant a plain inequality check.
    inp = (x for x in pairs if x[0] != x[1])
    results = p.map(fun, inp)
    p.close()
    p.join()
    tri = np.zeros((count, count))
    iu1 = np.triu_indices(count, 1, count)
    il1 = np.tril_indices(count, -1, count)
    tri[iu1] = results
    tri[il1] = np.flipud(np.rot90(tri, 1))[il1]  # mirror the upper triangle
    np.fill_diagonal(tri, 100)
    return tri
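# Hedged usage sketch (illustrative only): matchscan expects `fun` to take a single
# (item_a, item_b) pair and return one score; the diagonal is pinned to 100, so a
# percentage-style similarity is assumed here. The scorer is defined at module level so
# it remains picklable even with the standard multiprocessing backend.
def _pair_ratio(pair):
    """Illustrative pair scorer: percentage string similarity."""
    import difflib
    a, b = pair
    return 100.0 * difflib.SequenceMatcher(None, a, b).ratio()


def _example_matchscan():
    names = ["alpha", "alphabet", "beta", "betamax"]
    return matchscan(names, _pair_ratio)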
def run_hmm(self):
    logging.info('Running HMMER against Cas profiles')

    # Make dir
    os.mkdir(self.out + 'hmmer')

    # Start multiprocess
    pool = mp.Pool(self.threads)

    # Each HMM
    if self.lvl == 'DEBUG' or self.simplelog:
        list(pool.imap(self.hmmsearch, os.listdir(self.pdir)))
    else:
        list(tqdm.tqdm(pool.imap(self.hmmsearch, os.listdir(self.pdir)),
                       total=len(os.listdir(self.pdir))))

    # Close multiprocess
    pool.close()
def get_data(batch_size):
    file_batch = random.sample(files_list, batch_size)
    x = 110
    y = 110
    z = 110
    pool = mp.Pool(processes=batch_size)
    for i in range(batch_size):
        t1 = time.time()
        res = pool.apply_async(data_worker, (x, y, z, file_batch[i]))
        tmp = np.ctypeslib.as_array(res.get())
        print(tmp.shape)
        print(tmp)
        print('Time:' + str(time.time() - t1) + 's')
    pool.close()
    pool.join()
def main(datasets):
    t0 = time.time()
    cores = 18  # mp.cpu_count()
    # datasets = ['/g/data1/k88/MODIS_C/MCD43A4.006/2000091/']
    pool = mp.Pool(processes=cores)
    res = pool.map(create_doc_dataset, datasets)
    t1 = time.time()
    print('Processed in {0:.1f} seconds'.format(t1 - t0))
    return
def mmap(f, items, callback, interval=0.025, num_cores=0):
    """
    num_cores: None or 0 mean maximum ("the number returned by `os.cpu_count()`").
    @see https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool
    """
    results = []
    start = time.time()
    with multiprocess.Pool(num_cores if num_cores != 0 else None) as pool:
        for i, result in enumerate(pool.imap_unordered(f, items), 1):
            results.append(result)
            now = time.time()
            if now - start >= interval:
                start = now
                callback(i / len(items) * 100)
    callback(100)
    # pool.clear()  # Using close/join crashes the app on the next iteration,
    # ProcessPool must affect the global space.
    return results
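# Hedged usage sketch (illustrative): `multiprocess` here is assumed to be the
# dill-based multiprocess package, which is what lets a locally defined worker function
# be shipped to the pool. The callback simply receives a percentage in [0, 100].
def _example_mmap():
    def square(n):
        return n * n

    progress = []
    results = mmap(square, list(range(100)), callback=progress.append, num_cores=2)
    return results, progress[-1]  # progress[-1] == 100 after the final callback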
def update_catalog_collection(elements, max_miller, n_processes=1, mp_query=None):
    '''
    This function will enumerate adsorption sites and add them to our `catalog`
    Mongo collection.

    Args:
        elements       A list of strings indicating the elements you are
                       looking for, e.g., ['Cu', 'Al']
        max_miller     An integer indicating the maximum Miller index to be
                       enumerated
        n_processes    An integer indicating how many threads you want to use
                       when running the tasks. If you do not expect many
                       updates, stick to the default of 1, or go up to 4. If
                       you are re-creating your collection from scratch, you
                       may want to increase this argument as high as you can.
        mp_query       We get our bulks from The Materials Project. This
                       dictionary argument is used as a Mongo query to The
                       Materials Project Database. If you do not supply this
                       argument, then it will automatically filter out bulks
                       whose energies above the hull are greater than 0.1 eV
                       and whose formation energy per atom is above 0 eV.
    '''
    # Python doesn't like mutable arguments
    if mp_query is None:
        mp_query = {}

    # Figure out the MPIDs we need to enumerate
    get_mpid_task = _GetMpids(elements=elements, mp_query=mp_query)
    schedule_tasks([get_mpid_task])
    mpids = get_task_output(get_mpid_task)

    # For each MPID, enumerate all the sites and then add them to our `catalog`
    # Mongo collection. Do this in parallel because it can be.
    if n_processes > 1:
        with multiprocess.Pool(n_processes) as pool:
            list(pool.imap(func=lambda mpid: __run_insert_to_catalog_task(mpid, max_miller),
                           iterable=mpids, chunksize=20))
    else:
        for mpid in mpids:
            __run_insert_to_catalog_task(mpid, max_miller)
def _run(self):
    data = self.input
    window = self.params['window']
    group_by_feature = self.params['group_by']
    date_field = self.params['date_field']

    # Fixing same data timestamps for same card_id
    # check_data = data.reset_index().set_index([group_by_feature, date_field])
    # duplicate_transactions = check_data[check_data.index.duplicated()]['TransactionID'].values
    #
    # while len(duplicate_transactions) > 0:
    #     print(f"Found {len(duplicate_transactions)} duplicate transactions")
    #     for itid, tid in enumerate(duplicate_transactions):
    #         print(itid)
    #         q = data.loc[tid]
    #         date = q[date_field]
    #         card_id = q[group_by_feature]
    #         alldup = data[data[date_field] == date]
    #         alldup = alldup[alldup[group_by_feature] == card_id]
    #         # print(alldup.index)
    #         for it, idx in enumerate(alldup.index):
    #             # print(idx)
    #             data.loc[idx, date_field] += pd.Timedelta(seconds=it)
    #     check_data = data.reset_index().set_index([group_by_feature, date_field])
    #     duplicate_transactions = check_data[check_data.index.duplicated()]['TransactionID'].values

    with mp.Pool() as pool:
        self.output = pd.DataFrame(index=data.index)
        for nf in self.params['features']:
            if nf not in data.columns:
                continue
            print(nf)
            df = pd.DataFrame(index=data.index)
            data_slice = data[[date_field, group_by_feature, nf]].reset_index()
            args = [(data_slice, group_by_feature, nf, ws) for ws in window]
            m = pool.imap(aggregate_with_time_local, args)
            for i, df_agg in enumerate(m):
                print('.')
                assert df.shape[0] == df_agg.shape[0]
                df = pd.concat([df, df_agg], axis=1)
            self.output = self.output.join(df)
def Run_Dynamic_Nested_Fitting(loglikelihood, prior_transform, ndim,
                               nlive_init=100, sample='auto',
                               nlive_batch=50, maxbatch=2,
                               pfrac=0.8, n_cpu=None, print_progress=True):
    """Run Fitting as a Function.

    Parameters
    ----------
    loglikelihood: function
        log likelihood function
    prior_transform: function
        prior transform function
    ndim: int
        number of dimensions
    """
    print("Run Nested Fitting for the image... # of params: %d" % ndim)
    start = time.time()

    if n_cpu is None:
        n_cpu = mp.cpu_count() - 1

    with mp.Pool(processes=n_cpu) as pool:
        print("Opening pool: # of CPU used: %d" % (n_cpu))
        pool.size = n_cpu

        dlogz = 1e-3 * (nlive_init - 1) + 0.01

        pdsampler = dynesty.DynamicNestedSampler(loglikelihood, prior_transform, ndim,
                                                 sample=sample, pool=pool,
                                                 use_pool={'update_bound': False})
        pdsampler.run_nested(nlive_init=nlive_init,
                             nlive_batch=nlive_batch,
                             maxbatch=maxbatch,
                             print_progress=print_progress,
                             dlogz_init=dlogz,
                             wt_kwargs={'pfrac': pfrac})

    end = time.time()
    print("Finish Fitting! Total time elapsed: %.3gs" % (end - start))

    return pdsampler
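# Hedged usage sketch (illustrative, not from the original code): fits a 2-D standard
# normal with a flat prior on [-5, 5]^2. It assumes dynesty, multiprocess (as mp) and
# numpy (as np) are importable, matching the imports Run_Dynamic_Nested_Fitting relies
# on, and that the pool backend can serialize the locally defined functions.
def _example_dynamic_nested_fit():
    ndim = 2

    def loglike(x):
        return -0.5 * np.sum(x ** 2) - 0.5 * ndim * np.log(2.0 * np.pi)

    def prior_transform(u):
        return 10.0 * u - 5.0  # map the unit cube onto [-5, 5]^2

    return Run_Dynamic_Nested_Fitting(loglike, prior_transform, ndim,
                                      nlive_init=100, maxbatch=1,
                                      n_cpu=2, print_progress=False)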
def __init__(self, Pos=None, MD=None, pth=None, acq=None, frames=None,
             NucChannel='DeepBlue', threads=10, register=True, **kwargs):

    if any([Pos is None]):
        raise ValueError('Please provide position')

    if pth is None:
        if MD is not None:
            self.pth = MD.base_pth
    else:
        self.pth = pth

    if MD is None:
        MD = Metadata(pth)

    if MD().empty:
        raise AssertionError('No metadata found in supplied path')

    if Pos not in MD.posnames:
        raise AssertionError('Position does not exist in dataset')

    self.posname = Pos
    self.channels = MD.unique('Channel', Position=Pos)
    self.acq = MD.unique('acq', Position=Pos)

    if frames is None:
        self.frames = MD.unique('frame', Position=Pos)
    elif type(frames) is not list:
        self.frames = [frames]
    else:
        self.frames = frames

    if len(self.frames) == 1:
        register = False

    self._registerflag = register
    self._tracked = False
    self._splitflag = False
    # self.PixelSize = MD.unique('PixelSize')[0]

    # Create all framelabels for the different TPs. This will segment and measure stuff.
    with mp.Pool(threads) as ppool:
        frames = list(tqdm(ppool.imap(partial(FrameLbl, MD=MD, pth=pth, Pos=Pos,
                                              NucChannel=NucChannel,
                                              register=self._registerflag,
                                              **kwargs), self.frames),
                           total=len(self.frames)))
        # ppool.close()
        # ppool.join()

    self.framelabels = np.array(frames)
    self._calculate_pointmat()
    print('\nFinished loading and segmenting position ' + str(Pos))
def kline_data(self, pair_list, interval, **kwargs):
    start_date = kwargs.get('start_date', '')
    end_date = kwargs.get('end_date', '')
    storage = kwargs.get('storage', '')
    output = kwargs.get('output', '')
    progress_statements = kwargs.get('progress_statements', '')

    if start_date:
        start_date = datetime.datetime.strptime(start_date, '%m/%d/%Y')
    if end_date:
        end_date = datetime.datetime.strptime(end_date, '%m/%d/%Y')

    valid_kline_intervals = [
        '1m', '3m', '5m', '15m', '30m', '1h', '2h', '4h', '6h', '8h', '12h'
    ]
    if interval not in set(valid_kline_intervals):
        raise ValueError(
            'Invalid Interval: Kline interval should be one of the following - {}'
            .format(','.join(valid_kline_intervals)))

    output = self.process_kline_output(output)

    if not storage:
        storage = ['csv', None]
    try:
        storage_method, intended_dir = storage
    except ValueError:
        storage_method = storage[0]
        intended_dir = None

    if progress_statements:
        self.progress_statements = progress_statements

    if storage_method.lower() == 'csv':
        kline_interval_directory = self.create_csv_directories(
            pair_list, interval, intended_dir)
        csv_file_info = mp.Manager().list()
        pair = [currency_pair for i, currency_pair in enumerate(pair_list)]
        lock = mp.Lock()
        pool = mp.Pool(processes=3, initargs=(lock, ))
        # data = pool.starmap(self.kline_to_csv,
        #                     zip(pair, re(start_date), re(end_date),
        #                         re(kline_interval_directory), re(interval),
        #                         re(titles), re(fields), re(csv_file_info)))
        data = pool.starmap(
            self.kline_to_csv,
            zip(pair, re(start_date), re(end_date),
                re(kline_interval_directory), re(interval), re(csv_file_info)))
        pool.close()
        pool.join()
        self.concatenate_csvs(set(list(csv_file_info)))
    else:
        raise ValueError(
            'Invalid Storage Type: Currently only csv storage supported')
def fit_transformer(self, iterations=None, samps_per_iteration=None):
    if samps_per_iteration is None:
        samps_per_iteration = 10 * self.n_components
    if iterations is None:
        iterations = np.ceil(self.total_samples / samps_per_iteration).astype(int)

    self.pool = mp.Pool()
    for i in tqdm(range(iterations), desc='incremental char pca'):
        samples = self.make_sample(samps_per_iteration, seed=i + self.seed,
                                   pool=self.pool)
        self.transformer.partial_fit(samples)
    del self.transformed_expr
    self.pool.close()
    del self.pool

    self.is_fitted = True
    return self.transformer
def _forward_Difference_And_F0(self, x0):
    assert len(x0) == self.numDim
    mask = np.eye(len(x0))
    mask = np.row_stack((np.zeros(self.numDim), mask))
    x_abArr = mask * self.gradStepSampSize + x0  # x upper and lower values for derivative calc
    if self.parallel == True:
        with mp.Pool() as pool:
            vals_ab = np.asarray(pool.map(self.funcObj, x_abArr))
    else:
        vals_ab = np.asarray([self.funcObj(x) for x in x_abArr])
    assert len(vals_ab.shape) == 1 and len(vals_ab) == self.numDim + 1
    F0 = vals_ab[0]
    deltaVals = vals_ab[1:] - F0
    grad = deltaVals / self.gradStepSampSize
    if self.disp == True and self.descentMethod == 'adam':
        print(x0, F0)
    return F0, grad
def get_ci_seq(x, ci_fn, times, parallel=False):
    """
    Get sequence of confidence intervals

    Parameters
    ----------
    x, array-like
        The vector of observations between 0 and 1.
    ci_fn, univariate function
        A function which takes an array-like of bounded numbers `x` and
        outputs a tuple `(l, u)` of lower and upper confidence intervals.
        Note that `l` and `u` are scalars (not vectors).
    times, array-like of positive integers
        Times at which to compute the confidence interval.
    parallel, boolean
        Should this function be parallelized?

    Returns
    -------
    l, array-like of [0, 1]-valued reals
        Lower confidence intervals
    u, array-like of [0, 1]-valued reals
        Upper confidence intervals
    """
    x = np.array(x)
    l = np.repeat(0.0, len(times))
    u = np.repeat(1.0, len(times))
    if parallel:
        n_cores = multiprocess.cpu_count()
        print("Using " + str(n_cores) + " cores")
        with multiprocess.Pool(n_cores) as p:
            result = np.array(p.map(lambda time: ci_fn(x[0:time]), times))
        l, u = result[:, 0], result[:, 1]
    else:
        for i in np.arange(0, len(times)):
            time = times[i]
            x_t = x[0:time]
            l[i], u[i] = ci_fn(x_t)
    return l, u
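# Hedged usage sketch (illustrative): any function that maps a [0, 1]-valued sample to a
# (lower, upper) pair works as `ci_fn`; a fixed-time Hoeffding interval is used here.
# Assumes numpy is imported as np; parallel=False keeps the example backend-agnostic.
def _example_get_ci_seq():
    rng = np.random.default_rng(0)
    x = rng.uniform(size=1000)

    def hoeffding_ci(xs, alpha=0.05):
        margin = np.sqrt(np.log(2.0 / alpha) / (2.0 * len(xs)))
        mean = np.mean(xs)
        return max(mean - margin, 0.0), min(mean + margin, 1.0)

    times = np.arange(10, 1001, 10)
    return get_ci_seq(x, hoeffding_ci, times, parallel=False)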
def solve_heat_transfer(self):
    """
    Solve the heat transfer problem for each tube

    Adds the thermal results to each receiver.Tube object
    """
    # pylint: disable=no-member
    if self.progress:
        print("Running thermal analysis:")
    with multiprocess.Pool(self.nthreads) as p:
        temps = list(
            self.progress_decorator(
                p.imap(
                    lambda x: self.thermal_solver.solve(
                        x, self.thermal_material, self.fluid_material),
                    self.tubes),
                self.ntubes))

    for tube, temps in zip(self.tubes, temps):
        tube.add_results("temperature", temps)
def optimize1(self):
    self.initialize_Optimization()
    numSamples = 1000
    samples = np.asarray(skopt.sampler.Sobol().generate(self.bounds, numSamples))
    with mp.Pool(maxtasksperchild=1) as pool:
        vals = np.asarray(pool.map(self.cost_Function, samples, chunksize=1))
    xOptimal = samples[np.argmin(vals)]
    return gradient_Descent(self.continuous_Cost, xOptimal, 200e-6, 50,
                            gradStepSize=50e-6, gradMethod='central',
                            descentMethod='adam', disp=True)
def __init__(self, data1, data2, reference_data, index_db, chromosom_length, cpus=10):
    self.data1 = data1
    self.data2 = data2
    self.reference_data = reference_data
    self.index_db = index_db
    self.chromosom_length = chromosom_length
    self.population = []
    self.scores = []
    self.pool = multiprocess.Pool(cpus)
    self.fittness = []
    self.fittness_function = self.fittness_min_mean
def determine_life(self, receiver, material, nthreads=1, decorator=lambda x, n: x):
    """
    Determine the life of the receiver by calculating individual
    material point damage and finding the minimum of all points.

    Parameters:
      receiver        fully-solved receiver object
      material        material model to use

    Additional Parameters:
      nthreads        number of threads
      decorator       progress bar
    """
    with multiprocess.Pool(nthreads) as p:
        results = list(
            decorator(
                p.imap(
                    lambda x: self.tube_log_reliability(x, material, receiver),
                    receiver.tubes,
                ),
                receiver.ntubes,
            ))

    p_tube = np.array([res[0] for res in results])
    tube_fields = [res[1] for res in results]

    # Tube reliability is the minimum of all the time steps
    tube = np.min(p_tube, axis=1)

    # Overall reliability is the minimum of the sum
    overall = np.min(np.sum(p_tube, axis=0))

    # Add the field to the tubes
    for tubei, field in zip(receiver.tubes, tube_fields):
        tubei.add_quadrature_results("log_reliability", field)

    # Convert back from log-prob as we go
    return {
        "tube_reliability": np.exp(tube),
        "overall_reliability": np.exp(overall),
    }
def pdist(data, dist_func, n_jobs=1, max_time=None):
    """
    Parallel pairwise distances between elements of x. Similar to scipy pdist
    with parallel processing and bounded runtime.
    """
    x = np.asanyarray(data)
    n_jobs = mp.cpu_count() if n_jobs == -1 else min(n_jobs, mp.cpu_count())

    if max_time is None or max_time == np.inf:
        with mp.Pool(processes=n_jobs) as pool:
            dists = pool.map(lambda a: dist_func(*a), itr.combinations(x, 2))
        return np.array(dists)

    def dist_col(a, b_list):
        return [dist_func(a, b) for b in b_list]

    dist_lists = []
    end = np.inf if max_time is None else time.time() + max_time
    projected_end = np.NINF
    n = len(x)
    i = 0
    end_i = 0

    # calculate matrix columns in chunks of n_jobs at a time
    with jl.Parallel(n_jobs=n_jobs) as parallel:
        while end_i < n and projected_end <= end:
            chunk_start = time.time()
            start_i = i + 1
            end_i = min(i + n_jobs + 1, n)
            a_list = [x[j] for j in range(start_i, end_i)]
            b_lists = [x[:k] for k in range(start_i, end_i)]
            cols = parallel(
                jl.delayed(dist_col)(a, b) for a, b in zip(a_list, b_lists))
            for col in cols:
                for sublist, d in zip(dist_lists, col):
                    sublist.append(d)
                dist_lists.append([col[-1]])
            i += n_jobs
            n_items = sum([len(b) for b in b_lists])
            next_items = n_items + n_jobs * (n_jobs - 1) / 2
            col_time = time.time() - chunk_start
            projected_end = time.time() + col_time * next_items / n_items

    return flat_list(dist_lists)
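# Hedged usage sketch (illustrative): `mp` is assumed to be the dill-based multiprocess
# package (needed for the lambda in the unbounded branch) and `jl` to be joblib for the
# time-bounded branch. Euclidean distances over a handful of 3-D points.
def _example_pdist():
    rng = np.random.default_rng(0)
    points = rng.normal(size=(20, 3))

    def euclidean(a, b):
        return float(np.linalg.norm(a - b))

    return pdist(points, euclidean, n_jobs=2, max_time=None)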
def __init__(self, num_workers, eval_function, timeout=None):
    self.num_workers = num_workers
    self.eval_function = eval_function
    self.timeout = timeout
    self.manager = mp.Manager()
    self.connection_strings = self.manager.Queue()
    for i in range(num_workers):
        # Connect to SITL directly without mavproxy, can only do one instance
        # port = 5760 + i*10
        # self.connection_strings.put('tcp:127.0.0.1:' + str(port))

        # Connect to mavproxy, uses more resources, max about 20 instances
        port = 14550 + i * 10
        self.connection_strings.put('127.0.0.1:' + str(port))
    self.pool = mp.Pool(num_workers, initializer=self.initializer,
                        initargs=(self.connection_strings, ))
def mt_evaluate_logps(self, parallel, multitry, proposed_pts, pfunc, ref=False):
    """Evaluate the log probability for multiple points in serial or parallel
    when using multi-try.

    Parameters
    ----------
    parallel : bool
        Whether to evaluate multi-try points in parallel
    multitry : int
        Number of multi-try points
    proposed_pts : numpy 2D array (n_multitry x n_param_dims)
        Proposed points
    pfunc : function
        Function that takes a point in parameter space and returns the log of
        the prior value and the log of the likelihood at that point
    ref : bool
        Whether this is a multi-try reference draw. Default = False
    """
    # If using multi-try and running in parallel, farm out proposed points to a process pool.
    if parallel:
        p = mp.Pool(multitry)
        args = list(zip([self] * multitry, np.squeeze(proposed_pts)))
        logps = p.map(call_logp, args)
        p.close()
        p.join()
        log_priors = [val[0] for val in logps]
        log_likes = [val[1] for val in logps]
    else:
        log_priors = []
        log_likes = []
        if multitry == 2:
            log_priors, log_likes = np.array([pfunc(np.squeeze(proposed_pts))])
        else:
            for pt in np.squeeze(proposed_pts):
                log_priors.append(pfunc(pt)[0])
                log_likes.append(pfunc(pt)[1])

    log_priors = np.array(log_priors)
    log_likes = np.array(log_likes)

    if ref:
        log_likes = np.append(log_likes, self.last_like)
        log_priors = np.append(log_priors, self.last_prior)

    return log_priors, log_likes
def _run(self):
    data = self.input
    window = self.params['window']
    group_by_feature = self.params['group_by']
    date_field = self.params['date_field']

    # Fixing same data timestamps for same card_id
    # check_data = data.reset_index().set_index([group_by_feature, 'Date'])
    # duplicate_transactions = check_data[check_data.index.duplicated()]['TransactionID'].values
    # while len(duplicate_transactions) > 0:
    #     print(f"Found {len(duplicate_transactions)} duplicate transactions")
    #     for itid, tid in enumerate(duplicate_transactions):
    #         print(itid)
    #         q = data.loc[tid]
    #         date = q['Date']
    #         card_id = q[group_by_feature]
    #         alldup = data[data['Date'] == date]
    #         alldup = alldup[alldup[group_by_feature] == card_id]
    #         # print(alldup.index)
    #         for it, idx in enumerate(alldup.index):
    #             # print(idx)
    #             data.loc[idx, 'Date'] += pd.Timedelta(seconds=it)
    #     check_data = data.reset_index().set_index([group_by_feature, 'Date'])
    #     duplicate_transactions = check_data[check_data.index.duplicated()]['TransactionID'].values
    #     # print(data.loc[alldup.index])
    #     break

    with mp.Pool() as pool:
        self.output = pd.DataFrame(index=data.index)
        df = pd.DataFrame(index=data.index)
        data_slice = data[[date_field, group_by_feature]].reset_index()
        # Use the configured date column; the original attribute-style access
        # `data_slice.date_field` would only work for a column literally named
        # 'date_field'.
        data_slice.loc[:, 'hours'] = (
            data_slice[date_field] - data_slice[date_field].iloc[0]
        ).dt.total_seconds() / 3600
        args = [(data_slice, group_by_feature, ws) for ws in window]
        m = pool.imap(aggregate_transaction_frequencies, args)
        for i, df_agg in enumerate(m):
            print('.')
            assert df.shape[0] == df_agg.shape[0]
            df = pd.concat([df, df_agg], axis=1)
        self.output = self.output.join(df)
def _set_alphas(self):
    manager = mp.Manager()
    shared_alphas = manager.dict()
    shared_test_counts = manager.dict()
    self.alphas = dict()
    self.test_counts = dict()
    test_anno = self.anno.groupby('id')['test_data'].agg(is_test=any)
    exclude_test_samples = self.exclude_test_samples

    def load_alphas(entry):
        data = pd.read_csv(entry[1], sep="\t", skip_blank_lines=False,
                           keep_default_na=False)
        samp_names = [re.sub(r'\.[0-9]+', '', x) for x in data.columns]
        if not samp_names:
            return entry[0], False
        is_test_sample = [
            test_anno.loc[x, 'is_test'] if x in test_anno.index else False
            for x in samp_names
        ]
        is_test_sample = np.array(is_test_sample)
        if exclude_test_samples is True and any(is_test_sample):
            shared_alphas[entry[0]] = data.loc[:, ~is_test_sample] + 1
            shared_test_counts[entry[0]] = data.loc[:, is_test_sample]
        else:
            try:
                shared_alphas[entry[0]] = data + 1
            except Exception as e:
                raise Exception(f'Unable to deal with {entry[1]}')
        return entry[0], True

    tasks = list(self.expression_tsv.items())
    with mp.Pool() as pool:
        for key, success in tqdm(pool.imap(load_alphas, tasks),
                                 total=len(tasks), desc='loading counts'):
            if success is False:
                warnings.warn('No samples found for {}.'.format(key),
                              RuntimeWarning)
                continue
            self.alphas[key] = shared_alphas[key]
            if key in shared_test_counts.keys():
                self.test_counts[key] = shared_test_counts[key]
    return self.alphas, self.test_counts
def run_multiple_backtest(self, initial_portf, start_time, end_time,
                          policies, loglevel=logging.WARNING, parallel=True):
    """Backtest multiple policies.
    """

    def _run_backtest(policy):
        return self.run_backtest(initial_portf, start_time, end_time,
                                 policy, loglevel=loglevel)

    num_workers = min(multiprocess.cpu_count(), len(policies))
    if parallel:
        workers = multiprocess.Pool(num_workers)
        results = workers.map(_run_backtest, policies)
        workers.close()
        return results
    else:
        return list(map(_run_backtest, policies))
def run_experiment(algorithms_for_experiment, backup, cores=8):
    iterations = list(range(0, 10))
    node_sizes = [250, 500]
    mus = np.arange(0.1, 0.8, 0.1)
    configuration_set = itertools.product(
        *[iterations, algorithms_for_experiment.items(), node_sizes, mus])

    cpu_cnt = cores
    pool = mp.Pool(processes=cpu_cnt)
    print(f"Running experiments in parallel with {cpu_cnt} cpus")
    parallel_execution_data = pool.imap_unordered(compute_experiment,
                                                  configuration_set)
    for result in parallel_execution_data:
        save_data(result, backup)
        save_data(result, result["method"])
def process_images(self, image_paths_list):
    '''
    multiprocess load and process frames
    '''
    model = self.model
    p = mp.Pool(mp.cpu_count())
    images = p.map(Image.open, image_paths_list)
    images = list(images)
    resized_img = p.map(resize_method, images)
    resized_img = np.concatenate(list(resized_img), axis=0)
    # features = model.predict(resized_img, batch_size=5)
    p.close()
    p.join()
    return resized_img
def findTransformation(self, transform, matches, processes, **kwargs):
    '''
    This is a method that finds the optimal transformation between two images
    given matching features using a random sample consensus.

    Input:
        transform: skimage.transform object
        matches (list): matches found through match_features method.
        processes: Number of processes to use.
        **kwargs are passed to skimage.transform.ransac

    Output:
        Transformations.
    '''
    keypts = self.features[0]

    def optimization(Pts):
        robustTrans, inliers = ransac((Pts[0], Pts[1]), transform, **kwargs)
        output = [robustTrans, inliers]
        return output

    # start pool of workers
    print('launching %i kernels...' % (processes))
    pool = mp.Pool(processes)
    tasks = [(key1[match[:, 0]], key2[match[:, 1]])
             for match, key1, key2 in zip(matches, keypts[:], keypts[1:])]
    chunk = int(len(keypts) / processes)
    jobs = pool.imap(optimization, tasks, chunksize=chunk)

    # get Transforms and inlier matches
    transforms, trueMatches = [], []
    print('Extracting Inlier Matches with RANSAC...')
    try:
        for j in jobs:
            transforms.append(j[0])
            trueMatches.append(j[1])
    except np.linalg.LinAlgError:
        pass

    # close the pool
    pool.close()
    print('Closing down the kernels...\n')

    return transforms, trueMatches
def findPeaksArray(vectors, waveletWidths, processes=4, **kwargs):
    '''
    Parallel version of find_peaks_cwt

    Input:
        vectors: 2d array, each row is a spectrum
        waveletWidths: 1d array of widths of the expected peaks
        processes: number of processes to use.
        kwargs: passed to scipy.signal.find_peaks_cwt

    Output:
        location of peaks
    '''

    # This is the function that will be mapped (in parallel) over the vectors 2d array
    def peaks(vector):
        peakIndices = find_peaks_cwt(vector, waveletWidths, **kwargs)
        return peakIndices

    # start pool of workers
    print('launching %i kernels...' % (processes))
    pool = mp.Pool(processes)
    tasks = [vector for vector in vectors]
    chunk = int(vectors.shape[0] / processes)
    jobs = pool.imap(peaks, tasks, chunksize=chunk)

    # get peaks from different processes
    results = []
    print('Extracting Peaks...')
    try:
        for j in jobs:
            results.append(j)
    except ValueError:
        warnings.warn('Error: Something went wrong!!!')

    # pack all peaks into a list, one entry of peak indices per spectrum
    peaks = [itm for itm in results]

    # close the pool
    print('Closing down the kernels... \n')
    pool.close()

    return peaks
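# Hedged usage sketch (illustrative): builds a small stack of noisy spectra, one per
# row, each with two Gaussian peaks, and asks for wavelet widths spanning the peak
# width. Assumes the same imports findPeaksArray relies on (a dill-capable multiprocess
# as mp, numpy as np, and scipy.signal.find_peaks_cwt).
def _example_findPeaksArray():
    x = np.arange(500)
    rng = np.random.default_rng(0)
    spectra = np.vstack([
        np.exp(-0.5 * ((x - 120) / 8.0) ** 2)
        + np.exp(-0.5 * ((x - 360) / 8.0) ** 2)
        + 0.02 * rng.normal(size=x.size)
        for _ in range(8)
    ])
    widths = np.arange(4, 20)
    return findPeaksArray(spectra, widths, processes=2)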