def gridSearch(options, use_datasets, numExamples, compute_mistakes=False, verbose=False, parallelize=False):
    if MODEL_KEYWORD not in options:
        print 'ERROR: must specify models for grid search under "%s" key.' % (MODEL_KEYWORD)
        return
    paramCombos = myProduct(options)
    partialTestCombo = partial(testCombo, use_datasets=use_datasets, numExamples=numExamples,
                               compute_mistakes=compute_mistakes, verbose=verbose)
    if parallelize:
        from pathos.multiprocessing import Pool
        p = Pool(5)
        try:
            result = p.map_async(partialTestCombo, paramCombos)
            result = result.get(999999999)
            bestScore, bestParamsStr, bestCombo = max(result, key=lambda x: x[0])
            sys.stdout = open("best.out", "w")
            print 'Best score of %s was achieved by parameters:\n%s' % (bestScore, bestParamsStr)
        except KeyboardInterrupt:
            p.terminate()
            print "You cancelled the program!"
            sys.exit(1)
    else:
        bestScore, bestCombo, bestComboStr = float('-inf'), None, ''
        for paramCombo in paramCombos:
            score, paramsStr, _ = testCombo(paramCombo, use_datasets=use_datasets, numExamples=numExamples,
                                            compute_mistakes=compute_mistakes, verbose=verbose, parallelize=False)
            if score > bestScore:
                bestScore, bestCombo, bestComboStr = score, paramCombo, paramsStr
        print 'Best score of %s was achieved by parameters:\n%s' % (bestScore, bestComboStr)

class DataTransformation(object):
    """
    Args:
        source ([dict]): list of records
        config (Configuration): configuration to apply to each record
    """

    def __init__(self, source, config, single_process=False, processes=cpu_count()):
        if single_process:
            self.results = imap(transform, izip(source, repeat(config)))
        else:
            self.pool = Pool(processes)
            self.results = self.pool.imap(transform, izip(source, repeat(config)))

    def __iter__(self):
        return self

    def next(self):
        return self.results.next()

    def __del__(self):
        try:
            self.pool.close()
        except AttributeError:
            # no pool was created in single_process mode
            pass

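# Usage sketch for DataTransformation (illustrative only; `transform` and
# `Configuration` are assumed to come from the surrounding module and are not
# defined in this snippet):
#
# records = [{'field': 1}, {'field': 2}]
# config = Configuration(...)
# for transformed in DataTransformation(records, config, single_process=True):
#     print transformed
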
def json_to_metadata_chunks(args, file_chunks=[]):
    # Open the json file and split the processing of its lines among worker processes
    if args.json_metadata is None:
        return None
    num_proc = len(file_chunks)
    print(file_chunks, file=sys.stderr)
    pool = Pool(processes=num_proc)
    with open(args.json_metadata) as json_metadata_file:
        # pool.map's third argument is a chunksize, not a chunk of work, so the
        # file's lines are mapped once across the whole pool
        results = pool.map(process_json_line, json_metadata_file)
    # objs = [p.get() for p in results]
    return merge_exact_duplicates(results)

class DataFusion(object):
    """
    Args:
        source ([tuple]): list of tuples, each containing the records to merge
        config (Configuration): configuration to apply to each tuple
    """

    def __init__(self, source, config, processes=cpu_count()):
        self._pool = Pool(processes)
        self._results = self._pool.imap(_merge_records, izip(source, repeat(config)))

    def __iter__(self):
        return self

    def next(self):
        return self._results.next()

    def __del__(self):
        self._pool.close()

def __call__(self, with_mp=False):
    cm = self.scatm.cmodel
    scat = self.scatm.smodel
    cgeo = np.pi * np.power(self.dist.a * c.micron2cm(), 2)

    # Test for graphite case
    if cm.cmtype == 'Graphite':
        if np.size(self.dist.a) > 1:
            for i in range(np.size(self.dist.a)):
                self.qsca_pe[:, i] = scat.Qsca(self.E, a=self.dist.a[i],
                                               cm=cmi.CmGraphite(size=cm.size, orient='perp'))
                self.qsca_pa[:, i] = scat.Qsca(self.E, a=self.dist.a[i],
                                               cm=cmi.CmGraphite(size=cm.size, orient='para'))
        else:
            self.qsca_pe = scat.Qsca(self.E, a=self.dist.a,
                                     cm=cmi.CmGraphite(size=cm.size, orient='perp'))
            self.qsca_pa = scat.Qsca(self.E, a=self.dist.a,
                                     cm=cmi.CmGraphite(size=cm.size, orient='para'))
        self.qsca = (self.qsca_pa + 2.0 * self.qsca_pe) / 3.0
    else:
        if np.size(self.dist.a) > 1:
            if with_mp:
                pool = Pool(processes=2)
                self.qsca = np.array(pool.map(self._one_scatter, self.dist.a)).T
            else:
                for i in range(np.size(self.dist.a)):
                    self.qsca[:, i] = self._one_scatter(self.dist.a[i])
        else:
            self.qsca = scat.Qsca(self.E, a=self.dist.a, cm=cm)

    if np.size(self.dist.a) == 1:
        kappa = self.dist.nd * self.qsca * cgeo / self.dist.md
    else:
        kappa = np.array([])
        for j in range(np.size(self.E)):
            kappa = np.append(kappa,
                              c.intz(self.dist.a, self.dist.nd * self.qsca[j, :] * cgeo) / self.dist.md)
    self.kappa = kappa

def avaliacao(populacao):
    x = valores(populacao)
    n = len(populacao)

    def steps(k):
        sequence = x[k, :]
        t = lm.move(startpoint, sequence=sequence)
        return t

    ncpu = cpu_count()
    with Pool(ncpu) as pool:
        peso = array(pool.map(steps, range(n)))
    return peso

def parallel_run(input_list, list_fn, split_n):
    def chunks(l, n):
        """Yield successive n-sized chunks from l."""
        for i in range(0, len(l), n):
            yield l[i:i + n]

    from pathos.multiprocessing import ProcessingPool as Pool
    p = Pool(split_n, daemon=True)
    args = chunks(input_list, split_n)
    result_list_list = p.map(list_fn, args)
    result = []
    for result_list in result_list_list:
        result.extend(result_list)
    return result

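# Usage sketch for parallel_run (illustrative). Note that split_n is both the
# pool size and the chunk *size*: a 10-element list with split_n=3 is split
# into chunks of 3, 3, 3 and 1, and list_fn receives a whole chunk at a time.
#
# def square_all(xs):
#     return [x * x for x in xs]
#
# parallel_run(list(range(10)), square_all, 3)  # -> [0, 1, 4, ..., 81]
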
def calculate_centroids(self, p=None):
    """
    Perform integration to find centroid at all turns up to N. Multiprocessing
    pool used to calculate independent turn values.

    Will automatically use `integrate_first_order` or `integrate_second_order`
    if appropriate.

    Args:
        p: Specify number of processes for pool. If not given then `cpu_count` is used.

    Returns:
        array of floats
    """
    if p:
        pool_size = p
    else:
        pool_size = cpu_count()
    pool = Pool(pool_size)

    # attempt to speed things up by spreading out difficult integration values
    # at the end of range - appeared to not work
    # x = []
    # for i in range(cpu_count()):
    #     x += range(N)[i::4]

    if len(self.mu) == 1:
        integration_function = self.integrate_first_order
    elif len(self.mu) == 2:
        integration_function = self.integrate_second_order
    else:
        integration_function = self.integrate_any_order

    x = range(self.N)
    results = pool.map(integration_function, x)
    pool.close()
    return results

def simulatehoneycomb(self, verbose=1, usediag=False, multiprocess=False):
    """ Loop over the 2D matrix of parameter values defined by makeparamvalues2D,
    calculate the ground state for each point, search for transitions and save
    in self.honeycomb """
    t0 = time.time()
    paramnames = list(self.vals2D.keys())
    initparamvalues = self.getall('det')
    npointsx = np.shape(self.vals2D[paramnames[0]])[0]
    npointsy = np.shape(self.vals2D[paramnames[0]])[1]
    self.hcgs = np.empty((npointsx, npointsy, self.ndots))

    self.initSparse()

    if multiprocess and _have_mp:
        pool = Pool(processes=4)
        aa = [(i, self, npointsy, usediag) for i in range(npointsx)]
        result = pool.starmap_async(_simulate_row, aa)
        out = result.get()
        self.hcgs = np.array(out)
    else:
        for i in range(npointsx):
            if verbose:
                tprint('simulatehoneycomb: %d/%d' % (i, npointsx))
            for j in range(npointsy):
                for name in paramnames:
                    setattr(self, name, self.vals2D[name][i][j])
                self.makeHsparse()
                self.solveH(usediag=usediag)
                self.hcgs[i, j] = self.OCC

    self.honeycomb, self.deloc = self.findtransitions(self.hcgs)
    self.setall('det', initparamvalues)

    if verbose:
        print('simulatehoneycomb: %.2f [s] (multiprocess %s)' % (time.time() - t0, multiprocess))
        sys.stdout.flush()

def initial_population_generator_hyper_ksp_multiproc(amount, **kwargs):
    heuristics_candidates = heuristics.get_heuristics()

    def worker(_):
        state = simple_state_generator_hyper_ksp([], heuristics_candidates, **kwargs)
        result = {
            "heuristics": state,
            "fitness": fitness_hyper_ksp(state, **kwargs)
        }
        return result

    pool = Pool()
    population = pool.map(worker, range(amount))
    return population

def create_features(WRITE_DB, FP, all_tables, schemas, CPUS, selected_schema, selected_table):
    # define key dataframe
    key_df = get_key(all_tables[selected_table], schemas, selected_schema)
    key_df = key_df.sort_values(by=['key', 'date'])
    if selected_schema == 'scoring_schema':
        key_df = key_df[['key']]
    else:
        key_df = key_df[['key', 'date', 'target']]

    pool = Pool(CPUS)

    # Features output framework
    all_functions = inspect.getmembers(FP, inspect.isfunction)
    all_functions = [x[1] for x in all_functions]
    # print(all_functions)
    args = (WRITE_DB, key_df, schemas, all_tables, fc_protocol, selected_schema)
    all_functions = [(x, args) for x in all_functions]

    if WRITE_DB:
        temp = pool.map(trig_func, all_functions)
        df = pd.concat(temp, axis=1)
        df = pd.concat([key_df, df], axis=1)
        engine = conn_eng()
        if selected_schema == 'scoring_schema':
            df.to_sql(all_tables['scoring_table'], schema=schemas['output_schema'],
                      con=engine, index=False, if_exists='replace')
        else:
            df.to_sql(all_tables['features_table'], schema=schemas['output_schema'],
                      con=engine, index=False, if_exists='replace')
        engine.dispose()
        del engine
        # print_summary(MISSING_VALUE_TREATMENT,df)
    else:
        temp = pool.map(trig_func, all_functions)
        df = pd.concat(temp, axis=1)
        df = pd.concat([key_df, df], axis=1)
        # print_summary(MISSING_VALUE_TREATMENT,df)
        return df
    return 'Files written to DB'

def decrypt(self, cipher):
    if (self.mode == 'cbc'):
        # Extracting IV from cipher
        iv = cipher[0:self.block_size]
        cipher_blocks = []
        for i in range(self.block_size, len(cipher), self.block_size):
            cipher_blocks.append(cipher[i:i + self.block_size])
        message_blocks = []
        xor_inp = iv
        aes = AES.new(self.key, AES.MODE_ECB)
        # Chaining blocks and deciphering
        for i in range(len(cipher_blocks)):
            decrypt_out = aes.decrypt(cipher_blocks[i])
            message_blocks.append(byte_xor(xor_inp, decrypt_out))
            xor_inp = cipher_blocks[i]
        # PKCS#7: the value of the last byte is the pad length
        padding = message_blocks[-1][-1]
        message_text = b''
        for i in message_blocks:
            message_text += i
        # removing padding
        message_text = message_text[:-padding]
        return message_text.decode("utf-8")
    elif (self.mode == 'ctr'):
        # separate IV from cipher
        iv = cipher[0:self.block_size]
        # Split cipher into blocks of given block_size
        cipher_blocks = []
        for i in range(self.block_size, len(cipher), self.block_size):
            cipher_blocks.append(cipher[i:min(i + self.block_size, len(cipher))])
        # Generate inputs for the multiprocessing function
        key_all = [self.key for i in range(len(cipher_blocks))]
        # xor_inps is the counter sequence iv, iv+1, iv+2, ... used in CTR mode;
        # the counter is a base-16 big-endian integer re-encoded at full block
        # width so leading zero bytes are preserved
        xor_inps = [
            (int(iv.hex(), 16) + i).to_bytes(self.block_size, 'big')
            for i in range(len(cipher_blocks))
        ]
        with Pool(len(cipher_blocks) + 2) as p:
            # uses all cores available in parallel
            out = p.map(xorEncrypt, key_all, xor_inps, cipher_blocks)
        message_text = (b''.join(out)).decode("utf-8")
        return message_text

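# The decrypt method above assumes two helpers that are not shown in this
# snippet. A minimal sketch of what they might look like (hypothetical
# reconstructions under the assumption of PyCryptodome's AES, not the
# original definitions):

from Crypto.Cipher import AES  # assumed dependency of the snippet above

def byte_xor(a, b):
    # XOR two byte strings; zip() truncates to the shorter input, which is
    # the behaviour CTR mode needs for a short final block
    return bytes(x ^ y for x, y in zip(a, b))

def xorEncrypt(key, counter_block, cipher_block):
    # CTR decryption of one block: encrypt the counter with AES-ECB and XOR
    # it with the ciphertext block; blocks are independent, hence the Pool
    aes = AES.new(key, AES.MODE_ECB)
    return byte_xor(aes.encrypt(counter_block), cipher_block)
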
def threaded_contents_to_text(
    content_series,
    processes=None,
    none_content='raise',
):
    """Threaded version of content_to_text method

    It takes as input a series whose index is the uid of the products, and
    whose values are the content (in the form of bytes) of the documents.
    The processes argument is the number of processes to launch. If omitted,
    it defaults to the number of cpu cores on the machine.
    The none_content arg can be 'raise' (default) or 'to_empty'.
    """
    processer = partial(
        PDFDecoder.content_to_text,
        none_content=none_content,
    )
    processes = processes if processes else cpu_count()
    print(f'Launching {processes} processes.')
    in_ds = content_series.apply(BytesIO)

    # Pool with context manager does not seem to work due to issue 38501 of
    # the standard python library. It hangs when running tests through pytest.
    # see: https://bugs.python.org/issue38501
    # Below content should be tested again whenever this issue is closed
    #
    # with Pool(nodes=processes) as pool:
    #     tuples = (list(in_ds.index),
    #               pool.map(processer, in_ds))
    #
    # End of block

    # This temporary solution should be removed when the tests mentioned above
    # are successful. It just closes the pool after execution or exception.
    try:
        pool = Pool(nodes=processes)
        pool.restart(force=True)
        tuples = (list(in_ds.index),
                  pool.map(processer, in_ds))
    except Exception:
        pool.close()
        raise
    pool.close()
    # End of block

    ds = pd.Series(tuples[1], index=tuples[0])
    return ds

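# Usage sketch (illustrative; assumes PDFDecoder.content_to_text accepts a
# BytesIO of PDF bytes and returns the extracted text):
#
# import pandas as pd
# contents = pd.Series({'uid-1': pdf_bytes_1, 'uid-2': pdf_bytes_2})
# texts = threaded_contents_to_text(contents, processes=2)
# texts is a Series of extracted text, indexed by the same uids
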
def makeRadial():
    rad, angle = d["radial"]["rad"], d["radial"]["angle"]
    args = np.linspace(angle, angle + np.pi, frameCount)

    pool = Pool(4)

    while True:
        subIm = JuliaTools.subImage(c=rad * np.exp(1j * angle),
                                    r=r, n=10, p=p,
                                    iters=iters, split=split,
                                    save=False, aura=False)
        isBlackList = pool.map(subIm, coords)
        if not all(isBlackList):
            break
        else:
            rad *= 0.975

    # Circular arc c follows in complex plane
    cPath = rad * np.exp(1j * args)

    for frame in xrange(frameCount):
        subIm = JuliaTools.subImage(c=cPath[frame],
                                    r=r, n=n, p=p,
                                    iters=iters, split=split)
        isBlackList = pool.map(subIm, coords)
        allBlack = all(isBlackList)
        if not allBlack:
            JuliaTools.makeFrame(frame, n, split, coords)

    pool.close()

    JuliaTools.prepareForFFmpeg(frameCount=frameCount, loop=True)

    with open("tweet.txt", "w") as out:
        out.write("Images generated using constants"
                  " on a circular arc of radius {:03.2f}.".format(rad))

    stop = timeit.default_timer()
    print stop - start

def create(file, variables, metric, repeat, compare, compare_values, fixed):
    """
    Creates a model with extrap

    :param file: csv file
    :param variables: list of variable columns
    :param metric: metric column
    :param repeat: repeat column or None
    :param compare: compare column
    :param compare_values: unique values of compare column
    :param fixed: dictionary of column:value to fix
    :return: Model
    """

    def get_model(cmp_dict=None):
        try:
            f = fixed.copy()
            if cmp_dict is not None:
                f.update(cmp_dict)
            tmp_file_in = convert(file, variables, metric, repeat, f)
            if len(variables) == 1:
                return extrap_one_param(tmp_file_in)
            elif len(variables) == 2:
                return extrap_two_param(tmp_file_in)
            else:
                raise ValueError(
                    "Parameters with more than 2 parameters are currently not supported"
                )
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
            # return a pair so callers can always unpack the result
            return None, None

    if compare is None:
        # create single model
        m, r2 = get_model()
        if m is None:
            return []
        return [Model(m, variables, adj_r2=r2)]
    else:
        # create multiple models
        def get_model_comp(compare_val):
            cmp = {compare: compare_val}
            model_str, adj_r2 = get_model(cmp)
            if model_str is None:
                return None
            return Model(model_str, variables, name=compare_val, adj_r2=adj_r2)

        with Pool(multiprocessing.cpu_count()) as p:
            models = p.map(get_model_comp, compare_values)
        return list(filter(lambda x: x is not None, models))

def RobustSTL(input, season_len, reg1=10.0, reg2=0.5, K=2, H=5, dn1=1., dn2=1.,
              ds1=50., ds2=1., learning_rate=0.01, max_iter=100, max_trials=10,
              verbose=True):
    if np.ndim(input) < 2:
        return _RobustSTL(input, season_len, reg1, reg2, K, H, dn1, dn2, ds1, ds2,
                          learning_rate, max_iter, max_trials, verbose)
    elif np.ndim(input) == 2 and np.shape(input)[1] == 1:
        return _RobustSTL(input[:, 0], season_len, reg1, reg2, K, H, dn1, dn2, ds1, ds2,
                          learning_rate, max_iter, max_trials, verbose)
    elif np.ndim(input) == 2 or np.ndim(input) == 3:
        if np.ndim(input) == 3 and np.shape(input)[2] > 1:
            raise ValueError(
                "[!] Valid input series shape: [# of Series, # of Time Steps]"
                " or [# of series, # of Time Steps, 1]"
            )
        elif np.ndim(input) == 3:
            input = input[:, :, 0]
        num_series = np.shape(input)[0]
        input_list = [input[i, :] for i in range(num_series)]

        from pathos.multiprocessing import ProcessingPool as Pool
        p = Pool(num_series)

        def run_RobustSTL(_input):
            return _RobustSTL(_input, season_len, reg1, reg2, K, H, dn1, dn2, ds1, ds2)

        result = p.map(run_RobustSTL, input_list)
        return result
    else:
        raise ValueError("[!] input series error")

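# Usage sketch for the batch path above (illustrative; the pool spawns one
# worker per series, so keep the number of series modest):
#
# series = np.random.randn(4, 500)  # [# of series, # of time steps]
# results = RobustSTL(series, season_len=50)
# each entry of results is whatever _RobustSTL returns for one series
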
def fetch_file_links(data):
    file_links = data['file_links']
    locations = data['locations']
    save_dir = data['save_dir']

    thread_pool_size = multiprocessing.cpu_count()
    with Pool(thread_pool_size) as pool:
        fetch_iter = pool.uimap(lambda link: _fetch_file_links(link, locations, save_dir),
                                file_links)
        results = list(tqdm(fetch_iter, total=len(file_links)))

    file_paths, error_links = zip(*results)
    file_paths = [l for l in file_paths if l is not None]
    error_links = [l for l in error_links if l is not None]
    print('Links with valid location:', len(file_paths))
    return returning(file_paths, 'file_paths', data)

def crossover(population, crossover_reproduction_func, mutation_func, repair_func,
              fitness_func, **kwargs):
    childs = crossover_reproduction_func(population, **kwargs)
    if childs is None or len(childs) == 0:
        return population

    def worker(child):
        mutation_func(child, **kwargs)
        repair_func(child, **kwargs)
        return {"heuristics": child, "fitness": fitness_func(child, **kwargs)}

    pool = Pool()
    new_individuals = pool.map(worker, childs)
    population += new_individuals
    return population

def calc_path_matrix(graph: nx.Graph,
                     heuristic: Callable = None,
                     weight: str = "weight",
                     nodes: int = 1) -> Dict[Hashable, List[Hashable]]:
    """
    Calculates a shortest path matrix between all combinations of nodes in the graph.
    """
    astar_path = _astar_path_factory(graph, heuristic=heuristic, weight=weight)
    node_pairs = [(source, target)
                  for target in graph.nodes()
                  for source in graph.nodes()
                  if target > source]
    with Pool(nodes=nodes) as pool:
        paths = pool.map(astar_path, node_pairs)
    return {
        frozenset(node_pair): path
        for node_pair, path in zip(node_pairs, paths)
    }

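# Usage sketch (illustrative; assumes _astar_path_factory returns a picklable
# callable that maps a (source, target) pair to the A* path between them):
#
# import networkx as nx
# g = nx.grid_2d_graph(3, 3)
# matrix = calc_path_matrix(g, nodes=2)
# matrix[frozenset([(0, 0), (2, 2)])]  # path between two opposite corners
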
def min_distances_parallel(features_1, features_2, cat_weight_=None, bool_features=None):
    if bool_features is not None and cat_weight_ is None:
        cat_weight_ = np.mean(bool_features)
    elif bool_features is None and cat_weight_ is None:
        cat_weight_ = 0
    else:
        pass

    def get_min_distance(features_list):
        return min_distance(features_list, features_2, bool_features)

    distances = np.array(Pool().map(get_min_distance, features_1.tolist()))
    return distances.flatten()

def apply_by_multiprocessing(df, func, **kwargs):
    """
    Parallel execution function for the DataFrame
    :param df: Input DataFrame
    :param func: Function to apply to each split of the DataFrame
    :param kwargs: additional arguments for the df.apply() such as axis et al.;
                   must include 'workers', the number of pool processes
    :return: Output DataFrame
    """
    workers = kwargs.pop('workers')
    pool = Pool(processes=workers)
    result = pool.map(_apply_df, [(d, func, i, kwargs)
                                  for i, d in enumerate(np.array_split(df, workers))])
    pool.close()
    result = sorted(result, key=lambda x: x[0])
    return pd.concat([i[1] for i in result])

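# apply_by_multiprocessing relies on a module-level helper so the pool can
# pickle it. A minimal sketch of what _apply_df might look like (hypothetical
# reconstruction, not the original definition):

def _apply_df(args):
    # Unpack one split, apply func, and return the split index so the caller
    # can restore the original order after the parallel map
    df, func, index, kwargs = args
    return index, df.apply(func, **kwargs)

# Usage sketch:
# result = apply_by_multiprocessing(df, my_row_fn, workers=4, axis=1)
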
def gen_kernel(self, process=1):
    def calc_kernel(i):
        return prism.gz(self.xp[0:1], self.yp[0:1], self.zp[0:1], [self.mesh[i]])

    if process > 1:  # multiprocessing on Windows can possibly fail here
        print('Number of process:', process)
        with Pool(processes=process) as pool:
            kernel0 = pool.map(calc_kernel, range(len(self.mesh)))
    else:
        kernel0 = [calc_kernel(i) for i in range(len(self.mesh))]
    self.kernel0 = np.array(kernel0).reshape(self.nz, self.ny, self.nx)
    self.kernel_op = AbicLSQOperator(self.kernel0,
                                     depth_constraint=self.constraints['depth'],
                                     smooth_components=self._smooth_components,
                                     refer_constraint=self.constraints['refer'],
                                     weights=self._weights)

def process(self, rel_pattern="", func=None, use_parallel=False, verbose=False, **kwargs):
    if use_parallel:
        self._pool = Pool(self._num_of_cpu - 1)
    result = self._process(rel_pattern, func, use_parallel=use_parallel,
                           verbose=verbose, **kwargs)
    if use_parallel:
        self._pool.close()
    return result

def makePower():
    global c
    pMin, pMax = d["power"]["pMin"], d["power"]["pMax"]
    pPath = np.linspace(pMin, pMax, frameCount)

    pool = Pool(4)

    # Get interesting c
    while True:
        subIm = JuliaTools.subImage(c=c, n=10, iters=iters / 2,
                                    r=r, p=pMin, split=split,
                                    save=False, aura=False)
        isBlackList = pool.map(subIm, coords)
        if not all(isBlackList):
            break
        else:
            c *= 0.975

    for frame in xrange(frameCount):
        subIm = JuliaTools.subImage(c=c,
                                    r=r, n=n, p=pPath[frame],
                                    iters=iters / 2, split=split)
        isBlackList = pool.map(subIm, coords)
        allBlack = all(isBlackList)
        if not allBlack:
            JuliaTools.makeFrame(frame, n, split, coords)

    pool.close()

    JuliaTools.prepareForFFmpeg(frameCount=frameCount, loop=True)

    with open("tweet.txt", "w") as out:
        out.write("woooooooooooooooooooo")

    stop = timeit.default_timer()
    print stop - start

def climByAveragingPeriods(urls,              # list of (daily) granule URLs for a long time period (e.g. a year)
              nEpochs,                        # compute a climatology for every N epochs (days) by 'averaging'
              nWindow,                        # number of epochs in window needed for averaging
              variable,                       # name of primary variable in file
              mask,                           # name of mask variable
              coordinates,                    # names of coordinate arrays to read and pass on (e.g. 'lat' and 'lon')
              maskFn=qcMask,                  # mask function to compute mask from mask variable
              averager='pixelAverage',        # averaging function to use, one of ['pixelAverage', 'gaussInterp']
              mode='sequential',              # Map across time periods of N-days for concurrent work, executed by:
                                              # 'sequential' map, 'multicore' using pool.map(),
                                              # 'cluster' using pathos pool.map(), or 'spark' using PySpark
              numNodes=1,                     # number of cluster nodes to use
              nWorkers=4,                     # number of parallel workers per node
              averagingFunctions=AveragingFunctions,   # dict of possible averaging functions
              legalModes=ExecutionModes       # list of possible execution modes
             ):
    '''Compute a climatology every N days by applying a mask and averaging function.
Writes the averaged variable grid, attributes of the primary variable, and the coordinate arrays in a dictionary.
***Assumption: This routine assumes that the N grids will fit in memory.***
    '''
    try:
        averageFn = averagingFunctions[averager]
    except KeyError:
        averageFn = average
        print >>sys.stderr, 'climatology: Error, Averaging function must be one of: %s' % str(averagingFunctions)

    urlSplits = [s for s in fixedSplit(urls, nEpochs)]
    if VERBOSE: print >>sys.stderr, urlSplits

    def climsContoured(urls):
        n = len(urls)
        var = climByAveraging(urls, variable, mask, coordinates, maskFn, averageFn)
        return contourMap(var, variable, coordinates, n, urls[0])

    if mode == 'sequential':
        plots = map(climsContoured, urlSplits)
    elif mode == 'multicore':
        pool = Pool(nWorkers)
        plots = pool.map(climsContoured, urlSplits)
    elif mode == 'cluster':
        pass                                  # not implemented yet
    elif mode == 'spark':
        pass                                  # not implemented yet; fall back to a sequential map
        plots = map(climsContoured, urlSplits)

    print plots
    return plots

def _process_set_reads_library(self, input_object_info, genome_index_base,
                               result_directory, cli_option_params):
    """
    _process_set_reads_library: process set reads library
    """
    reads_refs = self.fetch_reads_refs_from_sampleset(input_object_info['ref'],
                                                      input_object_info['info'])
    set_object_name = input_object_info['info'][1]
    alignment_set_name = set_object_name + cli_option_params['alignment_set_suffix']

    arg_1 = []
    arg_2 = [genome_index_base] * len(reads_refs)
    arg_3 = [result_directory] * len(reads_refs)
    arg_4 = []
    conditions = []
    for reads_ref in reads_refs:
        reads_input_object_info = self._get_input_object_info(reads_ref['ref'])
        option_params = cli_option_params.copy()
        option_params['reads_condition'] = reads_ref['condition']
        conditions.append(reads_ref['condition'])
        arg_1.append(reads_input_object_info)
        arg_4.append(option_params)

    cpus = min(cli_option_params.get('num_threads'), multiprocessing.cpu_count())
    pool = Pool(ncpus=cpus)
    log('running _process_alignment_object with {} cpus'.format(cpus))
    reads_alignment_object_refs = pool.map(self._process_single_reads_library,
                                           arg_1, arg_2, arg_3, arg_4)

    for reads_alignment_object_ref in reads_alignment_object_refs:
        if reads_alignment_object_ref.startswith('ERROR'):
            error_msg = 'Caught exception in worker\n'
            error_msg += '{}'.format(reads_alignment_object_ref)
            raise ValueError(error_msg)

    workspace_name = cli_option_params['workspace_name']
    reads_alignment_set_object_ref = self._save_alignment_set(reads_alignment_object_refs,
                                                              workspace_name,
                                                              alignment_set_name,
                                                              conditions)
    return reads_alignment_set_object_ref

def sparsify_predictions(model_name, timepoints=None):
    """ Computes and saves sparse representations for model predictions

    Args:
        model_name: name of model whose predictions you wish to sparsify
        timepoints: list of timepoints to sparsify - [int]
    """
    from ipp_tools.slurm import slurm_map
    from division_detection.constants import NUM_TIMEPOINTS

    pred_dir = '/nrs/turaga/bergera/division_detection/prediction_outbox/{}'.format(model_name)
    sparse_pred_dir = '{}/sparse'.format(pred_dir)

    if not os.path.exists(sparse_pred_dir):
        os.mkdir(sparse_pred_dir)

    existing_tps = set([int(fname[:-3]) for fname in os.listdir(sparse_pred_dir)])

    if timepoints is None:
        timepoints = np.arange(3, NUM_TIMEPOINTS - 4)
    timepoints = [t for t in timepoints if t not in existing_tps]

    def _sparsify_predictions_helper(t_idx):
        """ Helper function that sparsifies a single timepoint. """
        from division_detection.sparse_utils import save_dense_as_coo
        pred_dir = '/nrs/turaga/bergera/division_detection/prediction_outbox/{}'.format(model_name)
        dense_pred_dir = '{}/dense'.format(pred_dir)
        sparse_pred_dir = '{}/sparse'.format(pred_dir)
        if not os.path.exists('{}/{}.h5'.format(dense_pred_dir, t_idx)):
            warn('You asked me to sparsify predictions for {} but none exist'.format(t_idx))
        try:
            with h5py.File('{}/{}.h5'.format(dense_pred_dir, t_idx), 'r') as prediction_file:
                predictions = prediction_file['predictions']
                print("Loading ", t_idx)
                tp_preds = predictions[:]
                print("Saving ", t_idx)
                save_dense_as_coo(tp_preds, '{}/{}'.format(sparse_pred_dir, t_idx))
        except OSError as os_err:
            warn("Caught OS error while trying to read {}; continuing".format(t_idx))

    pool = Pool(20)
    pool.map(_sparsify_predictions_helper, timepoints)

def run(non_iter_args, do_multiprocessing):
    [
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    ] = non_iter_args

    partial_gaincalc_oneset = partial(
        calc_weights_oneset,
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    )

    if do_multiprocessing:
        pool = Pool(processes=pathos.multiprocessing.cpu_count())
        pool.map(partial_gaincalc_oneset, weightcalcdata.causevarindexes)

        # Current solution to no close and join methods on ProcessingPool
        # https://github.com/uqfoundation/pathos/issues/46
        s = pathos.multiprocessing.__STATE["pool"]
        s.close()
        s.join()
        pathos.multiprocessing.__STATE["pool"] = None
    else:
        for causevarindex in weightcalcdata.causevarindexes:
            partial_gaincalc_oneset(causevarindex)

    return None

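# Note: newer pathos releases expose close()/join()/clear() directly on
# ProcessingPool, so the __STATE workaround above can probably be replaced
# by the following (an assumption about the installed pathos version, not
# part of the original code):
#
# pool = Pool(processes=pathos.multiprocessing.cpu_count())
# pool.map(partial_gaincalc_oneset, weightcalcdata.causevarindexes)
# pool.close()
# pool.join()
# pool.clear()
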
def calculate_bleu(sess, trainable_model, data_loader):
    # bleu score implementation
    # used for performance evaluation for pre-training & adv. training
    # separate true dataset to the valid set
    # conditionally generate samples from the start token of the valid set
    # measure similarity with nltk corpus BLEU
    smoother = SmoothingFunction()

    data_loader.reset_pointer()
    bleu_avg = 0

    references = []
    hypotheses = []

    for it in xrange(data_loader.num_batch):
        batch = data_loader.next_batch()
        # predict from the batch
        # TODO: which start tokens?
        # start_tokens = batch[:, 0]
        start_tokens = np.array([START_TOKEN] * BATCH_SIZE, dtype=np.int64)
        prediction = trainable_model.predict(sess, batch, start_tokens)
        # argmax to convert to vocab
        # prediction = np.argmax(prediction, axis=2)

        # cast batch and prediction to 2d list of strings
        batch_list = batch.astype(np.str).tolist()
        pred_list = prediction.astype(np.str).tolist()
        references.extend(batch_list)
        hypotheses.extend(pred_list)

    bleu = 0.

    # calculate bleu for each predicted seq
    # compare each predicted seq with the entire references
    # this is slow, use multiprocess
    def calc_sentence_bleu(hypothesis):
        return sentence_bleu(references, hypothesis, smoothing_function=smoother.method4)

    if __name__ == '__main__':
        p = Pool()
        result = p.map(calc_sentence_bleu, hypotheses)
        bleu = np.mean(result)

    return bleu

def pad_pdf(path, ratio, output_path=None):
    """Pad PDF with a <ratio>% white margin increase on the right.

    Takes a path to the original PDF file, converts them to PIL images, and
    pads them with the appropriate whitespace. Returns a path to the padded
    PDF. If a valid output_path is given, it will move the PDF to the given
    path and return the path.
    """
    images = pdf2image.convert_from_path(path)
    p = Pool(4)

    def overlay_and_store(img):
        """Pad the individual images by overlaying it on a white background.

        Passed to a multiprocessing pool as each individual PDF page is
        independent of each other. Saves the image in a temp path as a JPEG,
        and returns the absolute file path.
        """
        w, h = img.size
        padded_img = Image.new("RGB", (int(w * (1.0 + ratio)), h), "white")
        padded_img.paste(img, (0, 0))
        tmp_path = _generate_tmp_path(ext='.jpeg')
        padded_img.save(tmp_path, "JPEG")
        return tmp_path

    padded_images = p.map(overlay_and_store, images)

    # Output as PDF.
    output = _generate_tmp_path(ext='.pdf')
    with open(output, 'wb') as f:
        f.write(img2pdf.convert(padded_images))

    # Clean up temp image files used.
    for tmp_img in padded_images:
        os.remove(tmp_img)

    if output_path:
        os.rename(output, output_path)
        return output_path
    return output

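# Usage sketch (illustrative; pdf2image needs poppler installed, and img2pdf
# and Pillow are assumed to be the imports behind Image and img2pdf above):
#
# padded = pad_pdf('report.pdf', 0.25, output_path='report_padded.pdf')
# adds a 25% white margin on the right of every page of report.pdf
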
def __init__(self, numSS_Points, numSS_it, N, Qslack, Q, R, dR, n, d, shift, dt,
             track_map, Laps, TimeLMPC, Solver):
    """Initialization
    Arguments:
        numSS_Points: number of points selected from the previous trajectories to build SS
        numSS_it: number of previous trajectories selected to build SS
        N: horizon length
        Q, R: weight to define cost function h(x,u) = ||x||_Q + ||u||_R
        dR: weight to define the input rate cost h(x,u) = ||x_{k+1}-x_k||_dR
        n, d: state and input dimension
        shift: given the closest point x_t^j to x(t) the controller starts selecting the points for SS from x_{t+shift}^j
        track_map: track_map
        Laps: maximum number of laps the controller can run (used to avoid dynamic allocation)
        TimeLMPC: maximum time [s] that a lap can last (used to avoid dynamic allocation)
        Solver: solver used in the reformulation of the LMPC as QP
    """
    self.numSS_Points = numSS_Points
    self.numSS_it = numSS_it
    self.N = N
    self.Qslack = Qslack
    self.Q = Q
    self.R = R
    self.dR = dR
    self.n = n
    self.d = d
    self.shift = shift
    self.dt = dt
    self.track_map = track_map
    self.Solver = Solver
    self.clustering = None
    self.OldInput = np.zeros((1, d))

    # Initialize the following quantities to avoid dynamic allocation
    # TODO: is there a more graceful way to do this in python?
    NumPoints = int(TimeLMPC / dt) + 1
    self.TimeSS = 10000 * np.ones(Laps).astype(int)       # Time at which each j-th iteration is completed
    self.SS = 10000 * np.ones((NumPoints, n, Laps))       # Sampled Safe SS
    self.uSS = 10000 * np.ones((NumPoints, d, Laps))      # Input associated with the points in SS
    self.Qfun = 0 * np.ones((NumPoints, Laps))            # Qfun: cost-to-go from each point in SS
    # TODO replace with after-the-fact mapping?
    self.SS_glob = 10000 * np.ones((NumPoints, n, Laps))  # SS in global (X-Y) used for plotting

    # Initialize the controller iteration
    self.it = 0

    # Initialize pool for parallel computing used in the internal function _LMPC_EstimateABC
    # TODO this parameter should be tunable
    self.p = Pool(4)

def _process_index_to_index(filename, names_map, orig_map, dist_map,
                            must_have_index=-1, must_have_percent=0.0, size=()):
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if len(size) == 2:
        img = cv2.resize(img, size, interpolation=cv2.INTER_NEAREST)

    # The slowest operation is to find indices, so it is multiprocessed
    colors_indeces = {}

    def get_indx(name):
        i = (img == orig_map[name])
        return name, i

    result = Pool(ncpus=16).map(get_indx, orig_map.keys())
    for res in result:
        name = res[0]
        idx = res[1]
        colors_indeces[name] = idx
    ######

    for orig_name, name in names_map.items():
        index = dist_map[name]
        i = colors_indeces[orig_name]
        img[i] = index

    if must_have_index >= 0:
        if must_have_percent > 0.0:
            percent = float(np.count_nonzero(img == must_have_index)) / (img.shape[0] * img.shape[1])
            if percent < must_have_percent:
                return False, None
        else:
            if must_have_index not in img:
                return False, None

    return True, img

def make_query(self, size=1):
    ## quit if nr_unlabeled_samples = 1
    if self.dataset.len_unlabeled() == 1:
        return self.dataset.get_unlabeled_entries()[0].astype(int)

    ## Set the possible labels
    self.possible_labels = list(set(self.dataset.get_labeled_entries()[1]))

    ## Train the model
    self.model.train(self.dataset)

    ## Get probabilities
    X_ids, X = self.dataset.get_unlabeled_entries()
    pred = self.model.predict_proba(X)  # pred.shape = (n_unlabeled, nr_of_labels)

    ## Setup pool for cpu parallelisation
    p = Pool(cpu_count(), maxtasksperchild=1000)

    ## Get uncertainty after adding every sample with every label
    ## (nr of unlabeled samples -> len(X))
    total = np.asarray(p.map(self._eer, X_ids, len(X) * [self.dataset],
                             len(X) * [self.depth]))  # total.shape = (n_unlabeled, nr_of_labels)

    ## Close the Pool again
    p.close()
    p.join()
    p.clear()

    ## Get the total uncertainty of one sample after adding a label weighted by the labels probability
    total = np.inner(pred, total).diagonal()  # total.shape = (n_unlabeled,)

    ## Zip it
    total = zipit(X_ids, total)

    ## Sort it
    results = sort_by_2nd(total, 'min')

    return results[:size, 0].astype(int)

def run(self):
    files = os.listdir(self.folder)
    outfile_bed = self.outfile.replace('.txt', '.bed')

    output_file = open(self.outfile, 'w')
    output_file.write('circle_id\ttranscript_id\tskipped_exon\tintron\tread_names\tsplice_reads\texon_reads\n')
    output_file.close()

    output_file = open(outfile_bed, 'w')
    output_file.write('# bed12 format\n')
    output_file.close()

    from pathos.multiprocessing import ProcessingPool as Pool
    p = Pool(self.cpus)
    p.map(self.run_parallel, files)

def collect_significances(self):
    with open(self.filename, 'w') as f:
        f.write("Higgsino mass,Bino mass,Discovery Significance,Exclusion Limit\n")

    def get_disc_sig(signal, classifier, bdt_cut):
        try:
            table = BDTCutFlowTable(signal, classifier, bdt_cut)
            calc = table.initialize_significance_calculator()
            sig = calc.calculate_discovery_significance('bdt')
            return sig
        except:
            pass

    def get_excl_lim(signal, classifier, bdt_cut):
        try:
            table = BDTCutFlowTable(signal, classifier, bdt_cut)
            calc = table.initialize_significance_calculator()
            lim = calc.calculate_exclusion_limit('bdt')
            return lim
        except:
            pass

    mySignals = self.signals
    pbar = tqdm(total=len(mySignals) / 8)

    def write_sigs(signal):
        try:
            classifier = Classifier(signal.mass_combination_tuple)
            discs = map(lambda x: get_disc_sig(signal, classifier, x), np.arange(-10, 10, 0.1))
            excls = map(lambda x: get_excl_lim(signal, classifier, x), np.arange(-10, 10, 0.1))
            with open(self.filename, 'a') as f:
                f.write("{},{},{},{}\n".format(signal.higgsino_mass, signal.bino_mass,
                                               max(discs), max(excls)))
            pbar.update(1)
        except:
            pass

    p = Pool(8)
    p.map(write_sigs, mySignals)

def parallel_bball_data_helper(dataset, savedir, cpus=30, traj_per_file=10):
    # todo: deal with invalid data after data transformation
    result_list = []
    pool = Pool(cpus)
    tasks_per_cpu = max(math.ceil(len(dataset) / cpus), traj_per_file)
    # make tasks a multiple of traj_per_file
    tasks_per_cpu = math.ceil(tasks_per_cpu / traj_per_file) * traj_per_file

    # save meta information
    joblib.dump({
        'len': len(dataset),
        'traj/file': traj_per_file
    }, os.path.join(savedir, 'meta.pkl'))

    index = 0
    i = 0
    while index < len(dataset):
        indices = range(index, min(index + tasks_per_cpu, len(dataset)))
        index = index + tasks_per_cpu
        result_list.append(
            pool.apply_async(func=save_bball_data_helper,
                             args=(dataset, indices, savedir, i, traj_per_file)))
        i += int(tasks_per_cpu / traj_per_file)

    while True:
        try:
            def call_if_ready(result):
                if result.ready():
                    result.get()
                    return True
                else:
                    return False

            done_list = list(map(call_if_ready, result_list))
            print('{}/{} done'.format(sum(done_list), len(result_list)))
            if np.all(done_list):
                break
            time.sleep(3)
        except:
            pool.terminate()
            raise

    print('finished preprocessing')

from pathos.multiprocessing import Pool
import os

tsne = True
try:
    ncores = int(os.popen('qstat -f $PBS_JOBID | grep resources.used.ncpus').read().split(' ')[-1])
except:
    ncores = 1
print 'multiprocessing on ', ncores
pool = Pool(ncores)

from collections import Counter
import glob

type = ['night', 'day', '*']
for t in type:
    f = glob.glob('lhsgroup/*_gps.' + t)
    lf = len(f)
    print 'nfiles', lf

    def readme(x):
        return ['-'.join(set(i.strip().split('-'))) for i in tuple(open(x))]

    batch = pool.map(readme, f)

def _hilbert_ssa(args):
    i, j, data = args
    if not np.any(np.isnan(data)):
        ssa = ssa_class(data, M=12)
        _, _, _, rc = ssa.run_ssa()
        real_part = rc[:, 0] + rc[:, 1]
        imag_part = np.imag(hilbert(real_part))
        phase_hilb_rc = np.arctan2(imag_part[12:-12], real_part[12:-12])
    else:
        phase_hilb_rc = np.nan
    return i, j, phase_hilb_rc


pool = Pool(NUM_WORKERS)
net.wavelet(1, 'y', pool=pool, cut=1)

# Hilbert on RC SSA fluctuations
# args = [(i, j, net.data[:, i, j]) for i in range(net.lats.shape[0]) for j in range(net.lons.shape[0])]
# results = pool.map(_hilbert_ssa, args)
# for i, j, res in results:
#     net.phase[:, i, j] = res

net.get_continuous_phase(pool=pool)
net.get_phase_fluctuations(rewrite=True, pool=pool)
pool.close()
pool.join()

# index_correlations = {}
# index_datas = {}

WORKERS = 5
NUM_SURRS = 100

to_do_periods = np.arange(2, 15.5, 0.5)

net = ScaleSpecificNetwork('/Users/nikola/work-ui/data/NCEP/air.mon.mean.levels.nc', 'air',
                           date(1950, 1, 1), date(2014, 1, 1), None, None, level=0,
                           dataset="NCEP", sampling='monthly', anom=False)

synchronization = {}
for period in to_do_periods:
    print("running for %.1f period..." % (period))
    _, nao_ph, sg_nao, a_nao = load_NAOindex_wavelet_phase(date(1950, 1, 1), date(2014, 1, 1), period, anom=False)
    _, nino_ph, sg_nino, a_nino = load_nino34_wavelet_phase(date(1950, 1, 1), date(2014, 1, 1), period, anom=False)
    _, sunspots_ph, sg_sunspots, a_sunspots = load_sunspot_number_phase(date(1950, 1, 1), date(2014, 1, 1), period, anom=False)
    _, pdo_ph, sg_pdo, a_pdo = load_pdo_phase(date(1950, 1, 1), date(2014, 1, 1), period, anom=False)

    pool = Pool(WORKERS)
    net.wavelet(period, period_unit='y', cut=2, pool=pool)
    args = [(net.phase[:, i, j], i, j, nao_ph, nino_ph, sunspots_ph, pdo_ph)
            for i in range(net.lats.shape[0]) for j in range(net.lons.shape[0])]
    result = pool.map(_compute_MI_synch, args)
    synchs = np.zeros((4, net.lats.shape[0], net.lons.shape[0]))
    synchs_surrs = np.zeros((NUM_SURRS, 4, net.lats.shape[0], net.lons.shape[0]))
    for i, j, naos, ninos, suns, pdos in result:
        synchs[0, i, j] = naos
        synchs[1, i, j] = ninos
        synchs[2, i, j] = suns
        synchs[3, i, j] = pdos

    for surr in range(NUM_SURRS):
        sg_nao.construct_fourier_surrogates(algorithm='FT')
        sg_nao.add_seasonality(a_nao[0], a_nao[1], a_nao[2])
        sg_nao.wavelet(period, period_unit="y", cut=2)
        sg_nino.construct_fourier_surrogates(algorithm='FT')

#!/usr/bin/env python

from pathos.multiprocessing import Pool
import dill
import pickle
#FIXME: multiprocessing needs cPickle + copy_reg

pool = Pool()

# pickle fails for nested functions
def adder(augend):
    zero = [0]
    def inner(addend):
        return addend + augend + zero[0]
    return inner

# test the pickle-ability of inner function
add_me = adder(5)
pinner = pickle.dumps(add_me)
p_add_me = pickle.loads(pinner)
assert add_me(10) == p_add_me(10)

# pickle fails for lambda functions
squ = lambda x: x**2

# test the pickle-ability of inner function
psqu = pickle.dumps(squ)
p_squ = pickle.loads(psqu)
assert squ(10) == p_squ(10)

# if pickle works, then multiprocessing should too
print "Evaluate 10 items on 2 proc:"
pool.ncpus = 2

def multiPESVS(ojf, lb, ub, ki, s, b, cfn, lsl, lsu, fnames):
    def f(fn):
        return PESVS(ojf, lb, ub, ki, s, b, cfn, lsl, lsu, fn)
    p = Pool(nproc)
    return p.map(f, fnames)

def generate_output(args):
    """ Main application driver
    1. Partition filenames into smaller chunks/arrays of image filenames
    2. Generate worker processes
    3. Pass the chunks to the workers
    4. Each worker deduplicates its set of image files
    5. Merge the results from each worker to one python dictionary
    6. OPTIONAL -- Output the deduplicated image files to a directory
    """
    # Partition the list of filenames
    num_chunks = args.num_jobs

    # Create a pool of worker threads
    # Each worker will deduplicate a set of images
    filenames = []
    metadata = None
    end_str = ""
    if args.json_metadata is not None:
        metadata, filenames = process_json_file(args.json_metadata)
        end_str = "from metadata file: %s" % args.json_metadata
    else:
        # Find all image files in dump directory
        filenames = find_all_images(args.dump_dir)
        end_str = "from directory: %s" % args.dump_dir

    file_chunks = partition_filenames(filenames, num_chunks)

    print("Found {} images {}".format(len(filenames), end_str))

    """
    metadata_results = []
    file_chunk_list = list(file_chunks)
    num_proc = len(file_chunk_list)
    print >> sys.stderr, "Printing file chunks"
    print >> sys.stderr, file_chunks
    pool2 = Pool(processes = num_proc)
    with open(args.json_metadata) as json_metadata_file:
        metadata_results = [pool2.map(process_json_line,json_metadata_file, chunk) for index, chunk in enumerate(file_chunk_list)]
        #objs = [p.get() for p in results]
    metadata = merge_exact_duplicates(metadata_results)
    """

    pool = Pool(processes=num_chunks)

    # Pass the partitions to each thread
    results = []
    final_dictionary = {}
    if not args.near_duplicates:
        if args.num_jobs == 1:
            # If we're only using one worker, don't incur the overhead of starting a process
            result = exact_deduplicate_images(filenames)
            dictionaries = [result]
        else:
            # Get the results from each worker; each worker takes one chunk of
            # filenames (matching the single-argument call above)
            results = [pool.apply_async(exact_deduplicate_images, args=(chunk,))
                       for chunk in file_chunks]
            dictionaries = [p.get() for p in results]
        # Merge the results into one dictionary
        final_dictionary = merge_exact_duplicates(dictionaries)
    else:
        if args.num_jobs == 1:
            # If we're only using one worker, don't incur the overhead of starting a process
            result = near_deduplicate_images(filenames, args.bit_distance, metadata=metadata)
            near_duplicate_objects = [result]
        else:
            # Get the results from each near duplicate worker
            if metadata is not None:
                results = [pool.apply_async(near_deduplicate_images, (chunk, args.bit_distance,),
                                            dict(metadata=metadata))
                           for chunk in file_chunks]
            else:
                results = [pool.apply_async(near_deduplicate_images, (chunk, args.bit_distance,))
                           for chunk in file_chunks]
            # create an array of near duplicate objects
            near_duplicate_objects = [p.get() for p in results]
        # Merge the dictionaries together using the info from their corresponding indexes
        final_dictionary = merge_near_duplicates(near_duplicate_objects)

    print("Number of images prior to deduplication: {}".format(len(filenames)), file=sys.stderr)
    print("Number of images after deduplication: {}".format(len(final_dictionary)), file=sys.stderr)

    # Write the image locations to an output file
    if args.output_json is not None:
        # TODO
        # For now, just do this with exact duplicates
        # Dumping the simhash class to JSON doesn't work because the object isn't
        # JSON serializable
        outfile_name = args.output_json
        print("Writing image dictionary to file: {}".format(outfile_name))
        with open(outfile_name, 'w') as outfile:
            json.dump(final_dictionary, outfile, indent=4, skipkeys=True, default=str)

    # Copy the images to an output directory
    create_output_image_directory(args, final_dictionary)

    return len(final_dictionary), len(filenames) - len(final_dictionary)

def multiPESIS(ojf, lb, ub, ki, b, fnames):
    def f(fn):
        return PESIS(ojf, lb, ub, ki, b, fn)
    p = Pool(nproc)
    return p.map(f, fnames)

import mxnet as mx
import os
import numpy as np
import random
import cv2
import symbol
import cPickle as pickle
from PIL import Image
from PATH import *
from pathos.multiprocessing import Pool

pool = Pool(12)
alpha = 1e-2
batch_size = 480
ctx = mx.gpu(1)
n = len(os.listdir(DATAPATH))
n = 1500000
imgout = mx.nd.zeros([1, 3, 384, 384], ctx)
anno = mx.nd.zeros([1, 37, 384, 384], ctx)
anno_np = np.zeros([1, 37, 384, 384])
reg_anno = mx.nd.zeros([1, 74, 384, 384], ctx)
reg_anno_np = np.zeros([1, 74, 384, 384])
cls_grad = mx.nd.zeros([1, 37, 384, 384], ctx)
reg_grad = mx.nd.zeros([1, 74, 384, 384], ctx)
reg_grad_np = np.zeros([1, 74, 384, 384])

def get_image():
    result = []

# net.save_net('networks/NCEP-SATsurface-7-8yrs-Hilb-phase-adjmat%s.bin' % ('MPC'), only_matrix = True)

for method in METHODS:
    for scale in SCALES:
        print("Computing networks using %s method..." % (method))
        # phase
        if method in ['MIEQQ', 'MIGAU', 'MPC']:
            # net = ScaleSpecificNetwork(fname, 'air', date(1948,1,1), date(2016,1,1), None, None, level = 0, dataset = "NCEP",
            #                            sampling = 'monthly', anom = False)
            net = ScaleSpecificNetwork(fname, 't2m', date(1958, 1, 1), date(2014, 1, 1), None, None,
                                       level=None, pickled=True, sampling='monthly', anom=False)
            pool = Pool(NUM_WORKERS)
            # net.get_hilbert_phase_amp(period = 90, width = 12, pool = pool, cut = 1)
            net.wavelet(scale, period_unit='m', cut=2, pool=pool)
            pool.close()
            pool.join()
            net.get_adjacency_matrix(net.phase, method=method, pool=None, use_queue=True,
                                     num_workers=NUM_WORKERS)
            net.save_net('networks/ERA-SATsurface-scale%dmonths-phase-adjmat%s.bin' % (scale, method),
                         only_matrix=True)

        # amplitude
        if method in ['MIEQQ', 'MIGAU', 'CORR']:
            # net = ScaleSpecificNetwork(fname, 'air', date(1948,1,1), date(2016,1,1), None, None, level = 0, dataset = "NCEP",
            #                            sampling = 'monthly', anom = False)
            net = ScaleSpecificNetwork(fname, 't2m', date(1958, 1, 1), date(2014, 1, 1), None, None,
                                       level=None, pickled=True, sampling='monthly', anom=False)
            pool = Pool(NUM_WORKERS)
            # net.get_hilbert_phase_amp(period = 90, width = 12, pool = pool, cut = 1)