def process_all(args):
    """Run ``process_file`` over every entry of ``args.files`` in a 32-worker pool.

    Each call receives one element of ``args.files`` as its positional
    argument and the whole ``args`` object as the ``args`` keyword.  The
    per-file results are discarded and the function returns ``None``.
    """
    worker = functools.partial(process_file, args=args)
    pool = multiprocessing.Pool(32)
    try:
        pool.map(worker, args.files)
    finally:
        # Release the worker processes even if a task raised; previously the
        # pool was simply dropped and left for the garbage collector.
        pool.close()
        pool.join()
Info_Train['cvm']=cvm Info = (Exp_temp >> select(Exp_temp[['CHROM','GeneStart','GeneEnd', 'GeneName','TargetID']])).merge(Info_Train, left_on='TargetID', right_on='TargetID', how='outer') Info.to_csv(args.out_prefix+'/CHR'+str(args.chr_num)+'_elastic_net_training_info.txt', header=None,index=None,sep='\t',mode='a') ############################################################################################################### ### Start thread if (args.thread < int(len(EXP)/100) | args.thread > len(EXP)): args.thread = (int(len(EXP)/100)+1)*100 pool = multiprocessing.Pool(args.thread) pool.map(thread_process,[num for num in range(len(EXP))]) pool.close() pool.join() #################################################################################################################### ### time calculation time=round((time.clock()-start_time)/60,2) print(str(time)+' minutes')
line = linet.replace("|"," ") line = re.sub(token, " ellipsis ", line) f.write(line + '\t' + str(namec) + "," + str(named) +"," + str(lc) +'\n') submission_id.append(array[0]) # f.write(line + '\t' + str(namec) + "," + str(named) +"," + str(lc) +'\n') # uncomment this to debug with the 3.5M data. # submission_id.append(array[0]) # f.write(line + '\t' + str(namec) + "," + str(named) +"," + str(lc) +'\n') # submission_id.append(array[0]) # f.write(line + '\t' + str(namec) + "," + str(named) +"," + str(lc) +'\n') # submission_id.append(array[0]) # f.write(line + '\t' + str(namec) + "," + str(named) +"," + str(lc) +'\n') # submission_id.append(array[0]) f.close() return test_p,submission_id p = multiprocessing.Pool(2) preds = p.imap(pre, [0,1]) for i, pred in enumerate(preds): if i == 0: #print(pred) train_p = pred elif i==1: #print(pred) (test_p,submission_id) = pred p.close() p.join() del(p) def fill(x): if len(x)==0: return "missing"
} cols_types = {'all': all_feat_cols, 'reduced': reduced_feats} #%% random_state = 777 n_folds = 6 #dum variables keep for compatibility metric2exclude = 'loss' n_feats2remove = 'log2' fold_param = (cuda_id, train_args, metric2exclude, n_feats2remove) all_data_in = fold_generator(df_filt, cols_types, n_folds, test_size, fold_param) #%% p = mp.Pool(pool_size) results = p.map(get_softmax_clf, all_data_in) #%% with open(save_name, "wb") as fid: pickle.dump((strain_dicts, results), fid) #%% res_db = {} for (set_type, i_fold), dat in results: if set_type not in res_db: res_db[set_type] = [] res_db[set_type].append(dat) for set_type, dat in res_db.items(): res_db[set_type] = list(zip(*dat))
except: print(BOLD+Red+"[-] Something Went Wrong!",RESET) def splitfile(): splitLen = totallines/splt outputBase = 'part' input = open(args.wordlist, 'r', encoding="ISO-8859-1").read().split('\n') at = 1 global names names=[] for lines in range(0, len(input), int(splitLen)): outputData = input[lines:lines+int(splitLen)] output = open(outputBase + str(at) + '.txt', 'w', encoding="ISO-8859-1") namer=(outputBase + str(at) + '.txt') names.append(namer) output.write('\n'.join(outputData)) output.close() at += 1 if __name__ == "__main__": splitfile() pool = mp.Pool(mp.cpu_count()) results = pool.map(dbrute, [wordlst for wordlst in names]) pool.close()
args = parser.parse_args() NPAR = int(args.nproc) simMode = args.simulate hetMode = args.het homMode = args.hom maxdata = int(args.max) if args.max is not None else None regions = readIntervalFile(args.varfile, simMode, hetMode, homMode) fname = args.bam subsampling_fraction = float(args.subsample) if maxdata is not None: #Limit on how many results to return as opposed to using the whole VCF file in --het and --hom when you only need a few thousand training examples i = 0 if NPAR > 1: import multiprocessing as mp pool = mp.Pool(processes=NPAR) for result in pool.imap_unordered(processRegion, regions): if result is not None: print("\t".join([str(x) for x in result])) i += 1 if i > maxdata: sys.exit() else: for r in regions: result = processRegion(r) if result is not None: print("\t".join([str(x) for x in result])) i += 1 if i > maxdata: sys.exit() else: if NPAR > 1: import multiprocessing as mp
ot = UTCDateTime(ot) data_dict = get_data_dict(ot, args.data_dir) event_name = dtime2str(ot) event_dir = os.path.join(temp_root, event_name) if not os.path.exists(event_dir): os.makedirs(event_dir) # cut event print('cutting {}'.format(event_name)) for net_sta, [tp, ts] in pick_dict.items(): chn_codes = chn_dict[net_sta.split('.')[0]] b = tp - UTCDateTime(ot.date) - t_blank data_paths = data_dict[net_sta] out_paths = [os.path.join(event_dir,'%s.%s'%(net_sta,chn)) for chn in chn_codes] # cut event sac.cut(data_paths[0], b, b+win_len, out_paths[0]) sac.cut(data_paths[1], b, b+win_len, out_paths[1]) sac.cut(data_paths[2], b, b+win_len, out_paths[2]) # write header t0 = t_blank t1 = ts -tp + t_blank sac.ch_event(out_paths[0], lon, lat, dep, mag, [t0,t1]) sac.ch_event(out_paths[1], lon, lat, dep, mag, [t0,t1]) sac.ch_event(out_paths[2], lon, lat, dep, mag, [t0,t1]) # cut all events data pool = mp.Pool(num_workers) pool.map_async(cut_event, range(len(pha_list))) pool.close() pool.join()
#if j in [76]: # image2data = Marker(image2data,[y2,x2],20) j += 1 #image1data = Marker(image1data,[y1,x1],8) image2data = Marker(image2data,[bestMatch[1][1],bestMatch[1][0]],20) p2 = (pixelsToRadians(centerposition[0]-bestMatch[1][0]),pixelsToRadians(1447-bestMatch[1][1])) distance = AnglesToDistance((theta1,phi1),p2) p1 = 1447-radiansToPixels(phi1) #p2 = centerposition[0] - radiansToPixels(theta1) p2 = 2896 - radiansToPixels(theta1) q.put((p1,p2,distance)) (centerposition,image1data,image2data) = shiftImages(centerposition,image1data,image2data,-leftShift) statusBar = 0 columns = list(range(0,5793,stepSize)) pool = mp.Pool(processes=4) m = mp.Manager() q = m.Queue() for column in columns: print(column) arguments = (image1data,image2data,centerposition,q,stepSize,sampleSize,statusBar,1) a = pool.apply_async(ComputeColumn, args=(arguments,)) print(a) print("hello") input() #ComputeColumn(image1data,image2data,centerposition,q,stepSize,sampleSize,statusBar,column) #processes.append(process) #process.start() #for process in processes: # process.start() pool.close()
def read_all_metricdb_files(self):
    """Load every ``*.metric-db`` file of the run into one metrics table.

    The table has ``num_nodes * num_metricdb_files`` rows and
    ``num_metrics + 3`` columns; the three trailing columns hold the node
    id, the rank and the thread id.  Results are stored on ``self`` as
    ``metrics`` (NumPy view of the shared buffer), ``metric_columns``,
    ``df_metrics``, ``np_metrics`` and ``total_execution_threads``.
    """
    db_paths = sorted(glob.glob(self.dir_name + "/*.metric-db"))

    # One row per (node, metric-db file); metrics plus nid / rank / thread.
    shape = [self.num_nodes * self.num_metricdb_files, self.num_metrics + 3]
    n_values = int(np.prod(shape))

    # The workers write into this shared buffer; it is handed to each of
    # them through the pool initializer.
    shared_buffer = mp.sharedctypes.RawArray("d", n_values)
    pool = mp.Pool(initializer=init_shared_array, initargs=(shared_buffer,))
    self.metrics = np.frombuffer(shared_buffer).reshape(shape)

    jobs = []
    for path in db_paths:
        jobs.append(
            (
                path,
                self.num_nodes,
                self.num_threads_per_rank,
                self.num_metrics,
                shape,
            )
        )

    try:
        pool.map(read_metricdb_file, jobs)
    finally:
        pool.close()

    # Column names in metric-key order, with the CPU-time metrics renamed.
    exclusive_time = ("CPUTIME (usec) (E)", "CPUTIME (sec) (E)")
    inclusive_time = ("CPUTIME (usec) (I)", "CPUTIME (sec) (I)")
    columns = []
    for key in sorted(self.metric_names):
        label = self.metric_names[key]
        if label in exclusive_time:
            label = "time"
        elif label in inclusive_time:
            label = "time (inc)"
        columns.append(label)
    self.metric_columns = columns

    self.df_metrics = pd.DataFrame(
        self.metrics, columns=self.metric_columns + ["nid", "rank", "thread"]
    )
    for id_column in ("nid", "rank", "thread"):
        self.df_metrics[id_column] = self.df_metrics[id_column].astype(
            int, copy=False
        )

    # With a single thread per rank the thread id column is not kept.
    if self.num_threads_per_rank == 1:
        del self.df_metrics["thread"]

    # Plain array of the metric columns, used to speed up parse_xml_node.
    self.np_metrics = self.df_metrics[self.metric_columns].values

    # Number of execution threads, used as the stride in
    # subtract_exclusive_metric_vals (num nodes is already calculated).
    self.total_execution_threads = self.num_threads_per_rank * self.num_ranks
def main(args):
    """Run ``threadmain`` per frequency, merge the stats files and plot them.

    Unless ``args.shortercut`` is set, ``threadmain`` is called once for each
    ``(args, freq)`` pair with ``freq`` in ``range(0, 380, args.delta)``,
    serially when ``args.numthreads`` is 1 and through a process pool
    otherwise, and the per-run ``*_stats_*.txt`` files are concatenated with
    ``cat``.  The three merged stats files are then loaded and drawn as
    three error-bar panels, saved to ``args.plot`` and optionally shown.
    """
    freqs = range(0, 380, args.delta)
    argsfreqs = [(args, freq) for freq in freqs]
    calname = args.calskymod[:5]
    tarname = args.tarskymod[:5]

    if not args.shortercut:
        numthreads = int(args.numthreads)
        # Only start worker processes when they are going to be used.
        pool = mp.Pool(numthreads) if numthreads != 1 else None  # number of concurrent frequencies
        try:
            if pool is None:
                print("Using single-threaded version")
                # Explicit loop: a bare map() is lazy on Python 3 and would
                # silently do nothing.
                for argsfreq in argsfreqs:
                    threadmain(argsfreq)
            else:
                print("Using multithreaded version")
                pool.map(threadmain, argsfreqs)
        except Exception as e:
            # Best effort, as before: report the failure and continue with
            # whatever stats files were produced.
            print(e)
        finally:
            if pool is not None:
                pool.close()
                pool.join()

        # NOTE(review): the original nesting of these three calls could not be
        # recovered; they are kept inside the "not shortercut" branch so that a
        # plot-only run never truncates existing merged files - confirm.
        oscsystem('cat %s_prical_stats_*.txt > %s_prical_stats.txt' % (calname, calname))
        oscsystem('cat %s_prical_stats_*.txt > %s_prical_stats.txt' % (tarname, tarname))
        oscsystem('cat %s_seccal_stats_*.txt > %s_seccal_stats.txt' % (tarname, tarname))

    caldata = loadtxt(calname + '_prical_stats.txt')
    pritardata = loadtxt(tarname + '_prical_stats.txt')
    sectardata = loadtxt(tarname + '_seccal_stats.txt')

    xax = arange(110., 190.1, 1.)
    fig, (ax0, ax1, ax2) = plt.subplots(1, 3, figsize=(24, 6))

    makeplot(ax0, 'Primary calibrator (%s), direct calibration' % calname, 40., 210.)
    ax0.plot(xax, sourcespec(calname, xax), 'k--')
    ax0.errorbar(caldata[:, 0] / 1.e6, caldata[:, 1], yerr=caldata[:, 2],
                 marker='o', linestyle='none')

    # "low,high" y-axis limits shared by the two secondary-calibrator panels.
    yaxislim = args.yaxislim.split(',')
    secymin = float(yaxislim[0])
    secymax = float(yaxislim[1])

    makeplot(ax1, 'Secondary calibrator (%s), direct calibration' % tarname, secymin, secymax)
    ax1.plot(xax, sourcespec(tarname, xax), 'k--')
    ax1.errorbar(pritardata[:, 0] / 1.e6, pritardata[:, 1], yerr=pritardata[:, 2],
                 marker='o', linestyle='none')

    makeplot(ax2, 'Secondary calibrator (%s), transferred calibration' % tarname, secymin, secymax)
    ax2.plot(xax, sourcespec(tarname, xax), 'k--')
    ax2.errorbar(sectardata[:, 0] / 1.e6, sectardata[:, 1], yerr=sectardata[:, 2],
                 marker='o', linestyle='none', label='old model')
    #ax2.legend()

    savefig(args.plot, bbox_inches='tight')
    if args.onscreen:
        show()
def run( args ):
    """Permute the mutation matrix and write the weights and/or permutations.

    Loads the mutation data from ``args.mutation_file``, encodes it as a
    sorted list of 1-based (gene index, patient index) edges and hands it to
    ``permute_matrices_wrapper`` once per core, each call receiving every
    ``num_cores``-th seed.  If ``args.weights_file`` is set the averaged
    observed matrices are checked against the per-gene / per-patient counts
    and saved with ``np.save``; if ``args.permutation_directory`` is set each
    permutation is dumped as JSON.  Exits with status 1 when neither output
    is requested.
    """
    # Do some additional argument checking
    if not args.weights_file and not args.permutation_directory:
        sys.stderr.write('You must set the weights file or permutation directory, '
                         'otherwise nothing will be output.')
        sys.exit(1)

    # Load mutation data
    if args.verbose > 0:
        print('* Loading mutation data...')

    mutation_data = load_mutation_data( args.mutation_file )
    genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data

    geneToObserved = { g: len(cases) for g, cases in geneToCases.items() }
    patientToObserved = { p: len(muts) for p, muts in patientToMutations.items() }
    geneToIndex = { g: i+1 for i, g in enumerate(all_genes) }
    indexToGene = { i+1: g for i, g in enumerate(all_genes) }
    patientToIndex = { p: j+1 for j, p in enumerate(patients) }
    indexToPatient = { j+1: p for j, p in enumerate(patients) }

    edges = set()
    for gene, cases in geneToCases.items():
        for patient in cases:
            edges.add( (geneToIndex[gene], patientToIndex[patient]) )

    # The builtin int is the dtype the removed ``np.int`` alias stood for.
    edge_list = np.array(sorted(edges), dtype=int)

    # Run the bipartite edge swaps
    if args.verbose > 0:
        print('* Permuting matrices...')

    m = len(all_genes)
    n = len(patients)
    num_edges = len(edges)
    max_swaps = int(args.swap_multiplier*num_edges)
    max_tries = 10**9
    seeds = [ i+args.start_index for i in range(args.num_permutations) ]

    # Run the bipartite edge swaps in parallel if more than one core indicated
    num_cores = args.num_cores if args.num_cores != -1 else mp.cpu_count()
    wrapper_args = [ (edge_list, max_swaps, max_tries, seeds[i::num_cores], 0, m,
                      n, num_edges, indexToGene, indexToPatient)
                     for i in range(num_cores) ]

    pool = mp.Pool(num_cores) if num_cores != 1 else None
    try:
        if pool is None:
            # Materialise the results: they are iterated twice below and a
            # lazy map object would be exhausted after the first pass.
            results = list(map(permute_matrices_wrapper, wrapper_args))
        else:
            results = pool.map(permute_matrices_wrapper, wrapper_args)
    finally:
        # Release the workers even when a permutation task raises.
        if pool is not None:
            pool.close()
            pool.join()

    # Create the weights file
    if args.weights_file:
        if args.verbose > 0:
            print('* Saving weights file...')

        # Merge the observeds
        observeds = [ observed for observed, _ in results ]
        P = np.add.reduce(observeds) / float(len(observeds))

        # Verify the weights
        for g, obs in geneToObserved.items():
            assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < 0.1)

        for p, obs in patientToObserved.items():
            assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < 0.1)

        # Add pseudocounts to entries with no mutations observed
        P[P == 0] = 1./(2. * args.num_permutations)

        # Output to file.
        # The rows/columns preserve the order given by the mutation file.
        np.save(args.weights_file, P)

    # Save the permuted mutation data
    if args.permutation_directory:
        output_prefix = args.permutation_directory + '/permuted-mutations-{}.json'
        if args.verbose > 0:
            print('* Saving permuted mutation data...')

        for _, permutation_list in results:
            for permutation in permutation_list:
                # Output in adjacency list format
                with open(output_prefix.format(permutation['permutation_number']), 'w') as OUT:
                    permutation['params'] = params
                    json.dump( permutation, OUT )
print("# #") print("# This is made for crawling #") print("# Graduate School of Science, Kyoto University web page. #") print("# If you have any trouble with running this code, #") print("# please send message to my e-mail. #") print("# [email protected] (final update. 03/07/2020) #") print("# #") print("#######################################################################\n") start = time() Ext = ['.pdf', '.mp3', '.mp4', '.doc', '.docx', '.xls', '.ppt'] sciURL = ["http://www.sci.kyoto-u.ac.jp/"] sciURLja = [] sciURLen = [] with mp.Pool(processes=2) as p: for i in range(3): for url in sciURL: print("Searching url from link " + url) sciURL = sciURL + Searching().URL(url, "ja/") sciURL = sciURL + Searching().URL(url, "en/") sciURL = list(set(sciURL)) sciURL.sort() for url in sciURL: if "/en" in url: sciURLen.append(url) if "/ja" in url: sciURLja.append(url) sciURLja = list(set(sciURLja))
quit_link_elem.click() def logout(self): logout_link_elem = WebDriverWait(self.driver, 5).until( EC.presence_of_element_located((By.ID, "logout")) ) logout_link_elem.click() def user_gen(url, ids): return [(url, 'User%d'%x, 'User%d'%x) for x in ids] def wrap_run_load_test(args): url = "http://yaksh.fossee.aero.iitb.ac.in/exam/" selenium_test = SeleniumTest(url=url, quiz_name=quiz_name) return selenium_test.run_load_test(*args) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('url', type=str, help="url of the website being tested") parser.add_argument('start', type=int, help="Starting user id") parser.add_argument("-n", "--number", type=int, default=10, help="number of users") opts = parser.parse_args() quiz_name = "Demo quiz" selenium_test = SeleniumTest(url=opts.url, quiz_name=quiz_name) pool = multiprocessing.Pool(opts.number) pool.map(wrap_run_load_test, user_gen(opts.url, range(opts.start, opts.start + opts.number))) pool.close() pool.join()
make_convex = FLAGS.convex fit_cylinder = FLAGS.cylinder print("Data in from: " + dir_in) print("Writing results in: " + dir_out) if not exists(dir_out): mkdir(dir_out) all_files = [f for f in listdir(dir_in) if isfile(join(dir_in, f))] obj_files = [f for f in all_files if f.split('.')[-1] == 'obj'] # for obj_file in obj_files: # process_obj(obj_file, dir_in, dir_out, grid_size, max_faces, make_convex, fit_cylinder, quality) pool_size = multiprocessing.cpu_count() pool = multiprocessing.Pool(processes=pool_size, maxtasksperchild=2) pool.map(partial(process_obj, dir_in=dir_in, dir_out=dir_out, grid_size=grid_size, max_faces=max_faces, make_convex=make_convex, fit_cylinder=fit_cylinder, quality=quality), obj_files) pool.close() pool.join() # mesh = pymesh.load_mesh("model_1ef68777bfdb7d6ba7a07ee616e34cd7.obj") # print "mesh" # print mesh.vertices.shape # print mesh.faces.shape # print mesh.attribute_names # surf_mesh = pymesh.compute_outer_hull(mesh) # print "surf" # print surf_mesh.vertices.shape
iris = parallelize_dataframe(iris, multiply_columns) #Distributed processing using pandas #Source: http://gouthamanbalaraman.com/blog/distributed-processing-pandas.html import pandas as pd import multiprocessing as mp LARGE_FILE = "D:\\my_large_file.txt" CHUNKSIZE = 100000 # processing 100,000 rows at a time def process_frame(df): # process data frame return len(df) if __name__ == '__main__': reader = pd.read_table(LARGE_FILE, chunksize=CHUNKSIZE) pool = mp.Pool(4) # use 4 processes funclist = [] for df in reader: # process each data frame f = pool.apply_async(process_frame,[df]) funclist.append(f) result = 0 for f in funclist: result += f.get(timeout=10) # timeout in 10 seconds print "There are %d rows of data"%(result) #Transpose a pyspark dataframe df.T
np.random.seed(0)

# with open("data/pseudo_data.txt", "r") as fp:
with open("data/data.txt", "r") as fp:
    fp_lines = fp.readlines()


def train_lda(argv):
    """Fit one GibbsLDA model for a hyper-parameter triple and save its state.

    ``argv`` is ``(n_components, doc_topic_prior, topic_word_prior)``.  The
    tuple is passed positionally to ``GibbsLDA`` and is also used to build
    the name of the ``.npz`` file written under ``output/``.  The model is
    fitted on every line of the data file except the first.
    """
    lda = GibbsLDA(*argv, iterations=100, verbose=False)
    lda.fit(fp_lines[1:])
    lda.save_state("output/z_{}_{}_{}.npz".format(*argv))
    return lda


range_n_components = [2, 3, 5, 7, 10, 20]
range_doc_topic_prior = [0.1, 0.5, 1.0, 2.0, 5.0, 10.0]
range_topic_word_prior = [0.01, 0.02, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0]

# Full grid of settings; both priors are divided by the number of topics.
hyperparameters = []
for n_components, doc_topic_prior, topic_word_prior in itertools.product(
        range_n_components, range_doc_topic_prior, range_topic_word_prior):
    # 1.0 (not 1) keeps this a true division on Python 2 as well, where
    # ``1 / n_components`` is 0 and would zero out every prior.
    scale = 1.0 / n_components
    hyperparameters.append(
        (n_components, doc_topic_prior * scale, topic_word_prior * scale))

# NOTE(review): 200 worker processes are started regardless of the number of
# CPUs - confirm this is intended for the target machine.
pool = multiprocessing.Pool(processes=200)
LDAs = pool.map(train_lda, hyperparameters)
Script taking a data directory as commandline argument, and concurrently centers all models within. """ def center(model_file, data_path): """ Centers a voxel 3D model in the y- and x axis. """ if model_file.endswith(".binvox"): with open(f'{data_path}/{model_file}', 'rb') as f: try: print(f'File: {model_file}') model = binvox_rw.read_as_3d_array(f) model.data = binvox_rw.dense_to_sparse(model.data) if len(model.data[0]) != 0 and len(model.data[0]) != 0 and len(model.data[0]) != 0: translate_x = ( model.dims[0] - max(model.data[0]) - min(model.data[0]))//2 translate_y = ( m.dims[2] - max(model.data[2]) - min(model.data[2]))//2 for n in range(len(m.data[0])): model.data[0][n] += translate_x model.data[2][n] += translate_y m.write(f'{data_path}/{model_file}') except: print(f'Could not center file: {model_file}') pool = mp.Pool(processes=mp.cpu_count()) data_path = sys.argv[1] temp = [pool.apply_async(center, args=(model_file, data_path)) for model_file in os.listdir(data_path)] [p.get() for p in temp]
assert len(messages) == len(categories) tM = np.array(list(it.starmap(message_distance, it.combinations(messages, 2)))) sM = np.array(list(it.starmap(meaning_distance, it.combinations(categories, 2)))) return mantel_test(tM, sM, method=method, perms=perms) def process_file(input_file): messages, categories = read_csv(input_file) m_l = mantel(messages, categories) m_ln = mantel(messages, categories, message_distance=levenshtein_normalised) m_j = mantel(messages, categories, message_distance=jaccard, map_msg_to_str=False) return input_file, m_l, m_ln, m_j if __name__=="__main__": import argparse parser = argparse.ArgumentParser("compute distances and mantels for artifical languages") parser.add_argument("--input_dir", type=str, required=True) parser.add_argument("--output_file", type=str, required=True) args = parser.parse_args() files = list(pathlib.Path(args.input_dir).glob("**/*.tsv")) output_file = args.output_file with open(output_file, "w") as ostr, mp.Pool(mp.cpu_count()) as pool: calls = pool.imap_unordered(process_file, files) for input_file, m_l, m_ln, m_j in tqdm.tqdm(calls, total=len(files)): print(input_file.name, 'levenshtein', *m_l, 'levenshtein normalized', *m_ln, 'jaccard', *m_j, file=ostr)
def find_all_optoinable_stocks_multiprocess(udlyings):
    """Call ``find_optionable_stocks`` on every item of ``udlyings`` in a pool.

    The pool uses the default number of workers and is terminated once the
    map has finished.  The mapped results are not returned.
    """
    workers = multiprocessing.Pool()
    try:
        workers.map(find_optionable_stocks, udlyings)
    finally:
        workers.terminate()
Aggregate2 = predict(model2, data) Output_1 = Output(Aggregate1) Output_2 = Output(Aggregate2) GE[i] = get_error(model1, XTest, YTests[MC], 2**(n - 1)) / 2 GE_BN[i] = get_error(model2, XTest, YTests[MC], 2**(n - 1)) / 2 LVC[i] = get_LVComplexity(Output_1) LVC_BN[i] = get_LVComplexity(Output_2) del model1 del model2 return (LVC, LVC_BN, GE, GE_BN) # LVC_outputs.append(LVC), LVC_output_BNs.append(LVC_BN), GE_outputs.append(GE), GE_output_BNs.append(GE_BN) pool = multiprocessing.Pool(9) tasks = range(total_MC) result = [] with tqdm.tqdm(total=total_MC, mininterval=5, bar_format='{elapsed}{l_bar}{bar}{r_bar}') as t: for i, x in enumerate(pool.imap(process, tasks)): t.update() result.append(x) pool.close() pool.join() for output in result: LVC, LVC_BN, GE, GE_BN = output LVC_outputs.append(LVC) LVC_output_BNs.append(LVC_BN)
frame_pc = raw_pc_data[str(frame_key)] ground_frame_pc, clean_frame_pc = get_ground(frame_pc) clean_pcs[str(frame_key)] = clean_frame_pc ground_pcs[str(frame_key)] = ground_frame_pc if (frame_index + 1) % 10 == 0: print('Ground Removal SEQ {} / {}, Frame {} / {}'.format( file_index + 1, len(file_names), frame_index + 1, len(keys))) np.savez_compressed(os.path.join(clean_pc_folder, file_name), **clean_pcs) np.savez_compressed(os.path.join(ground_pc_folder, file_name), **ground_pcs) if __name__ == '__main__': if args.process > 1: pool = multiprocessing.Pool(args.process) for token in range(args.process): result = pool.apply_async(main, args=((token, args.process), args.raw_pc_folder, args.clean_pc_folder, args.ground_pc_folder)) pool.close() pool.join() else: main((0, 1), args.raw_pc_folder, args.clean_pc_folder, args.ground_pc_folder)
def findRotMaxRect(data_in,flag_opt=False,flag_parallel = False, nbre_angle=10,flag_out=None,flag_enlarge_img=False,limit_image_size=300):
    '''
    Search over rotation angles for the best rectangle in a 2D array and
    return its four corners in the coordinates of data_in.

    data_in          : 2D array; it is padded with ones to a square, optionally
                       down-sampled, and padded to odd dimensions before the search.
    flag_opt         : True  -> optimize.brute over (-10, 10) with nbre_angle
                                samples, then optimize.fmin started from the
                                brute-force result.
                       False -> 100 angles from -45 up to (not including) 45
                                are evaluated with residual_star.
    flag_parallel    : only used when flag_opt is False; the 100 angles are
                       evaluated in a multiprocessing pool with one process per CPU.
    flag_out         : None       -> return the list of corner coordinates.
                       'rotation' -> return (corners, selected angle, coord_out_rot).
                       Any other value prints a message and drops into pdb.
    flag_enlarge_img : work on an image of roughly twice the size so that all
                       features stay inside when rotated.
    limit_image_size : side length the square image is resized to when it is
                       larger; speeds up the search but can give approximate
                       results if the shape is not simple.
    '''

    # Make the image square by padding the shorter dimension with ones.
    #----------------
    nx_in, ny_in = data_in.shape
    if nx_in != ny_in:
        n = max([nx_in,ny_in])
        data_square = np.ones([n,n])
        # NOTE(review): these shifts are used as slice bounds below, which
        # relies on "/" being integer division (Python 2) - confirm before
        # running on Python 3.
        xshift = (n-nx_in)/2
        yshift = (n-ny_in)/2
        if yshift == 0:
            data_square[xshift:(xshift+nx_in),: ] = data_in[:,:]
        else:
            data_square[: ,yshift:(yshift+ny_in)] = data_in[:,:]
    else:
        xshift = 0
        yshift = 0
        data_square = data_in

    # Apply a scale factor if the image is bigger than limit_image_size.
    #----------------
    if data_square.shape[0] > limit_image_size:
        data_small = cv2.resize(data_square,(limit_image_size, limit_image_size),interpolation=0)
        scale_factor = 1.*data_square.shape[0]/data_small.shape[0]
    else:
        data_small = data_square
        scale_factor = 1

    # Give the data an odd number of points in each dimension to make
    # rotation easier.
    #----------------
    nx,ny = data_small.shape
    # Defaults of -nx / -ny make the slice [:-nx_extra] below cover the whole
    # axis when no extra row/column is needed.
    nx_extra = -nx; ny_extra = -ny
    if nx%2==0:
        nx+=1
        nx_extra = 1
    if ny%2==0:
        ny+=1
        ny_extra = 1
    data_odd = np.ones([data_small.shape[0]+max([0,nx_extra]),data_small.shape[1]+max([0,ny_extra])])
    data_odd[:-nx_extra, :-ny_extra] = data_small
    nx,ny = data_odd.shape

    nx_odd,ny_odd = data_odd.shape

    if flag_enlarge_img:
        data = np.zeros([2*data_odd.shape[0]+1,2*data_odd.shape[1]+1]) + 1
        nx,ny = data.shape
        # NOTE(review): with integer division this target slice looks one
        # element shorter per axis than data_odd - verify this branch.
        data[nx/2-nx_odd/2:nx/2+nx_odd/2,ny/2-ny_odd/2:ny/2+ny_odd/2] = data_odd
    else:
        data = np.copy(data_odd)
        nx,ny = data.shape

    if flag_opt:
        # Coarse brute-force scan followed by a local simplex refinement.
        myranges_brute = ([(-10.,10.),])
        coeff0 = np.array([0.,])  # not used below
        coeff1 = optimize.brute(residual, myranges_brute, args=(data,), Ns=nbre_angle, finish=None)
        popt = optimize.fmin(residual, coeff1, args=(data,), xtol=5, ftol=1.e-5, disp=False)
        angle_selected = popt[0]

    else:
        # Exhaustive scan: 100 angles from -45 up to (not including) 45.
        rotation_angle = np.linspace(-45,45,100+1)[:-1]
        args_here=[]
        for angle in rotation_angle:
            args_here.append([angle,data])

        if flag_parallel:

            # set up a pool to run the parallel processing
            cpus = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(processes=cpus)

            # then the map method of pool actually does the parallelisation
            results = pool.map(residual_star, args_here)

            pool.close()
            pool.join()

        else:
            results = []
            for arg in args_here:
                results.append(residual_star(arg))

        # Keep the angle with the smallest residual.
        argmin = np.array(results).argmin()
        angle_selected = args_here[argmin][0]

    rectangle, M_rect_max, RotData = get_rectangle_coord(angle_selected,data,flag_out=True)

    # Invert the transform returned with the rectangle to map its corners
    # back to the un-rotated working image.
    M_invert = cv2.invertAffineTransform(M_rect_max)
    rect_coord = [rectangle[:2], [rectangle[0],rectangle[3]] ,
                  rectangle[2:], [rectangle[2],rectangle[1]] ]

    rect_coord_ori = []
    for coord in rect_coord:
        rect_coord_ori.append(np.dot(M_invert,[coord[0],(ny-1)-coord[1],1]))

    # Transform to numpy coordinates of the input image: undo the enlargement
    # offset, the down-scaling and the squaring shift.
    coord_out = []
    for coord in rect_coord_ori:
        coord_out.append( [ scale_factor*round( coord[0]-(nx/2-nx_odd/2),0)-xshift,\
                            scale_factor*round((ny-1)-coord[1]-(ny/2-ny_odd/2),0)-yshift])

    # Returned as-is in 'rotation' mode; nothing in this function fills it.
    coord_out_rot = []

    if flag_out is None:
        return coord_out
    elif flag_out == 'rotation':
        return coord_out, angle_selected, coord_out_rot
    else:
        print 'bad def in findRotMaxRect input. stop'
        pdb.set_trace()
import multiprocessing as mp


def compute(data):
    """Return ``data`` raised to the second power."""
    squared = data ** 2
    return squared


if __name__ == '__main__':
    # The same four sample values, repeated four times.
    samples = [1, 7, 8, -2] * 4
    with mp.Pool(10) as pool:
        squares = pool.map(compute, samples)
        print(squares)
def __init__(self, threads):
    """Initialise the local scheduler and start its worker pool.

    ``threads`` is stored unchanged on the instance; the pool is created
    with that many processes, or 2 when ``threads`` is falsy.
    """
    LocalScheduler.__init__(self)
    self.threads = threads
    self.tasks = {}
    if self.threads:
        pool_size = self.threads
    else:
        pool_size = 2
    self.pool = multiprocessing.Pool(pool_size)
TC_Phi_5_imp_avg = np.zeros((np.size(rc_arr), np.size(nt_arr))) TC_Phi_5_unalt_avg = np.zeros((np.size(rc_arr), np.size(nt_arr))) TC_Phi_5_imp_rms = np.zeros((np.size(rc_arr), np.size(nt_arr))) TC_Phi_5_unalt_rms = np.zeros((np.size(rc_arr), np.size(nt_arr))) TC_A_imp_avg = np.zeros((np.size(rc_arr), np.size(nt_arr))) TC_A_unalt_avg = np.zeros((np.size(rc_arr), np.size(nt_arr))) TC_A_imp_rms = np.zeros((np.size(rc_arr), np.size(nt_arr))) TC_A_unalt_rms = np.zeros((np.size(rc_arr), np.size(nt_arr))) # Start parallel processing nthread = mp.cpu_count() nthread = 2 print('starting pool with %i threads ...' % nthread) pool = mp.Pool(processes=nthread) for int_arr, nt in enumerate(nt_arr): for irc_arr, rc in enumerate(rc_arr): l_fracs = l_frac_data[irc_arr,:] print('lc_fracs_Driver = ', lc_fracs) # Get list of tuples length of nsamp arg_tuple = (generate_data, nx, ny, nz, finest, l_fracs, lc_fracs, nt) arg_list = [] [arg_list.append(arg_tuple) for i in range(nsample)] # Farm out nsample to each processor res_tuple = pool.map(parallel_run, arg_list) print('res_tuple = ', res_tuple) print('comp ratio = ', rc_arr[irc_arr])
def run_test(tickers,stocks,windows):
    """Evaluate ``Mean_Variance_Rolling`` for every entry of ``windows``.

    Each window is passed as the positional argument, with ``tickers`` and
    ``stocks`` bound as keywords.  The calls are spread over a pool with one
    process per CPU and the results are returned as a list in the order of
    ``windows``.
    """
    n_workers = mp.cpu_count()
    with mp.Pool(n_workers) as workers:
        per_window = partial(Mean_Variance_Rolling, tickers=tickers, stocks=stocks)
        outcome = workers.map(per_window, windows)
    return outcome
def main():
    """Run ``process`` on every input path that has no ``.png`` output yet.

    Source paths come from ``im.find(a.input_dir)``; the destination is
    ``a.output_dir/<basename>.png`` and sources whose destination already
    exists are skipped.  With ``a.workers == 1`` the pairs are handled one
    after the other; otherwise they are fed through a TensorFlow input queue
    that is drained by ``a.workers`` Python threads.  ``complete()`` is
    called after each processed pair.  Sets the module globals ``total``,
    ``start`` and, for the "edges" operation, ``edge_pool``.
    """
    if not os.path.exists(a.output_dir):
        os.makedirs(a.output_dir)

    src_paths = []
    dst_paths = []

    # Pair every source with its destination, leaving out finished ones.
    skipped = 0
    for src_path in im.find(a.input_dir):
        name, _ = os.path.splitext(os.path.basename(src_path))
        dst_path = os.path.join(a.output_dir, name + ".png")
        if os.path.exists(dst_path):
            skipped += 1
        else:
            src_paths.append(src_path)
            dst_paths.append(dst_path)

    print("skipping %d files that already exist" % skipped)

    # Module-level counters, presumably read by complete() - confirm there.
    global total
    total = len(src_paths)

    print("processing %d files" % total)

    global start
    start = time.time()

    if a.operation == "edges":
        # use a multiprocessing pool for this operation so it can use multiple CPUs
        # create the pool before we launch processing threads
        global edge_pool
        edge_pool = multiprocessing.Pool(a.workers)

    if a.workers == 1:
        # Serial path: handle the pairs in order inside a single session.
        with tf.Session() as sess:
            for src_path, dst_path in zip(src_paths, dst_paths):
                print (100*'::::')
                process(src_path, dst_path)
                complete()
    else:
        # Threaded path: a one-epoch, unshuffled queue hands out the pairs.
        queue = tf.train.input_producer(zip(src_paths, dst_paths), shuffle=False, num_epochs=1)
        dequeue_op = queue.dequeue()

        def worker(coord):
            # ``sess`` is the session opened further down; the threads are
            # only started inside that ``with`` block.
            with sess.as_default():
                while not coord.should_stop():
                    try:
                        print (100*'""""""""""""""')
                        src_path, dst_path = sess.run(dequeue_op)
                    except tf.errors.OutOfRangeError:
                        # Queue exhausted: ask all threads to stop.
                        coord.request_stop()
                        break

                    process(src_path, dst_path)
                    complete()

        # init epoch counter for the queue
        local_init_op = tf.local_variables_initializer()
        with tf.Session() as sess:
            sess.run(local_init_op)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            for i in range(a.workers):
                t = threading.Thread(target=worker, args=(coord,))
                t.start()
                threads.append(t)

            try:
                coord.join(threads)
            except KeyboardInterrupt:
                # On Ctrl-C request a stop and wait for the threads again.
                coord.request_stop()
                coord.join(threads)
# Handle command line options parser = argparse.ArgumentParser( description='Compute features in parallel') parser.add_argument('--numprocs', required=True, type=int, default=multiprocessing.cpu_count(), help='Number of processors to use. ' + \ "Default for this machine is %d" % (multiprocessing.cpu_count(),) ) args = parser.parse_args() if args.numprocs < 1: sys.exit('Number of processors to use must be greater than 0') # Start my pool pool = multiprocessing.Pool(args.numprocs) print("Using %d processors..." % (args.numprocs)) # construct models base_model = VGG16(weights='imagenet') models = [] for layer in base_model.layers: models.append( Model(input=base_model.input, output=base_model.get_layer(layer.name).output)) # load data f = open(
return sum(X[input1%10]) if __name__ == '__main__': X = np.random.rand(10, 3) print X pool_size = multiprocessing.cpu_count() # active_procs = multiprocessing.active_children() # print active_procs pool = multiprocessing.Pool(processes=pool_size) print 'READY?' inputs = list(range(1000)) #print 'Input :', inputs # start = time.time() # builtin_outputs = map(do_calculation, inputs) # print 'Built-in:', builtin_outputs # end = time.time() # print end - start # start = time.time() #pool_outputs = pool.map(do_calculation, inputs)
all_user_rt_tt_diff.append(st.median(single_user_rt_tt_diff)) all_members_rting_arr.append(single_member_rting_arr) creation_data = db[collectionName].find({'rt_user_id': a_user}, {'rt_user_created_at':1}, no_cursor_timeout=True).limit(1) for cd in creation_data: rt_user_created_time_arr.append(datetime.datetime.strptime(dateutil.parser.parse(cd['rt_user_created_at']).strftime('%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S')) ipt_d = density_features(rt_tweet_created_time_arr) rter_creation_d_std, rter_creation_mean, rter_creation_d_cov = creation_time_dispersion(rt_user_created_time_arr) s_td, me_an, co_v = retweeting_time_dispersion(all_members_rting_arr) cov_of_response_times = st.pstdev(all_user_rt_tt_diff)/float(st.mean(all_user_rt_tt_diff)) return sn, ipt_d, s_td, me_an, co_v, cov_of_response_times, rter_creation_d_std, rter_creation_mean, rter_creation_d_cov,label if __name__ == '__main__': pool = multiprocessing.Pool(CONFIG_POOL_SIZE) mongo_query = {} groups = db[group_collectionName].find(mongo_query, no_cursor_timeout=True) total_count= groups.count() all_data = pool.map(features, ((group, total_count) for idx,group in enumerate(groups))) pool.close() pool.join() df = pd.DataFrame(all_data, columns=['groupID', 'inter_posting_time_compactness', 'retweeting_time_distribution_sd', 'retweeting_time_distribution_mean', 'retweeting_time_distribution_cov', 'cov_response_time', 'user_creation_time_distribution_sd', 'user_creation_time_distribution_mean', 'user_creation_time_distribution_cov', 'label']) df.to_pickle('extracted_features/temporal_feature.pkl') client.close()