def performStage2TilesSegmentation(self, tilesImgDIR, tilesMaskedDIR,
                                   tilesSegsDIR, tilesSegBordersDIR, tmpDIR,
                                   tilesBase, s1BordersImage, segStatsInfo,
                                   minPxlsVal, distThresVal, bandsVal, ncpus):
    """Mask the input tiles, segment the masked tiles, then export border masks.

    Three passes are made over files found with ``glob``:

    1. Every ``tilesBase*.kea`` image in ``tilesImgDIR`` is masked with
       ``s1BordersImage`` and written to ``tilesMaskedDIR`` as
       ``<name>_masked.kea``.
    2. Every ``tilesBase*_masked.kea`` image in ``tilesMaskedDIR`` is
       segmented through a pool of ``ncpus`` workers; the clumps are written
       to ``tilesSegsDIR`` as ``<name>_segs.kea``.
    3. Every ``tilesBase*_segs.kea`` file in ``tilesSegsDIR`` gets a
       'BoundaryClumps' column, which is exported to ``tilesSegBordersDIR``
       as ``<name>_segsborder.kea``.

    ``segStatsInfo`` is passed to ``self.findSegStatsFiles``; ``minPxlsVal``,
    ``distThresVal`` and ``bandsVal`` are forwarded to the segmentation call.
    """
    rsgisUtils = rsgislib.RSGISPyUtils()

    # Pass 1: mask each tile, keeping the data type of the source image.
    for tilePath in glob.glob(os.path.join(tilesImgDIR, tilesBase + "*.kea")):
        tileName, _ = os.path.splitext(os.path.basename(tilePath))
        maskedPath = os.path.join(tilesMaskedDIR, tileName + '_masked.kea')
        imageutils.maskImage(
            tilePath, s1BordersImage, maskedPath, 'KEA',
            rsgisUtils.getRSGISLibDataTypeFromImg(tilePath), 0, 0)

    maskedTiles = glob.glob(
        os.path.join(tilesMaskedDIR, tilesBase + "*_masked.kea"))

    # Pass 2: worker that segments a single masked tile.
    def stage2threadedTiledImgSeg(imgTile):
        tileName = os.path.splitext(os.path.basename(imgTile))[0]
        kMeansCentres, imgStretchStats = self.findSegStatsFiles(
            imgTile, segStatsInfo)
        segutils.runShepherdSegmentationPreCalcdStats(
            imgTile,
            os.path.join(tilesSegsDIR, tileName + '_segs.kea'),
            kMeansCentres,
            imgStretchStats,
            outputMeanImg=None,
            tmpath=os.path.join(tmpDIR, tileName + '_segstemp'),
            gdalformat='KEA',
            noStats=False,
            noStretch=False,
            noDelete=False,
            minPxls=minPxlsVal,
            distThres=distThresVal,
            bands=bandsVal,
            processInMem=False)

    p = Pool(ncpus)
    p.map(stage2threadedTiledImgSeg, maskedTiles)

    # Pass 3: flag the border clumps and export them as an 8-bit image.
    for segPath in glob.glob(os.path.join(tilesSegsDIR, tilesBase + "*_segs.kea")):
        segName, _ = os.path.splitext(os.path.basename(segPath))
        rastergis.defineBorderClumps(segPath, 'BoundaryClumps')
        rastergis.exportCol2GDALImage(
            segPath,
            os.path.join(tilesSegBordersDIR, segName + '_segsborder.kea'),
            'KEA', rsgislib.TYPE_8UINT, 'BoundaryClumps')
def run(non_iter_args, do_multiprocessing):
    """Run ``calc_weights_oneset`` once per cause variable index.

    ``non_iter_args`` must unpack into exactly eleven values; they are bound
    as the leading positional arguments of ``calc_weights_oneset``. The cause
    variable indexes come from ``weightcalcdata.causevarindexes`` (the first
    unpacked value).

    When ``do_multiprocessing`` is true the calls are mapped over a pool with
    one process per CPU; otherwise they run sequentially. Always returns
    ``None``.
    """
    (weightcalcdata, weightcalculator, box, startindex, size,
     newconnectionmatrix, method, boxindex, filename, headerline,
     writeoutput) = non_iter_args

    calc_for_causevar = partial(
        calc_weights_oneset, weightcalcdata, weightcalculator, box,
        startindex, size, newconnectionmatrix, method, boxindex, filename,
        headerline, writeoutput)

    if not do_multiprocessing:
        for causevarindex in weightcalcdata.causevarindexes:
            calc_for_causevar(causevarindex)
        return None

    pool = Pool(processes=pathos.multiprocessing.cpu_count())
    pool.map(calc_for_causevar, weightcalcdata.causevarindexes)

    # Current solution to no close and join methods on ProcessingPool
    # https://github.com/uqfoundation/pathos/issues/46
    cached_pool = pathos.multiprocessing.__STATE['pool']
    cached_pool.close()
    cached_pool.join()
    pathos.multiprocessing.__STATE['pool'] = None
    return None
def extract_hits(bins_to_contig_lists, outdir, contig_file, threads):
    """Write one FASTA file per bin by running ``pullseq`` on ``contig_file``.

    For every key of ``bins_to_contig_lists`` a text file listing that bin's
    contig names is written to ``<outdir>/pullseq_ids_tmp/<bin>.txt`` and
    ``pullseq`` is invoked with it, redirecting its output to
    ``<outdir>/<bin>.fasta``. The bins are processed through a pool of
    ``threads`` workers.

    The temporary id directory is removed and the pool terminated even when
    the map raises. Returns ``None``.
    """
    import shutil

    p = Pool(threads)
    pullseq_tmp = os.path.join(outdir, 'pullseq_ids_tmp')
    if not os.path.exists(pullseq_tmp):
        # Created in-process so paths containing spaces or shell
        # metacharacters are handled correctly.
        os.makedirs(pullseq_tmp)

    def pullseq_by_bin(bin_name, contig_list, contig_file):
        # Generates a file with the names of all the contigs to pull out,
        # then provides that to pullseq, which writes the bin's FASTA file.
        ids_path = os.path.join(pullseq_tmp, bin_name + '.txt')
        with open(ids_path, 'w') as outfile:
            for element in contig_list:
                outfile.write(element + '\n')
        # NOTE(review): the command line is still assembled by string
        # concatenation and run through the shell; file names must not
        # contain spaces or shell metacharacters.
        os.system('pullseq -i ' + contig_file + ' -n ' + ids_path + ' > ' +
                  os.path.join(outdir, bin_name + '.fasta'))
        return

    try:
        p.map(lambda x: pullseq_by_bin(x, bins_to_contig_lists[x], contig_file),
              bins_to_contig_lists)
    finally:
        shutil.rmtree(pullseq_tmp, ignore_errors=True)
        p.terminate()
    return
def HashBulkRead(request):
    """Check a whitespace-separated list of hashes posted in the 'names' field.

    Each hash is passed to ``checkhash`` together with one freshly created
    reference code, using a pool of 20 workers. The ``Hashes`` rows stored
    under that reference are then rendered with 'bulkfilehash.html'.

    An empty field, or any exception raised while processing, produces an
    info message and re-renders the template without results.
    """
    hashes = request.POST['names']
    hashes_list = hashes.split()
    try:
        if not hashes_list:
            messages.info(request, 'Field is empty!!')
            return render(request, 'bulkfilehash.html')

        ref = create_ref_code()
        # Every hash in this request shares the same reference code.
        ref_list = [ref] * len(hashes_list)
        p = Pool(20)
        p.map(checkhash, hashes_list, ref_list)
        result_to_display = Hashes.objects.filter(reference=ref)
        context = {
            'data_ip': result_to_display,
            'reference': ref,
            'button': 1
        }
        return render(request, 'bulkfilehash.html', context)
    except Exception:
        # Deliberately broad, but no longer swallows SystemExit or
        # KeyboardInterrupt the way a bare ``except:`` does.
        messages.info(request, 'check your input, error occured!!')
        return render(request, 'bulkfilehash.html')
def GroupByParallelProcess(tweetsDF, cores, groupMethod):
    """Group by and aggregate on time via a parallel process.

    ``tweetsDF.label_date`` is cast to int in place, then used as the index
    of a new frame. That frame is split with ``GetListOfSplitDFs`` and the
    worker selected by ``groupMethod`` is mapped over the pieces with a pool
    of ``cores`` workers.

    :param tweetsDF: DataFrame with a ``label_date`` column.
    :param cores: number of splits and pool workers.
    :param groupMethod: "weighted-average", "sum" or "mean".
    :return: the list returned by ``pool.map``, or ``None`` (after logging an
        error) when ``groupMethod`` is not recognised.
    """
    tweetsDF.label_date = tweetsDF.label_date.astype(int)
    tweetsDF = tweetsDF.set_index("label_date")

    # Validate the method before any pool is created, so an invalid choice
    # no longer leaves an unclosed pool behind.
    if groupMethod == "weighted-average":
        worker = PerformGroupbyAndAggregate
    elif groupMethod == "sum":
        worker = PerformSum
    elif groupMethod == "mean":
        worker = PerformMean
    else:
        logging.error("Choose correct group by method.")
        return None

    df_split = GetListOfSplitDFs(tweetsDF, cores)

    # create the multiprocessing pool
    pool = Pool(cores)
    logging.info("Starting the grouping and aggregating process.")
    try:
        # process the DataFrame by mapping function to each df across the pool
        df_out = pool.map(worker, df_split)
    finally:
        # close down the pool and join, even if a worker raised
        pool.close()
        pool.join()
        pool.clear()
    logging.info("Ended the grouping and aggregating process.")
    return df_out
def BulkHash(request):
    """Check every cell of an uploaded workbook's 'Sheet1' as a hash.

    A GET request renders the empty form. Otherwise the file uploaded as
    'excel_file' is opened with openpyxl, each cell value of "Sheet1" is
    converted to ``str`` and passed to ``checkhash`` together with one freshly
    created reference code (pool of 20 workers). The ``Hashes`` rows stored
    under that reference are rendered with 'bulkfilehash.html'.
    """
    if "GET" == request.method:
        return render(request, 'bulkfilehash.html', {'button': 0})

    # ``.get`` returns None when nothing was uploaded; indexing would raise
    # before the "no file" message below could ever be shown.
    excel_file = request.FILES.get("excel_file")
    if not excel_file:
        messages.info(request, 'Please select a file!!')
        return render(request, 'bulkfilehash.html')

    wb = openpyxl.load_workbook(excel_file)
    worksheet = wb["Sheet1"]
    ref = create_ref_code()
    row_data = [str(cell.value)
                for row in worksheet.iter_rows()
                for cell in row]
    # Every cell in this upload shares the same reference code.
    ref_data = [ref] * len(row_data)

    p = Pool(20)
    p.map(checkhash, row_data, ref_data)

    # Use ``ref`` directly: ``ref_data[0]`` fails on an empty sheet.
    result_to_display = Hashes.objects.filter(reference=ref)
    context = {
        'data_ip': result_to_display,
        'reference': ref,
        'button': 1
    }
    return render(request, 'bulkfilehash.html', context)
def para_pfulp_res_mutli(in_var, f1, f2, tlimit, cpu_n):
    """Two-phase parallel search: a coarse pass with ``test_pulp``, then a
    refinement pass with ``fine_search`` on the best coarse results.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source; confirm it against the original file.

    :param in_var: iterable of 2-item entries; each is expanded with
        ``depart(entry[0], entry[1])``.
    :param f1: passed to ``test_pulp`` and ``fine_search``.
    :param f2: passed to ``test_pulp`` and ``fine_search``.
    :param tlimit: overall time limit; what is left after the coarse phase is
        divided evenly between the refinement jobs.
    :param cpu_n: number of pool workers.
    :return: tuple ``(best refined result, elapsed seconds)``; when the
        refinement phase yields nothing, a zero-filled placeholder is returned
        in place of the result.
    """
    mp.prec = 100
    start_time = time.time()
    # Expand every input entry, then enumerate the cartesian product of the
    # expansions: each element of l_confs is one configuration.
    l_var = []
    for i in in_var:
        tmp_l = depart(i[0], i[1])
        l_var.append(tmp_l)
    l_confs = []
    for element in itertools.product(*l_var):
        l_confs.append(element)
    next_tmp_l = []
    print len(l_confs)
    print l_confs
    p = Pool(cpu_n)
    all_input_l = []
    lf1 = []
    lf2 = []
    for j in l_confs:
        # For each item k of the configuration draw 20 sorted uniform samples
        # between k[0] and k[1], then take the product of the sample lists.
        var_l = []
        for k in j:
            var_l.append(sorted(np.random.uniform(k[0], k[1], 20)))
        input_l = []
        for element in itertools.product(*var_l):
            input_l.append(element)
        all_input_l.append(input_l)
    # One copy of f1/f2 per job so they can be mapped in lockstep.
    lf1.extend([f1] * len(all_input_l))
    lf2.extend([f2] * len(all_input_l))
    res = p.map(test_pulp, lf1, lf2, all_input_l, l_confs)
    next_tmp_l = res
    tmp_time = time.time() - start_time
    print tmp_time
    # Keep at most the 10 best coarse results (half of them when there are
    # more than 10), sorted in descending order.
    k = len(next_tmp_l)
    if k > 10:
        k = min(int(len(next_tmp_l) / 2), 10)
    next_tmp_l = sorted(next_tmp_l, reverse=True)[0:k]
    print next_tmp_l[0]
    next_tmp_l_2 = []
    # Remaining time budget per refinement job.
    # NOTE(review): raises ZeroDivisionError when the coarse phase returned
    # nothing (k == 0); the print above would already fail in that case.
    one_time = (tlimit - tmp_time) / (k)
    print one_time
    print tlimit
    print len(next_tmp_l)
    print next_tmp_l
    time_list = []
    gen_l = []
    for i in next_tmp_l:
        tmp_gen_l = produce_interval(i[1], i[2])
        gen_l.append(tmp_gen_l)
    # lf1/lf2 still hold the coarse-phase entries, so they end up longer than
    # gen_l; presumably the pool's map stops at the shortest argument list --
    # TODO confirm.
    lf1.extend([f1] * len(gen_l))
    lf2.extend([f2] * len(gen_l))
    time_list.extend([one_time] * len(gen_l))
    print gen_l
    res = p.map(fine_search, gen_l, lf1, lf2, time_list)
    print res
    next_tmp_l_2 = res
    next_tmp_l_2 = sorted(next_tmp_l_2, reverse=True)
    print next_tmp_l_2
    end_time = time.time() - start_time
    if len(next_tmp_l_2) == 0:
        return [[0.0, 0.0, [0.0, 0.0]], [0.0, 0.0, [0.0, 0.0]]], end_time
    return next_tmp_l_2[0], end_time
def loop_files_in_dir(input_dir, output_dir, loop_fn):
    """Call ``loop_fn((input_path, output_path))`` for every entry of ``input_dir``.

    The output path is the entry's name with a trailing '.txt' or '.gz'
    removed and '.tfrecord' appended, joined onto ``output_dir``.

    When the module-level ``is_debug`` flag is ``True`` the calls run
    sequentially in this process; otherwise they are mapped over a pool of at
    most 10 workers. An empty ``input_dir`` is a no-op.
    """
    global is_debug

    def replace_postfix(x):
        # Strip a known extension (if any) before adding '.tfrecord'.
        if x.endswith('.txt'):
            x = x[:-4]
        elif x.endswith('.gz'):
            x = x[:-3]
        return x + '.tfrecord'

    paths = [(os.path.join(input_dir, name),
              os.path.join(output_dir, replace_postfix(name)))
             for name in os.listdir(input_dir)]
    if not paths:
        # Nothing to do; also avoids asking for a pool with zero workers.
        return

    N = min(10, len(paths))
    if is_debug is True:
        for in_path, out_path in paths:
            loop_fn((in_path, out_path))
    else:
        pool = Pool(N)
        pool.map(loop_fn, paths)
def run(self):
    """Write per-circle BAM files for this sample and process them in a pool.

    Reads the circles, loads the alignments for their reads, writes the
    circle BAM files into ``<outfolder>/<sample>`` (created if missing) and
    finally maps ``run_parallel`` over every ``*.bam`` file found there using
    ``self.cpus`` workers. Progress is reported with ``print``.
    """
    tempfile.tempdir = self.tmp_folder  # set global tmp dir

    circle_info, circle_reads = self.read_circles(self.circles)
    print('DONE reading circles, found %s circles' % (len(circle_info)))

    reads = self.load_alignment(self.bamfile, circle_reads, self.mapq_cutoff)
    print('DONE extracting circular reads')

    sample_dir = '%s/%s' % (self.outfolder, self.sample)
    if self.sample not in os.listdir(self.outfolder):
        os.mkdir(sample_dir)

    self.write_circle_bam(reads, circle_info, self.cutoff, self.bamfile,
                          sample_dir)
    print('DONE writing circle bam files\n')

    import glob
    files = glob.glob('%s/*.bam' % sample_dir)
    # Sorted BAMs possibly left over from a previous run also match '*.bam',
    # so they are subtracted to get the reported circle count.
    sorted_bams = glob.glob('%s/*.sorted.bam' % sample_dir)
    actual_bams = len(files) - len(sorted_bams)
    print(
        '%s circles passed your thresholds of at least %s reads with at least a mapq of %s\n\n'
        % (actual_bams, self.cutoff, self.mapq_cutoff))

    from pathos.multiprocessing import ProcessingPool as Pool
    Pool(self.cpus).map(run_parallel, files)
def performStage3SubsetsSegmentation(self, subsetImgsMaskedDIR, subsetSegsDIR,
                                     tmpDIR, subImgBaseName, segStatsInfo,
                                     minPxlsVal, distThresVal, bandsVal, ncpus):
    """Segment every masked subset image using a pool of ``ncpus`` workers.

    All ``subImgBaseName*_masked.kea`` files in ``subsetImgsMaskedDIR`` are
    segmented with ``segutils.runShepherdSegmentationPreCalcdStats``; the
    clumps for ``<name>.kea`` are written to
    ``subsetSegsDIR/<name>_segs.kea`` and ``tmpDIR/<name>_segstemp`` is used
    as the temporary path. The pre-calculated statistics files for each image
    come from ``self.findSegStatsFiles(image, segStatsInfo)``.
    """
    maskedSubsets = glob.glob(
        os.path.join(subsetImgsMaskedDIR, subImgBaseName + "*_masked.kea"))

    def stage3threadedTiledImgSeg(imgTile):
        # Worker: segment one masked subset image.
        tileName, _ = os.path.splitext(os.path.basename(imgTile))
        kMeansCentres, imgStretchStats = self.findSegStatsFiles(
            imgTile, segStatsInfo)
        segutils.runShepherdSegmentationPreCalcdStats(
            imgTile,
            os.path.join(subsetSegsDIR, tileName + '_segs.kea'),
            kMeansCentres,
            imgStretchStats,
            outputMeanImg=None,
            tmpath=os.path.join(tmpDIR, tileName + '_segstemp'),
            gdalformat='KEA',
            noStats=False,
            noStretch=False,
            noDelete=False,
            minPxls=minPxlsVal,
            distThres=distThresVal,
            bands=bandsVal,
            processInMem=False)

    p = Pool(ncpus)
    p.map(stage3threadedTiledImgSeg, maskedSubsets)
def run(self):
    """Initialise the exon count outputs and process every circle file.

    Creates (truncating) ``<inputfolder>/<sample>.exon_counts.txt`` with its
    header line and ``<inputfolder>/<sample>.exon_counts.bed`` with a
    '# BED12' line, makes sure ``<inputfolder>/<sample>.coverage_profiles``
    exists, then maps ``self.run_parallel`` over the entries of
    ``<inputfolder>/<sample>`` with ``self.cpus`` workers.
    """
    # initializing the result table file
    self.exon_count_file = '%s/%s.exon_counts.txt' % (self.inputfolder,
                                                      self.sample)
    # Context managers close the files even if a write fails.
    with open(self.exon_count_file, 'w') as exon_counts_out:
        exon_counts_out.write(
            'sample\tcircle_id\ttranscript_id\tother_ids\texon_id\tchr\tstart'
            '\tend\tstrand\texon_length\tunique_reads\tfragments\tnumber+\tnumber-\n')
    with open('%s/%s.exon_counts.bed' % (self.inputfolder, self.sample),
              'w') as output_file:
        output_file.write('# BED12\n')

    # all circle files in a given folder
    files = os.listdir('%s/%s' % (self.inputfolder, self.sample))

    # create folder for coverage profiles
    if '%s.coverage_profiles' % (self.sample) not in os.listdir(self.inputfolder):
        os.mkdir('%s/%s.coverage_profiles' % (self.inputfolder, self.sample))

    from pathos.multiprocessing import ProcessingPool as Pool
    pool = Pool(self.cpus)
    pool.map(self.run_parallel, files)
def run(self):
    """Build one cd-hit 2d command per input file and execute them in a pool.

    The program is 'cdhit-est-2d' when ``self.param['nucleotide']`` equals
    ``True`` and 'cdhit-2d' otherwise. Each command compares
    ``self.param['ref_cleared']`` (``-i``) with one file of
    ``self.param['files']`` (``-i2``) and writes to
    ``<odir>/<file name without extension>``. The commands are run through
    ``cdhit_analisys.exec_cdhit`` with ``self.param['threads']`` workers.
    """
    program = 'cdhit-est-2d' if self.param['nucleotide'] == True else 'cdhit-2d'
    cmd_list = [
        [
            program,
            '-i', str(self.param['ref_cleared']),
            '-i2', str(f),
            '-c', str(self.param['c']),
            '-g', str(self.param['g']),
            '-s2', str(self.param['s2']),
            '-o', self.param['odir'] + '/' +
            os.path.splitext(os.path.basename(f))[0],
        ]
        for f in self.param['files']
    ]
    # Only one pool is created; a second, immediately shadowed pool used to
    # be started here and was never shut down.
    with Pool(int(self.param['threads'])) as p:
        p.map(cdhit_analisys.exec_cdhit, cmd_list)
def main():
    """Find codon positions for every codon in ``codonList`` in parallel.

    The UTR and mRNA frames are merged on '#transcript' and passed through
    ``find_cds_seq``. ``find_codon_positions_multi`` is then mapped over
    ``codonList`` with ``args.threadNumb`` workers, each call receiving the
    codon and the resulting frame.
    """
    merged = utrdf.merge(right=mRNAdf, on='#transcript')
    cds_df = find_cds_seq(merged)
    # One entry per codon so the frame can be mapped in lockstep with
    # codonList (every entry refers to the same frame object).
    frames = [cds_df] * len(codonList)
    pool = Pool(nodes=int(args.threadNumb))
    pool.map(find_codon_positions_multi, codonList, frames)
def findbonds(self):
    """Calculates interactions between and/or within monomers

    Behaviour depends on the instance attributes:

    * ``self.help`` truthy: print the usage text and do nothing else.
    * ``self.pdbs`` or ``self.complexName`` missing: print a short message.
    * ``self.systematic == "True"``: map ``analysis.comb_int`` over every
      entry of ``self.pdbs`` with a process pool, then combine the results.
    * ``self.systematic == "False"``: call ``analysis.comb_int`` on
      ``self.pdbs[0]`` only, then combine the results.

    With ``self.intType == "all"`` the four interaction types "hbonds",
    "ionic", "hydrophobic" and "ring_stacking" are processed one after
    another; otherwise only ``self.intType`` is processed.

    NOTE(review): statement nesting was reconstructed from a
    whitespace-collapsed source -- in particular the placement of
    ``pool.close()`` should be confirmed against the original file.
    """
    if self.help:
        print(
            "Calculates interactions between and/or within monomers\n"
            f'\n\033[1mUsage: minnie findbonds \n'
            f' -cn, --complexName <string> \n '
            f' Project ID of your complex\n\n'
            f' -p, --pdbs [<.pdb>/<path>] (singleframe.pdb) \n'
            f' Give single *.pdb or give folder path \n\n'
            f' -i [<hbonds>/<ionic>/<hydrophobic>/<ring_stacking>/<all>] (hbonds) \n'
            f' Calculates which types of interactions \n\n'
            f' -d <float> (2.5) \n'
            f' Cut-off to define a hydrogen bond\n\n'
            f' -intra, --includeIntra [<"True">/<"False">] ("False") \n'
            f' What do you want to analyze, all or only inter-monomer contacts? \033[0m \n\n\n\n'
            f'\n\033[1mUsage example:\033[0m\n\n'
            " Single frame - minnie findbonds -cn sox4 -p sox4/02_frames/md_0.pdb -i hbonds -s False \n"
            " Multiple frames - minnie findbonds -cn sox4 -p sox4/02_frames/* -i hbonds \n"
            " Multiple frames - minnie findbonds -cn sox4 -p sox4/02_frames/* -i all \n"
        )
    elif not self.pdbs:
        print(f'where is pdb??')
    elif not self.complexName:
        print(f'Please specify complex name(s)')
    elif (self.systematic) == "True":
        # Many frames: process every pdb in parallel. All arguments other
        # than the pdb path are stringified and repeated once per pdb so the
        # lists can be mapped in lockstep.
        pdb_list = self.pdbs
        if (self.intType == "all"):
            for intType in ["hbonds", "ionic", "hydrophobic", "ring_stacking"]:
                # A pool is requested on every iteration and not closed here.
                pool = Pool(pathos.multiprocessing.cpu_count() - 2)
                pool.map(analysis.comb_int, pdb_list, len(pdb_list) * [str(self.complexName)], len(pdb_list) * [str(intType)], len(pdb_list) * [str(self.includeIntra)], len(pdb_list) * [str(self.hbond_distance)])
                #pool.close()
        else:
            pool = pathos.multiprocessing.ProcessingPool(
                pathos.multiprocessing.cpu_count() - 2)
            pool.map(analysis.comb_int, pdb_list, len(pdb_list) * [str(self.complexName)], len(pdb_list) * [str(self.intType)], len(pdb_list) * [str(self.includeIntra)], len(pdb_list) * [str(self.hbond_distance)])
            pool.close()
        analysis.combine_interfacea_results(self.complexName)
    elif (self.systematic) == "False":
        # Single frame: only the first pdb is analysed, in this process, and
        # the arguments are passed through without stringifying them.
        if (self.intType == "all"):
            for intType in ["hbonds", "ionic", "hydrophobic", "ring_stacking"]:
                analysis.comb_int(self.pdbs[0], self.complexName, intType, self.includeIntra, self.hbond_distance)
        else:
            analysis.comb_int(self.pdbs[0], self.complexName, self.intType, self.includeIntra, self.hbond_distance)
        analysis.combine_interfacea_results(self.complexName)
def main():
    """Fetch the RP and RNA FASTQ files with a 40-node pool, then rename them.

    ``get_FASTQ_RP`` is mapped over ``srrList_RP`` and ``get_FASTQ_RNA`` over
    ``srrList_RNA`` (in that order), after which ``rename_RP_fastq`` and
    ``rename_RNA_fastq`` are called.
    """
    print("starting fastq-dump")
    pool = Pool(nodes=40)
    pool.map(get_FASTQ_RP, srrList_RP)
    pool.map(get_FASTQ_RNA, srrList_RNA)
    for rename in (rename_RP_fastq, rename_RNA_fastq):
        rename()
def main():
    """Download the full text of every paper listed in the KEYWORDS info file.

    Reads ``<KEYWORDS>/<KEYWORDS>_info`` in groups of four lines: the first
    line of a group supplies uid, doi, pmcid and the title prefix
    (tab-separated fields 0, 3, 4 and 1); the second supplies the rest of the
    title. ``down_paper_from_PMC_and_SciHub`` is then mapped over the
    collected lists with 4 workers, and the number of files added to the
    KEYWORDS folder is printed.
    """
    print("\n\n正在下载%s的全文...\n" % KEYWORDS)

    # Create the output folder if it does not exist yet.
    if not os.path.exists(KEYWORDS):
        os.mkdir(KEYWORDS)

    # Empty the notFound file.
    file_path_notFound = '{0}/{1}'.format(KEYWORDS, KEYWORDS + '_notFound')
    open(file_path_notFound, 'w', encoding='utf-8').close()

    def count_files():
        # Number of regular files currently in the KEYWORDS folder.
        return len([
            name for name in os.listdir(KEYWORDS)
            if os.path.isfile(os.path.join(KEYWORDS, name))
        ])

    # Open the info file and extract uid, doi, pmcid and title into separate
    # lists that serve as the parallel map arguments.
    file_path_info = '{0}/{1}'.format(KEYWORDS, KEYWORDS + '_info')
    with open(file_path_info, 'r', encoding='utf-8') as fh:
        uid, doi, pmcid, title = [], [], [], []
        for line_number, line in enumerate(fh, start=1):
            list_line = line.strip().split('\t')
            position = line_number % 4
            if position == 1:
                uid.append(list_line[0])
                doi.append(list_line[3])
                pmcid.append(list_line[4])
                title1 = str(list_line[1]) + '-'
            if position == 2:
                # The first 7 characters of this line are skipped before the
                # title pattern is substituted.
                title2 = re.sub(PATTERN_title, ' ', str(list_line[0])[7:])
                title.append(title1 + title2)

    # Count the files in the folder before downloading.
    file_count_before = count_files()

    # Run the down_paper_from_PMC_and_SciHub tasks in parallel.
    pool = Pool(4)
    pool.map(down_paper_from_PMC_and_SciHub, uid, doi, pmcid, title)

    # Count the files in the folder after downloading.
    file_count_after = count_files()

    # Print the end-of-run summary.
    print("文献全文下载结束!本次运行下载了%d篇文献!" % (file_count_after - file_count_before))
    print("下载的文献全文保存在%s文件夹中。" % KEYWORDS)
    print("下载失败的文献信息保存在%s_notFound.txt中。\n" % KEYWORDS)
def run_all_control_analysis(self):
    """Run ``self.control_analysis`` on every replicate of the control directory.

    The control directory is the last entry of ``dir_walker(self.encode_root)``
    whose lower-cased name contains 'control'; an ``AssertionError`` is raised
    when there is none. Its replicates (``dir_walker(..., level=1)``) are
    processed with a 14-node pool and returned.
    """
    control_dir = None
    for candidate in dir_walker(self.encode_root):
        if 'control' in candidate.lower():
            # No break: a later match overrides an earlier one.
            control_dir = candidate
    assert control_dir is not None

    replicates = dir_walker(control_dir, level=1)
    ProcessingPool(nodes=14).map(self.control_analysis, tuple(replicates))
    return replicates
def parallel_process(func, args, nprocs, bam=None, fasta=None):
    """Map ``func`` over ``args`` with a pool of ``nprocs`` workers.

    ``bam`` and ``fasta``, when not ``None``, are passed to every call as
    additional positional arguments after the item from ``args`` (``bam``
    first when both are given).

    :return: the list of results produced by the pool's ``map``.
    """
    p = Pool(nprocs)
    # Repeat each supplied extra once per item so it can be mapped in
    # lockstep with ``args``.
    extras = [value for value in (bam, fasta) if value is not None]
    broadcast = [[value] * len(args) for value in extras]
    results = p.map(func, args, *broadcast)
    return results
def execute_all_parallel(self, n_threads=multiprocessing.cpu_count(), checkpoint_size=None, prepend_existing=True): pool = Pool(n_threads) # wrapper because of map function's lack of multi argument support def func_star(fnid_args_kwargs): print "Executing function..." t0 = time.time() fn_output = self.fn[0](*(fnid_args_kwargs[1]), **(fnid_args_kwargs[2])) t1 = time.time() print "...completed (Time elapsed: ", str(t1 - t0), ")." return fn_output self.speak("Starting parallel scheduled execution (", str(n_threads), " threads)...") if checkpoint_size is not None: t0 = time.time() checkpoints = range(0, len(self.fn_calls), checkpoint_size) for chkid, chk in enumerate(checkpoints): fn_call_ids = range(min(checkpoint_size, len(self.fn_calls))) self.speak("Starting scheduled batch " + str(chkid + 1) + " of " + str(len(checkpoints)) + "...") t2 = time.time() fn_outputs = pool.map( func_star, self.fn_calls[min(fn_call_ids):max(fn_call_ids) + 1]) t3 = time.time() self.speak("Scheduled batch complete. Time elapsed: ", str(t3 - t2)) self.speak("-----------------------") self.save_results(fn_call_ids, fn_outputs, prepend_existing) del self.fn_calls[min(fn_call_ids):max(fn_call_ids) + 1] if self.savefile_path is not None: self.save_schedule() if self.savefile_path is not None: os.remove( self.savefile_path) # delete (now empty) snapshot file t1 = time.time() else: t0 = time.time() fn_outputs = pool.map(func_star, self.fn_calls) t1 = time.time() self.save_all_results(fn_outputs, prepend_existing) self.speak("Multi-schedule complete. Total time elapsed: ", str(t1 - t0)) return fn_outputs
def main():
    """Run ``get_context`` for every record id of the input FASTA file.

    The output directory ``<cwd>/<outdir>`` is created if missing, the
    records of ``<cwd>/<infile>`` are parsed as FASTA, and ``get_context`` is
    mapped over their ids with ``threads`` workers. Each call receives the
    id, the output directory and the full id list.
    """
    context = os.path.join(os.getcwd(), outdir)
    if not os.path.exists(context):
        # Created in-process rather than through a shell 'mkdir' string, so
        # paths with spaces or shell metacharacters work.
        os.makedirs(context)
    hits_recs = list(SeqIO.parse(os.path.join(os.getcwd(), infile), 'fasta'))
    hits_ids = [rec.id for rec in hits_recs]
    p = Pool(threads)
    p.map(lambda hit_id: get_context(hit_id, context, hits_ids), hits_ids)
    print('boogie')
def parallelise_initsync(argv, ssp_params, process_control_id, logger):
    """Run ``initsync_table`` for every source_system_profile record in parallel.

    :param argv: parsed arguments; ``sourceuser``, ``targetuser`` and
        ``numprocesses`` are read here and the object is passed to each worker.
    :param ssp_params: iterable of 4-tuples
        ``(source_schema, table, target_schema, query_condition)``.
    :param process_control_id: passed unchanged to every worker.
    :param logger: logger used for progress messages.
    :return: dict built by merging one message taken from the queue per table.
    """
    # Pivot the collection of source_system_profile records into four
    # separate lists to enable us to call pool.map on each record
    columns = [list(column) for column in zip(*ssp_params)]
    source_schemas, tables, target_schemas, query_conditions = columns

    source_conn_detail = dbuser.get_dbuser_properties(argv.sourceuser)
    target_conn_detail = dbuser.get_dbuser_properties(argv.targetuser)

    logger.info("Processing tables with {} dedicated worker processes".format(
        argv.numprocesses))
    pool = Pool(nodes=argv.numprocesses)

    # Per-table copies of the constant arguments.
    n_tables = len(tables)
    argvs = [argv] * n_tables
    source_conn_details = [source_conn_detail] * n_tables
    target_conn_details = [target_conn_detail] * n_tables
    pcids = [process_control_id] * n_tables
    # NOTE: list multiplication repeats one Queue object, so every worker
    # shares the same queue.
    queues = [manager.Queue()] * n_tables

    logger.debug("Starting a new process for each table in: {tables}".format(
        tables=tables))

    # Execute initsync for each schema/table combination in parallel.
    # chunksize=1 ensures tables are processed in sequence and workers are
    # fully utilised.
    pool.map(initsync_table, argvs, source_conn_details, target_conn_details,
             source_schemas, tables, target_schemas, pcids, query_conditions,
             queues, chunksize=1)
    pool.close()
    logger.debug("parallelise_initsync: Pool joining")
    pool.join()
    logger.debug("parallelise_initsync: Pool joined")

    # Collect one message per table.
    all_table_results = {}
    for q in queues:
        size = q.qsize()
        message = q.get()
        logger.debug("Message queue size = {s}, message = {m}".format(
            s=size, m=message))
        all_table_results.update(message)

    logger.debug("all_table_results = {r}".format(r=all_table_results))
    return all_table_results
def _marginal_acq_parallel(self, X):
    """Average ``self._parallel_acq_helper`` over the GP hyperparameter samples.

    For each of the ``self.number_of_gp_hyps_samples`` hyperparameter
    settings the helper is mapped over the rows of ``X`` with a 4-worker
    pool and the results are accumulated. The sum is divided by the number
    of hyperparameter samples times the number of rows of ``self.W_samples``.

    :param X: array whose first axis indexes the points to evaluate.
    :return: array of shape
        ``(X.shape[0], len(self.utility_parameter_samples))``.
    """
    marginal_acqX = np.zeros(
        (X.shape[0], len(self.utility_parameter_samples)))
    n_w = self.W_samples.shape[0]
    pool = Pool(4)
    for h in range(self.number_of_gp_hyps_samples):
        self.model.set_hyperparameters(h)
        # The helper is evaluated once per hyperparameter sample; an extra
        # map whose result was discarded used to double the work here.
        marginal_acqX += np.atleast_2d(
            pool.map(self._parallel_acq_helper, X))
    marginal_acqX /= (self.number_of_gp_hyps_samples * n_w)
    return marginal_acqX
def create_features(WRITE_DB, FP, all_tables, schemas, CPUS, selected_schema,
                    selected_table):
    """Run every function found in ``FP`` in parallel and assemble the features.

    The key frame comes from ``get_key``, is sorted by 'key' and 'date' and
    reduced to ['key'] for the 'scoring_schema' or ['key', 'date', 'target']
    otherwise. ``trig_func`` is mapped over ``(function, args)`` pairs with
    ``CPUS`` workers and the results are concatenated column-wise next to the
    key frame.

    :return: the assembled DataFrame when ``WRITE_DB`` is falsy; otherwise the
        frame is written (replacing any existing table) to the scoring or
        features table of ``schemas['output_schema']`` and the string
        'Files written to DB' is returned.
    """
    is_scoring = selected_schema == 'scoring_schema'

    # define key dataframe
    key_df = get_key(all_tables[selected_table], schemas, selected_schema)
    key_df = key_df.sort_values(by=['key', 'date'])
    key_columns = ['key'] if is_scoring else ['key', 'date', 'target']
    key_df = key_df[key_columns]

    pool = Pool(CPUS)

    # Features output framework: pair each function defined in FP with the
    # shared argument tuple.
    feature_fns = [
        member for _, member in inspect.getmembers(FP, inspect.isfunction)
    ]
    args = (WRITE_DB, key_df, schemas, all_tables, fc_protocol,
            selected_schema)
    all_functions = [(fn, args) for fn in feature_fns]

    temp = pool.map(trig_func, all_functions)
    df = pd.concat(temp, axis=1)
    df = pd.concat([key_df, df], axis=1)

    if not WRITE_DB:
        return df

    engine = conn_eng()
    table_key = 'scoring_table' if is_scoring else 'features_table'
    df.to_sql(all_tables[table_key], schema=schemas['output_schema'],
              con=engine, index=False, if_exists='replace')
    engine.dispose()
    del engine
    return 'Files written to DB'
def makeRadial():
    """Render frames for constants on a circular arc in the complex plane.

    Starting from the radius and angle in ``d["radial"]``, the radius is
    shrunk by 2.5% at a time until a preview at the start of the arc is not
    entirely black. Frames are then generated for ``frameCount`` constants
    spread over a half turn; a frame is only written when its sub-images are
    not all black. Finally the frames are prepared for FFmpeg, a description
    is written to 'tweet.txt' and the elapsed time is printed.
    """
    radial = d["radial"]
    rad, angle = radial["rad"], radial["angle"]
    args = np.linspace(angle, angle + np.pi, frameCount)
    pool = Pool(4)

    # Shrink the radius until the preview contains something other than black.
    previewAllBlack = True
    while previewAllBlack:
        subIm = JuliaTools.subImage(c=rad * np.exp(1j * angle), r=r, n=10,
                                    p=p, iters=iters, split=split,
                                    save=False, aura=False)
        previewAllBlack = all(pool.map(subIm, coords))
        if previewAllBlack:
            rad *= 0.975

    # Circular arc c follows in complex plane
    cPath = rad * np.exp(1j * args)
    for frame in xrange(frameCount):
        subIm = JuliaTools.subImage(c=cPath[frame], r=r, n=n, p=p,
                                    iters=iters, split=split)
        if not all(pool.map(subIm, coords)):
            JuliaTools.makeFrame(frame, n, split, coords)
    pool.close()

    JuliaTools.prepareForFFmpeg(frameCount=frameCount, loop=True)
    with open("tweet.txt", "w") as out:
        out.write("Images generated using constants"
                  " on a circular arc of radius {:03.2f}.".format(rad))
    stop = timeit.default_timer()
    print(stop - start)
def decorator(data, par):
    """Apply ``func(data, par)`` either directly or split across a process pool.

    :param data: a list, numpy array or pandas DataFrame (subclasses included).
    :param par: second argument passed to every ``func`` call.
    :return: in single-core mode, whatever ``func`` returns. In multi-core
        mode the per-process results are merged: arrays are concatenated
        along axis 0, DataFrames are concatenated along axis 0, anything else
        is flattened by one level into a single list.
    :raises TypeError: when ``data`` is not one of the supported containers.
    """
    if not isinstance(data, (list, np.ndarray, pd.DataFrame)):
        raise TypeError(
            'data type must be a list, a numpy array or a pandas DataFrame!'
        )
    if self.n_proc == 1:
        print(' processing data in single-core mode')
        result = func(data, par)
    else:
        print(' processing data in %d-core mode' % self.n_proc)
        pool = Pool()
        data = self._multiproc_data_split(data)
        # ``repeat(par)`` supplies the same parameter to every chunk.
        result = pool.map(func, data, repeat(par))
        if isinstance(result[0], np.ndarray):
            result = np.concatenate(result, axis=0)
        elif isinstance(result[0], pd.DataFrame):
            result = pd.concat(result, axis=0)
        elif isinstance(result, list):
            result = [item for part in result for item in part]
        else:
            print(
                'mprows: output data structure of the given ' +
                'function is not recognized. Return a list containing results of each process !'
            )
    return result
def calculate(population, maps_generator_from_sample):
    """Evaluate and evolve ``population`` for ``EC['NO_OF_ITERATIONS']`` rounds.

    Each round scores every sample in parallel with
    ``sample_acceptance_score(sample, maps_generator_from_sample(sample))``,
    evolves the scored population with ``evolve_condi_population`` and dumps
    it as JSON to ``STORAGE_PREFIX + ".population"``. Timings are logged, and
    on every tenth round ``display_n(population, 3)`` is called.

    :return: the population after the last round.
    """
    logging.info("Condi_evo: calculating population:")
    logging.info(population)
    pool = Pool(EC['POOL_SIZE'])
    population_path = STORAGE_PREFIX + ".population"

    for it in range(EC['NO_OF_ITERATIONS']):
        print(it)
        logging.info("Condi_evo: calculating iteration " + str(it))
        iter_start = time.time()

        # EVALUATING POPULATION
        population = pool.map(
            lambda s: sample_acceptance_score(s, maps_generator_from_sample(s)),
            population)
        eval_fin = time.time()
        logging.info("TIME: Evaluating population finished in " +
                     str(eval_fin - iter_start))

        # EVOLVING POPULATION
        population = evolve_condi_population(population)
        evol_fin = time.time()
        logging.info("TIME: Evolving population finished in " +
                     str(evol_fin - eval_fin))
        logging.info("TIME: Iteration finished in " +
                     str(evol_fin - iter_start))

        with open(population_path, 'w') as f:
            f.write(json.dumps(population))

        if it % 10 == 9:
            display_n(population, 3)

    return population
def image_division(self):
    """Split the image into grid cells, group each cell's edgels and show the image.

    ``self.grid_division`` is mapped over the top-left corner of every grid
    cell with a process pool. For each cell that returned more than 6
    edgels, a RANSAC grouper is run on them and the groups are passed to
    ``self.line_segment``. The image is then displayed until a key is pressed.
    """
    image_rows, image_cols = self.__image.shape[:2]
    print(self.__image.shape[:2])

    # Top-left corners of the grid cells as [x, y], x being the outer index.
    grid_indices = []
    for x in xrange(0, image_cols - self.__GRID_SIZE, self.__GRID_SIZE):
        for y in xrange(0, image_rows - self.__GRID_SIZE, self.__GRID_SIZE):
            grid_indices.append(np.array([x, y]))

    output = Pool().map(self.grid_division, grid_indices)

    threshod_sucess_sample = 6
    ransacGrouper = RansacLine(1, threshod_sucess_sample, 25, 2)
    for edgels in output:
        if len(edgels) <= threshod_sucess_sample:
            continue
        ransacGrouper.edgels = edgels
        ransac_groups = ransacGrouper.applay_parallel_ransac()
        self.line_segment(ransac_groups)

    # print len(self.__lines)
    # for line in self.__lines:
    #     print (line.slope, line.intercept)
    #     coefficients = np.array([line.slope, line.intercept])
    #     # print "cof: ", coefficients
    #     x = np.array([20, 50], dtype=np.int32)
    #     polynomial = np.poly1d(coefficients)
    #     # print "Poly: ", polynomial
    #     y = polynomial(x)
    #     y = [int(e) for e in y]
    #     print "x: ", x, "y: ", y
    #     cv2.line(self.__image, (x[0], y[0]), (x[1], y[1]), (0, 255, 0), 1)
    cv2.imshow("image", self.__image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def main(args):
    """Run ``by_genome`` on every genome and write out the fragment list.

    Parameters
    ----------
    args : dict
        See ``fragments`` subcommand. Keys read here: '<genomeList>', '--fp',
        '--np', '--debug' and '--tbl'.
    """
    # list of genome files
    genomeList = Utils.parseGenomeList(args['<genomeList>'],
                                       filePath=args['--fp'])

    # analyzing each genome (in parallel unless --debug is set)
    pfunc = functools.partial(by_genome, args=args)
    pool = ProcessingPool(nodes=int(args['--np']))
    mapper = map if args['--debug'] else pool.map
    fragList = mapper(pfunc, genomeList)

    # writing out: a table with --tbl, otherwise a dill dump on stdout
    if not args['--tbl']:
        dill.dump(fragList, sys.stdout)
    else:
        write_fragList(fragList)
def map(self, f, seq):
    """
    Parallel implementation of map.

    With ``self.n_jobs == 1`` the function is applied sequentially in this
    process. With ``self.n_jobs < 1`` one worker per CPU is used; otherwise
    ``self.n_jobs`` workers are used.

    Parameters
    ----------
    f : callable
        A function to map to all the values in 'seq'

    seq : iterable
        An iterable of values to process with 'f'

    Returns
    -------
    results : list, shape=[len(seq)]
        The evaluated values
    """
    if self.n_jobs == 1:
        return [f(item) for item in seq]

    n_jobs = multiprocessing.cpu_count() if self.n_jobs < 1 else self.n_jobs
    # Closing/joining is not really allowed because pathos sees pools as
    # lasting for the duration of the program.
    return list(Pool(n_jobs).map(f, seq))
def optimize(self, f=None, df=None, f_df=None, duplicate_manager=None, x_baseline=None):
    """
    Optimizes the input function.

    Anchor points are generated according to ``self.type_anchor_points_logic``
    (optionally extended with ``x_baseline``), each anchor point is optimized
    in a 4-worker pool, and the point with the smallest value is returned. A
    baseline point replaces the result when its value is smaller still.

    :param f: function to optimize.
    :param df: gradient of the function to optimize.
    :param f_df: returns both the function to optimize and its gradient.
    :param duplicate_manager: forwarded to the anchor points generator.
    :param x_baseline: optional 2-d array of extra candidate points.
    :return: tuple ``(x_min, fx_min)``.
    """
    self.f = f
    self.df = df
    self.f_df = f_df

    ## --- Update the optimizer, in case context has been passed.
    self.optimizer = choose_optimizer(self.optimizer_name, self.context_manager.noncontext_bounds)

    ## --- Selecting the anchor points and removing duplicates
    logic = self.type_anchor_points_logic
    if logic == max_objective_anchor_points_logic:
        anchor_points_generator = ObjectiveAnchorPointsGenerator(self.space, random_design_type, f, self.n_starting)
    elif logic == thompson_sampling_anchor_points_logic:
        anchor_points_generator = ThompsonSamplingAnchorPointsGenerator(self.space, sobol_design_type, self.model)

    ## -- Select the anchor points (with context)
    anchor_points, anchor_points_values = anchor_points_generator.get(
        num_anchor=self.n_anchor,
        duplicate_manager=duplicate_manager,
        context_manager=self.context_manager,
        get_scores=True)

    has_baseline = x_baseline is not None
    if has_baseline:
        # The baseline points become additional anchor points.
        f_baseline = f(x_baseline)[:, 0]
        anchor_points = np.vstack((anchor_points, x_baseline))
        anchor_points_values = np.concatenate((anchor_points_values, f_baseline))

    print('anchor points')
    print(anchor_points)
    print(anchor_points_values)

    parallel = True
    if not parallel:
        optimized_points = [
            apply_optimizer(self.optimizer, a, f=f, df=None, f_df=f_df,
                            duplicate_manager=duplicate_manager,
                            context_manager=self.context_manager,
                            space=self.space)
            for a in anchor_points
        ]
    else:
        pool = Pool(4)
        optimized_points = pool.map(self._parallel_optimization_wrapper, anchor_points)

    print('optimized points')
    print(optimized_points)
    x_min, fx_min = min(optimized_points, key=lambda t: t[1])

    if has_baseline:
        # A raw baseline point wins if it beats the best optimized point.
        for i in range(x_baseline.shape[0]):
            val = f_baseline[i]
            if val < fx_min:
                print('baseline was best found')
                print(val)
                x_min = np.atleast_2d(x_baseline[i, :])
                fx_min = val

    return x_min, fx_min
def parallelMap(func, args, batchFunc=None, zippedIn=True, zippedOut=False, cores=-1, quiet=False):
    """Parallel map using the multiprocessing library pathos.

    The calls are issued in batches produced by ``array2batches`` and each
    batch is mapped over a ``ProcessingPool``.

    Args:
        func (function): function applied to every argument set.
        args (sequence): with ``zippedIn=True``, one entry per call:
            ``[[arg1, arg2, ..., argn], ...]`` (transposed internally); with
            ``zippedIn=False``, one sequence per parameter:
            ``[arg1s, arg2s, ..., argns]``.
        batchFunc (func, optional): currently unused. Defaults to None.
        zippedIn (bool, optional): see ``args``. Defaults to True.
        zippedOut (bool, optional): see Returns. Defaults to False.
        cores (int, optional): number of processes; -1 uses
            ``os.cpu_count()``. Defaults to -1.
        quiet (bool, optional): skip the progress bar. Defaults to False.

    Returns:
        list: with ``zippedOut=False``, one list per output position:
        ``[out1s, out2s, ..., outns]`` (non-tuple results count as a single
        output); with ``zippedOut=True``, the results in call order. Empty
        input yields an empty list.
    """
    from pathos.multiprocessing import ProcessingPool

    if zippedIn == True:
        args = list(map(list, zip(*args)))  # transpose
    if not args:
        # Nothing to map; indexing args[0] below would fail.
        return []
    if cores == -1:
        cores = os.cpu_count()
    pool = ProcessingPool(nodes=cores)

    batchIdx = list(range(len(args[0])))
    batches = array2batches(batchIdx, cores)
    out = []
    iterations = enumerate(batches) if quiet == True else progbar(enumerate(batches))
    for _, batch in iterations:
        # Pick this batch's entries out of every parameter list.
        batch_args = [[arg[j] for j in batch] for arg in args]
        out.extend(pool.map(func, *batch_args))

    if zippedOut == False and out:
        if type(out[0]) is not tuple:
            out = [(item,) for item in out]
        out = list(map(list, zip(*out)))
    return out
def _featurize_complexes(self, df, featurizer, parallel=True, worker_pool=None): """Generates circular fingerprints for dataset.""" protein_pdbs = list(df["protein_pdb"]) ligand_pdbs = list(df["ligand_pdb"]) complexes = zip(ligand_pdbs, protein_pdbs) def featurize_wrapper(ligand_protein_pdb_tuple): ligand_pdb, protein_pdb = ligand_protein_pdb_tuple print("Featurizing %s" % ligand_pdb[0:2]) molecule_features = featurizer.featurize_complexes([ligand_pdb], [protein_pdb]) return molecule_features if worker_pool is None: features = [] for ligand_protein_pdb_tuple in zip(ligand_pdbs, protein_pdbs): features.append(featurize_wrapper(ligand_protein_pdb_tuple)) else: if worker_pool is None: worker_pool = ProcessingPool(mp.cpu_count()) features = worker_pool.map(featurize_wrapper, zip(ligand_pdbs, protein_pdbs)) else: features = worker_pool.map_sync(featurize_wrapper, zip(ligand_pdbs, protein_pdbs)) #features = featurize_wrapper(zip(ligand_pdbs, protein_pdbs)) df[featurizer.__class__.__name__] = list(features)
def run(non_iter_args, do_multiprocessing):
    """Run ``calc_weights_oneset`` once per cause variable index.

    ``non_iter_args`` must unpack into exactly eleven values; they are bound
    as the leading positional arguments of ``calc_weights_oneset``. The cause
    variable indexes come from ``weightcalcdata.causevarindexes`` (the first
    unpacked value).

    When ``do_multiprocessing`` is true the calls are mapped over a pool with
    one process per CPU; otherwise they run sequentially. Always returns
    ``None``.
    """
    (weightcalcdata, weightcalculator, box, startindex, size,
     newconnectionmatrix, method, boxindex, filename, headerline,
     writeoutput) = non_iter_args

    calc_for_causevar = partial(
        calc_weights_oneset,
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    )

    if not do_multiprocessing:
        for causevarindex in weightcalcdata.causevarindexes:
            calc_for_causevar(causevarindex)
        return None

    pool = Pool(processes=pathos.multiprocessing.cpu_count())
    pool.map(calc_for_causevar, weightcalcdata.causevarindexes)

    # Current solution to no close and join methods on ProcessingPool
    # https://github.com/uqfoundation/pathos/issues/46
    cached_pool = pathos.multiprocessing.__STATE["pool"]
    cached_pool.close()
    cached_pool.join()
    pathos.multiprocessing.__STATE["pool"] = None
    return None
def compute_importance(self, alpha):
    """Average the per-tree results of ``_computeImportanceOfTree``.

    ``_computeImportanceOfTree(alpha, tree_index)`` is evaluated for every
    index in ``range(self._numTree)`` on a ``ProcessingPool`` created with
    ``self._numJobs``. The results are stacked into an array and averaged
    along axis 0.
    """
    workers = ProcessingPool(self._numJobs)
    alphas = [alpha] * self._numTree
    tree_indices = range(self._numTree)
    per_tree = workers.map(self._computeImportanceOfTree, alphas, tree_indices)
    stacked = np.array(per_tree)
    return stacked.mean(axis=0)
def apply(values):
    """Map ``func`` over ``values`` on a freshly created ``Pool`` and return the results."""
    return Pool().map(func, values)
def alignAllShapes(self):
    """Replace ``self.allShapes`` with the result of ``alignOneShape`` per shape.

    The shapes are processed through a new ``Pool``; the elapsed wall-clock
    time (pool creation included) is printed afterwards.
    """
    import pathos.multiprocessing as mp  # kept from the original; not referenced below

    began = time.time()
    workers = Pool()
    self.allShapes = workers.map(self.alignOneShape, self.allShapes)
    elapsed = time.time() - began
    print('alignAllShapes: %f' % elapsed)
    return
def register_stack_to_template(frames, template, regfn, njobs=4, **fnargs):
    """Apply ``regfn`` to every frame of ``frames`` against ``template``.

    With ``njobs > 1`` the frames are mapped over a ``ProcessingPool`` and
    the pool's result list is returned; ``template`` and ``fnargs`` are
    passed to ``regfn`` as keyword arguments. Otherwise the frames are
    processed serially with ``regfn(frame, template, **fnargs)`` and the
    results are returned as a NumPy array.
    """
    if njobs > 1:
        workers = ProcessingPool(nodes=njobs)
        align_to_template = partial(regfn, template=template, **fnargs)
        return workers.map(align_to_template, frames)

    aligned = [regfn(frame, template, **fnargs) for frame in frames]
    return np.array(aligned)
def parallelmap(func, data, nodes = None):
    """Apply ``func`` to every item of ``data`` on a ``ProcessingPool``.

    Args:
        func: Callable applied to each item.
        data: Iterable of items to process.
        nodes: Number of worker processes. When falsy, two fewer than the
            CPU count are used, but never fewer than one.

    Returns:
        The list produced by ``pool.map``, or ``None`` when the map was
        interrupted with ``KeyboardInterrupt`` (the pool is then terminated
        and joined).
    """
    if not nodes:
        # Leave two cores free, but a pool needs at least one worker.
        nodes = max(1, multiprocessing.cpu_count() - 2)
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, data)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        return None
def main():
    # Entry point: reads 'fit_parameters.txt' and runs the optimisation
    # selected by fit_params['optimization'] ('hyperopt', 'mongodb' or
    # 'evolve').
    from hyperopt import fmin,tpe,hp,Trials
    from hyperopt.mongoexp import MongoTrials
    import os
    # NOTE(review): eval() executes whatever the parameter file contains;
    # only safe while that file is trusted.
    fit_params=eval(open('fit_parameters.txt').read())
    fit_params['root']=os.getcwd()
    directory=init_directory(fit_params)
    # In-process hyperopt search using the TPE suggestion algorithm.
    if fit_params['optimization']=='hyperopt':
        space=search_space(fit_params)
        trials=Trials()
        best=fmin(run,space=space,algo=tpe.suggest,max_evals=fit_params['max_evals'],trials=trials)
        plot_results(trials.trials)
    #https://github.com/hyperopt/hyperopt/wiki/Parallelizing-Evaluations-During-Search-via-MongoDB
    ''' commands for MongoDB mongod --dbpath . --port 1234 export PYTHONPATH=$PYTHONPATH:/home/pduggins/influence_susceptibility_conformity hyperopt-mongo-worker --mongo=localhost:1234/foo_db --poll-interval=0.1 '''
    # Same search, but trials are stored in / distributed through MongoDB.
    if fit_params['optimization']=='mongodb':
        space=search_space(fit_params)
        space['directory']=directory
        trials=MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp4')
        best=fmin(run,space=space,algo=tpe.suggest,max_evals=fit_params['max_evals'],trials=trials)
        plot_results(trials.trials)
    # Evolutionary search: evaluate the population in a process pool, then
    # select, remake and mutate it once per generation.
    if fit_params['optimization']=='evolve':
        from pathos.multiprocessing import ProcessingPool as Pool
        from pathos.helpers import freeze_support #for Windows
        import numpy as np
        import pandas as pd
        # freeze_support()
        evo_pop=init_evo_pop(fit_params)
        pool = Pool(nodes=fit_params['threads'])
        for g in range(fit_params['generations']):
            # One parameter set ('P') per individual is passed to run().
            exp_params=[value['P'] for value in evo_pop.itervalues()]
            fitness_list=pool.map(run, exp_params)
            # new_gen_list=tournament_selection(fitness_list,fit_params)
            new_gen_list=rank_proportional_selection(fitness_list)
            remade_pop=remake(evo_pop,new_gen_list)
            mutated_pop=mutate(remade_pop,evo_pop,fit_params)
            evo_pop=mutated_pop
            # crossed_pop=crossover(mutated_pop)
            # evo_pop=crossed_pop
            # Mean and standard deviation of the 'F' entries of the new population.
            mean_F=np.average([evo_pop[ind]['F'] for ind in evo_pop.iterkeys()])
            std_F=np.std([evo_pop[ind]['F'] for ind in evo_pop.iterkeys()])
            print '\nGeneration %s: mean_F=%s, std F=%s' %(g+1,mean_F,std_F)
        # NOTE(review): nesting reconstructed from a flattened source; the
        # export is assumed to run once after the last generation - confirm.
        out_pop=pd.DataFrame([evo_pop])
        out_pop.reset_index().to_json('evo_pop.json',orient='records')
def apply_warps(warps, frames, njobs=4):
    """Apply one warp per frame and return the warped frames.

    ``parametric_warp(frame, warp)`` is evaluated for each frame/warp pair,
    on a ``ProcessingPool`` when ``njobs > 1`` and serially otherwise, and
    the results are collected into a NumPy array. If ``frames`` is an
    ``fseq.FrameSequence`` the array is wrapped with ``fseq.open_seq`` and
    the metadata of ``frames`` is attached to the result.
    """
    if njobs > 1:
        pool = ProcessingPool(nodes=njobs)
        out = np.array(pool.map(parametric_warp, frames, warps))
    else:
        # Builtin zip pairs frames with warps on both Python 2 and 3
        # (itertools.izip only exists on Python 2).
        out = np.array([parametric_warp(f, w) for f, w in zip(frames, warps)])
    if isinstance(frames, fseq.FrameSequence):
        out = fseq.open_seq(out)
        out.meta = frames.meta
    return out
def multi_ray_sim(self, sources, procs=8):
    # Traces every source in its own process via self.trace, then merges the
    # per-process optics data and ray trees back into this engine.
    # Raises Exception when len(sources) != procs.
    # NOTE(review): block nesting was reconstructed from a flattened source;
    # confirm against the original layout before relying on it.
    self.minener = 1e-10 # minimum energy threshold
    self.itmax = 1000 # stop iteration after this many ray bundles were generated (i.e.
                      # after the original rays intersected some surface this many times).
    # The multiprocessing raytracing method to call from the original engine.
    if len(sources) != procs:
        raise Exception('Number of sources and processors do not agree')
    # Creates a pool of processes and makes them raytrace one different source each. The resm list returned is a list of copies of the original engine post raytrace.
    pool = Pool(processes=procs)
    resm = pool.map(self.trace, sources)
    # New tree container and length evaluation to redimension it.
    # NOTE(review): the N.int alias is absent from recent NumPy releases - confirm the targeted version.
    tree_len = N.zeros(len(resm), dtype=N.int)
    trees = []
    for eng in xrange(len(resm)):
        # Get and regroup results in one tree and assembly only:
        S = resm[eng]._asm.get_surfaces()
        tree_len[eng] = len(resm[eng].tree._bunds)
        trees.append(resm[eng].tree)
        # Next loop is to get the optics callable objects and regroup their values without assumptions about what they are.
        for s in xrange(len(S)):
            part_res = S[s]._opt.__dict__
            # Indexing keys[k] below requires keys() to return a list (Python 2).
            keys = S[s]._opt.__dict__.keys()
            for k in xrange(len(keys)):
                # Attributes named '_opt' and '_abs' are never merged.
                if (keys[k] == '_opt') or (keys[k] == '_abs'):
                    continue
                if len(self._asm.get_surfaces()[s]._opt.__dict__[keys[k]]) < 1:
                    # Nothing stored locally yet: take the worker's value as is.
                    self._asm.get_surfaces()[s]._opt.__dict__[keys[k]] = part_res[keys[k]]
                elif len(part_res[keys[k]]) < 1:
                    continue
                else:
                    # Both sides hold data: append the worker's first entry to the local first entry along axis 1.
                    self._asm.get_surfaces()[s]._opt.__dict__[keys[k]][0] = N.append(self._asm.get_surfaces()[s]._opt.__dict__[keys[k]][0], part_res[keys[k]][0], axis=1)
    # Regroup trees:
    self.tree = RayTree() # Create a new tree for all
    for t in xrange(N.amax(tree_len)): # Browse through general tree levels up to the maximum length that has been raytraced
        for eng in xrange(len(resm)): # Browse through bundles of each parallel engine.
            if t<(tree_len[eng]): # to not go over the length of the present parallel tree.
                if t==len(self.tree._bunds): # if the index is greater than the actual length of the general tree, add a new bundle to the general tree with the present parallel bundle to initialise it.
                    bundt = trees[eng]._bunds[t]
                else:
                    if t>0: # adapt parents indexing prior to concatenation
                        trees[eng]._bunds[t].set_parents(trees[eng]._bunds[t].get_parents()+len(self.tree._bunds[t].get_parents()))
                    bundt = concatenate_rays([bundt, trees[eng]._bunds[t]])
        # NOTE(review): assumed to run once per tree level, after all engines - confirm.
        self.tree.append(bundt)
    # Drop the local reference to the per-process trees.
    trees = 0
def parallelmap(func, lst, nodes = None):
    """Apply ``func`` to every item of ``lst`` on a pathos ``ProcessingPool``.

    Uses multiprocessing (as opposed to MPI).

    Args:
        func: Callable applied to each item.
        lst: Iterable of items to process.
        nodes: Number of worker processes. When falsy, two fewer than the
            CPU count are used, but never fewer than one.

    Returns:
        The list produced by ``pool.map``, or ``None`` when the map was
        interrupted with ``KeyboardInterrupt`` (the pool is then terminated
        and joined).
    """
    from pathos.multiprocessing import ProcessingPool
    from pathos import multiprocessing
    if not nodes:
        # Leave two cores free, but a pool needs at least one worker.
        nodes = max(1, multiprocessing.cpu_count() - 2)
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, lst)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        return None
def launch_simulation_parallel(simulation_config, max_iterations, parallel_blocks=gtconfig.parallel_blocks, show_progress=True):
    """
    Parallel version of the simulation launch, to maximize CPU utilization.

    The requested replications are divided evenly between ``parallel_blocks``
    workers using floor division, so a remainder of
    ``max_iterations % parallel_blocks`` replications is not executed.

    :param simulation_config: Configuration handed unchanged to every worker.
    :param max_iterations: Total number of replications requested.
    :param parallel_blocks: Number of worker processes (one block each).
    :param show_progress: Whether the first block shows its progress; all
        other blocks run with progress display disabled.
    :return: A ``SimulationMetrics`` with every worker's output appended.
    """
    pool = Pool(processes=parallel_blocks)
    # Floor division keeps the per-worker count an integer on Python 3 as well.
    samples_per_worker = max_iterations // parallel_blocks

    logger.info("Launching " + str(max_iterations) + " replications IN PARALLEL. Using " + str(parallel_blocks) + " workers with " + str(samples_per_worker) + " samples each.")

    worker_inputs = [
        {'simulation_config': simulation_config,
         'max_iterations': samples_per_worker,
         'block_id': block_id,
         'show_progress': False}
        for block_id in range(parallel_blocks)
    ]

    # Showing progress bar of first batch
    worker_inputs[0]['show_progress'] = show_progress

    worker_outputs = pool.map(launch_simulation_wrapper, worker_inputs)
    logger.info(str(max_iterations) + " replications finished. Starting output consolidation.")

    simulation_metrics = SimulationMetrics()
    for output in worker_outputs:
        simulation_metrics.append_results(output)
    return simulation_metrics
def get_signal_bg_many_parallel(runList, detid, **kwargs):
    """Average the (signal, background) pairs of several runs.

    ``get_signal_bg_one_run(run_number, detid, **kwargs)`` is evaluated for
    every entry of ``runList`` on a ``ProcessingPool`` of at most 14 nodes.
    Each returned pair is divided by the number of runs and accumulated
    into arrays shaped by ``DIMENSIONS_DICT[detid]``.

    Returns the tuple ``(signal, bg)``.
    """
    MAXNODES = 14
    run_count = len(runList)

    def mapfunc(run_number):
        return get_signal_bg_one_run(run_number, detid, **kwargs)

    workers = ProcessingPool(nodes=min(MAXNODES, run_count))
    bg = np.zeros(DIMENSIONS_DICT[detid])
    signal = np.zeros(DIMENSIONS_DICT[detid])
    for signal_part, bg_part in workers.map(mapfunc, runList):
        signal += signal_part / run_count
        bg += bg_part / run_count
    return signal, bg
def _calculate_s_powder_over_atoms_core(self, q_indx=None):
    """
    Helper function for _calculate_s_powder_1d.

    Prepares the data for ``q_indx`` and evaluates
    ``_calculate_s_powder_one_atom`` for every atom index, on a
    ``ProcessingPool`` when ``PATHOS_FOUND`` is true and serially otherwise.
    Progress is reported once per atom.

    :param q_indx: value forwarded to ``_prepare_data`` as ``k_point``
    :returns: Python dictionary with S data, keyed ``"atom_<index>"``, each
              value being ``{"s": <result for that atom>}``
    """
    atoms = range(self._num_atoms)
    self._prepare_data(k_point=q_indx)

    if PATHOS_FOUND:
        p_local = ProcessingPool(nodes=AbinsModules.AbinsParameters.threads)
        result = p_local.map(self._calculate_s_powder_one_atom, atoms)
    else:
        result = [self._calculate_s_powder_one_atom(atom=atom) for atom in atoms]

    atoms_items = {}
    for atom in atoms:
        # Results are in atom order, so the atom index addresses them directly.
        atoms_items["atom_%s" % atom] = {"s": result[atom]}
        self._report_progress(msg="S for atom %s" % atom + " has been calculated.")
    return atoms_items
def transpose_index(self):  # WORKS ONLY FOR TEST DATA
    """Transpose the data according to the index.

    For every distinct value in ``self.data.index`` the rows whose index
    entry contains that value are selected, and ``create_transposed_plot``
    is called with the value and that subset on a ``ProcessingPool``.

    Returns a list with one single-element list per distinct index value,
    each holding the result of ``create_transposed_plot``.
    """
    data = self.data
    names = list(set(data.index))
    datasets = [data[[name in i for i in data.index]] for name in names]

    pool = ProcessingPool()
    # A single map over all subsets lets the pool work on them concurrently;
    # mapping one item at a time would run them strictly one after another.
    results = pool.map(self.create_transposed_plot, names, datasets)
    # Keep the historical return shape: one single-element list per subset.
    plots = [[plot] for plot in results]

    logging.debug('Index transposed')
    return plots
def evaluate(self):
    """Evaluates the system using 10-fold cross validation, returning a
    dictionary of results keyed by classifier type.

    For every fold and every classifier the returned dictionary receives one
    entry of the form ``{'official': ..., 'affiliate': ...}`` holding the
    values produced by ``MetricCalculator.fold_metrics``.

    NOTE(review): block nesting was reconstructed from a flattened source;
    confirm against the original layout.
    """
    trainer = Trainer(self.profiles, self.profile_type, self.converter, self.network)
    training_set = trainer.generate_training_set()
    profiles = numpy.array(list(self.profiles))
    data = numpy.array(training_set.data)
    labels = numpy.array(training_set.labels)
    # Fixed random_state keeps the fold assignment reproducible.
    fold_iterator = cross_validation.StratifiedKFold(labels, n_folds=10, shuffle=True, random_state=42)
    # Lazily built (name, profile) pairs: label 2 is treated as "official",
    # label 1 as "affiliate".
    official_profile_pairs = ((x['name'], self.profile_type(x['profile'], x['posts'])) for x in self.profiles if x['label'] == 2)
    affiliate_profile_pairs = ((x['name'], self.profile_type(x['profile'], x['posts'])) for x in self.profiles if x['label'] == 1)
    # Group the known profiles by company name.
    official_profiles = defaultdict(list)
    for name, profile in official_profile_pairs:
        official_profiles[name].append(profile)
    affiliate_profiles = defaultdict(list)
    for name, profile in affiliate_profile_pairs:
        affiliate_profiles[name].append(profile)
    classification_results = defaultdict(list)
    fold = 1
    for train, test in fold_iterator:
        classifiers = initialize_classifiers()
        training_data = data[train]
        training_labels = labels[train]
        # compress() keeps only the test profiles whose label is truthy (non-zero).
        test_set = itertools.compress(profiles[test], labels[test])
        company_names = set(x['name'] for x in test_set)
        print 'Test set', fold, '-', len(company_names), 'companies.'
        for classifier in classifiers:
            classifier_name = classifier['type']
            c = classifier['classifier']
            trained = c.fit(training_data, training_labels)
            system = SingleNetworkSearcher(
                classifier=trained,
                searchengine=self.search_engine,
                profile_converter=self.converter,
                network=self.network)
            # NOTE(review): evaluates to 0 on a single-core machine - confirm the pool accepts that.
            number_of_workers = int(multiprocessing.cpu_count() * 0.75)
            worker_pool = ProcessingPool(number_of_workers)
            # Results are consumed by position below, relying on the set
            # iterating in the same order both times.
            all_results = worker_pool.map(system.query, company_names)
            combined_official_results = []
            combined_affiliate_results = []
            for idx, name in enumerate(company_names):
                official_results = official_profiles[name]
                affiliate_results = affiliate_profiles[name]
                results = all_results[idx]
                classified_official = results.official
                classified_affiliate = results.affiliate
                classified_unrelated = results.unrelated
                # Handles are compared case-insensitively.
                marked_official_handles = [x['profile'].handle.lower() for x in classified_official]
                marked_affiliate_handles = [x['profile'].handle.lower() for x in classified_affiliate]
                marked_unrelated_handles = [x['profile'].handle.lower() for x in classified_unrelated]
                official_handles = [x.handle.lower() for x in official_results]
                affiliate_handles = [x.handle.lower() for x in affiliate_results]
                # For each class, everything marked as one of the other two
                # classes counts as marked negative.
                official_counts = MetricCalculator.count_positives(
                    actual_handles=official_handles,
                    marked_positive_handles=marked_official_handles,
                    marked_negative_handles=(marked_affiliate_handles + marked_unrelated_handles))
                combined_official_results.append(official_counts)
                affiliate_counts = MetricCalculator.count_positives(
                    actual_handles=affiliate_handles,
                    marked_positive_handles=marked_affiliate_handles,
                    marked_negative_handles=(marked_unrelated_handles + marked_official_handles))
                combined_affiliate_results.append(affiliate_counts)
            official_metrics = MetricCalculator.fold_metrics(
                combined_official_results)
            affiliate_metrics = MetricCalculator.fold_metrics(
                combined_affiliate_results)
            result = {
                'official': official_metrics,
                'affiliate': affiliate_metrics
            }
            classification_results[classifier_name].append(result)
        fold += 1
    return classification_results
def evaluate_statistical(self):
    """Evaluates the system using 10-fold cross validation, returning a
    dictionary containing the number of correct results per-fold in each
    class.

    The dictionary has the keys ``'official_correct'`` and
    ``'affiliate_correct'``, each mapping to a list of counts.

    NOTE(review): block nesting was reconstructed from a flattened source.
    As laid out here one count is appended per company rather than per fold;
    confirm which was intended.
    """
    trainer = Trainer(self.profiles, self.profile_type, self.converter, self.network)
    training_set = trainer.generate_training_set()
    profiles = numpy.array(list(self.profiles))
    data = numpy.array(training_set.data)
    labels = numpy.array(training_set.labels)
    # Fixed random_state keeps the fold assignment reproducible.
    fold_iterator = cross_validation.StratifiedKFold(labels, n_folds=10, shuffle=True, random_state=42)
    # Lazily built (name, profile) pairs: label 2 is treated as "official",
    # label 1 as "affiliate".
    official_profile_pairs = ((x['name'], self.profile_type(x['profile'], x['posts'])) for x in self.profiles if x['label'] == 2)
    affiliate_profile_pairs = ((x['name'], self.profile_type(x['profile'], x['posts'])) for x in self.profiles if x['label'] == 1)
    # Group the known profiles by company name.
    official_profiles = defaultdict(list)
    for name, profile in official_profile_pairs:
        official_profiles[name].append(profile)
    affiliate_profiles = defaultdict(list)
    for name, profile in affiliate_profile_pairs:
        affiliate_profiles[name].append(profile)
    fold = 1
    # This assumes we're just using Random Forest (i.e. one classifier)
    # Ugly hack for now.
    classification_results = {
        'official_correct': [],
        'affiliate_correct': []
    }
    for train, test in fold_iterator:
        classifiers = initialize_classifiers()
        training_data = data[train]
        training_labels = labels[train]
        # compress() keeps only the test profiles whose label is truthy (non-zero).
        test_set = itertools.compress(profiles[test], labels[test])
        company_names = set(x['name'] for x in test_set)
        print 'Test set', fold, '-', len(company_names), 'companies.'
        for classifier in classifiers:
            classifier_name = classifier['type']
            c = classifier['classifier']
            trained = c.fit(training_data, training_labels)
            system = SingleNetworkSearcher(
                classifier=trained,
                searchengine=self.search_engine,
                profile_converter=self.converter,
                network=self.network)
            # NOTE(review): evaluates to 0 on a single-core machine - confirm the pool accepts that.
            number_of_workers = int(multiprocessing.cpu_count() * 0.75)
            worker_pool = ProcessingPool(number_of_workers)
            # Results are consumed by position below, relying on the set
            # iterating in the same order both times.
            all_results = worker_pool.map(system.query, company_names)
            for idx, name in enumerate(company_names):
                official_results = official_profiles[name]
                affiliate_results = affiliate_profiles[name]
                results = all_results[idx]
                classified_official = results.official
                classified_affiliate = results.affiliate
                # Handles are compared case-insensitively.
                marked_official_handles = [x['profile'].handle.lower() for x in classified_official]
                marked_affiliate_handles = [x['profile'].handle.lower() for x in classified_affiliate]
                official_handles = [x.handle.lower() for x in official_results]
                affiliate_handles = [x.handle.lower() for x in affiliate_results]
                # Count marked handles that match a known handle of the same class.
                official_correct = 0
                for handle in marked_official_handles:
                    if handle in official_handles:
                        official_correct += 1
                affiliate_correct = 0
                for handle in marked_affiliate_handles:
                    if handle in affiliate_handles:
                        affiliate_correct += 1
                classification_results['official_correct'].append(official_correct)
                classification_results['affiliate_correct'].append(affiliate_correct)
        fold += 1
    return classification_results
filterOn=True flatBG=True touchingEdge=False sigma=random.randint(160, 225) shaderSigma=random.randint(15, 20) minSize=random.randint(500, 750) maxSize=1000 blobThresh=random.randint(150, 180) innerThresh=random.randint(10, 25) # name='B2_'+str(i) name='B_'+str(i) else: # GRADE A filterOn=True flatBG=True numBlob=1 touchingEdge=True sigma=120 shaderSigma=random.randint(15, 20) minSize=random.randint(500, 750) maxSize=random.randint(1000, 2000) blobThresh=random.randint(180, 250) innerThresh=random.randint(10, 20) name='A_'+str(i) testImage=Blob(numBlob, minSize, maxSize, blobThresh, innerThresh, sigma, shaderSigma, path, betweenBlobs, touchingEdge, flatBG, filterOn, addColors, name) imgArr.append(testImage) pool.map(Blob.makeImg, imgArr)
#!/usr/bin/env python
# Demo script: maps host() over ten ids on a pathos process pool while
# changing the pool's ncpus setting between runs.
from pathos.multiprocessing import ProcessingPool as Pool
from pathos.multiprocessing import ThreadingPool as TPool

pool = Pool()
tpool = TPool()


def host(id):
    """Return a "Rank: <id> -- <hostname>" string for the given id."""
    from socket import gethostname
    return "Rank: %d -- %s" % (id, gethostname())


# Single-argument print(...) produces the same output on Python 2 and 3.
print("Evaluate 10 items on 1 proc")
pool.ncpus = 1
res3 = pool.map(host, range(10))
print(pool)
print('\n'.join(res3))
print('')

print("Evaluate 10 items on 2 proc")
pool.ncpus = 2
res5 = pool.map(host, range(10))
print(pool)
print('\n'.join(res5))
print('')

print("Evaluate 10 items on ? proc")
pool.ncpus = None
res7 = pool.map(host, range(10))
print(pool)
print('\n'.join(res7))
def parallel_motif_analysis(self, samples_dirs, nodes=16):
    """Run ``sample_motif_analysis`` on every sample directory in parallel.

    Args:
        samples_dirs: Iterable of sample directories; it is converted to a
            tuple before being mapped.
        nodes: Number of nodes given to the ``ProcessingPool``. Defaults to
            16, the value that used to be hard-coded.
    """
    pool = ProcessingPool(nodes=nodes)
    pool.map(self.sample_motif_analysis, tuple(samples_dirs))
def analyse_samples_parallely(self, samples_dirs, nodes=15):
    """Run ``sample_analysis`` on every sample directory in parallel.

    Args:
        samples_dirs: Iterable of sample directories; it is converted to a
            tuple before being mapped.
        nodes: Number of nodes given to the ``ProcessingPool``. Defaults to
            15, the value that used to be hard-coded.
    """
    pool = ProcessingPool(nodes=nodes)
    pool.map(self.sample_analysis, tuple(samples_dirs))
def applay_parallel_ransac(self):
    """Evaluate ``calculate_distance`` for sample indices 0..24 on a ``Pool``.

    Returns the list of results produced by ``pool.map``, in index order.
    """
    # list(range(...)) builds the same list on Python 2 and 3.
    sample_indices = list(range(25))
    pool = Pool()
    return pool.map(self.calculate_distance, sample_indices)
def genseq(idx):
    """Generate one random listening sequence and pickle it to '<idx>.pkl'.

    The first artist index is drawn from a multinomial with ``pvals=pops``;
    each following one comes from ``draw(previous)``. The gap between
    consecutive timestamps is a random integer number of seconds inside a
    120-wide bin chosen by a multinomial with ``pvals=td``. The resulting
    frame gets a 'block' column that numbers runs of identical consecutive
    ``artist_idx`` values from 0.
    """
    first = np.where(np.random.multinomial(1, pvals=pops) == 1)[0][0]
    artists = [first]
    stamps = [datetime.now()]
    current = first
    for _ in xrange(seq_length - 1):
        current = draw(current)
        bin_start = 120 * np.where(np.random.multinomial(1, pvals=td) == 1)[0][0]
        gap = np.random.randint(bin_start, bin_start + 120)
        artists.append(current)
        stamps.append(stamps[-1] + timedelta(0, gap))

    df = pd.DataFrame({'artist_idx': artists, 'ts': stamps})
    artist_changed = df['artist_idx'].shift(1) != df['artist_idx']
    df['block'] = artist_changed.astype(int).cumsum() - 1
    df.to_pickle(str(idx) + '.pkl')
    logging.info('idx {} complete'.format(idx))


# Generate n sequences, one per index, across all available cores.
pool = Pool(cpu_count())
indices = range(n)
pool.map(genseq, indices)
pool.close()
zero = [0] def inner(addend): return addend+augend+zero[0] return inner # build from inner function add_me = adder(5) # build from lambda functions squ = lambda x:x**2 # test 'dilled' multiprocessing for inner print "Evaluate 10 items on 2 proc:" pool.ncpus = 2 print pool print pool.map(add_me, range(10)) print '' # test 'dilled' multiprocessing for lambda print "Evaluate 10 items on 4 proc:" pool.ncpus = 4 print pool print pool.map(squ, range(10)) print '' # test for lambda, but with threads print "Evaluate 10 items on 4 threads:" tpool.nthreads = 4 print tpool print tpool.map(squ, range(10)) print ''
from pathos.pp import ParallelPythonPool as PPP #from pathos.pp import stats pp_pool = PPP(4, servers=('localhost:5653','localhost:2414')) print pp_pool start = time.time() res = pp_pool.map(busy_add, _x, _y, _d) print "time to queue:", time.time() - start start = time.time() _pp_pool = list(res) print "time to results:", time.time() - start #print stats() assert _basic == _pp_pool print "" from pathos.multiprocessing import ProcessingPool as MPP mp_pool = MPP(4) print mp_pool start = time.time() res = mp_pool.map(busy_add, _x, _y, _d) print "time to queue:", time.time() - start start = time.time() _mp_pool = list(res) print "time to results:", time.time() - start assert _basic == _mp_pool print "" # EOF
# creates a worker pool from given comand line parameter. If the given # parameter is to large all detectable CPUs will be utilised. If the given # parameter is nonsense only 1 core will be utilized. workers = 1 if len(sys.argv) >= 2 and sys.argv[1].isdigit() and int(sys.argv[1]) > 0: workers = cpu_count() if int(sys.argv[1]) <= workers: workers = int(sys.argv[1]) print 'N: ' + str(N) print 'PW: ' + str(workers) sleep(3) # just 3 seconds pause to read the input again. # All the magic happens here: pool = ProcessingPool(workers) Ys = pool.map(steadyState,y0) clock = time()-clock # elapsed time print 'Seconds: ' + str(clock) # Not essential but useful. # Serilisation of results and stats: ss = {'STrange': STrange, 'PFDrange': PFDrange, 'Ys': Ys, 'Sec': clock, 'PoolWorkers': workers} output = open('steadyStateAnalysisFixedST_MC_N' + str(N) + '.pkl', 'wb') dill.dump(ss,output,2) output.close() else: print('Well, something went wrong.') #================================================================= # # 3 D plotting routine to obtain figure as in Ebenhoeh et al. 2014 #
from pathos.multiprocessing import ProcessingPool as Pool

x = [1, 2, 3]
y = [1, 2, 3]


class Test(object):
    """Holds a constant ``c`` that ``plus`` adds to each pair of operands."""

    def __init__(self, c):
        self.c = c

    def plus(self, x, y):
        """Return ``c + x + y``."""
        partial_sum = self.c + x
        return partial_sum + y


if __name__ == '__main__':
    # Map the bound method over the two module-level lists on four workers.
    p = Pool(4)
    t = Test(5)
    out = p.map(t.plus, x, y)
    print(out)