def parse_genetrees(args):
    """parse a set of genetrees in serial or parallel fashion and run through PHYBASE"""
    is_nexus = False
    if args.input_file.endswith('.nex') or args.input_file.endswith('.nexus'):
        is_nexus = True
    chunks = get_genetree_chunks(args, is_nexus)
    print "Cleaning genetrees"
    if args.cores > 1:
        p = Pool(args.cores)
        trees = p.map(clean_genetree_worker, chunks)
        # close the pool only when one was actually created
        p.close()
    else:
        trees = map(clean_genetree_worker, chunks)
    # get taxa from first tree
    taxa = getTaxa(trees[0])
    # instantiate Phybase instance and analyse trees
    phybase = Phybase()
    star_tree, steac_tree = phybase.run(trees, args.outgroup, taxa)
    template = """#NEXUS\nbegin trees;\ntree 'STAR' = %s\ntree 'STEAC' = %s\nend;""" % (star_tree, steac_tree)
    print template
    star_steac_out = os.path.splitext(args.input_file)[0]
    star_steac_out += '.star_steac.trees'
    star_steac_out = open(star_steac_out, 'w')
    star_steac_out.write(template)
    star_steac_out.close()
def start_crawlers(connector_class, num_processes=1):
    """
    Starts a spider process for each spider class in the project

    :param num_processes: the number of simultaneous crawling processes
    :param connector_class: the connector class that should be used by the spiders
    """
    spider_classes = pyjobs_crawlers.tools.get_spiders_classes()

    if num_processes == 0:
        connector = connector_class()
        with _get_lock('ALL') as acquired:
            if acquired:
                crawl(spider_classes, connector)
            else:
                print("Crawl process of 'ALL' already running")
        return

    # Split the spider_classes list into chunks of size num_processes
    spider_classes_chunks = list()
    for x in range(0, len(spider_classes), num_processes):
        spider_classes_chunks.append(spider_classes[x:x + num_processes])

    # Start num_processes crawling processes per chunk (worker sketch below)
    for spider_classes_chunk in spider_classes_chunks:
        process_params_chunk = [(spider_class, connector_class)
                                for spider_class in spider_classes_chunk]
        p = Pool(len(process_params_chunk))
        p.map(start_crawl_process, process_params_chunk)
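# A minimal sketch (not taken from the original project) of what the
# `start_crawl_process` worker mapped above might look like. Pool.map passes a
# single argument, so the (spider_class, connector_class) tuple is unpacked
# inside the worker; `crawl` and `_get_lock` are the same helpers the function
# above relies on, and keying the lock on the spider name is an assumption.
def start_crawl_process(process_params):
    spider_class, connector_class = process_params
    connector = connector_class()
    with _get_lock(spider_class.__name__) as acquired:
        if acquired:
            crawl([spider_class], connector)
        else:
            print("Crawl process of '%s' already running" % spider_class.__name__)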
def crawl_recursive_threaded(dirpath, ext):
    from database import indexer
    from database import utils
    from multiprocessing import Pool

    # convert to our infos
    cdir = indexer.DirInfo(dirpath, ext)
    cInfos = indexer.dirs_to_info(cdir.subfolders(), ext)

    # comment out if you want a silent indexing
    print(cdir.to_string())

    # recursive pooled call
    # NOTE: child calls must not be pooled
    p = Pool(utils.Settings.config['processes'])
    infos = p.map(crawl_recursive, cInfos)
    p.close()

    # remove hierarchy
    dirInfos = [d for sublist in infos for d in sublist]
    dirInfos.append(cdir)

    print('I was crawling with %d processes' % utils.Settings.config['processes'])

    return dirInfos
def rc(rf, alphabet, numOfThreads):
    tryn = 0
    counterTmp = 0
    printCounter = 1000
    listBasic = []
    if rf.endswith('.rar'):
        funcChosen = unrar
    elif rf.endswith('.zip') or rf.endswith('.7z'):
        funcChosen = zipFileUnzip
    for a in range(1, len(alphabet) + 1):
        for b in itertools.product(alphabet, repeat=a):
            k = "".join(b)
            k = re.escape(k)
            listBasic.append(k)
            tryn += 1
            if len(listBasic) == numOfThreads:
                pool = Pool(numOfThreads)
                pool.map_async(funcChosen, listBasic, callback=exitPass)
                pool.close()
                if resultPass:
                    timeWasted = time.time() - start
                    print 'Found! Password is ' + resultPass
                    print "It took " + str(round(time.time() - start, 3)) + " seconds"
                    print "Speed: " + str(round(tryn / float(timeWasted), 2)) + " passwords/sec"
                    print "Tried " + str(tryn) + " passwords"
                    exit()
                listBasic = []
            counterTmp += 1
            if counterTmp >= printCounter:
                print 'Trying combination number ' + str(tryn) + ':' + str(k)
                timeWasted = round(time.time() - start, 2)
                if timeWasted > 0:
                    print "It took already " + str(timeWasted) + " seconds. Speed: " + str(round(tryn / float(timeWasted), 2)) + " passwords/sec"
                counterTmp = 0
def k_rbm(infile, outfile):
    # dataset
    data = sio.loadmat(infile)['data']

    # reconstruction cost
    cost_dict = {}

    p = Pool(5)
    first_arg = ["Thread-1", "Thread-2", "Thread-3", "Thread-4", "Thread-5"]
    second_arg = data
    a, b, c, d, e = p.map(rbm_star, itertools.izip(first_arg, itertools.repeat(second_arg)))
    # p.map(rbm_star, itertools.izip(first_arg, itertools.repeat(second_arg)))

    # get the costs from the tuples
    cost_1 = a[0]
    cost_2 = b[1]
    cost_3 = c[2]
    cost_4 = d[3]
    cost_5 = e[4]

    # find the cluster assignments
    for i in xrange(len(cost_1)):
        mincost = min(cost_1[i], cost_2[i], cost_3[i], cost_4[i], cost_5[i])
        if mincost == cost_1[i]:
            cost_dict[i + 1] = 1
        elif mincost == cost_2[i]:
            cost_dict[i + 1] = 2
        elif mincost == cost_3[i]:
            cost_dict[i + 1] = 3
        elif mincost == cost_4[i]:
            cost_dict[i + 1] = 4
        else:
            cost_dict[i + 1] = 5

    # store results
    json.dump(cost_dict, open(outfile, 'w'))
def spawn_runpy(cp, wait=60, cb=check_rst):
    "as decorator to run job"
    global WAITQ, RUNQ, CFG
    pool = Pool(processes=CFG['MAXJOBS'])
    while len(WAITQ) > 0 or len(RUNQ) > 0:
        if len(RUNQ) <= CFG['MAXJOBS'] and len(WAITQ) > 0:
            path, test = WAITQ.pop()
            rst = pool.apply_async(call_runpy, (cp, path, test,))
            RUNQ.append((rst, test, timeit.default_timer()))
        else:
            for r in RUNQ:
                usec = float("%.2f" % (timeit.default_timer() - r[2]))
                # successful() is a method and raises if the result is not
                # ready yet, so guard it with ready()
                if r[0].ready() and r[0].successful():
                    print "[{0}] success used {1} usec".format(r[1], usec)
                    RUNQ.remove(r)
                    if cb:
                        cb(r[1], 'pass', usec)
                else:
                    if usec > CFG['TIMEOUT']:
                        print "[{0}] unsuccess used timeout {1} usec".format(r[1], usec)
                        r[0].terminate()
                        if cb:
                            cb(r[1], 'fail', usec)
        time.sleep(float(wait))
def compress_file(self, corpus, np=4, separator=None):
    """
    construct WLZW pattern out of a corpus, parallelism is an option
    @param corpus - string, file path of the corpus
    @param np - number of processes, if np = 1 the algorithm is run in serial
    @param separator - the separator string to separate doc id and document. pass None if no doc id is given
    @return set, the final set containing all frequent patterns
    """
    # if only one process, no need for parallelization
    if np == 1:
        return set(_compress_file((corpus, 0, np, separator)))

    p = Pool(processes=np)
    l = []
    for i in range(0, np):
        l.append((corpus, i, np, separator))
    result = p.imap_unordered(_compress_file, l, 1)

    if np == 1:
        final_set = result.next()
    else:
        final_set = _union(result)

    return final_set
def get(self):
    mode = toAlpha3Code(self.get_argument('lang'))
    text = self.get_argument('q')
    if not text:
        self.send_error(400, explanation='Missing q argument')
        return

    def handleCoverage(coverage):
        if coverage is None:
            self.send_error(408, explanation='Request timed out')
        else:
            self.sendResponse([coverage])

    if mode in self.analyzers:
        pool = Pool(processes=1)
        result = pool.apply_async(getCoverage, [text, self.analyzers[mode][0], self.analyzers[mode][1]])
        pool.close()

        @run_async_thread
        def worker(callback):
            try:
                callback(result.get(timeout=self.timeout))
            except TimeoutError:
                pool.terminate()
                callback(None)

        coverage = yield tornado.gen.Task(worker)
        handleCoverage(coverage)
    else:
        self.send_error(400, explanation='That mode is not installed')
def main(path, out, cores):
    """
    Compute contact energies for each pdb in path and write results to 'out'.
    :param path: str
    :param out: str
    :param cores: int
    :return: None
    """
    # Find all pdbs in path
    workload = []
    for file in os.listdir(path):
        if os.path.splitext(file)[1].lower() == ".pdb":
            workload.append(file)
    # Print few newlines to prevent progressbar from messing up the shell
    print("\n\n")
    # Compute energies
    pool = Pool(processes=cores)
    results = []
    for (nr, pdb) in enumerate(workload):
        updateprogress(pdb, nr / len(workload))
        e = computecontactenergy(os.path.join(path, pdb), pool)
        results.append((pdb, e))
    pool.close()
    # Make 100% to appear
    updateprogress("Finished", 1)
    # Store output
    with open(out, "w") as handler:
        handler.write("PDB,Energy in kcal/mol\n")
        for pair in results:
            handler.write("{},{}\n".format(*pair))
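# Hypothetical sketch of the `updateprogress` helper used above; the real
# implementation is not shown in this snippet. It simply redraws an in-place
# progress bar on stdout for the given label and completion fraction.
import sys

def updateprogress(label, fraction, width=40):
    filled = int(round(width * fraction))
    bar = "#" * filled + "-" * (width - filled)
    sys.stdout.write("\r[{}] {:>6.1%}  {}".format(bar, fraction, label))
    sys.stdout.flush()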
class JobPool(object):
    """
    Pool container.
    """
    pool = None
    message_queue = None

    def __init__(self, max_instances=4):
        self.message_queue = Queue()
        self.pool = Pool(max_instances, execute_task, (self.message_queue,))
        atexit.register(self.clear)

    def add_analysis(self, analysis):
        """
        Add analysis to the pool.
        """
        analysis.set_started()
        self.message_queue.put(analysis)

    def clear(self):
        """
        Pool cleanup.
        """
        self.pool.terminate()
        self.pool.join()
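# Sketch (an assumption, not the original `execute_task`) of a worker entry
# point compatible with JobPool above: each pool process runs this initializer
# forever, pulling analysis objects off the shared multiprocessing Queue and
# executing them. `analysis.run()` is a hypothetical method name; the queue
# must be a multiprocessing.Queue for this producer/consumer pattern to work.
def execute_task(message_queue):
    while True:
        analysis = message_queue.get()  # blocks until a task is available
        try:
            analysis.run()              # assumed entry point on the analysis object
        except Exception:
            pass                        # a real worker would log the failure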
class YaraJobPool(object):
    """
    Yara pool container.
    """
    pool = None
    message_queue = None

    def __init__(self, max_instances=3):
        self.message_queue = Queue()
        self.pool = Pool(max_instances, execute_yara_task, (self.message_queue,))
        atexit.register(self.clear)

    def add_yara_task(self, yara_task):
        """
        Adds the yara task.
        """
        self.message_queue.put(yara_task)

    def clear(self):
        """
        Pool cleanup.
        """
        self.pool.terminate()
        self.pool.join()
def get_location(self):
    """
    Extracts the location of each pixel in the satellite image
    """
    self.ncols = self.satellite_gdal.RasterXSize / 2
    self.nrows = self.satellite_gdal.RasterYSize / 2
    self.length_df = self.nrows * self.ncols
    print 'Columns, rows', self.ncols, self.nrows
    cols_grid, rows_grid = np.meshgrid(
        range(0, self.ncols),
        range(0, self.nrows))
    self.cols_grid = cols_grid.flatten()
    self.rows_grid = rows_grid.flatten()
    print 'Checking the meshgrid procedure works'
    # getting a series of lat lon points for each pixel
    self.geotransform = self.satellite_gdal.GetGeoTransform()
    print 'Getting locations'
    self.location_series = np.array(parmap.starmap(
        pixel_to_coordinates,
        zip(self.cols_grid, self.rows_grid),
        self.geotransform,
        processes=self.processes))
    print 'Converting to Points'
    pool = Pool(self.processes)
    self.location_series = pool.map(
        point_wrapper,
        self.location_series)
def get_fractional_errors(R_star, L_star, P_c, T_c):
    """
    Pass in "guess" conditions.
    Will then calculate inward and outward errors,

    Returns:
        [Data array]
        dY - over/undershoots (+/-, going outward)
             [dx handled outside this]
    """
    # R_star, L_star, P_c, T_c = x

    P_c_0 = modelparameters.P_c  # core pressure, [dyne cm^-2]
    T_c_0 = modelparameters.T_c  # core temperature, [K]
    R_star_0 = modelparameters.R_star
    L_star_0 = modelparameters.L_star

    print ""
    print "R: " + str(R_star / R_star_0)
    print "L: " + str(L_star / L_star_0)
    print "P: " + str(P_c / P_c_0)
    print "T: " + str(T_c / T_c_0)

    X = modelparameters.X
    Y = modelparameters.Y
    Z = modelparameters.Z
    mu = modelparameters.mu
    params = (X, Y, Z, mu)

    M_star = modelparameters.M_star
    m_fitting_point = modelparameters.m_fitting_point

    pool = Pool(2)
    outward_results = pool.apply_async(integrate.integrate_outwards,
                                       [M_star, m_fitting_point, P_c, T_c, mu, X, Y, Z])
    inward_results = pool.apply_async(integrate.integrate_inwards,
                                      [M_star, m_fitting_point, R_star, L_star, mu, X, Y, Z])

    m_outward, y_outward, infodict_outward = outward_results.get()
    m_inward, y_inward, infodict_inward = inward_results.get()

    dr = y_inward[-1, 0] - y_outward[-1, 0]
    dl = y_inward[-1, 1] - y_outward[-1, 1]
    dP = y_inward[-1, 2] - y_outward[-1, 2]
    dT = y_inward[-1, 3] - y_outward[-1, 3]

    dY = np.array([dr, dl, dP, dT])

    print ''
    print 'fractional errors:'
    print "dR: " + str(dr / y_inward[-1, 0])
    print "dL: " + str(dl / y_inward[-1, 1])
    print "dP: " + str(dP / y_inward[-1, 2])
    print "dT: " + str(dT / y_inward[-1, 3])

    return dY
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)

    Session.expunge_all()
    Session.close()

    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir != None)
    articles = articles.filter(Entity.sep_dir != '')
    articles = articles.distinct().all()
    articles = [a[0] for a in articles]

    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root) for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    # serial processing for tests
    '''
    doc_lines = []
    for title in articles:
        lines = process_article(title, terms, entity_type, None, corpus_root)
        doc_lines.append(lines)
    '''

    # write graph output to file
    print output_filename
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
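# Likely shape of `process_wrapper` (inferred from the commented-out serial
# path above, not copied from the original source): Pool.map supplies a single
# argument, so the wrapper unpacks the tuple and delegates to process_article.
def process_wrapper(args):
    title, terms, entity_type, philo_tree, corpus_root = args
    return process_article(title, terms, entity_type, philo_tree, corpus_root)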
def score_all_genes(self, graph, num_procs=1):
    partial_score_gene = partial(score_gene, graph=graph, top_genes=self.top_genes)
    p = Pool(num_procs)
    result = p.map(partial_score_gene, list(self.vd.gene_names()))
    p.close()

    # convert them all to percentiles
    cent_hist = numpy.array([x[1] for x in result if x[1] != -1])
    nn_hist = numpy.array([x[2] for x in result if x[2] != -1])

    batch = []
    for gene, cent_score, nn_score in result:
        # edge case: gene is a top gene
        if gene in self.top_genes:
            cent_perc = 1
            nn_perc = 1
        # edge case: gene isn't in network
        elif cent_score == -1 or \
             nn_score == -1:
            cent_perc = 0
            nn_perc = 0
        else:
            cent_perc = scipy.stats.percentileofscore(cent_hist, cent_score) / 100.0
            nn_perc = 1 - scipy.stats.percentileofscore(nn_hist, nn_score) / 100.0

        print "gene: %s\n  c: %s\n  c_p: %s\n  n: %s\n  n_p: %s" % \
            (gene, cent_score, cent_perc, nn_score, nn_perc)

        batch.append((cent_score, cent_perc, nn_score, nn_perc, gene))

    self.vd._c.executemany("UPDATE genes SET cent_score = ?, cent_perc = ?, " \
                           "nn_score = ?, nn_perc = ? WHERE name = ?", batch)
    self.vd._conn.commit()
def main():
    parser = ArgumentParser(description="Speed up your SHA. A different hash style.")
    parser.add_argument('-1', '--sha1', action='store_true')
    parser.add_argument('-2', '--sha224', action='store_true')
    parser.add_argument('-3', '--sha256', action='store_true')
    parser.add_argument('-4', '--sha384', action='store_true')
    parser.add_argument('-5', '--sha512', action='store_true')
    parser.add_argument('-f', '--file', type=str, help="The path to the file")

    if len(sys.argv) == 1:
        parser.print_help()
        return

    global args
    args = parser.parse_args()

    hashtree = ''

    big_file = open(args.file, 'rb')
    pool = Pool(multiprocessing.cpu_count())

    for chunk_hash in pool.imap(hashing, chunks(big_file)):
        hashtree += chunk_hash + ":hash"

    pool.terminate()

    print(str(hashing(hashtree.encode('ascii'))))
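# The `chunks` generator consumed by pool.imap above is not shown here; a
# minimal sketch, assuming it yields fixed-size binary blocks from the
# already-open file object (the block size is an arbitrary choice):
def chunks(file_object, block_size=1024 * 1024):
    while True:
        block = file_object.read(block_size)
        if not block:
            break
        yield block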
def main():
    """
    ---------------------------------------------------------------------------
    AUTHOR: Kyle Hernandez
    EMAIL: [email protected]

    Calculate the distribution of polymorphic RAD loci across site classes.
    ---------------------------------------------------------------------------

    USAGE: python snp_locations.py gmatrix.tab file.gff out.tab n_threads

    ARGUMENTS:
        gmatrix.tab - Tab-delimited genotype matrix file of variant sites
        file.gff    - GFF file
        out.tab     - Output file of counts
        n_threads   - The number of threads to run
    """
    # Load the GFF and SNP positions into dictionaries
    load_gff()
    intergenic = process_matrix()

    # Map:
    # Create a pool of n_threads workers and use them to process
    # scaffolds separately
    ch_vals = sorted(gff_dict.keys())
    sys.stdout.write("Counting features...\n")
    pool = Pool(processes=n_threads)
    ct_list = pool.map(process_dicts, ch_vals)

    # Reduce:
    # Process the list of dicts
    print_counts(intergenic, ct_list)
def matrix_vector_iteration_by_processes(A, x, k):
    # create a temporary directory to store the matrix and the vectors
    tmpdir = tempfile.mkdtemp()

    nvec = get_nvec(x)
    y = x.copy()

    save_matrix(tmpdir, A)
    for i in xrange(nvec):
        save_x(tmpdir, x, i)

    # start processes
    pool = Pool(processes=min(nvec, 6))
    processes = []

    for i in xrange(nvec):
        processes.append(pool.apply_async(matrix_vector_iteration_process, (tmpdir, i, k)))

    # fetch results (vector/matrix shape version)
    if x.ndim == 1:
        processes[0].get()
        y = load_x(tmpdir, 0)
    else:
        for i in xrange(nvec):
            processes[i].get()
            y[:, i] = load_x(tmpdir, i)

    pool.close()

    # remove temporary directory (with all it contains)
    shutil.rmtree(tmpdir)

    return y
def fetch_imagery(image_locations, local_dir):
    pool = Pool(cpu_count())
    tupled = [(loc[0], loc[1], local_dir) for loc in image_locations]
    try:
        pool.map(fetch_imagery_uncurried, tupled)
    finally:
        pool.close()
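# A sketch of the "uncurried" worker assumed above: Pool.map supplies a single
# tuple, so the worker unpacks it and hands the pieces to a single-image
# download helper. `fetch_image` is a hypothetical name, not the original API.
def fetch_imagery_uncurried(params):
    image_id, image_url, local_dir = params
    fetch_image(image_id, image_url, local_dir)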
def compute_tdbf():
    conn = db_conn('bnc')
    cur = conn.cursor()
    # select keys and parsed from table
    sql = 'SELECT xmlID, divIndex, globalID, parsed FROM entropy_DEM100'
    cur.execute(sql)
    data = cur.fetchall()
    # initialize
    pool = Pool(multiprocessing.cpu_count())
    manager = Manager()
    queue = manager.Queue()
    # mp
    args = [(d, queue) for d in data]
    result = pool.map_async(compute_tdbf_worker, args, chunksize=5000)
    # manager loop
    while True:
        if result.ready():
            print('\n all rows processed')
            break
        else:
            sys.stdout.write('\r{}/{} processed'.format(queue.qsize(), len(args)))
            sys.stdout.flush()
            time.sleep(1)
    # update
    processed_results = result.get()
    for i, res in enumerate(processed_results):
        xml_id, div_idx, g_id, sub_tree, td, bf = res
        sql = 'UPDATE entropy_DEM100 SET parsedSimple = %s, td = %s, bf = %s WHERE xmlID = %s AND divIndex = %s AND globalID = %s'
        cur.execute(sql, (sub_tree, td, bf, xml_id, div_idx, g_id))
        if i % 999 == 0 and i > 0:
            sys.stdout.write('\r{}/{} updated'.format(i + 1, len(processed_results)))
            sys.stdout.flush()
    conn.commit()
def downloadImages(self, dirName, urlData):
    child_folder = 'pictures'
    failures = 0
    dirName = os.path.join(dirName, child_folder)
    process_pool = Pool(processes=self._pool_size)
    results = []

    for ud in urlData:
        abs_img = os.path.join(dirName, urlparse(ud).path.strip('/'))
        try:
            os.makedirs(dirname(abs_img))
        except:
            pass
        results.append(process_pool.apply_async(urllib.urlretrieve, [ud, abs_img]))

    self.initialize_bar(max=len(results))
    for result in results:
        try:
            result.get(self._timeout)
        except Exception:
            failures += 1
        else:
            self.update_bar()

    self.finish_bar()
    if failures:
        print("  Completed with errors: Downloaded {0}/{1}".format(len(results) - failures, len(results)))
    self.finish_bar()
def get_needle_tips(images):
    """Get sample tips from images."""
    tips = []
    results = []

    # Do not make more processes than needed for the number of images.
    if len(images) > multiprocessing.cpu_count():
        proc_count = multiprocessing.cpu_count()
    else:
        proc_count = len(images)

    pool = Pool(processes=proc_count)

    for image in images:
        results.append(pool.apply_async(_get_ellipse_point, args=(image,)))

    for result in results:
        tip = result.get()
        if tip is not None:
            tips.append(tip)

    if len(tips) == 0:
        raise ValueError("No sample tip points found.")

    return tips
def main(world_folder, replacement_file_name):
    global replacements
    world = nbt.world.WorldFolder(world_folder)
    logger = configure_logging()
    logger.info("Starting processing of %s", world_folder)
    if not isinstance(world, nbt.world.AnvilWorldFolder):
        logger.error("%s is not an Anvil world" % (world_folder))
        return 65  # EX_DATAERR
    if replacement_file_name != None:
        logger.info("Using Replacements file: %s", replacement_file_name)
        with open(replacement_file_name, 'r') as replacement_file:
            replacements = json.load(replacement_file)
    # get list of region files, going to pass this into function to process region
    region_files = world.get_regionfiles()

    # Parallel
    q = Queue()
    lp = threading.Thread(target=logger_thread, args=[q])
    lp.start()
    p = Pool(initializer=process_init, initargs=[q, replacements], maxtasksperchild=1)
    region_data = p.map(process_region, region_files)
    # Map has finished up, lets close the logging QUEUE
    q.put(None)
    lp.join()

    # Not Parallel
    # region_data = map(process_region, region_files)

    # Write output data
    write_block_data(region_data, "output.txt")
    return 0
def updateTranslation(args):
    # Get map that contains (besides other stuff)
    # the crowdin ID for a given file
    translationFilemap = getTranslationFilemapCache(args.language, args.force_filemap_update)

    # Collect valid downloadable files for parallel processing
    fileinfos = []
    for filename, fileinfo in translationFilemap.items():
        filepath = os.path.join("cache", args.language, fileinfo["path"])
        # Create dir if not exists
        try:
            os.makedirs(os.path.dirname(filepath))
        except OSError as exc:
            if exc.errno == errno.EEXIST:
                pass
            else:
                raise
        fileid = fileinfo["id"]
        fileinfos.append((fileid, filepath))
    # Curry the function with the language
    performDownload = functools.partial(performPOTDownload, args.language)
    # Perform parallel download
    if args.num_processes > 1:
        pool = Pool(args.num_processes)
        pool.map(performDownload, fileinfos)
    else:
        for t in fileinfos:
            performDownload(t)
    # Set download timestamp
    timestamp = datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
    with open("lastdownload.txt", "w") as outfile:
        outfile.write(timestamp)
def __decrypt_file(self, private_d, public_n, keys, path_to_file, CRT, k):
    if CRT:
        pool = Pool(processes=k)
        promises = []
    decrpted_data = ''

    with open(path_to_file, 'r') as f:
        encrypted_data = f.read()
        encrypted_data_chunks = list(map(''.join, zip(*[iter(encrypted_data)] * len(str(public_n)))))
        for i in range(len(encrypted_data_chunks)):
            stripped = encrypted_data_chunks[i].lstrip('0')
            if CRT:
                promise = pool.apply_async(self.compute_part_of_message, args=(stripped, keys, i))
                promises.append(promise)
            else:
                decrpted_data += chr(self.__decrypt_message(stripped, private_d, public_n))
        if CRT:
            results = [promise.get() for promise in promises]
            decrypted_sorted = sorted(results, key=lambda x: x[1])
            for data in decrypted_sorted:
                decrpted_data += chr(data[0])

    if CRT:
        pool.close()

    with open(path_to_file + '.dec', 'w') as f:
        f.write(decrpted_data)
    return decrpted_data
def multi_mode(start, stop):
    print "going multi"
    from multiprocessing import Pool
    pool = Pool(processes=4)
    result = pool.map(factorize, xrange(start, stop + 1), chunksize=100)
    print uniq_counter(result)
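# Hypothetical helpers for the snippet above (the originals are not shown):
# `factorize` returns the prime factors of n by trial division, and
# `uniq_counter` is assumed to tally how many numbers each factor appears in.
from collections import Counter

def factorize(n):
    factors = []
    d = 2
    while d * d <= n:
        while n % d == 0:
            factors.append(d)
            n //= d
        d += 1
    if n > 1:
        factors.append(n)
    return factors

def uniq_counter(list_of_factor_lists):
    counts = Counter()
    for factors in list_of_factor_lists:
        counts.update(set(factors))
    return counts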
class withPool:
    def __init__(self, procs):
        self.p = Pool(procs, init_func)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.p.close()
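# Example usage of the withPool context manager above -- a sketch only:
# `init_func` (referenced by withPool) must already be defined for this to
# run, and abs is used as the mapped function simply because builtins are
# picklable and keep the example self-contained. The pool is created on entry
# and closed automatically on exit.
def example_with_pool():
    with withPool(4) as ctx:
        return ctx.p.map(abs, range(-5, 5))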
def mass_tri_plot(data, savedir, name='plot', Type='speed', Map=False):
    """
    Plots all time series. Makes use of multiprocessing for speed.
    """
    trigrid = data['trigrid']
    # get the data to plot
    try:
        toPlot = data[Type]
    except KeyError:
        print Type + " is not an element of data. Please calculate it."
        raise Exception("Invalid dictionary entry")
    # set the variable as a global variable
    global plotvar
    plotvar = toPlot
    global saveDir
    saveDir = savedir
    global grid
    grid = trigrid
    # see if the save directory exists, or make it
    if not os.path.exists(savedir):
        os.makedirs(savedir)
    l = toPlot.shape[0]
    p = Pool(4)
    plt.gca().set_aspect('equal')
    p.map(save_plot, range(50))
    clearall()
def main():
    idir, ofile, dffile = _parse_cmdline()

    print u'Loading doc-freqs file {}...'.format(dffile)
    with open(dffile, 'rb') as f:
        df = pickle.load(f)

    print u'Reading input directory: {}'.format(idir)
    jobs = _load_jobs(idir, df)

    # Do the work.
    pool = Pool(4)
    njobs = len(jobs)
    try:
        import sys
        with codecs.open(ofile, 'wb') as pf:
            pickle.dump(njobs, pf)
            results = pool.imap_unordered(worker, jobs)
            for i, result in enumerate(results, 1):
                pickle.dump(result, pf)
                per = 100 * (float(i) / njobs)
                sys.stdout.write(u'\rPercent Complete: {:2.3f}%'.format(per))
                sys.stdout.flush()
            sys.stdout.write(u'\rPercent Complete: 100%    \n')
            sys.stdout.flush()
    except KeyboardInterrupt:
        sys.stdout.write(u'\rPercent Complete: {:2.3f}%    \n'.format(per))
        sys.stdout.write(u'Shutting down.\n')
        sys.stdout.flush()
        sys.exit()

    print u'Complete!'
def process(self):
    try:
        urls = redis_one.hkeys(self.sitemap_prefix)
        ofh = open('test_urls.txt', 'w+')
        urls.sort()
        ofh.write(('\n'.join(urls)).encode('utf8', 'ignore'))
        logger.error('total urls len %s' % len(urls))
        dict_res = defaultdict(int)
        i = 0
        while i <= len(urls):
            pool = Pool(processes=15)
            q = Queue()
            dict_subres = defaultdict(int)
            list_urls = [urls[i + j * 10000:i + (j + 1) * 10000] for j in range(15)]
            #list_dict_res = list(pool.map_async(parse_content, list_urls))
            for d in pool.imap(parse_content, list_urls):
                for k, v in d.iteritems():
                    dict_res[k] += v
            logger.error('Parser %s %s' % (len(list_urls), len(dict_res)))
            i += 10000 * 15
        sorted_dict_res = sorted(dict_res.iteritems(), key=lambda s: s[1], reverse=True)
        ofh = open('./test_sitemap_keywords', 'w+')
        ofh.write('\n'.join(['%s\t%s' % (k, v) for (k, v) in sorted_dict_res if v >= 3]).encode('utf8', 'ignore'))
        ofh.close()
    except:
        logger.error(traceback.format_exc())