def start(self, jenkinsHost, jenkinsUser="******", jenkinsKey=globals.configJenkinsKey):
    self.__jenkinsHost = jenkinsHost
    self.__jenkinsUser = jenkinsUser
    self.__jenkinsKey = jenkinsKey

    # Keep startup times short. Only connect a few threads to Jenkins upfront. The rest
    # will be dynamically added as required. This also enables authentication issues to be
    # resolved by the system admin or developer without taking the tool down
    for i in range(globals.threadPoolSize):
        threadRequests = makeRequests(self.connectInit, [((i,), {})])
        [globals.threadPool.putRequest(req) for req in threadRequests]

    # Do 1 to validate the connection. This is synchronous
    for i in range(1):
        threadRequests = makeRequests(self.connect, [((i,), {})])
        [globals.threadPool.putRequest(req) for req in threadRequests]
    globals.threadPool.wait()

    # Thread queueing results in new thread connections later, so do them all now
    # asynchronously so that they are ready when needed without having to wait. This is
    # the connectRetry logic, so all paths are used during startup
    for i in range(globals.threadPoolSize - 1):
        threadRequests = makeRequests(self.connectRetry, [((), {})])
        [globals.threadPool.putRequest(req) for req in threadRequests]
def __init__(self):
    self.logger = logger
    cp = ConfigParser.SafeConfigParser()
    cp.read('config\config.ini')
    self.req_que_size = int(cp.get('autoLevel', 'req_que_size'))
    self.resp_que_size = int(cp.get('autoLevel', 'resp_que_size'))
    self.threadpoolSize = int(cp.get('autoLevel', 'threadpoolSize'))
    self.accu_threshold = int(cp.get('autoLevel', 'accu_threshold'))
    self.max_alarm_level = int(cp.get('autoLevel', 'max_alarm_level'))
    self.threadpo = threadpool.ThreadPool(self.threadpoolSize)
    self.root = Node()
    self.reqQueue = Queue.Queue(maxsize=self.req_que_size)
    self.respQueue = Queue.Queue(maxsize=self.resp_que_size)
    self.alarmDura = {}
    children = {}
    for i in cp.get('autoLevel', 'alarm').split(','):
        key = int(i.split('-')[0])
        value = int(i.split('-')[1])
        self.alarmDura[key] = value
    for key in cp.get('autoLevel', 'org').split(','):
        children[int(key)] = AlarmNode(org=int(key),
                                       req_que_size=self.req_que_size,
                                       resp_que_size=self.resp_que_size,
                                       threadpo=self.threadpo,
                                       alarmDura=self.alarmDura,
                                       accu_threshold=self.accu_threshold,
                                       max_alarm_level=self.max_alarm_level,
                                       parentRespQueue=self.respQueue)
    self.logger.debug('init: %s, %s' % (self.alarmDura, children))
    self.root.setChildren(children)
    self.alaMsgTask = threadpool.makeRequests(self.alaMsg, [(None, None)])
    self.scanTimeTask = threadpool.makeRequests(self.scanTime, [(None, None)])
    self.threadpo.putRequest(self.alaMsgTask[0])
    self.threadpo.putRequest(self.scanTimeTask[0])
def crawl(self, para_file):
    """Using a thread pool to speed up crawling."""
    if not self.writer and not self.method:
        return
    fpara = open(para_file, 'r')
    pool = threadpool.ThreadPool(self.poolsize)
    parlst = list()
    for line in fpara:
        if self.stopped:
            break  # Stop current crawling
        parlst.append(line.strip())
        if len(parlst) > 10:
            requests = threadpool.makeRequests(self.retrieve, parlst)
            map(pool.putRequest, requests)
            pool.wait()
            self.writer.flush()
            del parlst[:]
    # Flush the last part of lines in parlst
    if not self.stopped:
        requests = threadpool.makeRequests(self.retrieve, parlst)
        map(pool.putRequest, requests)
        pool.wait()
        self.writer.flush()
    fpara.close()
    self.writer.close()
    if not self.stopped:
        logging.info('Retrieving finished.')
    else:
        logging.info('Retrieving interrupted.')
    return
def simulateStep(self):
    tc = self.threadCount
    kwargs = [([], {'firstRow': self.size / tc * x,
                    'lastRow': self.size / tc * x + self.size / tc})
              for x in range(tc)]
    [self.pool.putRequest(req)
     for req in threadpool.makeRequests(self.countAllNeighbours, kwargs)]
    self.pool.wait()
    [self.pool.putRequest(req)
     for req in threadpool.makeRequests(self.simulateLifeAndDeath, kwargs)]
    self.pool.wait()
def thread_use():
    pool = threadpool.ThreadPool(10)

    # single parameter
    option = [1, 2]
    requests = threadpool.makeRequests(thread_sing_parameter, option)
    [pool.putRequest(req) for req in requests]
    pool.wait()

    # multiple parameters
    option = [([1, 2, 3], None), ([4, 5, 6], None)]
    requests = threadpool.makeRequests(thread_multiple_parameter, option)
    [pool.putRequest(req) for req in requests]
    pool.wait()
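# A minimal sketch (assumed, not from the original source) of the two workers the
# demo above dispatches to. makeRequests passes a plain list item as one
# positional argument, and unpacks an ([args], kwargs) tuple into the call.
def thread_sing_parameter(n):
    print('single parameter: %s' % n)  # called as thread_sing_parameter(1), then (2)

def thread_multiple_parameter(a, b, c):
    print('multiple parameters: %s %s %s' % (a, b, c))  # called as thread_multiple_parameter(1, 2, 3)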
def main():
    # Max Workers
    pool = threadpool.ThreadPool(4)

    # When creating a bunch of stuff
    streams = ['1.mp3', '2.mp3', '3.mp3', '4.mp3']
    requests = threadpool.makeRequests(stream_music, streams, done_callback, handle_exception)

    # Make Requests
    for req in requests:
        pool.putRequest(req)
        print "Work request #%s added." % req.requestID
    # or: [pool.putRequest(req) for req in requests]

    # Run them
    while True:
        try:
            time.sleep(0.5)
            pool.poll()
            print "(active worker threads: %i)" % (threading.activeCount() - 1, )
        except KeyboardInterrupt:
            print "**** Interrupted!"
            break
        except NoResultsPending:
            print "**** No pending results."
            break  # without this break the loop would spin forever
        except NoWorkersAvailable:
            print "**** Out of workers."
            break

    # Cleanup (was `main.joinAllDismissedWorkers()`, a typo for the pool object)
    if pool.dismissedWorkers:
        print "Joining all dismissed worker threads..."
        pool.joinAllDismissedWorkers()
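# A minimal sketch (assumed) of the two callbacks main() above hands to
# makeRequests. In the classic threadpool module a result callback is invoked as
# callback(request, result) and an exception callback as
# exc_callback(request, exc_info), where exc_info is a (type, value, traceback) triple.
def done_callback(request, result):
    print "request #%s finished: %r" % (request.requestID, result)

def handle_exception(request, exc_info):
    print "request #%s raised: %s" % (request.requestID, exc_info[1])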
def create_records_cache(self, provider, path):
    """
    Creates an array of Records classes (s.a.) after parsing all records
    under `path'. Assumes a valid session.
    """
    records = []
    # If we are in `/' then get all records
    for d in self.provider.metadata(path).lsdirs():
        recpath = d + "/record.json"
        log.debug(recpath)
        records.append(recpath)
    requests = threadpool.makeRequests(self.records_worker, records,
                                       self.append_records_cache,
                                       self.handle_exception)
    # Insert the requests into the threadpool.
    # This is ugly but will need serious refactoring for the local provider.
    # Basically: if using local storage then just use one thread to avoid choking
    # on the HD. For Dropbox and other remote providers use multi-threading.
    if provider == "local":
        pool = threadpool.ThreadPool(1)
    else:
        pool = threadpool.ThreadPool(min(len(requests), default_number))
    for req in requests:
        pool.putRequest(req)
        log.debug("Work request #%s added." % req.requestID)
    # Wait for them to finish (or you could go and do something else)
    pool.wait()
    pool.dismissWorkers(min(len(requests), 20), do_join=True)
    log.debug("workers length: %s" % len(pool.workers))
def sumbit(self, record_time):
    log.info("i did at %s %d" % (util.get_local_time(), record_time))
    coll = Collector()
    # Note: the original `[(record_time)]` is not a tuple; makeRequests passes the
    # bare value through as a single positional argument to coll.collect.
    requests = threadpool.makeRequests(coll.collect, [record_time], self.print_result)
    for req in requests:
        log.info("get request")
        self.pool.putRequest(req)
def pattern6(fundcodelist):
    print 'Check delta for self selected funds, give buy/sell/noaction order'
    print 'Strategy: increased %s%% in the past m days (at most) then sell, or dropped %s%% then buy; otherwise, no action' % (upthreshold, downthreshould)
    fundlist6 = []
    for fundcode in fundcodelist:
        fund = Fund()
        fund.fundcode = fundcode
        fundlist6.append(fund)
    pool0 = threadpool.ThreadPool(len(fundcodelist))
    requests0 = threadpool.makeRequests(getPerfForFund, fundlist6)
    [pool0.putRequest(req) for req in requests0]
    pool0.wait()
    actionsum = 0
    for fund in fundlist6:
        actionsum += fund.action
    # Strategy to sell or buy in under such an unstable situation
    if actionsum > 2:
        outputinfo = 'ActionSum: %d for %d funds, Time To Sell Out!!!' % (actionsum, len(fundcodelist))
    elif actionsum < -2:
        outputinfo = 'ActionSum: %d for %d funds, Good To Buy In!!!' % (actionsum, len(fundcodelist))
    else:
        outputinfo = 'ActionSum: %d for %d funds, No Valuable Action!!!' % (actionsum, len(fundcodelist))
    print outputinfo
    return outputinfo
def start_thread_pool(self, thread_pool, app_type):
    """
    Start requesting the interfaces.
    :param thread_pool: thread pool worker function
    :param app_type: 0 >> A; 1 >> B; 2 >> C; 3 >> D
    :return:
    """
    d1 = datetime.datetime.now()
    print("Reading interface data...")
    s = sessions.ReadSessions.ReadSessions()
    l = s.get_will_request_sessions()  # fetch all interface data to be requested
    print("Requesting interfaces, please wait...")
    pool = threadpool.ThreadPool(self.thread_count)
    requests1 = threadpool.makeRequests(thread_pool, l)
    [pool.putRequest(req) for req in requests1]
    pool.wait()
    print("Interface requests finished!")
    # Retry mechanism
    retry.Retry.retry11(app_type)
    # Clean up data
    print("Cleaning up created data...")
    sessions.DelaySessions.clear_up(0)
    print("Preparing the test report...")
    d2 = datetime.datetime.now()
    t = d2 - d1
    print('Interface regression test finished!')
    print("%s %s%s" % ("Elapsed:", t.seconds, "s"))
def get_op(self, op, money=0):
    if self.gene_trans[op] != None:
        money = self.gene_trans[op](self, money)
    # red operation failed during generation
    if money == -1:
        return -1
    op_rclock = 0
    if self.optype[op] == 'red':
        op_rclock = self.ask_globalmanager_for_flag()
        while op_rclock == -1:
            time.sleep(0.5)
            op_rclock = self.ask_globalmanager_for_flag()
        log("prepare to issue red operation with rclock %s" % op_rclock)
    if op > 1:
        # self.put_op((self.myid, op, money, self.rclock))
        # Older implementation using one threading.Thread per host:
        # threads = []
        # for hostaddr in self.hosts.keys():
        #     threads.append(threading.Thread(target=self.replicate_latency,
        #                                     args=(hostaddr, op, money)))
        # for t in threads:
        #     t.start()
        # for t in threads:
        #     t.join()
        data = []
        for hostaddr in self.hosts.keys():
            data.extend([((hostaddr, op, money, op_rclock), {})])
        requests = threadpool.makeRequests(self.replicate_latency, data)
        for req in requests:
            self.pool.putRequest(req)
        # self.pool.wait()
    return 1
def start_crawling(self, multi_way=_multi_process):
    # self.branch_commit_fp = open(os.path.join(self.saveDir, self.target_repos.repos_name, 'branch_commit.info'), 'w')
    self.parse_branch_name()
    print 'Number of branches: %s' % len(self.branches)

    # Crawler implemented with the third-party package 'threadpool',
    # which is not truly parallel (the GIL still serializes CPU-bound work)
    if multi_way == _multi_thread:
        para = [((b, baseURL, os.path.join(self.saveDir, self.target_repos.repos_name),), {})
                for b in self.branches[:poolsize]]
        pool = threadpool.ThreadPool(poolsize)
        requests = threadpool.makeRequests(crawling_branch, para)
        for req in requests:
            pool.putRequest(req)
        pool.wait()
    # Truly multi-process implementation
    else:
        pool = ProcessPool(poolsize)
        # para = [(b, baseURL, os.path.join(self.saveDir, self.target_repos.repos_name),) for b in self.branches[:poolsize]]
        for b in self.branches:
            sys.stderr.write('Branch %s\n' % b.branch_name)
            pool.apply_async(crawling_branch,
                             (b, baseURL, os.path.join(self.saveDir, self.target_repos.repos_name),))
        pool.close()
        pool.join()
        sys.stderr.write('All processes have been terminated\n')
def thread_process():
    global ARTICLE_URLS
    os.mkdir(USER_NAME)
    thread_pool = threadpool.ThreadPool(THREAD_NUMBER)
    requests = threadpool.makeRequests(get_article, ARTICLE_URLS, save_article)
    [thread_pool.putRequest(req) for req in requests]
    thread_pool.wait()
def downjpgmutithread(filepathlist, dir=""):
    print("total downloads: %d" % len(filepathlist))
    runlist = []
    for file in filepathlist:
        runlist.append(([file, dir], None))
    print("start downloads")
    pool = threadpool.ThreadPool(5)
    reqs = threadpool.makeRequests(fileDownload, runlist)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
    '''
    # Older threading.Thread version:
    task_threads = []  # store the threads
    count = 1
    for file in filepathlist:
        name = file[file.rfind('/') + 1:]
        if allfiles.count(name) > 0:
            print "file already saved."
            continue
        print "file name: " + file
        t = threading.Thread(target=downjpg, args=(file,))
        count = count + 1
        task_threads.append(t)
    for task in task_threads:
        task.start()
        # time.sleep(5)
    for task in task_threads:
        task.join()  # wait for all threads to finish
    '''
    print("all tasks finished")
def main():
    print "=="
    f = open('delay_dist_fail.txt', 'r', 102400)
    lines = f.readlines()
    length = len(lines)
    total = length
    print length
    logging.info("length:%d", length)
    index = 0
    process_cnt = 0
    pool = threadpool.ThreadPool(100)
    for line in lines:
        index += 1
        logging.info(line)
        process_cnt += 1
        # print line
        requests = threadpool.makeRequests(appendStatusAndTime, [line], asyn_callback)
        for req in requests:
            req.total = length
            req.process = process_cnt
            pool.putRequest(req)
        if index == 200:
            pool.wait()
            index = 0
    # print "====="
    pool.wait()
def runWorkers(self, uncachedQueryL, oncols, filters):
    cherrypy.thread_data.result = []
    n = len(uncachedQueryL)
    if n == 0:
        return []
    if THREADPOOL_SECONDARY <= 1:
        # don't create threads for a one-CPU database
        return self.model.runPipelinedQueriesFromDict(
            (uncachedQueryL, oncols, filters, 0))
    p = THREADPOOL_SECONDARY
    args = []
    # Split equally among the workers; good only for ROW-based pipelined execution
    for threadNr in xrange(p):
        args.append((uncachedQueryL[threadNr * n / p:(threadNr + 1) * n / p],
                     oncols, filters, threadNr))
    requests = threadpool.makeRequests(
        self.model.runPipelinedQueriesFromDict, args, self.workerCallback)
    [cherrypy.thread_data.threadpool.putRequest(req) for req in requests]
    cherrypy.thread_data.threadpool.wait()
    return cherrypy.thread_data.result
def start(self, norm_target_func=None, *args, **kwargs):
    def args_generator(poc_name):
        func_args = {
            'poc_name': poc_name,
            'options': self.options,
            'success': None,
            'poc_ret': {},
        }
        for target in self.seed_targets:
            if norm_target_func:
                func_args['options']['target'] = norm_target_func(target.strip(), *args, **kwargs)
            else:
                func_args['options']['target'] = target.strip()
            yield deepcopy(func_args)

    for name, func2run in self.funcs2run:
        requests = threadpool.makeRequests(callable_=func2run,
                                           args_list=args_generator(name),
                                           callback=self.handle_result,
                                           exc_callback=self.handle_result)
        [self.tp.putRequest(req) for req in requests]
        self.total_num += len(requests)
    self.tp.wait()
    self.tp.dismissWorkers(100, do_join=True)
    return self.total_num, self.success_num
def pattern4(fundinfolist4, maxreturn, threadnum):
    # Check fund manager performance
    print 'Fund list 4: based on the pattern3 results, rank current fund managers by performance'

    # Get the manager's performance if the fund is buyable
    def checkBuyableAndGetPerf(fund):
        isBuyable(fund)
        if fund.buyable:
            getManagerPerf(fund)
        else:
            fund.managerperf = '-1000'  # with this, no extra filter on buyable is needed

    # Get fund manager performance
    pool2 = threadpool.ThreadPool(threadnum)
    requests2 = threadpool.makeRequests(checkBuyableAndGetPerf, fundinfolist4)
    [pool2.putRequest(req) for req in requests2]
    pool2.wait()
    fundInfoListOrdered4 = sorted(fundinfolist4,
                                  key=lambda fund: string.atof(fund.managerperf),
                                  reverse=True)
    meetnum = 0
    fundlinkTemp = 'http://fund.eastmoney.com/%s.html'
    jjjllinkTemp = 'http://fund.eastmoney.com/f10/jjjl_%s.html'
    for i in range(0, min(maxreturn, len(fundInfoListOrdered4))):
        fundinfolist4.append(fundInfoListOrdered4[i])
        fundlink = fundlinkTemp % fundInfoListOrdered4[i].fundcode
        jjjllink = jjjllinkTemp % fundInfoListOrdered4[i].fundcode
        print '    %s %s %s Net value: %s Performance: %s Duration: %s' % (
            fundlink, jjjllink, fundInfoListOrdered4[i].name,
            fundInfoListOrdered4[i].latestvalue,
            fundInfoListOrdered4[i].managerperf.encode('utf-8'),
            fundInfoListOrdered4[i].managerduration.encode('utf-8'))
def getIDS(names):
    cnt = []
    poolsize = 60

    def f(name):
        for i in [1, 2]:
            try:
                us_set = ['Mozilla', 'IE', 'Opera', 'Chrome', 'Magic', 'theSage', 'Iceweasel', 'Rockmelt']
                ua = random.choice(us_set)
                req = urllib2.Request('http://graph.facebook.com/' + name + '?fields=id,name',
                                      headers={'User-Agent': ua})
                r = urllib2.urlopen(req)
                r = r.read()
                r = eval(r)  # the response is JSON; json.loads would be safer than eval
                result.append((r['id'], r['name']))
                cnt.append(True)
                if len(names) > 550:
                    time.sleep(60)  # to prevent blocking of the ip address by limiting the rate
            except Exception as e:
                print e.reason, len(cnt)
                if 'Not Found' in e.reason:  # was misspelled 'Nont Found', which never matched
                    if len(names) > 550:
                        time.sleep(60)
                    break
                if 'Forbidden' in e.reason:
                    time.sleep(600)
                if len(names) > 550:
                    time.sleep(60)

    result = []
    pool = tp.ThreadPool(poolsize)
    requests = tp.makeRequests(f, names)
    r = [pool.putRequest(req) for req in requests]
    pool.wait()
    pool.dismissWorkers(poolsize)
    return result
def start(self):
    # handle thread
    l = self.queue.popleft()
    self.visited |= {l}
    print "Get Page: " + l
    data = self.html_get(l)  # get the whole page
    for page in self.linkre.findall(data):
        page = base_url + page
        if page not in self.visited:
            self.visited |= {page}  # was {l}, which re-marked the current page and let pages be queued repeatedly
            self.queue.append(page)
    # capture the imgs
    for pic in self.pattern.findall(data):
        pic = pic.replace('big', 'pic')
        self.imgs |= {pic}
    # reach the limit
    # if len(self.imgs) == self.limit and self.limit != 0:
    #     self.down_img(self.imgs)
    requests = threadpool.makeRequests(self.real_iron, range(self.threads))
    [self.pool.putRequest(req) for req in requests]
    self.pool.wait()
    print self.imgs
    self.down_img(self.imgs)
def run(self):
    pool = threadpool.ThreadPool(10)
    reqs = threadpool.makeRequests(self._scan_start, self.dicts)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
    pool.dismissWorkers(10, do_join=True)  # was 20; the pool only has 10 workers
    return self.result
def batch_download_upps_src_data(self):
    try:
        cmdstr = "rm " + self.upps_src_data_directory + "*"
        print(cmdstr)
        status, output = cmd_execute(cmdstr)
        print status, output
    except Exception as e:
        print(str(e))
    while 1:
        cmdstr = "hadoop fs -test -e /app/lbs/lbs-stat/upp/data/mr/up/map/category/" + self.event_day + "/done"
        e = cmd_execute(cmdstr)[0]
        if e == 0:
            print(u"hadoop files are ready, starting the batch download...")
            break
        else:
            print(u"hadoop files not ready yet, checking again in 60s...")
            time.sleep(60)
    now = datetime.datetime.now()
    print(u"Map category user preference - UPPS source data download - start: " + str(now))
    filepath_list = []
    for i in xrange(10):
        filepath_list.append("/app/lbs/lbs-stat/upp/data/mr/up/map/category/" + self.event_day + "/data/part-*" + str(i))
    print(filepath_list)
    pool = threadpool.ThreadPool(self.download_srcdata_poolsize)
    reqs = threadpool.makeRequests(self.download_upps_src_data, filepath_list, self.on_download_finish)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
    now = datetime.datetime.now()
    print(u"Map category user preference - UPPS source data download - done: " + str(now))
def main():
    pool = threadpool.ThreadPool(4)
    l = glob(srcdir + os.sep + '*.tar.gz')
    args = sorted(l)
    requests = threadpool.makeRequests(conv_file, args, callback)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def batchTest(self, norm_target_func=None, *args, **kwds):
    '''
    The func must be the run() function in a poc class.
    '''
    def argsGenerator():
        func_args = {
            'options': self.options,
            'success': None,
            'poc_ret': {},
        }
        for seed in self.seed_iter:
            if norm_target_func:
                func_args['options']['target'] = norm_target_func(seed.strip(), *args, **kwds)
            else:
                func_args['options']['target'] = seed.strip()
            yield deepcopy(func_args)

    requests = threadpool.makeRequests(callable_=self.func2run,
                                       args_list=argsGenerator(),
                                       callback=self.cbSaveResult,
                                       exc_callback=self.cbHandleErr)
    [self.tp.putRequest(req) for req in requests]
    self.tp.wait()
    self.tp.dismissWorkers(100, do_join=True)
    return self.total_num, self.finished_num, self.err_num
def _processOnBackground(self):
    while True:
        if self.queue.empty():
            time.sleep(1)
            continue
        testcase_list = []
        # Throttle concurrency by the number of idle devices
        for i in range(len(DeviceManager()._deviceInfoList.available_device_list)):
            if not self.queue.empty():
                testcase_list.append(self.queue.get())
            else:
                break  # queue drained
        req_list = []
        for testcase in testcase_list:
            deviceInfo = DeviceManager().shiftDevice(testcase.condition)
            if not deviceInfo:
                # No device available: put the test case back into the queue
                self.queue.put(testcase)
                time.sleep(1)
                continue
            req_list.append({'deviceInfo': deviceInfo, 'testcase': testcase})
        if not req_list:
            time.sleep(1)
            continue
        requests = makeRequests(self._runTestcase, req_list)
        [ThreadPoolManager().threadPool.putRequest(req) for req in requests]
def winconf_multi(nb_jobs=4):
    from ranwinconf.list_AD import get_all_servers
    pattern = conf_get_IFP(config, "GENERAL", "PATTERN", "operatingSystem='*Server*'")
    print "Retrieving server list from AD based on pattern: %s" % pattern
    server_list = get_all_servers("objectClass='computer' AND %s" % pattern)
    server_name_list = [computer.cn for computer in server_list]
    print "%d servers were retrieved" % len(server_name_list)

    import threadpool
    pool = threadpool.ThreadPool(nb_jobs)
    requests = threadpool.makeRequests(thread_work, server_name_list)
    for req in requests:
        pool.putRequest(req)
    pool.wait()
    for server_name in server_name_list:
        manage_vcs_and_notification(server_name, "%s.txt" % server_name)
def get_dp_detail1(self):
    data = self.get_hash_data()
    pool = threadpool.ThreadPool(100)
    requests = threadpool.makeRequests(hello1, data[:1000], self.get_dp_data)
    for req in requests:
        pool.putRequest(req)
    pool.wait()
def check_all_instance(instance_ports):
    # Each (item, {}) tuple is treated by makeRequests as (args, kwargs), so every
    # item must itself be a sequence of positional arguments for check_one_instance.
    datas = [(item, {}) for item in instance_ports]
    reqs = threadpool.makeRequests(check_one_instance, datas, run_success, run_fault)
    [pool.putRequest(req) for req in reqs]
    log.debug("pool.wait for check instance")
    pool.wait()
    log.debug("pool.wait end")
def getImageURLInfos(urls, threaded=True, progress=None):
    infos = {}
    if not threaded:
        for url in urls:
            if url in infos:
                continue
            info = {'url': url}
            try:
                info['type'], info['w'], info['h'] = getImageURLInfo(url)
            except:
                continue
            infos[url] = info
        return infos

    pool = threadpool.ThreadPool(4)
    req = []
    for url in urls:
        if url in infos:
            continue
        info = {'url': url, 'w': None, 'h': None, 'type': None}
        req.append(info)
    requests = threadpool.makeRequests(infoWorker, req)
    [pool.putRequest(req) for req in requests]
    # Note: wait(return_results=..., progress=...) and dismissWorkers() without a
    # count are not part of the stock threadpool module; this code relies on a
    # locally extended ThreadPool.
    results = pool.wait(return_results=True, progress=progress)
    pool.dismissWorkers()
    infos = {}
    for info in results:
        infos[info['url']] = info
    return infos
def __init__(self, id=0, org=0, req_que_size=100, resp_que_size=100, threadpo=None,
             alarmDura={}, accu_threshold=0, max_alarm_level=5, parentRespQueue=None):
    self.id = id
    self.org = org
    self.req_que_size = req_que_size
    self.resp_que_size = resp_que_size
    self.threadpo = threadpo
    self.alarmDura = alarmDura
    self.alarmList = []
    self.alarmScoreMap = {}
    self.packageId = -1
    self.preTime = 0
    self.duration = 0
    self.accu_threshold = accu_threshold
    self.reqQueue = Queue.Queue(maxsize=self.req_que_size)
    self.respQueue = Queue.Queue(maxsize=self.resp_que_size)
    self.max_alarm_level = max_alarm_level
    self.parentRespQueue = parentRespQueue
    self.logger = logger
    self.isStop = True
    self.threads = threadpool.makeRequests(self.alaMsg, [(None, None)])
    self.lock = threading.Lock()
    self.currentAlarmLevel = {'level': 0, 'alarmId': 0}
    self.maxAlarmId = 0
    # get an AlarmUtil
    self.alarmUtil = AlarmUtil.AlarmUtil()
def start(self):
    city = get_city()
    self.today_path = create_date_path("{0}/xiaoqu".format(SPIDER_NAME), city, self.date_string)
    t1 = time.time()  # start timing

    # Get the list of districts in the city
    districts = get_districts(city)
    print('City: {0}'.format(city))
    print('Districts: {0}'.format(districts))

    # Get the areas of each district
    areas = list()
    for district in districts:
        areas_of_district = get_areas(city, district)
        print('{0}: Area list: {1}'.format(district, areas_of_district))
        # list.extend appends all elements of its argument to the end of the list
        areas.extend(areas_of_district)
        # Use a dict to map each area to its district, e.g. {'beicai': 'pudongxinqu'}
        for area in areas_of_district:
            area_dict[area] = district
    print("Area:", areas)
    print("District and areas:", area_dict)

    # Prepare the arguments for the thread pool
    nones = [None for i in range(len(areas))]
    city_list = [city for i in range(len(areas))]
    args = zip(zip(city_list, areas), nones)
    # areas = areas[0:1]

    # Write one file and start one worker per area
    pool_size = thread_pool_size
    pool = threadpool.ThreadPool(pool_size)
    my_requests = threadpool.makeRequests(self.collect_area_xiaoqu_data, args)
    [pool.putRequest(req) for req in my_requests]
    pool.wait()
    pool.dismissWorkers(pool_size, do_join=True)  # quit when done

    # Stop timing and report the results
    t2 = time.time()
    print("Total crawl {0} areas.".format(len(areas)))
    print("Total cost {0} second to crawl {1} data items.".format(t2 - t1, self.total_num))
def multithreading(self, funcname, url, command, pools):
    # Each entry packs a candidate key together with the url and command,
    # delimited by the '1234url3456' / '1234command3456' / '1234sven3456' markers.
    key = [
        'kPH+bIxk5D2deZiIxcaaaA==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'wGiHplamyXlVB11UXWol8g==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        '2AvVhdsgUs0FSA3SDFAdag==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        '4AvVhmFLUs0KTA3Kprsdag==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        '3AvVhmFLUs0KTA3Kprsdag==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'Z3VucwAAAAAAAAAAAAAAAA==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'U3ByaW5nQmxhZGUAAAAAAA==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'wGiHplamyXlVB11UXWol8g==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        '6ZmI6I2j5Y+R5aSn5ZOlAA==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'fCq+/xW488hMTCD+cmJ3aQ==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        '1QWLxg+NYmxraMoxAXu/Iw==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'ZUdsaGJuSmxibVI2ZHc9PQ==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'L7RioUULEFhRyxM7a2R/Yg==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'r0e3c16IdVkouZgk1TKVMg==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        '5aaC5qKm5oqA5pyvAAAAAA==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'bWluZS1hc3NldC1rZXk6QQ==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'a2VlcE9uR29pbmdBbmRGaQ==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'WcfHGU25gNnTxTlmJMeSpw==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'bWljcm9zAAAAAAAAAAAAAA==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        'MTIzNDU2Nzg5MGFiY2RlZg==1234url3456' + url + '1234command3456' + command + '1234sven3456',
        '5AvVhmFLUs0KTA3Kprsdag==1234url3456' + url + '1234command3456' + command + '1234sven3456'
    ]
    pool = threadpool.ThreadPool(pools)
    requests = threadpool.makeRequests(funcname, key)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def trigger(self):
    '''
    Trigger the current event via the event listeners.
    '''
    try:
        reqs = []
        mutex = self.event_listener_list_mutex
        tp = self.thread_pool

        def list_access():
            if mutex:
                if mutex.acquire():
                    return True
            else:
                return True

        def access_finish():
            if mutex:
                mutex.release()

        match_list = []
        if list_access():
            try:
                for l in self.event_listener_list:
                    if l.match(self.event):
                        match_list.append(l)
            except Exception, e:
                print Exception, ':', e, ' in %s:%s' % get_class_func_name(self)
            access_finish()
        else:
            access_finish()
        for l in match_list:
            reqs = reqs + threadpool.makeRequests(l.process_event, [(None, None)])
        map(tp.putRequest, reqs)
        # tp.wait()
    except Exception, e:
        print Exception, ':', e, ' in %s:%s' % get_class_func_name(self)
def main(args):
    needData = []
    url = find(args.s)
    if "-" in args.v:
        versions = args.v.split("-")
        for version in range(int(versions[0]), int(versions[1]) + 1):
            needData.append((None, {"version": int(version), "status": args.s, "url": url}))
    elif "/" in args.v:
        versions = args.v.split("/")
        for version in versions:
            needData.append((None, {"version": int(version), "status": args.s, "url": url}))
    else:
        needData.append((None, {"version": int(args.v), "status": args.s, "url": url}))
    start = time.time()
    size = 5 if len(needData) > 5 else len(needData)
    pool = threadpool.ThreadPool(size)
    requests = threadpool.makeRequests(work, needData)
    [pool.putRequest(request) for request in requests]
    pool.wait()
    print("Total lookup time:", (time.time() - start), " s")
def thread_pool_download(imgUrlList):
    # Set the thread pool size
    pool = threadpool.ThreadPool(15)
    # The second argument of makeRequests must be an iterable. download_pic takes
    # two parameters, the index (used for the file name) and the url, so the
    # ((args), kwargs) tuples have to be built here. Alternatively, turn the url
    # into a file name up front; if download_pic only took the url, this step
    # could be skipped.
    data = [((index, x), None) for index, x in enumerate(imgUrlList)]
    th_request = threadpool.makeRequests(download_pic, data)
    for req in th_request:
        pool.putRequest(req)
    pool.wait()
    logging.info(
        'https://www.zhihu.com/question/{}/answer/{}: {} images downloaded'.format(
            question_id, answer_id or my_answer_id, img_num))
    logging.info('Saved to {}'.format(absolute_dir))
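# A minimal sketch of the download_pic worker assumed by thread_pool_download
# above (the body is an assumption, not the original implementation). It takes
# the enumerate index (used for the file name) and the url, matching the
# ((index, x), None) tuples built there; absolute_dir is the save directory the
# snippet's own logging refers to.
import os
import requests

def download_pic(index, url):
    resp = requests.get(url, timeout=10)                       # fetch one image
    path = os.path.join(absolute_dir, '{}.jpg'.format(index))  # name the file by its index
    with open(path, 'wb') as f:
        f.write(resp.content)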
def mutliDownloadTs(playlist):
    global logFile
    global sumCount
    global doneCount
    global taskThreadPool
    taskList = []
    # Each ts segment becomes its own task
    for index in range(len(playlist)):
        task_args = {"playlist": playlist, "index": index}  # renamed from `dict` to avoid shadowing the builtin
        taskList.append((None, task_args))
    # Reset the segment count and the number of downloaded segments
    doneCount = 0
    sumCount = len(taskList)
    printProcessBar(sumCount, doneCount, 50)
    # Build the thread pool requests
    requests = threadpool.makeRequests(downloadTs, taskList)
    [taskThreadPool.putRequest(req) for req in requests]
    # Wait for all tasks to finish
    taskThreadPool.wait()
    print("")
    return True
def process(output_fpath):
    """Compute and write the results to a file."""
    global fout
    # Output file path
    fout = open(output_fpath, 'w', encoding='utf-8')
    # Get the list of all available name postfixes
    all_name_postfixs = get_name_postfixs()
    # all_name_postfixs = ['国荣']
    global all_count
    all_count = len(all_name_postfixs)
    pool = threadpool.ThreadPool(1)
    requests = threadpool.makeRequests(compute_and_writefile, all_name_postfixs)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    fout.flush()
    fout.close()
def ServerStart(FtpInfo):
    DeviceInfo = []
    cFtp = ftplib.FTP()
    print FtpInfo[0]
    cFtp.connect(FtpInfo[0], FtpInfo[1])
    cFtp.login(FtpInfo[2], FtpInfo[3])
    cFtp.set_debuglevel(0)
    cFtp.cwd('/')
    cRootDir = cFtp.nlst()
    # cFtp.close()
    for cDeviceDir in cRootDir:
        if cDeviceDir[0:5] == 'vpnin':
            DeviceInfo.append([FtpInfo[0], FtpInfo[1], FtpInfo[2], FtpInfo[3], cDeviceDir])
    cFtp.close()
    # Create the thread pool
    cPool = tp.ThreadPool(4)
    # Start the threads
    cRequests = makeRequests(UploadPhoto, DeviceInfo)
    [cPool.putRequest(req) for req in cRequests]
    cPool.wait()
def __init__(self, server_gateway, task_info):
    threading.Thread.__init__(self)
    # self.is_suspend = False
    self.is_stop = False  # these two flags control suspend and stop
    self.__server_gateway = server_gateway
    self.__targets_data.append({'target_url': task_info['target_url']})
    self.__scan_data = task_info
    self.__task_id = task_info['taskID']
    # self.__run_done_plugins_id = []  # stores the IDs of finished plugins, used to
    #                                  # resume the thread after a pause without hanging
    self.__load_plugins()  # load the plugin list
    self.__pool = threadpool.ThreadPool(task_info['plugins_thread_num'])
    self.__requests = threadpool.makeRequests(callable_=self.__run_plugins,
                                              args_list=self.__plugins_temp,
                                              callback=self.__check_status)
def start(self):
    # First layer: every seed qq starts at depth 0
    for qq in self.qq:
        waiting_get.put([qq, 0, self.uin])
    # Dequeue and crawl with a pool of 20 threads. Plain threading is not used here
    # because each batch of 20 threads would have to be joined, and as long as even
    # one of them is still running all the others wait, which is too slow. A thread
    # pool can take 200 items at once and keep 20 threads busy, which is faster.
    while waiting_get.qsize() >= 0:
        # Dequeue up to 200 items at a time and hand them to the pool
        waiting_qq_list = []
        for i in range(waiting_get.qsize()):
            waiting_qq_list.append(waiting_get.get())
        pool_size = 20
        pool = threadpool.ThreadPool(pool_size)
        # Create the work requests
        reqs = threadpool.makeRequests(self.get_data, waiting_qq_list)
        # Put the work requests into the queue
        [pool.putRequest(req) for req in reqs]
        pool.wait()
        print('new qq queue: ' + str(new_qq.qsize()))
def mThreadExecute(self, threadcount=10):
    import threadpool
    pool = threadpool.ThreadPool(threadcount)
    tools = [tool for tool in self.AllETLTools if tool.Enabled]
    index = extends.getindex(tools, lambda d: d.Type == 'ToListTF')
    if index == -1:
        index = 0
        tool = tools[index]
        generator = tool.Func(tool, None)
    else:
        generator = self.__generate__(tools[:index])

    def Funcs(item):
        mgenerator = self.__generate__(tools[index + 1:], (r for r in [item]), True)
        for r in mgenerator:
            pass
        print('finish' + str(item))

    requests = threadpool.makeRequests(Funcs, generator)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def __call__(self):
    # TODO: in cn-north-1, starting all services costs 6 minutes; use
    # multiprocessing instead of a thread pool.
    total_round = len(self.region_order_dict)
    for round_num in xrange(1, total_round + 1):
        regions = self.region_order_dict.get(round_num)
        if not regions:
            error_msg = 'region order not continuous: %s, miss %s' % (self.region_order_dict, round_num)
            logger.error(error_msg)
            logger.error('start basic service cancel')
            return {'ret': False,
                    'msg': u'Bad data in the RegionInfo table; aborting basic-service startup\n%s' % error_msg}
        thread_pool = threadpool.ThreadPool(len(regions))
        thread_requests = threadpool.makeRequests(self.start_services_in_region, regions)
        [thread_pool.putRequest(req) for req in thread_requests]
        thread_pool.wait()
        start_result = self.__check_thread_success(regions)
        if not start_result['ret']:
            return {'ret': False, 'msg': start_result['msg']}
        logger.info('basic services started in regions: %s, round: %s' % (regions, round_num))
    return {'ret': True, 'msg': u'Basic services in all regions have started'}
def main():
    # ftpLoad()
    # allTxtNameList = [path for path in os.listdir(txtFile)]
    # for txtName in allTxtNameList:
    #     print txtName
    #     txt_csv(txtName)
    # for csvName in os.listdir(csvFile):
    #     txt_upload_odps(csvName)
    # for jpgName in os.listdir(photoPath):
    allJpgList = [photoPath + name for name in os.listdir(photoPath)]
    try:
        # Upload block
        pool11 = tp.ThreadPool(10)
        requests11 = makeRequests(jpg_Upload, allJpgList)
        [pool11.putRequest(req) for req in requests11]
        pool11.wait()
    except:
        traceback.print_exc()
def download(url, thread_num=5):
    prefix = url[:url.rfind('/') + 1]
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/74.0.3729.131 Safari/537.36',
        'Connection': 'Keep-Alive'
    }
    m3u8_res = requests.get(url=url, headers=headers, verify=False)
    m3u8_txt = m3u8_res.text
    lines = m3u8_txt.split('\n')
    lines = list(filter(lambda x: x.endswith('.ts'), lines))
    index = 100000000
    args = []
    for line in lines:
        args.append((None, {'url': prefix + line, 'index': index}))
        index = index + 1
    pool = threadpool.ThreadPool(thread_num)
    reqs = threadpool.makeRequests(download_worker, args)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
    generate_txt()
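# A sketch of the download_worker assumed by download() above. Each request was
# built as (None, {'url': ..., 'index': ...}), so threadpool calls the worker
# with keyword arguments only; the body is an assumption, not the original code.
import requests

def download_worker(url, index):
    resp = requests.get(url, verify=False, timeout=30)  # fetch one .ts segment
    with open('%d.ts' % index, 'wb') as f:              # the large start index keeps names sortable
        f.write(resp.content)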
def get_move_probs(self, state, temp=1e-3, verbose=False):
    """Run all playouts sequentially and return the available actions and
    their corresponding probabilities.

    state: the current game state
    temp: temperature parameter in (0, 1] that controls the level of exploration
    """
    coroutine_list = []
    for n in range(self._n_playout):
        state_copy = copy.deepcopy(state)
        coroutine_list.append(state_copy)
    # self.loop.run_until_complete(asyncio.gather(*coroutine_list))
    requests = threadpool.makeRequests(self._playout, coroutine_list)
    [self.task_pool.putRequest(req) for req in requests]
    self.task_pool.wait()
    # calc the move probabilities based on visit counts at the root node
    act_visits = [(act, node._n_visits) for act, node in self._root._children.items()]
    acts, visits = zip(*act_visits)
    act_probs = softmax(1.0 / temp * np.log(np.array(visits) + 1e-10))
    return acts, act_probs
def start_threading(cookies, class_code, hashkey, img_data):
    global STOP_FLAG
    device_list = ['pp', 'cjy', 'chaoren', 'ydm']  # the services to try
    task_pool = threadpool.ThreadPool(5)  # 5 is the number of threads in the pool
    request_list = []  # holds the task list
    # Build the task list first
    for device in device_list:
        lst_vars = [cookies, class_code, hashkey, img_data, device]
        request_list.append((lst_vars, None))
    requests = threadpool.makeRequests(post_request, request_list, callback=callback)
    [task_pool.putRequest(req, block=True, timeout=20) for req in requests]
    try:
        # print(f"STOP_FLAG:{STOP_FLAG}")
        # if STOP_FLAG >= 4:
        #     STOP_FLAG = 0
        #     return False
        task_pool.wait()
    except Success as e:
        print(e)
        return 200
def main():
    # step1: get the total page count
    print "getting total ..."
    total = service.getPageTotal()
    print "total:%d" % total
    # step2: crawl the data with multiple threads
    print "getting data..."
    pool = threadpool.ThreadPool(4)  # was `pool = pool = ...`, a duplicated assignment
    requests = threadpool.makeRequests(service.getIndexSSQ, [i for i in range(1, total)], call)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    # step3: write the JSON file
    print "writing file -> data.json"
    with open('data.json', 'w') as f:
        json.dump(result, f)
    print "writing done!"
    print "Done!"
def startThreadPool():
    name_list = [
        (['caoshuai', 1], None),
        (['caoshuai', 2], None),
        (['a', 3], None),
        (['ss', 4], None),
        (['wwwwww', 12], None),
        (['m', 12], None),
        (['n', 12], None),
        (['b', 12], None),
        (['v', 12], None),
        (['x', 12], None),
        (['z', 12], None),
    ]
    pool = threadpool.ThreadPool(10)
    requests = threadpool.makeRequests(loop, name_list)  # was `requestss`
    [pool.putRequest(req) for req in requests]
    pool.wait()
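# A hypothetical `loop` worker matching the ([name, seconds], None) tuples in
# startThreadPool above (assumed; the original worker is not shown).
import time

def loop(name, seconds):
    print('%s working for %ss' % (name, seconds))
    time.sleep(seconds)  # simulate work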
def _del_cos_sync_file(self, cos_client, bucket, del_file_dict):
    if len(del_file_dict) == 0:
        return
    if self.config.get_delete_sync() == 0:
        return
    thread_worker_num = self.config.get_thread_num()
    thread_pool = threadpool.ThreadPool(thread_worker_num)
    for del_file_path in del_file_dict.keys():
        cos_path = self._local_path_to_cos(del_file_path)
        del_args = [cos_client, bucket, cos_path, self.db, del_file_dict[del_file_path]]
        args_tuple = (del_args, None)
        args_list = [args_tuple]
        requests = threadpool.makeRequests(delete_file, args_list)
        for req in requests:
            thread_pool.putRequest(req)
    thread_pool.wait()
    thread_pool.dismissWorkers(thread_worker_num, True)
def labels_multi_thread():
    global concat_csv, fail_img_path, chinese_dict, txtstr
    txtstr = []
    concat_csv = pd.read_csv(r'D:\py_projects\data\original_csv\concat_train.csv')
    print('start the game....')
    fail_img_path = []
    temp = open(r'D:\py_projects\data\chinese\chinese_all.txt',
                mode='r', encoding='utf-8').readlines()[0].split(',')
    chinese_dict = {c: i for i, c in enumerate(temp)}
    threadcount = 2000
    pool = threadpool.ThreadPool(threadcount)
    request = threadpool.makeRequests(get_labels, os.listdir(base_dir))
    [pool.putRequest(req) for req in request]
    pool.wait()
    with open('labels.txt', mode='w', encoding='utf-8') as f:
        f.write('\n'.join(txtstr))
    with open('fail_img_labels.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(fail_img_path))
    print('done!')
def test(self, _ips: list, _on_prog: _collections.Callable) -> list:
    self._on_prog = _on_prog
    self._results.clear()
    self._prog = 0
    self._all = len(_ips) * self._test_times
    _pool = _threadpool.ThreadPool(self._test_thread_amt)
    _tasks = _threadpool.makeRequests(self._test, _ips)
    _on_prog(0)
    [_pool.putRequest(_task) for _task in _tasks]
    _pool.wait()
    self._results.sort(
        key=lambda _item: (
            _item[0],
            # _item[1],
            _item[2],
            # _item[3],
            _item[4],
            # _item[5]
        ),
        reverse=True)
    return self._results
def update(blog_name, thread_num=10, log=None):
    # Get the total number of posts
    total = get_total_post(blog_name, log)
    print(total)
    if not total:
        return False
    elif total == -1:
        return -1
    db = dbm.DbManager()
    session = db.get_session()
    blog_data = session.query(model.Blog).filter(model.Blog.name == blog_name).first()
    blog_data.total_post = total
    session.add(blog_data)
    session.commit()
    session.close()
    # print(total)
    # return False
    perpage = 10
    limit = total // perpage
    # print(thread_num)
    # return False
    # Create the threads; instantiate the thread lock
    lock = threading.Lock()
    if thread_num > (limit + 1):
        thread_num = limit + 1
    log.info('Starting: launching %s threads to download %s posts' % (thread_num, total))
    # Create the thread pool
    pool = threadpool.ThreadPool(thread_num)
    requests_list = []
    for x in range(limit + 1):
        requests_list.append(([blog_name, perpage, x + 1, lock, log, thread_num], None))
    requests_res = threadpool.makeRequests(catch_html, requests_list)
    [pool.putRequest(req) for req in requests_res]
    pool.wait()
    return True
def getIndex(url):
    global num
    skuids = []
    session = requests.Session()
    session.headers = headers
    res = session.get(url, headers=headers)
    print(res.status_code)
    res.encoding = res.apparent_encoding
    soup = BeautifulSoup(res.text, 'lxml')
    items = soup.select('li.gl-item')
    for item in items[:3]:  # crawl 3 items as a test
        title = item.select_one('.p-name a em').text.strip().replace(' ', '')
        price = item.select_one('.p-price strong').text.strip().replace('¥', '')
        try:
            # selector for the shop when scraping books
            shop = item.select_one('.p-shopnum a').text.strip()
        except:
            # selector for the shop when scraping other kinds of items
            shop = item.select_one('.p-shop a').text.strip()
        link = parse.urljoin('https://', item.select_one('.p-img a').get('href'))
        SkuId = re.search(r'\d+', link).group()
        skuids.append(([SkuId, session], None))
        headers['Referer'] = f'https://item.jd.com/{SkuId}.html'
        headers['Connection'] = 'keep-alive'
        comments_num = getCommentsNum(SkuId, session)  # number of comments
        print(SkuId, title, price, shop, link, comments_num)
        print("Saving the item to the database...")
        try:
            IntoGoods(SkuId, title, price, shop, link, comments_num)
        except Exception as e:
            print(e)
            sess_db.rollback()
        num += 1
    print("Fetching comments and saving them to the database...")
    pool2 = threadpool.ThreadPool(3)  # fetch comments for up to 3 items at a time
    task = threadpool.makeRequests(getComments, skuids)
    for r in task:
        pool2.putRequest(r)
    pool2.wait()
def updateAllStockHist():
    # Get the stock code list
    connection = ENGINE.connect()
    result = connection.execute('select "stockCode" from ' + dbconf.STOCK_BASIC_TBALE_NAME)
    stockCodeList = []
    for code in result:
        stockCodeList.append(code.values()[0])
    connection.close()
    # print(stockCodeList)

    # Get each stock's last record date
    connection = ENGINE.connect()
    # The hist table may not exist yet; in that case do an initial load
    try:
        result = connection.execute('select "stockCode", max("date") from '
                                    + dbconf.STOCK_HIST_TBALE_NAME + ' group by "stockCode"')
    except Exception:
        print('No record found, going to do initial load.')
    code_date_map = {}
    for cursor in result:
        newDay = cursor.values()[1] + datetime.timedelta(1)
        code_date_map[cursor.values()[0]] = newDay.strftime("%Y-%m-%d")
    connection.close()

    # Update the stock histories one by one
    varsList = []
    for stockCode in stockCodeList:
        lastRecordDate = code_date_map.get(stockCode)
        if code_date_map.get(stockCode) is None:
            lastRecordDate = '1990-01-01'
        agrsForOneStock = [stockCode, lastRecordDate]
        varsList.append((agrsForOneStock, None))
    # print(varsList)
    pool = threadpool.ThreadPool(1)
    requests = threadpool.makeRequests(updateStockHist, varsList)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    return
def anotherway(a, b):
    a_, b_, n = expand(a, b)
    b_ = b_.T
    n = int(n / 2)
    a11, a12, a21, a22 = divide(a_, n)
    b11, b12, b21, b22 = divide(b_, n)
    p = [None, None, None, None, None, None, None, None]

    def calculate(a, b, i):
        p[i] = rise_hit_mul(a, b)

    arg = [([a11, b11, 0], {}),
           ([a12, b12, 1], {}),
           ([a11, b21, 2], {}),
           ([a12, b22, 3], {}),
           ([a21, b11, 4], {}),
           ([a22, b12, 5], {}),
           ([a21, b21, 6], {}),
           ([a22, b22, 7], {})]
    global pool
    requests = threadpool.makeRequests(calculate, arg)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    p0, p1, p2, p3, p4, p5, p6, p7 = p
    c11 = p0 + p1
    c12 = p2 + p3
    c21 = p4 + p5
    c22 = p6 + p7
    c1 = np.concatenate((c11, c12), axis=1)
    c2 = np.concatenate((c21, c22), axis=1)
    c = np.concatenate((c1, c2), axis=0)
    return c
def poc3(host, rpcPort=111, portNum=55):
    def send(port):
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect((host, port))
            # SO_LINGER with l_onoff=1 and l_linger=0 makes close() abort the
            # connection with a RST instead of a normal FIN handshake
            l_onoff = 1
            l_linger = 0
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER,
                            struct.pack('ii', l_onoff, l_linger))
            sock.close()
        except socket.error as err:
            # print err
            pass

    # multiprocess version:
    # procs = []
    # for port in xrange(rpcPort, rpcPort + portNum + 1):
    #     procs.append(mp.Process(target=send, args=(port,), kwargs={}))
    # for p in procs:
    #     p.start()
    # for p in procs:
    #     p.join()

    # threadpool version
    import threadpool
    t = threadpool.ThreadPool(100)
    req = threadpool.makeRequests(
        callable_=send,
        args_list=[port for port in xrange(rpcPort, rpcPort + portNum + 1)],
    )
    [t.putRequest(r) for r in req]
    t.wait()
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((host, rpcPort))
        sock.send(TCP_PAYLOAD)
        sock.close()
    except socket.error as err:
        print err
def update(self, city) -> (int, int):
    lock = threading.Lock()
    url = "https://{}.lianjia.com/ershoufang".format(city)
    html = reqPage(url)
    soup = BeautifulSoup(html, "html.parser")
    divs = soup.find_all('div', attrs={'data-role': 'ershoufang'})  # renamed from `list` to avoid shadowing the builtin
    ch_countryTowns = []
    pinyin_countryTowns = []
    paramList = []
    for div in divs:
        links = div.find_all("a")
        for a in links:
            href = a.get("href")
            if len(re.findall(r".*zhoubian", href)) == 0:  # skip "zhoubian" links
                pinyin_countryTowns.append(href)
                ch_countryTowns.append(a.get_text())
                group = [city, href, a.get_text()]
                paramList.append(group)
                # t = threading.Thread(target=req, args=(city, href, a.get_text(),))
                # t.start()
                # t.join()
                # threads.append(t)
    pool = threadpool.ThreadPool(len(paramList))
    requests = threadpool.makeRequests(self.reqCountryTown, paramList)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    if self.houseTotalOfCity is not None and self.houseTotalOfCity != 0:
        AddCity(city, self.cityAverage / self.houseTotalOfCity, self.houseTotalOfCity, url)
        return self.houseTotalOfCity, self.cityAverage / self.houseTotalOfCity
    else:
        print("err country town:", self.houseTotalOfCity, self.cityAverage)
        return 0, 0
def batch_proxy_get(url, proxy_infos, timeout=DEFAULT_TIMEOUT, more_headers={}):
    global proxyClient, suc_num
    if proxyClient is None:
        proxyClient = ProxyClient()
    if len(proxy_infos) == 0:
        return 0
    # Normalize "addr:port" strings into (addr, port) tuples
    if type(proxy_infos[0]) is str:
        for i in range(0, len(proxy_infos)):
            addr_port = proxy_infos[i].split(':')
            if len(addr_port) != 2:
                continue
            proxy_infos[i] = (addr_port[0], addr_port[1])
    thread_num = 2
    protocol = extract_protocol(url)
    # Run the requests through a thread pool and wait for all of them
    pool = threadpool.ThreadPool(thread_num)
    req_args = []
    for proxy_info in proxy_infos:
        req_args.append(([url, protocol, proxy_info[0], proxy_info[1],
                          timeout, more_headers], {}))
    requests = threadpool.makeRequests(proxyClient.get_feedback_full, req_args, request_back)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    pool.dismissWorkers(20, do_join=True)
    resp_sub_num = suc_num
    suc_num = 0
    return resp_sub_num
def download_raw_data(self, downloaded=False):
    """Download the data."""
    if downloaded is True:
        return
    make_sure_dirs_exist(self.data_path)
    logger.info(f'start downloading data from {self.futures_link}')
    to_update = []
    data_fac = RemoteDataFactory(self.data_path, self.ini_parser)
    for sym in self.symbols:
        rdata = data_fac.create(sym, sym, SYNC_DATA_MODE.HTTP_DOWNLOAD_CBOE)
        to_update.append(rdata)
    # if self.futures_target:
    #     # add the futures target
    #     rdata = data_fac.create(
    #         self.futures_target, self.futures_target,
    #         SYNC_DATA_MODE.PANDAS_DATAREADER_YAHOO)
    #     to_update.append(rdata)
    # for sym in generate_futures_chain(self.futures_chain_prefix,
    #                                   self.futures_chain_suffix):
    #     # add the futures chain
    #     rdata = data_fac.create(
    #         sym, sym, SYNC_DATA_MODE.PANDAS_DATAREADER)
    #     to_update.append(rdata)
    for expiration_date in self._delivery_dates:
        remote_path = os.path.join(self.futures_link, expiration_date)
        rdata = data_fac.create(expiration_date, remote_path,
                                SYNC_DATA_MODE.HTTP_DOWNLOAD_FILE)
        # (None, dict_param: dict) to pass parameters by dict
        to_update.append(rdata)
    # run the requests in the thread pool
    requests = threadpool.makeRequests(lambda x: x.sync_data(), to_update)
    pool = threadpool.ThreadPool(self.pool_size)
    [pool.putRequest(req) for req in requests]
    pool.wait()
    logger.info('all data downloaded.')
    checksums = generate_csv_checksums(self.data_path)
    # save the local files' checksums
    self.save_checksums(checksums)
def download_images(items, output_folder, prs_idx):
    RES['fail_list'] = []
    RES['success_list'] = []
    RES['output_folder'] = output_folder
    RES['prs'] = tqdm.tqdm(total=len(items), desc="prs%02d" % prs_idx, position=prs_idx)
    pool = threadpool.ThreadPool(16)
    reqs = threadpool.makeRequests(download_one_item, items)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
    RES['prs'].refresh()
    info_folder = output_folder.rstrip("/") + "-info"
    with open(os.path.join(info_folder, "fail_%02d.json" % prs_idx), 'w') as f:
        json.dump(RES['fail_list'], f, indent=2)
    with open(os.path.join(info_folder, "success_%02d.json" % prs_idx), 'w') as f:
        json.dump(RES['success_list'], f, indent=2)
    logger.info("prs%02d download done!" % prs_idx)