class Scheduler(object):
    def __init__(self):
        self.scheduled_pool = Pool()
        self.active_pool = Pool()
        self.shutting_down = False
        self.stop_event = gevent.event.Event()
        self.active_watches = {}  # TODO: use for managing adding/removing

    def add(self, delay_seconds, func, *args, **kwargs):
        # Run the job now on active_pool, then re-arm this add() on
        # scheduled_pool so the job recurs every delay_seconds.
        if self.shutting_down:
            return
        self.active_pool.spawn(func, *args, **kwargs)
        g = gevent.Greenlet(self.add, delay_seconds, func, *args, **kwargs)
        self.scheduled_pool.add(g)
        g.start_later(delay_seconds)

    def handle_signal(self, signal, frame):
        logger.info('Received {}. Shutting down.'.format(SIGNALS_TO_NAMES.get(signal)))
        self.shutting_down = True
        self.stop_event.set()

    def run(self):
        for sig in [signal.SIGINT, signal.SIGTERM]:
            signal.signal(sig, self.handle_signal)
        self.stop_event.wait()
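A minimal driver sketch for the Scheduler above. The `heartbeat` job, the `logger` setup, and the `SIGNALS_TO_NAMES` map are assumptions standing in for the original context; gevent monkey-patching is assumed so signal delivery cooperates with the event loop.

# Hypothetical driver: prints a heartbeat every 5 seconds until
# SIGINT/SIGTERM sets stop_event. Names outside the class are assumed.
from gevent import monkey; monkey.patch_all()
import logging
import signal
import gevent
import gevent.event
from gevent.pool import Pool

logger = logging.getLogger(__name__)
SIGNALS_TO_NAMES = {signal.SIGINT: 'SIGINT', signal.SIGTERM: 'SIGTERM'}

def heartbeat():
    print('still alive')

sched = Scheduler()
sched.add(5, heartbeat)  # runs now, then re-schedules itself every ~5s
sched.run()              # blocks until a signal is received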
def action(self):
    global STOCK
    pool = Pool()
    # Cancel every order priced above the current market price.
    # Materialize the list first, since self.orders is mutated below.
    cancel_orders = [o for o in self.orders if o.price > self.market]
    for o in cancel_orders:
        g = gevent.spawn(self.ft.cancel, STOCK, o.tid)
        pool.add(g)
        self.orders.remove(o)
    # Place new orders based on the current market price.
    price_set = set(o.price for o in self.orders)
    print('>>>>> price_set = %s' % price_set)
    for i in range(4):
        p = self.market - i * 40
        if p in price_set:
            continue
        order = Order(p, 250)  # buy only 250: keep the orders small

        def place_new_order(order):
            st = self.ft.order(STOCK, order.price, order.qty, 'buy', 'limit')
            order.tid = st['id']

        g = gevent.spawn(place_new_order, order)
        pool.add(g)
        self.orders.append(order)
    pool.join()
    price_set = set(o.price for o in self.orders)
    print('<<<<< price_set = %s' % price_set)
def do():
    pool = Pool(10)
    tiezis = getYunHuiTiezi()
    stop_queue = Queue()
    content = getContent()
    sql1 = r"update YunHui_tieba set stop_times = stop_times + 1 where id = %s"
    sql2 = r"update YunHui_tieba set success = success + 1 where id = %s"
    sql3 = r"update YunHui_tieba set fail = fail + 1 where id = %s"
    sql4 = r"update YunHui_data set success = success + 1 where id = %s"
    for i in tiezis:
        ltime = time.localtime()
        t = ltime.tm_hour * 60 + ltime.tm_min
        if t % i[8] == 0:
            if i[10]:
                stop_queue.put(i[0])
            else:
                if i[5]:
                    # Tiezi id, bduss, kw, fid, content, quote_id, tid
                    pool.add(gevent.spawn(client_LZL, i[0], i[4], i[2], content, i[7], i[3]))
                else:
                    # Tiezi id, bduss, kw, tid, fid, content
                    pool.add(gevent.spawn(client_Post, i[0], i[1], i[4], i[3], i[2], content))
    pool.join()
    to_mysql(stop_queue, sql1)
    to_mysql(post_success_queue, sql2)
    to_mysql(post_fail_queue, sql3)
    to_mysql(post_data_queue, sql4)
class WorkerPool(object):
    def __init__(self):
        self.pool_size = options.pool_size
        self.job_pool = Pool(size=self.pool_size)
        self.result = Queue()
        self.target_queue = Queue()

    def add_job(self, job_func, *args, **kwargs):
        # apply_async already spawns the job into the pool; add()-ing the
        # returned greenlet again would consume a second pool slot per job.
        self.job_pool.apply_async(job_func, args=args, kwds=kwargs,
                                  callback=self._call_func)

    def run(self, timeout=None):
        self.job_pool.join(timeout=timeout, raise_error=False)

    def _call_func(self, job_ret):
        if job_ret:
            self.result.put(job_ret)

    def shutdown(self):
        self.job_pool.kill()
def _merge_m3u8_by_tar_time(self, station_num: str, vod: typing.Dict, tar_time_range: typing.List):
    path = Path(self.VOD_PATH, str(station_num))
    os.makedirs(path, exist_ok=True)
    self.log.info(f'[{self.bj_id}:{station_num}] get vod m3u8 info')
    tar_video = self._parse_m3u8(vod)
    self.log.info(f'[{self.bj_id}:{station_num}] get vod m3u8 info success')
    pool = Pool(20)
    for t in tar_time_range:
        min_range, max_range = t
        min_d = Duration.set_time(min_range).to_duration()
        max_d = Duration.set_time(max_range).to_duration()
        for i in range(min_d, max_d + 1):
            if i in tar_video:
                ts_path = path.joinpath(f'{i}.ts')
                # Skip segments that already look fully downloaded (> 500 KiB).
                if os.path.isfile(ts_path) and ts_path.stat().st_size > 1024 * 500:
                    continue
                pool.add(gevent.spawn(self.down, url=tar_video[i], path=ts_path))
    pool.join()
    self.log.info(f'[{self.bj_id}:{station_num}] download ts success')
    self._ts2mp4(path, output_name=station_num)
def run(self, login=False):
    """
    tv = ThumbnailSpider('rlrlvkvk123')
    tv.run(login=True)
    # print(tv.stash[VOD_TYPE.BAD])
    # tv.test()
    # tv.test_img('36997061_0:0:0.jpg')
    # tv.test_download_img(43764953, 3600)
    :return:
    """
    self._init_spider(login)
    self.log.info('spider start')
    self.valid_thumbnail()
    vod = self.vod(self.VOD_URL_FORMAT, VOD_TYPE.VOD)
    self.log.info('prepare vod')
    # user_vod = self.vod(self.USER_VOD_FORMAT, VOD_TYPE.USER_VOD)
    self.log.info('prepare user vod')
    # vod = vod | user_vod
    # vod = user_vod
    self.log.info('prepare vod success')
    pool = Pool(4)
    for v in vod:
        pool.add(gevent.spawn(self.download_vod, v))
    pool.join()
    self.log.info('spider end')
def custom_gevent(func, fuzzing):
    '''Custom coroutine fan-out: run func over every fuzzing payload,
    at most 200 greenlets at a time.'''
    pool = Pool(200)
    for text in fuzzing:
        pool.add(gevent.spawn(func, text))
    pool.join()
def run100():
    # Not using a bounded gevent.pool.Pool here, for memory efficiency.
    pool = Pool()
    for b in xrange(1000):
        pool.add(client.call('echo', params))
    # Blocks until all results have arrived.
    pool.join()
class WorkerPool(object):
    JOB_UNSTART = 0   # poc not run yet
    JOB_RUNNING = 1
    JOB_FINISHED = 2  # poc ran OK
    JOB_ERROR = -1    # error encountered while running the poc
    JOB_ABORT = -2    # running poc was aborted, i.e. unfinished

    def __init__(self, concurrency=10):
        self.concurrency = concurrency
        self.jobPool = Pool(size=concurrency)
        self.errNum = 0  # failed jobs (runtime error, but not aborted)
        self.successNum = 0
        self.totalNum = 0
        self.results = {}

    def work(self, iterJobFuncArgs, jobFunc, timeout=None):
        for jobFuncArgs in iterJobFuncArgs:
            self.results[hash(str(jobFuncArgs))] = {
                'state': self.JOB_UNSTART,
                'args': jobFuncArgs,
            }
            self.totalNum += 1
            # apply_async already spawns the job into the pool, so the
            # returned greenlet must not be add()-ed a second time.
            self.jobPool.apply_async(
                self._doJob,
                args=(jobFunc, jobFuncArgs),
                kwds=None,
                callback=self._cbJobFinished
            )
        self.jobPool.join(timeout=timeout, raise_error=False)
        return self.results

    def _cbJobFinished(self, jobResult):
        if jobResult['state'] == self.JOB_ERROR:
            self.errNum += 1
        elif jobResult['state'] == self.JOB_FINISHED:
            self.successNum += 1

    def _doJob(self, jobFunc, jobFuncArgs):
        jobId = hash(str(jobFuncArgs))
        self.results[jobId]['state'] = self.JOB_RUNNING
        try:
            self.results[jobId]['jobRet'] = \
                jobFunc(*jobFuncArgs) if isinstance(jobFuncArgs, list) \
                else jobFunc(jobFuncArgs)
            self.results[jobId]['state'] = self.JOB_FINISHED
        except Exception as err:
            self.results[jobId]['exception'] = str(err)
            self.results[jobId]['state'] = self.JOB_ERROR
        return self.results[jobId]

    def handleAbort(self):
        for jobId in self.results.keys():
            if self.results[jobId]['state'] in (self.JOB_RUNNING, self.JOB_UNSTART):
                self.results[jobId]['state'] = self.JOB_ABORT
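A minimal usage sketch for this WorkerPool; `check_target` is a hypothetical stand-in for a real poc function.

# Hypothetical usage: fan a list of targets through one check function
# and inspect the per-job state records afterwards.
def check_target(target):
    # stand-in for a real poc; returns a verdict string
    return 'open' if target.endswith(':80') else 'closed'

wp = WorkerPool(concurrency=5)
results = wp.work(['10.0.0.1:80', '10.0.0.2:22'], check_target, timeout=30)
for jobId, info in results.items():
    print(info['args'], info['state'], info.get('jobRet'))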
def run_task_in_gevent(url_list, poc_file_dict):
    # url_list: each process is assigned its own share of the URLs.
    poc = Poc_Launcher()
    pool = Pool(100)
    for target in url_list:
        for plugin_type, poc_files in poc_file_dict.iteritems():
            for poc_file in poc_files:
                if target and poc_file:
                    target = fix_target(target)
                    pool.add(gevent.spawn(poc.poc_verify, target, plugin_type, poc_file))
    pool.join()
def get_all_tbs():
    global tbss
    pool = Pool(20)
    users = getUsers()
    for user in users:
        userid = user[0]
        bduss = user[1]
        pool.add(gevent.spawn(add_tbs_to_tbss, userid, bduss))
    pool.join()
    print('all tbs end')
def updata():
    pool = Pool(20)
    sql = (r"INSERT INTO YunHui_sign (`fid`,`name`,`level_id`,`cur_score`,`is_sign`,`user_id`) "
           r"SELECT * from (select %s,%s, %s, %s,1,%s) as tmp "
           r"WHERE NOT exists (select fid,user_id from YunHui_sign where fid = %s and user_id = %s) LIMIT 1")
    users = getUsers()
    for user in users:
        username = user[2]
        print(username)
        userid = user[0]
        bduss = user[1]
        pool.add(gevent.spawn(update_one, userid, bduss))
    pool.join()
    to_mysql(update_queue, sql)
def clear_old_orders(self):
    global STOCK
    print('clear old orders...')
    old_orders = self.ft.my_orders()['orders']
    pool = Pool()
    for o in old_orders:
        if o['symbol'] != STOCK:
            continue
        print('id = %s' % o['id'])
        g = gevent.spawn(self.ft.cancel, STOCK, o['id'])
        pool.add(g)
    pool.join()
def run_bugscan(url_list):
    from tools.pocs.bugscan import Bugscan
    PLUGINS_DIR = 'D:\\Projects\\xlcscan\\tools\\pocs\\'
    poc = Bugscan()
    pool = Pool(100)
    for target in url_list:
        for poc_file in bugscan_name_list:
            if target and poc_file:
                target = fix_target(target)
                poc_file = PLUGINS_DIR + 'bugscan' + '\\' + poc_file
                pool.add(gevent.spawn(poc.run, target, poc_file))
    pool.join()
def download_vod(self, station_num: int, rewrite=False):
    video_info = self._get_video_info(station_num)
    if not video_info:
        self.log.error(f'BAD VOD {station_num}')
        return
    total = video_info['total']
    step = self.thumbnailDuration * self.rowCount * self.columnCount
    pool = Pool(10)
    for i in range(0, total, step):
        pool.add(gevent.spawn(self.download_img, station_num, i, rewrite))
    pool.join()
    self.log.info(f'[{station_num}:vod] success')
def main(psize, filename=None):
    if filename:
        urls = Queue()
        results = Queue()
        pool = Pool(int(psize))
        reader = gevent.spawn(readfile, filename, urls)
        request = gevent.spawn(work_input_file, urls, results, reader)
        pool.add(reader)
        pool.add(request)
        pool.join()
        print 'results:', results.qsize()
        print 'urls left:', urls.qsize()
        return results
def map_workers(iterator, poolsize, func, *args, **kwargs):
    '''Concurrently process an iterator with a pool of workers.

    :param iterator: file object, set, list, ...
    :param poolsize: number of workers to run at a time.
    '''
    pool = Pool(poolsize)
    for item in iterator:
        worker = new_worker(func, item, *args, **kwargs)
        # apply_async already spawns into the pool; no extra add() needed.
        pool.apply_async(func=worker.start)
    pool.join()
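A usage sketch, assuming `new_worker(func, item, *args, **kwargs)` from the original context returns an object whose `.start()` runs `func(item, ...)`; the file name and `fetch` job are hypothetical.

# Hypothetical usage: process each line of a file with 8 workers.
def fetch(line, timeout=10):
    print('fetching %s (timeout=%ss)' % (line.strip(), timeout))

with open('urls.txt') as fp:  # hypothetical input file
    map_workers(fp, 8, fetch, timeout=5)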
def run_task_in_gevent(url_list, poc_file_dict):
    poc = Poc_Launcher()
    pool = Pool(100)
    for target in url_list:
        for poc_file in poc_file_dict:
            if target and poc_file:
                try:
                    target = fix_domain(target)
                except Exception as e:
                    target = fix_host(target)
                # print target, poc_file, "^^^^^^^^"
                pool.add(gevent.spawn(poc.poc_verify, target, poc_file))
    pool.join()
def check_bduss():
    # Check whether each user's bduss cookie is still valid.
    pool = Pool(20)
    users = getUsers()
    sql1 = "delete from YunHui_user where id = %s"
    sql2 = "delete from YunHui_sign where user_id = %s"
    sql3 = "delete from YunHui_tieba where user_id = %s"
    for user in users:
        userid = user[0]
        bduss = user[1]
        pool.add(gevent.spawn(check_bduss_one, userid, bduss))
    pool.join()
    to_mysql(check_queue, sql1)
    to_mysql(check_queue, sql2)
    to_mysql(check_queue, sql3)
def sign():
    # Main sign-in routine.
    pool = Pool(20)
    global tbss
    sql = "update YunHui_sign set is_sign = 1 where user_id = %s and fid = %s"
    # Fetch tbs tokens for every user first.
    get_all_tbs()
    users = getUsers()
    for user in users:
        userid = user[0]
        bduss = user[1]
        tbs = tbss[userid]
        tiebas = getTiebas(user[0])
        for tieba in tiebas:
            print(tieba)
            pool.add(gevent.spawn(sign_one, userid, bduss, tieba[1], tieba[2], tbs))
    pool.join()
    to_mysql(sign_queue, sql)
def newSign():
    pool = Pool(20)
    userlist = Queue()
    sql = r"update YunHui_sign set `is_sign` = 1 where user_id = %s and fid = %s"
    sql2 = r"update signin_user set flag = 2 where id = %s"
    users = getFliterUser(1)
    for user in users:
        bduss = user[1]
        tbs = getTBS(bduss)
        userid = user[0]
        userlist.put(userid)
        tiebas = getTiebas(user[0])
        for tieba in tiebas:
            print(tieba[1])
            pool.add(gevent.spawn(sign_one, userid, bduss, tieba[1], tieba[2], tbs))
    pool.join()
    to_mysql(sign_queue, sql)
    to_mysql(userlist, sql2)
class Downloader(object):
    def __init__(self, concurrent=64):
        self.proxy_conf = OnlineConfig().proxy
        self.pool = Pool(concurrent)

    def add_task(self, task, proxy):
        self.pool.add(gevent.spawn(self._download, task, proxy))

    def free_count(self):
        return self.pool.free_count()

    @staticmethod
    def _before_download(task, proxy):
        module = ExternManager().get_model(task.s_platform, task.s_feature + '.request')
        request = module(task, proxy) if module else RequestExtra(task, proxy)
        return request

    @staticmethod
    def _after_download(task, request, response, proxy):
        module = ExternManager().get_model(task.s_platform, task.s_feature + '.response')
        response = module(task, request, response, proxy) \
            if module else ResponseExtra(task, request, response, proxy)
        return response

    def _download(self, task, proxy):
        # Drop references eagerly: these greenlets live in a long-running
        # pool and responses can be large.
        request = None
        req_response = None
        try:
            request = self._before_download(task, proxy)
            req_response = requests.request(**request())
            response = self._after_download(task, request, req_response, proxy)
            del response
            del req_response
            del request
        except Exception:
            if req_response:
                del req_response
            if request:
                del request
        finally:
            del task
            del proxy
def process_nodes_worker(self, name, classname, nodes, nodeinfo, command, args):
    module_name = 'xcatagent.%s' % name
    obj_func = utils.class_func(module_name, classname)
    gevent_pool = Pool(1000)
    for node in nodes:
        obj = obj_func(self.messager, node, nodeinfo[node])
        if not hasattr(obj, command):
            self.messager.error('%s: command %s is not supported for %s'
                                % (node, command, classname))
            continue
        func = getattr(obj, command)
        try:
            gevent_pool.add(gevent.spawn(func, args))
        except Exception:
            error = '%s: Internal error occurred in gevent' % node
            self.messager.error(error)
    gevent_pool.join()
def vod(self, url: str, stash_key: str) -> set:
    """
    Fetch vod ids, paging through every result page.
    :param url:
    :param stash_key:
    :return:
    """
    is_ok, meta = self._get_vod(1, url, stash_key, append=False, is_check=True)
    if not is_ok:
        pool = Pool(10)
        for i in range(2, meta['last_page'] + 1):
            pool.add(gevent.spawn(self._get_vod, i, url, stash_key))
        pool.join()
        return self.stash.get(stash_key, set())
    return meta
class InnerServer(object):
    def __init__(self):
        printmsg("__init__")
        # server() blocks in serve_forever(), so it must be spawned as a
        # greenlet rather than called inline.
        self.pool = Pool(100)  # one Pool carries all the I/O greenlets
        self.pool.add(gevent.spawn(self.server))
        printmsg("__init__ed")

    def testPort(self, ipport):
        printmsg("port [%s] test starting" % str(ipport))
        try:
            clientSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            clientSocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            clientSocket.connect(ipport)
            clientSocket.send("it's server msg")
            s = clientSocket.recv(1024)
            printmsg("port [%s] test ok!" % str(ipport))
        except Exception as e:
            raise
def newUpdate():
    sql = (r"INSERT INTO YunHui_sign (`fid`,`name`,`level_id`,`cur_score`,`is_sign`,`user_id`) "
           r"SELECT * from (select %s,%s, %s, %s,0,%s) as tmp "
           r"WHERE NOT exists (select fid,user_id from YunHui_sign where fid = %s and user_id = %s) LIMIT 1")
    sql2 = r"update YunHui_user set flag = 1 where id = %s"
    db.autocommit(True)
    pool = Pool(20)
    userlist = Queue()
    # Users whose followed-forum list has not been refreshed yet.
    users = getFliterUser(0)
    if users == ():
        return 1
    for user in users:
        username = user[2]
        print(username)
        userid = user[0]
        userlist.put(userid)
        bduss = user[1]
        pool.add(gevent.spawn(update_one, userid, bduss))
    pool.join()
    to_mysql(update_queue, sql)
    to_mysql(userlist, sql2)
def stock_data(dt_from, dt_to, ls_symbols):
    ls_keys = ['Open', 'Close', 'High', 'Low', 'Volume', 'Adj_Close']
    d = {}

    def f(sym):
        data = fetch_data(dt_from, dt_to, sym)
        dates = [x['Date'] for x in data]
        values = []
        for key in ls_keys:
            v = np.array([np.float(x[key]) for x in data]).reshape(-1, 1)
            values.append(v)
        values = np.hstack(values)
        df = pd.DataFrame(values, index=dates, columns=ls_keys)
        d[sym] = df

    pool = Pool(size=8)
    for sym in ls_symbols:
        g = gevent.spawn(f, sym)
        pool.add(g)
    pool.join()
    return d
def update_order_status(self):
    global STOCK
    pool = Pool()
    self.orders.sort(key=lambda o: o.price, reverse=True)
    for o in self.orders:
        o.diff = 0
    for o in self.orders[:self.max_size]:
        def f(order):
            st = self.ft.order_status(STOCK, order.tid)
            filled = sum(x['qty'] for x in st['fills'])
            order.diff = filled - order.filled
            order.filled = filled
        g = gevent.spawn(f, o)
        pool.add(g)
    pool.join()
    # Lowest fill price; it may well be the current market price.
    deals = [o.price for o in self.orders if o.diff]
    self.orders = [o for o in self.orders if o.filled < o.qty]
    price = 0 if not deals else deals[-1]  # min
    # price = 0 if not deals else deals[0]  # max
    return price
class WorkerPool(object):
    def __init__(self, pool_size=5000):
        self.job_pool = Pool(size=pool_size)
        self.result = Queue()
        self.target_queue = Queue()

    def add_job(self, job_func, *args, **kwargs):
        # apply_async spawns the job into the pool itself; re-add()-ing the
        # returned greenlet would burn a second pool slot per job.
        self.job_pool.apply_async(job_func, args=args, kwds=kwargs,
                                  callback=self._call_func)

    def run(self, timeout=None):
        self.job_pool.join(timeout=timeout, raise_error=False)

    def _call_func(self, job_ret):
        if job_ret:
            self.result.put(job_ret)

    def shutdown(self):
        self.job_pool.kill()
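A usage sketch for this WorkerPool; `probe` is a hypothetical job whose truthy return value reaches `self.result` through the callback.

# Hypothetical usage: collect results through the callback-fed queue.
def probe(host):
    return host, 'alive'  # stand-in for real work

wp = WorkerPool(pool_size=100)
for h in ('10.0.0.1', '10.0.0.2'):
    wp.add_job(probe, h)
wp.run(timeout=60)
while not wp.result.empty():
    print(wp.result.get())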
class SocketPool(object):
    def __init__(self):
        self.pool = Pool(1)
        # server() blocks in serve_forever(), so run it as its own greenlet;
        # keep the pool's single slot free for one client handler.
        self.server_greenlet = gevent.spawn(self.server)

    # Line-oriented variant, suited to chat-style "send on Enter" input.
    def listen(self, socket, address):
        f = socket.makefile()
        print "listen"
        while True:
            name = f.readline().strip()
            print name

    def listen2(self, socket, address):
        print "listen2"
        print self.pool.free_count()
        while True:
            name = socket.recv(1010).strip()
            print name

    def add_handler(self, socket, address):
        if self.pool.full():
            raise Exception("At maximum pool size")
        else:
            print "pool insert"
            # Pass the callable plus its arguments; calling listen2(...)
            # here would block instead of spawning a greenlet.
            s = self.pool.spawn(self.listen2, socket, address)
            # self.pool.spawn(self.listen, socket, address)

    def shutdown(self):
        self.pool.kill()

    def server(self):
        print "server"
        server = StreamServer(('0.0.0.0', 8000), self.add_handler)
        server.serve_forever()
def request_handler(data):
    cal_greenlet = gevent.spawn(cal_dealing, queue)
    # Split the decoded list into chunks of split_size.
    chunk = lambda i, s: [i[x:x + s] for x in xrange(0, len(i), s)]
    split_size = 10
    lists = [CrawlerClient.decode_print(i) for i in eval(data.decode('hex'))]
    splited = chunk(lists, split_size)
    for keys in splited:
        pool = Pool(split_size)
        for key in keys:
            # spawn() already adds the greenlet to the pool.
            pool.spawn(CrawlerClient.get_detail_page, key)
        pool.join()
    queue.put((None, None))
    result = cal_greenlet.get()
    return result
def main_loop(config):
    """
    Main application loop.

    :param config: configuration
    :type config: Config

    Algorithm:
    * Open a connection to tarantool.queue, using the config.QUEUE_* settings.
    * Create the worker pool.
    * Create a queue into which workers put completed tasks.
    * While the number of workers <= config.WORKER_POOL_SIZE, take a task
      from tarantool.queue and start a greenlet to process it.
    * Send notifications to tarantool.queue that tasks are done.
    * Sleep for config.SLEEP seconds.
    """
    logger.info('Connect to queue server on {host}:{port} space #{space}.'.format(
        host=config.QUEUE_HOST, port=config.QUEUE_PORT, space=config.QUEUE_SPACE))
    queue = tarantool_queue.Queue(
        host=config.QUEUE_HOST, port=config.QUEUE_PORT, space=config.QUEUE_SPACE)
    logger.info('Use tube [{tube}], take timeout={take_timeout}.'.format(
        tube=config.QUEUE_TUBE, take_timeout=config.QUEUE_TAKE_TIMEOUT))
    tube = queue.tube(config.QUEUE_TUBE)
    logger.info('Create worker pool[{size}].'.format(size=config.WORKER_POOL_SIZE))
    worker_pool = Pool(config.WORKER_POOL_SIZE)
    processed_task_queue = gevent_queue.Queue()
    logger.info('Run main loop. Worker pool size={count}. Sleep time is {sleep}.'.format(
        count=config.WORKER_POOL_SIZE, sleep=config.SLEEP))
    while run_application:
        free_workers_count = worker_pool.free_count()
        logger.debug('Pool has {count} free workers.'.format(count=free_workers_count))
        for number in xrange(free_workers_count):
            logger.debug('Get task from tube for worker#{number}.'.format(number=number))
            task = tube.take(config.QUEUE_TAKE_TIMEOUT)
            if task:
                logger.info('Start worker#{number} for task id={task_id}.'.format(
                    task_id=task.task_id, number=number))
                worker = Greenlet(
                    notification_worker, task, processed_task_queue,
                    timeout=config.HTTP_CONNECTION_TIMEOUT, verify=False)
                worker_pool.add(worker)
                worker.start()
        done_with_processed_tasks(processed_task_queue)
        sleep(config.SLEEP)
        if break_func_for_test():
            break
    else:
        logger.info('Stop application loop.')
jd_msg = "Boom" test_url = "http://www.xiachufang.com" def deal_with(): while True: try: now_id = gevent.getcurrent() msg = msg_queue.get_nowait() print("handle " + msg) print('now start with now_id: %s' % now_id) requests.get(test_url) print('now end with now_id: %s' % now_id) except Empty: gevent.sleep(0) def product_msg(jd_msg): while True: try: msg_queue.put_nowait(jd_msg) print(msg_queue.qsize()) except Full: gevent.sleep(5) jd_pool.add(gevent.spawn(product_msg, jd_msg)) for i in range(10): jd_pool.add(gevent.spawn(deal_with)) jd_pool.join()
class Connection(object):
    def __init__(self, access_key_id=None, secret_access_key=None,
                 hostname=None, concurrency=5):
        if access_key_id is None:
            try:
                access_key_id = os.environ['AWS_ACCESS_KEY_ID']
            except KeyError:
                raise AccessKeyIdNotFound()
        if secret_access_key is None:
            try:
                secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
            except KeyError:
                raise SecretAccessKeyNotFound()
        if hostname is None:
            hostname = "s3.amazonaws.com"
        self.access_key_id = access_key_id
        self.secret_access_key = secret_access_key
        self.hostname = hostname
        self.concurrency = concurrency
        self._joined = False
        self.stats = {'GET': 0, 'POST': 0, 'PUT': 0, 'DELETE': 0, 'HEAD': 0}

    @property
    def concurrency(self):
        return self._concurrency

    @concurrency.setter
    def concurrency(self, val):
        try:
            self._pool.join()
        except AttributeError:
            pass
        if val > 0:
            self._pool = Pool(val)
        self._concurrency = val

    # Join requires some strange context management because it's
    # possible for joined methods to themselves call join. If
    # these methods then saturate the pool, the joins that they're
    # waiting on will never complete. To counteract this we allow
    # another greenlet into the pool for the duration of the join
    # call *if* the join is within a join already.
    @contextmanager
    def joincontext(self):
        old = self._joined
        self._joined = True
        if old:
            self._pool._semaphore.counter += 1
            yield
            self._pool._semaphore.counter -= 1
        else:
            yield
        self._joined = old

    def join(self, functions):
        if self.concurrency <= 0:
            # Useful for debugging
            out = []
            for f in functions:
                if hasattr(f, '__iter__'):
                    out.append(f[0](*f[1:]))
                else:
                    out.append(f())
            return out
        with self.joincontext():
            greenlets = []
            for f in functions:
                if hasattr(f, '__iter__'):
                    greenlet = gevent.spawn(*f)
                else:
                    greenlet = gevent.spawn(f)
                self._pool.add(greenlet)
                greenlets.append(greenlet)
            gevent.joinall(greenlets, raise_error=True)
            return [g.get() for g in greenlets]

    # Here be dragons
    def make_request(self, method, bucket, key=None, params=None,
                     data=None, headers=None):
        # Remove params that are set to None
        if isinstance(params, dict):
            for k, v in params.copy().items():
                if v is None:
                    params.pop(k)
        # Construct target url
        url = 'http://{}.{}'.format(bucket, self.hostname)
        url += '/{}'.format(key) if key is not None else '/'
        if isinstance(params, dict) and len(params) > 0:
            url += '?{}'.format(urllib.urlencode(params))
        elif isinstance(params, basestring):
            url += '?{}'.format(params)
        # Make headers case insensitive
        if headers is None:
            headers = {}
        headers = CaseInsensitiveDict(headers)
        headers['Host'] = '{}.{}'.format(bucket, self.hostname)
        if data is not None:
            try:
                raw_md5 = utils.f_md5(data)
            except:
                m = hashlib.md5()
                m.update(data)
                raw_md5 = m.digest()
            md5 = b64encode(raw_md5)
            headers['Content-MD5'] = md5
        else:
            md5 = ''
        try:
            content_type = headers['Content-Type']
        except KeyError:
            content_type = ''
        date = formatdate(timeval=None, localtime=False, usegmt=True)
        headers['x-amz-date'] = date
        # Construct canonicalized amz headers string
        canonicalized_amz_headers = ''
        amz_keys = [k for k in list(headers.keys()) if k.startswith('x-amz-')]
        for k in sorted(amz_keys):
            v = headers[k].strip()
            canonicalized_amz_headers += '{}:{}\n'.format(k.lower(), v)
        # Construct canonicalized resource string
        canonicalized_resource = '/' + bucket
        canonicalized_resource += '/' if key is None else '/{}'.format(key)
        if isinstance(params, basestring):
            canonicalized_resource += '?{}'.format(params)
        elif isinstance(params, dict) and len(params) > 0:
            canonicalized_resource += '?{}'.format(urllib.urlencode(params))
        # Construct string to sign
        string_to_sign = method.upper() + '\n'
        string_to_sign += md5 + '\n'
        string_to_sign += content_type + '\n'
        string_to_sign += '\n'  # date is always set through x-amz-date
        string_to_sign += canonicalized_amz_headers + canonicalized_resource
        # Create signature
        h = hmac.new(self.secret_access_key, string_to_sign, hashlib.sha1)
        signature = b64encode(h.digest())
        # Set authorization header
        auth_head = 'AWS {}:{}'.format(self.access_key_id, signature)
        headers['Authorization'] = auth_head
        # Prepare Request
        req = Request(method, url, data=data, headers=headers).prepare()
        # Log request data.
        # Prepare request beforehand so requests-altered headers show.
        # Combine into a single message so we don't have to bother with
        # locking to make lines appear together.
        log_message = '{} {}\n'.format(method, url)
        log_message += 'headers:'
        for k in sorted(req.headers.keys()):
            log_message += '\n {}: {}'.format(k, req.headers[k])
        log.debug(log_message)
        # Send request
        resp = Session().send(req)
        # Update stats, log response data.
        self.stats[method.upper()] += 1
        log.debug('response: {} ({} {})'.format(resp.status_code, method, url))
        # Handle errors
        if resp.status_code // 100 != 2:
            soup = BeautifulSoup(resp.text)
            error = soup.find('error')
            log_message = "S3 replied with a non-2xx response code!\n"
            log_message += ' request: {} {}\n'.format(method, url)
            for c in error.children:
                error_name = c.name
                error_message = c.text.encode('unicode_escape')
                log_message += ' {}: {}\n'.format(error_name, error_message)
            log.debug(log_message)
            code = error.find('code').text
            message = error.find('message').text
            raise S3ResponseError(code, message, resp)
        return resp
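The joincontext() bookkeeping above matters when a joined function itself calls join(). A sketch of that nesting (bucket and key names are hypothetical, and real AWS credentials would be required):

# Hypothetical nested-join usage: fetch_pair runs inside conn.join()
# and itself calls conn.join(), which is exactly the case the
# joincontext() counter adjustment exists to keep from deadlocking.
def fetch_pair(conn):
    return conn.join([
        (conn.make_request, 'GET', 'my-bucket', 'a.txt'),
        (conn.make_request, 'GET', 'my-bucket', 'b.txt'),
    ])

conn = Connection(concurrency=2)
results = conn.join([(fetch_pair, conn)])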
# coding=utf-8
import gevent
from gevent.queue import Queue
from gevent.pool import Pool
from gevent import getcurrent


def DoSomething():
    print "thread %s " % id(getcurrent())
    gevent.sleep(3)

# Observed in this test: a greenlet add()-ed beyond the pool's size limit
# still starts executing, because gevent.spawn() starts it before add()
# blocks -- so the effective capacity is pool size + 1.
# greenlet objects can be reused in this sliding-window pattern.
pool = Pool(2)  # can run n + 1 tasks in parallel
print pool.free_count()
pool.add(gevent.spawn(DoSomething))
pool.join()
raw_input("waiting...")
# print "stage"
# for i in range(10):
#     pool.add(gevent.spawn(DoSomething))
# pool.join()
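The size + 1 observation above follows from the order of operations: gevent.spawn() starts the greenlet right away, and only the subsequent Pool.add() blocks on the pool's semaphore. A small sketch that makes this visible (Pool.spawn(), by contrast, would not start a greenlet until a slot frees):

import gevent
from gevent.pool import Pool

def task(n):
    print('task %d started' % n)
    gevent.sleep(1)
    print('task %d done' % n)

pool = Pool(1)
pool.add(gevent.spawn(task, 1))  # fills the only slot
g = gevent.spawn(task, 2)        # starts running regardless of the pool...
pool.add(g)                      # ...while add() blocks until task 1 finishes
pool.join()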
def main(main_keyword):
    running = True
    master_key = main_keyword
    # Split a list into chunks of s items.
    chunk = lambda i, s: [i[x:x + s] for x in xrange(0, len(i), s)]
    split_size = 5
    start = time.time()
    thread_dealing = CalDealing(queue)
    thread_dealing.start()
    pool = Pool(size=32)
    pool.add(gevent.spawn(get_keyword_list, master_key))
    pool.add(gevent.spawn(get_relative_list, master_key))
    pool.join()
    ##########################################################
    allkeys = keywords_map.keys() + relative_map.keys()
    random.shuffle(allkeys)
    splited = chunk(allkeys, split_size)
    for keys in splited:
        pool = Pool(split_size)
        for key in keys:
            # spawn() already adds the greenlet to the pool.
            pool.spawn(get_final_keywords, key)
        pool.join()
    ##########################################################

    def decode_print(v):
        try:
            return v.decode('GBK').encode('UTF-8')
        except:
            return v.encode('UTF-8')

    count = 1
    last_list = list()
    for k, v in final_keywords.items():
        for i in v:
            last_list.append(i)
            count += 1
    # Remove duplicate keywords.
    last_list = {}.fromkeys(last_list).keys()
    ##########################################################
    random.shuffle(last_list)
    node_jobs = get_node_jobs(last_list)
    jobs = []
    for node in node_jobs:
        jobs.append(gevent.spawn(send_task, node['address'],
                                 str(node['parts']).encode('hex')))
    gevent.joinall(jobs)
    final_dealing = {}
    for job in jobs:
        values = eval(job.value.decode('hex'))
        for k, v in values.items():
            final_dealing[k] = v
    ##########################################################
    end = time.time()
    # Sort by total sales volume.
    final_dealing = sorted(final_dealing.items(), key=lambda x: x[1][0], reverse=True)
    t = PrettyTable(["ID", "Name", "Total sales", "Total items"])
    t.align[1] = 'l'
    t.align[2] = 'r'
    t.align[3] = 'r'
    t.left_padding = 1
    for i in final_dealing:
        t.add_row([final_dealing.index(i), decode_print(i[0]), str(i[1][0]), str(i[1][1])])
    running = False
    return count, last_list, end - start, t
class SpiderPack(object):
    def __init__(self, machine_id, pack_id, num_slot, **kwargs):
        self.machine_id = machine_id
        self.pack_id = pack_id
        if num_slot < 0 or num_slot > SPIDER_MAX_WORKER:
            raise ValueError("SpiderPack: invalid number of downloading slots: %s." % num_slot)
        self.slots = num_slot
        self.sched = kwargs.get('scheduler', Scheduler())
        self.redis = kwargs.get('redis', redis.Redis())
        self.store = kwargs.get('content_store', ContentStore())
        self.crawl_cache = kwargs.get('crawl_cache', CrawlCache(self.redis))
        self.downloader = Downloader(self.store, self.crawl_cache)
        ##
        ## two extra greenlets:
        ##   * ContentStore: an actor for handling Hbase append().
        ##   * local _monitor: to report the spider's work.
        ##
        self.pool = Pool(self.slots + 2)

    def run(self):
        slot_list = []
        for i in range(1, self.slots + 1):
            slot_key = "slot:%s:%s:%s" % (self.machine_id, self.pack_id, i)
            slot_list.append(slot_key)
            self.pool.spawn(self._worker, i)
        ## register downloader slots
        self.sched.addSlots(slot_list)
        ## local management thread
        self.pool.spawn(self._monitor, slot_list)
        ## content store actor thread: start() both adds the greenlet to
        ## the pool and starts it, so no separate add() is needed.
        self.pool.start(self.store)
        self.pool.join()

    def _download(self, url, qps):
        start, end, need_abort = self.downloader.get(url)
        if need_abort:
            return need_abort
        crawl_delay = 1.0 / qps
        ## Treat a sleep_time below the 0.1s threshold as 0: just yield once.
        sleep_time = crawl_delay - (end - start)
        if sleep_time < 0.1:
            sleep_time = 0
        else:
            ## The 'end' time the Downloader returns doesn't take into
            ## account the overhead of content store and crawl cache
            ## communications; compensate for that if the rtt is too small.
            end = time.time()
            sleep_time = crawl_delay - (end - start)
            if sleep_time < 0.1:
                sleep_time = 0
        ## Always yield for gevent scheduling after a download.
        gevent.sleep(sleep_time)
        return need_abort

    def _worker(self, slot_id):
        """Downloading worker thread."""
        slot_key = "slot:%s:%s:%s" % (self.machine_id, self.pack_id, slot_id)
        while True:
            qps, batch = self.sched.getBatch(slot_key)
            if batch:
                print '+++++++++++++++++++++++++++++++++++++++++: %s : %s of urls in batch' % (qps, len(batch))
                aborted = False
                for url in batch:
                    aborted = self._download(url, qps)
                    if aborted:
                        break
                self.sched.ackBatch(slot_key, aborted)
            else:
                gevent.sleep(3)

    def _monitor(self, slot_list):
        """Write a heartbeat signature to redis every SLOT_HEARTBEAT_INTERVAL.

        Note we can also report stats to the scheduler here, for example:
          * round-trip info for recent downloads
          * failure rate per host
        """
        while True:
            gevent.sleep(SLOT_HEARTBEAT_INTERVAL)
            self.sched.reportSlots(slot_list)