from gevent import Timeout
from gevent.pool import Pool


def do_by_gevent_pool(pool_size=100, job_func=None, loop_items=None,
                      timeout=None, wait_timeout=5 * 60, **kwargs):
    if not job_func or not loop_items:
        return
    worker_pool = Pool(pool_size)
    if callable(loop_items):
        for item in loop_items():
            while worker_pool.full():
                try:
                    worker_pool.wait_available(timeout=wait_timeout)
                except Timeout:
                    worker_pool.kill()
            worker_pool.spawn(job_func, item, **kwargs)
    else:
        for item in loop_items:
            while worker_pool.full():
                try:
                    worker_pool.wait_available(timeout=wait_timeout)
                except Timeout:
                    worker_pool.kill()
            worker_pool.spawn(job_func, item, **kwargs)
    try:
        worker_pool.join(timeout=timeout)
        return True  # indicates all jobs finished
    except Exception:
        return False
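# Usage sketch for do_by_gevent_pool above. The job function and item range are
# illustrative assumptions, not part of the original source; note that extra
# keyword arguments are forwarded to every spawned job.
def _print_item(item, prefix=''):
    # hypothetical job: handle a single work item
    print(prefix + str(item))

ok = do_by_gevent_pool(pool_size=10,
                       job_func=_print_item,
                       loop_items=range(100),
                       timeout=60,
                       prefix='item: ')
print('all done' if ok else 'timed out or failed')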
from socket import socket, AF_INET, SOCK_DGRAM
from gevent.pool import Pool


class GEventStatsdClient(StatsdClient):
    """GEvent enabled statsd client."""

    def __init__(self, pool_size=None, host=None, port=None, prefix=None,
                 sample_rate=None):
        """
        Create a gevent-enabled statsd client.

        :param pool_size: optional size of the greenlet pool
        :param host: hostname of the statsd server
        :param port: port of the statsd server
        :param prefix: user-defined prefix
        :param sample_rate: rate at which stats are dropped
        """
        super(GEventStatsdClient, self).__init__(host, port, prefix, sample_rate)
        self._send_pool = Pool(pool_size or STATSD_GREEN_POOL_SIZE)
        self._socket = socket(AF_INET, SOCK_DGRAM)

    def _socket_send(self, stat):
        """
        Override the base class's send method to schedule a UDP write.

        :param stat: stat string to write
        """
        # if we exceed the pool we drop the stat on the floor
        if not self._send_pool.full():
            # We can't monkey patch this as we don't want to ever block the
            # calling greenlet.
            self._send_pool.spawn(self._socket.sendto, stat,
                                  (self._host, self._port))
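# The core idea of _socket_send above, shown standalone: schedule the UDP write
# on a greenlet and silently drop the stat when the pool is saturated, so the
# caller never blocks. Pool size and statsd address here are assumptions for
# the sketch, not values from the original source.
from socket import socket, AF_INET, SOCK_DGRAM
from gevent.pool import Pool

_send_pool = Pool(512)              # assumed pool size
_udp = socket(AF_INET, SOCK_DGRAM)

def send_stat(stat, addr=('127.0.0.1', 8125)):
    if not _send_pool.full():
        _send_pool.spawn(_udp.sendto, stat, addr)
    # else: drop the stat rather than block the calling greenlet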
import logging
import time
from gevent.pool import Pool


def process_jobs(todo, end):
    pool = Pool(concurrency)
    jobs = {}
    try:
        while True:
            # add jobs if within the grace period
            if not pool.full() and time.time() < end:
                try:
                    for _ in range(concurrency - len(jobs)):
                        func, args = todo.pop(0)  # raises IndexError when no jobs are left
                        greenthread = pool.spawn(func, *args)
                        jobs[greenthread] = args
                except IndexError:
                    logging.warning("no more jobs todo")
            # check whether everything is done
            done = pool.join(timeout=1)
            # iterate over results, removing finished greenthreads
            # (snapshot the items so we can pop while iterating)
            for greenthread, args in list(jobs.items()):
                if greenthread.successful():
                    logging.warning("success! args:{}, result:{}".format(args, greenthread.value))
                    jobs.pop(greenthread)
                elif greenthread.ready():
                    logging.warning("fail! args:{}, result:{}".format(args, greenthread.value))
            # if there are no more jobs and the grace period has ended, shut down
            if time.time() > end and done:
                logging.warning("no more jobs, no more time")
                break
    except Exception:
        logging.exception("error")
    return jobs
def init_all_stock_tick(self):
    start_date = '2015-01-01'
    _today = datetime.now().strftime('%Y-%m-%d')
    num_days = delta_days(start_date, _today)
    start_date_dmy_format = time.strftime(
        "%m/%d/%Y", time.strptime(start_date, "%Y-%m-%d"))
    data_times = pd.date_range(start_date_dmy_format, periods=num_days, freq='D')
    date_only_array = np.vectorize(lambda s: s.strftime('%Y-%m-%d'))(
        data_times.to_pydatetime())
    date_only_array = date_only_array[::-1]
    obj_pool = Pool(4)
    df = self.stock_info_client.get()
    for _, code_id in df.code.items():
        _obj = self.stock_objs[code_id] if code_id in self.stock_objs \
            else CStock(self.dbinfo, code_id)
        for _date in date_only_array:
            if self.cal_client.is_trading_day(_date):
                try:
                    if obj_pool.full():
                        obj_pool.join()
                    obj_pool.spawn(_obj.set_ticket, _date)
                except Exception as e:
                    logger.info(e)
    obj_pool.join()
    obj_pool.kill()
from gevent import Timeout
from gevent.pool import Pool


def do_by_gevent_pool(pool_size=100, job_func=None, loop_items=None,
                      timeout=None, wait_timeout=5 * 60, callback_func=None,
                      **kwargs):
    if not job_func or not loop_items:
        return
    worker_pool = Pool(pool_size)
    for item in loop_items:
        while worker_pool.full():
            try:
                worker_pool.wait_available(timeout=wait_timeout)
            except Timeout:
                worker_pool.kill()
        worker_pool.spawn(job_func, item, **kwargs)
    try:
        worker_pool.join(timeout=timeout)
        if callback_func and callable(callback_func):
            try:
                callback_func()
            except Exception:
                pass
        return True  # indicates all jobs finished
    except Exception:
        return False
from gevent.pool import Pool


class SocketPool(object):

    def __init__(self):
        self.pool = Pool(1000)  # set the pool capacity to 1000
        # note: gevent's Pool needs no explicit start() call

    def listen(self, socket):
        while True:
            socket.recv()

    def add_handler(self, socket):
        if self.pool.full():  # raise an error when capacity is reached
            raise Exception("At maximum pool size")
        else:
            # otherwise run the listen method in a new greenlet
            self.pool.spawn(self.listen, socket)

    def shutdown(self):
        self.pool.kill()  # shut down the pool
def collect_combination_runtime_data(self):
    obj_pool = Pool(10)
    for code_id in self.combination_objs:
        try:
            if obj_pool.full():
                obj_pool.join()
            obj_pool.spawn(self.combination_objs[code_id].run)
        except Exception as e:
            logger.info(e)
    obj_pool.join()
    obj_pool.kill()
import sys
import gevent
from gevent.pool import Pool
from gevent.queue import Queue


class GeventExecutor(AbstractExecutor):

    def __init__(self, task_cls, max_threads, multiple_instances=False):
        super(GeventExecutor, self).__init__(task_cls)
        self._max_threads = max_threads
        self._multiple_instances = multiple_instances
        if multiple_instances:
            self._tasks_pool = Queue()
            for _ in range(max_threads):
                self._tasks_pool.put(task_cls())
        else:
            self._task = task_cls()
        self._thread_pool = Pool(size=max_threads)

    def setup_tasks(self):
        if self._multiple_instances:
            for task in self._tasks_pool.queue:
                task.setup()
        else:
            self._task.setup()

    def join(self, timeout=sys.maxsize):
        super(GeventExecutor, self).join()
        self._thread_pool.join()

    def available(self):
        is_it = not self._thread_pool.full()
        gevent.sleep(0)  # yield so other greenlets can make progress
        return is_it

    def wait_available(self):
        gevent.sleep(0)
        self._thread_pool.wait_available()

    def _run_task(self, run_id):
        self._thread_pool.apply_async(self._run_on_thread_pool, (run_id,))

    def _run_on_thread_pool(self, run_id):
        try:
            if self._multiple_instances:
                try:
                    task = self._tasks_pool.get()
                    result = run_task_func_wrapper(task.run, run_id)
                finally:
                    self._tasks_pool.put(task)
            else:
                result = run_task_func_wrapper(self._task.run, run_id)
            self.on_async_run_finished(result)
        except Exception:
            log.debug("task run failed", exc_info=True)
import logging
from gevent.pool import Pool
from gevent.server import DatagramServer

logger = logging.getLogger(__name__)


class RudiusAuthServer(DatagramServer):
    """Radius auth server."""

    def __init__(self, adapter, host="0.0.0.0", port=1812, pool_size=32):
        DatagramServer.__init__(self, (host, port))
        self.pool = Pool(pool_size)
        self.adapter = adapter

    def handle(self, data, address):
        if not self.pool.full():
            self.pool.spawn(self.adapter.handleAuth, self.socket, data, address)
        else:
            logger.error("radius auth worker pool is full")
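# A hedged start-up sketch for the server above; DummyAdapter is hypothetical,
# standing in for whatever adapter object the original code passes in.
class DummyAdapter(object):
    def handleAuth(self, sock, data, address):
        print('auth packet from %s:%s (%d bytes)' % (address[0], address[1], len(data)))

if __name__ == '__main__':
    server = RudiusAuthServer(DummyAdapter(), host='127.0.0.1', port=1812)
    server.serve_forever()  # DatagramServer dispatches each packet to handle()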
def collect_stock_runtime_data(self):
    obj_pool = Pool(100)
    for code_id in self.stock_objs:
        try:
            if obj_pool.full():
                obj_pool.join()
            ret, df = self.subscriber.get_tick_data(add_prifix(code_id))
            if 0 == ret:
                df = df.set_index('time')
                df.index = pd.to_datetime(df.index)
                obj_pool.spawn(self.stock_objs[code_id].run, df)
        except Exception as e:
            logger.info(e)
    obj_pool.join()
    obj_pool.kill()
from gevent.pool import Pool


class SocketPool(object):

    def __init__(self):
        self.pool = Pool(1000)
        # note: gevent's Pool needs no explicit start() call

    def listen(self, socket):
        while True:
            socket.recv()

    def add_handler(self, socket):
        if self.pool.full():
            raise Exception('At maximum pool size')
        else:
            self.pool.spawn(self.listen, socket)

    def shutdown(self):
        self.pool.kill()
import logging
import time
from gevent.pool import Pool


class RssPool(object):

    def __init__(self):
        self.pool = Pool(RSS_MAX_POOL_NUM)
        self.start = False
        self.times = 0
        self.beginTime = int(time.time())

    def run(self):
        while True:
            if (not self.start) and (not self.pool.full()):
                self.addRssSpider()
                # self.syncDagrame()
                continue
            self.start = False
            if self.pool.free_count() < RSS_MAX_POOL_NUM:
                logging.info("---------------join run ")
                self.pool.join()
            else:
                logging.info("---------------no data, sleep %s seconds " % MAIN_LOOP_SLEEP_TIME)
                time.sleep(MAIN_LOOP_SLEEP_TIME)

    def syncDagrame(self):
        """Sync data to production."""
        self.times += 1
        if self.times > RUN_SYNC_INTERVAL_TIMES or int(time.time()) - self.beginTime > RUN_SYNC_INTERVAL_TIME:
            logging.info("**********sync crawl infos ************")
            sync = SyncCrawlInfos()
            sync.index()
            self.times = 0
            self.beginTime = int(time.time())

    def addRssSpider(self):
        configList = getCrawlRssRequest()
        if not configList:
            self.start = True
            return True
        try:
            spider = CommonFeedRss()
            self.pool.spawn(spider.run, configList)
        except Exception as e:
            logging.info("------------------add spider exception: %s " % e)
def init_today_stock_tick(self):
    _date = datetime.now().strftime('%Y-%m-%d')
    obj_pool = Pool(50)
    df = self.stock_info_client.get()
    if self.cal_client.is_trading_day(_date):
        for _, code_id in df.code.items():
            _obj = self.stock_objs[code_id] if code_id in self.stock_objs \
                else CStock(self.dbinfo, code_id)
            try:
                if obj_pool.full():
                    obj_pool.join()
                obj_pool.spawn(_obj.set_ticket, _date)
                obj_pool.spawn(_obj.set_k_data)
            except Exception as e:
                logger.info(e)
    obj_pool.join()
    obj_pool.kill()
from gevent.pool import Pool


class SocketPool(object):

    def __init__(self):
        self.pool = Pool(1000)
        # note: gevent's Pool needs no explicit start() call

    def listen(self, socket):
        while True:
            socket.recv()

    def add_handler(self, socket):
        if self.pool.full():
            raise Exception('Maximum pool size reached')
        else:
            self.pool.spawn(self.listen, socket)

    def shutdown(self):
        self.pool.kill()
import random
import gevent
from gevent.pool import Pool
# packb/unpackb are assumed to come from msgpack
from msgpack import packb, unpackb


class SocketPool(object):

    def __init__(self):
        self.pool = Pool(1000)

    def listen(self, socket):
        while True:
            line = socket.recv(10240)
            if not line:
                # socket.close()
                break
            gevent.spawn(self.wait, socket, line)
            print('after spawn')

    def add_handler(self, socket, address):
        print(address)
        if self.pool.full():
            raise Exception("At maximum pool size")
        else:
            self.pool.spawn(self.listen, socket)

    def wait(self, socket, line):
        gevent.sleep(1)
        gevent.sleep(random.randint(0, 5) * 0.1)
        try:
            v = unpackb(line)
            print(v)
        except Exception as e:
            v = "error"
            print(v)
            print(e)
        try:
            socket.send(packb({'status': 'ok', 'val': v}))
            print('after sleep')
        except Exception as e:
            print(e)
            print('socket closed')
from gevent.pool import Pool


class SocketPool(object):

    def __init__(self):
        self.pool = Pool(1000)
        # note: gevent's Pool needs no explicit start() call

    def listen(self, socket):
        while True:
            socket.recv()

    def add_handler(self, socket):
        if self.pool.full():
            raise Exception("At maximum pool size")
        self.pool.spawn(self.listen, socket)

    def shutdown(self):
        self.pool.kill()

# When building gevent-driven services, the whole service is often centered
# on a pool structure. This example is a class that polls on each socket.
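# To make the closing comment concrete: a hedged sketch of how such a
# pool-centric class is typically wired to a gevent StreamServer. The wiring
# below is an assumption for illustration, not part of the original example.
from gevent.server import StreamServer

sock_pool = SocketPool()

def on_connect(client_socket, address):
    # each accepted connection gets a pooled greenlet running listen()
    sock_pool.add_handler(client_socket)

StreamServer(('127.0.0.1', 9000), on_connect).serve_forever()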
from gevent.pool import Pool


class SocketPool(object):
    # why is this example written without a way to start it? Hopefully the
    # other examples in this tutorial will show how it is actually used.

    def __init__(self):
        self.pool = Pool(1000)
        # note: gevent's Pool needs no explicit start() call

    def listen(self, socket):
        while True:
            socket.recv()

    def add_handler(self, socket):
        if self.pool.full():
            raise Exception("At maximum pool size")
        else:
            self.pool.spawn(self.listen, socket)

    def shutdown(self):
        self.pool.kill()
import logging
import sys
import greenlet
from gevent.pool import Pool

logger = logging.getLogger(__name__)


class GeventExtractor(BaseExtractor):

    def __init__(self, names, max_workers=222):
        super(GeventExtractor, self).__init__(names, max_workers)
        self._pool = Pool(self._max_workers)
        self._exited_greenlets = 0

    def extract(self, job):
        job = self._job_wrapper(job)
        for name in self._names:
            if self._pool.full():
                self._pool.wait_available()
            self._pool.spawn(job, name)

    def _job_wrapper(self, job):
        def _job(name):
            result = None
            try:
                result = job(name)
            except greenlet.GreenletExit:
                self._exited_greenlets += 1
            except Exception:
                e = sys.exc_info()[1]
                logger.error('Extracting "{0}", got: {1}'.format(name, e))
            return result
        return _job

    def wait_complete(self):
        self._pool.join()

    def shutdown(self):
        self._pool.kill(block=True)

    def final(self):
        count = self._exited_greenlets
        if count != 0:
            print(
                Color.YELLOW(
                    '** {0} running job exited.'.format(count)
                )
            )
from gevent.pool import Pool


class PyMySQLPool(object):
    """Pool for pymysql."""

    version = __version__

    def __init__(self, min):
        self.pool = Pool(10)  # note: the `min` argument is accepted but unused here
        # note: gevent's Pool needs no explicit start() call

    def addConnection(self, db):
        while True:
            db.recv()

    def add_handler(self, socket):
        if self.pool.full():
            raise Exception("At maximum pool size")
        else:
            self.pool.spawn(self.addConnection, socket)

    def shutdown(self):
        self.pool.kill()
import logging
import sys
import greenlet
from gevent.pool import Pool

logger = logging.getLogger(__name__)


class GeventExtractor(BaseExtractor):

    def __init__(self, names, max_workers=222):
        super(GeventExtractor, self).__init__(names, max_workers)
        self._pool = Pool(self._max_workers)
        self._exited_greenlets = 0

    def extract(self, job):
        job = self._job_wrapper(job)
        for name in self._names:
            if self._pool.full():
                self._pool.wait_available()
            self._pool.spawn(job, name)

    def _job_wrapper(self, job):
        def _job(name):
            result = None
            try:
                result = job(name)
            except greenlet.GreenletExit:
                self._exited_greenlets += 1
            except Exception:
                e = sys.exc_info()[1]
                logger.error('Extracting "{0}", got: {1}'.format(name, e))
            return result
        return _job

    def wait_complete(self):
        self._pool.join()

    def shutdown(self):
        self._pool.kill(block=True)

    def final(self):
        count = self._exited_greenlets
        if count != 0:
            print('** {0} running job exited.'.format(count))
from gevent.pool import Pool
from gevent.server import StreamServer


class SocketPool(object):

    def __init__(self):
        self.pool = Pool(1)
        # was: self.pool.add(self.server()) -- that called server() immediately
        # (blocking in serve_forever) instead of scheduling it; spawn it instead
        self.pool.spawn(self.server)

    # suited to a chat-room style where a line of text is sent on Enter
    def listen(self, socket, address):
        f = socket.makefile()
        print("listen")
        while True:
            name = f.readline().strip()
            print(name)

    def listen2(self, socket, address):
        print("listen2")
        print(self.pool.free_count())
        while True:
            name = socket.recv(1010).strip()
            print(name)

    def add_handler(self, socket, address):
        if self.pool.full():
            raise Exception("At maximum pool size")
        else:
            print(" pool insert")
            # was: self.pool.spawn(self.listen2(socket, address)), which called
            # listen2 immediately instead of spawning it in a greenlet
            s = self.pool.spawn(self.listen2, socket, address)
            # self.pool.spawn(self.listen, socket, address)

    def shutdown(self):
        self.pool.kill()

    def server(self):
        print("server")
        server = StreamServer(('0.0.0.0', 8000), self.add_handler)
        server.serve_forever()
import time
from gevent.pool import Pool


def run_gevent():
    worker = _config.get('count_worker', 4)
    pool = Pool(worker)
    funcs = run()
    while True:
        if pool.full():
            time.sleep(1)
            continue
        # pull the next job off the generator
        try:
            funcnya = next(funcs)
            pool.spawn(funcnya['func'], *funcnya['param'])
        except StopIteration:
            if pool.free_count() == worker:
                break
            time.sleep(0.01)
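# run() is defined elsewhere in the original source; run_gevent() only assumes
# it returns a generator of dicts with 'func' and 'param' keys. A hypothetical
# producer of that shape:
def run():
    def job(n):
        print('job %d done' % n)
    for i in range(10):
        yield {'func': job, 'param': (i,)}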
import json
import logging
import time
from uuid import uuid4
from gevent.pool import Pool

# `sleep` is used below in an "if sleep: sleep(5) else: yield 5" pattern,
# which suggests the original source falls back to a generator-based
# scheduler when gevent is unavailable.
try:
    from gevent import sleep
except ImportError:
    sleep = None

logger = logging.getLogger(__name__)


class Queue(object):

    def __init__(self, redis, name, ttl=None):
        self._redis = redis
        self.name = name
        self.tasks_key = self._get_key_(name, 'tasks')
        self.result_key = self._get_key_(name, 'results')
        # for killing by timeout (memory leak)
        self.started = time.time()
        self.working = set()
        self._greenlets = []
        self.ttl = ttl
        self.worker = get_worker_name()
        # if show_stats:
        #     self.stats_start = time.time()
        #     self.tasks_processed = 0

    def _get_key_(self, name, token, modifier='queue'):
        return 'srq:{modifier}:{name}:{token}'.format(name=name, token=token, modifier=modifier)

    def process(self, func, pool=20, workers=[], stats=None):
        try:
            self._pool = Pool(pool)
            self.func = func
            self.spawn(self._get_work_)
            # if self.show_stats:
            #     self.spawn(self._show_stats_)
            for worker in workers:
                self.spawn(worker)
            if stats:
                self.spawn(self.push_stats, stats)
            self._pool.join()
        except Exception:
            logger.error('Gevent error', exc_info=True)

    def push_stats(self, fn):
        while True:
            stats = fn()
            # `self.token` is assumed to be set elsewhere in the original source
            self._redis.setex('srqstats:%s:%s' % (self.name, self.token), 6, stats)
            if sleep:
                sleep(5)
            else:
                yield 5

    def _show_stats_(self):
        while True:
            if time.time() - self.started > self.ttl:
                self.stop()
            elapsed = time.time() - self.stats_start
            speed = self.tasks_processed / elapsed
            print('Speed: %d t/s (%d tasks by %d sec)' % (speed, self.tasks_processed, elapsed))
            self.stats_start = time.time()
            self.tasks_processed = 0
            if sleep:
                sleep(5)
            else:
                yield 5

    def _get_work_(self):
        while True:
            if self.ttl:
                if time.time() - self.started > self.ttl:
                    self._pool.spawn(self.stop)
            if self._pool.full():
                sleep(5)
                continue
            task = self._redis.lpop(self.tasks_key)
            if task:
                self.spawn(self._work_, task)
            if sleep:
                sleep(5)
            else:
                yield 5

    def _work_(self, task_data):
        self.working.add(task_data)
        task = json.loads(task_data)
        uuid, args, kwargs = task
        logger.debug('Got task: %s', uuid)
        try:
            result = self.func(*args, **kwargs)
            self._push_result_(uuid, result)
            logger.debug('Processed: %s', uuid)
            # guarded: show_stats is only set up in the code commented out above
            if getattr(self, 'show_stats', False):
                self.tasks_processed += 1
        except Exception:
            logger.error('Processing error: #%s', uuid, exc_info=True)
        try:
            self.working.remove(task_data)
        except KeyError:
            pass

    def _push_result_(self, uuid, result):
        result = json.dumps((uuid, result))
        self._redis.rpush(self.result_key, result)

    @property
    def tasks(self):
        return self._redis.llen(self.tasks_key)

    @property
    def results(self):
        return self._redis.llen(self.result_key)

    def request(self, *args, **kwargs):
        logger.debug('Requesting {name}(*{args}, **{kwargs})'.format(
            name=self.name, args=str(args), kwargs=str(kwargs)))
        uuid = uuid4().hex
        task = (uuid, args, kwargs)
        self._redis.rpush(self.tasks_key, json.dumps(task))
        return uuid

    def pop_result(self):
        result = self._redis.lpop(self.result_key)
        if result:
            return json.loads(result)

    def pull_result(self):
        while True:
            result = self._redis.lpop(self.result_key)
            if result:
                yield json.loads(result)

    def spawn(self, fn, *args, **kwargs):
        greenlet = self._pool.spawn(fn, *args, **kwargs)
        self._greenlets.append(greenlet)
        return greenlet

    def stop(self):
        for greenlet in self._greenlets:
            self._pool.killone(greenlet)
        for task in self.working:
            self._redis.rpush(self.tasks_key, task)
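# A hedged usage sketch for the Redis-backed Queue above, assuming the
# redis-py client; the queue name and task payload are illustrative.
import redis

r = redis.StrictRedis()
q = Queue(r, 'squares', ttl=3600)

# producer side: enqueue a task and remember its uuid
task_id = q.request(7)

# worker side (blocking): process tasks with a pool of 20 greenlets
# q.process(lambda x: x * x, pool=20)

# consumer side: poll for a finished (uuid, result) pair
print(q.pop_result())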
import logging
import requests
import zmq
from gevent.pool import Pool

# `poll` and `recv` (a zmq Poller and receiving socket) are set up in code
# elided from this excerpt.
# pool = ProcessPool(48)
pool = Pool(48)  # assumption: a gevent Pool stands in for the commented-out ProcessPool
cnt = 0


def send(msg):
    res = requests.get('http://localhost:4567/?msg=' + msg.decode('UTF-8'))
    print(res)


while True:
    cnt += 1
    logging.debug('polling...')
    socks = dict(poll.poll())
    if recv in socks and socks[recv] == zmq.POLLIN:
        msg = recv.recv()
        print('got %s from upstream' % msg)
        # send(msg)
        if pool.full():
            pool.join(timeout=1)
        else:
            pool.spawn(send, msg)
    if cnt == 1000:
        break

pool.join()
import json
import logging
import random
import time
import gevent
import gevent.monkey
import requests
from bs4 import BeautifulSoup
from gevent.pool import Pool
from gevent.queue import Queue

# `proxy` (the record type) and `PyV8` are provided elsewhere in the
# original project.
logger = logging.getLogger(__name__)


class ProxyPool:

    def __init__(self):
        self.THREAD_ID = 0
        self.proxy_list = []
        self.wait_for_verify = Queue()
        self.thread_pool = Pool()  # unbounded pool: full() never returns True
        self.output = []
        gevent.monkey.patch_socket()
        gevent.monkey.patch_ssl()

    def http_headers(self):
        headers = {
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Chrome/59.0.%d.%d Safari/537.36' % (random.randint(1000, 9999), random.randint(100, 999)),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'en-US,en;q=0.8'
        }
        return headers

    def add_thread(self, func, *args):
        if self.thread_pool.full():
            raise Exception("At maximum pool size")
        else:
            self.thread_pool.spawn(func, *args)

    def add_proxy(self, proxy):
        self.proxy_list.append(proxy)
        self.output.append(proxy.dic())
        self.output = sorted(self.output, key=lambda k: k['delay'])
        open('proxy.json', 'w').write(json.dumps(self.output, ensure_ascii=True, indent=4))

    def kill_thread(self):
        self.thread_pool.kill()

    def start(self):
        self.add_thread(self.kuaidaili_com)
        self.add_thread(self.goubanjia_com)
        self.add_thread(self._66ip_cn)
        self.thread_pool.join()

    def get(self):
        pass

    def get_all(self):
        return self.proxy_list

    def kuaidaili_com(self, *args):
        self.add_thread(self.kuaidaili_type_com, 'inha')
        self.add_thread(self.kuaidaili_type_com, 'intr')
        self.add_thread(self.kuaidaili_type_com, 'outha')
        self.add_thread(self.kuaidaili_type_com, 'outtr')

    def kuaidaili_type_com(self, t, *args):
        logger.info('kuaidaili.com %s start' % t)
        i = 1
        self.THREAD_ID += 1
        rq = requests.Session()
        headers = self.http_headers()
        rq.get('http://www.kuaidaili.com/', headers=headers)
        while True:
            gevent.sleep(3)
            url = 'http://www.kuaidaili.com/free/%s/%d/' % (t, i)
            r = rq.get(url, headers=headers)
            if 'qo=eval;qo(po);' in r.text:
                c = PyV8.JSContext()
                c.enter()
                f = c.eval(r.text)
                print(f)
                print(r.text)
                logger.debug('bypass...')
                continue
            if r.status_code == 404:
                break
            if r.status_code == 503:
                logger.error('%s return <%d>' % (url, r.status_code))
                continue
            try:
                html = BeautifulSoup(r.text, 'lxml')
                tbody = html.tbody
                if tbody is None:
                    print(html)
                    continue
                for tr in tbody.find_all('tr'):
                    p = proxy()
                    p.ip = tr.find_all('td', {'data-title': "IP"})[0].text
                    p.port = int(tr.find_all('td', {'data-title': "PORT"})[0].text)
                    p.safe = tr.find_all('td', {'data-title': "匿名度"})[0].text
                    p.type = tr.find_all('td', {'data-title': "类型"})[0].text
                    p.place = tr.find_all('td', {'data-title': "位置"})[0].text
                    logger.debug('<get>%s' % p)
                    self.wait_for_verify.put(p)
                    self.THREAD_ID += 1
                    self.add_thread(self.verify_proxy_thread, self.THREAD_ID)
                logger.debug('%s ok' % url)
                gevent.sleep(1)
            except AttributeError as e:
                print(e)
                logger.error('%s Error, sleep 10s' % url)
                gevent.sleep(10)
                continue
            i += 1

    def goubanjia_com(self, *args):
        logger.info('goubanjia.com start')
        i = 1
        self.THREAD_ID += 1
        while True:
            url = 'http://www.goubanjia.com/free/index%d.shtml' % (i)
            r = requests.get(url, headers=self.http_headers())
            if r.status_code == 404:
                break
            try:
                html = BeautifulSoup(r.text, 'lxml')
                tbody = html.tbody
                for tr in tbody.find_all('tr'):
                    p = proxy()
                    [x.extract() for x in tr.find_all('p')]
                    try:
                        _ = tr.find_all('td', {'class': "ip"})[0].text
                        _ = _.split(':')
                        p.ip = _[0]
                        p.port = int(_[1])
                        # p.port = int(tr.find_all('td', {'data-title': "PORT"})[0].text)
                        p.safe = tr.find_all('td')[1].text.replace(' ', '').replace('\n', '').replace('\t', '')
                        p.type = tr.find_all('td')[2].text.replace(' ', '').replace('\n', '').replace('\t', '')
                        p.place = tr.find_all('td')[3].text.replace(' ', '').replace('\n', '').replace('\t', '').replace('\r', '').replace('\xa0', '')
                        p.net = tr.find_all('td')[4].text.replace(' ', '').replace('\n', '').replace('\t', '')
                    except IndexError as e:
                        print(tr)
                        logger.error('%s is index error' % p)
                    logger.debug('<get>%s' % p)
                    self.wait_for_verify.put(p)
                    self.THREAD_ID += 1
                    self.add_thread(self.verify_proxy_thread, self.THREAD_ID)
                logger.debug('%s ok' % url)
                gevent.sleep(1)
            except AttributeError as e:
                print(e)
                gevent.sleep(10)
                logger.error('%s Error, sleep 10s' % url)
                continue
            i += 1

    def _66ip_cn(self, *args):
        logger.info('66ip.cn start')
        i = 1
        self.THREAD_ID += 1
        while True:
            url = 'http://www.66ip.cn/%d.html' % (i)
            r = requests.get(url, headers=self.http_headers())
            if r.status_code == 404:
                break
            try:
                html = BeautifulSoup(r.content.decode('gb2312'), 'lxml')
                tbody = html.find_all('table')[2]
                for tr in tbody.find_all('tr'):
                    p = proxy()
                    _ = tr.find_all('td')[0].text
                    if _ == 'ip':
                        continue
                    else:
                        p.ip = _
                    p.port = int(tr.find_all('td')[1].text)
                    p.place = tr.find_all('td')[2].text
                    p.safe = tr.find_all('td')[3].text
                    logger.debug('<get>%s' % p)
                    self.wait_for_verify.put(p)
                    self.THREAD_ID += 1
                    self.add_thread(self.verify_proxy_thread, self.THREAD_ID)
                logger.debug('%s ok' % url)
                gevent.sleep(1)
            except AttributeError as e:
                print(e)
                logger.error('%s Error, sleep 10s' % url)
                gevent.sleep(10)
                continue
            i += 1

    def get_delay(self, p):
        r = 0
        try:
            r = requests.get('http://www.baidu.com',
                             proxies={p.scheme: '%s:%d' % (p.ip, p.port)}).elapsed
            r = r.seconds + (r.microseconds + 0.0) / 1000000
        except requests.exceptions.ProxyError:
            return 0
        except Exception:
            # logger.error(str(p) + ' cannot get delay')
            return 0
        return r

    def verify_proxy_thread(self, thread_id):
        if self.wait_for_verify.empty():
            self.THREAD_ID -= 1
            return None
        p = self.wait_for_verify.get()
        delay = self.get_delay(p)
        if delay > 0:
            p.delay = delay
            p.verify = time.time()
            self.add_proxy(p)
            logger.info('<thread %d> get a proxy %s' % (thread_id, p))
        else:
            pass
            # logger.debug('<thread %d> throw away a proxy %s' % (thread_id, p))
        return self.verify_proxy_thread(thread_id)
import errno
import os
import select
import socket
import threading
import traceback
import gevent
from gevent import getcurrent
from gevent.pool import Pool


class CoroutineWorker(Worker):

    DEFAULT_GREENLET_SIZE = 10  # controls the pool size

    def __init__(self, cfg, file_logger=None, ppid=None, sockets=None):
        super(CoroutineWorker, self).__init__(cfg, file_logger, ppid, sockets)
        self.max_greenlets = int(self.cfg.max_greenlets or self.DEFAULT_GREENLET_SIZE)

    def patch(self):
        from gevent import monkey
        monkey.noisy = False
        # if a newer gevent is in use, make sure to patch subprocess as well
        if gevent.version_info[0] == 0:
            monkey.patch_all()
        else:
            monkey.patch_all(subprocess=True)

    def init_process(self):
        super(CoroutineWorker, self).init_process()
        self.patch()
        self.pool = Pool(self.max_greenlets)
        self.mutex = threading.Semaphore()
        self._stop_event = threading.Event()

    def run(self):
        super(CoroutineWorker, self).run()
        while self.alive:
            if not self.pool.full():
                self.pool.spawn(self._run)
            self.file_logger.debug("pool greenlet size %d" % (self.pool.size - self.pool.free_count()))
            gevent.sleep(1.0)
        self._stop_event.wait()
        gevent.spawn(self.stop).join()

    def _run(self):
        if self.LISTENERS:
            while self.alive:
                self.mutex.acquire()
                ret = select.select(self.rd_fds, [], [], 1.0)
                self.file_logger.debug("Before: socket fd length: %d, greenlet: %d, listen in: %s"
                                       % (len(self.rd_fds), id(getcurrent()), self.LISTENERS[0] in self.rd_fds))
                if ret[0]:
                    sock = ret[0][0]
                    self.rd_fds.remove(sock)
                else:
                    sock = None
                self.mutex.release()
                if sock:
                    if sock in self.LISTENERS:
                        try:
                            client, addr = sock.accept()
                            client.setblocking(0)
                            close_on_exec(client)
                            self.rd_fds.append(client)
                        except socket.error as e:
                            if e.args[0] not in (errno.EAGAIN, errno.EWOULDBLOCK, errno.ECONNABORTED):
                                self.file_logger.error(traceback.format_exc())
                        finally:
                            self.rd_fds.append(sock)
                    else:
                        r = self.handle_request(client=sock)
                        if r == -1:
                            sock.close()
                        else:
                            self.rd_fds.append(sock)
                if self.ppid and self.ppid != os.getppid():
                    self.file_logger.info("Parent changed, shutting down: %s", self)
                    return
        else:
            while self.alive:
                try:
                    self.handle_request()
                except Exception:
                    self.file_logger.error(traceback.format_exc())

    def stop(self):
        Worker.stop(self)
        self.pool.join(timeout=1)

    def handle_quit(self, sig, frame):
        self.alive = False
        self._stop_event.set()