class NotifyingQueue(Event):
    """A queue following the wait protocol: the event is set whenever
    items are pending, cleared once the queue drains."""

    def __init__(self):
        super(NotifyingQueue, self).__init__()
        self._queue = Queue()

    def put(self, item):
        """Enqueue *item* and flag the event for waiters."""
        self._queue.put(item)
        self.set()

    def empty(self):
        """True when no items are queued."""
        return self._queue.empty()

    def get(self, block=True, timeout=None):
        """Dequeue and return one item; clear the event once empty."""
        head = self._queue.get(block, timeout)
        if self._queue.empty():
            self.clear()
        return head

    def stop(self):
        """Request a stop event."""
        self.set()
class MemorySession(Session):
    """In-memory session using an outgoing gevent Queue as the message store."""

    def __init__(self, server, session_id=None):
        super(MemorySession, self).__init__(server, session_id=session_id)
        # Fall back to a short random id when the caller supplies none.
        self.session_id = session_id or str(uuid.uuid4())[:8]
        self.server = server
        self.queue = Queue()   # outgoing messages
        self.hits = 0          # incremented (via incr_hits) per get_messages call
        self.heartbeats = 0
        self.connected = False

    def add_message(self, msg):
        # Non-blocking enqueue of an outgoing message.
        self.queue.put_nowait(msg)

    def get_messages(self, **kwargs):
        # NOTE(review): when the queue is empty this blocks on get() and
        # returns a SINGLE message, while the non-empty branch returns a LIST.
        # Callers apparently accept both shapes -- confirm before changing.
        timeout = kwargs.get('timeout', None)
        self.incr_hits()
        if self.queue.empty():
            try:
                return self.queue.get(**kwargs)
            except Empty:
                return []
        else:
            accum = []
            try:
                # Drain everything currently queued.
                while not self.queue.empty():
                    if timeout:
                        accum.append(self.queue.get(timeout=timeout))
                    else:
                        accum.append(self.queue.get_nowait())
            finally:
                # NOTE(review): returning from finally swallows any exception
                # raised while draining (e.g. a timeout) -- looks intentional:
                # whatever was gathered is still delivered.
                return accum

    def interrupt(self):
        """A kill event triggered through a client-accessible endpoint.

        Internal expires will not have is_interupted() == True.
        """
        self.interrupted = True
        self.kill()

    def kill(self):
        # Mark disconnected and fire the timeout event exactly once.
        self.connected = False
        # Expire only once
        if not self.expired:
            self.expired = True
            self.timeout.set()
class WorkQueue(object):
    """FIFO work queue that lazily starts a Runner greenlet to drain items.

    An optional ``start_runner`` callable acts as a gate: when it returns a
    falsy value the runner is not (re)started.
    """

    def __init__(self, worker, start_runner=None, max_work_load=16):
        """Create the queue.

        :param worker: callable invoked by the runner for each item; required.
        :param start_runner: optional gate callable controlling runner startup.
        :param max_work_load: maximum items the runner processes per pass.
        :raises ValueError: if *worker* is missing (was a silent TODO before).
        """
        if not worker:
            # Fix: the original had `# TODO raise exception` and silently
            # accepted a useless queue; fail fast instead.
            raise ValueError('WorkQueue requires a worker callable')
        self.worker = worker
        self._start_runner = start_runner
        self._max_work_load = max_work_load
        # do we want to limit the size of the queue?
        self._queue = Queue()
        self._num_enqueues = 0
        self._num_dequeues = 0
        self._runner = Runner(self, self._max_work_load)

    def enqueue(self, work_item):
        """Add a work item and start the runner if the gate allows."""
        self._queue.put(work_item)
        self._num_enqueues += 1
        self.may_be_start_runner()

    def dequeue(self):
        """Return the next work item, or None when the queue is empty."""
        try:
            work_item = self._queue.get_nowait()
        except Empty:
            work_item = None
        else:
            self._num_dequeues += 1
        return work_item

    def may_be_start_runner(self):
        """Start the runner unless the queue is empty or the gate declines."""
        if self._queue.empty() or \
                (self._start_runner and not self._start_runner()):
            return
        self._runner.start()

    def runner_done(self):
        """Return True when the runner should stop (empty queue or closed gate)."""
        return bool(self._queue.empty() or
                    (self._start_runner and not self._start_runner()))

    def is_queue_empty(self):
        """True when no work is pending."""
        return self._queue.empty()

    def num_enqueues(self):
        """Total items ever enqueued."""
        return self._num_enqueues

    def num_dequeues(self):
        """Total items ever dequeued."""
        return self._num_dequeues
class MatlabConnect(object):
    """Bridges Emotiv headset packets into a MATLAB session via a gevent queue."""

    def __init__(self, headset, path = ''):
        self.headset = headset
        self.dataQueue = Queue()   # producer/consumer buffer of sensor rows
        self.isRunning = False
        # Sensor channel names read from each packet, in output column order.
        self.sensors = ['F3','FC5', 'AF3', 'F7', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'F8', 'AF4', 'FC6', 'F4']
        command = os.path.join(path, 'matlab')
        # Launch MATLAB in automation/desktop mode (os.system blocks until the
        # launcher returns).
        os.system('%s -automation -desktop' % command)

    def get_sensors_info(self):
        """ Greenlet to get a packet from Emotiv headset.
        Append new data to queues where consumers will read from """
        try:
            while self.isRunning:
                packet = self.headset.dequeue()
                values = [packet.sensors[name]['value'] for name in self.sensors]
                if self.dataQueue is not None:
                    self.dataQueue.put_nowait(values)
                gevent.sleep(0)  # yield to other greenlets between packets
        except KeyboardInterrupt:
            print ('Read stopped')
            self.isRunning = False
        except Exception as e:
            print ('Read Error: %s' % e)
            self.isRunning = False
        finally:
            print ('Read over')
            self.isRunning = False
            self.headset.close()

    def matlabBridge(self, varName):
        # Consumer greenlet: accumulate rows and push the growing matrix into
        # the MATLAB workspace once per second under the name *varName*.
        data = None
        while self.isRunning or not self.dataQueue.empty():
            while not self.dataQueue.empty():
                buf = self.dataQueue.get()
                if data is None:
                    data = np.array(buf, dtype=int)
                else:
                    data = np.vstack((data, buf))
            # NOTE(review): self.session is never assigned in this class --
            # presumably attached by external setup code; confirm.
            self.session.putvalue(varName, data)
            gevent.sleep(1)
        print 'Matlab over'
class Spider:
    """Concurrent crawler: N worker greenlets drain a shared task queue."""

    def __init__(self, url='', depth=1, threads=4):
        """
        :param url: seed URL for the first Task.
        :param depth: crawl depth passed to the seed Task.
        :param threads: number of worker greenlets to spawn.
        """
        self.url = url
        self.depth = depth
        self.threads = threads
        self.tasks = Queue()
        # Fix for the flagged bug: a set gives O(1) membership and prevents
        # the same URL being processed twice (the list version checked and
        # appended with O(n) scans).
        self.bucket = set()

    def run(self):
        """Seed the queue and run the worker greenlets to completion."""
        self.tasks.put(Task(self.url, self.depth))
        workers = [gevent.spawn(self.worker) for _ in range(self.threads)]
        gevent.joinall(workers)

    def worker(self, worker_id=''):
        """Drain tasks, skipping URLs already crawled, enqueueing subtasks."""
        while not self.tasks.empty():
            task = self.tasks.get()
            if task.url in self.bucket:
                continue  # already crawled by another worker
            self.bucket.add(task.url)
            task.run()
            for t in task.subtasks:
                self.tasks.put_nowait(t)
def _run(self):
    # Crawl Amazon best-seller pages breadth-first with a bounded greenlet
    # pool; workers push (url, name, depth) follow-ups onto the queue.
    utils.log("[%s] parsing site %s" % (self, self.base))
    queue = Queue()
    pool = Pool(32)
    seed = 'http://www.amazon.com/best-sellers-books-Amazon/zgbs/books/'
    parsed = set()  # URLs already dispatched, to avoid re-parsing
    queue.put_nowait((seed, 'seed', 0))
    while True:
        items = []
        # Drain everything currently queued, skipping already-seen URLs.
        while not queue.empty():
            item = queue.get_nowait()
            if item[0] not in parsed:
                items.append(item)
                parsed.add(item[0])
        # Done when nothing new surfaced and no worker is still running.
        if 0 == len(items) and 0 == len(pool):
            break
        for item in items:
            pool.spawn(self._parseResultsPage, queue, item[0], item[1], item[2])
        time.sleep(0.01)  # let workers make progress before polling again
    pool.join()
    self._output.put(StopIteration)  # end-of-stream sentinel for the consumer
class Crawler(object):
    """Drives Spider greenlets from a URL queue until the page budget or
    the queue is exhausted."""

    def __init__(self, processor):
        self.processor = processor
        self.pool = Pool(self.processor.concurrency)
        self.base_host = urlsplit(self.processor.start_url).hostname
        self.urls = Queue()
        self.urls.put(self.processor.start_url)
        self.visited_urls = set()
        self.visited_urls_lock = RLock()
        self.pages_count = 0

    def start(self):
        """Dispatch spiders until max_pages is hit and no URLs remain."""
        # Iterative form of the original tail recursion: keep draining until
        # a pass ends with an empty queue.
        while True:
            while True:
                if self.pages_count >= self.processor.max_pages:
                    # Budget spent: drop all pending work.
                    self.urls = Queue()
                    break
                try:
                    url = self.urls.get_nowait()
                    self.pool.wait_available()
                    spider = Spider(self, self.processor, url)
                    self.pool.start(spider)
                    self.pages_count += 1
                except Empty:
                    break
            self.pool.join()
            # Spiders may have enqueued more URLs while we were joining.
            if self.urls.empty():
                return
class Scheduler(object):
    """FIFO scheduler that de-duplicates requests via a RequestFilter."""

    def __init__(self):
        self.request_filter = RequestFilter()
        self.queue = Queue()

    def enqueue_request(self, request):
        """Queue *request* unless the filter has already seen it and the
        request does not opt out via ``dont_filter``."""
        if not request.dont_filter \
                and self.request_filter.request_seen(request):
            # Fix: Logger.warn() is a deprecated alias; warning() is the
            # supported spelling.
            logger.warning("ignore %s", request.url)
            return
        self.queue.put(request)

    def next_request(self):
        """Pop and return the next request, or None when the queue is empty."""
        if self.queue.empty():
            return None
        return self.queue.get()

    def __len__(self):
        """Number of requests currently queued."""
        return self.queue.qsize()
class Worker():
    """Demo racing a long-running greenlet against a short one; whichever
    posts its seed to the queue first unblocks start()."""

    def __init__(self):
        self.threads = []    # spawned greenlets, killed by stop()
        self.queue = Queue() # completion signal channel

    def long_func(self, th, seed):
        k = 0
        while k < 10000:
            print "LOG: Inside the long function Thread: ", th, " Seed: ", seed
            time.sleep(.1)
        # NOTE(review): k is never incremented, so the loop above never exits
        # and these two lines are unreachable -- confirm whether intentional
        # (stop() killing the greenlet may be the designed exit).
        print "LOG: Long function is out of the loop", seed
        self.queue.put_nowait(seed)

    def short_func(self, th, seed):
        # Signals completion immediately.
        print "LOG: Inside the short function Thread:", th, " Seed: ", seed
        self.queue.put_nowait(seed)

    def start(self, seed):
        print "INFO: Initializing the threads..."
        self.threads.append(gevent.spawn(self.long_func, 1, seed))
        gevent.sleep(1)
        self.threads.append(gevent.spawn(self.short_func, 2, seed))
        # Spin (yielding each pass) until one greenlet posts its seed.
        while self.queue.empty():
            print "INFO: Queue is empty %s" % seed
            gevent.sleep(0)
        raise TaskComplete

    def stop(self):
        # Kill both greenlets (including the never-ending long_func).
        gevent.killall(self.threads)
class NotifyingQueue(Event):
    """Queue following the wait protocol: the event is set while items are
    pending and cleared once the queue drains."""

    def __init__(self, maxsize=None, items=()):
        super().__init__()
        self._queue = Queue(maxsize, items)

    def put(self, item):
        """Enqueue *item* and flag the event for waiters."""
        self._queue.put(item)
        self.set()

    def get(self, block=True, timeout=None):
        """Dequeue one item, clearing the event once the queue is empty."""
        head = self._queue.get(block, timeout)
        if self._queue.empty():
            self.clear()
        return head

    def peek(self, block=True, timeout=None):
        """Return the next item without removing it."""
        return self._queue.peek(block, timeout)

    def __len__(self):
        return len(self._queue)

    def copy(self):
        """Return a snapshot list of the queued items, leaving the queue
        itself untouched."""
        snapshot = self._queue.copy()
        drained = list()
        while not snapshot.empty():
            drained.append(snapshot.get_nowait())
        return drained
def test_async_multi_publish_consume():
    # Publish two messages on separate short-lived channels and consume both
    # through one long-lived consumer queue, acking each.
    with conn.channel() as channel:
        # first message
        message_body = 'test_async_multi_publish_consume message 1'
        channel.basic_publish(
            exchange='unit_test_room', routing_key='user1', body=message_body
        )
        recv_queue = Queue()
        rchannel = conn.allocate_channel()
        # Deliveries land directly on recv_queue via the callback.
        rchannel.basic_consume(queue='listener1', callback=recv_queue.put)
        resp = recv_queue.get()
        eq_(resp.body, message_body)
        resp.ack()
        # Exactly one delivery expected so far.
        assert recv_queue.empty()
    with conn.channel() as channel:
        # second message -- the consumer registered above keeps receiving
        # even though the first publish channel is closed.
        message_body = 'test_async_multi_publish_consume message 2'
        channel.basic_publish(
            exchange='unit_test_room', routing_key='user1', body=message_body
        )
        resp = recv_queue.get()
        eq_(resp.body, message_body)
        resp.ack()
class Worker():
    """Experimental worker racing an effectively-infinite greenlet against a
    short one; whichever posts its marker first unblocks start()."""

    def __init__(self,inputdict, timeout, outputmode, validation_func):
        self.threads = []    # spawned greenlets, killed by stop()
        self.queue = Queue() # completion signal channel
        self.inputdict = inputdict
        self.timeout = timeout
        self.outputmode = outputmode
        self.validation_func = validation_func

    def infi(self, th, thm):
        k = 0
        while k<10000:
            print 'I am in INFI ', th, thm
            time.sleep(.1)
        # NOTE(review): k is never incremented, so the loop never ends and
        # the lines below are unreachable -- confirm whether intentional.
        print "out while infi", thm
        self.queue.put_nowait(thm)

    def test(self, th, thm):
        # Signals completion immediately.
        print "inside test", thm
        self.queue.put_nowait(thm)

    def start(self, thm):
        print "Hii"
        self.threads.append(gevent.spawn(self.infi, 1, thm))
        self.threads.append(gevent.spawn(self.test, 2, thm))
        # Yield until one of the greenlets posts its marker.
        while self.queue.empty():
            print "queue is empty %s" % thm
            gevent.sleep(0)
        raise TaskComplete

    def stop(self):
        gevent.killall(self.threads)
class WebSocketClient(WebSocketBaseClient):
    """WebSocket client buffering inbound messages on a queue, using
    StopIteration as the end-of-stream sentinel."""

    def __init__(self, url, protocols=None, extensions=None):
        WebSocketBaseClient.__init__(self, url, protocols, extensions)
        self._th = Greenlet(self.run)
        self.messages = Queue()

    def handshake_ok(self):
        # Handshake done: start pumping frames in the background greenlet.
        self._th.start()

    def received_message(self, message):
        # Queue a deep copy so later frame reuse cannot mutate queued data.
        self.messages.put(copy.deepcopy(message))

    def closed(self, code, reason=None):
        # Signal end-of-stream so a blocked receive() wakes and returns None.
        self.messages.put(StopIteration)

    def receive(self):
        """Return the next message, or None once the connection has ended."""
        # Terminated with an empty queue: nothing will ever arrive, so do not
        # block forever on get().
        if self.terminated and self.messages.empty():
            return None
        msg = self.messages.get()
        return None if msg is StopIteration else msg
def _run(self):
    # Crawl NYT best-seller lists. Workers push (callable, url, arg, flag)
    # follow-up items onto the queue; this loop re-dispatches them.
    utils.log("[%s] parsing site %s" % (self, self.base))
    queue = Queue()
    pool = Pool(64)
    seed = 'http://www.nytimes.com/best-sellers-books/'
    pool.spawn(self._parseResultsPage, pool, queue, seed, 'current', True)
    while True:
        items = []
        # Drain everything currently queued.
        while not queue.empty():
            item = queue.get_nowait()
            items.append(item)
        # Finished when no new items surfaced and the pool has gone idle.
        if 0 == len(items) and 0 == len(pool):
            break
        for item in items:
            # item[0] is the parser callable chosen by the producing worker.
            pool.spawn(item[0], pool, queue, item[1], item[2], item[3])
        time.sleep(0.01)  # brief pause before polling again
    pool.join()
    self._output.put(StopIteration)  # end-of-stream sentinel for the consumer
class Spider:
    """Four-greenlet crawler draining a shared URL queue."""

    def __init__(self, url='', depth=1):
        """
        :param url: seed URL placed on the task queue.
        :param depth: nominal crawl depth (kept for interface compatibility).
        """
        self.tasks = Queue()
        self.tasks.put(url)
        self.init_url = url or ''
        self.depth = depth or ''
        # Fix: track fetched URLs so link cycles cannot be re-crawled forever
        # (the original enqueued every href unconditionally).
        self.visited = set()

    def run(self):
        """Spawn four workers and wait for the queue to drain."""
        workers = [gevent.spawn(self.work) for _ in range(4)]
        gevent.joinall(workers)

    def work(self):
        """Fetch pages off the queue, enqueueing only unseen links."""
        while not self.tasks.empty():
            page = self.tasks.get()
            if page in self.visited:
                continue  # another worker already fetched this URL
            self.visited.add(page)
            p = Page(page, '')
            p.do_request()
            p.parse_content()
            for href in p.hrefs:
                if href not in self.visited:
                    self.tasks.put_nowait(href)
def _run(self):
    # Crawl Seattle Times restaurant listings. Workers push
    # (callable, url, arg, flag) follow-up items onto the queue.
    utils.log("[%s] parsing site %s" % (self, self.base))
    queue = Queue()
    pool = Pool(64)
    seed = 'http://community.seattletimes.nwsource.com/entertainment/i_results.php?search=venue&type=Restaurant&page=1'
    pool.spawn(self._parseResultsPage, pool, queue, seed, '1', True)
    while True:
        items = []
        # Drain everything currently queued.
        while not queue.empty():
            item = queue.get_nowait()
            items.append(item)
        # Finished when nothing new surfaced and the pool has gone idle.
        if 0 == len(items) and 0 == len(pool):
            break
        for item in items:
            # item[0] is the parser callable chosen by the producing worker.
            pool.spawn(item[0], pool, queue, item[1], item[2], item[3])
        time.sleep(0.01)  # brief pause before polling again
    pool.join()
    self._output.put(StopIteration)  # end-of-stream sentinel for the consumer
class Actor(Greenlet):
    """Minimal actor: a greenlet with an inbox queue dispatching via receive()."""
    __metaclass__ = MetaActor

    def __init__(self):
        Greenlet.__init__(self)
        self.inbox = Queue()
        # Register with the global actor list (populated by the metaclass).
        Actor.actors.append(self)

    def send(self, actor, message):
        """Deliver *message* to another actor's inbox."""
        actor.inbox.put(message)

    def receive(self, message):
        """Handle one message; subclasses must override.

        Fix: ``raise NotImplemented()`` raised a TypeError because
        NotImplemented is a constant, not an exception class --
        NotImplementedError is the correct exception.
        """
        raise NotImplementedError()

    @staticmethod
    def wait_actors():
        """Block until every registered actor greenlet finishes."""
        gevent.joinall(Actor.actors)

    def loop(self):
        # Poll the inbox once, then yield to other greenlets.
        if not self.inbox.empty():
            self.receive(self.inbox.get())
        gevent.sleep()

    def _run(self):
        # Main greenlet body: keep polling while the actor is started.
        while self.started:
            self.loop()
def _run(self):
    # Crawl Award Annals: one index-page parse seeds (url, name) items which
    # are then fanned out to results-page parsers.
    utils.log("[%s] parsing site %s" % (self, self.base))
    queue = Queue()
    pool = Pool(16)
    seed = 'http://www.awardannals.com/skin/menubar81.html'
    pool.spawn(self._parseIndexPage, pool, queue, seed, 'index')
    while True:
        items = []
        # Drain everything currently queued.
        while not queue.empty():
            item = queue.get_nowait()
            items.append(item)
        # Finished when nothing new surfaced and the pool has gone idle.
        if 0 == len(items) and 0 == len(pool):
            break
        for item in items:
            pool.spawn(self._parseResultsPage, pool, queue, item[0], item[1], False)
        time.sleep(0.01)  # brief pause before polling again
    pool.join()
    self._output.put(StopIteration)  # end-of-stream sentinel for the consumer
def recursive_crawl(url):
    """Crawl *url* recursively with 10 worker greenlets.

    Workers consume URLs from task_queue and post (url, hrefs) results on
    data_queue; the loop below records results and enqueues new links.
    """
    all_urls = set()
    processing_urls = set()
    processed_urls = set()
    task_queue = Queue()
    data_queue = Queue()

    def is_processed(url):
        return url in processed_urls

    def is_processing(url):
        return url in processing_urls

    def mark_processed(url):
        # Move url from processing to processed, warning on duplicates.
        if is_processing(url):
            processing_urls.remove(url)
        if is_processed(url):
            print('Duplicate processed url {}'.format(url))
        else:
            processed_urls.add(url)

    def mark_processing(url):
        processing_urls.add(url)

    def add_to_all(url):
        # Record a never-seen url and hand it to the workers.
        if url not in all_urls:
            print('Record url {}'.format(url))
            all_urls.add(url)
            mark_processing(url)
            task_queue.put_nowait(url)

    # Fix: the seed URL was never enqueued, so processing_urls started empty
    # and the result loop below exited immediately without crawling anything.
    add_to_all(url)

    # Start workers
    workers = []
    for i in xrange(10):
        workers.append(
            gevent.spawn(url_worker, i, task_queue, data_queue)
        )
    print('workers', len(workers))

    while processing_urls:
        if data_queue.empty():
            gevent.sleep(0)  # yield while workers are busy
            continue
        done_url, hrefs = data_queue.get()
        mark_processed(done_url)
        for sub_url in hrefs:
            add_to_all(sub_url)
            if not is_processed(sub_url) and not is_processing(sub_url):
                mark_processing(sub_url)
                task_queue.put_nowait(sub_url)

    print('Processed', len(processed_urls), 'All', len(all_urls))
    print('Total latency', demo_helpers.TOTAL_LATENCY)
class ZMQSummarizedTestResult(ZMQTestResult):
    """Test-result sink that batches pushed records and flushes them over a
    ZMQ push socket once per interval from a rescheduling greenlet."""

    def __init__(self, args):
        super(ZMQSummarizedTestResult, self).__init__(args)
        self.interval = 1.   # seconds between flushes
        self._data = Queue() # (data_type, payload) tuples awaiting flush
        gevent.spawn_later(self.interval, self._dump_data)

    def push(self, data_type, **data):
        # Non-blocking enqueue; flushed by the background greenlet.
        self._data.put_nowait((data_type, data))

    def close(self):
        # Flush whatever is still queued (without rescheduling), then tear
        # down the ZMQ context.
        while not self._data.empty():
            self._dump_data(loop=False)
        self.context.destroy()

    def _dump_data(self, loop=True):
        # Nothing to send: just reschedule (when looping) and return.
        if self._data.empty() and loop:
            gevent.spawn_later(self.interval, self._dump_data)
            return

        data = {'data_type': 'batch',
                'agent_id': self.agent_id,
                'hostname': get_hostname(),
                'run_id': self.run_id,
                'counts': defaultdict(list)}

        # grabbing what we have -- qsize() snapshot so records pushed during
        # this flush wait for the next one.
        for _ in range(self._data.qsize()):
            data_type, message = self._data.get()
            data['counts'][data_type].append(message)

        # Non-blocking send with busy retry while the socket is congested.
        while True:
            try:
                self._push.send(self.encoder.encode(data), zmq.NOBLOCK)
                break
            except zmq.ZMQError as e:
                if e.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                    continue
                else:
                    raise
        if loop:
            gevent.spawn_later(self.interval, self._dump_data)
def recursive_crawl(url):
    # Pool-based variant: fetch_and_extract posts (url, hrefs) results onto
    # data_queue; sub-links are fetched via a bounded greenlet pool.
    all_urls = set()
    processing_urls = set()
    processed_urls = set()
    data_queue = Queue()
    gpool = Pool(10)

    def is_processed(url):
        return url in processed_urls

    def is_processing(url):
        return url in processing_urls

    def mark_processed(url):
        # Move url from processing to processed, warning on duplicates.
        if is_processing(url):
            processing_urls.remove(url)
        if is_processed(url):
            print('Duplicate processed url {}'.format(url))
        else:
            processed_urls.add(url)

    def mark_processing(url):
        processing_urls.add(url)

    def add_to_all(url):
        # Record a never-seen url and mark it in-flight.
        if url not in all_urls:
            print('Record url {}'.format(url))
            all_urls.add(url)
            mark_processing(url)

    # Seed fetch, performed synchronously before the result loop.
    # NOTE(review): the seed is fetched but add_to_all(url) is never called
    # on it, so processing_urls may start empty and the loop below exit
    # immediately -- compare with the task_queue variant of this function
    # and confirm the intended seeding.
    fetch_and_extract(url, data_queue)

    while processing_urls:
        if data_queue.empty():
            gevent.sleep(0)  # yield while fetches are in flight
            continue
        done_url, hrefs = data_queue.get()
        mark_processed(done_url)
        for sub_url in hrefs:
            add_to_all(sub_url)
            if not is_processed(sub_url) and not is_processing(sub_url):
                mark_processing(sub_url)
                gpool.spawn(fetch_and_extract, sub_url, data_queue)

    print('Processed', len(processed_urls), 'All', len(all_urls))
    print('Total latency', demo_helpers.TOTAL_LATENCY)
class PSPool(object): LIFE_TIMES = 60 * 1 #sock生命周期 def __init__(self, host, port, max_sock): self.host = host self.port = port self.max_sock = max_sock self.socks = Queue(maxsize=max_sock) self.threads = {} self.sock_times = {} def init_sock(self): """ 初始化和pp服务器的socket """ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((self.host, self.port)) return sock def get(self): if self.socks.empty(): sock = self.init_sock() self.sock_times[sock] = time.time() return sock return self.socks.get() def put(self, sock): times = time.time() - self.sock_times[sock] if times >= self.LIFE_TIMES or self.socks.full(): self.free(sock) return self.socks.put(sock) def free(self, sock): self.sock_times.pop(sock, None) try: sock.close() except: pass def __enter__(self): cur_thread = getcurrent() if cur_thread in self.threads: raise ValueError('not support reenter') self.threads[cur_thread] = self.get() return self.threads[cur_thread] def __exit__(self, exc_type, exc_val, exc_tb): sock = self.threads.pop(getcurrent()) if exc_type is None: self.put(sock) else: self.free(sock)
def poll(self):
    # Fan out one greenlet per network device, merge the per-device session
    # dicts they post onto the queue, and write the combined result.
    print ('@poll(%d):started...' % self.instance)
    print ('@poll(%d):creating GQueue.Queue...' % self.instance)
    sessions = GQueue()
    print ('@poll(%d):creating Thread...' % self.instance)
    threads = [gevent.spawn(self.task, netdev, sessions) for netdev in self.netdevices]
    gevent.sleep(0)  # give the workers a first chance to run
    gevent.joinall(threads)
    data = {}
    # All workers are done; drain and merge their session dicts.
    while not sessions.empty():
        data.update(sessions.get())
    self.write(data)
    print ('@poll(%d):Done!' % self.instance)
class ResPool():
    """Tiny resource pool (a template for e.g. a DB connection pool)."""

    def __init__(self):
        # Pre-populate the pool with two placeholder resources (0 and 1).
        self.que = Queue()
        for resource in range(0, 2):
            self.que.put(resource)

    def empty(self):
        """True when no resource is currently available."""
        return self.que.empty()

    def get(self):
        """Check a resource out of the pool."""
        return self.que.get()

    def put(self, obj):
        """Return a resource to the pool."""
        self.que.put(obj)
def _StateIteratorForIdTuples(self, slice_gen): """Generator which iterates over State buffers for a given list of ID tuples. Separated the list of tuples into batches of a maximum size. Args: slice_gen - Generator which returns the next list of ID tuples of the form (Client ID, Taba Name) to lookup. """ # Split the lookups into batches, and start a background greenlet to # retrieve the batches. Use a queue to retrieve results so that they can be # processes as soon as they are available, and limit the size of the queue # to control memory usage. result_queue = Queue(8) def _GetBatchWorker(): while True: try: id_slice = slice_gen.next() except StopIteration: result_queue.put(None) return state_op = self.StateGetBatchGenerator(id_slice) if not state_op.success: LOG.error("Error retrieving State batch\n%s" % state_op) result_queue.put(Exception) return else: result_queue.put(state_op) workers = [gevent.spawn(_GetBatchWorker) for _ in xrange(8)] # Extract the results as long as there are unprocessed slices or there are # results available. while not all([w.ready() for w in workers]) or not result_queue.empty(): state_op = result_queue.get() # Yield the results from this batch. if state_op: for i, ((client_id, name), state) in enumerate(state_op.response_value): if state is not None: yield ((client_id, name), state) # Yield to other greenlets periodically. if i % 5000 == 0: gevent.sleep(0)
class ConnectionPool(object):
    """Lazy socket pool: creates up to *maxsize* connections on demand and
    recycles idle ones through an internal queue."""

    def __init__(self, host, port, maxsize=10, connect_timeout=None,
                 read_timeout=None, factory=lambda x: x):
        # factory wraps each raw socket (identity by default).
        if not isinstance(maxsize, (int, long)):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self.maxsize = maxsize
        self.pool = Queue()  # idle connections
        self.size = 0        # connections created and not lost
        self.host = host
        self.port = port
        self.factory = factory
        self.connect_timeout = connect_timeout
        self.read_timeout = read_timeout

    def get(self):
        pool = self.pool
        # At capacity, or an idle connection exists: (possibly block and)
        # take one from the queue.
        if self.size >= self.maxsize or pool.qsize():
            return pool.get()
        else:
            # Reserve a capacity slot before connecting; roll back on failure.
            self.size += 1
            try:
                new_item = self.create_connection()
            except Exception:
                self.size -= 1
                raise
            return new_item

    def put(self, item):
        # Return a healthy connection to the idle queue.
        self.pool.put(item)

    def lose(self, item):
        # Drop a broken connection and release its capacity slot.
        self.size -= 1
        item.close()

    def closeall(self):
        # Best-effort close of every idle connection.
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

    def create_connection(self):
        """Create connection to remote host."""
        sock = socket.create_connection((self.host, self.port),
                                        timeout=self.connect_timeout)
        sock.settimeout(self.read_timeout)
        return self.factory(sock)
class ConnectionPool(object):
    """Generic lazy connection pool over an arbitrary connection class."""

    def __init__(self, connection_cls, maxsize=100, **kwargs):
        # kwargs are forwarded verbatim to connection_cls on every create.
        if not isinstance(maxsize, integer_types):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self._connection_cls = connection_cls
        self._maxsize = maxsize
        self._pool = Queue()  # idle connections
        self._size = 0        # connections created so far
        self._conn_params = kwargs

    def get(self):
        # Reuse an idle connection when available or at capacity; otherwise
        # create a new one, reserving the capacity slot first.
        if self._size >= self._maxsize or self._pool.qsize():
            return self._pool.get()
        else:
            self._size += 1
            try:
                return self._connection_cls(**self._conn_params)
            except:
                self._size -= 1
                raise

    def put(self, item):
        # Return a connection to the idle queue.
        self._pool.put(item)

    def closeall(self):
        # Best-effort close of every idle connection.
        while not self._pool.empty():
            conn = self._pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

    @contextlib.contextmanager
    def connection(self):
        """Yield a pooled connection; return it on clean exit. If an error
        left the connection closed, tear the whole pool down (assume the
        backend died) and re-raise."""
        conn = self.get()
        try:
            yield conn
        except:
            if conn.closed:
                conn = None
                self.closeall()
            raise
        finally:
            if conn is not None and not conn.closed:
                self.put(conn)
class Crawler:
    """Crawler class"""

    def __init__(self, url, limit):
        """
        Initializes Crawler class
        @param url: Input seed page
        type limit: number
        @param limit: total no. of urls to fetch
        """
        self.url = url
        self.url_count_limit = limit
        self.tasks = Queue()
        self.counter = 0  # URLs queued so far, capped at url_count_limit

    def crawl(self, url):
        """
        Crawler function
        Takes an input seed page and uses BeautifulSoup to collect the
        links inside it onto the task queue.
        """
        try:
            data = urllib2.urlopen(url)
            try:
                bs = BeautifulSoup(data.read())
            finally:
                data.close()  # fix: the HTTP response was never closed
            links = bs('a')
            for link in links:
                if ('href' in dict(link.attrs)):
                    # Fix: the original rebound `url`, so every later
                    # relative link resolved against the PREVIOUS link
                    # instead of the page URL. Use a separate name.
                    href = urljoin(url, link['href'])
                    if href.find("'") != -1:
                        continue
                    href = href.split('#')[0]  # strip fragments
                    if href[0:4] == 'http':
                        if self.counter < self.url_count_limit:
                            self.tasks.put(href)
                            self.counter += 1
                            logger.info('ADDED: %s' % href)
        except Exception:
            # Fix: narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate; fetch/parse failures are logged
            # and crawling continues (preserving the best-effort behavior).
            logger.info('ERROR: %s' % url)

    def run(self):
        """Crawl the seed, then process queued URLs one greenlet at a time."""
        self.crawl(self.url)
        # For async handling of each url
        while not self.tasks.empty():
            url = self.tasks.get()
            gevent.spawn(self.crawl, url).join()
            logger.info('FETCH: %s' % url)
class SimpleDBPool():
    """Very small MySQL connection pool with busy-wait checkout."""

    # Class-level defaults, overwritten per-instance by Init().
    DBPOOL_SIZE=0
    HOST=''
    PORT=0
    USERNAME=''
    PASSWD=''
    DATABASE=''

    def __init__(self):
        self._dbFree =Queue()  # idle connections
        self._inited =False

    def Init(self,dbpool_size,host,port,username,passwd,database):
        """Open dbpool_size connections up front and park them in the queue."""
        self.DBPOOL_SIZE=dbpool_size
        self.HOST=host
        self.PORT=port
        self.USERNAME=username
        self.PASSWD=passwd
        self.DATABASE=database
        for conn in [pymysql.connect(host=self.HOST,port=self.PORT,user=self.USERNAME,passwd=self.PASSWD,db=self.DATABASE) for i in xrange(0,self.DBPOOL_SIZE)] :
            self._dbFree.put(conn)
        self._inited =True

    def Final(self):
        # NOTE(review): iterating a gevent Queue blocks once it is drained,
        # so this loop may never terminate -- confirm intended shutdown
        # behavior (and that all connections are idle when called).
        for conn in self._dbFree :
            conn.close()
        self._inited =False

    def Get(self):
        """Check a connection out, busy-waiting while the pool is empty.
        Returns None when the pool was never initialized."""
        if not self._inited:
            return
        while self._dbFree.empty():
            print 'empty'
            gevent.sleep(0.1)
        conn =self._dbFree.get()
        return conn

    def Release(self,conn,commit):
        """Return *conn* to the pool, committing or rolling back first."""
        if not self._inited:
            return
        if commit:
            conn.commit()
        else:
            conn.rollback()
        print 'put'
        self._dbFree.put(conn)
        gevent.sleep(0)  # give a waiting Get() a chance to run
class ServerTransport(gevent.Greenlet): def __init__(self, client_id, conn, delta=0): gevent.Greenlet.__init__(self) self.client_id = client_id self.conn = conn self.delta = delta self.queue = Queue() self.logger = logging.getLogger('server') self.t_0 = 0 def _run(self): #self.send_synchronize() while True: if not self.queue.empty(): evnt = self.queue.get() if evnt["event_type"] == 1: evnt["sent_at"] = pygame.time.get_ticks() self.t_0 = evnt["sent_at"] #self.logger.info("Server before send") gevent.sleep(0) self.conn.sendall(json.dumps(evnt)+"\n") # write event to connection #self.logger.info("Server after send") if evnt["event_type"] == 1: #self.logger.info("Server before recv") gevent.sleep() data = self.conn.recv(1024) # wait read #self.logger.info("Server after recv, data: %s", data) self.handle_response(data) # handle data gevent.sleep(0) self.conn.close() self.s.close() #todo remove def add_event(self, data): self.queue.put(data) def send_synchronize(self): raise "not implemented" def handle_response(self): raise "not implemented"
class SiteInfo:
    """Fetches each target domain over HTTP and fingerprints the running web
    application against rules loaded from a local SQLite database."""

    def __init__(self, targets):
        self.conn = sqlite3.connect(user_path + '/db/Rules.db')
        self.cursor = self.conn.cursor()
        self.targets: List = targets        # domains to probe
        self.queue = Queue()                # work queue shared by greenlets
        self.protocol: str = 'http://'
        self.headers: Dict = {'User-Agent': user_agent}
        self.results: List = []             # accumulated per-domain results
        # Load fingerprint rules: name -> list of "place:pattern" strings.
        sql = 'select * from fingerprint'
        self.cursor.execute(sql)
        self.rules: Dict = {}
        for item in self.cursor.fetchall():
            self.rules[item[1]] = item[2].split(', ')
        self.title_pattern = "<title.*?>(.*?)</title.*?>"
        self.encoding_pattern = "encoding=[\'|\"]?(.*?)[\'|\"]"

    def enqueue_domain(self):
        # Non-blocking fill of the work queue.
        for domain in self.targets:
            self.queue.put_nowait(domain)

    def get_title(self, r):
        """Extract the page title from response *r*, guessing the charset
        from headers, meta charset= or XML encoding= declarations."""
        try:
            encoding = r.encoding
            if not r.text:
                return ''
            # No declared encoding: sniff charset= then encoding= in the body.
            if not encoding:
                if re.findall('charset=[\'|\"]?(.*?)[\'|\"]', r.text, re.I | re.S):
                    encoding = re.findall('charset=[\'|\"]?(.*?)[\'|\"]', r.text, re.I | re.S)[0]
                elif re.findall(self.encoding_pattern + '?', r.text, re.I | re.S):
                    encoding = re.findall(self.encoding_pattern, r.text, re.I | re.S)[0]
                else:
                    return ''
            # ISO-8859-1 is requests' fallback default, so re-sniff the real
            # charset and re-decode the latin-1-decoded title bytes.
            if encoding == 'ISO-8859-1' and re.findall(self.title_pattern, r.text, re.I | re.S):
                if re.findall('charset=[\'|\"]?(.*?)[\'|\"]', r.text, re.I | re.S):
                    encoding = re.findall('charset=[\'|\"]?(.*?)[\'|\"]', r.text, re.I | re.S)[0]
                elif re.findall(self.encoding_pattern + '?', r.text, re.I | re.S):
                    encoding = re.findall(self.encoding_pattern, r.text, re.I | re.S)[0]
                else:
                    encoding = 'utf-8'
                return re.findall(self.title_pattern, r.text, re.I | re.S)[0].encode("iso-8859-1").decode(
                    encoding).encode('utf-8').decode(
                    'utf-8', errors='ignore')
            elif re.findall(self.title_pattern, r.text, re.I | re.S) and encoding.lower() in [
                'utf-8', 'gb2312', 'gbk2312', 'gbk'
            ]:
                # Directly usable encodings: take the title as-is.
                return re.findall(self.title_pattern, r.text, re.I | re.S)[0].strip()
            elif re.findall(self.title_pattern, r.text, re.I | re.S):
                # Other encodings: round-trip through bytes, dropping
                # characters that do not survive.
                return re.findall(self.title_pattern, r.text, re.I | re.S)[0].encode(encoding).decode(
                    'utf-8', errors='ignore').strip()
            else:
                return ''
        except AttributeError:
            return ''
        except LookupError:
            # Unknown codec name sniffed from the page.
            return ''

    def get_info(self):
        """Worker greenlet: probe queued domains and match fingerprint rules."""
        while not self.queue.empty():
            domain = self.queue.get()
            result = {
                'domain': domain,
                'title': '',
                'text': '',
                'headers': [],
                'app': []
            }
            try:
                r = requests.get(self.protocol + domain, timeout=3, headers=self.headers)
            except requests.exceptions.ConnectTimeout:
                # NOTE(review): this RETURNS (ending the worker) while the
                # other timeout/connection errors CONTINUE to the next
                # domain -- confirm the asymmetry is intentional.
                return result
            except requests.exceptions.ReadTimeout:
                continue
            except requests.exceptions.ConnectionError:
                continue
            result['title'] = self.get_title(r)
            result['headers'] = [{
                "key": k,
                "value": v
            } for k, v in r.headers.items()]
            result['text'] = r.text
            # Match every rule set; first matching rule per app wins.
            for appname, rules in self.rules.items():
                for rule in rules:
                    place, rule = rule.split(':', 1)
                    if place in ['body']:
                        if r.text.find(rule) != -1:
                            result['app'].append(appname)
                            break
                    elif place in ['title']:
                        if re.search(
                                '<title>.*?' + re.escape(rule) + '.*?</title>',
                                r.text):
                            result['app'].append(appname)
                            break
                    elif place in ['header', 'server']:
                        # Flatten all headers into one searchable string.
                        header = ''
                        for key, value in r.headers.items():
                            header += key + ': ' + value + ' '
                        if re.search(re.escape(rule), header, re.I):
                            result['app'].append(appname)
                            break
            self.results.append(result)

    def run(self):
        """Fill the queue and run the worker greenlets; return all results."""
        print("获取页面信息")
        self.enqueue_domain()
        threads = [
            gevent.spawn(self.get_info) for _ in range(siteinfo_thread_num)
        ]
        gevent.joinall(threads)
        return self.results
class Recorder(object):
    """ Recorder class. Producer, consumers and controller greenlets are
    methods of this class. Although not implemented, Recorder should be a
    Singleton. """

    def __init__(self, emotiv, filename):
        self.isRunning = False    # global on/off flag shared by all greenlets
        self.isRecording = False  # toggled by controller() during stimuli
        self.sensors = np.array(['F3','P7','O1','O2','P8','F4'])
        self.PLOT_MIN_Y = 0
        self.PLOT_MAX_Y = 1000
        #### PROTOCOL DEFINITION ####
        self.ITERATIONS = config.RECORDING_ITERATIONS
        self.PERIOD = config.RECORDING_PERIOD  # Recording stimulated SSVEP
        self.PAUSE_INTER_RECORDING = 2         # countdown seconds between stimuli
        self.STIMULI_PATH = config.STIMULI_PATH
        self.DATA_PATH = config.DATA_PATH
        self.FILENAME = filename
        self.LOW_FREQ = 1
        self.NUM_STIMULI = 3  # TOP, SX, DX
        self.headset = emotiv
        self.plotQueue = Queue()      # producer -> plot consumer
        self.recorderQueue = Queue()  # producer -> recorder consumer

    def get_sensors_info(self):
        """ Greenlet to get a packet from Emotiv headset.
        Append new data to queues where consumers will read from """
        try:
            while self.isRunning:
                # One buffer = one second of samples (config.FS rows).
                buf = np.zeros((config.FS, len(self.sensors)))
                for i in range(len(buf)):
                    packet = self.headset.dequeue()
                    values = [packet.sensors[name]['value'] for name in self.sensors]
                    buf[i] = np.array(values)
                    gevent.sleep(0)
                # need cause recording could be over
                if self.plotQueue is not None:
                    self.plotQueue.put_nowait(buf)
                if self.recorderQueue is not None and self.isRecording:
                    self.recorderQueue.put_nowait(buf)
                gevent.sleep(0)
        except KeyboardInterrupt:
            print ('Read stopped')
            self.isRunning = False
        except Exception as e:
            print ('Read Error: %s' % e)
            self.isRunning = False
        finally:
            print ('Read over')
            self.isRunning = False
            self.headset.close()

    def controller(self):
        """ Greenlet that controls recording process. Performs many
        iterations of recording response to the stimuli (TOP, SX, DX),
        beeping and counting down before each window. """
        # Previous per-frequency stimuli windows, kept for reference:
        # if self.LOW_FREQ:
        #     TOP = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("64.exe")]
        #     SX = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("69.exe")]
        #     DX = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("80.exe")]
        # else:
        #     TOP = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("12.exe")]
        #     SX = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("13.exe")]
        #     DX = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("15.exe")]
        #
        # TOPwindow = Popen(args=TOP)
        # SXwindow = Popen(args=SX)
        # DXwindow = Popen(args=DX)
        stimuliExe = os.path.join(self.STIMULI_PATH, "stimuli_all.exe")
        stimuliWin = Popen(args=stimuliExe)
        gevent.sleep(5)  # let the stimuli window come up
        try:
            for i in xrange(self.ITERATIONS):
                # TOP
                winsound.Beep(1500, 250)
                for i in xrange(self.PAUSE_INTER_RECORDING):
                    print ('Seconds to record TOP: %i' % (self.PAUSE_INTER_RECORDING - i))
                    gevent.sleep(1)
                print ('Start recording TOP')
                self.isRecording = True
                gevent.sleep(self.PERIOD)
                self.isRecording = False
                print ('Stop recording TOP')
                # SX
                winsound.Beep(1500, 250)
                for i in xrange(self.PAUSE_INTER_RECORDING):
                    print ('Seconds to record SX: %i' % (self.PAUSE_INTER_RECORDING - i))
                    gevent.sleep(1)
                print ('Start recording SX')
                self.isRecording = True
                gevent.sleep(self.PERIOD)
                self.isRecording = False
                print ('Stop recording SX')
                # DX
                winsound.Beep(1500, 250)
                for i in xrange(self.PAUSE_INTER_RECORDING):
                    print ('Seconds to record DX: %i' % (self.PAUSE_INTER_RECORDING - i))
                    gevent.sleep(1)
                print ('Start recording DX')
                self.isRecording = True
                gevent.sleep(self.PERIOD)
                self.isRecording = False
                print ('Stop recording DX')
        except Exception as e:
            print ('Controller error: %s' % e)
            self.isRunning = False
        finally:
            # if TOPwindow is not None:
            #     TOPwindow.kill()
            # if SXwindow is not None:
            #     SXwindow.kill()
            # if DXwindow is not None:
            #     DXwindow.kill()
            if stimuliWin is not None:
                stimuliWin.kill()
            print ('Controller over')
            self.isRunning = False

    def recorder(self):
        """ Greenlet that store data read from the headset into a numpy array """
        # One slot per recorded second across all iterations and stimuli.
        data = np.empty(
            (self.ITERATIONS * self.PERIOD * self.NUM_STIMULI, config.FS, len(self.sensors))
        )
        counter = 0
        try:
            while self.isRunning or not self.recorderQueue.empty():
                # Controller greenlets controls the recording
                while self.isRecording or not self.recorderQueue.empty():
                    while not self.recorderQueue.empty():
                        buf = self.recorderQueue.get()
                        data[counter] = buf
                        counter += 1
                    gevent.sleep(1)
                gevent.sleep(0)
        except Exception as e:
            print ('Recorder error: %s' % e)
            self.isRunning = False
        finally:
            # Flatten to (samples, channels) and persist as a MATLAB file.
            print ('Recorder over')
            data = data.reshape((self.ITERATIONS * self.PERIOD * self.NUM_STIMULI * config.FS, len(self.sensors)))
            sio.savemat(os.path.join(self.DATA_PATH, self.FILENAME), {'X' : data})
            self.isRunning = False

    def plot(self, bufferSize = 1000):
        """ Greenlet that plot y once per .1 The y scale is specified through
        global config but is dynamically adjusted """
        ax = plt.subplot(111)
        canvas = ax.figure.canvas
        plt.grid()
        # to ensure proper background restore
        background = None
        plotsNum = len(self.sensors)
        # One rolling buffer and one line per sensor channel.
        buffers = [deque([0]*bufferSize) for i in xrange(plotsNum)]
        lines = [plt.plot(buffers[i], lw=1, label=self.sensors[i]).pop() for i in xrange(plotsNum)]
        plt.legend()
        plt.axis([0, bufferSize, self.PLOT_MIN_Y, self.PLOT_MAX_Y])
        try:
            while self.isRunning:
                while not self.plotQueue.empty():
                    # Getting values from queue
                    values = self.plotQueue.get()
                    # Updating buffer
                    for j in range(len(values)):
                        [buffers[i].appendleft(values[j, i]) for i in xrange(plotsNum)]
                        [buffers[i].pop() for i in xrange(plotsNum)]
                    if background is None:
                        background = canvas.copy_from_bbox(ax.bbox)
                    canvas.restore_region(background)
                    # Adjusting Y scale
                    minY = min(min(buffers[0:])) - 100
                    maxY = max(max(buffers[0:])) + 100
                    plt.ylim([minY,maxY])
                    # Plot refreshes with new buffer
                    [lines[i].set_ydata(buffers[i]) for i in xrange(plotsNum)]
                    plt.draw()
                    plt.pause(0.000001)
                gevent.sleep(1)
        except Exception as e:
            print ('Plot error: %s' % e)
            self.isRunning = False
        finally:
            print 'Plot over'
            self.isRunning = False
class Worker(object):
    """Boss/worker task pool built on a gevent queue.

    One "boss" greenlet fills ``self.tasks``; ``workers_number`` worker
    greenlets drain it, calling a user-supplied function per task and
    accumulating return values into ``self.array_results``.
    """

    def __init__(self, workers_number, results_type="extend_list"):
        '''
        array_results: collected return values of the per-task function.
        If results_type is "extend_list", each result is a list and is
        extended into array_results; if "add_element", each result is a
        single element and is appended.
        '''
        self.workers_number = workers_number
        self.tasks = Queue()
        self.array_results = []
        self.results_type = results_type

    def put_tasks(self, all_tasks):
        '''The boss puts all tasks into the queue.'''
        for one_task in all_tasks:
            self.tasks.put_nowait(one_task)

    def generate_boss(self, all_tasks):
        '''Spawn the single boss greenlet that enqueues ``all_tasks``.'''
        self.all_tasks_number = len(all_tasks)
        boss = [gevent.spawn(self.put_tasks, all_tasks)]
        return boss

    def get_tasks(self, worker_id, func, *args, **kwargs):
        '''
        A worker: pull tasks off the queue and run ``func(task, progress, ...)``
        until the queue is exhausted.
        '''
        while True:
            # BUGFIX: the original ``while not self.tasks.empty(): get()``
            # is racy with several workers -- another worker can drain the
            # queue between the empty() check and the get(), leaving this
            # worker blocked forever.  get_nowait() + Empty is atomic.
            try:
                task = self.tasks.get_nowait()
            except Empty:
                break
            progress = self.show_progress()
            ret = func(task, progress, *args, **kwargs)
            if ret and self.results_type == "extend_list":
                self.array_results.extend(ret)
            elif ret and self.results_type == "add_element":
                self.array_results.append(ret)
            progress = self.show_progress()
            print(progress)
            # logger.info("The worker %s has got task %s " % (worker_id, task))

    def generate_workers(self, func, *args, **kwargs):
        '''Spawn and return the worker greenlets.'''
        workers = [
            gevent.spawn(self.get_tasks, worker_id, func, *args, **kwargs)
            for worker_id in xrange(1, self.workers_number + 1)
        ]
        return workers

    def joinall(self, boss, workers):
        '''Wait for the boss and every worker greenlet to finish.'''
        all_spawns = boss + workers
        gevent.joinall(all_spawns)

    def return_results(self):
        '''Return the accumulated results.'''
        return self.array_results

    def show_progress(self):
        '''
        Show the progress in two ways:
        1. current_task / all_task
        2. the percentage of completed tasks
        '''
        self.current_tasks_id = self.tasks.qsize()
        progress_one = '%s/%s' % (self.current_tasks_id, self.all_tasks_number)
        progress_percentage = 1 - float(self.current_tasks_id)\
            / float(self.all_tasks_number)
        progress_two = "%s" % (progress_percentage * 100)
        progress = [progress_one, progress_two]
        return progress

    def pack(self, all_tasks, func, *args, **kwargs):
        '''Convenience wrapper: boss + workers + join, then return results.'''
        boss = self.generate_boss(all_tasks)
        workers = self.generate_workers(func, *args, **kwargs)
        self.joinall(boss, workers)
        return self.return_results()
class ScraperGeventQueue(object):
    """Drive ``scraperClass`` over date-range tasks with a pool of gevent
    workers fed from a shared task queue.

    Tasks are ``(begin_day, end_day, page_num)`` tuples; page 1 discovers
    the total page count and fans out tasks for the remaining pages, while
    over-long ranges (>200 pages) are split in half and re-queued.
    """

    def __init__(self, scraperClass, gevent_num=100):
        self.scraperClass = scraperClass
        self.gevent_num = gevent_num
        self.tasks = Queue()

    @property
    def args(self):
        """Initial tasks: one range covering 1989-1999, then one per month
        from 2000 through 2016, each starting at page 1."""
        day_list = [('1989-01-01', '1999-12-31', 1)]
        for i in range(2000, 2017):
            for j in range(1, 13):
                begin_day = str(i) + '-' + str(j) + '-01'
                end_day = str(i) + '-' + str(j) + '-' + str(daysInMonth(i, j))
                day_list.append((begin_day, end_day, 1))
        return day_list

    def _run(self, task):
        """Process one task, possibly enqueueing follow-up tasks."""
        begin_day, end_day, page_num = task
        scraper = self.scraperClass(begin_day, end_day, page_num)
        if page_num == 1:
            scraper.html = scraper.getHTML()
            totalPage = scraper.getTotalPageNumber()
            if totalPage > 200:
                # Too many pages for one range: split it in half and requeue.
                new_end_day, new_begin_day = aveDay(begin_day, end_day)
                task1 = (begin_day, new_end_day, 1)
                task2 = (new_begin_day, end_day, 1)
                self.tasks.put(task1)
                self.tasks.put(task2)
                writeCSV('overflow_list', *task)
                writeCSV('split_list', *task1)
                writeCSV('split_list', *task2)
            elif totalPage > 1:
                writeCSV('total_page_list', *(begin_day, end_day, totalPage))
                # Fan out one task per remaining page.
                for i in range(2, totalPage + 1):
                    self.tasks.put_nowait((begin_day, end_day, i))
                # BUGFIX: removed debug leftover ``map(print, self.tasks)``:
                # iterating a gevent Queue *consumes* it (destroying the tasks
                # just queued) on Python 2, and is a lazy no-op on Python 3.
            elif totalPage == 1:
                writeCSV('total_page_list', *(begin_day, end_day, totalPage))
            else:
                # Page-count discovery failed; retry the task later.
                self.tasks.put(task)
                writeCSV('failure_list', *task)
            if os.path.isfile(scraper.file_name):
                indicator = True
            else:
                indicator = scraper.parseURL(scraper.html)
        else:
            indicator = scraper.start()
        if indicator:
            writeCSV('success_list', *task)
            print(begin_day, end_day, page_num, 'success')
        else:
            self.tasks.put(task)
            writeCSV('failure_list', *task)

    def worker(self):
        """Worker greenlet: drain the queue until it is empty."""
        while True:
            # BUGFIX: ``while not empty(): get()`` is racy with multiple
            # workers; use the atomic get_nowait()/Empty pattern instead.
            try:
                task = self.tasks.get_nowait()
            except Empty:
                break
            self._run(task)

    def manager(self):
        """Seed the queue with the initial tasks."""
        for arg in self.args:
            self.tasks.put_nowait(arg)

    def start(self):
        """Fill the queue, then run ``gevent_num`` workers to completion."""
        gevent.spawn(self.manager).join()
        tasks = [gevent.spawn(self.worker) for i in range(self.gevent_num)]
        gevent.joinall(tasks)
class DatabaseConnectionPool(object):
    # Lazily-grown pool of DB connections backed by a gevent Queue.
    # ``create_connection`` is expected to be provided by a subclass
    # (it is not defined here).  Connection objects expose psycopg2-style
    # attributes: ``closed``, ``isolation_level``, ``commit``, ``rollback``.

    def __init__(self, maxsize=100):
        # NOTE: ``long`` exists only on Python 2.
        if not isinstance(maxsize, (int, long)):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self.maxsize = maxsize
        self.pool = Queue()
        # Number of connections created so far (checked-out + idle).
        self.size = 0

    def get(self):
        """Check a connection out, creating a new one while under maxsize."""
        pool = self.pool
        if self.size >= self.maxsize or pool.qsize():
            # At capacity, or an idle connection is available: take (or
            # block waiting for) one from the pool.
            return pool.get()
        else:
            self.size += 1
            try:
                new_item = self.create_connection()
            except:
                # Creation failed; release the slot we reserved.
                self.size -= 1
                raise
            return new_item

    def put(self, item):
        """Return a connection to the pool."""
        self.pool.put(item)

    def closeall(self):
        """Close every idle connection, ignoring individual close errors."""
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

    @contextlib.contextmanager
    def connection(self, isolation_level=None):
        """Context manager yielding a raw connection.

        Commits on success, rolls back on error, and returns the connection
        to the pool unless it was closed mid-use (in which case every idle
        connection is dropped, assuming the backend went away).
        """
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    # Already at the requested level; skip the re-apply below.
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn
        except:
            if conn.closed:
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError("Cannot commit because connection was closed: %r" % (conn, ))
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                if isolation_level is not None:
                    # NOTE(review): this re-applies the *requested* level
                    # rather than restoring the previous one -- presumably
                    # intentional; confirm against callers.
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        """Like :meth:`connection`, but yields a cursor instead.

        ``isolation_level`` may be passed as a keyword and is handled the
        same way as in :meth:`connection`.
        """
        isolation_level = kwargs.pop('isolation_level', None)
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn.cursor(*args, **kwargs)
        except:
            if conn.closed:
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError("Cannot commit because connection was closed: %r" % (conn, ))
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                if isolation_level is not None:
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    def _rollback(self, conn):
        """Roll back ``conn``; on failure report to gevent's hub and drop it
        (returns None so callers stop using the connection)."""
        try:
            conn.rollback()
        except:
            gevent.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        """Execute a statement and return the affected row count."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.rowcount

    def fetchone(self, *args, **kwargs):
        """Execute a query and return the first row (or None)."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        """Execute a query and return all rows."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchall()

    def fetchiter(self, *args, **kwargs):
        """Execute a query and yield rows lazily in fetchmany() batches."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            while True:
                items = cursor.fetchmany()
                if not items:
                    break
                for item in items:
                    yield item
class WebSocket:
    # Server-side WebSocket connection (RFC 6455 style framing).
    # A background greenlet (_listen) reassembles fragmented messages and
    # feeds complete messages into an internal queue that receive() reads.

    def __init__(self, socket, environ):
        self.socket = socket
        # Handshake metadata pulled from the WSGI environ.
        self.version = environ.get("HTTP_SEC_WEBSOCKET_VERSION", None)
        self.path = environ.get("PATH_INFO", None)
        self.origin = environ.get("HTTP_ORIGIN", None)
        self.protocol = environ.get("HTTP_SEC_WEBSOCKET_PROTOCOL", None)
        self.closed = False
        self.status = None              # close status code from the peer, if any
        self._receive_error = None      # exception raised by the reader greenlet
        self._queue = Queue()           # complete incoming messages
        self.max_length = 10 * 1024 * 1024  # 10 MiB default message cap
        gevent.spawn(self._listen)

    def set_max_message_length(self, length):
        """Set the maximum accepted size (bytes) of a reassembled message."""
        self.max_length = length

    def _listen(self):
        """Reader greenlet: reassemble frames into messages until EOF/error."""
        try:
            while True:
                fin = False
                message = bytearray()
                is_first_message = True
                start_opcode = None
                while not fin:
                    payload, opcode, fin = self._get_frame(
                        max_length=self.max_length - len(message))
                    # Make sure continuation frames have correct information
                    if not is_first_message and opcode != 0:
                        self._error(STATUS_PROTOCOL_ERROR)
                    if is_first_message:
                        if opcode not in (OPCODE_TEXT, OPCODE_BINARY):
                            self._error(STATUS_PROTOCOL_ERROR)
                        # Save opcode of the initial frame; it decides text/binary
                        start_opcode = opcode
                    message += payload
                    is_first_message = False
                message = bytes(message)
                if start_opcode == OPCODE_TEXT:
                    # UTF-8 text
                    try:
                        message = message.decode()
                    except UnicodeDecodeError:
                        self._error(STATUS_DATA_ERROR)
                self._queue.put(message)
        except Exception as e:
            self.closed = True
            self._receive_error = e
            self._queue.put(None)  # To make sure the error is read

    def receive(self):
        """Block until a message is available; return None on clean EOF,
        re-raise the reader greenlet's error otherwise."""
        if not self._queue.empty():
            return self.receive_nowait()
        if isinstance(self._receive_error, EOFError):
            return None
        if self._receive_error:
            raise self._receive_error
        # Block until the reader greenlet queues something (gevent peek).
        self._queue.peek()
        return self.receive_nowait()

    def receive_nowait(self):
        """Non-blocking receive; raises the stored reader error if fatal."""
        ret = self._queue.get_nowait()
        if self._receive_error and not isinstance(self._receive_error, EOFError):
            raise self._receive_error
        return ret

    def send(self, data):
        """Send ``data`` as a text (str) or binary (bytes) message."""
        if self.closed:
            raise EOFError()
        if isinstance(data, str):
            self._send_frame(OPCODE_TEXT, data.encode())
        elif isinstance(data, bytes):
            self._send_frame(OPCODE_BINARY, data)
        else:
            raise TypeError("Expected str or bytes, got " + repr(type(data)))

    # Reads a frame from the socket. Pings, pongs and close packets are handled
    # automatically
    def _get_frame(self, max_length):
        while True:
            payload, opcode, fin = self._read_frame(max_length=max_length)
            if opcode == OPCODE_PING:
                self._send_frame(OPCODE_PONG, payload)
            elif opcode == OPCODE_PONG:
                pass
            elif opcode == OPCODE_CLOSE:
                if len(payload) >= 2:
                    self.status = struct.unpack("!H", payload[:2])[0]
                was_closed = self.closed
                self.closed = True
                if not was_closed:
                    # Send a close frame in response
                    self.close(STATUS_OK)
                raise EOFError()
            else:
                return payload, opcode, fin

    # Low-level function, use _get_frame instead
    def _read_frame(self, max_length):
        header = self._recv_exactly(2)

        # Client-to-server frames must be masked (mask bit in byte 1).
        if not (header[1] & 0x80):
            self._error(STATUS_POLICY_VIOLATION)

        opcode = header[0] & 0xf
        fin = bool(header[0] & 0x80)

        payload_length = header[1] & 0x7f
        # Extended payload lengths: 126 -> 16-bit, 127 -> 64-bit.
        if payload_length == 126:
            payload_length = struct.unpack("!H", self._recv_exactly(2))[0]
        elif payload_length == 127:
            payload_length = struct.unpack("!Q", self._recv_exactly(8))[0]

        # Control frames are handled in a special way
        if opcode in (OPCODE_PING, OPCODE_PONG):
            max_length = 125

        if payload_length > max_length:
            self._error(STATUS_TOO_LONG)

        mask = self._recv_exactly(4)
        payload = self._recv_exactly(payload_length)
        payload = self._unmask(payload, mask)

        return payload, opcode, fin

    def _recv_exactly(self, length):
        """Read exactly ``length`` bytes from the socket or raise EOFError."""
        buf = bytearray()
        while len(buf) < length:
            block = self.socket.recv(min(4096, length - len(buf)))
            if block == b"":
                raise EOFError()
            buf += block
        return bytes(buf)

    def _unmask(self, payload, mask):
        """XOR-unmask ``payload`` with the 4-byte ``mask``.

        Uses bytes.translate with a per-mask-byte 256-entry table so each
        of the four strides is unmasked in one C-level pass.
        """
        def gen(c):
            return bytes([x ^ c for x in range(256)])

        payload = bytearray(payload)
        payload[0::4] = payload[0::4].translate(gen(mask[0]))
        payload[1::4] = payload[1::4].translate(gen(mask[1]))
        payload[2::4] = payload[2::4].translate(gen(mask[2]))
        payload[3::4] = payload[3::4].translate(gen(mask[3]))
        return bytes(payload)

    def _send_frame(self, opcode, data):
        """Send ``data`` split into SEND_PACKET_SIZE-sized frames
        (continuation frames carry opcode 0; FIN set on the last)."""
        # NOTE(review): when ``data`` is empty the loop body never runs, so
        # no frame at all is emitted for an empty message -- confirm whether
        # empty messages are expected here.
        for i in range(0, len(data), SEND_PACKET_SIZE):
            part = data[i:i + SEND_PACKET_SIZE]
            # FIN is set only on the chunk starting at the last packet boundary.
            fin = int(i == (len(data) - 1) // SEND_PACKET_SIZE * SEND_PACKET_SIZE)
            header = bytes([
                (opcode if i == 0 else 0) | (fin << 7),
                min(len(part), 126)
            ])
            if len(part) >= 126:
                header += struct.pack("!H", len(part))
            self.socket.sendall(header + part)

    def _error(self, status):
        """Close with ``status`` and abort the caller via EOFError."""
        self.close(status)
        raise EOFError()

    def close(self, status=STATUS_OK):
        """Send a close frame (best-effort) and close the socket."""
        self.closed = True
        try:
            self._send_frame(OPCODE_CLOSE, struct.pack("!H", status))
        except (BrokenPipeError, ConnectionResetError):
            pass
        self.socket.close()
class DatabaseConnectionPool(object):
    """
    MySQL (pymysql) connection pool backed by a gevent Queue.

    - MYSQL_PASSWD must be set in .bashrc or .bash_profile.
    """
    def __init__(self, max_size, auto_commit, fetchiter_size):
        if not isinstance(max_size, int):
            raise TypeError('Expected integer, got %r' % (max_size, ))
        self.max_size = max_size
        # Whether connections are expected to auto-commit; NOTE(review):
        # nothing in this class reads it, and connection() never commits --
        # presumably create_connection() configures autocommit; confirm.
        self.auto_commit = auto_commit
        self.pool = Queue()
        # Number of connections created so far.
        self.size = 0
        self.fetchiter_size = fetchiter_size

    def get(self):
        """Check a connection out, creating a new one when the pool of idle
        connections is below max_size."""
        # print('size/max_size: %s/%s' % (self.size, self.max_size)
        # print('pool.qsize(): %s' % (self.pool.qsize())
        # if self.size >= self.max_size or self.pool.qsize():
        # NOTE(review): this deliberately differs from the commented-out
        # original condition -- idle connections are reused only once the
        # pool holds max_size of them, so total connections can exceed
        # max_size; confirm this is intended.
        if self.pool.qsize() >= self.max_size:
            return self.pool.get()
        else:
            self.size += 1
            try:
                new_conn = self.create_connection()
            except:
                # Creation failed; release the reserved slot.
                self.size -= 1
                raise
            return new_conn

    def put(self, item):
        """Return a connection to the pool."""
        self.pool.put(item)

    def close_all(self):
        """Close every idle connection, ignoring individual errors."""
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except:
                pass

    def commit_all(self):
        """Commit every idle connection, ignoring individual errors.
        NOTE: connections are drained from the pool and not returned."""
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.commit()
            except:
                pass

    @contextlib.contextmanager
    def connection(self, isolation_level=None):
        """Context manager yielding a raw connection.

        Rolls back on error; on a dead connection (``conn.open`` false)
        drops all idle connections.  Always returns a live connection to
        the pool.  Uses pymysql's ``conn.open`` flag (vs psycopg2's
        ``closed``).
        """
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    # Already at the requested level; skip re-apply below.
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn
        except:
            if not conn.open:
                conn = None
                self.close_all()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if not conn.open:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" % (conn, ))
        finally:
            if conn is not None and conn.open:
                if isolation_level is not None:
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        """Yield a server-side dict cursor on a pooled connection."""
        try:
            isolation_level = kwargs.pop('isolation_level', None)
            with self.connection(isolation_level) as conn:
                # SSDictCursor: unbuffered (streaming) cursor returning dicts.
                yield conn.cursor(cursorclass=pymysql.cursors.SSDictCursor,
                                  *args, **kwargs)
        except:
            raise

    def _rollback(self, conn):
        """Roll back ``conn``; on failure report to gevent's hub and drop it."""
        try:
            conn.rollback()
        except:
            gevent.hub.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        """Execute a statement and return the affected row count."""
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.execute(*args)
                return cursor.rowcount
        except:
            raise

    def executemany(self, *args, **kwargs):
        """executemany() and return the affected row count."""
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.executemany(*args)
                return cursor.rowcount
        except:
            raise

    def fetchone(self, *args, **kwargs):
        """Execute a query and return the first row (or None)."""
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.execute(*args)
                return cursor.fetchone()
        except:
            raise

    def fetchall(self, *args, **kwargs):
        """Execute a query and return all rows."""
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.execute(*args)
                return cursor.fetchall()
        except:
            raise

    def fetchiter(self, *args, **kwargs):
        """Execute a query and yield rows lazily in fetchiter_size batches."""
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.execute(*args)
                while True:
                    items = cursor.fetchmany(size=self.fetchiter_size)
                    if not items:
                        break
                    for item in items:
                        yield item
        except:
            raise
class WorkEngine(object):
    # Worker-node engine: pulls tasks for this machine from a Redis-backed
    # queue and dispatches them to dynamically loaded modules.  Three
    # background greenlets run for the lifetime of the instance.

    def __init__(self, **kwargs):
        self._module_dict = {}
        self._redis_client = RedisClient(host=config_setting.queue_host,
                                         port=config_setting.queue_port,
                                         password=config_setting.queue_pwd)
        self._secret_key = 'd6f89b09'
        self._wid = unique_machine
        # Redis hash key holding tasks addressed to this worker id.
        self._queue_list = ['ultron:work:work_id:' + str(self._wid)]
        # Generate the auth token (sha1 of secret key + dash-stripped wid).
        self._token = hashlib.sha1(
            (self._secret_key + self._wid.replace('-', '')).encode()).hexdigest()
        self._task_queue = Queue()
        # self.init_modules()
        gevent.spawn(self._get_task)
        gevent.spawn(self._dispatch_task)
        gevent.spawn(self._heart_tick)
        # Yield once so the spawned greenlets get a chance to start.
        gevent.sleep()

    def init_modules(self):
        """Load every module described in the (module-level) moddules_info JSON."""
        l = json.loads(moddules_info)
        for setting in l:
            self.load_modules(setting)

    def load_modules(self, setting):
        """Import one external module by name and register its Module class.

        Modules flagged isEffective == 0 are skipped; the 'login' module
        additionally logs in to the master immediately.
        """
        name = setting['name']
        is_effective = setting['isEffective']
        if is_effective == 0:
            return
        module_name = 'ultron.cluster.work.extern_modules.' + name + '.module'
        try:
            module = importlib.import_module(module_name)
            if 'Module' in dir(module):
                strategy_class = module.__getattribute__('Module')
                self._module_dict[name] = strategy_class(
                    name, self._wid, self._token, self._redis_client)
                print('module %s loading' % (name))
                if name == 'login':
                    self._module_dict[name].login_master()
        except Exception as e:
            print('Failed to import module:%s:[%s]' % (name, str(e)))

    def _heart_tick(self):
        """Greenlet: send a heartbeat via the 'login' module every ~20s."""
        last_time = datetime.datetime.now()
        while True:
            now_time = datetime.datetime.now()
            if (now_time - last_time).seconds > 20:
                # Send the heartbeat packet.
                task = {'name': 'login', 'opcode': 'heart_tick'}
                self._module_dict[task['name']].process_respone(task)
                last_time = now_time
            gevent.sleep(.3)

    def _get_task(self):
        """Greenlet: drain this worker's Redis hash into the local queue."""
        while True:
            for queue in self._queue_list:
                task_all = self._redis_client.hmgetall(queue)
                task_list = task_all[0]
                # Remove the fetched entries so they are processed only once.
                self._redis_client.hmdel(queue, task_list.keys())
                for tid, task in task_list.items():
                    self._task_queue.put(json.loads(task))
            gevent.sleep(.3)

    # Dispatches queued tasks to the module named in each task
    # (also handles the per-node login flow).
    def _dispatch_task(self):
        while True:
            while not self._task_queue.empty():
                task = self._task_queue.get()
                space_name = str(task.get('name'))
                if space_name in self._module_dict:
                    self._module_dict[space_name].process_respone(task)
            gevent.sleep(.3)
class TestChannelInt(IonIntegrationTestCase):
    # End-to-end broker integration test: two publisher greenlets, one
    # listener greenlet mirroring deliveries into client-side queues, and a
    # manually driven RecvChannel under test.

    def setUp(self):
        self._start_container()

    #@skip('Not working consistently on buildbot')
    def test_consume_one_message_at_a_time(self):
        # end to end test
        # - Process P1 is producing one message every 5 seconds
        # - Process P2 is producing one other message every 3 seconds
        # - Process S creates a auto-delete=False queue without a consumer and without a binding
        # - Process S binds this queue through a pyon.net or container API call to the topic of process P1
        # - Process S waits a bit
        # - Process S checks the number of messages in the queue
        # - Process S creates a consumer, takes one message off the queue (non-blocking) and destroys the consumer
        # - Process S waits a bit (let messages accumulate)
        # - Process S creates a consumer, takes a message off and repeates it until no messges are left (without ever blocking) and destroys the consumer
        # - Process S waits a bit (let messages accumulate)
        # - Process S creates a consumer, takes a message off and repeates it until no messges are left (without ever blocking). Then requeues the last message and destroys the consumer
        # - Process S creates a consumer, takes one message off the queue (non-blocking) and destroys the consumer.
        # - Process S sends prior message to its queue (note: may be tricky without a subscription to yourself)
        # - Process S changes the binding of queue to P1 and P2
        # - Process S removes all bindings of queue
        # - Process S deletes the queue
        # - Process S exists without any residual resources in the broker
        # - Process P1 and P1 get terminated without any residual resources in the broker
        #
        # * Show this works with the ACK or no-ACK mode
        # * Do the above with semi-abstracted calles (some nicer boilerplate)

        # Publisher greenlet: one '5,<n>' message per second until stopped.
        def every_five():
            p = self.container.node.channel(PublisherChannel)
            p._send_name = NameTrio(bootstrap.get_sys_name(), 'routed.5')
            counter = 0
            while not self.publish_five.wait(timeout=1):
                p.send('5,' + str(counter))
                counter += 1

        # Publisher greenlet: one '3,<n>' message every 0.6s until stopped.
        def every_three():
            p = self.container.node.channel(PublisherChannel)
            p._send_name = NameTrio(bootstrap.get_sys_name(), 'routed.3')
            counter = 0
            while not self.publish_three.wait(timeout=0.6):
                p.send('3,' + str(counter))
                counter += 1

        self.publish_five = Event()
        self.publish_three = Event()
        self.five_events = Queue()
        self.three_events = Queue()

        gl_every_five = spawn(every_five)
        gl_every_three = spawn(every_three)

        def listen(lch):
            """
            The purpose of the this listen method is to trigger waits in code below.
            By setting up a listener that subscribes to both 3 and 5, and putting received
            messages into the appropriate gevent-queues client side, we can assume that
            the channel we're actually testing with get_stats etc has had the message
            delivered too.
            """
            lch._queue_auto_delete = False
            lch.setup_listener(
                NameTrio(bootstrap.get_sys_name(), 'alternate_listener'),
                'routed.3')
            lch._bind('routed.5')
            lch.start_consume()
            while True:
                try:
                    newchan = lch.accept()
                    m, h, d = newchan.recv()
                    count = m.rsplit(',', 1)[-1]
                    if m.startswith('5,'):
                        self.five_events.put(int(count))
                        newchan.ack(d)
                    elif m.startswith('3,'):
                        self.three_events.put(int(count))
                        newchan.ack(d)
                    else:
                        # NOTE: StandardError is Python-2-only.
                        raise StandardError("unknown message: %s" % m)
                except ChannelClosedError:
                    break

        lch = self.container.node.channel(SubscriberChannel)
        gl_listen = spawn(listen, lch)

        # Stop publishers/listener and tear down their channel on exit.
        def do_cleanups(gl_e5, gl_e3, gl_l, lch):
            self.publish_five.set()
            self.publish_three.set()
            gl_e5.join(timeout=5)
            gl_e3.join(timeout=5)
            lch.stop_consume()
            lch._destroy_queue()
            lch.close()
            gl_listen.join(timeout=5)

        self.addCleanup(do_cleanups, gl_every_five, gl_every_three,
                        gl_listen, lch)

        ch = self.container.node.channel(RecvChannel)
        ch._recv_name = NameTrio(bootstrap.get_sys_name(), 'test_queue')
        ch._queue_auto_delete = False

        # #########
        # THIS TEST EXPECTS OLD BEHAVIOR OF NO QOS, SO SET A HIGH BAR
        # #########
        ch._transport.qos_impl(prefetch_count=9999)

        def cleanup_channel(thech):
            thech._destroy_queue()
            thech.close()

        self.addCleanup(cleanup_channel, ch)

        # declare exchange and queue, no binding yet
        ch._declare_exchange(ch._recv_name.exchange)
        ch._declare_queue(ch._recv_name.queue)
        ch._purge()

        # do binding to 5 pub only
        ch._bind('routed.5')

        # wait for one message
        self.five_events.get(timeout=2)

        # ensure 1 message, 0 consumer
        self.assertTupleEqual((1, 0), ch.get_stats())

        # start a consumer
        ch.start_consume()
        time.sleep(0.2)
        self.assertEquals(
            ch._recv_queue.qsize(),
            1)  # should have been delivered to the channel, waiting for us now

        # receive one message with instant timeout
        m, h, d = ch.recv(timeout=0)
        self.assertEquals(m, "5,0")
        ch.ack(d)

        # we have no more messages, should instantly fail
        self.assertRaises(PQueue.Empty, ch.recv, timeout=0)

        # stop consumer
        ch.stop_consume()

        # wait until next 5 publish event
        num = self.five_events.get(timeout=2)
        self.assertEquals(num, 1)

        # start consumer again, empty queue
        ch.start_consume()
        time.sleep(0.1)
        while True:
            try:
                m, h, d = ch.recv(timeout=0)
                self.assertTrue(m.startswith('5,'))
                ch.ack(d)
            except PQueue.Empty:
                ch.stop_consume()
                break

        # wait for new message
        num = self.five_events.get(timeout=2)
        self.assertEquals(num, 2)

        # consume and requeue
        ch.start_consume()
        time.sleep(0.1)
        m, h, d = ch.recv(timeout=0)
        self.assertTrue(m.startswith('5,'))
        ch.reject(d, requeue=True)

        # rabbit appears to deliver this later on, only when we've got
        # another message in it
        # wait for another message publish
        num = self.five_events.get(timeout=2)
        self.assertEquals(num, 3)
        time.sleep(0.1)

        # requeued "5,2" should come back before the fresh "5,3"
        expect = ["5,2", "5,3"]
        while True:
            try:
                m, h, d = ch.recv(timeout=0)
                self.assertTrue(m.startswith('5,'))
                self.assertEquals(m, expect.pop(0))
                ch.ack(d)
            except PQueue.Empty:
                ch.stop_consume()
                self.assertListEqual(expect, [])
                break

        # let's change the binding to the 3 now, empty the testqueue first
        # (artifact of test)
        while not self.three_events.empty():
            self.three_events.get(timeout=0)

        # we have to keep the exchange around - it will likely autodelete.
        ch2 = self.container.node.channel(RecvChannel)
        ch2.setup_listener(NameTrio(bootstrap.get_sys_name(), "another_queue"))

        ch._destroy_binding()
        ch._bind('routed.3')

        ch2._destroy_queue()
        ch2.close()

        self.three_events.get(timeout=1)

        ch.start_consume()
        time.sleep(0.1)
        self.assertEquals(ch._recv_queue.qsize(), 1)

        m, h, d = ch.recv(timeout=0)
        self.assertTrue(m.startswith('3,'))
        ch.ack(d)

        # wait for a new 3 to reject
        self.three_events.get(timeout=10)
        time.sleep(0.1)
        m, h, d = ch.recv(timeout=0)
        ch.reject(d, requeue=True)

        # recycle consumption, should get the requeued message right away?
        ch.stop_consume()
        ch.start_consume()
        time.sleep(0.1)
        self.assertEquals(ch._recv_queue.qsize(), 1)

        m2, h2, d2 = ch.recv(timeout=0)
        self.assertEquals(m, m2)

        ch.stop_consume()
class Engine(object):
    """Extract/inject pipeline runner.

    ``sequence`` is a list of ``(phase_name, sources, targets)`` triples.
    Source plugins are drained concurrently (gevent greenlets) into
    ``self.queue``; batches are pushed to every target plugin inside a
    per-phase transaction that is committed or rolled back as a whole.
    """

    def __init__(self, sequence, database, phase_hook=None, batch_size=100,
                 force=False, retries=3):
        self.sequence = sequence
        self.database = database
        self.queue = Queue()
        self.phase_hook = phase_hook
        self.batch_size = batch_size
        self.force = force          # re-run phases already logged as done
        self.retries = retries
        self.errors = []

    def _push_to_target(self, targets):
        """Get a batch of elements from the queue, and push it to the targets.

        Returns the number of items (including 'END' sentinels) consumed
        from the queue; 0 when the queue was empty.
        """
        if self.queue.empty():
            return 0    # nothing

        batch = []
        pushed = 0

        # collecting a batch
        while len(batch) < self.batch_size:
            item = self.queue.get()
            if item == 'END':
                pushed += 1     # the 'END' item
                break
            batch.append(item)

        if len(batch) != 0:
            greenlets = Group()
            for plugin in targets:
                green = greenlets.spawn(self._put_data, plugin, batch)
                green.link_exception(partial(self._error,
                                             exception.InjectError, plugin))
            greenlets.join()
            pushed += len(batch)

        return pushed

    #
    # transaction managment
    #
    def _start_transactions(self, plugins):
        for plugin in plugins:
            plugin.start_transaction()

    def _commit_transactions(self, plugins):
        # XXX what happends when this fails?
        for plugin in plugins:
            plugin.commit_transaction()

    def _rollback_transactions(self, plugins):
        for plugin in plugins:
            plugin.rollback_transaction()

    def _put_data(self, plugin, data):
        return plugin.inject(data)

    def _get_data(self, plugin, start_date, end_date):
        # Always emit the 'END' sentinel, even if extract() fails midway,
        # so _push_to_target can account for finished extractors.
        try:
            for item in plugin.extract(start_date, end_date):
                self.queue.put((plugin.get_id(), item))
        finally:
            self.queue.put('END')

    def _log_transaction(self, source, start_date, end_date, greenlet):
        self.database.add_entry([source], start_date, end_date)

    def _error(self, exception, plugin, greenlet):
        self.errors.append((exception, plugin, greenlet))

    def _run_phase(self, phase, start_date, end_date):
        phase, sources, targets = phase
        logger.info('Running phase %r' % phase)
        # NOTE(review): _reset_counters is not defined in this class --
        # presumably provided by a subclass; confirm.
        self._reset_counters()
        self._start_transactions(targets)
        self.database.start_transaction()
        try:
            greenlets = Group()

            # each callable will push its result in the queue
            for source in sources:
                exists = self.database.exists(source, start_date, end_date)
                if exists and not self.force:
                    logger.info('Already done: %s, %s to %s' % (
                        source.get_id(), start_date, end_date))
                    continue
                green = greenlets.spawn(self._get_data, source, start_date,
                                        end_date)
                green.link_value(partial(self._log_transaction, source,
                                         start_date, end_date))
                green.link_exception(partial(self._error,
                                             exception.ExtractError, source))

            # looking at the queue
            pushed = 0
            while len(greenlets) > 0 or self.queue.qsize() > 0:
                gevent.sleep(0)
                pushed += self._push_to_target(targets)

            # let's see if we have some errors
            if len(self.errors) > 0:
                # yeah! we need to rollback
                # XXX later we'll do a source-by-source rollback
                raise exception.RunError(self.errors)
        except Exception:
            self._rollback_transactions(targets)
            self.database.rollback_transaction()
            raise
        else:
            self._commit_transactions(targets)
            self.database.commit_transaction()

    def _clear(self, start_date, end_date):
        """Ask every target plugin to clear the date range for all sources."""
        source_ids = set()
        plugins = []
        for phase, sources, targets in self.sequence:
            source_ids.update(set([s.get_id() for s in sources]))
            plugins.extend(targets)
        for target in plugins:
            try:
                target.clear(start_date, end_date, list(source_ids))
            except Exception:
                logger.error('Failed to clear %r' % target.get_id())

    def _purge(self, start_date, end_date):
        """Ask every source plugin to purge the date range (best-effort)."""
        for phase, sources, targets in self.sequence:
            for source in sources:
                try:
                    source.purge(start_date, end_date)
                except Exception:
                    logger.error('Failed to purge %r' % source.get_id())

    def _retry(self, func, *args, **kw):
        """Call ``func`` up to ``self.retries`` times, clearing the queue
        between attempts; re-raises the last error when retries run out."""
        tries = 0
        retries = self.retries
        last_exc = None
        while tries < retries:
            try:
                return func(*args, **kw)
            # BUGFIX: was ``except Exception, exc`` (Python-2-only syntax);
            # the ``as`` form behaves identically and parses on 2.6+ and 3.
            except Exception as exc:
                last_exc = exc
                self.queue.queue.clear()
                logger.exception('%s failed (%d/%d)' % (func, tries + 1,
                                                        retries))
                tries += 1
        # BUGFIX: a bare ``raise`` outside an except block is an error on
        # Python 3; re-raise the last captured exception explicitly.
        raise last_exc
class ScraperGeventQueue(object):
    '''
    A gevent queue for RT-Mart scraper.

    Parameters
    ----------
    scraperClass: class
        RT-Mart page scraper for CPP
    categories: list
        a category list for CPP
    areas: list
        a area information list
    gevent_num=100: int
        maximum running gevent number.
    '''

    def __init__(self, scraperClass, categories, areas, gevent_num=80):
        self.scraperClass = scraperClass
        self.args = self._args(categories, areas)
        self.gevent_num = gevent_num
        self.tasks = Queue()        # create a gevent queue
        self.failure_list = []      # tasks already re-queued once after a failed fetch

    def _args(self, categories, areas):
        '''
        Parse input args.

        Parameters
        ----------
        categories: list or tuple
            a group of input categories
        areas: list or tuple
            a group of input area information

        Returns
        -------
        new_args: list
            (category, area, page_num=1) tuples, one per combination
        '''
        new_args = []
        for category in categories:
            for area in areas:
                new_args.append((category, area, 1))
        return new_args

    def _run(self, task):
        '''
        Run one scrape task; page 1 of a result set fans out tasks for the
        remaining pages.

        Parameters
        ----------
        task: list or tuple
            a group of parameters for self.scraperClass
        '''
        # split parameters for the page scraper class
        category_name, area_info, page_num = task
        # run the page scraper class
        scraper = self.scraperClass(category_name, area_info, page_num)
        scraper.json = scraper.getJSON()
        # if connect error, add the parameters into queue once again
        # (failure_list guards against endless re-queueing)
        if (not scraper.json) and (task not in self.failure_list):
            self.failure_list.append(task)
            self.tasks.put_nowait(task)
        data_list = scraper.parseJSON(scraper.json)
        indicator = scraper.writeMongoDB(data_list)
        #indicator = scraper.writeCSV(data_list)
        # produce new parameters and add them to the gevent queue
        if (page_num == 1) and indicator:
            total_page = scraper.getTotalPageNumber()
            if total_page > 1:
                for i in range(2, total_page + 1):
                    self.tasks.put_nowait((category_name, area_info, i))

    def worker(self):
        'A gevent worker: drain the queue until it is empty.'
        while True:
            # BUGFIX: ``while not empty(): get()`` is racy with multiple
            # workers -- another worker can drain the queue between the
            # check and the get(), blocking this one forever.  The atomic
            # get_nowait()/Empty pattern avoids that.
            try:
                task = self.tasks.get_nowait()
            except Empty:
                break
            self._run(task)

    def manager(self):
        'A gevent manager, creating the initial gevents'
        for arg in self.args:
            self.tasks.put_nowait(arg)

    def start(self):
        'Run the gevent queue.'
        gevent.spawn(self.manager).join()
        tasks = [gevent.spawn(self.worker) for i in range(self.gevent_num)]
        gevent.joinall(tasks)

#----------class definition----------
class TestResourceAgreementWorker(unittest.TestCase):
    """Unit tests for AgreementWorker: retry queue handling, API client
    rotation, resource fetching from the public API, and the main _run loop.

    All collaborators (API clients, db, logger) are mocked; timing-sensitive
    assertions sleep past ``retry_default_timeout`` because add_to_retry_queue
    re-queues asynchronously.
    """

    # Shared worker configuration used by every test.
    worker_config = {
        'worker_config': {
            'worker_type': 'basic_couchdb',
            'client_inc_step_timeout': 0.1,
            'client_dec_step_timeout': 0.02,
            'drop_threshold_client_cookies': 1.5,
            'worker_sleep': 5,
            'retry_default_timeout': 0.5,
            'retries_count': 2,
            'queue_timeout': 3,
            'bulk_save_limit': 100,
            'bulk_save_interval': 3
        },
        'storage_config': {
            # required for databridge
            "storage_type": "couchdb",  # possible values ['couchdb', 'elasticsearch']
            # arguments for storage configuration
            "host": "localhost",
            "port": 5984,
            "user": "",
            "password": "",
            "db_name": "basic_bridge_db",
            "bulk_query_interval": 3,
            "bulk_query_limit": 100,
        },
        'filter_type': 'basic_couchdb',
        'retrievers_params': {
            'down_requests_sleep': 5,
            'up_requests_sleep': 1,
            'up_wait_sleep': 30,
            'queue_size': 1001
        },
        'extra_params': {
            "mode": "_all_",
            "limit": 1000
        },
        'bridge_mode': 'basic',
        # NOTE(review): credentials in this URL were masked in the source;
        # the original value is unknown.
        'resources_api_server': 'http://*****:*****',
        # NOTE(review): reconstructed — the dict close and this key were lost
        # to masking; tests below read self.worker_config['resource'].
        'resource': 'tenders'
    }

    @patch('openprocurement.bridge.basic.workers.logger')
    def test_add_to_retry_queue(self, mocked_logger):
        """Items are re-queued with priority+1 until retries_count is hit,
        after which they are dropped with a critical log message."""
        retry_items_queue = PriorityQueue()
        worker = AgreementWorker(config_dict=self.worker_config,
                                 retry_resource_items_queue=retry_items_queue)
        resource_item = {'id': uuid.uuid4().hex}
        priority = 1000
        self.assertEqual(retry_items_queue.qsize(), 0)

        # Add to retry_resource_items_queue
        worker.add_to_retry_queue(resource_item, priority=priority)
        self.assertEqual(retry_items_queue.qsize(), 1)
        priority, retry_resource_item = retry_items_queue.get()
        self.assertEqual((priority, retry_resource_item),
                         (1001, resource_item))

        resource_item = {'id': 0}
        # Add to retry_resource_items_queue with status_code '429'
        # (429 keeps the priority unchanged relative to a normal retry)
        worker.add_to_retry_queue(resource_item, priority, status_code=429)
        self.assertEqual(retry_items_queue.qsize(), 1)
        priority, retry_resource_item = retry_items_queue.get()
        self.assertEqual((priority, retry_resource_item),
                         (1001, resource_item))

        priority = 1002
        worker.add_to_retry_queue(resource_item, priority=priority)
        # re-queueing is delayed; wait past retry_default_timeout
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(retry_items_queue.qsize(), 1)
        priority, retry_resource_item = retry_items_queue.get()
        self.assertEqual((priority, retry_resource_item),
                         (1003, resource_item))
        # one more retry exceeds retries_count -> item is dropped
        worker.add_to_retry_queue(resource_item, priority=priority)
        self.assertEqual(retry_items_queue.qsize(), 0)
        # 'droped' typo below matches the production log string exactly
        mocked_logger.critical.assert_called_once_with(
            'Tender {} reached limit retries count {} and droped from '
            'retry_queue.'.format(resource_item['id'],
                                  worker.config['retries_count']),
            extra={
                'MESSAGE_ID': 'dropped_documents',
                'JOURNAL_TENDER_ID': resource_item['id']
            })
        del worker

    def test__get_api_client_dict(self):
        """_get_api_client_dict pops clients in FIFO order, resets stats for
        lazy clients, returns None on an empty queue, and drops clients whose
        cookie renewal raises."""
        api_clients_queue = Queue()
        client = MagicMock()
        client_dict = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        client_dict2 = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        api_clients_queue.put(client_dict)
        api_clients_queue.put(client_dict2)
        api_clients_info = {
            client_dict['id']: {
                'drop_cookies': False,
                'not_actual_count': 5,
                'request_interval': 3
            },
            client_dict2['id']: {
                'drop_cookies': True,
                'not_actual_count': 3,
                'request_interval': 2
            }
        }

        # Success test
        worker = AgreementWorker(api_clients_queue=api_clients_queue,
                                 config_dict=self.worker_config,
                                 api_clients_info=api_clients_info)
        self.assertEqual(worker.api_clients_queue.qsize(), 2)
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client, client_dict)

        # Get lazy client (drop_cookies=True resets its counters)
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client['not_actual_count'], 0)
        self.assertEqual(api_client['request_interval'], 0)

        # Empty queue test
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client, None)

        # Exception when try renew cookies
        client.renew_cookies.side_effect = Exception('Can\'t renew cookies')
        worker.api_clients_queue.put(client_dict2)
        api_clients_info[client_dict2['id']]['drop_cookies'] = True
        api_client = worker._get_api_client_dict()
        # client is returned to the queue instead of being handed out
        self.assertIs(api_client, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(worker.api_clients_queue.get(), client_dict2)

        # Get api_client with raise Empty exception
        api_clients_queue = MagicMock()
        api_clients_queue.empty.return_value = False
        api_clients_queue.get = MagicMock(side_effect=Empty)
        worker.api_clients_queue = api_clients_queue
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client, None)
        del worker

    def test__get_resource_item_from_queue(self):
        """_get_resource_item_from_queue returns (priority, item) and
        (None, None) on an empty queue."""
        items_queue = PriorityQueue()
        item = (1, {'id': uuid.uuid4().hex})
        items_queue.put(item)

        # Success test
        worker = AgreementWorker(resource_items_queue=items_queue,
                                 config_dict=self.worker_config)
        self.assertEqual(worker.resource_items_queue.qsize(), 1)
        priority, resource_item = worker._get_resource_item_from_queue()
        self.assertEqual((priority, resource_item), item)
        self.assertEqual(worker.resource_items_queue.qsize(), 0)

        # Empty queue test
        priority, resource_item = worker._get_resource_item_from_queue()
        self.assertEqual(resource_item, None)
        self.assertEqual(priority, None)
        del worker

    @patch('openprocurement_client.client.TendersClient')
    def test__get_resource_item_from_public(self, mock_api_client):
        """Exercises every error path of _get_resource_item_from_public:
        InvalidResponse, RequestFailed (429 with/without cookie drop, other
        codes), ResourceNotFound, ResourceGone, and generic Exception.  Each
        failure returns None, puts the client back, and (except ResourceGone)
        schedules a retry."""
        resource_item = {'id': uuid.uuid4().hex}
        resource_item_id = uuid.uuid4().hex
        priority = 1
        api_clients_queue = Queue()
        client_dict = {
            'id': uuid.uuid4().hex,
            'request_interval': 0.02,
            'client': mock_api_client
        }
        api_clients_queue.put(client_dict)
        api_clients_info = {
            client_dict['id']: {
                'drop_cookies': False,
                'request_durations': {}
            }
        }
        retry_queue = PriorityQueue()
        return_dict = {
            'data': {
                'id': resource_item_id,
                'dateModified': datetime.datetime.utcnow().isoformat()
            }
        }
        mock_api_client.get_resource_item.return_value = return_dict
        worker = AgreementWorker(api_clients_queue=api_clients_queue,
                                 config_dict=self.worker_config,
                                 retry_resource_items_queue=retry_queue,
                                 api_clients_info=api_clients_info)

        # Success test
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client['request_interval'], 0.02)
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 0)
        self.assertEqual(public_item, return_dict['data'])

        # InvalidResponse
        mock_api_client.get_resource_item.side_effect = InvalidResponse(
            'invalid response')
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        sleep(worker.config['retry_default_timeout'] * 1)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 1)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)

        # RequestFailed status_code=429
        mock_api_client.get_resource_item.side_effect = RequestFailed(
            munchify({'status_code': 429}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        self.assertEqual(api_client['request_interval'], 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 2)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        # 429 increases the client's request interval by client_inc_step_timeout
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        self.assertEqual(api_client['request_interval'],
                         worker.config['client_inc_step_timeout'])

        # RequestFailed status_code=429 with drop cookies
        # (interval above drop_threshold_client_cookies -> reset to 0)
        api_client['request_interval'] = 2
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        sleep(api_client['request_interval'])
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(public_item, None)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 3)

        # RequestFailed with status_code not equal 429
        mock_api_client.get_resource_item.side_effect = RequestFailed(
            munchify({'status_code': 404}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 4)

        # ResourceNotFound
        mock_api_client.get_resource_item.side_effect = RNF(
            munchify({'status_code': 404}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 5)

        # ResourceGone — item is NOT retried (queue size stays 5)
        mock_api_client.get_resource_item.side_effect = ResourceGone(
            munchify({'status_code': 410}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 5)

        # Exception
        api_client = worker._get_api_client_dict()
        mock_api_client.get_resource_item.side_effect = Exception(
            'text except')
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 6)
        del worker

    def test_shutdown(self):
        """shutdown() flips the exit flag."""
        worker = AgreementWorker(
            'api_clients_queue', 'resource_items_queue', 'db', {
                'worker_config': {
                    'bulk_save_limit': 1,
                    'bulk_save_interval': 1
                },
                'resource': 'tenders'
            }, 'retry_resource_items_queue')
        self.assertEqual(worker.exit, False)
        worker.shutdown()
        self.assertEqual(worker.exit, True)

    def up_worker(self):
        """Helper: spawn a worker greenlet against the instance's queues,
        let it idle, then shut it down."""
        worker_thread = AgreementWorker.spawn(
            resource_items_queue=self.queue,
            retry_resource_items_queue=self.retry_queue,
            api_clients_info=self.api_clients_info,
            api_clients_queue=self.api_clients_queue,
            config_dict=self.worker_config,
            db=self.db)
        idle()
        worker_thread.shutdown()
        sleep(3)

    @patch('openprocurement.bridge.basic.workers.handlers_registry')
    @patch(
        'openprocurement.bridge.basic.workers.AgreementWorker._get_resource_item_from_public'
    )
    @patch('openprocurement.bridge.basic.workers.logger')
    def test__run(self, mocked_logger, mock_get_from_public, mock_registry):
        """Drives one _run iteration at a time (exit flag side_effect
        [False, True]) through: empty client queue, missing handler,
        empty items queue, normal fetch, db error, and handler exceptions."""
        self.queue = Queue()
        self.retry_queue = Queue()
        self.api_clients_queue = Queue()
        queue_item = (1, {
            'id': uuid.uuid4().hex,
            'procurementMethodType': 'closeFrameworkAgreementUA'
        })
        # NOTE(review): doc['id'] is the whole queue_item dict here (hence the
        # doc['id']['id'] lookups below) — matches the source as written.
        doc = {
            'id': queue_item[1],
            '_rev': '1-{}'.format(uuid.uuid4().hex),
            'dateModified': datetime.datetime.utcnow().isoformat(),
            'doc_type': 'Tender'
        }
        client = MagicMock()
        api_client_dict = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        client.session.headers = {'User-Agent': 'Test-Agent'}
        self.api_clients_info = {
            api_client_dict['id']: {
                'drop_cookies': False,
                'request_durations': []
            }
        }
        self.db = MagicMock()
        worker = AgreementWorker(api_clients_queue=self.api_clients_queue,
                                 resource_items_queue=self.queue,
                                 retry_resource_items_queue=self.retry_queue,
                                 db=self.db,
                                 api_clients_info=self.api_clients_info,
                                 config_dict=self.worker_config)
        # __nonzero__ (py2 truthiness) makes `while not self.exit` run once
        worker.exit = MagicMock()
        worker.exit.__nonzero__.side_effect = [False, True]

        # Try get api client from clients queue
        self.assertEqual(self.queue.qsize(), 0)
        worker._run()
        self.assertEqual(self.queue.qsize(), 0)
        mocked_logger.critical.assert_called_once_with(
            'API clients queue is empty.')

        # Try get item from resource items queue with no handler
        self.api_clients_queue.put(api_client_dict)
        worker.exit.__nonzero__.side_effect = [False, True]
        mock_registry.get.return_value = ''
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker._run()
        self.assertEqual(mocked_logger.critical.call_args_list, [
            call('API clients queue is empty.'),
            call('Not found handler for procurementMethodType: {}, {} {}'.
                 format(doc['id']['procurementMethodType'],
                        self.worker_config['resource'][:-1], doc['id']['id']),
                 extra={
                     'JOURNAL_TENDER_ID': doc['id']['id'],
                     'MESSAGE_ID': 'bridge_worker_exception'
                 })
        ])

        # Try get item from resource items queue
        self.api_clients_queue.put(api_client_dict)
        worker.exit.__nonzero__.side_effect = [False, True]
        handler_mock = MagicMock()
        handler_mock.process_resource.return_value = None
        mock_registry.return_value = {
            'closeFrameworkAgreementUA': handler_mock
        }
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[2:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('PUT API CLIENT: {}'.format(api_client_dict['id']),
                 extra={'MESSAGE_ID': 'put_client'}),
            call('Resource items queue is empty.')
        ])

        # Try get resource item from local storage
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[5:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('Get tender {} from main queue.'.format(doc['id']['id']))
        ])

        # Try get local_resource_item with Exception
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        self.db.get.side_effect = [Exception('Database Error')]
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[7:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('Get tender {} from main queue.'.format(doc['id']['id']))
        ])

        # Try process resource with Exception
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]
        mock_handler = MagicMock()
        mock_handler.process_resource.side_effect = (RequestFailed(), )
        mock_registry.get.return_value = mock_handler
        worker._run()
        self.assertEqual(mocked_logger.error.call_args_list, [
            call('Error while processing {} {}: {}'.format(
                self.worker_config['resource'][:-1], doc['id']['id'],
                'Not described error yet.'),
                 extra={
                     'JOURNAL_TENDER_ID': doc['id']['id'],
                     'MESSAGE_ID': 'bridge_worker_exception'
                 })
        ])
        check_queue_item = (queue_item[0] + 1, queue_item[1]
                            )  # priority is increased
        self.assertEquals(self.retry_queue.get(), check_queue_item)

        # Try process resource with Exception
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]
        mock_handler = MagicMock()
        mock_handler.process_resource.side_effect = (Exception(), )
        mock_registry.get.return_value = mock_handler
        worker._run()
        self.assertEqual(mocked_logger.error.call_args_list[1:], [
            call('Error while processing {} {}: {}'.format(
                self.worker_config['resource'][:-1], doc['id']['id'], ''),
                 extra={
                     'JOURNAL_TENDER_ID': doc['id']['id'],
                     'MESSAGE_ID': 'bridge_worker_exception'
                 })
        ])
        check_queue_item = (queue_item[0] + 1, queue_item[1]
                            )  # priority is increased
        self.assertEquals(self.retry_queue.get(), check_queue_item)

        # No resource item — nothing is processed or retried
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = None
        worker.exit.__nonzero__.side_effect = [False, True]
        mock_handler = MagicMock()
        mock_handler.process_resource.side_effect = (Exception(), )
        mock_registry.get.return_value = mock_handler
        worker._run()
        self.assertEquals(self.queue.empty(), True)
        self.assertEquals(self.retry_queue.empty(), True)

    @patch('openprocurement.bridge.basic.workers.datetime')
    @patch('openprocurement.bridge.basic.workers.logger')
    def test_log_timeshift(self, mocked_logger, mocked_datetime):
        """With now() pinned to the item's dateModified the timeshift is 0."""
        worker = AgreementWorker(
            'api_clients_queue', 'resource_items_queue', 'db', {
                'worker_config': {
                    'bulk_save_limit': 1,
                    'bulk_save_interval': 1
                },
                'resource': 'tenders'
            }, 'retry_resource_items_queue')
        time_var = datetime.datetime.now(iso8601.UTC)
        mocked_datetime.now.return_value = time_var
        resource_item = {'id': '0' * 32, 'dateModified': time_var.isoformat()}
        worker.log_timeshift(resource_item)
        self.assertEqual(mocked_logger.debug.call_args_list, [
            call('{} {} timeshift is {} sec.'.format(
                self.worker_config['resource'][:-1], resource_item['id'],
                0.0),
                 extra={'DOCUMENT_TIMESHIFT': 0.0})
        ])
class GeventedConnPool(object):
    """A gevent-friendly psycopg2 connection pool.

    A BoundedSemaphore caps the number of live connections at ``maxsize``;
    idle, ready connections are parked in a gevent Queue.  Every successful
    ``get`` holds one semaphore slot that is released when the connection is
    returned (``put``/``_reset_and_return``) or discarded.
    """

    # Class-level defaults, overwritten per instance in __init__.
    closed = False
    maxsize = None
    pool = None
    _connectargs = None

    def __init__(self, maxsize=8, **connectargs):
        self.maxsize = maxsize
        self.pool = Queue()
        # one semaphore slot per allowed connection
        self.lock = gevent.lock.BoundedSemaphore(maxsize)
        self._connectargs = connectargs

    def _connect(self):
        # Open a fresh connection with the stored connect arguments.
        return psycopg2.connect(**self._connectargs)

    def get(self):
        """Acquire a connection: reuse a pooled one if it is ready,
        otherwise open a new one.  Blocks on the semaphore when maxsize
        connections are already out."""
        if self.closed:
            raise psycopg2.pool.PoolError("connection pool is closed")
        self.lock.acquire()
        try:
            conn = self.pool.get_nowait()
            if conn.closed or conn.status != psycopg2.extensions.STATUS_READY:
                # Stale/busy connection: discard it and recurse for another.
                self.lock.release()
                logger.info("Conn isn't ready: %r", conn.status)
                conn.close()
                return self.get()
            return conn
        except gevent.queue.Empty:
            # Nothing pooled: create a new connection; on failure give the
            # semaphore slot back before propagating.
            try:
                return self._connect()
            except:
                self.lock.release()
                raise

    def put(self, conn):
        """Return a connection to the pool, rolling back any open
        transaction first.  Broken or closed connections are discarded."""
        assert conn is not None
        try:
            if self.closed:
                conn.close()
            if conn.closed:
                # If the connection is closed, we just discard it.
                self.lock.release()
                return

            # Return the connection into a consistent state before putting
            # it back into the pool
            status = conn.get_transaction_status()
            if status == psycopg2.extensions.TRANSACTION_STATUS_UNKNOWN:
                # server connection lost
                conn.close()
                self.lock.release()
                return
            elif status != psycopg2.extensions.TRANSACTION_STATUS_IDLE:
                # connection in error or in transaction
                conn.rollback()
        except StandardError:
            logger.exception("Failed in put")
            self.lock.release()
            gevent.get_hub().handle_error(conn, *sys.exc_info())
        else:
            # reset() may block; do it in a greenlet so put() returns fast
            gevent.spawn(self._reset_and_return, conn)

    def _reset_and_return(self, conn):
        # Reset session state and re-queue the connection; the semaphore
        # slot is released in all cases.
        try:
            if self.closed:
                conn.close()
            if not conn.closed:
                conn.reset()
                self.pool.put(conn)
        except:
            logger.exception("Failed in reset")
            gevent.get_hub().handle_error(conn, *sys.exc_info())
        finally:
            self.lock.release()

    def closeall(self, timeout=5):
        """Close every pooled connection and wait (up to ``timeout``) for
        outstanding ones to be returned; the pool is usable again after."""
        logger.info("Closing all connections: %d", self.pool.qsize())
        self.closed = True
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass
        if self.lock.counter != self.maxsize:
            # some connections are still checked out; give them a chance
            gevent.wait(timeout=timeout)
        assert self.lock.counter == self.maxsize
        # NOTE(review): the pool deliberately re-opens itself here.
        self.closed = False

    @contextlib.contextmanager
    def connection(self,
                   isolation_level=None,
                   autocommit=None,
                   readonly=False):
        """Context manager yielding a configured connection; commits on
        success and always returns the connection to the pool."""
        conn = self.get()
        try:
            if isolation_level is not None and isolation_level != conn.isolation_level:
                conn.set_isolation_level(isolation_level)
            if autocommit is not None:
                conn.autocommit = autocommit
            if readonly is not None:
                conn.set_session(readonly=readonly)
            yield conn
            conn.commit()
        finally:
            if conn:
                #self.put(conn)
                gevent.spawn(self.put, conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        """Context manager yielding a cursor on a pooled connection.
        Connection-level kwargs (isolation_level/autocommit/readonly) are
        split off; ``named=True`` creates a server-side named cursor."""
        connargs = {
            'isolation_level': kwargs.pop('isolation_level', None),
            'autocommit': kwargs.pop('autocommit', None),
            'readonly': kwargs.pop('readonly', None)
        }
        if kwargs.pop('named', False) is True:
            kwargs['name'] = str(uuid.uuid4())
        with self.connection(**connargs) as conn:
            yield conn.cursor(*args, **kwargs)

    def mogrify(self, *args, **kwargs):
        # Render a query with bound parameters without executing it.
        with self.cursor(**kwargs) as cur:
            return cur.mogrify(*args)

    # Some shortcut functions
    def execute(self, *args, **kwargs):
        """like cursor.execute
        kwargs to cursor, positional args to execute
        """
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.rowcount

    def executemany(self, *args, **kwargs):
        """Pasthrough to cursor.executemany
        kwargs to cursor, positional args to executemany"""
        with self.cursor(**kwargs) as cursor:
            cursor.executemany(*args)
            return cursor.rowcount

    def fetchone(self, *args, **kwargs):
        """like cursor.fetchone
        kwargs to cursor, positional args to execute
        """
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        """like cursor.fetchall
        kwargs to cursor, positional args to execute
        """
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchall()

    def fetchiter(self, *args, **kwargs):
        """iterate over a cursors results
        kwargs to cursor, positional args to execute
        """
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            for f in cursor:
                yield f
class BitcoinDepositService(object):
    """Polls the BTC.com API for new blocks and credits deposits made to
    watched addresses.

    A gevent Queue holds paginated transaction-list URLs for the current
    block; ``generate_block_transaction_urls`` produces them and ``worker``
    consumes them, crediting outputs whose address is on the watchlist.
    """

    def __init__(self,
                 _config=None,
                 _persistent=None,
                 _watchlist=None,
                 _balance_service=None):
        # Queue of paginated transaction-list URLs to fetch.
        self.tasks = Queue()
        if _config:
            self.config = _config
        else:
            # default: read local config.cfg
            _config = RawConfigParser()
            _config.read('config.cfg')
            self.config = _config
        # Persistent is for saving and loading the progress,
        # the data can be saved in a local file or the database
        self.persistent = \
            _persistent or FilePersistent(_start=self.config.getint('deposit', 'start_block'))
        # TODO: extract transaction fetcher
        self.base_url = self.config.get('deposit', 'base_url')
        self.session = requests.Session()
        self.watchlist = _watchlist
        self.balance_service = _balance_service

    def get_block(self, block_height='latest'):
        """
        Get the detail info of a block
        :param block_height: either an integer or 'latest'
        :return: the block dict returned by BTC.com
        """
        url = '%s/block/%s' % (self.base_url, block_height)
        rv = self.session.get(url).json()
        if rv['err_msg']:
            raise Exception(rv['err_msg'])
        else:
            return rv['data']

    def generate_block_transaction_urls(self, block_height):
        """Enqueue one paginated transaction-list URL per page of the
        given block, throttling between requests."""
        # Get the total count of this block
        url = '%s/block/%s/tx' % (self.base_url, block_height)
        rv = self.session.get(url)
        data = rv.json()['data']
        page_size = data['pagesize']
        total_count = data['total_count']
        # Get each pages
        for i in range(1, int(total_count / page_size) + 1):
            paginated_url = url + '?page=' + str(i)
            self.tasks.put_nowait(paginated_url)
            gevent.sleep(.5)

    def process_transaction(self, transaction):
        """Inspect a transaction's outputs and credit any unspent output
        paying a watched address."""
        outputs = transaction['outputs']
        for output in outputs:
            if output['spent_by_tx']:
                # already spent — nothing to credit
                logger.info('%s|spent', transaction['block_height'])
            else:
                logger.info('%s|a:%s|v:%d', transaction['block_height'],
                            output['addresses'], output['value'])
                # only single-address outputs are supported
                if len(output['addresses']) > 1:
                    logger.error('more than one output addresses')
                    continue
                if len(output['addresses']) == 0:
                    logger.error('no address found')
                    continue
                address = output['addresses'][0]
                value = output['value']
                tx_id = transaction['hash']
                if self.watchlist.exists(address):
                    try:
                        self.deposit(address, value, tx_id)
                        logger.info('deposit %s to %s: OK', value, address)
                    except:
                        logger.error('deposit %s to %s: failed', value,
                                     address)

    def worker(self):
        """Drain the URL queue, processing each page of transactions;
        non-200 responses are re-queued indefinitely (rate limiting)."""
        while not self.tasks.empty():
            url = self.tasks.get()
            rv = self.session.get(url)
            if rv.status_code != 200:
                # Hit the rate limit, retry.
                # Note that there is not max retry times.
                self.tasks.put_nowait(url)
                # Wait for the next URL
                continue
            # All is well, process the transactions
            data = rv.json()['data']
            transactions = data['list']
            for transaction in transactions:
                self.process_transaction(transaction)
            gevent.sleep(.5)

    def run(self):
        """Main loop: resume from the last processed block and advance one
        block at a time once it has enough confirmations."""
        # Pick up the progress
        block_height = self.persistent.get_last_processed_block() + 1
        min_confirmation_count = self.config.getint('deposit',
                                                    'min_confirmation_count')
        # Main event loop
        while True:
            try:
                block = self.get_block(block_height)
                logger.info('New block: %d', block_height)
                if block['confirmations'] < min_confirmation_count:
                    raise WorkerConfirmException(
                        'Confirmation is less than required minimum: %d',
                        min_confirmation_count)
                sleep(.5)
                gevent.spawn(self.generate_block_transaction_urls,
                             block_height).join()
                gevent.spawn(self.worker).join()
                # Save the checkpoint
                self.persistent.set_last_processed_block(block_height)
                # increase block height
                block_height += 1
            except WorkerConfirmException as e:
                # not enough confirmations yet: wait one block interval
                pprint(e)
                sleep(self.config.getfloat('deposit', 'block_times'))

    def deposit(self, address, value, tx_id):
        # Delegate the actual balance credit to the injected service.
        self.balance_service.deposit(address, value, tx_id)
class LoadBalancer:
    """Dispatches pending workflows to available workers over ZeroMQ.

    Two CurveZMQ-encrypted ROUTER sockets are used: ``request_socket`` for
    worker readiness/results and workflow hand-off, and ``comm_socket`` for
    control messages (pause/resume/trigger data) to workers already running
    a workflow.
    """

    def __init__(self, ctx):
        """Initialize a LoadBalancer object, which manages workflow execution.

        Args:
            ctx (Context object): A Context object, shared with the Receiver thread.
        """
        self.available_workers = []
        # execution_uid -> worker identity frame, for routed control messages
        self.workflow_comms = {}
        self.thread_exit = False
        self.pending_workflows = Queue()
        self.ctx = ctx

        server_secret_file = os.path.join(
            core.config.paths.zmq_private_keys_path, "server.key_secret")
        server_public, server_secret = auth.load_certificate(
            server_secret_file)

        self.request_socket = self.ctx.socket(zmq.ROUTER)
        self.request_socket.curve_secretkey = server_secret
        self.request_socket.curve_publickey = server_public
        self.request_socket.curve_server = True
        self.request_socket.bind(REQUESTS_ADDR)

        self.comm_socket = self.ctx.socket(zmq.ROUTER)
        self.comm_socket.curve_secretkey = server_secret
        self.comm_socket.curve_publickey = server_public
        self.comm_socket.curve_server = True
        self.comm_socket.bind(COMM_ADDR)

        # give workers a moment to connect before dispatching
        gevent.sleep(2)

    def manage_workflows(self):
        """Manages the workflows to be executed and the workers. It waits for
        the server to submit a request to execute a workflow, and then passes
        the workflow off to an available worker, once one becomes available.
        """
        while True:
            if self.thread_exit:
                break

            # There is a worker available and a workflow in the queue, so pop
            # it off and send it to the worker
            if self.available_workers and not self.pending_workflows.empty():
                workflow = self.pending_workflows.get()
                worker = self.available_workers.pop()
                self.workflow_comms[workflow['execution_uid']] = worker
                self.request_socket.send_multipart(
                    [worker, b"", asbytes(json.dumps(workflow))])
            # If there is a worker available but no pending workflows, then
            # see if there are any other workers available, but do not block
            # in case a workflow becomes available
            else:
                try:
                    worker, empty, ready = self.request_socket.recv_multipart(
                        flags=zmq.NOBLOCK)
                    if ready == b"Ready" or ready == b"Done":
                        self.available_workers.append(worker)
                except zmq.ZMQError:
                    # nothing to receive right now; yield to other greenlets
                    gevent.sleep(0.1)
                    continue
        self.request_socket.close()
        self.comm_socket.close()
        return

    def add_workflow(self, workflow_json):
        """Adds a workflow to the queue to be executed.

        Args:
            workflow_json (dict): Dict representation of a workflow, along
                with some additional fields necessary for reconstructing the
                workflow.
        """
        self.pending_workflows.put(workflow_json)

    def pause_workflow(self, workflow_execution_uid):
        """Pauses a workflow currently executing.

        Args:
            workflow_execution_uid (str): The execution UID of the workflow.
        """
        logger.info('Pausing workflow {0}'.format(workflow_execution_uid))
        if workflow_execution_uid in self.workflow_comms:
            self.comm_socket.send_multipart(
                [self.workflow_comms[workflow_execution_uid], b'', b'Pause'])

    def resume_workflow(self, workflow_execution_uid):
        """Resumes a workflow that has previously been paused.

        Args:
            workflow_execution_uid (str): The execution UID of the workflow.
        """
        logger.info('Resuming workflow {0}'.format(workflow_execution_uid))
        if workflow_execution_uid in self.workflow_comms:
            self.comm_socket.send_multipart(
                [self.workflow_comms[workflow_execution_uid], b'', b'Resume'])

    def send_data_to_trigger(self, data_in, workflow_uids, inputs=None):
        """Sends the data_in to the workflows specified in workflow_uids.

        Args:
            data_in (dict): Data to be used to match against the triggers
                for a Step awaiting data.
            workflow_uids (list[str]): A list of workflow execution UIDs to
                send this data to.
            inputs (dict, optional): An optional dict of inputs to update
                for a Step awaiting data for a trigger. Defaults to None.
        """
        data = dict()
        data['data_in'] = data_in
        # Fix: the original signature used a mutable default (`inputs={}`),
        # contradicting its own docstring.  Normalizing None to {} here keeps
        # the wire format identical for callers that omit `inputs`.
        data['inputs'] = {} if inputs is None else inputs
        for uid in workflow_uids:
            if uid in self.workflow_comms:
                self.comm_socket.send_multipart([
                    self.workflow_comms[uid], b'',
                    str.encode(json.dumps(data))
                ])
class Peer(gevent.Greenlet):
    """A greenlet managing one UDP peer connection.

    Owns a datagram socket bound to an ephemeral port, a Protocol handler
    for framing/crypto, and three gevent Queues (outbox, vipbox, inbox).
    The main loop receives datagrams; ``send_loop`` drains the outboxes and
    drives ping/timeout bookkeeping; ``recv_loop`` forwards handler output
    to ``report``.
    """

    def __init__(self, peermanager, node_info):
        gevent.Greenlet.__init__(self)
        print ("Constructing basic configs")
        self.peermanager = peermanager
        self.peerID = node_info['ID']
        self.peerAddr = node_info['addr']
        self.ping_interval = self.peermanager.configs['p2p']['pingtime']
        # timeout
        self.timeout = self.peermanager.configs['p2p']['timeout']
        try:
            self.socket = gevent.socket.socket(socket.AF_INET,
                                               socket.SOCK_DGRAM)
            self.socket.bind(("", 0))
            self.socket.settimeout(
                self.peermanager.configs['p2p']['timeout'])  # timeout
        except gevent.socket.error as e:
            print('Socket creation error: %s' % e.strerror)
        self.myID = peermanager.configs['node']['ID']
        self.myAddr = (self.peermanager.configs['p2p']['listen_host'],
                       self.socket.getsockname()[1])
        self.handler = Protocol(self,
                                self.peermanager.configs['node']['wif'],
                                self.peermanager.configs['node']['pubkey'],
                                self.peermanager.configs['p2p']['num_workers'])
        self.handler.start()
        self.greenlets = dict()
        self.outbox = Queue()
        self.vipbox = Queue()   # high-priority messages, drained first
        self.inbox = Queue()
        self.is_stopped = False
        self.is_pinged = False
        self.last_contact = time.time()
        # gates the recv loop while a send is in flight
        self.read_ready = gevent.event.Event()
        self.read_ready.set()

    def stop(self):
        """Stop the protocol handler, kill spawned greenlets, deregister
        from the peer manager, and kill this greenlet.  Idempotent."""
        if not self.is_stopped:
            print("Trying to stop peer.")
            self.is_stopped = True
            try:
                self.handler.stop()
                for process in self.greenlets.values():
                    try:
                        process.kill()
                    except gevent.GreenletExit:
                        pass
                self.greenlets = None
            except:
                print('Failed to kill all processes.')
            finally:
                self.peermanager.peers.remove(self)
                self.kill()

    def run(self):
        """Main loop: spawn send/recv helper greenlets, then block on the
        socket and push received datagrams into the inbox."""
        print('Running main loop of peer ', self.peerID)
        self.handler.run()
        self.greenlets['sender'] = gevent.spawn(self.send_loop)
        self.greenlets['receiver'] = gevent.spawn(self.recv_loop)
        while not self.is_stopped:
            self.read_ready.wait()
            try:
                gevent.socket.wait_read(self.socket.fileno())
            except gevent.socket.error as e:
                print('Network error: %s' % e.strerror)
                # Fix: original read `e.errno in (errno.EBADF)` — that is a
                # bare int, not a tuple, so the membership test raised
                # TypeError instead of matching a closed socket.
                if e.errno in (errno.EBADF,):
                    self.report(("disconnect",
                                 dict(type="end_session",
                                      reason=e.__str__())))
                    self.stop()
                else:
                    raise e
            try:
                message, addr = self.socket.recvfrom(8192)
                # remember the peer's latest source address
                self.peerAddr = addr
            except gevent.socket.error as e:
                print('Network error: %s' % e.strerror)
                if e.errno in (errno.ENETDOWN, errno.ECONNRESET,
                               errno.ETIMEDOUT, errno.EHOSTUNREACH,
                               errno.ECONNABORTED):
                    self.report(("disconnect",
                                 dict(type="end_session",
                                      reason=e.__str__())))
                    self.stop()
                else:
                    raise e
            if message:
                self.last_contact = time.time()
                self.is_pinged = False
                self.inbox.put(message)

    def send_loop(self):
        """Drain vipbox/outbox and enforce ping/timeout policy based on
        time since last contact."""
        while not self.is_stopped:
            elapsed = time.time() - self.last_contact
            if elapsed > self.timeout:
                self.send_disconnect('Ping pong timeout')
            elif elapsed > self.ping_interval and not self.is_pinged:
                print("time elapsed:", elapsed)
                self.is_pinged = True
                self.send_ping()
            else:
                if not self.vipbox.empty():
                    self.send(self.vipbox.get())
                if not self.outbox.empty():
                    self.send(self.outbox.get())
            gevent.sleep(0)

    def recv_loop(self):
        # Forward everything the protocol handler produces to report().
        while not self.is_stopped:
            self.report(self.handler.rQ.get())

    def send(self, packet):
        """Send one datagram to the peer; on error/timeout report a
        disconnect and stop."""
        if not packet:
            print("Missing packet!")
            return
        self.read_ready.clear()
        try:
            self.socket.sendto(packet, self.peerAddr)
        # Fix: timeout is handled before the generic socket error —
        # socket.timeout subclasses socket.error, so in the original order
        # the timeout branch was unreachable.
        except gevent.socket.timeout as e:
            print("Timeout in send! ", e)
            self.report(("disconnect",
                         dict(type="end_session", reason='send timeout')))
            self.stop()
        except gevent.socket.error as e:
            print("Error in send! ", e)
            self.report(("disconnect",
                         dict(type="end_session", reason='send error')))
            self.stop()
        self.read_ready.set()

    def send_hello(self):
        self.handler.sQ.put(
            dict(action=0, payload="Hello, requesting connection."))

    def send_confirm(self):
        self.handler.sQ.put(
            dict(action=1, payload="Successfully received hello."))

    def send_disconnect(self, msg):
        self.handler.sQ.put(dict(action=2, payload=msg))
        self.report(("disconnect", dict(type="end_session", reason=msg)))
        self.stop()

    def send_packet(self, packet):
        packet = json.dumps(packet)
        self.handler.sQ.put(dict(action=5, payload=packet))

    def send_ping(self):
        self.handler.sQ.put(dict(action=3))

    def report(self, rp):
        """Log handler events through the peer manager; "data" events are
        JSON-decoded and dispatched; "disconnect" stops the peer."""
        if rp[0] == "checked":
            self.peermanager.log(self.peerID, 1)
        if rp[0] == "disconnect":
            self.peermanager.log(self.peerID, 0, reasons=rp[1])
            self.stop()
        if rp[0] == "data":
            self.peermanager.log(self.peerID, 1)
            rp[1]['data'] = json.loads(rp[1]['data'])
            self.parse_data(rp[1])
            #self.peermanager.recv_queue.put(rp[1])

    def parse_data(self, data):
        # Route by numeric method index into the manager's receive queues.
        method = int(data['data']['method'])
        try:
            self.peermanager.recv_queue[method].put(data)
        except IndexError:
            print ("Illegal method index!")
class VncIfmapClient(object):
    """Publishes VNC configuration objects to an IF-MAP server.

    Maintains an in-memory mirror (``_id_to_metas``) of what was last
    published so updates/deletes can be diffed, and pushes all publish
    operations through a gevent Queue drained by a dedicated greenlet.
    A second greenlet periodically health-checks the IF-MAP session.
    """
    # * Not all properties in an object needs to be published
    # to IfMap.
    # * In some properties, not all fields are relevant
    # to be publised to IfMap.
    # If the property is not relevant at all, define the property
    # with None. If it is partially relevant, then define the fn.
    # which would handcraft the generated xml for the object.
    IFMAP_PUBLISH_SKIP_LIST = {
        # Format - <prop_field> : None | <Handler_fn>
        u"perms2": None,
        u"id_perms": build_idperms_ifmap_obj
    }

    def handler(self, signum, frame):
        """SIGUSR2 handler: dump the published-metadata cache for debugging."""
        # NOTE(review): `file` shadows the Python 2 builtin of the same name.
        file = open("/tmp/api-server-ifmap-cache.txt", "w")
        file.write(pformat(self._id_to_metas))
        file.close()

    def __init__(self, db_client_mgr, ifmap_srv_ip, ifmap_srv_port,
                 uname, passwd, ssl_options):
        """Connect to the IF-MAP server and start the dequeue/health greenlets.

        Args:
            db_client_mgr: owning DB client manager; also provides sandesh
                and (via _api_svr_mgr) the API server arguments.
            ifmap_srv_ip/ifmap_srv_port: IF-MAP server endpoint.
            uname/passwd: basic-auth credentials for the IF-MAP session.
            ssl_options: passed through to the ifmap client library.
        """
        self._ifmap_srv_ip = ifmap_srv_ip
        self._ifmap_srv_port = ifmap_srv_port
        self._username = uname
        self._password = passwd
        self._ssl_options = ssl_options
        self._dequeue_greenlet = None
        self._CONTRAIL_XSD = "http://www.contrailsystems.com/vnc_cfg.xsd"
        self._IPERMS_NAME = "id-perms"
        self._NAMESPACES = {
            'env': "http://www.w3.org/2003/05/soap-envelope",
            'ifmap': "http://www.trustedcomputinggroup.org/2010/IFMAP/2",
            'meta': "http://www.trustedcomputinggroup.org/2010/IFMAP-METADATA/2",
            'contrail': self._CONTRAIL_XSD
        }

        self._db_client_mgr = db_client_mgr
        self._sandesh = db_client_mgr._sandesh

        ConnectionState.update(conn_type=ConnectionType.IFMAP, name='IfMap',
                               status=ConnectionStatus.INIT, message='',
                               server_addrs=["%s:%s" % (ifmap_srv_ip,
                                                        ifmap_srv_port)])
        self._conn_state = ConnectionStatus.INIT
        self._is_ifmap_up = False
        # Bounded queue: size comes from the api-server CLI/config args.
        self._queue = Queue(self._get_api_server()._args.ifmap_queue_size)

        self.reset()

        # Set the signal handler
        signal.signal(signal.SIGUSR2, self.handler)

        self._init_conn()
        self._publish_config_root()
        self._health_checker_greenlet =\
            vnc_greenlets.VncGreenlet('VNC IfMap Health Checker',
                                      self._health_checker)
    # end __init__

    @classmethod
    def object_alloc(cls, obj_class, parent_res_type, fq_name):
        """Compute the IF-MAP identifiers for an object and its parent.

        Returns (True, (my_imid, parent_imid)) on success, or
        (False, (409, err_msg)) when the parent type is missing.
        """
        res_type = obj_class.resource_type
        my_fqn = ':'.join(fq_name)
        parent_fqn = ':'.join(fq_name[:-1])

        my_imid = 'contrail:%s:%s' % (res_type, my_fqn)
        if parent_fqn:
            if parent_res_type is None:
                err_msg = "Parent: %s type is none for: %s" % (parent_fqn,
                                                               my_fqn)
                return False, (409, err_msg)
            parent_imid = 'contrail:' + parent_res_type + ':' + parent_fqn
        else:  # parent is config-root
            parent_imid = 'contrail:config-root:root'

        # Normalize/escape special chars
        my_imid = escape(my_imid)
        parent_imid = escape(parent_imid)

        return True, (my_imid, parent_imid)
    # end object_alloc

    def object_set(self, obj_class, my_imid, existing_metas, obj_dict):
        """Publish an object's properties and references to IF-MAP.

        Diffs against ``existing_metas`` (None for a freshly created object)
        so only changed property metadata is re-published.
        """
        update = {}

        # Properties Meta
        for prop_field in obj_class.prop_fields:
            field = obj_dict.get(prop_field)
            if field is None:
                continue
            # construct object of xsd-type and get its xml repr
            # e.g. virtual_network_properties
            prop_field_types = obj_class.prop_field_types[prop_field]
            is_simple = not prop_field_types['is_complex']
            prop_type = prop_field_types['xsd_type']
            # e.g. virtual-network-properties
            prop_meta = obj_class.prop_field_metas[prop_field]

            if prop_field in VncIfmapClient.IFMAP_PUBLISH_SKIP_LIST:
                # Field not relevant, skip publishing to IfMap
                if not VncIfmapClient.IFMAP_PUBLISH_SKIP_LIST[prop_field]:
                    continue
                # Call the handler fn to generate the relevant fields.
                if callable(VncIfmapClient.IFMAP_PUBLISH_SKIP_LIST[prop_field]):
                    prop_xml = VncIfmapClient.IFMAP_PUBLISH_SKIP_LIST[
                        prop_field](prop_field, field)
                    meta = Metadata(prop_meta, '',
                                    {'ifmap-cardinality': 'singleValue'},
                                    ns_prefix='contrail', elements=prop_xml)
                else:
                    log_str = '%s is marked for partial publish\
 to Ifmap but handler not defined' % (prop_field)
                    self.config_log(log_str, level=SandeshLevel.SYS_DEBUG)
                    continue
            elif is_simple:
                norm_str = escape(str(field))
                meta = Metadata(prop_meta, norm_str,
                                {'ifmap-cardinality': 'singleValue'},
                                ns_prefix='contrail')
            else:  # complex type
                prop_cls = str_to_class(prop_type, __name__)
                buf = cStringIO.StringIO()
                # perms might be inserted at server as obj.
                # obj construction diff from dict construction.
                if isinstance(field, dict):
                    prop_cls(**field).exportChildren(
                        buf, level=1, name_=prop_meta, pretty_print=False)
                elif isinstance(field, list):
                    for elem in field:
                        if isinstance(elem, dict):
                            prop_cls(**elem).exportChildren(
                                buf, level=1, name_=prop_meta,
                                pretty_print=False)
                        else:
                            elem.exportChildren(
                                buf, level=1, name_=prop_meta,
                                pretty_print=False)
                else:  # object
                    field.exportChildren(
                        buf, level=1, name_=prop_meta, pretty_print=False)
                prop_xml = buf.getvalue()
                buf.close()
                meta = Metadata(prop_meta, '',
                                {'ifmap-cardinality': 'singleValue'},
                                ns_prefix='contrail', elements=prop_xml)

            # If obj is new (existing metas is none) or
            # if obj does not have this prop_meta (or)
            # or if the prop_meta is different from what we have currently,
            # then update
            if (not existing_metas or
                    not prop_meta in existing_metas or
                    ('' in existing_metas[prop_meta] and
                     str(meta) != str(existing_metas[prop_meta]['']))):
                self._update_id_self_meta(update, meta)
        # end for all property types

        # References Meta
        for ref_field in obj_class.ref_fields:
            refs = obj_dict.get(ref_field)
            if not refs:
                continue
            for ref in refs:
                ref_fq_name = ref['to']
                ref_fld_types_list = list(obj_class.ref_field_types[ref_field])
                ref_res_type = ref_fld_types_list[0]
                ref_link_type = ref_fld_types_list[1]
                ref_meta = obj_class.ref_field_metas[ref_field]
                ref_imid = get_ifmap_id_from_fq_name(ref_res_type, ref_fq_name)
                ref_data = ref.get('attr')
                if ref_data:
                    buf = cStringIO.StringIO()
                    attr_cls = str_to_class(ref_link_type, __name__)
                    attr_cls(**ref_data).exportChildren(
                        buf, level=1, name_=ref_meta, pretty_print=False)
                    ref_link_xml = buf.getvalue()
                    buf.close()
                else:
                    ref_link_xml = ''
                meta = Metadata(ref_meta, '',
                                {'ifmap-cardinality': 'singleValue'},
                                ns_prefix='contrail', elements=ref_link_xml)
                self._update_id_pair_meta(update, ref_imid, meta)
        # end for all ref types

        self._publish_update(my_imid, update)
        return (True, '')
    # end object_set

    def object_create(self, obj_ids, obj_dict):
        """Publish a newly created object, including the parent link."""
        obj_type = obj_ids['type']
        obj_class = self._db_client_mgr.get_resource_class(obj_type)
        if not 'parent_type' in obj_dict:
            # parent is config-root
            parent_type = 'config-root'
            parent_imid = 'contrail:config-root:root'
        else:
            parent_type = obj_dict['parent_type']
            parent_imid = obj_ids.get('parent_imid', None)

        # Parent Link Meta
        update = {}
        parent_cls = self._db_client_mgr.get_resource_class(parent_type)
        # children_field_metas is keyed by the pluralized child type name.
        parent_link_meta = parent_cls.children_field_metas.get(
            '%ss' % (obj_type))
        if parent_link_meta:
            meta = Metadata(parent_link_meta, '',
                            {'ifmap-cardinality': 'singleValue'},
                            ns_prefix='contrail')
            self._update_id_pair_meta(update, obj_ids['imid'], meta)
            self._publish_update(parent_imid, update)

        (ok, result) = self.object_set(obj_class, obj_ids['imid'], None,
                                       obj_dict)
        return (ok, result)
    # end object_create

    def _object_read_to_meta_index(self, ifmap_id):
        """Return a copy of the cached metadata dict for ``ifmap_id``."""
        # metas is a dict where key is meta-name and val is list of dict of
        # form [{'meta':meta}, {'id':id1, 'meta':meta}, {'id':id2, 'meta':meta}]
        metas = {}
        if ifmap_id in self._id_to_metas:
            metas = self._id_to_metas[ifmap_id].copy()
        return metas
    # end _object_read_to_meta_index

    def object_update(self, obj_cls, new_obj_dict):
        """Publish an update: removes stale props/refs, then re-publishes."""
        ifmap_id = get_ifmap_id_from_fq_name(obj_cls.resource_type,
                                             new_obj_dict['fq_name'])
        # read in refs from ifmap to determine which ones become inactive after update
        existing_metas = self._object_read_to_meta_index(ifmap_id)

        if not existing_metas:
            # UPDATE notify queued before CREATE notify, Skip publish to IFMAP.
            return (True, '')

        # remove properties that are no longer active
        props = obj_cls.prop_field_metas
        for prop, meta in props.items():
            if meta in existing_metas and new_obj_dict.get(prop) is None:
                self._delete_id_self_meta(ifmap_id, meta)

        # remove refs that are no longer active
        delete_list = []
        refs = dict((obj_cls.ref_field_metas[rf],
                     obj_cls.ref_field_types[rf][0])
                    for rf in obj_cls.ref_fields)
        #refs = {'virtual-network-qos-forwarding-class': 'qos-forwarding-class',
        #        'virtual-network-network-ipam': 'network-ipam',
        #        'virtual-network-network-policy': 'network-policy',
        #        'virtual-network-route-table': 'route-table'}
        for meta, ref_res_type in refs.items():
            old_set = set(existing_metas.get(meta, {}).keys())
            new_set = set()
            ref_obj_type = self._db_client_mgr.get_resource_class(
                ref_res_type).object_type
            for ref in new_obj_dict.get(ref_obj_type + '_refs', []):
                to_imid = get_ifmap_id_from_fq_name(ref_res_type, ref['to'])
                new_set.add(to_imid)

            for inact_ref in old_set - new_set:
                delete_list.append((inact_ref, meta))

        if delete_list:
            self._delete_id_pair_meta_list(ifmap_id, delete_list)

        (ok, result) = self.object_set(obj_cls, ifmap_id, existing_metas,
                                       new_obj_dict)
        return (ok, result)
    # end object_update

    def object_delete(self, obj_ids):
        """Remove all links and property metadata for a deleted object."""
        ifmap_id = obj_ids['imid']
        parent_imid = obj_ids.get('parent_imid')
        existing_metas = self._object_read_to_meta_index(ifmap_id)
        meta_list = []
        for meta_name, meta_infos in existing_metas.items():
            # Delete all refs/links in the object.
            # Refs are identified when the key is a non-empty string.
            meta_list.extend([(k, meta_name) for k in meta_infos if k != ''])

        if parent_imid:
            # Remove link from parent
            meta_list.append((parent_imid, None))

        if meta_list:
            self._delete_id_pair_meta_list(ifmap_id, meta_list)

        # Remove all property metadata associated with this ident
        self._delete_id_self_meta(ifmap_id, None)
        return (True, '')
    # end object_delete

    def _init_conn(self):
        """(Re)establish the IF-MAP session, retrying every 3s until up."""
        self._mapclient = client(("%s" % (self._ifmap_srv_ip),
                                  "%s" % (self._ifmap_srv_port)),
                                 self._username, self._password,
                                 self._NAMESPACES, self._ssl_options)

        connected = False
        while not connected:
            try:
                resp_xml = self._mapclient.call('newSession',
                                                NewSessionRequest())
            except socket.error as e:
                msg = 'Failed to establish IF-MAP connection: %s' % str(e)
                self.config_log(msg, level=SandeshLevel.SYS_WARN)
                time.sleep(3)
                continue

            resp_doc = etree.parse(StringIO.StringIO(resp_xml))
            err_codes = resp_doc.xpath(
                '/env:Envelope/env:Body/ifmap:response/errorResult/@errorCode',
                namespaces=self._NAMESPACES)
            if not err_codes:
                connected = True
            else:
                # Server answered with an error; tear down and retry.
                msg = "Failed to establish IF-MAP connection: %s" % err_codes
                self.config_log(msg, level=SandeshLevel.SYS_WARN)
                session_id = self._mapclient.get_session_id()
                try:
                    self._mapclient.call('endSession',
                                         EndSessionRequest(session_id))
                except socket.error as e:
                    msg = "Failed to end the IF-MAP session %s: %s" %\
                        (session_id, str(e))
                    self.config_log(msg, level=SandeshLevel.SYS_WARN)
                time.sleep(3)

        ConnectionState.update(conn_type=ConnectionType.IFMAP, name='IfMap',
                               status=ConnectionStatus.UP, message='',
                               server_addrs=["%s:%s" % (self._ifmap_srv_ip,
                                                        self._ifmap_srv_port)])
        self._conn_state = ConnectionStatus.UP
        msg = 'IFMAP connection ESTABLISHED'
        self.config_log(msg, level=SandeshLevel.SYS_NOTICE)

        self._mapclient.set_session_id(
            newSessionResult(resp_xml).get_session_id())
        self._mapclient.set_publisher_id(
            newSessionResult(resp_xml).get_publisher_id())
    # end _init_conn

    def _get_api_server(self):
        return self._db_client_mgr._api_svr_mgr
    # end _get_api_server

    def reset(self):
        """Clear the publish cache and queue; restart the dequeue greenlet."""
        self._id_to_metas = {}
        while not self._queue.empty():
            self._queue.get_nowait()
        # Don't kill ourselves if reset() is called from the dequeue greenlet.
        if (self._dequeue_greenlet is not None and
                gevent.getcurrent() != self._dequeue_greenlet):
            self._dequeue_greenlet.kill()
        self._dequeue_greenlet =\
            vnc_greenlets.VncGreenlet("VNC IfMap Dequeue",
                                      self._ifmap_dequeue_task)
    # end reset

    def _publish_config_root(self):
        """Wipe the IF-MAP server and publish the default config-root."""
        # Remove all resident data
        result = ifmap_wipe(self._mapclient)
        if result is None:
            msg = "Cannot purge the IF-MAP server before publishing root graph"
            self.config_log(msg, level=SandeshLevel.SYS_WARN)
        # Build default config-root
        buf = cStringIO.StringIO()
        perms = Provision.defaults.perms
        perms.exportChildren(buf, level=1, pretty_print=False)
        id_perms_xml = buf.getvalue()
        buf.close()
        update = {}
        meta = Metadata(self._IPERMS_NAME, '',
                        {'ifmap-cardinality': 'singleValue'},
                        ns_prefix='contrail', elements=id_perms_xml)
        self._update_id_self_meta(update, meta)
        self._publish_update("contrail:config-root:root", update)
    # end _publish_config_root

    def config_log(self, msg, level):
        self._db_client_mgr.config_log(msg, level)
    # end config_log

    @ignore_exceptions
    def _generate_ifmap_trace(self, oper, body):
        """Build a sandesh trace record for one publish operation."""
        req_id = get_trace_id()
        ifmap_trace = IfmapTrace(request_id=req_id)
        ifmap_trace.operation = oper
        ifmap_trace.body = body
        return ifmap_trace
    # end _generate_ifmap_trace

    def _publish_to_ifmap_enqueue(self, oper, oper_body, do_trace=True):
        """Queue one (oper, body) publish request for the dequeue greenlet."""
        # safety check, if we proceed ifmap-server reports error
        # asking for update|delete in publish
        if not oper_body:
            return
        self._queue.put((oper, oper_body, do_trace))
    # end _publish_to_ifmap_enqueue

    def _ifmap_dequeue_task(self):
        """Greenlet body: keep draining the queue; log and survive errors."""
        while True:
            try:
                self._publish_to_ifmap_dequeue()
            except Exception as e:
                tb = detailed_traceback()
                self.config_log(tb, level=SandeshLevel.SYS_ERR)

    def _publish_to_ifmap_dequeue(self):
        """Drain queued requests, batching same-oper bodies into one publish."""
        def _publish(requests, traces, publish_discovery=False):
            if not requests:
                return
            ok = False
            # Continue to trying publish requests until the queue is full.
            # When queue is full, ifmap is totally resync from db
            while not ok:
                ok, err_msg = self._publish_to_ifmap(''.join(requests))
                if ok:
                    trace_msg(traces, 'IfmapTraceBuf', self._sandesh)
                else:
                    trace_msg(traces, 'IfmapTraceBuf', self._sandesh,
                              error_msg=err_msg)
                if publish_discovery and ok:
                    self._get_api_server().publish_ifmap_to_discovery()
                    self._is_ifmap_up = True
                if not ok:
                    msg = ("%s. IF-MAP sending queue size: %d/%d" %
                           (err_msg, self._queue.qsize(),
                            self._get_api_server()._args.ifmap_queue_size))
                    self.config_log(msg, level=SandeshLevel.SYS_WARN)
                    gevent.sleep(1)
        # end _publish

        while True:
            # block until there is data in the queue
            (oper, oper_body, do_trace) = self._queue.get()
            requests = []
            requests_len = 0
            traces = []
            while True:
                # drain the queue till empty or max message size
                # or change of oper because ifmap does not like
                # different operations in same message
                if oper == 'publish_discovery':
                    _publish(requests, traces, True)
                    break
                if do_trace:
                    trace = self._generate_ifmap_trace(oper, oper_body)
                    traces.append(trace)
                requests.append(oper_body)
                requests_len += len(oper_body)
                if (requests_len >
                        self._get_api_server()._args.ifmap_max_message_size):
                    _publish(requests, traces)
                    break
                old_oper = oper
                try:
                    (oper, oper_body, do_trace) = self._queue.get_nowait()
                    if oper != old_oper:
                        # Flush the batch and start a new one for the new oper.
                        _publish(requests, traces)
                        requests = []
                        requests_len = 0
                        traces = []
                        continue
                except Empty:
                    _publish(requests, traces)
                    break
    # end _publish_to_ifmap_dequeue

    def _publish_to_ifmap(self, oper_body):
        """Send one publish request; on session error re-init and retry.

        Returns (True, resp_xml) on success or (False, err_msg) on failure.
        """
        try:
            not_published = True
            retry_count = 0
            resp_xml = None
            while not_published:
                sess_id = self._mapclient.get_session_id()
                req_xml = PublishRequest(sess_id, oper_body)
                resp_xml = self._mapclient.call('publish', req_xml)

                resp_doc = etree.parse(StringIO.StringIO(resp_xml))
                err_codes = resp_doc.xpath(
                    '/env:Envelope/env:Body/ifmap:response/errorResult/@errorCode',
                    namespaces=self._NAMESPACES)
                if err_codes:
                    if retry_count == 0:
                        log_str = 'Error publishing to ifmap, req: %s, resp: %s' \
                            %(req_xml, resp_xml)
                        self.config_log(log_str, level=SandeshLevel.SYS_ERR)
                        ConnectionState.update(
                            conn_type=ConnectionType.IFMAP,
                            name='IfMap', status=ConnectionStatus.INIT,
                            message='Session lost, renew it',
                            server_addrs=["%s:%s" % (self._ifmap_srv_ip,
                                                     self._ifmap_srv_port)])
                        self._conn_state = ConnectionStatus.INIT
                        self._is_ifmap_up = False
                    retry_count = retry_count + 1
                    self._init_conn()
                    if self._ifmap_restarted():
                        msg = "IF-MAP servers restarted, re-populate it"
                        self.config_log(msg, level=SandeshLevel.SYS_ERR)
                        self.reset()
                        self._get_api_server().publish_ifmap_to_discovery(
                            'down', msg)

                        self._publish_config_root()
                        self._db_client_mgr.db_resync()
                        self._publish_to_ifmap_enqueue('publish_discovery', 1)
                else:  # successful publish
                    not_published = False
                    break
            # end while not_published

            if retry_count:
                log_str = 'Success publishing to ifmap after %d tries' \
                    %(retry_count)
                self.config_log(log_str, level=SandeshLevel.SYS_ERR)

            return True, resp_xml
        except Exception as e:
            # Failed to publish the operation due to unknown error.
            # Probably a connection issue with the ifmap server.
            msg = "Failed to publish request %s: %s" % (oper_body, str(e))
            return False, msg
    # end _publish_to_ifmap

    def _build_request(self, id1_name, id2_name, meta_list, delete=False):
        """Build the XML body for publish update/delete operations.

        ``id2_name == 'self'`` means the metadata attaches to id1 alone
        (a property) rather than a link between two identities.
        """
        request = ''
        id1 = unicode(Identity(name=id1_name, type="other",
                               other_type="extended"))
        if id2_name != 'self':
            id2 = unicode(Identity(name=id2_name, type="other",
                                   other_type="extended"))
        else:
            id2 = None
        for m in meta_list:
            if delete:
                filter = unicode(m) if m else None
                op = PublishDeleteOperation(id1=id1, id2=id2, filter=filter)
            else:
                op = PublishUpdateOperation(id1=id1, id2=id2,
                                            metadata=unicode(m),
                                            lifetime='forever')
            request += unicode(op)
        return request

    def _delete_id_self_meta(self, self_imid, meta_name):
        """Delete one property meta (or all, if meta_name is None) of an id."""
        contrail_metaname = 'contrail:' + meta_name if meta_name else None
        del_str = self._build_request(self_imid, 'self', [contrail_metaname],
                                      True)
        self._publish_to_ifmap_enqueue('delete', del_str)

        try:
            # del meta from cache and del id if this was last meta
            if meta_name:
                del self._id_to_metas[self_imid][meta_name]
                if not self._id_to_metas[self_imid]:
                    del self._id_to_metas[self_imid]
            else:
                del self._id_to_metas[self_imid]
        except KeyError:
            # Case of delete received for an id which we do not know about.
            # Could be a case of duplicate delete.
            # There is nothing for us to do here. Just log and proceed.
            msg = "Delete received for unknown imid(%s) meta_name(%s)." % \
                (self_imid, meta_name)
            self.config_log(msg, level=SandeshLevel.SYS_DEBUG)
    # end _delete_id_self_meta

    def _delete_id_pair_meta_list(self, id1, meta_list):
        """Delete link metadata between id1 and each (id2, meta) pair."""
        del_str = ''
        for id2, metadata in meta_list:
            contrail_metadata = 'contrail:' + metadata if metadata else None
            del_str += self._build_request(id1, id2, [contrail_metadata], True)

        self._publish_to_ifmap_enqueue('delete', del_str)

        # del meta,id2 from cache and del id if this was last meta
        def _id_to_metas_delete(id1, id2, meta_name):
            if id1 not in self._id_to_metas:
                return
            if meta_name not in self._id_to_metas[id1]:
                return
            if not self._id_to_metas[id1][meta_name]:
                del self._id_to_metas[id1][meta_name]
                if not self._id_to_metas[id1]:
                    del self._id_to_metas[id1]
                return

            # if meta is prop, noop
            if id2 in self._id_to_metas[id1][meta_name]:
                del self._id_to_metas[id1][meta_name][id2]
        #end _id_to_metas_delete

        for id2, metadata in meta_list:
            if metadata:
                # replace with remaining refs
                _id_to_metas_delete(id1, id2, metadata)
                _id_to_metas_delete(id2, id1, metadata)
            else:
                # no meta specified remove all links from id1 to id2
                for meta_name in self._id_to_metas.get(id1, {}).keys():
                    _id_to_metas_delete(id1, id2, meta_name)
                for meta_name in self._id_to_metas.get(id2, {}).keys():
                    _id_to_metas_delete(id2, id1, meta_name)
    # end _delete_id_pair_meta_list

    def _update_id_self_meta(self, update, meta):
        """ update: dictionary of the type
                update[<id> | 'self'] = list(metadata)
        """
        mlist = update.setdefault('self', [])
        mlist.append(meta)
    # end _update_id_self_meta

    def _update_id_pair_meta(self, update, to_id, meta):
        mlist = update.setdefault(to_id, [])
        mlist.append(meta)
    # end _update_id_pair_meta

    def _publish_update(self, self_imid, update):
        """Publish only the metadata that differs from the cached copy."""
        requests = []
        self_metas = self._id_to_metas.setdefault(self_imid, {})
        for id2, metalist in update.items():
            request = self._build_request(self_imid, id2, metalist)

            # remember what we wrote for diffing during next update
            old_metalist = []
            for m in metalist:
                # Strip the 'contrail:' ns prefix (9 chars) from the meta name.
                meta_name = m._Metadata__name[9:]

                # Objects have two types of members - Props and refs/links.
                # Props are cached in id_to_metas as
                #     id_to_metas[self_imid][meta_name]['']
                #     (with empty string as key)
                # Links are cached in id_to_metas as
                #     id_to_metas[self_imid][meta_name][id2]
                #     id2 is used as a key
                if id2 == 'self':
                    self_metas[meta_name] = {'': m}
                    continue

                if meta_name in self_metas:
                    old_metalist.append(self_metas[meta_name])
                    # Update the link/ref
                    self_metas[meta_name][id2] = m
                else:
                    # Create a new link/ref
                    self_metas[meta_name] = {id2: m}

                # Reverse linking from id2 to id1
                self._id_to_metas.setdefault(id2, {})
                if meta_name in self._id_to_metas[id2]:
                    self._id_to_metas[id2][meta_name][self_imid] = m
                else:
                    self._id_to_metas[id2][meta_name] = {self_imid: m}

            old_request = self._build_request(self_imid, id2, old_metalist)
            if request != old_request:
                requests.append(request)

        upd_str = ''.join(requests)
        self._publish_to_ifmap_enqueue('update', upd_str)
    # end _publish_update

    def _ifmap_restarted(self):
        """True when the server no longer holds the config-root identity."""
        return not entity_is_present(self._mapclient, 'config-root', ['root'])

    def _health_checker(self):
        """Periodic healthcheck: publish a probe and verify global entities."""
        while True:
            try:
                # do the healthcheck only if we are connected
                if self._conn_state == ConnectionStatus.DOWN:
                    continue
                meta = Metadata('display-name', '',
                                {'ifmap-cardinality': 'singleValue'},
                                ns_prefix='contrail', elements='')
                request_str = self._build_request('healthcheck', 'self',
                                                  [meta])
                self._publish_to_ifmap_enqueue('update', request_str,
                                               do_trace=False)

                # Confirm the existence of the following default global entities in IFMAP.
                search_list = [
                    ('global-system-config', ['default-global-system-config']),
                ]
                for type, fq_name in search_list:
                    if not entity_is_present(self._mapclient, type, fq_name):
                        raise Exception("%s not found in IFMAP DB" %
                                        ':'.join(fq_name))

                # If we had unpublished the IFMAP server to discovery server earlier
                # publish it back now since it is valid now.
                if not self._is_ifmap_up:
                    self._get_api_server().publish_ifmap_to_discovery('up', '')
                    self._is_ifmap_up = True
                    ConnectionState.update(
                        conn_type=ConnectionType.IFMAP,
                        name='IfMap',
                        status=ConnectionStatus.UP,
                        message='',
                        server_addrs=["%s:%s" % (self._ifmap_srv_ip,
                                                 self._ifmap_srv_port)])
            except Exception as e:
                log_str = 'IFMAP Healthcheck failed: %s' % (str(e))
                self.config_log(log_str, level=SandeshLevel.SYS_ERR)
                if self._is_ifmap_up:
                    self._get_api_server().publish_ifmap_to_discovery(
                        'down', 'IFMAP DB - Invalid state')
                    self._is_ifmap_up = False
                    ConnectionState.update(
                        conn_type=ConnectionType.IFMAP,
                        name='IfMap',
                        status=ConnectionStatus.DOWN,
                        message='Invalid IFMAP DB State',
                        server_addrs=["%s:%s" % (self._ifmap_srv_ip,
                                                 self._ifmap_srv_port)])
            finally:
                # Sleep even after failures so the loop never busy-spins.
                gevent.sleep(
                    self._get_api_server().get_ifmap_health_check_interval())
    # end _health_checker
class CMSscan(object):
    """Fingerprint-based CMS scanner.

    Loads fingerprints from data/data.json into a queue; gevent workers
    fetch each fingerprint URL from the target and match either a regex
    substring or the response-body MD5. The first match stops the scan.
    """

    def __init__(self, url):
        """Load fingerprints for ``url`` (trailing slash stripped)."""
        self.q = Queue()
        self.url = url.rstrip("/")
        # Set by _worker() on the first successful match; None until then
        # (previously the attribute did not exist at all, so calling
        # outputreuslt() with no match raised AttributeError).
        self.resultout = None
        self.timeout = None
        # os.path.join instead of hard-coded '\\' so the path also works
        # on non-Windows platforms; 'with' guarantees the file is closed.
        data_path = os.path.join(os.path.dirname(__file__),
                                 '..', 'data', 'data.json')
        with open(data_path, 'r', encoding='gbk') as fp:
            webdata = json.load(fp)
        for fingerprint in webdata:
            self.q.put(fingerprint)
        self.nums = "web指纹总数:%d" % len(webdata)

    def _GetMd5(self, body):
        """Return the hex MD5 of a response body (accepts str or bytes)."""
        if isinstance(body, str):
            # hashlib requires bytes; requests' r.text is str on Python 3.
            body = body.encode('utf-8')
        md5 = hashlib.md5()
        md5.update(body)
        return md5.hexdigest()

    def _clearQueue(self):
        """Drain the fingerprint queue so all workers stop."""
        while not self.q.empty():
            self.q.get()

    def _worker(self):
        """Test one fingerprint; on a match record it and stop the scan."""
        data = self.q.get()
        scan_url = self.url + data["url"]
        try:
            r = requests.get(scan_url, timeout=20)
            if r.status_code != 200:
                return
            rtext = r.text
            if rtext is None:
                return
        except Exception:
            # Best-effort: treat any network failure as an empty body.
            rtext = ''
        if data["re"]:
            if rtext.find(data["re"]) != -1:
                result = data["name"]
                self.resultout = "CMS:%s 判定位置:%s 正则匹配:%s" % (
                    result, scan_url, data["re"])
                self._clearQueue()
                return True
        else:
            md5 = self._GetMd5(rtext)
            if md5 == data["md5"]:
                result = data["name"]
                self.resultout = "CMS:%s 判定位置:%s md5:%s" % (
                    result, scan_url, data["md5"])
                self._clearQueue()
                return True

    def _boss(self):
        """Worker loop: process fingerprints until the queue is empty."""
        while not self.q.empty():
            self._worker()

    def outputdatalen(self):
        """Return the fingerprint-count summary string."""
        return self.nums

    def outputreuslt(self):
        """Return the match result string, or None if nothing matched."""
        return self.resultout

    def runtime(self, maxsize=100):
        """Run the scan with ``maxsize`` greenlets; return the timing string."""
        # time.perf_counter() replaces time.clock(), which was removed
        # in Python 3.8.
        start = time.perf_counter()
        workers = [gevent.spawn(self._boss) for _ in range(maxsize)]
        gevent.joinall(workers)
        end = time.perf_counter()
        self.timeout = "执行用时: %f s" % (end - start)
        return self.timeout
class gwhatweb(object):
    """Fingerprint-based web technology scanner.

    Loads fingerprints from file/data.json into a queue; gevent workers
    fetch each fingerprint URL and match a regex substring or the body
    MD5, writing the first match to Cms_scan.txt.
    """

    def __init__(self, url):
        """Load fingerprints for ``url`` (trailing slash stripped)."""
        self.tasks = Queue()
        self.url = url.rstrip("/")
        # json.load(fp, encoding=...) was removed in Python 3.9; decode at
        # the file level instead, and use 'with' so the handle is closed.
        with open('file/data.json', encoding='utf-8') as fp:
            webdata = json.load(fp)
        for fingerprint in webdata:
            self.tasks.put(fingerprint)
        print("webdata total:%d" % len(webdata))

    def _GetMd5(self, body):
        """Return the hex MD5 of a response body (accepts str or bytes)."""
        if isinstance(body, str):
            # hashlib requires bytes; requests' r.text is str on Python 3.
            body = body.encode('utf-8')
        m2 = hashlib.md5()
        m2.update(body)
        return m2.hexdigest()

    def _clearQueue(self):
        """Drain the task queue so all workers stop."""
        while not self.tasks.empty():
            self.tasks.get()

    def _write_result(self, line):
        """Overwrite the report file with the matching fingerprint line.

        Replaces the old per-request open('r+')/truncate() that leaked the
        handle on the no-match path and crashed writing bytes to text mode.
        """
        with open('Cms_scan.txt', 'w', encoding='utf-8') as report:
            report.write(line)

    def _worker(self):
        """Test one fingerprint; on a match record it and stop the scan."""
        data = self.tasks.get()
        test_url = self.url + data["url"]
        rtext = ''
        try:
            r = requests.get(test_url, timeout=10)
            if r.status_code != 200:
                return
            rtext = r.text
            if rtext is None:
                return
        except Exception:
            # Best-effort: treat any network failure as an empty body.
            rtext = ''
        if data["re"]:
            if rtext.find(data["re"]) != -1:
                result = data["name"]
                print("CMS:%s Judge:%s re:%s" % (result, test_url, data["re"]))
                self._write_result(result + " " + test_url + " " + data["re"])
                self._clearQueue()
                return True
        else:
            md5 = self._GetMd5(rtext)
            if md5 == data["md5"]:
                result = data["name"]
                print("CMS:%s Judge:%s md5:%s" % (result, test_url, data["md5"]))
                # The original concatenated the literal text '+test_url+'
                # instead of the URL: (result + " " "+test_url+" "" + md5).
                self._write_result(result + " " + test_url + " " + data["md5"])
                self._clearQueue()
                return True

    def _boss(self):
        """Worker loop: process fingerprints until the queue is empty."""
        while not self.tasks.empty():
            self._worker()

    def whatweb(self, maxsize=100):
        """Run the scan with ``maxsize`` greenlets and print the timing."""
        # time.perf_counter() replaces time.clock(), removed in Python 3.8.
        start = time.perf_counter()
        workers = [gevent.spawn(self._boss) for _ in range(maxsize)]
        gevent.joinall(workers)
        end = time.perf_counter()
        print("cost: %f s" % (end - start))
class Command(collectstatic.Command):
    """
    This command extends Django's `collectstatic` with a `--faster` argument
    for parallel file copying using gevent. The speed improvement is
    especially helpful for remote storage backends like S3.
    """

    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        # Number of files queued for asynchronous copy/link.
        self.counter = 0
        # Queue + spawn strategy are chosen in set_options() based on flags.
        self.task_queue = None
        self.worker_spawn_method = None
        self.use_multiprocessing = False
        # prefixed_path -> (source_storage, path); later handed to the
        # storage backend's post_process().
        self.found_files = OrderedDict()

    def add_arguments(self, parser):
        """Register --faster, --workers and --use-multiprocessing flags."""
        super(Command, self).add_arguments(parser)
        parser.add_argument('--faster', action='store_true', default=False,
                            help='Collect static files simultaneously')
        parser.add_argument('--workers', action='store', default=20,
                            help='Amount of simultaneous workers (default=20)')
        parser.add_argument(
            '--use-multiprocessing', action='store_true', default=False,
            help='Use multiprocessing library instead of gevent')

    def set_options(self, **options):
        """Consume our extra options, then defer to the parent command."""
        self.faster = options.pop('faster')
        self.queue_worker_amount = int(options.pop('workers'))
        self.use_multiprocessing = options.pop('use_multiprocessing')

        if self.use_multiprocessing:
            self.task_queue = multiprocessing.JoinableQueue()
            self.worker_spawn_method = self.mp_spawn
        else:
            self.task_queue = GeventQueue()
            self.worker_spawn_method = self.gevent_spawn

        super(Command, self).set_options(**options)

        if self.faster:
            # The original management command of Django collects all the
            # files and calls the post_process method of the storage backend
            # within the same method. Because we are using a task queue,
            # post processing is started before all files were collected.
            self.post_process_original = self.post_process
            self.post_process = False

    def handle(self, **options):
        """Run collectstatic and report the elapsed time and file count."""
        start_time = time.time()
        super(Command, self).handle(**options)
        self.log('%s static files copied asynchronously in %is.'
                 % (self.counter, time.time() - start_time), level=1)

    def copy_file(self, path, prefixed_path, source_storage):
        self.file_handler('copy', path, prefixed_path, source_storage)

    def link_file(self, path, prefixed_path, source_storage):
        self.file_handler('link', path, prefixed_path, source_storage)

    def file_handler(self, handler_type, path, prefixed_path, source_storage):
        """
        Create a dict with all kwargs of the `copy_file` or `link_file`
        method of the super class and add it to the queue for later
        processing.
        """
        if self.faster:
            if prefixed_path not in self.found_files:
                self.found_files[prefixed_path] = (source_storage, path)
            self.task_queue.put({
                'handler_type': handler_type,
                'path': path,
                'prefixed_path': prefixed_path,
                'source_storage': source_storage
            })
            self.counter += 1
        else:
            # Without --faster, behave exactly like the stock command.
            if handler_type == 'link':
                super(Command, self).link_file(path, prefixed_path,
                                               source_storage)
            else:
                super(Command, self).copy_file(path, prefixed_path,
                                               source_storage)

    def delete_file(self, path, prefixed_path, source_storage):
        """
        We don't need all the file_exists stuff because we have to override
        all files anyways.
        """
        if self.faster:
            return True
        else:
            return super(Command, self).delete_file(path, prefixed_path,
                                                    source_storage)

    def collect(self):
        """
        Create some concurrent workers that process the tasks simultaneously.
        """
        collected = super(Command, self).collect()

        if self.faster:
            self.worker_spawn_method()
            # Post-process only after every queued file has been handled.
            self.post_processor()
        return collected

    def post_processor(self):
        # Here we check if the storage backend has a post_process
        # method and pass it the list of modified files.
        if self.post_process_original and hasattr(self.storage,
                                                  'post_process'):
            processor = self.storage.post_process(self.found_files,
                                                  dry_run=self.dry_run)
            for original_path, processed_path, processed in processor:
                if isinstance(processed, Exception):
                    self.stderr.write("Post-processing '%s' failed!"
                                      % original_path)
                    # Add a blank line before the traceback, otherwise it's
                    # too easy to miss the relevant part of the error message.
                    self.stderr.write("")
                    raise processed
                if processed:
                    self.log("Post-processed '%s' as '%s'"
                             % (original_path, processed_path), level=1)
                    self.post_processed_files.append(original_path)
                else:
                    self.log("Skipped post-processing '%s'" % original_path)

    def gevent_spawn(self):
        """ Spawn worker threads (using gevent) """
        monkey.patch_all(thread=False)
        joinall([
            spawn(self.gevent_worker)
            for x in range(self.queue_worker_amount)
        ])

    def gevent_worker(self):
        """
        Process one task after another by calling the handler (`copy_file`
        or `copy_link`) method of the super class.
        """
        while not self.task_queue.empty():
            task_kwargs = self.task_queue.get()
            handler_type = task_kwargs.pop('handler_type')
            if handler_type == 'link':
                super(Command, self).link_file(**task_kwargs)
            else:
                super(Command, self).copy_file(**task_kwargs)

    def mp_spawn(self):
        """ Spawn worker processes (using multiprocessing) """
        processes = []
        for x in range(self.queue_worker_amount):
            process = multiprocessing.Process(target=self.mp_worker)
            process.start()
            processes.append(process)
        for process in processes:
            process.join()

    def mp_worker(self):
        """
        Process one task after another by calling the handler (`copy_file`
        or `copy_link`) method of the super class.
        """
        # task_done() is required here because the multiprocessing path
        # uses a JoinableQueue (the gevent path does not call it).
        while not self.task_queue.empty():
            task_kwargs = self.task_queue.get()
            handler_type = task_kwargs.pop('handler_type')
            if handler_type == 'link':
                super(Command, self).link_file(**task_kwargs)
            else:
                super(Command, self).copy_file(**task_kwargs)
            self.task_queue.task_done()
class ClientPool(object):
    """Base interface for gevent-coroutine based DBAPI2 connection pooling.

    Implementation uses the `gevent` queueing mechanism so we can ensure
    that a DB task will not be claimed by more than one greenlet.

    Attributes:
        maxsize (int): Maximum number of pooled connections.
    """

    def __init__(self, maxsize=20):
        if not isinstance(maxsize, integer_types):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self.maxsize = maxsize
        self.pool = Queue()
        # Total connections created so far (pooled + checked out).
        self.size = 0

    def create_connection(self):
        """Create and return a new DBAPI2 connection.

        Subclasses must override this.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # BUG FIX: the original raised `NotImplemented(...)` —
        # ``NotImplemented`` is a non-callable sentinel constant, so callers
        # got ``TypeError: 'NotImplementedType' object is not callable``
        # instead of the intended abstract-method error.
        raise NotImplementedError("Must implement `create_connection` method.")

    def get(self):
        """Check out a connection, creating one if under ``maxsize``."""
        pool = self.pool
        if self.size >= self.maxsize or pool.qsize():
            # At capacity (or an idle connection is available): block on the
            # queue until a connection is returned.
            return pool.get()
        else:
            self.size += 1
            try:
                new_item = self.create_connection()
            except BaseException:
                # Creation failed: release the slot we reserved, then
                # propagate the original error (including greenlet kills).
                self.size -= 1
                raise
            return new_item

    def put(self, item):
        """Return a connection to the pool."""
        self.pool.put(item)

    def closeall(self):
        """Best-effort close of every idle pooled connection."""
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                # Deliberate best-effort: a connection that fails to close
                # is simply dropped.
                pass

    @contextlib.contextmanager
    def connection(self, isolation_level=None):
        """Context manager yielding a connection with transaction semantics.

        Commits on success, rolls back on error, and always returns a live
        connection to the pool. ``isolation_level`` is applied for the
        duration of the block and restored afterwards.
        """
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    # Already at the requested level — nothing to restore.
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn
        except BaseException:
            if conn.closed:
                # Connection died; assume the whole pool may be poisoned.
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" % conn)
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                if isolation_level is not None:
                    # Restore the level the connection had on checkout.
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        """Context manager yielding a cursor from a pooled connection."""
        isolation_level = kwargs.pop('isolation_level', None)
        with self.connection(isolation_level) as conn:
            yield conn.cursor(*args, **kwargs)

    def _rollback(self, conn):
        """Roll back ``conn``; return it, or None if rollback itself failed."""
        try:
            conn.rollback()
        except BaseException:
            # Report via gevent's hub rather than masking the caller's
            # original exception; the connection is discarded.
            gevent.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        """Execute a statement; returns the affected row count."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.rowcount

    def fetchone(self, *args, **kwargs):
        """Execute a query and return the first row."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        """Execute a query and return all rows."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchall()

    def fetchiter(self, *args, **kwargs):
        """Execute a query and lazily yield rows in fetchmany()-sized chunks."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            while True:
                items = cursor.fetchmany()
                if not items:
                    break
                for item in items:
                    yield item

    def query(self, query, fetch_opts='many', cursor_type='RealDictCursor'):
        """Run ``query`` using the fetch strategy mapped by CURSOR_FETCH.

        Raises:
            DBPoolError: wrapping any underlying failure.
        """
        try:
            return getattr(self, dict(CURSOR_FETCH).get(fetch_opts))(
                *(query, ), cursor_factory=getattr(psycopg2.extras,
                                                   cursor_type))
        except Exception as e:
            raise DBPoolError(e.args)
class DatabaseConnectionPool(object):
    """Gevent-based DBAPI2 connection pool with optional debug timing.

    Connections are lazily created up to ``maxsize``; idle connections are
    kept on a gevent Queue. The ``connection``/``cursor`` context managers
    commit on success, roll back on error, and return live connections to
    the pool. (Python 2 codebase: note the ``long`` check and ``u''``
    literals.)
    """

    def __init__(self, maxsize=100, debug=False):
        if not isinstance(maxsize, (int, long)):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self.maxsize = maxsize
        self.pool = Queue()
        # Total connections created so far (pooled + checked out).
        self.size = 0
        self.debug = debug

    def get(self):
        """Check out a connection, creating one if under ``maxsize``."""
        pool = self.pool
        if self.size >= self.maxsize or pool.qsize():
            # At capacity or an idle connection is available: block until
            # one is returned to the pool.
            return pool.get()
        else:
            self.size += 1
            try:
                new_item = self.create_connection()
            except:
                # Creation failed: release the reserved slot, re-raise.
                self.size -= 1
                raise
            return new_item

    def put(self, item):
        """Return a connection to the pool."""
        self.pool.put(item)

    def closeall(self):
        """Best-effort close of every idle pooled connection."""
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

    @contextlib.contextmanager
    def connection(self):
        """Yield a pooled connection with commit/rollback semantics."""
        conn = self.get()
        try:
            yield conn
        except:
            if conn.closed:
                # Connection died; drop it and flush the (possibly
                # poisoned) pool.
                conn = None
                self.closeall()
            else:
                # _rollback returns None if the rollback itself failed,
                # in which case the connection is discarded below.
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" %
                    (conn, ))
            conn.commit()
        finally:
            # Only healthy connections go back into the pool.
            if conn is not None and not conn.closed:
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        """Yield a cursor; same commit/rollback/return-to-pool protocol
        as ``connection()``."""
        conn = self.get()
        try:
            yield conn.cursor(*args, **kwargs)
        except:
            if conn.closed:
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" %
                    (conn, ))
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                self.put(conn)

    def _rollback(self, conn):
        """Roll back ``conn``; return it, or None if rollback failed."""
        try:
            conn.rollback()
        except:
            # Report through gevent's hub so the caller's original
            # exception is not masked; connection is discarded.
            gevent.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        """Execute a statement; logs the mogrified SQL (with timing when
        ``debug`` is set)."""
        with self.cursor() as cursor:
            # t stays '' unless debug timing is enabled.
            t = ''
            if self.debug:
                t1 = time.time()
            cursor.execute(*args, **kwargs)
            if self.debug:
                t = '%.3f' % (time.time() - t1)
            try:
                log.debug(u'execute %s %s' % \
                    (t, cursor.mogrify(*args, **kwargs)))
            except:
                # Logging is best-effort; mogrify can fail on odd params.
                pass

    def executemany(self, *args, **kwargs):
        """executemany() with the same optional debug timing/logging."""
        with self.cursor() as cursor:
            t = ''
            if self.debug:
                t1 = time.time()
            cursor.executemany(*args, **kwargs)
            if self.debug:
                t = '%.3f' % (time.time() - t1)
            try:
                log.debug(u'executemany %s %s' % \
                    (t, cursor.mogrify(*args, **kwargs)))
            except:
                pass

    def fetchone(self, *args, **kwargs):
        """Execute a query and return the first row."""
        with self.cursor() as cursor:
            t = ''
            if self.debug:
                t1 = time.time()
            cursor.execute(*args, **kwargs)
            if self.debug:
                t = '%.3f' % (time.time() - t1)
            try:
                log.debug(u'fetchone %s %s' % \
                    (t, cursor.mogrify(*args, **kwargs)))
            except:
                pass
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        """Execute a query and return all rows."""
        with self.cursor() as cursor:
            t = ''
            if self.debug:
                t1 = time.time()
            cursor.execute(*args, **kwargs)
            if self.debug:
                t = ' %.3f' % (time.time() - t1)
            try:
                log.debug(u'fetchall %s %s' %
                          (t, cursor.mogrify(*args, **kwargs)))
            except:
                pass
            return cursor.fetchall()
class WorkQueue(object):
    """Queue of work items drained by a ``Runner``, with optional bounding
    and high/low watermark callbacks.

    ``_qsize`` is tracked manually (rather than via ``Queue.qsize()``) so
    watermark processing can run against a consistent value while items
    are being enqueued/dequeued.
    """

    # Defaults used when the caller does not supply limits.
    _MAX_QUEUE_SIZE = 1024
    _MAX_WORKLOAD = 16

    def __init__(self, worker, start_runner=None, max_qsize=None,
                 max_work_load=None):
        # worker: callable/object invoked by the Runner for each item.
        # start_runner: optional predicate; when it returns falsy the
        #   runner is not started (see may_be_start_runner/runner_done).
        self.worker = worker
        self._start_runner = start_runner
        self._max_qsize = max_qsize or WorkQueue._MAX_QUEUE_SIZE
        self._max_work_load = max_work_load or WorkQueue._MAX_WORKLOAD
        # When bounded, enqueue() drops items once _qsize > _max_qsize.
        self._bounded = False
        self._queue = Queue()
        self._qsize = 0
        self._num_enqueues = 0
        self._num_dequeues = 0
        self._drops = 0
        # Sorted, de-duplicated WaterMark lists (set via setters below).
        self._high_watermarks = None
        self._low_watermarks = None
        # Index of the last high/low watermark crossed; -1 = none yet.
        self._hwm_index = -1
        self._lwm_index = -1
        self._runner = Runner(self, self._max_work_load)
        # Largest queue length ever observed (excluding dropped items).
        self._max_qlen = 0
    # end __init__

    def set_bounded(self, bounded):
        self._bounded = bounded
    # end set_bounded

    def bounded(self):
        return self._bounded
    # end bounded

    def set_high_watermarks(self, high_wm):
        # weed out duplicates and store the watermarks in sorted order
        self._high_watermarks = list(sorted(set(high_wm)))
        # Reset crossing state so the next enqueue re-evaluates from scratch.
        self._set_watermark_indices(-1, -1)
    # end set_high_watermarks

    def high_watermarks(self):
        return self._high_watermarks
    # end high_watermarks

    def set_low_watermarks(self, low_wm):
        # weed out duplicates and store the watermarks in sorted order
        self._low_watermarks = list(sorted(set(low_wm)))
        self._set_watermark_indices(-1, -1)
    # end set_low_watermarks

    def low_watermarks(self):
        return self._low_watermarks
    # end low_watermarks

    def watermark_indices(self):
        return self._hwm_index, self._lwm_index
    # end watermark_indices

    def enqueue(self, work_item):
        """Add ``work_item``; returns False if dropped due to bounding."""
        if self.increment_queue_size(work_item) > self._max_qlen:
            self._max_qlen = self._qsize
        if self._bounded:
            if self._qsize > self._max_qsize:
                # Undo the size bump — the item never enters the queue —
                # and restore _max_qlen to the pre-increment value.
                self.decrement_queue_size(work_item)
                self._max_qlen = self._qsize
                self._drops += 1
                return False
        self._num_enqueues += 1
        # Watermarks are evaluated against the post-increment size,
        # before the item is made visible to the runner.
        self._process_high_watermarks()
        self._queue.put(work_item)
        self.may_be_start_runner()
        return True
    # end enqueue

    def dequeue(self):
        """Remove and return one item, or None if the queue is empty."""
        try:
            work_item = self._queue.get_nowait()
        except Empty:
            work_item = None
        else:
            self.decrement_queue_size(work_item)
            self._num_dequeues += 1
            self._process_low_watermarks()
        return work_item
    # end dequeue

    def increment_queue_size(self, work_item):
        # Hook point: subclasses may weigh items differently. Returns the
        # new size so enqueue() can compare against _max_qlen.
        self._qsize += 1
        return self._qsize
    # end increment_queue_size

    def decrement_queue_size(self, work_item):
        self._qsize -= 1
    # end decrement_queue_size

    def size(self):
        return self._qsize
    # end size

    def max_qlen(self):
        return self._max_qlen

    def may_be_start_runner(self):
        # Start draining only if there is work and the optional
        # start_runner predicate allows it.
        if self._queue.empty() or \
           (self._start_runner and not self._start_runner()):
            return
        self._runner.start()
    # end may_be_start_runner

    def runner_done(self):
        # The runner should exit when the queue is drained or the
        # predicate no longer allows running.
        if self._queue.empty() or \
           (self._start_runner and not self._start_runner()):
            return True
        return False
    # end runner_done

    def is_queue_empty(self):
        if self._queue.empty():
            return True
        return False
    # end is_queue_empty

    def num_enqueues(self):
        return self._num_enqueues
    # end num_enqueues

    def num_dequeues(self):
        return self._num_dequeues
    # end num_dequeues

    def drops(self):
        return self._drops
    # end drops

    def runner(self):
        return self._runner
    # end runner

    def _set_watermark_indices(self, hwm_index, lwm_index):
        self._hwm_index = hwm_index
        self._lwm_index = lwm_index
    # end _set_watermark_indices

    def _process_high_watermarks(self):
        """Fire the callback of the highest high-watermark newly crossed
        by the current ``_qsize`` (called on enqueue)."""
        if not self._high_watermarks:
            return
        # Check if we have crossed any high watermarks.
        # Find the index of the first element greater than self._qsize
        # in self._high_watermarks.
        index = bisect.bisect_right(self._high_watermarks,
                                    WaterMark(self._qsize, None))
        # If the first element > qsize, then we have not crossed any
        # high watermark.
        if index == 0:
            return
        # We have crossed (index-1)th watermark in the list.
        hwm_index = index - 1
        if hwm_index == self._hwm_index:
            # Already reported this crossing; fire only on change.
            return
        # Arm the matching low watermark (one above) for the way down.
        self._set_watermark_indices(hwm_index, hwm_index + 1)
        # Now invoke the watermark callback
        self._high_watermarks[self._hwm_index].callback(self._qsize)
    # end _process_high_watermarks

    def _process_low_watermarks(self):
        """Fire the callback of the lowest low-watermark newly crossed
        by the current ``_qsize`` (called on dequeue)."""
        if not self._low_watermarks:
            return
        # Check if we have crossed any low watermarks.
        # Find the index of the first element not less than self._qsize
        # in self._low_watermarks.
        index = bisect.bisect_left(self._low_watermarks,
                                   WaterMark(self._qsize, None))
        # If there is no element >= qsize, then we have not crossed any
        # low watermark.
        if index == len(self._low_watermarks):
            return
        lwm_index = index
        if lwm_index == self._lwm_index:
            # Already reported this crossing; fire only on change.
            return
        # Arm the matching high watermark (one below) for the way up.
        self._set_watermark_indices(lwm_index - 1, lwm_index)
        # Now invoke the watermark callback
        self._low_watermarks[self._lwm_index].callback(self._qsize)
class Illust_download(Pixiv_Login):
    """Pixiv illustration downloader driven by gevent worker greenlets.

    Work flows through three gevent Queues: work_1 (illust IDs),
    work_2 (resolved image URLs) and work_3 (bookmark page numbers).
    Progress/status is reported to a GUI via the module-level ``PP``
    (window) and ``MS`` (signal hub) objects.
    """

    # def __init__(self):
    #     super(Illust_download, self).__init__()
    #     self.work_1 = Queue()
    #     self.list_photo = []
    #     self.tasks_list_1 = []
    #     self.work_2 = Queue()
    #     self.tasks_list_2 = []
    #     self.work_2_num = 0

    def illustID(self, ides):
        """Fill work_1 with illustration IDs from a comma-separated
        string or a list."""
        id_list = []
        self.work_1 = Queue()
        if type(ides) == str:
            # Normalize full-width (Chinese) commas before splitting.
            ides = ides.replace('，', ',')
            id_list = ides.split(',')
        elif type(ides) == list:
            id_list = ides
        for id in id_list:
            # print(id)
            self.work_1.put_nowait(id)

    def illust_info(self):
        """Drain work_1, resolving each illust ID to its original image
        URLs via the pixiv ajax API; results accumulate in list_photo."""
        # NOTE(review): each of the 10 greenlets spawned by get_illust()
        # re-binds self.list_photo = [] — results of greenlets that start
        # later appear to clobber earlier ones; verify intent.
        self.list_photo = []
        while not self.work_1.empty():
            id_photo = self.work_1.get_nowait()
            url_works = 'https://www.pixiv.net/ajax/illust/{}/pages?lang=zh'.format(
                id_photo)
            headers_works = {
                'referer':
                'https://www.pixiv.net/artworks/{}'.format(id_photo),
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
            }
            res_works = self.session.get(url_works, headers=headers_works)
            print(res_works.status_code)
            json_works = res_works.json()
            for body_works in json_works['body']:
                url_original = body_works['urls']['original']
                # Keep (url, illust_id) pairs; the ID is needed later for
                # the referer header when fetching the image bytes.
                self.list_photo.append([url_original, id_photo])

    def get_illust(self):
        """Run 10 concurrent illust_info() greenlets until work_1 drains."""
        self.tasks_list_1 = []
        for reptile in range(10):
            task_1 = gevent.spawn(self.illust_info)
            self.tasks_list_1.append(task_1)
        gevent.joinall(self.tasks_list_1)

    def urls(self):
        """Move resolved URLs into work_2 and size the GUI progress bar."""
        self.work_2 = Queue()
        for urls_photo in self.list_photo:
            self.work_2.put_nowait(urls_photo)
        self.work_2_nums = self.work_2.qsize()
        PP.ui.progressBar.setRange(0, self.work_2_nums)

    def download(self, path):
        """Drain work_2, saving each image under ``path`` and reporting
        progress to the GUI after each file."""
        while not self.work_2.empty():
            urls_photo = self.work_2.get_nowait()
            headers_photo = {
                'referer':
                'https://www.pixiv.net/artworks/{}'.format(urls_photo[1]),
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
            }
            res_photo = self.session.get(urls_photo[0],
                                         headers=headers_photo)
            print(res_photo.status_code)
            # Derive a flat filename from the URL path (slashes -> '_').
            with open(
                    '{0}/{1}'.format(
                        path,
                        urls_photo[0].replace(
                            'https://i.pximg.net/img-original/img/',
                            '').replace('/', '_')), 'wb') as photo:
                photo.write(res_photo.content)
            self.work_2_num = self.work_2_num + 1
            MS.text_print.emit(PP.ui.textBrowser,
                               '第{}张插画……下载成功'.format(self.work_2_num))
            MS.progress_update.emit(self.work_2_num)

    def run_download(self, path):
        """Run 5 concurrent download() greenlets until work_2 drains."""
        self.tasks_list_2 = []
        self.work_2_num = 0
        MS.progress_update.emit(self.work_2_num)
        for reptile in range(5):
            task_2 = gevent.spawn(self.download, path)
            self.tasks_list_2.append(task_2)
        gevent.joinall(self.tasks_list_2)

    def illust_download(self, path):
        """Full pipeline for explicit illust IDs, run on a background
        thread so the GUI stays responsive."""
        def thread_illust_download(path):
            self.__init__()
            self.Censor_cookies()
            self.get_illust()
            self.urls()
            self.run_download(path)

        thread = Thread(target=thread_illust_download, args=(path, ))
        # thread.setDaemon(True)
        thread.start()

    def author_illust(self, authorID):
        """Collect all illust IDs of one author and queue them (via the
        module-level ``PD`` downloader instance); also resolves the
        author's display name into self.userName."""
        illust_list = []
        id_author = authorID
        url_authorHome = 'https://www.pixiv.net/users/{}'.format(id_author)
        ulr_author = 'https://www.pixiv.net/ajax/user/{}/profile/all?lang=zh'.format(
            id_author)
        # headers_authorHome = {
        #     'referer': 'https://www.pixiv.net/users/{}/following'.format(id_author),
        #     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
        # }
        headers_author = {
            'referer': 'https://www.pixiv.net/users/{}'.format(id_author),
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
        }
        res_authorHome = self.session.get(url_authorHome,
                                          headers=headers_author)
        res_author = self.session.get(ulr_author, headers=headers_author)
        print(res_authorHome.status_code, res_author.status_code)
        json_author = res_author.json()
        illusts = json_author['body']['illusts']
        # The author name lives in a JSON blob embedded in the home page.
        soup_authorHome = BeautifulSoup(res_authorHome.text, 'html.parser')
        json_content = soup_authorHome.find(id='meta-preload-data')['content']
        self.userName = json.loads(json_content)['user'][id_author]['name']
        illust_num = len(illusts)
        MS.text_print.emit(
            PP.ui.textBrowser,
            '画师{0}共有{1}幅作品'.format(self.userName, illust_num))
        # print('画师{0}共有{1}幅作品'.format(userName, illust_num))
        for illust in illusts:
            illust_list.append(illust)
        PD.illustID(illust_list)

    def Author_iIllust(self, authorID, path):
        """Full pipeline for one author's works, run on a background
        thread; files are saved into a subdirectory named after the
        author."""
        def work_Author_iIllust(authorID, path):
            self.__init__()
            self.Censor_cookies()
            self.author_illust(authorID)
            self.get_illust()
            self.urls()
            # Windows-style path join; assumes the directory doesn't exist.
            path = path + '\\' + self.userName
            os.mkdir(path)
            self.run_download(path)

        thread_Author_iIllust = Thread(target=work_Author_iIllust,
                                       args=(
                                           authorID,
                                           path,
                                       ))
        # thread_Author_iIllust.setDaemon(True)
        thread_Author_iIllust.start()

    def Collect_page(self, page_num):
        """Queue bookmark page numbers 1..page_num into work_3."""
        self.work_3 = Queue()
        self.list_id_photo = []
        page = page_num
        for i in range(1, page + 1):
            self.work_3.put_nowait(str(i))

    def Collection(self):
        """Drain work_3, scraping illust IDs from the user's bookmark
        pages into list_id_photo."""
        while not self.work_3.empty():
            page = self.work_3.get_nowait()
            url_collection = "https://www.pixiv.net/bookmark.php?rest=show&p={}".format(
                page)
            headers = {
                'referer': 'https://accounts.pixiv.net/login',
                'origin': 'https://accounts.pixiv.net',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
            }
            res_collection = self.session.get(url_collection,
                                              headers=headers)
            print(res_collection.status_code)
            html_collection = res_collection.text
            soup_collection = BeautifulSoup(html_collection, 'html.parser')
            list_collection = soup_collection.find_all(class_='image-item')
            # print(list_collection)
            for collection in list_collection:
                id_collection = collection.find(class_='ui-scroll-view')
                id_photo = id_collection['data-id']
                self.list_id_photo.append(id_photo)

    def Collect_iIllust(self, path):
        """Full pipeline for the user's bookmarks, run on a background
        thread."""
        def work_Collect_iIllust(path):
            self.__init__()
            self.Censor_cookies()
            self.Collection()
            self.illustID(self.list_id_photo)
            self.get_illust()
            self.urls()
            self.run_download(path)

        thread_Collect_iIllust = Thread(target=work_Collect_iIllust,
                                        args=(path, ))
        thread_Collect_iIllust.start()
class Actor(object):
    """
    Class that contains a queue and a greenlet serving that queue.

    Messages posted to the queue are executed in batches on the actor's
    own greenlet; callers receive results through the futures attached
    to each message. Subclasses hook batch processing via
    _start_msg_batch()/_finish_msg_batch().
    """

    max_ops_before_yield = 10000
    """Number of calls to self._maybe_yield before it yields"""

    def __init__(self, qualifier=None):
        self._event_queue = Queue()
        # Greenlet is created here but only scheduled by start().
        self.greenlet = gevent.Greenlet(self._loop)
        self._op_count = 0
        self._current_msg = None
        self.started = False

        # Message being processed; purely for logging.
        self.msg_uuid = None

        # Logging parameters
        self.qualifier = qualifier
        if qualifier:
            self.name = "%s(%s)" % (self.__class__.__name__, qualifier)
        else:
            self.name = self.__class__.__name__
        # Can't use str(self) yet, it might not be ready until subclass
        # constructed.
        _log.info("%s created.", self.name)

    def start(self):
        """Start the actor's greenlet; returns self for chaining."""
        # gevent.Greenlet is falsy until started, hence this assert.
        assert not self.greenlet, "Already running"
        _log.info("Starting %s", self)
        self.started = True
        self.greenlet.start()
        return self

    def _loop(self):
        """
        Main greenlet loop, repeatedly runs _step().  Doesn't return normally.
        """
        # actor_storage is (presumably) greenlet-local logging context —
        # populated here so every log line can identify this actor.
        actor_storage.class_name = self.__class__.__name__
        actor_storage.name = self.name
        actor_storage.msg_uuid = None

        try:
            while True:
                self._step()
        except:
            _log.exception("Exception killed %s", self)
            raise

    def _step(self):
        """
        Run one iteration of the event loop for this actor.  Mainly
        broken out to allow the UTs to single-step an Actor.

        It also has the beneficial side effect of introducing a new local
        scope so that our variables die before we block next time.
        """
        # Block waiting for work.
        msg = self._event_queue.get()

        batch = [msg]
        batches = []

        if not msg.needs_own_batch:
            # Try to pull some more work off the queue to combine into a
            # batch.
            while not self._event_queue.empty():
                # We're the only ones getting from the queue so this should
                # never fail.
                msg = self._event_queue.get_nowait()
                if msg.needs_own_batch:
                    # Flush the accumulated batch and isolate this message
                    # in a batch of its own.
                    if batch:
                        batches.append(batch)
                    batches.append([msg])
                    batch = []
                else:
                    batch.append(msg)
        if batch:
            batches.append(batch)

        num_splits = 0
        while batches:
            # Process the first batch on our queue of batches.  Invariant:
            # we'll either process this batch to completion and discard it or
            # we'll put all the messages back into the batch queue in the same
            # order but with a first batch that is half the size and the
            # rest of its messages in the second batch.
            batch = batches.pop(0)

            # Give subclass a chance to filter the batch/update its state.
            batch = self._start_msg_batch(batch)
            assert batch is not None, "_start_msg_batch() should return batch."
            results = []  # Will end up same length as batch.
            for msg in batch:
                _log.debug("Message %s recd by %s from %s, queue length %d",
                           msg, msg.recipient, msg.caller,
                           self._event_queue.qsize())
                self._current_msg = msg
                actor_storage.msg_uuid = msg.uuid
                actor_storage.msg_name = msg.name
                try:
                    # Actually execute the per-message method and record its
                    # result.
                    result = msg.method()
                except BaseException as e:
                    _log.exception("Exception processing %s", msg)
                    results.append(ResultOrExc(None, e))
                    _stats.increment("Messages executed with exception")
                else:
                    results.append(ResultOrExc(result, None))
                    _stats.increment("Messages executed OK")
                finally:
                    self._current_msg = None
                    actor_storage.msg_uuid = None
                    actor_storage.msg_name = None
            try:
                # Give subclass a chance to post-process the batch.
                _log.debug("Finishing message batch")
                actor_storage.msg_name = "<finish batch>"
                self._finish_msg_batch(batch, results)
            except SplitBatchAndRetry:
                # The subclass couldn't process the batch as is (probably
                # because a failure occurred and it couldn't figure out which
                # message caused the problem).  Split the batch into two and
                # re-run it.
                _log.warn("Splitting batch to retry.")
                self.__split_batch(batch, batches)
                num_splits += 1  # For diags.
                _stats.increment("Split batches")
                # Skip result delivery; the messages will be re-executed.
                continue
            except BaseException as e:
                # Most-likely a bug.  Report failure to all callers.
                _log.exception("_finish_msg_batch failed.")
                # NOTE(review): plain (None, e) tuples replace the
                # ResultOrExc entries here — works with the unpacking
                # below, but loses the named-tuple type.
                results = [(None, e)] * len(results)
                _stats.increment("_finish_msg_batch() exception")
            finally:
                actor_storage.msg_name = None

            # Batch complete and finalized, set all the results.
            assert len(batch) == len(results)
            for msg, (result, exc) in zip(batch, results):
                for future in msg.results:
                    if exc is not None:
                        future.set_exception(exc)
                    else:
                        future.set(result)
                _stats.increment("Messages completed")
            _stats.increment("Batches processed")
        if num_splits > 0:
            _log.warn("Split batches complete. Number of splits: %s",
                      num_splits)

    @staticmethod
    def __split_batch(current_batch, remaining_batches):
        """
        Splits batch in half and prepends it to the list of remaining
        batches. Modifies remaining_batches in-place.

        :param list[Message] current_batch: list of messages that's currently
            being processed.
        :param list[list[Message]] remaining_batches: list of batches
            still to process.
        """
        assert len(current_batch) > 1, "Batch too small to split"
        # Split the batch.
        split_point = len(current_batch) // 2
        _log.debug("Split-point = %s", split_point)
        first_half = current_batch[:split_point]
        second_half = current_batch[split_point:]
        if remaining_batches and not remaining_batches[0][0].needs_own_batch:
            # Optimization: there's another batch already queued and
            # it also contains batchable messages push the second
            # half of this batch onto the front of that one.
            _log.debug("Split batch and found a subsequent batch, "
                       "coalescing with that.")
            next_batch = remaining_batches[0]
            next_batch[:0] = second_half
        else:
            _log.debug("Split batch but cannot prepend to next batch, adding "
                       "both splits to start of queue.")
            # Prepend second half first so first_half ends up in front,
            # preserving the original message order.
            remaining_batches[:0] = [second_half]
            remaining_batches[:0] = [first_half]

    def _start_msg_batch(self, batch):
        """
        Called before processing a batch of messages to give subclasses
        a chance to filter the batch.  Implementations must ensure that
        every AsyncResult in the batch is correctly set.  Usually, that
        means combining them into one list.

        It is usually easier to build up a batch of changes to make in the
        @actor_message-decorated methods and then process them in
        _finish_msg_batch().

        Intended to be overridden.  This implementation simply returns the
        input batch.

        :param list[Message] batch:
        """
        return batch

    def _finish_msg_batch(self, batch, results):
        """
        Called after a batch of events have been processed from the queue
        before results are set.

        Intended to be overridden.  This implementation does nothing.

        Exceptions raised by this method are propagated to all messages in
        the batch, overriding the existing results.  It is recommended that
        the implementation catches appropriate exceptions and maps them back
        to the correct entry in results.

        :param list[ResultOrExc] results: Pairs of (result, exception)
            representing the result of each message-processing function.
            Only one of the values is set.  Updates to the list alter the
            result send to any waiting listeners.
        :param list[Message] batch: The input batch, always the same length
            as results.
        """
        pass

    def _maybe_yield(self):
        """
        With some probability, yields processing to another greenlet.
        (Utility method to be called from the actor's greenlet during
        long-running operations.)
        """
        self._op_count += 1
        if self._op_count >= self.max_ops_before_yield:
            gevent.sleep()
            self._op_count = 0

    def __str__(self):
        return self.__class__.__name__ + "<%s,queue_len=%s,live=%s,msg=%s>" % (
            self.qualifier, self._event_queue.qsize(), bool(
                self.greenlet), self._current_msg)
class ArchivariusBridge(object):
    """Archivarius Bridge

    Pulls resource documents from CouchDB views, feeds them through
    gevent queues to ArchiveWorker greenlets, and dynamically scales the
    worker pools based on queue depth.
    """

    def __init__(self, config):
        self.config = config
        self.workers_config = {}
        # Simple counters used for the periodic status log line.
        self.log_dict = {}
        self.bridge_id = uuid.uuid4().hex
        self.api_host = self.config_get('resources_api_server')
        self.api_version = self.config_get('resources_api_version')
        # Workers settings
        for key in WORKER_CONFIG:
            self.workers_config[key] = (self.config_get(key) or
                                        WORKER_CONFIG[key])
        # Init config: each DEFAULTS entry becomes an attribute, coerced
        # to the default's type when a config value is present.
        for key in DEFAULTS:
            value = self.config_get(key)
            setattr(self, key,
                    type(DEFAULTS[key])(value) if value else DEFAULTS[key])
        # Pools
        self.workers_pool = Pool(self.workers_max)
        self.retry_workers_pool = Pool(self.retry_workers_max)
        self.filter_workers_pool = Pool()
        # Queues (-1 configured size means unbounded)
        self.api_clients_queue = Queue()
        if self.resource_items_queue_size == -1:
            self.resource_items_queue = Queue()
        else:
            self.resource_items_queue = Queue(self.resource_items_queue_size)
        if self.retry_resource_items_queue_size == -1:
            self.retry_resource_items_queue = Queue()
        else:
            self.retry_resource_items_queue = Queue(
                self.retry_resource_items_queue_size)
        # Default values for statistic variables
        for key in (
                'droped',
                'add_to_resource_items_queue',
                'add_to_retry',
                'exceptions_count',
                'not_found_count',
                'archived',
                'moved_to_public_archive',
                'dumped_to_secret_archive',
        ):
            self.log_dict[key] = 0
        # Fail fast on a missing or unparseable API server URL.
        if self.api_host != '' and self.api_host is not None:
            api_host = urlparse(self.api_host)
            if api_host.scheme == '' and api_host.netloc == '':
                raise ConfigError('Invalid \'resources_api_server\' url.')
        else:
            raise ConfigError('In config dictionary empty or missing'
                              ' \'resources_api_server\'')
        self.db = prepare_couchdb(self.couch_url, self.db_name, logger)
        self.archive_db = prepare_couchdb(self.couch_url,
                                          self.db_archive_name, logger)
        # TODO
        self.archive_db2 = prepare_couchdb(self.couch_url,
                                           self.db_archive_name + '_secret',
                                           logger)
        # Resource handlers discovered via setuptools entry points: each
        # maps to a filter callable and a CouchDB view path.
        self.resources = {}
        for entry_point in iter_entry_points(
                'openprocurement.archivarius.resources'):
            self.resources[entry_point.name] = {
                'filter': entry_point.load(),
                'view_path':
                '_design/{}/_view/by_dateModified'.format(entry_point.name)
            }

    def create_api_client(self):
        """Create one API client and push it onto api_clients_queue,
        retrying with exponential backoff until it succeeds."""
        client_user_agent = self.user_agent + '/' + self.bridge_id + '/' + uuid.uuid4().hex
        timeout = 0.1
        while True:
            try:
                api_client = APIClient(host_url=self.api_host,
                                       user_agent=client_user_agent,
                                       api_version=self.api_version,
                                       resource='RESOURCE',
                                       key=self.api_key)
                self.api_clients_queue.put({
                    'client': api_client,
                    'request_interval': 0
                })
                logger.info('Started api_client {}'.format(
                    api_client.session.headers['User-Agent']))
                break
            except RequestFailed as e:
                self.log_dict['exceptions_count'] += 1
                logger.error(
                    'Failed start api_client with status code {}'.format(
                        e.status_code))
                # Exponential backoff before retrying.
                timeout = timeout * 2
                sleep(timeout)

    def fill_api_clients_queue(self):
        # Ensure at least one client is always available to workers.
        while self.api_clients_queue.qsize() == 0:
            self.create_api_client()

    def fill_resource_items_queue(self, resource):
        """Stream ids from the resource's CouchDB view into the main
        queue, applying the resource's filter predicate."""
        start_time = datetime.now(TZ)
        rows = self.db.iterview(self.resources[resource]['view_path'],
                                10**3,
                                include_docs=True)
        filter_func = partial(self.resources[resource]['filter'],
                              time=start_time)
        for row in ifilter(filter_func, rows):
            self.resource_items_queue.put({
                'id': row.id,
                'resource': resource
            })
            self.log_dict['add_to_resource_items_queue'] += 1

    def queues_controller(self):
        """Periodically scale the main worker pool up (queue non-empty,
        pool has room) or down (queue empty, above workers_min) and log
        queue/statistics state."""
        while True:
            self.fill_api_clients_queue()
            #if self.workers_pool.free_count() > 0 and (self.resource_items_queue.qsize() > int((self.resource_items_queue_size / 100) * self.workers_inc_threshold)):
            if self.resource_items_queue.qsize(
            ) > 0 and self.workers_pool.free_count() > 0:
                w = ArchiveWorker.spawn(self.api_clients_queue,
                                        self.resource_items_queue, self.db,
                                        self.archive_db, self.archive_db2,
                                        self.workers_config,
                                        self.retry_resource_items_queue,
                                        self.log_dict)
                self.workers_pool.add(w)
                logger.info('Queue controller: Create main queue worker.')
            #elif self.resource_items_queue.qsize() < int((self.resource_items_queue_size / 100) * self.workers_dec_threshold):
            elif self.resource_items_queue.qsize() == 0:
                if len(self.workers_pool) > self.workers_min:
                    wi = self.workers_pool.greenlets.pop()
                    wi.shutdown()
                    logger.info('Queue controller: Kill main queue worker.')
            logger.info('Main resource items queue contains {} items'.format(
                self.resource_items_queue.qsize()))
            logger.info('Retry resource items queue contains {} items'.format(
                self.retry_resource_items_queue.qsize()))
            logger.info(
                'Status: add to queue - {add_to_resource_items_queue}, add to retry - {add_to_retry}, moved to public archive - {moved_to_public_archive}, dumped to secret archive - {dumped_to_secret_archive}, archived - {archived}, exceptions - {exceptions_count}, not found - {not_found_count}'
                .format(**self.log_dict))
            sleep(self.queues_controller_timeout)

    def gevent_watcher(self):
        """Keep at least the minimum number of main and retry workers
        alive while their queues have items."""
        self.fill_api_clients_queue()
        if not self.resource_items_queue.empty() and len(
                self.workers_pool) < self.workers_min:
            w = ArchiveWorker.spawn(self.api_clients_queue,
                                    self.resource_items_queue, self.db,
                                    self.archive_db, self.archive_db2,
                                    self.workers_config,
                                    self.retry_resource_items_queue,
                                    self.log_dict)
            self.workers_pool.add(w)
            logger.info('Watcher: Create main queue worker.')
        if not self.retry_resource_items_queue.empty() and len(
                self.retry_workers_pool) < self.retry_workers_min:
            # Retry workers consume from (and re-queue into) the retry queue.
            w = ArchiveWorker.spawn(self.api_clients_queue,
                                    self.retry_resource_items_queue, self.db,
                                    self.archive_db, self.archive_db2,
                                    self.workers_config,
                                    self.retry_resource_items_queue,
                                    self.log_dict)
            self.retry_workers_pool.add(w)
            logger.info('Watcher: Create retry queue worker.')

    def run(self):
        """Main entry point: spawn fillers and the controller, then watch
        until all pools drain."""
        logger.info('Start Archivarius Bridge',
                    extra={'MESSAGE_ID': 'edge_bridge_start_bridge'})
        for resource in self.resources:
            self.filter_workers_pool.spawn(self.fill_resource_items_queue,
                                           resource=resource)
        spawn(self.queues_controller)
        while True:
            self.gevent_watcher()
            # Exit once fillers, workers and retry workers have all finished.
            if len(self.filter_workers_pool) == 0 and len(
                    self.workers_pool) == 0 and len(
                        self.retry_workers_pool) == 0:
                break
            sleep(self.watch_interval)

    def config_get(self, name):
        """Read a value from the [main] config section; None if absent."""
        try:
            return self.config.get('main', name)
        except NoOptionError:
            return
class MeekSession(RelaySession):
    """A SOCKS relay session tunnelled over meek-style HTTP polling.

    Local client data is staged in ``l2m_queue`` (local-to-meek) and
    shipped to the relay via HTTP POST round trips; responses land in
    ``m2l_queue`` (meek-to-local) and are written back to the client.
    Three greenlets cooperate (reader, writer, relay) using the
    ``m_notifier``/``l_notifier`` events; ``finish`` tears them all down.
    """

    # One HTTP connection pool shared by every session (class attribute).
    conn_pool = HTTPClientPool()

    def __init__(self, socksconn, meek, timeout):
        super(MeekSession, self).__init__(socksconn)
        self.sessionid = session_id()
        self.meek = meek
        self.meektimeout = timeout
        self.relay = self.meek.select_relay()
        self.ca_certs = self.meek.ca_certs
        # Connection is leased from the shared pool; returned in clean().
        self.httpclient = self.conn_pool.get(self.relay, self.ca_certs,
                                             self.meektimeout)
        self.udpsock = None
        self.allsocks = [self.socksconn]
        self.l2m_queue = Queue()  # local -> meek outbound packets
        self.m2l_queue = Queue()  # meek -> local inbound packets
        self.m_notifier = Event()  # "there is data to upload"
        self.l_notifier = Event()  # "there is data for the local client"
        self.finish = Event()      # session shutdown flag
        self.m_notifier.clear()
        self.l_notifier.clear()
        self.finish.clear()
        self.timer = SharedTimer(self.meektimeout)

    def _stream_response(self, response):
        """Yield ``(chunk, "")`` pairs from a streaming HTTP response."""
        try:
            chunk = response.read(MAX_PAYLOAD_LENGTH)
            while chunk:
                log.debug("%s streaming DOWN %d bytes"
                          % (self.sessionid, len(chunk)))
                yield chunk, ""
                chunk = response.read(MAX_PAYLOAD_LENGTH)
        except GeneratorExit:
            response.release()
            # NOTE(review): `raise StopIteration` inside a generator is a
            # RuntimeError on Python 3.7+ (PEP 479); harmless on the
            # Python 2 runtime this file targets — confirm before porting.
            raise StopIteration

    def meek_response(self, response, stream):
        """Turn an HTTP response into a list (or generator) of
        ``(data, error)`` pairs; splits UDP payloads using the
        packet-length header when this is a UDP session."""
        if stream:
            return self._stream_response(response)
        data = response.read()
        response.release()
        if not data:
            return [("", "")]
        if not self.udpsock:
            # TCP session: the whole body is one byte stream.
            return [(data, "")]
        # parse UDP packets: header carries comma-separated packet lengths
        log.debug("%s DOWN %d bytes" % (self.sessionid, len(data)))
        lengths = get_meek_meta(response.headers, HEADER_UDP_PKTS).split(",")
        pos = 0
        pkts = []
        for length in lengths:
            nxt = pos + int(length)
            pkts.append((data[pos:nxt], ""))
            pos = nxt
        return pkts

    def meek_roundtrip(self, pkts):
        """POST *pkts* to the relay and return ``(data, error)`` pairs.

        Retries up to CLIENT_MAX_TRIES on non-timeout errors; a socket
        timeout or a relay-reported error ends the round trip immediately.
        """
        headers = {
            HEADER_SESSION_ID: self.sessionid,
            HEADER_MSGTYPE: MSGTYPE_DATA,
            'Host': self.relay.hostname,
            'Content-Type': "application/octet-stream",
            'Connection': "Keep-Alive",
        }
        stream = False
        # Streaming download only makes sense for TCP sessions and only
        # when the relay advertises the capability.
        if not self.udpsock and "stream" in self.relay.properties:
            stream = True
            headers[HEADER_MODE] = MODE_STREAM
        if pkts and self.udpsock:
            # UDP: record each packet's length so the relay can re-frame.
            lengths = str(",".join([str(len(p)) for p in pkts]))
            headers[HEADER_UDP_PKTS] = lengths
        data = "".join(pkts)
        headers['Content-Length'] = str(len(data))
        for _ in range(CLIENT_MAX_TRIES):
            try:
                log.debug("%s UP %d bytes" % (self.sessionid, len(data)))
                resp = self.httpclient.post("/", body=data, headers=headers)
                if resp.status_code != 200:
                    # meek server always give 200, so all non-200s mean
                    # external issues.
                    continue
                err = get_meek_meta(resp.headers, HEADER_ERROR)
                if err:
                    return [("", err)]
                else:
                    try:
                        return self.meek_response(resp, stream)
                    except Exception as ex:
                        log.error(
                            "[Exception][meek_roundtrip - meek_response]: %s"
                            % str(ex))
                        resp.release()
                        return [("", "Data Format Error")]
            except socket.timeout:  # @UndefinedVariable
                return [("", "timeout")]
            except Exception as ex:
                log.error("[Exception][meek_roundtrip]: %s" % str(ex))
                # Back off before the next retry attempt.
                gevent.sleep(CLIENT_RETRY_DELAY)
        # All attempts failed: mark the relay as less healthy.
        self.relay.failure += 1
        return [("", "Max Retry (%d) Exceeded" % CLIENT_MAX_TRIES)]

    def meek_sendrecv(self):
        """Drain l2m_queue, batching up to MAX_PAYLOAD_LENGTH per round
        trip, and yield every ``(data, error)`` pair received back."""
        pkts = []
        datalen = 0
        while not self.l2m_queue.empty():
            pkt = self.l2m_queue.get()
            pkts.append(pkt)
            datalen += len(pkt)
            if datalen >= MAX_PAYLOAD_LENGTH:
                for (resp, err) in self.meek_roundtrip(pkts):
                    yield (resp, err)
                    # Stop on error or an empty (end-of-data) response.
                    if err or not resp:
                        return
                pkts = []
                datalen = 0
        # Final (possibly empty) round trip doubles as a poll for
        # server-side data.
        for (resp, err) in self.meek_roundtrip(pkts):
            yield (resp, err)
            if err or not resp:
                return

    def meek_relay(self):
        """One relay pass: upload pending data, enqueue downloads for the
        local writer. Returns an error string ("" on success)."""
        for (resp, err) in self.meek_sendrecv():
            if err:
                return err
            if resp:
                self.m2l_queue.put(resp)
                self.l_notifier.set()
        return ""

    def meek_relay_thread(self):
        """Relay greenlet: polls the relay, backing off exponentially
        (up to CLIENT_MAX_POLL_INTERVAL) while there is no upload data."""
        interval = CLIENT_INITIAL_POLL_INTERVAL
        while not self.finish.is_set():
            try:
                hasdata = self.m_notifier.wait(timeout=interval)
                self.m_notifier.clear()
                err = self.meek_relay()
                if err:
                    break
                if not hasdata:
                    # Idle: stretch the poll interval, capped at the max.
                    interval *= CLIENT_POLL_INTERVAL_MULTIPLIER
                    if interval > CLIENT_MAX_POLL_INTERVAL:
                        interval = CLIENT_MAX_POLL_INTERVAL
            except Exception as ex:
                log.error("[Exception][meek_relay_thread]: %s" % str(ex))
                break
        self.finish.set()

    def write_to_client(self, data):
        """Deliver *data* to the local client over UDP or the TCP socket."""
        if self.udpsock:
            self.udpsock.sendto(data, self.last_clientaddr)
        else:
            self.socksconn.sendall(data)

    def meek_write_to_client_thread(self):
        """Writer greenlet: flushes m2l_queue to the client; the shared
        idle timer aborts the session after prolonged inactivity."""
        while not self.finish.is_set():
            try:
                hasdata = self.l_notifier.wait(
                    timeout=CLIENT_MAX_POLL_INTERVAL)
                self.l_notifier.clear()
                if not hasdata:
                    self.timer.count(CLIENT_MAX_POLL_INTERVAL)
                    if self.timer.timeout():
                        break
                else:
                    self.timer.reset()
                    while not self.m2l_queue.empty():
                        data = self.m2l_queue.get()
                        if data:
                            self.write_to_client(data)
            except Exception as ex:
                log.error("[Exception][meek_write_to_client_thread]: %s"
                          % str(ex))
                break
        self.finish.set()

    def read_from_client(self, timeout):
        """Read one payload from the local TCP or UDP socket.

        Returns None on select timeout or on a datagram from an
        unexpected peer; raises RelaySessionError on protocol violations.
        NOTE(review): the *timeout* parameter is unused — select() always
        waits CLIENT_MAX_POLL_INTERVAL; confirm whether that is intended.
        """
        readable, _, _ = select.select(self.allsocks, [], [],
                                       CLIENT_MAX_POLL_INTERVAL)
        if not readable:
            return None
        if self.socksconn in readable:
            if self.udpsock:
                # In a UDP association the TCP control socket must stay
                # quiet; data on it is a protocol violation.
                raise RelaySessionError(
                    "unexcepted read-event from tcp socket in UDP session")
            data = self.socksconn.recv(MAX_PAYLOAD_LENGTH)
            if not data:
                raise RelaySessionError("peer closed")
            return data
        if self.udpsock and self.udpsock in readable:
            data, addr = self.udpsock.recvfrom(MAX_PAYLOAD_LENGTH)
            if not self.valid_udp_client(addr):
                # Drop datagrams from peers outside the association.
                return None
            else:
                self.last_clientaddr = addr
                return data

    def meek_read_from_client_thread(self):
        """Reader greenlet: moves client data onto l2m_queue and wakes
        the relay greenlet; idle timer aborts dead sessions."""
        while not self.finish.is_set():
            try:
                data = self.read_from_client(CLIENT_MAX_POLL_INTERVAL)
                if not data:
                    self.timer.count(CLIENT_MAX_POLL_INTERVAL)
                    if self.timer.timeout():
                        break
                else:
                    self.timer.reset()
                    self.l2m_queue.put(data)
                    self.m_notifier.set()
            except Exception as ex:
                log.error("[Exception][meek_read_from_client_thread]: %s"
                          % str(ex))
                break
        self.finish.set()

    def proc_tcp_request(self, req):
        # The SOCKS request itself is the first payload sent upstream.
        self.l2m_queue.put(req.pack())

    def relay_tcp(self):
        """Run the three session greenlets for a TCP relay and block
        until all of them finish."""
        read_thread = gevent.spawn(self.meek_read_from_client_thread)
        write_thread = gevent.spawn(self.meek_write_to_client_thread)
        relay_thread = gevent.spawn(self.meek_relay_thread)
        # notify relay to send request
        self.m_notifier.set()
        [t.join() for t in (read_thread, write_thread, relay_thread)]
        log.info("Session %s Ended" % self.sessionid)

    def valid_udp_client(self, addr):
        """True when *addr* matches the client registered by UDP
        ASSOCIATE (wildcard addresses accept any peer)."""
        if self.client_associate[0] == "0.0.0.0" or \
                self.client_associate[0] == "::":
            return True
        if self.client_associate == addr:
            return True
        return False

    def cmd_udp_associate(self, req):
        """Handle SOCKS UDP ASSOCIATE: forward the request upstream, bind
        a local UDP socket, and run the relay greenlets."""
        self.client_associate = (req.dstaddr, req.dstport)
        self.last_clientaddr = self.client_associate
        for (resp, err) in self.meek_roundtrip([req.pack()]):
            if err:
                return
            if resp:
                # NOTE(review): Reply(resp) is constructed but its result
                # is discarded — presumably just validates the upstream
                # reply; verify against the Reply implementation.
                Reply(resp)
        self.udpsock = bind_local_udp(self.socksconn)
        if not self.udpsock:
            request_fail(self.socksconn, req, GENERAL_SOCKS_SERVER_FAILURE)
            return
        self.track_sock(self.udpsock)
        read_thread = gevent.spawn(self.meek_read_from_client_thread)
        write_thread = gevent.spawn(self.meek_write_to_client_thread)
        relay_thread = gevent.spawn(self.meek_relay_thread)
        request_success(self.socksconn, *sock_addr_info(self.udpsock))
        [t.join() for t in (read_thread, write_thread, relay_thread)]
        log.info("Session %s Ended" % self.sessionid)

    def meek_terminate(self):
        """Best-effort TERMINATE message so the relay can drop session
        state early; failures are deliberately ignored."""
        headers = {
            HEADER_SESSION_ID: self.sessionid,
            HEADER_MSGTYPE: MSGTYPE_TERMINATE,
            #'Content-Type': "application/octet-stream",
            'Content-Length': "0",
            'Connection': "Keep-Alive",
            'Host': self.relay.hostname,
        }
        try:
            # NOTE(review): uses data= here while meek_roundtrip uses
            # body=; if post() only accepts body=, the bare except below
            # silently swallows the TypeError and TERMINATE is never sent
            # — verify against the HTTP client's API.
            self.httpclient.post("/", data="", headers=headers)
        except:
            pass

    def clean(self):
        """Tear down the session: notify the relay, close sockets, and
        return the pooled HTTP connection."""
        self.meek_terminate()
        for sock in self.allsocks:
            sock.close()
        #self.httpclient.close()
        self.conn_pool.release(self.relay, self.httpclient)
class TreeWatcher(object):
    """A watcher will subscribe events from a tree holder and turn them
    into iterator.

    :param tree_hub: A :class:`TreeHub` instance.
    :param from_application_name: The name of caller application.
    :param from_cluster_name: The cluster name of caller application.
    :param with_initial: ``True`` if you want to dump whole tree as the
                         first element of iterator.
    :param life_span: The life span in seconds of this session.
    :param metrics_tag_from: Tag value used for monitoring metrics.
    """

    # Maps ZooKeeper tree events to the message type emitted to clients.
    MESSAGE_TYPES = {
        TreeEvent.NODE_ADDED: 'update',
        TreeEvent.NODE_UPDATED: 'update',
        TreeEvent.NODE_REMOVED: 'delete'
    }
    TYPE_NAMES = (SERVICE_SUBDOMAIN, SWITCH_SUBDOMAIN, CONFIG_SUBDOMAIN,
                  EXTRA_SUBDOMAIN_SERVICE_INFO)

    # Depth of a parsed ZooKeeper path, counted from the /huskar root.
    PATH_LEVEL_TYPE = 1  # /huskar/{type}
    PATH_LEVEL_APPLICATION = 2  # /huskar/{type}/{application}
    PATH_LEVEL_CLUSTER = 3  # /huskar/{type}/{application}/{cluster}
    PATH_LEVEL_INSTANCE = 4  # /huskar/{type}/{application}/{cluster}/{id}

    def __init__(self, tree_hub, from_application_name=None,
                 from_cluster_name=None, with_initial=False, life_span=None,
                 metrics_tag_from=None):
        self.hub = tree_hub
        # The optional route context
        self.from_application_name = from_application_name
        self.from_cluster_name = from_cluster_name
        self.with_initial = with_initial
        # Pending (event_type, body) messages consumed by __iter__.
        self.queue = Queue()
        # Tree holders this watcher is subscribed to.
        self.holders = set()
        # (application, type) -> ClusterMap of symlink/route resolution.
        self.cluster_maps = collections.defaultdict(ClusterMap)
        # (application, type) -> cluster names the caller cares about.
        self.cluster_whitelist = collections.defaultdict(set)
        # subdomain name -> set of watched application names.
        self.watch_map = collections.defaultdict(set)
        self.life_span = life_span
        self._metrics_tag_from = metrics_tag_from

    def __iter__(self):
        """The tree watcher is iterable for subscribing events."""
        monitor_client.increment('tree_watcher.session', 1, tags={
            'from': str(self._metrics_tag_from),
            'appid': str(self._metrics_tag_from),
        })
        started_at = time.time()
        if self.with_initial:
            # First element is a full snapshot of the watched tree.
            body = self._load_entire_body()
            yield ('all', body)
            monitor_client.increment('tree_watcher.event', 1, tags={
                'from': str(self._metrics_tag_from),
                'appid': str(self._metrics_tag_from),
                'event_type': 'all',
            })
        while True:
            # Drain every queued event before emitting a keep-alive ping.
            while not self.queue.empty():
                event_type, body = self.queue.get()
                yield (event_type, body)
                monitor_client.increment('tree_watcher.event', 1, tags={
                    'from': str(self._metrics_tag_from),
                    'appid': str(self._metrics_tag_from),
                    'event_type': event_type,
                })
            yield ('ping', {})
            # End the session once its life span is exhausted.
            if self.life_span and time.time() > started_at + self.life_span:
                break
            sleep(1)

    def watch(self, application_name, type_name):
        """Watches a new subtree.

        :param application_name: The appid of subtree. (e.g. ``base.foo``)
        :param type_name: The type of subtree. (e.g. ``service``)
        :raises TreeTimeoutError: When the holder does not initialize in
                                  time; the holder lease is released first.
        """
        # NOTE: the context manager rebinds type_name to the subdomain's
        # basic name for the rest of this method.
        with self.maintain_watch_map(application_name,
                                     type_name) as type_name:
            holder = self.hub.get_tree_holder(application_name, type_name)
            if holder in self.holders:
                return
            try:
                holder.block_until_initialized(
                    timeout=settings.ZK_SETTINGS['treewatch_timeout'])
            except TreeTimeoutError:
                self.hub.release_tree_holder(application_name, type_name)
                raise
            # Seed the cluster map with the caller's resolved routes.
            cluster_map = self.cluster_maps[application_name, type_name]
            cluster_routes = holder.list_cluster_routes(
                self.from_application_name, self.from_cluster_name)
            for cluster_name, resolved_name in cluster_routes:
                cluster_map.register(cluster_name, resolved_name)
            self.holders.add(holder)
            # Future tree changes flow into handle_event().
            holder.tree_changed.connect(self.handle_event, sender=holder)

    @contextlib.contextmanager
    def maintain_watch_map(self, application_name, type_name):
        """Track *application_name* under the subdomain while the body
        runs; roll the registration back if the body raises."""
        subdomain = subdomain_map[type_name]
        self.watch_map[subdomain.name].add(application_name)
        try:
            yield subdomain.basic_name
        except Exception:
            self.watch_map[subdomain.name].discard(application_name)
            raise

    def limit_cluster_name(self, application_name, type_name, cluster_name):
        """Adds a whitelist item to limit events by cluster name.

        :param application_name: The appid of subtree. (e.g. ``base.foo``)
        :param type_name: The type of subtree. (e.g. ``service``)
        :param cluster_name: Only added cluster names will be shown.
        """
        self.cluster_whitelist[application_name, type_name].add(cluster_name)

    def handle_event(self, sender, event):
        """Signal handler for tree changes; translates ZooKeeper events
        into queued watcher messages."""
        path = parse_path(self.hub.base_path, event.event_data.path)
        path_level = path.get_level()
        if path.is_none() or path_level == self.PATH_LEVEL_TYPE:
            logger.warning('Unexpected path: %r', event)
            return

        # We should notify for changes of cluster route.
        if path_level in (self.PATH_LEVEL_APPLICATION,
                          self.PATH_LEVEL_CLUSTER):
            # Publish message if and only if node is modified
            if (event.event_type == TreeEvent.NODE_ADDED
                    and not event.event_data.data):
                return
            cluster_map = self.cluster_maps[path.application_name,
                                            path.type_name]
            # Snapshot the map before updating so we can diff afterwards.
            last_cluster_names = dict(cluster_map.cluster_names)
            self._update_cluster_route(path, event)
            # Publish message if and only if the callee cluster is watched
            if self._has_cluster_route_changed(path, last_cluster_names):
                # Dump all data for symlink or route changing
                entire_body = self._load_entire_body()
                message = ('all', entire_body)
                self.queue.put(message)
            else:
                # Dump updated data for watched extra types
                body = self.handle_event_for_extra_type('update', path)
                if body:
                    message = ('update', body)
                    self.queue.put(message)

        # We should notify for changes of instance node.
        if path_level == self.PATH_LEVEL_INSTANCE:
            data = event.event_data.data
            event_type = event.event_type
            if event_type == TreeEvent.NODE_REMOVED:
                # Removal is published as a delete with empty data.
                data = None
            entire_body = self._dump_body([(path, data)])
            if entire_body:
                message = (self.MESSAGE_TYPES[event_type], entire_body)
                self.queue.put(message)
            return

    def _load_entire_body(self):
        """Build a full snapshot body from every watched holder plus the
        extra-type handlers, then fill in empty placeholders."""
        entire_body = self._dump_body(self._iter_instance_nodes())
        extra_types_data = self.handle_all_for_extra_type()
        entire_body.update(extra_types_data)
        entire_body = self._fill_body(entire_body)
        return entire_body

    def _update_cluster_route(self, path, event):
        """Re-resolve route intents and symlinks after a cluster-level
        change under *path*."""
        # symlink or route changed only at service scope
        path_level = path.get_level()
        cluster_map = self.cluster_maps[path.application_name,
                                        path.type_name]
        holder = self.hub.get_tree_holder(path.application_name,
                                          path.type_name)
        force_route_cluster_name = self.from_cluster_name \
            if path.type_name == SERVICE_SUBDOMAIN else None

        # Update cluster map for route
        if self.from_application_name and self.from_cluster_name:
            # NOTE It is not possible to know whether the changed cluster
            # is a middle node in the [route -> symlink -> physical] chain
            # style configuration.
            # We must resolve all intent in whichever cluster changed.
            for intent in settings.ROUTE_INTENT_LIST:
                resolved_name = holder.cluster_resolver.resolve(
                    self.from_cluster_name, self.from_application_name,
                    intent,
                    force_route_cluster_name=force_route_cluster_name)
                if resolved_name is None:
                    # Unresolvable routes fall back to the caller cluster.
                    resolved_name = self.from_cluster_name
                cluster_map.deregister(intent)
                cluster_map.register(intent, resolved_name)

        # Update cluster map for symlink
        if path_level == self.PATH_LEVEL_CLUSTER:
            resolved_name = holder.cluster_resolver.resolve(
                path.cluster_name,
                force_route_cluster_name=force_route_cluster_name)
            cluster_map.deregister(path.cluster_name)
            cluster_map.register(path.cluster_name, resolved_name)

    def _has_cluster_route_changed(self, path, last_cluster_names):
        """True when the symmetric difference between the old and new
        cluster maps touches a whitelisted (or any, if no whitelist)
        cluster."""
        cluster_map = self.cluster_maps[path.application_name,
                                        path.type_name]
        cluster_whitelist = self.cluster_whitelist[path.application_name,
                                                   path.type_name]
        # Compare the difference of cluster names
        cluster_difference = set(
            dict(
                set(last_cluster_names.items())
                ^ set(cluster_map.cluster_names.items())))
        if cluster_difference:
            return not cluster_whitelist or (len(
                cluster_whitelist.intersection(cluster_difference)) != 0)
        return False

    def handle_all_for_extra_type(self):
        """Collect 'all'-event data for every basic subdomain's extra
        types."""
        body = {}
        for type_name in subdomain_map.BASIC_SUBDOMAINS:
            path = Path.make(type_name=type_name)
            type_body = self.handle_event_for_extra_type('all', path)
            body.update(type_body)
        return body

    def handle_event_for_extra_type(self, event_type, path):
        """Dispatch *event_type* at *path* to the registered extra-type
        handlers; returns only non-empty per-type data."""
        body = {}
        # Feature-gated: canary switch must be on for meta messages.
        if not switch.is_switched_on(SWITCH_ENABLE_META_MESSAGE_CANARY):
            return body
        extra_types = subdomain_map.get_extra_types(path.type_name)
        for extra_type in extra_types:
            type_data = body.setdefault(extra_type, {})
            application_names = set()
            if path.application_name:
                # Scoped event: only include it if actually watched.
                if path.application_name in self.watch_map[extra_type]:
                    application_names.add(path.application_name)
            else:
                # Type-level event: fan out to all watched applications.
                application_names = self.watch_map[extra_type]
            for application_name in application_names:
                app_data = type_data.setdefault(application_name, {})
                handler = extra_handlers[extra_type, event_type]
                data = handler(
                    self,
                    Path.make(path.type_name, application_name,
                              path.cluster_name, path.data_name))
                if data:
                    app_data.update(data)
        # Drop types whose application data is entirely empty.
        return {
            type_name: type_data
            for type_name, type_data in body.items()
            if any(type_data.values())
        }

    def _iter_instance_nodes(self):
        """Yield (path, data) for every instance node of watched types."""
        for holder in self.holders:
            if holder.type_name in self.watch_map:
                for path, data in holder.list_instance_nodes():
                    yield path, data

    def _dump_body(self, pairs):
        """Project (path, data) pairs into the nested
        {type: {app: {cluster: {key: {'value': data}}}}} body, applying
        symlink/route resolution and the cluster whitelist."""
        entire_body = {}
        for path, data in pairs:
            cluster_map = self.cluster_maps[path.application_name,
                                            path.type_name]
            cluster_whitelist = self.cluster_whitelist[path.application_name,
                                                       path.type_name]
            # A node appears under its own cluster plus every cluster
            # that resolves to it.
            cluster_names = set([path.cluster_name]).union(
                cluster_map.resolved_names[path.cluster_name])
            # We should ignore the subtree of callee cluster because it
            # has been overrided by symlink or route.
            if cluster_map.cluster_names.get(path.cluster_name):
                continue
            if cluster_whitelist:
                cluster_names = cluster_names & cluster_whitelist
            for cluster_name in cluster_names:
                data_body = entire_body \
                    .setdefault(path.type_name, {}) \
                    .setdefault(path.application_name, {}) \
                    .setdefault(cluster_name, {}) \
                    .setdefault(decode_key(path.data_name), {})
                data_body['value'] = data
        return entire_body

    def _fill_body(self, body):
        """Ensure watched-but-empty types/applications/clusters appear in
        *body* as empty dicts, then run bad-route detection."""
        # Fills the type names and application names
        for type_name in self.TYPE_NAMES:
            type_data = body.setdefault(type_name, {})
            application_names = self.watch_map.get(type_name, [])
            for application_name in application_names:
                type_data.setdefault(application_name, {})
        # Fills the cluster names
        for (application_name, type_name), cluster_names \
                in self.cluster_whitelist.iteritems():
            for cluster_name in cluster_names:
                body.setdefault(type_name, {}) \
                    .setdefault(application_name, {}) \
                    .setdefault(cluster_name, {})
        # Checks extra information
        self._detect_bad_route(body)
        return body

    def _detect_bad_route(self, body):
        """Emit metrics/logs for routes that resolve to empty clusters.

        Purely observational: reads *body* and reports; never mutates it.
        """
        if not switch.is_switched_on(SWITCH_DETECT_BAD_ROUTE):
            return
        if self.from_application_name in settings.LEGACY_APPLICATION_LIST:
            return
        from_cluster_blacklist = settings.ROUTE_FROM_CLUSTER_BLACKLIST.get(
            self.from_application_name, [])
        if self.from_cluster_name in from_cluster_blacklist:
            return
        # Bad-route detection only applies to the service subdomain.
        type_name = SERVICE_SUBDOMAIN
        type_body = body[type_name]
        flat_cluster_names = (
            (application_name, cluster_name, cluster_body)
            for application_name, application_body in type_body.iteritems()
            for cluster_name, cluster_body in application_body.iteritems())
        for application_name, cluster_name, cluster_body \
                in flat_cluster_names:
            if application_name in settings.LEGACY_APPLICATION_LIST:
                continue
            if cluster_name in settings.ROUTE_DEST_CLUSTER_BLACKLIST.get(
                    application_name, []):
                continue
            cluster_map = self.cluster_maps[application_name, type_name]
            resolved_name = cluster_map.cluster_names.get(cluster_name)
            # "Bad" means: resolves somewhere, but the body is empty.
            if cluster_body or not resolved_name:
                continue
            monitor_client.increment(
                'tree_watcher.bad_route', 1, tags=dict(
                    from_application_name=self.from_application_name,
                    from_cluster_name=self.from_cluster_name,
                    dest_application_name=application_name,
                    appid=application_name,
                    dest_cluster_name=cluster_name,
                    dest_resolved_cluster_name=resolved_name,
                ))
            logger.info('Bad route detected: %s %s %s %s -> %s (%r)',
                        self.from_application_name, self.from_cluster_name,
                        application_name, cluster_name, resolved_name,
                        dict(cluster_map.cluster_names))
class Server(object):
    """ZeroMQ REP-socket task master.

    Hands pickled jobs from an internal gevent queue to remote workers
    over a two-frame request/reply protocol:

    * ``GET``  -> ``OK <job>`` | ``WAIT`` (try again) | ``QUIT`` (no work)
    * ``DONE`` -> ``OK`` (more work remains) | ``QUIT``
    * anything else -> ``ERROR``
    """

    def __init__(self, address, size=None, log_level=DEFAULT_LOG_LEVEL):
        self.daemon = True
        self.started = False
        self.size = size
        self.queue = Queue(maxsize=size)
        self.address = address
        self.context = zmq.Context(1)
        self.server = None
        self.logger = get_logger(self, log_level)
        # Until the producer marks the queue filled, has_work() reports
        # True unconditionally so workers do not quit prematurely.
        self._has_fetched_jobs = False

    def send(self, cmd, data=''):
        """Send a two-frame reply (command, payload) to the worker."""
        self.server.send_multipart([cmd, data])

    def recv(self):
        """Receive one request; the protocol is always two frames."""
        frames = self.server.recv_multipart()
        assert len(frames) == 2
        return frames

    def bind(self):
        """(Re)bind the REP socket, closing any previous one first."""
        if self.server:
            self.server.close()
        self.server = self.context.socket(zmq.REP)
        self.server.bind(self.address)

    def start(self):
        """Run the request loop until stopped, then shut down."""
        self.started = True
        self.logger.info("Taskmaster binding to %r", self.address)
        self.bind()
        while self.started:
            gevent.sleep(0)  # cooperative yield between requests
            cmd, data = self.recv()
            if cmd == 'GET':
                self._reply_get()
            elif cmd == 'DONE':
                self._reply_done()
            else:
                self.send('ERROR', 'Unrecognized command')
        self.logger.info('Shutting down')
        self.shutdown()

    def _reply_get(self):
        # Worker wants a job: either hand one over, ask it to wait, or
        # tell it to quit because no work will ever arrive.
        if not self.has_work():
            self.send('QUIT')
            return
        try:
            job = self.queue.get_nowait()
        except Empty:
            self.send('WAIT')
        else:
            self.send('OK', pickle.dumps(job))

    def _reply_done(self):
        # Worker finished a job: acknowledge, or signal shutdown when
        # nothing is left.
        self.queue.task_done()
        self.send('OK' if self.has_work() else 'QUIT')

    def mark_queue_filled(self):
        """Producer signals that every job has been enqueued."""
        self._has_fetched_jobs = True

    def put_job(self, job):
        return self.queue.put(job)

    def first_job(self):
        # Peek at the head of the queue without removing it.
        return self.queue.queue[0]

    def get_current_size(self):
        return self.queue.qsize()

    def get_max_size(self):
        return self.size

    def has_work(self):
        """True while jobs remain (or might still be enqueued)."""
        return True if not self._has_fetched_jobs else not self.queue.empty()

    def is_alive(self):
        return self.started

    def shutdown(self):
        """Close the socket and terminate the context; idempotent."""
        if not self.started:
            return
        self.server.close()
        self.context.term()
        self.started = False
class CeleryReporter(Greenlet):
    """Greenlet that forwards mining-pool share data to Celery tasks.

    Aggregates per-address and per-worker share counts locally, then
    drains an internal queue of (task_name, args, kwargs) tuples to the
    Celery broker with at-least-once delivery (peek, send, then pop).
    """

    # Stat counter names registered with the server at construction.
    one_min_stats = []
    one_sec_stats = ['queued']

    def _set_config(self, **config):
        """Merge user config over the defaults and validate it."""
        self.config = dict(celery_task_prefix='simplecoin.tasks',
                           celery={'CELERY_DEFAULT_QUEUE': 'celery'},
                           report_pool_stats=True,
                           share_batch_interval=60,
                           tracker_expiry_time=180)
        self.config.update(config)

        # A task prefix is mandatory: abort the process without one.
        if not self.config['celery_task_prefix']:
            self.logger.error("You need to specify a celery prefix")
            exit()

    def __init__(self, server, **config):
        Greenlet.__init__(self)
        self.logger = server.register_logger('reporter')
        self._set_config(**config)

        # setup our celery agent and monkey patch
        self.celery = Celery()
        self.celery.conf.update(self.config['celery'])

        # Greenlet running report_loop(); spawned in _run().
        self.share_reporter = None

        self.server = server
        self.server.register_stat_counters(self.one_min_stats,
                                           self.one_sec_stats)

        # Outbound (task_name, args, kwargs) tuples for the broker.
        self.queue = Queue()
        # address -> AddressTracker (payout/vardiff aggregation).
        self.addresses = {}
        # (address, worker) -> WorkerTracker (per-minute graph data).
        self.workers = {}

    @property
    def status(self):
        """Snapshot of queue depth, tracker counts and stat summaries."""
        dct = dict(queue_size=self.queue.qsize(),
                   addresses_count=len(self.addresses),
                   workers_count=len(self.workers))
        dct.update({
            key: self.server[key].summary()
            for key in self.one_min_stats + self.one_sec_stats
        })
        return dct

    # Remote methods to send information to other servers
    ########################
    def add_one_minute(self, *args, **kwargs):
        """Queue an add_one_minute task for the broker."""
        self.server['queued'].incr()
        self.queue.put(("add_one_minute", args, kwargs))
        self.logger.info("Calling celery task {} with {}".format(
            "add_one_minute", args))

    def add_share(self, *args, **kwargs):
        """Queue an add_share task for the broker."""
        self.server['queued'].incr()
        self.queue.put(("add_share", args, kwargs))
        # NOTE(review): the log says "add_shares" but the task queued is
        # "add_share" — probably a stale log message; verify.
        self.logger.info("Calling celery task {} with {}".format(
            "add_shares", args))

    def agent_send(self, *args, **kwargs):
        """Queue an agent_receive task for the broker (not logged)."""
        self.server['queued'].incr()
        self.queue.put(("agent_receive", args, kwargs))

    def add_block(self, *args, **kwargs):
        """Queue an add_block task for the broker."""
        self.server['queued'].incr()
        self.queue.put(("add_block", args, kwargs))
        # NOTE(review): log message names "transmit_block" while the
        # queued task is "add_block" — verify which is intended.
        self.logger.info("Calling celery task {} with {}".format(
            "transmit_block", args))

    def _run(self):
        # Greenlet entry point: start the periodic share reporter, then
        # pump the task queue forever.
        self.share_reporter = spawn(self.report_loop)
        while True:
            self._queue_proc()

    def _queue_proc(self):
        # Peek first, pop only after a successful send: a broker outage
        # keeps the item at the head for retry (at-least-once delivery).
        name, args, kwargs = self.queue.peek()
        try:
            self.celery.send_task(
                self.config['celery_task_prefix'] + '.' + name, args, kwargs)
        except Exception as e:
            self.logger.error(
                "Unable to communicate with celery broker! {}".format(e))
        else:
            self.queue.get()

    def report_loop(self):
        """ Repeatedly do our share reporting on an interval """
        while True:
            sleep(self.config['share_batch_interval'])
            try:
                self._report_shares()
            except Exception:
                self.logger.error("Unhandled error in report shares",
                                  exc_info=True)

    def _report_shares(self, flush=False):
        """ Goes through our internal aggregated share data structures and
        reports them to our external storage. If asked to flush it will report
        all one minute shares, otherwise it will only report minutes that have
        passed. """
        if flush:
            self.logger.info("Flushing all aggreated share data...")

        self.logger.info("Reporting shares for {:,} users".format(
            len(self.addresses)))
        t = time.time()
        # NOTE(review): deleting from the dict while iterating .items()
        # is safe on Python 2 (list copy) but would break on Python 3.
        for address, tracker in self.addresses.items():
            tracker.report()
            # if the last log time was more than expiry time ago...
            if (tracker.last_log + self.config['tracker_expiry_time']) < t:
                assert tracker.unreported == 0
                del self.addresses[address]
        self.logger.info("Shares reported (queued) in {}".format(
            time_format(time.time() - t)))

        self.logger.info(
            "Reporting one minute shares for {:,} address/workers".format(
                len(self.workers)))
        t = time.time()
        # flush reports everything up to "now + slack"; otherwise only
        # whole minutes that have already elapsed.
        if flush:
            upper = t + 10
        else:
            upper = (t // 60) * 60
        for worker_addr, tracker in self.workers.items():
            tracker.report(upper)
            # if the last log time was more than expiry time ago...
            if (tracker.last_log + self.config['tracker_expiry_time']) < t:
                assert sum(tracker.slices.itervalues()) == 0
                del self.workers[worker_addr]
        self.logger.info("One minute shares reported (queued) in {}".format(
            time_format(time.time() - t)))

    def log_share(self, address, worker, amount, typ):
        """ Logs a share for a user and user/worker into all three share
        aggregate sources. """
        # log the share for the pool cache total as well
        if address != "pool" and self.config['report_pool_stats']:
            self.log_share("pool", '', amount, typ)

        # collecting for reporting to the website for display in graphs
        addr_worker = (address, worker)
        if addr_worker not in self.workers:
            self.workers[addr_worker] = WorkerTracker(self, address, worker)
        self.workers[(address, worker)].count_share(amount, typ)

        # reporting for payout share logging and vardiff rates
        if typ == StratumClient.VALID_SHARE and address != "pool":
            if address not in self.addresses:
                self.addresses[address] = AddressTracker(self, address)
            # for tracking vardiff speeds
            self.addresses[address].count_share(amount)

    def kill(self, *args, **kwargs):
        """Stop the reporter: flush aggregates, drain the task queue,
        then kill the underlying greenlet."""
        self.share_reporter.kill(*args, **kwargs)
        self._report_shares(flush=True)
        self.logger.info("Flushing the reporter task queue, {} items blocking "
                         "exit".format(self.queue.qsize()))
        while not self.queue.empty():
            self._queue_proc()
        self.logger.info("Shutting down CeleryReporter..")
        Greenlet.kill(self, *args, **kwargs)