def test_service_failure():
    "Service() should handle custom callback failures"

    # Given a service whose callback always raises
    class MyService(Service):
        def __init__(self, result_queue=None):
            super(MyService, self).__init__(
                callback=self.run,
                result_queue=result_queue,
            )

        def run(self, package, sender_data):
            raise ValueError("I don't want to do anything")

    queue = JoinableQueue()
    service = MyService(result_queue=queue)

    # When I queue a package to be processed by my service and start the
    # service with 1 concurrent worker
    service.queue('gherkin==0.1.0', 'main')
    service.consume()
    service.pool.join()  # Ensure we finish spawning the greenlet

    # Then I see that no package was processed
    queue.qsize().should.equal(0)

    # And that the list of failed packages was updated
    failed_package = service.failed_queue[0][0]
    failure = service.failed_queue[0][1]
    failed_package.should.equal('gherkin==0.1.0')
    failure.should.be.a(ValueError)
    # ``BaseException.message`` only exists on Python 2 (and is deprecated
    # there); ``str(exc)`` yields the same text on every version.
    str(failure).should.equal("I don't want to do anything")
def test_service_failure():
    "Service() should handle custom callback failures"

    # Given the following service, whose callback fails for every package
    class MyService(Service):
        def __init__(self, result_queue=None):
            super(MyService, self).__init__(
                callback=self.run,
                result_queue=result_queue,
            )

        def run(self, package, sender_data):
            raise ValueError("I don't want to do anything")

    queue = JoinableQueue()
    service = MyService(result_queue=queue)

    # When I queue a package to be processed by my service and start the
    # service with 1 concurrent worker
    service.queue('gherkin==0.1.0', 'main')
    service.consume()
    service.pool.join()  # Ensure we finish spawning the greenlet

    # Then I see that no package was processed
    queue.qsize().should.equal(0)

    # And that the list of failed packages was updated
    first_failure = service.failed_queue[0]
    first_failure[0].should.equal('gherkin==0.1.0')
    first_failure[1].should.be.a(ValueError)
    # Compare the exception text through str(): the ``.message`` attribute
    # was removed from exceptions in Python 3.
    str(first_failure[1]).should.equal(
        "I don't want to do anything")
def test_main(self):
    """Smoke-test ``JoinableQueue``: put four values, print size and head.

    Nothing is asserted; the method only prints the queue's attribute
    names, its size after four ``put`` calls, and the first two items
    returned by ``get``.
    """
    queue = JoinableQueue()
    print(dir(queue))
    for value in (1, 3, 2, 6):
        queue.put(value)
    print(queue.qsize())
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print() function.
    print('1 %s %s' % (queue.get(), queue.get()))
# NOTE(review): the next two lines are the tail of a definition that begins
# above this chunk; their indentation is reconstructed -- confirm in the file.
    else:
        sleep(5)


if __name__ == '__main__':
    # Script entry point: start the background jobs, then wait for the item
    # producer and for every work queue to drain, in that order.
    # spawn_link_exception is defined elsewhere; presumably it spawns a
    # greenlet (its result is join()ed below) -- TODO confirm.
    t_status = spawn_link_exception(status_thread)
    t_item_queue = spawn_link_exception(add_to_item_queue)
    # 80 concurrent run_find_item jobs.
    for i in range(80):
        spawn_link_exception(run_find_item)
    #t_index_items = spawn_link_exception(index_items)
    # 8 run_solr_queue jobs, each given its own index.
    for i in range(8):
        spawn_link_exception(run_solr_queue, i)

    #joinall([t_run_find_item, t_item_queue, t_index_items, t_solr])

    sleep(1)
    print('join item_queue thread')
    t_item_queue.join()
    print('item_queue thread complete')
    #print 'join item_and_host_queue:', item_and_host_queue.qsize()
    #item_and_host_queue.join()
    #print 'item_and_host_queue complete'
    # Report each per-host queue's size, then block until it is drained.
    for host, host_queue in host_queues.items():
        qsize = host_queue.qsize()
        print('host:', host, qsize)
        host_queue.join()

    # Finally wait for the solr queue to drain.
    print('join solr_queue:', solr_queue.qsize())
    solr_queue.join()
    print('solr_queue complete')
class BaseLogger(Collected, Jobber):
    """\
    This class implements one particular way to log things.

    Records are placed on a bounded queue by the ``log*`` methods and
    consumed by the ``_writer`` job, which hands them to ``_log`` /
    ``_slog``. Subclasses override ``_log`` or ``_slog`` (and optionally
    ``_flush``) to actually emit the text.
    """
    storage = Loggers.storage
    q = None  # record queue, created in _init()
    job = None  # writer job handle
    ready = False  # False: not started, True: running, None: deleted
    _in_flush = False  # re-entrancy guard for flush()

    def __init__(self, level):
        """Register the logger with minimum level ``level`` and start it."""
        self.level = level

        global logger_nr
        logger_nr += 1

        # Subclasses may predefine a name; otherwise derive a unique one.
        if not hasattr(self, "name") or self.name is None:
            self.name = Name(self.__class__.__name__, "x" + str(logger_nr))

        super(BaseLogger, self).__init__()
        self._init()

    def _init(self):
        """Fork off the writer thread.
           Override this to do nothing if you don't have one."""

        self.q = JoinableQueue(100)
        self.start_job("job", self._writer)
        self.job.link(self.delete)
        if self.ready is False:
            self.ready = True
        else:
            self.stop_job("job")  # concurrency issues?

    def _writer(self):
        """Consume queued records until the ``StopIteration`` sentinel.

        Each failure is reported through ``process_failure``; the error
        counter grows on failure and shrinks on success, and once it
        exceeds 10 the exception is re-raised, ending the writer.
        """
        errs = 0
        for r in self.q:
            try:
                if r is FlushMe:
                    self._flush()
                else:
                    self._log(*r)
            except Exception as ex:
                errs += 1
                fix_exception(ex)
                from moat.run import process_failure
                process_failure(ex)
                if errs > 10:
                    reraise(ex)
            else:
                if errs:
                    errs -= 1
            finally:
                # Always acknowledge the record so flush()'s join() returns.
                self.q.task_done()
        self.q.task_done()  # for the StopIter

    # Collection stuff
    def list(self):
        """Yield the descriptive entries for this logger."""
        yield super(BaseLogger, self)
        yield ("Type", self.__class__.__name__)
        yield ("Level", LogNames[self.level])
        yield ("Queue", self.q.qsize())

    def info(self):
        """Return a one-line ``"<level>: <class name>"`` summary."""
        return LogNames[self.level] + ": " + self.__class__.__name__

    def delete(self, ctx=None):
        """Unregister the logger and shut the writer job down."""
        if self.ready:
            self.ready = None
            super(BaseLogger, self).delete(ctx)
        try:
            # Explicit None test, matching flush(); the sentinel ends the
            # writer's iteration over the queue.
            if self.q is not None:
                self.q.put(StopIteration, block=False)
        except Full:
            ## panic?
            pass
        if self.job is not None:
            self.job.join(timeout=1)
            self.stop_job("job")

    def _wlog(self, *a):
        """Queue one record without blocking; a full queue deletes the logger."""
        try:
            self.q.put(a, block=False)
        except Full:
            ## panic?
            self.delete()

    def _log(self, level, *a):
        """Join the record's parts into one string and pass it to ``_slog``."""
        a = " ".join(
            (x if isinstance(x, six.string_types) else str(x) for x in a))
        self._slog(level, a)

    def _slog(self, level, a=None):
        """Emit one formatted line; subclasses must override this or ``_log``.

        The signature now matches the ``self._slog(level, a)`` call made by
        ``_log``. Previously the base method accepted a single argument, so
        an un-overridden logger failed with ``TypeError`` instead of the
        intended ``NotImplementedError``. ``a`` is optional so one-argument
        calls keep working.
        """
        raise NotImplementedError("You need to override %s._log or ._slog" %
                                  (self.__class__.__name__, ))

    def _flush(self):
        """Hook run by the writer for a flush request; no-op by default."""
        pass

    def log(self, level, *a):
        """Queue a record if the named ``level`` passes this logger's threshold."""
        if LogLevels[level] >= self.level:
            self._wlog(level, *a)
            # Guard against an empty record before looking at its first part.
            is_test_msg = bool(a) and hasattr(
                a[0], "startswith") and a[0].startswith("TEST")
            if TESTING and not is_test_msg:
                self.flush()
            else:
                gevent.sleep(0)

    def log_event(self, event, level):
        """Queue every line ``report_`` produces for ``event``."""
        if level >= self.level:
            for r in report_(event, 99):
                self._wlog(LogNames[level], r)
            if TESTING:
                self.flush()

    def log_failure(self, err, level=WARN):
        """Queue the formatted exception ``err``."""
        if level >= self.level:
            self._wlog(LogNames[level], format_exception(err))
            if TESTING:
                self.flush()

    def flush(self):
        """Block until the writer has processed everything queued so far."""
        if self._in_flush:
            return
        if self.q is not None:
            try:
                self._in_flush = True
                self.q.put(FlushMe)
                self.q.join()
            finally:
                self._in_flush = False

    def end_logging(self):
        """Flush pending records, then delete the logger."""
        self.flush()
        self.delete()
sleep(1) else: sleep(5) if __name__ == '__main__': t_status = spawn_link_exception(status_thread) t_item_queue = spawn_link_exception(add_to_item_queue) for i in range(80): spawn_link_exception(run_find_item) #t_index_items = spawn_link_exception(index_items) for i in range(8): spawn_link_exception(run_solr_queue, i) #joinall([t_run_find_item, t_item_queue, t_index_items, t_solr]) sleep(1) print 'join item_queue thread' t_item_queue.join() print 'item_queue thread complete' #print 'join item_and_host_queue:', item_and_host_queue.qsize() #item_and_host_queue.join() #print 'item_and_host_queue complete' for host, host_queue in host_queues.items(): qsize = host_queue.qsize() print 'host:', host, qsize host_queue.join() print 'join solr_queue:', solr_queue.qsize() solr_queue.join() print 'solr_queue complete'
class RequestBase(object):
    """Base class for issuing many HTTP requests from a shared work queue.

    Subclasses provide ``buildRequestURL`` and ``handleRequestSuccess``.
    ``getWorkQueueItem`` and ``putWorkQueueItem`` are called here but not
    defined in this part of the class; presumably they are supplied further
    down or by subclasses.
    """

    def __init__(self, url, parameter, HTTPClients, ClientConnectionPool,
                 task=None):
        """Set up the HTTP client, the greenlet pool and the bookkeeping.

        :param url: base URL the HTTP client is created from.
        :param parameter: stored as ``self.parameter``.
        :param HTTPClients: size of the greenlet pool.
        :param ClientConnectionPool: ``concurrency`` of the HTTP client.
        :param task: optional task object; when given, ``updateProgress``
            reports through its ``update_state``.
        """
        if task is not None:
            self.celeryTask = task
            self.celeryTaskId = task.request.id
        else:
            self.celeryTask = None
        self.parameter = parameter
        self.url = url
        self.numberHTTPClients = HTTPClients
        self.numberClientConnectionPool = ClientConnectionPool
        self.http = HTTPClient.from_url(
            URL(url), concurrency=self.numberClientConnectionPool)
        self.clientPool = gevent.pool.Pool(self.numberHTTPClients)
        self.workQueue = JoinableQueue()
        self.resultList = {}
        self.workQueueMax = 0
        self.workQueueDone = 0
        self.countRequests = 0
        self.status_codes = {}  # str(status code) -> queue of work items
        self.status_codes_count = {}  # str(status code) -> queue size
        self.meta = {}
        self.greenletList = {}
        self.initAdditionalStructures()
        self.progressMeta = None
        self.exitFlag = False
        self.pauseRequests = False

    def destroy(self):
        """Close the HTTP client."""
        self.http.close()

    def initAdditionalStructures(self):
        """Hook called from ``__init__``; no-op by default."""
        pass

    def destroyAdditionstrucutres(self):
        """Hook for releasing additional structures; no-op by default."""
        pass

    def getProgress(self):
        """Return the progress dict built by the last ``updateProgress``."""
        return self.meta

    def updateProgress(self, state="PROGRESS"):
        '''Updates the status'''
        self.meta = {
            'state': state,
            'workQueueDone': self.workQueueDone,
            'workQueueMax': self.workQueueMax,
            'current': len(self.resultList),
            'workQueue': self.workQueue.qsize(),
            'requests': self.countRequests,
        }

        # Iterate over the status_codes dict and save each queue's size.
        # May not be the best solution from a performance point of view.
        # items() instead of iteritems() so this runs on Python 2 and 3.
        for code, queue in self.status_codes.items():
            self.status_codes_count[code] = queue.qsize()
        self.meta['status_codes'] = self.status_codes_count
        if self.celeryTask is not None:
            self.celeryTask.update_state(
                task_id=self.celeryTaskId, state=state, meta=self.meta)

    def worker(self, http, clientId):
        """Process work items until the queue is empty or ``stop`` is called.

        The original condition was ``not empty() or exitFlag``, which kept
        the loop running forever once ``stop()`` had set the flag; the flag
        now ends the loop as its name says.
        """
        while not self.exitFlag and not self.workQueue.empty():
            try:
                self.makeRequest(http, self.getWorkQueueItem())
            finally:
                self.workQueue.task_done()

    def stop(self):
        """Ask the workers to stop after their current item."""
        self.exitFlag = True

    def buildRequestURL(self, workQueueItem):
        '''Function used to build the request URL from a workingQueue item'''
        pass

    def handleRequestSuccess(self, workQueueItem, result):
        '''Required function, called after every successful request'''
        pass

    def handleRequestFailure(self, result):
        '''Function called after a failed request. For example error code 404'''
        pass

    def makeRequest(self, http, workQueueItem):
        '''Request the URL built for ``workQueueItem`` and record the result.

        Returns the response status code. On any exception the item is put
        back on the work queue and ``None`` is returned.
        '''
        url_string = self.buildRequestURL(workQueueItem)

        self.countRequests += 1
        try:
            response = http.get(URL(url_string).request_uri)
            statusCode = response.status_code

            # Create a new queue if this status code has not been seen yet,
            # then add the item to that code's queue.
            code_key = str(statusCode)
            if code_key not in self.status_codes:
                self.status_codes[code_key] = JoinableQueue()
            self.status_codes[code_key].put(workQueueItem)

            try:
                self.handleRequestSuccess(workQueueItem, response)
            except SSLError as e:
                print(e)

            return statusCode
        except Exception:
            # Best effort: any failure re-queues the item for another try.
            self.putWorkQueueItem(workQueueItem)
class GeventConsumer(object):
    """Read Kafka messages into a bounded queue and parse them in greenlets.

    One producer loop (``kafka_to_queue``) fills ``self.queue`` while
    ``num`` consumer greenlets (``consume``) parse the messages and pass
    the result to ``handle_data``, which subclasses must implement. With
    ``is_debug=True`` no Kafka consumer is created and the ``mock_*``
    variants are used instead.
    """

    def __init__(
            self,
            consumer_config=None,
            topic=None,
            parse_func=None,
            num=8,
            auto_commit_offset=False,
            is_debug=False,
    ):
        """
        :param consumer_config: configuration passed to ``Consumer``.
        :param topic: passed unchanged to ``Consumer.subscribe``.
        :param parse_func: callable turning a message payload into data.
        :param num: number of consumer greenlets to spawn.
        :param auto_commit_offset: value for ``enable.auto.commit``; when
            false, offsets are committed manually after each message.
        :param is_debug: use the mock producer/consumer instead of Kafka.
        :raises Exception: if ``parse_func`` is missing.
        """
        if not parse_func:
            raise Exception("not parse func, system exit")
        self.parse = parse_func
        # NOTE(review): join()/task_done() are called on this queue, so the
        # ``Queue`` name in scope must be a joinable queue type -- confirm.
        self.queue = Queue(100)
        self.stop_flag = Event()
        self.num = num
        self.debug = is_debug
        # Set unconditionally so the attributes also exist in debug mode.
        self.auto_commit_offset = auto_commit_offset
        self.topic = topic
        if not self.debug:
            if isinstance(consumer_config, dict):
                # Work on a copy: do not modify the caller's dictionary.
                consumer_config = consumer_config.copy()
                consumer_config['enable.auto.commit'] = self.auto_commit_offset
            self.consumer = Consumer(consumer_config)
            self.consumer.subscribe(self.topic)

    def sign_handler(self, sig, frame):
        """Signal handler: request a graceful stop."""
        print(" >>> Termination_signal:[{}] to stop".format(sig))
        self.stop_flag.set()

    def kafka_to_queue(self):
        """Poll Kafka and enqueue messages until the stop flag is set.

        An empty poll is retried (the original ``return`` ended the producer
        on the first idle second). End-of-partition events are skipped, and
        any other Kafka error sets the stop flag and ends the loop; error
        messages are never queued as data.
        """
        logger.info("Start Producer thread")
        m = 0
        start_time = time.time()
        while not self.stop_flag.is_set():
            msg = self.consumer.poll(1)
            if msg is None:
                # NOTE(review): time.sleep only yields to other greenlets
                # if gevent monkey-patching is active -- confirm.
                time.sleep(0.001)
                continue
            err = msg.error()
            if err:
                if err.code() == KafkaError._PARTITION_EOF:
                    logger.debug(
                        '%s [%s] reached end at offset %s',
                        msg.topic(),
                        msg.partition(),
                        msg.offset()
                    )
                    continue
                logger.error('kafka failed, system exit')
                self.stop_flag.set()
                break

            self.queue.put(msg)

            # Consumption-rate statistics, logged about every 10 seconds.
            m += 1
            current_time = time.time()
            time_diff = current_time - start_time
            if time_diff > 10:
                rate = m / time_diff
                start_time = current_time
                m = 0
                logger.info('consumer_rate:[%.2f]p/s, queue_size:[%d]' % (rate, self.queue.qsize()))
        logger.info("Producer thread has stopped")

    def consume(self):
        """Consumer loop: parse queued messages and hand them to ``handle_data``.

        Ends once the stop flag is set and the queue is empty. Unless auto
        commit is enabled or a stop is in progress, the message offset is
        committed after each message, whether or not parsing succeeded.
        """
        logger.info('Start Thread To Consumer')
        while True:
            stop = self.stop_flag.is_set()
            if stop and self.queue.empty():
                break
            msg = self.queue.get()
            try:
                data = self.parse(msg.value())
                if data:
                    self.handle_data(data, stop)
            finally:
                self.queue.task_done()
                if not stop and not self.auto_commit_offset:
                    self.consumer.commit(msg)
        logger.info('Thread Consumer has stopped')

    def handle_data(self, data, stop):
        """Process one parsed message; must be implemented by subclasses."""
        raise NotImplementedError

    def consume_forever(self):
        """
        start consume forever
        """
        signal(SIGTERM, self.sign_handler)
        signal(SIGINT, self.sign_handler)

        if self.debug:
            consume_func = self.mock_consume
            produce_func = self.mock_kafka
        else:
            consume_func = self.consume
            produce_func = self.kafka_to_queue

        task_list = [gevent.spawn(consume_func) for _ in range(self.num)]

        # The producer runs in the calling greenlet until a stop is requested.
        produce_func()
        self.queue.join()
        if not self.debug:
            logger.info("closing kafka...")
            self.consumer.close()
        gevent.joinall(task_list, timeout=5)
        logger.info('Exiting with qsize:%d' % self.queue.qsize())

    # ===========mock kafka and consumer=======================
    def mock_kafka(self):
        """Debug producer: enqueue one fixed binary message until stopped."""
        import binascii

        logger.info("Start Producer thread")
        m = 0
        start_time = time.time()
        # jing5 msg
        # unhexlify replaces the Python-2-only ``str.decode('hex')``.
        msg = binascii.unhexlify(
            "23230254455354325631393046433232323232320101008e14080b0e0c38426e0101008422551354455354325631393046433232323232323131313131313131313131313131313131313131313131313131313131313131313130010000000002803365818a91eb00010002fffe050018fffe2eeb596f50830005e91efd02649c6b7eb1ac0d80000043c497fd0022f90a3d057b2403032581373635343332310082e99f008a06")
        while not self.stop_flag.is_set():
            self.queue.put(msg)
            m += 1

            # Production-rate statistics, logged about every 5 seconds.
            current_time = time.time()
            time_diff = current_time - start_time
            if time_diff > 5:
                rate = m / time_diff
                start_time = current_time
                m = 0
                logger.info('consumer_rate:[%.2f]p/s, queue_size:[%d]' % (rate, self.queue.qsize()))
        logger.info("closing produce...")
        logger.info("Producer thread has stopped")

    def mock_consume(self):
        """Debug consumer: like ``consume`` but takes raw payloads, no commits."""
        logger.info('Start Thread To Consumer')
        while True:
            stop = self.stop_flag.is_set()
            if stop and self.queue.empty():
                break
            msg = self.queue.get()
            try:
                data = self.parse(msg)
                self.handle_data(data, stop)
            except Exception:
                # Same message as before; exception() adds the traceback
                # that was previously discarded.
                logger.exception("consumer:{}".format(getcurrent()))
            finally:
                self.queue.task_done()
        logger.info('Thread Consumer has stopped')
class LeakQueue(object):
    """Joinable work queue drained by a fixed number of worker greenlets."""

    def __init__(self, maxsize=0, workers=10):
        """ Setup the gevent queue and the workers.

        :param int maxsize: the max length of the queue, default the queue
            size is infinite.
        :param int workers: the number of workers, default=10.
        """
        self.queue = JoinableQueue(maxsize=maxsize)
        # Plain loop: the spawned greenlets are not collected anywhere.
        for _ in range(workers):
            spawn(self.worker)

    def __repr__(self):
        return u'{} items in queue'.format(self.queue.qsize())

    def put(self, operation, item, date=None):
        """ Each item is queued for later processing.

        :param str operation: the operation name.
        :param item: the item to queue.
        :param date date: when the item was triggered; defaults to the
            current UTC time.
        :returns: True if insertion succeeds, False otherwise.
        """
        try:
            self.queue.put({
                "operation": operation,
                "item": item,
                "date": date or datetime.utcnow()
            })
            self.flush()
        except Exception as e:
            logger.critical(
                'unable to put an item in the queue :: {}'.format(e))
            return False
        else:
            return True

    def flush(self, force=False):
        """ Flush the queue and block until all tasks are done.

        :param boolean force: force the queue flushing
        :returns: True if the flush occurs, False otherwise.
        """
        if self.queue.full() or force:
            logger.info('queue is full ({} items) :: flush it !'.format(
                self.queue.qsize()))
            self.queue.join()
            return True
        return False

    def worker(self):
        """Process queued items forever.

        An item whose ``worker_process`` result is falsy is put back on the
        queue. A failure while processing is logged and the item is marked
        done. Previously the exception killed the worker before
        ``task_done()`` was called, so ``flush()`` / ``join()`` could block
        forever.
        """
        while True:
            # Blocking get: waits for an item, so it does not raise Empty.
            item = self.queue.get()
            try:
                logger.info('get item :: {}'.format(item))
                if not self.worker_process(item):
                    logger.info('re-queue item :: {}'.format(item))
                    self.queue.put(item)
            except Exception:
                logger.exception(
                    'unable to process item :: {}'.format(item))
            finally:
                # Always balance the get() above.
                self.queue.task_done()

    def worker_process(self, item):
        """ Default action executed by each worker.
            Must return a true value to remove the item,
            otherwise the worker puts the item back into the queue.
        """
        g_sleep()
        return item
class DriverPool(object):
    """ Create a pool of available Selenium containers for processing.

    Args:
        size (int): maximum concurrent tasks. Must be at least ``2``.
        driver_cls (WebDriver):
        driver_cls_args (tuple):
        driver_cls_kw (dict):
        use_proxy (bool):
        factory (:obj:`~selenium_docker.base.ContainerFactory`):
        name (str):
        logger (:obj:`logging.Logger`):

    Example::

        pool = DriverPool(size=2)

        urls = [
            'https://google.com',
            'https://reddit.com',
            'https://yahoo.com',
            'http://ksl.com',
            'http://cnn.com'
        ]

        def get_title(driver, url):
            driver.get(url)
            return driver.title

        for result in pool.execute(get_title, urls):
            print(result)
    """

    INNER_THREAD_SLEEP = 0.5
    """float: essentially our polling interval between tasks and checking
    when tasks have completed.
    """

    PROXY_CLS = SquidProxy
    """:obj:`~selenium_docker.proxy.AbstractProxy`: created for the pool
    when ``use_proxy=True`` during pool instantiation.
    """

    def __init__(self, size, driver_cls=ChromeDriver, driver_cls_args=None,
                 driver_cls_kw=None, use_proxy=True, factory=None, name=None,
                 logger=None):
        self.size = max(2, size)
        self.name = name or gen_uuid(6)
        self.factory = factory or ContainerFactory.get_default_factory()
        self.logger = logger or getLogger(
            '%s.DriverPool.%s' % (__name__, self.name))

        self._driver_cls = driver_cls
        self._driver_cls_args = driver_cls_args or tuple()
        self._driver_cls_kw = driver_cls_kw or dict()
        self._drivers = Queue(maxsize=self.size)

        # post init inspections
        if not hasattr(self._driver_cls, 'CONTAINER'):
            raise DriverPoolValueError('driver_cls must extend DockerDriver')

        if not isiterable(self._driver_cls_args):
            raise DriverPoolValueError(
                '%s is not iterable' % self._driver_cls_args)

        if not isinstance(self._driver_cls_kw, Mapping):
            raise DriverPoolValueError(
                '%s is not a valid mapping' % self._driver_cls_kw)

        # determine proxy usage
        self.proxy = None
        self._use_proxy = use_proxy  # type: bool

        # deferred instantiation
        self._pool = None  # type: Pool
        self._results = None  # type: Queue
        self._tasks = None  # type: JoinableQueue
        self._processing = False  # type: bool
        self.__feeder_green = None  # type: gevent.Greenlet

    def __repr__(self):
        return '<DriverPool-%s(size=%d,driver=%s,proxy=%s,async=%s)>' % (
            self.name, self.size, self._driver_cls.BROWSER,
            self._use_proxy, self.is_async)

    def __iter__(self):
        return self.results(block=self.is_async)

    def __del__(self):
        try:
            self.close()
        except Exception as e:
            if hasattr(self, 'logger'):
                # Was ``self.logger.exection`` -- a typo that raised
                # AttributeError from inside the finalizer.
                self.logger.exception(e, exc_info=False)

    @property
    def is_processing(self):
        """bool: whether or not we're currently processing tasks. """
        return self._processing

    @property
    def is_async(self):
        """bool: returns True when asynchronous processing is happening. """
        return self.__feeder_green is not None

    def __bootstrap(self):
        """ Prepare this driver pool instance to batch execute task items.

        Raises:
            DriverPoolRuntimeException: when the pool is already processing.
        """
        if self.is_processing:
            # cannot run two executions simultaneously
            raise DriverPoolRuntimeException(
                'cannot bootstrap pool, already running')
        if self._results is not None and self._results.qsize():  # pragma: no cover
            self.logger.debug('pending results being discarded')
        if self._tasks is not None and self._tasks.qsize():  # pragma: no cover
            self.logger.debug('pending tasks being discarded')
        # An idle gevent Pool is falsy, so compare against None explicitly.
        if self._pool is not None:  # pragma: no cover
            self.logger.debug('killing processing pool')
            self._pool.join(timeout=10.0)
            self._pool.kill()
            self._pool = None
        if self._use_proxy and not self.proxy:
            # defer proxy instantiation -- since spinning up a squid proxy
            # docker container is surprisingly time consuming.
            self.logger.debug('bootstrapping squid proxy')
            self.proxy = self.PROXY_CLS(factory=self.factory)
        self.logger.debug('bootstrapping pool processing')
        self._processing = True
        self._results = Queue()
        self._tasks = JoinableQueue()
        self._load_drivers()
        # create our processing pool with headroom over the number of drivers
        # requested for this processing pool. ``math.ceil`` returns a float
        # on Python 2, so force an integer pool size.
        headroom = int(math.ceil(self.size * 0.25))
        self._pool = Pool(size=self.size + headroom)

    def __cleanup(self, force=False):
        """ Stop and remove the web drivers and their containers.

        This function should not remove pending tasks or results. It should be
        possible to cleanup all the external resources of a driver pool and
        still extract the results of the work that was completed.

        Raises:
            DriverPoolRuntimeException: when attempting to cleanup an
                environment while processing is still happening, and forcing
                the cleanup is set to ``False``.

            SeleniumDockerException: when a driver instance or container
                cannot be closed properly.

        Returns:
            None
        """
        if self.is_processing and not force:  # pragma: no cover
            raise DriverPoolRuntimeException(
                'cannot cleanup driver pool while executing')
        self._processing = False
        squid = None  # type: gevent.Greenlet
        error = None  # type: SeleniumDockerException
        if self.proxy:
            # shut the proxy down in the background while drivers are closed
            self.logger.debug('closing squid proxy')
            squid = gevent.spawn(self.proxy.quit)
        # ``is not None`` rather than truthiness: an idle gevent Pool is
        # falsy, which left ``_pool`` set and the async feeder's exit
        # condition (``self._pool is None``) unreachable.
        if self._pool is not None:  # pragma: no cover
            self.logger.debug('emptying task pool')
            if not force:
                self._pool.join(timeout=10.0)
            self._pool.kill(block=False, timeout=10.0)
            self._pool = None
        self.logger.debug('closing all driver containers')
        while not self._drivers.empty():
            d = self._drivers.get(block=True)
            try:
                d.quit()
            except SeleniumDockerException as e:  # pragma: no cover
                self.logger.exception(e, exc_info=True)
                if not force:
                    error = e
        if squid is not None:
            squid.join()
            self.proxy = None
        if error:  # pragma: no cover
            raise error

    def _load_driver(self, and_add=True):
        """ Load a single web driver instance and container.

        Args:
            and_add (bool): also put the new driver in the drivers queue.

        Returns:
            the newly created driver instance.
        """
        args = self._driver_cls_args
        kw = dict(self._driver_cls_kw)
        kw.update({
            'proxy': self.proxy,
            'factory': self.factory,
        })
        driver = self._driver_cls(*args, **kw)
        if and_add:
            self._drivers.put(driver)
        return driver

    def _load_drivers(self):
        """ Load the web driver instances and containers.

        Raises:
            DriverPoolRuntimeException: when the requested number of drivers
                for the given pool size cannot be created for some reason.

        Returns:
            None
        """
        if not self._drivers.empty():  # pragma: no cover
            return
        threads = []
        for o in range(self.size):
            self.logger.debug('creating driver %d of %d', o + 1, self.size)
            thread = gevent.spawn(self._load_driver)
            threads.append(thread)
        for t in reversed(threads):
            t.join()
        if not self._drivers.full():
            raise DriverPoolRuntimeException(
                'unable to fulfill required concurrent drivers, %d of %d' % (
                    self._drivers.qsize(), self.size))

    def _recycle_driver(self, driver):
        """ Quit ``driver`` and return a freshly loaded replacement.

        Returns ``None`` when ``driver`` is falsy.
        """
        if not driver:
            return
        try:
            driver.quit()
        except Exception as e:
            self.logger.exception(e, exc_info=True)
        # do NOT add the new driver container to the drivers queue,
        # instead this will be handled in the recycle logic that requested
        # the driver in the first place. Instead of returning the one it
        # received this "new" instance will be put in its place.
        self.logger.debug('recycled driver')
        return self._load_driver(and_add=False)

    def add_async(self, *items):
        """ Add additional items to the asynchronous processing queue.

        Args:
            items (list(Any)): list of items that need processing. Each item is
                applied one at a time to an available driver from the pool.
                A single ``list`` argument is treated as the list of items.

        Raises:
            DriverPoolValueError: when there are no items to add.
        """
        if len(items) == 1 and isinstance(items[0], list):
            # Keep the list itself: turning it into an iterator made the
            # emptiness check below always pass, and counting an iterator
            # would exhaust it before anything was queued.
            items = items[0]
        if not items:
            raise DriverPoolValueError(
                'cannot add items with value: %s' % str(items))
        item_count = len(items)
        self.logger.debug('adding %d additional items to tasks', item_count)
        for o in items:
            self._tasks.put(o)

    def close(self):
        """ Force close all the drivers and cleanup their containers.

        Returns:
            None
        """
        self.__cleanup(force=True)

    def execute(self, fn, items, preserve_order=False, auto_clean=True,
                no_wait=False):
        """ Execute a fixed function, blocking for results.

        Args:
            fn (Callable): function that takes two parameters, ``driver`` and
                ``task``.
            items (list(Any)): list of items that need processing. Each item is
                applied one at a time to an available driver from the pool.
            preserve_order (bool): should the results be returned in the order
                they were supplied via ``items``. It's more performant to
                allow results to return in any order.
            auto_clean (bool): cleanup docker containers after executing. If
                multiple processing tasks are going to be used, it's more
                performant to leave the containers running and reuse them.
            no_wait (bool): forgo a small sleep interval between finishing
                a task and putting the driver back in the available drivers
                pool.

        Raises:
            DriverPoolRuntimeException: when async processing is active or
                the pool is already processing.

        Yields:
            results: the result for each item as they're finished.
        """

        def worker(o):
            job_num, item = o
            self.logger.debug('doing work on item %d' % job_num)
            driver = self._drivers.get(block=True)
            try:
                ret_val = fn(driver, item)
                if not no_wait:
                    gevent.sleep(self.INNER_THREAD_SLEEP)
            finally:
                # Hand the driver back even when ``fn`` raises, otherwise
                # the pool permanently loses it.
                self._drivers.put(driver)
            return ret_val

        if self.__feeder_green:
            raise DriverPoolRuntimeException(
                'cannot perform a blocking execute while async processing')

        self.__bootstrap()
        self.logger.debug('starting sync processing')

        if preserve_order:
            ittr = self._pool.imap
        else:
            ittr = self._pool.imap_unordered

        self.logger.debug('yielding processed results')
        for o in ittr(worker, enumerate(items)):
            self._results.put(o)
        self._results.put(StopIteration)

        self.logger.debug('stopping sync processing')
        # NOTE(review): with auto_clean=False ``_processing`` stays True, so
        # a later execute() raises in __bootstrap -- confirm intended.
        if auto_clean:
            self.logger.debug('auto cleanup pool environment')
            self.__cleanup(force=True)

        return self.results(block=False)

    def execute_async(self, fn, items=None, callback=None,
                      catch=(WebDriverException,), requeue_task=False):
        """ Execute a fixed function in the background, streaming results.

        Args:
            fn (Callable): function that takes two parameters, ``driver`` and
                ``task``.
            items (list(Any)): list of items that need processing. Each item is
                applied one at a time to an available driver from the pool.
            callback (Callable): function that takes a single parameter, the
                return value of ``fn`` when its finished processing and has
                returned the driver to the queue.
            catch (tuple[Exception]): tuple of Exception classes to catch
                during task execution. If one of these Exception classes is
                caught during ``fn`` execution the driver that crashed will
                attempt to be recycled.
            requeue_task (bool): in the event of an Exception being caught
                should the task/item that was being worked on be re-added to
                the queue of items being processed.

        Raises:
            DriverPoolValueError: if ``fn`` or ``callback`` is not callable.

        Returns:
            None
        """

        def worker(fn, task):
            ret_val = None
            async_task_id = gen_uuid(12)
            self.logger.debug('starting async task %s', async_task_id)
            driver = self._drivers.get(block=True)
            if isinstance(driver, Exception):
                raise driver
            try:
                ret_val = fn(driver, task)
            except catch:
                self.logger.exception('async task %s failed', async_task_id)
                if self.is_processing:
                    driver = self._recycle_driver(driver)
                    if requeue_task:
                        self._tasks.put(task)
            finally:
                # A result is always recorded and the driver (or its
                # replacement) always goes back to the queue.
                self._results.put(ret_val)
                self._drivers.put(driver)
                gevent.sleep(self.INNER_THREAD_SLEEP)
            return ret_val

        def feeder():
            self.logger.debug('starting async feeder thread')
            while True:
                while not self._tasks.empty():
                    task = self._tasks.get()
                    if self._pool is None:
                        break
                    self._pool.apply_async(
                        worker,
                        args=(fn, task,),
                        callback=greenlet_callback)
                gevent.sleep(self.INNER_THREAD_SLEEP)
                if self._pool is None and not self.is_processing:
                    break

        if callback is None:
            def logger(value):
                self.logger.debug('%s', value)
            callback = logger

        def real_callback(cb, value):
            if isinstance(value, gevent.GreenletExit):
                raise value
            else:
                cb(value)

        greenlet_callback = partial(real_callback, callback)

        for f in [fn, callback]:
            if not callable(f):
                # Name the object that actually failed the check (the
                # message used to always show ``callback``).
                raise DriverPoolValueError(
                    'cannot use %s, is not callable' % (f,))

        self.logger.debug('starting async processing')
        self.__bootstrap()
        if not self.__feeder_green:
            self.__feeder_green = gevent.spawn(feeder)
        if items:
            self.add_async(*items)

    def quit(self):
        """ Alias for :func:`~DriverPool.close()`. Included for consistency
        with driver instances that generally call ``quit`` when they're no
        longer needed.

        Returns:
            None
        """
        if self.__feeder_green:
            return self.stop_async()
        return self.close()

    def results(self, block=True):
        """ Iterate over available results from processed tasks.

        Args:
            block (bool): when ``True``, block this call until all tasks have
                been processed and all results have been returned. Otherwise
                this will continue indefinitely while tasks are dynamically
                added to the async processing queue.

        Yields:
            results: one result at a time as they're finished.
        """
        est_size = self._results.qsize()
        self.logger.debug('there are an estimated %d results', est_size)
        if block:
            self.logger.debug('blocking for results to finish processing')
            while self.is_processing:
                while not self._results.empty():
                    yield self._results.get()
                gevent.sleep(self.INNER_THREAD_SLEEP)
                if self._tasks.empty() and self._results.empty():
                    break
            # A plain return ends the generator; ``raise StopIteration``
            # inside a generator is turned into RuntimeError by PEP 479.
            return
        else:
            if est_size > 0:
                self.logger.debug('returning as many results as have finished')
            # sentinel that ends the queue iteration below
            self._results.put(StopIteration)
            for result in self._results:
                yield result

    def stop_async(self, timeout=None, auto_clean=True):
        """ Stop all the async worker processing from executing.

        Args:
            timeout (float): number of seconds to wait for pool to finish
                processing before killing and closing out the execution.
            auto_clean (bool): cleanup docker containers after executing. If
                multiple processing tasks are going to be used, it's more
                performant to leave the containers running and reuse them.

        Returns:
            None
        """
        self.logger.debug('stopping async processing')
        if self.__feeder_green:
            self.logger.debug('killing async feeder thread')
            gevent.kill(self.__feeder_green)
            self.__feeder_green = None
        if self._pool is not None:
            self.logger.debug('joining async pool before kill')
            self._pool.join(timeout=timeout or 1.0)
            self._pool.kill(block=False)
        if self._tasks is not None:
            tasks_count = self._tasks.qsize()
            self.logger.info('%d tasks remained unprocessed', tasks_count)
        if auto_clean:
            self.logger.debug('auto cleanup pool environment')
            self.__cleanup(force=True)
class BaseLogger(Collected, Jobber):
    """\
    This class implements one particular way to log things.

    Records are placed on a bounded queue by the ``log*`` methods and
    consumed by the ``_writer`` job, which hands them to ``_log`` /
    ``_slog``. Subclasses override ``_log`` or ``_slog`` (and optionally
    ``_flush``) to actually emit the text.
    """
    storage = Loggers.storage
    q = None  # record queue, created in _init()
    job = None  # writer job handle
    ready = False  # False: not started, True: running, None: deleted
    _in_flush = False  # re-entrancy guard for flush()

    def __init__(self, level):
        """Register the logger with minimum level ``level`` and start it."""
        self.level = level

        global logger_nr
        logger_nr += 1

        # Subclasses may predefine a name; otherwise derive a unique one.
        if not hasattr(self, "name") or self.name is None:
            self.name = Name(self.__class__.__name__, "x" + str(logger_nr))

        super(BaseLogger, self).__init__()
        self._init()

    def _init(self):
        """Fork off the writer thread.
           Override this to do nothing if you don't have one."""

        self.q = JoinableQueue(100)
        self.start_job("job", self._writer)
        self.job.link(self.delete)
        if self.ready is False:
            self.ready = True
        else:
            self.stop_job("job")  # concurrency issues?

    def _writer(self):
        """Consume queued records until the ``StopIteration`` sentinel.

        Each failure is reported through ``process_failure``; the error
        counter grows on failure and shrinks on success, and once it
        exceeds 10 the exception is re-raised, ending the writer.
        """
        errs = 0
        for r in self.q:
            try:
                if r is FlushMe:
                    self._flush()
                else:
                    self._log(*r)
            except Exception as ex:
                errs += 1
                fix_exception(ex)
                from moat.run import process_failure
                process_failure(ex)
                if errs > 10:
                    reraise(ex)
            else:
                if errs:
                    errs -= 1
            finally:
                # Always acknowledge the record so flush()'s join() returns.
                self.q.task_done()
        self.q.task_done()  # for the StopIter

    # Collection stuff
    def list(self):
        """Yield the descriptive entries for this logger."""
        yield super(BaseLogger, self)
        yield ("Type", self.__class__.__name__)
        yield ("Level", LogNames[self.level])
        yield ("Queue", self.q.qsize())

    def info(self):
        """Return a one-line ``"<level>: <class name>"`` summary."""
        return LogNames[self.level] + ": " + self.__class__.__name__

    def delete(self, ctx=None):
        """Unregister the logger and shut the writer job down."""
        if self.ready:
            self.ready = None
            super(BaseLogger, self).delete(ctx)
        try:
            # Explicit None test, matching flush(); the sentinel ends the
            # writer's iteration over the queue.
            if self.q is not None:
                self.q.put(StopIteration, block=False)
        except Full:
            ## panic?
            pass
        if self.job is not None:
            self.job.join(timeout=1)
            self.stop_job("job")

    def _wlog(self, *a):
        """Queue one record without blocking; a full queue deletes the logger."""
        try:
            self.q.put(a, block=False)
        except Full:
            ## panic?
            self.delete()

    def _log(self, level, *a):
        """Join the record's parts into one string and pass it to ``_slog``."""
        a = " ".join(
            (x if isinstance(x, six.string_types) else str(x) for x in a))
        self._slog(level, a)

    def _slog(self, level, a=None):
        """Emit one formatted line; subclasses must override this or ``_log``.

        The signature now matches the ``self._slog(level, a)`` call made by
        ``_log``. Previously the base method accepted a single argument, so
        an un-overridden logger failed with ``TypeError`` instead of the
        intended ``NotImplementedError``. ``a`` is optional so one-argument
        calls keep working.
        """
        raise NotImplementedError("You need to override %s._log or ._slog" %
                                  (self.__class__.__name__, ))

    def _flush(self):
        """Hook run by the writer for a flush request; no-op by default."""
        pass

    def log(self, level, *a):
        """Queue a record if the named ``level`` passes this logger's threshold."""
        if LogLevels[level] >= self.level:
            self._wlog(level, *a)
            # Guard against an empty record before looking at its first part.
            is_test_msg = bool(a) and hasattr(
                a[0], "startswith") and a[0].startswith("TEST")
            if TESTING and not is_test_msg:
                self.flush()
            else:
                gevent.sleep(0)

    def log_event(self, event, level):
        """Queue every line ``report_`` produces for ``event``."""
        if level >= self.level:
            for r in report_(event, 99):
                self._wlog(LogNames[level], r)
            if TESTING:
                self.flush()

    def log_failure(self, err, level=WARN):
        """Queue the formatted exception ``err``."""
        if level >= self.level:
            self._wlog(LogNames[level], format_exception(err))
            if TESTING:
                self.flush()

    def flush(self):
        """Block until the writer has processed everything queued so far."""
        if self._in_flush:
            return
        if self.q is not None:
            try:
                self._in_flush = True
                self.q.put(FlushMe)
                self.q.join()
            finally:
                self._in_flush = False

    def end_logging(self):
        """Flush pending records, then delete the logger."""
        self.flush()
        self.delete()