def green_consumer_sink(sink_func, queue, producer_instance):
    """
    Consume `queue` and apply `sink_func` to the value of each consumed item,
    switching to the producer greenlet whenever there is nothing to process.

    Details:
        Each pass calls ``queue.get_nowait()`` and expects to receive a
        ``gevent.event.AsyncResult``; the sink is then invoked as
        ``sink_func(async_result.get())``.

        If the queue is empty, or ``async_result.get()`` raises (the error is
        logged with ``log.exception``), control is switched to
        `producer_instance`. When this greenlet is resumed it polls the queue
        again rather than falling through with a missing or stale item.

    For cases where sink_func requires the same costly initialization every
    call, consider a coroutine sink_func:
        sink_func = lambda rv: other_func.send(rv)

    Args:
        sink_func: callable taking the value returned by ``async_result.get()``.
        queue: object providing ``get_nowait()`` that raises
            ``gevent.queue.Empty`` when nothing is immediately available.
        producer_instance: object providing ``switch()`` (expected to be the
            producer greenlet).

    This function loops forever; it never returns normally.
    """
    log.debug('initializing consumer. This will only appear once per greenlet')
    # Hand control to the producer once before the first poll.
    producer_instance.switch()
    while True:
        try:
            async_result = queue.get_nowait()
        except gevent.queue.Empty:
            producer_instance.switch()
            # Nothing was fetched on this pass: poll again once resumed.
            continue
        try:
            rv = async_result.get()
        except Exception as e:
            log.exception(e)
            producer_instance.switch()
            # There is no value to sink for this item: move on to the next.
            continue
        sink_rv = sink_func(rv)
        log.debug(
            "Completed job with sink result: {sink_rv}".format(sink_rv=sink_rv))
def retrieve_worker():
    """
    Download queued files until the queue is empty.

    Each queue item is a ``(url, fn)`` pair. For every item the worker:
      1. reports ``'download_file'`` through ``cb``,
      2. fetches ``url`` with ``remote.open`` and reads the whole body,
         decompressing it when the response's Content-Encoding is gzip,
      3. reports ``'download_complete'``,
      4. writes the bytes to ``os.path.join(base, fn)``, creating the parent
         directory when needed; a failed write is reported as
         ``'write_failed'`` and the worker carries on with the next item.

    ``queue``, ``cb``, ``remote``, ``base``, ``me`` and ``log`` are resolved
    from the surrounding scope, which is not visible from this block.

    The worker returns when ``queue.get_nowait()`` raises
    ``gevent.queue.Empty``. Any other exception is forwarded to ``me.kill``
    (presumably the coordinating greenlet - confirm against the caller).
    """
    try:
        while True:
            url, fn = queue.get_nowait()
            log.info('update %s' % fn)
            cb('download_file', fn)
            resp = remote.open(url)
            try:
                d = resp.read()
                if resp.headers.get('Content-Encoding') == 'gzip':
                    d = GzipFile(fileobj=StringIO(d), mode='rb').read()
            finally:
                # Always release the response, even when reading or
                # decompressing fails.
                resp.close()
            cb('download_complete', fn)
            ffn = os.path.join(base, fn)
            try:
                try:
                    os.makedirs(os.path.dirname(ffn))
                except OSError:
                    # Best effort: typically the directory already exists. A
                    # genuine failure surfaces from open() just below.
                    pass
                with open(ffn, 'wb') as f:
                    f.write(d)
            except EnvironmentError:
                cb('write_failed', fn)
    except gevent.queue.Empty:
        # Queue drained: normal termination.
        pass
    except Exception as e:
        me.kill(e)
def worker():
    """
    Scrape queued ``(url, tag)`` pairs until the queue reports it is empty.

    Items are pulled with ``queue.get_nowait()`` and handed to ``scrape``.
    The worker returns as soon as ``gevent.queue.Empty`` is raised, whether by
    the queue or from inside ``scrape``; any other exception propagates to
    the caller.
    """
    try:
        while True:
            target_url, target_tag = queue.get_nowait()
            scrape(target_url, target_tag)
    except gevent.queue.Empty:
        return
def task_thread(self, queue):
    """
    Executes tasks in queue

    Loops until ``self.shutdown`` is set. While the queue is empty the loop
    sleeps for ``utils.wait_time()`` between polls. Each queue item is a
    ``(job, task)`` pair:

    * if the job's status is already ERROR or ABORT, the task is marked
      ABORT and is not run;
    * otherwise ``task.run`` is called. For a ``model_tasks.TrainTask`` a
      ``gpu_id`` option is passed: the index of the first inactive entry of
      ``self.gpu_list`` (which is marked active), or ``None`` when
      ``self.gpu_list`` is empty.

    Any exception raised while selecting a GPU or running the task is logged
    and recorded on the task (``exception``, ``traceback``, status ERROR).
    A GPU claimed for the task is always marked inactive again afterwards.
    """
    while not self.shutdown.is_set():
        if queue.empty() is not False:
            # Wait before checking again for a task
            time.sleep(utils.wait_time())
            continue

        job, task = queue.get_nowait()

        # Don't run the task if the job is done
        if job.status in [Status.ERROR, Status.ABORT]:
            task.status = Status.ABORT
            continue

        run_kwargs = {}
        claimed_gpu = -1  # -1 means "no GPU claimed (yet)"
        try:
            if isinstance(task, model_tasks.TrainTask):
                ### Select GPU
                if len(self.gpu_list):
                    idle_gpu = next(
                        (g for g in self.gpu_list if not g['active']), None)
                    if idle_gpu is not None:
                        claimed_gpu = idle_gpu['index']
                        idle_gpu['active'] = True
                    assert claimed_gpu != -1, 'no available GPU'
                else:
                    claimed_gpu = None
                run_kwargs['gpu_id'] = claimed_gpu
            task.run(**run_kwargs)
        except Exception as e:
            logger.error('%s: %s' % (type(e).__name__, e), job_id=job.id())
            task.exception = e
            task.traceback = traceback.format_exc()
            task.status = Status.ERROR
        finally:
            ### Release GPU
            if claimed_gpu is not None and claimed_gpu != -1:
                for entry in self.gpu_list:
                    if entry['index'] == claimed_gpu:
                        entry['active'] = False
def scrape_base_url():
    """
    Scrape the base page, dispatch one parse job per link, and dump the result.

    Steps:
      1. Fetch ``base_url`` through ``session`` and parse it with
         ``html.fromstring``.
      2. For every ``<a>`` under ``div.st-text`` table cells that has text,
         enqueue ``(parse_comp, {'url': ..., 'name': ...})`` on ``queue``.
      3. While the queue is non-empty and the pool is not full, spawn queued
         jobs on ``pool`` in batches no larger than the pool's free slots.
      4. Wait for the pool to finish, print the elapsed time, and write the
         module-level ``data`` object to ``data.json``.

    ``session``, ``base_url``, ``domain``, ``queue``, ``pool``, ``parse_comp``
    and ``data`` are module-level names defined outside this block.
    """
    startTime = datetime.now()
    tree = html.fromstring(session.get(base_url).text)

    for anchor in tree.xpath('//div[@class="st-text"]//td/a'):
        # Evaluate the text query once; anchors without text are skipped.
        texts = anchor.xpath('./text()')
        if not texts:
            continue
        queue.put_nowait((parse_comp, {
            'url': domain + anchor.xpath('./@href')[0],
            'name': texts[0],
        }))

    while not queue.empty() and not pool.full():
        for _ in range(min(queue.qsize(), pool.free_count())):
            t = queue.get_nowait()
            # spawn() already adds the greenlet to the pool and starts it;
            # passing the result to pool.start() again would register the
            # same greenlet a second time.
            pool.spawn(t[0], t[1])
    pool.join()

    # Two spaces after the colon reproduce the output of the original
    # ``print 'Time Taken : ', delta`` statement on both Python 2 and 3.
    print('Time Taken :  %s' % (datetime.now() - startTime,))
    with open('data.json', 'w') as fp:
        json.dump(data, fp)