class Stream(object):
    """Queue-backed stream of ``(payload, headers)`` entries.

    Raw headers supplied with every entry are passed through
    ``header_table.merge``; the merged headers of the entry fetched most
    recently by :meth:`get` are exposed through :attr:`headers`.
    """

    def __init__(self, raw_headers, header_table):
        self._queue = Queue()
        self._header_table = header_table
        self._current_headers = self._header_table.merge(raw_headers)

    @property
    def headers(self):
        """Headers merged at construction or taken from the last fetched entry."""
        return self._current_headers

    def _enqueue(self, payload, raw_headers):
        """Merge ``raw_headers`` and queue ``(payload, merged_headers)``."""
        merged = self._header_table.merge(raw_headers)
        return self._queue.put_nowait((payload, merged))

    def push(self, item, raw_headers):
        """Queue a regular item together with its merged headers."""
        self._enqueue(item, raw_headers)

    def done(self, raw_headers):
        """Queue a ``ChokeEvent`` marker."""
        return self._enqueue(ChokeEvent(), raw_headers)

    def error(self, errnumber, reason, raw_headers):
        """Queue a ``RequestError`` built from ``errnumber`` and ``reason``."""
        return self._enqueue(RequestError(errnumber, reason), raw_headers)

    @gen.coroutine
    def get(self, timeout=0):
        """Fetch the next entry.

        :param timeout: seconds to wait; ``0`` waits without a deadline.
        :return: the queued payload.
        :raises Exception: the payload itself, when it is an exception
            instance.
        """
        if timeout == 0:
            pending = self._queue.get()
        else:
            pending = self._queue.get(datetime.timedelta(seconds=timeout))
        res, headers = yield pending
        self._current_headers = headers
        if isinstance(res, Exception):
            raise res
        raise gen.Return(res)
class SubscribeListener(SubscribeCallback):
    """Subscribe callback that exposes events as awaitable queues/events.

    Status changes set one-shot events; messages and presence envelopes
    are pushed into separate queues that the ``wait_for_*`` coroutines
    drain.
    """

    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()

    def status(self, pubnub, status):
        """Set the connect/disconnect event matching ``status`` (once each)."""
        if (utils.is_subscribed_event(status)
                and not self.connected_event.is_set()):
            self.connected_event.set()
        elif (utils.is_unsubscribed_event(status)
                and not self.disconnected_event.is_set()):
            self.disconnected_event.set()

    def message(self, pubnub, message):
        """Queue an incoming message envelope."""
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        """Queue an incoming presence envelope."""
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def wait_for_connect(self):
        """Wait for the connected event; raise if it is already set."""
        if self.connected_event.is_set():
            raise Exception("instance is already connected")
        yield self.connected_event.wait()

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        """Wait for the disconnected event; raise if it is already set."""
        if self.disconnected_event.is_set():
            raise Exception("instance is already disconnected")
        yield self.disconnected_event.wait()

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        """Return the next message whose ``channel`` is in ``channel_names``.

        Envelopes for other channels are consumed and discarded.
        """
        wanted = list(channel_names)
        while True:
            try:
                env = yield self.message_queue.get()
                if env.channel in wanted:
                    raise tornado.gen.Return(env)
            finally:
                # Runs for matches and non-matches alike.
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        """Return the next presence event whose ``channel`` is in ``channel_names``.

        Envelopes for other channels are consumed and discarded.
        """
        wanted = list(channel_names)
        while True:
            try:
                env = yield self.presence_queue.get()
                if env.channel in wanted:
                    raise tornado.gen.Return(env)
            finally:
                # Runs for matches and non-matches alike.
                self.presence_queue.task_done()
class ConnectionPool(object):
    """Pool of ``Connection`` objects backed by a bounded queue.

    Idle connections live in ``self._pool``; connections handed out by
    :meth:`acquire` are tracked in ``self._in_use`` until released.
    """

    def __init__(self, servers, maxsize=15, minsize=1, loop=None, debug=0):
        if loop is None:
            loop = tornado.ioloop.IOLoop.instance()
        if debug:
            logging.basicConfig(
                level=logging.DEBUG,
                format="'%(levelname)s %(asctime)s"
                       " %(module)s:%(lineno)d %(process)d %(thread)d %(message)s'")
        self._loop = loop
        self._servers = servers
        self._minsize = minsize
        self._debug = debug
        self._in_use = set()
        self._pool = Queue(maxsize)

    @gen.coroutine
    def clear(self):
        """Clear pool connections."""
        while not self._pool.empty():
            idle = yield self._pool.get()
            idle.close_socket()

    def size(self):
        """Return the number of idle plus in-use connections."""
        return self._pool.qsize() + len(self._in_use)

    @gen.coroutine
    def acquire(self):
        """Acquire a connection from the pool, creating one if none is idle.

        The pool is first topped up to ``minsize`` connections.

        :return: ``Connection`` (reader, writer)
        """
        while self.size() < self._minsize:
            fresh = yield self._create_new_conn()
            yield self._pool.put(fresh)

        conn = None
        while not conn:
            if not self._pool.empty():
                conn = yield self._pool.get()
            if conn is None:
                # Nothing idle: open a brand new connection.
                conn = yield self._create_new_conn()

        self._in_use.add(conn)
        raise gen.Return(conn)

    @gen.coroutine
    def _create_new_conn(self):
        """Open a new connection to the configured servers."""
        created = yield Connection.get_conn(self._servers, self._debug)
        raise gen.Return(created)

    def release(self, conn):
        """Return ``conn`` to the pool, closing it if the pool rejects it."""
        self._in_use.remove(conn)
        try:
            self._pool.put_nowait(conn)
        except (QueueEmpty, QueueFull):
            conn.close_socket()
class Publisher(MQAsyncSub):
    """Handles new data to be passed on to subscribers."""

    def __init__(self):
        self.WSmessages = Queue()
        self.MQmessages = Queue()
        self.sub = MQAsyncSub.__init__(self, zmq.Context(), 'admin', [])
        self.subscribers = set()

    def register(self, subscriber):
        """Register a new subscriber."""
        self.subscribers.add(subscriber)

    def deregister(self, subscriber):
        """Stop publishing to a subscriber."""
        self.subscribers.remove(subscriber)

    @gen.coroutine
    def on_message(self, did, msg):
        """Receive message from MQ sub and send to WS."""
        yield self.WSmessages.put({"msgid": did, "content": msg})

    @gen.coroutine
    def submit(self, message):
        """Submit a new message to publish to subscribers."""
        yield self.WSmessages.put(message)

    @gen.coroutine
    def publishToWS(self):
        """Forward every queued WS message to all registered subscribers."""
        while True:
            message = yield self.WSmessages.get()
            if len(self.subscribers) > 0:
                print("Pushing MQ message {} to {} WS subscribers...".format(
                    message, len(self.subscribers)))
                yield [subscriber.submit(message)
                       for subscriber in self.subscribers]

    @gen.coroutine
    def publishToMQ(self):
        """Forward JSON messages queued in ``MQmessages`` to the MQ.

        A message carrying ``mq_request`` and ``data`` is sent as a
        request (to ``dst`` when given, else to ``'manager'``); one
        carrying ``mq_publish`` and ``data`` is published as an event.
        Payloads that are not valid JSON objects are reported and
        skipped so that one bad message cannot stop the loop.
        """
        ctx = zmq.Context()
        cli = MQSyncReq(ctx)
        pub = MQPub(ctx, 'admin')
        while True:
            message = yield self.MQmessages.get()
            try:
                jsons = json.loads(message)
            except (TypeError, ValueError) as exc:
                print("Ignoring malformed message {0!r}: {1}".format(
                    message, exc))
                continue
            if not isinstance(jsons, dict):
                print("Ignoring non-object message {0!r}".format(message))
                continue
            # req/rep
            if 'mq_request' in jsons and 'data' in jsons:
                msg = MQMessage()
                msg.set_action(str(jsons['mq_request']))
                msg.set_data(jsons['data'])
                print("REQ : {0}".format(msg.get()))
                if 'dst' in jsons:
                    dst = str(jsons['dst'])
                else:
                    dst = 'manager'
                print(cli.request(dst, msg.get(), timeout=10).get())
            # pub
            elif 'mq_publish' in jsons and 'data' in jsons:
                print("Publish : {0}".format(jsons['data']))
                pub.send_event(jsons['mq_publish'], jsons['data'])
class RestfulHandler(RequestHandler):
    """REST endpoint that forwards GET/POST requests to the registrar.

    Each request is reported through ``registrar.report``; the handler
    then waits on a per-request queue for the response delivered via
    :meth:`queue_response`.
    """

    _registrar = None
    _queue = None

    def initialize(self, registrar=None):
        self._registrar: PartRegistrar = registrar
        self._queue = Queue()

    @gen.coroutine
    def get(self, endpoint_str):
        # called from tornado thread
        request = Get(path=endpoint_str.split("/"))
        self.report_request(request)
        response = yield self._queue.get()
        self.handle_response(response)

    # curl -d '{"name": "me"}' http://localhost:8008/rest/HELLO/greet
    @gen.coroutine
    def post(self, endpoint_str):
        # called from tornado thread
        path = endpoint_str.split("/")
        parameters = json_decode(self.request.body.decode())
        self.report_request(Post(path=path, parameters=parameters))
        response = yield self._queue.get()
        self.handle_response(response)

    def report_request(self, request):
        # called from tornado thread
        request.set_callback(self.queue_response)
        # The first path element is used as the mri of the request.
        mri = request.path[0]
        info = builtin.infos.RequestInfo(request, mri)
        self._registrar.report(info)

    def queue_response(self, response):
        # called from cothread
        IOLoopHelper.call(self._queue.put, response)

    def handle_response(self, response):
        # called from tornado thread
        if isinstance(response, Return):
            self.finish(json_encode(response.value) + "\n")
            return
        # Anything that is not a Return is reported as a 500.
        if isinstance(response, Error):
            message = response.message
        else:
            message = "Unknown response %s" % type(response)
        self.set_status(500, message)
        self.write_error(500)
class Room(object):
    """A chat room that tracks its clients and relays messages to them."""

    def __init__(self, server, name):
        self.server = server
        self.name = name
        self.clients = {}  # client name -> client object
        self.lock = threading.RLock()
        self.inqueue = Queue(maxsize=QUEUE_SIZE)

    @coroutine
    def dispatch(self):
        """Process messages from ``inqueue`` forever.

        JOIN adds the sender to ``clients``, QUIT removes it, and every
        message (including JOIN/QUIT) is then broadcast to the room.
        """
        logging.debug('Chatroom: %s opened', self.name)
        while True:
            msg = yield self.inqueue.get()
            logging.debug("Room got message: room[%s], command[%s], content[%s]",
                          msg.receiver, msg.command, msg.content)
            if msg.command == COMMAND_JOIN:
                logging.debug("%s joined", msg.sender.name)
                self.clients[msg.sender.name] = msg.sender
            elif msg.command == COMMAND_QUIT:
                # pop() rather than del: a QUIT from a sender that never
                # joined must not raise KeyError and end this loop.
                self.clients.pop(msg.sender.name, None)
            yield self.broadcast(msg)

    @coroutine
    def broadcast(self, msg):
        """Put ``msg`` on the ``inqueue`` of every client in the room."""
        # Iterate over a snapshot: this coroutine suspends on each put,
        # and the dict must not change size under the iterator.
        for client in list(self.clients.values()):
            yield client.inqueue.put(msg)
class gather(Stream):
    """Stream node that resolves queued elements through ``client._gather``.

    Elements are buffered in a bounded queue, gathered in batches and
    emitted downstream one by one.
    """

    def __init__(self, child, limit=10, client=None):
        self.client = client or default_client()
        self.queue = Queue(maxsize=limit)
        self.condition = Condition()
        Stream.__init__(self, child)
        self.client.loop.add_callback(self.cb)

    def update(self, x, who=None):
        """Queue ``x``; the returned future comes from the queue's ``put``."""
        return self.queue.put(x)

    @gen.coroutine
    def cb(self):
        """Drain the queue in batches, gather them and emit the results."""
        while True:
            head = yield self.queue.get()
            batch = [head]
            # Take everything else that is already waiting.
            while not self.queue.empty():
                batch.append(self.queue.get_nowait())
            gathered = yield self.client._gather(batch)
            for item in gathered:
                yield self.emit(item)
            if self.queue.empty():
                self.condition.notify_all()

    @gen.coroutine
    def flush(self):
        """Wait on the condition until the queue is observed empty."""
        while not self.queue.empty():
            yield self.condition.wait()
class Publisher(object):
    """Fans queued messages out to every registered subscriber."""

    def __init__(self):
        self.messages = Queue()
        self.subscribers = set()

    def register(self, subscriber):
        """Start publishing to ``subscriber``."""
        self.subscribers.add(subscriber)

    def deregister(self, subscriber):
        """Stop publishing to ``subscriber``."""
        self.subscribers.remove(subscriber)

    @gen.coroutine
    def submit(self, message):
        """Queue ``message`` for publication."""
        yield self.messages.put(message)

    @gen.coroutine
    def publish(self):
        """Forever take messages off the queue and submit them to subscribers.

        Messages taken while nobody is subscribed are dropped.
        """
        while True:
            message = yield self.messages.get()
            if not self.subscribers:
                continue
            deliveries = [target.submit(message)
                          for target in self.subscribers]
            yield deliveries
class delay(Stream):
    """ Add a time delay to results """
    _graphviz_shape = 'octagon'

    def __init__(self, upstream, interval, loop=None, **kwargs):
        if not loop:
            loop = upstream.loop or IOLoop.current()
        self.interval = interval
        self.queue = Queue()

        Stream.__init__(self, upstream, loop=loop, **kwargs)

        self.loop.add_callback(self.cb)

    def update(self, x, who=None):
        """Queue ``x`` for delayed emission."""
        return self.queue.put(x)

    @gen.coroutine
    def cb(self):
        """Emit queued elements, sleeping out the rest of each interval."""
        while True:
            started = time()
            element = yield self.queue.get()
            yield self._emit(element)
            # Time spent waiting and emitting counts towards the interval.
            remaining = self.interval - (time() - started)
            if remaining > 0:
                yield gen.sleep(remaining)
class buffer(Stream):
    """ Allow results to pile up at this point in the stream

    Up to ``n`` results are held in a bounded queue here, which can
    smooth flow through the system when backpressure is applied.
    """
    _graphviz_shape = 'diamond'

    def __init__(self, upstream, n, loop=None, **kwargs):
        if not loop:
            loop = upstream.loop or IOLoop.current()
        self.queue = Queue(maxsize=n)

        Stream.__init__(self, upstream, loop=loop, **kwargs)

        self.loop.add_callback(self.cb)

    @gen.coroutine
    def cb(self):
        """Forever move elements from the queue to the downstream nodes."""
        while True:
            element = yield self.queue.get()
            yield self._emit(element)

    def update(self, x, who=None):
        """Queue ``x``; the returned future comes from the queue's ``put``."""
        return self.queue.put(x)
class EventSource(RequestHandler):
    """Request handler that streams elements of a stream as server-sent events."""

    def initialize(self, stream):
        """Attach to ``stream``: every element it sinks is queued for sending."""
        self.stream = stream
        self.messages = Queue()
        self.finished = False
        self.set_header('content-type', 'text/event-stream')
        self.set_header('cache-control', 'no-cache')
        self.store = self.stream.sink(self.messages.put)

    @gen.coroutine
    def publish(self, message):
        """Pushes data to a listener."""
        try:
            self.write(message >> to_str)
            yield self.flush()
        except StreamClosedError:
            # The client went away: stop the loop in get().
            self.finished = True
            (self.request.remote_ip, StreamClosedError) >> log

    @gen.coroutine
    def get(self, *args, **kwargs):
        """Send queued messages until the client disconnects.

        Unexpected errors end the stream but are logged rather than
        silently discarded. The sink is always destroyed and the
        request finished on exit.
        """
        import logging

        try:
            while not self.finished:
                message = yield self.messages.get()
                yield self.publish(message)
        except Exception:
            logging.getLogger(__name__).exception(
                "EventSource stream terminated by an unexpected error")
        finally:
            self.store.destroy()
            self.finish()
class Decode(object):
    """Decoding worker: reads input batches from ``q``, writes results to ``p``."""

    # Marker tokens removed from both ends of every decoded output.
    _SPECIAL_TOKENS = ("<pad>", "<EOS>")

    def __init__(self, sess_field):
        self.q = Queue(maxsize=1000)  # incoming lists of input strings
        self.p = Queue(maxsize=1000)  # outgoing lists of decoded strings
        self.sess_field = sess_field

    @staticmethod
    def batch_pad(nd):
        """Right-pad every sequence in ``nd`` with ``PAD_ID`` to equal length."""
        max_length = max(map(len, nd))
        pad_nd = [
            i + [text_encoder.PAD_ID] * (max_length - len(i)) for i in nd
        ]
        return pad_nd

    @staticmethod
    def _strip_special_tokens(text, tokens=_SPECIAL_TOKENS):
        """Remove whole leading/trailing marker tokens from ``text``.

        ``str.strip("<pad>")`` treats its argument as a set of characters
        and would also eat real output characters such as a leading "a",
        "p" or "d"; this removes only complete tokens.
        """
        changed = True
        while changed:
            changed = False
            for token in tokens:
                if text.startswith(token):
                    text = text[len(token):]
                    changed = True
                if text.endswith(token):
                    text = text[:-len(token)]
                    changed = True
        return text

    @gen.coroutine
    def decode(self):
        """Restore the model, then decode every batch taken from ``q`` forever.

        Each list of inputs is encoded, split into chunks of
        ``sess_field.batch_size``, run through the session, and the
        decoded strings for the whole list are put on ``p``.
        """
        log.info("[biz] Decode: model loading ... ")
        saver = tf.train.Saver()
        with tf.Session(config=self.sess_field.sess_config) as sess:
            # Load weights from checkpoint.
            log.info("[biz] Decode: restoring parameters")
            saver.restore(sess, self.sess_field.ckpt)
            log.info("[biz] Decode: model already loaded")
            while True:
                inputs = yield self.q.get()
                log.info("[biz] Decode: " + str(inputs))
                st_time = time.time()
                batch_size = self.sess_field.batch_size
                input_encoder = self.sess_field.encoders["inputs"]
                target_encoder = self.sess_field.encoders["targets"]
                inputs_numpy = [
                    input_encoder.encode(text) + [text_encoder.EOS_ID]
                    for text in inputs
                ]
                # Ceiling division: number of chunks of batch_size.
                num_decode_batches = (len(inputs_numpy) - 1) // batch_size + 1
                results = []
                for i in range(num_decode_batches):
                    input_numpy = inputs_numpy[i * batch_size:
                                               (i + 1) * batch_size]
                    # Fill a short final chunk with EOS-only dummy rows so
                    # the fed batch always has batch_size rows.
                    inputs_numpy_batch = input_numpy + [[
                        text_encoder.EOS_ID
                    ]] * (batch_size - len(input_numpy))
                    inputs_numpy_batch = self.batch_pad(inputs_numpy_batch)
                    feed = {self.sess_field.inputs_ph: inputs_numpy_batch}
                    result = sess.run(self.sess_field.prediction, feed)
                    # Only the rows that correspond to real inputs are kept.
                    decoded_outputs = [
                        self._strip_special_tokens(target_encoder.decode(row))
                        for row in result["outputs"][:len(input_numpy)]
                    ]
                    results += decoded_outputs
                # NOTE(review): this put is not yielded, so a full ``p``
                # does not pause the loop -- confirm that is intended.
                self.p.put(results)
                log.info("[biz] Decode: source: " + str(inputs))
                log.info("[biz] Decode: target: " + str(results))
                log.info("[biz] Decode: using %s s" % (time.time() - st_time))
def _first_completed(futures):
    """ Return a single completed future

    Runs ``_as_completed`` over ``futures`` with a fresh queue and
    returns the first entry taken from that queue.

    See Also:
        _as_completed
    """
    completed = Queue()
    yield _as_completed(futures, completed)
    winner = yield completed.get()
    raise gen.Return(winner)
def _first_completed(futures):
    """ Return a single completed future

    Runs ``_as_completed`` over ``futures`` with a fresh queue and
    returns the first entry taken from that queue.

    See Also:
        _as_completed
    """
    completed = Queue()
    yield _as_completed(futures, completed)
    winner = yield completed.get()
    raise gen.Return(winner)
class TornadoQuerierBase(object):
    """Base class that runs generated tasks through a pool of coroutine workers.

    Subclasses implement :meth:`gen_task` (yieldable, producing an
    iterable of tasks) and :meth:`run_task` (yieldable, processing one
    task).
    """

    def __init__(self):
        self.tasks = TornadoQueue()

    def gen_task(self):
        """Produce the tasks to run; must be overridden."""
        raise NotImplementedError()

    def run_task(self, task):
        """Process a single task; must be overridden."""
        raise NotImplementedError()

    def prepare(self):
        """Hook called before tasks are generated."""
        self.running = True

    def cleanup(self):
        """Hook called after all tasks are done."""
        self.running = False

    @coroutine
    def run_worker(self, worker_id, f):
        """Consume tasks with ``f`` until the queue is empty.

        A failing task is logged as a warning and does not stop the
        worker; every task is marked done either way.
        """
        while self.tasks.qsize() > 0:
            task = yield self.tasks.get()
            LOG.debug('worker[%d]: current task is %s', worker_id, task)
            try:
                yield f(task)
            except Exception as e:
                LOG.warning(str(e))
            finally:
                self.tasks.task_done()
        LOG.debug('worker[%d]: all tasks done %s', worker_id, self.tasks)

    @coroutine
    def start(self, num_workers=1):
        """Generate the tasks, run ``num_workers`` workers and wait for them."""
        self.prepare()

        # add tasks
        tasks = yield self.gen_task()
        for task in tasks:
            yield self.tasks.put(task)

        # start shoot workers
        for worker_id in range(num_workers):
            LOG.debug('starting worker %d', worker_id)
            # Not yielded on purpose: workers run concurrently and
            # completion is tracked through the queue's join().
            self.run_worker(worker_id, self.run_task)

        yield self.tasks.join()
        self.cleanup()
class Stream(object):
    """Queue-backed stream of ``(payload, headers)`` entries.

    Raw headers supplied with every entry are passed through
    ``header_table.merge``; the merged headers of the entry fetched most
    recently by :meth:`get` are exposed through :attr:`headers`.
    """

    def __init__(self, raw_headers, header_table):
        self._queue = Queue()
        self._header_table = header_table
        self._current_headers = self._header_table.merge(raw_headers)

    @property
    def headers(self):
        """Headers merged at construction or taken from the last fetched entry."""
        return self._current_headers

    def _enqueue(self, payload, raw_headers):
        """Merge ``raw_headers`` and queue ``(payload, merged_headers)``."""
        merged = self._header_table.merge(raw_headers)
        return self._queue.put_nowait((payload, merged))

    def push(self, item, raw_headers):
        """Queue a regular item together with its merged headers."""
        self._enqueue(item, raw_headers)

    def done(self, raw_headers):
        """Queue a ``ChokeEvent`` marker."""
        return self._enqueue(ChokeEvent(), raw_headers)

    def error(self, errnumber, reason, raw_headers):
        """Queue a ``RequestError`` built from ``errnumber`` and ``reason``."""
        return self._enqueue(RequestError(errnumber, reason), raw_headers)

    @gen.coroutine
    def get(self, timeout=0):
        """Fetch the next entry.

        :param timeout: seconds to wait; ``0`` waits without a deadline.
        :return: the queued payload.
        :raises Exception: the payload itself, when it is an exception
            instance.
        """
        if timeout == 0:
            pending = self._queue.get()
        else:
            pending = self._queue.get(datetime.timedelta(seconds=timeout))
        res, headers = yield pending
        self._current_headers = headers
        if isinstance(res, Exception):
            raise res
        raise gen.Return(res)
class TornadoQuerierBase(object):
    """Base class that runs generated tasks through a pool of coroutine workers.

    Subclasses implement :meth:`gen_task` (yieldable, producing an
    iterable of tasks) and :meth:`run_task` (yieldable, processing one
    task).
    """

    def __init__(self):
        self.tasks = TornadoQueue()

    def gen_task(self):
        """Produce the tasks to run; must be overridden."""
        raise NotImplementedError()

    def run_task(self, task):
        """Process a single task; must be overridden."""
        raise NotImplementedError()

    def prepare(self):
        """Hook called before tasks are generated."""
        self.running = True

    def cleanup(self):
        """Hook called after all tasks are done."""
        self.running = False

    @coroutine
    def run_worker(self, worker_id, f):
        """Consume tasks with ``f`` until the queue is empty.

        A failing task is logged as a warning and does not stop the
        worker; every task is marked done either way.
        """
        while self.tasks.qsize() > 0:
            task = yield self.tasks.get()
            LOG.debug('worker[%d]: current task is %s', worker_id, task)
            try:
                yield f(task)
            except Exception as e:
                LOG.warning(str(e))
            finally:
                self.tasks.task_done()
        LOG.debug('worker[%d]: all tasks done %s', worker_id, self.tasks)

    @coroutine
    def start(self, num_workers=1):
        """Generate the tasks, run ``num_workers`` workers and wait for them."""
        self.prepare()

        # add tasks
        tasks = yield self.gen_task()
        for task in tasks:
            yield self.tasks.put(task)

        # start shoot workers
        for worker_id in range(num_workers):
            LOG.debug('starting worker %d', worker_id)
            # Not yielded on purpose: workers run concurrently and
            # completion is tracked through the queue's join().
            self.run_worker(worker_id, self.run_task)

        yield self.tasks.join()
        self.cleanup()
class buffer(Stream):
    """Stream node that holds up to ``n`` elements in a bounded queue."""

    def __init__(self, n, child, loop=None):
        self.queue = Queue(maxsize=n)
        Stream.__init__(self, child, loop=loop)
        self.loop.add_callback(self.cb)

    @gen.coroutine
    def cb(self):
        """Forever move elements from the queue to the downstream nodes."""
        while True:
            element = yield self.queue.get()
            yield self.emit(element)

    def update(self, x, who=None):
        """Queue ``x``; the returned future comes from the queue's ``put``."""
        return self.queue.put(x)
class TopicAppllication(tornado.web.Application):
    """Application with a single root route and a bounded work queue."""

    def __init__(self):
        routes = [url(r'/', MainHandler)]
        self.queue = Queue(maxsize=10)
        super(TopicAppllication, self).__init__(handlers=routes, debug=True)

    @gen.coroutine
    def consumer(self):
        """Take one item off the queue, print it and mark it done."""
        item = yield self.queue.get()
        try:
            # Single-argument call form prints the same on Python 2 and 3.
            print(item)
        finally:
            self.queue.task_done()
def as_completed(fs):
    """ Yield results in the order in which the futures complete

    A callback running ``_as_completed(fs, queue)`` is scheduled on the
    executor's loop and one ``queue.get()`` result is yielded per input
    future. An empty ``fs`` yields nothing.

    Raises ``NotImplementedError`` when the futures belong to more than
    one executor.
    """
    if not fs:
        # Nothing to wait for; previously this fell through to the
        # misleading "many event loops" error below.
        return

    executors = set(f.executor for f in fs)
    if len(executors) != 1:
        # TODO: Groupby executor, spawn many _as_completed coroutines
        raise NotImplementedError(
            "as_completed on many event loops not yet supported")
    loop = first(fs).executor.loop

    from .compatibility import Queue
    queue = Queue()

    loop.add_callback(lambda: _as_completed(fs, queue))

    for _ in range(len(fs)):
        yield queue.get()
def as_completed(fs):
    """ Yield results in the order in which the futures complete

    A callback running ``_as_completed(fs, queue)`` is scheduled on the
    executor's loop and one ``queue.get()`` result is yielded per input
    future. An empty ``fs`` yields nothing.

    Raises ``NotImplementedError`` when the futures belong to more than
    one executor.
    """
    if not fs:
        # Nothing to wait for; previously this fell through to the
        # misleading "many event loops" error below.
        return

    executors = set(f.executor for f in fs)
    if len(executors) != 1:
        # TODO: Groupby executor, spawn many _as_completed coroutines
        raise NotImplementedError(
            "as_completed on many event loops not yet supported")
    loop = first(fs).executor.loop

    from .compatibility import Queue
    queue = Queue()

    loop.add_callback(lambda: _as_completed(fs, queue))

    for _ in range(len(fs)):
        yield queue.get()
class CommandQueue():
    """Queue of ``(command, view)`` pairs that are sent out one at a time."""

    def __init__(self):
        self.queue = Queue()

    def put(self, item):
        """Queue an ``item`` for :meth:`process_command`."""
        self.queue.put(item)

    @gen.coroutine
    def process_command(self):
        """Forever send queued commands, pausing 0.1 s before each one.

        Every item is unpacked as ``command, view``; the view receives
        ``{command[0]: command[1]}``.
        """
        while True:
            entry = yield self.queue.get()
            try:
                yield gen.sleep(0.1)
                command, view = entry
                payload = {command[0]: command[1]}
                view.write_message(payload)
            finally:
                self.queue.task_done()
class FirehoseWebSocket(tornado.websocket.WebSocketHandler):
    """Websocket that relays everything put on its own queue as JSON.

    The per-connection queue is registered in the module-level
    ``queues`` list while the connection is open.
    """

    @tornado.gen.coroutine
    def open(self):
        # Single-argument call form prints the same on Python 2 and 3.
        print("hose open")
        self.queue = Queue()
        # ``queues`` is only mutated, never rebound, so no ``global``
        # declaration is required.
        queues.append(self.queue)
        while True:
            item = yield self.queue.get()
            self.queue.task_done()
            encoded = json.dumps(item)
            self.write_message(encoded)

    @tornado.gen.coroutine
    def on_close(self):
        # Wait until everything queued so far has been taken off the
        # queue before unregistering it.
        yield self.queue.join()
        queues.remove(self.queue)
class MessageRouter(object):
    """Dispatches queued messages to handlers registered per message type.

    Messages without a registered handler go to ``default_handler`` when
    one is set. A handler failure is converted into an ``Error``
    response and sent back through ``message_sender``.
    """

    def __init__(self, message_sender, default_handler=None):
        self._queue = Queue()
        self.message_sender = message_sender
        self.default_handler = default_handler
        self._message_handlers = {}  # message class name -> handler
        self._working = False

    def register_message_handler(self, message, handler):
        """Register ``handler`` under the name of the ``message`` class."""
        assert isinstance(message, MessageMeta)
        assert hasattr(handler, '__call__')
        self._message_handlers[message.__name__] = handler

    @gen.coroutine
    def put_message(self, message):
        """Queue ``message`` for dispatching."""
        assert isinstance(message, Message)
        yield self._queue.put(message)

    def stop(self):
        """Ask the dispatch loop to exit after the message it is waiting for."""
        self._working = False

    @gen.coroutine
    def start(self):
        """Run the dispatch loop until :meth:`stop` is called."""
        self._working = True
        while self._working:
            message = yield self._queue.get()
            try:
                # TODO: Maybe we need to add special handling for BarrierRequest
                handler = self._message_handlers.get(
                    message.type, self.default_handler)
                if handler:
                    yield handler(message)
            except Exception:
                # Report the failure to the sender instead of crashing
                # the loop.
                (error_type, error_subtype, error_message,
                 extended_message) = errors.exception_to_error_args(
                    *sys.exc_info())
                response = Error.from_request(
                    message,
                    error_type=error_type,
                    error_subtype=error_subtype,
                    message=error_message,
                    extended_message=extended_message)
                yield self.message_sender.send_message_ignore_response(
                    response)
            finally:
                self._queue.task_done()
def request(subscriber, request_type, tega_id, path, **kwargs):
    '''
    tega request/response service -- this method returns a generator
    (tornado coroutine) to send a request to a remote tega db.

    The request is written to ``subscriber`` and a one-slot queue is
    stored in ``callback`` under the sequence number; the result is
    whatever is taken from that queue within REQUEST_TIMEOUT seconds.
    A ``gen.TimeoutError`` from the wait propagates to the caller.
    '''
    global seq_no
    seq_no += 1
    if seq_no > 65535:  # seq_no region: 0 - 65535.
        seq_no = 0

    payload = 'REQUEST {} {} {} {}\n{}'.format(
        seq_no, request_type.name, tega_id, path, json.dumps(kwargs))
    subscriber.write_message(payload)

    # One synchronous-style queue per request/response pair.
    reply_queue = Queue(maxsize=1)
    callback[seq_no] = reply_queue

    wait = timedelta(seconds=REQUEST_TIMEOUT)
    result = yield reply_queue.get(timeout=wait)
    return result
def request(subscriber, request_type, tega_id, path, **kwargs):
    '''
    tega request/response service -- this method returns a generator
    (tornado coroutine) to send a request to a remote tega db.

    The request is written to ``subscriber`` and a one-slot queue is
    stored in ``callback`` under the sequence number; the result is
    whatever is taken from that queue within REQUEST_TIMEOUT seconds.
    A ``gen.TimeoutError`` from the wait propagates to the caller.
    '''
    global seq_no
    seq_no += 1
    if seq_no > 65535:  # seq_no region: 0 - 65535.
        seq_no = 0

    payload = 'REQUEST {} {} {} {}\n{}'.format(
        seq_no, request_type.name, tega_id, path, json.dumps(kwargs))
    subscriber.write_message(payload)

    # One synchronous-style queue per request/response pair.
    reply_queue = Queue(maxsize=1)
    callback[seq_no] = reply_queue

    wait = timedelta(seconds=REQUEST_TIMEOUT)
    result = yield reply_queue.get(timeout=wait)
    return result
def stream(self, nvr, mjpeg_stream, mjpeg_stream_config, publish_frame_topic):
    """Subscribe to frames, draw on them, then publish processed frame.

    Runs while ``self.active_streams[mjpeg_stream]`` is truthy. The
    frame subscription is always released on exit, including when
    fetching or processing a frame raises.
    """
    frame_queue = Queue(maxsize=10)
    subscribe_frame_topic = (
        f"{nvr.config.camera.name_slug}/{TOPIC_FRAME_PROCESSED}/*"
    )
    unique_id = DataStream.subscribe_data(subscribe_frame_topic, frame_queue)

    try:
        while self.active_streams[mjpeg_stream]:
            item = yield frame_queue.get()
            # Work on a copy so the queued frame object is left untouched.
            frame = copy.copy(item.frame)
            ret, jpg = yield self.process_frame(
                nvr, frame, mjpeg_stream_config)

            if ret:
                DataStream.publish_data(publish_frame_topic, jpg)
    finally:
        # Without this an exception above would leave the subscription
        # (and its queue) registered with nothing draining it.
        DataStream.unsubscribe_data(subscribe_frame_topic, unique_id)

    LOGGER.debug(f"Closing stream {mjpeg_stream}")
class StreamClient(object):
    """A stream consumer with its own bounded item queue."""

    MAX_SIZE = 60

    def __init__(self, steam_id):
        self.id = generate_id()
        self.stream_id = steam_id
        self.queue = Queue(StreamClient.MAX_SIZE)

    def empty(self):
        """Return True when no items are queued."""
        return not self.queue.qsize()

    @coroutine
    def send(self, item):
        """Queue ``item``, waiting while the queue is full."""
        yield self.queue.put(item)

    @coroutine
    def fetch(self):
        """Take the next item off the queue and mark it done."""
        fetched = yield self.queue.get()
        self.queue.task_done()
        return fetched
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""

    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def open(self):
        """Register with the publisher and start the sending loop."""
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()

    def _close(self):
        """Deregister from the publisher; safe to call more than once.

        Both a failed send and ``on_close`` end up here. A second
        ``deregister`` would otherwise fail on a publisher that removes
        subscribers from a set.
        """
        if self.finished:
            return
        print("Subscriber left.")
        self.finished = True
        self.publisher.deregister(self)

    @gen.coroutine
    def submit(self, message):
        """Queue ``message`` for delivery to this websocket."""
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        """ Empty the queue of messages to send to the WS """
        while not self.finished:
            message = yield self.messages.get()
            self.send(message)

    def send(self, message):
        """Write ``message`` to the websocket; close on a dead connection."""
        try:
            self.write_message(message)
        except WebSocketClosedError:
            self._close()

    def on_message(self, content):
        """ Receive a message from the websocket and send it to MQ """
        self.publisher.MQmessages.put(content)
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""

    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def open(self):
        """Register with the publisher and start the sending loop."""
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()

    def _close(self):
        """Deregister from the publisher; safe to call more than once.

        Both a failed send and ``on_close`` end up here. A second
        ``deregister`` would otherwise fail on a publisher that removes
        subscribers from a set.
        """
        if self.finished:
            return
        print("Subscriber left.")
        self.finished = True
        self.publisher.deregister(self)

    @gen.coroutine
    def submit(self, message):
        """Queue ``message`` for delivery to this websocket."""
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        """ Empty the queue of messages to send to the WS """
        while not self.finished:
            message = yield self.messages.get()
            self.send(message)

    def send(self, message):
        """Write ``message`` to the websocket; close on a dead connection."""
        try:
            self.write_message(message)
        except WebSocketClosedError:
            self._close()

    def on_message(self, content):
        """ Receive a message from the websocket and send it to MQ """
        self.publisher.MQmessages.put(content)
class delay(Stream):
    """Stream node that spaces emitted elements at least ``interval`` apart."""

    def __init__(self, interval, child, loop=None):
        self.interval = interval
        self.queue = Queue()
        Stream.__init__(self, child, loop=loop)
        self.loop.add_callback(self.cb)

    def update(self, x, who=None):
        """Queue ``x`` for delayed emission."""
        return self.queue.put(x)

    @gen.coroutine
    def cb(self):
        """Emit queued elements, sleeping out the rest of each interval."""
        while True:
            started = time()
            element = yield self.queue.get()
            yield self.emit(element)
            # Time spent waiting and emitting counts towards the interval.
            remaining = self.interval - (time() - started)
            if remaining > 0:
                yield gen.sleep(remaining)
class PopularCategories:
    """Keeps a running score per category and publishes the top five."""

    def __init__(self):
        self.categories = {}  # label -> running score
        self.update_queue = Queue()

    @gen.coroutine
    def add_for_processing(self, predictions):
        """Queue a predictions object for the next :meth:`process_queue`."""
        yield self.update_queue.put(predictions)

    @gen.coroutine
    def process_queue(self):
        """Fold all currently queued predictions in, then publish the top 5.

        Does nothing when the queue is empty.
        """
        pending = self.update_queue.qsize()
        if not pending > 0:
            return

        for _ in range(pending):
            batch = yield self.update_queue.get()
            try:
                self._update_categories(batch)
            finally:
                self.update_queue.task_done()

        # update top 5
        ranked = sorted(self.categories.items(),
                        key=lambda x: x[1], reverse=True)
        payload = [to_json_result(entry[0], entry[1]) for entry in ranked[:5]]
        yield update_top_5(payload)

    def _update_categories(self, new_predictions):
        """Merge one predictions object into ``self.categories``.

        A label seen before gets the mean of its stored score and the
        new score; a new label gets the new score as is.
        """
        order = new_predictions.argsort()[0]

        # update categories total
        for index in order:
            label = configuration.image_labels[index]
            score = new_predictions[0][index]
            known = label in self.categories
            self.categories[label] = (
                (self.categories[label] + score) / 2 if known else score)
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""

    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def open(self):
        """Register with the publisher and start the sending loop."""
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()

    def _close(self):
        """Deregister from the publisher; safe to call more than once.

        Both a failed send and ``on_close`` end up here. A second
        ``deregister`` would otherwise fail on a publisher that removes
        subscribers from a set.
        """
        if self.finished:
            return
        print("Subscriber left.")
        self.finished = True
        self.publisher.deregister(self)

    @gen.coroutine
    def submit(self, message):
        """Queue ``message`` for delivery to this websocket."""
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        """Send queued messages to the websocket until closed."""
        while not self.finished:
            message = yield self.messages.get()
            self.send(message)

    def send(self, message):
        """Write ``message`` to the websocket; close on a dead connection."""
        try:
            self.write_message(message)
        except WebSocketClosedError:
            self._close()

    def on_message(self, content):
        """Queue content received from the websocket on the publisher's MQ queue."""
        self.publisher.MQmessages.put(content)
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""

    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def check_origin(self, origin):
        # NOTE(review): this accepts websocket connections from any
        # origin -- confirm that cross-origin access is intended.
        return True

    def open(self):
        """Register with the publisher and start the sending loop."""
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()

    def _close(self):
        """Deregister from the publisher; safe to call more than once.

        Both a failed send and ``on_close`` end up here. A second
        ``deregister`` would otherwise fail on a publisher that removes
        subscribers from a set.
        """
        if self.finished:
            return
        print("Subscriber left.")
        self.finished = True
        self.publisher.deregister(self)

    @gen.coroutine
    def submit(self, message):
        """Queue ``message`` for delivery to this websocket."""
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        """Send queued messages to the websocket until closed."""
        while not self.finished:
            message = yield self.messages.get()
            self.send(message)

    def send(self, message):
        """Write ``{"value": message}`` to the websocket; close on a dead connection."""
        try:
            self.write_message(dict(value=message))
        except WebSocketClosedError:
            self._close()
def call(self, action, body):
    """
    Publish a request and wait for its reply.

    The reply is delivered through a one-slot queue registered in
    ``self._reply_queues`` under a fresh correlation id; a timeout
    callback receiving the same queue is scheduled on the IO loop.

    :param str action: action to perform (CRUD for example); used as
        the routing key
    :param str body: an object to send (will be json-encoded)
    :raises TimeoutError: when the result contains ``'timeout_error'``
    """
    correlation_id = str(uuid.uuid4())
    # The queue carries the result back to this routine.
    reply_queue = Queue(maxsize=1)
    self._reply_queues[correlation_id] = reply_queue

    # send message
    channel = self.channel()
    properties = pika.BasicProperties(
        correlation_id=correlation_id,
        reply_to=self._callback_queue,
    )
    channel.basic_publish(exchange='',
                          routing_key=action,
                          properties=properties,
                          body=json.dumps(body))

    # add timeout callback
    deadline = time.time() + self._timeout
    on_timeout = functools.partial(
        self._on_timeout,
        queue=reply_queue,
        correlation_id=correlation_id,
    )
    self._ioloop.add_timeout(deadline, on_timeout)

    # retrieve result back
    result = yield reply_queue.get()
    reply_queue.task_done()

    if 'timeout_error' in result:
        raise TimeoutError(result['error'])

    return result
class QueueStore(BaseStore):
    """Publish data via queues.

    Meant for cases where subscribers should not miss any data: unlike
    the :class:`DataStore` class, messages to be broadcast are queued
    and processed in order.
    """

    def initialize(self):
        """Create the message queue and start the publishing loop."""
        self.messages = Queue()
        self.publish()

    @gen.coroutine
    def submit(self, message):
        """Queue ``message`` for publication."""
        yield self.messages.put(message)

    @gen.coroutine
    def publish(self):
        """Forever take messages off the queue and submit them to subscribers.

        Messages taken while nobody is subscribed are dropped.
        """
        while True:
            message = yield self.messages.get()
            if not len(self.subscribers) > 0:
                continue
            deliveries = [target.submit(message)
                          for target in self.subscribers]
            yield deliveries
def as_completed(fs):
    """ Return futures in the order in which they complete

    This returns an iterator that yields one ``queue.get()`` result per
    input future, in completion order rather than input order. Calling
    ``next`` on the iterator blocks until the next future completes.
    An empty ``fs`` yields nothing.

    Raises ``NotImplementedError`` when the futures belong to more than
    one executor.
    """
    if not fs:
        # Nothing to wait for; previously this fell through to the
        # misleading "many event loops" error below.
        return

    executors = set(f.executor for f in fs)
    if len(executors) != 1:
        # TODO: Groupby executor, spawn many _as_completed coroutines
        raise NotImplementedError(
            "as_completed on many event loops not yet supported")
    loop = first(fs).executor.loop

    from .compatibility import Queue
    queue = Queue()

    loop.add_callback(lambda: _as_completed(fs, queue))

    for _ in range(len(fs)):
        yield queue.get()
def as_completed(fs):
    """ Return futures in the order in which they complete

    This returns an iterator that yields one ``queue.get()`` result per
    input future, in completion order rather than input order. Calling
    ``next`` on the iterator blocks until the next future completes.
    An empty ``fs`` yields nothing.

    Raises ``NotImplementedError`` when the futures belong to more than
    one executor.
    """
    if not fs:
        # Nothing to wait for; previously this fell through to the
        # misleading "many event loops" error below.
        return

    executors = set(f.executor for f in fs)
    if len(executors) != 1:
        # TODO: Groupby executor, spawn many _as_completed coroutines
        raise NotImplementedError(
            "as_completed on many event loops not yet supported")
    loop = first(fs).executor.loop

    from .compatibility import Queue
    queue = Queue()

    loop.add_callback(lambda: _as_completed(fs, queue))

    for _ in range(len(fs)):
        yield queue.get()
class TaskLogger(object):
    """Posts task log lines and the final result to the task service.

    Depending on ``engine`` the ``log``/``result`` attributes are bound
    to blocking (requests) or coroutine (tornado) implementations. Every
    post carries an increasing sequence number.
    """

    def __init__(self,
                 task_id,
                 engine=EngineType.REQUESTS,
                 io_loop=None,
                 task_url=TASK_URL,
                 wrap=False,
                 tenant=None):
        self.task_id = task_id
        self.task_url = task_url
        self._seq = 0
        self._partial_log_url = self._get_partial_url('log')
        self._partial_result_url = self._get_partial_url('result')
        self.wrap = wrap

        if wrap and tenant:
            tenant_param = {'tenant': tenant}
            self._partial_log_url = update_query_params(
                self._partial_log_url, tenant_param)
            self._partial_result_url = update_query_params(
                self._partial_result_url, tenant_param)

        if engine == EngineType.REQUESTS:
            self.log = self._log_by_requests
            self.result = self._result_by_requests
        elif engine == EngineType.TORNADO:
            if not io_loop:
                io_loop = IOLoop.current()
            self._http_client = AsyncHTTPClient(io_loop=io_loop)
            # Holds one token per log post that is still in flight.
            self._queue = Queue()
            self.log = self._log_by_tornado
            self.result = self._result_by_tornado
        else:
            raise TaskLoggerError(
                '',
                reason='engine only supports {}'.format(
                    EngineType.types_str()))

    def _get_partial_url(self, partial_name):
        """Return ``task_url`` joined with ``partial_name`` plus the task id."""
        joined = urljoin(self.task_url, partial_name)
        return update_query_params(joined, {'task_id': self.task_id})

    def _get_log_url(self, seq):
        """Return the log URL for sequence number ``seq``."""
        return update_query_params(self._partial_log_url, {'seq': seq})

    def _get_result_url(self, seq, exit_code=0):
        """Return the result URL for ``seq`` and ``exit_code``."""
        params = {'seq': seq, 'exit_code': exit_code}
        return update_query_params(self._partial_result_url, params)

    def _log_by_requests(self, log):
        """Post one log line synchronously."""
        self._seq += 1
        seq = self._seq
        log_url = self._get_log_url(seq)
        self._send_by_requests(log_url, self._create_log(log, seq))

    def _result_by_requests(self, result, exit_code=0):
        """Post the task result synchronously."""
        self._seq += 1
        seq = self._seq
        result_url = self._get_result_url(seq, exit_code)
        payload = self._create_result(result, seq, exit_code=exit_code)
        self._send_by_requests(result_url, payload)

    @staticmethod
    def _send_by_requests(url, data):
        """POST ``data`` to ``url``; raise on any non-200 status."""
        # NOTE(review): certificate verification is disabled here.
        res = requests.post(url, data=data, verify=False)
        if res.status_code != 200:
            raise TaskLoggerError(data, reason=res.reason)

    @gen.coroutine
    def _log_by_tornado(self, log):
        """Post one log line asynchronously, tracking it as in flight."""
        yield self._queue.put(1)
        self._seq += 1
        seq = self._seq
        log_url = self._get_log_url(seq)
        payload = self._create_log(log, seq)
        try:
            yield self._send_by_tornado(log_url, payload)
        finally:
            # Release the in-flight token whether or not the post worked.
            yield self._queue.get()
            self._queue.task_done()

    @gen.coroutine
    def _result_by_tornado(self, result, exit_code=0):
        """Post the task result once all in-flight log posts are done."""
        yield self._queue.join()
        self._seq += 1
        seq = self._seq
        result_url = self._get_result_url(seq, exit_code)
        payload = self._create_result(result, seq, exit_code=exit_code)
        yield self._send_by_tornado(result_url, payload)

    @gen.coroutine
    def _send_by_tornado(self, url, data):
        """POST ``data`` to ``url``; raise TaskLoggerError on any failure."""
        try:
            # NOTE(review): certificate validation is disabled here.
            response = yield self._http_client.fetch(
                url,
                method='POST',
                headers={'Content-Type': 'application/json'},
                validate_cert=False,
                body=data)
        except Exception as exc:
            detail = exc
            if hasattr(exc, 'response') and exc.response:
                detail = 'url:{}, exc:{}, body:{}'.format(
                    url, exc, exc.response.body)
            raise TaskLoggerError(data, str(detail))
        else:
            if response.code != 200:
                raise TaskLoggerError(data, reason=response.body)

    def _create_log(self, log, seq):
        """Build the request body for one log line (newline appended)."""
        assert isinstance(log, basestring)
        log = log + '\n'
        if not self.wrap:
            return log
        log_msg = TaskLogMessage(task_id=self.task_id, log=log, seq=seq)
        return json_encode({'messages': log_msg})

    def _create_result(self, result, seq, exit_code):
        """Build the request body for the task result (newline appended)."""
        assert isinstance(result, basestring)
        result = result + '\n'
        if not self.wrap:
            return result
        result_msg = TaskResultMessage(task_id=self.task_id,
                                       result=result,
                                       seq=seq,
                                       exit_code=exit_code)
        return json_encode({'messages': result_msg})
class TornadoTransmission():
    """Tornado-based transmission that batches events and POSTs them.

    Events are queued on ``self.pending`` by ``send``; the ``_sender``
    coroutine groups them into batches and ``_send_batch`` posts each batch.
    One response dict per event is placed on ``self.responses``.
    """

    def __init__(self, max_concurrent_batches=10, block_on_send=False,
                 block_on_response=False, max_batch_size=100,
                 send_frequency=0.25, user_agent_addition=''):
        """Configure queues, the HTTP client and the batch semaphore.

        :param max_concurrent_batches: initial value of the semaphore
            acquired around each batch submission.
        :param block_on_send: use ``put`` instead of ``put_nowait`` on the
            pending queue.
        :param block_on_response: use ``put`` instead of ``put_nowait`` on
            the responses queue.
        :param max_batch_size: number of events that triggers a flush.
        :param send_frequency: seconds between time-triggered flushes.
        :param user_agent_addition: text appended to the user agent.
        :raises ImportError: if tornado is not available.
        """
        if not has_tornado:
            raise ImportError('TornadoTransmission requires tornado, but it was not found.')

        self.block_on_send = block_on_send
        self.block_on_response = block_on_response
        self.max_batch_size = max_batch_size
        self.send_frequency = send_frequency

        user_agent = "libhoney-py/" + VERSION
        if user_agent_addition:
            user_agent += " " + user_agent_addition

        self.http_client = AsyncHTTPClient(
            force_instance=True,
            defaults=dict(user_agent=user_agent))

        # libhoney adds events to the pending queue for us to send
        self.pending = Queue(maxsize=1000)
        # we hand back responses from the API on the responses queue
        self.responses = Queue(maxsize=2000)

        # maps each in-flight HTTPRequest to its start time and events
        self.batch_data = {}
        self.sd = statsd.StatsClient(prefix="libhoney")
        self.batch_sem = Semaphore(max_concurrent_batches)

    def start(self):
        """Schedule the ``_sender`` loop on the current IOLoop."""
        ioloop.IOLoop.current().spawn_callback(self._sender)

    def send(self, ev):
        '''send accepts an event and queues it to be sent'''
        self.sd.gauge("queue_length", self.pending.qsize())
        try:
            if self.block_on_send:
                self.pending.put(ev)
            else:
                self.pending.put_nowait(ev)
            self.sd.incr("messages_queued")
        except QueueFull:
            response = {
                "status_code": 0,
                "duration": 0,
                "metadata": ev.metadata,
                "body": "",
                "error": "event dropped; queue overflow",
            }
            if self.block_on_response:
                self.responses.put(response)
            else:
                try:
                    self.responses.put_nowait(response)
                except QueueFull:
                    # if the response queue is full when trying to add an
                    # event-queue-is-full response, just skip it.
                    pass
            self.sd.incr("queue_overflow")

    # We're using the older decorator/yield model for compatibility with
    # Python versions before 3.5.
    # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
    @gen.coroutine
    def _sender(self):
        '''_sender is the control loop that pulls events off the
        `self.pending` queue and submits batches for actual sending.

        A batch is flushed when it reaches `max_batch_size` events, when
        `send_frequency` seconds have passed since the last flush, or when
        no event arrives within `send_frequency` seconds. A `None` event
        flushes what is buffered and ends the loop.
        '''
        # Local import so this block does not depend on the file's imports.
        import datetime

        events = []
        last_flush = time.time()
        while True:
            try:
                # A bare number would be treated by tornado as an absolute
                # deadline (already in the past); a timedelta is relative.
                wait = datetime.timedelta(seconds=self.send_frequency)
                ev = yield self.pending.get(timeout=wait)
                if ev is None:
                    # signals shutdown
                    yield self._flush(events)
                    return
                events.append(ev)
                if (len(events) >= self.max_batch_size or
                        time.time() - last_flush > self.send_frequency):
                    yield self._flush(events)
                    events = []
                    # restart the timer, otherwise every following event
                    # would trigger its own flush
                    last_flush = time.time()
            except TimeoutError:
                yield self._flush(events)
                events = []
                last_flush = time.time()

    @gen.coroutine
    def _flush(self, events):
        """Send *events*, one batch per destination; no-op when empty."""
        if not events:
            return
        for dest, group in group_events_by_destination(events).items():
            yield self._send_batch(dest, group)

    @gen.coroutine
    def _send_batch(self, destination, events):
        ''' Makes a single batch API request with the given list of events.
        The `destination` argument contains the write key, API host and
        dataset name used to build the request.'''
        start = time.time()
        status_code = 0

        # enforce max_concurrent_batches; acquired outside the try so that
        # release() below always pairs with a successful acquire()
        yield self.batch_sem.acquire()
        try:
            url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                          destination.dataset)
            payload = []
            for ev in events:
                event_time = ev.created_at.isoformat()
                if ev.created_at.tzinfo is None:
                    event_time += "Z"
                payload.append({
                    "time": event_time,
                    "samplerate": ev.sample_rate,
                    "data": ev.fields()})
            req = HTTPRequest(
                url,
                method='POST',
                headers={
                    "X-Honeycomb-Team": destination.writekey,
                    "Content-Type": "application/json",
                },
                body=json.dumps(payload, default=json_default_handler),
            )
            self.http_client.fetch(req, self._response_callback)
            # store the events that were sent so we can process responses
            # later; it is important that we delete these eventually, or
            # we'll run into memory issues
            self.batch_data[req] = {"start": start, "events": events}
        except Exception as e:
            # Catch all exceptions and hand them to the responses queue.
            self._enqueue_errors(status_code, e, start, events)
        finally:
            self.batch_sem.release()

    def _enqueue_errors(self, status_code, error, start, events):
        """Enqueue an error response (and count one error) per event."""
        for ev in events:
            self.sd.incr("send_errors")
            self._enqueue_response(status_code, "", error, start, ev.metadata)

    def _response_callback(self, resp):
        """Turn a batch HTTP response into one response per event."""
        # resp.request should be the same HTTPRequest object built by
        # _send_batch and mapped to values in batch_data
        events = self.batch_data[resp.request]["events"]
        start = self.batch_data[resp.request]["start"]
        try:
            status_code = resp.code
            resp.rethrow()

            statuses = [d["status"] for d in json.loads(resp.body)]
            for ev, status in zip(events, statuses):
                self._enqueue_response(status, "", None, start, ev.metadata)
                self.sd.incr("messages_sent")
        except Exception as e:
            # _enqueue_errors already counts one send_error per event
            self._enqueue_errors(status_code, e, start, events)
        finally:
            # clean up the data for this batch
            del self.batch_data[resp.request]

    def _enqueue_response(self, status_code, body, error, start, metadata):
        """Put a response dict on the responses queue.

        When not blocking, the response is dropped if the queue is full.
        """
        resp = {
            "status_code": status_code,
            "body": body,
            "error": error,
            "duration": (time.time() - start) * 1000,
            "metadata": metadata
        }
        if self.block_on_response:
            self.responses.put(resp)
        else:
            try:
                self.responses.put_nowait(resp)
            except QueueFull:
                pass

    def close(self):
        '''call close to shut down the sender nicely: a `None` sentinel is
        queued on the pending queue (ending `_sender` after a final flush)
        and on the responses queue. Each put is given a 10 second timeout.'''
        # Local import so this block does not depend on the file's imports.
        import datetime

        # relative timeout; a bare number would be an absolute deadline
        grace = datetime.timedelta(seconds=10)
        try:
            self.pending.put(None, grace)
        except QueueFull:
            pass
        # signal to the responses queue that nothing more is coming.
        try:
            self.responses.put(None, grace)
        except QueueFull:
            pass

    def get_response_queue(self):
        ''' return the responses queue on to which will be sent the response
        objects from each event send'''
        return self.responses
class CounterCache(threading.Thread):
    """Double-buffered counter cache that is periodically persisted.

    Incoming messages are accumulated in the active buffer (selected by
    ``m_CacheFlag``).  The thread's ``run`` loop periodically swaps the
    buffers, writes the now-inactive one to the database and resets it.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        self.m_queue = Queue()
        self.m_CacheFlag = 1
        self.m_CounterCache = None
        self.m_Cache_A = defaultdict()
        self.m_Cache_B = defaultdict()
        self.database = Database(redis_conf = REDISEVER, password = STATUS_REDIS_PASS)
        for buf in (self.m_Cache_A, self.m_Cache_B):
            self.cacheInit(buf)

    def _standbyCache(self):
        """Return the buffer that is NOT active, or None for an unknown flag."""
        return {1: self.m_Cache_B, 2: self.m_Cache_A}.get(self.m_CacheFlag)

    def switchCache(self):
        """Return the active buffer (None for an unknown flag)."""
        return {1: self.m_Cache_A, 2: self.m_Cache_B}.get(self.m_CacheFlag)

    def chageCacheFlag(self):
        """Toggle the active-buffer flag between 1 and 2."""
        flag = self.m_CacheFlag
        self.m_CacheFlag = {1: 2, 2: 1}.get(flag, flag)

    def clearCache(self):
        """Empty the inactive buffer and rebuild its counter layout."""
        standby = self._standbyCache()
        if standby is None:
            return
        standby.clear()
        self.cacheInit(standby)

    def cacheInit(self, cache):
        """Install fresh zero-default counters into *cache*."""
        cache['pid_info'] = defaultdict(int)
        for section in ('eid_info', 'adx_info'):
            cache[section] = {
                'pv': defaultdict(int),
                'exchange_price': defaultdict(int)
            }
        cache['aid_info'] = {
            'exchange_price': defaultdict(int)
        }

    @tornado.gen.coroutine
    def queueMsgPut(self, msg):
        """Enqueue one message for counting."""
        yield self.m_queue.put(msg)

    @tornado.gen.coroutine
    def queueMsgGet(self):
        """Consume messages forever, folding each into the active buffer."""
        while True:
            msg = yield self.m_queue.get()
            logger.info('QueueGet:%r' % msg)
            self.cacheInfoPut(msg)

    def cacheInfoPut(self, info):
        """Add one message to the active buffer.

        Only messages with ``type == 1``, a truthy ``eid`` and ``aid`` and
        a non-None ``price`` are counted; everything else is ignored.
        """
        active = self.switchCache()
        kind = info.get('type')
        eid = info.get('eid')
        price = info.get('price')
        aid = info.get('aid')

        if not (kind == 1 and eid and price is not None and aid):
            return None

        # pv
        active['aid_info']['exchange_price'][aid] += price
        eid_counters = active['eid_info']
        eid_counters['pv'][eid] += 1
        eid_counters['exchange_price'][eid] += price

    def cacheDura(self):
        """Persist the inactive buffer's counters to the database."""
        standby = self._standbyCache()

        if 'eid_info' in standby:
            spend_by_eid = standby['eid_info']['exchange_price']
            shows_by_eid = standby['eid_info']['pv']
            for eid, spent in spend_by_eid.items():
                self.database.incEidHourSp(eid, spent)
                logger.debug("increase Order:%r Money:%r OK!" % (eid, spent))
            for eid, shows in shows_by_eid.items():
                self.database.incEidShow(eid, shows)
                logger.debug("increase Order:%r PV:%r OK!" % (eid, shows))

        if 'aid_info' in standby:
            spend_by_aid = standby['aid_info']['exchange_price']
            for aid, spent in spend_by_aid.items():
                self.database.incAidHourSp(aid, spent)
                self.database.decAdvBidSpend(aid, "-%.3f" % (float(spent)/1000))
                logger.debug("increase Advertiser:%s Money:%s!" % (aid, str(float(spent)/1000)))

    def run(self):
        """Every CACHE_DUR_FREQ seconds: swap buffers, persist, reset."""
        while True:
            try:
                time.sleep(CACHE_DUR_FREQ)
                self.chageCacheFlag()
                self.cacheDura()
                self.clearCache()
            except Exception as e:
                logger.error(e)
                continue
class BatchedStream(object):
    """ Mostly obsolete, see BatchedSend

    Wraps a stream with two background coroutines: one batches queued
    outgoing messages into single writes spaced at least ``interval``
    milliseconds apart, the other reads lists of messages from the stream
    and queues them for ``recv``.
    """

    def __init__(self, stream, interval):
        """
        :param stream: object passed to ``read`` / ``write``.
        :param interval: minimum spacing between writes, in milliseconds.
        """
        self.stream = stream
        self.interval = interval / 1000.0
        self.last_transmission = default_timer()
        self.send_q = Queue()
        self.recv_q = Queue()
        # Must be set before the coroutines start: they run synchronously
        # up to their first pending yield and may already mark the stream
        # as broken.
        self._broken = None
        self._background_send_coroutine = self._background_send()
        self._background_recv_coroutine = self._background_recv()
        self.pc = PeriodicCallback(lambda: None, 100)
        self.pc.start()

    def _mark_done(self, count):
        """Report *count* items taken from ``send_q`` as processed."""
        for _ in range(count):
            self.send_q.task_done()

    def _abandon_pending(self):
        """Discard everything still queued so ``flush`` cannot wait on it."""
        while not self.send_q.empty():
            self.send_q.get_nowait()
            self.send_q.task_done()

    @gen.coroutine
    def _background_send(self):
        """Batch queued messages and write them until the stream closes."""
        with log_errors():
            while True:
                msg = yield self.send_q.get()
                if msg == "close":
                    # The sentinel is a queue item too; account for it and
                    # for anything left behind before leaving the loop.
                    self.send_q.task_done()
                    self._abandon_pending()
                    break
                msgs = [msg]
                now = default_timer()
                wait_time = self.last_transmission + self.interval - now
                if wait_time > 0:
                    yield gen.sleep(wait_time)
                while not self.send_q.empty():
                    msgs.append(self.send_q.get_nowait())
                try:
                    yield write(self.stream, msgs)
                except StreamClosedError:
                    self.recv_q.put_nowait("close")
                    self._broken = True
                    # Nothing will ever be written again: release waiters
                    # in flush()/close() instead of letting them hang.
                    self._mark_done(len(msgs))
                    self._abandon_pending()
                    break
                # Remember when we last wrote so the next batch is spaced
                # by ``interval``.
                self.last_transmission = default_timer()
                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                self._mark_done(len(msgs))

    @gen.coroutine
    def _background_recv(self):
        """Read message lists from the stream and queue each message."""
        with log_errors():
            while True:
                try:
                    msgs = yield read(self.stream)
                except StreamClosedError:
                    # Wake up both the receiver and the sender loop.
                    self.recv_q.put_nowait("close")
                    self.send_q.put_nowait("close")
                    self._broken = True
                    break
                assert isinstance(msgs, list)
                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                for msg in msgs:
                    self.recv_q.put_nowait(msg)

    @gen.coroutine
    def flush(self):
        """Wait until every queued outgoing message has been processed."""
        yield self.send_q.join()

    @gen.coroutine
    def send(self, msg):
        """Queue *msg* for sending.

        :raises StreamClosedError: if the stream was found to be closed.
        """
        if self._broken:
            raise StreamClosedError("Batch Stream is Closed")
        else:
            self.send_q.put_nowait(msg)

    @gen.coroutine
    def recv(self):
        """Return the next received message.

        :raises StreamClosedError: when the close marker is dequeued.
        """
        result = yield self.recv_q.get()
        if result == "close":
            raise StreamClosedError("Batched Stream is Closed")
        else:
            raise gen.Return(result)

    @gen.coroutine
    def close(self):
        """Flush outgoing messages, then close the underlying stream."""
        yield self.flush()
        raise gen.Return(self.stream.close())

    def closed(self):
        """Return ``self.stream.closed()``."""
        return self.stream.closed()
class Model:
    """A loaded network plus a background loop that classifies requests.

    Requests are queued by ``classify`` and processed in batches by
    ``classify_in_background``, which is spawned on the current IOLoop
    when the model is constructed.
    """

    def __init__(self, config_file):
        """Load the config, initialise devices and the engine's network.

        :param config_file: path handed to ``Config.load_file``.
        Any exception from loading the config, devices or network is
        logged and re-raised.
        """
        self.lock = locks.Lock()
        self.classification_queue = Queue()

        print('loading config %s' % config_file, file=log.v5)
        # Load and setup config
        try:
            self.config = Config.Config()
            self.config.load_file(config_file)
            self.pause_after_first_seq = self.config.float('pause_after_first_seq', 0.2)
            self.batch_size = self.config.int('batch_size', 5000)
            self.max_seqs = self.config.int('max_seqs', -1)
        except Exception:
            print('Error: loading config %s failed' % config_file, file=log.v1)
            raise

        try:
            self.devices = self._init_devices()
        except Exception:
            print('Error: Loading devices for config %s failed' % config_file, file=log.v1)
            raise

        print('Starting engine for config %s' % config_file, file=log.v5)
        self.engine = Engine.Engine(self.devices)
        try:
            self.engine.init_network_from_config(config=self.config)
        except Exception:
            print('Error: Loading network for config %s failed' % config_file, file=log.v1)
            raise

        IOLoop.current().spawn_callback(self.classify_in_background)
        self.last_used = datetime.datetime.now()

    def _init_devices(self):
        """
        Initiates the required devices for a config. Same as the function
        initDevices in rnn.py, but operating on ``self.config``.

        :return: A list with the devices used.
        """
        oldDeviceConfig = ",".join(self.config.list('device', ['default']))
        if "device" in TheanoFlags:
            # This is important because Theano likely already has
            # initialized that device.
            # The override must go to this model's own config; there is no
            # ``config`` argument in this method.
            self.config.set("device", TheanoFlags["device"])
            print("Devices: Use %s via THEANO_FLAGS instead of %s."
                  % (TheanoFlags["device"], oldDeviceConfig), file=log.v4)
        devArgs = get_devices_init_args(self.config)
        assert len(devArgs) > 0
        devices = [Device(**kwargs) for kwargs in devArgs]
        # Block until every device reports it is initialised.
        for device in devices:
            while not device.initialized:
                time.sleep(0.25)
        if devices[0].blocking:
            print("Devices: Used in blocking / single proc mode.", file=log.v4)
        else:
            print("Devices: Used in multiprocessing mode.", file=log.v4)
        return devices

    @tornado.gen.coroutine
    def classify_in_background(self):
        """Forever: collect waiting requests, classify them, set results."""
        while True:
            requests = []
            # fetch first request
            r = yield self.classification_queue.get()
            requests.append(r)
            # grab all other waiting requests
            try:
                while True:
                    requests.append(self.classification_queue.get_nowait())
            except QueueEmpty:
                pass

            output_dim = {}
            # Do dataset creation and classification.
            dataset = StaticDataset(data=[r.data for r in requests],
                                    output_dim=output_dim)
            dataset.init_seq_order()
            batches = dataset.generate_batches(
                recurrent_net=self.engine.network.recurrent,
                batch_size=self.batch_size,
                max_seqs=self.max_seqs)

            # Only one forward pass may use the devices at a time.
            with (yield self.lock.acquire()):
                ctt = ForwardTaskThread(self.engine.network, self.devices,
                                        dataset, batches)
                yield ctt.join()

            try:
                for i in range(dataset.num_seqs):
                    requests[i].future.set_result(ctt.result[i])
                    self.classification_queue.task_done()
            except Exception as e:
                print('exception', e)
                raise

    @tornado.gen.coroutine
    def classify(self, data):
        """Queue *data* for classification and return its result."""
        self.last_used = datetime.datetime.now()
        request = ClassificationRequest(data)

        yield self.classification_queue.put(request)
        yield request.future
        return request.future.result()
class BlogBackup(object):
    """Log in through PhantomJS and save each blog essay as a ``.md`` file.

    Essay links are collected into a queue by ``_fetch_blog_list_page`` and
    consumed by ``_fetch_essay_content`` workers started from ``run``.
    """

    _default_dir_name = 'seg_blog_backup'

    def _generate_save_dir(self):
        """Use (and create if needed) the default directory next to this file."""
        cur_dir = os.path.dirname(__file__)
        self.save_path = os.path.join(cur_dir, self._default_dir_name)
        if not os.path.isdir(self.save_path):
            os.mkdir(self.save_path)

    def _parse_save_path(self):
        """Validate the configured save path or fall back to the default.

        :raises BlogSavePathError: if a path was given but is not an
            existing directory.
        """
        if self.save_path:
            if os.path.exists(self.save_path) and \
                    os.path.isdir(self.save_path):
                return
            else:
                raise BlogSavePathError(
                    "'%s' not exists or is not dir!" % self.save_path)
        else:
            self._generate_save_dir()

    def _get_user_cookies(self):
        """Submit the login form and return the browser's cookies.

        :raises PageHtmlChanged: if the login form elements are missing.
        :raises Exception: on a failed login or when the page never leaves
            the login URL.
        """
        url = target_url + login_page_path
        self.driver.get(url)
        try:
            user_input = self.driver.find_element_by_name('mail')
            passwd_input = self.driver.find_element_by_name('password')
            submit_btn = self.driver.find_element_by_class_name('pr20')
        except NoSuchElementException:
            raise PageHtmlChanged(
                "%s login page structure have changed!" % _domain)

        user_input.send_keys(self.username)
        passwd_input.send_keys(self.passwd)
        submit_btn.click()
        try:
            WebDriverWait(self.driver, 3).until(staleness_of(submit_btn))
        except TimeoutException:
            raise Exception("Wrong username or password!")

        WebDriverWait(self.driver, timeout=10).until(has_page_load)
        try_times = 0
        # Poll once per second until the browser has left the login page.
        while True:
            time.sleep(1)
            if url != self.driver.current_url:
                return self.driver.get_cookies()

            try_times += 1
            if try_times > 10:
                raise Exception("Getting cookie info failed!")

    def _get_driver(self):
        """Create the PhantomJS driver, honouring ``phantomjs_path``.

        :raises PhantomjsPathError: if the configured executable fails.
        """
        if self.phantomjs_path:
            try:
                return webdriver.PhantomJS(
                    executable_path=self.phantomjs_path,
                    service_log_path=os.path.devnull)
            except WebDriverException:
                raise PhantomjsPathError("Phantomjs locate path invalid!")
        else:
            return webdriver.PhantomJS(service_log_path=os.path.devnull)

    def __init__(self, **conf):
        """
        :param conf: requires ``username`` and ``passwd``; optional
            ``phantomjs_path`` and ``save_path``.
        """
        self.username = conf['username']
        self.passwd = conf['passwd']
        self.phantomjs_path = conf.get('phantomjs_path')
        self.save_path = conf.get('save_path')
        self._q = Queue()

        self._parse_save_path()
        self.driver = self._get_driver()
        self._cookies = self._get_user_cookies()

    @gen.coroutine
    def run(self):
        """Collect all essay links, then download them with several workers."""
        self.__filter_cookies()

        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()

        yield self._q.join()

    def __filter_cookies(self):
        """Reduce the cookie list to a name->value dict for ``_domain``."""
        self._cookies = {k['name']: k['value'] for k in self._cookies if
                         k['domain'] == _domain}

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        """Queue every essay link on *page_link* and follow the next page."""
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d('.stream-list__item > .summary > h2 > a')
        for link in link_elements:
            yield self._q.put(d(link).attr('href'))

        next_ele = d('.pagination li.next a')
        if next_ele:
            next_page_url = target_url + next_ele.attr('href')
            # Wait for the following pages so that their errors propagate
            # and run() only continues once every link is queued.
            yield self._fetch_blog_list_page(next_page_url)

    @gen.coroutine
    def _fetch_essay_content(self):
        """Worker: download queued essays until the queue stays empty."""
        # Local import so this block does not depend on the file's imports.
        import datetime

        while True:
            try:
                # A timedelta is a relative timeout; a bare number would be
                # interpreted by tornado as an absolute deadline.
                essay_path = yield self._q.get(
                    timeout=datetime.timedelta(seconds=1))
            except gen.TimeoutError:
                # Nothing was dequeued, so task_done() must not be called.
                raise gen.Return()

            try:
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                file_name = title + '.md'
                real_file_name = os.path.join(self.save_path, file_name)
                with open(real_file_name, 'w') as f:
                    f.write(content.encode('utf8'))
            finally:
                # Exactly one task_done() per dequeued essay, even on error.
                self._q.task_done()
async def test_listeners(known_server, handlers, jsonrpc_init_msg):
    """will some listeners listen?

    Registers client, server and "all" scoped listeners, sends one
    initialize message through the websocket handler and checks which
    listeners received it, then unregisters everything again.
    """
    handler, ws_handler = handlers
    manager = handler.manager

    def registered(scope):
        # number of listeners currently registered for a scope
        return len(manager._listeners[scope])

    manager.all_listeners = ["jupyter_lsp.tests.listener.dummy_listener"]
    manager.initialize()
    manager._listeners["client"] = []  # hide predefined client listeners

    assert registered("all") == 1

    dummy_listener = manager._listeners["all"][0]
    expected_repr = (
        "<MessageListener listener=<function dummy_listener at .*?>,"
        " method=None, language_server=None>"
    )
    assert re.match(expected_repr, repr(dummy_listener))

    handler_listened = Queue()
    server_listened = Queue()
    all_listened = Queue()

    # some client listeners
    @lsp_message_listener("client", language_server=known_server, method="initialize")
    async def client_listener(scope, message, language_server, manager):
        await handler_listened.put(message)

    @lsp_message_listener("client", method=r"not-a-method")
    async def other_client_listener(
        scope, message, language_server, manager
    ):  # pragma: no cover
        await handler_listened.put(message)
        raise NotImplementedError("shouldn't get here")

    # some server listeners
    @lsp_message_listener("server", language_server=None, method=None)
    async def server_listener(scope, message, language_server, manager):
        await server_listened.put(message)

    @lsp_message_listener("server", language_server=r"not-a-language-server")
    async def other_server_listener(
        scope, message, language_server, manager
    ):  # pragma: no cover
        await handler_listened.put(message)
        raise NotImplementedError("shouldn't get here")

    # an all listener
    @lsp_message_listener("all")
    async def all_listener(
        scope, message, language_server, manager
    ):  # pragma: no cover
        await all_listened.put(message)

    assert registered("server") == 2
    assert registered("client") == 2
    assert registered("all") == 2

    ws_handler.open(known_server)

    await ws_handler.on_message(jsonrpc_init_msg)

    # one message each for client and server, two for the "all" scope
    awaited = [
        handler_listened.get(),
        server_listened.get(),
        all_listened.get(),
        all_listened.get(),
    ]
    results = await asyncio.wait_for(
        asyncio.gather(*awaited, return_exceptions=True),
        20,
    )
    assert all(isinstance(res, dict) for res in results)

    ws_handler.on_close()

    for drained in (handler_listened, server_listened, all_listened, all_listened):
        drained.task_done()

    for listener in (
        client_listener,
        other_client_listener,
        server_listener,
        other_server_listener,
        all_listener,
    ):
        manager.unregister_message_listener(listener)

    assert not manager._listeners["server"]
    assert not manager._listeners["client"]
    assert registered("all") == 1
class Executor(object):
    """ Distributed executor with data dependencies

    This executor resembles executors in concurrent.futures but also allows
    Futures within submit/map calls.

    Provide center address on initialization

    >>> executor = Executor(('127.0.0.1', 8787))  # doctest: +SKIP

    Use ``submit`` method like normal

    >>> a = executor.submit(add, 1, 2)  # doctest: +SKIP
    >>> b = executor.submit(add, 10, 20)  # doctest: +SKIP

    Additionally, provide results of submit calls (futures) to further submit
    calls:

    >>> c = executor.submit(add, a, b)  # doctest: +SKIP

    This allows for the dynamic creation of complex dependencies.
    """

    def __init__(self, center=None, scheduler=None, start=True,
                 delete_batch_time=1, loop=None):
        """
        :param center: center address; optional when *scheduler* provides one.
        :param scheduler: an existing ``Scheduler`` instance; any other
            truthy value raises ``NotImplementedError``.
        :param start: start the loop thread and coroutines immediately.
        :param delete_batch_time: forwarded to a newly created ``Scheduler``.
        :param loop: IOLoop to use; a new one is created when omitted.
        :raises ValueError: if no center can be determined.
        """
        self.futures = dict()
        self.refcount = defaultdict(lambda: 0)
        self.loop = loop or IOLoop()
        self.scheduler_queue = Queue()
        self.report_queue = Queue()
        # Always defined, so the check below raises the intended ValueError
        # rather than AttributeError when no center is available.
        self.center = None

        if scheduler:
            if isinstance(scheduler, Scheduler):
                self.scheduler = scheduler
                if not center:
                    self.center = scheduler.center
            else:
                raise NotImplementedError()
                # self.scheduler = coerce_to_rpc(scheduler)
        else:
            self.scheduler = Scheduler(center, loop=self.loop,
                                       delete_batch_time=delete_batch_time)
        if center:
            self.center = coerce_to_rpc(center)

        if not self.center:
            raise ValueError("Provide Center address")

        if start:
            self.start()

    def start(self):
        """ Start scheduler running in separate thread """
        if hasattr(self, '_loop_thread'):
            return
        from threading import Thread
        self._loop_thread = Thread(target=self.loop.start)
        self._loop_thread.daemon = True
        _global_executor[0] = self
        self._loop_thread.start()
        sync(self.loop, self._start)

    def send_to_scheduler(self, msg):
        """ Queue *msg* for the scheduler from any thread """
        if isinstance(self.scheduler, Scheduler):
            self.loop.add_callback(self.scheduler_queue.put_nowait, msg)
        else:
            raise NotImplementedError()

    @gen.coroutine
    def _start(self):
        """ Start the scheduler if needed and wait for 'stream-start' """
        if self.scheduler.status != 'running':
            yield self.scheduler._sync_center()
            self.scheduler.start()

        start_event = Event()
        self.coroutines = [
            self.scheduler.handle_queues(self.scheduler_queue,
                                         self.report_queue),
            self.report(start_event)]

        _global_executor[0] = self
        yield start_event.wait()
        logger.debug("Started scheduling coroutines. Synchronized")

    def __enter__(self):
        if not self.loop._running:
            self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.shutdown()

    def _inc_ref(self, key):
        """ Increase the reference count of *key* """
        self.refcount[key] += 1

    def _dec_ref(self, key):
        """ Decrease the reference count; release the key when it hits 0 """
        self.refcount[key] -= 1
        if self.refcount[key] == 0:
            del self.refcount[key]
            self._release_key(key)

    def _release_key(self, key):
        """ Release key from distributed memory """
        logger.debug("Release key %s", key)
        if key in self.futures:
            self.futures[key]['event'].clear()
            del self.futures[key]
        self.send_to_scheduler({'op': 'release-held-data', 'key': key})

    @gen.coroutine
    def report(self, start_event):
        """ Listen to scheduler

        Updates ``self.futures`` according to each message's ``op`` and
        stops on a 'close' message.
        """
        while True:
            if isinstance(self.scheduler, Scheduler):
                msg = yield self.report_queue.get()
            else:
                # Only an in-process Scheduler is supported for now.
                raise NotImplementedError()

            if msg['op'] == 'stream-start':
                start_event.set()
            if msg['op'] == 'close':
                break
            if msg['op'] == 'key-in-memory':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'finished'
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'lost-data':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'lost'
                    self.futures[msg['key']]['event'].clear()
            if msg['op'] == 'task-erred':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'error'
                    self.futures[msg['key']]['exception'] = msg['exception']
                    self.futures[msg['key']]['traceback'] = msg['traceback']
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'restart':
                logger.info("Receive restart signal from scheduler")
                events = [d['event'] for d in self.futures.values()]
                self.futures.clear()
                # Wake up anything still waiting on the dropped futures.
                for e in events:
                    e.set()
                with ignoring(AttributeError):
                    self._restart_event.set()

    @gen.coroutine
    def _shutdown(self, fast=False):
        """ Send shutdown signal and wait until scheduler completes """
        self.send_to_scheduler({'op': 'close'})
        if _global_executor[0] is self:
            _global_executor[0] = None
        if not fast:
            yield self.coroutines

    def shutdown(self, timeout=10):
        """ Send shutdown signal and wait until scheduler terminates """
        self.send_to_scheduler({'op': 'close'})
        self.loop.stop()
        self._loop_thread.join(timeout=timeout)
        if _global_executor[0] is self:
            _global_executor[0] = None

    def submit(self, func, *args, **kwargs):
        """ Submit a function application to the scheduler

        Parameters
        ----------
        func: callable
        *args:
        **kwargs:
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> c = executor.submit(add, a, b)  # doctest: +SKIP

        Returns
        -------
        Future

        See Also
        --------
        distributed.executor.Executor.map
        """
        if not callable(func):
            raise TypeError("First input to submit must be a callable function")

        key = kwargs.pop('key', None)
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)

        if key is None:
            if pure:
                key = funcname(func) + '-' + tokenize(func, kwargs, *args)
            else:
                key = funcname(func) + '-' + next(tokens)

        # An identical task is already tracked: reuse it.
        if key in self.futures:
            return Future(key, self)

        if kwargs:
            task = (apply, func, args, kwargs)
        else:
            task = (func,) + args

        if workers is not None:
            restrictions = {key: workers}
        else:
            restrictions = {}

        logger.debug("Submit %s(...), %s", funcname(func), key)
        self.send_to_scheduler({'op': 'update-graph',
                                'dsk': {key: task},
                                'keys': [key],
                                'restrictions': restrictions})

        return Future(key, self)

    def map(self, func, *iterables, **kwargs):
        """ Map a function on a sequence of arguments

        Arguments can be normal objects or Futures

        Parameters
        ----------
        func: callable
        iterables: Iterables
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> L = executor.map(func, sequence)  # doctest: +SKIP

        Returns
        -------
        list of futures

        See also
        --------
        distributed.executor.Executor.submit
        """
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)
        if not callable(func):
            raise TypeError("First input to map must be a callable function")
        iterables = [list(it) for it in iterables]
        if pure:
            keys = [funcname(func) + '-' + tokenize(func, kwargs, *args)
                    for args in zip(*iterables)]
        else:
            uid = str(uuid.uuid4())
            keys = [funcname(func) + '-' + uid + '-' + next(tokens)
                    for i in range(min(map(len, iterables)))]

        if not kwargs:
            dsk = {key: (func,) + args
                   for key, args in zip(keys, zip(*iterables))}
        else:
            dsk = {key: (apply, func, args, kwargs)
                   for key, args in zip(keys, zip(*iterables))}

        if isinstance(workers, (list, set)):
            if workers and isinstance(first(workers), (list, set)):
                # One restriction per task.
                if len(workers) != len(keys):
                    raise ValueError("You only provided %d worker restrictions"
                                     " for a sequence of length %d"
                                     % (len(workers), len(keys)))
                restrictions = dict(zip(keys, workers))
            else:
                # The same restriction for every task.
                restrictions = {key: workers for key in keys}
        elif workers is None:
            restrictions = {}
        else:
            raise TypeError("Workers must be a list or set of workers or None")

        logger.debug("map(%s, ...)", funcname(func))
        self.send_to_scheduler({'op': 'update-graph',
                                'dsk': dsk,
                                'keys': keys,
                                'restrictions': restrictions})

        return [Future(key, self) for key in keys]

    @gen.coroutine
    def _gather(self, futures):
        """ Coroutine behind ``gather``

        Waits for tracked futures, raises the first recorded task
        exception, and otherwise fetches the data from the center,
        reporting missing keys to the scheduler and retrying.
        """
        futures2, keys = unpack_remotedata(futures)
        keys = list(keys)

        while True:
            logger.debug("Waiting on futures to clear before gather")
            yield All([self.futures[key]['event'].wait() for key in keys
                       if key in self.futures])
            # Keys may legitimately be untracked here (the wait above skips
            # them too), so do not index self.futures blindly.
            exceptions = [self.futures[key]['exception'] for key in keys
                          if key in self.futures
                          and self.futures[key]['status'] == 'error']
            if exceptions:
                raise exceptions[0]
            try:
                data = yield _gather(self.center, keys)
            except KeyError as e:
                logger.debug("Couldn't gather keys %s", e)
                self.send_to_scheduler({'op': 'missing-data',
                                        'missing': e.args})
                for key in e.args:
                    self.futures[key]['event'].clear()
            else:
                break

        data = dict(zip(keys, data))

        result = pack_data(futures2, data)
        raise gen.Return(result)

    def gather(self, futures):
        """ Gather futures from distributed memory

        Accepts a future or any nested core container of futures

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> x = e.submit(add, 1, 2)  # doctest: +SKIP
        >>> e.gather(x)  # doctest: +SKIP
        3
        >>> e.gather([x, [x], x])  # doctest: +SKIP
        [3, [3], 3]
        """
        return sync(self.loop, self._gather, futures)

    @gen.coroutine
    def _scatter(self, data, workers=None):
        """ Coroutine behind ``scatter``; marks the new futures finished """
        remotes = yield self.scheduler._scatter(None, data, workers)
        if isinstance(remotes, list):
            remotes = [Future(r.key, self) for r in remotes]
            keys = {r.key for r in remotes}
        elif isinstance(remotes, dict):
            remotes = {k: Future(v.key, self) for k, v in remotes.items()}
            keys = set(remotes)
        else:
            # Previously fell through to an unbound ``keys``.
            raise TypeError("Expected a list or dict from scatter, got %s"
                            % type(remotes).__name__)

        for key in keys:
            self.futures[key]['status'] = 'finished'
            self.futures[key]['event'].set()

        raise gen.Return(remotes)

    def scatter(self, data, workers=None):
        """ Scatter data into distributed memory

        Accepts a list of data elements or dict of key-value pairs

        Optionally provide a set of workers to constrain the scatter.  Specify
        workers as hostname/port pairs, i.e.  ('127.0.0.1', 8787).
        Default port is 8788.

        Examples
        --------
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.scatter([1, 2, 3])  # doctest: +SKIP
        [RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>,
         RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>,
         RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>]
        >>> e.scatter({'x': 1, 'y': 2, 'z': 3})  # doctest: +SKIP
        {'x': RemoteData<center=127.0.0.1:8787, key=x>,
         'y': RemoteData<center=127.0.0.1:8787, key=y>,
         'z': RemoteData<center=127.0.0.1:8787, key=z>}

        >>> e.scatter([1, 2, 3], workers=[('hostname', 8788)])  # doctest: +SKIP
        """
        return sync(self.loop, self._scatter, data, workers=workers)

    @gen.coroutine
    def _get(self, dsk, keys, restrictions=None, raise_on_error=True):
        """ Coroutine behind ``get``

        With ``raise_on_error=False`` the result is a ``(status, value)``
        pair where status is 'OK' or 'error'.
        """
        flatkeys = list(flatten([keys]))
        futures = {key: Future(key, self) for key in flatkeys}

        self.send_to_scheduler({'op': 'update-graph',
                                'dsk': dsk,
                                'keys': flatkeys,
                                'restrictions': restrictions or {}})

        packed = pack_data(keys, futures)
        if raise_on_error:
            result = yield self._gather(packed)
        else:
            try:
                result = yield self._gather(packed)
                result = 'OK', result
            except Exception as e:
                result = 'error', e
        raise gen.Return(result)

    def get(self, dsk, keys, **kwargs):
        """ Gather futures from distributed memory

        Parameters
        ----------
        dsk: dict
        keys: object, or nested lists of objects
        restrictions: dict (optional)
            A mapping of {key: {set of worker hostnames}} that restricts where
            jobs can take place

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.get({'x': (add, 1, 2)}, 'x')  # doctest: +SKIP
        3
        """
        status, result = sync(self.loop, self._get, dsk, keys,
                              raise_on_error=False, **kwargs)

        if status == 'error':
            raise result
        else:
            return result

    def compute(self, *args, **kwargs):
        """ Compute dask collections on cluster

        Parameters
        ----------
        args: iterable of dask objects
            Collections like dask.array or dataframe or dask.value objects
        sync: bool (optional)
            Returns Futures if False (default) or concrete values if True

        Returns
        -------
        Tuple of Futures or concrete values

        Examples
        --------
        >>> from dask import do, value
        >>> from operator import add
        >>> x = dask.do(add)(1, 2)
        >>> y = dask.do(add)(x, x)
        >>> xx, yy = executor.compute(x, y)  # doctest: +SKIP
        >>> xx  # doctest: +SKIP
        <Future: status: finished, key: add-8f6e709446674bad78ea8aeecfee188e>
        >>> xx.result()  # doctest: +SKIP
        3
        >>> yy.result()  # doctest: +SKIP
        6
        """
        # Named ``blocking`` locally so the module-level ``sync`` helper is
        # not shadowed inside this method.
        blocking = kwargs.pop('sync', False)
        assert not kwargs
        if blocking:
            return dask.compute(*args, get=self.get)

        variables = [a for a in args if isinstance(a, Base)]

        groups = groupby(lambda x: x._optimize, variables)
        dsk = merge([opt(merge([v.dask for v in val]),
                         [v._keys() for v in val])
                     for opt, val in groups.items()])
        names = ['finalize-%s' % tokenize(v) for v in variables]
        dsk2 = {name: (v._finalize, v, v._keys())
                for name, v in zip(names, variables)}

        self.loop.add_callback(self.scheduler_queue.put_nowait,
                               {'op': 'update-graph',
                                'dsk': merge(dsk, dsk2),
                                'keys': names})

        # Non-dask arguments are passed through unchanged, in position.
        i = 0
        futures = []
        for arg in args:
            if isinstance(arg, Base):
                futures.append(Future(names[i], self))
                i += 1
            else:
                futures.append(arg)

        return futures

    @gen.coroutine
    def _restart(self):
        """ Ask the scheduler to restart and wait for its confirmation """
        self.send_to_scheduler({'op': 'restart'})
        self._restart_event = Event()
        yield self._restart_event.wait()

        raise gen.Return(self)

    def restart(self):
        """ Restart the distributed network

        This kills all active work, deletes all data on the network, and
        restarts the worker processes.
        """
        return sync(self.loop, self._restart)

    @gen.coroutine
    def _upload_file(self, filename, raise_on_error=True):
        """ Broadcast the file's bytes to the workers via the center

        Raises (or, with ``raise_on_error=False``, returns) the first
        exception found among the replies.
        """
        with open(filename, 'rb') as f:
            data = f.read()
        _, fn = os.path.split(filename)
        d = yield self.center.broadcast(msg={'op': 'upload_file',
                                             'filename': fn,
                                             'data': data})

        if any(isinstance(v, Exception) for v in d.values()):
            exception = next(v for v in d.values() if isinstance(v, Exception))
            if raise_on_error:
                raise exception
            else:
                raise gen.Return(exception)

        # Every reply is expected to equal the number of bytes sent.
        assert all(len(data) == v for v in d.values())

    def upload_file(self, filename):
        """ Upload local package to workers

        Parameters
        ----------
        filename: string
            Filename of .py file to send to workers
        """
        result = sync(self.loop, self._upload_file, filename,
                      raise_on_error=False)
        if isinstance(result, Exception):
            raise result
class ReceiveQueue():
    """Queue of event notifications received from LINE.

    Items placed on ``queued_items`` are ``(request_headers, request_body)``
    pairs.  ``watch_queue`` pops them one at a time and hands them to
    ``parse_receiving_event``, which verifies the signature, stores each
    result in the database and sends a reply through the LINE APIs.
    """

    # BOT definition
    Content_Type = "application/json"
    X_Line_ChannelID = "ChannelID"
    X_Line_ChannelSecret = "ChannelSecret"
    X_Line_Trusted_User_With_ACL = "MID"

    # Request headers sent with every call to the LINE APIs
    REQUEST_HEADER = {
        "Content-Type": "application/json; charset=UTF-8",
        "X-Line-ChannelID": X_Line_ChannelID,
        "X-Line-ChannelSecret": X_Line_ChannelSecret,
        "X-Line-Trusted-User-With-ACL": X_Line_Trusted_User_With_ACL
    }

    # URL of the POST EVENT API
    POST_EVENT_API = "https://trialbot-api.line.me/v1/events"
    # URL of the Profiles API (GET)
    PROFILES_API = "https://trialbot-api.line.me/v1/profiles"

    def __init__(self):
        """Create the queue and make sure both storage tables exist."""
        self.queued_items = Queue()
        self.db = DatabaseUtil("db/database.db")
        if not self.db.checkDuplicate("MessageObjects"):
            sql = ReceivingEventObject().getQuery_MessageObjects("create")
            self.db.execute(sql)
        if not self.db.checkDuplicate("OperationObjects"):
            sql = ReceivingEventObject().getQuery_OperationObjects("create")
            self.db.execute(sql)

    @tornado.gen.coroutine
    def watch_queue(self):
        """Forever pop ``(headers, body)`` items and process each one."""
        while True:
            items = yield self.queued_items.get()
            self.parse_receiving_event(items[0], items[1])

    def parse_receiving_event(self, req_head, req_body):
        """Parse an event notification coming from the LINE server.

        Parameters
        ----------
        req_head: mapping
            Request headers; ``X-LINE-ChannelSignature`` is read from it.
        req_body:
            Raw request body (JSON) as received.
        """
        # Verify the request signature.  A missing header is treated the
        # same as a bad signature instead of raising KeyError, which would
        # otherwise escape into (and terminate) ``watch_queue``.
        signature = req_head.get('X-LINE-ChannelSignature')
        if signature is None or not self.validate_signature(signature,
                                                            req_body):
            print("Signature NG")
            return

        # Parse the request.
        # A single notification may contain several results;
        # they are handled one at a time below.
        json_dic = tornado.escape.json_decode(req_body)
        for result in json_dic["result"]:
            print(result)
            result_jsonstr = tornado.escape.json_encode(result)
            reo = ReceivingEventObject(result)
            if reo.isEventTypeMessage():
                print("MessageObject")
                # Message notification (MessageObject)
                mo = reo.content
                # Store in the DB
                sql = reo.getQuery_MessageObjects('insert')
                d = {
                    'id': mo.id,
                    'contentType': mo.contentType,
                    'from': mo._from,
                    'createdTime': mo.createdTime,
                    'to': mo.to,
                    'toType': mo.toType,
                    'text': mo.text,
                    'json_data': result_jsonstr
                }
                self.db.execute(sql, d)
                # Send the reply to the event notification
                self.toPOSTEventAPI(
                    tornado.escape.json_encode(
                        SendingEventObject(reo).createRequestBody()
                    )
                )
            elif reo.isEventTypeOperation():
                # User operation (OperationObject)
                oo = reo.content
                # Store in the DB
                sql = reo.getQuery_OperationObjects('insert')
                d = {
                    'revision': oo.revision,
                    'opType': oo.opType,
                    'params0': oo.params[0],
                    'params1': '',
                    'params2': '',
                    'json_data': result_jsonstr
                }
                self.db.execute(sql, d)
                # The user added the bot as a friend (includes unblocking)
                if reo.content.isFriendsAdd():
                    print("User Operation Add ",reo.content.params[0])
                    # Fetch the profile.  toProfilesAPI returns None when
                    # the request failed, so fall back to "no contacts".
                    contacts = self.toProfilesAPI(reo.content.params[0])
                    for v in contacts or []:
                        name = v["displayName"]
                        # Send the thank-you message
                        seo = SendingEventObject(reo)
                        seo.createSendContent_Thanks(name)
                        self.toPOSTEventAPI(
                            tornado.escape.json_encode(
                                seo.createRequestBody()
                            )
                        )
                elif reo.content.isFriendsBlock():
                    # The user blocked the bot
                    print("User Operation Block ",reo.content.params[0])

    @tornado.gen.coroutine
    def toPOSTEventAPI(self, send_body):
        """Send an event to the LINE server (we act as the HTTP client).

        Parameters
        ----------
        send_body: str
            JSON of a Sending Event Object.  The request body must be
            8 KiB or smaller (LINE specification).

        Returns
        -------
        The HTTP response, or ``None`` when the request failed.
        """
        # AsyncHTTPClient() hands out an instance shared per IOLoop, so it
        # is deliberately not closed here: closing it would break requests
        # still in flight from other calls.
        http_client = tornado.httpclient.AsyncHTTPClient()
        response = None
        try:
            response = yield http_client.fetch(
                self.POST_EVENT_API,
                method='POST',
                headers=self.REQUEST_HEADER,
                body=send_body
            )
        except tornado.httpclient.HTTPError as e:
            # HTTPError is raised for non-200 responses
            print("Error: " + str(e))
        except Exception as e:
            # Other errors
            print("Error: " + str(e))
        return response

    def toProfilesAPI(self, mid):
        """Ask the LINE server for a profile through the PROFILES API.

        The server answers with the JSON of a Contact Response Object.
        TODO: requesting several profiles at once is not supported yet.

        Parameters
        ----------
        mid: str
            User identifier (comma separated when there are several).

        Returns
        -------
        The ``contacts`` entry of the decoded response, or ``None`` when
        the request or the decoding failed.
        """
        # The request is a GET carrying the user identifier(s)
        http_client = tornado.httpclient.HTTPClient()
        url = url_concat(self.PROFILES_API, {"mids": mid})
        print(url)
        result = None
        try:
            response = http_client.fetch(
                url,
                method='GET',
                headers=self.REQUEST_HEADER,
                body=None
            )
            json_dic = tornado.escape.json_decode(response.body)
            result = json_dic["contacts"]
        except tornado.httpclient.HTTPError as e:
            # HTTPError is raised for non-200 responses
            print("Error: " + str(e))
        except Exception as e:
            # Other errors
            print("Error: " + str(e))
        finally:
            # The blocking client owns its own resources; always free them
            http_client.close()
        return result

    def validate_signature(self, signature, content):
        """Check by signature that a request really comes from the LINE server.

        [What is verified]
        1. Compute the HMAC-SHA256 digest of the request body using the
           ChannelSecret as the secret key.
        2. Check that the Base64 encoding of that digest matches the
           ChannelSignature request header.

        Borrowed from LINE BOT SDK for Python
        (https://github.com/studio3104/line-bot-sdk-python).

        Parameters
        ----------
        signature: str
            Value of the ChannelSignature header.
        content: bytes or str
            Request body; text is encoded as UTF-8 before hashing.

        Returns
        -------
        bool
        """
        if not isinstance(content, (bytes, bytearray)):
            # hmac only accepts bytes-like messages
            content = content.encode('utf-8')
        expected = base64.b64encode(
            hmac.new(
                self.X_Line_ChannelSecret.encode('utf-8'),
                msg=content,
                digestmod=hashlib.sha256
            ).digest()
        )
        return hmac.compare_digest(signature.encode('utf-8'), expected)
class Worker(Server):
    """ Worker Node

    Workers perform two functions:

    1.  **Serve data** from a local dictionary
    2.  **Perform computation** on that data and on data from peers

    Additionally workers keep a Center informed of their data and use that
    Center to gather data from other workers when necessary to perform a
    computation.

    You can start a worker with the ``dworker`` command line application::

        $ dworker scheduler-ip:port

    **State**

    * **data:** ``{key: object}``:
        Dictionary mapping keys to actual values
    * **active:** ``{key}``:
        Set of keys currently under computation
    * **ncores:** ``int``:
        Number of cores used by this worker process
    * **executor:** ``concurrent.futures.ThreadPoolExecutor``:
        Executor used to perform computation
    * **local_dir:** ``path``:
        Path on local machine to store temporary files
    * **center:** ``rpc``:
        Location of center or scheduler.  See ``.ip/.port`` attributes.
    * **name:** ``string``:
        Alias
    * **services:** ``{str: Server}``:
        Auxiliary web servers running on this worker
    * **service_ports:** ``{str: port}``:
        Port each auxiliary service listens on

    Examples
    --------

    Create centers and workers in Python:

    >>> from distributed import Center, Worker
    >>> c = Center('192.168.0.100', 8787)  # doctest: +SKIP
    >>> w = Worker(c.ip, c.port)  # doctest: +SKIP
    >>> yield w._start(port=8788)  # doctest: +SKIP

    Or use the command line::

        $ dcenter
        Start center at 127.0.0.1:8787

        $ dworker 127.0.0.1:8787
        Start worker at:            127.0.0.1:8788
        Registered with center at:  127.0.0.1:8787

    See Also
    --------
    distributed.center.Center:
    """

    def __init__(self, center_ip, center_port, ip=None, ncores=None,
                 loop=None, local_dir=None, services=None, service_ports=None,
                 name=None, **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        self.executor = ThreadPoolExecutor(self.ncores)
        # One token per core; executor_submit takes a token before
        # submitting a job and returns it afterwards.
        self.thread_tokens = Queue()
        # https://github.com/tornadoweb/tornado/issues/1595#issuecomment-198551572
        for i in range(self.ncores):
            self.thread_tokens.put_nowait(i)
        self.center = rpc(ip=center_ip, port=center_port)
        self.active = set()
        self.name = name

        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)

        # Uploaded modules are written to local_dir, so make it importable
        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            # A key may be ``(name, port)`` to request a specific port
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {'compute': self.compute,
                    'gather': self.gather,
                    'compute-stream': self.compute_stream,
                    'run': self.run,
                    'get_data': self.get_data,
                    'update_data': self.update_data,
                    'delete_data': self.delete_data,
                    'terminate': self.terminate,
                    'ping': pingpong,
                    'health': self.health,
                    'upload_file': self.upload_file}

        super(Worker, self).__init__(handlers, **kwargs)

    @gen.coroutine
    def _start(self, port=0):
        """ Listen on ``port`` and register with the center, retrying until
        the center can be reached.

        Raises ``ValueError`` if the center answers anything but ``'OK'``.
        """
        self.listen(port)
        self.name = self.name or self.address
        for k, v in self.services.items():
            v.listen(0)
            self.service_ports[k] = v.port

        logger.info(' Start worker at: %20s:%d', self.ip, self.port)
        for k, v in self.service_ports.items():
            logger.info(' %16s at: %20s:%d' % (k, self.ip, v))
        logger.info('Waiting to connect to: %20s:%d',
                    self.center.ip, self.center.port)
        while True:
            try:
                resp = yield self.center.register(
                        ncores=self.ncores, address=(self.ip, self.port),
                        keys=list(self.data), services=self.service_ports,
                        name=self.name)
                break
            except (OSError, StreamClosedError):
                logger.debug("Unable to register with scheduler. Waiting")
                yield gen.sleep(0.5)
        if resp != 'OK':
            raise ValueError(resp)
        logger.info(' Registered to: %20s:%d',
                    self.center.ip, self.center.port)
        self.status = 'running'

    def start(self, port=0):
        """ Schedule ``_start`` on the worker's event loop """
        self.loop.add_callback(self._start, port)

    def identity(self, stream):
        """ Describe this worker: type name, id and center address """
        return {'type': type(self).__name__, 'id': self.id,
                'center': (self.center.ip, self.center.port)}

    @gen.coroutine
    def _close(self, report=True, timeout=10):
        """ Shut the worker down

        When ``report`` is true the worker first unregisters from the
        center, waiting at most ``timeout`` seconds for the answer.
        """
        if report:
            yield gen.with_timeout(timedelta(seconds=timeout),
                    self.center.unregister(address=(self.ip, self.port)),
                    io_loop=self.loop)
        self.center.close_streams()
        self.stop()
        self.executor.shutdown()
        if os.path.exists(self.local_dir):
            shutil.rmtree(self.local_dir)

        for service in self.services.values():
            service.stop()

        self.status = 'closed'
        self.stop()

    @gen.coroutine
    def terminate(self, stream, report=True):
        """ Handler: close the worker and answer ``'OK'`` """
        yield self._close(report=report)
        raise Return('OK')

    @property
    def address(self):
        """ ``'ip:port'`` string """
        return '%s:%d' % (self.ip, self.port)

    @property
    def address_tuple(self):
        """ ``(ip, port)`` tuple """
        return (self.ip, self.port)

    @gen.coroutine
    def gather(self, stream=None, who_has=None):
        """ Handler: copy the keys of ``who_has`` that are not held locally
        from peers into ``self.data``

        Returns ``{'status': 'OK'}`` or, when a key cannot be found,
        ``{'status': 'missing-data', 'keys': ...}``.
        """
        who_has = {k: [coerce_to_address(addr) for addr in v]
                   for k, v in who_has.items()
                   if k not in self.data}
        try:
            result = yield gather_from_workers(who_has)
        except KeyError as e:
            # The message needs a placeholder for ``e``; without one the
            # logging module reports a formatting error instead.
            logger.warning("Could not find data: %s", e)
            raise Return({'status': 'missing-data',
                          'keys': e.args})
        else:
            self.data.update(result)
            raise Return({'status': 'OK'})

    @gen.coroutine
    def _ready_task(self, function=None, key=None, args=(), kwargs={},
                    task=None, who_has=None):
        """ Collect dependencies and deserialize a task

        Gathers the data named in ``who_has`` that is not already local,
        deserializes ``task`` / ``function`` / ``args`` / ``kwargs`` and
        substitutes the gathered data into the arguments.

        Returns a dict whose ``'status'`` is ``'OK'`` (with ``function``,
        ``args``, ``kwargs``, ``diagnostics`` and ``key``),
        ``'missing-data'``, or the result of ``error_message`` when
        deserialization fails.
        """
        diagnostics = {}
        if who_has:
            local_data = {k: self.data[k] for k in who_has if k in self.data}
            who_has = {k: set(map(coerce_to_address, v))
                       for k, v in who_has.items()
                       if k not in self.data}
            try:
                logger.info("gather %d keys from peers: %s",
                            len(who_has), str(who_has))
                diagnostics['transfer-start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer-stop'] = time()
                data = merge(local_data, other)
            except KeyError as e:
                logger.warning("Could not find data for %s", key)
                raise Return({'status': 'missing-data',
                              'keys': e.args,
                              'key': key})
        else:
            data = {}

        try:
            start = default_timer()
            if task is not None:
                task = loads(task)
            if function is not None:
                function = loads(function)
            if args:
                args = loads(args)
            if kwargs:
                kwargs = loads(kwargs)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warning("Could not deserialize task", exc_info=True)
            raise Return(assoc(error_message(e), 'key', key))

        if task is not None:
            # A whole task was sent instead of function/args/kwargs
            assert not function and not args and not kwargs
            function = execute_task
            args = (task,)

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({'status': 'OK',
                      'function': function,
                      'args': args2,
                      'kwargs': kwargs2,
                      'diagnostics': diagnostics,
                      'key': key})

    @gen.coroutine
    def executor_submit(self, key, function, *args, **kwargs):
        """ Safely run function in thread pool executor

        We've run into issues running concurrent.future futures within
        tornado.  Apparently it's advantageous to use timeouts and periodic
        callbacks to ensure things run smoothly.  This can get tricky, so we
        pull it off into an separate method.
        """
        token = yield self.thread_tokens.get()
        job_counter[0] += 1
        i = job_counter[0]
        future = self.executor.submit(function, *args, **kwargs)
        pc = PeriodicCallback(lambda: logger.debug("future state: %s - %s",
                                                   key, future._state), 1000)
        pc.start()
        try:
            if sys.version_info < (3, 2):
                yield future
            else:
                # Poll with a one second timeout rather than waiting on the
                # future directly (see the docstring)
                while not future.done() and future._state != 'FINISHED':
                    try:
                        yield gen.with_timeout(timedelta(seconds=1), future,
                                               io_loop=self.loop)
                        break
                    except gen.TimeoutError:
                        logger.info("work queue size: %d",
                                    self.executor._work_queue.qsize())
                        logger.info("future state: %s", future._state)
                        logger.info("Pending job %d: %s", i, future)
        finally:
            pc.stop()
            self.thread_tokens.put(token)

        result = future.result()

        logger.info("Finish job %d, %s", i, key)
        raise gen.Return(result)

    @gen.coroutine
    def compute_stream(self, stream):
        """ Handler: serve ``compute-task`` messages arriving on ``stream``

        Each task is run through ``compute`` and its result is sent back in
        batches.  The loop ends when the stream closes or a ``'close'``
        operation is received.
        """
        with log_errors():
            logger.debug("Open compute stream")
            bstream = BatchedSend(interval=10, loop=self.loop)
            bstream.start(stream)

        @gen.coroutine
        def process(msg):
            try:
                result = yield self.compute(report=False, **msg)
                bstream.send(result)
            except Exception as e:
                logger.exception(e)
                bstream.send(assoc(error_message(e), 'key', msg.get('key')))

        with log_errors():
            # A flag is needed: ``break`` inside the ``for`` below only
            # leaves that loop, not the surrounding read loop.
            closed = False
            while not closed:
                try:
                    msgs = yield read(stream)
                except StreamClosedError:
                    break
                if not isinstance(msgs, list):
                    msgs = [msgs]

                for msg in msgs:
                    op = msg.pop('op', None)
                    if op == 'close':
                        closed = True
                        break
                    elif op == 'compute-task':
                        self.loop.add_callback(process, msg)
                    else:
                        logger.warning("Unknown operation %s, %s", op, msg)

            yield bstream.close()
            logger.info("Close compute stream")

    @gen.coroutine
    def compute(self, stream=None, function=None, key=None, args=(),
                kwargs={}, task=None, who_has=None, report=True):
        """ Execute function

        Prepares the task with ``_ready_task``, runs it in the thread pool
        and, on success, stores the result under ``key`` in ``self.data``.
        When ``report`` is true the center is told about the new key.

        Returns the status dict of the computation, or the non-OK dict
        produced by ``_ready_task``.
        """
        self.active.add(key)
        try:
            # Ready function for computation
            msg = yield self._ready_task(function=function, key=key,
                                         args=args, kwargs=kwargs, task=task,
                                         who_has=who_has)
            if msg['status'] != 'OK':
                raise Return(msg)

            function = msg['function']
            args = msg['args']
            kwargs = msg['kwargs']

            # Log and compute in separate thread
            result = yield self.executor_submit(key, apply_function, function,
                                                args, kwargs)

            result['key'] = key
            result.update(msg['diagnostics'])

            if result['status'] == 'OK':
                self.data[key] = result.pop('result')
                if report:
                    response = yield self.center.add_keys(
                            address=(self.ip, self.port), keys=[key])
                    if not response == 'OK':
                        logger.warning(
                                'Could not report results to center: %s',
                                str(response))
            else:
                logger.warning(" Compute Failed\n"
                               "Function: %s\n"
                               "args: %s\n"
                               "kwargs: %s\n",
                               str(funcname(function))[:1000],
                               str(args)[:1000],
                               str(kwargs)[:1000], exc_info=True)

            logger.debug("Send compute response to scheduler: %s, %s",
                         key, msg)
            raise Return(result)
        finally:
            # Runs on every exit, including unexpected exceptions, so a
            # failed task never stays listed as active.
            self.active.discard(key)

    @gen.coroutine
    def run(self, stream, function=None, args=(), kwargs={}):
        """ Handler: deserialize and call ``function`` on the event loop

        Returns ``{'status': 'OK', 'result': <serialized result>}`` or the
        result of ``error_message`` when the call raises.
        """
        function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)
        try:
            result = function(*args, **kwargs)
        except Exception as e:
            logger.warning(" Run Failed\n"
                           "Function: %s\n"
                           "args: %s\n"
                           "kwargs: %s\n",
                           str(funcname(function))[:1000],
                           str(args)[:1000],
                           str(kwargs)[:1000], exc_info=True)

            response = error_message(e)
        else:
            response = {
                'status': 'OK',
                'result': dumps(result),
            }
        raise Return(response)

    @gen.coroutine
    def update_data(self, stream, data=None, report=True):
        """ Handler: deserialize ``data`` and add it to ``self.data``

        Returns ``{'nbytes': {key: size}, 'status': 'OK'}``.
        """
        data = valmap(loads, data)
        self.data.update(data)
        if report:
            response = yield self.center.add_keys(
                    address=(self.ip, self.port), keys=list(data))
            assert response == 'OK'
        info = {'nbytes': {k: sizeof(v) for k, v in data.items()},
                'status': 'OK'}
        raise Return(info)

    @gen.coroutine
    def delete_data(self, stream, keys=None, report=True):
        """ Handler: drop ``keys`` from ``self.data`` and answer ``'OK'`` """
        for key in keys:
            if key in self.data:
                del self.data[key]
        logger.info("Deleted %d keys", len(keys))
        if report:
            logger.debug("Reporting loss of keys to center")
            yield self.center.remove_keys(address=self.address,
                                          keys=list(keys))
        raise Return('OK')

    def get_data(self, stream, keys=None):
        """ Handler: serialized values of the requested keys held locally """
        return {k: dumps(self.data[k]) for k in keys if k in self.data}

    def upload_file(self, stream, filename=None, data=None, load=True):
        """ Handler: write ``data`` to ``filename`` inside ``local_dir``

        When ``load`` is true, ``.py`` / ``.pyc`` files are (re)imported and
        the distributions found in ``.egg`` files are (re)imported after
        the egg is added to ``sys.path``.

        Returns ``{'status': 'OK', 'nbytes': ...}`` or
        ``{'status': 'error', 'exception': ...}`` when loading fails.
        """
        out_filename = os.path.join(self.local_dir, filename)
        if isinstance(data, unicode):
            data = data.encode()
        with open(out_filename, 'wb') as f:
            f.write(data)
            f.flush()

        if load:
            try:
                name, ext = os.path.splitext(filename)
                if ext in ('.py', '.pyc'):
                    logger.info("Reload module %s from .py file", name)
                    name = name.split('-')[0]
                    reload(import_module(name))
                if ext == '.egg':
                    sys.path.append(out_filename)
                    # find_distributions yields lazily; materialize it so
                    # the emptiness check below sees the real contents.
                    pkgs = list(pkg_resources.find_distributions(out_filename))
                    for pkg in pkgs:
                        logger.info("Load module %s from egg",
                                    pkg.project_name)
                        reload(import_module(pkg.project_name))
                    if not pkgs:
                        logger.warning("Found no packages in egg file")
            except Exception as e:
                logger.exception(e)
                return {'status': 'error', 'exception': dumps(e)}
        return {'status': 'OK', 'nbytes': len(data)}

    def health(self, stream=None):
        """ Information about worker """
        d = {'active': len(self.active),
             'stored': len(self.data),
             'time': time()}
        try:
            import psutil
            mem = psutil.virtual_memory()
            d.update({'cpu': psutil.cpu_percent(),
                      'memory': mem.total,
                      'memory-percent': mem.percent})

            try:
                net_io = psutil.net_io_counters()
                d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
                d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
            except AttributeError:
                # No previous sample to diff against yet
                pass
            self._last_net_io = net_io

            try:
                disk_io = psutil.disk_io_counters()
                d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes
                d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes
            except (AttributeError, RuntimeError):
                disk_io = None
            self._last_disk_io = disk_io
        except ImportError:
            # psutil is optional; report only the basic counters without it
            pass
        return d
class Rx(PrettyPrintable):
    """Receiving end of a channel.

    Incoming messages are checked against ``rx_tree`` by ``push`` and
    buffered in a queue; ``get`` pops them and decodes each one with a
    protocol callable.  The headers of the most recently read message are
    exposed through ``headers``.
    """

    def __init__(self, rx_tree, session_id, header_table=None, io_loop=None,
                 service_name=None, raw_headers=None, trace_id=None):
        table = CocaineHeaders() if header_table is None else header_table

        # If it's not the main thread
        # and a current IOloop doesn't exist here,
        # IOLoop.instance becomes self._io_loop
        self._io_loop = io_loop if io_loop else IOLoop.current()
        self._queue = Queue()
        self._done = False

        self.session_id = session_id
        self.service_name = service_name
        self.rx_tree = rx_tree
        self.default_protocol = detect_protocol_type(rx_tree)

        self._headers = table
        self._current_headers = table.merge(raw_headers)
        self.log = get_trace_adapter(log, trace_id)

    @coroutine
    def get(self, timeout=0, protocol=None):
        """Return the next decoded message.

        ``timeout`` (seconds) bounds the wait when positive; otherwise the
        call waits indefinitely.  ``protocol`` overrides the protocol
        detected from the initial ``rx_tree``.

        Raises ``ChokeEvent`` once the channel is done and drained, any
        exception queued through ``error``, and ``ServiceError`` when the
        protocol yields a ``ProtocolError``.
        """
        drained = self._done and self._queue.empty()
        if drained:
            raise ChokeEvent()

        # to pull various service errors
        pending = (self._queue.get() if timeout <= 0
                   else self._queue.get(datetime.timedelta(seconds=timeout)))
        item = yield pending

        if isinstance(item, Exception):
            raise item

        decode = self.default_protocol if protocol is None else protocol
        name, payload, raw_headers = item
        self._current_headers = self._headers.merge(raw_headers)
        decoded = decode(name, payload)
        if not isinstance(decoded, ProtocolError):
            raise Return(decoded)
        raise ServiceError(self.service_name, decoded.reason,
                           decoded.code, decoded.category)

    def done(self):
        """Mark the channel as finished."""
        self._done = True

    def push(self, msg_type, payload, raw_headers):
        """Queue an incoming message and advance the dispatch tree.

        Raises ``InvalidMessageType`` when ``msg_type`` is not present in
        the current ``rx_tree``.
        """
        transition = self.rx_tree.get(msg_type)
        self.log.debug("dispatch %s %.300s", transition, payload)
        if transition is None:
            raise InvalidMessageType(self.service_name,
                                     CocaineErrno.INVALIDMESSAGETYPE,
                                     "unexpected message type %s" % msg_type)

        name, next_tree = transition
        self.log.info(
            "got message from `%s`: channel id: %s, type: %s",
            self.service_name, self.session_id, name
        )
        self._queue.put_nowait((name, payload, raw_headers))

        if next_tree == {}:
            # the last transition
            self.done()
            return
        if next_tree is not None:
            # not a recursive transition
            self.rx_tree = next_tree

    def error(self, err):
        """Queue ``err`` so that a later ``get`` raises it."""
        self._queue.put_nowait(err)

    def closed(self):
        """Tell whether the channel has been marked as finished."""
        return self._done

    def _format(self):
        fields = (self.service_name, self._queue, self._done)
        return "name: %s, queue: %s, done: %s" % fields

    @property
    def headers(self):
        """Headers merged while reading the most recent message."""
        return self._current_headers
class Executor(object):
    """ Distributed executor with data dependencies

    This executor resembles executors in concurrent.futures but also allows
    Futures within submit/map calls.

    Provide center address on initialization

    >>> executor = Executor(('127.0.0.1', 8787))  # doctest: +SKIP

    Use ``submit`` method like normal

    >>> a = executor.submit(add, 1, 2)  # doctest: +SKIP
    >>> b = executor.submit(add, 10, 20)  # doctest: +SKIP

    Additionally, provide results of submit calls (futures) to further submit
    calls:

    >>> c = executor.submit(add, a, b)  # doctest: +SKIP

    This allows for the dynamic creation of complex dependencies.
    """

    def __init__(self, center, start=True, delete_batch_time=1):
        self.center = coerce_to_rpc(center)
        # {key: {'event': Event, 'status': str}} for every known future
        self.futures = dict()
        self.refcount = defaultdict(int)
        self.dask = dict()
        self.restrictions = dict()
        self.loop = IOLoop()
        self.report_queue = Queue()
        self.scheduler_queue = Queue()
        self._shutdown_event = Event()
        self._delete_batch_time = delete_batch_time

        if start:
            self.start()

    def start(self):
        """ Start scheduler running in separate thread """
        from threading import Thread
        self.loop.add_callback(self._go)
        self._loop_thread = Thread(target=self.loop.start)
        self._loop_thread.start()

    def __enter__(self):
        if not self.loop._running:
            self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.shutdown()

    def _inc_ref(self, key):
        """ Count one more reference to ``key`` """
        self.refcount[key] += 1

    def _dec_ref(self, key):
        """ Drop one reference to ``key``; release it when none remain """
        self.refcount[key] -= 1
        if self.refcount[key] == 0:
            del self.refcount[key]
            self._release_key(key)

    def _release_key(self, key):
        """ Release key from distributed memory """
        self.futures[key]['event'].clear()
        logger.debug("Release key %s", key)
        del self.futures[key]
        self.scheduler_queue.put_nowait({'op': 'release-held-data',
                                         'key': key})

    @gen.coroutine
    def report(self):
        """ Listen to scheduler """
        while True:
            msg = yield self.report_queue.get()
            if msg['op'] == 'close':
                break
            if msg['op'] == 'task-finished':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'finished'
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'lost-data':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'lost'
                    self.futures[msg['key']]['event'].clear()
            if msg['op'] == 'task-erred':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'error'
                    self.futures[msg['key']]['event'].set()

    @gen.coroutine
    def _shutdown(self):
        """ Send shutdown signal and wait until _go completes """
        self.report_queue.put_nowait({'op': 'close'})
        self.scheduler_queue.put_nowait({'op': 'close'})
        yield self._shutdown_event.wait()

    def shutdown(self):
        """ Send shutdown signal and wait until scheduler terminates """
        self.report_queue.put_nowait({'op': 'close'})
        self.scheduler_queue.put_nowait({'op': 'close'})
        self.loop.stop()
        self._loop_thread.join()

    @gen.coroutine
    def _go(self):
        """ Setup and run all other coroutines.  Block until finished. """
        self.who_has, self.has_what, self.ncores = yield [
                self.center.who_has(),
                self.center.has_what(),
                self.center.ncores()]
        self.waiting = {}
        self.processing = {}
        self.stacks = {}

        worker_queues = {worker: Queue() for worker in self.ncores}
        delete_queue = Queue()

        coroutines = ([
            self.report(),
            scheduler(self.scheduler_queue, self.report_queue, worker_queues,
                      delete_queue, self.who_has, self.has_what, self.ncores,
                      self.dask, self.restrictions, self.waiting, self.stacks,
                      self.processing),
            delete(self.scheduler_queue, delete_queue,
                   self.center.ip, self.center.port, self._delete_batch_time)]
            + [worker(self.scheduler_queue, worker_queues[w], w, n)
               for w, n in self.ncores.items()])

        yield All(coroutines)
        self._shutdown_event.set()

    def submit(self, func, *args, **kwargs):
        """ Submit a function application to the scheduler

        Parameters
        ----------
        func: callable
        *args:
        **kwargs:
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> c = executor.submit(add, a, b)  # doctest: +SKIP

        Returns
        -------
        Future

        See Also
        --------
        distributed.executor.Executor.map:
        """
        if not callable(func):
            raise TypeError("First input to submit must be a callable function")

        key = kwargs.pop('key', None)
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)

        if key is None:
            if pure:
                key = funcname(func) + '-' + tokenize(func, kwargs, *args)
            else:
                key = funcname(func) + '-' + next(tokens)

        # Already submitted under this key: hand back a future for it
        if key in self.futures:
            return Future(key, self)

        if kwargs:
            task = (apply, func, args, kwargs)
        else:
            task = (func,) + args

        if workers is not None:
            restrictions = {key: workers}
        else:
            restrictions = {}

        # The early return above guarantees the key is new at this point
        self.futures[key] = {'event': Event(), 'status': 'waiting'}

        logger.debug("Submit %s(...), %s", funcname(func), key)
        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': {key: task},
                                         'keys': [key],
                                         'restrictions': restrictions})

        return Future(key, self)

    def map(self, func, *iterables, **kwargs):
        """ Map a function on a sequence of arguments

        Arguments can be normal objects or Futures

        Parameters
        ----------
        func: callable
        iterables: Iterables
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> L = executor.map(func, sequence)  # doctest: +SKIP

        Returns
        -------
        list of futures

        Raises
        ------
        TypeError
            If ``func`` is not callable, or ``workers`` is neither a list,
            a set nor None.
        ValueError
            If per-task worker restrictions do not match the number of tasks.

        See also
        --------
        distributed.executor.Executor.submit
        """
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)
        if not callable(func):
            raise TypeError("First input to map must be a callable function")
        iterables = [list(it) for it in iterables]
        if pure:
            keys = [funcname(func) + '-' + tokenize(func, kwargs, *args)
                    for args in zip(*iterables)]
        else:
            uid = str(uuid.uuid4())
            keys = [funcname(func) + '-' + uid + '-' + next(tokens)
                    for i in range(min(map(len, iterables)))]

        if not kwargs:
            dsk = {key: (func,) + args
                   for key, args in zip(keys, zip(*iterables))}
        else:
            dsk = {key: (apply, func, args, kwargs)
                   for key, args in zip(keys, zip(*iterables))}

        # Validate ``workers`` before touching any state so that a bad
        # argument does not leave 'waiting' futures that were never sent
        # to the scheduler.
        if isinstance(workers, (list, set)):
            if workers and isinstance(first(workers), (list, set)):
                if len(workers) != len(keys):
                    raise ValueError("You only provided %d worker restrictions"
                    " for a sequence of length %d" % (len(workers), len(keys)))
                restrictions = dict(zip(keys, workers))
            else:
                restrictions = {key: workers for key in keys}
        elif workers is None:
            restrictions = {}
        else:
            raise TypeError("Workers must be a list or set of workers or None")

        for key in dsk:
            if key not in self.futures:
                self.futures[key] = {'event': Event(), 'status': 'waiting'}

        logger.debug("map(%s, ...)", funcname(func))
        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': dsk,
                                         'keys': keys,
                                         'restrictions': restrictions})

        return [Future(key, self) for key in keys]

    @gen.coroutine
    def _gather(self, futures):
        """ Wait for the futures, fetch their data and rebuild the
        container with concrete values in place of futures """
        futures2, keys = unpack_remotedata(futures)
        keys = list(keys)

        while True:
            yield All([self.futures[key]['event'].wait() for key in keys])
            try:
                data = yield _gather(self.center, keys)
            except KeyError as e:
                # Some data vanished: tell the scheduler, then wait again
                # for the affected keys
                self.scheduler_queue.put_nowait({'op': 'missing-data',
                                                 'missing': e.args})
                for key in e.args:
                    self.futures[key]['event'].clear()
            else:
                break

        data = dict(zip(keys, data))

        result = pack_data(futures2, data)
        raise gen.Return(result)

    def gather(self, futures):
        """ Gather futures from distributed memory

        Accepts a future or any nested core container of futures

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> x = e.submit(add, 1, 2)  # doctest: +SKIP
        >>> e.gather(x)  # doctest: +SKIP
        3
        >>> e.gather([x, [x], x])  # doctest: +SKIP
        [3, [3], 3]
        """
        return sync(self.loop, self._gather, futures)

    @gen.coroutine
    def _get(self, dsk, keys, restrictions=None):
        """ Submit the graph ``dsk`` and gather the values of ``keys`` """
        flatkeys = list(flatten(keys))
        for key in flatkeys:
            if key not in self.futures:
                self.futures[key] = {'event': Event(), 'status': None}
        futures = {key: Future(key, self) for key in flatkeys}

        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': dsk,
                                         'keys': flatkeys,
                                         'restrictions': restrictions or {}})

        packed = pack_data(keys, futures)
        result = yield self._gather(packed)
        raise gen.Return(result)

    def get(self, dsk, keys, **kwargs):
        """ Compute a dask graph and return the values of ``keys``

        Parameters
        ----------
        dsk: dict
        keys: object, or nested lists of objects
        restrictions: dict (optional)
            A mapping of {key: {set of worker hostnames}} that restricts where
            jobs can take place

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.get({'x': (add, 1, 2)}, 'x')  # doctest: +SKIP
        3
        """
        return sync(self.loop, self._get, dsk, keys, **kwargs)
class SubscribeListener(SubscribeCallback):
    """Subscribe callback that exposes events as awaitable helpers.

    Status changes set the ``connected_event`` / ``disconnected_event``
    events, error statuses are queued on ``error_queue``, and messages and
    presence events are queued on ``message_queue`` / ``presence_queue``.
    The ``wait_for_*`` coroutines wait on those events and queues.
    """

    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.error_queue = Queue()

    def status(self, pubnub, status):
        """Record a status change: connect, disconnect, or error."""
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()
        elif status.is_error():
            self.error_queue.put_nowait(status.error_data.exception)

    def message(self, pubnub, message):
        """Queue an incoming message."""
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        """Queue an incoming presence event."""
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def _wait_for(self, coro):
        """Wait for ``coro`` or for the next queued error, whichever is first.

        Returns the result of ``coro``; raises the queued exception when an
        error arrives first.
        """
        # NOTE(review): the pending error getter is not withdrawn when
        # ``coro`` wins; confirm whether a later error can be delivered to
        # such a stale getter instead of the current waiter.
        error = self.error_queue.get()
        wi = tornado.gen.WaitIterator(coro, error)

        while not wi.done():
            result = yield wi.next()

            if wi.current_future == coro:
                raise tornado.gen.Return(result)
            elif wi.current_future == error:
                raise result
            else:
                raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

    @tornado.gen.coroutine
    def wait_for_connect(self):
        """Wait until connected; raise if already connected."""
        if not self.connected_event.is_set():
            yield self._wait_for(self.connected_event.wait())
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        """Wait until disconnected; raise if already disconnected."""
        if not self.disconnected_event.is_set():
            yield self._wait_for(self.disconnected_event.wait())
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        """Return the next message whose channel is in ``channel_names``.

        Messages on other channels are consumed and discarded.  An error
        reported through ``status`` while waiting is raised to the caller.
        """
        channel_names = list(channel_names)
        while True:
            env = yield self._wait_for(self.message_queue.get())
            # Acknowledge only items that were actually taken from the
            # queue: calling task_done() after a failed get() can raise
            # ValueError and hide the original error.
            try:
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        """Return the next presence event whose channel is in ``channel_names``.

        Events on other channels are consumed and discarded.  If waiting
        fails the loop stops and ``None`` is returned.
        """
        channel_names = list(channel_names)
        while True:
            try:
                env = yield self._wait_for(self.presence_queue.get())
            except Exception:
                # Deliberate best effort: give up quietly on any error.
                # Nothing was taken from the queue, so no task_done() here.
                break

            try:
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
            finally:
                self.presence_queue.task_done()
class Scheduler(object):
    """Queue-driven task scheduler.

    The scheduler reads messages from ``scheduler_queue``, updates its
    bookkeeping dictionaries, pushes work onto per-worker queues, pushes
    deletions onto ``delete_queue`` and reports progress on ``report_queue``.
    """

    def __init__(self, center, delete_batch_time=1):
        # Communication queues
        self.scheduler_queue = Queue()  # messages consumed by ``scheduler``
        self.report_queue = Queue()     # events reported outward
        self.delete_queue = Queue()     # keys handed to the ``delete`` coroutine

        self.status = None

        self.center = coerce_to_rpc(center)

        # Graph and runtime state
        self.dask = dict()              # key -> task
        self.dependencies = dict()      # key -> keys it needs
        self.dependents = dict()        # key -> keys that need it
        self.generation = 0             # bumped by multi-key 'update-graph' messages
        self.has_what = defaultdict(set)   # worker -> keys
        self.held_data = set()          # keys that release_key must not delete
        self.in_play = set()
        self.keyorder = dict()          # key -> (generation, order) priority
        self.nbytes = dict()
        self.ncores = dict()            # worker -> number of cores
        self.processing = dict()        # worker -> keys currently running there
        self.restrictions = dict()
        self.stacks = dict()            # worker -> list of keys queued for it
        self.waiting = dict()           # key -> dependencies not yet in memory
        self.waiting_data = dict()      # key -> dependents still needing it
        self.who_has = defaultdict(set)    # key -> workers

        # Failure bookkeeping
        self.exceptions = dict()
        self.tracebacks = dict()
        self.exceptions_blame = dict()  # failed key -> key whose failure caused it

        self.delete_batch_time = delete_batch_time

    @gen.coroutine
    def _sync_center(self):
        """Fetch ``ncores``, ``has_what`` and ``who_has`` from the center."""
        self.ncores, self.has_what, self.who_has = yield [
                self.center.ncores(),
                self.center.has_what(),
                self.center.who_has()]

    def start(self):
        """Reset per-run state and launch the scheduler, delete and worker coroutines.

        Returns the ``All(...)`` aggregate over the started coroutines; if any
        of them has already finished, its exception is raised immediately.
        """
        collections = [self.dask, self.dependencies, self.dependents,
                self.waiting, self.waiting_data, self.in_play, self.keyorder,
                self.nbytes, self.processing, self.restrictions]
        for collection in collections:
            collection.clear()

        # One processing set, stack and queue per known worker
        self.processing = {addr: set() for addr in self.ncores}
        self.stacks = {addr: list() for addr in self.ncores}

        self.worker_queues = {addr: Queue() for addr in self.ncores}

        self.coroutines = ([
             self.scheduler(),
             delete(self.scheduler_queue, self.delete_queue,
                    self.center.ip, self.center.port, self.delete_batch_time)]
            + [worker(self.scheduler_queue, self.worker_queues[w], w, n)
               for w, n in self.ncores.items()])

        # A coroutine that is already done at this point is surfaced right away
        for cor in self.coroutines:
            if cor.done():
                raise cor.exception()

        return All(self.coroutines)

    @gen.coroutine
    def _close(self):
        """Ask the scheduler loop to stop and wait for all coroutines."""
        self.scheduler_queue.put_nowait({'op': 'close'})
        yield All(self.coroutines)

    @gen.coroutine
    def cleanup(self):
        """ Clean up queues and coroutines, prepare to stop

        Sends one 'close' message to the delete queue and one per core to each
        worker queue, then reads that many messages back from
        ``scheduler_queue``.
        """
        logger.debug("Cleaning up coroutines")
        n = 0
        self.delete_queue.put_nowait({'op': 'close'}); n += 1
        for w, nc in self.ncores.items():
            for i in range(nc):
                self.worker_queues[w].put_nowait({'op': 'close'}); n += 1

        # presumably one acknowledgement arrives per 'close' sent - confirm
        # against the worker/delete coroutines
        for i in range(n):
            yield self.scheduler_queue.get()

    def mark_ready_to_run(self, key):
        """ Send task to an appropriate worker, trigger worker """
        logger.debug("Mark %s ready to run", key)
        if key in self.waiting:
            assert not self.waiting[key]
            del self.waiting[key]

        new_worker = decide_worker(self.dependencies, self.stacks,
                self.who_has, self.restrictions, self.nbytes, key)

        self.stacks[new_worker].append(key)
        self.ensure_occupied(new_worker)

    def mark_key_in_memory(self, key, workers=None):
        """Record that ``key`` is held by ``workers`` and propagate the change.

        Dependents whose last missing dependency was ``key`` are marked ready
        to run; dependencies that no longer have waiting dependents are
        released.  A 'key-in-memory' event is reported.
        """
        logger.debug("Mark %s in memory", key)
        if workers is None:
            workers = self.who_has[key]
        for worker in workers:
            self.who_has[key].add(worker)
            self.has_what[worker].add(key)
            with ignoring(KeyError):
                self.processing[worker].remove(key)

        # Visit dependents in descending keyorder
        for dep in sorted(self.dependents.get(key, []), key=self.keyorder.get,
                          reverse=True):
            if dep in self.waiting:
                s = self.waiting[dep]
                with ignoring(KeyError):
                    s.remove(key)
                if not s:  # new task ready to run
                    self.mark_ready_to_run(dep)

        for dep in self.dependencies.get(key, []):
            if dep in self.waiting_data:
                s = self.waiting_data[dep]
                with ignoring(KeyError):
                    s.remove(key)
                if not s and dep:
                    self.release_key(dep)

        self.report_queue.put_nowait({'op': 'key-in-memory',
                                     'key': key,
                                     'workers': workers})

    def ensure_occupied(self, worker):
        """ Spin up tasks on worker while it has tasks and free cores """
        logger.debug('Ensure worker is occupied: %s', worker)
        while (self.stacks[worker] and
               self.ncores[worker] > len(self.processing[worker])):
            key = self.stacks[worker].pop()
            self.processing[worker].add(key)
            logger.debug("Send job to worker: %s, %s, %s", worker, key, self.dask[key])
            self.worker_queues[worker].put_nowait(
                    {'op': 'compute-task',
                     'key': key,
                     'task': self.dask[key],
                     'needed': self.dependencies[key]})

    def seed_ready_tasks(self, keys=None):
        """ Distribute leaves among workers

        Takes an iterable of keys to consider for execution; defaults to every
        key in ``self.dask``.  Only keys present in ``waiting`` with an empty
        waiting set are passed on for assignment.
        """
        if keys is None:
            keys = self.dask
        new_stacks = assign_many_tasks(
                self.dependencies, self.waiting, self.keyorder, self.who_has,
                self.stacks, self.restrictions, self.nbytes,
                [k for k in keys if k in self.waiting and not self.waiting[k]])
        logger.debug("Seed ready tasks: %s", new_stacks)
        for worker, stack in new_stacks.items():
            if stack:
                self.ensure_occupied(worker)

    def release_key(self, key):
        """ Release key from distributed memory if its ready

        Nothing happens when the key is held or still has waiting dependents.
        """
        logger.debug("Release key %s", key)
        if key not in self.held_data and not self.waiting_data.get(key):
            self.delete_queue.put_nowait({'op': 'delete-task',
                                          'key': key})
            for w in self.who_has[key]:
                self.has_what[w].remove(key)
            del self.who_has[key]
            if key in self.waiting_data:
                del self.waiting_data[key]
            if key in self.in_play:
                self.in_play.remove(key)

    def update_data(self, extra_who_has, extra_nbytes):
        """Register externally provided data as in-memory, held and in play."""
        logger.debug("Update data %s", extra_who_has)
        for key, workers in extra_who_has.items():
            self.mark_key_in_memory(key, workers)

        self.nbytes.update(extra_nbytes)

        self.held_data.update(extra_who_has)
        self.in_play.update(extra_who_has)

    def mark_failed(self, key, failing_key=None):
        """ When a task fails mark it and all dependent task as failed

        ``failing_key`` is the key whose exception and traceback are reported.
        Keys already recorded in ``exceptions_blame`` are skipped.
        """
        logger.debug("Mark key as failed %s", key)
        if key in self.exceptions_blame:
            return
        self.exceptions_blame[key] = failing_key
        self.report_queue.put_nowait({'op': 'task-erred',
                                     'key': key,
                                     'exception': self.exceptions[failing_key],
                                     'traceback': self.tracebacks[failing_key]})
        if key in self.waiting:
            del self.waiting[key]
        if key in self.waiting_data:
            del self.waiting_data[key]
        self.in_play.remove(key)
        # Recurse so every downstream task is blamed on the same failing key
        for dep in self.dependents[key]:
            self.mark_failed(dep, failing_key)

    def log_state(self, msg=''):
        """Log waiting, stacks, processing and in_play at debug level."""
        logger.debug("Runtime State: %s", msg)
        logger.debug('\n\nwaiting: %s\n\nstacks: %s\n\nprocessing: %s\n\n'
                'in_play: %s\n\n', self.waiting, self.stacks, self.processing,
                self.in_play)

    def mark_worker_missing(self, worker):
        """Forget ``worker`` and every key that only it held.

        Does nothing if the worker has no entry in ``processing``.
        """
        logger.debug("Mark worker as missing %s", worker)
        if worker not in self.processing:
            return
        keys = self.has_what.pop(worker)
        for i in range(self.ncores[worker]):  # send close message, in case not dead
            self.worker_queues[worker].put_nowait({'op': 'close', 'report': False})
        del self.worker_queues[worker]
        del self.ncores[worker]
        del self.stacks[worker]
        del self.processing[worker]
        if not self.stacks:
            logger.critical("Lost all workers")
        # Keys whose last holder was this worker
        missing_keys = set()
        for key in keys:
            self.who_has[key].remove(worker)
            if not self.who_has[key]:
                missing_keys.add(key)
        gone_data = {k for k, v in self.who_has.items() if not v}
        self.in_play.difference_update(missing_keys)
        for k in gone_data:
            del self.who_has[k]

    def heal_state(self):
        """ Recover from catastrophic change

        Recomputes state through ``heal``, reports held keys that were
        released as lost, re-dispatches tasks with nothing left to wait for
        (only when workers remain) and queues deletion of released, non-held
        data.
        """
        logger.debug("Heal state")
        self.log_state("Before Heal")
        state = heal(self.dependencies, self.dependents, set(self.who_has),
                self.stacks, self.processing, self.waiting, self.waiting_data)
        released = state['released']
        self.in_play.clear(); self.in_play.update(state['in_play'])
        add_keys = {k for k, v in self.waiting.items() if not v}
        for key in self.held_data & released:
            self.report_queue.put_nowait({'op': 'lost-key', 'key': key})
        if self.stacks:
            for key in add_keys:
                self.mark_ready_to_run(key)
        # Note: ``-`` binds tighter than ``&`` here
        for key in set(self.who_has) & released - self.held_data:
            self.delete_queue.put_nowait({'op': 'delete-task', 'key': key})
        self.in_play.update(self.who_has)
        self.log_state("After Heal")

    def my_heal_missing_data(self, missing):
        """Delegate to ``heal_missing_data`` with this scheduler's state."""
        logger.debug("Heal from missing data")
        return heal_missing_data(
                self.dask, self.dependencies, self.dependents, self.held_data,
                self.who_has, self.in_play, self.waiting, self.waiting_data,
                missing)

    @gen.coroutine
    def scheduler(self):
        """ The scheduler coroutine for dask scheduling

        This coroutine manages interactions with all worker cores and with the
        delete coroutine through queues.

        State used (attributes of this object)
        --------------------------------------
        scheduler_queue: tornado.queues.Queue
            Get information from outside
        report_queue: tornado.queues.Queue
            Report information to outside
        worker_queues: dict {worker: tornado.queues.Queue}
            One queue per worker node.
            Each queue is listened to by several worker_core coroutines.
        delete_queue: tornado.queues.Queue
            One queue listened to by ``delete`` which connects to the
            center to delete unnecessary intermediate data
        who_has: dict {key: set}
            Mapping key to {set of worker-identities}
        has_what: dict {worker: set}
            Mapping worker-identity to {set of keys}
        ncores: dict {worker: int}
            Mapping worker-identity to number-of-cores

        The loop runs until a ``{'op': 'close'}`` message is received, then
        calls ``cleanup`` and sets ``status`` to ``'done'``.
        """

        assert (not self.dask) == (not self.dependencies), (self.dask, self.dependencies)

        self.heal_state()

        self.status = 'running'
        self.report_queue.put_nowait({'op': 'start'})
        while True:
            msg = yield self.scheduler_queue.get()

            logger.debug("scheduler receives message %s", msg)
            if msg['op'] == 'close':
                break
            elif msg['op'] == 'update-graph':
                # Merge the new graph into existing state
                update_state(self.dask, self.dependencies, self.dependents,
                        self.held_data, self.who_has, self.in_play,
                        self.waiting, self.waiting_data, msg['dsk'],
                        msg['keys'])

                cover_aliases(self.dask, msg['dsk'])

                self.restrictions.update(msg.get('restrictions', {}))

                new_keyorder = order(msg['dsk'])  # TODO: define order wrt old graph
                for key in new_keyorder:
                    if key not in self.keyorder:
                        # TODO: add test for this
                        self.keyorder[key] = (self.generation, new_keyorder[key]) # prefer old
                if len(msg['dsk']) > 1:
                    self.generation += 1  # older graph generations take precedence

                # New tasks depending on an already-failed key fail right away
                for key in msg['dsk']:
                    for dep in self.dependencies[key]:
                        if dep in self.exceptions_blame:
                            self.mark_failed(key, self.exceptions_blame[dep])

                self.seed_ready_tasks(msg['dsk'])
                for key in msg['keys']:
                    if self.who_has[key]:
                        self.mark_key_in_memory(key)

            elif msg['op'] == 'update-data':
                self.update_data(msg['who-has'], msg['nbytes'])

            elif msg['op'] == 'task-finished':
                key, worker = msg['key'], msg['workers'][0]
                logger.debug("Mark task as finished %s, %s", key, worker)
                if key in self.processing[worker]:
                    self.nbytes[key] = msg['nbytes']
                    self.mark_key_in_memory(key, [worker])
                    self.ensure_occupied(worker)
                else:
                    logger.debug("Key not found in processing, %s, %s, %s",
                                 key, worker, self.processing[worker])

            elif msg['op'] == 'task-erred':
                key, worker = msg['key'], msg['worker']
                if key in self.processing[worker]:
                    self.processing[worker].remove(key)
                    self.exceptions[key] = msg['exception']
                    self.tracebacks[key] = msg['traceback']
                    self.mark_failed(key, key)
                    self.ensure_occupied(worker)

            elif msg['op'] in ('missing-data', 'task-missing-data'):
                missing = set(msg['missing'])
                logger.debug("Recovering missing data: %s", missing)
                # Drop every record of where the missing keys lived
                for k in missing:
                    with ignoring(KeyError):
                        workers = self.who_has.pop(k)
                        for worker in workers:
                            self.has_what[worker].remove(k)
                self.my_heal_missing_data(missing)

                if msg['op'] == 'task-missing-data':
                    # The task goes back to waiting on the missing inputs
                    key = msg['key']
                    with ignoring(KeyError):
                        self.processing[msg['worker']].remove(key)
                    self.waiting[key] = missing
                    logger.info('task missing data, %s, %s', key, self.waiting)
                    with ignoring(KeyError):
                        self.processing[msg['worker']].remove(msg['key'])

                    self.ensure_occupied(msg['worker'])

                self.seed_ready_tasks()

            elif msg['op'] == 'worker-failed':
                worker = msg['worker']
                self.mark_worker_missing(worker)
                if msg.get('heal', True):
                    self.heal_state()

            elif msg['op'] == 'release-held-data':
                if msg['key'] in self.held_data:
                    logger.debug("Release key: %s", msg['key'])
                    self.held_data.remove(msg['key'])
                    self.release_key(msg['key'])

            else:
                logger.warn("Bad message: %s", msg)

        logger.debug('Finished scheduling')
        yield self.cleanup()
        self.status = 'done'
class Worker(Server):
    """ Worker Node

    Workers perform two functions:

    1.  **Serve data** from a local dictionary
    2.  **Perform computation** on that data and on data from peers

    Additionally workers keep a Center informed of their data and use that
    Center to gather data from other workers when necessary to perform a
    computation.

    You can start a worker with the ``dworker`` command line application::

        $ dworker scheduler-ip:port

    **State**

    * **data:** ``{key: object}``:
        Dictionary mapping keys to actual values
    * **active:** ``{key}``:
        Set of keys currently under computation
    * **ncores:** ``int``:
        Number of cores used by this worker process
    * **executor:** ``concurrent.futures.ThreadPoolExecutor``:
        Executor used to perform computation
    * **local_dir:** ``path``:
        Path on local machine to store temporary files
    * **center:** ``rpc``:
        Location of center or scheduler.  See ``.ip/.port`` attributes.
    * **name:** ``string``:
        Alias
    * **services:** ``{str: Server}``:
        Auxiliary web servers running on this worker
    * **service_ports:** ``{str: port}``:

    Examples
    --------

    Create centers and workers in Python:

    >>> from distributed import Center, Worker
    >>> c = Center('192.168.0.100', 8787)  # doctest: +SKIP
    >>> w = Worker(c.ip, c.port)  # doctest: +SKIP
    >>> yield w._start(port=8788)  # doctest: +SKIP

    Or use the command line::

        $ dcenter
        Start center at 127.0.0.1:8787

        $ dworker 127.0.0.1:8787
        Start worker at: 127.0.0.1:8788
        Registered with center at: 127.0.0.1:8787

    See Also
    --------
    distributed.center.Center:
    """

    def __init__(self, center_ip, center_port, ip=None, ncores=None,
                 loop=None, local_dir=None, services=None, service_ports=None,
                 name=None, **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        self.executor = ThreadPoolExecutor(self.ncores)
        # One token per core; executor_submit takes a token before submitting.
        self.thread_tokens = Queue()  # https://github.com/tornadoweb/tornado/issues/1595#issuecomment-198551572
        for i in range(self.ncores):
            self.thread_tokens.put_nowait(i)
        self.center = rpc(ip=center_ip, port=center_port)
        self.active = set()
        self.name = name

        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)

        # Uploaded modules are written to local_dir, so make it importable.
        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            # A service key may be ``name`` or ``(name, port)``.
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {
            'compute': self.compute,
            'gather': self.gather,
            'compute-stream': self.compute_stream,
            'run': self.run,
            'get_data': self.get_data,
            'update_data': self.update_data,
            'delete_data': self.delete_data,
            'terminate': self.terminate,
            'ping': pingpong,
            'health': self.health,
            'upload_file': self.upload_file
        }

        super(Worker, self).__init__(handlers, **kwargs)

    @gen.coroutine
    def _start(self, port=0):
        """Listen on ``port`` and register with the center, retrying until reachable.

        Raises ``ValueError`` if the center answers with anything but ``'OK'``.
        """
        self.listen(port)
        self.name = self.name or self.address
        for k, v in self.services.items():
            v.listen(0)
            self.service_ports[k] = v.port

        logger.info(' Start worker at: %20s:%d', self.ip, self.port)
        for k, v in self.service_ports.items():
            logger.info(' %16s at: %20s:%d' % (k, self.ip, v))
        logger.info('Waiting to connect to: %20s:%d',
                    self.center.ip, self.center.port)
        while True:
            try:
                resp = yield self.center.register(
                        ncores=self.ncores, address=(self.ip, self.port),
                        keys=list(self.data), services=self.service_ports,
                        name=self.name)
                break
            except (OSError, StreamClosedError):
                logger.debug("Unable to register with scheduler. Waiting")
                yield gen.sleep(0.5)
        if resp != 'OK':
            raise ValueError(resp)
        logger.info(' Registered to: %20s:%d',
                    self.center.ip, self.center.port)
        self.status = 'running'

    def start(self, port=0):
        """Schedule ``_start`` on the worker's event loop."""
        self.loop.add_callback(self._start, port)

    def identity(self, stream):
        """Return type, id and center address of this worker."""
        return {
            'type': type(self).__name__,
            'id': self.id,
            'center': (self.center.ip, self.center.port)
        }

    @gen.coroutine
    def _close(self, report=True, timeout=10):
        """Unregister (optionally), stop serving and remove the local directory."""
        if report:
            yield gen.with_timeout(timedelta(seconds=timeout),
                    self.center.unregister(address=(self.ip, self.port)),
                    io_loop=self.loop)
        self.center.close_streams()
        self.stop()
        self.executor.shutdown()
        if os.path.exists(self.local_dir):
            shutil.rmtree(self.local_dir)

        for k, v in self.services.items():
            v.stop()
        self.status = 'closed'
        # NOTE(review): stop() is called a second time here, as in the
        # original; presumably harmless, kept to preserve behavior.
        self.stop()

    @gen.coroutine
    def terminate(self, stream, report=True):
        """Handler: close the worker and answer ``'OK'``."""
        yield self._close(report=report)
        raise Return('OK')

    @property
    def address(self):
        return '%s:%d' % (self.ip, self.port)

    @property
    def address_tuple(self):
        return (self.ip, self.port)

    @gen.coroutine
    def gather(self, stream=None, who_has=None):
        """Handler: fetch the keys in ``who_has`` that are not yet stored locally.

        Returns ``{'status': 'OK'}`` or, when a key cannot be found,
        ``{'status': 'missing-data', 'keys': ...}``.
        """
        who_has = {
            k: [coerce_to_address(addr) for addr in v]
            for k, v in who_has.items() if k not in self.data
        }
        try:
            result = yield gather_from_workers(who_has)
        except KeyError as e:
            # The original passed ``e`` without a placeholder, which makes the
            # logging module fail when formatting the record.
            logger.warning("Could not find data: %s", e)
            raise Return({'status': 'missing-data', 'keys': e.args})
        else:
            self.data.update(result)
            raise Return({'status': 'OK'})

    @gen.coroutine
    def _ready_task(self, function=None, key=None, args=(), kwargs={},
                    task=None, who_has=None):
        """Collect inputs and deserialize a task so it is ready to execute.

        Returns a message dict.  On success it has ``'status': 'OK'`` plus the
        callable, packed ``args``/``kwargs`` and timing ``diagnostics``;
        otherwise it describes missing data or a deserialization error.
        """
        who_has = who_has or {}
        diagnostics = {}
        # Inputs already held locally
        data = {k: self.data[k] for k in who_has if k in self.data}
        # Inputs that must be fetched from peers
        who_has = {
            k: set(map(coerce_to_address, v))
            for k, v in who_has.items() if k not in self.data
        }
        if who_has:
            try:
                logger.info("gather %d keys from peers: %s",
                            len(who_has), str(who_has))
                diagnostics['transfer-start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer-stop'] = time()
                self.data.update(other)
                yield self.center.add_keys(address=self.address,
                                           keys=list(other))
                data.update(other)
            except KeyError as e:
                logger.warning("Could not find data for %s", key)
                raise Return({
                    'status': 'missing-data',
                    'keys': e.args,
                    'key': key
                })

        try:
            start = default_timer()
            if task is not None:
                task = loads(task)
            if function is not None:
                function = loads(function)
            if args:
                args = loads(args)
            if kwargs:
                kwargs = loads(kwargs)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warning("Could not deserialize task", exc_info=True)
            raise Return(assoc(error_message(e), 'key', key))

        # A raw task is run through execute_task with the task as sole argument
        if task is not None:
            assert not function and not args and not kwargs
            function = execute_task
            args = (task, )

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({
            'status': 'OK',
            'function': function,
            'args': args2,
            'kwargs': kwargs2,
            'diagnostics': diagnostics,
            'key': key
        })

    @gen.coroutine
    def executor_submit(self, key, function, *args, **kwargs):
        """ Safely run function in thread pool executor

        We've run into issues running concurrent.future futures within
        tornado.  Apparently it's advantageous to use timeouts and periodic
        callbacks to ensure things run smoothly.  This can get tricky, so we
        pull it off into an separate method.
        """
        # Wait for a free slot before submitting
        token = yield self.thread_tokens.get()
        job_counter[0] += 1
        i = job_counter[0]
        future = self.executor.submit(function, *args, **kwargs)
        pc = PeriodicCallback(
            lambda: logger.debug("future state: %s - %s", key, future._state),
            1000)
        pc.start()
        try:
            if sys.version_info < (3, 2):
                yield future
            else:
                # Poll in one-second slices, logging while the job is pending
                while not future.done() and future._state != 'FINISHED':
                    try:
                        yield gen.with_timeout(timedelta(seconds=1), future,
                                               io_loop=self.loop)
                        break
                    except gen.TimeoutError:
                        logger.info("work queue size: %d",
                                    self.executor._work_queue.qsize())
                        logger.info("future state: %s", future._state)
                        logger.info("Pending job %d: %s", i, future)
        finally:
            pc.stop()
            self.thread_tokens.put(token)

        result = future.result()

        logger.info("Finish job %d, %s", i, key)
        raise gen.Return(result)

    @gen.coroutine
    def compute_stream(self, stream):
        """Handler: serve 'compute-task' messages read from ``stream``.

        Results are sent back through a ``BatchedSend`` bound to the stream.
        """
        with log_errors():
            logger.debug("Open compute stream")
            bstream = BatchedSend(interval=10, loop=self.loop)
            bstream.start(stream)

        @gen.coroutine
        def process(msg):
            try:
                result = yield self.compute(report=False, **msg)
                bstream.send(result)
            except Exception as e:
                logger.exception(e)
                bstream.send(assoc(error_message(e), 'key', msg.get('key')))

        with log_errors():
            while True:
                try:
                    msgs = yield read(stream)
                except StreamClosedError:
                    break
                if not isinstance(msgs, list):
                    msgs = [msgs]

                for msg in msgs:
                    op = msg.pop('op', None)
                    if op == 'close':
                        # Only leaves the current batch; the outer loop ends
                        # when reading from the stream fails.
                        break
                    elif op == 'compute-task':
                        self.loop.add_callback(process, msg)
                    else:
                        logger.warning("Unknown operation %s, %s", op, msg)

            yield bstream.close()
            logger.info("Close compute stream")

    @gen.coroutine
    def compute(self, stream=None, function=None, key=None, args=(), kwargs={},
                task=None, who_has=None, report=True):
        """ Execute function

        Prepares the task, runs it in the thread pool, stores a successful
        result under ``key`` and (when ``report``) tells the center about it.
        Returns the result message dict.
        """
        self.active.add(key)

        # Ready function for computation
        msg = yield self._ready_task(function=function, key=key, args=args,
                                     kwargs=kwargs, task=task, who_has=who_has)
        if msg['status'] != 'OK':
            self.active.discard(key)
            raise Return(msg)
        else:
            function = msg['function']
            args = msg['args']
            kwargs = msg['kwargs']

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args, kwargs)

        result['key'] = key
        result.update(msg['diagnostics'])

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.center.add_keys(
                        address=(self.ip, self.port), keys=[key])
                if not response == 'OK':
                    logger.warning('Could not report results to center: %s',
                                   str(response))
        else:
            logger.warning(
                " Compute Failed\n"
                "Function: %s\n"
                "args: %s\n"
                "kwargs: %s\n",
                str(funcname(function))[:1000],
                str(args)[:1000],
                str(kwargs)[:1000],
                exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key, msg)
        self.active.discard(key)
        raise Return(result)

    @gen.coroutine
    def run(self, stream, function=None, args=(), kwargs={}):
        """Handler: deserialize and call ``function`` in the event-loop thread."""
        function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)
        try:
            result = function(*args, **kwargs)
        except Exception as e:
            logger.warning(
                " Run Failed\n"
                "Function: %s\n"
                "args: %s\n"
                "kwargs: %s\n",
                str(funcname(function))[:1000],
                str(args)[:1000],
                str(kwargs)[:1000],
                exc_info=True)

            response = error_message(e)
        else:
            response = {
                'status': 'OK',
                'result': dumps(result),
            }
        raise Return(response)

    @gen.coroutine
    def update_data(self, stream, data=None, report=True):
        """Handler: store serialized ``data`` and return per-key sizes."""
        data = valmap(loads, data)
        self.data.update(data)
        if report:
            response = yield self.center.add_keys(
                    address=(self.ip, self.port), keys=list(data))
            assert response == 'OK'
        info = {
            'nbytes': {k: sizeof(v) for k, v in data.items()},
            'status': 'OK'
        }
        raise Return(info)

    @gen.coroutine
    def delete_data(self, stream, keys=None, report=True):
        """Handler: drop ``keys`` from local storage, optionally telling the center."""
        for key in keys:
            if key in self.data:
                del self.data[key]
        logger.info("Deleted %d keys", len(keys))
        if report:
            logger.debug("Reporting loss of keys to center")
            yield self.center.remove_keys(address=self.address,
                                          keys=list(keys))
        raise Return('OK')

    def get_data(self, stream, keys=None):
        """Handler: return the serialized values of the requested keys held here."""
        return {k: dumps(self.data[k]) for k in keys if k in self.data}

    def upload_file(self, stream, filename=None, data=None, load=True):
        """Handler: write ``data`` to ``local_dir/filename`` and optionally import it.

        ``.py``/``.pyc`` files are reloaded as modules; for ``.egg`` files each
        distribution found is imported.  Returns an ``'OK'`` dict with the
        number of bytes written, or an ``'error'`` dict with the serialized
        exception.
        """
        # NOTE(review): ``filename`` is joined unchecked; confirm callers are
        # trusted, since a crafted name could escape ``local_dir``.
        out_filename = os.path.join(self.local_dir, filename)
        if isinstance(data, unicode):
            data = data.encode()
        with open(out_filename, 'wb') as f:
            f.write(data)
            f.flush()

        if load:
            try:
                name, ext = os.path.splitext(filename)
                if ext in ('.py', '.pyc'):
                    logger.info("Reload module %s from .py file", name)
                    name = name.split('-')[0]
                    reload(import_module(name))
                if ext == '.egg':
                    sys.path.append(out_filename)
                    # Materialize the result so the emptiness check below
                    # does not depend on the iterable's truthiness.
                    pkgs = list(pkg_resources.find_distributions(out_filename))
                    for pkg in pkgs:
                        logger.info("Load module %s from egg",
                                    pkg.project_name)
                        reload(import_module(pkg.project_name))
                    if not pkgs:
                        logger.warning("Found no packages in egg file")
            except Exception as e:
                logger.exception(e)
                return {'status': 'error', 'exception': dumps(e)}
        return {'status': 'OK', 'nbytes': len(data)}

    def health(self, stream=None):
        """ Information about worker

        Always reports ``active``, ``stored`` and ``time``.  When ``psutil``
        is importable, CPU and memory figures are added, plus network and disk
        deltas since the previous call (skipped on the first call).
        """
        d = {
            'active': len(self.active),
            'stored': len(self.data),
            'time': time()
        }
        try:
            import psutil
            mem = psutil.virtual_memory()
            d.update({
                'cpu': psutil.cpu_percent(),
                'memory': mem.total,
                'memory-percent': mem.percent
            })
            try:
                net_io = psutil.net_io_counters()
                d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
                d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
            except AttributeError:
                # First call: no previous sample stored yet
                pass
            self._last_net_io = net_io

            try:
                disk_io = psutil.disk_io_counters()
                d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes
                d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes
            except AttributeError:
                pass
            self._last_disk_io = disk_io
        except ImportError:
            pass
        return d
class BlogBackup(object):
    """Log in to the blog site and save every essay as a Markdown file.

    Essay links are collected from the paginated blog list into a queue and
    consumed by several ``_fetch_essay_content`` coroutines.
    """

    _default_dir_name = "seg_blog_backup"

    def _generate_save_dir(self):
        """Use (and create if needed) the default directory next to this file."""
        cur_dir = os.path.dirname(__file__)
        self.save_path = os.path.join(cur_dir, self._default_dir_name)
        if not os.path.isdir(self.save_path):
            os.mkdir(self.save_path)

    def _parse_save_path(self):
        """Validate the configured save path or fall back to the default one.

        Raises ``BlogSavePathError`` when a path was given but is not an
        existing directory.
        """
        if self.save_path:
            if os.path.exists(self.save_path) and os.path.isdir(self.save_path):
                return
            else:
                raise BlogSavePathError("'%s' not exists or is not dir!" % self.save_path)
        else:
            self._generate_save_dir()

    @staticmethod
    def parse_token_from_html(content):
        """Extract the 32-character login token from the page's script.

        The quoted fragments of the ``SF.token`` statement are joined, and
        each ``[m,n]`` pair found in it removes the slice ``m:n``.  Raises
        ``PageHtmlChanged`` if no 32-character token can be reconstructed.
        """
        overall_pat = re.compile(r"SF.token =.*?,\s+_\w+ = [\d,\[\]]+;", re.DOTALL)
        overall_res = overall_pat.search(content)
        if overall_res:
            overall_content = overall_res.group()
            # remove /* */ type annotation
            filter_res = re.sub(r"(/\*[/a-zA-Z\d' ]+\*/)", "", overall_content)
            str_list = re.findall(r"(?<!//)'([a-zA-Z\d]+)'", filter_res, re.DOTALL)
            filter_list = re.findall(r"\[(\d+),(\d+)\]", overall_content)
            ret = "".join(str_list)

            for m, n in filter_list:
                ret = ret[: int(m)] + ret[int(n) :]
            if len(ret) == 32:
                return ret

        raise PageHtmlChanged("website login token has changed")

    def _get_user_cookies(self):
        """Log in with the configured credentials and return the session cookies."""
        s = requests.Session()
        s.headers.update(headers)
        rep = s.get(target_url)
        post_url = "%s%s?_=%s" % (target_url, login_api_path, self.parse_token_from_html(rep.text))
        data = {"mail": self.username, "password": self.passwd}
        s.post(post_url, data=data)
        return s.cookies

    def __init__(self, **conf):
        self.username = conf["username"]
        self.passwd = conf["passwd"]
        self.save_path = conf.get("save_path")
        self._q = Queue()
        self._cookies = self._get_user_cookies()
        self._parse_save_path()

    @gen.coroutine
    def run(self):
        """Collect all essay links, start the workers and wait for the queue to drain."""
        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()

        yield self._q.join()

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        """Enqueue every essay link on ``page_link`` and on all following pages."""
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d(".stream-list__item > .summary > h2 > a")
        for link in link_elements:
            yield self._q.put(d(link).attr("href"))

        next_ele = d(".pagination li.next a")
        if next_ele:
            next_page_url = target_url + next_ele.attr("href")
            # Yield the recursive call so that this coroutine only finishes
            # once all pages are processed and failures reach the caller.
            yield self._fetch_blog_list_page(next_page_url)

    @gen.coroutine
    def _fetch_essay_content(self):
        """Worker loop: download queued essays until the queue stays empty for a second."""
        from datetime import timedelta

        while True:
            try:
                # A bare number is an absolute deadline for Tornado queues;
                # a timedelta expresses "one second from now".
                essay_path = yield self._q.get(timeout=timedelta(seconds=1))
            except gen.TimeoutError:
                # Nothing was dequeued, so there is nothing to acknowledge.
                return
            try:
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                real_file_name = os.path.join(self.save_path, title + ".md")
                logger.info("is backup essay: %s" % title)
                with open(real_file_name, "w") as f:
                    f.write(content.encode("utf8"))
            finally:
                # Exactly one acknowledgement per dequeued item keeps
                # ``self._q.join()`` in ``run`` accurate.
                self._q.task_done()
class TornadoTransmission():
    '''Transmission that batches events and sends them with Tornado's async HTTP client.

    Events are queued on ``pending``; the ``_sender`` coroutine groups them
    into batches and posts them.  One response dict per event is placed on
    ``responses``.
    '''

    def __init__(self, max_concurrent_batches=10, block_on_send=False,
                 block_on_response=False, max_batch_size=100,
                 send_frequency=timedelta(seconds=0.25),
                 user_agent_addition=''):
        if not has_tornado:
            raise ImportError(
                'TornadoTransmission requires tornado, but it was not found.'
            )

        self.block_on_send = block_on_send
        self.block_on_response = block_on_response

        self.max_batch_size = max_batch_size
        self.send_frequency = send_frequency

        user_agent = "libhoney-py/" + VERSION
        if user_agent_addition:
            user_agent += " " + user_agent_addition

        self.http_client = AsyncHTTPClient(
            force_instance=True,
            defaults=dict(user_agent=user_agent))

        # libhoney adds events to the pending queue for us to send
        self.pending = Queue(maxsize=1000)
        # we hand back responses from the API on the responses queue
        self.responses = Queue(maxsize=2000)

        # in-flight request -> {"start": ..., "events": ...}
        self.batch_data = {}
        self.sd = statsd.StatsClient(prefix="libhoney")
        self.batch_sem = Semaphore(max_concurrent_batches)

    def start(self):
        '''start schedules the sender loop on the current IOLoop'''
        ioloop.IOLoop.current().spawn_callback(self._sender)

    def send(self, ev):
        '''send accepts an event and queues it to be sent'''
        self.sd.gauge("queue_length", self.pending.qsize())
        try:
            if self.block_on_send:
                # NOTE(review): put() returns a future that is not awaited
                # here, so this call does not actually block - confirm intent.
                self.pending.put(ev)
            else:
                self.pending.put_nowait(ev)
            self.sd.incr("messages_queued")
        except QueueFull:
            response = {
                "status_code": 0,
                "duration": 0,
                "metadata": ev.metadata,
                "body": "",
                "error": "event dropped; queue overflow",
            }
            if self.block_on_response:
                self.responses.put(response)
            else:
                try:
                    self.responses.put_nowait(response)
                except QueueFull:
                    # if the response queue is full when trying to add an event
                    # queue is full response, just skip it.
                    pass
            self.sd.incr("queue_overflow")

    # We're using the older decorator/yield model for compatibility with
    # Python versions before 3.5.
    # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
    @gen.coroutine
    def _sender(self):
        '''_sender is the control loop that pulls events off the `self.pending`
        queue and submits batches for actual sending.

        A batch is flushed when it reaches `max_batch_size`, when
        `send_frequency` has elapsed since the last flush, or when no event
        arrives within `send_frequency`.  A `None` event flushes what is left
        and ends the loop.
        '''
        events = []
        last_flush = time.time()
        while True:
            try:
                ev = yield self.pending.get(timeout=self.send_frequency)
                if ev is None:
                    # signals shutdown
                    yield self._flush(events)
                    return
                events.append(ev)
                # ">=" keeps a batch from growing past max_batch_size
                if (len(events) >= self.max_batch_size or
                        time.time() - last_flush > self.send_frequency.total_seconds()):
                    yield self._flush(events)
                    events = []
                    # Restart the timer; otherwise, once send_frequency has
                    # passed, every later event would be flushed on its own.
                    last_flush = time.time()
            except TimeoutError:
                yield self._flush(events)
                events = []
                last_flush = time.time()

    @gen.coroutine
    def _flush(self, events):
        '''Send `events`, one batch request per destination.'''
        if not events:
            return
        for dest, group in group_events_by_destination(events).items():
            yield self._send_batch(dest, group)

    @gen.coroutine
    def _send_batch(self, destination, events):
        ''' Makes a single batch API request with the given list of events. The
        `destination` argument contains the write key, API host and dataset
        name used to build the request.'''
        start = time.time()
        status_code = 0
        try:
            # enforce max_concurrent_batches
            yield self.batch_sem.acquire()
            url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                          destination.dataset)
            payload = []
            for ev in events:
                event_time = ev.created_at.isoformat()
                # naive timestamps are marked as UTC
                if ev.created_at.tzinfo is None:
                    event_time += "Z"
                payload.append({
                    "time": event_time,
                    "samplerate": ev.sample_rate,
                    "data": ev.fields()
                })
            req = HTTPRequest(
                url,
                method='POST',
                headers={
                    "X-Honeycomb-Team": destination.writekey,
                    "Content-Type": "application/json",
                },
                body=json.dumps(payload, default=json_default_handler),
            )
            self.http_client.fetch(req, self._response_callback)
            # store the events that were sent so we can process responses later
            # it is important that we delete these eventually, or we'll run into memory issues
            self.batch_data[req] = {"start": start, "events": events}
        except Exception as e:
            # Catch all exceptions and hand them to the responses queue.
            self._enqueue_errors(status_code, e, start, events)
        finally:
            # NOTE(review): the fetch above is not awaited, so the semaphore
            # is released when the request has been handed to the client,
            # not when it completes.
            self.batch_sem.release()

    def _enqueue_errors(self, status_code, error, start, events):
        '''Enqueue an error response for every event of a failed batch.'''
        for ev in events:
            self.sd.incr("send_errors")
            self._enqueue_response(status_code, "", error, start, ev.metadata)

    def _response_callback(self, resp):
        '''Translate a batch HTTP response into one response per event.'''
        # resp.request should be the same HTTPRequest object built by _send_batch
        # and mapped to values in batch_data
        events = self.batch_data[resp.request]["events"]
        start = self.batch_data[resp.request]["start"]
        try:
            status_code = resp.code
            resp.rethrow()

            statuses = [d["status"] for d in json.loads(resp.body)]
            for ev, status in zip(events, statuses):
                self._enqueue_response(status, "", None, start, ev.metadata)
                self.sd.incr("messages_sent")
        except Exception as e:
            self._enqueue_errors(status_code, e, start, events)
            self.sd.incr("send_errors")
        finally:
            # clean up the data for this batch
            del self.batch_data[resp.request]

    def _enqueue_response(self, status_code, body, error, start, metadata):
        '''Build a response dict (duration in milliseconds) and enqueue it.

        When `block_on_response` is false and the queue is full, the response
        is dropped.
        '''
        resp = {
            "status_code": status_code,
            "body": body,
            "error": error,
            "duration": (time.time() - start) * 1000,
            "metadata": metadata
        }
        if self.block_on_response:
            self.responses.put(resp)
        else:
            try:
                self.responses.put_nowait(resp)
            except QueueFull:
                pass

    def close(self):
        '''call close to flush pending events and shut down the sender nicely.

        A `None` sentinel is queued on both the pending and the responses
        queue; each put is given up to 10 seconds to find room in a full
        queue.'''
        # Tornado treats a bare number as an absolute deadline, so the
        # 10-second allowance must be passed as a timedelta.
        try:
            self.pending.put(None, timedelta(seconds=10))
        except QueueFull:
            pass
        # signal to the responses queue that nothing more is coming.
        try:
            self.responses.put(None, timedelta(seconds=10))
        except QueueFull:
            pass

    def get_response_queue(self):
        ''' return the responses queue on to which will be sent the response
        objects from each event send'''
        return self.responses
class AsyncConnection(object):
    """Asynchronous wrapper around a psycopg2 connection driven by an IOLoop.

    Requests (cursor creation and the futurized connection methods) are put
    on an internal queue together with a future; a single dispatch
    coroutine (`_loop`) executes them one at a time and resolves the
    futures.
    """

    def __init__(self, *args, **kwargs):
        """Open the connection in async mode and start waiting for it.

        All arguments are forwarded to `connect`, except the optional
        `thread_pool` keyword, which supplies the executor to use; when it
        is absent a pool with `cpu_count()` workers is created.
        """
        kwargs["async"] = True
        if "thread_pool" in kwargs:
            self.__thread_pool = kwargs.pop("thread_pool")
        else:
            self.__thread_pool = futures.ThreadPoolExecutor(cpu_count())

        self.__connection = connect(*args, **kwargs)
        self.__io_loop = IOLoop.current()
        self.__connected = False

        log.debug("Trying to connect to postgresql")
        f = self.__wait()
        self.__io_loop.add_future(f, self.__on_connect)

        self.__queue = Queue()
        self.__has_active_cursor = False

        # Expose selected connection methods as future-returning wrappers.
        for method in ("get_backend_pid", "get_parameter_status"):
            setattr(self, method, self.__futurize(method))

    def __on_connect(self, result):
        """Mark the connection as established and start the dispatch loop."""
        log.debug("Connection establishment")
        self.__connected = True
        self.__io_loop.add_callback(self._loop)

    @coroutine
    def _loop(self):
        """Execute queued requests one at a time and resolve their futures.

        A request is only taken from the queue while no cursor is active
        and the connection is not executing. If the queued callable (or the
        future it returns) raises, the exception is delivered to the
        caller's future and the loop keeps running.
        """
        log.debug("Starting queue loop")
        while self.__connected:
            while self.__has_active_cursor or self.__connection.isexecuting():
                yield sleep(0.001)

            func, future = yield self.__queue.get()
            try:
                result = func()
                if isinstance(result, Future):
                    result = yield result
            except Exception as exc:
                # Without this the dispatch coroutine would terminate and
                # neither this future nor any later one would ever resolve.
                self.__io_loop.add_callback(future.set_exception, exc)
            else:
                self.__io_loop.add_callback(future.set_result, result)

            yield self.__wait()

    @coroutine
    def __wait(self):
        """Poll the connection until it reports POLL_OK, then return True.

        For POLL_READ / POLL_WRITE a one-shot IOLoop handler is installed
        on the connection's file descriptor and the coroutine waits for it
        to fire. A `QueryCanceledError` from `poll()` is retried after a
        short pause.
        """
        log.debug("Waiting for events")
        # `sleep` is expected to yield a falsy value, so this loops until
        # the POLL_OK branch raises Return.
        while not (yield sleep(0.001)):
            try:
                state = self.__connection.poll()
            except QueryCanceledError:
                yield sleep(0.1)
                continue

            f = Future()

            def resolve(fileno, io_op):
                if f.running():
                    f.set_result(True)
                self.__io_loop.remove_handler(fileno)

            if state == psycopg2.extensions.POLL_OK:
                raise Return(True)
            elif state == psycopg2.extensions.POLL_READ:
                self.__io_loop.add_handler(self.__connection.fileno(), resolve, IOLoop.READ)
                yield f
            elif state == psycopg2.extensions.POLL_WRITE:
                self.__io_loop.add_handler(self.__connection.fileno(), resolve, IOLoop.WRITE)
                yield f

    def __on_cursor_open(self, cursor):
        """Callback passed to AsyncCursor: pauses the dispatch loop."""
        self.__has_active_cursor = True
        log.debug("Opening cursor")

    def __on_cursor_close(self, cursor):
        """Callback passed to AsyncCursor: lets the dispatch loop resume."""
        self.__has_active_cursor = False
        log.debug("Closing active cursor")

    def cursor(self, **kwargs):
        """Queue the creation of an AsyncCursor.

        Returns:
            A future that the dispatch loop resolves with the result of
            calling `AsyncCursor(...)`; `kwargs` are forwarded to it.
        """
        f = Future()
        self.__io_loop.add_callback(
            self.__queue.put,
            (
                functools.partial(
                    AsyncCursor,
                    self.__connection,
                    self.__thread_pool,
                    self.__wait,
                    on_open=self.__on_cursor_open,
                    on_close=self.__on_cursor_close,
                    **kwargs
                ),
                f,
            ),
        )
        return f

    def cancel(self):
        """Run `connection.cancel` on the thread pool; returns its future."""
        return self.__thread_pool.submit(self.__connection.cancel)

    def close(self):
        """Fail all queued requests and close the underlying connection.

        The work is scheduled on the IOLoop, so requests queued (via
        `add_callback`) before this call are already in the queue when it
        is drained. Each drained request's future receives
        `psycopg2.Error("Connection closed")`.
        """
        # While this flag is set, _loop stays in its busy-wait and does not
        # pick up further requests.
        self.__has_active_cursor = True

        def closer():
            # Queue.empty() returns a plain bool and get_nowait() a plain
            # item, so the drain needs no coroutine machinery.
            while not self.__queue.empty():
                _func, future = self.__queue.get_nowait()
                future.set_exception(psycopg2.Error("Connection closed"))
            self.__connection.close()

        self.__io_loop.add_callback(closer)

    def __futurize(self, item):
        """Wrap connection method `item` so that it is run by the dispatch
        loop and its result delivered through a future."""
        attr = getattr(self.__connection, item)

        @functools.wraps(attr)
        def wrap(*args, **kwargs):
            f = Future()
            self.__io_loop.add_callback(
                self.__queue.put,
                (functools.partial(attr, *args, **kwargs), f))
            return f

        return wrap
class ProjectGroomer(object):
    """ Cleans up expired transactions for a project. """

    def __init__(self, project_id, coordinator, zk_client, db_access,
                 thread_pool):
        """ Creates a new ProjectGroomer.

        Args:
            project_id: A string specifying a project ID.
            coordinator: A GroomingCoordinator.
            zk_client: A KazooClient.
            db_access: A DatastoreProxy.
            thread_pool: A ThreadPoolExecutor.
        """
        self.project_id = project_id

        self._coordinator = coordinator
        self._zk_client = zk_client
        self._tornado_zk = TornadoKazoo(self._zk_client)
        self._db_access = db_access
        self._thread_pool = thread_pool
        self._project_node = '/appscale/apps/{}'.format(self.project_id)
        self._containers = []
        self._inactive_containers = set()
        self._batch_resolver = BatchResolver(self.project_id, self._db_access)

        self._zk_client.ensure_path(self._project_node)
        self._zk_client.ChildrenWatch(self._project_node, self._update_containers)

        self._txid_manual_offset = 0
        self._offset_node = '/'.join([self._project_node, OFFSET_NODE])
        self._zk_client.DataWatch(self._offset_node, self._update_offset)

        self._stop_event = AsyncEvent()
        self._stopped_event = AsyncEvent()

        # Keeps track of cleanup results for each round of grooming.
        self._txids_cleaned = 0
        self._oldest_valid_tx_time = None

        self._worker_queue = AsyncQueue(maxsize=MAX_CONCURRENCY)
        for _ in range(MAX_CONCURRENCY):
            IOLoop.current().spawn_callback(self._worker)

        IOLoop.current().spawn_callback(self.start)

    @staticmethod
    def _parse_tx_node_id(tx_node):
        """ Extracts the numeric counter from a transaction node name.

        The counter prefix is removed as a prefix. `str.lstrip` would treat
        it as a set of characters to strip, which only gives the right
        answer while none of those characters can start the number.

        Args:
            tx_node: A string specifying the last path component of a
                transaction node.
        Returns:
            An integer specifying the node's counter value.
        """
        if tx_node.startswith(COUNTER_NODE_PREFIX):
            tx_node = tx_node[len(COUNTER_NODE_PREFIX):]
        return int(tx_node)

    @gen.coroutine
    def start(self):
        """ Starts the grooming process until the stop event is set. """
        logger.info('Grooming {}'.format(self.project_id))
        while True:
            if self._stop_event.is_set():
                break

            try:
                yield self._groom_project()
            except Exception:
                # Prevent the grooming loop from stopping if an error is encountered.
                logger.exception(
                    'Unexpected error while grooming {}'.format(self.project_id))
                yield gen.sleep(MAX_TX_DURATION)

        self._stopped_event.set()

    @gen.coroutine
    def stop(self):
        """ Stops the grooming process. """
        logger.info('Stopping grooming process for {}'.format(self.project_id))
        self._stop_event.set()
        yield self._stopped_event.wait()

    @gen.coroutine
    def _worker(self):
        """ Processes items in the worker queue.

        Each item is a (tx_path, composite_indexes) tuple. A failure while
        resolving one transaction is logged and the worker moves on to the
        next item. If the exception escaped, this coroutine would end and
        the queue would eventually have no consumers left.
        """
        while True:
            tx_path, composite_indexes = yield self._worker_queue.get()
            try:
                tx_time = yield self._resolve_txid(tx_path, composite_indexes)
                if tx_time is None:
                    self._txids_cleaned += 1

                if tx_time is not None and tx_time < self._oldest_valid_tx_time:
                    self._oldest_valid_tx_time = tx_time
            except Exception:
                logger.exception(
                    'Unexpected error while resolving {}'.format(tx_path))
            finally:
                # Always acknowledge the item so that join() can return.
                self._worker_queue.task_done()

    def _update_offset(self, new_offset, _):
        """ Watches for updates to the manual offset node.

        Args:
            new_offset: A string specifying the new manual offset.
        """
        self._txid_manual_offset = int(new_offset or 0)

    def _update_containers(self, nodes):
        """ Updates the list of active txid containers.

        Args:
            nodes: A list of strings specifying ZooKeeper nodes.
        """
        # A container with no numeric suffix is treated as number 1.
        counters = [int(node[len(CONTAINER_PREFIX):] or 1)
                    for node in nodes if node.startswith(CONTAINER_PREFIX)
                    and node not in self._inactive_containers]
        counters.sort()

        containers = [CONTAINER_PREFIX + str(counter) for counter in counters]
        # Map container 1 back to its suffix-less name.
        if containers and containers[0] == '{}1'.format(CONTAINER_PREFIX):
            containers[0] = CONTAINER_PREFIX

        self._containers = containers

    @gen.coroutine
    def _groom_project(self):
        """ Runs the grooming process. """
        index = self._coordinator.index
        worker_count = self._coordinator.total_workers

        oldest_valid_tx_time = yield self._fetch_and_clean(index, worker_count)

        # Wait until there's a reasonable chance that some transactions have
        # timed out.
        next_timeout_eta = oldest_valid_tx_time + MAX_TX_DURATION

        # The oldest ignored transaction should still be valid, but ensure that
        # the timeout is not negative.
        next_timeout = max(0, next_timeout_eta - time.time())
        time_to_wait = datetime.timedelta(
            seconds=next_timeout + (MAX_TX_DURATION / 2))

        # Allow the wait to be cut short when a project is removed.
        try:
            yield self._stop_event.wait(timeout=time_to_wait)
        except gen.TimeoutError:
            raise gen.Return()

    @gen.coroutine
    def _remove_path(self, tx_path):
        """ Removes a ZooKeeper node.

        Args:
            tx_path: A string specifying the path to delete.
        """
        try:
            yield self._tornado_zk.delete(tx_path)
        except NoNodeError:
            # Already gone; nothing to do.
            pass
        except NotEmptyError:
            # Fall back to a recursive delete run on the thread pool.
            yield self._thread_pool.submit(self._zk_client.delete, tx_path,
                                           recursive=True)

    @gen.coroutine
    def _resolve_txid(self, tx_path, composite_indexes):
        """ Cleans up a transaction if it has expired.

        Args:
            tx_path: A string specifying the location of the ZooKeeper node.
            composite_indexes: A list of CompositeIndex objects.
        Returns:
            The transaction start time if still valid, None if invalid because this
            method will also delete it.
        """
        tx_data = yield self._tornado_zk.get(tx_path)
        tx_time = float(tx_data[0])

        _, container, tx_node = tx_path.rsplit('/', 2)
        tx_node_id = self._parse_tx_node_id(tx_node)
        container_count = int(container[len(CONTAINER_PREFIX):] or 1)
        if tx_node_id < 0:
            yield self._remove_path(tx_path)
            raise gen.Return()

        container_size = MAX_SEQUENCE_COUNTER + 1
        automatic_offset = (container_count - 1) * container_size
        txid = self._txid_manual_offset + automatic_offset + tx_node_id

        if txid < 1:
            yield self._remove_path(tx_path)
            raise gen.Return()

        # If the transaction is still valid, return the time it was created.
        if tx_time + MAX_TX_DURATION >= time.time():
            raise gen.Return(tx_time)

        yield self._batch_resolver.resolve(txid, composite_indexes)
        yield self._remove_path(tx_path)
        yield self._batch_resolver.cleanup(txid)

    @gen.coroutine
    def _fetch_and_clean(self, worker_index, worker_count):
        """ Cleans up expired transactions.

        Args:
            worker_index: An integer specifying this worker's index.
            worker_count: An integer specifying the number of total workers.
        Returns:
            A float specifying the time of the oldest valid transaction as a unix
            timestamp.
        """
        self._txids_cleaned = 0
        self._oldest_valid_tx_time = time.time()

        children = []
        for index, container in enumerate(self._containers):
            container_path = '/'.join([self._project_node, container])
            new_children = yield self._tornado_zk.get_children(container_path)

            # An empty container that is not the last one is remembered as
            # inactive so that _update_containers leaves it out.
            if not new_children and index < len(self._containers) - 1:
                self._inactive_containers.add(container)

            children.extend(['/'.join([container_path, node])
                             for node in new_children])

        logger.debug(
            'Found {} transaction IDs for {}'.format(len(children), self.project_id))

        if not children:
            raise gen.Return(self._oldest_valid_tx_time)

        # Refresh these each time so that the indexes are fresh.
        encoded_indexes = yield self._thread_pool.submit(
            self._db_access.get_indices, self.project_id)
        composite_indexes = [CompositeIndex(index) for index in encoded_indexes]

        for tx_path in children:
            tx_node_id = self._parse_tx_node_id(tx_path.split('/')[-1])
            # Only resolve transactions that this worker has been assigned.
            if tx_node_id % worker_count != worker_index:
                continue

            yield self._worker_queue.put((tx_path, composite_indexes))

        yield self._worker_queue.join()

        if self._txids_cleaned > 0:
            logger.info('Cleaned up {} expired txids for {}'.format(
                self._txids_cleaned, self.project_id))

        raise gen.Return(self._oldest_valid_tx_time)
class Scheduler(object):
    """ Coordinates task execution across workers through Tornado queues.

    State is kept in plain dicts/sets on the instance (task graph,
    dependencies, which worker holds which key, per-worker stacks and
    in-flight sets). Communication with the outside, with worker cores
    and with the delete coroutine happens through queues.
    """

    def __init__(self, center, delete_batch_time=1, loop=None,
                 resource_interval=1, resource_log_size=1000):
        """ Set up empty scheduler state.

        Parameters
        ----------
        center: passed through ``coerce_to_rpc``
        delete_batch_time: minimum time between delete batches (see ``delete``)
        loop: IOLoop to use; defaults to ``IOLoop.current()``
        resource_interval: interval sent to nannies in ``_nanny_listen``
        resource_log_size: maximum length of each per-nanny resource log
        """
        self.scheduler_queues = [Queue()]
        self.report_queues = []
        self.delete_queue = Queue()
        self.status = None
        self.coroutines = []

        self.center = coerce_to_rpc(center)

        self.dask = dict()
        self.dependencies = dict()
        self.dependents = dict()
        self.generation = 0
        self.has_what = defaultdict(set)
        self.held_data = set()
        self.in_play = set()
        self.keyorder = dict()
        self.nbytes = dict()
        self.ncores = dict()
        self.nannies = dict()
        self.processing = dict()
        self.restrictions = dict()
        self.stacks = dict()
        self.waiting = dict()
        self.waiting_data = dict()
        self.who_has = defaultdict(set)

        self.exceptions = dict()
        self.tracebacks = dict()
        self.exceptions_blame = dict()
        self.resource_logs = dict()

        self.loop = loop or IOLoop.current()

        self.delete_batch_time = delete_batch_time
        self.resource_interval = resource_interval
        self.resource_log_size = resource_log_size

        self.plugins = []

        # Maps the 'op' field of an incoming message to its handler.
        self.handlers = {'update-graph': self.update_graph,
                         'update-data': self.update_data,
                         'missing-data': self.mark_missing_data,
                         'task-missing-data': self.mark_missing_data,
                         'worker-failed': self.mark_worker_missing,
                         'release-held-data': self.release_held_data,
                         'restart': self._restart}

    def put(self, msg):
        """ Put a message on the primary scheduler queue without blocking """
        return self.scheduler_queues[0].put_nowait(msg)

    @property
    def report_queue(self):
        """ The first registered report queue """
        return self.report_queues[0]

    @gen.coroutine
    def _sync_center(self):
        """ Refresh worker metadata from the center and create a listener
        coroutine for every worker that has a nanny port """
        self.ncores, self.has_what, self.who_has, self.nannies = yield [
            self.center.ncores(),
            self.center.has_what(),
            self.center.who_has(),
            self.center.nannies()]

        self._nanny_coroutines = []
        for (ip, wport), nport in self.nannies.items():
            if not nport:
                continue
            if (ip, nport) not in self.resource_logs:
                self.resource_logs[(ip, nport)] = deque(maxlen=self.resource_log_size)
            self._nanny_coroutines.append(self._nanny_listen(ip, nport))

    def start(self, start_queues=True):
        """ Reset per-run state and launch the delete and worker coroutines

        Returns the future of ``_finished``.  Raises the stored exception
        of any registered coroutine that has already failed.
        """
        collections = [self.dask, self.dependencies, self.dependents,
                       self.waiting, self.waiting_data, self.in_play,
                       self.keyorder, self.nbytes, self.processing,
                       self.restrictions]
        for collection in collections:
            collection.clear()

        self.processing = {addr: set() for addr in self.ncores}
        self.stacks = {addr: list() for addr in self.ncores}

        self.worker_queues = {addr: Queue() for addr in self.ncores}

        # These attributes do not exist on the first call.
        with ignoring(AttributeError):
            self._delete_coroutine.cancel()
        with ignoring(AttributeError):
            for c in self._worker_coroutines:
                c.cancel()

        self._delete_coroutine = self.delete()
        self._worker_coroutines = [self.worker(w) for w in self.ncores]

        self.heal_state()

        if start_queues:
            self.handle_queues(self.scheduler_queues[0], None)

        for cor in self.coroutines:
            if cor.done():
                # A coroutine that finished cleanly has no exception;
                # ``raise None`` would itself fail with a TypeError.
                exc = cor.exception()
                if exc is not None:
                    raise exc

        return self._finished()

    @gen.coroutine
    def _finished(self):
        """ Wait until every registered coroutine is done """
        while any(not c.done() for c in self.coroutines):
            yield All(self.coroutines)

    @gen.coroutine
    def _close(self):
        """ Clean up, wait for coroutines to finish, then close the center """
        yield self.cleanup()
        yield self._finished()
        yield self.center.close(close=True)
        self.center.close_streams()

    @gen.coroutine
    def cleanup(self):
        """ Clean up queues and coroutines, prepare to stop """
        if self.status == 'closing':
            raise gen.Return()

        self.status = 'closing'
        logger.debug("Cleaning up coroutines")
        # n counts the close messages sent; the same number of messages is
        # then read back from the primary scheduler queue.
        n = 0
        self.delete_queue.put_nowait({'op': 'close'})
        n += 1
        for w, nc in self.ncores.items():
            for i in range(nc):
                self.worker_queues[w].put_nowait({'op': 'close'})
                n += 1

        for s in self.scheduler_queues[1:]:
            s.put_nowait({'op': 'close-stream'})

        for i in range(n):
            msg = yield self.scheduler_queues[0].get()

        for q in self.report_queues:
            q.put_nowait({'op': 'close'})

    def mark_ready_to_run(self, key):
        """ Send task to an appropriate worker, trigger worker """
        logger.debug("Mark %s ready to run", key)
        if key in self.waiting:
            assert not self.waiting[key]
            del self.waiting[key]

        new_worker = decide_worker(self.dependencies, self.stacks,
                                   self.who_has, self.restrictions,
                                   self.nbytes, key)

        self.stacks[new_worker].append(key)
        self.ensure_occupied(new_worker)

    def mark_key_in_memory(self, key, workers=None):
        """ Mark that key now lives in particular workers """
        logger.debug("Mark %s in memory", key)
        if workers is None:
            workers = self.who_has[key]
        for worker in workers:
            self.who_has[key].add(worker)
            self.has_what[worker].add(key)
            with ignoring(KeyError):
                self.processing[worker].remove(key)

        for dep in sorted(self.dependents.get(key, []), key=self.keyorder.get,
                          reverse=True):
            if dep in self.waiting:
                s = self.waiting[dep]
                with ignoring(KeyError):
                    s.remove(key)
                if not s:  # new task ready to run
                    self.mark_ready_to_run(dep)

        for dep in self.dependencies.get(key, []):
            if dep in self.waiting_data:
                s = self.waiting_data[dep]
                with ignoring(KeyError):
                    s.remove(key)
                if not s and dep:
                    self.release_key(dep)

        self.report({'op': 'key-in-memory',
                     'key': key,
                     'workers': workers})

    def ensure_occupied(self, worker):
        """ Spin up tasks on worker while it has tasks and free cores """
        logger.debug('Ensure worker is occupied: %s', worker)
        while (self.stacks[worker] and
               self.ncores[worker] > len(self.processing[worker])):
            key = self.stacks[worker].pop()
            self.processing[worker].add(key)
            logger.debug("Send job to worker: %s, %s, %s", worker, key,
                         self.dask[key])
            self.worker_queues[worker].put_nowait(
                {'op': 'compute-task',
                 'key': key,
                 'task': self.dask[key],
                 'needed': self.dependencies[key]})

    def seed_ready_tasks(self, keys=None):
        """ Distribute leaves among workers

        Takes an iterable of keys to consider for execution
        """
        if keys is None:
            keys = self.dask
        new_stacks = assign_many_tasks(
            self.dependencies, self.waiting, self.keyorder, self.who_has,
            self.stacks, self.restrictions, self.nbytes,
            [k for k in keys if k in self.waiting and not self.waiting[k]])
        logger.debug("Seed ready tasks: %s", new_stacks)
        for worker, stack in new_stacks.items():
            if stack:
                self.ensure_occupied(worker)

    def release_key(self, key):
        """ Release key from distributed memory if its ready """
        logger.debug("Release key %s", key)
        if key not in self.held_data and not self.waiting_data.get(key):
            self.delete_queue.put_nowait({'op': 'delete-task',
                                          'key': key})
            for w in self.who_has[key]:
                self.has_what[w].remove(key)
            del self.who_has[key]
            if key in self.waiting_data:
                del self.waiting_data[key]
            if key in self.in_play:
                self.in_play.remove(key)

    def update_data(self, who_has=None, nbytes=None):
        """ Record externally provided data as in memory and held """
        logger.debug("Update data %s", who_has)
        for key, workers in who_has.items():
            self.mark_key_in_memory(key, workers)

        self.nbytes.update(nbytes)

        self.held_data.update(who_has)
        self.in_play.update(who_has)

    def mark_task_erred(self, key, worker, exception, traceback):
        """ Mark that a task has erred on a particular worker """
        if key in self.processing[worker]:
            self.processing[worker].remove(key)
            self.exceptions[key] = exception
            self.tracebacks[key] = traceback
            self.mark_failed(key, key)
            self.ensure_occupied(worker)
            for plugin in self.plugins[:]:
                try:
                    plugin.task_erred(self, key, worker, exception)
                except Exception as e:
                    logger.exception(e)

    def mark_failed(self, key, failing_key=None):
        """ When a task fails mark it and all dependent task as failed """
        logger.debug("Mark key as failed %s", key)
        if key in self.exceptions_blame:
            return
        self.exceptions_blame[key] = failing_key
        self.report({'op': 'task-erred',
                     'key': key,
                     'exception': self.exceptions[failing_key],
                     'traceback': self.tracebacks[failing_key]})
        if key in self.waiting:
            del self.waiting[key]
        if key in self.waiting_data:
            del self.waiting_data[key]
        self.in_play.remove(key)
        for dep in self.dependents[key]:
            self.mark_failed(dep, failing_key)

    def mark_task_finished(self, key, worker, nbytes):
        """ Mark that a task has finished execution on a particular worker """
        logger.debug("Mark task as finished %s, %s", key, worker)
        if key in self.processing[worker]:
            self.nbytes[key] = nbytes
            self.mark_key_in_memory(key, [worker])
            self.ensure_occupied(worker)
            for plugin in self.plugins[:]:
                try:
                    plugin.task_finished(self, key, worker, nbytes)
                except Exception as e:
                    logger.exception(e)
        else:
            logger.debug("Key not found in processing, %s, %s, %s",
                         key, worker, self.processing[worker])

    def mark_missing_data(self, missing=None, key=None, worker=None):
        """ Forget the location of missing keys and reschedule around them

        If ``key`` and ``worker`` are given, ``key`` is the task that was
        running on ``worker`` when the data turned out to be missing; it is
        put back into ``waiting`` on the missing keys.
        """
        missing = set(missing)
        logger.debug("Recovering missing data: %s", missing)
        for k in missing:
            with ignoring(KeyError):
                workers = self.who_has.pop(k)
                # Use a distinct loop name: reusing ``worker`` here would
                # overwrite the argument that is needed below.
                for holder in workers:
                    self.has_what[holder].remove(k)
        self.my_heal_missing_data(missing)

        if key and worker:
            with ignoring(KeyError):
                self.processing[worker].remove(key)
            self.waiting[key] = missing
            logger.info('task missing data, %s, %s', key, self.waiting)
            self.ensure_occupied(worker)

        self.seed_ready_tasks()

    def log_state(self, msg=''):
        """ Log the main scheduling collections at debug level """
        logger.debug("Runtime State: %s", msg)
        logger.debug('\n\nwaiting: %s\n\nstacks: %s\n\nprocessing: %s\n\n'
                     'in_play: %s\n\n', self.waiting, self.stacks,
                     self.processing, self.in_play)

    def mark_worker_missing(self, worker=None, heal=True):
        """ Mark that a worker no longer seems responsive """
        logger.debug("Mark worker as missing %s", worker)
        if worker not in self.processing:
            return
        keys = self.has_what.pop(worker)
        for i in range(self.ncores[worker]):  # send close message, in case not dead
            self.worker_queues[worker].put_nowait({'op': 'close', 'report': False})
        del self.worker_queues[worker]
        del self.ncores[worker]
        del self.stacks[worker]
        del self.processing[worker]
        if not self.stacks:
            logger.critical("Lost all workers")
        missing_keys = set()
        for key in keys:
            self.who_has[key].remove(worker)
            if not self.who_has[key]:
                missing_keys.add(key)
        gone_data = {k for k, v in self.who_has.items() if not v}
        self.in_play.difference_update(missing_keys)
        for k in gone_data:
            del self.who_has[k]

        if heal:
            self.heal_state()

    def update_graph(self, dsk=None, keys=None, restrictions=None):
        """ Merge a new task graph into the scheduler state

        ``restrictions`` defaults to an empty mapping.
        """
        if restrictions is None:
            restrictions = {}

        update_state(self.dask, self.dependencies, self.dependents,
                     self.held_data, self.who_has, self.in_play,
                     self.waiting, self.waiting_data, dsk, keys)

        cover_aliases(self.dask, dsk)

        self.restrictions.update(restrictions)

        new_keyorder = order(dsk)  # TODO: define order wrt old graph
        for key in new_keyorder:
            if key not in self.keyorder:
                # TODO: add test for this
                self.keyorder[key] = (self.generation, new_keyorder[key])  # prefer old
        if len(dsk) > 1:
            self.generation += 1  # older graph generations take precedence

        # Propagate known failures to new tasks that depend on them.
        for key in dsk:
            for dep in self.dependencies[key]:
                if dep in self.exceptions_blame:
                    self.mark_failed(key, self.exceptions_blame[dep])

        self.seed_ready_tasks(dsk)
        for key in keys:
            if self.who_has[key]:
                self.mark_key_in_memory(key)

        for plugin in self.plugins[:]:
            try:
                plugin.update_graph(self, dsk, keys, restrictions)
            except Exception as e:
                logger.exception(e)

    def release_held_data(self, key=None):
        """ Stop holding ``key`` and release it if nothing else needs it """
        if key in self.held_data:
            logger.debug("Release key: %s", key)
            self.held_data.remove(key)
            self.release_key(key)

    def heal_state(self):
        """ Recover from catastrophic change """
        logger.debug("Heal state")
        self.log_state("Before Heal")
        state = heal(self.dependencies, self.dependents, set(self.who_has),
                     self.stacks, self.processing, self.waiting,
                     self.waiting_data)
        released = state['released']
        self.in_play.clear()
        self.in_play.update(state['in_play'])
        add_keys = {k for k, v in self.waiting.items() if not v}
        for key in self.held_data & released:
            self.report({'op': 'lost-key', 'key': key})
        if self.stacks:
            for key in add_keys:
                self.mark_ready_to_run(key)
        # ``-`` binds tighter than ``&``: who_has & (released - held_data)
        for key in set(self.who_has) & released - self.held_data:
            self.delete_queue.put_nowait({'op': 'delete-task', 'key': key})
        self.in_play.update(self.who_has)
        self.log_state("After Heal")

    def my_heal_missing_data(self, missing):
        """ Delegate to ``heal_missing_data`` with this scheduler's state """
        logger.debug("Heal from missing data")
        return heal_missing_data(self.dask, self.dependencies,
                                 self.dependents, self.held_data,
                                 self.who_has, self.in_play, self.waiting,
                                 self.waiting_data, missing)

    def report(self, msg):
        """ Broadcast ``msg`` to every report queue """
        for q in self.report_queues:
            q.put_nowait(msg)

    def add_plugin(self, plugin):
        """ Register a plugin to be notified of scheduler events """
        self.plugins.append(plugin)

    def handle_queues(self, scheduler_queue, report_queue):
        """ Register a queue pair and start a ``handle_scheduler`` coroutine
        for it; returns that coroutine's future """
        self.scheduler_queues.append(scheduler_queue)
        if report_queue:
            self.report_queues.append(report_queue)
        future = self.handle_scheduler(scheduler_queue, report_queue)
        self.coroutines.append(future)
        return future

    @gen.coroutine
    def handle_scheduler(self, queue, report):
        """ The scheduler coroutine for dask scheduling

        This coroutine manages interactions with all worker cores and with the
        delete coroutine through queues.

        Parameters
        ----------
        scheduler_queue: tornado.queues.Queue
            Get information from outside
        report_queue: tornado.queues.Queue
            Report information to outside
        worker_queues: dict {worker: tornado.queues.Queue}
            One queue per worker node.
            Each queue is listened to by several worker_core coroutines.
        delete_queue: tornado.queues.Queue
            One queue listened to by ``delete`` which connects to the
            center to delete unnecessary intermediate data
        who_has: dict {key: set}
            Mapping key to {set of worker-identities}
        has_what: dict {worker: set}
            Mapping worker-identity to {set of keys}
        ncores: dict {worker: int}
            Mapping worker-identity to number-of-cores
        """
        assert (not self.dask) == (not self.dependencies), (self.dask, self.dependencies)

        if not self.status == 'running':
            self.status = 'running'
            self.report({'op': 'start'})

        if report:
            report.put_nowait({'op': 'stream-start'})
        while True:
            msg = yield queue.get()

            logger.debug("scheduler receives message %s", msg)
            op = msg.pop('op')

            if op == 'close-stream':
                break
            elif op == 'close':
                self._close()
            elif op in self.handlers:
                # The remaining message fields are the handler's keywords.
                result = self.handlers[op](**msg)
                if isinstance(result, gen.Future):
                    yield result
            else:
                logger.warning("Bad message: op=%s, %s", op, msg)

            if op == 'close':
                break

        logger.debug('Finished scheduling coroutine')

    @gen.coroutine
    def worker(self, ident):
        """ Manage a single distributed worker node

        This coroutine manages one remote worker.  It spins up several
        ``worker_core`` coroutines, one for each core.  It reports a closed
        connection to scheduler if one occurs.
        """
        try:
            yield All([self.worker_core(ident, i)
                       for i in range(self.ncores[ident])])
        except (IOError, OSError):
            logger.info("Worker failed from closed stream: %s", ident)
            self.put({'op': 'worker-failed',
                      'worker': ident})

    @gen.coroutine
    def worker_core(self, ident, i):
        """ Manage one core on one distributed worker node

        This coroutine listens on worker_queue for the following operations

        **Incoming Messages**:

        - compute-task:  call worker.compute(...) on remote node, report when done
        - close: close connection to worker node, report `worker-finished` to
          scheduler

        **Outgoing Messages**:

        - task-finished:  sent to scheduler once a task completes
        - task-erred: sent to scheduler when a task errs
        - worker-finished: sent to scheduler in response to a close command
        """
        worker = rpc(ip=ident[0], port=ident[1])
        logger.debug("Start worker core %s, %d", ident, i)

        while True:
            msg = yield self.worker_queues[ident].get()
            if msg['op'] == 'close':
                logger.debug("Worker core receives close message %s, %s",
                             ident, msg)
                break
            if msg['op'] == 'compute-task':
                key = msg['key']
                needed = msg['needed']
                task = msg['task']
                if not istask(task):
                    # Not a task: ship the value itself to the worker.
                    response, content = yield worker.update_data(data={key: task})
                    assert response == b'OK', response
                    nbytes = content['nbytes'][key]
                else:
                    response, content = yield worker.compute(function=execute_task,
                                                             args=(task,),
                                                             needed=needed,
                                                             key=key,
                                                             kwargs={})
                    if response == b'OK':
                        nbytes = content['nbytes']
                logger.debug("Compute response from worker %s, %s, %s, %s",
                             ident, key, response, content)
                if response == b'error':
                    error, traceback = content
                    self.mark_task_erred(key, ident, error, traceback)

                elif response == b'missing-data':
                    self.mark_missing_data(content.args, key=key, worker=ident)

                else:
                    self.mark_task_finished(key, ident, nbytes)

        yield worker.close(close=True)
        worker.close_streams()
        # A close message may carry 'report': False to suppress the reply.
        if msg.get('report', True):
            self.put({'op': 'worker-finished',
                      'worker': ident})
        logger.debug("Close worker core, %s, %d", ident, i)

    @gen.coroutine
    def delete(self):
        r""" Delete extraneous intermediates from distributed memory

        This coroutine manages a connection to the center in order to send keys
        that should be removed from distributed memory.  We batch several keys that
        come in over the ``delete_queue`` into a list.  Roughly once a second we
        send this list of keys over to the center which then handles deleting
        these keys from workers' memory.

        worker \                                /-> worker node
        worker -> scheduler -> delete -> center --> worker node
        worker /                                \-> worker node

        **Incoming Messages**

        - delete-task: holds a key to be deleted
        - close: close this coroutine
        """
        batch = list()
        last = time()

        while True:
            msg = yield self.delete_queue.get()
            if msg['op'] == 'close':
                break

            # TODO: trigger coroutine to go off in a second if no activity
            batch.append(msg['key'])
            if batch and time() - last > self.delete_batch_time:  # One second batching
                logger.debug("Ask center to delete %d keys", len(batch))
                last = time()
                yield self.center.delete_data(keys=batch)
                batch = list()

        # Send whatever is left before shutting down.
        if batch:
            yield self.center.delete_data(keys=batch)

        self.put({'op': 'delete-finished'})
        logger.debug('Delete finished')

    @gen.coroutine
    def _nanny_listen(self, ip, port):
        """ Ask a nanny to report resources and append every message it
        sends to ``resource_logs[(ip, port)]`` until the stream closes """
        stream = yield connect(ip=ip, port=port)
        yield write(stream, {'op': 'monitor_resources',
                             'interval': self.resource_interval})
        while not stream.closed():
            msg = yield read(stream)
            self.resource_logs[(ip, port)].append(msg)

    @gen.coroutine
    def _scatter(self, stream, data=None, workers=None):
        """ Scatter ``data`` to workers and register it with the scheduler

        Uses ``workers`` when given, otherwise all known workers.  Returns
        the remotes produced by ``scatter_to_workers``.  Raises ValueError
        if no workers are known.
        """
        if not self.ncores:
            raise ValueError("No workers yet found. "
                             "Try syncing with center.\n"
                             " e.sync_center()")
        ncores = workers if workers is not None else self.ncores
        remotes, who_has, nbytes = yield scatter_to_workers(
            self.center, ncores, data)
        self.update_data(who_has=who_has, nbytes=nbytes)

        raise gen.Return(remotes)

    @gen.coroutine
    def _restart(self):
        """ Drop all workers, restart them through their nannies, resync
        with the center and start again """
        logger.debug("Send shutdown signal to workers")

        for q in self.scheduler_queues + self.report_queues:
            clear_queue(q)

        for addr in self.nannies:
            self.mark_worker_missing(worker=addr, heal=False)

        logger.debug("Send kill signal to nannies")
        nannies = [rpc(ip=ip, port=n_port)
                   for (ip, w_port), n_port in self.nannies.items()]
        yield All([nanny.kill() for nanny in nannies])

        while self.ncores:
            yield gen.sleep(0.01)

        # All quiet

        yield All([nanny.instantiate(close=True) for nanny in nannies])

        yield self._sync_center()
        self.start()

        self.report({'op': 'restart'})
        for plugin in self.plugins[:]:
            try:
                plugin.restart(self)
            except Exception as e:
                logger.exception(e)