def test_http10_no_content_length(self):
    # Regression test for a bug in which can_keep_alive would crash
    # for an HTTP/1.0 (not 1.1) response with no content-length.
    conn = HTTP1Connection(self.client_stream, True)
    self.server_stream.write(b"HTTP/1.0 200 Not Modified\r\n\r\nhello")
    self.server_stream.close()

    event = Event()
    test = self
    body = []

    class Delegate(HTTPMessageDelegate):
        def headers_received(self, start_line, headers):
            test.code = start_line.code

        def data_received(self, data):
            body.append(data)

        def finish(self):
            event.set()

    yield conn.read_response(Delegate())
    yield event.wait()
    self.assertEqual(self.code, 200)
    self.assertEqual(b''.join(body), b'hello')
def get(self):
    logging.debug("queuing trigger")
    self.queue.append(self.finish)
    if self.get_argument("wake", "true") == "true":
        self.wake_callback()
    never_finish = Event()
    yield never_finish.wait()
def test_read_until_regex_max_bytes(self):
    rs, ws = yield self.make_iostream_pair()
    closed = Event()
    rs.set_close_callback(closed.set)
    try:
        # Extra room under the limit
        fut = rs.read_until_regex(b"def", max_bytes=50)
        ws.write(b"abcdef")
        data = yield fut
        self.assertEqual(data, b"abcdef")

        # Just enough space
        fut = rs.read_until_regex(b"def", max_bytes=6)
        ws.write(b"abcdef")
        data = yield fut
        self.assertEqual(data, b"abcdef")

        # Not enough space, but we don't know it until the data arrives;
        # all we can do is log a warning and close the connection.
        with ExpectLog(gen_log, "Unsatisfiable read"):
            rs.read_until_regex(b"def", max_bytes=5)
            ws.write(b"123456")
            yield closed.wait()
    finally:
        ws.close()
        rs.close()
def get(self):
    logging.debug("queuing trigger")
    event = Event()
    self.queue.append(event.set)
    if self.get_argument("wake", "true") == "true":
        self.wake_callback()
    yield event.wait()
class QueueDriver:
    def __init__(self, **settings):
        self.settings = settings
        self._finished = Event()
        self._getters = collections.deque([])  # Futures.
        self._putters = collections.deque([])
        self.initialize(**settings)

    def initialize(self, **settings):
        pass

    def over(self):
        self._finished.set()

    def save(self):
        raise NotImplementedError()

    def get(self):
        raise NotImplementedError()

    def put(self):
        raise NotImplementedError()

    def join(self, timeout=None):
        return self._finished.wait(timeout)
class ts():
    def __init__(self, ioloop):
        self.ioloop = ioloop
        self.rEvent = Event()
        self.wEvent = Event()
        self.writeLock = threading.Lock()
        self.readLock = threading.Lock()
        self.writeBuffer = b''
        self.readBuffer = b''
        IOLoop.current().add_callback(self.doRead)
        IOLoop.current().add_callback(self.doWrite)

    @gen.coroutine
    def doRead(self):
        co = 0
        while True:
            yield self.rEvent.wait()
            self.rEvent.clear()
            while True:
                msg = TOUMsg()
                self.readLock.acquire()
                r, self.readBuffer = msg.unpack(self.readBuffer)
                self.readLock.release()
                if not r:
                    break
                co += 1
                if co % 10 == 0:
                    print('co1', co, getRunningTime())

    @gen.coroutine
    def doWrite(self):
        co = 0
        while True:
            yield self.wEvent.wait()
            self.wEvent.clear()
            while True:
                if len(self.writeBuffer) > con_streamBufferSize:
                    break
                yield gen.sleep(random.randint(3, 20) / 10.0)
                msg = TOUMsg({}, b's' * random.randint(10, 2000))
                self.writeLock.acquire()
                self.writeBuffer += msg.pack()
                self.writeLock.release()
                co += 1
                if co % 10 == 0:
                    print('co2', co, getRunningTime())
class Waiter(object):
    def __init__(self):
        self.event = Event()

    @gen.coroutine
    def set(self):
        self.event.set()

    @gen.coroutine
    def wait(self):
        yield self.event.wait()
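# A minimal usage sketch for the Waiter wrapper above (not from the original
# source): one coroutine blocks in wait() until set() is called from the
# IOLoop. Assumes Event comes from tornado.locks and the Waiter class defined
# above is in scope.
from tornado import gen
from tornado.ioloop import IOLoop


@gen.coroutine
def waiter_demo():
    waiter = Waiter()
    # Schedule set() to run shortly after we start waiting; set() has no
    # yields, so invoking it as a plain callback completes immediately.
    IOLoop.current().call_later(0.1, waiter.set)
    yield waiter.wait()  # Resumes once the underlying Event is set.
    print("waiter released")


if __name__ == "__main__":
    IOLoop.current().run_sync(waiter_demo)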
def test_exit_callback():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    evt = Event()

    @gen.coroutine
    def on_stop(_proc):
        assert _proc is proc
        yield gen.moment
        evt.set()

    # Normal process exit
    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True
    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    to_child.put(None)
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
    assert not proc.is_alive()

    # Process terminated
    proc = AsyncProcess(target=wait)
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True
    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    yield proc.terminate()
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
def _start(self):
    if self.scheduler.status != 'running':
        yield self.scheduler._sync_center()
        self.scheduler.start()

    start_event = Event()
    self.coroutines = [
        self.scheduler.handle_queues(self.scheduler_queue, self.report_queue),
        self.report(start_event)]

    _global_executor[0] = self

    yield start_event.wait()
    logger.debug("Started scheduling coroutines. Synchronized")
class ZMQDrain(object):
    """Implementation of IDrain that pushes to a zmq.Socket asynchronously.

    This implementation overrides the high-water mark behavior from
    cs.eyrie.vassal.Vassal to instead use a zmq.Poller.
    """

    def __init__(self, logger, loop, zmq_socket, metric_prefix='emitter'):
        self.emitter = zmq_socket
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.output_error = Event()
        self.state = RUNNING
        self._writable = Event()
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)

    def _handle_events(self, fd, events):
        if events & self.loop.ERROR:
            self.logger.error('Error polling socket for writability')
        elif events & self.loop.WRITE:
            self.loop.remove_handler(self.emitter)
            self._writable.set()

    @gen.coroutine
    def _poll(self):
        self.loop.add_handler(self.emitter,
                              self._handle_events,
                              self.loop.WRITE)
        yield self._writable.wait()
        self._writable.clear()

    @gen.coroutine
    def close(self, timeout=None):
        self.state = CLOSING
        self.logger.debug("Flushing send queue")
        self.emitter.close()

    def emit_nowait(self, msg):
        self.logger.debug("Drain emitting")
        if isinstance(msg, basestring):
            msg = [msg]
        try:
            self.emitter.send_multipart(msg, zmq.NOBLOCK)
        except zmq.Again:
            raise QueueFull()

    @gen.coroutine
    def emit(self, msg, retry_timeout=INITIAL_TIMEOUT):
        if isinstance(msg, basestring):
            msg = [msg]
        yield self._poll()
        self.emitter.send_multipart(msg, zmq.NOBLOCK)
def test_idle_after_use(self):
    stream = yield self.connect()
    event = Event()
    stream.set_close_callback(event.set)

    # Use the connection twice to make sure keep-alives are working
    for i in range(2):
        stream.write(b"GET / HTTP/1.1\r\n\r\n")
        yield stream.read_until(b"\r\n\r\n")
        data = yield stream.read_bytes(11)
        self.assertEqual(data, b"Hello world")

    # Now let the timeout trigger and close the connection.
    yield event.wait()
def _start(self, timeout=3, **kwargs):
    if isinstance(self._start_arg, Scheduler):
        self.scheduler = self._start_arg
        self.center = self._start_arg.center
    if isinstance(self._start_arg, str):
        ip, port = tuple(self._start_arg.split(':'))
        self._start_arg = (ip, int(port))
    if isinstance(self._start_arg, tuple):
        r = coerce_to_rpc(self._start_arg, timeout=timeout)
        try:
            ident = yield r.identity()
        except (StreamClosedError, OSError):
            raise IOError("Could not connect to %s:%d" % self._start_arg)

        if ident['type'] == 'Center':
            self.center = r
            self.scheduler = Scheduler(self.center, loop=self.loop, **kwargs)
            self.scheduler.listen(0)
        elif ident['type'] == 'Scheduler':
            self.scheduler = r
            self.scheduler_stream = yield connect(*self._start_arg)
            yield write(self.scheduler_stream,
                        {'op': 'register-client', 'client': self.id})
            if 'center' in ident:
                cip, cport = ident['center']
                self.center = rpc(ip=cip, port=cport)
            else:
                self.center = self.scheduler
        else:
            raise ValueError("Unknown Type")

    if isinstance(self.scheduler, Scheduler):
        if self.scheduler.status != 'running':
            yield self.scheduler.sync_center()
            self.scheduler.start(0)
        self.scheduler_queue = Queue()
        self.report_queue = Queue()
        self.coroutines.append(
            self.scheduler.handle_queues(self.scheduler_queue,
                                         self.report_queue))

    start_event = Event()
    self.coroutines.append(self._handle_report(start_event))

    _global_executor[0] = self

    yield start_event.wait()
    logger.debug("Started scheduling coroutines. Synchronized")
def get_response(self, data, method, show_graphiql=False):
    query, variables, operation_name, id = self.get_graphql_params(
        self.request, data)

    request_end = yield self.extension_stack.request_started(
        self.request, query, None, operation_name, variables, self.context,
        self.request_context)

    try:
        execution_result = yield self.execute_graphql_request(
            method, query, variables, operation_name, show_graphiql)

        status_code = 200
        if execution_result:
            response = {}

            if getattr(execution_result, 'is_pending', False):
                event = Event()
                on_resolve = lambda *_: event.set()  # noqa
                execution_result.then(on_resolve).catch(on_resolve)
                yield event.wait()

            if hasattr(execution_result, 'get'):
                execution_result = execution_result.get()

            if execution_result.errors:
                response['errors'] = [
                    self.format_error(e) for e in execution_result.errors
                ]

            if execution_result.invalid:
                status_code = 400
            else:
                response['data'] = execution_result.data

            if self.batch:
                response['id'] = id
                response['status'] = status_code

            result = self.json_encode(response,
                                      pretty=self.pretty or show_graphiql)
        else:
            result = None

        res = (result, status_code)
        yield self.extension_stack.will_send_response(result, self.context)
        raise Return(res)
    finally:
        yield request_end()
def test_prepare_curl_callback_stack_context(self):
    exc_info = []
    error_event = Event()

    def error_handler(typ, value, tb):
        exc_info.append((typ, value, tb))
        error_event.set()
        return True

    with ExceptionStackContext(error_handler):
        request = HTTPRequest(self.get_url('/custom_reason'),
                              prepare_curl_callback=lambda curl: 1 / 0)
    yield [error_event.wait(), self.http_client.fetch(request)]
    self.assertEqual(1, len(exc_info))
    self.assertIs(exc_info[0][0], ZeroDivisionError)
def test_read_until_regex_max_bytes_ignores_extra(self):
    rs, ws = yield self.make_iostream_pair()
    closed = Event()
    rs.set_close_callback(closed.set)
    try:
        # Even though data that matches arrives in the same packet that
        # puts us over the limit, we fail the request because it was not
        # found within the limit.
        ws.write(b"abcdef")
        with ExpectLog(gen_log, "Unsatisfiable read"):
            rs.read_until_regex(b"def", max_bytes=5)
            yield closed.wait()
    finally:
        ws.close()
        rs.close()
def asyncSetUp(self):
    listener, port = bind_unused_port()
    event = Event()

    def accept_callback(conn, addr):
        self.server_stream = IOStream(conn)
        self.addCleanup(self.server_stream.close)
        event.set()

    add_accept_handler(listener, accept_callback)
    self.client_stream = IOStream(socket.socket())
    self.addCleanup(self.client_stream.close)
    yield [self.client_stream.connect(('127.0.0.1', port)), event.wait()]
    self.io_loop.remove_handler(listener)
    listener.close()
def test_read_until_regex_max_bytes_inline(self):
    rs, ws = yield self.make_iostream_pair()
    closed = Event()
    rs.set_close_callback(closed.set)
    try:
        # Similar to the error case in the previous test, but the
        # ws writes first so rs reads are satisfied
        # inline.  For consistency with the out-of-line case, we
        # do not raise the error synchronously.
        ws.write(b"123456")
        with ExpectLog(gen_log, "Unsatisfiable read"):
            rs.read_until_regex(b"def", max_bytes=5)
            yield closed.wait()
    finally:
        ws.close()
        rs.close()
def handle_stream(self, stream, address):
    id = str(uuid.uuid1())
    conn_id = str(self.connId)
    self.connId += 1
    self.addConnMap(conn_id)

    IOLoop.instance().add_callback(
        functools.partial(self.doRead, stream, conn_id))
    IOLoop.instance().add_callback(
        functools.partial(self.doWrite, stream, conn_id))

    pack = {'type': 'conn', 'id': id, 'conn_id': conn_id}
    msg = TOUMsg(pack, b'')

    e = Event()
    self.waitIdMap[id] = {'event': e}
    yield self.addTask(msg)

    self.writeLock.acquire()
    wbl = len(self.writeBuffer) / 1024
    self.writeLock.release()

    s = ('conn add %s, conn:%s,in:%s,out:%s,oById:%s,addTask:%s,waitId:%s' %
         (conn_id, len(self.connMap), self.outputSize / 1024, wbl,
          len(self.outputMap_byId), len(self.addTaskMap), len(self.waitIdMap)))
    t = int(getRunningTime() * 1000) / 1000.0
    msg = '%s %s\n' % (t, s)
    print(msg)

    t = int((getRunningTime() - self.startTime) * 100) / 100.0
    ss = '####### %ss ## conn sent %s %s\n' % (t, conn_id, len(self.connMap))
    self.logCache.append(ss)

    yield e.wait()

    msg = self.outputMap_byId[id]['msg']
    del self.outputMap_byId[id]
    del self.waitIdMap[id]

    back = msg.m_json
    t = int(getRunningTime() * 1000) / 1000.0
    s = '%s conn reply %s, conn:%s ,ret:%s\n' % (
        t, back['conn_id'], len(self.connMap), back['ret'])
    print(s)

    t = int((getRunningTime() - self.startTime) * 100) / 100.0
    ss = '####### %ss ## conn back %s %s\n' % (t, back['conn_id'],
                                               len(self.connMap))
    self.logCache.append(ss)

    if back['ret'] == 0:
        del self.connMap[back['conn_id']]
def do_exec():
    result = execute(schema(), ast, executor=TornadoExecutor(),
                     return_promise=True)

    if getattr(result, 'is_pending', False):
        event = Event()
        on_resolve = lambda *_: event.set()
        result.then(on_resolve).catch(on_resolve)
        yield event.wait()

    if hasattr(result, 'get'):
        result = result.get()

    assert not result.errors
    assert result.data == {'a': 'hey', 'b': 'hey2', 'c': 'hey3'}
    print('SUCCESS ASYNC')
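# The snippet above bridges a promise-style result to a tornado Event so a
# coroutine can wait on it. A hedged, generalized form of that same pattern
# (an assumed helper, not part of the original code); it only relies on the
# then/catch/get methods already used above.
from tornado import gen
from tornado.locks import Event


@gen.coroutine
def wait_for_promise(promise):
    """Block the calling coroutine until `promise` resolves or rejects."""
    event = Event()
    on_settle = lambda *_: event.set()  # fire on either outcome
    promise.then(on_settle).catch(on_settle)
    yield event.wait()
    # Unwrap the settled value if the promise exposes get(), as above.
    raise gen.Return(promise.get() if hasattr(promise, 'get') else promise)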
def create_iostream_pair(self):
    _lock = Event()
    server_streams = []

    def accept_callback(conn, addr):
        server_stream = MicroProxyIOStream(conn)
        server_streams.append(server_stream)
        # self.addCleanup(server_stream.close)
        _lock.set()

    listener, port = bind_unused_port()
    add_accept_handler(listener, accept_callback)
    client_stream = MicroProxyIOStream(socket.socket())
    yield [client_stream.connect(('127.0.0.1', port)), _lock.wait()]
    self.io_loop.remove_handler(listener)
    listener.close()
    raise Return((client_stream, server_streams[0]))
class ImageMutex():
    def __init__(self):
        self._mutex = Event()
        self._blocked = count()
        self._building_log = []
        self._exception = None

    @gen.coroutine
    def block(self):
        value = self._blocked.__next__()  # single bytecode operation
        if value:
            yield self._mutex.wait()
        return value

    def __enter__(self):
        if self._exception is not None:
            raise self._exception
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._building_log = []
        if isinstance(exc_value, Exception):
            self._exception = exc_value
        self._mutex.set()

    def timeout_happened(self):
        self._exception = Exception('This image is too heavy to build')
        self._building_log = []

    def add_to_log(self, message, level=1):
        if not self._exception:
            self._building_log.append({
                'text': message,
                'level': level
            })

    @property
    def building_log(self):
        return self._building_log

    @property
    def last_exception(self):
        return self._exception
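# A hedged usage sketch (not from the original project) for the ImageMutex
# above: the first coroutine to call block() gets 0 and performs the build
# inside the context manager; later callers get a non-zero count and wait on
# the internal Event until __exit__ sets it. Assumes ImageMutex and its
# dependencies (itertools.count, tornado.locks.Event) are in scope.
from tornado import gen
from tornado.ioloop import IOLoop


@gen.coroutine
def build_image(mutex, name):
    already_building = yield mutex.block()
    if already_building:
        # Another coroutine finished (or failed) the build; __enter__
        # re-raises any exception it recorded.
        with mutex:
            print(name, "reused existing build")
        return
    with mutex:
        mutex.add_to_log("building image")
        yield gen.sleep(0.1)  # stand-in for the real build work
        print(name, "performed the build")


if __name__ == "__main__":
    shared_mutex = ImageMutex()
    IOLoop.current().run_sync(
        lambda: gen.multi([build_image(shared_mutex, "first"),
                           build_image(shared_mutex, "second")]))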
class QueueDriver:
    def __init__(self, **settings):
        self.settings = settings
        self._finished = Event()
        self._getters = collections.deque([])  # Futures.
        self._putters = collections.deque([])

    def over(self):
        self._finished.set()

    def save(self):
        raise NotImplementedError()

    def get(self):
        raise NotImplementedError()

    def put(self):
        raise NotImplementedError()

    def join(self, timeout):
        return self._finished.wait(timeout)
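# A minimal sketch (not part of the original code) of how the QueueDriver base
# class above might be subclassed: an in-memory driver whose put/get work on a
# plain deque. join() still delegates to the internal Event, which over() sets.
import collections


class MemoryQueueDriver(QueueDriver):
    def __init__(self, **settings):
        super().__init__(**settings)
        self._items = collections.deque()

    def put(self, item):
        self._items.append(item)

    def get(self):
        return self._items.popleft()

    def save(self):
        # Nothing to persist for an in-memory driver.
        pass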
class ImageMutex():
    def __init__(self):
        self._mutex = Event()
        self._blocked = count()
        self._building_log = []
        self._exception = None

    @gen.coroutine
    def block(self):
        value = self._blocked.__next__()  # single bytecode operation
        if value:
            yield self._mutex.wait()
        return value

    def __enter__(self):
        # if self._exception is not None:
        #     raise self._exception
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._building_log = []
        if isinstance(exc_value, Exception):
            self._exception = exc_value
        self._mutex.set()

    def timeout_happened(self):
        self._exception = Exception('This image is too heavy to build')
        self._building_log = []

    def add_to_log(self, message, level=1):
        if not self._exception:
            self._building_log.append({'text': message, 'level': level})

    @property
    def building_log(self):
        return self._building_log

    @property
    def last_exception(self):
        return self._exception
def test_close_callback_with_pending_read(self):
    # Regression test for a bug that was introduced in 2.3
    # where the IOStream._close_callback would never be called
    # if there were pending reads.
    OK = b"OK\r\n"
    rs, ws = yield self.make_iostream_pair()
    event = Event()
    rs.set_close_callback(event.set)
    try:
        ws.write(OK)
        res = yield rs.read_until(b"\r\n")
        self.assertEqual(res, OK)

        ws.close()
        rs.read_until(b"\r\n")
        # If _close_callback (self.stop) is not called,
        # an AssertionError: Async operation timed out after 5 seconds
        # will be raised.
        yield event.wait()
    finally:
        ws.close()
        rs.close()
def test_async_read_error_logging(self):
    # Socket errors on asynchronous reads should be logged (but only
    # once).
    server, client = yield self.make_iostream_pair()
    closed = Event()
    server.set_close_callback(closed.set)
    try:
        # Start a read that will be fulfilled asynchronously.
        server.read_bytes(1)
        client.write(b'a')

        # Stub out read_from_fd to make it fail.
        def fake_read_from_fd():
            os.close(server.socket.fileno())
            server.__class__.read_from_fd(server)

        server.read_from_fd = fake_read_from_fd
        # This log message is from _handle_read (not read_from_fd).
        with ExpectLog(gen_log, "error on read"):
            yield closed.wait()
    finally:
        server.close()
        client.close()
def _start(self, timeout=3, **kwargs):
    if isinstance(self._start_arg, Scheduler):
        self.scheduler = self._start_arg
    if isinstance(self._start_arg, str):
        host, port = tuple(self._start_arg.split(':'))
        self._start_arg = (host, int(port))
    if isinstance(self._start_arg, tuple):
        host, port = self._start_arg
        ip = socket.gethostbyname(host)
        r = coerce_to_rpc((ip, port), timeout=timeout)
        try:
            ident = yield r.identity()
        except (StreamClosedError, OSError):
            raise IOError("Could not connect to %s:%d" % (ip, port))

        if ident['type'] == 'Scheduler':
            self.scheduler = r
            self.scheduler_stream = yield connect(ip, port)
            yield write(self.scheduler_stream,
                        {'op': 'register-client', 'client': self.id})
        else:
            raise ValueError("Unknown Type")

    if isinstance(self.scheduler, Scheduler):
        if self.scheduler.status != 'running':
            self.scheduler.start(0)
        self.scheduler_queue = Queue()
        self.report_queue = Queue()
        self.coroutines.append(self.scheduler.handle_queues(
            self.scheduler_queue, self.report_queue))

    start_event = Event()
    self.coroutines.append(self._handle_report(start_event))

    _global_executor[0] = self

    yield start_event.wait()
    logger.debug("Started scheduling coroutines. Synchronized")
def test_unused_connection(self):
    stream = yield self.connect()
    event = Event()
    stream.set_close_callback(event.set)
    yield event.wait()
class Executor(object):
    """ Distributed executor with data dependencies

    This executor resembles executors in concurrent.futures but also
    allows Futures within submit/map calls.

    Provide center address on initialization

    >>> executor = Executor(('127.0.0.1', 8787))  # doctest: +SKIP

    Use ``submit`` method like normal

    >>> a = executor.submit(add, 1, 2)  # doctest: +SKIP
    >>> b = executor.submit(add, 10, 20)  # doctest: +SKIP

    Additionally, provide results of submit calls (futures) to further submit
    calls:

    >>> c = executor.submit(add, a, b)  # doctest: +SKIP

    This allows for the dynamic creation of complex dependencies.
    """
    def __init__(self, center, start=True, delete_batch_time=1):
        self.center = coerce_to_rpc(center)
        self.futures = dict()
        self.refcount = defaultdict(lambda: 0)
        self.dask = dict()
        self.restrictions = dict()
        self.loop = IOLoop()
        self.report_queue = Queue()
        self.scheduler_queue = Queue()
        self._shutdown_event = Event()
        self._delete_batch_time = delete_batch_time

        if start:
            self.start()

    def start(self):
        """ Start scheduler running in separate thread """
        from threading import Thread
        self.loop.add_callback(self._go)
        self._loop_thread = Thread(target=self.loop.start)
        self._loop_thread.start()

    def __enter__(self):
        if not self.loop._running:
            self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.shutdown()

    def _inc_ref(self, key):
        self.refcount[key] += 1

    def _dec_ref(self, key):
        self.refcount[key] -= 1
        if self.refcount[key] == 0:
            del self.refcount[key]
            self._release_key(key)

    def _release_key(self, key):
        """ Release key from distributed memory """
        self.futures[key]['event'].clear()
        logger.debug("Release key %s", key)
        del self.futures[key]
        self.scheduler_queue.put_nowait({'op': 'release-held-data',
                                         'key': key})

    @gen.coroutine
    def report(self):
        """ Listen to scheduler """
        while True:
            msg = yield self.report_queue.get()
            if msg['op'] == 'close':
                break
            if msg['op'] == 'task-finished':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'finished'
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'lost-data':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'lost'
                    self.futures[msg['key']]['event'].clear()
            if msg['op'] == 'task-erred':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'error'
                    self.futures[msg['key']]['event'].set()

    @gen.coroutine
    def _shutdown(self):
        """ Send shutdown signal and wait until _go completes """
        self.report_queue.put_nowait({'op': 'close'})
        self.scheduler_queue.put_nowait({'op': 'close'})
        yield self._shutdown_event.wait()

    def shutdown(self):
        """ Send shutdown signal and wait until scheduler terminates """
        self.report_queue.put_nowait({'op': 'close'})
        self.scheduler_queue.put_nowait({'op': 'close'})
        self.loop.stop()
        self._loop_thread.join()

    @gen.coroutine
    def _go(self):
        """ Setup and run all other coroutines.  Block until finished. """
        self.who_has, self.has_what, self.ncores = yield [
            self.center.who_has(),
            self.center.has_what(),
            self.center.ncores()]
        self.waiting = {}
        self.processing = {}
        self.stacks = {}

        worker_queues = {worker: Queue() for worker in self.ncores}
        delete_queue = Queue()

        coroutines = ([
            self.report(),
            scheduler(self.scheduler_queue, self.report_queue, worker_queues,
                      delete_queue, self.who_has, self.has_what, self.ncores,
                      self.dask, self.restrictions, self.waiting, self.stacks,
                      self.processing),
            delete(self.scheduler_queue, delete_queue, self.center.ip,
                   self.center.port, self._delete_batch_time)]
            + [worker(self.scheduler_queue, worker_queues[w], w, n)
               for w, n in self.ncores.items()])

        results = yield All(coroutines)
        self._shutdown_event.set()

    def submit(self, func, *args, **kwargs):
        """ Submit a function application to the scheduler

        Parameters
        ----------
        func: callable
        *args:
        **kwargs:
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> c = executor.submit(add, a, b)  # doctest: +SKIP

        Returns
        -------
        Future

        See Also
        --------
        distributed.executor.Executor.submit:
        """
        if not callable(func):
            raise TypeError("First input to submit must be a callable function")

        key = kwargs.pop('key', None)
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)

        if key is None:
            if pure:
                key = funcname(func) + '-' + tokenize(func, kwargs, *args)
            else:
                key = funcname(func) + '-' + next(tokens)

        if key in self.futures:
            return Future(key, self)

        if kwargs:
            task = (apply, func, args, kwargs)
        else:
            task = (func,) + args

        if workers is not None:
            restrictions = {key: workers}
        else:
            restrictions = {}

        if key not in self.futures:
            self.futures[key] = {'event': Event(), 'status': 'waiting'}

        logger.debug("Submit %s(...), %s", funcname(func), key)
        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': {key: task},
                                         'keys': [key],
                                         'restrictions': restrictions})

        return Future(key, self)

    def map(self, func, *iterables, **kwargs):
        """ Map a function on a sequence of arguments

        Arguments can be normal objects or Futures

        Parameters
        ----------
        func: callable
        iterables: Iterables
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> L = executor.map(func, sequence)  # doctest: +SKIP

        Returns
        -------
        list of futures

        See also
        --------
        distributed.executor.Executor.submit
        """
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)

        if not callable(func):
            raise TypeError("First input to map must be a callable function")

        iterables = [list(it) for it in iterables]
        if pure:
            keys = [funcname(func) + '-' + tokenize(func, kwargs, *args)
                    for args in zip(*iterables)]
        else:
            uid = str(uuid.uuid4())
            keys = [funcname(func) + '-' + uid + '-' + next(tokens)
                    for i in range(min(map(len, iterables)))]

        if not kwargs:
            dsk = {key: (func,) + args
                   for key, args in zip(keys, zip(*iterables))}
        else:
            dsk = {key: (apply, func, args, kwargs)
                   for key, args in zip(keys, zip(*iterables))}

        for key in dsk:
            if key not in self.futures:
                self.futures[key] = {'event': Event(), 'status': 'waiting'}

        if isinstance(workers, (list, set)):
            if workers and isinstance(first(workers), (list, set)):
                if len(workers) != len(keys):
                    raise ValueError("You only provided %d worker restrictions"
                                     " for a sequence of length %d"
                                     % (len(workers), len(keys)))
                restrictions = dict(zip(keys, workers))
            else:
                restrictions = {key: workers for key in keys}
        elif workers is None:
            restrictions = {}
        else:
            raise TypeError("Workers must be a list or set of workers or None")

        logger.debug("map(%s, ...)", funcname(func))
        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': dsk,
                                         'keys': keys,
                                         'restrictions': restrictions})

        return [Future(key, self) for key in keys]

    @gen.coroutine
    def _gather(self, futures):
        futures2, keys = unpack_remotedata(futures)
        keys = list(keys)

        while True:
            yield All([self.futures[key]['event'].wait() for key in keys])
            try:
                data = yield _gather(self.center, keys)
            except KeyError as e:
                self.scheduler_queue.put_nowait({'op': 'missing-data',
                                                 'missing': e.args})
                for key in e.args:
                    self.futures[key]['event'].clear()
            else:
                break

        data = dict(zip(keys, data))
        result = pack_data(futures2, data)
        raise gen.Return(result)

    def gather(self, futures):
        """ Gather futures from distributed memory

        Accepts a future or any nested core container of futures

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> x = e.submit(add, 1, 2)  # doctest: +SKIP
        >>> e.gather(x)  # doctest: +SKIP
        3
        >>> e.gather([x, [x], x])  # doctest: +SKIP
        [3, [3], 3]
        """
        return sync(self.loop, self._gather, futures)

    @gen.coroutine
    def _get(self, dsk, keys, restrictions=None):
        flatkeys = list(flatten(keys))
        for key in flatkeys:
            if key not in self.futures:
                self.futures[key] = {'event': Event(), 'status': None}
        futures = {key: Future(key, self) for key in flatkeys}

        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': dsk,
                                         'keys': flatkeys,
                                         'restrictions': restrictions or {}})

        packed = pack_data(keys, futures)
        result = yield self._gather(packed)
        raise gen.Return(result)

    def get(self, dsk, keys, **kwargs):
        """ Gather futures from distributed memory

        Parameters
        ----------
        dsk: dict
        keys: object, or nested lists of objects
        restrictions: dict (optional)
            A mapping of {key: {set of worker hostnames}} that restricts where
            jobs can take place

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.get({'x': (add, 1, 2)}, 'x')  # doctest: +SKIP
        3
        """
        return sync(self.loop, self._get, dsk, keys, **kwargs)
class ProjectGroomer(object):
    """ Cleans up expired transactions for a project. """
    def __init__(self, project_id, coordinator, zk_client, db_access,
                 thread_pool):
        """ Creates a new ProjectGroomer.

        Args:
            project_id: A string specifying a project ID.
            coordinator: A GroomingCoordinator.
            zk_client: A KazooClient.
            db_access: A DatastoreProxy.
            thread_pool: A ThreadPoolExecutor.
        """
        self.project_id = project_id

        self._coordinator = coordinator
        self._zk_client = zk_client
        self._tornado_zk = TornadoKazoo(self._zk_client)
        self._db_access = db_access
        self._thread_pool = thread_pool
        self._project_node = '/appscale/apps/{}'.format(self.project_id)
        self._containers = []
        self._inactive_containers = set()
        self._batch_resolver = BatchResolver(self.project_id, self._db_access)

        self._zk_client.ensure_path(self._project_node)
        self._zk_client.ChildrenWatch(self._project_node,
                                      self._update_containers)

        self._txid_manual_offset = 0
        self._offset_node = '/'.join([self._project_node, OFFSET_NODE])
        self._zk_client.DataWatch(self._offset_node, self._update_offset)

        self._stop_event = AsyncEvent()
        self._stopped_event = AsyncEvent()

        # Keeps track of cleanup results for each round of grooming.
        self._txids_cleaned = 0
        self._oldest_valid_tx_time = None

        self._worker_queue = AsyncQueue(maxsize=MAX_CONCURRENCY)
        for _ in range(MAX_CONCURRENCY):
            IOLoop.current().spawn_callback(self._worker)

        IOLoop.current().spawn_callback(self.start)

    @gen.coroutine
    def start(self):
        """ Starts the grooming process until the stop event is set. """
        logger.info('Grooming {}'.format(self.project_id))
        while True:
            if self._stop_event.is_set():
                break

            try:
                yield self._groom_project()
            except Exception:
                # Prevent the grooming loop from stopping if an error is
                # encountered.
                logger.exception(
                    'Unexpected error while grooming {}'.format(self.project_id))
                yield gen.sleep(MAX_TX_DURATION)

        self._stopped_event.set()

    @gen.coroutine
    def stop(self):
        """ Stops the grooming process. """
        logger.info('Stopping grooming process for {}'.format(self.project_id))

        self._stop_event.set()
        yield self._stopped_event.wait()

    @gen.coroutine
    def _worker(self):
        """ Processes items in the worker queue. """
        while True:
            tx_path, composite_indexes = yield self._worker_queue.get()
            try:
                tx_time = yield self._resolve_txid(tx_path, composite_indexes)
                if tx_time is None:
                    self._txids_cleaned += 1

                if tx_time is not None and tx_time < self._oldest_valid_tx_time:
                    self._oldest_valid_tx_time = tx_time
            finally:
                self._worker_queue.task_done()

    def _update_offset(self, new_offset, _):
        """ Watches for updates to the manual offset node.

        Args:
            new_offset: A string specifying the new manual offset.
        """
        self._txid_manual_offset = int(new_offset or 0)

    def _update_containers(self, nodes):
        """ Updates the list of active txid containers.

        Args:
            nodes: A list of strings specifying ZooKeeper nodes.
        """
        counters = [int(node[len(CONTAINER_PREFIX):] or 1)
                    for node in nodes if node.startswith(CONTAINER_PREFIX)
                    and node not in self._inactive_containers]
        counters.sort()

        containers = [CONTAINER_PREFIX + str(counter) for counter in counters]
        if containers and containers[0] == '{}1'.format(CONTAINER_PREFIX):
            containers[0] = CONTAINER_PREFIX

        self._containers = containers

    @gen.coroutine
    def _groom_project(self):
        """ Runs the grooming process. """
        index = self._coordinator.index
        worker_count = self._coordinator.total_workers

        oldest_valid_tx_time = yield self._fetch_and_clean(index, worker_count)

        # Wait until there's a reasonable chance that some transactions have
        # timed out.
        next_timeout_eta = oldest_valid_tx_time + MAX_TX_DURATION

        # The oldest ignored transaction should still be valid, but ensure
        # that the timeout is not negative.
        next_timeout = max(0, next_timeout_eta - time.time())
        time_to_wait = datetime.timedelta(
            seconds=next_timeout + (MAX_TX_DURATION / 2))

        # Allow the wait to be cut short when a project is removed.
        try:
            yield self._stop_event.wait(timeout=time_to_wait)
        except gen.TimeoutError:
            raise gen.Return()

    @gen.coroutine
    def _remove_path(self, tx_path):
        """ Removes a ZooKeeper node.

        Args:
            tx_path: A string specifying the path to delete.
        """
        try:
            yield self._tornado_zk.delete(tx_path)
        except NoNodeError:
            pass
        except NotEmptyError:
            yield self._thread_pool.submit(self._zk_client.delete, tx_path,
                                           recursive=True)

    @gen.coroutine
    def _resolve_txid(self, tx_path, composite_indexes):
        """ Cleans up a transaction if it has expired.

        Args:
            tx_path: A string specifying the location of the ZooKeeper node.
            composite_indexes: A list of CompositeIndex objects.
        Returns:
            The transaction start time if still valid, None if invalid because
            this method will also delete it.
        """
        tx_data = yield self._tornado_zk.get(tx_path)
        tx_time = float(tx_data[0])

        _, container, tx_node = tx_path.rsplit('/', 2)
        tx_node_id = int(tx_node.lstrip(COUNTER_NODE_PREFIX))
        container_count = int(container[len(CONTAINER_PREFIX):] or 1)
        if tx_node_id < 0:
            yield self._remove_path(tx_path)
            raise gen.Return()

        container_size = MAX_SEQUENCE_COUNTER + 1
        automatic_offset = (container_count - 1) * container_size
        txid = self._txid_manual_offset + automatic_offset + tx_node_id

        if txid < 1:
            yield self._remove_path(tx_path)
            raise gen.Return()

        # If the transaction is still valid, return the time it was created.
        if tx_time + MAX_TX_DURATION >= time.time():
            raise gen.Return(tx_time)

        yield self._batch_resolver.resolve(txid, composite_indexes)
        yield self._remove_path(tx_path)
        yield self._batch_resolver.cleanup(txid)

    @gen.coroutine
    def _fetch_and_clean(self, worker_index, worker_count):
        """ Cleans up expired transactions.

        Args:
            worker_index: An integer specifying this worker's index.
            worker_count: An integer specifying the number of total workers.
        Returns:
            A float specifying the time of the oldest valid transaction as a
            unix timestamp.
        """
        self._txids_cleaned = 0
        self._oldest_valid_tx_time = time.time()

        children = []
        for index, container in enumerate(self._containers):
            container_path = '/'.join([self._project_node, container])
            new_children = yield self._tornado_zk.get_children(container_path)

            if not new_children and index < len(self._containers) - 1:
                self._inactive_containers.add(container)

            children.extend(['/'.join([container_path, node])
                             for node in new_children])

        logger.debug(
            'Found {} transaction IDs for {}'.format(len(children),
                                                     self.project_id))

        if not children:
            raise gen.Return(self._oldest_valid_tx_time)

        # Refresh these each time so that the indexes are fresh.
        encoded_indexes = yield self._thread_pool.submit(
            self._db_access.get_indices, self.project_id)
        composite_indexes = [CompositeIndex(index)
                             for index in encoded_indexes]

        for tx_path in children:
            tx_node_id = int(tx_path.split('/')[-1].lstrip(COUNTER_NODE_PREFIX))
            # Only resolve transactions that this worker has been assigned.
            if tx_node_id % worker_count != worker_index:
                continue

            yield self._worker_queue.put((tx_path, composite_indexes))

        yield self._worker_queue.join()

        if self._txids_cleaned > 0:
            logger.info('Cleaned up {} expired txids for {}'.format(
                self._txids_cleaned, self.project_id))

        raise gen.Return(self._oldest_valid_tx_time)
class Queue(Generic[_T]):
    """Coordinate producer and consumer coroutines.

    If maxsize is 0 (the default) the queue size is unbounded.

    .. testcode::

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.queues import Queue

        q = Queue(maxsize=2)

        async def consumer():
            async for item in q:
                try:
                    print('Doing work on %s' % item)
                    await gen.sleep(0.01)
                finally:
                    q.task_done()

        async def producer():
            for item in range(5):
                await q.put(item)
                print('Put %s' % item)

        async def main():
            # Start consumer without waiting (since it never finishes).
            IOLoop.current().spawn_callback(consumer)
            await producer()     # Wait for producer to put all tasks.
            await q.join()       # Wait for consumer to finish all tasks.
            print('Done')

        IOLoop.current().run_sync(main)

    .. testoutput::

        Put 0
        Put 1
        Doing work on 0
        Put 2
        Doing work on 1
        Put 3
        Doing work on 2
        Put 4
        Doing work on 3
        Doing work on 4
        Done

    In versions of Python without native coroutines (before 3.5),
    ``consumer()`` could be written as::

        @gen.coroutine
        def consumer():
            while True:
                item = yield q.get()
                try:
                    print('Doing work on %s' % item)
                    yield gen.sleep(0.01)
                finally:
                    q.task_done()

    .. versionchanged:: 4.3
       Added ``async for`` support in Python 3.5.

    """

    # Exact type depends on subclass. Could be another generic
    # parameter and use protocols to be more precise here.
    _queue = None  # type: Any

    def __init__(self, maxsize: int = 0) -> None:
        if maxsize is None:
            raise TypeError("maxsize can't be None")

        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        self._getters = collections.deque([])  # type: Deque[Future[_T]]
        self._putters = collections.deque([])  # type: Deque[Tuple[_T, Future[None]]]
        self._unfinished_tasks = 0
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self) -> int:
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self) -> int:
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self) -> bool:
        return not self._queue

    def full(self) -> bool:
        if self.maxsize == 0:
            return False
        else:
            return self.qsize() >= self.maxsize

    def put(
        self, item: _T, timeout: Union[float, datetime.timedelta] = None
    ) -> "Future[None]":
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.util.TimeoutError` after a
        timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.
        """
        future = Future()  # type: Future[None]
        try:
            self.put_nowait(item)
        except QueueFull:
            self._putters.append((item, future))
            _set_timeout(future, timeout)
        else:
            future.set_result(None)
        return future

    def put_nowait(self, item: _T) -> None:
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if self._getters:
            assert self.empty(), "queue non-empty, why are getters waiting?"
            getter = self._getters.popleft()
            self.__put_internal(item)
            future_set_result_unless_cancelled(getter, self._get())
        elif self.full():
            raise QueueFull
        else:
            self.__put_internal(item)

    def get(self, timeout: Union[float, datetime.timedelta] = None) -> Awaitable[_T]:
        """Remove and return an item from the queue.

        Returns an awaitable which resolves once an item is available, or
        raises `tornado.util.TimeoutError` after a timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.

        .. note::

           The ``timeout`` argument of this method differs from that
           of the standard library's `queue.Queue.get`. That method
           interprets numeric values as relative timeouts; this one
           interprets them as absolute deadlines and requires
           ``timedelta`` objects for relative timeouts (consistent
           with other timeouts in Tornado).

        """
        future = Future()  # type: Future[_T]
        try:
            future.set_result(self.get_nowait())
        except QueueEmpty:
            self._getters.append(future)
            _set_timeout(future, timeout)
        return future

    def get_nowait(self) -> _T:
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self._putters.popleft()
            self.__put_internal(item)
            future_set_result_unless_cancelled(putter, None)
            return self._get()
        elif self.qsize():
            return self._get()
        else:
            raise QueueEmpty

    def task_done(self) -> None:
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        if self._unfinished_tasks <= 0:
            raise ValueError("task_done() called too many times")
        self._unfinished_tasks -= 1
        if self._unfinished_tasks == 0:
            self._finished.set()

    def join(self, timeout: Union[float, datetime.timedelta] = None) -> Awaitable[None]:
        """Block until all items in the queue are processed.

        Returns an awaitable, which raises `tornado.util.TimeoutError` after
        a timeout.
        """
        return self._finished.wait(timeout)

    def __aiter__(self) -> _QueueIterator[_T]:
        return _QueueIterator(self)

    # These three are overridable in subclasses.
    def _init(self) -> None:
        self._queue = collections.deque()

    def _get(self) -> _T:
        return self._queue.popleft()

    def _put(self, item: _T) -> None:
        self._queue.append(item)

    # End of the overridable methods.

    def __put_internal(self, item: _T) -> None:
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self) -> None:
        # Remove timed-out waiters.
        while self._putters and self._putters[0][1].done():
            self._putters.popleft()

        while self._getters and self._getters[0].done():
            self._getters.popleft()

    def __repr__(self) -> str:
        return "<%s at %s %s>" % (type(self).__name__, hex(id(self)), self._format())

    def __str__(self) -> str:
        return "<%s %s>" % (type(self).__name__, self._format())

    def _format(self) -> str:
        result = "maxsize=%r" % (self.maxsize,)
        if getattr(self, "_queue", None):
            result += " queue=%r" % self._queue
        if self._getters:
            result += " getters[%s]" % len(self._getters)
        if self._putters:
            result += " putters[%s]" % len(self._putters)
        if self._unfinished_tasks:
            result += " tasks=%s" % self._unfinished_tasks
        return result
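# A small illustrative sketch (roughly mirroring what tornado.queues does for
# its LifoQueue, but not taken from it) of how the three overridable hooks
# above can change the ordering policy without touching the Event/Future
# bookkeeping in the base class:
import collections


class _LifoDemoQueue(Queue):
    """Last-in-first-out variant built on the overridable hooks above."""

    def _init(self) -> None:
        self._queue = collections.deque()

    def _get(self):
        return self._queue.pop()  # take from the same end we append to

    def _put(self, item) -> None:
        self._queue.append(item)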
class Executor(object): """ Distributed executor with data dependencies This executor resembles executors in concurrent.futures but also allows Futures within submit/map calls. Provide center address on initialization >>> executor = Executor(('127.0.0.1', 8787)) # doctest: +SKIP Use ``submit`` method like normal >>> a = executor.submit(add, 1, 2) # doctest: +SKIP >>> b = executor.submit(add, 10, 20) # doctest: +SKIP Additionally, provide results of submit calls (futures) to further submit calls: >>> c = executor.submit(add, a, b) # doctest: +SKIP This allows for the dynamic creation of complex dependencies. """ def __init__(self, center=None, scheduler=None, start=True, delete_batch_time=1, loop=None): self.futures = dict() self.refcount = defaultdict(lambda: 0) self.loop = loop or IOLoop() self.scheduler_queue = Queue() self.report_queue = Queue() if scheduler: if isinstance(scheduler, Scheduler): self.scheduler = scheduler if not center: self.center = scheduler.center else: raise NotImplementedError() # self.scheduler = coerce_to_rpc(scheduler) else: self.scheduler = Scheduler(center, loop=self.loop, delete_batch_time=delete_batch_time) if center: self.center = coerce_to_rpc(center) if not self.center: raise ValueError("Provide Center address") if start: self.start() def start(self): """ Start scheduler running in separate thread """ if hasattr(self, '_loop_thread'): return from threading import Thread self._loop_thread = Thread(target=self.loop.start) self._loop_thread.daemon = True _global_executor[0] = self self._loop_thread.start() sync(self.loop, self._start) def send_to_scheduler(self, msg): if isinstance(self.scheduler, Scheduler): self.loop.add_callback(self.scheduler_queue.put_nowait, msg) else: raise NotImplementedError() @gen.coroutine def _start(self): if self.scheduler.status != 'running': yield self.scheduler._sync_center() self.scheduler.start() start_event = Event() self.coroutines = [ self.scheduler.handle_queues(self.scheduler_queue, self.report_queue), self.report(start_event)] _global_executor[0] = self yield start_event.wait() logger.debug("Started scheduling coroutines. 
Synchronized") def __enter__(self): if not self.loop._running: self.start() return self def __exit__(self, type, value, traceback): self.shutdown() def _inc_ref(self, key): self.refcount[key] += 1 def _dec_ref(self, key): self.refcount[key] -= 1 if self.refcount[key] == 0: del self.refcount[key] self._release_key(key) def _release_key(self, key): """ Release key from distributed memory """ logger.debug("Release key %s", key) if key in self.futures: self.futures[key]['event'].clear() del self.futures[key] self.send_to_scheduler({'op': 'release-held-data', 'key': key}) @gen.coroutine def report(self, start_event): """ Listen to scheduler """ while True: if isinstance(self.scheduler, Scheduler): msg = yield self.report_queue.get() elif isinstance(self.scheduler, IOStream): raise NotImplementedError() msg = yield read(self.scheduler) else: raise NotImplementedError() if msg['op'] == 'stream-start': start_event.set() if msg['op'] == 'close': break if msg['op'] == 'key-in-memory': if msg['key'] in self.futures: self.futures[msg['key']]['status'] = 'finished' self.futures[msg['key']]['event'].set() if msg['op'] == 'lost-data': if msg['key'] in self.futures: self.futures[msg['key']]['status'] = 'lost' self.futures[msg['key']]['event'].clear() if msg['op'] == 'task-erred': if msg['key'] in self.futures: self.futures[msg['key']]['status'] = 'error' self.futures[msg['key']]['exception'] = msg['exception'] self.futures[msg['key']]['traceback'] = msg['traceback'] self.futures[msg['key']]['event'].set() if msg['op'] == 'restart': logger.info("Receive restart signal from scheduler") events = [d['event'] for d in self.futures.values()] self.futures.clear() for e in events: e.set() with ignoring(AttributeError): self._restart_event.set() @gen.coroutine def _shutdown(self, fast=False): """ Send shutdown signal and wait until scheduler completes """ self.send_to_scheduler({'op': 'close'}) if _global_executor[0] is self: _global_executor[0] = None if not fast: yield self.coroutines def shutdown(self, timeout=10): """ Send shutdown signal and wait until scheduler terminates """ self.send_to_scheduler({'op': 'close'}) self.loop.stop() self._loop_thread.join(timeout=timeout) if _global_executor[0] is self: _global_executor[0] = None def submit(self, func, *args, **kwargs): """ Submit a function application to the scheduler Parameters ---------- func: callable *args: **kwargs: pure: bool (defaults to True) Whether or not the function is pure. Set ``pure=False`` for impure functions like ``np.random.random``. workers: set, iterable of sets A set of worker hostnames on which computations may be performed. 
Leave empty to default to all workers (common case) Examples -------- >>> c = executor.submit(add, a, b) # doctest: +SKIP Returns ------- Future See Also -------- distributed.executor.Executor.submit: """ if not callable(func): raise TypeError("First input to submit must be a callable function") key = kwargs.pop('key', None) pure = kwargs.pop('pure', True) workers = kwargs.pop('workers', None) if key is None: if pure: key = funcname(func) + '-' + tokenize(func, kwargs, *args) else: key = funcname(func) + '-' + next(tokens) if key in self.futures: return Future(key, self) if kwargs: task = (apply, func, args, kwargs) else: task = (func,) + args if workers is not None: restrictions = {key: workers} else: restrictions = {} logger.debug("Submit %s(...), %s", funcname(func), key) self.send_to_scheduler({'op': 'update-graph', 'dsk': {key: task}, 'keys': [key], 'restrictions': restrictions}) return Future(key, self) def map(self, func, *iterables, **kwargs): """ Map a function on a sequence of arguments Arguments can be normal objects or Futures Parameters ---------- func: callable iterables: Iterables pure: bool (defaults to True) Whether or not the function is pure. Set ``pure=False`` for impure functions like ``np.random.random``. workers: set, iterable of sets A set of worker hostnames on which computations may be performed. Leave empty to default to all workers (common case) Examples -------- >>> L = executor.map(func, sequence) # doctest: +SKIP Returns ------- list of futures See also -------- distributed.executor.Executor.submit """ pure = kwargs.pop('pure', True) workers = kwargs.pop('workers', None) if not callable(func): raise TypeError("First input to map must be a callable function") iterables = [list(it) for it in iterables] if pure: keys = [funcname(func) + '-' + tokenize(func, kwargs, *args) for args in zip(*iterables)] else: uid = str(uuid.uuid4()) keys = [funcname(func) + '-' + uid + '-' + next(tokens) for i in range(min(map(len, iterables)))] if not kwargs: dsk = {key: (func,) + args for key, args in zip(keys, zip(*iterables))} else: dsk = {key: (apply, func, args, kwargs) for key, args in zip(keys, zip(*iterables))} if isinstance(workers, (list, set)): if workers and isinstance(first(workers), (list, set)): if len(workers) != len(keys): raise ValueError("You only provided %d worker restrictions" " for a sequence of length %d" % (len(workers), len(keys))) restrictions = dict(zip(keys, workers)) else: restrictions = {key: workers for key in keys} elif workers is None: restrictions = {} else: raise TypeError("Workers must be a list or set of workers or None") logger.debug("map(%s, ...)", funcname(func)) self.send_to_scheduler({'op': 'update-graph', 'dsk': dsk, 'keys': keys, 'restrictions': restrictions}) return [Future(key, self) for key in keys] @gen.coroutine def _gather(self, futures): futures2, keys = unpack_remotedata(futures) keys = list(keys) while True: logger.debug("Waiting on futures to clear before gather") yield All([self.futures[key]['event'].wait() for key in keys if key in self.futures]) exceptions = [self.futures[key]['exception'] for key in keys if self.futures[key]['status'] == 'error'] if exceptions: raise exceptions[0] try: data = yield _gather(self.center, keys) except KeyError as e: logger.debug("Couldn't gather keys %s", e) self.send_to_scheduler({'op': 'missing-data', 'missing': e.args}) for key in e.args: self.futures[key]['event'].clear() else: break data = dict(zip(keys, data)) result = pack_data(futures2, data) raise gen.Return(result) def gather(self, 
futures): """ Gather futures from distributed memory Accepts a future or any nested core container of futures Examples -------- >>> from operator import add # doctest: +SKIP >>> e = Executor('127.0.0.1:8787') # doctest: +SKIP >>> x = e.submit(add, 1, 2) # doctest: +SKIP >>> e.gather(x) # doctest: +SKIP 3 >>> e.gather([x, [x], x]) # doctest: +SKIP [3, [3], 3] """ return sync(self.loop, self._gather, futures) @gen.coroutine def _scatter(self, data, workers=None): remotes = yield self.scheduler._scatter(None, data, workers) if isinstance(remotes, list): remotes = [Future(r.key, self) for r in remotes] keys = {r.key for r in remotes} elif isinstance(remotes, dict): remotes = {k: Future(v.key, self) for k, v in remotes.items()} keys = set(remotes) for key in keys: self.futures[key]['status'] = 'finished' self.futures[key]['event'].set() raise gen.Return(remotes) def scatter(self, data, workers=None): """ Scatter data into distributed memory Accepts a list of data elements or dict of key-value pairs Optionally provide a set of workers to constrain the scatter. Specify workers as hostname/port pairs, i.e. ('127.0.0.1', 8787). Default port is 8788. Examples -------- >>> e = Executor('127.0.0.1:8787') # doctest: +SKIP >>> e.scatter([1, 2, 3]) # doctest: +SKIP [RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>, RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>, RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>] >>> e.scatter({'x': 1, 'y': 2, 'z': 3}) # doctest: +SKIP {'x': RemoteData<center=127.0.0.1:8787, key=x>, 'y': RemoteData<center=127.0.0.1:8787, key=y>, 'z': RemoteData<center=127.0.0.1:8787, key=z>} >>> e.scatter([1, 2, 3], workers=[('hostname', 8788)]) # doctest: +SKIP """ return sync(self.loop, self._scatter, data, workers=workers) @gen.coroutine def _get(self, dsk, keys, restrictions=None, raise_on_error=True): flatkeys = list(flatten([keys])) futures = {key: Future(key, self) for key in flatkeys} self.send_to_scheduler({'op': 'update-graph', 'dsk': dsk, 'keys': flatkeys, 'restrictions': restrictions or {}}) packed = pack_data(keys, futures) if raise_on_error: result = yield self._gather(packed) else: try: result = yield self._gather(packed) result = 'OK', result except Exception as e: result = 'error', e raise gen.Return(result) def get(self, dsk, keys, **kwargs): """ Gather futures from distributed memory Parameters ---------- dsk: dict keys: object, or nested lists of objects restrictions: dict (optional) A mapping of {key: {set of worker hostnames}} that restricts where jobs can take place Examples -------- >>> from operator import add # doctest: +SKIP >>> e = Executor('127.0.0.1:8787') # doctest: +SKIP >>> e.get({'x': (add, 1, 2)}, 'x') # doctest: +SKIP 3 """ status, result = sync(self.loop, self._get, dsk, keys, raise_on_error=False, **kwargs) if status == 'error': raise result else: return result def compute(self, *args, **kwargs): """ Compute dask collections on cluster Parameters ---------- args: iterable of dask objects Collections like dask.array or dataframe or dask.value objects sync: bool (optional) Returns Futures if False (default) or concrete values if True Returns ------- Tuple of Futures or concrete values Examples -------- >>> from dask import do, value >>> from operator import add >>> x = dask.do(add)(1, 2) >>> y = dask.do(add)(x, x) >>> xx, yy = executor.compute(x, y) # doctest: +SKIP >>> xx # doctest: +SKIP <Future: status: finished, key: add-8f6e709446674bad78ea8aeecfee188e> >>> xx.result() # doctest: +SKIP 3 >>> yy.result() # doctest: +SKIP 6 """ sync = 
kwargs.pop('sync', False) assert not kwargs if sync: return dask.compute(*args, get=self.get) variables = [a for a in args if isinstance(a, Base)] groups = groupby(lambda x: x._optimize, variables) dsk = merge([opt(merge([v.dask for v in val]), [v._keys() for v in val]) for opt, val in groups.items()]) names = ['finalize-%s' % tokenize(v) for v in variables] dsk2 = {name: (v._finalize, v, v._keys()) for name, v in zip(names, variables)} self.loop.add_callback(self.scheduler_queue.put_nowait, {'op': 'update-graph', 'dsk': merge(dsk, dsk2), 'keys': names}) i = 0 futures = [] for arg in args: if isinstance(arg, Base): futures.append(Future(names[i], self)) i += 1 else: futures.append(arg) return futures @gen.coroutine def _restart(self): self.send_to_scheduler({'op': 'restart'}) self._restart_event = Event() yield self._restart_event.wait() raise gen.Return(self) def restart(self): """ Restart the distributed network This kills all active work, deletes all data on the network, and restarts the worker processes. """ return sync(self.loop, self._restart) @gen.coroutine def _upload_file(self, filename, raise_on_error=True): with open(filename, 'rb') as f: data = f.read() _, fn = os.path.split(filename) d = yield self.center.broadcast(msg={'op': 'upload_file', 'filename': fn, 'data': data}) if any(isinstance(v, Exception) for v in d.values()): exception = next(v for v in d.values() if isinstance(v, Exception)) if raise_on_error: raise exception else: raise gen.Return(exception) assert all(len(data) == v for v in d.values()) def upload_file(self, filename): """ Upload local package to workers Parameters ---------- filename: string Filename of .py file to send to workers """ result = sync(self.loop, self._upload_file, filename, raise_on_error=False) if isinstance(result, Exception): raise result
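The _gather coroutine in the snippet above is built around a wait/retry handshake on per-key Event objects: it blocks until every requested key's event is set, and if the center then reports keys as missing it clears those events so the next pass waits for a fresh completion. A minimal sketch of that pattern, assuming a futures dict of the same shape and a hypothetical fetch coroutine in place of the real center RPC:

from tornado import gen
from tornado.locks import Event

futures = {}  # key -> {'event': Event(), 'status': ..., ...}

@gen.coroutine
def wait_and_fetch(keys, fetch):
    # fetch is a stand-in for the gather RPC; it raises KeyError with the
    # missing keys in e.args when data has been lost since the event was set.
    while True:
        yield [futures[k]['event'].wait() for k in keys if k in futures]
        try:
            data = yield fetch(keys)
        except KeyError as e:
            for k in e.args:
                futures[k]['event'].clear()  # lost: wait for recomputation
        else:
            raise gen.Return(data)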
class ProjectIndexManager(object): """ Keeps track of composite index definitions for a project. """ def __init__(self, project_id, zk_client, index_manager, datastore_access): """ Creates a new ProjectIndexManager. Args: project_id: A string specifying a project ID. zk_client: A KazooClient. update_callback: A function that should be called with the project ID and index list every time the indexes get updated. index_manager: An IndexManager used for checking lock status. datastore_access: A DatastoreDistributed object. """ self.project_id = project_id self.indexes_node = '/appscale/projects/{}/indexes'.format(self.project_id) self.active = True self.update_event = AsyncEvent() self._creation_times = {} self._index_manager = index_manager self._zk_client = zk_client self._ds_access = datastore_access self._zk_client.DataWatch(self.indexes_node, self._update_indexes_watch) # Since this manager can be used synchronously, ensure that the indexes # are populated for this IOLoop iteration. try: encoded_indexes = self._zk_client.get(self.indexes_node)[0] except NoNodeError: encoded_indexes = '[]' self.indexes = [DatastoreIndex.from_dict(self.project_id, index) for index in json.loads(encoded_indexes)] @property def indexes_pb(self): if self._zk_client.state != KazooState.CONNECTED: raise IndexInaccessible('ZooKeeper connection is not active') return [index.to_pb() for index in self.indexes] @gen.coroutine def apply_definitions(self): """ Populate composite indexes that are not marked as ready yet. """ try: yield self.update_event.wait() self.update_event.clear() if not self._index_manager.admin_lock.is_acquired or not self.active: return logger.info( 'Applying composite index definitions for {}'.format(self.project_id)) for index in self.indexes: if index.ready: continue # Wait until all clients have either timed out or received the new index # definition. This prevents entities from being added without entries # while the index is being rebuilt. creation_time = self._creation_times.get(index.id, time.time()) consensus = creation_time + (self._zk_client._session_timeout / 1000.0) yield gen.sleep(max(consensus - time.time(), 0)) yield self._ds_access.update_composite_index( self.project_id, index.to_pb()) logger.info('Index {} is now ready'.format(index.id)) self._mark_index_ready(index.id) logging.info( 'All composite indexes for {} are ready'.format(self.project_id)) finally: IOLoop.current().spawn_callback(self.apply_definitions) def delete_index_definition(self, index_id): """ Remove a definition from a project's list of configured indexes. Args: index_id: An integer specifying an index ID. """ try: encoded_indexes, znode_stat = self._zk_client.get(self.indexes_node) except NoNodeError: # If there are no index definitions, there is nothing to do. return node_version = znode_stat.version indexes = [DatastoreIndex.from_dict(self.project_id, index) for index in json.loads(encoded_indexes)] encoded_indexes = json.dumps([index.to_dict() for index in indexes if index.id != index_id]) self._zk_client.set(self.indexes_node, encoded_indexes, version=node_version) def _mark_index_ready(self, index_id): """ Updates the index metadata to reflect the new state of the index. Args: index_id: An integer specifying an index ID. """ try: encoded_indexes, znode_stat = self._zk_client.get(self.indexes_node) node_version = znode_stat.version except NoNodeError: # If for some reason the index no longer exists, there's nothing to do. 
return existing_indexes = [DatastoreIndex.from_dict(self.project_id, index) for index in json.loads(encoded_indexes)] for existing_index in existing_indexes: if existing_index.id == index_id: existing_index.ready = True indexes_dict = [index.to_dict() for index in existing_indexes] self._zk_client.set(self.indexes_node, json.dumps(indexes_dict), version=node_version) @gen.coroutine def _update_indexes(self, encoded_indexes): """ Handles changes to the list of a project's indexes. Args: encoded_indexes: A string containing index node data. """ encoded_indexes = encoded_indexes or '[]' self.indexes = [DatastoreIndex.from_dict(self.project_id, index) for index in json.loads(encoded_indexes)] # Mark when indexes are defined so they can be backfilled later. self._creation_times.update( {index.id: time.time() for index in self.indexes if not index.ready and index.id not in self._creation_times}) self.update_event.set() def _update_indexes_watch(self, encoded_indexes, znode_stat): """ Handles updates to the project's indexes node. Args: encoded_indexes: A string containing index node data. znode_stat: A kazoo.protocol.states.ZnodeStat object. """ if not self.active: return False IOLoop.current().add_callback(self._update_indexes, encoded_indexes)
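The ProjectIndexManager above couples a kazoo DataWatch to a Tornado coroutine through a single Event: the watch callback runs on the ZooKeeper client's thread and only bounces the data onto the IOLoop, the IOLoop callback updates local state and sets update_event, and apply_definitions wakes up, clears the event, and applies the change. A stripped-down sketch of that handoff, with a placeholder apply coroutine and the IOLoop captured up front so the foreign-thread callback never touches Tornado state directly:

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

update_event = Event()
state = {'indexes': []}
io_loop = IOLoop.current()  # captured on the IOLoop thread at setup time

def on_znode_change(new_data, _stat):
    # Runs on the kazoo thread: hand the update to the IOLoop and return.
    io_loop.add_callback(apply_update, new_data)

def apply_update(new_data):
    state['indexes'] = new_data or []
    update_event.set()

@gen.coroutine
def apply_definitions(apply):
    while True:
        yield update_event.wait()
        update_event.clear()  # consume this notification before working
        yield apply(state['indexes'])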
def get(self): never_finish = Event() yield never_finish.wait()
class IndexManager(object): """ Keeps track of configured datastore indexes. """ # The node which keeps track of admin lock contenders. ADMIN_LOCK_NODE = '/appscale/datastore/index_manager_lock' def __init__(self, zk_client, datastore_access, perform_admin=False): """ Creates a new IndexManager. Args: zk_client: A kazoo.client.KazooClient object. datastore_access: A DatastoreDistributed object. perform_admin: A boolean specifying whether or not to perform admin operations. """ self.projects = {} self._wake_event = AsyncEvent() self._zk_client = zk_client self.admin_lock = AsyncKazooLock(self._zk_client, self.ADMIN_LOCK_NODE) # TODO: Refactor so that this dependency is not needed. self._ds_access = datastore_access self._zk_client.ensure_path('/appscale/projects') self._zk_client.ChildrenWatch('/appscale/projects', self._update_projects) # Since this manager can be used synchronously, ensure that the projects # are populated for this IOLoop iteration. project_ids = self._zk_client.get_children('/appscale/projects') self._update_projects_sync(project_ids) if perform_admin: IOLoop.current().spawn_callback(self._contend_for_admin_lock) def _update_projects_sync(self, new_project_ids): """ Updates the list of the deployment's projects. Args: new_project_ids: A list of strings specifying current project IDs. """ for project_id in new_project_ids: if project_id not in self.projects: self.projects[project_id] = ProjectIndexManager( project_id, self._zk_client, self, self._ds_access) if self.admin_lock.is_acquired: IOLoop.current().spawn_callback( self.projects[project_id].apply_definitions) for project_id in self.projects.keys(): if project_id not in new_project_ids: self.projects[project_id].active = False del self.projects[project_id] def _update_projects(self, project_ids): """ Watches for changes to list of existing projects. Args: project_ids: A list of strings specifying current project IDs. """ persistent_update_projects = retry_children_watch_coroutine( '/appscale/projects', self._update_projects_sync) IOLoop.instance().add_callback(persistent_update_projects, project_ids) def _handle_connection_change(self, state): """ Notifies the admin lock holder when the connection changes. Args: state: The new connection state. """ IOLoop.current().add_callback(self._wake_event.set) @gen.coroutine def _contend_for_admin_lock(self): """ Waits to acquire an admin lock that gives permission to apply index definitions. The lock is useful for preventing many servers from writing the same index entries at the same time. After acquiring the lock, the individual ProjectIndexManagers are responsible for mutating state whenever a project's index definitions change. """ while True: # Set up a callback to get notified if the ZK connection changes. self._wake_event.clear() self._zk_client.add_listener(self._handle_connection_change) yield self.admin_lock.acquire() try: for project_index_manager in self.projects.values(): IOLoop.current().spawn_callback( project_index_manager.apply_definitions) # Release the lock if the kazoo client gets disconnected. yield self._wake_event.wait() finally: self.admin_lock.release()
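IndexManager._contend_for_admin_lock uses the same relay in the other direction: a kazoo connection listener (again on a foreign thread) sets _wake_event through add_callback, and the coroutine that holds the admin lock releases it as soon as the event fires. Roughly, under the same assumptions about which thread owns the Event:

from tornado import gen

@gen.coroutine
def hold_lock_until_disconnect(zk_client, lock, wake_event, io_loop):
    # wake_event is a tornado.locks.Event owned by io_loop's thread.
    wake_event.clear()
    zk_client.add_listener(
        lambda state: io_loop.add_callback(wake_event.set))
    yield lock.acquire()
    try:
        yield wake_event.wait()  # fires on any ZK connection state change
    finally:
        lock.release()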
class SubscribeListener(SubscribeCallback): def __init__(self): self.connected = False self.connected_event = Event() self.disconnected_event = Event() self.presence_queue = Queue() self.message_queue = Queue() self.error_queue = Queue() def status(self, pubnub, status): if utils.is_subscribed_event(status) and not self.connected_event.is_set(): self.connected_event.set() elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set(): self.disconnected_event.set() elif status.is_error(): self.error_queue.put_nowait(status.error_data.exception) def message(self, pubnub, message): self.message_queue.put(message) def presence(self, pubnub, presence): self.presence_queue.put(presence) @tornado.gen.coroutine def _wait_for(self, coro): error = self.error_queue.get() wi = tornado.gen.WaitIterator(coro, error) while not wi.done(): result = yield wi.next() if wi.current_future == coro: raise gen.Return(result) elif wi.current_future == error: raise result else: raise Exception("Unexpected future resolved: %s" % str(wi.current_future)) @tornado.gen.coroutine def wait_for_connect(self): if not self.connected_event.is_set(): yield self._wait_for(self.connected_event.wait()) else: raise Exception("instance is already connected") @tornado.gen.coroutine def wait_for_disconnect(self): if not self.disconnected_event.is_set(): yield self._wait_for(self.disconnected_event.wait()) else: raise Exception("instance is already disconnected") @tornado.gen.coroutine def wait_for_message_on(self, *channel_names): channel_names = list(channel_names) while True: try: # NOQA env = yield self._wait_for(self.message_queue.get()) if env.channel in channel_names: raise tornado.gen.Return(env) else: continue finally: self.message_queue.task_done() @tornado.gen.coroutine def wait_for_presence_on(self, *channel_names): channel_names = list(channel_names) while True: try: try: env = yield self._wait_for(self.presence_queue.get()) except: # NOQA E722 pylint: disable=W0702 break if env.channel in channel_names: raise tornado.gen.Return(env) else: continue finally: self.presence_queue.task_done()
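SubscribeListener._wait_for races whatever the caller is waiting on against the listener's error queue, so a subscription error surfaces even while a test is blocked on connected_event.wait(). A condensed sketch of that race, with a module-level queue and event standing in for the real listener state:

import tornado.gen
from tornado.locks import Event
from tornado.queues import Queue

error_queue = Queue()
connected_event = Event()

@tornado.gen.coroutine
def wait_or_raise(waited):
    # Resolve whichever finishes first: the awaited future or the next error.
    error = error_queue.get()
    iterator = tornado.gen.WaitIterator(waited, error)
    while not iterator.done():
        result = yield iterator.next()
        if iterator.current_future is waited:
            raise tornado.gen.Return(result)
        raise result  # an error arrived before the awaited future resolved

@tornado.gen.coroutine
def wait_for_connect():
    if not connected_event.is_set():
        yield wait_or_raise(connected_event.wait())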
class TornadoSubscriptionManager(SubscriptionManager): def __init__(self, pubnub_instance): subscription_manager = self self._message_queue = Queue() self._consumer_event = Event() self._cancellation_event = Event() self._subscription_lock = Semaphore(1) # self._current_request_key_object = None self._heartbeat_periodic_callback = None self._reconnection_manager = TornadoReconnectionManager(pubnub_instance) super(TornadoSubscriptionManager, self).__init__(pubnub_instance) self._start_worker() class TornadoReconnectionCallback(ReconnectionCallback): def on_reconnect(self): subscription_manager.reconnect() pn_status = PNStatus() pn_status.category = PNStatusCategory.PNReconnectedCategory pn_status.error = False subscription_manager._subscription_status_announced = True subscription_manager._listener_manager.announce_status(pn_status) self._reconnection_listener = TornadoReconnectionCallback() self._reconnection_manager.set_reconnection_listener(self._reconnection_listener) def _set_consumer_event(self): self._consumer_event.set() def _message_queue_put(self, message): self._message_queue.put(message) def _start_worker(self): self._consumer = TornadoSubscribeMessageWorker(self._pubnub, self._listener_manager, self._message_queue, self._consumer_event) run = stack_context.wrap(self._consumer.run) self._pubnub.ioloop.spawn_callback(run) def reconnect(self): self._should_stop = False self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop) # self._register_heartbeat_timer() def disconnect(self): self._should_stop = True self._stop_heartbeat_timer() self._stop_subscribe_loop() @tornado.gen.coroutine def _start_subscribe_loop(self): self._stop_subscribe_loop() yield self._subscription_lock.acquire() self._cancellation_event.clear() combined_channels = self._subscription_state.prepare_channel_list(True) combined_groups = self._subscription_state.prepare_channel_group_list(True) if len(combined_channels) == 0 and len(combined_groups) == 0: return envelope_future = Subscribe(self._pubnub) \ .channels(combined_channels).channel_groups(combined_groups) \ .timetoken(self._timetoken).region(self._region) \ .filter_expression(self._pubnub.config.filter_expression) \ .cancellation_event(self._cancellation_event) \ .future() canceller_future = self._cancellation_event.wait() wi = tornado.gen.WaitIterator(envelope_future, canceller_future) # iterates 2 times: one for result one for cancelled while not wi.done(): try: result = yield wi.next() except Exception as e: # TODO: verify the error will not be eaten logger.error(e) raise else: if wi.current_future == envelope_future: e = result elif wi.current_future == canceller_future: return else: raise Exception("Unexpected future resolved: %s" % str(wi.current_future)) if e.is_error(): # 599 error doesn't works - tornado use this status code # for a wide range of errors, for ex: # HTTP Server Error (599): [Errno -2] Name or service not known if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory: self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop) return logger.error("Exception in subscribe loop: %s" % str(e)) if e.status is not None and e.status.category == PNStatusCategory.PNAccessDeniedCategory: e.status.operation = PNOperationType.PNUnsubscribeOperation self._listener_manager.announce_status(e.status) self._reconnection_manager.start_polling() self.disconnect() return else: self._handle_endpoint_call(e.result, e.status) self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop) finally: 
self._cancellation_event.set() yield tornado.gen.moment self._subscription_lock.release() self._cancellation_event.clear() break def _stop_subscribe_loop(self): if self._cancellation_event is not None and not self._cancellation_event.is_set(): self._cancellation_event.set() def _stop_heartbeat_timer(self): if self._heartbeat_periodic_callback is not None: self._heartbeat_periodic_callback.stop() def _register_heartbeat_timer(self): super(TornadoSubscriptionManager, self)._register_heartbeat_timer() self._heartbeat_periodic_callback = PeriodicCallback( stack_context.wrap(self._perform_heartbeat_loop), self._pubnub.config.heartbeat_interval * TornadoSubscriptionManager.HEARTBEAT_INTERVAL_MULTIPLIER, self._pubnub.ioloop) self._heartbeat_periodic_callback.start() @tornado.gen.coroutine def _perform_heartbeat_loop(self): if self._heartbeat_call is not None: # TODO: cancel call pass cancellation_event = Event() state_payload = self._subscription_state.state_payload() presence_channels = self._subscription_state.prepare_channel_list(False) presence_groups = self._subscription_state.prepare_channel_group_list(False) if len(presence_channels) == 0 and len(presence_groups) == 0: return try: envelope = yield self._pubnub.heartbeat() \ .channels(presence_channels) \ .channel_groups(presence_groups) \ .state(state_payload) \ .cancellation_event(cancellation_event) \ .future() heartbeat_verbosity = self._pubnub.config.heartbeat_notification_options if envelope.status.is_error: # announce errors for ALL or FAILURES verbosity if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL or \ heartbeat_verbosity == PNHeartbeatNotificationOptions.FAILURES: self._listener_manager.announce_status(envelope.status) else: if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL: self._listener_manager.announce_status(envelope.status) except PubNubTornadoException: pass # TODO: check correctness # if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory: # self._start_subscribe_loop() # else: # self._listener_manager.announce_status(e.status) except Exception as e: print(e) finally: cancellation_event.set() @tornado.gen.coroutine def _send_leave(self, unsubscribe_operation): envelope = yield Leave(self._pubnub) \ .channels(unsubscribe_operation.channels) \ .channel_groups(unsubscribe_operation.channel_groups).future() self._listener_manager.announce_status(envelope.status)
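Both the subscribe loop and the heartbeat above treat an Event as a cancellation token: the event is handed to the request builder via cancellation_event(...), its wait() future races the request inside a WaitIterator, and setting the event from _stop_subscribe_loop tears the in-flight call down. A generic sketch of that shape, where request is a hypothetical callable that returns a Future and honours the Event it is given:

import tornado.gen
from tornado.locks import Event

cancellation_event = Event()

@tornado.gen.coroutine
def run_cancellable(request):
    cancellation_event.clear()
    response_future = request(cancellation_event)
    cancel_future = cancellation_event.wait()
    iterator = tornado.gen.WaitIterator(response_future, cancel_future)
    while not iterator.done():
        result = yield iterator.next()
        if iterator.current_future is response_future:
            raise tornado.gen.Return(result)
        return  # the event was set first: treat the request as cancelled

def stop():
    # Mirrors _stop_subscribe_loop: setting the event aborts the current call.
    if not cancellation_event.is_set():
        cancellation_event.set()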
class Executor(object): """ Drive computations on a distributed cluster The Executor connects users to a distributed compute cluster. It provides an asynchronous user interface around functions and futures. This class resembles executors in ``concurrent.futures`` but also allows ``Future`` objects within ``submit/map`` calls. Parameters ---------- address: string, tuple, or ``Scheduler`` This can be the address of a ``Center`` or ``Scheduler`` servers, either as a string ``'127.0.0.1:8787'`` or tuple ``('127.0.0.1', 8787)`` or it can be a local ``Scheduler`` object. Examples -------- Provide cluster's head node address on initialization: >>> executor = Executor('127.0.0.1:8787') # doctest: +SKIP Use ``submit`` method to send individual computations to the cluster >>> a = executor.submit(add, 1, 2) # doctest: +SKIP >>> b = executor.submit(add, 10, 20) # doctest: +SKIP Continue using submit or map on results to build up larger computations >>> c = executor.submit(add, a, b) # doctest: +SKIP Gather results with the ``gather`` method. >>> executor.gather([c]) # doctest: +SKIP 33 See Also -------- distributed.scheduler.Scheduler: Internal scheduler """ def __init__(self, address, start=True, loop=None, timeout=3): self.futures = dict() self.refcount = defaultdict(lambda: 0) self.loop = loop or IOLoop() if start else IOLoop.current() self.coroutines = [] self.id = str(uuid.uuid1()) self._start_arg = address if start: self.start(timeout=timeout) def __str__(self): if hasattr(self, '_loop_thread'): n = sync(self.loop, self.scheduler.ncores) return '<Executor: scheduler=%s:%d workers=%d threads=%d>' % ( self.scheduler.ip, self.scheduler.port, len(n), sum(n.values())) else: return '<Executor: scheduler=%s:%d>' % ( self.scheduler.ip, self.scheduler.port) __repr__ = __str__ def start(self, **kwargs): """ Start scheduler running in separate thread """ if hasattr(self, '_loop_thread'): return from threading import Thread self._loop_thread = Thread(target=self.loop.start) self._loop_thread.daemon = True pc = PeriodicCallback(lambda: None, 1000, io_loop=self.loop) self.loop.add_callback(pc.start) _global_executor[0] = self self._loop_thread.start() while not self.loop._running: sleep(0.001) sync(self.loop, self._start, **kwargs) def _send_to_scheduler(self, msg): if isinstance(self.scheduler, Scheduler): self.loop.add_callback(self.scheduler_queue.put_nowait, msg) elif isinstance(self.scheduler_stream, IOStream): self.loop.add_callback(write, self.scheduler_stream, msg) else: raise NotImplementedError() @gen.coroutine def _start(self, timeout=3, **kwargs): if isinstance(self._start_arg, Scheduler): self.scheduler = self._start_arg self.center = self._start_arg.center if isinstance(self._start_arg, str): ip, port = tuple(self._start_arg.split(':')) self._start_arg = (ip, int(port)) if isinstance(self._start_arg, tuple): r = coerce_to_rpc(self._start_arg, timeout=timeout) try: ident = yield r.identity() except (StreamClosedError, OSError): raise IOError("Could not connect to %s:%d" % self._start_arg) if ident['type'] == 'Center': self.center = r self.scheduler = Scheduler(self.center, loop=self.loop, **kwargs) self.scheduler.listen(0) elif ident['type'] == 'Scheduler': self.scheduler = r self.scheduler_stream = yield connect(*self._start_arg) yield write(self.scheduler_stream, {'op': 'register-client', 'client': self.id}) if 'center' in ident: cip, cport = ident['center'] self.center = rpc(ip=cip, port=cport) else: self.center = self.scheduler else: raise ValueError("Unknown Type") if 
isinstance(self.scheduler, Scheduler): if self.scheduler.status != 'running': yield self.scheduler.sync_center() self.scheduler.start(0) self.scheduler_queue = Queue() self.report_queue = Queue() self.coroutines.append(self.scheduler.handle_queues( self.scheduler_queue, self.report_queue)) start_event = Event() self.coroutines.append(self._handle_report(start_event)) _global_executor[0] = self yield start_event.wait() logger.debug("Started scheduling coroutines. Synchronized") def __enter__(self): if not self.loop._running: self.start() return self def __exit__(self, type, value, traceback): self.shutdown() def _inc_ref(self, key): self.refcount[key] += 1 def _dec_ref(self, key): self.refcount[key] -= 1 if self.refcount[key] == 0: del self.refcount[key] self._release_key(key) def _release_key(self, key): """ Release key from distributed memory """ logger.debug("Release key %s", key) if key in self.futures: self.futures[key]['event'].clear() del self.futures[key] self._send_to_scheduler({'op': 'client-releases-keys', 'keys': [key], 'client': self.id}) @gen.coroutine def _handle_report(self, start_event): """ Listen to scheduler """ if isinstance(self.scheduler, Scheduler): next_message = self.report_queue.get elif isinstance(self.scheduler_stream, IOStream): next_message = lambda: read(self.scheduler_stream) else: raise NotImplemented() while True: try: msg = yield next_message() except StreamClosedError: break logger.debug("Executor receives message %s", msg) if msg['op'] == 'stream-start': start_event.set() if msg['op'] == 'close': break if msg['op'] == 'key-in-memory': if msg['key'] in self.futures: self.futures[msg['key']]['status'] = 'finished' self.futures[msg['key']]['event'].set() if (msg.get('type') and not self.futures[msg['key']].get('type')): self.futures[msg['key']]['type'] = msg['type'] if msg['op'] == 'lost-data': if msg['key'] in self.futures: self.futures[msg['key']]['status'] = 'lost' self.futures[msg['key']]['event'].clear() if msg['op'] == 'cancelled-key': if msg['key'] in self.futures: self.futures[msg['key']]['event'].set() del self.futures[msg['key']] if msg['op'] == 'task-erred': if msg['key'] in self.futures: self.futures[msg['key']]['status'] = 'error' self.futures[msg['key']]['exception'] = msg['exception'] self.futures[msg['key']]['traceback'] = msg['traceback'] self.futures[msg['key']]['event'].set() if msg['op'] == 'restart': logger.info("Receive restart signal from scheduler") events = [d['event'] for d in self.futures.values()] self.futures.clear() for e in events: e.set() with ignoring(AttributeError): self._restart_event.set() if msg['op'] == 'scheduler-error': logger.warn("Scheduler exception:") logger.exception(msg['exception']) @gen.coroutine def _shutdown(self, fast=False): """ Send shutdown signal and wait until scheduler completes """ self._send_to_scheduler({'op': 'close-stream'}) if _global_executor[0] is self: _global_executor[0] = None if not fast: with ignoring(TimeoutError): yield [gen.with_timeout(timedelta(seconds=2), f) for f in self.coroutines] def shutdown(self, timeout=10): """ Send shutdown signal and wait until scheduler terminates """ self._send_to_scheduler({'op': 'close'}) self.loop.stop() self._loop_thread.join(timeout=timeout) if _global_executor[0] is self: _global_executor[0] = None def submit(self, func, *args, **kwargs): """ Submit a function application to the scheduler Parameters ---------- func: callable *args: **kwargs: pure: bool (defaults to True) Whether or not the function is pure. 
Set ``pure=False`` for impure functions like ``np.random.random``. workers: set, iterable of sets A set of worker hostnames on which computations may be performed. Leave empty to default to all workers (common case) Examples -------- >>> c = executor.submit(add, a, b) # doctest: +SKIP Returns ------- Future See Also -------- Executor.map: Submit on many arguments at once """ if not callable(func): raise TypeError("First input to submit must be a callable function") key = kwargs.pop('key', None) pure = kwargs.pop('pure', True) workers = kwargs.pop('workers', None) allow_other_workers = kwargs.pop('allow_other_workers', False) if allow_other_workers not in (True, False, None): raise TypeError("allow_other_workers= must be True or False") if key is None: if pure: key = funcname(func) + '-' + tokenize(func, kwargs, *args) else: key = funcname(func) + '-' + str(uuid.uuid4()) if key in self.futures: return Future(key, self) if allow_other_workers and workers is None: raise ValueError("Only use allow_other_workers= if using workers=") if isinstance(workers, str): workers = [workers] if workers is not None: restrictions = {key: workers} loose_restrictions = {key} if allow_other_workers else set() else: restrictions = {} loose_restrictions = set() args2, arg_dependencies = unpack_remotedata(args) kwargs2, kwarg_dependencies = unpack_remotedata(kwargs) dependencies = arg_dependencies | kwarg_dependencies task = {'function': dumps_function(func)} if args2: task['args'] = dumps(args2) if kwargs2: task['kwargs'] = dumps(kwargs2) logger.debug("Submit %s(...), %s", funcname(func), key) self._send_to_scheduler({'op': 'update-graph', 'tasks': {key: task}, 'keys': [key], 'dependencies': {key: dependencies}, 'restrictions': restrictions, 'loose_restrictions': loose_restrictions, 'client': self.id}) return Future(key, self) def _threaded_map(self, q_out, func, qs_in, **kwargs): """ Internal function for mapping Queue """ if isqueue(qs_in[0]): get = pyQueue.get elif isinstance(qs_in[0], Iterator): get = next else: raise NotImplementedError() while True: args = [get(q) for q in qs_in] f = self.submit(func, *args, **kwargs) q_out.put(f) def map(self, func, *iterables, **kwargs): """ Map a function on a sequence of arguments Arguments can be normal objects or Futures Parameters ---------- func: callable iterables: Iterables, Iterators, or Queues pure: bool (defaults to True) Whether or not the function is pure. Set ``pure=False`` for impure functions like ``np.random.random``. workers: set, iterable of sets A set of worker hostnames on which computations may be performed. Leave empty to default to all workers (common case) Examples -------- >>> L = executor.map(func, sequence) # doctest: +SKIP Returns ------- List, iterator, or Queue of futures, depending on the type of the inputs. 
See also -------- Executor.submit: Submit a single function """ if not callable(func): raise TypeError("First input to map must be a callable function") if (all(map(isqueue, iterables)) or all(isinstance(i, Iterator) for i in iterables)): q_out = pyQueue() t = Thread(target=self._threaded_map, args=(q_out, func, iterables), kwargs=kwargs) t.daemon = True t.start() if isqueue(iterables[0]): return q_out else: return queue_to_iterator(q_out) pure = kwargs.pop('pure', True) workers = kwargs.pop('workers', None) allow_other_workers = kwargs.pop('allow_other_workers', False) if allow_other_workers and workers is None: raise ValueError("Only use allow_other_workers= if using workers=") iterables = list(zip(*zip(*iterables))) if pure: keys = [funcname(func) + '-' + tokenize(func, kwargs, *args) for args in zip(*iterables)] else: uid = str(uuid.uuid4()) keys = [funcname(func) + '-' + uid + '-' + str(uuid.uuid4()) for i in range(min(map(len, iterables)))] if not kwargs: dsk = {key: (func,) + args for key, args in zip(keys, zip(*iterables))} else: dsk = {key: (apply, func, (tuple, list(args)), kwargs) for key, args in zip(keys, zip(*iterables))} d = {key: unpack_remotedata(task) for key, task in dsk.items()} dsk = {k: v[0] for k, v in d.items()} dependencies = {k: v[1] for k, v in d.items()} if isinstance(workers, str): workers = [workers] if isinstance(workers, (list, set)): if workers and isinstance(first(workers), (list, set)): if len(workers) != len(keys): raise ValueError("You only provided %d worker restrictions" " for a sequence of length %d" % (len(workers), len(keys))) restrictions = dict(zip(keys, workers)) else: restrictions = {key: workers for key in keys} elif workers is None: restrictions = {} else: raise TypeError("Workers must be a list or set of workers or None") if allow_other_workers not in (True, False, None): raise TypeError("allow_other_workers= must be True or False") if allow_other_workers is True: loose_restrictions = set(keys) else: loose_restrictions = set() logger.debug("map(%s, ...)", funcname(func)) self._send_to_scheduler({'op': 'update-graph', 'tasks': valmap(dumps_task, dsk), 'dependencies': dependencies, 'keys': keys, 'restrictions': restrictions, 'loose_restrictions': loose_restrictions, 'client': self.id}) return [Future(key, self) for key in keys] @gen.coroutine def _gather(self, futures, errors='raise'): futures2, keys = unpack_remotedata(futures) keys = list(keys) bad_data = dict() while True: logger.debug("Waiting on futures to clear before gather") yield All([self.futures[key]['event'].wait() for key in keys if key in self.futures]) exceptions = {key: self.futures[key]['exception'] for key in keys if self.futures[key]['status'] == 'error'} if exceptions: if errors == 'raise': raise first(exceptions.values()) if errors == 'skip': keys = [key for key in keys if key not in exceptions] bad_data.update({key: None for key in exceptions}) else: raise ValueError("Bad value, `errors=%s`" % errors) response, data = yield self.scheduler.gather(keys=keys) if response == b'error': logger.debug("Couldn't gather keys %s", data) self._send_to_scheduler({'op': 'missing-data', 'missing': data.args}) for key in data.args: self.futures[key]['event'].clear() else: break if bad_data and errors == 'skip' and isinstance(futures2, list): futures2 = [f for f in futures2 if f not in exceptions] result = pack_data(futures2, merge(data, bad_data)) raise gen.Return(result) def _threaded_gather(self, qin, qout, **kwargs): """ Internal function for gathering Queue """ while True: d = 
qin.get() f = self.gather(d, **kwargs) qout.put(f) def gather(self, futures, errors='raise'): """ Gather futures from distributed memory Accepts a future, nested container of futures, iterator, or queue. The return type will match the input type. Returns ------- Future results Examples -------- >>> from operator import add # doctest: +SKIP >>> e = Executor('127.0.0.1:8787') # doctest: +SKIP >>> x = e.submit(add, 1, 2) # doctest: +SKIP >>> e.gather(x) # doctest: +SKIP 3 >>> e.gather([x, [x], x]) # support lists and dicts # doctest: +SKIP [3, [3], 3] >>> seq = e.gather(iter([x, x])) # support iterators # doctest: +SKIP >>> next(seq) # doctest: +SKIP 3 See Also -------- Executor.scatter: Send data out to cluster """ if isqueue(futures): qout = pyQueue() t = Thread(target=self._threaded_gather, args=(futures, qout), kwargs={'errors': errors}) t.daemon = True t.start() return qout elif isinstance(futures, Iterator): return (self.gather(f, errors=errors) for f in futures) else: return sync(self.loop, self._gather, futures, errors=errors) @gen.coroutine def _scatter(self, data, workers=None, broadcast=False): keys = yield self.scheduler.scatter(data=data, workers=workers, client=self.id, broadcast=broadcast) if isinstance(data, (tuple, list, set, frozenset)): out = type(data)([Future(k, self) for k in keys]) elif isinstance(data, dict): out = {k: Future(k, self) for k in keys} else: raise TypeError("") for key in keys: self.futures[key]['status'] = 'finished' self.futures[key]['event'].set() raise gen.Return(out) def _threaded_scatter(self, q_or_i, qout, **kwargs): """ Internal function for scattering Iterable/Queue data """ if isqueue(q_or_i): # py2 Queue doesn't support mro get = pyQueue.get elif isinstance(q_or_i, Iterator): get = next while True: try: d = get(q_or_i) except StopIteration: qout.put(StopIteration) break [f] = self.scatter([d], **kwargs) qout.put(f) def scatter(self, data, workers=None, broadcast=False): """ Scatter data into distributed memory Parameters ---------- data: list, iterator, dict, or Queue Data to scatter out to workers. Output type matches input type. workers: list of tuples (optional) Optionally constrain locations of data. Specify workers as hostname/port pairs, e.g. ``('127.0.0.1', 8787)``. broadcast: bool (defaults to False) Whether to send each data element to all workers. By default we round-robin based on number of cores. Returns ------- List, dict, iterator, or queue of futures matching the type of input. 
Examples -------- >>> e = Executor('127.0.0.1:8787') # doctest: +SKIP >>> e.scatter([1, 2, 3]) # doctest: +SKIP [<Future: status: finished, key: c0a8a20f903a4915b94db8de3ea63195>, <Future: status: finished, key: 58e78e1b34eb49a68c65b54815d1b158>, <Future: status: finished, key: d3395e15f605bc35ab1bac6341a285e2>] >>> e.scatter({'x': 1, 'y': 2, 'z': 3}) # doctest: +SKIP {'x': <Future: status: finished, key: x>, 'y': <Future: status: finished, key: y>, 'z': <Future: status: finished, key: z>} Constrain location of data to subset of workers >>> e.scatter([1, 2, 3], workers=[('hostname', 8788)]) # doctest: +SKIP Handle streaming sequences of data with iterators or queues >>> seq = e.scatter(iter([1, 2, 3])) # doctest: +SKIP >>> next(seq) # doctest: +SKIP <Future: status: finished, key: c0a8a20f903a4915b94db8de3ea63195>, Broadcast data to all workers >>> [future] = e.scatter([element], broadcast=True) # doctest: +SKIP See Also -------- Executor.gather: Gather data back to local process """ if isqueue(data) or isinstance(data, Iterator): logger.debug("Starting thread for streaming data") qout = pyQueue() t = Thread(target=self._threaded_scatter, args=(data, qout), kwargs={'workers': workers, 'broadcast': broadcast}) t.daemon = True t.start() if isqueue(data): return qout else: return queue_to_iterator(qout) else: return sync(self.loop, self._scatter, data, workers=workers, broadcast=broadcast) @gen.coroutine def _cancel(self, futures, block=False): keys = {f.key for f in futures_of(futures)} f = self.scheduler.cancel(keys=keys, client=self.id) if block: yield f for k in keys: with ignoring(KeyError): del self.futures[k] def cancel(self, futures, block=False): """ Cancel running futures This stops future tasks from being scheduled if they have not yet run and deletes them if they have already run. 
After calling, this result and all dependent results will no longer be accessible Parameters ---------- futures: list of Futures """ return sync(self.loop, self._cancel, futures, block=False) @gen.coroutine def _get(self, dsk, keys, restrictions=None, raise_on_error=True): flatkeys = list(flatten([keys])) futures = {key: Future(key, self) for key in flatkeys} d = {k: unpack_remotedata(v) for k, v in dsk.items()} dsk2 = {k: v[0] for k, v in d.items()} dsk3 = {k: v for k, v in dsk2.items() if (k == v) is not True} dependencies = {k: v[1] for k, v in d.items()} for k, v in dsk3.items(): dependencies[k] |= set(_deps(dsk, v)) self._send_to_scheduler({'op': 'update-graph', 'tasks': valmap(dumps_task, dsk3), 'dependencies': dependencies, 'keys': flatkeys, 'restrictions': restrictions or {}, 'client': self.id}) packed = pack_data(keys, futures) if raise_on_error: result = yield self._gather(packed) else: try: result = yield self._gather(packed) result = 'OK', result except Exception as e: result = 'error', e raise gen.Return(result) def get(self, dsk, keys, **kwargs): """ Compute dask graph Parameters ---------- dsk: dict keys: object, or nested lists of objects restrictions: dict (optional) A mapping of {key: {set of worker hostnames}} that restricts where jobs can take place Examples -------- >>> from operator import add # doctest: +SKIP >>> e = Executor('127.0.0.1:8787') # doctest: +SKIP >>> e.get({'x': (add, 1, 2)}, 'x') # doctest: +SKIP 3 See Also -------- Executor.compute: Compute asynchronous collections """ status, result = sync(self.loop, self._get, dsk, keys, raise_on_error=False, **kwargs) if status == 'error': raise result else: return result def compute(self, args, sync=False): """ Compute dask collections on cluster Parameters ---------- args: iterable of dask objects or single dask object Collections like dask.array or dataframe or dask.value objects sync: bool (optional) Returns Futures if False (default) or concrete values if True Returns ------- List of Futures if input is a sequence, or a single future otherwise Examples -------- >>> from dask import do, value >>> from operator import add >>> x = dask.do(add)(1, 2) >>> y = dask.do(add)(x, x) >>> xx, yy = executor.compute([x, y]) # doctest: +SKIP >>> xx # doctest: +SKIP <Future: status: finished, key: add-8f6e709446674bad78ea8aeecfee188e> >>> xx.result() # doctest: +SKIP 3 >>> yy.result() # doctest: +SKIP 6 Also support single arguments >>> xx = executor.compute(x) # doctest: +SKIP See Also -------- Executor.get: Normal synchronous dask.get function """ if isinstance(args, (list, tuple, set, frozenset)): singleton = False else: args = [args] singleton = True variables = [a for a in args if isinstance(a, Base)] groups = groupby(lambda x: x._optimize, variables) dsk = merge([opt(merge([v.dask for v in val]), [v._keys() for v in val]) for opt, val in groups.items()]) names = ['finalize-%s' % tokenize(v) for v in variables] dsk2 = {name: (v._finalize, v._keys()) for name, v in zip(names, variables)} d = {k: unpack_remotedata(v) for k, v in merge(dsk, dsk2).items()} dsk3 = {k: v[0] for k, v in d.items()} dependencies = {k: v[1] for k, v in d.items()} for k, v in dsk3.items(): dependencies[k] |= set(_deps(dsk, v)) self._send_to_scheduler({'op': 'update-graph', 'tasks': valmap(dumps_task, dsk3), 'dependencies': dependencies, 'keys': names, 'client': self.id}) i = 0 futures = [] for arg in args: if isinstance(arg, Base): futures.append(Future(names[i], self)) i += 1 else: futures.append(arg) if sync: result = self.gather(futures) else: 
result = futures if singleton: return first(result) else: return result def persist(self, collections): """ Persist dask collections on cluster Starts computation of the collection on the cluster in the background. Provides a new dask collection that is semantically identical to the previous one, but now based off of futures currently in execution. Parameters ---------- collections: sequence or single dask object Collections like dask.array or dataframe or dask.value objects Returns ------- List of collections, or single collection, depending on type of input. Examples -------- >>> xx = executor.persist(x) # doctest: +SKIP >>> xx, yy = executor.persist([x, y]) # doctest: +SKIP See Also -------- Executor.compute """ if isinstance(collections, (tuple, list, set, frozenset)): singleton = False else: singleton = True collections = [collections] assert all(isinstance(c, Base) for c in collections) groups = groupby(lambda x: x._optimize, collections) dsk = merge([opt(merge([v.dask for v in val]), [v._keys() for v in val]) for opt, val in groups.items()]) d = {k: unpack_remotedata(v) for k, v in dsk.items()} dsk2 = {k: v[0] for k, v in d.items()} dependencies = {k: v[1] for k, v in d.items()} for k, v in dsk2.items(): dependencies[k] |= set(_deps(dsk, v)) names = list({k for c in collections for k in flatten(c._keys())}) self._send_to_scheduler({'op': 'update-graph', 'tasks': valmap(dumps_task, dsk2), 'dependencies': dependencies, 'keys': names, 'client': self.id}) result = [redict_collection(c, {k: Future(k, self) for k in flatten(c._keys())}) for c in collections] if singleton: return first(result) else: return result @gen.coroutine def _restart(self): self._send_to_scheduler({'op': 'restart'}) self._restart_event = Event() yield self._restart_event.wait() raise gen.Return(self) def restart(self): """ Restart the distributed network This kills all active work, deletes all data on the network, and restarts the worker processes. """ return sync(self.loop, self._restart) @gen.coroutine def _upload_file(self, filename, raise_on_error=True): with open(filename, 'rb') as f: data = f.read() _, fn = os.path.split(filename) d = yield self.center.broadcast(msg={'op': 'upload_file', 'filename': fn, 'data': data}) if any(isinstance(v, Exception) for v in d.values()): exception = next(v for v in d.values() if isinstance(v, Exception)) if raise_on_error: raise exception else: raise gen.Return(exception) assert all(len(data) == v for v in d.values()) def upload_file(self, filename): """ Upload local package to workers This sends a local file up to all worker nodes. This file is placed into a temporary directory on Python's system path so any .py or .egg files will be importable. Parameters ---------- filename: string Filename of .py or .egg file to send to workers Examples -------- >>> executor.upload_file('mylibrary.egg') # doctest: +SKIP >>> from mylibrary import myfunc # doctest: +SKIP >>> L = e.map(myfunc, seq) # doctest: +SKIP """ result = sync(self.loop, self._upload_file, filename, raise_on_error=False) if isinstance(result, Exception): raise result
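The restart handshake in this Executor is another small Event protocol: _restart installs a fresh _restart_event and blocks on it, while _handle_report sets that event when the scheduler's 'restart' message arrives (after waking any outstanding futures). In outline, with the wire protocol reduced to a hypothetical send callable and a messages queue:

from tornado import gen
from tornado.locks import Event

class RestartClient(object):
    def __init__(self, send, messages):
        self.send = send          # callable that ships a dict to the scheduler
        self.messages = messages  # tornado.queues.Queue of scheduler messages
        self._restart_event = None

    @gen.coroutine
    def restart(self):
        self.send({'op': 'restart'})
        self._restart_event = Event()
        yield self._restart_event.wait()

    @gen.coroutine
    def handle_reports(self):
        while True:
            msg = yield self.messages.get()
            if msg.get('op') == 'restart' and self._restart_event is not None:
                self._restart_event.set()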
class ConnectionPool(object): """ A maximum sized pool of Tornado IOStreams This provides a connect method that mirrors the normal distributed.connect method, but provides connection sharing and tracks connection limits. This object provides an ``rpc`` like interface:: >>> rpc = ConnectionPool(limit=512) >>> scheduler = rpc('127.0.0.1:8786') >>> workers = [rpc(ip=ip, port=port) for ip, port in ...] >>> info = yield scheduler.identity() It creates enough streams to satisfy concurrent connections to any particular address:: >>> a, b = yield [scheduler.who_has(), scheduler.has_what()] It reuses existing streams so that we don't have to continuously reconnect. It also maintains a stream limit to avoid "too many open file handle" issues. Whenever this maximum is reached we clear out all idling streams. If that doesn't do the trick then we wait until one of the occupied streams closes. """ def __init__(self, limit=512): self.open = 0 self.active = 0 self.limit = limit self.available = defaultdict(set) self.occupied = defaultdict(set) self.event = Event() def __str__(self): return "<ConnectionPool: open=%d, active=%d>" % (self.open, self.active) __repr__ = __str__ def __call__(self, arg=None, ip=None, port=None, addr=None): """ Cached rpc objects """ ip, port = ip_port_from_args(arg=arg, addr=addr, ip=ip, port=port) return RPCCall(ip, port, self) @gen.coroutine def connect(self, ip, port, timeout=3): if self.available.get((ip, port)): stream = self.available[ip, port].pop() self.active += 1 self.occupied[ip, port].add(stream) raise gen.Return(stream) while self.open >= self.limit: self.event.clear() self.collect() yield self.event.wait() self.open += 1 stream = yield connect(ip=ip, port=port, timeout=timeout) stream.set_close_callback(lambda: self.on_close(ip, port, stream)) self.active += 1 self.occupied[ip, port].add(stream) if self.open >= self.limit: self.event.clear() raise gen.Return(stream) def on_close(self, ip, port, stream): self.open -= 1 if stream in self.available[ip, port]: self.available[ip, port].remove(stream) if stream in self.occupied[ip, port]: self.occupied[ip, port].remove(stream) self.active -= 1 if self.open <= self.limit: self.event.set() def collect(self): logger.info("Collecting unused streams. open: %d, active: %d", self.open, self.active) for streams in list(self.available.values()): for stream in streams: stream.close() def close(self): for streams in list(self.available.values()): for stream in streams: stream.close() for streams in list(self.occupied.values()): for stream in streams: stream.close()
class ConnectionPool(object): """ A maximum sized pool of Comm objects. This provides a connect method that mirrors the normal distributed.connect method, but provides connection sharing and tracks connection limits. This object provides an ``rpc`` like interface:: >>> rpc = ConnectionPool(limit=512) >>> scheduler = rpc('127.0.0.1:8786') >>> workers = [rpc(address) for address ...] >>> info = yield scheduler.identity() It creates enough comms to satisfy concurrent connections to any particular address:: >>> a, b = yield [scheduler.who_has(), scheduler.has_what()] It reuses existing comms so that we don't have to continuously reconnect. It also maintains a comm limit to avoid "too many open file handle" issues. Whenever this maximum is reached we clear out all idling comms. If that doesn't do the trick then we wait until one of the occupied comms closes. Parameters ---------- limit: int The number of open comms to maintain at once deserialize: bool Whether or not to deserialize data by default or pass it through """ def __init__(self, limit=512, deserialize=True): self.open = 0 # Total number of open comms self.active = 0 # Number of comms currently in use self.limit = limit # Max number of open comms # Invariant: len(available) == open - active self.available = defaultdict(set) # Invariant: len(occupied) == active self.occupied = defaultdict(set) self.deserialize = deserialize self.event = Event() def __str__(self): return "<ConnectionPool: open=%d, active=%d>" % (self.open, self.active) __repr__ = __str__ def __call__(self, addr=None, ip=None, port=None): """ Cached rpc objects """ addr = addr_from_args(addr=addr, ip=ip, port=port) return PooledRPCCall(addr, self) @gen.coroutine def connect(self, addr, timeout=3): """ Get a Comm to the given address. For internal use. """ available = self.available[addr] occupied = self.occupied[addr] if available: comm = available.pop() if not comm.closed(): self.active += 1 occupied.add(comm) raise gen.Return(comm) else: self.open -= 1 while self.open >= self.limit: self.event.clear() self.collect() yield self.event.wait() self.open += 1 try: comm = yield connect(addr, timeout=timeout, deserialize=self.deserialize) except Exception: self.open -= 1 raise self.active += 1 occupied.add(comm) if self.open >= self.limit: self.event.clear() raise gen.Return(comm) def reuse(self, addr, comm): """ Reuse an open communication to the given address. For internal use. """ self.occupied[addr].remove(comm) self.active -= 1 if comm.closed(): self.open -= 1 if self.open < self.limit: self.event.set() else: self.available[addr].add(comm) def collect(self): """ Collect open but unused communications, to allow opening other ones. """ logger.info("Collecting unused comms. open: %d, active: %d", self.open, self.active) for addr, comms in self.available.items(): for comm in comms: comm.close() comms.clear() self.open = self.active if self.open < self.limit: self.event.set() def close(self): """ Close all communications abruptly. """ for comms in self.available.values(): for comm in comms: comm.abort() for comms in self.occupied.values(): for comm in comms: comm.abort()
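In both ConnectionPool variants the Event models "capacity is available": connect clears it and waits whenever open has reached the limit, and retiring a comm (on_close, reuse, collect) sets it again. A compact sketch of just that throttling logic, with the actual comm creation stubbed out:

from tornado import gen
from tornado.locks import Event

class CapacityGate(object):
    def __init__(self, limit):
        self.limit = limit
        self.open = 0
        self.capacity = Event()  # set means "room for another connection"
        self.capacity.set()

    @gen.coroutine
    def acquire(self):
        while self.open >= self.limit:
            self.capacity.clear()
            yield self.capacity.wait()  # woken by release()
        self.open += 1

    def release(self):
        self.open -= 1
        if self.open < self.limit:
            self.capacity.set()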
class SQSDrain(object): """Implementation of IDrain that writes to an AWS SQS queue. """ def __init__(self, logger, loop, sqs_client, metric_prefix='emitter'): self.emitter = sqs_client self.logger = logger self.loop = loop self.metric_prefix = metric_prefix self.output_error = Event() self.state = RUNNING self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__, self.__class__.__name__) self._send_queue = Queue() self._should_flush_queue = Event() self._flush_handle = None self.loop.spawn_callback(self._onSend) @gen.coroutine def _flush_send_batch(self, batch_size): send_batch = [ self._send_queue.get_nowait() for pos in range(min(batch_size, self.emitter.max_messages)) ] try: response = yield self.emitter.send_message_batch(*send_batch) except SQSError as err: self.logger.exception('Error encountered flushing data to SQS: %s', err) self.output_error.set() for msg in send_batch: self._send_queue.put_nowait(msg) else: if response.Failed: self.output_error.set() for req in response.Failed: self.logger.error('Message failed to send: %s', req.Id) self._send_queue.put_nowait(req) @gen.coroutine def _onSend(self): respawn = True while respawn: qsize = self._send_queue.qsize() # This will keep flushing until clear, # including items that show up in between flushes while qsize > 0: try: yield self._flush_send_batch(qsize) except Exception as err: self.logger.exception(err) self.output_error.set() qsize = self._send_queue.qsize() # We've cleared the backlog, remove any possible future flush if self._flush_handle: self.loop.remove_timeout(self._flush_handle) self._flush_handle = None self._should_flush_queue.clear() yield self._should_flush_queue.wait() @gen.coroutine def close(self, timeout=None): self.state = CLOSING yield self._send_queue.join(timeout) def emit_nowait(self, msg): if self._send_queue.qsize() >= self.emitter.max_messages: # Signal flush self._should_flush_queue.set() raise QueueFull() elif self._flush_handle is None: # Ensure we flush messages at least by MAX_TIMEOUT self._flush_handle = self.loop.add_timeout( MAX_TIMEOUT, lambda: self._should_flush_queue.set(), ) self.logger.debug("Drain emitting") self._send_queue.put_nowait(msg) @gen.coroutine def emit(self, msg, timeout=None): if self._send_queue.qsize() >= self.emitter.max_messages: # Signal flush self._should_flush_queue.set() elif self._flush_handle is None: # Ensure we flush messages at least by MAX_TIMEOUT self._flush_handle = self.loop.add_timeout( MAX_TIMEOUT, lambda: self._should_flush_queue.set(), ) yield self._send_queue.put(msg, timeout)
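SQSDrain turns _should_flush_queue into a level-triggered "there is work" flag: emit and emit_nowait set it once the backlog reaches a batch size (or arm a timeout that will set it later), and _onSend drains the queue, clears the flag, and goes back to waiting. A reduced sketch of that producer/consumer handshake, where flush is a placeholder for the real batched send:

from tornado import gen
from tornado.locks import Event
from tornado.queues import Queue

pending = Queue()
should_flush = Event()
BATCH_SIZE = 10

def enqueue(msg):
    pending.put_nowait(msg)
    if pending.qsize() >= BATCH_SIZE:
        should_flush.set()  # wake the sender immediately

@gen.coroutine
def sender(flush):
    while True:
        while pending.qsize() > 0:
            batch = [pending.get_nowait() for _ in range(pending.qsize())]
            yield flush(batch)
        should_flush.clear()  # backlog is empty; sleep until signalled again
        yield should_flush.wait()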
class WorkerProcess(object): def __init__( self, worker_args, worker_kwargs, worker_start_args, silence_logs, on_exit, worker, env, ): self.status = "init" self.silence_logs = silence_logs self.worker_args = worker_args self.worker_kwargs = worker_kwargs self.worker_start_args = worker_start_args self.on_exit = on_exit self.process = None self.Worker = worker self.env = env # Initialized when worker is ready self.worker_dir = None self.worker_address = None @gen.coroutine def start(self): """ Ensure the worker process is started. """ enable_proctitle_on_children() if self.status == "running": raise gen.Return(self.status) if self.status == "starting": yield self.running.wait() raise gen.Return(self.status) self.init_result_q = init_q = mp_context.Queue() self.child_stop_q = mp_context.Queue() uid = uuid.uuid4().hex self.process = AsyncProcess( target=self._run, name="Dask Worker process (from Nanny)", kwargs=dict( worker_args=self.worker_args, worker_kwargs=self.worker_kwargs, worker_start_args=self.worker_start_args, silence_logs=self.silence_logs, init_result_q=self.init_result_q, child_stop_q=self.child_stop_q, uid=uid, Worker=self.Worker, env=self.env, ), ) self.process.daemon = True self.process.set_exit_callback(self._on_exit) self.running = Event() self.stopped = Event() self.status = "starting" yield self.process.start() msg = yield self._wait_until_connected(uid) if not msg: raise gen.Return(self.status) self.worker_address = msg["address"] self.worker_dir = msg["dir"] assert self.worker_address self.status = "running" self.running.set() init_q.close() raise gen.Return(self.status) def _on_exit(self, proc): if proc is not self.process: # Ignore exit of old process instance return self.mark_stopped() def _death_message(self, pid, exitcode): assert exitcode is not None if exitcode == 255: return "Worker process %d was killed by unknown signal" % (pid,) elif exitcode >= 0: return "Worker process %d exited with status %d" % (pid, exitcode) else: return "Worker process %d was killed by signal %d" % (pid, -exitcode) def is_alive(self): return self.process is not None and self.process.is_alive() @property def pid(self): return self.process.pid if self.process and self.process.is_alive() else None def mark_stopped(self): if self.status != "stopped": r = self.process.exitcode assert r is not None if r != 0: msg = self._death_message(self.process.pid, r) logger.warning(msg) self.status = "stopped" self.stopped.set() # Release resources self.process.close() self.init_result_q = None self.child_stop_q = None self.process = None # Best effort to clean up worker directory if self.worker_dir and os.path.exists(self.worker_dir): shutil.rmtree(self.worker_dir, ignore_errors=True) self.worker_dir = None # User hook if self.on_exit is not None: self.on_exit(r) @gen.coroutine def kill(self, timeout=2, executor_wait=True): """ Ensure the worker process is stopped, waiting at most *timeout* seconds before terminating it abruptly. 
""" loop = IOLoop.current() deadline = loop.time() + timeout if self.status == "stopped": return if self.status == "stopping": yield self.stopped.wait() return assert self.status in ("starting", "running") self.status = "stopping" process = self.process self.child_stop_q.put( { "op": "stop", "timeout": max(0, deadline - loop.time()) * 0.8, "executor_wait": executor_wait, } ) self.child_stop_q.close() while process.is_alive() and loop.time() < deadline: yield gen.sleep(0.05) if process.is_alive(): logger.warning( "Worker process still alive after %d seconds, killing", timeout ) try: yield process.terminate() except Exception as e: logger.error("Failed to kill worker process: %s", e) @gen.coroutine def _wait_until_connected(self, uid): delay = 0.05 while True: if self.status != "starting": return try: msg = self.init_result_q.get_nowait() except Empty: yield gen.sleep(delay) continue if msg["uid"] != uid: # ensure that we didn't cross queues continue if "exception" in msg: logger.error( "Failed while trying to start worker process: %s", msg["exception"] ) yield self.process.join() raise msg else: raise gen.Return(msg) @classmethod def _run( cls, worker_args, worker_kwargs, worker_start_args, silence_logs, init_result_q, child_stop_q, uid, env, Worker, ): # pragma: no cover os.environ.update(env) try: from dask.multiprocessing import initialize_worker_process except ImportError: # old Dask version pass else: initialize_worker_process() if silence_logs: logger.setLevel(silence_logs) IOLoop.clear_instance() loop = IOLoop() loop.make_current() worker = Worker(*worker_args, **worker_kwargs) @gen.coroutine def do_stop(timeout=5, executor_wait=True): try: yield worker.close( report=False, nanny=False, executor_wait=executor_wait, timeout=timeout, ) finally: loop.stop() def watch_stop_q(): """ Wait for an incoming stop message and then stop the worker cleanly. """ while True: try: msg = child_stop_q.get(timeout=1000) except Empty: pass else: child_stop_q.close() assert msg.pop("op") == "stop" loop.add_callback(do_stop, **msg) break t = threading.Thread(target=watch_stop_q, name="Nanny stop queue watch") t.daemon = True t.start() @gen.coroutine def run(): """ Try to start worker and inform parent of outcome. """ try: yield worker._start(*worker_start_args) except Exception as e: logger.exception("Failed to start worker") init_result_q.put({"uid": uid, "exception": e}) init_result_q.close() else: assert worker.address init_result_q.put( {"address": worker.address, "dir": worker.local_dir, "uid": uid} ) init_result_q.close() yield worker.wait_until_closed() logger.info("Worker closed") try: loop.run_sync(run) except TimeoutError: # Loop was stopped before wait_until_closed() returned, ignore pass except KeyboardInterrupt: pass
class ZMQDrain(object): """Implementation of IDrain that pushes to a zmq.Socket asynchronously. This implementation overrides the high-water mark behavior from cs.eyrie.vassal.Vassal to instead use a zmq.Poller. """ def __init__(self, logger, loop, zmq_socket, metric_prefix='emitter'): self.emitter = zmq_socket self.logger = logger self.loop = loop self.metric_prefix = metric_prefix self.output_error = Event() self.state = RUNNING self._writable = Event() self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__, self.__class__.__name__) def _handle_events(self, fd, events): if events & self.loop.ERROR: self.logger.error('Error polling socket for writability') elif events & self.loop.WRITE: self.loop.remove_handler(self.emitter) self._writable.set() @gen.coroutine def _poll(self): self.loop.add_handler(self.emitter, self._handle_events, self.loop.WRITE) yield self._writable.wait() self._writable.clear() @gen.coroutine def close(self, timeout=None): self.state = CLOSING self.logger.debug("Flushing send queue") self.emitter.close() def emit_nowait(self, msg): self.logger.debug("Drain emitting") if isinstance(msg, basestring): msg = [msg] try: self.emitter.send_multipart(msg, zmq.NOBLOCK) except zmq.Again: raise QueueFull() @gen.coroutine def emit(self, msg, retry_timeout=INITIAL_TIMEOUT): if isinstance(msg, basestring): msg = [msg] while True: # This should ensure the ZMQ socket can accept more data yield self._poll() try: self.emitter.send_multipart(msg, zmq.NOBLOCK) except zmq.Again: # But sometimes it's not enough self.logger.debug('Error polling for socket writability') retry_timeout = min(retry_timeout*2, MAX_TIMEOUT) yield gen.sleep(retry_timeout.total_seconds()) else: break
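# Minimal sketch of the writability wait used in ZMQDrain._poll(): register
# the socket with the IOLoop for WRITE events, park the coroutine on an Event,
# and set the Event (after deregistering) once the loop reports writability.
# wait_writable() is a hypothetical helper; sock is anything
# IOLoop.add_handler() accepts, such as the zmq socket above.
from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event


@gen.coroutine
def wait_writable(sock):
    loop = IOLoop.current()
    writable = Event()

    def on_events(fd, events):
        if events & loop.WRITE:
            # Stop watching as soon as the socket is writable once.
            loop.remove_handler(sock)
            writable.set()

    loop.add_handler(sock, on_events, loop.WRITE)
    yield writable.wait()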
class Queue(object): """Coordinate producer and consumer coroutines. If maxsize is 0 (the default) the queue size is unbounded. .. testcode:: from tornado import gen from tornado.ioloop import IOLoop from tornado.queues import Queue q = Queue(maxsize=2) async def consumer(): async for item in q: try: print('Doing work on %s' % item) await gen.sleep(0.01) finally: q.task_done() async def producer(): for item in range(5): await q.put(item) print('Put %s' % item) async def main(): # Start consumer without waiting (since it never finishes). IOLoop.current().spawn_callback(consumer) await producer() # Wait for producer to put all tasks. await q.join() # Wait for consumer to finish all tasks. print('Done') IOLoop.current().run_sync(main) .. testoutput:: Put 0 Put 1 Doing work on 0 Put 2 Doing work on 1 Put 3 Doing work on 2 Put 4 Doing work on 3 Doing work on 4 Done In versions of Python without native coroutines (before 3.5), ``consumer()`` could be written as:: @gen.coroutine def consumer(): while True: item = yield q.get() try: print('Doing work on %s' % item) yield gen.sleep(0.01) finally: q.task_done() .. versionchanged:: 4.3 Added ``async for`` support in Python 3.5. """ def __init__(self, maxsize=0): if maxsize is None: raise TypeError("maxsize can't be None") if maxsize < 0: raise ValueError("maxsize can't be negative") self._maxsize = maxsize self._init() self._getters = collections.deque([]) # Futures. self._putters = collections.deque([]) # Pairs of (item, Future). self._unfinished_tasks = 0 self._finished = Event() self._finished.set() @property def maxsize(self): """Number of items allowed in the queue.""" return self._maxsize def qsize(self): """Number of items in the queue.""" return len(self._queue) def empty(self): return not self._queue def full(self): if self.maxsize == 0: return False else: return self.qsize() >= self.maxsize def put(self, item, timeout=None): """Put an item into the queue, perhaps waiting until there is room. Returns a Future, which raises `tornado.util.TimeoutError` after a timeout. ``timeout`` may be a number denoting a time (on the same scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a `datetime.timedelta` object for a deadline relative to the current time. """ future = Future() try: self.put_nowait(item) except QueueFull: self._putters.append((item, future)) _set_timeout(future, timeout) else: future.set_result(None) return future def put_nowait(self, item): """Put an item into the queue without blocking. If no free slot is immediately available, raise `QueueFull`. """ self._consume_expired() if self._getters: assert self.empty(), "queue non-empty, why are getters waiting?" getter = self._getters.popleft() self.__put_internal(item) future_set_result_unless_cancelled(getter, self._get()) elif self.full(): raise QueueFull else: self.__put_internal(item) def get(self, timeout=None): """Remove and return an item from the queue. Returns a Future which resolves once an item is available, or raises `tornado.util.TimeoutError` after a timeout. ``timeout`` may be a number denoting a time (on the same scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a `datetime.timedelta` object for a deadline relative to the current time. """ future = Future() try: future.set_result(self.get_nowait()) except QueueEmpty: self._getters.append(future) _set_timeout(future, timeout) return future def get_nowait(self): """Remove and return an item from the queue without blocking. Return an item if one is immediately available, else raise `QueueEmpty`. 
""" self._consume_expired() if self._putters: assert self.full(), "queue not full, why are putters waiting?" item, putter = self._putters.popleft() self.__put_internal(item) future_set_result_unless_cancelled(putter, None) return self._get() elif self.qsize(): return self._get() else: raise QueueEmpty def task_done(self): """Indicate that a formerly enqueued task is complete. Used by queue consumers. For each `.get` used to fetch a task, a subsequent call to `.task_done` tells the queue that the processing on the task is complete. If a `.join` is blocking, it resumes when all items have been processed; that is, when every `.put` is matched by a `.task_done`. Raises `ValueError` if called more times than `.put`. """ if self._unfinished_tasks <= 0: raise ValueError('task_done() called too many times') self._unfinished_tasks -= 1 if self._unfinished_tasks == 0: self._finished.set() def join(self, timeout=None): """Block until all items in the queue are processed. Returns a Future, which raises `tornado.util.TimeoutError` after a timeout. """ return self._finished.wait(timeout) def __aiter__(self): return _QueueIterator(self) # These three are overridable in subclasses. def _init(self): self._queue = collections.deque() def _get(self): return self._queue.popleft() def _put(self, item): self._queue.append(item) # End of the overridable methods. def __put_internal(self, item): self._unfinished_tasks += 1 self._finished.clear() self._put(item) def _consume_expired(self): # Remove timed-out waiters. while self._putters and self._putters[0][1].done(): self._putters.popleft() while self._getters and self._getters[0].done(): self._getters.popleft() def __repr__(self): return '<%s at %s %s>' % (type(self).__name__, hex( id(self)), self._format()) def __str__(self): return '<%s %s>' % (type(self).__name__, self._format()) def _format(self): result = 'maxsize=%r' % (self.maxsize, ) if getattr(self, '_queue', None): result += ' queue=%r' % self._queue if self._getters: result += ' getters[%s]' % len(self._getters) if self._putters: result += ' putters[%s]' % len(self._putters) if self._unfinished_tasks: result += ' tasks=%s' % self._unfinished_tasks return result
class TornadoReconnectionManager(ReconnectionManager): def __init__(self, pubnub): self._cancelled_event = Event() super(TornadoReconnectionManager, self).__init__(pubnub) @gen.coroutine def _register_heartbeat_timer(self): self._cancelled_event.clear() while not self._cancelled_event.is_set(): if self._pubnub.config.reconnect_policy == PNReconnectionPolicy.EXPONENTIAL: self._timer_interval = int(math.pow(2, self._connection_errors) - 1) if self._timer_interval > self.MAXEXPONENTIALBACKOFF: self._timer_interval = self.MINEXPONENTIALBACKOFF self._connection_errors = 1 logger.debug("timerInterval > MAXEXPONENTIALBACKOFF at: %s" % utils.datetime_now()) elif self._timer_interval < 1: self._timer_interval = self.MINEXPONENTIALBACKOFF logger.debug("timerInterval = %d at: %s" % (self._timer_interval, utils.datetime_now())) else: self._timer_interval = self.INTERVAL # >>> Wait given interval or cancel sleeper = tornado.gen.sleep(self._timer_interval) canceller = self._cancelled_event.wait() wi = tornado.gen.WaitIterator(canceller, sleeper) while not wi.done(): try: future = wi.next() yield future except Exception as e: # TODO: verify the error will not be eaten logger.error(e) raise else: if wi.current_future == sleeper: break elif wi.current_future == canceller: return else: raise Exception("unknown future raised") logger.debug("reconnect loop at: %s" % utils.datetime_now()) # >>> Attempt to request /time/0 endpoint try: yield self._pubnub.time().result() self._connection_errors = 1 self._callback.on_reconnect() logger.debug("reconnection manager stop due success time endpoint call: %s" % utils.datetime_now()) break except Exception: if self._pubnub.config.reconnect_policy == PNReconnectionPolicy.EXPONENTIAL: logger.debug("reconnect interval increment at: %s" % utils.datetime_now()) self._connection_errors += 1 def start_polling(self): if self._pubnub.config.reconnect_policy == PNReconnectionPolicy.NONE: logger.warn("reconnection policy is disabled, please handle reconnection manually.") return self._pubnub.ioloop.spawn_callback(self._register_heartbeat_timer) def stop_polling(self): if self._cancelled_event is not None and not self._cancelled_event.is_set(): self._cancelled_event.set()
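# Condensed sketch of the cancellable backoff used above: race a gen.sleep()
# against an Event.wait() with gen.WaitIterator so a pending delay can be
# interrupted the moment stop_polling() sets the Event. cancellable_sleep()
# is a hypothetical helper; it returns True if the full delay elapsed and
# False if it was cancelled early.
from tornado import gen
from tornado.locks import Event


@gen.coroutine
def cancellable_sleep(seconds, cancelled_event):
    sleeper = gen.sleep(seconds)
    canceller = cancelled_event.wait()
    wait_iterator = gen.WaitIterator(sleeper, canceller)
    while not wait_iterator.done():
        yield wait_iterator.next()
        if wait_iterator.current_future is sleeper:
            raise gen.Return(True)  # full interval elapsed
        if wait_iterator.current_future is canceller:
            raise gen.Return(False)  # cancelled before the interval elapsed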