Code example #1
    def test_http10_no_content_length(self):
        # Regression test for a bug in which can_keep_alive would crash
        # for an HTTP/1.0 (not 1.1) response with no content-length.
        conn = HTTP1Connection(self.client_stream, True)
        self.server_stream.write(b"HTTP/1.0 200 Not Modified\r\n\r\nhello")
        self.server_stream.close()

        event = Event()
        test = self
        body = []

        class Delegate(HTTPMessageDelegate):
            def headers_received(self, start_line, headers):
                test.code = start_line.code

            def data_received(self, data):
                body.append(data)

            def finish(self):
                event.set()

        yield conn.read_response(Delegate())
        yield event.wait()
        self.assertEqual(self.code, 200)
        self.assertEqual(b''.join(body), b'hello')
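A minimal, self-contained sketch (not from any project above, assuming tornado >= 4.2 where Event lives in tornado.locks) of the wait/set handshake these tests rely on: one coroutine suspends in wait() until another calls set().

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

event = Event()

@gen.coroutine
def waiter():
    print("waiting...")
    yield event.wait()    # suspends until event.set() is called
    print("released")

@gen.coroutine
def setter():
    yield gen.sleep(0.1)  # simulate some asynchronous work
    event.set()           # wakes every coroutine blocked in wait()

IOLoop.current().run_sync(lambda: gen.multi([waiter(), setter()]))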
Code example #2
 def get(self):
     logging.debug("queuing trigger")
     self.queue.append(self.finish)
     if self.get_argument("wake", "true") == "true":
         self.wake_callback()
     never_finish = Event()
     yield never_finish.wait()
Code example #3
 def get(self):
     logging.debug("queuing trigger")
     event = Event()
     self.queue.append(event.set)
     if self.get_argument("wake", "true") == "true":
         self.wake_callback()
     yield event.wait()
Code example #4
File: driver.py Project: happyAnger6/OpenSpider
class QueueDriver:
    def __init__(self, **settings):
        self.settings = settings
        self._finished = Event()
        self._getters = collections.deque([])  # Futures.
        self._putters = collections.deque([])
        self.initialize(**settings)

    def initialize(self, **settings):
        pass

    def over(self):
        self._finished.set()

    def save(self):
        raise NotImplementedError()

    def get(self):
        raise NotImplementedError()

    def put(self):
        raise NotImplementedError()

    def join(self, timeout=None):
        return self._finished.wait(timeout)
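A hedged usage sketch for the driver above: join() parks the caller on the internal Event until over() fires it. MemoryDriver and the call sites are hypothetical, and the sketch assumes the QueueDriver class above is in scope with tornado.locks.Event and collections imported.

from tornado import gen
from tornado.ioloop import IOLoop

class MemoryDriver(QueueDriver):
    def initialize(self, **settings):
        self.items = []

    def put(self, item):  # widened signature, for illustration only
        self.items.append(item)

@gen.coroutine
def main():
    driver = MemoryDriver()
    driver.put("job-1")
    IOLoop.current().call_later(0.1, driver.over)  # mark the queue finished shortly
    yield driver.join()  # resolves once over() sets the Event

IOLoop.current().run_sync(main)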
Code example #5
File: iostream_test.py Project: bdarnell/tornado
    def test_read_until_regex_max_bytes(self):
        rs, ws = yield self.make_iostream_pair()
        closed = Event()
        rs.set_close_callback(closed.set)
        try:
            # Extra room under the limit
            fut = rs.read_until_regex(b"def", max_bytes=50)
            ws.write(b"abcdef")
            data = yield fut
            self.assertEqual(data, b"abcdef")

            # Just enough space
            fut = rs.read_until_regex(b"def", max_bytes=6)
            ws.write(b"abcdef")
            data = yield fut
            self.assertEqual(data, b"abcdef")

            # Not enough space, but we don't know it until the data arrives.
            # All we can do is log a warning and close the connection.
            with ExpectLog(gen_log, "Unsatisfiable read"):
                rs.read_until_regex(b"def", max_bytes=5)
                ws.write(b"123456")
                yield closed.wait()
        finally:
            ws.close()
            rs.close()
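The iostream tests here all use the same bridge: set_close_callback(closed.set) turns "the stream closed" into an awaitable condition. Reduced to its core, as a sketch (stream can be any tornado IOStream):

from tornado import gen
from tornado.locks import Event

@gen.coroutine
def wait_for_close(stream):
    closed = Event()
    stream.set_close_callback(closed.set)  # fires once, when the stream closes
    yield closed.wait()                    # suspends until that happens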
Code example #6
class ts():
    def __init__(self, ioloop):
        self.ioloop = ioloop
        self.rEvent = Event()
        self.wEvent = Event()
        self.writeLock = threading.Lock()
        self.readLock = threading.Lock()
        self.writeBuffer = b''
        self.readBuffer = b''
        IOLoop.current().add_callback(self.doRead)
        IOLoop.current().add_callback(self.doWrite)

    @gen.coroutine
    def doRead(self):
        co = 0
        while True:
            yield self.rEvent.wait()
            self.rEvent.clear()
            while True:
                msg = TOUMsg()
                self.readLock.acquire()
                r, self.readBuffer = msg.unpack(self.readBuffer)
                self.readLock.release()
                if not r:
                    break
                co += 1
                if co % 10 == 0:
                    print('co1', co, getRunningTime())

    @gen.coroutine
    def doWrite(self):
        co = 0
        while True:
            yield self.wEvent.wait()
            self.wEvent.clear()
            while True:
                if len(self.writeBuffer) > con_streamBufferSize:
                    break
                yield gen.sleep(random.randint(3, 20) / 10.0)
                msg = TOUMsg({}, b's' * random.randint(10, 2000))
                self.writeLock.acquire()
                self.writeBuffer += msg.pack()
                self.writeLock.release()
                co += 1
                if co % 10 == 0:
                    print('co2', co, getRunningTime())
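The read/write loops above rely on a recurring wait()/clear() idiom: wait for a signal, immediately re-arm, then drain whatever accumulated. A minimal sketch of that idiom in isolation (all names here are illustrative):

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

data_ready = Event()
inbox = []

@gen.coroutine
def consumer():
    for _ in range(3):
        yield data_ready.wait()  # block until a producer signals
        data_ready.clear()       # re-arm so the next wait() blocks again
        while inbox:
            print("got", inbox.pop(0))

@gen.coroutine
def producer():
    for i in range(3):
        yield gen.sleep(0.05)
        inbox.append(i)
        data_ready.set()         # wake the consumer

IOLoop.current().run_sync(lambda: gen.multi([consumer(), producer()]))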
Code example #7
File: test_actor.py Project: tomMoral/distributed
    class Waiter(object):
        def __init__(self):
            self.event = Event()

        @gen.coroutine
        def set(self):
            self.event.set()

        @gen.coroutine
        def wait(self):
            yield self.event.wait()
Code example #8
def test_exit_callback():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    evt = Event()

    @gen.coroutine
    def on_stop(_proc):
        assert _proc is proc
        yield gen.moment
        evt.set()

    # Normal process exit
    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    to_child.put(None)
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
    assert not proc.is_alive()

    # Process terminated
    proc = AsyncProcess(target=wait)
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    yield proc.terminate()
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
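Event.wait() also takes a timeout, as the test above uses. A sketch of the failure path: when the event is never set, the wait raises gen.TimeoutError (tornado aliases it to tornado.util.TimeoutError in recent releases).

from datetime import timedelta
from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

@gen.coroutine
def main():
    evt = Event()
    try:
        yield evt.wait(timedelta(seconds=0.1))  # nobody calls evt.set()
    except gen.TimeoutError:
        print("timed out waiting for the event")

IOLoop.current().run_sync(main)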
Code example #9
File: executor.py Project: aterrel/distributed
    def _start(self):
        if self.scheduler.status != 'running':
            yield self.scheduler._sync_center()
            self.scheduler.start()

        start_event = Event()
        self.coroutines = [
                self.scheduler.handle_queues(self.scheduler_queue, self.report_queue),
                self.report(start_event)]

        _global_executor[0] = self
        yield start_event.wait()
        logger.debug("Started scheduling coroutines. Synchronized")
Code example #10
File: drain.py Project: davidblewett/transistor
class ZMQDrain(object):
    """Implementation of IDrain that pushes to a zmq.Socket asynchronously.
    This implementation overrides the high-water mark behavior from
    cs.eyrie.vassal.Vassal to instead use a zmq.Poller.
    """
    def __init__(self, logger, loop, zmq_socket, metric_prefix='emitter'):
        self.emitter = zmq_socket
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.output_error = Event()
        self.state = RUNNING
        self._writable = Event()
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)

    def _handle_events(self, fd, events):
        if events & self.loop.ERROR:
            self.logger.error('Error polling socket for writability')
        elif events & self.loop.WRITE:
            self.loop.remove_handler(self.emitter)
            self._writable.set()

    @gen.coroutine
    def _poll(self):
        self.loop.add_handler(self.emitter, self._handle_events,
                              self.loop.WRITE)
        yield self._writable.wait()
        self._writable.clear()

    @gen.coroutine
    def close(self, timeout=None):
        self.state = CLOSING
        self.logger.debug("Flushing send queue")
        self.emitter.close()

    def emit_nowait(self, msg):
        self.logger.debug("Drain emitting")
        if isinstance(msg, basestring):
            msg = [msg]
        try:
            self.emitter.send_multipart(msg, zmq.NOBLOCK)
        except zmq.Again:
            raise QueueFull()

    @gen.coroutine
    def emit(self, msg, retry_timeout=INITIAL_TIMEOUT):
        if isinstance(msg, basestring):
            msg = [msg]
        yield self._poll()
        self.emitter.send_multipart(msg, zmq.NOBLOCK)
Code example #11
File: httpserver_test.py Project: bdarnell/tornado
    def test_idle_after_use(self):
        stream = yield self.connect()
        event = Event()
        stream.set_close_callback(event.set)

        # Use the connection twice to make sure keep-alives are working
        for i in range(2):
            stream.write(b"GET / HTTP/1.1\r\n\r\n")
            yield stream.read_until(b"\r\n\r\n")
            data = yield stream.read_bytes(11)
            self.assertEqual(data, b"Hello world")

        # Now let the timeout trigger and close the connection.
        yield event.wait()
Code example #12
File: executor.py Project: ogrisel/distributed
    def _start(self, timeout=3, **kwargs):
        if isinstance(self._start_arg, Scheduler):
            self.scheduler = self._start_arg
            self.center = self._start_arg.center
        if isinstance(self._start_arg, str):
            ip, port = tuple(self._start_arg.split(':'))
            self._start_arg = (ip, int(port))
        if isinstance(self._start_arg, tuple):
            r = coerce_to_rpc(self._start_arg, timeout=timeout)
            try:
                ident = yield r.identity()
            except (StreamClosedError, OSError):
                raise IOError("Could not connect to %s:%d" % self._start_arg)
            if ident['type'] == 'Center':
                self.center = r
                self.scheduler = Scheduler(self.center,
                                           loop=self.loop,
                                           **kwargs)
                self.scheduler.listen(0)
            elif ident['type'] == 'Scheduler':
                self.scheduler = r
                self.scheduler_stream = yield connect(*self._start_arg)
                yield write(self.scheduler_stream, {
                    'op': 'register-client',
                    'client': self.id
                })
                if 'center' in ident:
                    cip, cport = ident['center']
                    self.center = rpc(ip=cip, port=cport)
                else:
                    self.center = self.scheduler
            else:
                raise ValueError("Unknown Type")

        if isinstance(self.scheduler, Scheduler):
            if self.scheduler.status != 'running':
                yield self.scheduler.sync_center()
                self.scheduler.start(0)
            self.scheduler_queue = Queue()
            self.report_queue = Queue()
            self.coroutines.append(
                self.scheduler.handle_queues(self.scheduler_queue,
                                             self.report_queue))

        start_event = Event()
        self.coroutines.append(self._handle_report(start_event))

        _global_executor[0] = self
        yield start_event.wait()
        logger.debug("Started scheduling coroutines. Synchronized")
Code example #13
    def get_response(self, data, method, show_graphiql=False):
        query, variables, operation_name, id = self.get_graphql_params(
            self.request, data)

        request_end = yield self.extension_stack.request_started(
            self.request, query, None, operation_name, variables, self.context,
            self.request_context)

        try:
            execution_result = yield self.execute_graphql_request(
                method, query, variables, operation_name, show_graphiql)

            status_code = 200
            if execution_result:
                response = {}

                if getattr(execution_result, 'is_pending', False):
                    event = Event()
                    on_resolve = lambda *_: event.set()  # noqa
                    execution_result.then(on_resolve).catch(on_resolve)
                    yield event.wait()

                if hasattr(execution_result, 'get'):
                    execution_result = execution_result.get()

                if execution_result.errors:
                    response['errors'] = [
                        self.format_error(e) for e in execution_result.errors
                    ]

                if execution_result.invalid:
                    status_code = 400
                else:
                    response['data'] = execution_result.data

                if self.batch:
                    response['id'] = id
                    response['status'] = status_code

                result = self.json_encode(response,
                                          pretty=self.pretty or show_graphiql)
            else:
                result = None

            res = (result, status_code)
            yield self.extension_stack.will_send_response(result, self.context)
            raise Return(res)
        finally:
            yield request_end()
Code example #14
    def test_prepare_curl_callback_stack_context(self):
        exc_info = []
        error_event = Event()

        def error_handler(typ, value, tb):
            exc_info.append((typ, value, tb))
            error_event.set()
            return True

        with ExceptionStackContext(error_handler):
            request = HTTPRequest(self.get_url('/custom_reason'),
                                  prepare_curl_callback=lambda curl: 1 / 0)
        yield [error_event.wait(), self.http_client.fetch(request)]
        self.assertEqual(1, len(exc_info))
        self.assertIs(exc_info[0][0], ZeroDivisionError)
Code example #15
File: iostream_test.py Project: bdarnell/tornado
 def test_read_until_regex_max_bytes_ignores_extra(self):
     rs, ws = yield self.make_iostream_pair()
     closed = Event()
     rs.set_close_callback(closed.set)
     try:
         # Even though data that matches arrives in the same packet that
         # puts us over the limit, we fail the request because it was not
         # found within the limit.
         ws.write(b"abcdef")
         with ExpectLog(gen_log, "Unsatisfiable read"):
             rs.read_until_regex(b"def", max_bytes=5)
             yield closed.wait()
     finally:
         ws.close()
         rs.close()
Code example #16
    def asyncSetUp(self):
        listener, port = bind_unused_port()
        event = Event()

        def accept_callback(conn, addr):
            self.server_stream = IOStream(conn)
            self.addCleanup(self.server_stream.close)
            event.set()

        add_accept_handler(listener, accept_callback)
        self.client_stream = IOStream(socket.socket())
        self.addCleanup(self.client_stream.close)
        yield [self.client_stream.connect(('127.0.0.1', port)), event.wait()]
        self.io_loop.remove_handler(listener)
        listener.close()
Code example #17
File: iostream_test.py Project: bdarnell/tornado
 def test_read_until_regex_max_bytes_inline(self):
     rs, ws = yield self.make_iostream_pair()
     closed = Event()
     rs.set_close_callback(closed.set)
     try:
         # Similar to the error case in the previous test, but the
         # ws writes first so rs reads are satisfied
         # inline.  For consistency with the out-of-line case, we
         # do not raise the error synchronously.
         ws.write(b"123456")
         with ExpectLog(gen_log, "Unsatisfiable read"):
             rs.read_until_regex(b"def", max_bytes=5)
             yield closed.wait()
     finally:
         ws.close()
         rs.close()
Code example #18
    def handle_stream(self, stream, address):
        id = str(uuid.uuid1())
        conn_id = str(self.connId)
        self.connId += 1
        self.addConnMap(conn_id)
        IOLoop.instance().add_callback(
            functools.partial(self.doRead, stream, conn_id))
        IOLoop.instance().add_callback(
            functools.partial(self.doWrite, stream, conn_id))
        pack = {'type': 'conn', 'id': id, 'conn_id': conn_id}
        msg = TOUMsg(pack, b'')
        e = Event()
        self.waitIdMap[id] = {'event': e}
        yield self.addTask(msg)

        self.writeLock.acquire()
        wbl = len(self.writeBuffer) / 1024
        self.writeLock.release()
        s = ('conn add  %s, conn:%s,in:%s,out:%s,oById:%s,addTask:%s,waitId:%s'
             % (conn_id, len(self.connMap), self.outputSize / 1024, wbl,
                len(self.outputMap_byId), len(self.addTaskMap),
                len(self.waitIdMap)))
        t = int(getRunningTime() * 1000) / 1000.0
        msg = '%s  %s\n' % (t, s)
        print(msg)
        t = int((getRunningTime() - self.startTime) * 100) / 100.0
        ss = '#######  %ss  ##  conn sent  %s  %s\n' % (t, conn_id,
                                                        len(self.connMap))
        self.logCache.append(ss)

        yield e.wait()
        msg = self.outputMap_byId[id]['msg']
        del self.outputMap_byId[id]
        del self.waitIdMap[id]

        back = msg.m_json
        t = int(getRunningTime() * 1000) / 1000.0
        s = '%s  conn reply %s, conn:%s ,ret:%s\n' % (
            t, back['conn_id'], len(self.connMap), back['ret'])
        print(s)

        t = int((getRunningTime() - self.startTime) * 100) / 100.0
        ss = '#######  %ss  ##  conn back  %s  %s\n' % (t, back['conn_id'],
                                                        len(self.connMap))
        self.logCache.append(ss)

        if back['ret'] == 0:
            del self.connMap[back['conn_id']]
Code example #19
File: executor.py Project: canavandl/distributed
    def _start(self, timeout=3, **kwargs):
        if isinstance(self._start_arg, Scheduler):
            self.scheduler = self._start_arg
            self.center = self._start_arg.center
        if isinstance(self._start_arg, str):
            ip, port = tuple(self._start_arg.split(':'))
            self._start_arg = (ip, int(port))
        if isinstance(self._start_arg, tuple):
            r = coerce_to_rpc(self._start_arg, timeout=timeout)
            try:
                ident = yield r.identity()
            except (StreamClosedError, OSError):
                raise IOError("Could not connect to %s:%d" % self._start_arg)
            if ident['type'] == 'Center':
                self.center = r
                self.scheduler = Scheduler(self.center, loop=self.loop,
                                           **kwargs)
                self.scheduler.listen(0)
            elif ident['type'] == 'Scheduler':
                self.scheduler = r
                self.scheduler_stream = yield connect(*self._start_arg)
                yield write(self.scheduler_stream, {'op': 'register-client',
                                                    'client': self.id})
                if 'center' in ident:
                    cip, cport = ident['center']
                    self.center = rpc(ip=cip, port=cport)
                else:
                    self.center = self.scheduler
            else:
                raise ValueError("Unknown Type")

        if isinstance(self.scheduler, Scheduler):
            if self.scheduler.status != 'running':
                yield self.scheduler.sync_center()
                self.scheduler.start(0)
            self.scheduler_queue = Queue()
            self.report_queue = Queue()
            self.coroutines.append(self.scheduler.handle_queues(
                self.scheduler_queue, self.report_queue))

        start_event = Event()
        self.coroutines.append(self._handle_report(start_event))

        _global_executor[0] = self
        yield start_event.wait()
        logger.debug("Started scheduling coroutines. Synchronized")
Code example #20
File: snippet.py Project: szabo92/gistable
def do_exec():
    result = execute(schema(),
                     ast,
                     executor=TornadoExecutor(),
                     return_promise=True)

    if getattr(result, 'is_pending', False):
        event = Event()
        on_resolve = lambda *_: event.set()
        result.then(on_resolve).catch(on_resolve)
        yield event.wait()

    if hasattr(result, 'get'):
        result = result.get()
    assert not result.errors
    assert result.data == {'a': 'hey', 'b': 'hey2', 'c': 'hey3'}
    print('SUCCESS ASYNC')
Code example #21
class ImageMutex():

    def __init__(self):
        self._mutex = Event()
        self._blocked = count()
        self._building_log = []
        self._exception = None

    @gen.coroutine
    def block(self):
        value = self._blocked.__next__()  # single bytecode operation
        if value:
            yield self._mutex.wait()
        return value

    def __enter__(self):
        if self._exception is not None:
            raise self._exception
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._building_log = []
        if isinstance(exc_value, Exception):
            self._exception = exc_value
        self._mutex.set()

    def timeout_happened(self):
        self._exception = Exception('This image is too heavy to build')
        self._building_log = []

    def add_to_log(self, message, level=1):
        if not self._exception:
            self._building_log.append({
                'text': message,
                'level': level
            })

    @property
    def building_log(self):
        return self._building_log

    @property
    def last_exception(self):
        return self._exception
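A hedged usage sketch for ImageMutex above: the first caller of block() gets 0 and becomes the builder; everyone else waits until __exit__ sets the internal Event. build_image() is a hypothetical stand-in for the real build step, and the sketch assumes the ImageMutex class above is in scope.

from tornado import gen

@gen.coroutine
def build_image():  # hypothetical build coroutine
    yield gen.sleep(0.1)

@gen.coroutine
def ensure_image(mutex):
    queue_position = yield mutex.block()  # 0 only for the first caller
    if queue_position == 0:
        with mutex:  # __exit__ records any error and sets the Event
            yield build_image()
    # later callers resume from block() only after the Event was set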
Code example #22
File: utils.py Project: mike820324/microProxy
    def create_iostream_pair(self):
        _lock = Event()
        server_streams = []

        def accept_callback(conn, addr):
            server_stream = MicroProxyIOStream(conn)
            server_streams.append(server_stream)
            # self.addCleanup(server_stream.close)
            _lock.set()

        listener, port = bind_unused_port()
        add_accept_handler(listener, accept_callback)
        client_stream = MicroProxyIOStream(socket.socket())
        yield [client_stream.connect(('127.0.0.1', port)),
               _lock.wait()]
        self.io_loop.remove_handler(listener)
        listener.close()

        raise Return((client_stream, server_streams[0]))
Code example #23
class QueueDriver:
    def __init__(self, **settings):
        self.settings = settings
        self._finished = Event()
        self._getters = collections.deque([])  # Futures.
        self._putters = collections.deque([])

    def over(self):
        self._finished.set()

    def save(self):
        raise NotImplementedError()

    def get(self):
        raise NotImplementedError()

    def put(self):
        raise NotImplementedError()

    def join(self, timeout):
        return self._finished.wait(timeout)
Code example #24
class ImageMutex():
    def __init__(self):
        self._mutex = Event()
        self._blocked = count()
        self._building_log = []
        self._exception = None

    @gen.coroutine
    def block(self):
        value = self._blocked.__next__()  # single bytecode operation
        if value:
            yield self._mutex.wait()
        return value

    def __enter__(self):
        # if self._exception is not None:
        #     raise self._exception
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._building_log = []
        if isinstance(exc_value, Exception):
            self._exception = exc_value
        self._mutex.set()

    def timeout_happened(self):
        self._exception = Exception('This image is too heavy to build')
        self._building_log = []

    def add_to_log(self, message, level=1):
        if not self._exception:
            self._building_log.append({'text': message, 'level': level})

    @property
    def building_log(self):
        return self._building_log

    @property
    def last_exception(self):
        return self._exception
Code example #25
File: iostream_test.py Project: bdarnell/tornado
    def test_close_callback_with_pending_read(self):
        # Regression test for a bug that was introduced in 2.3
        # where the IOStream._close_callback would never be called
        # if there were pending reads.
        OK = b"OK\r\n"
        rs, ws = yield self.make_iostream_pair()
        event = Event()
        rs.set_close_callback(event.set)
        try:
            ws.write(OK)
            res = yield rs.read_until(b"\r\n")
            self.assertEqual(res, OK)

            ws.close()
            rs.read_until(b"\r\n")
            # If _close_callback (self.stop) is not called,
            # an AssertionError: Async operation timed out after 5 seconds
            # will be raised.
            yield event.wait()
        finally:
            ws.close()
            rs.close()
Code example #26
File: iostream_test.py Project: leeclemens/tornado
    def test_async_read_error_logging(self):
        # Socket errors on asynchronous reads should be logged (but only
        # once).
        server, client = yield self.make_iostream_pair()
        closed = Event()
        server.set_close_callback(closed.set)
        try:
            # Start a read that will be fulfilled asynchronously.
            server.read_bytes(1)
            client.write(b'a')
            # Stub out read_from_fd to make it fail.

            def fake_read_from_fd():
                os.close(server.socket.fileno())
                server.__class__.read_from_fd(server)
            server.read_from_fd = fake_read_from_fd
            # This log message is from _handle_read (not read_from_fd).
            with ExpectLog(gen_log, "error on read"):
                yield closed.wait()
        finally:
            server.close()
            client.close()
Code example #27
File: iostream_test.py Project: 5l1v3r1/tornado-1
    def test_async_read_error_logging(self):
        # Socket errors on asynchronous reads should be logged (but only
        # once).
        server, client = yield self.make_iostream_pair()
        closed = Event()
        server.set_close_callback(closed.set)
        try:
            # Start a read that will be fulfilled asynchronously.
            server.read_bytes(1)
            client.write(b"a")
            # Stub out read_from_fd to make it fail.

            def fake_read_from_fd():
                os.close(server.socket.fileno())
                server.__class__.read_from_fd(server)

            server.read_from_fd = fake_read_from_fd
            # This log message is from _handle_read (not read_from_fd).
            with ExpectLog(gen_log, "error on read"):
                yield closed.wait()
        finally:
            server.close()
            client.close()
Code example #28
    def _start(self, timeout=3, **kwargs):
        if isinstance(self._start_arg, Scheduler):
            self.scheduler = self._start_arg
        if isinstance(self._start_arg, str):
            host, port = tuple(self._start_arg.split(':'))
            self._start_arg = (host, int(port))
        if isinstance(self._start_arg, tuple):
            host, port = self._start_arg
            ip = socket.gethostbyname(host)
            r = coerce_to_rpc((ip, port), timeout=timeout)
            try:
                ident = yield r.identity()
            except (StreamClosedError, OSError):
                raise IOError("Could not connect to %s:%d" % (ip, port))
            if ident['type'] == 'Scheduler':
                self.scheduler = r
                self.scheduler_stream = yield connect(ip, port)
                yield write(self.scheduler_stream, {'op': 'register-client',
                                                    'client': self.id})
            else:
                raise ValueError("Unknown Type")

        if isinstance(self.scheduler, Scheduler):
            if self.scheduler.status != 'running':
                self.scheduler.start(0)
            self.scheduler_queue = Queue()
            self.report_queue = Queue()
            self.coroutines.append(self.scheduler.handle_queues(
                self.scheduler_queue, self.report_queue))

        start_event = Event()
        self.coroutines.append(self._handle_report(start_event))

        _global_executor[0] = self
        yield start_event.wait()
        logger.debug("Started scheduling coroutines. Synchronized")
Code example #29
File: executor.py Project: cowlicks/distributed
class Executor(object):
    """ Distributed executor with data dependencies

    This executor resembles executors in concurrent.futures but also allows
    Futures within submit/map calls.

    Provide center address on initialization

    >>> executor = Executor(('127.0.0.1', 8787))  # doctest: +SKIP

    Use ``submit`` method like normal

    >>> a = executor.submit(add, 1, 2)  # doctest: +SKIP
    >>> b = executor.submit(add, 10, 20)  # doctest: +SKIP

    Additionally, provide results of submit calls (futures) to further submit
    calls:

    >>> c = executor.submit(add, a, b)  # doctest: +SKIP

    This allows for the dynamic creation of complex dependencies.
    """
    def __init__(self, center, start=True, delete_batch_time=1):
        self.center = coerce_to_rpc(center)
        self.futures = dict()
        self.refcount = defaultdict(lambda: 0)
        self.dask = dict()
        self.restrictions = dict()
        self.loop = IOLoop()
        self.report_queue = Queue()
        self.scheduler_queue = Queue()
        self._shutdown_event = Event()
        self._delete_batch_time = delete_batch_time

        if start:
            self.start()

    def start(self):
        """ Start scheduler running in separate thread """
        from threading import Thread
        self.loop.add_callback(self._go)
        self._loop_thread = Thread(target=self.loop.start)
        self._loop_thread.start()

    def __enter__(self):
        if not self.loop._running:
            self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.shutdown()

    def _inc_ref(self, key):
        self.refcount[key] += 1

    def _dec_ref(self, key):
        self.refcount[key] -= 1
        if self.refcount[key] == 0:
            del self.refcount[key]
            self._release_key(key)

    def _release_key(self, key):
        """ Release key from distributed memory """
        self.futures[key]['event'].clear()
        logger.debug("Release key %s", key)
        del self.futures[key]
        self.scheduler_queue.put_nowait({'op': 'release-held-data',
                                         'key': key})

    @gen.coroutine
    def report(self):
        """ Listen to scheduler """
        while True:
            msg = yield self.report_queue.get()
            if msg['op'] == 'close':
                break
            if msg['op'] == 'task-finished':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'finished'
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'lost-data':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'lost'
                    self.futures[msg['key']]['event'].clear()
            if msg['op'] == 'task-erred':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'error'
                    self.futures[msg['key']]['event'].set()

    @gen.coroutine
    def _shutdown(self):
        """ Send shutdown signal and wait until _go completes """
        self.report_queue.put_nowait({'op': 'close'})
        self.scheduler_queue.put_nowait({'op': 'close'})
        yield self._shutdown_event.wait()

    def shutdown(self):
        """ Send shutdown signal and wait until scheduler terminates """
        self.report_queue.put_nowait({'op': 'close'})
        self.scheduler_queue.put_nowait({'op': 'close'})
        self.loop.stop()
        self._loop_thread.join()

    @gen.coroutine
    def _go(self):
        """ Setup and run all other coroutines.  Block until finished. """
        self.who_has, self.has_what, self.ncores = yield [self.center.who_has(),
                                                         self.center.has_what(),
                                                         self.center.ncores()]
        self.waiting = {}
        self.processing = {}
        self.stacks = {}

        worker_queues = {worker: Queue() for worker in self.ncores}
        delete_queue = Queue()

        coroutines = ([
            self.report(),
            scheduler(self.scheduler_queue, self.report_queue, worker_queues,
                      delete_queue, self.who_has, self.has_what, self.ncores,
                      self.dask, self.restrictions, self.waiting, self.stacks,
                      self.processing),
            delete(self.scheduler_queue, delete_queue,
                   self.center.ip, self.center.port, self._delete_batch_time)]
         + [worker(self.scheduler_queue, worker_queues[w], w, n)
            for w, n in self.ncores.items()])

        results = yield All(coroutines)
        self._shutdown_event.set()

    def submit(self, func, *args, **kwargs):
        """ Submit a function application to the scheduler

        Parameters
        ----------
        func: callable
        *args:
        **kwargs:
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> c = executor.submit(add, a, b)  # doctest: +SKIP

        Returns
        -------
        Future

        See Also
        --------
        distributed.executor.Executor.submit:
        """
        if not callable(func):
            raise TypeError("First input to submit must be a callable function")

        key = kwargs.pop('key', None)
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)

        if key is None:
            if pure:
                key = funcname(func) + '-' + tokenize(func, kwargs, *args)
            else:
                key = funcname(func) + '-' + next(tokens)

        if key in self.futures:
            return Future(key, self)

        if kwargs:
            task = (apply, func, args, kwargs)
        else:
            task = (func,) + args

        if workers is not None:
            restrictions = {key: workers}
        else:
            restrictions = {}

        if key not in self.futures:
            self.futures[key] = {'event': Event(), 'status': 'waiting'}

        logger.debug("Submit %s(...), %s", funcname(func), key)
        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': {key: task},
                                         'keys': [key],
                                         'restrictions': restrictions})

        return Future(key, self)

    def map(self, func, *iterables, **kwargs):
        """ Map a function on a sequence of arguments

        Arguments can be normal objects or Futures

        Parameters
        ----------
        func: callable
        iterables: Iterables
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> L = executor.map(func, sequence)  # doctest: +SKIP

        Returns
        -------
        list of futures

        See also
        --------
        distributed.executor.Executor.submit
        """
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)
        if not callable(func):
            raise TypeError("First input to map must be a callable function")
        iterables = [list(it) for it in iterables]
        if pure:
            keys = [funcname(func) + '-' + tokenize(func, kwargs, *args)
                    for args in zip(*iterables)]
        else:
            uid = str(uuid.uuid4())
            keys = [funcname(func) + '-' + uid + '-' + next(tokens)
                    for i in range(min(map(len, iterables)))]

        if not kwargs:
            dsk = {key: (func,) + args
                   for key, args in zip(keys, zip(*iterables))}
        else:
            dsk = {key: (apply, func, args, kwargs)
                   for key, args in zip(keys, zip(*iterables))}

        for key in dsk:
            if key not in self.futures:
                self.futures[key] = {'event': Event(), 'status': 'waiting'}

        if isinstance(workers, (list, set)):
            if workers and isinstance(first(workers), (list, set)):
                if len(workers) != len(keys):
                    raise ValueError("You only provided %d worker restrictions"
                    " for a sequence of length %d" % (len(workers), len(keys)))
                restrictions = dict(zip(keys, workers))
            else:
                restrictions = {key: workers for key in keys}
        elif workers is None:
            restrictions = {}
        else:
            raise TypeError("Workers must be a list or set of workers or None")

        logger.debug("map(%s, ...)", funcname(func))
        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': dsk,
                                         'keys': keys,
                                         'restrictions': restrictions})

        return [Future(key, self) for key in keys]

    @gen.coroutine
    def _gather(self, futures):
        futures2, keys = unpack_remotedata(futures)
        keys = list(keys)

        while True:
            yield All([self.futures[key]['event'].wait() for key in keys])
            try:
                data = yield _gather(self.center, keys)
            except KeyError as e:
                self.scheduler_queue.put_nowait({'op': 'missing-data',
                                                 'missing': e.args})
                for key in e.args:
                    self.futures[key]['event'].clear()
            else:
                break

        data = dict(zip(keys, data))

        result = pack_data(futures2, data)
        raise gen.Return(result)

    def gather(self, futures):
        """ Gather futures from distributed memory

        Accepts a future or any nested core container of futures

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> x = e.submit(add, 1, 2)  # doctest: +SKIP
        >>> e.gather(x)  # doctest: +SKIP
        3
        >>> e.gather([x, [x], x])  # doctest: +SKIP
        [3, [3], 3]
        """
        return sync(self.loop, self._gather, futures)

    @gen.coroutine
    def _get(self, dsk, keys, restrictions=None):
        flatkeys = list(flatten(keys))
        for key in flatkeys:
            if key not in self.futures:
                self.futures[key] = {'event': Event(), 'status': None}
        futures = {key: Future(key, self) for key in flatkeys}

        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': dsk,
                                         'keys': flatkeys,
                                         'restrictions': restrictions or {}})

        packed = pack_data(keys, futures)
        result = yield self._gather(packed)
        raise gen.Return(result)

    def get(self, dsk, keys, **kwargs):
        """ Gather futures from distributed memory

        Parameters
        ----------
        dsk: dict
        keys: object, or nested lists of objects
        restrictions: dict (optional)
            A mapping of {key: {set of worker hostnames}} that restricts where
            jobs can take place

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.get({'x': (add, 1, 2)}, 'x')  # doctest: +SKIP
        3
        """
        return sync(self.loop, self._get, dsk, keys, **kwargs)
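The executor above keeps one Event per task key and toggles it as scheduler messages arrive; waiters in _gather() block on those Events. A reduced sketch of that registry idiom (the message shapes mirror the report() loop above, but the helper names are mine):

from tornado import gen
from tornado.locks import Event

futures = {}

def ensure_entry(key):
    if key not in futures:
        futures[key] = {'event': Event(), 'status': 'waiting'}

def on_message(msg):
    entry = futures.get(msg['key'])
    if entry is None:
        return
    if msg['op'] == 'task-finished':
        entry['status'] = 'finished'
        entry['event'].set()    # wake everyone blocked on this key
    elif msg['op'] == 'lost-data':
        entry['status'] = 'lost'
        entry['event'].clear()  # force future waiters to block again

@gen.coroutine
def wait_for(key):
    ensure_entry(key)
    yield futures[key]['event'].wait()
    raise gen.Return(futures[key]['status'])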
Code example #30
File: httpserver_test.py Project: bdarnell/tornado
 def test_unused_connection(self):
     stream = yield self.connect()
     event = Event()
     stream.set_close_callback(event.set)
     yield event.wait()
Code example #31
class ProjectGroomer(object):
  """ Cleans up expired transactions for a project. """
  def __init__(self, project_id, coordinator, zk_client, db_access,
               thread_pool):
    """ Creates a new ProjectGroomer.

    Args:
      project_id: A string specifying a project ID.
      coordinator: A GroomingCoordinator.
      zk_client: A KazooClient.
      db_access: A DatastoreProxy.
      thread_pool: A ThreadPoolExecutor.
    """
    self.project_id = project_id

    self._coordinator = coordinator
    self._zk_client = zk_client
    self._tornado_zk = TornadoKazoo(self._zk_client)
    self._db_access = db_access
    self._thread_pool = thread_pool
    self._project_node = '/appscale/apps/{}'.format(self.project_id)
    self._containers = []
    self._inactive_containers = set()
    self._batch_resolver = BatchResolver(self.project_id, self._db_access)

    self._zk_client.ensure_path(self._project_node)
    self._zk_client.ChildrenWatch(self._project_node, self._update_containers)

    self._txid_manual_offset = 0
    self._offset_node = '/'.join([self._project_node, OFFSET_NODE])
    self._zk_client.DataWatch(self._offset_node, self._update_offset)

    self._stop_event = AsyncEvent()
    self._stopped_event = AsyncEvent()

    # Keeps track of cleanup results for each round of grooming.
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = None

    self._worker_queue = AsyncQueue(maxsize=MAX_CONCURRENCY)
    for _ in range(MAX_CONCURRENCY):
      IOLoop.current().spawn_callback(self._worker)

    IOLoop.current().spawn_callback(self.start)

  @gen.coroutine
  def start(self):
    """ Starts the grooming process until the stop event is set. """
    logger.info('Grooming {}'.format(self.project_id))
    while True:
      if self._stop_event.is_set():
        break

      try:
        yield self._groom_project()
      except Exception:
        # Prevent the grooming loop from stopping if an error is encountered.
        logger.exception(
          'Unexpected error while grooming {}'.format(self.project_id))
        yield gen.sleep(MAX_TX_DURATION)

    self._stopped_event.set()

  @gen.coroutine
  def stop(self):
    """ Stops the grooming process. """
    logger.info('Stopping grooming process for {}'.format(self.project_id))
    self._stop_event.set()
    yield self._stopped_event.wait()

  @gen.coroutine
  def _worker(self):
    """ Processes items in the worker queue. """
    while True:
      tx_path, composite_indexes = yield self._worker_queue.get()
      try:
        tx_time = yield self._resolve_txid(tx_path, composite_indexes)
        if tx_time is None:
          self._txids_cleaned += 1

        if tx_time is not None and tx_time < self._oldest_valid_tx_time:
          self._oldest_valid_tx_time = tx_time
      finally:
        self._worker_queue.task_done()

  def _update_offset(self, new_offset, _):
    """ Watches for updates to the manual offset node.

    Args:
      new_offset: A string specifying the new manual offset.
    """
    self._txid_manual_offset = int(new_offset or 0)

  def _update_containers(self, nodes):
    """ Updates the list of active txid containers.

    Args:
      nodes: A list of strings specifying ZooKeeper nodes.
    """
    counters = [int(node[len(CONTAINER_PREFIX):] or 1)
                for node in nodes if node.startswith(CONTAINER_PREFIX)
                and node not in self._inactive_containers]
    counters.sort()

    containers = [CONTAINER_PREFIX + str(counter) for counter in counters]
    if containers and containers[0] == '{}1'.format(CONTAINER_PREFIX):
      containers[0] = CONTAINER_PREFIX

    self._containers = containers

  @gen.coroutine
  def _groom_project(self):
    """ Runs the grooming process. """
    index = self._coordinator.index
    worker_count = self._coordinator.total_workers

    oldest_valid_tx_time = yield self._fetch_and_clean(index, worker_count)

    # Wait until there's a reasonable chance that some transactions have
    # timed out.
    next_timeout_eta = oldest_valid_tx_time + MAX_TX_DURATION

    # The oldest ignored transaction should still be valid, but ensure that
    # the timeout is not negative.
    next_timeout = max(0, next_timeout_eta - time.time())
    time_to_wait = datetime.timedelta(
      seconds=next_timeout + (MAX_TX_DURATION / 2))

    # Allow the wait to be cut short when a project is removed.
    try:
      yield self._stop_event.wait(timeout=time_to_wait)
    except gen.TimeoutError:
      raise gen.Return()

  @gen.coroutine
  def _remove_path(self, tx_path):
    """ Removes a ZooKeeper node.

    Args:
      tx_path: A string specifying the path to delete.
    """
    try:
      yield self._tornado_zk.delete(tx_path)
    except NoNodeError:
      pass
    except NotEmptyError:
      yield self._thread_pool.submit(self._zk_client.delete, tx_path,
                                     recursive=True)

  @gen.coroutine
  def _resolve_txid(self, tx_path, composite_indexes):
    """ Cleans up a transaction if it has expired.

    Args:
      tx_path: A string specifying the location of the ZooKeeper node.
      composite_indexes: A list of CompositeIndex objects.
    Returns:
      The transaction start time if still valid, None if invalid because this
      method will also delete it.
    """
    tx_data = yield self._tornado_zk.get(tx_path)
    tx_time = float(tx_data[0])

    _, container, tx_node = tx_path.rsplit('/', 2)
    tx_node_id = int(tx_node.lstrip(COUNTER_NODE_PREFIX))
    container_count = int(container[len(CONTAINER_PREFIX):] or 1)
    if tx_node_id < 0:
      yield self._remove_path(tx_path)
      raise gen.Return()

    container_size = MAX_SEQUENCE_COUNTER + 1
    automatic_offset = (container_count - 1) * container_size
    txid = self._txid_manual_offset + automatic_offset + tx_node_id

    if txid < 1:
      yield self._remove_path(tx_path)
      raise gen.Return()

    # If the transaction is still valid, return the time it was created.
    if tx_time + MAX_TX_DURATION >= time.time():
      raise gen.Return(tx_time)

    yield self._batch_resolver.resolve(txid, composite_indexes)
    yield self._remove_path(tx_path)
    yield self._batch_resolver.cleanup(txid)

  @gen.coroutine
  def _fetch_and_clean(self, worker_index, worker_count):
    """ Cleans up expired transactions.

    Args:
      worker_index: An integer specifying this worker's index.
      worker_count: An integer specifying the number of total workers.
    Returns:
      A float specifying the time of the oldest valid transaction as a unix
      timestamp.
    """
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = time.time()

    children = []
    for index, container in enumerate(self._containers):
      container_path = '/'.join([self._project_node, container])
      new_children = yield self._tornado_zk.get_children(container_path)

      if not new_children and index < len(self._containers) - 1:
        self._inactive_containers.add(container)

      children.extend(['/'.join([container_path, node])
                       for node in new_children])

    logger.debug(
      'Found {} transaction IDs for {}'.format(len(children), self.project_id))

    if not children:
      raise gen.Return(self._oldest_valid_tx_time)

    # Refresh these each time so that the indexes are fresh.
    encoded_indexes = yield self._thread_pool.submit(
      self._db_access.get_indices, self.project_id)
    composite_indexes = [CompositeIndex(index) for index in encoded_indexes]

    for tx_path in children:
      tx_node_id = int(tx_path.split('/')[-1].lstrip(COUNTER_NODE_PREFIX))
      # Only resolve transactions that this worker has been assigned.
      if tx_node_id % worker_count != worker_index:
        continue

      yield self._worker_queue.put((tx_path, composite_indexes))

    yield self._worker_queue.join()

    if self._txids_cleaned > 0:
      logger.info('Cleaned up {} expired txids for {}'.format(
        self._txids_cleaned, self.project_id))

    raise gen.Return(self._oldest_valid_tx_time)
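The _stop_event/_stopped_event pair above is a graceful-shutdown handshake: one Event requests the stop, the other acknowledges it. A minimal standalone sketch of the same two-Event pattern:

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

stop_event = Event()
stopped_event = Event()

@gen.coroutine
def worker():
    while not stop_event.is_set():
        yield gen.sleep(0.05)  # one unit of periodic work
    stopped_event.set()        # acknowledge a complete stop

@gen.coroutine
def main():
    IOLoop.current().spawn_callback(worker)
    yield gen.sleep(0.2)
    stop_event.set()            # request shutdown...
    yield stopped_event.wait()  # ...and wait for the worker to wind down
    print("worker stopped cleanly")

IOLoop.current().run_sync(main)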
Code example #32
File: queues.py Project: bdarnell/tornado
class Queue(Generic[_T]):
    """Coordinate producer and consumer coroutines.

    If maxsize is 0 (the default) the queue size is unbounded.

    .. testcode::

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.queues import Queue

        q = Queue(maxsize=2)

        async def consumer():
            async for item in q:
                try:
                    print('Doing work on %s' % item)
                    await gen.sleep(0.01)
                finally:
                    q.task_done()

        async def producer():
            for item in range(5):
                await q.put(item)
                print('Put %s' % item)

        async def main():
            # Start consumer without waiting (since it never finishes).
            IOLoop.current().spawn_callback(consumer)
            await producer()     # Wait for producer to put all tasks.
            await q.join()       # Wait for consumer to finish all tasks.
            print('Done')

        IOLoop.current().run_sync(main)

    .. testoutput::

        Put 0
        Put 1
        Doing work on 0
        Put 2
        Doing work on 1
        Put 3
        Doing work on 2
        Put 4
        Doing work on 3
        Doing work on 4
        Done


    In versions of Python without native coroutines (before 3.5),
    ``consumer()`` could be written as::

        @gen.coroutine
        def consumer():
            while True:
                item = yield q.get()
                try:
                    print('Doing work on %s' % item)
                    yield gen.sleep(0.01)
                finally:
                    q.task_done()

    .. versionchanged:: 4.3
       Added ``async for`` support in Python 3.5.

    """

    # Exact type depends on subclass. Could be another generic
    # parameter and use protocols to be more precise here.
    _queue = None  # type: Any

    def __init__(self, maxsize: int = 0) -> None:
        if maxsize is None:
            raise TypeError("maxsize can't be None")

        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        self._getters = collections.deque([])  # type: Deque[Future[_T]]
        self._putters = collections.deque([])  # type: Deque[Tuple[_T, Future[None]]]
        self._unfinished_tasks = 0
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self) -> int:
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self) -> int:
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self) -> bool:
        return not self._queue

    def full(self) -> bool:
        if self.maxsize == 0:
            return False
        else:
            return self.qsize() >= self.maxsize

    def put(
        self, item: _T, timeout: Union[float, datetime.timedelta] = None
    ) -> "Future[None]":
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.util.TimeoutError` after a
        timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.
        """
        future = Future()  # type: Future[None]
        try:
            self.put_nowait(item)
        except QueueFull:
            self._putters.append((item, future))
            _set_timeout(future, timeout)
        else:
            future.set_result(None)
        return future

    def put_nowait(self, item: _T) -> None:
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if self._getters:
            assert self.empty(), "queue non-empty, why are getters waiting?"
            getter = self._getters.popleft()
            self.__put_internal(item)
            future_set_result_unless_cancelled(getter, self._get())
        elif self.full():
            raise QueueFull
        else:
            self.__put_internal(item)

    def get(self, timeout: Union[float, datetime.timedelta] = None) -> Awaitable[_T]:
        """Remove and return an item from the queue.

        Returns an awaitable which resolves once an item is available, or raises
        `tornado.util.TimeoutError` after a timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.

        .. note::

           The ``timeout`` argument of this method differs from that
           of the standard library's `queue.Queue.get`. That method
           interprets numeric values as relative timeouts; this one
           interprets them as absolute deadlines and requires
           ``timedelta`` objects for relative timeouts (consistent
           with other timeouts in Tornado).

        """
        future = Future()  # type: Future[_T]
        try:
            future.set_result(self.get_nowait())
        except QueueEmpty:
            self._getters.append(future)
            _set_timeout(future, timeout)
        return future

    def get_nowait(self) -> _T:
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self._putters.popleft()
            self.__put_internal(item)
            future_set_result_unless_cancelled(putter, None)
            return self._get()
        elif self.qsize():
            return self._get()
        else:
            raise QueueEmpty

    def task_done(self) -> None:
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        if self._unfinished_tasks <= 0:
            raise ValueError("task_done() called too many times")
        self._unfinished_tasks -= 1
        if self._unfinished_tasks == 0:
            self._finished.set()

    def join(self, timeout: Union[float, datetime.timedelta] = None) -> Awaitable[None]:
        """Block until all items in the queue are processed.

        Returns an awaitable, which raises `tornado.util.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    def __aiter__(self) -> _QueueIterator[_T]:
        return _QueueIterator(self)

    # These three are overridable in subclasses.
    def _init(self) -> None:
        self._queue = collections.deque()

    def _get(self) -> _T:
        return self._queue.popleft()

    def _put(self, item: _T) -> None:
        self._queue.append(item)

    # End of the overridable methods.

    def __put_internal(self, item: _T) -> None:
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self) -> None:
        # Remove timed-out waiters.
        while self._putters and self._putters[0][1].done():
            self._putters.popleft()

        while self._getters and self._getters[0].done():
            self._getters.popleft()

    def __repr__(self) -> str:
        return "<%s at %s %s>" % (type(self).__name__, hex(id(self)), self._format())

    def __str__(self) -> str:
        return "<%s %s>" % (type(self).__name__, self._format())

    def _format(self) -> str:
        result = "maxsize=%r" % (self.maxsize,)
        if getattr(self, "_queue", None):
            result += " queue=%r" % self._queue
        if self._getters:
            result += " getters[%s]" % len(self._getters)
        if self._putters:
            result += " putters[%s]" % len(self._putters)
        if self._unfinished_tasks:
            result += " tasks=%s" % self._unfinished_tasks
        return result
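The timeout note in ``get`` above is worth a concrete illustration: a bare number is an absolute deadline on the IOLoop clock, while a ``timedelta`` is relative. A minimal sketch of both spellings:

import datetime

from tornado.ioloop import IOLoop
from tornado.queues import Queue
from tornado.util import TimeoutError


async def main():
    q = Queue()
    try:
        # Relative timeout: give up 0.1 seconds from now.
        await q.get(timeout=datetime.timedelta(seconds=0.1))
    except TimeoutError:
        print('get() timed out, as expected on an empty queue')

    # A number is an absolute deadline on the IOLoop clock, same effect.
    deadline = IOLoop.current().time() + 0.1
    try:
        await q.get(timeout=deadline)
    except TimeoutError:
        print('deadline passed with no item')


IOLoop.current().run_sync(main)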
Code example #47
0
File: executor.py Project: aterrel/distributed
class Executor(object):
    """ Distributed executor with data dependencies

    This executor resembles executors in concurrent.futures but also allows
    Futures within submit/map calls.

    Provide center address on initialization

    >>> executor = Executor(('127.0.0.1', 8787))  # doctest: +SKIP

    Use ``submit`` method like normal

    >>> a = executor.submit(add, 1, 2)  # doctest: +SKIP
    >>> b = executor.submit(add, 10, 20)  # doctest: +SKIP

    Additionally, provide results of submit calls (futures) to further submit
    calls:

    >>> c = executor.submit(add, a, b)  # doctest: +SKIP

    This allows for the dynamic creation of complex dependencies.
    """
    def __init__(self, center=None, scheduler=None, start=True, delete_batch_time=1, loop=None):
        self.futures = dict()
        self.refcount = defaultdict(lambda: 0)
        self.loop = loop or IOLoop()
        self.scheduler_queue = Queue()
        self.report_queue = Queue()

        if scheduler:
            if isinstance(scheduler, Scheduler):
                self.scheduler = scheduler
                if not center:
                    self.center = scheduler.center
            else:
                raise NotImplementedError()
                # self.scheduler = coerce_to_rpc(scheduler)
        else:
            self.scheduler = Scheduler(center, loop=self.loop,
                                       delete_batch_time=delete_batch_time)
        if center:
            self.center = coerce_to_rpc(center)

        if not self.center:
            raise ValueError("Provide Center address")

        if start:
            self.start()

    def start(self):
        """ Start scheduler running in separate thread """
        if hasattr(self, '_loop_thread'):
            return
        from threading import Thread
        self._loop_thread = Thread(target=self.loop.start)
        self._loop_thread.daemon = True
        _global_executor[0] = self
        self._loop_thread.start()
        sync(self.loop, self._start)

    def send_to_scheduler(self, msg):
        if isinstance(self.scheduler, Scheduler):
            self.loop.add_callback(self.scheduler_queue.put_nowait, msg)
        else:
            raise NotImplementedError()

    @gen.coroutine
    def _start(self):
        if self.scheduler.status != 'running':
            yield self.scheduler._sync_center()
            self.scheduler.start()

        start_event = Event()
        self.coroutines = [
                self.scheduler.handle_queues(self.scheduler_queue, self.report_queue),
                self.report(start_event)]

        _global_executor[0] = self
        yield start_event.wait()
        logger.debug("Started scheduling coroutines. Synchronized")

    def __enter__(self):
        if not self.loop._running:
            self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.shutdown()

    def _inc_ref(self, key):
        self.refcount[key] += 1

    def _dec_ref(self, key):
        self.refcount[key] -= 1
        if self.refcount[key] == 0:
            del self.refcount[key]
            self._release_key(key)

    def _release_key(self, key):
        """ Release key from distributed memory """
        logger.debug("Release key %s", key)
        if key in self.futures:
            self.futures[key]['event'].clear()
            del self.futures[key]
        self.send_to_scheduler({'op': 'release-held-data', 'key': key})

    @gen.coroutine
    def report(self, start_event):
        """ Listen to scheduler """
        while True:
            if isinstance(self.scheduler, Scheduler):
                msg = yield self.report_queue.get()
            elif isinstance(self.scheduler, IOStream):
                raise NotImplementedError()
                # msg = yield read(self.scheduler)
            else:
                raise NotImplementedError()

            if msg['op'] == 'stream-start':
                start_event.set()
            if msg['op'] == 'close':
                break
            if msg['op'] == 'key-in-memory':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'finished'
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'lost-data':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'lost'
                    self.futures[msg['key']]['event'].clear()
            if msg['op'] == 'task-erred':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'error'
                    self.futures[msg['key']]['exception'] = msg['exception']
                    self.futures[msg['key']]['traceback'] = msg['traceback']
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'restart':
                logger.info("Receive restart signal from scheduler")
                events = [d['event'] for d in self.futures.values()]
                self.futures.clear()
                for e in events:
                    e.set()
                with ignoring(AttributeError):
                    self._restart_event.set()

    @gen.coroutine
    def _shutdown(self, fast=False):
        """ Send shutdown signal and wait until scheduler completes """
        self.send_to_scheduler({'op': 'close'})
        if _global_executor[0] is self:
            _global_executor[0] = None
        if not fast:
            yield self.coroutines

    def shutdown(self, timeout=10):
        """ Send shutdown signal and wait until scheduler terminates """
        self.send_to_scheduler({'op': 'close'})
        self.loop.stop()
        self._loop_thread.join(timeout=timeout)
        if _global_executor[0] is self:
            _global_executor[0] = None

    def submit(self, func, *args, **kwargs):
        """ Submit a function application to the scheduler

        Parameters
        ----------
        func: callable
        *args:
        **kwargs:
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> c = executor.submit(add, a, b)  # doctest: +SKIP

        Returns
        -------
        Future

        See Also
        --------
        distributed.executor.Executor.map: Submit on many arguments at once
        """
        if not callable(func):
            raise TypeError("First input to submit must be a callable function")

        key = kwargs.pop('key', None)
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)

        if key is None:
            if pure:
                key = funcname(func) + '-' + tokenize(func, kwargs, *args)
            else:
                key = funcname(func) + '-' + next(tokens)

        if key in self.futures:
            return Future(key, self)

        if kwargs:
            task = (apply, func, args, kwargs)
        else:
            task = (func,) + args

        if workers is not None:
            restrictions = {key: workers}
        else:
            restrictions = {}

        logger.debug("Submit %s(...), %s", funcname(func), key)
        self.send_to_scheduler({'op': 'update-graph',
                                'dsk': {key: task},
                                'keys': [key],
                                'restrictions': restrictions})

        return Future(key, self)

    def map(self, func, *iterables, **kwargs):
        """ Map a function on a sequence of arguments

        Arguments can be normal objects or Futures

        Parameters
        ----------
        func: callable
        iterables: Iterables
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> L = executor.map(func, sequence)  # doctest: +SKIP

        Returns
        -------
        list of futures

        See also
        --------
        distributed.executor.Executor.submit
        """
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)
        if not callable(func):
            raise TypeError("First input to map must be a callable function")
        iterables = [list(it) for it in iterables]
        if pure:
            keys = [funcname(func) + '-' + tokenize(func, kwargs, *args)
                    for args in zip(*iterables)]
        else:
            uid = str(uuid.uuid4())
            keys = [funcname(func) + '-' + uid + '-' + next(tokens)
                    for i in range(min(map(len, iterables)))]

        if not kwargs:
            dsk = {key: (func,) + args
                   for key, args in zip(keys, zip(*iterables))}
        else:
            dsk = {key: (apply, func, args, kwargs)
                   for key, args in zip(keys, zip(*iterables))}

        if isinstance(workers, (list, set)):
            if workers and isinstance(first(workers), (list, set)):
                if len(workers) != len(keys):
                    raise ValueError("You only provided %d worker restrictions"
                    " for a sequence of length %d" % (len(workers), len(keys)))
                restrictions = dict(zip(keys, workers))
            else:
                restrictions = {key: workers for key in keys}
        elif workers is None:
            restrictions = {}
        else:
            raise TypeError("Workers must be a list or set of workers or None")

        logger.debug("map(%s, ...)", funcname(func))
        self.send_to_scheduler({'op': 'update-graph',
                                'dsk': dsk,
                                'keys': keys,
                                'restrictions': restrictions})

        return [Future(key, self) for key in keys]

    @gen.coroutine
    def _gather(self, futures):
        futures2, keys = unpack_remotedata(futures)
        keys = list(keys)

        while True:
            logger.debug("Waiting on futures to clear before gather")
            yield All([self.futures[key]['event'].wait() for key in keys
                                                    if key in self.futures])
            exceptions = [self.futures[key]['exception'] for key in keys
                          if self.futures[key]['status'] == 'error']
            if exceptions:
                raise exceptions[0]
            try:
                data = yield _gather(self.center, keys)
            except KeyError as e:
                logger.debug("Couldn't gather keys %s", e)
                self.send_to_scheduler({'op': 'missing-data',
                                        'missing': e.args})
                for key in e.args:
                    self.futures[key]['event'].clear()
            else:
                break

        data = dict(zip(keys, data))

        result = pack_data(futures2, data)
        raise gen.Return(result)

    def gather(self, futures):
        """ Gather futures from distributed memory

        Accepts a future or any nested core container of futures

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> x = e.submit(add, 1, 2)  # doctest: +SKIP
        >>> e.gather(x)  # doctest: +SKIP
        3
        >>> e.gather([x, [x], x])  # doctest: +SKIP
        [3, [3], 3]
        """
        return sync(self.loop, self._gather, futures)

    @gen.coroutine
    def _scatter(self, data, workers=None):
        remotes = yield self.scheduler._scatter(None, data, workers)
        if isinstance(remotes, list):
            remotes = [Future(r.key, self) for r in remotes]
            keys = {r.key for r in remotes}
        elif isinstance(remotes, dict):
            remotes = {k: Future(v.key, self) for k, v in remotes.items()}
            keys = set(remotes)

        for key in keys:
            self.futures[key]['status'] = 'finished'
            self.futures[key]['event'].set()

        raise gen.Return(remotes)

    def scatter(self, data, workers=None):
        """ Scatter data into distributed memory

        Accepts a list of data elements or dict of key-value pairs

        Optionally provide a set of workers to constrain the scatter.  Specify
        workers as hostname/port pairs, e.g. ('127.0.0.1', 8787).
        Default port is 8788.

        Examples
        --------
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.scatter([1, 2, 3])  # doctest: +SKIP
        [RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>,
         RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>,
         RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>]
        >>> e.scatter({'x': 1, 'y': 2, 'z': 3})  # doctest: +SKIP
        {'x': RemoteData<center=127.0.0.1:8787, key=x>,
         'y': RemoteData<center=127.0.0.1:8787, key=y>,
         'z': RemoteData<center=127.0.0.1:8787, key=z>}

        >>> e.scatter([1, 2, 3], workers=[('hostname', 8788)])  # doctest: +SKIP
        """
        return sync(self.loop, self._scatter, data, workers=workers)

    @gen.coroutine
    def _get(self, dsk, keys, restrictions=None, raise_on_error=True):
        flatkeys = list(flatten([keys]))
        futures = {key: Future(key, self) for key in flatkeys}

        self.send_to_scheduler({'op': 'update-graph',
                                'dsk': dsk,
                                'keys': flatkeys,
                                'restrictions': restrictions or {}})

        packed = pack_data(keys, futures)
        if raise_on_error:
            result = yield self._gather(packed)
        else:
            try:
                result = yield self._gather(packed)
                result = 'OK', result
            except Exception as e:
                result = 'error', e
        raise gen.Return(result)

    def get(self, dsk, keys, **kwargs):
        """ Gather futures from distributed memory

        Parameters
        ----------
        dsk: dict
        keys: object, or nested lists of objects
        restrictions: dict (optional)
            A mapping of {key: {set of worker hostnames}} that restricts where
            jobs can take place

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.get({'x': (add, 1, 2)}, 'x')  # doctest: +SKIP
        3
        """
        status, result = sync(self.loop, self._get, dsk, keys,
                              raise_on_error=False, **kwargs)

        if status == 'error':
            raise result
        else:
            return result

    def compute(self, *args, **kwargs):
        """ Compute dask collections on cluster

        Parameters
        ----------
        args: iterable of dask objects
            Collections like dask.array or dataframe or dask.value objects
        sync: bool (optional)
            Returns Futures if False (default) or concrete values if True

        Returns
        -------
        Tuple of Futures or concrete values

        Examples
        --------

        >>> from dask import do  # doctest: +SKIP
        >>> from operator import add  # doctest: +SKIP
        >>> x = do(add)(1, 2)  # doctest: +SKIP
        >>> y = do(add)(x, x)  # doctest: +SKIP
        >>> xx, yy = executor.compute(x, y)  # doctest: +SKIP
        >>> xx  # doctest: +SKIP
        <Future: status: finished, key: add-8f6e709446674bad78ea8aeecfee188e>
        >>> xx.result()  # doctest: +SKIP
        3
        >>> yy.result()  # doctest: +SKIP
        6
        """
        sync = kwargs.pop('sync', False)
        assert not kwargs
        if sync:
            return dask.compute(*args, get=self.get)

        variables = [a for a in args if isinstance(a, Base)]

        groups = groupby(lambda x: x._optimize, variables)
        dsk = merge([opt(merge([v.dask for v in val]),
                         [v._keys() for v in val])
                    for opt, val in groups.items()])
        names = ['finalize-%s' % tokenize(v) for v in variables]
        dsk2 = {name: (v._finalize, v, v._keys()) for name, v in zip(names, variables)}

        self.loop.add_callback(self.scheduler_queue.put_nowait,
                                {'op': 'update-graph',
                                'dsk': merge(dsk, dsk2),
                                'keys': names})

        i = 0
        futures = []
        for arg in args:
            if isinstance(arg, Base):
                futures.append(Future(names[i], self))
                i += 1
            else:
                futures.append(arg)

        return futures

    @gen.coroutine
    def _restart(self):
        self.send_to_scheduler({'op': 'restart'})
        self._restart_event = Event()
        yield self._restart_event.wait()

        raise gen.Return(self)

    def restart(self):
        """ Restart the distributed network

        This kills all active work, deletes all data on the network, and
        restarts the worker processes.
        """
        return sync(self.loop, self._restart)

    @gen.coroutine
    def _upload_file(self, filename, raise_on_error=True):
        with open(filename, 'rb') as f:
            data = f.read()
        _, fn = os.path.split(filename)
        d = yield self.center.broadcast(msg={'op': 'upload_file',
                                             'filename': fn,
                                             'data': data})

        if any(isinstance(v, Exception) for v in d.values()):
            exception = next(v for v in d.values() if isinstance(v, Exception))
            if raise_on_error:
                raise exception
            else:
                raise gen.Return(exception)

        assert all(len(data) == v for v in d.values())

    def upload_file(self, filename):
        """ Upload local package to workers

        Parameters
        ----------
        filename: string
            Filename of .py file to send to workers
        """
        result = sync(self.loop, self._upload_file, filename,
                        raise_on_error=False)
        if isinstance(result, Exception):
            raise result
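The ``workers=`` handling in ``map`` above accepts either one restriction set shared by all keys or a list of sets matched one-to-one with the inputs. A runnable sketch of just that restriction-building rule, lifted out of the cluster context (the function name is illustrative, not part of the library):

def build_restrictions(keys, workers):
    # Mirrors the branching in Executor.map above.
    if isinstance(workers, (list, set)):
        first = next(iter(workers), None)
        if workers and isinstance(first, (list, set)):
            if len(workers) != len(keys):
                raise ValueError("You only provided %d worker restrictions"
                                 " for a sequence of length %d"
                                 % (len(workers), len(keys)))
            return dict(zip(keys, workers))
        return {key: workers for key in keys}
    if workers is None:
        return {}
    raise TypeError("Workers must be a list or set of workers or None")


# A shared set applies to every key...
assert build_restrictions(['k1', 'k2'], {'alice'}) == \
    {'k1': {'alice'}, 'k2': {'alice'}}
# ...while a list of sets is matched one-to-one.
assert build_restrictions(['k1', 'k2'], [{'alice'}, {'bob'}]) == \
    {'k1': {'alice'}, 'k2': {'bob'}}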
Code example #48
0
File: index_manager.py Project: AppScale/appscale
class ProjectIndexManager(object):
  """ Keeps track of composite index definitions for a project. """

  def __init__(self, project_id, zk_client, index_manager, datastore_access):
    """ Creates a new ProjectIndexManager.

    Args:
      project_id: A string specifying a project ID.
      zk_client: A KazooClient.
      index_manager: An IndexManager used for checking lock status.
      datastore_access: A DatastoreDistributed object.
    """
    self.project_id = project_id
    self.indexes_node = '/appscale/projects/{}/indexes'.format(self.project_id)
    self.active = True
    self.update_event = AsyncEvent()

    self._creation_times = {}
    self._index_manager = index_manager
    self._zk_client = zk_client
    self._ds_access = datastore_access

    self._zk_client.DataWatch(self.indexes_node, self._update_indexes_watch)

    # Since this manager can be used synchronously, ensure that the indexes
    # are populated for this IOLoop iteration.
    try:
      encoded_indexes = self._zk_client.get(self.indexes_node)[0]
    except NoNodeError:
      encoded_indexes = '[]'

    self.indexes = [DatastoreIndex.from_dict(self.project_id, index)
                    for index in json.loads(encoded_indexes)]

  @property
  def indexes_pb(self):
    if self._zk_client.state != KazooState.CONNECTED:
      raise IndexInaccessible('ZooKeeper connection is not active')

    return [index.to_pb() for index in self.indexes]

  @gen.coroutine
  def apply_definitions(self):
    """ Populate composite indexes that are not marked as ready yet. """
    try:
      yield self.update_event.wait()
      self.update_event.clear()
      if not self._index_manager.admin_lock.is_acquired or not self.active:
        return

      logger.info(
        'Applying composite index definitions for {}'.format(self.project_id))

      for index in self.indexes:
        if index.ready:
          continue

        # Wait until all clients have either timed out or received the new
        # index definition. This prevents entities from being written without
        # index entries while the index is being built.
        creation_time = self._creation_times.get(index.id, time.time())
        consensus = creation_time + (self._zk_client._session_timeout / 1000.0)
        yield gen.sleep(max(consensus - time.time(), 0))

        yield self._ds_access.update_composite_index(
          self.project_id, index.to_pb())
        logger.info('Index {} is now ready'.format(index.id))
        self._mark_index_ready(index.id)

      logger.info(
        'All composite indexes for {} are ready'.format(self.project_id))
    finally:
      IOLoop.current().spawn_callback(self.apply_definitions)

  def delete_index_definition(self, index_id):
    """ Remove a definition from a project's list of configured indexes.

    Args:
      index_id: An integer specifying an index ID.
    """
    try:
      encoded_indexes, znode_stat = self._zk_client.get(self.indexes_node)
    except NoNodeError:
      # If there are no index definitions, there is nothing to do.
      return

    node_version = znode_stat.version
    indexes = [DatastoreIndex.from_dict(self.project_id, index)
               for index in json.loads(encoded_indexes)]

    encoded_indexes = json.dumps([index.to_dict() for index in indexes
                                  if index.id != index_id])
    self._zk_client.set(self.indexes_node, encoded_indexes,
                        version=node_version)

  def _mark_index_ready(self, index_id):
    """ Updates the index metadata to reflect the new state of the index.

    Args:
      index_id: An integer specifying an index ID.
    """
    try:
      encoded_indexes, znode_stat = self._zk_client.get(self.indexes_node)
      node_version = znode_stat.version
    except NoNodeError:
      # If for some reason the index no longer exists, there's nothing to do.
      return

    existing_indexes = [DatastoreIndex.from_dict(self.project_id, index)
                        for index in json.loads(encoded_indexes)]
    for existing_index in existing_indexes:
      if existing_index.id == index_id:
        existing_index.ready = True

    indexes_dict = [index.to_dict() for index in existing_indexes]
    self._zk_client.set(self.indexes_node, json.dumps(indexes_dict),
                        version=node_version)

  @gen.coroutine
  def _update_indexes(self, encoded_indexes):
    """ Handles changes to the list of a project's indexes.

    Args:
      encoded_indexes: A string containing index node data.
    """
    encoded_indexes = encoded_indexes or '[]'
    self.indexes = [DatastoreIndex.from_dict(self.project_id, index)
                    for index in json.loads(encoded_indexes)]

    # Mark when indexes are defined so they can be backfilled later.
    self._creation_times.update(
      {index.id: time.time() for index in self.indexes
       if not index.ready and index.id not in self._creation_times})

    self.update_event.set()

  def _update_indexes_watch(self, encoded_indexes, znode_stat):
    """ Handles updates to the project's indexes node.

    Args:
      encoded_indexes: A string containing index node data.
      znode_stat: A kazoo.protocol.states.ZnodeStat object.
    """
    if not self.active:
      return False

    IOLoop.current().add_callback(self._update_indexes, encoded_indexes)
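The ``gen.sleep`` in ``apply_definitions`` above waits out one ZooKeeper session timeout from the moment an index was defined, so every client has either received the new definition or lost its session. A small, runnable sketch of that deadline arithmetic (the 10-second timeout is only an example):

import time


def consensus_wait_seconds(creation_time, session_timeout_ms, now=None):
    """How long to sleep before backfilling, per the logic above."""
    now = time.time() if now is None else now
    consensus = creation_time + session_timeout_ms / 1000.0
    return max(consensus - now, 0)


# An index defined 4 seconds ago under a 10-second session timeout
# still needs a 6-second grace period.
assert consensus_wait_seconds(100.0, 10000, now=104.0) == 6.0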
Code example #49
0
 def get(self):
     never_finish = Event()
     yield never_finish.wait()
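An ``Event`` that is never set parks the coroutine indefinitely, which is exactly what this handler wants. When the same wait has to be bounded (in a test, say), ``gen.with_timeout`` can wrap it; a hedged sketch, with illustrative names:

import datetime

from tornado import gen
from tornado.locks import Event


@gen.coroutine
def wait_briefly():
    never_finish = Event()
    try:
        # Bound the otherwise-infinite wait; with_timeout raises
        # gen.TimeoutError once the deadline passes.
        yield gen.with_timeout(datetime.timedelta(seconds=0.1),
                               never_finish.wait())
    except gen.TimeoutError:
        raise gen.Return('timed out')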
Code example #50
0
File: index_manager.py Project: AppScale/appscale
class IndexManager(object):
  """ Keeps track of configured datastore indexes. """
  # The node which keeps track of admin lock contenders.
  ADMIN_LOCK_NODE = '/appscale/datastore/index_manager_lock'

  def __init__(self, zk_client, datastore_access, perform_admin=False):
    """ Creates a new IndexManager.

    Args:
      zk_client: A kazoo.client.KazooClient object.
      datastore_access: A DatastoreDistributed object.
      perform_admin: A boolean specifying whether or not to perform admin
        operations.
    """
    self.projects = {}
    self._wake_event = AsyncEvent()
    self._zk_client = zk_client
    self.admin_lock = AsyncKazooLock(self._zk_client, self.ADMIN_LOCK_NODE)

    # TODO: Refactor so that this dependency is not needed.
    self._ds_access = datastore_access

    self._zk_client.ensure_path('/appscale/projects')
    self._zk_client.ChildrenWatch('/appscale/projects', self._update_projects)

    # Since this manager can be used synchronously, ensure that the projects
    # are populated for this IOLoop iteration.
    project_ids = self._zk_client.get_children('/appscale/projects')
    self._update_projects_sync(project_ids)

    if perform_admin:
      IOLoop.current().spawn_callback(self._contend_for_admin_lock)

  def _update_projects_sync(self, new_project_ids):
    """ Updates the list of the deployment's projects.

    Args:
      new_project_ids: A list of strings specifying current project IDs.
    """
    for project_id in new_project_ids:
      if project_id not in self.projects:
        self.projects[project_id] = ProjectIndexManager(
          project_id, self._zk_client, self, self._ds_access)
        if self.admin_lock.is_acquired:
          IOLoop.current().spawn_callback(
            self.projects[project_id].apply_definitions)

    # Iterate over a copy since entries may be deleted during iteration.
    for project_id in list(self.projects):
      if project_id not in new_project_ids:
        self.projects[project_id].active = False
        del self.projects[project_id]

  def _update_projects(self, project_ids):
    """ Watches for changes to list of existing projects.

    Args:
      project_ids: A list of strings specifying current project IDs.
    """
    persistent_update_projects = retry_children_watch_coroutine(
      '/appscale/projects', self._update_projects_sync)
    IOLoop.instance().add_callback(persistent_update_projects, project_ids)

  def _handle_connection_change(self, state):
    """ Notifies the admin lock holder when the connection changes.

    Args:
      state: The new connection state.
    """
    IOLoop.current().add_callback(self._wake_event.set)

  @gen.coroutine
  def _contend_for_admin_lock(self):
    """
    Waits to acquire an admin lock that gives permission to apply index
    definitions. The lock is useful for preventing many servers from writing
    the same index entries at the same time. After acquiring the lock, the
    individual ProjectIndexManagers are responsible for mutating state whenever
    a project's index definitions change.
    """
    while True:
      # Set up a callback to get notified if the ZK connection changes.
      self._wake_event.clear()
      self._zk_client.add_listener(self._handle_connection_change)

      yield self.admin_lock.acquire()
      try:
        for project_index_manager in self.projects.values():
          IOLoop.current().spawn_callback(
            project_index_manager.apply_definitions)

        # Release the lock if the kazoo client gets disconnected.
        yield self._wake_event.wait()
      finally:
        self.admin_lock.release()
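The loop above couples an ``AsyncKazooLock`` with a wake event set from kazoo's listener thread. A minimal sketch of the same hold-until-woken shape using only Tornado primitives (``lock``, ``wake_event``, and ``work`` are stand-ins, not the AppScale objects):

from tornado import gen
from tornado.ioloop import IOLoop


@gen.coroutine
def hold_until_woken(lock, wake_event, work):
    """Take the lock, start the work, and hold the lock until woken."""
    while True:
        wake_event.clear()
        with (yield lock.acquire()):  # tornado.locks.Lock usage
            IOLoop.current().spawn_callback(work)
            yield wake_event.wait()  # e.g. set from a connection listener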
Code example #51
0
class SubscribeListener(SubscribeCallback):
    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.error_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()
        elif status.is_error():
            self.error_queue.put_nowait(status.error_data.exception)

    def message(self, pubnub, message):
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def _wait_for(self, coro):
        error = self.error_queue.get()
        wi = tornado.gen.WaitIterator(coro, error)

        while not wi.done():
            result = yield wi.next()

            if wi.current_future == coro:
                raise gen.Return(result)
            elif wi.current_future == error:
                raise result
            else:
                raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

    @tornado.gen.coroutine
    def wait_for_connect(self):
        if not self.connected_event.is_set():
            yield self._wait_for(self.connected_event.wait())
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            yield self._wait_for(self.disconnected_event.wait())
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:  # NOQA
                env = yield self._wait_for(self.message_queue.get())
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                try:
                    env = yield self._wait_for(self.presence_queue.get())
                except:  # NOQA E722 pylint: disable=W0702
                    break
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.presence_queue.task_done()
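``_wait_for`` above races the awaited operation against the listener's error queue via ``WaitIterator``. A runnable, stripped-down sketch of that racing pattern with two plain awaitables:

import tornado.gen
from tornado.ioloop import IOLoop
from tornado.locks import Event
from tornado.queues import Queue


@tornado.gen.coroutine
def race(operation, error_future):
    """Resolve with operation's result, or raise whatever error_future
    yields first (the shape of SubscribeListener._wait_for above)."""
    wi = tornado.gen.WaitIterator(operation, error_future)
    while not wi.done():
        result = yield wi.next()
        if wi.current_future == operation:
            raise tornado.gen.Return(result)
        raise result  # assumed: the error queue holds exception objects


@tornado.gen.coroutine
def main():
    ok = Event()
    errors = Queue()
    ok.set()
    yield race(ok.wait(), errors.get())
    print('operation won the race')


IOLoop.current().run_sync(main)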
Code example #52
0
class TornadoSubscriptionManager(SubscriptionManager):
    def __init__(self, pubnub_instance):

        subscription_manager = self

        self._message_queue = Queue()
        self._consumer_event = Event()
        self._cancellation_event = Event()
        self._subscription_lock = Semaphore(1)
        # self._current_request_key_object = None
        self._heartbeat_periodic_callback = None
        self._reconnection_manager = TornadoReconnectionManager(pubnub_instance)

        super(TornadoSubscriptionManager, self).__init__(pubnub_instance)
        self._start_worker()

        class TornadoReconnectionCallback(ReconnectionCallback):
            def on_reconnect(self):
                subscription_manager.reconnect()

                pn_status = PNStatus()
                pn_status.category = PNStatusCategory.PNReconnectedCategory
                pn_status.error = False

                subscription_manager._subscription_status_announced = True
                subscription_manager._listener_manager.announce_status(pn_status)

        self._reconnection_listener = TornadoReconnectionCallback()
        self._reconnection_manager.set_reconnection_listener(self._reconnection_listener)

    def _set_consumer_event(self):
        self._consumer_event.set()

    def _message_queue_put(self, message):
        self._message_queue.put(message)

    def _start_worker(self):
        self._consumer = TornadoSubscribeMessageWorker(self._pubnub,
                                                       self._listener_manager,
                                                       self._message_queue,
                                                       self._consumer_event)
        run = stack_context.wrap(self._consumer.run)
        self._pubnub.ioloop.spawn_callback(run)

    def reconnect(self):
        self._should_stop = False
        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
        # self._register_heartbeat_timer()

    def disconnect(self):
        self._should_stop = True
        self._stop_heartbeat_timer()
        self._stop_subscribe_loop()

    @tornado.gen.coroutine
    def _start_subscribe_loop(self):
        self._stop_subscribe_loop()

        yield self._subscription_lock.acquire()

        self._cancellation_event.clear()

        combined_channels = self._subscription_state.prepare_channel_list(True)
        combined_groups = self._subscription_state.prepare_channel_group_list(True)

        if len(combined_channels) == 0 and len(combined_groups) == 0:
            return

        envelope_future = Subscribe(self._pubnub) \
            .channels(combined_channels).channel_groups(combined_groups) \
            .timetoken(self._timetoken).region(self._region) \
            .filter_expression(self._pubnub.config.filter_expression) \
            .cancellation_event(self._cancellation_event) \
            .future()

        canceller_future = self._cancellation_event.wait()

        wi = tornado.gen.WaitIterator(envelope_future, canceller_future)

        # Iterates at most twice: once for the result and once for cancellation.
        while not wi.done():
            try:
                result = yield wi.next()
            except Exception as e:
                # TODO: verify the error will not be eaten
                logger.error(e)
                raise
            else:
                if wi.current_future == envelope_future:
                    e = result
                elif wi.current_future == canceller_future:
                    return
                else:
                    raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

                if e.is_error():
                    # A 599 status doesn't help here - tornado uses that code
                    # for a wide range of errors, for example:
                    # HTTP Server Error (599): [Errno -2] Name or service not known
                    if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
                        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
                        return

                    logger.error("Exception in subscribe loop: %s" % str(e))

                    if e.status is not None and e.status.category == PNStatusCategory.PNAccessDeniedCategory:
                        e.status.operation = PNOperationType.PNUnsubscribeOperation

                    self._listener_manager.announce_status(e.status)

                    self._reconnection_manager.start_polling()
                    self.disconnect()
                    return
                else:
                    self._handle_endpoint_call(e.result, e.status)

                    self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)

            finally:
                self._cancellation_event.set()
                yield tornado.gen.moment
                self._subscription_lock.release()
                self._cancellation_event.clear()
                break

    def _stop_subscribe_loop(self):
        if self._cancellation_event is not None and not self._cancellation_event.is_set():
            self._cancellation_event.set()

    def _stop_heartbeat_timer(self):
        if self._heartbeat_periodic_callback is not None:
            self._heartbeat_periodic_callback.stop()

    def _register_heartbeat_timer(self):
        super(TornadoSubscriptionManager, self)._register_heartbeat_timer()
        self._heartbeat_periodic_callback = PeriodicCallback(
            stack_context.wrap(self._perform_heartbeat_loop),
            self._pubnub.config.heartbeat_interval * TornadoSubscriptionManager.HEARTBEAT_INTERVAL_MULTIPLIER,
            self._pubnub.ioloop)
        self._heartbeat_periodic_callback.start()

    @tornado.gen.coroutine
    def _perform_heartbeat_loop(self):
        if self._heartbeat_call is not None:
            # TODO: cancel call
            pass

        cancellation_event = Event()
        state_payload = self._subscription_state.state_payload()
        presence_channels = self._subscription_state.prepare_channel_list(False)
        presence_groups = self._subscription_state.prepare_channel_group_list(False)

        if len(presence_channels) == 0 and len(presence_groups) == 0:
            return

        try:
            envelope = yield self._pubnub.heartbeat() \
                .channels(presence_channels) \
                .channel_groups(presence_groups) \
                .state(state_payload) \
                .cancellation_event(cancellation_event) \
                .future()

            heartbeat_verbosity = self._pubnub.config.heartbeat_notification_options
            if envelope.status.is_error:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL or \
                        heartbeat_verbosity == PNHeartbeatNotificationOptions.FAILURES:
                    self._listener_manager.announce_status(envelope.status)
            else:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)

        except PubNubTornadoException:
            pass
            # TODO: check correctness
            # if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
            #     self._start_subscribe_loop()
            # else:
            #     self._listener_manager.announce_status(e.status)
        except Exception as e:
            print(e)
        finally:
            cancellation_event.set()

    @tornado.gen.coroutine
    def _send_leave(self, unsubscribe_operation):
        envelope = yield Leave(self._pubnub) \
            .channels(unsubscribe_operation.channels) \
            .channel_groups(unsubscribe_operation.channel_groups).future()
        self._listener_manager.announce_status(envelope.status)
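The subscribe loop above hinges on racing a long-poll future against ``cancellation_event.wait()``. A runnable sketch of that cancellation shape, with ``gen.sleep`` standing in for the network call:

import tornado.gen
from tornado.ioloop import IOLoop
from tornado.locks import Event


@tornado.gen.coroutine
def cancellable(request_future, cancellation_event):
    """Whichever future finishes first wins; a set event abandons the
    request (the shape of _start_subscribe_loop above)."""
    canceller = cancellation_event.wait()
    wi = tornado.gen.WaitIterator(request_future, canceller)
    while not wi.done():
        result = yield wi.next()
        if wi.current_future == canceller:
            raise tornado.gen.Return(None)  # cancelled
        raise tornado.gen.Return(result)


@tornado.gen.coroutine
def main():
    cancel = Event()
    IOLoop.current().call_later(0.01, cancel.set)
    outcome = yield cancellable(tornado.gen.sleep(5), cancel)
    print('outcome:', outcome)  # None: the cancellation event fired first


IOLoop.current().run_sync(main)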
Code example #53
0
File: executor.py Project: canavandl/distributed
class Executor(object):
    """ Drive computations on a distributed cluster

    The Executor connects users to a distributed compute cluster.  It provides
    an asynchronous user interface around functions and futures.  This class
    resembles executors in ``concurrent.futures`` but also allows ``Future``
    objects within ``submit/map`` calls.

    Parameters
    ----------
    address: string, tuple, or ``Scheduler``
        This can be the address of a ``Center`` or ``Scheduler`` server, either
        as a string ``'127.0.0.1:8787'`` or tuple ``('127.0.0.1', 8787)``
        or it can be a local ``Scheduler`` object.

    Examples
    --------
    Provide cluster's head node address on initialization:

    >>> executor = Executor('127.0.0.1:8787')  # doctest: +SKIP

    Use ``submit`` method to send individual computations to the cluster

    >>> a = executor.submit(add, 1, 2)  # doctest: +SKIP
    >>> b = executor.submit(add, 10, 20)  # doctest: +SKIP

    Continue using submit or map on results to build up larger computations

    >>> c = executor.submit(add, a, b)  # doctest: +SKIP

    Gather results with the ``gather`` method.

    >>> executor.gather([c])  # doctest: +SKIP
    33

    See Also
    --------
    distributed.scheduler.Scheduler: Internal scheduler
    """
    def __init__(self, address, start=True, loop=None, timeout=3):
        self.futures = dict()
        self.refcount = defaultdict(lambda: 0)
        self.loop = loop or (IOLoop() if start else IOLoop.current())
        self.coroutines = []
        self.id = str(uuid.uuid1())
        self._start_arg = address

        if start:
            self.start(timeout=timeout)

    def __str__(self):
        if hasattr(self, '_loop_thread'):
            n = sync(self.loop, self.scheduler.ncores)
            return '<Executor: scheduler=%s:%d workers=%d threads=%d>' % (
                    self.scheduler.ip, self.scheduler.port, len(n),
                    sum(n.values()))
        else:
            return '<Executor: scheduler=%s:%d>' % (
                    self.scheduler.ip, self.scheduler.port)

    __repr__ = __str__

    def start(self, **kwargs):
        """ Start scheduler running in separate thread """
        if hasattr(self, '_loop_thread'):
            return
        from threading import Thread
        self._loop_thread = Thread(target=self.loop.start)
        self._loop_thread.daemon = True
        pc = PeriodicCallback(lambda: None, 1000, io_loop=self.loop)
        self.loop.add_callback(pc.start)
        _global_executor[0] = self
        self._loop_thread.start()
        while not self.loop._running:
            sleep(0.001)
        sync(self.loop, self._start, **kwargs)

    def _send_to_scheduler(self, msg):
        if isinstance(self.scheduler, Scheduler):
            self.loop.add_callback(self.scheduler_queue.put_nowait, msg)
        elif isinstance(self.scheduler_stream, IOStream):
            self.loop.add_callback(write, self.scheduler_stream, msg)
        else:
            raise NotImplementedError()

    @gen.coroutine
    def _start(self, timeout=3, **kwargs):
        if isinstance(self._start_arg, Scheduler):
            self.scheduler = self._start_arg
            self.center = self._start_arg.center
        if isinstance(self._start_arg, str):
            ip, port = tuple(self._start_arg.split(':'))
            self._start_arg = (ip, int(port))
        if isinstance(self._start_arg, tuple):
            r = coerce_to_rpc(self._start_arg, timeout=timeout)
            try:
                ident = yield r.identity()
            except (StreamClosedError, OSError):
                raise IOError("Could not connect to %s:%d" % self._start_arg)
            if ident['type'] == 'Center':
                self.center = r
                self.scheduler = Scheduler(self.center, loop=self.loop,
                                           **kwargs)
                self.scheduler.listen(0)
            elif ident['type'] == 'Scheduler':
                self.scheduler = r
                self.scheduler_stream = yield connect(*self._start_arg)
                yield write(self.scheduler_stream, {'op': 'register-client',
                                                    'client': self.id})
                if 'center' in ident:
                    cip, cport = ident['center']
                    self.center = rpc(ip=cip, port=cport)
                else:
                    self.center = self.scheduler
            else:
                raise ValueError("Unknown Type")

        if isinstance(self.scheduler, Scheduler):
            if self.scheduler.status != 'running':
                yield self.scheduler.sync_center()
                self.scheduler.start(0)
            self.scheduler_queue = Queue()
            self.report_queue = Queue()
            self.coroutines.append(self.scheduler.handle_queues(
                self.scheduler_queue, self.report_queue))

        start_event = Event()
        self.coroutines.append(self._handle_report(start_event))

        _global_executor[0] = self
        yield start_event.wait()
        logger.debug("Started scheduling coroutines. Synchronized")

    def __enter__(self):
        if not self.loop._running:
            self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.shutdown()

    def _inc_ref(self, key):
        self.refcount[key] += 1

    def _dec_ref(self, key):
        self.refcount[key] -= 1
        if self.refcount[key] == 0:
            del self.refcount[key]
            self._release_key(key)

    def _release_key(self, key):
        """ Release key from distributed memory """
        logger.debug("Release key %s", key)
        if key in self.futures:
            self.futures[key]['event'].clear()
            del self.futures[key]
        self._send_to_scheduler({'op': 'client-releases-keys', 'keys': [key],
                                 'client': self.id})

    @gen.coroutine
    def _handle_report(self, start_event):
        """ Listen to scheduler """
        if isinstance(self.scheduler, Scheduler):
            next_message = self.report_queue.get
        elif isinstance(self.scheduler_stream, IOStream):
            next_message = lambda: read(self.scheduler_stream)
        else:
            raise NotImplementedError()

        while True:
            try:
                msg = yield next_message()
            except StreamClosedError:
                break

            logger.debug("Executor receives message %s", msg)

            if msg['op'] == 'stream-start':
                start_event.set()
            if msg['op'] == 'close':
                break
            if msg['op'] == 'key-in-memory':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'finished'
                    self.futures[msg['key']]['event'].set()
                    if (msg.get('type') and
                        not self.futures[msg['key']].get('type')):
                        self.futures[msg['key']]['type'] = msg['type']
            if msg['op'] == 'lost-data':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'lost'
                    self.futures[msg['key']]['event'].clear()
            if msg['op'] == 'cancelled-key':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['event'].set()
                    del self.futures[msg['key']]
            if msg['op'] == 'task-erred':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'error'
                    self.futures[msg['key']]['exception'] = msg['exception']
                    self.futures[msg['key']]['traceback'] = msg['traceback']
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'restart':
                logger.info("Receive restart signal from scheduler")
                events = [d['event'] for d in self.futures.values()]
                self.futures.clear()
                for e in events:
                    e.set()
                with ignoring(AttributeError):
                    self._restart_event.set()
            if msg['op'] == 'scheduler-error':
                logger.warning("Scheduler exception:")
                logger.exception(msg['exception'])

    @gen.coroutine
    def _shutdown(self, fast=False):
        """ Send shutdown signal and wait until scheduler completes """
        self._send_to_scheduler({'op': 'close-stream'})
        if _global_executor[0] is self:
            _global_executor[0] = None
        if not fast:
            with ignoring(TimeoutError):
                yield [gen.with_timeout(timedelta(seconds=2), f)
                        for f in self.coroutines]

    def shutdown(self, timeout=10):
        """ Send shutdown signal and wait until scheduler terminates """
        self._send_to_scheduler({'op': 'close'})
        self.loop.stop()
        self._loop_thread.join(timeout=timeout)
        if _global_executor[0] is self:
            _global_executor[0] = None

    def submit(self, func, *args, **kwargs):
        """ Submit a function application to the scheduler

        Parameters
        ----------
        func: callable
        *args:
        **kwargs:
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> c = executor.submit(add, a, b)  # doctest: +SKIP

        Returns
        -------
        Future

        See Also
        --------
        Executor.map: Submit on many arguments at once
        """
        if not callable(func):
            raise TypeError("First input to submit must be a callable function")

        key = kwargs.pop('key', None)
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)
        allow_other_workers = kwargs.pop('allow_other_workers', False)

        if allow_other_workers not in (True, False, None):
            raise TypeError("allow_other_workers= must be True or False")

        if key is None:
            if pure:
                key = funcname(func) + '-' + tokenize(func, kwargs, *args)
            else:
                key = funcname(func) + '-' + str(uuid.uuid4())

        if key in self.futures:
            return Future(key, self)

        if allow_other_workers and workers is None:
            raise ValueError("Only use allow_other_workers= if using workers=")

        if isinstance(workers, str):
            workers = [workers]
        if workers is not None:
            restrictions = {key: workers}
            loose_restrictions = {key} if allow_other_workers else set()
        else:
            restrictions = {}
            loose_restrictions = set()

        args2, arg_dependencies = unpack_remotedata(args)
        kwargs2, kwarg_dependencies = unpack_remotedata(kwargs)
        dependencies = arg_dependencies | kwarg_dependencies

        task = {'function': dumps_function(func)}
        if args2:
            task['args'] = dumps(args2)
        if kwargs2:
            task['kwargs'] = dumps(kwargs2)

        logger.debug("Submit %s(...), %s", funcname(func), key)
        self._send_to_scheduler({'op': 'update-graph',
                                 'tasks': {key: task},
                                 'keys': [key],
                                 'dependencies': {key: dependencies},
                                 'restrictions': restrictions,
                                 'loose_restrictions': loose_restrictions,
                                 'client': self.id})

        return Future(key, self)

    def _threaded_map(self, q_out, func, qs_in, **kwargs):
        """ Internal function for mapping Queue """
        if isqueue(qs_in[0]):
            get = pyQueue.get
        elif isinstance(qs_in[0], Iterator):
            get = next
        else:
            raise NotImplementedError()

        while True:
            args = [get(q) for q in qs_in]
            f = self.submit(func, *args, **kwargs)
            q_out.put(f)

    def map(self, func, *iterables, **kwargs):
        """ Map a function on a sequence of arguments

        Arguments can be normal objects or Futures

        Parameters
        ----------
        func: callable
        iterables: Iterables, Iterators, or Queues
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> L = executor.map(func, sequence)  # doctest: +SKIP

        Returns
        -------
        List, iterator, or Queue of futures, depending on the type of the
        inputs.

        See also
        --------
        Executor.submit: Submit a single function
        """
        if not callable(func):
            raise TypeError("First input to map must be a callable function")

        if (all(map(isqueue, iterables)) or
            all(isinstance(i, Iterator) for i in iterables)):
            q_out = pyQueue()
            t = Thread(target=self._threaded_map, args=(q_out, func, iterables),
                                                  kwargs=kwargs)
            t.daemon = True
            t.start()
            if isqueue(iterables[0]):
                return q_out
            else:
                return queue_to_iterator(q_out)

        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)
        allow_other_workers = kwargs.pop('allow_other_workers', False)

        if allow_other_workers and workers is None:
            raise ValueError("Only use allow_other_workers= if using workers=")

        iterables = list(zip(*zip(*iterables)))
        if pure:
            keys = [funcname(func) + '-' + tokenize(func, kwargs, *args)
                    for args in zip(*iterables)]
        else:
            uid = str(uuid.uuid4())
            keys = [funcname(func) + '-' + uid + '-' + str(uuid.uuid4())
                    for i in range(min(map(len, iterables)))]

        if not kwargs:
            dsk = {key: (func,) + args
                   for key, args in zip(keys, zip(*iterables))}
        else:
            dsk = {key: (apply, func, (tuple, list(args)), kwargs)
                   for key, args in zip(keys, zip(*iterables))}

        d = {key: unpack_remotedata(task) for key, task in dsk.items()}
        dsk = {k: v[0] for k, v in d.items()}
        dependencies = {k: v[1] for k, v in d.items()}

        if isinstance(workers, str):
            workers = [workers]
        if isinstance(workers, (list, set)):
            if workers and isinstance(first(workers), (list, set)):
                if len(workers) != len(keys):
                    raise ValueError("You only provided %d worker restrictions"
                    " for a sequence of length %d" % (len(workers), len(keys)))
                restrictions = dict(zip(keys, workers))
            else:
                restrictions = {key: workers for key in keys}
        elif workers is None:
            restrictions = {}
        else:
            raise TypeError("Workers must be a list or set of workers or None")
        if allow_other_workers not in (True, False, None):
            raise TypeError("allow_other_workers= must be True or False")
        if allow_other_workers is True:
            loose_restrictions = set(keys)
        else:
            loose_restrictions = set()

        logger.debug("map(%s, ...)", funcname(func))
        self._send_to_scheduler({'op': 'update-graph',
                                 'tasks': valmap(dumps_task, dsk),
                                 'dependencies': dependencies,
                                 'keys': keys,
                                 'restrictions': restrictions,
                                 'loose_restrictions': loose_restrictions,
                                 'client': self.id})

        return [Future(key, self) for key in keys]

    @gen.coroutine
    def _gather(self, futures, errors='raise'):
        futures2, keys = unpack_remotedata(futures)
        keys = list(keys)
        bad_data = dict()

        while True:
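            # Retry loop: between the event waits and the actual gather the
            # cluster may lose keys; a b'error' response below reports them
            # as missing, clears their events, and loops back to wait again.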
            logger.debug("Waiting on futures to clear before gather")
            yield All([self.futures[key]['event'].wait() for key in keys
                                                    if key in self.futures])
            exceptions = {key: self.futures[key]['exception'] for key in keys
                          if self.futures[key]['status'] == 'error'}
            if exceptions:
                if errors == 'raise':
                    raise first(exceptions.values())
                if errors == 'skip':
                    keys = [key for key in keys if key not in exceptions]
                    bad_data.update({key: None for key in exceptions})
                else:
                    raise ValueError("Bad value, `errors=%s`" % errors)

            response, data = yield self.scheduler.gather(keys=keys)

            if response == b'error':
                logger.debug("Couldn't gather keys %s", data)
                self._send_to_scheduler({'op': 'missing-data',
                                         'missing': data.args})
                for key in data.args:
                    self.futures[key]['event'].clear()
            else:
                break

        if bad_data and errors == 'skip' and isinstance(futures2, list):
            futures2 = [f for f in futures2 if f not in exceptions]

        result = pack_data(futures2, merge(data, bad_data))
        raise gen.Return(result)

    def _threaded_gather(self, qin, qout, **kwargs):
        """ Internal function for gathering Queue """
        while True:
            d = qin.get()
            f = self.gather(d, **kwargs)
            qout.put(f)

    def gather(self, futures, errors='raise'):
        """ Gather futures from distributed memory

        Accepts a future, nested container of futures, iterator, or queue.
        The return type will match the input type.

        Returns
        -------
        Future results

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> x = e.submit(add, 1, 2)  # doctest: +SKIP
        >>> e.gather(x)  # doctest: +SKIP
        3
        >>> e.gather([x, [x], x])  # support lists and dicts # doctest: +SKIP
        [3, [3], 3]

        >>> seq = e.gather(iter([x, x]))  # support iterators # doctest: +SKIP
        >>> next(seq)  # doctest: +SKIP
        3

        See Also
        --------
        Executor.scatter: Send data out to cluster
        """
        if isqueue(futures):
            qout = pyQueue()
            t = Thread(target=self._threaded_gather, args=(futures, qout),
                        kwargs={'errors': errors})
            t.daemon = True
            t.start()
            return qout
        elif isinstance(futures, Iterator):
            return (self.gather(f, errors=errors) for f in futures)
        else:
            return sync(self.loop, self._gather, futures, errors=errors)

    @gen.coroutine
    def _scatter(self, data, workers=None, broadcast=False):
        keys = yield self.scheduler.scatter(data=data, workers=workers,
                                            client=self.id, broadcast=broadcast)
        if isinstance(data, (tuple, list, set, frozenset)):
            out = type(data)([Future(k, self) for k in keys])
        elif isinstance(data, dict):
            out = {k: Future(k, self) for k in keys}
        else:
            raise TypeError("")

        for key in keys:
            self.futures[key]['status'] = 'finished'
            self.futures[key]['event'].set()

        raise gen.Return(out)

    def _threaded_scatter(self, q_or_i, qout, **kwargs):
        """ Internal function for scattering Iterable/Queue data """
        if isqueue(q_or_i):  # py2 Queue doesn't support mro
            get = pyQueue.get
        elif isinstance(q_or_i, Iterator):
            get = next
        else:
            raise NotImplementedError()

        while True:
            try:
                d = get(q_or_i)
            except StopIteration:
                qout.put(StopIteration)
                break

            [f] = self.scatter([d], **kwargs)
            qout.put(f)

    def scatter(self, data, workers=None, broadcast=False):
        """ Scatter data into distributed memory

        Parameters
        ----------
        data: list, iterator, dict, or Queue
            Data to scatter out to workers.  Output type matches input type.
        workers: list of tuples (optional)
            Optionally constrain locations of data.
            Specify workers as hostname/port pairs, e.g. ``('127.0.0.1', 8787)``.
        broadcast: bool (defaults to False)
            Whether to send each data element to all workers.
            By default we round-robin based on number of cores.

        Returns
        -------
        List, dict, iterator, or queue of futures matching the type of input.

        Examples
        --------
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.scatter([1, 2, 3])  # doctest: +SKIP
        [<Future: status: finished, key: c0a8a20f903a4915b94db8de3ea63195>,
         <Future: status: finished, key: 58e78e1b34eb49a68c65b54815d1b158>,
         <Future: status: finished, key: d3395e15f605bc35ab1bac6341a285e2>]

        >>> e.scatter({'x': 1, 'y': 2, 'z': 3})  # doctest: +SKIP
        {'x': <Future: status: finished, key: x>,
         'y': <Future: status: finished, key: y>,
         'z': <Future: status: finished, key: z>}

        Constrain location of data to subset of workers
        >>> e.scatter([1, 2, 3], workers=[('hostname', 8788)])   # doctest: +SKIP

        Handle streaming sequences of data with iterators or queues
        >>> seq = e.scatter(iter([1, 2, 3]))  # doctest: +SKIP
        >>> next(seq)  # doctest: +SKIP
        <Future: status: finished, key: c0a8a20f903a4915b94db8de3ea63195>,

        Broadcast data to all workers
        >>> [future] = e.scatter([element], broadcast=True)  # doctest: +SKIP

        See Also
        --------
        Executor.gather: Gather data back to local process
        """
        if isqueue(data) or isinstance(data, Iterator):
            logger.debug("Starting thread for streaming data")
            qout = pyQueue()

            t = Thread(target=self._threaded_scatter,
                       args=(data, qout),
                       kwargs={'workers': workers, 'broadcast': broadcast})
            t.daemon = True
            t.start()

            if isqueue(data):
                return qout
            else:
                return queue_to_iterator(qout)
        else:
            return sync(self.loop, self._scatter, data, workers=workers,
                        broadcast=broadcast)

    @gen.coroutine
    def _cancel(self, futures, block=False):
        keys = {f.key for f in futures_of(futures)}
        f = self.scheduler.cancel(keys=keys, client=self.id)
        if block:
            yield f
        for k in keys:
            with ignoring(KeyError):
                del self.futures[k]

    def cancel(self, futures, block=False):
        """
        Cancel running futures

        This stops future tasks from being scheduled if they have not yet run
        and deletes them if they have already run.  After calling, this result
        and all dependent results will no longer be accessible

        Parameters
        ----------
        futures: list of Futures
        block: bool (defaults to False)
            Whether to wait until the scheduler has processed the cancellation
        """
        return sync(self.loop, self._cancel, futures, block=block)

    @gen.coroutine
    def _get(self, dsk, keys, restrictions=None, raise_on_error=True):
        flatkeys = list(flatten([keys]))
        futures = {key: Future(key, self) for key in flatkeys}

        d = {k: unpack_remotedata(v) for k, v in dsk.items()}
        dsk2 = {k: v[0] for k, v in d.items()}
        # Drop tasks that merely alias their own key; ``(k == v) is not True``
        # rather than ``k != v`` tolerates __eq__ returning non-booleans.
        dsk3 = {k: v for k, v in dsk2.items() if (k == v) is not True}

        dependencies = {k: v[1] for k, v in d.items()}

        for k, v in dsk3.items():
            dependencies[k] |= set(_deps(dsk, v))

        self._send_to_scheduler({'op': 'update-graph',
                                 'tasks': valmap(dumps_task, dsk3),
                                 'dependencies': dependencies,
                                 'keys': flatkeys,
                                 'restrictions': restrictions or {},
                                 'client': self.id})

        packed = pack_data(keys, futures)
        if raise_on_error:
            result = yield self._gather(packed)
        else:
            try:
                result = yield self._gather(packed)
                result = 'OK', result
            except Exception as e:
                result = 'error', e
        raise gen.Return(result)

    def get(self, dsk, keys, **kwargs):
        """ Compute dask graph

        Parameters
        ----------
        dsk: dict
        keys: object, or nested lists of objects
        restrictions: dict (optional)
            A mapping of {key: {set of worker hostnames}} that restricts where
            jobs can take place

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.get({'x': (add, 1, 2)}, 'x')  # doctest: +SKIP
        3

        See Also
        --------
        Executor.compute: Compute asynchronous collections
        """
        status, result = sync(self.loop, self._get, dsk, keys,
                              raise_on_error=False, **kwargs)

        if status == 'error':
            raise result
        else:
            return result

    def compute(self, args, sync=False):
        """ Compute dask collections on cluster

        Parameters
        ----------
        args: iterable of dask objects or single dask object
            Collections like dask.array or dataframe or dask.value objects
        sync: bool (optional)
            Returns Futures if False (default) or concrete values if True

        Returns
        -------
        List of Futures if input is a sequence, or a single future otherwise

        Examples
        --------
        >>> from dask import do  # doctest: +SKIP
        >>> from operator import add  # doctest: +SKIP
        >>> x = do(add)(1, 2)  # doctest: +SKIP
        >>> y = do(add)(x, x)  # doctest: +SKIP
        >>> xx, yy = executor.compute([x, y])  # doctest: +SKIP
        >>> xx  # doctest: +SKIP
        <Future: status: finished, key: add-8f6e709446674bad78ea8aeecfee188e>
        >>> xx.result()  # doctest: +SKIP
        3
        >>> yy.result()  # doctest: +SKIP
        6

        Also support single arguments

        >>> xx = executor.compute(x)  # doctest: +SKIP

        See Also
        --------
        Executor.get: Normal synchronous dask.get function
        """
        if isinstance(args, (list, tuple, set, frozenset)):
            singleton = False
        else:
            args = [args]
            singleton = True

        variables = [a for a in args if isinstance(a, Base)]

        groups = groupby(lambda x: x._optimize, variables)
        dsk = merge([opt(merge([v.dask for v in val]),
                         [v._keys() for v in val])
                    for opt, val in groups.items()])
        names = ['finalize-%s' % tokenize(v) for v in variables]
        dsk2 = {name: (v._finalize, v._keys()) for name, v in zip(names, variables)}

        d = {k: unpack_remotedata(v) for k, v in merge(dsk, dsk2).items()}
        dsk3 = {k: v[0] for k, v in d.items()}
        dependencies = {k: v[1] for k, v in d.items()}

        for k, v in dsk3.items():
            dependencies[k] |= set(_deps(dsk, v))

        self._send_to_scheduler({'op': 'update-graph',
                                 'tasks': valmap(dumps_task, dsk3),
                                 'dependencies': dependencies,
                                 'keys': names,
                                 'client': self.id})

        i = 0
        futures = []
        for arg in args:
            if isinstance(arg, Base):
                futures.append(Future(names[i], self))
                i += 1
            else:
                futures.append(arg)

        if sync:
            result = self.gather(futures)
        else:
            result = futures

        if singleton:
            return first(result)
        else:
            return result

    def persist(self, collections):
        """ Persist dask collections on cluster

        Starts computation of the collection on the cluster in the background.
        Provides a new dask collection that is semantically identical to the
        previous one, but now based off of futures currently in execution.

        Parameters
        ----------
        collections: sequence or single dask object
            Collections like dask.array or dataframe or dask.value objects

        Returns
        -------
        List of collections, or single collection, depending on type of input.

        Examples
        --------
        >>> xx = executor.persist(x)  # doctest: +SKIP
        >>> xx, yy = executor.persist([x, y])  # doctest: +SKIP

        See Also
        --------
        Executor.compute
        """
        if isinstance(collections, (tuple, list, set, frozenset)):
            singleton = False
        else:
            singleton = True
            collections = [collections]

        assert all(isinstance(c, Base) for c in collections)

        groups = groupby(lambda x: x._optimize, collections)
        dsk = merge([opt(merge([v.dask for v in val]),
                         [v._keys() for v in val])
                    for opt, val in groups.items()])

        d = {k: unpack_remotedata(v) for k, v in dsk.items()}
        dsk2 = {k: v[0] for k, v in d.items()}
        dependencies = {k: v[1] for k, v in d.items()}

        for k, v in dsk2.items():
            dependencies[k] |= set(_deps(dsk, v))

        names = list({k for c in collections for k in flatten(c._keys())})

        self._send_to_scheduler({'op': 'update-graph',
                                 'tasks': valmap(dumps_task, dsk2),
                                 'dependencies': dependencies,
                                 'keys': names,
                                 'client': self.id})
        result = [redict_collection(c, {k: Future(k, self)
                                        for k in flatten(c._keys())})
                for c in collections]
        if singleton:
            return first(result)
        else:
            return result

    @gen.coroutine
    def _restart(self):
        self._send_to_scheduler({'op': 'restart'})
        self._restart_event = Event()
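        # The 'restart' branch of the message loop at the top of this
        # example sets self._restart_event; the wait below parks until then.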
        yield self._restart_event.wait()

        raise gen.Return(self)

    def restart(self):
        """ Restart the distributed network

        This kills all active work, deletes all data on the network, and
        restarts the worker processes.
        """
        return sync(self.loop, self._restart)

    @gen.coroutine
    def _upload_file(self, filename, raise_on_error=True):
        with open(filename, 'rb') as f:
            data = f.read()
        _, fn = os.path.split(filename)
        d = yield self.center.broadcast(msg={'op': 'upload_file',
                                             'filename': fn,
                                             'data': data})

        if any(isinstance(v, Exception) for v in d.values()):
            exception = next(v for v in d.values() if isinstance(v, Exception))
            if raise_on_error:
                raise exception
            else:
                raise gen.Return(exception)

        assert all(len(data) == v for v in d.values())

    def upload_file(self, filename):
        """ Upload local package to workers

        This sends a local file up to all worker nodes.  This file is placed
        into a temporary directory on Python's system path so any .py or .egg
        files will be importable.

        Parameters
        ----------
        filename: string
            Filename of .py or .egg file to send to workers

        Examples
        --------
        >>> executor.upload_file('mylibrary.egg')  # doctest: +SKIP
        >>> from mylibrary import myfunc  # doctest: +SKIP
        >>> L = e.map(myfunc, seq)  # doctest: +SKIP
        """
        result = sync(self.loop, self._upload_file, filename,
                        raise_on_error=False)
        if isinstance(result, Exception):
            raise result
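
A quick, self-contained sketch of the pattern that ties this example together: each key in ``self.futures`` carries a tornado ``Event`` used as a completion latch, which the message loop sets on ``'key-in-memory'`` or ``'task-erred'`` and which ``_gather()`` blocks on. The names below (``futures``, ``mark_finished``, ``waiter``) are illustrative only, not part of distributed:

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

futures = {}

def mark_finished(key):
    # Mirrors the 'key-in-memory' branch: record status, wake all waiters.
    futures[key]['status'] = 'finished'
    futures[key]['event'].set()

@gen.coroutine
def waiter(key):
    # Mirrors the wait in _gather(): park until the latch is set.
    yield futures[key]['event'].wait()
    raise gen.Return(futures[key]['status'])

@gen.coroutine
def main():
    futures['x'] = {'status': 'pending', 'event': Event()}
    IOLoop.current().call_later(0.1, mark_finished, 'x')
    status = yield waiter('x')
    print(status)  # prints: finished

IOLoop.current().run_sync(main)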
Code example #54
File: core.py Project: indera/distributed
class ConnectionPool(object):
    """ A maximum sized pool of Tornado IOStreams

    This provides a connect method that mirrors the normal distributed.connect
    method, but provides connection sharing and tracks connection limits.

    This object provides an ``rpc`` like interface::

        >>> rpc = ConnectionPool(limit=512)
        >>> scheduler = rpc('127.0.0.1:8786')
        >>> workers = [rpc(ip=ip, port=port) for ip, port in ...]

        >>> info = yield scheduler.identity()

    It creates enough streams to satisfy concurrent connections to any
    particular address::

        >>> a, b = yield [scheduler.who_has(), scheduler.has_what()]

    It reuses existing streams so that we don't have to continuously reconnect.

    It also maintains a stream limit to avoid "too many open file handles"
    issues.  Whenever this maximum is reached we clear out all idling streams.
    If that doesn't do the trick then we wait until one of the occupied streams
    closes.
    """
    def __init__(self, limit=512):
        self.open = 0
        self.active = 0
        self.limit = limit
        self.available = defaultdict(set)
        self.occupied = defaultdict(set)
        self.event = Event()

    def __str__(self):
        return "<ConnectionPool: open=%d, active=%d>" % (self.open,
                self.active)

    __repr__ = __str__

    def __call__(self, arg=None, ip=None, port=None, addr=None):
        """ Cached rpc objects """
        ip, port = ip_port_from_args(arg=arg, addr=addr, ip=ip, port=port)
        return RPCCall(ip, port, self)

    @gen.coroutine
    def connect(self, ip, port, timeout=3):
        if self.available.get((ip, port)):
            stream = self.available[ip, port].pop()
            self.active += 1
            self.occupied[ip, port].add(stream)
            raise gen.Return(stream)

        while self.open >= self.limit:
            self.event.clear()
            self.collect()
            yield self.event.wait()

        self.open += 1
        stream = yield connect(ip=ip, port=port, timeout=timeout)
        stream.set_close_callback(lambda: self.on_close(ip, port, stream))
        self.active += 1
        self.occupied[ip, port].add(stream)

        if self.open >= self.limit:
            self.event.clear()

        raise gen.Return(stream)

    def on_close(self, ip, port, stream):
        self.open -= 1

        if stream in self.available[ip, port]:
            self.available[ip, port].remove(stream)
        if stream in self.occupied[ip, port]:
            self.occupied[ip, port].remove(stream)
            self.active -= 1

        if self.open <= self.limit:
            self.event.set()

    def collect(self):
        logger.info("Collecting unused streams.  open: %d, active: %d",
                    self.open, self.active)
        for streams in list(self.available.values()):
            for stream in streams:
                stream.close()

    def close(self):
        for streams in list(self.available.values()):
            for stream in streams:
                stream.close()
        for streams in list(self.occupied.values()):
            for stream in streams:
                stream.close()
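
The connection-limit handshake above boils down to a clear/collect/wait loop on one shared ``tornado.locks.Event``. A stripped-down sketch under assumed names (``TinyPool`` is not part of distributed; only the limit bookkeeping survives):

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

class TinyPool(object):
    def __init__(self, limit):
        self.limit = limit
        self.open = 0
        self.event = Event()

    @gen.coroutine
    def acquire(self):
        # Same shape as connect(): block while at the limit.
        while self.open >= self.limit:
            self.event.clear()
            yield self.event.wait()
        self.open += 1

    def release(self):
        # Same shape as on_close(): free a slot and wake waiters.
        self.open -= 1
        if self.open < self.limit:
            self.event.set()

@gen.coroutine
def main():
    pool = TinyPool(limit=1)
    yield pool.acquire()
    IOLoop.current().call_later(0.1, pool.release)
    yield pool.acquire()  # parks until release() fires
    print("second acquire completed under limit=1")

IOLoop.current().run_sync(main)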
Code example #55
File: core.py Project: mariusvniekerk/distributed
class ConnectionPool(object):
    """ A maximum sized pool of Comm objects.

    This provides a connect method that mirrors the normal distributed.connect
    method, but provides connection sharing and tracks connection limits.

    This object provides an ``rpc`` like interface::

        >>> rpc = ConnectionPool(limit=512)
        >>> scheduler = rpc('127.0.0.1:8786')
        >>> workers = [rpc(address) for address in ...]

        >>> info = yield scheduler.identity()

    It creates enough comms to satisfy concurrent connections to any
    particular address::

        >>> a, b = yield [scheduler.who_has(), scheduler.has_what()]

    It reuses existing comms so that we don't have to continuously reconnect.

    It also maintains a comm limit to avoid "too many open file handles"
    issues.  Whenever this maximum is reached we clear out all idling comms.
    If that doesn't do the trick then we wait until one of the occupied comms
    closes.

    Parameters
    ----------
    limit: int
        The number of open comms to maintain at once
    deserialize: bool
        Whether or not to deserialize data by default or pass it through
    """
    def __init__(self, limit=512, deserialize=True):
        self.open = 0  # Total number of open comms
        self.active = 0  # Number of comms currently in use
        self.limit = limit  # Max number of open comms
        # Invariant: len(available) == open - active
        self.available = defaultdict(set)
        # Invariant: len(occupied) == active
        self.occupied = defaultdict(set)
        self.deserialize = deserialize
        self.event = Event()

    def __str__(self):
        return "<ConnectionPool: open=%d, active=%d>" % (self.open,
                                                         self.active)

    __repr__ = __str__

    def __call__(self, addr=None, ip=None, port=None):
        """ Cached rpc objects """
        addr = addr_from_args(addr=addr, ip=ip, port=port)
        return PooledRPCCall(addr, self)

    @gen.coroutine
    def connect(self, addr, timeout=3):
        """
        Get a Comm to the given address.  For internal use.
        """
        available = self.available[addr]
        occupied = self.occupied[addr]
        if available:
            comm = available.pop()
            if not comm.closed():
                self.active += 1
                occupied.add(comm)
                raise gen.Return(comm)
            else:
                self.open -= 1

        while self.open >= self.limit:
            self.event.clear()
            self.collect()
            yield self.event.wait()

        self.open += 1
        try:
            comm = yield connect(addr,
                                 timeout=timeout,
                                 deserialize=self.deserialize)
        except Exception:
            self.open -= 1
            raise
        self.active += 1
        occupied.add(comm)

        if self.open >= self.limit:
            self.event.clear()

        raise gen.Return(comm)

    def reuse(self, addr, comm):
        """
        Reuse an open communication to the given address.  For internal use.
        """
        self.occupied[addr].remove(comm)
        self.active -= 1
        if comm.closed():
            self.open -= 1
            if self.open < self.limit:
                self.event.set()
        else:
            self.available[addr].add(comm)

    def collect(self):
        """
        Collect open but unused communications, to allow opening other ones.
        """
        logger.info("Collecting unused comms.  open: %d, active: %d",
                    self.open, self.active)
        for addr, comms in self.available.items():
            for comm in comms:
                comm.close()
            comms.clear()
        self.open = self.active
        if self.open < self.limit:
            self.event.set()

    def close(self):
        """
        Close all communications abruptly.
        """
        for comms in self.available.values():
            for comm in comms:
                comm.abort()
        for comms in self.occupied.values():
            for comm in comms:
                comm.abort()
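
Compared with the previous version, this pool re-checks ``comm.closed()`` and keeps two explicit invariants (``len(available) == open - active`` and ``len(occupied) == active``). A hedged sketch of just that bookkeeping, with a hypothetical ``DummyComm`` standing in for a real comm:

from collections import defaultdict

class DummyComm(object):
    # Hypothetical stand-in; real comms come from distributed's connect().
    def __init__(self):
        self._closed = False
    def closed(self):
        return self._closed
    def close(self):
        self._closed = True

class TinyBooks(object):
    def __init__(self):
        self.open = 0
        self.active = 0
        self.available = defaultdict(set)
        self.occupied = defaultdict(set)

    def checked_out(self, addr, comm):
        # What connect() does once a comm is handed to a caller.
        self.open += 1
        self.active += 1
        self.occupied[addr].add(comm)

    def reuse(self, addr, comm):
        # Same accounting as ConnectionPool.reuse() above.
        self.occupied[addr].remove(comm)
        self.active -= 1
        if comm.closed():
            self.open -= 1
        else:
            self.available[addr].add(comm)

books = TinyBooks()
comm = DummyComm()
addr = 'tcp://127.0.0.1:8786'
books.checked_out(addr, comm)
books.reuse(addr, comm)
assert len(books.available[addr]) == books.open - books.active
assert len(books.occupied[addr]) == books.active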
Code example #56
File: drain.py Project: CrowdStrike/cs.eyrie
class SQSDrain(object):
    """Implementation of IDrain that writes to an AWS SQS queue.
    """

    def __init__(self, logger, loop, sqs_client,
                 metric_prefix='emitter'):
        self.emitter = sqs_client
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.output_error = Event()
        self.state = RUNNING
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)
        self._send_queue = Queue()
        self._should_flush_queue = Event()
        self._flush_handle = None
        self.loop.spawn_callback(self._onSend)

    @gen.coroutine
    def _flush_send_batch(self, batch_size):
        send_batch = [
            self._send_queue.get_nowait()
            for pos in range(min(batch_size, self.emitter.max_messages))
        ]
        try:
            response = yield self.emitter.send_message_batch(*send_batch)
        except SQSError as err:
            self.logger.exception('Error encountered flushing data to SQS: %s',
                                  err)
            self.output_error.set()
            for msg in send_batch:
                self._send_queue.put_nowait(msg)
        else:
            if response.Failed:
                self.output_error.set()
                for req in response.Failed:
                    self.logger.error('Message failed to send: %s', req.Id)
                    self._send_queue.put_nowait(req)

    @gen.coroutine
    def _onSend(self):
        while True:
            qsize = self._send_queue.qsize()
            # This will keep flushing until clear,
            # including items that show up in between flushes
            while qsize > 0:
                try:
                    yield self._flush_send_batch(qsize)
                except Exception as err:
                    self.logger.exception(err)
                    self.output_error.set()
                qsize = self._send_queue.qsize()
            # We've cleared the backlog, remove any possible future flush
            if self._flush_handle:
                self.loop.remove_timeout(self._flush_handle)
                self._flush_handle = None
            self._should_flush_queue.clear()
            yield self._should_flush_queue.wait()

    @gen.coroutine
    def close(self, timeout=None):
        self.state = CLOSING
        yield self._send_queue.join(timeout)

    def emit_nowait(self, msg):
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
            raise QueueFull()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                lambda: self._should_flush_queue.set(),
            )
        self.logger.debug("Drain emitting")
        self._send_queue.put_nowait(msg)

    @gen.coroutine
    def emit(self, msg, timeout=None):
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                lambda: self._should_flush_queue.set(),
            )
        yield self._send_queue.put(msg, timeout)
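
The drain batches sends by signalling ``_should_flush_queue``: producers set it (directly, or via a delayed timeout) and ``_onSend`` drains the queue, clears the event, and parks again. A minimal sketch of that loop, with demo-only names (``flusher``, ``sink``):

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event
from tornado.queues import Queue

q = Queue()
should_flush = Event()

@gen.coroutine
def flusher(sink):
    # Mirrors _onSend(): keep draining, then wait for the next signal.
    while True:
        while q.qsize() > 0:
            sink.append(q.get_nowait())
        if len(sink) >= 2:  # stop condition just for this demo
            return
        should_flush.clear()
        yield should_flush.wait()

@gen.coroutine
def main():
    sink = []
    q.put_nowait('a')
    should_flush.set()

    def produce_later():
        q.put_nowait('b')
        should_flush.set()

    IOLoop.current().call_later(0.05, produce_later)
    yield flusher(sink)
    print(sink)  # prints: ['a', 'b']

IOLoop.current().run_sync(main)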
Code example #57
class WorkerProcess(object):
    def __init__(
        self,
        worker_args,
        worker_kwargs,
        worker_start_args,
        silence_logs,
        on_exit,
        worker,
        env,
    ):
        self.status = "init"
        self.silence_logs = silence_logs
        self.worker_args = worker_args
        self.worker_kwargs = worker_kwargs
        self.worker_start_args = worker_start_args
        self.on_exit = on_exit
        self.process = None
        self.Worker = worker
        self.env = env

        # Initialized when worker is ready
        self.worker_dir = None
        self.worker_address = None

    @gen.coroutine
    def start(self):
        """
        Ensure the worker process is started.
        """
        enable_proctitle_on_children()
        if self.status == "running":
            raise gen.Return(self.status)
        if self.status == "starting":
            yield self.running.wait()
            raise gen.Return(self.status)

        self.init_result_q = init_q = mp_context.Queue()
        self.child_stop_q = mp_context.Queue()
        uid = uuid.uuid4().hex

        self.process = AsyncProcess(
            target=self._run,
            name="Dask Worker process (from Nanny)",
            kwargs=dict(
                worker_args=self.worker_args,
                worker_kwargs=self.worker_kwargs,
                worker_start_args=self.worker_start_args,
                silence_logs=self.silence_logs,
                init_result_q=self.init_result_q,
                child_stop_q=self.child_stop_q,
                uid=uid,
                Worker=self.Worker,
                env=self.env,
            ),
        )
        self.process.daemon = True
        self.process.set_exit_callback(self._on_exit)
        self.running = Event()
        self.stopped = Event()
        self.status = "starting"
        yield self.process.start()
        msg = yield self._wait_until_connected(uid)
        if not msg:
            raise gen.Return(self.status)
        self.worker_address = msg["address"]
        self.worker_dir = msg["dir"]
        assert self.worker_address
        self.status = "running"
        self.running.set()

        init_q.close()

        raise gen.Return(self.status)

    def _on_exit(self, proc):
        if proc is not self.process:
            # Ignore exit of old process instance
            return
        self.mark_stopped()

    def _death_message(self, pid, exitcode):
        assert exitcode is not None
        if exitcode == 255:
            return "Worker process %d was killed by unknown signal" % (pid,)
        elif exitcode >= 0:
            return "Worker process %d exited with status %d" % (pid, exitcode)
        else:
            return "Worker process %d was killed by signal %d" % (pid, -exitcode)

    def is_alive(self):
        return self.process is not None and self.process.is_alive()

    @property
    def pid(self):
        return self.process.pid if self.process and self.process.is_alive() else None

    def mark_stopped(self):
        if self.status != "stopped":
            r = self.process.exitcode
            assert r is not None
            if r != 0:
                msg = self._death_message(self.process.pid, r)
                logger.warning(msg)
            self.status = "stopped"
            self.stopped.set()
            # Release resources
            self.process.close()
            self.init_result_q = None
            self.child_stop_q = None
            self.process = None
            # Best effort to clean up worker directory
            if self.worker_dir and os.path.exists(self.worker_dir):
                shutil.rmtree(self.worker_dir, ignore_errors=True)
            self.worker_dir = None
            # User hook
            if self.on_exit is not None:
                self.on_exit(r)

    @gen.coroutine
    def kill(self, timeout=2, executor_wait=True):
        """
        Ensure the worker process is stopped, waiting at most
        *timeout* seconds before terminating it abruptly.
        """
        loop = IOLoop.current()
        deadline = loop.time() + timeout

        if self.status == "stopped":
            return
        if self.status == "stopping":
            yield self.stopped.wait()
            return
        assert self.status in ("starting", "running")
        self.status = "stopping"

        process = self.process
        self.child_stop_q.put(
            {
                "op": "stop",
                "timeout": max(0, deadline - loop.time()) * 0.8,
                "executor_wait": executor_wait,
            }
        )
        self.child_stop_q.close()

        while process.is_alive() and loop.time() < deadline:
            yield gen.sleep(0.05)

        if process.is_alive():
            logger.warning(
                "Worker process still alive after %d seconds, killing", timeout
            )
            try:
                yield process.terminate()
            except Exception as e:
                logger.error("Failed to kill worker process: %s", e)

    @gen.coroutine
    def _wait_until_connected(self, uid):
        delay = 0.05
        while True:
            if self.status != "starting":
                return
            try:
                msg = self.init_result_q.get_nowait()
            except Empty:
                yield gen.sleep(delay)
                continue

            if msg["uid"] != uid:  # ensure that we didn't cross queues
                continue

            if "exception" in msg:
                logger.error(
                    "Failed while trying to start worker process: %s", msg["exception"]
                )
                yield self.process.join()
                raise msg["exception"]
            else:
                raise gen.Return(msg)

    @classmethod
    def _run(
        cls,
        worker_args,
        worker_kwargs,
        worker_start_args,
        silence_logs,
        init_result_q,
        child_stop_q,
        uid,
        env,
        Worker,
    ):  # pragma: no cover
        os.environ.update(env)
        try:
            from dask.multiprocessing import initialize_worker_process
        except ImportError:  # old Dask version
            pass
        else:
            initialize_worker_process()

        if silence_logs:
            logger.setLevel(silence_logs)

        IOLoop.clear_instance()
        loop = IOLoop()
        loop.make_current()
        worker = Worker(*worker_args, **worker_kwargs)

        @gen.coroutine
        def do_stop(timeout=5, executor_wait=True):
            try:
                yield worker.close(
                    report=False,
                    nanny=False,
                    executor_wait=executor_wait,
                    timeout=timeout,
                )
            finally:
                loop.stop()

        def watch_stop_q():
            """
            Wait for an incoming stop message and then stop the
            worker cleanly.
            """
            while True:
                try:
                    msg = child_stop_q.get(timeout=1000)
                except Empty:
                    pass
                else:
                    child_stop_q.close()
                    assert msg.pop("op") == "stop"
                    loop.add_callback(do_stop, **msg)
                    break

        t = threading.Thread(target=watch_stop_q, name="Nanny stop queue watch")
        t.daemon = True
        t.start()

        @gen.coroutine
        def run():
            """
            Try to start worker and inform parent of outcome.
            """
            try:
                yield worker._start(*worker_start_args)
            except Exception as e:
                logger.exception("Failed to start worker")
                init_result_q.put({"uid": uid, "exception": e})
                init_result_q.close()
            else:
                assert worker.address
                init_result_q.put(
                    {"address": worker.address, "dir": worker.local_dir, "uid": uid}
                )
                init_result_q.close()
                yield worker.wait_until_closed()
                logger.info("Worker closed")

        try:
            loop.run_sync(run)
        except TimeoutError:
            # Loop was stopped before wait_until_closed() returned, ignore
            pass
        except KeyboardInterrupt:
            pass
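
The ``running`` and ``stopped`` Events make ``start()`` and ``kill()`` safe to call concurrently: later callers simply wait on the event that the first caller will set. A reduced sketch (``Starter`` is illustrative, and ``gen.sleep`` stands in for the real process launch):

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

class Starter(object):
    def __init__(self):
        self.status = "init"
        self.running = Event()

    @gen.coroutine
    def start(self):
        if self.status == "running":
            raise gen.Return(self.status)
        if self.status == "starting":
            # Same shape as WorkerProcess.start(): wait for the first caller.
            yield self.running.wait()
            raise gen.Return(self.status)
        self.status = "starting"
        yield gen.sleep(0.1)  # stands in for launching the process
        self.status = "running"
        self.running.set()
        raise gen.Return(self.status)

@gen.coroutine
def main():
    s = Starter()
    results = yield [s.start(), s.start()]  # second call only waits
    print(results)  # prints: ['running', 'running']

IOLoop.current().run_sync(main)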
Code example #58
File: drain.py Project: CrowdStrike/cs.eyrie
class ZMQDrain(object):
    """Implementation of IDrain that pushes to a zmq.Socket asynchronously.
    This implementation overrides the high-water mark behavior from
    cs.eyrie.vassal.Vassal to instead use a zmq.Poller.
    """

    def __init__(self, logger, loop, zmq_socket,
                 metric_prefix='emitter'):
        self.emitter = zmq_socket
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.output_error = Event()
        self.state = RUNNING
        self._writable = Event()
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)

    def _handle_events(self, fd, events):
        if events & self.loop.ERROR:
            self.logger.error('Error polling socket for writability')
        elif events & self.loop.WRITE:
            self.loop.remove_handler(self.emitter)
            self._writable.set()

    @gen.coroutine
    def _poll(self):
        self.loop.add_handler(self.emitter,
                              self._handle_events,
                              self.loop.WRITE)
        yield self._writable.wait()
        self._writable.clear()

    @gen.coroutine
    def close(self, timeout=None):
        self.state = CLOSING
        self.logger.debug("Flushing send queue")
        self.emitter.close()

    def emit_nowait(self, msg):
        self.logger.debug("Drain emitting")
        if isinstance(msg, basestring):
            msg = [msg]
        try:
            self.emitter.send_multipart(msg, zmq.NOBLOCK)
        except zmq.Again:
            raise QueueFull()

    @gen.coroutine
    def emit(self, msg, retry_timeout=INITIAL_TIMEOUT):
        if isinstance(msg, basestring):
            msg = [msg]
        while True:
            # This should ensure the ZMQ socket can accept more data
            yield self._poll()
            try:
                self.emitter.send_multipart(msg, zmq.NOBLOCK)
            except zmq.Again:
                # But sometimes it's not enough
                self.logger.debug('Error polling for socket writability')
                retry_timeout = min(retry_timeout*2, MAX_TIMEOUT)
                yield gen.sleep(retry_timeout.total_seconds())
            else:
                break
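
``_poll()`` is an edge-triggered wait: the IOLoop's WRITE event sets ``_writable`` once, and the coroutine clears it after waking so that every ``emit()`` re-arms the wait. A sketch with a timer standing in for the poll event (no zmq required):

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

writable = Event()

@gen.coroutine
def poll_once():
    # In ZMQDrain the WRITE handler calls _writable.set(); here a timer
    # plays that role.
    IOLoop.current().call_later(0.05, writable.set)
    yield writable.wait()
    writable.clear()

@gen.coroutine
def main():
    for i in range(3):
        yield poll_once()
        print("writable, sending message", i)

IOLoop.current().run_sync(main)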
Code example #59
class Queue(object):
    """Coordinate producer and consumer coroutines.

    If maxsize is 0 (the default) the queue size is unbounded.

    .. testcode::

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.queues import Queue

        q = Queue(maxsize=2)

        async def consumer():
            async for item in q:
                try:
                    print('Doing work on %s' % item)
                    await gen.sleep(0.01)
                finally:
                    q.task_done()

        async def producer():
            for item in range(5):
                await q.put(item)
                print('Put %s' % item)

        async def main():
            # Start consumer without waiting (since it never finishes).
            IOLoop.current().spawn_callback(consumer)
            await producer()     # Wait for producer to put all tasks.
            await q.join()       # Wait for consumer to finish all tasks.
            print('Done')

        IOLoop.current().run_sync(main)

    .. testoutput::

        Put 0
        Put 1
        Doing work on 0
        Put 2
        Doing work on 1
        Put 3
        Doing work on 2
        Put 4
        Doing work on 3
        Doing work on 4
        Done


    In versions of Python without native coroutines (before 3.5),
    ``consumer()`` could be written as::

        @gen.coroutine
        def consumer():
            while True:
                item = yield q.get()
                try:
                    print('Doing work on %s' % item)
                    yield gen.sleep(0.01)
                finally:
                    q.task_done()

    .. versionchanged:: 4.3
       Added ``async for`` support in Python 3.5.

    """
    def __init__(self, maxsize=0):
        if maxsize is None:
            raise TypeError("maxsize can't be None")

        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        self._getters = collections.deque([])  # Futures.
        self._putters = collections.deque([])  # Pairs of (item, Future).
        self._unfinished_tasks = 0
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self):
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self):
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self):
        return not self._queue

    def full(self):
        if self.maxsize == 0:
            return False
        else:
            return self.qsize() >= self.maxsize

    def put(self, item, timeout=None):
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.util.TimeoutError` after a
        timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.
        """
        future = Future()
        try:
            self.put_nowait(item)
        except QueueFull:
            self._putters.append((item, future))
            _set_timeout(future, timeout)
        else:
            future.set_result(None)
        return future

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if self._getters:
            assert self.empty(), "queue non-empty, why are getters waiting?"
            getter = self._getters.popleft()
            self.__put_internal(item)
            future_set_result_unless_cancelled(getter, self._get())
        elif self.full():
            raise QueueFull
        else:
            self.__put_internal(item)

    def get(self, timeout=None):
        """Remove and return an item from the queue.

        Returns a Future which resolves once an item is available, or raises
        `tornado.util.TimeoutError` after a timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.
        """
        future = Future()
        try:
            future.set_result(self.get_nowait())
        except QueueEmpty:
            self._getters.append(future)
            _set_timeout(future, timeout)
        return future

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self._putters.popleft()
            self.__put_internal(item)
            future_set_result_unless_cancelled(putter, None)
            return self._get()
        elif self.qsize():
            return self._get()
        else:
            raise QueueEmpty

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        if self._unfinished_tasks <= 0:
            raise ValueError('task_done() called too many times')
        self._unfinished_tasks -= 1
        if self._unfinished_tasks == 0:
            self._finished.set()

    def join(self, timeout=None):
        """Block until all items in the queue are processed.

        Returns a Future, which raises `tornado.util.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    def __aiter__(self):
        return _QueueIterator(self)

    # These three are overridable in subclasses.
    def _init(self):
        self._queue = collections.deque()

    def _get(self):
        return self._queue.popleft()

    def _put(self, item):
        self._queue.append(item)

    # End of the overridable methods.

    def __put_internal(self, item):
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self):
        # Remove timed-out waiters.
        while self._putters and self._putters[0][1].done():
            self._putters.popleft()

        while self._getters and self._getters[0].done():
            self._getters.popleft()

    def __repr__(self):
        return '<%s at %s %s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._format())

    def _format(self):
        result = 'maxsize=%r' % (self.maxsize, )
        if getattr(self, '_queue', None):
            result += ' queue=%r' % self._queue
        if self._getters:
            result += ' getters[%s]' % len(self._getters)
        if self._putters:
            result += ' putters[%s]' % len(self._putters)
        if self._unfinished_tasks:
            result += ' tasks=%s' % self._unfinished_tasks
        return result
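
Under the hood ``join()`` and ``task_done()`` are just the ``_finished`` Event: ``__put_internal`` clears it on every put, and the last ``task_done()`` sets it. A tiny demonstration:

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

@gen.coroutine
def main():
    q = Queue()
    q.put_nowait('job')  # clears the internal _finished event

    def worker():
        q.get_nowait()
        q.task_done()  # last outstanding task: _finished is set

    IOLoop.current().call_later(0.05, worker)
    yield q.join()  # resolves once the _finished event is set
    print('all tasks done')

IOLoop.current().run_sync(main)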
Code example #60
class TornadoReconnectionManager(ReconnectionManager):
    def __init__(self, pubnub):
        self._cancelled_event = Event()
        super(TornadoReconnectionManager, self).__init__(pubnub)

    @gen.coroutine
    def _register_heartbeat_timer(self):
        self._cancelled_event.clear()

        while not self._cancelled_event.is_set():
            if self._pubnub.config.reconnect_policy == PNReconnectionPolicy.EXPONENTIAL:
                self._timer_interval = int(math.pow(2, self._connection_errors) - 1)
                if self._timer_interval > self.MAXEXPONENTIALBACKOFF:
                    self._timer_interval = self.MINEXPONENTIALBACKOFF
                    self._connection_errors = 1
                    logger.debug("timerInterval > MAXEXPONENTIALBACKOFF at: %s" % utils.datetime_now())
                elif self._timer_interval < 1:
                    self._timer_interval = self.MINEXPONENTIALBACKOFF
                logger.debug("timerInterval = %d at: %s" % (self._timer_interval, utils.datetime_now()))
            else:
                self._timer_interval = self.INTERVAL

            # >>> Wait given interval or cancel
            sleeper = tornado.gen.sleep(self._timer_interval)
            canceller = self._cancelled_event.wait()

            wi = tornado.gen.WaitIterator(canceller, sleeper)

            while not wi.done():
                try:
                    future = wi.next()
                    yield future
                except Exception as e:
                    # TODO: verify the error will not be eaten
                    logger.error(e)
                    raise
                else:
                    if wi.current_future == sleeper:
                        break
                    elif wi.current_future == canceller:
                        return
                    else:
                        raise Exception("unknown future raised")

            logger.debug("reconnect loop at: %s" % utils.datetime_now())

            # >>> Attempt to request /time/0 endpoint
            try:
                yield self._pubnub.time().result()
                self._connection_errors = 1
                self._callback.on_reconnect()
                logger.debug("reconnection manager stop due success time endpoint call: %s" % utils.datetime_now())
                break
            except Exception:
                if self._pubnub.config.reconnect_policy == PNReconnectionPolicy.EXPONENTIAL:
                    logger.debug("reconnect interval increment at: %s" % utils.datetime_now())
                    self._connection_errors += 1

    def start_polling(self):
        if self._pubnub.config.reconnect_policy == PNReconnectionPolicy.NONE:
            logger.warning("reconnection policy is disabled, please handle reconnection manually.")
            return

        self._pubnub.ioloop.spawn_callback(self._register_heartbeat_timer)

    def stop_polling(self):
        if self._cancelled_event is not None and not self._cancelled_event.is_set():
            self._cancelled_event.set()
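
The cancellable sleep in ``_register_heartbeat_timer`` generalizes: race ``gen.sleep`` against ``Event.wait()`` with a ``gen.WaitIterator`` and act on whichever future resolves first. A minimal sketch (``sleep_or_cancel`` is a demo name, not part of the pubnub SDK):

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event

@gen.coroutine
def sleep_or_cancel(interval, cancelled):
    sleeper = gen.sleep(interval)
    canceller = cancelled.wait()
    wi = gen.WaitIterator(canceller, sleeper)
    while not wi.done():
        yield wi.next()
        if wi.current_future is sleeper:
            raise gen.Return('slept')
        elif wi.current_future is canceller:
            raise gen.Return('cancelled')

@gen.coroutine
def main():
    cancelled = Event()
    IOLoop.current().call_later(0.05, cancelled.set)
    result = yield sleep_or_cancel(10, cancelled)
    print(result)  # prints: cancelled

IOLoop.current().run_sync(main)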