Code Example #1
class Stream(object):
    def __init__(self, raw_headers, header_table):
        self._queue = Queue()
        self._header_table = header_table
        self._current_headers = self._header_table.merge(raw_headers)

    @gen.coroutine
    def get(self, timeout=0):
        if timeout == 0:
            res, headers = yield self._queue.get()
        else:
            deadline = datetime.timedelta(seconds=timeout)
            res, headers = yield self._queue.get(deadline)

        self._current_headers = headers
        if isinstance(res, Exception):
            raise res
        else:
            raise gen.Return(res)

    def push(self, item, raw_headers):
        headers = self._header_table.merge(raw_headers)
        self._queue.put_nowait((item, headers))

    def done(self, raw_headers):
        headers = self._header_table.merge(raw_headers)
        return self._queue.put_nowait((ChokeEvent(), headers))

    def error(self, errnumber, reason, raw_headers):
        headers = self._header_table.merge(raw_headers)
        return self._queue.put_nowait((RequestError(errnumber, reason), headers))

    @property
    def headers(self):
        return self._current_headers
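
A minimal driver for the class above (a sketch, not part of the original project): it assumes a stub header table whose merge() simply returns the raw headers, pushes one item, and reads it back with a one-second timeout.

from tornado import gen, ioloop

class StubHeaderTable(object):
    # stand-in for the real header table; merge() normally folds the
    # raw headers into a running table
    def merge(self, raw_headers):
        return raw_headers

@gen.coroutine
def demo():
    stream = Stream([], StubHeaderTable())
    stream.push('payload', [('trace_id', '1')])
    item = yield stream.get(timeout=1)  # gen.TimeoutError if nothing arrives
    print(item, stream.headers)

ioloop.IOLoop.current().run_sync(demo)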
Code Example #2
class SubscribeListener(SubscribeCallback):
    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()

    def message(self, pubnub, message):
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def wait_for_connect(self):
        if not self.connected_event.is_set():
            yield self.connected_event.wait()
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            yield self.disconnected_event.wait()
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                env = yield self.message_queue.get()
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                env = yield self.presence_queue.get()
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.presence_queue.task_done()
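
A hedged test-flow sketch for the listener above; the client wiring (add_listener, subscribe().channels(...).execute()) follows the PubNub SDK's usual shape but is an assumption here, as is the envelope's message attribute.

@tornado.gen.coroutine
def demo_flow(pubnub):
    listener = SubscribeListener()
    pubnub.add_listener(listener)                  # assumed SDK call
    pubnub.subscribe().channels('demo').execute()  # assumed SDK call
    yield listener.wait_for_connect()
    envelope = yield listener.wait_for_message_on('demo')
    print(envelope.message)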
Code Example #3
class ConnectionPool(object):
    def __init__(self, servers, maxsize=15, minsize=1, loop=None, debug=0):
        loop = loop if loop is not None else tornado.ioloop.IOLoop.instance()
        if debug:
            logging.basicConfig(
                level=logging.DEBUG,
                format="'%(levelname)s %(asctime)s"
                " %(module)s:%(lineno)d %(process)d %(thread)d %(message)s'")
        self._loop = loop
        self._servers = servers
        self._minsize = minsize
        self._debug = debug
        self._in_use = set()
        self._pool = Queue(maxsize)

    @gen.coroutine
    def clear(self):
        """Clear pool connections."""
        while not self._pool.empty():
            conn = yield self._pool.get()
            conn.close_socket()

    def size(self):
        return len(self._in_use) + self._pool.qsize()

    @gen.coroutine
    def acquire(self):
        """Acquire connection from the pool, or spawn new one
        if pool maxsize permits.

        :return: ``Connection`` (reader, writer)
        """
        while self.size() < self._minsize:
            _conn = yield self._create_new_conn()
            yield self._pool.put(_conn)

        conn = None
        while not conn:
            if not self._pool.empty():
                conn = yield self._pool.get()

            if conn is None:
                conn = yield self._create_new_conn()

        self._in_use.add(conn)
        raise gen.Return(conn)

    @gen.coroutine
    def _create_new_conn(self):
        conn = yield Connection.get_conn(self._servers, self._debug)
        raise gen.Return(conn)

    def release(self, conn):
        self._in_use.remove(conn)
        try:
            self._pool.put_nowait(conn)
        except (QueueEmpty, QueueFull):
            conn.close_socket()
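
A usage sketch for the pool above: acquiring inside try/finally guarantees release() always runs, so connections are never leaked; the send_command method on Connection is hypothetical.

@gen.coroutine
def fetch_stats(pool):
    conn = yield pool.acquire()
    try:
        result = yield conn.send_command(b'stats')  # hypothetical Connection method
        raise gen.Return(result)
    finally:
        pool.release(conn)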
Code Example #4
File: admin.py Project: ewintec/domogik
class Publisher(MQAsyncSub):
    """Handles new data to be passed on to subscribers."""
    def __init__(self):
        self.WSmessages = Queue()
        self.MQmessages = Queue()
        self.sub = MQAsyncSub.__init__(self, zmq.Context(), 'admin', [])
        self.subscribers = set()

    def register(self, subscriber):
        """Register a new subscriber."""
        self.subscribers.add(subscriber)

    def deregister(self, subscriber):
        """Stop publishing to a subscriber."""
        self.subscribers.remove(subscriber)

    @gen.coroutine
    def on_message(self, did, msg):
        """Receive message from MQ sub and send to WS."""
        yield self.WSmessages.put({"msgid": did, "content": msg})

    @gen.coroutine
    def submit(self, message):
        """Submit a new message to publish to subscribers."""
        yield self.WSmessages.put(message)

    @gen.coroutine
    def publishToWS(self):
        while True:
            message = yield self.WSmessages.get()
            if len(self.subscribers) > 0:
                print("Pushing MQ message {} to {} WS subscribers...".format(
                    message, len(self.subscribers)))
                yield [subscriber.submit(message) for subscriber in self.subscribers]

    @gen.coroutine
    def publishToMQ(self):
        ctx = zmq.Context()
        cli = MQSyncReq(ctx)
        pub = MQPub(ctx, 'admin')
        while True:
            message = yield self.MQmessages.get()
            jsons = json.loads(message)
            # req/rep
            if 'mq_request' in jsons and 'data' in jsons:
                msg = MQMessage()
                msg.set_action(str(jsons['mq_request']))
                msg.set_data(jsons['data'])
                print("REQ : {0}".format(msg.get()))
                if 'dst' in jsons:
                    print(cli.request(str(jsons['dst']), msg.get(), timeout=10).get())
                else:
                    print(cli.request('manager', msg.get(), timeout=10).get())
            # pub
            elif 'mq_publish' in jsons and 'data' in jsons:
                print("Publish : {0}".format(jsons['data']))
                pub.send_event(jsons['mq_publish'],
                                    jsons['data'])
Code Example #5
class RestfulHandler(RequestHandler):
    _registrar = None
    _queue = None

    def initialize(self, registrar=None):
        self._registrar: PartRegistrar = registrar
        self._queue = Queue()

    @gen.coroutine
    def get(self, endpoint_str):
        # called from tornado thread
        path = endpoint_str.split("/")
        request = Get(path=path)
        self.report_request(request)
        response = yield self._queue.get()
        self.handle_response(response)

    # curl -d '{"name": "me"}' http://localhost:8008/rest/HELLO/greet
    @gen.coroutine
    def post(self, endpoint_str):
        # called from tornado thread
        path = endpoint_str.split("/")
        parameters = json_decode(self.request.body.decode())
        request = Post(path=path, parameters=parameters)
        self.report_request(request)
        response = yield self._queue.get()
        self.handle_response(response)

    def report_request(self, request):
        # called from tornado thread
        request.set_callback(self.queue_response)
        mri = request.path[0]
        self._registrar.report(builtin.infos.RequestInfo(request, mri))

    def queue_response(self, response):
        # called from cothread
        IOLoopHelper.call(self._queue.put, response)

    def handle_response(self, response):
        # called from tornado thread
        if isinstance(response, Return):
            message = json_encode(response.value)
            self.finish(message + "\n")
        else:
            if isinstance(response, Error):
                message = response.message
            else:
                message = "Unknown response %s" % type(response)
            self.set_status(500, message)
            self.write_error(500)
Code Example #6
File: py-server.py Project: beef9999/go-chatroom
class Room(object):

    def __init__(self, server, name):
        self.server = server
        self.name = name
        self.clients = {}
        self.lock = threading.RLock()
        self.inqueue = Queue(maxsize=QUEUE_SIZE)

    @coroutine
    def dispatch(self):
        logging.debug('Chatroom: %s opened' % self.name)
        while True:
            msg = yield self.inqueue.get()
            logging.debug("Room got message: room[%s], command[%s], content[%s]",
                          msg.receiver, msg.command, msg.content)
            if msg.command == COMMAND_JOIN:
                logging.debug("%s joined", msg.sender.name)
                self.clients[msg.sender.name] = msg.sender
            elif msg.command == COMMAND_QUIT:
                del self.clients[msg.sender.name]
            yield self.broadcast(msg)

    @coroutine
    def broadcast(self, msg):
        for _, client in self.clients.items():
            yield client.inqueue.put(msg)
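
A sketch of how a client might join the room above, assuming a simple message object carrying the sender/receiver/command/content fields that dispatch() reads; COMMAND_JOIN comes from the surrounding module.

from collections import namedtuple
from tornado.ioloop import IOLoop

Message = namedtuple('Message', 'sender receiver command content')  # assumed shape

def open_room(server, client):
    room = Room(server, 'lobby')
    IOLoop.current().spawn_callback(room.dispatch)
    room.inqueue.put(Message(sender=client, receiver='lobby',
                             command=COMMAND_JOIN, content=''))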
Code Example #7
File: dask.py Project: kszucs/streams
class gather(Stream):
    def __init__(self, child, limit=10, client=None):
        self.client = client or default_client()
        self.queue = Queue(maxsize=limit)
        self.condition = Condition()

        Stream.__init__(self, child)

        self.client.loop.add_callback(self.cb)

    def update(self, x, who=None):
        return self.queue.put(x)

    @gen.coroutine
    def cb(self):
        while True:
            x = yield self.queue.get()
            L = [x]
            while not self.queue.empty():
                L.append(self.queue.get_nowait())
            results = yield self.client._gather(L)
            for x in results:
                yield self.emit(x)
            if self.queue.empty():
                self.condition.notify_all()

    @gen.coroutine
    def flush(self):
        while not self.queue.empty():
            yield self.condition.wait()
Code Example #8
File: app-checkpoint.py Project: Harleymckee/books
class Publisher(object):
    """Handles new data to be passed on to subscribers."""
    def __init__(self):
        self.messages = Queue()
        self.subscribers = set()

    def register(self, subscriber):
        """Register a new subscriber."""
        self.subscribers.add(subscriber)

    def deregister(self, subscriber):
        """Stop publishing to a subscriber."""
        self.subscribers.remove(subscriber)

    @gen.coroutine
    def submit(self, message):
        """Submit a new message to publish to subscribers."""
        yield self.messages.put(message)

    @gen.coroutine
    def publish(self):
        while True:
            message = yield self.messages.get()
            if len(self.subscribers) > 0:
                # print("Pushing message {} to {} subscribers...".format(
                # 		message, len(self.subscribers)))
                yield [
                    subscriber.submit(message)
                    for subscriber in self.subscribers
                ]
Code Example #9
File: core.py Project: cmwaura/streamz
class delay(Stream):
    """ Add a time delay to results """
    _graphviz_shape = 'octagon'

    def __init__(self, upstream, interval, loop=None, **kwargs):
        loop = loop or upstream.loop or IOLoop.current()
        self.interval = interval
        self.queue = Queue()

        Stream.__init__(self, upstream, loop=loop, **kwargs)

        self.loop.add_callback(self.cb)

    @gen.coroutine
    def cb(self):
        while True:
            last = time()
            x = yield self.queue.get()
            yield self._emit(x)
            duration = self.interval - (time() - last)
            if duration > 0:
                yield gen.sleep(duration)

    def update(self, x, who=None):
        return self.queue.put(x)
Code Example #10
File: core.py Project: cmwaura/streamz
class buffer(Stream):
    """ Allow results to pile up at this point in the stream

    This allows results to buffer in place at various points in the stream.
    This can help to smooth flow through the system when backpressure is
    applied.
    """
    _graphviz_shape = 'diamond'

    def __init__(self, upstream, n, loop=None, **kwargs):
        loop = loop or upstream.loop or IOLoop.current()
        self.queue = Queue(maxsize=n)

        Stream.__init__(self, upstream, loop=loop, **kwargs)

        self.loop.add_callback(self.cb)

    def update(self, x, who=None):
        return self.queue.put(x)

    @gen.coroutine
    def cb(self):
        while True:
            x = yield self.queue.get()
            yield self._emit(x)
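
In streamz-style pipelines a node like the one above is normally reached through the fluent API rather than constructed directly; a sketch (import path assumed):

from streamz import Stream  # assumed package layout

source = Stream()
source.map(lambda x: x * 2).buffer(8).sink(print)
for i in range(3):
    source.emit(i)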
Code Example #11
class EventSource(RequestHandler):
    def initialize(self, stream):
        #assert isinstance(stream, Stream)
        self.stream = stream
        self.messages = Queue()
        self.finished = False
        self.set_header('content-type', 'text/event-stream')
        self.set_header('cache-control', 'no-cache')
        self.store = self.stream.sink(self.messages.put)

    @gen.coroutine
    def publish(self, message):
        """Pushes data to a listener."""
        try:
            self.write(message >> to_str)
            yield self.flush()
        except StreamClosedError:
            self.finished = True
            (self.request.remote_ip, StreamClosedError) >> log

    @gen.coroutine
    def get(self, *args, **kwargs):
        try:
            while not self.finished:
                message = yield self.messages.get()
                yield self.publish(message)
        except Exception:
            pass
        finally:
            self.store.destroy()
            self.messages.empty()
            self.finish()
Code Example #12
File: biz.py Project: JinkelaCrops/tornado-learning
class Decode(object):
    def __init__(self, sess_field):
        self.q = Queue(maxsize=1000)
        self.p = Queue(maxsize=1000)
        self.sess_field = sess_field

    @staticmethod
    def batch_pad(nd):
        max_length = max(map(len, nd))
        pad_nd = [
            i + [text_encoder.PAD_ID] * (max_length - len(i)) for i in nd
        ]
        return pad_nd

    @gen.coroutine
    def decode(self):
        log.info("[biz] Decode: model loading ... ")
        saver = tf.train.Saver()

        with tf.Session(config=self.sess_field.sess_config) as sess:
            # Load weights from checkpoint.
            log.info("[biz] Decode: restoring parameters")
            saver.restore(sess, self.sess_field.ckpt)
            log.info("[biz] Decode: model already loaded")
            while True:
                inputs = yield self.q.get()
                log.info("[biz] Decode: " + str(inputs))
                st_time = time.time()
                inputs_numpy = [
                    self.sess_field.encoders["inputs"].encode(i) +
                    [text_encoder.EOS_ID] for i in inputs
                ]
                num_decode_batches = (len(inputs_numpy) -
                                      1) // self.sess_field.batch_size + 1
                results = []
                for i in range(num_decode_batches):
                    input_numpy = inputs_numpy[
                        i * self.sess_field.batch_size:
                        (i + 1) * self.sess_field.batch_size]
                    inputs_numpy_batch = input_numpy + [
                        [text_encoder.EOS_ID]
                    ] * (self.sess_field.batch_size - len(input_numpy))
                    inputs_numpy_batch = self.batch_pad(
                        inputs_numpy_batch)  # pad using 0
                    # log.info("[biz] Decode: " + str(inputs_numpy_batch))
                    feed = {self.sess_field.inputs_ph: inputs_numpy_batch}
                    result = sess.run(self.sess_field.prediction, feed)
                    # str.strip() removes characters, not substrings; use
                    # replace() to drop the pad/EOS markers instead
                    decoded_outputs = [
                        self.sess_field.encoders["targets"].decode(i)
                            .replace("<pad>", "").replace("<EOS>", "").strip()
                        for i in result["outputs"][:len(input_numpy)]
                    ]
                    results += decoded_outputs
                self.p.put(results)
                log.info("[biz] Decode: source: " + str(inputs))
                log.info("[biz] Decode: target: " + str(results))
                log.info("[biz] Decode: using %s s" % (time.time() - st_time))
Code Example #13
File: executor.py Project: ogrisel/distributed
@gen.coroutine
def _first_completed(futures):
    """ Return a single completed future

    See Also:
        _as_completed
    """
    q = Queue()
    yield _as_completed(futures, q)
    result = yield q.get()
    raise gen.Return(result)
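
The helper is itself a coroutine (hence the @gen.coroutine decorator above), so callers must yield it; inc here is a stand-in task function.

@gen.coroutine
def demo(executor):
    futures = executor.map(inc, range(10))  # inc is a stand-in function
    first = yield _first_completed(futures)
    raise gen.Return(first)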
Code Example #14
File: libwatcher.py Project: jianingy/watchgang
class TornadoQuerierBase(object):

    def __init__(self):
        self.tasks = TornadoQueue()

    def gen_task(self):
        raise NotImplementedError()

    def run_task(self, task):
        raise NotImplementedError()

    def prepare(self):
        self.running = True

    def cleanup(self):
        self.running = False

    @coroutine
    def run_worker(self, worker_id, f):
        while self.tasks.qsize() > 0:
            task = yield self.tasks.get()
            LOG.debug('worker[%d]: current task is %s' % (worker_id, task))
            try:
                yield f(task)
                pass
            except Exception as e:
                LOG.warning(str(e))
            finally:
                self.tasks.task_done()
                task = None
        LOG.debug('worker[%d]: all tasks done %s' % (worker_id, self.tasks))

    @coroutine
    def start(self, num_workers=1):

        self.prepare()

        # add tasks
        tasks = yield self.gen_task()
        for task in tasks:
            yield self.tasks.put(task)

        # start shoot workers
        for worker_id in range(num_workers):
            LOG.debug('starting worker %d' % worker_id)
            self.run_worker(worker_id, self.run_task)

        yield self.tasks.join()
        self.cleanup()
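
A minimal concrete subclass sketch: gen_task() is yielded inside start(), so it can be a coroutine returning the task list; the URL fetching below is illustrative, not part of the original project.

from tornado.gen import coroutine, Return
from tornado.httpclient import AsyncHTTPClient
from tornado.ioloop import IOLoop

class UrlQuerier(TornadoQuerierBase):
    @coroutine
    def gen_task(self):
        raise Return(['http://example.com/page/%d' % i for i in range(10)])

    @coroutine
    def run_task(self, task):
        resp = yield AsyncHTTPClient().fetch(task)
        LOG.debug('fetched %s: %d bytes' % (task, len(resp.body)))

IOLoop.current().run_sync(lambda: UrlQuerier().start(num_workers=4))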
Code Example #15
File: core.py Project: zjw0358/streamz3
class buffer(Stream):
    def __init__(self, n, child, loop=None):
        self.queue = Queue(maxsize=n)

        Stream.__init__(self, child, loop=loop)

        self.loop.add_callback(self.cb)

    def update(self, x, who=None):
        return self.queue.put(x)

    @gen.coroutine
    def cb(self):
        while True:
            x = yield self.queue.get()
            yield self.emit(x)
Code Example #16
File: tornao_send.py Project: DashShen/Journey
class TopicApplication(tornado.web.Application):

    def __init__(self):
        handlers = [
            url(r'/', MainHandler)
        ]
        self.queue = Queue(maxsize=10)
        super(TopicApplication, self).__init__(handlers=handlers, debug=True)

    @gen.coroutine
    def consumer(self):
        item = yield self.queue.get()
        try:
            print(item)
        finally:
            self.queue.task_done()
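
consumer() above handles a single item, so a startup sketch would re-schedule it in a loop; the wiring below is an assumption, not part of the original file.

@gen.coroutine
def consume_forever(app):
    while True:
        yield app.consumer()

if __name__ == '__main__':
    app = TopicApplication()
    app.listen(8888)
    tornado.ioloop.IOLoop.current().spawn_callback(consume_forever, app)
    tornado.ioloop.IOLoop.current().start()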
Code Example #17
File: executor.py Project: aterrel/distributed
def as_completed(fs):
    if len(set(f.executor for f in fs)) == 1:
        loop = first(fs).executor.loop
    else:
        # TODO: Groupby executor, spawn many _as_completed coroutines
        raise NotImplementedError(
            "as_completed on many event loops not yet supported")

    from .compatibility import Queue
    queue = Queue()

    coroutine = lambda: _as_completed(fs, queue)
    loop.add_callback(coroutine)

    for i in range(len(fs)):
        yield queue.get()
Code Example #18
File: command_worker.py Project: jbenua/Flashlight
class CommandQueue():
    def __init__(self):
        self.queue = Queue()

    @gen.coroutine
    def process_command(self):
        while True:
            item = yield self.queue.get()
            try:
                yield gen.sleep(0.1)
                command, view = item
                view.write_message({command[0]: command[1]})
            finally:
                self.queue.task_done()

    def put(self, item):
        self.queue.put(item)
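
Startup sketch: the worker coroutine is spawned once on the IOLoop, and handlers then feed it (command, view) pairs, where view is assumed to be a websocket handler exposing write_message.

command_queue = CommandQueue()
tornado.ioloop.IOLoop.current().spawn_callback(command_queue.process_command)

# later, e.g. from inside a WebSocketHandler:
#   command_queue.put((('led', 'on'), self))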
Code Example #19
class FirehoseWebSocket(tornado.websocket.WebSocketHandler):
    @tornado.gen.coroutine
    def open(self):
        print "hose open"
        global queues
        self.queue = Queue()
        queues.append(self.queue)
        while True:
            item = yield self.queue.get()
            self.queue.task_done()
            self.write_message(json.dumps(item))

    @tornado.gen.coroutine
    def on_close(self):
        global queues
        yield self.queue.join()
        queues.remove(self.queue)
Code Example #20
class MessageRouter(object):
    def __init__(self, message_sender, default_handler=None):
        self._queue = Queue()
        self.message_sender = message_sender
        self.default_handler = default_handler
        self._message_handlers = {}
        self._working = False

    def register_message_handler(self, message, handler):
        assert isinstance(message, MessageMeta)
        assert hasattr(handler, '__call__')
        self._message_handlers[message.__name__] = handler

    @gen.coroutine
    def put_message(self, message):
        assert isinstance(message, Message)
        yield self._queue.put(message)

    @gen.coroutine
    def start(self):
        self._working = True
        while self._working:
            message = yield self._queue.get()
            try:
                # TODO: Maybe we need to add special handling for BarrierRequest
                handler = self._message_handlers.get(message.type,
                                                     self.default_handler)
                if handler:
                    yield handler(message)
            except Exception as e:
                exc_type, exc_value, exc_tb = sys.exc_info()
                error_type, error_subtype, error_message, extended_message = errors.exception_to_error_args(
                    exc_type, exc_value, exc_tb)
                response = Error.from_request(
                    message,
                    error_type=error_type,
                    error_subtype=error_subtype,
                    message=error_message,
                    extended_message=extended_message)
                yield self.message_sender.send_message_ignore_response(
                    response)
            finally:
                self._queue.task_done()

    def stop(self):
        self._working = False
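
Startup sketch, assuming a Ping message class built on the MessageMeta machinery referenced above and a message_sender exposing send_message_ignore_response(); names below are stand-ins.

@gen.coroutine
def handle_ping(message):   # stand-in handler
    print('got', message.type)

@gen.coroutine
def run_router(sender):
    router = MessageRouter(message_sender=sender)
    router.register_message_handler(Ping, handle_ping)  # Ping is hypothetical
    IOLoop.current().spawn_callback(router.start)
    yield router.put_message(Ping())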
Code Example #21
File: messaging.py Project: lisidan/tega
def request(subscriber, request_type, tega_id, path, **kwargs):
    '''
    tega request/response service -- this method returns a generator
    (tornado coroutine) to send a request to a remote tega db.
    '''
    global seq_no
    seq_no += 1
    if seq_no > 65535:  # seq_no region: 0 - 65535.
        seq_no = 0
    subscriber.write_message('REQUEST {} {} {} {}\n{}'.format(
        seq_no, request_type.name, tega_id, path, json.dumps(kwargs)))
    queue = Queue(maxsize=1)  # used like a synchronous queue
    callback[seq_no] = queue  # synchronous queue per request/response
    try:
        result = yield queue.get(timeout=timedelta(seconds=REQUEST_TIMEOUT))
        return result
    except gen.TimeoutError:
        raise
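
Caller side: per its docstring, request() returns a generator meant to run as a tornado coroutine, so the sketch below assumes it is wrapped or decorated as one at the call site; the request-type enum value is also an assumption.

@gen.coroutine
def get_config(subscriber):
    result = yield request(subscriber, REQUEST_TYPE.GET,   # enum value assumed
                           tega_id='client1', path='config.interfaces')
    return result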
Code Example #22
File: stream_handler.py Project: roflcoopter/viseron
    def stream(self, nvr, mjpeg_stream, mjpeg_stream_config, publish_frame_topic):
        """Subscribe to frames, draw on them, then publish processed frame."""
        frame_queue = Queue(maxsize=10)
        subscribe_frame_topic = (
            f"{nvr.config.camera.name_slug}/{TOPIC_FRAME_PROCESSED}/*"
        )
        unique_id = DataStream.subscribe_data(subscribe_frame_topic, frame_queue)

        while self.active_streams[mjpeg_stream]:
            item = yield frame_queue.get()
            frame = copy.copy(item.frame)
            ret, jpg = yield self.process_frame(nvr, frame, mjpeg_stream_config)

            if ret:
                DataStream.publish_data(publish_frame_topic, jpg)

        DataStream.unsubscribe_data(subscribe_frame_topic, unique_id)
        LOGGER.debug(f"Closing stream {mjpeg_stream}")
Code Example #23
class StreamClient(object):
    MAX_SIZE = 60

    def __init__(self, stream_id):
        self.id = generate_id()
        self.stream_id = stream_id
        self.queue = Queue(StreamClient.MAX_SIZE)

    @coroutine
    def send(self, item):
        yield self.queue.put(item)

    @coroutine
    def fetch(self):
        item = yield self.queue.get()
        self.queue.task_done()
        return item

    def empty(self):
        return self.queue.qsize() == 0
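
A producer/consumer round trip for the client above; send() applies backpressure once MAX_SIZE items are pending because the underlying queue is bounded.

@coroutine
def relay(client, items):
    for item in items:
        yield client.send(item)   # blocks when MAX_SIZE items are queued
    while not client.empty():
        item = yield client.fetch()
        print(item)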
Code Example #24
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""
    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def open(self):
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()

    def _close(self):
        print("Subscriber left.")
        self.publisher.deregister(self)
        self.finished = True

    @gen.coroutine
    def submit(self, message):
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        """ Empty the queue of messages to send to the WS """
        while not self.finished:
            message = yield self.messages.get()
            self.send(message)

    def send(self, message):
        try:
            self.write_message(message)
        except WebSocketClosedError:
            self._close()

    def on_message(self, content):
        """ reciev message from websocket and send to MQ """
        #print(u"WS to MQ: {0}".format(content))
        self.publisher.MQmessages.put(content)
Code Example #25
File: core.py Project: zjw0358/streamz3
class delay(Stream):
    def __init__(self, interval, child, loop=None):
        self.interval = interval
        self.queue = Queue()

        Stream.__init__(self, child, loop=loop)

        self.loop.add_callback(self.cb)

    @gen.coroutine
    def cb(self):
        while True:
            last = time()
            x = yield self.queue.get()
            yield self.emit(x)
            duration = self.interval - (time() - last)
            if duration > 0:
                yield gen.sleep(duration)

    def update(self, x, who=None):
        return self.queue.put(x)
Code Example #26
class PopularCategories:
    def __init__(self):
        self.categories = {}
        self.update_queue = Queue()

    @gen.coroutine
    def add_for_processing(self, predictions):
        yield self.update_queue.put(predictions)

    @gen.coroutine
    def process_queue(self):
        if self.update_queue.qsize() > 0:
            for i in range(self.update_queue.qsize()):
                predictions = yield self.update_queue.get()
                try:
                    self._update_categories(predictions)
                finally:
                    self.update_queue.task_done()

            # update top 5
            top_5 = sorted(self.categories.items(),
                           key=lambda x: x[1],
                           reverse=True)[:5]
            mapped = map(lambda x: to_json_result(x[0], x[1]), top_5)
            yield update_top_5(list(mapped))

    def _update_categories(self, new_predictions):
        predictions = new_predictions.argsort()[0]

        #  update categories total
        for prediction in predictions:
            label = configuration.image_labels[prediction]
            score = new_predictions[0][prediction]

            if label in self.categories:
                update_score = (self.categories[label] + score) / 2
            else:
                update_score = score

            self.categories[label] = update_score
Code Example #27
File: admin.py Project: ewintec/domogik
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""
    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def open(self):
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()

    def _close(self):
        print("Subscriber left.")
        self.publisher.deregister(self)
        self.finished = True

    @gen.coroutine
    def submit(self, message):
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        while not self.finished:
            message = yield self.messages.get()
            #print("New MQ message: " + str(message))
            self.send(message)

    def send(self, message):
        try:
            self.write_message(message)
        except WebSocketClosedError:
            self._close()
    
    def on_message(self, content):
        self.publisher.MQmessages.put(content)
Code Example #28
File: app-checkpoint.py Project: Harleymckee/books
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""
    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def check_origin(self, origin):
        return True

    def open(self):
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()

    def _close(self):
        print("Subscriber left.")
        self.publisher.deregister(self)
        self.finished = True

    @gen.coroutine
    def submit(self, message):
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        while not self.finished:
            message = yield self.messages.get()
            # print("New message: " + str(message))
            self.send(message)

    def send(self, message):
        try:
            self.write_message(dict(value=message))
        except WebSocketClosedError:
            self._close()
Code Example #29
    def call(self, action, body):
        """ Do the actual calling

        :param str action: action to perform (CRUD for example)
        :param str body: an object to send (will be json-encoded)

        """

        # queue is used to send result back to this routine
        corr_id = str(uuid.uuid4())
        queue = Queue(maxsize=1)
        self._reply_queues[corr_id] = queue

        # send message
        self.channel().basic_publish(exchange='',
                                     routing_key=action,
                                     properties=pika.BasicProperties(
                                         correlation_id=corr_id,
                                         reply_to=self._callback_queue,
                                     ),
                                     body=json.dumps(body))

        # add timeout callback
        self._ioloop.add_timeout(
            time.time() + self._timeout,
            functools.partial(
                self._on_timeout,
                queue=queue,
                correlation_id=corr_id,
            ))

        # retrieve result back
        result = yield queue.get()
        queue.task_done()

        if 'timeout_error' in result:
            raise TimeoutError(result['error'])

        return result
Code Example #30
File: stores.py Project: mivade/tornadose
class QueueStore(BaseStore):
    """Publish data via queues.

    This class is meant to be used in cases where subscribers should
    not miss any data. Compared to the :class:`DataStore` class, new
    messages to be broadcast to clients are put in a queue to be
    processed in order.

    """
    def initialize(self):
        self.messages = Queue()
        self.publish()

    @gen.coroutine
    def submit(self, message):
        yield self.messages.put(message)

    @gen.coroutine
    def publish(self):
        while True:
            message = yield self.messages.get()
            if len(self.subscribers) > 0:
                yield [subscriber.submit(message) for subscriber in self.subscribers]
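
In tornadose the store is handed to a handler class at routing time and can then be fed from any coroutine; a sketch (handler import path assumed):

from tornado.web import Application
from tornadose.handlers import EventSource  # assumed import path

store = QueueStore()
app = Application([(r'/events', EventSource, {'store': store})])
# later, from any coroutine: yield store.submit('new data')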
Code Example #31
File: executor.py Project: lucashtnguyen/distributed
def as_completed(fs):
    """ Return futures in the order in which they complete

    This returns an iterator that yields the input future objects in the order
    in which they complete.  Calling ``next`` on the iterator will block until
    the next future completes, irrespective of order.

    This function does not return futures in the order in which they are input.
    """
    if len(set(f.executor for f in fs)) == 1:
        loop = first(fs).executor.loop
    else:
        # TODO: Groupby executor, spawn many _as_completed coroutines
        raise NotImplementedError(
            "as_completed on many event loops not yet supported")

    from .compatibility import Queue
    queue = Queue()

    coroutine = lambda: _as_completed(fs, queue)
    loop.add_callback(coroutine)

    for i in range(len(fs)):
        yield queue.get()
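
Caller side: the function is an ordinary generator, so it is consumed synchronously even though completion happens on the event loop; process and filenames are stand-ins.

futures = executor.map(process, filenames)  # stand-in task and inputs
for future in as_completed(futures):
    print(future.result())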
Code Example #32
class TaskLogger(object):
    def __init__(self,
                 task_id,
                 engine=EngineType.REQUESTS,
                 io_loop=None,
                 task_url=TASK_URL,
                 wrap=False,
                 tenant=None):
        self.task_id = task_id
        self.task_url = task_url
        self._seq = 0
        self._partial_log_url = self._get_partial_url('log')
        self._partial_result_url = self._get_partial_url('result')

        self.wrap = wrap
        if wrap and tenant:
            self._partial_log_url = update_query_params(
                self._partial_log_url, {'tenant': tenant})
            self._partial_result_url = update_query_params(
                self._partial_result_url, {'tenant': tenant})

        if engine == EngineType.REQUESTS:
            self.log = self._log_by_requests
            self.result = self._result_by_requests
        elif engine == EngineType.TORNADO:
            io_loop = io_loop if io_loop else IOLoop.current()
            self._http_client = AsyncHTTPClient(io_loop=io_loop)
            self._queue = Queue()
            self.log = self._log_by_tornado
            self.result = self._result_by_tornado
        else:
            raise TaskLoggerError('',
                                  reason='engine only supports {}'.format(
                                      EngineType.types_str()))

    def _get_partial_url(self, partial_name):
        url = urljoin(self.task_url, partial_name)
        url = update_query_params(url, {'task_id': self.task_id})
        return url

    def _get_log_url(self, seq):
        url = update_query_params(self._partial_log_url, {'seq': seq})
        return url

    def _get_result_url(self, seq, exit_code=0):
        url = update_query_params(self._partial_result_url, {
            'seq': seq,
            'exit_code': exit_code
        })
        return url

    def _log_by_requests(self, log):
        self._seq += 1
        log_url = self._get_log_url(self._seq)
        data = self._create_log(log, self._seq)
        self._send_by_requests(log_url, data)

    def _result_by_requests(self, result, exit_code=0):
        self._seq += 1
        result_url = self._get_result_url(self._seq, exit_code)
        data = self._create_result(result, self._seq, exit_code=exit_code)
        self._send_by_requests(result_url, data)

    @staticmethod
    def _send_by_requests(url, data):
        res = requests.post(url, data=data, verify=False)
        if res.status_code != 200:
            raise TaskLoggerError(data, reason=res.reason)

    @gen.coroutine
    def _log_by_tornado(self, log):
        yield self._queue.put(1)
        self._seq += 1
        log_url = self._get_log_url(self._seq)
        data = self._create_log(log, self._seq)
        try:
            yield self._send_by_tornado(log_url, data)
        finally:
            yield self._queue.get()
            self._queue.task_done()

    @gen.coroutine
    def _result_by_tornado(self, result, exit_code=0):
        yield self._queue.join()
        self._seq += 1
        result_url = self._get_result_url(self._seq, exit_code)
        data = self._create_result(result, self._seq, exit_code=exit_code)
        yield self._send_by_tornado(result_url, data)

    @gen.coroutine
    def _send_by_tornado(self, url, data):
        try:
            response = yield self._http_client.fetch(
                url,
                method='POST',
                headers={'Content-Type': 'application/json'},
                validate_cert=False,
                body=data)
        except Exception as exc:
            if hasattr(exc, 'response') and exc.response:
                exc = 'url:{}, exc:{}, body:{}'.format(url, exc,
                                                       exc.response.body)
            raise TaskLoggerError(data, str(exc))
        else:
            if response.code != 200:
                raise TaskLoggerError(data, reason=response.body)

    def _create_log(self, log, seq):
        assert isinstance(log, basestring)
        log = log + '\n'
        if self.wrap:
            log_msg = TaskLogMessage(task_id=self.task_id, log=log, seq=seq)
            data = json_encode({'messages': log_msg})
        else:
            data = log
        return data

    def _create_result(self, result, seq, exit_code):
        assert isinstance(result, basestring)
        result = result + '\n'
        if self.wrap:
            result_msg = TaskResultMessage(task_id=self.task_id,
                                           result=result,
                                           seq=seq,
                                           exit_code=exit_code)
            data = json_encode({'messages': result_msg})
        else:
            data = result
        return data
Code Example #33
    class TornadoTransmission():
        def __init__(self, max_concurrent_batches=10, block_on_send=False,
                     block_on_response=False, max_batch_size=100,
                     send_frequency=0.25, user_agent_addition=''):
            if not has_tornado:
                raise ImportError('TornadoTransmission requires tornado, but it was not found.')

            self.block_on_send = block_on_send
            self.block_on_response = block_on_response
            self.max_batch_size = max_batch_size
            self.send_frequency = send_frequency

            user_agent = "libhoney-py/" + VERSION
            if user_agent_addition:
                user_agent += " " + user_agent_addition

            self.http_client = AsyncHTTPClient(
                force_instance=True,
                defaults=dict(user_agent=user_agent))

            # libhoney adds events to the pending queue for us to send
            self.pending = Queue(maxsize=1000)
            # we hand back responses from the API on the responses queue
            self.responses = Queue(maxsize=2000)

            self.batch_data = {}
            self.sd = statsd.StatsClient(prefix="libhoney")
            self.batch_sem = Semaphore(max_concurrent_batches)

        def start(self):
            ioloop.IOLoop.current().spawn_callback(self._sender)

        def send(self, ev):
            '''send accepts an event and queues it to be sent'''
            self.sd.gauge("queue_length", self.pending.qsize())
            try:
                if self.block_on_send:
                    self.pending.put(ev)
                else:
                    self.pending.put_nowait(ev)
                self.sd.incr("messages_queued")
            except QueueFull:
                response = {
                    "status_code": 0,
                    "duration": 0,
                    "metadata": ev.metadata,
                    "body": "",
                    "error": "event dropped; queue overflow",
                }
                if self.block_on_response:
                    self.responses.put(response)
                else:
                    try:
                        self.responses.put_nowait(response)
                    except QueueFull:
                        # if the response queue is full when trying to add an event
                        # queue is full response, just skip it.
                        pass
                self.sd.incr("queue_overflow")

        # We're using the older decorator/yield model for compatibility with
        # Python versions before 3.5.
        # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
        @gen.coroutine
        def _sender(self):
            '''_sender is the control loop that pulls events off the `self.pending`
            queue and submits batches for actual sending. '''
            events = []
            last_flush = time.time()
            while True:
                try:
                    ev = yield self.pending.get(timeout=self.send_frequency)
                    if ev is None:
                        # signals shutdown
                        yield self._flush(events)
                        return
                    events.append(ev)
                    if (len(events) > self.max_batch_size or
                        time.time() - last_flush > self.send_frequency):
                        yield self._flush(events)
                        events = []
                except TimeoutError:
                    yield self._flush(events)
                    events = []
                    last_flush = time.time()

        @gen.coroutine
        def _flush(self, events):
            if not events:
                return
            for dest, group in group_events_by_destination(events).items():
                yield self._send_batch(dest, group)

        @gen.coroutine
        def _send_batch(self, destination, events):
            ''' Makes a single batch API request with the given list of events. The
            `destination` argument contains the write key, API host and dataset
            name used to build the request.'''
            start = time.time()
            status_code = 0

            try:
                # enforce max_concurrent_batches
                yield self.batch_sem.acquire()
                url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                              destination.dataset)
                payload = []
                for ev in events:
                    event_time = ev.created_at.isoformat()
                    if ev.created_at.tzinfo is None:
                        event_time += "Z"
                    payload.append({
                        "time": event_time,
                        "samplerate": ev.sample_rate,
                        "data": ev.fields()})
                req = HTTPRequest(
                    url,
                    method='POST',
                    headers={
                        "X-Honeycomb-Team": destination.writekey,
                        "Content-Type": "application/json",
                    },
                    body=json.dumps(payload, default=json_default_handler),
                )
                self.http_client.fetch(req, self._response_callback)
                # store the events that were sent so we can process responses later
                # it is important that we delete these eventually, or we'll run into memory issues
                self.batch_data[req] = {"start": start, "events": events}
            except Exception as e:
                # Catch all exceptions and hand them to the responses queue.
                self._enqueue_errors(status_code, e, start, events)
            finally:
                self.batch_sem.release()

        def _enqueue_errors(self, status_code, error, start, events):
            for ev in events:
                self.sd.incr("send_errors")
                self._enqueue_response(status_code, "", error, start, ev.metadata)

        def _response_callback(self, resp):
            # resp.request should be the same HTTPRequest object built by _send_batch
            # and mapped to values in batch_data
            events = self.batch_data[resp.request]["events"]
            start  = self.batch_data[resp.request]["start"]
            try:
                status_code = resp.code
                resp.rethrow()

                statuses = [d["status"] for d in json.loads(resp.body)]
                for ev, status in zip(events, statuses):
                    self._enqueue_response(status, "", None, start, ev.metadata)
                    self.sd.incr("messages_sent")
            except Exception as e:
                self._enqueue_errors(status_code, e, start, events)
                self.sd.incr("send_errors")
            finally:
                # clean up the data for this batch
                del self.batch_data[resp.request]

        def _enqueue_response(self, status_code, body, error, start, metadata):
            resp = {
                "status_code": status_code,
                "body": body,
                "error": error,
                "duration": (time.time() - start) * 1000,
                "metadata": metadata
            }
            if self.block_on_response:
                self.responses.put(resp)
            else:
                try:
                    self.responses.put_nowait(resp)
                except QueueFull:
                    pass

        def close(self):
            '''call close to send all in-flight requests and shut down the
                senders nicely. Times out after max 20 seconds per sending thread
                plus 10 seconds for the response queue'''
            try:
                self.pending.put(None, 10)
            except QueueFull:
                pass
            # signal to the responses queue that nothing more is coming.
            try:
                self.responses.put(None, 10)
            except QueueFull:
                pass

        def get_response_queue(self):
            ''' return the responses queue on to which will be sent the response
            objects from each event send'''
            return self.responses
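
Lifecycle sketch, assuming a running IOLoop and an event object exposing the attributes the sender reads above (created_at, sample_rate, fields(), metadata):

transmission = TornadoTransmission(max_batch_size=50)
transmission.start()                           # spawns _sender on the IOLoop
transmission.send(event)                       # event shape assumed as above
responses = transmission.get_response_queue()  # per-event send results
transmission.close()                           # flushes in-flight batches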
Code Example #34
File: countercache.py Project: wyj999/mtShow
class CounterCache(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        self.m_queue = Queue()
        self.m_CacheFlag = 1
        self.m_CounterCache = None
        self.m_Cache_A = defaultdict()
        self.m_Cache_B = defaultdict()

        self.database = Database(redis_conf = REDISEVER, password = STATUS_REDIS_PASS)

        self.cacheInit(self.m_Cache_A)
        self.cacheInit(self.m_Cache_B)

    def switchCache(self):
        if self.m_CacheFlag == 1:
            return self.m_Cache_A
        elif self.m_CacheFlag == 2:
            return self.m_Cache_B

    def changeCacheFlag(self):
        if self.m_CacheFlag == 1:
            self.m_CacheFlag = 2
        elif self.m_CacheFlag == 2:
            self.m_CacheFlag = 1
    
    def clearCache(self):
        if self.m_CacheFlag == 1:
            self.m_Cache_B.clear()
            self.cacheInit(self.m_Cache_B)
        elif self.m_CacheFlag == 2:
            self.m_Cache_A.clear()
            self.cacheInit(self.m_Cache_A)

    def cacheInit(self, cache):
        cache['pid_info'] = defaultdict(int)
        cache['eid_info'] = { 'pv':defaultdict(int), 'exchange_price':defaultdict(int) }
        cache['adx_info'] = { 'pv':defaultdict(int), 'exchange_price':defaultdict(int) }
        cache['aid_info'] = { 'exchange_price':defaultdict(int) }

    @tornado.gen.coroutine
    def queueMsgPut(self, msg):
        yield self.m_queue.put(msg)

    @tornado.gen.coroutine
    def queueMsgGet(self):
        while True:
            msg = yield self.m_queue.get()
            #print msg
            logger.info('QueueGet:%r' % msg)
            self.cacheInfoPut(msg)

    def cacheInfoPut(self, info):
        cache = self.switchCache()
        type = eid = pid = aid = price = adx = None
        if 'type' in info:
            type = info['type']
        if 'eid' in info:
            eid = info['eid']
        if 'pid' in info:
            pid = info['pid']
        if 'price' in info:
            price = info['price']
        if 'aid' in info:
            aid = info['aid']
        #if 'adx' in info:
        #    adx = info['adx']
        if type == 1 and eid and (price != None) and aid: # pv
            cache['aid_info']['exchange_price'][aid] = cache['aid_info']['exchange_price'][aid] + price
            cache['eid_info']['pv'][eid] = cache['eid_info']['pv'][eid] + 1
            cache['eid_info']['exchange_price'][eid] = cache['eid_info']['exchange_price'][eid] + price
            #cache['adx_info']['pv'][adx] = cache['adx_info']['pv'][adx] + 1
            #cache['adx_info']['exchange_price'][adx] = cache['adx_info']['exchange_price'][adx] + price
        else:
            return None


    def cacheDura(self):
        cache = None
        if self.m_CacheFlag == 1:
            cache = self.m_Cache_B
        if self.m_CacheFlag == 2:
            cache = self.m_Cache_A

        #loginfo(cache)
        if 'pid_info' in cache:
            pass
        if 'eid_info' in cache:
            it_p = cache['eid_info']['exchange_price']
            it_m = cache['eid_info']['pv']
            for eid in it_p:
                self.database.incEidHourSp(eid, it_p[eid])
                logger.debug("increase Order:%r Money:%r OK!" % (eid, it_p[eid]))
            for eid in it_m:
                self.database.incEidShow(eid, it_m[eid])
                logger.debug("increase Order:%r PV:%r OK!" % (eid, it_m[eid]))

        if 'aid_info' in cache:
            it_a = cache['aid_info']['exchange_price']
            for aid in it_a:
                self.database.incAidHourSp(aid, it_a[aid])
                self.database.decAdvBidSpend(aid, "-%.3f" % (float(it_a[aid]) / 1000))
                logger.debug("increase Advertiser:%s Money:%s!" % (aid, str(float(it_a[aid]) / 1000)))

    def run(self):
        while True:
            try:
                time.sleep(CACHE_DUR_FREQ)
                self.changeCacheFlag()
                self.cacheDura()
                self.clearCache()

            except Exception as e:
                logger.error(e)
                continue
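
A minimal wiring sketch for the cache class above (its real name is not shown, so StatsCache below is an assumed name; m_queue, CACHE_DUR_FREQ, and logger are taken to be defined elsewhere in the module): the IOLoop drains the message queue into the active cache while a plain thread periodically flips the flag and flushes the now-inactive cache to Redis.

# Hypothetical usage; StatsCache stands in for the class defined above.
import threading
import tornado.ioloop

stats = StatsCache()

# Drain queued messages into the active cache on the IOLoop.
io_loop = tornado.ioloop.IOLoop.current()
io_loop.spawn_callback(stats.queueMsgGet)

# Flip the cache flag and flush the inactive cache every CACHE_DUR_FREQ seconds.
flusher = threading.Thread(target=stats.run)
flusher.daemon = True
flusher.start()

io_loop.start()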
Code example #42
0
File: batched.py Project: broxtronix/distributed
class BatchedStream(object):
    """ Mostly obsolete, see BatchedSend """

    def __init__(self, stream, interval):
        self.stream = stream
        self.interval = interval / 1000.0
        self.last_transmission = default_timer()
        self.send_q = Queue()
        self.recv_q = Queue()
        self._background_send_coroutine = self._background_send()
        self._background_recv_coroutine = self._background_recv()
        self._broken = None

        # No-op periodic callback: keeps the IOLoop ticking so the background
        # send/recv coroutines are scheduled promptly.
        self.pc = PeriodicCallback(lambda: None, 100)
        self.pc.start()

    @gen.coroutine
    def _background_send(self):
        with log_errors():
            while True:
                msg = yield self.send_q.get()
                if msg == "close":
                    break
                msgs = [msg]
                now = default_timer()
                wait_time = self.last_transmission + self.interval - now
                if wait_time > 0:
                    yield gen.sleep(wait_time)
                while not self.send_q.empty():
                    msgs.append(self.send_q.get_nowait())

                try:
                    yield write(self.stream, msgs)
                except StreamClosedError:
                    self.recv_q.put_nowait("close")
                    self._broken = True
                    break

                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                for _ in msgs:
                    self.send_q.task_done()

    @gen.coroutine
    def _background_recv(self):
        with log_errors():
            while True:
                try:
                    msgs = yield read(self.stream)
                except StreamClosedError:
                    self.recv_q.put_nowait("close")
                    self.send_q.put_nowait("close")
                    self._broken = True
                    break
                assert isinstance(msgs, list)
                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                for msg in msgs:
                    self.recv_q.put_nowait(msg)

    @gen.coroutine
    def flush(self):
        yield self.send_q.join()

    @gen.coroutine
    def send(self, msg):
        if self._broken:
            raise StreamClosedError("Batch Stream is Closed")
        else:
            self.send_q.put_nowait(msg)

    @gen.coroutine
    def recv(self):
        result = yield self.recv_q.get()
        if result == "close":
            raise StreamClosedError("Batched Stream is Closed")
        else:
            raise gen.Return(result)

    @gen.coroutine
    def close(self):
        yield self.flush()
        raise gen.Return(self.stream.close())

    def closed(self):
        return self.stream.closed()
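
A rough usage sketch for BatchedStream, assuming a connected Tornado IOStream and the same read/write helpers the class itself uses: messages queued within one interval are written together as a single list.

from tornado import gen

@gen.coroutine
def demo(stream):
    batched = BatchedStream(stream, interval=10)    # coalesce sends for up to 10 ms
    for i in range(100):
        yield batched.send({'op': 'ping', 'i': i})  # queued, not written immediately
    yield batched.flush()                           # wait for send_q to drain
    reply = yield batched.recv()                    # one message from recv_q
    yield batched.close()
    raise gen.Return(reply)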
Code example #43
0
File: Server.py Project: rwth-i6/returnn
class Model:
  def __init__(self, config_file):
    self.lock = locks.Lock()
    self.classification_queue = Queue()

    print('loading config %s' % config_file, file=log.v5)
    # Load and setup config
    try:
      self.config = Config.Config()
      self.config.load_file(config_file)
      self.pause_after_first_seq = self.config.float('pause_after_first_seq', 0.2)
      self.batch_size = self.config.int('batch_size', 5000)
      self.max_seqs = self.config.int('max_seqs', -1)
    except Exception:
      print('Error: loading config %s failed' % config_file, file=log.v1)
      raise

    try:
      self.devices = self._init_devices()
    except Exception:
      print('Error: Loading devices for config %s failed' % config_file, file=log.v1)
      raise

    print('Starting engine for config %s' % config_file, file=log.v5)
    self.engine = Engine.Engine(self.devices)
    try:
      self.engine.init_network_from_config(config=self.config)
    except Exception:
      print('Error: Loading network for config %s failed' % config_file, file=log.v1)
      raise

    IOLoop.current().spawn_callback(self.classify_in_background)

    self.last_used = datetime.datetime.now()

  def _init_devices(self):
    """
    Initiates the required devices for a config. Same as the function initDevices in
    rnn.py.
    :return: A list with the devices used.
    """
    oldDeviceConfig = ",".join(self.config.list('device', ['default']))
    if "device" in TheanoFlags:
      # This is important because Theano likely already has initialized that device.
      self.config.set("device", TheanoFlags["device"])
      print("Devices: Use %s via THEANO_FLAGS instead of %s." % (TheanoFlags["device"], oldDeviceConfig), file=log.v4)
    devArgs = get_devices_init_args(self.config)
    assert len(devArgs) > 0
    devices = [Device(**kwargs) for kwargs in devArgs]
    for device in devices:
      while not device.initialized:
        time.sleep(0.25)
    if devices[0].blocking:
      print("Devices: Used in blocking / single proc mode.", file=log.v4)
    else:
      print("Devices: Used in multiprocessing mode.", file=log.v4)
    return devices

  @tornado.gen.coroutine
  def classify_in_background(self):
    while True:
      requests = []
      # fetch first request
      r = yield self.classification_queue.get()
      requests.append(r)
      # grab all other waiting requests
      try:
        while True:
          requests.append(self.classification_queue.get_nowait())
      except QueueEmpty:
        pass

      output_dim = {}
      # Do dataset creation and classification.
      dataset = StaticDataset(data=[r.data for r in requests], output_dim=output_dim)
      dataset.init_seq_order()
      batches = dataset.generate_batches(recurrent_net=self.engine.network.recurrent,
                                         batch_size=self.batch_size, max_seqs=self.max_seqs)

      with (yield self.lock.acquire()):
        ctt = ForwardTaskThread(self.engine.network, self.devices, dataset, batches)
        yield ctt.join()

      try:
        for i in range(dataset.num_seqs):
          requests[i].future.set_result(ctt.result[i])
          self.classification_queue.task_done()
      except Exception as e:
        print('exception', e)
        raise

  @tornado.gen.coroutine
  def classify(self, data):
    self.last_used = datetime.datetime.now()
    request = ClassificationRequest(data)

    yield self.classification_queue.put(request)
    yield request.future

    return request.future.result()
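
A hypothetical handler showing how classify might be called; it assumes a module-level `model = Model(...)` created at startup and that tornado.web and tornado.escape are imported, as the surrounding server code suggests.

class ClassifyHandler(tornado.web.RequestHandler):
  @tornado.gen.coroutine
  def post(self):
    data = tornado.escape.json_decode(self.request.body)
    # classify() enqueues the request; the background coroutine batches
    # waiting requests and resolves each request's future with its result.
    result = yield model.classify(data)
    self.finish({'result': result})  # assumes the result is JSON-serializable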
Code example #44
0
class BlogBackup(object):
    _default_dir_name = 'seg_blog_backup'

    def _generate_save_dir(self):
        cur_dir = os.path.dirname(__file__)
        self.save_path = os.path.join(cur_dir, self._default_dir_name)
        if not os.path.isdir(self.save_path):
            os.mkdir(self.save_path)

    def _parse_save_path(self):
        if self.save_path:
            if os.path.exists(self.save_path) and \
                    os.path.isdir(self.save_path):
                return
            else:
                raise BlogSavePathError(
                    "'%s' not exists or is not dir!" % self.save_path)
        else:
            self._generate_save_dir()

    def _get_user_cookies(self):
        url = target_url + login_page_path
        self.driver.get(url)
        try:
            user_input = self.driver.find_element_by_name('mail')
            passwd_input = self.driver.find_element_by_name('password')
            submit_btn = self.driver.find_element_by_class_name('pr20')
        except NoSuchElementException:
            raise PageHtmlChanged(
                "%s login page structure have changed!" % _domain)

        user_input.send_keys(self.username)
        passwd_input.send_keys(self.passwd)
        submit_btn.click()
        try:
            WebDriverWait(self.driver, 3).until(staleness_of(submit_btn))
        except TimeoutException:
            raise Exception("Wrong username or password!")

        WebDriverWait(self.driver, timeout=10).until(has_page_load)
        try_times = 0
        while True:
            time.sleep(1)
            if url != self.driver.current_url:
                return self.driver.get_cookies()

            try_times += 1
            if try_times > 10:
                raise Exception("Getting cookie info failed!")

    def _get_driver(self):
        if self.phantomjs_path:
            try:
                return webdriver.PhantomJS(
                    executable_path=self.phantomjs_path,
                    service_log_path=os.path.devnull)
            except WebDriverException:
                raise PhantomjsPathError("Phantomjs locate path invalid!")
        else:
            return webdriver.PhantomJS(service_log_path=os.path.devnull)

    def __init__(self, **conf):
        self.username = conf['username']
        self.passwd = conf['passwd']
        self.phantomjs_path = conf.get('phantomjs_path')
        self.save_path = conf.get('save_path')
        self._q = Queue()

        self._parse_save_path()
        self.driver = self._get_driver()
        self._cookies = self._get_user_cookies()

    @gen.coroutine
    def run(self):
        self.__filter_cookies()

        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()

        yield self._q.join()

    def __filter_cookies(self):
        self._cookies = {k['name']: k['value'] for k in self._cookies if
                         k['domain'] == _domain}

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d('.stream-list__item > .summary > h2 > a')
        for link in link_elements:
            yield self._q.put(d(link).attr('href'))

        next_ele = d('.pagination li.next a')
        if next_ele:
            next_page_url = target_url + next_ele.attr('href')
            # Recurse as a coroutine; without `yield` the returned future
            # would be dropped and the next page never fetched.
            yield self._fetch_blog_list_page(next_page_url)

    @gen.coroutine
    def _fetch_essay_content(self):
        while True:
            try:
                essay_path = yield self._q.get(timeout=1)
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                file_name = title + '.md'
                real_file_name = os.path.join(self.save_path, file_name)
                with open(real_file_name, 'w') as f:
                    f.write(content.encode('utf8'))
            except gen.TimeoutError:
                raise gen.Return()
            finally:
                self._q.task_done()
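
A sketch of driving the backup end to end; run() is a coroutine, so IOLoop.run_sync can drive it (the credentials below are placeholders).

from tornado.ioloop import IOLoop

backup = BlogBackup(
    username='user@example.com',  # placeholder credentials
    passwd='secret',
    phantomjs_path=None,          # fall back to PhantomJS on PATH
    save_path=None,               # fall back to the default backup dir
)
IOLoop.current().run_sync(backup.run)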
Code example #45
0
async def test_listeners(known_server, handlers, jsonrpc_init_msg):
    """will some listeners listen?"""
    handler, ws_handler = handlers
    manager = handler.manager

    manager.all_listeners = ["jupyter_lsp.tests.listener.dummy_listener"]

    manager.initialize()
    manager._listeners["client"] = []  # hide predefined client listeners

    assert len(manager._listeners["all"]) == 1

    dummy_listener = manager._listeners["all"][0]
    assert re.match(
        ("<MessageListener listener=<function dummy_listener at .*?>,"
         " method=None, language_server=None>"),
        repr(dummy_listener),
    )

    handler_listened = Queue()
    server_listened = Queue()
    all_listened = Queue()

    # some client listeners
    @lsp_message_listener("client",
                          language_server=known_server,
                          method="initialize")
    async def client_listener(scope, message, language_server, manager):
        await handler_listened.put(message)

    @lsp_message_listener("client", method=r"not-a-method")
    async def other_client_listener(scope, message, language_server,
                                    manager):  # pragma: no cover
        await handler_listened.put(message)
        raise NotImplementedError("shouldn't get here")

    # some server listeners
    @lsp_message_listener("server", language_server=None, method=None)
    async def server_listener(scope, message, language_server, manager):
        await server_listened.put(message)

    @lsp_message_listener("server", language_server=r"not-a-language-server")
    async def other_server_listener(scope, message, language_server,
                                    manager):  # pragma: no cover
        await handler_listened.put(message)
        raise NotImplementedError("shouldn't get here")

    # an all listener
    @lsp_message_listener("all")
    async def all_listener(scope, message, language_server,
                           manager):  # pragma: no cover
        await all_listened.put(message)

    assert len(manager._listeners["server"]) == 2
    assert len(manager._listeners["client"]) == 2
    assert len(manager._listeners["all"]) == 2

    ws_handler.open(known_server)

    await ws_handler.on_message(jsonrpc_init_msg)

    results = await asyncio.wait_for(
        asyncio.gather(
            handler_listened.get(),
            server_listened.get(),
            all_listened.get(),
            all_listened.get(),
            return_exceptions=True,
        ),
        20,
    )
    assert all([isinstance(res, dict) for res in results])

    ws_handler.on_close()

    handler_listened.task_done()
    server_listened.task_done()
    all_listened.task_done()
    all_listened.task_done()

    for listener in [
        client_listener,
        other_client_listener,
        server_listener,
        other_server_listener,
        all_listener,
    ]:
        manager.unregister_message_listener(listener)

    assert not manager._listeners["server"]
    assert not manager._listeners["client"]
    assert len(manager._listeners["all"]) == 1
Code example #46
0
File: executor.py Project: aterrel/distributed
class Executor(object):
    """ Distributed executor with data dependencies

    This executor resembles executors in concurrent.futures but also allows
    Futures within submit/map calls.

    Provide center address on initialization

    >>> executor = Executor(('127.0.0.1', 8787))  # doctest: +SKIP

    Use ``submit`` method like normal

    >>> a = executor.submit(add, 1, 2)  # doctest: +SKIP
    >>> b = executor.submit(add, 10, 20)  # doctest: +SKIP

    Additionally, provide results of submit calls (futures) to further submit
    calls:

    >>> c = executor.submit(add, a, b)  # doctest: +SKIP

    This allows for the dynamic creation of complex dependencies.
    """
    def __init__(self, center=None, scheduler=None, start=True, delete_batch_time=1, loop=None):
        self.futures = dict()
        self.refcount = defaultdict(lambda: 0)
        self.loop = loop or IOLoop()
        self.scheduler_queue = Queue()
        self.report_queue = Queue()

        if scheduler:
            if isinstance(scheduler, Scheduler):
                self.scheduler = scheduler
                if not center:
                    self.center = scheduler.center
            else:
                raise NotImplementedError()
                # self.scheduler = coerce_to_rpc(scheduler)
        else:
            self.scheduler = Scheduler(center, loop=self.loop,
                                       delete_batch_time=delete_batch_time)
        if center:
            self.center = coerce_to_rpc(center)

        if not getattr(self, 'center', None):
            raise ValueError("Provide Center address")

        if start:
            self.start()

    def start(self):
        """ Start scheduler running in separate thread """
        if hasattr(self, '_loop_thread'):
            return
        from threading import Thread
        self._loop_thread = Thread(target=self.loop.start)
        self._loop_thread.daemon = True
        _global_executor[0] = self
        self._loop_thread.start()
        sync(self.loop, self._start)

    def send_to_scheduler(self, msg):
        if isinstance(self.scheduler, Scheduler):
            self.loop.add_callback(self.scheduler_queue.put_nowait, msg)
        else:
            raise NotImplementedError()

    @gen.coroutine
    def _start(self):
        if self.scheduler.status != 'running':
            yield self.scheduler._sync_center()
            self.scheduler.start()

        start_event = Event()
        self.coroutines = [
                self.scheduler.handle_queues(self.scheduler_queue, self.report_queue),
                self.report(start_event)]

        _global_executor[0] = self
        yield start_event.wait()
        logger.debug("Started scheduling coroutines. Synchronized")

    def __enter__(self):
        if not self.loop._running:
            self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.shutdown()

    def _inc_ref(self, key):
        self.refcount[key] += 1

    def _dec_ref(self, key):
        self.refcount[key] -= 1
        if self.refcount[key] == 0:
            del self.refcount[key]
            self._release_key(key)

    def _release_key(self, key):
        """ Release key from distributed memory """
        logger.debug("Release key %s", key)
        if key in self.futures:
            self.futures[key]['event'].clear()
            del self.futures[key]
        self.send_to_scheduler({'op': 'release-held-data', 'key': key})

    @gen.coroutine
    def report(self, start_event):
        """ Listen to scheduler """
        while True:
            if isinstance(self.scheduler, Scheduler):
                msg = yield self.report_queue.get()
            elif isinstance(self.scheduler, IOStream):
                raise NotImplementedError()
                msg = yield read(self.scheduler)
            else:
                raise NotImplementedError()

            if msg['op'] == 'stream-start':
                start_event.set()
            if msg['op'] == 'close':
                break
            if msg['op'] == 'key-in-memory':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'finished'
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'lost-data':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'lost'
                    self.futures[msg['key']]['event'].clear()
            if msg['op'] == 'task-erred':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'error'
                    self.futures[msg['key']]['exception'] = msg['exception']
                    self.futures[msg['key']]['traceback'] = msg['traceback']
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'restart':
                logger.info("Receive restart signal from scheduler")
                events = [d['event'] for d in self.futures.values()]
                self.futures.clear()
                for e in events:
                    e.set()
                with ignoring(AttributeError):
                    self._restart_event.set()

    @gen.coroutine
    def _shutdown(self, fast=False):
        """ Send shutdown signal and wait until scheduler completes """
        self.send_to_scheduler({'op': 'close'})
        if _global_executor[0] is self:
            _global_executor[0] = None
        if not fast:
            yield self.coroutines

    def shutdown(self, timeout=10):
        """ Send shutdown signal and wait until scheduler terminates """
        self.send_to_scheduler({'op': 'close'})
        self.loop.stop()
        self._loop_thread.join(timeout=timeout)
        if _global_executor[0] is self:
            _global_executor[0] = None

    def submit(self, func, *args, **kwargs):
        """ Submit a function application to the scheduler

        Parameters
        ----------
        func: callable
        *args:
        **kwargs:
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> c = executor.submit(add, a, b)  # doctest: +SKIP

        Returns
        -------
        Future

        See Also
        --------
        distributed.executor.Executor.map:
        """
        if not callable(func):
            raise TypeError("First input to submit must be a callable function")

        key = kwargs.pop('key', None)
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)

        if key is None:
            if pure:
                key = funcname(func) + '-' + tokenize(func, kwargs, *args)
            else:
                key = funcname(func) + '-' + next(tokens)

        if key in self.futures:
            return Future(key, self)

        if kwargs:
            task = (apply, func, args, kwargs)
        else:
            task = (func,) + args

        if workers is not None:
            restrictions = {key: workers}
        else:
            restrictions = {}

        logger.debug("Submit %s(...), %s", funcname(func), key)
        self.send_to_scheduler({'op': 'update-graph',
                                'dsk': {key: task},
                                'keys': [key],
                                'restrictions': restrictions})

        return Future(key, self)

    def map(self, func, *iterables, **kwargs):
        """ Map a function on a sequence of arguments

        Arguments can be normal objects or Futures

        Parameters
        ----------
        func: callable
        iterables: Iterables
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> L = executor.map(func, sequence)  # doctest: +SKIP

        Returns
        -------
        list of futures

        See also
        --------
        distributed.executor.Executor.submit
        """
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)
        if not callable(func):
            raise TypeError("First input to map must be a callable function")
        iterables = [list(it) for it in iterables]
        if pure:
            keys = [funcname(func) + '-' + tokenize(func, kwargs, *args)
                    for args in zip(*iterables)]
        else:
            uid = str(uuid.uuid4())
            keys = [funcname(func) + '-' + uid + '-' + next(tokens)
                    for i in range(min(map(len, iterables)))]

        if not kwargs:
            dsk = {key: (func,) + args
                   for key, args in zip(keys, zip(*iterables))}
        else:
            dsk = {key: (apply, func, args, kwargs)
                   for key, args in zip(keys, zip(*iterables))}

        if isinstance(workers, (list, set)):
            if workers and isinstance(first(workers), (list, set)):
                if len(workers) != len(keys):
                    raise ValueError("You only provided %d worker restrictions"
                    " for a sequence of length %d" % (len(workers), len(keys)))
                restrictions = dict(zip(keys, workers))
            else:
                restrictions = {key: workers for key in keys}
        elif workers is None:
            restrictions = {}
        else:
            raise TypeError("Workers must be a list or set of workers or None")

        logger.debug("map(%s, ...)", funcname(func))
        self.send_to_scheduler({'op': 'update-graph',
                                'dsk': dsk,
                                'keys': keys,
                                'restrictions': restrictions})

        return [Future(key, self) for key in keys]

    @gen.coroutine
    def _gather(self, futures):
        futures2, keys = unpack_remotedata(futures)
        keys = list(keys)

        while True:
            logger.debug("Waiting on futures to clear before gather")
            yield All([self.futures[key]['event'].wait() for key in keys
                                                    if key in self.futures])
            exceptions = [self.futures[key]['exception'] for key in keys
                          if self.futures[key]['status'] == 'error']
            if exceptions:
                raise exceptions[0]
            try:
                data = yield _gather(self.center, keys)
            except KeyError as e:
                logger.debug("Couldn't gather keys %s", e)
                self.send_to_scheduler({'op': 'missing-data',
                                        'missing': e.args})
                for key in e.args:
                    self.futures[key]['event'].clear()
            else:
                break

        data = dict(zip(keys, data))

        result = pack_data(futures2, data)
        raise gen.Return(result)

    def gather(self, futures):
        """ Gather futures from distributed memory

        Accepts a future or any nested core container of futures

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> x = e.submit(add, 1, 2)  # doctest: +SKIP
        >>> e.gather(x)  # doctest: +SKIP
        3
        >>> e.gather([x, [x], x])  # doctest: +SKIP
        [3, [3], 3]
        """
        return sync(self.loop, self._gather, futures)

    @gen.coroutine
    def _scatter(self, data, workers=None):
        remotes = yield self.scheduler._scatter(None, data, workers)
        if isinstance(remotes, list):
            remotes = [Future(r.key, self) for r in remotes]
            keys = {r.key for r in remotes}
        elif isinstance(remotes, dict):
            remotes = {k: Future(v.key, self) for k, v in remotes.items()}
            keys = set(remotes)

        for key in keys:
            self.futures[key]['status'] = 'finished'
            self.futures[key]['event'].set()

        raise gen.Return(remotes)

    def scatter(self, data, workers=None):
        """ Scatter data into distributed memory

        Accepts a list of data elements or dict of key-value pairs

        Optionally provide a set of workers to constrain the scatter.  Specify
        workers as hostname/port pairs, i.e.  ('127.0.0.1', 8787).
        Default port is 8788.

        Examples
        --------
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.scatter([1, 2, 3])  # doctest: +SKIP
        [RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>,
         RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>,
         RemoteData<center=127.0.0.1:8787, key=d1d26ff2-8...>]
        >>> e.scatter({'x': 1, 'y': 2, 'z': 3})  # doctest: +SKIP
        {'x': RemoteData<center=127.0.0.1:8787, key=x>,
         'y': RemoteData<center=127.0.0.1:8787, key=y>,
         'z': RemoteData<center=127.0.0.1:8787, key=z>}

        >>> e.scatter([1, 2, 3], workers=[('hostname', 8788)])  # doctest: +SKIP
        """
        return sync(self.loop, self._scatter, data, workers=workers)

    @gen.coroutine
    def _get(self, dsk, keys, restrictions=None, raise_on_error=True):
        flatkeys = list(flatten([keys]))
        futures = {key: Future(key, self) for key in flatkeys}

        self.send_to_scheduler({'op': 'update-graph',
                                'dsk': dsk,
                                'keys': flatkeys,
                                'restrictions': restrictions or {}})

        packed = pack_data(keys, futures)
        if raise_on_error:
            result = yield self._gather(packed)
        else:
            try:
                result = yield self._gather(packed)
                result = 'OK', result
            except Exception as e:
                result = 'error', e
        raise gen.Return(result)

    def get(self, dsk, keys, **kwargs):
        """ Gather futures from distributed memory

        Parameters
        ----------
        dsk: dict
        keys: object, or nested lists of objects
        restrictions: dict (optional)
            A mapping of {key: {set of worker hostnames}} that restricts where
            jobs can take place

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.get({'x': (add, 1, 2)}, 'x')  # doctest: +SKIP
        3
        """
        status, result = sync(self.loop, self._get, dsk, keys,
                              raise_on_error=False, **kwargs)

        if status == 'error':
            raise result
        else:
            return result

    def compute(self, *args, **kwargs):
        """ Compute dask collections on cluster

        Parameters
        ----------
        args: iterable of dask objects
            Collections like dask.array or dataframe or dask.value objects
        sync: bool (optional)
            Returns Futures if False (default) or concrete values if True

        Returns
        -------
        Tuple of Futures or concrete values

        Examples
        --------

        >>> from dask import do, value
        >>> from operator import add
        >>> x = dask.do(add)(1, 2)
        >>> y = dask.do(add)(x, x)
        >>> xx, yy = executor.compute(x, y)  # doctest: +SKIP
        >>> xx  # doctest: +SKIP
        <Future: status: finished, key: add-8f6e709446674bad78ea8aeecfee188e>
        >>> xx.result()  # doctest: +SKIP
        3
        >>> yy.result()  # doctest: +SKIP
        6
        """
        sync = kwargs.pop('sync', False)
        assert not kwargs
        if sync:
            return dask.compute(*args, get=self.get)

        variables = [a for a in args if isinstance(a, Base)]

        groups = groupby(lambda x: x._optimize, variables)
        dsk = merge([opt(merge([v.dask for v in val]),
                         [v._keys() for v in val])
                    for opt, val in groups.items()])
        names = ['finalize-%s' % tokenize(v) for v in variables]
        dsk2 = {name: (v._finalize, v, v._keys()) for name, v in zip(names, variables)}

        self.loop.add_callback(self.scheduler_queue.put_nowait,
                                {'op': 'update-graph',
                                'dsk': merge(dsk, dsk2),
                                'keys': names})

        i = 0
        futures = []
        for arg in args:
            if isinstance(arg, Base):
                futures.append(Future(names[i], self))
                i += 1
            else:
                futures.append(arg)

        return futures

    @gen.coroutine
    def _restart(self):
        self.send_to_scheduler({'op': 'restart'})
        self._restart_event = Event()
        yield self._restart_event.wait()

        raise gen.Return(self)

    def restart(self):
        """ Restart the distributed network

        This kills all active work, deletes all data on the network, and
        restarts the worker processes.
        """
        return sync(self.loop, self._restart)

    @gen.coroutine
    def _upload_file(self, filename, raise_on_error=True):
        with open(filename, 'rb') as f:
            data = f.read()
        _, fn = os.path.split(filename)
        d = yield self.center.broadcast(msg={'op': 'upload_file',
                                             'filename': fn,
                                             'data': data})

        if any(isinstance(v, Exception) for v in d.values()):
            exception = next(v for v in d.values() if isinstance(v, Exception))
            if raise_on_error:
                raise exception
            else:
                raise gen.Return(exception)

        assert all(len(data) == v for v in d.values())

    def upload_file(self, filename):
        """ Upload local package to workers

        Parameters
        ----------
        filename: string
            Filename of .py file to send to workers
        """
        result = sync(self.loop, self._upload_file, filename,
                        raise_on_error=False)
        if isinstance(result, Exception):
            raise result
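
The docstrings above demonstrate submit and gather in isolation; here is a hedged end-to-end sketch using the context-manager support defined by __enter__/__exit__ (the address is a placeholder for a running center):

from operator import add

with Executor(('127.0.0.1', 8787)) as e:  # starts the loop thread on entry
    a = e.submit(add, 1, 2)
    b = e.submit(add, a, 10)              # futures can feed further submits
    print(e.gather(b))                    # blocks until the result arrives: 13
# shutdown() runs automatically on exit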
Code example #47
0
File: testserver.py Project: Toshihiko-Tabata/botw4m
class ReceiveQueue():
    u"""LINEからの受信メッセージキュー
    """
    # BOT definitions
    Content_Type = "application/json"
    X_Line_ChannelID = "ChannelID"
    X_Line_ChannelSecret = "ChannelSecret"
    X_Line_Trusted_User_With_ACL = "MID"
    # Request headers
    REQUEST_HEADER = {
        "Content-Type": "application/json; charset=UTF-8",
        "X-Line-ChannelID": X_Line_ChannelID,
        "X-Line-ChannelSecret": X_Line_ChannelSecret,
        "X-Line-Trusted-User-With-ACL": X_Line_Trusted_User_With_ACL
    }
    # URL of the POST Event API
    POST_EVENT_API = "https://trialbot-api.line.me/v1/events"
    # URL of the Profiles API (GET)
    PROFILES_API = "https://trialbot-api.line.me/v1/profiles"

    def __init__(self):
        self.queued_items = Queue()
        self.db = DatabaseUtil("db/database.db")
        if not self.db.checkDuplicate("MessageObjects"):
            sql = ReceivingEventObject().getQuery_MessageObjects("create")
            self.db.execute(sql)
        if not self.db.checkDuplicate("OperationObjects"):
            sql = ReceivingEventObject().getQuery_OperationObjects("create")
            self.db.execute(sql)

    @tornado.gen.coroutine
    def watch_queue(self):
        while True:
            items = yield self.queued_items.get()
            self.parse_receiving_event(items[0], items[1])

    def parse_receiving_event(self, req_head, req_body):
        u"""LINEServerからのイベント通知を解析
        """
        # リクエストの署名検証
        if not self.validate_signature(req_head['X-LINE-ChannelSignature'], req_body):
            print("Signature NG")
            return

        # リクエストの解析
        # 1回の通知でresultが複数含まれている場合もある
        # 以下の処理は1リクエストずつ処理
        json_dic = tornado.escape.json_decode(req_body)
        for result in json_dic["result"]:
            print(result)
            result_jsonstr = tornado.escape.json_encode(result)
            reo = ReceivingEventObject(result)

            if reo.isEventTypeMessage():
                print("MessageObject")
                # Message notification (MessageObject)
                mo = reo.content
                # Store in the DB
                sql = reo.getQuery_MessageObjects('insert')
                d = {
                    'id': mo.id, 'contentType': mo.contentType, 'from': mo._from,
                    'createdTime': mo.createdTime, 'to': mo.to, 'toType': mo.toType,
                    'text': mo.text, 'json_data': result_jsonstr
                }
                self.db.execute(sql, d)
                # Send a reply to the event notification
                self.toPOSTEventAPI(
                    tornado.escape.json_encode(
                        SendingEventObject(reo).createRequestBody()
                    )
                )
            elif reo.isEventTypeOperation():
                # User operation (OperationObject)
                oo = reo.content
                # Store in the DB
                sql = reo.getQuery_OperationObjects('insert')

                d = {
                    'revision': oo.revision, 'opType': oo.opType,
                    'params0': oo.params[0], 'params1': '', 'params2': '',
                    'json_data': result_jsonstr
                }
                self.db.execute(sql, d)
                # The user added the bot as a friend (including unblocking)
                if reo.content.isFriendsAdd():
                    print("User Operation Add    ",reo.content.params[0])
                    # Fetch the user's profile
                    result = self.toProfilesAPI(reo.content.params[0])
                    for v in result:
                        name = v["displayName"]
                    # Send a thank-you message
                    seo = SendingEventObject(reo)
                    seo.createSendContent_Thanks(name)
                    res = self.toPOSTEventAPI(
                        tornado.escape.json_encode(
                            seo.createRequestBody()
                        )
                    )
                elif reo.content.isFriendsBlock():
                    # The user blocked the bot
                    print("User Operation Block    ",reo.content.params[0])

    @tornado.gen.coroutine
    def toPOSTEventAPI(self, send_body):
        u"""LINEServerへ送信
        ※LINEServerへ送信する際はこちらがクライアント
        """
        # リクエストボディ(Sending Event ObjectのJSONデータ)
        # ※リクエストボディは8Kib以下であること(LINEの仕様)
        http_client = tornado.httpclient.AsyncHTTPClient()
        response = None
        try:
            response = yield http_client.fetch(
                self.POST_EVENT_API,
                method='POST',
                headers=self.REQUEST_HEADER,
                body=send_body
            )
        except tornado.httpclient.HTTPError as e:
            # HTTPError is raised for non-200 responses
            print("Error: " + str(e))
        except Exception as e:
            # Other errors
            print("Error: " + str(e))
        http_client.close()
        return response

    def toProfilesAPI(self, mid):
        u"""PROFILES APIでLINEServerにプロフィールを問い合わせ
        ※Contact Response ObjectのJSONデータが返却されてくる
        ※TODO:複数一括リクエストは未対応
        """
        # リクエストはGetでユーザーの識別子 (複数時はカンマ区切り)を渡す
        http_client = tornado.httpclient.HTTPClient()
        url = url_concat(self.PROFILES_API, {"mids": mid})
        print(url)
        result = None
        try:
            response = http_client.fetch(
                url,
                method='GET',
                headers=self.REQUEST_HEADER,
                body=None
            )
            json_dic = tornado.escape.json_decode(response.body)
            result = json_dic["contacts"]
        except tornado.httpclient.HTTPError as e:
            # HTTPError is raised for non-200 responses
            print("Error: " + str(e))
        except Exception as e:
            # Other errors
            print("Error: " + str(e))
        http_client.close()
        return result

    def validate_signature(self, signature, content):
        u"""LINEServerからのリクエストかを署名検証する
        [検証内容]
        1. ChannelSecretを秘密鍵としHMAC-SHA256でリクエストボディのダイジェスト値を取得
        2. ダイジェスト値をBase64化した値がリクエストヘッダのChannelSignatureと合致するか検証
        ※LINE BOT SDK for Python(https://github.com/studio3104/line-bot-sdk-python)から借用
        """
        return hmac.compare_digest(
            signature.encode('utf-8'),
            base64.b64encode(
                hmac.new(
                    self.X_Line_ChannelSecret.encode('utf-8'),
                    msg=content,
                    digestmod=hashlib.sha256
                ).digest()
            )
        )
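
For clarity, here is the sender's side of the scheme validate_signature checks, as a standalone snippet (the secret and body are placeholders):

import base64
import hashlib
import hmac

channel_secret = b'your-channel-secret'  # placeholder
request_body = b'{"result": []}'         # raw bytes of the request body

# HMAC-SHA256 over the body, keyed by the channel secret, then Base64:
signature = base64.b64encode(
    hmac.new(channel_secret, msg=request_body, digestmod=hashlib.sha256).digest()
)
# A client would send this value in the X-LINE-ChannelSignature header.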
Code example #48
0
File: worker.py Project: coobas/distributed
class Worker(Server):
    """ Worker Node

    Workers perform two functions:

    1.  **Serve data** from a local dictionary
    2.  **Perform computation** on that data and on data from peers

    Additionally workers keep a Center informed of their data and use that
    Center to gather data from other workers when necessary to perform a
    computation.

    You can start a worker with the ``dworker`` command line application::

        $ dworker scheduler-ip:port

    **State**

    * **data:** ``{key: object}``:
        Dictionary mapping keys to actual values
    * **active:** ``{key}``:
        Set of keys currently under computation
    * **ncores:** ``int``:
        Number of cores used by this worker process
    * **executor:** ``concurrent.futures.ThreadPoolExecutor``:
        Executor used to perform computation
    * **local_dir:** ``path``:
        Path on local machine to store temporary files
    * **center:** ``rpc``:
        Location of center or scheduler.  See ``.ip/.port`` attributes.
    * **name:** ``string``:
        Alias
    * **services:** ``{str: Server}``:
        Auxiliary web servers running on this worker
    * **service_ports:** ``{str: port}``:
        Ports on which the auxiliary services listen

    Examples
    --------

    Create centers and workers in Python:

    >>> from distributed import Center, Worker
    >>> c = Center('192.168.0.100', 8787)  # doctest: +SKIP
    >>> w = Worker(c.ip, c.port)  # doctest: +SKIP
    >>> yield w._start(port=8788)  # doctest: +SKIP

    Or use the command line::

       $ dcenter
       Start center at 127.0.0.1:8787

       $ dworker 127.0.0.1:8787
       Start worker at:            127.0.0.1:8788
       Registered with center at:  127.0.0.1:8787

    See Also
    --------
    distributed.center.Center:
    """

    def __init__(self, center_ip, center_port, ip=None, ncores=None,
                 loop=None, local_dir=None, services=None, service_ports=None,
                 name=None, **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        self.executor = ThreadPoolExecutor(self.ncores)
        self.thread_tokens = Queue()  # https://github.com/tornadoweb/tornado/issues/1595#issuecomment-198551572
        for i in range(self.ncores):
            self.thread_tokens.put_nowait(i)
        self.center = rpc(ip=center_ip, port=center_port)
        self.active = set()
        self.name = name

        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {'compute': self.compute,
                    'gather': self.gather,
                    'compute-stream': self.compute_stream,
                    'run': self.run,
                    'get_data': self.get_data,
                    'update_data': self.update_data,
                    'delete_data': self.delete_data,
                    'terminate': self.terminate,
                    'ping': pingpong,
                    'health': self.health,
                    'upload_file': self.upload_file}

        super(Worker, self).__init__(handlers, **kwargs)

    @gen.coroutine
    def _start(self, port=0):
        self.listen(port)
        self.name = self.name or self.address
        for k, v in self.services.items():
            v.listen(0)
            self.service_ports[k] = v.port

        logger.info('      Start worker at: %20s:%d', self.ip, self.port)
        for k, v in self.service_ports.items():
            logger.info('  %16s at: %20s:%d' % (k, self.ip, v))
        logger.info('Waiting to connect to: %20s:%d',
                    self.center.ip, self.center.port)
        while True:
            try:
                resp = yield self.center.register(
                        ncores=self.ncores, address=(self.ip, self.port),
                        keys=list(self.data), services=self.service_ports,
                        name=self.name)
                break
            except (OSError, StreamClosedError):
                logger.debug("Unable to register with scheduler.  Waiting")
                yield gen.sleep(0.5)
        if resp != 'OK':
            raise ValueError(resp)
        logger.info('        Registered to: %20s:%d',
                    self.center.ip, self.center.port)
        self.status = 'running'

    def start(self, port=0):
        self.loop.add_callback(self._start, port)

    def identity(self, stream):
        return {'type': type(self).__name__, 'id': self.id,
                'center': (self.center.ip, self.center.port)}

    @gen.coroutine
    def _close(self, report=True, timeout=10):
        if report:
            yield gen.with_timeout(timedelta(seconds=timeout),
                    self.center.unregister(address=(self.ip, self.port)),
                    io_loop=self.loop)
        self.center.close_streams()
        self.stop()
        self.executor.shutdown()
        if os.path.exists(self.local_dir):
            shutil.rmtree(self.local_dir)

        for k, v in self.services.items():
            v.stop()
        self.status = 'closed'
        self.stop()

    @gen.coroutine
    def terminate(self, stream, report=True):
        yield self._close(report=report)
        raise Return('OK')

    @property
    def address(self):
        return '%s:%d' % (self.ip, self.port)

    @property
    def address_tuple(self):
        return (self.ip, self.port)

    @gen.coroutine
    def gather(self, stream=None, who_has=None):
        who_has = {k: [coerce_to_address(addr) for addr in v]
                    for k, v in who_has.items()
                    if k not in self.data}
        try:
            result = yield gather_from_workers(who_has)
        except KeyError as e:
            logger.warn("Could not find data", e)
            raise Return({'status': 'missing-data',
                          'keys': e.args})
        else:
            self.data.update(result)
            raise Return({'status': 'OK'})

    @gen.coroutine
    def _ready_task(self, function=None, key=None, args=(), kwargs={},
            task=None, who_has=None):
        diagnostics = {}
        if who_has:
            local_data = {k: self.data[k] for k in who_has if k in self.data}
            who_has = {k: set(map(coerce_to_address, v))
                       for k, v in who_has.items()
                       if k not in self.data}
            try:
                logger.info("gather %d keys from peers: %s",
                            len(who_has), str(who_has))
                diagnostics['transfer-start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer-stop'] = time()
                data = merge(local_data, other)
            except KeyError as e:
                logger.warn("Could not find data for %s", key)
                raise Return({'status': 'missing-data',
                              'keys': e.args,
                              'key': key})
        else:
            data = {}
            transfer_time = 0
        try:
            start = default_timer()
            if task is not None:
                task = loads(task)
            if function is not None:
                function = loads(function)
            if args:
                args = loads(args)
            if kwargs:
                kwargs = loads(kwargs)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            raise Return(assoc(error_message(e), 'key', key))

        if task is not None:
            assert not function and not args and not kwargs
            function = execute_task
            args = (task,)

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({'status': 'OK',
                      'function': function,
                      'args': args2,
                      'kwargs': kwargs2,
                      'diagnostics': diagnostics,
                      'key': key})

    @gen.coroutine
    def executor_submit(self, key, function, *args, **kwargs):
        """ Safely run function in thread pool executor

        We've run into issues running concurrent.future futures within
        tornado.  Apparently it's advantageous to use timeouts and periodic
        callbacks to ensure things run smoothly.  This can get tricky, so we
        pull it off into an separate method.
        """
        token = yield self.thread_tokens.get()
        job_counter[0] += 1
        i = job_counter[0]
        # logger.info("%s:%d Starts job %d, %s", self.ip, self.port, i, key)
        future = self.executor.submit(function, *args, **kwargs)
        pc = PeriodicCallback(lambda: logger.debug("future state: %s - %s",
                                                   key, future._state), 1000)
        pc.start()
        try:
            if sys.version_info < (3, 2):
                yield future
            else:
                while not future.done() and future._state != 'FINISHED':
                    try:
                        yield gen.with_timeout(timedelta(seconds=1), future,
                                               io_loop=self.loop)
                        break
                    except gen.TimeoutError:
                        logger.info("work queue size: %d", self.executor._work_queue.qsize())
                        logger.info("future state: %s", future._state)
                        logger.info("Pending job %d: %s", i, future)
        finally:
            pc.stop()
            self.thread_tokens.put(token)

        result = future.result()

        logger.info("Finish job %d, %s", i, key)
        raise gen.Return(result)

    @gen.coroutine
    def compute_stream(self, stream):
        with log_errors():
            logger.debug("Open compute stream")
            bstream = BatchedSend(interval=10, loop=self.loop)
            bstream.start(stream)

        @gen.coroutine
        def process(msg):
            try:
                result = yield self.compute(report=False, **msg)
                bstream.send(result)
            except Exception as e:
                logger.exception(e)
                bstream.send(assoc(error_message(e), 'key', msg.get('key')))

        with log_errors():
            while True:
                try:
                    msgs = yield read(stream)
                except StreamClosedError:
                    break
                if not isinstance(msgs, list):
                    msgs = [msgs]

                for msg in msgs:
                    op = msg.pop('op', None)
                    if op == 'close':
                        break
                    elif op == 'compute-task':
                        self.loop.add_callback(process, msg)
                    else:
                        logger.warning("Unknown operation %s, %s", op, msg)

            yield bstream.close()
            logger.info("Close compute stream")

    @gen.coroutine
    def compute(self, stream=None, function=None, key=None, args=(), kwargs={},
            task=None, who_has=None, report=True):
        """ Execute function """
        self.active.add(key)

        # Ready function for computation
        msg = yield self._ready_task(function=function, key=key, args=args,
            kwargs=kwargs, task=task, who_has=who_has)
        if msg['status'] != 'OK':
            try:
                self.active.remove(key)
            except KeyError:
                pass
            raise Return(msg)
        else:
            function = msg['function']
            args = msg['args']
            kwargs = msg['kwargs']

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args, kwargs)

        result['key'] = key
        result.update(msg['diagnostics'])

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.center.add_keys(address=(self.ip, self.port),
                                                      keys=[key])
                if response != 'OK':
                    logger.warn('Could not report results to center: %s',
                                str(response))
        else:
            logger.warn(" Compute Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000], str(args)[:1000],
                str(kwargs)[:1000], exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key, msg)
        try:
            self.active.remove(key)
        except KeyError:
            pass
        raise Return(result)

    @gen.coroutine
    def run(self, stream, function=None, args=(), kwargs={}):
        function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)
        try:
            result = function(*args, **kwargs)
        except Exception as e:
            logger.warn(" Run Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000], str(args)[:1000],
                str(kwargs)[:1000], exc_info=True)

            response = error_message(e)
        else:
            response = {
                'status': 'OK',
                'result': dumps(result),
            }
        raise Return(response)

    @gen.coroutine
    def update_data(self, stream, data=None, report=True):
        data = valmap(loads, data)
        self.data.update(data)
        if report:
            response = yield self.center.add_keys(address=(self.ip, self.port),
                                                  keys=list(data))
            assert response == 'OK'
        info = {'nbytes': {k: sizeof(v) for k, v in data.items()},
                'status': 'OK'}
        raise Return(info)

    @gen.coroutine
    def delete_data(self, stream, keys=None, report=True):
        for key in keys:
            if key in self.data:
                del self.data[key]
        logger.info("Deleted %d keys", len(keys))
        if report:
            logger.debug("Reporting loss of keys to center")
            yield self.center.remove_keys(address=self.address,
                                          keys=list(keys))
        raise Return('OK')

    def get_data(self, stream, keys=None):
        return {k: dumps(self.data[k]) for k in keys if k in self.data}

    def upload_file(self, stream, filename=None, data=None, load=True):
        out_filename = os.path.join(self.local_dir, filename)
        if isinstance(data, unicode):
            data = data.encode()
        with open(out_filename, 'wb') as f:
            f.write(data)
            f.flush()

        if load:
            try:
                name, ext = os.path.splitext(filename)
                if ext in ('.py', '.pyc'):
                    logger.info("Reload module %s from .py file", name)
                    name = name.split('-')[0]
                    reload(import_module(name))
                if ext == '.egg':
                    sys.path.append(out_filename)
                    pkgs = pkg_resources.find_distributions(out_filename)
                    for pkg in pkgs:
                        logger.info("Load module %s from egg", pkg.project_name)
                        reload(import_module(pkg.project_name))
                    if not pkgs:
                        logger.warning("Found no packages in egg file")
            except Exception as e:
                logger.exception(e)
                return {'status': 'error', 'exception': dumps(e)}
        return {'status': 'OK', 'nbytes': len(data)}

    def health(self, stream=None):
        """ Information about worker """
        d = {'active': len(self.active),
             'stored': len(self.data),
             'time': time()}
        try:
            import psutil
            mem = psutil.virtual_memory()
            d.update({'cpu': psutil.cpu_percent(),
                      'memory': mem.total,
                      'memory-percent': mem.percent})
            try:
                net_io = psutil.net_io_counters()
                d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
                d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
            except AttributeError:
                pass
            self._last_net_io = net_io

            try:
                disk_io = psutil.disk_io_counters()
                d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes
                d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes
            except (AttributeError, RuntimeError):
                disk_io = None
            self._last_disk_io = disk_io
        except ImportError:
            pass
        return d
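
The health method above reports throughput as deltas between successive psutil
counter snapshots, and uses the AttributeError raised on the very first call
(before any snapshot is stored) to skip the delta and record a baseline.  A
minimal standalone sketch of that pattern, assuming only that psutil is
installed:

import time

import psutil  # third-party: pip install psutil


class HealthReporter(object):
    """Report bytes transferred since the previous poll."""

    def poll(self):
        d = {'time': time.time()}
        net_io = psutil.net_io_counters()
        try:
            # on the first call self._last_net_io does not exist yet, so the
            # AttributeError below skips the delta and we only store a baseline
            d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
            d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
        except AttributeError:
            pass
        self._last_net_io = net_io
        return d


reporter = HealthReporter()
reporter.poll()          # baseline only: {'time': ...}
time.sleep(1)
print(reporter.poll())   # now includes network-send / network-recv deltas
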
Code example #49
0
class Rx(PrettyPrintable):
    def __init__(self, rx_tree, session_id, header_table=None, io_loop=None, service_name=None,
                 raw_headers=None, trace_id=None):
        if header_table is None:
            header_table = CocaineHeaders()

        # If this is not the main thread and no current IOLoop exists
        # here, IOLoop.current() falls back to IOLoop.instance()
        self._io_loop = io_loop or IOLoop.current()
        self._queue = Queue()
        self._done = False
        self.session_id = session_id
        self.service_name = service_name
        self.rx_tree = rx_tree
        self.default_protocol = detect_protocol_type(rx_tree)
        self._headers = header_table
        self._current_headers = self._headers.merge(raw_headers)
        self.log = get_trace_adapter(log, trace_id)

    @coroutine
    def get(self, timeout=0, protocol=None):
        if self._done and self._queue.empty():
            raise ChokeEvent()

        # even when the stream is done, the queue may still hold service errors
        if timeout <= 0:
            item = yield self._queue.get()
        else:
            deadline = datetime.timedelta(seconds=timeout)
            item = yield self._queue.get(deadline)

        if isinstance(item, Exception):
            raise item

        if protocol is None:
            protocol = self.default_protocol

        name, payload, raw_headers = item
        self._current_headers = self._headers.merge(raw_headers)
        res = protocol(name, payload)
        if isinstance(res, ProtocolError):
            raise ServiceError(self.service_name, res.reason, res.code, res.category)
        else:
            raise Return(res)

    def done(self):
        self._done = True

    def push(self, msg_type, payload, raw_headers):
        dispatch = self.rx_tree.get(msg_type)
        self.log.debug("dispatch %s %.300s", dispatch, payload)
        if dispatch is None:
            raise InvalidMessageType(self.service_name, CocaineErrno.INVALIDMESSAGETYPE,
                                     "unexpected message type %s" % msg_type)
        name, rx = dispatch
        self.log.info(
            "got message from `%s`: channel id: %s, type: %s",
            self.service_name,
            self.session_id,
            name
        )
        self._queue.put_nowait((name, payload, raw_headers))
        if rx == {}:  # the last transition
            self.done()
        elif rx is not None:  # not a recursive transition
            self.rx_tree = rx

    def error(self, err):
        self._queue.put_nowait(err)

    def closed(self):
        return self._done

    def _format(self):
        return "name: %s, queue: %s, done: %s" % (self.service_name, self._queue, self._done)

    @property
    def headers(self):
        return self._current_headers
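
Rx above is essentially a rendezvous between a dispatcher that pushes items (or
exceptions) into a Tornado queue and a reader coroutine that drains it and
re-raises any pushed exception.  A stripped-down sketch of that pattern in
plain Tornado, leaving out the Cocaine-specific header and protocol handling:

import datetime

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue


class MiniRx(object):
    """push() feeds a queue that get() drains; exceptions travel through
    the same queue and are re-raised on the reader side."""

    def __init__(self):
        self._queue = Queue()

    def push(self, item):
        self._queue.put_nowait(item)

    def error(self, exc):
        self._queue.put_nowait(exc)

    @gen.coroutine
    def get(self, timeout=0):
        if timeout <= 0:
            item = yield self._queue.get()
        else:
            item = yield self._queue.get(datetime.timedelta(seconds=timeout))
        if isinstance(item, Exception):
            raise item
        raise gen.Return(item)


@gen.coroutine
def main():
    rx = MiniRx()
    rx.push('payload')
    print((yield rx.get()))            # -> 'payload'
    rx.error(RuntimeError('boom'))
    try:
        yield rx.get(timeout=1)
    except RuntimeError as e:
        print('re-raised: %s' % e)

IOLoop.current().run_sync(main)
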
Code example #50
0
File: executor.py  Project: cowlicks/distributed
class Executor(object):
    """ Distributed executor with data dependencies

    This executor resembles executors in concurrent.futures but also allows
    Futures within submit/map calls.

    Provide center address on initialization

    >>> executor = Executor(('127.0.0.1', 8787))  # doctest: +SKIP

    Use ``submit`` method like normal

    >>> a = executor.submit(add, 1, 2)  # doctest: +SKIP
    >>> b = executor.submit(add, 10, 20)  # doctest: +SKIP

    Additionally, provide results of submit calls (futures) to further submit
    calls:

    >>> c = executor.submit(add, a, b)  # doctest: +SKIP

    This allows for the dynamic creation of complex dependencies.
    """
    def __init__(self, center, start=True, delete_batch_time=1):
        self.center = coerce_to_rpc(center)
        self.futures = dict()
        self.refcount = defaultdict(lambda: 0)
        self.dask = dict()
        self.restrictions = dict()
        self.loop = IOLoop()
        self.report_queue = Queue()
        self.scheduler_queue = Queue()
        self._shutdown_event = Event()
        self._delete_batch_time = delete_batch_time

        if start:
            self.start()

    def start(self):
        """ Start scheduler running in separate thread """
        from threading import Thread
        self.loop.add_callback(self._go)
        self._loop_thread = Thread(target=self.loop.start)
        self._loop_thread.start()

    def __enter__(self):
        if not self.loop._running:
            self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.shutdown()

    def _inc_ref(self, key):
        self.refcount[key] += 1

    def _dec_ref(self, key):
        self.refcount[key] -= 1
        if self.refcount[key] == 0:
            del self.refcount[key]
            self._release_key(key)

    def _release_key(self, key):
        """ Release key from distributed memory """
        self.futures[key]['event'].clear()
        logger.debug("Release key %s", key)
        del self.futures[key]
        self.scheduler_queue.put_nowait({'op': 'release-held-data',
                                         'key': key})

    @gen.coroutine
    def report(self):
        """ Listen to scheduler """
        while True:
            msg = yield self.report_queue.get()
            if msg['op'] == 'close':
                break
            if msg['op'] == 'task-finished':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'finished'
                    self.futures[msg['key']]['event'].set()
            if msg['op'] == 'lost-data':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'lost'
                    self.futures[msg['key']]['event'].clear()
            if msg['op'] == 'task-erred':
                if msg['key'] in self.futures:
                    self.futures[msg['key']]['status'] = 'error'
                    self.futures[msg['key']]['event'].set()

    @gen.coroutine
    def _shutdown(self):
        """ Send shutdown signal and wait until _go completes """
        self.report_queue.put_nowait({'op': 'close'})
        self.scheduler_queue.put_nowait({'op': 'close'})
        yield self._shutdown_event.wait()

    def shutdown(self):
        """ Send shutdown signal and wait until scheduler terminates """
        self.report_queue.put_nowait({'op': 'close'})
        self.scheduler_queue.put_nowait({'op': 'close'})
        self.loop.stop()
        self._loop_thread.join()

    @gen.coroutine
    def _go(self):
        """ Setup and run all other coroutines.  Block until finished. """
        self.who_has, self.has_what, self.ncores = yield [self.center.who_has(),
                                                         self.center.has_what(),
                                                         self.center.ncores()]
        self.waiting = {}
        self.processing = {}
        self.stacks = {}

        worker_queues = {worker: Queue() for worker in self.ncores}
        delete_queue = Queue()

        coroutines = ([
            self.report(),
            scheduler(self.scheduler_queue, self.report_queue, worker_queues,
                      delete_queue, self.who_has, self.has_what, self.ncores,
                      self.dask, self.restrictions, self.waiting, self.stacks,
                      self.processing),
            delete(self.scheduler_queue, delete_queue,
                   self.center.ip, self.center.port, self._delete_batch_time)]
         + [worker(self.scheduler_queue, worker_queues[w], w, n)
            for w, n in self.ncores.items()])

        results = yield All(coroutines)
        self._shutdown_event.set()

    def submit(self, func, *args, **kwargs):
        """ Submit a function application to the scheduler

        Parameters
        ----------
        func: callable
        *args:
        **kwargs:
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> c = executor.submit(add, a, b)  # doctest: +SKIP

        Returns
        -------
        Future

        See Also
        --------
        distributed.executor.Executor.map:
        """
        if not callable(func):
            raise TypeError("First input to submit must be a callable function")

        key = kwargs.pop('key', None)
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)

        if key is None:
            if pure:
                key = funcname(func) + '-' + tokenize(func, kwargs, *args)
            else:
                key = funcname(func) + '-' + next(tokens)

        if key in self.futures:
            return Future(key, self)

        if kwargs:
            task = (apply, func, args, kwargs)
        else:
            task = (func,) + args

        if workers is not None:
            restrictions = {key: workers}
        else:
            restrictions = {}

        if key not in self.futures:
            self.futures[key] = {'event': Event(), 'status': 'waiting'}

        logger.debug("Submit %s(...), %s", funcname(func), key)
        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': {key: task},
                                         'keys': [key],
                                         'restrictions': restrictions})

        return Future(key, self)

    def map(self, func, *iterables, **kwargs):
        """ Map a function on a sequence of arguments

        Arguments can be normal objects or Futures

        Parameters
        ----------
        func: callable
        iterables: Iterables
        pure: bool (defaults to True)
            Whether or not the function is pure.  Set ``pure=False`` for
            impure functions like ``np.random.random``.
        workers: set, iterable of sets
            A set of worker hostnames on which computations may be performed.
            Leave empty to default to all workers (common case)

        Examples
        --------
        >>> L = executor.map(func, sequence)  # doctest: +SKIP

        Returns
        -------
        list of futures

        See also
        --------
        distributed.executor.Executor.submit
        """
        pure = kwargs.pop('pure', True)
        workers = kwargs.pop('workers', None)
        if not callable(func):
            raise TypeError("First input to map must be a callable function")
        iterables = [list(it) for it in iterables]
        if pure:
            keys = [funcname(func) + '-' + tokenize(func, kwargs, *args)
                    for args in zip(*iterables)]
        else:
            uid = str(uuid.uuid4())
            keys = [funcname(func) + '-' + uid + '-' + next(tokens)
                    for i in range(min(map(len, iterables)))]

        if not kwargs:
            dsk = {key: (func,) + args
                   for key, args in zip(keys, zip(*iterables))}
        else:
            dsk = {key: (apply, func, args, kwargs)
                   for key, args in zip(keys, zip(*iterables))}

        for key in dsk:
            if key not in self.futures:
                self.futures[key] = {'event': Event(), 'status': 'waiting'}

        if isinstance(workers, (list, set)):
            if workers and isinstance(first(workers), (list, set)):
                if len(workers) != len(keys):
                    raise ValueError("You only provided %d worker restrictions"
                    " for a sequence of length %d" % (len(workers), len(keys)))
                restrictions = dict(zip(keys, workers))
            else:
                restrictions = {key: workers for key in keys}
        elif workers is None:
            restrictions = {}
        else:
            raise TypeError("Workers must be a list or set of workers or None")

        logger.debug("map(%s, ...)", funcname(func))
        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': dsk,
                                         'keys': keys,
                                         'restrictions': restrictions})

        return [Future(key, self) for key in keys]

    @gen.coroutine
    def _gather(self, futures):
        futures2, keys = unpack_remotedata(futures)
        keys = list(keys)

        while True:
            yield All([self.futures[key]['event'].wait() for key in keys])
            try:
                data = yield _gather(self.center, keys)
            except KeyError as e:
                self.scheduler_queue.put_nowait({'op': 'missing-data',
                                                 'missing': e.args})
                for key in e.args:
                    self.futures[key]['event'].clear()
            else:
                break

        data = dict(zip(keys, data))

        result = pack_data(futures2, data)
        raise gen.Return(result)

    def gather(self, futures):
        """ Gather futures from distributed memory

        Accepts a future or any nested core container of futures

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> x = e.submit(add, 1, 2)  # doctest: +SKIP
        >>> e.gather(x)  # doctest: +SKIP
        3
        >>> e.gather([x, [x], x])  # doctest: +SKIP
        [3, [3], 3]
        """
        return sync(self.loop, self._gather, futures)

    @gen.coroutine
    def _get(self, dsk, keys, restrictions=None):
        flatkeys = list(flatten(keys))
        for key in flatkeys:
            if key not in self.futures:
                self.futures[key] = {'event': Event(), 'status': None}
        futures = {key: Future(key, self) for key in flatkeys}

        self.scheduler_queue.put_nowait({'op': 'update-graph',
                                         'dsk': dsk,
                                         'keys': flatkeys,
                                         'restrictions': restrictions or {}})

        packed = pack_data(keys, futures)
        result = yield self._gather(packed)
        raise gen.Return(result)

    def get(self, dsk, keys, **kwargs):
        """ Gather futures from distributed memory

        Parameters
        ----------
        dsk: dict
        keys: object, or nested lists of objects
        restrictions: dict (optional)
            A mapping of {key: {set of worker hostnames}} that restricts where
            jobs can take place

        Examples
        --------
        >>> from operator import add  # doctest: +SKIP
        >>> e = Executor('127.0.0.1:8787')  # doctest: +SKIP
        >>> e.get({'x': (add, 1, 2)}, 'x')  # doctest: +SKIP
        3
        """
        return sync(self.loop, self._get, dsk, keys, **kwargs)
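
Putting the docstring examples together, a hypothetical session against this
Executor might look as follows; the address is illustrative and assumes a
center and workers are already running:

from operator import add

executor = Executor(('127.0.0.1', 8787))   # assumes a running center

a = executor.submit(add, 1, 2)             # returns a Future immediately
b = executor.submit(add, 10, 20)
c = executor.submit(add, a, b)             # futures are valid arguments

futures = executor.map(add, range(4), range(4))

print(executor.gather(c))                  # blocks until finished -> 33
print(executor.gather(futures))            # -> [0, 2, 4, 6]

executor.shutdown()                        # stop scheduler and loop thread
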
Code example #51
0
File: pubnub_tornado.py  Project: pubnub/python
class SubscribeListener(SubscribeCallback):
    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.error_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()
        elif status.is_error():
            self.error_queue.put_nowait(status.error_data.exception)

    def message(self, pubnub, message):
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def _wait_for(self, coro):
        error = self.error_queue.get()
        wi = tornado.gen.WaitIterator(coro, error)

        while not wi.done():
            result = yield wi.next()

            if wi.current_future == coro:
                raise gen.Return(result)
            elif wi.current_future == error:
                raise result
            else:
                raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

    @tornado.gen.coroutine
    def wait_for_connect(self):
        if not self.connected_event.is_set():
            yield self._wait_for(self.connected_event.wait())
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            yield self._wait_for(self.disconnected_event.wait())
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try: # NOQA
                env = yield self._wait_for(self.message_queue.get())
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                try:
                    env = yield self._wait_for(self.presence_queue.get())
                except: # NOQA E722 pylint: disable=W0702
                    break
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.presence_queue.task_done()
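
A sketch of how this listener is typically wired up, assuming a configured
PubNubTornado client; the pubnub argument and the channel name here are
illustrative, not part of the snippet above:

import tornado.gen


@tornado.gen.coroutine
def demo(pubnub):
    listener = SubscribeListener()
    pubnub.add_listener(listener)

    pubnub.subscribe().channels('my_channel').execute()
    yield listener.wait_for_connect()      # errors surface via error_queue

    envelope = yield listener.wait_for_message_on('my_channel')
    print(envelope.message)

    pubnub.unsubscribe().channels('my_channel').execute()
    yield listener.wait_for_disconnect()
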
Code example #52
0
File: scheduler.py  Project: freeman-lab/distributed
class Scheduler(object):
    def __init__(self, center, delete_batch_time=1):
        self.scheduler_queue = Queue()
        self.report_queue = Queue()
        self.delete_queue = Queue()
        self.status = None

        self.center = coerce_to_rpc(center)

        self.dask = dict()
        self.dependencies = dict()
        self.dependents = dict()
        self.generation = 0
        self.has_what = defaultdict(set)
        self.held_data = set()
        self.in_play = set()
        self.keyorder = dict()
        self.nbytes = dict()
        self.ncores = dict()
        self.processing = dict()
        self.restrictions = dict()
        self.stacks = dict()
        self.waiting = dict()
        self.waiting_data = dict()
        self.who_has = defaultdict(set)

        self.exceptions = dict()
        self.tracebacks = dict()
        self.exceptions_blame = dict()

        self.delete_batch_time = delete_batch_time

    @gen.coroutine
    def _sync_center(self):
        self.ncores, self.has_what, self.who_has = yield [
                self.center.ncores(),
                self.center.has_what(),
                self.center.who_has()]

    def start(self):
        collections = [self.dask, self.dependencies, self.dependents,
                self.waiting, self.waiting_data, self.in_play, self.keyorder,
                self.nbytes, self.processing, self.restrictions]
        for collection in collections:
            collection.clear()

        self.processing = {addr: set() for addr in self.ncores}
        self.stacks = {addr: list() for addr in self.ncores}

        self.worker_queues = {addr: Queue() for addr in self.ncores}

        self.coroutines = ([
             self.scheduler(),
             delete(self.scheduler_queue, self.delete_queue,
                    self.center.ip, self.center.port,
                    self.delete_batch_time)]
            + [worker(self.scheduler_queue, self.worker_queues[w], w, n)
               for w, n in self.ncores.items()])

        for cor in self.coroutines:
            if cor.done():
                raise cor.exception()

        return All(self.coroutines)

    @gen.coroutine
    def _close(self):
        self.scheduler_queue.put_nowait({'op': 'close'})
        yield All(self.coroutines)

    @gen.coroutine
    def cleanup(self):
        """ Clean up queues and coroutines, prepare to stop """
        logger.debug("Cleaning up coroutines")
        n = 0
        self.delete_queue.put_nowait({'op': 'close'}); n += 1
        for w, nc in self.ncores.items():
            for i in range(nc):
                self.worker_queues[w].put_nowait({'op': 'close'}); n += 1

        for i in range(n):
            yield self.scheduler_queue.get()

    def mark_ready_to_run(self, key):
        """ Send task to an appropriate worker, trigger worker """
        logger.debug("Mark %s ready to run", key)
        if key in self.waiting:
            assert not self.waiting[key]
            del self.waiting[key]

        new_worker = decide_worker(self.dependencies, self.stacks,
                self.who_has, self.restrictions, self.nbytes, key)

        self.stacks[new_worker].append(key)
        self.ensure_occupied(new_worker)

    def mark_key_in_memory(self, key, workers=None):
        logger.debug("Mark %s in memory", key)
        if workers is None:
            workers = self.who_has[key]
        for worker in workers:
            self.who_has[key].add(worker)
            self.has_what[worker].add(key)
            with ignoring(KeyError):
                self.processing[worker].remove(key)

        for dep in sorted(self.dependents.get(key, []), key=self.keyorder.get,
                          reverse=True):
            if dep in self.waiting:
                s = self.waiting[dep]
                with ignoring(KeyError):
                    s.remove(key)
                if not s:  # new task ready to run
                    self.mark_ready_to_run(dep)

        for dep in self.dependencies.get(key, []):
            if dep in self.waiting_data:
                s = self.waiting_data[dep]
                with ignoring(KeyError):
                    s.remove(key)
                if not s and dep:
                    self.release_key(dep)

        self.report_queue.put_nowait({'op': 'key-in-memory',
                                     'key': key,
                                     'workers': workers})

    def ensure_occupied(self, worker):
        """ Spin up tasks on worker while it has tasks and free cores """
        logger.debug('Ensure worker is occupied: %s', worker)
        while (self.stacks[worker] and
               self.ncores[worker] > len(self.processing[worker])):
            key = self.stacks[worker].pop()
            self.processing[worker].add(key)
            logger.debug("Send job to worker: %s, %s, %s", worker, key, self.dask[key])
            self.worker_queues[worker].put_nowait(
                    {'op': 'compute-task',
                     'key': key,
                     'task': self.dask[key],
                     'needed': self.dependencies[key]})

    def seed_ready_tasks(self, keys=None):
        """ Distribute leaves among workers

        Takes an iterable of keys to consider for execution
        """
        if keys is None:
            keys = self.dask
        new_stacks = assign_many_tasks(
                self.dependencies, self.waiting, self.keyorder, self.who_has,
                self.stacks, self.restrictions, self.nbytes,
                [k for k in keys if k in self.waiting and not self.waiting[k]])
        logger.debug("Seed ready tasks: %s", new_stacks)
        for worker, stack in new_stacks.items():
            if stack:
                self.ensure_occupied(worker)

    def release_key(self, key):
        """ Release key from distributed memory if its ready """
        logger.debug("Release key %s", key)
        if key not in self.held_data and not self.waiting_data.get(key):
            self.delete_queue.put_nowait({'op': 'delete-task',
                                          'key': key})
            for w in self.who_has[key]:
                self.has_what[w].remove(key)
            del self.who_has[key]
            if key in self.waiting_data:
                del self.waiting_data[key]
            if key in self.in_play:
                self.in_play.remove(key)

    def update_data(self, extra_who_has, extra_nbytes):
        logger.debug("Update data %s", extra_who_has)
        for key, workers in extra_who_has.items():
            self.mark_key_in_memory(key, workers)

        self.nbytes.update(extra_nbytes)

        self.held_data.update(extra_who_has)
        self.in_play.update(extra_who_has)

    def mark_failed(self, key, failing_key=None):
        """ When a task fails mark it and all dependent task as failed """
        logger.debug("Mark key as failed %s", key)
        if key in self.exceptions_blame:
            return
        self.exceptions_blame[key] = failing_key
        self.report_queue.put_nowait({'op': 'task-erred',
                                     'key': key,
                                     'exception': self.exceptions[failing_key],
                                     'traceback': self.tracebacks[failing_key]})
        if key in self.waiting:
            del self.waiting[key]
        if key in self.waiting_data:
            del self.waiting_data[key]
        self.in_play.remove(key)
        for dep in self.dependents[key]:
            self.mark_failed(dep, failing_key)

    def log_state(self, msg=''):
        logger.debug("Runtime State: %s", msg)
        logger.debug('\n\nwaiting: %s\n\nstacks: %s\n\nprocessing: %s\n\n'
                'in_play: %s\n\n', self.waiting, self.stacks, self.processing,
                self.in_play)

    def mark_worker_missing(self, worker):
        logger.debug("Mark worker as missing %s", worker)
        if worker not in self.processing:
            return
        keys = self.has_what.pop(worker)
        for i in range(self.ncores[worker]):  # send close message, in case not dead
            self.worker_queues[worker].put_nowait({'op': 'close', 'report': False})
        del self.worker_queues[worker]
        del self.ncores[worker]
        del self.stacks[worker]
        del self.processing[worker]
        if not self.stacks:
            logger.critical("Lost all workers")
        missing_keys = set()
        for key in keys:
            self.who_has[key].remove(worker)
            if not self.who_has[key]:
                missing_keys.add(key)
        gone_data = {k for k, v in self.who_has.items() if not v}
        self.in_play.difference_update(missing_keys)
        for k in gone_data:
            del self.who_has[k]

    def heal_state(self):
        """ Recover from catastrophic change """
        logger.debug("Heal state")
        self.log_state("Before Heal")
        state = heal(self.dependencies, self.dependents, set(self.who_has),
                self.stacks, self.processing, self.waiting, self.waiting_data)
        released = state['released']
        self.in_play.clear(); self.in_play.update(state['in_play'])
        add_keys = {k for k, v in self.waiting.items() if not v}
        for key in self.held_data & released:
            self.report_queue.put_nowait({'op': 'lost-key', 'key': key})
        if self.stacks:
            for key in add_keys:
                self.mark_ready_to_run(key)
        for key in set(self.who_has) & released - self.held_data:
            self.delete_queue.put_nowait({'op': 'delete-task', 'key': key})
        self.in_play.update(self.who_has)
        self.log_state("After Heal")

    def my_heal_missing_data(self, missing):
        logger.debug("Heal from missing data")
        return heal_missing_data(self.dask, self.dependencies, self.dependents,
                self.held_data, self.who_has, self.in_play, self.waiting,
                self.waiting_data, missing)

    @gen.coroutine
    def scheduler(self):
        """ The scheduler coroutine for dask scheduling

        This coroutine manages interactions with all worker cores and with the
        delete coroutine through queues.

        Attributes
        ----------
        scheduler_queue: tornado.queues.Queue
            Get information from outside
        report_queue: tornado.queues.Queue
            Report information to outside
        worker_queues: dict {worker: tornado.queues.Queue}
            One queue per worker node.
            Each queue is listened to by several worker_core coroutines.
        delete_queue: tornado.queues.Queue
            One queue listened to by ``delete`` which connects to the
            center to delete unnecessary intermediate data
        who_has: dict {key: set}
            Mapping key to {set of worker-identities}
        has_what: dict {worker: set}
            Mapping worker-identity to {set of keys}
        ncores: dict {worker: int}
            Mapping worker-identity to number-of-cores
        """

        assert (not self.dask) == (not self.dependencies), (self.dask, self.dependencies)

        self.heal_state()

        self.status = 'running'
        self.report_queue.put_nowait({'op': 'start'})
        while True:
            msg = yield self.scheduler_queue.get()

            logger.debug("scheduler receives message %s", msg)
            if msg['op'] == 'close':
                break
            elif msg['op'] == 'update-graph':
                update_state(self.dask, self.dependencies, self.dependents,
                        self.held_data, self.who_has, self.in_play,
                        self.waiting, self.waiting_data, msg['dsk'],
                        msg['keys'])

                cover_aliases(self.dask, msg['dsk'])

                self.restrictions.update(msg.get('restrictions', {}))

                new_keyorder = order(msg['dsk'])  # TODO: define order wrt old graph
                for key in new_keyorder:
                    if key not in self.keyorder:
                        # TODO: add test for this
                        self.keyorder[key] = (self.generation, new_keyorder[key]) # prefer old
                if len(msg['dsk']) > 1:
                    self.generation += 1  # older graph generations take precedence

                for key in msg['dsk']:
                    for dep in self.dependencies[key]:
                        if dep in self.exceptions_blame:
                            self.mark_failed(key, self.exceptions_blame[dep])

                self.seed_ready_tasks(msg['dsk'])
                for key in msg['keys']:
                    if self.who_has[key]:
                        self.mark_key_in_memory(key)

            elif msg['op'] == 'update-data':
                self.update_data(msg['who-has'], msg['nbytes'])

            elif msg['op'] == 'task-finished':
                key, worker = msg['key'], msg['workers'][0]
                logger.debug("Mark task as finished %s, %s", key, worker)
                if key in self.processing[worker]:
                    self.nbytes[key] = msg['nbytes']
                    self.mark_key_in_memory(key, [worker])
                    self.ensure_occupied(worker)
                else:
                    logger.debug("Key not found in processing, %s, %s, %s",
                            key, worker, self.processing[worker])

            elif msg['op'] == 'task-erred':
                key, worker = msg['key'], msg['worker']
                if key in self.processing[worker]:
                    self.processing[worker].remove(key)
                    self.exceptions[key] = msg['exception']
                    self.tracebacks[key] = msg['traceback']
                    self.mark_failed(key, key)
                    self.ensure_occupied(worker)

            elif msg['op'] in ('missing-data', 'task-missing-data'):
                missing = set(msg['missing'])
                logger.debug("Recovering missing data: %s", missing)
                for k in missing:
                    with ignoring(KeyError):
                        workers = self.who_has.pop(k)
                        for worker in workers:
                            self.has_what[worker].remove(k)
                self.my_heal_missing_data(missing)

                if msg['op'] == 'task-missing-data':
                    key = msg['key']
                    with ignoring(KeyError):
                        self.processing[msg['worker']].remove(key)
                    self.waiting[key] = missing
                    logger.info('task missing data, %s, %s', key, self.waiting)

                    self.ensure_occupied(msg['worker'])

                self.seed_ready_tasks()

            elif msg['op'] == 'worker-failed':
                worker = msg['worker']
                self.mark_worker_missing(worker)
                if msg.get('heal', True):
                    self.heal_state()

            elif msg['op'] == 'release-held-data':
                if msg['key'] in self.held_data:
                    logger.debug("Release key: %s", msg['key'])
                    self.held_data.remove(msg['key'])
                    self.release_key(msg['key'])

            else:
                logger.warn("Bad message: %s", msg)

        logger.debug('Finished scheduling')
        yield self.cleanup()
        self.status = 'done'
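
The scheduler coroutine above is driven entirely by op-tagged dicts: messages
arrive on scheduler_queue, are dispatched on their 'op' field, and results go
out on report_queue until a 'close' message breaks the loop.  A self-contained
toy version of that message loop (the handlers are stand-ins, not the real
update_state/seed_ready_tasks logic):

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue


@gen.coroutine
def mini_scheduler(inbox, reports):
    reports.put_nowait({'op': 'start'})
    while True:
        msg = yield inbox.get()
        if msg['op'] == 'close':
            break
        elif msg['op'] == 'update-graph':
            # stand-in for update_state(...) and seed_ready_tasks(...)
            reports.put_nowait({'op': 'graph-updated', 'keys': msg['keys']})
        else:
            reports.put_nowait({'op': 'bad-message'})
    reports.put_nowait({'op': 'done'})


@gen.coroutine
def main():
    inbox, reports = Queue(), Queue()
    IOLoop.current().spawn_callback(mini_scheduler, inbox, reports)
    inbox.put_nowait({'op': 'update-graph', 'dsk': {'x': (sum, [1, 2])},
                      'keys': ['x'], 'restrictions': {}})
    inbox.put_nowait({'op': 'close'})
    for _ in range(3):
        print((yield reports.get()))       # start, graph-updated, done

IOLoop.current().run_sync(main)
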
Code example #53
0
class Worker(Server):
    """ Worker Node

    Workers perform two functions:

    1.  **Serve data** from a local dictionary
    2.  **Perform computation** on that data and on data from peers

    Additionally workers keep a Center informed of their data and use that
    Center to gather data from other workers when necessary to perform a
    computation.

    You can start a worker with the ``dworker`` command line application::

        $ dworker scheduler-ip:port

    **State**

    * **data:** ``{key: object}``:
        Dictionary mapping keys to actual values
    * **active:** ``{key}``:
        Set of keys currently under computation
    * **ncores:** ``int``:
        Number of cores used by this worker process
    * **executor:** ``concurrent.futures.ThreadPoolExecutor``:
        Executor used to perform computation
    * **local_dir:** ``path``:
        Path on local machine to store temporary files
    * **center:** ``rpc``:
        Location of center or scheduler.  See ``.ip/.port`` attributes.
    * **name:** ``string``:
        Alias
    * **services:** ``{str: Server}``:
        Auxiliary web servers running on this worker
    * **service_ports:** ``{str: port}``:
        Ports on which the auxiliary web servers above listen

    Examples
    --------

    Create centers and workers in Python:

    >>> from distributed import Center, Worker
    >>> c = Center('192.168.0.100', 8787)  # doctest: +SKIP
    >>> w = Worker(c.ip, c.port)  # doctest: +SKIP
    >>> yield w._start(port=8788)  # doctest: +SKIP

    Or use the command line::

       $ dcenter
       Start center at 127.0.0.1:8787

       $ dworker 127.0.0.1:8787
       Start worker at:            127.0.0.1:8788
       Registered with center at:  127.0.0.1:8787

    See Also
    --------
    distributed.center.Center:
    """
    def __init__(self,
                 center_ip,
                 center_port,
                 ip=None,
                 ncores=None,
                 loop=None,
                 local_dir=None,
                 services=None,
                 service_ports=None,
                 name=None,
                 **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        self.executor = ThreadPoolExecutor(self.ncores)
        self.thread_tokens = Queue(
        )  # https://github.com/tornadoweb/tornado/issues/1595#issuecomment-198551572
        for i in range(self.ncores):
            self.thread_tokens.put_nowait(i)
        self.center = rpc(ip=center_ip, port=center_port)
        self.active = set()
        self.name = name

        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {
            'compute': self.compute,
            'gather': self.gather,
            'compute-stream': self.compute_stream,
            'run': self.run,
            'get_data': self.get_data,
            'update_data': self.update_data,
            'delete_data': self.delete_data,
            'terminate': self.terminate,
            'ping': pingpong,
            'health': self.health,
            'upload_file': self.upload_file
        }

        super(Worker, self).__init__(handlers, **kwargs)

    @gen.coroutine
    def _start(self, port=0):
        self.listen(port)
        self.name = self.name or self.address
        for k, v in self.services.items():
            v.listen(0)
            self.service_ports[k] = v.port

        logger.info('      Start worker at: %20s:%d', self.ip, self.port)
        for k, v in self.service_ports.items():
            logger.info('  %16s at: %20s:%d' % (k, self.ip, v))
        logger.info('Waiting to connect to: %20s:%d', self.center.ip,
                    self.center.port)
        while True:
            try:
                resp = yield self.center.register(ncores=self.ncores,
                                                  address=(self.ip, self.port),
                                                  keys=list(self.data),
                                                  services=self.service_ports,
                                                  name=self.name)
                break
            except (OSError, StreamClosedError):
                logger.debug("Unable to register with scheduler.  Waiting")
                yield gen.sleep(0.5)
        if resp != 'OK':
            raise ValueError(resp)
        logger.info('        Registered to: %20s:%d', self.center.ip,
                    self.center.port)
        self.status = 'running'

    def start(self, port=0):
        self.loop.add_callback(self._start, port)

    def identity(self, stream):
        return {
            'type': type(self).__name__,
            'id': self.id,
            'center': (self.center.ip, self.center.port)
        }

    @gen.coroutine
    def _close(self, report=True, timeout=10):
        if report:
            yield gen.with_timeout(timedelta(seconds=timeout),
                                   self.center.unregister(address=(self.ip,
                                                                   self.port)),
                                   io_loop=self.loop)
        self.center.close_streams()
        self.stop()
        self.executor.shutdown()
        if os.path.exists(self.local_dir):
            shutil.rmtree(self.local_dir)

        for k, v in self.services.items():
            v.stop()
        self.status = 'closed'
        self.stop()

    @gen.coroutine
    def terminate(self, stream, report=True):
        yield self._close(report=report)
        raise Return('OK')

    @property
    def address(self):
        return '%s:%d' % (self.ip, self.port)

    @property
    def address_tuple(self):
        return (self.ip, self.port)

    @gen.coroutine
    def gather(self, stream=None, who_has=None):
        who_has = {
            k: [coerce_to_address(addr) for addr in v]
            for k, v in who_has.items() if k not in self.data
        }
        try:
            result = yield gather_from_workers(who_has)
        except KeyError as e:
            logger.warn("Could not find data", e)
            raise Return({'status': 'missing-data', 'keys': e.args})
        else:
            self.data.update(result)
            raise Return({'status': 'OK'})

    @gen.coroutine
    def _ready_task(self,
                    function=None,
                    key=None,
                    args=(),
                    kwargs={},
                    task=None,
                    who_has=None):
        who_has = who_has or {}
        diagnostics = {}
        data = {k: self.data[k] for k in who_has if k in self.data}
        who_has = {
            k: set(map(coerce_to_address, v))
            for k, v in who_has.items() if k not in self.data
        }
        if who_has:
            try:
                logger.info("gather %d keys from peers: %s", len(who_has),
                            str(who_has))
                diagnostics['transfer-start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer-stop'] = time()
                self.data.update(other)
                yield self.center.add_keys(address=self.address,
                                           keys=list(other))
                data.update(other)
            except KeyError as e:
                logger.warn("Could not find data for %s", key)
                raise Return({
                    'status': 'missing-data',
                    'keys': e.args,
                    'key': key
                })
        else:
            transfer_time = 0
        try:
            start = default_timer()
            if task is not None:
                task = loads(task)
            if function is not None:
                function = loads(function)
            if args:
                args = loads(args)
            if kwargs:
                kwargs = loads(kwargs)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            raise Return(assoc(error_message(e), 'key', key))

        if task is not None:
            assert not function and not args and not kwargs
            function = execute_task
            args = (task, )

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({
            'status': 'OK',
            'function': function,
            'args': args2,
            'kwargs': kwargs2,
            'diagnostics': diagnostics,
            'key': key
        })

    @gen.coroutine
    def executor_submit(self, key, function, *args, **kwargs):
        """ Safely run function in thread pool executor

        We've run into issues running concurrent.futures futures within
        tornado.  Apparently it's advantageous to use timeouts and periodic
        callbacks to ensure things run smoothly.  This can get tricky, so we
        pull it off into a separate method.
        """
        token = yield self.thread_tokens.get()
        job_counter[0] += 1
        i = job_counter[0]
        # logger.info("%s:%d Starts job %d, %s", self.ip, self.port, i, key)
        future = self.executor.submit(function, *args, **kwargs)
        pc = PeriodicCallback(
            lambda: logger.debug("future state: %s - %s", key, future._state),
            1000)
        pc.start()
        try:
            if sys.version_info < (3, 2):
                yield future
            else:
                while not future.done() and future._state != 'FINISHED':
                    try:
                        yield gen.with_timeout(timedelta(seconds=1),
                                               future,
                                               io_loop=self.loop)
                        break
                    except gen.TimeoutError:
                        logger.info("work queue size: %d",
                                    self.executor._work_queue.qsize())
                        logger.info("future state: %s", future._state)
                        logger.info("Pending job %d: %s", i, future)
        finally:
            pc.stop()
            self.thread_tokens.put(token)

        result = future.result()

        logger.info("Finish job %d, %s", i, key)
        raise gen.Return(result)

    @gen.coroutine
    def compute_stream(self, stream):
        with log_errors():
            logger.debug("Open compute stream")
            bstream = BatchedSend(interval=10, loop=self.loop)
            bstream.start(stream)

        @gen.coroutine
        def process(msg):
            try:
                result = yield self.compute(report=False, **msg)
                bstream.send(result)
            except Exception as e:
                logger.exception(e)
                bstream.send(assoc(error_message(e), 'key', msg.get('key')))

        with log_errors():
            while True:
                try:
                    msgs = yield read(stream)
                except StreamClosedError:
                    break
                if not isinstance(msgs, list):
                    msgs = [msgs]

                for msg in msgs:
                    op = msg.pop('op', None)
                    if op == 'close':
                        break
                    elif op == 'compute-task':
                        self.loop.add_callback(process, msg)
                    else:
                        logger.warning("Unknown operation %s, %s", op, msg)

            yield bstream.close()
            logger.info("Close compute stream")

    @gen.coroutine
    def compute(self,
                stream=None,
                function=None,
                key=None,
                args=(),
                kwargs={},
                task=None,
                who_has=None,
                report=True):
        """ Execute function """
        self.active.add(key)

        # Ready function for computation
        msg = yield self._ready_task(function=function,
                                     key=key,
                                     args=args,
                                     kwargs=kwargs,
                                     task=task,
                                     who_has=who_has)
        if msg['status'] != 'OK':
            try:
                self.active.remove(key)
            except KeyError:
                pass
            raise Return(msg)
        else:
            function = msg['function']
            args = msg['args']
            kwargs = msg['kwargs']

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args, kwargs)

        result['key'] = key
        result.update(msg['diagnostics'])

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.center.add_keys(address=(self.ip,
                                                               self.port),
                                                      keys=[key])
                if not response == 'OK':
                    logger.warn('Could not report results to center: %s',
                                str(response))
        else:
            logger.warn(
                " Compute Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                str(args)[:1000],
                str(kwargs)[:1000],
                exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key, msg)
        try:
            self.active.remove(key)
        except KeyError:
            pass
        raise Return(result)

    @gen.coroutine
    def run(self, stream, function=None, args=(), kwargs={}):
        function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)
        try:
            result = function(*args, **kwargs)
        except Exception as e:
            logger.warn(
                " Run Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                str(args)[:1000],
                str(kwargs)[:1000],
                exc_info=True)

            response = error_message(e)
        else:
            response = {
                'status': 'OK',
                'result': dumps(result),
            }
        raise Return(response)

    @gen.coroutine
    def update_data(self, stream, data=None, report=True):
        data = valmap(loads, data)
        self.data.update(data)
        if report:
            response = yield self.center.add_keys(address=(self.ip, self.port),
                                                  keys=list(data))
            assert response == 'OK'
        info = {
            'nbytes': {k: sizeof(v)
                       for k, v in data.items()},
            'status': 'OK'
        }
        raise Return(info)

    @gen.coroutine
    def delete_data(self, stream, keys=None, report=True):
        for key in keys:
            if key in self.data:
                del self.data[key]
        logger.info("Deleted %d keys", len(keys))
        if report:
            logger.debug("Reporting loss of keys to center")
            yield self.center.remove_keys(address=self.address,
                                          keys=list(keys))
        raise Return('OK')

    def get_data(self, stream, keys=None):
        return {k: dumps(self.data[k]) for k in keys if k in self.data}

    def upload_file(self, stream, filename=None, data=None, load=True):
        out_filename = os.path.join(self.local_dir, filename)
        if isinstance(data, unicode):
            data = data.encode()
        with open(out_filename, 'wb') as f:
            f.write(data)
            f.flush()

        if load:
            try:
                name, ext = os.path.splitext(filename)
                if ext in ('.py', '.pyc'):
                    logger.info("Reload module %s from .py file", name)
                    name = name.split('-')[0]
                    reload(import_module(name))
                if ext == '.egg':
                    sys.path.append(out_filename)
                    pkgs = pkg_resources.find_distributions(out_filename)
                    for pkg in pkgs:
                        logger.info("Load module %s from egg",
                                    pkg.project_name)
                        reload(import_module(pkg.project_name))
                    if not pkgs:
                        logger.warning("Found no packages in egg file")
            except Exception as e:
                logger.exception(e)
                return {'status': 'error', 'exception': dumps(e)}
        return {'status': 'OK', 'nbytes': len(data)}

    def health(self, stream=None):
        """ Information about worker """
        d = {
            'active': len(self.active),
            'stored': len(self.data),
            'time': time()
        }
        try:
            import psutil
            mem = psutil.virtual_memory()
            d.update({
                'cpu': psutil.cpu_percent(),
                'memory': mem.total,
                'memory-percent': mem.percent
            })
            try:
                net_io = psutil.net_io_counters()
                d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
                d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
            except AttributeError:
                pass
            self._last_net_io = net_io

            try:
                disk_io = psutil.disk_io_counters()
                d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes
                d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes
            except (AttributeError, RuntimeError):
                # AttributeError on the first call (no previous snapshot);
                # RuntimeError on platforms without disk counters
                disk_io = None
            self._last_disk_io = disk_io
        except ImportError:
            pass
        return d
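
The thread_tokens queue in this Worker acts as a semaphore: it is pre-loaded
with one token per core, and executor_submit takes a token before submitting
to the thread pool and returns it afterwards, so the pool is never
oversubscribed (see the tornado issue linked in __init__ above).  A minimal
sketch of that token pattern in isolation:

from concurrent.futures import ThreadPoolExecutor

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

NCORES = 2
executor = ThreadPoolExecutor(NCORES)

tokens = Queue()                     # one token per core
for i in range(NCORES):
    tokens.put_nowait(i)


@gen.coroutine
def submit(fn, *args):
    token = yield tokens.get()       # wait until a core is free
    try:
        result = yield executor.submit(fn, *args)
    finally:
        tokens.put(token)            # hand the core back
    raise gen.Return(result)


@gen.coroutine
def main():
    results = yield [submit(pow, 2, n) for n in range(8)]
    print(results)                   # [1, 2, 4, ..., 128]

IOLoop.current().run_sync(main)
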
Code example #54
0
class BlogBackup(object):
    _default_dir_name = "seg_blog_backup"

    def _generate_save_dir(self):
        cur_dir = os.path.dirname(__file__)
        self.save_path = os.path.join(cur_dir, self._default_dir_name)
        if not os.path.isdir(self.save_path):
            os.mkdir(self.save_path)

    def _parse_save_path(self):
        if self.save_path:
            if os.path.exists(self.save_path) and os.path.isdir(self.save_path):
                return
            else:
                raise BlogSavePathError("'%s' does not exist or is not a directory!" % self.save_path)
        else:
            self._generate_save_dir()

    @staticmethod
    def parse_token_from_html(content):
        overall_pat = re.compile(r"SF.token =.*?,\s+_\w+ = [\d,\[\]]+;", re.DOTALL)
        overall_res = overall_pat.search(content)
        if overall_res:
            overall_content = overall_res.group()
            # remove /* */ type annotation
            filter_res = re.sub(r"(/\*[/a-zA-Z\d' ]+\*/)", "", overall_content)
            str_list = re.findall(r"(?<!//)'([a-zA-Z\d]+)'", filter_res, re.DOTALL)
            filter_list = re.findall(r"\[(\d+),(\d+)\]", overall_content)
            ret = "".join(str_list)

            if filter_list:
                for m, n in filter_list:
                    ret = ret[: int(m)] + ret[int(n) :]
            if len(ret) == 32:
                return ret

        raise PageHtmlChanged("website login token has changed")

    def _get_user_cookies(self):
        s = requests.Session()
        s.headers.update(headers)
        rep = s.get(target_url)
        post_url = "%s%s?_=%s" % (target_url, login_api_path, self.parse_token_from_html(rep.text))
        data = {"mail": self.username, "password": self.passwd}
        s.post(post_url, data=data)
        return s.cookies

    def __init__(self, **conf):
        self.username = conf["username"]
        self.passwd = conf["passwd"]
        self.save_path = conf.get("save_path")
        self._q = Queue()
        self._cookies = self._get_user_cookies()
        self._parse_save_path()

    @gen.coroutine
    def run(self):
        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()

        yield self._q.join()

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d(".stream-list__item > .summary > h2 > a")
        for link in link_elements:
            yield self._q.put(d(link).attr("href"))

        next_ele = d(".pagination li.next a")
        if next_ele:
            next_page_url = target_url + next_ele.attr("href")
            yield self._fetch_blog_list_page(next_page_url)  # wait for the next page as well

    @gen.coroutine
    def _fetch_essay_content(self):
        while True:
            try:
                essay_path = yield self._q.get(timeout=1)
            except gen.TimeoutError:
                # the queue is empty: no more work to do
                raise gen.Return()
            try:
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                real_file_name = os.path.join(self.save_path, title + ".md")
                logger.info("backing up essay: %s" % title)
                with open(real_file_name, "w") as f:
                    f.writelines(content.encode("utf8"))
            finally:
                # only call task_done() for items actually taken off the queue
                self._q.task_done()
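
The consumers in _fetch_essay_content shut themselves down by treating a
queue-get timeout as the end of the work, while run() blocks on q.join() until
every fetched item has been marked done.  A generic sketch of that
producer/consumer shape (the names here are illustrative):

from datetime import timedelta

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

q = Queue()


@gen.coroutine
def consumer(name):
    while True:
        try:
            item = yield q.get(timeout=timedelta(seconds=1))
        except gen.TimeoutError:
            raise gen.Return()       # queue stayed empty: shut down
        try:
            print('%s processed %s' % (name, item))
        finally:
            q.task_done()            # only for items actually taken


@gen.coroutine
def main():
    for i in range(5):
        q.put_nowait(i)
    for name in ('w1', 'w2'):
        consumer(name)               # fire-and-forget, as in run() above
    yield q.join()                   # resolves once everything is task_done

IOLoop.current().run_sync(main)
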
Code example #55
0
    class TornadoTransmission():
        def __init__(self,
                     max_concurrent_batches=10,
                     block_on_send=False,
                     block_on_response=False,
                     max_batch_size=100,
                     send_frequency=timedelta(seconds=0.25),
                     user_agent_addition=''):
            if not has_tornado:
                raise ImportError(
                    'TornadoTransmission requires tornado, but it was not found.'
                )

            self.block_on_send = block_on_send
            self.block_on_response = block_on_response
            self.max_batch_size = max_batch_size
            self.send_frequency = send_frequency

            user_agent = "libhoney-py/" + VERSION
            if user_agent_addition:
                user_agent += " " + user_agent_addition

            self.http_client = AsyncHTTPClient(
                force_instance=True, defaults=dict(user_agent=user_agent))

            # libhoney adds events to the pending queue for us to send
            self.pending = Queue(maxsize=1000)
            # we hand back responses from the API on the responses queue
            self.responses = Queue(maxsize=2000)

            self.batch_data = {}
            self.sd = statsd.StatsClient(prefix="libhoney")
            self.batch_sem = Semaphore(max_concurrent_batches)

        def start(self):
            ioloop.IOLoop.current().spawn_callback(self._sender)

        def send(self, ev):
            '''send accepts an event and queues it to be sent'''
            self.sd.gauge("queue_length", self.pending.qsize())
            try:
                if self.block_on_send:
                    self.pending.put(ev)
                else:
                    self.pending.put_nowait(ev)
                self.sd.incr("messages_queued")
            except QueueFull:
                response = {
                    "status_code": 0,
                    "duration": 0,
                    "metadata": ev.metadata,
                    "body": "",
                    "error": "event dropped; queue overflow",
                }
                if self.block_on_response:
                    self.responses.put(response)
                else:
                    try:
                        self.responses.put_nowait(response)
                    except QueueFull:
                        # if the response queue is itself full when we try to
                        # add this "queue overflow" response, just skip it.
                        pass
                self.sd.incr("queue_overflow")

        # We're using the older decorator/yield model for compatibility with
        # Python versions before 3.5.
        # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
        @gen.coroutine
        def _sender(self):
            '''_sender is the control loop that pulls events off the `self.pending`
            queue and submits batches for actual sending. '''
            events = []
            last_flush = time.time()
            while True:
                try:
                    ev = yield self.pending.get(timeout=self.send_frequency)
                    if ev is None:
                        # signals shutdown
                        yield self._flush(events)
                        return
                    events.append(ev)
                    if (len(events) > self.max_batch_size
                            or time.time() - last_flush >
                            self.send_frequency.total_seconds()):
                        yield self._flush(events)
                        events = []
                except gen.TimeoutError:
                    yield self._flush(events)
                    events = []
                    last_flush = time.time()

        @gen.coroutine
        def _flush(self, events):
            if not events:
                return
            for dest, group in group_events_by_destination(events).items():
                yield self._send_batch(dest, group)

        @gen.coroutine
        def _send_batch(self, destination, events):
            ''' Makes a single batch API request with the given list of events. The
            `destination` argument contains the write key, API host and dataset
            name used to build the request.'''
            start = time.time()
            status_code = 0

            try:
                # enforce max_concurrent_batches
                yield self.batch_sem.acquire()
                url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                              destination.dataset)
                payload = []
                for ev in events:
                    event_time = ev.created_at.isoformat()
                    if ev.created_at.tzinfo is None:
                        event_time += "Z"
                    payload.append({
                        "time": event_time,
                        "samplerate": ev.sample_rate,
                        "data": ev.fields()
                    })
                req = HTTPRequest(
                    url,
                    method='POST',
                    headers={
                        "X-Honeycomb-Team": destination.writekey,
                        "Content-Type": "application/json",
                    },
                    body=json.dumps(payload, default=json_default_handler),
                )
                self.http_client.fetch(req, self._response_callback)
                # store the events that were sent so we can process responses later
                # it is important that we delete these eventually, or we'll run into memory issues
                self.batch_data[req] = {"start": start, "events": events}
            except Exception as e:
                # Catch all exceptions and hand them to the responses queue.
                self._enqueue_errors(status_code, e, start, events)
            finally:
                self.batch_sem.release()

        def _enqueue_errors(self, status_code, error, start, events):
            for ev in events:
                self.sd.incr("send_errors")
                self._enqueue_response(status_code, "", error, start,
                                       ev.metadata)

        def _response_callback(self, resp):
            # resp.request should be the same HTTPRequest object built by _send_batch
            # and mapped to values in batch_data
            events = self.batch_data[resp.request]["events"]
            start = self.batch_data[resp.request]["start"]
            try:
                status_code = resp.code
                resp.rethrow()

                statuses = [d["status"] for d in json.loads(resp.body)]
                for ev, status in zip(events, statuses):
                    self._enqueue_response(status, "", None, start,
                                           ev.metadata)
                    self.sd.incr("messages_sent")
            except Exception as e:
                self._enqueue_errors(status_code, e, start, events)
                self.sd.incr("send_errors")
            finally:
                # clean up the data for this batch
                del self.batch_data[resp.request]

        def _enqueue_response(self, status_code, body, error, start, metadata):
            resp = {
                "status_code": status_code,
                "body": body,
                "error": error,
                "duration": (time.time() - start) * 1000,
                "metadata": metadata
            }
            if self.block_on_response:
                self.responses.put(resp)
            else:
                try:
                    self.responses.put_nowait(resp)
                except QueueFull:
                    pass

        def close(self):
            '''call close to send all in-flight requests and shut down the
                senders nicely. Times out after max 20 seconds per sending thread
                plus 10 seconds for the response queue'''
            try:
                # Queue.put treats a bare number as an absolute deadline;
                # a timedelta means "give up after ten seconds".
                self.pending.put(None, timeout=timedelta(seconds=10))
            except QueueFull:
                pass
            # signal to the responses queue that nothing more is coming.
            try:
                self.responses.put(None, timeout=timedelta(seconds=10))
            except QueueFull:
                pass

        def get_response_queue(self):
            ''' return the responses queue, onto which the response object for
            each event send will be delivered '''
            return self.responses
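
For context, a hedged sketch of how a caller might exercise this transmission; make_event is a stand-in for however the surrounding libhoney library constructs events (objects with created_at, sample_rate, metadata and fields()), none of which is defined above.

# Sketch only: make_event() is a hypothetical event factory.
from tornado import gen, ioloop

@gen.coroutine
def drain_responses(transmission):
    responses = transmission.get_response_queue()
    while True:
        resp = yield responses.get()
        if resp is None:        # close() enqueues None as an end marker
            break
        print(resp["status_code"], resp["error"])

t = TornadoTransmission(max_batch_size=50)
t.start()                       # spawns the _sender control loop
t.send(make_event())            # queues an event; drops it if pending is full
t.close()                       # flushes in-flight events, then signals shutdown
ioloop.IOLoop.current().run_sync(lambda: drain_responses(t))
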
Code example #56
0
class AsyncConnection(object):
    def __init__(self, *args, **kwargs):
        kwargs["async"] = True

        if "thread_pool" in kwargs:
            self.__thread_pool = kwargs.pop("thread_pool")
        else:
            self.__thread_pool = futures.ThreadPoolExecutor(cpu_count())

        self.__connection = connect(*args, **kwargs)

        self.__io_loop = IOLoop.current()
        self.__connected = False

        log.debug("Trying to connect to postgresql")
        f = self.__wait()
        self.__io_loop.add_future(f, self.__on_connect)
        self.__queue = Queue()
        self.__has_active_cursor = False

        for method in ("get_backend_pid", "get_parameter_status"):
            setattr(self, method, self.__futurize(method))

    def __on_connect(self, result):
        log.debug("Connection establishment")
        self.__connected = True
        self.__io_loop.add_callback(self._loop)

    @coroutine
    def _loop(self):
        log.debug("Starting queue loop")
        while self.__connected:
            while self.__has_active_cursor or self.__connection.isexecuting():
                yield sleep(0.001)

            func, future = yield self.__queue.get()
            result = func()
            if isinstance(result, Future):
                result = yield result

            self.__io_loop.add_callback(future.set_result, result)
            yield self.__wait()

    @coroutine
    def __wait(self):
        log.debug("Waiting for events")
        while True:
            yield sleep(0.001)
            try:
                state = self.__connection.poll()
            except QueryCanceledError:
                yield sleep(0.1)
                continue

            f = Future()

            def resolve(fileno, io_op):
                if f.running():
                    f.set_result(True)
                self.__io_loop.remove_handler(fileno)

            if state == psycopg2.extensions.POLL_OK:
                raise Return(True)

            elif state == psycopg2.extensions.POLL_READ:
                self.__io_loop.add_handler(self.__connection.fileno(), resolve, IOLoop.READ)
                yield f

            elif state == psycopg2.extensions.POLL_WRITE:
                self.__io_loop.add_handler(self.__connection.fileno(), resolve, IOLoop.WRITE)
                yield f

    def __on_cursor_open(self, cursor):
        self.__has_active_cursor = True
        log.debug("Opening cursor")

    def __on_cursor_close(self, cursor):
        self.__has_active_cursor = False
        log.debug("Closing active cursor")

    def cursor(self, **kwargs):
        f = Future()
        self.__io_loop.add_callback(
            self.__queue.put,
            (
                functools.partial(
                    AsyncCursor,
                    self.__connection,
                    self.__thread_pool,
                    self.__wait,
                    on_open=self.__on_cursor_open,
                    on_close=self.__on_cursor_close,
                    **kwargs
                ),
                f,
            ),
        )
        return f

    def cancel(self):
        return self.__thread_pool.submit(self.__connection.cancel)

    def close(self):
        self.__has_active_cursor = True
        self.__connected = False

        @coroutine
        def closer():
            # Queue.empty() returns a plain bool, so it must not be yielded
            while not self.__queue.empty():
                func, future = yield self.__queue.get()
                future.set_exception(psycopg2.Error("Connection closed"))

            self.__io_loop.add_callback(self.__connection.close)

        # schedule the drain coroutine; defining it alone does nothing
        return closer()

    def __futurize(self, item):
        attr = getattr(self.__connection, item)

        @functools.wraps(attr)
        def wrap(*args, **kwargs):
            f = Future()
            self.__io_loop.add_callback(self.__queue.put, (functools.partial(attr, *args, **kwargs), f))
            return f

        return wrap
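
A usage sketch under assumptions: AsyncCursor is referenced above but never shown, so the future-returning execute()/fetchall() interface used here is a guess at its API.

# Sketch only: assumes AsyncCursor exposes execute() and fetchall()
# that return futures; the snippet above does not show its definition.
from tornado.gen import coroutine
from tornado.ioloop import IOLoop

@coroutine
def main():
    conn = AsyncConnection("dbname=test user=postgres")
    cursor = yield conn.cursor()
    yield cursor.execute("SELECT version()")
    rows = yield cursor.fetchall()
    print(rows)

IOLoop.current().run_sync(main)
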
Code example #57
0
class ProjectGroomer(object):
  """ Cleans up expired transactions for a project. """
  def __init__(self, project_id, coordinator, zk_client, db_access,
               thread_pool):
    """ Creates a new ProjectGroomer.

    Args:
      project_id: A string specifying a project ID.
      coordinator: A GroomingCoordinator.
      zk_client: A KazooClient.
      db_access: A DatastoreProxy.
      thread_pool: A ThreadPoolExecutor.
    """
    self.project_id = project_id

    self._coordinator = coordinator
    self._zk_client = zk_client
    self._tornado_zk = TornadoKazoo(self._zk_client)
    self._db_access = db_access
    self._thread_pool = thread_pool
    self._project_node = '/appscale/apps/{}'.format(self.project_id)
    self._containers = []
    self._inactive_containers = set()
    self._batch_resolver = BatchResolver(self.project_id, self._db_access)

    self._zk_client.ensure_path(self._project_node)
    self._zk_client.ChildrenWatch(self._project_node, self._update_containers)

    self._txid_manual_offset = 0
    self._offset_node = '/'.join([self._project_node, OFFSET_NODE])
    self._zk_client.DataWatch(self._offset_node, self._update_offset)

    self._stop_event = AsyncEvent()
    self._stopped_event = AsyncEvent()

    # Keeps track of cleanup results for each round of grooming.
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = None

    self._worker_queue = AsyncQueue(maxsize=MAX_CONCURRENCY)
    for _ in range(MAX_CONCURRENCY):
      IOLoop.current().spawn_callback(self._worker)

    IOLoop.current().spawn_callback(self.start)

  @gen.coroutine
  def start(self):
    """ Starts the grooming process until the stop event is set. """
    logger.info('Grooming {}'.format(self.project_id))
    while True:
      if self._stop_event.is_set():
        break

      try:
        yield self._groom_project()
      except Exception:
        # Prevent the grooming loop from stopping if an error is encountered.
        logger.exception(
          'Unexpected error while grooming {}'.format(self.project_id))
        yield gen.sleep(MAX_TX_DURATION)

    self._stopped_event.set()

  @gen.coroutine
  def stop(self):
    """ Stops the grooming process. """
    logger.info('Stopping grooming process for {}'.format(self.project_id))
    self._stop_event.set()
    yield self._stopped_event.wait()

  @gen.coroutine
  def _worker(self):
    """ Processes items in the worker queue. """
    while True:
      tx_path, composite_indexes = yield self._worker_queue.get()
      try:
        tx_time = yield self._resolve_txid(tx_path, composite_indexes)
        if tx_time is None:
          self._txids_cleaned += 1

        if tx_time is not None and tx_time < self._oldest_valid_tx_time:
          self._oldest_valid_tx_time = tx_time
      finally:
        self._worker_queue.task_done()

  def _update_offset(self, new_offset, _):
    """ Watches for updates to the manual offset node.

    Args:
      new_offset: A string specifying the new manual offset.
    """
    self._txid_manual_offset = int(new_offset or 0)

  def _update_containers(self, nodes):
    """ Updates the list of active txid containers.

    Args:
      nodes: A list of strings specifying ZooKeeper nodes.
    """
    counters = [int(node[len(CONTAINER_PREFIX):] or 1)
                for node in nodes if node.startswith(CONTAINER_PREFIX)
                and node not in self._inactive_containers]
    counters.sort()

    containers = [CONTAINER_PREFIX + str(counter) for counter in counters]
    if containers and containers[0] == '{}1'.format(CONTAINER_PREFIX):
      containers[0] = CONTAINER_PREFIX

    self._containers = containers

  @gen.coroutine
  def _groom_project(self):
    """ Runs the grooming process. """
    index = self._coordinator.index
    worker_count = self._coordinator.total_workers

    oldest_valid_tx_time = yield self._fetch_and_clean(index, worker_count)

    # Wait until there's a reasonable chance that some transactions have
    # timed out.
    next_timeout_eta = oldest_valid_tx_time + MAX_TX_DURATION

    # The oldest ignored transaction should still be valid, but ensure that
    # the timeout is not negative.
    next_timeout = max(0, next_timeout_eta - time.time())
    time_to_wait = datetime.timedelta(
      seconds=next_timeout + (MAX_TX_DURATION / 2))

    # Allow the wait to be cut short when a project is removed.
    try:
      yield self._stop_event.wait(timeout=time_to_wait)
    except gen.TimeoutError:
      raise gen.Return()

  @gen.coroutine
  def _remove_path(self, tx_path):
    """ Removes a ZooKeeper node.

    Args:
      tx_path: A string specifying the path to delete.
    """
    try:
      yield self._tornado_zk.delete(tx_path)
    except NoNodeError:
      pass
    except NotEmptyError:
      yield self._thread_pool.submit(self._zk_client.delete, tx_path,
                                     recursive=True)

  @gen.coroutine
  def _resolve_txid(self, tx_path, composite_indexes):
    """ Cleans up a transaction if it has expired.

    Args:
      tx_path: A string specifying the location of the ZooKeeper node.
      composite_indexes: A list of CompositeIndex objects.
    Returns:
      The transaction start time if it is still valid, or None if it was
      invalid, in which case this method also deletes the node.
    """
    tx_data = yield self._tornado_zk.get(tx_path)
    tx_time = float(tx_data[0])

    _, container, tx_node = tx_path.rsplit('/', 2)
    tx_node_id = int(tx_node.lstrip(COUNTER_NODE_PREFIX))
    container_count = int(container[len(CONTAINER_PREFIX):] or 1)
    if tx_node_id < 0:
      yield self._remove_path(tx_path)
      raise gen.Return()

    container_size = MAX_SEQUENCE_COUNTER + 1
    automatic_offset = (container_count - 1) * container_size
    txid = self._txid_manual_offset + automatic_offset + tx_node_id

    if txid < 1:
      yield self._remove_path(tx_path)
      raise gen.Return()

    # If the transaction is still valid, return the time it was created.
    if tx_time + MAX_TX_DURATION >= time.time():
      raise gen.Return(tx_time)

    yield self._batch_resolver.resolve(txid, composite_indexes)
    yield self._remove_path(tx_path)
    yield self._batch_resolver.cleanup(txid)

  @gen.coroutine
  def _fetch_and_clean(self, worker_index, worker_count):
    """ Cleans up expired transactions.

    Args:
      worker_index: An integer specifying this worker's index.
      worker_count: An integer specifying the number of total workers.
    Returns:
      A float specifying the time of the oldest valid transaction as a unix
      timestamp.
    """
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = time.time()

    children = []
    for index, container in enumerate(self._containers):
      container_path = '/'.join([self._project_node, container])
      new_children = yield self._tornado_zk.get_children(container_path)

      if not new_children and index < len(self._containers) - 1:
        self._inactive_containers.add(container)

      children.extend(['/'.join([container_path, node])
                       for node in new_children])

    logger.debug(
      'Found {} transaction IDs for {}'.format(len(children), self.project_id))

    if not children:
      raise gen.Return(self._oldest_valid_tx_time)

    # Refresh these each time so that the indexes are fresh.
    encoded_indexes = yield self._thread_pool.submit(
      self._db_access.get_indices, self.project_id)
    composite_indexes = [CompositeIndex(index) for index in encoded_indexes]

    for tx_path in children:
      tx_node_id = int(tx_path.split('/')[-1].lstrip(COUNTER_NODE_PREFIX))
      # Only resolve transactions that this worker has been assigned.
      if tx_node_id % worker_count != worker_index:
        continue

      yield self._worker_queue.put((tx_path, composite_indexes))

    yield self._worker_queue.join()

    if self._txids_cleaned > 0:
      logger.info('Cleaned up {} expired txids for {}'.format(
        self._txids_cleaned, self.project_id))

    raise gen.Return(self._oldest_valid_tx_time)
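
The fan-out pattern above (a bounded queue, a fixed pool of worker coroutines spawned with spawn_callback, and join() to wait for each grooming round) distills to a few lines of plain Tornado; this standalone sketch substitutes a trivial payload for _resolve_txid.

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

CONCURRENCY = 4
queue = Queue(maxsize=CONCURRENCY)

@gen.coroutine
def worker():
    while True:
        item = yield queue.get()
        try:
            yield gen.sleep(0.01)       # stand-in for real per-item work
        finally:
            queue.task_done()           # ack even if the work raised

@gen.coroutine
def run_round(items):
    for item in items:
        yield queue.put(item)           # backpressure: blocks when queue is full
    yield queue.join()                  # wait until every item is acked

for _ in range(CONCURRENCY):
    IOLoop.current().spawn_callback(worker)
IOLoop.current().run_sync(lambda: run_round(range(10)))
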
Code example #58
0
File: scheduler.py Project: aterrel/distributed
class Scheduler(object):
    def __init__(self, center, delete_batch_time=1, loop=None,
            resource_interval=1, resource_log_size=1000):
        self.scheduler_queues = [Queue()]
        self.report_queues = []
        self.delete_queue = Queue()
        self.status = None
        self.coroutines = []

        self.center = coerce_to_rpc(center)

        self.dask = dict()
        self.dependencies = dict()
        self.dependents = dict()
        self.generation = 0
        self.has_what = defaultdict(set)
        self.held_data = set()
        self.in_play = set()
        self.keyorder = dict()
        self.nbytes = dict()
        self.ncores = dict()
        self.nannies = dict()
        self.processing = dict()
        self.restrictions = dict()
        self.stacks = dict()
        self.waiting = dict()
        self.waiting_data = dict()
        self.who_has = defaultdict(set)

        self.exceptions = dict()
        self.tracebacks = dict()
        self.exceptions_blame = dict()
        self.resource_logs = dict()

        self.loop = loop or IOLoop.current()

        self.delete_batch_time = delete_batch_time
        self.resource_interval = resource_interval
        self.resource_log_size = resource_log_size

        self.plugins = []

        self.handlers = {'update-graph': self.update_graph,
                         'update-data': self.update_data,
                         'missing-data': self.mark_missing_data,
                         'task-missing-data': self.mark_missing_data,
                         'worker-failed': self.mark_worker_missing,
                         'release-held-data': self.release_held_data,
                         'restart': self._restart}

    def put(self, msg):
        return self.scheduler_queues[0].put_nowait(msg)

    @property
    def report_queue(self):
        return self.report_queues[0]

    @gen.coroutine
    def _sync_center(self):
        self.ncores, self.has_what, self.who_has, self.nannies = yield [
                self.center.ncores(),
                self.center.has_what(),
                self.center.who_has(),
                self.center.nannies()]

        self._nanny_coroutines = []
        for (ip, wport), nport in self.nannies.items():
            if not nport:
                continue
            if (ip, nport) not in self.resource_logs:
                self.resource_logs[(ip, nport)] = deque(maxlen=self.resource_log_size)

            self._nanny_coroutines.append(self._nanny_listen(ip, nport))

    def start(self, start_queues=True):
        collections = [self.dask, self.dependencies, self.dependents,
                self.waiting, self.waiting_data, self.in_play, self.keyorder,
                self.nbytes, self.processing, self.restrictions]
        for collection in collections:
            collection.clear()

        self.processing = {addr: set() for addr in self.ncores}
        self.stacks = {addr: list() for addr in self.ncores}

        self.worker_queues = {addr: Queue() for addr in self.ncores}

        with ignoring(AttributeError):
            self._delete_coroutine.cancel()
        with ignoring(AttributeError):
            for c in self._worker_coroutines:
                c.cancel()

        self._delete_coroutine = self.delete()
        self._worker_coroutines = [self.worker(w) for w in self.ncores]

        self.heal_state()

        if start_queues:
            self.handle_queues(self.scheduler_queues[0], None)

        for cor in self.coroutines:
            if cor.done():
                raise cor.exception()

        return self._finished()

    @gen.coroutine
    def _finished(self):
        while any(not c.done() for c in self.coroutines):
            yield All(self.coroutines)

    @gen.coroutine
    def _close(self):
        yield self.cleanup()
        yield self._finished()
        yield self.center.close(close=True)
        self.center.close_streams()

    @gen.coroutine
    def cleanup(self):
        """ Clean up queues and coroutines, prepare to stop """
        if self.status == 'closing':
            raise gen.Return()

        self.status = 'closing'
        logger.debug("Cleaning up coroutines")
        n = 0
        self.delete_queue.put_nowait({'op': 'close'}); n += 1
        for w, nc in self.ncores.items():
            for i in range(nc):
                self.worker_queues[w].put_nowait({'op': 'close'}); n += 1

        for s in self.scheduler_queues[1:]:
            s.put_nowait({'op': 'close-stream'})

        for i in range(n):
            msg = yield self.scheduler_queues[0].get()

        for q in self.report_queues:
            q.put_nowait({'op': 'close'})

    def mark_ready_to_run(self, key):
        """ Send task to an appropriate worker, trigger worker """
        logger.debug("Mark %s ready to run", key)
        if key in self.waiting:
            assert not self.waiting[key]
            del self.waiting[key]

        new_worker = decide_worker(self.dependencies, self.stacks,
                self.who_has, self.restrictions, self.nbytes, key)

        self.stacks[new_worker].append(key)
        self.ensure_occupied(new_worker)

    def mark_key_in_memory(self, key, workers=None):
        """ Mark that key now lives in particular workers """
        logger.debug("Mark %s in memory", key)
        if workers is None:
            workers = self.who_has[key]
        for worker in workers:
            self.who_has[key].add(worker)
            self.has_what[worker].add(key)
            with ignoring(KeyError):
                self.processing[worker].remove(key)

        for dep in sorted(self.dependents.get(key, []), key=self.keyorder.get,
                          reverse=True):
            if dep in self.waiting:
                s = self.waiting[dep]
                with ignoring(KeyError):
                    s.remove(key)
                if not s:  # new task ready to run
                    self.mark_ready_to_run(dep)

        for dep in self.dependencies.get(key, []):
            if dep in self.waiting_data:
                s = self.waiting_data[dep]
                with ignoring(KeyError):
                    s.remove(key)
                if not s and dep:
                    self.release_key(dep)

        self.report({'op': 'key-in-memory',
                     'key': key,
                     'workers': workers})

    def ensure_occupied(self, worker):
        """ Spin up tasks on worker while it has tasks and free cores """
        logger.debug('Ensure worker is occupied: %s', worker)
        while (self.stacks[worker] and
               self.ncores[worker] > len(self.processing[worker])):
            key = self.stacks[worker].pop()
            self.processing[worker].add(key)
            logger.debug("Send job to worker: %s, %s, %s", worker, key, self.dask[key])
            self.worker_queues[worker].put_nowait(
                    {'op': 'compute-task',
                     'key': key,
                     'task': self.dask[key],
                     'needed': self.dependencies[key]})

    def seed_ready_tasks(self, keys=None):
        """ Distribute leaves among workers

        Takes an iterable of keys to consider for execution
        """
        if keys is None:
            keys = self.dask
        new_stacks = assign_many_tasks(
                self.dependencies, self.waiting, self.keyorder, self.who_has,
                self.stacks, self.restrictions, self.nbytes,
                [k for k in keys if k in self.waiting and not self.waiting[k]])
        logger.debug("Seed ready tasks: %s", new_stacks)
        for worker, stack in new_stacks.items():
            if stack:
                self.ensure_occupied(worker)

    def release_key(self, key):
        """ Release key from distributed memory if its ready """
        logger.debug("Release key %s", key)
        if key not in self.held_data and not self.waiting_data.get(key):
            self.delete_queue.put_nowait({'op': 'delete-task',
                                          'key': key})
            for w in self.who_has[key]:
                self.has_what[w].remove(key)
            del self.who_has[key]
            if key in self.waiting_data:
                del self.waiting_data[key]
            if key in self.in_play:
                self.in_play.remove(key)

    def update_data(self, who_has=None, nbytes=None):
        logger.debug("Update data %s", who_has)
        for key, workers in who_has.items():
            self.mark_key_in_memory(key, workers)

        self.nbytes.update(nbytes)

        self.held_data.update(who_has)
        self.in_play.update(who_has)

    def mark_task_erred(self, key, worker, exception, traceback):
        """ Mark that a task has erred on a particular worker """
        if key in self.processing[worker]:
            self.processing[worker].remove(key)
            self.exceptions[key] = exception
            self.tracebacks[key] = traceback
            self.mark_failed(key, key)
            self.ensure_occupied(worker)
            for plugin in self.plugins[:]:
                try:
                    plugin.task_erred(self, key, worker, exception)
                except Exception as e:
                    logger.exception(e)

    def mark_failed(self, key, failing_key=None):
        """ When a task fails mark it and all dependent task as failed """
        logger.debug("Mark key as failed %s", key)
        if key in self.exceptions_blame:
            return
        self.exceptions_blame[key] = failing_key
        self.report({'op': 'task-erred',
                     'key': key,
                     'exception': self.exceptions[failing_key],
                     'traceback': self.tracebacks[failing_key]})
        if key in self.waiting:
            del self.waiting[key]
        if key in self.waiting_data:
            del self.waiting_data[key]
        self.in_play.remove(key)
        for dep in self.dependents[key]:
            self.mark_failed(dep, failing_key)

    def mark_task_finished(self, key, worker, nbytes):
        """ Mark that a task has finished execution on a particular worker """
        logger.debug("Mark task as finished %s, %s", key, worker)
        if key in self.processing[worker]:
            self.nbytes[key] = nbytes
            self.mark_key_in_memory(key, [worker])
            self.ensure_occupied(worker)
            for plugin in self.plugins[:]:
                try:
                    plugin.task_finished(self, key, worker, nbytes)
                except Exception as e:
                    logger.exception(e)
        else:
            logger.debug("Key not found in processing, %s, %s, %s",
                         key, worker, self.processing[worker])

    def mark_missing_data(self, missing=None, key=None, worker=None):
        missing = set(missing)
        logger.debug("Recovering missing data: %s", missing)
        for k in missing:
            with ignoring(KeyError):
                workers = self.who_has.pop(k)
                for worker in workers:
                    self.has_what[worker].remove(k)
        self.my_heal_missing_data(missing)

        if key and worker:
            with ignoring(KeyError):
                self.processing[worker].remove(key)
            self.waiting[key] = missing
            logger.info('task missing data, %s, %s', key, self.waiting)
            self.ensure_occupied(worker)

        self.seed_ready_tasks()

    def log_state(self, msg=''):
        logger.debug("Runtime State: %s", msg)
        logger.debug('\n\nwaiting: %s\n\nstacks: %s\n\nprocessing: %s\n\n'
                'in_play: %s\n\n', self.waiting, self.stacks, self.processing,
                self.in_play)

    def mark_worker_missing(self, worker=None, heal=True):
        """ Mark that a worker no longer seems responsive """
        logger.debug("Mark worker as missing %s", worker)
        if worker not in self.processing:
            return
        keys = self.has_what.pop(worker)
        for i in range(self.ncores[worker]):  # send close message, in case not dead
            self.worker_queues[worker].put_nowait({'op': 'close', 'report': False})
        del self.worker_queues[worker]
        del self.ncores[worker]
        del self.stacks[worker]
        del self.processing[worker]
        if not self.stacks:
            logger.critical("Lost all workers")
        missing_keys = set()
        for key in keys:
            self.who_has[key].remove(worker)
            if not self.who_has[key]:
                missing_keys.add(key)
        gone_data = {k for k, v in self.who_has.items() if not v}
        self.in_play.difference_update(missing_keys)
        for k in gone_data:
            del self.who_has[k]

        if heal:
            self.heal_state()

    def update_graph(self, dsk=None, keys=None, restrictions={}):
        update_state(self.dask, self.dependencies, self.dependents,
                self.held_data, self.who_has, self.in_play,
                self.waiting, self.waiting_data, dsk, keys)

        cover_aliases(self.dask, dsk)

        self.restrictions.update(restrictions)

        new_keyorder = order(dsk)  # TODO: define order wrt old graph
        for key in new_keyorder:
            if key not in self.keyorder:
                # TODO: add test for this
                self.keyorder[key] = (self.generation, new_keyorder[key]) # prefer old
        if len(dsk) > 1:
            self.generation += 1  # older graph generations take precedence

        for key in dsk:
            for dep in self.dependencies[key]:
                if dep in self.exceptions_blame:
                    self.mark_failed(key, self.exceptions_blame[dep])

        self.seed_ready_tasks(dsk)
        for key in keys:
            if self.who_has[key]:
                self.mark_key_in_memory(key)

        for plugin in self.plugins[:]:
            try:
                plugin.update_graph(self, dsk, keys, restrictions)
            except Exception as e:
                logger.exception(e)

    def release_held_data(self, key=None):
        if key in self.held_data:
            logger.debug("Release key: %s", key)
            self.held_data.remove(key)
            self.release_key(key)

    def heal_state(self):
        """ Recover from catastrophic change """
        logger.debug("Heal state")
        self.log_state("Before Heal")
        state = heal(self.dependencies, self.dependents, set(self.who_has),
                self.stacks, self.processing, self.waiting, self.waiting_data)
        released = state['released']
        self.in_play.clear(); self.in_play.update(state['in_play'])
        add_keys = {k for k, v in self.waiting.items() if not v}
        for key in self.held_data & released:
            self.report({'op': 'lost-key', 'key': key})
        if self.stacks:
            for key in add_keys:
                self.mark_ready_to_run(key)
        for key in set(self.who_has) & released - self.held_data:
            self.delete_queue.put_nowait({'op': 'delete-task', 'key': key})
        self.in_play.update(self.who_has)
        self.log_state("After Heal")

    def my_heal_missing_data(self, missing):
        logger.debug("Heal from missing data")
        return heal_missing_data(self.dask, self.dependencies, self.dependents,
                self.held_data, self.who_has, self.in_play, self.waiting,
                self.waiting_data, missing)

    def report(self, msg):
        for q in self.report_queues:
            q.put_nowait(msg)

    def add_plugin(self, plugin):
        self.plugins.append(plugin)

    def handle_queues(self, scheduler_queue, report_queue):
        self.scheduler_queues.append(scheduler_queue)
        if report_queue:
            self.report_queues.append(report_queue)
        future = self.handle_scheduler(scheduler_queue, report_queue)
        self.coroutines.append(future)
        return future

    @gen.coroutine
    def handle_scheduler(self, queue, report):
        """ The scheduler coroutine for dask scheduling

        This coroutine manages interactions with all worker cores and with the
        delete coroutine through queues.

        Parameters
        ----------
        queue: tornado.queues.Queue
            Receives messages from outside
        report: tornado.queues.Queue
            Reports information back outside
        worker_queues: dict {worker: tornado.queues.Queue}
            One queue per worker node.
            Each queue is listened to by several worker_core coroutines.
        delete_queue: tornado.queues.Queue
            One queue listened to by ``delete`` which connects to the
            center to delete unnecessary intermediate data
        who_has: dict {key: set}
            Mapping key to {set of worker-identities}
        has_what: dict {worker: set}
            Mapping worker-identity to {set of keys}
        ncores: dict {worker: int}
            Mapping worker-identity to number-of-cores
        """
        assert (not self.dask) == (not self.dependencies), (self.dask, self.dependencies)

        if not self.status == 'running':
            self.status = 'running'
            self.report({'op': 'start'})

        if report:
            report.put_nowait({'op': 'stream-start'})
        while True:
            msg = yield queue.get()
            logger.debug("scheduler receives message %s", msg)
            op = msg.pop('op')

            if op == 'close-stream':
                break
            elif op == 'close':
                self._close()
            elif op in self.handlers:
                result = self.handlers[op](**msg)
                if isinstance(result, gen.Future):
                    yield result
            else:
                logger.warn("Bad message: op=%s, %s", op, msg)

            if op == 'close':
                break

        logger.debug('Finished scheduling coroutine')

    @gen.coroutine
    def worker(self, ident):
        """ Manage a single distributed worker node

        This coroutine manages one remote worker.  It spins up several
        ``worker_core`` coroutines, one for each core.  It reports a closed
        connection to scheduler if one occurs.
        """
        try:
            yield All([self.worker_core(ident, i)
                    for i in range(self.ncores[ident])])
        except (IOError, OSError):
            logger.info("Worker failed from closed stream: %s", ident)
            self.put({'op': 'worker-failed',
                      'worker': ident})

    @gen.coroutine
    def worker_core(self, ident, i):
        """ Manage one core on one distributed worker node

        This coroutine listens on worker_queue for the following operations

        **Incoming Messages**:

        - compute-task:  call worker.compute(...) on remote node, report when done
        - close: close connection to worker node, report `worker-finished` to
          scheduler

        **Outgoing Messages**:

        - task-finished:  sent to scheduler once a task completes
        - task-erred: sent to scheduler when a task errs
        - worker-finished: sent to scheduler in response to a close command
        """
        worker = rpc(ip=ident[0], port=ident[1])
        logger.debug("Start worker core %s, %d", ident, i)

        while True:
            msg = yield self.worker_queues[ident].get()
            if msg['op'] == 'close':
                logger.debug("Worker core receives close message %s, %s",
                        ident, msg)
                break
            if msg['op'] == 'compute-task':
                key = msg['key']
                needed = msg['needed']
                task = msg['task']
                if not istask(task):
                    response, content = yield worker.update_data(data={key: task})
                    assert response == b'OK', response
                    nbytes = content['nbytes'][key]
                else:
                    response, content = yield worker.compute(function=execute_task,
                                                             args=(task,),
                                                             needed=needed,
                                                             key=key,
                                                             kwargs={})
                    if response == b'OK':
                        nbytes = content['nbytes']
                logger.debug("Compute response from worker %s, %s, %s, %s",
                             ident, key, response, content)
                if response == b'error':
                    error, traceback = content
                    self.mark_task_erred(key, ident, error, traceback)

                elif response == b'missing-data':
                    self.mark_missing_data(content.args, key=key, worker=ident)

                else:
                    self.mark_task_finished(key, ident, nbytes)

        yield worker.close(close=True)
        worker.close_streams()
        if msg.get('report', True):
            self.put({'op': 'worker-finished',
                      'worker': ident})
        logger.debug("Close worker core, %s, %d", ident, i)


    @gen.coroutine
    def delete(self):
        """ Delete extraneous intermediates from distributed memory

        This coroutine manages a connection to the center in order to send keys
        that should be removed from distributed memory.  We batch several keys that
        come in over the ``delete_queue`` into a list.  Roughly once a second we
        send this list of keys over to the center which then handles deleting
        these keys from workers' memory.

        worker \                                /-> worker node
        worker -> scheduler -> delete -> center --> worker node
        worker /                                \-> worker node

        **Incoming Messages**

        - delete-task: holds a key to be deleted
        - close: close this coroutine
        """
        batch = list()
        last = time()

        while True:
            msg = yield self.delete_queue.get()
            if msg['op'] == 'close':
                break

            # TODO: trigger coroutine to go off in a second if no activity
            batch.append(msg['key'])
            if batch and time() - last > self.delete_batch_time:  # One second batching
                logger.debug("Ask center to delete %d keys", len(batch))
                last = time()
                yield self.center.delete_data(keys=batch)
                batch = list()

        if batch:
            yield self.center.delete_data(keys=batch)

        self.put({'op': 'delete-finished'})
        logger.debug('Delete finished')

    @gen.coroutine
    def _nanny_listen(self, ip, port):
        stream = yield connect(ip=ip, port=port)
        yield write(stream, {'op': 'monitor_resources',
                             'interval': self.resource_interval})
        while not stream.closed():
            msg = yield read(stream)
            self.resource_logs[(ip, port)].append(msg)

    @gen.coroutine
    def _scatter(self, stream, data=None, workers=None):
        if not self.ncores:
            raise ValueError("No workers yet found.  "
                             "Try syncing with center.\n"
                             "  e.sync_center()")
        ncores = workers if workers is not None else self.ncores
        remotes, who_has, nbytes = yield scatter_to_workers(
                                            self.center, ncores, data)
        self.update_data(who_has=who_has, nbytes=nbytes)

        raise gen.Return(remotes)

    @gen.coroutine
    def _restart(self):
        logger.debug("Send shutdown signal to workers")

        for q in self.scheduler_queues + self.report_queues:
            clear_queue(q)

        for addr in self.nannies:
            self.mark_worker_missing(worker=addr, heal=False)

        logger.debug("Send kill signal to nannies")
        nannies = [rpc(ip=ip, port=n_port)
                   for (ip, w_port), n_port in self.nannies.items()]
        yield All([nanny.kill() for nanny in nannies])

        while self.ncores:
            yield gen.sleep(0.01)

        # All quiet

        yield All([nanny.instantiate(close=True) for nanny in nannies])
        yield self._sync_center()
        self.start()

        self.report({'op': 'restart'})
        for plugin in self.plugins[:]:
            try:
                plugin.restart(self)
            except Exception as e:
                logger.exception(e)
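
Finally, a hedged sketch of driving this scheduler through its queue interface; it assumes a center service is already listening at the given address, and the one-task graph payload simply mirrors the kwargs of the update-graph handler.

# Sketch only: requires a running center at 127.0.0.1:8787; the message
# payloads follow the handlers dict defined in __init__.
from tornado import gen
from tornado.ioloop import IOLoop

@gen.coroutine
def drive():
    scheduler = Scheduler('127.0.0.1:8787')
    yield scheduler._sync_center()        # pull ncores/has_what/who_has/nannies
    finished = scheduler.start()          # spin up worker and delete coroutines
    scheduler.put({'op': 'update-graph',
                   'dsk': {'x': (sum, [1, 2])},
                   'keys': ['x']})
    scheduler.put({'op': 'close'})
    yield finished

IOLoop.current().run_sync(drive)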