Code example #1
class TorskelEventLogController(object):
    def __init__(self):
        self.logger = tornado.log.gen_log
        self.queue = Queue()

    def add_log_event(self, event):
        """
        Put event into queue
        :param event:
        :return:
        """
        if isinstance(event, dict):
            self.logger.debug(event)
            self.queue.put(event)

    async def write_log_from_queue(self, db, collection_name,
                                   events_writer_func) -> None:
        """
        Retrieves events from the queue and performs
        the insert into the database.
        """

        qsize = self.queue.qsize()
        if options.show_log_event_writer:
            self.logger.info(f'Writing events... queue size = {qsize}')
        if qsize > 0:
            step = qsize if qsize <= options.task_list_size else \
                options.task_list_size
            inserts_list = [await self.queue.get() for _ in range(step)]
            if len(inserts_list) > 0:
                await events_writer_func(db, collection_name, inserts_list)
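The controller above only collects events; a separate coroutine drains the queue in bounded batches and hands them to a writer callback. A minimal, runnable sketch of that batch-drain idea (MAX_BATCH, fake_writer and demo are placeholders invented for illustration, not names from the project above):

from tornado.ioloop import IOLoop
from tornado.queues import Queue

MAX_BATCH = 100
events = Queue()

async def fake_writer(batch):
    # Stand-in for events_writer_func(db, collection_name, inserts_list).
    print('writing %d events' % len(batch))

async def flush():
    # Drain at most MAX_BATCH of the items queued right now, then write them.
    step = min(events.qsize(), MAX_BATCH)
    batch = [await events.get() for _ in range(step)]
    if batch:
        await fake_writer(batch)

async def demo():
    for i in range(5):
        events.put_nowait({'event': i})
    await flush()

if __name__ == '__main__':
    IOLoop.current().run_sync(demo)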
Code example #2
class SubscribeListener(SubscribeCallback):
    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()

    def message(self, pubnub, message):
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def wait_for_connect(self):
        if not self.connected_event.is_set():
            yield self.connected_event.wait()
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            yield self.disconnected_event.wait()
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                env = yield self.message_queue.get()
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                env = yield self.presence_queue.get()
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.presence_queue.task_done()
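wait_for_message_on above keeps pulling envelopes off the queue, acknowledging each with task_done(), until one arrives for a requested channel. A generic, runnable version of that pattern (Envelope, the channel names and demo are invented for illustration):

from collections import namedtuple

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

Envelope = namedtuple('Envelope', 'channel payload')
inbox = Queue()

@gen.coroutine
def wait_for_message_on(*channel_names):
    while True:
        try:
            env = yield inbox.get()
            if env.channel in channel_names:
                raise gen.Return(env)  # matching item: return it to the caller
        finally:
            inbox.task_done()  # acknowledge every item we consumed

@gen.coroutine
def demo():
    inbox.put_nowait(Envelope('other', 'skip me'))
    inbox.put_nowait(Envelope('news', 'hello'))
    env = yield wait_for_message_on('news')
    print(env)

if __name__ == '__main__':
    IOLoop.current().run_sync(demo)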
Code example #3
    def get_file_list(account, **kwargs):
        queue = Queue()
        sem = BoundedSemaphore(FETCH_CONCURRENCY)
        done, working = set(), set()
        data = set()

        @gen.coroutine
        def fetch_url():
            current_url = yield queue.get()
            try:
                if current_url in working:
                    return
                page_no = working.__len__()
                app_log.info("Fetching page {}".format(page_no))
                working.add(current_url)
                req = account.get_request(current_url)
                client = AsyncHTTPClient()
                response = yield client.fetch(req)
                done.add(current_url)
                app_log.info("Page {} downloaded".format(page_no))
                response_data = json.loads(response.body.decode('utf-8'))

                for file in response_data:
                    # be sure we're a valid file type and less than our maximum response size limit
                    extension = file['path'].lower().split('.')[-1]
                    if extension in VALID_FILETYPES and int(
                            file['bytes']) < RESPONSE_SIZE_LIMIT * 1000000:
                        data.add((
                            file['path'].lstrip('/'),
                            file['path'],
                        ))
                app_log.info("Page {} completed".format(page_no))
            finally:
                queue.task_done()
                sem.release()

        @gen.coroutine
        def worker():
            while True:
                yield sem.acquire()
                fetch_url()

        app_log.info("Gathering filelist for account {}".format(account._id))
        for file_type in VALID_FILETYPES:
            file_type = '.'.join([file_type])
            url = "https://api.dropbox.com/1/search/auto/?query={}&include_membership=true".format(
                file_type)
            queue.put(url)
        # start our concurrency worker
        worker()
        # wait until we're done
        yield queue.join(timeout=timedelta(seconds=MAXIMUM_REQ_TIME))
        app_log.info("Finished list retrieval. Found {} items.".format(
            data.__len__()))
        return sorted([{
            "title": title,
            "value": path
        } for title, path in data],
                      key=lambda f: f['title'])
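get_file_list above pairs a Queue of URLs with a BoundedSemaphore that caps concurrent fetches, then blocks on queue.join() with a deadline. A stripped-down, runnable sketch of that pattern with placeholder work instead of the Dropbox API (CONCURRENCY, handle_one and the example URLs are illustrative only):

from datetime import timedelta

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import BoundedSemaphore
from tornado.queues import Queue

CONCURRENCY = 3
queue = Queue()
sem = BoundedSemaphore(CONCURRENCY)

@gen.coroutine
def handle_one():
    url = yield queue.get()
    try:
        yield gen.sleep(0.01)  # placeholder for AsyncHTTPClient().fetch(url)
        print('done', url)
    finally:
        queue.task_done()
        sem.release()

@gen.coroutine
def worker():
    while True:
        yield sem.acquire()
        handle_one()  # fire-and-forget, like fetch_url() above

@gen.coroutine
def main():
    for i in range(10):
        queue.put_nowait('https://example.com/page/%d' % i)
    worker()  # start the dispatcher
    yield queue.join(timeout=timedelta(seconds=5))

if __name__ == '__main__':
    IOLoop.current().run_sync(main)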
Code example #4
File: admin.py Project: ewintec/domogik
class Publisher(MQAsyncSub):
    """Handles new data to be passed on to subscribers."""
    def __init__(self):
        self.WSmessages = Queue()
        self.MQmessages = Queue()
        self.sub = MQAsyncSub.__init__(self, zmq.Context(), 'admin', [])
        self.subscribers = set()

    def register(self, subscriber):
        """Register a new subscriber."""
        self.subscribers.add(subscriber)

    def deregister(self, subscriber):
        """Stop publishing to a subscriber."""
        self.subscribers.remove(subscriber)

    @gen.coroutine
    def on_message(self, did, msg):
        """Receive message from MQ sub and send to WS."""
        yield self.WSmessages.put({"msgid": did, "content": msg})

    @gen.coroutine
    def submit(self, message):
        """Submit a new message to publish to subscribers."""
        yield self.WSmessages.put(message)

    @gen.coroutine
    def publishToWS(self):
        while True:
            message = yield self.WSmessages.get()
            if len(self.subscribers) > 0:
                print("Pushing MQ message {} to {} WS subscribers...".format(
                    message, len(self.subscribers)))
                yield [subscriber.submit(message) for subscriber in self.subscribers]

    @gen.coroutine
    def publishToMQ(self):
        ctx = zmq.Context()
        cli = MQSyncReq(ctx)
        pub = MQPub(ctx, 'admin')
        while True:
            message = yield self.MQmessages.get()
            jsons = json.loads(message)
            # req/rep
            if 'mq_request' in jsons and 'data' in jsons:
                msg = MQMessage()
                msg.set_action(str(jsons['mq_request']))
                msg.set_data(jsons['data'])
                print("REQ : {0}".format(msg.get()))
                if 'dst' in jsons:
                    print(cli.request(str(jsons['dst']), msg.get(), timeout=10).get())
                else:
                    print(cli.request('manager', msg.get(), timeout=10).get())
            # pub
            elif 'mq_publish' in jsons and 'data' in jsons:
                print("Publish : {0}".format(jsons['data']))
                pub.send_event(jsons['mq_publish'],
                                    jsons['data'])
Code example #5
File: biz.py Project: JinkelaCrops/tornado-learning
class Decode(object):
    def __init__(self, sess_field):
        self.q = Queue(maxsize=1000)
        self.p = Queue(maxsize=1000)
        self.sess_field = sess_field

    @staticmethod
    def batch_pad(nd):
        max_length = max(map(len, nd))
        pad_nd = [
            i + [text_encoder.PAD_ID] * (max_length - len(i)) for i in nd
        ]
        return pad_nd

    @gen.coroutine
    def decode(self):
        log.info("[biz] Decode: model loading ... ")
        saver = tf.train.Saver()

        with tf.Session(config=self.sess_field.sess_config) as sess:
            # Load weights from checkpoint.
            log.info("[biz] Decode: restoring parameters")
            saver.restore(sess, self.sess_field.ckpt)
            log.info("[biz] Decode: model already loaded")
            while True:
                inputs = yield self.q.get()
                log.info("[biz] Decode: " + str(inputs))
                st_time = time.time()
                inputs_numpy = [
                    self.sess_field.encoders["inputs"].encode(i) +
                    [text_encoder.EOS_ID] for i in inputs
                ]
                num_decode_batches = (len(inputs_numpy) -
                                      1) // self.sess_field.batch_size + 1
                results = []
                for i in range(num_decode_batches):
                    batch_size = self.sess_field.batch_size
                    input_numpy = inputs_numpy[i * batch_size:(i + 1) * batch_size]
                    inputs_numpy_batch = input_numpy + \
                        [[text_encoder.EOS_ID]] * (batch_size - len(input_numpy))
                    inputs_numpy_batch = self.batch_pad(
                        inputs_numpy_batch)  # pad using 0
                    # log.info("[biz] Decode: " + str(inputs_numpy_batch))
                    feed = {self.sess_field.inputs_ph: inputs_numpy_batch}
                    result = sess.run(self.sess_field.prediction, feed)
                    decoded_outputs = [
                        self.sess_field.encoders["targets"].decode(i).strip(
                            "<pad>").strip("<EOS>")
                        for i in result["outputs"][:len(input_numpy)]
                    ]
                    results += decoded_outputs
                self.p.put(results)
                log.info("[biz] Decode: source: " + str(inputs))
                log.info("[biz] Decode: target: " + str(results))
                log.info("[biz] Decode: using %s s" % (time.time() - st_time))
Code example #6
    def queueStream(self, rdds, oneAtATime=True, default=None):
        """Create stream iterable over RDDs.

        :param rdds: Iterable over RDDs or lists.
        :param oneAtATime: Process one at a time or all.
        :param default: If no more RDDs in ``rdds``, return this. Can be None.
        :rtype: DStream


        Example:

        >>> import fast_pyspark_tester
        >>> sc = fast_pyspark_tester.Context()
        >>> ssc = fast_pyspark_tester.streaming.StreamingContext(sc, 0.1)
        >>> (
        ...     ssc
        ...     .queueStream([[4], [2], [7]])
        ...     .foreachRDD(lambda rdd: print(rdd.collect()))
        ... )
        >>> ssc.start()
        >>> ssc.awaitTermination(0.35)
        [4]
        [2]
        [7]


        Example testing the default value:

        >>> import fast_pyspark_tester
        >>> sc = fast_pyspark_tester.Context()
        >>> ssc = fast_pyspark_tester.streaming.StreamingContext(sc, 0.1)
        >>> (
        ...     ssc
        ...     .queueStream([[4], [2]], default=['placeholder'])
        ...     .foreachRDD(lambda rdd: print(rdd.collect()))
        ... )
        >>> ssc.start()
        >>> ssc.awaitTermination(0.35)
        [4]
        [2]
        ['placeholder']
        """
        deserializer = QueueStreamDeserializer(self._context)
        if default is not None:
            default = deserializer(default)

        if Queue is False:
            log.error('Run "pip install tornado" to install tornado.')

        q = Queue()
        for i in rdds:
            q.put(i)

        qstream = QueueStream(q, oneAtATime, default)
        return DStream(qstream, self, deserializer)
Code example #7
File: files.py Project: vizydrop/apps
    def get_file_list(account, **kwargs):
        queue = Queue()
        sem = BoundedSemaphore(FETCH_CONCURRENCY)
        done, working = set(), set()
        data = set()

        @gen.coroutine
        def fetch_url():
            current_url = yield queue.get()
            try:
                if current_url in working:
                    return
                page_no = working.__len__()
                app_log.info("Fetching page {}".format(page_no))
                working.add(current_url)
                req = account.get_request(current_url)
                client = AsyncHTTPClient()
                response = yield client.fetch(req)
                done.add(current_url)
                app_log.info("Page {} downloaded".format(page_no))
                response_data = json.loads(response.body.decode('utf-8'))

                for file in response_data:
                    # be sure we're a valid file type and less than our maximum response size limit
                    extension = file['path'].lower().split('.')[-1]
                    if extension in VALID_FILETYPES and int(file['bytes']) < RESPONSE_SIZE_LIMIT * 1000000:
                        data.add((file['path'].lstrip('/'), file['path'], ))
                app_log.info("Page {} completed".format(page_no))
            finally:
                queue.task_done()
                sem.release()

        @gen.coroutine
        def worker():
            while True:
                yield sem.acquire()
                fetch_url()

        app_log.info("Gathering filelist for account {}".format(account._id))
        for file_type in VALID_FILETYPES:
            file_type = '.'.join([file_type])
            url = "https://api.dropbox.com/1/search/auto/?query={}&include_membership=true".format(file_type)
            queue.put(url)
        # start our concurrency worker
        worker()
        # wait until we're done
        yield queue.join(timeout=timedelta(seconds=MAXIMUM_REQ_TIME))
        app_log.info("Finished list retrieval. Found {} items.".format(data.__len__()))
        return sorted([{"title": title, "value": path} for title, path in data], key=lambda f: f['title'])
Code example #8
File: core.py Project: cmwaura/streamz
class delay(Stream):
    """ Add a time delay to results """
    _graphviz_shape = 'octagon'

    def __init__(self, upstream, interval, loop=None, **kwargs):
        loop = loop or upstream.loop or IOLoop.current()
        self.interval = interval
        self.queue = Queue()

        Stream.__init__(self, upstream, loop=loop, **kwargs)

        self.loop.add_callback(self.cb)

    @gen.coroutine
    def cb(self):
        while True:
            last = time()
            x = yield self.queue.get()
            yield self._emit(x)
            duration = self.interval - (time() - last)
            if duration > 0:
                yield gen.sleep(duration)

    def update(self, x, who=None):
        return self.queue.put(x)
Code example #9
File: dask.py Project: kszucs/streams
class gather(Stream):
    def __init__(self, child, limit=10, client=None):
        self.client = client or default_client()
        self.queue = Queue(maxsize=limit)
        self.condition = Condition()

        Stream.__init__(self, child)

        self.client.loop.add_callback(self.cb)

    def update(self, x, who=None):
        return self.queue.put(x)

    @gen.coroutine
    def cb(self):
        while True:
            x = yield self.queue.get()
            L = [x]
            while not self.queue.empty():
                L.append(self.queue.get_nowait())
            results = yield self.client._gather(L)
            for x in results:
                yield self.emit(x)
            if self.queue.empty():
                self.condition.notify_all()

    @gen.coroutine
    def flush(self):
        while not self.queue.empty():
            yield self.condition.wait()
Code example #10
File: handlers.py Project: mivade/tornadose
class BaseHandler(RequestHandler):
    """Base handler for subscribers. To be compatible with data stores
    defined in :mod:`tornadose.stores`, custom handlers should inherit
    this class and implement the :meth:`publish` method.

    """
    def initialize(self, store):
        """Common initialization of handlers happens here. If additional
        initialization is required, this method must either be called with
        ``super`` or the child class must assign the ``store`` attribute and
        register itself with the store.

        """
        assert isinstance(store, stores.BaseStore)
        self.messages = Queue()
        self.store = store
        self.store.register(self)

    @gen.coroutine
    def submit(self, message):
        """Submit a new message to be published."""
        yield self.messages.put(message)

    def publish(self):
        """Push a message to the subscriber. This method must be
        implemented by child classes.

        """
        raise NotImplementedError('publish must be implemented!')
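Per the docstring, subclasses must implement publish(). A hypothetical sketch of what that could look like, assuming the BaseHandler above (and a store registered via initialize) is in scope; this is not the handler shipped with tornadose:

from tornado import gen

class StreamingHandler(BaseHandler):
    """Illustrative subclass: long-polls queued messages out to the client."""

    @gen.coroutine
    def get(self):
        yield self.publish()

    @gen.coroutine
    def publish(self):
        while True:
            message = yield self.messages.get()
            self.write(str(message))
            yield self.flush()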
Code example #11
File: core.py Project: cmwaura/streamz
class buffer(Stream):
    """ Allow results to pile up at this point in the stream

    This allows results to buffer in place at various points in the stream.
    This can help to smooth flow through the system when backpressure is
    applied.
    """
    _graphviz_shape = 'diamond'

    def __init__(self, upstream, n, loop=None, **kwargs):
        loop = loop or upstream.loop or IOLoop.current()
        self.queue = Queue(maxsize=n)

        Stream.__init__(self, upstream, loop=loop, **kwargs)

        self.loop.add_callback(self.cb)

    def update(self, x, who=None):
        return self.queue.put(x)

    @gen.coroutine
    def cb(self):
        while True:
            x = yield self.queue.get()
            yield self._emit(x)
Code example #12
File: app-checkpoint.py Project: Harleymckee/books
class Publisher(object):
    """Handles new data to be passed on to subscribers."""
    def __init__(self):
        self.messages = Queue()
        self.subscribers = set()

    def register(self, subscriber):
        """Register a new subscriber."""
        self.subscribers.add(subscriber)

    def deregister(self, subscriber):
        """Stop publishing to a subscriber."""
        self.subscribers.remove(subscriber)

    @gen.coroutine
    def submit(self, message):
        """Submit a new message to publish to subscribers."""
        yield self.messages.put(message)

    @gen.coroutine
    def publish(self):
        while True:
            message = yield self.messages.get()
            if len(self.subscribers) > 0:
                # print("Pushing message {} to {} subscribers...".format(
                # 		message, len(self.subscribers)))
                yield [
                    subscriber.submit(message)
                    for subscriber in self.subscribers
                ]
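A small wiring sketch for the Publisher above: start the publish loop on the IOLoop, register a subscriber, and submit one message. It assumes the Publisher class (with its Queue and gen imports) is in scope; EchoSubscriber and demo are stand-ins invented for illustration:

from tornado import gen
from tornado.ioloop import IOLoop

class EchoSubscriber(object):
    @gen.coroutine
    def submit(self, message):
        print('subscriber got:', message)

@gen.coroutine
def demo():
    publisher = Publisher()
    publisher.register(EchoSubscriber())
    IOLoop.current().spawn_callback(publisher.publish)
    yield publisher.submit('hello')
    yield gen.sleep(0.1)  # give the publish loop time to deliver

if __name__ == '__main__':
    IOLoop.current().run_sync(demo)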
Code example #13
@gen.coroutine
def main():
    concurrency = 10

    queue = Queue()
    queue.put("http://www.jianshu.com")

    workers = []
    for _ in range(concurrency):
        workers.append(Worker(app, queue))

    for worker in workers:
        Log4Spider.debugLog("worker begin:", worker)
        worker.run()

    Log4Spider.debugLog("waitiing for spiderQueue empty:")
    yield queue.join(timeout=timedelta(seconds=300))
    Log4Spider.debugLog("main done!")
Code example #14
File: tcpclient_test.py Project: SeranSong/tornado-1
class TestTCPServer(TCPServer):
    def __init__(self, family):
        super(TestTCPServer, self).__init__()
        self.streams = []
        self.queue = Queue()
        sockets = bind_sockets(None, 'localhost', family)
        self.add_sockets(sockets)
        self.port = sockets[0].getsockname()[1]

    def handle_stream(self, stream, address):
        self.streams.append(stream)
        self.queue.put(stream)

    def stop(self):
        super(TestTCPServer, self).stop()
        for stream in self.streams:
            stream.close()
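A sketch of how a test might drive this server: connect with TCPClient, then pull the accepted server-side stream off the queue. It assumes TestTCPServer (and its TCPServer/bind_sockets/Queue imports) is importable; the ping payload is made up:

import socket

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.tcpclient import TCPClient

@gen.coroutine
def demo():
    server = TestTCPServer(socket.AF_INET)
    client_stream = yield TCPClient().connect('127.0.0.1', server.port)
    server_stream = yield server.queue.get()  # stream put by handle_stream()
    yield client_stream.write(b'ping\n')
    data = yield server_stream.read_until(b'\n')
    print('server side received:', data)
    server.stop()
    client_stream.close()

if __name__ == '__main__':
    IOLoop.current().run_sync(demo)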
Code example #16
File: tcpclient_test.py Project: leeclemens/tornado
class TestTCPServer(TCPServer):
    def __init__(self, family):
        super(TestTCPServer, self).__init__()
        self.streams = []
        self.queue = Queue()
        sockets = bind_sockets(None, 'localhost', family)
        self.add_sockets(sockets)
        self.port = sockets[0].getsockname()[1]

    def handle_stream(self, stream, address):
        self.streams.append(stream)
        self.queue.put(stream)

    def stop(self):
        super(TestTCPServer, self).stop()
        for stream in self.streams:
            stream.close()
Code example #17
File: command_worker.py Project: jbenua/Flashlight
class CommandQueue():
    def __init__(self):
        self.queue = Queue()

    @gen.coroutine
    def process_command(self):
        while True:
            item = yield self.queue.get()
            try:
                yield gen.sleep(0.1)
                command, view = item
                view.write_message({command[0]: command[1]})
            finally:
                self.queue.task_done()

    def put(self, item):
        self.queue.put(item)
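Minimal wiring for the CommandQueue above: start the consumer on the IOLoop and feed it (command, view) pairs. It assumes the class above is in scope; FakeView and the ('set_color', 'red') command are invented stand-ins for a real WebSocket view and command:

from tornado import gen
from tornado.ioloop import IOLoop

class FakeView(object):
    def write_message(self, payload):
        print('would send over websocket:', payload)

@gen.coroutine
def demo():
    commands = CommandQueue()
    IOLoop.current().spawn_callback(commands.process_command)
    commands.put((('set_color', 'red'), FakeView()))
    yield gen.sleep(0.3)  # let the consumer drain the item

if __name__ == '__main__':
    IOLoop.current().run_sync(demo)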
Code example #18
class TestTCPServer(TCPServer):
    def __init__(self, family):
        super(TestTCPServer, self).__init__()
        self.streams = []  # type: List[IOStream]
        self.queue = Queue()  # type: Queue[IOStream]
        sockets = bind_sockets(0, "10.0.0.7", family)
        self.add_sockets(sockets)
        self.port = sockets[0].getsockname()[1]

    def handle_stream(self, stream, address):
        self.streams.append(stream)
        self.queue.put(stream)

    def stop(self):
        super(TestTCPServer, self).stop()
        for stream in self.streams:
            stream.close()
Code example #19
class ConnectionPool(object):
    def __init__(self, servers, maxsize=15, minsize=1, loop=None, debug=0):
        loop = loop if loop is not None else tornado.ioloop.IOLoop.instance()
        if debug:
            logging.basicConfig(
                level=logging.DEBUG,
                format="'%(levelname)s %(asctime)s"
                " %(module)s:%(lineno)d %(process)d %(thread)d %(message)s'")
        self._loop = loop
        self._servers = servers
        self._minsize = minsize
        self._debug = debug
        self._in_use = set()
        self._pool = Queue(maxsize)

    @gen.coroutine
    def clear(self):
        """Clear pool connections."""
        while not self._pool.empty():
            conn = yield self._pool.get()
            conn.close_socket()

    def size(self):
        return len(self._in_use) + self._pool.qsize()

    @gen.coroutine
    def acquire(self):
        """Acquire connection from the pool, or spawn new one
        if pool maxsize permits.

        :return: ``Connection`` (reader, writer)
        """
        while self.size() < self._minsize:
            _conn = yield self._create_new_conn()
            yield self._pool.put(_conn)

        conn = None
        while not conn:
            if not self._pool.empty():
                conn = yield self._pool.get()

            if conn is None:
                conn = yield self._create_new_conn()

        self._in_use.add(conn)
        raise gen.Return(conn)

    @gen.coroutine
    def _create_new_conn(self):
        conn = yield Connection.get_conn(self._servers, self._debug)
        raise gen.Return(conn)

    def release(self, conn):
        self._in_use.remove(conn)
        try:
            self._pool.put_nowait(conn)
        except (QueueEmpty, QueueFull):
            conn.close_socket()
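At its core the pool above is a bounded Queue of reusable connections: acquire() is a get(), and release() tries put_nowait(), closing the connection when the pool is already full. A generic, runnable illustration of that idea with plain objects (Resource is a stand-in for the project's Connection):

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue, QueueFull

class Resource(object):
    def __init__(self, n):
        self.n = n

    def close_socket(self):
        print('closing resource', self.n)

@gen.coroutine
def demo():
    pool = Queue(maxsize=2)
    for i in range(2):
        pool.put_nowait(Resource(i))

    res = yield pool.get()    # acquire
    print('using resource', res.n)
    try:
        pool.put_nowait(res)  # release back into the pool
    except QueueFull:
        res.close_socket()    # pool already full: drop the connection

if __name__ == '__main__':
    IOLoop.current().run_sync(demo)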
Code example #20
class TestChannelConfiguration:
    MESSAGE = "test_message"

    @gen.coroutine
    def _top(self):
        message = yield self._message_queue.get()
        self._message_queue.put(message)
        return message

    def test_channel_configuration(self, rabbitmq_url, configuration):
        self._message_queue = Queue(maxsize=1)
        io_loop = IOLoop.current()
        self.io_loop = io_loop
        async_connection = AsyncConnection(rabbitmq_url, io_loop, logging.getLogger(__name__))

        publish_channel = ChannelConfiguration(
            async_connection, async_connection.logger, io_loop, **configuration["publish"])
        receive_channel = ChannelConfiguration(
            async_connection, async_connection.logger, io_loop, **configuration["receive"])

        # Test channel creation and getter from channel async queue
        channel = io_loop.run_sync(publish_channel._get_channel)
        assert channel.is_open

        # Publish message and check that it uses the same channel
        io_loop.run_sync(functools.partial(publish_channel.publish, self.MESSAGE))
        assert io_loop.run_sync(publish_channel._get_channel) == channel

        # Start consuming and wait for message
        io_loop.spawn_callback(receive_channel.consume, self.callback)
        message = io_loop.run_sync(self._top, 10)
        # Stop the loop, in order to stop consuming
        io_loop.stop()
        assert message == self.MESSAGE

    def callback(self, channel, method, properties, body):
        body = body.decode()
        print(f"consumed: {body}")
        self._message_queue.put(body)
Code example #21
File: libwatcher.py Project: jianingy/watchgang
class TornadoQuerierBase(object):

    def __init__(self):
        self.tasks = TornadoQueue()

    def gen_task(self):
        raise NotImplementedError()

    def run_task(self, task):
        raise NotImplementedError()

    def prepare(self):
        self.running = True

    def cleanup(self):
        self.running = False

    @coroutine
    def run_worker(self, worker_id, f):
        while self.tasks.qsize() > 0:
            task = yield self.tasks.get()
            LOG.debug('worker[%d]: current task is %s' % (worker_id, task))
            try:
                yield f(task)
                pass
            except Exception as e:
                LOG.warning(str(e))
            finally:
                self.tasks.task_done()
                task = None
        LOG.debug('worker[%d]: all tasks done %s' % (worker_id, self.tasks))

    @coroutine
    def start(self, num_workers=1):

        self.prepare()

        # add tasks
        tasks = yield self.gen_task()
        for task in tasks:
            yield self.tasks.put(task)

        # start shoot workers
        for worker_id in range(num_workers):
            LOG.debug('starting worker %d' % worker_id)
            self.run_worker(worker_id, self.run_task)

        yield self.tasks.join()
        self.cleanup()
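A minimal concrete querier built on the base class above: gen_task produces the work items and run_task handles one of them. It assumes TornadoQuerierBase is importable from its module (which also supplies TornadoQueue and LOG); UrlQuerier and the example URLs are invented for illustration:

from tornado import gen
from tornado.ioloop import IOLoop

class UrlQuerier(TornadoQuerierBase):
    @gen.coroutine
    def gen_task(self):
        # start() yields this, so hand back the task list via gen.Return.
        raise gen.Return(['http://a.example', 'http://b.example'])

    @gen.coroutine
    def run_task(self, task):
        print('checking', task)
        yield gen.sleep(0.01)

if __name__ == '__main__':
    IOLoop.current().run_sync(lambda: UrlQuerier().start(num_workers=2))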
Code example #23
File: core.py Project: zjw0358/streamz3
class buffer(Stream):
    def __init__(self, n, child, loop=None):
        self.queue = Queue(maxsize=n)

        Stream.__init__(self, child, loop=loop)

        self.loop.add_callback(self.cb)

    def update(self, x, who=None):
        return self.queue.put(x)

    @gen.coroutine
    def cb(self):
        while True:
            x = yield self.queue.get()
            yield self.emit(x)
Code example #24
class MessageRouter(object):
    def __init__(self, message_sender, default_handler=None):
        self._queue = Queue()
        self.message_sender = message_sender
        self.default_handler = default_handler
        self._message_handlers = {}
        self._working = False

    def register_message_handler(self, message, handler):
        assert isinstance(message, MessageMeta)
        assert hasattr(handler, '__call__')
        self._message_handlers[message.__name__] = handler

    @gen.coroutine
    def put_message(self, message):
        assert isinstance(message, Message)
        yield self._queue.put(message)

    @gen.coroutine
    def start(self):
        self._working = True
        while self._working:
            message = yield self._queue.get()
            try:
                # TODO: Maybe we need to add special handling for BarrierRequest
                handler = self._message_handlers.get(message.type,
                                                     self.default_handler)
                if handler:
                    yield handler(message)
            except Exception as e:
                exc_type, exc_value, exc_tb = sys.exc_info()
                error_type, error_subtype, error_message, extended_message = errors.exception_to_error_args(
                    exc_type, exc_value, exc_tb)
                response = Error.from_request(
                    message,
                    error_type=error_type,
                    error_subtype=error_subtype,
                    message=error_message,
                    extended_message=extended_message)
                yield self.message_sender.send_message_ignore_response(
                    response)
            finally:
                self._queue.task_done()

    def stop(self):
        self._working = False
Code example #25
class StreamClient(object):
    MAX_SIZE = 60

    def __init__(self, stream_id):
        self.id = generate_id()
        self.stream_id = stream_id
        self.queue = Queue(StreamClient.MAX_SIZE)

    @coroutine
    def send(self, item):
        yield self.queue.put(item)

    @coroutine
    def fetch(self):
        item = yield self.queue.get()
        self.queue.task_done()
        return item

    def empty(self):
        return self.queue.qsize() == 0
Code example #26
File: upload.py Project: jdk2588/fileuploader
class UploadHandler(BaseHandler):

    executor = ThreadPoolExecutor(max_workers=settings.THREAD_WORKERS)

    def prepare(self, *args, **kwargs):

        # Queue to take chunks of data received
        self.queue = Queue()

        #Change the size of body
        if self.request.method.lower() == "post":
            self.request.connection.set_max_body_size(
                settings.MAX_STREAMED_SIZE)

        try:
            self.content_length = int(
                self.request.headers.get("Content-Length", "0"))
        except (KeyError, ValueError):
            self.content_length = 0

        super(UploadHandler, self).prepare(*args, **kwargs)

    @tornado.gen.coroutine
    def data_received(self, chunk):
        # Put chunks in a queue as received
        yield self.queue.put(chunk)

    #Upload to S3, with Threaded Pool
    @run_on_executor(executor='executor')
    def background_task(self, obj):
        return obj.upload_to_s3(self.queue)

    @tornado.gen.coroutine
    def post(self):
        obj = UploadFile(body=self.request.body,
                         content_type=self.request.headers.get("Content-Type"),
                         content_length=self.content_length)

        ret = obj.write_entry()
        self.write_json(data={"token": ret})
        yield self.background_task(obj)
Code example #28
File: admin.py Project: domogik/domogik
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""
    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def open(self):
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()        

    def _close(self):
        print("Subscriber left.")
        self.publisher.deregister(self)
        self.finished = True

    @gen.coroutine
    def submit(self, message):
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        """ Empty the queue of messages to send to the WS """
        while not self.finished:
            message = yield self.messages.get()
            self.send(message)

    def send(self, message):
        try:
            self.write_message(message)
        except WebSocketClosedError:
            self._close()
    
    def on_message(self, content):
        """ reciev message from websocket and send to MQ """
        #print(u"WS to MQ: {0}".format(content))
        self.publisher.MQmessages.put(content)
Code example #29
File: core.py Project: zjw0358/streamz3
class delay(Stream):
    def __init__(self, interval, child, loop=None):
        self.interval = interval
        self.queue = Queue()

        Stream.__init__(self, child, loop=loop)

        self.loop.add_callback(self.cb)

    @gen.coroutine
    def cb(self):
        while True:
            last = time()
            x = yield self.queue.get()
            yield self.emit(x)
            duration = self.interval - (time() - last)
            if duration > 0:
                yield gen.sleep(duration)

    def update(self, x, who=None):
        return self.queue.put(x)
Code example #30
class PopularCategories:
    def __init__(self):
        self.categories = {}
        self.update_queue = Queue()

    @gen.coroutine
    def add_for_processing(self, predictions):
        yield self.update_queue.put(predictions)

    @gen.coroutine
    def process_queue(self):
        if self.update_queue.qsize() > 0:
            for i in range(self.update_queue.qsize()):
                predictions = yield self.update_queue.get()
                try:
                    self._update_categories(predictions)
                finally:
                    self.update_queue.task_done()

            # update top 5
            top_5 = sorted(self.categories.items(),
                           key=lambda x: x[1],
                           reverse=True)[:5]
            mapped = map(lambda x: to_json_result(x[0], x[1]), top_5)
            yield update_top_5(list(mapped))

    def _update_categories(self, new_predictions):
        predictions = new_predictions.argsort()[0]

        #  update categories total
        for prediction in predictions:
            label = configuration.image_labels[prediction]
            score = new_predictions[0][prediction]

            if label in self.categories:
                update_score = (self.categories[label] + score) / 2
            else:
                update_score = score

            self.categories[label] = update_score
Code example #31
File: app-checkpoint.py Project: Harleymckee/books
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""
    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def check_origin(self, origin):
        return True

    def open(self):
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()

    def _close(self):
        print("Subscriber left.")
        self.publisher.deregister(self)
        self.finished = True

    @gen.coroutine
    def submit(self, message):
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        while not self.finished:
            message = yield self.messages.get()
            # print("New message: " + str(message))
            self.send(message)

    def send(self, message):
        try:
            self.write_message(dict(value=message))
        except WebSocketClosedError:
            self._close()
Code example #32
File: admin.py Project: ewintec/domogik
class Subscription(WebSocketHandler):
    """Websocket for subscribers."""
    def initialize(self, publisher):
        self.publisher = publisher
        self.messages = Queue()
        self.finished = False

    def open(self):
        print("New subscriber.")
        self.publisher.register(self)
        self.run()

    def on_close(self):
        self._close()        

    def _close(self):
        print("Subscriber left.")
        self.publisher.deregister(self)
        self.finished = True

    @gen.coroutine
    def submit(self, message):
        yield self.messages.put(message)

    @gen.coroutine
    def run(self):
        while not self.finished:
            message = yield self.messages.get()
            #print("New MQ message: " + str(message))
            self.send(message)

    def send(self, message):
        try:
            self.write_message(message)
        except WebSocketClosedError:
            self._close()
    
    def on_message(self, content):
        self.publisher.MQmessages.put(content)
Code example #33
File: stores.py Project: mivade/tornadose
class QueueStore(BaseStore):
    """Publish data via queues.

    This class is meant to be used in cases where subscribers should
    not miss any data. Compared to the :class:`DataStore` class, new
    messages to be broadcast to clients are put in a queue to be
    processed in order.

    """
    def initialize(self):
        self.messages = Queue()
        self.publish()

    @gen.coroutine
    def submit(self, message):
        yield self.messages.put(message)

    @gen.coroutine
    def publish(self):
        while True:
            message = yield self.messages.get()
            if len(self.subscribers) > 0:
                yield [subscriber.submit(message) for subscriber in self.subscribers]
Code example #34
File: drain.py Project: CrowdStrike/cs.eyrie
class SQSDrain(object):
    """Implementation of IDrain that writes to an AWS SQS queue.
    """

    def __init__(self, logger, loop, sqs_client,
                 metric_prefix='emitter'):
        self.emitter = sqs_client
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.output_error = Event()
        self.state = RUNNING
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)
        self._send_queue = Queue()
        self._should_flush_queue = Event()
        self._flush_handle = None
        self.loop.spawn_callback(self._onSend)

    @gen.coroutine
    def _flush_send_batch(self, batch_size):
        send_batch = [
            self._send_queue.get_nowait()
            for pos in range(min(batch_size, self.emitter.max_messages))
        ]
        try:
            response = yield self.emitter.send_message_batch(*send_batch)
        except SQSError as err:
            self.logger.exception('Error encountered flushing data to SQS: %s',
                                  err)
            self.output_error.set()
            for msg in send_batch:
                self._send_queue.put_nowait(msg)
        else:
            if response.Failed:
                self.output_error.set()
                for req in response.Failed:
                    self.logger.error('Message failed to send: %s', req.Id)
                    self._send_queue.put_nowait(req)

    @gen.coroutine
    def _onSend(self):
        respawn = True
        while respawn:
            qsize = self._send_queue.qsize()
            # This will keep flushing until clear,
            # including items that show up in between flushes
            while qsize > 0:
                try:
                    yield self._flush_send_batch(qsize)
                except Exception as err:
                    self.logger.exception(err)
                    self.output_error.set()
                qsize = self._send_queue.qsize()
            # We've cleared the backlog, remove any possible future flush
            if self._flush_handle:
                self.loop.remove_timeout(self._flush_handle)
                self._flush_handle = None
            self._should_flush_queue.clear()
            yield self._should_flush_queue.wait()

    @gen.coroutine
    def close(self, timeout=None):
        self.state = CLOSING
        yield self._send_queue.join(timeout)

    def emit_nowait(self, msg):
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
            raise QueueFull()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                lambda: self._should_flush_queue.set(),
            )
        self.logger.debug("Drain emitting")
        self._send_queue.put_nowait(msg)

    @gen.coroutine
    def emit(self, msg, timeout=None):
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                lambda: self._should_flush_queue.set(),
            )
        yield self._send_queue.put(msg, timeout)
Code example #35
File: spider.py Project: winstonf88/pyjobs
class BaseSpider(object):
    url_parser = None

    def __init__(self, engine, concurrent=3):
        self.engine = engine
        self.http = httpclient.AsyncHTTPClient()
        self.queue = Queue()
        self.concurrency = concurrent

    @property
    def hostname(self):
        return self.url_parser.hostname

    @property
    def url_root(self):
        return self.url_parser.url_root

    @property
    def base_url(self):
        return self.url_parser.base_url

    @gen.coroutine
    def __worker(self):
        """Consumes the queue."""
        while True:
            yield self.fetch_url()

    @gen.coroutine
    def crawl(self, description, location):
        """Starts crawling the specified URL."""
        url = self.url_parser(description, location)
        self.queue.put(url)
        self.engine.notify_started(self)
        for _ in range(self.concurrency):
            self.__worker()
        yield self.queue.join()
        self.engine.notify_finished(self)

    @gen.coroutine
    def fetch_url(self):
        """Retrieves a URL from the queue and returns the parsed data."""
        url = yield self.queue.get()
        logger.info('fetching %s' % url)
        try:
            response = yield self.http.fetch(url)
            soup = BeautifulSoup(response.body)
            logger.info('got response %s' % url)

            urls = yield self.fetch_links(response, soup)
            for new_url in urls:
                logger.debug('Added %s to queue' % new_url)
                yield self.queue.put(new_url)

            data = yield self.parse_response(response, soup)
            logger.info('Parsed response for %s' % url)
        except (httpclient.HTTPError, ValueError):
            message = 'HTTP Error: (%s)' % url
            self.engine.write_message(message, self.engine.STATUS_ERROR)
        else:
            self.engine.write_data(data)
        finally:
            self.queue.task_done()

    @gen.coroutine
    def fetch_links(self, response, soup):
        """Fetch URLs to be added to the queue."""
        raise gen.Return([])

    def parse_response(self, response, soup):
        """Extract information from the response, return should be a 
        list of dict's.
        
        Sample dict:
        {
            'title': 'Job Title',
            'company': 'Company Name',
            'location': 'City/State/Country',
            'tags': ['tag1', 'tag2', 'tag3'],
            'category': 'Software Developer',
            'origin': 'Name of the origin website',
            'url': 'Link to the complete job description',
        }
        """
        raise NotImplementedError
Code example #36
File: pubnub_tornado.py Project: pubnub/python
class TornadoSubscriptionManager(SubscriptionManager):
    def __init__(self, pubnub_instance):

        subscription_manager = self

        self._message_queue = Queue()
        self._consumer_event = Event()
        self._cancellation_event = Event()
        self._subscription_lock = Semaphore(1)
        # self._current_request_key_object = None
        self._heartbeat_periodic_callback = None
        self._reconnection_manager = TornadoReconnectionManager(pubnub_instance)

        super(TornadoSubscriptionManager, self).__init__(pubnub_instance)
        self._start_worker()

        class TornadoReconnectionCallback(ReconnectionCallback):
            def on_reconnect(self):
                subscription_manager.reconnect()

                pn_status = PNStatus()
                pn_status.category = PNStatusCategory.PNReconnectedCategory
                pn_status.error = False

                subscription_manager._subscription_status_announced = True
                subscription_manager._listener_manager.announce_status(pn_status)

        self._reconnection_listener = TornadoReconnectionCallback()
        self._reconnection_manager.set_reconnection_listener(self._reconnection_listener)

    def _set_consumer_event(self):
        self._consumer_event.set()

    def _message_queue_put(self, message):
        self._message_queue.put(message)

    def _start_worker(self):
        self._consumer = TornadoSubscribeMessageWorker(self._pubnub,
                                                       self._listener_manager,
                                                       self._message_queue,
                                                       self._consumer_event)
        run = stack_context.wrap(self._consumer.run)
        self._pubnub.ioloop.spawn_callback(run)

    def reconnect(self):
        self._should_stop = False
        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
        # self._register_heartbeat_timer()

    def disconnect(self):
        self._should_stop = True
        self._stop_heartbeat_timer()
        self._stop_subscribe_loop()

    @tornado.gen.coroutine
    def _start_subscribe_loop(self):
        self._stop_subscribe_loop()

        yield self._subscription_lock.acquire()

        self._cancellation_event.clear()

        combined_channels = self._subscription_state.prepare_channel_list(True)
        combined_groups = self._subscription_state.prepare_channel_group_list(True)

        if len(combined_channels) == 0 and len(combined_groups) == 0:
            return

        envelope_future = Subscribe(self._pubnub) \
            .channels(combined_channels).channel_groups(combined_groups) \
            .timetoken(self._timetoken).region(self._region) \
            .filter_expression(self._pubnub.config.filter_expression) \
            .cancellation_event(self._cancellation_event) \
            .future()

        canceller_future = self._cancellation_event.wait()

        wi = tornado.gen.WaitIterator(envelope_future, canceller_future)

        # iterates 2 times: one for result one for cancelled
        while not wi.done():
            try:
                result = yield wi.next()
            except Exception as e:
                # TODO: verify the error will not be eaten
                logger.error(e)
                raise
            else:
                if wi.current_future == envelope_future:
                    e = result
                elif wi.current_future == canceller_future:
                    return
                else:
                    raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

                if e.is_error():
                    # 599 error doesn't work - tornado uses this status code
                    # for a wide range of errors, for ex:
                    # HTTP Server Error (599): [Errno -2] Name or service not known
                    if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
                        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
                        return

                    logger.error("Exception in subscribe loop: %s" % str(e))

                    if e.status is not None and e.status.category == PNStatusCategory.PNAccessDeniedCategory:
                        e.status.operation = PNOperationType.PNUnsubscribeOperation

                    self._listener_manager.announce_status(e.status)

                    self._reconnection_manager.start_polling()
                    self.disconnect()
                    return
                else:
                    self._handle_endpoint_call(e.result, e.status)

                    self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)

            finally:
                self._cancellation_event.set()
                yield tornado.gen.moment
                self._subscription_lock.release()
                self._cancellation_event.clear()
                break

    def _stop_subscribe_loop(self):
        if self._cancellation_event is not None and not self._cancellation_event.is_set():
            self._cancellation_event.set()

    def _stop_heartbeat_timer(self):
        if self._heartbeat_periodic_callback is not None:
            self._heartbeat_periodic_callback.stop()

    def _register_heartbeat_timer(self):
        super(TornadoSubscriptionManager, self)._register_heartbeat_timer()
        self._heartbeat_periodic_callback = PeriodicCallback(
            stack_context.wrap(self._perform_heartbeat_loop),
            self._pubnub.config.heartbeat_interval * TornadoSubscriptionManager.HEARTBEAT_INTERVAL_MULTIPLIER,
            self._pubnub.ioloop)
        self._heartbeat_periodic_callback.start()

    @tornado.gen.coroutine
    def _perform_heartbeat_loop(self):
        if self._heartbeat_call is not None:
            # TODO: cancel call
            pass

        cancellation_event = Event()
        state_payload = self._subscription_state.state_payload()
        presence_channels = self._subscription_state.prepare_channel_list(False)
        presence_groups = self._subscription_state.prepare_channel_group_list(False)

        if len(presence_channels) == 0 and len(presence_groups) == 0:
            return

        try:
            envelope = yield self._pubnub.heartbeat() \
                .channels(presence_channels) \
                .channel_groups(presence_groups) \
                .state(state_payload) \
                .cancellation_event(cancellation_event) \
                .future()

            heartbeat_verbosity = self._pubnub.config.heartbeat_notification_options
            if envelope.status.is_error:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL or \
                        heartbeat_verbosity == PNHeartbeatNotificationOptions.FAILURES:
                    self._listener_manager.announce_status(envelope.status)
            else:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)

        except PubNubTornadoException:
            pass
            # TODO: check correctness
            # if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
            #     self._start_subscribe_loop()
            # else:
            #     self._listener_manager.announce_status(e.status)
        except Exception as e:
            print(e)
        finally:
            cancellation_event.set()

    @tornado.gen.coroutine
    def _send_leave(self, unsubscribe_operation):
        envelope = yield Leave(self._pubnub) \
            .channels(unsubscribe_operation.channels) \
            .channel_groups(unsubscribe_operation.channel_groups).future()
        self._listener_manager.announce_status(envelope.status)
Code example #37
File: py-server.py Project: beef9999/go-chatroom
class Client(object):

    def __init__(self, server, name, stream):
        self.server = server
        self.name = name
        self.rooms = {}
        self.stream = stream
        self.inqueue = Queue(maxsize=QUEUE_SIZE)
        self.outqueue = Queue(maxsize=QUEUE_SIZE)

    @coroutine
    def forwarding(self):
        while True:
            msg = yield self.outqueue.get()
            if msg.command == COMMAND_QUIT:
                for _, room in self.rooms.items():
                    yield room.inqueue.put(msg)
            elif msg.command == COMMAND_JOIN:
                room_name = msg.receiver
                room = self.server.get_room(room_name)
                self.rooms[room_name] = room
                yield room.inqueue.put(msg)
            else:
                room = self.rooms[msg.receiver]
                yield room.inqueue.put(msg)
            self.outqueue.task_done()

    @coroutine
    def response(self):
        global SPEED
        while True:
            msg = yield self.inqueue.get()
            if msg.command == COMMAND_QUIT:
                self.stream.close()
                return
            else:
                response = ("%s %s:%s\n" % (datetime.datetime.now(),
                                            msg.sender.name,
                                            msg.content.decode()))\
                    .encode('utf-8')
                try:
                    SPEED += 1
                    yield self.stream.write(response)
                except Exception as e:
                    logging.debug(str(e))
                    self.stream.close()

    @coroutine
    def receive(self):
        while True:
            try:
                line = yield self.stream.read_until(b'\n')
            except Exception as e:
                logging.debug(str(e))
                msg = Message(self, '', COMMAND_QUIT, 'CONNECTION ERROR')
                yield self.outqueue.put(msg)
                return
            data = line.strip().split(b' ')
            if len(data) != 2:
                continue
            room_name, content = data[0], data[1]
            if room_name in self.rooms:
                msg = Message(self, room_name, COMMAND_NORMAL, content)
            else:
                msg = Message(self, room_name, COMMAND_JOIN, content)
            yield self.outqueue.put(msg)
Code example #38
class BlogBackup(object):
    _default_dir_name = 'seg_blog_backup'

    def _generate_save_dir(self):
        cur_dir = os.path.dirname(__file__)
        self.save_path = os.path.join(cur_dir, self._default_dir_name)
        if not os.path.isdir(self.save_path):
            os.mkdir(self.save_path)

    def _parse_save_path(self):
        if self.save_path:
            if os.path.exists(self.save_path) and \
                    os.path.isdir(self.save_path):
                return
            else:
                raise BlogSavePathError(
                    "'%s' does not exist or is not a directory!" % self.save_path)
        else:
            self._generate_save_dir()

    def _get_user_cookies(self):
        url = target_url + login_page_path
        self.driver.get(url)
        try:
            user_input = self.driver.find_element_by_name('mail')
            passwd_input = self.driver.find_element_by_name('password')
            submit_btn = self.driver.find_element_by_class_name('pr20')
        except NoSuchElementException:
            raise PageHtmlChanged(
                "%s login page structure has changed!" % _domain)

        user_input.send_keys(self.username)
        passwd_input.send_keys(self.passwd)
        submit_btn.click()
        try:
            WebDriverWait(self.driver, 3).until(staleness_of(submit_btn))
        except TimeoutException:
            raise Exception("Wrong username or password!")

        WebDriverWait(self.driver, timeout=10).until(has_page_load)
        try_times = 0
        while True:
            time.sleep(1)
            if url != self.driver.current_url:
                return self.driver.get_cookies()

            try_times += 1
            if try_times > 10:
                raise Exception("Getting cookie info failed!")

    def _get_driver(self):
        if self.phantomjs_path:
            try:
                return webdriver.PhantomJS(
                    executable_path=self.phantomjs_path,
                    service_log_path=os.path.devnull)
            except WebDriverException:
                raise PhantomjsPathError("Phantomjs path is invalid!")
        else:
            return webdriver.PhantomJS(service_log_path=os.path.devnull)

    def __init__(self, **conf):
        self.username = conf['username']
        self.passwd = conf['passwd']
        self.phantomjs_path = conf.get('phantomjs_path')
        self.save_path = conf.get('save_path')
        self._q = Queue()

        self._parse_save_path()
        self.driver = self._get_driver()
        self._cookies = self._get_user_cookies()

    @gen.coroutine
    def run(self):
        self.__filter_cookies()

        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()

        yield self._q.join()

    def __filter_cookies(self):
        self._cookies = {k['name']: k['value'] for k in self._cookies if
                         k['domain'] == _domain}

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d('.stream-list__item > .summary > h2 > a')
        for link in link_elements:
            yield self._q.put(d(link).attr('href'))

        next_ele = d('.pagination li.next a')
        if next_ele:
            next_page_url = target_url + next_ele.attr('href')
            self._fetch_blog_list_page(next_page_url)

    @gen.coroutine
    def _fetch_essay_content(self):
        while True:
            try:
                essay_path = yield self._q.get(timeout=1)
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                file_name = title + '.md'
                real_file_name = os.path.join(self.save_path, file_name)
                with open(real_file_name, 'w') as f:
                    f.writelines(content.encode('utf8'))
            except gen.TimeoutError:
                raise gen.Return()
            finally:
                self._q.task_done()
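A minimal way to drive BlogBackup, assuming valid credentials; the values below are placeholders, and phantomjs_path/save_path may be omitted to fall back on the defaults handled in __init__ (note that the constructor performs the Selenium login before run() is ever called):

from tornado.ioloop import IOLoop

backup = BlogBackup(username='user@example.com', passwd='secret',
                    save_path='/tmp/seg_blog_backup')  # phantomjs_path is optional
IOLoop.current().run_sync(backup.run)  # fetch the list pages, then the essays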
Code example #39
0
File: scraper.py Project: andres-de-castro/scraping
class Scraper():

    def __init__(
                self,
                destinations=None,
                transform=None,
                headers={},
                max_clients=50,
                maxsize=50,
                connect_timeout=1200,
                request_timeout=600,):

        """Instantiate a tornado async http client to do multiple concurrent requests"""

        if None in [destinations, transform]:
            sys.stderr.write('You must pass both a collection of URLs and a transform function')
            raise SystemExit

        self.max_clients = max_clients
        self.maxsize = maxsize
        self.connect_timeout = connect_timeout
        self.request_timeout = request_timeout

        AsyncHTTPClient.configure("tornado.simple_httpclient.SimpleAsyncHTTPClient", max_clients=self.max_clients)

        self.http_client = AsyncHTTPClient()
        self.queue = Queue(maxsize=50)
        self.destinations = destinations
        self.transform = transform
        self.headers = headers
        self.read(self.destinations)
        self.get(self.transform, self.headers, self.connect_timeout, self.request_timeout, self.http_client)
        self.loop = ioloop.IOLoop.current()
        self.join_future = self.queue.join()

        def done(future):
            self.loop.stop()

        self.join_future.add_done_callback(done)
        self.loop.start()

    @gen.coroutine
    def read(self, destinations):
        for url in destinations:
            yield self.queue.put(url)

    @gen.coroutine
    def get(self, transform, headers, connect_timeout, request_timeout, http_client):
        while True:
            url = yield self.queue.get()
            try:
                request = HTTPRequest(url,
                                    connect_timeout=connect_timeout,
                                    request_timeout=request_timeout,
                                    method="GET",
                                    headers = headers
                )
            except Exception as e:
                sys.stderr.write('Destination {0} returned error {1}'.format(url, str(e) + '\n'))

            future = self.http_client.fetch(request)

            def done_callback(future):
                body = future.result().body
                url = future.result().effective_url
                transform(body, url=url)
                self.queue.task_done()

            try:
                future.add_done_callback(done_callback)
            except Exception as e:
                sys.stderr.write(str(e))
                self.queue.put(url)
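Because Scraper starts and stops its own IOLoop inside __init__, using it is just a constructor call. The URLs and the transform callback below are illustrative:

import sys

def save_page(body, url=None):
    # called once per fetched page with the response body and effective URL
    sys.stdout.write('%s -> %d bytes\n' % (url, len(body)))

Scraper(destinations=['https://example.com/a', 'https://example.com/b'],
        transform=save_page)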
Code example #40
0
class ProjectGroomer(object):
  """ Cleans up expired transactions for a project. """
  def __init__(self, project_id, coordinator, zk_client, db_access,
               thread_pool):
    """ Creates a new ProjectGroomer.

    Args:
      project_id: A string specifying a project ID.
      coordinator: A GroomingCoordinator.
      zk_client: A KazooClient.
      db_access: A DatastoreProxy.
      thread_pool: A ThreadPoolExecutor.
    """
    self.project_id = project_id

    self._coordinator = coordinator
    self._zk_client = zk_client
    self._tornado_zk = TornadoKazoo(self._zk_client)
    self._db_access = db_access
    self._thread_pool = thread_pool
    self._project_node = '/appscale/apps/{}'.format(self.project_id)
    self._containers = []
    self._inactive_containers = set()
    self._batch_resolver = BatchResolver(self.project_id, self._db_access)

    self._zk_client.ensure_path(self._project_node)
    self._zk_client.ChildrenWatch(self._project_node, self._update_containers)

    self._txid_manual_offset = 0
    self._offset_node = '/'.join([self._project_node, OFFSET_NODE])
    self._zk_client.DataWatch(self._offset_node, self._update_offset)

    self._stop_event = AsyncEvent()
    self._stopped_event = AsyncEvent()

    # Keeps track of cleanup results for each round of grooming.
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = None

    self._worker_queue = AsyncQueue(maxsize=MAX_CONCURRENCY)
    for _ in range(MAX_CONCURRENCY):
      IOLoop.current().spawn_callback(self._worker)

    IOLoop.current().spawn_callback(self.start)

  @gen.coroutine
  def start(self):
    """ Starts the grooming process until the stop event is set. """
    logger.info('Grooming {}'.format(self.project_id))
    while True:
      if self._stop_event.is_set():
        break

      try:
        yield self._groom_project()
      except Exception:
        # Prevent the grooming loop from stopping if an error is encountered.
        logger.exception(
          'Unexpected error while grooming {}'.format(self.project_id))
        yield gen.sleep(MAX_TX_DURATION)

    self._stopped_event.set()

  @gen.coroutine
  def stop(self):
    """ Stops the grooming process. """
    logger.info('Stopping grooming process for {}'.format(self.project_id))
    self._stop_event.set()
    yield self._stopped_event.wait()

  @gen.coroutine
  def _worker(self):
    """ Processes items in the worker queue. """
    while True:
      tx_path, composite_indexes = yield self._worker_queue.get()
      try:
        tx_time = yield self._resolve_txid(tx_path, composite_indexes)
        if tx_time is None:
          self._txids_cleaned += 1

        if tx_time is not None and tx_time < self._oldest_valid_tx_time:
          self._oldest_valid_tx_time = tx_time
      finally:
        self._worker_queue.task_done()

  def _update_offset(self, new_offset, _):
    """ Watches for updates to the manual offset node.

    Args:
      new_offset: A string specifying the new manual offset.
    """
    self._txid_manual_offset = int(new_offset or 0)

  def _update_containers(self, nodes):
    """ Updates the list of active txid containers.

    Args:
      nodes: A list of strings specifying ZooKeeper nodes.
    """
    counters = [int(node[len(CONTAINER_PREFIX):] or 1)
                for node in nodes if node.startswith(CONTAINER_PREFIX)
                and node not in self._inactive_containers]
    counters.sort()

    containers = [CONTAINER_PREFIX + str(counter) for counter in counters]
    if containers and containers[0] == '{}1'.format(CONTAINER_PREFIX):
      containers[0] = CONTAINER_PREFIX

    self._containers = containers

  @gen.coroutine
  def _groom_project(self):
    """ Runs the grooming process. """
    index = self._coordinator.index
    worker_count = self._coordinator.total_workers

    oldest_valid_tx_time = yield self._fetch_and_clean(index, worker_count)

    # Wait until there's a reasonable chance that some transactions have
    # timed out.
    next_timeout_eta = oldest_valid_tx_time + MAX_TX_DURATION

    # The oldest ignored transaction should still be valid, but ensure that
    # the timeout is not negative.
    next_timeout = max(0, next_timeout_eta - time.time())
    time_to_wait = datetime.timedelta(
      seconds=next_timeout + (MAX_TX_DURATION / 2))

    # Allow the wait to be cut short when a project is removed.
    try:
      yield self._stop_event.wait(timeout=time_to_wait)
    except gen.TimeoutError:
      raise gen.Return()

  @gen.coroutine
  def _remove_path(self, tx_path):
    """ Removes a ZooKeeper node.

    Args:
      tx_path: A string specifying the path to delete.
    """
    try:
      yield self._tornado_zk.delete(tx_path)
    except NoNodeError:
      pass
    except NotEmptyError:
      yield self._thread_pool.submit(self._zk_client.delete, tx_path,
                                     recursive=True)

  @gen.coroutine
  def _resolve_txid(self, tx_path, composite_indexes):
    """ Cleans up a transaction if it has expired.

    Args:
      tx_path: A string specifying the location of the ZooKeeper node.
      composite_indexes: A list of CompositeIndex objects.
    Returns:
      The transaction start time if it is still valid, or None if it has
      expired or is invalid (in which case this method also deletes it).
    """
    tx_data = yield self._tornado_zk.get(tx_path)
    tx_time = float(tx_data[0])

    _, container, tx_node = tx_path.rsplit('/', 2)
    tx_node_id = int(tx_node.lstrip(COUNTER_NODE_PREFIX))
    container_count = int(container[len(CONTAINER_PREFIX):] or 1)
    if tx_node_id < 0:
      yield self._remove_path(tx_path)
      raise gen.Return()

    container_size = MAX_SEQUENCE_COUNTER + 1
    automatic_offset = (container_count - 1) * container_size
    txid = self._txid_manual_offset + automatic_offset + tx_node_id

    if txid < 1:
      yield self._remove_path(tx_path)
      raise gen.Return()

    # If the transaction is still valid, return the time it was created.
    if tx_time + MAX_TX_DURATION >= time.time():
      raise gen.Return(tx_time)

    yield self._batch_resolver.resolve(txid, composite_indexes)
    yield self._remove_path(tx_path)
    yield self._batch_resolver.cleanup(txid)

  @gen.coroutine
  def _fetch_and_clean(self, worker_index, worker_count):
    """ Cleans up expired transactions.

    Args:
      worker_index: An integer specifying this worker's index.
      worker_count: An integer specifying the number of total workers.
    Returns:
      A float specifying the time of the oldest valid transaction as a unix
      timestamp.
    """
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = time.time()

    children = []
    for index, container in enumerate(self._containers):
      container_path = '/'.join([self._project_node, container])
      new_children = yield self._tornado_zk.get_children(container_path)

      if not new_children and index < len(self._containers) - 1:
        self._inactive_containers.add(container)

      children.extend(['/'.join([container_path, node])
                       for node in new_children])

    logger.debug(
      'Found {} transaction IDs for {}'.format(len(children), self.project_id))

    if not children:
      raise gen.Return(self._oldest_valid_tx_time)

    # Refresh these each time so that the indexes are fresh.
    encoded_indexes = yield self._thread_pool.submit(
      self._db_access.get_indices, self.project_id)
    composite_indexes = [CompositeIndex(index) for index in encoded_indexes]

    for tx_path in children:
      tx_node_id = int(tx_path.split('/')[-1].lstrip(COUNTER_NODE_PREFIX))
      # Only resolve transactions that this worker has been assigned.
      if tx_node_id % worker_count != worker_index:
        continue

      yield self._worker_queue.put((tx_path, composite_indexes))

    yield self._worker_queue.join()

    if self._txids_cleaned > 0:
      logger.info('Cleaned up {} expired txids for {}'.format(
        self._txids_cleaned, self.project_id))

    raise gen.Return(self._oldest_valid_tx_time)
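The groomer's fan-out shape (a bounded tornado.queues.Queue, a fixed number of consumers spawned with spawn_callback, producers that yield queue.put(), and a final queue.join()) can be reduced to a self-contained sketch; the names and the gen.sleep() stand-in are illustrative:

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

@gen.coroutine
def groom(items, concurrency=4):
    queue = Queue(maxsize=concurrency)

    @gen.coroutine
    def worker():
        while True:
            item = yield queue.get()
            try:
                yield gen.sleep(0.01)  # stand-in for per-item work such as _resolve_txid()
            finally:
                queue.task_done()

    for _ in range(concurrency):
        IOLoop.current().spawn_callback(worker)
    for item in items:
        yield queue.put(item)   # blocks once `concurrency` items are in flight
    yield queue.join()          # returns when every item has been task_done()'d

IOLoop.current().run_sync(lambda: groom(range(20)))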
Code example #41
0
File: worker.py Project: coobas/distributed
class Worker(Server):
    """ Worker Node

    Workers perform two functions:

    1.  **Serve data** from a local dictionary
    2.  **Perform computation** on that data and on data from peers

    Additionally workers keep a Center informed of their data and use that
    Center to gather data from other workers when necessary to perform a
    computation.

    You can start a worker with the ``dworker`` command line application::

        $ dworker scheduler-ip:port

    **State**

    * **data:** ``{key: object}``:
        Dictionary mapping keys to actual values
    * **active:** ``{key}``:
        Set of keys currently under computation
    * **ncores:** ``int``:
        Number of cores used by this worker process
    * **executor:** ``concurrent.futures.ThreadPoolExecutor``:
        Executor used to perform computation
    * **local_dir:** ``path``:
        Path on local machine to store temporary files
    * **center:** ``rpc``:
        Location of center or scheduler.  See ``.ip/.port`` attributes.
    * **name:** ``string``:
        Alias
    * **services:** ``{str: Server}``:
        Auxiliary web servers running on this worker
    * **service_ports:** ``{str: port}``:

    Examples
    --------

    Create centers and workers in Python:

    >>> from distributed import Center, Worker
    >>> c = Center('192.168.0.100', 8787)  # doctest: +SKIP
    >>> w = Worker(c.ip, c.port)  # doctest: +SKIP
    >>> yield w._start(port=8788)  # doctest: +SKIP

    Or use the command line::

       $ dcenter
       Start center at 127.0.0.1:8787

       $ dworker 127.0.0.1:8787
       Start worker at:            127.0.0.1:8788
       Registered with center at:  127.0.0.1:8787

    See Also
    --------
    distributed.center.Center:
    """

    def __init__(self, center_ip, center_port, ip=None, ncores=None,
                 loop=None, local_dir=None, services=None, service_ports=None,
                 name=None, **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        self.executor = ThreadPoolExecutor(self.ncores)
        self.thread_tokens = Queue()  # https://github.com/tornadoweb/tornado/issues/1595#issuecomment-198551572
        for i in range(self.ncores):
            self.thread_tokens.put_nowait(i)
        self.center = rpc(ip=center_ip, port=center_port)
        self.active = set()
        self.name = name

        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {'compute': self.compute,
                    'gather': self.gather,
                    'compute-stream': self.compute_stream,
                    'run': self.run,
                    'get_data': self.get_data,
                    'update_data': self.update_data,
                    'delete_data': self.delete_data,
                    'terminate': self.terminate,
                    'ping': pingpong,
                    'health': self.health,
                    'upload_file': self.upload_file}

        super(Worker, self).__init__(handlers, **kwargs)

    @gen.coroutine
    def _start(self, port=0):
        self.listen(port)
        self.name = self.name or self.address
        for k, v in self.services.items():
            v.listen(0)
            self.service_ports[k] = v.port

        logger.info('      Start worker at: %20s:%d', self.ip, self.port)
        for k, v in self.service_ports.items():
            logger.info('  %16s at: %20s:%d' % (k, self.ip, v))
        logger.info('Waiting to connect to: %20s:%d',
                    self.center.ip, self.center.port)
        while True:
            try:
                resp = yield self.center.register(
                        ncores=self.ncores, address=(self.ip, self.port),
                        keys=list(self.data), services=self.service_ports,
                        name=self.name)
                break
            except (OSError, StreamClosedError):
                logger.debug("Unable to register with scheduler.  Waiting")
                yield gen.sleep(0.5)
        if resp != 'OK':
            raise ValueError(resp)
        logger.info('        Registered to: %20s:%d',
                    self.center.ip, self.center.port)
        self.status = 'running'

    def start(self, port=0):
        self.loop.add_callback(self._start, port)

    def identity(self, stream):
        return {'type': type(self).__name__, 'id': self.id,
                'center': (self.center.ip, self.center.port)}

    @gen.coroutine
    def _close(self, report=True, timeout=10):
        if report:
            yield gen.with_timeout(timedelta(seconds=timeout),
                    self.center.unregister(address=(self.ip, self.port)),
                    io_loop=self.loop)
        self.center.close_streams()
        self.stop()
        self.executor.shutdown()
        if os.path.exists(self.local_dir):
            shutil.rmtree(self.local_dir)

        for k, v in self.services.items():
            v.stop()
        self.status = 'closed'
        self.stop()

    @gen.coroutine
    def terminate(self, stream, report=True):
        yield self._close(report=report)
        raise Return('OK')

    @property
    def address(self):
        return '%s:%d' % (self.ip, self.port)

    @property
    def address_tuple(self):
        return (self.ip, self.port)

    @gen.coroutine
    def gather(self, stream=None, who_has=None):
        who_has = {k: [coerce_to_address(addr) for addr in v]
                    for k, v in who_has.items()
                    if k not in self.data}
        try:
            result = yield gather_from_workers(who_has)
        except KeyError as e:
            logger.warn("Could not find data", e)
            raise Return({'status': 'missing-data',
                          'keys': e.args})
        else:
            self.data.update(result)
            raise Return({'status': 'OK'})

    @gen.coroutine
    def _ready_task(self, function=None, key=None, args=(), kwargs={},
            task=None, who_has=None):
        diagnostics = {}
        if who_has:
            local_data = {k: self.data[k] for k in who_has if k in self.data}
            who_has = {k: set(map(coerce_to_address, v))
                       for k, v in who_has.items()
                       if k not in self.data}
            try:
                logger.info("gather %d keys from peers: %s",
                            len(who_has), str(who_has))
                diagnostics['transfer-start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer-stop'] = time()
                data = merge(local_data, other)
            except KeyError as e:
                logger.warn("Could not find data for %s", key)
                raise Return({'status': 'missing-data',
                              'keys': e.args,
                              'key': key})
        else:
            data = {}
            transfer_time = 0
        try:
            start = default_timer()
            if task is not None:
                task = loads(task)
            if function is not None:
                function = loads(function)
            if args:
                args = loads(args)
            if kwargs:
                kwargs = loads(kwargs)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            raise Return(assoc(error_message(e), 'key', key))

        if task is not None:
            assert not function and not args and not kwargs
            function = execute_task
            args = (task,)

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({'status': 'OK',
                      'function': function,
                      'args': args2,
                      'kwargs': kwargs2,
                      'diagnostics': diagnostics,
                      'key': key})

    @gen.coroutine
    def executor_submit(self, key, function, *args, **kwargs):
        """ Safely run function in thread pool executor

        We've run into issues running concurrent.future futures within
        tornado.  Apparently it's advantageous to use timeouts and periodic
        callbacks to ensure things run smoothly.  This can get tricky, so we
        pull it off into a separate method.
        """
        token = yield self.thread_tokens.get()
        job_counter[0] += 1
        i = job_counter[0]
        # logger.info("%s:%d Starts job %d, %s", self.ip, self.port, i, key)
        future = self.executor.submit(function, *args, **kwargs)
        pc = PeriodicCallback(lambda: logger.debug("future state: %s - %s",
            key, future._state), 1000); pc.start()
        try:
            if sys.version_info < (3, 2):
                yield future
            else:
                while not future.done() and future._state != 'FINISHED':
                    try:
                        yield gen.with_timeout(timedelta(seconds=1), future,
                                               io_loop=self.loop)
                        break
                    except gen.TimeoutError:
                        logger.info("work queue size: %d", self.executor._work_queue.qsize())
                        logger.info("future state: %s", future._state)
                        logger.info("Pending job %d: %s", i, future)
        finally:
            pc.stop()
            self.thread_tokens.put(token)

        result = future.result()

        logger.info("Finish job %d, %s", i, key)
        raise gen.Return(result)

    @gen.coroutine
    def compute_stream(self, stream):
        with log_errors():
            logger.debug("Open compute stream")
            bstream = BatchedSend(interval=10, loop=self.loop)
            bstream.start(stream)

        @gen.coroutine
        def process(msg):
            try:
                result = yield self.compute(report=False, **msg)
                bstream.send(result)
            except Exception as e:
                logger.exception(e)
                bstream.send(assoc(error_message(e), 'key', msg.get('key')))

        with log_errors():
            while True:
                try:
                    msgs = yield read(stream)
                except StreamClosedError:
                    break
                if not isinstance(msgs, list):
                    msgs = [msgs]

                for msg in msgs:
                    op = msg.pop('op', None)
                    if op == 'close':
                        break
                    elif op == 'compute-task':
                        self.loop.add_callback(process, msg)
                    else:
                        logger.warning("Unknown operation %s, %s", op, msg)

            yield bstream.close()
            logger.info("Close compute stream")

    @gen.coroutine
    def compute(self, stream=None, function=None, key=None, args=(), kwargs={},
            task=None, who_has=None, report=True):
        """ Execute function """
        self.active.add(key)

        # Ready function for computation
        msg = yield self._ready_task(function=function, key=key, args=args,
            kwargs=kwargs, task=task, who_has=who_has)
        if msg['status'] != 'OK':
            try:
                self.active.remove(key)
            except KeyError:
                pass
            raise Return(msg)
        else:
            function = msg['function']
            args = msg['args']
            kwargs = msg['kwargs']

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args, kwargs)

        result['key'] = key
        result.update(msg['diagnostics'])

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.center.add_keys(address=(self.ip, self.port),
                                                      keys=[key])
                if not response == 'OK':
                    logger.warn('Could not report results to center: %s',
                                str(response))
        else:
            logger.warn(" Compute Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000], str(args)[:1000],
                str(kwargs)[:1000], exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key, msg)
        try:
            self.active.remove(key)
        except KeyError:
            pass
        raise Return(result)

    @gen.coroutine
    def run(self, stream, function=None, args=(), kwargs={}):
        function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)
        try:
            result = function(*args, **kwargs)
        except Exception as e:
            logger.warn(" Run Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000], str(args)[:1000],
                str(kwargs)[:1000], exc_info=True)

            response = error_message(e)
        else:
            response = {
                'status': 'OK',
                'result': dumps(result),
            }
        raise Return(response)

    @gen.coroutine
    def update_data(self, stream, data=None, report=True):
        data = valmap(loads, data)
        self.data.update(data)
        if report:
            response = yield self.center.add_keys(address=(self.ip, self.port),
                                                  keys=list(data))
            assert response == 'OK'
        info = {'nbytes': {k: sizeof(v) for k, v in data.items()},
                'status': 'OK'}
        raise Return(info)

    @gen.coroutine
    def delete_data(self, stream, keys=None, report=True):
        for key in keys:
            if key in self.data:
                del self.data[key]
        logger.info("Deleted %d keys", len(keys))
        if report:
            logger.debug("Reporting loss of keys to center")
            yield self.center.remove_keys(address=self.address,
                                          keys=list(keys))
        raise Return('OK')

    def get_data(self, stream, keys=None):
        return {k: dumps(self.data[k]) for k in keys if k in self.data}

    def upload_file(self, stream, filename=None, data=None, load=True):
        out_filename = os.path.join(self.local_dir, filename)
        if isinstance(data, unicode):
            data = data.encode()
        with open(out_filename, 'wb') as f:
            f.write(data)
            f.flush()

        if load:
            try:
                name, ext = os.path.splitext(filename)
                if ext in ('.py', '.pyc'):
                    logger.info("Reload module %s from .py file", name)
                    name = name.split('-')[0]
                    reload(import_module(name))
                if ext == '.egg':
                    sys.path.append(out_filename)
                    pkgs = pkg_resources.find_distributions(out_filename)
                    for pkg in pkgs:
                        logger.info("Load module %s from egg", pkg.project_name)
                        reload(import_module(pkg.project_name))
                    if not pkgs:
                        logger.warning("Found no packages in egg file")
            except Exception as e:
                logger.exception(e)
                return {'status': 'error', 'exception': dumps(e)}
        return {'status': 'OK', 'nbytes': len(data)}

    def health(self, stream=None):
        """ Information about worker """
        d = {'active': len(self.active),
             'stored': len(self.data),
             'time': time()}
        try:
            import psutil
            mem = psutil.virtual_memory()
            d.update({'cpu': psutil.cpu_percent(),
                      'memory': mem.total,
                      'memory-percent': mem.percent})
            try:
                net_io = psutil.net_io_counters()
                d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
                d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
            except AttributeError:
                pass
            self._last_net_io = net_io

            try:
                disk_io = psutil.disk_io_counters()
                d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes
                d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes
            except (AttributeError, RuntimeError):
                disk_io = None
            self._last_disk_io = disk_io
        except ImportError:
            pass
        return d
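Starting a worker outside of the dworker CLI follows the pattern from the docstring; the sketch below assumes a Center/scheduler is already listening on 127.0.0.1:8787, and the port numbers are illustrative:

from tornado.ioloop import IOLoop

worker = Worker('127.0.0.1', 8787, ncores=2, name='worker-1')
worker.start(port=8788)      # schedules _start() on the IOLoop
IOLoop.current().start()     # serve until interrupted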
Code example #42
0
File: httpd.py Project: iNaKoll/kevin
class PlainStreamHandler(web.RequestHandler, Watcher):
    """ Provides the job stdout stream via plain HTTP GET """
    @gen.coroutine
    def get(self):
        self.job = None

        try:
            project_name = self.request.query_arguments["project"][0]
        except (KeyError, IndexError):
            self.write(b"no project given\n")
            return

        try:
            build_id = self.request.query_arguments["hash"][0]
        except (KeyError, IndexError):
            self.write(b"no build hash given\n")
            return

        try:
            job_name = self.request.query_arguments["job"][0]
        except (KeyError, IndexError):
            self.write(b"no job given\n")
            return

        project_name = project_name.decode(errors='replace')
        build_id = build_id.decode(errors='replace')
        job_name = job_name.decode(errors='replace')

        try:
            project = CFG.projects[project_name]

        except KeyError:
            self.write(b"unknown project requested\n")
            return

        build = get_build(project, build_id)
        if not build:
            self.write(("no such build: project %s [%s]\n" % (
                project_name, build_id)).encode())
            return
        else:
            self.job = build.jobs.get(job_name)
            if not self.job:
                self.write(("unknown job in project %s [%s]: %s\n" % (
                    project_name, build_id, job_name)).encode())
                return

            # the message queue to be sent to the http client
            self.queue = Queue()

            # request the updates from the watched jobs
            self.job.watch(self)

            # emit the updates and wait until no more are coming
            yield self.watch_job()

    @gen.coroutine
    def watch_job(self):
        """ Process updates and send them to the client """

        self.set_header("Content-Type", "text/plain")

        while True:
            update = yield self.queue.get()

            if update is StopIteration:
                break

            if isinstance(update, StdOut):
                self.write(update.data.encode())

            elif isinstance(update, JobState):
                if update.is_errored():
                    self.write(
                        ("\x1b[31merror:\x1b[m %s\n" %
                         (update.text)).encode()
                    )
                elif update.is_succeeded():
                    self.write(
                        ("\x1b[32msuccess:\x1b[m %s\n" %
                         (update.text)).encode()
                    )
                elif update.is_finished():
                    self.write(
                        ("\x1b[31mfailed:\x1b[m %s\n" %
                         (update.text)).encode()
                    )

            yield self.flush()

        return self.finish()

    def on_update(self, update):
        """ Put a message to the stream queue """
        self.queue.put(update)

    def on_connection_close(self):
        """ Add a connection-end marker to the queue """
        self.on_update(StopIteration)

    def on_finish(self):
        # TODO: only do this if we got a GET request.
        if self.job is not None:
            self.job.unwatch(self)
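Wiring the handler into an application is a one-liner; the route and port below are illustrative rather than taken from the kevin project:

from tornado import web
from tornado.ioloop import IOLoop

app = web.Application([(r'/raw', PlainStreamHandler)])
app.listen(8080)
IOLoop.current().start()
# then e.g.: curl 'http://localhost:8080/raw?project=myproj&hash=<build>&job=<job>'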
Code example #43
0
File: pubnub_tornado.py Project: pubnub/python
class SubscribeListener(SubscribeCallback):
    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.error_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()
        elif status.is_error():
            self.error_queue.put_nowait(status.error_data.exception)

    def message(self, pubnub, message):
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def _wait_for(self, coro):
        error = self.error_queue.get()
        wi = tornado.gen.WaitIterator(coro, error)

        while not wi.done():
            result = yield wi.next()

            if wi.current_future == coro:
                raise gen.Return(result)
            elif wi.current_future == error:
                raise result
            else:
                raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

    @tornado.gen.coroutine
    def wait_for_connect(self):
        if not self.connected_event.is_set():
            yield self._wait_for(self.connected_event.wait())
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            yield self._wait_for(self.disconnected_event.wait())
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try: # NOQA
                env = yield self._wait_for(self.message_queue.get())
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                try:
                    env = yield self._wait_for(self.presence_queue.get())
                except: # NOQA E722 pylint: disable=W0702
                    break
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.presence_queue.task_done()
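A typical test-style consumer built on this listener, assuming pubnub is an already configured PubNubTornado instance and 'my_channel' is illustrative:

from tornado import gen

@gen.coroutine
def consume(pubnub):
    listener = SubscribeListener()
    pubnub.add_listener(listener)
    pubnub.subscribe().channels('my_channel').execute()
    yield listener.wait_for_connect()                    # raises if an error was queued
    env = yield listener.wait_for_message_on('my_channel')
    raise gen.Return(env.message)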
Code example #44
0
File: app.py Project: jefffm/swimpy
class Application(object):
    def __init__(self, routes, node, pipe):
        """
        Application instantiates and registers handlers for each message type,
        and routes messages to the pre-instantiated instances of each message handler

        :param routes: list of tuples in the form of (<message type str>, <MessageHandler class>)
        :param node: Node instance of the local node
        :param pipe: Instance of multiprocessing.Pipe for communicating with the parent process
        """
        # We don't really have to worry about synchronization
        # so long as we're careful about explicit context switching
        self.nodes = {node.node_id: node}

        self.local_node = node
        self.handlers = {}

        self.tcpclient = TCPClient()

        self.gossip_inbox = Queue()
        self.gossip_outbox = Queue()

        self.sequence_number = 0

        if routes:
            self.add_handlers(routes)

        self.pipe = pipe
        self.ioloop = IOLoop.current()

        self.add_node_event = Event()

    def next_sequence_number(self):
        self.sequence_number += 1
        return self.sequence_number

    @coroutine
    def ping_random_node(self):
        node = yield self.get_random_node()
        LOGGER.debug('{} pinging random node: {}'.format(self.local_node.node_id,
                                                         node.node_id))
        try:
            yield self.ping(node)
        except TimeoutError:
            self.mark_suspect(node)

    @coroutine
    def add_node(self, node):
        if node.node_id not in self.nodes:
            LOGGER.debug('Adding node {} to {}'.format(node, self.nodes))
            self.add_node_event.set()
            self.nodes[node.node_id] = node
            LOGGER.debug('Added node {} to {}'.format(node, self.nodes))

    @coroutine
    def remove_node(self, node):
        if node.node_id in self.nodes:
            del self.nodes[node.node_id]

            other_nodes = yield self.get_other_nodes()
            if not other_nodes:
                self.add_node_event.clear()

    def add_handlers(self, handlers):
        for message_type, handler_cls in handlers:
            assert message_type in MESSAGE_TYPES, (
                'Message type {!r} not found in MESSAGE TYPES {}'.format(
                    message_type,
                    MESSAGE_TYPES.keys()
                )
            )
            self.handlers[message_type] = handler_cls(self)

    def route_stream_message(self, stream, message_type, message):
        LOGGER.debug('{!r} received {} message from {!r}'.format(self, message_type, stream))
        message_cls = MESSAGE_TYPES[message_type]
        message_obj = message_cls(**message)

        handler = self.handlers[message_type]
        LOGGER.debug('Routing {} to {}'.format(message_type, handler))
        handler(stream, message_obj)

    @coroutine
    def send_message(self, stream, message):
        LOGGER.debug('Sending message {!r} to {}'.format(message.MESSAGE_TYPE, stream))
        try:
            yield stream.write(message.to_msgpack)
        except StreamClosedError:
            LOGGER.warn('Unable to send {} to {} - stream closed'.format(message.MESSAGE_TYPE, stream))

    @coroutine
    def _get_next_message(self, stream):
        # get the next message from the stream
        unpacker = msgpack.Unpacker()
        try:
            wire_bytes = yield with_timeout(
                datetime.timedelta(seconds=PING_TIMEOUT),
                stream.read_bytes(4096, partial=True)
            )
        except StreamClosedError:
            LOGGER.warn('Unable to get next message from {} - stream closed'.format(stream))
        else:
            unpacker.feed(wire_bytes)
            LOGGER.debug('Deserializing object from stream {}'.format(stream))
            message = unpacker.next()
            message.pop('type')
            raise Return(message)

    @coroutine
    def ping(self, node):
        """
        Ping a node

        :param node: Instance of Node to ping
        :returns: Boolean, True if successful/False if fail
        """
        host = node.addr
        port = node.port

        LOGGER.debug('pinging {}:{}'.format(host, port))
        ping = Ping(seqno=self.next_sequence_number(),
                    node=node,
                    sender=self.local_node)

        # Connect to the node
        try:
            stream = yield self.tcpclient.connect(host, port)
        except StreamClosedError:
            LOGGER.error('Unable to connect from {} to {} (pinging host)'.format(self.local_node.node_id, node.node_id))
            raise Return(False)

        try:
            # Send the ping
            LOGGER.debug('Sending {!r} to {!r}'.format(ping.MESSAGE_TYPE, node))
            yield self.send_message(stream, ping)

            # Wait for an ACK message in response
            LOGGER.debug('Getting next message from {}:{}'.format(host, port))
            message = yield self._get_next_message(stream)
            if message is None:
                raise Return(False)

            ack = Ack(**message)
            LOGGER.debug('Received {!r} from {!r} (response to {!r})'.format(ack.MESSAGE_TYPE,
                                                                             node.node_id,
                                                                             ping.MESSAGE_TYPE))

            # Check that the ACK sequence number matches the PING sequence number
            if ack.seqno == ping.seqno:
                LOGGER.debug('Sequence number matches. Node {} looks good to {}!'.format(
                    node.node_id, self.local_node.node_id))
                # Process the gossip messages tacked onto the ACK message's payload
                for message in ack.payload:
                    try:
                        self.gossip_inbox.put_nowait(message)
                    except QueueFull:
                        LOGGER.error('Unable to add {} message from {} to gossip inbox'.format(message.MESSAGE_TYPE,
                                                                                               node.node_id))
                # mark the node as ALIVE in self.nodes
                self.mark_alive(node)

                # Send gossip that this node is alive
                self.queue_gossip_send(
                    Alive(node=node, sender=self.local_node)
                )

                raise Return(True)
            else:
                raise Return(False)
        finally:
            stream.close()

    @coroutine
    def ack(self, stream, seqno):
        payload = []
        for _ in xrange(ACK_PAYLOAD_SIZE):
            try:
                gossip = self.gossip_outbox.get_nowait()
                payload.append(gossip)
            except QueueEmpty:
                break

        ack = Ack(seqno=seqno, payload=payload)
        LOGGER.debug('Trying to send ack: {}'.format(ack))
        try:
            yield stream.write(ack.to_msgpack)
        except StreamClosedError:
            LOGGER.error('Unable to connect from {} to stream (acking PING)'.format(self.local_node.node_id))
        LOGGER.debug('Sent ack to {}'.format(stream))

    @coroutine
    def _change_node_state(self, node, state):
        """
        Because Tornado has explicit context switching, we don't need to worry much about synchronization here
        """
        LOGGER.debug('{} knows about {}: {}'.format(self.local_node.node_id, node.node_id, state))
        self.add_node(node)
        self.nodes[node.node_id].state = state

    @coroutine
    def mark_alive(self, node):
        if node.node_id != self.local_node.node_id:
            LOGGER.debug('Marking {} ALIVE'.format(node.node_id))
            self._change_node_state(node, State.ALIVE)

    @coroutine
    def mark_dead(self, node):
        self._change_node_state(node, State.DEAD)

    @coroutine
    def mark_suspect(self, node):
        self._change_node_state(node, State.SUSPECT)

    @coroutine
    def ingest_gossip_inbox(self):
        while True:
            LOGGER.debug('checking inbox')
            message = yield self.gossip_inbox.get()
            LOGGER.debug('Received message {} from gossip inbox'.format(message.MESSAGE_TYPE))
            if message.MESSAGE_TYPE == Alive.MESSAGE_TYPE:
                self.mark_alive(message.sender)
                self.mark_alive(message.node)
                self.queue_gossip_send(message)
            elif message.MESSAGE_TYPE == Suspect.MESSAGE_TYPE:
                self.mark_alive(message.sender)
                self.mark_suspect(message.node)
                self.queue_gossip_send(message)
            elif message.MESSAGE_TYPE == Dead.MESSAGE_TYPE:
                self.mark_alive(message.sender)
                self.mark_dead(message.node)
                self.queue_gossip_send(message)

    @coroutine
    def queue_gossip_send(self, message):
        """
        If the message is gossipable, add it to the outbox
        """
        try:
            next_incarnation = message.next_incarnation
            next_incarnation.sender = self.local_node
        except message.MaxIncarnationsReached:
            LOGGER.debug('Max incarnations reached for {}! No gossip 4 u'.format(message.MESSAGE_TYPE))
        else:
            LOGGER.debug('Enqueuing {} gossips for {}'.format(GOSSIP_PEERS, message))
            for _ in xrange(GOSSIP_PEERS):
                yield self.gossip_outbox.put(next_incarnation)

    @coroutine
    def send_buffered_gossip(self):
        while True:
            random_node = yield self.get_random_node()
            message = yield self.gossip_outbox.get()
            LOGGER.debug('{} connecting to {} for gossip'.format(self.local_node, random_node))
            try:
                stream = yield self.tcpclient.connect(random_node.addr, random_node.port)
            except StreamClosedError:
                LOGGER.error('Unable to connect from {} to {} (sending gossip)'.format(self.local_node.node_id,
                                                                                       random_node.node_id))
                LOGGER.warning('Putting the gossip back on our queue')
                try:
                    self.gossip_outbox.put_nowait(message)
                except QueueFull:
                    LOGGER.error('Unable to put gossip back onto the queue. Giving up!')
            else:
                try:
                    LOGGER.debug('{} gossipping with {}'.format(self.local_node.node_id, random_node.node_id))
                    yield self.send_message(stream, message)
                finally:
                    stream.close()

    @coroutine
    def get_other_nodes(self, exclude=None):
        if exclude is None:
            exclude = (self.local_node,)

        exclude_node_ids = [n.node_id for n in exclude]

        raise Return([n for n in self.nodes if n not in exclude_node_ids])

    @coroutine
    def get_random_node(self, exclude=None):
        LOGGER.debug('Waiting for more nodes')
        yield self.add_node_event.wait()
        LOGGER.debug('Getting non-self random node')

        other_nodes = yield self.get_other_nodes(exclude=exclude)
        LOGGER.debug('{} got something! choices: {}'.format(self.local_node.node_id, other_nodes))
        assert other_nodes

        node_id = random.choice(other_nodes)
        raise Return(self.nodes[node_id])
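One way the surrounding process might drive this class, assuming node, routes and pipe come from the swimpy setup code; the one-second ping interval is illustrative:

from tornado.ioloop import IOLoop, PeriodicCallback

app = Application(routes=routes, node=node, pipe=pipe)
loop = IOLoop.current()
loop.spawn_callback(app.ingest_gossip_inbox)          # consume incoming gossip
loop.spawn_callback(app.send_buffered_gossip)         # push queued gossip to random peers
pinger = PeriodicCallback(app.ping_random_node, 1000) # ping a random node every second
pinger.start()
loop.start()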
Code example #45
0
File: countercache.py Project: wyj999/mtShow
class CounterCache(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        self.m_queue = Queue()
        self.m_CacheFlag = 1
        self.m_CounterCache = None
        self.m_Cache_A = defaultdict()
        self.m_Cache_B = defaultdict()

        self.database = Database(redis_conf = REDISEVER, password = STATUS_REDIS_PASS)

        self.cacheInit(self.m_Cache_A)
        self.cacheInit(self.m_Cache_B)

    def switchCache(self):
        if self.m_CacheFlag == 1:
            return self.m_Cache_A
        elif self.m_CacheFlag == 2:
            return self.m_Cache_B

    def chageCacheFlag(self):
        if self.m_CacheFlag == 1:
            self.m_CacheFlag = 2
        elif self.m_CacheFlag == 2:
            self.m_CacheFlag = 1
    
    def clearCache(self):
        if self.m_CacheFlag == 1:
            self.m_Cache_B.clear()
            self.cacheInit(self.m_Cache_B)
        elif self.m_CacheFlag == 2:
            self.m_Cache_A.clear()
            self.cacheInit(self.m_Cache_A)

    def cacheInit(self, cache):
        cache['pid_info'] = defaultdict(int)
        cache['eid_info'] = { 'pv':defaultdict(int), 'exchange_price':defaultdict(int) }
        cache['adx_info'] = { 'pv':defaultdict(int), 'exchange_price':defaultdict(int) }
        cache['aid_info'] = { 'exchange_price':defaultdict(int) }

    @tornado.gen.coroutine
    def queueMsgPut(self, msg):
        yield self.m_queue.put(msg)

    @tornado.gen.coroutine
    def queueMsgGet(self):
        while True:
            msg = yield self.m_queue.get()
            #print msg
            logger.info('QueueGet:%r' % msg)
            self.cacheInfoPut(msg)

    def cacheInfoPut(self, info):
        cache = self.switchCache()
        type = eid = pid = aid = price = adx = None
        if info.has_key('type'):
            type = info['type']
        if info.has_key('eid'):
            eid = info['eid']
        if info.has_key('pid'):
            pid = info['pid']
        if info.has_key('price'):
            price = info['price']
        if info.has_key('aid'):
            aid = info['aid']
        #if info.has_key('adx'):
        #    adx = info['adx']
        if type == 1 and eid and (price != None) and aid: # pv
            cache['aid_info']['exchange_price'][aid] = cache['aid_info']['exchange_price'][aid] + price
            cache['eid_info']['pv'][eid] = cache['eid_info']['pv'][eid] + 1
            cache['eid_info']['exchange_price'][eid] = cache['eid_info']['exchange_price'][eid] + price
            #cache['adx_info']['pv'][adx] = cache['adx_info']['pv'][adx] + 1
            #cache['adx_info']['exchange_price'][adx] = cache['adx_info']['exchange_price'][adx] + price
        else:
            return None


    def cacheDura(self):
        cache = None
        if self.m_CacheFlag == 1:
            cache = self.m_Cache_B
        if self.m_CacheFlag == 2:
            cache = self.m_Cache_A

        #loginfo(cache)
        if cache.has_key('pid_info'):
            pass
        if cache.has_key('eid_info'):
            it_p = cache['eid_info']['exchange_price']
            it_m = cache['eid_info']['pv']
            for eid in it_p.iterkeys():
                self.database.incEidHourSp(eid, it_p[eid])
                logger.debug("increase Order:%r Money:%r OK!" % (eid, it_p[eid]))
            for eid in it_m.iterkeys():
                self.database.incEidShow(eid, it_m[eid])
                logger.debug("increase Order:%r PV:%r OK!" % (eid,it_m[eid]))

        if cache.has_key('aid_info'):
            it_a = cache['aid_info']['exchange_price']
            for aid in it_a.iterkeys():
                self.database.incAidHourSp(aid, it_a[aid])
                self.database.decAdvBidSpend(aid, "-%.3f" %  (float(it_a[aid])/1000))
                logger.debug("increase Advertiser:%s Money:%s!" % (aid, str(float(it_a[aid])/1000)) )

    def run(self):
        while True:
            try:
                time.sleep( CACHE_DUR_FREQ )
                self.chageCacheFlag()
                self.cacheDura()
                self.clearCache()

            except Exception, e:
                logger.error(e)
                continue
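CounterCache mixes a background flush thread with a Tornado-side consumer coroutine; a sketch of starting both, assuming the Redis settings required by Database are in place (the sample message fields mirror cacheInfoPut):

from tornado.ioloop import IOLoop

cache = CounterCache()
cache.setDaemon(True)
cache.start()                              # flush counters every CACHE_DUR_FREQ seconds

loop = IOLoop.current()
loop.spawn_callback(cache.queueMsgGet)     # drain m_queue into the active cache
# from a request handler, for example:
#     yield cache.queueMsgPut({'type': 1, 'eid': 'e1', 'aid': 'a1', 'price': 120})
loop.start()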
Code example #46
0
File: files.py Project: fstfwd/apps
    def get_file_list(account, **kwargs):
        queue = Queue()
        sem = BoundedSemaphore(FETCH_CONCURRENCY)
        done, working = set(), set()
        data = []
        ids = set()

        @gen.coroutine
        def fetch_url():
            current_url = yield queue.get()
            try:
                if current_url in working:
                    return
                page_no = working.__len__()
                app_log.info("Fetching page {}".format(page_no))
                working.add(current_url)
                req = account.get_request(current_url)
                client = AsyncHTTPClient()
                response = yield client.fetch(req)
                done.add(current_url)
                app_log.info("Page {} downloaded".format(page_no))
                response_data = json.loads(response.body.decode('utf-8'))

                url = response_data.get('@odata.nextLink', None)
                if url is not None:
                    queue.put(url)

                for file in response_data.get('value', []):
                    if file['name'][-4:].strip('.').lower() in VALID_FILETYPES:
                        if file['id'] not in ids:
                            ids.add(file['id'])
                            data.append({
                                "title":
                                file['parentReference']['path'].split(':')
                                [1].lstrip('/') + '/' + file['name'],
                                "value":
                                file['id']
                            })
                app_log.info("Page {} completed".format(page_no))
            finally:
                queue.task_done()
                sem.release()

        @gen.coroutine
        def worker():
            while True:
                yield sem.acquire()
                fetch_url()

        app_log.info("Gathering filelist for account {}".format(account._id))
        for file_type in VALID_FILETYPES:
            file_type = '.'.join([file_type])
            url = "https://api.onedrive.com/v1.0/drive/root/view.search?top=1000&select=parentReference,name,id,size&q={}" \
                .format(file_type)
            queue.put(url)
        # start our concurrency worker
        worker()
        # wait until we're done
        yield queue.join(timeout=timedelta(seconds=MAXIMUM_REQ_TIME))
        app_log.info("Finished list retrieval. Found {} items.".format(
            data.__len__()))
        return sorted(data, key=lambda f: f['title'])
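Assuming get_file_list carries @gen.coroutine in the full source (the snippet mixes yield and return) and that account exposes get_request(url) and an _id attribute, it can be driven synchronously like this; the slice is illustrative:

from tornado.ioloop import IOLoop

files = IOLoop.current().run_sync(lambda: get_file_list(account))
for entry in files[:5]:
    print(entry['title'], entry['value'])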
Code example #47
0
    class TornadoTransmission():
        def __init__(self,
                     max_concurrent_batches=10,
                     block_on_send=False,
                     block_on_response=False,
                     max_batch_size=100,
                     send_frequency=timedelta(seconds=0.25),
                     user_agent_addition=''):
            if not has_tornado:
                raise ImportError(
                    'TornadoTransmission requires tornado, but it was not found.'
                )

            self.block_on_send = block_on_send
            self.block_on_response = block_on_response
            self.max_batch_size = max_batch_size
            self.send_frequency = send_frequency

            user_agent = "libhoney-py/" + VERSION
            if user_agent_addition:
                user_agent += " " + user_agent_addition

            self.http_client = AsyncHTTPClient(
                force_instance=True, defaults=dict(user_agent=user_agent))

            # libhoney adds events to the pending queue for us to send
            self.pending = Queue(maxsize=1000)
            # we hand back responses from the API on the responses queue
            self.responses = Queue(maxsize=2000)

            self.batch_data = {}
            self.sd = statsd.StatsClient(prefix="libhoney")
            self.batch_sem = Semaphore(max_concurrent_batches)

        def start(self):
            ioloop.IOLoop.current().spawn_callback(self._sender)

        def send(self, ev):
            '''send accepts an event and queues it to be sent'''
            self.sd.gauge("queue_length", self.pending.qsize())
            try:
                if self.block_on_send:
                    self.pending.put(ev)
                else:
                    self.pending.put_nowait(ev)
                self.sd.incr("messages_queued")
            except QueueFull:
                response = {
                    "status_code": 0,
                    "duration": 0,
                    "metadata": ev.metadata,
                    "body": "",
                    "error": "event dropped; queue overflow",
                }
                if self.block_on_response:
                    self.responses.put(response)
                else:
                    try:
                        self.responses.put_nowait(response)
                    except QueueFull:
                        # if the response queue is full when trying to add an event
                        # queue is full response, just skip it.
                        pass
                self.sd.incr("queue_overflow")

        # We're using the older decorator/yield model for compatibility with
        # Python versions before 3.5.
        # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
        @gen.coroutine
        def _sender(self):
            '''_sender is the control loop that pulls events off the `self.pending`
            queue and submits batches for actual sending. '''
            events = []
            last_flush = time.time()
            while True:
                try:
                    ev = yield self.pending.get(timeout=self.send_frequency)
                    if ev is None:
                        # signals shutdown
                        yield self._flush(events)
                        return
                    events.append(ev)
                    if (len(events) > self.max_batch_size
                            or time.time() - last_flush >
                            self.send_frequency.total_seconds()):
                        yield self._flush(events)
                        events = []
                except TimeoutError:
                    yield self._flush(events)
                    events = []
                    last_flush = time.time()

        @gen.coroutine
        def _flush(self, events):
            if not events:
                return
            for dest, group in group_events_by_destination(events).items():
                yield self._send_batch(dest, group)

        @gen.coroutine
        def _send_batch(self, destination, events):
            ''' Makes a single batch API request with the given list of events. The
            `destination` argument contains the write key, API host and dataset
            name used to build the request.'''
            start = time.time()
            status_code = 0

            try:
                # enforce max_concurrent_batches
                yield self.batch_sem.acquire()
                url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                              destination.dataset)
                payload = []
                for ev in events:
                    event_time = ev.created_at.isoformat()
                    if ev.created_at.tzinfo is None:
                        event_time += "Z"
                    payload.append({
                        "time": event_time,
                        "samplerate": ev.sample_rate,
                        "data": ev.fields()
                    })
                req = HTTPRequest(
                    url,
                    method='POST',
                    headers={
                        "X-Honeycomb-Team": destination.writekey,
                        "Content-Type": "application/json",
                    },
                    body=json.dumps(payload, default=json_default_handler),
                )
                self.http_client.fetch(req, self._response_callback)
                # store the events that were sent so we can process responses later
                # it is important that we delete these eventually, or we'll run into memory issues
                self.batch_data[req] = {"start": start, "events": events}
            except Exception as e:
                # Catch all exceptions and hand them to the responses queue.
                self._enqueue_errors(status_code, e, start, events)
            finally:
                self.batch_sem.release()

        def _enqueue_errors(self, status_code, error, start, events):
            for ev in events:
                self.sd.incr("send_errors")
                self._enqueue_response(status_code, "", error, start,
                                       ev.metadata)

        def _response_callback(self, resp):
            # resp.request should be the same HTTPRequest object built by _send_batch
            # and mapped to values in batch_data
            events = self.batch_data[resp.request]["events"]
            start = self.batch_data[resp.request]["start"]
            try:
                status_code = resp.code
                resp.rethrow()

                statuses = [d["status"] for d in json.loads(resp.body)]
                for ev, status in zip(events, statuses):
                    self._enqueue_response(status, "", None, start,
                                           ev.metadata)
                    self.sd.incr("messages_sent")
            except Exception as e:
                self._enqueue_errors(status_code, e, start, events)
                self.sd.incr("send_errors")
            finally:
                # clean up the data for this batch
                del self.batch_data[resp.request]

        def _enqueue_response(self, status_code, body, error, start, metadata):
            resp = {
                "status_code": status_code,
                "body": body,
                "error": error,
                "duration": (time.time() - start) * 1000,
                "metadata": metadata
            }
            if self.block_on_response:
                self.responses.put(resp)
            else:
                try:
                    self.responses.put_nowait(resp)
                except QueueFull:
                    pass

        def close(self):
            '''call close to send all in-flight requests and shut down the
                senders nicely. Times out after max 20 seconds per sending thread
                plus 10 seconds for the response queue'''
            try:
                self.pending.put(None, 10)
            except QueueFull:
                pass
            # signal to the responses queue that nothing more is coming.
            try:
                self.responses.put(None, 10)
            except QueueFull:
                pass

        def get_response_queue(self):
            ''' return the responses queue on to which will be sent the response
            objects from each event send'''
            return self.responses
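
A minimal sketch of draining the response queue exposed by get_response_queue(). It relies only on the contract visible above (dict responses, and the None sentinel that close() pushes) and assumes the transmission has already been started on a running IOLoop.

from tornado import gen

@gen.coroutine
def drain_responses(transmission):
    responses = transmission.get_response_queue()
    while True:
        resp = yield responses.get()
        if resp is None:
            # close() enqueues None to signal that no more responses are coming
            return
        if resp["error"]:
            print("send failed:", resp["error"])
        else:
            print("sent, status", resp["status_code"], "in", resp["duration"], "ms")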
Code example #48
0
class BlogBackup(object):
    _default_dir_name = "seg_blog_backup"

    def _generate_save_dir(self):
        cur_dir = os.path.dirname(__file__)
        self.save_path = os.path.join(cur_dir, self._default_dir_name)
        if not os.path.isdir(self.save_path):
            os.mkdir(self.save_path)

    def _parse_save_path(self):
        if self.save_path:
            if os.path.exists(self.save_path) and os.path.isdir(self.save_path):
                return
            else:
                raise BlogSavePathError("'%s' not exists or is not dir!" % self.save_path)
        else:
            self._generate_save_dir()

    @staticmethod
    def parse_token_from_html(content):
        overall_pat = re.compile(r"SF.token =.*?,\s+_\w+ = [\d,\[\]]+;", re.DOTALL)
        overall_res = overall_pat.search(content)
        if overall_res:
            overall_content = overall_res.group()
            # remove /* */ type annotation
            filter_res = re.sub(r"(/\*[/a-zA-Z\d' ]+\*/)", "", overall_content)
            str_list = re.findall(r"(?<!//)'([a-zA-Z\d]+)'", filter_res, re.DOTALL)
            filter_list = re.findall(r"\[(\d+),(\d+)\]", overall_content)
            ret = "".join(str_list)

            if filter_list:
                for m, n in filter_list:
                    ret = ret[: int(m)] + ret[int(n) :]
            if len(ret) == 32:
                return ret

        raise PageHtmlChanged("website login token has changed")

    def _get_user_cookies(self):
        s = requests.Session()
        s.headers.update(headers)
        rep = s.get(target_url)
        post_url = "%s%s?_=%s" % (target_url, login_api_path, self.parse_token_from_html(rep.text))
        data = {"mail": self.username, "password": self.passwd}
        s.post(post_url, data=data)
        return s.cookies

    def __init__(self, **conf):
        self.username = conf["username"]
        self.passwd = conf["passwd"]
        self.save_path = conf.get("save_path")
        self._q = Queue()
        self._cookies = self._get_user_cookies()
        self._parse_save_path()

    @gen.coroutine
    def run(self):
        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()

        yield self._q.join()

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d(".stream-list__item > .summary > h2 > a")
        for link in link_elements:
            yield self._q.put(d(link).attr("href"))

        next_ele = d(".pagination li.next a")
        if next_ele:
            next_page_url = target_url + next_ele.attr("href")
            yield self._fetch_blog_list_page(next_page_url)

    @gen.coroutine
    def _fetch_essay_content(self):
        while True:
            try:
                essay_path = yield self._q.get(timeout=1)
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                real_file_name = os.path.join(self.save_path, title + ".md")
                logger.info("is backup essay: %s" % title)
                with open(real_file_name, "w") as f:
                    f.writelines(content.encode("utf8"))
            except gen.TimeoutError:
                raise gen.Return()
            finally:
                self._q.task_done()
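
A minimal driver sketch for BlogBackup, assuming the module-level names it relies on (target_url, blog_path, headers, edit_suffix, login_api_path, logger and the custom exceptions) are defined as in the original script; the credentials are placeholders.

from tornado.ioloop import IOLoop

backup = BlogBackup(username="user@example.com", passwd="secret")
# save_path is optional; when omitted, a seg_blog_backup directory is
# created next to the script.
IOLoop.current().run_sync(backup.run)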
Code example #49
0
class SubscribeListener(SubscribeCallback):
    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.error_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()
        elif status.is_error():
            self.error_queue.put_nowait(status.error_data.exception)

    def message(self, pubnub, message):
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def _wait_for(self, coro):
        error = self.error_queue.get()
        wi = tornado.gen.WaitIterator(coro, error)

        while not wi.done():
            result = yield wi.next()

            if wi.current_future == coro:
                raise gen.Return(result)
            elif wi.current_future == error:
                raise result
            else:
                raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

    @tornado.gen.coroutine
    def wait_for_connect(self):
        if not self.connected_event.is_set():
            yield self._wait_for(self.connected_event.wait())
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            yield self._wait_for(self.disconnected_event.wait())
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:  # NOQA
                env = yield self._wait_for(self.message_queue.get())
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                try:
                    env = yield self._wait_for(self.presence_queue.get())
                except:  # NOQA E722 pylint: disable=W0702
                    break
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.presence_queue.task_done()
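
A minimal usage sketch for the listener above, assuming a configured PubNubTornado client is passed in; the channel name is illustrative.

from tornado import gen

@gen.coroutine
def subscribe_and_wait(pubnub):
    listener = SubscribeListener()
    pubnub.add_listener(listener)
    pubnub.subscribe().channels("demo_channel").execute()
    yield listener.wait_for_connect()
    envelope = yield listener.wait_for_message_on("demo_channel")
    raise gen.Return(envelope.message)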
Code example #50
0
File: Server.py Project: rwth-i6/returnn
class Model:
  def __init__(self, config_file):
    self.lock = locks.Lock()
    self.classification_queue = Queue()

    print('loading config %s' % config_file, file=log.v5)
    # Load and setup config
    try:
      self.config = Config.Config()
      self.config.load_file(config_file)
      self.pause_after_first_seq = self.config.float('pause_after_first_seq', 0.2)
      self.batch_size = self.config.int('batch_size', 5000)
      self.max_seqs = self.config.int('max_seqs', -1)
    except Exception:
      print('Error: loading config %s failed' % config_file, file=log.v1)
      raise

    try:
      self.devices = self._init_devices()
    except Exception:
      print('Error: Loading devices for config %s failed' % config_file, file=log.v1)
      raise

    print('Starting engine for config %s' % config_file, file=log.v5)
    self.engine = Engine.Engine(self.devices)
    try:
      self.engine.init_network_from_config(config=self.config)
    except Exception:
      print('Error: Loading network for config %s failed' % config_file, file=log.v1)
      raise

    IOLoop.current().spawn_callback(self.classify_in_background)

    self.last_used = datetime.datetime.now()

  def _init_devices(self):
    """
    Initiates the required devices for a config. Same as the function initDevices in
    rnn.py.
    :return: A list with the devices used.
    """
    oldDeviceConfig = ",".join(self.config.list('device', ['default']))
    if "device" in TheanoFlags:
      # This is important because Theano likely already has initialized that device.
      config.set("device", TheanoFlags["device"])
      print("Devices: Use %s via THEANO_FLAGS instead of %s." % (TheanoFlags["device"], oldDeviceConfig), file=log.v4)
    devArgs = get_devices_init_args(self.config)
    assert len(devArgs) > 0
    devices = [Device(**kwargs) for kwargs in devArgs]
    for device in devices:
      while not device.initialized:
        time.sleep(0.25)
    if devices[0].blocking:
      print("Devices: Used in blocking / single proc mode.", file=log.v4)
    else:
      print("Devices: Used in multiprocessing mode.", file=log.v4)
    return devices

  @tornado.gen.coroutine
  def classify_in_background(self):
    while True:
      requests = []
      # fetch first request
      r = yield self.classification_queue.get()
      requests.append(r)
      # grab all other waiting requests
      try:
        while True:
          requests.append(self.classification_queue.get_nowait())
      except QueueEmpty:
        pass

      output_dim = {}
      # Do dataset creation and classification.
      dataset = StaticDataset(data=[r.data for r in requests], output_dim=output_dim)
      dataset.init_seq_order()
      batches = dataset.generate_batches(recurrent_net=self.engine.network.recurrent,
                                         batch_size=self.batch_size, max_seqs=self.max_seqs)

      with (yield self.lock.acquire()):
        ctt = ForwardTaskThread(self.engine.network, self.devices, dataset, batches)
        yield ctt.join()

      try:
        for i in range(dataset.num_seqs):
          requests[i].future.set_result(ctt.result[i])
          self.classification_queue.task_done()
      except Exception as e:
        print('exception', e)
        raise

  @tornado.gen.coroutine
  def classify(self, data):
    self.last_used = datetime.datetime.now()
    request = ClassificationRequest(data)

    yield self.classification_queue.put(request)
    yield request.future

    return request.future.result()
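
A sketch of how a Tornado request handler might hand work to this Model, assuming a module-level model instance created at startup and a ClassificationRequest that accepts the decoded payload as in the original Server.py; the handler name and response shape are illustrative.

import json
import tornado.web
from tornado import gen

class ClassifyHandler(tornado.web.RequestHandler):
    @gen.coroutine
    def post(self):
        data = json.loads(self.request.body)
        # classify() queues the request and resolves once the batch has run
        result = yield model.classify(data)
        self.write({'result': str(result)})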
Code example #51
0
class TaskLogger(object):
    def __init__(self,
                 task_id,
                 engine=EngineType.REQUESTS,
                 io_loop=None,
                 task_url=TASK_URL,
                 wrap=False,
                 tenant=None):
        self.task_id = task_id
        self.task_url = task_url
        self._seq = 0
        self._partial_log_url = self._get_partial_url('log')
        self._partial_result_url = self._get_partial_url('result')

        self.wrap = wrap
        if wrap and tenant:
            self._partial_log_url = update_query_params(
                self._partial_log_url, {'tenant': tenant})
            self._partial_result_url = update_query_params(
                self._partial_result_url, {'tenant': tenant})

        if engine == EngineType.REQUESTS:
            self.log = self._log_by_requests
            self.result = self._result_by_requests
        elif engine == EngineType.TORNADO:
            io_loop = io_loop if io_loop else IOLoop.current()
            self._http_client = AsyncHTTPClient(io_loop=io_loop)
            self._queue = Queue()
            self.log = self._log_by_tornado
            self.result = self._result_by_tornado
        else:
            raise TaskLoggerError('',
                                  reason='engine only supports {}'.format(
                                      EngineType.types_str()))

    def _get_partial_url(self, partial_name):
        url = urljoin(self.task_url, partial_name)
        url = update_query_params(url, {'task_id': self.task_id})
        return url

    def _get_log_url(self, seq):
        url = update_query_params(self._partial_log_url, {'seq': seq})
        return url

    def _get_result_url(self, seq, exit_code=0):
        url = update_query_params(self._partial_result_url, {
            'seq': seq,
            'exit_code': exit_code
        })
        return url

    def _log_by_requests(self, log):
        self._seq += 1
        log_url = self._get_log_url(self._seq)
        data = self._create_log(log, self._seq)
        self._send_by_requests(log_url, data)

    def _result_by_requests(self, result, exit_code=0):
        self._seq += 1
        result_url = self._get_result_url(self._seq, exit_code)
        data = self._create_result(result, self._seq, exit_code=exit_code)
        self._send_by_requests(result_url, data)

    @staticmethod
    def _send_by_requests(url, data):
        res = requests.post(url, data=data, verify=False)
        if res.status_code != 200:
            raise TaskLoggerError(data, reason=res.reason)

    @gen.coroutine
    def _log_by_tornado(self, log):
        yield self._queue.put(1)
        self._seq += 1
        log_url = self._get_log_url(self._seq)
        data = self._create_log(log, self._seq)
        try:
            yield self._send_by_tornado(log_url, data)
        finally:
            yield self._queue.get()
            self._queue.task_done()

    @gen.coroutine
    def _result_by_tornado(self, result, exit_code=0):
        yield self._queue.join()
        self._seq += 1
        result_url = self._get_result_url(self._seq, exit_code)
        data = self._create_result(result, self._seq, exit_code=exit_code)
        yield self._send_by_tornado(result_url, data)

    @gen.coroutine
    def _send_by_tornado(self, url, data):
        try:
            response = yield self._http_client.fetch(
                url,
                method='POST',
                headers={'Content-Type': 'application/json'},
                validate_cert=False,
                body=data)
        except Exception as exc:
            if hasattr(exc, 'response') and exc.response:
                exc = 'url:{}, exc:{}, body:{}'.format(url, exc,
                                                       exc.response.body)
            raise TaskLoggerError(data, str(exc))
        else:
            if response.code != 200:
                raise TaskLoggerError(data, reason=response.body)

    def _create_log(self, log, seq):
        assert isinstance(log, basestring)
        log = log + '\n'
        if self.wrap:
            log_msg = TaskLogMessage(task_id=self.task_id, log=log, seq=seq)
            data = json_encode({'messages': log_msg})
        else:
            data = log
        return data

    def _create_result(self, result, seq, exit_code):
        assert isinstance(result, basestring)
        result = result + '\n'
        if self.wrap:
            result_msg = TaskResultMessage(task_id=self.task_id,
                                           result=result,
                                           seq=seq,
                                           exit_code=exit_code)
            data = json_encode({'messages': result_msg})
        else:
            data = result
        return data
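
A minimal usage sketch for the Tornado engine of TaskLogger, assuming the task API behind TASK_URL is reachable; the task id and messages are placeholders.

from tornado import gen
from tornado.ioloop import IOLoop

@gen.coroutine
def report(task_id):
    task_logger = TaskLogger(task_id, engine=EngineType.TORNADO)
    yield task_logger.log("step 1 finished")
    yield task_logger.result("all done", exit_code=0)

IOLoop.current().run_sync(lambda: report("task-42"))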
Code example #52
0
    class TornadoTransmission():
        def __init__(self, max_concurrent_batches=10, block_on_send=False,
                    block_on_response=False, max_batch_size=100, send_frequency=0.25,
                    user_agent_addition=''):
            if not has_tornado:
                raise ImportError('TornadoTransmission requires tornado, but it was not found.')

            self.block_on_send = block_on_send
            self.block_on_response = block_on_response
            self.max_batch_size = max_batch_size
            self.send_frequency = send_frequency

            user_agent = "libhoney-py/" + VERSION
            if user_agent_addition:
                user_agent += " " + user_agent_addition

            self.http_client = AsyncHTTPClient(
                force_instance=True,
                defaults=dict(user_agent=user_agent))

            # libhoney adds events to the pending queue for us to send
            self.pending = Queue(maxsize=1000)
            # we hand back responses from the API on the responses queue
            self.responses = Queue(maxsize=2000)

            self.batch_data = {}
            self.sd = statsd.StatsClient(prefix="libhoney")
            self.batch_sem = Semaphore(max_concurrent_batches)

        def start(self):
            ioloop.IOLoop.current().spawn_callback(self._sender)

        def send(self, ev):
            '''send accepts an event and queues it to be sent'''
            self.sd.gauge("queue_length", self.pending.qsize())
            try:
                if self.block_on_send:
                    self.pending.put(ev)
                else:
                    self.pending.put_nowait(ev)
                self.sd.incr("messages_queued")
            except QueueFull:
                response = {
                    "status_code": 0,
                    "duration": 0,
                    "metadata": ev.metadata,
                    "body": "",
                    "error": "event dropped; queue overflow",
                }
                if self.block_on_response:
                    self.responses.put(response)
                else:
                    try:
                        self.responses.put_nowait(response)
                    except QueueFull:
                        # if the response queue is full when trying to add an event
                        # queue is full response, just skip it.
                        pass
                self.sd.incr("queue_overflow")

        # We're using the older decorator/yield model for compatibility with
        # Python versions before 3.5.
        # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
        @gen.coroutine
        def _sender(self):
            '''_sender is the control loop that pulls events off the `self.pending`
            queue and submits batches for actual sending. '''
            events = []
            last_flush = time.time()
            while True:
                try:
                    ev = yield self.pending.get(timeout=self.send_frequency)
                    if ev is None:
                        # signals shutdown
                        yield self._flush(events)
                        return
                    events.append(ev)
                    if (len(events) > self.max_batch_size or
                        time.time() - last_flush > self.send_frequency):
                        yield self._flush(events)
                        events = []
                except TimeoutError:
                    yield self._flush(events)
                    events = []
                    last_flush = time.time()

        @gen.coroutine
        def _flush(self, events):
            if not events:
                return
            for dest, group in group_events_by_destination(events).items():
                yield self._send_batch(dest, group)

        @gen.coroutine
        def _send_batch(self, destination, events):
            ''' Makes a single batch API request with the given list of events. The
            `destination` argument contains the write key, API host and dataset
            name used to build the request.'''
            start = time.time()
            status_code = 0

            try:
                # enforce max_concurrent_batches
                yield self.batch_sem.acquire()
                url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                            destination.dataset)
                payload = []
                for ev in events:
                    event_time = ev.created_at.isoformat()
                    if ev.created_at.tzinfo is None:
                        event_time += "Z"
                    payload.append({
                        "time": event_time,
                        "samplerate": ev.sample_rate,
                        "data": ev.fields()})
                req = HTTPRequest(
                    url,
                    method='POST',
                    headers={
                        "X-Honeycomb-Team": destination.writekey,
                        "Content-Type": "application/json",
                    },
                    body=json.dumps(payload, default=json_default_handler),
                )
                self.http_client.fetch(req, self._response_callback)
                # store the events that were sent so we can process responses later
                # it is important that we delete these eventually, or we'll run into memory issues
                self.batch_data[req] = {"start": start, "events": events}
            except Exception as e:
                # Catch all exceptions and hand them to the responses queue.
                self._enqueue_errors(status_code, e, start, events)
            finally:
                self.batch_sem.release()

        def _enqueue_errors(self, status_code, error, start, events):
            for ev in events:
                self.sd.incr("send_errors")
                self._enqueue_response(status_code, "", error, start, ev.metadata)

        def _response_callback(self, resp):
            # resp.request should be the same HTTPRequest object built by _send_batch
            # and mapped to values in batch_data
            events = self.batch_data[resp.request]["events"]
            start  = self.batch_data[resp.request]["start"]
            try:
                status_code = resp.code
                resp.rethrow()

                statuses = [d["status"] for d in json.loads(resp.body)]
                for ev, status in zip(events, statuses):
                    self._enqueue_response(status, "", None, start, ev.metadata)
                    self.sd.incr("messages_sent")
            except Exception as e:
                self._enqueue_errors(status_code, e, start, events)
                self.sd.incr("send_errors")
            finally:
                # clean up the data for this batch
                del self.batch_data[resp.request]

        def _enqueue_response(self, status_code, body, error, start, metadata):
            resp = {
                "status_code": status_code,
                "body": body,
                "error": error,
                "duration": (time.time() - start) * 1000,
                "metadata": metadata
            }
            if self.block_on_response:
                self.responses.put(resp)
            else:
                try:
                    self.responses.put_nowait(resp)
                except QueueFull:
                    pass

        def close(self):
            '''call close to send all in-flight requests and shut down the
                senders nicely. Times out after max 20 seconds per sending thread
                plus 10 seconds for the response queue'''
            try:
                self.pending.put(None, 10)
            except QueueFull:
                pass
            # signal to the responses queue that nothing more is coming.
            try:
                self.responses.put(None, 10)
            except QueueFull:
                pass

        def get_response_queue(self):
            ''' return the responses queue on to which will be sent the response
            objects from each event send'''
            return self.responses
Code example #53
0
class Worker(Server):
    """ Worker Node

    Workers perform two functions:

    1.  **Serve data** from a local dictionary
    2.  **Perform computation** on that data and on data from peers

    Additionally workers keep a Center informed of their data and use that
    Center to gather data from other workers when necessary to perform a
    computation.

    You can start a worker with the ``dworker`` command line application::

        $ dworker scheduler-ip:port

    **State**

    * **data:** ``{key: object}``:
        Dictionary mapping keys to actual values
    * **active:** ``{key}``:
        Set of keys currently under computation
    * **ncores:** ``int``:
        Number of cores used by this worker process
    * **executor:** ``concurrent.futures.ThreadPoolExecutor``:
        Executor used to perform computation
    * **local_dir:** ``path``:
        Path on local machine to store temporary files
    * **center:** ``rpc``:
        Location of center or scheduler.  See ``.ip/.port`` attributes.
    * **name:** ``string``:
        Alias
    * **services:** ``{str: Server}``:
        Auxiliary web servers running on this worker
    * **service_ports:** ``{str: port}``:
        Ports on which the auxiliary services listen

    Examples
    --------

    Create centers and workers in Python:

    >>> from distributed import Center, Worker
    >>> c = Center('192.168.0.100', 8787)  # doctest: +SKIP
    >>> w = Worker(c.ip, c.port)  # doctest: +SKIP
    >>> yield w._start(port=8788)  # doctest: +SKIP

    Or use the command line::

       $ dcenter
       Start center at 127.0.0.1:8787

       $ dworker 127.0.0.1:8787
       Start worker at:            127.0.0.1:8788
       Registered with center at:  127.0.0.1:8787

    See Also
    --------
    distributed.center.Center:
    """
    def __init__(self,
                 center_ip,
                 center_port,
                 ip=None,
                 ncores=None,
                 loop=None,
                 local_dir=None,
                 services=None,
                 service_ports=None,
                 name=None,
                 **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        self.executor = ThreadPoolExecutor(self.ncores)
        self.thread_tokens = Queue(
        )  # https://github.com/tornadoweb/tornado/issues/1595#issuecomment-198551572
        for i in range(self.ncores):
            self.thread_tokens.put_nowait(i)
        self.center = rpc(ip=center_ip, port=center_port)
        self.active = set()
        self.name = name

        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {
            'compute': self.compute,
            'gather': self.gather,
            'compute-stream': self.compute_stream,
            'run': self.run,
            'get_data': self.get_data,
            'update_data': self.update_data,
            'delete_data': self.delete_data,
            'terminate': self.terminate,
            'ping': pingpong,
            'health': self.health,
            'upload_file': self.upload_file
        }

        super(Worker, self).__init__(handlers, **kwargs)

    @gen.coroutine
    def _start(self, port=0):
        self.listen(port)
        self.name = self.name or self.address
        for k, v in self.services.items():
            v.listen(0)
            self.service_ports[k] = v.port

        logger.info('      Start worker at: %20s:%d', self.ip, self.port)
        for k, v in self.service_ports.items():
            logger.info('  %16s at: %20s:%d' % (k, self.ip, v))
        logger.info('Waiting to connect to: %20s:%d', self.center.ip,
                    self.center.port)
        while True:
            try:
                resp = yield self.center.register(ncores=self.ncores,
                                                  address=(self.ip, self.port),
                                                  keys=list(self.data),
                                                  services=self.service_ports,
                                                  name=self.name)
                break
            except (OSError, StreamClosedError):
                logger.debug("Unable to register with scheduler.  Waiting")
                yield gen.sleep(0.5)
        if resp != 'OK':
            raise ValueError(resp)
        logger.info('        Registered to: %20s:%d', self.center.ip,
                    self.center.port)
        self.status = 'running'

    def start(self, port=0):
        self.loop.add_callback(self._start, port)

    def identity(self, stream):
        return {
            'type': type(self).__name__,
            'id': self.id,
            'center': (self.center.ip, self.center.port)
        }

    @gen.coroutine
    def _close(self, report=True, timeout=10):
        if report:
            yield gen.with_timeout(timedelta(seconds=timeout),
                                   self.center.unregister(address=(self.ip,
                                                                   self.port)),
                                   io_loop=self.loop)
        self.center.close_streams()
        self.stop()
        self.executor.shutdown()
        if os.path.exists(self.local_dir):
            shutil.rmtree(self.local_dir)

        for k, v in self.services.items():
            v.stop()
        self.status = 'closed'
        self.stop()

    @gen.coroutine
    def terminate(self, stream, report=True):
        yield self._close(report=report)
        raise Return('OK')

    @property
    def address(self):
        return '%s:%d' % (self.ip, self.port)

    @property
    def address_tuple(self):
        return (self.ip, self.port)

    @gen.coroutine
    def gather(self, stream=None, who_has=None):
        who_has = {
            k: [coerce_to_address(addr) for addr in v]
            for k, v in who_has.items() if k not in self.data
        }
        try:
            result = yield gather_from_workers(who_has)
        except KeyError as e:
            logger.warn("Could not find data", e)
            raise Return({'status': 'missing-data', 'keys': e.args})
        else:
            self.data.update(result)
            raise Return({'status': 'OK'})

    @gen.coroutine
    def _ready_task(self,
                    function=None,
                    key=None,
                    args=(),
                    kwargs={},
                    task=None,
                    who_has=None):
        who_has = who_has or {}
        diagnostics = {}
        data = {k: self.data[k] for k in who_has if k in self.data}
        who_has = {
            k: set(map(coerce_to_address, v))
            for k, v in who_has.items() if k not in self.data
        }
        if who_has:
            try:
                logger.info("gather %d keys from peers: %s", len(who_has),
                            str(who_has))
                diagnostics['transfer-start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer-stop'] = time()
                self.data.update(other)
                yield self.center.add_keys(address=self.address,
                                           keys=list(other))
                data.update(other)
            except KeyError as e:
                logger.warn("Could not find data for %s", key)
                raise Return({
                    'status': 'missing-data',
                    'keys': e.args,
                    'key': key
                })
        else:
            transfer_time = 0
        try:
            start = default_timer()
            if task is not None:
                task = loads(task)
            if function is not None:
                function = loads(function)
            if args:
                args = loads(args)
            if kwargs:
                kwargs = loads(kwargs)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            raise Return(assoc(error_message(e), 'key', key))

        if task is not None:
            assert not function and not args and not kwargs
            function = execute_task
            args = (task, )

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({
            'status': 'OK',
            'function': function,
            'args': args2,
            'kwargs': kwargs2,
            'diagnostics': diagnostics,
            'key': key
        })

    @gen.coroutine
    def executor_submit(self, key, function, *args, **kwargs):
        """ Safely run function in thread pool executor

        We've run into issues running concurrent.future futures within
        tornado.  Apparently it's advantageous to use timeouts and periodic
        callbacks to ensure things run smoothly.  This can get tricky, so we
        pull it off into an separate method.
        """
        token = yield self.thread_tokens.get()
        job_counter[0] += 1
        i = job_counter[0]
        # logger.info("%s:%d Starts job %d, %s", self.ip, self.port, i, key)
        future = self.executor.submit(function, *args, **kwargs)
        pc = PeriodicCallback(
            lambda: logger.debug("future state: %s - %s", key, future._state),
            1000)
        pc.start()
        try:
            if sys.version_info < (3, 2):
                yield future
            else:
                while not future.done() and future._state != 'FINISHED':
                    try:
                        yield gen.with_timeout(timedelta(seconds=1),
                                               future,
                                               io_loop=self.loop)
                        break
                    except gen.TimeoutError:
                        logger.info("work queue size: %d",
                                    self.executor._work_queue.qsize())
                        logger.info("future state: %s", future._state)
                        logger.info("Pending job %d: %s", i, future)
        finally:
            pc.stop()
            self.thread_tokens.put(token)

        result = future.result()

        logger.info("Finish job %d, %s", i, key)
        raise gen.Return(result)

    @gen.coroutine
    def compute_stream(self, stream):
        with log_errors():
            logger.debug("Open compute stream")
            bstream = BatchedSend(interval=10, loop=self.loop)
            bstream.start(stream)

        @gen.coroutine
        def process(msg):
            try:
                result = yield self.compute(report=False, **msg)
                bstream.send(result)
            except Exception as e:
                logger.exception(e)
                bstream.send(assoc(error_message(e), 'key', msg.get('key')))

        with log_errors():
            while True:
                try:
                    msgs = yield read(stream)
                except StreamClosedError:
                    break
                if not isinstance(msgs, list):
                    msgs = [msgs]

                for msg in msgs:
                    op = msg.pop('op', None)
                    if op == 'close':
                        break
                    elif op == 'compute-task':
                        self.loop.add_callback(process, msg)
                    else:
                        logger.warning("Unknown operation %s, %s", op, msg)

            yield bstream.close()
            logger.info("Close compute stream")

    @gen.coroutine
    def compute(self,
                stream=None,
                function=None,
                key=None,
                args=(),
                kwargs={},
                task=None,
                who_has=None,
                report=True):
        """ Execute function """
        self.active.add(key)

        # Ready function for computation
        msg = yield self._ready_task(function=function,
                                     key=key,
                                     args=args,
                                     kwargs=kwargs,
                                     task=task,
                                     who_has=who_has)
        if msg['status'] != 'OK':
            try:
                self.active.remove(key)
            except KeyError:
                pass
            raise Return(msg)
        else:
            function = msg['function']
            args = msg['args']
            kwargs = msg['kwargs']

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args, kwargs)

        result['key'] = key
        result.update(msg['diagnostics'])

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.center.add_keys(address=(self.ip,
                                                               self.port),
                                                      keys=[key])
                if not response == 'OK':
                    logger.warn('Could not report results to center: %s',
                                str(response))
        else:
            logger.warn(
                " Compute Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                str(args)[:1000],
                str(kwargs)[:1000],
                exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key, msg)
        try:
            self.active.remove(key)
        except KeyError:
            pass
        raise Return(result)

    @gen.coroutine
    def run(self, stream, function=None, args=(), kwargs={}):
        function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)
        try:
            result = function(*args, **kwargs)
        except Exception as e:
            logger.warn(
                " Run Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                str(args)[:1000],
                str(kwargs)[:1000],
                exc_info=True)

            response = error_message(e)
        else:
            response = {
                'status': 'OK',
                'result': dumps(result),
            }
        raise Return(response)

    @gen.coroutine
    def update_data(self, stream, data=None, report=True):
        data = valmap(loads, data)
        self.data.update(data)
        if report:
            response = yield self.center.add_keys(address=(self.ip, self.port),
                                                  keys=list(data))
            assert response == 'OK'
        info = {
            'nbytes': {k: sizeof(v)
                       for k, v in data.items()},
            'status': 'OK'
        }
        raise Return(info)

    @gen.coroutine
    def delete_data(self, stream, keys=None, report=True):
        for key in keys:
            if key in self.data:
                del self.data[key]
        logger.info("Deleted %d keys", len(keys))
        if report:
            logger.debug("Reporting loss of keys to center")
            yield self.center.remove_keys(address=self.address,
                                          keys=list(keys))
        raise Return('OK')

    def get_data(self, stream, keys=None):
        return {k: dumps(self.data[k]) for k in keys if k in self.data}

    def upload_file(self, stream, filename=None, data=None, load=True):
        out_filename = os.path.join(self.local_dir, filename)
        if isinstance(data, unicode):
            data = data.encode()
        with open(out_filename, 'wb') as f:
            f.write(data)
            f.flush()

        if load:
            try:
                name, ext = os.path.splitext(filename)
                if ext in ('.py', '.pyc'):
                    logger.info("Reload module %s from .py file", name)
                    name = name.split('-')[0]
                    reload(import_module(name))
                if ext == '.egg':
                    sys.path.append(out_filename)
                    pkgs = pkg_resources.find_distributions(out_filename)
                    for pkg in pkgs:
                        logger.info("Load module %s from egg",
                                    pkg.project_name)
                        reload(import_module(pkg.project_name))
                    if not pkgs:
                        logger.warning("Found no packages in egg file")
            except Exception as e:
                logger.exception(e)
                return {'status': 'error', 'exception': dumps(e)}
        return {'status': 'OK', 'nbytes': len(data)}

    def health(self, stream=None):
        """ Information about worker """
        d = {
            'active': len(self.active),
            'stored': len(self.data),
            'time': time()
        }
        try:
            import psutil
            mem = psutil.virtual_memory()
            d.update({
                'cpu': psutil.cpu_percent(),
                'memory': mem.total,
                'memory-percent': mem.percent
            })
            try:
                net_io = psutil.net_io_counters()
                d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
                d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
            except AttributeError:
                pass
            self._last_net_io = net_io

            try:
                disk_io = psutil.disk_io_counters()
                d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes
                d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes
            except AttributeError:
                pass
            self._last_disk_io = disk_io
        except ImportError:
            pass
        return d
Code example #54
0
File: admin.py Project: domogik/domogik
class Publisher(MQAsyncSub):
    """Handles new data to be passed on to subscribers."""
    def __init__(self):
        self.ctx = zmq.Context()
        self.WSmessages = Queue()
        self.MQmessages = Queue()
        self.sub = MQAsyncSub.__init__(self, self.ctx, 'admin', [])
        self.pub = MQPub(self.ctx, 'admin-ws')
        self.subscribers = set()

    def register(self, subscriber):
        """Register a new subscriber."""
        self.subscribers.add(subscriber)

    def deregister(self, subscriber):
        """Stop publishing to a subscriber."""
        try:
            self.subscribers.remove(subscriber)
        except KeyError:
            pass

    @gen.coroutine
    def on_message(self, did, msg):
        """Receive message from MQ sub and send to WS."""
        yield self.WSmessages.put({"msgid": did, "content": msg})

    @gen.coroutine
    def publishToWS(self):
        while True:
            message = yield self.WSmessages.get()
            if len(self.subscribers) > 0:
                #print(u"Pushing MQ message to {} WS subscribers...".format(len(self.subscribers)))
                yield [subscriber.submit(message) for subscriber in self.subscribers]

    @gen.coroutine
    def publishToMQ(self):
        while True:
            message = yield self.MQmessages.get()
            self.sendToMQ(message)
    
    def sendToMQ(self, message):
        try:
            ctx = zmq.Context()
            jsons = json.loads(message)
            # req/rep
            if 'mq_request' in jsons and 'data' in jsons:
                cli = MQSyncReq(ctx)
                msg = MQMessage()
                msg.set_action(str(jsons['mq_request']))
                msg.set_data(jsons['data'])
                print(u"REQ : {0}".format(msg.get()))
                if 'dst' in jsons:
                    dst = str(jsons['dst'])
                else:
                    dst = 'manager'
                res = cli.request(dst, msg.get(), timeout=10)
                if res:
                    print(res.get())
                cli.shutdown()
                del cli
            # pub
            elif 'mq_publish' in jsons and 'data' in jsons:
                self.pub.send_event(jsons['mq_publish'],
                                jsons['data'])
        except Exception as e:
            print(u"Error sending mq message: {0}".format(e))
Code example #55
0
File: kernelbase.py Project: vscosta/yap
class Kernel(SingletonConfigurable):

    #---------------------------------------------------------------------------
    # Kernel interface
    #---------------------------------------------------------------------------

    # attribute to override with a GUI
    eventloop = Any(None)

    @observe('eventloop')
    def _update_eventloop(self, change):
        """schedule call to eventloop from IOLoop"""
        loop = ioloop.IOLoop.current()
        if change.new is not None:
            loop.add_callback(self.enter_eventloop)

    session = Instance(Session, allow_none=True)
    profile_dir = Instance('IPython.core.profiledir.ProfileDir',
                           allow_none=True)
    shell_stream = Instance(ZMQStream, allow_none=True)

    shell_streams = List(
        help="""Deprecated shell_streams alias. Use shell_stream

        .. versionchanged:: 6.0
            shell_streams is deprecated. Use shell_stream.
        """)

    @default("shell_streams")
    def _shell_streams_default(self):
        warnings.warn(
            "Kernel.shell_streams is deprecated in yapkernel 6.0. Use Kernel.shell_stream",
            DeprecationWarning,
            stacklevel=2,
        )
        if self.shell_stream is not None:
            return [self.shell_stream]
        else:
            return []

    @observe("shell_streams")
    def _shell_streams_changed(self, change):
        warnings.warn(
            "Kernel.shell_streams is deprecated in yapkernel 6.0. Use Kernel.shell_stream",
            DeprecationWarning,
            stacklevel=2,
        )
        if len(change.new) > 1:
            warnings.warn(
                "Kernel only supports one shell stream. Additional streams will be ignored.",
                RuntimeWarning,
                stacklevel=2,
            )
        if change.new:
            self.shell_stream = change.new[0]

    control_stream = Instance(ZMQStream, allow_none=True)

    debug_shell_socket = Any()

    control_thread = Any()
    iopub_socket = Any()
    iopub_thread = Any()
    stdin_socket = Any()
    log = Instance(logging.Logger, allow_none=True)

    # identities:
    int_id = Integer(-1)
    ident = Unicode()

    @default('ident')
    def _default_ident(self):
        return str(uuid.uuid4())

    # This should be overridden by wrapper kernels that implement any real
    # language.
    language_info = {
        'name': 'Prolog (YAP)',
        'mimetype': 'text/x-prolog',
        'file_extension': '.yap',
    }

    # any links that should go in the help menu
    help_links = List()

    # Private interface

    _darwin_app_nap = Bool(
        True,
        help="""Whether to use appnope for compatibility with OS X App Nap.

        Only affects OS X >= 10.9.
        """).tag(config=True)

    # track associations with current request
    _allow_stdin = Bool(False)
    _parents = Dict({"shell": {}, "control": {}})
    _parent_ident = Dict({'shell': b'', 'control': b''})

    @property
    def _parent_header(self):
        warnings.warn(
            "Kernel._parent_header is deprecated in yapkernel 6. Use .get_parent()",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.get_parent(channel="shell")

    # Time to sleep after flushing the stdout/err buffers in each execute
    # cycle.  While this introduces a hard limit on the minimal latency of the
    # execute cycle, it helps prevent output synchronization problems for
    # clients.
    # Units are in seconds.  The minimum zmq latency on local host is probably
    # ~150 microseconds, set this to 500us for now.  We may need to increase it
    # a little if it's not enough after more interactive testing.
    _execute_sleep = Float(0.0005).tag(config=True)

    # Frequency of the kernel's event loop.
    # Units are in seconds, kernel subclasses for GUI toolkits may need to
    # adapt to milliseconds.
    _poll_interval = Float(0.01).tag(config=True)

    stop_on_error_timeout = Float(
        0.0,
        config=True,
        help="""time (in seconds) to wait for messages to arrive
        when aborting queued requests after an error.

        Requests that arrive within this window after an error
        will be cancelled.

        Increase in the event of unusually slow network
        causing significant delays,
        which can manifest as e.g. "Run all" in a notebook
        aborting some, but not all, messages after an error.
        """)

    # If the shutdown was requested over the network, we leave here the
    # necessary reply message so it can be sent by our registered atexit
    # handler.  This ensures that the reply is only sent to clients truly at
    # the end of our shutdown process (which happens after the underlying
    # IPython shell's own shutdown).
    _shutdown_message = None

    # This is a dict of port number that the kernel is listening on. It is set
    # by record_ports and used by connect_request.
    _recorded_ports = Dict()

    # set of aborted msg_ids
    aborted = Set()

    # Track execution count here. For IPython, we override this to use the
    # execution count we store in the shell.
    execution_count = 0

    msg_types = [
        'execute_request',
        'complete_request',
        'inspect_request',
        'history_request',
        'comm_info_request',
        'kernel_info_request',
        'connect_request',
        'shutdown_request',
        'is_complete_request',
        'interrupt_request',
        # deprecated:
        'apply_request',
    ]
    # add deprecated ipyparallel control messages
    control_msg_types = msg_types + [
        'clear_request', 'abort_request', 'debug_request'
    ]

    def __init__(self, **kwargs):
        super(Kernel, self).__init__(**kwargs)
        # Build dict of handlers for message types
        self.shell_handlers = {}
        for msg_type in self.msg_types:
            self.shell_handlers[msg_type] = getattr(self, msg_type)

        self.control_handlers = {}
        for msg_type in self.control_msg_types:
            self.control_handlers[msg_type] = getattr(self, msg_type)

        self.control_queue = Queue()

    def dispatch_control(self, msg):
        self.control_queue.put_nowait(msg)

    async def poll_control_queue(self):
        while True:
            msg = await self.control_queue.get()
            # handle tracers from _flush_control_queue
            if isinstance(msg, (concurrent.futures.Future, asyncio.Future)):
                msg.set_result(None)
                continue
            await self.process_control(msg)

    async def _flush_control_queue(self):
        """Flush the control queue, wait for processing of any pending messages"""
        if self.control_thread:
            control_loop = self.control_thread.io_loop
            # concurrent.futures.Futures are threadsafe
            # and can be used to await across threads
            tracer_future = concurrent.futures.Future()
            awaitable_future = asyncio.wrap_future(tracer_future)
        else:
            control_loop = self.io_loop
            tracer_future = awaitable_future = asyncio.Future()

        def _flush():
            # control_stream.flush puts messages on the queue
            self.control_stream.flush()
            # put Future on the queue after all of those,
            # so we can wait for all queued messages to be processed
            self.control_queue.put(tracer_future)

        control_loop.add_callback(_flush)
        return awaitable_future

    async def process_control(self, msg):
        """dispatch control requests"""
        idents, msg = self.session.feed_identities(msg, copy=False)
        try:
            msg = self.session.deserialize(msg, content=True, copy=False)
        except Exception:
            self.log.error("Invalid Control Message", exc_info=True)
            return

        self.log.debug("Control received: %s", msg)

        # Set the parent message for side effects.
        self.set_parent(idents, msg, channel='control')
        self._publish_status('busy', 'control')

        header = msg['header']
        msg_type = header['msg_type']

        handler = self.control_handlers.get(msg_type, None)
        if handler is None:
            self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r", msg_type)
        else:
            try:
                result = handler(self.control_stream, idents, msg)
                if inspect.isawaitable(result):
                    await result
            except Exception:
                self.log.error("Exception in control handler:", exc_info=True)

        sys.stdout.flush()
        sys.stderr.flush()
        self._publish_status('idle', 'control')
        # flush to ensure reply is sent
        self.control_stream.flush(zmq.POLLOUT)

    def should_handle(self, stream, msg, idents):
        """Check whether a shell-channel message should be handled

        Allows subclasses to prevent handling of certain messages (e.g. aborted requests).
        """
        msg_id = msg['header']['msg_id']
        if msg_id in self.aborted:
            # is it safe to assume a msg_id will not be resubmitted?
            self.aborted.remove(msg_id)
            self._send_abort_reply(stream, msg, idents)
            return False
        return True

    async def dispatch_shell(self, msg):
        """dispatch shell requests"""

        # flush control queue before handling shell requests
        await self._flush_control_queue()

        idents, msg = self.session.feed_identities(msg, copy=False)
        try:
            msg = self.session.deserialize(msg, content=True, copy=False)
        except Exception:
            self.log.error("Invalid Message", exc_info=True)
            return

        # Set the parent message for side effects.
        self.set_parent(idents, msg, channel='shell')
        self._publish_status('busy', 'shell')

        msg_type = msg['header']['msg_type']

        # Only abort execute requests
        if self._aborting and msg_type == 'execute_request':
            self._send_abort_reply(self.shell_stream, msg, idents)
            self._publish_status('idle', 'shell')
            # flush to ensure reply is sent before
            # handling the next request
            self.shell_stream.flush(zmq.POLLOUT)
            return

        # Print some info about this message and leave a '--->' marker, so it's
        # easier to trace visually the message chain when debugging.  Each
        # handler prints its message at the end.
        self.log.debug('\n*** MESSAGE TYPE:%s***', msg_type)
        self.log.debug('   Content: %s\n   --->\n   ', msg['content'])

        if not self.should_handle(self.shell_stream, msg, idents):
            return

        handler = self.shell_handlers.get(msg_type, None)
        if handler is None:
            self.log.warning("Unknown message type: %r", msg_type)
        else:
            self.log.debug("%s: %s", msg_type, msg)
            try:
                self.pre_handler_hook()
            except Exception:
                self.log.debug("Unable to signal in pre_handler_hook:",
                               exc_info=True)
            try:
                result = handler(self.shell_stream, idents, msg)
                if inspect.isawaitable(result):
                    await result
            except Exception:
                self.log.error("Exception in message handler:", exc_info=True)
            except KeyboardInterrupt:
                # Ctrl-c shouldn't crash the kernel here.
                self.log.error("KeyboardInterrupt caught in kernel.")
            finally:
                try:
                    self.post_handler_hook()
                except Exception:
                    self.log.debug("Unable to signal in post_handler_hook:",
                                   exc_info=True)

        sys.stdout.flush()
        sys.stderr.flush()
        self._publish_status('idle', 'shell')
        # flush to ensure reply is sent before
        # handling the next request
        self.shell_stream.flush(zmq.POLLOUT)

    def pre_handler_hook(self):
        """Hook to execute before calling message handler"""
        # ensure default_int_handler during handler call
        self.saved_sigint_handler = signal(SIGINT, default_int_handler)

    def post_handler_hook(self):
        """Hook to execute after calling message handler"""
        signal(SIGINT, self.saved_sigint_handler)

    def enter_eventloop(self):
        """enter eventloop"""
        self.log.info("Entering eventloop %s", self.eventloop)
        # record handle, so we can check when this changes
        eventloop = self.eventloop
        if eventloop is None:
            self.log.info("Exiting as there is no eventloop")
            return

        def advance_eventloop():
            # check if eventloop changed:
            if self.eventloop is not eventloop:
                self.log.info("exiting eventloop %s", eventloop)
                return
            if self.msg_queue.qsize():
                self.log.debug("Delaying eventloop due to waiting messages")
                # still messages to process, make the eventloop wait
                schedule_next()
                return
            self.log.debug("Advancing eventloop %s", eventloop)
            try:
                eventloop(self)
            except KeyboardInterrupt:
                # Ctrl-C shouldn't crash the kernel
                self.log.error("KeyboardInterrupt caught in kernel")
            if self.eventloop is eventloop:
                # schedule advance again
                schedule_next()

        def schedule_next():
            """Schedule the next advance of the eventloop"""
            # flush the eventloop every so often,
            # giving us a chance to handle messages in the meantime
            self.log.debug("Scheduling eventloop advance")
            self.io_loop.call_later(0.001, advance_eventloop)

        # begin polling the eventloop
        schedule_next()

    async def do_one_iteration(self):
        """Process a single shell message

        Any pending control messages will be flushed as well

        .. versionchanged:: 5
            This is now a coroutine
        """
        # flush messages off of shell stream into the message queue
        self.shell_stream.flush()
        # process at most one shell message per iteration
        await self.process_one(wait=False)

    async def process_one(self, wait=True):
        """Process one request

        Returns None if no message was handled.
        """
        if wait:
            t, dispatch, args = await self.msg_queue.get()
        else:
            try:
                t, dispatch, args = self.msg_queue.get_nowait()
            except asyncio.QueueEmpty:
                return None
        await dispatch(*args)

    async def dispatch_queue(self):
        """Coroutine to preserve order of message handling

        Ensures that only one message is processing at a time,
        even when the handler is async
        """

        while True:
            try:
                await self.process_one()
            except Exception:
                self.log.exception("Error in message handler")

    _message_counter = Any(help="Monotonic counter of messages")

    @default('_message_counter')
    def _message_counter_default(self):
        return itertools.count()

    def schedule_dispatch(self, dispatch, *args):
        """schedule a message for dispatch"""
        idx = next(self._message_counter)

        self.msg_queue.put_nowait((
            idx,
            dispatch,
            args,
        ))
        # ensure the eventloop wakes up
        self.io_loop.add_callback(lambda: None)

    def start(self):
        """register dispatchers for streams"""
        self.io_loop = ioloop.IOLoop.current()
        self.msg_queue = Queue()
        self.io_loop.add_callback(self.dispatch_queue)

        self.control_stream.on_recv(self.dispatch_control, copy=False)

        if self.control_thread:
            control_loop = self.control_thread.io_loop
        else:
            control_loop = self.io_loop

        asyncio.run_coroutine_threadsafe(self.poll_control_queue(),
                                         control_loop.asyncio_loop)

        self.shell_stream.on_recv(
            partial(
                self.schedule_dispatch,
                self.dispatch_shell,
            ),
            copy=False,
        )

        # publish idle status
        self._publish_status('starting', 'shell')

    def record_ports(self, ports):
        """Record the ports that this kernel is using.

        The creator of the Kernel instance must call this method if they
        want the :meth:`connect_request` method to return the port numbers.
        """
        self._recorded_ports = ports

    #---------------------------------------------------------------------------
    # Kernel request handlers
    #---------------------------------------------------------------------------

    def _publish_execute_input(self, code, parent, execution_count):
        """Publish the code request on the iopub stream."""

        self.session.send(self.iopub_socket,
                          'execute_input', {
                              'code': code,
                              'execution_count': execution_count
                          },
                          parent=parent,
                          ident=self._topic('execute_input'))

    def _publish_status(self, status, channel, parent=None):
        """send status (busy/idle) on IOPub"""
        self.session.send(
            self.iopub_socket,
            "status",
            {"execution_state": status},
            parent=parent or self.get_parent(channel),
            ident=self._topic("status"),
        )

    def _publish_debug_event(self, event):
        self.session.send(
            self.iopub_socket,
            "debug_event",
            event,
            parent=self.get_parent("control"),
            ident=self._topic("debug_event"),
        )

    def set_parent(self, ident, parent, channel='shell'):
        """Set the current parent request

        Side effects (IOPub messages) and replies are associated with
        the request that caused them via the parent_header.

        The parent identity is used to route input_request messages
        on the stdin channel.
        """
        self._parent_ident[channel] = ident
        self._parents[channel] = parent

    def get_parent(self, channel="shell"):
        """Get the parent request associated with a channel.

        .. versionadded:: 6

        Parameters
        ----------
        channel : str
            the name of the channel ('shell' or 'control')

        Returns
        -------
        message : dict
            the parent message for the most recent request on the channel.
        """
        return self._parents.get(channel, {})

    def send_response(self,
                      stream,
                      msg_or_type,
                      content=None,
                      ident=None,
                      buffers=None,
                      track=False,
                      header=None,
                      metadata=None,
                      channel='shell'):
        """Send a response to the message we're currently processing.

        This accepts all the parameters of :meth:`jupyter_client.session.Session.send`
        except ``parent``.

        This relies on :meth:`set_parent` having been called for the current
        message.
        """
        return self.session.send(
            stream,
            msg_or_type,
            content,
            self.get_parent(channel),
            ident,
            buffers,
            track,
            header,
            metadata,
        )

    def init_metadata(self, parent):
        """Initialize metadata.

        Run at the beginning of execution requests.
        """
        # FIXME: `started` is part of ipyparallel
        # Remove for yapkernel 5.0
        return {
            'started': now(),
        }

    def finish_metadata(self, parent, metadata, reply_content):
        """Finish populating metadata.

        Run after completing an execution request.
        """
        return metadata

    async def execute_request(self, stream, ident, parent):
        """handle an execute_request"""

        try:
            content = parent['content']
            code = content['code']
            silent = content['silent']
            store_history = content.get('store_history', not silent)
            user_expressions = content.get('user_expressions', {})
            allow_stdin = content.get('allow_stdin', False)
        except Exception:
            self.log.error("Got bad msg: ")
            self.log.error("%s", parent)
            return

        stop_on_error = content.get('stop_on_error', True)

        metadata = self.init_metadata(parent)

        # Re-broadcast our input for the benefit of listening clients, and
        # start computing output
        if not silent:
            self.execution_count += 1
            self._publish_execute_input(code, parent, self.execution_count)

        reply_content = self.do_execute(
            code,
            silent,
            store_history,
            user_expressions,
            allow_stdin,
        )
        if inspect.isawaitable(reply_content):
            reply_content = await reply_content

        # Flush output before sending the reply.
        sys.stdout.flush()
        sys.stderr.flush()
        # FIXME: on rare occasions, the flush doesn't seem to make it to the
        # clients... This seems to mitigate the problem, but we definitely need
        # to better understand what's going on.
        if self._execute_sleep:
            time.sleep(self._execute_sleep)

        # Send the reply.
        reply_content = json_clean(reply_content)
        metadata = self.finish_metadata(parent, metadata, reply_content)

        reply_msg = self.session.send(stream,
                                      'execute_reply',
                                      reply_content,
                                      parent,
                                      metadata=metadata,
                                      ident=ident)

        self.log.debug("%s", reply_msg)

        if (not silent and reply_msg['content']['status'] == 'error'
                and stop_on_error):
            await self._abort_queues()

    def do_execute(self,
                   code,
                   silent,
                   store_history=True,
                   user_expressions=None,
                   allow_stdin=False):
        """Execute user code. Must be overridden by subclasses.
        """
        raise NotImplementedError

    async def complete_request(self, stream, ident, parent):
        content = parent['content']
        code = content['code']
        cursor_pos = content['cursor_pos']

        matches = self.do_complete(code, cursor_pos)
        if inspect.isawaitable(matches):
            matches = await matches

        matches = json_clean(matches)
        self.session.send(stream, "complete_reply", matches, parent, ident)

    def do_complete(self, code, cursor_pos):
        """Override in subclasses to find completions.
        """
        return {
            'matches': [],
            'cursor_end': cursor_pos,
            'cursor_start': cursor_pos,
            'metadata': {},
            'status': 'ok'
        }

    async def inspect_request(self, stream, ident, parent):
        content = parent['content']

        reply_content = self.do_inspect(
            content['code'],
            content['cursor_pos'],
            content.get('detail_level', 0),
        )
        if inspect.isawaitable(reply_content):
            reply_content = await reply_content

        # Before we send this object over, we scrub it for JSON usage
        reply_content = json_clean(reply_content)
        msg = self.session.send(stream, 'inspect_reply', reply_content, parent,
                                ident)
        self.log.debug("%s", msg)

    def do_inspect(self, code, cursor_pos, detail_level=0):
        """Override in subclasses to allow introspection.
        """
        return {'status': 'ok', 'data': {}, 'metadata': {}, 'found': False}

    async def history_request(self, stream, ident, parent):
        content = parent['content']

        reply_content = self.do_history(**content)
        if inspect.isawaitable(reply_content):
            reply_content = await reply_content

        reply_content = json_clean(reply_content)
        msg = self.session.send(stream, 'history_reply', reply_content, parent,
                                ident)
        self.log.debug("%s", msg)

    def do_history(self,
                   hist_access_type,
                   output,
                   raw,
                   session=None,
                   start=None,
                   stop=None,
                   n=None,
                   pattern=None,
                   unique=False):
        """Override in subclasses to access history.
        """
        return {'status': 'ok', 'history': []}

    async def connect_request(self, stream, ident, parent):
        if self._recorded_ports is not None:
            content = self._recorded_ports.copy()
        else:
            content = {}
        content['status'] = 'ok'
        msg = self.session.send(stream, 'connect_reply', content, parent,
                                ident)
        self.log.debug("%s", msg)

    @property
    def kernel_info(self):
        return {
            'protocol_version': kernel_protocol_version,
            'implementation': self.implementation,
            'implementation_version': self.implementation_version,
            'language_info': self.language_info,
            'banner': self.banner,
            'help_links': self.help_links,
        }

    async def kernel_info_request(self, stream, ident, parent):
        content = {'status': 'ok'}
        content.update(self.kernel_info)
        msg = self.session.send(stream, 'kernel_info_reply', content, parent,
                                ident)
        self.log.debug("%s", msg)

    async def comm_info_request(self, stream, ident, parent):
        content = parent['content']
        target_name = content.get('target_name', None)

        # Should this be moved to ipykernel?
        if hasattr(self, 'comm_manager'):
            comms = {
                k: dict(target_name=v.target_name)
                for (k, v) in self.comm_manager.comms.items()
                if v.target_name == target_name or target_name is None
            }
        else:
            comms = {}
        reply_content = dict(comms=comms, status='ok')
        msg = self.session.send(stream, 'comm_info_reply', reply_content,
                                parent, ident)
        self.log.debug("%s", msg)

    async def interrupt_request(self, stream, ident, parent):
        pid = os.getpid()
        pgid = os.getpgid(pid)

        if os.name == "nt":
            self.log.error("Interrupt message not supported on Windows")

        else:
            # Prefer process-group over process
            if pgid and hasattr(os, "killpg"):
                try:
                    os.killpg(pgid, SIGINT)
                    return
                except OSError:
                    pass
            try:
                os.kill(pid, SIGINT)
            except OSError:
                pass

        content = parent['content']
        self.session.send(stream,
                          'interrupt_reply',
                          content,
                          parent,
                          ident=ident)
        return

    async def shutdown_request(self, stream, ident, parent):
        content = self.do_shutdown(parent['content']['restart'])
        if inspect.isawaitable(content):
            content = await content
        self.session.send(stream,
                          'shutdown_reply',
                          content,
                          parent,
                          ident=ident)
        # same content, but different msg_id for broadcasting on IOPub
        self._shutdown_message = self.session.msg('shutdown_reply', content,
                                                  parent)

        self._at_shutdown()

        self.log.debug('Stopping control ioloop')
        control_io_loop = self.control_stream.io_loop
        control_io_loop.add_callback(control_io_loop.stop)

        self.log.debug('Stopping shell ioloop')
        shell_io_loop = self.shell_stream.io_loop
        shell_io_loop.add_callback(shell_io_loop.stop)

    def do_shutdown(self, restart):
        """Override in subclasses to do things when the frontend shuts down the
        kernel.
        """
        return {'status': 'ok', 'restart': restart}

    async def is_complete_request(self, stream, ident, parent):
        content = parent['content']
        code = content['code']

        reply_content = self.do_is_complete(code)
        if inspect.isawaitable(reply_content):
            reply_content = await reply_content
        reply_content = json_clean(reply_content)
        reply_msg = self.session.send(stream, 'is_complete_reply',
                                      reply_content, parent, ident)
        self.log.debug("%s", reply_msg)

    def do_is_complete(self, code):
        """Override in subclasses to find completions.
        """
        return {'status': 'unknown'}

    async def debug_request(self, stream, ident, parent):
        content = parent['content']

        reply_content = self.do_debug_request(content)
        if inspect.isawaitable(reply_content):
            reply_content = await reply_content
        reply_content = json_clean(reply_content)
        reply_msg = self.session.send(stream, 'debug_reply', reply_content,
                                      parent, ident)
        self.log.debug("%s", reply_msg)

    async def do_debug_request(self, msg):
        raise NotImplementedError

    #---------------------------------------------------------------------------
    # Engine methods (DEPRECATED)
    #---------------------------------------------------------------------------

    async def apply_request(self, stream, ident, parent):
        self.log.warning(
            "apply_request is deprecated in kernel_base, moving to ipyparallel."
        )
        try:
            content = parent['content']
            bufs = parent['buffers']
            msg_id = parent['header']['msg_id']
        except Exception:
            self.log.error("Got bad msg: %s", parent, exc_info=True)
            return

        md = self.init_metadata(parent)

        reply_content, result_buf = self.do_apply(content, bufs, msg_id, md)

        # flush i/o
        sys.stdout.flush()
        sys.stderr.flush()

        md = self.finish_metadata(parent, md, reply_content)

        self.session.send(stream,
                          'apply_reply',
                          reply_content,
                          parent=parent,
                          ident=ident,
                          buffers=result_buf,
                          metadata=md)

    def do_apply(self, content, bufs, msg_id, reply_metadata):
        """DEPRECATED"""
        raise NotImplementedError

    #---------------------------------------------------------------------------
    # Control messages (DEPRECATED)
    #---------------------------------------------------------------------------

    async def abort_request(self, stream, ident, parent):
        """abort a specific msg by id"""
        self.log.warning(
            "abort_request is deprecated in kernel_base. It is only part of IPython parallel"
        )
        msg_ids = parent['content'].get('msg_ids', None)
        if isinstance(msg_ids, str):
            msg_ids = [msg_ids]
        if not msg_ids:
            # _abort_queues is a coroutine, so it must be awaited
            await self._abort_queues()
        for mid in msg_ids or []:
            self.aborted.add(str(mid))

        content = dict(status='ok')
        reply_msg = self.session.send(stream,
                                      'abort_reply',
                                      content=content,
                                      parent=parent,
                                      ident=ident)
        self.log.debug("%s", reply_msg)

    async def clear_request(self, stream, idents, parent):
        """Clear our namespace."""
        self.log.warning(
            "clear_request is deprecated in kernel_base. It is only part of IPython parallel"
        )
        content = self.do_clear()
        self.session.send(stream,
                          'clear_reply',
                          ident=idents,
                          parent=parent,
                          content=content)

    def do_clear(self):
        """DEPRECATED since 4.0.3"""
        raise NotImplementedError

    #---------------------------------------------------------------------------
    # Protected interface
    #---------------------------------------------------------------------------

    def _topic(self, topic):
        """prefixed topic for IOPub messages"""
        base = "kernel.%s" % self.ident

        return ("%s.%s" % (base, topic)).encode()

    _aborting = Bool(False)

    async def _abort_queues(self):
        self.shell_stream.flush()
        self._aborting = True

        def stop_aborting():
            self.log.info("Finishing abort")
            self._aborting = False

        asyncio.get_event_loop().call_later(self.stop_on_error_timeout,
                                            stop_aborting)

    def _send_abort_reply(self, stream, msg, idents):
        """Send a reply to an aborted request"""
        self.log.info(
            f"Aborting {msg['header']['msg_id']}: {msg['header']['msg_type']}")
        reply_type = msg["header"]["msg_type"].rsplit("_", 1)[0] + "_reply"
        status = {"status": "aborted"}
        md = self.init_metadata(msg)
        md = self.finish_metadata(msg, md, status)
        md.update(status)

        self.session.send(
            stream,
            reply_type,
            metadata=md,
            content=status,
            parent=msg,
            ident=idents,
        )

    def _no_raw_input(self):
        """Raise StdinNotImplementedError if active frontend doesn't support
        stdin."""
        raise StdinNotImplementedError("raw_input was called, but this "
                                       "frontend does not support stdin.")

    def getpass(self, prompt='', stream=None):
        """Forward getpass to frontends

        Raises
        ------
        StdinNotImplementedError if active frontend doesn't support stdin.
        """
        if not self._allow_stdin:
            raise StdinNotImplementedError(
                "getpass was called, but this frontend does not support input requests."
            )
        if stream is not None:
            import warnings

            warnings.warn(
                "The `stream` parameter of `getpass.getpass` will have no effect when using yapkernel",
                UserWarning,
                stacklevel=2,
            )
        return self._input_request(
            prompt,
            self._parent_ident["shell"],
            self.get_parent("shell"),
            password=True,
        )

    def raw_input(self, prompt=''):
        """Forward raw_input to frontends

        Raises
        ------
        StdinNotImplementedError if active frontend doesn't support stdin.
        """
        if not self._allow_stdin:
            raise StdinNotImplementedError(
                "raw_input was called, but this frontend does not support input requests."
            )
        return self._input_request(
            str(prompt),
            self._parent_ident["shell"],
            self.get_parent("shell"),
            password=False,
        )

    def _input_request(self, prompt, ident, parent, password=False):
        # Flush output before making the request.
        sys.stderr.flush()
        sys.stdout.flush()

        # flush the stdin socket, to purge stale replies
        while True:
            try:
                self.stdin_socket.recv_multipart(zmq.NOBLOCK)
            except zmq.ZMQError as e:
                if e.errno == zmq.EAGAIN:
                    break
                else:
                    raise

        # Send the input request.
        content = json_clean(dict(prompt=prompt, password=password))
        self.session.send(self.stdin_socket,
                          'input_request',
                          content,
                          parent,
                          ident=ident)

        # Await a response.
        while True:
            try:
                # Use polling with select() so KeyboardInterrupts can get
                # through; doing a blocking recv() means stdin reads are
                # uninterruptible on Windows. We need a timeout because
                # zmq.select() is also uninterruptible, but at least this
                # way reads get noticed immediately and KeyboardInterrupts
                # get noticed fairly quickly by human response time standards.
                rlist, _, xlist = zmq.select([self.stdin_socket], [],
                                             [self.stdin_socket], 0.01)
                if rlist or xlist:
                    ident, reply = self.session.recv(self.stdin_socket)
                    if (ident, reply) != (None, None):
                        break
            except KeyboardInterrupt:
                # re-raise KeyboardInterrupt, to truncate traceback
                raise KeyboardInterrupt("Interrupted by user") from None
            except Exception:
                self.log.warning("Invalid Message:", exc_info=True)

        try:
            value = reply["content"]["value"]
        except Exception:
            self.log.error("Bad input_reply: %s", parent)
            value = ''
        if value == '\x04':
            # EOF
            raise EOFError
        return value

    def _at_shutdown(self):
        """Actions taken at shutdown by the kernel, called by python's atexit.
        """
        if self._shutdown_message is not None:
            self.session.send(self.iopub_socket,
                              self._shutdown_message,
                              ident=self._topic('shutdown'))
            self.log.debug("%s", self._shutdown_message)
        self.control_stream.flush(zmq.POLLOUT)
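
The Kernel base class above routes execute_request to do_execute, which subclasses must override (the base implementation raises NotImplementedError), and builds kernel_info_reply from the implementation, implementation_version and banner attributes. A minimal wrapper-kernel sketch built on this class could look like the following; the EchoKernel name, its metadata values and the echo behaviour are illustrative assumptions, not part of the code above.

class EchoKernel(Kernel):
    """Sketch only: a wrapper kernel that echoes submitted code back to the client."""
    implementation = 'echo'
    implementation_version = '0.1'
    banner = 'Echo kernel (illustrative sketch)'
    language_info = {
        'name': 'echo',
        'mimetype': 'text/plain',
        'file_extension': '.txt',
    }

    def do_execute(self, code, silent, store_history=True,
                   user_expressions=None, allow_stdin=False):
        # Re-emit the input as a stream message on IOPub unless the request was silent.
        if not silent:
            self.send_response(self.iopub_socket, 'stream',
                               {'name': 'stdout', 'text': code})
        return {'status': 'ok',
                'execution_count': self.execution_count,
                'payload': [],
                'user_expressions': {}}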
Code example #56
__author__ = 'zhangxa'

import sys
sys.path.append("../..")

from app.application import Application
from workers.worker import Worker
from tornado.queues import Queue

app = Application([
        (r"^http://www.baidu.com.*$", "urlHandler.urlHandler.UrlSeekHandler",{"a":10,"b":3}),
        (r"^http://www.jianshu.com/([0-9]+)/([0-9])+", "urlHandler.urlHandler.UrlBaseHandler",{"a":3}),
    ])

if __name__ == "__main__":
    queue = Queue()
    queue.put("http://www.jianshu.com")
    worker = Worker(app,queue)
    worker._find_url_handler("http://www.jianshu.com/1234/4")
    print(worker)
Code example #57
File: files.py  Project: vizydrop/apps
    def get_file_list(account, **kwargs):
        queue = Queue()
        sem = BoundedSemaphore(FETCH_CONCURRENCY)
        done, working = set(), set()
        data = []
        ids = set()

        @gen.coroutine
        def fetch_url():
            current_url = yield queue.get()
            try:
                if current_url in working:
                    return
                page_no = working.__len__()
                app_log.info("Fetching page {}".format(page_no))
                working.add(current_url)
                req = account.get_request(current_url)
                client = AsyncHTTPClient()
                response = yield client.fetch(req)
                done.add(current_url)
                app_log.info("Page {} downloaded".format(page_no))
                response_data = json.loads(response.body.decode("utf-8"))

                url = response_data.get("@odata.nextLink", None)
                if url is not None:
                    queue.put(url)

                for file in response_data.get("value", []):
                    if file["name"][-4:].strip(".").lower() in VALID_FILETYPES:
                        if file["id"] not in ids:
                            ids.add(file["id"])
                            data.append(
                                {
                                    "title": file["parentReference"]["path"].split(":")[1].lstrip("/")
                                    + "/"
                                    + file["name"],
                                    "value": file["id"],
                                }
                            )
                app_log.info("Page {} completed".format(page_no))
            finally:
                queue.task_done()
                sem.release()

        @gen.coroutine
        def worker():
            while True:
                yield sem.acquire()
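                # calling fetch_url() without yield schedules the coroutine on the
                # IOLoop (fire-and-forget), so fetches run concurrently up to
                # FETCH_CONCURRENCY at a time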
                fetch_url()

        app_log.info("Gathering filelist for account {}".format(account._id))
        for file_type in VALID_FILETYPES:
            file_type = ".".join([file_type])
            url = "https://api.onedrive.com/v1.0/drive/root/view.search?top=1000&select=parentReference,name,id,size&q={}".format(
                file_type
            )
            queue.put(url)
        # start our concurrency worker
        worker()
        # wait until we're done
        yield queue.join(timeout=timedelta(seconds=MAXIMUM_REQ_TIME))
        app_log.info("Finished list retrieval. Found {} items.".format(data.__len__()))
        return sorted(data, key=lambda f: f["title"])
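
Code example #57 above and #58 below share the same skeleton: a tornado.queues.Queue of URLs, a BoundedSemaphore capping concurrency, a fire-and-forget fetch coroutine, and queue.join(timeout=...) to wait for completion. The following is a self-contained sketch of that skeleton with the service-specific parts stripped out; the CONCURRENCY value, the 30-second timeout and the example URL are placeholders, not values from the original code.

from datetime import timedelta

from tornado import gen, ioloop
from tornado.httpclient import AsyncHTTPClient
from tornado.locks import BoundedSemaphore
from tornado.queues import Queue

CONCURRENCY = 5  # placeholder; the originals use FETCH_CONCURRENCY


@gen.coroutine
def crawl(urls):
    queue = Queue()
    sem = BoundedSemaphore(CONCURRENCY)
    results = {}

    @gen.coroutine
    def fetch_one():
        url = yield queue.get()
        try:
            response = yield AsyncHTTPClient().fetch(url, raise_error=False)
            results[url] = response.code
        finally:
            queue.task_done()
            sem.release()

    @gen.coroutine
    def worker():
        while True:
            yield sem.acquire()
            fetch_one()  # fire-and-forget; concurrency is bounded by the semaphore

    for url in urls:
        queue.put_nowait(url)
    worker()
    try:
        # wait for every queued URL to be marked task_done
        yield queue.join(timeout=timedelta(seconds=30))
    except gen.TimeoutError:
        pass  # give up and return whatever finished in time
    raise gen.Return(results)


if __name__ == "__main__":
    codes = ioloop.IOLoop.current().run_sync(
        lambda: crawl(["https://www.example.com/"]))
    print(codes)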
Code example #58
File: commits.py  Project: vizydrop/apps
    def get_data(cls, account, source_filter, limit=100, skip=0):
        """
        Gathers commit information from GH
        GET https://api.github.com/repos/:owner/:repo/commits
        Header: Accept: application/vnd.github.v3+json
        """
        if not account or not account.enabled:
            raise ValueError('cannot gather information without a valid account')
        client = AsyncHTTPClient()

        source_filter = GitHubRepositoryDateFilter(source_filter)

        if source_filter.repository is None:
            raise ValueError('required parameter projects missing')

        default_headers = {"Content-Type": "application/json", "Accept": "application/vnd.github.v3+json"}

        # first we grab our list of commits
        uri = "https://api.github.com/repos/{}/commits".format(source_filter.repository)
        qs = source_filter.get_qs()
        if qs != '':
            uri = uri + '?' + qs
        app_log.info("Starting retrieval of commit list for account {}".format(account._id))
        if limit is not None and limit <= 100:
            # we can handle our limit right here; append with '&' if the
            # source filter already added a query string above
            uri += ("&" if "?" in uri else "?") + "per_page={}".format(limit)
        elif limit is None:
            # maximum number per page for the GitHub API
            uri += ("&" if "?" in uri else "?") + "per_page=100"
        taken = 0

        queue = Queue()
        sem = BoundedSemaphore(FETCH_CONCURRENCY)
        done, working = set(), set()

        while uri is not None:
            app_log.info(
                "({}) Retrieving next page, received {} commits thus far".format(account._id, taken))
            req = account.get_request(uri, headers=default_headers)
            response = yield client.fetch(req)

            page_data = json.loads(response.body.decode('utf-8'))
            taken += page_data.__len__()
            for item in page_data:
                queue.put(item.get('url', None))

            if limit is None or taken < limit:
                # parse the Link header from GitHub (https://developer.github.com/v3/#pagination)
                links = parse_link_header(response.headers.get('Link', ''))
                uri = links.get('next', None)
            else:
                break

            if queue.qsize() > 500:
                raise HTTPError(413, 'too many commits')
        app_log.info("({}) Commit list retrieved, fetching info for {} commits".format(account._id, taken))

        # open our list
        cls.write('[')

        # our worker to actually fetch the info
        @gen.coroutine
        def fetch_url():
            current_url = yield queue.get()
            try:
                if current_url in working:
                    return
                page_no = working.__len__()
                app_log.info("Fetching page {}".format(page_no))
                working.add(current_url)
                req = account.get_request(current_url)
                client = AsyncHTTPClient()
                response = yield client.fetch(req)
                response_data = json.loads(response.body.decode('utf-8'))
                obj = {
                    'date': response_data['commit']['author']['date'],
                    'author': response_data['commit']['author']['name'],
                    'added_files': [file for file in response_data['files'] if file['status'] == 'added'].__len__(),
                    'deleted_files': [file for file in response_data['files'] if file['status'] == 'deleted'].__len__(),
                    'modified_files': [file for file in response_data['files'] if file['status'] == 'modified'].__len__(),
                    'additions': response_data['stats']['additions'],
                    'deletions': response_data['stats']['deletions']
                }
                if done.__len__() > 0:
                    cls.write(',')
                cls.write(json.dumps(obj))
                done.add(current_url)
                app_log.info("Page {} downloaded".format(page_no))

            finally:
                queue.task_done()
                sem.release()

        @gen.coroutine
        def worker():
            while True:
                yield sem.acquire()
                fetch_url()

        # start our concurrency worker
        worker()
        try:
            # wait until we're done
            yield queue.join(timeout=timedelta(seconds=MAXIMUM_REQ_TIME))
        except gen.TimeoutError:
            app_log.warning("Request exceeds maximum time, cutting response short")
        finally:
            # close our list
            cls.write(']')
        app_log.info("Finished retrieving commits for {}".format(account._id))