Ejemplo n.º 1
0
class AsyncModbusGeneratorClient(AsyncModbusSerialClient):
    """Modbus serial client exposing request helpers as Tornado coroutines.

    Every request goes through a one-slot semaphore, so at most one request
    is being executed at any time.
    """

    def __init__(self, method='ascii', **kwargs):
        """Create the client; ``method`` and ``kwargs`` go to the base class."""
        super(AsyncModbusGeneratorClient, self).__init__(method=method,
                                                         **kwargs)
        # A single slot: requests are executed strictly one after another.
        self.sem = Semaphore(1)

    @gen.coroutine
    def _execute_exclusive(self, request):
        """Run ``request`` while holding the semaphore and return its result.

        The object returned by ``execute`` is bridged onto a Future that the
        coroutine can yield on. Failures are forwarded as well, so a failed
        request raises here instead of leaving the coroutine suspended
        forever with the semaphore still held.

        NOTE(review): assumes ``execute`` returns a Twisted-style Deferred
        (the original code already called ``addCallback`` on it) -- confirm.
        """
        fut_result = Future()
        yield self.sem.acquire()
        try:
            res = self.execute(request)
            # Register both handlers at the same level of the chain so the
            # errback only sees failures of the request itself.
            res.addCallbacks(
                fut_result.set_result,
                lambda failure: fut_result.set_exception(failure.value))
            response = yield fut_result
        finally:
            self.sem.release()
        raise gen.Return(response)

    @gen.coroutine
    def read_input_registers(self, address, count=1, **kwargs):
        """Send a ReadInputRegistersRequest and return its response."""
        request = ReadInputRegistersRequest(address, count, **kwargs)
        response = yield self._execute_exclusive(request)
        raise gen.Return(response)

    @gen.coroutine
    def read_holding_registers(self, address, count=1, **kwargs):
        """Send a ReadHoldingRegistersRequest and return its response."""
        request = ReadHoldingRegistersRequest(address, count, **kwargs)
        response = yield self._execute_exclusive(request)
        raise gen.Return(response)

    @gen.coroutine
    def write_coil(self, address, value, **kwargs):
        """Send a WriteSingleCoilRequest and return its response."""
        request = WriteSingleCoilRequest(address, value, **kwargs)
        response = yield self._execute_exclusive(request)
        raise gen.Return(response)

    @gen.coroutine
    def write_register(self, address, value, **kwargs):
        """Send a WriteSingleRegisterRequest and return its response."""
        request = WriteSingleRegisterRequest(address, value, **kwargs)
        response = yield self._execute_exclusive(request)
        raise gen.Return(response)
Ejemplo n.º 2
0
class Worker(object):
    """Runs tasks through ``do`` while de-duplicating identical active tasks.

    A task that is already active is not started again; callers asking for it
    receive a future chained to the one of the running task. Concurrency of
    ``do`` is gated by a semaphore created with ``max_queue`` slots.

    NOTE(review): the semaphore is created with ``max_queue`` as its initial
    value, so the default of 0 starts with no free slot -- confirm that
    callers always pass a positive value.
    """

    def __init__(self, max_queue=0, io_loop=None):
        if io_loop:
            self.io_loop = io_loop
        else:
            self.io_loop = IOLoop.current()
        # Maps task id -> future of the task currently being processed.
        self.active = {}
        self.sem = Semaphore(value=max_queue)
        self.log = logging.getLogger("pizzadelivery.downloader")

    def _add_to_active(self, task, future):
        """Remember ``future`` as the pending result of ``task``."""
        key = self.task_to_id(task)
        self.active[key] = future

    def _in_active(self, task):
        """Tell whether ``task`` is currently registered as active."""
        key = self.task_to_id(task)
        return key in self.active

    def _remove_from_active(self, task):
        """Forget ``task``; raises KeyError when it is not registered."""
        key = self.task_to_id(task)
        del self.active[key]

    def _get_future_for_task(self, task):
        """Return the future registered for ``task``."""
        key = self.task_to_id(task)
        return self.active[key]

    def enqueue(self, task):
        """Schedule ``task`` and return a future for its result.

        When the task is already active, the returned future is chained to
        the existing one instead of starting the work a second time.
        """
        proxy = concurrent.Future()
        if self._in_active(task):
            source = self._get_future_for_task(task)
        else:
            self._add_to_active(task, proxy)
            source = self._do(task)
        concurrent.chain_future(source, proxy)
        return proxy

    @gen.coroutine
    def _do(self, task):
        """Run ``do(task)`` under the semaphore, then unregister the task."""
        assert self._in_active(task)
        try:
            with (yield self.sem.acquire()):
                outcome = yield gen.maybe_future(self.do(task))
        finally:
            # Runs on success and on failure alike.
            self._remove_from_active(task)
        raise gen.Return(outcome)

    @gen.coroutine
    def do(self, task):
        """Perform the actual work; must be provided by subclasses."""
        raise NotImplementedError  # pragma: no cover

    def task_to_id(self, task):
        """Return the key identifying ``task``; by default the task itself."""
        return task
Ejemplo n.º 3
0
class ManagedKernelPool(KernelPool):
    '''
    Spawns a pool of kernels. Manages access to individual kernels using a
    borrower/lender pattern. Cleans them all up when shut down.
    '''
    def __init__(self, prespawn_count, kernel_manager):
        '''
        Creates the pool and a client plus iopub handler for every kernel
        the kernel manager lists after the base class has been initialised.
        :param prespawn_count: Number of kernels to spawn; a falsy value is
            replaced by 1
        :param kernel_manager: Kernel manager instance
        '''
        # Make sure there's at least one kernel as a delegate
        if not prespawn_count:
            prespawn_count = 1

        super(ManagedKernelPool, self).__init__(prespawn_count, kernel_manager)

        self.kernel_clients = {}
        self.on_recv_funcs = {}
        self.pool_index = 0
        self.kernel_pool = []

        kernel_ids = self.kernel_manager.list_kernel_ids()
        # One semaphore slot per kernel in the pool
        self.kernel_semaphore = Semaphore(len(kernel_ids))

        # Connect to any prespawned kernels
        for kernel_id in kernel_ids:
            self.kernel_clients[kernel_id] = kernel_manager.get_kernel(kernel_id).client()
            self.kernel_pool.append(kernel_id)
            iopub = self.kernel_manager.connect_iopub(kernel_id)
            iopub.on_recv(self.create_on_reply(kernel_id))

    @gen.coroutine
    def acquire(self):
        '''
        Returns a kernel client and id for use and removes the kernel from
        the resource pool. Waits on the semaphore until a kernel is free.
        Kernels must be returned via the release method.
        :return: Returns a kernel client and a kernel id
        '''
        yield self.kernel_semaphore.acquire()
        kernel_id = self.kernel_pool.pop(0)
        raise gen.Return((self.kernel_clients[kernel_id], kernel_id))

    def release(self, kernel_id):
        '''
        Returns a kernel back to the resource pool.
        :param kernel_id: Id of the kernel to return to the pool
        '''
        self.kernel_pool.append(kernel_id)
        self.kernel_semaphore.release()

    def _on_reply(self, kernel_id, msg_list):
        '''
        Deserializes an iopub message and passes it to the callback
        registered for the kernel. Messages for a kernel without a
        registered callback are dropped instead of raising KeyError inside
        the stream handler.
        :param kernel_id: Id of the kernel that sent the message
        :param msg_list: Raw message parts
        '''
        callback = self.on_recv_funcs.get(kernel_id)
        if callback is None:
            import logging
            logging.getLogger(__name__).debug(
                'Could not find callback for kernel_id: %s', kernel_id)
            return
        session = self.kernel_clients[kernel_id].session
        _, msg_list = session.feed_identities(msg_list)
        callback(session.deserialize(msg_list))

    def create_on_reply(self, kernel_id):
        '''
        The lambda is used to handle a specific reply per kernel and provide a unique stack scope per invocation.
        :param kernel_id: Id of the kernel the handler is bound to
        :return: Function taking the raw message list
        '''
        return lambda msg_list: self._on_reply(kernel_id, msg_list)

    def on_recv(self, kernel_id, func):
        '''
        Registers a callback for io_pub messages for a particular kernel.
        This is needed to avoid having multiple callbacks per kernel client.
        :param kernel_id: Id of the kernel
        :param func: Callback function to handle the message
        '''
        self.on_recv_funcs[kernel_id] = func

    def shutdown(self):
        '''
        Shuts down all kernels in the pool and in the kernel manager.
        '''
        for kid, client in self.kernel_clients.items():
            client.stop_channels()
            self.kernel_manager.shutdown_kernel(kid, now=True)

        # Any remaining kernels that were not created for our pool should be shutdown
        super(ManagedKernelPool, self).shutdown()
Ejemplo n.º 4
0
class ManagedKernelPool(KernelPool):
    """Spawns a pool of kernels that are treated as identical delegates for
    future requests.

    Manages access to individual kernels using a borrower/lender pattern.
    Cleans them all up when shut down.

    Parameters
    ----------
    prespawn_count
        Number of kernels to spawn immediately; a falsy value is replaced
        by 1
    kernel_manager
        Kernel manager instance

    Attributes
    ----------
    kernel_clients : dict
        Map of kernel IDs to client instances for communicating with them
    on_recv_funcs : dict
        Map of kernel IDs to iopub callback functions
    kernel_pool : list
        List of available delegate kernel IDs
    kernel_semaphore : tornado.locks.Semaphore
        Semaphore that controls access to the kernel pool
    """
    def __init__(self, prespawn_count, kernel_manager):
        # Make sure there's at least one kernel as a delegate
        if not prespawn_count:
            prespawn_count = 1

        super(ManagedKernelPool, self).__init__(prespawn_count, kernel_manager)

        self.kernel_clients = {}
        self.on_recv_funcs = {}
        self.kernel_pool = []

        kernel_ids = self.kernel_manager.list_kernel_ids()
        # One semaphore slot per kernel in the pool
        self.kernel_semaphore = Semaphore(len(kernel_ids))

        # Create clients and iopub handlers for prespawned kernels
        for kernel_id in kernel_ids:
            self.kernel_clients[kernel_id] = kernel_manager.get_kernel(
                kernel_id).client()
            self.kernel_pool.append(kernel_id)
            iopub = self.kernel_manager.connect_iopub(kernel_id)
            iopub.on_recv(self.create_on_reply(kernel_id))

    @gen.coroutine
    def acquire(self):
        """Gets a kernel client and removes it from the available pool of
        clients.

        Waits on the semaphore until a kernel is available.

        Returns
        -------
        tuple
            Kernel client instance, kernel ID
        """
        yield self.kernel_semaphore.acquire()
        kernel_id = self.kernel_pool.pop(0)
        raise gen.Return((self.kernel_clients[kernel_id], kernel_id))

    def release(self, kernel_id):
        """Puts a kernel back into the pool of kernels available to handle
        requests.

        Parameters
        ----------
        kernel_id : str
            Kernel to return to the pool
        """
        self.kernel_pool.append(kernel_id)
        self.kernel_semaphore.release()

    def _on_reply(self, kernel_id, msg_list):
        """Invokes the iopub callback registered for the `kernel_id` and passes
        it the deserialized kernel message.

        A message for a kernel that has no callback registered is dropped
        rather than raising `KeyError` inside the stream handler.

        Parameters
        ----------
        kernel_id : str
            Kernel that sent the reply
        msg_list : list
            List of 0mq messages
        """
        callback = self.on_recv_funcs.get(kernel_id)
        if callback is None:
            import logging
            logging.getLogger(__name__).debug(
                'Could not find callback for kernel_id: %s', kernel_id)
            return
        session = self.kernel_clients[kernel_id].session
        _, msg_list = session.feed_identities(msg_list)
        callback(session.deserialize(msg_list))

    def create_on_reply(self, kernel_id):
        """Creates an anonymous function to handle reply messages from the
        kernel.

        Parameters
        ----------
        kernel_id
            Kernel to listen to

        Returns
        -------
        function
            Callback function taking a 0mq message list; the kernel ID is
            bound into it
        """
        return lambda msg_list: self._on_reply(kernel_id, msg_list)

    def on_recv(self, kernel_id, func):
        """Registers a callback function for iopub messages from a particular
        kernel.

        This is needed to avoid having multiple callbacks per kernel client.

        Parameters
        ----------
        kernel_id
            Kernel from which to receive iopub messages
        func
            Callback function to use for kernel iopub messages
        """
        self.on_recv_funcs[kernel_id] = func

    def shutdown(self):
        """Shuts down all kernels and their clients.
        """
        for kid, client in self.kernel_clients.items():
            client.stop_channels()
            self.kernel_manager.shutdown_kernel(kid, now=True)

        # Any remaining kernels that were not created for our pool should be shutdown
        super(ManagedKernelPool, self).shutdown()
Ejemplo n.º 5
0
class TornadoSubscriptionManager(SubscriptionManager):
    """Subscription manager that drives the subscribe and heartbeat loops on
    a Tornado IOLoop.

    Subscribe loops are serialised by a one-slot semaphore; a running loop is
    interrupted through its cancellation event.
    """

    def __init__(self, pubnub_instance):
        self._message_queue = Queue()
        self._consumer_event = Event()
        self._subscription_lock = Semaphore(1)
        # self._current_request_key_object = None
        self._heartbeat_periodic_callback = None
        self._cancellation_event = None
        super(TornadoSubscriptionManager, self).__init__(pubnub_instance)
        self._start_worker()

    def _set_consumer_event(self):
        """Set the event shared with the message worker."""
        self._consumer_event.set()

    def _message_queue_put(self, message):
        """Put ``message`` on the queue read by the message worker."""
        self._message_queue.put(message)

    def _start_worker(self):
        """Create the message worker and schedule its ``run`` on the loop."""
        self._consumer = TornadoSubscribeMessageWorker(self._pubnub,
                                                       self._listener_manager,
                                                       self._message_queue,
                                                       self._consumer_event)
        run = stack_context.wrap(self._consumer.run)
        self._pubnub.ioloop.spawn_callback(run)

    def reconnect(self):
        """Restart the subscribe loop and the heartbeat timer."""
        self._should_stop = False
        self._pubnub.ioloop.add_callback(self._start_subscribe_loop)
        self._register_heartbeat_timer()

    @tornado.gen.coroutine
    def _start_subscribe_loop(self):
        """Run one subscribe request and hand its result to the base class.

        Any loop that is still running is cancelled first; the new loop then
        waits for the subscription lock. After a response has been handled a
        new loop is started, which in turn cancels this one.
        """
        try:
            self._stop_subscribe_loop()

            yield self._subscription_lock.acquire()

            self._cancellation_event = Event()

            combined_channels = self._subscription_state.prepare_channel_list(True)
            combined_groups = self._subscription_state.prepare_channel_group_list(True)

            if len(combined_channels) == 0 and len(combined_groups) == 0:
                return

            envelope_future = Subscribe(self._pubnub) \
                .channels(combined_channels).channel_groups(combined_groups) \
                .timetoken(self._timetoken).region(self._region) \
                .filter_expression(self._pubnub.config.filter_expression) \
                .cancellation_event(self._cancellation_event) \
                .future()

            # Keep the exact future handed to the WaitIterator. Calling
            # wait() again for the comparison below is only correct if it
            # returns the same Future object on every call, which should not
            # be relied upon.
            cancellation_future = self._cancellation_event.wait()

            wi = tornado.gen.WaitIterator(envelope_future, cancellation_future)

            while not wi.done():
                try:
                    result = yield wi.next()
                except Exception as e:
                    logger.error(e)
                    raise
                else:
                    if wi.current_future is cancellation_future:
                        # Cancelled: there is no envelope to process.
                        break

                    envelope = result
                    self._handle_endpoint_call(envelope.result, envelope.status)
                    self._start_subscribe_loop()
        except PubNubTornadoException as e:
            if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
                # A timed out request is simply retried.
                self._pubnub.ioloop.add_callback(self._start_subscribe_loop)
            else:
                self._listener_manager.announce_status(e.status)
        except Exception as e:
            logger.error(e)
            raise
        finally:
            self._cancellation_event.set()
            yield tornado.gen.moment
            self._cancellation_event = None
            self._subscription_lock.release()

    def _stop_subscribe_loop(self):
        """Signal cancellation to the running subscribe loop, if any."""
        if self._cancellation_event is not None:
            self._cancellation_event.set()

    def _stop_heartbeat_timer(self):
        """Stop the periodic heartbeat callback, if one was registered."""
        if self._heartbeat_periodic_callback is not None:
            self._heartbeat_periodic_callback.stop()

    def _register_heartbeat_timer(self):
        """Create and start the periodic heartbeat callback."""
        super(TornadoSubscriptionManager, self)._register_heartbeat_timer()

        self._heartbeat_periodic_callback = PeriodicCallback(
            stack_context.wrap(self._perform_heartbeat_loop),
            self._pubnub.config.heartbeat_interval *
            TornadoSubscriptionManager.HEARTBEAT_INTERVAL_MULTIPLIER,
            self._pubnub.ioloop)
        self._heartbeat_periodic_callback.start()

    @tornado.gen.coroutine
    def _perform_heartbeat_loop(self):
        """Send one heartbeat and announce its status according to the
        configured heartbeat notification options.
        """
        if self._heartbeat_call is not None:
            # TODO: cancel call
            pass

        cancellation_event = Event()
        state_payload = self._subscription_state.state_payload()
        presence_channels = self._subscription_state.prepare_channel_list(False)
        presence_groups = self._subscription_state.prepare_channel_group_list(False)

        if len(presence_channels) == 0 and len(presence_groups) == 0:
            return

        try:
            envelope = yield self._pubnub.heartbeat() \
                .channels(presence_channels) \
                .channel_groups(presence_groups) \
                .state(state_payload) \
                .cancellation_event(cancellation_event) \
                .future()

            heartbeat_verbosity = self._pubnub.config.heartbeat_notification_options
            if envelope.status.is_error:
                # Failures are announced for both the ALL and the FAILURES
                # setting (the original tested ALL twice).
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL or \
                        heartbeat_verbosity == PNHeartbeatNotificationOptions.FAILURES:
                    self._listener_manager.announce_status(envelope.status)
            else:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)

        except PubNubTornadoException:
            pass
            # TODO: check correctness
            # if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
            #     self._start_subscribe_loop()
            # else:
            #     self._listener_manager.announce_status(e.status)
        finally:
            cancellation_event.set()

    @tornado.gen.coroutine
    def _send_leave(self, unsubscribe_operation):
        """Send a Leave request for the operation's channels and channel
        groups and announce the resulting status.
        """
        envelope = yield Leave(self._pubnub) \
            .channels(unsubscribe_operation.channels) \
            .channel_groups(unsubscribe_operation.channel_groups).future()
        self._listener_manager.announce_status(envelope.status)
Ejemplo n.º 6
0
Archivo: ipc.py Proyecto: bryson/salt
class IPCMessageSubscriber(IPCClient):
    '''
    Salt IPC message subscriber

    Create an IPC client to receive messages from IPC publisher

    An example of a very simple IPCMessageSubscriber connecting to an IPCMessagePublisher.
    This example assumes an already running IPCMessagePublisher.

    IMPORTANT: The below example also assumes the IOLoop is NOT running.

    # Import Tornado libs
    import tornado.ioloop

    # Import Salt libs
    import salt.config
    import salt.transport.ipc

    # Create a new IO Loop.
    # We know that this new IO Loop is not currently running.
    io_loop = tornado.ioloop.IOLoop()

    ipc_publisher_socket_path = '/var/run/ipc_publisher.ipc'

    ipc_subscriber = salt.transport.ipc.IPCMessageSubscriber(ipc_publisher_socket_path, io_loop=io_loop)

    # Connect to the server
    # Use the associated IO Loop that isn't running.
    io_loop.run_sync(ipc_subscriber.connect)

    # Wait for some data
    package = ipc_subscriber.read_sync()
    '''
    def __singleton_init__(self, socket_path, io_loop=None):
        super(IPCMessageSubscriber, self).__singleton_init__(
            socket_path, io_loop=io_loop)
        self._read_sync_future = None
        self._read_stream_future = None
        self._sync_ioloop_running = False
        # Messages decoded beyond the first one of a read; served by
        # read_sync() before touching the stream again.
        self.saved_data = []
        self._sync_read_in_progress = Semaphore()

    @tornado.gen.coroutine
    def _read_sync(self, timeout):
        '''
        Read from the stream until at least one complete message is decoded.

        :param timeout: Timeout for the first chunk of data, None to wait
                        without a timeout
        :return: body of the first decoded message, None on timeout. Any
                 other error is re-raised after cleanup.
        '''
        yield self._sync_read_in_progress.acquire()
        exc_to_raise = None
        ret = None

        try:
            while True:
                if self._read_stream_future is None:
                    self._read_stream_future = self.stream.read_bytes(4096, partial=True)

                if timeout is None:
                    wire_bytes = yield self._read_stream_future
                else:
                    future_with_timeout = FutureWithTimeout(
                        self.io_loop, self._read_stream_future, timeout)
                    wire_bytes = yield future_with_timeout

                self._read_stream_future = None

                # Remove the timeout once we get some data or an exception
                # occurs. We will assume that the rest of the data is already
                # there or is coming soon if an exception doesn't occur.
                timeout = None

                self.unpacker.feed(wire_bytes)
                first = True
                for framed_msg in self.unpacker:
                    if first:
                        ret = framed_msg['body']
                        first = False
                    else:
                        self.saved_data.append(framed_msg['body'])
                if not first:
                    # We read at least one piece of data
                    break
        except tornado.ioloop.TimeoutError:
            # In the timeout case, just return None.
            # Keep 'self._read_stream_future' alive.
            ret = None
        except tornado.iostream.StreamClosedError as exc:
            log.trace('Subscriber disconnected from IPC {0}'.format(self.socket_path))
            self._read_stream_future = None
            exc_to_raise = exc
        except Exception as exc:
            log.error('Exception occurred in Subscriber while handling stream: {0}'.format(exc))
            self._read_stream_future = None
            exc_to_raise = exc

        if self._sync_ioloop_running:
            # Stop the IO Loop so that self.io_loop.start() will return in
            # read_sync().
            self.io_loop.spawn_callback(self.io_loop.stop)

        # Release before raising: otherwise a failed read keeps the
        # semaphore forever and the next read blocks on acquire().
        self._sync_read_in_progress.release()

        if exc_to_raise is not None:
            raise exc_to_raise  # pylint: disable=E0702
        raise tornado.gen.Return(ret)

    def read_sync(self, timeout=None):
        '''
        Read a message from an IPC socket

        The socket must already be connected.
        The associated IO Loop must NOT be running.
        :param int timeout: Timeout when receiving message
        :return: message data if successful. None if timed out. Will raise an
                 exception for all other error conditions.
        '''
        if self.saved_data:
            return self.saved_data.pop(0)

        self._sync_ioloop_running = True
        self._read_sync_future = self._read_sync(timeout)
        # Returns once _read_sync() schedules io_loop.stop()
        self.io_loop.start()
        self._sync_ioloop_running = False

        ret_future = self._read_sync_future
        self._read_sync_future = None
        return ret_future.result()

    @tornado.gen.coroutine
    def _read_async(self, callback):
        '''
        Read from the stream until it is closed and schedule ``callback``
        with the body of every decoded message.

        :param callback: A callback with the received data
        '''
        while not self.stream.closed():
            try:
                self._read_stream_future = self.stream.read_bytes(4096, partial=True)
                wire_bytes = yield self._read_stream_future
                self._read_stream_future = None
                self.unpacker.feed(wire_bytes)
                for framed_msg in self.unpacker:
                    body = framed_msg['body']
                    self.io_loop.spawn_callback(callback, body)
            except tornado.iostream.StreamClosedError:
                log.trace('Subscriber disconnected from IPC {0}'.format(self.socket_path))
                break
            except Exception as exc:
                log.error('Exception occurred while Subscriber handling stream: {0}'.format(exc))
                # Back off before retrying. An error raised before the first
                # yield would otherwise make this loop spin without ever
                # giving control back to the IO Loop.
                yield tornado.gen.sleep(1)

    @tornado.gen.coroutine
    def read_async(self, callback):
        '''
        Asynchronously read messages and invoke a callback when they are ready.

        Connecting is retried, one second apart, until it succeeds.

        :param callback: A callback with the received data
        '''
        while not self.connected():
            try:
                yield self.connect(timeout=5)
            except tornado.iostream.StreamClosedError:
                log.trace('Subscriber closed stream on IPC {0} before connect'.format(self.socket_path))
                yield tornado.gen.sleep(1)
            except Exception as exc:
                log.error('Exception occurred while Subscriber connecting: {0}'.format(exc))
                yield tornado.gen.sleep(1)
        yield self._read_async(callback)

    def close(self):
        '''
        Routines to handle any cleanup before the instance shuts down.
        Sockets and filehandles should be closed explicitly, to prevent
        leaks.
        '''
        if not self._closing:
            IPCClient.close(self)
            # This will prevent this message from showing up:
            # '[ERROR   ] Future exception was never retrieved:
            # StreamClosedError'
            if self._read_sync_future is not None:
                self._read_sync_future.exc_info()
            if self._read_stream_future is not None:
                self._read_stream_future.exc_info()

    def __del__(self):
        # globals() is keyed by name, so the lookup has to use the string;
        # testing the class object itself is always False and close() was
        # never reached. Also skip when the name has already been cleared.
        if globals().get('IPCMessageSubscriber') is not None:
            self.close()
Ejemplo n.º 7
0
class ManagedKernelPool(KernelPool):
    """Spawns a pool of kernels that are treated as identical delegates for
    future requests.

    Manages access to individual kernels using a borrower/lender pattern.
    Cleans them all up when shut down.

    Parameters
    ----------
    prespawn_count
        Number of kernels to spawn immediately; a falsy value is replaced
        by 1
    kernel_manager
        Kernel manager instance

    Attributes
    ----------
    kernel_clients : dict
        Map of kernel IDs to client instances for communicating with them
    on_recv_funcs : dict
        Map of kernel IDs to iopub callback functions
    kernel_pool : list
        List of available delegate kernel IDs
    kernel_semaphore : tornado.locks.Semaphore
        Semaphore that controls access to the kernel pool
    """
    def __init__(self, prespawn_count, kernel_manager):
        # Make sure there's at least one kernel as a delegate
        if not prespawn_count:
            prespawn_count = 1

        super(ManagedKernelPool, self).__init__(prespawn_count, kernel_manager)

        self.kernel_clients = {}
        self.on_recv_funcs = {}
        self.kernel_pool = []

        kernel_ids = self.kernel_manager.list_kernel_ids()
        # The semaphore starts with one slot per listed kernel
        self.kernel_semaphore = Semaphore(len(kernel_ids))

        # Create clients and iopub handlers for prespawned kernels
        for kernel_id in kernel_ids:
            self.kernel_clients[kernel_id] = kernel_manager.get_kernel(kernel_id).client()
            self.kernel_pool.append(kernel_id)
            iopub = self.kernel_manager.connect_iopub(kernel_id)
            iopub.on_recv(self.create_on_reply(kernel_id))

    @gen.coroutine
    def acquire(self):
        """Gets a kernel client and removes it from the available pool of
        clients.

        Waits on the semaphore until a kernel is available.

        Returns
        -------
        tuple
            Kernel client instance, kernel ID
        """
        yield self.kernel_semaphore.acquire()
        kernel_id = self.kernel_pool.pop(0)
        client = self.kernel_clients[kernel_id]
        raise gen.Return((client, kernel_id))

    def release(self, kernel_id):
        """Puts a kernel back into the pool of kernels available to handle
        requests.

        Parameters
        ----------
        kernel_id : str
            Kernel to return to the pool
        """
        self.kernel_pool.append(kernel_id)
        self.kernel_semaphore.release()

    def _on_reply(self, kernel_id, msg_list):
        """Invokes the iopub callback registered for the `kernel_id` and passes
        it the deserialized kernel message.

        When no callback has been registered for the kernel the message is
        dropped; indexing `on_recv_funcs` directly would raise `KeyError`
        inside the stream handler.

        Parameters
        ----------
        kernel_id : str
            Kernel that sent the reply
        msg_list : list
            List of 0mq messages
        """
        if kernel_id not in self.on_recv_funcs:
            import logging
            logging.getLogger(__name__).debug(
                'Could not find callback for kernel_id: %s', kernel_id)
            return
        session = self.kernel_clients[kernel_id].session
        _, msg_list = session.feed_identities(msg_list)
        msg = session.deserialize(msg_list)
        self.on_recv_funcs[kernel_id](msg)

    def create_on_reply(self, kernel_id):
        """Creates an anonymous function to handle reply messages from the
        kernel.

        Parameters
        ----------
        kernel_id
            Kernel to listen to

        Returns
        -------
        function
            Callback function taking a 0mq message list; the kernel ID is
            bound into it
        """
        return lambda msg_list: self._on_reply(kernel_id, msg_list)

    def on_recv(self, kernel_id, func):
        """Registers a callback function for iopub messages from a particular
        kernel.

        This is needed to avoid having multiple callbacks per kernel client.

        Parameters
        ----------
        kernel_id
            Kernel from which to receive iopub messages
        func
            Callback function to use for kernel iopub messages
        """
        self.on_recv_funcs[kernel_id] = func

    def shutdown(self):
        """Shuts down all kernels and their clients.
        """
        for kid, client in self.kernel_clients.items():
            client.stop_channels()
            self.kernel_manager.shutdown_kernel(kid, now=True)

        # Any remaining kernels that were not created for our pool should be shutdown
        super(ManagedKernelPool, self).shutdown()
Ejemplo n.º 8
0
class IPCMessageSubscriber(IPCClient):
    '''
    Salt IPC message subscriber

    Create an IPC client to receive messages from IPC publisher

    An example of a very simple IPCMessageSubscriber connecting to an IPCMessagePublisher.
    This example assumes an already running IPCMessagePublisher.

    IMPORTANT: The below example also assumes the IOLoop is NOT running.

    # Import Tornado libs
    import tornado.ioloop

    # Import Salt libs
    import salt.config
    import salt.transport.ipc

    # Create a new IO Loop.
    # We know that this new IO Loop is not currently running.
    io_loop = tornado.ioloop.IOLoop()

    ipc_publisher_socket_path = '/var/run/ipc_publisher.ipc'

    ipc_subscriber = salt.transport.ipc.IPCMessageSubscriber(ipc_publisher_socket_path, io_loop=io_loop)

    # Connect to the server
    # Use the associated IO Loop that isn't running.
    io_loop.run_sync(ipc_subscriber.connect)

    # Wait for some data
    package = ipc_subscriber.read_sync()
    '''
    def __singleton_init__(self, socket_path, io_loop=None):
        super(IPCMessageSubscriber, self).__singleton_init__(socket_path,
                                                             io_loop=io_loop)
        self._read_sync_future = None
        self._read_stream_future = None
        self._sync_ioloop_running = False
        # Messages decoded beyond the first one of a read; served by
        # read_sync() before touching the stream again.
        self.saved_data = []
        self._sync_read_in_progress = Semaphore()
        # Callbacks that receive every message read by read_async()
        self.callbacks = set()
        self.reading = False

    @tornado.gen.coroutine
    def _read_sync(self, timeout):
        '''
        Read from the stream until at least one complete message is decoded.

        :param timeout: Timeout for the first chunk of data, None to wait
                        without a timeout
        :return: body of the first decoded message, None on timeout. Any
                 other error is re-raised after cleanup.
        '''
        yield self._sync_read_in_progress.acquire()
        exc_to_raise = None
        ret = None

        try:
            while True:
                if self._read_stream_future is None:
                    self._read_stream_future = self.stream.read_bytes(
                        4096, partial=True)

                if timeout is None:
                    wire_bytes = yield self._read_stream_future
                else:
                    future_with_timeout = FutureWithTimeout(
                        self.io_loop, self._read_stream_future, timeout)
                    wire_bytes = yield future_with_timeout

                self._read_stream_future = None

                # Remove the timeout once we get some data or an exception
                # occurs. We will assume that the rest of the data is already
                # there or is coming soon if an exception doesn't occur.
                timeout = None

                self.unpacker.feed(wire_bytes)
                first = True
                for framed_msg in self.unpacker:
                    if first:
                        ret = framed_msg['body']
                        first = False
                    else:
                        self.saved_data.append(framed_msg['body'])
                if not first:
                    # We read at least one piece of data
                    break
        except TornadoTimeoutError:
            # In the timeout case, just return None.
            # Keep 'self._read_stream_future' alive.
            ret = None
        except tornado.iostream.StreamClosedError as exc:
            log.trace('Subscriber disconnected from IPC %s', self.socket_path)
            self._read_stream_future = None
            exc_to_raise = exc
        except Exception as exc:
            log.error(
                'Exception occurred in Subscriber while handling stream: %s',
                exc)
            self._read_stream_future = None
            exc_to_raise = exc

        if self._sync_ioloop_running:
            # Stop the IO Loop so that self.io_loop.start() will return in
            # read_sync().
            self.io_loop.spawn_callback(self.io_loop.stop)

        # Release before raising: otherwise a failed read keeps the
        # semaphore forever and the next read blocks on acquire().
        self._sync_read_in_progress.release()

        if exc_to_raise is not None:
            raise exc_to_raise  # pylint: disable=E0702
        raise tornado.gen.Return(ret)

    def read_sync(self, timeout=None):
        '''
        Read a message from an IPC socket

        The socket must already be connected.
        The associated IO Loop must NOT be running.
        :param int timeout: Timeout when receiving message
        :return: message data if successful. None if timed out. Will raise an
                 exception for all other error conditions.
        '''
        if self.saved_data:
            return self.saved_data.pop(0)

        self._sync_ioloop_running = True
        self._read_sync_future = self._read_sync(timeout)
        # Returns once _read_sync() schedules io_loop.stop()
        self.io_loop.start()
        self._sync_ioloop_running = False

        ret_future = self._read_sync_future
        self._read_sync_future = None
        return ret_future.result()

    @tornado.gen.coroutine
    def _read_async(self, callback):
        '''
        Read from the stream until it is closed and schedule ``callback``
        with the body of every decoded message.

        :param callback: A callback with the received data
        '''
        while not self.stream.closed():
            try:
                self._read_stream_future = self.stream.read_bytes(4096,
                                                                  partial=True)
                self.reading = True
                wire_bytes = yield self._read_stream_future
                self._read_stream_future = None
                self.unpacker.feed(wire_bytes)
                for framed_msg in self.unpacker:
                    body = framed_msg['body']
                    self.io_loop.spawn_callback(callback, body)
            except tornado.iostream.StreamClosedError:
                log.trace('Subscriber disconnected from IPC %s',
                          self.socket_path)
                break
            except Exception as exc:
                log.error(
                    'Exception occurred while Subscriber handling stream: %s',
                    exc)
                # Back off before trying to read again
                yield tornado.gen.sleep(1)

    def __run_callbacks(self, raw):
        # Fan a received message out to every registered callback
        for callback in self.callbacks:
            self.io_loop.spawn_callback(callback, raw)

    @tornado.gen.coroutine
    def read_async(self):
        '''
        Asynchronously read messages and pass each of them to every callback
        in ``self.callbacks``.

        Connecting is retried, one second apart, until it succeeds.
        '''
        while not self.connected():
            try:
                yield self.connect(timeout=5)
            except tornado.iostream.StreamClosedError:
                log.trace('Subscriber closed stream on IPC %s before connect',
                          self.socket_path)
                yield tornado.gen.sleep(1)
            except Exception as exc:
                log.error('Exception occurred while Subscriber connecting: %s',
                          exc)
                yield tornado.gen.sleep(1)
        yield self._read_async(self.__run_callbacks)

    def close(self):
        '''
        Routines to handle any cleanup before the instance shuts down.
        Sockets and filehandles should be closed explicitly, to prevent
        leaks.
        '''
        if not self._closing:
            IPCClient.close(self)
            if self._closing:
                # This will prevent this message from showing up:
                # '[ERROR   ] Future exception was never retrieved:
                # StreamClosedError'
                for pending in (self._read_sync_future,
                                self._read_stream_future):
                    if pending is not None and pending.done():
                        pending.exception()
Ejemplo n.º 9
0
class ManagedKernelPool(KernelPool):
    '''
    A pool of kernels that are lent out one at a time.

    Borrowers obtain a kernel through ``acquire`` and hand it back through
    ``release``; a semaphore sized to the number of kernels makes borrowers
    wait while every kernel is on loan. ``shutdown`` stops all of them.
    '''
    def __init__(self, prespawn_count, kernel_manager):
        '''
        :param prespawn_count: Number of kernels to prespawn; a falsy value
            is replaced by 1
        :param kernel_manager: Kernel manager used to list and connect to
            kernels
        '''
        # A delegate kernel is always required, so never ask for fewer than one
        prespawn_count = prespawn_count or 1

        super(ManagedKernelPool, self).__init__(prespawn_count, kernel_manager)

        # Start from clean bookkeeping and rebuild it from the manager's view
        self.kernel_clients = {}
        self.on_recv_funcs = {}
        self.pool_index = 0
        self.kernel_pool = []

        known_ids = self.kernel_manager.list_kernel_ids()
        self.kernel_semaphore = Semaphore(len(known_ids))

        # Attach a client and an iopub listener to each existing kernel
        for kid in known_ids:
            client = kernel_manager.get_kernel(kid).client()
            self.kernel_clients[kid] = client
            self.kernel_pool.append(kid)
            iopub_stream = self.kernel_manager.connect_iopub(kid)
            iopub_stream.on_recv(self.create_on_reply(kid))

    @gen.coroutine
    def acquire(self):
        '''
        Borrow the kernel at the front of the pool, waiting on the semaphore
        until one is free. The kernel must be handed back with ``release``.
        :return: Tuple of (kernel client, kernel id)
        '''
        yield self.kernel_semaphore.acquire()
        kernel_id = self.kernel_pool.pop(0)
        raise gen.Return((self.kernel_clients[kernel_id], kernel_id))

    def release(self, kernel_id):
        '''
        Put a borrowed kernel at the back of the pool and free its semaphore
        slot.
        :param kernel_id: Id of the kernel being returned
        '''
        pool = self.kernel_pool
        pool.append(kernel_id)
        self.kernel_semaphore.release()

    def _on_reply(self, kernel_id, msg_list):
        '''
        Deserialize a raw iopub message with the kernel client's session and
        pass it to the callback registered for that kernel.
        :param kernel_id: Id of the kernel the message came from
        :param msg_list: Raw message parts as received from the stream
        '''
        session = self.kernel_clients[kernel_id].session
        _, frames = session.feed_identities(msg_list)
        message = session.deserialize(frames)
        self.on_recv_funcs[kernel_id](message)

    def create_on_reply(self, kernel_id):
        '''
        Build the stream callback for one kernel. The closure binds
        ``kernel_id`` so that each kernel gets its own handler.
        :param kernel_id: Id of the kernel the handler is for
        :return: Callable taking the raw message parts
        '''
        def handler(msg_list):
            return self._on_reply(kernel_id, msg_list)
        return handler

    def on_recv(self, kernel_id, func):
        '''
        Set the callback that receives iopub messages of one kernel.
        Keeping a single entry per kernel avoids stacking multiple callbacks
        on the same kernel client.
        :param kernel_id: Id of the kernel
        :param func: Callback taking the deserialized message
        '''
        self.on_recv_funcs[kernel_id] = func

    def shutdown(self):
        '''
        Stop the channels of every pooled kernel client and shut the kernels
        down immediately, then let the parent class shut down the rest.
        '''
        for kid, client in self.kernel_clients.items():
            client.stop_channels()
            self.kernel_manager.shutdown_kernel(kid, now=True)

        # Kernels that were not created for this pool are handled by the parent
        super(ManagedKernelPool, self).shutdown()
Ejemplo n.º 10
0
    def get_resized(self, gallery, photo,
            width=None, height=None, quality=60,
            rotation=0.0, img_format=None, orientation=0):
        """
        Retrieve the given photo in a resized format.

        This is a generator-based coroutine body: it yields futures and
        delivers its result by raising ``Return``.

        :param gallery: Gallery the photo belongs to.
        :param photo: Photo to resize.
        :param width: Requested width, sanitised via ``get_dimensions``.
        :param height: Requested height, sanitised via ``get_dimensions``.
        :param quality: Quality setting; when no format is given, 100 selects
            PNG and anything else JPEG (GIF originals stay GIF).
        :param rotation: Rotation passed through to the resize routine.
        :param img_format: Value accepted by ``ImageFormat``, or ``None`` to
            detect the format from the original file.
        :param orientation: Orientation passed through to the resize routine.
        :returns: (via ``Return``) a tuple ``(img_format, file_name, data)``.
        :raises: whatever the resize routine raises; the failure is logged
            before being re-raised.
        """
        # Determine the path to the original file.
        orig_node = self._fs_node.join_node(gallery, photo)

        if img_format is None:
            # Detect from original file and quality setting.
            with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
                mime_type = m.id_filename(orig_node.abs_path)
                self._log.debug('%s/%s detected format %s',
                        gallery, photo, mime_type)
                if mime_type == 'image/gif':
                    img_format = ImageFormat.GIF
                elif quality == 100:
                    # Assume PNG
                    img_format = ImageFormat.PNG
                else:
                    # Assume JPEG
                    img_format = ImageFormat.JPEG
        else:
            # Use the format given by the user
            img_format = ImageFormat(img_format)

        self._log.debug('%s/%s using %s format',
                gallery, photo, img_format.name)

        # Sanitise dimensions given by user.
        width, height = self.get_dimensions(gallery, photo, width, height)
        self._log.debug('%s/%s target dimensions %d by %d',
                gallery, photo, width, height)

        # Determine where the file would be cached
        (cache_dir, cache_name) = self._get_cache_name(gallery, photo,
                width, height, quality, rotation, img_format)

        # Do we have this file?
        data = self._read_cache(orig_node, cache_dir, cache_name)
        if data is not None:
            raise Return((img_format, cache_name, data))

        # Locate the lock for this photo.
        mutex_key = (gallery, photo, width, height, quality, rotation,
                img_format)
        try:
            mutex = self._mutexes[mutex_key]
        except KeyError:
            mutex = Semaphore(1)
            self._mutexes[mutex_key] = mutex

        resize_args = (gallery, photo, width, height, quality,
                    rotation, img_format.value, orientation)

        self._log.debug('%s/%s waiting for mutex',
                gallery, photo)
        # Acquire OUTSIDE the try block: the finally clause below must only
        # release a permit that was actually obtained, otherwise a failure
        # while waiting would bump the semaphore above 1 and let two resizes
        # of the same image run concurrently.
        yield mutex.acquire()
        try:
            # We have the semaphore, call our resize routine.
            self._log.debug('%s/%s retrieving resized image (args=%s)',
                    gallery, photo, resize_args)
            # Unpack into fresh names so the error log below still reports
            # the requested format if unpacking fails.
            (out_format, file_name, file_data) = yield self._pool.apply(
                func=self._do_resize,
                args=resize_args)
        except Exception:
            self._log.exception('Error resizing photo; gallery: %s, photo: %s, '
                    'width: %d, height: %d, quality: %f, rotation: %f, format: %s',
                    gallery, photo, width, height, quality, rotation, img_format)
            raise
        finally:
            mutex.release()

        # Raised after the try block so the result is never mistaken for an
        # error by the handler above.
        raise Return((out_format, file_name, file_data))
Ejemplo n.º 11
0
class HackadayAPI(object):
    """
    Core Hackaday.io API handler.

    All outgoing requests go through ``api_fetch``, which serialises them
    with a semaphore and spaces them at least ``rqlim_time`` seconds apart.
    The endpoint helpers below build a query and return the result of
    ``_api_call`` for the corresponding URI.
    """

    HAD_API_URI='https://api.hackaday.io/v1'
    HAD_AUTH_URI='https://hackaday.io/authorize'\
            '?client_id=%(CLIENT_ID)s'\
            '&response_type=code'
    HAD_TOKEN_URI='https://auth.hackaday.io/access_token'\
            '?client_id=%(CLIENT_ID)s'\
            '&client_secret=%(CLIENT_SECRET)s'\
            '&code=%(CODE)s'\
            '&grant_type=authorization_code'

    # Rate limiting
    RQLIM_TIME=30  # seconds

    def __init__(self, client_id, client_secret, api_key,
            api_uri=HAD_API_URI, auth_uri=HAD_AUTH_URI,
            token_uri=HAD_TOKEN_URI, rqlim_time=RQLIM_TIME,
            client=None, log=None, io_loop=None):
        """
        :param client_id: OAuth client ID.
        :param client_secret: OAuth client secret.
        :param api_key: API key added to API queries.
        :param api_uri: Base URI prepended to relative API paths.
        :param auth_uri: Template for the user authorisation URI.
        :param token_uri: Template for the access token URI.
        :param rqlim_time: Minimum number of seconds between requests.
        :param client: HTTP client; defaults to a new ``AsyncHTTPClient``.
        :param log: Logger; defaults to one named after this module.
        :param io_loop: IO loop; defaults to ``IOLoop.current()``.
        """
        if log is None:
            log = extdlog.getLogger(self.__class__.__module__)

        if io_loop is None:
            io_loop = IOLoop.current()

        if client is None:
            client = AsyncHTTPClient()

        self._client = client
        self._io_loop = io_loop
        self._log = log
        self._client_id = client_id
        self._client_secret = client_secret
        self._api_key = api_key
        self._api_uri = api_uri
        self._auth_uri = auth_uri
        self._token_uri = token_uri

        # Timestamp of the last successful request
        self._last_rq = 0.0
        self._rqlim_time = rqlim_time

        # Semaphore to limit concurrent access
        self._rq_sem = Semaphore(1)

        # If None, then no "forbidden" status is current.
        # Otherwise, this stores when the "forbidden" flag expires.
        self._forbidden_expiry = None

    @property
    def is_forbidden(self):
        """
        Return true while a back-off period set by a previous "forbidden"
        response (or connection reset) has not yet expired.
        """
        if self._forbidden_expiry is None:
            return False

        return self._forbidden_expiry > self._io_loop.time()

    @coroutine
    def _ratelimit_sleep(self):
        """
        Ensure we don't exceed the rate limit: sleep until ``rqlim_time``
        seconds have passed since the last recorded request, if needed.
        """
        now = self._io_loop.time()

        # Figure out if we need to wait before the next request
        delay = (self._last_rq + self._rqlim_time) - now
        self._log.trace('Last request at %f, delay: %f', self._last_rq, delay)
        if delay <= 0:
            # Nope, we're clear
            return

        self._log.debug('Waiting %f sec for rate limit', delay)
        yield sleep(delay)
        self._log.trace('Resuming operations')

    def _decode(self, response, default_encoding='UTF-8'):
        """
        Decode a given response body according to its Content-Type header.

        :returns: the result of ``decode_body``; callers unpack it as
            ``(content_type, content_type_options, body)``.
        """
        return decode_body(response.headers['Content-Type'], response.body,
                default_encoding)

    def _audit_response(self, response):
        """
        Write a request/response pair to the audit log.
        """
        self._log.audit('Request:\n'
            '%s %s\n'
            'Headers: %s\n'
            'Response: %s\n'
            'Headers: %s\n'
            'Body:\n%s',
            response.request.method,
            response.request.url,
            response.request.headers,
            response.code,
            response.headers,
            response_text(response))

    @coroutine
    def api_fetch(self, uri, **kwargs):
        """
        Make a raw request whilst respecting the HAD API request limits.

        This is primarily to support retrieval of avatars and other data
        without hitting the HAD.io site needlessly hard.

        :param uri: URI to fetch.
        :param kwargs: Passed to the HTTP client's ``fetch``; connect and
            request timeouts default to 120 seconds.
        :returns: the HTTP response.
        :raises: the client's errors are re-raised.  A 403 starts a one hour
            back-off, a connection reset a 15 minute one (see
            ``is_forbidden``).
        """
        kwargs.setdefault('connect_timeout', 120.0)
        kwargs.setdefault('request_timeout', 120.0)

        # Acquire before entering the try block so the finally clause never
        # releases a permit that was not obtained.
        yield self._rq_sem.acquire()
        try:
            while True:
                try:
                    yield self._ratelimit_sleep()
                    response = yield self._client.fetch(uri, **kwargs)
                    self._last_rq = self._io_loop.time()
                    self._audit_response(response)
                    break
                except gaierror as e:
                    # NOTE(review): both branches re-raise, so a transient
                    # EAGAIN lookup failure is not retried even though the
                    # enclosing loop suggests a retry may have been
                    # intended -- confirm before changing.
                    if e.errno != EAGAIN:
                        raise
                    raise
                except HTTPError as e:
                    if e.response is not None:
                        self._audit_response(e.response)
                    if e.code == 403:
                        # Back-end is rate limiting us.  Back off an hour.
                        self._forbidden_expiry = self._io_loop.time() \
                                + 3600.0
                    raise
                except ConnectionResetError:
                    # Back-end is blocking us.  Back off 15 minutes.
                    self._forbidden_expiry = self._io_loop.time() \
                            + 900.0
                    raise
        finally:
            self._rq_sem.release()

        raise Return(response)

    @coroutine
    def _api_call(self, uri, query=None, token=None, api_key=True, **kwargs):
        """
        Perform an API request and return the decoded JSON document.

        :param uri: Absolute URI, or a path relative to the API base URI.
        :param query: Dict of query arguments; list values are emitted as
            repeated keys.
        :param token: Optional token sent in the Authorization header.
        :param api_key: If true, add the API key to the query.
        :param kwargs: Passed through to ``api_fetch``.
        :raises ValueError: if the response is not ``application/json``.
        """
        headers = kwargs.setdefault('headers', {})
        headers.setdefault('Accept', 'application/json')
        if token is not None:
            headers['Authorization'] = 'token %s' % token

        if query is None:
            query = {}

        if api_key:
            query.setdefault('api_key', self._api_key)

        self._log.audit('Query arguments: %r', query)

        def encode_kv(k, v):
            return '%s=%s' % (k, urlparse.quote_plus(str(v)))

        def encode_item(item):
            (key, value) = item
            if isinstance(value, list):
                return '&'.join([encode_kv(key, v) for v in value])
            return encode_kv(key, value)

        if len(query) > 0:
            uri += '?%s' % '&'.join(map(encode_item, query.items()))

        if not uri.startswith('http'):
            uri = self._api_uri + uri

        self._log.audit('%s %r', kwargs.get('method','GET'), uri)
        response = yield self.api_fetch(uri, **kwargs)

        # If we get here, then our service is back.
        self._forbidden_expiry = None
        (ct, ctopts, body) = self._decode(response)
        if ct.lower() != 'application/json':
            raise ValueError('Server returned unrecognised type %s' % ct)
        raise Return(json.loads(body))

    # oAuth endpoints

    @property
    def auth_uri(self):
        """
        Return the auth URI that we need to send the user to if they're not
        logged in.
        """
        return self._auth_uri % dict(CLIENT_ID=self._client_id)

    def get_token(self, code):
        """
        Fetch the token for API queries from the authorization code given.
        """
        # Determine where to retrieve the access token from
        post_uri = self._token_uri % dict(
                CLIENT_ID=urlparse.quote_plus(self._client_id),
                CLIENT_SECRET=urlparse.quote_plus(self._client_secret),
                CODE=urlparse.quote_plus(code)
        )

        return self._api_call(
            post_uri, method='POST', body=b'', api_key=False)

    # Pagination options

    def _page_query_opts(self, page, per_page):
        """
        Build a query dict holding the pagination options that are set.
        """
        query = {}
        if page is not None:
            query['page'] = int(page)
        if per_page is not None:
            query['per_page'] = int(per_page)

        return query

    # User API endpoints

    def get_current_user(self, token):
        """
        Fetch the current user's profile information.
        """
        return self._api_call('/me', token=token)

    def _user_query_opts(self, sortby, page, per_page):
        """
        Build a query dict with pagination and user sort order.
        """
        query = self._page_query_opts(page, per_page)
        sortby = UserSortBy(sortby)
        query['sortby'] = sortby.value
        return query

    _GET_USERS_WORKAROUND_RE = re.compile(
            r'<a href="([^"]+)" class="hacker-image">')
    _PRIVATE_MSG_LINK_RE = re.compile(
            r'<a href="/messages/new\?user=(\d+)">')
    @coroutine
    def get_user_ids(self, sortby=UserSortBy.influence, page=None):
        """
        Scrape user IDs from the HTML "hackers" listing page.

        Each profile linked from the listing is fetched and searched for the
        private-message link, which carries the numeric user ID.

        :param sortby: Sort order of the listing.
        :param page: Listing page number; defaults to 1.
        :returns: list of integer user IDs.
        """
        if page is None:
            page = 1

        sortby = UserSortBy(sortby)
        response = yield self.api_fetch(
                'https://hackaday.io/hackers?sort=%s&page=%d' \
                        % (sortby.value, page))
        (ct, ctopts, body) = self._decode(response)

        # Body is in HTML, look for links to profile pages
        profile_uris = []
        for line in body.split('\n'):
            match = self._GET_USERS_WORKAROUND_RE.search(line)
            if match:
                profile_uris.append(match.group(1))

        ids = []
        # Fetch each profile page (ugh!) and look for user ID
        # This is literally all we need at this point, the rest we'll
        # get from the API.
        for profile_uri in profile_uris:
            if profile_uri.startswith('/'):
                profile_uri = 'https://hackaday.io' + profile_uri
            response = yield self.api_fetch(profile_uri)
            (ct, ctopts, body) = self._decode(response)
            for line in body.split('\n'):
                match = self._PRIVATE_MSG_LINK_RE.search(line)
                if match:
                    ids.append(int(match.group(1)))
                    break

        raise Return(ids)

    @coroutine
    def _get_users_workaround(self, sortby=UserSortBy.influence, page=None):
        """
        Retrieve users by scraping their IDs and batch-fetching them.
        """
        ids = yield self.get_user_ids(sortby, page)
        users = yield self.get_users(ids=ids)
        raise Return(users)

    @coroutine
    def get_users(self, sortby=UserSortBy.influence,
            ids=None, page=None, per_page=None):
        """
        Retrieve a list of users.

        :param sortby: Sort order.
        :param ids: ``None`` for all users, a ``slice`` for an ID range
            (``start``..``stop``), or an iterable of at most 50 IDs.
        :param page: Page number.
        :param per_page: Page size.
        :raises ValueError: if more than 50 distinct IDs are given.
        """
        # Normalise up front so the comparison below also works when the
        # caller passed a raw value rather than an enum member.
        sortby = UserSortBy(sortby)
        query = self._user_query_opts(sortby, page, per_page)

        if ids is None:
            # sortby==newest is broken, has been for a while now.
            if sortby == UserSortBy.newest:
                result = yield self._get_users_workaround(
                        sortby, query.get('page'))
            else:
                result = yield self._api_call('/users', query=query)
        elif isinstance(ids, slice):
            query['ids'] = '%d,%d' % (ids.start, ids.stop)
            result = yield self._api_call('/users/range', query=query)
        else:
            ids = set(ids)
            if len(ids) > 50:
                raise ValueError('Too many IDs')
            query['ids'] = ','.join(['%d' % uid for uid in ids])
            result = yield self._api_call('/users/batch', query=query)
        raise Return(result)

    def search_users(self, screen_name=None, location=None, tag=None,
            sortby=UserSortBy.influence, page=None, per_page=None):
        """
        Query ``/users/search`` with whichever criteria are not ``None``.
        """
        query = self._user_query_opts(sortby, page, per_page)

        for (arg, val) in   (   ('screen_name', screen_name),
                                ('location', location),
                                ('tag', tag)    ):
            if val is not None:
                query[arg] = str(val)
        return self._api_call('/users/search', query=query)

    def get_user(self, user_id):
        """Query ``/users/<user_id>``."""
        return self._api_call('/users/%d' % user_id)

    def get_user_followers(self, user_id,
            sortby=UserSortBy.influence, page=None, per_page=None):
        """Query ``/users/<user_id>/followers``."""
        query = self._user_query_opts(sortby, page, per_page)
        return self._api_call('/users/%d/followers' % user_id, query=query)

    def get_user_following(self, user_id,
            sortby=UserSortBy.influence, page=None, per_page=None):
        """Query ``/users/<user_id>/following``."""
        query = self._user_query_opts(sortby, page, per_page)
        return self._api_call('/users/%d/following' % user_id, query=query)

    def get_user_projects(self, user_id,
            sortby=ProjectSortBy.skulls, page=None, per_page=None):
        """Query ``/users/<user_id>/projects``."""
        query = self._project_query_opts(sortby, page, per_page)
        return self._api_call('/users/%d/projects' % user_id, query=query)

    def get_user_skulls(self, user_id,
            sortby=UserSortBy.influence, page=None, per_page=None):
        """Query ``/users/<user_id>/skulls``."""
        query = self._user_query_opts(sortby, page, per_page)
        return self._api_call('/users/%d/skulls' % user_id, query=query)

    def get_user_links(self, user_id, page=None, per_page=None):
        """Query ``/users/<user_id>/links``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/users/%d/links' % user_id, query=query)

    def get_user_tags(self, user_id, page=None, per_page=None):
        """Query ``/users/<user_id>/tags``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/users/%d/tags' % user_id, query=query)

    def get_user_pages(self, user_id, page=None, per_page=None):
        """Query ``/users/<user_id>/pages``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/users/%d/pages' % user_id, query=query)

    # Projects API

    def _project_query_opts(self, sortby, page, per_page):
        """
        Build a query dict with pagination and project sort order.
        """
        query = self._page_query_opts(page, per_page)
        sortby = ProjectSortBy(sortby)
        query['sortby'] = sortby.value
        return query

    def get_projects(self, sortby=ProjectSortBy.skulls,
            ids=None, page=None, per_page=None):
        """
        Retrieve a list of projects.

        :param sortby: Sort order.
        :param ids: ``None`` for all projects, a ``slice`` for an ID range
            (``start``..``stop``), or an iterable of at most 50 IDs.
        :param page: Page number.
        :param per_page: Page size.
        :raises ValueError: if more than 50 distinct IDs are given.
        """
        query = self._project_query_opts(sortby, page, per_page)

        if ids is None:
            return self._api_call('/projects', query=query)
        elif isinstance(ids, slice):
            # Use the bounds of the slice that was passed in, not the
            # attributes of the builtin ``slice`` type.
            query['ids'] = '%d,%d' % (ids.start, ids.stop)
            return self._api_call('/projects/range', query=query)
        else:
            ids = set(ids)
            if len(ids) > 50:
                raise ValueError('Too many IDs')
            query['ids'] = ','.join(['%d' % pid for pid in ids])
            return self._api_call('/projects/batch', query=query)

    def search_projects(self, term,
            sortby=ProjectSortBy.skulls, page=None, per_page=None):
        """Query ``/projects/search`` for the given search term."""
        query = self._project_query_opts(sortby, page, per_page)
        query['search_term'] = str(term)
        return self._api_call('/projects/search', query=query)

    def get_project(self, project_id):
        """Query ``/projects/<project_id>``."""
        return self._api_call('/projects/%d' % project_id)

    def get_project_team(self, project_id,
            sortby=UserSortBy.influence, page=None, per_page=None):
        """Query ``/projects/<project_id>/team``."""
        query = self._user_query_opts(sortby, page, per_page)
        return self._api_call('/projects/%d/team' % project_id, query=query)

    def get_project_followers(self, project_id,
            sortby=UserSortBy.influence, page=None, per_page=None):
        """Query ``/projects/<project_id>/followers``."""
        query = self._user_query_opts(sortby, page, per_page)
        return self._api_call('/projects/%d/followers' % project_id,
                query=query)

    def get_project_skulls(self, project_id,
            sortby=UserSortBy.influence, page=None, per_page=None):
        """Query ``/projects/<project_id>/skulls``."""
        query = self._user_query_opts(sortby, page, per_page)
        return self._api_call('/projects/%d/skulls' % project_id,
                query=query)

    def get_project_comments(self, project_id,
            sortby=UserSortBy.influence, page=None, per_page=None):
        """Query ``/projects/<project_id>/comments``."""
        query = self._user_query_opts(sortby, page, per_page)
        return self._api_call('/projects/%d/comments' % project_id,
                query=query)

    def get_project_links(self, project_id, page=None, per_page=None):
        """Query ``/projects/<project_id>/links``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/projects/%d/links' % project_id,
                query=query)

    def get_project_images(self, project_id, page=None, per_page=None):
        """Query ``/projects/<project_id>/images``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/projects/%d/images' % project_id,
                query=query)

    def get_project_components(self, project_id, page=None, per_page=None):
        """Query ``/projects/<project_id>/components``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/projects/%d/components' % project_id,
                query=query)

    def get_project_tags(self, project_id, page=None, per_page=None):
        """Query ``/projects/<project_id>/tags``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/projects/%d/tags' % project_id, query=query)

    def get_project_logs(self, project_id, page=None, per_page=None):
        """Query ``/projects/<project_id>/logs``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/projects/%d/logs' % project_id, query=query)

    def get_project_instructions(self, project_id, page=None, per_page=None):
        """Query ``/projects/<project_id>/instructions``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/projects/%d/instructions' % project_id,
                query=query)

    def get_project_details(self, project_id, page=None, per_page=None):
        """Query ``/projects/<project_id>/details``."""
        query = self._page_query_opts(page, per_page)
        return self._api_call('/projects/%d/details' % project_id,
                query=query)
Ejemplo n.º 12
0
class WorkerPool(object):
    """
    The WorkerPool object represents a pool of worker threads which
    each run a task in an external thread.

    Requests are queued by ``apply``; a queue manager coroutine hands them
    to ``_apply``, which starts one thread per request while a semaphore
    caps the number of requests running at once.
    """
    def __init__(self, workers=None, io_loop=None):
        """
        :param workers: Maximum number of concurrently running tasks;
            defaults to ``cpu_count()``.
        :param io_loop: IO loop used to schedule callbacks; defaults to
            ``IOLoop.current()``.
        """
        if workers is None:
            workers = cpu_count()
        if io_loop is None:
            io_loop = IOLoop.current()
        self._io_loop = io_loop
        self._sem = Semaphore(workers)
        self._queue = Queue()
        self._active = False

    @coroutine
    def apply(self, func, args=None, kwds=None):
        """
        Enqueue a request to be processed in a worker thread.

        :param func: Callable to run in the worker thread.
        :param args: Positional arguments for ``func`` (default: none).
        :param kwds: Keyword arguments for ``func`` (default: none).
        :returns: the return value of ``func``; an exception raised by
            ``func`` is re-raised to the caller.
        """
        if args is None: args = ()
        if kwds is None: kwds = {}

        # Our result placeholder
        future = Future()

        # Enqueue the request
        yield self._queue.put((future, func, args, kwds))

        # Kick-start the queue manager if not already running
        self._io_loop.add_callback(self._queue_manager)

        # Get back the result
        result = yield future
        raise Return(result)

    @coroutine
    def _apply(self, future, func, args=None, kwds=None):
        """
        Execute a function in a worker thread.  Wrapper function.

        Waits for a free slot, runs ``func`` in a new thread and resolves
        ``future`` with its outcome from the IO loop.
        """
        # Honour the declared defaults; unpacking None would fail.
        if args is None: args = ()
        if kwds is None: kwds = {}

        yield self._sem.acquire()

        # Receive the result back; sets the future result
        def _recv_result(err, res):
            self._sem.release()
            if err is not None:
                future.set_exc_info(err)
            else:
                future.set_result(res)

        # Run the function; in a worker thread
        def _exec():
            err = None
            res = None

            try:
                res = func(*args, **kwds)
            except:
                # Deliberately broad: whatever the task raised is handed
                # back to the waiting caller rather than lost in the thread.
                err = exc_info()

            self._io_loop.add_callback(_recv_result, err, res)

        # Spawn the worker thread
        thread = Thread(target=_exec)
        try:
            thread.start()
        except Exception:
            # The thread never ran, so nobody else will free the slot or
            # resolve the future; do both here so the caller is not left
            # waiting forever.
            _recv_result(exc_info(), None)

    @coroutine
    def _queue_manager(self):
        """
        Queue manager co-routine.

        Only one instance runs at a time; it dispatches every queued
        request to ``_apply`` via the IO loop.
        """
        if self._active:
            # Already active
            return

        try:
            self._active = True
            while True:
                (future, func, args, kwds) = yield self._queue.get()
                self._io_loop.add_callback(self._apply, future, func, args,
                                           kwds)
        finally:
            self._active = False
Ejemplo n.º 13
0
class Crawler(object):
    """
    Crawl pages starting from a link, queueing the links and images found.

    Link and image URLs live in the module-level ``links`` and ``imageurls``
    queues. ``run`` repeatedly spawns ``handle_links`` / ``handle_imageurls``
    workers, and a semaphore caps how many downloads are in flight.
    """
    def _init_defaults(self):
        """Set the built-in configuration defaults."""
        self.start_link = None
        self.link_priority = 2
        self.img_priority = 8
        self.politeness = 2
        self.workers_limit = 10  # allow at most 10 concurrent workers
        self.link_regex = re.compile("^http://.*")
        self.img_regex = re.compile(".*")
        self.fname_digits = 4
        self.min_width = 200
        self.min_height = 200
        self.img_dir = "E:/tmp/"
        self.idle_wait_loops = 100
        self.port = 8888
        # Set by run() once its main loop has ended: link handlers still in
        # flight must not enqueue links that nobody will process.
        self._draining = False

    def _load_config(self):
        """
        Override the defaults with any options found in the ``global``
        section of ``config.ini``.

        :raises SystemExit: on a non-positive politeness or an image
            directory that does not exist.
        """
        parser = ConfigParser.ConfigParser()
        parser.read("config.ini")

        if parser.has_option("global", "starturl"):
            self.start_link = parser.get("global", "starturl")

        if parser.has_option("global", "linkregex"):
            self.link_regex = re.compile(parser.get("global", "linkregex"))
        if parser.has_option("global", "imgregex"):
            self.img_regex = re.compile(parser.get("global", "imgregex"))

        if parser.has_option("global", "politeness"):
            politeness = parser.getint("global", "politeness")
            if politeness <= 0:
                print("politeness must be a positive integer")
                raise SystemExit()
            self.politeness = politeness
        if parser.has_option("global", "imgdir"):
            imgdir = parser.get("global", "imgdir")
            if not os.path.exists(imgdir) or not os.path.isdir(imgdir):
                print("invalid imgdir configuration")
                raise SystemExit()
            if not imgdir.endswith("/"):
                imgdir += "/"
            self.img_dir = imgdir

        if parser.has_option("global", "minwidth"):
            self.min_width = parser.getint("global", "minwidth")
        if parser.has_option("global", "minheight"):
            self.min_height = parser.getint("global", "minheight")

    def __init__(self, start_link=None):
        """
        :param start_link: First URL to crawl; overrides the configured one.
        :raises SystemExit: if no start link is given or configured.
        """
        self._init_defaults()
        # Now load the config file to override defaults
        self._load_config()

        if start_link:
            self.start_link = start_link
        if not self.start_link:
            raise SystemExit("No start link is provided, exiting now...")
        links.put(self.start_link)
        self.semaphore = Semaphore(self.workers_limit)

    @gen.coroutine
    def run(self):
        """
        Main loop: spawn handlers until both queues have stayed empty for
        ``idle_wait_loops`` iterations, then wait for outstanding work.
        """
        # First start an debug server
        app = Application([(r"/", WebHandler)])
        server = HTTPServer(app)
        server.listen(self.port)

        idle_loops = 0
        while True:
            if imageurls.qsize() == 0 and links.qsize() == 0:
                print("Both link and image queues are empty now")
                idle_loops += 1
                if idle_loops == self.idle_wait_loops:
                    break
            else:
                idle_loops = 0  # clear the idle loop counter
                # Handlers are spawned without yielding on purpose so that
                # several downloads can be in flight at once.
                if imageurls.qsize() == 0:
                    self.handle_links()
                elif links.qsize() == 0:
                    self.handle_imageurls()
                else:
                    choices = [0] * self.link_priority + [1] * self.img_priority
                    choice = random.choice(choices)
                    if choice:
                        self.handle_imageurls()
                    else:
                        self.handle_links()
            yield gen.sleep(0.1 * self.politeness)

        # From here on no new links are accepted, so the join below is
        # guaranteed to finish once the in-flight handlers are done.
        self._draining = True
        # Wait for all link handlers; join() returns a future and only
        # waits when it is yielded.
        yield links.join()
        # Handling imageurls if generated by the last few links.  Yielding
        # lets the IO loop run; spinning here without yielding would never
        # let a busy worker slot be released.
        while imageurls.qsize():
            yield self.handle_imageurls()
        yield imageurls.join()

    @gen.coroutine
    def _fetch_with_retries(self, url, tries=3):
        """
        Fetch ``url``, giving it ``tries`` chances to answer with HTTP 200.

        :returns: the last response received (which may be a non-200 one),
            or ``None`` if no attempt produced a response.
        """
        client = AsyncHTTPClient()
        response = None
        for _ in range(tries):
            try:
                response = yield client.fetch(url)
            except Exception as exc:
                # fetch() raises on error statuses and transport failures;
                # keep the attached response, if any, and try again.
                response = getattr(exc, "response", None)
                continue
            if response.code == 200:
                break
        raise gen.Return(response)

    def _harvest(self, page_url, body):
        """
        Parse a page and queue the links and images it refers to.

        URLs are resolved against ``page_url``.
        """
        # parse the html with bs
        soup = bs4.BeautifulSoup(body)

        # extract valid links and put into links
        if not self._draining:
            for tag in soup.find_all("a"):
                if "href" not in tag.attrs:
                    continue
                href = tag["href"]
                if href.startswith("#"):
                    continue
                # urljoin handles absolute, root-relative and
                # document-relative references alike
                href = urlparse.urljoin(page_url, href)
                if not self.link_regex.match(href):
                    continue
                if href in visited_links:
                    continue
                links.put(href)
                print("NEWLINK: %s" % href)

        # extract imgs and put into imageurls
        for tag in soup.find_all("img"):
            if "src" not in tag.attrs:
                continue
            src = urlparse.urljoin(page_url, tag["src"])
            if not self.img_regex.match(src):
                continue
            if src in downloaded_images:
                continue
            imageurls.put(src)
            print("NEW IMAGE: %s" % src)

    @gen.coroutine
    def handle_links(self):
        """
        Take one link from the queue, download it and harvest its content.

        The worker slot is held only for the download; every dequeued link
        is marked done however processing ends.
        """
        yield self.semaphore.acquire()
        dequeued = False
        try:
            try:
                if not links.qsize():
                    # Another worker emptied the queue while we waited for
                    # a slot; don't sit on the slot waiting for an item.
                    return
                newlink = yield links.get()
                dequeued = True

                # Make sure we haven't visited this one
                if newlink in visited_links:
                    return
                visited_links.add(newlink)

                response = yield self._fetch_with_retries(newlink)
            finally:
                # release the semaphore
                self.semaphore.release()

            if response is None or response.code != 200:
                link_failures.append(newlink)
                print("[FAILURE] - %s" % newlink)
                return

            # TODO: replace this with a report api
            print("[VISITED] - %s" % newlink)

            self._harvest(newlink, response.body)
        finally:
            # now the task is done
            if dequeued:
                links.task_done()

    def _save_image(self, imgurl, response):
        """
        Write a downloaded image to ``img_dir`` unless it is smaller than
        the configured minimum width or height.
        """
        # Read the file content
        img = PIL.Image.open(response.buffer)
        w, h = img.size
        if w < self.min_width or h < self.min_height:
            return

        # find out the image extension, default to jpg
        if "." in imgurl:
            ext = imgurl.split(".")[-1].lower()
            if ext not in ["jpg", "png", "gif"]:
                ext = "jpg"
        elif img.format:
            ext = img.format.lower()
        else:
            ext = "jpg"

        # increment the counter
        global img_counter
        img_counter += 1
        fname = str(img_counter).zfill(self.fname_digits) + "." + ext
        fpath = self.img_dir + fname
        # save the image file, closing the handle when done
        with open(fpath, "wb") as f:
            f.write(response.body)

    @gen.coroutine
    def handle_imageurls(self):
        """
        Take one image URL from the queue, download it and save it.

        The worker slot is held only for the download; every dequeued URL
        is marked done however processing ends.
        """
        yield self.semaphore.acquire()
        dequeued = False
        try:
            try:
                if not imageurls.qsize():
                    # Nothing left to do; give the slot back right away.
                    return
                imgurl = yield imageurls.get()
                dequeued = True

                if imgurl in downloaded_images:
                    return
                # mark the image as downloaded
                downloaded_images.add(imgurl)

                response = yield self._fetch_with_retries(imgurl)
            finally:
                # Download is finished, release semaphore
                self.semaphore.release()

            if response is None or response.code != 200:
                download_failures.append(imgurl)
                print("[FAILURE] - %s" % imgurl)
                return

            # TODO: replace this with a report api
            print("[DOWNLOADED] - %s" % imgurl)

            self._save_image(imgurl, response)
        finally:
            # now the task is done
            if dequeued:
                imageurls.task_done()
Ejemplo n.º 14
0
class KernelPool(object):
    '''
    A class to maintain a pool of kernel and control access to the individual kernels.
    Kernels are protected by a borrower/lender pattern.
    '''
    def __init__(self, prespawn_count, kernel_manager):
        '''
        Starts prespawn_count kernels and registers an iopub listener for each.
        :param prespawn_count: Number of kernels to start up front; None is treated as 0
        :param kernel_manager: Manager used to start, look up and shut down kernels
        '''
        if prespawn_count is None:
            prespawn_count = 0

        self.kernel_clients = {}
        self.on_recv_funcs = {}
        self.kernel_manager = kernel_manager
        self.pool_index = 0
        self.kernel_pool = []
        # One semaphore slot per prespawned kernel: acquire() waits while the pool is empty.
        self.kernel_semaphore = Semaphore(prespawn_count)

        for _ in range(prespawn_count):
            seed_notebook = self.kernel_manager.parent.seed_notebook
            if seed_notebook:
                # Use the kernelspec name recorded in the seed notebook's metadata.
                kernel_id = self.kernel_manager.start_kernel(
                    kernel_name=seed_notebook['metadata']['kernelspec']['name'])
            else:
                kernel_id = self.kernel_manager.start_kernel()
            self.kernel_clients[kernel_id] = self.kernel_manager.get_kernel(kernel_id).client()
            self.kernel_pool.append(kernel_id)
            iopub = self.kernel_manager.connect_iopub(kernel_id)
            iopub.on_recv(self.create_on_reply(kernel_id))

    @gen.coroutine
    def acquire(self):
        '''
        Returns a kernel client and id for use and removes the kernel the resource pool.
        Kernels must be returned via the release method.
        :return:Returns a kernel client and a kernel id
        '''
        yield self.kernel_semaphore.acquire()
        # Oldest entry first, so kernels are handed out in the order they were returned.
        kernel_id = self.kernel_pool.pop(0)
        raise gen.Return((self.kernel_clients[kernel_id], kernel_id))

    def release(self, kernel_id):
        '''
        Returns a kernel back to the resource pool.
        :param kernel_id: Id of the kernel to return to the pool
        '''
        self.kernel_pool.append(kernel_id)
        self.kernel_semaphore.release()

    def _on_reply(self, kernel_id, msg_list):
        '''
        Deserializes a raw iopub message and passes it to the callback registered for the kernel.
        Messages that arrive while no callback is registered are dropped instead of raising KeyError.
        :param kernel_id: Id of the kernel the message came from
        :param msg_list: Raw message frames as delivered to the iopub on_recv callback
        '''
        session = self.kernel_clients[kernel_id].session
        _, msg_list = session.feed_identities(msg_list)
        msg = session.deserialize(msg_list)
        handler = self.on_recv_funcs.get(kernel_id)
        if handler is not None:
            handler(msg)

    def create_on_reply(self, kernel_id):
        '''
        The lambda is used to handle a specific reply per kernel and provide a unique stack scope per invocation.
        '''
        return lambda msg_list: self._on_reply(kernel_id, msg_list)

    def on_recv(self, kernel_id, func):
        '''
        Registers a callback for io_pub messages for a particular kernel.
        This is needed to avoid having multiple callbacks per kernel client.
        :param kernel_id: Id of the kernel
        :param func: Callback function to handle the message
        '''
        self.on_recv_funcs[kernel_id] = func

    def shutdown(self):
        '''
        Shuts down all kernels in the pool and in the kernel manager.
        '''
        for kid, client in self.kernel_clients.items():
            client.stop_channels()
            self.kernel_manager.shutdown_kernel(kid, now=True)

        # Any remaining kernels that were not created for our pool should be shutdown
        for kid in self.kernel_manager.list_kernel_ids():
            self.kernel_manager.shutdown_kernel(kid, now=True)
Ejemplo n.º 15
0
class TornadoSubscriptionManager(SubscriptionManager):
    """Subscription manager whose subscribe and heartbeat loops are scheduled on the PubNub instance's IOLoop."""

    def __init__(self, pubnub_instance):
        """Create the queue, events and lock, start the message worker and hook up reconnection.

        :param pubnub_instance: PubNub client passed to the base class and to the reconnection manager
        """
        # Captured by the nested callback class below, whose own ``self`` is the callback object.
        subscription_manager = self

        self._message_queue = Queue()
        self._consumer_event = Event()
        self._cancellation_event = Event()
        # Single-slot semaphore: one subscribe loop body at a time.
        self._subscription_lock = Semaphore(1)
        # self._current_request_key_object = None
        self._heartbeat_periodic_callback = None
        self._reconnection_manager = TornadoReconnectionManager(pubnub_instance)

        super(TornadoSubscriptionManager, self).__init__(pubnub_instance)
        self._start_worker()

        class TornadoReconnectionCallback(ReconnectionCallback):
            def on_reconnect(self):
                subscription_manager.reconnect()

                pn_status = PNStatus()
                pn_status.category = PNStatusCategory.PNReconnectedCategory
                pn_status.error = False

                subscription_manager._subscription_status_announced = True
                subscription_manager._listener_manager.announce_status(pn_status)

        self._reconnection_listener = TornadoReconnectionCallback()
        self._reconnection_manager.set_reconnection_listener(self._reconnection_listener)

    def _set_consumer_event(self):
        """Set the event shared with the message worker."""
        self._consumer_event.set()

    def _message_queue_put(self, message):
        """Put ``message`` on the queue read by the message worker."""
        self._message_queue.put(message)

    def _start_worker(self):
        """Create the subscribe message worker and schedule its ``run`` on the IOLoop."""
        self._consumer = TornadoSubscribeMessageWorker(self._pubnub,
                                                       self._listener_manager,
                                                       self._message_queue,
                                                       self._consumer_event)
        run = stack_context.wrap(self._consumer.run)
        self._pubnub.ioloop.spawn_callback(run)

    def reconnect(self):
        """Clear the stop flag and schedule a new subscribe loop."""
        self._should_stop = False
        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
        # self._register_heartbeat_timer()

    def disconnect(self):
        """Set the stop flag, stop the heartbeat timer and cancel the running subscribe loop."""
        self._should_stop = True
        self._stop_heartbeat_timer()
        self._stop_subscribe_loop()

    @tornado.gen.coroutine
    def _start_subscribe_loop(self):
        """Issue one Subscribe request and handle its outcome.

        Any loop already in flight is cancelled first, then the subscription
        lock is taken. On a successful response or a timeout the method
        schedules itself again; on any other error the status is announced,
        reconnection polling starts and the manager disconnects.
        """
        self._stop_subscribe_loop()

        yield self._subscription_lock.acquire()

        self._cancellation_event.clear()

        combined_channels = self._subscription_state.prepare_channel_list(True)
        combined_groups = self._subscription_state.prepare_channel_group_list(True)

        if len(combined_channels) == 0 and len(combined_groups) == 0:
            # Nothing to subscribe to. Hand the lock back before leaving,
            # otherwise every later loop would wait on acquire() forever.
            self._subscription_lock.release()
            return

        envelope_future = Subscribe(self._pubnub) \
            .channels(combined_channels).channel_groups(combined_groups) \
            .timetoken(self._timetoken).region(self._region) \
            .filter_expression(self._pubnub.config.filter_expression) \
            .cancellation_event(self._cancellation_event) \
            .future()

        canceller_future = self._cancellation_event.wait()

        wi = tornado.gen.WaitIterator(envelope_future, canceller_future)

        # iterates 2 times: one for result one for cancelled
        while not wi.done():
            try:
                result = yield wi.next()
            except Exception as e:
                # TODO: verify the error will not be eaten
                logger.error(e)
                raise
            else:
                if wi.current_future == envelope_future:
                    e = result
                elif wi.current_future == canceller_future:
                    return
                else:
                    raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

                if e.is_error():
                    # 599 error doesn't works - tornado use this status code
                    # for a wide range of errors, for ex:
                    # HTTP Server Error (599): [Errno -2] Name or service not known
                    if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
                        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
                        return

                    logger.error("Exception in subscribe loop: %s" % str(e))

                    if e.status is not None and e.status.category == PNStatusCategory.PNAccessDeniedCategory:
                        e.status.operation = PNOperationType.PNUnsubscribeOperation

                    self._listener_manager.announce_status(e.status)

                    self._reconnection_manager.start_polling()
                    self.disconnect()
                    return
                else:
                    self._handle_endpoint_call(e.result, e.status)

                    self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)

            finally:
                # Runs after the first resolved future whatever happened above.
                # NOTE: the ``break`` in this ``finally`` discards any in-flight
                # exception or ``return``, so the loop body executes at most
                # once and the ``raise`` above does not propagate.
                self._cancellation_event.set()
                yield tornado.gen.moment
                self._subscription_lock.release()
                self._cancellation_event.clear()
                break

    def _stop_subscribe_loop(self):
        """Set the cancellation event unless it is already set."""
        if self._cancellation_event is not None and not self._cancellation_event.is_set():
            self._cancellation_event.set()

    def _stop_heartbeat_timer(self):
        """Stop the periodic heartbeat callback if one has been created."""
        if self._heartbeat_periodic_callback is not None:
            self._heartbeat_periodic_callback.stop()

    def _register_heartbeat_timer(self):
        """Create and start the periodic callback that runs the heartbeat loop."""
        super(TornadoSubscriptionManager, self)._register_heartbeat_timer()
        self._heartbeat_periodic_callback = PeriodicCallback(
            stack_context.wrap(self._perform_heartbeat_loop),
            self._pubnub.config.heartbeat_interval * TornadoSubscriptionManager.HEARTBEAT_INTERVAL_MULTIPLIER,
            self._pubnub.ioloop)
        self._heartbeat_periodic_callback.start()

    @tornado.gen.coroutine
    def _perform_heartbeat_loop(self):
        """Send one heartbeat for the current channels and groups and announce the status if configured."""
        if self._heartbeat_call is not None:
            # TODO: cancel call
            pass

        cancellation_event = Event()
        state_payload = self._subscription_state.state_payload()
        presence_channels = self._subscription_state.prepare_channel_list(False)
        presence_groups = self._subscription_state.prepare_channel_group_list(False)

        if len(presence_channels) == 0 and len(presence_groups) == 0:
            return

        try:
            envelope = yield self._pubnub.heartbeat() \
                .channels(presence_channels) \
                .channel_groups(presence_groups) \
                .state(state_payload) \
                .cancellation_event(cancellation_event) \
                .future()

            heartbeat_verbosity = self._pubnub.config.heartbeat_notification_options
            # NOTE(review): ``is_error`` is read without being called; if it is
            # a method this branch is always taken - confirm against PNStatus.
            if envelope.status.is_error:
                # NOTE(review): the original compared against ALL twice; the
                # second comparison presumably meant a failures-only option.
                # Collapsed without changing behaviour - confirm intent.
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)
            else:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)

        except PubNubTornadoException:
            pass
            # TODO: check correctness
            # if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
            #     self._start_subscribe_loop()
            # else:
            #     self._listener_manager.announce_status(e.status)
        except Exception as e:
            # Report through the module logger, like the subscribe loop does.
            logger.error(e)
        finally:
            cancellation_event.set()

    @tornado.gen.coroutine
    def _send_leave(self, unsubscribe_operation):
        """Send a Leave for the operation's channels and groups and announce the resulting status."""
        envelope = yield Leave(self._pubnub) \
            .channels(unsubscribe_operation.channels) \
            .channel_groups(unsubscribe_operation.channel_groups).future()
        self._listener_manager.announce_status(envelope.status)
Ejemplo n.º 16
0
class TornadoSubscriptionManager(SubscriptionManager):
    """Subscription manager whose subscribe and heartbeat loops are scheduled on the PubNub instance's IOLoop."""

    def __init__(self, pubnub_instance):
        """Create the queue, events and lock, start the message worker and hook up reconnection.

        :param pubnub_instance: PubNub client passed to the base class and to the reconnection manager
        """
        # Captured by the nested callback class below, whose own ``self`` is the callback object.
        subscription_manager = self

        self._message_queue = Queue()
        self._consumer_event = Event()
        self._cancellation_event = Event()
        # Single-slot semaphore: one subscribe loop body at a time.
        self._subscription_lock = Semaphore(1)
        # self._current_request_key_object = None
        self._heartbeat_periodic_callback = None
        self._reconnection_manager = TornadoReconnectionManager(pubnub_instance)

        super(TornadoSubscriptionManager, self).__init__(pubnub_instance)
        self._start_worker()

        class TornadoReconnectionCallback(ReconnectionCallback):
            def on_reconnect(self):
                subscription_manager.reconnect()

                pn_status = PNStatus()
                pn_status.category = PNStatusCategory.PNReconnectedCategory
                pn_status.error = False

                subscription_manager._subscription_status_announced = True
                subscription_manager._listener_manager.announce_status(pn_status)

        self._reconnection_listener = TornadoReconnectionCallback()
        self._reconnection_manager.set_reconnection_listener(self._reconnection_listener)

    def _set_consumer_event(self):
        """Set the event shared with the message worker."""
        self._consumer_event.set()

    def _message_queue_put(self, message):
        """Put ``message`` on the queue read by the message worker."""
        self._message_queue.put(message)

    def _start_worker(self):
        """Create the subscribe message worker and schedule its ``run`` on the IOLoop."""
        self._consumer = TornadoSubscribeMessageWorker(self._pubnub,
                                                       self._listener_manager,
                                                       self._message_queue,
                                                       self._consumer_event)
        run = stack_context.wrap(self._consumer.run)
        self._pubnub.ioloop.spawn_callback(run)

    def reconnect(self):
        """Clear the stop flag and schedule a new subscribe loop."""
        self._should_stop = False
        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
        # self._register_heartbeat_timer()

    def disconnect(self):
        """Set the stop flag, stop the heartbeat timer and cancel the running subscribe loop."""
        self._should_stop = True
        self._stop_heartbeat_timer()
        self._stop_subscribe_loop()

    @tornado.gen.coroutine
    def _start_subscribe_loop(self):
        """Issue one Subscribe request and handle its outcome.

        Any loop already in flight is cancelled first, then the subscription
        lock is taken. On a successful response or a timeout the method
        schedules itself again; on any other error the status is announced,
        reconnection polling starts and the manager disconnects.
        """
        self._stop_subscribe_loop()

        yield self._subscription_lock.acquire()

        self._cancellation_event.clear()

        combined_channels = self._subscription_state.prepare_channel_list(True)
        combined_groups = self._subscription_state.prepare_channel_group_list(True)

        if len(combined_channels) == 0 and len(combined_groups) == 0:
            # Nothing to subscribe to. Hand the lock back before leaving,
            # otherwise every later loop would wait on acquire() forever.
            self._subscription_lock.release()
            return

        envelope_future = Subscribe(self._pubnub) \
            .channels(combined_channels).channel_groups(combined_groups) \
            .timetoken(self._timetoken).region(self._region) \
            .filter_expression(self._pubnub.config.filter_expression) \
            .cancellation_event(self._cancellation_event) \
            .future()

        canceller_future = self._cancellation_event.wait()

        wi = tornado.gen.WaitIterator(envelope_future, canceller_future)

        # iterates 2 times: one for result one for cancelled
        while not wi.done():
            try:
                result = yield wi.next()
            except Exception as e:
                # TODO: verify the error will not be eaten
                logger.error(e)
                raise
            else:
                if wi.current_future == envelope_future:
                    e = result
                elif wi.current_future == canceller_future:
                    return
                else:
                    raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

                if e.is_error():
                    # 599 error doesn't works - tornado use this status code
                    # for a wide range of errors, for ex:
                    # HTTP Server Error (599): [Errno -2] Name or service not known
                    if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
                        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
                        return

                    logger.error("Exception in subscribe loop: %s" % str(e))

                    if e.status is not None and e.status.category == PNStatusCategory.PNAccessDeniedCategory:
                        e.status.operation = PNOperationType.PNUnsubscribeOperation

                    self._listener_manager.announce_status(e.status)

                    self._reconnection_manager.start_polling()
                    self.disconnect()
                    return
                else:
                    self._handle_endpoint_call(e.result, e.status)

                    self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)

            finally:
                # Runs after the first resolved future whatever happened above.
                # NOTE: the ``break`` in this ``finally`` discards any in-flight
                # exception or ``return``, so the loop body executes at most
                # once and the ``raise`` above does not propagate.
                self._cancellation_event.set()
                yield tornado.gen.moment
                self._subscription_lock.release()
                self._cancellation_event.clear()
                break

    def _stop_subscribe_loop(self):
        """Set the cancellation event unless it is already set."""
        if self._cancellation_event is not None and not self._cancellation_event.is_set():
            self._cancellation_event.set()

    def _stop_heartbeat_timer(self):
        """Stop the periodic heartbeat callback if one has been created."""
        if self._heartbeat_periodic_callback is not None:
            self._heartbeat_periodic_callback.stop()

    def _register_heartbeat_timer(self):
        """Create and start the periodic callback that runs the heartbeat loop."""
        super(TornadoSubscriptionManager, self)._register_heartbeat_timer()
        self._heartbeat_periodic_callback = PeriodicCallback(
            stack_context.wrap(self._perform_heartbeat_loop),
            self._pubnub.config.heartbeat_interval * TornadoSubscriptionManager.HEARTBEAT_INTERVAL_MULTIPLIER,
            self._pubnub.ioloop)
        self._heartbeat_periodic_callback.start()

    @tornado.gen.coroutine
    def _perform_heartbeat_loop(self):
        """Send one heartbeat for the current channels and groups and announce the status if configured."""
        if self._heartbeat_call is not None:
            # TODO: cancel call
            pass

        cancellation_event = Event()
        state_payload = self._subscription_state.state_payload()
        presence_channels = self._subscription_state.prepare_channel_list(False)
        presence_groups = self._subscription_state.prepare_channel_group_list(False)

        if len(presence_channels) == 0 and len(presence_groups) == 0:
            return

        try:
            envelope = yield self._pubnub.heartbeat() \
                .channels(presence_channels) \
                .channel_groups(presence_groups) \
                .state(state_payload) \
                .cancellation_event(cancellation_event) \
                .future()

            heartbeat_verbosity = self._pubnub.config.heartbeat_notification_options
            # NOTE(review): ``is_error`` is read without being called; if it is
            # a method this branch is always taken - confirm against PNStatus.
            if envelope.status.is_error:
                # NOTE(review): the original compared against ALL twice; the
                # second comparison presumably meant a failures-only option.
                # Collapsed without changing behaviour - confirm intent.
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)
            else:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)

        except PubNubTornadoException:
            pass
            # TODO: check correctness
            # if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
            #     self._start_subscribe_loop()
            # else:
            #     self._listener_manager.announce_status(e.status)
        except Exception as e:
            # Report through the module logger, like the subscribe loop does.
            logger.error(e)
        finally:
            cancellation_event.set()

    @tornado.gen.coroutine
    def _send_leave(self, unsubscribe_operation):
        """Send a Leave for the operation's channels and groups and announce the resulting status."""
        envelope = yield Leave(self._pubnub) \
            .channels(unsubscribe_operation.channels) \
            .channel_groups(unsubscribe_operation.channel_groups).future()
        self._listener_manager.announce_status(envelope.status)
Ejemplo n.º 17
0
    class TornadoTransmission():
        '''Queues events and posts them in batches through Tornado's
        AsyncHTTPClient, handing per-event results back on a responses
        queue.'''

        def __init__(self, max_concurrent_batches=10, block_on_send=False,
                    block_on_response=False, max_batch_size=100, send_frequency=0.25,
                    user_agent_addition=''):
            '''`send_frequency` is a number of seconds; `max_batch_size` is
            the event count at which a batch is flushed.'''
            if not has_tornado:
                raise ImportError('TornadoTransmission requires tornado, but it was not found.')

            self.block_on_send = block_on_send
            self.block_on_response = block_on_response
            self.max_batch_size = max_batch_size
            self.send_frequency = send_frequency

            user_agent = "libhoney-py/" + VERSION
            if user_agent_addition:
                user_agent += " " + user_agent_addition

            self.http_client = AsyncHTTPClient(
                force_instance=True,
                defaults=dict(user_agent=user_agent))

            # libhoney adds events to the pending queue for us to send
            self.pending = Queue(maxsize=1000)
            # we hand back responses from the API on the responses queue
            self.responses = Queue(maxsize=2000)

            # maps each in-flight HTTPRequest to its start time and events
            self.batch_data = {}
            self.sd = statsd.StatsClient(prefix="libhoney")
            self.batch_sem = Semaphore(max_concurrent_batches)

        def start(self):
            '''schedules the `_sender` loop on the current IOLoop'''
            ioloop.IOLoop.current().spawn_callback(self._sender)

        def send(self, ev):
            '''send accepts an event and queues it to be sent'''
            self.sd.gauge("queue_length", self.pending.qsize())
            try:
                if self.block_on_send:
                    # put() returns a Future that is not waited on here
                    self.pending.put(ev)
                else:
                    self.pending.put_nowait(ev)
                self.sd.incr("messages_queued")
            except QueueFull:
                response = {
                    "status_code": 0,
                    "duration": 0,
                    "metadata": ev.metadata,
                    "body": "",
                    "error": "event dropped; queue overflow",
                }
                if self.block_on_response:
                    self.responses.put(response)
                else:
                    try:
                        self.responses.put_nowait(response)
                    except QueueFull:
                        # if the response queue is full when trying to add an event
                        # queue is full response, just skip it.
                        pass
                self.sd.incr("queue_overflow")

        # We're using the older decorator/yield model for compatibility with
        # Python versions before 3.5.
        # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
        @gen.coroutine
        def _sender(self):
            '''_sender is the control loop that pulls events off the `self.pending`
            queue and submits batches for actual sending. '''
            from datetime import timedelta

            # Tornado reads a plain number as an absolute deadline and a
            # timedelta as "this long from now"; a relative wait needs the
            # timedelta form.
            wait = timedelta(seconds=self.send_frequency)
            events = []
            last_flush = time.time()
            while True:
                try:
                    ev = yield self.pending.get(timeout=wait)
                    if ev is None:
                        # signals shutdown
                        yield self._flush(events)
                        return
                    events.append(ev)
                    if (len(events) >= self.max_batch_size or
                            time.time() - last_flush > self.send_frequency):
                        yield self._flush(events)
                        events = []
                        # restart the age clock, otherwise every later event
                        # would be flushed on its own until the queue idles
                        last_flush = time.time()
                except TimeoutError:
                    yield self._flush(events)
                    events = []
                    last_flush = time.time()

        @gen.coroutine
        def _flush(self, events):
            '''sends `events`, one batch per destination; no-op when empty'''
            if not events:
                return
            for dest, group in group_events_by_destination(events).items():
                yield self._send_batch(dest, group)

        @gen.coroutine
        def _send_batch(self, destination, events):
            ''' Makes a single batch API request with the given list of events. The
            `destination` argument contains the write key, API host and dataset
            name used to build the request.'''
            start = time.time()
            status_code = 0

            # enforce max_concurrent_batches; acquired outside the try so the
            # release in `finally` always pairs with a successful acquire
            yield self.batch_sem.acquire()
            try:
                url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                            destination.dataset)
                payload = []
                for ev in events:
                    event_time = ev.created_at.isoformat()
                    if ev.created_at.tzinfo is None:
                        event_time += "Z"
                    payload.append({
                        "time": event_time,
                        "samplerate": ev.sample_rate,
                        "data": ev.fields()})
                req = HTTPRequest(
                    url,
                    method='POST',
                    headers={
                        "X-Honeycomb-Team": destination.writekey,
                        "Content-Type": "application/json",
                    },
                    body=json.dumps(payload, default=json_default_handler),
                )
                self.http_client.fetch(req, self._response_callback)
                # store the events that were sent so we can process responses later
                # it is important that we delete these eventually, or we'll run into memory issues
                self.batch_data[req] = {"start": start, "events": events}
            except Exception as e:
                # Catch all exceptions and hand them to the responses queue.
                self._enqueue_errors(status_code, e, start, events)
            finally:
                # NOTE: released once the request has been handed to the HTTP
                # client, not when its response arrives
                self.batch_sem.release()

        def _enqueue_errors(self, status_code, error, start, events):
            '''enqueues one error response per event in `events`'''
            for ev in events:
                self.sd.incr("send_errors")
                self._enqueue_response(status_code, "", error, start, ev.metadata)

        def _response_callback(self, resp):
            '''handles the HTTP response for a batch sent by `_send_batch`'''
            # resp.request should be the same HTTPRequest object built by _send_batch
            # and mapped to values in batch_data
            batch = self.batch_data[resp.request]
            events = batch["events"]
            start = batch["start"]
            # bound before the try so the except branch can always read it
            status_code = 0
            try:
                status_code = resp.code
                resp.rethrow()

                statuses = [d["status"] for d in json.loads(resp.body)]
                for ev, status in zip(events, statuses):
                    self._enqueue_response(status, "", None, start, ev.metadata)
                    self.sd.incr("messages_sent")
            except Exception as e:
                self._enqueue_errors(status_code, e, start, events)
                self.sd.incr("send_errors")
            finally:
                # clean up the data for this batch
                del self.batch_data[resp.request]

        def _enqueue_response(self, status_code, body, error, start, metadata):
            '''builds a response dict and puts it on the responses queue;
            `duration` is milliseconds elapsed since `start`'''
            resp = {
                "status_code": status_code,
                "body": body,
                "error": error,
                "duration": (time.time() - start) * 1000,
                "metadata": metadata
            }
            if self.block_on_response:
                self.responses.put(resp)
            else:
                try:
                    self.responses.put_nowait(resp)
                except QueueFull:
                    pass

        def close(self):
            '''call close to send all in-flight requests and shut down the
                senders nicely. Times out after max 20 seconds per sending thread
                plus 10 seconds for the response queue'''
            from datetime import timedelta

            # timedelta = relative timeout; a bare 10 would be read by Tornado
            # as an absolute deadline
            ten_seconds = timedelta(seconds=10)
            try:
                self.pending.put(None, ten_seconds)
            except QueueFull:
                pass
            # signal to the responses queue that nothing more is coming.
            try:
                self.responses.put(None, ten_seconds)
            except QueueFull:
                pass

        def get_response_queue(self):
            ''' return the responses queue on to which will be sent the response
            objects from each event send'''
            return self.responses
Ejemplo n.º 18
0
    class TornadoTransmission():
        '''Queues events and posts them in batches through Tornado's
        AsyncHTTPClient, handing per-event results back on a responses
        queue.'''

        def __init__(self,
                     max_concurrent_batches=10,
                     block_on_send=False,
                     block_on_response=False,
                     max_batch_size=100,
                     send_frequency=timedelta(seconds=0.25),
                     user_agent_addition=''):
            '''`send_frequency` is a timedelta; `max_batch_size` is the
            event count at which a batch is flushed.'''
            if not has_tornado:
                raise ImportError(
                    'TornadoTransmission requires tornado, but it was not found.'
                )

            self.block_on_send = block_on_send
            self.block_on_response = block_on_response
            self.max_batch_size = max_batch_size
            self.send_frequency = send_frequency

            user_agent = "libhoney-py/" + VERSION
            if user_agent_addition:
                user_agent += " " + user_agent_addition

            self.http_client = AsyncHTTPClient(
                force_instance=True, defaults=dict(user_agent=user_agent))

            # libhoney adds events to the pending queue for us to send
            self.pending = Queue(maxsize=1000)
            # we hand back responses from the API on the responses queue
            self.responses = Queue(maxsize=2000)

            # maps each in-flight HTTPRequest to its start time and events
            self.batch_data = {}
            self.sd = statsd.StatsClient(prefix="libhoney")
            self.batch_sem = Semaphore(max_concurrent_batches)

        def start(self):
            '''schedules the `_sender` loop on the current IOLoop'''
            ioloop.IOLoop.current().spawn_callback(self._sender)

        def send(self, ev):
            '''send accepts an event and queues it to be sent'''
            self.sd.gauge("queue_length", self.pending.qsize())
            try:
                if self.block_on_send:
                    # put() returns a Future that is not waited on here
                    self.pending.put(ev)
                else:
                    self.pending.put_nowait(ev)
                self.sd.incr("messages_queued")
            except QueueFull:
                response = {
                    "status_code": 0,
                    "duration": 0,
                    "metadata": ev.metadata,
                    "body": "",
                    "error": "event dropped; queue overflow",
                }
                if self.block_on_response:
                    self.responses.put(response)
                else:
                    try:
                        self.responses.put_nowait(response)
                    except QueueFull:
                        # if the response queue is full when trying to add an event
                        # queue is full response, just skip it.
                        pass
                self.sd.incr("queue_overflow")

        # We're using the older decorator/yield model for compatibility with
        # Python versions before 3.5.
        # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
        @gen.coroutine
        def _sender(self):
            '''_sender is the control loop that pulls events off the `self.pending`
            queue and submits batches for actual sending. '''
            events = []
            last_flush = time.time()
            while True:
                try:
                    ev = yield self.pending.get(timeout=self.send_frequency)
                    if ev is None:
                        # signals shutdown
                        yield self._flush(events)
                        return
                    events.append(ev)
                    if (len(events) >= self.max_batch_size
                            or time.time() - last_flush >
                            self.send_frequency.total_seconds()):
                        yield self._flush(events)
                        events = []
                        # restart the age clock, otherwise every later event
                        # would be flushed on its own until the queue idles
                        last_flush = time.time()
                except TimeoutError:
                    yield self._flush(events)
                    events = []
                    last_flush = time.time()

        @gen.coroutine
        def _flush(self, events):
            '''sends `events`, one batch per destination; no-op when empty'''
            if not events:
                return
            for dest, group in group_events_by_destination(events).items():
                yield self._send_batch(dest, group)

        @gen.coroutine
        def _send_batch(self, destination, events):
            ''' Makes a single batch API request with the given list of events. The
            `destination` argument contains the write key, API host and dataset
            name used to build the request.'''
            start = time.time()
            status_code = 0

            # enforce max_concurrent_batches; acquired outside the try so the
            # release in `finally` always pairs with a successful acquire
            yield self.batch_sem.acquire()
            try:
                url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                              destination.dataset)
                payload = []
                for ev in events:
                    event_time = ev.created_at.isoformat()
                    if ev.created_at.tzinfo is None:
                        event_time += "Z"
                    payload.append({
                        "time": event_time,
                        "samplerate": ev.sample_rate,
                        "data": ev.fields()
                    })
                req = HTTPRequest(
                    url,
                    method='POST',
                    headers={
                        "X-Honeycomb-Team": destination.writekey,
                        "Content-Type": "application/json",
                    },
                    body=json.dumps(payload, default=json_default_handler),
                )
                self.http_client.fetch(req, self._response_callback)
                # store the events that were sent so we can process responses later
                # it is important that we delete these eventually, or we'll run into memory issues
                self.batch_data[req] = {"start": start, "events": events}
            except Exception as e:
                # Catch all exceptions and hand them to the responses queue.
                self._enqueue_errors(status_code, e, start, events)
            finally:
                # NOTE: released once the request has been handed to the HTTP
                # client, not when its response arrives
                self.batch_sem.release()

        def _enqueue_errors(self, status_code, error, start, events):
            '''enqueues one error response per event in `events`'''
            for ev in events:
                self.sd.incr("send_errors")
                self._enqueue_response(status_code, "", error, start,
                                       ev.metadata)

        def _response_callback(self, resp):
            '''handles the HTTP response for a batch sent by `_send_batch`'''
            # resp.request should be the same HTTPRequest object built by _send_batch
            # and mapped to values in batch_data
            batch = self.batch_data[resp.request]
            events = batch["events"]
            start = batch["start"]
            # bound before the try so the except branch can always read it
            status_code = 0
            try:
                status_code = resp.code
                resp.rethrow()

                statuses = [d["status"] for d in json.loads(resp.body)]
                for ev, status in zip(events, statuses):
                    self._enqueue_response(status, "", None, start,
                                           ev.metadata)
                    self.sd.incr("messages_sent")
            except Exception as e:
                self._enqueue_errors(status_code, e, start, events)
                self.sd.incr("send_errors")
            finally:
                # clean up the data for this batch
                del self.batch_data[resp.request]

        def _enqueue_response(self, status_code, body, error, start, metadata):
            '''builds a response dict and puts it on the responses queue;
            `duration` is milliseconds elapsed since `start`'''
            resp = {
                "status_code": status_code,
                "body": body,
                "error": error,
                "duration": (time.time() - start) * 1000,
                "metadata": metadata
            }
            if self.block_on_response:
                self.responses.put(resp)
            else:
                try:
                    self.responses.put_nowait(resp)
                except QueueFull:
                    pass

        def close(self):
            '''call close to send all in-flight requests and shut down the
                senders nicely. Times out after max 20 seconds per sending thread
                plus 10 seconds for the response queue'''
            # timedelta = relative timeout; a bare 10 would be read by Tornado
            # as an absolute deadline
            ten_seconds = timedelta(seconds=10)
            try:
                self.pending.put(None, ten_seconds)
            except QueueFull:
                pass
            # signal to the responses queue that nothing more is coming.
            try:
                self.responses.put(None, ten_seconds)
            except QueueFull:
                pass

        def get_response_queue(self):
            ''' return the responses queue on to which will be sent the response
            objects from each event send'''
            return self.responses
Ejemplo n.º 19
0
class Crawler(object):
    def _init_defaults(self):
        self.start_link = None
        self.link_priority = 2
        self.img_priority = 8
        self.politeness = 2
        self.workers_limit = 10 # allow at most 10 concurrent workers
        self.link_regex = re.compile("^http://.*")
        self.img_regex = re.compile(".*")
        self.fname_digits = 4
        self.min_width = 200
        self.min_height = 200
        self.img_dir = "E:/tmp/"
        self.idle_wait_loops = 100
        self.port = 8888

    def _load_config(self):
        parser = ConfigParser.ConfigParser()
        parser.read("config.ini")

        if parser.has_option("global", "starturl"):
            starturl = parser.get("global", "starturl")
            self.start_link = starturl
            
        if parser.has_option("global", "linkregex"):
            self.link_regex = re.compile(parser.get("global", "linkregex"))
        if parser.has_option("global", "imgregex"):
            self.img_regex = re.compile(parser.get("global", "imgregex"))

        if parser.has_option("global", "politeness"):
            politeness = parser.getint("global", "politeness")
            if politeness <=0:
                print "politeness must be a positive integer"
                raise SystemExit()
            self.politeness = politeness
        if parser.has_option("global", "imgdir"):
            imgdir = parser.get("global", "imgdir")
            if not os.path.exists(imgdir) or not os.path.isdir(imgdir):
                print "invalid imgdir configuration"
                raise SystemExit()
            if not imgdir.endswith("/"):
                imgdir+="/"
            self.img_dir = imgdir

        if parser.has_option("global", "minwidth"):
            width = parser.getint("global", "minwidth")
            self.min_width = width
        if parser.has_option("global", "minheight"):
            height = parser.getint("global", "minheight")
            self.min_height = height
            
    def __init__(self, start_link=None):
        """Set up the crawler and seed the link queue.

        Built-in defaults are applied first and then overridden by the
        config file.  A truthy ``start_link`` argument takes precedence
        over the configured start URL.  The chosen URL is put on the
        ``links`` queue and a semaphore sized by ``workers_limit`` is
        created.

        Raises:
            SystemExit: when neither the argument nor the configuration
                supplies a start link.
        """
        self._init_defaults()
        # Now load the config file to override defaults
        self._load_config()

        chosen_link = start_link or self.start_link
        if not chosen_link:
            raise SystemExit("No start link is provided, exiting now...")
        self.start_link = chosen_link

        links.put(chosen_link)
        self.semaphore = Semaphore(self.workers_limit)

    @gen.coroutine
    def run(self):
        """Drive the crawl until both queues have stayed empty long enough.

        A debug web server is started on ``self.port`` first.  The main
        loop then wakes up every ``0.1 * politeness`` seconds and starts one
        handler per wake-up: a link handler, an image handler, or (when both
        queues hold items) a random pick weighted by ``link_priority`` and
        ``img_priority``.  Handlers are started without being awaited.  The
        loop ends after ``idle_wait_loops`` consecutive wake-ups with both
        queues empty.

        Shutdown then (1) waits for outstanding link work, (2) drains image
        URLs queued by the last pages and (3) waits for outstanding image
        work.  ``Queue.join()`` returns a Future, so it has to be yielded to
        have any effect.  The wait is bounded by the same time budget as the
        idle detection, because ``join()`` only completes when every
        dequeued item has been matched by ``task_done()``; an unbounded wait
        could otherwise never return.
        """
        import datetime  # local import: only needed for the join() deadline

        # First start a debug server
        app = Application([(r"/", WebHandler)])
        server = HTTPServer(app)
        server.listen(self.port)

        idle_loops = 0
        while True:
            if imageurls.qsize() == 0 and links.qsize() == 0:
                print("Both link and image queues are empty now")
                idle_loops += 1
                if idle_loops == self.idle_wait_loops:
                    break
            else:
                idle_loops = 0  # clear the idle loop counter
                if imageurls.qsize() == 0:
                    self.handle_links()
                elif links.qsize() == 0:
                    self.handle_imageurls()
                else:
                    choices = ([0] * self.link_priority +
                               [1] * self.img_priority)
                    if random.choice(choices):
                        self.handle_imageurls()
                    else:
                        self.handle_links()
            yield gen.sleep(0.1 * self.politeness)

        grace = datetime.timedelta(
            seconds=0.1 * self.politeness * self.idle_wait_loops)

        # Wait for all link handlers
        try:
            yield links.join(timeout=grace)
        except gen.TimeoutError:
            print("Timed out waiting for link handlers")

        # Handle image URLs generated by the last few links.  The sleep
        # hands control back to the IOLoop on every pass; without it this
        # loop would spin forever as soon as all worker slots are busy,
        # because a handler waiting for a slot cannot take an item off the
        # queue.
        while imageurls.qsize():
            self.handle_imageurls()
            yield gen.sleep(0.1 * self.politeness)

        try:
            yield imageurls.join(timeout=grace)
        except gen.TimeoutError:
            print("Timed out waiting for image handlers")

    @gen.coroutine
    def handle_links(self):
        """Fetch one queued page and enqueue the links and images found on it.

        A worker slot is taken from ``self.semaphore`` for the duration of
        the download and is always given back, even when fetching raises.
        The page is attempted up to three times; ``fetch`` reports non-2xx
        responses and connection problems by raising, so a raised error
        counts as a failed attempt instead of aborting the handler.

        Every URL taken from ``links`` is matched by exactly one
        ``links.task_done()`` call, whichever way the handler exits
        (already visited, failed, or parsed), so ``links.join()`` is able to
        complete.

        ``href`` and ``src`` values are resolved against the page URL with
        ``urlparse.urljoin``, which handles absolute, root-relative and
        document-relative references alike.  Links must match
        ``self.link_regex`` and images ``self.img_regex`` to be queued.
        """
        yield self.semaphore.acquire()
        slot_held = True
        try:
            newlink = yield links.get()
            try:
                # Make sure we haven't visited this one
                if newlink in visited_links:
                    return
                visited_links.add(newlink)

                # use async client to fetch this url;
                # give it 3 chances before putting it in failure
                client = AsyncHTTPClient()
                response = None
                for _ in range(3):
                    try:
                        response = yield client.fetch(newlink)
                    except Exception:
                        # HTTPError for non-2xx answers, socket/SSL errors
                        # when the server cannot be reached: try again
                        response = None
                        continue
                    if response.code == 200:
                        break

                # the download is over, free the slot before parsing
                self.semaphore.release()
                slot_held = False

                if response is None or response.code != 200:
                    link_failures.append(newlink)
                    print("[FAILURE] - %s" % newlink)
                    return

                # TODO: replace this with a report api
                print("[VISITED] - %s" % newlink)

                # parse the html with bs
                soup = bs4.BeautifulSoup(response.body)

                # extract valid links and put into links
                for tag in soup.find_all("a"):
                    href = tag.get("href")
                    if not href or href.startswith("#"):
                        continue
                    href = urlparse.urljoin(newlink, href)
                    if not self.link_regex.match(href):
                        continue
                    if href in visited_links:
                        continue
                    links.put(href)
                    print("NEWLINK: %s" % href)

                # extract imgs and put into imageurls
                for tag in soup.find_all("img"):
                    src = tag.get("src")
                    if not src:
                        continue
                    src = urlparse.urljoin(newlink, src)
                    if not self.img_regex.match(src):
                        continue
                    if src in downloaded_images:
                        continue
                    imageurls.put(src)
                    print("NEW IMAGE: %s" % src)
            finally:
                # now the task is done, however the handler ended
                links.task_done()
        finally:
            if slot_held:
                self.semaphore.release()

    @gen.coroutine
    def handle_imageurls(self):
        """Download one queued image and save it if it is large enough.

        A worker slot is taken from ``self.semaphore`` for the duration of
        the download and is always given back, even when fetching raises.
        The image is attempted up to three times; ``fetch`` reports non-2xx
        responses and connection problems by raising, so a raised error
        counts as a failed attempt instead of aborting the handler.

        Every URL taken from ``imageurls`` is matched by exactly one
        ``imageurls.task_done()`` call, whichever way the handler exits, so
        ``imageurls.join()`` is able to complete.

        Images smaller than ``min_width`` x ``min_height`` are skipped.  A
        body that PIL cannot open is recorded in ``download_failures``.
        Saved files are named from the global ``img_counter``, zero-padded
        to ``fname_digits``, inside ``img_dir``.  The extension comes from
        the URL path when it is jpg/png/gif, otherwise from the detected
        image format, and defaults to jpg.
        """
        global img_counter

        yield self.semaphore.acquire()
        slot_held = True
        try:
            imgurl = yield imageurls.get()
            try:
                if imgurl in downloaded_images:
                    return
                # mark the image as downloaded
                downloaded_images.add(imgurl)

                # use async client to fetch this url;
                # give it 3 chances before putting it in failure
                client = AsyncHTTPClient()
                response = None
                for _ in range(3):
                    try:
                        response = yield client.fetch(imgurl)
                    except Exception:
                        # HTTPError for non-2xx answers, socket/SSL errors
                        # when the server cannot be reached: try again
                        response = None
                        continue
                    if response.code == 200:
                        break

                # Download is finished, release semaphore
                self.semaphore.release()
                slot_held = False

                if response is None or response.code != 200:
                    download_failures.append(imgurl)
                    print("[FAILURE] - %s" % imgurl)
                    return

                # TODO: replace this with a report api
                print("[DOWNLOADED] - %s" % imgurl)

                # Read the file content
                try:
                    img = PIL.Image.open(response.buffer)
                    width, height = img.size
                except IOError:
                    # the body is not an image PIL can identify
                    download_failures.append(imgurl)
                    print("[FAILURE] - %s (not a readable image)" % imgurl)
                    return
                if width < self.min_width or height < self.min_height:
                    return

                # find out the image extension, default to jpg.  Only the
                # path part of the URL is inspected so that the dots of the
                # host name and any query string are ignored.
                known_exts = ("jpg", "png", "gif")
                url_path = urlparse.urlparse(imgurl).path
                ext = os.path.splitext(url_path)[1][1:].lower()
                if ext not in known_exts:
                    ext = (img.format or "").lower()
                if ext not in known_exts:
                    ext = "jpg"

                # increment the counter
                img_counter += 1
                fname = str(img_counter).zfill(self.fname_digits) + "." + ext
                fpath = self.img_dir + fname

                # save the image file; the with-block closes the handle
                with open(fpath, "wb") as out:
                    out.write(response.body)
            finally:
                # now the task is done, however the handler ended
                imageurls.task_done()
        finally:
            if slot_held:
                self.semaphore.release()