Example #1
import time
from typing import Any, Callable, Optional, Type

# Logger is the project's own logging wrapper, defined elsewhere in stock-up.


def retry(
    func: Callable,
    resolution_func: Optional[Callable],
    num_retries: int,
    exception_type: Type,
    error_message: str,
    logger: Logger,
) -> Any:
    """Call func, retrying up to num_retries additional times when exception_type is raised."""
    for i in range(num_retries + 1):
        try:
            return func()
        except exception_type:
            logger.info('{} {}'.format(error_message, i))
            # Re-raise once the retry budget is exhausted instead of silently
            # returning None to the caller.
            if i == num_retries:
                raise
            if resolution_func:
                resolution_func()
            time.sleep(1)
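For reference, a minimal usage sketch of this helper; flaky_call, the resolution lambda, and the logger name below are hypothetical, not from the project:

# Hypothetical usage: retry a flaky call up to 3 extra times, running a
# resolution step between attempts.
def flaky_call() -> str:
    ...  # may raise ConnectionError

result = retry(
    flaky_call,
    lambda: print('resetting connection'),  # resolution_func
    num_retries=3,
    exception_type=ConnectionError,
    error_message='call failed, attempt',
    logger=Logger('Example'),
)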
Example #2
class KafkaProducer(StockQuoteProducer):
    def __init__(self, brokers: List[str], topic: str):
        self._brokers = brokers
        self._topic = topic
        self._producer = None
        self._logger = Logger(type(self).__name__)

    def close(self) -> None:
        """Gracefully terminate the connection between the producer and the broker."""

        # Guard against close() being called before connect().
        if self._producer is None:
            return

        self._logger.info('flushing & closing')
        self._producer.flush()
        self._producer.close()

    def connect(self) -> None:
        """Instantiate connection between the producer and the broker."""

        self._logger.info('connecting to broker')
        self._producer = utils.retry(
            lambda: kafka.KafkaProducer(
                bootstrap_servers=self._brokers,
                value_serializer=pickle.dumps,
            ),
            None,
            num_retries=15,
            exception_type=NoBrokersAvailable,
            error_message='broker unavailable...',
            logger=self._logger,
        )

    def send(self, quote: StockQuote) -> None:
        """Send a stock quote to the broker."""

        utils.retry(
            lambda: self._producer.send(self._topic, quote),
            None,
            num_retries=15,
            exception_type=KafkaTimeoutError,
            error_message='send timeout...',
            logger=self._logger,
        )
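A minimal usage sketch; the broker address, topic name, and quote object are placeholders, not values from the project:

# Hypothetical wiring of the producer.
producer = KafkaProducer(brokers=['localhost:9092'], topic='stock-quotes')
producer.connect()    # retries while the broker is unavailable
producer.send(quote)  # quote: StockQuote
producer.close()      # flushes buffered messages before closing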
Example #3
File: consumer.py  Project: r7wang/stock-up
class KafkaConsumer(StockQuoteListener):

    def __init__(self, brokers: List[str], topic: str):
        self._brokers = brokers
        self._topic = topic
        self._consumer = None
        self._is_done = False
        self._logger = Logger(type(self).__name__)

    def start(self, handler: Callable) -> None:
        """Starts listening for stock quotes if the listener has never been stopped

        :param handler: Callback function invoked for every batch of stock quotes, with the following signature:
            quotes: List[StockQuote]
            return: None
        """
        self._connect()

        partitions = self._consumer.partitions_for_topic(self._topic)
        self._logger.info('partitions: {}'.format(', '.join(map(str, partitions))))

        # Assume that only one partition exists.
        topic_partition = TopicPartition(topic=self._topic, partition=0)
        begin_offsets = self._consumer.beginning_offsets([topic_partition])
        end_offsets = self._consumer.end_offsets([topic_partition])
        last_committed_offset = self._consumer.committed(topic_partition)
        self._logger.info('starting offset: {}'.format(begin_offsets[topic_partition]))
        self._logger.info('last offset: {}'.format(end_offsets[topic_partition]))
        self._logger.info('last committed offset: {}'.format(last_committed_offset))

        while not self._is_done:
            self._process_batch(topic_partition, handler)

        self._logger.info("closing consumer")
        self._consumer.close(autocommit=False)

    def stop(self) -> None:
        self._is_done = True

    def _commit_offsets(self, topic_partition: TopicPartition, offset: int):
        """Commits offsets for the partition of a given topic.

        This effectively advances the consumer group's position so that future reads from the same group will not
        re-read any records at or below that offset.

        :param topic_partition: Partition of the topic where offsets are to be committed.
        :param offset: Largest offset read so far.
        """

        self._consumer.commit({
            topic_partition: OffsetAndMetadata(offset=offset + 1, metadata=''),
        })

    def _connect(self) -> None:
        self._consumer: kafka.KafkaConsumer = utils.retry(
            lambda: kafka.KafkaConsumer(
                self._topic,
                bootstrap_servers=self._brokers,
                auto_offset_reset='earliest',
                enable_auto_commit=False,
                group_id='my-group',
                value_deserializer=pickle.loads,
            ),
            None,
            num_retries=15,
            exception_type=NoBrokersAvailable,
            error_message='broker unavailable...',
            logger=self._logger,
        )

    def _poll_records(self, topic_partition: TopicPartition) -> Tuple[List[StockQuote], int]:
        """Polls for records from the partition of a given topic.

        :param topic_partition: Partition of the topic to be polled.
        :return: Tuple of:
            quotes: List of StockQuote objects received from this round of polling. Can be empty.
            max_offset: The largest offset for the objects received. If no objects were received, return 0.
        """
        result = self._consumer.poll(CONSUMER_POLL_TIMEOUT_MS, max_records=CONSUMER_POLL_MAX_RECORDS)
        if topic_partition not in result:
            return [], 0

        quotes = []
        max_offset = 0
        for message in result[topic_partition]:
            max_offset = max(max_offset, message.offset)
            quote: StockQuote = message.value
            quotes.append(quote)
        return quotes, max_offset

    def _process_batch(self, topic_partition: TopicPartition, handler: Callable) -> None:
        quotes, max_offset = self._poll_records(topic_partition)
        if not quotes:
            return

        handler(quotes)
        self._logger.debug('max offset: {}'.format(max_offset))
        self._commit_offsets(topic_partition, max_offset)
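A minimal usage sketch; the broker address and topic are placeholders, and the handler just reports batch sizes:

# Hypothetical wiring of the consumer.
def handle_quotes(quotes: List[StockQuote]) -> None:
    print('received {} quotes'.format(len(quotes)))

consumer = KafkaConsumer(brokers=['localhost:9092'], topic='stock-quotes')
consumer.start(handle_quotes)  # blocks until consumer.stop() is called from another thread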
Example #4
File: utils.py  Project: r7wang/stock-up
def log_config(module) -> None:
    logger = Logger('Config')
    # Configuration constants are assumed to be the module's uppercase attributes.
    attrs = filter(lambda attr: attr[0].isupper(), dir(module))
    for key in attrs:
        val = getattr(module, key)
        logger.info('{} = {}'.format(key, val))
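A minimal usage sketch; the settings module imported here is hypothetical:

# Hypothetical usage: log every uppercase constant defined on a settings module.
import settings  # hypothetical module

log_config(settings)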
Example #5
class ConfigListener:

    def __init__(
        self,
        host: str,
        base_prefix: str,
        bucket: ConfigBucket,
    ):
        self._host = host
        self._base_prefix = base_prefix
        self._bucket = bucket
        self._client: Optional[Etcd3Client] = None
        self._watch_id = None
        self._pattern = re.compile('^{}/(.*)'.format(self._base_prefix))
        self._logger = Logger(type(self).__name__)

    def __enter__(self):
        self._logger.info('watching key range {}/*'.format(self._base_prefix))
        self._client = etcd3.client(host=self._host)
        self._watch_id = self._client.add_watch_prefix_callback(
            key_prefix=self._base_prefix + '/',
            callback=self._on_event,
        )
        for val, metadata in self._client.get_prefix(key_prefix=self._base_prefix):
            self._update_key(metadata.key, val, metadata.version)
        return self  # allow binding via `with ConfigListener(...) as listener`

    def __exit__(self, exc_type, exc_value, traceback):
        if not self._client or self._watch_id is None:
            return

        self._logger.info('unwatching key range {}/*'.format(self._base_prefix))
        self._client.cancel_watch(self._watch_id)

    def _decode(self, key: bytes) -> str:
        return key.decode('utf-8')

    def _key_suffix(self, key: str) -> Optional[str]:
        """Strips the base prefix from the given key

        :param key: Fully qualified key.
        :return: Key suffix, without the base prefix.
        """

        match = self._pattern.match(key)
        if not match:
            return None

        return match[1]

    def _on_event(self, response: WatchResponse) -> None:
        """Callback function for watched keys

        This function is invoked on a separate thread.

        :param response: Contains a response header with metadata and one or more events. Only put and delete events
                         are known and supported.
        """

        events: List[Event] = response.events
        for event in events:
            if isinstance(event, PutEvent):
                self._update_key(event.key, event.value, event.version)
            elif isinstance(event, DeleteEvent):
                self._remove_key(event.key)
            else:
                self._logger.warning('could not handle event [type={}]'.format(type(event)))

    def _remove_key(self, key: bytes) -> None:
        """Removes the key

        :param key: Key for the data to be removed.
        """

        str_key = self._key_suffix(self._decode(key))
        self._bucket.remove(key=str_key)

    def _update_key(self, key: bytes, val: bytes, version: int) -> None:
        """Attempts to update the key

        :param key: Key for the data to be updated.
        :param val: Value associated with the given key.
        :param version: Version associated with the given value.
        """

        str_key = self._key_suffix(self._decode(key))
        str_val = self._decode(val)
        modified = self._bucket.update(key=str_key, val=str_val, version=version)
        if modified:
            self._logger.info('update [key={} val={} version={}]'.format(str_key, str_val, version))
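A minimal usage sketch, assuming a local etcd node; the host, prefix, and blocking call are placeholders:

# Hypothetical wiring: mirror every key under 'config/' into a ConfigBucket.
bucket = ConfigBucket()
listener = ConfigListener(host='127.0.0.1', base_prefix='config', bucket=bucket)
with listener:
    run_service()  # hypothetical blocking call; the watch is cancelled on exit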
Example #6
class SubscriptionManager:
    def __init__(self, listener: StockQuoteListener, bucket: ConfigBucket):
        self._listener = listener
        self._bucket = bucket
        self._thread = None
        self._notify_signal = threading.Semaphore(value=0)
        self._current_subs = set()
        self._logger = Logger(type(self).__name__)

    def notify_change(self):
        """Notifies the subscription manager that requested subscriptions have been changed"""

        self._logger.info('notify subscription change')
        self._notify_signal.release()

    def notify_reset(self):
        """Notifies the subscription manager that active subscriptions have been reset"""

        self._logger.info('notify subscription reset')
        self._current_subs.clear()
        self._notify_signal.release()

    def start(self):
        """Starts the subscription manager on a new thread"""

        if self._thread:
            return

        self._logger.info('starting thread')
        self._thread = threading.Thread(target=self._update_subscriptions)
        self._thread.start()

    def stop(self):
        """Stop the subscription manager thread and wait for it to terminate"""

        if not self._thread:
            return

        self._logger.info('stopping thread')
        self._notify_signal.release()
        self._thread.join()

    def _parse_subscriptions(self) -> Set[str]:
        subs = self._bucket.get_str(CONFIG_KEY_SUBSCRIPTIONS)
        if not subs:
            return set()

        return set(subs.split(','))

    def _update_subscriptions(self) -> None:
        self._logger.info('watching subscriptions')
        subscription_id = self._bucket.subscribe(CONFIG_KEY_SUBSCRIPTIONS,
                                                 self.notify_change)

        self._current_subs = set()
        while True:
            self._logger.info('waiting on notify signal')
            self._notify_signal.acquire()

            # Check for completion before making any subscription changes. This prevents exceptions caused by
            # subscriptions that happen after the listener has already been closed.
            if self._listener.is_done():
                break

            requested_subs = self._parse_subscriptions()
            to_add = requested_subs.difference(self._current_subs)
            to_remove = self._current_subs.difference(requested_subs)
            self._listener.modify_subscriptions(to_add, to_remove)

            self._current_subs = requested_subs

        self._logger.info('unwatching subscriptions')
        self._bucket.unsubscribe(subscription_id)
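A minimal usage sketch; the listener and bucket are assumed to be constructed elsewhere:

# Hypothetical wiring: the manager applies subscription diffs on its own thread.
manager = SubscriptionManager(listener, bucket)
manager.start()
# ... config updates trigger the bucket subscription, which calls
#     manager.notify_change() ...
manager.stop()  # releases the semaphore and joins the worker thread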
Example #7
class StockQuoteListener:
    """
    Listens for stock quotes. Restarting after stopping is unsupported to simplify signal handling. Otherwise, there's
    the possibility that SIGTERM is ignored if received before the server has even started. Listeners have full
    ownership over the websocket applications they create.
    """

    def __init__(self, ws_server: str, api_token: str):
        self._ws_server = ws_server
        self._api_token = api_token
        self._app: Optional[websocket.WebSocketApp] = None
        # Lock to synchronize setup/teardown of the websocket application.
        self._ws_lock = threading.Lock()
        self._is_done = False
        self._logger = Logger(type(self).__name__)

    def is_done(self) -> bool:
        return self._is_done

    def start(self, open_handler: Callable, message_handler: Callable) -> None:
        """Starts listening for stock quotes if the listener has never been stopped

        This call is thread-safe.

        :param open_handler: Callback function invoked when the websocket connection is ready.
            Parameters:
                None
            Return:
                None
        :param message_handler: Callback function invoked for every message.
            Parameters:
                message: str
            Return:
                None
        """

        while self._setup(open_handler, message_handler):
            self._app.run_forever()
            self._teardown()

    def stop(self) -> None:
        """Stops the listener permanently

        This call is thread-safe.
        """

        self._teardown(is_done=True)

    def modify_subscriptions(self, to_add: Set[str], to_remove: Set[str]) -> None:
        """Modifies subscriptions

        This call is thread-safe.

        :param to_add: Subscriptions to be added.
        :param to_remove: Subscriptions to be removed.
        """

        # Prevent any operations that modify the state of the websocket while messages are actively being sent.
        with self._ws_lock:
            # The websocket application may not be ready to accept commands if an unreliable connection is still
            # being reinitialized.
            if not self._app:
                return

            for symbol in to_add:
                self._logger.info('subscribing to {}'.format(symbol))
                self._app.send('{{"type":"subscribe","symbol":"{}"}}'.format(symbol))
            for symbol in to_remove:
                self._logger.info('unsubscribing from {}'.format(symbol))
                self._app.send('{{"type":"unsubscribe","symbol":"{}"}}'.format(symbol))

    def _setup(self, open_handler: Callable, message_handler: Callable) -> bool:
        """Sets up the listener websocket application

        This call is not thread-safe and must be wrapped by _lock_operation().

        :param open_handler: Callback function invoked when the websocket connection is ready.
            Parameters:
                None
            Return:
                None
        :param message_handler: Callback function invoked for every message.
            Parameters:
                message: str
            Return:
                None
        :return: Whether or not the current thread is allowed to continue operating the listener.
        """

        def _on_open(app: websocket.WebSocketApp):
            self._logger.info('websocket opened')
            open_handler()

        def _on_message(app: websocket.WebSocketApp, message: str):
            message_handler(message)

        def _on_error(app: websocket.WebSocketApp, error):
            # TODO: We may want to handle specific errors here and restart the websocket connection.
            #       ERROR Handshake status 502 Bad Gateway
            self._logger.error(error)

        def _on_close(app: websocket.WebSocketApp):
            self._logger.info('websocket closed')

        with self._ws_lock:
            # May occur if we've signaled the listener to stop before even starting the listener.
            if self._is_done:
                return False

            # May occur if we try to start the listener from multiple threads. Because the application is already being
            # started on another thread, we cannot duplicate this work.
            if self._app:
                return False

            websocket.enableTrace(True)
            self._app = websocket.WebSocketApp(
                'wss://{}?token={}'.format(self._ws_server, self._api_token),
                on_open=_on_open,
                on_message=_on_message,
                on_error=_on_error,
                on_close=_on_close,
            )
            return True

    def _teardown(self, is_done: bool = False) -> None:
        """Tears down the listener websocket application.

        This call synchronizes on the internal websocket lock.

        :param is_done: If true, flag the listener to be permanently stopped.
        """

        with self._ws_lock:
            if is_done:
                self._is_done = True

            if not self._app:
                return

            self._app.close()
            self._app = None
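A minimal usage sketch; the server address, token, and symbol are placeholders:

# Hypothetical wiring of the websocket listener.
listener = StockQuoteListener(ws_server='ws.example.com', api_token='TOKEN')

def on_open() -> None:
    listener.modify_subscriptions({'AAPL'}, set())

def on_message(message: str) -> None:
    print(message)

listener.start(on_open, on_message)  # blocks until listener.stop() is called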
Example #8
class RmqConsumer(StockQuoteListener):
    def __init__(self):
        self._conn = None
        self._channel: Optional[BlockingChannel] = None
        self._is_done = False
        self._logger = Logger(type(self).__name__)

    def start(self, handler: Callable) -> None:
        def _on_message(
            channel: BlockingChannel,
            method: Basic.Deliver,
            properties: BasicProperties,
            body: bytes,
        ) -> None:
            quote: StockQuote = pickle.loads(body)
            handler([quote])
            channel.basic_ack(method.delivery_tag)

        self._connect()
        self._declare_resources()

        self._channel.basic_consume(
            queue=settings.RMQ_QUEUE_QUOTES,
            on_message_callback=_on_message,
            auto_ack=False,
            exclusive=False,
        )
        while not self._is_done:
            self._channel.start_consuming()

        if self._conn and self._conn.is_open:
            self._logger.info('closing connection')
            self._conn.close()

    def stop(self) -> None:
        self._is_done = True
        # Guard against stop() being called before start().
        if self._channel:
            self._channel.stop_consuming()

    def _connect(self) -> None:
        if self._conn and not self._conn.is_closed:
            return

        self._logger.info('connecting')
        credentials = pika.PlainCredentials(settings.RMQ_USER,
                                            settings.RMQ_PASSWORD)
        params = pika.ConnectionParameters(
            host=settings.RMQ_HOST,
            virtual_host=settings.RMQ_VHOST,
            credentials=credentials,
        )
        self._conn = utils.retry(
            lambda: pika.BlockingConnection(params),
            None,
            num_retries=15,
            exception_type=AMQPConnectionError,
            error_message='broker unavailable...',
            logger=self._logger,
        )
        self._channel: BlockingChannel = self._conn.channel()

    def _declare_resources(self) -> None:
        """Declare all resources required by the consumer."""

        self._channel.queue_declare(
            queue=settings.RMQ_QUEUE_QUOTES,
            durable=True,
            exclusive=False,
            auto_delete=False,
        )
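A minimal usage sketch; the handler here just prints each single-quote batch:

# Hypothetical wiring of the RabbitMQ consumer.
consumer = RmqConsumer()
consumer.start(lambda quotes: print(quotes))  # blocks until stop() is called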
Example #9
class RmqProducer(StockQuoteProducer):
    PERSISTENT_MESSAGE = 2

    def __init__(self):
        self._conn = None
        self._channel: Optional[BlockingChannel] = None
        self._default_message_props = pika.BasicProperties(
            delivery_mode=self.PERSISTENT_MESSAGE)
        self._logger = Logger(type(self).__name__)

        # Storing pending and nacked deliveries in non-persistent storage does pose a risk that the service might be
        # restarted with items in both containers. Losing these items may cause permanent data loss, but we expect the
        # risk to be minimized because:
        #   - pending_deliveries should only contain elements within the last few seconds
        #   - nacked_deliveries should almost always be empty while messages are being sent continuously
        self._pending_deliveries: Dict[int, StockQuote] = {}
        self._nacked_deliveries: Deque[StockQuote] = deque()

        self._acked = 0
        self._nacked = 0
        self._message_number = 0

    def close(self) -> None:
        """Gracefully terminate connection between the producer and the broker."""

        if self._conn and self._conn.is_open:
            self._logger.info('closing connection')
            self._conn.close()

    def connect(self) -> None:
        """Instantiate connection between the producer and the broker, declaring all necessary resources."""

        self._connect()
        self._declare_resources()

    def send(self, quote: StockQuote) -> None:
        """Send a stock quote to the broker.

        Attempt to redeliver all previous undeliverable messages before the current quote.
        """

        while self._nacked_deliveries:
            to_publish = self._nacked_deliveries.popleft()
            self._publish_with_retry(to_publish)
        self._publish_with_retry(quote)

    def _connect(self) -> None:
        """Defines the minimal set of functionality for reconnecting to the broker."""

        if self._conn and not self._conn.is_closed:
            return

        self._logger.info('connecting')
        credentials = pika.PlainCredentials(settings.RMQ_USER,
                                            settings.RMQ_PASSWORD)
        params = pika.ConnectionParameters(
            host=settings.RMQ_HOST,
            virtual_host=settings.RMQ_VHOST,
            credentials=credentials,
        )
        self._conn = utils.retry(
            lambda: pika.BlockingConnection(params),
            None,
            num_retries=15,
            exception_type=AMQPConnectionError,
            error_message='broker unavailable...',
            logger=self._logger,
        )
        self._channel: BlockingChannel = self._conn.channel()
        self._channel.confirm_delivery()
        self._reset_confirmation_tracking()

    def _declare_resources(self) -> None:
        """Declare all resources required by the producer."""

        self._channel.exchange_declare(
            exchange=settings.RMQ_EXCHANGE,
            exchange_type='direct',
            durable=True,
            auto_delete=False,
            internal=False,
        )
        self._channel.queue_declare(
            queue=settings.RMQ_QUEUE_QUOTES,
            durable=True,
            exclusive=False,
            auto_delete=False,
        )
        self._channel.queue_bind(
            queue=settings.RMQ_QUEUE_QUOTES,
            exchange=settings.RMQ_EXCHANGE,
        )

    # def _on_delivery_confirmation(self, method_frame: Method) -> None:
    #     """
    #     Invoked by pika when RabbitMQ responds to a Basic.Publish RPC command, passing in either a Basic.Ack or
    #     Basic.Nack frame with the delivery tag of the message that was published. The delivery tag is an integer
    #     counter indicating the message number that was sent on the channel via Basic.Publish. Here we're just doing
    #     house keeping to keep track of stats and remove message numbers that we expect a delivery confirmation of from
    #     the list used to keep track of messages that are pending confirmation.
    #
    #     :param pika.frame.Method method_frame: Basic.Ack or Basic.Nack frame.
    #     """
    #
    #     confirmation_type = method_frame.method.NAME.split('.')[1].lower()
    #     delivery_tag = method_frame.method.delivery_tag
    #     logger.info('Thread (delivery confirmation): {}'.format(threading.get_ident()))
    #     logger.info('Received {} for delivery tag: {}'.format(confirmation_type, delivery_tag))
    #     if confirmation_type == 'ack':
    #         self._acked += 1
    #     elif confirmation_type == 'nack':
    #         self._nacked += 1
    #         self._nacked_deliveries.append(self._pending_deliveries[delivery_tag])
    #     del self._pending_deliveries[delivery_tag]
    #     logger.info(
    #         'Published {} messages, {} have yet to be confirmed, {} were acked and {} were nacked'.format(
    #             self._message_number,
    #             len(self._pending_deliveries),
    #             self._acked,
    #             self._nacked,
    #         ),
    #     )

    def _publish(self, quote: StockQuote) -> None:
        utils.retry(
            lambda: self._channel.basic_publish(
                exchange=settings.RMQ_EXCHANGE,
                routing_key=settings.RMQ_QUEUE_QUOTES,
                body=pickle.dumps(quote),
                properties=self._default_message_props,
                mandatory=False,
            ),
            None,
            num_retries=15,
            exception_type=NackError,
            error_message='nacked the published message...',
            logger=self._logger,
        )
        self._message_number += 1
        # self._pending_deliveries[self._message_number] = quote

    def _publish_with_retry(self, quote: StockQuote) -> None:
        utils.retry(
            lambda: self._publish(quote),
            lambda: self._connect(),
            num_retries=15,
            exception_type=ConnectionClosed,
            error_message='connection not open...',
            logger=self._logger,
        )

    def _reset_confirmation_tracking(self) -> None:
        # TODO: What happens to pending deliveries and nacked deliveries? For at least once delivery, assume the
        #       messages all need to be resent.
        self._nacked_deliveries.extend(self._pending_deliveries.values())
        self._pending_deliveries = {}
        self._acked = 0
        self._nacked = 0
        self._message_number = 0
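A minimal usage sketch; the quote object is a placeholder:

# Hypothetical wiring of the RabbitMQ producer.
producer = RmqProducer()
producer.connect()    # connects and enables publisher confirms
producer.send(quote)  # quote: StockQuote; resends nacked deliveries first
producer.close()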