class MetricGenerator:
    """Generates metrics needed by the stock analyzer"""

    def __init__(self):
        self._metric_formatter = MetricFormatter()
        self._logger = Logger(type(self).__name__)

    def get_metrics(self, time_window: TimeWindow, symbol: str, timestamp: int) -> List[str]:
        """Get list of formatted metrics

        :param time_window: Object containing accumulated statistics for a given time window.
        :param symbol: The stock ticker symbol.
        :param timestamp: The timestamp to use for all metrics, in milliseconds.
        :return: List of formatted metrics.
        """
        format_metric = self._metric_formatter.get_formatter(symbol, timestamp)
        try:
            return [
                *format_metric('transactions', time_window.get_transaction_count()),
                *format_metric('volume', time_window.get_volume()),
                *format_metric('last_price', time_window.get_last_price()),
                *format_metric('min_price', time_window.get_min_price()),
                *format_metric('max_price', time_window.get_max_price()),
                *format_metric('avg_price_trans', time_window.get_average_price_by_transaction()),
                *format_metric('avg_price_volume', time_window.get_average_price_by_volume()),
            ]
        except KeyError as ex:
            self._logger.error('could not format unsupported metric type: {}'.format(ex))
            return []
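# Usage sketch (illustrative, not part of the original source): how the generator and the
# MetricWriter below are expected to fit together. `time_window` is assumed to be a TimeWindow
# already populated by the analyzer; the symbol and timestamp values are made up.
def example_generate_and_write_metrics(time_window: TimeWindow) -> None:
    generator = MetricGenerator()
    metrics = generator.get_metrics(time_window, symbol='AAPL', timestamp=1590599999999)
    MetricWriter().write(metrics)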
def retry(
    func: Callable,
    resolution_func: Optional[Callable],
    num_retries: int,
    exception_type: Type,
    error_message: str,
    logger: Logger,
) -> Any:
    """Invokes func, retrying up to num_retries times (one second apart) when exception_type is raised.

    If resolution_func is provided, it is invoked after each failure before the next attempt.
    Returns func's result on success; returns None if every attempt fails.
    """
    for i in range(num_retries + 1):
        try:
            return func()
        except exception_type:
            logger.info('{} {}'.format(error_message, i))
            if resolution_func:
                resolution_func()
            time.sleep(1)
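# Usage sketch (illustrative, not part of the original source): mirrors how the producers and
# consumers in this codebase call retry(). The connect function and exception type are passed
# in as hypothetical stand-ins rather than real dependencies.
def example_retry_usage(connect_to_service: Callable, ServiceUnavailable: Type) -> Any:
    return retry(
        lambda: connect_to_service('example-host'),
        None,
        num_retries=5,
        exception_type=ServiceUnavailable,
        error_message='service unavailable...',
        logger=Logger('Example'),
    )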
class StockQuotePipeline:
    def __init__(self, writer: StockQuoteWriter):
        self._writer = writer
        self._logger = Logger(type(self).__name__)

    def handler(self, message: str) -> None:
        """Callback that receives a raw stock quote message.

        :param message: Raw stock quote message.
        """
        self._logger.debug(message)
        quotes = self.parse(message)
        self._writer.write(quotes)

    def parse(self, message: str) -> List[StockQuote]:
        """Converts a raw stock quote message into its domain representation.

        :param message: Raw stock quote message.
        :return: List of StockQuote objects. Can be empty.
        """
        try:
            # Ensures that we don't lose any precision while loading the JSON.
            data = json.loads(message, parse_float=lambda val: Decimal(val))
        except json.decoder.JSONDecodeError:
            self._logger.error('unknown message: {}'.format(message))
            return []

        message_type = data.get('type')
        if not message_type:
            self._logger.error('message missing type: {}'.format(data))
            return []
        if message_type == 'ping':
            return []
        if not data.get('data'):
            self._logger.error('message missing data: {}'.format(data))
            return []

        quotes = data['data']
        return list(
            map(
                # Ensure that we always maintain correct data types.
                lambda quote: StockQuote(
                    timestamp=int(quote['t']),
                    symbol=str(quote['s']),
                    price=Decimal(quote['p']),
                    volume=int(quote['v']),
                ),
                quotes,
            ))
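# Illustrative example (not from the original source): a raw payload shaped the way parse()
# expects. The "type" value "trade" is an assumption; parse() only rejects messages with a
# missing type and silently drops "ping" messages. `writer` is a stand-in StockQuoteWriter.
def example_parse_quote_message(writer: StockQuoteWriter) -> List[StockQuote]:
    raw = '{"type": "trade", "data": [{"t": 1590599999999, "s": "AAPL", "p": 318.89, "v": 100}]}'
    pipeline = StockQuotePipeline(writer)
    return pipeline.parse(raw)
    # -> [StockQuote(timestamp=1590599999999, symbol='AAPL', price=Decimal('318.89'), volume=100)]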
class KafkaProducer(StockQuoteProducer):
    def __init__(self, brokers: List[str], topic: str):
        self._brokers = brokers
        self._topic = topic
        self._producer = None
        self._logger = Logger(type(self).__name__)

    def close(self) -> None:
        """Gracefully terminate connection between the producer and the broker."""
        self._logger.info('flushing & closing')
        self._producer.flush()
        self._producer.close()

    def connect(self) -> None:
        """Instantiate connection between the producer and the broker."""
        self._logger.info('connecting to broker')
        self._producer = utils.retry(
            lambda: kafka.KafkaProducer(
                bootstrap_servers=self._brokers,
                value_serializer=lambda item: pickle.dumps(item),
            ),
            None,
            num_retries=15,
            exception_type=NoBrokersAvailable,
            error_message='broker unavailable...',
            logger=self._logger,
        )

    def send(self, quote: StockQuote) -> None:
        """Send a stock quote to the broker."""
        utils.retry(
            lambda: self._producer.send(self._topic, quote),
            None,
            num_retries=15,
            exception_type=KafkaTimeoutError,
            error_message='send timeout...',
            logger=self._logger,
        )
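# Usage sketch (illustrative, not part of the original source): connect once, publish quotes as
# they arrive, and flush on shutdown. The broker address and topic name are made-up values.
def example_kafka_producer_usage(quote: StockQuote) -> None:
    producer = KafkaProducer(brokers=['kafka:9092'], topic='stock-quotes')
    producer.connect()
    producer.send(quote)
    producer.close()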
class MetricWriter:
    """
    Helper class for writing metrics in InfluxDB line format. For format details, see the link below.

    https://v2.docs.influxdata.com/v2.0/reference/syntax/line-protocol/
    """

    def __init__(self):
        self._db_client = influxdb.InfluxDBClient(
            host=settings.INFLUXDB_HOST,
            port=settings.INFLUXDB_PORT,
            username=settings.INFLUXDB_USER,
            password=settings.INFLUXDB_PASSWORD,
            database=settings.INFLUXDB_DB_NAME,
        )
        self._logger = Logger(type(self).__name__)

    def write(self, metric_data: List[str]) -> None:
        try:
            self._logger.debug('metric data: {}'.format(metric_data))
            write_result = self._db_client.write_points(
                points=metric_data,
                time_precision='ms',
                protocol='line',
            )
            if not write_result:
                self._logger.warning('could not write to influx')
        except InfluxDBClientError as ex:
            if ex.code == 400:
                # We are expecting to catch the following scenarios:
                # - writing points that are older than the retention policy
                self._logger.warning(
                    'write_points client error [code={}, content={}]'.format(
                        ex.code, ex.content))
            else:
                raise
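# Illustrative example (not from the original source) of InfluxDB line-protocol strings that
# write() can accept, matching time_precision='ms'. The measurement, tag, and field names below
# are assumptions, not names taken from this codebase.
def example_metric_writer_usage() -> None:
    metric_data = [
        'stock_price,symbol=AAPL last_price=318.89 1590599999999',
        'stock_price,symbol=AAPL volume=100i 1590599999999',
    ]
    MetricWriter().write(metric_data)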
def handle_termination_signal(listener, logger=Logger()):
    """Handles common signals using a standard approach to ensure that applications terminate gracefully.

    :param listener: Must expose a stop() function.
    :param logger: Logger used to report the handled signal.
    """

    def signal_stop(sig_num: int, frame):
        logger.info('Handling signal {}'.format(sig_num))
        if listener:
            logger.info('Stopping listener...')
            listener.stop()

    signal.signal(signal.SIGINT, signal_stop)
    signal.signal(signal.SIGTERM, signal_stop)
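# Usage sketch (illustrative, not part of the original source): install the handlers before the
# blocking start() call so SIGINT/SIGTERM stop the consumer. Uses the RmqConsumer defined further
# below; `writer` is a hypothetical StockQuoteWriter instance.
def example_graceful_consumer(writer: StockQuoteWriter) -> None:
    consumer = RmqConsumer()
    handle_termination_signal(consumer)
    consumer.start(handler=writer.write)  # blocks until a termination signal stops the consumer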
class RmqConsumer(StockQuoteListener):
    def __init__(self):
        self._conn = None
        self._channel: Optional[BlockingChannel] = None
        self._is_done = False
        self._logger = Logger(type(self).__name__)

    def start(self, handler: Callable) -> None:
        def _on_message(
            channel: BlockingChannel,
            method: Basic.Deliver,
            properties: BasicProperties,
            body: bytes,
        ) -> None:
            quote: StockQuote = pickle.loads(body)
            handler([quote])
            channel.basic_ack(method.delivery_tag)

        self._connect()
        self._declare_resources()
        self._channel.basic_consume(
            queue=settings.RMQ_QUEUE_QUOTES,
            on_message_callback=_on_message,
            auto_ack=False,
            exclusive=False,
        )
        while not self._is_done:
            self._channel.start_consuming()

        if self._conn and self._conn.is_open:
            self._logger.info('closing connection')
            self._conn.close()

    def stop(self) -> None:
        self._is_done = True
        self._channel.stop_consuming()

    def _connect(self) -> None:
        if self._conn and not self._conn.is_closed:
            return

        self._logger.info('connecting')
        credentials = pika.PlainCredentials(settings.RMQ_USER, settings.RMQ_PASSWORD)
        params = pika.ConnectionParameters(
            host=settings.RMQ_HOST,
            virtual_host=settings.RMQ_VHOST,
            credentials=credentials,
        )
        self._conn = utils.retry(
            lambda: pika.BlockingConnection(params),
            None,
            num_retries=15,
            exception_type=AMQPConnectionError,
            error_message='broker unavailable...',
            logger=self._logger,
        )
        self._channel: BlockingChannel = self._conn.channel()

    def _declare_resources(self) -> None:
        """Declare all resources required by the consumer."""
        self._channel.queue_declare(
            queue=settings.RMQ_QUEUE_QUOTES,
            durable=True,
            exclusive=False,
            auto_delete=False,
        )
def log_config(module) -> None:
    logger = Logger('Config')
    attrs = filter(lambda attr: attr[0].isupper(), dir(module))
    for key in attrs:
        # Read the value from the same module whose attribute names were enumerated above.
        val = getattr(module, key)
        logger.info('{} = {}'.format(key, val))
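# Usage sketch (illustrative, not part of the original source): dump every upper-case constant
# from the settings module when the service starts.
def example_log_config() -> None:
    log_config(settings)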
class ConfigListener:
    def __init__(
        self,
        host: str,
        base_prefix: str,
        bucket: ConfigBucket,
    ):
        self._host = host
        self._base_prefix = base_prefix
        self._bucket = bucket
        self._client: Optional[Etcd3Client] = None
        self._watch_id = None
        self._pattern = re.compile('^{}/(.*)'.format(self._base_prefix))
        self._logger = Logger(type(self).__name__)

    def __enter__(self):
        self._logger.info('watching key range {}/*'.format(self._base_prefix))
        self._client = etcd3.client(host=self._host)
        self._watch_id = self._client.add_watch_prefix_callback(
            key_prefix=self._base_prefix + '/',
            callback=self._on_event,
        )
        for val, metadata in self._client.get_prefix(key_prefix=self._base_prefix):
            self._update_key(metadata.key, val, metadata.version)

    def __exit__(self, exc_type, exc_value, traceback):
        if not self._client or self._watch_id is None:
            return
        self._logger.info('unwatching key range {}/*'.format(self._base_prefix))
        self._client.cancel_watch(self._watch_id)

    def _decode(self, key: bytes) -> str:
        return key.decode('utf-8')

    def _key_suffix(self, key: str) -> Optional[str]:
        """Strips the base prefix from the given key

        :param key: Fully qualified key.
        :return: Key suffix, without the base prefix.
        """
        match = self._pattern.match(key)
        if not match:
            return None
        return match[1]

    def _on_event(self, response: WatchResponse) -> None:
        """Callback function for watched keys

        This function is invoked on a separate thread.

        :param response: Contains a response header with metadata and one or more events. Only put and delete
            events are known and supported.
        """
        events: List[Event] = response.events
        for event in events:
            if isinstance(event, PutEvent):
                self._update_key(event.key, event.value, event.version)
            elif isinstance(event, DeleteEvent):
                self._remove_key(event.key)
            else:
                self._logger.warning('could not handle event [type={}]'.format(type(event)))

    def _remove_key(self, key: bytes) -> None:
        """Removes the key

        :param key: Key for the data to be removed.
        """
        str_key = self._key_suffix(self._decode(key))
        self._bucket.remove(key=str_key)

    def _update_key(self, key: bytes, val: bytes, version: int) -> None:
        """Attempts to update the key

        :param key: Key for the data to be updated.
        :param val: Value associated with the given key.
        :param version: Version associated with the given value.
        """
        str_key = self._key_suffix(self._decode(key))
        str_val = self._decode(val)
        modified = self._bucket.update(key=str_key, val=str_val, version=version)
        if modified:
            self._logger.info('update [key={} val={} version={}]'.format(str_key, str_val, version))
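# Usage sketch (illustrative, not part of the original source): the listener is a context
# manager, so the etcd watch lives for the duration of the block. The host and prefix values are
# made up, and run_main_loop is a hypothetical application entry point.
def example_config_listener_usage(bucket: ConfigBucket, run_main_loop: Callable) -> None:
    with ConfigListener(host='etcd', base_prefix='/config/stock-app', bucket=bucket):
        run_main_loop(bucket)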
class SubscriptionManager:
    def __init__(self, listener: StockQuoteListener, bucket: ConfigBucket):
        self._listener = listener
        self._bucket = bucket
        self._thread = None
        self._notify_signal = threading.Semaphore(value=0)
        self._current_subs = set()
        self._logger = Logger(type(self).__name__)

    def notify_change(self):
        """Notifies the subscription manager that requested subscriptions have been changed"""
        self._logger.info('notify subscription change')
        self._notify_signal.release()

    def notify_reset(self):
        """Notifies the subscription manager that active subscriptions have been reset"""
        self._logger.info('notify subscription reset')
        self._current_subs.clear()
        self._notify_signal.release()

    def start(self):
        """Starts the subscription manager on a new thread"""
        if self._thread:
            return
        self._logger.info('starting thread')
        self._thread = threading.Thread(target=self._update_subscriptions)
        self._thread.start()

    def stop(self):
        """Stop the subscription manager thread and wait for it to terminate"""
        if not self._thread:
            return
        self._logger.info('stopping thread')
        self._notify_signal.release()
        self._thread.join()

    def _parse_subscriptions(self) -> Set[str]:
        subs = self._bucket.get_str(CONFIG_KEY_SUBSCRIPTIONS)
        if not subs:
            return set()
        return set(subs.split(','))

    def _update_subscriptions(self) -> None:
        self._logger.info('watching subscriptions')
        subscription_id = self._bucket.subscribe(CONFIG_KEY_SUBSCRIPTIONS, self.notify_change)
        self._current_subs = set()

        while True:
            self._logger.info('waiting on notify signal')
            self._notify_signal.acquire()

            # Check for completion before making any subscription changes. This prevents exceptions caused by
            # subscriptions that happen after the listener has already been closed.
            if self._listener.is_done():
                break

            requested_subs = self._parse_subscriptions()
            to_add = requested_subs.difference(self._current_subs)
            to_remove = self._current_subs.difference(requested_subs)
            self._listener.modify_subscriptions(to_add, to_remove)
            self._current_subs = requested_subs

        self._logger.info('unwatching subscriptions')
        self._bucket.unsubscribe(subscription_id)
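# Usage sketch (illustrative, not part of the original source): how the manager is expected to
# be driven. Note that stop() only releases the semaphore; the worker loop exits because the
# listener reports is_done() once it has been stopped, so the listener is stopped first.
def example_subscription_manager(listener: StockQuoteListener, bucket: ConfigBucket) -> None:
    manager = SubscriptionManager(listener, bucket)
    manager.start()          # spawn the background thread
    manager.notify_change()  # e.g. after new subscriptions land in the config bucket
    listener.stop()          # makes is_done() return True
    manager.stop()           # wake the worker thread and join it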
class StockQuoteListener:
    """
    Listens for stock quotes.

    Restarting after stopping is unsupported to simplify signal handling. Otherwise, there's the possibility that
    SIGTERM is ignored if received before the server has even started.

    Listeners have full ownership over the websocket applications they create.
    """

    def __init__(self, ws_server: str, api_token: str):
        self._ws_server = ws_server
        self._api_token = api_token
        self._app: Optional[websocket.WebSocketApp] = None
        # Lock to synchronize setup/teardown of the websocket application.
        self._ws_lock = threading.Lock()
        self._is_done = False
        self._logger = Logger(type(self).__name__)

    def is_done(self) -> bool:
        return self._is_done

    def start(self, open_handler: Callable, message_handler: Callable) -> None:
        """Starts listening for stock quotes if the listener has never been stopped

        This call is thread-safe.

        :param open_handler: Callback function invoked when the websocket connection is ready.
            Parameters: None
            Return: None
        :param message_handler: Callback function invoked for every message.
            Parameters: message: str
            Return: None
        """
        while self._setup(open_handler, message_handler):
            self._app.run_forever()
        self._teardown()

    def stop(self) -> None:
        """Stops the listener permanently

        This call is thread-safe.
        """
        self._teardown(is_done=True)

    def modify_subscriptions(self, to_add: Set[str], to_remove: Set[str]) -> None:
        """Modifies subscriptions

        This call is thread-safe.

        :param to_add: Subscriptions to be added.
        :param to_remove: Subscriptions to be removed.
        """
        # Prevent any operations that modify the state of the websocket while messages are actively being sent.
        with self._ws_lock:
            # Websocket application may not be ready to accept commands if an unreliable connection is still being
            # reinitialized.
            if not self._app:
                return
            for symbol in to_add:
                self._logger.info('subscribing to {}'.format(symbol))
                self._app.send('{{"type":"subscribe","symbol":"{}"}}'.format(symbol))
            for symbol in to_remove:
                self._logger.info('unsubscribing from {}'.format(symbol))
                self._app.send('{{"type":"unsubscribe","symbol":"{}"}}'.format(symbol))

    def _setup(self, open_handler: Callable, message_handler: Callable) -> bool:
        """Sets up the listener websocket application

        The websocket lock is acquired internally, so callers must not already hold it.

        :param open_handler: Callback function invoked when the websocket connection is ready.
            Parameters: None
            Return: None
        :param message_handler: Callback function invoked for every message.
            Parameters: message: str
            Return: None
        :return: Whether or not the current thread is allowed to continue operating the listener.
        """

        def _on_open(app: websocket.WebSocketApp):
            self._logger.info('websocket opened')
            open_handler()

        def _on_message(app: websocket.WebSocketApp, message: str):
            message_handler(message)

        def _on_error(app: websocket.WebSocketApp, error):
            # TODO: We may want to handle specific errors here and restart the websocket connection.
            #   ERROR Handshake status 502 Bad Gateway
            self._logger.error(error)

        def _on_close(app: websocket.WebSocketApp):
            self._logger.info('websocket closed')

        with self._ws_lock:
            # May occur if we've signaled the listener to stop before even starting the listener.
            if self._is_done:
                return False
            # May occur if we try to start the listener from multiple threads. Because the application is already
            # being started on another thread, we cannot duplicate this work.
            if self._app:
                return False

            websocket.enableTrace(True)
            self._app = websocket.WebSocketApp(
                'wss://{}?token={}'.format(self._ws_server, self._api_token),
                on_open=_on_open,
                on_message=_on_message,
                on_error=_on_error,
                on_close=_on_close,
            )
            return True

    def _teardown(self, is_done: bool = False) -> None:
        """Tears down the listener websocket application.

        The websocket lock is acquired internally, so callers must not already hold it.

        :param is_done: If true, flag the listener to be permanently stopped.
        """
        with self._ws_lock:
            if is_done:
                self._is_done = True
            if not self._app:
                return
            self._app.close()
            self._app = None
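# Usage sketch (illustrative, not part of the original source): run the listener on the main
# thread and subscribe once the socket reports it is open. The server address, token, and the
# symbols below are made-up values; `pipeline` is the StockQuotePipeline defined earlier.
def example_stock_quote_listener(pipeline: StockQuotePipeline) -> None:
    listener = StockQuoteListener(ws_server='ws.example.com/ws', api_token='MY_TOKEN')
    handle_termination_signal(listener)
    listener.start(
        open_handler=lambda: listener.modify_subscriptions({'AAPL', 'MSFT'}, set()),
        message_handler=pipeline.handler,
    )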
class KafkaConsumer(StockQuoteListener):
    def __init__(self, brokers: List[str], topic: str):
        self._brokers = brokers
        self._topic = topic
        self._consumer = None
        self._is_done = False
        self._logger = Logger(type(self).__name__)

    def start(self, handler: Callable) -> None:
        """Starts listening for stock quotes if the listener has never been stopped

        :param handler: Callback function invoked for every batch of stock quotes, with the following signature:
            quotes: List[StockQuote]
            return: None
        """
        self._connect()

        partitions = self._consumer.partitions_for_topic(self._topic)
        self._logger.info('partitions: {}'.format(', '.join(map(lambda partition: str(partition), partitions))))

        # Assume that only one partition exists.
        topic_partition = TopicPartition(topic=self._topic, partition=0)
        begin_offsets = self._consumer.beginning_offsets([topic_partition])
        end_offsets = self._consumer.end_offsets([topic_partition])
        last_committed_offset = self._consumer.committed(topic_partition)
        self._logger.info('starting offset: {}'.format(begin_offsets[topic_partition]))
        self._logger.info('last offset: {}'.format(end_offsets[topic_partition]))
        self._logger.info('last committed offset: {}'.format(last_committed_offset))

        while not self._is_done:
            self._process_batch(topic_partition, handler)

        self._logger.info('closing consumer')
        self._consumer.close(autocommit=False)

    def stop(self) -> None:
        self._is_done = True

    def _commit_offsets(self, topic_partition: TopicPartition, offset: int):
        """Commits offsets for the partition of a given topic.

        This effectively advances the index so that future reads from the same Kafka consumer group will not read
        any records up to that offset.

        :param topic_partition: Partition of the topic where offsets are to be committed.
        :param offset: Largest offset read so far.
        """
        self._consumer.commit({
            topic_partition: OffsetAndMetadata(offset=offset + 1, metadata=''),
        })

    def _connect(self) -> None:
        self._consumer: kafka.KafkaConsumer = utils.retry(
            lambda: kafka.KafkaConsumer(
                self._topic,
                bootstrap_servers=self._brokers,
                auto_offset_reset='earliest',
                enable_auto_commit=False,
                group_id='my-group',
                value_deserializer=lambda item: pickle.loads(item),
            ),
            None,
            num_retries=15,
            exception_type=NoBrokersAvailable,
            error_message='broker unavailable...',
            logger=self._logger,
        )

    def _poll_records(self, topic_partition: TopicPartition) -> (List[StockQuote], int):
        """Polls for records from the partition of a given topic.

        :param topic_partition: Partition of the topic to be polled.
        :return: Tuple of:
            quotes: List of StockQuote objects received from this round of polling. Can be empty.
            max_offset: The largest offset for the objects received. If no objects were received, return 0.
        """
        result = self._consumer.poll(CONSUMER_POLL_TIMEOUT_MS, max_records=CONSUMER_POLL_MAX_RECORDS)
        if topic_partition not in result:
            return [], 0

        quotes = []
        max_offset = 0
        for message in result[topic_partition]:
            max_offset = max(max_offset, message.offset)
            quote: StockQuote = message.value
            quotes.append(quote)
        return quotes, max_offset

    def _process_batch(self, topic_partition: TopicPartition, handler: Callable) -> None:
        quotes, max_offset = self._poll_records(topic_partition)
        if not quotes:
            return
        handler(quotes)
        self._logger.debug('max offset: {}'.format(max_offset))
        self._commit_offsets(topic_partition, max_offset)
class RmqProducer(StockQuoteProducer):
    PERSISTENT_MESSAGE = 2

    def __init__(self):
        self._conn = None
        self._channel: Optional[BlockingChannel] = None
        self._default_message_props = pika.BasicProperties(delivery_mode=self.PERSISTENT_MESSAGE)
        self._logger = Logger(type(self).__name__)
        # Storing pending and nacked deliveries in non-persistent storage does pose a risk that the service might be
        # restarted with items in both containers. Losing these items may cause permanent data loss, but we expect
        # the risk to be minimized because:
        # - pending_deliveries should only contain elements within the last few seconds
        # - nacked_deliveries should almost always be empty while messages are being sent continuously
        self._pending_deliveries: Dict[int, StockQuote] = {}
        self._nacked_deliveries: Deque[StockQuote] = deque()
        self._acked = 0
        self._nacked = 0
        self._message_number = 0

    def close(self) -> None:
        """Gracefully terminate connection between the producer and the broker."""
        if self._conn and self._conn.is_open:
            self._logger.info('closing connection')
            self._conn.close()

    def connect(self) -> None:
        """Instantiate connection between the producer and the broker, declaring all necessary resources."""
        self._connect()
        self._declare_resources()

    def send(self, quote: StockQuote) -> None:
        """Send a stock quote to the broker.

        Attempt to redeliver all previous undeliverable messages before the current quote.
        """
        while self._nacked_deliveries:
            to_publish = self._nacked_deliveries.popleft()
            self._publish_with_retry(to_publish)
        self._publish_with_retry(quote)

    def _connect(self) -> None:
        """Defines the minimal set of functionality for reconnecting to the broker."""
        if self._conn and not self._conn.is_closed:
            return

        self._logger.info('connecting')
        credentials = pika.PlainCredentials(settings.RMQ_USER, settings.RMQ_PASSWORD)
        params = pika.ConnectionParameters(
            host=settings.RMQ_HOST,
            virtual_host=settings.RMQ_VHOST,
            credentials=credentials,
        )
        self._conn = utils.retry(
            lambda: pika.BlockingConnection(params),
            None,
            num_retries=15,
            exception_type=AMQPConnectionError,
            error_message='broker unavailable...',
            logger=self._logger,
        )
        self._channel: BlockingChannel = self._conn.channel()
        self._channel.confirm_delivery()
        self._reset_confirmation_tracking()

    def _declare_resources(self) -> None:
        """Declare all resources required by the producer."""
        self._channel.exchange_declare(
            exchange=settings.RMQ_EXCHANGE,
            exchange_type='direct',
            durable=True,
            auto_delete=False,
            internal=False,
        )
        self._channel.queue_declare(
            queue=settings.RMQ_QUEUE_QUOTES,
            durable=True,
            exclusive=False,
            auto_delete=False,
        )
        self._channel.queue_bind(
            queue=settings.RMQ_QUEUE_QUOTES,
            exchange=settings.RMQ_EXCHANGE,
        )

    # def _on_delivery_confirmation(self, method_frame: Method) -> None:
    #     """
    #     Invoked by pika when RabbitMQ responds to a Basic.Publish RPC command, passing in either a Basic.Ack or
    #     Basic.Nack frame with the delivery tag of the message that was published. The delivery tag is an integer
    #     counter indicating the message number that was sent on the channel via Basic.Publish. Here we're just
    #     doing housekeeping to keep track of stats and remove message numbers that we expect a delivery
    #     confirmation of from the list used to keep track of messages that are pending confirmation.
    #
    #     :param pika.frame.Method method_frame: Basic.Ack or Basic.Nack frame.
    #     """
    #     confirmation_type = method_frame.method.NAME.split('.')[1].lower()
    #     delivery_tag = method_frame.method.delivery_tag
    #     logger.info('Thread (delivery confirmation): {}'.format(threading.get_ident()))
    #     logger.info('Received {} for delivery tag: {}'.format(confirmation_type, delivery_tag))
    #     if confirmation_type == 'ack':
    #         self._acked += 1
    #     elif confirmation_type == 'nack':
    #         self._nacked += 1
    #         self._nacked_deliveries.append(self._pending_deliveries[delivery_tag])
    #     del self._pending_deliveries[delivery_tag]
    #     logger.info(
    #         'Published {} messages, {} have yet to be confirmed, {} were acked and {} were nacked'.format(
    #             self._message_number,
    #             len(self._pending_deliveries),
    #             self._acked,
    #             self._nacked,
    #         ),
    #     )

    def _publish(self, quote: StockQuote) -> None:
        utils.retry(
            lambda: self._channel.basic_publish(
                exchange=settings.RMQ_EXCHANGE,
                routing_key=settings.RMQ_QUEUE_QUOTES,
                body=pickle.dumps(quote),
                properties=self._default_message_props,
                mandatory=False,
            ),
            None,
            num_retries=15,
            exception_type=NackError,
            error_message='nacked the published message...',
            logger=self._logger,
        )
        self._message_number += 1
        # self._pending_deliveries[self._message_number] = quote

    def _publish_with_retry(self, quote: StockQuote) -> None:
        utils.retry(
            lambda: self._publish(quote),
            lambda: self._connect(),
            num_retries=15,
            exception_type=ConnectionClosed,
            error_message='connection not open...',
            logger=self._logger,
        )

    def _reset_confirmation_tracking(self) -> None:
        # TODO: What happens to pending deliveries and nacked deliveries? For at least once delivery, assume the
        #   messages all need to be resent.
        self._nacked_deliveries.extend(self._pending_deliveries.values())
        self._pending_deliveries = {}
        self._acked = 0
        self._nacked = 0
        self._message_number = 0