async def __consume(self, loop) -> _QueueInternalResult:
    def deserializer(serialized):
        return json.loads(serialized)

    consumer = None
    try:
        consumer = AIOKafkaConsumer(
            self._config.optional_param["topic_name"],
            loop=loop,
            group_id="youyaku_ai_group",
            # isolation_level="read_committed",
            bootstrap_servers=self._config.get_url(),
            value_deserializer=deserializer,
            auto_offset_reset="earliest",
            enable_auto_commit=False,
        )
        await consumer.start()

        # Check the consumer position against the end offset; if there is
        # no data left, return an empty result.
        # TODO: only a single partition is handled here; multi-partition
        # support is still needed.
        partition = list(consumer.assignment())[0]
        position = await consumer.position(partition=partition)
        offset_dict = await consumer.end_offsets(partitions=[partition])
        end = offset_dict[partition]
        if position == end:
            return _QueueInternalResult(result=[], e=None)

        # Fetch a single message.
        data = await consumer.getone()
        messages = [data.value]
        await consumer.commit()
    except Exception as e:
        return _QueueInternalResult(result=None, e=e)
    finally:
        # Guard against the constructor raising before `consumer` is bound.
        if consumer is not None:
            await consumer.stop()
    return _QueueInternalResult(result=messages, e=None)

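# The position/end_offsets comparison above is the core "is the topic
# drained?" check. Below is a condensed, standalone sketch of the same
# pattern; the broker address, topic, and group name are placeholders,
# not taken from the original code.
import asyncio

from aiokafka import AIOKafkaConsumer


async def topic_is_drained(topic: str, servers: str = "localhost:9092") -> bool:
    consumer = AIOKafkaConsumer(
        topic,
        bootstrap_servers=servers,
        group_id="example-group",
        enable_auto_commit=False,
    )
    await consumer.start()
    try:
        # With group subscription, assignment can arrive after start().
        while not consumer.assignment():
            await asyncio.sleep(0.1)
        partition = list(consumer.assignment())[0]
        position = await consumer.position(partition)
        end = (await consumer.end_offsets([partition]))[partition]
        # True when there is nothing left to fetch on this partition.
        return position == end
    finally:
        await consumer.stop()
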
class BaseKafkaTableBuilder(object):
    """Table builder.

    Builds table using single consumer consuming linearly
    from raw topic.
    """

    def __init__(self, topic, loop):
        self.topic = topic
        self.consumer = None
        self.messages = []
        self.loop = loop
        self.table = defaultdict(int)
        self.key_tps = defaultdict(set)
        self._assignment = None

    async def build(self):
        await self._init_consumer()
        await self._build_table()

    def get_key(self, message):
        return json.loads(message.key.decode())

    def get_value(self, message):
        return json.loads(message.value.decode())

    async def _init_consumer(self):
        if not self.consumer:
            self.consumer = AIOKafkaConsumer(
                self.topic,
                loop=self.loop,
                bootstrap_servers=bootstrap_servers,  # module-level setting
                auto_offset_reset="earliest",
            )
            await self.consumer.start()
            self._assignment = self.consumer.assignment()

    async def _build_table(self):
        while True:
            message = await self.consumer.getone()
            self.messages.append(message)
            await self._apply(message)
            if await self._positions() == self._highwaters():
                print("Done building table")
                return

    async def _apply(self, message):
        print(message)

    async def _positions(self):
        assert self.consumer
        return {
            tp: await self.consumer.position(tp) for tp in self._assignment
        }

    def _highwaters(self):
        assert self.consumer
        return {tp: self.consumer.highwater(tp) for tp in self._assignment}

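# A hypothetical subclass sketch showing how BaseKafkaTableBuilder is meant
# to be driven: override _apply() to fold each message into self.table, then
# await build(), which returns once positions reach the highwater marks.
# The class name, key handling, and topic name below are illustrative.
class CountingTableBuilder(BaseKafkaTableBuilder):
    """Hypothetical subclass: counts messages per raw key."""

    async def _apply(self, message):
        key = message.key.decode() if message.key else "<no key>"
        self.table[key] += 1


async def build_counts(loop):
    builder = CountingTableBuilder("raw-events", loop)
    await builder.build()  # blocks until the consumer catches up to highwaters
    return dict(builder.table)
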
async def consume_events(app: web.Application) -> None:
    """The main Kafka consumer, which routes messages to processing functions
    or tasks.
    """
    logger = structlog.get_logger(app["safir/config"].logger_name)

    registry = RegistryApi(
        session=app["safir/http_session"],
        url=app["safir/config"].schema_registry_url,
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["safir/config"].kafka_broker_url,
        "group_id": app["safir/config"].kafka_consumer_group_id,
        "auto_offset_reset": "latest",
        "security_protocol": app["safir/config"].kafka_protocol,
    }
    if consumer_settings["security_protocol"] == "SSL":
        consumer_settings["ssl_context"] = app["safir/kafka_ssl_context"]
    consumer = AIOKafkaConsumer(
        loop=asyncio.get_event_loop(), **consumer_settings
    )

    topic_names = get_configured_topics(app)

    scheduler = await aiojobs.create_scheduler()

    try:
        await consumer.start()
        logger.info("Started Kafka consumer")

        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        partitions = consumer.assignment()
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Got initial partition assignment for Kafka topics",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            try:
                value_info = await deserializer.deserialize(
                    message.value, include_schema=True
                )
            except Exception:
                logger.exception(
                    "Failed to deserialize a Kafka message value",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            try:
                await route_message(
                    app=app,
                    scheduler=scheduler,
                    message=value_info["message"],
                    schema_id=value_info["id"],
                    schema=value_info["schema"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to route a Kafka message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
    except asyncio.CancelledError:
        logger.info("consume_events task got cancelled")
    finally:
        logger.info("consume_events task cancelling")
        await consumer.stop()
        await scheduler.close()

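# A minimal sketch of how a long-running consumer such as consume_events is
# typically wired into an aiohttp application. This wiring is an assumption,
# not shown in the source, but the CancelledError/finally path above implies
# the task is started on startup and cancelled on cleanup.
async def start_events_listener(app: web.Application) -> None:
    app["events_consumer_task"] = asyncio.create_task(consume_events(app))


async def stop_events_listener(app: web.Application) -> None:
    task = app["events_consumer_task"]
    task.cancel()
    try:
        await task  # consume_events stops the consumer in its finally block
    except asyncio.CancelledError:
        pass
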
class AIOKafkaRPCClient(object):
    log = logging.getLogger(__name__)

    def __init__(self, kafka_servers='localhost:9092',
                 in_topic='aiokafkarpc_in', out_topic='aiokafkarpc_out',
                 out_partitions=(0, ), max_bytes=1048576,
                 translation_table=[], *, loop):
        self.call = CallObj(self._call_wrapper)

        self._topic_in = in_topic
        self._loop = loop
        self._waiters = {}
        self._out_topic = out_topic
        self._out_partitions = out_partitions
        self.lock = False

        default, ext_hook = get_msgpack_hooks(translation_table)
        self.__consumer = AIOKafkaConsumer(
            self._out_topic, loop=loop,
            bootstrap_servers=kafka_servers,
            group_id=None,
            fetch_max_bytes=max_bytes,
            key_deserializer=lambda x: x.decode("utf-8"),
            enable_auto_commit=True,
            value_deserializer=lambda x: msgpack.unpackb(
                x, ext_hook=ext_hook, encoding="utf-8"))

        self.__producer = AIOKafkaProducer(
            bootstrap_servers=kafka_servers, loop=loop,
            max_request_size=max_bytes,
            enable_idempotence=False,
            key_serializer=lambda x: x.encode("utf-8"),
            value_serializer=lambda x: msgpack.packb(x, default=default))

    async def run(self):
        await self.__producer.start()
        await self.__consumer.start()
        # FIXME manual partition assignment does not work correctly in aiokafka
        # self.__consumer.assign(
        #     [TopicPartition(self._out_topic, p) for p in self._out_partitions])
        #
        # ensure that topic partitions exists
        for tp in self.__consumer.assignment():
            await self.__consumer.position(tp)
        self._consume_task = self._loop.create_task(self.__consume_routine())

    async def close(self, timeout=10):
        await self.__producer.stop()

        if self._waiters:
            await asyncio.wait(self._waiters.values(), loop=self._loop,
                               timeout=timeout)

        self._consume_task.cancel()
        try:
            await self._consume_task
        except asyncio.CancelledError:
            pass
        await self.__consumer.stop()

        for fut in self._waiters.values():
            fut.set_exception(asyncio.TimeoutError())

    def _call_wrapper(self, method):
        async def rpc_call(*args, **kw_args):
            call_id = uuid.uuid4().hex
            ptid = random.choice(self._out_partitions)
            request = (method, args, kw_args, ptid)
            fut = asyncio.Future(loop=self._loop)
            fut.add_done_callback(lambda fut: self._waiters.pop(call_id))
            self._waiters[call_id] = fut
            try:
                await self.__producer.send(self._topic_in, request, key=call_id)
            except Exception as err:
                self.log.error("send RPC request failed: %s", err)
                self._waiters[call_id].set_exception(err)
            return await self._waiters[call_id]
        return rpc_call

    async def __consume_routine(self):
        while True:
            message = await self.__consumer.getone()
            call_id = message.key
            response = message.value
            self.call = CallObj(self._call_wrapper)
            fut = self._waiters.get(call_id)
            if fut is None:
                continue
            if "error" in response:
                self.log.debug(response.get("stacktrace"))
                fut.set_exception(RPCError(response["error"]))
            else:
                fut.set_result(response["result"])

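# Hypothetical usage sketch, inferred from CallObj/_call_wrapper above (the
# exact CallObj behavior is not shown in the source): attribute access on
# `client.call` is assumed to produce an awaitable RPC stub whose result
# future is resolved by __consume_routine. The method name is illustrative.
async def rpc_demo(loop):
    client = AIOKafkaRPCClient(kafka_servers="localhost:9092", loop=loop)
    await client.run()
    try:
        # Resolved when the RPC server publishes a reply to the out topic.
        result = await client.call.remote_method(40, 2)
        print(result)
    finally:
        await client.close()
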
class KafkaConsumer(BaseEventConsumer):

    def __init__(
        self,
        settings: object,
        app: BaseApp,
        serializer: BaseEventSerializer,
        event_topics: List[str],
        event_group: str,
        position: str,
    ) -> None:
        if not hasattr(settings, 'KAFKA_BOOTSTRAP_SERVER'):
            raise Exception('Missing KAFKA_BOOTSTRAP_SERVER config')

        self.max_retries = 10
        if hasattr(settings, 'EVENTY_CONSUMER_MAX_RETRIES'):
            self.max_retries = settings.EVENTY_CONSUMER_MAX_RETRIES

        self.retry_interval = 1000
        if hasattr(settings, 'EVENTY_CONSUMER_RETRY_INTERVAL'):
            self.retry_interval = settings.EVENTY_CONSUMER_RETRY_INTERVAL

        self.retry_backoff_coeff = 2
        if hasattr(settings, 'EVENTY_CONSUMER_RETRY_BACKOFF_COEFF'):
            self.retry_backoff_coeff = settings.EVENTY_CONSUMER_RETRY_BACKOFF_COEFF

        self.app = app
        self.event_topics = event_topics
        self.event_group = event_group
        self.position = position
        self.consumer = None
        self.current_position_checkpoint_callback = None
        self.end_position_checkpoint_callback = None
        bootstrap_servers = settings.KAFKA_BOOTSTRAP_SERVER

        consumer_args: Dict[str, Any]
        consumer_args = {
            'loop': asyncio.get_event_loop(),
            'bootstrap_servers': [bootstrap_servers],
            'enable_auto_commit': False,
            'group_id': self.event_group,
            'value_deserializer': serializer.decode,
            'auto_offset_reset': self.position,
        }
        try:
            self.consumer = AIOKafkaConsumer(
                *self.event_topics, **consumer_args)
        except Exception as e:
            logger.error(
                f"Unable to connect to the Kafka broker {bootstrap_servers}: {e}"
            )
            raise e

    def set_current_position_checkpoint_callback(self, checkpoint_callback):
        self.current_position_checkpoint_callback = checkpoint_callback

    def set_end_position_checkpoint_callback(self, checkpoint_callback):
        self.end_position_checkpoint_callback = checkpoint_callback

    async def current_position(self):
        # Warning: this method returns last committed offsets
        # for each assigned partition
        position = {}
        for partition in self.consumer.assignment():
            offset = await self.consumer.committed(partition) or 0
            position[partition] = offset
        return position

    async def consumer_position(self):
        # Warning: this method returns current offsets
        # for each assigned partition
        position = {}
        for partition in self.consumer.assignment():
            position[partition] = await self.consumer.position(partition)
        return position

    async def end_position(self):
        position = {}
        for partition in self.consumer.assignment():
            offset = (await self.consumer.end_offsets([partition]))[partition]
            position[partition] = offset
        return position

    async def is_checkpoint_reached(self, checkpoint):
        for partition in self.consumer.assignment():
            position = await self.consumer.position(partition)
            if position < checkpoint[partition]:
                return False
        return True

    async def start(self):
        logger.info(
            f'Starting kafka consumer on topic {self.event_topics} '
            f'with group {self.event_group}'
        )
        try:
            await self.consumer.start()
        except Exception as e:
            logger.error(
                f'An error occurred while starting kafka consumer '
                f'on topic {self.event_topics} with group {self.event_group}: {e}'
            )
            sys.exit(1)

        current_position_checkpoint = None
        end_position_checkpoint = None
        if self.event_group is not None:
            current_position = await self.current_position()
            end_position = await self.end_position()
            logger.debug(f'Current position : {current_position}')
            logger.debug(f'End position : {end_position}')

        if self.position == 'earliest' and self.event_group is not None:
            current_position_checkpoint = current_position
            end_position_checkpoint = end_position
            await self.consumer.seek_to_beginning()

        async for msg in self.consumer:
            retries = 0
            sleep_duration_in_ms = self.retry_interval
            while True:
                try:
                    event = msg.value
                    corr_id = event.correlation_id

                    logger.info(f"[CID:{corr_id}] Start handling {event.name}")
                    await event.handle(app=self.app, corr_id=corr_id)
                    logger.info(f"[CID:{corr_id}] End handling {event.name}")

                    if self.event_group is not None:
                        logger.debug(
                            f"[CID:{corr_id}] Commit Kafka transaction")
                        await self.consumer.commit()

                    logger.debug(
                        f"[CID:{corr_id}] Continue with the next message")
                    # Break the retry loop
                    break
                except Exception:
                    logger.exception(
                        f'[CID:{corr_id}] An error occurred while handling received message.'
                    )
                    if retries != self.max_retries:
                        # Increase the number of retries
                        retries = retries + 1

                        sleep_duration_in_s = int(sleep_duration_in_ms / 1000)
                        logger.info(
                            f"[CID:{corr_id}] Sleeping {sleep_duration_in_s}s before retrying..."
                        )
                        await asyncio.sleep(sleep_duration_in_s)

                        # Increase the sleep duration
                        sleep_duration_in_ms = (
                            sleep_duration_in_ms * self.retry_backoff_coeff)
                    else:
                        logger.error(
                            f'[CID:{corr_id}] Unable to handle message within '
                            f'{1 + self.max_retries} tries. Stopping process'
                        )
                        sys.exit(1)

            if current_position_checkpoint and await self.is_checkpoint_reached(
                    current_position_checkpoint):
                logger.info('Current position checkpoint reached')
                if self.current_position_checkpoint_callback:
                    await self.current_position_checkpoint_callback()
                current_position_checkpoint = None

            if end_position_checkpoint and await self.is_checkpoint_reached(
                    end_position_checkpoint):
                logger.info('End position checkpoint reached')
                if self.end_position_checkpoint_callback:
                    await self.end_position_checkpoint_callback()
                end_position_checkpoint = None

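# The retry policy above, distilled into a standalone sketch: retry a handler
# up to max_retries times, multiplying the sleep interval by a backoff
# coefficient after each failure. Function name and defaults are illustrative.
import asyncio


async def handle_with_retries(handler, max_retries=10,
                              retry_interval_ms=1000, backoff_coeff=2):
    sleep_ms = retry_interval_ms
    for attempt in range(1 + max_retries):
        try:
            return await handler()
        except Exception:
            if attempt == max_retries:
                raise  # out of retries; the caller decides (eventy exits)
            await asyncio.sleep(sleep_ms / 1000)
            sleep_ms *= backoff_coeff  # exponential backoff
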
async def consume_kafka(app):
    """Consume Kafka messages directed to templatebot's functionality."""
    logger = structlog.get_logger(app["root"]["api.lsst.codes/loggerName"])

    registry = RegistryApi(
        session=app["root"]["api.lsst.codes/httpSession"],
        url=app["root"]["templatebot/registryUrl"],
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["root"]["templatebot/brokerUrl"],
        "group_id": app["root"]["templatebot/slackGroupId"],
        "auto_offset_reset": "latest",
        "ssl_context": app["root"]["templatebot/kafkaSslContext"],
        "security_protocol": app["root"]["templatebot/kafkaProtocol"],
    }
    consumer = AIOKafkaConsumer(loop=asyncio.get_event_loop(),
                                **consumer_settings)

    try:
        await consumer.start()
        logger.info("Started Kafka consumer", **consumer_settings)

        topic_names = [
            app["root"]["templatebot/appMentionTopic"],
            app["root"]["templatebot/messageImTopic"],
            app["root"]["templatebot/interactionTopic"],
        ]
        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)
        logger.info("Finished subscribing to Kafka topics", names=topic_names)

        partitions = consumer.assignment()
        logger.info("Waiting on partition assignment", names=topic_names)
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Initial partition assignment",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            logger.info(
                "Got Kafka message from sqrbot",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
            )
            try:
                message_info = await deserializer.deserialize(message.value)
            except Exception:
                logger.exception(
                    "Failed to deserialize a message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            event = message_info["message"]
            logger.debug(
                "New message",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
                contents=event,
            )

            try:
                await route_event(
                    event=message_info["message"],
                    app=app,
                    schema_id=message_info["id"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to handle message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
    except asyncio.CancelledError:
        logger.info("consume_kafka task got cancelled")
    finally:
        logger.info("consume_kafka task cancelling")
        await consumer.stop()

async def consume_events(app):
    """Consume events from templatebot-related topics in SQuaRE Events
    (Kafka).

    Notes
    -----
    Templatebot has *two* Kafka consumers. This is one, and the other is in
    `templatebot.slack`. The Slack consumer only listens to topics from Slack
    (SQuaRE Bot), and is focused on responding to Slack-based workflows. This
    consumer is focused on backend-driven events, such as the
    ``templatebot-render_ready`` topic.
    """
    logger = structlog.get_logger(app["root"]["api.lsst.codes/loggerName"])

    registry = RegistryApi(
        session=app["root"]["api.lsst.codes/httpSession"],
        url=app["root"]["templatebot/registryUrl"],
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["root"]["templatebot/brokerUrl"],
        "group_id": app["root"]["templatebot/eventsGroupId"],
        "auto_offset_reset": "latest",
        "ssl_context": app["root"]["templatebot/kafkaSslContext"],
        "security_protocol": app["root"]["templatebot/kafkaProtocol"],
    }
    consumer = AIOKafkaConsumer(loop=asyncio.get_event_loop(),
                                **consumer_settings)

    try:
        await consumer.start()
        logger.info("Started Kafka consumer for events", **consumer_settings)

        topic_names = [app["root"]["templatebot/renderreadyTopic"]]
        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        partitions = consumer.assignment()
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Initial partition assignment for event topics",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            try:
                message_info = await deserializer.deserialize(
                    message.value, include_schema=True)
            except Exception:
                logger.exception(
                    "Failed to deserialize an event message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            event = message_info["message"]
            logger.debug(
                "New event message",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
                contents=event,
            )

            try:
                await route_event(
                    app=app,
                    event=message_info["message"],
                    schema_id=message_info["id"],
                    schema=message_info["schema"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to handle event message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
    except asyncio.CancelledError:
        logger.info("consume_events task got cancelled")
    finally:
        logger.info("consume_events task cancelling")
        await consumer.stop()

class KafkaConsumer(BaseConsumer):
    """KafkaConsumer is a client that consumes records from the Kafka cluster.
    """

    _client: KafkaClient
    serializer: BaseSerializer
    _bootstrap_servers: Union[str, List[str]]
    _client_id: str
    _topics: List[str]
    _group_id: str
    _auto_offset_reset: str
    _max_retries: int
    _retry_interval: int
    _retry_backoff_coeff: int
    _isolation_level: str
    _assignors_data: Dict[str, Any]
    _store_manager: BaseStoreManager
    _running: bool
    _kafka_consumer: AIOKafkaConsumer
    _transactional_manager: KafkaTransactionalManager

    __current_offsets: Dict[str, BasePositioning]
    __last_offsets: Dict[str, BasePositioning]
    __last_committed_offsets: Dict[str, BasePositioning]

    _loop: asyncio.AbstractEventLoop
    logger: Logger

    def __init__(self, client: KafkaClient, serializer: BaseSerializer,
                 topics: List[str], loop: asyncio.AbstractEventLoop,
                 client_id: str = None, group_id: str = None,
                 auto_offset_reset: str = 'earliest', max_retries: int = 10,
                 retry_interval: int = 1000, retry_backoff_coeff: int = 2,
                 assignors_data: Dict[str, Any] = None,
                 store_manager: BaseStoreManager = None,
                 isolation_level: str = 'read_uncommitted',
                 transactional_manager: KafkaTransactionalManager = None) -> None:
        """
        KafkaConsumer constructor

        Args:
            client (KafkaClient): Initialization class (contains client_id /
                        bootstrap_servers)
            serializer (BaseSerializer): Serializer to encode & decode events
            topics (List[str]): List of topics to subscribe to
            loop (asyncio.AbstractEventLoop): Asyncio loop
            client_id (str): Client name (if None, KafkaConsumer uses the
                        KafkaClient client_id)
            group_id (str): Name of the consumer group, used for fetching and
                        committing offsets. If None, offset commits are disabled
            auto_offset_reset (str): Policy for resetting offsets on
                        OffsetOutOfRange errors: 'earliest' moves to the oldest
                        available message, 'latest' moves to the most recent.
                        Any other value raises an exception
            max_retries (int): Number of retries before critical failure
            retry_interval (int): Interval before the next retry
            retry_backoff_coeff (int): Backoff coefficient for subsequent retries
            assignors_data (Dict[str, Any]): Dict with assignor information;
                        more details in StatefulsetPartitionAssignor
            store_manager (BaseStoreManager): If set, the consumer calls
                        initialize_store_manager(); otherwise listen_event is started
            isolation_level (str): Controls how to read messages written
                        transactionally. If set to read_committed, only returns
                        transactional messages which have been committed. If set
                        to read_uncommitted, returns all messages, even
                        transactional messages which have been aborted.
                        Non-transactional messages are returned unconditionally
                        in either mode.

        Returns:
            None
        """
        super().__init__()
        self.logger = getLogger('tonga')

        # Register KafkaClient
        self._client = client

        # Set default assignors_data if None
        if assignors_data is None:
            assignors_data = {}

        # Create client_id
        if client_id is None:
            self._client_id = self._client.client_id + '-' + str(self._client.cur_instance)
        else:
            self._client_id = client_id

        if isinstance(serializer, BaseSerializer):
            self.serializer = serializer
        else:
            raise BadSerializer

        self._bootstrap_servers = self._client.bootstrap_servers
        self._topics = topics
        self._group_id = group_id
        self._auto_offset_reset = auto_offset_reset
        self._max_retries = max_retries
        self._retry_interval = retry_interval
        self._retry_backoff_coeff = retry_backoff_coeff
        self._isolation_level = isolation_level
        self._assignors_data = assignors_data
        self._store_manager = store_manager
        self._running = False
        self._loop = loop

        self.__current_offsets = dict()
        self.__last_offsets = dict()
        self.__last_committed_offsets = dict()

        self._transactional_manager = transactional_manager

        try:
            self.logger.info(json.dumps(assignors_data))
            statefulset_assignor = StatefulsetPartitionAssignor(
                bytes(json.dumps(assignors_data), 'utf-8'))
            self._kafka_consumer = AIOKafkaConsumer(
                *self._topics, loop=self._loop,
                bootstrap_servers=self._bootstrap_servers,
                client_id=self._client_id, group_id=group_id,
                value_deserializer=self.serializer.decode,
                auto_offset_reset=self._auto_offset_reset,
                isolation_level=self._isolation_level,
                enable_auto_commit=False,
                key_deserializer=KafkaKeySerializer.decode,
                partition_assignment_strategy=[statefulset_assignor])
        except KafkaError as err:
            self.logger.exception('%s', err.__str__())
            raise err
        except ValueError as err:
            self.logger.exception('%s', err.__str__())
            raise AioKafkaConsumerBadParams
        self.logger.debug('Create new consumer %s, group_id %s', self._client_id, group_id)

    async def start_consumer(self) -> None:
        """ Start consumer

        Returns:
            None

        Raises:
            AttributeError: KafkaConsumerError
            ValueError: If KafkaError or KafkaTimeoutError is raised, the
                        exception value is contained in KafkaConsumerError.msg
        """
        if self._running:
            raise KafkaConsumerAlreadyStartedError
        for retry in range(2):
            try:
                await self._kafka_consumer.start()
                self._running = True
                self.logger.debug('Start consumer : %s, group_id : %s, retry : %s',
                                  self._client_id, self._group_id, retry)
            except KafkaTimeoutError as err:
                self.logger.exception('%s', err.__str__())
                await asyncio.sleep(1)
            except KafkaConnectionError as err:
                self.logger.exception('%s', err.__str__())
                await asyncio.sleep(1)
            except KafkaError as err:
                self.logger.exception('%s', err.__str__())
                raise err
            else:
                break
        else:
            raise ConsumerConnectionError

    async def stop_consumer(self) -> None:
        """ Stop consumer

        Returns:
            None

        Raises:
            AttributeError: KafkaConsumerError
            ValueError: If KafkaError is raised, the exception value is
                        contained in KafkaConsumerError.msg
        """
        if not self._running:
            raise KafkaConsumerNotStartedError
        try:
            await self._kafka_consumer.stop()
            self._running = False
            self.logger.debug('Stop consumer : %s, group_id : %s',
                              self._client_id, self._group_id)
        except KafkaTimeoutError as err:
            self.logger.exception('%s', err.__str__())
            raise ConsumerKafkaTimeoutError
        except KafkaError as err:
            self.logger.exception('%s', err.__str__())
            raise err

    def is_running(self) -> bool:
        return self._running

    async def get_last_committed_offsets(self) -> Dict[str, BasePositioning]:
        """ Get last committed offsets

        Returns:
            Dict[str, KafkaPositioning]: Contains all assigned partitions with
                        last committed offsets
        """
        last_committed_offsets: Dict[str, BasePositioning] = dict()
        self.logger.debug('Get last committed offsets')
        if self._group_id is None:
            raise IllegalOperation
        for tp in self._kafka_consumer.assignment():
            offset = await self._kafka_consumer.committed(tp)
            last_committed_offsets[KafkaPositioning.make_class_assignment_key(tp.topic, tp.partition)] = \
                KafkaPositioning(tp.topic, tp.partition, offset)
        return last_committed_offsets

    async def get_current_offsets(self) -> Dict[str, BasePositioning]:
        """ Get current offsets

        Returns:
            Dict[str, KafkaPositioning]: Contains all assigned partitions with
                        current offsets
        """
        current_offsets: Dict[str, BasePositioning] = dict()
        self.logger.debug('Get current offsets')
        for tp in self._kafka_consumer.assignment():
            try:
                offset = await self._kafka_consumer.position(tp)
                current_offsets[KafkaPositioning.make_class_assignment_key(tp.topic, tp.partition)] = \
                    KafkaPositioning(tp.topic, tp.partition, offset)
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise err
        return current_offsets

    async def get_beginning_offsets(self) -> Dict[str, BasePositioning]:
        """ Get beginning offsets

        Returns:
            Dict[str, KafkaPositioning]: Contains all assigned partitions with
                        beginning offsets
        """
        beginning_offsets: Dict[str, BasePositioning] = dict()
        self.logger.debug('Get beginning offsets')
        for tp in self._kafka_consumer.assignment():
            try:
                offset = (await self._kafka_consumer.beginning_offsets([tp]))[tp]
                beginning_offsets[KafkaPositioning.make_class_assignment_key(tp.topic, tp.partition)] = \
                    KafkaPositioning(tp.topic, tp.partition, offset)
            except KafkaTimeoutError as err:
                self.logger.exception('%s', err.__str__())
                raise ConsumerKafkaTimeoutError
            except UnsupportedVersionError as err:
                self.logger.exception('%s', err.__str__())
                raise err
        return beginning_offsets

    async def get_last_offsets(self) -> Dict[str, BasePositioning]:
        """ Get last offsets

        Returns:
            Dict[str, KafkaPositioning]: Contains all assigned partitions with
                        last offsets
        """
        last_offsets: Dict[str, BasePositioning] = dict()
        self.logger.debug('Get last offsets')
        for tp in self._kafka_consumer.assignment():
            try:
                offset = (await self._kafka_consumer.end_offsets([tp]))[tp]
                last_offsets[KafkaPositioning.make_class_assignment_key(tp.topic, tp.partition)] = \
                    KafkaPositioning(tp.topic, tp.partition, offset)
            except KafkaTimeoutError as err:
                self.logger.exception('%s', err.__str__())
                raise ConsumerKafkaTimeoutError
            except UnsupportedVersionError as err:
                self.logger.exception('%s', err.__str__())
                raise err
        return last_offsets

    async def load_offsets(self, mod: str = 'earliest') -> None:
        """ Called before consuming topics; assigns the start position to the consumer

        Args:
            mod: Start position of consumer (earliest, latest, committed)

        Returns:
            None
        """
        self.logger.debug('Load offset mod : %s', mod)
        if not self._running:
            await self.start_consumer()

        if mod == 'latest':
            await self.seek_to_end()
        elif mod == 'earliest':
            await self.seek_to_beginning()
        elif mod == 'committed':
            await self.seek_to_last_commit()
        else:
            raise KafkaConsumerError

        self.__current_offsets = await self.get_current_offsets()
        self.__last_offsets = await self.get_last_offsets()

        if self._group_id is not None:
            self.__last_committed_offsets = await self.get_last_committed_offsets()
            for key, kafka_positioning in self.__last_committed_offsets.items():
                if kafka_positioning.get_current_offset() is None:
                    self.logger.debug('Seek to beginning, no committed offset was found')
                    await self.seek_to_beginning(kafka_positioning)

    async def debug_print_all_msg(self):
        """ Debug method; displays all messages contained in the assigned
        topic/partitions

        Returns:
            None
        """
        while True:
            message = await self._kafka_consumer.getone()
            self.logger.info('----------------------------------------------------------------------------------------')
            self.logger.info('Topic %s, Partition %s, Offset %s, Key %s, Value %s, Headers %s',
                             message.topic, message.partition, message.offset,
                             message.key, message.value, message.headers)
            self.logger.info('----------------------------------------------------------------------------------------')

    async def listen_records(self, mod: str = 'earliest') -> None:
        """ Listens for records from the assigned topic/partitions

        Args:
            mod: Start position of consumer (earliest, latest, committed)

        Returns:
            None
        """
        if not self._running:
            await self.load_offsets(mod)

        self.pprint_consumer_offsets()

        async for msg in self._kafka_consumer:
            # Debug display
            self.logger.debug("---------------------------------------------------------------------------------")
            self.logger.debug('New Message on consumer %s, Topic %s, Partition %s, Offset %s, '
                              'Key %s, Value %s, Headers %s', self._client_id, msg.topic,
                              msg.partition, msg.offset, msg.key, msg.value, msg.headers)
            self.pprint_consumer_offsets()
            self.logger.debug("---------------------------------------------------------------------------------")

            key = KafkaPositioning.make_class_assignment_key(msg.topic, msg.partition)
            self.__current_offsets[key].set_current_offset(msg.offset)

            if self._transactional_manager is not None:
                self._transactional_manager.set_ctx(
                    KafkaTransactionContext(msg.topic, msg.partition,
                                            msg.offset, self._group_id))

            # self.last_offsets = await self.get_last_offsets()

            sleep_duration_in_ms = self._retry_interval
            for retries in range(0, self._max_retries):
                try:
                    record_class = msg.value['record_class']
                    handler_class = msg.value['handler_class']
                    if handler_class is None:
                        self.logger.debug('Empty handler')
                        break

                    self.logger.debug('Event name : %s  Event content :\n%s',
                                      record_class.event_name(), record_class.__dict__)

                    # Calls handle if event is an instance of BaseHandler
                    if isinstance(handler_class, BaseEventHandler):
                        transactional = await handler_class.handle(event=record_class)
                    elif isinstance(handler_class, BaseCommandHandler):
                        transactional = await handler_class.execute(event=record_class)
                    elif isinstance(handler_class, BaseResultHandler):
                        transactional = await handler_class.on_result(event=record_class)
                    else:
                        # Otherwise raise KafkaConsumerUnknownHandler
                        raise UnknownHandler

                    # If result is None (no transactional process), check if the
                    # consumer has a group_id (mandatory to commit in Kafka)
                    if transactional is None and self._group_id is not None:
                        # Check if the next commit is possible (Kafka offset)
                        if self.__last_committed_offsets[key] is None or \
                                self.__last_committed_offsets[key].get_current_offset() <= \
                                self.__current_offsets[key].get_current_offset():
                            self.logger.debug('Commit msg %s in topic %s partition %s offset %s',
                                              record_class.event_name(), msg.topic, msg.partition,
                                              self.__current_offsets[key].get_current_offset() + 1)
                            tp = self.__current_offsets[key].to_topics_partition()
                            await self._kafka_consumer.commit(
                                {tp: self.__current_offsets[key].get_current_offset() + 1})
                            self.__last_committed_offsets[key].set_current_offset(msg.offset + 1)
                    # Transactional process, no commit
                    elif transactional:
                        self.logger.debug('Transaction end')
                        self.__current_offsets = await self.get_current_offsets()
                        self.__last_committed_offsets = await self.get_last_committed_offsets()
                    # Otherwise raise KafkaConsumerUnknownHandlerReturn
                    elif transactional is None and self._group_id is None:
                        pass
                    else:
                        raise UnknownHandlerReturn

                    # Break if everything was successfully processed
                    break
                except UninitializedStore as err:
                    self.logger.exception('%s', err.__str__())
                    retries = 0
                    await asyncio.sleep(10)
                except IllegalStateError as err:
                    self.logger.exception('%s', err.__str__())
                    raise NoPartitionAssigned
                except ValueError as err:
                    self.logger.exception('%s', err.__str__())
                    raise OffsetError
                except CommitFailedError as err:
                    self.logger.exception('%s', err.__str__())
                    raise err
                except (KafkaError, HandlerException) as err:
                    self.logger.exception('%s', err.__str__())
                    sleep_duration_in_s = int(sleep_duration_in_ms / 1000)
                    await asyncio.sleep(sleep_duration_in_s)
                    sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff
                    if retries not in range(0, self._max_retries):
                        await self.stop_consumer()
                        self.logger.error('Max retries, close consumer and exit')
                        exit(1)

    async def _refresh_offsets(self) -> None:
        """ Refreshes __current_offsets / __last_offsets / __last_committed_offsets

        Returns:
            None
        """
        self.logger.debug('Call refresh offsets')

        self.__current_offsets = await self.get_current_offsets()
        self.__last_offsets = await self.get_last_offsets()

        if self._group_id is not None:
            self.__last_committed_offsets = await self.get_last_committed_offsets()
        else:
            raise IllegalOperation

    async def check_if_store_is_ready(self) -> None:
        """ If the store is ready, sets the store initialization flag to true

        Returns:
            None
        """
        # Check if the local store is initialized
        self.logger.info('Started check_if_store_is_ready')
        if not self._store_manager.get_local_store().get_persistency().is_initialize():
            key = KafkaPositioning.make_class_assignment_key(
                self._store_manager.get_topic_store(), self._client.cur_instance)
            if self.__last_offsets[key].get_current_offset() == 0:
                self._store_manager.__getattribute__('_initialize_local_store').__call__()
                self.logger.info('Local store was initialized')
            elif self.__current_offsets[key].get_current_offset() == \
                    self.__last_offsets[key].get_current_offset():
                self._store_manager.__getattribute__('_initialize_local_store').__call__()
                self.logger.info('Local store was initialized')

        # Check if the global store is initialized
        if not self._store_manager.get_global_store().get_persistency().is_initialize():
            for key, positioning in self.__last_offsets.items():
                if self._client.cur_instance != positioning.get_partition():
                    if positioning.get_current_offset() == 0:
                        continue
                    elif positioning.get_current_offset() == \
                            self.__current_offsets[key].get_current_offset():
                        continue
                    else:
                        break
            else:
                self._store_manager.__getattribute__('_initialize_global_store').__call__()
                self.logger.info('Global store was initialized')

    async def listen_store_records(self, rebuild: bool = False) -> None:
        """ Listens for events for store construction

        Args:
            rebuild (bool): If true, the consumer seeks to the first offset to
                        rebuild its own state

        Returns:
            None
        """
        if self._store_manager is None:
            raise KeyError

        self.logger.info('Start listen store records')
        await self.start_consumer()
        await self._store_manager.__getattribute__('_initialize_stores').__call__()

        if not self._running:
            raise KafkaConsumerError('Fail to start tongaConsumer', 500)

        # Check if store is ready
        await self._refresh_offsets()
        await self.check_if_store_is_ready()
        self.pprint_consumer_offsets()

        async for msg in self._kafka_consumer:
            positioning_key = KafkaPositioning.make_class_assignment_key(msg.topic, msg.partition)
            self.__current_offsets[positioning_key].set_current_offset(msg.offset)

            # Debug display
self.logger.debug("---------------------------------------------------------------------------------") self.logger.debug('New Message on consumer %s, Topic %s, Partition %s, Offset %s, ' 'Key %s, Value %s, Headers %s', self._client_id, msg.topic, msg.partition, msg.offset, msg.key, msg.value, msg.headers) self.pprint_consumer_offsets() self.logger.debug("---------------------------------------------------------------------------------") # Check if store is ready await self.check_if_store_is_ready() sleep_duration_in_ms = self._retry_interval for retries in range(0, self._max_retries): try: record_class: BaseRecord = msg.value['record_class'] handler_class: BaseStoreRecordHandler = msg.value['handler_class'] self.logger.debug('Store event name : %s\nEvent content :\n%s\n', record_class.event_name(), record_class.__dict__) positioning = self.__current_offsets[positioning_key] if self._client.cur_instance == msg.partition: # Calls local_state_handler if event is instance BaseStorageBuilder if rebuild and not self._store_manager.get_local_store().get_persistency().is_initialize(): if isinstance(record_class, StoreRecord): self.logger.debug('Call local_store_handler') await handler_class.local_store_handler(store_record=record_class, positioning=positioning) else: raise UnknownStoreRecordHandler elif self._client.cur_instance != msg.partition: if isinstance(record_class, StoreRecord): self.logger.debug('Call global_store_handler') await handler_class.global_store_handler(store_record=record_class, positioning=positioning) else: raise UnknownStoreRecordHandler # Check if store is ready await self.check_if_store_is_ready() # Break if everything was successfully processed break except IllegalStateError as err: self.logger.exception('%s', err.__str__()) raise NoPartitionAssigned except ValueError as err: self.logger.exception('%s', err.__str__()) raise OffsetError except CommitFailedError as err: self.logger.exception('%s', err.__str__()) raise err except (KafkaError, HandlerException) as err: self.logger.exception('%s', err.__str__()) sleep_duration_in_s = int(sleep_duration_in_ms / 1000) await asyncio.sleep(sleep_duration_in_s) sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff if retries not in range(0, self._max_retries): await self.stop_consumer() self.logger.error('Max retries, close consumer and exit') exit(1) def is_lag(self) -> bool: """ Consumer has lag ? Returns: bool: True if consumer is lagging otherwise return false and consumer is up to date """ if self.__last_offsets == self.__current_offsets: return False return True async def seek_to_beginning(self, positioning: BasePositioning = None) -> None: """ Seek to fist offset, mod 'earliest'. 
        If positioning is None, the consumer seeks all assigned partitions to
        the beginning

        Args:
            positioning (BasePositioning): Positioning class (contains topic
                        name / partition number)

        Returns:
            None
        """
        if not self._running:
            await self.start_consumer()
        if positioning is not None:
            try:
                await self._kafka_consumer.seek_to_beginning(positioning.to_topics_partition())
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            except TypeError as err:
                self.logger.exception('%s', err.__str__())
                raise TopicPartitionError
            self.logger.debug('Seek to beginning for topic : %s, partition : %s',
                              positioning.get_topics(), positioning.get_partition())
        else:
            try:
                await self._kafka_consumer.seek_to_beginning()
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            self.logger.debug('Seek to beginning for all topics & partitions')

    async def seek_to_end(self, positioning: BasePositioning = None) -> None:
        """ Seek to latest offset, mod 'latest'.
        If positioning is None, the consumer seeks all assigned partitions to
        the end

        Args:
            positioning (BasePositioning): Positioning class (contains topic
                        name / partition number)

        Returns:
            None
        """
        if not self._running:
            await self.start_consumer()
        if positioning is not None:
            try:
                await self._kafka_consumer.seek_to_end(positioning.to_topics_partition())
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            except TypeError as err:
                self.logger.exception('%s', err.__str__())
                raise TopicPartitionError
            self.logger.debug('Seek to end for topic : %s, partition : %s',
                              positioning.get_topics(), positioning.get_partition())
        else:
            try:
                await self._kafka_consumer.seek_to_end()
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            self.logger.debug('Seek to end for all topics & partitions')

    async def seek_to_last_commit(self, positioning: BasePositioning = None) -> None:
        """ Seek to last committed offsets, mod 'committed'.
        If positioning is None, the consumer seeks all assigned partitions to
        the last committed offset

        Args:
            positioning (BasePositioning): Positioning class (contains topic
                        name / partition number / offset number)

        Returns:
            None
        """
        if self._group_id is None:
            raise IllegalOperation
        if not self._running:
            await self.start_consumer()
        if positioning:
            try:
                await self._kafka_consumer.seek_to_committed(positioning.to_topics_partition())
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            except TypeError as err:
                self.logger.exception('%s', err.__str__())
                raise TopicPartitionError
            self.logger.debug('Seek to last committed for topic : %s, partition : %s',
                              positioning.get_topics(), positioning.get_partition())
        else:
            try:
                await self._kafka_consumer.seek_to_committed()
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            self.logger.debug('Seek to last committed for all topics & partitions')

    async def seek_custom(self, positioning: BasePositioning) -> None:
        """ Seek to custom offsets

        Args:
            positioning (BasePositioning): Positioning class (contains topic
                        name / partition number / offset number)

        Returns:
            None
        """
        if not self._running:
            await self.start_consumer()
        if positioning is not None:
            try:
                await self._kafka_consumer.seek(positioning.to_topics_partition(),
                                                positioning.get_current_offset())
            except ValueError as err:
                self.logger.exception('%s', err.__str__())
                raise OffsetError
            except TypeError as err:
                self.logger.exception('%s', err.__str__())
                raise TopicPartitionError
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            self.logger.debug('Custom seek for topic : %s, partition : %s, offset : %s',
                              positioning.get_topics(), positioning.get_partition(),
                              positioning.get_current_offset())
        else:
            raise KafkaConsumerError

    async def _make_manual_commit(self, to_commit: List[BasePositioning]):
        commits = {}
        for positioning in to_commit:
            commits[positioning.to_topics_partition()] = positioning.get_current_offset()
        await self._kafka_consumer.commit(commits)

    async def subscriptions(self) -> frozenset:
        """ Get the list of subscribed topics

        Returns:
            frozenset: List of subscribed topics
        """
        if not self._running:
            await self.start_consumer()
        return self._kafka_consumer.subscription()

    def pprint_consumer_offsets(self) -> None:
        """ Debug tool, prints all consumer positions

        Returns:
            None
        """
        self.logger.debug('Client ID = %s', self._client_id)
        self.logger.debug('Current Offset = %s', [positioning.pprint() for key, positioning in
                                                  self.__current_offsets.items()])
        self.logger.debug('Last Offset = %s', [positioning.pprint() for key, positioning in
                                               self.__last_offsets.items()])
        self.logger.debug('Last committed offset = %s', [positioning.pprint() for key, positioning in
                                                         self.__last_committed_offsets.items()])

    def get_consumer(self) -> AIOKafkaConsumer:
        """ Get the aiokafka consumer

        Returns:
            AIOKafkaConsumer: Current instance of AIOKafkaConsumer
        """
        return self._kafka_consumer

    def get_offset_bundle(self) -> Dict[str, Dict[str, BasePositioning]]:
        """ Return a bundle with the current, latest, and last committed
        positioning of each assigned topic/partition as a dict

        Returns:
            Dict[str, Dict[str, BasePositioning]]: Contains current_offset /
                        last_offset / last_committed_offset
        """
        return {
            'current_offset': self.__current_offsets.copy(),
            'last_offset': self.__last_offsets.copy(),
            'last_committed_offset': self.__last_committed_offsets.copy()
        }

    def get_current_offset(self) -> Dict[str, BasePositioning]:
        """ Return the current offset of each assigned topic/partition

        Returns:
            Dict[str, BasePositioning]: Dict containing the current offset of
                        each assigned partition
        """
        return self.__current_offsets.copy()

    def get_last_offset(self) -> Dict[str, BasePositioning]:
        """ Return the last offset of each assigned topic/partition

        Returns:
            Dict[str, BasePositioning]: Dict containing the latest offset of
                        each assigned partition
        """
        return self.__last_offsets.copy()

    def get_last_committed_offset(self) -> Dict[str, BasePositioning]:
        """ Return the last committed offset of each assigned topic/partition

        Returns:
            Dict[str, BasePositioning]: Dict containing the last committed
                        offset of each assigned partition
        """
        return self.__last_committed_offsets.copy()

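# Illustrative driver for the manual-commit helper above; the topic name and
# offset variable are placeholders, and calling the internal
# _make_manual_commit from outside the class is purely for illustration.
# Kafka stores the *next* offset to be consumed, which is why listen_records
# commits get_current_offset() + 1.
async def commit_processed(consumer: KafkaConsumer, topic: str,
                           partition: int, last_processed_offset: int) -> None:
    positioning = KafkaPositioning(topic, partition, last_processed_offset + 1)
    await consumer._make_manual_commit([positioning])
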
async def test_kafka_consumer_partition(
        asyncio_kafka_consumer: AIOKafkaConsumer):
    assert [
        TopicPartition(KAFKA_TEST_TOPIC, KAFKA_PARTITION),
    ] == sorted(list(asyncio_kafka_consumer.assignment()))
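
# The test above relies on an `asyncio_kafka_consumer` fixture that is not
# shown. A plausible shape for it (constants, broker address, manual
# partition assignment, and pytest-asyncio usage are all assumptions) is:
import pytest
from aiokafka import AIOKafkaConsumer, TopicPartition

KAFKA_TEST_TOPIC = "test-topic"
KAFKA_PARTITION = 0


@pytest.fixture
async def asyncio_kafka_consumer():
    consumer = AIOKafkaConsumer(bootstrap_servers="localhost:9092")
    await consumer.start()
    # Manual assignment makes assignment() deterministic for the test.
    consumer.assign([TopicPartition(KAFKA_TEST_TOPIC, KAFKA_PARTITION)])
    yield consumer
    await consumer.stop()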