async def test_consumer_transactional_abort(self):
    # A read_committed consumer must not deliver anything from a partition
    # while a transaction on it is still open, and after the transaction is
    # aborted it must deliver only the non-transactional message.
    txn_producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer")
    await txn_producer.start()
    self.add_cleanup(txn_producer.stop)

    plain_producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    await plain_producer.start()
    self.add_cleanup(plain_producer.stop)

    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        auto_offset_reset="earliest",
        isolation_level="read_committed")
    await consumer.start()
    self.add_cleanup(consumer.stop)

    partition0 = TopicPartition(self.topic, 0)

    # Write a transactional message first and a non-transactional one
    # right after it, both to partition 0. The open transaction should
    # block consumption of that partition.
    await txn_producer.begin_transaction()
    await txn_producer.send_and_wait(
        self.topic, b'Hello from transaction', partition=0)
    plain_meta = await plain_producer.send_and_wait(
        self.topic, b'Hello from non-transaction', partition=0)

    # While the transaction is open the fetch must stay pending.
    pending_fetch = self.loop.create_task(consumer.getone())
    await asyncio.sleep(1, loop=self.loop)
    self.assertFalse(pending_fetch.done())
    self.assertEqual(consumer.last_stable_offset(partition0), 0)
    self.assertEqual(consumer.highwater(partition0), 2)

    await txn_producer.abort_transaction()

    # The transactional message was aborted, so the only record delivered
    # is the non-transactional one.
    record = await pending_fetch
    self.assertEqual(record.offset, plain_meta.offset)
    self.assertEqual(record.timestamp, plain_meta.timestamp)
    self.assertEqual(record.value, b"Hello from non-transaction")
    self.assertEqual(record.key, None)

    # Nothing else is readable afterwards.
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(
            consumer.getone(), timeout=0.5, loop=self.loop)

    self.assertEqual(consumer.last_stable_offset(partition0), 3)
    self.assertEqual(consumer.highwater(partition0), 3)
async def test_consumer_several_transactions(self):
    # Run ten single-message transactions, committing every third one
    # (i = 0, 3, 6, 9) and aborting the rest, then check that a
    # read_committed consumer sees exactly the committed payloads in order.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer")
    await producer.start()
    self.add_cleanup(producer.stop)

    committed = []
    for i in range(10):
        payload = b'Hello ' + str(i).encode()
        await producer.begin_transaction()
        await producer.send(self.topic, payload, partition=0)
        if i % 3:
            await producer.abort_transaction()
        else:
            await producer.commit_transaction()
            committed.append(payload)

    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        auto_offset_reset="earliest",
        isolation_level="read_committed")
    await consumer.start()
    self.add_cleanup(consumer.stop)

    # Each delivered record must match the next expected committed payload.
    async for record in consumer:
        expected = committed.pop(0)
        self.assertEqual(record.value, expected)
        if not committed:
            break

    # Aborted payloads must never surface.
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(
            consumer.getone(), timeout=0.5, loop=self.loop)
async def test_consumer_several_transactions(self):
    # Run ten single-message transactions, committing every third one
    # (i = 0, 3, 6, 9) and aborting the rest, then check that a
    # read_committed consumer sees exactly the committed payloads in order.
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer")
    await producer.start()
    self.add_cleanup(producer.stop)

    committed = []
    for i in range(10):
        payload = b'Hello ' + str(i).encode()
        await producer.begin_transaction()
        await producer.send(self.topic, payload, partition=0)
        if i % 3:
            await producer.abort_transaction()
        else:
            await producer.commit_transaction()
            committed.append(payload)

    consumer = AIOKafkaConsumer(
        self.topic,
        bootstrap_servers=self.hosts,
        auto_offset_reset="earliest",
        isolation_level="read_committed")
    await consumer.start()
    self.add_cleanup(consumer.stop)

    # Each delivered record must match the next expected committed payload.
    async for record in consumer:
        expected = committed.pop(0)
        self.assertEqual(record.value, expected)
        if not committed:
            break

    # Aborted payloads must never surface.
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(consumer.getone(), timeout=0.5)
def test_manual_subscribe_pattern(self):
    # Subscribe by pattern, consume everything, commit offsets manually for
    # partition 0, then re-subscribe by topic name with the same group and
    # check that only partition 1's messages are delivered again.
    msgs1 = yield from self.send_messages(0, range(0, 10))
    msgs2 = yield from self.send_messages(1, range(10, 20))
    available_msgs = msgs1 + msgs2
    tp0 = TopicPartition(self.topic, 0)

    consumer = AIOKafkaConsumer(
        loop=self.loop, group_id='test-group',
        bootstrap_servers=self.hosts, auto_offset_reset='earliest',
        enable_auto_commit=False)
    consumer.subscribe(pattern="topic-test_manual_subs*")
    yield from consumer.start()
    # try/finally: stop the consumer even when an assertion fails, so a
    # failing run does not leave a live member in 'test-group'.
    try:
        yield from consumer.seek_to_committed()
        result = []
        for _ in range(20):
            msg = yield from consumer.getone()
            result.append(msg.value)
        self.assertEqual(set(available_msgs), set(result))

        # Commit offset 9 and seek back to it: the next record fetched
        # from partition 0 must be the one with value b'9'.
        yield from consumer.commit({tp0: OffsetAndMetadata(9, '')})
        yield from consumer.seek_to_committed(tp0)
        msg = yield from consumer.getone(tp0)
        self.assertEqual(msg.value, b'9')
        # Mark partition 0 as fully consumed for the second consumer.
        yield from consumer.commit({tp0: OffsetAndMetadata(10, '')})
    finally:
        yield from consumer.stop()

    # subscribe by topic
    consumer = AIOKafkaConsumer(
        loop=self.loop, group_id='test-group',
        bootstrap_servers=self.hosts, auto_offset_reset='earliest',
        enable_auto_commit=False)
    consumer.subscribe(topics=(self.topic, ))
    yield from consumer.start()
    try:
        yield from consumer.seek_to_committed()
        result = []
        for _ in range(10):
            msg = yield from consumer.getone()
            result.append(msg.value)
        self.assertEqual(set(msgs2), set(result))
        self.assertEqual(consumer.subscription(), {self.topic})
    finally:
        yield from consumer.stop()
def test_manual_subscribe_pattern(self):
    # Subscribe by pattern, consume everything, commit offsets manually for
    # partition 0, then re-subscribe by topic name with the same group and
    # check that only partition 1's messages are delivered again.
    msgs1 = yield from self.send_messages(0, range(0, 10))
    msgs2 = yield from self.send_messages(1, range(10, 20))
    available_msgs = msgs1 + msgs2
    tp0 = TopicPartition(self.topic, 0)

    consumer = AIOKafkaConsumer(
        loop=self.loop, group_id='test-group',
        bootstrap_servers=self.hosts, auto_offset_reset='earliest',
        enable_auto_commit=False)
    consumer.subscribe(pattern="topic-test_manual_subs*")
    yield from consumer.start()
    # try/finally: stop the consumer even when an assertion fails, so a
    # failing run does not leave a live member in 'test-group'.
    try:
        yield from consumer.seek_to_committed()
        result = []
        for _ in range(20):
            msg = yield from consumer.getone()
            result.append(msg.value)
        self.assertEqual(set(available_msgs), set(result))

        # Commit offset 9 and seek back to it: the next record fetched
        # must be the one with value b'9'.
        yield from consumer.commit({tp0: OffsetAndMetadata(9, '')})
        yield from consumer.seek_to_committed(tp0)
        msg = yield from consumer.getone()
        self.assertEqual(msg.value, b'9')
        # Mark partition 0 as fully consumed for the second consumer.
        yield from consumer.commit({tp0: OffsetAndMetadata(10, '')})
    finally:
        yield from consumer.stop()

    # subscribe by topic
    consumer = AIOKafkaConsumer(
        loop=self.loop, group_id='test-group',
        bootstrap_servers=self.hosts, auto_offset_reset='earliest',
        enable_auto_commit=False)
    consumer.subscribe(topics=(self.topic,))
    yield from consumer.start()
    try:
        yield from consumer.seek_to_committed()
        result = []
        for _ in range(10):
            msg = yield from consumer.getone()
            result.append(msg.value)
        self.assertEqual(set(msgs2), set(result))
        self.assertEqual(consumer.subscription(), {self.topic})
    finally:
        yield from consumer.stop()
def test_producer_indempotence_no_duplicates(self):
    # An idempotent producer should retry a produce request after a timeout
    # error. The wrapped send below lets the request reach the broker and
    # only then raises, so a retry resends data that was already written;
    # the consumer must still see the message exactly once.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        enable_idempotence=True,
        request_timeout_ms=2000)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    real_send = producer.client.send
    # One-element list so the nested coroutine can mutate the counter.
    timeouts_raised = [0]

    @asyncio.coroutine
    def flaky_send(*args, **kw):
        response = yield from real_send(*args, **kw)
        is_produce = response.API_KEY == ProduceResponse[0].API_KEY
        if is_produce and timeouts_raised[0] < 2:
            # Fail the first two produce responses after the fact.
            timeouts_raised[0] += 1
            raise RequestTimedOutError
        return response

    with mock.patch.object(
            producer.client, 'send', side_effect=flaky_send):
        sent_meta = yield from producer.send_and_wait(
            self.topic, b'hello, Kafka!')

    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    yield from consumer.start()
    self.add_cleanup(consumer.stop)

    record = yield from consumer.getone()
    self.assertEqual(record.offset, sent_meta.offset)
    self.assertEqual(record.timestamp, sent_meta.timestamp)
    self.assertEqual(record.value, b"hello, Kafka!")
    self.assertEqual(record.key, None)

    # No second copy of the message may show up.
    with self.assertRaises(asyncio.TimeoutError):
        yield from asyncio.wait_for(consumer.getone(), timeout=0.5)
def test_offset_reset_manual(self):
    # With auto_offset_reset="none" the test expects both getmany() and
    # getone() to raise OffsetOutOfRangeError instead of resetting.
    yield from self.send_messages(0, list(range(0, 10)))

    consumer_options = dict(
        loop=self.loop,
        bootstrap_servers=self.hosts,
        metadata_max_age_ms=200,
        group_id="offset_reset_group",
        auto_offset_reset="none")
    consumer = AIOKafkaConsumer(self.topic, **consumer_options)
    yield from consumer.start()
    self.add_cleanup(consumer.stop)

    with self.assertRaises(OffsetOutOfRangeError):
        yield from consumer.getmany(timeout_ms=1000)

    with self.assertRaises(OffsetOutOfRangeError):
        yield from consumer.getone()
def test_manual_subscribe_nogroup(self):
    # Subscribe by topic name without a consumer group and check that all
    # 20 messages sent to partitions 0 and 1 are delivered.
    msgs1 = yield from self.send_messages(0, range(0, 10))
    msgs2 = yield from self.send_messages(1, range(10, 20))
    available_msgs = msgs1 + msgs2

    consumer = AIOKafkaConsumer(
        loop=self.loop, group_id=None,
        bootstrap_servers=self.hosts, auto_offset_reset='earliest',
        enable_auto_commit=False)
    consumer.subscribe(topics=(self.topic,))
    yield from consumer.start()
    # try/finally: stop the consumer even when a fetch or the assertion
    # fails, so a failing run does not leak a running consumer.
    try:
        result = []
        for _ in range(20):
            msg = yield from consumer.getone()
            result.append(msg.value)
        self.assertEqual(set(available_msgs), set(result))
    finally:
        yield from consumer.stop()
def test_consumer_subscribe_pattern_autocreate_no_group_id(self):
    # A group-less consumer subscribed by pattern must pick up matching
    # topics as they appear in metadata, without creating any itself.
    pattern = "^no-group-pattern-.*$"
    first_topic = "no-group-pattern-1"
    second_topic = "no-group-pattern-2"

    consumer = AIOKafkaConsumer(
        loop=self.loop, bootstrap_servers=self.hosts,
        metadata_max_age_ms=200, group_id=None,
        fetch_max_wait_ms=50,
        auto_offset_reset="earliest")
    self.add_cleanup(consumer.stop)
    yield from consumer.start()
    consumer.subscribe(pattern=pattern)

    # Start a getter for the pattern. It should not create any topics,
    # so it stays pending and the subscription stays empty.
    pending_fetch = self.loop.create_task(consumer.getone())
    yield from asyncio.sleep(0.3, loop=self.loop)
    self.assertFalse(pending_fetch.done())
    self.assertEqual(consumer.subscription(), set())

    # Autocreate the first topic by requesting metadata for it from a
    # producer's client.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    self.add_cleanup(producer.stop)
    yield from producer.start()
    yield from producer.client._wait_on_metadata(first_topic)

    # Give the consumer time to refresh metadata with the new topic.
    yield from asyncio.sleep(0.3, loop=self.loop)
    self.assertFalse(pending_fetch.done())
    known_topics = consumer._client.cluster.topics()
    self.assertTrue(known_topics >= {first_topic})
    self.assertEqual(consumer.subscription(), {first_topic})

    # Add a second matching topic the same way.
    yield from producer.client._wait_on_metadata(second_topic)

    # Give the consumer time to refresh metadata again.
    yield from asyncio.sleep(0.3, loop=self.loop)
    self.assertFalse(pending_fetch.done())
    known_topics = consumer._client.cluster.topics()
    self.assertTrue(known_topics >= {first_topic, second_topic})
    self.assertEqual(
        consumer.subscription(), {first_topic, second_topic})

    # Now actually produce some data and verify that it is consumed.
    yield from producer.send(first_topic, b'test msg')
    record = yield from pending_fetch
    self.assertEqual(record.value, b'test msg')
def test_consumer_wait_topic(self):
    # A consumer subscribed to a topic that does not exist yet must keep
    # getone() pending and deliver the message once a producer writes to
    # that topic.
    topic = "some-test-topic-for-autocreate"
    consumer = AIOKafkaConsumer(
        topic, loop=self.loop, bootstrap_servers=self.hosts)
    yield from consumer.start()
    consume_task = self.loop.create_task(consumer.getone())
    # try/finally: on any failure, cancel the pending fetch and stop the
    # consumer so the test does not leak a task or a running client.
    try:
        # just to be sure getone does not fail (before produce)
        yield from asyncio.sleep(0.5, loop=self.loop)
        self.assertFalse(consume_task.done())

        producer = AIOKafkaProducer(
            loop=self.loop, bootstrap_servers=self.hosts)
        yield from producer.start()
        try:
            yield from producer.send(topic, b'test msg')
        finally:
            # Stop the producer even if the send fails.
            yield from producer.stop()

        data = yield from consume_task
        self.assertEqual(data.value, b'test msg')
    finally:
        if not consume_task.done():
            consume_task.cancel()
        yield from consumer.stop()
async def _test_control_record(self, isolation_level):
    # Shared scenario, parametrized by the consumer isolation level:
    # position the consumer on the offset right after a transactional
    # message and check that it moves past that offset and resumes
    # consumption with later data.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer")
    await producer.start()
    self.add_cleanup(producer.stop)

    async with producer.transaction():
        first_meta = await producer.send_and_wait(
            self.topic, b'Hello from transaction', partition=0)

    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        auto_offset_reset="earliest",
        isolation_level=isolation_level,
        fetch_max_bytes=10)
    await consumer.start()
    self.add_cleanup(consumer.stop)

    tp = first_meta.topic_partition
    # Transaction marker will be next after the message
    consumer.seek(tp, first_meta.offset + 1)

    # Nothing is delivered from that position.
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(
            consumer.getone(), timeout=0.5, loop=self.loop)

    # We must not be stuck on the previous position
    current_position = await consumer.position(tp)
    self.assertEqual(current_position, first_meta.offset + 2)

    # After producing some more data it should resume consumption
    async with producer.transaction():
        second_meta = await producer.send_and_wait(
            self.topic, b'Hello from transaction 2', partition=0)

    record = await consumer.getone()
    self.assertEqual(record.offset, second_meta.offset)
    self.assertEqual(record.timestamp, second_meta.timestamp)
    self.assertEqual(record.value, b"Hello from transaction 2")
    self.assertEqual(record.key, None)
def test_producer_ssl(self):
    # Produce by SSL consume by PLAINTEXT
    topic = "test_ssl_produce"
    context = self.create_ssl_context()
    producer = AIOKafkaProducer(
        loop=self.loop,
        bootstrap_servers=[
            "{}:{}".format(self.kafka_host, self.kafka_ssl_port)],
        security_protocol="SSL", ssl_context=context)
    yield from producer.start()
    # try/finally: stop the producer even if the send fails.
    try:
        yield from producer.send_and_wait(topic=topic, value=b"Super msg")
    finally:
        yield from producer.stop()

    consumer = AIOKafkaConsumer(
        topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        enable_auto_commit=True,
        auto_offset_reset="earliest")
    yield from consumer.start()
    # try/finally: stop the consumer even if the fetch or assertion fails.
    try:
        msg = yield from consumer.getone()
        self.assertEqual(msg.value, b"Super msg")
    finally:
        yield from consumer.stop()
def test_producer_indempotence_simple(self):
    # Smoke test: a plain produce must work with enable_idempotence set,
    # since the option is not expected to change the API.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        enable_idempotence=True)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    sent_meta = yield from producer.send_and_wait(
        self.topic, b'hello, Kafka!')

    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    yield from consumer.start()
    self.add_cleanup(consumer.stop)

    # The consumed record must match the metadata returned by the send.
    record = yield from consumer.getone()
    self.assertEqual(record.offset, sent_meta.offset)
    self.assertEqual(record.timestamp, sent_meta.timestamp)
    self.assertEqual(record.value, b"hello, Kafka!")
    self.assertEqual(record.key, None)
def test_producer_leader_change_preserves_order(self):
    # Before 0.5.0 the sender locked the node, not the partition, until a
    # response came back from the server.
    # Example: the sender sends a request to node 1; before the failure
    # answer arrives, metadata is updated and the leader becomes node 0.
    # The next batch could then go to node 0 without waiting for node 1's
    # batch to be re-enqueued, producing out-of-order batches.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts, linger_ms=1000)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    # Alter cached metadata so the producer believes the leader of
    # partition 0 is a different node.
    yield from producer.partitions_for(self.topic)
    partitions_meta = producer._metadata._partitions[self.topic]
    real_leader = partitions_meta[0].leader
    fake_leader = real_leader + 1
    partitions_meta[0] = partitions_meta[0]._replace(leader=fake_leader)

    # Make produce requests to any node other than the real leader slow.
    real_send = producer.client.send

    @asyncio.coroutine
    def delayed_send(node_id, request, *args, **kw):
        if node_id != real_leader:
            if request.API_KEY == ProduceResponse[0].API_KEY:
                yield from asyncio.sleep(2, loop=self.loop)
        return (yield from real_send(node_id, request, *args, **kw))

    producer.client.send = delayed_send

    # Send batch 1. It will end up waiting for some time on the fake
    # leader.
    first_batch = producer.create_batch()
    first_record_meta = first_batch.append(
        key=b"key", value=b"1", timestamp=None)
    first_batch.close()
    first_fut = yield from producer.send_batch(
        first_batch, self.topic, partition=0)

    # Make sure the request was sent
    yield from asyncio.sleep(0.1, loop=self.loop)

    # Refresh metadata so the leader points at the real node again
    yield from producer.client.force_metadata_update()

    # Send batch 2; with the bug it would go straight to the real node
    second_batch = producer.create_batch()
    second_record_meta = second_batch.append(
        key=b"key", value=b"2", timestamp=None)
    second_batch.close()
    second_fut = yield from producer.send_batch(
        second_batch, self.topic, partition=0)

    first_batch_meta = yield from first_fut
    second_batch_meta = yield from second_fut

    # Check the order of messages
    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    yield from consumer.start()
    self.add_cleanup(consumer.stop)

    first_msg = yield from consumer.getone()
    self.assertEqual(first_msg.offset, first_batch_meta.offset)
    self.assertEqual(
        first_msg.timestamp or -1, first_record_meta.timestamp)
    self.assertEqual(first_msg.value, b"1")
    self.assertEqual(first_msg.key, b"key")

    second_msg = yield from consumer.getone()
    self.assertEqual(second_msg.offset, second_batch_meta.offset)
    self.assertEqual(
        second_msg.timestamp or -1, second_record_meta.timestamp)
    self.assertEqual(second_msg.value, b"2")
    self.assertEqual(second_msg.key, b"key")
def test_producer_leader_change_preserves_order(self):
    # Before 0.5.0 the sender locked the node, not the partition, until a
    # response came back from the server.
    # Example: the sender sends a request to node 1; before the failure
    # answer arrives, metadata is updated and the leader becomes node 0.
    # The next batch could then go to node 0 without waiting for node 1's
    # batch to be re-enqueued, producing out-of-order batches.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts, linger_ms=1000)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    # Alter cached metadata so the producer believes the leader of
    # partition 0 is a different node.
    yield from producer.partitions_for(self.topic)
    partitions_meta = producer._metadata._partitions[self.topic]
    real_leader = partitions_meta[0].leader
    fake_leader = real_leader + 1
    partitions_meta[0] = partitions_meta[0]._replace(leader=fake_leader)

    # Make produce requests to any node other than the real leader slow.
    real_send = producer.client.send

    @asyncio.coroutine
    def delayed_send(node_id, request, *args, **kw):
        if node_id != real_leader:
            if request.API_KEY == ProduceResponse[0].API_KEY:
                yield from asyncio.sleep(2, loop=self.loop)
        return (yield from real_send(node_id, request, *args, **kw))

    producer.client.send = delayed_send

    # Send batch 1. It will end up waiting for some time on the fake
    # leader.
    first_batch = producer.create_batch()
    first_record_meta = first_batch.append(
        key=b"key", value=b"1", timestamp=None)
    first_batch.close()
    first_fut = yield from producer.send_batch(
        first_batch, self.topic, partition=0)

    # Make sure the request was sent
    yield from asyncio.sleep(0.1, loop=self.loop)

    # Refresh metadata so the leader points at the real node again
    yield from producer.client.force_metadata_update()

    # Send batch 2; with the bug it would go straight to the real node
    second_batch = producer.create_batch()
    second_record_meta = second_batch.append(
        key=b"key", value=b"2", timestamp=None)
    second_batch.close()
    second_fut = yield from producer.send_batch(
        second_batch, self.topic, partition=0)

    first_batch_meta = yield from first_fut
    second_batch_meta = yield from second_fut

    # Check the order of messages
    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    yield from consumer.start()
    self.add_cleanup(consumer.stop)

    first_msg = yield from consumer.getone()
    self.assertEqual(first_msg.offset, first_batch_meta.offset)
    self.assertEqual(
        first_msg.timestamp or -1, first_record_meta.timestamp)
    self.assertEqual(first_msg.value, b"1")
    self.assertEqual(first_msg.key, b"key")

    second_msg = yield from consumer.getone()
    self.assertEqual(second_msg.offset, second_batch_meta.offset)
    self.assertEqual(
        second_msg.timestamp or -1, second_record_meta.timestamp)
    self.assertEqual(second_msg.value, b"2")
    self.assertEqual(second_msg.key, b"key")
class KafkaConsumer(BaseConsumer): """ KafkaConsumer Class Attributes: serializer (BaseSerializer): Serializer encode & decode event _bootstrap_servers (Union[str, List[str]): ‘host[:port]’ string (or list of ‘host[:port]’ strings) that the consumer should contact to bootstrap initial cluster metadata _client_id (str): A name for this client. This string is passed in each request to servers and can be used to identify specific server-side log entries that correspond to this client _topics (List[str]): List of topics to subscribe to _group_id (str): name of the consumer group, and to use for fetching and committing offsets. If None, offset commits are disabled _auto_offset_reset (str): A policy for resetting offsets on OffsetOutOfRange errors: ‘earliest’ will move to the oldest available message, ‘latest’ will move to the most recent. Any other value will raise the exception _max_retries (int): Number of retries before critical failure _retry_interval (int): Interval before next retries _retry_backoff_coeff (int): Backoff coeff for next retries _isolation_level (str): Controls how to read messages written transactionally. If set to read_committed, will only return transactional messages which have been committed. If set to read_uncommitted, will return all messages, even transactional messages which have been aborted. Non-transactional messages will be returned unconditionally in either mode. 
_assignors_data (Dict[str, Any]): Dict with assignors information, more details in StatefulsetPartitionAssignor _store_builder (BaseStoreBuilder): If this flag is step consumer run build_stores() otherwise listen_event was started _running (bool): Is running flag _kafka_consumer (AIOKafkaConsumer): AioKafkaConsumer for more information go to __current_offsets (Dict[TopicPartition, int]): Contains current TopicPartition and offsets __last_offsets (Dict[TopicPartition, int]): Contains last TopicPartition and offsets __last_committed_offsets (Dict[TopicPartition, int]): Contains last committed TopicPartition and offsets _loop (AbstractEventLoop): Asyncio loop logger (Logger): Python logger """ serializer: BaseSerializer _bootstrap_servers: Union[str, List[str]] _client_id: str _topics: List[str] _group_id: str _auto_offset_reset: str _max_retries: int _retry_interval: int _retry_backoff_coeff: int _isolation_level: str _assignors_data: Dict[str, Any] _store_builder: BaseStoreBuilder _running: bool _kafka_consumer: AIOKafkaConsumer __current_offsets: Dict[TopicPartition, int] __last_offsets: Dict[TopicPartition, int] __last_committed_offsets: Dict[TopicPartition, int] _loop: AbstractEventLoop logger: Logger def __init__(self, name: str, serializer: BaseSerializer, bootstrap_servers: Union[str, List[str]], client_id: str, topics: List[str], loop: AbstractEventLoop, group_id: str = None, auto_offset_reset: str = 'earliest', max_retries: int = 10, retry_interval: int = 1000, retry_backoff_coeff: int = 2, assignors_data=None, store_builder: BaseStoreBuilder = None, isolation_level: str = 'read_uncommitted') -> None: """ KafkaConsumer constructor Args: name (str): Kafka consumer name serializer (BaseSerializer): Serializer encode & decode event bootstrap_servers (Union[str, List[str]): ‘host[:port]’ string (or list of ‘host[:port]’ strings) that the consumer should contact to bootstrap initial cluster metadata client_id (str): A name for this client. 
This string is passed in each request to servers and can be used to identify specific server-side log entries that correspond to this client topics (List[str]): List of topics to subscribe to loop (AbstractEventLoop): Asyncio loop group_id (str): name of the consumer group, and to use for fetching and committing offsets. If None, offset commits are disabled auto_offset_reset (str): A policy for resetting offsets on OffsetOutOfRange errors: ‘earliest’ will move to the oldest available message, ‘latest’ will move to the most recent. Any other value will raise the exception max_retries (int): Number of retries before critical failure retry_interval (int): Interval before next retries retry_backoff_coeff (int): Backoff coeff for next retries assignors_data (Dict[str, Any]): Dict with assignors information, more details in StatefulsetPartitionAssignor store_builder (bool): If this flag is step consumer run build_stores() otherwise listen_event was started isolation_level (str): Controls how to read messages written transactionally. If set to read_committed, will only return transactional messages which have been committed. If set to read_uncommitted, will return all messages, even transactional messages which have been aborted. Non-transactional messages will be returned unconditionally in either mode. 
Returns: None """ super().__init__() self.name = name if assignors_data is None: assignors_data = {} self.logger = logging.getLogger('tonga') if isinstance(serializer, BaseSerializer): self.serializer = serializer else: raise BadSerializer self._bootstrap_servers = bootstrap_servers self._client_id = client_id self._topics = topics self._group_id = group_id self._auto_offset_reset = auto_offset_reset self._max_retries = max_retries self._retry_interval = retry_interval self._retry_backoff_coeff = retry_backoff_coeff self._isolation_level = isolation_level self._assignors_data = assignors_data self._store_builder = store_builder self._running = False self._loop = loop self.__current_offsets = dict() self.__last_offsets = dict() self.__last_committed_offsets = dict() try: self.logger.info(json.dumps(assignors_data)) statefulset_assignor = StatefulsetPartitionAssignor( bytes(json.dumps(assignors_data), 'utf-8')) self._kafka_consumer = AIOKafkaConsumer( *self._topics, loop=self._loop, bootstrap_servers=self._bootstrap_servers, client_id=client_id, group_id=group_id, value_deserializer=self.serializer.decode, auto_offset_reset=self._auto_offset_reset, isolation_level=self._isolation_level, enable_auto_commit=False, key_deserializer=KafkaKeySerializer.decode, partition_assignment_strategy=[statefulset_assignor]) except KafkaError as err: self.logger.exception(f'{err.__str__()}') raise err except ValueError as err: self.logger.exception(f'{err.__str__()}') raise AioKafkaConsumerBadParams self.logger.debug( f'Create new consumer {client_id}, group_id {group_id}') async def start_consumer(self) -> None: """ Start consumer Returns: None Raises: AttributeError: KafkaConsumerError ValueError: If KafkaError or KafkaTimoutError is raised, exception value is contain in KafkaConsumerError.msg """ if self._running: raise KafkaConsumerAlreadyStartedError for retry in range(2): try: await self._kafka_consumer.start() self._running = True self.logger.debug( f'Start consumer : 
{self._client_id}, group_id : {self._group_id}' ) except KafkaTimeoutError as err: self.logger.exception(f'{err.__str__()}') await asyncio.sleep(1) except ConnectionError as err: self.logger.exception(f'{err.__str__()}') await asyncio.sleep(1) except KafkaError as err: self.logger.exception(f'{err.__str__()}') raise err else: break else: raise ConsumerConnectionError async def stop_consumer(self) -> None: """ Stop consumer Returns: None Raises: AttributeError: KafkaConsumerError ValueError: If KafkaError is raised, exception value is contain in KafkaConsumerError.msg """ if not self._running: raise KafkaConsumerNotStartedError try: await self._kafka_consumer.stop() self._running = False self.logger.debug( f'Stop consumer : {self._client_id}, group_id : {self._group_id}' ) except KafkaTimeoutError as err: self.logger.exception(f'{err.__str__()}') raise ConsumerKafkaTimeoutError except KafkaError as err: self.logger.exception(f'{err.__str__()}') raise err def is_running(self) -> bool: return self._running async def get_last_committed_offsets(self) -> Dict[TopicPartition, int]: """ Get last committed offsets Returns: Dict[TopicPartition, int]: Contains all assigned partitions with last committed offsets """ last_committed_offsets: Dict[TopicPartition, int] = dict() self.logger.debug(f'Get last committed offsets') if self._group_id is None: raise IllegalOperation for partition in self._kafka_consumer.assignment(): offset = await self._kafka_consumer.committed(partition) last_committed_offsets[partition] = offset return last_committed_offsets async def get_current_offsets(self) -> Dict[TopicPartition, int]: """ Get current offsets Returns: Dict[TopicPartition, int]: Contains all assigned partitions with current offsets """ current_offsets: Dict[TopicPartition, int] = dict() self.logger.debug(f'Get current offsets') for partition in self._kafka_consumer.assignment(): try: offset = await self._kafka_consumer.position(partition) except IllegalStateError as err: 
self.logger.exception(f'{err.__str__()}') raise err current_offsets[partition] = offset return current_offsets async def get_beginning_offsets(self) -> Dict[TopicPartition, int]: """ Get beginning offsets Returns: Dict[TopicPartition, int]: Contains all assigned partitions with beginning offsets """ beginning_offsets: Dict[TopicPartition, int] = dict() self.logger.debug(f'Get beginning offsets') for partition in self._kafka_consumer.assignment(): try: offset = (await self._kafka_consumer.beginning_offsets([partition] ))[partition] except KafkaTimeoutError as err: self.logger.exception(f'{err.__str__()}') raise ConsumerKafkaTimeoutError except UnsupportedVersionError as err: self.logger.exception(f'{err.__str__()}') raise err beginning_offsets[partition] = offset return beginning_offsets async def get_last_offsets(self) -> Dict[TopicPartition, int]: """ Get last offsets Returns: Dict[TopicPartition, int]: Contains all assigned partitions with last offsets """ last_offsets: Dict[TopicPartition, int] = dict() self.logger.debug(f'Get last offsets') for partition in self._kafka_consumer.assignment(): try: offset = (await self._kafka_consumer.end_offsets([partition] ))[partition] except KafkaTimeoutError as err: self.logger.exception(f'{err.__str__()}') raise ConsumerKafkaTimeoutError except UnsupportedVersionError as err: self.logger.exception(f'{err.__str__()}') raise err last_offsets[partition] = offset return last_offsets async def load_offsets(self, mod: str = 'earliest') -> None: """ This method was call before consume topics, assign position to consumer Args: mod: Start position of consumer (earliest, latest, committed) Returns: None """ self.logger.debug(f'Load offset mod : {mod}') if not self._running: await self.start_consumer() if mod == 'latest': await self.seek_to_end() elif mod == 'earliest': await self.seek_to_beginning() elif mod == 'committed': await self.seek_to_last_commit() else: raise KafkaConsumerError('Unknown mod', 500) self.__current_offsets = 
await self.get_current_offsets() self.__last_offsets = await self.get_last_offsets() if self._group_id is not None: self.__last_committed_offsets = await self.get_last_committed_offsets( ) for tp, offset in self.__last_committed_offsets.items(): if offset is None: self.logger.debug( f'Seek to beginning, no committed offsets was found') await self.seek_to_beginning(tp.partition, tp.topic) async def debug_print_all_msg(self): """ Debug method, useful for display all msg was contained in assigned topic/partitions Returns: None """ while True: message = await self._kafka_consumer.getone() self.logger.info( '----------------------------------------------------------------------------------------' ) self.logger.info( f'Topic {message.topic}, Partition {message.partition}, Offset {message.offset}, ' f'Key {message.key}, Value {message.value}, Headers {message.headers}' ) self.logger.info( '----------------------------------------------------------------------------------------' ) async def listen_event(self, mod: str = 'earliest') -> None: """ Listens events from assigned topic / partitions Args: mod: Start position of consumer (earliest, latest, committed) Returns: None """ if not self._running: await self.load_offsets(mod) self.pprint_consumer_offsets() # await self.getone() async for msg in self._kafka_consumer: # Debug Display self.logger.debug( "---------------------------------------------------------------------------------" ) self.logger.debug( f'New Message on consumer {self._client_id}, Topic {msg.topic}, ' f'Partition {msg.partition}, Offset {msg.offset}, Key {msg.key}, Value {msg.value},' f'Headers {msg.headers}') self.pprint_consumer_offsets() self.logger.debug( "---------------------------------------------------------------------------------" ) tp = TopicPartition(msg.topic, msg.partition) self.__current_offsets[tp] = msg.offset # self.last_offsets = await self.get_last_offsets() sleep_duration_in_ms = self._retry_interval for retries in range(0, 
self._max_retries): try: decode_dict = msg.value event_class = decode_dict['event_class'] handler_class = decode_dict['handler_class'] logging.debug( f'Event name : {event_class.event_name()} Event content :\n{event_class.__dict__}' ) # Calls handle if event is instance BaseHandler if isinstance(handler_class, BaseEventHandler): result = await handler_class.handle( event=event_class, group_id=self._group_id, tp=tp, offset=msg.offset) elif isinstance(handler_class, BaseCommandHandler): result = await handler_class.execute( event=event_class, group_id=self._group_id, tp=tp, offset=msg.offset) elif isinstance(handler_class, BaseResultHandler): result = await handler_class.on_result( event=event_class, group_id=self._group_id, tp=tp, offset=msg.offset) else: # Otherwise raise KafkaConsumerUnknownHandler raise UnknownHandler # If result is none (no transactional process), check if consumer has an # group_id (mandatory to commit in Kafka) if result is None and self._group_id is not None: # Check if next commit was possible (Kafka offset) if self.__last_committed_offsets[tp] is None or \ self.__last_committed_offsets[tp] <= self.__current_offsets[tp]: self.logger.debug( f'Commit msg {event_class.event_name()} in topic {msg.topic} partition ' f'{msg.partition} offset {self.__current_offsets[tp] + 1}' ) await self._kafka_consumer.commit( {tp: self.__current_offsets[tp] + 1}) self.__last_committed_offsets[tp] = msg.offset + 1 # Transactional process no commit elif result == 'transaction': self.logger.debug(f'Transaction end') self.__current_offsets = await self.get_current_offsets( ) self.__last_committed_offsets = await self.get_last_committed_offsets( ) # Otherwise raise KafkaConsumerUnknownHandlerReturn else: raise UnknownHandlerReturn # Break if everything was successfully processed break except IllegalStateError as err: self.logger.exception(f'{err.__str__()}') raise NoPartitionAssigned except ValueError as err: self.logger.exception(f'{err.__str__()}') raise 
OffsetError except CommitFailedError as err: self.logger.exception(f'{err.__str__()}') raise err except (KafkaError, HandlerException) as err: self.logger.exception(f'{err.__str__()}') sleep_duration_in_s = int(sleep_duration_in_ms / 1000) await asyncio.sleep(sleep_duration_in_s) sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff if retries not in range(0, self._max_retries): await self.stop_consumer() logging.error(f'Max retries, close consumer and exit') exit(1) def check_if_store_is_ready(self) -> None: # TODO Optimize check store is ready if self._store_builder.get_local_store().is_initialized( ) and self._store_builder.get_global_store().is_initialized(): return self.logger.info( '-------------------------------------------------------') ack = False for tp, offset in self.__last_offsets.items(): self.logger.info( f'Topic : {tp.topic}, partition : {tp.partition}, offset: {offset}' ) if tp.partition == self._store_builder.get_current_instance(): self.logger.info('Own partition') if offset == 0: self._store_builder.get_local_store().set_initialized(True) else: ack = False else: ack = True if offset == 0 else False if ack: self._store_builder.get_global_store().set_initialized(True) self.logger.info( '-------------------------------------------------------') async def listen_store_records(self, rebuild: bool = False) -> None: """ Listens events for store construction Args: rebuild (bool): if true consumer seek to fist offset for rebuild own state Returns: None """ if self._store_builder is None: raise KeyError self.logger.info('Start listen store records') await self.start_consumer() await self._store_builder.initialize_store_builder() if not self._running: raise KafkaConsumerError('Fail to start tongaConsumer', 500) # Check if store is ready self.check_if_store_is_ready() self.pprint_consumer_offsets() async for msg in self._kafka_consumer: # Debug Display self.logger.debug( 
"---------------------------------------------------------------------" ) self.logger.debug( f'New Message on store builder consumer {self._client_id}, Topic {msg.topic}, ' f'Partition {msg.partition}, Offset {msg.offset}, Key {msg.key}, Value {msg.value},' f'Headers {msg.headers}') self.pprint_consumer_offsets() self.logger.debug( "---------------------------------------------------------------------" ) # Check if store is ready self.check_if_store_is_ready() tp = TopicPartition(msg.topic, msg.partition) self.__current_offsets[tp] = msg.offset sleep_duration_in_ms = self._retry_interval for retries in range(0, self._max_retries): try: decode_dict = msg.value event_class: BaseModel = decode_dict['event_class'] handler_class: BaseStoreRecordHandler = decode_dict[ 'handler_class'] logging.debug( f'Store event name : {event_class.event_name()}\nEvent ' f'content :\n{event_class.__dict__}\n') result = None if msg.partition == self._store_builder.get_current_instance( ): # Calls local_state_handler if event is instance BaseStorageBuilder if rebuild and not self._store_builder.get_local_store( ).is_initialized(): if isinstance(event_class, BaseStoreRecord): result = await handler_class.local_store_handler( store_record=event_class, group_id=self._group_id, tp=tp, offset=msg.offset) else: raise UnknownStoreRecordHandler elif msg.partition != self._store_builder.get_current_instance( ): if isinstance(event_class, BaseStoreRecord): result = await handler_class.global_store_handler( store_record=event_class, group_id=self._group_id, tp=tp, offset=msg.offset) else: raise UnknownStoreRecordHandler # If result is none (no transactional process), check if consumer has an # group_id (mandatory to commit in Kafka) # TODO Add commit store later V2 # if result is None and self._group_id is not None: # # Check if next commit was possible (Kafka offset) # if self.__last_committed_offsets[tp] is None or \ # self.__last_committed_offsets[tp] <= self.__current_offsets[tp]: # 
self.logger.debug(f'Commit msg {event_class.event_name()} in topic {msg.topic} partition ' # f'{msg.partition} offset {self.__current_offsets[tp] + 1}') # await self._kafka_consumer.commit({tp: self.__current_offsets[tp] + 1}) # self.__last_committed_offsets[tp] = msg.offset + 1 # # Otherwise raise ValueError # else: # raise ValueError # Break if everything was successfully processed break except IllegalStateError as err: self.logger.exception(f'{err.__str__()}') raise NoPartitionAssigned except ValueError as err: self.logger.exception(f'{err.__str__()}') raise OffsetError except CommitFailedError as err: self.logger.exception(f'{err.__str__()}') raise err except (KafkaError, HandlerException) as err: self.logger.exception(f'{err.__str__()}') sleep_duration_in_s = int(sleep_duration_in_ms / 1000) await asyncio.sleep(sleep_duration_in_s) sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff if retries not in range(0, self._max_retries): await self.stop_consumer() logging.error(f'Max retries, close consumer and exit') exit(1) def is_lag(self) -> bool: """ Consumer has lag ? 
Returns: bool: True if consumer is lagging otherwise return false and consumer is up to date """ if self.__last_offsets == self.__current_offsets: return False return True async def get_next(self): """ Async generator for consumer msg Yields: Any: yields new msg """ while True: try: yield self._kafka_consumer.getone() except ConsumerStoppedError: raise StopAsyncIteration except (TopicAuthorizationFailedError, OffsetOutOfRangeError, NoOffsetForPartitionError) as err: raise KafkaConsumerError(err, 500) except RecordTooLargeError: pass async def get_many(self, partitions: List[TopicPartition] = None, max_records: int = None) -> Dict[Any, Any]: """ Get many msg in batch Args: partitions (List[TopicPartition]): List of topic / partition to get many msg max_records (int): Max msg records Returns: Dict[Any, Any]: Return a dict contain msg """ if not self._running: await self.start_consumer() self.logger.debug(f'Get many in partitions : {partitions}') return await self._kafka_consumer.getmany(*partitions, max_records=max_records) async def get_one(self, partitions: List[TopicPartition] = None) -> Any: """ Get one message Args: partitions (List[TopicPartition]): List of topic / partition to get many msg Returns: Any: return msg """ if not self._running: await self.start_consumer() self.logger.debug(f'Get one in partitions : {partitions}') return await self._kafka_consumer.getone(*partitions) async def seek_to_beginning(self, partition: int = None, topic: str = None) -> None: """ Seek to fist offset, mod 'earliest' Args: partition (int): Partition value topic (str): Topic name Returns: None """ if not self._running: await self.start_consumer() if partition is not None and topic is not None: try: await self._kafka_consumer.seek_to_beginning( TopicPartition(topic, partition)) except IllegalStateError as err: self.logger.exception(f'{err.__str__()}') raise NoPartitionAssigned except TypeError as err: self.logger.exception(f'{err.__str__()}') raise TopicPartitionError 
self.logger.debug( f'Seek to beginning for topic : {topic}, partition : {partition}' ) else: try: await self._kafka_consumer.seek_to_beginning() except IllegalStateError as err: self.logger.exception(f'{err.__str__()}') raise NoPartitionAssigned self.logger.debug(f'Seek to beginning for all topics & partitions') async def seek_to_end(self, partition: int = None, topic: str = None) -> None: """ Seek to latest offset, mod 'latest' Args: partition (int): Partition value topic (str): Topic name Returns: None """ if not self._running: await self.start_consumer() if partition is not None and topic is not None: try: await self._kafka_consumer.seek_to_end( TopicPartition(topic, partition)) except IllegalStateError as err: self.logger.exception(f'{err.__str__()}') raise NoPartitionAssigned except TypeError as err: self.logger.exception(f'{err.__str__()}') raise TopicPartitionError self.logger.debug( f'Seek to end for topic : {topic}, partition : {partition}') else: try: await self._kafka_consumer.seek_to_end() except IllegalStateError as err: self.logger.exception(f'{err.__str__()}') raise NoPartitionAssigned self.logger.debug(f'Seek to end for all topics & partitions') async def seek_to_last_commit(self, partition: int = None, topic: str = None) -> None: """ Seek to last committed offsets, mod 'committed' Args: partition (int): Partition value topic (str): Topic name Returns: None """ if self._group_id is None: raise IllegalOperation if not self._running: await self.start_consumer() if partition is not None and topic is not None: try: await self._kafka_consumer.seek_to_committed( TopicPartition(topic, partition)) except IllegalStateError as err: self.logger.exception(f'{err.__str__()}') raise NoPartitionAssigned except TypeError as err: self.logger.exception(f'{err.__str__()}') raise TopicPartitionError self.logger.debug( f'Seek to last committed for topic : {topic}, partition : {partition}' ) else: try: await self._kafka_consumer.seek_to_committed() except 
IllegalStateError as err: self.logger.exception(f'{err.__str__()}') raise NoPartitionAssigned self.logger.debug( f'Seek to last committed for all topics & partitions') async def seek_custom(self, topic: str = None, partition: int = None, offset: int = None) -> None: """ Seek to custom offsets Args: partition (int): Partition value topic (str): Topic name offset (int): Offset value Returns: None """ if not self._running: await self.start_consumer() if partition is not None and topic is not None and offset is not None: try: await self._kafka_consumer.seek( TopicPartition(topic, partition), offset) except ValueError as err: self.logger.exception(f'{err.__str__()}') raise OffsetError except TypeError as err: self.logger.exception(f'{err.__str__()}') raise TopicPartitionError except IllegalStateError as err: self.logger.exception(f'{err.__str__()}') raise NoPartitionAssigned self.logger.debug( f'Custom seek for topic : {topic}, partition : {partition}, offset : {offset}' ) else: raise KafkaConsumerError('Custom seek need 3 argv', 500) async def subscriptions(self) -> frozenset: """ Get list of subscribed topic Returns: frozenset: List of subscribed topic """ if not self._running: await self.start_consumer() return self._kafka_consumer.subscription() def pprint_consumer_offsets(self) -> None: """ Debug tool, print all consumer position Returns: None """ self.logger.debug(f'Client ID = {self._client_id}') self.logger.debug(f'Current Offset = {self.__current_offsets}') self.logger.debug(f'Last Offset = {self.__last_offsets}') self.logger.debug( f'Last committed offset = {self.__last_committed_offsets}') def get_aiokafka_consumer(self) -> AIOKafkaConsumer: """ Get aiokafka consumer Returns: AIOKafkaConsumer: Current instance of AIOKafkaConsumer """ return self._kafka_consumer def get_offset_bundle(self) -> Dict[str, Dict[TopicPartition, int]]: """ Return a bundle with each assigned assigned topic/partition with current, latest, last committed topic/partition as dict 
Returns: Dict[str, Dict[TopicPartition, int]]: Contains current_offset / last_offset / last_committed_offset """ return { 'current_offset': self.__current_offsets.copy(), 'last_offset': self.__last_offsets.copy(), 'last_committed_offset': self.__last_committed_offsets.copy() } def get_current_offset(self) -> Dict[TopicPartition, int]: """ Return current offset of each assigned topic/partition Returns: Dict[TopicPartition, int]: Dict contains current offset of each assigned partition """ return self.__current_offsets.copy() def get_last_offset(self) -> Dict[TopicPartition, int]: """ Return last offset of each assigned topic/partition Returns: Dict[TopicPartition, int]: Dict contains latest offset of each assigned partition """ return self.__last_offsets.copy() def get_last_committed_offset(self) -> Dict[TopicPartition, int]: """ Return last committed offset of each assigned topic/partition Returns: Dict[TopicPartition, int]: Dict contains last committed offset of each assigned partition """ return self.__last_committed_offsets.copy()
def test_rebalance_listener_with_coroutines(self):
    """Check rebalance listener callbacks that are written as coroutines.

    A first consumer joins the group alone and is assigned both partitions;
    a second consumer then joins and triggers a rebalance. The listener
    mocks record every revoke / assign call, and the callbacks themselves
    assert the coordinator state after sleeping inside the callback.
    """
    yield from self.send_messages(0, list(range(10)))
    yield from self.send_messages(1, list(range(10, 20)))

    test_case = self

    class SimpleRebalanceListener(ConsumerRebalanceListener):
        def __init__(self, consumer):
            self.consumer = consumer
            self.revoke_mock = mock.Mock()
            self.assign_mock = mock.Mock()

        @asyncio.coroutine
        def on_partitions_revoked(self, revoked):
            self.revoke_mock(revoked)
            # If this commit would fail we will end up with wrong msgs
            # returned in test below
            yield from self.consumer.commit()
            # Confirm that coordinator is actually waiting for callback to
            # complete
            yield from asyncio.sleep(0.2, loop=test_case.loop)
            test_case.assertTrue(
                self.consumer._coordinator.needs_join_prepare)

        @asyncio.coroutine
        def on_partitions_assigned(self, assigned):
            self.assign_mock(assigned)
            # Confirm that coordinator is actually waiting for callback to
            # complete
            yield from asyncio.sleep(0.2, loop=test_case.loop)
            test_case.assertFalse(
                self.consumer._coordinator.needs_join_prepare)

    def build_consumer():
        # Both consumers share the same group so that the second one
        # joining forces a rebalance.
        return AIOKafkaConsumer(
            loop=self.loop,
            group_id="test_rebalance_listener_with_coroutines",
            bootstrap_servers=self.hosts,
            enable_auto_commit=False,
            auto_offset_reset="earliest")

    tp0 = TopicPartition(self.topic, 0)
    tp1 = TopicPartition(self.topic, 1)

    consumer1 = build_consumer()
    listener1 = SimpleRebalanceListener(consumer1)
    consumer1.subscribe([self.topic], listener=listener1)
    yield from consumer1.start()
    self.add_cleanup(consumer1.stop)

    first_of_tp0 = yield from consumer1.getone(tp0)
    self.assertEqual(first_of_tp0.value, b"0")
    first_of_tp1 = yield from consumer1.getone(tp1)
    self.assertEqual(first_of_tp1.value, b"10")
    listener1.revoke_mock.assert_called_with(set())
    listener1.assign_mock.assert_called_with({tp0, tp1})

    # By adding a 2nd consumer we trigger rebalance
    consumer2 = build_consumer()
    listener2 = SimpleRebalanceListener(consumer2)
    consumer2.subscribe([self.topic], listener=listener2)
    yield from consumer2.start()
    self.add_cleanup(consumer2.stop)

    from_tp0 = yield from consumer1.getone()
    from_tp1 = yield from consumer2.getone()
    # We can't predict the assignment in test
    if consumer1.assignment() == {tp1}:
        from_tp0, from_tp1 = from_tp1, from_tp0
        expected_c1, expected_c2 = {tp1}, {tp0}
    else:
        expected_c1, expected_c2 = {tp0}, {tp1}

    self.assertEqual(from_tp0.value, b"1")
    self.assertEqual(from_tp1.value, b"11")

    listener1.revoke_mock.assert_called_with({tp0, tp1})
    self.assertEqual(listener1.revoke_mock.call_count, 2)
    listener1.assign_mock.assert_called_with(expected_c1)
    self.assertEqual(listener1.assign_mock.call_count, 2)

    listener2.revoke_mock.assert_called_with(set())
    self.assertEqual(listener2.revoke_mock.call_count, 1)
    listener2.assign_mock.assert_called_with(expected_c2)
    self.assertEqual(listener2.assign_mock.call_count, 1)