def test_producer_api_version(self):
    """Check parsing of the ``api_version`` option.

    Valid version strings must be normalized to tuples; malformed
    strings raise ``ValueError`` and non-string values ``TypeError``.
    """
    valid_cases = [
        ("auto", (0, 9, 0)),
        ("0.9.1", (0, 9, 1)),
        ("0.10.0", (0, 10, 0)),
        ("0.11", (0, 11, 0)),
        ("0.12.1", (0, 12, 1)),
        ("1.0.2", (1, 0, 2)),
    ]
    for text_version, expected_tuple in valid_cases:
        producer = AIOKafkaProducer(
            loop=self.loop, bootstrap_servers=self.hosts,
            api_version=text_version)
        self.assertEqual(producer.client.api_version, expected_tuple)
        yield from producer.stop()

    # Malformed version strings are rejected on construction
    for bad_text in ["0", "1", "0.10.0.1"]:
        with self.assertRaises(ValueError):
            AIOKafkaProducer(
                loop=self.loop, bootstrap_servers=self.hosts,
                api_version=bad_text)

    # Only strings are accepted, tuples raise TypeError
    for bad_value in [(0, 9), (0, 9, 1)]:
        with self.assertRaises(TypeError):
            AIOKafkaProducer(
                loop=self.loop, bootstrap_servers=self.hosts,
                api_version=bad_value)
def test_producer_send_error(self):
    """Per-partition errors in a ProduceResponse must fail only the
    matching message future; successful partitions still resolve."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        retry_backoff_ms=100, linger_ms=5, request_timeout_ms=400)
    yield from producer.start()

    @asyncio.coroutine
    def mocked_send(nodeid, req):
        # RequestTimedOutCode error for partition=0
        return ProduceResponse[0]([(self.topic, [(0, 7, 0), (1, 0, 111)])])

    with mock.patch.object(producer.client, 'send') as mocked:
        mocked.side_effect = mocked_send
        fut1 = yield from producer.send(self.topic, b'text1', partition=0)
        fut2 = yield from producer.send(self.topic, b'text2', partition=1)
        # partition=0 carried error code 7 -> RequestTimedOutError
        with self.assertRaises(RequestTimedOutError):
            yield from fut1
        # partition=1 succeeded with offset 111
        resp = yield from fut2
        self.assertEqual(resp.offset, 111)

    @asyncio.coroutine
    def mocked_send_with_sleep(nodeid, req):
        # RequestTimedOutCode error for partition=0
        yield from asyncio.sleep(0.1, loop=self.loop)
        return ProduceResponse[0]([(self.topic, [(0, 7, 0)])])

    with mock.patch.object(producer.client, 'send') as mocked:
        mocked.side_effect = mocked_send_with_sleep
        with self.assertRaises(RequestTimedOutError):
            future = yield from producer.send(
                self.topic, b'text1', partition=0)
            yield from future
    yield from producer.stop()
async def test_producer_transactional_flush_2_batches_before_commit(self):
    """Both in-flight and still-queued batches on one partition must be
    delivered before ``commit_transaction`` returns."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    await producer.start()
    self.add_cleanup(producer.stop)

    await producer.begin_transaction()

    # Two single-record batches targeting the same partition: they are
    # sent one at a time, so the second stays queued behind the first.
    delivery_futs = []
    for payload in (b"1", b"2"):
        batch = producer.create_batch()
        batch.append(timestamp=None, key=None, value=payload)
        batch.close()
        fut = await producer.send_batch(batch, self.topic, partition=0)
        delivery_futs.append(fut)

    await producer.commit_transaction()

    # Commit implies flush: every delivery future must be resolved.
    for fut in delivery_futs:
        self.assertTrue(fut.done())
def send_messages(self, partition, messages, *, topic=None,
                  timestamp_ms=None, return_inst=False, headers=None):
    """Produce ``messages`` to ``partition`` of ``topic``.

    Returns the response metadata objects when ``return_inst`` is true,
    otherwise the encoded payloads that were sent.
    """
    topic = topic or self.topic
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    sent = []
    try:
        yield from self.wait_topic(producer.client, topic)
        for msg in messages:
            # Normalize str/int payloads into bytes before sending
            if isinstance(msg, str):
                msg = msg.encode()
            elif isinstance(msg, int):
                msg = str(msg).encode()
            future = yield from producer.send(
                topic, msg, partition=partition,
                timestamp_ms=timestamp_ms, headers=headers)
            resp = yield from future
            self.assertEqual(resp.topic, topic)
            self.assertEqual(resp.partition, partition)
            sent.append(resp if return_inst else msg)
    finally:
        yield from producer.stop()
    return sent
async def test_producer_transactional_send_batch_outside_txn(self):
    """``send_batch`` is only legal while a transaction is active:
    before begin, after commit started, and after commit finished it
    must raise ``IllegalOperation``."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    await producer.start()
    self.add_cleanup(producer.stop)

    batch = producer.create_batch()
    batch.append(timestamp=None, key=None, value=b"2")
    batch.close()

    # Can not send if not yet in transaction
    with self.assertRaises(IllegalOperation):
        await producer.send_batch(batch, self.topic, partition=0)

    await producer.begin_transaction()
    await producer.send(self.topic, value=b"1", partition=0)
    # Start commit but let it stay in-flight for a moment
    commit_task = ensure_future(producer.commit_transaction())
    await asyncio.sleep(0.001, loop=self.loop)
    self.assertFalse(commit_task.done())

    # Already not in transaction
    with self.assertRaises(IllegalOperation):
        await producer.send_batch(batch, self.topic, partition=0)

    await commit_task
    # Transaction needs to be restarted
    with self.assertRaises(IllegalOperation):
        await producer.send_batch(batch, self.topic, partition=0)
def test_producer_correct_time_returned(self):
    """Record metadata must carry the right timestamp: broker-assigned
    near send time, client-supplied when given, or LOG_APPEND_TIME
    from the broker response."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    send_time = (time.time() * 1000)
    res = yield from producer.send_and_wait(
        "XXXX", b'text1', partition=0)
    self.assertLess(res.timestamp - send_time, 1000)  # 1s

    # Explicit client timestamp is passed through unchanged
    res = yield from producer.send_and_wait(
        "XXXX", b'text1', partition=0, timestamp_ms=123123123)
    self.assertEqual(res.timestamp, 123123123)

    expected_timestamp = 999999999

    @asyncio.coroutine
    def mocked_send(*args, **kw):
        # There's no easy way to set LOG_APPEND_TIME on server, so use this
        # hack for now.
        return ProduceResponse[2](
            topics=[
                ('XXXX', [(0, 0, 0, expected_timestamp)])],
            throttle_time_ms=0)

    with mock.patch.object(producer.client, 'send') as mocked:
        mocked.side_effect = mocked_send
        res = yield from producer.send_and_wait(
            "XXXX", b'text1', partition=0)
        self.assertEqual(res.timestamp_type, LOG_APPEND_TIME)
        self.assertEqual(res.timestamp, expected_timestamp)
async def test_producer_transactional_simple(self):
    """Smoke test: a minimal produce/consume round-trip with the
    ``transactional_id`` option set."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer")
    await producer.start()
    self.add_cleanup(producer.stop)

    async with producer.transaction():
        produce_meta = await producer.send_and_wait(
            self.topic, b'hello, Kafka!')

    reader = AIOKafkaConsumer(
        self.topic, loop=self.loop, bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    await reader.start()
    self.add_cleanup(reader.stop)

    record = await reader.getone()
    # The consumed record must match the produce metadata exactly
    self.assertEqual(
        (record.offset, record.timestamp, record.value, record.key),
        (produce_meta.offset, produce_meta.timestamp,
         b"hello, Kafka!", None))
def test_producer_indempotence_not_supported(self):
    """Idempotent produce requires broker support: starting against a
    broker that lacks it must raise ``UnsupportedVersionError``."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        enable_idempotence=True)
    with self.assertRaises(UnsupportedVersionError):
        yield from producer.start()
    yield from producer.stop()
def test_producer_send_noack(self):
    """With acks=0 the delivery futures resolve with no metadata."""
    producer = AIOKafkaProducer(loop=self.loop,
                                bootstrap_servers=self.hosts,
                                acks=0)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    fut_a = yield from producer.send(self.topic, b"hello, Kafka!",
                                     partition=0)
    fut_b = yield from producer.send(self.topic, b"hello, Kafka!",
                                     partition=1)
    done, _ = yield from asyncio.wait([fut_a, fut_b], loop=self.loop)
    # No acknowledgement was requested, so there is nothing to return
    for fut in done:
        self.assertIsNone(fut.result())
def test_producer_send_with_headers(self):
    """Record headers passed to ``send`` must not break delivery."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    delivery = yield from producer.send(
        self.topic, b'msg', partition=0,
        headers=[("type", b"Normal")])

    record_meta = yield from delivery
    self.assertEqual(record_meta.partition, 0)
def test_producer_send_with_headers_raise_error(self):
    """Headers on brokers that do not support them must raise
    ``UnsupportedVersionError``."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    with self.assertRaises(UnsupportedVersionError):
        yield from producer.send(
            self.topic, b'msg', partition=0,
            headers=[("type", b"Normal")])
def test_producer_send_batch(self):
    """Exercise the explicit batch API: fill-to-capacity behaviour,
    delivery, oversized first/non-first records, and sending after the
    producer is closed."""
    key = b'test key'
    value = b'test value'
    max_batch_size = 10000
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        max_batch_size=max_batch_size)
    yield from producer.start()

    partitions = yield from producer.partitions_for(self.topic)
    partition = partitions.pop()

    # silly method to find current offset for this partition
    resp = yield from producer.send_and_wait(
        self.topic, value=b'discovering offset', partition=partition)
    offset = resp.offset

    # only fills up to its limits, then returns None
    batch = producer.create_batch()
    self.assertEqual(batch.record_count(), 0)
    num = 0
    while True:
        metadata = batch.append(key=key, value=value, timestamp=None)
        if metadata is None:
            break
        num += 1
    self.assertTrue(num > 0)
    self.assertEqual(batch.record_count(), num)

    # batch gets properly sent
    future = yield from producer.send_batch(
        batch, self.topic, partition=partition)
    resp = yield from future
    self.assertEqual(resp.topic, self.topic)
    self.assertEqual(resp.partition, partition)
    # base offset is right after the "discovering offset" record
    self.assertEqual(resp.offset, offset + 1)

    # batch accepts a too-large message if it's the first
    too_large = b'm' * (max_batch_size + 1)
    batch = producer.create_batch()
    metadata = batch.append(key=None, value=too_large, timestamp=None)
    self.assertIsNotNone(metadata)

    # batch rejects a too-large message if it's not the first
    batch = producer.create_batch()
    batch.append(key=None, value=b"short", timestamp=None)
    metadata = batch.append(key=None, value=too_large, timestamp=None)
    self.assertIsNone(metadata)
    yield from producer.stop()

    # batch can't be sent after closing time
    with self.assertRaises(ProducerClosed):
        yield from producer.send_batch(
            batch, self.topic, partition=partition)
def test_producer_send(self):
    """Basic send paths: payload validation, explicit partition, keyed
    messages, ``send_and_wait`` and sending after close."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    # Only bytes payloads are accepted
    with self.assertRaisesRegexp(AssertionError, 'value must be bytes'):
        yield from producer.send(self.topic, 'hello, Kafka!')

    delivery = yield from producer.send(self.topic, b'hello, Kafka!')
    meta = yield from delivery
    self.assertEqual(meta.topic, self.topic)
    self.assertIn(meta.partition, (0, 1))
    self.assertEqual(meta.offset, 0)

    delivery = yield from producer.send(
        self.topic, b'second msg', partition=1)
    meta = yield from delivery
    self.assertEqual(meta.partition, 1)

    delivery = yield from producer.send(self.topic, b'value', key=b'KEY')
    meta = yield from delivery
    self.assertIn(meta.partition, (0, 1))

    meta = yield from producer.send_and_wait(self.topic, b'value')
    self.assertIn(meta.partition, (0, 1))

    yield from producer.stop()
    # After stop the producer refuses further sends
    with self.assertRaises(ProducerClosed):
        yield from producer.send(self.topic, b'value', key=b'KEY')
def test_producer_send(self):
    """Basic send paths: type validation, explicit partition, keyed
    messages, ``send_and_wait`` and sending after close."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    # Only bytes payloads are accepted
    with self.assertRaises(TypeError):
        yield from producer.send(self.topic, 'hello, Kafka!', partition=0)

    delivery = yield from producer.send(
        self.topic, b'hello, Kafka!', partition=0)
    meta = yield from delivery
    self.assertEqual(meta.topic, self.topic)
    self.assertIn(meta.partition, (0, 1))
    self.assertEqual(meta.offset, 0)

    delivery = yield from producer.send(
        self.topic, b'second msg', partition=1)
    meta = yield from delivery
    self.assertEqual(meta.partition, 1)

    delivery = yield from producer.send(self.topic, b'value', key=b'KEY')
    meta = yield from delivery
    self.assertIn(meta.partition, (0, 1))

    meta = yield from producer.send_and_wait(self.topic, b'value')
    self.assertIn(meta.partition, (0, 1))

    yield from producer.stop()
    # After stop the producer refuses further sends
    with self.assertRaises(ProducerClosed):
        yield from producer.send(self.topic, b'value', key=b'KEY')
def test_producer_warn_unclosed(self):
    """Dropping a started producer without ``stop`` must emit a
    ResourceWarning while still being garbage-collectable."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    # Weak reference lets us verify collection after the del below
    producer_ref = weakref.ref(producer)
    yield from producer.start()

    with self.silence_loop_exception_handler():
        with self.assertWarnsRegex(
                ResourceWarning, "Unclosed AIOKafkaProducer"):
            del producer
            gc.collect()
    # Assure that the reference was properly collected
    self.assertIsNone(producer_ref())
def test_producer_send_with_compression(self):
    """Unknown codecs are rejected; gzip-compressed produce works."""
    with self.assertRaises(ValueError):
        producer = AIOKafkaProducer(
            loop=self.loop, compression_type="my_custom")

    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        compression_type="gzip")
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    delivery = yield from producer.send(
        self.topic, b"this msg is compressed by client")
    meta = yield from delivery
    self.assertEqual(meta.topic, self.topic)
    self.assertIn(meta.partition, (0, 1))
    yield from producer.stop()
def test_producer_notopic(self):
    """If metadata never resolves the topic, ``send_and_wait`` must
    time out with ``UnknownTopicOrPartitionError``."""
    producer = AIOKafkaProducer(
        loop=self.loop, request_timeout_ms=200,
        bootstrap_servers=self.hosts)
    yield from producer.start()
    with mock.patch.object(
            AIOKafkaClient, '_metadata_update') as mocked:
        # Metadata updates become no-ops, so the topic never appears
        @asyncio.coroutine
        def dummy(*d, **kw):
            return
        mocked.side_effect = dummy

        with self.assertRaises(UnknownTopicOrPartitionError):
            yield from producer.send_and_wait('some_topic', b'hello')
    yield from producer.stop()
def test_producer_flush_test(self):
    """``flush`` must resolve every pending delivery future."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    pending = []
    for payload in (b'text1', b'text2'):
        fut = yield from producer.send("producer_flush_test", payload)
        pending.append(fut)
    # Nothing delivered yet
    for fut in pending:
        self.assertFalse(fut.done())

    yield from producer.flush()
    # Flush waited for every outstanding delivery
    for fut in pending:
        self.assertTrue(fut.done())
async def test_producer_transactional_send_offsets_error_checks(self):
    """``send_offsets_to_transaction`` argument validation: outside a
    transaction it is illegal; inside, group_id must be valid."""
    txn_producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    await txn_producer.start()
    self.add_cleanup(txn_producer.stop)

    # Not in transaction
    with self.assertRaises(IllegalOperation):
        await txn_producer.send_offsets_to_transaction({}, group_id=None)

    # Not proper group_id
    async with txn_producer.transaction():
        with self.assertRaises(ValueError):
            await txn_producer.send_offsets_to_transaction(
                {}, group_id=None)
async def test_producer_require_transactional_id(self):
    """Every transaction API must raise ``IllegalOperation`` when the
    producer was created without a ``transactional_id``."""
    plain_producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    await plain_producer.start()
    self.add_cleanup(plain_producer.stop)

    # Plain control calls all fail the same way
    for txn_call in (plain_producer.begin_transaction,
                     plain_producer.commit_transaction,
                     plain_producer.abort_transaction):
        with self.assertRaises(IllegalOperation):
            await txn_call()

    with self.assertRaises(IllegalOperation):
        async with plain_producer.transaction():
            pass

    with self.assertRaises(IllegalOperation):
        await plain_producer.send_offsets_to_transaction(
            {}, group_id="123")
async def test_producer_transactional_aborting_previous_failure(self):
    """A gap in the produce sequence must surface as
    ``OutOfOrderSequenceNumber`` on the next send in the transaction."""
    # If we were to fail to send some message we should get
    # OutOfOrderSequenceNumber
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    await producer.start()
    self.add_cleanup(producer.stop)

    with self.assertRaises(OutOfOrderSequenceNumber):
        async with producer.transaction():
            await producer.send_and_wait(self.topic, b'msg1', partition=0)
            # Imitate a not delivered message
            producer._txn_manager.increment_sequence_number(
                TopicPartition(self.topic, 0), 1)
            await producer.send_and_wait(self.topic, b'msg2', partition=0)
def test_producer_send_timeout(self):
    """``KafkaTimeoutError`` raised by the client's send must fail
    every pending delivery future."""
    producer = AIOKafkaProducer(loop=self.loop,
                                bootstrap_servers=self.hosts)
    yield from producer.start()

    @asyncio.coroutine
    def mocked_send(nodeid, req):
        # Simulate the network request timing out
        raise KafkaTimeoutError()

    with mock.patch.object(producer.client, "send") as mocked:
        mocked.side_effect = mocked_send

        fut1 = yield from producer.send(self.topic, b"text1")
        fut2 = yield from producer.send(self.topic, b"text2")
        done, _ = yield from asyncio.wait([fut1, fut2], loop=self.loop)
        for item in done:
            with self.assertRaises(KafkaTimeoutError):
                item.result()
def test_compress_decompress(self):
    """Messages compressed on produce must round-trip on consume."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        compression_type="gzip")
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    payloads = [b'some-message'*10, b'other-message'*30]
    for payload in payloads:
        yield from producer.send(self.topic, payload, partition=1)
    yield from producer.stop()

    consumer = yield from self.consumer_factory()
    # Records come back in the order they were produced
    for expected in payloads:
        record = yield from consumer.getone()
        self.assertEqual(record.value, expected)
def test_producer_send_leader_notfound(self):
    """Leader lookup failures map to distinct exceptions: -1 means no
    leader elected, ``None`` means no leader info at all."""
    producer = AIOKafkaProducer(loop=self.loop,
                                bootstrap_servers=self.hosts,
                                request_timeout_ms=200)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    with mock.patch.object(ClusterMetadata, "leader_for_partition") as mocked:
        # -1 is the broker's "leader not elected yet" marker
        mocked.return_value = -1
        future = yield from producer.send(self.topic, b"text")
        with self.assertRaises(LeaderNotAvailableError):
            yield from future

    with mock.patch.object(ClusterMetadata, "leader_for_partition") as mocked:
        # None means the metadata has no leader for this partition
        mocked.return_value = None
        future = yield from producer.send(self.topic, b"text")
        with self.assertRaises(NotLeaderForPartitionError):
            yield from future

    yield from producer.stop()
def test_producer_send_empty_batch(self):
    # We trigger a unique case here, we don't send any messages, but the
    # ProduceBatch will be created. It should be discarded as it contains
    # 0 messages by sender routine.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    # The failed send leaves an empty batch behind internally
    with self.assertRaises(TypeError):
        yield from producer.send(self.topic, 'text1')

    # Wrap the produce-request path to count outgoing requests
    send_mock = mock.Mock()
    send_mock.side_effect = producer._sender._send_produce_req
    producer._sender._send_produce_req = send_mock

    yield from producer.flush()
    # The empty batch must never reach the wire
    self.assertEqual(send_mock.call_count, 0)
def test_producer_invalid_leader_retry_metadata(self):
    # See related issue #362. The metadata can have a new node in leader
    # set while we still don't have metadata for that node.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts, linger_ms=1000)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    # Make sure we have fresh metadata for partitions
    yield from producer.partitions_for(self.topic)

    # Point partition 0 at a node id the metadata does not know about
    partitions = producer._metadata._partitions[self.topic]
    bogus_leader = partitions[0].leader + 1
    partitions[0] = partitions[0]._replace(leader=bogus_leader)

    # The producer must recover via a metadata refresh and deliver
    record_meta = yield from producer.send_and_wait(
        self.topic, b'hello, Kafka!')
    self.assertTrue(record_meta)
def test_producer_send_reenque_resets_waiters(self):
    """Reenqueueing a failed batch must reset the accumulator's drain
    waiter so later batches do not recurse."""
    # See issue #409. If reenqueue method does not reset the waiter
    # properly new batches will raise RecursionError.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts, linger_ms=1000)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    # 1st step is to force an error in produce sequense and force a
    # reenqueue on 1 batch.
    with mock.patch.object(producer.client, 'send') as mocked:
        send_fut = create_future(self.loop)

        @asyncio.coroutine
        def mocked_func(node_id, request):
            if not send_fut.done():
                send_fut.set_result(None)
            raise UnknownTopicOrPartitionError()
        mocked.side_effect = mocked_func

        fut = yield from producer.send(
            self.topic, b'Some MSG', partition=0)
        yield from send_fut
        # 100ms backoff time
        yield from asyncio.sleep(0.11, loop=self.loop)

    # The batch failed and was reenqueued, not resolved
    self.assertFalse(fut.done())
    self.assertTrue(producer._message_accumulator._batches)

    # Then we add another msg right after the reenqueue. As we use
    # linger_ms `_sender_routine` will be locked for some time after we
    # reenqueue batch, so this add will be forced to wait a longer time.
    # If drain_waiter is broken it will end up with a RecursionError.
    fut2 = yield from producer.send(self.topic, b'Some MSG 2', partition=0)
    yield from fut2

    self.assertTrue(fut.done())
    self.assertTrue(fut2.done())
    msg1 = yield from fut
    msg2 = yield from fut2

    # The order should be preserved
    self.assertLess(msg1.offset, msg2.offset)
async def test_producer_transactional_cancel_txn_methods(self):
    """Cancelling transaction-control coroutines: ``begin`` may be
    cancelled, but commit and abort must run to completion anyway."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    txn_manager = producer._txn_manager
    self.assertEqual(txn_manager.state, TransactionState.UNINITIALIZED)
    await producer.start()
    self.add_cleanup(producer.stop)
    self.assertEqual(txn_manager.state, TransactionState.READY)

    async def cancel(task):
        # Coroutines will not be started until we yield at least 1ce
        await asyncio.sleep(0)
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    # test cancel begin_transaction.
    task = ensure_future(producer.begin_transaction())
    await cancel(task)
    self.assertEqual(txn_manager.state, TransactionState.READY)

    # test cancel commit_transaction. Commit should not be cancelled.
    await producer.begin_transaction()
    self.assertEqual(txn_manager.state, TransactionState.IN_TRANSACTION)
    task = ensure_future(producer.commit_transaction())
    await cancel(task)
    self.assertEqual(
        txn_manager.state, TransactionState.COMMITTING_TRANSACTION)
    # Commit continues in background despite the cancellation
    await asyncio.sleep(0.1)
    self.assertEqual(txn_manager.state, TransactionState.READY)

    # test cancel abort_transaction. Abort should also not be cancelled.
    await producer.begin_transaction()
    self.assertEqual(txn_manager.state, TransactionState.IN_TRANSACTION)
    task = ensure_future(producer.abort_transaction())
    await cancel(task)
    self.assertEqual(
        txn_manager.state, TransactionState.ABORTING_TRANSACTION)
    await asyncio.sleep(0.1)
    self.assertEqual(txn_manager.state, TransactionState.READY)
async def _test_control_record(self, isolation_level):
    """Shared test body: transaction markers (control records) must be
    skipped by the consumer without stalling its position."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer")
    await producer.start()
    self.add_cleanup(producer.stop)

    async with producer.transaction():
        meta = await producer.send_and_wait(
            self.topic, b'Hello from transaction', partition=0)

    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        auto_offset_reset="earliest",
        isolation_level=isolation_level,
        fetch_max_bytes=10)
    await consumer.start()
    self.add_cleanup(consumer.stop)

    # Transaction marker will be next after the message
    consumer.seek(meta.topic_partition, meta.offset + 1)
    # Only the marker is there, so nothing consumable arrives
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(
            consumer.getone(), timeout=0.5, loop=self.loop)

    # We must not be stuck on previous position
    position = await consumer.position(meta.topic_partition)
    self.assertEqual(position, meta.offset + 2)

    # After producing some more data it should resume consumption
    async with producer.transaction():
        meta2 = await producer.send_and_wait(
            self.topic, b'Hello from transaction 2', partition=0)

    msg = await consumer.getone()
    self.assertEqual(msg.offset, meta2.offset)
    self.assertEqual(msg.timestamp, meta2.timestamp)
    self.assertEqual(msg.value, b"Hello from transaction 2")
    self.assertEqual(msg.key, None)
def test_producer_sender_errors_propagate_to_producer(self):
    # Following on #362 there may be other unexpected errors in sender
    # routine that we want the user to see, rather than just get stuck.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts, linger_ms=1000)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    with mock.patch.object(producer._sender, '_send_produce_req') as m:
        m.side_effect = KeyError

        # First send surfaces the wrapped fatal error...
        with self.assertRaisesRegex(
                KafkaError, "Unexpected error during batch delivery"):
            yield from producer.send_and_wait(
                self.topic, b'hello, Kafka!')

        # ...and the producer stays failed for subsequent sends
        with self.assertRaisesRegex(
                KafkaError, "Unexpected error during batch delivery"):
            yield from producer.send_and_wait(
                self.topic, b'hello, Kafka!')
def test_check_extended_message_record(self):
    """Consumed records expose extended metadata: serialized sizes and
    (for brokers >= 0.10) broker-assigned timestamps."""
    s_time_ms = time.time() * 1000
    producer = AIOKafkaProducer(loop=self.loop,
                                bootstrap_servers=self.hosts)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    payload = b'some-message#1'
    yield from producer.send(self.topic, payload, partition=1)
    yield from producer.stop()

    consumer = yield from self.consumer_factory()
    record = yield from consumer.getone()
    self.assertEqual(record.value, payload)
    # No key was sent; value is the 14-byte payload above
    self.assertEqual(record.serialized_key_size, -1)
    self.assertEqual(record.serialized_value_size, 14)
    if consumer._client.api_version >= (0, 10):
        # Brokers from 0.10 on stamp records with CREATE_TIME (type 0)
        self.assertIsNotNone(record.timestamp)
        self.assertTrue(record.timestamp >= s_time_ms)
        self.assertEqual(record.timestamp_type, 0)
    else:
        self.assertIsNone(record.timestamp)
        self.assertIsNone(record.timestamp_type)
    yield from consumer.stop()
def test_producer_ssl(self):
    # Produce by SSL consume by PLAINTEXT
    topic = "test_ssl_produce"
    context = self.create_ssl_context()
    ssl_bootstrap = [
        "{}:{}".format(self.kafka_host, self.kafka_ssl_port)]

    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=ssl_bootstrap,
        security_protocol="SSL", ssl_context=context)
    yield from producer.start()
    yield from producer.send_and_wait(topic=topic, value=b"Super msg")
    yield from producer.stop()

    consumer = AIOKafkaConsumer(
        topic, loop=self.loop, bootstrap_servers=self.hosts,
        enable_auto_commit=True, auto_offset_reset="earliest")
    yield from consumer.start()
    record = yield from consumer.getone()
    self.assertEqual(record.value, b"Super msg")
    yield from consumer.stop()
async def test_producer_warn_unclosed(self):
    """Dropping a started producer without ``stop`` must emit a
    ResourceWarning while still being garbage-collectable."""
    producer = AIOKafkaProducer(loop=self.loop,
                                bootstrap_servers=self.hosts)
    producer_ref = weakref.ref(producer)
    await producer.start()

    with self.silence_loop_exception_handler():
        with self.assertWarnsRegex(
                ResourceWarning, "Unclosed AIOKafkaProducer"):
            del producer
            # _sender_routine will contain a reference and will only be
            # freed after loop will spin once. Not sure why though...
            await asyncio.sleep(0, loop=self.loop)
            gc.collect()
    # Assure that the reference was properly collected
    self.assertIsNone(producer_ref())
async def test_producer_send_empty_batch(self):
    # We trigger a unique case here, we don't send any messages, but the
    # ProduceBatch will be created. It should be discarded as it contains
    # 0 messages by sender routine.
    producer = AIOKafkaProducer(bootstrap_servers=self.hosts)
    await producer.start()
    self.add_cleanup(producer.stop)

    # The failed send leaves an empty batch behind internally
    with self.assertRaises(TypeError):
        await producer.send(self.topic, 'text1')

    # Wrap the produce-request path to count outgoing requests
    send_mock = mock.Mock()
    send_mock.side_effect = producer._sender._send_produce_req
    producer._sender._send_produce_req = send_mock

    await producer.flush()
    # The empty batch must never reach the wire
    self.assertEqual(send_mock.call_count, 0)
def test_producer_send_noack(self):
    """With acks=0 the delivery futures resolve with no metadata."""
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts, acks=0)
    yield from producer.start()

    fut_a = yield from producer.send(
        self.topic, b'hello, Kafka!', partition=0)
    fut_b = yield from producer.send(
        self.topic, b'hello, Kafka!', partition=1)
    done, _ = yield from asyncio.wait([fut_a, fut_b], loop=self.loop)
    # No acknowledgement was requested, so there is nothing to return
    for fut in done:
        self.assertIsNone(fut.result())
    yield from producer.stop()
async def test_producer_invalid_leader_retry_metadata(self):
    # See related issue #362. The metadata can have a new node in leader
    # set while we still don't have metadata for that node.
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts, linger_ms=1000)
    await producer.start()
    self.add_cleanup(producer.stop)

    # Make sure we have fresh metadata for partitions
    await producer.partitions_for(self.topic)

    # Point partition 0 at a node id the metadata does not know about
    partitions = producer._metadata._partitions[self.topic]
    bogus_leader = partitions[0].leader + 1
    partitions[0] = partitions[0]._replace(leader=bogus_leader)

    # The producer must recover via a metadata refresh and deliver
    record_meta = await producer.send_and_wait(
        self.topic, b'hello, Kafka!')
    self.assertTrue(record_meta)
async def test_producer_send_timeout(self):
    """``KafkaTimeoutError`` raised by the client's send must fail
    every pending delivery future."""
    producer = AIOKafkaProducer(bootstrap_servers=self.hosts)
    await producer.start()

    async def mocked_send(nodeid, req):
        # Simulate the network request timing out
        raise KafkaTimeoutError()

    with mock.patch.object(producer.client, 'send') as mocked:
        mocked.side_effect = mocked_send

        fut1 = await producer.send(self.topic, b'text1')
        fut2 = await producer.send(self.topic, b'text2')
        done, _ = await asyncio.wait([fut1, fut2])
        for item in done:
            with self.assertRaises(KafkaTimeoutError):
                item.result()

    await producer.stop()
async def test_producer_send_reenque_resets_waiters(self):
    """Reenqueueing a failed batch must reset the accumulator's drain
    waiter so later batches do not recurse."""
    # See issue #409. If reenqueue method does not reset the waiter
    # properly new batches will raise RecursionError.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts, linger_ms=1000)
    await producer.start()
    self.add_cleanup(producer.stop)

    # 1st step is to force an error in produce sequense and force a
    # reenqueue on 1 batch.
    with mock.patch.object(producer.client, 'send') as mocked:
        send_fut = create_future(self.loop)

        @asyncio.coroutine
        def mocked_func(node_id, request):
            if not send_fut.done():
                send_fut.set_result(None)
            raise UnknownTopicOrPartitionError()
        mocked.side_effect = mocked_func

        fut = await producer.send(self.topic, b'Some MSG', partition=0)
        await send_fut
        # 100ms backoff time
        await asyncio.sleep(0.11, loop=self.loop)

    # The batch failed and was reenqueued, not resolved
    self.assertFalse(fut.done())
    self.assertTrue(producer._message_accumulator._batches)

    # Then we add another msg right after the reenqueue. As we use
    # linger_ms `_sender_routine` will be locked for some time after we
    # reenqueue batch, so this add will be forced to wait a longer time.
    # If drain_waiter is broken it will end up with a RecursionError.
    fut2 = await producer.send(self.topic, b'Some MSG 2', partition=0)
    await fut2

    self.assertTrue(fut.done())
    self.assertTrue(fut2.done())
    msg1 = await fut
    msg2 = await fut2

    # The order should be preserved
    self.assertLess(msg1.offset, msg2.offset)
def test_producer_send(self):
    """Basic send paths: payload validation, explicit partition, keyed
    messages, and sending after close."""
    producer = AIOKafkaProducer(loop=self.loop,
                                bootstrap_servers=self.hosts)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    # Only bytes payloads are accepted
    with self.assertRaisesRegexp(AssertionError, 'value must be bytes'):
        yield from producer.send(self.topic, 'hello, Kafka!')

    delivery = yield from producer.send(self.topic, b'hello, Kafka!')
    meta = yield from delivery
    self.assertEqual(meta.topic, self.topic)
    self.assertIn(meta.partition, (0, 1))
    self.assertEqual(meta.offset, 0)

    delivery = yield from producer.send(
        self.topic, b'second msg', partition=1)
    meta = yield from delivery
    self.assertEqual(meta.partition, 1)

    delivery = yield from producer.send(self.topic, b'value', key=b'KEY')
    meta = yield from delivery
    self.assertIn(meta.partition, (0, 1))

    yield from producer.stop()
    # After stop the producer refuses further sends
    with self.assertRaises(ProducerClosed):
        yield from producer.send(self.topic, b'value', key=b'KEY')
async def test_producer_sender_errors_propagate_to_producer(self):
    # Following on #362 there may be other unexpected errors in sender
    # routine that we want the user to see, rather than just get stuck.
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts, linger_ms=1000)
    await producer.start()
    self.add_cleanup(producer.stop)

    with mock.patch.object(producer._sender, '_send_produce_req') as m:
        m.side_effect = KeyError

        # First send surfaces the wrapped fatal error...
        with self.assertRaisesRegex(
                KafkaError, "Unexpected error during batch delivery"):
            await producer.send_and_wait(self.topic, b'hello, Kafka!')

        # ...and the producer stays failed for subsequent sends
        with self.assertRaisesRegex(
                KafkaError, "Unexpected error during batch delivery"):
            await producer.send_and_wait(self.topic, b'hello, Kafka!')
def test_producer_send_with_compression(self):
    """Unknown codecs are rejected; gzip-compressed produce works."""
    with self.assertRaises(ValueError):
        producer = AIOKafkaProducer(
            loop=self.loop, compression_type='my_custom')

    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        compression_type='gzip')
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    delivery = yield from producer.send(
        self.topic, b'this msg is compressed by client')
    meta = yield from delivery
    self.assertEqual(meta.topic, self.topic)
    self.assertIn(meta.partition, (0, 1))
    yield from producer.stop()
async def test_producer_transactional_flush_before_commit(self):
    # We need to be sure, that we send all pending batches before
    # committing the transaction
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    await producer.start()
    self.add_cleanup(producer.stop)

    await producer.begin_transaction()

    pending = [
        await producer.send(self.topic, b"Super msg")
        for _ in range(10)
    ]

    await producer.commit_transaction()
    # Commit waited for every outstanding delivery
    for fut in pending:
        self.assertTrue(fut.done())
async def test_producer_send_leader_notfound(self):
    """Leader lookup failures map to distinct exceptions: -1 means no
    leader elected, ``None`` means no leader info at all."""
    producer = AIOKafkaProducer(bootstrap_servers=self.hosts,
                                request_timeout_ms=200)
    await producer.start()

    with mock.patch.object(ClusterMetadata, 'leader_for_partition') as mocked:
        # -1 is the broker's "leader not elected yet" marker
        mocked.return_value = -1
        future = await producer.send(self.topic, b'text')
        with self.assertRaises(LeaderNotAvailableError):
            await future

    with mock.patch.object(ClusterMetadata, 'leader_for_partition') as mocked:
        # None means the metadata has no leader for this partition
        mocked.return_value = None
        future = await producer.send(self.topic, b'text')
        with self.assertRaises(NotLeaderForPartitionError):
            await future

    await producer.stop()
async def test_producer_ssl(self):
    # Produce by SSL consume by PLAINTEXT
    topic = "test_ssl_produce"
    context = self.create_ssl_context()
    producer = AIOKafkaProducer(
        bootstrap_servers=[
            f"{self.kafka_host}:{self.kafka_ssl_port}"],
        security_protocol="SSL", ssl_context=context)
    await producer.start()
    await producer.send_and_wait(topic=topic, value=b"Super msg")
    await producer.stop()

    # Read the message back over the plaintext listener to prove it
    # actually reached the broker through the SSL path.
    consumer = AIOKafkaConsumer(
        topic, bootstrap_servers=self.hosts,
        enable_auto_commit=True,
        auto_offset_reset="earliest")
    await consumer.start()
    msg = await consumer.getone()
    self.assertEqual(msg.value, b"Super msg")
    await consumer.stop()
async def test_producer_indempotence_simple(self):
    # NOTE(review): "indempotence" is a typo for "idempotence" in the
    # test name; renaming would change the test id, so it is kept.
    # The test here will just check if we can do simple produce with
    # enable_idempotence option, as no specific API changes is expected.
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts,
        enable_idempotence=True)
    await producer.start()
    self.add_cleanup(producer.stop)
    meta = await producer.send_and_wait(self.topic, b'hello, Kafka!')

    # Consume the record back and verify it matches the returned
    # record metadata exactly.
    consumer = AIOKafkaConsumer(
        self.topic, bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    await consumer.start()
    self.add_cleanup(consumer.stop)
    msg = await consumer.getone()
    self.assertEqual(msg.offset, meta.offset)
    self.assertEqual(msg.timestamp, meta.timestamp)
    self.assertEqual(msg.value, b"hello, Kafka!")
    self.assertEqual(msg.key, None)
async def test_producer_indempotence_no_duplicates(self):
    # NOTE(review): "indempotence" is a typo for "idempotence" in the
    # test name; renaming would change the test id, so it is kept.
    # Idempotent producer should retry produce in case of timeout error
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts,
        enable_idempotence=True,
        request_timeout_ms=2000)
    await producer.start()
    self.add_cleanup(producer.stop)

    original_send = producer.client.send
    retry = [0]  # mutable cell so the closure below can count attempts

    async def mocked_send(*args, **kw):
        # Let the produce request actually reach the broker, then
        # pretend it timed out for the first two attempts, forcing the
        # idempotent producer to retry an already-applied produce.
        result = await original_send(*args, **kw)
        if result.API_KEY == ProduceResponse[0].API_KEY and \
                retry[0] < 2:
            retry[0] += 1
            raise RequestTimedOutError
        return result

    with mock.patch.object(producer.client, 'send') as mocked:
        mocked.side_effect = mocked_send
        meta = await producer.send_and_wait(
            self.topic, b'hello, Kafka!')

    consumer = AIOKafkaConsumer(
        self.topic, bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    await consumer.start()
    self.add_cleanup(consumer.stop)
    msg = await consumer.getone()
    self.assertEqual(msg.offset, meta.offset)
    self.assertEqual(msg.timestamp, meta.timestamp)
    self.assertEqual(msg.value, b"hello, Kafka!")
    self.assertEqual(msg.key, None)

    # Despite the two forced retries, no duplicate record may appear.
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(consumer.getone(), timeout=0.5)
def test_producer_send_with_compression(self):
    with self.assertRaises(ValueError):
        # Unknown codec must be rejected at construction time.
        producer = AIOKafkaProducer(
            loop=self.loop, compression_type='my_custom')

    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        compression_type='gzip')
    yield from producer.start()

    # short message will not be compressed
    future = yield from producer.send(
        self.topic, b'this msg is too short for compress')
    resp = yield from future
    self.assertEqual(resp.topic, self.topic)
    self.assertTrue(resp.partition in (0, 1))

    # now message will be compressed
    resp = yield from producer.send_and_wait(
        self.topic, b'large_message-' * 100)
    self.assertEqual(resp.topic, self.topic)
    self.assertTrue(resp.partition in (0, 1))
    yield from producer.stop()
async def send_messages(self, partition, messages, *, topic=None,
                        timestamp_ms=None, return_inst=False,
                        headers=None):
    """Produce *messages* to *partition* of *topic* and assert delivery.

    Returns the normalized payloads, or the broker responses when
    ``return_inst`` is true.
    """
    topic = topic or self.topic
    sent = []
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    await producer.start()
    try:
        await self.wait_topic(producer.client, topic)
        for message in messages:
            # Normalize str/int payloads to bytes for the wire.
            if isinstance(message, str):
                message = message.encode()
            elif isinstance(message, int):
                message = str(message).encode()
            delivery = await producer.send(
                topic, message, partition=partition,
                timestamp_ms=timestamp_ms, headers=headers)
            resp = await delivery
            self.assertEqual(resp.topic, topic)
            self.assertEqual(resp.partition, partition)
            sent.append(resp if return_inst else message)
    finally:
        await producer.stop()
    return sent
def test_get_offsets(self):
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=self.hosts)
    yield from client.bootstrap()

    subscription = SubscriptionState('earliest')
    subscription.subscribe(topics=('topic1', ))
    coordinator = GroupCoordinator(
        client, subscription, loop=self.loop,
        group_id='getoffsets-group')

    yield from self.wait_topic(client, 'topic1')

    # Produce 1 message to partition 0 and 2 messages to partition 1.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    yield from producer.send('topic1', b'first msg', partition=0)
    yield from producer.send('topic1', b'second msg', partition=1)
    yield from producer.send('topic1', b'third msg', partition=1)
    yield from producer.stop()

    yield from coordinator.ensure_active_group()

    offsets = {
        TopicPartition('topic1', 0): OffsetAndMetadata(1, ''),
        TopicPartition('topic1', 1): OffsetAndMetadata(2, '')
    }
    yield from coordinator.commit_offsets(offsets)

    self.assertEqual(subscription.all_consumed_offsets(), {})
    subscription.seek(('topic1', 0), 0)
    subscription.seek(('topic1', 1), 0)
    yield from coordinator.refresh_committed_offsets()

    # Refreshed committed offsets must match what was committed above.
    self.assertEqual(subscription.assignment[('topic1', 0)].committed, 1)
    self.assertEqual(subscription.assignment[('topic1', 1)].committed, 2)

    yield from coordinator.close()
    yield from client.close()
def test_producer_send_with_serializer(self):
    def key_serializer(val):
        # Keys are upper-cased before encoding.
        return val.upper().encode()

    def serializer(val):
        return json.dumps(val).encode()

    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        value_serializer=serializer,
        key_serializer=key_serializer,
        acks='all', max_request_size=1000)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    key = 'some key'
    value = {'strKey': 23523.443, 23: 'STRval'}
    future = yield from producer.send(self.topic, value, key=key)
    resp = yield from future
    partition = resp.partition
    offset = resp.offset
    self.assertTrue(partition in (0, 1))  # partition

    future = yield from producer.send(self.topic, 'some str', key=key)
    resp = yield from future
    # expect the same partition bcs the same key
    self.assertEqual(resp.partition, partition)
    # expect offset +1
    self.assertEqual(resp.offset, offset + 1)

    # Serialized value exceeding max_request_size must be rejected.
    value[23] = '*VALUE' * 800
    with self.assertRaises(MessageSizeTooLargeError):
        yield from producer.send(self.topic, value, key=key)

    yield from producer.stop()
    yield from producer.stop()  # second stop should be Ok (idempotent)
async def test_producer_transactional_fences_off_previous(self):
    # Test 2 producers fencing one another by using the same
    # transactional_id
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    await producer.start()
    self.add_cleanup(producer.stop)

    producer2 = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p2")
    await producer2.start()
    self.add_cleanup(producer2.stop)

    async with producer2.transaction():
        await producer2.send_and_wait(self.topic, b'hello, Kafka! 2')

    # producer2 bumped the epoch for this transactional_id, so the
    # first producer's next transaction must be fenced by the broker.
    with self.assertRaises(ProducerFenced):
        async with producer.transaction():
            await producer.send_and_wait(self.topic, b'hello, Kafka!')
def test_producer_send_leader_notfound(self):
    """Produce fails with leader errors when metadata lacks a leader."""
    producer = AIOKafkaProducer(
        loop=self.loop,
        bootstrap_servers=self.hosts,
        request_timeout_ms=200)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)

    # Leader id of -1 -> LeaderNotAvailableError on delivery.
    with mock.patch.object(
            ClusterMetadata, 'leader_for_partition') as patched:
        patched.return_value = -1
        fut = yield from producer.send(self.topic, b'text')
        with self.assertRaises(LeaderNotAvailableError):
            yield from fut

    # No leader known at all (None) -> NotLeaderForPartitionError.
    with mock.patch.object(
            ClusterMetadata, 'leader_for_partition') as patched:
        patched.return_value = None
        fut = yield from producer.send(self.topic, b'text')
        with self.assertRaises(NotLeaderForPartitionError):
            yield from fut

    yield from producer.stop()
def test_producer_send_error(self):
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        retry_backoff_ms=100, linger_ms=5, request_timeout_ms=400)
    yield from producer.start()

    @asyncio.coroutine
    def mocked_send(nodeid, req):
        # RequestTimedOutCode error for partition=0
        # (error code 7), while partition=1 succeeds at offset 111.
        return ProduceResponse[0]([
            (self.topic, [(0, 7, 0), (1, 0, 111)])])

    with mock.patch.object(producer.client, 'send') as mocked:
        mocked.side_effect = mocked_send
        fut1 = yield from producer.send(
            self.topic, b'text1', partition=0)
        fut2 = yield from producer.send(
            self.topic, b'text2', partition=1)
        with self.assertRaises(RequestTimedOutError):
            yield from fut1
        resp = yield from fut2
        self.assertEqual(resp.offset, 111)

    @asyncio.coroutine
    def mocked_send_with_sleep(nodeid, req):
        # RequestTimedOutCode error for partition=0
        yield from asyncio.sleep(0.1, loop=self.loop)
        return ProduceResponse[0]([(self.topic, [(0, 7, 0)])])

    with mock.patch.object(producer.client, 'send') as mocked:
        mocked.side_effect = mocked_send_with_sleep
        with self.assertRaises(RequestTimedOutError):
            future = yield from producer.send(
                self.topic, b'text1', partition=0)
            yield from future
    yield from producer.stop()
def test_producer_send_batch(self):
    key = b'test key'
    value = b'test value'
    max_batch_size = 10000

    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        max_batch_size=max_batch_size)
    yield from producer.start()
    partitions = yield from producer.partitions_for(self.topic)
    partition = partitions.pop()

    # silly method to find current offset for this partition
    resp = yield from producer.send_and_wait(
        self.topic, value=b'discovering offset', partition=partition)
    offset = resp.offset

    # only fills up to its limits, then returns None
    batch = producer.create_batch()
    self.assertEqual(batch.record_count(), 0)
    num = 0
    while True:
        metadata = batch.append(key=key, value=value, timestamp=None)
        if metadata is None:
            break
        num += 1
    self.assertTrue(num > 0)
    self.assertEqual(batch.record_count(), num)

    # batch gets properly sent
    future = yield from producer.send_batch(
        batch, self.topic, partition=partition)
    resp = yield from future
    self.assertEqual(resp.topic, self.topic)
    self.assertEqual(resp.partition, partition)
    # offset of the batch is right after the discovery message
    self.assertEqual(resp.offset, offset + 1)

    # batch accepts a too-large message if it's the first
    too_large = b'm' * (max_batch_size + 1)
    batch = producer.create_batch()
    metadata = batch.append(key=None, value=too_large, timestamp=None)
    self.assertIsNotNone(metadata)

    # batch rejects a too-large message if it's not the first
    batch = producer.create_batch()
    batch.append(key=None, value=b"short", timestamp=None)
    metadata = batch.append(key=None, value=too_large, timestamp=None)
    self.assertIsNone(metadata)
    yield from producer.stop()

    # batch can't be sent after closing time
    with self.assertRaises(ProducerClosed):
        yield from producer.send_batch(
            batch, self.topic, partition=partition)
class KafkaProducer(BaseProducer):
    """KafkaProducer class, the bridge between AIOKafkaProducer and tonga.

    Attributes:
        logger (Logger): Python logger
        serializer (BaseSerializer): Serializer used to encode & decode events
        _bootstrap_servers (Union[str, List[str]]): 'host[:port]' string (or
            list of 'host[:port]' strings) that the producer should contact
            to bootstrap initial cluster metadata
        _client_id (str): A name for this client. This string is passed in
            each request to servers and can be used to identify specific
            server-side log entries that correspond to this client
        _acks (Union[int, str]): The number of acknowledgments the producer
            requires the leader to have received before considering a
            request complete. Possible value (0 / 1 / all)
        _running (bool): Is running flag
        _transactional_id (str): Id for make transactional process
        _kafka_producer (AIOKafkaProducer): Underlying AIOKafkaProducer
        _loop (AbstractEventLoop): Asyncio loop
    """
    logger: Logger
    serializer: BaseSerializer
    _client: KafkaClient
    _bootstrap_servers: Union[str, List[str]]
    _client_id: str
    _acks: Union[int, str]
    _running: bool
    _transactional_id: str
    _kafka_producer: AIOKafkaProducer
    _loop: asyncio.AbstractEventLoop

    def __init__(self, client: KafkaClient, serializer: BaseSerializer,
                 loop: asyncio.AbstractEventLoop,
                 partitioner: BasePartitioner, client_id: str = None,
                 acks: Union[int, str] = 1,
                 transactional_id: str = None) -> None:
        """KafkaProducer constructor.

        Args:
            client (KafkaClient): Initialization class (contains
                client_id / bootstraps_server)
            serializer (BaseSerializer): Serializer encode & decode event
            loop (asyncio.AbstractEventLoop): Asyncio loop
            partitioner (BasePartitioner): Partitioner for record routing
            acks (Union[int, str]): The number of acknowledgments the
                producer requires the leader to have received before
                considering a request complete. Possible value (0 / 1 / all)
            client_id (str): Client name (if is none, KafkaConsumer use
                KafkaClient client_id)
            transactional_id: Id for make transactional process

        Raises:
            AioKafkaProducerBadParams: raised when producer was call
                with bad params
            KafkaProducerError: raised when some generic error was raised
                form Aiokafka

        Returns:
            None
        """
        super().__init__()
        self.logger = getLogger('tonga')
        self._client = client

        # Create client_id
        if client_id is None:
            self._client_id = self._client.client_id + '-' + str(
                self._client.cur_instance)
        else:
            self._client_id = client_id

        self._bootstrap_servers = self._client.bootstrap_servers
        self._acks = acks

        if isinstance(serializer, BaseSerializer):
            self.serializer = serializer
        else:
            raise BadSerializer

        self._transactional_id = transactional_id
        self._running = False
        self._loop = loop

        try:
            self._kafka_producer = AIOKafkaProducer(
                loop=self._loop,
                bootstrap_servers=self._bootstrap_servers,
                client_id=self._client_id, acks=self._acks,
                value_serializer=self.serializer.encode,
                transactional_id=self._transactional_id,
                key_serializer=KafkaKeySerializer.encode,
                partitioner=partitioner)
        except ValueError as err:
            self.logger.exception('%s', err.__str__())
            raise AioKafkaProducerBadParams
        except KafkaError as err:
            self.logger.exception('%s', err.__str__())
            raise KafkaProducerError
        self.logger.debug('Create new producer %s', self._client_id)

    async def start_producer(self) -> None:
        """Start producer.

        Retries the broker connection twice; timeouts and connection
        errors wait one second before the next attempt.

        Raises:
            KafkaProducerAlreadyStartedError: raised when producer was
                already started
            ProducerConnectionError: raised when producer can't connect
                to broker
            KafkaError: raised when catch KafkaError

        Returns:
            None
        """
        if self._running:
            raise KafkaProducerAlreadyStartedError
        for retry in range(2):
            try:
                await self._kafka_producer.start()
                self._running = True
                self.logger.debug('Start producer : %s', self._client_id)
            except KafkaTimeoutError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                await asyncio.sleep(1)
            except ConnectionError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                await asyncio.sleep(1)
            except KafkaError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise err
            else:
                break
        else:
            # Both attempts failed without a fatal KafkaError.
            raise ProducerConnectionError

    async def stop_producer(self) -> None:
        """Stop producer.

        Raises:
            KafkaProducerNotStartedError: raised when producer was
                not started
            KafkaProducerTimeoutError: raised when producer timeout
                on broker
            KafkaError: raised when catch KafkaError

        Returns:
            None
        """
        if not self._running:
            raise KafkaProducerNotStartedError
        try:
            await self._kafka_producer.stop()
            self._running = False
            self.logger.debug('Stop producer : %s', self._client_id)
        except KafkaTimeoutError as err:
            self.logger.exception('%s', err.__str__())
            raise KafkaProducerTimeoutError
        except KafkaError as err:
            self.logger.exception('%s', err.__str__())
            raise err

    def is_running(self) -> bool:
        """Get is running.

        Returns:
            bool: running
        """
        return self._running

    # Transaction sugar function
    def init_transaction(self) -> TransactionContext:
        """Inits transaction.

        Returns:
            TransactionContext: Aiokafka TransactionContext
        """
        return self._kafka_producer.transaction()

    async def end_transaction(self,
                              committed_offsets: Dict[str, BasePositioning],
                              group_id: str) -> None:
        """Ends transaction.

        Args:
            committed_offsets (Dict[str, BasePositioning]): Committed
                offsets during transaction
            group_id (str): Group_id to commit

        Returns:
            None
        """
        # Translate tonga positionings to aiokafka TopicPartition keys.
        kafka_committed_offsets = dict()
        for key, positioning in committed_offsets.items():
            kafka_committed_offsets[positioning.to_topics_partition(
            )] = positioning.get_current_offset()
        await self._kafka_producer.send_offsets_to_transaction(
            kafka_committed_offsets, group_id)

    async def send_and_wait(self, msg: Union[BaseRecord, StoreRecord],
                            topic: str) -> BasePositioning:
        """Send a message and await an acknowledgments.

        Args:
            msg (BaseRecord): Event to send in Kafka, inherit form
                BaseRecord
            topic (str): Topic name to send massage

        Raises:
            KeyErrorSendEvent: raised when KeyError was raised
            ValueErrorSendEvent: raised when ValueError was raised
            TypeErrorSendEvent: raised when TypeError was raised
            KafkaError: raised when catch KafkaError
            FailToSendEvent: raised when producer fail to send event

        Returns:
            None
        """
        if not self._running:
            await self.start_producer()

        # Up to 4 attempts; only timeouts are retried, other errors
        # are mapped to tonga exceptions and re-raised immediately.
        for retry in range(4):
            try:
                if isinstance(msg, BaseRecord):
                    self.logger.debug('Send record %s', msg.to_dict())
                    record_metadata = await self._kafka_producer.send_and_wait(
                        topic=topic, value=msg, key=msg.partition_key)
                elif isinstance(msg, StoreRecord):
                    self.logger.debug('Send store record %s', msg.to_dict())
                    record_metadata = await self._kafka_producer.send_and_wait(
                        topic=topic, value=msg, key=msg.key)
                else:
                    self.logger.error('Fail to send msg %s',
                                      msg.event_name())
                    raise UnknownEventBase
            except KafkaTimeoutError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                await asyncio.sleep(1)
            except KeyError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise KeyErrorSendEvent
            except ValueError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise ValueErrorSendEvent
            except TypeError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise TypeErrorSendEvent
            except KafkaError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise err
            else:
                return KafkaPositioning(record_metadata.topic,
                                        record_metadata.partition,
                                        record_metadata.offset)
        else:
            raise FailToSendEvent

    async def send(self, msg: Union[BaseRecord, StoreRecord],
                   topic: str) -> Awaitable:
        """Send a message and await an acknowledgments.

        Args:
            msg (BaseRecord): Event to send in Kafka, inherit form
                BaseRecord
            topic (str): Topic name to send massage

        Raises:
            KeyErrorSendEvent: raised when KeyError was raised
            ValueErrorSendEvent: raised when ValueError was raised
            TypeErrorSendEvent: raised when TypeError was raised
            KafkaError: raised when catch KafkaError
            FailToSendEvent: raised when producer fail to send event

        Returns:
            None
        """
        if not self._running:
            await self.start_producer()

        for retry in range(4):
            try:
                if isinstance(msg, BaseRecord):
                    self.logger.debug('Send record %s', msg.to_dict())
                    record_promise = self._kafka_producer.send(
                        topic=topic, value=msg, key=msg.partition_key)
                elif isinstance(msg, StoreRecord):
                    self.logger.debug('Send store record %s', msg.to_dict())
                    record_promise = self._kafka_producer.send(
                        topic=topic, value=msg, key=msg.key)
                else:
                    raise UnknownEventBase
            except KafkaTimeoutError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                await asyncio.sleep(1)
            except KeyError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise KeyErrorSendEvent
            except ValueError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise ValueErrorSendEvent
            except TypeError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise TypeErrorSendEvent
            except KafkaError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise err
            else:
                return record_promise
        else:
            raise FailToSendEvent

    async def create_batch(self) -> BatchBuilder:
        """Creates an empty batch.

        Returns:
            BatchBuilder: Empty batch
        """
        if not self._running:
            await self.start_producer()
        self.logger.debug('Create batch')
        return self._kafka_producer.create_batch()

    async def send_batch(self, batch: BatchBuilder, topic: str,
                         partition: int = 0) -> None:
        """Sends batch.

        Args:
            batch (BatchBuilder): BatchBuilder
            topic (str): Topic name
            partition (int): Partition number

        Raises:
            KeyErrorSendEvent: raised when KeyError was raised
            ValueErrorSendEvent: raised when ValueError was raised
            TypeErrorSendEvent: raised when TypeError was raised
            KafkaError: raised when catch KafkaError
            FailToSendBatch: raised when producer fail to send batch

        Returns:
            None
        """
        if not self._running:
            await self.start_producer()

        for retry in range(4):
            try:
                self.logger.debug('Send batch')
                await self._kafka_producer.send_batch(
                    batch=batch, topic=topic, partition=partition)
            except KafkaTimeoutError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                await asyncio.sleep(1)
            except KeyError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise KeyErrorSendEvent
            except ValueError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise ValueErrorSendEvent
            except TypeError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise TypeErrorSendEvent
            except KafkaError as err:
                self.logger.exception('retry: %s, err: %s', retry,
                                      err.__str__())
                raise err
            else:
                break
        else:
            raise FailToSendBatch

    async def partitions_by_topic(self, topic: str) -> List[int]:
        """Get partitions by topic name.

        Args:
            topic (str): topic name

        Raises:
            KafkaProducerTimeoutError: raised when producer timeout
                on broker
            KafkaError: raised when catch KafkaError

        Returns:
            List[int]: list of partitions
        """
        if not self._running:
            await self.start_producer()
        try:
            self.logger.debug('Get partitions by topic')
            partitions = await self._kafka_producer.partitions_for(topic)
        except KafkaTimeoutError as err:
            self.logger.exception('%s', err.__str__())
            raise KafkaProducerTimeoutError
        except KafkaError as err:
            self.logger.exception('%s', err.__str__())
            raise err
        return partitions

    def get_producer(self) -> AIOKafkaProducer:
        """Get kafka producer.

        Returns:
            AIOKafkaProducer: AioKafkaProducer instance
        """
        return self._kafka_producer
class KafkaProducer(object):
    """Thin wrapper around a shared AIOKafkaProducer.

    Producer tuning notes (translated from original comments):
        metadata_max_age_ms: max interval (ms) before forcing a
            metadata refresh
        request_timeout_ms: request timeout (ms)
        acks: 0 -> fire and forget; 1 -> leader ack only;
            -1 / 'all' -> wait for all replicas
        compression_type: 'gzip' / 'snappy' / 'lz4' / None (default None)
        max_batch_size: max buffered bytes per partition batch
        max_request_size: max request size; larger payloads trigger send
        linger_ms: artificial send delay for batching
        connections_max_idle_ms: idle-connection close timeout
        enable_idempotence: True guarantees delivery, requires acks=-1/'all'
    """
    # Shared, class-level producer bound to the module-level event loop.
    Producer = AIOKafkaProducer(loop=loop, bootstrap_servers='localhost',
                                metadata_max_age_ms=30000,
                                request_timeout_ms=1000,
                                max_batch_size=16384,
                                max_request_size=1048576,
                                linger_ms=0,
                                connections_max_idle_ms=540000)

    async def partitions_for(self, topic=None):
        """Return the partition set for *topic*.

        BUGFIX: AIOKafkaProducer.partitions_for() requires a topic
        argument; the original call without it always raised TypeError.
        The parameter is optional to keep the old signature callable.
        """
        return await self.Producer.partitions_for(topic)

    async def start(self):
        """Start the shared producer if its sender task is not running."""
        if self.Producer._sender_task is None:
            await self.Producer.start()

    async def stop(self):
        await self.Producer.stop()

    async def flush(self):
        """Wait until all buffered messages are delivered."""
        await self.Producer.flush()

    @classmethod
    def code_data(cls, value, data_type=None, uid=None, country_id=None):
        """Build the envelope dict sent on the wire.

        Returns {"data": value, "createTime": <ms epoch>} plus the
        optional "type", "uid" and "country_id" fields when given.
        """
        data = {"data": value, "createTime": int(time.time() * 1000)}
        if data_type is not None:
            data["type"] = int(data_type)
        if uid is not None:
            data["uid"] = int(uid)
        if country_id is not None:
            data["country_id"] = country_id
        return data

    @classmethod
    def _encode_key(cls, key):
        """Encode *key* to bytes, passing None/bytes through unchanged.

        BUGFIX: the original always did bytes(key, encoding='utf-8'),
        which raised TypeError for the default key=None (swallowed by
        the broad except, so every key-less send silently failed).
        """
        if key is None or isinstance(key, bytes):
            return key
        return bytes(key, encoding='utf-8')

    async def send(self, topic, value, key=None, data_type=None, uid=None,
                   country_id=None, partition=None, timestamp_ms=None):
        """Serialize *value* into the envelope and send it, awaiting ack.

        Returns the record metadata, or None on failure (best-effort:
        errors are printed, not raised, preserving original behavior).
        """
        try:
            data = self.code_data(value, data_type, uid, country_id)
            data = bytes(ujson.dumps(data), encoding='utf-8')
            return await self.Producer.send_and_wait(
                topic, value=data, key=self._encode_key(key),
                partition=partition, timestamp_ms=timestamp_ms)
        except Exception:
            # Best-effort send: log and swallow, as in the original.
            print(traceback.format_exc())

    async def send_many(self, topic, values, key, data_type=None, uid=None,
                        country_id=None, partition=None, timestamp_ms=None):
        """Send *values* as batches, flushing each batch when it fills.

        BUGFIX 1: when a batch filled, the original sent it and then
        `continue`d WITHOUT re-appending the record whose append had
        failed, silently dropping one record per full batch.
        BUGFIX 2: the original appended the raw envelope dict; batches
        carry bytes, so serialize with ujson exactly like send() does.
        """
        encoded_key = self._encode_key(key)
        batch = self.Producer.create_batch()
        for value in values:
            data = self.code_data(value, data_type, uid, country_id)
            data = bytes(ujson.dumps(data), encoding='utf-8')
            metadata = batch.append(key=encoded_key, value=data,
                                    timestamp=timestamp_ms)
            if metadata is None:
                # Batch full: ship it, then retry this record in a
                # fresh batch so it is not lost.
                await self.Producer.send_batch(batch, topic,
                                               partition=partition)
                batch = self.Producer.create_batch()
                batch.append(key=encoded_key, value=data,
                             timestamp=timestamp_ms)
        # Send the final (possibly partial) batch.
        await self.Producer.send_batch(batch, topic, partition=partition)

    async def __aenter__(self):
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # NOTE(review): only flushes, does not stop the shared producer —
        # intentional since Producer is class-level and shared.
        await self.flush()
class RiskProducer(KafkaProducer):
    """KafkaProducer bound to the risk-cluster broker."""

    # Overrides the base class-level producer with the risk broker.
    Producer = AIOKafkaProducer(
        loop=loop, bootstrap_servers='172.31.10.78:9092')

    def __init__(self):
        super(RiskProducer, self).__init__()
async def test_producer_transactional_send_offsets_and_abort(self):
    # Following previous, we will process but abort transaction. Commit
    # should not be processed and the same data should be returned after
    # reset

    # Setup some messages in INPUT topic
    await self.send_messages(0, list(range(0, 100)))
    await self.send_messages(1, list(range(100, 200)))
    in_topic = self.topic
    out_topic = self.topic + "-out"
    group_id = self.topic + "-group"

    consumer = AIOKafkaConsumer(
        in_topic, loop=self.loop,
        bootstrap_servers=self.hosts,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset="earliest")
    await consumer.start()
    self.add_cleanup(consumer.stop)

    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    await producer.start()
    self.add_cleanup(producer.stop)

    # Reset committed offsets to 0 for every assigned partition.
    assignment = consumer.assignment()
    self.assertTrue(assignment)
    for tp in assignment:
        await consumer.commit({tp: 0})
        offset_before = await consumer.committed(tp)
        self.assertEqual(offset_before, 0)

    async def transform(raise_error):
        # Consume-transform-produce loop; offsets are committed
        # inside the transaction via send_offsets_to_transaction.
        while True:
            batch = await consumer.getmany(timeout_ms=5000, max_records=20)
            if not batch:
                break
            async with producer.transaction():
                offsets = {}
                for tp, msgs in batch.items():
                    for msg in msgs:
                        out_msg = b"OUT-" + msg.value
                        # We produce to the same partition
                        producer.send(
                            out_topic, value=out_msg,
                            partition=tp.partition)
                        offsets[tp] = msg.offset + 1
                await producer.send_offsets_to_transaction(
                    offsets, group_id)
                if raise_error:
                    # Raising inside the context aborts the transaction.
                    raise ValueError()

    try:
        await transform(raise_error=True)
    except ValueError:
        pass

    # Aborted transaction must not have committed any offsets.
    for tp in assignment:
        offset = await consumer.committed(tp)
        self.assertEqual(offset, 0)

    # Reprocess from the committed position; this time commit succeeds.
    await consumer.seek_to_committed()
    await transform(raise_error=False)

    for tp in assignment:
        offset = await consumer.committed(tp)
        self.assertEqual(offset, 100)
async def test_producer_leader_change_preserves_order(self):
    # Before 0.5.0 we did not lock partition until a response came from
    # the server, but locked the node itself.
    # For example: Say the sender sent a request to node 1 and before an
    # failure answer came we updated metadata and leader become node 0.
    # This way we may send the next batch to node 0 without waiting for
    # node 1 batch to be reenqueued, resulting in out-of-order batches
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts, linger_ms=1000)
    await producer.start()
    self.add_cleanup(producer.stop)

    # Alter metadata to convince the producer, that leader or partition 0
    # is a different node
    await producer.partitions_for(self.topic)
    topic_meta = producer._metadata._partitions[self.topic]
    real_leader = topic_meta[0].leader
    topic_meta[0] = topic_meta[0]._replace(leader=real_leader + 1)

    # Make sure the first request for produce takes more time
    original_send = producer.client.send

    async def mocked_send(node_id, request, *args, **kw):
        # Delay only produce requests that target the fake leader.
        if node_id != real_leader and \
                request.API_KEY == ProduceResponse[0].API_KEY:
            await asyncio.sleep(2)

        result = await original_send(node_id, request, *args, **kw)
        return result
    producer.client.send = mocked_send

    # Send Batch 1. This will end up waiting for some time on fake leader
    batch = producer.create_batch()
    meta = batch.append(key=b"key", value=b"1", timestamp=None)
    batch.close()
    fut = await producer.send_batch(
        batch, self.topic, partition=0)

    # Make sure we sent the request
    await asyncio.sleep(0.1)
    # Update metadata to return leader to real one
    await producer.client.force_metadata_update()

    # Send Batch 2, that if it's bugged will go straight to the real node
    batch2 = producer.create_batch()
    meta2 = batch2.append(key=b"key", value=b"2", timestamp=None)
    batch2.close()
    fut2 = await producer.send_batch(
        batch2, self.topic, partition=0)

    batch_meta = await fut
    batch_meta2 = await fut2

    # Check the order of messages
    consumer = AIOKafkaConsumer(
        self.topic, bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    await consumer.start()
    self.add_cleanup(consumer.stop)
    msg = await consumer.getone()
    self.assertEqual(msg.offset, batch_meta.offset)
    self.assertEqual(msg.timestamp or -1, meta.timestamp)
    self.assertEqual(msg.value, b"1")
    self.assertEqual(msg.key, b"key")
    msg2 = await consumer.getone()
    self.assertEqual(msg2.offset, batch_meta2.offset)
    self.assertEqual(msg2.timestamp or -1, meta2.timestamp)
    self.assertEqual(msg2.value, b"2")
    self.assertEqual(msg2.key, b"key")
def __init__(self, client: KafkaClient, serializer: BaseSerializer,
             loop: asyncio.AbstractEventLoop,
             partitioner: BasePartitioner, client_id: str = None,
             acks: Union[int, str] = 1,
             transactional_id: str = None) -> None:
    """KafkaProducer constructor.

    Args:
        client (KafkaClient): Initialization class (contains
            client_id / bootstraps_server)
        serializer (BaseSerializer): Serializer encode & decode event
        loop (asyncio.AbstractEventLoop): Asyncio loop
        partitioner (BasePartitioner): Partitioner for record routing
        acks (Union[int, str]): The number of acknowledgments the
            producer requires the leader to have received before
            considering a request complete. Possible value (0 / 1 / all)
        client_id (str): Client name (if is none, KafkaConsumer use
            KafkaClient client_id)
        transactional_id: Id for make transactional process

    Raises:
        AioKafkaProducerBadParams: raised when producer was call with
            bad params
        KafkaProducerError: raised when some generic error was raised
            form Aiokafka

    Returns:
        None
    """
    super().__init__()
    self.logger = getLogger('tonga')
    self._client = client

    # Create client_id
    if client_id is None:
        self._client_id = self._client.client_id + '-' + str(
            self._client.cur_instance)
    else:
        self._client_id = client_id

    self._bootstrap_servers = self._client.bootstrap_servers
    self._acks = acks

    if isinstance(serializer, BaseSerializer):
        self.serializer = serializer
    else:
        raise BadSerializer

    self._transactional_id = transactional_id
    self._running = False
    self._loop = loop

    try:
        self._kafka_producer = AIOKafkaProducer(
            loop=self._loop,
            bootstrap_servers=self._bootstrap_servers,
            client_id=self._client_id, acks=self._acks,
            value_serializer=self.serializer.encode,
            transactional_id=self._transactional_id,
            key_serializer=KafkaKeySerializer.encode,
            partitioner=partitioner)
    except ValueError as err:
        self.logger.exception('%s', err.__str__())
        raise AioKafkaProducerBadParams
    except KafkaError as err:
        self.logger.exception('%s', err.__str__())
        raise KafkaProducerError
    self.logger.debug('Create new producer %s', self._client_id)