async def test_producer_send_leader_notfound(self):
    # Leadership problems must surface on the returned delivery future:
    # a leader of -1 maps to LeaderNotAvailableError, a missing leader
    # (None) maps to NotLeaderForPartitionError.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        request_timeout_ms=200)
    await producer.start()

    with mock.patch.object(
            ClusterMetadata, 'leader_for_partition') as mocked:
        mocked.return_value = -1
        fut = await producer.send(self.topic, b'text')
        with self.assertRaises(LeaderNotAvailableError):
            await fut

    with mock.patch.object(
            ClusterMetadata, 'leader_for_partition') as mocked:
        mocked.return_value = None
        fut = await producer.send(self.topic, b'text')
        with self.assertRaises(NotLeaderForPartitionError):
            await fut

    await producer.stop()
async def test_producer_transactional_flush_before_commit(self):
    # Every batch still pending must be delivered before the commit of
    # the transaction returns.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    await producer.start()
    self.add_cleanup(producer.stop)

    await producer.begin_transaction()

    pending = []
    for _ in range(10):
        delivery_fut = await producer.send(self.topic, b"Super msg")
        pending.append(delivery_fut)

    await producer.commit_transaction()

    # Commit implies flush: all delivery futures are resolved by now
    for delivery_fut in pending:
        self.assertTrue(delivery_fut.done())
async def test_producer_ssl(self):
    # Round-trip check: produce over SSL, then read the same record
    # back over PLAINTEXT.
    topic = "test_ssl_produce"
    context = self.create_ssl_context()
    producer = AIOKafkaProducer(
        bootstrap_servers=[f"{self.kafka_host}:{self.kafka_ssl_port}"],
        security_protocol="SSL", ssl_context=context)
    await producer.start()
    await producer.send_and_wait(topic=topic, value=b"Super msg")
    await producer.stop()

    consumer = AIOKafkaConsumer(
        topic, bootstrap_servers=self.hosts,
        enable_auto_commit=True, auto_offset_reset="earliest")
    await consumer.start()
    received = await consumer.getone()
    self.assertEqual(received.value, b"Super msg")
    await consumer.stop()
async def test_producer_sender_errors_propagate_to_producer(self):
    # Following on #362: unexpected errors raised inside the sender
    # routine must reach the user instead of leaving sends stuck.
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts, linger_ms=1000)
    await producer.start()
    self.add_cleanup(producer.stop)

    with mock.patch.object(producer._sender, '_send_produce_req') as m:
        m.side_effect = KeyError

        with self.assertRaisesRegex(
                KafkaError, "Unexpected error during batch delivery"):
            await producer.send_and_wait(self.topic, b'hello, Kafka!')

        # A second attempt fails the same way — the error is sticky
        with self.assertRaisesRegex(
                KafkaError, "Unexpected error during batch delivery"):
            await producer.send_and_wait(self.topic, b'hello, Kafka!')
async def test_producer_indempotence_simple(self):
    # A plain produce/consume round-trip with enable_idempotence set:
    # the option should require no API changes at all.
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts, enable_idempotence=True)
    await producer.start()
    self.add_cleanup(producer.stop)
    meta = await producer.send_and_wait(self.topic, b'hello, Kafka!')

    consumer = AIOKafkaConsumer(
        self.topic, bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    await consumer.start()
    self.add_cleanup(consumer.stop)

    record = await consumer.getone()
    self.assertEqual(record.offset, meta.offset)
    self.assertEqual(record.timestamp, meta.timestamp)
    self.assertEqual(record.value, b"hello, Kafka!")
    self.assertEqual(record.key, None)
async def test_producer_indempotence_no_duplicates(self):
    # An idempotent producer must retry a produce on RequestTimedOut
    # without the record ending up in the log twice.
    #
    # Converted from @asyncio.coroutine / `yield from` (the decorator
    # was removed in Python 3.11) to the async/await style every other
    # test in this suite already uses.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        enable_idempotence=True, request_timeout_ms=2000)
    await producer.start()
    self.add_cleanup(producer.stop)

    original_send = producer.client.send
    retry = [0]  # mutable cell so the closure can count retries

    async def mocked_send(*args, **kw):
        # Let the real request through, then pretend it timed out for
        # the first two produce responses to force client-side retries.
        result = await original_send(*args, **kw)
        if result.API_KEY == ProduceResponse[0].API_KEY and retry[0] < 2:
            retry[0] += 1
            raise RequestTimedOutError
        return result

    with mock.patch.object(producer.client, 'send') as mocked:
        mocked.side_effect = mocked_send
        meta = await producer.send_and_wait(
            self.topic, b'hello, Kafka!')

    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop, bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    await consumer.start()
    self.add_cleanup(consumer.stop)
    msg = await consumer.getone()
    self.assertEqual(msg.offset, meta.offset)
    self.assertEqual(msg.timestamp, meta.timestamp)
    self.assertEqual(msg.value, b"hello, Kafka!")
    self.assertEqual(msg.key, None)

    # Exactly one record was committed: nothing else to consume
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(consumer.getone(), timeout=0.5)
async def _test_control_record(self, isolation_level):
    # Helper shared by isolation-level tests: the consumer must step
    # over a transaction control marker instead of getting stuck on it.
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer")
    await producer.start()
    self.add_cleanup(producer.stop)
    async with producer.transaction():
        first_meta = await producer.send_and_wait(
            self.topic, b'Hello from transaction', partition=0)

    consumer = AIOKafkaConsumer(
        self.topic, bootstrap_servers=self.hosts,
        auto_offset_reset="earliest",
        isolation_level=isolation_level, fetch_max_bytes=10)
    await consumer.start()
    self.add_cleanup(consumer.stop)

    # The transaction marker sits right after the produced message
    consumer.seek(first_meta.topic_partition, first_meta.offset + 1)
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(consumer.getone(), timeout=0.5)

    # Position must have advanced past the marker, not stayed put
    position = await consumer.position(first_meta.topic_partition)
    self.assertEqual(position, first_meta.offset + 2)

    # Consumption resumes once more data is produced
    async with producer.transaction():
        second_meta = await producer.send_and_wait(
            self.topic, b'Hello from transaction 2', partition=0)
    record = await consumer.getone()
    self.assertEqual(record.offset, second_meta.offset)
    self.assertEqual(record.timestamp, second_meta.timestamp)
    self.assertEqual(record.value, b"Hello from transaction 2")
    self.assertEqual(record.key, None)
def send_messages(self, partition, messages):
    """Produce *messages* to ``self.topic`` on *partition* and return
    the encoded payloads that were actually sent.

    Kept in the generator-coroutine (``yield from``) style the callers
    of this helper use.
    """
    sent = []
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    try:
        yield from self.wait_topic(producer.client, self.topic)
        for payload in messages:
            # Normalize str/int inputs to bytes
            if isinstance(payload, str):
                payload = payload.encode()
            elif isinstance(payload, int):
                payload = str(payload).encode()
            delivery = yield from producer.send(
                self.topic, payload, partition=partition)
            resp = yield from delivery
            self.assertEqual(resp.topic, self.topic)
            self.assertEqual(resp.partition, partition)
            sent.append(payload)
    finally:
        yield from producer.stop()
    return sent
def test_producer_ssl(self):
    # Round-trip check: produce over SSL, then read the same record
    # back over PLAINTEXT.
    topic = "test_ssl_produce"
    context = self.create_ssl_context()
    producer = AIOKafkaProducer(
        loop=self.loop,
        bootstrap_servers=[
            "{}:{}".format(self.kafka_host, self.kafka_ssl_port)],
        security_protocol="SSL", ssl_context=context)
    yield from producer.start()
    yield from producer.send_and_wait(topic=topic, value=b"Super msg")
    yield from producer.stop()

    consumer = AIOKafkaConsumer(
        topic, loop=self.loop, bootstrap_servers=self.hosts,
        enable_auto_commit=True, auto_offset_reset="earliest")
    yield from consumer.start()
    received = yield from consumer.getone()
    self.assertEqual(received.value, b"Super msg")
    yield from consumer.stop()
def test_get_offsets(self):
    # Commit offsets through the group coordinator, then verify they
    # come back on refresh_committed_offsets().
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=self.hosts)
    yield from client.bootstrap()

    subscription = SubscriptionState('earliest')
    subscription.subscribe(topics=('topic1', ))
    coordinator = GroupCoordinator(
        client, subscription, loop=self.loop,
        group_id='getoffsets-group')
    yield from self.wait_topic(client, 'topic1')

    # Seed the topic with one message on p0 and two on p1
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    seed = [(b'first msg', 0), (b'second msg', 1), (b'third msg', 1)]
    for payload, part in seed:
        yield from producer.send('topic1', payload, partition=part)
    yield from producer.stop()

    yield from coordinator.ensure_active_group()

    offsets = {
        TopicPartition('topic1', 0): OffsetAndMetadata(1, ''),
        TopicPartition('topic1', 1): OffsetAndMetadata(2, ''),
    }
    yield from coordinator.commit_offsets(offsets)

    self.assertEqual(subscription.all_consumed_offsets(), {})
    subscription.seek(('topic1', 0), 0)
    subscription.seek(('topic1', 1), 0)
    yield from coordinator.refresh_committed_offsets()
    self.assertEqual(subscription.assignment[('topic1', 0)].committed, 1)
    self.assertEqual(subscription.assignment[('topic1', 1)].committed, 2)

    yield from coordinator.close()
    yield from client.close()
def test_producer_send(self):
    # Basic send() contract: bytes-only values, partition assignment,
    # keyed sends, and ProducerClosed after stop().
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)
    # assertRaisesRegexp is a deprecated alias removed in Python 3.12;
    # use assertRaisesRegex (as test_producer_sender_errors_* does).
    with self.assertRaisesRegex(AssertionError, 'value must be bytes'):
        yield from producer.send(self.topic, 'hello, Kafka!')
    future = yield from producer.send(self.topic, b'hello, Kafka!')
    resp = yield from future
    self.assertEqual(resp.topic, self.topic)
    self.assertTrue(resp.partition in (0, 1))
    self.assertEqual(resp.offset, 0)

    fut = yield from producer.send(self.topic, b'second msg', partition=1)
    resp = yield from fut
    self.assertEqual(resp.partition, 1)

    future = yield from producer.send(self.topic, b'value', key=b'KEY')
    resp = yield from future
    self.assertTrue(resp.partition in (0, 1))

    yield from producer.stop()
    # A stopped producer must refuse further sends
    with self.assertRaises(ProducerClosed):
        yield from producer.send(self.topic, b'value', key=b'KEY')
def test_check_extended_message_record(self):
    # Records read back from a >= 0.10 broker must carry timestamp
    # metadata; older brokers report None.
    start_ms = time.time() * 1000
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)
    payload = b'some-message#1'
    yield from producer.send(self.topic, payload, partition=1)
    yield from producer.stop()

    consumer = yield from self.consumer_factory()
    record = yield from consumer.getone()
    self.assertEqual(record.value, payload)
    self.assertEqual(record.serialized_key_size, -1)
    self.assertEqual(record.serialized_value_size, 14)
    if consumer._client.api_version >= (0, 10):
        self.assertNotEqual(record.timestamp, None)
        # CreateTime stamp must not predate the test start
        self.assertTrue(record.timestamp >= start_ms)
        self.assertEqual(record.timestamp_type, 0)
    else:
        self.assertEqual(record.timestamp, None)
        self.assertEqual(record.timestamp_type, None)
    yield from consumer.stop()
def test_producer_send_with_serializer(self):
    # Custom key/value serializers: same key -> same partition and
    # consecutive offsets; oversized payloads are rejected.
    def upper_key(val):
        return val.upper().encode()

    def json_value(val):
        return json.dumps(val).encode()

    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        value_serializer=json_value, key_serializer=upper_key,
        acks='all', max_request_size=1000)
    yield from producer.start()
    yield from self.wait_topic(producer.client, self.topic)
    key = 'some key'
    value = {'strKey': 23523.443, 23: 'STRval'}
    fut = yield from producer.send(self.topic, value, key=key)
    resp = yield from fut
    first_partition = resp.partition
    first_offset = resp.offset
    self.assertTrue(first_partition in (0, 1))

    fut = yield from producer.send(self.topic, 'some str', key=key)
    resp = yield from fut
    # Same key hashes to the same partition...
    self.assertEqual(resp.partition, first_partition)
    # ...and the record lands right after the previous one
    self.assertEqual(resp.offset, first_offset + 1)

    value[23] = '*VALUE' * 800
    with self.assertRaises(MessageSizeTooLargeError):
        yield from producer.send(self.topic, value, key=key)

    yield from producer.stop()
    yield from producer.stop()  # stopping twice must be harmless
def test_producer_send_error(self):
    # Broker error codes in a produce response must map onto the
    # per-partition delivery futures.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        retry_backoff_ms=100, linger_ms=5, request_timeout_ms=400)
    yield from producer.start()

    @asyncio.coroutine
    def fake_send(nodeid, req):
        # Error code 7 (RequestTimedOut) for partition 0 only;
        # partition 1 succeeds with offset 111
        return ProduceResponse[0](
            [(self.topic, [(0, 7, 0), (1, 0, 111)])])

    with mock.patch.object(producer.client, 'send') as patched:
        patched.side_effect = fake_send
        fut_err = yield from producer.send(
            self.topic, b'text1', partition=0)
        fut_ok = yield from producer.send(
            self.topic, b'text2', partition=1)
        with self.assertRaises(RequestTimedOutError):
            yield from fut_err
        resp = yield from fut_ok
        self.assertEqual(resp.offset, 111)

    @asyncio.coroutine
    def fake_send_slow(nodeid, req):
        # Delay the response so the request itself times out
        yield from asyncio.sleep(0.1, loop=self.loop)
        return ProduceResponse[0]([(self.topic, [(0, 7, 0)])])

    with mock.patch.object(producer.client, 'send') as patched:
        patched.side_effect = fake_send_slow
        with self.assertRaises(RequestTimedOutError):
            fut = yield from producer.send(
                self.topic, b'text1', partition=0)
            yield from fut

    yield from producer.stop()
async def test_producer_transactional_simple(self):
    # Minimal transactional produce/consume round-trip: just verify
    # that a transactional_id producer works with default settings.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer")
    await producer.start()
    self.add_cleanup(producer.stop)
    async with producer.transaction():
        meta = await producer.send_and_wait(self.topic, b'hello, Kafka!')

    consumer = AIOKafkaConsumer(
        self.topic, loop=self.loop, bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    await consumer.start()
    self.add_cleanup(consumer.stop)

    record = await consumer.getone()
    self.assertEqual(record.offset, meta.offset)
    self.assertEqual(record.timestamp, meta.timestamp)
    self.assertEqual(record.value, b"hello, Kafka!")
    self.assertEqual(record.key, None)
async def send_messages(self, partition, messages, *, topic=None,
                        timestamp_ms=None, return_inst=False,
                        headers=None):
    """Produce *messages* to *topic* (default ``self.topic``).

    Returns the encoded payloads, or the broker responses when
    *return_inst* is true.
    """
    topic = topic or self.topic
    out = []
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    await producer.start()
    try:
        await self.wait_topic(producer.client, topic)
        for payload in messages:
            # Normalize str/int inputs to bytes
            if isinstance(payload, str):
                payload = payload.encode()
            elif isinstance(payload, int):
                payload = str(payload).encode()
            delivery = await producer.send(
                topic, payload, partition=partition,
                timestamp_ms=timestamp_ms, headers=headers)
            resp = await delivery
            self.assertEqual(resp.topic, topic)
            self.assertEqual(resp.partition, partition)
            out.append(resp if return_inst else payload)
    finally:
        await producer.stop()
    return out
def __init__(self, client: KafkaClient, serializer: BaseSerializer,
             loop: asyncio.AbstractEventLoop,
             partitioner: BasePartitioner, client_id: str = None,
             acks: Union[int, str] = 1,
             transactional_id: str = None) -> None:
    """KafkaProducer constructor.

    Args:
        client (KafkaClient): initialization class (carries client_id
            and bootstrap servers)
        serializer (BaseSerializer): serializer used to encode events
        loop (asyncio.AbstractEventLoop): event loop the producer runs on
        partitioner (BasePartitioner): partition selection strategy
        client_id (str): client name; when None one is derived from the
            KafkaClient's client_id and instance number
        acks (Union[int, str]): number of acknowledgments the producer
            requires from the leader before a request is considered
            complete (0 / 1 / 'all')
        transactional_id: id enabling transactional processing

    Raises:
        BadSerializer: *serializer* is not a BaseSerializer instance
        AioKafkaProducerBadParams: aiokafka rejected the parameters
        KafkaProducerError: generic aiokafka error during construction

    Returns:
        None
    """
    super().__init__()
    self.logger = getLogger('tonga')
    self._client = client

    # Derive a client_id from the KafkaClient when none was supplied
    if client_id is None:
        self._client_id = '{}-{}'.format(
            self._client.client_id, self._client.cur_instance)
    else:
        self._client_id = client_id

    self._bootstrap_servers = self._client.bootstrap_servers
    self._acks = acks

    # Reject anything that is not a proper serializer up front
    if not isinstance(serializer, BaseSerializer):
        raise BadSerializer
    self.serializer = serializer

    self._transactional_id = transactional_id
    self._running = False
    self._loop = loop

    try:
        self._kafka_producer = AIOKafkaProducer(
            loop=self._loop,
            bootstrap_servers=self._bootstrap_servers,
            client_id=self._client_id, acks=self._acks,
            value_serializer=self.serializer.encode,
            transactional_id=self._transactional_id,
            key_serializer=KafkaKeySerializer.encode,
            partitioner=partitioner)
    except ValueError as err:
        self.logger.exception('%s', err.__str__())
        raise AioKafkaProducerBadParams
    except KafkaError as err:
        self.logger.exception('%s', err.__str__())
        raise KafkaProducerError
    self.logger.debug('Create new producer %s', self._client_id)
async def test_producer_transactional_send_offsets_and_abort(self):
    # Following previous, we will process but abort the transaction.
    # The commit should not be processed and the same data should be
    # returned after reset.
    #
    # Fix: `producer.send(...)` inside the transaction was called
    # without `await`, so the coroutine was created and discarded —
    # no output record was ever enqueued (and CPython emits a
    # "coroutine was never awaited" RuntimeWarning).

    # Setup some messages in the INPUT topic
    await self.send_messages(0, list(range(0, 100)))
    await self.send_messages(1, list(range(100, 200)))
    in_topic = self.topic
    out_topic = self.topic + "-out"
    group_id = self.topic + "-group"

    consumer = AIOKafkaConsumer(
        in_topic, loop=self.loop, bootstrap_servers=self.hosts,
        enable_auto_commit=False, group_id=group_id,
        auto_offset_reset="earliest")
    await consumer.start()
    self.add_cleanup(consumer.stop)

    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        transactional_id="sobaka_producer", client_id="p1")
    await producer.start()
    self.add_cleanup(producer.stop)

    # Reset committed offsets to 0 for all assigned partitions
    assignment = consumer.assignment()
    self.assertTrue(assignment)
    for tp in assignment:
        await consumer.commit({tp: 0})
        offset_before = await consumer.committed(tp)
        self.assertEqual(offset_before, 0)

    async def transform(raise_error):
        while True:
            batch = await consumer.getmany(timeout_ms=5000, max_records=20)
            if not batch:
                break
            async with producer.transaction():
                offsets = {}
                for tp, msgs in batch.items():
                    for msg in msgs:
                        out_msg = b"OUT-" + msg.value
                        # We produce to the same partition; await so the
                        # record is actually enqueued before commit
                        await producer.send(
                            out_topic, value=out_msg,
                            partition=tp.partition)
                        offsets[tp] = msg.offset + 1
                await producer.send_offsets_to_transaction(
                    offsets, group_id)
                if raise_error:
                    # Raising inside the transaction context aborts it
                    raise ValueError()

    # First pass aborts: committed offsets must stay at 0
    try:
        await transform(raise_error=True)
    except ValueError:
        pass

    for tp in assignment:
        offset = await consumer.committed(tp)
        self.assertEqual(offset, 0)

    # Second pass commits: offsets advance to the end of the input
    await consumer.seek_to_committed()
    await transform(raise_error=False)

    for tp in assignment:
        offset = await consumer.committed(tp)
        self.assertEqual(offset, 100)
class KafkaProducer(object):
    """Thin async wrapper around a shared :class:`AIOKafkaProducer`.

    Producer tuning (translated from the original notes):
        metadata_max_age_ms      max interval (ms) before a forced
                                 metadata refresh
        request_timeout_ms       produce request timeout (ms)
        acks                     0: fire-and-forget; 1: leader ack only;
                                 -1/'all' (default -1): wait for all
                                 replicas
        compression_type         'gzip' / 'snappy' / 'lz4' / None
                                 (default None)
        max_batch_size           max buffered bytes per partition
        max_request_size         max size of one request; larger
                                 payloads trigger an immediate send
        linger_ms                artificial send delay for batching
        connections_max_idle_ms  idle-connection close timeout
        enable_idempotence       guaranteed delivery when True; requires
                                 acks in (-1, 'all')
    """

    # Shared, class-level producer instance (subclasses override this
    # to point at a different cluster).
    Producer = AIOKafkaProducer(
        loop=loop, bootstrap_servers='localhost',
        metadata_max_age_ms=30000, request_timeout_ms=1000,
        max_batch_size=16384, max_request_size=1048576,
        linger_ms=0, connections_max_idle_ms=540000)

    async def partitions_for(self, topic):
        # Fix: AIOKafkaProducer.partitions_for() requires a topic name;
        # the previous zero-argument call always raised TypeError.
        return await self.Producer.partitions_for(topic)

    async def start(self):
        # Start only once; _sender_task is set after a successful start
        if self.Producer._sender_task is None:
            await self.Producer.start()

    async def stop(self):
        await self.Producer.stop()

    async def flush(self):
        await self.Producer.flush()

    @classmethod
    def code_data(cls, value, data_type=None, uid=None, country_id=None):
        """Build the message envelope around *value* (ms timestamp plus
        optional type/uid/country_id fields)."""
        data = {"data": value, "createTime": int(time.time() * 1000)}
        if data_type is not None:
            data["type"] = int(data_type)
        if uid is not None:
            data["uid"] = int(uid)
        if country_id is not None:
            data["country_id"] = country_id
        return data

    @staticmethod
    def _encode_key(key):
        # Fix: bytes(None, encoding=...) raises TypeError — only encode
        # a real key, pass None through as-is.
        if key is None:
            return None
        return bytes(key, encoding='utf-8')

    async def send(self, topic, value, key=None, data_type=None,
                   uid=None, country_id=None, partition=None,
                   timestamp_ms=None):
        """Envelope, serialize and send one value; returns the broker
        response, or None when sending failed (errors are printed,
        best-effort semantics preserved from the original)."""
        try:
            data = self.code_data(value, data_type, uid, country_id)
            payload = bytes(ujson.dumps(data), encoding='utf-8')
            return await self.Producer.send_and_wait(
                topic, value=payload, key=self._encode_key(key),
                partition=partition, timestamp_ms=timestamp_ms)
        except Exception:
            print(traceback.format_exc())

    async def send_many(self, topic, values, key, data_type=None,
                        uid=None, country_id=None, partition=None,
                        timestamp_ms=None):
        """Batched variant of send(): ship a batch whenever it fills up.

        Fixes vs the original:
        - values and keys are serialized to bytes exactly as in send()
          (the original appended a raw dict/str, which BatchBuilder
          rejects);
        - a value that does not fit the current batch is re-appended to
          the fresh batch instead of being silently dropped.
        """
        key_bytes = self._encode_key(key)
        batch = self.Producer.create_batch()
        for value in values:
            data = self.code_data(value, data_type, uid, country_id)
            payload = bytes(ujson.dumps(data), encoding='utf-8')
            metadata = batch.append(
                key=key_bytes, value=payload, timestamp=timestamp_ms)
            if metadata is None:
                # Batch full: send it and retry this value in a new one
                await self.Producer.send_batch(
                    batch, topic, partition=partition)
                batch = self.Producer.create_batch()
                batch.append(key=key_bytes, value=payload,
                             timestamp=timestamp_ms)
        await self.Producer.send_batch(batch, topic, partition=partition)

    async def __aenter__(self):
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # NOTE(review): only flushes, never stops the shared producer —
        # presumably intentional since Producer is class-level; confirm.
        await self.flush()
class RiskProducer(KafkaProducer):
    """KafkaProducer wired to the risk Kafka cluster."""

    # Override the shared producer with one pointing at the risk broker
    Producer = AIOKafkaProducer(
        loop=loop, bootstrap_servers='172.31.10.78:9092')

    def __init__(self):
        super(RiskProducer, self).__init__()
async def test_producer_leader_change_preserves_order(self):
    # Before 0.5.0 we did not lock the partition until a response came
    # from the server, but locked the node itself. Say the sender sent
    # a request to node 1 and, before a failure answer came, metadata
    # was updated and the leader became node 0. We might then send the
    # next batch to node 0 without waiting for the node 1 batch to be
    # re-enqueued, resulting in out-of-order batches.
    producer = AIOKafkaProducer(
        bootstrap_servers=self.hosts, linger_ms=1000)
    await producer.start()
    self.add_cleanup(producer.stop)

    # Alter metadata so the producer believes partition 0 has a
    # different (fake) leader node
    await producer.partitions_for(self.topic)
    topic_meta = producer._metadata._partitions[self.topic]
    real_leader = topic_meta[0].leader
    topic_meta[0] = topic_meta[0]._replace(leader=real_leader + 1)

    # Slow down produce requests that go to the fake leader
    original_send = producer.client.send

    async def mocked_send(node_id, request, *args, **kw):
        is_produce = request.API_KEY == ProduceResponse[0].API_KEY
        if node_id != real_leader and is_produce:
            await asyncio.sleep(2)
        return await original_send(node_id, request, *args, **kw)

    producer.client.send = mocked_send

    # Batch 1 ends up waiting for some time on the fake leader
    batch = producer.create_batch()
    meta = batch.append(key=b"key", value=b"1", timestamp=None)
    batch.close()
    fut = await producer.send_batch(batch, self.topic, partition=0)

    # Give the request time to get in flight
    await asyncio.sleep(0.1)
    # Restore the real leader in metadata
    await producer.client.force_metadata_update()

    # Batch 2 would go straight to the real node if the bug were present
    batch2 = producer.create_batch()
    meta2 = batch2.append(key=b"key", value=b"2", timestamp=None)
    batch2.close()
    fut2 = await producer.send_batch(batch2, self.topic, partition=0)

    batch_meta = await fut
    batch_meta2 = await fut2

    # Verify the records arrive in produce order
    consumer = AIOKafkaConsumer(
        self.topic, bootstrap_servers=self.hosts,
        auto_offset_reset="earliest")
    await consumer.start()
    self.add_cleanup(consumer.stop)

    first = await consumer.getone()
    self.assertEqual(first.offset, batch_meta.offset)
    self.assertEqual(first.timestamp or -1, meta.timestamp)
    self.assertEqual(first.value, b"1")
    self.assertEqual(first.key, b"key")

    second = await consumer.getone()
    self.assertEqual(second.offset, batch_meta2.offset)
    self.assertEqual(second.timestamp or -1, meta2.timestamp)
    self.assertEqual(second.value, b"2")
    self.assertEqual(second.key, b"key")
Usage: python test_merge.py fill: fills kafka topics with data python test_merge.py: runs the merger from the beginning of the topics python test_merge.py slave: runs the merger from the current offsets """ if __name__ == "__main__": import sys import random results = {} loop = asyncio.get_event_loop() topics = ('test1', 'test2', 'test3', 'test4', 'test5') if len(sys.argv) >= 2 and sys.argv[1] == 'fill': producer = AIOKafkaProducer(loop=loop) loop.run_until_complete(producer.start()) for i in range(5000): for topic in topics: # data is topic-i-xxxxx... in order to make long messages that needs to be polled in several requests # time.sleep(0.0001) loop.run_until_complete( producer.send( topic, '-'.join( (topic, str(i), 1000 * 'x')).encode('ascii'))) loop.run_until_complete(producer.stop()) else: class Test(StreamSorter): async def key(self, msg):