def test_large_messages(self):
    """Consume a mix of small and large messages and verify none are lost.

    Ten short messages and ten 5000-character messages are sent, then read
    back through a consumer created with ``max_buffer_size=60000``. Entries
    whose ``message`` is a ``PartialMessage`` are ignored when collecting
    the consumed values.
    """
    # Ten "normal" size messages
    small_messages = self.send_messages(0, [str(n) for n in range(10)])

    # Ten messages that are large (bigger than default fetch size)
    large_messages = self.send_messages(
        0, [random_string(5000) for _ in range(10)])

    # Brokers prior to 0.11 will return the next message
    # if it is smaller than max_bytes (called buffer_size in SimpleConsumer)
    # Brokers 0.11 and later that store messages in v2 format
    # internally will return the next message only if the
    # full MessageSet is smaller than max_bytes.
    # For that reason, we set the max buffer size to a little more
    # than the size of all large messages combined
    consumer = self.consumer(max_buffer_size=60000)

    expected_messages = set(small_messages + large_messages)
    actual_messages = {
        entry.message.value
        for entry in consumer
        if not isinstance(entry.message, PartialMessage)
    }
    self.assertEqual(expected_messages, actual_messages)

    consumer.stop()
def test_produce_many_simple(simple_client, topic):
    """Produce two consecutive 100-message batches using the SimpleClient.

    Each batch goes through ``assert_produce_request``: the first against
    the offset read up front with ``current_offset(simple_client, topic, 0)``
    and the second against that offset plus 100.
    """
    base_offset = current_offset(simple_client, topic, 0)
    batch_size = 100

    for batch_index in range(2):
        payloads = [
            create_message(("Test message %d" % n).encode('utf-8'))
            for n in range(batch_size)
        ]
        assert_produce_request(
            simple_client,
            topic,
            payloads,
            base_offset + batch_index * batch_size,
            batch_size,
        )
def test_simple_consumer_blocking(self):
    """Check what blocking ``get_messages`` calls return and how long they take.

    NOTE(review): ``Timer.interval`` is read after each ``with`` block on the
    assumption that the timer records it on exit -- confirm against Timer.
    """
    consumer = self.consumer()

    # Nothing in the queue yet: no messages come back and the call should
    # take at least the 1 second timeout.
    with Timer() as elapsed:
        received = consumer.get_messages(block=True, timeout=1)
        self.assert_message_count(received, 0)
    self.assertGreaterEqual(elapsed.interval, 1)

    self.send_messages(0, range(0, 5))
    self.send_messages(1, range(5, 10))

    # Ask for 5 messages, 10 in queue. Get 5 back, no blocking
    with Timer() as elapsed:
        received = consumer.get_messages(count=5, block=True, timeout=3)
        self.assert_message_count(received, 5)
    self.assertLess(elapsed.interval, 3)

    # Ask for 10 messages, get 5 back, block 1 second
    with Timer() as elapsed:
        received = consumer.get_messages(count=10, block=True, timeout=1)
        self.assert_message_count(received, 5)
    self.assertGreaterEqual(elapsed.interval, 1)

    # Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
    # second, get 5 back, no blocking
    self.send_messages(0, range(0, 3))
    self.send_messages(1, range(3, 5))
    with Timer() as elapsed:
        received = consumer.get_messages(count=10, block=1, timeout=1)
        self.assert_message_count(received, 5)
    self.assertLessEqual(elapsed.interval, 1)

    consumer.stop()
def test_simple_consumer__seek(self):
    """Seek a consumer around two 100-message partitions and count what follows.

    After every ``seek`` the consumer is iterated to exhaustion and the
    number of messages read is compared with the expected count.
    """
    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    consumer = self.consumer()

    # Rewind 10, then 13, messages from the end
    for rewind in (10, 13):
        consumer.seek(-rewind, 2)
        self.assert_message_count([msg for msg in consumer], rewind)

    # Set absolute offset
    consumer.seek(100)
    self.assert_message_count([msg for msg in consumer], 0)

    # Absolute offsets on a single partition: (offset, partition, expected)
    per_partition_cases = (
        (100, 0, 0),
        (101, 1, 0),
        (90, 0, 10),
        (20, 1, 80),
        (0, 1, 100),
    )
    for offset, partition, expected in per_partition_cases:
        consumer.seek(offset, partition=partition)
        self.assert_message_count([msg for msg in consumer], expected)

    consumer.stop()
def test_simple_consumer_pending(self):
    """Check ``pending()`` totals before and after producing, and after a seek.

    Uses ``assertEqual`` throughout; the ``assertEquals`` alias is deprecated
    and no longer exists in recent Python versions.
    """
    # make sure that we start with no pending messages
    consumer = self.consumer()
    self.assertEqual(consumer.pending(), 0)
    self.assertEqual(consumer.pending(partitions=[0]), 0)
    self.assertEqual(consumer.pending(partitions=[1]), 0)

    # Produce 10 messages to partitions 0 and 1
    self.send_messages(0, range(0, 10))
    self.send_messages(1, range(10, 20))

    consumer = self.consumer()

    self.assertEqual(consumer.pending(), 20)
    self.assertEqual(consumer.pending(partitions=[0]), 10)
    self.assertEqual(consumer.pending(partitions=[1]), 10)

    # move to last message, so one partition should have 1 pending
    # message and other 0
    consumer.seek(-1, 2)
    self.assertEqual(consumer.pending(), 1)

    # Which partition holds the single pending message is not fixed, so
    # compare the two per-partition counts as a set.
    pending_part1 = consumer.pending(partitions=[0])
    pending_part2 = consumer.pending(partitions=[1])
    self.assertEqual(set([0, 1]), set([pending_part1, pending_part2]))
    consumer.stop()
def test_simple_consumer__seek(self):
    """Seek a consumer around two 100-message partitions and count what follows.

    After every ``seek`` the consumer is iterated to exhaustion and the
    number of messages read is compared with the expected count.
    """
    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    consumer = self.consumer()

    # Rewind 10, then 13, messages from the end
    for rewind in (10, 13):
        consumer.seek(-rewind, 2)
        self.assert_message_count([msg for msg in consumer], rewind)

    # Set absolute offset
    consumer.seek(100)
    self.assert_message_count([msg for msg in consumer], 0)

    # Absolute offsets on a single partition: (offset, partition, expected)
    per_partition_cases = (
        (100, 0, 0),
        (101, 1, 0),
        (90, 0, 10),
        (20, 1, 80),
        (0, 1, 100),
    )
    for offset, partition, expected in per_partition_cases:
        consumer.seek(offset, partition=partition)
        self.assert_message_count([msg for msg in consumer], expected)

    consumer.stop()
def test_simple_consumer_pending(self):
    """Check ``pending()`` totals before and after producing, and after a seek.

    Uses ``assertEqual`` throughout; the ``assertEquals`` alias is deprecated
    and no longer exists in recent Python versions.
    """
    # make sure that we start with no pending messages
    consumer = self.consumer()
    self.assertEqual(consumer.pending(), 0)
    self.assertEqual(consumer.pending(partitions=[0]), 0)
    self.assertEqual(consumer.pending(partitions=[1]), 0)

    # Produce 10 messages to partitions 0 and 1
    self.send_messages(0, range(0, 10))
    self.send_messages(1, range(10, 20))

    consumer = self.consumer()

    self.assertEqual(consumer.pending(), 20)
    self.assertEqual(consumer.pending(partitions=[0]), 10)
    self.assertEqual(consumer.pending(partitions=[1]), 10)

    # move to last message, so one partition should have 1 pending
    # message and other 0
    consumer.seek(-1, 2)
    self.assertEqual(consumer.pending(), 1)

    # Which partition holds the single pending message is not fixed, so
    # compare the two per-partition counts as a set.
    pending_part1 = consumer.pending(partitions=[0])
    pending_part2 = consumer.pending(partitions=[1])
    self.assertEqual(set([0, 1]), set([pending_part1, pending_part2]))
    consumer.stop()
def test_offset_behavior__resuming_behavior(self):
    """Check that a second consumer in the same group resumes after the first.

    The first consumer reads 195 of 200 messages while auto-committing every
    20 messages; a second consumer created with the same settings is then
    expected to yield 20 messages.
    """
    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    # Both consumers are created with exactly the same settings
    consumer_settings = dict(
        group='test_offset_behavior__resuming_behavior',
        auto_commit=True,
        auto_commit_every_t=None,
        auto_commit_every_n=20,
    )

    # Start a consumer and grab the first 195 messages
    consumer1 = self.consumer(**consumer_settings)
    output_msgs1 = []
    for _ in range(195):
        output_msgs1.append(consumer1.get_message().message.value)
    self.assert_message_count(output_msgs1, 195)

    # The total offset across both partitions should be at 180
    consumer2 = self.consumer(**consumer_settings)

    # 181-200
    remaining = [msg for msg in consumer2]
    self.assert_message_count(remaining, 20)

    consumer1.stop()
    consumer2.stop()
def test_offset_behavior__resuming_behavior(self):
    """Check that a second consumer in the same group resumes after the first.

    The first consumer reads 195 of 200 messages while auto-committing every
    20 messages; a second consumer created with the same settings is then
    expected to yield 20 messages.
    """
    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    # Both consumers are created with exactly the same settings
    consumer_settings = dict(
        group='test_offset_behavior__resuming_behavior',
        auto_commit=True,
        auto_commit_every_t=None,
        auto_commit_every_n=20,
    )

    # Start a consumer and grab the first 195 messages
    consumer1 = self.consumer(**consumer_settings)
    output_msgs1 = []
    for _ in range(195):
        output_msgs1.append(consumer1.get_message().message.value)
    self.assert_message_count(output_msgs1, 195)

    # The total offset across both partitions should be at 180
    consumer2 = self.consumer(**consumer_settings)

    # 181-200
    remaining = [msg for msg in consumer2]
    self.assert_message_count(remaining, 20)

    consumer1.stop()
    consumer2.stop()
def test_simple_consumer_blocking(self):
    """Check what blocking ``get_messages`` calls return and how long they take.

    NOTE(review): ``Timer.interval`` is read after each ``with`` block on the
    assumption that the timer records it on exit -- confirm against Timer.
    """
    consumer = self.consumer()

    # Nothing in the queue yet: no messages come back and the call should
    # take at least the 1 second timeout.
    with Timer() as elapsed:
        received = consumer.get_messages(block=True, timeout=1)
        self.assert_message_count(received, 0)
    self.assertGreaterEqual(elapsed.interval, 1)

    self.send_messages(0, range(0, 5))
    self.send_messages(1, range(5, 10))

    # Ask for 5 messages, 10 in queue. Get 5 back, no blocking
    with Timer() as elapsed:
        received = consumer.get_messages(count=5, block=True, timeout=3)
        self.assert_message_count(received, 5)
    self.assertLess(elapsed.interval, 3)

    # Ask for 10 messages, get 5 back, block 1 second
    with Timer() as elapsed:
        received = consumer.get_messages(count=10, block=True, timeout=1)
        self.assert_message_count(received, 5)
    self.assertGreaterEqual(elapsed.interval, 1)

    # Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
    # second, get 5 back, no blocking
    self.send_messages(0, range(0, 3))
    self.send_messages(1, range(3, 5))
    with Timer() as elapsed:
        received = consumer.get_messages(count=10, block=1, timeout=1)
        self.assert_message_count(received, 5)
    self.assertLessEqual(elapsed.interval, 1)

    consumer.stop()
def test_simple_consumer_no_reset(self):
    """Expect OffsetOutOfRangeError when ``auto_offset_reset`` is ``None``."""
    for partition, first in ((0, 0), (1, 100)):
        self.send_messages(partition, range(first, first + 100))

    # Create the consumer with auto_offset_reset explicitly set to None
    consumer = self.consumer(auto_offset_reset=None)

    # Move fetch offset ahead of 300 message (out of range)
    consumer.seek(300, 2)

    with self.assertRaises(OffsetOutOfRangeError):
        consumer.get_message()
def test_simple_consumer_smallest_offset_reset(self):
    """With ``auto_offset_reset='smallest'`` an out-of-range seek rereads all."""
    for partition, first in ((0, 0), (1, 100)):
        self.send_messages(partition, range(first, first + 100))

    consumer = self.consumer(auto_offset_reset='smallest')

    # Move fetch offset ahead of 300 message (out of range)
    consumer.seek(300, 2)

    # Since auto_offset_reset is set to smallest we should read all 200
    # messages from beginning.
    received = [msg for msg in consumer]
    self.assert_message_count(received, 200)
def test_simple_consumer_gzip(self):
    """Send 100 gzip messages to each of two partitions and consume all 200."""
    for partition, first in ((0, 0), (1, 100)):
        self.send_gzip_message(partition, range(first, first + 100))

    # Start a consumer and drain it
    consumer = self.consumer()
    received = [msg for msg in consumer]
    self.assert_message_count(received, 200)

    consumer.stop()
def test_simple_consumer_no_reset(self):
    """Expect OffsetOutOfRangeError when ``auto_offset_reset`` is ``None``."""
    for partition, first in ((0, 0), (1, 100)):
        self.send_messages(partition, range(first, first + 100))

    # Create the consumer with auto_offset_reset explicitly set to None
    consumer = self.consumer(auto_offset_reset=None)

    # Move fetch offset ahead of 300 message (out of range)
    consumer.seek(300, 2)

    with self.assertRaises(OffsetOutOfRangeError):
        consumer.get_message()
def test_multi_process_consumer(self):
    """Consume 200 messages through a ``MultiProcessConsumer``."""
    # Produce 100 messages to partitions 0 and 1
    for partition, first in ((0, 0), (1, 100)):
        self.send_messages(partition, range(first, first + 100))

    consumer = self.consumer(consumer=MultiProcessConsumer)

    received = [msg for msg in consumer]
    self.assert_message_count(received, 200)

    consumer.stop()
def test_simple_consumer_smallest_offset_reset(self):
    """With ``auto_offset_reset='smallest'`` an out-of-range seek rereads all."""
    for partition, first in ((0, 0), (1, 100)):
        self.send_messages(partition, range(first, first + 100))

    consumer = self.consumer(auto_offset_reset='smallest')

    # Move fetch offset ahead of 300 message (out of range)
    consumer.seek(300, 2)

    # Since auto_offset_reset is set to smallest we should read all 200
    # messages from beginning.
    received = [msg for msg in consumer]
    self.assert_message_count(received, 200)
def test_multi_process_consumer(self):
    """Consume 200 messages through a ``MultiProcessConsumer``."""
    # Produce 100 messages to partitions 0 and 1
    for partition, first in ((0, 0), (1, 100)):
        self.send_messages(partition, range(first, first + 100))

    consumer = self.consumer(consumer=MultiProcessConsumer)

    received = [msg for msg in consumer]
    self.assert_message_count(received, 200)

    consumer.stop()
def test_simple_consumer_gzip(self):
    """Send 100 gzip messages to each of two partitions and consume all 200."""
    for partition, first in ((0, 0), (1, 100)):
        self.send_gzip_message(partition, range(first, first + 100))

    # Start a consumer and drain it
    consumer = self.consumer()
    received = [msg for msg in consumer]
    self.assert_message_count(received, 200)

    consumer.stop()
def test_produce_many_snappy(self):
    """Produce two 100-entry snappy messages (currently skipped up front)."""
    self.skipTest("All snappy integration tests fail with nosnappyjava")
    start_offset = self.current_offset(self.topic, 0)

    first_batch = create_snappy_message(
        [("Snappy 1 %d" % n, None) for n in range(100)])
    second_batch = create_snappy_message(
        [("Snappy 2 %d" % n, None) for n in range(100)])

    self.assert_produce_request(
        [first_batch, second_batch],
        start_offset,
        200,
    )
def test_produce_many_snappy(self):
    """Produce two 100-entry snappy messages (currently skipped up front)."""
    self.skipTest("All snappy integration tests fail with nosnappyjava")
    start_offset = self.current_offset(self.topic, 0)

    first_batch = create_snappy_message(
        [("Snappy 1 %d" % n, None) for n in range(100)])
    second_batch = create_snappy_message(
        [("Snappy 2 %d" % n, None) for n in range(100)])

    self.assert_produce_request(
        [first_batch, second_batch],
        start_offset,
        200,
    )
def test_produce_many_gzip(self):
    """Produce two gzip messages of 100 entries each in one request."""
    start_offset = self.current_offset(self.topic, 0)

    first_entries = [
        (("Gzipped 1 %d" % n).encode('utf-8'), None) for n in range(100)
    ]
    message1 = create_gzip_message(first_entries)

    second_entries = [
        (("Gzipped 2 %d" % n).encode('utf-8'), None) for n in range(100)
    ]
    message2 = create_gzip_message(second_entries)

    # 2 gzip messages x 100 entries each
    self.assert_produce_request([message1, message2], start_offset, 200)
def test_produce_many_gzip(self):
    """Produce two gzip messages of 100 entries each in one request."""
    start_offset = self.current_offset(self.topic, 0)

    first_entries = [
        (("Gzipped 1 %d" % n).encode('utf-8'), None) for n in range(100)
    ]
    message1 = create_gzip_message(first_entries)

    second_entries = [
        (("Gzipped 2 %d" % n).encode('utf-8'), None) for n in range(100)
    ]
    message2 = create_gzip_message(second_entries)

    # 2 gzip messages x 100 entries each
    self.assert_produce_request([message1, message2], start_offset, 200)
def test_multi_proc_pending(self):
    """Check ``pending()`` on a ``MultiProcessConsumer``, total and per partition."""
    for partition, first in ((0, 0), (1, 10)):
        self.send_messages(partition, range(first, first + 10))

    # set group to None and auto_commit to False to avoid interactions w/
    # offset commit/fetch apis
    consumer = MultiProcessConsumer(
        self.client,
        None,
        self.topic,
        auto_commit=False,
        iter_timeout=0,
    )

    total_pending = consumer.pending()
    self.assertEqual(total_pending, 20)
    for partition in (0, 1):
        self.assertEqual(consumer.pending(partitions=[partition]), 10)

    consumer.stop()
def test_multi_proc_pending(self):
    """Check ``pending()`` on a ``MultiProcessConsumer``, total and per partition."""
    for partition, first in ((0, 0), (1, 10)):
        self.send_messages(partition, range(first, first + 10))

    # set group to None and auto_commit to False to avoid interactions w/
    # offset commit/fetch apis
    consumer = MultiProcessConsumer(
        self.client,
        None,
        self.topic,
        auto_commit=False,
        iter_timeout=0,
    )

    total_pending = consumer.pending()
    self.assertEqual(total_pending, 20)
    for partition in (0, 1):
        self.assertEqual(consumer.pending(partitions=[partition]), 10)

    consumer.stop()
def test_simple_consumer_largest_offset_reset(self):
    """After an out-of-range seek, a default consumer reads only new messages."""
    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    # Default largest
    consumer = self.consumer()

    # Move fetch offset ahead of 300 message (out of range)
    consumer.seek(300, 2)

    # Since auto_offset_reset is set to largest we should not read any
    # messages.
    before_new_data = [msg for msg in consumer]
    self.assert_message_count(before_new_data, 0)

    # Send 200 new messages to the queue
    self.send_messages(0, range(200, 300))
    self.send_messages(1, range(300, 400))

    # Since the offset is set to largest we should read all the new messages.
    after_new_data = [msg for msg in consumer]
    self.assert_message_count(after_new_data, 200)
def test_produce_mixed(self):
    """Produce one plain message plus one 100-entry gzip message together.

    The snappy part of the test is switched off by a constant-false guard
    and therefore never contributes messages.
    """
    start_offset = self.current_offset(self.topic, 0)

    plain = create_message(b"Just a plain message")
    gzipped = create_gzip_message([
        (("Gzipped %d" % n).encode('utf-8'), None) for n in range(100)
    ])
    messages = [plain, gzipped]
    msg_count = 1 + 100

    # All snappy integration tests fail with nosnappyjava
    snappy_enabled = False
    if snappy_enabled and has_snappy():
        msg_count += 100
        messages.append(create_snappy_message(
            [("Snappy %d" % n, None) for n in range(100)]))

    self.assert_produce_request(messages, start_offset, msg_count)
def get_admin_clients(self, cnt=1, **params):
    """Lazily yield ``cnt`` ``KafkaAdminClient`` instances.

    ``params`` is forwarded to every client. ``client_id`` defaults to
    ``'admin_client'`` and gets a fresh ``random_string(4)`` suffix per
    client; ``bootstrap_servers`` is always overwritten with
    ``self.bootstrap_server()``.
    """
    params.setdefault('client_id', 'admin_client')
    params['bootstrap_servers'] = self.bootstrap_server()
    base_client_id = params['client_id']

    for _ in range(cnt):
        unique_id = '%s_%s' % (base_client_id, random_string(4))
        params['client_id'] = unique_id
        yield KafkaAdminClient(**params)
def test_simple_consumer_load_initial_offsets(self):
    """Offsets committed by one consumer are the initial offsets of the next."""
    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    group_name = 'test_simple_consumer_load_initial_offsets'

    # Create 1st consumer and change offsets
    consumer = self.consumer(group=group_name)
    self.assertEqual(consumer.offsets, {0: 0, 1: 0})
    consumer.offsets.update({0: 51, 1: 101})

    # Update counter after manual offsets update
    consumer.count_since_commit += 1
    consumer.commit()

    # Create 2nd consumer and check initial offsets
    consumer = self.consumer(group=group_name, auto_commit=False)
    self.assertEqual(consumer.offsets, {0: 51, 1: 101})
def test_first_send_failed(self):
    """The first produce call fails for every request; later calls succeed.

    Ten entries for ten different partitions are queued, then the processing
    loop is run and the number of ``send_produce_request`` calls is checked.
    """
    # lets create a queue and add 10 messages for 10 different partitions
    # to show how retries should work ideally
    # NOTE(review): "msg %i" / "key %i" are queued as literal strings; the
    # placeholders are never filled in -- confirm that is intended.
    for partition in range(10):
        self.queue.put((TopicPartition("test", partition), "msg %i", "key %i"))

    # Mock offsets counter for closure
    offsets = collections.defaultdict(lambda: collections.defaultdict(int))
    self.client.is_first_time = True

    def send_side_effect(reqs, *args, **kwargs):
        # Very first call: report every request as failed
        if self.client.is_first_time:
            self.client.is_first_time = False
            return [FailedPayloadsError(req) for req in reqs]

        # Later calls: acknowledge each request at its running offset
        responses = []
        for req in reqs:
            per_topic = offsets[req.topic]
            offset = per_topic[req.partition]
            per_topic[req.partition] = offset + len(req.messages)
            responses.append(
                ProduceResponsePayload(req.topic, req.partition, 0, offset))
        return responses

    self.client.send_produce_request.side_effect = send_side_effect

    self._run_process(2)

    # the queue should be void at the end of the test
    self.assertEqual(self.queue.empty(), True)

    # there should be 5 non-void calls: 1st failed batch of 3 msgs
    # plus 3 batches of 3 msgs each + 1 batch of 1 message
    self.assertEqual(self.client.send_produce_request.call_count, 5)
def get_clients(self, cnt=1, client_id=None):
    """Return a tuple of ``cnt`` ``KafkaClient`` instances.

    Every client id is ``client_id`` (``'client'`` when ``None``) followed
    by an underscore and ``random_string(4)``; each client is pointed at
    ``self.bootstrap_server()``.
    """
    prefix = 'client' if client_id is None else client_id

    clients = []
    for _ in range(cnt):
        clients.append(KafkaClient(
            client_id='%s_%s' % (prefix, random_string(4)),
            bootstrap_servers=self.bootstrap_server(),
        ))
    return tuple(clients)
def test_produce_mixed(self):
    """Produce one plain message plus one 100-entry gzip message together.

    The snappy part of the test is switched off by a constant-false guard
    and therefore never contributes messages.
    """
    start_offset = self.current_offset(self.topic, 0)

    plain = create_message(b"Just a plain message")
    gzipped = create_gzip_message([
        (("Gzipped %d" % n).encode('utf-8'), None) for n in range(100)
    ])
    messages = [plain, gzipped]
    msg_count = 1 + 100

    # All snappy integration tests fail with nosnappyjava
    snappy_enabled = False
    if snappy_enabled and has_snappy():
        msg_count += 100
        messages.append(create_snappy_message(
            [("Snappy %d" % n, None) for n in range(100)]))

    self.assert_produce_request(messages, start_offset, msg_count)
def test_simple_consumer_load_initial_offsets(self):
    """Offsets committed by one consumer are the initial offsets of the next."""
    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    group_name = 'test_simple_consumer_load_initial_offsets'

    # Create 1st consumer and change offsets
    consumer = self.consumer(group=group_name)
    self.assertEqual(consumer.offsets, {0: 0, 1: 0})
    consumer.offsets.update({0: 51, 1: 101})

    # Update counter after manual offsets update
    consumer.count_since_commit += 1
    consumer.commit()

    # Create 2nd consumer and check initial offsets
    consumer = self.consumer(group=group_name, auto_commit=False)
    self.assertEqual(consumer.offsets, {0: 51, 1: 101})
def _send_request(self, request, timeout=None):
    """Send ``request`` to the least-loaded node and return the response value.

    Every attempt asks ``self._client.least_loaded_node()`` for a node and
    tries up to 40 times (polling the client for 100 ms between tries) to
    get a connection to it. The request is then sent and the client is
    polled with the resulting future. A failing send/poll is retried after
    a one second pause, for at most 10 attempts in total.

    Arguments:
        request: the request object handed to ``self._client.send``.
        timeout: forwarded unchanged to ``self._client.poll`` as
            ``timeout_ms``.

    Returns:
        ``future.value`` of the future returned by ``self._client.send``.

    Raises:
        RuntimeError: no connection to the chosen node could be established
            within the 40 connection tries (this is not retried).
        Exception: whatever the last send/poll attempt raised once all 10
            attempts have failed.
    """
    def _failure(error):
        raise error

    attempts_left = 10
    while True:
        node_id = self._client.least_loaded_node()
        for _ in range(40):
            self._client.maybe_connect(node_id)
            if self._client.connected(node_id):
                break
            self._client.poll(timeout_ms=100)
        else:
            # %s instead of %d: the node id may be None or a string, and %d
            # would then hide this error behind a TypeError.
            raise RuntimeError(
                'Could not connect to broker with node id %s' % (node_id,))

        try:
            future = self._client.send(node_id, request)
            future.error_on_callbacks = True
            future.add_errback(_failure)
            self._client.poll(future=future, timeout_ms=timeout)
            return future.value
        except Exception:
            attempts_left -= 1
            if attempts_left == 0:
                # Bare raise keeps the original traceback; no point in
                # sleeping when there is no further attempt.
                raise
            time.sleep(1)
def test_simple_consumer_largest_offset_reset(self):
    """After an out-of-range seek, a default consumer reads only new messages."""
    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    # Default largest
    consumer = self.consumer()

    # Move fetch offset ahead of 300 message (out of range)
    consumer.seek(300, 2)

    # Since auto_offset_reset is set to largest we should not read any
    # messages.
    before_new_data = [msg for msg in consumer]
    self.assert_message_count(before_new_data, 0)

    # Send 200 new messages to the queue
    self.send_messages(0, range(200, 300))
    self.send_messages(1, range(300, 400))

    # Since the offset is set to largest we should read all the new messages.
    after_new_data = [msg for msg in consumer]
    self.assert_message_count(after_new_data, 200)
def test_kafka_consumer(kafka_consumer_factory, send_messages):
    """Consume 200 messages with a KafkaConsumer, 100 from each partition."""
    consumer = kafka_consumer_factory(auto_offset_reset='earliest')
    send_messages(range(0, 100), partition=0)
    send_messages(range(0, 100), partition=1)

    by_partition = {0: [], 1: []}
    # Stop explicitly once 200 records have been seen
    for seen, record in enumerate(consumer, 1):
        logging.debug("Consumed message %s", repr(record))
        by_partition[record.partition].append(record)
        if seen >= 200:
            break

    assert_message_count(by_partition[0], 100)
    assert_message_count(by_partition[1], 100)
def test_kafka_consumer_max_bytes_simple(kafka_consumer_factory, topic, send_messages):
    """With ``fetch_max_bytes=300`` both partitions must still deliver messages."""
    send_messages(range(100, 200), partition=0)
    send_messages(range(200, 300), partition=1)

    # Start a consumer
    consumer = kafka_consumer_factory(
        auto_offset_reset='earliest',
        fetch_max_bytes=300,
    )

    seen_partitions = set()
    for _ in range(90):
        polled = consumer.poll(timeout_ms=100)
        # Record every partition that returned at least one message
        seen_partitions.update(
            partition
            for partition, msgs in polled.items()
            for _msg in msgs
        )

    # Check that we fetched at least 1 message from both partitions
    expected_partitions = {TopicPartition(topic, 0), TopicPartition(topic, 1)}
    assert seen_partitions == expected_partitions
def test_async_producer_not_leader(self):
    """The first produce call answers NotLeaderForPartition; later ones succeed.

    Ten entries for ten different partitions are queued, then the processing
    loop is run and the number of ``send_produce_request`` calls is checked.
    """
    # NOTE(review): "msg %i" / "key %i" are queued as literal strings; the
    # placeholders are never filled in -- confirm that is intended.
    for partition in range(10):
        self.queue.put((TopicPartition("test", partition), "msg %i", "key %i"))

    # Mock offsets counter for closure
    offsets = collections.defaultdict(lambda: collections.defaultdict(int))
    self.client.is_first_time = True

    def send_side_effect(reqs, *args, **kwargs):
        # Very first call: answer every request with a not-leader error code
        if self.client.is_first_time:
            self.client.is_first_time = False
            return [
                ProduceResponsePayload(
                    req.topic, req.partition,
                    NotLeaderForPartitionError.errno, -1)
                for req in reqs
            ]

        # Later calls: acknowledge each request at its running offset
        responses = []
        for req in reqs:
            per_topic = offsets[req.topic]
            offset = per_topic[req.partition]
            per_topic[req.partition] = offset + len(req.messages)
            responses.append(
                ProduceResponsePayload(req.topic, req.partition, 0, offset))
        return responses

    self.client.send_produce_request.side_effect = send_side_effect

    self._run_process(2)

    # the queue should be void at the end of the test
    self.assertEqual(self.queue.empty(), True)

    # there should be 5 non-void calls: 1st failed batch of 3 msgs
    # + 3 batches of 3 msgs each + 1 batch of 1 msg = 1 + 3 + 1 = 5
    self.assertEqual(self.client.send_produce_request.call_count, 5)
def test_first_send_failed(self):
    """The first produce call fails for every request; later calls succeed.

    Ten entries for ten different partitions are queued, then the processing
    loop is run and the number of ``send_produce_request`` calls is checked.
    """
    # lets create a queue and add 10 messages for 10 different partitions
    # to show how retries should work ideally
    # NOTE(review): "msg %i" / "key %i" are queued as literal strings; the
    # placeholders are never filled in -- confirm that is intended.
    for partition in range(10):
        self.queue.put((TopicPartition("test", partition), "msg %i", "key %i"))

    # Mock offsets counter for closure
    offsets = collections.defaultdict(lambda: collections.defaultdict(int))
    self.client.is_first_time = True

    def send_side_effect(reqs, *args, **kwargs):
        # Very first call: report every request as failed
        if self.client.is_first_time:
            self.client.is_first_time = False
            return [FailedPayloadsError(req) for req in reqs]

        # Later calls: acknowledge each request at its running offset
        responses = []
        for req in reqs:
            per_topic = offsets[req.topic]
            offset = per_topic[req.partition]
            per_topic[req.partition] = offset + len(req.messages)
            responses.append(
                ProduceResponsePayload(req.topic, req.partition, 0, offset))
        return responses

    self.client.send_produce_request.side_effect = send_side_effect

    self._run_process(2)

    # the queue should be void at the end of the test
    self.assertEqual(self.queue.empty(), True)

    # there should be 5 non-void calls: 1st failed batch of 3 msgs
    # plus 3 batches of 3 msgs each + 1 batch of 1 message
    self.assertEqual(self.client.send_produce_request.call_count, 5)
def get_producers(self, cnt, **params):
    """Lazily yield ``cnt`` ``KafkaProducer`` instances.

    ``params`` is forwarded to every producer. ``client_id`` defaults to
    ``'producer'`` and gets a fresh ``random_string(4)`` suffix per
    producer; ``bootstrap_servers`` is always overwritten with
    ``self.bootstrap_server()``.
    """
    params.setdefault('client_id', 'producer')
    params['bootstrap_servers'] = self.bootstrap_server()
    base_client_id = params['client_id']

    for _ in range(cnt):
        unique_id = '%s_%s' % (base_client_id, random_string(4))
        params['client_id'] = unique_id
        yield KafkaProducer(**params)
def test_lz4_incremental():
    """Round-trip large random payloads through ``lz4_encode``/``lz4_decode``."""
    for _ in range(1000):
        # lz4 max single block size is 4MB
        # make sure we test with multiple-blocks
        original = random_string(100).encode('utf-8') * 50000
        round_tripped = lz4_decode(lz4_encode(original))

        assert len(original) == len(round_tripped)
        assert original == round_tripped
def test_lz4_incremental():
    """Round-trip large random payloads through ``lz4_encode``/``lz4_decode``."""
    for _ in range(1000):
        # lz4 max single block size is 4MB
        # make sure we test with multiple-blocks
        original = random_string(100).encode('utf-8') * 50000
        round_tripped = lz4_decode(lz4_encode(original))

        assert len(original) == len(round_tripped)
        assert original == round_tripped
def get_consumers(self, cnt, topics, **params):
    """Lazily yield ``cnt`` ``KafkaConsumer`` instances for ``topics``.

    ``params`` is forwarded to every consumer. ``client_id`` defaults to
    ``'consumer'`` and gets a fresh ``random_string(4)`` suffix per
    consumer; ``heartbeat_interval_ms`` defaults to 500;
    ``bootstrap_servers`` is always overwritten with
    ``self.bootstrap_server()``.
    """
    params.setdefault('client_id', 'consumer')
    params.setdefault('heartbeat_interval_ms', 500)
    params['bootstrap_servers'] = self.bootstrap_server()
    base_client_id = params['client_id']

    for _ in range(cnt):
        unique_id = '%s_%s' % (base_client_id, random_string(4))
        params['client_id'] = unique_id
        yield KafkaConsumer(*topics, **params)
def test_produce_10k_simple(self):
    """Produce 10000 plain messages in a single produce request."""
    start_offset = self.current_offset(self.topic, 0)

    payloads = []
    for n in range(10000):
        payloads.append(create_message(("Test message %d" % n).encode('utf-8')))

    self.assert_produce_request(payloads, start_offset, 10000)
def test_produce_10k_simple(self):
    """Produce 10000 plain messages in a single produce request."""
    start_offset = self.current_offset(self.topic, 0)

    payloads = []
    for n in range(10000):
        payloads.append(create_message(("Test message %d" % n).encode('utf-8')))

    self.assert_produce_request(payloads, start_offset, 10000)
def test_kafka_consumer_max_bytes_simple(self):
    """With ``fetch_max_bytes=300`` both partitions must still deliver messages."""
    self.send_messages(0, range(100, 200))
    self.send_messages(1, range(200, 300))

    # Start a consumer
    consumer = self.kafka_consumer(
        auto_offset_reset='earliest',
        fetch_max_bytes=300,
    )

    seen_partitions = set()
    for _ in range(10):
        polled = consumer.poll(timeout_ms=100)
        # Record every partition that returned at least one message
        seen_partitions.update(
            partition
            for partition, msgs in six.iteritems(polled)
            for _msg in msgs
        )

    # Check that we fetched at least 1 message from both partitions
    expected_partitions = set([
        TopicPartition(self.topic, 0),
        TopicPartition(self.topic, 1),
    ])
    self.assertEqual(seen_partitions, expected_partitions)
    consumer.close()
def test_kafka_consumer_max_bytes_simple(self):
    """With ``fetch_max_bytes=300`` both partitions must still deliver messages."""
    self.send_messages(0, range(100, 200))
    self.send_messages(1, range(200, 300))

    # Start a consumer
    consumer = self.kafka_consumer(
        auto_offset_reset='earliest',
        fetch_max_bytes=300,
    )

    seen_partitions = set()
    for _ in range(10):
        polled = consumer.poll(timeout_ms=100)
        # Record every partition that returned at least one message
        seen_partitions.update(
            partition
            for partition, msgs in six.iteritems(polled)
            for _msg in msgs
        )

    # Check that we fetched at least 1 message from both partitions
    expected_partitions = set([
        TopicPartition(self.topic, 0),
        TopicPartition(self.topic, 1),
    ])
    self.assertEqual(seen_partitions, expected_partitions)
    consumer.close()
def run(args):
    """Run the producer benchmark described by ``args``.

    Builds the producer configuration from ``args.producer_config``
    (``key=value`` strings), optionally starts brokers, creates a
    ``KafkaProducer`` and a ``StatsReporter``, sends ``args.num_records``
    records of ``args.record_size`` zero bytes to ``args.topic`` and
    flushes. Any exception is printed with its traceback and the process
    exits with status 1.

    Arguments:
        args: object providing the attributes ``producer_config``,
            ``brokers``, ``record_size``, ``stats_interval``,
            ``raw_metrics``, ``num_records`` and ``topic``.
    """
    timer_stop = None
    try:
        props = {}
        for prop in args.producer_config:
            # Split on the first '=' only, so that values may contain '='
            k, v = prop.split('=', 1)
            try:
                v = int(v)
            except ValueError:
                pass
            if v == 'None':
                v = None
            props[k] = v

        if args.brokers:
            brokers = start_brokers(args.brokers)
            props['bootstrap_servers'] = [
                '{0}:{1}'.format(broker.host, broker.port)
                for broker in brokers
            ]
            print("---> bootstrap_servers={0}".format(
                props['bootstrap_servers']))
            print()
            print('-> OK!')
            print()

        print('Initializing producer...')
        record = bytes(bytearray(args.record_size))
        props['metrics_sample_window_ms'] = args.stats_interval * 1000

        producer = KafkaProducer(**props)
        for k, v in props.items():
            print('---> {0}={1}'.format(k, v))
        print('---> send {0} byte records'.format(args.record_size))
        print('---> report stats every {0} secs'.format(
            args.stats_interval))
        print('---> raw metrics? {0}'.format(args.raw_metrics))
        timer_stop = threading.Event()
        timer = StatsReporter(args.stats_interval, producer,
                              event=timer_stop,
                              raw_metrics=args.raw_metrics)
        timer.start()
        print('-> OK!')
        print()

        for _ in range(args.num_records):
            producer.send(topic=args.topic, value=record)
        producer.flush()

    except Exception:
        traceback.print_exception(*sys.exc_info())
        sys.exit(1)
    finally:
        # Signal the reporter on success and on failure alike, so that a
        # started reporter is not left running when the benchmark aborts.
        # NOTE(review): presumably StatsReporter stops once the event is
        # set -- confirm against its implementation.
        if timer_stop is not None:
            timer_stop.set()
def test_kafka_consumer__offset_commit_resume(kafka_consumer_factory, send_messages):
    """A second consumer in the same group resumes where the first one stopped."""
    GROUP_ID = random_string(10)

    send_messages(range(0, 100), partition=0)
    send_messages(range(100, 200), partition=1)

    # Both consumers are created with exactly the same settings
    consumer_settings = dict(
        group_id=GROUP_ID,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        auto_offset_reset='earliest',
    )

    # Start a consumer and grab the first 180 messages
    consumer1 = kafka_consumer_factory(**consumer_settings)
    output_msgs1 = [next(consumer1) for _ in range(180)]
    assert_message_count(output_msgs1, 180)

    # Normally we let the pytest fixture `kafka_consumer_factory` handle
    # closing as part of its teardown. Here we manually call close() to force
    # auto-commit to occur before the second consumer starts. That way the
    # second consumer only consumes previously unconsumed messages.
    consumer1.close()

    # Start a second consumer to grab 181-200
    consumer2 = kafka_consumer_factory(**consumer_settings)
    output_msgs2 = [next(consumer2) for _ in range(20)]
    assert_message_count(output_msgs2, 20)

    # Verify the second consumer wasn't reconsuming messages that the first
    # consumer already saw
    assert_message_count(output_msgs1 + output_msgs2, 200)
def test_produce_100k_gzipped(self):
    """Produce two gzip messages of 50000 entries each, one request apiece."""
    start_offset = self.current_offset(self.topic, 0)
    batch_size = 50000

    # NOTE(review): both batches carry the text "batch 1"; the second one
    # may have been meant to say "batch 2" -- confirm before changing.
    for batch_start in (start_offset, start_offset + batch_size):
        gzipped = create_gzip_message([
            (("Gzipped batch 1, message %d" % n).encode('utf-8'), None)
            for n in range(batch_size)
        ])
        self.assert_produce_request([gzipped], batch_start, batch_size)
def test_produce_100k_gzipped(self):
    """Produce two gzip messages of 50000 entries each, one request apiece."""
    start_offset = self.current_offset(self.topic, 0)
    batch_size = 50000

    # NOTE(review): both batches carry the text "batch 1"; the second one
    # may have been meant to say "batch 2" -- confirm before changing.
    for batch_start in (start_offset, start_offset + batch_size):
        gzipped = create_gzip_message([
            (("Gzipped batch 1, message %d" % n).encode('utf-8'), None)
            for n in range(batch_size)
        ])
        self.assert_produce_request([gzipped], batch_start, batch_size)
def test_produce_many_simple(simple_client, topic):
    """Produce two consecutive 100-message batches using the SimpleClient.

    Each batch goes through ``assert_produce_request``: the first against
    the offset read up front with ``current_offset(simple_client, topic, 0)``
    and the second against that offset plus 100.
    """
    base_offset = current_offset(simple_client, topic, 0)
    batch_size = 100

    for batch_index in range(2):
        payloads = [
            create_message(("Test message %d" % n).encode('utf-8'))
            for n in range(batch_size)
        ]
        assert_produce_request(
            simple_client,
            topic,
            payloads,
            base_offset + batch_index * batch_size,
            batch_size,
        )
def test_large_messages(self):
    """Consume a mix of small and large messages and verify none are lost.

    Ten short messages and ten 5000-character messages are sent, then read
    back through a consumer created with ``max_buffer_size=60000``.
    """
    # Ten "normal" size messages
    small_messages = self.send_messages(0, [str(n) for n in range(10)])

    # Ten messages that are large (bigger than default fetch size)
    large_messages = self.send_messages(
        0, [random_string(5000) for _ in range(10)])

    # Brokers prior to 0.11 will return the next message
    # if it is smaller than max_bytes (called buffer_size in SimpleConsumer)
    # Brokers 0.11 and later that store messages in v2 format
    # internally will return the next message only if the
    # full MessageSet is smaller than max_bytes.
    # For that reason, we set the max buffer size to a little more
    # than the size of all large messages combined
    consumer = self.consumer(max_buffer_size=60000)

    expected_messages = set(small_messages + large_messages)
    actual_messages = {entry.message.value for entry in consumer}
    self.assertEqual(expected_messages, actual_messages)

    consumer.stop()
def test_kafka_consumer__offset_commit_resume(self):
    """A second consumer in the same group resumes where the first one stopped."""
    GROUP_ID = random_string(10)

    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    # Both consumers are created with exactly the same settings
    consumer_settings = dict(
        group_id=GROUP_ID,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        auto_offset_reset='earliest',
    )

    # Start a consumer and grab the first 180 messages
    consumer1 = self.kafka_consumer(**consumer_settings)
    output_msgs1 = [next(consumer1) for _ in range(180)]
    self.assert_message_count(output_msgs1, 180)
    consumer1.close()

    # The total offset across both partitions should be at 180
    consumer2 = self.kafka_consumer(**consumer_settings)

    # 181-200
    output_msgs2 = [next(consumer2) for _ in range(20)]
    self.assert_message_count(output_msgs2, 20)

    # No message may have been delivered to both consumers
    combined = set(output_msgs1) | set(output_msgs2)
    self.assertEqual(len(combined), 200)
    consumer2.close()
def _next_partition(self, topic):
    """Return the next partition id for ``topic`` from a repeating cycle.

    The first call for a topic makes sure the client has metadata for it,
    stores a cycle over the client's partition ids in
    ``self.partition_cycles`` and, when ``self.random_start`` is set,
    advances that cycle by a random number of steps.
    """
    cycles = self.partition_cycles
    if topic in cycles:
        return next(cycles[topic])

    client = self.client
    if not client.has_metadata_for_topic(topic):
        client.ensure_topic_exists(topic)

    rotation = cycle(client.get_partition_ids_for_topic(topic))
    cycles[topic] = rotation

    # Randomize the initial partition that is returned
    if self.random_start:
        partition_count = len(client.get_partition_ids_for_topic(topic))
        skips = random.randint(0, partition_count - 1)
        for _ in range(skips):
            next(rotation)

    return next(rotation)
def test_wo_retries(self):
    """Ten queued entries for one partition are sent in four produce calls."""
    # lets create a queue and add 10 messages for 1 partition
    # NOTE(review): "msg %i" / "key %i" are queued as literal strings; the
    # placeholders are never filled in -- confirm that is intended.
    for _ in range(10):
        self.queue.put((TopicPartition("test", 0), "msg %i", "key %i"))

    self._run_process()

    # the queue should be void at the end of the test
    self.assertEqual(self.queue.empty(), True)

    # there should be 4 non-void calls:
    # 3 batches of 3 msgs each + 1 batch of 1 message
    produce_calls = self.client.send_produce_request.call_count
    self.assertEqual(produce_calls, 4)
def run(args):
    """Run the producer benchmark described by ``args``.

    Attributes read from ``args``: ``producer_config`` (iterable of
    ``key=value`` strings), ``brokers``, ``record_size``, ``stats_interval``,
    ``raw_metrics``, ``num_records`` and ``topic``.

    Each ``key=value`` entry becomes a keyword argument for ``KafkaProducer``;
    values that parse as integers are converted to ``int`` and the literal
    string ``None`` becomes ``None``.

    Any exception is reported with its traceback and the process exits with
    status 1.
    """
    try:
        props = {}
        for prop in args.producer_config:
            # Split on the first '=' only, so a value may itself contain '='.
            k, v = prop.split('=', 1)
            try:
                v = int(v)
            except ValueError:
                pass
            if v == 'None':
                v = None
            props[k] = v

        if args.brokers:
            brokers = start_brokers(args.brokers)
            props['bootstrap_servers'] = ['{0}:{1}'.format(broker.host, broker.port)
                                          for broker in brokers]
            print("---> bootstrap_servers={0}".format(props['bootstrap_servers']))
            print()
            print('-> OK!')
            print()

        print('Initializing producer...')
        # A zero-filled payload of the requested size.
        record = bytes(bytearray(args.record_size))
        props['metrics_sample_window_ms'] = args.stats_interval * 1000

        producer = KafkaProducer(**props)
        for k, v in props.items():
            print('---> {0}={1}'.format(k, v))
        print('---> send {0} byte records'.format(args.record_size))
        print('---> report stats every {0} secs'.format(args.stats_interval))
        print('---> raw metrics? {0}'.format(args.raw_metrics))
        timer_stop = threading.Event()
        timer = StatsReporter(args.stats_interval, producer,
                              event=timer_stop,
                              raw_metrics=args.raw_metrics)
        timer.start()
        try:
            print('-> OK!')
            print()

            for _ in range(args.num_records):
                producer.send(topic=args.topic, value=record)
            producer.flush()
        finally:
            # Always set the event handed to the reporter, even when sending
            # fails, so it is never left without its stop signal.
            timer_stop.set()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
        sys.exit(1)
def test_kafka_consumer__blocking(self):
    """Check how long ``next()`` blocks on a consumer with a 500 ms timeout.

    Covers three cases: an empty partition, enough queued messages, and
    asking for more messages than are available.
    """
    wait_ms = 500
    wait_secs = wait_ms / 1000.0

    consumer = self.kafka_consumer(
        auto_offset_reset='earliest',
        enable_auto_commit=False,
        consumer_timeout_ms=wait_ms,
    )

    # Manual assignment avoids overhead of consumer group mgmt
    consumer.unsubscribe()
    consumer.assign([TopicPartition(self.topic, 0)])

    # Nothing in the queue: iteration should stop after blocking 500ms.
    with Timer() as empty_wait:
        with self.assertRaises(StopIteration):
            next(consumer)
    self.assertGreaterEqual(empty_wait.interval, wait_secs)

    self.send_messages(0, range(0, 10))

    # Ask for 5 messages with 10 in the queue: get 5 back, no blocking.
    seen = set()
    with Timer() as quick_read:
        remaining = 5
        while remaining > 0:
            record = next(consumer)
            seen.add((record.partition, record.offset))
            remaining -= 1
    self.assertEqual(len(seen), 5)
    self.assertLess(quick_read.interval, wait_secs)

    # Ask for 10 messages, get the 5 that are left, then block 500ms.
    seen = set()
    with Timer() as drained_read:
        with self.assertRaises(StopIteration):
            remaining = 10
            while remaining > 0:
                record = next(consumer)
                seen.add((record.partition, record.offset))
                remaining -= 1
    self.assertEqual(len(seen), 5)
    self.assertGreaterEqual(drained_read.interval, wait_secs)

    consumer.close()
def test_multi_process_offset_behavior__resuming_behavior(self):
    """Check that a second MultiProcessConsumer resumes from committed offsets.

    The first consumer reads 195 of 200 messages with auto-commit every 20
    messages; a second consumer in the same group must then yield 20.
    """
    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    # Both consumers are built from the same options.
    options = dict(
        consumer=MultiProcessConsumer,
        group='test_multi_process_offset_behavior__resuming_behavior',
        auto_commit=True,
        auto_commit_every_t=None,
        auto_commit_every_n=20,
    )

    # Start a consumer and grab the first 195 messages.
    consumer1 = self.consumer(**options)
    first_values = []
    for taken, record in enumerate(consumer1, 1):
        first_values.append(record.message.value)
        if taken >= 195:
            break
    self.assert_message_count(first_values, 195)

    # The total offset across both partitions should be at 180, so the
    # second consumer is expected to deliver messages 181-200.
    consumer2 = self.consumer(**options)
    leftovers = []
    for record in consumer2:
        leftovers.append(record)
    self.assert_message_count(leftovers, 20)

    consumer1.stop()
    consumer2.stop()
def test_producer_async_queue_overfilled(self, mock):
    """Overfilling the async queue must raise AsyncProducerQueueFull.

    With a queue limit of 2, sending 3 messages in one call is expected to
    raise, leaving exactly 2 messages in the producer's queue.
    """
    capacity = 2
    producer = Producer(
        MagicMock(),
        async_send=True,
        async_queue_maxsize=capacity,
    )

    payload = b'test-message'
    # One message more than the queue can hold.
    overflow_batch = [payload] * (capacity + 1)

    with self.assertRaises(AsyncProducerQueueFull):
        producer.send_messages(b'test-topic', 0, *overflow_batch)
    self.assertEqual(producer.queue.qsize(), capacity)

    # Empty the queue again, one item per entry counted up front.
    pending = producer.queue.qsize()
    while pending > 0:
        producer.queue.get()
        pending -= 1
def test_kafka_consumer_max_bytes_one_msg(self):
    """Fetch ten messages from a consumer configured with fetch_max_bytes=1."""
    # Only one partition is written to, so there are no parallel requests
    # to 2 nodes for data.
    self.send_messages(0, range(100, 200))

    # FetchResponse_v3 should always include at least 1 full msg, so with
    # fetch_max_bytes=1 we should get 1 msg at a time. But 0.11.0.0 returns
    # 1 MessageSet at a time when the messages are stored in the new v2
    # format by the broker.
    #
    # DP Note: This is a strange test. The consumer shouldn't care how many
    # messages are included in a FetchResponse, as long as it is non-zero.
    # I would not mind if we deleted this test. It caused a minor headache
    # when testing 0.11.0.0.
    suffix = random_string(5)
    group = 'test-kafka-consumer-max-bytes-one-msg-' + suffix
    consumer = self.kafka_consumer(
        group_id=group,
        auto_offset_reset='earliest',
        consumer_timeout_ms=5000,
        fetch_max_bytes=1,
    )

    fetched = []
    while len(fetched) < 10:
        fetched.append(next(consumer))
    self.assertEqual(len(fetched), 10)

    consumer.close()