def test_switch_leader(self):
    """Keep producing to a topic while the leader of partition 0 is killed.

    Two passes are run. Each pass sends 10 random messages, waits until
    partition 0 reports two in-sync replicas, kills the partition leader,
    sends two single messages plus 10 more random ones, restarts the
    killed broker and then counts the messages in the topic. A pass adds
    22 messages, so after pass ``index`` the count must be ``22 * index``.
    The producer is stopped in a ``finally`` clause, pass or fail.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    producer = Producer(self.client)
    topic = self.topic
    try:
        for index in range(1, 3):
            # cause the client to establish connections to all the brokers
            log.debug("Pass: %d. Sending 10 random messages", index)
            yield self._send_random_messages(producer, topic, 10)

            # Ensure that the follower is in sync
            log.debug("Ensuring topic/partition is replicated.")
            part_meta = self.client.partition_meta[
                TopicAndPartition(self.topic, 0)]

            # Ensure all the replicas are in-sync before proceeding
            while len(part_meta.isr) != 2:  # pragma: no cover
                log.debug("Waiting for Kafka replica to become synced")
                if len(part_meta.replicas) != 2:
                    # Logged only; the loop keeps polling regardless
                    log.error(
                        "Kafka replica 'disappeared'! Partition Meta: %r",
                        part_meta)
                yield async_delay(1.0)
                # Refresh the metadata and look at the partition again
                yield self.client.load_metadata_for_topics(self.topic)
                part_meta = self.client.partition_meta[
                    TopicAndPartition(self.topic, 0)]

            # kill leader for partition 0
            log.debug("Killing leader of partition 0")
            broker, kill_time = self._kill_leader(topic, 0)

            log.debug("Sending 1 more message: 'part 1'")
            yield producer.send_messages(topic, msgs=['part 1'])
            log.debug("Sending 1 more message: 'part 2'")
            yield producer.send_messages(topic, msgs=['part 2'])

            # send to new leader
            log.debug("Sending 10 more messages")
            yield self._send_random_messages(producer, topic, 10)

            # Make sure the ZK ephemeral time (~6 seconds) has elapsed
            wait_time = (kill_time + 6.5) - time.time()
            if wait_time > 0:
                log.debug("Waiting: %4.2f for ZK timeout", wait_time)
                yield async_delay(wait_time)
            # restart the kafka broker
            log.debug("Restarting the broker")
            broker.restart()

            # count number of messages: 10 + 1 + 1 + 10 = 22 per pass
            log.debug("Getting message count")
            count = yield self._count_messages(topic)
            self.assertEqual(count, 22 * index)
    finally:
        log.debug("Stopping the producer")
        yield producer.stop()
        log.debug("Producer stopped")

    log.debug("Test complete.")
def test_switch_leader(self):
    """Keep producing to a topic while the leader of partition 0 is killed.

    Two passes are run. Each pass sends 10 random messages, waits until
    partition 0 reports two in-sync replicas, kills the partition leader,
    sends two single messages plus 10 more random ones, restarts the
    killed broker and then counts the messages in the topic. A pass adds
    22 messages, so after pass ``index`` the count must be ``22 * index``.
    The producer is stopped in a ``finally`` clause, pass or fail.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    producer = Producer(self.client)
    topic = self.topic
    try:
        for index in range(1, 3):
            # cause the client to establish connections to all the brokers
            log.debug("Pass: %d. Sending 10 random messages", index)
            yield self._send_random_messages(producer, topic, 10)

            # Ensure that the follower is in sync
            log.debug("Ensuring topic/partition is replicated.")
            part_meta = self.client.partition_meta[
                TopicAndPartition(self.topic, 0)]

            # Ensure all the replicas are in-sync before proceeding
            while len(part_meta.isr) != 2:  # pragma: no cover
                log.debug("Waiting for Kafka replica to become synced")
                if len(part_meta.replicas) != 2:
                    # Logged only; the loop keeps polling regardless
                    log.error(
                        "Kafka replica 'disappeared'! Partition Meta: %r",
                        part_meta)
                yield async_delay(1.0)
                # Refresh the metadata and look at the partition again
                yield self.client.load_metadata_for_topics(self.topic)
                part_meta = self.client.partition_meta[
                    TopicAndPartition(self.topic, 0)]

            # kill leader for partition 0
            log.debug("Killing leader of partition 0")
            broker, kill_time = self._kill_leader(topic, 0)

            log.debug("Sending 1 more message: 'part 1'")
            yield producer.send_messages(topic, msgs=['part 1'])
            log.debug("Sending 1 more message: 'part 2'")
            yield producer.send_messages(topic, msgs=['part 2'])

            # send to new leader
            log.debug("Sending 10 more messages")
            yield self._send_random_messages(producer, topic, 10)

            # Make sure the ZK ephemeral time (~6 seconds) has elapsed
            wait_time = (kill_time + 6.5) - time.time()
            if wait_time > 0:
                log.debug("Waiting: %4.2f for ZK timeout", wait_time)
                yield async_delay(wait_time)
            # restart the kafka broker
            log.debug("Restarting the broker")
            broker.restart()

            # count number of messages: 10 + 1 + 1 + 10 = 22 per pass
            log.debug("Getting message count")
            count = yield self._count_messages(topic)
            self.assertEqual(count, 22 * index)
    finally:
        log.debug("Stopping the producer")
        yield producer.stop()
        log.debug("Producer stopped")

    log.debug("Test complete.")
def test_consumer_commit_offsets(self):
    """Check that a second consumer resumes from the committed offset.

    A first consumer (both auto-commit triggers set to 0) reads messages
    0-199 from the earliest offset, is stopped, and commits. Messages
    200-299 are sent afterwards; a second consumer in the same group,
    started at ``OFFSET_COMMITTED``, must receive exactly that last batch.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    # Seed the partition before any consumer is running
    yield self.send_messages(self.partition, range(100))

    # First consumer: commits allowed, auto-commit disabled
    first = self.consumer(
        consumer_group=self.id(),
        auto_commit_every_n=0,
        auto_commit_every_ms=0,
    )

    # The processor must be empty until the consumer is started
    self.assertFalse(first.processor._messages)

    # Begin consuming at the earliest available offset
    first_start_d = first.start(OFFSET_EARLIEST)

    # Second batch, sent while the consumer is running
    yield self.send_messages(self.partition, range(100, 200))

    # Poll until both batches have reached the processor
    while True:
        if len(first.processor._messages) >= 200:
            break
        yield async_delay()

    # Exactly 200 messages, no more
    self.assertEqual(len(first.processor._messages), 200)

    # Shut the first consumer down, then commit its offsets
    first.stop()
    self.successResultOf(first_start_d)
    yield first.commit()

    # Third batch: only the second consumer should ever see these
    last_batch = yield self.send_messages(self.partition, range(200, 300))

    # Second consumer in the same group, started at the committed offset
    second = self.consumer(
        consumer_group=self.id(),
        auto_commit_every_n=0,
        auto_commit_every_ms=0,
    )
    second_start_d = second.start(OFFSET_COMMITTED)

    # Poll until the third batch has arrived
    while True:
        if len(second.processor._messages) >= 100:
            break
        yield async_delay()

    # Exactly 100 messages, and they are the ones just sent
    self.assertEqual(len(second.processor._messages), 100)
    received_values = [m.message.value for m in second.processor._messages]
    self.assertEqual(last_batch, received_values)

    # Shut the second consumer down
    second.stop()
    self.successResultOf(second_start_d)
def test_consumer_commit_offsets(self):
    """Check that a second consumer resumes from the committed offset.

    A first consumer (both auto-commit triggers set to 0) reads messages
    0-199 from the earliest offset, is stopped, and commits. Messages
    200-299 are sent afterwards; a second consumer in the same group,
    started at ``OFFSET_COMMITTED``, must receive exactly that last batch.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    # Seed the partition before any consumer is running
    yield self.send_messages(self.partition, range(100))

    # First consumer: commits allowed, auto-commit disabled
    first = self.consumer(
        consumer_group=self.id(),
        auto_commit_every_n=0,
        auto_commit_every_ms=0,
    )

    # The processor must be empty until the consumer is started
    self.assertFalse(first.processor._messages)

    # Begin consuming at the earliest available offset
    first_start_d = first.start(OFFSET_EARLIEST)

    # Second batch, sent while the consumer is running
    yield self.send_messages(self.partition, range(100, 200))

    # Poll until both batches have reached the processor
    while True:
        if len(first.processor._messages) >= 200:
            break
        yield async_delay()

    # Exactly 200 messages, no more
    self.assertEqual(len(first.processor._messages), 200)

    # Shut the first consumer down, then commit its offsets
    first.stop()
    self.successResultOf(first_start_d)
    yield first.commit()

    # Third batch: only the second consumer should ever see these
    last_batch = yield self.send_messages(self.partition, range(200, 300))

    # Second consumer in the same group, started at the committed offset
    second = self.consumer(
        consumer_group=self.id(),
        auto_commit_every_n=0,
        auto_commit_every_ms=0,
    )
    second_start_d = second.start(OFFSET_COMMITTED)

    # Poll until the third batch has arrived
    while True:
        if len(second.processor._messages) >= 100:
            break
        yield async_delay()

    # Exactly 100 messages, and they are the ones just sent
    self.assertEqual(len(second.processor._messages), 100)
    received_values = [m.message.value for m in second.processor._messages]
    self.assertEqual(last_batch, received_values)

    # Shut the second consumer down
    second.stop()
    self.successResultOf(second_start_d)
def test_consumer_restart(self):
    """Stop a consumer and restart it at the offset after the one it returned.

    The default consumer reads 200 messages from the earliest offset and
    is stopped; ``stop()`` returns an offset and the restart point is that
    value plus one. Fifty more messages are sent, the same consumer is
    started again at the restart point, and the test checks that all 250
    sent values were received.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    sent_messages = yield self.send_messages(self.partition, range(100))

    # Default consumer (auto-commit)
    consumer = self.consumer()

    # The processor must be empty until the consumer is started
    self.assertFalse(consumer.processor._messages)

    # Begin consuming at the earliest available offset
    first_run_d = consumer.start(OFFSET_EARLIEST)

    # Second batch, sent while the consumer is running
    second_batch = yield self.send_messages(self.partition, range(100, 200))
    sent_messages += second_batch

    # Poll until both batches have reached the processor
    while True:
        if len(consumer.processor._messages) >= 200:
            break
        yield async_delay()

    # Exactly 200 messages so far
    self.assertEqual(len(consumer.processor._messages), 200)

    # stop() hands back the last processed offset; resume one past it
    restart_offset = consumer.stop() + 1
    self.successResultOf(first_run_d)

    # Third batch, sent while the consumer is stopped
    third_batch = yield self.send_messages(self.partition, range(200, 250))
    sent_messages += third_batch

    # Bring the same consumer back at the recorded offset
    second_run_d = consumer.start(restart_offset)

    # Poll until the third batch has arrived as well
    while True:
        if len(consumer.processor._messages) >= 250:
            break
        yield async_delay()

    # All 250 present, and the values match what was sent
    self.assert_message_count(consumer.processor._messages, 250)
    expected_messages = set(sent_messages)
    actual_messages = {m.message.value for m in consumer.processor._messages}
    self.assertEqual(expected_messages, actual_messages)

    # Clean up
    consumer.stop()
    self.successResultOf(second_run_d)
def test_consumer_restart(self):
    """Stop a consumer and restart it at the offset after the one it returned.

    The default consumer reads 200 messages from the earliest offset and
    is stopped; ``stop()`` returns an offset and the restart point is that
    value plus one. Fifty more messages are sent, the same consumer is
    started again at the restart point, and the test checks that all 250
    sent values were received.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    sent_messages = yield self.send_messages(self.partition, range(100))

    # Default consumer (auto-commit)
    consumer = self.consumer()

    # The processor must be empty until the consumer is started
    self.assertFalse(consumer.processor._messages)

    # Begin consuming at the earliest available offset
    first_run_d = consumer.start(OFFSET_EARLIEST)

    # Second batch, sent while the consumer is running
    second_batch = yield self.send_messages(self.partition, range(100, 200))
    sent_messages += second_batch

    # Poll until both batches have reached the processor
    while True:
        if len(consumer.processor._messages) >= 200:
            break
        yield async_delay()

    # Exactly 200 messages so far
    self.assertEqual(len(consumer.processor._messages), 200)

    # stop() hands back the last processed offset; resume one past it
    restart_offset = consumer.stop() + 1
    self.successResultOf(first_run_d)

    # Third batch, sent while the consumer is stopped
    third_batch = yield self.send_messages(self.partition, range(200, 250))
    sent_messages += third_batch

    # Bring the same consumer back at the recorded offset
    second_run_d = consumer.start(restart_offset)

    # Poll until the third batch has arrived as well
    while True:
        if len(consumer.processor._messages) >= 250:
            break
        yield async_delay()

    # All 250 present, and the values match what was sent
    self.assert_message_count(consumer.processor._messages, 250)
    expected_messages = set(sent_messages)
    actual_messages = {m.message.value for m in consumer.processor._messages}
    self.assertEqual(expected_messages, actual_messages)

    # Clean up
    consumer.stop()
    self.successResultOf(second_run_d)
def test_huge_messages(self):
    """Check the failure on a message larger than the consumer's buffer cap.

    Ten small messages and one message 10 characters longer than the
    chosen ``max_buffer_size`` are produced to partition 0. A consumer
    created with that cap must deliver the ten small messages and then
    errback its start deferred with ``ConsumerFetchSizeTooSmall``. A
    second consumer created without the cap, started just past the last
    delivered offset, must then receive the huge message.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    # Ten "normal" sized messages first
    yield self.send_messages(0, [str(n) for n in range(10)])

    # Pick a buffer cap, then publish one message that exceeds it
    buffer_cap = (256 * 1024) - 10
    oversized_payload = random_string(buffer_cap + 10)
    (huge_message,) = yield self.send_messages(0, [oversized_payload])

    # Consumer limited to the (smallish) buffer cap
    capped = self.consumer(max_buffer_size=buffer_cap)

    # It cannot fetch the huge message, so its start deferred errbacks
    capped_d = capped.start(OFFSET_EARLIEST)

    # Poll until the deferred has fired
    while True:
        if capped_d.called:
            break
        yield async_delay()

    # The failure must be the expected one
    self.failureResultOf(capped_d, ConsumerFetchSizeTooSmall)

    # The ten earlier, smaller messages were still delivered
    self.assert_message_count(capped.processor._messages, 10)

    # Remember where the capped consumer got to
    last_offset = capped.processor._messages[-1].offset

    # The deferred already errbacked, but stop() still must be called
    capped.stop()

    # Consumer with no fetch size limit, started just past that offset
    uncapped = self.consumer()
    uncapped_d = uncapped.start(last_offset + 1)

    # Poll until the giant message shows up
    while True:
        if uncapped.processor._messages:
            break
        yield async_delay()

    first_value = uncapped.processor._messages[0].message.value
    self.assertEqual(first_value, huge_message)

    # Clean up
    uncapped.stop()
    self.successResultOf(uncapped_d)
def test_huge_messages(self):
    """Check the failure on a message larger than the consumer's buffer cap.

    Ten small messages and one message 10 characters longer than the
    chosen ``max_buffer_size`` are produced to partition 0. A consumer
    created with that cap must deliver the ten small messages and then
    errback its start deferred with ``ConsumerFetchSizeTooSmall``. A
    second consumer created without the cap, started just past the last
    delivered offset, must then receive the huge message.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    # Ten "normal" sized messages first
    yield self.send_messages(0, [str(n) for n in range(10)])

    # Pick a buffer cap, then publish one message that exceeds it
    buffer_cap = (256 * 1024) - 10
    oversized_payload = random_string(buffer_cap + 10)
    (huge_message,) = yield self.send_messages(0, [oversized_payload])

    # Consumer limited to the (smallish) buffer cap
    capped = self.consumer(max_buffer_size=buffer_cap)

    # It cannot fetch the huge message, so its start deferred errbacks
    capped_d = capped.start(OFFSET_EARLIEST)

    # Poll until the deferred has fired
    while True:
        if capped_d.called:
            break
        yield async_delay()

    # The failure must be the expected one
    self.failureResultOf(capped_d, ConsumerFetchSizeTooSmall)

    # The ten earlier, smaller messages were still delivered
    self.assert_message_count(capped.processor._messages, 10)

    # Remember where the capped consumer got to
    last_offset = capped.processor._messages[-1].offset

    # The deferred already errbacked, but stop() still must be called
    capped.stop()

    # Consumer with no fetch size limit, started just past that offset
    uncapped = self.consumer()
    uncapped_d = uncapped.start(last_offset + 1)

    # Poll until the giant message shows up
    while True:
        if uncapped.processor._messages:
            break
        yield async_delay()

    first_value = uncapped.processor._messages[0].message.value
    self.assertEqual(first_value, huge_message)

    # Clean up
    uncapped.stop()
    self.successResultOf(uncapped_d)
def test_large_messages(self):
    """Check that the default consumer receives small and large messages.

    Ten small messages and ten messages built from
    ``random_string(FETCH_BUFFER_SIZE_BYTES * 3)`` are produced to
    partition 0. A default consumer started at the earliest offset must
    end up with the same set of values that was sent.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    # Ten "normal" sized messages
    small_payloads = [str(n) for n in range(10)]
    small_messages = yield self.send_messages(0, small_payloads)

    # Ten large ones (bigger than the default fetch size)
    large_payloads = [
        random_string(FETCH_BUFFER_SIZE_BYTES * 3) for _ in range(10)
    ]
    large_messages = yield self.send_messages(0, large_payloads)

    # The consumer should still get all of them
    consumer = self.consumer()

    # Begin consuming at the earliest available offset
    start_d = consumer.start(OFFSET_EARLIEST)

    # Poll until all twenty messages have reached the processor
    while True:
        if len(consumer.processor._messages) >= 20:
            break
        yield async_delay()

    expected_messages = set(small_messages + large_messages)
    actual_messages = {m.message.value for m in consumer.processor._messages}
    self.assertEqual(expected_messages, actual_messages)

    # Clean up
    consumer.stop()
    self.successResultOf(start_d)
def test_large_messages(self):
    """Check that the default consumer receives small and large messages.

    Ten small messages and ten messages built from
    ``random_string(FETCH_BUFFER_SIZE_BYTES * 3)`` are produced to
    partition 0. A default consumer started at the earliest offset must
    end up with the same set of values that was sent.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    # Ten "normal" sized messages
    small_payloads = [str(n) for n in range(10)]
    small_messages = yield self.send_messages(0, small_payloads)

    # Ten large ones (bigger than the default fetch size)
    large_payloads = [
        random_string(FETCH_BUFFER_SIZE_BYTES * 3) for _ in range(10)
    ]
    large_messages = yield self.send_messages(0, large_payloads)

    # The consumer should still get all of them
    consumer = self.consumer()

    # Begin consuming at the earliest available offset
    start_d = consumer.start(OFFSET_EARLIEST)

    # Poll until all twenty messages have reached the processor
    while True:
        if len(consumer.processor._messages) >= 20:
            break
        yield async_delay()

    expected_messages = set(small_messages + large_messages)
    actual_messages = {m.message.value for m in consumer.processor._messages}
    self.assertEqual(expected_messages, actual_messages)

    # Clean up
    consumer.stop()
    self.successResultOf(start_d)
def test_consumer(self):
    """Check that a running consumer keeps receiving newly sent messages.

    After an initial 3 second delay, 100 messages are sent before the
    default consumer starts at the earliest offset, 100 more while it is
    running, and a final 50 after the first 200 have arrived. The
    processor must end up holding 250 messages.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    yield async_delay(3)  # 0.8.1.1 fails otherwise

    first_batch = range(100)
    yield self.send_messages(self.partition, first_batch)

    # Default consumer
    consumer = self.consumer()

    # The processor must be empty until the consumer is started
    self.assertFalse(consumer.processor._messages)

    # Begin consuming at the earliest available offset
    start_d = consumer.start(OFFSET_EARLIEST)

    # Second batch, sent while the consumer is running
    second_batch = range(100, 200)
    yield self.send_messages(self.partition, second_batch)

    # Poll until both batches have reached the processor
    while True:
        if len(consumer.processor._messages) >= 200:
            break
        yield async_delay()

    # Exactly 200 messages so far
    self.assertEqual(len(consumer.processor._messages), 200)

    # Third batch
    third_batch = range(200, 250)
    yield self.send_messages(self.partition, third_batch)

    # Poll until the third batch has arrived as well
    while True:
        if len(consumer.processor._messages) >= 250:
            break
        yield async_delay()

    # All 250 must be present
    self.assert_message_count(consumer.processor._messages, 250)

    # Clean up
    consumer.stop()
    self.successResultOf(start_d)
def test_consumer(self):
    """Check that a running consumer keeps receiving newly sent messages.

    100 messages are sent before the default consumer starts at the
    earliest offset, 100 more while it is running, and a final 50 after
    the first 200 have arrived. The processor must end up holding 250
    messages.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    first_batch = range(100)
    yield self.send_messages(self.partition, first_batch)

    # Default consumer
    consumer = self.consumer()

    # The processor must be empty until the consumer is started
    self.assertFalse(consumer.processor._messages)

    # Begin consuming at the earliest available offset
    start_d = consumer.start(OFFSET_EARLIEST)

    # Second batch, sent while the consumer is running
    second_batch = range(100, 200)
    yield self.send_messages(self.partition, second_batch)

    # Poll until both batches have reached the processor
    while True:
        if len(consumer.processor._messages) >= 200:
            break
        yield async_delay()

    # Exactly 200 messages so far
    self.assertEqual(len(consumer.processor._messages), 200)

    # Third batch
    third_batch = range(200, 250)
    yield self.send_messages(self.partition, third_batch)

    # Poll until the third batch has arrived as well
    while True:
        if len(consumer.processor._messages) >= 250:
            break
        yield async_delay()

    # All 250 must be present
    self.assert_message_count(consumer.processor._messages, 250)

    # Clean up
    consumer.stop()
    self.successResultOf(start_d)
def test_producer_batched_gzipped_hashed_partitioner(self):
    """Check batched, gzip-encoded sends routed by ``HashedPartitioner``.

    Ten groups of ten messages are sent, each group under its own key,
    through a producer configured with ``batch_every_n=100`` and no time
    trigger. The first nine sends must therefore still be pending when
    checked, since the batch only fills on the tenth. A local
    ``HashedPartitioner`` predicts which of partitions 0 and 1 each key
    maps to, and the messages and keys fetched from each partition are
    checked against that prediction.

    The producer is stopped in a ``finally`` clause so a failed assertion
    does not leave it running.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    start_offset0 = yield self.current_offset(self.topic, 0)
    start_offset1 = yield self.current_offset(self.topic, 1)
    offsets = (start_offset0, start_offset1)

    # Hold on to every send deferred for the duration of the test
    requests = []
    # Expected messages / keys, indexed by partition number
    msgs_by_partition = ([], [])
    keys_by_partition = ([], [])

    # Local partitioner used only to predict where each key will land
    partitioner = HashedPartitioner(self.topic, [0, 1])
    producer = Producer(
        self.client, codec=CODEC_GZIP, batch_send=True, batch_every_n=100,
        batch_every_t=None, partitioner_class=HashedPartitioner)

    try:
        # Send ten groups of messages, each with a different key
        for i in range(10):
            msg_group = []
            key = 'Key: {}'.format(i)
            part = partitioner.partition(key, [0, 1])
            for j in range(10):
                msg = self.msg('Group:{} Msg:{}'.format(i, j))
                msg_group.append(msg)
                msgs_by_partition[part].append(msg)
                keys_by_partition[part].append(key)
            request = producer.send_messages(
                self.topic, key=key, msgs=msg_group)
            requests.append(request)
            yield async_delay(.5)  # Make the NoResult test have teeth...
            if i < 9:
                # This is to ensure we really are batching all the requests
                self.assertNoResult(request)

        # Now ensure we can retrieve the right messages from each partition
        for part in [0, 1]:
            yield self.assert_fetch_offset(
                part, offsets[part], msgs_by_partition[part],
                keys_by_partition[part], fetch_size=20480)
    finally:
        yield producer.stop()
def test_producer_batched_gzipped_hashed_partitioner(self):
    """Check batched, gzip-encoded sends routed by ``HashedPartitioner``.

    Ten groups of ten messages are sent, each group under its own key,
    through a producer configured with ``batch_every_n=100`` and no time
    trigger. The first nine sends must therefore still be pending when
    checked, since the batch only fills on the tenth. A local
    ``HashedPartitioner`` predicts which of partitions 0 and 1 each key
    maps to, and the messages and keys fetched from each partition are
    checked against that prediction.

    The producer is stopped in a ``finally`` clause so a failed assertion
    does not leave it running.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    start_offset0 = yield self.current_offset(self.topic, 0)
    start_offset1 = yield self.current_offset(self.topic, 1)
    offsets = (start_offset0, start_offset1)

    # Hold on to every send deferred for the duration of the test
    requests = []
    # Expected messages / keys, indexed by partition number
    msgs_by_partition = ([], [])
    keys_by_partition = ([], [])

    # Local partitioner used only to predict where each key will land
    partitioner = HashedPartitioner(self.topic, [0, 1])
    producer = Producer(
        self.client, codec=CODEC_GZIP, batch_send=True, batch_every_n=100,
        batch_every_t=None, partitioner_class=HashedPartitioner)

    try:
        # Send ten groups of messages, each with a different key
        for i in range(10):
            msg_group = []
            key = 'Key: {}'.format(i)
            part = partitioner.partition(key, [0, 1])
            for j in range(10):
                msg = self.msg('Group:{} Msg:{}'.format(i, j))
                msg_group.append(msg)
                msgs_by_partition[part].append(msg)
                keys_by_partition[part].append(key)
            request = producer.send_messages(
                self.topic, key=key, msgs=msg_group)
            requests.append(request)
            yield async_delay(.5)  # Make the NoResult test have teeth...
            if i < 9:
                # This is to ensure we really are batching all the requests
                self.assertNoResult(request)

        # Now ensure we can retrieve the right messages from each partition
        for part in [0, 1]:
            yield self.assert_fetch_offset(
                part, offsets[part], msgs_by_partition[part],
                keys_by_partition[part], fetch_size=20480)
    finally:
        yield producer.stop()
def test_throughput(self):
    """Measure produce/consume throughput and check nothing is lost.

    One producer sends blocks of ``MESSAGE_BLOCK_SIZE`` messages back to
    back for ``PRODUCE_TIME`` seconds while one consumer per partition
    (``PARTITION_COUNT`` of them) reads from the earliest offset. About
    one block in twenty is built from
    ``random_string(FETCH_BUFFER_SIZE_BYTES * 3)`` payloads; the others
    use ``random_string(1024)``. After production is halted the consumers
    get up to ``2 * PRODUCE_TIME`` seconds to catch up. The figures are
    then logged, printed to stderr and reported through ``stat()``, and
    the test asserts that the number of consumed messages equals the
    number of acknowledged sends.

    Written as a generator; the decorator that drives it is not visible
    in this chunk.
    """
    yield async_delay(3)  # 0.8.1.1 fails otherwise
    # Flag to shutdown
    keep_running = True
    # One-element lists so the nested callbacks below can update the totals
    sent_msgs_count = [0]
    total_messages_size = [0]

    # setup MESSAGE_BLOCK_SIZEx1024-ish byte messages to send over and over
    constant_messages = [
        self.msg(s)
        for s in [random_string(1024) for _ in range(MESSAGE_BLOCK_SIZE)]
    ]
    large_messages = [
        self.msg(s)
        for s in [
            random_string(FETCH_BUFFER_SIZE_BYTES * 3)
            for _ in range(MESSAGE_BLOCK_SIZE)
        ]
    ]

    constant_messages_size = len(constant_messages[0]) * MESSAGE_BLOCK_SIZE
    large_messages_size = len(large_messages[0]) * MESSAGE_BLOCK_SIZE

    # Create a producer and send some messages
    producer = Producer(self.client)

    # Create consumers (1/partition)
    consumers = [
        self.consumer(partition=p, fetch_max_wait_time=50)
        for p in range(PARTITION_COUNT)
    ]

    def log_error(failure):
        # Runs as an errback, not inside an ``except`` block, so there is
        # no active exception for log.exception() to report.
        log.error(
            "Failure sending messages: %r", failure)  # pragma: no cover

    def sent_msgs(resps):
        # Count a block only once the send has been answered
        log.info("Messages Sent: %r", resps)
        sent_msgs_count[0] += MESSAGE_BLOCK_SIZE
        return resps

    def send_more(_):
        # Checked when the response arrives (not when the request was
        # issued) so no further block goes out after shutdown is flagged.
        if keep_running:
            self.reactor.callLater(0, send_msgs)

    def send_msgs():
        # randomly, 1/20 of the time, send large messages
        if randint(0, 19):
            messages = constant_messages
            large = ''
            total_messages_size[0] += constant_messages_size
        else:
            messages = large_messages
            large = ' large'
            total_messages_size[0] += large_messages_size

        log.info("Sending: %d%s messages", len(messages), large)
        d = producer.send_messages(self.topic, msgs=messages)
        # As soon as we get a response from the broker, count them
        # and if we're still supposed to, send more
        d.addCallback(sent_msgs)
        d.addCallback(send_more)
        d.addErrback(log_error)

    # Start sending messages, MESSAGE_BLOCK_SIZE at a time
    send_msgs()

    # Start the consumers from the beginning
    fetch_start = time.time()
    start_ds = [consumer.start(OFFSET_EARLIEST) for consumer in consumers]

    # Let them all run for awhile...
    log.info("Waiting %d seconds...", PRODUCE_TIME)
    yield async_delay(PRODUCE_TIME)
    # Tell the producer to stop
    keep_running = False

    # Give the consumers twice the production time to catch up
    catch_up_time = PRODUCE_TIME * 2
    log.info(
        "Waiting up to %d seconds for "
        "consumers to finish consuming...", catch_up_time)
    deadline = time.time() + catch_up_time
    consumed = 0
    while time.time() < deadline:
        consumed = sum(
            len(consumer.processor._messages) for consumer in consumers)
        log.debug("Consumed %d messages.", consumed)
        if sent_msgs_count[0] == consumed:
            break
        yield async_delay(1)
    fetch_time = time.time() - fetch_start
    consumed_bytes = sum(c.processor._messages_bytes[0] for c in consumers)

    result_msg = ("Sent: {} messages ({:,} total bytes) in ~{} seconds"
                  " ({}/sec), Consumed: {} in {:.2f} seconds.".format(
                      sent_msgs_count[0], total_messages_size[0],
                      PRODUCE_TIME, sent_msgs_count[0] / PRODUCE_TIME,
                      consumed, fetch_time))
    # Log the result, and print to stderr to get around nose capture
    log.info(result_msg)
    print("\n\t Performance Data: " + result_msg, file=sys.stderr)
    # And print data as stats
    stat('Production_Time', PRODUCE_TIME)
    stat('Consumption_Time', fetch_time)
    stat('Messages_Produced', sent_msgs_count[0])
    stat('Messages_Consumed', consumed)
    stat('Messages_Bytes_Produced', total_messages_size[0])
    stat('Messages_Bytes_Consumed', consumed_bytes)
    stat('Messages_Produced_Per_Second', sent_msgs_count[0] / PRODUCE_TIME)
    stat('Messages_Consumed_Per_Second', consumed / fetch_time)
    stat('Message_Bytes_Produced_Per_Second',
         total_messages_size[0] / PRODUCE_TIME)
    stat('Message_Bytes_Consumed_Per_Second', consumed_bytes / fetch_time)

    # Clean up
    log.debug('Stopping producer: %r', producer)
    yield producer.stop()
    log.debug('Stopping consumers: %r', consumers)
    for consumer in consumers:
        consumer.stop()
    for start_d in start_ds:
        self.successResultOf(start_d)
    # make sure we got all the messages we sent
    self.assertEqual(
        sent_msgs_count[0],
        sum(len(consumer.processor._messages) for consumer in consumers))