def test_produce_and_consume(request, sasl_kafka):
    """Round-trip 100 messages through a fresh 2-partition topic over SASL.

    Produces 50 messages to each partition, confirms every produce future
    succeeded, then consumes 100 records and checks the per-partition split.
    """
    topic_name = special_to_underscore(request.node.name + random_string(4))
    sasl_kafka.create_topics([topic_name], num_partitions=2)
    producer, = sasl_kafka.get_producers(1)

    # [(message, produce_future), ...]
    sent = []
    for idx in range(100):
        payload = "{}-{}-{}".format(idx, request.node.name, uuid.uuid4()).encode("utf-8")
        fut = producer.send(topic_name, value=payload, partition=idx % 2)
        sent.append((payload, fut))
    producer.flush()

    for _payload, fut in sent:
        assert fut.succeeded()

    consumer, = sasl_kafka.get_consumers(1, [topic_name])
    by_partition = {0: [], 1: []}
    for count, record in enumerate(consumer, 1):
        logging.debug("Consumed message %s", repr(record))
        by_partition[record.partition].append(record)
        if count >= 100:
            break

    assert_message_count(by_partition[0], 50)
    assert_message_count(by_partition[1], 50)
def test_kafka_consumer(kafka_consumer_factory, send_messages):
    """Basic KafkaConsumer smoke test: 100 messages per partition, all seen."""
    consumer = kafka_consumer_factory(auto_offset_reset='earliest')
    send_messages(range(0, 100), partition=0)
    send_messages(range(0, 100), partition=1)

    seen = {0: [], 1: []}
    total = 0
    for record in consumer:
        logging.debug("Consumed message %s", repr(record))
        total += 1
        seen[record.partition].append(record)
        if total >= 200:
            break

    assert_message_count(seen[0], 100)
    assert_message_count(seen[1], 100)
def test_kafka_consumer_max_bytes_one_msg(kafka_consumer_factory, send_messages):
    """Consume with fetch_max_bytes=1 and verify messages still arrive."""
    # Produce to a single partition only, so fetches are not issued to two
    # nodes in parallel.
    send_messages(range(100, 200))

    # FetchResponse_v3 should always include at least one full message even
    # with fetch_max_bytes=1, so we expect one message per fetch. However,
    # brokers on 0.11.0.0 storing data in the v2 format return a whole
    # MessageSet at a time instead.
    #
    # DP Note: This is a strange test. The consumer shouldn't care how many
    # messages a FetchResponse carries, as long as it is non-zero. I would
    # not mind if we deleted this test. It caused a minor headache when
    # testing 0.11.0.0.
    group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
    consumer = kafka_consumer_factory(
        group_id=group,
        auto_offset_reset='earliest',
        consumer_timeout_ms=5000,
        fetch_max_bytes=1)

    fetched = [next(consumer) for _ in range(10)]
    assert_message_count(fetched, 10)
def test_kafka_consumer__blocking(kafka_consumer_factory, topic, send_messages):
    """Verify consumer iteration blocks up to consumer_timeout_ms, then stops."""
    TIMEOUT_MS = 500
    consumer = kafka_consumer_factory(auto_offset_reset='earliest',
                                      enable_auto_commit=False,
                                      consumer_timeout_ms=TIMEOUT_MS)

    # Manual partition assignment sidesteps consumer-group coordination
    # overhead.
    consumer.unsubscribe()
    consumer.assign([TopicPartition(topic, 0)])

    # Empty queue: a single next() should block ~500ms then raise.
    with Timer() as t:
        with pytest.raises(StopIteration):
            next(consumer)
    assert t.interval >= (TIMEOUT_MS / 1000.0)

    send_messages(range(0, 10))

    # 10 messages queued; taking 5 should return without blocking.
    received = []
    with Timer() as t:
        for _ in range(5):
            received.append(next(consumer))
    assert_message_count(received, 5)
    assert t.interval < (TIMEOUT_MS / 1000.0)

    # Ask for 10 with only 5 left: get 5 back, then block 500ms and time out.
    received = []
    with Timer() as t:
        with pytest.raises(StopIteration):
            for _ in range(10):
                received.append(next(consumer))
    assert_message_count(received, 5)
    assert t.interval >= (TIMEOUT_MS / 1000.0)
def test_kafka_consumer__offset_commit_resume(kafka_consumer_factory, send_messages):
    """A second consumer in the same group resumes from committed offsets."""
    GROUP_ID = random_string(10)

    send_messages(range(0, 100), partition=0)
    send_messages(range(100, 200), partition=1)

    # First consumer grabs 180 of the 200 messages.
    consumer1 = kafka_consumer_factory(
        group_id=GROUP_ID,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        auto_offset_reset='earliest',
    )
    output_msgs1 = [next(consumer1) for _ in range(180)]
    assert_message_count(output_msgs1, 180)

    # Normally the `kafka_consumer_factory` fixture handles closing during
    # teardown. Close manually here to force an auto-commit before the
    # second consumer starts, so it only sees previously unconsumed
    # messages.
    consumer1.close()

    # Second consumer picks up messages 181-200.
    consumer2 = kafka_consumer_factory(
        group_id=GROUP_ID,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        auto_offset_reset='earliest',
    )
    output_msgs2 = [next(consumer2) for _ in range(20)]
    assert_message_count(output_msgs2, 20)

    # The second consumer must not have reconsumed anything the first saw.
    assert_message_count(output_msgs1 + output_msgs2, 200)