def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    """Verify the RecordMetadata returned by KafkaProducer.send().

    A first record is sent with an explicit timestamp and every metadata
    field is checked against the produce "magic" the producer reports.
    A second record is sent without a timestamp to check the generated one.
    """
    if compression == 'zstd' and env_kafka_version() < (2, 1, 0):
        pytest.skip('zstd requires 2.1.0 or more')

    connect_str = kafka_broker.host + ':' + str(kafka_broker.port)
    producer = KafkaProducer(
        bootstrap_servers=connect_str,
        retries=5,
        max_block_ms=30000,
        compression_type=compression,
    )
    magic = producer._max_usable_produce_magic()

    # record headers are supported in 0.11.0
    headers = None if env_kafka_version() < (0, 11, 0) else [("Header Key", b"Header Value")]

    topic = random_string(5)

    explicit = producer.send(
        topic,
        value=b"Simple value",
        key=b"Simple key",
        headers=headers,
        timestamp_ms=9999999,
        partition=0,
    )
    record = explicit.get(timeout=5)

    assert record is not None
    assert record.topic == topic
    assert record.partition == 0
    assert record.topic_partition == TopicPartition(topic, 0)
    assert record.offset == 0

    expected_timestamp = 9999999 if magic >= 1 else -1  # NO_TIMESTAMP
    assert record.timestamp == expected_timestamp

    if magic >= 2:
        assert record.checksum is None
    else:
        expected_checksum = 1370034956 if magic == 1 else 3296137851
        assert record.checksum == expected_checksum

    assert record.serialized_key_size == 10
    assert record.serialized_value_size == 12
    if headers:
        assert record.serialized_header_size == 22

    if magic == 0:
        pytest.skip('generated timestamp case is skipped for broker 0.9 and below')

    # Capture the wall clock just before sending so the generated timestamp
    # can be compared against it.
    send_time = time.time() * 1000
    generated = producer.send(
        topic,
        value=b"Simple value",
        key=b"Simple key",
        timestamp_ms=None,
        partition=0,
    )
    record = generated.get(timeout=5)
    assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
def _create_topic(self, topic_name, num_partitions, replication_factor, timeout_ms=10000):
    """Create ``topic_name`` using the fastest mechanism available.

    ``num_partitions`` / ``replication_factor`` fall back to
    ``self.partitions`` / ``self.replicas`` when passed as ``None``.

    Strategy, in order:
      1. a metadata request, when the broker auto-creates topics and the
         requested layout equals the broker defaults;
      2. a CreateTopics request on brokers >= 0.10.1.0;
      3. the ``kafka.admin.TopicCommand`` command line tool otherwise.

    Raises:
        The exception mapped from a non-zero CreateTopics error code, or
        RuntimeError when the command line tool fails for a reason other
        than the topic already existing.
    """
    if num_partitions is None:
        num_partitions = self.partitions
    if replication_factor is None:
        replication_factor = self.replicas

    # Try different methods to create a topic, from the fastest to the slowest
    if self.auto_create_topic and \
       num_partitions == self.partitions and \
       replication_factor == self.replicas:
        self._send_request(MetadataRequest[0]([topic_name]))
    elif env_kafka_version() >= (0, 10, 1, 0):
        request = CreateTopicsRequest[0](
            [(topic_name, num_partitions, replication_factor, [], [])],
            timeout_ms)
        result = self._send_request(request, timeout=timeout_ms)
        for topic_result in result[0].topic_error_codes:
            error_code = topic_result[1]
            if error_code != 0:
                raise errors.for_code(error_code)
    else:
        # Both values were already defaulted above, so they are passed as-is.
        args = self.kafka_run_class_args('kafka.admin.TopicCommand',
                                         '--zookeeper', '%s:%s/%s' % (self.zookeeper.host,
                                                                      self.zookeeper.port,
                                                                      self.zk_chroot),
                                         '--create',
                                         '--topic', topic_name,
                                         '--partitions', num_partitions,
                                         '--replication-factor', replication_factor)
        if env_kafka_version() >= (0, 10):
            args.append('--if-not-exists')
        env = self.kafka_run_class_env()
        proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = proc.communicate()
        if proc.returncode != 0:
            # communicate() returns bytes for binary pipes; a str-in-bytes
            # membership test raises TypeError on Python 3, so decode first.
            output = stdout.decode('utf-8', 'replace') if isinstance(stdout, bytes) else stdout
            if 'kafka.common.TopicExistsException' not in output:
                self.out("Failed to create topic %s" % (topic_name,))
                self.out(stdout)
                self.out(stderr)
                raise RuntimeError("Failed to create topic %s" % (topic_name,))
def _create_topic(self, topic_name, num_partitions=None, replication_factor=None, timeout_ms=10000): if num_partitions is None: num_partitions = self.partitions if replication_factor is None: replication_factor = self.replicas # Try different methods to create a topic, from the fastest to the slowest if self.auto_create_topic and num_partitions == self.partitions and replication_factor == self.replicas: self._create_topic_via_metadata(topic_name, timeout_ms) elif env_kafka_version() >= (0, 10, 1, 0): try: self._create_topic_via_admin_api(topic_name, num_partitions, replication_factor, timeout_ms) except InvalidReplicationFactorError: # wait and try again # on travis the brokers sometimes take a while to find themselves time.sleep(0.5) self._create_topic_via_admin_api(topic_name, num_partitions, replication_factor, timeout_ms) else: self._create_topic_via_cli(topic_name, num_partitions, replication_factor)
def _create_topic_via_cli(self, topic_name, num_partitions, replication_factor):
    """Create ``topic_name`` with the ``kafka.admin.TopicCommand`` CLI tool.

    ``num_partitions`` / ``replication_factor`` fall back to
    ``self.partitions`` / ``self.replicas`` when ``None``.  On brokers
    >= 0.10 the ``--if-not-exists`` flag is appended.

    Raises:
        RuntimeError: the tool exited non-zero and its stdout does not
            mention ``kafka.common.TopicExistsException``.
    """
    args = self.kafka_run_class_args('kafka.admin.TopicCommand',
                                     '--zookeeper', '%s:%s/%s' % (self.zookeeper.host,
                                                                  self.zookeeper.port,
                                                                  self.zk_chroot),
                                     '--create',
                                     '--topic', topic_name,
                                     '--partitions', self.partitions
                                     if num_partitions is None else num_partitions,
                                     '--replication-factor', self.replicas
                                     if replication_factor is None
                                     else replication_factor)
    if env_kafka_version() >= (0, 10):
        args.append('--if-not-exists')
    env = self.kafka_run_class_env()
    proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        # communicate() returns bytes for binary pipes; a str-in-bytes
        # membership test raises TypeError on Python 3, so decode first.
        output = stdout.decode('utf-8', 'replace') if isinstance(stdout, bytes) else stdout
        if 'kafka.common.TopicExistsException' not in output:
            self.out("Failed to create topic %s" % (topic_name,))
            self.out(stdout)
            self.out(stderr)
            raise RuntimeError("Failed to create topic %s" % (topic_name,))
def test_kafka_version_infer(kafka_consumer_factory):
    """The (major, minor) version probed from a connection matches the env."""
    consumer = kafka_consumer_factory()
    expected = env_kafka_version()[:2]

    # Probe the first connection held by the consumer's client.
    connections = list(consumer._client._conns.values())
    inferred = connections[0].check_version()[:2]

    failure = "Was expecting inferred broker version to be %s but was %s" % (expected, inferred)
    assert expected == inferred, failure
def test_end_to_end(kafka_broker, compression):
    """Produce 100 messages with ``compression`` and consume them all back."""
    broker_version = env_kafka_version()
    if compression == 'lz4':
        if broker_version < (0, 8, 2):
            pytest.skip('LZ4 requires 0.8.2')
        elif platform.python_implementation() == 'PyPy':
            pytest.skip('python-lz4 crashes on older versions of pypy')

    if compression == 'zstd' and env_kafka_version() < (2, 1, 0):
        pytest.skip('zstd requires kafka 2.1.0 or newer')

    connect_str = kafka_broker.host + ':' + str(kafka_broker.port)
    producer = KafkaProducer(
        bootstrap_servers=connect_str,
        retries=5,
        max_block_ms=30000,
        compression_type=compression,
        value_serializer=str.encode,
    )
    consumer = KafkaConsumer(
        bootstrap_servers=connect_str,
        group_id=None,
        consumer_timeout_ms=30000,
        auto_offset_reset='earliest',
        value_deserializer=bytes.decode,
    )

    topic = random_string(5)
    messages = 100

    # Queue every send first, then wait for all of them to be acknowledged.
    futures = [producer.send(topic, 'msg %d' % i) for i in range(messages)]
    ret = [future.get(timeout=30) for future in futures]
    assert len(ret) == messages
    producer.close()

    consumer.subscribe([topic])
    msgs = set()
    for _ in range(messages):
        try:
            received = next(consumer)
        except StopIteration:
            break
        msgs.add(received.value)

    assert msgs == {'msg %d' % (i,) for i in range(messages)}
    consumer.close()
def kafka_broker_factory(zookeeper):
    """Return a Kafka broker fixture factory"""
    assert env_kafka_version(), 'KAFKA_VERSION must be specified to run integration tests'

    # Every broker handed out by the factory, closed after the yield.
    started = []

    def factory(**broker_params):
        """Start ``num_brokers`` (default 1) brokers and return them as a tuple."""
        params = dict(broker_params)
        params.setdefault('partitions', 4)
        num_brokers = params.pop('num_brokers', 1)

        brokers = []
        for node_id in range(num_brokers):
            brokers.append(KafkaFixture.instance(node_id, zookeeper, **params))
        started.extend(brokers)
        return tuple(brokers)

    yield factory

    for broker in started:
        broker.close()
def setUpClass(cls):  # noqa
    """Start Zookeeper and one Kafka broker unless the tests are skipped.

    Nothing is started when the broker is older than 0.11 or DISABLED is set.
    """
    too_old = env_kafka_version() < (0, 11)
    if too_old or DISABLED:
        return

    zookeeper = ZookeeperFixture.instance()
    cls.zk = zookeeper
    cls.server = KafkaFixture.instance(0, cls.zk)
class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
    """Integration tests for produce requests and the Simple/Keyed producers.

    The Zookeeper and Kafka fixtures are only started when the
    KAFKA_VERSION environment variable is set.
    """

    @classmethod
    def setUpClass(cls):  # noqa
        """Start Zookeeper and one Kafka broker when KAFKA_VERSION is set."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.zk = ZookeeperFixture.instance()
        cls.server = KafkaFixture.instance(0, cls.zk)

    @classmethod
    def tearDownClass(cls):  # noqa
        """Close the fixtures started by setUpClass."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.server.close()
        cls.zk.close()

    def test_produce_10k_simple(self):
        start_offset = self.current_offset(self.topic, 0)

        self.assert_produce_request(
            [create_message(("Test message %d" % i).encode('utf-8'))
             for i in range(10000)],
            start_offset,
            10000,
        )

    def test_produce_many_gzip(self):
        start_offset = self.current_offset(self.topic, 0)

        message1 = create_gzip_message([
            (("Gzipped 1 %d" % i).encode('utf-8'), None) for i in range(100)])
        message2 = create_gzip_message([
            (("Gzipped 2 %d" % i).encode('utf-8'), None) for i in range(100)])

        self.assert_produce_request(
            [message1, message2],
            start_offset,
            200,
        )

    def test_produce_many_snappy(self):
        self.skipTest("All snappy integration tests fail with nosnappyjava")
        start_offset = self.current_offset(self.topic, 0)

        self.assert_produce_request(
            [
                create_snappy_message([("Snappy 1 %d" % i, None) for i in range(100)]),
                create_snappy_message([("Snappy 2 %d" % i, None) for i in range(100)]),
            ],
            start_offset,
            200,
        )

    def test_produce_mixed(self):
        start_offset = self.current_offset(self.topic, 0)

        msg_count = 1 + 100
        messages = [
            create_message(b"Just a plain message"),
            create_gzip_message([
                (("Gzipped %d" % i).encode('utf-8'), None) for i in range(100)]),
        ]

        # All snappy integration tests fail with nosnappyjava
        if False and has_snappy():
            msg_count += 100
            messages.append(create_snappy_message([("Snappy %d" % i, None) for i in range(100)]))

        self.assert_produce_request(messages, start_offset, msg_count)

    def test_produce_100k_gzipped(self):
        start_offset = self.current_offset(self.topic, 0)

        self.assert_produce_request(
            [
                create_gzip_message([
                    (("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
                    for i in range(50000)])
            ],
            start_offset,
            50000,
        )

        self.assert_produce_request(
            [
                create_gzip_message([
                    (("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
                    for i in range(50000)])
            ],
            start_offset + 50000,
            50000,
        )

    ############################
    #   SimpleProducer Tests   #
    ############################

    def test_simple_producer_new_topic(self):
        producer = SimpleProducer(self.client)
        resp = producer.send_messages('new_topic', self.msg('foobar'))
        self.assert_produce_response(resp, 0)
        producer.stop()

    def test_simple_producer(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        producer = SimpleProducer(self.client, random_start=False)

        # Goes to first partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
        self.assert_produce_response(resp, start_offsets[0])

        # Goes to the next partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("three"))
        self.assert_produce_response(resp, start_offsets[1])

        self.assert_fetch_offset(partitions[0], start_offsets[0], [self.msg("one"), self.msg("two")])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [self.msg("three")])

        # Goes back to the first partition because there's only two partitions
        resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
        self.assert_produce_response(resp, start_offsets[0] + 2)
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five")
        ])

        producer.stop()

    def test_producer_random_order(self):
        producer = SimpleProducer(self.client, random_start=True)
        resp1 = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
        resp2 = producer.send_messages(self.topic, self.msg("three"))
        resp3 = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))

        self.assertEqual(resp1[0].partition, resp3[0].partition)
        self.assertNotEqual(resp1[0].partition, resp2[0].partition)

        # Stop the producer like every other test in this class does.
        producer.stop()

    def test_producer_ordered_start(self):
        producer = SimpleProducer(self.client, random_start=False)
        resp1 = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
        resp2 = producer.send_messages(self.topic, self.msg("three"))
        resp3 = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))

        self.assertEqual(resp1[0].partition, 0)
        self.assertEqual(resp2[0].partition, 1)
        self.assertEqual(resp3[0].partition, 0)

        # Stop the producer like every other test in this class does.
        producer.stop()

    def test_async_simple_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = SimpleProducer(self.client, async_send=True, random_start=False)
        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assertEqual(len(resp), 0)

        # flush messages
        producer.stop()

        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

    def test_batched_simple_producer__triggers_by_message(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        # Configure batch producer
        batch_messages = 5
        batch_interval = 5
        producer = SimpleProducer(
            self.client,
            async_send=True,
            batch_send_every_n=batch_messages,
            batch_send_every_t=batch_interval,
            random_start=False)

        # Send 4 messages -- should not trigger a batch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        # send 3 more messages -- should trigger batch on first 5
        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait until producer has pulled all messages from internal queue
        # this should signal that the first batch was sent, and the producer
        # is now waiting for enough messages to batch again (or a timeout)
        timeout = 5
        start = time.time()
        while not producer.queue.empty():
            if time.time() - start > timeout:
                self.fail('timeout waiting for producer queue to empty')
            time.sleep(0.1)

        # send messages groups all *msgs in a single call to the same partition
        # so we should see all messages from the first call in one partition
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        # Because we are batching every 5 messages, we should only see one
        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
        ])

        producer.stop()

    def test_batched_simple_producer__triggers_by_time(self):
        self.skipTest("Flakey test -- should be refactored or removed")
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        batch_interval = 5
        producer = SimpleProducer(
            self.client,
            async_send=True,
            batch_send_every_n=100,
            batch_send_every_t=batch_interval,
            random_start=False)

        # Send 5 messages and do a fetch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait the timeout out
        time.sleep(batch_interval)

        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        ])

        producer.stop()

    ############################
    #   KeyedProducer Tests    #
    ############################

    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_keyedproducer_null_payload(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
        resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
        self.assert_produce_response(resp, start_offsets[0])
        resp = producer.send_messages(self.topic, self.key("key2"), None)
        self.assert_produce_response(resp, start_offsets[1])
        resp = producer.send_messages(self.topic, self.key("key3"), None)
        self.assert_produce_response(resp, start_offsets[0] + 1)
        resp = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))
        self.assert_produce_response(resp, start_offsets[1] + 1)

        self.assert_fetch_offset(partitions[0], start_offsets[0], [self.msg("one"), None])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [None, self.msg("four")])

        producer.stop()

    def test_round_robin_partitioner(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
        resp1 = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
        resp2 = producer.send_messages(self.topic, self.key("key2"), self.msg("two"))
        resp3 = producer.send_messages(self.topic, self.key("key3"), self.msg("three"))
        resp4 = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))

        self.assert_produce_response(resp1, start_offsets[0] + 0)
        self.assert_produce_response(resp2, start_offsets[1] + 0)
        self.assert_produce_response(resp3, start_offsets[0] + 1)
        self.assert_produce_response(resp4, start_offsets[1] + 1)

        self.assert_fetch_offset(partitions[0], start_offsets[0], [self.msg("one"), self.msg("three")])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [self.msg("two"), self.msg("four")])

        producer.stop()

    def test_hashed_partitioner(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        resp1 = producer.send_messages(self.topic, self.key("1"), self.msg("one"))
        resp2 = producer.send_messages(self.topic, self.key("2"), self.msg("two"))
        resp3 = producer.send_messages(self.topic, self.key("3"), self.msg("three"))
        resp4 = producer.send_messages(self.topic, self.key("3"), self.msg("four"))
        resp5 = producer.send_messages(self.topic, self.key("4"), self.msg("five"))

        offsets = {partitions[0]: start_offsets[0], partitions[1]: start_offsets[1]}
        messages = {partitions[0]: [], partitions[1]: []}

        keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
        resps = [resp1, resp2, resp3, resp4, resp5]
        msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]

        # Replay the sends locally: each key selects a partition by
        # hash(key) % 2, and the expected offset advances per partition.
        for key, resp, msg in zip(keys, resps, msgs):
            k = hash(key) % 2
            partition = partitions[k]
            offset = offsets[partition]
            self.assert_produce_response(resp, offset)
            offsets[partition] += 1
            messages[partition].append(msg)

        self.assert_fetch_offset(partitions[0], start_offsets[0], messages[partitions[0]])
        self.assert_fetch_offset(partitions[1], start_offsets[1], messages[partitions[1]])

        producer.stop()

    def test_async_keyed_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner,
                                 async_send=True,
                                 batch_send_every_t=1)

        resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark
        # (bounded, so a lost message fails the test instead of hanging it)
        timeout = 30
        start = time.time()
        while self.current_offset(self.topic, partition) == start_offset:
            if time.time() - start > timeout:
                self.fail('timeout waiting for the highwatermark to advance')
            time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

        producer.stop()

    ############################
    #   Producer ACK Tests     #
    ############################

    def test_acks_none(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = Producer(
            self.client,
            req_acks=Producer.ACK_NOT_REQUIRED,
        )
        resp = producer.send_messages(self.topic, partition, self.msg("one"))

        # No response from produce request with no acks required
        self.assertEqual(len(resp), 0)

        # But the message should still have been delivered
        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])
        producer.stop()

    def test_acks_local_write(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = Producer(
            self.client,
            req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
        )
        resp = producer.send_messages(self.topic, partition, self.msg("one"))

        self.assert_produce_response(resp, start_offset)
        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

        producer.stop()

    def test_acks_cluster_commit(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = Producer(
            self.client,
            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
        )

        resp = producer.send_messages(self.topic, partition, self.msg("one"))
        self.assert_produce_response(resp, start_offset)
        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

        producer.stop()

    def assert_produce_request(self, messages, initial_offset, message_ct,
                               partition=0):
        """Produce ``messages`` and check the response and the new offset.

        The partition's current offset must end up at
        ``initial_offset + message_ct``.
        """
        produce = ProduceRequestPayload(self.topic, partition, messages=messages)

        # There should only be one response message from the server.
        # This will throw an exception if there's more than one.
        resp = self.client.send_produce_request([produce])
        self.assert_produce_response(resp, initial_offset)

        self.assertEqual(self.current_offset(self.topic, partition), initial_offset + message_ct)

    def assert_produce_response(self, resp, initial_offset):
        """Check ``resp`` holds one error-free entry at ``initial_offset``."""
        self.assertEqual(len(resp), 1)
        self.assertEqual(resp[0].error, 0)
        self.assertEqual(resp[0].offset, initial_offset)

    def assert_fetch_offset(self, partition, start_offset, expected_messages):
        """Fetch from ``start_offset`` and compare values and high watermark."""
        # There should only be one response message from the server.
        # This will throw an exception if there's more than one.

        resp, = self.client.send_fetch_request([FetchRequestPayload(self.topic, partition, start_offset, 1024)])

        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.partition, partition)
        messages = [x.message.value for x in resp.messages]

        self.assertEqual(messages, expected_messages)
        self.assertEqual(resp.highwaterMark, start_offset + len(expected_messages))
def tearDownClass(cls):  # noqa
    """Close the Kafka and Zookeeper fixtures unless the tests were skipped.

    Nothing is closed when the broker is older than 0.11 or DISABLED is set.
    """
    skipped = env_kafka_version() < (0, 11) or DISABLED
    if not skipped:
        cls.server.close()
        cls.zk.close()
class TestKafkaClientIntegration(KafkaIntegrationTestCase):
    """Integration tests for fetch, produce and offset requests on the client.

    The Zookeeper and Kafka fixtures are only started when the
    KAFKA_VERSION environment variable is set.
    """

    @classmethod
    def setUpClass(cls):  # noqa
        """Start Zookeeper and one Kafka broker when KAFKA_VERSION is set."""
        if os.environ.get('KAFKA_VERSION'):
            cls.zk = ZookeeperFixture.instance()
            cls.server = KafkaFixture.instance(0, cls.zk)

    @classmethod
    def tearDownClass(cls):  # noqa
        """Close the fixtures started by setUpClass."""
        if os.environ.get('KAFKA_VERSION'):
            cls.server.close()
            cls.zk.close()

    def test_consume_none(self):
        request = FetchRequestPayload(self.topic, 0, 0, 1024)

        (fetch_resp,) = self.client.send_fetch_request([request])
        self.assertEqual(fetch_resp.error, 0)
        self.assertEqual(fetch_resp.topic, self.topic)
        self.assertEqual(fetch_resp.partition, 0)

        self.assertEqual(len(list(fetch_resp.messages)), 0)

    def test_ensure_topic_exists(self):

        # assume that self.topic was created by setUp
        # if so, this should succeed
        self.client.ensure_topic_exists(self.topic, timeout=1)

        # ensure_topic_exists should fail with KafkaTimeoutError
        with self.assertRaises(KafkaTimeoutError):
            self.client.ensure_topic_exists('this_topic_doesnt_exist', timeout=0)

    def test_send_produce_request_maintains_request_response_order(self):

        for name in ('foo', 'bar'):
            self.client.ensure_topic_exists(name)

        targets = [('foo', 0), ('bar', 1), ('foo', 1), ('bar', 0)]
        requests = [
            ProduceRequestPayload(name, partition, [create_message(b'a'), create_message(b'b')])
            for name, partition in targets
        ]

        responses = self.client.send_produce_request(requests)
        # Walk both lists from the tail; each response must match its request.
        while len(responses):
            request = requests.pop()
            response = responses.pop()
            self.assertEqual(request.topic, response.topic)
            self.assertEqual(request.partition, response.partition)

    ####################
    #   Offset Tests   #
    ####################

    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_commit_fetch_offsets(self):
        commit = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata')
        (resp,) = self.client.send_offset_commit_request('group', [commit])
        self.assertEqual(resp.error, 0)

        fetch = OffsetFetchRequestPayload(self.topic, 0)
        (resp,) = self.client.send_offset_fetch_request('group', [fetch])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, 42)
        self.assertEqual(resp.metadata, '')  # Metadata isn't stored for now

    @pytest.mark.skipif(env_kafka_version() < (0, 9), reason='Unsupported Kafka Version')
    def test_commit_fetch_offsets_dual(self):
        payload = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata')
        (resp,) = self.client.send_offset_commit_request_kafka('group', [payload])
        self.assertEqual(resp.error, 0)

        # The commit payload is reused as-is for the fetch request.
        (resp,) = self.client.send_offset_fetch_request_kafka('group', [payload])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, 42)
        # Metadata is stored in kafka
        self.assertEqual(resp.metadata, 'metadata')
resp = client.send_produce_request([produce]) assert_produce_response(resp, initial_offset) assert current_offset(client, topic, partition) == initial_offset + message_ct def assert_produce_response(resp, initial_offset): """Verify that a produce response is well-formed """ assert len(resp) == 1 assert resp[0].error == 0 assert resp[0].offset == initial_offset @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") def test_produce_many_simple(simple_client, topic): """Test multiple produces using the SimpleClient """ start_offset = current_offset(simple_client, topic, 0) assert_produce_request( simple_client, topic, [ create_message(("Test message %d" % i).encode('utf-8')) for i in range(100) ], start_offset, 100, )
def setUp(self):
    """Skip on brokers older than 0.11 or when DISABLED, else run base setUp."""
    too_old = env_kafka_version() < (0, 11)
    if too_old or DISABLED:
        self.skipTest('Admin ACL Integration test requires KAFKA_VERSION >= 0.11')
    super(TestAdminClientIntegration, self).setUp()
from . import unittest
from kafka import (KafkaConsumer, MultiProcessConsumer, SimpleConsumer,
                   create_message, create_gzip_message, KafkaProducer)
import kafka.codec
from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES
from kafka.errors import (ConsumerFetchSizeTooSmall, OffsetOutOfRangeError,
                          UnsupportedVersionError, KafkaTimeoutError,
                          UnsupportedCodecError)
from kafka.structs import (ProduceRequestPayload, TopicPartition,
                           OffsetAndTimestamp)

from test.fixtures import ZookeeperFixture, KafkaFixture
from test.testutil import KafkaIntegrationTestCase, Timer, assert_message_count, env_kafka_version, random_string


@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
def test_kafka_consumer(kafka_consumer_factory, send_messages):
    """Test KafkaConsumer"""
    consumer = kafka_consumer_factory(auto_offset_reset='earliest')
    for partition in (0, 1):
        send_messages(range(0, 100), partition=partition)

    # Bucket consumed messages by partition; stop after 200 in total.
    messages = {0: [], 1: []}
    for cnt, message in enumerate(consumer, 1):
        logging.debug("Consumed message %s", repr(message))
        messages[message.partition].append(message)
        if cnt >= 200:
            break

    assert_message_count(messages[0], 100)
class TestKafkaAdminClientIntegration(KafkaIntegrationTestCase):
    """Integration tests for AdminClient topic and partition management.

    The Zookeeper and Kafka fixtures are only started when the
    KAFKA_VERSION environment variable is set.
    """

    @classmethod
    def setUpClass(cls):
        """Start Zookeeper and one Kafka broker when KAFKA_VERSION is set."""
        if os.environ.get('KAFKA_VERSION'):
            cls.zk = ZookeeperFixture.instance()
            cls.server = KafkaFixture.instance(0, cls.zk)

    @classmethod
    def tearDownClass(cls):
        """Close the fixtures started by setUpClass."""
        if os.environ.get('KAFKA_VERSION'):
            cls.server.close()
            cls.zk.close()

    @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason='Unsupported Kafka Version')
    def test_create_delete_topics(self):
        admin = AdminClient(self.client_async)
        topic = NewTopic(name='topic', num_partitions=1, replication_factor=1)
        metadata_request = MetadataRequest[1]()

        response = admin.create_topics(topics=[topic], timeout=KAFKA_ADMIN_TIMEOUT_SECONDS)
        # Error code 7 means that RequestTimedOut but we can safely assume
        # that topic is created or will be created eventually.
        # see this https://cwiki.apache.org/confluence/display/KAFKA/
        # KIP-4+-+Command+line+and+centralized+administrative+operations
        create_error = response[0].topic_errors[0][1]
        self.assertTrue(create_error == 0 or create_error == 7)

        time.sleep(1)  # allows the topic to be created

        delete_response = admin.delete_topics(['topic'], timeout=1)
        # NOTE(review): this re-checks the create `response`, not
        # `delete_response` -- looks like a copy/paste slip; confirm the
        # delete response layout before changing it.
        checked_error = response[0].topic_errors[0][1]
        self.assertTrue(checked_error == 0 or checked_error == 7)

    @pytest.mark.skipif(env_kafka_version() < (1, 0, 0), reason='Unsupported Kafka Version')
    def test_create_partitions(self):
        admin = AdminClient(self.client_async)
        topic = NewTopic(name='topic', num_partitions=1, replication_factor=1)
        metadata_request = MetadataRequest[1]()

        admin.create_topics(topics=[topic], timeout=KAFKA_ADMIN_TIMEOUT_SECONDS)

        time.sleep(1)  # allows the topic to be created

        new_partitions_info = NewPartitionsInfo('topic', 2, [[0]])
        response = admin.create_partitions([new_partitions_info], timeout=1, validate_only=False)

        partition_error = response[0].topic_errors[0][1]
        self.assertTrue(partition_error == 0 or partition_error == 7)
import kafka.codec
from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES
from kafka.errors import (
    ConsumerFetchSizeTooSmall, OffsetOutOfRangeError, UnsupportedVersionError,
    KafkaTimeoutError, UnsupportedCodecError, ConsumerTimeout
)
from kafka.protocol.message import PartialMessage
from kafka.structs import (
    ProduceRequestPayload, TopicPartition, OffsetAndTimestamp
)

from test.fixtures import ZookeeperFixture, KafkaFixture
from test.testutil import KafkaIntegrationTestCase, Timer, assert_message_count, env_kafka_version, random_string


@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
def test_kafka_consumer(kafka_consumer_factory, send_messages):
    """Test KafkaConsumer"""
    consumer = kafka_consumer_factory(auto_offset_reset='earliest')
    for partition in (0, 1):
        send_messages(range(0, 100), partition=partition)

    # Bucket consumed messages by partition; stop after 200 in total.
    messages = {0: [], 1: []}
    for cnt, message in enumerate(consumer, 1):
        logging.debug("Consumed message %s", repr(message))
        messages[message.partition].append(message)
        if cnt >= 200:
            break

    assert_message_count(messages[0], 100)
import pytest

from logging import info
from test.testutil import env_kafka_version, random_string
from threading import Event, Thread
from time import time, sleep

from kafka.admin import (
    ACLFilter, ACLOperation, ACLPermissionType, ResourcePattern, ResourceType, ACL, ConfigResource, ConfigResourceType)
from kafka.errors import (NoError, GroupCoordinatorNotAvailableError)


@pytest.mark.skipif(env_kafka_version() < (0, 11), reason="ACL features require broker >=0.11")
def test_create_describe_delete_acls(kafka_admin_client):
    """Tests that we can add, list and remove ACLs
    """

    # Check that we don't have any ACLs in the cluster
    topic_pattern = ResourcePattern(ResourceType.TOPIC, "topic")
    match_any = ACLFilter(
        principal=None,
        host="*",
        operation=ACLOperation.ANY,
        permission_type=ACLPermissionType.ANY,
        resource_pattern=topic_pattern,
    )
    acls, error = kafka_admin_client.describe_acls(match_any)

    assert error is NoError
    assert len(acls) == 0
import pytest

from logging import info
from test.testutil import env_kafka_version, random_string
from threading import Event, Thread
from time import time, sleep

from kafka.admin import (ACLFilter, ACLOperation, ACLPermissionType,
                         ResourcePattern, ResourceType, ACL, ConfigResource,
                         ConfigResourceType)
from kafka.errors import (NoError, GroupCoordinatorNotAvailableError,
                          NonEmptyGroupError, GroupIdNotFoundError)


@pytest.mark.skipif(env_kafka_version() < (0, 11), reason="ACL features require broker >=0.11")
def test_create_describe_delete_acls(kafka_admin_client):
    """Tests that we can add, list and remove ACLs
    """

    # Check that we don't have any ACLs in the cluster
    topic_pattern = ResourcePattern(ResourceType.TOPIC, "topic")
    match_any = ACLFilter(
        principal=None,
        host="*",
        operation=ACLOperation.ANY,
        permission_type=ACLPermissionType.ANY,
        resource_pattern=topic_pattern,
    )
    acls, error = kafka_admin_client.describe_acls(match_any)

    assert error is NoError
    assert len(acls) == 0
def tearDown(self):
    """Run the base tearDown unless the broker is older than 0.11 or DISABLED is set."""
    skipped = env_kafka_version() < (0, 11) or DISABLED
    if not skipped:
        super(TestAdminClientIntegration, self).tearDown()
class TestConsumerIntegration(KafkaIntegrationTestCase):
    """Integration tests for the consumer classes against live brokers.

    ``setUpClass`` starts one Zookeeper fixture and two Kafka broker
    fixtures sharing a random chroot; nothing is started when the
    ``KAFKA_VERSION`` environment variable is unset or empty.
    """

    # Show full diffs when an equality assertion fails.
    maxDiff = None

    @classmethod
    def setUpClass(cls):
        """Start the Zookeeper and Kafka fixtures shared by all tests."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.zk = ZookeeperFixture.instance()
        chroot = random_string(10)
        cls.server1 = KafkaFixture.instance(0, cls.zk, zk_chroot=chroot)
        cls.server2 = KafkaFixture.instance(1, cls.zk, zk_chroot=chroot)

        cls.server = cls.server1  # Bootstrapping server

    @classmethod
    def tearDownClass(cls):
        """Close the fixtures started by ``setUpClass``."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.server1.close()
        cls.server2.close()
        cls.zk.close()

    def send_messages(self, partition, messages):
        """Produce ``messages`` to ``partition`` of ``self.topic``.

        Each item is stringified, passed through ``self.msg`` and wrapped
        with ``create_message``. The produce response must carry error
        code 0.

        Returns:
            The ``value`` of every message that was sent, in order.
        """
        messages = [create_message(self.msg(str(msg))) for msg in messages]
        produce = ProduceRequestPayload(self.topic, partition, messages=messages)
        resp, = self.client.send_produce_request([produce])
        self.assertEqual(resp.error, 0)

        return [x.value for x in messages]

    def send_gzip_message(self, partition, messages):
        """Produce ``messages`` to ``partition`` as one gzip message.

        The produce response must carry error code 0. Nothing is returned.
        """
        message = create_gzip_message([(self.msg(str(msg)), None) for msg in messages])
        produce = ProduceRequestPayload(self.topic, partition, messages=[message])
        resp, = self.client.send_produce_request([produce])
        self.assertEqual(resp.error, 0)

    def assert_message_count(self, messages, num_messages):
        """Assert ``messages`` holds exactly ``num_messages`` distinct items."""
        # Make sure we got them all
        self.assertEqual(len(messages), num_messages)

        # Make sure there are no duplicates
        self.assertEqual(len(set(messages)), num_messages)

    def consumer(self, **kwargs):
        """Build a consumer bound to ``self.client``.

        Recognised keyword arguments (all others are forwarded to the
        consumer constructor):

        * ``consumer`` -- consumer class to instantiate (default
          ``SimpleConsumer``).
        * ``group`` -- consumer group (default ``None``).
        * ``topic`` -- topic to read (default ``self.topic``).

        ``auto_commit`` defaults to ``False``. For ``SimpleConsumer`` and
        ``MultiProcessConsumer`` ``iter_timeout`` defaults to 0.
        """
        if os.environ['KAFKA_VERSION'] == "0.8.0":
            # Kafka 0.8.0 simply doesn't support offset requests, so hard code it being off
            kwargs['group'] = None
            kwargs['auto_commit'] = False
        else:
            kwargs.setdefault('group', None)
            kwargs.setdefault('auto_commit', False)

        consumer_class = kwargs.pop('consumer', SimpleConsumer)
        group = kwargs.pop('group', None)
        topic = kwargs.pop('topic', self.topic)

        if consumer_class in [SimpleConsumer, MultiProcessConsumer]:
            kwargs.setdefault('iter_timeout', 0)

        return consumer_class(self.client, group, topic, **kwargs)

    def kafka_consumer(self, **configs):
        """Return a ``KafkaConsumer`` on ``self.topic`` bootstrapped from ``self.server``."""
        brokers = '%s:%d' % (self.server.host, self.server.port)
        consumer = KafkaConsumer(self.topic,
                                 bootstrap_servers=brokers,
                                 **configs)
        return consumer

    def kafka_producer(self, **configs):
        """Return a ``KafkaProducer`` bootstrapped from ``self.server``."""
        brokers = '%s:%d' % (self.server.host, self.server.port)
        producer = KafkaProducer(
            bootstrap_servers=brokers, **configs)
        return producer

    def test_simple_consumer(self):
        """A fresh consumer reads all 200 messages sent to two partitions."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Start a consumer
        consumer = self.consumer()

        self.assert_message_count([message for message in consumer], 200)

        consumer.stop()

    def test_simple_consumer_gzip(self):
        """A fresh consumer reads all 200 messages sent as gzip messages."""
        self.send_gzip_message(0, range(0, 100))
        self.send_gzip_message(1, range(100, 200))

        # Start a consumer
        consumer = self.consumer()

        self.assert_message_count([message for message in consumer], 200)

        consumer.stop()

    def test_simple_consumer_smallest_offset_reset(self):
        """With ``auto_offset_reset='smallest'`` an out-of-range seek rereads everything."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        consumer = self.consumer(auto_offset_reset='smallest')
        # Move fetch offset ahead of 300 message (out of range)
        consumer.seek(300, 2)
        # Since auto_offset_reset is set to smallest we should read all 200
        # messages from beginning.
        self.assert_message_count([message for message in consumer], 200)

    def test_simple_consumer_largest_offset_reset(self):
        """With the default reset policy an out-of-range seek yields only new messages."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Default largest
        consumer = self.consumer()
        # Move fetch offset ahead of 300 message (out of range)
        consumer.seek(300, 2)
        # Since auto_offset_reset is set to largest we should not read any
        # messages.
        self.assert_message_count([message for message in consumer], 0)
        # Send 200 new messages to the queue
        self.send_messages(0, range(200, 300))
        self.send_messages(1, range(300, 400))
        # Since the offset is set to largest we should read all the new messages.
        self.assert_message_count([message for message in consumer], 200)

    def test_simple_consumer_no_reset(self):
        """With ``auto_offset_reset=None`` an out-of-range fetch raises."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Automatic offset reset is switched off for this consumer
        consumer = self.consumer(auto_offset_reset=None)
        # Move fetch offset ahead of 300 message (out of range)
        consumer.seek(300, 2)
        with self.assertRaises(OffsetOutOfRangeError):
            consumer.get_message()

    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_simple_consumer_load_initial_offsets(self):
        """Offsets committed by one consumer are loaded by the next one in the group."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Create 1st consumer and change offsets
        consumer = self.consumer(group='test_simple_consumer_load_initial_offsets')
        self.assertEqual(consumer.offsets, {0: 0, 1: 0})
        consumer.offsets.update({0: 51, 1: 101})
        # Update counter after manual offsets update
        consumer.count_since_commit += 1
        consumer.commit()

        # Create 2nd consumer and check initial offsets
        consumer = self.consumer(group='test_simple_consumer_load_initial_offsets',
                                 auto_commit=False)
        self.assertEqual(consumer.offsets, {0: 51, 1: 101})

    def test_simple_consumer__seek(self):
        """Relative and absolute seeks leave the expected number of messages to read."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        consumer = self.consumer()

        # Rewind 10 messages from the end
        consumer.seek(-10, 2)
        self.assert_message_count([message for message in consumer], 10)

        # Rewind 13 messages from the end
        consumer.seek(-13, 2)
        self.assert_message_count([message for message in consumer], 13)

        # Set absolute offset
        consumer.seek(100)
        self.assert_message_count([message for message in consumer], 0)
        consumer.seek(100, partition=0)
        self.assert_message_count([message for message in consumer], 0)
        consumer.seek(101, partition=1)
        self.assert_message_count([message for message in consumer], 0)
        consumer.seek(90, partition=0)
        self.assert_message_count([message for message in consumer], 10)
        consumer.seek(20, partition=1)
        self.assert_message_count([message for message in consumer], 80)
        consumer.seek(0, partition=1)
        self.assert_message_count([message for message in consumer], 100)

        consumer.stop()

    @pytest.mark.skipif(env_kafka_version() >= (2, 0),
                        reason="SimpleConsumer blocking does not handle PartialMessage change in kafka 2.0+")
    def test_simple_consumer_blocking(self):
        """``get_messages`` honours ``block`` and ``timeout``."""
        consumer = self.consumer()

        # Ask for 5 messages, nothing in queue, block 1 second
        with Timer() as t:
            messages = consumer.get_messages(block=True, timeout=1)
            self.assert_message_count(messages, 0)
        self.assertGreaterEqual(t.interval, 1)

        self.send_messages(0, range(0, 5))
        self.send_messages(1, range(5, 10))

        # Ask for 5 messages, 10 in queue. Get 5 back, no blocking
        with Timer() as t:
            messages = consumer.get_messages(count=5, block=True, timeout=3)
            self.assert_message_count(messages, 5)
        self.assertLess(t.interval, 3)

        # Ask for 10 messages, get 5 back, block 1 second
        with Timer() as t:
            messages = consumer.get_messages(count=10, block=True, timeout=1)
            self.assert_message_count(messages, 5)
        self.assertGreaterEqual(t.interval, 1)

        # Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
        # second, get 5 back, no blocking
        self.send_messages(0, range(0, 3))
        self.send_messages(1, range(3, 5))
        with Timer() as t:
            messages = consumer.get_messages(count=10, block=1, timeout=1)
            self.assert_message_count(messages, 5)
        self.assertLessEqual(t.interval, 1)

        consumer.stop()

    def test_simple_consumer_pending(self):
        """``pending`` reports unread message counts, overall and per partition."""
        # make sure that we start with no pending messages
        consumer = self.consumer()
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(consumer.pending(), 0)
        self.assertEqual(consumer.pending(partitions=[0]), 0)
        self.assertEqual(consumer.pending(partitions=[1]), 0)

        # Produce 10 messages to partitions 0 and 1
        self.send_messages(0, range(0, 10))
        self.send_messages(1, range(10, 20))

        consumer = self.consumer()

        self.assertEqual(consumer.pending(), 20)
        self.assertEqual(consumer.pending(partitions=[0]), 10)
        self.assertEqual(consumer.pending(partitions=[1]), 10)

        # move to last message, so one partition should have 1 pending
        # message and other 0
        consumer.seek(-1, 2)
        self.assertEqual(consumer.pending(), 1)

        pending_part1 = consumer.pending(partitions=[0])
        pending_part2 = consumer.pending(partitions=[1])
        self.assertEqual(set([0, 1]), set([pending_part1, pending_part2]))
        consumer.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    def test_multi_process_consumer(self):
        """A MultiProcessConsumer reads all 200 messages (skipped)."""
        # Produce 100 messages to partitions 0 and 1
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        consumer = self.consumer(consumer=MultiProcessConsumer)

        self.assert_message_count([message for message in consumer], 200)

        consumer.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    def test_multi_process_consumer_blocking(self):
        """MultiProcessConsumer ``get_messages`` honours ``block``/``timeout`` (skipped)."""
        consumer = self.consumer(consumer=MultiProcessConsumer)

        # Ask for 5 messages, No messages in queue, block 1 second
        with Timer() as t:
            messages = consumer.get_messages(block=True, timeout=1)
            self.assert_message_count(messages, 0)

        self.assertGreaterEqual(t.interval, 1)

        # Send 10 messages
        self.send_messages(0, range(0, 10))

        # Ask for 5 messages, 10 messages in queue, block 0 seconds
        with Timer() as t:
            messages = consumer.get_messages(count=5, block=True, timeout=5)
            self.assert_message_count(messages, 5)
        self.assertLessEqual(t.interval, 1)

        # Ask for 10 messages, 5 in queue, block 1 second
        with Timer() as t:
            messages = consumer.get_messages(count=10, block=True, timeout=1)
            self.assert_message_count(messages, 5)
        self.assertGreaterEqual(t.interval, 1)

        # Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
        # second, get at least one back, no blocking
        self.send_messages(0, range(0, 5))
        with Timer() as t:
            messages = consumer.get_messages(count=10, block=1, timeout=1)
            received_message_count = len(messages)
            self.assertGreaterEqual(received_message_count, 1)
            self.assert_message_count(messages, received_message_count)
        self.assertLessEqual(t.interval, 1)

        consumer.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    def test_multi_proc_pending(self):
        """MultiProcessConsumer ``pending`` reports unread counts (skipped)."""
        self.send_messages(0, range(0, 10))
        self.send_messages(1, range(10, 20))

        # set group to None and auto_commit to False to avoid interactions w/
        # offset commit/fetch apis
        consumer = MultiProcessConsumer(self.client, None, self.topic,
                                        auto_commit=False, iter_timeout=0)

        self.assertEqual(consumer.pending(), 20)
        self.assertEqual(consumer.pending(partitions=[0]), 10)
        self.assertEqual(consumer.pending(partitions=[1]), 10)

        consumer.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_multi_process_consumer_load_initial_offsets(self):
        """A MultiProcessConsumer loads offsets committed earlier in its group (skipped)."""
        self.send_messages(0, range(0, 10))
        self.send_messages(1, range(10, 20))

        # Create 1st consumer and change offsets
        # (built with the default consumer class; only the 2nd one is a
        # MultiProcessConsumer)
        consumer = self.consumer(group='test_multi_process_consumer_load_initial_offsets')
        self.assertEqual(consumer.offsets, {0: 0, 1: 0})
        consumer.offsets.update({0: 5, 1: 15})
        # Update counter after manual offsets update
        consumer.count_since_commit += 1
        consumer.commit()

        # Create 2nd consumer and check initial offsets
        consumer = self.consumer(consumer=MultiProcessConsumer,
                                 group='test_multi_process_consumer_load_initial_offsets',
                                 auto_commit=False)
        self.assertEqual(consumer.offsets, {0: 5, 1: 15})

    def test_large_messages(self):
        """Small and 5000-character messages are all read with a 60000-byte max buffer."""
        # Produce 10 "normal" size messages
        small_messages = self.send_messages(0, [str(x) for x in range(10)])

        # Produce 10 messages that are large (bigger than default fetch size)
        large_messages = self.send_messages(0, [random_string(5000) for _ in range(10)])

        # Brokers prior to 0.11 will return the next message
        # if it is smaller than max_bytes (called buffer_size in SimpleConsumer)
        # Brokers 0.11 and later that store messages in v2 format
        # internally will return the next message only if the
        # full MessageSet is smaller than max_bytes.
        # For that reason, we set the max buffer size to a little more
        # than the size of all large messages combined
        consumer = self.consumer(max_buffer_size=60000)

        expected_messages = set(small_messages + large_messages)
        # Partial (truncated) messages are ignored when collecting values.
        actual_messages = {x.message.value for x in consumer
                           if not isinstance(x.message, PartialMessage)}
        self.assertEqual(expected_messages, actual_messages)

        consumer.stop()

    def test_huge_messages(self):
        """A message above the default max buffer needs ``max_buffer_size=None``."""
        huge_message, = self.send_messages(0, [
            create_message(random_string(MAX_FETCH_BUFFER_SIZE_BYTES + 10)),
        ])

        # Create a consumer with the default buffer size
        consumer = self.consumer()

        # This consumer fails to get the message
        with self.assertRaises(ConsumerFetchSizeTooSmall):
            consumer.get_message(False, 0.1)

        consumer.stop()

        # Create a consumer with no fetch size limit
        big_consumer = self.consumer(
            max_buffer_size=None,
            partitions=[0],
        )

        # Seek to the last message
        big_consumer.seek(-1, 2)

        # Consume giant message successfully
        message = big_consumer.get_message(block=False, timeout=10)
        self.assertIsNotNone(message)
        self.assertEqual(message.message.value, huge_message)

        big_consumer.stop()

    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_offset_behavior__resuming_behavior(self):
        """A second consumer in the group resumes from the last auto-committed offset."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Start a consumer
        consumer1 = self.consumer(
            group='test_offset_behavior__resuming_behavior',
            auto_commit=True,
            auto_commit_every_t=None,
            auto_commit_every_n=20,
        )

        # Grab the first 195 messages
        output_msgs1 = [consumer1.get_message().message.value for _ in range(195)]
        self.assert_message_count(output_msgs1, 195)

        # The total offset across both partitions should be at 180
        consumer2 = self.consumer(
            group='test_offset_behavior__resuming_behavior',
            auto_commit=True,
            auto_commit_every_t=None,
            auto_commit_every_n=20,
        )

        # 181-200
        self.assert_message_count([message for message in consumer2], 20)

        consumer1.stop()
        consumer2.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_multi_process_offset_behavior__resuming_behavior(self):
        """MultiProcessConsumer variant of the resume-from-commit test (skipped)."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Start a consumer
        consumer1 = self.consumer(
            consumer=MultiProcessConsumer,
            group='test_multi_process_offset_behavior__resuming_behavior',
            auto_commit=True,
            auto_commit_every_t=None,
            auto_commit_every_n=20,
        )

        # Grab the first 195 messages
        output_msgs1 = []
        idx = 0
        for message in consumer1:
            output_msgs1.append(message.message.value)
            idx += 1
            if idx >= 195:
                break
        self.assert_message_count(output_msgs1, 195)

        # The total offset across both partitions should be at 180
        consumer2 = self.consumer(
            consumer=MultiProcessConsumer,
            group='test_multi_process_offset_behavior__resuming_behavior',
            auto_commit=True,
            auto_commit_every_t=None,
            auto_commit_every_n=20,
        )

        # 181-200
        self.assert_message_count([message for message in consumer2], 20)

        consumer1.stop()
        consumer2.stop()

    # TODO: Make this a unit test -- should not require integration
    def test_fetch_buffer_size(self):
        """A message larger than the initial buffer is read when the max buffer allows it."""
        # Test parameters (see issue 135 / PR 136)
        TEST_MESSAGE_SIZE = 1048
        INIT_BUFFER_SIZE = 1024
        MAX_BUFFER_SIZE = 2048
        assert TEST_MESSAGE_SIZE > INIT_BUFFER_SIZE
        assert TEST_MESSAGE_SIZE < MAX_BUFFER_SIZE
        assert MAX_BUFFER_SIZE == 2 * INIT_BUFFER_SIZE

        # Use the named parameters so the sanity checks above actually
        # describe the values the test runs with.
        self.send_messages(0, ["x" * TEST_MESSAGE_SIZE])
        self.send_messages(1, ["x" * TEST_MESSAGE_SIZE])

        consumer = self.consumer(buffer_size=INIT_BUFFER_SIZE,
                                 max_buffer_size=MAX_BUFFER_SIZE)
        messages = [message for message in consumer]
        self.assertEqual(len(messages), 2)
import logging
import uuid

import pytest

from kafka.admin import NewTopic
from kafka.protocol.metadata import MetadataRequest_v1
from test.testutil import assert_message_count, env_kafka_version, random_string, special_to_underscore


@pytest.fixture(
    # One parameter per SASL mechanism, each skipped when the broker version
    # is below the listed minimum. A generator expression keeps the loop
    # variables out of the module namespace.
    params=list(
        pytest.param(
            mechanism,
            marks=pytest.mark.skipif(env_kafka_version() < minimum_version, reason=skip_reason),
        )
        for mechanism, minimum_version, skip_reason in (
            ("PLAIN", (0, 10), "Requires KAFKA_VERSION >= 0.10"),
            ("SCRAM-SHA-256", (0, 10, 2), "Requires KAFKA_VERSION >= 0.10.2"),
            ("SCRAM-SHA-512", (0, 10, 2), "Requires KAFKA_VERSION >= 0.10.2"),
        )
    )
)
def sasl_kafka(request, kafka_broker_factory):
    """Yield a broker created with SASL_PLAINTEXT transport.

    The SASL mechanism comes from the fixture parameter. The first broker
    returned by ``kafka_broker_factory`` is yielded, and its child's logs
    are dumped once the test using the fixture has finished.
    """
    brokers = kafka_broker_factory(transport="SASL_PLAINTEXT", sasl_mechanism=request.param)
    broker = brokers[0]
    yield broker
    broker.child.dump_logs()