def test_producer_and_consumer(self, compression_type="none", security_protocol="PLAINTEXT", interbroker_security_protocol=None, new_consumer=True, client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """Concurrently produce and consume 10e6 messages with one producer and one consumer.

    Setup: 1 node zk + 3 node kafka cluster. The new consumer is used iff
    new_consumer == True. Returns aggregate throughput statistics for both the
    producer and the consumer.

    (Under the hood this runs ProducerPerformance.java and ConsumerPerformance.scala.)
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)

    # If no inter-broker protocol was given, mirror the client-facing one.
    if interbroker_security_protocol is None:
        interbroker_security_protocol = security_protocol
    self.start_kafka(security_protocol, interbroker_security_protocol, broker_version)

    num_records = 10 * 1000 * 1000  # 10e6

    producer_settings = {
        'acks': 1,
        'compression.type': compression_type,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory
    }
    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=num_records,
        record_size=DEFAULT_RECORD_SIZE,
        throughput=-1,
        version=client_version,
        settings=producer_settings)
    self.consumer = ConsumerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        new_consumer=new_consumer,
        messages=num_records)

    # Run both services at the same time so produce and consume overlap.
    Service.run_parallel(self.producer, self.consumer)

    data = {
        "producer": compute_aggregate_throughput(self.producer),
        "consumer": compute_aggregate_throughput(self.consumer)
    }
    self.logger.info("\n".join(["Producer + consumer:", str(data)]))
    return data
def test_producer_throughput(self, acks, topic, num_producers=1, message_size=DEFAULT_RECORD_SIZE, compression_type="none", security_protocol='PLAINTEXT', client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """Measure producer throughput against a 3-node kafka cluster (plus 1 zk node).

    Produces ~128MB worth of messages to a topic with 6 partitions. Required acks,
    topic replication factor, security protocol and message size vary with the
    arguments injected into this test. Returns aggregate throughput statistics
    once all messages are acknowledged.

    (This runs ProducerPerformance.java under the hood.)
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)
    self.start_kafka(security_protocol, security_protocol, broker_version)

    # Fix the total payload size, so the record count scales inversely with record size.
    nrecords = int(self.target_data_size / message_size)

    producer_settings = {
        'acks': acks,
        'compression.type': compression_type,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory
    }
    self.producer = ProducerPerformanceService(
        self.test_context, num_producers, self.kafka,
        topic=topic,
        num_records=nrecords,
        record_size=message_size,
        throughput=-1,  # unthrottled
        version=client_version,
        settings=producer_settings)
    self.producer.run()
    return compute_aggregate_throughput(self.producer)
def test_producer_throughput(self, acks, topic, num_producers=1, message_size=DEFAULT_RECORD_SIZE, compression_type="none", security_protocol='PLAINTEXT', tls_version=None, client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """Measure producer throughput against a 3-node kafka cluster (plus 1 zk node).

    Produces ~128MB worth of messages to a topic with 6 partitions. Required acks,
    topic replication factor, security protocol, TLS version and message size vary
    with the arguments injected into this test. Returns aggregate throughput
    statistics once all messages are acknowledged.

    (This runs ProducerPerformance.java under the hood.)
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)
    self.start_kafka(security_protocol, security_protocol, broker_version, tls_version)

    # Fix the total payload size, so the record count scales inversely with record size.
    nrecords = int(self.target_data_size / message_size)

    producer_settings = {
        'acks': acks,
        'compression.type': compression_type,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory
    }
    self.producer = ProducerPerformanceService(
        self.test_context, num_producers, self.kafka,
        topic=topic,
        num_records=nrecords,
        record_size=message_size,
        throughput=-1,  # unthrottled
        version=client_version,
        settings=producer_settings)
    self.producer.run()
    return compute_aggregate_throughput(self.producer)
def test_version(self, version=str(LATEST_0_9), new_consumer=True, metadata_quorum=quorum.zk):
    """Sanity-check the performance services against the given broker/client version.

    Verifies that ProducerPerformanceService, EndToEndLatencyService and
    ConsumerPerformanceService all run with a small number of messages. The stats
    themselves are largely meaningless here since the message count is tiny.
    """
    version = KafkaVersion(version)
    self.kafka = KafkaService(
        self.test_context, 1, self.zk,
        topics={self.topic: {'partitions': 1, 'replication-factor': 1}},
        version=version)
    self.kafka.start()

    # Basic run of producer performance.
    self.producer_perf = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=self.topic,
        num_records=self.num_records,
        record_size=self.record_size,
        # Impossibly high target => effectively unthrottled, giving equivalent
        # behavior between 0.8.X and 0.9.X.
        throughput=1000000000,
        version=version,
        settings={'acks': 1, 'batch.size': 8 * 1024, 'buffer.memory': 64 * 1024 * 1024})
    self.producer_perf.run()
    producer_perf_data = compute_aggregate_throughput(self.producer_perf)
    assert producer_perf_data['records_per_sec'] > 0

    # Basic run of end-to-end latency.
    self.end_to_end = EndToEndLatencyService(
        self.test_context, 1, self.kafka,
        topic=self.topic,
        num_records=self.num_records,
        version=version)
    self.end_to_end.run()
    first_result = self.end_to_end.results[0]
    end_to_end_data = latency(
        first_result['latency_50th_ms'],
        first_result['latency_99th_ms'],
        first_result['latency_999th_ms'])

    # Basic run of consumer performance service.
    self.consumer_perf = ConsumerPerformanceService(
        self.test_context, 1, self.kafka,
        new_consumer=new_consumer,
        topic=self.topic,
        version=version,
        messages=self.num_records)
    self.consumer_perf.group = "test-consumer-group"
    self.consumer_perf.run()
    consumer_perf_data = compute_aggregate_throughput(self.consumer_perf)
    assert consumer_perf_data['records_per_sec'] > 0

    return {
        "producer_performance": producer_perf_data,
        "end_to_end_latency": end_to_end_data,
        "consumer_performance": consumer_perf_data
    }
def test_version(self, version=str(LATEST_0_9), new_consumer=False):
    """Sanity-check the performance services against the given broker/client version.

    Verifies that ProducerPerformanceService, EndToEndLatencyService and
    ConsumerPerformanceService all run with a small number of messages. The stats
    themselves are largely meaningless here since the message count is tiny.
    """
    version = KafkaVersion(version)
    self.kafka = KafkaService(
        self.test_context, 1, self.zk,
        topics={self.topic: {'partitions': 1, 'replication-factor': 1}},
        version=version)
    self.kafka.start()

    # Basic run of producer performance.
    self.producer_perf = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=self.topic,
        num_records=self.num_records,
        record_size=self.record_size,
        # Impossibly high target => effectively unthrottled, giving equivalent
        # behavior between 0.8.X and 0.9.X.
        throughput=1000000000,
        version=version,
        settings={'acks': 1, 'batch.size': 8 * 1024, 'buffer.memory': 64 * 1024 * 1024})
    self.producer_perf.run()
    producer_perf_data = compute_aggregate_throughput(self.producer_perf)

    # Basic run of end-to-end latency.
    self.end_to_end = EndToEndLatencyService(
        self.test_context, 1, self.kafka,
        topic=self.topic,
        num_records=self.num_records,
        version=version)
    self.end_to_end.run()
    first_result = self.end_to_end.results[0]
    end_to_end_data = latency(
        first_result['latency_50th_ms'],
        first_result['latency_99th_ms'],
        first_result['latency_999th_ms'])

    # Basic run of consumer performance service.
    self.consumer_perf = ConsumerPerformanceService(
        self.test_context, 1, self.kafka,
        new_consumer=new_consumer,
        topic=self.topic,
        version=version,
        messages=self.num_records)
    self.consumer_perf.group = "test-consumer-group"
    self.consumer_perf.run()
    consumer_perf_data = compute_aggregate_throughput(self.consumer_perf)

    return {
        "producer_performance": producer_perf_data,
        "end_to_end_latency": end_to_end_data,
        "consumer_performance": consumer_perf_data
    }
def test_producer_and_consumer(self, compression_type="none", security_protocol="PLAINTEXT", interbroker_security_protocol=None, new_consumer=True, client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """Concurrently produce and consume 10e6 messages with one producer and one consumer.

    Setup: 1 node zk + 3 node kafka cluster. The new consumer is used iff
    new_consumer == True. Returns aggregate throughput statistics for both the
    producer and the consumer.

    (Under the hood this runs ProducerPerformance.java and ConsumerPerformance.scala.)
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)

    # If no inter-broker protocol was given, mirror the client-facing one.
    if interbroker_security_protocol is None:
        interbroker_security_protocol = security_protocol
    self.start_kafka(security_protocol, interbroker_security_protocol, broker_version)

    num_records = 10 * 1000 * 1000  # 10e6

    producer_settings = {
        'acks': 1,
        'compression.type': compression_type,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory
    }
    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=num_records,
        record_size=DEFAULT_RECORD_SIZE,
        throughput=-1,
        version=client_version,
        settings=producer_settings)
    self.consumer = ConsumerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        new_consumer=new_consumer,
        messages=num_records)

    # Run both services at the same time so produce and consume overlap.
    Service.run_parallel(self.producer, self.consumer)

    data = {
        "producer": compute_aggregate_throughput(self.producer),
        "consumer": compute_aggregate_throughput(self.consumer)
    }
    self.logger.info("\n".join(["Producer + consumer:", str(data)]))
    return data
def test_consumer_throughput(self, compression_type="none", security_protocol="PLAINTEXT", interbroker_security_protocol=None, new_consumer=True, num_consumers=1, client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """Measure consumer throughput.

    Consumes 10e6 100-byte messages with 1 or more consumers from a topic with
    6 partitions (using the new consumer iff new_consumer == True) and reports
    aggregate throughput.
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)

    # If no inter-broker protocol was given, mirror the client-facing one.
    if interbroker_security_protocol is None:
        interbroker_security_protocol = security_protocol
    self.start_kafka(security_protocol, interbroker_security_protocol, broker_version)

    num_records = 10 * 1000 * 1000  # 10e6

    # Seed kafka with messages before measuring consumption.
    producer_settings = {
        'acks': 1,
        'compression.type': compression_type,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory
    }
    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=num_records,
        record_size=DEFAULT_RECORD_SIZE,
        throughput=-1,
        version=client_version,
        settings=producer_settings)
    self.producer.run()

    # Now consume everything that was produced.
    self.consumer = ConsumerPerformanceService(
        self.test_context, num_consumers, self.kafka,
        topic=TOPIC_REP_THREE,
        new_consumer=new_consumer,
        messages=num_records)
    self.consumer.group = "test-consumer-group"
    self.consumer.run()
    return compute_aggregate_throughput(self.consumer)
def test_consumer_throughput(self, compression_type="none", security_protocol="PLAINTEXT", interbroker_security_protocol=None, new_consumer=True, num_consumers=1, client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """Measure consumer throughput.

    Consumes 10e6 100-byte messages with 1 or more consumers from a topic with
    6 partitions (using the new consumer iff new_consumer == True) and reports
    aggregate throughput.
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)

    # If no inter-broker protocol was given, mirror the client-facing one.
    if interbroker_security_protocol is None:
        interbroker_security_protocol = security_protocol
    self.start_kafka(security_protocol, interbroker_security_protocol, broker_version)

    num_records = 10 * 1000 * 1000  # 10e6

    # Seed kafka with messages before measuring consumption.
    producer_settings = {
        'acks': 1,
        'compression.type': compression_type,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory
    }
    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=num_records,
        record_size=DEFAULT_RECORD_SIZE,
        throughput=-1,
        version=client_version,
        settings=producer_settings)
    self.producer.run()

    # Now consume everything that was produced.
    self.consumer = ConsumerPerformanceService(
        self.test_context, num_consumers, self.kafka,
        topic=TOPIC_REP_THREE,
        new_consumer=new_consumer,
        messages=num_records)
    self.consumer.group = "test-consumer-group"
    self.consumer.run()
    return compute_aggregate_throughput(self.consumer)