def test_three_consumers(self):
    """Benchmark three consumer nodes reading from a freshly seeded topic.

    A single producer node first writes ``self.msgs_default`` records of
    size ``self.msg_size_default`` to the topic; afterwards a three-node
    ConsumerPerformanceService is run against the same topic.

    Returns the value produced by ``compute_throughput`` for the consumer
    service.
    """
    topic = "test-rep-three"
    producer_settings = {
        'acks': 1,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory,
    }

    # Seed the topic so the consumers have records to read.
    self.producer = ProducerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic=topic,
        num_records=self.msgs_default,
        record_size=self.msg_size_default,
        throughput=-1,
        settings=producer_settings)
    self.producer.run()

    self.logger.info("BENCHMARK: Three consumers")
    self.perf = ConsumerPerformanceService(
        self.test_context,
        3,
        self.kafka,
        topic=topic,
        num_records=self.msgs_default,
        throughput=-1,
        threads=1)
    self.perf.run()

    throughput = compute_throughput(self.perf)
    self.logger.info("Three consumers: %s", throughput)
    return throughput
def test_single_consumer(self):
    """Benchmark a single consumer node reading from a freshly seeded topic.

    A single producer node first writes ``self.msgs_default`` records of
    size ``self.msg_size_default`` to the topic; afterwards a one-node
    ConsumerPerformanceService is run against the same topic.

    Returns the value produced by ``compute_throughput`` for the consumer
    service.
    """
    topic = "test-rep-three"

    # Seed the topic in this test itself, so the consumer reads records of
    # the default message size.
    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=topic,
        num_records=self.msgs_default,
        record_size=self.msg_size_default,
        throughput=-1,
        settings={
            'acks': 1,
            'batch.size': self.batch_size,
            'buffer.memory': self.buffer_memory
        })
    self.producer.run()

    self.logger.info("BENCHMARK: Single consumer")
    self.perf = ConsumerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=topic,
        num_records=self.msgs_default,
        throughput=-1,
        threads=1)
    self.perf.run()

    data = compute_throughput(self.perf)
    # Pass ``data`` as a logger argument instead of using the ``%`` operator:
    # eager ``"%s" % data`` raises TypeError when ``data`` is a tuple, and
    # this form matches the logging call in test_three_consumers.
    self.logger.info("Single consumer: %s", data)
    return data
def test_producer_and_consumer(self):
    """Benchmark one producer and one consumer running at the same time.

    Both services target the "test-rep-three" topic and are started via
    ``Service.run_parallel``.

    Returns a dict with keys "producer" and "consumer", each holding the
    ``compute_throughput`` result for the corresponding service.
    """
    self.logger.info("BENCHMARK: Producer + Consumer")

    producer_settings = {
        'acks': 1,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory,
    }
    self.producer = ProducerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic="test-rep-three",
        num_records=self.msgs_default,
        record_size=self.msg_size_default,
        throughput=-1,
        settings=producer_settings)
    self.consumer = ConsumerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic="test-rep-three",
        num_records=self.msgs_default,
        throughput=-1,
        threads=1)

    Service.run_parallel(self.producer, self.consumer)

    results = {
        "producer": compute_throughput(self.producer),
        "consumer": compute_throughput(self.consumer),
    }
    # Log a header line followed by the stringified results.
    header = "Producer + consumer:"
    self.logger.info("\n".join((header, str(results))))
    return results
def test_consumer_throughput(self, new_consumer, num_consumers):
    """Measure consumer throughput over a pre-seeded topic.

    Seeds TOPIC_REP_THREE with 10e6 records of DEFAULT_RECORD_SIZE using a
    single producer, then consumes them with ``num_consumers`` consumer
    nodes. ``new_consumer`` is forwarded to ConsumerPerformanceService
    (the new consumer is used iff it is True, per the original description).

    NOTE(review): the original description mentions 100-byte messages and a
    6-partition topic; both are defined outside this method -- confirm.

    Returns the ``compute_aggregate_throughput`` result for the consumers.
    """
    total_records = 10 * 1000 * 1000  # 10e6

    # Seed kafka with messages.
    producer_settings = {
        'acks': 1,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory,
    }
    self.producer = ProducerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=total_records,
        record_size=DEFAULT_RECORD_SIZE,
        throughput=-1,
        settings=producer_settings)
    self.producer.run()

    # Consume everything that was just produced.
    self.consumer = ConsumerPerformanceService(
        self.test_context,
        num_consumers,
        self.kafka,
        topic=TOPIC_REP_THREE,
        new_consumer=new_consumer,
        messages=total_records)
    self.consumer.group = "test-consumer-group"
    self.consumer.run()

    return compute_aggregate_throughput(self.consumer)
def test_producer_and_consumer(self, compression_type="none", security_protocol="PLAINTEXT",
                               interbroker_security_protocol=None, new_consumer=True,
                               client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """Concurrently produce and consume 10e6 messages and report throughput.

    The versions are validated, kafka is started with the requested
    security protocols and broker version, and then a single producer and a
    single consumer are run in parallel against TOPIC_REP_THREE. The new
    consumer is used if ``new_consumer`` is True.

    Per the original description, the setup is a 1 node zk + 3 node kafka
    cluster and the services run ProducerPerformance.java and
    ConsumerPerformance.scala under the hood (not visible in this method).

    If ``interbroker_security_protocol`` is None it defaults to
    ``security_protocol``.

    Returns a dict with keys "producer" and "consumer" holding the
    aggregate throughput statistics of each service.
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)

    # Brokers talk to each other over the client protocol unless told otherwise.
    if interbroker_security_protocol is None:
        interbroker_security_protocol = security_protocol
    self.start_kafka(security_protocol, interbroker_security_protocol, broker_version)

    total_records = 10 * 1000 * 1000  # 10e6

    producer_settings = {
        'acks': 1,
        'compression.type': compression_type,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory,
    }
    self.producer = ProducerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=total_records,
        record_size=DEFAULT_RECORD_SIZE,
        throughput=-1,
        version=client_version,
        settings=producer_settings)
    # NOTE(review): client_version is passed to the producer only, not to
    # the consumer -- confirm this is intended.
    self.consumer = ConsumerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic=TOPIC_REP_THREE,
        new_consumer=new_consumer,
        messages=total_records)

    Service.run_parallel(self.producer, self.consumer)

    results = {
        "producer": compute_aggregate_throughput(self.producer),
        "consumer": compute_aggregate_throughput(self.consumer),
    }
    # Log a header line followed by the stringified results.
    header = "Producer + consumer:"
    self.logger.info("\n".join((header, str(results))))
    return results
def test_consumer_throughput(self, compression_type="none", security_protocol="PLAINTEXT",
                             interbroker_security_protocol=None, new_consumer=True, num_consumers=1,
                             client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """Measure consumer throughput over a pre-seeded topic.

    The versions are validated and kafka is started with the requested
    security protocols and broker version. A single producer then seeds
    TOPIC_REP_THREE with 10e6 records of DEFAULT_RECORD_SIZE, after which
    ``num_consumers`` consumer nodes read them back. The new consumer is
    used iff ``new_consumer`` is True, per the original description.

    NOTE(review): the original description mentions 100-byte messages and a
    6-partition topic; both are defined outside this method -- confirm.

    If ``interbroker_security_protocol`` is None it defaults to
    ``security_protocol``.

    Returns the ``compute_aggregate_throughput`` result for the consumers.
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)

    # Brokers talk to each other over the client protocol unless told otherwise.
    if interbroker_security_protocol is None:
        interbroker_security_protocol = security_protocol
    self.start_kafka(security_protocol, interbroker_security_protocol, broker_version)

    total_records = 10 * 1000 * 1000  # 10e6

    # Seed kafka with messages.
    producer_settings = {
        'acks': 1,
        'compression.type': compression_type,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory,
    }
    self.producer = ProducerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=total_records,
        record_size=DEFAULT_RECORD_SIZE,
        throughput=-1,
        version=client_version,
        settings=producer_settings)
    self.producer.run()

    # Consume everything that was just produced.
    # NOTE(review): client_version is passed to the producer only, not to
    # the consumer -- confirm this is intended.
    self.consumer = ConsumerPerformanceService(
        self.test_context,
        num_consumers,
        self.kafka,
        topic=TOPIC_REP_THREE,
        new_consumer=new_consumer,
        messages=total_records)
    self.consumer.group = "test-consumer-group"
    self.consumer.run()

    return compute_aggregate_throughput(self.consumer)
def test_version(self, version=str(LATEST_0_9), new_consumer=True, metadata_quorum=quorum.zk):
    """Sanity-check the performance services against one Kafka version.

    Starts a single-node kafka with a one-partition, replication-factor-one
    topic, then runs the producer performance, end-to-end latency and
    consumer performance services in turn with a small number of messages.
    Because the message count is small the reported statistics are not
    meaningful; the test only asserts that producer and consumer
    throughput are positive.

    ``metadata_quorum`` is not read in this body; presumably it is consumed
    by the test framework -- confirm.

    Returns a dict with keys "producer_performance", "end_to_end_latency"
    and "consumer_performance".
    """
    version = KafkaVersion(version)

    topic_config = {self.topic: {'partitions': 1, 'replication-factor': 1}}
    self.kafka = KafkaService(
        self.test_context, 1, self.zk, topics=topic_config, version=version)
    self.kafka.start()

    # Basic run of producer performance.
    # The throughput is set impossibly high so nothing is throttled, which
    # gives equivalent behavior between 0.8.X and 0.9.X.
    self.producer_perf = ProducerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic=self.topic,
        num_records=self.num_records,
        record_size=self.record_size,
        throughput=1000000000,
        version=version,
        settings={
            'acks': 1,
            'batch.size': 8 * 1024,
            'buffer.memory': 64 * 1024 * 1024,
        })
    self.producer_perf.run()
    producer_stats = compute_aggregate_throughput(self.producer_perf)
    assert producer_stats['records_per_sec'] > 0

    # Basic run of end to end latency.
    self.end_to_end = EndToEndLatencyService(
        self.test_context,
        1,
        self.kafka,
        topic=self.topic,
        num_records=self.num_records,
        version=version)
    self.end_to_end.run()
    first_result = self.end_to_end.results[0]
    latency_stats = latency(
        first_result['latency_50th_ms'],
        first_result['latency_99th_ms'],
        first_result['latency_999th_ms'])

    # Basic run of consumer performance service.
    self.consumer_perf = ConsumerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        new_consumer=new_consumer,
        topic=self.topic,
        version=version,
        messages=self.num_records)
    self.consumer_perf.group = "test-consumer-group"
    self.consumer_perf.run()
    consumer_stats = compute_aggregate_throughput(self.consumer_perf)
    assert consumer_stats['records_per_sec'] > 0

    return {
        "producer_performance": producer_stats,
        "end_to_end_latency": latency_stats,
        "consumer_performance": consumer_stats,
    }
def test_producer_and_consumer(self, new_consumer=False):
    """Concurrently produce and consume 10e6 messages and report throughput.

    A single producer and a single consumer are run in parallel against
    TOPIC_REP_THREE. The new consumer is used if ``new_consumer`` is True.

    Per the original description, the setup is a 1 node zk + 3 node kafka
    cluster and the services run ProducerPerformance.java and
    ConsumerPerformance.scala under the hood (not visible in this method).

    Returns a dict with keys "producer" and "consumer" holding the
    aggregate throughput statistics of each service.
    """
    total_records = 10 * 1000 * 1000  # 10e6

    producer_settings = {
        'acks': 1,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory,
    }
    self.producer = ProducerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=total_records,
        record_size=DEFAULT_RECORD_SIZE,
        throughput=-1,
        settings=producer_settings)
    self.consumer = ConsumerPerformanceService(
        self.test_context,
        1,
        self.kafka,
        topic=TOPIC_REP_THREE,
        new_consumer=new_consumer,
        messages=total_records)

    Service.run_parallel(self.producer, self.consumer)

    results = {
        "producer": compute_aggregate_throughput(self.producer),
        "consumer": compute_aggregate_throughput(self.consumer),
    }
    # Log a header line followed by the stringified results.
    header = "Producer + consumer:"
    self.logger.info("\n".join((header, str(results))))
    return results