class NativeVsRestProducerPerformance(RestProxyTest):
    """Run the native and the REST producer performance services in turn.

    Both services target the 'test-rep-one' topic with the same record
    count, record size and batch size; ``test`` logs what each one reports.
    """

    def __init__(self, test_context):
        """Set up the cluster and build both producer performance services."""
        cluster_topics = {
            'test-rep-one': {'partitions': 6, 'replication-factor': 1},
        }
        super(NativeVsRestProducerPerformance, self).__init__(
            test_context, num_zk=1, num_brokers=1, num_rest=1,
            topics=cluster_topics)

        # 1000000 works on both aws and local. 50000000 can be used locally
        # on Vagrant VMs, but may use too much memory for aws.
        record_count = 1000000
        record_bytes = 100
        batch_bytes = 8196
        ack_level = 1

        # Workload parameters that are identical for both services.
        workload = dict(topic="test-rep-one", num_records=record_count,
                        record_size=record_bytes, throughput=-1)

        self.producer_perf = ProducerPerformanceService(
            test_context, 1, self.kafka,
            settings={'batch.size': batch_bytes, 'acks': ack_level},
            **workload)
        self.rest_producer_perf = RestProducerPerformanceService(
            test_context, 1, self.rest, batch_size=batch_bytes, **workload)

    def test(self):
        """Run the native service, then the REST one, and log their results."""
        self.producer_perf.run()
        self.rest_producer_perf.run()

        native = self.producer_perf.results[0]
        self.logger.info("Producer performance: %f per sec, %f ms",
                         native['records_per_sec'],
                         native['latency_99th_ms'])

        rest = self.rest_producer_perf.results[0]
        self.logger.info("REST Producer performance: %f per sec, %f ms",
                         rest['records_per_sec'],
                         rest['latency_99th_ms'])
class NativeVsRestProducerPerformance(RestProxyTest):
    """Compare the figures reported by the native and REST producer services.

    The two services are configured against the same topic
    ('test-rep-one') with matching record count and record size.
    """

    def __init__(self, test_context):
        """Configure the test cluster and create the two services to run."""
        super(NativeVsRestProducerPerformance, self).__init__(
            test_context,
            num_zk=1,
            num_brokers=1,
            num_rest=1,
            topics={
                'test-rep-one': {'partitions': 6, 'replication-factor': 1},
            })

        # Works on both aws and local. (50000000 can be used locally on
        # Vagrant VMs, but may use too much memory for aws.)
        total_records = 1000000
        bytes_per_record = 100
        bytes_per_batch = 8196
        native_settings = {'batch.size': bytes_per_batch, 'acks': 1}

        self.producer_perf = ProducerPerformanceService(
            test_context, 1, self.kafka,
            topic="test-rep-one",
            num_records=total_records,
            record_size=bytes_per_record,
            throughput=-1,
            settings=native_settings)
        self.rest_producer_perf = RestProducerPerformanceService(
            test_context, 1, self.rest,
            topic="test-rep-one",
            num_records=total_records,
            record_size=bytes_per_record,
            batch_size=bytes_per_batch,
            throughput=-1)

    def test(self):
        """Run both services sequentially, then log each one's first result."""
        self.producer_perf.run()
        self.rest_producer_perf.run()

        reports = (
            ("Producer performance: %f per sec, %f ms",
             self.producer_perf),
            ("REST Producer performance: %f per sec, %f ms",
             self.rest_producer_perf),
        )
        for message, service in reports:
            first = service.results[0]
            self.logger.info(message, first['records_per_sec'],
                             first['latency_99th_ms'])
def __init__(self, test_context):
    """Set up the cluster and create the seed producer and both consumers.

    Creates ``self.producer``, ``self.consumer_perf`` and
    ``self.rest_consumer_perf``. The producer is asked for 1000 more
    records than either consumer is asked to read.
    """
    super(NativeVsRestConsumerPerformance, self).__init__(
        test_context, num_zk=1, num_brokers=1, num_rest=1,
        topics={'test-rep-one': {'partitions': 6, 'replication-factor': 1}})

    # NOTE(review): the cluster is configured with a 'test-rep-one' topic,
    # but every service below uses topic "test" -- confirm this is intended.

    # 1000000 works on both aws and local. 50000000 can be used locally on
    # Vagrant VMs, but may use too much memory for aws.
    record_count = 1000000
    record_bytes = 100
    batch_bytes = 8196
    ack_level = 1  # default for REST proxy, which isn't yet configurable
    consumer_threads = 1  # not configurable for REST proxy

    self.producer = ProducerPerformanceService(
        test_context, 1, self.kafka,
        topic="test",
        num_records=record_count + 1000,
        record_size=record_bytes,
        throughput=-1,
        settings={'batch.size': batch_bytes, 'acks': ack_level})

    # Arguments common to the native and the REST consumer service.
    consume = dict(topic="test", num_records=record_count, throughput=-1)
    self.consumer_perf = ConsumerPerformanceService(
        test_context, 1, self.kafka, threads=consumer_threads, **consume)
    self.rest_consumer_perf = RestConsumerPerformanceService(
        test_context, 1, self.rest, **consume)
def __init__(self, test_context):
    """Configure the cluster, then build one producer and two consumers.

    The producer service writes ``msgs + 1000`` records to topic "test";
    the native and REST consumer services are each asked to read ``msgs``
    records from the same topic.
    """
    declared_topics = {
        'test-rep-one': {'partitions': 6, 'replication-factor': 1},
    }
    super(NativeVsRestConsumerPerformance, self).__init__(
        test_context,
        num_zk=1,
        num_brokers=1,
        num_rest=1,
        topics=declared_topics)

    # NOTE(review): 'test-rep-one' is declared above, yet the services
    # below all target "test" -- verify that this mismatch is deliberate.
    topic_name = "test"

    # Works on both aws and local. (50000000 can be used locally on Vagrant
    # VMs, but may use too much memory for aws.)
    total_records = 1000000
    producer_settings = {
        'batch.size': 8196,
        'acks': 1,  # default for REST proxy, which isn't yet configurable
    }

    self.producer = ProducerPerformanceService(
        test_context, 1, self.kafka,
        topic=topic_name,
        num_records=total_records + 1000,
        record_size=100,
        throughput=-1,
        settings=producer_settings)
    self.consumer_perf = ConsumerPerformanceService(
        test_context, 1, self.kafka,
        topic=topic_name,
        num_records=total_records,
        throughput=-1,
        threads=1)  # thread count is not configurable for REST proxy
    self.rest_consumer_perf = RestConsumerPerformanceService(
        test_context, 1, self.rest,
        topic=topic_name,
        num_records=total_records,
        throughput=-1)
def __init__(self, test_context):
    """Set up the cluster and build the native and REST producer services.

    Creates ``self.producer_perf`` and ``self.rest_producer_perf``, both
    targeting topic 'test-rep-one' with the same record count and size.
    """
    cluster_topics = {
        'test-rep-one': {'partitions': 6, 'replication-factor': 1},
    }
    super(NativeVsRestProducerPerformance, self).__init__(
        test_context, num_zk=1, num_brokers=1, num_rest=1,
        topics=cluster_topics)

    # 1000000 works on both aws and local. 50000000 can be used locally on
    # Vagrant VMs, but may use too much memory for aws.
    record_count = 1000000
    record_bytes = 100
    batch_bytes = 8196
    ack_level = 1

    # Workload parameters that are identical for both services.
    workload = dict(topic="test-rep-one", num_records=record_count,
                    record_size=record_bytes, throughput=-1)

    self.producer_perf = ProducerPerformanceService(
        test_context, 1, self.kafka,
        settings={'batch.size': batch_bytes, 'acks': ack_level},
        **workload)
    self.rest_producer_perf = RestProducerPerformanceService(
        test_context, 1, self.rest, batch_size=batch_bytes, **workload)
def test_producer_and_consumer(self):
    """Run one producer and one consumer in parallel and log both results.

    Both services use topic "test-rep-three" and the default record count;
    they are started together through ``Service.run_parallel``.
    """
    self.logger.info("BENCHMARK: Producer + Consumer")

    topic_name = "test-rep-three"
    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=topic_name,
        num_records=self.msgs_default,
        record_size=self.msg_size_default,
        throughput=-1,
        settings={
            'acks': 1,
            'batch.size': self.batch_size,
            'buffer.memory': self.buffer_memory,
        })
    self.consumer = ConsumerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=topic_name,
        num_records=self.msgs_default,
        throughput=-1,
        threads=1)

    Service.run_parallel(self.producer, self.consumer)

    # One header line followed by one line per service.
    report = "\n".join((
        "Producer + consumer:",
        " Producer: %s" % throughput(self.producer),
        " Consumer: %s" % throughput(self.consumer),
    ))
    self.logger.info(report)
def test_multiple_message_size(self):
    """Run one producer benchmark per entry in ``self.msg_sizes``.

    All services are created first (stored in ``self.perfs`` under
    "perf-<size>"), then run one after another in ``self.msg_sizes`` order
    (stored in ``self.msg_size_perf`` by size), and finally a per-size
    throughput summary is logged.
    """
    # TODO this would be a great place to use parametrization
    self.perfs = {}
    for size in self.msg_sizes:
        self.logger.info(
            "BENCHMARK: Message size %d (%f GB total, single producer, async 3x replication)",
            size, self.target_data_size_gb)
        # Always generate the same total amount of data
        record_count = int(self.target_data_size / size)
        service = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three",
            num_records=record_count,
            record_size=size,
            throughput=-1,
            settings={
                'acks': 1,
                'batch.size': self.batch_size,
                'buffer.memory': self.buffer_memory,
            })
        self.perfs["perf-" + str(size)] = service

    # Services are only run once every one of them has been created.
    self.msg_size_perf = {}
    for size in self.msg_sizes:
        service = self.perfs["perf-" + str(size)]
        service.run()
        self.msg_size_perf[size] = service

    report = ["Message size:"]
    report.extend(
        " %d: %s" % (size, throughput(self.msg_size_perf[size]))
        for size in self.msg_sizes)
    self.logger.info("\n".join(report))
def __init__(self, test_context):
    """Configure the test cluster and create the two producer services.

    ``self.producer_perf`` is built against ``self.kafka`` and
    ``self.rest_producer_perf`` against ``self.rest``; both use topic
    'test-rep-one'.
    """
    super(NativeVsRestProducerPerformance, self).__init__(
        test_context,
        num_zk=1,
        num_brokers=1,
        num_rest=1,
        topics={
            'test-rep-one': {'partitions': 6, 'replication-factor': 1},
        })

    # Works on both aws and local. (50000000 can be used locally on Vagrant
    # VMs, but may use too much memory for aws.)
    total_records = 1000000
    bytes_per_record = 100
    bytes_per_batch = 8196
    native_settings = {'batch.size': bytes_per_batch, 'acks': 1}

    self.producer_perf = ProducerPerformanceService(
        test_context, 1, self.kafka,
        topic="test-rep-one",
        num_records=total_records,
        record_size=bytes_per_record,
        throughput=-1,
        settings=native_settings)
    self.rest_producer_perf = RestProducerPerformanceService(
        test_context, 1, self.rest,
        topic="test-rep-one",
        num_records=total_records,
        record_size=bytes_per_record,
        batch_size=bytes_per_batch,
        throughput=-1)
class NativeVsRestConsumerPerformance(RestProxyTest):
    """Run the native and the REST consumer performance services in turn.

    A producer service seeds topic "test" first; each consumer service is
    then run against that topic and its reported figures are logged.
    """

    def __init__(self, test_context):
        """Set up the cluster and create the seed producer and both consumers."""
        super(NativeVsRestConsumerPerformance, self).__init__(
            test_context, num_zk=1, num_brokers=1, num_rest=1,
            topics={
                'test-rep-one': {'partitions': 6, 'replication-factor': 1},
            })

        # NOTE(review): the cluster is configured with a 'test-rep-one'
        # topic, but every service below uses topic "test" -- confirm this
        # is intended.

        # 1000000 works on both aws and local. 50000000 can be used locally
        # on Vagrant VMs, but may use too much memory for aws.
        record_count = 1000000
        record_bytes = 100
        batch_bytes = 8196
        ack_level = 1  # default for REST proxy, which isn't yet configurable
        consumer_threads = 1  # not configurable for REST proxy

        # Seeds 1000 records more than the consumers read; see the FIXME in
        # test() for why.
        self.producer = ProducerPerformanceService(
            test_context, 1, self.kafka,
            topic="test",
            num_records=record_count + 1000,
            record_size=record_bytes,
            throughput=-1,
            settings={'batch.size': batch_bytes, 'acks': ack_level})

        # Arguments common to the native and the REST consumer service.
        consume = dict(topic="test", num_records=record_count, throughput=-1)
        self.consumer_perf = ConsumerPerformanceService(
            test_context, 1, self.kafka, threads=consumer_threads, **consume)
        self.rest_consumer_perf = RestConsumerPerformanceService(
            test_context, 1, self.rest, **consume)

    def test(self):
        """Seed the topic, run both consumers, and log their results."""
        # Seed data. FIXME currently the REST consumer isn't properly
        # finishing unless we have some extra messages -- the last set isn't
        # getting properly returned for some reason.
        self.producer.run()
        self.consumer_perf.run()
        self.rest_consumer_perf.run()

        native = self.consumer_perf.results[0]
        self.logger.info("Consumer performance: %f MB/s, %f msg/sec",
                         native['mbps'], native['records_per_sec'])

        rest = self.rest_consumer_perf.results[0]
        self.logger.info("REST Consumer performance: %f MB/s, %f msg/sec",
                         rest['mbps'], rest['records_per_sec'])
def test_single_producer_sync(self):
    """Run a single producer with ``acks=-1`` and log its throughput.

    The service is stored on ``self.perf`` and writes the default number
    of default-sized records to topic "test-rep-three".
    """
    self.logger.info("BENCHMARK: Single producer, sync 3x replication")

    sync_settings = {
        'acks': -1,
        'batch.size': self.batch_size,
        'buffer.memory': self.buffer_memory,
    }
    self.perf = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic="test-rep-three",
        num_records=self.msgs_default,
        record_size=self.msg_size_default,
        throughput=-1,
        settings=sync_settings)
    self.perf.run()

    measured = throughput(self.perf)
    self.logger.info("Single producer, sync 3x replication: %s" % measured)
def test_long_term_throughput(self):
    """Run a long producer benchmark and log throughput per time block.

    The producer is run with ``intermediate_stats=True``. The entries of
    ``self.perf.stats[0]`` are then split into consecutive, equally sized
    blocks, and the mean ``records_per_sec`` and ``mbps`` of each block is
    written to the log as one summary line.

    Entries left over after the last full block (``len % block_size`` of
    them) are not included in the summary.
    """
    self.logger.info("BENCHMARK: Long production")
    self.perf = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic="test-rep-three",
        num_records=self.msgs_large,
        record_size=self.msg_size_default,
        throughput=-1,
        settings={
            'acks': 1,
            'batch.size': self.batch_size,
            'buffer.memory': self.buffer_memory,
        },
        intermediate_stats=True)
    self.perf.run()

    summary = ["Throughput over long run, data > memory:"]
    # FIXME we should be generating a graph too

    stats = self.perf.stats[0]
    # Try to break it into 5 blocks, but fall back to a smaller number if
    # there aren't even 5 elements. Floor division keeps block_size and
    # nblocks integers even where `/` is true division (Python 3, or
    # `from __future__ import division`); range() and slice bounds reject
    # floats.
    block_size = max(len(stats) // 5, 1)
    nblocks = len(stats) // block_size
    for i in range(nblocks):
        # (i + 1) * block_size never exceeds len(stats) because
        # nblocks * block_size <= len(stats), so no clamping is needed.
        subset = stats[i * block_size:(i + 1) * block_size]
        count = len(subset)
        if count == 0:
            # Defensive: guards the divisions below against an empty block.
            summary.append(" Time block %d: (empty)" % i)
        else:
            mean_records = sum(
                stat['records_per_sec'] for stat in subset) / float(count)
            mean_mbps = sum(stat['mbps'] for stat in subset) / float(count)
            summary.append(" Time block %d: %f rec/sec (%f MB/s)"
                           % (i, mean_records, mean_mbps))
    self.logger.info("\n".join(summary))
class NativeVsRestConsumerPerformance(RestProxyTest):
    """Compare the figures reported by the native and REST consumer services.

    ``test`` seeds topic "test" with a producer service, runs the native
    consumer service, then the REST one, and logs what each reports.
    """

    def __init__(self, test_context):
        """Configure the cluster, then build one producer and two consumers."""
        declared_topics = {
            'test-rep-one': {'partitions': 6, 'replication-factor': 1},
        }
        super(NativeVsRestConsumerPerformance, self).__init__(
            test_context,
            num_zk=1,
            num_brokers=1,
            num_rest=1,
            topics=declared_topics)

        # NOTE(review): 'test-rep-one' is declared above, yet the services
        # below all target "test" -- verify that this mismatch is deliberate.
        topic_name = "test"

        # Works on both aws and local. (50000000 can be used locally on
        # Vagrant VMs, but may use too much memory for aws.)
        total_records = 1000000
        producer_settings = {
            'batch.size': 8196,
            'acks': 1,  # default for REST proxy, which isn't yet configurable
        }

        # The extra 1000 records relate to the FIXME noted in test().
        self.producer = ProducerPerformanceService(
            test_context, 1, self.kafka,
            topic=topic_name,
            num_records=total_records + 1000,
            record_size=100,
            throughput=-1,
            settings=producer_settings)
        self.consumer_perf = ConsumerPerformanceService(
            test_context, 1, self.kafka,
            topic=topic_name,
            num_records=total_records,
            throughput=-1,
            threads=1)  # thread count is not configurable for REST proxy
        self.rest_consumer_perf = RestConsumerPerformanceService(
            test_context, 1, self.rest,
            topic=topic_name,
            num_records=total_records,
            throughput=-1)

    def test(self):
        """Seed data, run both consumer services, then log their results."""
        # Seed data. FIXME currently the REST consumer isn't properly
        # finishing unless we have some extra messages -- the last set isn't
        # getting properly returned for some reason.
        self.producer.run()
        self.consumer_perf.run()
        self.rest_consumer_perf.run()

        reports = (
            ("Consumer performance: %f MB/s, %f msg/sec",
             self.consumer_perf),
            ("REST Consumer performance: %f MB/s, %f msg/sec",
             self.rest_consumer_perf),
        )
        for message, service in reports:
            first = service.results[0]
            self.logger.info(message, first['mbps'],
                             first['records_per_sec'])