class StreamsSimpleBenchmarkTest(KafkaTest):
    """Simple benchmark of Kafka Streams.

    Runs a single StreamsSimpleBenchmarkService against the one-broker,
    one-ZooKeeper cluster provided by KafkaTest and returns the metrics
    scraped from the service's stdout.
    """

    def __init__(self, test_context):
        # KafkaTest provisions the ZooKeeper node and broker for us.
        super(StreamsSimpleBenchmarkTest, self).__init__(test_context, num_zk=1, num_brokers=1)
        self.driver = StreamsSimpleBenchmarkService(test_context, self.kafka)

    def test_simple_benchmark(self):
        """Run simple Kafka Streams benchmark."""
        self.driver.start()
        self.driver.wait()
        self.driver.stop()

        benchmark_node = self.driver.node
        # grep with allow_fail=False makes the test fail fast if the
        # benchmark never emitted its "Performance" results line.
        benchmark_node.account.ssh(
            "grep Performance %s" % self.driver.STDOUT_FILE, allow_fail=False)
        return self.driver.collect_data(benchmark_node)
class StreamsSimpleBenchmarkTest(Test): """ Simple benchmark of Kafka Streams. """ def __init__(self, test_context): super(StreamsSimpleBenchmarkTest, self).__init__(test_context) # these values could be updated in ad-hoc benchmarks self.key_skew = 0 self.value_size = 1024 self.num_records = 10000000L self.num_threads = 1 self.replication = 1 @cluster(num_nodes=12) @matrix(test=["consume", "consumeproduce", "streams-simple", "streams-count", "streams-join"], scale=[1]) def test_simple_benchmark(self, test, scale): """ Run simple Kafka Streams benchmark """ self.driver = [None] * (scale + 1) self.final = {} ############# # SETUP PHASE ############# self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=scale, zk=self.zk, version=DEV_BRANCH, topics={ 'simpleBenchmarkSourceTopic1' : { 'partitions': scale, 'replication-factor': self.replication }, 'simpleBenchmarkSourceTopic2' : { 'partitions': scale, 'replication-factor': self.replication }, 'simpleBenchmarkSinkTopic' : { 'partitions': scale, 'replication-factor': self.replication }, 'yahooCampaigns' : { 'partitions': 20, 'replication-factor': self.replication }, 'yahooEvents' : { 'partitions': 20, 'replication-factor': self.replication } }) self.kafka.log_level = "INFO" self.kafka.start() load_test = "" if test == ALL_TEST: load_test = "load-two" if test in STREAMS_JOIN_TESTS or test == STREAMS_JOIN_TEST: load_test = "load-two" if test in STREAMS_COUNT_TESTS or test == STREAMS_COUNT_TEST: load_test = "load-one" if test in STREAMS_SIMPLE_TESTS or test == STREAMS_SIMPLE_TEST: load_test = "load-one" if test in NON_STREAMS_TESTS: load_test = "load-one" ################ # LOAD PHASE ################ self.load_driver = StreamsSimpleBenchmarkService(self.test_context, self.kafka, load_test, self.num_threads, self.num_records, self.key_skew, self.value_size) self.load_driver.start() self.load_driver.wait(3600) # wait at most 30 minutes 
self.load_driver.stop() if test == ALL_TEST: for single_test in STREAMS_SIMPLE_TESTS + STREAMS_COUNT_TESTS + STREAMS_JOIN_TESTS: self.execute(single_test, scale) elif test == STREAMS_SIMPLE_TEST: for single_test in STREAMS_SIMPLE_TESTS: self.execute(single_test, scale) elif test == STREAMS_COUNT_TEST: for single_test in STREAMS_COUNT_TESTS: self.execute(single_test, scale) elif test == STREAMS_JOIN_TEST: for single_test in STREAMS_JOIN_TESTS: self.execute(single_test, scale) else: self.execute(test, scale) return self.final def execute(self, test, scale): ################ # RUN PHASE ################ for num in range(0, scale): self.driver[num] = StreamsSimpleBenchmarkService(self.test_context, self.kafka, test, self.num_threads, self.num_records, self.key_skew, self.value_size) self.driver[num].start() ####################### # STOP + COLLECT PHASE ####################### data = [None] * (scale) for num in range(0, scale): self.driver[num].wait() self.driver[num].stop() self.driver[num].node.account.ssh("grep Performance %s" % self.driver[num].STDOUT_FILE, allow_fail=False) data[num] = self.driver[num].collect_data(self.driver[num].node, "") self.driver[num].read_jmx_output_all_nodes() for num in range(0, scale): for key in data[num]: self.final[key + "-" + str(num)] = data[num][key] for key in sorted(self.driver[num].jmx_stats[0]): self.logger.info("%s: %s" % (key, self.driver[num].jmx_stats[0][key])) self.final[test + "-jmx-avg-" + str(num)] = self.driver[num].average_jmx_value self.final[test + "-jmx-max-" + str(num)] = self.driver[num].maximum_jmx_value
class StreamsSimpleBenchmarkTest(Test): """ Simple benchmark of Kafka Streams. """ def __init__(self, test_context): super(StreamsSimpleBenchmarkTest, self).__init__(test_context) self.num_records = 10000000L self.replication = 1 self.num_threads = 1 @cluster(num_nodes=9) @matrix(test=[ "produce", "consume", "count", "processstream", "processstreamwithsink", "processstreamwithstatestore", "processstreamwithcachedstatestore", "kstreamktablejoin", "kstreamkstreamjoin", "ktablektablejoin", "yahoo" ], scale=[1, 3]) def test_simple_benchmark(self, test, scale): """ Run simple Kafka Streams benchmark """ self.driver = [None] * (scale + 1) node = [None] * (scale) data = [None] * (scale) ############# # SETUP PHASE ############# self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService( self.test_context, num_nodes=scale, zk=self.zk, version=DEV_BRANCH, topics={ 'simpleBenchmarkSourceTopic': { 'partitions': scale, 'replication-factor': self.replication }, 'countTopic': { 'partitions': scale, 'replication-factor': self.replication }, 'simpleBenchmarkSinkTopic': { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic1KStreamKStream': { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic2KStreamKStream': { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic1KStreamKTable': { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic2KStreamKTable': { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic1KTableKTable': { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic2KTableKTable': { 'partitions': scale, 'replication-factor': self.replication }, 'yahooCampaigns': { 'partitions': 20, 'replication-factor': self.replication }, 'yahooEvents': { 'partitions': 20, 'replication-factor': self.replication } }) self.kafka.log_level = "INFO" self.kafka.start() ################ # LOAD 
PHASE ################ self.load_driver = StreamsSimpleBenchmarkService( self.test_context, self.kafka, self.num_records * scale, "true", test, self.num_threads) self.load_driver.start() self.load_driver.wait() self.load_driver.stop() ################ # RUN PHASE ################ for num in range(0, scale): self.driver[num] = StreamsSimpleBenchmarkService( self.test_context, self.kafka, self.num_records / (scale), "false", test, self.num_threads) self.driver[num].start() ####################### # STOP + COLLECT PHASE ####################### for num in range(0, scale): self.driver[num].wait() self.driver[num].stop() node[num] = self.driver[num].node node[num].account.ssh("grep Performance %s" % self.driver[num].STDOUT_FILE, allow_fail=False) data[num] = self.driver[num].collect_data(node[num], "") final = {} for num in range(0, scale): for key in data[num]: final[key + str(num)] = data[num][key] return final