# Imports for this example (module paths as found in the Apache Kafka
# system-test tree):
from kafkatest.services.performance.streams_performance import StreamsSimpleBenchmarkService
from kafkatest.tests.kafka_test import KafkaTest


class StreamsSimpleBenchmarkTest(KafkaTest):
    """
    Simple benchmark of Kafka Streams.
    """
    def __init__(self, test_context):
        super(StreamsSimpleBenchmarkTest, self).__init__(test_context,
                                                         num_zk=1,
                                                         num_brokers=1)

        self.driver = StreamsSimpleBenchmarkService(test_context, self.kafka)

    def test_simple_benchmark(self):
        """
        Run simple Kafka Streams benchmark
        """

        self.driver.start()
        self.driver.wait()
        self.driver.stop()
        node = self.driver.node
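        # Sanity check: the benchmark driver is expected to print a line
        # containing "Performance" to its stdout log; with allow_fail=False
        # the grep below raises (and fails the test) if no such line exists.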
        node.account.ssh("grep Performance %s" % self.driver.STDOUT_FILE,
                         allow_fail=False)

        return self.driver.collect_data(node)
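These examples come from Kafka's ducktape-based system tests, so a benchmark
class like this is launched through the ducktape runner rather than invoked
directly; for instance (test path as it appears in a Kafka source checkout,
adjust to your layout):

    ducktape tests/kafkatest/benchmarks/streams/streams_simple_benchmark_test.py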
# Imports for this example (module paths as found in the Apache Kafka
# system-test tree):
from ducktape.mark import matrix
from ducktape.mark.resource import cluster
from ducktape.tests.test import Test

from kafkatest.services.kafka import KafkaService
from kafkatest.services.performance.streams_performance import StreamsSimpleBenchmarkService
from kafkatest.services.zookeeper import ZookeeperService
from kafkatest.version import DEV_BRANCH

# ALL_TEST, STREAMS_SIMPLE_TEST(S), STREAMS_COUNT_TEST(S), STREAMS_JOIN_TEST(S)
# and NON_STREAMS_TESTS are module-level test-name constants defined elsewhere
# in this file (not shown in this excerpt).


class StreamsSimpleBenchmarkTest(Test):
    """
    Simple benchmark of Kafka Streams.
    """

    def __init__(self, test_context):
        super(StreamsSimpleBenchmarkTest, self).__init__(test_context)

        # these values could be updated in ad-hoc benchmarks
        self.key_skew = 0
        self.value_size = 1024
        self.num_records = 10000000
        self.num_threads = 1

        self.replication = 1

    @cluster(num_nodes=12)
    @matrix(test=["consume", "consumeproduce", "streams-simple", "streams-count", "streams-join"], scale=[1])
    def test_simple_benchmark(self, test, scale):
        """
        Run simple Kafka Streams benchmark
        """
        self.driver = [None] * (scale + 1)

        self.final = {}

        #############
        # SETUP PHASE
        #############
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()
        self.kafka = KafkaService(self.test_context, num_nodes=scale, zk=self.zk, version=DEV_BRANCH, topics={
            'simpleBenchmarkSourceTopic1' : { 'partitions': scale, 'replication-factor': self.replication },
            'simpleBenchmarkSourceTopic2' : { 'partitions': scale, 'replication-factor': self.replication },
            'simpleBenchmarkSinkTopic' : { 'partitions': scale, 'replication-factor': self.replication },
            'yahooCampaigns' : { 'partitions': 20, 'replication-factor': self.replication },
            'yahooEvents' : { 'partitions': 20, 'replication-factor': self.replication }
        })
        self.kafka.log_level = "INFO"
        self.kafka.start()


        load_test = ""
        if test == ALL_TEST:
            load_test = "load-two"
        if test in STREAMS_JOIN_TESTS or test == STREAMS_JOIN_TEST:
            load_test = "load-two"
        if test in STREAMS_COUNT_TESTS or test == STREAMS_COUNT_TEST:
            load_test = "load-one"
        if test in STREAMS_SIMPLE_TESTS or test == STREAMS_SIMPLE_TEST:
            load_test = "load-one"
        if test in NON_STREAMS_TESTS:
            load_test = "load-one"



        ################
        # LOAD PHASE
        ################
        self.load_driver = StreamsSimpleBenchmarkService(self.test_context,
                                                         self.kafka,
                                                         load_test,
                                                         self.num_threads,
                                                         self.num_records,
                                                         self.key_skew,
                                                         self.value_size)

        self.load_driver.start()
        self.load_driver.wait(3600) # wait at most 60 minutes
        self.load_driver.stop()

        if test == ALL_TEST:
            for single_test in STREAMS_SIMPLE_TESTS + STREAMS_COUNT_TESTS + STREAMS_JOIN_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_SIMPLE_TEST:
            for single_test in STREAMS_SIMPLE_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_COUNT_TEST:
            for single_test in STREAMS_COUNT_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_JOIN_TEST:
            for single_test in STREAMS_JOIN_TESTS:
                self.execute(single_test, scale)
        else:
            self.execute(test, scale)

        return self.final

    def execute(self, test, scale):

        ################
        # RUN PHASE
        ################
        for num in range(0, scale):
            self.driver[num] = StreamsSimpleBenchmarkService(self.test_context,
                                                             self.kafka,
                                                             test,
                                                             self.num_threads,
                                                             self.num_records,
                                                             self.key_skew,
                                                             self.value_size)
            self.driver[num].start()

        #######################
        # STOP + COLLECT PHASE
        #######################
        data = [None] * (scale)

        for num in range(0, scale):
            self.driver[num].wait()
            self.driver[num].stop()
            self.driver[num].node.account.ssh("grep Performance %s" % self.driver[num].STDOUT_FILE, allow_fail=False)
            data[num] = self.driver[num].collect_data(self.driver[num].node, "")
            self.driver[num].read_jmx_output_all_nodes()

        for num in range(0, scale):
            for key in data[num]:
                self.final[key + "-" + str(num)] = data[num][key]

            for key in sorted(self.driver[num].jmx_stats[0]):
                self.logger.info("%s: %s" % (key, self.driver[num].jmx_stats[0][key]))

            self.final[test + "-jmx-avg-" + str(num)] = self.driver[num].average_jmx_value
            self.final[test + "-jmx-max-" + str(num)] = self.driver[num].maximum_jmx_value
Example #3
# Imports for this example (module paths as found in the Apache Kafka
# system-test tree):
from ducktape.mark import matrix
from ducktape.mark.resource import cluster
from ducktape.tests.test import Test

from kafkatest.services.kafka import KafkaService
from kafkatest.services.performance.streams_performance import StreamsSimpleBenchmarkService
from kafkatest.services.zookeeper import ZookeeperService
from kafkatest.version import DEV_BRANCH


class StreamsSimpleBenchmarkTest(Test):
    """
    Simple benchmark of Kafka Streams.
    """
    def __init__(self, test_context):
        super(StreamsSimpleBenchmarkTest, self).__init__(test_context)
        self.num_records = 10000000
        self.replication = 1
        self.num_threads = 1

    @cluster(num_nodes=9)
    @matrix(test=["produce", "consume", "count", "processstream",
                  "processstreamwithsink", "processstreamwithstatestore",
                  "processstreamwithcachedstatestore", "kstreamktablejoin",
                  "kstreamkstreamjoin", "ktablektablejoin", "yahoo"],
            scale=[1, 3])
    def test_simple_benchmark(self, test, scale):
        """
        Run simple Kafka Streams benchmark
        """
        self.driver = [None] * (scale + 1)
        node = [None] * (scale)
        data = [None] * (scale)

        #############
        # SETUP PHASE
        #############
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()
        self.kafka = KafkaService(
            self.test_context,
            num_nodes=scale,
            zk=self.zk,
            version=DEV_BRANCH,
            topics={
                'simpleBenchmarkSourceTopic': {
                    'partitions': scale,
                    'replication-factor': self.replication
                },
                'countTopic': {
                    'partitions': scale,
                    'replication-factor': self.replication
                },
                'simpleBenchmarkSinkTopic': {
                    'partitions': scale,
                    'replication-factor': self.replication
                },
                'joinSourceTopic1KStreamKStream': {
                    'partitions': scale,
                    'replication-factor': self.replication
                },
                'joinSourceTopic2KStreamKStream': {
                    'partitions': scale,
                    'replication-factor': self.replication
                },
                'joinSourceTopic1KStreamKTable': {
                    'partitions': scale,
                    'replication-factor': self.replication
                },
                'joinSourceTopic2KStreamKTable': {
                    'partitions': scale,
                    'replication-factor': self.replication
                },
                'joinSourceTopic1KTableKTable': {
                    'partitions': scale,
                    'replication-factor': self.replication
                },
                'joinSourceTopic2KTableKTable': {
                    'partitions': scale,
                    'replication-factor': self.replication
                },
                'yahooCampaigns': {
                    'partitions': 20,
                    'replication-factor': self.replication
                },
                'yahooEvents': {
                    'partitions': 20,
                    'replication-factor': self.replication
                }
            })
        self.kafka.log_level = "INFO"
        self.kafka.start()

        ################
        # LOAD PHASE
        ################
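        # The "true"/"false" string argument below presumably switches the
        # service between load-generation mode ("true", load phase) and
        # benchmark-run mode ("false", run phase); this reading is inferred
        # from the two call sites, not confirmed by the service source.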
        self.load_driver = StreamsSimpleBenchmarkService(
            self.test_context, self.kafka, self.num_records * scale, "true",
            test, self.num_threads)
        self.load_driver.start()
        self.load_driver.wait()
        self.load_driver.stop()

        ################
        # RUN PHASE
        ################
        for num in range(0, scale):
            self.driver[num] = StreamsSimpleBenchmarkService(
                self.test_context, self.kafka, self.num_records // scale,
                "false", test, self.num_threads)
            self.driver[num].start()

        #######################
        # STOP + COLLECT PHASE
        #######################
        for num in range(0, scale):
            self.driver[num].wait()
            self.driver[num].stop()
            node[num] = self.driver[num].node
            node[num].account.ssh("grep Performance %s" %
                                  self.driver[num].STDOUT_FILE,
                                  allow_fail=False)
            data[num] = self.driver[num].collect_data(node[num], "")

        final = {}
        for num in range(0, scale):
            for key in data[num]:
                final[key + str(num)] = data[num][key]

        return final
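Note the record-count arithmetic in this last version: the load phase writes
self.num_records * scale records up front, while each of the scale run-phase
drivers is configured with self.num_records // scale. A quick worked example
(plain arithmetic, not taken from the source):

    num_records = 10000000
    scale = 3
    load_records = num_records * scale   # 30000000 records written during load
    per_driver = num_records // scale    # 3333333 records per run-phase driver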