Example #1
    def test_long_term_producer_throughput(self, compression_type="none",
                                           security_protocol='PLAINTEXT', tls_version=None,
                                           interbroker_security_protocol=None, client_version=str(DEV_BRANCH),
                                           broker_version=str(DEV_BRANCH)):
        """
        Setup: 1-node zk + 3-node kafka cluster
        Produce 10e6 100-byte messages to a topic with 6 partitions, replication-factor 3, and acks=1.

        Collect and return aggregate throughput statistics after all messages have been acknowledged.

        (This runs ProducerPerformance.java under the hood)
        """
        client_version = KafkaVersion(client_version)
        broker_version = KafkaVersion(broker_version)
        self.validate_versions(client_version, broker_version)
        if interbroker_security_protocol is None:
            interbroker_security_protocol = security_protocol
        self.start_kafka(security_protocol, interbroker_security_protocol, broker_version, tls_version)
        self.producer = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic=TOPIC_REP_THREE, num_records=self.msgs_large, record_size=DEFAULT_RECORD_SIZE,
            throughput=-1, version=client_version, settings={
                'acks': 1,
                'compression.type': compression_type,
                'batch.size': self.batch_size,
                'buffer.memory': self.buffer_memory
            },
            intermediate_stats=True
        )
        self.producer.run()

        summary = ["Throughput over long run, data > memory:"]
        data = {}
        # FIXME we should be generating a graph too
        # Try to break it into 5 blocks, but fall back to a smaller number if
        # there aren't even 5 elements
        block_size = max(len(self.producer.stats[0]) // 5, 1)
        nblocks = len(self.producer.stats[0]) // block_size

        for i in range(nblocks):
            subset = self.producer.stats[0][i*block_size:min((i+1)*block_size, len(self.producer.stats[0]))]
            if len(subset) == 0:
                summary.append(" Time block %d: (empty)" % i)
                data[i] = None
            else:
                records_per_sec = sum(stat['records_per_sec'] for stat in subset) / len(subset)
                mb_per_sec = sum(stat['mbps'] for stat in subset) / len(subset)

                summary.append(" Time block %d: %f rec/sec (%f MB/s)" % (i, records_per_sec, mb_per_sec))
                data[i] = throughput(records_per_sec, mb_per_sec)

        self.logger.info("\n".join(summary))
        return data
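
The five-block split above is all integer arithmetic, and its edge behaviour is easy to misread. A minimal standalone sketch (function name hypothetical, not part of kafkatest) that mirrors the same scheme makes the edge cases concrete:

def split_into_blocks(samples, target_blocks=5):
    # Mirror the integer-division scheme used in the loop above:
    # aim for target_blocks blocks, fall back to size-1 blocks when
    # there are fewer samples than that.
    block_size = max(len(samples) // target_blocks, 1)
    nblocks = len(samples) // block_size
    return [samples[i * block_size:(i + 1) * block_size] for i in range(nblocks)]

# 12 samples -> block_size 2, six blocks of two; every sample is covered.
print(split_into_blocks(list(range(12))))
# 11 samples -> block_size 2, five blocks of two; the trailing sample
# (len % block_size) is silently dropped, exactly as in the loop above.
print(split_into_blocks(list(range(11))))
# 3 samples -> block_size falls back to 1, giving three single-sample blocks.
print(split_into_blocks(list(range(3))))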
Example #2
    def test_long_term_producer_throughput(self, compression_type="none", security_protocol='PLAINTEXT',
                                           interbroker_security_protocol=None, client_version=str(DEV_BRANCH),
                                           broker_version=str(DEV_BRANCH)):
        """
        Setup: 1-node zk + 3-node kafka cluster
        Produce 10e6 100-byte messages to a topic with 6 partitions, replication-factor 3, and acks=1.

        Collect and return aggregate throughput statistics after all messages have been acknowledged.

        (This runs ProducerPerformance.java under the hood)
        """
        client_version = KafkaVersion(client_version)
        broker_version = KafkaVersion(broker_version)
        self.validate_versions(client_version, broker_version)
        if interbroker_security_protocol is None:
            interbroker_security_protocol = security_protocol
        self.start_kafka(security_protocol, interbroker_security_protocol, broker_version)
        self.producer = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic=TOPIC_REP_THREE, num_records=self.msgs_large, record_size=DEFAULT_RECORD_SIZE,
            throughput=-1, version=client_version, settings={
                'acks': 1,
                'compression.type': compression_type,
                'batch.size': self.batch_size,
                'buffer.memory': self.buffer_memory
            },
            intermediate_stats=True
        )
        self.producer.run()

        summary = ["Throughput over long run, data > memory:"]
        data = {}
        # FIXME we should be generating a graph too
        # Try to break it into 5 blocks, but fall back to a smaller number if
        # there aren't even 5 elements
        block_size = max(len(self.producer.stats[0]) // 5, 1)
        nblocks = len(self.producer.stats[0]) // block_size

        for i in range(nblocks):
            subset = self.producer.stats[0][i*block_size:min((i+1)*block_size, len(self.producer.stats[0]))]
            if len(subset) == 0:
                summary.append(" Time block %d: (empty)" % i)
                data[i] = None
            else:
                records_per_sec = sum(stat['records_per_sec'] for stat in subset) / len(subset)
                mb_per_sec = sum(stat['mbps'] for stat in subset) / len(subset)

                summary.append(" Time block %d: %f rec/sec (%f MB/s)" % (i, records_per_sec, mb_per_sec))
                data[i] = throughput(records_per_sec, mb_per_sec)

        self.logger.info("\n".join(summary))
        return data
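
For completeness, here is a sketch of how a caller might consume the per-block map these tests return. It assumes, as the loop above suggests, that throughput(...) yields a dict with 'records_per_sec' and 'mb_per_sec' keys; the helper name, threshold, and sample numbers below are illustrative, not part of the kafkatest API:

def find_slow_blocks(data, tolerance=0.5):
    # Mean rec/sec across non-empty blocks, plus the indices of blocks
    # that fall below tolerance * mean. Empty blocks are stored as None.
    rates = [d['records_per_sec'] for d in data.values() if d is not None]
    if not rates:
        return 0.0, []
    mean_rate = sum(rates) / len(rates)
    slow = [i for i, d in data.items()
            if d is not None and d['records_per_sec'] < tolerance * mean_rate]
    return mean_rate, slow

# Fabricated numbers, purely for illustration:
blocks = {
    0: {'records_per_sec': 95000.0, 'mb_per_sec': 9.5},
    1: {'records_per_sec': 30000.0, 'mb_per_sec': 3.0},
    2: None,  # an empty time block
}
mean_rate, slow = find_slow_blocks(blocks)
print("mean %.0f rec/sec, slow blocks: %s" % (mean_rate, slow))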