Example #1
 def move_start_offset(self):
     """We move the start offset of the topic by writing really old messages
     and waiting for them to be cleaned up.
     """
     producer = VerifiableProducer(self.test_context,
                                   1,
                                   self.kafka,
                                   self.topic,
                                   throughput=-1,
                                   enable_idempotence=True,
                                   create_time=1000)
     producer.start()
     wait_until(lambda: producer.num_acked > 0,
                timeout_sec=30,
                err_msg="Failed to get an acknowledgement for %ds" % 30)
     # Wait 8 seconds to let the topic be seeded with messages that will
     # be deleted. The 8 seconds is important, since we should get 2 deleted
     # segments in this period based on the configured log roll time and the
     # retention check interval.
     time.sleep(8)
     producer.stop()
     self.logger.info("Seeded topic with %d messages which will be deleted" %\
                      producer.num_acked)
     # Since the configured check interval is 5 seconds, we wait another
     # 6 seconds to ensure that at least one more cleaning pass runs so that
     # the last segment is deleted. An alternative to using timeouts is to poll
     # each partition until the log start offset matches the end offset; the
     # latter is more robust (a sketch of that approach follows this function).
     time.sleep(6)
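The more robust polling alternative mentioned in the comment above could look roughly like the sketch below. It assumes hypothetical helpers for reading a partition's log start and end offsets (for example by shelling out to kafka-get-offsets with --time -2 and -1); log_start_offset, log_end_offset and num_partitions are illustrative names, not part of the real KafkaService API.

def wait_for_segments_deleted(self, timeout_sec=60):
    """Sketch: poll until every partition's log start offset has caught up to its end offset."""
    def start_matches_end():
        for partition in range(self.num_partitions):                    # assumed attribute
            start = self.kafka.log_start_offset(self.topic, partition)  # assumed helper
            end = self.kafka.log_end_offset(self.topic, partition)      # assumed helper
            if start < end:
                return False
        return True

    wait_until(start_matches_end,
               timeout_sec=timeout_sec,
               err_msg="Timed out waiting for log start offsets to reach the end offsets")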
class GetOffsetShellTest(Test):
    """
    Tests GetOffsetShell tool
    """
    def __init__(self, test_context):
        super(GetOffsetShellTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.messages_received_count = 0
        self.topics = {
            TOPIC: {'partitions': NUM_PARTITIONS, 'replication-factor': REPLICATION_FACTOR}
        }

        self.zk = ZookeeperService(test_context, self.num_zk)


    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context, self.num_brokers,
            self.zk, security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol, topics=self.topics)
        self.kafka.start()

    def start_producer(self):
        # This will produce to the kafka cluster
        self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=TOPIC, throughput=1000, max_messages=MAX_MESSAGES)
        self.producer.start()
        current_acked = self.producer.num_acked
        wait_until(lambda: self.producer.num_acked >= current_acked + MAX_MESSAGES, timeout_sec=10,
                   err_msg="Timeout awaiting messages to be produced and acked")

    def start_consumer(self):
        self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC,
                                        consumer_timeout_ms=1000)
        self.consumer.start()

    @cluster(num_nodes=4)
    def test_get_offset_shell(self, security_protocol='PLAINTEXT'):
        """
        Tests if GetOffsetShell is getting offsets correctly
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_producer()

        # Sanity check that GetOffsetShell returns output for the topic before any consumer runs
        output = self.kafka.get_offset_shell(TOPIC, None, 1000, 1, -1)
        assert output, "Expected non-empty GetOffsetShell output for topic %s" % TOPIC

        self.start_consumer()

        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node), timeout_sec=20, backoff_sec=.2, err_msg="Consumer was too slow to start")

        # Assert that offset is correctly indicated by GetOffsetShell tool
        wait_until(lambda: "%s:%s:%s" % (TOPIC, NUM_PARTITIONS - 1, MAX_MESSAGES) in self.kafka.get_offset_shell(TOPIC, None, 1000, 1, -1), timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")
class TestVerifiableProducer(Test):
    """Sanity checks on verifiable producer service class."""
    def __init__(self, test_context):
        super(TestVerifiableProducer, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(
            test_context,
            num_nodes=1,
            zk=self.zk,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }})

        self.num_messages = 1000
        # This will produce to the kafka cluster
        self.producer = VerifiableProducer(test_context,
                                           num_nodes=1,
                                           kafka=self.kafka,
                                           topic=self.topic,
                                           max_messages=self.num_messages,
                                           throughput=1000)

    def setUp(self):
        self.zk.start()
        self.kafka.start()

    @cluster(num_nodes=3)
    @parametrize(producer_version=str(LATEST_0_8_2))
    @parametrize(producer_version=str(LATEST_0_9))
    @parametrize(producer_version=str(DEV_BRANCH))
    def test_simple_run(self, producer_version=DEV_BRANCH):
        """
        Test that we can start VerifiableProducer on the current branch snapshot version or against the 0.8.2 jar, and
        verify that we can produce a small number of messages.
        """
        node = self.producer.nodes[0]
        node.version = KafkaVersion(producer_version)
        self.producer.start()
        wait_until(
            lambda: self.producer.num_acked > 5,
            timeout_sec=5,
            err_msg="Producer failed to start in a reasonable amount of time.")

        # using version.vstring (distutils.version.LooseVersion) is a tricky way of ensuring
        # that this check works with DEV_BRANCH
        # When running VerifiableProducer 0.8.X, both the current branch version and 0.8.X should show up because of the
        # way verifiable producer pulls in some development directories into its classpath
        if node.version <= LATEST_0_8_2:
            assert is_version(node, [node.version.vstring, DEV_BRANCH.vstring])
        else:
            assert is_version(node, [node.version.vstring])

        self.producer.wait()
        num_produced = self.producer.num_acked
        assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (
            num_produced, self.num_messages)
class TestVerifiableProducer(Test):
    """Sanity checks on verifiable producer service class."""
    def __init__(self, test_context):
        super(TestVerifiableProducer, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk,
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}})

        self.num_messages = 1000
        # This will produce to the kafka cluster
        self.producer = VerifiableProducer(test_context, num_nodes=1, kafka=self.kafka, topic=self.topic,
                                           max_messages=self.num_messages, throughput=self.num_messages/5)

    def setUp(self):
        self.zk.start()
        self.kafka.start()

    @cluster(num_nodes=3)
    @parametrize(producer_version=str(LATEST_0_8_2))
    @parametrize(producer_version=str(LATEST_0_9))
    @parametrize(producer_version=str(LATEST_0_10_0))
    @parametrize(producer_version=str(LATEST_0_10_1))
    @parametrize(producer_version=str(DEV_BRANCH))
    def test_simple_run(self, producer_version=DEV_BRANCH):
        """
        Test that we can start VerifiableProducer on the current branch snapshot version or against the 0.8.2 jar, and
        verify that we can produce a small number of messages.
        """
        node = self.producer.nodes[0]
        node.version = KafkaVersion(producer_version)
        self.producer.start()
        wait_until(lambda: self.producer.num_acked > 5, timeout_sec=5,
             err_msg="Producer failed to start in a reasonable amount of time.")

        # using version.vstring (distutils.version.LooseVersion) is a tricky way of ensuring
        # that this check works with DEV_BRANCH
        # When running VerifiableProducer 0.8.X, both the current branch version and 0.8.X should show up because of the
        # way verifiable producer pulls in some development directories into its classpath
        #
        # If the test fails here because 'ps .. | grep' couldn't find the process, it means
        # the login and grep that is_version() performs are slower than
        # the time it takes the producer to produce its messages.
        # The easy fix is to decrease throughput= above; the proper fix is to make the producer
        # not terminate until explicitly killed in this case (see the sketch after this class).
        if node.version <= LATEST_0_8_2:
            assert is_version(node, [node.version.vstring, DEV_BRANCH.vstring], logger=self.logger)
        else:
            assert is_version(node, [node.version.vstring], logger=self.logger)

        self.producer.wait()
        num_produced = self.producer.num_acked
        assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (num_produced, self.num_messages)
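A hedged sketch of the "proper fix" mentioned in the comment inside test_simple_run above: leave out max_messages so the VerifiableProducer keeps running, perform the version check, and only then stop it explicitly. It assumes it runs inside a test method where self.kafka, self.topic, self.logger and producer_version are in scope; the numbers are illustrative.

# Sketch: omit max_messages so the producer does not terminate on its own,
# which removes the race between is_version() and producer exit.
producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka,
                              topic=self.topic, throughput=1000)
node = producer.nodes[0]
node.version = KafkaVersion(producer_version)
producer.start()
wait_until(lambda: producer.num_acked > 5, timeout_sec=15,
           err_msg="Producer failed to start in a reasonable amount of time.")
assert is_version(node, [node.version.vstring], logger=self.logger)
producer.stop()  # the producer only terminates when explicitly stopped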
class SimpleConsumerShellTest(Test):
    """
    Tests SimpleConsumerShell tool
    """

    def __init__(self, test_context):
        super(SimpleConsumerShellTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.messages_received_count = 0
        self.topics = {TOPIC: {"partitions": NUM_PARTITIONS, "replication-factor": REPLICATION_FACTOR}}

        self.zk = ZookeeperService(test_context, self.num_zk)

    def setUp(self):
        self.zk.start()

    def start_kafka(self):
        self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, topics=self.topics)
        self.kafka.start()

    def run_producer(self):
        # This will produce to the kafka cluster
        self.producer = VerifiableProducer(
            self.test_context, num_nodes=1, kafka=self.kafka, topic=TOPIC, throughput=1000, max_messages=MAX_MESSAGES
        )
        self.producer.start()
        wait_until(
            lambda: self.producer.num_acked == MAX_MESSAGES,
            timeout_sec=10,
            err_msg="Timeout awaiting messages to be produced and acked",
        )

    def start_simple_consumer_shell(self):
        self.simple_consumer_shell = SimpleConsumerShell(self.test_context, 1, self.kafka, TOPIC)
        self.simple_consumer_shell.start()

    def test_simple_consumer_shell(self):
        """
        Tests if SimpleConsumerShell is fetching expected records
        :return: None
        """
        self.start_kafka()
        self.run_producer()
        self.start_simple_consumer_shell()

        # Assert that SimpleConsumerShell is fetching expected number of messages
        wait_until(
            lambda: self.simple_consumer_shell.get_output().count("\n") == (MAX_MESSAGES + 1),
            timeout_sec=10,
            err_msg="Timed out waiting to receive expected number of messages.",
        )
Example #7
 def seed_messages(self, topic, num_seed_messages):
     seed_timeout_sec = 10000
     seed_producer = VerifiableProducer(context=self.test_context,
                                        num_nodes=1,
                                        kafka=self.kafka,
                                        topic=topic,
                                        message_validator=is_int,
                                        max_messages=num_seed_messages,
                                        enable_idempotence=True)
     seed_producer.start()
     wait_until(lambda: seed_producer.num_acked >= num_seed_messages,
                timeout_sec=seed_timeout_sec,
                err_msg="Producer failed to produce messages %d in  %ds." %\
                (self.num_seed_messages, seed_timeout_sec))
     return seed_producer.acked
Example #8
 def seed_messages(self, topic, num_seed_messages):
     seed_timeout_sec = 10000
     seed_producer = VerifiableProducer(context=self.test_context,
                                        num_nodes=1,
                                        kafka=self.kafka,
                                        topic=topic,
                                        message_validator=is_int,
                                        max_messages=num_seed_messages,
                                        enable_idempotence=True)
     seed_producer.start()
     wait_until(lambda: seed_producer.num_acked >= num_seed_messages,
                timeout_sec=seed_timeout_sec,
                err_msg="Producer failed to produce messages %d in %ds." %\
                (self.num_seed_messages, seed_timeout_sec))
     return seed_producer.acked
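A hedged usage sketch for seed_messages(): the returned list holds the values the producer saw acknowledged, so a later verification step can compare them against what a consumer read back. The topic name, message count and consumed_values below are illustrative placeholders, not part of the original test.

# Usage sketch only; "input-topic", the count and consumed_values are placeholders.
seed_values = self.seed_messages("input-topic", num_seed_messages=1000)
self.logger.info("Seeded topic with %d acked messages" % len(seed_values))

def all_seeded_values_consumed(consumed_values):
    # After consuming the topic elsewhere, check that nothing was lost.
    missing = set(seed_values) - set(consumed_values)
    return len(missing) == 0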
class TestVerifiableProducer(Test):
    """Sanity checks on verifiable producer service class."""
    def __init__(self, test_context):
        super(TestVerifiableProducer, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk,
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}})

        self.num_messages = 1000
        # This will produce to the kafka cluster
        self.producer = VerifiableProducer(test_context, num_nodes=1, kafka=self.kafka, topic=self.topic,
                                           max_messages=self.num_messages, throughput=1000)

    def setUp(self):
        self.zk.start()
        self.kafka.start()

    @parametrize(producer_version=str(LATEST_0_8_2))
    @parametrize(producer_version=str(LATEST_0_9))
    @parametrize(producer_version=str(TRUNK))
    def test_simple_run(self, producer_version=TRUNK):
        """
        Test that we can start VerifiableProducer on trunk or against the 0.8.2 jar, and
        verify that we can produce a small number of messages.
        """
        node = self.producer.nodes[0]
        node.version = KafkaVersion(producer_version)
        self.producer.start()
        wait_until(lambda: self.producer.num_acked > 5, timeout_sec=5,
             err_msg="Producer failed to start in a reasonable amount of time.")

        # using version.vstring (distutils.version.LooseVersion) is a tricky way of ensuring
        # that this check works with TRUNK
        # When running VerifiableProducer 0.8.X, both trunk version and 0.8.X should show up because of the way
        # verifiable producer pulls in some trunk directories into its classpath
        if node.version <= LATEST_0_8_2:
            assert is_version(node, [node.version.vstring, TRUNK.vstring])
        else:
            assert is_version(node, [node.version.vstring])

        self.producer.wait()
        num_produced = self.producer.num_acked
        assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (num_produced, self.num_messages)
class TestVerifiableProducer(Test):
    """Sanity checks on verifiable producer service class."""
    def __init__(self, test_context):
        super(TestVerifiableProducer, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1) if quorum.for_test(test_context) == quorum.zk else None
        self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk,
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}})

        self.num_messages = 1000
        # This will produce to the kafka cluster
        self.producer = VerifiableProducer(test_context, num_nodes=1, kafka=self.kafka, topic=self.topic,
                                           max_messages=self.num_messages, throughput=self.num_messages // 10)
    def setUp(self):
        if self.zk:
            self.zk.start()

    @cluster(num_nodes=3)
    @parametrize(producer_version=str(LATEST_0_8_2))
    @parametrize(producer_version=str(LATEST_0_9))
    @parametrize(producer_version=str(LATEST_0_10_0))
    @parametrize(producer_version=str(LATEST_0_10_1))
    @matrix(producer_version=[str(DEV_BRANCH)], security_protocol=['PLAINTEXT', 'SSL'], metadata_quorum=quorum.all)
    @cluster(num_nodes=4)
    @matrix(producer_version=[str(DEV_BRANCH)], security_protocol=['SASL_SSL'], sasl_mechanism=['PLAIN', 'GSSAPI'],
            metadata_quorum=quorum.all)
    def test_simple_run(self, producer_version, security_protocol = 'PLAINTEXT', sasl_mechanism='PLAIN',
                        metadata_quorum=quorum.zk):
        """
        Test that we can start VerifiableProducer on the current branch snapshot version or against the 0.8.2 jar, and
        verify that we can produce a small number of messages.
        """
        self.kafka.security_protocol = security_protocol
        self.kafka.client_sasl_mechanism = sasl_mechanism
        self.kafka.interbroker_security_protocol = security_protocol
        self.kafka.interbroker_sasl_mechanism = sasl_mechanism
        if self.kafka.quorum_info.using_kraft:
            controller_quorum = self.kafka.controller_quorum
            controller_quorum.controller_security_protocol = security_protocol
            controller_quorum.controller_sasl_mechanism = sasl_mechanism
            controller_quorum.intercontroller_security_protocol = security_protocol
            controller_quorum.intercontroller_sasl_mechanism = sasl_mechanism
        self.kafka.start()

        node = self.producer.nodes[0]
        node.version = KafkaVersion(producer_version)
        self.producer.start()
        wait_until(lambda: self.producer.num_acked > 5, timeout_sec=15,
             err_msg="Producer failed to start in a reasonable amount of time.")

        # using version.vstring (distutils.version.LooseVersion) is a tricky way of ensuring
        # that this check works with DEV_BRANCH
        # When running VerifiableProducer 0.8.X, both the current branch version and 0.8.X should show up because of the
        # way verifiable producer pulls in some development directories into its classpath
        #
        # If the test fails here because 'ps .. | grep' couldn't find the process, it means
        # the login and grep that is_version() performs are slower than
        # the time it takes the producer to produce its messages.
        # The easy fix is to decrease throughput= above; the proper fix is to make the producer
        # not terminate until explicitly killed in this case.
        if node.version <= LATEST_0_8_2:
            assert is_version(node, [node.version.vstring, DEV_BRANCH.vstring], logger=self.logger)
        else:
            assert is_version(node, [node.version.vstring], logger=self.logger)

        self.producer.wait()
        num_produced = self.producer.num_acked
        assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (num_produced, self.num_messages)

    @cluster(num_nodes=4)
    @matrix(inter_broker_security_protocol=['PLAINTEXT', 'SSL'], metadata_quorum=[quorum.remote_kraft])
    @matrix(inter_broker_security_protocol=['SASL_SSL'], inter_broker_sasl_mechanism=['PLAIN', 'GSSAPI'],
            metadata_quorum=[quorum.remote_kraft])
    def test_multiple_kraft_security_protocols(
            self, inter_broker_security_protocol, inter_broker_sasl_mechanism='GSSAPI', metadata_quorum=quorum.remote_kraft):
        """
        Test for remote KRaft cases that we can start VerifiableProducer on the current branch snapshot version, and
        verify that we can produce a small number of messages.  The inter-controller and broker-to-controller
        security protocols are defined to be different (which differs from the above test, where they were the same).
        """
        self.kafka.security_protocol = self.kafka.interbroker_security_protocol = inter_broker_security_protocol
        self.kafka.client_sasl_mechanism = self.kafka.interbroker_sasl_mechanism = inter_broker_sasl_mechanism
        controller_quorum = self.kafka.controller_quorum
        sasl_mechanism = 'PLAIN' if inter_broker_sasl_mechanism == 'GSSAPI' else 'GSSAPI'
        if inter_broker_security_protocol == 'PLAINTEXT':
            controller_security_protocol = 'SSL'
            intercontroller_security_protocol = 'SASL_SSL'
        elif inter_broker_security_protocol == 'SSL':
            controller_security_protocol = 'SASL_SSL'
            intercontroller_security_protocol = 'PLAINTEXT'
        else: # inter_broker_security_protocol == 'SASL_SSL'
            controller_security_protocol = 'PLAINTEXT'
            intercontroller_security_protocol = 'SSL'
        controller_quorum.controller_security_protocol = controller_security_protocol
        controller_quorum.controller_sasl_mechanism = sasl_mechanism
        controller_quorum.intercontroller_security_protocol = intercontroller_security_protocol
        controller_quorum.intercontroller_sasl_mechanism = sasl_mechanism
        self.kafka.start()

        node = self.producer.nodes[0]
        node.version = KafkaVersion(str(DEV_BRANCH))
        self.producer.start()
        wait_until(lambda: self.producer.num_acked > 5, timeout_sec=15,
             err_msg="Producer failed to start in a reasonable amount of time.")

        # See the comment above regarding use of version.vstring (distutils.version.LooseVersion)
        assert is_version(node, [node.version.vstring], logger=self.logger)

        self.producer.wait()
        num_produced = self.producer.num_acked
        assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (num_produced, self.num_messages)

    @cluster(num_nodes=4)
    @parametrize(metadata_quorum=quorum.remote_kraft)
    def test_multiple_kraft_sasl_mechanisms(self, metadata_quorum):
        """
        Test for remote KRaft cases that we can start VerifiableProducer on the current branch snapshot version, and
        verify that we can produce a small number of messages.  The inter-controller and broker-to-controller
        security protocols are both SASL_PLAINTEXT but the SASL mechanisms are different (we set
        GSSAPI for the inter-controller mechanism and PLAIN for the broker-to-controller mechanism).
        This test differs from the above tests -- the ones above used the same SASL mechanism for both paths.
        """
        self.kafka.security_protocol = self.kafka.interbroker_security_protocol = 'PLAINTEXT'
        controller_quorum = self.kafka.controller_quorum
        controller_quorum.controller_security_protocol = 'SASL_PLAINTEXT'
        controller_quorum.controller_sasl_mechanism = 'PLAIN'
        controller_quorum.intercontroller_security_protocol = 'SASL_PLAINTEXT'
        controller_quorum.intercontroller_sasl_mechanism = 'GSSAPI'
        self.kafka.start()

        node = self.producer.nodes[0]
        node.version = KafkaVersion(str(DEV_BRANCH))
        self.producer.start()
        wait_until(lambda: self.producer.num_acked > 5, timeout_sec=15,
             err_msg="Producer failed to start in a reasonable amount of time.")

        # See the comment above regarding use of version.vstring (distutils.version.LooseVersion)
        assert is_version(node, [node.version.vstring], logger=self.logger)

        self.producer.wait()
        num_produced = self.producer.num_acked
        assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (num_produced, self.num_messages)
Example #11
class TestMirrorMakerService(Test):
    """Sanity checks on mirror maker service class."""
    def __init__(self, test_context):
        super(TestMirrorMakerService, self).__init__(test_context)

        self.topic = "topic"
        self.source_zk = ZookeeperService(test_context, num_nodes=1)
        self.target_zk = ZookeeperService(test_context, num_nodes=1)

        self.source_kafka = KafkaService(
            test_context,
            num_nodes=1,
            zk=self.source_zk,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }})
        self.target_kafka = KafkaService(
            test_context,
            num_nodes=1,
            zk=self.target_zk,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }})

        self.num_messages = 1000
        # This will produce to the source kafka cluster
        self.producer = VerifiableProducer(test_context,
                                           num_nodes=1,
                                           kafka=self.source_kafka,
                                           topic=self.topic,
                                           max_messages=self.num_messages,
                                           throughput=1000)

        # Use a regex whitelist to check that the start command is well-formed in this case
        self.mirror_maker = MirrorMaker(test_context,
                                        num_nodes=1,
                                        source=self.source_kafka,
                                        target=self.target_kafka,
                                        whitelist=".*",
                                        consumer_timeout_ms=2000)

        # This will consume from target kafka cluster
        self.consumer = ConsoleConsumer(test_context,
                                        num_nodes=1,
                                        kafka=self.target_kafka,
                                        topic=self.topic,
                                        consumer_timeout_ms=1000)

    def setUp(self):
        # Source cluster
        self.source_zk.start()
        self.source_kafka.start()

        # Target cluster
        self.target_zk.start()
        self.target_kafka.start()

    def test_end_to_end(self):
        """
        Test end-to-end behavior under non-failure conditions.

        Setup: two single node Kafka clusters, each connected to its own single node zookeeper cluster.
        One is source, and the other is target. Single-node mirror maker mirrors from source to target.

        - Start mirror maker.
        - Produce a small number of messages to the source cluster.
        - Consume messages from target.
        - Verify that number of consumed messages matches the number produced.
        """
        self.mirror_maker.start()
        # Check that consumer_timeout_ms setting made it to config file
        self.mirror_maker.nodes[0].account.ssh(
            "grep \"consumer\.timeout\.ms\" %s" % MirrorMaker.CONSUMER_CONFIG,
            allow_fail=False)

        self.producer.start()
        self.producer.wait(10)
        self.consumer.start()
        self.consumer.wait(10)

        num_consumed = len(self.consumer.messages_consumed[1])
        num_produced = self.producer.num_acked
        assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (
            num_produced, self.num_messages)
        assert num_produced == num_consumed, "num_produced: %d, num_consumed: %d" % (
            num_produced, num_consumed)

        self.mirror_maker.stop()
class GetOffsetShellTest(Test):
    """
    Tests GetOffsetShell tool
    """
    def __init__(self, test_context):
        super(GetOffsetShellTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.messages_received_count = 0
        self.topics = {
            TOPIC_TEST_NAME: {
                'partitions': NUM_PARTITIONS,
                'replication-factor': REPLICATION_FACTOR
            },
            TOPIC_TEST_PATTERN1: {
                'partitions': 1,
                'replication-factor': REPLICATION_FACTOR
            },
            TOPIC_TEST_PATTERN2: {
                'partitions': 1,
                'replication-factor': REPLICATION_FACTOR
            },
            TOPIC_TEST_PARTITIONS: {
                'partitions': 2,
                'replication-factor': REPLICATION_FACTOR
            },
            TOPIC_TEST_INTERNAL_FILTER: {
                'partitions': 1,
                'replication-factor': REPLICATION_FACTOR
            },
            TOPIC_TEST_TOPIC_PARTITIONS1: {
                'partitions': 2,
                'replication-factor': REPLICATION_FACTOR
            },
            TOPIC_TEST_TOPIC_PARTITIONS2: {
                'partitions': 2,
                'replication-factor': REPLICATION_FACTOR
            }
        }

        self.zk = ZookeeperService(test_context, self.num_zk) \
            if quorum.for_test(test_context) == quorum.zk else None

    def setUp(self):
        if self.zk:
            self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context,
            self.num_brokers,
            self.zk,
            security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol,
            topics=self.topics)
        self.kafka.start()

    def start_producer(self, topic):
        # This will produce to the kafka cluster
        self.producer = VerifiableProducer(self.test_context,
                                           num_nodes=1,
                                           kafka=self.kafka,
                                           topic=topic,
                                           throughput=1000,
                                           max_messages=MAX_MESSAGES,
                                           repeating_keys=MAX_MESSAGES)
        self.producer.start()
        current_acked = self.producer.num_acked
        wait_until(
            lambda: self.producer.num_acked >= current_acked + MAX_MESSAGES,
            timeout_sec=10,
            err_msg="Timeout awaiting messages to be produced and acked")

    def start_consumer(self, topic):
        self.consumer = ConsoleConsumer(self.test_context,
                                        num_nodes=self.num_brokers,
                                        kafka=self.kafka,
                                        topic=topic,
                                        consumer_timeout_ms=1000)
        self.consumer.start()

    def check_message_count_sum(self, message_count, **kwargs):
        total = self.extract_message_count_sum(**kwargs)
        return total == message_count

    def extract_message_count_sum(self, **kwargs):
        # GetOffsetShell prints one "topic:partition:offset" line per partition;
        # sum the trailing offset field of every non-empty line.
        # (A standalone parsing sketch follows this class.)
        offsets = self.kafka.get_offset_shell(**kwargs).split("\n")
        total = 0
        for offset in offsets:
            if len(offset) == 0:
                continue
            total += int(offset.split(":")[-1])
        return total

    @cluster(num_nodes=3)
    @matrix(metadata_quorum=quorum.all_non_upgrade)
    def test_get_offset_shell_topic_name(self,
                                         security_protocol='PLAINTEXT',
                                         metadata_quorum=quorum.zk):
        """
        Tests if GetOffsetShell handles --topic argument with a simple name correctly
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_producer(TOPIC_TEST_NAME)

        # Assert that offset is correctly indicated by GetOffsetShell tool
        wait_until(lambda: self.check_message_count_sum_equals(
            MAX_MESSAGES, topic=TOPIC_TEST_NAME),
                   timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")

    @cluster(num_nodes=4)
    @matrix(metadata_quorum=quorum.all_non_upgrade)
    def test_get_offset_shell_topic_pattern(self,
                                            security_protocol='PLAINTEXT',
                                            metadata_quorum=quorum.zk):
        """
        Tests if GetOffsetShell handles --topic argument with a pattern correctly
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_producer(TOPIC_TEST_PATTERN1)
        self.start_producer(TOPIC_TEST_PATTERN2)

        # Assert that offset is correctly indicated by GetOffsetShell tool
        wait_until(lambda: self.check_message_count_sum_equals(
            2 * MAX_MESSAGES, topic=TOPIC_TEST_PATTERN_PATTERN),
                   timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")

    @cluster(num_nodes=3)
    @matrix(metadata_quorum=quorum.all_non_upgrade)
    def test_get_offset_shell_partitions(self,
                                         security_protocol='PLAINTEXT',
                                         metadata_quorum=quorum.zk):
        """
        Tests if GetOffsetShell handles --partitions argument correctly
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_producer(TOPIC_TEST_PARTITIONS)

        def fetch_and_sum_partitions_separately():
            partition_count0 = self.extract_message_count_sum(
                topic=TOPIC_TEST_PARTITIONS, partitions="0")
            partition_count1 = self.extract_message_count_sum(
                topic=TOPIC_TEST_PARTITIONS, partitions="1")
            return partition_count0 + partition_count1 == MAX_MESSAGES

        # Assert that offset is correctly indicated when fetching partitions one by one
        wait_until(fetch_and_sum_partitions_separately,
                   timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")

        # Assert that offset is correctly indicated when fetching partitions together
        wait_until(lambda: self.check_message_count_sum_equals(
            MAX_MESSAGES, topic=TOPIC_TEST_PARTITIONS),
                   timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")

    @cluster(num_nodes=4)
    @matrix(metadata_quorum=quorum.all_non_upgrade)
    def test_get_offset_shell_topic_partitions(self,
                                               security_protocol='PLAINTEXT',
                                               metadata_quorum=quorum.zk):
        """
        Tests if GetOffsetShell handles --topic-partitions argument correctly
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_producer(TOPIC_TEST_TOPIC_PARTITIONS1)
        self.start_producer(TOPIC_TEST_TOPIC_PARTITIONS2)

        # Assert that a single topic pattern matches all 4 partitions
        wait_until(lambda: self.check_message_count_sum_equals(
            2 * MAX_MESSAGES,
            topic_partitions=TOPIC_TEST_TOPIC_PARTITIONS_PATTERN),
                   timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")

        # Assert that a topic pattern with partition range matches all 4 partitions
        wait_until(lambda: self.check_message_count_sum_equals(
            2 * MAX_MESSAGES,
            topic_partitions=TOPIC_TEST_TOPIC_PARTITIONS_PATTERN + ":0-2"),
                   timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")

        # Assert that 2 separate topic patterns match all 4 partitions
        wait_until(lambda: self.check_message_count_sum_equals(
            2 * MAX_MESSAGES,
            topic_partitions=TOPIC_TEST_TOPIC_PARTITIONS1 + "," +
            TOPIC_TEST_TOPIC_PARTITIONS2),
                   timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")

        # Assert that 4 separate topic-partition patterns match all 4 partitions
        wait_until(lambda: self.check_message_count_sum_equals(
            2 * MAX_MESSAGES,
            topic_partitions=TOPIC_TEST_TOPIC_PARTITIONS1 + ":0," +
            TOPIC_TEST_TOPIC_PARTITIONS1 + ":1," + TOPIC_TEST_TOPIC_PARTITIONS2
            + ":0," + TOPIC_TEST_TOPIC_PARTITIONS2 + ":1"),
                   timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")

        # Assert that only partitions #0 are matched with a topic pattern and a fixed partition number
        filtered_partitions = self.kafka.get_offset_shell(
            topic_partitions=TOPIC_TEST_TOPIC_PARTITIONS_PATTERN + ":0")
        assert 1 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS1, 0))
        assert 0 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS1, 1))
        assert 1 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS2, 0))
        assert 0 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS2, 1))

        # Assert that only partitions #1 are matched with topic pattern and partition lower bound
        filtered_partitions = self.kafka.get_offset_shell(
            topic_partitions=TOPIC_TEST_TOPIC_PARTITIONS_PATTERN + ":1-")
        assert 1 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS1, 1))
        assert 0 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS1, 0))
        assert 1 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS2, 1))
        assert 0 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS2, 0))

        # Assert that only partitions #0 are matched with topic pattern and partition upper bound
        filtered_partitions = self.kafka.get_offset_shell(
            topic_partitions=TOPIC_TEST_TOPIC_PARTITIONS_PATTERN + ":-1")
        assert 1 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS1, 0))
        assert 0 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS1, 1))
        assert 1 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS2, 0))
        assert 0 == filtered_partitions.count(
            "%s:%s" % (TOPIC_TEST_TOPIC_PARTITIONS2, 1))

    @cluster(num_nodes=4)
    @matrix(metadata_quorum=quorum.all_non_upgrade)
    def test_get_offset_shell_internal_filter(self,
                                              security_protocol='PLAINTEXT',
                                              metadata_quorum=quorum.zk):
        """
        Tests if GetOffsetShell handles --exclude-internal-topics flag correctly
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_producer(TOPIC_TEST_INTERNAL_FILTER)

        # Create consumer and poll messages to create consumer offset record
        self.start_consumer(TOPIC_TEST_INTERNAL_FILTER)
        node = self.consumer.nodes[0]
        wait_until(lambda: self.consumer.alive(node),
                   timeout_sec=20,
                   backoff_sec=.2,
                   err_msg="Consumer was too slow to start")

        # Assert that the topic-partition filter finds all messages in the test topic
        wait_until(lambda: self.check_message_count_sum_equals(
            MAX_MESSAGES, topic_partitions=TOPIC_TEST_INTERNAL_FILTER),
                   timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")

        # No filters
        # Assert that without exclusion, we can find both the test topic and the __consumer_offsets internal topic
        offset_output = self.kafka.get_offset_shell()
        assert "__consumer_offsets" in offset_output
        assert TOPIC_TEST_INTERNAL_FILTER in offset_output

        # Assert that with exclusion, we can find the test topic but not the __consumer_offsets internal topic
        offset_output = self.kafka.get_offset_shell(
            exclude_internal_topics=True)
        assert "__consumer_offsets" not in offset_output
        assert TOPIC_TEST_INTERNAL_FILTER in offset_output

        # Topic filter
        # Assert that without exclusion, we can find both the test topic and the __consumer_offsets internal topic
        offset_output = self.kafka.get_offset_shell(topic=".*consumer_offsets")
        assert "__consumer_offsets" in offset_output
        assert TOPIC_TEST_INTERNAL_FILTER in offset_output

        # Assert that with exclusion, we can find the test topic but not the __consumer_offsets internal topic
        offset_output = self.kafka.get_offset_shell(
            topic=".*consumer_offsets", exclude_internal_topics=True)
        assert "__consumer_offsets" not in offset_output
        assert TOPIC_TEST_INTERNAL_FILTER in offset_output

        # Topic-partition filter
        # Assert that without exclusion, we can find both the test topic and the __consumer_offsets internal topic
        offset_output = self.kafka.get_offset_shell(
            topic_partitions=".*consumer_offsets:0")
        assert "__consumer_offsets" in offset_output
        assert TOPIC_TEST_INTERNAL_FILTER in offset_output

        # Assert that with exclusion, we can find the test topic but not the __consumer_offsets internal topic
        offset_output = self.kafka.get_offset_shell(
            topic_partitions=".*consumer_offsets:0",
            exclude_internal_topics=True)
        assert "__consumer_offsets" not in offset_output
        assert TOPIC_TEST_INTERNAL_FILTER in offset_output
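As a standalone illustration of the parsing done by extract_message_count_sum() above, the toy snippet below applies the same split-and-sum logic to made-up GetOffsetShell-style "topic:partition:offset" output; the sample lines are invented.

# Toy, self-contained version of the offset-summing logic; sample data is made up.
sample_output = "test-topic:0:40\ntest-topic:1:60\n"
total = sum(int(line.split(":")[-1]) for line in sample_output.split("\n") if line)
assert total == 100  # 40 + 60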
Example #13
class DelegationTokenTest(Test):
    def __init__(self, test_context):
        super(DelegationTokenTest, self).__init__(test_context)

        self.test_context = test_context
        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(
            self.test_context,
            num_nodes=1,
            zk=self.zk,
            zk_chroot="/kafka",
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }},
            server_prop_overides=[
                [config_property.DELEGATION_TOKEN_MAX_LIFETIME_MS, "604800000"],
                [config_property.DELEGATION_TOKEN_EXPIRY_TIME_MS, "86400000"],
                [config_property.DELEGATION_TOKEN_SECRET_KEY, "test12345"],
                [config_property.SASL_ENABLED_MECHANISMS, "GSSAPI,SCRAM-SHA-256"]
            ])
        self.jaas_deleg_conf_path = "/tmp/jaas_deleg.conf"
        self.jaas_deleg_conf = ""
        self.client_properties_content = """
security.protocol=SASL_PLAINTEXT
sasl.mechanism=SCRAM-SHA-256
sasl.kerberos.service.name=kafka
client.id=console-consumer
"""
        self.client_kafka_opts = ' -Djava.security.auth.login.config=' + self.jaas_deleg_conf_path

        self.producer = VerifiableProducer(
            self.test_context,
            num_nodes=1,
            kafka=self.kafka,
            topic=self.topic,
            max_messages=1,
            throughput=1,
            kafka_opts_override=self.client_kafka_opts,
            client_prop_file_override=self.client_properties_content)

        self.consumer = ConsoleConsumer(
            self.test_context,
            num_nodes=1,
            kafka=self.kafka,
            topic=self.topic,
            kafka_opts_override=self.client_kafka_opts,
            client_prop_file_override=self.client_properties_content)

        self.kafka.security_protocol = 'SASL_PLAINTEXT'
        self.kafka.client_sasl_mechanism = 'GSSAPI,SCRAM-SHA-256'
        self.kafka.interbroker_sasl_mechanism = 'GSSAPI'

    def setUp(self):
        self.zk.start()

    def tearDown(self):
        self.producer.nodes[0].account.remove(self.jaas_deleg_conf_path)
        self.consumer.nodes[0].account.remove(self.jaas_deleg_conf_path)

    def generate_delegation_token(self):
        self.logger.debug("Request delegation token")
        self.delegation_tokens.generate_delegation_token()
        self.jaas_deleg_conf = self.delegation_tokens.create_jaas_conf_with_delegation_token()

    def expire_delegation_token(self):
        self.kafka.client_sasl_mechanism = 'GSSAPI,SCRAM-SHA-256'
        token_hmac = self.delegation_tokens.token_hmac()
        self.delegation_tokens.expire_delegation_token(token_hmac)

    def produce_with_delegation_token(self):
        self.producer.acked_values = []
        self.producer.nodes[0].account.create_file(self.jaas_deleg_conf_path,
                                                   self.jaas_deleg_conf)
        self.logger.debug(self.jaas_deleg_conf)
        self.producer.start()

    def consume_with_delegation_token(self):
        self.logger.debug("Consume messages with delegation token")

        self.consumer.nodes[0].account.create_file(self.jaas_deleg_conf_path,
                                                   self.jaas_deleg_conf)
        self.logger.debug(self.jaas_deleg_conf)
        self.consumer.consumer_timeout_ms = 5000

        self.consumer.start()
        self.consumer.wait()

    def get_datetime_ms(self, input_date):
        return int(
            time.mktime(
                datetime.strptime(input_date, "%Y-%m-%dT%H:%M").timetuple()) *
            1000)

    def renew_delegation_token(self):
        dt = self.delegation_tokens.parse_delegation_token_out()
        orig_expiry_date_ms = self.get_datetime_ms(dt["expirydate"])
        new_expirydate_ms = orig_expiry_date_ms + 1000

        self.delegation_tokens.renew_delegation_token(dt["hmac"],
                                                      new_expirydate_ms)

    @cluster(num_nodes=5)
    def test_delegation_token_lifecycle(self):
        self.kafka.start()
        self.delegation_tokens = DelegationTokens(self.kafka,
                                                  self.test_context)

        self.generate_delegation_token()
        self.renew_delegation_token()
        self.produce_with_delegation_token()
        wait_until(lambda: self.producer.num_acked > 0,
                   timeout_sec=30,
                   err_msg="Expected producer to still be producing.")
        assert 1 == self.producer.num_acked, "number of acked messages: %d" % self.producer.num_acked

        self.consume_with_delegation_token()
        num_consumed = len(self.consumer.messages_consumed[1])
        assert 1 == num_consumed, "number of consumed messages: %d" % num_consumed

        self.expire_delegation_token()

        self.produce_with_delegation_token()
        assert 0 == self.producer.num_acked, "number of acked messages: %d" % self.producer.num_acked
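For context on what produce_with_delegation_token() relies on: a delegation token is presented over SASL/SCRAM with tokenauth enabled in the JAAS login module. Below is a hedged illustration of the kind of JAAS content create_jaas_conf_with_delegation_token() is expected to generate; the token ID and HMAC are placeholders, not real values.

# Hedged illustration only; the real content comes from
# DelegationTokens.create_jaas_conf_with_delegation_token().
example_jaas_deleg_conf = """
KafkaClient {
    org.apache.kafka.common.security.scram.ScramLoginModule required
    username="<token-id>"
    password="<token-hmac>"
    tokenauth=true;
};
"""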
class StreamsOptimizedTest(Test):
    """
    Test doing upgrades of a Kafka Streams application
    that is initially un-optimized and then optimized
    """

    input_topic = 'inputTopic'
    aggregation_topic = 'aggregationTopic'
    reduce_topic = 'reduceTopic'
    join_topic = 'joinTopic'
    operation_pattern = 'AGGREGATED\|REDUCED\|JOINED'
    stopped_message = 'OPTIMIZE_TEST Streams Stopped'

    def __init__(self, test_context):
        super(StreamsOptimizedTest, self).__init__(test_context)
        self.topics = {
            self.input_topic: {
                'partitions': 6
            },
            self.aggregation_topic: {
                'partitions': 6
            },
            self.reduce_topic: {
                'partitions': 6
            },
            self.join_topic: {
                'partitions': 6
            }
        }

        self.zookeeper = ZookeeperService(self.test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context,
                                  num_nodes=3,
                                  zk=self.zookeeper,
                                  topics=self.topics)

        self.producer = VerifiableProducer(self.test_context,
                                           1,
                                           self.kafka,
                                           self.input_topic,
                                           throughput=1000,
                                           acks=1)

    def test_upgrade_optimized_topology(self):
        self.zookeeper.start()
        self.kafka.start()

        processor1 = StreamsOptimizedUpgradeTestService(
            self.test_context, self.kafka)
        processor2 = StreamsOptimizedUpgradeTestService(
            self.test_context, self.kafka)
        processor3 = StreamsOptimizedUpgradeTestService(
            self.test_context, self.kafka)

        processors = [processor1, processor2, processor3]

        # produce records continually during the test
        self.producer.start()

        # start all processors unoptimized
        for processor in processors:
            self.set_topics(processor)
            processor.CLEAN_NODE_ENABLED = False
            self.verify_running_repartition_topic_count(processor, 4)

        self.verify_processing(processors, verify_individual_operations=False)

        stop_processors(processors, self.stopped_message)

        # start again with topology optimized
        for processor in processors:
            processor.OPTIMIZED_CONFIG = 'all'
            self.verify_running_repartition_topic_count(processor, 1)

        self.verify_processing(processors, verify_individual_operations=True)

        stop_processors(processors, self.stopped_message)

        self.producer.stop()
        self.kafka.stop()
        self.zookeeper.stop()

    @staticmethod
    def verify_running_repartition_topic_count(processor,
                                               repartition_topic_count):
        node = processor.node
        with node.account.monitor_log(processor.STDOUT_FILE) as monitor:
            processor.start()
            monitor.wait_until(
                'REBALANCING -> RUNNING with REPARTITION TOPIC COUNT=%s' %
                repartition_topic_count,
                timeout_sec=120,
                err_msg=
                "Never saw 'REBALANCING -> RUNNING with REPARTITION TOPIC COUNT=%s' message "
                % repartition_topic_count + str(processor.node.account))

    def verify_processing(self, processors, verify_individual_operations):
        for processor in processors:
            if not self.all_source_subtopology_tasks(processor):
                if verify_individual_operations:
                    for operation in self.operation_pattern.split('\|'):
                        self.do_verify(processor, operation)
                else:
                    self.do_verify(processor, self.operation_pattern)
            else:
                self.logger.info(
                    "Skipping processor %s with all source tasks" %
                    processor.node.account)

    def do_verify(self, processor, pattern):
        self.logger.info("Verifying %s processing pattern in STDOUT_FILE" %
                         pattern)
        with processor.node.account.monitor_log(
                processor.STDOUT_FILE) as monitor:
            monitor.wait_until(
                pattern,
                timeout_sec=60,
                err_msg="Never saw processing of %s " % pattern +
                str(processor.node.account))

    def all_source_subtopology_tasks(self, processor):
        retries = 0
        while retries < 5:
            # Raw string so sed's \1 backreference is passed through intact instead of being
            # interpreted by Python as an octal escape. (A standalone regex sketch follows this class.)
            found = list(
                processor.node.account.ssh_capture(
                    r"sed -n 's/.*current active tasks: \[\(\(0_[0-9], \)\{3\}0_[0-9]\)\].*/\1/p' %s"
                    % processor.LOG_FILE,
                    allow_fail=True))
            self.logger.info("Returned %s from assigned task check" % found)
            if len(found) > 0:
                return True
            retries += 1
            time.sleep(1)

        return False

    def set_topics(self, processor):
        processor.INPUT_TOPIC = self.input_topic
        processor.AGGREGATION_TOPIC = self.aggregation_topic
        processor.REDUCE_TOPIC = self.reduce_topic
        processor.JOIN_TOPIC = self.join_topic
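The sed expression in all_source_subtopology_tasks() above looks for a log line listing exactly four active tasks of subtopology 0. A small self-contained check of the equivalent pattern in Python's re module, against a made-up log line, might look like this:

import re

# Made-up log line in the shape the sed command above searches for.
sample_line = "INFO ... current active tasks: [0_0, 0_3, 0_5, 0_7] ..."
pattern = r"current active tasks: \[((?:0_[0-9], ){3}0_[0-9])\]"
match = re.search(pattern, sample_line)
assert match is not None
assert match.group(1) == "0_0, 0_3, 0_5, 0_7"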
Example #15
class ReplicaVerificationToolTest(Test):
    """
    Tests ReplicaVerificationTool
    """
    def __init__(self, test_context):
        super(ReplicaVerificationToolTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 2
        self.messages_received_count = 0
        self.topics = {TOPIC: {'partitions': 1, 'replication-factor': 2}}

        self.zk = ZookeeperService(test_context, self.num_zk)
        self.kafka = None
        self.producer = None
        self.replica_verifier = None

    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context,
            self.num_brokers,
            self.zk,
            security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol,
            topics=self.topics)
        self.kafka.start()

    def start_replica_verification_tool(self, security_protocol):
        self.replica_verifier = ReplicaVerificationTool(
            self.test_context,
            1,
            self.kafka,
            TOPIC,
            report_interval_ms=REPORT_INTERVAL_MS,
            security_protocol=security_protocol)
        self.replica_verifier.start()

    def start_producer(self, max_messages, acks, timeout):
        # This will produce to the kafka cluster
        current_acked = 0
        self.producer = VerifiableProducer(self.test_context,
                                           num_nodes=1,
                                           kafka=self.kafka,
                                           topic=TOPIC,
                                           throughput=1000,
                                           acks=acks,
                                           max_messages=max_messages)
        self.producer.start()
        wait_until(
            lambda: acks == 0 or self.producer.num_acked >= current_acked +
            max_messages,
            timeout_sec=timeout,
            err_msg="Timeout awaiting messages to be produced and acked")

    def stop_producer(self):
        self.producer.stop()

    @cluster(num_nodes=6)
    def test_replica_lags(self, security_protocol='PLAINTEXT'):
        """
        Tests ReplicaVerificationTool
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_replica_verification_tool(security_protocol)
        self.start_producer(max_messages=10, acks=-1, timeout=15)

        # Verify that there is no lag in replicas and is correctly reported by ReplicaVerificationTool
        wait_until(
            lambda: self.replica_verifier.get_lag_for_partition(TOPIC, 0) == 0,
            timeout_sec=10,
            err_msg="Timed out waiting to reach zero replica lags.")
        self.stop_producer()

        self.start_producer(max_messages=10000, acks=0, timeout=5)
        # Verify that there is lag in replicas and is correctly reported by ReplicaVerificationTool
        wait_until(
            lambda: self.replica_verifier.get_lag_for_partition(TOPIC, 0) > 0,
            timeout_sec=10,
            err_msg=
            "Timed out waiting to reach non-zero number of replica lags.")
class StreamsStaticMembershipTest(Test):
    """
    Tests using static membership when the broker is at the minimum supported
    version (2.3) or higher.
    """

    input_topic = 'inputTopic'
    pattern = 'PROCESSED'
    running_message = 'REBALANCING -> RUNNING'
    stopped_message = 'Static membership test closed'

    def __init__(self, test_context):
        super(StreamsStaticMembershipTest, self).__init__(test_context)
        self.topics = {
            self.input_topic: {
                'partitions': 18
            },
        }

        self.zookeeper = ZookeeperService(self.test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context,
                                  num_nodes=3,
                                  zk=self.zookeeper,
                                  topics=self.topics)

        self.producer = VerifiableProducer(self.test_context,
                                           1,
                                           self.kafka,
                                           self.input_topic,
                                           throughput=1000,
                                           acks=1)

    def test_rolling_bounces_will_not_trigger_rebalance_under_static_membership(
            self):
        self.zookeeper.start()
        self.kafka.start()

        numThreads = 3
        processor1 = StaticMemberTestService(self.test_context, self.kafka,
                                             "consumer-A", numThreads)
        processor2 = StaticMemberTestService(self.test_context, self.kafka,
                                             "consumer-B", numThreads)
        processor3 = StaticMemberTestService(self.test_context, self.kafka,
                                             "consumer-C", numThreads)

        processors = [processor1, processor2, processor3]

        self.producer.start()

        for processor in processors:
            processor.CLEAN_NODE_ENABLED = False
            self.set_topics(processor)
            verify_running(processor, self.running_message)

        self.verify_processing(processors)

        # do several rolling bounces
        num_bounces = 3
        for i in range(0, num_bounces):
            for processor in processors:
                verify_stopped(processor, self.stopped_message)
                verify_running(processor, self.running_message)

        stable_generation = -1
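        # with static membership, the rolling bounces above should not trigger any rebalances,
        # so every generation recorded during the bounce phase is expected to be identical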
        for processor in processors:
            generations = extract_generation_from_logs(processor)
            num_bounce_generations = num_bounces * numThreads
            assert num_bounce_generations <= len(generations), \
                "Expected at least %d generation messages, but saw only %d" % (num_bounce_generations, len(generations))

            for generation in generations[-num_bounce_generations:]:
                generation = int(generation)
                if stable_generation == -1:
                    stable_generation = generation
                assert stable_generation == generation, \
                    "Streams rolling bounce caused an unexpected generation bump to %d" % generation

        self.verify_processing(processors)

        stop_processors(processors, self.stopped_message)

        self.producer.stop()
        self.kafka.stop()
        self.zookeeper.stop()

    def verify_processing(self, processors):
        for processor in processors:
            with processor.node.account.monitor_log(
                    processor.STDOUT_FILE) as monitor:
                monitor.wait_until(
                    self.pattern,
                    timeout_sec=60,
                    err_msg="Never saw processing of %s " % self.pattern +
                    str(processor.node.account))

    def set_topics(self, processor):
        processor.INPUT_TOPIC = self.input_topic
class StreamsOptimizedTest(Test):
    """
    Test upgrading a Kafka Streams application from an initially
    un-optimized topology to an optimized one
    """

    input_topic = 'inputTopic'
    aggregation_topic = 'aggregationTopic'
    reduce_topic = 'reduceTopic'
    join_topic = 'joinTopic'
    operation_pattern = r'AGGREGATED\|REDUCED\|JOINED'
    stopped_message = 'OPTIMIZE_TEST Streams Stopped'

    def __init__(self, test_context):
        super(StreamsOptimizedTest, self).__init__(test_context)
        self.topics = {
            self.input_topic: {
                'partitions': 6
            },
            self.aggregation_topic: {
                'partitions': 6
            },
            self.reduce_topic: {
                'partitions': 6
            },
            self.join_topic: {
                'partitions': 6
            }
        }

        self.zookeeper = ZookeeperService(self.test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context,
                                  num_nodes=3,
                                  zk=self.zookeeper,
                                  topics=self.topics)

        self.producer = VerifiableProducer(self.test_context,
                                           1,
                                           self.kafka,
                                           self.input_topic,
                                           throughput=1000,
                                           acks=1)

    def test_upgrade_optimized_topology(self):
        self.zookeeper.start()
        self.kafka.start()

        processor1 = StreamsOptimizedUpgradeTestService(
            self.test_context, self.kafka)
        processor2 = StreamsOptimizedUpgradeTestService(
            self.test_context, self.kafka)
        processor3 = StreamsOptimizedUpgradeTestService(
            self.test_context, self.kafka)

        processors = [processor1, processor2, processor3]

        self.logger.info("produce records continually during the test")
        self.producer.start()

        self.logger.info("start all processors unoptimized")
        for processor in processors:
            self.set_topics(processor)
            processor.CLEAN_NODE_ENABLED = False
            self.verify_running_repartition_topic_count(processor, 4)

        self.logger.info("verify unoptimized")
        self.verify_processing(processors, verify_individual_operations=False)

        self.logger.info("stop unoptimized")
        stop_processors(processors, self.stopped_message)

        self.logger.info("reset")
        self.reset_application()
        for processor in processors:
            processor.node.account.ssh("mv " + processor.LOG_FILE + " " +
                                       processor.LOG_FILE + ".1",
                                       allow_fail=False)
            processor.node.account.ssh("mv " + processor.STDOUT_FILE + " " +
                                       processor.STDOUT_FILE + ".1",
                                       allow_fail=False)
            processor.node.account.ssh("mv " + processor.STDERR_FILE + " " +
                                       processor.STDERR_FILE + ".1",
                                       allow_fail=False)
            processor.node.account.ssh("mv " + processor.CONFIG_FILE + " " +
                                       processor.CONFIG_FILE + ".1",
                                       allow_fail=False)

        self.logger.info("start again with topology optimized")
        for processor in processors:
            processor.OPTIMIZED_CONFIG = 'all'
            self.verify_running_repartition_topic_count(processor, 1)

        self.logger.info("verify optimized")
        self.verify_processing(processors, verify_individual_operations=True)

        self.logger.info("stop optimized")
        stop_processors(processors, self.stopped_message)

        self.logger.info("teardown")
        self.producer.stop()
        self.kafka.stop()
        self.zookeeper.stop()

    def reset_application(self):
        resetter = StreamsResetter(self.test_context,
                                   self.kafka,
                                   topic=self.input_topic,
                                   applicationId='StreamsOptimizedTest')
        resetter.start()
        # the resetter is not a long-running service, but stopping it explicitly verifies (via its pid) that it has exited
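        # Alternative sketch: poll until the resetter process has exited before moving on.
        # This assumes a pids()-style helper on the service, which is hypothetical here.
        # wait_until(lambda: len(resetter.pids(resetter.nodes[0])) == 0,
        #            timeout_sec=60, err_msg="Streams resetter did not finish in time")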
        resetter.stop()

    @staticmethod
    def verify_running_repartition_topic_count(processor,
                                               repartition_topic_count):
        node = processor.node
        with node.account.monitor_log(processor.STDOUT_FILE) as monitor:
            processor.start()
            monitor.wait_until(
                'REBALANCING -> RUNNING with REPARTITION TOPIC COUNT=%s' %
                repartition_topic_count,
                timeout_sec=120,
                err_msg=
                "Never saw 'REBALANCING -> RUNNING with REPARTITION TOPIC COUNT=%s' message "
                % repartition_topic_count + str(processor.node.account))

    def verify_processing(self, processors, verify_individual_operations):
        # This test previously had logic to account for skewed assignments, in which not all processors may
        # receive active assignments. I don't think this will happen anymore, but keep an eye out if we see
        # test failures here. If that does resurface, note that the prior implementation was not correct.
        # A better approach would be to make sure we see processing of each partition across the whole cluster
        # instead of just expecting to see each node perform some processing.
        for processor in processors:
            if verify_individual_operations:
                for operation in self.operation_pattern.split(r'\|'):
                    self.do_verify(processor, operation)
            else:
                self.do_verify(processor, self.operation_pattern)
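
    def verify_each_partition_processed(self, processors, num_partitions=6):
        # Hedged sketch of the per-partition check suggested in the comment above; it assumes
        # the processors log Streams task ids of the form "0_<partition>", which may not match
        # the actual output of StreamsOptimizedUpgradeTestService, and it only approximates
        # "processing" by the presence of the task id in the log.
        processed = set()
        for processor in processors:
            for partition in range(num_partitions):
                cmd = "grep --max-count 1 '0_%d' %s" % (partition, processor.LOG_FILE)
                if processor.node.account.ssh(cmd, allow_fail=True) == 0:
                    processed.add(partition)
        missing = set(range(num_partitions)) - processed
        assert not missing, "No processor in the cluster processed partitions %s" % missing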

    def do_verify(self, processor, pattern):
        self.logger.info("Verifying %s processing pattern in STDOUT_FILE" %
                         pattern)
        self.logger.info(
            list(
                processor.node.account.ssh_capture("ls -lh %s" %
                                                   (processor.STDOUT_FILE),
                                                   allow_fail=True)))
        wait_until(lambda: processor.node.account.ssh(
            "grep --max-count 1 '%s' %s" % (pattern, processor.STDOUT_FILE),
            allow_fail=True) == 0,
                   timeout_sec=60)

    def set_topics(self, processor):
        processor.INPUT_TOPIC = self.input_topic
        processor.AGGREGATION_TOPIC = self.aggregation_topic
        processor.REDUCE_TOPIC = self.reduce_topic
        processor.JOIN_TOPIC = self.join_topic
Example #18
0
class StreamsNamedRepartitionTopicTest(Test):
    """
    Tests using a named repartition topic: start the application,
    then do a rolling upgrade that adds operations, and verify
    the application keeps running
    """

    input_topic = 'inputTopic'
    aggregation_topic = 'aggregationTopic'
    pattern = 'AGGREGATED'

    def __init__(self, test_context):
        super(StreamsNamedRepartitionTopicTest, self).__init__(test_context)
        self.topics = {
            self.input_topic: {
                'partitions': 6
            },
            self.aggregation_topic: {
                'partitions': 6
            }
        }

        self.zookeeper = ZookeeperService(self.test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context,
                                  num_nodes=3,
                                  zk=self.zookeeper,
                                  topics=self.topics)

        self.producer = VerifiableProducer(self.test_context,
                                           1,
                                           self.kafka,
                                           self.input_topic,
                                           throughput=1000,
                                           acks=1)

    def test_upgrade_topology_with_named_repartition_topic(self):
        self.zookeeper.start()
        self.kafka.start()

        processor1 = StreamsNamedRepartitionTopicService(
            self.test_context, self.kafka)
        processor2 = StreamsNamedRepartitionTopicService(
            self.test_context, self.kafka)
        processor3 = StreamsNamedRepartitionTopicService(
            self.test_context, self.kafka)

        processors = [processor1, processor2, processor3]

        self.producer.start()

        for processor in processors:
            processor.CLEAN_NODE_ENABLED = False
            self.set_topics(processor)
            self.verify_running(processor, 'REBALANCING -> RUNNING')

        self.verify_processing(processors)

        # do rolling upgrade
        for processor in processors:
            self.verify_stopped(processor)
            #  will tell app to add operations before repartition topic
            processor.ADD_ADDITIONAL_OPS = 'true'
            self.verify_running(processor, 'UPDATED Topology')

        self.verify_processing(processors)

        self.stop_processors(processors)

        self.producer.stop()
        self.kafka.stop()
        self.zookeeper.stop()

    @staticmethod
    def verify_running(processor, message):
        node = processor.node
        with node.account.monitor_log(processor.STDOUT_FILE) as monitor:
            processor.start()
            monitor.wait_until(message,
                               timeout_sec=60,
                               err_msg="Never saw '%s' message " % message +
                               str(processor.node.account))

    @staticmethod
    def verify_stopped(processor):
        node = processor.node
        with node.account.monitor_log(processor.STDOUT_FILE) as monitor:
            processor.stop()
            monitor.wait_until(
                'NAMED_REPARTITION_TEST Streams Stopped',
                timeout_sec=60,
                err_msg="Never saw 'NAMED_REPARTITION_TEST Streams Stopped' message " +
                str(processor.node.account))

    def verify_processing(self, processors):
        for processor in processors:
            with processor.node.account.monitor_log(
                    processor.STDOUT_FILE) as monitor:
                monitor.wait_until(
                    self.pattern,
                    timeout_sec=60,
                    err_msg="Never saw processing of %s " % self.pattern +
                    str(processor.node.account))

    def stop_processors(self, processors):
        for processor in processors:
            self.verify_stopped(processor)

    def set_topics(self, processor):
        processor.INPUT_TOPIC = self.input_topic
        processor.AGGREGATION_TOPIC = self.aggregation_topic
class ReplicaVerificationToolTest(Test):
    """
    Tests ReplicaVerificationTool
    """
    def __init__(self, test_context):
        super(ReplicaVerificationToolTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 2
        self.messages_received_count = 0
        self.topics = {
            TOPIC: {'partitions': 1, 'replication-factor': 2}
        }

        self.zk = ZookeeperService(test_context, self.num_zk)
        self.kafka = None
        self.producer = None
        self.replica_verifier = None

    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context, self.num_brokers,
            self.zk, security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol, topics=self.topics)
        self.kafka.start()

    def start_replica_verification_tool(self, security_protocol):
        self.replica_verifier = ReplicaVerificationTool(self.test_context, 1, self.kafka, TOPIC, report_interval_ms=REPORT_INTERVAL_MS, security_protocol=security_protocol)
        self.replica_verifier.start()

    def start_producer(self, max_messages, acks, timeout):
        # This will produce to kafka cluster
        self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=TOPIC, throughput=1000, acks=acks, max_messages=max_messages)
        current_acked = self.producer.num_acked
        self.logger.info("current_acked = %s" % current_acked)
        self.producer.start()
        wait_until(lambda: acks == 0 or self.producer.num_acked >= current_acked + max_messages, timeout_sec=timeout,
                   err_msg="Timeout awaiting messages to be produced and acked")

    def stop_producer(self):
        self.producer.stop()

    def test_replica_lags(self, security_protocol='PLAINTEXT'):
        """
        Tests ReplicaVerificationTool
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_replica_verification_tool(security_protocol)
        self.start_producer(max_messages=10, acks=-1, timeout=15)
        # Verify that there is no replica lag and that this is correctly reported by ReplicaVerificationTool
        wait_until(lambda: self.replica_verifier.get_lag_for_partition(TOPIC, 0) == 0, timeout_sec=10,
                   err_msg="Timed out waiting to reach zero replica lags.")
        self.stop_producer()

        self.start_producer(max_messages=1000, acks=0, timeout=5)
        # Verify that there is replica lag and that it is correctly reported by ReplicaVerificationTool
        wait_until(lambda: self.replica_verifier.get_lag_for_partition(TOPIC, 0) > 0, timeout_sec=10,
                   err_msg="Timed out waiting to reach non-zero number of replica lags.")
Example #20
0
class ConsoleConsumerTest(Test):
    """Sanity checks on console consumer service class."""
    def __init__(self, test_context):
        super(ConsoleConsumerTest, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, zk_chroot="/kafka",
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}})
        self.consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic)

    def setUp(self):
        self.zk.start()

    @cluster(num_nodes=3)
    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    @cluster(num_nodes=4)
    @matrix(security_protocol=['SASL_SSL'], sasl_mechanism=['PLAIN', 'SCRAM-SHA-256', 'SCRAM-SHA-512'])
    @matrix(security_protocol=['SASL_PLAINTEXT', 'SASL_SSL'])
    def test_lifecycle(self, security_protocol, sasl_mechanism='GSSAPI'):
        """Check that console consumer starts/stops properly, and that we are capturing log output."""

        self.kafka.security_protocol = security_protocol
        self.kafka.client_sasl_mechanism = sasl_mechanism
        self.kafka.interbroker_sasl_mechanism = sasl_mechanism
        self.kafka.start()

        self.consumer.security_protocol = security_protocol

        t0 = time.time()
        self.consumer.start()
        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node),
            timeout_sec=20, backoff_sec=.2, err_msg="Consumer was too slow to start")
        self.logger.info("consumer started in %s seconds " % str(time.time() - t0))

        # Verify that log output is happening
        wait_until(lambda: file_exists(node, ConsoleConsumer.LOG_FILE), timeout_sec=10,
                   err_msg="Timed out waiting for consumer log file to exist.")
        wait_until(lambda: line_count(node, ConsoleConsumer.LOG_FILE) > 0, timeout_sec=1,
                   backoff_sec=.25, err_msg="Timed out waiting for log entries to start.")

        # Verify no consumed messages
        assert line_count(node, ConsoleConsumer.STDOUT_CAPTURE) == 0

        self.consumer.stop_node(node)

    @cluster(num_nodes=4)
    def test_version(self):
        """Check that console consumer v0.8.2.X successfully starts and consumes messages."""
        self.kafka.start()

        num_messages = 1000
        self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic,
                                           max_messages=num_messages, throughput=1000)
        self.producer.start()
        self.producer.wait()

        self.consumer.nodes[0].version = LATEST_0_8_2
        self.consumer.new_consumer = False
        self.consumer.consumer_timeout_ms = 1000
        self.consumer.start()
        self.consumer.wait()

        num_consumed = len(self.consumer.messages_consumed[1])
        num_produced = self.producer.num_acked
        assert num_produced == num_consumed, "num_produced: %d, num_consumed: %d" % (num_produced, num_consumed)
Example #21
0
class StreamsOptimizedTest(Test):
    """
    Test upgrading a Kafka Streams application from an initially
    un-optimized topology to an optimized one
    """

    input_topic = 'inputTopic'
    aggregation_topic = 'aggregationTopic'
    reduce_topic = 'reduceTopic'
    join_topic = 'joinTopic'
    operation_pattern = r'AGGREGATED\|REDUCED\|JOINED'

    def __init__(self, test_context):
        super(StreamsOptimizedTest, self).__init__(test_context)
        self.topics = {
            self.input_topic: {'partitions': 6},
            self.aggregation_topic: {'partitions': 6},
            self.reduce_topic: {'partitions': 6},
            self.join_topic: {'partitions': 6}
        }

        self.zookeeper = ZookeeperService(self.test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=3,
                                  zk=self.zookeeper, topics=self.topics)

        self.producer = VerifiableProducer(self.test_context,
                                           1,
                                           self.kafka,
                                           self.input_topic,
                                           throughput=1000,
                                           acks=1)

    def test_upgrade_optimized_topology(self):
        self.zookeeper.start()
        self.kafka.start()

        processor1 = StreamsOptimizedUpgradeTestService(self.test_context, self.kafka)
        processor2 = StreamsOptimizedUpgradeTestService(self.test_context, self.kafka)
        processor3 = StreamsOptimizedUpgradeTestService(self.test_context, self.kafka)

        processors = [processor1, processor2, processor3]

        # produce records continually during the test
        self.producer.start()

        # start all processors unoptimized
        for processor in processors:
            self.set_topics(processor)
            processor.CLEAN_NODE_ENABLED = False
            self.verify_running_repartition_topic_count(processor, 4)

        self.verify_processing(processors, verify_individual_operations=False)

        self.stop_processors(processors)

        # start again with topology optimized
        for processor in processors:
            processor.OPTIMIZED_CONFIG = 'all'
            self.verify_running_repartition_topic_count(processor, 1)

        self.verify_processing(processors, verify_individual_operations=True)

        self.stop_processors(processors)

        self.producer.stop()
        self.kafka.stop()
        self.zookeeper.stop()

    @staticmethod
    def verify_running_repartition_topic_count(processor, repartition_topic_count):
        node = processor.node
        with node.account.monitor_log(processor.STDOUT_FILE) as monitor:
            processor.start()
            monitor.wait_until('REBALANCING -> RUNNING with REPARTITION TOPIC COUNT=%s' % repartition_topic_count,
                               timeout_sec=120,
                               err_msg="Never saw 'REBALANCING -> RUNNING with REPARTITION TOPIC COUNT=%s' message "
                                       % repartition_topic_count + str(processor.node.account))

    @staticmethod
    def verify_stopped(processor):
        node = processor.node
        with node.account.monitor_log(processor.STDOUT_FILE) as monitor:
            processor.stop()
            monitor.wait_until('OPTIMIZE_TEST Streams Stopped',
                               timeout_sec=60,
                               err_msg="Never saw 'OPTIMIZE_TEST Streams Stopped' message " + str(processor.node.account))

    def verify_processing(self, processors, verify_individual_operations):
        for processor in processors:
            if not self.all_source_subtopology_tasks(processor):
                if verify_individual_operations:
                    for operation in self.operation_pattern.split(r'\|'):
                        self.do_verify(processor, operation)
                else:
                    self.do_verify(processor, self.operation_pattern)
            else:
                self.logger.info("Skipping processor %s with all source tasks" % processor.node.account)

    def do_verify(self, processor, pattern):
        self.logger.info("Verifying %s processing pattern in STDOUT_FILE" % pattern)
        with processor.node.account.monitor_log(processor.STDOUT_FILE) as monitor:
            monitor.wait_until(pattern,
                               timeout_sec=60,
                               err_msg="Never saw processing of %s " % pattern + str(processor.node.account))

    def all_source_subtopology_tasks(self, processor):
        retries = 0
        while retries < 5:
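            # capture the active-task list only when all four assigned tasks come from
            # subtopology 0 (task ids of the form 0_x), i.e. a pure source-subtopology assignment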
            found = list(processor.node.account.ssh_capture(r"sed -n 's/.*current active tasks: \[\(\(0_[0-9], \)\{3\}0_[0-9]\)\].*/\1/p' %s" % processor.LOG_FILE, allow_fail=True))
            self.logger.info("Returned %s from assigned task check" % found)
            if len(found) > 0:
                return True
            retries += 1
            time.sleep(1)

        return False

    def stop_processors(self, processors):
        for processor in processors:
            self.verify_stopped(processor)

    def set_topics(self, processor):
        processor.INPUT_TOPIC = self.input_topic
        processor.AGGREGATION_TOPIC = self.aggregation_topic
        processor.REDUCE_TOPIC = self.reduce_topic
        processor.JOIN_TOPIC = self.join_topic
Example #22
0
class TestBounce(Test):
    """Sanity checks on verifiable producer service class with cluster roll."""
    def __init__(self, test_context):
        super(TestBounce, self).__init__(test_context)

        quorum_size_arg_name = 'quorum_size'
        default_quorum_size = 1
        quorum_size = default_quorum_size if not test_context.injected_args else test_context.injected_args.get(
            quorum_size_arg_name, default_quorum_size)
        if quorum_size < 1:
            raise Exception("Illegal %s value provided for the test: %s" %
                            (quorum_size_arg_name, quorum_size))
        self.topic = "topic"
        self.zk = ZookeeperService(test_context,
                                   num_nodes=quorum_size) if quorum.for_test(
                                       test_context) == quorum.zk else None
        num_kafka_nodes = quorum_size if quorum.for_test(
            test_context) == quorum.colocated_kraft else 1
        self.kafka = KafkaService(
            test_context,
            num_nodes=num_kafka_nodes,
            zk=self.zk,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }},
            controller_num_nodes_override=quorum_size)
        self.num_messages = 1000

    def create_producer(self):
        # This will produce to source kafka cluster
        self.producer = VerifiableProducer(self.test_context,
                                           num_nodes=1,
                                           kafka=self.kafka,
                                           topic=self.topic,
                                           max_messages=self.num_messages,
                                           throughput=self.num_messages // 10)

    def setUp(self):
        if self.zk:
            self.zk.start()

    # ZooKeeper and KRaft, quorum size = 1
    @cluster(num_nodes=4)
    @matrix(metadata_quorum=quorum.all, quorum_size=[1])
    # Remote and Co-located KRaft, quorum size = 3
    @cluster(num_nodes=6)
    @matrix(metadata_quorum=quorum.all_kraft, quorum_size=[3])
    def test_simple_run(self, metadata_quorum, quorum_size):
        """
        Test that we can start VerifiableProducer on the current branch snapshot version, and
        verify that we can produce a small number of messages both before and after a subsequent roll.
        """
        self.kafka.start()
        for first_time in [True, False]:
            self.create_producer()
            self.producer.start()
            wait_until(
                lambda: self.producer.num_acked > 5,
                timeout_sec=15,
                err_msg=
                "Producer failed to start in a reasonable amount of time.")

            self.producer.wait()
            num_produced = self.producer.num_acked
            assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (
                num_produced, self.num_messages)
            if first_time:
                self.producer.stop()
                if self.kafka.quorum_info.using_kraft and self.kafka.remote_controller_quorum:
                    self.kafka.remote_controller_quorum.restart_cluster()
                self.kafka.restart_cluster()
class ConsoleConsumerTest(Test):
    """Sanity checks on console consumer service class."""
    def __init__(self, test_context):
        super(ConsoleConsumerTest, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk,
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}})
        self.consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, new_consumer=False)

    def setUp(self):
        self.zk.start()

    @cluster(num_nodes=3)
    @parametrize(security_protocol='PLAINTEXT', new_consumer=False)
    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    @cluster(num_nodes=4)
    @matrix(security_protocol=['SASL_SSL'], sasl_mechanism=['PLAIN', 'SCRAM-SHA-256', 'SCRAM-SHA-512'])
    @matrix(security_protocol=['SASL_PLAINTEXT', 'SASL_SSL'])
    def test_lifecycle(self, security_protocol, new_consumer=True, sasl_mechanism='GSSAPI'):
        """Check that console consumer starts/stops properly, and that we are capturing log output."""

        self.kafka.security_protocol = security_protocol
        self.kafka.client_sasl_mechanism = sasl_mechanism
        self.kafka.interbroker_sasl_mechanism = sasl_mechanism
        self.kafka.start()

        self.consumer.security_protocol = security_protocol
        self.consumer.new_consumer = new_consumer

        t0 = time.time()
        self.consumer.start()
        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node),
            timeout_sec=10, backoff_sec=.2, err_msg="Consumer was too slow to start")
        self.logger.info("consumer started in %s seconds " % str(time.time() - t0))

        # Verify that log output is happening
        wait_until(lambda: file_exists(node, ConsoleConsumer.LOG_FILE), timeout_sec=10,
                   err_msg="Timed out waiting for consumer log file to exist.")
        wait_until(lambda: line_count(node, ConsoleConsumer.LOG_FILE) > 0, timeout_sec=1,
                   backoff_sec=.25, err_msg="Timed out waiting for log entries to start.")

        # Verify no consumed messages
        assert line_count(node, ConsoleConsumer.STDOUT_CAPTURE) == 0

        self.consumer.stop_node(node)

    @cluster(num_nodes=4)
    def test_version(self):
        """Check that console consumer v0.8.2.X successfully starts and consumes messages."""
        self.kafka.start()

        num_messages = 1000
        self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic,
                                           max_messages=num_messages, throughput=1000)
        self.producer.start()
        self.producer.wait()

        self.consumer.nodes[0].version = LATEST_0_8_2
        self.consumer.consumer_timeout_ms = 1000
        self.consumer.start()
        self.consumer.wait()

        num_consumed = len(self.consumer.messages_consumed[1])
        num_produced = self.producer.num_acked
        assert num_produced == num_consumed, "num_produced: %d, num_consumed: %d" % (num_produced, num_consumed)
Example #24
0
class SimpleConsumerShellTest(Test):
    """
    Tests SimpleConsumerShell tool
    """
    def __init__(self, test_context):
        super(SimpleConsumerShellTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.messages_received_count = 0
        self.topics = {
            TOPIC: {
                'partitions': NUM_PARTITIONS,
                'replication-factor': REPLICATION_FACTOR
            }
        }

        self.zk = ZookeeperService(test_context, self.num_zk)

    def setUp(self):
        self.zk.start()

    def start_kafka(self):
        self.kafka = KafkaService(self.test_context,
                                  self.num_brokers,
                                  self.zk,
                                  topics=self.topics)
        self.kafka.start()

    def run_producer(self):
        # This will produce to kafka cluster
        self.producer = VerifiableProducer(self.test_context,
                                           num_nodes=1,
                                           kafka=self.kafka,
                                           topic=TOPIC,
                                           throughput=1000,
                                           max_messages=MAX_MESSAGES)
        self.producer.start()
        wait_until(
            lambda: self.producer.num_acked == MAX_MESSAGES,
            timeout_sec=10,
            err_msg="Timeout awaiting messages to be produced and acked")

    def start_simple_consumer_shell(self):
        self.simple_consumer_shell = SimpleConsumerShell(
            self.test_context, 1, self.kafka, TOPIC)
        self.simple_consumer_shell.start()

    @cluster(num_nodes=4)
    def test_simple_consumer_shell(self):
        """
        Tests if SimpleConsumerShell is fetching expected records
        :return: None
        """
        self.start_kafka()
        self.run_producer()
        self.start_simple_consumer_shell()

        # Assert that SimpleConsumerShell is fetching expected number of messages
        wait_until(
            lambda: self.simple_consumer_shell.get_output().count("\n") ==
            (MAX_MESSAGES + 1),
            timeout_sec=10,
            err_msg="Timed out waiting to receive expected number of messages."
        )
class TestBounce(Test):
    """Sanity checks on verifiable producer service class with cluster roll."""
    def __init__(self, test_context):
        super(TestBounce, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context,
                                   num_nodes=1) if quorum.for_test(
                                       test_context) == quorum.zk else None
        self.kafka = KafkaService(
            test_context,
            num_nodes=1,
            zk=self.zk,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }},
            controller_num_nodes_override=3
            if quorum.for_test(test_context) == quorum.remote_raft else 1)
        self.num_messages = 1000

    def create_producer(self):
        # This will produce to source kafka cluster
        self.producer = VerifiableProducer(self.test_context,
                                           num_nodes=1,
                                           kafka=self.kafka,
                                           topic=self.topic,
                                           max_messages=self.num_messages,
                                           throughput=self.num_messages // 10)

    def setUp(self):
        if self.zk:
            self.zk.start()

    @cluster(num_nodes=6)
    @parametrize(metadata_quorum=quorum.remote_raft)
    @cluster(num_nodes=4)
    @parametrize(metadata_quorum=quorum.colocated_raft)
    @cluster(num_nodes=4)
    @parametrize(metadata_quorum=quorum.zk)
    def test_simple_run(self, metadata_quorum):
        """
        Test that we can start VerifiableProducer on the current branch snapshot version, and
        verify that we can produce a small number of messages both before and after a subsequent roll.
        """
        self.kafka.start()
        for first_time in [True, False]:
            self.create_producer()
            self.producer.start()
            wait_until(
                lambda: self.producer.num_acked > 5,
                timeout_sec=15,
                err_msg=
                "Producer failed to start in a reasonable amount of time.")

            self.producer.wait()
            num_produced = self.producer.num_acked
            assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (
                num_produced, self.num_messages)
            if first_time:
                self.producer.stop()
                if self.kafka.quorum_info.using_raft and self.kafka.remote_controller_quorum:
                    self.kafka.remote_controller_quorum.restart_cluster()
                self.kafka.restart_cluster()
Example #26
0
class ReplicationTest(Test):
    """Replication tests.
    These tests verify that replication provides simple durability guarantees by checking that data acked by
    brokers is still available for consumption in the face of various failure scenarios."""
    def __init__(self, test_context):
        """:type test_context: ducktape.tests.test.TestContext"""
        super(ReplicationTest, self).__init__(test_context=test_context)

        self.topic = "test_topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(test_context,
                                  num_nodes=3,
                                  zk=self.zk,
                                  topics={
                                      self.topic: {
                                          "partitions": 3,
                                          "replication-factor": 3,
                                          "min.insync.replicas": 2
                                      }
                                  })
        self.producer_throughput = 10000
        self.num_producers = 1
        self.num_consumers = 1

    def setUp(self):
        self.zk.start()
        self.kafka.start()

    def min_cluster_size(self):
        """Override this since we're adding services outside of the constructor"""
        return super(
            ReplicationTest,
            self).min_cluster_size() + self.num_producers + self.num_consumers

    def run_with_failure(self, failure):
        """This is the top-level test template.

        The steps are:
            Produce messages in the background while driving some failure condition
            When done driving failures, immediately stop producing
            Consume all messages
            Validate that messages acked by brokers were consumed

        Note that consuming is a bit tricky, at least with console consumer. The goal is to consume all messages
        (foreach partition) in the topic. In this case, waiting for the last message may cause the consumer to stop
        too soon since console consumer is consuming multiple partitions from a single thread and therefore we lose
        ordering guarantees.

        Waiting on a count of consumed messages can be unreliable: if we stop consuming when num_consumed == num_acked,
        we might exit early if some messages are duplicated (though not an issue here since producer retries==0)

        Therefore rely here on the consumer.timeout.ms setting which times out on the interval between successively
        consumed messages. Since we run the producer to completion before running the consumer, this is a reliable
        indicator that nothing is left to consume.

        """
        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           self.topic,
                                           throughput=self.producer_throughput)
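        # consumer_timeout_ms=3000: the console consumer exits once it sees no new records for
        # 3 seconds, which (per the docstring above) signals that everything produced was consumed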
        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        consumer_timeout_ms=3000)

        # Produce in a background thread while driving broker failures
        self.producer.start()
        wait_until(lambda: self.producer.num_acked > 5,
                   timeout_sec=5,
                   err_msg="Producer failed to start in a reasonable amount of time.")
        failure()
        self.producer.stop()

        self.acked = self.producer.acked
        self.not_acked = self.producer.not_acked
        self.logger.info("num not acked: %d" % self.producer.num_not_acked)
        self.logger.info("num acked:     %d" % self.producer.num_acked)

        # Consume all messages
        self.consumer.start()
        self.consumer.wait()
        self.consumed = self.consumer.messages_consumed[1]
        self.logger.info("num consumed:  %d" % len(self.consumed))

        # Check produced vs consumed
        success, msg = self.validate()

        if not success:
            self.mark_for_collect(self.producer)

        assert success, msg

    def clean_shutdown(self):
        """Discover leader node for our topic and shut it down cleanly."""
        self.kafka.signal_leader(self.topic, partition=0, sig=signal.SIGTERM)

    def hard_shutdown(self):
        """Discover leader node for our topic and shut it down with a hard kill."""
        self.kafka.signal_leader(self.topic, partition=0, sig=signal.SIGKILL)

    def clean_bounce(self):
        """Chase the leader of one partition and restart it cleanly."""
        for i in range(5):
            prev_leader_node = self.kafka.leader(topic=self.topic, partition=0)
            self.kafka.restart_node(prev_leader_node,
                                    wait_sec=5,
                                    clean_shutdown=True)

    def hard_bounce(self):
        """Chase the leader of one partition and restart it with a hard kill."""
        for i in range(5):
            prev_leader_node = self.kafka.leader(topic=self.topic, partition=0)
            self.kafka.restart_node(prev_leader_node,
                                    wait_sec=5,
                                    clean_shutdown=False)

            # Wait long enough for previous leader to probably be awake again
            time.sleep(6)

    def validate(self):
        """Check that produced messages were consumed."""

        success = True
        msg = ""

        if len(set(self.consumed)) != len(self.consumed):
            # There are duplicates. This is ok, so report it but don't fail the test
            msg += "There are duplicate messages in the log\n"

        if not set(self.consumed).issuperset(set(self.acked)):
            # Every acked message must appear in the logs. I.e. consumed messages must be superset of acked messages.
            acked_minus_consumed = set(self.producer.acked) - set(
                self.consumed)
            success = False
            msg += "At least one acked message did not appear in the consumed messages. acked_minus_consumed: " + str(
                acked_minus_consumed)

        if not success:
            # Collect all the data logs if there was a failure
            self.mark_for_collect(self.kafka)

        return success, msg

    def test_clean_shutdown(self):
        self.run_with_failure(self.clean_shutdown)

    def test_hard_shutdown(self):
        self.run_with_failure(self.hard_shutdown)

    def test_clean_bounce(self):
        self.run_with_failure(self.clean_bounce)

    def test_hard_bounce(self):
        self.run_with_failure(self.hard_bounce)
Example #27
0
class StreamsCooperativeRebalanceUpgradeTest(Test):
    """
    Test of a rolling upgrade from eager rebalance to
    cooperative rebalance
    """

    source_topic = "source"
    sink_topic = "sink"
    task_delimiter = "#"
    report_interval = "1000"
    processing_message = "Processed [0-9]* records so far"
    stopped_message = "COOPERATIVE-REBALANCE-TEST-CLIENT-CLOSED"
    running_state_msg = "STREAMS in a RUNNING State"
    cooperative_turned_off_msg = "Eager rebalancing enabled now for upgrade from %s"
    cooperative_enabled_msg = "Cooperative rebalancing enabled now"
    first_bounce_phase = "first_bounce_phase-"
    second_bounce_phase = "second_bounce_phase-"

    # !!CAUTION!!: THIS LIST OF VERSIONS IS FIXED, DO NOT ADD NEW VERSIONS TO IT
    streams_eager_rebalance_upgrade_versions = [
        str(LATEST_0_10_0),
        str(LATEST_0_10_1),
        str(LATEST_0_10_2),
        str(LATEST_0_11_0),
        str(LATEST_1_0),
        str(LATEST_1_1),
        str(LATEST_2_0),
        str(LATEST_2_1),
        str(LATEST_2_2),
        str(LATEST_2_3)
    ]

    def __init__(self, test_context):
        super(StreamsCooperativeRebalanceUpgradeTest,
              self).__init__(test_context)
        self.topics = {
            self.source_topic: {
                'partitions': 9
            },
            self.sink_topic: {
                'partitions': 9
            }
        }

        self.zookeeper = ZookeeperService(self.test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context,
                                  num_nodes=3,
                                  zk=self.zookeeper,
                                  topics=self.topics)

        self.producer = VerifiableProducer(self.test_context,
                                           1,
                                           self.kafka,
                                           self.source_topic,
                                           throughput=1000,
                                           acks=1)

    @matrix(upgrade_from_version=streams_eager_rebalance_upgrade_versions)
    def test_upgrade_to_cooperative_rebalance(self, upgrade_from_version):
        self.zookeeper.start()
        self.kafka.start()

        processor1 = CooperativeRebalanceUpgradeService(
            self.test_context, self.kafka)
        processor2 = CooperativeRebalanceUpgradeService(
            self.test_context, self.kafka)
        processor3 = CooperativeRebalanceUpgradeService(
            self.test_context, self.kafka)

        processors = [processor1, processor2, processor3]

        # produce records continually during the test
        self.producer.start()

        # start all processors without upgrade_from config; normal operations mode
        self.logger.info("Starting all streams clients in normal running mode")
        for processor in processors:
            processor.set_version(upgrade_from_version)
            self.set_props(processor)
            processor.CLEAN_NODE_ENABLED = False
            # can't rely on the state listener since older versions don't have one,
            # so just verify the client is up and running
            verify_running(processor, self.processing_message)

        # all clients are running and rebalancing has ceased
        for processor in processors:
            self.verify_processing(processor, self.processing_message)

        # first rolling bounce with "upgrade.from" config set
        previous_phase = ""
        self.maybe_upgrade_rolling_bounce_and_verify(processors,
                                                     previous_phase,
                                                     self.first_bounce_phase,
                                                     upgrade_from_version)

        # All nodes processing, rebalancing has ceased
        for processor in processors:
            self.verify_processing(
                processor, self.first_bounce_phase + self.processing_message)

        # second rolling bounce without "upgrade.from" config
        self.maybe_upgrade_rolling_bounce_and_verify(processors,
                                                     self.first_bounce_phase,
                                                     self.second_bounce_phase)

        # All nodes processing, rebalancing has ceased
        for processor in processors:
            self.verify_processing(
                processor, self.second_bounce_phase + self.processing_message)

        # now verify tasks are unique
        for processor in processors:
            self.get_tasks_for_processor(processor)
            self.logger.info("Active tasks %s" % processor.active_tasks)

        overlapping_tasks = processor1.active_tasks.intersection(
            processor2.active_tasks)
        assert len(overlapping_tasks) == 0, \
            "Final task assignments are not unique %s %s" % (processor1.active_tasks, processor2.active_tasks)

        overlapping_tasks = processor1.active_tasks.intersection(
            processor3.active_tasks)
        assert len(overlapping_tasks) == 0, \
            "Final task assignments are not unique %s %s" % (processor1.active_tasks, processor3.active_tasks)

        overlapping_tasks = processor2.active_tasks.intersection(
            processor3.active_tasks)
        assert len(overlapping_tasks) == 0, \
            "Final task assignments are not unique %s %s" % (processor2.active_tasks, processor3.active_tasks)

        # test done close all down
        stop_processors(processors,
                        self.second_bounce_phase + self.stopped_message)

        self.producer.stop()
        self.kafka.stop()
        self.zookeeper.stop()

    def maybe_upgrade_rolling_bounce_and_verify(self,
                                                processors,
                                                previous_phase,
                                                current_phase,
                                                upgrade_from_version=None):
        for processor in processors:
            # stop the processor in preparation for setting or removing the "upgrade.from" config
            verify_stopped(processor, previous_phase + self.stopped_message)
            # upgrade to version with cooperative rebalance
            processor.set_version("")
            processor.set_upgrade_phase(current_phase)

            if upgrade_from_version is not None:
                # drop everything after the last '.' so the value matches the
                # versions accepted by the upgrade.from config
                upgrade_version = upgrade_from_version[:upgrade_from_version.rfind('.')]
                rebalance_mode_msg = self.cooperative_turned_off_msg % upgrade_version
            else:
                upgrade_version = None
                rebalance_mode_msg = self.cooperative_enabled_msg

            self.set_props(processor, upgrade_version)
            node = processor.node
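            # monitor both the stdout file (state and processing messages) and the log4j log
            # (rebalance-protocol message) from before the processor is restarted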
            with node.account.monitor_log(
                    processor.STDOUT_FILE) as stdout_monitor:
                with node.account.monitor_log(
                        processor.LOG_FILE) as log_monitor:
                    processor.start()
                    # verify correct rebalance mode either turned off for upgrade or enabled after upgrade
                    log_monitor.wait_until(
                        rebalance_mode_msg,
                        timeout_sec=60,
                        err_msg="Never saw '%s' message " % rebalance_mode_msg
                        + str(processor.node.account))

                # verify rebalanced into a running state
                rebalance_msg = current_phase + self.running_state_msg
                stdout_monitor.wait_until(
                    rebalance_msg,
                    timeout_sec=60,
                    err_msg="Never saw '%s' message " % rebalance_msg +
                    str(processor.node.account))

                # verify processing
                verify_processing_msg = current_phase + self.processing_message
                stdout_monitor.wait_until(
                    verify_processing_msg,
                    timeout_sec=60,
                    err_msg="Never saw '%s' message " % verify_processing_msg +
                    str(processor.node.account))

    def verify_processing(self, processor, pattern):
        self.logger.info("Verifying %s processing pattern in STDOUT_FILE" %
                         pattern)
        with processor.node.account.monitor_log(
                processor.STDOUT_FILE) as monitor:
            monitor.wait_until(
                pattern,
                timeout_sec=60,
                err_msg="Never saw processing of %s " % pattern +
                str(processor.node.account))

    def get_tasks_for_processor(self, processor):
        retries = 0
        while retries < 5:
            found_tasks = list(
                processor.node.account.ssh_capture(
                    "grep TASK-ASSIGNMENTS %s | tail -n 1" %
                    processor.STDOUT_FILE,
                    allow_fail=True))
            self.logger.info("Returned %s from assigned task check" %
                             found_tasks)
            if len(found_tasks) > 0:
                task_string = str(found_tasks[0]).strip()
                self.logger.info("Converted %s from assigned task check" %
                                 task_string)
                processor.set_tasks(task_string)
                return
            retries += 1
            time.sleep(1)
        return

    def set_props(self, processor, upgrade_from=None):
        processor.SOURCE_TOPIC = self.source_topic
        processor.SINK_TOPIC = self.sink_topic
        processor.REPORT_INTERVAL = self.report_interval
        processor.UPGRADE_FROM = upgrade_from
class ClientCompatibilityTest(Test):
    def __init__(self, test_context):
        super(ClientCompatibilityTest,
              self).__init__(test_context=test_context)

    def setUp(self):
        self.topic = "test_topic"
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context,
                                  num_nodes=3,
                                  zk=self.zk,
                                  version=LATEST_0_8_2,
                                  topics={
                                      self.topic: {
                                          "partitions": 3,
                                          "replication-factor": 3,
                                          'configs': {
                                              "min.insync.replicas": 2
                                          }
                                      }
                                  })
        self.zk.start()
        self.kafka.start()

        # Producer and consumer
        self.producer_throughput = 10000
        self.num_producers = 1
        self.num_consumers = 1

    def test_producer_back_compatibility(self):
        """Run the 0.9.X java producer against 0.8.X brokers.
        This test documents the fact that java producer v0.9.0.0 and later won't run against 0.8.X brokers:
        the broker responds to a V1 produce request with a V0 produce response; the client then tries to parse this V0
        produce response as a V1 produce response, resulting in a BufferUnderflowException
        """
        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           self.topic,
                                           max_messages=100,
                                           throughput=self.producer_throughput,
                                           version=TRUNK)

        node = self.producer.nodes[0]
        produced_successfully = False
        try:
            self.producer.start()
            self.producer.wait()
            produced_successfully = True
        except Exception:
            # Expected: the 0.9.X producer should fail against the 0.8.X broker
            pass
        finally:
            self.producer.kill_node(node, clean_shutdown=False)

        assert not produced_successfully, \
            "0.9.X java producer should not run successfully against 0.8.X broker"

        self.logger.info("Grepping producer log for expected error type")
        node.account.ssh("egrep -m 1 %s %s" % (
            "\"org\.apache\.kafka\.common\.protocol\.types\.SchemaException.*throttle_time_ms.*: java\.nio\.BufferUnderflowException\"",
            self.producer.LOG_FILE),
                         allow_fail=False)

    def test_consumer_back_compatibility(self):
        """Run the scala 0.9.X consumer against an 0.8.X cluster.
        Expect the 0.9.X scala consumer to fail with buffer underflow. This error is the same as when an 0.9.X producer
        is run against an 0.8.X broker: the broker responds to a V1 fetch request with a V0 fetch response; the
        client then tries to parse this V0 fetch response as a V1 fetch response, resulting in a BufferUnderflowException
        """
        num_messages = 10
        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           self.topic,
                                           max_messages=num_messages,
                                           throughput=self.producer_throughput,
                                           version=LATEST_0_8_2)

        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        group_id="consumer-09X",
                                        consumer_timeout_ms=10000,
                                        message_validator=is_int,
                                        version=TRUNK)

        self.old_consumer = ConsoleConsumer(self.test_context,
                                            self.num_consumers,
                                            self.kafka,
                                            self.topic,
                                            group_id="consumer-08X",
                                            consumer_timeout_ms=10000,
                                            message_validator=is_int,
                                            version=LATEST_0_8_2)

        self.producer.run()
        self.consumer.run()
        self.old_consumer.run()

        consumed = len(self.consumer.messages_consumed[1])
        old_consumed = len(self.old_consumer.messages_consumed[1])
        assert old_consumed == num_messages, "Expected 0.8.X scala consumer to consume %d, but only got %d" % (
            num_messages, old_consumed)
        assert consumed == 0, "Expected 0.9.X scala consumer to fail to consume any messages, but got %d" % consumed

        self.logger.info("Grepping consumer log for expected error type")
        node = self.consumer.nodes[0]
        node.account.ssh("egrep -m 1 %s %s" %
                         ("\"java\.nio\.BufferUnderflowException\"",
                          self.consumer.LOG_FILE),
                         allow_fail=False)
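
# Illustrative sketch (not part of the original tests): why decoding a V0 response with a V1 schema
# raises BufferUnderflowException. The V1 produce/fetch responses add a 4-byte throttle_time_ms field,
# so reading the V1 layout from a V0-sized buffer runs out of bytes. The field layout below is
# deliberately simplified and hypothetical; only the underflow mechanism is the point.
import struct

def parse_v1_response(buf):
    # Simplified V1 layout: int16 error_code, int64 offset, int32 throttle_time_ms
    return struct.unpack(">hqi", buf)

if __name__ == "__main__":
    v0_response = struct.pack(">hq", 0, 42)  # simplified V0 layout: no throttle_time_ms
    try:
        parse_v1_response(v0_response)
    except struct.error as e:
        # Python's analogue of Java's java.nio.BufferUnderflowException
        print("underflow while decoding V0 bytes as V1:", e)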
Example #29
0
class DelegationTokenTest(Test):
    def __init__(self, test_context):
        super(DelegationTokenTest, self).__init__(test_context)

        self.test_context = test_context
        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, zk_chroot="/kafka",
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}},
                                  server_prop_overides=[
                                      [config_property.DELEGATION_TOKEN_MAX_LIFETIME_MS, "604800000"],
                                      [config_property.DELEGATION_TOKEN_EXPIRY_TIME_MS, "86400000"],
                                      [config_property.DELEGATION_TOKEN_MASTER_KEY, "test12345"],
                                      [config_property.SASL_ENABLED_MECHANISMS, "GSSAPI,SCRAM-SHA-256"]
                                  ])
        self.jaas_deleg_conf_path = "/tmp/jaas_deleg.conf"
        self.jaas_deleg_conf = ""
        self.client_properties_content = """
security.protocol=SASL_PLAINTEXT
sasl.mechanism=SCRAM-SHA-256
sasl.kerberos.service.name=kafka
client.id=console-consumer
"""
        self.client_kafka_opts = ' -Djava.security.auth.login.config=' + self.jaas_deleg_conf_path

        self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, max_messages=1,
                                       throughput=1, kafka_opts_override=self.client_kafka_opts,
                                       client_prop_file_override=self.client_properties_content)

        self.consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic,
                                        kafka_opts_override=self.client_kafka_opts,
                                        client_prop_file_override=self.client_properties_content)

        self.kafka.security_protocol = 'SASL_PLAINTEXT'
        self.kafka.client_sasl_mechanism = 'GSSAPI,SCRAM-SHA-256'
        self.kafka.interbroker_sasl_mechanism = 'GSSAPI'


    def setUp(self):
        self.zk.start()

    def tearDown(self):
        self.producer.nodes[0].account.remove(self.jaas_deleg_conf_path)
        self.consumer.nodes[0].account.remove(self.jaas_deleg_conf_path)

    def generate_delegation_token(self):
        self.logger.debug("Request delegation token")
        self.delegation_tokens.generate_delegation_token()
        self.jaas_deleg_conf = self.delegation_tokens.create_jaas_conf_with_delegation_token()

    def expire_delegation_token(self):
        self.kafka.client_sasl_mechanism = 'GSSAPI,SCRAM-SHA-256'
        token_hmac = self.delegation_tokens.token_hmac()
        self.delegation_tokens.expire_delegation_token(token_hmac)


    def produce_with_delegation_token(self):
        self.producer.acked_values = []
        self.producer.nodes[0].account.create_file(self.jaas_deleg_conf_path, self.jaas_deleg_conf)
        self.logger.debug(self.jaas_deleg_conf)
        self.producer.start()

    def consume_with_delegation_token(self):
        self.logger.debug("Consume messages with delegation token")

        self.consumer.nodes[0].account.create_file(self.jaas_deleg_conf_path, self.jaas_deleg_conf)
        self.logger.debug(self.jaas_deleg_conf)
        self.consumer.consumer_timeout_ms = 5000

        self.consumer.start()
        self.consumer.wait()

    def get_datetime_ms(self, input_date):
        return int(time.mktime(datetime.strptime(input_date,"%Y-%m-%dT%H:%M").timetuple()) * 1000)

    def renew_delegation_token(self):
        dt = self.delegation_tokens.parse_delegation_token_out()
        orig_expiry_date_ms = self.get_datetime_ms(dt["expirydate"])
        new_expirydate_ms = orig_expiry_date_ms + 1000

        self.delegation_tokens.renew_delegation_token(dt["hmac"], new_expirydate_ms)

    def test_delegation_token_lifecycle(self):
        self.kafka.start()
        self.delegation_tokens = DelegationTokens(self.kafka, self.test_context)

        self.generate_delegation_token()
        self.renew_delegation_token()
        self.produce_with_delegation_token()
        wait_until(lambda: self.producer.num_acked > 0, timeout_sec=30,
                   err_msg="Expected producer to still be producing.")
        assert 1 == self.producer.num_acked, "number of acked messages: %d" % self.producer.num_acked

        self.consume_with_delegation_token()
        num_consumed = len(self.consumer.messages_consumed[1])
        assert 1 == num_consumed, "number of consumed messages: %d" % num_consumed

        self.expire_delegation_token()

        self.produce_with_delegation_token()
        assert 0 == self.producer.num_acked, "number of acked messages: %d" % self.producer.num_acked
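
# Standalone illustration (not part of the original test) of the expiry-date arithmetic used by
# get_datetime_ms and renew_delegation_token above: the "%Y-%m-%dT%H:%M" timestamp printed by the
# delegation-token tooling is interpreted in the local timezone via time.mktime, converted to epoch
# milliseconds, and the renewed expiry is simply that value plus 1000 ms.
import time
from datetime import datetime

def to_epoch_ms(date_str):
    return int(time.mktime(datetime.strptime(date_str, "%Y-%m-%dT%H:%M").timetuple()) * 1000)

if __name__ == "__main__":
    orig_expiry_ms = to_epoch_ms("2020-01-01T00:00")  # exact value depends on the local timezone
    new_expiry_ms = orig_expiry_ms + 1000             # renew the token by one second, as in the test
    print(orig_expiry_ms, new_expiry_ms)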