def test_rolling_bounces_will_not_trigger_rebalance_under_static_membership(self):
        """Bounce three static-member services in turn and check the logged generation stays constant.

        Starts ZooKeeper, Kafka and the producer, brings up three
        ``StaticMemberTestService`` instances ("consumer-A/B/C", three threads
        each), runs three rounds of ``verify_stopped`` / ``verify_running`` over
        all of them, and then asserts that the most recent generation entries
        extracted from every instance's log are one and the same number.
        Everything is stopped again at the end.
        """
        self.zookeeper.start()
        self.kafka.start()

        threads_per_instance = 3
        members = [
            StaticMemberTestService(self.test_context, self.kafka,
                                    client_id, threads_per_instance)
            for client_id in ("consumer-A", "consumer-B", "consumer-C")
        ]

        self.producer.start()

        for member in members:
            member.CLEAN_NODE_ENABLED = False
            self.set_topics(member)
            verify_running(member, self.running_message)

        self.verify_processing(members)

        # Several rolling bounces: every round cycles each instance once.
        bounce_rounds = 3
        for _ in range(bounce_rounds):
            for member in members:
                verify_stopped(member, self.stopped_message)
                verify_running(member, self.running_message)

        # Only the trailing (rounds * threads) generation entries of each log
        # are inspected; -1 marks "no reference generation picked yet".
        expected_tail = bounce_rounds * threads_per_instance
        reference_generation = -1
        for member in members:
            logged = extract_generation_from_logs(member)
            assert expected_tail <= len(logged), \
                "Smaller than minimum expected %d generation messages, actual %d" % (expected_tail, len(logged))

            # Convert lazily so a mismatch is reported before later entries are parsed.
            for observed in (int(raw) for raw in logged[-expected_tail:]):
                if reference_generation == -1:
                    reference_generation = observed
                assert reference_generation == observed, \
                    "Stream rolling bounce have caused unexpected generation bump %d" % observed

        self.verify_processing(members)

        stop_processors(members, self.stopped_message)

        self.producer.stop()
        self.kafka.stop()
        self.zookeeper.stop()
Exemple #2
0
    def do_rolling_bounce(self, processor, counter, current_generation):
        """Restart ``processor`` on "future_version" and verify version probing through the logs.

        The processor is stopped, its stdout/stderr/log files are renamed with
        a ``.<counter>`` suffix, and it is started again after
        ``set_upgrade_to("future_version")``; it is also moved from
        ``self.old_processors`` to ``self.upgraded_processors``.  The method
        then waits for a fixed sequence of log lines on the restarted node, on
        the current leader and (once no old processors remain) on the two
        other nodes, and finally polls until all three processors report the
        same highest generation.

        Args:
            processor: the member of ``self.processors`` to bounce.
            counter: value appended (via ``str``) to the rotated file names.
            current_generation: never read; it is rebound to the synchronized
                generation before being returned.

        Returns:
            The highest generation shared by all three processors, as computed
            by ``self.extract_highest_generation``.

        Raises:
            Exception: if none of the three processors is ``self.leader``, or
                if the generations do not agree after 10 polls spaced
                5 seconds apart.
        """
        # Pick the two processors that are not being bounced.  If there are
        # more than two candidates the last one wins the second slot; with
        # fewer, a slot stays None and the ``.node`` access below fails.
        first_other_processor = None
        second_other_processor = None
        for p in self.processors:
            if p != processor:
                if first_other_processor is None:
                    first_other_processor = p
                else:
                    second_other_processor = p

        node = processor.node
        first_other_node = first_other_processor.node
        second_other_node = second_other_processor.node

        # The monitors on the other two nodes are opened before the bounce so
        # that only output produced from here on is searched.
        with first_other_node.account.monitor_log(
                first_other_processor.LOG_FILE) as first_other_monitor:
            with second_other_node.account.monitor_log(
                    second_other_processor.LOG_FILE) as second_other_monitor:
                # stop processor
                processor.stop()
                # NOTE(review): the result of ssh_capture is discarded; confirm
                # that a failing grep is still surfaced without consuming it.
                node.account.ssh_capture(
                    "grep UPGRADE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE,
                    allow_fail=False)

                # Move the previous run's output aside so the restarted
                # processor writes to fresh files.
                for path in (processor.STDOUT_FILE, processor.STDERR_FILE,
                             processor.LOG_FILE):
                    node.account.ssh(
                        "mv " + path + " " + path + "." + str(counter),
                        allow_fail=False)
                self.leader_counter[processor] = 0

                with node.account.monitor_log(
                        processor.LOG_FILE) as log_monitor:
                    processor.set_upgrade_to("future_version")
                    processor.start()
                    self.old_processors.remove(processor)
                    self.upgraded_processors.append(processor)

                    # checking for the dev version which should be the only SNAPSHOT
                    log_monitor.wait_until(
                        "Kafka version.*" + self.base_version_number +
                        ".*SNAPSHOT",
                        timeout_sec=60,
                        err_msg="Could not detect Kafka Streams version " +
                        str(DEV_VERSION) + " in " + str(node.account))
                    # NOTE(review): presumably rewinds the monitor towards the
                    # start of the new log before the next search - confirm.
                    log_monitor.offset = 5
                    # Raw string: same pattern text, without relying on
                    # Python passing unknown escapes (\. \[ \]) through.
                    log_monitor.wait_until(
                        r"partition\.assignment\.strategy = \[org\.apache\.kafka\.streams\.tests\.StreamsUpgradeTest$FutureStreamsPartitionAssignor\]",
                        timeout_sec=60,
                        err_msg=
                        "Could not detect FutureStreamsPartitionAssignor in " +
                        str(node.account))

                    if processor == self.leader:
                        self.update_leader()
                    else:
                        self.leader_counter[self.leader] += 1

                    # update_leader() may have changed self.leader, so the
                    # leader's monitor is resolved only now.
                    if processor == self.leader:
                        leader_monitor = log_monitor
                    elif first_other_processor == self.leader:
                        leader_monitor = first_other_monitor
                    elif second_other_processor == self.leader:
                        leader_monitor = second_other_monitor
                    else:
                        raise Exception("Could not identify leader.")

                    leader_monitor.wait_until(
                        "Received a future (version probing) subscription (version: 5). Sending empty assignment back (with supported version 4).",
                        timeout_sec=60,
                        err_msg=
                        "Could not detect 'version probing' attempt at leader "
                        + str(self.leader.node.account))

                    if self.old_processors:
                        # Some processors are still on the old version.
                        log_monitor.wait_until(
                            "Sent a version 5 subscription and got version 4 assignment back (successful version probing). Downgrading subscription metadata to received version and trigger new rebalance.",
                            timeout_sec=60,
                            err_msg=
                            "Could not detect 'successful version probing' at upgrading node "
                            + str(node.account))
                    else:
                        # This was the last old processor.
                        log_monitor.wait_until(
                            "Sent a version 5 subscription and got version 4 assignment back (successful version probing). Setting subscription metadata to leaders supported version 5 and trigger new rebalance.",
                            timeout_sec=60,
                            err_msg=
                            "Could not detect 'successful version probing with upgraded leader' at upgrading node "
                            + str(node.account))
                        upgrade_message = "Sent a version 4 subscription and group leader.s latest supported version is 5. Upgrading subscription metadata version to 5 for next rebalance."
                        for other_monitor, other_node in (
                                (first_other_monitor, first_other_node),
                                (second_other_monitor, second_other_node)):
                            other_monitor.wait_until(
                                upgrade_message,
                                timeout_sec=60,
                                err_msg=
                                "Never saw output 'Upgrade metadata to version 5' on "
                                + str(other_node.account))

                    log_monitor.wait_until(
                        "Version probing detected. Triggering new rebalance.",
                        timeout_sec=60,
                        err_msg=
                        "Could not detect 'Triggering new rebalance' at upgrading node "
                        + str(node.account))

                    # version probing should trigger second rebalance
                    # now we check that after consecutive rebalances we have synchronized generation
                    for _ in range(10):
                        processor_found = extract_generation_from_logs(
                            processor)
                        first_other_processor_found = extract_generation_from_logs(
                            first_other_processor)
                        second_other_processor_found = extract_generation_from_logs(
                            second_other_processor)

                        if len(processor_found) > 0 and len(
                                first_other_processor_found) > 0 and len(
                                    second_other_processor_found) > 0:
                            self.logger.info("processor: " +
                                             str(processor_found))
                            self.logger.info("first other processor: " +
                                             str(first_other_processor_found))
                            self.logger.info("second other processor: " +
                                             str(second_other_processor_found))

                            processor_generation = self.extract_highest_generation(
                                processor_found)
                            first_other_processor_generation = self.extract_highest_generation(
                                first_other_processor_found)
                            second_other_processor_generation = self.extract_highest_generation(
                                second_other_processor_found)

                            if processor_generation == first_other_processor_generation and processor_generation == second_other_processor_generation:
                                current_generation = processor_generation
                                break

                        time.sleep(5)
                    else:
                        # All ten polls finished without the generations agreeing.
                        raise Exception(
                            "Never saw all three processors have the synchronized generation number"
                        )

                    if processor == self.leader:
                        self.update_leader()
                    else:
                        self.leader_counter[self.leader] += 1

                    # "leader is an old processor" already implies the list is
                    # non-empty, so the emptiness check alone is sufficient.
                    if self.old_processors:
                        self.verify_metadata_no_upgraded_yet()

        return current_generation
    def do_rolling_bounce(self, processor, counter, current_generation):
        """Restart ``processor`` on "future_version" and verify version probing through the logs.

        The processor is stopped, its stdout/stderr/log files are renamed with
        a ``.<counter>`` suffix, and it is started again after
        ``set_upgrade_to("future_version")``; it is also moved from
        ``self.old_processors`` to ``self.upgraded_processors``.  While old
        processors remain, the restarted node's log must show a successful
        version probing followed by a follow-up rebalance; once none remain,
        both other nodes must log the end-of-upgrade message and a follow-up
        rebalance.  Finally the method polls until all three processors report
        the same highest generation.

        Args:
            processor: the member of ``self.processors`` to bounce.
            counter: value appended (via ``str``) to the rotated file names.
            current_generation: never read; it is rebound to the synchronized
                generation before being returned.

        Returns:
            The highest generation shared by all three processors, as computed
            by ``self.extract_highest_generation``.

        Raises:
            Exception: if the generations do not agree after 10 polls spaced
                5 seconds apart.
        """
        # Pick the two processors that are not being bounced.  If there are
        # more than two candidates the last one wins the second slot; with
        # fewer, a slot stays None and the ``.node`` access below fails.
        first_other_processor = None
        second_other_processor = None
        for p in self.processors:
            if p != processor:
                if first_other_processor is None:
                    first_other_processor = p
                else:
                    second_other_processor = p

        node = processor.node
        first_other_node = first_other_processor.node
        second_other_node = second_other_processor.node

        # The monitors on the other two nodes are opened before the bounce so
        # that only output produced from here on is searched.
        with first_other_node.account.monitor_log(first_other_processor.LOG_FILE) as first_other_monitor:
            with second_other_node.account.monitor_log(second_other_processor.LOG_FILE) as second_other_monitor:
                # stop processor
                processor.stop()
                # NOTE(review): the result of ssh_capture is discarded; confirm
                # that a failing grep is still surfaced without consuming it.
                node.account.ssh_capture("grep UPGRADE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE, allow_fail=False)

                # Move the previous run's output aside so the restarted
                # processor writes to fresh files.
                for path in (processor.STDOUT_FILE, processor.STDERR_FILE, processor.LOG_FILE):
                    node.account.ssh("mv " + path + " " + path + "." + str(counter), allow_fail=False)

                with node.account.monitor_log(processor.LOG_FILE) as log_monitor:
                    processor.set_upgrade_to("future_version")
                    processor.start()
                    self.old_processors.remove(processor)
                    self.upgraded_processors.append(processor)

                    # checking for the dev version
                    log_monitor.wait_until("Kafka version.*" + self.base_version_number + ".*",
                                           timeout_sec=60,
                                           err_msg="Could not detect Kafka Streams version " + str(DEV_VERSION) + " in " + str(node.account))
                    # NOTE(review): presumably rewinds the monitor towards the
                    # start of the new log before the next search - confirm.
                    log_monitor.offset = 5
                    # Raw string: same pattern text, without relying on
                    # Python passing unknown escapes (\. \[ \]) through.
                    log_monitor.wait_until(r"partition\.assignment\.strategy = \[org\.apache\.kafka\.streams\.tests\.StreamsUpgradeTest$FutureStreamsPartitionAssignor\]",
                                           timeout_sec=60,
                                           err_msg="Could not detect FutureStreamsPartitionAssignor in " + str(node.account))

                    end_of_upgrade_message = "Sent a version 9 subscription and group.s latest commonly supported version is 10 (successful version probing and end of rolling upgrade). Upgrading subscription metadata version to 10 for next rebalance."
                    end_of_upgrade_error_message = "Could not detect 'successful version probing and end of rolling upgrade' at upgraded node "
                    followup_rebalance_message = "Triggering the followup rebalance scheduled for 0 ms."
                    followup_rebalance_error_message = "Could not detect 'Triggering followup rebalance' at node "
                    if self.old_processors:
                        # Some processors are still on the old version.
                        log_monitor.wait_until("Sent a version 10 subscription and got version 9 assignment back (successful version probing). Downgrade subscription metadata to commonly supported version 9 and trigger new rebalance.",
                                               timeout_sec=60,
                                               err_msg="Could not detect 'successful version probing' at upgrading node " + str(node.account))
                        log_monitor.wait_until(followup_rebalance_message,
                                               timeout_sec=60,
                                               err_msg=followup_rebalance_error_message + str(node.account))
                    else:
                        # This was the last old processor.  Each failure
                        # message names the node whose log was searched.
                        for other_monitor, other_node in ((first_other_monitor, first_other_node),
                                                          (second_other_monitor, second_other_node)):
                            other_monitor.wait_until(end_of_upgrade_message,
                                                     timeout_sec=60,
                                                     err_msg=end_of_upgrade_error_message + str(other_node.account))
                            other_monitor.wait_until(followup_rebalance_message,
                                                     timeout_sec=60,
                                                     err_msg=followup_rebalance_error_message + str(other_node.account))

                    # version probing should trigger second rebalance
                    # now we check that after consecutive rebalances we have synchronized generation
                    for _ in range(10):
                        processor_found = extract_generation_from_logs(processor)
                        first_other_processor_found = extract_generation_from_logs(first_other_processor)
                        second_other_processor_found = extract_generation_from_logs(second_other_processor)

                        if len(processor_found) > 0 and len(first_other_processor_found) > 0 and len(second_other_processor_found) > 0:
                            self.logger.info("processor: " + str(processor_found))
                            self.logger.info("first other processor: " + str(first_other_processor_found))
                            self.logger.info("second other processor: " + str(second_other_processor_found))

                            processor_generation = self.extract_highest_generation(processor_found)
                            first_other_processor_generation = self.extract_highest_generation(first_other_processor_found)
                            second_other_processor_generation = self.extract_highest_generation(second_other_processor_found)

                            if processor_generation == first_other_processor_generation and processor_generation == second_other_processor_generation:
                                current_generation = processor_generation
                                break

                        time.sleep(5)
                    else:
                        # All ten polls finished without the generations agreeing.
                        raise Exception("Never saw all three processors have the synchronized generation number")

                    if self.old_processors:
                        self.verify_metadata_no_upgraded_yet(end_of_upgrade_message)

        return current_generation