    def test_streams_resilient_to_broker_down(self):
        self.kafka.start()

        # Broker should be down for more than 2x of retries * request.timeout.ms.
        # With retries * timeout = (2 * 15000) ms = 30 seconds, 2x of that is 60 seconds,
        # so we'll set downtime to 70 seconds
        broker_down_time_in_seconds = 70

        processor = StreamsBrokerDownResilienceService(self.test_context,
                                                       self.kafka,
                                                       self.get_configs())
        processor.start()

        # Until KIP-91 is merged we'll only send 5 messages to assert Kafka Streams is running before taking the broker down.
        # After KIP-91 is merged we'll continue to send messages for the duration of the test
        self.assert_produce_consume("before_broker_stop")

        node = self.kafka.leader(self.inputTopic)

        self.kafka.stop_node(node)

        time.sleep(broker_down_time_in_seconds)

        self.kafka.start_node(node)

        self.assert_produce_consume("after_broker_stop")

        self.kafka.stop()
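    # ------------------------------------------------------------------
    # Context for the snippets in this file: each method is a ducktape
    # system test taken from Kafka's streams_broker_down_resilience_test.py,
    # so none of them run standalone. A minimal sketch of the scaffolding
    # they assume is shown below; the module paths, base-class name, and
    # provided attributes are assumptions based on the upstream test layout,
    # not verified imports.
    #
    #   import time
    #
    #   from kafkatest.services.streams import StreamsBrokerDownResilienceService
    #   from kafkatest.tests.streams.base_streams_test import BaseStreamsTest
    #
    #   class StreamsBrokerDownResilience(BaseStreamsTest):
    #       # BaseStreamsTest is assumed to provide self.kafka, self.inputTopic,
    #       # self.outputTopic, self.client_id, self.num_messages, self.message,
    #       # self.connected_message, plus the get_configs(), assert_produce(),
    #       # assert_consume(), assert_produce_consume() and
    #       # wait_for_verification() helpers used by the tests in this file.
    #       ...
    # ------------------------------------------------------------------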
    def test_streams_resilient_to_broker_down(self):
        self.kafka.start()

        # Broker should be down for more than 2x of retries * request.timeout.ms.
        # With retries * timeout = (2 * 15000) ms = 30 seconds, 2x of that is 60 seconds,
        # so we'll set downtime to 70 seconds
        broker_down_time_in_seconds = 70

        processor = StreamsBrokerDownResilienceService(self.test_context, self.kafka, self.get_configs())
        processor.start()

        self.assert_produce_consume(self.inputTopic,
                                    self.outputTopic,
                                    self.client_id,
                                    "before_broker_stop")

        node = self.kafka.leader(self.inputTopic)

        self.kafka.stop_node(node)

        time.sleep(broker_down_time_in_seconds)

        with processor.node.account.monitor_log(processor.LOG_FILE) as monitor:
            self.kafka.start_node(node)
            monitor.wait_until(self.connected_message,
                               timeout_sec=120,
                               err_msg=("Never saw output '%s' on " % self.connected_message) + str(processor.node.account))

        self.assert_produce_consume(self.inputTopic,
                                    self.outputTopic,
                                    self.client_id,
                                    "after_broker_stop",
                                    timeout_sec=120)

        self.kafka.stop()
    def test_streams_resilient_to_broker_down(self):
        self.kafka.start()

        # Broker should be down for more than 2x of retries * request.timeout.ms.
        # With retries * timeout = (2 * 15000) ms = 30 seconds, 2x of that is 60 seconds,
        # so we'll set downtime to 70 seconds
        broker_down_time_in_seconds = 70

        processor = StreamsBrokerDownResilienceService(self.test_context, self.kafka, self.get_configs())
        processor.start()

        # Until KIP-91 is merged we'll only send 5 messages to assert Kafka Streams is running before taking the broker down.
        # After KIP-91 is merged we'll continue to send messages for the duration of the test
        self.assert_produce_consume(self.inputTopic,
                                    self.outputTopic,
                                    self.client_id,
                                    "before_broker_stop")

        node = self.kafka.leader(self.inputTopic)

        self.kafka.stop_node(node)

        time.sleep(broker_down_time_in_seconds)

        self.kafka.start_node(node)

        self.assert_produce_consume(self.inputTopic,
                                    self.outputTopic,
                                    self.client_id,
                                    "after_broker_stop",
                                    timeout_sec=120)

        self.kafka.stop()
    def test_streams_runs_with_broker_down_initially(self):
        self.kafka.start()
        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        configs = self.get_configs(extra_configs=",application.id=starting_wo_broker_id")

        # start streams with broker down initially
        processor = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor_3.start()

        broker_unavailable_message = "Broker may not be available"

        # verify streams instances unable to connect to broker, kept trying
        self.wait_for_verification(processor, broker_unavailable_message, processor.LOG_FILE, 10)
        self.wait_for_verification(processor_2, broker_unavailable_message, processor_2.LOG_FILE, 10)
        self.wait_for_verification(processor_3, broker_unavailable_message, processor_3.LOG_FILE, 10)

        # now start broker
        self.kafka.start_node(node)

        # assert streams can process when starting with broker down
        self.assert_produce_consume(self.inputTopic,
                                    self.outputTopic,
                                    self.client_id,
                                    "running_with_broker_down_initially",
                                    num_messages=9,
                                    timeout_sec=120)

        message = "processed3messages"
        # need to show all 3 instances processed messages
        self.wait_for_verification(processor, message, processor.STDOUT_FILE)
        self.wait_for_verification(processor_2, message, processor_2.STDOUT_FILE)
        self.wait_for_verification(processor_3, message, processor_3.STDOUT_FILE)

        self.kafka.stop()
    def test_streams_resilient_to_broker_down(self):
        self.kafka.start()

        # Consumer max.poll.interval > min(max.block.ms, (retries + 1) * request.timeout)
        consumer_poll_ms = "consumer.max.poll.interval.ms=50000"
        retries_config = "producer.retries=2"
        request_timeout = "producer.request.timeout.ms=15000"
        max_block_ms = "producer.max.block.ms=30000"
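        # Sanity check with the values above: (retries + 1) * request.timeout.ms
        # = 3 * 15000 ms = 45000 ms, max.block.ms = 30000 ms, so the min is
        # 30000 ms and max.poll.interval.ms = 50000 ms stays above it.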

        # Broker should be down for more than 2x of retries * request.timeout.ms.
        # With retries * timeout = (2 * 15000) ms = 30 seconds, 2x of that is 60 seconds,
        # so we'll set downtime to 70 seconds
        broker_down_time_in_seconds = 70

        # the Java code expects configs in key=value,key=value format
        updated_configs = consumer_poll_ms + "," + retries_config + "," + request_timeout + "," + max_block_ms

        processor = StreamsBrokerDownResilienceService(self.test_context,
                                                       self.kafka,
                                                       updated_configs)
        processor.start()

        # Until KIP-91 is merged we'll only send 5 messages to assert Kafka Streams is running before taking the broker down.
        # After KIP-91 is merged we'll continue to send messages for the duration of the test
        self.assert_produce_consume("before_broker_stop")

        node = self.kafka.leader(self.inputTopic)

        self.kafka.stop_node(node)

        time.sleep(broker_down_time_in_seconds)

        self.kafka.start_node(node)

        self.assert_produce_consume("after_broker_stop")

        self.kafka.stop()
    def test_streams_should_scale_in_while_brokers_down(self):
        self.kafka.start()

        configs = self.get_configs(
            extra_configs=",application.id=shutdown_with_broker_down")

        processor = StreamsBrokerDownResilienceService(self.test_context,
                                                       self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)
        processor_3.start()

        # need to wait for rebalance once
        self.wait_for_verification(
            processor_3, "State transition from REBALANCING to RUNNING",
            processor_3.LOG_FILE)

        # assert streams can process when starting with broker down
        self.assert_produce_consume("waiting for rebalance to complete",
                                    num_messages=9)

        message = "processed3messages"

        self.wait_for_verification(processor, message, processor.STDOUT_FILE)
        self.wait_for_verification(processor_2, message,
                                   processor_2.STDOUT_FILE)
        self.wait_for_verification(processor_3, message,
                                   processor_3.STDOUT_FILE)

        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        processor.stop()
        processor_2.stop()

        shutdown_message = "Complete shutdown of streams resilience test app now"
        self.wait_for_verification(processor, shutdown_message,
                                   processor.STDOUT_FILE)
        self.wait_for_verification(processor_2, shutdown_message,
                                   processor_2.STDOUT_FILE)

        self.kafka.start_node(node)

        self.assert_produce_consume(
            "sending_message_after_stopping_streams_instance_bouncing_broker",
            num_messages=9)

        self.wait_for_verification(processor_3, "processed9messages",
                                   processor_3.STDOUT_FILE)

        self.kafka.stop()
    def test_streams_runs_with_broker_down_initially(self):
        self.kafka.start()
        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        configs = self.get_configs(
            extra_configs=",application.id=starting_wo_broker_id")

        # start streams with broker down initially
        processor = StreamsBrokerDownResilienceService(self.test_context,
                                                       self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)
        processor_3.start()

        broker_unavailable_message = "Broker may not be available"

        # verify streams instances unable to connect to broker, kept trying
        self.wait_for_verification(processor, broker_unavailable_message,
                                   processor.LOG_FILE, 100)
        self.wait_for_verification(processor_2, broker_unavailable_message,
                                   processor_2.LOG_FILE, 100)
        self.wait_for_verification(processor_3, broker_unavailable_message,
                                   processor_3.LOG_FILE, 100)

        # now start broker
        self.kafka.start_node(node)

        # assert streams can process when starting with broker down
        self.assert_produce_consume("running_with_broker_down_initially",
                                    num_messages=9)

        message = "processed3messages"
        # need to show all 3 instances processed messages
        self.wait_for_verification(processor, message, processor.STDOUT_FILE)
        self.wait_for_verification(processor_2, message,
                                   processor_2.STDOUT_FILE)
        self.wait_for_verification(processor_3, message,
                                   processor_3.STDOUT_FILE)

        self.kafka.stop()
    def test_streams_runs_with_broker_down_initially(self):
        self.kafka.start()
        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        configs = self.get_configs(
            extra_configs=",application.id=starting_wo_broker_id")

        # start streams with broker down initially
        processor = StreamsBrokerDownResilienceService(self.test_context,
                                                       self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)
        processor_3.start()

        broker_unavailable_message = "Broker may not be available"

        # verify streams instances unable to connect to broker, kept trying
        self.wait_for_verification(processor, broker_unavailable_message,
                                   processor.LOG_FILE, 10)
        self.wait_for_verification(processor_2, broker_unavailable_message,
                                   processor_2.LOG_FILE, 10)
        self.wait_for_verification(processor_3, broker_unavailable_message,
                                   processor_3.LOG_FILE, 10)

        with processor.node.account.monitor_log(
                processor.LOG_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(
                    processor_2.LOG_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(
                        processor_3.LOG_FILE) as monitor_3:
                    self.kafka.start_node(node)

                    monitor_1.wait_until(
                        self.connected_message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.connected_message)
                        + str(processor.node.account))
                    monitor_2.wait_until(
                        self.connected_message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.connected_message)
                        + str(processor_2.node.account))
                    monitor_3.wait_until(
                        self.connected_message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.connected_message)
                        + str(processor_3.node.account))

        with processor.node.account.monitor_log(
                processor.STDOUT_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(
                    processor_2.STDOUT_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(
                        processor_3.STDOUT_FILE) as monitor_3:

                    self.assert_produce(
                        self.inputTopic,
                        "sending_message_after_broker_down_initially",
                        num_messages=self.num_messages,
                        timeout_sec=120)

                    monitor_1.wait_until(
                        self.message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.message) +
                        str(processor.node.account))
                    monitor_2.wait_until(
                        self.message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.message) +
                        str(processor_2.node.account))
                    monitor_3.wait_until(
                        self.message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.message) +
                        str(processor_3.node.account))

                    self.assert_consume(
                        self.client_id,
                        "consuming_message_after_broker_down_initially",
                        self.outputTopic,
                        num_messages=self.num_messages,
                        timeout_sec=120)

        self.kafka.stop()
    def test_streams_should_failover_while_brokers_down(self):
        self.kafka.start()

        # TODO KIP-441: consider rewriting the test for HighAvailabilityTaskAssignor
        configs = self.get_configs(
            extra_configs=",application.id=failover_with_broker_down" +
            ",internal.task.assignor.class=org.apache.kafka.streams.processor.internals.assignment.StickyTaskAssignor"
        )

        processor = StreamsBrokerDownResilienceService(self.test_context,
                                                       self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)

        # need to wait for rebalance once
        rebalance = "State transition from REBALANCING to RUNNING"
        with processor_3.node.account.monitor_log(
                processor_3.LOG_FILE) as monitor:
            processor_3.start()

            monitor.wait_until(
                rebalance,
                timeout_sec=120,
                err_msg=("Never saw output '%s' on " % rebalance) +
                str(processor_3.node.account))

        with processor.node.account.monitor_log(
                processor.STDOUT_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(
                    processor_2.STDOUT_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(
                        processor_3.STDOUT_FILE) as monitor_3:

                    self.assert_produce(
                        self.inputTopic,
                        "sending_message_after_normal_broker_start",
                        num_messages=self.num_messages,
                        timeout_sec=120)

                    monitor_1.wait_until(
                        self.message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.message) +
                        str(processor.node.account))
                    monitor_2.wait_until(
                        self.message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.message) +
                        str(processor_2.node.account))
                    monitor_3.wait_until(
                        self.message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.message) +
                        str(processor_3.node.account))

                    self.assert_consume(
                        self.client_id,
                        "consuming_message_after_normal_broker_start",
                        self.outputTopic,
                        num_messages=self.num_messages,
                        timeout_sec=120)

        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        processor.abortThenRestart()
        processor_2.abortThenRestart()
        processor_3.abortThenRestart()

        with processor.node.account.monitor_log(
                processor.LOG_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(
                    processor_2.LOG_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(
                        processor_3.LOG_FILE) as monitor_3:
                    self.kafka.start_node(node)

                    monitor_1.wait_until(
                        self.connected_message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.connected_message)
                        + str(processor.node.account))
                    monitor_2.wait_until(
                        self.connected_message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.connected_message)
                        + str(processor_2.node.account))
                    monitor_3.wait_until(
                        self.connected_message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.connected_message)
                        + str(processor_3.node.account))

        with processor.node.account.monitor_log(
                processor.STDOUT_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(
                    processor_2.STDOUT_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(
                        processor_3.STDOUT_FILE) as monitor_3:

                    self.assert_produce(
                        self.inputTopic,
                        "sending_message_after_hard_bouncing_streams_instance_bouncing_broker",
                        num_messages=self.num_messages,
                        timeout_sec=120)

                    monitor_1.wait_until(
                        self.message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.message) +
                        str(processor.node.account))
                    monitor_2.wait_until(
                        self.message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.message) +
                        str(processor_2.node.account))
                    monitor_3.wait_until(
                        self.message,
                        timeout_sec=120,
                        err_msg=("Never saw '%s' on " % self.message) +
                        str(processor_3.node.account))

                    self.assert_consume(
                        self.client_id,
                        "consuming_message_after_stopping_streams_instance_bouncing_broker",
                        self.outputTopic,
                        num_messages=self.num_messages,
                        timeout_sec=120)
        self.kafka.stop()
    def test_streams_should_failover_while_brokers_down(self):
        self.kafka.start()

        configs = self.get_configs(
            extra_configs=",application.id=failover_with_broker_down")

        processor = StreamsBrokerDownResilienceService(self.test_context,
                                                       self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(
            self.test_context, self.kafka, configs)
        processor_3.start()

        # need to wait for rebalance once
        self.wait_for_verification(
            processor_3, "State transition from REBALANCING to RUNNING",
            processor_3.LOG_FILE)

        # assert streams can process when starting with broker up
        self.assert_produce_consume(self.inputTopic,
                                    self.outputTopic,
                                    self.client_id,
                                    "waiting for rebalance to complete",
                                    num_messages=9,
                                    timeout_sec=120)

        message = "processed3messages"

        self.wait_for_verification(processor, message, processor.STDOUT_FILE)
        self.wait_for_verification(processor_2, message,
                                   processor_2.STDOUT_FILE)
        self.wait_for_verification(processor_3, message,
                                   processor_3.STDOUT_FILE)

        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        processor.abortThenRestart()
        processor_2.abortThenRestart()
        processor_3.abortThenRestart()

        self.kafka.start_node(node)

        self.assert_produce_consume(
            self.inputTopic,
            self.outputTopic,
            self.client_id,
            "sending_message_after_hard_bouncing_streams_instance_bouncing_broker",
            num_messages=9,
            timeout_sec=120)

        self.kafka.stop()
    def test_streams_runs_with_broker_down_initially(self):
        self.kafka.start()
        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        configs = self.get_configs(extra_configs=",application.id=starting_wo_broker_id")

        # start streams with broker down initially
        processor = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor_3.start()

        broker_unavailable_message = "Broker may not be available"

        # verify streams instances unable to connect to broker, kept trying
        self.wait_for_verification(processor, broker_unavailable_message, processor.LOG_FILE, 10)
        self.wait_for_verification(processor_2, broker_unavailable_message, processor_2.LOG_FILE, 10)
        self.wait_for_verification(processor_3, broker_unavailable_message, processor_3.LOG_FILE, 10)

        with processor.node.account.monitor_log(processor.LOG_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(processor_2.LOG_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(processor_3.LOG_FILE) as monitor_3:
                    self.kafka.start_node(node)

                    monitor_1.wait_until(self.connected_message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.connected_message) + str(processor.node.account))
                    monitor_2.wait_until(self.connected_message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.connected_message) + str(processor_2.node.account))
                    monitor_3.wait_until(self.connected_message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.connected_message) + str(processor_3.node.account))

        with processor.node.account.monitor_log(processor.STDOUT_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(processor_2.STDOUT_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(processor_3.STDOUT_FILE) as monitor_3:

                    self.assert_produce(self.inputTopic,
                                        "sending_message_after_broker_down_initially",
                                        num_messages=self.num_messages,
                                        timeout_sec=120)

                    monitor_1.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor.node.account))
                    monitor_2.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor_2.node.account))
                    monitor_3.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor_3.node.account))

                    self.assert_consume(self.client_id,
                                        "consuming_message_after_broker_down_initially",
                                        self.outputTopic,
                                        num_messages=self.num_messages,
                                        timeout_sec=120)

        self.kafka.stop()
    def test_streams_should_failover_while_brokers_down(self):
        self.kafka.start()

        configs = self.get_configs(extra_configs=",application.id=failover_with_broker_down")

        processor = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)

        # need to wait for rebalance once
        rebalance = "State transition from REBALANCING to RUNNING"
        with processor_3.node.account.monitor_log(processor_3.LOG_FILE) as monitor:
            processor_3.start()

            monitor.wait_until(rebalance,
                               timeout_sec=120,
                               err_msg=("Never saw output '%s' on " % rebalance) + str(processor_3.node.account))

        with processor.node.account.monitor_log(processor.STDOUT_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(processor_2.STDOUT_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(processor_3.STDOUT_FILE) as monitor_3:

                    self.assert_produce(self.inputTopic,
                                        "sending_message_after_normal_broker_start",
                                        num_messages=self.num_messages,
                                        timeout_sec=120)

                    monitor_1.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor.node.account))
                    monitor_2.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor_2.node.account))
                    monitor_3.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor_3.node.account))

                    self.assert_consume(self.client_id,
                                        "consuming_message_after_normal_broker_start",
                                        self.outputTopic,
                                        num_messages=self.num_messages,
                                        timeout_sec=120)

        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        processor.abortThenRestart()
        processor_2.abortThenRestart()
        processor_3.abortThenRestart()

        with processor.node.account.monitor_log(processor.LOG_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(processor_2.LOG_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(processor_3.LOG_FILE) as monitor_3:
                    self.kafka.start_node(node)

                    monitor_1.wait_until(self.connected_message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.connected_message) + str(processor.node.account))
                    monitor_2.wait_until(self.connected_message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.connected_message) + str(processor_2.node.account))
                    monitor_3.wait_until(self.connected_message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.connected_message) + str(processor_3.node.account))

        with processor.node.account.monitor_log(processor.STDOUT_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(processor_2.STDOUT_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(processor_3.STDOUT_FILE) as monitor_3:

                    self.assert_produce(self.inputTopic,
                                        "sending_message_after_hard_bouncing_streams_instance_bouncing_broker",
                                        num_messages=self.num_messages,
                                        timeout_sec=120)

                    monitor_1.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor.node.account))
                    monitor_2.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor_2.node.account))
                    monitor_3.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor_3.node.account))

                    self.assert_consume(self.client_id,
                                        "consuming_message_after_stopping_streams_instance_bouncing_broker",
                                        self.outputTopic,
                                        num_messages=self.num_messages,
                                        timeout_sec=120)
        self.kafka.stop()
    def test_streams_should_scale_in_while_brokers_down(self):
        self.kafka.start()

        configs = self.get_configs(extra_configs=",application.id=shutdown_with_broker_down")

        processor = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)

        # need to wait for rebalance once
        rebalance = "State transition from REBALANCING to RUNNING"
        with processor_3.node.account.monitor_log(processor_3.LOG_FILE) as monitor:
            processor_3.start()

            monitor.wait_until(rebalance,
                               timeout_sec=120,
                               err_msg=("Never saw output '%s' on " % rebalance) + str(processor_3.node.account))

        with processor.node.account.monitor_log(processor.STDOUT_FILE) as monitor_1:
            with processor_2.node.account.monitor_log(processor_2.STDOUT_FILE) as monitor_2:
                with processor_3.node.account.monitor_log(processor_3.STDOUT_FILE) as monitor_3:

                    self.assert_produce(self.inputTopic,
                                        "sending_message_normal_broker_start",
                                        num_messages=self.num_messages,
                                        timeout_sec=120)

                    monitor_1.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor.node.account))
                    monitor_2.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor_2.node.account))
                    monitor_3.wait_until(self.message,
                                         timeout_sec=120,
                                         err_msg=("Never saw '%s' on " % self.message) + str(processor_3.node.account))

                    self.assert_consume(self.client_id,
                                        "consuming_message_normal_broker_start",
                                        self.outputTopic,
                                        num_messages=self.num_messages,
                                        timeout_sec=120)

        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        processor.stop()
        processor_2.stop()

        shutdown_message = "Complete shutdown of streams resilience test app now"
        self.wait_for_verification(processor, shutdown_message, processor.STDOUT_FILE)
        self.wait_for_verification(processor_2, shutdown_message, processor_2.STDOUT_FILE)

        with processor_3.node.account.monitor_log(processor_3.LOG_FILE) as monitor_3:
            self.kafka.start_node(node)

            monitor_3.wait_until(self.connected_message,
                                 timeout_sec=120,
                                 err_msg=("Never saw '%s' on " % self.connected_message) + str(processor_3.node.account))

        self.assert_produce_consume(self.inputTopic,
                                    self.outputTopic,
                                    self.client_id,
                                    "sending_message_after_stopping_streams_instance_bouncing_broker",
                                    num_messages=self.num_messages,
                                    timeout_sec=120)

        self.kafka.stop()
    def test_streams_should_failover_while_brokers_down(self):
        self.kafka.start()

        configs = self.get_configs(extra_configs=",application.id=failover_with_broker_down")

        processor = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor.start()

        processor_2 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor_2.start()

        processor_3 = StreamsBrokerDownResilienceService(self.test_context, self.kafka, configs)
        processor_3.start()

        # need to wait for rebalance once
        self.wait_for_verification(processor_3, "State transition from REBALANCING to RUNNING", processor_3.LOG_FILE)

        # assert streams can process when starting with broker up
        self.assert_produce_consume(self.inputTopic,
                                    self.outputTopic,
                                    self.client_id,
                                    "waiting for rebalance to complete",
                                    num_messages=9,
                                    timeout_sec=120)

        message = "processed3messages"

        self.wait_for_verification(processor, message, processor.STDOUT_FILE)
        self.wait_for_verification(processor_2, message, processor_2.STDOUT_FILE)
        self.wait_for_verification(processor_3, message, processor_3.STDOUT_FILE)

        node = self.kafka.leader(self.inputTopic)
        self.kafka.stop_node(node)

        processor.abortThenRestart()
        processor_2.abortThenRestart()
        processor_3.abortThenRestart()

        self.kafka.start_node(node)

        self.assert_produce_consume(self.inputTopic,
                                    self.outputTopic,
                                    self.client_id,
                                    "sending_message_after_hard_bouncing_streams_instance_bouncing_broker",
                                    num_messages=9,
                                    timeout_sec=120)

        self.kafka.stop()