Code Example #1
class LogDirFailureTest(ProduceConsumeValidateTest):
    """
    Note that consuming is a bit tricky, at least with console consumer. The goal is to consume all messages
    (foreach partition) in the topic. In this case, waiting for the last message may cause the consumer to stop
    too soon since console consumer is consuming multiple partitions from a single thread and therefore we lose
    ordering guarantees.

    Waiting on a count of consumed messages can be unreliable: if we stop consuming when num_consumed == num_acked,
    we might exit early if some messages are duplicated (though not an issue here since producer retries==0)

    Therefore rely here on the consumer.timeout.ms setting which times out on the interval between successively
    consumed messages. Since we run the producer to completion before running the consumer, this is a reliable
    indicator that nothing is left to consume.
    """
    def __init__(self, test_context):
        """:type test_context: ducktape.tests.test.TestContext"""
        super(LogDirFailureTest, self).__init__(test_context=test_context)

        self.topic1 = "test_topic_1"
        self.topic2 = "test_topic_2"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(
            test_context,
            num_nodes=3,
            zk=self.zk,
            topics={
                self.topic1: {
                    "partitions": 1,
                    "replication-factor": 3,
                    "configs": {
                        "min.insync.replicas": 2
                    }
                },
                self.topic2: {
                    "partitions": 1,
                    "replication-factor": 3,
                    "configs": {
                        "min.insync.replicas": 1
                    }
                }
            },
            # Set log.roll.ms to 3 seconds so that the broker detects the disk error sooner when it creates a new log segment.
            # Otherwise the broker would still be able to read/write the log file even if the log directory is inaccessible.
            server_prop_overides=[
                [config_property.LOG_FLUSH_INTERVAL_MESSAGE, "5"],
                [config_property.REPLICA_HIGHWATERMARK_CHECKPOINT_INTERVAL_MS, "60000"],
                [config_property.LOG_ROLL_TIME_MS, "3000"]
            ])

        self.producer_throughput = 1000
        self.num_producers = 1
        self.num_consumers = 1

    def setUp(self):
        self.zk.start()

    def min_cluster_size(self):
        """Override this since we're adding services outside of the constructor"""
        return super(LogDirFailureTest, self).min_cluster_size() + self.num_producers * 2 + self.num_consumers * 2

    @cluster(num_nodes=9)
    @matrix(bounce_broker=[False, True],
            broker_type=["leader", "follower"],
            security_protocol=["PLAINTEXT"])
    def test_replication_with_disk_failure(self, bounce_broker,
                                           security_protocol, broker_type):
        """Replication tests.
        These tests verify that replication provides simple durability guarantees by checking that data acked by
        brokers is still available for consumption in the face of various failure scenarios.

        Setup: 1 zk, 3 kafka nodes, 1 topic with partitions=1, replication-factor=3, and min.insync.replicas=2
               and another topic with partitions=1, replication-factor=3, and min.insync.replicas=1
        
            - Produce messages in the background
            - Consume messages in the background
            - Drive a disk failure by making a log directory on a leader or follower broker inaccessible (and optionally bounce that broker)
            - When done driving failures, stop producing, and finish consuming
            - Validate that every acked message was consumed
        """

        self.kafka.security_protocol = security_protocol
        self.kafka.interbroker_security_protocol = security_protocol
        self.kafka.start()

        try:
            # Initialize producer/consumer for topic1
            self.producer = VerifiableProducer(
                self.test_context,
                self.num_producers,
                self.kafka,
                self.topic1,
                throughput=self.producer_throughput)
            self.consumer = ConsoleConsumer(self.test_context,
                                            self.num_consumers,
                                            self.kafka,
                                            self.topic1,
                                            group_id="test-consumer-group-1",
                                            new_consumer=False,
                                            consumer_timeout_ms=60000,
                                            message_validator=is_int)
            self.start_producer_and_consumer()

            # Get a replica of the partition of topic1 and make its first log directory offline by changing the log dir's permissions.
            # We assume that the partition of topic1 is created in the first log directory of the respective brokers.
            broker_node = select_node(self, broker_type, self.topic1)
            broker_idx = self.kafka.idx(broker_node)
            assert broker_idx in self.kafka.isr_idx_list(self.topic1), \
                   "Broker %d should be in isr set %s" % (broker_idx, str(self.kafka.isr_idx_list(self.topic1)))

            self.logger.debug("Making log dir %s inaccessible" %
                              (KafkaService.DATA_LOG_DIR_1))
            cmd = "chmod a-w %s -R" % (KafkaService.DATA_LOG_DIR_1)
            broker_node.account.ssh(cmd, allow_fail=False)

            if bounce_broker:
                self.kafka.restart_node(broker_node, clean_shutdown=True)

            # Verify the following:
            # 1) The broker with offline log directory is not the leader of the partition of topic1
            # 2) The broker with offline log directory is not in the ISR
            # 3) The broker with offline log directory is still online
            # 4) Messages can still be produced and consumed from topic1
            wait_until(lambda: self.kafka.leader(self.topic1, partition=0) != broker_node,
                       timeout_sec=60,
                       err_msg="Broker %d should not be leader of topic %s and partition 0" % (broker_idx, self.topic1))
            assert self.kafka.alive(broker_node), "Broker %d should still be online" % broker_idx
            wait_until(lambda: broker_idx not in self.kafka.isr_idx_list(self.topic1),
                       timeout_sec=60,
                       err_msg="Broker %d should not be in isr set %s" % (broker_idx, str(self.kafka.isr_idx_list(self.topic1))))

            self.stop_producer_and_consumer()
            self.validate()

            # Shutdown all other brokers so that the broker with offline log dir is the only online broker
            offline_nodes = []
            for node in self.kafka.nodes:
                if broker_node != node:
                    offline_nodes.append(node)
                    self.logger.debug("Hard shutdown broker %d" %
                                      (self.kafka.idx(node)))
                    self.kafka.stop_node(node)

            # Verify the following:
            # 1) The broker with offline directory is the only in-sync broker of the partition of topic2
            # 2) Messages can still be produced and consumed from topic2
            self.producer = VerifiableProducer(
                self.test_context,
                self.num_producers,
                self.kafka,
                self.topic2,
                throughput=self.producer_throughput,
                offline_nodes=offline_nodes)
            self.consumer = ConsoleConsumer(self.test_context,
                                            self.num_consumers,
                                            self.kafka,
                                            self.topic2,
                                            group_id="test-consumer-group-2",
                                            new_consumer=False,
                                            consumer_timeout_ms=60000,
                                            message_validator=is_int)
            self.start_producer_and_consumer()

            assert self.kafka.isr_idx_list(self.topic2) == [broker_idx], \
                   "In-sync replicas of topic %s and partition 0 should be %s" % (self.topic2, str([broker_idx]))

            self.stop_producer_and_consumer()
            self.validate()

        except BaseException:
            for s in self.test_context.services:
                self.mark_for_collect(s)
            raise
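
Both variants of this test call a module-level select_node(test, broker_type, topic) helper that is not shown in the listings. The sketch below is an assumption about its shape, built from the KafkaService.leader() accessor the test already uses plus an assumed KafkaService.replicas() lookup; it is not the exact upstream implementation.

def select_node(test, broker_type, topic):
    """Pick a broker of the requested type for partition 0 of `topic` (illustrative sketch only)."""
    leader = test.kafka.leader(topic, partition=0)
    if broker_type == "leader":
        return leader
    elif broker_type == "follower":
        # Any replica of partition 0 that is not the current leader.
        return [r for r in test.kafka.replicas(topic, partition=0) if r != leader][0]
    else:
        raise Exception("Unexpected broker type %s" % broker_type)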
Code Example #2
class LogDirFailureTest(ProduceConsumeValidateTest):
    """
    Note that consuming is a bit tricky, at least with console consumer. The goal is to consume all messages
    (foreach partition) in the topic. In this case, waiting for the last message may cause the consumer to stop
    too soon since console consumer is consuming multiple partitions from a single thread and therefore we lose
    ordering guarantees.

    Waiting on a count of consumed messages can be unreliable: if we stop consuming when num_consumed == num_acked,
    we might exit early if some messages are duplicated (though not an issue here since producer retries==0)

    Therefore rely here on the consumer.timeout.ms setting which times out on the interval between successively
    consumed messages. Since we run the producer to completion before running the consumer, this is a reliable
    indicator that nothing is left to consume.
    """

    def __init__(self, test_context):
        """:type test_context: ducktape.tests.test.TestContext"""
        super(LogDirFailureTest, self).__init__(test_context=test_context)

        self.topic1 = "test_topic_1"
        self.topic2 = "test_topic_2"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(test_context,
                                  num_nodes=3,
                                  zk=self.zk,
                                  topics={
                                      self.topic1: {"partitions": 1, "replication-factor": 3, "configs": {"min.insync.replicas": 1}},
                                      self.topic2: {"partitions": 1, "replication-factor": 3, "configs": {"min.insync.replicas": 2}}
                                  },
                                  # Set log.roll.ms to 3 seconds so that the broker detects the disk error sooner when it creates a new log segment.
                                  # Otherwise the broker would still be able to read/write the log file even if the log directory is inaccessible.
                                  server_prop_overides=[
                                      [config_property.OFFSETS_TOPIC_NUM_PARTITIONS, "1"],
                                      [config_property.LOG_FLUSH_INTERVAL_MESSAGE, "5"],
                                      [config_property.REPLICA_HIGHWATERMARK_CHECKPOINT_INTERVAL_MS, "60000"],
                                      [config_property.LOG_ROLL_TIME_MS, "3000"]
                                  ])

        self.producer_throughput = 1000
        self.num_producers = 1
        self.num_consumers = 1

    def setUp(self):
        self.zk.start()

    def min_cluster_size(self):
        """Override this since we're adding services outside of the constructor"""
        return super(LogDirFailureTest, self).min_cluster_size() + self.num_producers * 2 + self.num_consumers * 2

    @cluster(num_nodes=9)
    @matrix(bounce_broker=[False, True], broker_type=["leader", "follower"], security_protocol=["PLAINTEXT"])
    def test_replication_with_disk_failure(self, bounce_broker, security_protocol, broker_type):
        """Replication tests.
        These tests verify that replication provides simple durability guarantees by checking that data acked by
        brokers is still available for consumption in the face of various failure scenarios.

        Setup: 1 zk, 3 kafka nodes, 1 topic with partitions=1, replication-factor=3, and min.insync.replicas=2
               and another topic with partitions=1, replication-factor=3, and min.insync.replicas=1
            - Produce messages in the background
            - Consume messages in the background
            - Drive a disk failure by making a log directory on a leader or follower broker inaccessible (and optionally bounce that broker)
            - When done driving failures, stop producing, and finish consuming
            - Validate that every acked message was consumed
        """

        self.kafka.security_protocol = security_protocol
        self.kafka.interbroker_security_protocol = security_protocol
        self.kafka.start()

        try:
            # Initialize producer/consumer for topic2
            self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic2,
                                               throughput=self.producer_throughput)
            self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic2, group_id="test-consumer-group-1",
                                            consumer_timeout_ms=60000, message_validator=is_int)
            self.start_producer_and_consumer()

            # Get a replica of the partition of topic2 and make its log directory offline by changing the log dir's permissions.
            # We assume that the partition of topic2 is created in the second log directory of the respective brokers.
            broker_node = select_node(self, broker_type, self.topic2)
            broker_idx = self.kafka.idx(broker_node)
            assert broker_idx in self.kafka.isr_idx_list(self.topic2), \
                   "Broker %d should be in isr set %s" % (broker_idx, str(self.kafka.isr_idx_list(self.topic2)))

            # Verify that topic1 and the consumer offset topic are in the first log directory and topic2 is in the second log directory
            topic_1_partition_0 = KafkaService.DATA_LOG_DIR_1 + "/test_topic_1-0"
            topic_2_partition_0 = KafkaService.DATA_LOG_DIR_2 + "/test_topic_2-0"
            offset_topic_partition_0 = KafkaService.DATA_LOG_DIR_1 + "/__consumer_offsets-0"
            for path in [topic_1_partition_0, topic_2_partition_0, offset_topic_partition_0]:
                assert path_exists(broker_node, path), "%s should exist" % path

            self.logger.debug("Making log dir %s inaccessible" % (KafkaService.DATA_LOG_DIR_2))
            cmd = "chmod a-w %s -R" % (KafkaService.DATA_LOG_DIR_2)
            broker_node.account.ssh(cmd, allow_fail=False)

            if bounce_broker:
                self.kafka.restart_node(broker_node, clean_shutdown=True)

            # Verify the following:
            # 1) The broker with offline log directory is not the leader of the partition of topic2
            # 2) The broker with offline log directory is not in the ISR
            # 3) The broker with offline log directory is still online
            # 4) Messages can still be produced and consumed from topic2
            wait_until(lambda: self.kafka.leader(self.topic2, partition=0) != broker_node,
                       timeout_sec=60,
                       err_msg="Broker %d should not be leader of topic %s and partition 0" % (broker_idx, self.topic2))
            assert self.kafka.alive(broker_node), "Broker %d should still be online" % broker_idx
            wait_until(lambda: broker_idx not in self.kafka.isr_idx_list(self.topic2),
                       timeout_sec=60,
                       err_msg="Broker %d should not be in isr set %s" % (broker_idx, str(self.kafka.isr_idx_list(self.topic2))))

            self.stop_producer_and_consumer()
            self.validate()

            # Shutdown all other brokers so that the broker with offline log dir is the only online broker
            offline_nodes = []
            for node in self.kafka.nodes:
                if broker_node != node:
                    offline_nodes.append(node)
                    self.logger.debug("Hard shutdown broker %d" % (self.kafka.idx(node)))
                    self.kafka.stop_node(node)

            # Verify the following:
            # 1) The broker with offline directory is the only in-sync broker of the partition of topic1
            # 2) Messages can still be produced and consumed from topic1
            self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic1,
                                               throughput=self.producer_throughput, offline_nodes=offline_nodes)
            self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic1, group_id="test-consumer-group-2",
                                            consumer_timeout_ms=90000, message_validator=is_int)
            self.consumer_start_timeout_sec = 90
            self.start_producer_and_consumer()

            assert self.kafka.isr_idx_list(self.topic1) == [broker_idx], \
                   "In-sync replicas of topic %s and partition 0 should be %s" % (self.topic1, str([broker_idx]))

            self.stop_producer_and_consumer()
            self.validate()

        except BaseException:
            for s in self.test_context.services:
                self.mark_for_collect(s)
            raise
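
Code Example #2 also relies on a path_exists(node, path) helper that is not part of the listing. A minimal sketch, assuming ducktape's node.account.ssh() returns the remote command's exit code when allow_fail=True (treat that return-code behaviour as an assumption):

def path_exists(node, path):
    # Illustrative sketch: `ls` exits 0 only when the path exists on the remote node.
    return node.account.ssh("ls %s" % path, allow_fail=True) == 0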
Code Example #3
class TestSnapshots(ProduceConsumeValidateTest):

    TOPIC_NAME_PREFIX = "test_topic_"

    def __init__(self, test_context):
        super(TestSnapshots, self).__init__(test_context=test_context)
        self.topics_created = 0
        self.topic = "test_topic"
        self.partitions = 3
        self.replication_factor = 3
        self.num_nodes = 3

        # Producer and consumer
        self.producer_throughput = 1000
        self.num_producers = 1
        self.num_consumers = 1

        security_protocol = 'PLAINTEXT'
        # Set up custom config so that snapshots are generated deterministically
        self.kafka = KafkaService(
            self.test_context,
            self.num_nodes,
            zk=None,
            topics={
                self.topic: {
                    "partitions": self.partitions,
                    "replication-factor": self.replication_factor,
                    'configs': {
                        "min.insync.replicas": 2
                    }
                }
            },
            server_prop_overrides=[
                [config_property.METADATA_LOG_DIR, KafkaService.METADATA_LOG_DIR],
                [config_property.METADATA_LOG_SEGMENT_MS, "10000"],
                [config_property.METADATA_LOG_RETENTION_BYTES, "2048"],
                [config_property.METADATA_LOG_BYTES_BETWEEN_SNAPSHOTS, "2048"]
            ])

        self.kafka.interbroker_security_protocol = security_protocol
        self.kafka.security_protocol = security_protocol

    def setUp(self):
        # Start the cluster and ensure that a snapshot is generated
        self.logger.info(
            "Starting the cluster and running until snapshot creation")

        assert quorum.for_test(self.test_context) in quorum.all_kraft, \
                "Snapshot tests should be run in KRaft mode only"

        self.kafka.start()

        topic_count = 10
        self.topics_created += self.create_n_topics(topic_count)

        if self.kafka.remote_controller_quorum:
            self.controller_nodes = self.kafka.remote_controller_quorum.nodes
        else:
            self.controller_nodes = self.kafka.nodes[:self.kafka.num_nodes_controller_role]

        # Waiting for snapshot creation and first log segment
        # cleanup on all controller nodes
        for node in self.controller_nodes:
            self.logger.debug("Waiting for snapshot on: %s" %
                              self.kafka.who_am_i(node))
            self.wait_for_log_segment_delete(node)
            self.wait_for_snapshot(node)
        self.logger.debug("Verified Snapshots exist on controller nodes")

    def create_n_topics(self, topic_count):
        for i in range(self.topics_created, self.topics_created + topic_count):
            topic = "%s%d" % (TestSnapshots.TOPIC_NAME_PREFIX, i)
            self.logger.debug("Creating topic %s" % topic)
            topic_cfg = {
                "topic": topic,
                "partitions": self.partitions,
                "replication-factor": self.replication_factor,
                "configs": {
                    "min.insync.replicas": 2
                }
            }
            self.kafka.create_topic(topic_cfg)
        self.logger.debug("Created %d more topics" % topic_count)
        return topic_count

    def wait_for_log_segment_delete(self, node):
        file_path = self.kafka.METADATA_FIRST_LOG
        # Wait until the first log segment in metadata log is marked for deletion
        wait_until(
            lambda: not self.file_exists(node, file_path),
            timeout_sec=100,
            backoff_sec=1,
            err_msg=
            "Not able to verify cleanup of log file %s in a reasonable amount of time"
            % file_path)

    def wait_for_snapshot(self, node):
        # Wait for a snapshot file to show up
        file_path = self.kafka.METADATA_SNAPSHOT_SEARCH_STR
        wait_until(
            lambda: self.file_exists(node, file_path),
            timeout_sec=100,
            backoff_sec=1,
            err_msg=
            "Not able to verify snapshot existence in a reasonable amount of time"
        )

    def file_exists(self, node, file_path):
        # Check whether the given file exists on the node
        self.logger.debug("Checking if file %s exists" % file_path)
        cmd = "ls %s" % file_path
        files = node.account.ssh_output(cmd,
                                        allow_fail=True,
                                        combine_stderr=False)

        if len(files) == 0:
            self.logger.debug("File %s does not exist" % file_path)
            return False
        else:
            self.logger.debug("File %s was found" % file_path)
            return True

    def validate_success(self, topic=None):
        if topic is None:
            # Create a new topic
            topic = "%s%d" % (TestSnapshots.TOPIC_NAME_PREFIX,
                              self.topics_created)
            self.topics_created += self.create_n_topics(topic_count=1)

        # Produce to the newly created topic to ensure broker has caught up
        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           topic,
                                           throughput=self.producer_throughput,
                                           message_validator=is_int)

        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        topic,
                                        consumer_timeout_ms=30000,
                                        message_validator=is_int)
        self.start_producer_and_consumer()
        self.stop_producer_and_consumer()
        self.validate()

    @cluster(num_nodes=9)
    @matrix(metadata_quorum=quorum.all_kraft)
    def test_broker(self, metadata_quorum=quorum.colocated_kraft):
        """ Test the ability of a broker to consume metadata snapshots
        and to recover the cluster metadata state using them

        The test ensures that there is at least one snapshot created on
        the controller quorum during the setup phase and that at least the first
        log segment in the metadata log has been marked for deletion, thereby ensuring
        that any observer of the log needs to always load a snapshot to catch
        up to the current metadata state.

        Each scenario is a progression over the previous one.
        The scenarios build on top of each other by:
        * Loading a snapshot
        * Loading a snapshot and some delta records
        * Loading a snapshot and delta and ensuring that the most recent metadata state
          has been caught up.

        Even though a subsequent scenario covers the previous one, they are all
        left in the test to make debugging a failure of the test easier
        e.g. if the first scenario passes and the second fails, it hints towards
        a problem with the application of delta records while catching up
        """

        # Scenario -- Re-init broker after cleaning up all persistent state
        node = random.choice(self.kafka.nodes)
        self.logger.debug("Scenario: kill-clean-start on broker node %s",
                          self.kafka.who_am_i(node))
        self.kafka.clean_node(node)
        self.kafka.start_node(node)

        # Scenario -- Re-init broker after cleaning up all persistent state
        # Create some metadata changes for the broker to consume as well.
        node = random.choice(self.kafka.nodes)
        self.logger.debug(
            "Scenario: kill-clean-create_topics-start on broker node %s",
            self.kafka.who_am_i(node))
        self.kafka.clean_node(node)
        # Now modify the cluster to create more metadata changes
        self.topics_created += self.create_n_topics(topic_count=10)
        self.kafka.start_node(node)

        # Scenario -- Re-init broker after cleaning up all persistent state
        # And ensure that the broker has replicated the metadata log
        node = random.choice(self.kafka.nodes)
        self.logger.debug(
            "Scenario: kill-clean-start-verify-produce on broker node %s",
            self.kafka.who_am_i(node))
        self.kafka.clean_node(node)
        self.kafka.start_node(node)
        # Create a topic where the affected broker must be the leader
        broker_topic = "%s%d" % (TestSnapshots.TOPIC_NAME_PREFIX,
                                 self.topics_created)
        self.topics_created += 1
        self.logger.debug("Creating topic %s" % broker_topic)
        topic_cfg = {
            "topic": broker_topic,
            "replica-assignment": self.kafka.idx(node),
            "configs": {
                "min.insync.replicas": 1
            }
        }
        self.kafka.create_topic(topic_cfg)

        # Produce to the newly created topic and make sure it works.
        self.validate_success(broker_topic)

    @cluster(num_nodes=9)
    @matrix(metadata_quorum=quorum.all_kraft)
    def test_controller(self, metadata_quorum=quorum.colocated_kraft):
        """ Test the ability of controllers to consume metadata snapshots
        and to recover the cluster metadata state using them

        The test ensures that there is at least one snapshot created on
        the controller quorum during the setup phase and that at least the first
        log segment in the metadata log has been marked for deletion, thereby ensuring
        that any observer of the log needs to always load a snapshot to catch
        up to the current metadata state.

        Each scenario is a progression over the previous one.
        The scenarios build on top of each other by:
        * Loading a snapshot
        * Loading a snapshot and some delta records
        * Loading a snapshot and delta and ensuring that the most recent metadata state
          has been caught up.

        Even though a subsequent scenario covers the previous one, they are all
        left in the test to make debugging a failure of the test easier
        e.g. if the first scenario passes and the second fails, it hints towards
        a problem with the application of delta records while catching up
        """

        # Scenario -- Re-init controllers with a clean kafka dir
        self.logger.debug("Scenario: kill-clean-start controller node")
        for node in self.controller_nodes:
            self.logger.debug("Restarting node: %s",
                              self.kafka.controller_quorum.who_am_i(node))
            self.kafka.controller_quorum.clean_node(node)
            self.kafka.controller_quorum.start_node(node)

        # Scenario -- Re-init controllers with a clean kafka dir and
        # make metadata changes while they are down.
        # This will force the entire quorum to load from snapshots
        # and verify the quorum's ability to catch up to the latest metadata
        self.logger.debug(
            "Scenario: kill-clean-create_topics-start on controller nodes")
        for node in self.controller_nodes:
            self.logger.debug("Restarting node: %s",
                              self.kafka.controller_quorum.who_am_i(node))
            self.kafka.controller_quorum.clean_node(node)
            # Now modify the cluster to create more metadata changes
            self.topics_created += self.create_n_topics(topic_count=5)
            self.kafka.controller_quorum.start_node(node)

        # Produce to a newly created topic and make sure it works.
        self.validate_success()
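
The waits in setUp above hinge on two KafkaService constants, METADATA_FIRST_LOG and METADATA_SNAPSHOT_SEARCH_STR, whose values are not shown. The sketch below illustrates plausible definitions, assuming the KRaft metadata log lives in a __cluster_metadata-0 directory under METADATA_LOG_DIR; the directory location and exact file names here are assumptions, and the real values are defined on KafkaService.

# Illustrative values only -- the real constants live on KafkaService.
METADATA_LOG_DIR = "/mnt/kafka/kafka-metadata-logs"  # assumed mount point
METADATA_FIRST_LOG = METADATA_LOG_DIR + "/__cluster_metadata-0/00000000000000000000.log"
METADATA_SNAPSHOT_SEARCH_STR = METADATA_LOG_DIR + "/__cluster_metadata-0/*.checkpoint"

With values like these, wait_for_log_segment_delete() waits for the very first metadata log segment to be removed by retention, and wait_for_snapshot() waits for any *.checkpoint snapshot file to appear in the same directory.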
Code Example #4
class FetchFromFollowerTest(ProduceConsumeValidateTest):

    RACK_AWARE_REPLICA_SELECTOR = "org.apache.kafka.common.replica.RackAwareReplicaSelector"
    METADATA_MAX_AGE_MS = 3000

    def __init__(self, test_context):
        super(FetchFromFollowerTest, self).__init__(test_context=test_context)
        self.jmx_tool = JmxTool(test_context, jmx_poll_ms=100)
        self.topic = "test_topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(test_context,
                                  num_nodes=3,
                                  zk=self.zk,
                                  topics={
                                      self.topic: {
                                          "partitions": 1,
                                          "replication-factor": 3,
                                          "configs": {
                                              "min.insync.replicas": 1
                                          }
                                      },
                                  },
                                  server_prop_overides=[[
                                      "replica.selector.class",
                                      self.RACK_AWARE_REPLICA_SELECTOR
                                  ]],
                                  per_node_server_prop_overrides={
                                      1: [("broker.rack", "rack-a")],
                                      2: [("broker.rack", "rack-b")],
                                      3: [("broker.rack", "rack-c")]
                                  })

        self.producer_throughput = 1000
        self.num_producers = 1
        self.num_consumers = 1

    def min_cluster_size(self):
        return super(FetchFromFollowerTest, self).min_cluster_size() + self.num_producers * 2 + self.num_consumers * 2

    def setUp(self):
        self.zk.start()
        self.kafka.start()

    @cluster(num_nodes=9)
    def test_consumer_preferred_read_replica(self):
        """
        This test starts up brokers with "broker.rack" and "replica.selector.class" configurations set. The replica
        selector is set to the rack-aware implementation. One of the brokers has a different rack than the other two.
        We then use a console consumer with the "client.rack" set to the same value as the differing broker. After
        producing some records, we verify that the client has been informed of the preferred replica and that all the
        records are properly consumed.
        """

        # Find the leader, configure consumer to be on a different rack
        leader_node = self.kafka.leader(self.topic, 0)
        leader_idx = self.kafka.idx(leader_node)
        non_leader_idx = 2 if leader_idx != 2 else 1
        non_leader_rack = "rack-b" if leader_idx != 2 else "rack-a"

        self.logger.debug("Leader %d %s" % (leader_idx, leader_node))
        self.logger.debug("Non-Leader %d %s" %
                          (non_leader_idx, non_leader_rack))

        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           self.topic,
                                           throughput=self.producer_throughput)
        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        client_id="console-consumer",
                                        group_id="test-consumer-group-1",
                                        consumer_timeout_ms=60000,
                                        message_validator=is_int,
                                        consumer_properties={
                                            "client.rack": non_leader_rack,
                                            "metadata.max.age.ms": self.METADATA_MAX_AGE_MS
                                        })

        # Start up and let some data get produced
        self.start_producer_and_consumer()
        time.sleep(self.METADATA_MAX_AGE_MS * 2. / 1000)

        consumer_node = self.consumer.nodes[0]
        consumer_idx = self.consumer.idx(consumer_node)
        read_replica_attribute = "preferred-read-replica"
        read_replica_mbean = "kafka.consumer:type=consumer-fetch-manager-metrics,client-id=%s,topic=%s,partition=%d" % \
                  ("console-consumer", self.topic, 0)
        self.jmx_tool.jmx_object_names = [read_replica_mbean]
        self.jmx_tool.jmx_attributes = [read_replica_attribute]
        self.jmx_tool.start_jmx_tool(consumer_idx, consumer_node)

        # Wait for at least one interval of "metadata.max.age.ms"
        time.sleep(self.METADATA_MAX_AGE_MS * 2. / 1000)

        # Read the JMX output
        self.jmx_tool.read_jmx_output(consumer_idx, consumer_node)

        all_captured_preferred_read_replicas = defaultdict(int)
        self.logger.debug(self.jmx_tool.jmx_stats)

        for ts, data in self.jmx_tool.jmx_stats[0].items():
            for k, v in data.items():
                if k.endswith(read_replica_attribute):
                    all_captured_preferred_read_replicas[int(v)] += 1

        self.logger.debug("Saw the following preferred read replicas %s",
                          dict(all_captured_preferred_read_replicas.items()))

        assert all_captured_preferred_read_replicas[non_leader_idx] > 0, \
            "Expected to see broker %d (%s) as a preferred replica" % (non_leader_idx, non_leader_rack)

        # Validate consumed messages
        self.stop_producer_and_consumer()
        self.validate()
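
Stripped of the test harness, test_consumer_preferred_read_replica exercises three settings: a rack-aware replica selector on the brokers, a broker.rack label per broker, and a matching client.rack on the consumer. A minimal sketch of those settings as plain Python dicts, with values copied from the test above (how they get applied to a real deployment is left open):

# Broker-side: enable rack-aware follower fetching and label this broker's rack.
broker_overrides = {
    "replica.selector.class": "org.apache.kafka.common.replica.RackAwareReplicaSelector",
    "broker.rack": "rack-b",  # e.g. the rack this broker runs in
}

# Consumer-side: declare the client's rack so the leader can point it at a
# same-rack follower, and refresh metadata often enough to notice the redirect.
consumer_properties = {
    "client.rack": "rack-b",
    "metadata.max.age.ms": 3000,
}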