Example 1
    def prepare_cluster(self, use_tls, use_sasl):
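        # Configure authentication (SASL and/or mTLS identity), start the
        # cluster, then bootstrap the users and ACLs that the tests rely on.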
        self.security = SecurityConfig()
        self.security.enable_sasl = use_sasl
        self.security.enable_mtls_identity = use_tls and not use_sasl

        if use_tls:
            self.tls = tls.TLSCertManager(self.logger)

            # cert for principal with no explicitly granted permissions
            self.base_user_cert = self.tls.create_cert(socket.gethostname(),
                                                       common_name="morty",
                                                       name="base_client")

            # cert for principal with cluster describe permissions
            self.cluster_describe_user_cert = self.tls.create_cert(
                socket.gethostname(),
                common_name="cluster_describe",
                name="cluster_describe_client")

            # cert for admin user used to bootstrap
            self.admin_user_cert = self.tls.create_cert(
                socket.gethostname(),
                common_name="admin",
                name="test_admin_client")

            self.security.tls_provider = MTLSProvider(self.tls)

        self.redpanda.set_security_settings(self.security)
        self.redpanda.start()

        # Admin API client, used below to activate features, create users and
        # check propagation across nodes.
        admin = Admin(self.redpanda)

        if self.security.enable_mtls_identity:
            feature_name = "mtls_authentication"
            admin.put_feature(feature_name, {"state": "active"})

            # wait for feature to be active so that tests don't have to retry
            def check_feature_active():
                for f in admin.get_features()["features"]:
                    if f["name"] == feature_name and f["state"] == "active":
                        return True
                return False

            wait_until(check_feature_active, timeout_sec=10, backoff_sec=1)

        # base case user is not a superuser and has no configured ACLs
        if use_sasl:
            admin.create_user("base", self.password, self.algorithm)

        # only grant cluster describe permission to user cluster_describe
        if use_sasl:
            admin.create_user("cluster_describe", self.password,
                              self.algorithm)
        client = self.get_super_client()
        client.acl_create_allow_cluster("cluster_describe", "describe")

        # There is no convenient interface for waiting for ACLs to propagate to
        # all nodes, so when mTLS is used only for identity we sleep here to
        # avoid ACL propagation races.
        if self.security.enable_mtls_identity:
            time.sleep(5)
            return

        # wait for users to propagate to all nodes
        def users_propagated():
            for node in self.redpanda.nodes:
                users = admin.list_users(node=node)
                if "base" not in users or "cluster_describe" not in users:
                    return False
            return True

        wait_until(users_propagated, timeout_sec=10, backoff_sec=1)
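For context, here is a minimal sketch (not taken from the source) of how a test might drive prepare_cluster. The parametrize decorators come from ducktape, while get_cluster_describe_client() and describe_cluster() are hypothetical helpers used only for illustration:

    # Sketch only: assumes ducktape's @parametrize and a hypothetical
    # get_cluster_describe_client() helper analogous to get_super_client().
    @cluster(num_nodes=3)
    @parametrize(use_tls=True, use_sasl=False)
    @parametrize(use_tls=True, use_sasl=True)
    def test_cluster_describe_permission(self, use_tls, use_sasl):
        self.prepare_cluster(use_tls, use_sasl)

        # The "cluster_describe" principal was granted cluster describe above,
        # so a describe request through its client should succeed, while the
        # "base" principal (no ACLs, not a superuser) should be denied.
        client = self.get_cluster_describe_client()  # hypothetical helper
        assert client.describe_cluster()  # hypothetical call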
Example 2
class FeaturesMultiNodeTest(FeaturesTestBase):
    """
    Multi-node variant of tests is the 'normal' execution path for feature manager.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, num_brokers=3, **kwargs)

        # Admin API wrapper used throughout these tests to read and mutate
        # feature state.
        self.admin = Admin(self.redpanda)

    @cluster(num_nodes=3)
    def test_get_features(self):
        self._assert_default_features()

    @cluster(num_nodes=3, log_allow_list=RESTART_LOG_ALLOW_LIST)
    def test_upgrade(self):
        """
        Verify that on updating to a new logical version, the cluster
        version does not increment until all nodes are up to date.
        """
        initial_version = self.admin.get_features()['cluster_version']

        new_version = initial_version + 1
        self.logger.info(
            f"Simulating upgrade from version {initial_version} to version {new_version}"
        )

        # Modified environment variables apply to processes restarted from this point onwards
        self.redpanda.set_environment(
            {'__REDPANDA_LOGICAL_VERSION': f'{new_version}'})

        # Restart nodes one by one.  Version shouldn't increment until all three are done.
        self.redpanda.restart_nodes([self.redpanda.nodes[0]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

        self.redpanda.restart_nodes([self.redpanda.nodes[1]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

        self.redpanda.restart_nodes([self.redpanda.nodes[2]])
        wait_until(lambda: new_version == self.admin.get_features()[
            'cluster_version'],
                   timeout_sec=5,
                   backoff_sec=1)

    @cluster(num_nodes=3, log_allow_list=RESTART_LOG_ALLOW_LIST)
    def test_rollback(self):
        """
        Verify that on a rollback before updating all nodes, the cluster
        version does not increment.
        """
        initial_version = self.admin.get_features()['cluster_version']

        new_version = initial_version + 1
        self.logger.info(
            f"Simulating upgrade from version {initial_version} to version {new_version}"
        )

        # Modified environment variables apply to processes restarted from this point onwards
        self.redpanda.set_environment(
            {'__REDPANDA_LOGICAL_VERSION': f'{new_version}'})

        # Restart nodes one by one.  Version shouldn't increment until all three are done.
        self.redpanda.restart_nodes([self.redpanda.nodes[0]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

        self.redpanda.restart_nodes([self.redpanda.nodes[1]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

        self.logger.info(f"Simulating rollback to version {initial_version}")
        self.redpanda.set_environment(
            {'__REDPANDA_LOGICAL_VERSION': f'{initial_version}'})

        self.redpanda.restart_nodes([self.redpanda.nodes[0]])
        self.redpanda.restart_nodes([self.redpanda.nodes[1]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

    @cluster(num_nodes=3, log_allow_list=RESTART_LOG_ALLOW_LIST)
    def test_explicit_activation(self):
        """
        Using a dummy feature, verify its progression through unavailable->available->active
        """

        # Parameters of the compiled-in test feature
        feature_alpha_version = 2001
        feature_alpha_name = "__test_alpha"

        initial_version = self.admin.get_features()['cluster_version']
        assert initial_version < feature_alpha_version
        # Initially, before setting the magic environment variable, dummy test features
        # should be hidden
        assert feature_alpha_name not in self._get_features_map().keys()

        self.redpanda.set_environment({'__REDPANDA_TEST_FEATURES': "ON"})
        self.redpanda.restart_nodes(self.redpanda.nodes)
        features_map = self._get_features_map()
        assert features_map[feature_alpha_name]['state'] == 'unavailable'

        # Version is too low, feature should be unavailable
        assert initial_version == self.admin.get_features()['cluster_version']

        self.redpanda.set_environment({
            '__REDPANDA_TEST_FEATURES':
            "ON",
            '__REDPANDA_LOGICAL_VERSION':
            f'{feature_alpha_version}'
        })
        self.redpanda.restart_nodes(self.redpanda.nodes)

        # Wait for version to increment: this is a little slow because we wait
        # for health monitor structures to time out in order to propagate the
        # updated version
        wait_until(lambda: feature_alpha_version == self.admin.get_features()[
            'cluster_version'],
                   timeout_sec=15,
                   backoff_sec=1)

        # Feature should become available now that version increased.  It should NOT
        # become active, because it has an explicit_only policy for activation.
        features_map = self._get_features_map()
        assert features_map[feature_alpha_name]['state'] == 'available'

        # Disable the feature, see that it enters the expected state
        self.admin.put_feature(feature_alpha_name, {"state": "disabled"})
        wait_until(lambda: self._get_features_map()[feature_alpha_name][
            'state'] == 'disabled',
                   timeout_sec=5,
                   backoff_sec=1)
        state = self._get_features_map()[feature_alpha_name]
        assert state['state'] == 'disabled'
        assert state['was_active'] is False

        # Write to admin API to enable the feature
        self.admin.put_feature(feature_alpha_name, {"state": "active"})

        # This is an async check because propagation of feature_table is async
        wait_until(lambda: self._get_features_map()[feature_alpha_name][
            'state'] == 'active',
                   timeout_sec=5,
                   backoff_sec=1)

        # Disable the feature, see that it enters the expected state
        self.admin.put_feature(feature_alpha_name, {"state": "disabled"})
        wait_until(lambda: self._get_features_map()[feature_alpha_name][
            'state'] == 'disabled',
                   timeout_sec=5,
                   backoff_sec=1)
        state = self._get_features_map()[feature_alpha_name]
        assert state['state'] == 'disabled'
        assert state['was_active'] is True
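The tests above rely on base-class helpers, _assert_default_features() and _get_features_map(), that are not part of this excerpt. Below is a minimal sketch of what they might look like, inferred only from how the tests index the admin API response; the RedpandaTest parent class and the exact default checks are assumptions:

class FeaturesTestBase(RedpandaTest):
    """
    Sketch only: the real base class is not shown in this excerpt. Subclasses
    (as above) are expected to set self.admin to an Admin instance.
    """
    def _get_features_map(self, feature_response=None):
        # Index the admin API's feature list by name, matching lookups such as
        # features_map[feature_alpha_name]['state'] in the tests above.
        if feature_response is None:
            feature_response = self.admin.get_features()
        return {f['name']: f for f in feature_response['features']}

    def _assert_default_features(self):
        # Sanity-check the response shape on a freshly started cluster: a
        # positive cluster version and a non-empty feature list.
        resp = self.admin.get_features()
        assert resp['cluster_version'] >= 1
        assert len(resp['features']) > 0
        return resp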