def prepare_cluster(self, use_tls, use_sasl):
    self.security = SecurityConfig()
    self.security.enable_sasl = use_sasl
    self.security.enable_mtls_identity = use_tls and not use_sasl

    if use_tls:
        self.tls = tls.TLSCertManager(self.logger)

        # cert for a principal with no explicitly granted permissions
        self.base_user_cert = self.tls.create_cert(socket.gethostname(),
                                                   common_name="morty",
                                                   name="base_client")

        # cert for a principal with cluster describe permissions
        self.cluster_describe_user_cert = self.tls.create_cert(
            socket.gethostname(),
            common_name="cluster_describe",
            name="cluster_describe_client")

        # cert for the admin user used to bootstrap
        self.admin_user_cert = self.tls.create_cert(
            socket.gethostname(),
            common_name="admin",
            name="test_admin_client")

        self.security.tls_provider = MTLSProvider(self.tls)

    self.redpanda.set_security_settings(self.security)
    self.redpanda.start()

    admin = Admin(self.redpanda)

    if self.security.enable_mtls_identity:
        feature_name = "mtls_authentication"
        admin.put_feature(feature_name, {"state": "active"})

        # wait for the feature to become active so that tests don't have
        # to retry
        def check_feature_active():
            for f in admin.get_features()["features"]:
                if f["name"] == feature_name and f["state"] == "active":
                    return True
            return False

        wait_until(check_feature_active, timeout_sec=10, backoff_sec=1)

    # the base case user is not a superuser and has no configured ACLs
    if use_sasl:
        admin.create_user("base", self.password, self.algorithm)

    # only grant the cluster describe permission to user cluster_describe
    if use_sasl:
        admin.create_user("cluster_describe", self.password, self.algorithm)
    client = self.get_super_client()
    client.acl_create_allow_cluster("cluster_describe", "describe")

    # there is no convenient interface for waiting for ACLs to propagate
    # to all nodes, so when we are using mTLS only for identity we inject
    # a sleep here to try to avoid ACL propagation races.
    if self.security.enable_mtls_identity:
        time.sleep(5)
        return

    # wait for the users to propagate to all nodes
    def users_propagated():
        for node in self.redpanda.nodes:
            users = admin.list_users(node=node)
            if "base" not in users or "cluster_describe" not in users:
                return False
        return True

    wait_until(users_propagated, timeout_sec=10, backoff_sec=1)
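
# Usage sketch (hypothetical, not part of the original suite): tests in the
# enclosing class are assumed to call prepare_cluster() before exercising
# ACLs, along the lines of:
#
#     @cluster(num_nodes=3)
#     def test_cluster_describe(self):
#         self.prepare_cluster(use_tls=True, use_sasl=True)
#         # "cluster_describe" was granted cluster describe above, while
#         # "base" has no ACLs and should be denied the same operation.
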
class FeaturesMultiNodeTest(FeaturesTestBase):
    """
    The multi-node variant of these tests is the 'normal' execution path
    for the feature manager.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, num_brokers=3, **kwargs)
        self.admin = Admin(self.redpanda)

    @cluster(num_nodes=3)
    def test_get_features(self):
        self._assert_default_features()

    @cluster(num_nodes=3, log_allow_list=RESTART_LOG_ALLOW_LIST)
    def test_upgrade(self):
        """
        Verify that on updating to a new logical version, the cluster
        version does not increment until all nodes are up to date.
        """
        initial_version = self.admin.get_features()['cluster_version']
        new_version = initial_version + 1
        self.logger.info(
            f"Simulating upgrade from version {initial_version} to version {new_version}"
        )

        # Modified environment variables apply to processes restarted from
        # this point onwards
        self.redpanda.set_environment(
            {'__REDPANDA_LOGICAL_VERSION': f'{new_version}'})

        # Restart nodes one by one.  The version shouldn't increment until
        # all three are done.
        self.redpanda.restart_nodes([self.redpanda.nodes[0]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

        self.redpanda.restart_nodes([self.redpanda.nodes[1]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

        self.redpanda.restart_nodes([self.redpanda.nodes[2]])
        wait_until(lambda: new_version == self.admin.get_features()[
            'cluster_version'],
                   timeout_sec=5,
                   backoff_sec=1)

    @cluster(num_nodes=3, log_allow_list=RESTART_LOG_ALLOW_LIST)
    def test_rollback(self):
        """
        Verify that on a rollback before updating all nodes, the cluster
        version does not increment.
        """
        initial_version = self.admin.get_features()['cluster_version']
        new_version = initial_version + 1
        self.logger.info(
            f"Simulating upgrade from version {initial_version} to version {new_version}"
        )

        # Modified environment variables apply to processes restarted from
        # this point onwards
        self.redpanda.set_environment(
            {'__REDPANDA_LOGICAL_VERSION': f'{new_version}'})

        # Restart two of the three nodes.  The version shouldn't increment
        # while any node is still on the old version.
        self.redpanda.restart_nodes([self.redpanda.nodes[0]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

        self.redpanda.restart_nodes([self.redpanda.nodes[1]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

        self.logger.info(f"Simulating rollback to version {initial_version}")
        self.redpanda.set_environment(
            {'__REDPANDA_LOGICAL_VERSION': f'{initial_version}'})

        self.redpanda.restart_nodes([self.redpanda.nodes[0]])
        self.redpanda.restart_nodes([self.redpanda.nodes[1]])
        time.sleep(5)  # Give it a chance to update
        assert initial_version == self.admin.get_features()['cluster_version']

    @cluster(num_nodes=3, log_allow_list=RESTART_LOG_ALLOW_LIST)
    def test_explicit_activation(self):
        """
        Using a dummy feature, verify its progression through
        unavailable->available->active
        """
        # Parameters of the compiled-in test feature
        feature_alpha_version = 2001
        feature_alpha_name = "__test_alpha"

        initial_version = self.admin.get_features()['cluster_version']
        assert initial_version < feature_alpha_version

        # Initially, before setting the magic environment variable, dummy
        # test features should be hidden
        assert feature_alpha_name not in self._get_features_map().keys()

        self.redpanda.set_environment({'__REDPANDA_TEST_FEATURES': "ON"})
        self.redpanda.restart_nodes(self.redpanda.nodes)
        assert self._get_features_map(
        )[feature_alpha_name]['state'] == 'unavailable'

        # The version is too low, so the feature should be unavailable
        assert initial_version == self.admin.get_features()['cluster_version']

        self.redpanda.set_environment({
            '__REDPANDA_TEST_FEATURES': "ON",
            '__REDPANDA_LOGICAL_VERSION': f'{feature_alpha_version}'
        })
        self.redpanda.restart_nodes(self.redpanda.nodes)

        # Wait for the version to increment: this is a little slow because
        # we wait for health monitor structures to time out in order to
        # propagate the updated version
        wait_until(lambda: feature_alpha_version == self.admin.get_features()[
            'cluster_version'],
                   timeout_sec=15,
                   backoff_sec=1)

        # The feature should become available now that the version has
        # increased.  It should NOT become active, because it has an
        # explicit_only activation policy.
        assert self._get_features_map(
        )[feature_alpha_name]['state'] == 'available'

        # Disable the feature before it has ever been active: it should
        # enter the 'disabled' state with was_active == False
        self.admin.put_feature(feature_alpha_name, {"state": "disabled"})
        wait_until(lambda: self._get_features_map()[feature_alpha_name][
            'state'] == 'disabled',
                   timeout_sec=5,
                   backoff_sec=1)
        state = self._get_features_map()[feature_alpha_name]
        assert state['state'] == 'disabled'
        assert state['was_active'] == False

        # Write to the admin API to enable the feature
        self.admin.put_feature(feature_alpha_name, {"state": "active"})

        # This is an async check because propagation of the feature table
        # is async
        wait_until(lambda: self._get_features_map()[feature_alpha_name][
            'state'] == 'active',
                   timeout_sec=5,
                   backoff_sec=1)

        # Disable the feature again: now that it has been active,
        # was_active should be True
        self.admin.put_feature(feature_alpha_name, {"state": "disabled"})
        wait_until(lambda: self._get_features_map()[feature_alpha_name][
            'state'] == 'disabled',
                   timeout_sec=5,
                   backoff_sec=1)
        state = self._get_features_map()[feature_alpha_name]
        assert state['state'] == 'disabled'
        assert state['was_active'] == True
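
# For reference, the _get_features_map() helper used above is defined in
# FeaturesTestBase (not shown in this excerpt); it is assumed to index the
# admin API's feature list by name, roughly:
#
#     def _get_features_map(self, feature_response=None):
#         if feature_response is None:
#             feature_response = self.admin.get_features()
#         return dict((f['name'], f) for f in feature_response['features'])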