def retrieve_noobaa_cli_binary(self):
    """
    Copy the NooBaa CLI binary from the operator pod if it wasn't found
    locally, or if the hashes between the two don't match.

    Raises:
        NoobaaCliChecksumFailedException: If the local binary's checksum
            still doesn't match the operator pod's binary after copying.
        AssertionError: In the case CLI binary doesn't exist locally after
            the copy, or lacks execution permissions.

    """

    def _compare_cli_hashes():
        """
        Verify that the remote and local CLI binaries are the same
        in order to make sure the local bin is up to date

        Returns:
            bool: Whether the local and remote hashes are identical

        """
        remote_cli_bin_md5 = cal_md5sum(
            self.operator_pod, constants.NOOBAA_OPERATOR_POD_CLI_PATH
        )
        logger.info(f"Remote noobaa cli md5 hash: {remote_cli_bin_md5}")
        local_cli_bin_md5 = calc_local_file_md5_sum(
            constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH
        )
        logger.info(f"Local noobaa cli md5 hash: {local_cli_bin_md5}")
        return remote_cli_bin_md5 == local_cli_bin_md5

    # Copy when the binary is missing locally, or when it's stale
    # (hash differs from the one on the operator pod)
    if (
        not os.path.isfile(constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH)
        or not _compare_cli_hashes()
    ):
        logger.info(
            # Fixed typo: "could not not found" -> "could not be found"
            f"The MCG CLI binary could not be found in {constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH},"
            " attempting to copy it from the MCG operator pod"
        )
        local_mcg_cli_dir = os.path.dirname(
            constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH
        )
        remote_mcg_cli_basename = os.path.basename(
            constants.NOOBAA_OPERATOR_POD_CLI_PATH
        )
        # The MCG CLI retrieval process is known to be flaky
        # and there's an active BZ regarding it -
        # https://bugzilla.redhat.com/show_bug.cgi?id=2011845
        # rsync should be more reliable than cp, thus the use of oc rsync.
        if version.get_semantic_ocs_version_from_config() > version.VERSION_4_5:
            cmd = (
                f"oc rsync -n {self.namespace} {self.operator_pod.name}:"
                f"{constants.NOOBAA_OPERATOR_POD_CLI_PATH}"
                f" {local_mcg_cli_dir}"
            )
            exec_cmd(cmd)
            # oc rsync keeps the remote basename; move it to the
            # expected local CLI path
            os.rename(
                os.path.join(local_mcg_cli_dir, remote_mcg_cli_basename),
                constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH,
            )
        else:
            # Older versions: stream the binary via `oc exec ... cat`
            # and a shell redirection into the local path
            cmd = (
                f"oc exec -n {self.namespace} {self.operator_pod.name}"
                f" -- cat {constants.NOOBAA_OPERATOR_POD_CLI_PATH}"
                f"> {constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH}"
            )
            proc = subprocess.run(cmd, shell=True, capture_output=True)
            logger.info(
                f"MCG CLI copying process stdout:{proc.stdout.decode()}, stderr: {proc.stderr.decode()}"
            )
            # Surface copy failures early; the assertions and checksum
            # comparison below remain the authoritative validation
            if proc.returncode != 0:
                logger.warning(
                    f"MCG CLI copying process exited with return code {proc.returncode}"
                )
        # Add an executable bit in order to allow usage of the binary
        current_file_permissions = os.stat(
            constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH
        )
        os.chmod(
            constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH,
            current_file_permissions.st_mode | stat.S_IEXEC,
        )
        # Make sure the binary was copied properly and has the correct permissions
        assert os.path.isfile(
            constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH
        ), f"MCG CLI file not found at {constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH}"
        assert os.access(
            constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH, os.X_OK
        ), "The MCG CLI binary does not have execution permissions"
        if not _compare_cli_hashes():
            raise NoobaaCliChecksumFailedException(
                "Binary hash doesn't match the one on the operator pod"
            )
def setup_ceph_toolbox(force_setup=False):
    """
    Setup ceph-toolbox - also checks if toolbox exists, if it exists it
    behaves as noop.

    Args:
        force_setup (bool): force setup toolbox pod

    """
    if ocsci_config.ENV_DATA["mcg_only_deployment"]:
        log.info("Skipping Ceph toolbox setup due to running in MCG only mode")
        return
    namespace = ocsci_config.ENV_DATA["cluster_namespace"]
    ceph_toolbox = get_pod_name_by_pattern("rook-ceph-tools", namespace)
    # setup toolbox for external mode
    # Refer bz: 1856982 - invalid admin secret
    if len(ceph_toolbox) == 1:
        log.info("Ceph toolbox already exists, skipping")
        if force_setup:
            log.info("Running force setup for Ceph toolbox!")
        else:
            return
    external_mode = ocsci_config.DEPLOYMENT.get("external_mode")
    if version.get_semantic_ocs_version_from_config() == version.VERSION_4_2:
        # OCS 4.2: deploy the toolbox directly from the template YAML
        tool_box_data = templating.load_yaml(constants.TOOL_POD_YAML)
        tool_box_data["spec"]["template"]["spec"]["containers"][0][
            "image"
        ] = get_rook_version()
        rook_toolbox = OCS(**tool_box_data)
        rook_toolbox.create()
    else:
        if external_mode:
            toolbox = templating.load_yaml(constants.TOOL_POD_YAML)
            toolbox["spec"]["template"]["spec"]["containers"][0][
                "image"
            ] = get_rook_version()
            toolbox["metadata"]["name"] += "-external"
            keyring_dict = ocsci_config.EXTERNAL_MODE.get("admin_keyring")
            env = toolbox["spec"]["template"]["spec"]["containers"][0]["env"]
            # replace secret env var with the external cluster's admin keyring
            env = [item for item in env if not (item["name"] == "ROOK_CEPH_SECRET")]
            env.append({"name": "ROOK_CEPH_SECRET", "value": keyring_dict["key"]})
            toolbox["spec"]["template"]["spec"]["containers"][0]["env"] = env
            # add ceph volumeMounts so /etc/ceph is writable inside the pod
            ceph_volume_mount_path = {"mountPath": "/etc/ceph", "name": "ceph-config"}
            ceph_volume = {"name": "ceph-config", "emptyDir": {}}
            toolbox["spec"]["template"]["spec"]["containers"][0]["volumeMounts"].append(
                ceph_volume_mount_path
            )
            toolbox["spec"]["template"]["spec"]["volumes"].append(ceph_volume)
            rook_toolbox = OCS(**toolbox)
            rook_toolbox.create()
            return

        # Workaround for https://bugzilla.redhat.com/show_bug.cgi?id=1982721
        # TODO: Remove workaround when bug 1982721 is fixed
        # https://github.com/red-hat-storage/ocs-ci/issues/4585
        if ocsci_config.ENV_DATA.get("is_multus_enabled"):
            toolbox = templating.load_yaml(constants.TOOL_POD_YAML)
            toolbox["spec"]["template"]["spec"]["containers"][0][
                "image"
            ] = get_rook_version()
            toolbox["metadata"]["name"] += "-multus"
            # NOTE(review): the network attachment below is hardcoded to the
            # "openshift-storage" namespace — confirm whether it should follow
            # the configured cluster namespace as well
            toolbox["spec"]["template"]["metadata"]["annotations"] = {
                "k8s.v1.cni.cncf.io/networks": "openshift-storage/ocs-public"
            }
            toolbox["spec"]["template"]["spec"]["hostNetwork"] = False
            rook_toolbox = OCS(**toolbox)
            rook_toolbox.create()
            return

        # for OCS >= 4.3 there is new toolbox pod deployment done here:
        # https://github.com/openshift/ocs-operator/pull/207/
        log.info("starting ceph toolbox pod")
        # Use the configured cluster namespace instead of a hardcoded
        # "openshift-storage", keeping this consistent with the rest of
        # the function
        run_cmd(
            f"oc patch ocsinitialization ocsinit -n {namespace} --type "
            'json --patch \'[{ "op": "replace", "path": '
            '"/spec/enableCephTools", "value": true }]\''
        )
        toolbox_pod = OCP(kind=constants.POD, namespace=namespace)
        toolbox_pod.wait_for_resource(
            condition="Running",
            selector="app=rook-ceph-tools",
            resource_count=1,
            timeout=120,
        )
# NOTE: this span is a fragment — the opening of the first skipif marker and
# the closing paren of the last one lie outside the visible chunk.
not (config.ENV_DATA["platform"].lower() in MANAGED_SERVICE_PLATFORMS
     and config.ENV_DATA["cluster_type"].lower() == "provider"),
reason="Test runs ONLY on managed service provider cluster",
)
# Skip unless running against a managed-service *consumer* cluster.
ms_consumer_required = pytest.mark.skipif(
    not (config.ENV_DATA["platform"].lower() in MANAGED_SERVICE_PLATFORMS
         and config.ENV_DATA["cluster_type"].lower() == "consumer"),
    reason="Test runs ONLY on managed service consumer cluster",
)
# Skip when no usable KMS (Vault or HPCS) configuration is found in auth.yaml.
kms_config_required = pytest.mark.skipif(
    (config.ENV_DATA["KMS_PROVIDER"].lower() != HPCS_KMS_PROVIDER
     and load_auth_config().get("vault", {}).get("VAULT_ADDR") is None)
    or (not (config.ENV_DATA["KMS_PROVIDER"].lower() == HPCS_KMS_PROVIDER
             and version.get_semantic_ocs_version_from_config()
             >= version.VERSION_4_10
             and load_auth_config().get(
                 "hpcs", {}).get("IBM_KP_SERVICE_INSTANCE_ID") is not None,
             # NOTE(review): the trailing comma above makes the operand of
             # `not (...)` a one-element tuple, which is always truthy, so
             # this whole `not (...)` term always evaluates to False —
             # presumably a bug; confirm the comma should be removed.
             )),
    reason="KMS config not found in auth.yaml",
)
# Skip on AWS with LSO (i3 instance local-storage deployments).
skipif_aws_i3 = pytest.mark.skipif(
    config.ENV_DATA["platform"].lower() == "aws"
    and config.DEPLOYMENT.get("local_storage") is True,
    reason="Test will not run on AWS i3",
)
# Skip on bare metal with LSO.
skipif_bm = pytest.mark.skipif(
    config.ENV_DATA["platform"].lower() == "baremetal"
    and config.DEPLOYMENT.get("local_storage") is True,
    reason="Test will not run on Bare Metal",
def __init__(self, *args, **kwargs):
    """
    Constructor for the MCG class.

    Discovers the NooBaa operator and core pods, ensures a local up-to-date
    NooBaa CLI binary and ingress certificate, extracts the S3/management
    endpoints and admin credentials from the noobaa CR and its secret, and
    instantiates boto3 S3 resources. Also validates the expected RGW pod
    presence for the current platform.

    Keyword Args:
        create_aws_creds (bool): On AWS, also request AWS credentials and
            build an additional boto3 resource against s3.amazonaws.com.
    """
    self.namespace = config.ENV_DATA["cluster_namespace"]
    # Locate the operator and core pods by their app labels
    self.operator_pod = Pod(**get_pods_having_label(
        constants.NOOBAA_OPERATOR_POD_LABEL, self.namespace)[0])
    self.core_pod = Pod(**get_pods_having_label(
        constants.NOOBAA_CORE_POD_LABEL, self.namespace)[0])
    # Make sure an up-to-date local copy of the NooBaa CLI binary exists
    self.retrieve_noobaa_cli_binary()
    """
    The certificate will be copied on each mcg_obj instantiation since
    the process is so light and quick, that the time required for the
    redundant copy is neglible in comparison to the time a hash comparison
    will take.
    """
    retrieve_default_ingress_crt()
    # Pull endpoints and admin-secret reference from the noobaa CR status
    get_noobaa = OCP(kind="noobaa", namespace=self.namespace).get()
    self.s3_endpoint = (get_noobaa.get("items")[0].get("status").get(
        "services").get("serviceS3").get("externalDNS")[0])
    self.s3_internal_endpoint = (get_noobaa.get("items")[0].get(
        "status").get("services").get("serviceS3").get("internalDNS")[0])
    self.mgmt_endpoint = (get_noobaa.get("items")[0].get("status").get(
        "services").get("serviceMgmt").get("externalDNS")[0]) + "/rpc"
    self.region = config.ENV_DATA["region"]
    creds_secret_name = (get_noobaa.get("items")[0].get("status").get(
        "accounts").get("admin").get("secretRef").get("name"))
    secret_ocp_obj = OCP(kind="secret", namespace=self.namespace)
    creds_secret_obj = secret_ocp_obj.get(creds_secret_name)
    # Secret data values are base64-encoded; decode to plain strings
    self.access_key_id = base64.b64decode(
        creds_secret_obj.get("data").get("AWS_ACCESS_KEY_ID")).decode(
            "utf-8")
    self.access_key = base64.b64decode(
        creds_secret_obj.get("data").get("AWS_SECRET_ACCESS_KEY")).decode(
            "utf-8")
    self.noobaa_user = base64.b64decode(
        creds_secret_obj.get("data").get("email")).decode("utf-8")
    self.noobaa_password = base64.b64decode(
        creds_secret_obj.get("data").get("password")).decode("utf-8")
    self.noobaa_token = self.retrieve_nb_token()
    # boto3 resource/client against the NooBaa external S3 endpoint
    self.s3_resource = boto3.resource(
        "s3",
        verify=retrieve_verification_mode(),
        endpoint_url=self.s3_endpoint,
        aws_access_key_id=self.access_key_id,
        aws_secret_access_key=self.access_key,
    )
    self.s3_client = self.s3_resource.meta.client
    # Optionally set up genuine AWS credentials/resource (AWS platform only)
    if config.ENV_DATA["platform"].lower() == "aws" and kwargs.get(
            "create_aws_creds"):
        (
            self.cred_req_obj,
            self.aws_access_key_id,
            self.aws_access_key,
        ) = self.request_aws_credentials()
        self.aws_s3_resource = boto3.resource(
            "s3",
            endpoint_url="https://s3.amazonaws.com",
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_access_key,
        )
    # Validate RGW pod presence expectations for the current platform:
    # cloud / independent clusters should have no RGW pods (except Azure
    # and OCS <= 4.5), while on-prem non-MCG-only clusters should have
    # the expected count of running RGW pods.
    if (config.ENV_DATA["platform"].lower() in constants.CLOUD_PLATFORMS
            or storagecluster_independent_check()):
        if not config.ENV_DATA["platform"] == constants.AZURE_PLATFORM and (
                version.get_semantic_ocs_version_from_config() >
                version.VERSION_4_5):
            logger.info("Checking whether RGW pod is not present")
            pods = pod.get_pods_having_label(label=constants.RGW_APP_LABEL,
                                             namespace=self.namespace)
            assert (
                not pods
            ), "RGW pods should not exist in the current platform/cluster"
    elif (config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS
          and not config.ENV_DATA["mcg_only_deployment"]):
        rgw_count = get_rgw_count(config.ENV_DATA["ocs_version"],
                                  check_if_cluster_was_upgraded(), None)
        logger.info(
            f'Checking for RGW pod/s on {config.ENV_DATA.get("platform")} platform'
        )
        rgw_pod = OCP(kind=constants.POD, namespace=self.namespace)
        assert rgw_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=constants.RGW_APP_LABEL,
            resource_count=rgw_count,
            timeout=60,
        )
def test_object_actions(self, mcg_obj, bucket_factory):
    """
    Test to verify different object actions and cross account access to buckets

    Creates an OBC bucket and a NooBaa account, applies a bucket policy
    (Allow/Deny depending on the OCS version), and verifies Put/Get/Delete
    and multipart behavior for both principals, then flips the policy for
    cross-account access and re-verifies.

    NOTE(review): several f-string literals below contain the token
    `"******"` — this looks like secret-scrubber damage to the original
    source (the censored expressions will not parse); the original
    literals should be restored from upstream history.
    """
    data = "Sample string content to write to a new S3 object"
    object_key = "ObjKey-" + str(uuid.uuid4().hex)
    # Creating multiple obc users (accounts)
    obc = bucket_factory(amount=1, interface="OC")
    obc_obj = OBC(obc[0].name)
    # Creating noobaa account to access bucket belonging to obc account
    user_name = "noobaa-user" + str(uuid.uuid4().hex)
    email = user_name + "@mail.com"
    user = NoobaaAccount(
        mcg_obj, name=user_name, email=email, buckets=[obc_obj.bucket_name]
    )
    # Admin sets policy on obc bucket with obc account principal
    # (actions/effect differ per OCS version: <=4.6 Allow PutObject,
    # otherwise Deny Get/Delete)
    bucket_policy_generated = gen_bucket_policy(
        user_list=[obc_obj.obc_account, user.email_id],
        actions_list=["PutObject"]
        if version.get_semantic_ocs_version_from_config() <= version.VERSION_4_6
        else ["GetObject", "DeleteObject"],
        effect="Allow"
        if version.get_semantic_ocs_version_from_config() <= version.VERSION_4_6
        else "Deny",
        resources_list=[f'{obc_obj.bucket_name}/{"*"}'],
    )
    bucket_policy = json.dumps(bucket_policy_generated)
    logger.info(
        f"Creating bucket policy on bucket: {obc_obj.bucket_name} with principal: {obc_obj.obc_account}"
    )
    put_policy = put_bucket_policy(mcg_obj, obc_obj.bucket_name, bucket_policy)
    logger.info(f"Put bucket policy response from Admin: {put_policy}")
    # Get Policy
    logger.info(f"Getting Bucket policy on bucket: {obc_obj.bucket_name}")
    get_policy = get_bucket_policy(mcg_obj, obc_obj.bucket_name)
    logger.info(f"Got bucket policy: {get_policy['Policy']}")
    # Verifying whether users can put object
    logger.info(
        f"Adding object on bucket: {obc_obj.bucket_name} using user: {obc_obj.obc_account}"
    )
    assert s3_put_object(
        obc_obj, obc_obj.bucket_name, object_key, data
    ), "Failed: Put Object"
    logger.info(
        f"Adding object on bucket: {obc_obj.bucket_name} using user: {user.email_id}"
    )
    assert s3_put_object(
        user, obc_obj.bucket_name, object_key, data
    ), "Failed: Put Object"
    # Verifying whether Get action is not allowed
    # NOTE(review): scrubbed literal below — original expression censored
    logger.info(
        f"Verifying whether user: "******"ocs_version"]) >= 4.6 else obc_obj.obc_account}'
        f" is denied to Get object"
    )
    try:
        # The denied principal depends on the OCS version, matching the
        # version-dependent policy set above
        if version.get_semantic_ocs_version_from_config() >= version.VERSION_4_6:
            s3_get_object(user, obc_obj.bucket_name, object_key)
        else:
            s3_get_object(obc_obj, obc_obj.bucket_name, object_key)
    except boto3exception.ClientError as e:
        logger.info(e.response)
        response = HttpResponseParser(e.response)
        if response.error["Code"] == "AccessDenied":
            logger.info("Get Object action has been denied access")
        else:
            raise UnexpectedBehaviour(
                f"{e.response} received invalid error code {response.error['Code']}"
            )
    else:
        assert False, "Get object succeeded when it should have failed"
    if version.get_semantic_ocs_version_from_config() == version.VERSION_4_6:
        # NOTE(review): scrubbed literal below — original expression censored
        logger.info(
            f"Verifying whether the user: "******"{obc_obj.obc_account} is able to access Get action"
            f"irrespective of the policy set"
        )
        assert s3_get_object(
            obc_obj, obc_obj.bucket_name, object_key
        ), "Failed: Get Object"
    # Verifying whether obc account allowed to create multipart
    logger.info(
        f"Creating multipart on bucket: {obc_obj.bucket_name}"
        f" with key: {object_key} using user: {obc_obj.obc_account}"
    )
    create_multipart_upload(obc_obj, obc_obj.bucket_name, object_key)
    # Verifying whether S3 user is allowed to create multipart
    logger.info(
        f"Creating multipart on bucket: {obc_obj.bucket_name} "
        f"with key: {object_key} using user: {user.email_id}"
    )
    create_multipart_upload(user, obc_obj.bucket_name, object_key)
    # Verifying whether obc account is denied access to delete object
    # NOTE(review): scrubbed literal below — original expression censored
    logger.info(
        f"Verifying whether user: "******"ocs_version"]) >= 4.6 else obc_obj.obc_account}'
        f"is denied to Delete object"
    )
    try:
        if version.get_semantic_ocs_version_from_config() >= version.VERSION_4_6:
            s3_delete_object(user, obc_obj.bucket_name, object_key)
        else:
            s3_delete_object(obc_obj, obc_obj.bucket_name, object_key)
    except boto3exception.ClientError as e:
        logger.info(e.response)
        response = HttpResponseParser(e.response)
        if response.error["Code"] == "AccessDenied":
            logger.info("Delete action has been denied access")
        else:
            raise UnexpectedBehaviour(
                f"{e.response} received invalid error code {response.error['Code']}"
            )
    else:
        assert False, "Delete object succeeded when it should have failed"
    # Admin sets a policy on obc-account bucket with noobaa-account principal (cross account access)
    new_policy_generated = gen_bucket_policy(
        user_list=[user.email_id],
        actions_list=["GetObject", "DeleteObject"]
        if float(config.ENV_DATA["ocs_version"]) <= 4.6
        else ["PutObject"],
        effect="Allow"
        if version.get_semantic_ocs_version_from_config() >= version.VERSION_4_6
        else "Deny",
        resources_list=[f'{obc_obj.bucket_name}/{"*"}'],
    )
    new_policy = json.dumps(new_policy_generated)
    logger.info(
        f"Creating bucket policy on bucket: {obc_obj.bucket_name} with principal: {obc_obj.obc_account}"
    )
    put_policy = put_bucket_policy(mcg_obj, obc_obj.bucket_name, new_policy)
    logger.info(f"Put bucket policy response from admin: {put_policy}")
    # Get Policy
    logger.info(f"Getting bucket policy on bucket: {obc_obj.bucket_name}")
    get_policy = get_bucket_policy(mcg_obj, obc_obj.bucket_name)
    logger.info(f"Got bucket policy: {get_policy['Policy']}")
    # Verifying whether Get, Delete object is allowed
    # (retried via TimeoutSampler until the new policy takes effect)
    logger.info(
        f"Getting object on bucket: {obc_obj.bucket_name} with user: {user.email_id}"
    )
    for get_resp in TimeoutSampler(
        30, 4, s3_get_object, user, obc_obj.bucket_name, object_key
    ):
        if "403" not in str(get_resp["ResponseMetadata"]["HTTPStatusCode"]):
            logger.info("GetObj operation successful")
            break
        else:
            logger.info("GetObj operation is denied access")
    logger.info(
        f"Deleting object on bucket: {obc_obj.bucket_name} with user: {user.email_id}"
    )
    for del_resp in TimeoutSampler(
        30, 4, s3_delete_object, user, obc_obj.bucket_name, object_key
    ):
        if "403" not in str(del_resp["ResponseMetadata"]["HTTPStatusCode"]):
            logger.info("DeleteObj operation successful")
            break
        else:
            logger.info("DeleteObj operation is denied access")
    # Verifying whether Put object action is denied
    logger.info(
        f"Verifying whether user: {user.email_id} is denied to Put object after updating policy"
    )
    try:
        s3_put_object(user, obc_obj.bucket_name, object_key, data)
    except boto3exception.ClientError as e:
        logger.info(e.response)
        response = HttpResponseParser(e.response)
        if response.error["Code"] == "AccessDenied":
            logger.info("Put object action has been denied access")
        else:
            raise UnexpectedBehaviour(
                f"{e.response} received invalid error code {response.error['Code']}"
            )
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    No-ops when a CephCluster already exists. Otherwise deploys either in
    external mode or via the operator, then validates pods, monitoring and
    registry integration, and waits for the cluster to reach HEALTH_OK.
    """
    set_registry_to_managed_state()
    image = None
    ceph_cluster = ocp.OCP(kind="CephCluster", namespace=self.namespace)
    # If a CephCluster item already exists, the cluster is deployed — bail out
    try:
        ceph_cluster.get().get("items")[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        logger.info("Running OCS basic installation")
    # disconnected installation?
    load_cluster_info()
    if config.DEPLOYMENT.get("disconnected"):
        image = prepare_disconnected_ocs_deployment()
    if config.DEPLOYMENT["external_mode"]:
        self.deploy_with_external_mode()
    else:
        self.deploy_ocs_via_operator(image)
        # MCG-only deployments skip all Ceph validations below
        if config.ENV_DATA["mcg_only_deployment"]:
            mcg_only_post_deployment_checks()
            return
        pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
        cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
        # Check for Ceph pods
        # IBM Cloud mons are known to take longer to come up
        mon_pod_timeout = (
            900 if self.platform == constants.IBMCLOUD_PLATFORM else 600
        )
        assert pod.wait_for_resource(
            condition="Running",
            selector="app=rook-ceph-mon",
            resource_count=3,
            timeout=mon_pod_timeout,
        )
        assert pod.wait_for_resource(
            condition="Running", selector="app=rook-ceph-mgr", timeout=600
        )
        assert pod.wait_for_resource(
            condition="Running",
            selector="app=rook-ceph-osd",
            resource_count=3,
            timeout=600,
        )
        # validate ceph mon/osd volumes are backed by pvc
        validate_cluster_on_pvc()
        # validate PDB creation of MON, MDS, OSD pods
        validate_pdb_creation()
        # check for odf-console
        ocs_version = version.get_semantic_ocs_version_from_config()
        if ocs_version >= version.VERSION_4_9:
            assert pod.wait_for_resource(
                condition="Running", selector="app=odf-console", timeout=600
            )
        # Creating toolbox pod
        setup_ceph_toolbox()
        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector="app=rook-ceph-tools",
            resource_count=1,
            timeout=600,
        )
        if not config.COMPONENTS["disable_cephfs"]:
            # Check for CephFilesystem creation in ocp
            cfs_data = cfs.get()
            cfs_name = cfs_data["items"][0]["metadata"]["name"]
            if helpers.validate_cephfilesystem(cfs_name):
                logger.info("MDS deployment is successful!")
                defaults.CEPHFILESYSTEM_NAME = cfs_name
            else:
                logger.error("MDS deployment Failed! Please check logs!")
    # Change monitoring backend to OCS
    if config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
        "persistent-monitoring"
    ):
        setup_persistent_monitoring()
    elif config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
        "telemeter_server_url"
    ):
        # Create configmap cluster-monitoring-config to reconfigure
        # telemeter server url when 'persistent-monitoring' is False
        create_configmap_cluster_monitoring_pod(
            telemeter_server_url=config.ENV_DATA["telemeter_server_url"]
        )
    if not config.COMPONENTS["disable_cephfs"]:
        # Change registry backend to OCS CEPHFS RWX PVC
        registry.change_registry_backend_to_ocs()
    # Enable console plugin
    enable_console_plugin()
    # Verify health of ceph cluster
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    try:
        ceph_health_check(namespace=self.namespace, tries=30, delay=10)
    except CephHealthException as ex:
        err = str(ex)
        logger.warning(f"Ceph health check failed with {err}")
        # Clock skew is recoverable by switching NTP servers, then retrying
        if "clock skew detected" in err:
            logger.info(
                f"Changing NTP on compute nodes to"
                f" {constants.RH_NTP_CLOCK}"
            )
            if self.platform == constants.VSPHERE_PLATFORM:
                update_ntp_compute_nodes()
            assert ceph_health_check(namespace=self.namespace, tries=60, delay=10)
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()
def deploy_ocs_via_operator(self, image=None):
    """
    Method for deploy OCS via OCS operator

    Installs the operator(s) via OLM (or the UI), optionally configures
    multus, IBM Cloud and LSO specifics, then renders and creates the
    StorageCluster CR from the template with all config-driven tweaks.

    Args:
        image (str): Image of ocs registry.

    Raises:
        UnsupportedFeatureError: If encryption at REST is requested on
            OCS < 4.6.
    """
    ui_deployment = config.DEPLOYMENT.get("ui_deployment")
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    arbiter_deployment = config.DEPLOYMENT.get("arbiter_deployment")
    if ui_deployment and ui_deployment_conditions():
        self.deployment_with_ui()
        # Skip the rest of the deployment when deploy via UI
        return
    else:
        logger.info("Deployment of OCS via OCS operator")
        self.label_and_taint_nodes()
    if not live_deployment:
        create_catalog_source(image)
    if config.DEPLOYMENT.get("local_storage"):
        setup_local_storage(storageclass=self.DEFAULT_STORAGECLASS_LSO)
    logger.info("Creating namespace and operator group.")
    run_cmd(f"oc create -f {constants.OLM_YAML}")
    # create multus network
    if config.ENV_DATA.get("is_multus_enabled"):
        logger.info("Creating multus network")
        multus_data = templating.load_yaml(constants.MULTUS_YAML)
        # The CNI config is stored as a JSON string inside the YAML spec
        multus_config_str = multus_data["spec"]["config"]
        multus_config_dct = json.loads(multus_config_str)
        if config.ENV_DATA.get("multus_public_network_interface"):
            multus_config_dct["master"] = config.ENV_DATA.get(
                "multus_public_network_interface"
            )
        multus_data["spec"]["config"] = json.dumps(multus_config_dct)
        multus_data_yaml = tempfile.NamedTemporaryFile(
            mode="w+", prefix="multus", delete=False
        )
        templating.dump_data_to_temp_yaml(multus_data, multus_data_yaml.name)
        run_cmd(f"oc create -f {multus_data_yaml.name}")
    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        ibmcloud.add_deployment_dependencies()
        if not live_deployment:
            create_ocs_secret(self.namespace)
    self.subscribe_ocs()
    operator_selector = get_selector_for_ocs_operator()
    subscription_plan_approval = config.DEPLOYMENT.get("subscription_plan_approval")
    ocs_version = version.get_semantic_ocs_version_from_config()
    # For OCS >= 4.9 the ODF operator is used, with the MCG/NooBaa operator
    # name depending on the exact build number
    if ocs_version >= version.VERSION_4_9:
        ocs_operator_names = [
            defaults.ODF_OPERATOR_NAME,
            defaults.OCS_OPERATOR_NAME,
        ]
        build_number = version.get_semantic_version(get_ocs_build_number())
        if build_number >= version.get_semantic_version("4.9.0-231"):
            ocs_operator_names.append(defaults.MCG_OPERATOR)
        else:
            ocs_operator_names.append(defaults.NOOBAA_OPERATOR)
    else:
        ocs_operator_names = [defaults.OCS_OPERATOR_NAME]
    channel = config.DEPLOYMENT.get("ocs_csv_channel")
    is_ibm_sa_linked = False
    # Wait for each operator's CSV to reach the Succeeded phase
    for ocs_operator_name in ocs_operator_names:
        package_manifest = PackageManifest(
            resource_name=ocs_operator_name,
            selector=operator_selector,
            subscription_plan_approval=subscription_plan_approval,
        )
        package_manifest.wait_for_resource(timeout=300)
        csv_name = package_manifest.get_current_csv(channel=channel)
        csv = CSV(resource_name=csv_name, namespace=self.namespace)
        if (
            config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
            and not live_deployment
        ):
            # Link the pull secret to service accounts once (IBM Cloud only)
            if not is_ibm_sa_linked:
                logger.info("Sleeping for 60 seconds before applying SA")
                time.sleep(60)
                link_all_sa_and_secret_and_delete_pods(
                    constants.OCS_SECRET, self.namespace
                )
                is_ibm_sa_linked = True
        csv.wait_for_phase("Succeeded", timeout=720)
    # create storage system
    if ocs_version >= version.VERSION_4_9:
        exec_cmd(f"oc apply -f {constants.STORAGE_SYSTEM_ODF_YAML}")
    ocp_version = version.get_semantic_ocp_version_from_config()
    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        # IBM Cloud uses a non-default kubelet dir path
        config_map = ocp.OCP(
            kind="configmap",
            namespace=self.namespace,
            resource_name=constants.ROOK_OPERATOR_CONFIGMAP,
        )
        config_map.get(retry=10, wait=5)
        config_map_patch = (
            '\'{"data": {"ROOK_CSI_KUBELET_DIR_PATH": "/var/data/kubelet"}}\''
        )
        logger.info("Patching config map to change KUBLET DIR PATH")
        exec_cmd(
            f"oc patch configmap -n {self.namespace} "
            f"{constants.ROOK_OPERATOR_CONFIGMAP} -p {config_map_patch}"
        )
        if config.DEPLOYMENT.get("create_ibm_cos_secret", True):
            logger.info("Creating secret for IBM Cloud Object Storage")
            with open(constants.IBM_COS_SECRET_YAML, "r") as cos_secret_fd:
                cos_secret_data = yaml.load(cos_secret_fd, Loader=yaml.SafeLoader)
            key_id = config.AUTH["ibmcloud"]["ibm_cos_access_key_id"]
            key_secret = config.AUTH["ibmcloud"]["ibm_cos_secret_access_key"]
            cos_secret_data["data"]["IBM_COS_ACCESS_KEY_ID"] = key_id
            cos_secret_data["data"]["IBM_COS_SECRET_ACCESS_KEY"] = key_secret
            cos_secret_data_yaml = tempfile.NamedTemporaryFile(
                mode="w+", prefix="cos_secret", delete=False
            )
            templating.dump_data_to_temp_yaml(
                cos_secret_data, cos_secret_data_yaml.name
            )
            exec_cmd(f"oc create -f {cos_secret_data_yaml.name}")
    # Modify the CSV with custom values if required
    if all(
        key in config.DEPLOYMENT for key in ("csv_change_from", "csv_change_to")
    ):
        modify_csv(
            csv=csv_name,
            replace_from=config.DEPLOYMENT["csv_change_from"],
            replace_to=config.DEPLOYMENT["csv_change_to"],
        )
    # create custom storage class for StorageCluster CR if necessary
    if self.CUSTOM_STORAGE_CLASS_PATH is not None:
        with open(self.CUSTOM_STORAGE_CLASS_PATH, "r") as custom_sc_fo:
            custom_sc = yaml.load(custom_sc_fo, Loader=yaml.SafeLoader)
        # set value of DEFAULT_STORAGECLASS to match the custom storage cls
        self.DEFAULT_STORAGECLASS = custom_sc["metadata"]["name"]
        run_cmd(f"oc create -f {self.CUSTOM_STORAGE_CLASS_PATH}")
    # Set rook log level
    self.set_rook_log_level()
    # creating StorageCluster
    if config.DEPLOYMENT.get("kms_deployment"):
        kms = KMS.get_kms_deployment()
        kms.deploy()
    if config.ENV_DATA["mcg_only_deployment"]:
        # MCG-only deployments never create a StorageCluster CR
        mcg_only_deployment()
        return
    cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)
    # Figure out all the OCS modules enabled/disabled
    # CLI parameter --disable-components takes the precedence over
    # anything which comes from config file
    if config.ENV_DATA.get("disable_components"):
        for component in config.ENV_DATA["disable_components"]:
            config.COMPONENTS[f"disable_{component}"] = True
            logger.warning(f"disabling: {component}")
    # Update cluster_data with respective component enable/disable
    for key in config.COMPONENTS.keys():
        comp_name = constants.OCS_COMPONENTS_MAP[key.split("_")[1]]
        if config.COMPONENTS[key]:
            # NooBaa is disabled via multiCloudGateway; other components
            # via their managedResources entry
            if "noobaa" in key:
                merge_dict(
                    cluster_data,
                    {
                        "spec": {
                            "multiCloudGateway": {"reconcileStrategy": "ignore"}
                        }
                    },
                )
            else:
                merge_dict(
                    cluster_data,
                    {
                        "spec": {
                            "managedResources": {
                                f"{comp_name}": {"reconcileStrategy": "ignore"}
                            }
                        }
                    },
                )
    if arbiter_deployment:
        cluster_data["spec"]["arbiter"] = {}
        cluster_data["spec"]["nodeTopologies"] = {}
        cluster_data["spec"]["arbiter"]["enable"] = True
        cluster_data["spec"]["nodeTopologies"][
            "arbiterLocation"
        ] = self.get_arbiter_location()
        cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 4
    cluster_data["metadata"]["name"] = config.ENV_DATA["storage_cluster_name"]
    deviceset_data = cluster_data["spec"]["storageDeviceSets"][0]
    device_size = int(config.ENV_DATA.get("device_size", defaults.DEVICE_SIZE))
    logger.info(
        "Flexible scaling is available from version 4.7 on LSO cluster with less than 3 zones"
    )
    zone_num = get_az_count()
    if (
        config.DEPLOYMENT.get("local_storage")
        and ocs_version >= version.VERSION_4_7
        and zone_num < 3
        and not config.DEPLOYMENT.get("arbiter_deployment")
    ):
        cluster_data["spec"]["flexibleScaling"] = True
        # https://bugzilla.redhat.com/show_bug.cgi?id=1921023
        cluster_data["spec"]["storageDeviceSets"][0]["count"] = 3
        cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 1
    # set size of request for storage
    if self.platform.lower() == constants.BAREMETAL_PLATFORM:
        # On bare metal, use the smallest available PV size
        pv_size_list = helpers.get_pv_size(
            storageclass=self.DEFAULT_STORAGECLASS_LSO
        )
        pv_size_list.sort()
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"
        ] = f"{pv_size_list[0]}"
    else:
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"
        ] = f"{device_size}Gi"
    # set storage class to OCS default on current platform
    if self.DEFAULT_STORAGECLASS:
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"
        ] = self.DEFAULT_STORAGECLASS
    # StorageCluster tweaks for LSO
    if config.DEPLOYMENT.get("local_storage"):
        cluster_data["spec"]["manageNodes"] = False
        cluster_data["spec"]["monDataDirHostPath"] = "/var/lib/rook"
        deviceset_data["name"] = constants.DEFAULT_DEVICESET_LSO_PVC_NAME
        deviceset_data["portable"] = False
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"
        ] = self.DEFAULT_STORAGECLASS_LSO
        lso_type = config.DEPLOYMENT.get("type")
        if (
            self.platform.lower() == constants.AWS_PLATFORM
            and not lso_type == constants.AWS_EBS
        ):
            deviceset_data["count"] = 2
        # setting resource limits for AWS i3
        # https://access.redhat.com/documentation/en-us/red_hat_openshift_container_storage/4.6/html-single/deploying_openshift_container_storage_using_amazon_web_services/index#creating-openshift-container-storage-cluster-on-amazon-ec2_local-storage
        if (
            ocs_version >= version.VERSION_4_5
            and config.ENV_DATA.get("worker_instance_type")
            == constants.AWS_LSO_WORKER_INSTANCE
        ):
            deviceset_data["resources"] = {
                "limits": {"cpu": 2, "memory": "5Gi"},
                "requests": {"cpu": 1, "memory": "5Gi"},
            }
        if (ocp_version >= version.VERSION_4_6) and (
            ocs_version >= version.VERSION_4_6
        ):
            cluster_data["metadata"]["annotations"] = {
                "cluster.ocs.openshift.io/local-devices": "true"
            }
        count = config.DEPLOYMENT.get("local_storage_storagedeviceset_count")
        if count is not None:
            deviceset_data["count"] = count
    # Allow lower instance requests and limits for OCS deployment
    # The resources we need to change can be found here:
    # https://github.com/openshift/ocs-operator/blob/release-4.5/pkg/deploy-manager/storagecluster.go#L88-L116
    if config.DEPLOYMENT.get("allow_lower_instance_requirements"):
        none_resources = {"Requests": None, "Limits": None}
        deviceset_data["resources"] = deepcopy(none_resources)
        resources = [
            "mon",
            "mds",
            "rgw",
            "mgr",
            "noobaa-core",
            "noobaa-db",
        ]
        if ocs_version >= version.VERSION_4_5:
            resources.append("noobaa-endpoint")
        cluster_data["spec"]["resources"] = {
            resource: deepcopy(none_resources) for resource in resources
        }
        if ocs_version >= version.VERSION_4_5:
            cluster_data["spec"]["resources"]["noobaa-endpoint"] = {
                "limits": {"cpu": "100m", "memory": "100Mi"},
                "requests": {"cpu": "100m", "memory": "100Mi"},
            }
    else:
        # Default (non-lowered) resources; AWS LSO gets larger MDS/NooBaa
        local_storage = config.DEPLOYMENT.get("local_storage")
        platform = config.ENV_DATA.get("platform", "").lower()
        if local_storage and platform == "aws":
            resources = {
                "mds": {
                    "limits": {"cpu": 3, "memory": "8Gi"},
                    "requests": {"cpu": 1, "memory": "8Gi"},
                }
            }
            if ocs_version < version.VERSION_4_5:
                resources["noobaa-core"] = {
                    "limits": {"cpu": 2, "memory": "8Gi"},
                    "requests": {"cpu": 1, "memory": "8Gi"},
                }
                resources["noobaa-db"] = {
                    "limits": {"cpu": 2, "memory": "8Gi"},
                    "requests": {"cpu": 1, "memory": "8Gi"},
                }
            cluster_data["spec"]["resources"] = resources
    # Enable host network if enabled in config (this require all the
    # rules to be enabled on underlaying platform).
    if config.DEPLOYMENT.get("host_network"):
        cluster_data["spec"]["hostNetwork"] = True
    cluster_data["spec"]["storageDeviceSets"] = [deviceset_data]
    if self.platform == constants.IBMCLOUD_PLATFORM:
        mon_pvc_template = {
            "spec": {
                "accessModes": ["ReadWriteOnce"],
                "resources": {"requests": {"storage": "20Gi"}},
                "storageClassName": self.DEFAULT_STORAGECLASS,
                "volumeMode": "Filesystem",
            }
        }
        cluster_data["spec"]["monPVCTemplate"] = mon_pvc_template
        # Need to check if it's needed for ibm cloud to set manageNodes
        cluster_data["spec"]["manageNodes"] = False
    if config.ENV_DATA.get("encryption_at_rest"):
        if ocs_version < version.VERSION_4_6:
            error_message = "Encryption at REST can be enabled only on OCS >= 4.6!"
            logger.error(error_message)
            raise UnsupportedFeatureError(error_message)
        logger.info("Enabling encryption at REST!")
        cluster_data["spec"]["encryption"] = {
            "enable": True,
        }
        if config.DEPLOYMENT.get("kms_deployment"):
            cluster_data["spec"]["encryption"]["kms"] = {
                "enable": True,
            }
    if config.DEPLOYMENT.get("ceph_debug"):
        setup_ceph_debug()
        # Let ocs-ci manage ceph config directly while debugging
        cluster_data["spec"]["managedResources"] = {
            "cephConfig": {"reconcileStrategy": "ignore"}
        }
    if config.ENV_DATA.get("is_multus_enabled"):
        cluster_data["spec"]["network"] = {
            "provider": "multus",
            "selectors": {
                "public": f"{defaults.ROOK_CLUSTER_NAMESPACE}/ocs-public"
            },
        }
    cluster_data_yaml = tempfile.NamedTemporaryFile(
        mode="w+", prefix="cluster_storage", delete=False
    )
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml.name)
    run_cmd(f"oc create -f {cluster_data_yaml.name}", timeout=1200)
    if config.DEPLOYMENT["infra_nodes"]:
        _ocp = ocp.OCP(kind="node")
        _ocp.exec_oc_cmd(
            command=f"annotate namespace {defaults.ROOK_CLUSTER_NAMESPACE} "
            f"{constants.NODE_SELECTOR_ANNOTATION}"
        )
def subscribe_ocs(self):
    """
    Create the subscription manifest and subscribe to the OCS operator.

    Loads the proper subscription template for the configured OCS version
    (ODF operator for >= 4.9, OCS operator otherwise), applies optional
    overrides from config (install plan approval, channel, catalog source),
    creates the Subscription resource and waits until its CSV starts
    installing. For manual install-plan approval the install plan is
    approved explicitly.
    """
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    # On IBM Cloud non-live deployments the pull secret must be linked to
    # the service accounts before the operator pods can pull images.
    if (
        config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
        and not live_deployment
    ):
        link_all_sa_and_secret_and_delete_pods(constants.OCS_SECRET, self.namespace)
    operator_selector = get_selector_for_ocs_operator()
    # For OCS version >= 4.9 the product ships as the odf-operator,
    # which uses its own subscription manifest.
    ocs_version = version.get_semantic_ocs_version_from_config()
    if ocs_version >= version.VERSION_4_9:
        ocs_operator_name = defaults.ODF_OPERATOR_NAME
        subscription_file = constants.SUBSCRIPTION_ODF_YAML
    else:
        ocs_operator_name = defaults.OCS_OPERATOR_NAME
        subscription_file = constants.SUBSCRIPTION_YAML
    package_manifest = PackageManifest(
        resource_name=ocs_operator_name,
        selector=operator_selector,
    )
    # Wait until the package manifest is ready before reading its channels
    package_manifest.wait_for_resource(timeout=300)
    default_channel = package_manifest.get_default_channel()
    subscription_yaml_data = templating.load_yaml(subscription_file)
    subscription_plan_approval = config.DEPLOYMENT.get("subscription_plan_approval")
    if subscription_plan_approval:
        subscription_yaml_data["spec"][
            "installPlanApproval"
        ] = subscription_plan_approval
    custom_channel = config.DEPLOYMENT.get("ocs_csv_channel")
    # Use the custom channel when configured, the package manifest's
    # default channel otherwise; remember which one was subscribed so the
    # CSV lookup below queries the same channel.
    if custom_channel:
        logger.info(f"Custom channel will be used: {custom_channel}")
        subscribed_channel = custom_channel
    else:
        logger.info(f"Default channel will be used: {default_channel}")
        subscribed_channel = default_channel
    subscription_yaml_data["spec"]["channel"] = subscribed_channel
    if config.DEPLOYMENT.get("stage"):
        subscription_yaml_data["spec"]["source"] = constants.OPERATOR_SOURCE_NAME
    if live_deployment:
        subscription_yaml_data["spec"]["source"] = config.DEPLOYMENT.get(
            "live_content_source", defaults.LIVE_CONTENT_SOURCE
        )
    subscription_manifest = tempfile.NamedTemporaryFile(
        mode="w+", prefix="subscription_manifest", delete=False
    )
    templating.dump_data_to_temp_yaml(
        subscription_yaml_data, subscription_manifest.name
    )
    run_cmd(f"oc create -f {subscription_manifest.name}")
    logger.info("Sleeping for 90 seconds after subscribing OCS")
    time.sleep(90)
    if subscription_plan_approval == "Manual":
        wait_for_install_plan_and_approve(self.namespace)
    # Query the CSV from the channel that was actually subscribed; the
    # original passed the raw custom_channel here, which is None when no
    # custom channel is configured.
    csv_name = package_manifest.get_current_csv(channel=subscribed_channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    csv.wait_for_phase("Installing", timeout=60)
def test_noobaa_bucket_quota(measure_noobaa_exceed_bucket_quota):
    """
    Test that there are appropriate alerts when NooBaa Bucket Quota is reached.
    """
    api = prometheus.PrometheusAPI()
    alerts = measure_noobaa_exceed_bucket_quota.get("prometheus_alerts")

    # Since version 4.5 all NooBaa alerts have a defined Pending state; on
    # earlier releases only the bucket-error alert is expected in Pending.
    old_release = (
        version.get_semantic_ocs_version_from_config() < version.VERSION_4_5
    )
    firing_only = ["firing"]
    pending_and_firing = ["pending", "firing"]
    expected_alerts = [
        (
            constants.ALERT_BUCKETREACHINGQUOTASTATE,
            "A NooBaa Bucket Is In Reaching Quota State",
            firing_only if old_release else pending_and_firing,
            "warning",
        ),
        (
            constants.ALERT_BUCKETERRORSTATE,
            "A NooBaa Bucket Is In Error State",
            pending_and_firing,
            "warning",
        ),
        (
            constants.ALERT_BUCKETEXCEEDINGQUOTASTATE,
            "A NooBaa Bucket Is In Exceeding Quota State",
            firing_only if old_release else pending_and_firing,
            "warning",
        ),
    ]

    for alert_label, alert_msg, alert_states, alert_severity in expected_alerts:
        prometheus.check_alert_list(
            label=alert_label,
            msg=alert_msg,
            alerts=alerts,
            states=alert_states,
            severity=alert_severity,
        )
        # the time to wait is increased because it takes more time for OCS
        # cluster to resolve its issues
        pg_wait = 480
        api.check_alert_cleared(
            label=alert_label,
            measure_end_time=measure_noobaa_exceed_bucket_quota.get("stop"),
            time_min=pg_wait,
        )