def test_rgw_pod_existence(self):
    """
    Verify RGW pod presence matches the platform: absent on cloud platforms /
    independent-mode clusters (Azure and IBM Cloud excluded from the check),
    and present with the expected count on on-prem platforms.
    """
    platform = config.ENV_DATA["platform"]
    cloud_or_independent = (
        platform.lower() in constants.CLOUD_PLATFORMS
        or storagecluster_independent_check()
    )
    if cloud_or_independent:
        # Azure and IBM Cloud clusters (and OCS <= 4.5) are exempt from the
        # "no RGW pods" expectation.
        rgw_must_be_absent = (
            platform != constants.AZURE_PLATFORM
            and platform != constants.IBMCLOUD_PLATFORM
            and version.get_semantic_ocs_version_from_config() > version.VERSION_4_5
        )
        if rgw_must_be_absent:
            logger.info("Checking whether RGW pod is not present")
            assert (
                not pod.get_rgw_pods()
            ), "RGW pods should not exist in the current platform/cluster"
    elif (
        config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS
        and not config.ENV_DATA["mcg_only_deployment"]
    ):
        # On-prem: the expected RGW pod count depends on OCS version and
        # whether the cluster was upgraded.
        rgw_count = get_rgw_count(
            config.ENV_DATA["ocs_version"], check_if_cluster_was_upgraded(), None
        )
        logger.info(
            f'Checking for RGW pod/s on {config.ENV_DATA.get("platform")} platform'
        )
        rgw_pod = OCP(
            kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"]
        )
        assert rgw_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=constants.RGW_APP_LABEL,
            resource_count=rgw_count,
            timeout=60,
        )
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Verify if all the images of OCS objects got upgraded

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    """
    worker_node_count = len(get_nodes())
    osd_count = get_osd_count()

    # Operator pods first.
    verify_pods_upgraded(old_images, selector=constants.OCS_OPERATOR_LABEL)
    verify_pods_upgraded(old_images, selector=constants.OPERATOR_LABEL)

    # Since 4.3 the noobaa app selector matches three pods (noobaa-core-ID,
    # noobaa-db-ID, noobaa-operator-ID); on 4.2 there is no noobaa-db pod,
    # so only two match.
    noobaa_pod_count = 3 if upgrade_version >= parse_version("4.3") else 2
    verify_pods_upgraded(
        old_images, selector=constants.NOOBAA_APP_LABEL, count=noobaa_pod_count
    )

    # CSI plugin daemonsets run one pod per worker node; each provisioner
    # deployment runs two replicas.
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_CEPHFSPLUGIN_LABEL,
        count=worker_node_count,
    )
    verify_pods_upgraded(
        old_images, selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, count=2
    )
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_RBDPLUGIN_LABEL,
        count=worker_node_count,
    )
    verify_pods_upgraded(
        old_images, selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, count=2
    )

    verify_pods_upgraded(old_images, selector=constants.MON_APP_LABEL, count=3)
    verify_pods_upgraded(old_images, selector=constants.MGR_APP_LABEL)

    # OSDs restart sequentially during upgrade, hence the per-OSD timeout
    # is multiplied by the OSD count.
    per_osd_timeout = 600 if upgrade_version >= parse_version("4.5") else 750
    verify_pods_upgraded(
        old_images,
        selector=constants.OSD_APP_LABEL,
        count=osd_count,
        timeout=per_osd_timeout * osd_count,
    )
    verify_pods_upgraded(old_images, selector=constants.MDS_APP_LABEL, count=2)

    if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
        rgw_count = get_rgw_count(
            upgrade_version.base_version, True, version_before_upgrade
        )
        verify_pods_upgraded(
            old_images, selector=constants.RGW_APP_LABEL, count=rgw_count
        )
def __init__(self, *args, **kwargs):
    """
    Constructor for the MCG class

    Discovers the NooBaa operator/core pods, fetches the S3 and management
    endpoints plus admin credentials from the NooBaa CR and its secret, and
    builds a boto3 S3 resource/client against the MCG endpoint. On AWS, when
    ``create_aws_creds`` is passed, also requests AWS credentials and builds
    a second boto3 resource against real AWS S3. Finally sanity-checks RGW
    pod presence for the current platform.
    """
    self.namespace = config.ENV_DATA["cluster_namespace"]
    # Cache the first pod matching each NooBaa label for later use.
    self.operator_pod = Pod(**get_pods_having_label(
        constants.NOOBAA_OPERATOR_POD_LABEL, self.namespace)[0])
    self.core_pod = Pod(**get_pods_having_label(
        constants.NOOBAA_CORE_POD_LABEL, self.namespace)[0])
    self.retrieve_noobaa_cli_binary()
    """
    The certificate will be copied on each mcg_obj instantiation since
    the process is so light and quick, that the time required for the redundant
    copy is neglible in comparison to the time a hash comparison will take.
    """
    retrieve_default_ingress_crt()
    # Read endpoints off the NooBaa CR status (first item in the list).
    get_noobaa = OCP(kind="noobaa", namespace=self.namespace).get()
    self.s3_endpoint = (get_noobaa.get("items")[0].get("status").get(
        "services").get("serviceS3").get("externalDNS")[0])
    self.s3_internal_endpoint = (get_noobaa.get("items")[0].get(
        "status").get("services").get("serviceS3").get("internalDNS")[0])
    self.mgmt_endpoint = (get_noobaa.get("items")[0].get("status").get(
        "services").get("serviceMgmt").get("externalDNS")[0]) + "/rpc"
    self.region = config.ENV_DATA["region"]
    # Admin credentials are stored base64-encoded in the secret referenced
    # by the NooBaa CR's admin account.
    creds_secret_name = (get_noobaa.get("items")[0].get("status").get(
        "accounts").get("admin").get("secretRef").get("name"))
    secret_ocp_obj = OCP(kind="secret", namespace=self.namespace)
    creds_secret_obj = secret_ocp_obj.get(creds_secret_name)
    self.access_key_id = base64.b64decode(
        creds_secret_obj.get("data").get("AWS_ACCESS_KEY_ID")).decode(
            "utf-8")
    self.access_key = base64.b64decode(
        creds_secret_obj.get("data").get("AWS_SECRET_ACCESS_KEY")).decode(
            "utf-8")
    self.noobaa_user = base64.b64decode(
        creds_secret_obj.get("data").get("email")).decode("utf-8")
    self.noobaa_password = base64.b64decode(
        creds_secret_obj.get("data").get("password")).decode("utf-8")
    self.noobaa_token = self.retrieve_nb_token()
    # boto3 S3 handle pointed at the MCG external S3 endpoint.
    self.s3_resource = boto3.resource(
        "s3",
        verify=retrieve_verification_mode(),
        endpoint_url=self.s3_endpoint,
        aws_access_key_id=self.access_key_id,
        aws_secret_access_key=self.access_key,
    )
    self.s3_client = self.s3_resource.meta.client
    # Optionally also provision real AWS credentials and a handle against
    # AWS S3 itself (AWS platform only, opt-in via kwargs).
    if config.ENV_DATA["platform"].lower() == "aws" and kwargs.get(
            "create_aws_creds"):
        (
            self.cred_req_obj,
            self.aws_access_key_id,
            self.aws_access_key,
        ) = self.request_aws_credentials()
        self.aws_s3_resource = boto3.resource(
            "s3",
            endpoint_url="https://s3.amazonaws.com",
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_access_key,
        )
    # Platform sanity check: cloud/independent clusters (except Azure,
    # OCS > 4.5) must have no RGW pods; on-prem clusters must have the
    # expected RGW pod count running.
    if (config.ENV_DATA["platform"].lower() in constants.CLOUD_PLATFORMS
            or storagecluster_independent_check()):
        if not config.ENV_DATA["platform"] == constants.AZURE_PLATFORM and (
                float(config.ENV_DATA["ocs_version"]) > 4.5):
            logger.info("Checking whether RGW pod is not present")
            pods = pod.get_pods_having_label(label=constants.RGW_APP_LABEL,
                                             namespace=self.namespace)
            assert (
                not pods
            ), "RGW pods should not exist in the current platform/cluster"
    elif config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
        rgw_count = get_rgw_count(config.ENV_DATA["ocs_version"],
                                  check_if_cluster_was_upgraded(), None)
        logger.info(
            f'Checking for RGW pod/s on {config.ENV_DATA.get("platform")} platform'
        )
        rgw_pod = OCP(kind=constants.POD, namespace=self.namespace)
        assert rgw_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=constants.RGW_APP_LABEL,
            resource_count=rgw_count,
            timeout=60,
        )
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Verify if all the images of OCS objects got upgraded

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    """
    worker_node_count = len(get_nodes())

    # Operator pods first.
    verify_pods_upgraded(old_images, selector=constants.OCS_OPERATOR_LABEL)
    verify_pods_upgraded(old_images, selector=constants.OPERATOR_LABEL)

    # noobaa-core, noobaa-db and noobaa-operator are always expected; from
    # 4.7 onward, endpoint pods (scaled between the NooBaa CR's min/max
    # counts) carry the same app label as well.
    base_noobaa_pods = 3
    expected_noobaa_pods = base_noobaa_pods
    endpoints_share_label = upgrade_version >= parse_version("4.7")
    if endpoints_share_label:
        noobaa_ocp = OCP(kind="noobaa",
                         namespace=config.ENV_DATA["cluster_namespace"])
        noobaa_cr = noobaa_ocp.get()["items"][0]
        endpoint_spec = noobaa_cr.get("spec", {}).get("endpoints", {})
        max_endpoint_count = endpoint_spec.get(
            "maxCount", constants.MAX_NB_ENDPOINT_COUNT)
        min_endpoint_count = endpoint_spec.get(
            "minCount", constants.MIN_NB_ENDPOINT_COUNT_POST_DEPLOYMENT)
        expected_noobaa_pods = base_noobaa_pods + min_endpoint_count
    try:
        verify_pods_upgraded(
            old_images,
            selector=constants.NOOBAA_APP_LABEL,
            count=expected_noobaa_pods,
        )
    except TimeoutException as ex:
        if not endpoints_share_label:
            raise
        # The endpoint autoscaler may have grown past minCount — retry once
        # assuming the maximum endpoint count.
        log.info(
            "Nooba pods didn't match. Trying once more with max noobaa endpoints!"
            f"Exception: {ex}")
        expected_noobaa_pods = base_noobaa_pods + max_endpoint_count
        verify_pods_upgraded(
            old_images,
            selector=constants.NOOBAA_APP_LABEL,
            count=expected_noobaa_pods,
            timeout=60,
        )

    # One CSI plugin pod per worker node, two provisioner pods per driver.
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_CEPHFSPLUGIN_LABEL,
        count=worker_node_count,
    )
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
        count=2,
    )
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_RBDPLUGIN_LABEL,
        count=worker_node_count,
    )
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL,
        count=2,
    )

    # Ceph daemon pods exist only on internal-mode deployments.
    if not config.DEPLOYMENT.get("external_mode"):
        verify_pods_upgraded(
            old_images, selector=constants.MON_APP_LABEL, count=3
        )
        verify_pods_upgraded(old_images, selector=constants.MGR_APP_LABEL)
        # OSDs restart one after the other; the timeout scales with count.
        per_osd_timeout = 600 if upgrade_version >= parse_version("4.5") else 750
        osd_count = get_osd_count()
        verify_pods_upgraded(
            old_images,
            selector=constants.OSD_APP_LABEL,
            count=osd_count,
            timeout=per_osd_timeout * osd_count,
        )
        verify_pods_upgraded(
            old_images, selector=constants.MDS_APP_LABEL, count=2
        )

    if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
        rgw_count = get_rgw_count(upgrade_version.base_version, True,
                                  version_before_upgrade)
        verify_pods_upgraded(
            old_images, selector=constants.RGW_APP_LABEL, count=rgw_count
        )
def ocs_install_verification(
    timeout=600,
    skip_osd_distribution_check=False,
    ocs_registry_image=None,
    post_upgrade_verification=False,
    version_before_upgrade=None,
):
    """
    Perform steps necessary to verify a successful OCS installation

    Args:
        timeout (int): Number of seconds for timeout which will be used in the
            checks used in this function.
        skip_osd_distribution_check (bool): If true skip the check for osd
            distribution.
        ocs_registry_image (str): Specific image to check if it was installed
            properly.
        post_upgrade_verification (bool): Set to True if this function is
            called after upgrade.
        version_before_upgrade (float): Set to OCS version before upgrade

    """
    # Local imports to avoid circular import issues at module load time.
    from ocs_ci.ocs.node import get_nodes
    from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs
    from ocs_ci.ocs.resources.pod import get_ceph_tools_pod, get_all_pods
    from ocs_ci.ocs.cluster import validate_cluster_on_pvc
    from ocs_ci.ocs.resources.fips import check_fips_enabled

    number_of_worker_nodes = len(get_nodes())
    namespace = config.ENV_DATA["cluster_namespace"]
    log.info("Verifying OCS installation")
    # Mirror any components disabled via ENV_DATA into config.COMPONENTS so
    # the per-component flags below reflect the deployment.
    if config.ENV_DATA.get("disable_components"):
        for component in config.ENV_DATA["disable_components"]:
            config.COMPONENTS[f"disable_{component}"] = True
    disable_noobaa = config.COMPONENTS["disable_noobaa"]
    disable_rgw = config.COMPONENTS["disable_rgw"]
    disable_blockpools = config.COMPONENTS["disable_blockpools"]
    disable_cephfs = config.COMPONENTS["disable_cephfs"]

    # Verify OCS CSV is in Succeeded phase
    log.info("verifying ocs csv")
    ocs_csv = get_ocs_csv()
    # Verify if OCS CSV has proper version.
    csv_version = ocs_csv.data["spec"]["version"]
    ocs_version = version.get_semantic_ocs_version_from_config()
    log.info(
        f"Check if OCS version: {ocs_version} matches with CSV: {csv_version}")
    assert (
        f"{ocs_version}" in csv_version
    ), f"OCS version: {ocs_version} mismatch with CSV version {csv_version}"
    # Verify if OCS CSV has the same version in provided CI build.
    ocs_registry_image = ocs_registry_image or config.DEPLOYMENT.get(
        "ocs_registry_image")
    if ocs_registry_image and ocs_registry_image.endswith(".ci"):
        # Keep only the tag part of the registry image for comparison.
        ocs_registry_image = ocs_registry_image.rsplit(":", 1)[1]
        log.info(
            f"Check if OCS registry image: {ocs_registry_image} matches with "
            f"CSV: {csv_version}")
        ignore_csv_mismatch = config.DEPLOYMENT.get("ignore_csv_mismatch")
        if ignore_csv_mismatch:
            log.info(
                "The possible mismatch will be ignored as you deployed "
                "the different version than the default version from the CSV")
        else:
            assert ocs_registry_image in csv_version, (
                f"OCS registry image version: {ocs_registry_image} mismatch "
                f"with CSV version {csv_version}")

    # Verify Storage System status
    if ocs_version >= version.VERSION_4_9:
        log.info("Verifying storage system status")
        storage_system = OCP(kind=constants.STORAGESYSTEM, namespace=namespace)
        storage_system_data = storage_system.get()
        storage_system_status = {}
        # Collapse the condition list into a type -> status mapping.
        for condition in storage_system_data["items"][0]["status"][
                "conditions"]:
            storage_system_status[condition["type"]] = condition["status"]
        log.debug(f"storage system status: {storage_system_status}")
        assert storage_system_status == constants.STORAGE_SYSTEM_STATUS, (
            f"Storage System status is not in expected state. Expected {constants.STORAGE_SYSTEM_STATUS}"
            f" but found {storage_system_status}")

    # Verify OCS Cluster Service (ocs-storagecluster) is Ready
    storage_cluster_name = config.ENV_DATA["storage_cluster_name"]
    log.info("Verifying status of storage cluster: %s", storage_cluster_name)
    storage_cluster = StorageCluster(
        resource_name=storage_cluster_name,
        namespace=namespace,
    )
    log.info(f"Check if StorageCluster: {storage_cluster_name} is in"
             f"Succeeded phase")
    storage_cluster.wait_for_phase(phase="Ready", timeout=timeout)

    # Verify pods in running state and proper counts
    log.info("Verifying pod states and counts")
    pod = OCP(kind=constants.POD, namespace=namespace)
    if not config.DEPLOYMENT["external_mode"]:
        # Total OSDs = device set count * replica.
        osd_count = int(
            storage_cluster.data["spec"]["storageDeviceSets"][0]["count"]
        ) * int(
            storage_cluster.data["spec"]["storageDeviceSets"][0]["replica"])
    rgw_count = None
    if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
        if not disable_rgw:
            rgw_count = get_rgw_count(f"{ocs_version}",
                                      post_upgrade_verification,
                                      version_before_upgrade)

    # NooBaa endpoint pod limits; pre-4.6 had a single fixed endpoint pod.
    min_eps = constants.MIN_NB_ENDPOINT_COUNT_POST_DEPLOYMENT
    max_eps = (constants.MAX_NB_ENDPOINT_COUNT
               if ocs_version >= version.VERSION_4_6 else 1)
    if config.ENV_DATA.get("platform") == constants.IBM_POWER_PLATFORM:
        min_eps = 1
        max_eps = 1

    # The noobaa-db pod label changed in 4.7.
    nb_db_label = (constants.NOOBAA_DB_LABEL_46_AND_UNDER
                   if ocs_version < version.VERSION_4_7 else
                   constants.NOOBAA_DB_LABEL_47_AND_ABOVE)
    # Expected running pod count per label selector.
    resources_dict = {
        nb_db_label: 1,
        constants.OCS_OPERATOR_LABEL: 1,
        constants.OPERATOR_LABEL: 1,
        constants.NOOBAA_OPERATOR_POD_LABEL: 1,
        constants.NOOBAA_CORE_POD_LABEL: 1,
        constants.NOOBAA_ENDPOINT_POD_LABEL: min_eps,
    }
    if not config.DEPLOYMENT["external_mode"]:
        # Ceph daemon and CSI pods exist only for internal-mode clusters.
        resources_dict.update({
            constants.MON_APP_LABEL: 3,
            constants.CSI_CEPHFSPLUGIN_LABEL: number_of_worker_nodes,
            constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL: 2,
            constants.CSI_RBDPLUGIN_LABEL: number_of_worker_nodes,
            constants.CSI_RBDPLUGIN_PROVISIONER_LABEL: 2,
            constants.OSD_APP_LABEL: osd_count,
            constants.MGR_APP_LABEL: 1,
            constants.MDS_APP_LABEL: 2,
            constants.RGW_APP_LABEL: rgw_count,
        })
    if ocs_version >= version.VERSION_4_9:
        resources_dict.update({
            constants.ODF_OPERATOR_CONTROL_MANAGER_LABEL: 1,
        })
    for label, count in resources_dict.items():
        # Skip checks for components that are disabled or not applicable
        # on this platform.
        if label == constants.RGW_APP_LABEL:
            if (not config.ENV_DATA.get("platform")
                    in constants.ON_PREM_PLATFORMS or disable_rgw):
                continue
        if "noobaa" in label and disable_noobaa:
            continue
        if "mds" in label and disable_cephfs:
            continue
        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=label,
            resource_count=count,
            timeout=timeout,
        )

    if not disable_noobaa:
        # The endpoint autoscaler must not exceed the CR's maximum.
        nb_ep_pods = get_pods_having_label(
            label=constants.NOOBAA_ENDPOINT_POD_LABEL,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )
        assert len(nb_ep_pods) <= max_eps, (
            f"The number of running NooBaa endpoint pods ({len(nb_ep_pods)}) "
            f"is greater than the maximum defined in the NooBaa CR ({max_eps})"
        )

    # Verify StorageClasses (1 ceph-fs, 1 ceph-rbd)
    log.info("Verifying storage classes")
    storage_class = OCP(kind=constants.STORAGECLASS, namespace=namespace)
    storage_cluster_name = config.ENV_DATA["storage_cluster_name"]
    required_storage_classes = {
        f"{storage_cluster_name}-cephfs",
        f"{storage_cluster_name}-ceph-rbd",
    }
    if ocs_version >= version.VERSION_4_10:
        # TODO: Add rbd-thick storage class verification in external mode cluster upgraded
        # to OCS 4.8 when the bug 1978542 is fixed
        # Skip rbd-thick storage class verification in external mode upgraded
        # cluster. This is blocked by bug 1978542
        if not (config.DEPLOYMENT["external_mode"]
                and post_upgrade_verification):
            required_storage_classes.update(
                {f"{storage_cluster_name}-ceph-rbd-thick"})
    skip_storage_classes = set()
    if disable_cephfs:
        skip_storage_classes.update({
            f"{storage_cluster_name}-cephfs",
        })
    if disable_blockpools:
        skip_storage_classes.update({
            f"{storage_cluster_name}-ceph-rbd",
        })
    required_storage_classes = required_storage_classes.difference(
        skip_storage_classes)
    if config.DEPLOYMENT["external_mode"]:
        required_storage_classes.update({
            f"{storage_cluster_name}-ceph-rgw",
            f'{config.ENV_DATA["cluster_namespace"]}.noobaa.io',
        })
    storage_classes = storage_class.get()
    storage_class_names = {
        item["metadata"]["name"]
        for item in storage_classes["items"]
    }
    # required storage class names should be observed in the cluster under test
    missing_scs = required_storage_classes.difference(storage_class_names)
    if len(missing_scs) > 0:
        log.error("few storage classess are not present: %s", missing_scs)
    assert list(missing_scs) == []

    # Verify OSDs are distributed
    if not config.DEPLOYMENT["external_mode"]:
        if not skip_osd_distribution_check:
            log.info(
                "Verifying OSDs are distributed evenly across worker nodes")
            ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
            osds = ocp_pod_obj.get(selector=constants.OSD_APP_LABEL)["items"]
            deviceset_count = get_deviceset_count()
            node_names = [osd["spec"]["nodeName"] for osd in osds]
            # No node may host more OSDs than the number of device sets.
            for node in node_names:
                assert (
                    not node_names.count(node) > deviceset_count
                ), "OSD's are not distributed evenly across worker nodes"

    # Verify that CSI driver object contains provisioner names
    log.info("Verifying CSI driver object contains provisioner names.")
    csi_driver = OCP(kind="CSIDriver")
    csi_drivers = {
        item["metadata"]["name"]
        for item in csi_driver.get()["items"]
    }
    assert defaults.CSI_PROVISIONERS.issubset(csi_drivers)

    # Verify node and provisioner secret names in storage class
    log.info("Verifying node and provisioner secret names in storage class.")
    if config.DEPLOYMENT["external_mode"]:
        sc_rbd = storage_class.get(
            resource_name=constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD)
        sc_cephfs = storage_class.get(resource_name=(
            constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_CEPHFS))
    else:
        if not disable_blockpools:
            sc_rbd = storage_class.get(
                resource_name=constants.DEFAULT_STORAGECLASS_RBD)
        if not disable_cephfs:
            sc_cephfs = storage_class.get(
                resource_name=constants.DEFAULT_STORAGECLASS_CEPHFS)
    if not disable_blockpools:
        assert (
            sc_rbd["parameters"]["csi.storage.k8s.io/node-stage-secret-name"]
            == constants.RBD_NODE_SECRET)
        assert (
            sc_rbd["parameters"]["csi.storage.k8s.io/provisioner-secret-name"]
            == constants.RBD_PROVISIONER_SECRET)
    if not disable_cephfs:
        assert (sc_cephfs["parameters"]
                ["csi.storage.k8s.io/node-stage-secret-name"] ==
                constants.CEPHFS_NODE_SECRET)
        assert (sc_cephfs["parameters"]
                ["csi.storage.k8s.io/provisioner-secret-name"] ==
                constants.CEPHFS_PROVISIONER_SECRET)
    log.info("Verified node and provisioner secret names in storage class.")

    ct_pod = get_ceph_tools_pod()

    # https://github.com/red-hat-storage/ocs-ci/issues/3820
    # Verify ceph osd tree output
    if not (config.DEPLOYMENT.get("ui_deployment")
            or config.DEPLOYMENT["external_mode"]):
        log.info(
            "Verifying ceph osd tree output and checking for device set PVC names "
            "in the output.")
        if config.DEPLOYMENT.get("local_storage"):
            # LSO deployments: osd tree hosts are node names, not PVC names.
            deviceset_pvcs = [osd.get_node() for osd in get_osd_pods()]
            # removes duplicate hostname
            deviceset_pvcs = list(set(deviceset_pvcs))
            if config.ENV_DATA.get("platform") == constants.BAREMETAL_PLATFORM:
                deviceset_pvcs = [
                    deviceset.replace(".", "-") for deviceset in deviceset_pvcs
                ]
        else:
            deviceset_pvcs = [pvc.name for pvc in get_deviceset_pvcs()]
        osd_tree = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd tree",
                                        format="json")
        schemas = {
            "root": constants.OSD_TREE_ROOT,
            "rack": constants.OSD_TREE_RACK,
            "host": constants.OSD_TREE_HOST,
            "osd": constants.OSD_TREE_OSD,
            "region": constants.OSD_TREE_REGION,
            "zone": constants.OSD_TREE_ZONE,
        }
        schemas["host"]["properties"]["name"] = {"enum": deviceset_pvcs}
        for item in osd_tree["nodes"]:
            # Validate each node in the tree against its per-type schema and
            # tick off every host name so leftovers can be reported.
            validate(instance=item, schema=schemas[item["type"]])
            if item["type"] == "host":
                deviceset_pvcs.remove(item["name"])
        assert not deviceset_pvcs, (
            f"These device set PVCs are not given in ceph osd tree output "
            f"- {deviceset_pvcs}")
        log.info(
            "Verified ceph osd tree output. Device set PVC names are given in the "
            "output.")

    # TODO: Verify ceph osd tree output have osd listed as ssd
    # TODO: Verify ceph osd tree output have zone or rack based on AZ

    # Verify CSI snapshotter sidecar container is not present
    # if the OCS version is < 4.6
    if ocs_version < version.VERSION_4_6:
        log.info("Verifying CSI snapshotter is not present.")
        provisioner_pods = get_all_pods(
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            selector=[
                constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
                constants.CSI_RBDPLUGIN_PROVISIONER_LABEL,
            ],
        )
        for pod_obj in provisioner_pods:
            pod_info = pod_obj.get()
            for container, image in get_images(data=pod_info).items():
                assert ("snapshot" not in container) and (
                    "snapshot" not in image
                ), (f"Snapshot container is present in {pod_obj.name} pod. "
                    f"Container {container}. Image {image}")
        # Also confirm the operator explicitly disables the snapshotter.
        deployments = ocs_csv.get()["spec"]["install"]["spec"]["deployments"]
        rook_ceph_operator_deployment = [
            deployment_val for deployment_val in deployments
            if deployment_val["name"] == "rook-ceph-operator"
        ]
        assert {
            "name": "CSI_ENABLE_SNAPSHOTTER",
            "value": "false"
        } in (rook_ceph_operator_deployment[0]["spec"]["template"]["spec"]
              ["containers"][0]["env"]
              ), "CSI_ENABLE_SNAPSHOTTER value is not set to 'false'."
        log.info("Verified: CSI snapshotter is not present.")

    # Verify pool crush rule is with "type": "zone"
    if utils.get_az_count() == 3:
        log.info("Verifying pool crush rule is with type: zone")
        crush_dump = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd crush dump",
                                          format="")
        pool_names = [
            constants.METADATA_POOL,
            constants.DEFAULT_BLOCKPOOL,
            constants.DATA_POOL,
        ]
        crush_rules = [
            rule for rule in crush_dump["rules"]
            if rule["rule_name"] in pool_names
        ]
        for crush_rule in crush_rules:
            assert [
                item for item in crush_rule["steps"]
                if item.get("type") == "zone"
            ], f"{crush_rule['rule_name']} is not with type as zone"
        log.info("Verified - pool crush rule is with type: zone")
    log.info("Validate cluster on PVC")
    validate_cluster_on_pvc()

    # Verify ceph health
    log.info("Verifying ceph health")
    health_check_tries = 20
    health_check_delay = 30
    if post_upgrade_verification:
        # In case of upgrade with FIO we have to wait longer time to see
        # health OK. See discussion in BZ:
        # https://bugzilla.redhat.com/show_bug.cgi?id=1817727
        health_check_tries = 180
    assert utils.ceph_health_check(namespace, health_check_tries,
                                   health_check_delay)
    if config.ENV_DATA.get("fips"):
        # In case that fips is enabled when deploying,
        # a verification of the installation of it will run
        # on all running state pods
        check_fips_enabled()
    if config.ENV_DATA.get("encryption_at_rest"):
        osd_encryption_verification()
    if config.DEPLOYMENT.get("kms_deployment"):
        kms = KMS.get_kms_deployment()
        kms.post_deploy_verification()

    # Flexible scaling implies a 'host' failure domain.
    storage_cluster_obj = get_storage_cluster()
    is_flexible_scaling = (
        storage_cluster_obj.get()["items"][0].get("spec").get(
            "flexibleScaling", False))
    if is_flexible_scaling is True:
        failure_domain = storage_cluster_obj.data["items"][0]["status"][
            "failureDomain"]
        assert failure_domain == "host", (
            f"The expected failure domain on cluster with flexible scaling is 'host',"
            f" the actaul failure domain is {failure_domain}")

    if ocs_version >= version.VERSION_4_7:
        log.info("Verifying images in storage cluster")
        verify_sc_images(storage_cluster)

    if config.ENV_DATA.get("is_multus_enabled"):
        verify_multus_network()
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Verify if all the images of OCS objects got upgraded

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    """
    number_of_worker_nodes = len(get_nodes())
    verify_pods_upgraded(old_images, selector=constants.OCS_OPERATOR_LABEL)
    verify_pods_upgraded(old_images, selector=constants.OPERATOR_LABEL)
    # noobaa-core, noobaa-db and noobaa-operator pods.
    default_noobaa_pods = 3
    noobaa_pods = default_noobaa_pods
    if upgrade_version >= parse_version("4.7"):
        # From 4.7 the endpoint pods share the noobaa app label; expect the
        # CR's minCount of endpoints on top of the default pods.
        noobaa = OCP(kind="noobaa",
                     namespace=config.ENV_DATA["cluster_namespace"])
        resource = noobaa.get()["items"][0]
        endpoints = resource.get("spec", {}).get("endpoints", {})
        max_endpoints = endpoints.get("maxCount",
                                      constants.MAX_NB_ENDPOINT_COUNT)
        min_endpoints = endpoints.get(
            "minCount", constants.MIN_NB_ENDPOINT_COUNT_POST_DEPLOYMENT)
        noobaa_pods = default_noobaa_pods + min_endpoints
    try:
        verify_pods_upgraded(
            old_images,
            selector=constants.NOOBAA_APP_LABEL,
            count=noobaa_pods,
        )
    except TimeoutException as ex:
        if upgrade_version >= parse_version("4.7"):
            # The endpoint autoscaler may have scaled past minCount;
            # retry once with the maximum endpoint count.
            log.info(
                "Nooba pods didn't match. Trying once more with max noobaa endpoints!"
                f"Exception: {ex}")
            noobaa_pods = default_noobaa_pods + max_endpoints
            verify_pods_upgraded(
                old_images,
                selector=constants.NOOBAA_APP_LABEL,
                count=noobaa_pods,
                timeout=60,
            )
        else:
            raise
    # One CSI plugin pod per worker node, two provisioner pods per driver.
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_CEPHFSPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(old_images,
                         selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
                         count=2)
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_RBDPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(old_images,
                         selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL,
                         count=2)
    # Ceph daemon pods exist only on internal-mode deployments.
    if not config.DEPLOYMENT.get("external_mode"):
        verify_pods_upgraded(
            old_images,
            selector=constants.MON_APP_LABEL,
            count=3,
        )
        verify_pods_upgraded(old_images, selector=constants.MGR_APP_LABEL)
        osd_timeout = 600 if upgrade_version >= parse_version("4.5") else 750
        osd_count = get_osd_count()
        # In the debugging issue:
        # https://github.com/red-hat-storage/ocs-ci/issues/5031
        # Noticed that it's taking about 1 more minute from previous check till actual
        # OSD pods getting restarted.
        # Hence adding sleep here for 120 seconds to be sure, OSD pods upgrade started.
        log.info("Waiting for 2 minutes before start checking OSD pods")
        time.sleep(120)
        verify_pods_upgraded(
            old_images,
            selector=constants.OSD_APP_LABEL,
            count=osd_count,
            timeout=osd_timeout * osd_count,
        )
        verify_pods_upgraded(old_images, selector=constants.MDS_APP_LABEL,
                             count=2)
    if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
        rgw_count = get_rgw_count(upgrade_version.base_version, True,
                                  version_before_upgrade)
        verify_pods_upgraded(
            old_images,
            selector=constants.RGW_APP_LABEL,
            count=rgw_count,
        )
    # The metrics exporter pod was introduced in OCS 4.6.
    if upgrade_version >= parse_version("4.6"):
        verify_pods_upgraded(old_images,
                             selector=constants.OCS_METRICS_EXPORTER)
def test_get_rgw_count(ocs_version, is_upgrade, version_before_upgrade, expected):
    """Check that get_rgw_count returns the expected RGW pod count."""
    actual_count = rgwutils.get_rgw_count(
        ocs_version, is_upgrade, version_before_upgrade
    )
    assert actual_count == expected