Exemple #1
0
    def test_rgw_pod_existence(self):
        """
        Check that RGW pods are absent or present as the platform requires.

        When the platform is one of the cloud platforms, or
        ``storagecluster_independent_check()`` is truthy, no RGW pod may exist
        (the check is skipped on Azure, on IBM Cloud and for OCS versions up
        to 4.5). Otherwise, on on-prem platforms that are not MCG-only
        deployments, the expected number of RGW pods has to be Running within
        60 seconds.
        """
        platform = config.ENV_DATA["platform"]

        if (platform.lower() in constants.CLOUD_PLATFORMS
                or storagecluster_independent_check()):
            exempt_platform = platform in (
                constants.AZURE_PLATFORM,
                constants.IBMCLOUD_PLATFORM,
            )
            if (not exempt_platform
                    and version.get_semantic_ocs_version_from_config() >
                    version.VERSION_4_5):
                logger.info("Checking whether RGW pod is not present")
                rgw_pods = pod.get_rgw_pods()
                assert (
                    not rgw_pods
                ), "RGW pods should not exist in the current platform/cluster"
            return

        # Nothing to verify outside on-prem platforms or on MCG-only clusters.
        if (platform not in constants.ON_PREM_PLATFORMS
                or config.ENV_DATA["mcg_only_deployment"]):
            return

        expected_rgw_pods = get_rgw_count(
            config.ENV_DATA["ocs_version"],
            check_if_cluster_was_upgraded(),
            None,
        )
        logger.info(
            f'Checking for RGW pod/s on {config.ENV_DATA.get("platform")} platform'
        )
        pod_api = OCP(
            kind=constants.POD,
            namespace=config.ENV_DATA["cluster_namespace"],
        )
        assert pod_api.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=constants.RGW_APP_LABEL,
            resource_count=expected_rgw_pods,
            timeout=60,
        )
Exemple #2
0
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Verify that the pods of every OCS component run upgraded images

    Each component is checked in turn through ``verify_pods_upgraded``; the
    order of the checks is significant because each call waits for its pods.

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    """
    worker_count = len(get_nodes())
    osd_count = get_osd_count()

    # In 4.3 the noobaa app selector matches three pods (noobaa-core-ID,
    # noobaa-db-ID, noobaa-operator-ID); in 4.2 only two (no noobaa-db-ID).
    if upgrade_version < parse_version("4.3"):
        noobaa_pod_count = 2
    else:
        noobaa_pod_count = 3

    if upgrade_version >= parse_version("4.5"):
        per_osd_timeout = 600
    else:
        per_osd_timeout = 750

    # (selector, extra keyword arguments) pairs, checked strictly in order.
    ordered_checks = (
        (constants.OCS_OPERATOR_LABEL, {}),
        (constants.OPERATOR_LABEL, {}),
        (constants.NOOBAA_APP_LABEL, {"count": noobaa_pod_count}),
        (constants.CSI_CEPHFSPLUGIN_LABEL, {"count": worker_count}),
        (constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, {"count": 2}),
        (constants.CSI_RBDPLUGIN_LABEL, {"count": worker_count}),
        (constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, {"count": 2}),
        (constants.MON_APP_LABEL, {"count": 3}),
        (constants.MGR_APP_LABEL, {}),
        (
            constants.OSD_APP_LABEL,
            {"count": osd_count, "timeout": per_osd_timeout * osd_count},
        ),
        (constants.MDS_APP_LABEL, {"count": 2}),
    )
    for pod_selector, extra_kwargs in ordered_checks:
        verify_pods_upgraded(old_images, selector=pod_selector, **extra_kwargs)

    # RGW pods are only verified on on-prem platforms.
    if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
        expected_rgw_pods = get_rgw_count(
            upgrade_version.base_version,
            True,
            version_before_upgrade,
        )
        verify_pods_upgraded(
            old_images,
            selector=constants.RGW_APP_LABEL,
            count=expected_rgw_pods,
        )
Exemple #3
0
    def __init__(self, *args, **kwargs):
        """
        Constructor for the MCG class

        Locates the NooBaa operator and core pods, reads the S3 and
        management endpoints plus the admin credentials from the NooBaa CR
        and its secret, builds the boto3 S3 resource/client and finally
        asserts the RGW pod state expected for the current platform.

        Args:
            *args: Not used by this constructor
            **kwargs: ``create_aws_creds`` - when truthy and the platform is
                AWS, AWS credentials are requested and an S3 resource for
                ``https://s3.amazonaws.com`` is created

        Raises:
            AssertionError: If RGW pods exist where they should not, or if
                waiting for the expected RGW pods returns a falsy result

        """
        self.namespace = config.ENV_DATA["cluster_namespace"]
        self.operator_pod = Pod(**get_pods_having_label(
            constants.NOOBAA_OPERATOR_POD_LABEL, self.namespace)[0])
        self.core_pod = Pod(**get_pods_having_label(
            constants.NOOBAA_CORE_POD_LABEL, self.namespace)[0])

        self.retrieve_noobaa_cli_binary()
        # The certificate will be copied on each mcg_obj instantiation since
        # the process is so light and quick, that the time required for the
        # redundant copy is negligible in comparison to the time a hash
        # comparison will take.
        retrieve_default_ingress_crt()

        noobaa_cr = OCP(kind="noobaa", namespace=self.namespace).get()
        noobaa_status = noobaa_cr.get("items")[0].get("status")
        services = noobaa_status.get("services")
        s3_service = services.get("serviceS3")

        self.s3_endpoint = s3_service.get("externalDNS")[0]
        self.s3_internal_endpoint = s3_service.get("internalDNS")[0]
        self.mgmt_endpoint = (
            services.get("serviceMgmt").get("externalDNS")[0] + "/rpc")
        self.region = config.ENV_DATA["region"]

        creds_secret_name = (noobaa_status.get("accounts").get("admin").get(
            "secretRef").get("name"))
        secret_ocp_obj = OCP(kind="secret", namespace=self.namespace)
        creds_secret_obj = secret_ocp_obj.get(creds_secret_name)

        def _secret_value(key):
            # Secret data is stored base64-encoded; return it as text.
            return base64.b64decode(
                creds_secret_obj.get("data").get(key)).decode("utf-8")

        self.access_key_id = _secret_value("AWS_ACCESS_KEY_ID")
        self.access_key = _secret_value("AWS_SECRET_ACCESS_KEY")

        self.noobaa_user = _secret_value("email")
        self.noobaa_password = _secret_value("password")

        self.noobaa_token = self.retrieve_nb_token()

        self.s3_resource = boto3.resource(
            "s3",
            verify=retrieve_verification_mode(),
            endpoint_url=self.s3_endpoint,
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.access_key,
        )

        self.s3_client = self.s3_resource.meta.client

        if config.ENV_DATA["platform"].lower() == "aws" and kwargs.get(
                "create_aws_creds"):
            (
                self.cred_req_obj,
                self.aws_access_key_id,
                self.aws_access_key,
            ) = self.request_aws_credentials()

            self.aws_s3_resource = boto3.resource(
                "s3",
                endpoint_url="https://s3.amazonaws.com",
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_access_key,
            )

        if (config.ENV_DATA["platform"].lower() in constants.CLOUD_PLATFORMS
                or storagecluster_independent_check()):
            if config.ENV_DATA["platform"] != constants.AZURE_PLATFORM:
                # Compare major/minor as integers: float("4.10") is 4.1,
                # which would wrongly sort below 4.5 and skip the check.
                major_minor = tuple(
                    int(part) for part in str(
                        config.ENV_DATA["ocs_version"]).split(".")[:2])
                if major_minor > (4, 5):
                    logger.info("Checking whether RGW pod is not present")
                    pods = pod.get_pods_having_label(
                        label=constants.RGW_APP_LABEL,
                        namespace=self.namespace)
                    assert (
                        not pods
                    ), "RGW pods should not exist in the current platform/cluster"

        elif config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
            rgw_count = get_rgw_count(config.ENV_DATA["ocs_version"],
                                      check_if_cluster_was_upgraded(), None)
            logger.info(
                f'Checking for RGW pod/s on {config.ENV_DATA.get("platform")} platform'
            )
            rgw_pod = OCP(kind=constants.POD, namespace=self.namespace)
            assert rgw_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector=constants.RGW_APP_LABEL,
                resource_count=rgw_count,
                timeout=60,
            )
Exemple #4
0
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Verify if all the images of OCS objects got upgraded

    For OCS 4.7 and above the expected NooBaa pod count is three plus the
    minimum endpoint count from the NooBaa CR; if that check times out it is
    retried once with the maximum endpoint count. Ceph daemon pods (mon, mgr,
    osd, mds, rgw) are only verified when the deployment is not in external
    mode.

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    Raises:
        TimeoutException: re-raised when the NooBaa pod check times out and
            the upgrade version is below 4.7

    """
    number_of_worker_nodes = len(get_nodes())
    verify_pods_upgraded(old_images, selector=constants.OCS_OPERATOR_LABEL)
    verify_pods_upgraded(old_images, selector=constants.OPERATOR_LABEL)
    default_noobaa_pods = 3
    noobaa_pods = default_noobaa_pods
    if upgrade_version >= parse_version("4.7"):
        noobaa = OCP(kind="noobaa",
                     namespace=config.ENV_DATA["cluster_namespace"])
        resource = noobaa.get()["items"][0]
        endpoints = resource.get("spec", {}).get("endpoints", {})
        max_endpoints = endpoints.get("maxCount",
                                      constants.MAX_NB_ENDPOINT_COUNT)
        min_endpoints = endpoints.get(
            "minCount", constants.MIN_NB_ENDPOINT_COUNT_POST_DEPLOYMENT)
        noobaa_pods = default_noobaa_pods + min_endpoints
    try:
        verify_pods_upgraded(
            old_images,
            selector=constants.NOOBAA_APP_LABEL,
            count=noobaa_pods,
        )
    except TimeoutException as ex:
        # max_endpoints only exists for 4.7+, which is exactly the case
        # retried below; older versions propagate the timeout.
        if upgrade_version >= parse_version("4.7"):
            # Adjacent literals are concatenated verbatim, so the separating
            # space has to be part of the text.
            log.info(
                "Nooba pods didn't match. Trying once more with max noobaa endpoints! "
                f"Exception: {ex}")
            noobaa_pods = default_noobaa_pods + max_endpoints
            verify_pods_upgraded(
                old_images,
                selector=constants.NOOBAA_APP_LABEL,
                count=noobaa_pods,
                timeout=60,
            )
        else:
            raise
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_CEPHFSPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(old_images,
                         selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
                         count=2)
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_RBDPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(old_images,
                         selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL,
                         count=2)
    if not config.DEPLOYMENT.get("external_mode"):
        verify_pods_upgraded(
            old_images,
            selector=constants.MON_APP_LABEL,
            count=3,
        )
        verify_pods_upgraded(old_images, selector=constants.MGR_APP_LABEL)
        osd_timeout = 600 if upgrade_version >= parse_version("4.5") else 750
        osd_count = get_osd_count()
        # The timeout scales with the number of OSDs.
        verify_pods_upgraded(
            old_images,
            selector=constants.OSD_APP_LABEL,
            count=osd_count,
            timeout=osd_timeout * osd_count,
        )
        verify_pods_upgraded(old_images,
                             selector=constants.MDS_APP_LABEL,
                             count=2)
        if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
            rgw_count = get_rgw_count(upgrade_version.base_version, True,
                                      version_before_upgrade)
            verify_pods_upgraded(
                old_images,
                selector=constants.RGW_APP_LABEL,
                count=rgw_count,
            )
Exemple #5
0
def ocs_install_verification(
    timeout=600,
    skip_osd_distribution_check=False,
    ocs_registry_image=None,
    post_upgrade_verification=False,
    version_before_upgrade=None,
):
    """
    Perform steps necessary to verify a successful OCS installation

    The checks run in order: CSV version, storage system status (4.9+),
    storage cluster phase, pod states and counts, storage classes, OSD
    distribution, CSI drivers, storage class secret names, ceph osd tree,
    CSI snapshotter absence (< 4.6), crush rules, cluster on PVC, ceph
    health, and the optional FIPS / encryption / flexible scaling / image /
    multus verifications.

    Args:
        timeout (int): Number of seconds for timeout which will be used in the
            checks used in this function.
        skip_osd_distribution_check (bool): If true skip the check for osd
            distribution.
        ocs_registry_image (str): Specific image to check if it was installed
            properly.
        post_upgrade_verification (bool): Set to True if this function is
            called after upgrade.
        version_before_upgrade (float): Set to OCS version before upgrade

    Raises:
        AssertionError: If any of the verification steps fails

    """
    from ocs_ci.ocs.node import get_nodes
    from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs
    from ocs_ci.ocs.resources.pod import get_ceph_tools_pod, get_all_pods
    from ocs_ci.ocs.cluster import validate_cluster_on_pvc
    from ocs_ci.ocs.resources.fips import check_fips_enabled

    number_of_worker_nodes = len(get_nodes())
    namespace = config.ENV_DATA["cluster_namespace"]
    log.info("Verifying OCS installation")
    if config.ENV_DATA.get("disable_components"):
        for component in config.ENV_DATA["disable_components"]:
            config.COMPONENTS[f"disable_{component}"] = True
    disable_noobaa = config.COMPONENTS["disable_noobaa"]
    disable_rgw = config.COMPONENTS["disable_rgw"]
    disable_blockpools = config.COMPONENTS["disable_blockpools"]
    disable_cephfs = config.COMPONENTS["disable_cephfs"]

    # Verify OCS CSV is in Succeeded phase
    log.info("verifying ocs csv")
    ocs_csv = get_ocs_csv()
    # Verify if OCS CSV has proper version.
    csv_version = ocs_csv.data["spec"]["version"]
    ocs_version = version.get_semantic_ocs_version_from_config()
    log.info(
        f"Check if OCS version: {ocs_version} matches with CSV: {csv_version}")
    assert (
        f"{ocs_version}" in csv_version
    ), f"OCS version: {ocs_version} mismatch with CSV version {csv_version}"
    # Verify if OCS CSV has the same version in provided CI build.
    ocs_registry_image = ocs_registry_image or config.DEPLOYMENT.get(
        "ocs_registry_image")
    if ocs_registry_image and ocs_registry_image.endswith(".ci"):
        # Only the tag (text after the last ':') is compared with the CSV.
        ocs_registry_image = ocs_registry_image.rsplit(":", 1)[1]
        log.info(
            f"Check if OCS registry image: {ocs_registry_image} matches with "
            f"CSV: {csv_version}")
        ignore_csv_mismatch = config.DEPLOYMENT.get("ignore_csv_mismatch")
        if ignore_csv_mismatch:
            log.info(
                "The possible mismatch will be ignored as you deployed "
                "the different version than the default version from the CSV")
        else:
            assert ocs_registry_image in csv_version, (
                f"OCS registry image version: {ocs_registry_image} mismatch "
                f"with CSV version {csv_version}")

    # Verify Storage System status
    if ocs_version >= version.VERSION_4_9:
        log.info("Verifying storage system status")
        storage_system = OCP(kind=constants.STORAGESYSTEM, namespace=namespace)
        storage_system_data = storage_system.get()
        storage_system_status = {}
        for condition in storage_system_data["items"][0]["status"][
                "conditions"]:
            storage_system_status[condition["type"]] = condition["status"]
        log.debug(f"storage system status: {storage_system_status}")
        assert storage_system_status == constants.STORAGE_SYSTEM_STATUS, (
            f"Storage System status is not in expected state. Expected {constants.STORAGE_SYSTEM_STATUS}"
            f" but found {storage_system_status}")

    # Verify OCS Cluster Service (ocs-storagecluster) is Ready
    storage_cluster_name = config.ENV_DATA["storage_cluster_name"]
    log.info("Verifying status of storage cluster: %s", storage_cluster_name)
    storage_cluster = StorageCluster(
        resource_name=storage_cluster_name,
        namespace=namespace,
    )
    # The message names the phase actually waited for below.
    log.info(f"Check if StorageCluster: {storage_cluster_name} is in "
             f"Ready phase")
    storage_cluster.wait_for_phase(phase="Ready", timeout=timeout)

    # Verify pods in running state and proper counts
    log.info("Verifying pod states and counts")
    pod = OCP(kind=constants.POD, namespace=namespace)
    if not config.DEPLOYMENT["external_mode"]:
        # osd_count is only defined (and only used) outside external mode.
        osd_count = int(
            storage_cluster.data["spec"]["storageDeviceSets"][0]["count"]
        ) * int(
            storage_cluster.data["spec"]["storageDeviceSets"][0]["replica"])
    rgw_count = None
    if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
        if not disable_rgw:
            rgw_count = get_rgw_count(f"{ocs_version}",
                                      post_upgrade_verification,
                                      version_before_upgrade)

    min_eps = constants.MIN_NB_ENDPOINT_COUNT_POST_DEPLOYMENT
    max_eps = (constants.MAX_NB_ENDPOINT_COUNT
               if ocs_version >= version.VERSION_4_6 else 1)

    if config.ENV_DATA.get("platform") == constants.IBM_POWER_PLATFORM:
        min_eps = 1
        max_eps = 1

    nb_db_label = (constants.NOOBAA_DB_LABEL_46_AND_UNDER
                   if ocs_version < version.VERSION_4_7 else
                   constants.NOOBAA_DB_LABEL_47_AND_ABOVE)
    resources_dict = {
        nb_db_label: 1,
        constants.OCS_OPERATOR_LABEL: 1,
        constants.OPERATOR_LABEL: 1,
        constants.NOOBAA_OPERATOR_POD_LABEL: 1,
        constants.NOOBAA_CORE_POD_LABEL: 1,
        constants.NOOBAA_ENDPOINT_POD_LABEL: min_eps,
    }
    if not config.DEPLOYMENT["external_mode"]:
        resources_dict.update({
            constants.MON_APP_LABEL: 3,
            constants.CSI_CEPHFSPLUGIN_LABEL: number_of_worker_nodes,
            constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL: 2,
            constants.CSI_RBDPLUGIN_LABEL: number_of_worker_nodes,
            constants.CSI_RBDPLUGIN_PROVISIONER_LABEL: 2,
            constants.OSD_APP_LABEL: osd_count,
            constants.MGR_APP_LABEL: 1,
            constants.MDS_APP_LABEL: 2,
            constants.RGW_APP_LABEL: rgw_count,
        })

    if ocs_version >= version.VERSION_4_9:
        resources_dict.update({
            constants.ODF_OPERATOR_CONTROL_MANAGER_LABEL: 1,
        })

    for label, count in resources_dict.items():
        if label == constants.RGW_APP_LABEL:
            # RGW is only expected on on-prem platforms and when not disabled.
            if (config.ENV_DATA.get("platform")
                    not in constants.ON_PREM_PLATFORMS or disable_rgw):
                continue
        if "noobaa" in label and disable_noobaa:
            continue
        if "mds" in label and disable_cephfs:
            continue

        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=label,
            resource_count=count,
            timeout=timeout,
        )

    if not disable_noobaa:
        nb_ep_pods = get_pods_having_label(
            label=constants.NOOBAA_ENDPOINT_POD_LABEL,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )
        assert len(nb_ep_pods) <= max_eps, (
            f"The number of running NooBaa endpoint pods ({len(nb_ep_pods)}) "
            f"is greater than the maximum defined in the NooBaa CR ({max_eps})"
        )

    # Verify StorageClasses (1 ceph-fs, 1 ceph-rbd)
    log.info("Verifying storage classes")
    storage_class = OCP(kind=constants.STORAGECLASS, namespace=namespace)
    required_storage_classes = {
        f"{storage_cluster_name}-cephfs",
        f"{storage_cluster_name}-ceph-rbd",
    }
    if ocs_version >= version.VERSION_4_10:
        # TODO: Add rbd-thick storage class verification in external mode cluster upgraded
        # to OCS 4.8 when the bug 1978542 is fixed
        # Skip rbd-thick storage class verification in external mode upgraded cluster. This is blocked by bug 1978542
        if not (config.DEPLOYMENT["external_mode"]
                and post_upgrade_verification):
            required_storage_classes.update(
                {f"{storage_cluster_name}-ceph-rbd-thick"})
    skip_storage_classes = set()
    if disable_cephfs:
        skip_storage_classes.update({
            f"{storage_cluster_name}-cephfs",
        })
    if disable_blockpools:
        skip_storage_classes.update({
            f"{storage_cluster_name}-ceph-rbd",
        })
    required_storage_classes = required_storage_classes.difference(
        skip_storage_classes)

    if config.DEPLOYMENT["external_mode"]:
        required_storage_classes.update({
            f"{storage_cluster_name}-ceph-rgw",
            f'{config.ENV_DATA["cluster_namespace"]}.noobaa.io',
        })
    storage_classes = storage_class.get()
    storage_class_names = {
        item["metadata"]["name"]
        for item in storage_classes["items"]
    }
    # required storage class names should be observed in the cluster under test
    missing_scs = required_storage_classes.difference(storage_class_names)
    if missing_scs:
        log.error("few storage classess are not present: %s", missing_scs)
    assert not missing_scs, (
        f"Required storage classes are missing: {missing_scs}")

    # Verify OSDs are distributed
    if not config.DEPLOYMENT["external_mode"]:
        if not skip_osd_distribution_check:
            log.info(
                "Verifying OSDs are distributed evenly across worker nodes")
            ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
            osds = ocp_pod_obj.get(selector=constants.OSD_APP_LABEL)["items"]
            deviceset_count = get_deviceset_count()
            node_names = [osd["spec"]["nodeName"] for osd in osds]
            # No node may host more OSDs than there are device sets.
            for node in node_names:
                assert (
                    node_names.count(node) <= deviceset_count
                ), "OSD's are not distributed evenly across worker nodes"

    # Verify that CSI driver object contains provisioner names
    log.info("Verifying CSI driver object contains provisioner names.")
    csi_driver = OCP(kind="CSIDriver")
    csi_drivers = {
        item["metadata"]["name"]
        for item in csi_driver.get()["items"]
    }
    assert defaults.CSI_PROVISIONERS.issubset(csi_drivers)

    # Verify node and provisioner secret names in storage class
    log.info("Verifying node and provisioner secret names in storage class.")
    if config.DEPLOYMENT["external_mode"]:
        sc_rbd = storage_class.get(
            resource_name=constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD)
        sc_cephfs = storage_class.get(resource_name=(
            constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_CEPHFS))
    else:
        if not disable_blockpools:
            sc_rbd = storage_class.get(
                resource_name=constants.DEFAULT_STORAGECLASS_RBD)
        if not disable_cephfs:
            sc_cephfs = storage_class.get(
                resource_name=constants.DEFAULT_STORAGECLASS_CEPHFS)
    if not disable_blockpools:
        assert (
            sc_rbd["parameters"]["csi.storage.k8s.io/node-stage-secret-name"]
            == constants.RBD_NODE_SECRET)
        assert (
            sc_rbd["parameters"]["csi.storage.k8s.io/provisioner-secret-name"]
            == constants.RBD_PROVISIONER_SECRET)
    if not disable_cephfs:
        assert (sc_cephfs["parameters"]
                ["csi.storage.k8s.io/node-stage-secret-name"] ==
                constants.CEPHFS_NODE_SECRET)
        assert (sc_cephfs["parameters"]
                ["csi.storage.k8s.io/provisioner-secret-name"] ==
                constants.CEPHFS_PROVISIONER_SECRET)
    log.info("Verified node and provisioner secret names in storage class.")

    ct_pod = get_ceph_tools_pod()

    # https://github.com/red-hat-storage/ocs-ci/issues/3820
    # Verify ceph osd tree output
    if not (config.DEPLOYMENT.get("ui_deployment")
            or config.DEPLOYMENT["external_mode"]):
        log.info(
            "Verifying ceph osd tree output and checking for device set PVC names "
            "in the output.")
        if config.DEPLOYMENT.get("local_storage"):
            deviceset_pvcs = [osd.get_node() for osd in get_osd_pods()]
            # removes duplicate hostname
            deviceset_pvcs = list(set(deviceset_pvcs))
            if config.ENV_DATA.get("platform") == constants.BAREMETAL_PLATFORM:
                deviceset_pvcs = [
                    deviceset.replace(".", "-") for deviceset in deviceset_pvcs
                ]
        else:
            deviceset_pvcs = [pvc.name for pvc in get_deviceset_pvcs()]

        osd_tree = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd tree",
                                        format="json")
        schemas = {
            "root": constants.OSD_TREE_ROOT,
            "rack": constants.OSD_TREE_RACK,
            "host": constants.OSD_TREE_HOST,
            "osd": constants.OSD_TREE_OSD,
            "region": constants.OSD_TREE_REGION,
            "zone": constants.OSD_TREE_ZONE,
        }
        # NOTE: the enum shares the deviceset_pvcs list, so names removed in
        # the loop below are no longer accepted for later host entries.
        schemas["host"]["properties"]["name"] = {"enum": deviceset_pvcs}
        for item in osd_tree["nodes"]:
            validate(instance=item, schema=schemas[item["type"]])
            if item["type"] == "host":
                deviceset_pvcs.remove(item["name"])
        assert not deviceset_pvcs, (
            f"These device set PVCs are not given in ceph osd tree output "
            f"- {deviceset_pvcs}")
        log.info(
            "Verified ceph osd tree output. Device set PVC names are given in the "
            "output.")

    # TODO: Verify ceph osd tree output have osd listed as ssd
    # TODO: Verify ceph osd tree output have zone or rack based on AZ

    # Verify CSI snapshotter sidecar container is not present
    # if the OCS version is < 4.6
    if ocs_version < version.VERSION_4_6:
        log.info("Verifying CSI snapshotter is not present.")
        provisioner_pods = get_all_pods(
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            selector=[
                constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
                constants.CSI_RBDPLUGIN_PROVISIONER_LABEL,
            ],
        )
        for pod_obj in provisioner_pods:
            pod_info = pod_obj.get()
            for container, image in get_images(data=pod_info).items():
                assert ("snapshot" not in container) and (
                    "snapshot" not in image
                ), (f"Snapshot container is present in {pod_obj.name} pod. "
                    f"Container {container}. Image {image}")
        deployments = ocs_csv.get()["spec"]["install"]["spec"]["deployments"]
        rook_ceph_operator_deployment = [
            deployment_val for deployment_val in deployments
            if deployment_val["name"] == "rook-ceph-operator"
        ]
        assert {
            "name": "CSI_ENABLE_SNAPSHOTTER",
            "value": "false"
        } in (rook_ceph_operator_deployment[0]["spec"]["template"]["spec"]
              ["containers"][0]["env"]
              ), "CSI_ENABLE_SNAPSHOTTER value is not set to 'false'."
        log.info("Verified: CSI snapshotter is not present.")

    # Verify pool crush rule is with "type": "zone"
    if utils.get_az_count() == 3:
        log.info("Verifying pool crush rule is with type: zone")
        crush_dump = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd crush dump",
                                          format="")
        pool_names = [
            constants.METADATA_POOL,
            constants.DEFAULT_BLOCKPOOL,
            constants.DATA_POOL,
        ]
        crush_rules = [
            rule for rule in crush_dump["rules"]
            if rule["rule_name"] in pool_names
        ]
        for crush_rule in crush_rules:
            assert [
                item for item in crush_rule["steps"]
                if item.get("type") == "zone"
            ], f"{crush_rule['rule_name']} is not with type as zone"
        log.info("Verified - pool crush rule is with type: zone")
    log.info("Validate cluster on PVC")
    validate_cluster_on_pvc()

    # Verify ceph health
    log.info("Verifying ceph health")
    health_check_tries = 20
    health_check_delay = 30
    if post_upgrade_verification:
        # In case of upgrade with FIO we have to wait longer time to see
        # health OK. See discussion in BZ:
        # https://bugzilla.redhat.com/show_bug.cgi?id=1817727
        health_check_tries = 180
    assert utils.ceph_health_check(namespace, health_check_tries,
                                   health_check_delay)
    if config.ENV_DATA.get("fips"):
        # In case that fips is enabled when deploying,
        # a verification of the installation of it will run
        # on all running state pods
        check_fips_enabled()
    if config.ENV_DATA.get("encryption_at_rest"):
        osd_encryption_verification()
        if config.DEPLOYMENT.get("kms_deployment"):
            kms = KMS.get_kms_deployment()
            kms.post_deploy_verification()

    storage_cluster_obj = get_storage_cluster()
    is_flexible_scaling = (
        storage_cluster_obj.get()["items"][0].get("spec").get(
            "flexibleScaling", False))
    if is_flexible_scaling is True:
        failure_domain = storage_cluster_obj.data["items"][0]["status"][
            "failureDomain"]
        assert failure_domain == "host", (
            f"The expected failure domain on cluster with flexible scaling is 'host',"
            f" the actaul failure domain is {failure_domain}")

    if ocs_version >= version.VERSION_4_7:
        log.info("Verifying images in storage cluster")
        verify_sc_images(storage_cluster)

    if config.ENV_DATA.get("is_multus_enabled"):
        verify_multus_network()
Exemple #6
0
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Verify if all the images of OCS objects got upgraded

    For OCS 4.7 and above the expected NooBaa pod count is three plus the
    minimum endpoint count from the NooBaa CR; if that check times out it is
    retried once with the maximum endpoint count. Ceph daemon pods (mon, mgr,
    osd, mds, rgw) are only verified when the deployment is not in external
    mode, and the metrics exporter is verified for 4.6 and above.

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    Raises:
        TimeoutException: re-raised when the NooBaa pod check times out and
            the upgrade version is below 4.7

    """
    number_of_worker_nodes = len(get_nodes())
    verify_pods_upgraded(old_images, selector=constants.OCS_OPERATOR_LABEL)
    verify_pods_upgraded(old_images, selector=constants.OPERATOR_LABEL)
    default_noobaa_pods = 3
    noobaa_pods = default_noobaa_pods
    if upgrade_version >= parse_version("4.7"):
        noobaa = OCP(kind="noobaa",
                     namespace=config.ENV_DATA["cluster_namespace"])
        resource = noobaa.get()["items"][0]
        endpoints = resource.get("spec", {}).get("endpoints", {})
        max_endpoints = endpoints.get("maxCount",
                                      constants.MAX_NB_ENDPOINT_COUNT)
        min_endpoints = endpoints.get(
            "minCount", constants.MIN_NB_ENDPOINT_COUNT_POST_DEPLOYMENT)
        noobaa_pods = default_noobaa_pods + min_endpoints
    try:
        verify_pods_upgraded(
            old_images,
            selector=constants.NOOBAA_APP_LABEL,
            count=noobaa_pods,
        )
    except TimeoutException as ex:
        # max_endpoints only exists for 4.7+, which is exactly the case
        # retried below; older versions propagate the timeout.
        if upgrade_version >= parse_version("4.7"):
            # Adjacent literals are concatenated verbatim, so the separating
            # space has to be part of the text.
            log.info(
                "Nooba pods didn't match. Trying once more with max noobaa endpoints! "
                f"Exception: {ex}")
            noobaa_pods = default_noobaa_pods + max_endpoints
            verify_pods_upgraded(
                old_images,
                selector=constants.NOOBAA_APP_LABEL,
                count=noobaa_pods,
                timeout=60,
            )
        else:
            raise
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_CEPHFSPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(old_images,
                         selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
                         count=2)
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_RBDPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(old_images,
                         selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL,
                         count=2)
    if not config.DEPLOYMENT.get("external_mode"):
        verify_pods_upgraded(
            old_images,
            selector=constants.MON_APP_LABEL,
            count=3,
        )
        verify_pods_upgraded(old_images, selector=constants.MGR_APP_LABEL)
        osd_timeout = 600 if upgrade_version >= parse_version("4.5") else 750
        osd_count = get_osd_count()
        # In the debugging issue:
        # https://github.com/red-hat-storage/ocs-ci/issues/5031
        # Noticed that it's taking about 1 more minute from previous check till actual
        # OSD pods getting restarted.
        # Hence adding sleep here for 120 seconds to be sure, OSD pods upgrade started.
        log.info("Waiting for 2 minutes before start checking OSD pods")
        time.sleep(120)
        # The timeout scales with the number of OSDs.
        verify_pods_upgraded(
            old_images,
            selector=constants.OSD_APP_LABEL,
            count=osd_count,
            timeout=osd_timeout * osd_count,
        )
        verify_pods_upgraded(old_images,
                             selector=constants.MDS_APP_LABEL,
                             count=2)
        if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
            rgw_count = get_rgw_count(upgrade_version.base_version, True,
                                      version_before_upgrade)
            verify_pods_upgraded(
                old_images,
                selector=constants.RGW_APP_LABEL,
                count=rgw_count,
            )
    if upgrade_version >= parse_version("4.6"):
        verify_pods_upgraded(old_images,
                             selector=constants.OCS_METRICS_EXPORTER)
Exemple #7
0
def test_get_rgw_count(ocs_version, is_upgrade, version_before_upgrade,
                       expected):
    """
    Check that ``rgwutils.get_rgw_count`` returns the expected RGW count
    for the given version / upgrade combination.
    """
    assert rgwutils.get_rgw_count(
        ocs_version,
        is_upgrade,
        version_before_upgrade,
    ) == expected