    def test_toleration(self):
        """
        1. Check if nodes are tainted
        2. Taint ocs nodes if not tainted
        3. Check for tolerations on all pods
        4. Respin all ocs pods and check if they run on ocs nodes
        5. Untaint nodes

        """
        # taint nodes if not already tainted
        nodes = get_ocs_nodes()
        taint_nodes(nodes)

        # Check tolerations on pods under openshift-storage
        check_toleration_on_pods()

        # Respin all pods and check if they are still running
        pod_list = get_all_pods(namespace=defaults.ROOK_CLUSTER_NAMESPACE)
        for pod in pod_list:
            pod.delete(wait=False)
        assert wait_for_pods_to_be_running(timeout=300)

    def test_non_ocs_taint_and_tolerations(self):
        """
        Test runs the following steps
        1. Taint ocs nodes with non-ocs taint
        2. Set tolerations on storagecluster, subscription, configmap and ocsinit
        3. Respin all ocs pods and check if they run on ocs nodes with tolerations
        4. Add Capacity

        """

        # Taint all nodes with non-ocs taint
        ocs_nodes = get_worker_nodes()
        taint_nodes(nodes=ocs_nodes, taint_label="xyz=true:NoSchedule")

        # Add tolerations to the storagecluster
        storagecluster_obj = ocp.OCP(
            resource_name=constants.DEFAULT_CLUSTERNAME,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            kind=constants.STORAGECLUSTER,
        )
        tolerations = (
            '{"tolerations": [{"effect": "NoSchedule", "key": "xyz",'
            '"operator": "Equal", "value": "true"}, '
            '{"effect": "NoSchedule", "key": "node.ocs.openshift.io/storage", '
            '"operator": "Equal", "value": "true"}]}')
        param = (
            f'{{"spec": {{"placement": {{"all": {tolerations}, "mds": {tolerations}, '
            f'"noobaa-core": {tolerations}, "rgw": {tolerations}}}}}}}')
        storagecluster_obj.patch(params=param, format_type="merge")

        # Add tolerations to the subscription
        sub_list = ocp.get_all_resource_names_of_a_kind(
            kind=constants.SUBSCRIPTION)
        param = (
            '{"spec": {"config":  {"tolerations": '
            '[{"effect": "NoSchedule", "key": "xyz", "operator": "Equal", '
            '"value": "true"}]}}}')
        for sub in sub_list:
            sub_obj = ocp.OCP(
                resource_name=sub,
                namespace=defaults.ROOK_CLUSTER_NAMESPACE,
                kind=constants.SUBSCRIPTION,
            )
            sub_obj.patch(params=param, format_type="merge")

        # Add tolerations to the ocsinitializations.ocs.openshift.io
        param = (
            '{"spec":  {"tolerations": '
            '[{"effect": "NoSchedule", "key": "xyz", "operator": "Equal", '
            '"value": "true"}]}}')

        ocsini_obj = ocp.OCP(
            resource_name=constants.OCSINIT,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            kind=constants.OCSINITIALIZATION,
        )
        ocsini_obj.patch(params=param, format_type="merge")

        # Add tolerations to the configmap rook-ceph-operator-config
        configmap_obj = ocp.OCP(
            kind=constants.CONFIGMAP,
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            resource_name=constants.ROOK_OPERATOR_CONFIGMAP,
        )
        toleration = configmap_obj.get().get("data").get(
            "CSI_PLUGIN_TOLERATIONS")
        toleration += (
            '\n- key: xyz\n  operator: Equal\n  value: "true"\n  effect: NoSchedule'
        )
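        # Escape quotes and newlines so the multi-line YAML toleration list can be
        # embedded as a single JSON string value in the JSON patch built below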
        toleration = toleration.replace('"', '\\"').replace("\n", "\\n")
        param_cmd = (
            f'[{{"op": "replace", "path": "/data/CSI_PLUGIN_TOLERATIONS", "value": "{toleration}" }}, '
            f'{{"op": "replace", "path": "/data/CSI_PROVISIONER_TOLERATIONS", "value": "{toleration}" }}]'
        )
        configmap_obj.patch(params=param_cmd, format_type="json")

        # A few pod respins are expected after the configmap edit
        assert wait_for_pods_to_be_running(timeout=600, sleep=15)

        # Respin all pods and check if they are still running
        pod_list = get_all_pods(namespace=defaults.ROOK_CLUSTER_NAMESPACE)
        for pod in pod_list:
            pod.delete(wait=False)

        assert wait_for_pods_to_be_running(timeout=600, sleep=15)
        self.sanity_helpers.health_check()

        # Add capacity to check if the new OSDs have the tolerations
        osd_size = storage_cluster.get_osd_size()
        count = storage_cluster.add_capacity(osd_size)
        pod = ocp.OCP(kind=constants.POD,
                      namespace=config.ENV_DATA["cluster_namespace"])
        if is_flexible_scaling_enabled():
            replica_count = 1
        else:
            replica_count = 3
        assert pod.wait_for_resource(
            timeout=300,
            condition=constants.STATUS_RUNNING,
            selector=constants.OSD_APP_LABEL,
            resource_count=count * replica_count,
        ), "New OSDs failed to reach running state"
        check_ceph_health_after_add_capacity(ceph_rebalance_timeout=2500)
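
    # A minimal sketch, not part of the original tests, of how a per-pod toleration
    # check could look; it assumes a pod dict in the usual Kubernetes pod spec layout
    # and uses illustrative default key/value/effect arguments.
    @staticmethod
    def pod_has_toleration(pod_dict, key="xyz", value="true", effect="NoSchedule"):
        """Return True if the pod spec carries the given toleration."""
        tolerations = pod_dict.get("spec", {}).get("tolerations", []) or []
        return any(
            t.get("key") == key
            and t.get("value") == value
            and t.get("effect") == effect
            for t in tolerations
        )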
Example #3
def uninstall_ocs():
    """
    The function uninstalls the OCS operator from an OpenShift
    cluster and removes all its settings and dependencies

    """
    ocp_obj = ocp.OCP()

    log.info("deleting volume snapshots")
    vs_ocp_obj = ocp.OCP(kind=constants.VOLUMESNAPSHOT)
    vs_list = vs_ocp_obj.get(all_namespaces=True)["items"]
    for vs in vs_list:
        vs_obj = ocp.OCP(kind=constants.VOLUMESNAPSHOT,
                         namespace=vs.get("metadata").get("namespace"))
        vs_obj.delete(resource_name=vs.get("metadata").get("name"))

    log.info("queering for OCS PVCs")
    provisioners = constants.OCS_PROVISIONERS
    sc_list = [
        sc for sc in get_all_storageclass()
        if sc.get("provisioner") in provisioners
    ]

    pvc_to_delete = []
    for sc in sc_list:
        pvc_to_delete.extend(pvc for pvc in get_all_pvcs_in_storageclass(
            sc.get("metadata").get("name")) if "noobaa" not in pvc.name)

    if config.ENV_DATA["platform"].lower() == constants.ROSA_PLATFORM:
        log.info("Deleting OCS PVCs")
        for pvc in pvc_to_delete:
            log.info(f"Deleting PVC: {pvc.name}")
            pvc.delete()
        rosa.delete_odf_addon(config.ENV_DATA["cluster_name"])
        return None
    log.info("Removing monitoring stack from OpenShift Container Storage")
    remove_monitoring_stack_from_ocs()

    log.info(
        "Removing OpenShift Container Platform registry from OpenShift Container Storage"
    )
    remove_ocp_registry_from_ocs(config.ENV_DATA["platform"])

    log.info(
        "Removing the cluster logging operator from OpenShift Container Storage"
    )
    try:
        remove_cluster_logging_operator_from_ocs()
    except CommandFailed:
        log.info("No cluster logging found")

    log.info("Deleting OCS PVCs")
    for pvc in pvc_to_delete:
        log.info(f"Deleting PVC: {pvc.name}")
        pvc.delete()

    storage_cluster = ocp.OCP(
        kind=constants.STORAGECLUSTER,
        resource_name=constants.DEFAULT_CLUSTERNAME,
        namespace="openshift-storage",
    )

    log.info("Checking for local storage")
    lso_sc = None
    if check_local_volume_local_volume_set():
        "Local volume was found. Will be removed later"
        lso_sc = (storage_cluster.get().get("spec").get("storageDeviceSets")[0]
                  .get("dataPVCTemplate").get("spec").get("storageClassName"))

    cleanup_policy = (storage_cluster.get().get("metadata").get(
        "annotations").get("uninstall.ocs.openshift.io/cleanup-policy"))

    log.info("Deleting storageCluster object")
    storage_cluster.delete(resource_name=constants.DEFAULT_CLUSTERNAME)

    if cleanup_policy == "delete":
        log.info("Cleanup policy set to delete. checking cleanup pods")
        cleanup_pods = [
            pod for pod in get_all_pods() if "cluster-cleanup-job" in pod.name
        ]
        for pod in cleanup_pods:
            for phase in TimeoutSampler(
                timeout=300,
                sleep=30,
                func=lambda: pod.get().get("status").get("phase"),
            ):
                if phase == "Succeeded":
                    break
                log.info(f"waiting for cleanup pod {pod.name} to complete")
            log.info(f"Cleanup pod {pod.name} completed successfully")
        # no need to confirm /var/lib/rook was deleted from nodes if all cleanup pods are completed.
    else:
        log.info("Cleanup policy set to retain. skipping nodes cleanup")

    log.info("Deleting openshift-storage namespace")
    ocp_obj.delete_project(constants.OPENSHIFT_STORAGE_NAMESPACE)
    ocp_obj.wait_for_delete(constants.OPENSHIFT_STORAGE_NAMESPACE)
    switch_to_project(constants.DEFAULT_NAMESPACE)

    # step 10: TODO remove crypto from nodes.
    """for node in storage_node_list:
        log.info(f"removing encryption from {node}")
        ocp_obj.exec_oc_debug_cmd(node=node, cmd_list=[])"""

    if lso_sc is not None:
        log.info("Removing LSO")
        try:
            uninstall_lso(lso_sc)
        except Exception as e:
            log.info(f"LSO removal failed.{e}")

    log.info("deleting noobaa storage class")
    noobaa_sc = ocp.OCP(kind=constants.STORAGECLASS)
    noobaa_sc.delete(resource_name=constants.NOOBAA_SC)

    nodes = get_all_nodes()
    node_objs = get_node_objs(nodes)

    log.info("Unlabeling storage nodes")
    label_nodes(nodes=node_objs,
                label=constants.OPERATOR_NODE_LABEL[:-3] + "-")
    label_nodes(nodes=node_objs, label=constants.TOPOLOGY_ROOK_LABEL + "-")

    log.info("Removing taints from storage nodes")
    taint_nodes(nodes=nodes, taint_label=constants.OPERATOR_NODE_TAINT + "-")

    log.info("Deleting remaining OCS PVs (if there are any)")
    try:
        rbd_pv = ocp.OCP(kind=constants.PV,
                         resource_name="ocs-storagecluster-ceph-rbd")
        fs_pv = ocp.OCP(kind=constants.PV,
                        resource_name="ocs-storagecluster-cephfs")
        rbd_pv.delete()
        fs_pv.delete()
        log.info("OCS PVs deleted")
    except Exception as e:
        log.info(f"OCS PV(s) not found. {e}")

    log.info("Removing CRDs")
    crd_list = [
        "backingstores.noobaa.io",
        "bucketclasses.noobaa.io",
        "cephblockpools.ceph.rook.io",
        "cephclusters.ceph.rook.io",
        "cephfilesystems.ceph.rook.io",
        "cephnfses.ceph.rook.io",
        "cephobjectstores.ceph.rook.io",
        "cephobjectstoreusers.ceph.rook.io",
        "noobaas.noobaa.io",
        "ocsinitializations.ocs.openshift.io",
        "storageclusters.ocs.openshift.io",
        "cephclients.ceph.rook.io",
        "cephobjectrealms.ceph.rook.io",
        "cephobjectzonegroups.ceph.rook.io",
        "cephobjectzones.ceph.rook.io",
        "cephrbdmirrors.ceph.rook.io",
    ]

    for crd in crd_list:
        try:
            ocp_obj.exec_oc_cmd(f"delete crd {crd} --timeout=300m")
        except Exception:
            log.info(f"crd {crd} was not found")
Example #4
    def factory(ocs_nodes=False, node_count=3, taint_label=None):
        """
        Args:
            ocs_nodes (bool): True if new nodes are OCS, False otherwise
            node_count (int): Number of nodes to be added
            taint_label (str): Taint label to be added

        """

        new_nodes = []
        if config.ENV_DATA["platform"].lower() in constants.CLOUD_PLATFORMS:
            dt = config.ENV_DATA["deployment_type"]
            if dt == "ipi":
                machines = machine_utils.get_machinesets()
                log.info(
                    f"The worker nodes number before expansion {len(get_worker_nodes())}"
                )
                for machine in machines:
                    new_nodes.append(
                        add_new_node_and_label_it(
                            machine, mark_for_ocs_label=ocs_nodes))

                log.info(
                    f"The worker nodes number after expansion {len(get_worker_nodes())}"
                )

            else:
                log.info(
                    f"The worker nodes number before expansion {len(get_worker_nodes())}"
                )
                if config.ENV_DATA.get("rhel_workers"):
                    node_type = constants.RHEL_OS
                else:
                    node_type = constants.RHCOS

                new_nodes.append(
                    add_new_node_and_label_upi(node_type,
                                               node_count,
                                               mark_for_ocs_label=ocs_nodes))
                log.info(
                    f"The worker nodes number after expansion {len(get_worker_nodes())}"
                )

        elif config.ENV_DATA["platform"].lower() == constants.VSPHERE_PLATFORM:
            log.info(
                f"The worker nodes number before expansion {len(get_worker_nodes())}"
            )
            if config.ENV_DATA.get("rhel_user"):
                node_type = constants.RHEL_OS
            else:
                node_type = constants.RHCOS

            new_nodes.append(
                add_new_node_and_label_upi(node_type,
                                           node_count,
                                           mark_for_ocs_label=ocs_nodes))
            log.info(
                f"The worker nodes number after expansion {len(get_worker_nodes())}"
            )

        nodes = [node for sublist in new_nodes for node in sublist]

        if taint_label:
            assert taint_nodes(
                nodes=nodes, taint_label=taint_label
            ), "Failed to taint nodes"
            log.info(f"Successfully tainted nodes {new_nodes} with {taint_label}")
Example #5
    def factory(ocs_nodes=False, node_count=3, taint_label=None):
        """
        Args:
            ocs_nodes (bool): True if new nodes are OCS, False otherwise
            node_count (int): Number of nodes to be added
            taint_label (str): Taint label to be added

        """

        new_nodes = []
        if config.ENV_DATA["platform"].lower() in constants.CLOUD_PLATFORMS:
            dt = config.ENV_DATA["deployment_type"]
            if dt == "ipi":
                machines = machine_utils.get_machinesets()
                log.info(
                    f"The worker nodes number before expansion {len(get_worker_nodes())}"
                )
                for machine in machines:
                    new_nodes.append(
                        add_new_node_and_label_it(
                            machine, mark_for_ocs_label=ocs_nodes))

                log.info(
                    f"The worker nodes number after expansion {len(get_worker_nodes())}"
                )

            else:
                log.info(
                    f"The worker nodes number before expansion {len(get_worker_nodes())}"
                )
                if config.ENV_DATA.get("rhel_workers"):
                    node_type = constants.RHEL_OS
                else:
                    node_type = constants.RHCOS

                new_nodes.append(
                    add_new_node_and_label_upi(node_type,
                                               node_count,
                                               mark_for_ocs_label=ocs_nodes))
                log.info(
                    f"The worker nodes number after expansion {len(get_worker_nodes())}"
                )

        elif config.ENV_DATA["platform"].lower() == constants.VSPHERE_PLATFORM:
            pytest.skip("Skipping add node in Vmware platform due to "
                        "https://bugzilla.redhat.com/show_bug.cgi?id=1844521")
            # Issue to remove skip code https://github.com/red-hat-storage/ocs-ci/issues/2403
            # log.info(
            #     f"The worker nodes number before expansion {len(get_worker_nodes())}"
            # )
            # if config.ENV_DATA.get("rhel_user"):
            #     pytest.skip("Skipping add RHEL node, code unavailable")
            # node_type = constants.RHCOS
            # new_nodes.append(
            #     add_new_node_and_label_upi(node_type, num_nodes=node_count)
            # )
            # log.info(
            #     f"The worker nodes number after expansion {len(get_worker_nodes())}"
            # )

        nodes = [node for sublist in new_nodes for node in sublist]

        if taint_label:
            assert taint_nodes(
                nodes=nodes, taint_label=taint_label
            ), "Failed to taint nodes"
            log.info(f"Successfully tainted nodes {new_nodes} with {taint_label}")