Example #1
def check_local_volume_local_volume_set():
    """
    Function to check if Local Volume and Local Volume Set are present or not

    Returns:
        dict: dict with 'localvolume' and 'localvolumeset' keys set to True/False

    """

    lv_or_lvs_dict = {}
    logger.info("Checking if Local Volume is Present")

    if csv.get_csvs_start_with_prefix(
            csv_prefix=defaults.LOCAL_STORAGE_OPERATOR_NAME,
            namespace=config.ENV_DATA["local_storage_namespace"],
    ):
        ocp_obj = OCP()
        command = f"get localvolume local-block -n {config.ENV_DATA['local_storage_namespace']} "
        try:
            ocp_obj.exec_oc_cmd(command, out_yaml_format=False)
            lv_or_lvs_dict["localvolume"] = True
        except CommandFailed as ex:
            logger.debug(f"Local volume does not exists! Exception: {ex}")
            logger.info("No Local volume found")
            lv_or_lvs_dict["localvolume"] = False

        logger.info("Checking if Local Volume Set is Present")
        if csv.get_csvs_start_with_prefix(
                csv_prefix=defaults.LOCAL_STORAGE_OPERATOR_NAME,
                namespace=config.ENV_DATA["local_storage_namespace"],
        ):
            ocp_obj = OCP()
            command = (
                f"get {constants.LOCAL_VOLUME_SET} {constants.LOCAL_BLOCK_RESOURCE} "
                f"-n {config.ENV_DATA['local_storage_namespace']} ")
            try:
                ocp_obj.exec_oc_cmd(command, out_yaml_format=False)
                lv_or_lvs_dict["localvolumeset"] = True
            except CommandFailed as ex:
                logger.debug(
                    f"Local volume set does not exist! Exception: {ex}")
                lv_or_lvs_dict["localvolumeset"] = False

        return lv_or_lvs_dict
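
A minimal usage sketch for Example #1 (the caller below is hypothetical, not part of the original): the returned dict can drive branching in deployment or teardown logic, and the 'or {}' guards against the implicit None returned when the local storage CSV is absent.

# Hypothetical caller; assumes the same module-level logger as Example #1.
lv_status = check_local_volume_local_volume_set() or {}
if lv_status.get("localvolume"):
    logger.info("LocalVolume resource is present")
if lv_status.get("localvolumeset"):
    logger.info("LocalVolumeSet resource is present")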
Example #2
def check_local_volume():
    """
    Function to check if Local-volume is present or not

    Returns:
        bool: True if LV present, False if LV not present

    """

    if csv.get_csvs_start_with_prefix(
        csv_prefix=defaults.LOCAL_STORAGE_OPERATOR_NAME,
        namespace=constants.LOCAL_STORAGE_NAMESPACE
    ):
        ocp_obj = OCP()
        command = "get localvolume local-block -n local-storage "
        status = ocp_obj.exec_oc_cmd(command, out_yaml_format=False)
        return "No resources found" not in status
Example #3
def check_local_volume():
    """
    Function to check if Local-volume is present or not

    Returns:
        bool: True if LV present, False if LV not present

    """

    if csv.get_csvs_start_with_prefix(
            csv_prefix=defaults.LOCAL_STORAGE_OPERATOR_NAME,
            namespace=config.ENV_DATA['local_storage_namespace']):
        ocp_obj = OCP()
        command = f"get localvolume local-block -n {config.ENV_DATA['local_storage_namespace']} "
        try:
            status = ocp_obj.exec_oc_cmd(command, out_yaml_format=False)
        except CommandFailed as ex:
            logger.debug(f"Local volume does not exists! Exception: {ex}")
            return False
        return "No resources found" not in status
Example #4
def get_ocs_build_number():
    """
    Gets the build number for ocs operator

    Returns:
        str: build number for ocs operator version

    """
    # Importing here to avoid circular dependency
    from ocs_ci.ocs.resources.csv import get_csvs_start_with_prefix

    build_num = ""
    if config.REPORTING['us_ds'] == 'DS':
        build_str = get_csvs_start_with_prefix(
            defaults.OCS_OPERATOR_NAME, defaults.ROOK_CLUSTER_NAMESPACE,
        )
        try:
            return build_str[0]['metadata']['name'].partition('.')[2]
        except (IndexError, AttributeError):
            logging.warning("No version info found for OCS operator")
    return build_num
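
A short usage sketch for Example #4 (illustrative only): the returned build number is typically logged or attached to test reports, and may be an empty string when no CSV is found.

# Hypothetical caller; mirrors the module-level logging used in Example #4.
ocs_build = get_ocs_build_number()
logging.info(f"OCS operator build number: {ocs_build or 'not available'}")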
Example #5
    def setup_quay_operator(self):
        """
        Deploys Quay operator

        """
        quay_operator_data = templating.load_yaml(file=constants.QUAY_SUB)
        self.quay_operator = OCS(**quay_operator_data)
        logger.info(f"Installing Quay operator: {self.quay_operator.name}")
        self.quay_operator.create()
        for quay_pod in TimeoutSampler(300, 10, get_pod_name_by_pattern,
                                       constants.QUAY_OPERATOR,
                                       self.namespace):
            if quay_pod:
                self.quay_pod_obj.wait_for_resource(
                    condition=constants.STATUS_RUNNING,
                    resource_name=quay_pod[0],
                    sleep=30,
                    timeout=600,
                )
                break
        self.quay_operator_csv = get_csvs_start_with_prefix(
            csv_prefix=constants.QUAY_OPERATOR,
            namespace=self.namespace,
        )[0]["metadata"]["name"]
Example #6
    def test_pod_disruptions(self, create_pvcs_and_pods):
        """
        Test to perform pod disruption in consumer and provider cluster

        """
        # Lists of pods to be disrupted. Separate lists are used for consumer
        # and provider to keep the implementation simple.
        pods_on_consumer = [
            "alertmanager_managed_ocs_alertmanager",
            "ocs_osd_controller_manager",
            "prometheus_managed_ocs_prometheus",
            "prometheus_operator",
            "ocs_operator",
        ]
        pods_on_provider = [
            "alertmanager_managed_ocs_alertmanager",
            "ocs_osd_controller_manager",
            "prometheus_managed_ocs_prometheus",
            "prometheus_operator",
            "ocs_provider_server",
            "ocs_operator",
        ]
        disruption_on_consumer = []
        disruption_on_provider = []

        # Start I/O
        log.info("Starting fio on all pods")
        for pod_obj in self.io_pods:
            if pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
                storage_type = "block"
                direct = 1
            else:
                storage_type = "fs"
                direct = 0
            pod_obj.run_io(
                storage_type=storage_type,
                size="10G",
                fio_filename=f"{pod_obj.name}",
                runtime=320,
                end_fsync=1,
                direct=direct,
                invalidate=0,
                fio_installed=True,
            )

        consumer_index_iter = cycle(self.consumer_indexes)

        # Create Disruptions instance for each pod to be disrupted on consumer
        for pod_type in pods_on_consumer:
            consumer_index = next(consumer_index_iter)
            config.switch_ctx(consumer_index)
            disruption_obj = disruption_helpers.Disruptions()
            # Select each pod to be disrupted from different consumers
            disruption_obj.set_resource(resource=pod_type,
                                        cluster_index=consumer_index)
            disruption_obj.index_of_consumer = consumer_index
            disruption_on_consumer.append(disruption_obj)

        # Create Disruptions instance for each pod to be disrupted on provider
        config.switch_to_provider()
        for pod_type in pods_on_provider:
            disruption_obj = disruption_helpers.Disruptions()
            disruption_obj.set_resource(
                resource=pod_type, cluster_index=self.provider_cluster_index)
            disruption_on_provider.append(disruption_obj)

        # Delete pods on consumer one at a time
        log.info("Starting pod disruptions on consumer clusters")
        for disruptions_obj in disruption_on_consumer:
            disruptions_obj.delete_resource()
            # ocs-operator respin will trigger rook-ceph-tools pod respin.
            # Patch rook-ceph-tools pod to run ceph commands.
            if disruptions_obj.resource == "ocs_operator":
                config.switch_ctx(disruptions_obj.index_of_consumer)
                patch_consumer_toolbox()

        # Delete pods on provider one at a time
        log.info("Starting pod disruptions on provider cluster")
        for disruptions_obj in disruption_on_provider:
            disruptions_obj.delete_resource()

        log.info("Wait for IO to complete on pods")
        for pod_obj in self.io_pods:
            pod_obj.get_fio_results()
            log.info(f"Verified IO on pod {pod_obj.name}")
        log.info("IO is successful on all pods")

        # Performs different checks in the clusters
        cluster_indexes = [self.provider_cluster_index] + self.consumer_indexes
        for cluster_index in cluster_indexes:
            config.switch_ctx(cluster_index)

            # Verify managedocs components are Ready
            log.info("Verifying managedocs components state")
            managedocs_obj = OCP(
                kind="managedocs",
                resource_name="managedocs",
                namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            )
            for component in {"alertmanager", "prometheus", "storageCluster"}:
                assert (
                    managedocs_obj.get()["status"]["components"][component]
                    ["state"] == "Ready"
                ), f"{component} status is {managedocs_obj.get()['status']['components'][component]['state']}"

            # Verify storagecluster status
            log.info("Verifying storagecluster status")
            verify_storage_cluster()

            # Verify CSV status
            for managed_csv in {
                    constants.OCS_CSV_PREFIX,
                    constants.OSD_DEPLOYER,
                    constants.OSE_PROMETHEUS_OPERATOR,
            }:
                csvs = csv.get_csvs_start_with_prefix(
                    managed_csv, constants.OPENSHIFT_STORAGE_NAMESPACE)
                assert (
                    len(csvs) == 1
                ), f"Unexpected number of CSVs with {managed_csv} prefix: {len(csvs)}"
                csv_name = csvs[0]["metadata"]["name"]
                csv_obj = csv.CSV(
                    resource_name=csv_name,
                    namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
                )
                log.info(f"Check if {csv_name} is in Succeeded phase.")
                csv_obj.wait_for_phase(phase="Succeeded", timeout=600)

            # Verify the phase of ceph cluster
            log.info("Verify the phase of ceph cluster")
            cephcluster = OCP(kind="CephCluster",
                              namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
            cephcluster_yaml = cephcluster.get().get("items")[0]
            expected_phase = "Connected"
            if cluster_index == self.provider_cluster_index:
                expected_phase = "Ready"
            assert (
                cephcluster_yaml["status"]["phase"] == expected_phase
            ), f"Status of cephcluster {cephcluster_yaml['metadata']['name']} is {cephcluster_yaml['status']['phase']}"

        # Create PVC and pods on all consumer clusters
        log.info("Creating new PVCs and pods")
        pods = list()
        for cluster_index in self.consumer_indexes:
            config.switch_ctx(cluster_index)
            consumer_cluster_kubeconfig = os.path.join(
                config.clusters[cluster_index].ENV_DATA["cluster_path"],
                config.clusters[cluster_index].RUN.get("kubeconfig_location"),
            )
            pvcs, io_pods = create_pvcs_and_pods(
                pvc_size=self.pvc_size,
                replica_count=1,
                pod_dict_path=constants.PERF_POD_YAML,
            )
            for pvc_obj in pvcs:
                pvc_obj.ocp.cluster_kubeconfig = consumer_cluster_kubeconfig
            for io_pod in io_pods:
                io_pod.ocp.cluster_kubeconfig = consumer_cluster_kubeconfig
            pvcs[0].project.cluster_kubeconfig = consumer_cluster_kubeconfig
            pods.extend(io_pods)

        # Run I/O on new pods
        log.info("Running I/O on new pods")
        for pod_obj in pods:
            if pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
                storage_type = "block"
                direct = 1
            else:
                storage_type = "fs"
                direct = 0
            pod_obj.run_io(
                storage_type=storage_type,
                size="10G",
                fio_filename=f"{pod_obj.name}",
                runtime=320,
                end_fsync=1,
                direct=direct,
                invalidate=0,
                fio_installed=True,
            )

        log.info("Wait for I/O to complete on new pods")
        for pod_obj in pods:
            pod_obj.get_fio_results()
            log.info(f"Verified IO on the new pod {pod_obj.name}")
        log.info("IO is successful on new pods")
Example #7
def verify_managed_service_resources():
    """
    Verify creation and status of resources specific to OSD and ROSA deployments:
    1. ocs-operator, ocs-osd-deployer, ose-prometheus-operator csvs are Succeeded
    2. 1 prometheus pod and 3 alertmanager pods are in Running state
    3. Managedocs components alertmanager, prometheus, storageCluster are in Ready state
    4. Verify that the noobaa-operator replica count is set to 0
    5. Verify managed ocs secrets
    6. If cluster is Provider, verify resources specific to provider clusters
    7. [temporarily left out] Verify Networkpolicy and EgressNetworkpolicy creation
    """
    # Verify CSV status
    for managed_csv in {
            constants.OCS_CSV_PREFIX,
            constants.OSD_DEPLOYER,
            constants.OSE_PROMETHEUS_OPERATOR,
    }:
        csvs = csv.get_csvs_start_with_prefix(
            managed_csv, constants.OPENSHIFT_STORAGE_NAMESPACE)
        assert (
            len(csvs) == 1
        ), f"Unexpected number of CSVs with {managed_csv} prefix: {len(csvs)}"
        csv_name = csvs[0]["metadata"]["name"]
        csv_obj = csv.CSV(resource_name=csv_name,
                          namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        log.info(f"Check if {csv_name} is in Succeeded phase.")
        csv_obj.wait_for_phase(phase="Succeeded", timeout=600)

    # Verify alerting secrets creation
    verify_managed_secrets()

    # Verify alerting pods are Running
    pod_obj = OCP(
        kind="pod",
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for alert_pod in {
        (constants.MANAGED_PROMETHEUS_LABEL, 1),
        (constants.MANAGED_ALERTMANAGER_LABEL, 3),
    }:
        pod_obj.wait_for_resource(condition="Running",
                                  selector=alert_pod[0],
                                  resource_count=alert_pod[1])

    # Verify managedocs components are Ready
    log.info("Getting managedocs components data")
    managedocs_obj = OCP(
        kind="managedocs",
        resource_name="managedocs",
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for component in {"alertmanager", "prometheus", "storageCluster"}:
        assert (
            managedocs_obj.get()["status"]["components"][component]["state"] ==
            "Ready"
        ), f"{component} status is {managedocs_obj.get()['status']['components'][component]['state']}"

    # Verify that the noobaa-operator replica count is set to 0
    noobaa_deployment = deployment.get_deployments_having_label(
        "operators.coreos.com/mcg-operator.openshift-storage=",
        constants.OPENSHIFT_STORAGE_NAMESPACE,
    )[0]
    log.info(f"Noobaa replicas count: {noobaa_deployment.replicas}")
    assert noobaa_deployment.replicas == 0

    # Verify attributes specific to cluster types
    sc = get_storage_cluster()
    sc_data = sc.get()["items"][0]
    if config.ENV_DATA["cluster_type"].lower() == "provider":
        verify_provider_storagecluster(sc_data)
        verify_provider_resources()
    else:
        verify_consumer_storagecluster(sc_data)
Example #8
def ocs_install_verification(timeout=600):
    """
    Perform steps necessary to verify a successful OCS installation

    Args:
        timeout (int): Number of seconds for timeout which will be used in the
            checks used in this function.

    """
    log.info("Verifying OCS installation")
    namespace = config.ENV_DATA['cluster_namespace']

    # Verify Local Storage CSV is in Succeeded phase
    log.info("Verifying Local Storage CSV")
    # There is BZ opened:
    # https://bugzilla.redhat.com/show_bug.cgi?id=1770183
    # which makes this check problematic as current CSV is not the currently
    # installed.
    local_storage_csvs = get_csvs_start_with_prefix(
        csv_prefix=constants.LOCAL_STORAGE_CSV_PREFIX,
        namespace=namespace,
    )
    assert len(local_storage_csvs) == 1, (
        f"There are more than one local storage CSVs: {local_storage_csvs}")
    local_storage_name = local_storage_csvs[0]['metadata']['name']
    log.info(f"Check if local storage operator: {local_storage_name} is in"
             f"Succeeded phase")
    local_storage_csv = CSV(resource_name=local_storage_name,
                            namespace=namespace)
    local_storage_csv.wait_for_phase("Succeeded", timeout=timeout)

    # Verify OCS CSV is in Succeeded phase
    log.info("verifying ocs csv")
    ocs_package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME)
    ocs_csv_name = ocs_package_manifest.get_current_csv()
    ocs_csv = CSV(resource_name=ocs_csv_name, namespace=namespace)
    log.info(f"Check if OCS operator: {ocs_csv_name} is in Succeeded phase.")
    ocs_csv.wait_for_phase(phase="Succeeded", timeout=timeout)

    # Verify OCS Cluster Service (ocs-storagecluster) is Ready
    log.info("Verifying OCS Cluster service")
    storage_clusters = StorageCluster(namespace=namespace)
    for item in storage_clusters.get()['items']:

        storage_cluster_name = item['metadata']['name']
        storage_cluster = StorageCluster(resource_name=storage_cluster_name,
                                         namespace=namespace)
        log.info("Checking status of %s", storage_cluster_name)
        log.info(f"Check if StorageCluster: {local_storage_name} is in"
                 f"Succeeded phase")
        storage_cluster.wait_for_phase(phase='Ready', timeout=timeout)

    # Verify pods in running state and proper counts
    log.info("Verifying pod states and counts")
    pod = OCP(kind=constants.POD, namespace=namespace)
    # ocs-operator
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector=constants.OCS_OPERATOR_LABEL,
                                 timeout=timeout)
    # rook-ceph-operator
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector=constants.OPERATOR_LABEL,
                                 timeout=timeout)
    # noobaa
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector=constants.NOOBAA_APP_LABEL,
                                 resource_count=2,
                                 timeout=timeout)
    # local-storage-operator
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.LOCAL_STORAGE_OPERATOR_LABEL,
        timeout=timeout)
    # mons
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector=constants.MON_APP_LABEL,
                                 resource_count=3,
                                 timeout=timeout)
    # csi-cephfsplugin
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector=constants.CSI_CEPHFSPLUGIN_LABEL,
                                 resource_count=3,
                                 timeout=timeout)
    # csi-cephfsplugin-provisioner
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
        resource_count=2,
        timeout=timeout)
    # csi-rbdplugin
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector=constants.CSI_RBDPLUGIN_LABEL,
                                 resource_count=3,
                                 timeout=timeout)
    # csi-rbdplugin-provisioner
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL,
        resource_count=2,
        timeout=timeout)
    # osds
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector=constants.OSD_APP_LABEL,
                                 resource_count=3,
                                 timeout=timeout)
    # mgr
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector=constants.MGR_APP_LABEL,
                                 timeout=timeout)
    # mds
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector=constants.MDS_APP_LABEL,
                                 resource_count=2,
                                 timeout=timeout)

    # Verify ceph health
    log.info("Verifying ceph health")
    assert utils.ceph_health_check(namespace=namespace)

    # Verify StorageClasses (1 ceph-fs, 1 ceph-rbd)
    log.info("Verifying storage classes")
    storage_class = OCP(kind=constants.STORAGECLASS, namespace=namespace)
    storage_cluster_name = config.ENV_DATA['storage_cluster_name']
    required_storage_classes = {
        f'{storage_cluster_name}-cephfs', f'{storage_cluster_name}-ceph-rbd'
    }
    storage_classes = storage_class.get()
    storage_class_names = {
        item['metadata']['name']
        for item in storage_classes['items']
    }
    assert required_storage_classes.issubset(storage_class_names)

    # Verify OSDs are distributed
    log.info("Verifying OSDs are distributed evenly across worker nodes")
    ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
    osds = ocp_pod_obj.get(selector=constants.OSD_APP_LABEL)['items']
    node_names = [osd['spec']['nodeName'] for osd in osds]
    for node in node_names:
        assert not node_names.count(node) > 1, (
            "OSD's are not distributed evenly across worker nodes")
Example #9
def verify_managed_service_resources():
    """
    Verify creation and status of resources specific to OSD and ROSA deployments:
    1. ocs-operator, ocs-osd-deployer, ose-prometheus-operator csvs are Succeeded
    2. ocs-converged-pagerduty, ocs-converged-smtp, ocs-converged-deadmanssnitch secrets
    exist in openshift-storage namespace
    3. 1 prometheus pod and 3 alertmanager pods are in Running state
    4. Managedocs components alertmanager, prometheus, storageCluster are in Ready state
    5. Networkpolicy and EgressNetworkpolicy resources are present
    """
    # Verify CSV status
    for managed_csv in {
            constants.OCS_CSV_PREFIX,
            constants.OSD_DEPLOYER,
            constants.OSE_PROMETHEUS_OPERATOR,
    }:
        csvs = csv.get_csvs_start_with_prefix(
            managed_csv, constants.OPENSHIFT_STORAGE_NAMESPACE)
        assert (
            len(csvs) == 1
        ), f"Unexpected number of CSVs with {managed_csv} prefix: {len(csvs)}"
        csv_name = csvs[0]["metadata"]["name"]
        csv_obj = csv.CSV(resource_name=csv_name,
                          namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
        log.info(f"Check if {csv_name} is in Succeeded phase.")
        csv_obj.wait_for_phase(phase="Succeeded", timeout=600)

    # Verify alerting secrets creation
    secret_ocp_obj = OCP(kind="secret",
                         namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
    for secret_name in {
            constants.MANAGED_SMTP_SECRET,
            constants.MANAGED_PAGERDUTY_SECRET,
            constants.MANAGED_DEADMANSSNITCH_SECRET,
    }:
        assert secret_ocp_obj.is_exist(
            resource_name=secret_name
        ), f"{secret_name} does not exist in openshift-storage namespace"

    # Verify alerting pods are Running
    pod_obj = OCP(
        kind="pod",
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for alert_pod in {
        (constants.MANAGED_PROMETHEUS_LABEL, 1),
        (constants.MANAGED_ALERTMANAGER_LABEL, 3),
    }:
        pod_obj.wait_for_resource(condition="Running",
                                  selector=alert_pod[0],
                                  resource_count=alert_pod[1])

    # Verify managedocs components are Ready
    log.info("Getting managedocs components data")
    managedocs_obj = OCP(
        kind="managedocs",
        resource_name="managedocs",
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for component in {"alertmanager", "prometheus", "storageCluster"}:
        assert (
            managedocs_obj.get()["status"]["components"][component]["state"] ==
            "Ready"
        ), f"{component} status is {managedocs_obj.get()['status']['components'][component]['state']}"

    # Verify Networkpolicy and EgressNetworkpolicy creation
    for policy in {
        ("Networkpolicy", "ceph-ingress-rule"),
        ("EgressNetworkpolicy", "egress-rule"),
    }:
        policy_obj = OCP(
            kind=policy[0],
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
        )
        assert policy_obj.is_exist(
            resource_name=policy[1]
        ), f"{policy[0]} {policy}[1] does not exist in openshift-storage namespace"