def test_drain_mcg_pod_node(self, node_drain_teardown,
                                reduce_and_resume_cluster_load, pod_to_drain):
        """
        Test drainage of nodes which contain NB resources

        """
        # Retrieve the relevant pod object
        pod_obj = pod.Pod(**pod.get_pods_having_label(
            label=self.labels_map[pod_to_drain],
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )[0])
        # Retrieve the node name on which the pod resides
        node_name = pod_obj.get()["spec"]["nodeName"]
        # Drain the node
        drain_nodes([node_name])
        # Verify the node was drained properly
        wait_for_nodes_status([node_name],
                              status=constants.NODE_READY_SCHEDULING_DISABLED)
        # Retrieve the new pod that should've been created post-drainage
        pod_obj = pod.Pod(**pod.get_pods_having_label(
            label=self.labels_map[pod_to_drain],
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )[0])
        # Verify that the new pod has reached 'Running' status again and recovered successfully
        wait_for_resource_state(pod_obj, constants.STATUS_RUNNING, timeout=120)
        # Check the NB status to verify the system is healthy
        self.cl_obj.wait_for_noobaa_health_ok()
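
Note: the labels_map used above is a class-level mapping from the pod_to_drain parameter to pod label selectors. A minimal sketch of what it might contain, based on the constants used in the later examples (the exact set of keys is an assumption):

    # Hypothetical class attribute; keys mirror the pod_to_drain/resource_to_delete
    # parameters used throughout these examples.
    labels_map = {
        "noobaa_core": constants.NOOBAA_CORE_POD_LABEL,
        "noobaa_db": constants.NOOBAA_DB_LABEL,
        "noobaa_operator": constants.NOOBAA_OPERATOR_POD_LABEL,
        "noobaa_endpoint": constants.NOOBAA_ENDPOINT_POD_LABEL,
    }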
Example 2
def collect_noobaa_db_dump(log_dir_path):
    """
    Collect the Noobaa DB dump

    Args:
        log_dir_path (str): directory for dumped Noobaa DB

    """
    from ocs_ci.ocs.resources.pod import (
        get_pods_having_label,
        download_file_from_pod,
        Pod,
    )

    nb_db_pod = Pod(
        **get_pods_having_label(
            label=constants.NOOBAA_DB_LABEL, namespace=defaults.ROOK_CLUSTER_NAMESPACE
        )[0]
    )
    ocs_log_dir_path = os.path.join(log_dir_path, "noobaa_db_dump")
    create_directory_path(ocs_log_dir_path)
    ocs_log_dir_path = os.path.join(ocs_log_dir_path, "nbcore.gz")
    nb_db_pod.exec_cmd_on_pod("mongodump --archive=nbcore.gz --gzip --db=nbcore")
    download_file_from_pod(
        pod_name=nb_db_pod.name,
        remotepath="/opt/app-root/src/nbcore.gz",
        localpath=ocs_log_dir_path,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
    )
Example 3
    def get_new_pods(self, pod_list):
        """
        Fetches info about the respun pods in the cluster

        Args:
            pod_list (list): list of previous pod objects

        Returns:
            list : list of respun pod objects

        """
        new_pods = []
        for pod_obj in pod_list:
            if any(fragment in pod_obj.name for fragment in ["mon", "osd"]):
                pod_label = pod_obj.labels.get("pod-template-hash")
                label_selector = f"pod-template-hash={pod_label}"
            else:
                pod_label = pod_obj.labels.get("deploymentconfig")
                label_selector = f"deploymentconfig={pod_label}"

            pods_data = pod.get_pods_having_label(label_selector,
                                                  pod_obj.namespace)
            for pod_data in pods_data:
                pod_name = pod_data.get("metadata").get("name")
                if "-deploy" not in pod_name and pod_name not in pod_obj.name:
                    new_pods.append(
                        pod.get_pod_obj(pod_name, pod_obj.namespace))
        logger.info(f"Previous pods: {[pod_obj.name for pod_obj in pod_list]}")
        logger.info(f"Respun pods: {[pod_obj.name for pod_obj in new_pods]}")
        return new_pods
Example 5
def collect_noobaa_db_dump(log_dir_path):
    """
    Collect the Noobaa DB dump

    Args:
        log_dir_path (str): directory for dumped Noobaa DB

    """
    from ocs_ci.ocs.resources.pod import (
        get_pods_having_label,
        download_file_from_pod,
        Pod,
    )

    nb_db_label = (constants.NOOBAA_DB_LABEL_46_AND_UNDER
                   if float(ocsci_config.ENV_DATA["ocs_version"]) < 4.7 else
                   constants.NOOBAA_DB_LABEL_47_AND_ABOVE)
    nb_db_pod = Pod(**get_pods_having_label(
        label=nb_db_label, namespace=defaults.ROOK_CLUSTER_NAMESPACE)[0])
    ocs_log_dir_path = os.path.join(log_dir_path, "noobaa_db_dump")
    create_directory_path(ocs_log_dir_path)
    ocs_log_dir_path = os.path.join(ocs_log_dir_path, "nbcore.gz")
    if float(ocsci_config.ENV_DATA["ocs_version"]) < 4.7:
        cmd = "mongodump --archive=nbcore.gz --gzip --db=nbcore"
    else:
        cmd = 'bash -c "pg_dump nbcore | gzip > nbcore.gz"'

    nb_db_pod.exec_cmd_on_pod(cmd)
    download_file_from_pod(
        pod_name=nb_db_pod.name,
        remotepath="/opt/app-root/src/nbcore.gz",
        localpath=ocs_log_dir_path,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
    )
Example 6
def get_registry_pod_obj():
    """
    Function to get registry pod obj

    Returns:
        pod_obj (list): List of Registry pod objs

    Raises:
        UnexpectedBehaviour: When image-registry pod is not present.

    """
    # Sometimes, when there is an update in the image registry config CRD, there
    # will be 2 registry pods, i.e. the old pod is terminating while a new pod
    # comes up based on the new CRD, so the loop below waits until the old pod
    # terminates
    wait_time = 30
    for iteration in range(10):
        pod_data = pod.get_pods_having_label(
            label='docker-registry=default',
            namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE)
        pod_obj = [pod.Pod(**data) for data in pod_data]
        if len(pod_obj) == 1:
            break
        elif len(pod_obj) == 0:
            raise UnexpectedBehaviour("Image-registry pod not present")
        elif iteration > 5:
            raise UnexpectedBehaviour(
                "Waited for 3 mins Image-registry pod is not in Running state")
        else:
            logger.info(
                f"Waiting for 30 sec's for registry pod to be up iteration {iteration}"
            )
            time.sleep(wait_time)
    return pod_obj
Example 7
        def _check_resources_deleted(namespace=None):
            """
            Check whether the resources of the PV pool backingstore were deleted properly

            Args:
                namespace (str): backing store's namespace

            Returns:
                bool: True if the PV, PVC(s) and pods were deleted

            """
            try:
                OCP(kind=constants.PV, resource_name=pv_name).get()
                log.warning(f"Found PV leftovers belonging to {self.name}")
                return False
            except CommandFailed as e:
                if "not found" in str(e):
                    pass
                else:
                    raise
            pvcs = get_all_pvcs(namespace=namespace,
                                selector=f"pool={self.name}")
            pods = get_pods_having_label(namespace=namespace,
                                         label=f"pool={self.name}")
            return len(pvcs["items"]) == 0 and len(pods) == 0
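
Note: the helper above performs a single check; the enclosing delete flow typically polls it until it returns True. A minimal sketch of such polling, reusing the TimeoutSampler/TimeoutExpiredError pattern shown in a later example (timeout and sleep values are illustrative, and the lambda avoids assuming keyword forwarding by the sampler):

        try:
            # Poll the check until it reports that all leftovers are gone
            for deleted in TimeoutSampler(
                timeout=120,
                sleep=15,
                func=lambda: _check_resources_deleted(namespace=namespace),
            ):
                if deleted:
                    break
        except TimeoutExpiredError:
            log.error(
                f"Leftover resources of backingstore {self.name} were not deleted in time"
            )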
    def test_db_scc(self, teardown):
        """
        Test that the noobaa db pod is assigned the anyuid SCC after changing the default noobaa SCC

        """
        scc_name = constants.NOOBAA_DB_SERVICE_ACCOUNT_NAME
        service_account = constants.NOOBAA_DB_SERVICE_ACCOUNT
        pod_obj = pod.Pod(**pod.get_pods_having_label(
            label=self.labels_map["noobaa_db"],
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )[0])
        ocp_scc = ocp.OCP(kind=constants.SCC,
                          namespace=defaults.ROOK_CLUSTER_NAMESPACE)
        pod_data = pod_obj.get()

        log.info(f"Verifying current SCC is {scc_name} in db pod")
        assert (pod_data.get("metadata").get("annotations").get(
            "openshift.io/scc") == scc_name), "Invalid default scc"

        log.info("Deleting the user array from the Noobaa scc")
        ocp_scc.patch(
            resource_name=scc_name,
            params='[{"op": "remove", "path": "/users/0", '
            f'"value":{service_account}}}]',
            format_type="json",
        )
        assert not helpers.validate_scc_policy(
            sa_name=scc_name,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            scc_name=scc_name,
        ), "SA name is  present in noobaa scc"
        log.info("Adding the noobaa system sa user to anyuid scc")
        ocp_scc.patch(
            resource_name=constants.ANYUID,
            params='[{"op": "add", "path": "/users/0", '
            f'"value":{service_account}}}]',
            format_type="json",
        )
        assert helpers.validate_scc_policy(
            sa_name=scc_name,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            scc_name=constants.ANYUID,
        ), "SA name is not present in anyuid scc"

        pod_obj.delete(force=True)
        # Verify that the new pod has reached 'Running' status
        assert pod_obj.ocp.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=self.labels_map["noobaa_db"],
            resource_count=1,
            timeout=300,
        ), "Noobaa pod did not reach running state"
        pod_data = pod_obj.get()
        log.info("Verifying SCC is now anyuid in the db pod")
        assert (pod_data.get("metadata").get("annotations").get(
            "openshift.io/scc") == constants.ANYUID), "Invalid scc"
        # Check the NB status to verify the system is healthy
        self.cl_obj.wait_for_noobaa_health_ok()
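
Note: the JSON-patch payloads above are assembled by string concatenation, which is why the trailing "}}}" escaping is needed. A sketch of an alternative that builds the same kind of payload with json.dumps (this assumes the service account constant is a plain string, since json.dumps will add the quoting around the value itself):

        import json

        # Hypothetical equivalent of the "add" patch above, built as Python objects
        patch = [{"op": "add", "path": "/users/0", "value": service_account}]
        ocp_scc.patch(
            resource_name=constants.ANYUID,
            params=json.dumps(patch),
            format_type="json",
        )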
Example 9
    def test_noobaa_kms_validation(self):
        """
        Validate from the operator logs that NooBaa is successfully integrated with KMS.
        """
        operator_pod = pod.get_pods_having_label(
            label=constants.NOOBAA_OPERATOR_POD_LABEL,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )[0]
        operator_logs = pod.get_pod_logs(
            pod_name=operator_pod["metadata"]["name"])
        assert "found root secret in external KMS successfully" in operator_logs
Example 10
    def pods(self):
        """
        Returns list of pods of the Deployment resource

        Returns:
            list: Deployment's pods
        """
        selectors = self.data.get("spec").get("selector").get("matchLabels")
        selectors = [f"{key}={selectors[key]}" for key in selectors.keys()]
        selectors_string = ",".join(selectors)
        return [
            Pod(**pod_data) for pod_data in get_pods_having_label(
                selectors_string, self.namespace)
        ]
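
For illustration, a matchLabels block with two keys is turned by the comprehension above into the comma-separated selector string expected by get_pods_having_label (label keys and values here are made up):

    match_labels = {"app": "noobaa", "noobaa-db": "postgres"}
    selector = ",".join(f"{key}={value}" for key, value in match_labels.items())
    # selector == "app=noobaa,noobaa-db=postgres"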
    def test_deployer_logs_not_empty(self):
        """
        Test that the logs of the manager container of the ocs-osd-controller-manager pod are not empty
        """
        deployer_pod = pod.get_pods_having_label(
            constants.MANAGED_CONTROLLER_LABEL,
            constants.OPENSHIFT_STORAGE_NAMESPACE)[0]
        deployer_logs = pod.get_pod_logs(
            pod_name=deployer_pod["metadata"]["name"], container="manager")
        log_lines = deployer_logs.split("\n")
        for line in log_lines:
            if "ERR" in line:
                log.info(f"{line}")
        log.info(f"Deployer log has {len(log_lines)} lines.")
        assert len(log_lines) > 100
Example 12
def wait_for_active_pods(job, desired_count, timeout=3):
    """
    Wait for job to load desired number of active pods in time specified
    in timeout.

    Args:
        job (obj): OCS job object
        desired_count (int): Number of desired active pods for provided job
        timeout (int): Number of seconds to wait for the job to get into state

    Returns:
        bool: If job has desired number of active pods

    """
    job_name = job.name
    log.info(f"Checking number of active pods for job {job_name}")

    def _retrieve_job_state():
        job_obj = job.ocp.get(resource_name=job_name, out_yaml_format=True)
        return job_obj["status"]["active"]

    try:
        for state in TimeoutSampler(timeout=timeout,
                                    sleep=3,
                                    func=_retrieve_job_state):
            if state == desired_count:
                return True
            else:
                log.debug(f"Number of active pods for job {job_name}: {state}")
    except TimeoutExpiredError:
        log.error(
            f"Job {job_name} doesn't have correct number of active pods ({desired_count})"
        )
        job_pods = pod.get_pods_having_label(f"job-name={job_name}",
                                             job.namespace)
        for job_pod in job_pods:
            log.info(
                f"Description of job pod {job_pod['metadata']['name']}: {job_pod}"
            )
            pod_logs = pod.get_pod_logs(
                job_pod["metadata"]["name"],
                namespace=job_pod["metadata"]["namespace"])
            log.info(f"Logs from job pod {job_pod['metadata']['name']} are "
                     f"available on DEBUG level")
            log.debug(
                f"Logs from job pod {job_pod['metadata']['name']}: {pod_logs}")

        return False
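
A minimal usage sketch for the helper above (the job object name and the desired pod count are illustrative):

    # Assumes `scale_job` is an OCS job object created earlier in the test
    if not wait_for_active_pods(scale_job, desired_count=3, timeout=60):
        log.error(f"Job {scale_job.name} did not reach 3 active pods in time")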
Example 13
def collect_noobaa_db_dump(log_dir_path):
    """
    Collect the Noobaa DB dump

    Args:
        log_dir_path (str): directory for dumped Noobaa DB

    """
    from ocs_ci.ocs.resources.pod import (
        get_pods_having_label,
        download_file_from_pod,
        Pod,
    )

    ocs_version = version.get_semantic_ocs_version_from_config()
    nb_db_label = (
        constants.NOOBAA_DB_LABEL_46_AND_UNDER
        if ocs_version < version.VERSION_4_7
        else constants.NOOBAA_DB_LABEL_47_AND_ABOVE
    )
    try:
        nb_db_pod = Pod(
            **get_pods_having_label(
                label=nb_db_label, namespace=defaults.ROOK_CLUSTER_NAMESPACE
            )[0]
        )
    except IndexError:
        log.warning(
            "Unable to find pod using label `%s` in namespace `%s`",
            nb_db_label,
            defaults.ROOK_CLUSTER_NAMESPACE,
        )
        return
    ocs_log_dir_path = os.path.join(log_dir_path, "noobaa_db_dump")
    create_directory_path(ocs_log_dir_path)
    ocs_log_dir_path = os.path.join(ocs_log_dir_path, "nbcore.gz")
    if ocs_version < version.VERSION_4_7:
        cmd = "mongodump --archive=nbcore.gz --gzip --db=nbcore"
    else:
        cmd = 'bash -c "pg_dump nbcore | gzip > nbcore.gz"'

    nb_db_pod.exec_cmd_on_pod(cmd)
    download_file_from_pod(
        pod_name=nb_db_pod.name,
        remotepath="/opt/app-root/src/nbcore.gz",
        localpath=ocs_log_dir_path,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
    )
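
A minimal usage sketch for the collector above, typically called from log-gathering code when a test fails (the destination directory is illustrative; the function creates a noobaa_db_dump sub-directory inside it and stores nbcore.gz there):

    collect_noobaa_db_dump("/tmp/ocs-ci-logs")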
    def test_delete_noobaa_resources(self, resource_to_delete):
        """
        Test deletion of Noobaa resources and check Noobaa health

        """
        pod_obj = pod.Pod(**pod.get_pods_having_label(
            label=self.labels_map[resource_to_delete],
            namespace=defaults.ROOK_CLUSTER_NAMESPACE)[0])

        pod_obj.delete(force=True)
        assert pod_obj.ocp.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=self.labels_map[resource_to_delete],
            resource_count=1,
            timeout=300)
        self.cl_obj.wait_for_noobaa_health_ok()
Example 15
def check_resources_deleted(backingstore_name, namespace=None):
    """
    Check whether the resources of the PV pool backingstore were deleted properly

    Args:
        backingstore_name (str): backingstore name
        namespace (str): backing store's namespace

    Returns:
        bool: True if the PVC(s) and pods were deleted

    """
    pvcs = get_all_pvcs(namespace=namespace,
                        selector=f"pool={backingstore_name}")
    pods = get_pods_having_label(namespace=namespace,
                                 label=f"pool={backingstore_name}")
    return len(pvcs["items"]) == 0 and len(pods) == 0
    def finalizer():
        scc_name = constants.NOOBAA_DB_SERVICE_ACCOUNT_NAME
        service_account = constants.NOOBAA_DB_SERVICE_ACCOUNT
        pod_obj = pod.Pod(**pod.get_pods_having_label(
            label=self.labels_map["noobaa_db"],
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )[0])
        pod_data_list = pod_obj.get()
        ocp_scc = ocp.OCP(kind=constants.SCC,
                          namespace=defaults.ROOK_CLUSTER_NAMESPACE)
        if helpers.validate_scc_policy(
                sa_name=scc_name,
                namespace=defaults.ROOK_CLUSTER_NAMESPACE,
                scc_name=constants.ANYUID,
        ):
            ocp_scc.patch(
                resource_name=constants.ANYUID,
                params='[{"op": "remove", "path": "/users/0", '
                f'"value":{service_account}}}]',
                format_type="json",
            )
        if not helpers.validate_scc_policy(
                sa_name=scc_name,
                namespace=defaults.ROOK_CLUSTER_NAMESPACE,
                scc_name=scc_name,
        ):
            ocp_scc.patch(
                resource_name=scc_name,
                params='[{"op": "add", "path": "/users/0", '
                f'"value":{service_account}}}]',
                format_type="json",
            )
        if (pod_data_list.get("metadata").get("annotations").get(
                "openshift.io/scc") == constants.ANYUID):
            pod_obj.delete(force=True)
            assert pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector=self.labels_map["noobaa_db"],
                resource_count=1,
                timeout=300,
            ), "Noobaa pod did not reach running state"
            pod_data_list = pod_obj.get()
            assert (pod_data_list.get("metadata").get("annotations").get(
                "openshift.io/scc") == scc_name), "Invalid scc"
Example 17
    def test_restart_noobaa_resources(self, resource_to_delete):
        """
        Test restart of Noobaa resources and check Noobaa health

        """
        labels_map = {
            'noobaa_core': constants.NOOBAA_CORE_POD_LABEL,
            'noobaa_db': constants.NOOBAA_DB_LABEL
        }
        pod_obj = self.resource_obj = pod.Pod(**pod.get_pods_having_label(
            label=labels_map[resource_to_delete],
            namespace=defaults.ROOK_CLUSTER_NAMESPACE)[0])

        pod_obj.delete(force=True)
        assert pod_obj.ocp.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=labels_map[resource_to_delete],
            resource_count=1,
            timeout=300)
        self.cl_obj.wait_for_noobaa_health_ok()
Example 18
def verify_noobaa_endpoint_count():
    """
    Verify noobaa endpoints
    """
    ocs_version = version.get_semantic_ocs_version_from_config()
    disable_noobaa = config.COMPONENTS["disable_noobaa"]
    managed_service = (config.ENV_DATA["platform"].lower()
                       in constants.MANAGED_SERVICE_PLATFORMS)
    max_eps = (constants.MAX_NB_ENDPOINT_COUNT
               if ocs_version >= version.VERSION_4_6 else 1)
    if config.ENV_DATA.get("platform") == constants.IBM_POWER_PLATFORM:
        max_eps = 1
    if not (disable_noobaa or managed_service):
        nb_ep_pods = get_pods_having_label(
            label=constants.NOOBAA_ENDPOINT_POD_LABEL,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )
        assert len(nb_ep_pods) <= max_eps, (
            f"The number of running NooBaa endpoint pods ({len(nb_ep_pods)}) "
            f"is greater than the maximum defined in the NooBaa CR ({max_eps})"
        )
Example 19
    def workloads_dir_setup(self, request):
        """
        Setting up the environment for the test

        """
        if config.DEPLOYMENT.get("local_storage"):
            self.worker_node = node.get_worker_nodes()[0]
            self.oc_cmd = OCP(namespace=defaults.ROOK_CLUSTER_NAMESPACE)
            mon_pod_name = self.oc_cmd.exec_oc_debug_cmd(
                node=self.worker_node,
                cmd_list=["ls /var/lib/rook/ | grep mon"],
            )
            mon_pod_id = mon_pod_name.split("-")[1].replace("\n", "")

            mon_pods_info = pod.get_pods_having_label(
                label=f"ceph_daemon_id={mon_pod_id}",
                namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            )
            self.mon_pod = pod.get_pod_obj(
                name=mon_pods_info[0]["metadata"]["name"],
                namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            )
        else:
            self.mon_pod = random.choice(pod.get_mon_pods())
        self.mon_suffix = self.mon_pod.get().get("metadata").get("labels").get(
            "mon")

        self.workloads_dir = f"/var/lib/ceph/mon/ceph-{self.mon_suffix}/workloads"
        log.info(f"Selected mon '{self.mon_pod.name}'")
        self.mon_pod.exec_cmd_on_pod(f"mkdir {self.workloads_dir}")
        self.mon_pod.exec_cmd_on_pod(f"touch {self.workloads_dir}/{TEMP_FILE}")

        def finalizer():
            self.mon_pod.exec_cmd_on_pod(f"rm -rf {self.workloads_dir}")
            time.sleep(SLEEP_TIMEOUT)
            utils.ceph_health_check()

        request.addfinalizer(finalizer)
    def test_provider_server_logs(self):
        """
        Test that the logs of the ocs-provider-server pod have entries for each consumer
        """
        provider_pod = pod.get_pods_having_label(
            constants.PROVIDER_SERVER_LABEL,
            constants.OPENSHIFT_STORAGE_NAMESPACE)[0]
        provider_logs = pod.get_pod_logs(
            pod_name=provider_pod["metadata"]["name"])
        log_lines = provider_logs.split("\n")
        consumer_names = managedservice.get_consumer_names()
        for consumer_name in consumer_names:
            expected_log = (
                f'successfully Enabled the StorageConsumer resource "{consumer_name}"'
            )
            log_found = False
            for line in log_lines:
                if expected_log in line:
                    log_found = True
                    log.info(
                        f"'{expected_log}' found in ocs-provider-server logs")
                    break
            assert log_found, f"'{expected_log}' not found in ocs-provider-server logs"
Example 21
def get_spun_dc_pods(pod_list):
    """
    Fetches info about the re-spun dc pods

    Args:
        pod_list (list): list of previous pod objects

    Returns:
        list : list of respun pod objects

    """
    new_pods = []
    for pod_obj in pod_list:
        pod_label = pod_obj.labels.get("deploymentconfig")
        label_selector = f"deploymentconfig={pod_label}"

        pods_data = pod.get_pods_having_label(label_selector, pod_obj.namespace)
        for pod_data in pods_data:
            pod_name = pod_data.get("metadata").get("name")
            if "-deploy" not in pod_name and pod_name not in pod_obj.name:
                new_pods.append(pod.get_pod_obj(pod_name, pod_obj.namespace))
    logger.info(f"Previous pods: {[pod_obj.name for pod_obj in pod_list]}")
    logger.info(f"Re-spun pods: {[pod_obj.name for pod_obj in new_pods]}")
    return new_pods
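
A hedged usage sketch for the helper above, assuming `dc_pod_objs` is a list of DeploymentConfig-based app pod objects and that the respin is triggered by a forced delete (purely illustrative):

    for dc_pod in dc_pod_objs:
        dc_pod.delete(force=True)
    respun_pods = get_spun_dc_pods(dc_pod_objs)
    for respun_pod in respun_pods:
        wait_for_resource_state(respun_pod, constants.STATUS_RUNNING, timeout=300)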
Example 22
    def __init__(self, *args, **kwargs):
        """
        Constructor for the MCG class
        """
        self.namespace = config.ENV_DATA["cluster_namespace"]
        self.operator_pod = Pod(**get_pods_having_label(
            constants.NOOBAA_OPERATOR_POD_LABEL, self.namespace)[0])
        self.core_pod = Pod(**get_pods_having_label(
            constants.NOOBAA_CORE_POD_LABEL, self.namespace)[0])

        self.retrieve_noobaa_cli_binary()
        """
        The certificate will be copied on each mcg_obj instantiation since
        the process is so light and quick that the time required for the redundant
        copy is negligible in comparison to the time a hash comparison would take.
        """
        retrieve_default_ingress_crt()

        get_noobaa = OCP(kind="noobaa", namespace=self.namespace).get()

        self.s3_endpoint = (get_noobaa.get("items")[0].get("status").get(
            "services").get("serviceS3").get("externalDNS")[0])
        self.s3_internal_endpoint = (get_noobaa.get("items")[0].get(
            "status").get("services").get("serviceS3").get("internalDNS")[0])
        self.mgmt_endpoint = (get_noobaa.get("items")[0].get("status").get(
            "services").get("serviceMgmt").get("externalDNS")[0]) + "/rpc"
        self.region = config.ENV_DATA["region"]

        creds_secret_name = (get_noobaa.get("items")[0].get("status").get(
            "accounts").get("admin").get("secretRef").get("name"))
        secret_ocp_obj = OCP(kind="secret", namespace=self.namespace)
        creds_secret_obj = secret_ocp_obj.get(creds_secret_name)

        self.access_key_id = base64.b64decode(
            creds_secret_obj.get("data").get("AWS_ACCESS_KEY_ID")).decode(
                "utf-8")
        self.access_key = base64.b64decode(
            creds_secret_obj.get("data").get("AWS_SECRET_ACCESS_KEY")).decode(
                "utf-8")

        self.noobaa_user = base64.b64decode(
            creds_secret_obj.get("data").get("email")).decode("utf-8")
        self.noobaa_password = base64.b64decode(
            creds_secret_obj.get("data").get("password")).decode("utf-8")

        self.noobaa_token = self.retrieve_nb_token()

        self.s3_resource = boto3.resource(
            "s3",
            verify=retrieve_verification_mode(),
            endpoint_url=self.s3_endpoint,
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.access_key,
        )

        self.s3_client = self.s3_resource.meta.client

        if config.ENV_DATA["platform"].lower() == "aws" and kwargs.get(
                "create_aws_creds"):
            (
                self.cred_req_obj,
                self.aws_access_key_id,
                self.aws_access_key,
            ) = self.request_aws_credentials()

            self.aws_s3_resource = boto3.resource(
                "s3",
                endpoint_url="https://s3.amazonaws.com",
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_access_key,
            )

        if (config.ENV_DATA["platform"].lower() in constants.CLOUD_PLATFORMS
                or storagecluster_independent_check()):
            if not config.ENV_DATA["platform"] == constants.AZURE_PLATFORM and (
                    float(config.ENV_DATA["ocs_version"]) > 4.5):
                logger.info("Checking whether RGW pod is not present")
                pods = pod.get_pods_having_label(label=constants.RGW_APP_LABEL,
                                                 namespace=self.namespace)
                assert (
                    not pods
                ), "RGW pods should not exist in the current platform/cluster"

        elif config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
            rgw_count = get_rgw_count(config.ENV_DATA["ocs_version"],
                                      check_if_cluster_was_upgraded(), None)
            logger.info(
                f'Checking for RGW pod/s on {config.ENV_DATA.get("platform")} platform'
            )
            rgw_pod = OCP(kind=constants.POD, namespace=self.namespace)
            assert rgw_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector=constants.RGW_APP_LABEL,
                resource_count=rgw_count,
                timeout=60,
            )
Example 23
    def __init__(self):
        """
        Constructor for the MCG class
        """

        # Todo: find a better solution for not being able to verify requests with a self-signed cert
        logger.warning('Suppressing InsecureRequestWarnings')
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        self.namespace = config.ENV_DATA['cluster_namespace']
        ocp_obj = OCP(kind='noobaa', namespace=self.namespace)
        results = ocp_obj.get()
        self.s3_endpoint = (results.get('items')[0].get('status').get(
            'services').get('serviceS3').get('externalDNS')[-1])
        self.mgmt_endpoint = (results.get('items')[0].get('status').get(
            'services').get('serviceMgmt').get('externalDNS')[-1]) + '/rpc'
        self.region = config.ENV_DATA['region']

        creds_secret_name = (results.get('items')[0].get('status').get(
            'accounts').get('admin').get('secretRef').get('name'))
        secret_ocp_obj = OCP(kind='secret', namespace=self.namespace)
        creds_secret_obj = secret_ocp_obj.get(creds_secret_name)

        self.access_key_id = base64.b64decode(
            creds_secret_obj.get('data').get('AWS_ACCESS_KEY_ID')).decode(
                'utf-8')
        self.access_key = base64.b64decode(
            creds_secret_obj.get('data').get('AWS_SECRET_ACCESS_KEY')).decode(
                'utf-8')

        self.noobaa_user = base64.b64decode(
            creds_secret_obj.get('data').get('email')).decode('utf-8')
        self.noobaa_password = base64.b64decode(
            creds_secret_obj.get('data').get('password')).decode('utf-8')

        self.noobaa_token = self.send_rpc_query(
            'auth_api',
            'create_auth',
            params={
                'role': 'admin',
                'system': 'noobaa',
                'email': self.noobaa_user,
                'password': self.noobaa_password
            }).json().get('reply').get('token')

        self.s3_resource = boto3.resource(
            's3',
            verify=False,
            endpoint_url=self.s3_endpoint,
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.access_key)

        self.s3_client = boto3.client('s3',
                                      verify=False,
                                      endpoint_url=self.s3_endpoint,
                                      aws_access_key_id=self.access_key_id,
                                      aws_secret_access_key=self.access_key)

        # Give NooBaa's ServiceAccount permissions in order to execute CLI commands
        registry.add_role_to_user('cluster-admin',
                                  constants.NOOBAA_SERVICE_ACCOUNT,
                                  cluster_role=True)

        self.operator_pod = Pod(**get_pods_having_label(
            constants.NOOBAA_OPERATOR_POD_LABEL, self.namespace)[0])

        if config.ENV_DATA['platform'].lower() == 'aws':
            (self.cred_req_obj, self.aws_access_key_id,
             self.aws_access_key) = self.request_aws_credentials()

            self._ocp_resource = ocp_obj

            self.aws_s3_resource = boto3.resource(
                's3',
                verify=False,
                endpoint_url="https://s3.amazonaws.com",
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_access_key)
            logger.info(
                'Checking whether RGW pod is not present on AWS platform')
            pods = pod.get_pods_having_label(label=constants.RGW_APP_LABEL,
                                             namespace=self.namespace)
            assert len(pods) == 0, 'RGW pod should not exist on AWS platform'

        elif config.ENV_DATA.get('platform') == constants.VSPHERE_PLATFORM:
            logger.info('Checking for RGW pod on VSPHERE platform')
            rgw_pod = OCP(kind=constants.POD, namespace=self.namespace)
            assert rgw_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector=constants.RGW_APP_LABEL,
                resource_count=1,
                timeout=60)
Example 24
def ocs_install_verification(
    timeout=600, skip_osd_distribution_check=False, ocs_registry_image=None,
    post_upgrade_verification=False,
):
    """
    Perform steps necessary to verify a successful OCS installation

    Args:
        timeout (int): Number of seconds for timeout which will be used in the
            checks used in this function.
        skip_osd_distribution_check (bool): If true skip the check for osd
            distribution.
        ocs_registry_image (str): Specific image to check if it was installed
            properly.
        post_upgrade_verification (bool): Set to True if this function is
            called after upgrade.

    """
    from ocs_ci.ocs.node import get_typed_nodes
    from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs
    from ocs_ci.ocs.resources.pod import get_ceph_tools_pod, get_all_pods
    from ocs_ci.ocs.cluster import validate_cluster_on_pvc
    number_of_worker_nodes = len(get_typed_nodes())
    namespace = config.ENV_DATA['cluster_namespace']
    log.info("Verifying OCS installation")

    # Verify OCS CSV is in Succeeded phase
    log.info("verifying ocs csv")
    ocs_csv = get_ocs_csv()
    # Verify if OCS CSV has proper version.
    csv_version = ocs_csv.data['spec']['version']
    ocs_version = config.ENV_DATA['ocs_version']
    log.info(
        f"Check if OCS version: {ocs_version} matches with CSV: {csv_version}"
    )
    assert ocs_version in csv_version, (
        f"OCS version: {ocs_version} mismatch with CSV version {csv_version}"
    )
    # Verify if OCS CSV has the same version in provided CI build.
    ocs_registry_image = ocs_registry_image or config.DEPLOYMENT.get(
        'ocs_registry_image'
    )
    if ocs_registry_image and ocs_registry_image.endswith(".ci"):
        ocs_registry_image = ocs_registry_image.split(":")[1]
        log.info(
            f"Check if OCS registry image: {ocs_registry_image} matches with "
            f"CSV: {csv_version}"
        )
        ignore_csv_mismatch = config.DEPLOYMENT.get('ignore_csv_mismatch')
        if ignore_csv_mismatch:
            log.info(
                "The possible mismatch will be ignored as you deployed "
                "the different version than the default version from the CSV"
            )
        else:
            assert ocs_registry_image in csv_version, (
                f"OCS registry image version: {ocs_registry_image} mismatch "
                f"with CSV version {csv_version}"
            )

    # Verify OCS Cluster Service (ocs-storagecluster) is Ready
    storage_cluster_name = config.ENV_DATA['storage_cluster_name']
    log.info("Verifying status of storage cluster: %s", storage_cluster_name)
    storage_cluster = StorageCluster(
        resource_name=storage_cluster_name,
        namespace=namespace,
    )
    log.info(
        f"Check if StorageCluster: {storage_cluster_name} is in "
        f"Ready phase"
    )
    storage_cluster.wait_for_phase(phase='Ready', timeout=timeout)

    # Verify pods in running state and proper counts
    log.info("Verifying pod states and counts")
    pod = OCP(
        kind=constants.POD, namespace=namespace
    )
    osd_count = (
        int(storage_cluster.data['spec']['storageDeviceSets'][0]['count'])
        * int(storage_cluster.data['spec']['storageDeviceSets'][0]['replica'])
    )

    # check noobaa CR for min number of noobaa endpoint pods
    nb_obj = OCP(kind='noobaa', namespace=defaults.ROOK_CLUSTER_NAMESPACE)
    min_eps = nb_obj.get().get('items')[0].get('spec').get('endpoints').get('minCount')
    max_eps = nb_obj.get().get('items')[0].get('spec').get('endpoints').get('maxCount')

    resources_dict = {
        constants.OCS_OPERATOR_LABEL: 1,
        constants.OPERATOR_LABEL: 1,
        constants.NOOBAA_DB_LABEL: 1,
        constants.NOOBAA_OPERATOR_POD_LABEL: 1,
        constants.NOOBAA_CORE_POD_LABEL: 1,
        constants.MON_APP_LABEL: 3,
        constants.CSI_CEPHFSPLUGIN_LABEL: number_of_worker_nodes,
        constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL: 2,
        constants.CSI_RBDPLUGIN_LABEL: number_of_worker_nodes,
        constants.CSI_RBDPLUGIN_PROVISIONER_LABEL: 2,
        constants.OSD_APP_LABEL: osd_count,
        constants.MGR_APP_LABEL: 1,
        constants.MDS_APP_LABEL: 2,
        constants.NOOBAA_ENDPOINT_POD_LABEL: min_eps
    }
    if config.ENV_DATA.get('platform') in constants.ON_PREM_PLATFORMS:
        # Workaround for https://bugzilla.redhat.com/show_bug.cgi?id=1857802 - RGW count is 1
        # post upgrade to OCS 4.5. Tracked with
        # https://github.com/red-hat-storage/ocs-ci/issues/2532
        rgw_count = 2 if float(config.ENV_DATA['ocs_version']) >= 4.5 and not (
            post_upgrade_verification
        ) else 1
        resources_dict.update({constants.RGW_APP_LABEL: rgw_count})
    for label, count in resources_dict.items():
        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=label,
            resource_count=count,
            timeout=timeout
        )

    nb_ep_pods = get_pods_having_label(
        label=constants.NOOBAA_ENDPOINT_POD_LABEL, namespace=defaults.ROOK_CLUSTER_NAMESPACE
    )
    assert len(nb_ep_pods) <= max_eps, (
        f"The number of running NooBaa endpoint pods ({len(nb_ep_pods)}) "
        f"is greater than the maximum defined in the NooBaa CR ({max_eps})"
    )

    # Verify StorageClasses (1 ceph-fs, 1 ceph-rbd)
    log.info("Verifying storage classes")
    storage_class = OCP(
        kind=constants.STORAGECLASS, namespace=namespace
    )
    storage_cluster_name = config.ENV_DATA['storage_cluster_name']
    required_storage_classes = {
        f'{storage_cluster_name}-cephfs',
        f'{storage_cluster_name}-ceph-rbd'
    }
    storage_classes = storage_class.get()
    storage_class_names = {
        item['metadata']['name'] for item in storage_classes['items']
    }
    assert required_storage_classes.issubset(storage_class_names)

    # Verify OSDs are distributed
    if not skip_osd_distribution_check:
        log.info("Verifying OSDs are distributed evenly across worker nodes")
        ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
        osds = ocp_pod_obj.get(selector=constants.OSD_APP_LABEL)['items']
        deviceset_count = get_deviceset_count()
        node_names = [osd['spec']['nodeName'] for osd in osds]
        for node in node_names:
            assert not node_names.count(node) > deviceset_count, (
                "OSD's are not distributed evenly across worker nodes"
            )

    # Verify that CSI driver object contains provisioner names
    log.info("Verifying CSI driver object contains provisioner names.")
    csi_driver = OCP(kind="CSIDriver")
    assert {defaults.CEPHFS_PROVISIONER, defaults.RBD_PROVISIONER} == (
        {item['metadata']['name'] for item in csi_driver.get()['items']}
    )

    # Verify node and provisioner secret names in storage class
    log.info("Verifying node and provisioner secret names in storage class.")
    sc_rbd = storage_class.get(
        resource_name=constants.DEFAULT_STORAGECLASS_RBD
    )
    sc_cephfs = storage_class.get(
        resource_name=constants.DEFAULT_STORAGECLASS_CEPHFS
    )
    assert sc_rbd['parameters']['csi.storage.k8s.io/node-stage-secret-name'] == constants.RBD_NODE_SECRET
    assert sc_rbd['parameters']['csi.storage.k8s.io/provisioner-secret-name'] == constants.RBD_PROVISIONER_SECRET
    assert sc_cephfs['parameters']['csi.storage.k8s.io/node-stage-secret-name'] == constants.CEPHFS_NODE_SECRET
    assert sc_cephfs['parameters']['csi.storage.k8s.io/provisioner-secret-name'] == constants.CEPHFS_PROVISIONER_SECRET
    log.info("Verified node and provisioner secret names in storage class.")

    # Verify ceph osd tree output
    log.info(
        "Verifying ceph osd tree output and checking for device set PVC names "
        "in the output."
    )

    if (
        config.DEPLOYMENT.get('local_storage')
        and config.ENV_DATA['platform'] != constants.BAREMETALPSI_PLATFORM
    ):
        deviceset_pvcs = get_compute_node_names()
    else:
        deviceset_pvcs = [pvc.name for pvc in get_deviceset_pvcs()]

    ct_pod = get_ceph_tools_pod()
    osd_tree = ct_pod.exec_ceph_cmd(ceph_cmd='ceph osd tree', format='json')
    schemas = {
        'root': constants.OSD_TREE_ROOT,
        'rack': constants.OSD_TREE_RACK,
        'host': constants.OSD_TREE_HOST,
        'osd': constants.OSD_TREE_OSD,
        'region': constants.OSD_TREE_REGION,
        'zone': constants.OSD_TREE_ZONE
    }
    schemas['host']['properties']['name'] = {'enum': deviceset_pvcs}
    for item in osd_tree['nodes']:
        validate(instance=item, schema=schemas[item['type']])
        if item['type'] == 'host':
            deviceset_pvcs.remove(item['name'])
    assert not deviceset_pvcs, (
        f"These device set PVCs are not given in ceph osd tree output "
        f"- {deviceset_pvcs}"
    )
    log.info(
        "Verified ceph osd tree output. Device set PVC names are given in the "
        "output."
    )

    # TODO: Verify ceph osd tree output have osd listed as ssd
    # TODO: Verify ceph osd tree output have zone or rack based on AZ

    # Verify CSI snapshotter sidecar container is not present
    log.info("Verifying CSI snapshotter is not present.")
    provisioner_pods = get_all_pods(
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        selector=[
            constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
            constants.CSI_RBDPLUGIN_PROVISIONER_LABEL
        ]
    )
    for pod_obj in provisioner_pods:
        pod_info = pod_obj.get()
        for container, image in get_images(data=pod_info).items():
            assert ('snapshot' not in container) and ('snapshot' not in image), (
                f"Snapshot container is present in {pod_obj.name} pod. "
                f"Container {container}. Image {image}"
            )
    deployments = ocs_csv.get()['spec']['install']['spec']['deployments']
    rook_ceph_operator_deployment = [
        deployment_val for deployment_val in deployments if deployment_val['name'] == 'rook-ceph-operator'
    ]
    assert {'name': 'CSI_ENABLE_SNAPSHOTTER', 'value': 'false'} in (
        rook_ceph_operator_deployment[0]['spec']['template']['spec']['containers'][0]['env']
    ), "CSI_ENABLE_SNAPSHOTTER value is not set to 'false'."
    log.info("Verified: CSI snapshotter is not present.")

    # Verify pool crush rule is with "type": "zone"
    if utils.get_az_count() == 3:
        log.info("Verifying pool crush rule is with type: zone")
        crush_dump = ct_pod.exec_ceph_cmd(
            ceph_cmd='ceph osd crush dump', format=''
        )
        pool_names = [
            constants.METADATA_POOL, constants.DEFAULT_BLOCKPOOL,
            constants.DATA_POOL
        ]
        crush_rules = [rule for rule in crush_dump['rules'] if rule['rule_name'] in pool_names]
        for crush_rule in crush_rules:
            assert [
                item for item in crush_rule['steps'] if item.get('type') == 'zone'
            ], f"{crush_rule['rule_name']} is not with type as zone"
        log.info("Verified - pool crush rule is with type: zone")
    log.info("Validate cluster on PVC")
    validate_cluster_on_pvc()

    # Verify ceph health
    log.info("Verifying ceph health")
    health_check_tries = 20
    health_check_delay = 30
    if post_upgrade_verification:
        # In case of upgrade with FIO we have to wait longer time to see
        # health OK. See discussion in BZ:
        # https://bugzilla.redhat.com/show_bug.cgi?id=1817727
        health_check_tries = 180
    assert utils.ceph_health_check(
        namespace, health_check_tries, health_check_delay
    )
Example 25
def patch_consumer_toolbox(ceph_admin_key=None):
    """
    Patch the rook-ceph-tools deployment with the ceph.admin key. Applicable only for
    Managed Service (MS) platforms, to enable rook-ceph-tools to run ceph commands.

    Args:
        ceph_admin_key (str): The ceph admin key which should be used to patch rook-ceph-tools deployment on consumer

    """

    # Get the admin key if available
    ceph_admin_key = (ceph_admin_key or os.environ.get("CEPHADMINKEY")
                      or config.AUTH.get("external", {}).get("ceph_admin_key"))

    if not ceph_admin_key:
        # TODO: Get the key from provider rook-ceph-tools pod after implementing multicluster deployment
        logger.warning(
            "Ceph admin key not found to patch rook-ceph-tools deployment on consumer with ceph.admin key. "
            "Skipping the step.")
        return

    consumer_tools_pod = get_ceph_tools_pod()

    # Check whether ceph command is working on tools pod. Patch is needed only if the error is "RADOS permission error"
    try:
        consumer_tools_pod.exec_ceph_cmd("ceph health")
        return
    except Exception as exc:
        if "RADOS permission error" not in str(exc):
            logger.warning(
                f"Ceph command on rook-ceph-tools deployment is failing with error {str(exc)}. "
                "This error cannot be fixed by patching the rook-ceph-tools deployment with ceph admin key."
            )
            return

    consumer_tools_deployment = ocp.OCP(
        kind=constants.DEPLOYMENT,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        resource_name="rook-ceph-tools",
    )
    patch_value = (
        f'[{{"op": "replace", "path": "/spec/template/spec/containers/0/env", '
        f'"value":[{{"name": "ROOK_CEPH_USERNAME", "value": "client.admin"}}, '
        f'{{"name": "ROOK_CEPH_SECRET", "value": "{ceph_admin_key}"}}]}}]')
    try:
        consumer_tools_deployment.patch(params=patch_value, format_type="json")
    except Exception as exe:
        logger.warning(
            "Failed to patch rook-ceph-tools deployment in consumer cluster. "
            f"The patch can be applied manually after deployment. Error {str(exe)}"
        )
        return

    # Wait for the existing tools pod to delete
    consumer_tools_pod.ocp.wait_for_delete(
        resource_name=consumer_tools_pod.name)

    # Wait for the new tools pod to reach Running state
    new_tools_pod_info = get_pods_having_label(
        label=constants.TOOL_APP_LABEL,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
    )[0]
    new_tools_pod = Pod(**new_tools_pod_info)
    helpers.wait_for_resource_state(new_tools_pod, constants.STATUS_RUNNING)
Example 26
    def test_all_worker_nodes_short_network_failure(
        self, nodes, setup, node_restart_teardown
    ):
        """
        OCS-1432/OCS-1433:
        - Start DeploymentConfig based app pods
        - Make all the worker nodes unresponsive by doing abrupt network failure
        - Reboot the unresponsive nodes after a short duration of ~300 seconds
        - When the unresponsive nodes recover, app pods and the ceph cluster should recover
        - Again run IOs from app pods
        """
        pod_objs = setup
        worker_nodes = node.get_worker_nodes()

        # Run IO on pods
        logger.info(f"Starting IO on {len(pod_objs)} app pods")
        with ThreadPoolExecutor() as executor:
            for pod_obj in pod_objs:
                logger.info(f"Starting IO on pod {pod_obj.name}")
                storage_type = (
                    "block" if pod_obj.pvc.get_pvc_vol_mode == "Block" else "fs"
                )
                executor.submit(
                    pod_obj.run_io,
                    storage_type=storage_type,
                    size="2G",
                    runtime=30,
                    fio_filename=f"{pod_obj.name}_io_f1",
                )

        logger.info(f"IO started on all {len(pod_objs)} app pods")

        # Wait for IO results
        for pod_obj in pod_objs:
            pod.get_fio_rw_iops(pod_obj)

        # Induce network failure on all worker nodes
        with ThreadPoolExecutor() as executor:
            for node_name in worker_nodes:
                executor.submit(node.node_network_failure, node_name, False)

        node.wait_for_nodes_status(
            node_names=worker_nodes, status=constants.NODE_NOT_READY
        )

        logger.info(f"Waiting for {self.short_nw_fail_time} seconds")
        sleep(self.short_nw_fail_time)

        # Reboot the worker nodes
        logger.info(f"Stop and start the worker nodes: {worker_nodes}")
        nodes.restart_nodes_by_stop_and_start(node.get_node_objs(worker_nodes))

        try:
            node.wait_for_nodes_status(
                node_names=worker_nodes, status=constants.NODE_READY
            )
            logger.info("Wait for OCS pods to be in running state")
            if not pod.wait_for_pods_to_be_running(timeout=720):
                raise ResourceWrongStatusException("Pods are not in running state")
        except ResourceWrongStatusException:
            # Restart nodes
            nodes.restart_nodes(node.get_node_objs(worker_nodes))

        ceph_health_check(tries=80)

        # Get current info of app pods
        new_pod_objs = list()
        for pod_obj in pod_objs:
            pod_label = pod_obj.labels.get("deploymentconfig")
            pods_data = pod.get_pods_having_label(
                f"deploymentconfig={pod_label}", pod_obj.namespace
            )
            current_pods = [
                pod_data.get("metadata").get("name")
                for pod_data in pods_data
                if "-deploy" not in pod_data.get("metadata").get("name")
            ]
            logger.info(f"Pods with label {pod_label}: {current_pods}")

            # Remove the older pod from the list if pod is rescheduled
            if len(current_pods) > 1:
                current_pods.remove(pod_obj.name)

            new_pod_obj = pod.get_pod_obj(current_pods.pop(), pod_obj.namespace)
            new_pod_obj.pvc = pod_obj.pvc
            new_pod_objs.append(new_pod_obj)

        logger.info("Wait for app pods are in running state")
        for pod_obj in new_pod_objs:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=pod_obj.name,
                timeout=720,
                sleep=20,
            )
        logger.info("All the app pods reached running state")

        # Run more IOs on app pods
        with ThreadPoolExecutor() as executor:
            for pod_obj in new_pod_objs:
                logger.info(f"Starting IO on pod {pod_obj.name}")
                pod_obj.wl_setup_done = False
                storage_type = (
                    "block" if pod_obj.pvc.get_pvc_vol_mode == "Block" else "fs"
                )
                executor.submit(
                    pod_obj.run_io,
                    storage_type=storage_type,
                    size="1G",
                    runtime=30,
                    fio_filename=f"{pod_obj.name}_io_f2",
                )

        for pod_obj in new_pod_objs:
            pod.get_fio_rw_iops(pod_obj)
Example 27
    def __init__(self):
        """
        Constructor for the MCG class
        """
        self.namespace = config.ENV_DATA['cluster_namespace']
        ocp_obj = OCP(kind='noobaa', namespace=self.namespace)
        results = ocp_obj.get()
        self.s3_endpoint = (results.get('items')[0].get('status').get(
            'services').get('serviceS3').get('externalDNS')[-1])
        self.mgmt_endpoint = (results.get('items')[0].get('status').get(
            'services').get('serviceMgmt').get('externalDNS')[-1]) + '/rpc'
        self.region = config.ENV_DATA['region']

        creds_secret_name = (results.get('items')[0].get('status').get(
            'accounts').get('admin').get('secretRef').get('name'))
        secret_ocp_obj = OCP(kind='secret', namespace=self.namespace)
        creds_secret_obj = secret_ocp_obj.get(creds_secret_name)

        self.access_key_id = base64.b64decode(
            creds_secret_obj.get('data').get('AWS_ACCESS_KEY_ID')).decode(
                'utf-8')
        self.access_key = base64.b64decode(
            creds_secret_obj.get('data').get('AWS_SECRET_ACCESS_KEY')).decode(
                'utf-8')

        self.noobaa_user = base64.b64decode(
            creds_secret_obj.get('data').get('email')).decode('utf-8')
        self.noobaa_password = base64.b64decode(
            creds_secret_obj.get('data').get('password')).decode('utf-8')

        self.noobaa_token = self.send_rpc_query(
            'auth_api',
            'create_auth',
            params={
                'role': 'admin',
                'system': 'noobaa',
                'email': self.noobaa_user,
                'password': self.noobaa_password
            }).json().get('reply').get('token')

        self.s3_resource = boto3.resource(
            's3',
            verify=False,
            endpoint_url=self.s3_endpoint,
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.access_key)

        self.s3_client = boto3.client('s3',
                                      verify=False,
                                      endpoint_url=self.s3_endpoint,
                                      aws_access_key_id=self.access_key_id,
                                      aws_secret_access_key=self.access_key)

        if config.ENV_DATA['platform'].lower() == 'aws':
            (self.cred_req_obj, self.aws_access_key_id,
             self.aws_access_key) = self.request_aws_credentials()

            self._ocp_resource = ocp_obj

            self.aws_s3_resource = boto3.resource(
                's3',
                verify=False,
                endpoint_url="https://s3.amazonaws.com",
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_access_key)
            logger.info(
                'Checking whether RGW pod is not present on AWS platform')
            pods = pod.get_pods_having_label(label=constants.RGW_APP_LABEL,
                                             namespace=self.namespace)
            assert len(pods) == 0, 'RGW pod should not exist on AWS platform'

        elif config.ENV_DATA.get('platform') == constants.VSPHERE_PLATFORM:
            logger.info('Checking for RGW pod on VSPHERE platform')
            rgw_pod = OCP(kind=constants.POD, namespace=self.namespace)
            assert rgw_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector=constants.RGW_APP_LABEL,
                resource_count=1,
                timeout=60)
Example 28
def ocs_install_verification(
    timeout=600,
    skip_osd_distribution_check=False,
    ocs_registry_image=None,
    post_upgrade_verification=False,
    version_before_upgrade=None,
):
    """
    Perform steps necessary to verify a successful OCS installation

    Args:
        timeout (int): Number of seconds for timeout which will be used in the
            checks used in this function.
        skip_osd_distribution_check (bool): If true skip the check for osd
            distribution.
        ocs_registry_image (str): Specific image to check if it was installed
            properly.
        post_upgrade_verification (bool): Set to True if this function is
            called after upgrade.
        version_before_upgrade (float): Set to OCS version before upgrade

    """
    from ocs_ci.ocs.node import get_nodes
    from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs
    from ocs_ci.ocs.resources.pod import get_ceph_tools_pod, get_all_pods
    from ocs_ci.ocs.cluster import validate_cluster_on_pvc
    from ocs_ci.ocs.resources.fips import check_fips_enabled

    number_of_worker_nodes = len(get_nodes())
    namespace = config.ENV_DATA["cluster_namespace"]
    log.info("Verifying OCS installation")
    if config.ENV_DATA.get("disable_components"):
        for component in config.ENV_DATA["disable_components"]:
            config.COMPONENTS[f"disable_{component}"] = True
    disable_noobaa = config.COMPONENTS["disable_noobaa"]
    disable_rgw = config.COMPONENTS["disable_rgw"]
    disable_blockpools = config.COMPONENTS["disable_blockpools"]
    disable_cephfs = config.COMPONENTS["disable_cephfs"]

    # Verify OCS CSV is in Succeeded phase
    log.info("verifying ocs csv")
    ocs_csv = get_ocs_csv()
    # Verify if OCS CSV has proper version.
    csv_version = ocs_csv.data["spec"]["version"]
    ocs_version = version.get_semantic_ocs_version_from_config()
    log.info(
        f"Check if OCS version: {ocs_version} matches with CSV: {csv_version}")
    assert (
        f"{ocs_version}" in csv_version
    ), f"OCS version: {ocs_version} mismatch with CSV version {csv_version}"
    # Verify if OCS CSV has the same version in provided CI build.
    ocs_registry_image = ocs_registry_image or config.DEPLOYMENT.get(
        "ocs_registry_image")
    if ocs_registry_image and ocs_registry_image.endswith(".ci"):
        ocs_registry_image = ocs_registry_image.rsplit(":", 1)[1]
        log.info(
            f"Check if OCS registry image: {ocs_registry_image} matches with "
            f"CSV: {csv_version}")
        ignore_csv_mismatch = config.DEPLOYMENT.get("ignore_csv_mismatch")
        if ignore_csv_mismatch:
            log.info(
                "The possible mismatch will be ignored as you deployed "
                "the different version than the default version from the CSV")
        else:
            assert ocs_registry_image in csv_version, (
                f"OCS registry image version: {ocs_registry_image} mismatch "
                f"with CSV version {csv_version}")

    # Verify Storage System status
    if ocs_version >= version.VERSION_4_9:
        log.info("Verifying storage system status")
        storage_system = OCP(kind=constants.STORAGESYSTEM, namespace=namespace)
        storage_system_data = storage_system.get()
        storage_system_status = {}
        for condition in storage_system_data["items"][0]["status"][
                "conditions"]:
            storage_system_status[condition["type"]] = condition["status"]
        log.debug(f"storage system status: {storage_system_status}")
        assert storage_system_status == constants.STORAGE_SYSTEM_STATUS, (
            f"Storage System status is not in expected state. Expected {constants.STORAGE_SYSTEM_STATUS}"
            f" but found {storage_system_status}")

    # Verify OCS Cluster Service (ocs-storagecluster) is Ready
    storage_cluster_name = config.ENV_DATA["storage_cluster_name"]
    log.info("Verifying status of storage cluster: %s", storage_cluster_name)
    storage_cluster = StorageCluster(
        resource_name=storage_cluster_name,
        namespace=namespace,
    )
    log.info(f"Check if StorageCluster: {storage_cluster_name} is in"
             f"Succeeded phase")
    storage_cluster.wait_for_phase(phase="Ready", timeout=timeout)

    # Verify pods in running state and proper counts
    log.info("Verifying pod states and counts")
    pod = OCP(kind=constants.POD, namespace=namespace)
    if not config.DEPLOYMENT["external_mode"]:
        osd_count = int(
            storage_cluster.data["spec"]["storageDeviceSets"][0]["count"]
        ) * int(
            storage_cluster.data["spec"]["storageDeviceSets"][0]["replica"])
    rgw_count = None
    if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
        if not disable_rgw:
            rgw_count = get_rgw_count(f"{ocs_version}",
                                      post_upgrade_verification,
                                      version_before_upgrade)

    min_eps = constants.MIN_NB_ENDPOINT_COUNT_POST_DEPLOYMENT
    max_eps = (constants.MAX_NB_ENDPOINT_COUNT
               if ocs_version >= version.VERSION_4_6 else 1)

    if config.ENV_DATA.get("platform") == constants.IBM_POWER_PLATFORM:
        min_eps = 1
        max_eps = 1

    nb_db_label = (constants.NOOBAA_DB_LABEL_46_AND_UNDER
                   if ocs_version < version.VERSION_4_7 else
                   constants.NOOBAA_DB_LABEL_47_AND_ABOVE)
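    # NooBaa's DB moved from MongoDB to PostgreSQL in OCS 4.7, hence the
    # version-dependent DB pod label selection above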
    resources_dict = {
        nb_db_label: 1,
        constants.OCS_OPERATOR_LABEL: 1,
        constants.OPERATOR_LABEL: 1,
        constants.NOOBAA_OPERATOR_POD_LABEL: 1,
        constants.NOOBAA_CORE_POD_LABEL: 1,
        constants.NOOBAA_ENDPOINT_POD_LABEL: min_eps,
    }
    if not config.DEPLOYMENT["external_mode"]:
        resources_dict.update({
            constants.MON_APP_LABEL: 3,
            constants.CSI_CEPHFSPLUGIN_LABEL: number_of_worker_nodes,
            constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL: 2,
            constants.CSI_RBDPLUGIN_LABEL: number_of_worker_nodes,
            constants.CSI_RBDPLUGIN_PROVISIONER_LABEL: 2,
            constants.OSD_APP_LABEL: osd_count,
            constants.MGR_APP_LABEL: 1,
            constants.MDS_APP_LABEL: 2,
            constants.RGW_APP_LABEL: rgw_count,
        })

    if ocs_version >= version.VERSION_4_9:
        resources_dict.update({
            constants.ODF_OPERATOR_CONTROL_MANAGER_LABEL: 1,
        })

    for label, count in resources_dict.items():
        if label == constants.RGW_APP_LABEL:
            if (config.ENV_DATA.get("platform")
                    not in constants.ON_PREM_PLATFORMS or disable_rgw):
                continue
        if "noobaa" in label and disable_noobaa:
            continue
        if "mds" in label and disable_cephfs:
            continue

        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=label,
            resource_count=count,
            timeout=timeout,
        )

    if not disable_noobaa:
        nb_ep_pods = get_pods_having_label(
            label=constants.NOOBAA_ENDPOINT_POD_LABEL,
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        )
        assert len(nb_ep_pods) <= max_eps, (
            f"The number of running NooBaa endpoint pods ({len(nb_ep_pods)}) "
            f"is greater than the maximum defined in the NooBaa CR ({max_eps})"
        )

    # Verify StorageClasses (1 ceph-fs, 1 ceph-rbd)
    log.info("Verifying storage classes")
    storage_class = OCP(kind=constants.STORAGECLASS, namespace=namespace)
    storage_cluster_name = config.ENV_DATA["storage_cluster_name"]
    required_storage_classes = {
        f"{storage_cluster_name}-cephfs",
        f"{storage_cluster_name}-ceph-rbd",
    }
    if ocs_version >= version.VERSION_4_10:
        # TODO: Add rbd-thick storage class verification in external mode
        # clusters upgraded to OCS 4.8 once bug 1978542 is fixed.
        # Until then, rbd-thick storage class verification is skipped in
        # external mode upgraded clusters (blocked by bug 1978542).
        if not (config.DEPLOYMENT["external_mode"]
                and post_upgrade_verification):
            required_storage_classes.update(
                {f"{storage_cluster_name}-ceph-rbd-thick"})
    skip_storage_classes = set()
    if disable_cephfs:
        skip_storage_classes.update({
            f"{storage_cluster_name}-cephfs",
        })
    if disable_blockpools:
        skip_storage_classes.update({
            f"{storage_cluster_name}-ceph-rbd",
        })
    required_storage_classes = required_storage_classes.difference(
        skip_storage_classes)

    if config.DEPLOYMENT["external_mode"]:
        required_storage_classes.update({
            f"{storage_cluster_name}-ceph-rgw",
            f'{config.ENV_DATA["cluster_namespace"]}.noobaa.io',
        })
    storage_classes = storage_class.get()
    storage_class_names = {
        item["metadata"]["name"]
        for item in storage_classes["items"]
    }
    # required storage class names should be observed in the cluster under test
    missing_scs = required_storage_classes.difference(storage_class_names)
    if len(missing_scs) > 0:
        log.error("few storage classess are not present: %s", missing_scs)
    assert list(missing_scs) == []

    # Verify OSDs are distributed
    if not config.DEPLOYMENT["external_mode"]:
        if not skip_osd_distribution_check:
            log.info(
                "Verifying OSDs are distributed evenly across worker nodes")
            ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
            osds = ocp_pod_obj.get(selector=constants.OSD_APP_LABEL)["items"]
            deviceset_count = get_deviceset_count()
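            # e.g., with a hypothetical deviceset count of 3, no single worker
            # node should be running more than 3 OSD pods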
            node_names = [osd["spec"]["nodeName"] for osd in osds]
            for node in node_names:
                assert (
                    node_names.count(node) <= deviceset_count
                ), "OSDs are not distributed evenly across worker nodes"

    # Verify that CSI driver object contains provisioner names
    log.info("Verifying CSI driver object contains provisioner names.")
    csi_driver = OCP(kind="CSIDriver")
    csi_drivers = {
        item["metadata"]["name"]
        for item in csi_driver.get()["items"]
    }
    assert defaults.CSI_PROVISIONERS.issubset(csi_drivers)

    # Verify node and provisioner secret names in storage class
    log.info("Verifying node and provisioner secret names in storage class.")
    if config.DEPLOYMENT["external_mode"]:
        sc_rbd = storage_class.get(
            resource_name=constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD)
        sc_cephfs = storage_class.get(resource_name=(
            constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_CEPHFS))
    else:
        if not disable_blockpools:
            sc_rbd = storage_class.get(
                resource_name=constants.DEFAULT_STORAGECLASS_RBD)
        if not disable_cephfs:
            sc_cephfs = storage_class.get(
                resource_name=constants.DEFAULT_STORAGECLASS_CEPHFS)
    if not disable_blockpools:
        assert (
            sc_rbd["parameters"]["csi.storage.k8s.io/node-stage-secret-name"]
            == constants.RBD_NODE_SECRET)
        assert (
            sc_rbd["parameters"]["csi.storage.k8s.io/provisioner-secret-name"]
            == constants.RBD_PROVISIONER_SECRET)
    if not disable_cephfs:
        assert (sc_cephfs["parameters"]
                ["csi.storage.k8s.io/node-stage-secret-name"] ==
                constants.CEPHFS_NODE_SECRET)
        assert (sc_cephfs["parameters"]
                ["csi.storage.k8s.io/provisioner-secret-name"] ==
                constants.CEPHFS_PROVISIONER_SECRET)
    log.info("Verified node and provisioner secret names in storage class.")

    ct_pod = get_ceph_tools_pod()

    # https://github.com/red-hat-storage/ocs-ci/issues/3820
    # Verify ceph osd tree output
    if not (config.DEPLOYMENT.get("ui_deployment")
            or config.DEPLOYMENT["external_mode"]):
        log.info(
            "Verifying ceph osd tree output and checking for device set PVC names "
            "in the output.")
        if config.DEPLOYMENT.get("local_storage"):
            deviceset_pvcs = [osd.get_node() for osd in get_osd_pods()]
            # removes duplicate hostname
            deviceset_pvcs = list(set(deviceset_pvcs))
            if config.ENV_DATA.get("platform") == constants.BAREMETAL_PLATFORM:
                deviceset_pvcs = [
                    deviceset.replace(".", "-") for deviceset in deviceset_pvcs
                ]
        else:
            deviceset_pvcs = [pvc.name for pvc in get_deviceset_pvcs()]

        osd_tree = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd tree",
                                        format="json")
        schemas = {
            "root": constants.OSD_TREE_ROOT,
            "rack": constants.OSD_TREE_RACK,
            "host": constants.OSD_TREE_HOST,
            "osd": constants.OSD_TREE_OSD,
            "region": constants.OSD_TREE_REGION,
            "zone": constants.OSD_TREE_ZONE,
        }
        schemas["host"]["properties"]["name"] = {"enum": deviceset_pvcs}
        for item in osd_tree["nodes"]:
            validate(instance=item, schema=schemas[item["type"]])
            if item["type"] == "host":
                deviceset_pvcs.remove(item["name"])
        assert not deviceset_pvcs, (
            f"These device set PVCs are not given in ceph osd tree output "
            f"- {deviceset_pvcs}")
        log.info(
            "Verified ceph osd tree output. Device set PVC names are given in the "
            "output.")

    # TODO: Verify ceph osd tree output have osd listed as ssd
    # TODO: Verify ceph osd tree output have zone or rack based on AZ

    # Verify CSI snapshotter sidecar container is not present
    # if the OCS version is < 4.6
    if ocs_version < version.VERSION_4_6:
        log.info("Verifying CSI snapshotter is not present.")
        provisioner_pods = get_all_pods(
            namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            selector=[
                constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
                constants.CSI_RBDPLUGIN_PROVISIONER_LABEL,
            ],
        )
        for pod_obj in provisioner_pods:
            pod_info = pod_obj.get()
            for container, image in get_images(data=pod_info).items():
                assert ("snapshot" not in container) and (
                    "snapshot" not in image
                ), (f"Snapshot container is present in {pod_obj.name} pod. "
                    f"Container {container}. Image {image}")
        deployments = ocs_csv.get()["spec"]["install"]["spec"]["deployments"]
        rook_ceph_operator_deployment = [
            deployment_val for deployment_val in deployments
            if deployment_val["name"] == "rook-ceph-operator"
        ]
        assert {
            "name": "CSI_ENABLE_SNAPSHOTTER",
            "value": "false"
        } in (rook_ceph_operator_deployment[0]["spec"]["template"]["spec"]
              ["containers"][0]["env"]
              ), "CSI_ENABLE_SNAPSHOTTER value is not set to 'false'."
        log.info("Verified: CSI snapshotter is not present.")

    # Verify pool crush rule is with "type": "zone"
    if utils.get_az_count() == 3:
        log.info("Verifying pool crush rule is with type: zone")
        crush_dump = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd crush dump",
                                          format="")
        pool_names = [
            constants.METADATA_POOL,
            constants.DEFAULT_BLOCKPOOL,
            constants.DATA_POOL,
        ]
        crush_rules = [
            rule for rule in crush_dump["rules"]
            if rule["rule_name"] in pool_names
        ]
        for crush_rule in crush_rules:
            assert [
                item for item in crush_rule["steps"]
                if item.get("type") == "zone"
            ], f"{crush_rule['rule_name']} is not with type as zone"
        log.info("Verified - pool crush rule is with type: zone")
    log.info("Validate cluster on PVC")
    validate_cluster_on_pvc()

    # Verify ceph health
    log.info("Verifying ceph health")
    health_check_tries = 20
    health_check_delay = 30
    if post_upgrade_verification:
        # In case of upgrade with FIO we have to wait longer time to see
        # health OK. See discussion in BZ:
        # https://bugzilla.redhat.com/show_bug.cgi?id=1817727
        health_check_tries = 180
    assert utils.ceph_health_check(namespace, health_check_tries,
                                   health_check_delay)
    if config.ENV_DATA.get("fips"):
        # In case that fips is enabled when deploying,
        # a verification of the installation of it will run
        # on all running state pods
        check_fips_enabled()
    if config.ENV_DATA.get("encryption_at_rest"):
        osd_encryption_verification()
        if config.DEPLOYMENT.get("kms_deployment"):
            kms = KMS.get_kms_deployment()
            kms.post_deploy_verification()

    storage_cluster_obj = get_storage_cluster()
    is_flexible_scaling = (
        storage_cluster_obj.get()["items"][0].get("spec").get(
            "flexibleScaling", False))
    if is_flexible_scaling is True:
        failure_domain = storage_cluster_obj.data["items"][0]["status"][
            "failureDomain"]
        assert failure_domain == "host", (
            f"The expected failure domain on cluster with flexible scaling is 'host',"
            f" the actaul failure domain is {failure_domain}")

    if ocs_version >= version.VERSION_4_7:
        log.info("Verifying images in storage cluster")
        verify_sc_images(storage_cluster)

    if config.ENV_DATA.get("is_multus_enabled"):
        verify_multus_network()
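
A minimal usage sketch of the verification helper above (assuming it is importable from ocs_ci.ocs.resources.storage_cluster, as in ocs-ci, and that an OCS deployment has just completed; the timeout and version values below are purely illustrative):

from ocs_ci.ocs.resources.storage_cluster import ocs_install_verification

# Basic post-deployment verification with a more generous timeout
ocs_install_verification(timeout=900)

# Post-upgrade verification, comparing against the pre-upgrade OCS version
ocs_install_verification(
    post_upgrade_verification=True,
    version_before_upgrade=4.6,
)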
Esempio n. 29
    def __init__(self, *args, **kwargs):
        """
        Constructor for the MCG class
        """
        self.namespace = config.ENV_DATA['cluster_namespace']
        self.operator_pod = Pod(**get_pods_having_label(
            constants.NOOBAA_OPERATOR_POD_LABEL, self.namespace)[0])
        self.core_pod = Pod(**get_pods_having_label(
            constants.NOOBAA_CORE_POD_LABEL, self.namespace)[0])

        self.retrieve_noobaa_cli_binary()
        """
        The certificate will be copied on each mcg_obj instantiation since
        the process is so light and quick that the time required for the redundant
        copy is negligible in comparison to the time a hash comparison would take.
        """
        retrieve_default_ingress_crt()

        get_noobaa = OCP(kind='noobaa', namespace=self.namespace).get()

        self.s3_endpoint = (get_noobaa.get('items')[0].get('status').get(
            'services').get('serviceS3').get('externalDNS')[0])
        self.s3_internal_endpoint = (get_noobaa.get('items')[0].get(
            'status').get('services').get('serviceS3').get('internalDNS')[0])
        self.mgmt_endpoint = (get_noobaa.get('items')[0].get('status').get(
            'services').get('serviceMgmt').get('externalDNS')[0]) + '/rpc'
        self.region = config.ENV_DATA['region']

        creds_secret_name = (get_noobaa.get('items')[0].get('status').get(
            'accounts').get('admin').get('secretRef').get('name'))
        secret_ocp_obj = OCP(kind='secret', namespace=self.namespace)
        creds_secret_obj = secret_ocp_obj.get(creds_secret_name)

        self.access_key_id = base64.b64decode(
            creds_secret_obj.get('data').get('AWS_ACCESS_KEY_ID')).decode(
                'utf-8')
        self.access_key = base64.b64decode(
            creds_secret_obj.get('data').get('AWS_SECRET_ACCESS_KEY')).decode(
                'utf-8')

        self.noobaa_user = base64.b64decode(
            creds_secret_obj.get('data').get('email')).decode('utf-8')
        self.noobaa_password = base64.b64decode(
            creds_secret_obj.get('data').get('password')).decode('utf-8')

        self.noobaa_token = self.send_rpc_query(
            'auth_api',
            'create_auth',
            params={
                'role': 'admin',
                'system': 'noobaa',
                'email': self.noobaa_user,
                'password': self.noobaa_password
            }).json().get('reply').get('token')

        self.s3_resource = boto3.resource(
            's3',
            verify=constants.DEFAULT_INGRESS_CRT_LOCAL_PATH,
            endpoint_url=self.s3_endpoint,
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.access_key)

        self.s3_client = self.s3_resource.meta.client
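        # boto3 exposes the low-level client backing the resource via
        # `.meta.client`, so both handles share the same endpoint, certificate
        # verification setting and credentials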

        if (config.ENV_DATA['platform'].lower() == 'aws'
                and kwargs.get('create_aws_creds')):
            (self.cred_req_obj, self.aws_access_key_id,
             self.aws_access_key) = self.request_aws_credentials()

            self.aws_s3_resource = boto3.resource(
                's3',
                endpoint_url="https://s3.amazonaws.com",
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_access_key)
            logger.info(
                'Verifying that no RGW pod is present on the AWS platform')
            pods = pod.get_pods_having_label(label=constants.RGW_APP_LABEL,
                                             namespace=self.namespace)
            assert len(pods) == 0, 'RGW pod should not exist on AWS platform'

        elif config.ENV_DATA.get('platform') in constants.ON_PREM_PLATFORMS:
            rgw_count = 2 if float(
                config.ENV_DATA['ocs_version']) >= 4.5 else 1
            logger.info(
                f'Checking for RGW pod/s on {config.ENV_DATA.get("platform")} platform'
            )
            rgw_pod = OCP(kind=constants.POD, namespace=self.namespace)
            assert rgw_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector=constants.RGW_APP_LABEL,
                resource_count=rgw_count,
                timeout=60)
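
A minimal sketch of how the handles built in this constructor might be used, assuming the MCG class is importable (e.g. from ocs_ci.ocs.resources.mcg in ocs-ci) and cluster credentials are in place; the bucket name, object key and RPC query shown are illustrative, and real tests usually obtain the instance via the mcg_obj fixture:

from ocs_ci.ocs.resources.mcg import MCG

mcg = MCG()
# The boto3 resource built in __init__ talks to the NooBaa S3 endpoint
mcg.s3_resource.create_bucket(Bucket="example-mcg-bucket")
mcg.s3_resource.Object("example-mcg-bucket", "hello.txt").put(Body=b"hello world")
# RPC calls against the management endpoint reuse the admin token fetched above
buckets = mcg.send_rpc_query("bucket_api", "list_buckets").json().get("reply")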
Esempio n. 30
    def test_mcg_namespace_disruptions_crd(
        self,
        mcg_obj,
        cld_mgr,
        awscli_pod,
        bucketclass_dict,
        bucket_factory,
        node_drain_teardown,
    ):
        """
        Test MCG namespace disruption flow

        1. Create NS resources with CRDs
        2. Create NS bucket with CRDs
        3. Upload to NS bucket
        4. Delete noobaa related pods and verify integrity of objects
        5. Create public access policy on NS bucket and verify Get op
        6. Drain nodes containing noobaa pods and verify integrity of objects
        7. Perform put operation to validate public access denial
        8. Edit/verify and remove objects on NS bucket

        """
        data = "Sample string content to write to a S3 object"
        object_key = "ObjKey-" + str(uuid.uuid4().hex)
        awscli_node_name = awscli_pod.get()["spec"]["nodeName"]

        aws_s3_creds = {
            "access_key_id": cld_mgr.aws_client.access_key,
            "access_key": cld_mgr.aws_client.secret_key,
            "endpoint": constants.MCG_NS_AWS_ENDPOINT,
            "region": config.ENV_DATA["region"],
        }

        # S3 account details
        user_name = "nb-user" + str(uuid.uuid4().hex)
        email = user_name + "@mail.com"

        logger.info("Setting up test files for upload, to the bucket/resources")
        setup_base_objects(awscli_pod, MCG_NS_ORIGINAL_DIR, MCG_NS_RESULT_DIR, amount=3)

        # Create the namespace resource and verify health
        ns_buc = bucket_factory(
            amount=1,
            interface=bucketclass_dict["interface"],
            bucketclass=bucketclass_dict,
        )[0]
        ns_bucket = ns_buc.name

        aws_target_bucket = ns_buc.bucketclass.namespacestores[0].uls_name

        logger.info(f"Namespace bucket: {ns_bucket} created")

        logger.info(f"Uploading objects to ns bucket: {ns_bucket}")
        sync_object_directory(
            awscli_pod,
            src=MCG_NS_ORIGINAL_DIR,
            target=f"s3://{ns_bucket}",
            s3_obj=mcg_obj,
        )

        for pod_to_respin in self.labels_map:
            logger.info(f"Re-spinning mcg resource: {self.labels_map[pod_to_respin]}")
            pod_obj = pod.Pod(
                **pod.get_pods_having_label(
                    label=self.labels_map[pod_to_respin],
                    namespace=defaults.ROOK_CLUSTER_NAMESPACE,
                )[0]
            )

            pod_obj.delete(force=True)

            assert pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector=self.labels_map[pod_to_respin],
                resource_count=1,
                timeout=300,
            )

            logger.info(
                f"Downloading objects from ns bucket: {ns_bucket} "
                f"after re-spinning: {self.labels_map[pod_to_respin]}"
            )
            sync_object_directory(
                awscli_pod,
                src=f"s3://{ns_bucket}",
                target=MCG_NS_RESULT_DIR,
                s3_obj=mcg_obj,
            )

            logger.info(
                f"Verifying integrity of objects "
                f"after re-spinning: {self.labels_map[pod_to_respin]}"
            )
            compare_directory(
                awscli_pod, MCG_NS_ORIGINAL_DIR, MCG_NS_RESULT_DIR, amount=3
            )

        # S3 account
        user = NoobaaAccount(mcg_obj, name=user_name, email=email, buckets=[ns_bucket])
        logger.info(f"Noobaa account: {user.email_id} with S3 access created")

        # Admin sets Public access policy(*)
        bucket_policy_generated = gen_bucket_policy(
            user_list=["*"],
            actions_list=["GetObject"],
            resources_list=[f'{ns_bucket}/{"*"}'],
        )
        bucket_policy = json.dumps(bucket_policy_generated)
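        # For illustration, the generated policy document is roughly of this
        # shape (exact Sid/Version fields depend on gen_bucket_policy):
        # {
        #   "Version": "2012-10-17",
        #   "Statement": [{
        #     "Effect": "Allow",
        #     "Principal": {"AWS": ["*"]},
        #     "Action": ["s3:GetObject"],
        #     "Resource": ["arn:aws:s3:::<ns_bucket>/*"]
        #   }]
        # }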

        logger.info(
            f"Creating bucket policy on bucket: {ns_bucket} with wildcard (*) Principal"
        )
        put_policy = put_bucket_policy(mcg_obj, ns_bucket, bucket_policy)
        logger.info(f"Put bucket policy response from Admin: {put_policy}")

        logger.info(f"Getting bucket policy on bucket: {ns_bucket}")
        get_policy = get_bucket_policy(mcg_obj, ns_bucket)
        logger.info(f"Got bucket policy: {get_policy['Policy']}")

        # MCG admin writes an object to bucket
        logger.info(f"Writing object on bucket: {ns_bucket} by admin")
        assert s3_put_object(mcg_obj, ns_bucket, object_key, data), "Failed: PutObject"

        # Verifying whether Get operation is allowed to any S3 user
        logger.info(
            f"Get object action on namespace bucket: {ns_bucket} "
            f"with user: {user.email_id}"
        )
        assert s3_get_object(user, ns_bucket, object_key), "Failed: GetObject"

        # Upload files to NS target
        logger.info(
            f"Uploading objects directly to ns resource target: {aws_target_bucket}"
        )
        sync_object_directory(
            awscli_pod,
            src=MCG_NS_ORIGINAL_DIR,
            target=f"s3://{aws_target_bucket}",
            signed_request_creds=aws_s3_creds,
        )

        for pod_to_drain in self.labels_map:
            pod_obj = pod.Pod(
                **pod.get_pods_having_label(
                    label=self.labels_map[pod_to_drain],
                    namespace=defaults.ROOK_CLUSTER_NAMESPACE,
                )[0]
            )

            # Retrieve the node name on which the pod resides
            node_name = pod_obj.get()["spec"]["nodeName"]

            if awscli_node_name == node_name:
                logger.info(
                    f"Skipping node drain since aws cli pod node: "
                    f"{awscli_node_name} is same as {pod_to_drain} "
                    f"pod node: {node_name}"
                )
                continue

            # Drain the node
            drain_nodes([node_name])
            wait_for_nodes_status(
                [node_name], status=constants.NODE_READY_SCHEDULING_DISABLED
            )
            schedule_nodes([node_name])
            wait_for_nodes_status(timeout=300)
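            # Under the hood these helpers roughly correspond to
            # `oc adm drain <node> --ignore-daemonsets --delete-local-data --force`
            # followed by `oc adm uncordon <node>`; exact flags may differ
            # between ocs-ci versions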

            # Retrieve the new pod
            pod_obj = pod.Pod(
                **pod.get_pods_having_label(
                    label=self.labels_map[pod_to_drain],
                    namespace=defaults.ROOK_CLUSTER_NAMESPACE,
                )[0]
            )
            wait_for_resource_state(pod_obj, constants.STATUS_RUNNING, timeout=120)

            # Verify all storage pods are running
            wait_for_storage_pods()

            logger.info(
                f"Downloading objects from ns bucket: {ns_bucket} "
                f"after draining node: {node_name} with pod {pod_to_drain}"
            )
            sync_object_directory(
                awscli_pod,
                src=f"s3://{ns_bucket}",
                target=MCG_NS_RESULT_DIR,
                s3_obj=mcg_obj,
            )

            logger.info(
                f"Verifying integrity of objects "
                f"after draining node with pod: {pod_to_drain}"
            )
            compare_directory(
                awscli_pod, MCG_NS_ORIGINAL_DIR, MCG_NS_RESULT_DIR, amount=3
            )

        logger.info(f"Editing the namespace resource bucket: {ns_bucket}")
        namespace_bucket_update(
            mcg_obj,
            bucket_name=ns_bucket,
            read_resource=[aws_target_bucket],
            write_resource=aws_target_bucket,
        )

        logger.info(f"Verifying object download after edit on ns bucket: {ns_bucket}")
        sync_object_directory(
            awscli_pod,
            src=f"s3://{ns_bucket}",
            target=MCG_NS_RESULT_DIR,
            s3_obj=mcg_obj,
        )

        # Verifying whether Put object action is denied
        logger.info(
            f"Verifying whether user: {user.email_id} has only public read access"
        )
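        # A denied-put check here might look roughly like this (hypothetical
        # sketch; the exact exception type depends on the S3 helper used):
        # try:
        #     s3_put_object(user, ns_bucket, object_key, data)
        #     assert False, "PutObject should have been denied for the S3 user"
        # except botocore.exceptions.ClientError as err:
        #     assert err.response["Error"]["Code"] == "AccessDenied"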

        logger.info(f"Removing objects from ns bucket: {ns_bucket}")
        rm_object_recursive(awscli_pod, target=ns_bucket, mcg_obj=mcg_obj)