Example 1
    def retrieve_noobaa_cli_binary(self):
        """
        Copy the NooBaa CLI binary from the operator pod
        if it wasn't found locally, or if the hashes between
        the two don't match.

        Raises:
            NoobaaCliChecksumFailedException: If checksum doesn't match.
            AssertionError: If the CLI binary doesn't exist or isn't executable.

        """
        def _compare_cli_hashes():
            """
            Verify that the remote and local CLI binaries are the same
            in order to make sure the local bin is up to date

            Returns:
                bool: Whether the local and remote hashes are identical

            """
            remote_cli_bin_md5 = cal_md5sum(
                self.operator_pod, constants.NOOBAA_OPERATOR_POD_CLI_PATH)
            logger.info(f"Remote noobaa cli md5 hash: {remote_cli_bin_md5}")
            local_cli_bin_md5 = calc_local_file_md5_sum(
                constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH)
            logger.info(f"Local noobaa cli md5 hash: {local_cli_bin_md5}")
            return remote_cli_bin_md5 == local_cli_bin_md5

        if (not os.path.isfile(constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH)
                or not _compare_cli_hashes()):
            logger.info(
                f"The MCG CLI binary could not not found in {constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH},"
                " attempting to copy it from the MCG operator pod")
            local_mcg_cli_dir = os.path.dirname(
                constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH)
            remote_mcg_cli_basename = os.path.basename(
                constants.NOOBAA_OPERATOR_POD_CLI_PATH)
            # The MCG CLI retrieval process is known to be flaky
            # and there's an active BZ regarding it -
            # https://bugzilla.redhat.com/show_bug.cgi?id=2011845
            # rsync should be more reliable than cp, thus the use of oc rsync.
            if (version.get_semantic_ocs_version_from_config()
                    > version.VERSION_4_5):
                cmd = (
                    f"oc rsync -n {self.namespace} {self.operator_pod.name}:"
                    f"{constants.NOOBAA_OPERATOR_POD_CLI_PATH}"
                    f" {local_mcg_cli_dir}")
                exec_cmd(cmd)
                os.rename(
                    os.path.join(local_mcg_cli_dir, remote_mcg_cli_basename),
                    constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH,
                )
            else:
                cmd = (f"oc exec -n {self.namespace} {self.operator_pod.name}"
                       f" -- cat {constants.NOOBAA_OPERATOR_POD_CLI_PATH}"
                       f"> {constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH}")
                proc = subprocess.run(cmd, shell=True, capture_output=True)
                logger.info(
                    f"MCG CLI copying process stdout:{proc.stdout.decode()}, stderr: {proc.stderr.decode()}"
                )
            # Add an executable bit in order to allow usage of the binary
            current_file_permissions = os.stat(
                constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH)
            os.chmod(
                constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH,
                current_file_permissions.st_mode | stat.S_IEXEC,
            )
            # Make sure the binary was copied properly and has the correct permissions
            assert os.path.isfile(
                constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH
            ), f"MCG CLI file not found at {constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH}"
            assert os.access(
                constants.NOOBAA_OPERATOR_LOCAL_CLI_PATH, os.X_OK
            ), "The MCG CLI binary does not have execution permissions"
            if not _compare_cli_hashes():
                raise NoobaaCliChecksumFailedException(
                    "Binary hash doesn't match the one on the operator pod")
Example 2
def setup_ceph_toolbox(force_setup=False):
    """
    Set up the ceph toolbox pod. Also checks whether the toolbox already
    exists; if it does, this behaves as a no-op (unless force_setup is set).

    Args:
        force_setup (bool): force setup toolbox pod

    """
    if ocsci_config.ENV_DATA["mcg_only_deployment"]:
        log.info("Skipping Ceph toolbox setup due to running in MCG only mode")
        return
    namespace = ocsci_config.ENV_DATA["cluster_namespace"]
    ceph_toolbox = get_pod_name_by_pattern("rook-ceph-tools", namespace)
    # setup toolbox for external mode
    # Refer bz: 1856982 - invalid admin secret
    if len(ceph_toolbox) == 1:
        log.info("Ceph toolbox already exists, skipping")
        if force_setup:
            log.info("Running force setup for Ceph toolbox!")
        else:
            return
    external_mode = ocsci_config.DEPLOYMENT.get("external_mode")

    if version.get_semantic_ocs_version_from_config() == version.VERSION_4_2:
        tool_box_data = templating.load_yaml(constants.TOOL_POD_YAML)
        tool_box_data["spec"]["template"]["spec"]["containers"][0][
            "image"
        ] = get_rook_version()
        rook_toolbox = OCS(**tool_box_data)
        rook_toolbox.create()
    else:
        if external_mode:
            toolbox = templating.load_yaml(constants.TOOL_POD_YAML)
            toolbox["spec"]["template"]["spec"]["containers"][0][
                "image"
            ] = get_rook_version()
            toolbox["metadata"]["name"] += "-external"
            keyring_dict = ocsci_config.EXTERNAL_MODE.get("admin_keyring")
            env = toolbox["spec"]["template"]["spec"]["containers"][0]["env"]
            # replace secret
            env = [item for item in env if item["name"] != "ROOK_CEPH_SECRET"]
            env.append({"name": "ROOK_CEPH_SECRET", "value": keyring_dict["key"]})
            toolbox["spec"]["template"]["spec"]["containers"][0]["env"] = env
            # add ceph volumeMounts
            ceph_volume_mount_path = {"mountPath": "/etc/ceph", "name": "ceph-config"}
            ceph_volume = {"name": "ceph-config", "emptyDir": {}}
            toolbox["spec"]["template"]["spec"]["containers"][0]["volumeMounts"].append(
                ceph_volume_mount_path
            )
            toolbox["spec"]["template"]["spec"]["volumes"].append(ceph_volume)
            rook_toolbox = OCS(**toolbox)
            rook_toolbox.create()
            return

        # Workaround for https://bugzilla.redhat.com/show_bug.cgi?id=1982721
        # TODO: Remove workaround when bug 1982721 is fixed
        # https://github.com/red-hat-storage/ocs-ci/issues/4585
        if ocsci_config.ENV_DATA.get("is_multus_enabled"):
            toolbox = templating.load_yaml(constants.TOOL_POD_YAML)
            toolbox["spec"]["template"]["spec"]["containers"][0][
                "image"
            ] = get_rook_version()
            toolbox["metadata"]["name"] += "-multus"
            toolbox["spec"]["template"]["metadata"]["annotations"] = {
                "k8s.v1.cni.cncf.io/networks": "openshift-storage/ocs-public"
            }
            toolbox["spec"]["template"]["spec"]["hostNetwork"] = False
            rook_toolbox = OCS(**toolbox)
            rook_toolbox.create()
            return

        # For OCS >= 4.3 the new toolbox pod deployment is done here:
        # https://github.com/openshift/ocs-operator/pull/207/
        log.info("starting ceph toolbox pod")
        run_cmd(
            "oc patch ocsinitialization ocsinit -n openshift-storage --type "
            'json --patch  \'[{ "op": "replace", "path": '
            '"/spec/enableCephTools", "value": true }]\''
        )
        toolbox_pod = OCP(kind=constants.POD, namespace=namespace)
        toolbox_pod.wait_for_resource(
            condition="Running",
            selector="app=rook-ceph-tools",
            resource_count=1,
            timeout=120,
        )
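
A typical caller only needs to invoke the function and can then run Ceph commands through the toolbox pod. A hedged usage sketch, reusing the get_pod_name_by_pattern and run_cmd helpers seen above; the namespace lookup and the oc exec invocation are illustrative:

namespace = ocsci_config.ENV_DATA["cluster_namespace"]
setup_ceph_toolbox()  # no-op if the toolbox pod already exists
toolbox_pods = get_pod_name_by_pattern("rook-ceph-tools", namespace)
assert toolbox_pods, "Toolbox pod was not created"
# Run an arbitrary Ceph command through the toolbox pod.
run_cmd(f"oc exec -n {namespace} {toolbox_pods[0]} -- ceph health")
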
Example 3
ms_provider_required = pytest.mark.skipif(
    not (config.ENV_DATA["platform"].lower() in MANAGED_SERVICE_PLATFORMS
         and config.ENV_DATA["cluster_type"].lower() == "provider"),
    reason="Test runs ONLY on managed service provider cluster",
)

ms_consumer_required = pytest.mark.skipif(
    not (config.ENV_DATA["platform"].lower() in MANAGED_SERVICE_PLATFORMS
         and config.ENV_DATA["cluster_type"].lower() == "consumer"),
    reason="Test runs ONLY on managed service consumer cluster",
)

kms_config_required = pytest.mark.skipif(
    (config.ENV_DATA["KMS_PROVIDER"].lower() != HPCS_KMS_PROVIDER
     and load_auth_config().get("vault", {}).get("VAULT_ADDR") is None)
    or not (config.ENV_DATA["KMS_PROVIDER"].lower() == HPCS_KMS_PROVIDER
            and version.get_semantic_ocs_version_from_config() >=
            version.VERSION_4_10
            and load_auth_config().get(
                "hpcs", {}).get("IBM_KP_SERVICE_INSTANCE_ID") is not None),
    reason="KMS config not found in auth.yaml",
)

skipif_aws_i3 = pytest.mark.skipif(
    config.ENV_DATA["platform"].lower() == "aws"
    and config.DEPLOYMENT.get("local_storage") is True,
    reason="Test will not run on AWS i3",
)

skipif_bm = pytest.mark.skipif(
    config.ENV_DATA["platform"].lower() == "baremetal"
    and config.DEPLOYMENT.get("local_storage") is True,
    reason="Test will not run on Bare Metal",
)
Example 4
    def __init__(self, *args, **kwargs):
        """
        Constructor for the MCG class
        """
        self.namespace = config.ENV_DATA["cluster_namespace"]
        self.operator_pod = Pod(**get_pods_having_label(
            constants.NOOBAA_OPERATOR_POD_LABEL, self.namespace)[0])
        self.core_pod = Pod(**get_pods_having_label(
            constants.NOOBAA_CORE_POD_LABEL, self.namespace)[0])

        self.retrieve_noobaa_cli_binary()
        """
        The certificate will be copied on each mcg_obj instantiation since
        the process is so light and quick, that the time required for the redundant
        copy is neglible in comparison to the time a hash comparison will take.
        """
        retrieve_default_ingress_crt()

        get_noobaa = OCP(kind="noobaa", namespace=self.namespace).get()

        self.s3_endpoint = (get_noobaa.get("items")[0].get("status").get(
            "services").get("serviceS3").get("externalDNS")[0])
        self.s3_internal_endpoint = (get_noobaa.get("items")[0].get(
            "status").get("services").get("serviceS3").get("internalDNS")[0])
        self.mgmt_endpoint = (get_noobaa.get("items")[0].get("status").get(
            "services").get("serviceMgmt").get("externalDNS")[0]) + "/rpc"
        self.region = config.ENV_DATA["region"]

        creds_secret_name = (get_noobaa.get("items")[0].get("status").get(
            "accounts").get("admin").get("secretRef").get("name"))
        secret_ocp_obj = OCP(kind="secret", namespace=self.namespace)
        creds_secret_obj = secret_ocp_obj.get(creds_secret_name)

        self.access_key_id = base64.b64decode(
            creds_secret_obj.get("data").get("AWS_ACCESS_KEY_ID")).decode(
                "utf-8")
        self.access_key = base64.b64decode(
            creds_secret_obj.get("data").get("AWS_SECRET_ACCESS_KEY")).decode(
                "utf-8")

        self.noobaa_user = base64.b64decode(
            creds_secret_obj.get("data").get("email")).decode("utf-8")
        self.noobaa_password = base64.b64decode(
            creds_secret_obj.get("data").get("password")).decode("utf-8")

        self.noobaa_token = self.retrieve_nb_token()

        self.s3_resource = boto3.resource(
            "s3",
            verify=retrieve_verification_mode(),
            endpoint_url=self.s3_endpoint,
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.access_key,
        )

        self.s3_client = self.s3_resource.meta.client

        if config.ENV_DATA["platform"].lower() == "aws" and kwargs.get(
                "create_aws_creds"):
            (
                self.cred_req_obj,
                self.aws_access_key_id,
                self.aws_access_key,
            ) = self.request_aws_credentials()

            self.aws_s3_resource = boto3.resource(
                "s3",
                endpoint_url="https://s3.amazonaws.com",
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_access_key,
            )

        if (config.ENV_DATA["platform"].lower() in constants.CLOUD_PLATFORMS
                or storagecluster_independent_check()):
            if (config.ENV_DATA["platform"] != constants.AZURE_PLATFORM
                    and version.get_semantic_ocs_version_from_config() >
                    version.VERSION_4_5):
                logger.info("Verifying that no RGW pods are present")
                pods = pod.get_pods_having_label(label=constants.RGW_APP_LABEL,
                                                 namespace=self.namespace)
                assert (
                    not pods
                ), "RGW pods should not exist in the current platform/cluster"

        elif (config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS
              and not config.ENV_DATA["mcg_only_deployment"]):
            rgw_count = get_rgw_count(config.ENV_DATA["ocs_version"],
                                      check_if_cluster_was_upgraded(), None)
            logger.info(
                f'Checking for RGW pod/s on {config.ENV_DATA.get("platform")} platform'
            )
            rgw_pod = OCP(kind=constants.POD, namespace=self.namespace)
            assert rgw_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector=constants.RGW_APP_LABEL,
                resource_count=rgw_count,
                timeout=60,
            )
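
Once the constructor finishes, the object exposes ready-made boto3 handles for the NooBaa S3 service. A minimal usage sketch, assuming the credentials and endpoint above resolved correctly:

mcg_obj = MCG()
# List buckets through the external S3 endpoint using the admin
# credentials extracted from the NooBaa secret in the constructor.
for bucket in mcg_obj.s3_resource.buckets.all():
    print(bucket.name)
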
Example 5
    def test_object_actions(self, mcg_obj, bucket_factory):
        """
        Test to verify different object actions and cross account access to buckets
        """
        data = "Sample string content to write to a new S3 object"
        object_key = "ObjKey-" + str(uuid.uuid4().hex)

        # Creating an obc user (account)
        obc = bucket_factory(amount=1, interface="OC")
        obc_obj = OBC(obc[0].name)

        # Creating noobaa account to access bucket belonging to obc account
        user_name = "noobaa-user" + str(uuid.uuid4().hex)
        email = user_name + "@mail.com"
        user = NoobaaAccount(
            mcg_obj, name=user_name, email=email, buckets=[obc_obj.bucket_name]
        )

        # Admin sets policy on obc bucket with obc account principal
        bucket_policy_generated = gen_bucket_policy(
            user_list=[obc_obj.obc_account, user.email_id],
            actions_list=["PutObject"]
            if version.get_semantic_ocs_version_from_config() <= version.VERSION_4_6
            else ["GetObject", "DeleteObject"],
            effect="Allow"
            if version.get_semantic_ocs_version_from_config() <= version.VERSION_4_6
            else "Deny",
            resources_list=[f'{obc_obj.bucket_name}/{"*"}'],
        )
        bucket_policy = json.dumps(bucket_policy_generated)

        logger.info(
            f"Creating bucket policy on bucket: {obc_obj.bucket_name} with principal: {obc_obj.obc_account}"
        )
        put_policy = put_bucket_policy(mcg_obj, obc_obj.bucket_name, bucket_policy)
        logger.info(f"Put bucket policy response from Admin: {put_policy}")

        # Get Policy
        logger.info(f"Getting Bucket policy on bucket: {obc_obj.bucket_name}")
        get_policy = get_bucket_policy(mcg_obj, obc_obj.bucket_name)
        logger.info(f"Got bucket policy: {get_policy['Policy']}")

        # Verifying whether users can put object
        logger.info(
            f"Adding object on bucket: {obc_obj.bucket_name} using user: {obc_obj.obc_account}"
        )
        assert s3_put_object(
            obc_obj, obc_obj.bucket_name, object_key, data
        ), "Failed: Put Object"

        logger.info(
            f"Adding object on bucket: {obc_obj.bucket_name} using user: {user.email_id}"
        )
        assert s3_put_object(
            user, obc_obj.bucket_name, object_key, data
        ), "Failed: Put Object"

        # Verifying whether Get action is not allowed
        logger.info(
            f"Verifying whether user: "
            f'{user.email_id if float(config.ENV_DATA["ocs_version"]) >= 4.6 else obc_obj.obc_account}'
            f" is denied to Get object"
        )
        try:
            if version.get_semantic_ocs_version_from_config() >= version.VERSION_4_6:
                s3_get_object(user, obc_obj.bucket_name, object_key)
            else:
                s3_get_object(obc_obj, obc_obj.bucket_name, object_key)
        except boto3exception.ClientError as e:
            logger.info(e.response)
            response = HttpResponseParser(e.response)
            if response.error["Code"] == "AccessDenied":
                logger.info("Get Object action has been denied access")
            else:
                raise UnexpectedBehaviour(
                    f"{e.response} received invalid error code {response.error['Code']}"
                )
        else:
            assert False, "Get object succeeded when it should have failed"

        if version.get_semantic_ocs_version_from_config() == version.VERSION_4_6:
            logger.info(
                f"Verifying whether the user: "
                f"{obc_obj.obc_account} is able to access Get action"
                f" irrespective of the policy set"
            )
            assert s3_get_object(
                obc_obj, obc_obj.bucket_name, object_key
            ), "Failed: Get Object"

        # Verifying whether obc account allowed to create multipart
        logger.info(
            f"Creating multipart on bucket: {obc_obj.bucket_name}"
            f" with key: {object_key} using user: {obc_obj.obc_account}"
        )
        create_multipart_upload(obc_obj, obc_obj.bucket_name, object_key)

        # Verifying whether S3 user is allowed to create multipart
        logger.info(
            f"Creating multipart on bucket: {obc_obj.bucket_name} "
            f"with key: {object_key} using user: {user.email_id}"
        )
        create_multipart_upload(user, obc_obj.bucket_name, object_key)

        # Verifying whether obc account is denied access to delete object
        logger.info(
            f"Verifying whether user: "
            f'{user.email_id if float(config.ENV_DATA["ocs_version"]) >= 4.6 else obc_obj.obc_account}'
            f" is denied to Delete object"
        )
        try:
            if version.get_semantic_ocs_version_from_config() >= version.VERSION_4_6:
                s3_delete_object(user, obc_obj.bucket_name, object_key)
            else:
                s3_delete_object(obc_obj, obc_obj.bucket_name, object_key)
        except boto3exception.ClientError as e:
            logger.info(e.response)
            response = HttpResponseParser(e.response)
            if response.error["Code"] == "AccessDenied":
                logger.info("Delete action has been denied access")
            else:
                raise UnexpectedBehaviour(
                    f"{e.response} received invalid error code {response.error['Code']}"
                )
        else:
            assert False, "Delete object succeeded when it should have failed"

        # Admin sets a policy on obc-account bucket with noobaa-account principal (cross account access)
        new_policy_generated = gen_bucket_policy(
            user_list=[user.email_id],
            actions_list=["GetObject", "DeleteObject"]
            if float(config.ENV_DATA["ocs_version"]) <= 4.6
            else ["PutObject"],
            effect="Allow"
            if version.get_semantic_ocs_version_from_config() >= version.VERSION_4_6
            else "Deny",
            resources_list=[f'{obc_obj.bucket_name}/{"*"}'],
        )
        new_policy = json.dumps(new_policy_generated)

        logger.info(
            f"Creating bucket policy on bucket: {obc_obj.bucket_name} with principal: {obc_obj.obc_account}"
        )
        put_policy = put_bucket_policy(mcg_obj, obc_obj.bucket_name, new_policy)
        logger.info(f"Put bucket policy response from admin: {put_policy}")

        # Get Policy
        logger.info(f"Getting bucket policy on bucket: {obc_obj.bucket_name}")
        get_policy = get_bucket_policy(mcg_obj, obc_obj.bucket_name)
        logger.info(f"Got bucket policy: {get_policy['Policy']}")

        # Verifying whether Get, Delete object is allowed
        logger.info(
            f"Getting object on bucket: {obc_obj.bucket_name} with user: {user.email_id}"
        )
        for get_resp in TimeoutSampler(
            30, 4, s3_get_object, user, obc_obj.bucket_name, object_key
        ):
            if "403" not in str(get_resp["ResponseMetadata"]["HTTPStatusCode"]):
                logger.info("GetObj operation successful")
                break
            else:
                logger.info("GetObj operation is denied access")
        logger.info(
            f"Deleting object on bucket: {obc_obj.bucket_name} with user: {user.email_id}"
        )
        for del_resp in TimeoutSampler(
            30, 4, s3_delete_object, user, obc_obj.bucket_name, object_key
        ):
            if "403" not in str(del_resp["ResponseMetadata"]["HTTPStatusCode"]):
                logger.info("DeleteObj operation successful")
                break
            else:
                logger.info("DeleteObj operation is denied access")

        # Verifying whether Put object action is denied
        logger.info(
            f"Verifying whether user: {user.email_id} is denied to Put object after updating policy"
        )
        try:
            s3_put_object(user, obc_obj.bucket_name, object_key, data)
        except boto3exception.ClientError as e:
            logger.info(e.response)
            response = HttpResponseParser(e.response)
            if response.error["Code"] == "AccessDenied":
                logger.info("Put object action has been denied access")
            else:
                raise UnexpectedBehaviour(
                    f"{e.response} received invalid error code {response.error['Code']}"
                )
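
For context, gen_bucket_policy presumably renders a standard S3 bucket policy document from the principals, actions, and resources passed in. A hedged sketch of the shape the "Deny Get/Delete" call above would produce; the principal and bucket names are illustrative and the exact layout produced by the helper may differ:

# Illustrative only: standard AWS S3 bucket-policy schema.
bucket_policy_generated = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Principal": {"AWS": ["obc-account-example", "noobaa-user@mail.com"]},
            "Action": ["s3:GetObject", "s3:DeleteObject"],
            "Effect": "Deny",
            "Resource": ["arn:aws:s3:::example-bucket/*"],
        }
    ],
}
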
Example 6
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.
        """
        set_registry_to_managed_state()
        image = None
        ceph_cluster = ocp.OCP(kind="CephCluster", namespace=self.namespace)
        try:
            ceph_cluster.get().get("items")[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")

        # disconnected installation?
        load_cluster_info()
        if config.DEPLOYMENT.get("disconnected"):
            image = prepare_disconnected_ocs_deployment()

        if config.DEPLOYMENT["external_mode"]:
            self.deploy_with_external_mode()
        else:
            self.deploy_ocs_via_operator(image)
            if config.ENV_DATA["mcg_only_deployment"]:
                mcg_only_post_deployment_checks()
                return

            pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
            cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
            # Check for Ceph pods
            mon_pod_timeout = (
                900 if self.platform == constants.IBMCLOUD_PLATFORM else 600
            )
            assert pod.wait_for_resource(
                condition="Running",
                selector="app=rook-ceph-mon",
                resource_count=3,
                timeout=mon_pod_timeout,
            )
            assert pod.wait_for_resource(
                condition="Running", selector="app=rook-ceph-mgr", timeout=600
            )
            assert pod.wait_for_resource(
                condition="Running",
                selector="app=rook-ceph-osd",
                resource_count=3,
                timeout=600,
            )

            # validate ceph mon/osd volumes are backed by pvc
            validate_cluster_on_pvc()

            # validate PDB creation of MON, MDS, OSD pods
            validate_pdb_creation()

            # check for odf-console
            ocs_version = version.get_semantic_ocs_version_from_config()
            if ocs_version >= version.VERSION_4_9:
                assert pod.wait_for_resource(
                    condition="Running", selector="app=odf-console", timeout=600
                )

            # Creating toolbox pod
            setup_ceph_toolbox()

            assert pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector="app=rook-ceph-tools",
                resource_count=1,
                timeout=600,
            )

            if not config.COMPONENTS["disable_cephfs"]:
                # Check for CephFilesystem creation in ocp
                cfs_data = cfs.get()
                cfs_name = cfs_data["items"][0]["metadata"]["name"]

                if helpers.validate_cephfilesystem(cfs_name):
                    logger.info("MDS deployment is successful!")
                    defaults.CEPHFILESYSTEM_NAME = cfs_name
                else:
                    logger.error("MDS deployment Failed! Please check logs!")

        # Change monitoring backend to OCS
        if config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
            "persistent-monitoring"
        ):
            setup_persistent_monitoring()
        elif config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
            "telemeter_server_url"
        ):
            # Create configmap cluster-monitoring-config to reconfigure
            # telemeter server url when 'persistent-monitoring' is False
            create_configmap_cluster_monitoring_pod(
                telemeter_server_url=config.ENV_DATA["telemeter_server_url"]
            )

        if not config.COMPONENTS["disable_cephfs"]:
            # Change registry backend to OCS CEPHFS RWX PVC
            registry.change_registry_backend_to_ocs()

        # Enable console plugin
        enable_console_plugin()

        # Verify health of ceph cluster
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        try:
            ceph_health_check(namespace=self.namespace, tries=30, delay=10)
        except CephHealthException as ex:
            err = str(ex)
            logger.warning(f"Ceph health check failed with {err}")
            if "clock skew detected" in err:
                logger.info(
                    f"Changing NTP on compute nodes to {constants.RH_NTP_CLOCK}"
                )
                if self.platform == constants.VSPHERE_PLATFORM:
                    update_ntp_compute_nodes()
                assert ceph_health_check(namespace=self.namespace, tries=60, delay=10)

        # patch gp2/thin storage class as 'non-default'
        self.patch_default_sc_to_non_default()
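
The final step, patch_default_sc_to_non_default, presumably clears the platform storage class's default annotation so the OCS classes can take over. A hedged sketch of that patch for the gp2 case mentioned in the comment; treat the command as a hypothetical equivalent, not the actual implementation:

# Drop the "default class" marker from the in-tree gp2 storage class
# using the standard Kubernetes default-class annotation.
run_cmd(
    "oc patch storageclass gp2 -p "
    '\'{"metadata": {"annotations": {"storageclass.kubernetes.io/is-default-class": "false"}}}\''
)
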
Example 7
    def deploy_ocs_via_operator(self, image=None):
        """
        Deploy OCS via the OCS operator.

        Args:
            image (str): Image of ocs registry.

        """
        ui_deployment = config.DEPLOYMENT.get("ui_deployment")
        live_deployment = config.DEPLOYMENT.get("live_deployment")
        arbiter_deployment = config.DEPLOYMENT.get("arbiter_deployment")

        if ui_deployment and ui_deployment_conditions():
            self.deployment_with_ui()
            # Skip the rest of the deployment when deploying via the UI
            return
        else:
            logger.info("Deployment of OCS via OCS operator")
            self.label_and_taint_nodes()

        if not live_deployment:
            create_catalog_source(image)

        if config.DEPLOYMENT.get("local_storage"):
            setup_local_storage(storageclass=self.DEFAULT_STORAGECLASS_LSO)

        logger.info("Creating namespace and operator group.")
        run_cmd(f"oc create -f {constants.OLM_YAML}")

        # create multus network
        if config.ENV_DATA.get("is_multus_enabled"):
            logger.info("Creating multus network")
            multus_data = templating.load_yaml(constants.MULTUS_YAML)
            multus_config_str = multus_data["spec"]["config"]
            multus_config_dct = json.loads(multus_config_str)
            if config.ENV_DATA.get("multus_public_network_interface"):
                multus_config_dct["master"] = config.ENV_DATA.get(
                    "multus_public_network_interface"
                )
            multus_data["spec"]["config"] = json.dumps(multus_config_dct)
            multus_data_yaml = tempfile.NamedTemporaryFile(
                mode="w+", prefix="multus", delete=False
            )
            templating.dump_data_to_temp_yaml(multus_data, multus_data_yaml.name)
            run_cmd(f"oc create -f {multus_data_yaml.name}")

        if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
            ibmcloud.add_deployment_dependencies()
            if not live_deployment:
                create_ocs_secret(self.namespace)
        self.subscribe_ocs()
        operator_selector = get_selector_for_ocs_operator()
        subscription_plan_approval = config.DEPLOYMENT.get("subscription_plan_approval")
        ocs_version = version.get_semantic_ocs_version_from_config()
        if ocs_version >= version.VERSION_4_9:
            ocs_operator_names = [
                defaults.ODF_OPERATOR_NAME,
                defaults.OCS_OPERATOR_NAME,
            ]
            build_number = version.get_semantic_version(get_ocs_build_number())
            if build_number >= version.get_semantic_version("4.9.0-231"):
                ocs_operator_names.append(defaults.MCG_OPERATOR)
            else:
                ocs_operator_names.append(defaults.NOOBAA_OPERATOR)
        else:
            ocs_operator_names = [defaults.OCS_OPERATOR_NAME]
        channel = config.DEPLOYMENT.get("ocs_csv_channel")
        is_ibm_sa_linked = False

        for ocs_operator_name in ocs_operator_names:
            package_manifest = PackageManifest(
                resource_name=ocs_operator_name,
                selector=operator_selector,
                subscription_plan_approval=subscription_plan_approval,
            )
            package_manifest.wait_for_resource(timeout=300)
            csv_name = package_manifest.get_current_csv(channel=channel)
            csv = CSV(resource_name=csv_name, namespace=self.namespace)
            if (
                config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
                and not live_deployment
            ):
                if not is_ibm_sa_linked:
                    logger.info("Sleeping for 60 seconds before applying SA")
                    time.sleep(60)
                    link_all_sa_and_secret_and_delete_pods(
                        constants.OCS_SECRET, self.namespace
                    )
                    is_ibm_sa_linked = True
            csv.wait_for_phase("Succeeded", timeout=720)
        # create storage system
        if ocs_version >= version.VERSION_4_9:
            exec_cmd(f"oc apply -f {constants.STORAGE_SYSTEM_ODF_YAML}")

        ocp_version = version.get_semantic_ocp_version_from_config()
        if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
            config_map = ocp.OCP(
                kind="configmap",
                namespace=self.namespace,
                resource_name=constants.ROOK_OPERATOR_CONFIGMAP,
            )
            config_map.get(retry=10, wait=5)
            config_map_patch = (
                '\'{"data": {"ROOK_CSI_KUBELET_DIR_PATH": "/var/data/kubelet"}}\''
            )
            logger.info("Patching config map to change KUBLET DIR PATH")
            exec_cmd(
                f"oc patch configmap -n {self.namespace} "
                f"{constants.ROOK_OPERATOR_CONFIGMAP} -p {config_map_patch}"
            )
            if config.DEPLOYMENT.get("create_ibm_cos_secret", True):
                logger.info("Creating secret for IBM Cloud Object Storage")
                with open(constants.IBM_COS_SECRET_YAML, "r") as cos_secret_fd:
                    cos_secret_data = yaml.load(cos_secret_fd, Loader=yaml.SafeLoader)
                key_id = config.AUTH["ibmcloud"]["ibm_cos_access_key_id"]
                key_secret = config.AUTH["ibmcloud"]["ibm_cos_secret_access_key"]
                cos_secret_data["data"]["IBM_COS_ACCESS_KEY_ID"] = key_id
                cos_secret_data["data"]["IBM_COS_SECRET_ACCESS_KEY"] = key_secret
                cos_secret_data_yaml = tempfile.NamedTemporaryFile(
                    mode="w+", prefix="cos_secret", delete=False
                )
                templating.dump_data_to_temp_yaml(
                    cos_secret_data, cos_secret_data_yaml.name
                )
                exec_cmd(f"oc create -f {cos_secret_data_yaml.name}")

        # Modify the CSV with custom values if required
        if all(
            key in config.DEPLOYMENT for key in ("csv_change_from", "csv_change_to")
        ):
            modify_csv(
                csv=csv_name,
                replace_from=config.DEPLOYMENT["csv_change_from"],
                replace_to=config.DEPLOYMENT["csv_change_to"],
            )

        # create custom storage class for StorageCluster CR if necessary
        if self.CUSTOM_STORAGE_CLASS_PATH is not None:
            with open(self.CUSTOM_STORAGE_CLASS_PATH, "r") as custom_sc_fo:
                custom_sc = yaml.load(custom_sc_fo, Loader=yaml.SafeLoader)
            # set value of DEFAULT_STORAGECLASS to match the custom storage class
            self.DEFAULT_STORAGECLASS = custom_sc["metadata"]["name"]
            run_cmd(f"oc create -f {self.CUSTOM_STORAGE_CLASS_PATH}")

        # Set rook log level
        self.set_rook_log_level()

        # creating StorageCluster
        if config.DEPLOYMENT.get("kms_deployment"):
            kms = KMS.get_kms_deployment()
            kms.deploy()

        if config.ENV_DATA["mcg_only_deployment"]:
            mcg_only_deployment()
            return

        cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)
        # Figure out all the OCS modules enabled/disabled
        # CLI parameter --disable-components takes the precedence over
        # anything which comes from config file
        if config.ENV_DATA.get("disable_components"):
            for component in config.ENV_DATA["disable_components"]:
                config.COMPONENTS[f"disable_{component}"] = True
                logger.warning(f"disabling: {component}")

        # Update cluster_data with respective component enable/disable
        for key in config.COMPONENTS.keys():
            comp_name = constants.OCS_COMPONENTS_MAP[key.split("_")[1]]
            if config.COMPONENTS[key]:
                if "noobaa" in key:
                    merge_dict(
                        cluster_data,
                        {
                            "spec": {
                                "multiCloudGateway": {"reconcileStrategy": "ignore"}
                            }
                        },
                    )
                else:
                    merge_dict(
                        cluster_data,
                        {
                            "spec": {
                                "managedResources": {
                                    f"{comp_name}": {"reconcileStrategy": "ignore"}
                                }
                            }
                        },
                    )

        if arbiter_deployment:
            cluster_data["spec"]["arbiter"] = {}
            cluster_data["spec"]["nodeTopologies"] = {}
            cluster_data["spec"]["arbiter"]["enable"] = True
            cluster_data["spec"]["nodeTopologies"][
                "arbiterLocation"
            ] = self.get_arbiter_location()
            cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 4

        cluster_data["metadata"]["name"] = config.ENV_DATA["storage_cluster_name"]

        deviceset_data = cluster_data["spec"]["storageDeviceSets"][0]
        device_size = int(config.ENV_DATA.get("device_size", defaults.DEVICE_SIZE))

        logger.info(
            "Flexible scaling is available from version 4.7 on LSO clusters"
            " with fewer than 3 zones"
        )
        zone_num = get_az_count()
        if (
            config.DEPLOYMENT.get("local_storage")
            and ocs_version >= version.VERSION_4_7
            and zone_num < 3
            and not config.DEPLOYMENT.get("arbiter_deployment")
        ):
            cluster_data["spec"]["flexibleScaling"] = True
            # https://bugzilla.redhat.com/show_bug.cgi?id=1921023
            cluster_data["spec"]["storageDeviceSets"][0]["count"] = 3
            cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 1

        # set size of request for storage
        if self.platform.lower() == constants.BAREMETAL_PLATFORM:
            pv_size_list = helpers.get_pv_size(
                storageclass=self.DEFAULT_STORAGECLASS_LSO
            )
            pv_size_list.sort()
            deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
                "storage"
            ] = f"{pv_size_list[0]}"
        else:
            deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
                "storage"
            ] = f"{device_size}Gi"

        # set storage class to OCS default on current platform
        if self.DEFAULT_STORAGECLASS:
            deviceset_data["dataPVCTemplate"]["spec"][
                "storageClassName"
            ] = self.DEFAULT_STORAGECLASS

        # StorageCluster tweaks for LSO
        if config.DEPLOYMENT.get("local_storage"):
            cluster_data["spec"]["manageNodes"] = False
            cluster_data["spec"]["monDataDirHostPath"] = "/var/lib/rook"
            deviceset_data["name"] = constants.DEFAULT_DEVICESET_LSO_PVC_NAME
            deviceset_data["portable"] = False
            deviceset_data["dataPVCTemplate"]["spec"][
                "storageClassName"
            ] = self.DEFAULT_STORAGECLASS_LSO
            lso_type = config.DEPLOYMENT.get("type")
            if (
                self.platform.lower() == constants.AWS_PLATFORM
                and lso_type != constants.AWS_EBS
            ):
                deviceset_data["count"] = 2
            # setting resource limits for AWS i3
            # https://access.redhat.com/documentation/en-us/red_hat_openshift_container_storage/4.6/html-single/deploying_openshift_container_storage_using_amazon_web_services/index#creating-openshift-container-storage-cluster-on-amazon-ec2_local-storage
            if (
                ocs_version >= version.VERSION_4_5
                and config.ENV_DATA.get("worker_instance_type")
                == constants.AWS_LSO_WORKER_INSTANCE
            ):
                deviceset_data["resources"] = {
                    "limits": {"cpu": 2, "memory": "5Gi"},
                    "requests": {"cpu": 1, "memory": "5Gi"},
                }
            if (ocp_version >= version.VERSION_4_6) and (
                ocs_version >= version.VERSION_4_6
            ):
                cluster_data["metadata"]["annotations"] = {
                    "cluster.ocs.openshift.io/local-devices": "true"
                }
            count = config.DEPLOYMENT.get("local_storage_storagedeviceset_count")
            if count is not None:
                deviceset_data["count"] = count

        # Allow lower instance requests and limits for OCS deployment
        # The resources we need to change can be found here:
        # https://github.com/openshift/ocs-operator/blob/release-4.5/pkg/deploy-manager/storagecluster.go#L88-L116
        if config.DEPLOYMENT.get("allow_lower_instance_requirements"):
            none_resources = {"Requests": None, "Limits": None}
            deviceset_data["resources"] = deepcopy(none_resources)
            resources = [
                "mon",
                "mds",
                "rgw",
                "mgr",
                "noobaa-core",
                "noobaa-db",
            ]
            if ocs_version >= version.VERSION_4_5:
                resources.append("noobaa-endpoint")
            cluster_data["spec"]["resources"] = {
                resource: deepcopy(none_resources) for resource in resources
            }
            if ocs_version >= version.VERSION_4_5:
                cluster_data["spec"]["resources"]["noobaa-endpoint"] = {
                    "limits": {"cpu": "100m", "memory": "100Mi"},
                    "requests": {"cpu": "100m", "memory": "100Mi"},
                }
        else:
            local_storage = config.DEPLOYMENT.get("local_storage")
            platform = config.ENV_DATA.get("platform", "").lower()
            if local_storage and platform == "aws":
                resources = {
                    "mds": {
                        "limits": {"cpu": 3, "memory": "8Gi"},
                        "requests": {"cpu": 1, "memory": "8Gi"},
                    }
                }
                if ocs_version < version.VERSION_4_5:
                    resources["noobaa-core"] = {
                        "limits": {"cpu": 2, "memory": "8Gi"},
                        "requests": {"cpu": 1, "memory": "8Gi"},
                    }
                    resources["noobaa-db"] = {
                        "limits": {"cpu": 2, "memory": "8Gi"},
                        "requests": {"cpu": 1, "memory": "8Gi"},
                    }
                cluster_data["spec"]["resources"] = resources

        # Enable host network if enabled in config (this requires all the
        # rules to be enabled on the underlying platform).
        if config.DEPLOYMENT.get("host_network"):
            cluster_data["spec"]["hostNetwork"] = True

        cluster_data["spec"]["storageDeviceSets"] = [deviceset_data]

        if self.platform == constants.IBMCLOUD_PLATFORM:
            mon_pvc_template = {
                "spec": {
                    "accessModes": ["ReadWriteOnce"],
                    "resources": {"requests": {"storage": "20Gi"}},
                    "storageClassName": self.DEFAULT_STORAGECLASS,
                    "volumeMode": "Filesystem",
                }
            }
            cluster_data["spec"]["monPVCTemplate"] = mon_pvc_template
            # TODO: Check whether manageNodes needs to be set for IBM Cloud
            cluster_data["spec"]["manageNodes"] = False

        if config.ENV_DATA.get("encryption_at_rest"):
            if ocs_version < version.VERSION_4_6:
                error_message = "Encryption at REST can be enabled only on OCS >= 4.6!"
                logger.error(error_message)
                raise UnsupportedFeatureError(error_message)
            logger.info("Enabling encryption at REST!")
            cluster_data["spec"]["encryption"] = {
                "enable": True,
            }
            if config.DEPLOYMENT.get("kms_deployment"):
                cluster_data["spec"]["encryption"]["kms"] = {
                    "enable": True,
                }

        if config.DEPLOYMENT.get("ceph_debug"):
            setup_ceph_debug()
            cluster_data["spec"]["managedResources"] = {
                "cephConfig": {"reconcileStrategy": "ignore"}
            }
        if config.ENV_DATA.get("is_multus_enabled"):
            cluster_data["spec"]["network"] = {
                "provider": "multus",
                "selectors": {
                    "public": f"{defaults.ROOK_CLUSTER_NAMESPACE}/ocs-public"
                },
            }

        cluster_data_yaml = tempfile.NamedTemporaryFile(
            mode="w+", prefix="cluster_storage", delete=False
        )
        templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml.name)
        run_cmd(f"oc create -f {cluster_data_yaml.name}", timeout=1200)
        if config.DEPLOYMENT["infra_nodes"]:
            _ocp = ocp.OCP(kind="node")
            _ocp.exec_oc_cmd(
                command=f"annotate namespace {defaults.ROOK_CLUSTER_NAMESPACE} "
                f"{constants.NODE_SELECTOR_ANNOTATION}"
            )
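
The component enable/disable handling above leans on a recursive dictionary merge. A minimal sketch of what the merge_dict helper presumably does; the real utility lives elsewhere in the framework:

def merge_dict(orig_dict, new_dict):
    """
    Hypothetical stand-in for the helper used above: recursively merge
    new_dict into orig_dict in place, descending into nested dicts
    instead of replacing them wholesale.
    """
    for key, value in new_dict.items():
        if isinstance(value, dict) and isinstance(orig_dict.get(key), dict):
            merge_dict(orig_dict[key], value)
        else:
            orig_dict[key] = value
    return orig_dict
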
Example 8
    def subscribe_ocs(self):
        """
        This method creates the subscription manifest and subscribes to the
        OCS operator.

        """
        live_deployment = config.DEPLOYMENT.get("live_deployment")
        if (
            config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
            and not live_deployment
        ):
            link_all_sa_and_secret_and_delete_pods(constants.OCS_SECRET, self.namespace)
        operator_selector = get_selector_for_ocs_operator()
        # wait for package manifest
        # For OCS version >= 4.9, we have odf-operator
        ocs_version = version.get_semantic_ocs_version_from_config()
        if ocs_version >= version.VERSION_4_9:
            ocs_operator_name = defaults.ODF_OPERATOR_NAME
            subscription_file = constants.SUBSCRIPTION_ODF_YAML
        else:
            ocs_operator_name = defaults.OCS_OPERATOR_NAME
            subscription_file = constants.SUBSCRIPTION_YAML

        package_manifest = PackageManifest(
            resource_name=ocs_operator_name,
            selector=operator_selector,
        )
        # Wait until the package manifest is ready
        package_manifest.wait_for_resource(timeout=300)
        default_channel = package_manifest.get_default_channel()
        subscription_yaml_data = templating.load_yaml(subscription_file)
        subscription_plan_approval = config.DEPLOYMENT.get("subscription_plan_approval")
        if subscription_plan_approval:
            subscription_yaml_data["spec"][
                "installPlanApproval"
            ] = subscription_plan_approval
        custom_channel = config.DEPLOYMENT.get("ocs_csv_channel")
        if custom_channel:
            logger.info(f"Custom channel will be used: {custom_channel}")
            subscription_yaml_data["spec"]["channel"] = custom_channel
        else:
            logger.info(f"Default channel will be used: {default_channel}")
            subscription_yaml_data["spec"]["channel"] = default_channel
        if config.DEPLOYMENT.get("stage"):
            subscription_yaml_data["spec"]["source"] = constants.OPERATOR_SOURCE_NAME
        if config.DEPLOYMENT.get("live_deployment"):
            subscription_yaml_data["spec"]["source"] = config.DEPLOYMENT.get(
                "live_content_source", defaults.LIVE_CONTENT_SOURCE
            )
        subscription_manifest = tempfile.NamedTemporaryFile(
            mode="w+", prefix="subscription_manifest", delete=False
        )
        templating.dump_data_to_temp_yaml(
            subscription_yaml_data, subscription_manifest.name
        )
        run_cmd(f"oc create -f {subscription_manifest.name}")
        logger.info("Sleeping for 90 seconds after subscribing OCS")
        time.sleep(90)
        if subscription_plan_approval == "Manual":
            wait_for_install_plan_and_approve(self.namespace)
            csv_name = package_manifest.get_current_csv(channel=custom_channel)
            csv = CSV(resource_name=csv_name, namespace=self.namespace)
            csv.wait_for_phase("Installing", timeout=60)
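
For orientation, the subscription file loaded above follows the standard OLM Subscription shape. A hedged sketch of subscription_yaml_data as a Python dict, before the channel/approval/source tweaks applied in the code; names and values are illustrative:

subscription_yaml_data = {
    "apiVersion": "operators.coreos.com/v1alpha1",
    "kind": "Subscription",
    "metadata": {"name": "odf-operator", "namespace": "openshift-storage"},
    "spec": {
        "channel": "stable-4.9",  # the code above overrides this with the custom or default channel
        "installPlanApproval": "Automatic",  # overridden when Manual approval is requested
        "name": "odf-operator",
        "source": "ocs-catalogsource",  # swapped for stage/live sources above
        "sourceNamespace": "openshift-marketplace",
    },
}
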
Example 9
def test_noobaa_bucket_quota(measure_noobaa_exceed_bucket_quota):
    """
    Test that there are appropriate alerts when NooBaa Bucket Quota is reached.
    """
    api = prometheus.PrometheusAPI()

    alerts = measure_noobaa_exceed_bucket_quota.get("prometheus_alerts")

    # Since version 4.5, all NooBaa alerts have a defined Pending state
    if version.get_semantic_ocs_version_from_config() < version.VERSION_4_5:
        expected_alerts = [
            (
                constants.ALERT_BUCKETREACHINGQUOTASTATE,
                "A NooBaa Bucket Is In Reaching Quota State",
                ["firing"],
                "warning",
            ),
            (
                constants.ALERT_BUCKETERRORSTATE,
                "A NooBaa Bucket Is In Error State",
                ["pending", "firing"],
                "warning",
            ),
            (
                constants.ALERT_BUCKETEXCEEDINGQUOTASTATE,
                "A NooBaa Bucket Is In Exceeding Quota State",
                ["firing"],
                "warning",
            ),
        ]
    else:
        expected_alerts = [
            (
                constants.ALERT_BUCKETREACHINGQUOTASTATE,
                "A NooBaa Bucket Is In Reaching Quota State",
                ["pending", "firing"],
                "warning",
            ),
            (
                constants.ALERT_BUCKETERRORSTATE,
                "A NooBaa Bucket Is In Error State",
                ["pending", "firing"],
                "warning",
            ),
            (
                constants.ALERT_BUCKETEXCEEDINGQUOTASTATE,
                "A NooBaa Bucket Is In Exceeding Quota State",
                ["pending", "firing"],
                "warning",
            ),
        ]

    for target_label, target_msg, target_states, target_severity in expected_alerts:
        prometheus.check_alert_list(
            label=target_label,
            msg=target_msg,
            alerts=alerts,
            states=target_states,
            severity=target_severity,
        )
        # The wait time is increased because it takes more time for the
        # OCS cluster to resolve its issues
        pg_wait = 480
        api.check_alert_cleared(
            label=target_label,
            measure_end_time=measure_noobaa_exceed_bucket_quota.get("stop"),
            time_min=pg_wait,
        )
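
check_alert_list presumably filters the captured Prometheus alerts by name and asserts on message, state, and severity. A hedged sketch of that style of verification; the field names follow the Prometheus /api/v1/alerts response format, and the helper's real signature may differ:

def check_alert_list(label, msg, alerts, states, severity):
    """
    Hypothetical stand-in for the helper used above: for every expected
    state, assert that a matching alert was captured.
    """
    for state in states:
        matching = [
            alert
            for alert in alerts
            if alert["labels"]["alertname"] == label
            and alert["labels"]["severity"] == severity
            and alert["annotations"]["message"] == msg
            and alert["state"] == state
        ]
        assert matching, (
            f"Alert {label} with severity {severity} not found in state {state}"
        )
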