def get_pre_upgrade_image(self, csv_name_pre_upgrade):
    """
    Collect the OCS cluster images as they are before the upgrade.

    Args:
        csv_name_pre_upgrade (str): Name of the OCS operator CSV installed
            before the upgrade.

    Returns:
        dict: Result of ``get_images`` for the pre-upgrade CSV
            (keys: image name, values: image full path).

    """
    log.info(f"CSV name before upgrade is: {csv_name_pre_upgrade}")
    csv_pre_upgrade = CSV(
        resource_name=csv_name_pre_upgrade, namespace=self.namespace
    )
    return get_images(csv_pre_upgrade.get())
def get_images_post_upgrade(self, channel, pre_upgrade_images, upgrade_version):
    """
    Wait for the post-upgrade CSV to succeed and report the old images.

    Args:
        channel (str): OCS subscription channel
        pre_upgrade_images (dict): Images of the OCS cluster collected
            before the upgrade
        upgrade_version (str): Version to be upgraded to

    Returns:
        set: Full paths of old OCS cluster images (first element returned
            by ``get_upgrade_image_info``)

    """
    manifest = PackageManifest(
        resource_name=OCS_OPERATOR_NAME,
        selector=get_selector_for_ocs_operator(),
        subscription_plan_approval=self.subscription_plan_approval,
    )
    new_csv_name = manifest.get_current_csv(channel)
    new_csv = CSV(resource_name=new_csv_name, namespace=self.namespace)
    log.info(f"Waiting for CSV {new_csv_name} to be in succeeded state")

    # Workaround for patching missing ceph-rook-tools pod after upgrade
    upgrading_42_to_43 = (
        self.version_before_upgrade == "4.2" and upgrade_version == "4.3"
    )
    if upgrading_42_to_43:
        log.info("Force creating Ceph toolbox after upgrade 4.2 -> 4.3")
        setup_ceph_toolbox(force_setup=True)
    # End of workaround

    # External mode uses a fixed timeout; otherwise it scales with the
    # number of OSDs.
    timeout = (
        200 if config.DEPLOYMENT.get("external_mode")
        else 200 * get_osd_count()
    )
    new_csv.wait_for_phase("Succeeded", timeout=timeout)

    images_after = get_images(new_csv.get())
    old_images, _new_images, _unchanged = get_upgrade_image_info(
        pre_upgrade_images, images_after
    )
    return old_images
def test_upgrade():
    """
    Upgrade OCS by pointing the operator catalog source at a new image tag.

    Steps:
        1. Pick the new image tag (latest, or the next available one).
        2. Record the images of the currently installed CSV.
        3. Apply the modified catalog source and wait for the package
           manifest.
        4. Approve the install plan when the subscription approval is
           ``Manual``.
        5. Poll the package manifest until it reports a different CSV.
        6. Wait for the new CSV to reach ``Succeeded`` and verify images
           and the OCS installation.

    Raises:
        TimeoutException: If the package manifest still reports the
            pre-upgrade CSV after all polling attempts.

    """
    namespace = config.ENV_DATA['cluster_namespace']
    ocs_catalog = CatalogSource(
        resource_name=constants.OPERATOR_CATALOG_SOURCE_NAME,
        namespace="openshift-marketplace",
    )
    image_url = ocs_catalog.get_image_url()
    image_tag = ocs_catalog.get_image_name()
    if config.DEPLOYMENT.get('upgrade_to_latest', True):
        new_image_tag = get_latest_ds_olm_tag()
    else:
        new_image_tag = get_next_version_available_for_upgrade(image_tag)
    cs_data = deepcopy(ocs_catalog.data)
    cs_data['spec']['image'] = ':'.join([image_url, new_image_tag])

    package_manifest = PackageManifest(resource_name=OCS_OPERATOR_NAME)
    csv_name_pre_upgrade = package_manifest.get_current_csv()
    log.info(f"CSV name before upgrade is: {csv_name_pre_upgrade}")
    csv_pre_upgrade = CSV(
        resource_name=csv_name_pre_upgrade, namespace=namespace
    )
    pre_upgrade_images = get_images(csv_pre_upgrade.get())

    with NamedTemporaryFile() as cs_yaml:
        dump_data_to_temp_yaml(cs_data, cs_yaml.name)
        ocs_catalog.apply(cs_yaml.name)
    # Wait for package manifest is ready
    package_manifest.wait_for_resource()
    subscription_plan_approval = config.DEPLOYMENT.get(
        'subscription_plan_approval'
    )
    if subscription_plan_approval == 'Manual':
        wait_for_install_plan_and_approve(namespace)

    attempts = 145
    for attempt in range(1, attempts + 1):
        log.info(f"Attempt {attempt}/{attempts} to check CSV upgraded.")
        package_manifest.reload_data()
        csv_name_post_upgrade = package_manifest.get_current_csv()
        if csv_name_post_upgrade != csv_name_pre_upgrade:
            log.info(f"CSV now upgraded to: {csv_name_post_upgrade}")
            break
        log.info(f"CSV is still: {csv_name_post_upgrade}")
        sleep(5)
    else:
        # The loop finished without ``break``: the CSV never changed, so
        # fail here instead of validating the old CSV as if it were new.
        raise TimeoutException("No new CSV found after upgrade!")

    csv_post_upgrade = CSV(
        resource_name=csv_name_post_upgrade, namespace=namespace
    )
    log.info(
        f"Waiting for CSV {csv_name_post_upgrade} to be in succeeded state"
    )
    csv_post_upgrade.wait_for_phase("Succeeded", timeout=600)
    post_upgrade_images = get_images(csv_post_upgrade.get())
    old_images, _, _ = get_upgrade_image_info(
        pre_upgrade_images, post_upgrade_images
    )
    verify_image_versions(old_images)
    ocs_install_verification(timeout=600, skip_osd_distribution_check=True)
def couchbase_subscription(self):
    """
    Create the subscription for the Couchbase operator.

    Creates the Couchbase operator group, creates the subscription from
    ``constants.COUCHBASE_OPERATOR_SUBSCRIPTION_YAML`` and waits for the
    Couchbase CSV to reach the ``Succeeded`` phase.

    """
    # Create an operator group for Couchbase
    log.info("Creating operator group for couchbase")
    self.couchbase_operatorgroup()

    subscription_yaml = templating.load_yaml(
        constants.COUCHBASE_OPERATOR_SUBSCRIPTION_YAML
    )
    self.subscription_yaml = OCS(**subscription_yaml)
    self.subscription_yaml.create()
    # The subscription exists from this point on, so flag it as created.
    # NOTE(review): presumably cleanup code outside this view checks this
    # flag before deleting the subscription - confirm against teardown.
    self.cb_subscription = True

    # Wait for the CSV to reach succeeded state
    cb_csv = self.get_couchbase_csv()
    cb_csv_obj = CSV(
        resource_name=cb_csv, namespace=constants.COUCHBASE_OPERATOR
    )
    cb_csv_obj.wait_for_phase("Succeeded", timeout=720)
def deploy_with_external_mode(self):
    """
    Handle the deployment of OCS on an external/independent RHCS cluster.

    Creates the namespace and operator group (unless deploying via UI),
    the catalog source (unless it is a live deployment) and the OCS
    subscription, waits for the operator CSV to reach ``Succeeded``,
    creates the external cluster secret and the external StorageCluster,
    then runs the external post-deploy validation and sets up the Ceph
    toolbox.

    """
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    logger.info("Deploying OCS with external mode RHCS")
    ui_deployment = config.DEPLOYMENT.get("ui_deployment")
    if not ui_deployment:
        logger.info("Creating namespace and operator group.")
        run_cmd(f"oc create -f {constants.OLM_YAML}")
    if not live_deployment:
        create_catalog_source()
    self.subscribe_ocs()

    operator_selector = get_selector_for_ocs_operator()
    subscription_plan_approval = config.DEPLOYMENT.get(
        "subscription_plan_approval"
    )
    package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=operator_selector,
        subscription_plan_approval=subscription_plan_approval,
    )
    package_manifest.wait_for_resource(timeout=300)
    channel = config.DEPLOYMENT.get("ocs_csv_channel")
    csv_name = package_manifest.get_current_csv(channel=channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    csv.wait_for_phase("Succeeded", timeout=720)

    # Create secret for external cluster
    create_external_secret()

    cluster_data = templating.load_yaml(
        constants.EXTERNAL_STORAGE_CLUSTER_YAML
    )
    cluster_data["metadata"]["name"] = config.ENV_DATA[
        "storage_cluster_name"
    ]
    # Only the path of the temporary file is needed: close the handle right
    # away so the descriptor is not leaked. delete=False keeps the file on
    # disk for the dump and the ``oc create`` call below.
    with tempfile.NamedTemporaryFile(
        mode="w+", prefix="external_cluster_storage", delete=False
    ) as cluster_data_yaml:
        cluster_data_yaml_path = cluster_data_yaml.name
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml_path)
    run_cmd(f"oc create -f {cluster_data_yaml_path}", timeout=2400)
    self.external_post_deploy_validation()
    setup_ceph_toolbox()
def get_ocs_csv():
    """
    Look up the OCS operator CSV and wait until it has succeeded.

    The CSV name is taken from the OCS package manifest for the channel
    configured in ``config.DEPLOYMENT['ocs_csv_channel']``.

    Returns:
        CSV: OCS CSV object, returned after waiting for the ``Succeeded``
            phase

    """
    cluster_namespace = config.ENV_DATA['cluster_namespace']
    manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=get_selector_for_ocs_operator(),
    )
    csv_name = manifest.get_current_csv(
        channel=config.DEPLOYMENT.get('ocs_csv_channel')
    )
    csv_obj = CSV(resource_name=csv_name, namespace=cluster_namespace)
    log.info(f"Check if OCS operator: {csv_name} is in Succeeded phase.")
    csv_obj.wait_for_phase(phase="Succeeded", timeout=600)
    return csv_obj
def get_ocs_csv():
    """
    Look up the OCS operator CSV and wait until it has succeeded.

    On the Openshift dedicated and ROSA platforms the CSV name is taken
    from the CSVs present in the cluster namespace; on every other
    platform it comes from the OCS package manifest.

    Returns:
        CSV: OCS CSV object, returned after waiting for the ``Succeeded``
            phase

    Raises:
        CSVNotFound: In case no CSV found.

    """
    cluster_namespace = config.ENV_DATA["cluster_namespace"]
    manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=get_selector_for_ocs_operator(),
        subscription_plan_approval=config.DEPLOYMENT.get(
            "subscription_plan_approval"
        ),
    )
    csv_channel = config.DEPLOYMENT.get("ocs_csv_channel")

    on_managed_platform = (
        config.ENV_DATA["platform"].lower()
        == constants.OPENSHIFT_DEDICATED_PLATFORM
        or config.ENV_DATA["platform"].lower() == constants.ROSA_PLATFORM
    )
    if not on_managed_platform:
        csv_name = manifest.get_current_csv(channel=csv_channel)
    else:
        # OCS CSV is extracted from the available CSVs in cluster namespace
        # for Openshift dedicated platform
        csv_listing = OCP(
            namespace=config.ENV_DATA["cluster_namespace"], kind="csv"
        )
        candidates = [
            entry["metadata"]["name"]
            for entry in csv_listing.get()["items"]
            if entry["metadata"]["name"].startswith(
                defaults.OCS_OPERATOR_NAME
            )
        ]
        # When several CSVs match, the last one listed wins.
        csv_name = candidates[-1] if candidates else None
        if not csv_name:
            raise CSVNotFound(f"No OCS CSV found for {config.ENV_DATA['platform']}")

    csv_obj = CSV(resource_name=csv_name, namespace=cluster_namespace)
    log.info(f"Check if OCS operator: {csv_name} is in Succeeded phase.")
    csv_obj.wait_for_phase(phase="Succeeded", timeout=600)
    return csv_obj
def deploy_ocs_via_operator(self):
    """
    Deploy OCS via the OCS operator.

    Creates the OLM and subscription resources, waits for the catalog
    source, the package manifest and the operator CSV, then builds the
    StorageCluster CR from the configured template plus the device set
    template and creates it with ``oc create``.

    """
    logger.info("Deployment of OCS via OCS operator")
    olm_manifest, subscription_manifest = (
        self.get_olm_and_subscription_manifest()
    )
    self.label_and_taint_nodes()
    run_cmd(f"oc create -f {olm_manifest}")
    catalog_source = CatalogSource(
        resource_name='ocs-catalogsource',
        namespace='openshift-marketplace',
    )
    # Wait for catalog source is ready
    catalog_source.wait_for_state("READY")
    run_cmd(f"oc create -f {subscription_manifest}")
    package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME
    )
    # Wait for package manifest is ready
    package_manifest.wait_for_resource()
    channel = config.DEPLOYMENT.get('ocs_csv_channel')
    csv_name = package_manifest.get_current_csv(channel=channel)
    csv = CSV(
        resource_name=csv_name, kind="csv", namespace=self.namespace
    )
    csv.wait_for_phase("Succeeded", timeout=400)

    ocs_operator_storage_cluster_cr = config.DEPLOYMENT.get(
        'ocs_operator_storage_cluster_cr'
    )
    cluster_data = templating.load_yaml(
        ocs_operator_storage_cluster_cr
    )
    cluster_data['metadata']['name'] = config.ENV_DATA[
        'storage_cluster_name'
    ]
    deviceset_data = templating.load_yaml(
        constants.DEVICESET_YAML
    )
    device_size = int(
        config.ENV_DATA.get('device_size', defaults.DEVICE_SIZE)
    )
    deviceset_data['dataPVCTemplate']['spec']['resources']['requests'][
        'storage'
    ] = f"{device_size}Gi"

    # Allow lower instance requests and limits for OCS deployment
    if config.DEPLOYMENT.get('allow_lower_instance_requirements'):
        none_resources = {'Requests': None, 'Limits': None}
        deviceset_data["resources"] = deepcopy(none_resources)
        cluster_data['spec']['resources'] = {
            resource: deepcopy(none_resources) for resource
            in [
                'mon', 'mds', 'rgw', 'mgr', 'noobaa-core', 'noobaa-db',
            ]
        }

    if self.platform.lower() == constants.VSPHERE_PLATFORM:
        cluster_data['spec']['monPVCTemplate']['spec'][
            'storageClassName'
        ] = constants.DEFAULT_SC_VSPHERE
        deviceset_data['dataPVCTemplate']['spec'][
            'storageClassName'
        ] = constants.DEFAULT_SC_VSPHERE

    # Enable host network if enabled in config (this require all the
    # rules to be enabled on underlaying platform).
    if config.DEPLOYMENT.get('host_network'):
        cluster_data['spec']['hostNetwork'] = True

    cluster_data['spec']['storageDeviceSets'] = [deviceset_data]
    # Only the path of the temporary file is needed: close the handle right
    # away so the descriptor is not leaked. delete=False keeps the file on
    # disk for the dump and the ``oc create`` call below.
    with tempfile.NamedTemporaryFile(
        mode='w+', prefix='cluster_storage', delete=False
    ) as cluster_data_yaml:
        cluster_data_yaml_path = cluster_data_yaml.name
    templating.dump_data_to_temp_yaml(
        cluster_data, cluster_data_yaml_path
    )
    run_cmd(f"oc create -f {cluster_data_yaml_path}")
def deploy_ocs_via_operator(self):
    """
    Deploy OCS via the OCS operator.

    When ``ui_deployment`` is configured the deployment is delegated to
    ``self.deployment_with_ui()`` and the method returns early. Otherwise
    the namespace, operator group and subscription are created, the
    operator CSV is awaited, and the StorageCluster CR is built from
    ``constants.STORAGE_CLUSTER_YAML`` and created with ``oc create``.

    """
    ui_deployment = config.DEPLOYMENT.get('ui_deployment')
    live_deployment = config.DEPLOYMENT.get('live_deployment')

    if config.DEPLOYMENT.get('local_storage'):
        setup_local_storage()

    if ui_deployment:
        if not live_deployment:
            self.create_ocs_operator_source()
        self.deployment_with_ui()
        # Skip the rest of the deployment when deploy via UI
        return
    else:
        logger.info("Deployment of OCS via OCS operator")
        self.label_and_taint_nodes()

    logger.info("Creating namespace and operator group.")
    run_cmd(f"oc create -f {constants.OLM_YAML}")
    if not live_deployment:
        self.create_ocs_operator_source()
    self.subscribe_ocs()
    operator_selector = get_selector_for_ocs_operator()
    package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=operator_selector,
    )
    package_manifest.wait_for_resource(timeout=300)
    channel = config.DEPLOYMENT.get('ocs_csv_channel')
    csv_name = package_manifest.get_current_csv(channel=channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    csv.wait_for_phase("Succeeded", timeout=720)

    cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)
    cluster_data['metadata']['name'] = config.ENV_DATA[
        'storage_cluster_name'
    ]
    deviceset_data = cluster_data['spec']['storageDeviceSets'][0]
    device_size = int(
        config.ENV_DATA.get('device_size', defaults.DEVICE_SIZE)
    )
    deviceset_data['dataPVCTemplate']['spec']['resources']['requests'][
        'storage'
    ] = f"{device_size}Gi"

    if self.platform.lower() == constants.VSPHERE_PLATFORM:
        deviceset_data['dataPVCTemplate']['spec'][
            'storageClassName'
        ] = constants.DEFAULT_SC_VSPHERE

    if config.DEPLOYMENT.get('local_storage'):
        cluster_data['spec']['manageNodes'] = False
        cluster_data['spec']['monDataDirHostPath'] = '/var/lib/rook'
        deviceset_data['portable'] = False
        deviceset_data['dataPVCTemplate']['spec'][
            'storageClassName'
        ] = 'localblock'
        if self.platform.lower() == constants.AWS_PLATFORM:
            deviceset_data['count'] = 2

    # Allow lower instance requests and limits for OCS deployment
    if config.DEPLOYMENT.get('allow_lower_instance_requirements'):
        none_resources = {'Requests': None, 'Limits': None}
        deviceset_data["resources"] = deepcopy(none_resources)
        cluster_data['spec']['resources'] = {
            resource: deepcopy(none_resources) for resource
            in [
                'mon', 'mds', 'rgw', 'mgr', 'noobaa-core', 'noobaa-db',
            ]
        }

    # Enable host network if enabled in config (this require all the
    # rules to be enabled on underlaying platform).
    if config.DEPLOYMENT.get('host_network'):
        cluster_data['spec']['hostNetwork'] = True

    cluster_data['spec']['storageDeviceSets'] = [deviceset_data]
    # Only the path of the temporary file is needed: close the handle right
    # away so the descriptor is not leaked. delete=False keeps the file on
    # disk for the dump and the ``oc create`` call below.
    with tempfile.NamedTemporaryFile(
        mode='w+', prefix='cluster_storage', delete=False
    ) as cluster_data_yaml:
        cluster_data_yaml_path = cluster_data_yaml.name
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml_path)
    run_cmd(f"oc create -f {cluster_data_yaml_path}", timeout=2400)
def deploy_ocs_via_operator(self):
    """
    Deploy OCS via the OCS operator.

    When ``ui_deployment`` is configured the deployment is delegated to
    ``self.deployment_with_ui()`` and the method returns early. Otherwise
    the method:

    1. optionally sets up local storage, creates the namespace and
       operator group, and adds IBM Cloud dependencies/secrets,
    2. creates the operator source (non-live deployments) and the OCS
       subscription, then waits for the operator CSV,
    3. optionally modifies the CSV and creates a custom storage class,
    4. builds the StorageCluster CR (device sets, LSO tweaks, resources,
       host network, IBM Cloud mon PVC, encryption) and creates it,
    5. annotates the rook namespace when ``infra_nodes`` is configured.

    Raises:
        UnsupportedFeatureError: If encryption at rest is requested with
            an OCS version lower than 4.6.

    NOTE(review): block nesting was reconstructed from a source whose
    whitespace was collapsed - confirm against the original layout.

    """
    ui_deployment = config.DEPLOYMENT.get("ui_deployment")
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    if ui_deployment:
        if not live_deployment:
            self.create_ocs_operator_source()
        self.deployment_with_ui()
        # Skip the rest of the deployment when deploy via UI
        return
    else:
        logger.info("Deployment of OCS via OCS operator")
        self.label_and_taint_nodes()

    if config.DEPLOYMENT.get("local_storage"):
        setup_local_storage(storageclass=self.DEFAULT_STORAGECLASS_LSO)

    logger.info("Creating namespace and operator group.")
    run_cmd(f"oc create -f {constants.OLM_YAML}")
    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        ibmcloud.add_deployment_dependencies()
        # Non-live IBM Cloud deployments get the OCS secret in both the
        # cluster namespace and the marketplace namespace.
        if not live_deployment:
            create_ocs_secret(self.namespace)
            create_ocs_secret(constants.MARKETPLACE_NAMESPACE)
    if not live_deployment:
        self.create_ocs_operator_source()
    self.subscribe_ocs()
    operator_selector = get_selector_for_ocs_operator()
    subscription_plan_approval = config.DEPLOYMENT.get(
        "subscription_plan_approval")
    package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=operator_selector,
        subscription_plan_approval=subscription_plan_approval,
    )
    package_manifest.wait_for_resource(timeout=300)
    channel = config.DEPLOYMENT.get("ocs_csv_channel")
    csv_name = package_manifest.get_current_csv(channel=channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    if (config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
            and not live_deployment):
        # On non-live IBM Cloud deployments: once the CSV is "Installing",
        # link the service accounts with the OCS secret and delete all
        # pods in the namespace.
        csv.wait_for_phase("Installing", timeout=720)
        logger.info("Sleeping for 30 seconds before applying SA")
        time.sleep(30)
        link_all_sa_and_secret(constants.OCS_SECRET, self.namespace)
        logger.info("Deleting all pods in openshift-storage namespace")
        exec_cmd(f"oc delete pod --all -n {self.namespace}")
    csv.wait_for_phase("Succeeded", timeout=720)

    # Modify the CSV with custom values if required
    if all(key in config.DEPLOYMENT
           for key in ("csv_change_from", "csv_change_to")):
        modify_csv(
            csv=csv_name,
            replace_from=config.DEPLOYMENT["csv_change_from"],
            replace_to=config.DEPLOYMENT["csv_change_to"],
        )

    # create custom storage class for StorageCluster CR if necessary
    if self.CUSTOM_STORAGE_CLASS_PATH is not None:
        with open(self.CUSTOM_STORAGE_CLASS_PATH, "r") as custom_sc_fo:
            custom_sc = yaml.load(custom_sc_fo, Loader=yaml.SafeLoader)
        # set value of DEFAULT_STORAGECLASS to match the custom storage
        # class
        self.DEFAULT_STORAGECLASS = custom_sc["metadata"]["name"]
        run_cmd(f"oc create -f {self.CUSTOM_STORAGE_CLASS_PATH}")

    # creating StorageCluster
    if self.platform == constants.IBM_POWER_PLATFORM:
        cluster_data = templating.load_yaml(
            constants.IBM_STORAGE_CLUSTER_YAML)
    else:
        cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)
    cluster_data["metadata"]["name"] = config.ENV_DATA[
        "storage_cluster_name"]

    if self.platform == constants.IBM_POWER_PLATFORM:
        # IBM Power: one device set entry per storage node, each taken
        # from the template by index.
        numberofstoragenodes = config.ENV_DATA["number_of_storage_nodes"]
        deviceset = [None] * numberofstoragenodes
        for i in range(numberofstoragenodes):
            deviceset_data = cluster_data["spec"]["storageDeviceSets"][i]
            device_size = int(
                config.ENV_DATA.get("device_size", defaults.DEVICE_SIZE))
            # set size of request for storage: smallest PV size of the
            # LSO storage class on powervs, configured device size
            # otherwise
            if self.platform.lower() == "powervs":
                pv_size_list = helpers.get_pv_size(
                    storageclass=self.DEFAULT_STORAGECLASS_LSO)
                pv_size_list.sort()
                deviceset_data["dataPVCTemplate"]["spec"]["resources"][
                    "requests"]["storage"] = f"{pv_size_list[0]}"
            else:
                deviceset_data["dataPVCTemplate"]["spec"]["resources"][
                    "requests"]["storage"] = f"{device_size}Gi"
            # set storage class to OCS default on current platform
            if self.DEFAULT_STORAGECLASS_LSO:
                deviceset_data["dataPVCTemplate"]["spec"][
                    "storageClassName"] = self.DEFAULT_STORAGECLASS_LSO
            # StorageCluster tweaks for LSO
            if config.DEPLOYMENT.get("local_storage"):
                cluster_data["spec"]["manageNodes"] = False
                cluster_data["spec"][
                    "monDataDirHostPath"] = "/var/lib/rook"
                deviceset_data["portable"] = False
                deviceset_data["dataPVCTemplate"]["spec"][
                    "storageClassName"] = self.DEFAULT_STORAGECLASS_LSO
            deviceset[i] = deviceset_data
    else:
        deviceset_data = cluster_data["spec"]["storageDeviceSets"][0]
        device_size = int(
            config.ENV_DATA.get("device_size", defaults.DEVICE_SIZE))
        # set size of request for storage: smallest PV size of the LSO
        # storage class on bare metal, configured device size otherwise
        if self.platform.lower() == constants.BAREMETAL_PLATFORM:
            pv_size_list = helpers.get_pv_size(
                storageclass=self.DEFAULT_STORAGECLASS_LSO)
            pv_size_list.sort()
            deviceset_data["dataPVCTemplate"]["spec"]["resources"][
                "requests"]["storage"] = f"{pv_size_list[0]}"
        else:
            deviceset_data["dataPVCTemplate"]["spec"]["resources"][
                "requests"]["storage"] = f"{device_size}Gi"
        # set storage class to OCS default on current platform
        if self.DEFAULT_STORAGECLASS:
            deviceset_data["dataPVCTemplate"]["spec"][
                "storageClassName"] = self.DEFAULT_STORAGECLASS

    # NOTE(review): versions are parsed as floats, so a version string such
    # as "4.10" becomes 4.1 and compares lower than 4.6 - confirm whether
    # two-digit minor versions can reach this code.
    ocs_version = float(config.ENV_DATA["ocs_version"])
    ocp_version = float(get_ocp_version())

    # StorageCluster tweaks for LSO
    if config.DEPLOYMENT.get("local_storage"):
        cluster_data["spec"]["manageNodes"] = False
        cluster_data["spec"]["monDataDirHostPath"] = "/var/lib/rook"
        deviceset_data["portable"] = False
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"] = self.DEFAULT_STORAGECLASS_LSO
        if self.platform.lower() == constants.AWS_PLATFORM:
            deviceset_data["count"] = 2
        if ocs_version >= 4.5:
            deviceset_data["resources"] = {
                "limits": {
                    "cpu": 2,
                    "memory": "5Gi"
                },
                "requests": {
                    "cpu": 1,
                    "memory": "5Gi"
                },
            }
        if (ocp_version >= 4.6) and (ocs_version >= 4.6):
            cluster_data["metadata"]["annotations"] = {
                "cluster.ocs.openshift.io/local-devices": "true"
            }

    # Allow lower instance requests and limits for OCS deployment
    # The resources we need to change can be found here:
    # https://github.com/openshift/ocs-operator/blob/release-4.5/pkg/deploy-manager/storagecluster.go#L88-L116
    if config.DEPLOYMENT.get("allow_lower_instance_requirements"):
        none_resources = {"Requests": None, "Limits": None}
        deviceset_data["resources"] = deepcopy(none_resources)
        resources = [
            "mon",
            "mds",
            "rgw",
            "mgr",
            "noobaa-core",
            "noobaa-db",
        ]
        if ocs_version >= 4.5:
            resources.append("noobaa-endpoint")
        cluster_data["spec"]["resources"] = {
            resource: deepcopy(none_resources)
            for resource in resources
        }
        # For OCS >= 4.5 the noobaa-endpoint entry created above is
        # replaced with explicit limits and requests.
        if ocs_version >= 4.5:
            cluster_data["spec"]["resources"]["noobaa-endpoint"] = {
                "limits": {
                    "cpu": 1,
                    "memory": "500Mi"
                },
                "requests": {
                    "cpu": 1,
                    "memory": "500Mi"
                },
            }
    else:
        # Without lowered requirements, explicit resources are only set
        # for local storage on AWS.
        local_storage = config.DEPLOYMENT.get("local_storage")
        platform = config.ENV_DATA.get("platform", "").lower()
        if local_storage and platform == "aws":
            resources = {
                "mds": {
                    "limits": {
                        "cpu": 3,
                        "memory": "8Gi"
                    },
                    "requests": {
                        "cpu": 1,
                        "memory": "8Gi"
                    },
                }
            }
            if ocs_version < 4.5:
                resources["noobaa-core"] = {
                    "limits": {
                        "cpu": 2,
                        "memory": "8Gi"
                    },
                    "requests": {
                        "cpu": 1,
                        "memory": "8Gi"
                    },
                }
                resources["noobaa-db"] = {
                    "limits": {
                        "cpu": 2,
                        "memory": "8Gi"
                    },
                    "requests": {
                        "cpu": 1,
                        "memory": "8Gi"
                    },
                }
            cluster_data["spec"]["resources"] = resources

    # Enable host network if enabled in config (this require all the
    # rules to be enabled on underlaying platform).
    if config.DEPLOYMENT.get("host_network"):
        cluster_data["spec"]["hostNetwork"] = True

    # IBM Power uses the per-node list built above; every other platform
    # uses the single device set.
    if self.platform == constants.IBM_POWER_PLATFORM:
        cluster_data["spec"]["storageDeviceSets"] = deviceset
    else:
        cluster_data["spec"]["storageDeviceSets"] = [deviceset_data]

    if self.platform == constants.IBMCLOUD_PLATFORM:
        mon_pvc_template = {
            "spec": {
                "accessModes": ["ReadWriteOnce"],
                "resources": {
                    "requests": {
                        "storage": "20Gi"
                    }
                },
                "storageClassName": self.DEFAULT_STORAGECLASS,
                "volumeMode": "Filesystem",
            }
        }
        cluster_data["spec"]["monPVCTemplate"] = mon_pvc_template
        # Need to check if it's needed for ibm cloud to set manageNodes
        cluster_data["spec"]["manageNodes"] = False

    if config.ENV_DATA.get("encryption_at_rest"):
        if ocs_version < 4.6:
            error_message = "Encryption at REST can be enabled only on OCS >= 4.6!"
            logger.error(error_message)
            raise UnsupportedFeatureError(error_message)
        logger.info("Enabling encryption at REST!")
        cluster_data["spec"]["encryption"] = {
            "enable": True,
        }

    # NOTE(review): the temporary file handle is never closed; only its
    # name is used below. delete=False keeps the file on disk.
    cluster_data_yaml = tempfile.NamedTemporaryFile(
        mode="w+", prefix="cluster_storage", delete=False)
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml.name)
    run_cmd(f"oc create -f {cluster_data_yaml.name}", timeout=1200)

    # NOTE(review): direct indexing raises KeyError when "infra_nodes" is
    # absent from config.DEPLOYMENT - presumably a default always provides
    # it; confirm.
    if config.DEPLOYMENT["infra_nodes"]:
        _ocp = ocp.OCP(kind="node")
        _ocp.exec_oc_cmd(
            command=f"annotate namespace {defaults.ROOK_CLUSTER_NAMESPACE} "
            f"{constants.NODE_SELECTOR_ANNOTATION}")
def deploy_ocs_via_operator(self, image=None):
    """
    Deploy OCS via the OCS operator.

    When ``ui_deployment`` is configured the deployment is delegated to
    ``self.deployment_with_ui()`` and the method returns early. Otherwise
    the method:

    1. optionally sets up local storage, creates the namespace and
       operator group, and adds IBM Cloud dependencies/secret,
    2. creates the catalog source (non-live deployments) and the OCS
       subscription, then waits for the operator CSV,
    3. on IBM Cloud patches the rook operator config map and optionally
       creates the IBM COS secret,
    4. optionally modifies the CSV, creates a custom storage class and
       deploys KMS,
    5. builds the StorageCluster CR (disabled components, arbiter,
       flexible scaling, device set, LSO tweaks, resources, host network,
       IBM Cloud mon PVC, encryption, ceph debug) and creates it,
    6. annotates the rook namespace when ``infra_nodes`` is configured.

    Args:
        image (str): Image of ocs registry.

    Raises:
        UnsupportedFeatureError: If encryption at rest is requested with
            an OCS version lower than 4.6.

    NOTE(review): block nesting was reconstructed from a source whose
    whitespace was collapsed - confirm against the original layout.

    """
    ui_deployment = config.DEPLOYMENT.get("ui_deployment")
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    arbiter_deployment = config.DEPLOYMENT.get("arbiter_deployment")
    if ui_deployment:
        self.deployment_with_ui()
        # Skip the rest of the deployment when deploy via UI
        return
    else:
        logger.info("Deployment of OCS via OCS operator")
        self.label_and_taint_nodes()

    if config.DEPLOYMENT.get("local_storage"):
        setup_local_storage(storageclass=self.DEFAULT_STORAGECLASS_LSO)

    logger.info("Creating namespace and operator group.")
    run_cmd(f"oc create -f {constants.OLM_YAML}")
    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        ibmcloud.add_deployment_dependencies()
        if not live_deployment:
            create_ocs_secret(self.namespace)
    if not live_deployment:
        create_catalog_source(image)
    self.subscribe_ocs()
    operator_selector = get_selector_for_ocs_operator()
    subscription_plan_approval = config.DEPLOYMENT.get(
        "subscription_plan_approval")
    package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=operator_selector,
        subscription_plan_approval=subscription_plan_approval,
    )
    package_manifest.wait_for_resource(timeout=300)
    channel = config.DEPLOYMENT.get("ocs_csv_channel")
    csv_name = package_manifest.get_current_csv(channel=channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    if (config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
            and not live_deployment):
        # On non-live IBM Cloud deployments: once the CSV is "Installing",
        # link the service accounts with the OCS secret and delete all
        # pods in the namespace.
        csv.wait_for_phase("Installing", timeout=720)
        logger.info("Sleeping for 30 seconds before applying SA")
        time.sleep(30)
        link_all_sa_and_secret(constants.OCS_SECRET, self.namespace)
        logger.info("Deleting all pods in openshift-storage namespace")
        exec_cmd(f"oc delete pod --all -n {self.namespace}")
    csv.wait_for_phase("Succeeded", timeout=720)

    # NOTE(review): parsed as float, so "4.10" becomes 4.1 and compares
    # lower than 4.6 - confirm whether two-digit minor versions can reach
    # this code.
    ocp_version = float(get_ocp_version())

    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        config_map = ocp.OCP(
            kind="configmap",
            namespace=self.namespace,
            resource_name=constants.ROOK_OPERATOR_CONFIGMAP,
        )
        # The result is discarded; the call is made with retries before
        # the config map is patched.
        config_map.get(retry=10, wait=5)
        config_map_patch = (
            '\'{"data": {"ROOK_CSI_KUBELET_DIR_PATH": "/var/data/kubelet"}}\''
        )
        logger.info("Patching config map to change KUBLET DIR PATH")
        exec_cmd(
            f"oc patch configmap -n {self.namespace} "
            f"{constants.ROOK_OPERATOR_CONFIGMAP} -p {config_map_patch}")
        if config.DEPLOYMENT.get("create_ibm_cos_secret", True):
            logger.info("Creating secret for IBM Cloud Object Storage")
            with open(constants.IBM_COS_SECRET_YAML, "r") as cos_secret_fd:
                cos_secret_data = yaml.load(cos_secret_fd,
                                            Loader=yaml.SafeLoader)
            key_id = config.AUTH["ibmcloud"]["ibm_cos_access_key_id"]
            key_secret = config.AUTH["ibmcloud"][
                "ibm_cos_secret_access_key"]
            cos_secret_data["data"]["IBM_COS_ACCESS_KEY_ID"] = key_id
            cos_secret_data["data"][
                "IBM_COS_SECRET_ACCESS_KEY"] = key_secret
            # NOTE(review): the temporary file handle is never closed;
            # only its name is used below.
            cos_secret_data_yaml = tempfile.NamedTemporaryFile(
                mode="w+", prefix="cos_secret", delete=False)
            templating.dump_data_to_temp_yaml(cos_secret_data,
                                              cos_secret_data_yaml.name)
            exec_cmd(f"oc create -f {cos_secret_data_yaml.name}")

    # Modify the CSV with custom values if required
    if all(key in config.DEPLOYMENT
           for key in ("csv_change_from", "csv_change_to")):
        modify_csv(
            csv=csv_name,
            replace_from=config.DEPLOYMENT["csv_change_from"],
            replace_to=config.DEPLOYMENT["csv_change_to"],
        )

    # create custom storage class for StorageCluster CR if necessary
    if self.CUSTOM_STORAGE_CLASS_PATH is not None:
        with open(self.CUSTOM_STORAGE_CLASS_PATH, "r") as custom_sc_fo:
            custom_sc = yaml.load(custom_sc_fo, Loader=yaml.SafeLoader)
        # set value of DEFAULT_STORAGECLASS to match the custom storage
        # class
        self.DEFAULT_STORAGECLASS = custom_sc["metadata"]["name"]
        run_cmd(f"oc create -f {self.CUSTOM_STORAGE_CLASS_PATH}")

    # creating StorageCluster
    if config.DEPLOYMENT.get("kms_deployment"):
        kms = KMS.get_kms_deployment()
        kms.deploy()
    cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)

    # Figure out all the OCS modules enabled/disabled
    # CLI parameter --disable-components takes the precedence over
    # anything which comes from config file
    if config.ENV_DATA.get("disable_components"):
        for component in config.ENV_DATA["disable_components"]:
            config.COMPONENTS[f"disable_{component}"] = True
            logger.warning(f"disabling: {component}")

    # Update cluster_data with respective component enable/disable
    for key in config.COMPONENTS.keys():
        # The component name is the part of the key after the first "_".
        comp_name = constants.OCS_COMPONENTS_MAP[key.split("_")[1]]
        if config.COMPONENTS[key]:
            if "noobaa" in key:
                merge_dict(
                    cluster_data,
                    {
                        "spec": {
                            "multiCloudGateway": {
                                "reconcileStrategy": "ignore"
                            }
                        }
                    },
                )
            else:
                merge_dict(
                    cluster_data,
                    {
                        "spec": {
                            "managedResources": {
                                f"{comp_name}": {
                                    "reconcileStrategy": "ignore"
                                }
                            }
                        }
                    },
                )

    if arbiter_deployment:
        cluster_data["spec"]["arbiter"] = {}
        cluster_data["spec"]["nodeTopologies"] = {}
        cluster_data["spec"]["arbiter"]["enable"] = True
        cluster_data["spec"]["nodeTopologies"][
            "arbiterLocation"] = self.get_arbiter_location()
        cluster_data["spec"]["storageDeviceSets"][0][
            "replica"] = config.DEPLOYMENT.get(
                "ocs_operator_nodes_to_label", 4)

    cluster_data["metadata"]["name"] = config.ENV_DATA[
        "storage_cluster_name"]
    deviceset_data = cluster_data["spec"]["storageDeviceSets"][0]
    device_size = int(
        config.ENV_DATA.get("device_size", defaults.DEVICE_SIZE))

    # This message is logged unconditionally, before the check below.
    logger.info(
        "Flexible scaling is available from version 4.7 on LSO cluster with less than 3 zones"
    )
    # Here ocs_version is the raw config value, compared via
    # Version.coerce; it is re-bound to a float further down.
    ocs_version = config.ENV_DATA["ocs_version"]
    zone_num = get_az_count()
    if (config.DEPLOYMENT.get("local_storage")
            and Version.coerce(ocs_version) >= Version.coerce("4.7")
            and zone_num < 3):
        cluster_data["spec"]["flexibleScaling"] = True
        # https://bugzilla.redhat.com/show_bug.cgi?id=1921023
        cluster_data["spec"]["storageDeviceSets"][0]["count"] = 3
        cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 1

    # set size of request for storage: smallest PV size of the LSO storage
    # class on bare metal, configured device size otherwise
    if self.platform.lower() == constants.BAREMETAL_PLATFORM:
        pv_size_list = helpers.get_pv_size(
            storageclass=self.DEFAULT_STORAGECLASS_LSO)
        pv_size_list.sort()
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"] = f"{pv_size_list[0]}"
    else:
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"] = f"{device_size}Gi"

    # set storage class to OCS default on current platform
    if self.DEFAULT_STORAGECLASS:
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"] = self.DEFAULT_STORAGECLASS

    # NOTE(review): from here on ocs_version is a float, so "4.10" becomes
    # 4.1 and compares lower than 4.5/4.6 - confirm whether two-digit
    # minor versions can reach this code.
    ocs_version = float(config.ENV_DATA["ocs_version"])

    # StorageCluster tweaks for LSO
    if config.DEPLOYMENT.get("local_storage"):
        cluster_data["spec"]["manageNodes"] = False
        cluster_data["spec"]["monDataDirHostPath"] = "/var/lib/rook"
        deviceset_data["name"] = constants.DEFAULT_DEVICESET_LSO_PVC_NAME
        deviceset_data["portable"] = False
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"] = self.DEFAULT_STORAGECLASS_LSO
        lso_type = config.DEPLOYMENT.get("type")
        if (self.platform.lower() == constants.AWS_PLATFORM
                and not lso_type == constants.AWS_EBS):
            deviceset_data["count"] = 2
        if (ocp_version >= 4.6) and (ocs_version >= 4.6):
            cluster_data["metadata"]["annotations"] = {
                "cluster.ocs.openshift.io/local-devices": "true"
            }

    # Allow lower instance requests and limits for OCS deployment
    # The resources we need to change can be found here:
    # https://github.com/openshift/ocs-operator/blob/release-4.5/pkg/deploy-manager/storagecluster.go#L88-L116
    if config.DEPLOYMENT.get("allow_lower_instance_requirements"):
        none_resources = {"Requests": None, "Limits": None}
        deviceset_data["resources"] = deepcopy(none_resources)
        resources = [
            "mon",
            "mds",
            "rgw",
            "mgr",
            "noobaa-core",
            "noobaa-db",
        ]
        if ocs_version >= 4.5:
            resources.append("noobaa-endpoint")
        cluster_data["spec"]["resources"] = {
            resource: deepcopy(none_resources)
            for resource in resources
        }
        # For OCS >= 4.5 the noobaa-endpoint entry created above is
        # replaced with explicit limits and requests.
        if ocs_version >= 4.5:
            cluster_data["spec"]["resources"]["noobaa-endpoint"] = {
                "limits": {
                    "cpu": 1,
                    "memory": "500Mi"
                },
                "requests": {
                    "cpu": 1,
                    "memory": "500Mi"
                },
            }
    else:
        # Without lowered requirements, explicit resources are only set
        # for local storage on AWS.
        local_storage = config.DEPLOYMENT.get("local_storage")
        platform = config.ENV_DATA.get("platform", "").lower()
        if local_storage and platform == "aws":
            resources = {
                "mds": {
                    "limits": {
                        "cpu": 3,
                        "memory": "8Gi"
                    },
                    "requests": {
                        "cpu": 1,
                        "memory": "8Gi"
                    },
                }
            }
            if ocs_version < 4.5:
                resources["noobaa-core"] = {
                    "limits": {
                        "cpu": 2,
                        "memory": "8Gi"
                    },
                    "requests": {
                        "cpu": 1,
                        "memory": "8Gi"
                    },
                }
                resources["noobaa-db"] = {
                    "limits": {
                        "cpu": 2,
                        "memory": "8Gi"
                    },
                    "requests": {
                        "cpu": 1,
                        "memory": "8Gi"
                    },
                }
            cluster_data["spec"]["resources"] = resources

    # Enable host network if enabled in config (this require all the
    # rules to be enabled on underlaying platform).
    if config.DEPLOYMENT.get("host_network"):
        cluster_data["spec"]["hostNetwork"] = True

    cluster_data["spec"]["storageDeviceSets"] = [deviceset_data]

    if self.platform == constants.IBMCLOUD_PLATFORM:
        mon_pvc_template = {
            "spec": {
                "accessModes": ["ReadWriteOnce"],
                "resources": {
                    "requests": {
                        "storage": "20Gi"
                    }
                },
                "storageClassName": self.DEFAULT_STORAGECLASS,
                "volumeMode": "Filesystem",
            }
        }
        cluster_data["spec"]["monPVCTemplate"] = mon_pvc_template
        # Need to check if it's needed for ibm cloud to set manageNodes
        cluster_data["spec"]["manageNodes"] = False

    if config.ENV_DATA.get("encryption_at_rest"):
        if ocs_version < 4.6:
            error_message = "Encryption at REST can be enabled only on OCS >= 4.6!"
            logger.error(error_message)
            raise UnsupportedFeatureError(error_message)
        logger.info("Enabling encryption at REST!")
        cluster_data["spec"]["encryption"] = {
            "enable": True,
        }
        if config.DEPLOYMENT.get("kms_deployment"):
            cluster_data["spec"]["encryption"]["kms"] = {
                "enable": True,
            }

    if config.DEPLOYMENT.get("ceph_debug"):
        setup_ceph_debug()
        # NOTE(review): this assignment replaces the whole
        # "managedResources" mapping, so any per-component entries merged
        # in for disabled components above appear to be dropped - confirm
        # this is intended.
        cluster_data["spec"]["managedResources"] = {
            "cephConfig": {
                "reconcileStrategy": "ignore"
            }
        }

    # NOTE(review): the temporary file handle is never closed; only its
    # name is used below. delete=False keeps the file on disk.
    cluster_data_yaml = tempfile.NamedTemporaryFile(
        mode="w+", prefix="cluster_storage", delete=False)
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml.name)
    run_cmd(f"oc create -f {cluster_data_yaml.name}", timeout=1200)

    # NOTE(review): direct indexing raises KeyError when "infra_nodes" is
    # absent from config.DEPLOYMENT - presumably a default always provides
    # it; confirm.
    if config.DEPLOYMENT["infra_nodes"]:
        _ocp = ocp.OCP(kind="node")
        _ocp.exec_oc_cmd(
            command=f"annotate namespace {defaults.ROOK_CLUSTER_NAMESPACE} "
            f"{constants.NODE_SELECTOR_ANNOTATION}")
def test_upgrade():
    """
    Upgrade OCS to the configured version and verify the cluster afterwards.

    Steps performed:
        1. Determine the target version (taken from
           ``upgrade_ocs_registry_image`` when it is set, otherwise from
           ``upgrade_ocs_version``) and refuse a downgrade.
        2. Record the images referenced by the current CSV.
        3. Unless ``upgrade_in_current_source`` is set, point the OCS catalog
           source at the image which should be used for the upgrade
           (creating the catalog source first when it does not exist).
        4. Patch the OCS subscription with the channel and source, approve
           the install plan when approval is ``Manual`` and poll the package
           manifest until it reports a CSV name different from the old one.
        5. Wait for the new CSV to reach ``Succeeded``, pass the old images to
           ``verify_image_versions`` and run ``ocs_install_verification``.

    Raises:
        AssertionError: if the requested version is lower than the version
            which is currently running.
        TimeoutException: if the CSV name did not change after all attempts.

    """
    ceph_cluster = CephCluster()
    # Everything runs inside the health monitor context.
    with CephHealthMonitor(ceph_cluster):
        namespace = config.ENV_DATA['cluster_namespace']
        version_before_upgrade = config.ENV_DATA.get("ocs_version")
        upgrade_version = config.UPGRADE.get(
            "upgrade_ocs_version", version_before_upgrade
        )
        ocs_registry_image = config.UPGRADE.get('upgrade_ocs_registry_image')
        if ocs_registry_image:
            # An explicit registry image wins over the configured version.
            upgrade_version = get_ocs_version_from_image(ocs_registry_image)
        parsed_version_before_upgrade = parse_version(version_before_upgrade)
        parsed_upgrade_version = parse_version(upgrade_version)
        assert parsed_upgrade_version >= parsed_version_before_upgrade, (
            f"Version you would like to upgrade to: {upgrade_version} "
            f"is not higher or equal to the version you currently running: "
            f"{version_before_upgrade}"
        )

        # Remember the images of the CSV we are upgrading from.
        operator_selector = get_selector_for_ocs_operator()
        package_manifest = PackageManifest(
            resource_name=OCS_OPERATOR_NAME, selector=operator_selector,
        )
        channel = config.DEPLOYMENT.get('ocs_csv_channel')
        csv_name_pre_upgrade = package_manifest.get_current_csv(channel)
        log.info(f"CSV name before upgrade is: {csv_name_pre_upgrade}")
        csv_pre_upgrade = CSV(
            resource_name=csv_name_pre_upgrade, namespace=namespace
        )
        pre_upgrade_images = get_images(csv_pre_upgrade.get())

        version_change = (
            parsed_upgrade_version > parsed_version_before_upgrade
        )
        if version_change:
            # Load the config file belonging to the target version.
            version_config_file = os.path.join(
                constants.CONF_DIR, 'ocs_version',
                f'ocs-{upgrade_version}.yaml'
            )
            load_config_file(version_config_file)

        ocs_catalog = CatalogSource(
            resource_name=constants.OPERATOR_CATALOG_SOURCE_NAME,
            namespace=constants.MARKETPLACE_NAMESPACE,
        )
        upgrade_in_current_source = config.UPGRADE.get(
            'upgrade_in_current_source', False
        )
        if not upgrade_in_current_source:
            if not ocs_catalog.is_exist():
                log.info("OCS catalog source doesn't exist. Creating new one.")
                create_catalog_source(ocs_registry_image, ignore_upgrade=True)
            image_url = ocs_catalog.get_image_url()
            image_tag = ocs_catalog.get_image_name()
            log.info(f"Current image is: {image_url}, tag: {image_tag}")
            if ocs_registry_image:
                # Split on the LAST colon only, so that a registry given with
                # a port (host:5000/repo/image:tag) is handled as well.
                image_url, new_image_tag = ocs_registry_image.rsplit(':', 1)
            elif (
                config.UPGRADE.get('upgrade_to_latest', True)
                or version_change
            ):
                new_image_tag = get_latest_ds_olm_tag()
            else:
                new_image_tag = get_next_version_available_for_upgrade(
                    image_tag
                )
            cs_data = deepcopy(ocs_catalog.data)
            image_for_upgrade = ':'.join([image_url, new_image_tag])
            log.info(f"Image: {image_for_upgrade} will be used for upgrade.")
            cs_data['spec']['image'] = image_for_upgrade

            with NamedTemporaryFile() as cs_yaml:
                dump_data_to_temp_yaml(cs_data, cs_yaml.name)
                ocs_catalog.apply(cs_yaml.name)

        # Wait for the new package manifest for upgrade.
        operator_selector = get_selector_for_ocs_operator()
        package_manifest = PackageManifest(
            resource_name=OCS_OPERATOR_NAME, selector=operator_selector,
        )
        package_manifest.wait_for_resource()
        channel = config.DEPLOYMENT.get('ocs_csv_channel')
        if not channel:
            channel = package_manifest.get_default_channel()

        # Update the subscription with the channel and the source.
        subscription = OCP(
            resource_name=constants.OCS_SUBSCRIPTION,
            kind='subscription',
            namespace=config.ENV_DATA['cluster_namespace'],
        )
        current_ocs_source = subscription.data['spec']['source']
        log.info(f"Current OCS subscription source: {current_ocs_source}")
        ocs_source = current_ocs_source if upgrade_in_current_source else (
            constants.OPERATOR_CATALOG_SOURCE_NAME
        )
        patch_subscription_cmd = (
            f'oc patch subscription {constants.OCS_SUBSCRIPTION} '
            f'-n {namespace} --type merge -p \'{{"spec":{{"channel": '
            f'"{channel}", "source": "{ocs_source}"}}}}\''
        )
        run_cmd(patch_subscription_cmd)

        subscription_plan_approval = config.DEPLOYMENT.get(
            'subscription_plan_approval'
        )
        if subscription_plan_approval == 'Manual':
            wait_for_install_plan_and_approve(namespace)

        # Poll until the package manifest reports a different CSV name.
        attempts = 145
        for attempt in range(1, attempts + 1):
            log.info(f"Attempt {attempt}/{attempts} to check CSV upgraded.")
            csv_name_post_upgrade = package_manifest.get_current_csv(channel)
            if csv_name_post_upgrade != csv_name_pre_upgrade:
                log.info(f"CSV now upgraded to: {csv_name_post_upgrade}")
                break
            log.info(f"CSV is still: {csv_name_post_upgrade}")
            sleep(5)
        else:
            # The loop ran out of attempts without seeing a new CSV name.
            raise TimeoutException("No new CSV found after upgrade!")

        csv_post_upgrade = CSV(
            resource_name=csv_name_post_upgrade, namespace=namespace
        )
        log.info(
            f"Waiting for CSV {csv_name_post_upgrade} to be in succeeded state"
        )
        if version_before_upgrade == '4.2' and upgrade_version == '4.3':
            log.info("Force creating Ceph toolbox after upgrade 4.2 -> 4.3")
            setup_ceph_toolbox(force_setup=True)

        # The timeout scales with the number of OSDs.
        osd_count = get_osd_count()
        csv_post_upgrade.wait_for_phase("Succeeded", timeout=200 * osd_count)
        post_upgrade_images = get_images(csv_post_upgrade.get())
        old_images, _, _ = get_upgrade_image_info(
            pre_upgrade_images, post_upgrade_images
        )
        verify_image_versions(old_images, parsed_upgrade_version)
        ocs_install_verification(
            timeout=600,
            skip_osd_distribution_check=True,
            ocs_registry_image=ocs_registry_image,
            post_upgrade_verification=True,
        )
def subscribe_ocs(self):
    """
    Build the subscription manifest and subscribe to the OCS operator.

    The manifest template and the operator name depend on the OCS version
    (odf-operator is used from 4.9 on). The install plan approval, channel
    and source of the subscription are filled in from the DEPLOYMENT config
    before the manifest is created with ``oc create``. After a fixed sleep of
    90 seconds the install plan is approved when the approval is ``Manual``
    and the CSV is expected to get to the ``Installing`` phase.

    """
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    on_ibm_cloud = config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
    if on_ibm_cloud and not live_deployment:
        link_all_sa_and_secret_and_delete_pods(
            constants.OCS_SECRET, self.namespace
        )

    selector = get_selector_for_ocs_operator()

    # For OCS version >= 4.9, we have odf-operator
    if version.get_semantic_ocs_version_from_config() >= version.VERSION_4_9:
        operator_name = defaults.ODF_OPERATOR_NAME
        manifest_template = constants.SUBSCRIPTION_ODF_YAML
    else:
        operator_name = defaults.OCS_OPERATOR_NAME
        manifest_template = constants.SUBSCRIPTION_YAML

    # Wait for package manifest is ready
    package_manifest = PackageManifest(
        resource_name=operator_name, selector=selector,
    )
    package_manifest.wait_for_resource(timeout=300)
    default_channel = package_manifest.get_default_channel()

    subscription_yaml_data = templating.load_yaml(manifest_template)
    spec = subscription_yaml_data["spec"]

    subscription_plan_approval = config.DEPLOYMENT.get(
        "subscription_plan_approval"
    )
    if subscription_plan_approval:
        spec["installPlanApproval"] = subscription_plan_approval

    # A channel given in the config wins over the default one.
    custom_channel = config.DEPLOYMENT.get("ocs_csv_channel")
    if custom_channel:
        logger.info(f"Custom channel will be used: {custom_channel}")
    else:
        logger.info(f"Default channel will be used: {default_channel}")
    spec["channel"] = custom_channel or default_channel

    if config.DEPLOYMENT.get("stage"):
        spec["source"] = constants.OPERATOR_SOURCE_NAME
    if config.DEPLOYMENT.get("live_deployment"):
        spec["source"] = config.DEPLOYMENT.get(
            "live_content_source", defaults.LIVE_CONTENT_SOURCE
        )

    subscription_manifest = tempfile.NamedTemporaryFile(
        mode="w+", prefix="subscription_manifest", delete=False
    )
    manifest_path = subscription_manifest.name
    templating.dump_data_to_temp_yaml(subscription_yaml_data, manifest_path)
    run_cmd(f"oc create -f {manifest_path}")

    logger.info("Sleeping for 90 seconds after subscribing OCS")
    time.sleep(90)

    if subscription_plan_approval != "Manual":
        return
    wait_for_install_plan_and_approve(self.namespace)
    csv_name = package_manifest.get_current_csv(channel=custom_channel)
    operator_csv = CSV(resource_name=csv_name, namespace=self.namespace)
    operator_csv.wait_for_phase("Installing", timeout=60)
def deploy_with_external_mode(self):
    """
    Deploy OCS against an external (independent) RHCS cluster.

    The operator is subscribed first and its CSV has to reach ``Succeeded``.
    Then the external cluster secret and the external storage cluster are
    created from their templates, the deployment is validated and the Ceph
    toolbox is set up.

    Raises:
        ExternalClusterDetailsException: if ``external_cluster_details`` is
            missing or empty in the EXTERNAL_MODE config.

    """
    live_deployment = config.DEPLOYMENT.get('live_deployment')
    logger.info("Deploying OCS with external mode RHCS")
    logger.info("Creating namespace and operator group")
    run_cmd(f"oc create -f {constants.OLM_YAML}")
    if not live_deployment:
        self.create_ocs_operator_source()
    self.subscribe_ocs()

    # Wait for the operator CSV to be installed.
    manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=get_selector_for_ocs_operator(),
    )
    manifest.wait_for_resource(timeout=300)
    csv_channel = config.DEPLOYMENT.get('ocs_csv_channel')
    operator_csv = CSV(
        resource_name=manifest.get_current_csv(channel=csv_channel),
        namespace=self.namespace,
    )
    operator_csv.wait_for_phase("Succeeded", timeout=720)

    # Create secret for external cluster
    secret = templating.load_yaml(constants.EXTERNAL_CLUSTER_SECRET_YAML)
    details = config.EXTERNAL_MODE.get('external_cluster_details', '')
    if not details:
        raise ExternalClusterDetailsException(
            "No external cluster data found"
        )
    secret['data']['external_cluster_details'] = details
    secret_file = tempfile.NamedTemporaryFile(
        mode='w+', prefix='external_cluster_secret', delete=False
    )
    templating.dump_data_to_temp_yaml(secret, secret_file.name)
    logger.info("Creating external cluster secret")
    run_cmd(f"oc create -f {secret_file.name}")

    # Create the storage cluster which consumes the external cluster.
    storage_cluster = templating.load_yaml(
        constants.EXTERNAL_STORAGE_CLUSTER_YAML
    )
    storage_cluster['metadata']['name'] = (
        config.ENV_DATA['storage_cluster_name']
    )
    storage_cluster_file = tempfile.NamedTemporaryFile(
        mode='w+', prefix='external_cluster_storage', delete=False
    )
    templating.dump_data_to_temp_yaml(
        storage_cluster, storage_cluster_file.name
    )
    run_cmd(f"oc create -f {storage_cluster_file.name}", timeout=2400)

    self.external_post_deploy_validation()
    setup_ceph_toolbox()
def deploy_ocs_via_operator(self):
    """
    Deploy OCS through the OCS operator.

    Nodes are labeled and tainted, the OLM resources and the catalog source
    are created. For a UI deployment the method stops there, because the
    rest is done from the UI. Otherwise the operator is subscribed, its CSV
    has to reach ``Succeeded`` and the storage cluster is created from the
    template adjusted according to the config.

    """
    ui_deployment = config.DEPLOYMENT.get('ui_deployment')
    logger.info(
        "Preparing stuff for deployment by openshift-console"
        if ui_deployment
        else "Deployment of OCS via OCS operator"
    )
    self.label_and_taint_nodes()
    run_cmd(f"oc create -f {constants.OLM_YAML}")
    self.create_catalog_source()

    # Skip rest of the deployment when deploy via UI
    if ui_deployment:
        logger.info(
            "Skipping the rest of the deployment because it will be done "
            "by UI deployment!"
        )
        return

    self.subscribe_ocs()
    manifest = PackageManifest(resource_name=defaults.OCS_OPERATOR_NAME)
    csv_channel = config.DEPLOYMENT.get('ocs_csv_channel')
    operator_csv = CSV(
        resource_name=manifest.get_current_csv(channel=csv_channel),
        namespace=self.namespace,
    )
    operator_csv.wait_for_phase("Succeeded", timeout=720)

    cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)
    cluster_spec = cluster_data['spec']
    cluster_data['metadata']['name'] = (
        config.ENV_DATA['storage_cluster_name']
    )
    deviceset_data = cluster_spec['storageDeviceSets'][0]
    pvc_spec = deviceset_data['dataPVCTemplate']['spec']
    device_size = int(
        config.ENV_DATA.get('device_size', defaults.DEVICE_SIZE)
    )
    pvc_spec['resources']['requests']['storage'] = f"{device_size}Gi"

    # Allow lower instance requests and limits for OCS deployment
    if config.DEPLOYMENT.get('allow_lower_instance_requirements'):
        unset_resources = {'Requests': None, 'Limits': None}
        deviceset_data["resources"] = deepcopy(unset_resources)
        component_resources = {}
        for component in (
            'mon', 'mds', 'rgw', 'mgr', 'noobaa-core', 'noobaa-db',
        ):
            component_resources[component] = deepcopy(unset_resources)
        cluster_spec['resources'] = component_resources

    if self.platform.lower() == constants.VSPHERE_PLATFORM:
        pvc_spec['storageClassName'] = constants.DEFAULT_SC_VSPHERE

    # Enable host network if enabled in config (this require all the
    # rules to be enabled on underlaying platform).
    if config.DEPLOYMENT.get('host_network'):
        cluster_spec['hostNetwork'] = True

    cluster_spec['storageDeviceSets'] = [deviceset_data]
    cluster_file = tempfile.NamedTemporaryFile(
        mode='w+', prefix='cluster_storage', delete=False
    )
    templating.dump_data_to_temp_yaml(cluster_data, cluster_file.name)
    run_cmd(f"oc create -f {cluster_file.name}", timeout=2400)
def ocs_install_verification(timeout=600, skip_osd_distribution_check=False):
    """
    Perform steps necessary to verify a successful OCS installation

    The checks are: the OCS CSV is in ``Succeeded`` phase, the storage
    cluster is ``Ready``, the expected pods are running in the expected
    counts, Ceph is healthy, the cephfs and ceph-rbd storage classes exist
    and (optionally) every OSD pod runs on a different node.

    Args:
        timeout (int): Number of seconds for timeout which will be used in
            the checks used in this function.
        skip_osd_distribution_check (bool): If true skip the check for osd
            distribution.

    Raises:
        AssertionError: if any of the checks fails.

    """
    # Imported inside the function as in the original code
    # (NOTE(review): presumably to avoid a circular import - confirm).
    from ocs_ci.ocs.node import get_typed_nodes

    number_of_worker_nodes = len(get_typed_nodes())
    namespace = config.ENV_DATA['cluster_namespace']
    log.info("Verifying OCS installation")

    # Verify OCS CSV is in Succeeded phase
    log.info("verifying ocs csv")
    ocs_package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME
    )
    ocs_csv_name = ocs_package_manifest.get_current_csv()
    ocs_csv = CSV(resource_name=ocs_csv_name, namespace=namespace)
    log.info(f"Check if OCS operator: {ocs_csv_name} is in Succeeded phase.")
    ocs_csv.wait_for_phase(phase="Succeeded", timeout=timeout)

    # Verify OCS Cluster Service (ocs-storagecluster) is Ready
    storage_cluster_name = config.ENV_DATA['storage_cluster_name']
    log.info("Verifying status of storage cluster: %s", storage_cluster_name)
    storage_cluster = StorageCluster(
        resource_name=storage_cluster_name,
        namespace=namespace,
    )
    log.info(
        f"Check if StorageCluster: {storage_cluster_name} is in "
        f"Ready phase"
    )
    storage_cluster.wait_for_phase(phase='Ready', timeout=timeout)

    # Verify pods in running state and proper counts
    log.info("Verifying pod states and counts")
    pod = OCP(kind=constants.POD, namespace=namespace)

    def assert_pods_running(selector, **count):
        """Assert that the pods matching the selector get to Running."""
        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=selector,
            timeout=timeout,
            **count
        ), f"Pods with selector {selector} are not in Running state"

    # ocs-operator
    assert_pods_running(constants.OCS_OPERATOR_LABEL)
    # rook-ceph-operator
    assert_pods_running(constants.OPERATOR_LABEL)
    # noobaa
    assert_pods_running(constants.NOOBAA_APP_LABEL, resource_count=2)
    # mons
    assert_pods_running(constants.MON_APP_LABEL, resource_count=3)
    # csi-cephfsplugin
    assert_pods_running(
        constants.CSI_CEPHFSPLUGIN_LABEL,
        resource_count=number_of_worker_nodes,
    )
    # csi-cephfsplugin-provisioner
    assert_pods_running(
        constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, resource_count=2
    )
    # csi-rbdplugin
    assert_pods_running(
        constants.CSI_RBDPLUGIN_LABEL,
        resource_count=number_of_worker_nodes,
    )
    # csi-rbdplugin-provisioner
    assert_pods_running(
        constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, resource_count=2
    )
    # osds: count * replica of the first storage device set
    first_device_set = storage_cluster.data['spec']['storageDeviceSets'][0]
    osd_count = (
        int(first_device_set['count']) * int(first_device_set['replica'])
    )
    assert_pods_running(constants.OSD_APP_LABEL, resource_count=osd_count)
    # mgr
    assert_pods_running(constants.MGR_APP_LABEL)
    # mds
    assert_pods_running(constants.MDS_APP_LABEL, resource_count=2)
    # rgw check only for VmWare
    if config.ENV_DATA.get('platform') == constants.VSPHERE_PLATFORM:
        assert_pods_running(constants.RGW_APP_LABEL, resource_count=1)

    # Verify ceph health
    log.info("Verifying ceph health")
    assert utils.ceph_health_check(namespace=namespace)

    # Verify StorageClasses (1 ceph-fs, 1 ceph-rbd)
    log.info("Verifying storage classes")
    storage_class = OCP(kind=constants.STORAGECLASS, namespace=namespace)
    required_storage_classes = {
        f'{storage_cluster_name}-cephfs',
        f'{storage_cluster_name}-ceph-rbd'
    }
    storage_class_names = {
        item['metadata']['name'] for item in storage_class.get()['items']
    }
    assert required_storage_classes.issubset(storage_class_names)

    # Verify OSD's are distributed
    if not skip_osd_distribution_check:
        log.info("Verifying OSD's are distributed evenly across worker nodes")
        ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
        osds = ocp_pod_obj.get(selector=constants.OSD_APP_LABEL)['items']
        node_names = [osd['spec']['nodeName'] for osd in osds]
        # No node may host more than one OSD, i.e. all names are unique.
        assert len(set(node_names)) == len(node_names), (
            "OSD's are not distributed evenly across worker nodes"
        )
def ocs_install_verification(timeout=600, skip_osd_distribution_check=False):
    """
    Perform steps necessary to verify a successful OCS installation

    The checks are: the OCS CSV is in ``Succeeded`` phase, the storage
    cluster is ``Ready``, the expected pods are running in the expected
    counts, Ceph is healthy, the storage classes exist, (optionally) every
    OSD pod runs on a different node, the CSI driver objects and the secret
    names in the storage classes are as expected, ``ceph osd tree`` matches
    the schemas and lists every device set PVC, the CSI snapshotter is not
    present and, with 3 availability zones, the pool crush rules are of
    type zone.

    Args:
        timeout (int): Number of seconds for timeout which will be used in
            the checks used in this function.
        skip_osd_distribution_check (bool): If true skip the check for osd
            distribution.

    Raises:
        AssertionError: if any of the checks fails.

    """
    # Imported inside the function as in the original code
    # (NOTE(review): presumably to avoid circular imports - confirm).
    from ocs_ci.ocs.node import get_typed_nodes
    from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs
    from ocs_ci.ocs.resources.pod import get_ceph_tools_pod, get_all_pods

    number_of_worker_nodes = len(get_typed_nodes())
    namespace = config.ENV_DATA['cluster_namespace']
    log.info("Verifying OCS installation")

    # Verify OCS CSV is in Succeeded phase
    log.info("verifying ocs csv")
    operator_selector = get_selector_for_ocs_operator()
    ocs_package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=operator_selector,
    )
    ocs_csv_name = ocs_package_manifest.get_current_csv()
    ocs_csv = CSV(resource_name=ocs_csv_name, namespace=namespace)
    log.info(f"Check if OCS operator: {ocs_csv_name} is in Succeeded phase.")
    ocs_csv.wait_for_phase(phase="Succeeded", timeout=timeout)

    # Verify OCS Cluster Service (ocs-storagecluster) is Ready
    storage_cluster_name = config.ENV_DATA['storage_cluster_name']
    log.info("Verifying status of storage cluster: %s", storage_cluster_name)
    storage_cluster = StorageCluster(
        resource_name=storage_cluster_name,
        namespace=namespace,
    )
    log.info(
        f"Check if StorageCluster: {storage_cluster_name} is in "
        f"Ready phase"
    )
    storage_cluster.wait_for_phase(phase='Ready', timeout=timeout)

    # Verify pods in running state and proper counts
    log.info("Verifying pod states and counts")
    pod = OCP(kind=constants.POD, namespace=namespace)

    def assert_pods_running(selector, **count):
        """Assert that the pods matching the selector get to Running."""
        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=selector,
            timeout=timeout,
            **count
        ), f"Pods with selector {selector} are not in Running state"

    # ocs-operator
    assert_pods_running(constants.OCS_OPERATOR_LABEL)
    # rook-ceph-operator
    assert_pods_running(constants.OPERATOR_LABEL)
    # noobaa
    assert_pods_running(constants.NOOBAA_APP_LABEL, resource_count=2)
    # mons
    assert_pods_running(constants.MON_APP_LABEL, resource_count=3)
    # csi-cephfsplugin
    assert_pods_running(
        constants.CSI_CEPHFSPLUGIN_LABEL,
        resource_count=number_of_worker_nodes,
    )
    # csi-cephfsplugin-provisioner
    assert_pods_running(
        constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, resource_count=2
    )
    # csi-rbdplugin
    assert_pods_running(
        constants.CSI_RBDPLUGIN_LABEL,
        resource_count=number_of_worker_nodes,
    )
    # csi-rbdplugin-provisioner
    assert_pods_running(
        constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, resource_count=2
    )
    # osds: count * replica of the first storage device set
    first_device_set = storage_cluster.data['spec']['storageDeviceSets'][0]
    osd_count = (
        int(first_device_set['count']) * int(first_device_set['replica'])
    )
    assert_pods_running(constants.OSD_APP_LABEL, resource_count=osd_count)
    # mgr
    assert_pods_running(constants.MGR_APP_LABEL)
    # mds
    assert_pods_running(constants.MDS_APP_LABEL, resource_count=2)
    # rgw check only for VmWare
    if config.ENV_DATA.get('platform') == constants.VSPHERE_PLATFORM:
        assert_pods_running(constants.RGW_APP_LABEL, resource_count=1)

    # Verify ceph health
    log.info("Verifying ceph health")
    assert utils.ceph_health_check(namespace=namespace)

    # Verify StorageClasses (1 ceph-fs, 1 ceph-rbd)
    log.info("Verifying storage classes")
    storage_class = OCP(kind=constants.STORAGECLASS, namespace=namespace)
    required_storage_classes = {
        f'{storage_cluster_name}-cephfs',
        f'{storage_cluster_name}-ceph-rbd'
    }
    storage_class_names = {
        item['metadata']['name'] for item in storage_class.get()['items']
    }
    assert required_storage_classes.issubset(storage_class_names)

    # Verify OSD's are distributed
    if not skip_osd_distribution_check:
        log.info("Verifying OSD's are distributed evenly across worker nodes")
        ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
        osds = ocp_pod_obj.get(selector=constants.OSD_APP_LABEL)['items']
        node_names = [osd['spec']['nodeName'] for osd in osds]
        # No node may host more than one OSD, i.e. all names are unique.
        assert len(set(node_names)) == len(node_names), (
            "OSD's are not distributed evenly across worker nodes"
        )

    # Verify that CSI driver object contains provisioner names
    log.info("Verifying CSI driver object contains provisioner names.")
    csi_driver = OCP(kind="CSIDriver")
    csi_driver_names = {
        item['metadata']['name'] for item in csi_driver.get()['items']
    }
    assert {
        defaults.CEPHFS_PROVISIONER, defaults.RBD_PROVISIONER
    } == csi_driver_names

    # Verify node and provisioner secret names in storage class
    log.info("Verifying node and provisioner secret names in storage class.")
    sc_rbd = storage_class.get(
        resource_name=constants.DEFAULT_STORAGECLASS_RBD
    )
    sc_cephfs = storage_class.get(
        resource_name=constants.DEFAULT_STORAGECLASS_CEPHFS
    )
    node_secret_key = 'csi.storage.k8s.io/node-stage-secret-name'
    provisioner_secret_key = 'csi.storage.k8s.io/provisioner-secret-name'
    assert sc_rbd['parameters'][node_secret_key] == (
        constants.RBD_NODE_SECRET
    )
    assert sc_rbd['parameters'][provisioner_secret_key] == (
        constants.RBD_PROVISIONER_SECRET
    )
    assert sc_cephfs['parameters'][node_secret_key] == (
        constants.CEPHFS_NODE_SECRET
    )
    assert sc_cephfs['parameters'][provisioner_secret_key] == (
        constants.CEPHFS_PROVISIONER_SECRET
    )
    log.info("Verified node and provisioner secret names in storage class.")

    # Verify ceph osd tree output
    log.info(
        "Verifying ceph osd tree output and checking for device set PVC names "
        "in the output."
    )
    deviceset_pvcs = [pvc.name for pvc in get_deviceset_pvcs()]
    ct_pod = get_ceph_tools_pod()
    osd_tree = ct_pod.exec_ceph_cmd(ceph_cmd='ceph osd tree', format='json')
    # Work on a copy of the host schema, so the shared schema kept in
    # constants is not modified by this verification.
    host_schema = dict(constants.OSD_TREE_HOST)
    host_schema['properties'] = dict(host_schema['properties'])
    # The enum is deliberately the very list which is consumed in the loop
    # below: once a host name is removed, seeing the same name again no
    # longer matches the enum.
    host_schema['properties']['name'] = {'enum': deviceset_pvcs}
    schemas = {
        'root': constants.OSD_TREE_ROOT,
        'rack': constants.OSD_TREE_RACK,
        'host': host_schema,
        'osd': constants.OSD_TREE_OSD,
        'region': constants.OSD_TREE_REGION,
        'zone': constants.OSD_TREE_ZONE
    }
    for item in osd_tree['nodes']:
        validate(instance=item, schema=schemas[item['type']])
        if item['type'] == 'host':
            deviceset_pvcs.remove(item['name'])
    # Whatever is left was not listed as a host in the osd tree.
    assert not deviceset_pvcs, (
        f"These device set PVCs are not given in ceph osd tree output "
        f"- {deviceset_pvcs}"
    )
    log.info(
        "Verified ceph osd tree output. Device set PVC names are given in the "
        "output."
    )

    # TODO: Verify ceph osd tree output have osd listed as ssd
    # TODO: Verify ceph osd tree output have zone or rack based on AZ

    # Verify CSI snapshotter sidecar container is not present
    log.info("Verifying CSI snapshotter is not present.")
    provisioner_pods = get_all_pods(
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        selector=[
            constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
            constants.CSI_RBDPLUGIN_PROVISIONER_LABEL
        ]
    )
    for pod_obj in provisioner_pods:
        pod_info = pod_obj.get()
        for container, image in get_images(data=pod_info).items():
            assert ('snapshot' not in container) and (
                'snapshot' not in image
            ), (
                f"Snapshot container is present in {pod_obj.name} pod. "
                f"Container {container}. Image {image}"
            )
    # Environment of the first container of the first deployment in the CSV.
    operator_env = (
        ocs_csv.get()['spec']['install']['spec']['deployments'][0]['spec']
        ['template']['spec']['containers'][0]['env']
    )
    assert {
        'name': 'CSI_ENABLE_SNAPSHOTTER', 'value': 'false'
    } in operator_env, "CSI_ENABLE_SNAPSHOTTER value is not set to 'false'."
    log.info("Verified: CSI snapshotter is not present.")

    # Verify pool crush rule is with "type": "zone"
    if utils.get_az_count() == 3:
        log.info("Verifying pool crush rule is with type: zone")
        crush_dump = ct_pod.exec_ceph_cmd(
            ceph_cmd='ceph osd crush dump', format=''
        )
        pool_names = [
            constants.METADATA_POOL,
            constants.DEFAULT_BLOCKPOOL,
            constants.DATA_POOL
        ]
        crush_rules = [
            rule for rule in crush_dump['rules']
            if rule['rule_name'] in pool_names
        ]
        for crush_rule in crush_rules:
            # At least one step of the rule has to be of type zone.
            assert any(
                step.get('type') == 'zone' for step in crush_rule['steps']
            ), f"{crush_rule['rule_name']} is not with type as zone"
        log.info("Verified - pool crush rule is with type: zone")
def ocs_install_verification(timeout=600):
    """
    Perform steps necessary to verify a successful OCS installation

    The checks are: exactly one local storage CSV exists and is in
    ``Succeeded`` phase, the OCS CSV is in ``Succeeded`` phase, every storage
    cluster in the namespace is ``Ready``, the expected pods are running in
    the expected counts, Ceph is healthy, the cephfs and ceph-rbd storage
    classes exist and every OSD pod runs on a different node.

    Args:
        timeout (int): Number of seconds for timeout which will be used in
            the checks used in this function.

    Raises:
        AssertionError: if any of the checks fails.

    """
    log.info("Verifying OCS installation")
    namespace = config.ENV_DATA['cluster_namespace']

    # Verify Local Storage CSV is in Succeeded phase
    log.info("Verifying Local Storage CSV")
    # There is BZ opened:
    # https://bugzilla.redhat.com/show_bug.cgi?id=1770183
    # which makes this check problematic as current CSV is not the currently
    # installed.
    local_storage_csvs = get_csvs_start_with_prefix(
        csv_prefix=constants.LOCAL_STORAGE_CSV_PREFIX,
        namespace=namespace,
    )
    assert len(local_storage_csvs) == 1, (
        f"Expected exactly one local storage CSV, found: {local_storage_csvs}"
    )
    local_storage_name = local_storage_csvs[0]['metadata']['name']
    log.info(
        f"Check if local storage operator: {local_storage_name} is in "
        f"Succeeded phase"
    )
    local_storage_csv = CSV(
        resource_name=local_storage_name, namespace=namespace
    )
    local_storage_csv.wait_for_phase("Succeeded", timeout=timeout)

    # Verify OCS CSV is in Succeeded phase
    log.info("verifying ocs csv")
    ocs_package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME
    )
    ocs_csv_name = ocs_package_manifest.get_current_csv()
    ocs_csv = CSV(resource_name=ocs_csv_name, namespace=namespace)
    log.info(f"Check if OCS operator: {ocs_csv_name} is in Succeeded phase.")
    ocs_csv.wait_for_phase(phase="Succeeded", timeout=timeout)

    # Verify OCS Cluster Service (ocs-storagecluster) is Ready
    log.info("Verifying OCS Cluster service")
    storage_clusters = StorageCluster(namespace=namespace)
    for item in storage_clusters.get()['items']:
        storage_cluster_name = item['metadata']['name']
        storage_cluster = StorageCluster(
            resource_name=storage_cluster_name, namespace=namespace
        )
        log.info("Checking status of %s", storage_cluster_name)
        # Log the storage cluster which is actually being checked (and the
        # phase which is actually waited for).
        log.info(
            f"Check if StorageCluster: {storage_cluster_name} is in "
            f"Ready phase"
        )
        storage_cluster.wait_for_phase(phase='Ready', timeout=timeout)

    # Verify pods in running state and proper counts
    log.info("Verifying pod states and counts")
    pod = OCP(kind=constants.POD, namespace=namespace)

    def assert_pods_running(selector, **count):
        """Assert that the pods matching the selector get to Running."""
        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=selector,
            timeout=timeout,
            **count
        ), f"Pods with selector {selector} are not in Running state"

    # ocs-operator
    assert_pods_running(constants.OCS_OPERATOR_LABEL)
    # rook-ceph-operator
    assert_pods_running(constants.OPERATOR_LABEL)
    # noobaa
    assert_pods_running(constants.NOOBAA_APP_LABEL, resource_count=2)
    # local-storage-operator
    assert_pods_running(constants.LOCAL_STORAGE_OPERATOR_LABEL)
    # mons
    assert_pods_running(constants.MON_APP_LABEL, resource_count=3)
    # csi-cephfsplugin
    assert_pods_running(constants.CSI_CEPHFSPLUGIN_LABEL, resource_count=3)
    # csi-cephfsplugin-provisioner
    assert_pods_running(
        constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, resource_count=2
    )
    # csi-rbdplugin
    assert_pods_running(constants.CSI_RBDPLUGIN_LABEL, resource_count=3)
    # csi-rbdplugin-provisioner
    assert_pods_running(
        constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, resource_count=2
    )
    # osds
    assert_pods_running(constants.OSD_APP_LABEL, resource_count=3)
    # mgr
    assert_pods_running(constants.MGR_APP_LABEL)
    # mds
    assert_pods_running(constants.MDS_APP_LABEL, resource_count=2)

    # Verify ceph health
    log.info("Verifying ceph health")
    assert utils.ceph_health_check(namespace=namespace)

    # Verify StorageClasses (1 ceph-fs, 1 ceph-rbd)
    log.info("Verifying storage classes")
    storage_class = OCP(kind=constants.STORAGECLASS, namespace=namespace)
    storage_cluster_name = config.ENV_DATA['storage_cluster_name']
    required_storage_classes = {
        f'{storage_cluster_name}-cephfs',
        f'{storage_cluster_name}-ceph-rbd'
    }
    storage_class_names = {
        item['metadata']['name'] for item in storage_class.get()['items']
    }
    assert required_storage_classes.issubset(storage_class_names)

    # Verify OSD's are distributed
    log.info("Verifying OSD's are distributed evenly across worker nodes")
    ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
    osds = ocp_pod_obj.get(selector=constants.OSD_APP_LABEL)['items']
    node_names = [osd['spec']['nodeName'] for osd in osds]
    # No node may host more than one OSD, i.e. all names are unique.
    assert len(set(node_names)) == len(node_names), (
        "OSD's are not distributed evenly across worker nodes"
    )
def deploy_ocs_via_operator(self):
    """
    Method for deploy OCS via OCS operator

    Local storage is set up first when it is enabled in the config. A UI
    deployment is handed over to ``deployment_with_ui`` and nothing else is
    done here. Otherwise the nodes are labeled and tainted, the operator is
    subscribed, its CSV has to reach ``Succeeded`` (and is optionally
    modified) and the storage cluster is created from the template adjusted
    according to the platform and the config.

    """
    ui_deployment = config.DEPLOYMENT.get('ui_deployment')
    live_deployment = config.DEPLOYMENT.get('live_deployment')
    if config.DEPLOYMENT.get('local_storage'):
        setup_local_storage()

    if ui_deployment:
        if not live_deployment:
            self.create_ocs_operator_source()
        self.deployment_with_ui()
        # Skip the rest of the deployment when deploy via UI
        return

    logger.info("Deployment of OCS via OCS operator")
    self.label_and_taint_nodes()
    logger.info("Creating namespace and operator group.")
    run_cmd(f"oc create -f {constants.OLM_YAML}")
    if not live_deployment:
        self.create_ocs_operator_source()
    self.subscribe_ocs()

    # Wait for the operator CSV to be installed.
    operator_selector = get_selector_for_ocs_operator()
    package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=operator_selector,
    )
    package_manifest.wait_for_resource(timeout=300)
    channel = config.DEPLOYMENT.get('ocs_csv_channel')
    csv_name = package_manifest.get_current_csv(channel=channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    csv.wait_for_phase("Succeeded", timeout=720)

    # Modify the CSV with custom values if required
    if all(
        key in config.DEPLOYMENT
        for key in ('csv_change_from', 'csv_change_to')
    ):
        modify_csv(
            csv=csv_name,
            replace_from=config.DEPLOYMENT['csv_change_from'],
            replace_to=config.DEPLOYMENT['csv_change_to'],
        )

    cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)
    cluster_data['metadata']['name'] = (
        config.ENV_DATA['storage_cluster_name']
    )
    deviceset_data = cluster_data['spec']['storageDeviceSets'][0]
    pvc_spec = deviceset_data['dataPVCTemplate']['spec']
    device_size = int(
        config.ENV_DATA.get('device_size', defaults.DEVICE_SIZE)
    )

    if self.platform.lower() == constants.BAREMETAL_PLATFORM:
        # Request the first size of the sorted sizes of the LSO PVs.
        pv_size_list = helpers.get_pv_size(
            storageclass=constants.DEFAULT_STORAGECLASS_LSO
        )
        pv_size_list.sort()
        pvc_spec['resources']['requests']['storage'] = f"{pv_size_list[0]}"
    else:
        pvc_spec['resources']['requests']['storage'] = f"{device_size}Gi"

    if self.platform.lower() == constants.VSPHERE_PLATFORM:
        pvc_spec['storageClassName'] = constants.DEFAULT_SC_VSPHERE

    if config.DEPLOYMENT.get('local_storage'):
        cluster_data['spec']['manageNodes'] = False
        cluster_data['spec']['monDataDirHostPath'] = '/var/lib/rook'
        deviceset_data['portable'] = False
        pvc_spec['storageClassName'] = 'localblock'
        if self.platform.lower() == constants.AWS_PLATFORM:
            deviceset_data['count'] = 2

    # Compare the version as a (major, minor) tuple of integers. A float
    # comparison is wrong for two digit minor versions, because "4.10"
    # becomes 4.1 which is lower than 4.5.
    ocs_version = tuple(
        int(part)
        for part in str(config.ENV_DATA['ocs_version']).split('.')[:2]
    )
    has_noobaa_endpoint = ocs_version >= (4, 5)

    # Allow lower instance requests and limits for OCS deployment
    # The resources we need to change can be found here:
    # https://github.com/openshift/ocs-operator/blob/release-4.5/pkg/deploy-manager/storagecluster.go#L88-L116
    if config.DEPLOYMENT.get('allow_lower_instance_requirements'):
        none_resources = {'Requests': None, 'Limits': None}
        deviceset_data["resources"] = deepcopy(none_resources)
        resources = [
            'mon', 'mds', 'rgw', 'mgr', 'noobaa-core', 'noobaa-db',
        ]
        if has_noobaa_endpoint:
            resources.append('noobaa-endpoint')
        cluster_data['spec']['resources'] = {
            resource: deepcopy(none_resources) for resource in resources
        }
    else:
        local_storage = config.DEPLOYMENT.get('local_storage')
        platform = config.ENV_DATA.get('platform', '').lower()
        if local_storage and platform == 'aws':
            resources = {
                'mds': {
                    'limits': {'cpu': 3},
                    'requests': {'cpu': 1}
                },
                'noobaa-core': {
                    'limits': {'cpu': 2, 'memory': '8Gi'},
                    'requests': {'cpu': 1, 'memory': '8Gi'}
                },
                'noobaa-db': {
                    'limits': {'cpu': 2, 'memory': '8Gi'},
                    'requests': {'cpu': 1, 'memory': '8Gi'}
                }
            }
            if has_noobaa_endpoint:
                resources['noobaa-endpoint'] = {
                    'limits': {'cpu': 2, 'memory': '8Gi'},
                    'requests': {'cpu': 1, 'memory': '8Gi'}
                }
            cluster_data['spec']['resources'] = resources

    # Enable host network if enabled in config (this require all the
    # rules to be enabled on underlaying platform).
    if config.DEPLOYMENT.get('host_network'):
        cluster_data['spec']['hostNetwork'] = True

    cluster_data['spec']['storageDeviceSets'] = [deviceset_data]
    cluster_data_yaml = tempfile.NamedTemporaryFile(
        mode='w+', prefix='cluster_storage', delete=False
    )
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml.name)
    run_cmd(f"oc create -f {cluster_data_yaml.name}", timeout=2400)
def test_upgrade():
    """
    Upgrade OCS by pointing the operator catalog source at a new image and
    verify that the cluster ends up on the expected version.

    The flow is:
        1. Resolve the current and target OCS versions from the config and,
           on a version change, load the matching ``ocs-<version>.yaml``
           config file.
        2. Pick the image for the upgrade (explicit registry image, latest
           tag, or next available tag) and apply it to the catalog source.
        3. Poll the package manifest until the current CSV name changes.
        4. Wait for the new CSV to reach the ``Succeeded`` phase, then verify
           image versions and the OCS installation.

    Raises:
        AssertionError: If the target version is lower than the current one
            or the version config file does not exist.
        TimeoutException: If no new CSV shows up after all polling attempts.

    """
    ceph_cluster = CephCluster()
    with CephHealthMonitor(ceph_cluster):
        namespace = config.ENV_DATA['cluster_namespace']
        ocs_catalog = CatalogSource(
            resource_name=constants.OPERATOR_CATALOG_SOURCE_NAME,
            namespace=constants.MARKETPLACE_NAMESPACE,
        )
        version_before_upgrade = config.ENV_DATA.get("ocs_version")
        upgrade_version = config.UPGRADE.get(
            "upgrade_ocs_version", version_before_upgrade
        )
        parsed_version_before_upgrade = parse_version(version_before_upgrade)
        parsed_upgrade_version = parse_version(upgrade_version)
        assert parsed_upgrade_version >= parsed_version_before_upgrade, (
            f"Version you would like to upgrade to: {upgrade_version} "
            f"is not higher or equal to the version you currently running: "
            f"{version_before_upgrade}"
        )
        version_change = parsed_upgrade_version > parsed_version_before_upgrade
        if version_change:
            # A version bump needs the config overrides of the target version.
            version_config_file = os.path.join(
                constants.CONF_DIR, 'ocs_version',
                f'ocs-{upgrade_version}.yaml'
            )
            assert os.path.exists(version_config_file), (
                f"OCS version config file {version_config_file} doesn't exist!"
            )
            with open(
                os.path.abspath(os.path.expanduser(version_config_file))
            ) as file_stream:
                custom_config_data = yaml.safe_load(file_stream)
                config.update(custom_config_data)

        image_url = ocs_catalog.get_image_url()
        image_tag = ocs_catalog.get_image_name()
        log.info(f"Current image is: {image_url}, tag: {image_tag}")
        ocs_registry_image = config.UPGRADE.get('upgrade_ocs_registry_image')
        if ocs_registry_image:
            # Split on the LAST colon only, so a registry host with a port
            # (e.g. "registry:5000/ns/image:tag") is still parsed correctly.
            image_url, new_image_tag = ocs_registry_image.rsplit(':', 1)
        elif config.UPGRADE.get('upgrade_to_latest', True) or version_change:
            new_image_tag = get_latest_ds_olm_tag()
        else:
            new_image_tag = get_next_version_available_for_upgrade(image_tag)

        cs_data = deepcopy(ocs_catalog.data)
        image_for_upgrade = ':'.join([image_url, new_image_tag])
        log.info(f"Image: {image_for_upgrade} will be used for upgrade.")
        cs_data['spec']['image'] = image_for_upgrade

        operator_selector = get_selector_for_ocs_operator()
        package_manifest = PackageManifest(
            resource_name=OCS_OPERATOR_NAME, selector=operator_selector,
        )
        csv_name_pre_upgrade = package_manifest.get_current_csv()
        log.info(f"CSV name before upgrade is: {csv_name_pre_upgrade}")
        csv_pre_upgrade = CSV(
            resource_name=csv_name_pre_upgrade, namespace=namespace
        )
        pre_upgrade_images = get_images(csv_pre_upgrade.get())

        with NamedTemporaryFile() as cs_yaml:
            dump_data_to_temp_yaml(cs_data, cs_yaml.name)
            ocs_catalog.apply(cs_yaml.name)
        # Wait for package manifest is ready
        package_manifest.wait_for_resource()
        subscription_plan_approval = config.DEPLOYMENT.get(
            'subscription_plan_approval'
        )
        if subscription_plan_approval == 'Manual':
            wait_for_install_plan_and_approve(namespace)

        # Poll until the package manifest reports a different CSV. The
        # for/else guarantees a TimeoutException when every attempt is used
        # up without seeing a new CSV (the loop only skips `else` on break).
        attempts = 145
        for attempt in range(1, attempts + 1):
            log.info(f"Attempt {attempt}/{attempts} to check CSV upgraded.")
            package_manifest.reload_data()
            csv_name_post_upgrade = package_manifest.get_current_csv()
            if csv_name_post_upgrade != csv_name_pre_upgrade:
                log.info(f"CSV now upgraded to: {csv_name_post_upgrade}")
                break
            log.info(f"CSV is still: {csv_name_post_upgrade}")
            sleep(5)
        else:
            raise TimeoutException("No new CSV found after upgrade!")

        csv_post_upgrade = CSV(
            resource_name=csv_name_post_upgrade, namespace=namespace
        )
        log.info(
            f"Waiting for CSV {csv_name_post_upgrade} to be in succeeded state"
        )
        if version_before_upgrade == '4.2' and upgrade_version == '4.3':
            # The toolbox is force-created for this specific upgrade path
            # before waiting on the CSV.
            log.info("Force creating Ceph toolbox after upgrade 4.2 -> 4.3")
            setup_ceph_toolbox(force_setup=True)
        csv_post_upgrade.wait_for_phase("Succeeded", timeout=600)
        post_upgrade_images = get_images(csv_post_upgrade.get())
        old_images, _, _ = get_upgrade_image_info(
            pre_upgrade_images, post_upgrade_images
        )
        verify_image_versions(old_images, parsed_upgrade_version)
        ocs_install_verification(
            timeout=600, skip_osd_distribution_check=True
        )