def setup_persistent_monitoring():
    """
    Reconfigure the OpenShift monitoring stack to use OCS-backed storage.

    Points the cluster-monitoring-config configmap at the default RBD
    storage class (and at a telemeter server url when one is present in
    config), then waits for the prometheus/alertmanager pods to respin
    and verifies their PVCs are created, bound and mounted.
    """
    storage_class = helpers.default_storage_class(
        interface_type=constants.CEPHBLOCKPOOL
    )

    # Snapshot the monitoring pods before the config change so the
    # respin validation below compares against the old pod set.
    monitoring_pods = get_all_pods(
        namespace=defaults.OCS_MONITORING_NAMESPACE,
        selector=["prometheus", "alertmanager"],
    )

    # Reconfigure the storage class and the telemeter server (the url is
    # only applied when it is specified in a config file).
    create_configmap_cluster_monitoring_pod(
        sc_name=storage_class.name,
        telemeter_server_url=config.ENV_DATA.get("telemeter_server_url"),
    )

    # Give the cluster-monitoring operator time to respin the pods.
    pause = 45
    logger.info(f"Waiting {pause} seconds...")
    time.sleep(pause)

    # Pods may still be settling after the respin, so retry the check.
    check_respin = retry(
        (CommandFailed, ResourceWrongStatusException), tries=3, delay=15
    )(validate_pods_are_respinned_and_running_state)
    check_respin(monitoring_pods)

    # Each monitoring pod should now have a created and Bound PVC.
    validate_pvc_created_and_bound_on_monitoring_pods()

    # The PVCs must also be mounted inside the pods; retried as above.
    check_mounts = retry(
        (CommandFailed, AssertionError), tries=3, delay=15
    )(validate_pvc_are_mounted_on_monitoring_pods)
    check_mounts(monitoring_pods)
def test_running_cluster_mointoring_with_persistent_stoarge(self):
    """
    Configure persistent volumes for the cluster monitoring pods and
    verify they come back healthy on the OCS-backed storage class.

    NOTE(review): the method name contains typos ("mointoring",
    "stoarge") but is kept as-is, since renaming would change the
    collected test ID.
    """
    monitoring_pods = self.pods_list

    # Point cluster-monitoring-config at the OCS storage class so the
    # prometheus/alertmanager pods get PVC-backed storage.
    create_configmap_cluster_monitoring_pod(self.sc_obj.name)

    # The config change respins the monitoring pods; wait until they
    # are back in Running state.
    validate_monitoring_pods_are_respinned_and_running_state(monitoring_pods)

    # Each monitoring pod should now have a created and Bound PVC...
    validate_pvc_created_and_bound_on_monitoring_pods()

    # ...and the PVC must actually be mounted inside the pod.
    validate_pvc_are_mounted_on_monitoring_pods(monitoring_pods)
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Flow (as implemented below):
      1. Return early if a CephCluster already exists in self.namespace.
      2. Deploy OCS via the operator; wait for mon/mgr/osd pods.
      3. Validate PVC backing, PDBs, toolbox pod and the CephFilesystem.
      4. Optionally re-point cluster monitoring at OCS-backed storage
         and/or configure the telemeter server url (ENV_DATA driven).
      5. Move the image registry backend to OCS and verify Ceph health.
      6. Configure NooBaa endpoint autoscaling based on node specs.
    """
    ceph_cluster = ocp.OCP(kind='CephCluster', namespace=self.namespace)
    try:
        # Any existing item means OCS is already deployed -- bail out early.
        ceph_cluster.get().get('items')[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: empty item list; CommandFailed: resource kind absent.
        logger.info("Running OCS basic installation")
    self.deploy_ocs_via_operator()
    pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
    cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
    # Check for Ceph pods
    assert pod.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mon',
                                 resource_count=3, timeout=600)
    assert pod.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mgr',
                                 timeout=600)
    assert pod.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-osd',
                                 resource_count=3, timeout=600)
    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()
    # validate PDB creation of MON, MDS, OSD pods
    validate_pdb_creation()
    # Creating toolbox pod
    setup_ceph_toolbox()
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector='app=rook-ceph-tools',
                                 resource_count=1, timeout=600)
    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        # Deliberately non-fatal: deployment continues even if MDS failed.
        logger.error("MDS deployment Failed! Please check logs!")
    # Change monitoring backend to OCS
    # NOTE(review): key uses a hyphen ('persistent-monitoring') unlike
    # most snake_case ENV_DATA keys -- confirm against the config schema.
    if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
            'persistent-monitoring'):
        sc = helpers.default_storage_class(
            interface_type=constants.CEPHBLOCKPOOL)
        # Get the list of monitoring pods
        pods_list = get_all_pods(
            namespace=defaults.OCS_MONITORING_NAMESPACE,
            selector=['prometheus', 'alertmanager'])
        # Create configmap cluster-monitoring-config and reconfigure
        # storage class and telemeter server (if the url is specified in a
        # config file)
        create_configmap_cluster_monitoring_pod(
            sc_name=sc.name,
            telemeter_server_url=config.ENV_DATA.get(
                "telemeter_server_url"))
        # Take some time to respin the pod
        waiting_time = 45
        logger.info(f"Waiting {waiting_time} seconds...")
        time.sleep(waiting_time)
        # Validate the pods are respinned and in running state
        retry((CommandFailed, ResourceWrongStatusException),
              tries=3,
              delay=15)(validate_pods_are_respinned_and_running_state)(
                  pods_list)
        # Validate the pvc is created on monitoring pods
        validate_pvc_created_and_bound_on_monitoring_pods()
        # Validate the pvc are mounted on pods
        retry((CommandFailed, AssertionError), tries=3, delay=15)(
            validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
    elif config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
            "telemeter_server_url"):
        # Create configmap cluster-monitoring-config to reconfigure
        # telemeter server url when 'persistent-monitoring' is False
        create_configmap_cluster_monitoring_pod(
            telemeter_server_url=config.ENV_DATA["telemeter_server_url"])
    # Change registry backend to OCS CEPHFS RWX PVC
    registry.change_registry_backend_to_ocs()
    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    try:
        ceph_health_check(namespace=self.namespace, tries=30, delay=10)
    except CephHealthException as ex:
        err = str(ex)
        logger.warning(f"Ceph health check failed with {err}")
        if "clock skew detected" in err:
            # Clock skew is recoverable: re-sync NTP, then re-check below.
            logger.info(f"Changing NTP on compute nodes to"
                        f" {constants.RH_NTP_CLOCK}")
            update_ntp_compute_nodes()
        # NOTE(review): the re-check runs even when the failure was not
        # clock skew -- presumably to give the cluster more time; confirm.
        assert ceph_health_check(namespace=self.namespace, tries=60,
                                 delay=10)
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()
    if check_nodes_specs(min_cpu=constants.MIN_NODE_CPU,
                         min_memory=constants.MIN_NODE_MEMORY):
        logger.info("The cluster specs meet the minimum requirements and "
                    "therefore, NooBaa auto scale will be enabled")
        min_nb_eps = config.DEPLOYMENT.get('min_noobaa_endpoints')
        max_nb_eps = config.DEPLOYMENT.get('max_noobaa_endpoints')
        change_noobaa_endpoints_count(min_nb_eps=min_nb_eps,
                                      max_nb_eps=max_nb_eps)
    else:
        # Undersized cluster: pin NooBaa to a single endpoint.
        logger.warning(
            "The cluster specs do not meet the minimum requirements"
            " and therefore, NooBaa auto scale will remain disabled")
        change_noobaa_endpoints_count(min_nb_eps=1, max_nb_eps=1)
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Two install paths are supported:
      * manual (``not self.ocs_operator_deployment``): render and apply
        the rook yaml manifests (common/operator/cluster plus the
        monitoring RBAC and service-monitor/prometheus-rules hacks) and
        create the CephFilesystem explicitly;
      * operator based: delegate to ``self.deploy_ocs_via_operator()``.

    Both paths then wait for the Ceph pods, validate PVC backing, create
    the toolbox, validate the MDS, optionally re-point cluster monitoring
    at OCS storage, move the registry backend to OCS and assert health.

    Fix vs. previous revision: dropped ``f`` prefixes from string
    literals with no placeholders (ruff F541); renamed local ``cbj_obj``
    to ``cbp_obj`` (it holds a ceph block pool). No behavior change.
    """
    _templating = templating.Templating()
    ceph_cluster = ocp.OCP(
        kind='CephCluster',
        namespace=self.namespace
    )
    try:
        # Any existing CephCluster item means OCS is already deployed.
        ceph_cluster.get().get('items')[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: empty item list; CommandFailed: resource kind absent.
        logger.info("Running OCS basic installation")
    if not self.ocs_operator_deployment:
        create_oc_resource(
            'common.yaml', self.cluster_path, _templating, config.ENV_DATA
        )
        run_cmd(
            f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
            f'"openshift.io/cluster-monitoring=true"'
        )
        run_cmd(
            f"oc policy add-role-to-user view "
            f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
            f"-n {self.namespace}"
        )
        # HACK: If you would like to drop this hack, make sure that you
        # also updated docs and write appropriate unit/integration tests
        # for config processing.
        if config.ENV_DATA.get('monitoring_enabled') in (
            "true", "True", True
        ):
            # RBAC rules for monitoring, based on documentation change in
            # rook:
            # https://github.com/rook/rook/commit/1b6fe840f6ae7372a9675ba727ecc65326708aa8
            # HACK: This should be dropped when OCS is managed by OLM
            apply_oc_resource(
                'rbac.yaml',
                self.cluster_path,
                _templating,
                config.ENV_DATA,
                template_dir="monitoring"
            )
        # Increased to 15 seconds as 10 is not enough
        # TODO: do the sampler function and check if resource exist
        wait_time = 15
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        create_oc_resource(
            'operator-openshift.yaml', self.cluster_path,
            _templating, config.ENV_DATA
        )
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        run_cmd(
            f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-operator "
            f"-n {self.namespace} "
            f"--timeout=120s"
        )
        run_cmd(
            f"oc wait --for condition=ready pod "
            f"-l app=rook-discover "
            f"-n {self.namespace} "
            f"--timeout=120s"
        )
        create_oc_resource(
            'cluster.yaml', self.cluster_path, _templating, config.ENV_DATA
        )
    else:
        self.deploy_ocs_via_operator()
    pod = ocp.OCP(
        kind=constants.POD, namespace=self.namespace
    )
    cfs = ocp.OCP(
        kind=constants.CEPHFILESYSTEM,
        namespace=self.namespace
    )
    # Check for Ceph pods
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr',
        timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )
    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()
    # Creating toolbox pod
    setup_ceph_toolbox()
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING, selector='app=rook-ceph-tools',
        resource_count=1, timeout=600
    )
    if not self.ocs_operator_deployment:
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        # HACK: This should be dropped (including service-monitor.yaml and
        # prometheus-rules.yaml files) when OCS is managed by OLM
        if config.ENV_DATA.get('monitoring_enabled') not in (
            "true", "True", True
        ):
            # HACK: skip creation of rook-ceph-mgr service monitor when
            # monitoring is enabled (if this were not skipped, the step
            # would fail because rook would create the service monitor at
            # this point already)
            create_oc_resource(
                "service-monitor.yaml", self.cluster_path, _templating,
                config.ENV_DATA
            )
            # HACK: skip creation of prometheus-rules, rook-ceph is
            # concerned with it's setup now, based on clarification from
            # Umanga Chapagain
            create_oc_resource(
                "prometheus-rules.yaml", self.cluster_path, _templating,
                config.ENV_DATA
            )
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        # Create MDS pods for CephFileSystem
        fs_data = templating.load_yaml(constants.CEPHFILESYSTEM_YAML)
        fs_data['metadata']['namespace'] = self.namespace
        ceph_obj = OCS(**fs_data)
        ceph_obj.create()
        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector='app=rook-ceph-mds',
            resource_count=2, timeout=600
        )
    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        # Deliberately non-fatal: deployment continues even if MDS failed.
        logger.error("MDS deployment Failed! Please check logs!")
    if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get('persistent-monitoring'):
        # Create a pool, secrets and sc
        secret_obj = helpers.create_secret(
            interface_type=constants.CEPHBLOCKPOOL)
        cbp_obj = helpers.create_ceph_block_pool()
        sc_obj = helpers.create_storage_class(
            interface_type=constants.CEPHBLOCKPOOL,
            interface_name=cbp_obj.name,
            secret_name=secret_obj.name
        )
        # Get the list of monitoring pods
        pods_list = get_all_pods(
            namespace=defaults.OCS_MONITORING_NAMESPACE,
            selector=['prometheus', 'alertmanager']
        )
        # Create configmap cluster-monitoring-config
        create_configmap_cluster_monitoring_pod(sc_obj.name)
        # Take some time to respin the pod
        waiting_time = 30
        logger.info(f"Waiting {waiting_time} seconds...")
        time.sleep(waiting_time)
        # Validate the pods are respinned and in running state
        validate_pods_are_respinned_and_running_state(
            pods_list
        )
        # Validate the pvc is created on monitoring pods
        validate_pvc_created_and_bound_on_monitoring_pods()
        # Validate the pvc are mounted on pods
        validate_pvc_are_mounted_on_monitoring_pods(pods_list)
    # Change registry backend to OCS CEPHFS RWX PVC
    registry.change_registry_backend_to_ocs()
    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(
        namespace=self.namespace
    )
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Flow: skip if a CephCluster exists, deploy via operator, wait for
    mon/mgr/osd/toolbox pods, validate PVC backing and PDBs, apply the
    pg-autoscale workaround on local-storage deployments, validate the
    MDS, optionally re-point monitoring at OCS, move the registry
    backend to OCS and assert Ceph health.
    """
    ceph_cluster = ocp.OCP(kind='CephCluster', namespace=self.namespace)
    try:
        # Any existing item means OCS is already deployed -- bail out early.
        ceph_cluster.get().get('items')[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: empty item list; CommandFailed: resource kind absent.
        logger.info("Running OCS basic installation")
    self.deploy_ocs_via_operator()
    pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
    cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
    # Check for Ceph pods
    assert pod.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mon',
                                 resource_count=3, timeout=600)
    assert pod.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mgr',
                                 timeout=600)
    assert pod.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-osd',
                                 resource_count=3, timeout=600)
    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()
    # validate PDB creation of MON, MDS, OSD pods
    validate_pdb_creation()
    # Creating toolbox pod
    setup_ceph_toolbox()
    assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector='app=rook-ceph-tools',
                                 resource_count=1, timeout=600)
    # Workaround for https://bugzilla.redhat.com/show_bug.cgi?id=1847098
    if config.DEPLOYMENT.get('local_storage'):
        # Enable pg autoscaling from the toolbox pod for the pools that
        # miss it on local-storage (LSO) deployments.
        tools_pod = run_cmd(
            f"oc -n {self.namespace} get pod -l 'app=rook-ceph-tools' "
            f"-o jsonpath='{{.items[0].metadata.name}}'")
        pgs_to_autoscale = [
            'ocs-storagecluster-cephblockpool',
            'ocs-storagecluster-cephfilesystem-data0'
        ]
        for pg in pgs_to_autoscale:
            run_cmd(f"oc -n {self.namespace} exec {tools_pod} -- "
                    f"ceph osd pool set {pg} pg_autoscale_mode on")
    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        # Deliberately non-fatal: deployment continues even if MDS failed.
        logger.error("MDS deployment Failed! Please check logs!")
    # Change monitoring backend to OCS
    if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
            'persistent-monitoring'):
        sc = helpers.default_storage_class(
            interface_type=constants.CEPHBLOCKPOOL)
        # Get the list of monitoring pods
        pods_list = get_all_pods(
            namespace=defaults.OCS_MONITORING_NAMESPACE,
            selector=['prometheus', 'alertmanager'])
        # Create configmap cluster-monitoring-config and reconfigure
        # storage class and telemeter server (if the url is specified in a
        # config file)
        create_configmap_cluster_monitoring_pod(
            sc_name=sc.name,
            telemeter_server_url=config.ENV_DATA.get(
                "telemeter_server_url"))
        # Take some time to respin the pod
        waiting_time = 45
        logger.info(f"Waiting {waiting_time} seconds...")
        time.sleep(waiting_time)
        # Validate the pods are respinned and in running state
        retry((CommandFailed, ResourceWrongStatusException),
              tries=3,
              delay=15)(validate_pods_are_respinned_and_running_state)(
                  pods_list)
        # Validate the pvc is created on monitoring pods
        validate_pvc_created_and_bound_on_monitoring_pods()
        # Validate the pvc are mounted on pods
        retry((CommandFailed, AssertionError), tries=3, delay=15)(
            validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
    elif config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
            "telemeter_server_url"):
        # Create configmap cluster-monitoring-config to reconfigure
        # telemeter server url when 'persistent-monitoring' is False
        create_configmap_cluster_monitoring_pod(
            telemeter_server_url=config.ENV_DATA["telemeter_server_url"])
    # Change registry backend to OCS CEPHFS RWX PVC
    registry.change_registry_backend_to_ocs()
    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=self.namespace)
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Flow: skip if a CephCluster exists; on external mode delegate to
    deploy_with_external_mode() and return; otherwise deploy via the
    operator, wait for mon/mgr/osd/toolbox pods, validate PVC backing,
    PDBs and the MDS, optionally re-point monitoring at OCS, move the
    registry backend to OCS and verify Ceph health (with a vSphere NTP
    workaround for clock skew).
    """
    ceph_cluster = ocp.OCP(kind="CephCluster", namespace=self.namespace)
    try:
        # Any existing item means OCS is already deployed -- bail out early.
        ceph_cluster.get().get("items")[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: empty item list; CommandFailed: resource kind absent.
        logger.info("Running OCS basic installation")
    if config.DEPLOYMENT["external_mode"]:
        # External RHCS cluster: nothing below applies.
        logger.info("Deploying OCS on external mode RHCS")
        return self.deploy_with_external_mode()
    self.deploy_ocs_via_operator()
    pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
    cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
    # Check for Ceph pods
    assert pod.wait_for_resource(
        condition="Running",
        selector="app=rook-ceph-mon",
        resource_count=3,
        timeout=600,
    )
    assert pod.wait_for_resource(condition="Running",
                                 selector="app=rook-ceph-mgr",
                                 timeout=600)
    assert pod.wait_for_resource(
        condition="Running",
        selector="app=rook-ceph-osd",
        resource_count=3,
        timeout=600,
    )
    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()
    # validate PDB creation of MON, MDS, OSD pods
    validate_pdb_creation()
    # Creating toolbox pod
    setup_ceph_toolbox()
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-tools",
        resource_count=1,
        timeout=600,
    )
    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data["items"][0]["metadata"]["name"]
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        # Deliberately non-fatal: deployment continues even if MDS failed.
        logger.error("MDS deployment Failed! Please check logs!")
    # Change monitoring backend to OCS
    if config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
            "persistent-monitoring"):
        sc = helpers.default_storage_class(
            interface_type=constants.CEPHBLOCKPOOL)
        # Get the list of monitoring pods
        pods_list = get_all_pods(
            namespace=defaults.OCS_MONITORING_NAMESPACE,
            selector=["prometheus", "alertmanager"],
        )
        # Create configmap cluster-monitoring-config and reconfigure
        # storage class and telemeter server (if the url is specified in a
        # config file)
        create_configmap_cluster_monitoring_pod(
            sc_name=sc.name,
            telemeter_server_url=config.ENV_DATA.get(
                "telemeter_server_url"),
        )
        # Take some time to respin the pod
        waiting_time = 45
        logger.info(f"Waiting {waiting_time} seconds...")
        time.sleep(waiting_time)
        # Validate the pods are respinned and in running state
        retry((CommandFailed, ResourceWrongStatusException),
              tries=3,
              delay=15)(validate_pods_are_respinned_and_running_state)(
                  pods_list)
        # Validate the pvc is created on monitoring pods
        validate_pvc_created_and_bound_on_monitoring_pods()
        # Validate the pvc are mounted on pods
        retry((CommandFailed, AssertionError), tries=3, delay=15)(
            validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
    elif config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
            "telemeter_server_url"):
        # Create configmap cluster-monitoring-config to reconfigure
        # telemeter server url when 'persistent-monitoring' is False
        create_configmap_cluster_monitoring_pod(
            telemeter_server_url=config.ENV_DATA["telemeter_server_url"])
    # Change registry backend to OCS CEPHFS RWX PVC
    registry.change_registry_backend_to_ocs()
    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    try:
        ceph_health_check(namespace=self.namespace, tries=30, delay=10)
    except CephHealthException as ex:
        err = str(ex)
        logger.warning(f"Ceph health check failed with {err}")
        if "clock skew detected" in err:
            # Clock skew is recoverable on vSphere: re-sync NTP first.
            logger.info(f"Changing NTP on compute nodes to"
                        f" {constants.RH_NTP_CLOCK}")
            if self.platform == constants.VSPHERE_PLATFORM:
                update_ntp_compute_nodes()
        # NOTE(review): the re-check runs even when the failure was not
        # clock skew -- presumably to give the cluster more time; confirm.
        assert ceph_health_check(namespace=self.namespace, tries=60,
                                 delay=10)
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Flow: put the registry in Managed state, skip if a CephCluster
    exists, optionally prepare a disconnected (mirrored) deployment,
    deploy either in external mode or via the operator, wait for the
    Ceph pods, validate PVC backing and PDBs, create the toolbox,
    validate the MDS (unless cephfs is disabled), optionally re-point
    monitoring at OCS, move the registry backend to OCS and verify
    Ceph health (with a vSphere NTP workaround for clock skew).
    """
    set_registry_to_managed_state()
    image = None
    ceph_cluster = ocp.OCP(kind="CephCluster", namespace=self.namespace)
    try:
        # Any existing item means OCS is already deployed -- bail out early.
        ceph_cluster.get().get("items")[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: empty item list; CommandFailed: resource kind absent.
        logger.info("Running OCS basic installation")
    # disconnected installation?
    load_cluster_info()
    if config.DEPLOYMENT.get("disconnected"):
        # Mirrors the images and returns the OCS image to install from.
        image = prepare_disconnected_ocs_deployment()
    if config.DEPLOYMENT["external_mode"]:
        self.deploy_with_external_mode()
    else:
        self.deploy_ocs_via_operator(image)
    pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
    cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
    # Check for Ceph pods
    # IBM Cloud mons are slower to come up, hence the longer timeout.
    mon_pod_timeout = (900 if self.platform == constants.IBMCLOUD_PLATFORM
                       else 600)
    assert pod.wait_for_resource(
        condition="Running",
        selector="app=rook-ceph-mon",
        resource_count=3,
        timeout=mon_pod_timeout,
    )
    assert pod.wait_for_resource(condition="Running",
                                 selector="app=rook-ceph-mgr",
                                 timeout=600)
    assert pod.wait_for_resource(
        condition="Running",
        selector="app=rook-ceph-osd",
        resource_count=3,
        timeout=600,
    )
    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()
    # validate PDB creation of MON, MDS, OSD pods
    validate_pdb_creation()
    # Creating toolbox pod
    setup_ceph_toolbox()
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-tools",
        resource_count=1,
        timeout=600,
    )
    if not config.COMPONENTS["disable_cephfs"]:
        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data["items"][0]["metadata"]["name"]
        if helpers.validate_cephfilesystem(cfs_name):
            logger.info("MDS deployment is successful!")
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            # Deliberately non-fatal: deployment continues regardless.
            logger.error("MDS deployment Failed! Please check logs!")
    # Change monitoring backend to OCS
    if config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
            "persistent-monitoring"):
        setup_persistent_monitoring()
    elif config.ENV_DATA.get("monitoring_enabled") and config.ENV_DATA.get(
            "telemeter_server_url"):
        # Create configmap cluster-monitoring-config to reconfigure
        # telemeter server url when 'persistent-monitoring' is False
        create_configmap_cluster_monitoring_pod(
            telemeter_server_url=config.ENV_DATA["telemeter_server_url"])
    if not config.COMPONENTS["disable_cephfs"]:
        # Change registry backend to OCS CEPHFS RWX PVC
        registry.change_registry_backend_to_ocs()
    # Verify health of ceph cluster
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    try:
        ceph_health_check(namespace=self.namespace, tries=30, delay=10)
    except CephHealthException as ex:
        err = str(ex)
        logger.warning(f"Ceph health check failed with {err}")
        if "clock skew detected" in err:
            # Clock skew is recoverable on vSphere: re-sync NTP first.
            logger.info(f"Changing NTP on compute nodes to"
                        f" {constants.RH_NTP_CLOCK}")
            if self.platform == constants.VSPHERE_PLATFORM:
                update_ntp_compute_nodes()
        # NOTE(review): the re-check runs even when the failure was not
        # clock skew -- presumably to give the cluster more time; confirm.
        assert ceph_health_check(namespace=self.namespace, tries=60,
                                 delay=10)
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Flow: skip if a CephCluster exists, deploy via the operator, wait
    for mon/mgr/osd/toolbox pods, validate PVC backing and the MDS,
    optionally re-point cluster monitoring at OCS storage and/or set a
    telemeter server url, move the registry backend to OCS, assert
    Ceph health and mark the platform default storage class
    non-default.

    Fix vs. previous revision: dropped ``f`` prefixes from string
    literals with no placeholders (ruff F541). No behavior change.
    """
    ceph_cluster = ocp.OCP(
        kind='CephCluster',
        namespace=self.namespace
    )
    try:
        # Any existing CephCluster item means OCS is already deployed.
        ceph_cluster.get().get('items')[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: empty item list; CommandFailed: resource kind absent.
        logger.info("Running OCS basic installation")
    self.deploy_ocs_via_operator()
    pod = ocp.OCP(
        kind=constants.POD, namespace=self.namespace
    )
    cfs = ocp.OCP(
        kind=constants.CEPHFILESYSTEM,
        namespace=self.namespace
    )
    # Check for Ceph pods
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr',
        timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )
    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()
    # Creating toolbox pod
    setup_ceph_toolbox()
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING, selector='app=rook-ceph-tools',
        resource_count=1, timeout=600
    )
    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        # Deliberately non-fatal: deployment continues even if MDS failed.
        logger.error("MDS deployment Failed! Please check logs!")
    # Change monitoring backend to OCS
    if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get('persistent-monitoring'):
        sc = helpers.default_storage_class(
            interface_type=constants.CEPHBLOCKPOOL)
        # Get the list of monitoring pods
        pods_list = get_all_pods(
            namespace=defaults.OCS_MONITORING_NAMESPACE,
            selector=['prometheus', 'alertmanager']
        )
        # Create configmap cluster-monitoring-config and reconfigure
        # storage class and telemeter server (if the url is specified in a
        # config file)
        create_configmap_cluster_monitoring_pod(
            sc_name=sc.name,
            telemeter_server_url=config.ENV_DATA.get("telemeter_server_url"))
        # Take some time to respin the pod
        waiting_time = 45
        logger.info(f"Waiting {waiting_time} seconds...")
        time.sleep(waiting_time)
        # Validate the pods are respinned and in running state
        validate_pods_are_respinned_and_running_state(
            pods_list
        )
        # Validate the pvc is created on monitoring pods
        validate_pvc_created_and_bound_on_monitoring_pods()
        # Validate the pvc are mounted on pods
        validate_pvc_are_mounted_on_monitoring_pods(pods_list)
    elif config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get("telemeter_server_url"):
        # Create configmap cluster-monitoring-config to reconfigure
        # telemeter server url when 'persistent-monitoring' is False
        create_configmap_cluster_monitoring_pod(
            telemeter_server_url=config.ENV_DATA["telemeter_server_url"])
    # Change registry backend to OCS CEPHFS RWX PVC
    registry.change_registry_backend_to_ocs()
    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(
        namespace=self.namespace
    )
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()