def delete_pods(self):
    """
    Delete a batch of Ceph-related pods in one shot:

    - Rook operator
    - OSD
    - MGR
    - MON
    """
    # Collect every target pod first, then issue a single bulk delete.
    targets = [
        pod.get_ocs_operator_pod(
            ocs_label=constants.OPERATOR_LABEL,
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
        )
    ]
    for fetch_pods in (pod.get_osd_pods, pod.get_mgr_pods, pod.get_mon_pods):
        targets.extend(fetch_pods())
    logger.info(f"Deleting pods: {[p.name for p in targets]}")
    pod.delete_pods(pod_objs=targets)
def test_delete_rook_ceph_mon_pod(self):
    """
    Repeatedly delete the rook operator pod and the transient
    rook-ceph-detect-version pod it spawns, verifying the detect-version
    pod appears, can be deleted, and leaves no leftover behind.

    Raises:
        AssertionError: if the operator pod is missing, a delete fails,
            the detect-version pod never appears, or a leftover
            detect-version pod remains after deletion.
    """
    for i in range(5):
        rook_operator_pod = pod.get_ocs_operator_pod(
            ocs_label=constants.OPERATOR_LABEL,
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
        )
        assert rook_operator_pod, "No rook operator pod found"
        log.info(
            f"Found rook-operator pod {rook_operator_pod.name}. Deleting it."
        )
        operator_deleted = rook_operator_pod.delete(wait=False)
        assert operator_deleted, f"Failed to delete pod {rook_operator_pod.name}"

        # Wait for the detect-version pod spawned by the restarting operator.
        try:
            for pod_list in TimeoutSampler(
                30,
                1,
                pod.get_pods_having_label,
                constants.ROOK_CEPH_DETECT_VERSION_LABEL,
                namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            ):
                if len(pod_list) > 0:
                    self.rook_detect_pod_name = (
                        pod_list[0].get("metadata").get("name")
                    )
                    self.rook_detect_pod_obj = pod.get_pod_obj(
                        self.rook_detect_pod_name,
                        constants.OPENSHIFT_STORAGE_NAMESPACE,
                    )
                    break
        except TimeoutExpiredError:
            # BUG FIX: original code had 'assert True, ...' here, which can
            # never fail — the timeout was silently ignored.
            assert False, "rook-ceph-detect-version pod not found"

        log.info(
            f"Found rook-ceph-detect-version pod {self.rook_detect_pod_name}. Deleting it"
        )
        rook_detect_deleted = self.rook_detect_pod_obj.delete(wait=True)
        assert (
            rook_detect_deleted
        ), f"Failed to delete pod {self.rook_detect_pod_name}"
        self.rook_detect_pod_obj.ocp.wait_for_delete(self.rook_detect_pod_name)

        # Make sure there's no detect-version pod leftover
        try:
            for pod_list in TimeoutSampler(
                30,
                1,
                pod.get_pods_having_label,
                constants.ROOK_CEPH_DETECT_VERSION_LABEL,
                namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            ):
                if len(pod_list) == 0:
                    break
                else:
                    log.info(
                        f"Pod {pod_list[0].get('metadata').get('name')} found. waiting for it to be deleted"
                    )
        except TimeoutExpiredError:
            # BUG FIX: was 'assert True, ...' — a leftover pod never
            # failed the test.
            assert False, "rook-ceph-detect-version pod still exists"
def get_ocs_operator_node_name():
    """
    Get the name of the node hosting the ocs-operator pod.

    Returns:
        str: node's name that running ocs-operator pod
    """
    operator_pod = get_ocs_operator_pod()
    log.debug(f"ocs operator pod info: {operator_pod}")
    hosting_node = get_pod_node(operator_pod)
    return get_node_name(hosting_node)
def restart_ocs_operator_node(self):
    """
    Restart the node that runs the OCS operator pod, then wait for the
    nodes and the operator pod to come back to a running state.
    """
    operator_pod = pod.get_ocs_operator_pod()
    hosting_node = pod.get_pod_node(operator_pod)
    self.nodes.restart_nodes([hosting_node])
    wait_for_nodes_status()
    pod.wait_for_pods_to_be_running(
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
        pod_names=[operator_pod.name],
    )
def test_delete_rook_ceph_mon_pod(self):
    """
    Repeatedly (30 times) delete the rook operator pod and the transient
    rook-ceph-detect-version pod it spawns. Counts successful deletions in
    ``self.num_of_deletions`` and requires at least one success overall.

    Raises:
        AssertionError: if the operator pod is missing, a delete fails,
            a leftover detect-version pod remains, or no deletion
            succeeded across all attempts.
    """
    for i in range(30):
        self.rook_detect_pod_name = None
        rook_operator_pod = pod.get_ocs_operator_pod(
            ocs_label=constants.OPERATOR_LABEL,
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
        )
        assert rook_operator_pod, "No rook operator pod found"
        log.info(
            f"Found rook-operator pod {rook_operator_pod.name}. Deleting it."
        )
        operator_deleted = rook_operator_pod.delete(wait=False)
        assert operator_deleted, f"Failed to delete pod {rook_operator_pod.name}"

        # Wait for the detect-version pod spawned by the restarting
        # operator; it is short-lived, so it may disappear between being
        # listed and being deleted (handled via CommandFailed below).
        try:
            for pod_list in TimeoutSampler(
                30,
                1,
                pod.get_pods_having_label,
                constants.ROOK_CEPH_DETECT_VERSION_LABEL,
                namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            ):
                if len(pod_list) > 0:
                    self.rook_detect_pod_name = (
                        pod_list[0].get("metadata").get("name")
                    )
                    rook_detect_pod_list = pod.get_pod_objs(
                        pod_names=[self.rook_detect_pod_name],
                        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
                    )
                    if len(rook_detect_pod_list) > 0:
                        log.info(
                            f"Found rook-ceph-detect-version pod {self.rook_detect_pod_name}. Deleting it"
                        )
                        self.rook_detect_pod_obj = rook_detect_pod_list[0]
                        rook_detect_deleted = False
                        try:
                            rook_detect_deleted = self.rook_detect_pod_obj.delete(
                                wait=True
                            )
                        except CommandFailed:
                            # Pod vanished on its own before the delete landed.
                            log.warning(
                                f"{self.rook_detect_pod_name} pod not found"
                            )
                        else:
                            log.info(
                                f"Deletion status: {rook_detect_deleted}"
                            )
                            assert (
                                rook_detect_deleted
                            ), f"Failed to delete pod {self.rook_detect_pod_name}"
                            self.rook_detect_pod_obj.ocp.wait_for_delete(
                                self.rook_detect_pod_name
                            )
                            self.num_of_deletions += 1
                    # BUG FIX: without a break the sampler kept polling for
                    # the full 30s window on every attempt.
                    break
        except TimeoutExpiredError:
            log.warning("rook-ceph-detect-version pod not found")

        # Make sure there's no detect-version pod leftover
        try:
            for pod_list in TimeoutSampler(
                60,
                1,
                pod.get_pods_having_label,
                constants.ROOK_CEPH_DETECT_VERSION_LABEL,
                namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            ):
                if len(pod_list) == 0:
                    break
                else:
                    log.info(
                        f"Pod {pod_list[0].get('metadata').get('name')} found. waiting for it to be deleted"
                    )
        except TimeoutExpiredError:
            # BUG FIX: was 'assert True, ...' — a leftover pod never
            # failed the test.
            assert False, "rook-ceph-detect-version pod still exists"

    log.info(f"Num of deletions: {self.num_of_deletions}/30")
    # BUG FIX: message said "(20)" while the loop makes 30 attempts.
    assert (
        self.num_of_deletions > 0
    ), "All (30) attempts to delete rook-ceph-detect-version pod failed."
def set_resource(self, resource, leader_type="provisioner", cluster_index=None):
    """
    Resolve the pod objects and selector label for the requested resource
    kind, and — on Managed Service platforms — the kubeconfig of the
    cluster where those pods live.

    Args:
        resource (str): resource kind, e.g. "mgr", "mon", "osd", "mds",
            "cephfsplugin", "rbdplugin", "cephfsplugin_provisioner",
            "rbdplugin_provisioner", "operator", "ocs_operator",
            "alertmanager_managed_ocs_alertmanager",
            "ocs_osd_controller_manager",
            "prometheus_managed_ocs_prometheus", "prometheus_operator",
            "ocs_provider_server"
        leader_type (str): provisioner leader type (default "provisioner")
        cluster_index (int): index of the cluster where the pod resides;
            if None, the current cluster context is assumed
    """
    self.resource = resource
    is_managed_service = (
        config.ENV_DATA["platform"] in constants.MANAGED_SERVICE_PLATFORMS
    )
    if is_managed_service and resource in CEPH_PODS:
        # On Managed Services the ceph pods live in the provider cluster.
        # The consumer cluster is the primary context in a multicluster
        # run, so record the provider kubeconfig to pass as '--kubeconfig'
        # to the 'oc' commands that fetch the ceph pods.
        provider_index = config.get_provider_index()
        self.cluster_kubeconfig = os.path.join(
            config.clusters[provider_index].ENV_DATA["cluster_path"],
            config.clusters[provider_index].RUN.get("kubeconfig_location"),
        )
    elif is_managed_service:
        # cluster_index identifies the cluster hosting the pod; when it is
        # not given, assume the context already points at that cluster.
        target_index = (
            cluster_index if cluster_index is not None else config.cur_index
        )
        self.cluster_kubeconfig = os.path.join(
            config.clusters[target_index].ENV_DATA["cluster_path"],
            config.clusters[target_index].RUN.get("kubeconfig_location"),
        )

    # Non-zero only for kinds where more pods share the selector label
    # than the ones we target.
    resource_count = 0
    if self.resource == "mgr":
        self.resource_obj = pod.get_mgr_pods()
        self.selector = constants.MGR_APP_LABEL
    elif self.resource == "mon":
        self.resource_obj = pod.get_mon_pods()
        self.selector = constants.MON_APP_LABEL
    elif self.resource == "osd":
        self.resource_obj = pod.get_osd_pods()
        self.selector = constants.OSD_APP_LABEL
    elif self.resource == "mds":
        self.resource_obj = pod.get_mds_pods()
        self.selector = constants.MDS_APP_LABEL
    elif self.resource == "cephfsplugin":
        self.resource_obj = pod.get_plugin_pods(interface=constants.CEPHFILESYSTEM)
        self.selector = constants.CSI_CEPHFSPLUGIN_LABEL
    elif self.resource == "rbdplugin":
        self.resource_obj = pod.get_plugin_pods(interface=constants.CEPHBLOCKPOOL)
        self.selector = constants.CSI_RBDPLUGIN_LABEL
    elif self.resource == "cephfsplugin_provisioner":
        self.resource_obj = [
            pod.get_plugin_provisioner_leader(
                interface=constants.CEPHFILESYSTEM, leader_type=leader_type
            )
        ]
        self.selector = constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL
        resource_count = len(pod.get_cephfsplugin_provisioner_pods())
    elif self.resource == "rbdplugin_provisioner":
        self.resource_obj = [
            pod.get_plugin_provisioner_leader(
                interface=constants.CEPHBLOCKPOOL, leader_type=leader_type
            )
        ]
        self.selector = constants.CSI_RBDPLUGIN_PROVISIONER_LABEL
        resource_count = len(pod.get_rbdfsplugin_provisioner_pods())
    elif self.resource == "operator":
        self.resource_obj = pod.get_operator_pods()
        self.selector = constants.OPERATOR_LABEL
    elif self.resource == "ocs_operator":
        self.resource_obj = [pod.get_ocs_operator_pod()]
        self.selector = constants.OCS_OPERATOR_LABEL
    elif self.resource == "alertmanager_managed_ocs_alertmanager":
        self.resource_obj = pod.get_alertmanager_managed_ocs_alertmanager_pods()
        self.selector = constants.MANAGED_ALERTMANAGER_LABEL
    elif self.resource == "ocs_osd_controller_manager":
        self.resource_obj = [pod.get_ocs_osd_controller_manager_pod()]
        self.selector = constants.MANAGED_CONTROLLER_LABEL
        # Setting resource_count because odf-operator-controller-manager
        # pod also have the same label.
        resource_count = len(
            pod.get_pods_having_label(
                constants.MANAGED_CONTROLLER_LABEL,
                config.ENV_DATA["cluster_namespace"],
            )
        )
    elif self.resource == "prometheus_managed_ocs_prometheus":
        self.resource_obj = [pod.get_prometheus_managed_ocs_prometheus_pod()]
        self.selector = constants.MANAGED_PROMETHEUS_LABEL
    elif self.resource == "prometheus_operator":
        self.resource_obj = [pod.get_prometheus_operator_pod()]
        self.selector = constants.PROMETHEUS_OPERATOR_LABEL
    elif self.resource == "ocs_provider_server":
        self.resource_obj = [pod.get_ocs_provider_server_pod()]
        self.selector = constants.PROVIDER_SERVER_LABEL

    self.resource_count = resource_count or len(self.resource_obj)