def _check_heketi_and_gluster_pod_after_node_reboot(self, heketi_node):
    """Verify heketi pod health after a node reboot, plus the glusterfs
    pod and its services when one is co-located on the same node.
    """
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)
    heketi_pod = openshift_ops.get_pod_names_from_dc(
        self._master, self.heketi_dc_name)[0]

    # Heketi pod must be back to Ready and answering API requests
    openshift_ops.wait_for_pod_be_ready(self._master, heketi_pod)
    heketi_ops.hello_heketi(self._master, self.heketi_server_url)

    # If the heketi pod landed on a gluster server node, that node's
    # glusterfs pod and services must also have recovered
    heketi_node_ip = openshift_ops.oc_get_custom_resource(
        self._master, 'pod', '.:status.hostIP', heketi_pod)[0]
    if heketi_node_ip in self.gluster_servers:
        gluster_pod = openshift_ops.get_gluster_pod_name_for_specific_node(
            self._master, heketi_node)
        openshift_ops.wait_for_pod_be_ready(self._master, gluster_pod)

        # Expected systemd unit states inside the glusterfs pod
        for service, state in (("glusterd", "running"),
                               ("gluster-blockd", "running"),
                               ("tcmu-runner", "running"),
                               ("gluster-block-target", "exited")):
            openshift_ops.check_service_status_on_pod(
                self._master, gluster_pod, service, "active", state)
def _wait_for_gluster_pod_after_node_reboot(self, node_hostname):
    """Wait until a rebooted node is Ready again and its glusterfs pod
    and services are back to their expected states.
    """
    # Node itself must report Ready before pod checks make sense
    openshift_ops.wait_for_ocp_node_be_ready(
        self.ocp_client, node_hostname)

    pod = openshift_ops.get_gluster_pod_name_for_specific_node(
        self.ocp_client, node_hostname)
    openshift_ops.wait_for_pod_be_ready(self.ocp_client, pod)

    # Expected systemd unit states inside the glusterfs pod
    expected_states = {
        "glusterd": "running",
        "gluster-blockd": "running",
        "tcmu-runner": "running",
        "gluster-block-target": "exited",
    }
    for service, state in expected_states.items():
        openshift_ops.check_service_status_on_pod(
            self.ocp_client, pod, service, "active", state)
def _node_reboot(self):
    """Reboot the first gluster server node, wait for it to come back,
    then verify gluster pods and their services recover.

    Raises:
        AssertionError: if the reboot command did not disconnect as expected.
        ExecutionError: if the node is not reachable within 600 seconds.
    """
    storage_hostname = (g.config["gluster_servers"]
                        [self.gluster_servers[0]]["storage"])
    cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
    ret, out, err = g.run(storage_hostname, cmd)
    # Registered before validating the reboot so the pod check runs on
    # cleanup even if the assertions below fail
    self.addCleanup(self._wait_for_gluster_pod_to_be_ready)
    # The reboot kills the ssh session, so a successful trigger shows up
    # as exit status 255; anything else means the command itself failed
    if ret != 255:
        err_msg = "failed to reboot host %s error: %s" % (
            storage_hostname, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    try:
        g.ssh_close_connection(storage_hostname)
    except Exception as e:
        g.log.error("failed to close connection with host %s"
                    " with error: %s" % (storage_hostname, e))
        raise

    # added sleep as node will restart after 3 sec
    time.sleep(3)

    # Poll until the rebooted node accepts rpyc connections again;
    # connection errors are expected while the node is still down
    for w in Waiter(timeout=600, interval=10):
        try:
            if g.rpyc_get_connection(storage_hostname, user="******"):
                g.rpyc_close_connection(storage_hostname, user="******")
                break
        except Exception as err:
            g.log.info("exception while getting connection: '%s'" % err)

    if w.expired:
        error_msg = ("exceeded timeout 600 sec, node '%s' is "
                     "not reachable" % storage_hostname)
        g.log.error(error_msg)
        raise ExecutionError(error_msg)

    # wait for the gluster pod to be in 'Running' state
    self._wait_for_gluster_pod_to_be_ready()

    # glusterd and gluster-blockd service should be up and running
    service_names = ("glusterd", "gluster-blockd", "tcmu-runner")
    for gluster_pod in self.gluster_pod_list:
        for service in service_names:
            g.log.info("gluster_pod - '%s' : gluster_service '%s'" % (
                gluster_pod, service))
            check_service_status_on_pod(
                self.oc_node, gluster_pod, service, "running"
            )
def reboot_gluster_node_and_wait_for_services(self):
    """Reboot the first gluster node, then wait for its pod to become
    Ready and verify the gluster services inside it.

    Raises:
        ExecutionError: if no gluster pod is hosted on the target node.
    """
    gluster_node_ip = (
        g.config["gluster_servers"][self.gluster_servers[0]]["storage"])

    # Locate the gluster pod running on the node we are about to reboot
    matching_pods = [
        pod for pod in get_ocp_gluster_pod_details(self.oc_node)
        if pod["pod_host_ip"] == gluster_node_ip]
    if not matching_pods:
        raise ExecutionError("Gluster pod Host IP '%s' not matched." %
                             gluster_node_ip)
    gluster_pod = matching_pods[0]["pod_name"]

    # Re-check pod readiness on cleanup even if this test fails midway
    self.addCleanup(wait_for_pod_be_ready, self.oc_node, gluster_pod)
    node_reboot_by_command(gluster_node_ip, timeout=600, wait_step=10)

    # wait for the gluster pod to be in 'Running' state
    wait_for_pod_be_ready(self.oc_node, gluster_pod)

    # Expected systemd unit states inside the glusterfs pod
    for service, state in (("glusterd", "running"),
                           ("gluster-blockd", "running"),
                           ("tcmu-runner", "running"),
                           ("gluster-block-target", "exited")):
        check_service_status_on_pod(
            self.oc_node, gluster_pod, service, "active", state)