def _execute_cmd(self, cmd_str, run_as_root):
    """Run a space-separated command line and report the outcome.

    :param cmd_str: Command line to run. It is split on single spaces
        and empty tokens (produced by repeated spaces) are dropped.
    :param run_as_root: Passed through to ``utils.execute`` unchanged.
    :returns: 0 if the command ran without raising and wrote nothing
        to stderr, 1 otherwise.
    """
    # Split command string and delete empty elements.
    command = [token for token in cmd_str.split(' ') if token != '']

    try:
        out, err = utils.execute(*command, run_as_root=run_as_root)
    except Exception as e:
        # Log the command together with the exception; the exception
        # text alone does not say which command failed.
        LOG.error("CMD '%s' raised exception: %s", cmd_str, e)
        return 1

    if out:
        LOG.info("CMD '%s' output stdout: %s", cmd_str, out)

    # Any stderr output is treated as a failure of the command.
    if err:
        LOG.warning("CMD '%s' output stderr: %s", cmd_str, err)
        return 1

    return 0
def monitor_processes(self):
    """Monitor processes.

    This method monitors the processes using process name written in the
    process list. Each entry of ``self.process_list`` is looked up by its
    'process_name' value as a substring of the ``ps -ef`` output.

    If the monitoring command raises, the error is logged and the
    process being checked is NOT reported as down.

    :returns: List of down process
    """
    down_process_list = []

    for process in self.process_list:
        process_name = process['process_name']

        try:
            # Execute monitoring command.
            out, _err = utils.execute('ps', '-ef', run_as_root=False)

            if process_name in out:
                # Pass the argument to the logger instead of formatting
                # eagerly, matching the other log calls here.
                LOG.debug("Process '%s' is found.", process_name)
            else:
                # Append down_process_list.
                down_process_list.append(process)
                LOG.warning("Process '%s' is not found.", process_name)
        except Exception as e:
            LOG.error("Monitoring command raised exception: %s", e)

    return down_process_list
def _check_hb_line(self):
    """Check that this host can take part in the pacemaker cluster.

    The corosync, pacemakerd and pacemaker_remoted services are probed
    first. When corosync or pacemakerd is down, the result depends only
    on pacemaker_remoted. Otherwise the multicast settings must be
    present and ``pcs status resources <my_hostname>`` must succeed
    without writing to stderr.

    :returns: 0 if the host works on pacemaker-remote or its resource
        is registered, 2 if neither pacemaker nor pacemaker-remote is
        running, the multicast settings are missing, or the resource
        is not registered.
    """
    # Probe the three services in a fixed order.
    corosync_up = self._check_pacemaker_services('corosync')
    pacemakerd_up = self._check_pacemaker_services('pacemakerd')
    remoted_up = self._check_pacemaker_services('pacemaker_remoted')

    if corosync_up is False or pacemakerd_up is False:
        # Without a full pacemaker stack only pacemaker-remote counts.
        if remoted_up is not False:
            LOG.info("Works on pacemaker-remote.")
            return 0
        LOG.error(
            "Neither pacemaker nor pacemaker-remote is running.")
        return 2

    # Both multicast settings have to be configured.
    if (CONF.host.corosync_multicast_interfaces is None or
            CONF.host.corosync_multicast_ports is None):
        not_set_msg = ("corosync_multicast_interfaces or "
                       "corosync_multicast_ports is not set.")
        LOG.error("%s", not_set_msg)
        return 2

    # pcs is used to check that the remote resource is configured,
    # since there is no corosync configuration in compute nodes.
    pcs_argv = (("pcs status resources %s") % (self.my_hostname)).split(' ')
    try:
        stdout, stderr = utils.execute(*pcs_argv, run_as_root=True)
        if stderr:
            raise Exception(stderr)
        LOG.info("%s", stdout)
    except Exception as exc:
        LOG.warning("Exception caught: %s", exc)
        unregistered_msg = ("Remote resource '%s' is not registered.") \
            % self.my_hostname
        LOG.warning("%s", unregistered_msg)
        LOG.error("Remote resource is not registered in cluster.")
        return 2

    return 0
def _get_cib_xml(self):
    """Fetch the output of ``cibadmin --query``.

    :returns: The command's stdout, or None if the command raised or
        wrote anything to stderr (a warning is logged in that case).
    """
    try:
        cib_xml, stderr = utils.execute(
            'cibadmin', '--query', run_as_root=True)
        # stderr output is handled the same way as a failed command.
        if stderr:
            raise Exception("cibadmin command output stderr: %s" % stderr)
    except Exception as exc:
        LOG.warning("Exception caught: %s", exc)
        return None
    else:
        return cib_xml
def _check_pacemaker_services(self, target_service):
    """Report whether ``systemctl status`` succeeds for a service.

    :param target_service: Unit name appended to ``systemctl status``.
    :returns: True if the command ran without raising and wrote nothing
        to stderr, False otherwise.
    """
    try:
        argv = ('systemctl status ' + target_service).split(' ')
        _stdout, stderr = utils.execute(*argv, run_as_root=True)
        # Any stderr output counts as the service not being healthy.
        if stderr:
            return False
    except Exception:
        return False
    return True
def _get_crmmon_xml(self):
    """Get summary of cluster's current state in XML format.

    :returns: The stdout of ``crm_mon -X``, or None if the command
        raised or wrote anything to stderr (a warning is logged in
        that case).
    """
    try:
        crmmon_xml, stderr = utils.execute(
            'crm_mon', '-X', run_as_root=True)
        # stderr output is handled the same way as a failed command.
        if stderr:
            raise Exception("crmmon command output stderr: %s" % stderr)
    except Exception as exc:
        LOG.warning("Exception caught: %s", exc)
        return None
    else:
        return crmmon_xml
def _is_poweroff(self, hostname):
    """Check via ipmitool whether a host is powered off.

    ``ipmitool ... power status`` is run under ``timeout`` and retried
    up to ``CONF.host.ipmi_retry_max`` times, sleeping
    ``CONF.host.ipmi_retry_interval`` between attempts, until its
    output contains 'Power is off'.

    :param hostname: Host whose stonith IPMI parameters are looked up
        through ``self.xml_parser``.
    :returns: True if the host reported 'Power is off', False if the
        IPMI parameters are unavailable or every attempt failed.
    """
    ipmi_values = self.xml_parser.get_stonith_ipmi_params(hostname)
    if ipmi_values is None:
        LOG.error("Failed to get params of ipmi RA.")
        return False

    # Build the argument vector directly rather than formatting one
    # string and splitting it on spaces, so that a user name or
    # password containing a space stays a single argument.
    # NOTE(review): the password is still passed on the command line;
    # ipmitool can read it from a file (-f) or the environment (-E).
    command = [
        'timeout', str(CONF.host.ipmi_timeout),
        'ipmitool',
        '-U', str(ipmi_values['userid']),
        '-P', str(ipmi_values['passwd']),
        '-I', str(ipmi_values['interface']),
        '-H', str(ipmi_values['ipaddr']),
        'power', 'status',
    ]

    retry_count = 0
    while True:
        try:
            # Execute ipmitool command.
            out, err = utils.execute(*command, run_as_root=False)

            if err:
                raise Exception(
                    "ipmitool command output stderr: %s" % err)

            msg = "ipmitool command output stdout: %s" % out

            if 'Power is off' in out:
                LOG.info("%s", msg)
                return True

            # Any other power state goes through the retry path too.
            raise Exception(msg)

        except Exception as e:
            if retry_count >= CONF.host.ipmi_retry_max:
                LOG.error("Exception caught: %s", e)
                return False

            LOG.warning("Retry executing ipmitool command. (%s)", e)
            retry_count += 1
            eventlet.greenthread.sleep(CONF.host.ipmi_retry_interval)
def _check_host_status_by_crmadmin(self):
    """Ask crmadmin whether this host is in a stable cluster state.

    :returns: 0 if the output of ``crmadmin -S <my_hostname>`` contains
        'S_IDLE' or 'S_NOT_DC'; 1 if it does not, if the command wrote
        to stderr, or if the command raised.
    """
    # Status strings that crmadmin prints for a stable host.
    stable_states = ('S_IDLE', 'S_NOT_DC')

    try:
        stdout, stderr = utils.execute(
            'crmadmin', '-S', self.my_hostname, run_as_root=True)

        if stderr:
            raise Exception("crmadmin command output stderr: %s" % stderr)

        if not any(state in stdout for state in stable_states):
            raise Exception(
                "crmadmin command output unexpected host status.")
    except Exception as exc:
        LOG.warning("Exception caught: %s", exc)
        LOG.warning("'%s' is unstable state on cluster.", self.my_hostname)
        return 1

    return 0
def _check_hb_line(self):
    """Check whether the corosync communication is normal.

    After the service and configuration checks, each configured
    interface/port pair is probed with ``tcpdump -c 1`` under
    ``timeout``; the first pair on which the command succeeds makes
    the result normal.

    :returns: 0 if normal, 1 if abnormal, 2 if configuration file is
        wrong or neither pacemaker nor pacemaker-remote is running.
    """
    # Check whether the pacemaker services is normal.
    corosync_status = self._check_pacemaker_services('corosync')
    pacemaker_status = self._check_pacemaker_services('pacemaker')
    pacemaker_remote_status = self._check_pacemaker_services(
        'pacemaker_remote')

    if corosync_status is False or pacemaker_status is False:
        if pacemaker_remote_status is False:
            LOG.error(
                "Neither pacemaker nor pacemaker-remote is running.")
            return 2
        LOG.info("Works on pacemaker-remote.")
        return 0

    # Check whether the necessary parameters are set.
    if (CONF.host.corosync_multicast_interfaces is None or
            CONF.host.corosync_multicast_ports is None):
        msg = ("corosync_multicast_interfaces or "
               "corosync_multicast_ports is not set.")
        LOG.error("%s", msg)
        return 2

    # Strip whitespace around each comma-separated entry so that a
    # value such as "eth0, eth1" does not put an empty argument into
    # the tcpdump command line.
    interfaces = [
        name.strip()
        for name in CONF.host.corosync_multicast_interfaces.split(',')
    ]
    ports = [
        port.strip()
        for port in CONF.host.corosync_multicast_ports.split(',')
    ]

    if len(interfaces) != len(ports):
        msg = ("Incorrect parameters corosync_multicast_interfaces or "
               "corosync_multicast_ports.")
        LOG.error("%s", msg)
        return 2

    # Check whether the corosync communication is normal.
    for interface, port in zip(interfaces, ports):
        command = [
            'timeout', str(CONF.host.tcpdump_timeout),
            'tcpdump', '-n', '-c', '1', '-p',
            '-i', interface,
            'port', port,
        ]

        try:
            # Execute tcpdump command.
            utils.execute(*command, run_as_root=True)
        except Exception:
            LOG.warning(
                "Corosync communication using '%s' is failed.", interface)
            continue

        # If command doesn't raise exception, nic is normal.
        LOG.info("Corosync communication using '%s' is normal.", interface)
        return 0

    LOG.error("Corosync communication is failed.")
    return 1