Exemple #1
0
    def _execute_cmd(self, cmd_str, run_as_root):
        """Execute a command line and report whether it succeeded.

        :param cmd_str: Command line given as a single string. It is split
            on single spaces; empty tokens caused by repeated spaces are
            dropped.
        :param run_as_root: Forwarded to ``utils.execute`` as
            ``run_as_root``.
        :returns: 0 if the command ran without raising and wrote nothing to
            stderr, 1 otherwise.
        """
        # Split command string and drop the empty elements produced by
        # consecutive spaces.
        command = [token for token in cmd_str.split(' ') if token != '']

        try:
            # Execute the command.
            out, err = utils.execute(*command, run_as_root=run_as_root)

            if out:
                LOG.info("CMD '%s' output stdout: %s", cmd_str, out)

            if err:
                # Any stderr output is treated as a failure.
                LOG.warning("CMD '%s' output stderr: %s", cmd_str, err)
                return 1

        except Exception as e:
            # Log the command together with the exception so the failing
            # command can be identified from the log.
            LOG.error("CMD '%s' raised exception: %s", cmd_str, e)
            return 1

        return 0
Exemple #2
0
    def monitor_processes(self):
        """Monitor processes.

        For every entry of ``self.process_list`` this method runs ``ps -ef``
        and checks whether the entry's ``'process_name'`` value occurs
        anywhere in the command output. Entries whose name is not found are
        reported as down. If the monitoring command raises, the error is
        logged and the entry is not reported as down.

        :returns: List of down process
        """
        down_process_list = []
        for process in self.process_list:
            process_name = process['process_name']

            try:
                # Execute monitoring command.
                out, _err = utils.execute('ps', '-ef', run_as_root=False)
                if process_name in out:
                    # Pass the name as a logger argument so the message is
                    # only formatted when debug logging is enabled.
                    LOG.debug("Process '%s' is found.", process_name)
                else:
                    # Append down_process_list.
                    down_process_list.append(process)
                    LOG.warning("Process '%s' is not found.", process_name)
            except Exception as e:
                LOG.error("Monitoring command raised exception: %s", e)

        return down_process_list
Exemple #3
0
    def _check_hb_line(self):
        """Check whether this host is usable as a cluster member.

        The check verifies the pacemaker related services, the presence of
        the corosync multicast settings and finally, via ``pcs status
        resources <my_hostname>``, that a resource for this host is
        registered.

        :returns: 0 if normal, 2 if the configuration is incomplete, the
            resource lookup fails, or neither pacemaker nor
            pacemaker-remote is running. This implementation never
            returns 1.
        """
        # Query all three services up front.
        corosync_up = self._check_pacemaker_services('corosync')
        pacemaker_up = self._check_pacemaker_services('pacemakerd')
        remote_up = self._check_pacemaker_services('pacemaker_remoted')

        # Without the full pacemaker stack the host may still be fine as
        # long as pacemaker-remote is available.
        if corosync_up is False or pacemaker_up is False:
            if remote_up is False:
                LOG.error(
                    "Neither pacemaker nor pacemaker-remote is running.")
                return 2
            LOG.info("Works on pacemaker-remote.")
            return 0

        # Both multicast settings are required.
        if (CONF.host.corosync_multicast_interfaces is None or
                CONF.host.corosync_multicast_ports is None):
            LOG.error("%s", "corosync_multicast_interfaces or "
                            "corosync_multicast_ports is not set.")
            return 2

        # Ask pcs whether a resource named after this host exists; any
        # exception or stderr output counts as "not registered".
        pcs_args = ("pcs status resources %s" % self.my_hostname).split(' ')
        try:
            stdout, stderr = utils.execute(*pcs_args, run_as_root=True)
            if stderr:
                raise Exception(stderr)
            LOG.info("%s", stdout)
        except Exception as exc:
            LOG.warning("Exception caught: %s", exc)
            LOG.warning("%s", "Remote resource '%s' is not registered."
                        % self.my_hostname)
            LOG.error("Remote resource is not registered in cluster.")
            return 2

        return 0
Exemple #4
0
    def _get_cib_xml(self):
        """Return the stdout of ``cibadmin --query``.

        :returns: The command's stdout, or None if the command raised or
            wrote anything to stderr (the problem is logged as a warning).
        """
        try:
            # Run cibadmin as root and treat stderr output as an error.
            stdout, stderr = utils.execute(
                'cibadmin', '--query', run_as_root=True)
            if stderr:
                raise Exception(
                    "cibadmin command output stderr: %s" % stderr)
        except Exception as exc:
            LOG.warning("Exception caught: %s", exc)
            return None
        else:
            return stdout
Exemple #5
0
    def _check_pacemaker_services(self, target_service):
        """Check a service with ``systemctl status``.

        :param target_service: Name appended to ``systemctl status``.
        :returns: True if the command ran without raising and wrote nothing
            to stderr, False otherwise.
        """
        try:
            argv = ('systemctl status ' + target_service).split(' ')
            _stdout, stderr = utils.execute(*argv, run_as_root=True)
            # Any stderr output is treated the same way as a failed command.
            return not stderr
        except Exception:
            return False
Exemple #6
0
    def _get_crmmon_xml(self):
        """Return the stdout of ``crm_mon -X``.

        :returns: The command's stdout, or None if the command raised or
            wrote anything to stderr (the problem is logged as a warning).
        """
        try:
            # Run crm_mon as root and treat stderr output as an error.
            stdout, stderr = utils.execute('crm_mon', '-X', run_as_root=True)
            if stderr:
                raise Exception("crmmon command output stderr: %s" % stderr)
        except Exception as exc:
            LOG.warning("Exception caught: %s", exc)
            return None
        else:
            return stdout
Exemple #7
0
    def _is_poweroff(self, hostname):
        """Check with ipmitool whether the given host is powered off.

        The IPMI parameters are looked up through
        ``self.xml_parser.get_stonith_ipmi_params``. The ``power status``
        query is retried up to ``CONF.host.ipmi_retry_max`` times, sleeping
        ``CONF.host.ipmi_retry_interval`` between attempts, whenever the
        command raises, writes to stderr, or does not report
        'Power is off'.

        :param hostname: Host whose IPMI parameters are looked up.
        :returns: True if ipmitool reported 'Power is off', False if the
            parameters could not be obtained or all attempts failed.
        """
        ipmi_values = self.xml_parser.get_stonith_ipmi_params(hostname)
        if ipmi_values is None:
            LOG.error("Failed to get params of ipmi RA.")
            return False

        # Build the argument vector directly instead of formatting one
        # string and splitting it on spaces: a value containing a space
        # (e.g. the password) would otherwise be broken into several
        # arguments.
        # NOTE(review): the password is passed on the command line via -P.
        command = [
            'timeout', str(CONF.host.ipmi_timeout),
            'ipmitool',
            '-U', str(ipmi_values['userid']),
            '-P', str(ipmi_values['passwd']),
            '-I', str(ipmi_values['interface']),
            '-H', str(ipmi_values['ipaddr']),
            'power', 'status',
        ]

        retry_count = 0
        while True:
            try:
                # Execute ipmitool command.
                out, err = utils.execute(*command, run_as_root=False)

                if err:
                    msg = ("ipmitool command output stderr: %s") % err
                    raise Exception(msg)

                msg = ("ipmitool command output stdout: %s") % out

                if 'Power is off' in out:
                    LOG.info("%s", msg)
                    return True

                # Any other status is raised so that it goes through the
                # same retry handling as a failed command.
                raise Exception(msg)

            except Exception as e:
                if retry_count < CONF.host.ipmi_retry_max:
                    LOG.warning("Retry executing ipmitool command. (%s)", e)
                    retry_count = retry_count + 1
                    eventlet.greenthread.sleep(CONF.host.ipmi_retry_interval)
                else:
                    LOG.error("Exception caught: %s", e)
                    return False
Exemple #8
0
    def _check_host_status_by_crmadmin(self):
        """Check this host's state with ``crmadmin -S <my_hostname>``.

        :returns: 0 if the output contains 'S_IDLE' or 'S_NOT_DC', 1 if the
            command raised, wrote to stderr, or reported anything else.
        """
        # Markers that crmadmin prints when the own host is in stable state.
        stable_markers = ('S_IDLE', 'S_NOT_DC')

        try:
            stdout, stderr = utils.execute(
                'crmadmin', '-S', self.my_hostname, run_as_root=True)

            if stderr:
                raise Exception(
                    "crmadmin command output stderr: %s" % stderr)

            if not any(marker in stdout for marker in stable_markers):
                raise Exception(
                    "crmadmin command output unexpected host status.")

            return 0

        except Exception as exc:
            LOG.warning("Exception caught: %s", exc)
            LOG.warning("'%s' is unstable state on cluster.",
                        self.my_hostname)
            return 1
Exemple #9
0
    def _check_hb_line(self):
        """Check whether the corosync communication is normal.

        After checking the pacemaker related services and the multicast
        settings, one packet is captured with tcpdump on each configured
        interface/port pair until a capture succeeds.

        :returns: 0 if normal, 1 if abnormal, 2 if configuration file is
            wrong or neither pacemaker nor pacemaker-remote is running.
        """
        # Check whether the pacemaker services are normal.
        corosync_status = self._check_pacemaker_services('corosync')
        pacemaker_status = self._check_pacemaker_services('pacemaker')
        pacemaker_remote_status = self._check_pacemaker_services(
            'pacemaker_remote')

        if corosync_status is False or pacemaker_status is False:
            if pacemaker_remote_status is False:
                LOG.error(
                    "Neither pacemaker nor pacemaker-remote is running.")
                return 2
            LOG.info("Works on pacemaker-remote.")
            return 0

        # Check whether the necessary parameters are set.
        if (CONF.host.corosync_multicast_interfaces is None or
                CONF.host.corosync_multicast_ports is None):
            msg = ("corosync_multicast_interfaces or "
                   "corosync_multicast_ports is not set.")
            LOG.error("%s", msg)
            return 2

        # Strip each entry so that a value such as "eth0, eth1" does not
        # leave a leading space in the interface name or port.
        interfaces = [
            nic.strip()
            for nic in CONF.host.corosync_multicast_interfaces.split(',')]
        ports = [
            port.strip()
            for port in CONF.host.corosync_multicast_ports.split(',')]

        # Interfaces and ports are paired by position.
        if len(interfaces) != len(ports):
            msg = ("Incorrect parameters corosync_multicast_interfaces or "
                   "corosync_multicast_ports.")
            LOG.error("%s", msg)
            return 2

        # Check whether the corosync communication is normal.
        for interface, port in zip(interfaces, ports):
            # Build the argument vector directly rather than formatting a
            # string and splitting it on spaces.
            command = [
                'timeout', str(CONF.host.tcpdump_timeout),
                'tcpdump', '-n', '-c', '1', '-p',
                '-i', interface, 'port', port,
            ]

            try:
                # Execute tcpdump command.
                utils.execute(*command, run_as_root=True)
            except Exception:
                LOG.warning("Corosync communication using '%s' is failed.",
                            interface)
                continue

            # If command doesn't raise exception, nic is normal; one
            # working interface is sufficient.
            LOG.info("Corosync communication using '%s' is normal.",
                     interface)
            return 0

        LOG.error("Corosync communication is failed.")
        return 1