コード例 #1
0
    def if_ha(self):
        """Query ``crm_mon`` and collect the cluster's nodes and resources.

        Runs ``crm_mon -r -1 -X`` and parses its stdout as XML.  As a side
        effect every node name found is appended to ``self.hosts`` unless
        it is already present.

        Returns:
            ``False`` when the command exits with code 127 or 107, when
            stdout is not well-formed XML, when the root element is not
            ``crm_mon`` or when the document has no ``nodes`` element.
            Otherwise a dict ``{'nodes': [...], 'resources': [...]}``:
            ``nodes`` holds the attribute mapping of each child of
            ``<nodes>`` and ``resources`` holds every non-``None`` result
            of ``self._get_res``.

        Raises:
            KeyError: if a node element has no ``name`` attribute.
        """
        rc, stdout, stderr, exc = run_cmd("crm_mon -r -1 -X")
        # 127: command not found; 107: stopped? (meaning taken from the
        # original inline notes, not verified here).
        if rc in (127, 107):
            return False

        try:
            xml_root = ET.fromstring(stdout)
        except ET.ParseError:
            return False
        if xml_root.tag != 'crm_mon':
            return False

        ha_status = {'nodes': [], 'resources': []}
        xml_nodes = xml_root.find('nodes')
        xml_resources = xml_root.find('resources')

        # find() returns None when the element is absent.  Iterating None
        # would raise TypeError, so a missing <resources> section is
        # treated as "no resources".
        containers = xml_resources if xml_resources is not None else ()
        for container in containers:
            # A child is either a plain <resource> or a wrapper element
            # that holds <resource> children.
            if container.tag == 'resource':
                candidates = [container]
            else:
                candidates = container.findall('resource')
            for resource in candidates:
                res = self._get_res(resource)
                if res is not None:
                    ha_status['resources'].append(res)

        if xml_nodes is None:
            return False
        for elem in xml_nodes:
            attr = elem.attrib
            ha_status['nodes'].append(attr)
            hostname = attr['name']
            if hostname not in self.hosts:
                self.hosts.append(hostname)

        return ha_status
コード例 #2
0
 def get_downed_hosts(self, hosts):
     """Probe each host with ``ssh <host> uname`` and report the failures.

     One answer dict per host is built and the whole list is handed to
     ``self.out.line('ssh', ...)``.  A truthy exit code marks the host as
     'DOWN', an exit code equal to 0 marks it 'OK', anything else leaves
     it at 'UNKN'.

     Args:
         hosts: iterable of host names to probe.

     Returns:
         ``None`` when ``self.out`` is ``None``; otherwise the set of
         hosts whose ssh command returned a truthy exit code.
     """
     if self.out is None:
         return

     ssh_template = (
         "ssh -o ConnectTimeout={} -o StrictHostKeyChecking=no {} uname"
     )
     unreachable = []
     rows = []
     for host in hosts:
         row = {
             'column': host,
             'status': 'UNKN',
             'category': category.UNKN,
             'history': [],
             'info': '',
             'details': ''
         }
         rc, stdout, stderr, exc = run_cmd(
             ssh_template.format(self.args.timeout, host))
         if rc:
             row['status'] = 'DOWN'
             row['category'] = category.BAD
             unreachable.append(host)
         elif rc == 0:
             row['status'] = 'OK'
             row['category'] = category.GOOD
         rows.append(row)

     self.log.debug("Hosts: '{}' are down".format(unreachable))
     self.out.line('ssh', rows)
     return set(unreachable)
コード例 #3
0
    def status(self):
        """Check the Slurm controller by running ``scontrol ping``.

        The command is ``self.cmd_prefix`` followed by ``scontrol ping``
        and is run with ``timeout=self.timeout``.

        Returns:
            A ``(ok, comment)`` tuple.  ``ok`` is ``False`` with an
            explanatory comment when the exit code is not 0 or when the
            stripped stdout does not start with
            ``'Slurmctld(primary/backup) at '`` and end with
            ``' are UP/DOWN'``; otherwise ``(True, '')``.
        """
        cmd = self.cmd_prefix + 'scontrol ping'
        rc, stdout, stderr, exc = run_cmd(cmd, timeout=self.timeout)

        if rc != 0:
            # This used to call the non-existent ``str.cormat`` and so
            # raised AttributeError instead of reporting the failure.
            return False, "'{}' exit code is not 0".format(cmd)

        expected1 = 'Slurmctld(primary/backup) at '
        expected2 = ' are UP/DOWN'
        output = stdout.strip()

        # The length test keeps the prefix and the suffix from overlapping.
        matches = (
            len(output) >= len(expected1) + len(expected2)
            and output.startswith(expected1)
            and output.endswith(expected2)
        )
        if not matches:
            comment = "Stdout of '{}' is not matching '{}...{}'".format(
                cmd, expected1, expected2)
            return False, comment

        return True, ''
コード例 #4
0
 def status(self):
     """Check that the local resolver maps ``localhost`` to 127.0.0.1.

     Runs ``dig`` against ``@localhost`` (prefixed with
     ``self.cmd_prefix``, with ``+time`` set to ``self.timeout``).

     Returns:
         ``(True, "")`` when the exit code is falsy and the stripped
         stdout is exactly ``127.0.0.1``; otherwise ``(False, comment)``.
     """
     expected = "127.0.0.1"
     dig_args = "dig +tries=1 +time={} +short @localhost localhost"
     cmd = self.cmd_prefix + dig_args.format(self.timeout)

     rc, stdout, stderr, exc = run_cmd(cmd)
     if not rc and stdout.strip() == expected:
         return True, ""
     return False, "'{}' did not return '{}'".format(cmd, expected)
コード例 #5
0
    def check_zfs(self, answer, res, host):
        """Verify the health of the ZFS pools as seen from ``host``.

        ``zpool list -H -o name,health`` is run on ``host`` over ssh.  The
        node named in ``res['running_on']`` must list only pools whose
        health is ``ONLINE``; any other host must produce no output.

        Args:
            answer: mutable answer dict; its 'status', 'category' and
                'details' entries are updated in place.
            res: resource dict whose 'running_on' entry is a list of
                dicts that each have a 'name' key.
            host: name of the host to inspect.

        Returns:
            The same ``answer`` object.  'status' becomes 'ONLINE' only
            when ``host`` is the active node, every pool is ONLINE and
            ``answer['category']`` is ``category.GOOD`` on entry
            (presumably set by the caller).

        Raises:
            ValueError: if a non-blank output line does not consist of
                exactly two whitespace-separated fields.
        """
        def failed(details):
            # Mark the answer as an error and hand it back to the caller.
            answer['status'] = 'ERR'
            answer['category'] = category.ERROR
            answer['details'] = details
            return answer

        running_on = [e['name'] for e in res['running_on']]
        if not running_on:
            return failed("ZFS is not running anywhere")
        if len(running_on) > 1:
            return failed("pcs reported ZFS is mounted on 2 nodes")
        active_node = running_on[0]

        cmd = 'ssh -o ConnectTimeout={} -o StrictHostKeyChecking=no {} '
        cmd = cmd.format(self.args.timeout, host)
        cmd += 'zpool list -H -o name,health'
        rc, stdout, stderr, exc = run_cmd(cmd)

        if rc:
            return failed("'{}' returned non-zero exit code".format(cmd))

        if host != active_node:
            # A passive node must not have any pool imported.
            if stdout:
                return failed(
                    "'{}' returned some output on passive node".format(cmd))
            return answer

        # Blank lines are skipped: empty output used to yield one empty
        # line and crash the two-value unpacking below with ValueError.
        pools = [line.split() for line in stdout.split('\n') if line.strip()]
        if not pools:
            return failed(
                "'{}' returned no pools on active node".format(cmd))

        for name, health in pools:
            if health != "ONLINE":
                answer['status'] = 'ERR'
                answer['category'] = category.ERROR
                answer['details'] += (
                    "Status of '{}' is '{}'"
                ).format(name, health)

        if answer['category'] != category.GOOD:
            return answer

        answer['status'] = 'ONLINE'
        return answer
コード例 #6
0
ファイル: sshd.py プロジェクト: cedriccastagnede/trix-status
    def status(self):
        """Check sshd by running ``ssh localhost uptime``.

        The command is prefixed with ``self.cmd_prefix``.

        Returns:
            ``(False, comment)`` when the exit code is truthy or the
            command printed nothing but whitespace; otherwise
            ``(True, "")``.
        """
        cmd = self.cmd_prefix + 'ssh localhost uptime'
        rc, stdout, stderr, exc = run_cmd(cmd)

        # The previous test, ``len(stdout.strip().split('\n')) < 1``, could
        # never be true because str.split always returns at least one
        # element.  Test for empty output directly instead.
        if rc or not stdout.strip():
            return False, "'{}' returned unexpected result".format(cmd)

        return True, ""
コード例 #7
0
    def status(self):
        """Check that ``sacctmgr -n list cluster`` lists a cluster.

        The command is prefixed with ``self.cmd_prefix``.

        Returns:
            ``(True, "")`` when the exit code is falsy and the first line
            of the stripped stdout has at least two whitespace-separated
            fields; otherwise ``(False, comment)``.
        """
        cmd = self.cmd_prefix + 'sacctmgr -n list cluster'
        rc, stdout, stderr, exc = run_cmd(cmd)

        lines = stdout.strip().split('\n')
        has_cluster = (
            not rc
            and len(lines) >= 1
            and len(lines[0].split()) >= 2
        )
        if has_cluster:
            return True, ""

        return False, "'{}' returned no clusters configured".format(cmd)
コード例 #8
0
    def status(self):
        """Check MySQL by selecting a constant and reading it back.

        Runs ``mysql -e 'select 123;' -s -r``.  When ``self.cmd_prefix``
        is non-empty the mysql command is wrapped in double quotes and
        appended to the prefix.

        Returns:
            ``(True, "")`` when the exit code is falsy and the stripped
            stdout equals ``123``; otherwise ``(False, comment)``.
        """
        num = "123"
        query = "mysql -e 'select " + num + ";' -s -r"
        if self.cmd_prefix:
            # Quote the command so it travels as a single argument.
            cmd = self.cmd_prefix + '"' + query + '"'
        else:
            cmd = self.cmd_prefix + query

        rc, stdout, stderr, exc = run_cmd(cmd)
        if not rc and stdout.strip() == num:
            return True, ""

        return False, '{} returned unexpected result'.format(cmd)
コード例 #9
0
    def check_fencing(self):
        """Report, per node, whether a STONITH resource covers it.

        Reads ``stonith-enabled`` from the output of ``pcs property``
        (defaulting to ``'false'``), picks the resources in
        ``self.ha_status['resources']`` whose ``resource_agent`` starts
        with ``stonith:`` and, for every host in ``self.node_ids``,
        checks whether one of them is running on that host.

        The answers are passed to ``self.out.line('STONITH', ...)``.

        Returns:
            The list of answer dicts, one per entry of ``self.node_ids``.
            'status' is 'CONFIGURED' (category GOOD) when a stonith
            resource runs on the host and stonith is enabled, 'DISABLED'
            when one runs there but stonith is not enabled, and
            'UNCONFIG' otherwise.
        """
        rc, stdout, stderr, exc = run_cmd("pcs property")
        stonith_enabled = 'false'
        if rc == 0:
            marker = ' stonith-enabled:'
            for line in stdout.split('\n'):
                # The line must carry something after the marker.
                if len(line) > len(marker) and line.startswith(marker):
                    stonith_enabled = line.split(':')[1].strip()

        agent_prefix = 'stonith:'
        stonith_conf = []
        for res in self.ha_status['resources']:
            agent = res['resource_agent']
            if len(agent) > len(agent_prefix) and agent.startswith(
                    agent_prefix):
                stonith_conf.append(res)

        answers = []
        for _node_id, host in self.node_ids.items():
            covered = any(
                host in [e['name'] for e in res['running_on']]
                for res in stonith_conf
            )

            status, cat = 'UNCONFIG', category.UNKN
            if covered:
                if stonith_enabled == 'true':
                    status, cat = 'CONFIGURED', category.GOOD
                else:
                    status = 'DISABLED'

            answers.append({
                'column': host,
                'status': status,
                'category': cat,
                'history': [],
                'info': '',
                'details': ''
            })

        self.out.line('STONITH', answers)

        return answers
コード例 #10
0
    def status(self):
        """Check MongoDB by evaluating an expression and reading it back.

        Runs ``mongo --eval '{ping: 111222333}'``.  When
        ``self.cmd_prefix`` is non-empty the mongo command is wrapped in
        double quotes and appended to the prefix.

        Returns:
            ``(True, "")`` when the exit code is falsy and the last line
            of the stripped stdout equals ``111222333``; otherwise
            ``(False, comment)``.
        """
        ping = "111222333"
        probe = "mongo --eval '{ping: " + ping + "}'"
        if self.cmd_prefix:
            # Quote the command so it travels as a single argument.
            cmd = self.cmd_prefix + '"' + probe + '"'
        else:
            cmd = self.cmd_prefix + probe

        rc, stdout, stderr, exc = run_cmd(cmd)
        lines = stdout.strip().split('\n')

        answered = not rc and len(lines) >= 1 and lines[-1] == ping
        if answered:
            return True, ""

        return False, "'{}' returned no ping".format(cmd)
コード例 #11
0
ファイル: munge.py プロジェクト: cedriccastagnede/trix-status
    def status(self):
        """Check munge by piping a fresh credential through ``unmunge``.

        Runs ``munge -n | unmunge`` prefixed with ``self.cmd_prefix`` and
        inspects the second whitespace-separated field of the first
        output line.

        Returns:
            ``(False, "... returned no status")`` when that first line has
            fewer than two fields, ``(False, "... returned error")`` when
            the exit code is truthy or the field is not ``Success``, and
            ``(True, "")`` otherwise.
        """
        cmd = self.cmd_prefix + 'munge -n | unmunge'
        rc, stdout, stderr, exc = run_cmd(cmd)

        lines = stdout.strip().split('\n')
        fields = lines[0].split() if lines else []

        # The output shape is checked before the exit code.
        if len(fields) < 2:
            return False, "'{}' returned no status".format(cmd)

        if rc or fields[1] != 'Success':
            return False, "'{}' returned error".format(cmd)

        return True, ""
コード例 #12
0
    def check_drbd(self, answer, res, host):
        """Verify that the DRBD resource seen from ``host`` is up to date.

        Runs ``drbd-overview`` on ``host`` over ssh, filters the output
        through ``grep trinity`` and inspects the fourth
        whitespace-separated token of the result.

        Args:
            answer: mutable answer dict; 'status', 'category' and
                'details' are overwritten when a problem is found.
            res: resource description; not used by this check.
            host: name of the host to inspect.

        Returns:
            The same ``answer`` object, marked 'ERR' when the command
            pipeline exits non-zero or when the fourth token is missing,
            shorter than seven characters or does not start with
            ``UpToDa``.
        """
        cmd = 'ssh -o ConnectTimeout={} -o StrictHostKeyChecking=no {} '
        cmd = cmd.format(self.args.timeout, host)
        cmd += 'drbd-overview | grep trinity'
        rc, stdout, stderr, exc = run_cmd(cmd)

        if rc:
            answer['status'] = 'ERR'
            answer['category'] = category.ERROR
            answer['details'] = "{} returned non-zero exit code".format(cmd)
            return answer

        fields = stdout.strip().split()

        # This used to compare the token itself with 7 (``stdout[3] < 7``),
        # a str/int comparison: TypeError on Python 3 and always False on
        # Python 2.  The length of the token is what was meant.
        up_to_date = (
            len(fields) >= 4
            and len(fields[3]) >= 7
            and fields[3].startswith('UpToDa')
        )
        if not up_to_date:
            answer['status'] = 'ERR'
            answer['category'] = category.ERROR
            answer['details'] = 'DRBD status is not UpToDate'
            return answer

        return answer
コード例 #13
0
    def cmd(self, cmd):
        """Run a command through ``utils.run_cmd`` and log the outcome.

        Args:
            cmd: the command to run, passed unchanged to
                ``utils.run_cmd``.

        Returns:
            A tuple ``(rc, stdout, stdout_lines, stderr)`` where
            ``stdout_lines`` is the list of non-empty lines of ``stdout``.
        """
        self.tagged_log_debug("Command to run: '{}'".format(cmd))
        rc, stdout, stderr, e = utils.run_cmd(cmd)

        if e:
            self.tagged_log_debug(
                "Exception on running '{}': '{}'".format(cmd, e)
            )

        # Build a real list: on Python 3 ``filter()`` returns a one-shot
        # iterator that can be neither indexed, measured nor re-read.
        stdout_lines = [line for line in stdout.split('\n') if line]

        def oneline(text):
            # Keep multi-line output on a single log line.
            return text.replace("\n", "\\n")

        self.tagged_log_debug(
            (
                "cmd = '{}', rc = {}, stdout = '{}', stderr = '{}'"
            ).format(cmd, rc, oneline(stdout), oneline(stderr))
        )

        return rc, stdout, stdout_lines, stderr
コード例 #14
0
    def get_sinfo(self):
        """
        Collect the states reported by
        sinfo -N -o "%N %6T"

        ``self.statuses`` is reset and then filled with a mapping from the
        first field of each output line to the set of second fields seen
        for it.  Lines with fewer than two fields are ignored.  The
        mapping stays empty when the command returns a truthy exit code.

        Returns ``self.statuses``.
        """
        self.statuses = {}
        rc, stdout, _, _ = utils.run_cmd('sinfo -N -o "%N %6T"')

        if not rc:
            for raw_line in stdout.split("\n"):
                fields = raw_line.split()
                if len(fields) >= 2:
                    # The same first field may appear on several lines.
                    self.statuses.setdefault(fields[0], set()).add(fields[1])

        return self.statuses
コード例 #15
0
    def status(self):
        """Check that chrony is synchronised to an external source.

        Two commands are run, each prefixed with ``self.cmd_prefix``:

        * ``chronyc tracking`` - the fourth field of the first line must
          not be ``7F7F0101``.
        * ``chronyc sources`` - the last field of the first line is read
          as the number of sources, and at least one of the last that
          many lines must have ``*`` as its second character.

        Returns:
            ``(True, '')`` when all checks pass, otherwise
            ``(False, comment)`` describing the first failed check.
        """
        cmd = self.cmd_prefix + 'chronyc tracking'
        rc, stdout, stderr, exc = run_cmd(cmd)

        if rc != 0:
            # This used to call the non-existent ``str.cormat`` and so
            # raised AttributeError instead of reporting the failure.
            return False, "'{}' exit code is not 0".format(cmd)

        stdout = stdout.strip()
        if not stdout:
            # The old ``len(stdout.split('\n')) < 1`` test never fired.
            return False, "'{}' returned no output".format(cmd)

        fields = stdout.split('\n')[0].split()
        if len(fields) < 4:
            # Guard the index below against a short first line.
            return False, "'{}' returned unexpected output".format(cmd)

        # Magic number is from man chronyc
        if fields[3] == '7F7F0101':
            comment = 'Computer is not synchronised to any external source.'
            return False, comment

        cmd = self.cmd_prefix + 'chronyc sources'
        rc, stdout, stderr, exc = run_cmd(cmd)

        if rc != 0:
            return False, "'{}' exit code is not 0".format(cmd)

        stdout = stdout.strip()
        if not stdout:
            return False, "'{}' returned no output".format(cmd)
        lines = stdout.split('\n')

        try:
            n_sources = int(lines[0].split()[-1])
        except (IndexError, ValueError):
            n_sources = 0

        if n_sources < 1:
            comment = "'{}' did not return numnber of sources".format(cmd)
            return False, comment

        # Slicing (``[1:2]``) tolerates lines shorter than two characters,
        # so one short line no longer hides a synced source elsewhere.
        is_current_synced = any(
            line[1:2] == '*' for line in lines[-n_sources:])

        if not is_current_synced:
            comment = "'{}' returned no currenly synced servers".format(cmd)
            return False, comment

        return True, ''
コード例 #16
0
    def check_systemd_unit(self,
                           answer,
                           service,
                           host=None,
                           need_started=True,
                           need_enabled=True):
        """Check a systemd unit's autostart and running state.

        ``systemctl is-enabled`` and ``systemctl status`` are run for
        ``service``, over ssh when ``host`` is given and locally
        otherwise.

        Args:
            answer: mutable answer dict, updated in place.
            service: name of the systemd unit.
            host: host to run the commands on, or ``None`` for local.
            need_started: whether the unit is expected to be running.
            need_enabled: whether the unit is expected to be enabled.

        Returns:
            ``answer`` marked 'ERR' when the running state is not the
            expected one; ``answer`` as it stands when the unit is
            correctly stopped; otherwise the result of
            ``self.service_checker(answer, service, host)`` after
            'status' has been set to 'UP'.
        """
        def flag_error(details):
            # Record a systemd-level problem on the answer.
            answer['status'] = 'ERR'
            answer['category'] = category.ERROR
            answer['info'] = 'systemd'
            answer['details'] = details

        cmd_prefix = ""
        if host is not None:
            cmd_prefix = (
                "ssh -o ConnectTimeout={} -o StrictHostKeyChecking=no {} "
            ).format(self.args.timeout, host)

        rc, stdout, stderr, exc = run_cmd(
            cmd_prefix + "systemctl is-enabled " + service)
        autostart = stdout.strip()

        if need_enabled:
            if autostart != "enabled":
                flag_error('Autostart is disabled for the unit.')
        elif autostart != "disabled":
            flag_error('Autostart is enabled for the unit.')

        rc, stdout, stderr, exc = run_cmd(
            cmd_prefix + "systemctl status {}".format(service))
        is_started = not rc

        if not is_started:
            if need_started:
                flag_error('Unit should run on this host.')
            return answer

        if not need_started:
            flag_error('Unit should not run on this host.')
            return answer

        # NOTE(review): this overwrites status/category/info set by the
        # autostart check above, while 'details' is kept - confirm intent.
        answer['status'] = 'UP'
        answer['category'] = category.GOOD
        answer['info'] = ''

        return self.service_checker(answer, service, host)