Exemple #1
0
if go.options.job_exclusive:
    report_states.append(ND_job_exclusive)
if go.options.unknown:
    report_states.append(ND_state_unknown)
if go.options.bad:
    report_states.append(ND_bad)
if go.options.error:
    report_states.append(ND_error)
if go.options.idle:
    report_states.append(ND_idle)

if len(report_states) == 0:
    report_states = all_states

if go.options.singlenodeinfo or go.options.reportnodeinfo:
    nodeinfo = collect_nodeinfo()[2]
    if len(nodeinfo) == 0:
        _log.error('No nodeinfo found')
        sys.exit(1)

    ordered = sorted(nodeinfo.items(), key=lambda x: len(x[1]), reverse=True)

    if go.options.singlenodeinfo:
        if len(nodeinfo) > 1:
            msg = "Not all nodes have same parameters. Using most frequent ones."
            if go.options.reportnodeinfo:
                _log.warning(msg)
            else:
                _log.error(msg)

        # usage: export `./show_nodes -I` ; env |grep SHOWNODES_
Exemple #2
0
def main():
    """Main"""

    options = {
        'nagios': ('Report in nagios format', None, 'store_true', False, 'n'),
        'regex': ('Filter on regexp, data for first match', None, 'regex', None, 'r'),
        'allregex': ('Combined with --regex/-r, return all data', None, 'store_true', False, 'A'),
        'anystate': ('Matches any state (eg down_on_error node will also list as error)',
                     None, 'store_true', False, 'a'),
        'down': ('Down nodes', None, 'store_true', False, 'D'),
        'downonerror': ('Down on error nodes', None, 'store_true', False, 'E'),
        'offline': ('Offline nodes', None, 'store_true', False, 'o'),
        'partial': ('Partial nodes (one or more running job(s), jobslot(s) available)', None, 'store_true', False, 'p'),
        'job-exclusive': ('Job-exclusive nodes (no jobslots available)', None, 'store_true', False, 'x'),
        'free': ('Free nodes (0 or more running jobs, jobslot(s) available)', None, 'store_true', False, 'f'),
        'unknown': ('State unknown nodes', None, 'store_true', False, 'u'),
        'bad': ('Bad nodes (broken jobregex)', None, 'store_true', False, 'b'),
        'error': ('Error nodes', None, 'store_true', False, 'e'),
        'idle': ('Idle nodes (No running jobs, jobslot(s) available)', None, 'store_true', False, 'i'),
        'singlenodeinfo': (('Single (most-frequent) node information in key=value format'
                            '(no combination with other options)'), None, 'store_true', False, 'I'),
        'reportnodeinfo': ('Report node information (no combination with other options)',
                           None, 'store_true', False, 'R'),
        'moab': ('Use moab information (mdiag -n)', None, 'store_true', False, 'm'),
        'moabxml': ('Use xml moab data from file (for testing)', None, 'store', None),
        'shorthost': ('Return (short) hostname', None, 'store_true', False, 's'),
        'invert': ('Return inverted selection', None, 'store_true', False, 'v'),
        }

    go = simple_option(options)

    if go.options.nagios and not go.options.debug:
        fancylogger.logToDevLog(enable=True)
        fancylogger.logToScreen(enable=False)
        fancylogger.setLogLevelInfo()

    all_states = ND_NAGIOS_CRITICAL + ND_NAGIOS_WARNING + ND_NAGIOS_OK
    report_states = []
    if go.options.down:
        report_states.append(ND_down)
    if go.options.downonerror:
        report_states.append(ND_down_on_error)
    if go.options.offline:
        report_states.append(ND_offline)
    if go.options.free:
        report_states.append(ND_free)
    if go.options.partial:
        report_states.append(ND_free_and_job)
    if go.options.job_exclusive:
        report_states.append(ND_job_exclusive)
    if go.options.unknown:
        report_states.append(ND_state_unknown)
    if go.options.bad:
        report_states.append(ND_bad)
    if go.options.error:
        report_states.append(ND_error)
    if go.options.idle:
        report_states.append(ND_idle)

    if len(report_states) == 0:
        report_states = all_states

    if go.options.singlenodeinfo or go.options.reportnodeinfo:
        nodeinfo = collect_nodeinfo()[2]
        if len(nodeinfo) == 0:
            _log.error('No nodeinfo found')
            sys.exit(1)

        ordered = sorted(nodeinfo.items(), key=lambda x: len(x[1]), reverse=True)

        if go.options.singlenodeinfo:
            if len(nodeinfo) > 1:
                msg = "Not all nodes have same parameters. Using most frequent ones."
                if go.options.reportnodeinfo:
                    _log.warning(msg)
                else:
                    _log.error(msg)

            # usage: export `./show_nodes -I` ; env |grep SHOWNODES_
            most_freq = ordered[0][0]
            msg = []
            msg.append("SHOWNODES_PPN=%d" % most_freq[0])
            msg.append("SHOWNODES_PHYSMEMMB=%d" % (most_freq[1] * 1024))
        else:
            msg = []
            for info, nodes in ordered:
                txt = "%d nodes with %d cores, %s MB physmem, %s GB swap and %s GB local disk" % (
                    len(nodes), info[0], info[1] * 1024, info[2], info[3])
                msg.append(txt)
                # print and _log are dumped to stdout at different moment, repeat the txt in the debug log
                _log.debug("Found %s with matching nodes: %s" % (txt, nodes))

        print "\n".join(msg)
        sys.exit(0)

    if go.options.moab:

        if go.options.moabxml:
            try:
                moabxml = open(go.options.moabxml).read()
            except (OSError, IOError):
                _log.error('Failed to read moab xml from %s' % go.options.moabxml)
        else:
            moabxml = None
        nodes_dict = moab_get_nodes_dict(xml=moabxml)

        nodes = get_nodes(nodes_dict)
    else:
        nodes = get_nodes()

    nagiosexit = {
        NDNAG_WARNING: warning_exit,
        NDNAG_CRITICAL: critical_exit,
        NDNAG_OK: ok_exit,
    }

    nagios_res = {}
    detailed_res = {}
    nodes_found = []

    all_nodes = []

    for name, full_state in nodes:
        all_nodes.append(name)

        if go.options.regex and not go.options.regex.search(name):
            continue

        nagios_state = full_state['derived']['nagiosstate']
        if nagios_state not in nagios_res:
            nagios_res[nagios_state] = []

        state = full_state['derived']['state']
        states = full_state['derived']['states']

        if state == ND_free and ND_idle in states:
            state = ND_idle  # special case for idle
        if state not in detailed_res:
            detailed_res[state] = []

        if go.options.anystate:
            states_to_check = states
        else:
            states_to_check = [state]

        # filter the allowed states
        if any(x for x in states_to_check if x in report_states):
            nagios_res[nagios_state].append(states)
            detailed_res[state].append(states)
            nodes_found.append(name)

            if go.options.regex and not go.options.allregex:
                break

    if go.options.invert:
        nodes_found = [x for x in all_nodes if x not in nodes_found]

    if go.options.regex and not go.options.allregex:
        # there should only be one node
        nagios_state, all_states = nagios_res.items()[0]
        states = all_states[0]
        if go.options.nagios:
            msg = "show_nodes - %s" % ",".join(states)
            nagiosexit[nagios_state](msg)
        else:
            txt = "%s %s" % (nagios_state, ",".join(states))
            print txt
    else:
        if go.options.nagios:
            msg = NagiosResult('show_nodes')
            txt = []
            total = 0
            for state in all_states:
                if state in detailed_res:
                    nr = len(detailed_res[state])
                else:
                    nr = 0
                total += nr
                setattr(msg, state, nr)
            msg.total = total

            reported_state = [str(NDNAG_OK), '']
            if ND_bad in detailed_res:
                reported_state[0] = NDNAG_CRITICAL
                msg.message += ' - %s bad nodes' % (len(detailed_res[ND_bad]))
            nagiosexit[reported_state[0]](msg)
        else:
            # just print the nodes
            if go.options.shorthost:
                nodes_found = [x.split('.')[0] for x in nodes_found]
            print ' '.join(nodes_found)
Exemple #3
0
    for idx, key in enumerate(ND_STATE_NOTOK):
        value = TRANSLATE_STATE[key]
        stats[idx][1] = fmt % (value, key, sl.count(key))

    stats[-1][1] = fmt % ('o', "other", other)

    print "\n".join([''.join(x) for x in stats])


def display_node_types(types):
    """Give an overview of all types of nodes"""
    template = "%sppn=%s, physmem=%sGB, swap=%sGB, vmem=%sGB, local disk=%sGB"
    txt = ['', 'Node type:']
    offset = ' '
    if len(types) > 1:
        txt[-1].replace(':', 's:')
        offset = " " * 2

    for typ, nodes in sorted(types.items(), key=lambda x: len(x[1]), reverse=True):
        # most frequent first
        cores, phys, swap, disk = typ
        txt.append(template % (offset, cores, phys, swap, phys + swap, disk))

    print "\n".join(txt)


if __name__ == '__main__':
    node_list, state_list, types = collect_nodeinfo()
    display_cluster_status(node_list, state_list)
    display_node_types(types)
Exemple #4
0
    for idx, key in enumerate(ND_STATE_NOTOK):
        value = TRANSLATE_STATE[key]
        stats[idx][1] = fmt % (value, key, sl.count(key))

    stats[-1][1] = fmt % ('o', "other", other)

    print "\n".join([''.join(x) for x in stats])


def display_node_types(types):
    """Give an overview of all types of nodes"""
    template = "%sppn=%s, physmem=%sGB, swap=%sGB, vmem=%sGB, local disk=%sGB"
    txt = ['', 'Node type:']
    offset = ' '
    if len(types) > 1:
        txt[-1].replace(':', 's:')
        offset = " " * 2

    for typ, _ in sorted(types.items(), key=lambda x: len(x[1]), reverse=True):
        # most frequent first
        cores, phys, swap, disk = typ
        txt.append(template % (offset, cores, phys, swap, phys + swap, disk))

    print "\n".join(txt)


if __name__ == '__main__':
    node_list, state_list, types = collect_nodeinfo()
    display_cluster_status(node_list, state_list)
    display_node_types(types)
Exemple #5
0
def main():
    """Main"""

    options = {
        'nagios': ('Report in nagios format', None, 'store_true', False, 'n'),
        'regex': ('Filter on regexp, data for first match', None, 'regex',
                  None, 'r'),
        'allregex': ('Combined with --regex/-r, return all data', None,
                     'store_true', False, 'A'),
        'anystate':
        ('Matches any state (eg down_on_error node will also list as error)',
         None, 'store_true', False, 'a'),
        'down': ('Down nodes', None, 'store_true', False, 'D'),
        'downonerror': ('Down on error nodes', None, 'store_true', False, 'E'),
        'offline': ('Offline nodes', None, 'store_true', False, 'o'),
        'offline_idle': ('Offline idle nodes', None, 'store_true', False, 'O'),
        'partial':
        ('Partial nodes (one or more running job(s), jobslot(s) available)',
         None, 'store_true', False, 'p'),
        'job-exclusive': ('Job-exclusive nodes (no jobslots available)', None,
                          'store_true', False, 'x'),
        'free': ('Free nodes (0 or more running jobs, jobslot(s) available)',
                 None, 'store_true', False, 'f'),
        'unknown': ('State unknown nodes', None, 'store_true', False, 'u'),
        'bad': ('Bad nodes (broken jobregex)', None, 'store_true', False, 'b'),
        'error': ('Error nodes', None, 'store_true', False, 'e'),
        'idle': ('Idle nodes (No running jobs, jobslot(s) available)', None,
                 'store_true', False, 'i'),
        'singlenodeinfo':
        (('Single (most-frequent) node information in key=value format'
          '(no combination with other options)'), None, 'store_true', False,
         'I'),
        'reportnodeinfo':
        ('Report node information (no combination with other options)', None,
         'store_true', False, 'R'),
        'moab': ('Use moab information (mdiag -n)', None, 'store_true', False,
                 'm'),
        'moabxml': ('Use xml moab data from file (for testing)', None, 'store',
                    None),
        'shorthost': ('Return (short) hostname', None, 'store_true', False,
                      's'),
        'invert': ('Return inverted selection', None, 'store_true', False,
                   'v'),
    }

    go = simple_option(options)

    if go.options.nagios and not go.options.debug:
        fancylogger.logToDevLog(enable=True)
        fancylogger.logToScreen(enable=False)
        fancylogger.setLogLevelInfo()

    all_states = ND_NAGIOS_CRITICAL + ND_NAGIOS_WARNING + ND_NAGIOS_OK
    report_states = []
    if go.options.down:
        report_states.append(ND_down)
    if go.options.downonerror:
        report_states.append(ND_down_on_error)
    if go.options.offline:
        report_states.append(ND_offline)
    if go.options.free:
        report_states.append(ND_free)
    if go.options.partial:
        report_states.append(ND_free_and_job)
    if go.options.job_exclusive:
        report_states.append(ND_job_exclusive)
    if go.options.unknown:
        report_states.append(ND_state_unknown)
    if go.options.bad:
        report_states.append(ND_bad)
    if go.options.error:
        report_states.append(ND_error)
    if go.options.idle:
        report_states.append(ND_idle)
    if go.options.offline_idle:
        report_states.append(ND_offline_idle)

    if len(report_states) == 0:
        report_states = all_states

    if go.options.singlenodeinfo or go.options.reportnodeinfo:
        nodeinfo = collect_nodeinfo()[2]
        if len(nodeinfo) == 0:
            _log.error('No nodeinfo found')
            sys.exit(1)

        ordered = sorted(nodeinfo.items(),
                         key=lambda x: len(x[1]),
                         reverse=True)

        if go.options.singlenodeinfo:
            if len(nodeinfo) > 1:
                msg = "Not all nodes have same parameters. Using most frequent ones."
                if go.options.reportnodeinfo:
                    _log.warning(msg)
                else:
                    _log.error(msg)

            # usage: export `./show_nodes -I` ; env |grep SHOWNODES_
            most_freq = ordered[0][0]
            msg = []
            msg.append("SHOWNODES_PPN=%d" % most_freq[0])
            msg.append("SHOWNODES_PHYSMEMMB=%d" % (most_freq[1] * 1024))
        else:
            msg = []
            for info, nodes in ordered:
                txt = "%d nodes with %d cores, %s MB physmem, %s GB swap and %s GB local disk" % (
                    len(nodes), info[0], info[1] * 1024, info[2], info[3])
                msg.append(txt)
                # print and _log are dumped to stdout at different moment, repeat the txt in the debug log
                _log.debug("Found %s with matching nodes: %s" % (txt, nodes))

        print "\n".join(msg)
        sys.exit(0)

    if go.options.moab:

        if go.options.moabxml:
            try:
                moabxml = open(go.options.moabxml).read()
            except (OSError, IOError):
                _log.error('Failed to read moab xml from %s' %
                           go.options.moabxml)
        else:
            moabxml = None
        nodes_dict = moab_get_nodes_dict(xml=moabxml)

        nodes = get_nodes(nodes_dict)
    else:
        nodes = get_nodes()

    nagiosexit = {
        NDNAG_WARNING: warning_exit,
        NDNAG_CRITICAL: critical_exit,
        NDNAG_OK: ok_exit,
    }

    nagios_res = {}
    detailed_res = {}
    nodes_found = []

    all_nodes = []

    for name, full_state in nodes:
        all_nodes.append(name)

        if go.options.regex and not go.options.regex.search(name):
            continue

        nagios_state = full_state['derived']['nagiosstate']
        if nagios_state not in nagios_res:
            nagios_res[nagios_state] = []

        state = full_state['derived']['state']
        states = full_state['derived']['states']

        if state == ND_free and ND_idle in states:
            state = ND_idle  # special case for idle
        if state == ND_offline and ND_idle in states:
            state = ND_offline_idle
        if state not in detailed_res:
            detailed_res[state] = []

        if go.options.anystate:
            states_to_check = states
        else:
            states_to_check = [state]

        # filter the allowed states
        if any(x for x in states_to_check if x in report_states):
            nagios_res[nagios_state].append(states)
            detailed_res[state].append(states)
            nodes_found.append(name)

            if go.options.regex and not go.options.allregex:
                break

    if go.options.invert:
        nodes_found = [x for x in all_nodes if x not in nodes_found]

    if go.options.regex and not go.options.allregex:
        # there should only be one node
        nagios_state, all_states = nagios_res.items()[0]
        states = all_states[0]
        if go.options.nagios:
            msg = "show_nodes - %s" % ",".join(states)
            nagiosexit[nagios_state](msg)
        else:
            txt = "%s %s" % (nagios_state, ",".join(states))
            print txt
    else:
        if go.options.nagios:
            msg = NagiosResult('show_nodes')
            txt = []
            total = 0
            for state in all_states:
                if state in detailed_res:
                    nr = len(detailed_res[state])
                else:
                    nr = 0
                total += nr
                setattr(msg, state, nr)
            msg.total = total

            reported_state = [str(NDNAG_OK), '']
            if ND_bad in detailed_res:
                reported_state[0] = NDNAG_CRITICAL
                msg.message += ' - %s bad nodes' % (len(detailed_res[ND_bad]))
            nagiosexit[reported_state[0]](msg)
        else:
            # just print the nodes
            if go.options.shorthost:
                nodes_found = [x.split('.')[0] for x in nodes_found]
            print ' '.join(nodes_found)