def pwQuery(ip):
    """Run the Proware health query against the array at ``ip``.

    Checks performed, in order:

    * every entry walked under ``pwOID["event"]`` must read ``"Good(1)"``;
    * exactly one slot must report ``"Free(2)"`` (counted as the hot spare);
    * every slot must report ``"Arranged(1)"`` or ``"Free(2)"``;
    * a slot whose bad-block count is above zero raises a warning.

    Returns a ``(status, message)`` tuple.  ``status`` is ``nagios.OK``,
    ``nagios.WARN`` or ``nagios.CRITICAL``.  ``message`` is the device's
    default message when nothing was flagged, otherwise all findings joined
    with ``";"``.
    """
    status = nagios.OK
    message = snmp.get(ip, pwOID["defaultMsg"]).strip()
    errMessages = []

    # check proware events
    for name, sStat in snmp.walk(ip, pwOID["event"]):
        if sStat != "Good(1)":
            status = nagios.CRITICAL
            # Remove only a literal ".0" suffix.  str.rstrip(".0") would strip
            # any trailing run of "." and "0" characters and mangle names
            # such as "event10.0".
            if name.endswith(".0"):
                name = name[:-2]
            name = name.replace("event", "")
            errMessages.append("%s-%s" % (name, sStat))

    # disk status -- walk the device we were asked about, not a fixed host
    sTable = snmp.walkTable(ip, pwOID["slotTable"])
    dStatCount = iterCount(sTable["slotStatus"])

    # hot spare check: exactly one free slot is expected
    hotSpares = dStatCount.get("Free(2)", 0)
    if hotSpares != 1:
        status = nagios.CRITICAL
        errMessages.append("%s Hot Spares" % (hotSpares))

    # individual slot check, reported in slot-index order
    slotStat = dict(zip(sTable["slotDiskIndex"],
                        zip(sTable["slotStatus"],
                            sTable["slotBadBlockNumber"])))
    for s in sorted(slotStat):
        sStat, sBB = slotStat[s]
        if sStat not in ("Arranged(1)", "Free(2)"):
            status = nagios.CRITICAL
            errMessages.append("Slot %s Status-%s" % (s, sStat))
        # NOTE(review): assumes the bad-block column is numeric -- confirm
        # what snmp.walkTable returns for this column.
        if sBB > 0:
            errMessages.append("Slot %s-%s Bad Blocks" % (s, sBB))
            # Bad blocks only warn; never downgrade an existing CRITICAL.
            if status == nagios.OK:
                status = nagios.WARN

    if errMessages:
        message = ";".join(errMessages)
    return status, message
def countList(ip, oid):
    """Walk the list at ``oid`` on ``ip`` and tally its values.

    The walked list is handed to ``iterCount``; the result is returned
    unchanged and is intended to be a dict of ``{value: count, ...}``
    covering all values in the list.
    """
    walked = walkList(ip, oid)
    return iterCount(walked)
def aceraQuery(ip):
    """Run the health query for the array described by ``aceraOID`` at ``ip``.

    Warning-level checks:

    * each voltage line in ``ACREA_MV`` must be within
      ``WARN_MVOLT_PCT_TOL`` (relative deviation) of its nominal value;
    * controller and per-disk temperatures must be below ``WARN_TEMP``;
    * the slowest fan must be above ``WARN_LOW_FAN_RPM``.

    Critical-level checks:

    * the slowest fan must be above ``CRIT_LOW_FAN_RPM``;
    * every power supply status must be ``"Ok(1)"``;
    * exactly one disk must be a ``"Hot Spare"`` and every disk must be
      either ``"RaidSet Member"`` or ``"Hot Spare"``;
    * every raid set and volume state must be ``"Normal"``.

    Returns a ``(status, message)`` tuple.  ``status`` is ``nagios.OK``,
    ``nagios.WARN`` or ``nagios.CRITICAL``.  ``message`` is the device's
    default message when nothing was flagged, otherwise all findings joined
    with ``";"``.
    """
    status = nagios.OK
    message = snmp.get(ip, aceraOID["defaultMsg"]).strip()
    errMessages = []

    # check voltages; each entry of ACREA_MV is both the OID key and the
    # nominal value the reading is compared against
    for mv in ACREA_MV:
        sMV = snmp.get(ip, aceraOID[mv])
        if abs(sMV - mv) / float(mv) >= WARN_MVOLT_PCT_TOL:
            status = nagios.WARN
            errMessages.append("%s mV line is %s mV" % (mv, sMV))

    # temperatures
    sBT = snmp.get(ip, aceraOID["boardTemp"])
    if sBT >= WARN_TEMP:
        status = nagios.WARN
        errMessages.append("Controller Temp-%s C" % (sBT))
    dskTbl = snmp.walkTable(ip, aceraOID["dskTbl"])
    dskDict = dict(zip(dskTbl["hwHddIndex"], dskTbl["hwHddTemp"]))
    for dsk, tmp in dskDict.items():
        if tmp >= WARN_TEMP:
            status = nagios.WARN
            errMessages.append("Disk %s Temp-%s C" % (dsk, tmp))

    # fan RPM: test the critical threshold first so a fan that trips both
    # thresholds is reported once instead of twice
    minRPM = min(snmp.walkList(ip, aceraOID["fanTbl"]))
    if minRPM <= CRIT_LOW_FAN_RPM:
        status = nagios.CRITICAL
        errMessages.append("Fan RPM Low-%s" % (minRPM))
    elif minRPM <= WARN_LOW_FAN_RPM:
        status = nagios.WARN
        errMessages.append("Fan RPM Low-%s" % (minRPM))

    # CRITICAL tests follow; nothing below may lower the status again

    # power supplies
    pwrCt = snmp.countList(ip, aceraOID["pwrTbl"])
    for stat in pwrCt:
        if stat != "Ok(1)":
            status = nagios.CRITICAL
            errMessages.append("Power Supply Status-%s" % (stat))

    # disk roles
    dskStates = snmp.walkList(ip, aceraOID["hddStates"])
    dskIdx = snmp.walkList(ip, aceraOID["hddIdx"])
    hotSpares = iterCount(dskStates).get("Hot Spare", 0)
    if hotSpares != 1:
        status = nagios.CRITICAL
        errMessages.append("%s Hot Spares" % (hotSpares))
    # Index lookups (rather than zip) are deliberate: a mismatch between the
    # state and index tables raises instead of silently dropping a disk.
    for n, s in enumerate(dskStates):
        if s not in ("RaidSet Member", "Hot Spare"):
            status = nagios.CRITICAL
            errMessages.append("Slot %s Status-%s" % (dskIdx[n], s))

    # raid+volume status: raid sets first, then volumes, names and states
    # concatenated in the same order so positions line up
    rNames = (snmp.walkList(ip, aceraOID["raidNames"])
              + snmp.walkList(ip, aceraOID["volNames"]))
    rStates = (snmp.walkList(ip, aceraOID["raidStates"])
               + snmp.walkList(ip, aceraOID["volStates"]))
    for n, s in enumerate(rStates):
        if s != "Normal":
            status = nagios.CRITICAL
            errMessages.append("%s Status-%s" % (rNames[n], s))

    if errMessages:
        message = ";".join(errMessages)
    return status, message