def check_sensor_oper_status(index, sensor_name):
    """Check the operational status of one sensor and record a warning if bad.

    Fetches ``oid_sensor_operstatus`` formatted with ``index`` via
    ``my_snmp_get`` and looks at the returned ``.value``:

    * ``u'1'`` (Ok) and any unlisted value: nothing is recorded.
    * ``u'2'`` (Unavailable) and ``u'3'`` (Non-operational): a ``STATE_WARN``
      entry naming ``sensor_name`` is passed to ``trigger_not_ok``.

    The module-level ``status`` and ``statusstr`` are rebound to whatever
    ``trigger_not_ok`` returns; the function itself returns ``None``.
    """
    global args, status, statusstr, oid_sensor_operstatus

    # Message templates for the status values that count as a problem.
    problem_formats = {
        u'2': '{} sensor status: Unavailable',
        u'3': '{} sensor status: Non-operational',
    }

    reply = my_snmp_get(args, oid_sensor_operstatus.format(index))
    template = problem_formats.get(reply.value)
    if template is None:
        # u'1' (Ok) or a value this check does not know about.
        return

    status, statusstr = trigger_not_ok(
        status, statusstr, STATE_WARN, template.format(sensor_name))
def check_entity_state(index, entity_name):
    """Check standby, operational, usage and alarm state of one entity.

    All values are read over SNMP from the module-level OID templates
    formatted with ``index``. Problems are passed to ``trigger_not_ok`` and
    the module-level ``status`` / ``statusstr`` are rebound to its result.
    The function returns ``None``.

    Checks, in order:

    1. Standby status 2 or 3: the entity is a standby unit, nothing else is
       checked.
    2. Operational status 2 (disabled): ``STATE_CRIT``.
    3. Usage status 4 (busy): ``STATE_CRIT``.
    4. Alarm status: the single byte of the value is mapped to a severity
       and message; 0 and any unlisted value record nothing.
    """
    global args, status, statusstr, oid_state_oper
    global oid_state_usage, oid_state_alarm, oid_state_standby

    # Standby units are skipped entirely.
    if my_snmp_get_int(args, oid_state_standby.format(index)) in (2, 3):
        return

    # 2 = disabled
    if my_snmp_get_int(args, oid_state_oper.format(index)) == 2:
        status, statusstr = trigger_not_ok(
            status, statusstr, STATE_CRIT,
            'Entity {} is in a disabled state'.format(entity_name))

    # 4 = busy
    if my_snmp_get_int(args, oid_state_usage.format(index)) == 4:
        status, statusstr = trigger_not_ok(
            status, statusstr, STATE_CRIT,
            'Entity {} is fully utilized, no capacity left'.format(
                entity_name))

    # The alarm value is unpacked as exactly one unsigned byte; unpack()
    # raises struct.error for any other length.
    # NOTE(review): the byte is compared as a plain integer 0..6. If the
    # agent encodes this object as a bit field the mapping may differ;
    # confirm against the MIB definition.
    raw_alarm = my_snmp_get(args, oid_state_alarm.format(index)).value
    alarm_code, = unpack("B", raw_alarm.encode('latin1'))

    alarm_table = {
        1: (STATE_WARN, 'Entity {} is undergoing repair'),            # underRepair
        2: (STATE_CRIT, 'Entity {} is in critical state'),            # Critical
        3: (STATE_CRIT, 'Entity {} is in major alarm state'),         # Major
        4: (STATE_WARN, 'Entity {} is in minor alarm state'),         # Minor
        5: (STATE_WARN, 'Entity {} is in warning state'),             # Warning
        6: (STATE_WARN, 'Entity {} is in an indeterminative state'),  # Indeterminate
    }
    if alarm_code not in alarm_table:
        # 0 = unknown, but also ok; other values are ignored as well.
        return

    severity, template = alarm_table[alarm_code]
    status, statusstr = trigger_not_ok(
        status, statusstr, severity, template.format(entity_name))
statusstr = ""
num_ibgp = 0

# Walk every BGP peer row and evaluate only those whose remote AS matches
# local_as (i.e. iBGP peers).
# NOTE(review): `in` is a containment test on the remote-AS value; confirm
# that an exact comparison is not what is wanted here.
for index, peer in data.iteritems():
    if local_as not in peer['cbgpPeer2RemoteAs'].value:
        continue
    num_ibgp += 1

    peer_ip = snmp_oid_decode_ip(index)
    admin_state = int(peer['cbgpPeer2AdminStatus'].value)
    bgp_state = int(peer['cbgpPeer2State'].value)
    # An empty/whitespace-only last-error text is reported as "None".
    last_error = peer['cbgpPeer2LastErrorTxt'].value.strip() or "None"

    if admin_state == 1:
        # Administratively down: warn and do not look at the session state.
        status, statusstr = trigger_not_ok(
            status, statusstr, STATE_WARN,
            "{} admin down".format(peer_ip))
    elif bgp_state in (0, 1, 2, 3, 4, 5):
        # none/idle/connect/active/opensent/openconfirm
        status, statusstr = trigger_not_ok(
            status, statusstr, STATE_CRIT,
            "{} BGP session down (Last error: {})".format(
                peer_ip, last_error))

# All checks completed, exiting with the relevant message
check_if_ok(status, statusstr)
peer_ip = snmp_oid_decode_ip(index) if peer_ip != args.p: continue peer_found = True admin_state = peer['cbgpPeer2AdminStatus'].value bgp_state = peer['cbgpPeer2State'].value last_error = peer['cbgpPeer2LastErrorTxt'].value remote_as = peer['cbgpPeer2RemoteAs'].value if not last_error.strip(): last_error = 'None' admin_state = int(str(admin_state)) if admin_state == 1: # Down status, statusstr = trigger_not_ok( status, statusstr, STATE_WARN, "{}(AS{}) admin down".format(orig_args_p, remote_as)) continue bgp_state = int(str(bgp_state)) if bgp_state in [0, 1, 2, 3, 4, 5]: # none/idle/connect/active/opensent/openconfirm status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, "{}(AS{}) BGP session down".format(orig_args_p, remote_as)) continue statusstr = last_error if not peer_found: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, "BGP session for peer {} not found!".format(orig_args_p))
# Now loop over data, and for _iBGP_ check the states status = STATE_OK statusstr = '' num_ibgp = 0 for index, peer in data.iteritems(): if local_as not in peer['dellNetBgpM2PeerRemoteAs'].value: continue num_ibgp += 1 peername = ftos_get_peer_ip(peer['dellNetBgpM2PeerRemoteAddr'], peer['dellNetBgpM2PeerRemoteAddrType']) bgp_fsm_state = int(str(peer['dellNetBgpM2PeerStatus'].value)) if bgp_fsm_state == 1: # 1=halted, 2=running status, statusstr = trigger_not_ok( status, statusstr, STATE_WARN, "{} iBGP Admin down".format(peername)) continue peer_state = int(str(peer['dellNetBgpM2PeerState'].value)) if peer_state in [ 1, 2, 3, 4, 5 ]: # idle/connect/active/opensent/openconfirm, 6=established status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, "{} iBGP session down".format(peername)) # All checks completed, exiting with the relevant message check_if_ok(status, statusstr) print "OK: All ({}) iBGP sessions established".format(num_ibgp)
if chassis1['cvsChassisRole'].value == u'1': print "CRITICAL: VSS Chassis 1 is in standalone mode!" sys.exit(STATE_CRIT) if chassis2['cvsChassisRole'].value == u'1': print "CRITICAL: VSS Chassis 2 is in standalone mode!" sys.exit(STATE_CRIT) # Chassis uptime is an indicator of recent VSS member failure status = STATE_OK statusstr = "" chassis1_tt = float(int(str(chassis1['cvsChassisUpTime'].value))) * 0.01 chassis2_tt = float(int(str(chassis1['cvsChassisUpTime'].value))) * 0.01 if chassis1_tt < vss_uptime_warn: chassis1_str = "Chassis 1 uptime {} seconds".format(int(chassis1_tt)) if chassis1_tt < vss_uptime_crit: status, statusstr = trigger_not_ok(status, statusstr, STATE_CRIT, chassis1_str) else: status, statusstr = trigger_not_ok(status, statusstr, STATE_WARN, chassis1_str) if chassis2_tt < vss_uptime_warn: chassis2_str = "Chassis 2 uptime {} seconds".format(int(chassis2_tt)) if chassis2_tt < vss_uptime_crit: status, statusstr = trigger_not_ok(status, statusstr, STATE_CRIT, chassis2_str) else: status, statusstr = trigger_not_ok(status, statusstr, STATE_WARN, chassis2_str) check_if_ok(status, statusstr) # Getting VSL info per chassis chassis1_vsl = VSL[list(VSL.keys())[0]]
stackunit_status = snmpresult_to_dict(raw_stackunit_status) num_mgmt_units = 0 for index, su in stackunit_status.iteritems(): mgmt_status = my_snmp_get(args, oid_mgmt_status.format(index)) if mgmt_status.value == u'1': num_mgmt_units += 1 if f10: unit_status_label = 'chStackUnitStatus' else: unit_status_label = 'dellNetStackUnitStatus' unit_status = int(str(su[unit_status_label].value)) if unit_status == 2: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, 'Stack-unit {} is unsupported'.format(index)) elif unit_status == 3: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, STATE_WARN, 'Stack-unit {} has software image version mismatch'.format(index)) elif unit_status == 4: status, statusstr = trigger_not_ok( status, statusstr, STATE_WARN, 'Stack-unit {} has configuration mismatch'.format(index)) elif unit_status == 5: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, 'Stack-unit {} is DOWN'.format(index)) elif unit_status == 6: status, statusstr = trigger_not_ok(
# Check VPC general status and the status of peer-links rawdata = my_snmp_walk(args, oids) if not rawdata: print "OK: Switch does not implement Cisco VPC, or does not have it enabled." sys.exit(STATE_OK) data = snmpresult_to_dict(rawdata) vpc_domain_ids = [] for vpc_domain in data: vpc_domain_ids.append(vpc_domain) vpc_data = data[vpc_domain] # 1 = primarySecondary, 2 = primary, 3 = secondaryPrimary, 4 = secondary, 5 = noneEstablished if int(vpc_data['cVpcRoleStatus'].value) == 5: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, 'VPC Domain {} no peerlinks established'.format(vpc_domain)) continue # No point in checking further # 1 = true, 2 = false if int(vpc_data['cVpcDualActiveDetectionStatus'].value) == 1: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, 'VPC Domain {} Dual active detected!'.format(vpc_domain)) # 1 = disabled, 2 = alive, 3 = peerUnreachable, 4 = aliveButDomainIdDismatch, 5 = suspendedAsISSU # 6 = suspendedAsDestIPUnreachable, 7 = suspendedAsVRFUnusable, 8 = misconfigured vpc_pkstatus = int(vpc_data['cVpcPeerKeepAliveStatus'].value) if vpc_pkstatus != 2: vpc_pkerrors = { 1: {
status = STATE_OK statusstr = '' num_vc = 0 for index, vc in data.iteritems(): if int(str(vc['cpwVcID'].value)) in vpls_vcid_range: mpls_vc_type = 'VPLS' else: mpls_vc_type = 'EoMPLS' num_vc += 1 # Check VC status if vc['cpwVcAdminStatus'].value == u'1': if vc['cpwVcOperStatus'].value != u'1': status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, "OperStatus {} on {} VCID {}".format( cpw_oper_status_mapping[vc['cpwVcOperStatus'].value], mpls_vc_type, vc['cpwVcID'].value)) continue if vc['cpwVcInboundOperStatus'].value != '1': status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, "InboundOperStatus {} on {} VCID {}".format( cpw_oper_status_mapping[ vc['cpwVcInboundOperStatus'].value], mpls_vc_type, vc['cpwVcID'].value)) if vc['cpwVcOutboundOperStatus'].value != '1': status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, "OutboundOperStatus {} on {} VCID {}".format( cpw_oper_status_mapping[
# First off we'll try getting some Power status for those that support it
# 1/on - Admin power on
# 2/off - Admin power off
# 3/inlineAuto,4/inlineOn,5/powerCycle - PoE stuff, irrelevant for us so not much caring here
# cefcFRUPowerAdminStatus - 1=on, 2=off, 3=inlineAuto, 4=inlineOn, 5=powerCycle
pwr_adminstatus = my_snmp_get(
    args,
    "CISCO-ENTITY-FRU-CONTROL-MIB::cefcFRUPowerAdminStatus.{0}".format(
        index))
# Entities without this object answer with NOSUCHINSTANCE and are skipped.
if 'NOSUCHINSTANCE' not in pwr_adminstatus.value:
    # The integer is stored back on the result object, as before.
    pwr_adminstatus.value = int(str(pwr_adminstatus.value))
    # Fixed: compare the integer value. The old branches compared the
    # result object itself to 2, so the "off" warning could never fire.
    if pwr_adminstatus.value == 2:
        status, statusstr = trigger_not_ok(
            status, statusstr, STATE_WARN,
            "PowerAdminStatus Off for {0}".format(descr))
    # 1 (on) and 3/4/5 (PoE modes) are ok and need no action.

# cefcFRUPowerOperStatus
# 1/offenvOther - Specifies that FRU is powered off because of a problem not listed below
# 2/on - Specifies that FRU is powered on
# 3/offAdmin - Specifies that Admin has turned off the FRU
# 4/offDenied - Specifies that FRU is powered off because the available system power is insufficient
# 5/offEnvPower - FRU is turned off because of a power problem. For example power translation or distribution problems.
# 6/offEnvTemp - FRU is turned off because of a temperature problem
u'NOSUCHOBJECT', u'NOSUCHINSTANCE' ]: print "OK: Either switch doesn't support the VLT MIB, or it doesn't run VLT." sys.exit(STATE_OK) # Check VLT status rawdata = my_snmp_walk(args, oids) data = snmpresult_to_dict(rawdata) status = STATE_OK statusstr = "" for index, vltdomain in data.iteritems(): vlt_domain_id = int(str(index)) peer_status = int(str(vltdomain['dellNetVLTPeerStatus'].value)) if peer_status == 0: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, 'VLT domain {}: Peer session is not established'.format( vlt_domain_id)) if peer_status in [2, 3]: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, 'VLT domain {}: Peer is down!'.format(vlt_domain_id)) icl_status = int(str(vltdomain['dellNetVLTIclStatus'].value)) if peer_status == 0: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, 'VLT domain {}: ICL link is not established'.format(vlt_domain_id)) if peer_status in [2, 3]: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, 'VLT domain {}: ICL link is down!'.format(vlt_domain_id))
if not args.c: args.c = 169200 status = STATE_OK statusstr = "" # Fetch oxidized node status resp = requests.get("http://{}:{}/nodes?format=json".format(args.H, args.p)) nodes = json.loads(resp.content) # Loop over and check timestamps cur_time = int(time()) for node in nodes: node_update_ts = strtime_to_timestamp(node['time']) diff_ts = cur_time - node_update_ts if diff_ts > args.c: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, "{} hasn't been backed up since {}".format(node['name'], node['time'])) elif diff_ts > args.w: status, statusstr = trigger_not_ok( status, statusstr, STATE_WARN, "{} hasn't been backed up since {}".format(node['name'], node['time'])) # Check status and exit accordingly check_if_ok(status, statusstr) print "OK: All equipment properly backed up." sys.exit(status)
sys.exit(STATE_CRIT) rawdata = my_snmp_walk(args, 'OSPFV3-MIB::ospfv3NbrState.{}'.format(interface.oid_index)) # Check for neighbours and their states status = STATE_OK statusstr = "" num_neis = 0 for nei in rawdata: num_neis += 1 nei_state = int(str(nei.value)) if nei_state not in ospfv3_ok_states: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, "Neighbour {} on interface {} down".format(num_neis, args.i)) if num_neis < 1: status, statusstr = trigger_not_ok( status, statusstr, STATE_CRIT, "CRITICAL: No OSPFv3 neighbours found on interface {}".format(args.i)) # Check status check_if_ok(status, statusstr) print "OK: All {} neighbours on interface {} is up".format(num_neis, args.i)
] rawdata = my_snmp_walk(args, oids) data = snmpresult_to_dict(rawdata) # Loop through them and check num ports vs num active ports and operational status status = STATE_OK statusstr = "" for index, lag in data.iteritems(): lag_name = my_snmp_get(args, 'IF-MIB::ifDescr.{}'.format(lag['dot3aAggCfgIfIndex'].value)).value num_ports = int(str(lag['dot3aAggCfgNumPorts'].value)) if num_ports < 1: status, statusstr = trigger_not_ok( status, statusstr, STATE_WARN, '{} has no configured members'.format(lag_name)) continue active_ports = ftos_parse_lag_active_ports(lag['dot3aAggCfgPortListString'].value) if active_ports < num_ports and active_ports > 1: status, statusstr = trigger_not_ok( status, statusstr, STATE_WARN, '{}: Only {} ports of configured {} is up'.format(lag_name, active_ports, num_ports)) elif active_ports < 1: status, statusstr = trigger_not_ok( status, statusstr,