def manage_alarm(name, network, level, action, severity, alarm_id, timestamp):
    """Raise or clear a port or interface alarm through the FM API.

    Args:
        name: port name; used in the entity id of LEVEL_PORT alarms.
        network: network name; upper-cased into the reason text and used
            in the entity id of every alarm that is not LEVEL_PORT.
        level: alarm level; compared against LEVEL_PORT.
        action: ALARM_ACTION_CLEAR clears the alarm, any other value
            asserts it.
        severity: FM severity handed to the fault; for non-port alarms it
            also selects "degraded" (major) versus "failed" in the reason.
        alarm_id: FM alarm identifier.
        timestamp: seconds since the epoch, in any form float() accepts.

    Returns:
        True when the FM request failed, False when it succeeded.
        Note the sense: True means *failure*.  An exception raised by
        the FM call is logged and reported as a failure (True) rather
        than being propagated.
    """
    ts = datetime.datetime.fromtimestamp(
        float(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
    collectd.debug("%s %s %s %s alarm for %s:%s [%s] %s" %
                   (PLUGIN, severity, level, alarm_id,
                    network, name, action, ts))

    if action == ALARM_ACTION_CLEAR:
        alarm_state = fm_constants.FM_ALARM_STATE_CLEAR
        reason = ''
        repair = ''
    else:
        # reason and repair strings are only needed on alarm assertion
        alarm_state = fm_constants.FM_ALARM_STATE_SET
        reason = "'" + network.upper() + "' " + level
        repair = 'Check cabling and far-end port configuration ' \
                 'and status on adjacent equipment.'

    # build the alarm eid and finish the reason string
    if level == LEVEL_PORT:
        eid = 'host=' + obj.hostname + "." + level + '=' + name
        reason += " failed"
    else:
        eid = 'host=' + obj.hostname + "." + level + '=' + network
        if severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
            reason += " degraded"
        else:
            reason += " failed"

    if alarm_state == fm_constants.FM_ALARM_STATE_CLEAR:
        try:
            cleared = api.clear_fault(alarm_id, eid)
        except Exception as ex:
            # Report an FM exception through the normal failure return
            # instead of letting it escape to the caller.
            collectd.error("%s %s:%s clear_fault exception ; %s" %
                           (PLUGIN, alarm_id, eid, ex))
            return True

        if cleared is False:
            collectd.error("%s %s:%s clear_fault failed" %
                           (PLUGIN, alarm_id, eid))
            return True
        return False

    fault = fm_api.Fault(
        uuid="",
        alarm_id=alarm_id,
        alarm_state=alarm_state,
        entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
        entity_instance_id=eid,
        severity=severity,
        reason_text=reason,
        alarm_type=fm_constants.FM_ALARM_TYPE_7,
        probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
        proposed_repair_action=repair,
        service_affecting=True,
        timestamp=ts,
        suppression=True)

    try:
        alarm_uuid = api.set_fault(fault)
    except Exception as ex:
        # Same policy as the clear path: log and report failure.
        collectd.error("%s %s:%s set_fault exception ; %s" %
                       (PLUGIN, alarm_id, eid, ex))
        return True

    # set_fault is treated as successful only if it returned a uuid
    if pc.is_uuid_like(alarm_uuid) is False:
        collectd.error("%s %s:%s set_fault failed:%s" %
                       (PLUGIN, alarm_id, eid, alarm_uuid))
        return True
    return False
def raise_alarm():
    """Assert the Remote Logging Server alarm against obj.base_eid.

    Builds a minor, non service affecting fault and submits it with
    api.set_fault.  obj.alarmed is set to True only when set_fault
    returns a uuid-like value.  Failures and exceptions are logged and
    otherwise ignored; nothing is returned.
    """
    reason = ('Controller cannot establish connection with '
              'remote logging server.')
    repair = ('Ensure Remote Log Server IP is reachable from '
              'Controller through OAM interface; otherwise '
              'contact next level of support.')

    try:
        alarm_uuid = api.set_fault(
            fm_api.Fault(
                alarm_id=PLUGIN_ALARMID,
                alarm_state=fm_constants.FM_ALARM_STATE_SET,
                entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
                entity_instance_id=obj.base_eid,
                severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
                reason_text=reason,
                alarm_type=fm_constants.FM_ALARM_TYPE_1,
                probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_6,
                proposed_repair_action=repair,
                service_affecting=False,
                suppression=False))

        if pc.is_uuid_like(alarm_uuid) is False:
            # FM did not hand back a uuid; leave obj.alarmed untouched.
            collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, PLUGIN_ALARMID,
                            obj.base_eid, alarm_uuid))
            return

        collectd.info("%s %s:%s alarm raised" %
                      (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
        obj.alarmed = True

    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s ; %s " %
                       (PLUGIN, PLUGIN_ALARMID, obj.base_eid, ex))
def raise_alarm(alarm_cause, interface=None, data=0):
    """Assert a cause based PTP alarm.

    Args:
        alarm_cause: alarm cause code; selects the alarm object and the
            special-case handling below.
        interface: optional interface forwarded to get_alarm_object.
        data: out-of-tolerance sample, only used for ALARM_CAUSE__OOT.
            Accepts anything float() accepts.  Presumably nanoseconds
            (the thresholds map 1e9 to "1 second") - confirm with caller.

    Returns:
        True when the alarm was raised, when a non-OOT alarm is already
        raised, or when no alarm object exists for the cause.
        False when set_fault did not return a uuid or raised.
    """
    collectd.debug("%s Raising Alarm %d" % (PLUGIN, alarm_cause))

    alarm = get_alarm_object(alarm_cause, interface)
    if alarm is None:
        # log created for None case in the get_alarm_object util
        return True

    # Work on a copy of the reason; it is extended for the OOT case
    # (the most typical one) and replaced for the PROCESS case.
    reason = alarm.reason

    # Handle some special cases
    if alarm_cause == ALARM_CAUSE__OOT:
        # If this is an out of tolerance alarm then add the
        # out of tolerance reading to the reason string before
        # asserting the alarm.
        #
        # Keep the alarm updated with the latest sample reading
        # and severity even if its already asserted.
        #
        # Convert once through float() so a float-formatted string is
        # handled the same way by every branch.
        offset = abs(float(data))
        if offset > 100000000000:
            reason += 'more than 100 seconds'
        elif offset > 10000000000:
            reason += 'more than 10 seconds'
        elif offset > 1000000000:
            reason += 'more than 1 second'
        elif offset > 1000000:
            # Floor division keeps the text a whole number on both
            # Python 2 and Python 3.
            reason += str(int(offset) // 1000000)
            reason += ' millisecs'
        elif offset > 1000:
            reason += str(int(offset) // 1000)
            reason += ' microsecs'
        else:
            reason += str(float(data))
            reason += ' ' + PLUGIN_TYPE_INSTANCE

    elif alarm.raised is True:
        # If alarm already raised then exit.
        #
        # All other alarms are a Major so there is no need to
        # track a change in severity and update accordingly.
        return True

    elif alarm_cause == ALARM_CAUSE__PROCESS:
        reason = 'Provisioned ' + PTP + ' \'' + obj.mode
        reason += '\' time stamping mode seems to be unsupported by this host'

    try:
        fault = fm_api.Fault(
            alarm_id=PLUGIN_ALARMID,
            alarm_state=fm_constants.FM_ALARM_STATE_SET,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=alarm.eid,
            severity=alarm.severity,
            reason_text=reason,
            alarm_type=obj.alarm_type,
            probable_cause=alarm.cause,
            proposed_repair_action=alarm.repair,
            service_affecting=False,  # obj.service_affecting,
            suppression=True)         # obj.suppression)

        alarm_uuid = api.set_fault(fault)
        if pc.is_uuid_like(alarm_uuid) is False:
            # Leave alarm.raised untouched when the fm call failed so
            # that a later call attempts the raise again.
            collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, PLUGIN_ALARMID,
                            alarm.eid, alarm_uuid))
            return False

        collectd.info("%s %s:%s:%s alarm raised" %
                      (PLUGIN, PLUGIN_ALARMID,
                       alarm.eid, alarm.severity))
        alarm.raised = True
        return True

    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s:%s ; %s" %
                       (PLUGIN, PLUGIN_ALARMID,
                        alarm.eid, alarm.severity, ex))
    return False
def manage_alarm(name, level, action, severity, alarm_id, timestamp):
    """Raise or clear a port or interface alarm through the FM API.

    Args:
        name: port or interface name; used in the reason text and in
            the alarm entity id.
        level: alarm level; compared against LEVEL_INTERFACE.
        action: ALARM_ACTION_CLEAR clears the alarm, any other value
            asserts it.
        severity: FM severity handed to the fault; for levels other than
            LEVEL_INTERFACE it also selects "degraded" (major) versus
            "failed" in the reason text.
        alarm_id: FM alarm identifier.
        timestamp: object providing strftime().

    Returns:
        True when the FM request completed (clear_fault returned without
        raising, or set_fault returned a uuid-like value), otherwise
        False.
    """
    ts = timestamp.strftime('%Y-%m-%d %H:%M:%S.%f')

    if action == ALARM_ACTION_CLEAR:
        alarm_state = fm_constants.FM_ALARM_STATE_CLEAR
        reason = repair = ''
    else:
        # reason and repair strings are only needed on alarm assertion
        alarm_state = fm_constants.FM_ALARM_STATE_SET
        reason = "'" + name + "' " + level
        repair = ('Check cabling and far-end port configuration '
                  'and status on adjacent equipment.')

    # The entity id has the same layout for every level.
    eid = 'host=' + obj.hostname + "." + level + '=' + name

    # Only a non-interface alarm of major severity reads "degraded".
    if level != LEVEL_INTERFACE and \
            severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
        reason += " degraded"
    else:
        reason += " failed"

    if alarm_state == fm_constants.FM_ALARM_STATE_CLEAR:
        try:
            if api.clear_fault(alarm_id, eid) is False:
                message = "%s %s:%s alarm already cleared"
            else:
                message = "%s %s:%s alarm cleared"
            collectd.info(message % (PLUGIN, alarm_id, eid))
            return True
        except Exception as ex:
            collectd.error("%s 'clear_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, alarm_id, eid, ex))
            return False

    fault = fm_api.Fault(
        uuid="",
        alarm_id=alarm_id,
        alarm_state=alarm_state,
        entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
        entity_instance_id=eid,
        severity=severity,
        reason_text=reason,
        alarm_type=fm_constants.FM_ALARM_TYPE_7,
        probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
        proposed_repair_action=repair,
        service_affecting=True,
        timestamp=ts,
        suppression=True)

    try:
        alarm_uuid = api.set_fault(fault)
    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s ; %s" %
                       (PLUGIN, alarm_id, eid, ex))
        return False

    # set_fault is treated as successful only if it returned a uuid
    if pc.is_uuid_like(alarm_uuid) is False:
        collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                       (PLUGIN, alarm_id, eid, alarm_uuid))
        return False
    return True