예제 #1
0
def manage_alarm(name, network, level, action, severity, alarm_id, timestamp):
    """Raise or clear a port or interface alarm through the FM API.

    The alarm entity instance id is built from the host name, the level
    and either ``name`` (port level) or ``network`` (any other level).

    :param name: port name; used in the entity id for port level alarms
    :param network: network name; used in the reason text and in the
                    entity id for non-port level alarms
    :param level: alarm level, compared against LEVEL_PORT
    :param action: ALARM_ACTION_CLEAR clears, anything else asserts
    :param severity: FM severity applied when asserting
    :param alarm_id: FM alarm identifier
    :param timestamp: epoch seconds (anything float() accepts)
    :returns: False when the FM call went through, True when
              clear_fault returned False or set_fault did not return
              a uuid-like value (both cases are logged as errors)
    """

    when = datetime.datetime.fromtimestamp(float(timestamp))
    ts = when.strftime('%Y-%m-%d %H:%M:%S')
    collectd.debug("%s %s %s %s alarm for %s:%s [%s] %s" %
                   (PLUGIN, severity, level, alarm_id,
                    network, name, action, ts))

    clearing = action == ALARM_ACTION_CLEAR

    # reason and repair strings are only needed on alarm assertion
    if clearing:
        reason = repair = ''
    else:
        reason = "'" + network.upper() + "' " + level
        repair = ('Check cabling and far-end port configuration '
                  'and status on adjacent equipment.')

    # Port alarms are keyed by port name and always read 'failed';
    # other levels are keyed by network and read 'degraded' on major.
    if level == LEVEL_PORT:
        target, suffix = name, " failed"
    elif severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
        target, suffix = network, " degraded"
    else:
        target, suffix = network, " failed"

    eid = 'host=' + obj.hostname + "." + level + '=' + target
    reason += suffix

    if clearing:
        if api.clear_fault(alarm_id, eid) is not False:
            return False
        collectd.error("%s %s:%s clear_fault failed" %
                       (PLUGIN, alarm_id, eid))
        return True

    fault = fm_api.Fault(
        uuid="",
        alarm_id=alarm_id,
        alarm_state=fm_constants.FM_ALARM_STATE_SET,
        entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
        entity_instance_id=eid,
        severity=severity,
        reason_text=reason,
        alarm_type=fm_constants.FM_ALARM_TYPE_7,
        probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
        proposed_repair_action=repair,
        service_affecting=True,
        timestamp=ts,
        suppression=True)

    alarm_uuid = api.set_fault(fault)
    if pc.is_uuid_like(alarm_uuid) is not False:
        return False

    collectd.error("%s %s:%s set_fault failed:%s" %
                   (PLUGIN, alarm_id, eid, alarm_uuid))
    return True
예제 #2
0
def raise_alarm():
    """Assert the remote logging server alarm against obj.base_eid.

    When set_fault returns a uuid-like value the alarm is logged as
    raised and ``obj.alarmed`` is set to True. Any other return value,
    or any exception from the request, is only logged as an error.
    Nothing is returned.
    """

    reason = ('Controller cannot establish connection with '
              'remote logging server.')

    repair = ('Ensure Remote Log Server IP is reachable from '
              'Controller through OAM interface; otherwise '
              'contact next level of support.')

    try:
        alarm_uuid = api.set_fault(fm_api.Fault(
            alarm_id=PLUGIN_ALARMID,
            alarm_state=fm_constants.FM_ALARM_STATE_SET,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=obj.base_eid,
            severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
            reason_text=reason,
            alarm_type=fm_constants.FM_ALARM_TYPE_1,
            probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_6,
            proposed_repair_action=repair,
            service_affecting=False,
            suppression=False))

        if pc.is_uuid_like(alarm_uuid) is not False:
            collectd.info("%s %s:%s alarm raised" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
            # remember that the alarm is now asserted
            obj.alarmed = True
        else:
            collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, PLUGIN_ALARMID,
                            obj.base_eid, alarm_uuid))

    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s ; %s " %
                       (PLUGIN, PLUGIN_ALARMID, obj.base_eid, ex))
예제 #3
0
파일: ptp.py 프로젝트: starlingx/monitoring
def raise_alarm(alarm_cause, interface=None, data=0):
    """Assert the PTP alarm that belongs to alarm_cause.

    :param alarm_cause: integer alarm cause used to look up the alarm
                        object
    :param interface: passed through to get_alarm_object
    :param data: latest sample reading; only used for the out of
                 tolerance cause, where it is appended to the reason
    :returns: True when the alarm was asserted, was already raised
              (non out-of-tolerance causes) or no alarm object exists
              for the cause; False when set_fault did not return a
              uuid-like value or raised an exception
    """

    collectd.debug("%s Raising Alarm %d" % (PLUGIN, alarm_cause))

    alarm = get_alarm_object(alarm_cause, interface)
    if alarm is None:
        # the get_alarm_object util is expected to have logged this case
        return True

    # Start from the alarm's own reason text; some causes below extend
    # or replace it, so use a local rather than touching alarm.reason.
    reason = alarm.reason

    if alarm_cause == ALARM_CAUSE__OOT:
        # Out of tolerance: always fall through to set_fault, even if
        # the alarm is already raised, so the asserted alarm carries the
        # latest sample reading and severity.
        #
        # NOTE(review): the thresholds suggest data is in nanoseconds
        # (1000000000 is reported as '1 second') - confirm with caller.
        offset = abs(float(data))
        if offset > 100000000000:
            reason += 'more than 100 seconds'
        elif offset > 10000000000:
            reason += 'more than 10 seconds'
        elif offset > 1000000000:
            reason += 'more than 1 second'
        elif offset > 1000000:
            # NOTE(review): '/' is true division on Python 3, so this
            # renders a float; int(data) also rejects decimal strings.
            # Confirm the intended formatting before changing.
            reason += str(abs(int(data)) / 1000000) + ' millisecs'
        elif offset > 1000:
            reason += str(abs(int(data)) / 1000) + ' microsecs'
        else:
            reason += str(float(data)) + ' ' + PLUGIN_TYPE_INSTANCE

    else:
        if alarm.raised is True:
            # Already asserted. Per the original note all other alarms
            # are major, so there is no severity change to track.
            return True

        if alarm_cause == ALARM_CAUSE__PROCESS:
            reason = ('Provisioned ' + PTP + ' \'' + obj.mode +
                      '\' time stamping mode seems to be '
                      'unsupported by this host')

    try:
        fault = fm_api.Fault(
            alarm_id=PLUGIN_ALARMID,
            alarm_state=fm_constants.FM_ALARM_STATE_SET,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=alarm.eid,
            severity=alarm.severity,
            reason_text=reason,
            alarm_type=obj.alarm_type,
            probable_cause=alarm.cause,
            proposed_repair_action=alarm.repair,
            # hard-coded here rather than read from obj
            service_affecting=False,
            suppression=True)

        alarm_uuid = api.set_fault(fault)
        if pc.is_uuid_like(alarm_uuid) is not False:
            collectd.info("%s %s:%s:%s alarm raised" %
                          (PLUGIN, PLUGIN_ALARMID,
                           alarm.eid, alarm.severity))
            alarm.raised = True
            return True

        # The fm call failed: alarm.raised is left untouched so a later
        # call is not short-circuited by the 'already raised' check.
        collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                       (PLUGIN, PLUGIN_ALARMID, alarm.eid, alarm_uuid))
        return False

    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s:%s ; %s" %
                       (PLUGIN,
                        PLUGIN_ALARMID,
                        alarm.eid,
                        alarm.severity,
                        ex))
        return False
예제 #4
0
def manage_alarm(name, level, action, severity, alarm_id, timestamp):
    """Raise or clear a port or interface alarm through the FM API.

    :param name: port or interface name; used in the reason text and in
                 the alarm entity instance id
    :param level: alarm level, compared against LEVEL_INTERFACE
    :param action: ALARM_ACTION_CLEAR clears, anything else asserts
    :param severity: FM severity applied when asserting
    :param alarm_id: FM alarm identifier
    :param timestamp: object providing strftime(), e.g. a datetime
    :returns: True when clear_fault completed without raising (whatever
              it returned) or set_fault returned a uuid-like value;
              False when either call raised or set_fault returned
              anything else
    """

    ts = timestamp.strftime('%Y-%m-%d %H:%M:%S.%f')

    clearing = action == ALARM_ACTION_CLEAR

    # reason and repair strings are only needed on alarm assertion
    if clearing:
        reason = repair = ''
    else:
        reason = "'" + name + "' " + level
        repair = ('Check cabling and far-end port configuration '
                  'and status on adjacent equipment.')

    # the entity id has the same layout for every level
    eid = 'host=' + obj.hostname + "." + level + '=' + name

    # Interface level alarms always read 'failed'; any other level reads
    # 'degraded' at major severity and 'failed' otherwise.
    degraded = (level != LEVEL_INTERFACE and
                severity == fm_constants.FM_ALARM_SEVERITY_MAJOR)
    reason += " degraded" if degraded else " failed"

    if clearing:
        try:
            if api.clear_fault(alarm_id, eid) is False:
                message = "%s %s:%s alarm already cleared"
            else:
                message = "%s %s:%s alarm cleared"
            collectd.info(message % (PLUGIN, alarm_id, eid))
            return True

        except Exception as ex:
            collectd.error("%s 'clear_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, alarm_id, eid, ex))
            return False

    fault = fm_api.Fault(
        uuid="",
        alarm_id=alarm_id,
        alarm_state=fm_constants.FM_ALARM_STATE_SET,
        entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
        entity_instance_id=eid,
        severity=severity,
        reason_text=reason,
        alarm_type=fm_constants.FM_ALARM_TYPE_7,
        probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
        proposed_repair_action=repair,
        service_affecting=True,
        timestamp=ts,
        suppression=True)

    try:
        alarm_uuid = api.set_fault(fault)
    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s ; %s" %
                       (PLUGIN, alarm_id, eid, ex))
        return False

    if pc.is_uuid_like(alarm_uuid) is not False:
        return True

    collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                   (PLUGIN, alarm_id, eid, alarm_uuid))
    return False