Ejemplo n.º 1
0
def tolerance_check(
    *,
    set_sync_time: Optional[float],
    levels: Optional[Tuple[float, float]],
    notice_only: bool = False,
) -> Generator[Tuple[int, str], None, None]:
    """Report the time elapsed since the last recorded time-server sync.

    The timestamp of the last sync is kept in the item state under the key
    "time_server".

    Args:
        set_sync_time: When not None, this value is stored as the new sync
            timestamp and nothing is yielded.
        levels: Passed on to ``check_levels`` to evaluate the elapsed time.
        notice_only: When true, the text of a result with state 0 is prefixed
            with a newline (presumably so it is shown only in the details
            output -- confirm against the check API in use).

    Yields:
        A single ``(state, text)`` tuple, unless ``set_sync_time`` was given.
    """
    state_key = "time_server"

    # A fresh sync was observed: remember it, there is nothing to report.
    if set_sync_time is not None:
        set_item_state(state_key, set_sync_time)
        return

    previous_sync = get_item_state(state_key)
    current_time = time.time()
    prefix = "\n" if notice_only else ""
    title = "Time since last sync"

    # No sync recorded so far: start the clock now and report state 0.
    if previous_sync is None:
        set_item_state(state_key, current_time)
        yield 0, f"{prefix}{title}: N/A (started monitoring)"
        return

    state, text, _metric = check_levels(
        current_time - previous_sync,
        None,
        levels,
        human_readable_func=render.timespan,
        infoname=title,
    )
    if state:
        # Non-zero states are always reported without the newline prefix.
        yield state, text
    else:
        yield state, f"{prefix}{text}"
Ejemplo n.º 2
0
def util_counter(stats: CPUInfo, this_time: float) -> CPUInfo:
    """Return the per-counter differences since the previous call.

    Every element of ``stats`` after the first is treated as a counter.
    For the n-th counter (starting at 1) the last seen ``(time, value)`` pair
    is kept in the item state under "cpu.util.<n>"; when nothing is stored
    yet, the previous value defaults to 0.

    Args:
        stats: Current counter values; ``stats.name`` is carried over to the
            result unchanged.
        this_time: Timestamp stored together with each current value.

    Returns:
        A new ``CPUInfo`` built from ``stats.name`` and the differences.
    """

    def _advance(position, current):
        # Look up the previous value, then store the current one for next time.
        state_key = "cpu.util.%d" % position
        previous = get_item_state(state_key, (0, 0))[1]
        delta = current - previous
        set_item_state(state_key, (this_time, current))
        return delta

    deltas = [
        _advance(position, current)
        for position, current in enumerate(stats[1:], start=1)
    ]
    return CPUInfo(stats.name, *deltas)
Ejemplo n.º 3
0
def cpu_util_time(this_time, core, perc, threshold, warn_core, crit_core):
    """Check for how long a single core has been above a utilization threshold.

    The time at which the core was first seen above ``threshold`` is kept in
    the item state under "cpu.util.core.high.<core>" and is cleared as soon as
    ``perc`` is no longer above the threshold.

    Args:
        this_time: Timestamp of the current check.
        core: Core identifier, used in the state key and in the output text.
        perc: Current utilization of the core.
        threshold: Utilization above which the core counts as highly loaded.
        warn_core: First element of the levels tuple applied to the duration.
        crit_core: Second element of the levels tuple applied to the duration.

    Returns:
        ``(state, infotext, [])`` when ``check_levels`` reports a non-zero
        state for the high-load duration, otherwise ``(0, "", [])``.
    """
    state_key = "cpu.util.core.high.%s" % core

    # Not (or no longer) above the threshold: forget any recorded start time.
    if not perc > threshold:
        clear_item_state(state_key)
        return 0, "", []

    high_since = get_item_state(state_key, 0)
    state, infotext, _perfdata = check_levels(
        this_time - high_since,
        "%s_is_under_high_load_for" % core,  # Not used
        (warn_core, crit_core),
        human_readable_func=get_age_human_readable,
        infoname="%s is under high load for" % core,
    )

    if high_since == 0:
        # First observation above the threshold: only start the clock; the
        # duration evaluated above is meaningless and is discarded.
        set_item_state(state_key, this_time)
        return 0, "", []

    if state:
        return state, infotext, []
    return 0, "", []
Ejemplo n.º 4
0
def _get_value_diff(diff_name, svc_value, timespan):
    """Return how much ``svc_value`` changed within the current time window.

    A reference ``(time, value)`` pair is kept in the item state under
    ``diff_name``. While less than ``timespan`` seconds have passed since the
    reference was stored, the difference to the reference value is returned
    and the reference is left untouched. On the first call, or once the
    window has elapsed, the reference is replaced by the current value and 0
    is returned.

    Args:
        diff_name: Item state key of the reference pair.
        svc_value: Current value.
        timespan: Window length in seconds (anything ``float()`` accepts).

    Returns:
        The difference to the reference value, or 0 when the reference was
        (re)initialized by this call.
    """
    now = time.time()
    reference = get_item_state(diff_name, None)

    if reference is not None:
        reference_time, reference_value = reference
        # A clock that moved backwards counts as "no time elapsed".
        elapsed = max(now - reference_time, 0)
        if elapsed < float(timespan):
            return svc_value - reference_value

    # First call or window expired: start a new window at the current value.
    set_item_state(diff_name, (now, svc_value))
    return 0
Ejemplo n.º 5
0
def check_cpu_util_unix(values: CPUInfo, params, cores=None, values_counter=True):
    """Yield check results for the CPU utilization of a Unix-like system.

    Args:
        values: Overall CPU data. In counter mode it is turned into
            differences via ``util_counter``; otherwise its ``user``,
            ``system``, ``iowait``, ``steal``, ``guest`` and ``util_total``
            attributes are used directly as percentages.
        params: Check parameters; the keys 'iowait' and 'steal' are looked up
            here, and the whole mapping is handed on to ``check_cpu_util``.
        cores: Optional sequence of per-core data (each with ``name`` and
            ``util_total``) used to build the per-core summary.
        values_counter: True if ``values`` holds counters, False if it
            already holds percentages.

    Yields:
        The ``check_levels`` results for User, System and Wait, for Steal and
        Guest when the respective value in ``values`` is non-zero, followed
        by everything ``check_cpu_util`` yields for the total utilization.

    Raises:
        MKCounterWrapped: In counter mode, when the sum of all counter
            differences since the last check is zero.
    """
    this_time = time.time()
    # Only known in counter mode. Must be bound in percentage mode as well,
    # because the per-core loop below looks at it (it used to raise
    # UnboundLocalError for values_counter=False combined with cores).
    sum_jiffies = None
    if values_counter:
        diff_values = util_counter(values, this_time)
        sum_jiffies = diff_values.total_sum
        if sum_jiffies == 0:
            raise MKCounterWrapped("Too short time difference since last check")
        (user_perc, system_perc, wait_perc, steal_perc, guest_perc,
         util_total_perc) = diff_values.utils_perc
    else:
        user_perc = values.user
        system_perc = values.system
        wait_perc = values.iowait
        steal_perc = values.steal
        guest_perc = values.guest
        util_total_perc = values.util_total

    yield check_levels(user_perc,
                       'user',
                       None,
                       human_readable_func=get_percent_human_readable,
                       infoname="User")
    yield check_levels(system_perc,
                       'system',
                       None,
                       human_readable_func=get_percent_human_readable,
                       infoname="System")
    yield check_levels(wait_perc,
                       'wait',
                       params.get('iowait'),
                       human_readable_func=get_percent_human_readable,
                       infoname="Wait")

    # Compute values used in virtualized environments (Xen, etc.)
    # Only do this for counters that have counted at least one tick
    # since the system boot. This avoids silly output in systems
    # where these counters are not being used
    if values.steal:
        yield check_levels(steal_perc,
                           "steal",
                           params.get('steal'),
                           human_readable_func=get_percent_human_readable,
                           infoname="Steal")

    if values.guest:
        yield check_levels(guest_perc,
                           'guest',
                           None,
                           human_readable_func=get_percent_human_readable,
                           infoname="Guest")

    summary_cores = []
    if cores:
        for core in cores:
            if sum_jiffies is None:
                # Percentage mode: there are no jiffies to normalize by.
                # NOTE(review): assumes the per-core values are percentages
                # too in this mode, mirroring how `values.util_total` is
                # used above -- confirm against the callers.
                summary_cores.append((core.name, core.util_total))
                continue

            # Counter mode: share of this core in the overall jiffy
            # difference, scaled by the number of cores.
            state_key = "cpu.util.%s.total" % core.name
            prev_total = get_item_state(state_key, 0)
            util_total = core.util_total
            total_diff = util_total - prev_total
            set_item_state(state_key, util_total)
            total_perc = (100.0 * total_diff / sum_jiffies) * len(cores)
            summary_cores.append((core.name, total_perc))

    yield from check_cpu_util(util_total_perc,
                              params,
                              this_time,
                              summary_cores,
                              perf_max=None)