Example #1
0
def wmi_calculate_raw_average_time(table, row, column):
    """Return the quotient of two per-second counter rates from a WMI table.

    Reads the raw counter in *column* together with its matching
    "<column>_Base" counter, converts both to rates via get_rate() and
    returns measure-rate / base-rate (0 if the base rate is zero).
    """
    raw_measure = int(table.get(row, column))
    raw_base = int(table.get(row, column + "_Base"))
    sample_time = get_wmi_time(table, row)

    rate_measure = get_rate("%s_%s" % (column, table.name()), sample_time,
                            raw_measure)
    rate_base = get_rate("%s_%s_Base" % (column, table.name()), sample_time,
                         raw_base)

    # Avoid division by zero when the base counter did not move.
    if rate_base == 0:
        return 0
    return rate_measure / rate_base
Example #2
0
def wmi_yield_raw_persec(
    table: WMITable,
    row: Union[str, int],
    column: Union[str, int],
    infoname: Optional[str],
    perfvar: Optional[str],
    levels=None,
):
    """Check the per-second rate of a single raw WMI counter against levels."""
    if table is None:
        # This case may be when a check was discovered with a table which subsequently
        # disappeared again. We expect to get None in this case and return some "nothing happened"
        return 0, "", []

    # An empty row key addresses the first (and only) row of the table.
    effective_row = 0 if row == "" else row

    try:
        raw_value = table.get(effective_row, column)
        assert raw_value
    except KeyError:
        return 3, "Item not present anymore", []

    rate = get_rate("%s_%s" % (column, table.name),
                    get_wmi_time(table, effective_row), int(raw_value))

    return check_levels(
        rate,
        perfvar,
        get_levels_quadruple(levels),
        infoname=infoname,
    )
Example #3
0
def check_firewall_if(item, params, data):
    """Yield check results for per-counter packet rates of a firewall interface.

    For every counter in *data* the rate is computed; if "averaging" is
    configured in *params*, the averaged rate is checked against the levels
    instead of the raw rate (the raw rate is always used for perfdata).
    """
    infotext_names = {
        "ip4_in_blocked": "Incoming IPv4 packets blocked: ",
    }

    now = time.time()

    for counter_name, counter_value in data.items():
        countername = "firewall_if-%s.%s" % (counter_name, item)
        rate = get_rate(countername, now, counter_value, onwrap=RAISE)

        backlog_minutes = params.get("averaging")
        if backlog_minutes:
            check_against = get_average(countername, now, rate, backlog_minutes)
        else:
            check_against = rate

        status, infotext, extraperf = check_levels(
            check_against,
            counter_name,
            params.get(counter_name),
            human_readable_func=lambda x: "%.2f pkts/s" % x,
            infoname=infotext_names[counter_name],
        )

        # Perfdata always carries the raw (non-averaged) rate.
        perfdata: List[Any] = [(counter_name, rate)] + extraperf[:1]  # type: ignore[operator]

        yield status, infotext, perfdata
Example #4
0
def check_hp_msa_io(item, params, parsed):
    """Compute read/write throughput rates per disk and delegate to diskstat.

    Missing or malformed counters are skipped silently (best effort), so a
    disk may end up with only one of the two throughput values, or none.
    """
    now = time.time()
    disks = {}
    for key, values in parsed.items():
        disk = disks.setdefault(key, {})
        for field, metric, suffix in (
            ("data-read-numeric", "read_throughput", "read"),
            ("data-written-numeric", "write_throughput", "write"),
        ):
            try:
                disk[metric] = get_rate("%s_%s" % (key, suffix), now,
                                        int(values[field]))
            except (KeyError, ValueError, TypeError):
                pass

    return check_diskstat_dict(item, params, disks)
Example #5
0
def wmi_calculate_raw_average_time(
    table: WMITable,
    row: Union[str, int],
    column: str,
) -> float:
    """Return the quotient of the per-second rates of a WMI counter and its
    matching "<column>_Base" counter (0 if the base rate is zero)."""
    measure = table.get(row, column)
    base = table.get(row, column + "_Base")
    assert measure
    assert base

    sample_time = get_wmi_time(table, row)

    counter_id = "%s_%s" % (column, table.name)
    measure_rate = get_rate(counter_id, sample_time, int(measure))
    base_rate = get_rate(counter_id + "_Base", sample_time, int(base))

    # Guard against division by zero when the base counter did not move.
    if base_rate == 0:
        return 0
    return measure_rate / base_rate
Example #6
0
def check_cpu_util_linux_container(_no_item, params, parsed):
    """Check CPU utilization of a Linux container.

    The rate of container ticks is computed against the system tick counter
    (system ticks serve as the "time" axis), yielding the fraction of system
    ticks spent in the container.
    """
    ticks = tuple(parsed.get(key) for key in ("container_ticks", "system_ticks", "num_cpus"))
    if any(value is None for value in ticks):
        return
    con_ticks, sys_ticks, num_cpus = ticks

    cpu_tick_rate = get_rate("container_ticks", sys_ticks, con_ticks)

    # Scale the per-system-tick fraction to a percentage over all CPUs.
    cpu_usage = cpu_tick_rate * num_cpus * 100.0

    return check_cpu_util(cpu_usage, params, perf_max=num_cpus * 100)
Example #7
0
def handle_graylog_messages(messages, params):
    """Yield check results for graylog message counts.

    Produces three results: the absolute message count, the averaged
    message rate, and the count difference over a configurable time span.
    """
    yield check_levels(
        messages,
        "messages",
        params.get("msgs_upper", (None, None)) + params.get("msgs_lower", (None, None)),
        human_readable_func=int,
        infoname="Total number of messages",
    )

    avg_key = "msgs_avg"
    backlog_minutes = params.get(avg_key, 30)
    now = time.time()

    # Rate of incoming messages, smoothed over the configured backlog.
    msg_rate = get_rate("graylog_%s.rate" % avg_key, now, messages)
    avg_rate = get_average("graylog_%s.avg" % avg_key, now, msg_rate, backlog_minutes)

    yield check_levels(
        avg_rate,
        avg_key,
        params.get("msgs_avg_upper", (None, None)) + params.get("msgs_avg_lower", (None, None)),
        infoname="Average number of messages (%s)" %
        get_age_human_readable(backlog_minutes * 60),
    )

    diff_key = "msgs_diff"
    timespan = params.get(diff_key, 1800)

    # Absolute change of the counter over the configured timespan.
    diff = _get_value_diff("graylog_%s" % diff_key, messages, timespan)

    yield check_levels(
        diff,
        "graylog_diff",
        params.get("%s_upper" % diff_key, (None, None)) + params.get("%s_lower" % diff_key, (None, None)),
        infoname="Total number of messages last %s" %
        get_age_human_readable(timespan),
    )
Example #8
0
def check_azure_metric(  # pylint: disable=too-many-locals
        resource,
        metric_key,
        cmk_key,
        display_name,
        levels=None,
        levels_lower=None,
        use_rate=False):
    """Check a single metric of an azure resource against upper/lower levels.

    Args:
      resource: azure resource mapping; its 'metrics' dict is looked up.
      metric_key: key of the metric within the resource's metrics.
      cmk_key: perfdata variable name passed to check_levels.
      display_name: human readable metric name for the infotext.
      levels: optional upper (warn, crit) tuple.
      levels_lower: optional lower (warn, crit) tuple.
      use_rate: if True, check the rate of the metric instead of its value.

    Returns:
      None if the metric is absent, otherwise a (state, infotext, perfdata)
      tuple (state 3 if the metric value is None).
    """
    metric = resource.get('metrics', {}).get(metric_key)
    if metric is None:
        return None

    # Bail out on a missing value *before* the rate computation: previously a
    # None value with use_rate=True was fed straight into get_rate and only
    # checked afterwards, which could never report the intended UNKNOWN.
    if metric.value is None:
        return 3, "Metric %s is 'None'" % display_name, []

    if use_rate:
        countername = "%s.%s" % (resource['id'], metric_key)
        value = get_rate(countername, time.time(), metric.value)
        unit = "%s_rate" % metric.unit
    else:
        value = metric.value
        unit = metric.unit

    # convert to SI-unit
    if unit == "milli_seconds":
        value /= 1000.
    elif unit == "seconds_rate":
        # we got seconds, but we computed the rate -> seconds per second:
        # how long happened something / time period = percent of the time
        # e.g. CPU time: how much percent of of the time was the CPU busy.
        value *= 100.
        unit = "percent"

    return check_levels(
        value,
        cmk_key,
        (levels or (None, None)) + (levels_lower or (None, None)),
        infoname=display_name,
        human_readable_func=_AZURE_METRIC_FMT.get(
            unit, str),  # type: ignore[arg-type]
        boundaries=(0, None),
    )
Example #9
0
def _check_diskstat_old(item, params, this_time, info):
    """Sum sector counters over all devices and report throughput for *item*."""
    # Column index of the requested counter in the diskstat lines.
    try:
        index = {'read': 2, 'write': 3}[item]
    except KeyError:
        return (3, "invalid item %s" % (item,))

    total = 0
    for line in info:
        if line[0] is not None:
            return 3, "read/write mode not supported in a cluster"
        # Only plain device lines (no space in the name) are summed up.
        if ' ' not in line[1]:
            total += int(line[index])

    per_sec = get_rate("diskstat." + item, this_time, total)
    # Diskstat output is in sectors of 512 bytes: 2048 sectors = 1 MB.
    mb_per_s = per_sec / 2048.0
    kb_per_s = per_sec / 2.0
    return (0, "%.1f MB/s" % mb_per_s, [(item, "%f" % kb_per_s)])
Example #10
0
def _check_diskstat_old(
    item: str, params: Any, this_time: float, info: Sequence[Sequence[Any]]
) -> Union[Tuple[int, str], Tuple[int, str, Sequence[Tuple[str, str]]]]:
    """Sum sector counters over all devices and report throughput for *item*."""
    # Column index of the requested counter in the diskstat lines.
    try:
        index = {"read": 2, "write": 3}[item]
    except KeyError:
        return (3, "invalid item %s" % (item, ))

    total = 0
    for line in info:
        if line[0] is not None:
            return 3, "read/write mode not supported in a cluster"
        # Only plain device lines (no space in the name) are summed up.
        if " " not in line[1]:
            total += int(line[index])

    per_sec = get_rate("diskstat." + item, this_time, total)
    # Diskstat output is in sectors of 512 bytes: 2048 sectors = 1 MB.
    mb_per_s = per_sec / 2048.0
    kb_per_s = per_sec / 2.0
    return (0, "%.1f MB/s" % mb_per_s, [(item, "%f" % kb_per_s)])
Example #11
0
def size_trend(
    check,
    item,
    resource,
    levels,
    used_mb,
    size_mb: float,
    timestamp=None,
):  # pylint: disable=function-redefined
    """Trend computation for size related checks of disks, ram, etc.
    Trends are computed in two steps. In the first step the delta to
    the last check is computed, using a normal check_mk counter.
    In the second step an average over that counter is computed to
    make a long-term prediction.

    Note:
      This function is experimental and may change in future releases.
      Use at your own risk!

    Args:
      check (str): The name of the check, e.g. "df".
      item (str): The name of the item, e.g. the mountpoint "/" for df.
      resource (str): The resource in question, e.g. "disk", "ram", "swap".
      levels (dict): Level parameters for the trend computation. Items:
          "trend_range"          : 24,       # interval for the trend in hours
          "trend_perfdata"       : True      # generate performance data for trends
          "trend_bytes"          : (10, 20), # change during trend_range
          "trend_shrinking_bytes": (16, 32), # Bytes of shrinking during trend_range
          "trend_perc"           : (1, 2),   # percent change during trend_range
          "trend_shrinking_perc" : (1, 2),   # percent decreasing change during trend_range
          "trend_timeleft"       : (72, 48)  # time left in hours until full
          "trend_showtimeleft"   : True      # display time left in infotext
        The item "trend_range" is required. All other items are optional.
      timestamp (float, optional): Time in secs used to calculate the rate
        and average. Defaults to "None".
      used_mb (float): Used space in MB.
      size_mb (float): Max. available space in MB.

    Returns:
      A tuple of (state, infotext, perfdata) for the trend computation.
      If a MKCounterWrapped occurs (i.e. there is not enough data
      present for the trend computation) the tuple (0, '', []) is
      returned.
    """

    # perfdata mixes plain (name, value) tuples with full 6-tuples
    # (name, value, warn, crit, min, max), hence the Union annotation.
    perfdata: List[
        Union[  #
            Tuple[str, float],  #
            Tuple[str, float, Optional[float], Optional[float], Optional[float], Optional[float]],
        ]
    ]
    state, infotext, perfdata, problems = 0, "", [], []

    MB = 1024.0 * 1024.0
    H24 = 60 * 60 * 24

    range_hours = levels["trend_range"]
    range_sec = range_hours * 3600.0
    if not timestamp:
        timestamp = time.time()

    # compute current rate in MB/s by computing delta since last check
    try:
        rate = get_rate(
            "%s.%s.delta" % (check, item), timestamp, used_mb, allow_negative=True, onwrap=RAISE
        )
    except MKCounterWrapped:
        # need more data for computing a trend
        return 0, "", []

    if levels.get("trend_perfdata"):
        # growth is reported in MB per 24h
        perfdata.append(("growth", rate * H24))

    # average trend in MB/s, initialized with zero (by default)
    rate_avg = get_average("%s.%s.trend" % (check, item), timestamp, rate, range_sec / 60.0)

    # total growth over the configured trend range, in MB
    trend = rate_avg * range_sec
    sign = "+" if trend > 0 else ""
    infotext += ", trend: %s%s / %g hours" % (
        sign,
        get_bytes_human_readable(trend * MB),
        range_hours,
    )

    # levels for performance data
    warn_perf: Optional[float] = None
    crit_perf: Optional[float] = None

    # apply levels for absolute growth / interval
    trend_bytes = levels.get("trend_bytes")
    if trend_bytes:
        wa, cr = trend_bytes
        warn_perf, crit_perf = wa / MB, cr / MB
        if trend * MB >= wa:
            # The state marker "(!" is completed to "(!)" or "(!!)" below,
            # depending on whether the crit level is also exceeded.
            problems.append(
                "growing too fast (warn/crit at %s/%s per %.1f h)(!"
                % (
                    get_bytes_human_readable(wa),
                    get_bytes_human_readable(cr),
                    range_hours,
                )
            )
            state = max(1, state)
            if trend * MB >= cr:
                state = 2
                problems[-1] += "!"
            problems[-1] += ")"

    tmp_state, tmp_problem = _check_shrinking(
        trend * MB,
        levels.get("trend_shrinking_bytes"),
        range_hours,
        get_bytes_human_readable,
    )
    if tmp_state > 0:
        state = max(state, tmp_state)
        problems.append(tmp_problem)

    # apply levels for growth relative to filesystem size
    trend_perc: Optional[Tuple[float, float]] = levels.get("trend_perc")
    if trend_perc:
        wa_perc, cr_perc = trend_perc
        wa = wa_perc / 100.0 * size_mb
        cr = cr_perc / 100.0 * size_mb
        if warn_perf is not None:
            assert crit_perf is not None
            # combine with absolute levels: the stricter one wins
            warn_perf = min(warn_perf, wa)
            crit_perf = min(crit_perf, cr)
        else:
            warn_perf, crit_perf = wa, cr
        if trend >= wa:
            problems.append(
                "growing too fast (warn/crit at %s/%s per %.1f h)(!"
                % (
                    get_percent_human_readable(wa_perc),
                    get_percent_human_readable(cr_perc),
                    range_hours,
                )
            )
            state = max(1, state)
            if trend >= cr:
                state = 2
                problems[-1] += "!"
            problems[-1] += ")"

    tmp_state, tmp_problem = _check_shrinking(
        100 * trend / size_mb,
        levels.get("trend_shrinking_perc"),
        range_hours,
        get_percent_human_readable,
    )
    if tmp_state > 0:
        state = max(state, tmp_state)
        problems.append(tmp_problem)

    # compute time until filesystem is full (only for positive trend, of course)

    # The start value of hours_left is negative. The pnp graph and the perfometer
    # will interpret this as infinite -> not growing
    hours_left = -1
    if trend > 0:

        def format_hours(hours):
            # Human readable rendering of a duration given in hours.
            if hours > 365 * 24:
                return "more than a year"
            elif hours > 90 * 24:
                return "%0d months" % (hours / (30 * 24))  # fixed: true-division
            elif hours > 4 * 7 * 24:  # 4 weeks
                return "%0d weeks" % (hours / (7 * 24))  # fixed: true-division
            elif hours > 7 * 24:  # 1 week
                return "%0.1f weeks" % (hours / (7 * 24))  # fixed: true-division
            elif hours > 2 * 24:  # 2 days
                return "%0.1f days" % (hours / 24)  # fixed: true-division
            return "%d hours" % hours

        # linear extrapolation: remaining space divided by growth per range
        hours_left = (size_mb - used_mb) / trend * range_hours
        hours_txt = format_hours(hours_left)

        timeleft = levels.get("trend_timeleft")
        if timeleft:
            wa, cr = timeleft
            if hours_left <= cr:
                state = 2
                problems.append("only %s until %s full(!!)" % (hours_txt, resource))
            elif hours_left <= wa:
                state = max(state, 1)
                problems.append("only %s until %s full(!)" % (hours_txt, resource))
            elif hours_left <= wa * 2 or levels.get("trend_showtimeleft"):
                problems.append("time left until %s full: %s" % (resource, hours_txt))
        elif levels.get("trend_showtimeleft"):
            problems.append("time left until %s full: %s" % (resource, hours_txt))

    if levels.get("trend_perfdata"):
        # trend perfdata is normalized to MB per 24h
        perfdata.append(
            (
                "trend",
                rate_avg * H24,
                (warn_perf / range_sec * H24) if warn_perf is not None else None,
                (crit_perf / range_sec * H24) if crit_perf is not None else None,
                0,
                1.0 * size_mb / range_hours,
            )
        )

    if levels.get("trend_showtimeleft"):
        perfdata.append(("trend_hoursleft", hours_left))

    if problems:
        infotext += " - %s" % ", ".join(problems)

    return state, infotext, perfdata
Example #12
0
def check_temperature_trend(temp, params, output_unit, crit, crit_lower,
                            unique_name):
    """Compute the temperature trend and check it against trend levels.

    Returns a (status, infotext) tuple aggregated over all sub-checks
    (trend rate, upper/lower trend levels, time left until limit).
    """
    def combiner(status, infotext):
        # Accumulates results in attributes on the function object itself:
        # worst status wins, infotexts are joined with ", ".
        if "status" in dir(combiner):
            combiner.status = max(combiner.status, status)
        else:
            combiner.status = status

        if "infotext" in dir(combiner):
            combiner.infotext += ", " + infotext
        else:
            combiner.infotext = infotext

    try:
        trend_range_min = params["period"]
        this_time = time.time()

        # first compute current rate in C/s by computing delta since last check
        rate = get_rate("temp.%s.delta" % unique_name,
                        this_time,
                        temp,
                        allow_negative=True)

        # average trend, initialize with zero (by default), rate_avg is in C/s
        rate_avg = get_average("temp.%s.trend" % unique_name, this_time, rate,
                               trend_range_min)

        # rate_avg is growth in C/s, trend is in C per trend range minutes
        trend = float(rate_avg * trend_range_min * 60.0)
        sign = "+" if trend > 0 else ""
        combiner(
            0, "rate: %s%s/%g min" %
            (sign, render_temp(trend, output_unit, True), trend_range_min))

        if "trend_levels" in params:
            warn_upper_trend, crit_upper_trend = params["trend_levels"]
        else:
            warn_upper_trend = crit_upper_trend = None
        # it may be unclear to the user if he should specify temperature decrease as a negative
        # number or positive. This works either way. Having a positive lower bound makes no
        # sense anyway.
        if "trend_levels_lower" in params:
            warn_lower_trend, crit_lower_trend = [
                abs(x) * -1 for x in params["trend_levels_lower"]
            ]
        else:
            warn_lower_trend = crit_lower_trend = None

        if crit_upper_trend is not None and trend > crit_upper_trend:
            combiner(
                2,
                "rising faster than %s/%g min(!!)" % (render_temp(
                    crit_upper_trend, output_unit, True), trend_range_min),
            )
        elif warn_upper_trend is not None and trend > warn_upper_trend:
            combiner(
                1,
                "rising faster than %s/%g min(!)" % (render_temp(
                    warn_upper_trend, output_unit, True), trend_range_min),
            )
        elif crit_lower_trend is not None and trend < crit_lower_trend:
            combiner(
                2,
                "falling faster than %s/%g min(!!)" % (render_temp(
                    crit_lower_trend, output_unit, True), trend_range_min),
            )
        elif warn_lower_trend is not None and trend < warn_lower_trend:
            combiner(
                1,
                "falling faster than %s/%g min(!)" % (render_temp(
                    warn_lower_trend, output_unit, True), trend_range_min),
            )

        if "trend_timeleft" in params:
            # compute time until temperature limit is reached
            # The start value of minutes_left is negative. The pnp graph and the perfometer
            # will interpret this as infinite -> not growing
            limit = crit if trend > 0 else crit_lower

            if limit:  # crit levels may not be set, especially lower level
                diff_to_limit = limit - temp
                if rate_avg != 0.0:
                    minutes_left = (diff_to_limit /
                                    rate_avg) / 60.0  # fixed: true-division
                else:
                    minutes_left = float("inf")

                def format_minutes(minutes):
                    # Render minutes as "Xh YYm" above one hour.
                    if minutes > 60:  # hours
                        hours = int(minutes / 60.0)
                        minutes += -int(hours) * 60
                        return "%dh %02dm" % (hours, minutes)
                    return "%d minutes" % minutes

                warn, crit = params["trend_timeleft"]
                if minutes_left <= crit:
                    combiner(
                        2, "%s until temp limit reached(!!)" %
                        format_minutes(minutes_left))
                elif minutes_left <= warn:
                    combiner(
                        1, "%s until temp limit reached(!)" %
                        format_minutes(minutes_left))
    except MKCounterWrapped:
        pass
    # NOTE(review): if MKCounterWrapped is raised before the first combiner()
    # call, combiner.status/.infotext were never set and this access raises
    # AttributeError — presumably callers never hit that path; verify.
    return combiner.status, combiner.infotext
Example #13
0
def check_diskstat_line(this_time, item, params, line, mode='sectors'):
    """Check one diskstat line: read/write throughput, IOs, latency, queues.

    *line* layout: [node, name, read_ctr, write_ctr, (read_ios, write_ios,
    time_ms, read_ql, write_ql)] — trailing fields are optional.
    *mode* selects whether the counters are 512-byte sectors or bytes.
    Returns a (status, infotext, perfdata) tuple.
    """
    average_range = params.get("average")
    if average_range == 0:
        average_range = None  # disable averaging when 0 is set

    perfdata = []
    infos = []
    status = 0
    node = line[0]
    if node is not None and node != "":
        infos.append("Node %s" % node)

    for what, ctr in [("read", line[2]), ("write", line[3])]:
        if node:
            countername = "diskstat.%s.%s.%s" % (node, item, what)
        else:
            countername = "diskstat.%s.%s" % (item, what)

        # unpack levels now, need also for perfdata
        levels = params.get(what)
        if isinstance(levels, tuple):
            warn, crit = levels
        else:
            warn, crit = None, None

        per_sec = get_rate(countername, this_time, int(ctr))
        if mode == 'sectors':
            # compute IO rate in bytes/sec
            bytes_per_sec = per_sec * 512
        elif mode == 'bytes':
            bytes_per_sec = per_sec

        dsname = what

        # compute average of the rate over ___ minutes
        if average_range is not None:
            perfdata.append((dsname, bytes_per_sec, warn, crit))
            bytes_per_sec = get_average(countername + ".avg", this_time, bytes_per_sec,
                                        average_range)
            dsname += ".avg"

        # check levels
        state, text, extraperf = check_levels(bytes_per_sec,
                                              dsname,
                                              levels,
                                              scale=1048576,
                                              statemarkers=True,
                                              unit='/s',
                                              human_readable_func=get_bytes_human_readable,
                                              infoname=what)
        if text:
            infos.append(text)
        status = max(state, status)
        perfdata += extraperf

    # Add performance data for averaged IO
    # NOTE(review): this reorder assumes exactly four entries (raw + avg for
    # read and write) were appended above — confirm check_levels always
    # contributes exactly one perfdata entry per call here.
    if average_range is not None:
        perfdata = [perfdata[0], perfdata[2], perfdata[1], perfdata[3]]

    # Process IOs when available
    # NOTE(review): line[4]/line[5] are compared before int() conversion —
    # presumably already numeric at this point; verify against callers.
    ios_per_sec = None
    if len(line) >= 6 and line[4] >= 0 and line[5] > 0:
        reads, writes = map(int, line[4:6])
        if "read_ios" in params:
            warn, crit = params["read_ios"]
            if reads >= crit:
                infos.append('Read operations: %d (!!)' % (reads))
                status = 2
            elif reads >= warn:
                infos.append('Read operations: %d (!)' % (reads))
                status = max(status, 1)
        else:
            warn, crit = None, None
        if "write_ios" in params:
            warn, crit = params["write_ios"]
            if writes >= crit:
                infos.append('Write operations: %d (!!)' % (writes))
                status = 2
            elif writes >= warn:
                infos.append('Write operations: %d (!)' % (writes))
                status = max(status, 1)
        else:
            warn, crit = None, None
        ios = reads + writes
        # NOTE(review): countername here still holds the value from the last
        # iteration of the read/write loop above (the "write" counter name).
        ios_per_sec = get_rate(countername + ".ios", this_time, ios)
        infos.append("IOs: %.2f/sec" % ios_per_sec)

        if params.get("latency_perfdata"):
            perfdata.append(("ios", ios_per_sec))

    # Do Latency computation if this information is available:
    if len(line) >= 7 and line[6] >= 0:
        timems = int(line[6])
        timems_per_sec = get_rate(countername + ".time", this_time, timems)
        if not ios_per_sec:
            latency = 0.0
        else:
            latency = timems_per_sec / ios_per_sec  # fixed: true-division
        infos.append("Latency: %.2fms" % latency)
        if "latency" in params:
            warn, crit = params["latency"]
            if latency >= crit:
                status = 2
                infos[-1] += "(!!)"
            elif latency >= warn:
                status = max(status, 1)
                infos[-1] += "(!)"
        else:
            warn, crit = None, None

        if params.get("latency_perfdata"):
            perfdata.append(("latency", latency, warn, crit))

    # Queue Lengths (currently only Windows). Windows uses counters here.
    # I have not understood, why....
    if len(line) >= 9:
        for what, ctr in [("read", line[7]), ("write", line[8])]:
            countername = "diskstat.%s.ql.%s" % (item, what)
            levels = params.get(what + "_ql")
            if levels:
                warn, crit = levels
            else:
                warn, crit = None, None

            qlx = get_rate(countername, this_time, int(ctr))
            # scale the raw Windows queue-length counter down to a queue depth
            ql = qlx / 10000000.0
            infos.append(what.title() + " Queue: %.2f" % ql)

            # check levels
            if levels is not None:
                if ql >= crit:
                    status = 2
                    infos[-1] += "(!!)"
                elif ql >= warn:
                    status = max(status, 1)
                    infos[-1] += "(!)"

            if params.get("ql_perfdata"):
                perfdata.append((what + "_ql", ql))

    return (status, ", ".join(infos), perfdata)