Example #1
0
def check_liebert_humidity_air(
    item: str,
    params: Parameters,
    section_liebert_humidity_air: Optional[ParsedSection],
    section_liebert_system: Optional[Dict[str, str]],
) -> CheckGenerator:
    """Check one Liebert air humidity reading against the configured levels.

    Nothing is yielded when either section is missing, when no value is
    found for the item, or when the value cannot be converted to a float.
    A value containing "Unavailable" while the system section reports the
    operating state "standby" results in a single OK result.

    Args:
        item: Name of the humidity reading to check.
        params: Must provide the keys 'levels' (upper) and 'levels_lower'.
        section_liebert_humidity_air: Parsed humidity section, or None.
        section_liebert_system: Parsed system section, or None.
    """
    # Both sections are required; without either one there is nothing to report.
    if section_liebert_humidity_air is None:
        return
    if section_liebert_system is None:
        return

    raw_value, unit = _get_item_data(item, section_liebert_humidity_air)
    if raw_value is None:
        return

    in_standby = section_liebert_system.get('Unit Operating State') == "standby"
    if "Unavailable" in raw_value and in_standby:
        yield Result(state=state.OK, summary="Unit is in standby (unavailable)")
        return

    try:
        humidity = float(raw_value)
    except ValueError:
        # Non-numeric readings are silently skipped.
        return

    def _render(reading):
        return '%.2f %s' % (reading, unit)

    yield from check_levels(
        value=humidity,
        metric_name='humidity',
        levels_upper=params['levels'],
        levels_lower=params['levels_lower'],
        render_func=_render,
    )
Example #2
0
def test_check_levels(value, kwargs, result):
    """Verify that utils.check_levels yields exactly the expected items."""
    produced = list(utils.check_levels(value, **kwargs))
    assert produced == result
Example #3
0
def check_temperature(
    reading: float,
    params: TempParamType,
    *,
    unique_name: Optional[str] = None,
    value_store: Optional[str] = None,
    dev_unit: Optional[str] = "c",
    dev_levels: Optional[Tuple[float, float]] = None,
    dev_levels_lower: Optional[Tuple[float, float]] = None,
    dev_status: Optional[StatusType] = None,
    dev_status_name: Optional[str] = None,
) -> CheckGenerator:
    """Check a temperature reading against user and/or device levels.

    The reading is converted to Celsius and evaluated twice: once against
    the levels configured by the user and once against the levels supplied
    by the device. Which of the two evaluations is reported is decided by
    the "device_levels_handling" setting. Optionally the temperature trend
    is checked as well. All internal computations are done in Celsius.

    Args:
        reading: The numeric temperature value itself.
        params: The user's configuration, see below. Legacy tuple
            parameters are migrated to the dict form.
        unique_name: The name under which trend data is tracked. The trend
            is only computed if this is given.
        value_store: Handed on to the trend computation.
        dev_unit: The unit of the device levels, and of the reading unless
            "input_unit" is configured. May be one of 'c', 'f' or 'k'.
            Default is 'c'.
        dev_levels: The device's upper levels (warn, crit).
        dev_levels_lower: The device's lower levels (warn, crit).
        dev_status: The status according to the device itself.
        dev_status_name: The device's own name for the status.

    Configuration:
        The parameter "params" may contain user configurable settings with
        the following keys:
            - input_unit -- The device's unit, user defined.
            - output_unit -- The unit by which to report.
            - levels -- Upper levels, user defined.
            - levels_lower -- Lower levels, user defined.
            - device_levels_handling -- One of the following modes:
                - usrdefault (default) -- Use the user's levels; fall back
                  to the device's levels if there are none.
                - usr -- Always use the user's levels, ignore the device's.
                - devdefault -- Use the device's levels; fall back to the
                  user's levels if there are none.
                - dev -- Always use the device's levels, ignore the user's.
                - best -- Report the least critical status of both.
                - worst -- Report the most critical status of both.
              Any other value results in nothing being yielded.
            - trend_compute -- If set, the temperature trend is computed:
                - period -- The period for the trend computation in
                  minutes, e.g. rise of 12°/60 min
                - trend_levels -- Temperature increase per period.
                  (warn, crit)
                - trend_levels_lower -- Temperature decrease per period.
                  (warn, crit)
                - trend_timeleft -- Time left until a CRITICAL temperature
                  level is reached (upper or lower).

    GUI:
         - cmk/gui/plugins/wato/check_parameters/temperature.py

    """
    # Legacy tuple parameters are converted to the dict form first.
    params = _migrate_params(params)

    unit_in = params.get("input_unit", dev_unit)
    unit_out = params.get("output_unit", "c")
    temp_celsius = to_celsius(reading, unit_in)

    # Levels configured by the user are already given in Celsius.
    user_upper = _validate_levels(params.get("levels"))
    user_lower = _validate_levels(params.get("levels_lower"))
    device_upper = to_celsius(dev_levels, dev_unit)
    device_lower = to_celsius(dev_levels_lower, dev_unit)

    handling = params.get("device_levels_handling", "usrdefault")

    def _render(celsius):
        return _render_temp_with_unit(celsius, unit_out)

    def _evaluate(upper, lower):
        # Same evaluation for both level sources; only the levels differ.
        return check_levels(
            value=temp_celsius,
            metric_name='temp',
            levels_upper=upper,
            levels_lower=lower,
            label='Temperature',
            render_func=_render,
        )

    def _crit(levels):
        return None if levels is None else levels[1]

    user_result, user_metric = _evaluate(user_upper, user_lower)
    assert isinstance(user_result, Result)

    device_result, device_metric = _evaluate(device_upper, device_lower)
    assert isinstance(device_result, Result)

    user_results = [user_result]
    device_results = [device_result]

    if unique_name is not None and params.get('trend_compute') is not None:
        # The trend is evaluated once per level source (user first, then device),
        # since the crit limits used for the time-left estimate differ.
        for collected, upper, lower in (
            (user_results, user_upper, user_lower),
            (device_results, device_upper, device_lower),
        ):
            collected.extend(
                _check_trend(
                    value_store=value_store,
                    temp=temp_celsius,
                    params=params['trend_compute'],
                    output_unit=unit_out,
                    crit_temp=_crit(upper),
                    crit_temp_lower=_crit(lower),
                    unique_name=unique_name,
                ))

    if dev_status is not None:
        # The device's own verdict only counts towards the device results.
        device_results.append(
            Result(
                state=state(dev_status),
                notice='State on device: %s' % dev_status_name,
            ))

    user_has_levels = user_upper is not None or user_lower is not None
    device_has_levels = device_upper is not None or device_lower is not None

    # Decide which evaluation to report and which configuration note to attach.
    if handling == 'usr':
        report_user = True
        config_note = 'Configuration: only use user levels'

    elif handling == 'dev':
        report_user = False
        config_note = 'Configuration: only use device levels'

    elif handling == 'usrdefault':
        if user_has_levels:
            report_user, suffix = True, '(used user levels)'
        elif device_has_levels:
            report_user, suffix = False, '(used device levels)'
        else:
            report_user, suffix = True, '(no levels found)'
        config_note = 'Configuration: prefer user levels over device levels %s' % suffix

    elif handling == 'devdefault':
        if device_has_levels:
            report_user, suffix = False, '(used device levels)'
        elif user_has_levels:
            report_user, suffix = True, '(used user levels)'
        else:
            report_user, suffix = False, '(no levels found)'
        config_note = 'Configuration: prefer device levels over user levels %s' % suffix

    elif handling in ('worst', 'best'):
        user_state = state.worst(*(entry.state for entry in user_results))
        device_state = state.worst(*(entry.state for entry in device_results))
        if handling == 'worst':
            decisive_state = state.worst(user_state, device_state)
            config_note = 'Configuration: show most critical state'
        else:
            decisive_state = state.best(user_state, device_state)
            config_note = 'Configuration: show least critical state'
        # On a tie the user results are reported.
        report_user = user_state == decisive_state

    else:
        # Unknown handling mode: nothing is reported.
        return

    if report_user:
        yield user_metric
        yield from user_results
    else:
        yield device_metric
        yield from device_results

    yield Result(state=state.OK, details=config_note)
Example #4
0
def _check_trend(
    value_store,
    temp: float,
    params: TrendComputeDict,
    output_unit: str,
    crit_temp: Optional[float],
    crit_temp_lower: Optional[float],
    unique_name: str,
) -> Generator[Result, None, None]:
    """Check the temperature trend and, optionally, the time left until a crit level.

    The current rate of change is obtained via get_rate and smoothed via
    get_average over the configured period. The resulting trend (change per
    period) is checked against "trend_levels" / "trend_levels_lower". If
    "trend_timeleft" is configured and the crit level in the direction of the
    trend is known, the estimated time until that level is reached is checked
    as well.

    Args:
        value_store: Storage handed to get_rate / get_average.
        temp: The current temperature.
        params: Trend configuration. "period" (minutes) is required;
            "trend_levels", "trend_levels_lower" and "trend_timeleft" are
            optional.
        output_unit: Unit used for rendering the trend.
        crit_temp: Upper crit level, used as limit for a rising trend.
        crit_temp_lower: Lower crit level, used as limit otherwise.
        unique_name: Distinguishes the value store keys of this sensor.

    Yields:
        The results produced by check_levels for the trend and, if
        applicable, for the time left.
    """
    trend_range_min = params["period"]
    this_time = time.time()

    # current rate since last check
    rate = get_rate(
        value_store=value_store,
        key="temp.%s.delta" % unique_name,
        time=this_time,
        value=temp,
    )

    # average trend, initialized with initial temperature value on first check
    rate_avg = get_average(
        value_store=value_store,
        key="temp.%s.trend" % unique_name,
        time=this_time,
        value=rate,
        backlog_minutes=trend_range_min,
    )

    # NOTE(review): rate_avg is presumably a per-second rate, hence the
    # factor of 60 to scale it to the period given in minutes -- confirm
    # against get_rate.
    trend = rate_avg * trend_range_min * 60.0
    levels_upper_trend = _validate_levels(params.get('trend_levels'))

    levels_lower_trend = _validate_levels(params.get('trend_levels_lower'))
    if levels_lower_trend is not None:
        # GUI representation of this parameter is labelled 'temperature decrease'; the user may input this
        # as a positive or negative value
        levels_lower_trend = (-abs(levels_lower_trend[0]),
                              -abs(levels_lower_trend[1]))

    def _render_trend(value):
        return render_temp(
            value,
            output_unit,
            relative=True,
            sign=True,
        ) + temp_unitsym[output_unit] + ' per ' + str(trend_range_min) + ' min'

    yield from check_levels(
        value=trend,
        levels_upper=levels_upper_trend,
        levels_lower=levels_lower_trend,
        label='Temperature trend',
        render_func=_render_trend,
    )

    if "trend_timeleft" not in params:
        return

    # A rising trend heads for the upper crit level, anything else for the lower one.
    limit = crit_temp if trend > 0 else crit_temp_lower
    if limit is None:
        # crit levels may not be set
        return

    # compute time until temperature limit is reached
    warn_timeleft_min, crit_timeleft_min = params["trend_timeleft"]
    if warn_timeleft_min is None or crit_timeleft_min is None:
        levels_timeleft_sec = None
    else:
        levels_timeleft_sec = (warn_timeleft_min * 60.0,
                               crit_timeleft_min * 60.0)

    if rate_avg == 0:
        # With a zero average rate the limit is never reached. Dividing by it
        # would raise ZeroDivisionError, so the time-left check is skipped.
        return

    diff_to_limit = limit - temp
    seconds_left = float(diff_to_limit / rate_avg)

    yield from check_levels(
        value=seconds_left,
        levels_lower=levels_timeleft_sec,
        render_func=timespan,
        label='Time until temperature limit reached',
    )
Example #5
0
def check_livestatus_status(item: str, params: Parameters, section_livestatus_status: ParsedSection,
                            section_livestatus_ssl_certs: ParsedSection) -> CheckGenerator:
    """Check the livestatus status data of one site.

    Yields, in this order: the rates of the performance counters, the CMC
    specific figures (only if the program version starts with "Check_MK"),
    the number of monitored hosts and services, the core and livestatus
    versions, the remaining validity of the site certificate (if known) and
    a result for every disabled core setting whose configured state is not 0.

    Nothing is yielded if the item is not part of the status section. If the
    item is present but has no status data, the state configured under
    "site_stopped" is reported.

    Args:
        item: The site to check.
        params: Check parameters (levels and states for the settings).
        section_livestatus_status: Status data per site.
        section_livestatus_ssl_certs: Certificate expiry data per site.
    """
    if item not in section_livestatus_status:
        return
    site = section_livestatus_status[item]

    # A site without status data is not running. This happens regularly due to
    # restarts of the core; the resulting state is configurable. The
    # availability of a site is monitored with 'omd_status'.
    if site is None:
        yield Result(state=state(params["site_stopped"]), summary="Site is currently not running")
        return

    # Performance counters, reported as rates
    now = time.time()
    counters = (
        ("host_checks", "HostChecks"),
        ("service_checks", "ServiceChecks"),
        ("forks", "ProcessCreations"),
        ("connections", "LivestatusConnects"),
        ("requests", "LivestatusRequests"),
        ("log_messages", "LogMessages"),
    )
    for counter, counter_title in counters:
        per_second = get_rate(
            value_store=get_value_store(),
            key="livestatus_status.%s.%s" % (item, counter),
            time=now,
            value=float(site[counter]),
        )
        yield Result(state=state.OK, summary="%s: %.1f/s" % (counter_title, per_second))
        yield Metric(name=counter, value=per_second)

    core_version = site["program_version"]

    if core_version.startswith("Check_MK"):
        # We have a CMC here.

        def _render_seconds(x):
            return "%.3fs" % x

        def _render_per_second(x):
            return "%.1f/s" % x

        cmc_figures = (
            ("average_latency_generic", "Average check latency", 1, _render_seconds),
            ("average_latency_cmk", "Average Checkmk latency", 1, _render_seconds),
            ("helper_usage_generic", "Check helper usage", 100, render.percent),
            ("helper_usage_cmk", "Checkmk helper usage", 100, render.percent),
            ("livestatus_usage", "Livestatus usage", 100, render.percent),
            ("livestatus_overflows_rate", "Livestatus overflow rate", 1, _render_per_second),
        )
        for figure, figure_title, scale, renderer in cmc_figures:
            if figure == "helper_usage_cmk" and site[figure] == "":
                # Quick workaround for enabled checker/fetcher mode. Will soon be replaced once
                # the livestatus status table has been updated.
                continue

            yield from check_levels(
                value=scale * float(site[figure]),
                metric_name=figure,
                levels_upper=params.get(figure),
                render_func=renderer,
                label=figure_title,
            )

    # Number of monitored hosts and services
    for count_key, metric, levels_key, count_title in (
        ("num_hosts", "monitored_hosts", "levels_hosts", "Monitored Hosts"),
        ("num_services", "monitored_services", "levels_services", "Services"),
    ):
        yield from check_levels(
            value=int(site[count_key]),
            metric_name=metric,
            levels_upper=params.get(levels_key),
            label=count_title,
        )

    # Output some general information
    yield Result(state=state.OK,
                 summary="Core version: %s" % core_version.replace("Check_MK", "Checkmk"))
    yield Result(state=state.OK, summary="Livestatus version: %s" % site["livestatus_version"])

    # The expiry date should only be empty in one case that we know of so far:
    # the value is collected via the linux special agent with the command 'date'.
    # On 32bit systems 'date' fails for dates after 19th Jan 2038 (32bit limit)
    # and thus delivers no result. This happens e.g. for hacky raspberry pi
    # setups that are not officially supported.
    pem_path = "/omd/sites/%s/etc/ssl/sites/%s.pem" % (item, item)
    valid_until = section_livestatus_ssl_certs.get(item, {}).get(pem_path)
    if not (valid_until is None or valid_until == ''):
        days_left = (int(valid_until) - time.time()) / 86400.0
        expiry_text = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(valid_until)))

        yield from check_levels(
            value=days_left,
            metric_name="site_cert_days",
            label="Site certificate validity (until %s)" % expiry_text,
            levels_lower=(params["site_cert_days"][0], params["site_cert_days"][1]),
        )

    # "enable_event_handlers" is deliberately not part of this table, it is
    # treated as a special case further down.
    switches = (
        ("execute_host_checks", "Active host checks are disabled"),
        ("execute_service_checks", "Active service checks are disabled"),
        ("accept_passive_host_checks", "Passive host check are disabled"),
        ("accept_passive_service_checks", "Passive service checks are disabled"),
        ("check_host_freshness", "Host freshness checking is disabled"),
        ("check_service_freshness", "Service freshness checking is disabled"),
        ("enable_flap_detection", "Flap detection is disabled"),
        ("enable_notifications", "Notifications are disabled"),
        ("process_performance_data", "Performance data is disabled"),
        ("check_external_commands", "External commands are disabled"),
    )
    # Check settings of enablings. Here we are quiet unless a non-OK state is found
    for switch, message in switches:
        if site[switch] == '1':
            continue
        configured_state = params[switch]
        if configured_state != 0:
            yield Result(state=state(configured_state), summary=message)

    # special considerations for enable_event_handlers
    if core_version.startswith("Check_MK 1.2.6"):
        # In CMC <= 1.2.6 event handlers cannot be enabled. So never warn.
        return
    if site.get("has_event_handlers", '1') == '0':
        # After update from < 1.2.7 the check would warn about disabled alert
        # handlers since they are disabled in this case. But the user has no alert
        # handlers defined, so this is nothing to warn about. Start to warn when
        # the user defines the first alert handlers.
        return
    if site["enable_event_handlers"] != '1' and params["enable_event_handlers"] != 0:
        yield Result(state=state(params["enable_event_handlers"]),
                     summary="Alert handlers are disabled")