Beispiel #1
0
def _check(
    params: Params,
    section: PodContainers,
    curr_timestamp_seconds: int,
    host_value_store: MutableMapping[str, Any],
) -> CheckResult:
    """Report the total container restart count and, when computable, the
    restart rate over the last hour, each checked against its own upper levels
    from ``params`` ("no_levels" disables the check)."""
    total_restarts = 0
    for container in section.containers.values():
        total_restarts += container.restart_count
    count_param = params["restart_count"]
    yield from check_levels(
        total_restarts,
        levels_upper=None if count_param == "no_levels" else count_param[1],
        metric_name="kube_pod_restart_count",
        render_func=str,
        label="Total",
    )
    rate = _calc_restart_rate_in_last_hour(
        total_restarts,
        curr_timestamp_seconds,
        host_value_store,
    )
    if rate is None:
        # not enough history yet to compute a rate
        return
    rate_param = params["restart_rate"]
    yield from check_levels(
        rate,
        levels_upper=None if rate_param == "no_levels" else rate_param[1],
        metric_name="kube_pod_restart_rate",
        render_func=str,
        label="In last hour",
    )
Beispiel #2
0
def check_resource_quota_resource(
    params: Params,
    resource_usage: Optional[PerformanceUsage],
    hard_requirement: Optional[HardResourceRequirement],
    resource_type: ResourceType,
    render_func: Callable[[float], str],
):
    """Check result for resource quota usage & requirement

    While the general picture is similar to check_resource, there is one key
    difference: for resources in check_resource, the resource section contains
    an aggregation of the request and limit values of the underlying
    containers.  In resource quota, the configured hard spec value is taken
    instead (aggregated configured values vs single configured value).

    Consequently, while the API data is mandatory for check_resource, it is
    optional for resource quota, and the service is allowed to only display
    the performance usage value.
    """
    if resource_usage is None:
        usage = None
    else:
        usage = resource_usage.resource.usage
    if usage is not None:
        usage_param = params["usage"]
        yield from check_levels(
            usage,
            label="Usage",
            levels_upper=None if usage_param == "no_levels" else usage_param[1],
            metric_name=f"kube_{resource_type}_usage",
            render_func=render_func,
            boundaries=(0.0, None),
        )

    if hard_requirement is None:
        return

    for raw_type, requirement_value in (
        ("request", hard_requirement.request),
        ("limit", hard_requirement.limit),
    ):
        if requirement_value is None:
            # user has not configured a value for this requirement
            continue

        requirement_type = cast(RequirementType, raw_type)
        if usage is not None and requirement_value != 0.0:
            # report as utilization of the measured usage
            yield from check_with_utilization(
                usage,
                resource_type=resource_type,
                requirement_type=requirement_type,
                kubernetes_object=None,
                requirement_value=requirement_value,
                params=params,
                render_func=render_func,
            )
        else:  # requirements with no usage
            yield from check_levels(
                requirement_value,
                label=absolute_title[requirement_type],
                metric_name=f"kube_{resource_type}_{requirement_type}",
                render_func=render_func,
                boundaries=(0.0, None),
            )
Beispiel #3
0
def check(params: Mapping[str, Any], section: PodConditions) -> CheckResult:
    """Check every condition in the section. Return one result if all conditions
    passed. Otherwise, return four results if one or more conditions are faulty
    or missing, defining each state according to `last_transition_time` and the
    respective levels in `params`.

    A pod transitions through the conditions in the order specified in
    `LOGICAL_ORDER`.  The last two conditions, `containersready` and `ready`,
    can be in a failed state simultaneously.  When a condition is missing (i.e.
    is `None`), it means that the previous condition is in a failed state."""
    # Fast path: every condition is present and passing -> single OK result.
    if all(cond and cond.status for _, cond in section):
        yield Result(state=State.OK, summary="Ready, all conditions passed")
        return
    section_dict = section.dict()
    curr_timestamp = time.time()
    for name in LOGICAL_ORDER:
        cond_service_text = ADDITIONAL_SERVICE_TEXT[name]
        cond = section_dict[name]
        if cond is not None:
            time_diff = curr_timestamp - cond[
                "last_transition_time"]  # keep the last-seen one
            if cond["status"] is True:
                yield Result(state=State.OK, summary=cond_service_text.passed)
                continue
            summary_prefix = f"{cond_service_text.not_passed} ({cond['reason']}: {cond['detail']})"
        else:
            # Missing condition: `time_diff` deliberately keeps the value from
            # the previously inspected (present) condition — see the
            # "keep the last-seen one" note above.
            # NOTE(review): assumes at least one earlier condition in
            # LOGICAL_ORDER is always present; otherwise `time_diff` would be
            # unbound here — TODO confirm against the section model.
            summary_prefix = cond_service_text.not_passed
        # Turn the elapsed time since the transition into a monitoring state
        # using the per-condition levels from `params`.
        for result in check_levels(time_diff,
                                   levels_upper=get_levels_for(params, name),
                                   render_func=render.timespan):
            yield Result(state=result.state,
                         summary=f"{summary_prefix} for {result.summary}")
Beispiel #4
0
def check_with_utilization(
    usage: float,
    resource_type: Literal["memory", "cpu"],
    requirement_type: Literal["limit", "request"],
    requirement_value: float,
    param: Param,
    render_func: Callable[[float], str],
) -> Iterable[Union[Metric, Result]]:
    """Yield a utilization Result — its summary enriched with the absolute
    usage and requirement values — followed by the utilization Metric."""
    percent_used = 100.0 * usage / requirement_value
    result, metric = check_levels(
        percent_used,
        levels_upper=None if param == "no_levels" else param[1],
        metric_name=f"kube_{resource_type}_{requirement_type}_utilization",
        render_func=render.percent,
        boundaries=(0.0, None),
    )
    assert isinstance(result, Result)
    # check_levels renders "<pct> (warn/crit at ...)"; splice the absolute
    # values between the percentage and the level information.
    rendered_pct, *level_info = result.summary.split()
    prefix = (f"{requirement_type.title()} utilization: {rendered_pct}"
              f" - {render_func(usage)} of {render_func(requirement_value)}")
    yield Result(state=result.state, summary=" ".join([prefix] + level_info))
    yield metric
Beispiel #5
0
def check_free_pods(vs_result: VSResultPercent, pod_resources: PodResources,
                    allocatable_pods: int) -> CheckResult:
    """Check the number of free pod slots against lower levels from the rule.

    Percentage levels are converted to absolute pod counts (rounded up)."""
    # At the cluster level there can be more pods pending than space available,
    # so the raw difference may be negative; clamp it at zero.
    occupied = len(pod_resources.pending) + len(pod_resources.running)
    num_free_pods = max(0, allocatable_pods - occupied)

    levels = None
    if vs_result != "no_levels":
        mode, level_values = vs_result
        if mode == "levels_abs":
            levels = Levels(*level_values)
        else:  # mode == "levels_perc"
            levels = Levels(*(math.ceil(pct * allocatable_pods / 100)
                              for pct in level_values))

    yield from check_levels(
        value=num_free_pods,
        label="Free",
        metric_name="kube_pod_free",
        levels_lower=levels,
        render_func=lambda x: str(int(x)),
        notice_only=True,
    )
Beispiel #6
0
def check(params: Mapping[str, Tuple[int, int]], section: PodContainers) -> CheckResult:
    """Check the total container restart count and the restart rate over the
    last hour against the optional upper levels in ``params``."""
    total = 0
    for container in section.containers.values():
        total += container.restart_count
    yield from check_levels(
        total,
        levels_upper=params.get("restart_count"),
        metric_name="kube_pod_restart_count",
        render_func=str,
        label="Total",
    )
    hourly_rate = _calc_restart_rate_in_last_hour(total)
    yield from check_levels(
        hourly_rate,
        levels_upper=params.get("restart_rate"),
        metric_name="kube_pod_restart_rate",
        render_func=str,
        label="In last hour",
    )
Beispiel #7
0
def check_resource(
    params: Params,
    resource_usage: Optional[PerformanceUsage],
    resources: Resources,
    allocatable_resource: Optional[AllocatableResource],
    resource_type: ResourceType,
    render_func: Callable[[float], str],
) -> CheckResult:
    """Check resource usage and every requirement (request/limit/allocatable).

    Non-zero requirements with available usage data are reported as
    utilization; zero-valued requirements (or missing usage) are reported as
    absolute values.  Request/limit summaries are annotated with a container
    count overview.
    """
    if resource_usage is not None:
        # NOTE: `usage` is only bound in this branch; the utilization branch
        # below re-checks `resource_usage is not None` before reading it.
        usage = resource_usage.resource.usage
        yield from check_levels(
            usage,
            label="Usage",
            levels_upper=params["usage"][1] if params["usage"] != "no_levels" else None,
            metric_name=f"kube_{resource_type}_usage",
            render_func=render_func,
            boundaries=(0.0, None),
        )
    for requirement_type, kubernetes_object, requirement in requirements_for_object(
        resources, allocatable_resource
    ):
        if requirement != 0.0 and resource_usage is not None:
            # Unpacking relies on check_with_utilization yielding exactly one
            # Result followed by one Metric.
            result, metric = check_with_utilization(
                usage,
                resource_type,
                requirement_type,
                kubernetes_object,
                requirement,
                params,
                render_func,
            )
            # Also expose the absolute requirement next to the utilization.
            yield Metric(f"kube_{resource_type}_{requirement_type}", requirement)
        else:  # requirements with no usage
            result, metric = check_levels(
                requirement,
                label=absolute_title[requirement_type],
                metric_name=f"kube_{resource_type}_{requirement_type}",
                render_func=render_func,
                boundaries=(0.0, None),
            )
        assert isinstance(result, Result)
        summary = result.summary
        if requirement_type in ["request", "limit"]:
            summary = f"{result.summary} ({count_overview(resources, requirement_type)})"
        yield Result(state=result.state, summary=summary)
        yield metric
Beispiel #8
0
def check_resource(
    params: Params,
    usage: Optional[Usage],
    resources: Resources,
    resource_type: Literal["memory", "cpu"],
    render_func: Callable[[float], str],
) -> CheckResult:
    """Check resource usage and each configured requirement.

    Non-zero requirements with available usage data are reported as
    utilization of the measured usage; otherwise the absolute requirement is
    reported.  Every requirement summary is annotated with a container count
    overview.
    """
    if usage is not None:
        total_usage = usage.usage
        usage_param = params["usage"]
        yield from check_levels(
            total_usage,
            label="Usage",
            levels_upper=None if usage_param == "no_levels" else usage_param[1],
            metric_name=f"kube_{resource_type}_usage",
            render_func=render_func,
            boundaries=(0.0, None),
        )

    for requirement_name, requirement in iterate_resources(resources):
        if usage is not None and requirement != 0.0:
            # check_with_utilization yields exactly one Result and one Metric
            result, metric = check_with_utilization(
                total_usage,
                resource_type,
                requirement_name,
                requirement,
                params[requirement_name],
                render_func,
            )
            # also expose the absolute requirement next to the utilization
            yield Metric(f"kube_{resource_type}_{requirement_name}",
                         requirement)
        else:  # requirements with no usage
            result, metric = check_levels(
                requirement,
                label=requirement_name.title(),
                metric_name=f"kube_{resource_type}_{requirement_name}",
                render_func=render_func,
                boundaries=(0.0, None),
            )
        assert isinstance(result, Result)
        overview = count_overview(resources, requirement_name)
        yield Result(
            state=result.state,
            summary=f"{result.summary} ({overview})",
        )
        yield metric
Beispiel #9
0
def check(params: Params, section: PodContainers) -> CheckResult:
    """Check the total container restart count and the restart rate over the
    last hour against the rule levels ("no_levels" disables a check)."""
    restarts = sum(c.restart_count for c in section.containers.values())
    count_param = params["restart_count"]
    yield from check_levels(
        restarts,
        levels_upper=count_param[1] if count_param != "no_levels" else None,
        metric_name="kube_pod_restart_count",
        render_func=str,
        label="Total",
    )
    rate_param = params["restart_rate"]
    yield from check_levels(
        _calc_restart_rate_in_last_hour(restarts),
        levels_upper=rate_param[1] if rate_param != "no_levels" else None,
        metric_name="kube_pod_restart_rate",
        render_func=str,
        label="In last hour",
    )
def check_proxmox_ve_vm_backup_status(
    now: datetime,
    params: Mapping[str, Any],
    section: Section,
) -> CheckResult:
    """If conditions provided calculate and compare age of last backup against
    provided levels and define result status accordingly
    >>> for result in check_proxmox_ve_vm_backup_status(
    ...     datetime.strptime("2020-12-07 21:28:02", '%Y-%m-%d %H:%M:%S'),
    ...     {'age_levels_upper': (93600, 180000)},
    ...     parse_proxmox_ve_vm_backup_status([[
    ...       '{"last_backup": {'
    ...       '  "archive_name": "/some/where/vzdump-qemu-109-2020_12_06-21_28_02.vma.zst",'
    ...       '  "archive_size": 1099511627776,'
    ...       '  "started_time": "2020-12-06 21:28:02",'
    ...       '  "transfer_time": 100}}']])):
    ...   print(result)
    Result(state=<State.OK: 0>, summary='Age: 1 day 0 hours')
    Metric('age', 86400.0, levels=(93600.0, 180000.0))
    Result(state=<State.OK: 0>, summary='Time: 2020-12-06 21:28:02')
    Result(state=<State.OK: 0>, summary='Size: 1.00 TiB')
    Result(state=<State.OK: 0>, summary='Bandwidth: 11.0 GB/s')
    """
    age_levels_upper = params.get("age_levels_upper")
    last_backup = section.get("last_backup")
    if not last_backup:
        # a missing backup is only a problem if age levels are configured
        yield (Result(state=State.CRIT, summary="No backup found") if age_levels_upper else  #
               Result(state=State.OK, summary="No backup found and none needed"))
        return
    if "error" in last_backup:
        yield Result(
            state=State.CRIT,
            summary=f"Last backup failed with message {last_backup['error']!r}",
        )
        return

    # Proxmox VE logs only provide time stamps w/o time zone so we have to hope the Proxmox VE node
    # is located close to us
    started_time = last_backup.get("started_time")
    if started_time:
        yield from check_levels(
            value=(now - started_time).total_seconds(),
            levels_upper=age_levels_upper,
            metric_name="age",
            render_func=render.timespan,
            label="Age",
        )
    yield Result(state=State.OK, summary=f"Time: {started_time}")
    yield Result(state=State.OK, summary=f"Size: {render.bytes(last_backup['archive_size'])}")

    # BUGFIX: 'transfer_time' used to be read unconditionally, raising KeyError
    # when absent and ZeroDivisionError when 0; only report bandwidth when a
    # positive transfer time is available.
    transfer_time = last_backup.get("transfer_time")
    if transfer_time:
        transfer_size = last_backup.get("transfer_size", last_backup.get("archive_size", 0))
        yield Result(
            state=State.OK,
            summary=f"Bandwidth: {render.iobandwidth(transfer_size / transfer_time)}",
        )
Beispiel #11
0
def check_mobileiron_misc(params: Mapping[str, Any],
                          section: Section) -> CheckResult:
    """Report the device's available capacity (percent), checked against the
    optional upper levels from params.  Nothing is reported when the section
    carries no (truthy) capacity value."""
    capacity = section.availableCapacity
    if not capacity:
        return
    yield from check_levels(
        label="Available capacity",
        value=capacity,
        levels_upper=params.get("available_capacity"),
        metric_name="capacity_perc",
        render_func=render.percent,
    )
def check_checkpoint_connections(
    params,
    section: Section,
) -> CheckResult:
    """Check the current number of firewall connections against the upper
    levels configured in params."""
    current_connections = section.current
    yield from check_levels(
        value=current_connections,
        levels_upper=params["levels"],
        metric_name="connections",
        label="Current connections",
        render_func=str,
    )
Beispiel #13
0
def check_kube_pod_status(
    params: Params,
    section_kube_pod_containers: Optional[PodContainers],
    section_kube_pod_init_containers: Optional[PodContainers],
    section_kube_pod_lifecycle: Optional[PodLifeCycle],
) -> CheckResult:
    """Report the pod's status message and, if levels are configured for its
    status group, check how long the pod has (cumulatively) been in any status
    of that group.  Per-status durations are tracked across check runs in the
    value store and reset whenever the pod enters a different status group.
    """
    assert section_kube_pod_lifecycle is not None, "Missing Api data"

    pod_containers = _pod_containers(section_kube_pod_containers)
    pod_init_containers = _pod_containers(section_kube_pod_init_containers)

    status_message = _pod_status_message(
        pod_containers,
        pod_init_containers,
        section_kube_pod_lifecycle,
    )

    now = time.time()
    value_store = get_value_store()
    group_levels, group_statuses = _get_group_from_params(
        status_message, params)
    if value_store.get("group") != group_statuses:
        # New status group (or first run): restart duration tracking from zero.
        value_store["group"] = group_statuses
        value_store["duration_per_status"] = {status_message: 0.0}
    else:
        # Same group: credit the time since the last run to the status that
        # was active during that interval.
        previous_status = value_store["previous_status"]
        value_store["duration_per_status"][
            previous_status] += now - value_store["previous_time"]
        value_store["duration_per_status"].setdefault(status_message, 0.0)

    value_store["previous_time"] = now
    value_store["previous_status"] = status_message

    levels = None if group_levels == "no_levels" else group_levels[1]

    if levels is None:
        yield Result(state=State.OK, summary=status_message)
    else:
        # The name `time` in the generator expression shadows the `time`
        # module only inside the expression itself.
        for result in check_levels(
                sum(time
                    for time in value_store["duration_per_status"].values()),
                render_func=render.timespan,
                levels_upper=levels,
        ):
            yield Result(state=result.state,
                         summary=f"{status_message}: since {result.summary}")
            # When several statuses of the group were seen, list them with
            # their individual durations as an additional notice.
            if len(value_store["duration_per_status"]) > 1:
                seen_statuses = ", ".join(
                    f"{s} ({render.timespan(t)})"
                    for s, t in value_store["duration_per_status"].items())
                yield Result(state=State.OK, notice=f"Seen: {seen_statuses}")

    yield from _container_status_details(pod_init_containers)
    yield from _container_status_details(pod_containers)
Beispiel #14
0
def check(params: KubeContainersLevelsUpperLower, section: ContainerCount) -> CheckResult:
    """Add a `total` entry to the section counts, then check every count
    against its `<name>_upper` / `<name>_lower` levels from params."""
    counts = section.dict()
    counts["total"] = sum(counts.values())
    for container_state, count in counts.items():
        yield from check_levels(
            count,
            levels_upper=params.get(f"{container_state}_upper"),
            levels_lower=params.get(f"{container_state}_lower"),
            metric_name=f"kube_node_container_count_{container_state}",
            label=container_state.title(),
        )
def check(params: K8sContainersLevelsUpperLower, section: ContainerCount) -> CheckResult:
    """Add a `total` entry to the section counts, then check every count
    against the per-name levels dict from params."""
    counts = section.dict()
    counts["total"] = sum(counts.values())
    for name, count in counts.items():
        level_spec = params.get(name, {})
        assert isinstance(level_spec, dict)
        yield from check_levels(
            count,
            levels_upper=level_spec.get("levels_upper"),
            levels_lower=level_spec.get("levels_lower"),
            metric_name=f"k8s_node_container_count_{name}",
            label=f"Number of {name} node containers",
        )
Beispiel #16
0
def check(params: Mapping[str, Optional[Tuple[float, float]]],
          section: PodConditions) -> CheckResult:
    """For each pod condition: OK if it passed, otherwise check how long it
    has not passed against the per-condition levels from params."""
    now = int(time())
    for name, value in section:
        if value.status:
            yield Result(state=State.OK,
                         summary=f"{name.title()} condition passed")
            continue
        not_passed_for = now - value.last_transition_time
        description = (f"{name.title()} condition not passed "
                       f"({value.reason}: {value.detail}) for {{}}")
        for result in check_levels(not_passed_for,
                                   levels_upper=params.get(name),
                                   render_func=render.timespan):
            yield Result(state=result.state,
                         summary=description.format(result.summary))
def check_proxmox_ve_snapshot_age(params: Mapping[str, Any],
                                  section: Section) -> CheckResult:
    """Check the age of the oldest VM snapshot against the configured levels."""
    snaptimes = section["snaptimes"]
    if not snaptimes:
        yield Result(state=State.OK, summary="No snapshot found")
        return

    # timestamps and timezones... clamp at 0 in case of clock skew
    oldest_age = max(0, time.time() - min(snaptimes))
    levels = params["oldest_levels"]
    yield from check_levels(
        oldest_age,
        levels_upper=levels,
        metric_name="age",
        render_func=render.timespan,
        label="Age",
        boundaries=levels,
    )
Beispiel #18
0
def check_apache_status(item: str, params: Mapping[str, Any],
                        section: Section) -> CheckResult:
    """Check one Apache server instance: derive per-second request/byte rates
    from the absolute counters, apply user levels to every known status entry,
    then append the scoreboard results."""
    # fix item name discovered before werk 2763
    if item.endswith(":None"):
        item = item[:-5]

    data = section.get(item)
    if data is None:
        return

    now = int(time.time())
    value_store = get_value_store()

    # turn absolute counters into per-second rates
    if "Total Accesses" in data:
        data["ReqPerSec"] = get_rate(value_store,
                                     "apache_status_%s_accesses" % item, now,
                                     data.pop("Total Accesses"))
    if "Total kBytes" in data:
        data["BytesPerSec"] = get_rate(value_store,
                                       "apache_status_%s_bytes" % item, now,
                                       data.pop("Total kBytes") * 1024)

    for key, label in _CHECK_LEVEL_ENTRIES:
        if key not in data:
            continue
        value = data[key]
        # OpenSlots is the only entry where running LOW is the problem
        lower = key == "OpenSlots"
        important = key in {"Uptime", "IdleWorkers", "BusyWorkers", "TotalSlots"}

        if key == "Uptime":
            renderer = render.timespan
        elif isinstance(value, float):
            renderer = None
        else:
            renderer = lambda i: "%d" % int(i)

        yield from check_levels(
            value,
            metric_name=key.replace(" ", "_"),
            levels_lower=params.get(key) if lower else None,
            levels_upper=None if lower else params.get(key),
            render_func=renderer,
            label=label,
            notice_only=not important,
        )

    yield from _scoreboard_results(data)
Beispiel #19
0
def check(params: Mapping[str, VSResultAge],
          section: PodConditions) -> CheckResult:
    """Check every condition in the section. Return one result if all conditions
    passed. Otherwise, return four results if one or more conditions are faulty
    or missing, defining each state according to `last_transition_time` and the
    respective levels in `params`.

    A pod transitions through the conditions in the order specified in
    `LOGICAL_ORDER`.  The last two conditions, `containersready` and `ready`,
    can be in a failed state simultaneously.  When a condition is missing (i.e.
    is `None`), it means that the previous condition is in a failed state."""
    section_dict = section.dict()

    # Fast path: every condition present and passing -> a single OK result
    # with the per-condition details in the long output.
    if all(cond and cond.status for _, cond in section):
        yield Result(
            state=State.OK,
            summary="Ready, all conditions passed",
            details="\n".join([
                condition_detailed_description(name, cond["status"],
                                               cond["reason"], cond["detail"])
                for name in LOGICAL_ORDER
                if (cond := section_dict.get(name)) is not None
            ]),
        )
        return

    curr_timestamp = time.time()
    for name in LOGICAL_ORDER:
        cond = section_dict[name]
        if cond is not None:
            time_diff = curr_timestamp - cond[
                "last_transition_time"]  # keep the last-seen one
            if (status := cond["status"]) is True:
                yield Result(state=State.OK,
                             summary=condition_short_description(
                                 name, str(status)))
                continue
            summary_prefix = condition_detailed_description(
                name, status, cond["reason"], cond["detail"])
        else:
            # Missing condition: `time_diff` deliberately keeps the value from
            # the previously inspected (present) condition — see the
            # "keep the last-seen one" note above.
            # NOTE(review): assumes at least one earlier condition in
            # LOGICAL_ORDER is always present; otherwise `time_diff` would be
            # unbound here — TODO confirm against the section model.
            summary_prefix = condition_short_description(name, "False")
        # Turn the elapsed time since the transition into a monitoring state
        # using the per-condition levels from `params`.
        for result in check_levels(time_diff,
                                   levels_upper=get_levels_for(params, name),
                                   render_func=render.timespan):
            yield Result(state=result.state,
                         summary=f"{summary_prefix} for {result.summary}")
Beispiel #20
0
def _fileinfo_check_function(
    check_definition: List[MetricInfo],
    params: Mapping[str, Any],
) -> CheckResult:
    """Check every defined metric that carries a value against the matching
    `max<key>` / `min<key>` levels from params."""
    for metric in check_definition:
        value = metric.value
        if value is None:
            # no data for this metric -> nothing to check
            continue

        yield from check_levels(
            value,
            levels_upper=params.get("max" + metric.key, (None, None)),
            levels_lower=params.get("min" + metric.key, (None, None)),
            metric_name=metric.key,
            label=metric.title,
            render_func=metric.verbose_func,
        )
Beispiel #21
0
def _check_individual_files(
    params: Mapping[str, Any],
    file_name: str,
    file_size: int,
    file_age: int,
    skip_ok_files: bool,
) -> CheckResult:
    '''
        This function checks individual files against levels defined for the file group.
        This is done to generate information for the long output.

        If skip_ok_files is set, files whose age and size pass all configured
        levels are omitted entirely.
    '''
    # BUGFIX: `results` used to be rebound on every loop iteration, so the
    # overall state was computed from the LAST key ("size_largest") only;
    # level violations of the other three keys were silently ignored.
    # Collect the results of all four checks before deriving the state.
    results = []
    for key, value in [
        ("age_oldest", file_age),
        ("age_newest", file_age),
        ("size_smallest", file_size),
        ("size_largest", file_size),
    ]:
        levels_upper = params.get("max" + key, (None, None))
        levels_lower = params.get("min" + key, (None, None))
        results.extend(
            check_levels(
                value,
                metric_name=key,
                levels_upper=levels_upper,
                levels_lower=levels_lower,
            ))

    # worst state over all four checks (Metric entries are skipped)
    overall_state = max(r.state.value for r in results
                        if isinstance(r, Result))
    if skip_ok_files and State(overall_state) == State.OK:
        return

    age = render.timespan(file_age)
    size = render.filesize(file_size)

    yield Result(
        state=State.OK,
        notice=f"[{file_name}] Age: {age}, Size: {size}",
    )
Beispiel #22
0
def check_with_utilization(
    usage: float,
    resource_type: ResourceType,
    requirement_type: RequirementType,
    kubernetes_object: Optional[AllocatableKubernetesObject],
    requirement_value: float,
    params: Params,
    render_func: Callable[[float], str],
) -> Iterable[Union[Metric, Result]]:
    """Yield a utilization Result — its summary enriched with the absolute
    usage and requirement values — followed by the utilization Metric.

    Metric name, levels and title are selected per kubernetes object when one
    is given, otherwise per requirement type.
    """
    percent_used = 100.0 * usage / requirement_value
    if kubernetes_object is not None:
        metric_name = f"kube_{resource_type}_{kubernetes_object}_{requirement_type}_utilization"
        param = params[kubernetes_object]
        title = utilization_title[kubernetes_object]
    else:
        metric_name = f"kube_{resource_type}_{requirement_type}_utilization"
        # "allocatable" requirements always come with a kubernetes object
        assert requirement_type != "allocatable"
        param = params[requirement_type]
        title = utilization_title[requirement_type]
    result, metric = check_levels(
        percent_used,
        levels_upper=None if param == "no_levels" else param[1],
        metric_name=metric_name,
        render_func=render.percent,
        boundaries=(0.0, None),
    )
    assert isinstance(result, Result)
    # check_levels renders "<pct> (warn/crit at ...)"; splice the absolute
    # values between the percentage and the level information.
    rendered_pct, *level_info = result.summary.split()
    prefix = f"{title}: {rendered_pct} - {render_func(usage)} of {render_func(requirement_value)}"
    yield Result(state=result.state, summary=" ".join([prefix] + level_info))
    yield metric
Beispiel #23
0
def check_kube_pod_status(
    params: Params,
    section_kube_pod_containers: Optional[PodContainers],
    section_kube_pod_init_containers: Optional[PodContainers],
    section_kube_pod_lifecycle: Optional[PodLifeCycle],
) -> CheckResult:
    """Report the pod's status message; when levels are configured for that
    status, check how long the pod has been in it (tracked via the value
    store).  Container status details follow in the long output."""
    assert section_kube_pod_lifecycle is not None, "Missing Api data"

    pod_containers = _pod_containers(section_kube_pod_containers)
    pod_init_containers = _pod_containers(section_kube_pod_init_containers)

    status_message = _pod_status_message(
        pod_containers,
        pod_init_containers,
        section_kube_pod_lifecycle,
    )

    current_time = time.time()
    value_store = get_value_store()
    if status_message not in value_store:
        # status changed (or first run): keep only the first-seen timestamp
        # of the current status
        value_store.clear()
        value_store[status_message] = current_time

    levels = _get_levels_from_params(status_message, params)
    if levels is None:
        yield Result(state=State.OK, summary=status_message)
    else:
        duration = current_time - value_store[status_message]
        for result in check_levels(
            duration,
            render_func=render.timespan,
            levels_upper=levels,
        ):
            yield Result(state=result.state, summary=f"{status_message}: since {result.summary}")

    yield from _container_status_details(pod_init_containers)
    yield from _container_status_details(pod_containers)
Beispiel #24
0
def check_proxmox_ve_disk_usage(params: Mapping[str, Any], section: Section) -> CheckResult:
    """Check filesystem usage against percentage levels from the rule.

    >>> for result in check_proxmox_ve_disk_usage(
    ...     {"levels": (80., 90.)},
    ...     parse_proxmox_ve_disk_usage([['{"disk": 1073741824, "max_disk": 2147483648}']])):
    ...   print(result)
    Result(state=<State.OK: 0>, summary='Usage: 1.07 GB')
    Metric('fs_used', 1073741824.0, levels=(1717986918.4, 1932735283.2), boundaries=(0.0, 2147483648.0))
    """
    used_bytes = section.get("disk", 0)
    total_bytes = section.get("max_disk", 0)
    warn_pct, crit_pct = params.get("levels", (0., 0.))

    if total_bytes == 0:
        yield Result(state=State.WARN, summary="Size of filesystem is 0 MB")
        return

    # percentage levels are converted to absolute byte levels
    yield from check_levels(
        value=used_bytes,
        levels_upper=(warn_pct / 100 * total_bytes, crit_pct / 100 * total_bytes),
        boundaries=(0, total_bytes),
        metric_name="fs_used",
        render_func=render.disksize,
        label="Usage",
    )
def check_proxmox_ve_vm_backup_status(
    now: datetime,
    params: Mapping[str, Any],
    section: Section,
) -> CheckResult:
    """If conditions provided calculate and compare age of last backup against provided
    levels and define result status accordingly
    >>> for result in check_proxmox_ve_vm_backup_status(
    ...     datetime.strptime("2020-12-07 21:28:02", '%Y-%m-%d %H:%M:%S'),
    ...     {'age_levels_upper': (93600, 180000)},
    ...     parse_proxmox_ve_vm_backup_status([[
    ...     '  {"last_backup": {'
    ...     '     "started_time": "2020-12-06 21:28:02",'
    ...     '     "total_duration": 140,'
    ...     '     "archive_name": "/tmp/vzdump-qemu-109-2020_12_06-21_28_02.vma.zst",'
    ...     '     "upload_amount": 10995116277,'
    ...     '     "upload_total": 1099511627776,'
    ...     '     "upload_time": 120'
    ...     '  }}'
    ...     ]])):
    ...   print(result)
    Result(state=<State.OK: 0>, summary='Age: 1 day 0 hours')
    Metric('age', 86400.0, levels=(93600.0, 180000.0), boundaries=(0.0, None))
    Result(state=<State.OK: 0>, summary='Time: 2020-12-06 21:28:02')
    Result(state=<State.OK: 0>, summary='Duration: 2 minutes 20 seconds')
    Result(state=<State.OK: 0>, summary='Name: /tmp/vzdump-qemu-109-2020_12_06-21_28_02.vma.zst')
    Result(state=<State.OK: 0>, summary='Dedup rate: 100.00')
    Result(state=<State.OK: 0>, summary='Bandwidth: 91.6 MB/s')
    """
    age_levels_upper = params.get("age_levels_upper")
    last_backup = section.get("last_backup")
    if not last_backup:
        # A missing backup is only a problem if age levels were configured,
        # i.e. the user actually expects backups to happen.
        yield (Result(state=State.CRIT, summary="No backup found")
               if age_levels_upper else  #
               Result(state=State.OK,
                      summary="No backup found and none needed"))
        return
    if "error" in last_backup:
        yield Result(
            state=State.CRIT,
            summary=f"Last backup failed with message {last_backup['error']!r}",
        )
        return

    # Proxmox VE backup logs only provide time stamps without time zone so we have to hope
    # the Proxmox VE node is located close to us
    started_time = last_backup.get("started_time")
    if started_time:
        yield from check_levels(
            value=(now - started_time).total_seconds(),
            levels_upper=age_levels_upper,
            metric_name="age",
            render_func=render.timespan,
            label="Age",
            boundaries=(0, None),
        )
        # Only report the start time when it is actually known; previously a
        # missing timestamp still produced a misleading "Time: None" result.
        yield Result(
            state=State.OK,
            summary=f"Time: {started_time}",
        )
    if "total_duration" in last_backup:
        # Guarded lookup: a log entry without a duration used to raise KeyError.
        yield Result(
            state=State.OK,
            summary=f"Duration: {render.timespan(last_backup['total_duration'])}",
        )

    if 'archive_name' in last_backup:
        yield Result(state=State.OK,
                     summary=f"Name: {last_backup['archive_name']}")
    if 'archive_size' in last_backup:
        yield Result(
            state=State.OK,
            summary=f"Size: {render.bytes(last_backup['archive_size'])}")

    # Bandwidth comes from the first key set the backup log provides:
    # 'bytes_written_bandwidth' is used verbatim, the other variants are
    # computed from an amount/time pair (with a zero-time guard).
    if all(k in last_backup
           for k in {'bytes_written_size', 'bytes_written_bandwidth'}):
        bandwidth = last_backup['bytes_written_bandwidth']
    elif all(k in last_backup for k in {'transfer_size', 'transfer_time'}):
        if last_backup['transfer_time'] == 0:
            return
        bandwidth = last_backup['transfer_size'] / last_backup['transfer_time']
    elif all(k in last_backup
             for k in {'upload_amount', 'upload_total', 'upload_time'}):
        if last_backup['upload_amount'] > 0:
            dedup_rate = last_backup['upload_total'] / last_backup[
                'upload_amount']
            yield Result(state=State.OK,
                         summary=f"Dedup rate: {dedup_rate:.2f}")
        if last_backup['upload_time'] == 0:
            return
        bandwidth = last_backup['upload_amount'] / last_backup['upload_time']
    elif all(k in last_backup
             for k in {'backup_amount', 'backup_total', 'backup_time'}):
        if last_backup['backup_amount'] > 0:
            dedup_rate = last_backup['backup_total'] / last_backup[
                'backup_amount']
            yield Result(state=State.OK,
                         summary=f"Dedup rate: {dedup_rate:.2f}")
        if last_backup['backup_time'] == 0:
            return
        bandwidth = last_backup['backup_amount'] / last_backup['backup_time']
    else:
        return

    yield Result(state=State.OK,
                 summary=f"Bandwidth: {render.iobandwidth(bandwidth)}")
        condition_name = name.upper()
        if (status := condition["status"]) is CONDITIONS_OK_MAPPINGS[name]:
            yield Result(
                state=State.OK,
                summary=condition_short_description(condition_name, status),
                details=condition_detailed_description(condition_name, status,
                                                       condition["reason"],
                                                       condition["message"]),
            )
            continue

        time_difference = current_timestamp - condition["last_transition_time"]
        check_result = list(
            check_levels(
                time_difference,
                levels_upper=condition_levels(params=params, condition=name),
                render_func=render.timespan,
            ))
        result = check_result[0]
        yield Result(
            state=result.state,
            summary=
            f"{condition_detailed_description(condition_name, condition['status'], condition['reason'], condition['message'])} for {result.summary}",
        )


register.check_plugin(
    name="kube_deployment_conditions",
    service_name="Condition",
    discovery_function=discovery,
    check_function=check,
Beispiel #27
0
def check_entity_sensors_fan(
    item: str,
    params: Mapping[str, Any],
    section: EntitySensorSection,
) -> CheckResult:
    """Report operational status and speed of a single fan sensor."""
    reading = section.get('fan', {}).get(item)
    if not reading:
        return

    yield Result(
        state=reading.state,
        summary=f"Operational status: {reading.status_descr}",
    )

    # Render the speed as an integer with the sensor's own unit string.
    def _render_speed(value: float) -> str:
        return f'{int(value)} {reading.unit}'

    # The metric is optional; 'lower' levels are always present via the
    # check's default parameters.
    yield from check_levels(
        value=reading.reading,
        metric_name="fan" if params.get('output_metrics') else None,
        levels_upper=params.get("upper"),
        levels_lower=params["lower"],
        render_func=_render_speed,
        label="Speed",
        boundaries=(0, None),
    )


# Register the fan check: one "Fan <item>" service per fan sensor discovered
# in the shared 'entity_sensors' section.
register.check_plugin(
    name='entity_sensors_fan',
    sections=['entity_sensors'],
    service_name='Fan %s',  # %s is replaced by the discovered sensor item
    discovery_function=discover_entity_sensors_fan,
    check_function=check_entity_sensors_fan,
    check_ruleset_name='hw_fans',
    check_default_parameters={'lower': (2000, 1000)},  # customer request
)
Beispiel #28
0
def check_proxmox_ve_vm_backup_status(
    now: datetime,
    params: Mapping[str, Any],
    section: Section,
) -> CheckResult:
    """If conditions provided calculate and compare age of last backup against provided
    levels and define result status accordingly
    >>> for result in check_proxmox_ve_vm_backup_status(
    ...     datetime.strptime("2020-12-07 21:28:02+01:00", '%Y-%m-%d %H:%M:%S%z'),
    ...     {'age_levels_upper': (93600, 180000)},
    ...     parse_proxmox_ve_vm_backup_status([[
    ...     '  {"last_backup": {'
    ...     '     "started_time": "2020-12-06 21:28:02+0000",'
    ...     '     "total_duration": 140,'
    ...     '     "archive_name": "/tmp/vzdump-qemu-109-2020_12_06-21_28_02.vma.zst",'
    ...     '     "upload_amount": 10995116277,'
    ...     '     "upload_total": 1099511627776,'
    ...     '     "upload_time": 120}}'
    ...     ]])):
    ...   print(result)
    Result(state=<State.OK: 0>, summary='Age: 23 hours 0 minutes')
    Metric('age', 82800.0, levels=(93600.0, 180000.0), boundaries=(0.0, None))
    Result(state=<State.OK: 0>, summary='Server local start time: 2020-12-06 21:28:02+00:00')
    Result(state=<State.OK: 0>, summary='Duration: 2 minutes 20 seconds')
    Metric('backup_duration', 140.0, boundaries=(0.0, None))
    Result(state=<State.OK: 0>, summary='Name: /tmp/vzdump-qemu-109-2020_12_06-21_28_02.vma.zst')
    Result(state=<State.OK: 0>, summary='Dedup rate: 100.00')
    Result(state=<State.OK: 0>, summary='Bandwidth: 91.6 MB/s')
    Metric('backup_avgspeed', 91625968.975, boundaries=(0.0, None))
    """
    age_levels_upper = params.get("age_levels_upper")
    duration_levels_upper = params.get("duration_levels_upper")
    # Configured lower bandwidth levels are scaled by 10^6 before comparison
    # against the computed bytes/s value — presumably the rule is entered in
    # MB/s. TODO(review): confirm the unit of 'bandwidth_levels_lower'.
    bandwidth_levels_lower_bytes = params.get("bandwidth_levels_lower")
    bandwidth_levels_lower = (
        (
            bandwidth_levels_lower_bytes[0] * 1000 * 1000,
            bandwidth_levels_lower_bytes[1] * 1000 * 1000,
        )
        if bandwidth_levels_lower_bytes
        else None
    )
    last_backup = section.get("last_backup")
    if not last_backup:
        # A missing backup is only a problem if age levels were configured,
        # i.e. the user actually expects backups to happen.
        yield (
            Result(state=State.CRIT, summary="No backup found")
            if age_levels_upper
            else Result(state=State.OK, summary="No backup found and none needed")  #
        )
        return
    if "error" in last_backup:
        yield Result(
            state=State.CRIT,
            summary=f"Last backup failed with message {last_backup['error']!r}",
        )
        return

    # Proxmox VE backup logs only provide time stamps without time zone so the special agent
    # explicitly converted them to utc
    started_time = last_backup.get("started_time")
    if started_time:
        yield from check_levels(
            value=(now - started_time.astimezone(timezone.utc)).total_seconds(),
            levels_upper=age_levels_upper,
            metric_name="age",
            render_func=render.timespan,
            label="Age",
            boundaries=(0, None),
        )
        # Only report the start time when it is actually known; previously a
        # missing timestamp still produced a misleading "... None" result.
        yield Result(
            state=State.OK,
            summary=f"Server local start time: {started_time}",
        )

    yield from check_levels(
        value=last_backup["total_duration"],
        levels_upper=duration_levels_upper,
        metric_name="backup_duration",
        render_func=render.timespan,
        label="Duration",
        boundaries=(0, None),
    )

    if "archive_name" in last_backup:
        yield Result(state=State.OK, summary=f"Name: {last_backup['archive_name']}")
    if "archive_size" in last_backup:
        yield Result(state=State.OK, summary=f"Size: {render.bytes(last_backup['archive_size'])}")

    # Bandwidth comes from the first key set the backup log provides:
    # 'bytes_written_bandwidth' is used verbatim, the other variants are
    # computed from an amount/time pair (with a zero-time guard).
    if all(k in last_backup for k in ("bytes_written_size", "bytes_written_bandwidth")):
        bandwidth = last_backup["bytes_written_bandwidth"]
    elif all(k in last_backup for k in ("transfer_size", "transfer_time")):
        if last_backup["transfer_time"] == 0:
            return
        bandwidth = last_backup["transfer_size"] / last_backup["transfer_time"]
    elif all(k in last_backup for k in ("upload_amount", "upload_total", "upload_time")):
        if last_backup["upload_amount"] > 0:
            dedup_rate = last_backup["upload_total"] / last_backup["upload_amount"]
            yield Result(state=State.OK, summary=f"Dedup rate: {dedup_rate:.2f}")
        if last_backup["upload_time"] == 0:
            return
        bandwidth = last_backup["upload_amount"] / last_backup["upload_time"]
    elif all(k in last_backup for k in ("backup_amount", "backup_total", "backup_time")):
        if last_backup["backup_amount"] > 0:
            dedup_rate = last_backup["backup_total"] / last_backup["backup_amount"]
            yield Result(state=State.OK, summary=f"Dedup rate: {dedup_rate:.2f}")
        if last_backup["backup_time"] == 0:
            return
        bandwidth = last_backup["backup_amount"] / last_backup["backup_time"]
    else:
        return

    yield from check_levels(
        value=bandwidth,
        levels_lower=bandwidth_levels_lower,
        metric_name="backup_avgspeed",
        render_func=render.iobandwidth,
        label="Bandwidth",
        boundaries=(0, None),
    )
Beispiel #29
0
    if pool.status == "Running" and pool.cache_mode == "ReadWrite":
        state = State.OK
    elif pool.status == "Running" and pool.cache_mode != "ReadWrite":
        state = State.WARN
    else:
        state = State.CRIT
    yield Result(
        state=state,
        summary=f"{pool.pool_type} pool {pool.name} is {pool.status}, its cache is in {pool.cache_mode} mode",
    )

    yield from check_levels(
        value=pool.percent_allocated,
        metric_name="pool_allocation",
        levels_upper=params["allocated_pools_percentage_upper"],
        render_func=render.percent,
        label="Pool allocation",
        boundaries=(0, 100),
    )


# Register the SANsymphony pool check; default upper levels for pool
# allocation are 80%/90% (WARN/CRIT).
register.check_plugin(
    name="sansymphony_pool",
    discovery_function=discover_sansymphony_pool,
    check_function=check_sansymphony_pool,
    service_name="Sansymphony Pool %s",  # %s is replaced by the pool name
    check_ruleset_name="sansymphony_pool",
    check_default_parameters={"allocated_pools_percentage_upper": (80.0, 90.0)},
)