def test_compute_rates_single_disk():
    """First computation must raise IgnoreResultsError, the follow-up must yield rates.

    Exercised once for a disk sample without a frequency field and once with one.
    """
    for disk in (DISK_WO_FREQUENCY, DISK):
        # No reference values in the store yet -> rates cannot be computed.
        with pytest.raises(IgnoreResultsError):
            winperf_phydisk._compute_rates_single_disk(
                disk,
                get_value_store(),
            )
        # A later sample (advanced time and frequency) produces actual rates.
        disk_with_rates = winperf_phydisk._compute_rates_single_disk(
            _increment_time_and_frequency(disk),
            get_value_store(),
        )
        _check_disk_with_rates(
            disk,
            disk_with_rates,
        )
def test_compute_rates_single_disk_same_time_same_values():
    """Submitting the identical timestamp twice must raise IgnoreResultsError both times."""
    for _ in range(2):
        with pytest.raises(IgnoreResultsError):
            diskstat._compute_rates_single_disk(
                DISK,
                get_value_store(),
            )
def test_compute_rates_single_disk_diff_time_diff_values():
    """Advancing timestamp and counters -> IgnoreResults once, then positive rates."""
    with pytest.raises(IgnoreResultsError):
        diskstat._compute_rates_single_disk(
            DISK_HALF,
            get_value_store(),
        )
    computed = diskstat._compute_rates_single_disk(
        DISK,
        get_value_store(),
    )
    assert set(computed) == EXP_METRICS
    for metric, rate in computed.items():
        # queue_length is passed through verbatim; all other metrics are
        # strictly positive rates because the counters increased.
        if metric == "queue_length":
            assert rate == DISK["queue_length"]
            continue
        assert rate > 0
def test_compute_rates_single_disk_diff_time_same_values():
    """Same counters at a later timestamp -> IgnoreResults once, then all-zero rates."""
    with pytest.raises(IgnoreResultsError):
        diskstat._compute_rates_single_disk(
            DISK,
            get_value_store(),
        )
    later_sample = {
        **DISK,
        "timestamp": DISK["timestamp"] + 100,
    }
    computed = diskstat._compute_rates_single_disk(
        later_sample,
        get_value_store(),
    )
    expected = {metric: 0 for metric in EXP_METRICS}
    expected["queue_length"] = DISK["queue_length"]
    assert computed == expected
def _calc_restart_rate_in_last_hour(restart_count: int) -> int:
    """Return the number of restarts that happened within the last hour.

    Maintains a rolling ``(timestamp, count)`` history in the host value store
    and reports the difference between the current counter and the oldest
    sample still inside the one-hour window.  The very first call has no
    baseline yet and reports the raw counter value.
    """
    now = int(time.time())
    history = get_value_store().setdefault("restart_count_list", [])
    # Age out samples that fell off the one-hour window (mutates the stored list).
    cutoff = now - ONE_HOUR
    while history and history[0][0] <= cutoff:
        history.pop(0)
    history.append((now, restart_count))
    if len(history) <= 1:
        return restart_count
    return restart_count - history[0][1]
def cluster_check_juniper_trpz_aps_sessions(
    item: str,
    section: Mapping[str, Section],
) -> CheckGenerator:
    """Cluster entry point: delegate to the shared check with node-keyed sections."""
    value_store = get_value_store()
    now = time.time()
    yield from _check_common_juniper_trpz_aps_sessions(value_store, now, item, section)
def check_juniper_trpz_aps_sessions(
    item: str,
    section: Section,
) -> CheckGenerator:
    """Non-cluster entry point: wrap the single section under an empty node name."""
    value_store = get_value_store()
    now = time.time()
    yield from _check_common_juniper_trpz_aps_sessions(value_store, now, item, {"": section})
def check_kube_pod_status(
    params: Params,
    section_kube_pod_containers: Optional[PodContainers],
    section_kube_pod_init_containers: Optional[PodContainers],
    section_kube_pod_lifecycle: Optional[PodLifeCycle],
) -> CheckResult:
    """Report the pod's status and alert when the current status group persists too long.

    Tracks, in the value store, how long each concrete status message within the
    currently matched status group has been observed (``duration_per_status``);
    the summed duration is checked against the group's configured upper levels.
    """
    assert section_kube_pod_lifecycle is not None, "Missing Api data"
    pod_containers = _pod_containers(section_kube_pod_containers)
    pod_init_containers = _pod_containers(section_kube_pod_init_containers)
    status_message = _pod_status_message(
        pod_containers,
        pod_init_containers,
        section_kube_pod_lifecycle,
    )
    now = time.time()
    value_store = get_value_store()
    group_levels, group_statuses = _get_group_from_params(
        status_message, params)
    if value_store.get("group") != group_statuses:
        # Status group changed (or first run): restart the duration tracking.
        value_store["group"] = group_statuses
        value_store["duration_per_status"] = {status_message: 0.0}
    else:
        # Same group as last check: attribute the elapsed interval to the
        # status that was active during the previous check cycle.
        previous_status = value_store["previous_status"]
        value_store["duration_per_status"][
            previous_status] += now - value_store["previous_time"]
        value_store["duration_per_status"].setdefault(status_message, 0.0)
    value_store["previous_time"] = now
    value_store["previous_status"] = status_message
    levels = None if group_levels == "no_levels" else group_levels[1]
    if levels is None:
        yield Result(state=State.OK, summary=status_message)
    else:
        for result in check_levels(
            # NOTE: the genexpr variable shadows the ``time`` module, but only
            # inside this expression; it sums the per-status durations.
            sum(time for time in value_store["duration_per_status"].values()),
            render_func=render.timespan,
            levels_upper=levels,
        ):
            yield Result(state=result.state, summary=f"{status_message}: since {result.summary}")
    # More than one tracked status means the pod changed status within the
    # current group; show the per-status breakdown as additional detail.
    if len(value_store["duration_per_status"]) > 1:
        seen_statuses = ", ".join(
            f"{s} ({render.timespan(t)})" for s, t in value_store["duration_per_status"].items())
        yield Result(state=State.OK, notice=f"Seen: {seen_statuses}")
    yield from _container_status_details(pod_init_containers)
    yield from _container_status_details(pod_containers)
def check_kube_cpu(
    params: Params,
    section_kube_performance_cpu: Optional[PerformanceUsage],
    section_kube_cpu_resources: Optional[Resources],
    section_kube_allocatable_cpu_resource: Optional[AllocatableResource],
) -> CheckResult:
    """Entry point: inject wall clock and host value store into the testable core."""
    now = time.time()
    value_store = get_value_store()
    yield from _check_kube_cpu(
        params,
        section_kube_performance_cpu,
        section_kube_cpu_resources,
        section_kube_allocatable_cpu_resource,
        current_timestamp=now,
        host_value_store=value_store,
    )
def check_apache_status(item: str, params: Mapping[str, Any], section: Section) -> CheckResult:
    """Check one Apache server-status instance: derive rates, apply levels, render scoreboard."""
    # Items discovered before werk 2763 carried a superfluous ":None" suffix.
    if item.endswith(":None"):
        item = item[:-5]
    data = section.get(item)
    if data is None:
        return

    now = int(time.time())
    value_store = get_value_store()
    # Turn the absolute counters into per-second rates.
    if "Total Accesses" in data:
        data["ReqPerSec"] = get_rate(
            value_store,
            "apache_status_%s_accesses" % item,
            now,
            data.pop("Total Accesses"),
        )
    if "Total kBytes" in data:
        data["BytesPerSec"] = get_rate(
            value_store,
            "apache_status_%s_bytes" % item,
            now,
            data.pop("Total kBytes") * 1024,
        )

    for key, label in _CHECK_LEVEL_ENTRIES:
        if key not in data:
            continue
        value = data[key]
        # OpenSlots is the only metric where fewer is worse.
        levels_are_lower = key == "OpenSlots"
        notice_only = key not in {"Uptime", "IdleWorkers", "BusyWorkers", "TotalSlots"}
        if key == "Uptime":
            renderer = render.timespan
        elif isinstance(value, float):
            renderer = None
        else:
            renderer = lambda i: "%d" % int(i)
        yield from check_levels(
            value,
            metric_name=key.replace(" ", "_"),
            levels_lower=params.get(key) if levels_are_lower else None,
            levels_upper=None if levels_are_lower else params.get(key),
            render_func=renderer,
            label=label,
            notice_only=notice_only,
        )

    yield from _scoreboard_results(data)
def check_kube_pods(params: Params, section: PodResources) -> CheckResult:
    """Report pod counts per lifecycle phase and alert on long-pending pods."""
    now = time.time()
    value_store = get_value_store()
    previously_pending = value_store.get("pending", {})
    # Remember when each currently pending pod was first seen in that state;
    # pods that left the pending phase drop out of the value store here.
    value_store["pending"] = {
        name: previously_pending.get(name, now) for name in section.pending
    }

    for resource in _POD_RESOURCES_FIELDS:
        pod_names = getattr(section, resource)
        count = len(pod_names)
        summary = _summary(resource, count)
        if resource == "unknown":
            # Unknown pods additionally get the pod list in the details.
            yield Result(
                state=State.OK,
                summary=summary,
                details=f"{summary}{_view_pod_list(pod_names)}",
            )
        elif resource == "pending" and params["pending"] != "no_levels":
            yield _check_phase_duration_pods(
                summary,
                now,
                value_store["pending"],
                Levels(*params["pending"][1]),
            )
        else:
            yield Result(state=State.OK, summary=summary)
        if resource != "unknown":
            yield Metric(name=f"kube_pod_{resource}", value=count)
def check_synology_disks(item: str, params: Mapping[str, Any], section: Section) -> CheckResult:
    """Report temperature and health status of a single Synology disk."""
    disk = section[item]
    yield from temperature.check_temperature(
        reading=disk.temperature,
        params=None,
        unique_name=item,
        value_store=get_value_store(),
    )
    state_map = {
        1: (State.OK, "OK"),
        2: (State.OK, "OK"),
        3: (State.WARN, "not initialized"),
        4: (State.CRIT, "system partition failed"),
        5: (State.CRIT, "crashed"),
    }
    state, text = state_map[disk.state]
    # An uninitialized disk is acceptable when it is deliberately used as cache.
    if disk.state == 3 and params.get("used_as_cache"):
        state, text = State.OK, "used as cache"
    yield Result(
        state=state,
        summary=f"Status: {text}, Temperature: {disk.temperature} °C, Model: {disk.model}",
    )
def check_kube_pod_status(
    params: Params,
    section_kube_pod_containers: Optional[PodContainers],
    section_kube_pod_init_containers: Optional[PodContainers],
    section_kube_pod_lifecycle: Optional[PodLifeCycle],
) -> CheckResult:
    """Report the pod status and alert when the same status persists beyond the levels."""
    assert section_kube_pod_lifecycle is not None, "Missing Api data"
    pod_containers = _pod_containers(section_kube_pod_containers)
    pod_init_containers = _pod_containers(section_kube_pod_init_containers)
    status_message = _pod_status_message(
        pod_containers,
        pod_init_containers,
        section_kube_pod_lifecycle,
    )
    now = time.time()
    value_store = get_value_store()
    # A status change resets tracking: the store holds exactly one entry,
    # the first-seen timestamp of the current status message.
    if status_message not in value_store:
        value_store.clear()
        value_store[status_message] = now

    levels = _get_levels_from_params(status_message, params)
    if levels is None:
        yield Result(state=State.OK, summary=status_message)
    else:
        elapsed = now - value_store[status_message]
        for result in check_levels(
            elapsed,
            render_func=render.timespan,
            levels_upper=levels,
        ):
            yield Result(state=result.state, summary=f"{status_message}: since {result.summary}")

    yield from _container_status_details(pod_init_containers)
    yield from _container_status_details(pod_containers)
def check_kube_pod_restarts(params: Params, section: PodContainers) -> CheckResult:
    """Entry point: hand the current time and host value store to the testable core."""
    now = int(time.time())
    value_store = get_value_store()
    yield from _check(params, section, now, value_store)
    # Tail of the discovery function (its ``def`` lies outside this chunk):
    # one service per discovered temperature sensor item.
    yield from (Service(item=item) for item in section.get('temp', {}))


def check_entity_sensors_temp(
    item: str,
    params: TempParamType,
    section: EntitySensorSection,
) -> CheckResult:
    """Check a single temperature sensor parsed from the entity_sensors section."""
    # Sensor may have disappeared since discovery; stale items yield nothing.
    if not (sensor_reading := section.get('temp', {}).get(item)):
        return
    # NOTE(review): unique_name is the constant "temp" for every item, so all
    # temperature sensors of a host share one trend-computation state in the
    # value store — confirm this is intended rather than unique_name=item.
    yield from check_temperature(
        sensor_reading.reading,
        params,
        unique_name="temp",
        value_store=get_value_store(),
        dev_unit=sensor_reading.unit,
        dev_status=int(sensor_reading.state),
        dev_status_name=sensor_reading.status_descr,
    )


register.check_plugin(
    name='entity_sensors_temp',
    sections=['entity_sensors'],
    service_name='Temperature %s',
    discovery_function=discover_entity_sensors_temp,
    check_function=check_entity_sensors_temp,
    check_ruleset_name='temperature',
    check_default_parameters={},  # The check processes ambient and CPU temp sensors,