# NOTE(review): this chunk starts mid-function — the enclosing `def`
# (presumably check_sansymphony_pool, per the registration below) is outside
# this view.
# Map pool status + cache mode to a monitoring state.
if pool.status == "Running" and pool.cache_mode == "ReadWrite":
    state = State.OK
elif pool.status == "Running" and pool.cache_mode != "ReadWrite":
    state = State.WARN
else:
    # Any status other than "Running" is critical.
    state = State.CRIT
yield Result(
    state=state,
    summary=f"{pool.pool_type} pool {pool.name} is {pool.status}, its cache is in {pool.cache_mode} mode",
)
# Allocation percentage is checked against the configurable upper levels.
yield from check_levels(
    value=pool.percent_allocated,
    metric_name="pool_allocation",
    levels_upper=params["allocated_pools_percentage_upper"],
    render_func=render.percent,
    label="Pool allocation",
    boundaries=(0, 100),
)

# Plugin registration; default allocation levels: WARN 80%, CRIT 90%.
register.check_plugin(
    name="sansymphony_pool",
    discovery_function=discover_sansymphony_pool,
    check_function=check_sansymphony_pool,
    service_name="Sansymphony Pool %s",
    check_ruleset_name="sansymphony_pool",
    check_default_parameters={"allocated_pools_percentage_upper": (80.0, 90.0)},
)
from cmk.base.plugins.agent_based.agent_based_api.v1 import register, Result, Service, State
from .agent_based_api.v1.type_defs import CheckResult, DiscoveryResult
from .utils.mobileiron import SourceHostSection


def check_mobileiron_sourcehost(section: SourceHostSection) -> CheckResult:
    """Report the source host's query time and device count (always OK)."""
    summaries = (
        f"Query Time: {section.queryTime}",
        f"Total number of returned devices: {section.total_count}",
    )
    for text in summaries:
        yield Result(state=State.OK, summary=text)


def discover_single(section: SourceHostSection) -> DiscoveryResult:
    """Discover exactly one item-less service."""
    yield Service()


register.check_plugin(
    name="mobileiron_sourcehost",
    sections=["mobileiron_source_host"],
    service_name="Mobileiron source host",
    discovery_function=discover_single,
    check_function=check_mobileiron_sourcehost,
)
def check_waiting(params: Mapping[str, int], state: ContainerWaitingState) -> CheckResult:
    """A waiting container is reported OK, with its reason and detail."""
    yield Result(
        state=State.OK,
        summary=f"Status: Waiting ({state.reason}: {state.detail})",
    )


def check_terminated(params: Mapping[str, int], state: ContainerTerminatedState) -> CheckResult:
    """Report a terminated container.

    A non-zero exit code maps to the configurable ``failed_state``; zero is OK.
    Also emits end time and run duration as an always-OK result.
    """
    failed = state.exit_code != 0
    result_state = State(params["failed_state"]) if failed else State.OK
    status = "Failed" if failed else "Succeeded"
    yield Result(
        state=result_state,
        summary=f"Status: {status} ({state.reason}: {state.detail})",
    )
    end_time = render.datetime(state.end_time)
    duration = render.timespan(state.end_time - state.start_time)
    yield Result(state=State.OK, summary=f"End time: {end_time} Run duration: {duration}")


register.check_plugin(
    name="kube_pod_containers",
    service_name="Container %s",
    discovery_function=discovery,
    check_function=check,
    check_default_parameters={"failed_state": int(State.CRIT)},
    check_ruleset_name="kube_pod_containers",
)
# NOTE(review): chunk starts mid-function — the enclosing `def` (a host label
# function, per host_label_function below) is outside this view.
for image in section.images:
    # One host label per container image in the deployment.
    yield HostLabel("cmk/container_image", image)

register.agent_section(
    name="kube_deployment_info_v1",
    parsed_section_name="kube_deployment_info",
    parse_function=parse,
    host_label_function=host_labels,
)


def discovery(section: DeploymentInfo) -> DiscoveryResult:
    """Discover exactly one item-less service."""
    yield Service()


def check_kube_deployment_info(section: DeploymentInfo) -> CheckResult:
    """Render the deployment's static fields via the shared check_info helper."""
    yield from check_info({
        "name": section.name,
        "namespace": section.namespace,
        "creation_timestamp": section.creation_timestamp,
    })


register.check_plugin(
    name="kube_deployment_info",
    service_name="Info",
    discovery_function=discovery,
    check_function=check_kube_deployment_info,
)
# NOTE(review): chunk starts mid-function, inside a conditional — the enclosing
# `def` and the `if` matching the `else:` below are outside this view; the
# indentation here is reconstructed and may not match the original nesting.
            continue
        summary_prefix = f"{cond_service_text.not_passed} ({cond['reason']}: {cond['detail']})"
    else:
        summary_prefix = cond_service_text.not_passed
    # Each level-check result is re-wrapped so the summary carries the
    # condition's "not passed" prefix.
    for result in check_levels(time_diff, levels_upper=get_levels_for(params, name), render_func=render.timespan):
        yield Result(state=result.state, summary=f"{summary_prefix} for {result.summary}")

register.agent_section(
    name="k8s_pod_conditions_v1",
    parsed_section_name="k8s_pod_conditions",
    parse_function=parse,
)

# Default: no levels configured for any of the four pod conditions.
register.check_plugin(
    name="k8s_pod_conditions",
    service_name="Condition",
    discovery_function=discovery,
    check_function=check,
    check_default_parameters=dict(
        scheduled="no_levels",
        initialized="no_levels",
        containersready="no_levels",
        ready="no_levels",
    ),
    check_ruleset_name="k8s_pod_conditions",
)
# SNMP section: failover hardware status from CISCO-FIREWALL-MIB, fetched on
# devices whose sysDescr identifies a Cisco ASA or PIX.
register.snmp_section(
    name='cisco_asa_failover',
    parse_function=parse_cisco_asa_failover,
    fetch=SNMPTree(
        base='.1.3.6.1.4.1.9.9.147.1.2.1.1.1',  # CISCO-FIREWALL-MIB::cfwHardwareStatusEntry
        oids=[
            '2',  # CISCO-FIREWALL-MIB::cfwHardwareInformation
            '3',  # CISCO-FIREWALL-MIB::cfwHardwareStatusValue
            '4',  # CISCO-FIREWALL-MIB::cfwHardwareStatusDetail
        ],
    ),
    detect=any_of(
        startswith('.1.3.6.1.2.1.1.1.0', 'cisco adaptive security'),
        contains('.1.3.6.1.2.1.1.1.0', 'cisco pix security'),
    ),
)

# Defaults: expected roles primary='active', secondary='standby'.
# NOTE(review): 'failover_state' is presumably the monitoring state raised on
# role deviation — confirm in the check function (not visible here).
register.check_plugin(
    name='cisco_asa_failover',
    service_name='Cluster Status',
    discovery_function=discovery_cisco_asa_failover,
    check_function=check_cisco_asa_failover,
    check_default_parameters={
        'primary': 'active',
        'secondary': 'standby',
        'failover_state': 1,
    },
    check_ruleset_name='cisco_asa_failover',
)
# NOTE(review): chunk starts mid-call — these keyword arguments close a
# check_levels()/Metric call whose opening is outside this view.
    metric_name="kube_pod_restart_rate",
    render_func=str,
    label="In last hour",
)


def _calc_restart_rate_in_last_hour(
    restart_count: int,
    curr_timestamp_seconds: int,
    host_value_store: MutableMapping[str, Any],
) -> Optional[int]:
    """Return the number of restarts within the last hour, or None until at
    least two samples exist.

    Maintains a sliding window of (timestamp, restart_count) pairs under the
    "restart_count_list" key of the host value store.
    """
    restart_count_list = host_value_store.setdefault("restart_count_list", [])
    # Evict samples older than one hour (list is in insertion/time order).
    while restart_count_list and restart_count_list[0][
            0] <= curr_timestamp_seconds - ONE_HOUR:
        restart_count_list.pop(0)
    restart_count_list.append((curr_timestamp_seconds, restart_count))
    if len(restart_count_list) > 1:
        # Rate = current count minus the oldest count still inside the window.
        return restart_count - restart_count_list[0][1]
    return None


register.check_plugin(
    name="kube_pod_restarts",
    service_name="Restarts",
    sections=["kube_pod_containers"],
    discovery_function=discovery,
    check_function=check_kube_pod_restarts,
    check_default_parameters=_DEFAULT_PARAMS,
    check_ruleset_name="kube_pod_restarts",
)
"faas_total_instance_count": gcp.MetricSpec("cloudfunctions.googleapis.com/function/instance_count", str), "faas_active_instance_count": gcp.MetricSpec( "cloudfunctions.googleapis.com/function/active_instances", str), } timeseries = section.get(item, gcp.SectionItem(rows=[])).rows yield from gcp.generic_check(metrics, timeseries, params) register.check_plugin( name="gcp_function_instances", sections=["gcp_service_cloud_functions", "gcp_assets"], service_name="GCP Cloud Function instances %s", check_ruleset_name="gcp_function_instances", discovery_function=discover, check_function=check_gcp_function_instances, check_default_parameters={}, ) def check_gcp_function_execution( item: str, params: Mapping[str, Any], section_gcp_service_cloud_functions: Optional[gcp.Section], section_gcp_assets: Optional[gcp.AssetSection], ) -> CheckResult: if section_gcp_service_cloud_functions is None: return section = section_gcp_service_cloud_functions
# NOTE(review): chunk starts mid-call, inside the enclosing check function —
# its header and the call these arguments close are outside this view.
        section_kube_allocatable_pods.allocatable),
)
yield Result(
    state=State.OK,
    notice=_summary("capacity", section_kube_allocatable_pods.capacity),
)
# Select the free-pod levels matching the object type (cluster vs node).
if section_kube_allocatable_pods.kubernetes_object == "cluster":
    param = params["free_cluster"]
elif section_kube_allocatable_pods.kubernetes_object == "node":
    param = params["free_node"]
else:
    raise AssertionError("Unknown Kubernetes object with capacity.")
yield from check_free_pods(
    param,
    section_kube_pod_resources,
    section_kube_allocatable_pods.allocatable,
)
yield Metric(name="kube_pod_allocatable", value=section_kube_allocatable_pods.allocatable)

register.check_plugin(
    name="kube_pod_resources",
    service_name="Pod resources",
    sections=["kube_pod_resources", "kube_allocatable_pods"],
    discovery_function=discovery_kube_pod_resources,
    check_function=check_kube_pod_resources,
    check_default_parameters=_DEFAULT_PARAMS,
    check_ruleset_name="kube_pod_resources",
)
# NOTE(review): chunk starts mid-function — the enclosing check function and
# the transfer_size / last_backup assignments are outside this view.
yield Result(
    state=State.OK,
    summary=f"Bandwidth: {render.iobandwidth(transfer_size / last_backup['transfer_time'])}",
)

register.agent_section(
    name="proxmox_ve_vm_backup_status",
    parse_function=parse_proxmox_ve_vm_backup_status,
)


def check_proxmox_ve_vm_backup_status_unpure(params: Mapping[str, Any], section: Section) -> CheckResult:
    """Because of datetime.now() this function is not testable.
    Test check_proxmox_ve_vm_backup_status() instead."""
    yield from check_proxmox_ve_vm_backup_status(datetime.now(), params, section)


# Default backup-age levels: WARN at 26 hours, CRIT at 50 hours.
register.check_plugin(
    name="proxmox_ve_vm_backup_status",
    service_name="Proxmox VE VM Backup Status",
    discovery_function=discover_single,
    check_function=check_proxmox_ve_vm_backup_status_unpure,
    check_ruleset_name="proxmox_ve_vm_backup_status",
    check_default_parameters={"age_levels_upper": (
        60 * 60 * 26,
        60 * 60 * 50,
    )},
)
# NOTE(review): chunk starts mid-call — these oids close an SNMPTree inside a
# register.snmp_section(...) whose opening is outside this view.
        oids=[
            "1",  # System Status
            "3",  # Power Status
        ],
    ),
)


def discovery(section: Section) -> DiscoveryResult:
    """Discover exactly one item-less service."""
    yield Service()


def check(section: Section) -> CheckResult:
    """CRIT on system or power failure, OK otherwise.

    NOTE(review): value 1 meaning "healthy" is inferred from the OK branches —
    confirm against the Synology MIB.
    """
    if section.system != 1:
        yield Result(state=State.CRIT, summary="System Failure")
    else:
        yield Result(state=State.OK, summary="System state OK")
    if section.power != 1:
        yield Result(state=State.CRIT, summary="Power Failure")
    else:
        yield Result(state=State.OK, summary="Power state OK")


register.check_plugin(
    name="synology_status",
    sections=["synology_status"],
    service_name="Status",
    discovery_function=discovery,
    check_function=check,
)
# NOTE(review): chunk starts mid-comprehension — the opening of the dict
# comprehension (assigned to node_results, used below) and the enclosing
# cluster check function are outside this view.
    node_name: list(check_mssql_mirroring(item, params, node_section))
    for node_name, node_section in section.items()
    if node_section
}
# Keep only nodes that produced any results.
results = {k: v for k, v in node_results.items() if v}
if len(results) > 1:
    # The principal database must live on exactly one node.
    yield Result(
        state=State.CRIT,
        summary=
        f"Found principal database on more than one node: {(', ').join(results.keys())}",
    )
    return
yield from (result for result_set in results.values() for result in result_set)

register.check_plugin(
    name='mssql_mirroring',
    sections=['mssql_mirroring'],
    service_name='MSSQL Mirroring Status: %s',
    discovery_function=discover_mssql_mirroring,
    check_function=check_mssql_mirroring,
    cluster_check_function=cluster_check_mssql_mirroring,
    check_ruleset_name='mssql_mirroring',
    check_default_parameters={
        'mirroring_state_criticality': 0,
        'mirroring_witness_state_criticality': 0,
    },
)
# NOTE(review): chunk starts inside a doctest/docstring — the enclosing
# function header (check_proxmox_ve_mem_usage, per the registration below) is
# outside this view.
    ...         parse_proxmox_ve_mem_usage([['{"max_mem": 67424276480, "mem": 32768163840}']])):
    ...     print(result)
    Result(state=<State.OK: 0>, summary='Usage: 48.60% - 30.5 GiB of 62.8 GiB')
    Metric('mem_used', 32768163840.0, levels=(53939421184.0, 60681848832.0), boundaries=(0.0, 67424276480.0))
    Metric('mem_used_percent', 48.59994878806002, levels=(80.0, 90.0), boundaries=(0.0, None))
    """
    # Levels are percentages of max_mem (see doctest above); (0, 0) if unset.
    warn, crit = params.get("levels", (0, 0))
    yield from check_element(
        "Usage",
        float(section.get("mem", 0)),
        float(section.get("max_mem", 0)),
        ("perc_used", (warn, crit)),
        metric_name="mem_used",
        create_percent_metric=True,
    )

register.agent_section(
    name="proxmox_ve_mem_usage",
    parse_function=parse_proxmox_ve_mem_usage,
)

register.check_plugin(
    name="proxmox_ve_mem_usage",
    service_name="Proxmox VE Memory Usage",
    discovery_function=discover_single,
    check_function=check_proxmox_ve_mem_usage,
    check_ruleset_name="proxmox_ve_mem_usage",
    check_default_parameters={"levels": (80.0, 90.0)},
)
if not section: yield Result(state=State.OK, summary="No open messages") return data = section for msg in data: state = _handle_severity(msg.severity) summary = f"{msg.timeCreated_iso} - {msg.server} - {msg.message}" yield Result(state=state, summary=summary) def _handle_severity(severity: str) -> State: severity_mapping = { "info": State.OK, "warn": State.WARN, "error": State.CRIT, } try: return severity_mapping[severity] except KeyError: return State.UNKNOWN register.check_plugin( name="splunk_system_msg", service_name="Splunk System Messages", discovery_function=discovery, check_function=check, )
"run.googleapis.com/container/network/received_bytes_count", render.networkbandwidth), "net_data_sent": gcp.MetricSpec("run.googleapis.com/container/network/sent_bytes_count", render.networkbandwidth), } timeseries = section_gcp_service_cloud_run.get( item, gcp.SectionItem(rows=[])).rows yield from gcp.generic_check(metrics, timeseries, params) register.check_plugin( name="gcp_run_network", sections=["gcp_service_cloud_run", "gcp_assets"], service_name="GCP Cloud Run network %s", check_ruleset_name="gcp_run_network", discovery_function=discover, check_function=check_gcp_run_network, check_default_parameters={}, ) def check_gcp_run_memory( item: str, params: Mapping[str, Any], section_gcp_service_cloud_run: Optional[gcp.Section], section_gcp_assets: Optional[gcp.AssetSection], ) -> CheckResult: if section_gcp_service_cloud_run is None: return metrics = {
parsed_section_name="kube_collector_connection", parse_function=parse, ) def discover(section: CollectorLogs) -> DiscoveryResult: yield Service() def check(section: CollectorLogs) -> CheckResult: for entry in section.logs: if entry.status == CollectorState.OK: yield Result(state=State.OK, summary=f"{entry.component}: OK") continue component_message = f"{entry.component}: {entry.message}" detail_message = f" ({entry.detail})" if entry.detail else "" yield Result( state=State.OK if entry.status == CollectorState.OK else State.CRIT, summary=component_message, details=f"{component_message}{detail_message}", ) register.check_plugin( name="kube_collector_connection", service_name="Cluster Collector", discovery_function=discover, check_function=check, )
# NOTE(review): chunk starts mid-function — the enclosing check function and
# the assignments of subs_status / req_subs_status are outside this view.
yield Result(
    state=State.OK if not req_subs_status or subs_status == req_subs_status else State.WARN,
    # NOTE(review): if req_subs_status is None (the registered default below),
    # `f"{req_subs_status and ...}"` renders the literal string "None"; only
    # "" renders nothing. Verify req_subs_status is normalized to "" upstream.
    summary=(f"Subscription: {subs_status}"
             f"{req_subs_status and f' (required: {req_subs_status})'}"),
)
yield Result(state=State.OK, summary=f"Version: {proxmox_ve_version}")
yield Result(
    state=State.OK,
    summary=(f"Hosted VMs: {len(section.get('lxc', []))}x LXC,"
             f" {len(section.get('qemu', []))}x Qemu"),
)

register.agent_section(
    name="proxmox_ve_node_info",
    parse_function=parse_proxmox_ve_node_info,
)

register.check_plugin(
    name="proxmox_ve_node_info",
    service_name="Proxmox VE Node Info",
    discovery_function=discover_single,
    check_function=check_proxmox_ve_node_info,
    check_ruleset_name="proxmox_ve_node_info",
    check_default_parameters={
        "required_node_status": None,
        "required_subscription_status": None,
    },
)
# NOTE(review): chunk starts mid-function — the enclosing helper (apparently
# _pod_status_message, called below) is outside this view.
    if state.exit_code != 0 and state.reason is not None:
        return state.reason
return section_kube_pod_lifecycle.phase.title()


def discovery_kube_pod_status(
    section_kube_pod_containers: Optional[PodContainers],
    section_kube_pod_lifecycle: Optional[PodLifeCycle],
) -> DiscoveryResult:
    """Discover exactly one item-less service."""
    yield Service()


def check_kube_pod_status(
    section_kube_pod_containers: Optional[PodContainers],
    section_kube_pod_lifecycle: Optional[PodLifeCycle],
) -> CheckResult:
    """Always-OK status summary; yields nothing when lifecycle data is missing."""
    if section_kube_pod_lifecycle is not None:
        yield Result(
            state=State.OK,
            summary=_pod_status_message(section_kube_pod_containers, section_kube_pod_lifecycle),
        )


register.check_plugin(
    name="kube_pod_status",
    service_name="Status",
    sections=["kube_pod_containers", "kube_pod_lifecycle"],
    discovery_function=discovery_kube_pod_status,
    check_function=check_kube_pod_status,
)
""" return StatefulSetInfo(**json.loads(string_table[0][0])) register.agent_section( name="kube_statefulset_info_v1", parsed_section_name="kube_statefulset_info", parse_function=parse, host_label_function=host_labels("statefulset"), ) def discovery(section: StatefulSetInfo) -> DiscoveryResult: yield Service() def check_kube_statefulset_info(section: StatefulSetInfo) -> CheckResult: yield from check_info({ "name": section.name, "namespace": section.namespace, "creation_timestamp": section.creation_timestamp, }) register.check_plugin( name="kube_statefulset_info", service_name="Info", discovery_function=discovery, check_function=check_kube_statefulset_info, )
# NOTE(review): chunk starts mid-function — the enclosing check function
# header is outside this view.
# Status codes not present in any configured list fall through as UNKNOWN.
state = State.UNKNOWN
if section.status in params["ok_states"]:
    state = State.OK
elif section.status in params["warn_states"]:
    state = State.WARN
elif section.status in params["crit_states"]:
    state = State.CRIT
elif section.status == 3:  # to prevent flapping between update avail and Connection
    raise IgnoreResultsError("Devices try to connect to the update server")
yield Result(
    state=state,
    summary=
    f"Update Status: {_STATES[section.status]}, Current Version: {section.version}",
)

# Defaults: 2 -> OK, 5 -> WARN, 1/4 -> CRIT; status 3 is ignored above unless
# the user maps it explicitly.
register.check_plugin(
    name="synology_update",
    sections=["synology_update"],
    service_name="Update",
    discovery_function=discovery,
    check_function=check,
    check_ruleset_name="synology_update",
    check_default_parameters={
        "ok_states": [2],
        "warn_states": [5],
        "crit_states": [1, 4],
    },
)
"Instances", str), "faas_active_instance_count": gcp.MetricSpec( "cloudfunctions.googleapis.com/function/active_instances", "Active instances", str), } timeseries = section.get(item, gcp.SectionItem(rows=[])).rows yield from gcp.generic_check(metrics, timeseries, params) register.check_plugin( name="gcp_function_instances", sections=["gcp_service_cloud_functions", "gcp_assets"], service_name=service_namer("instances"), check_ruleset_name="gcp_function_instances", discovery_function=discover, check_function=check_gcp_function_instances, check_default_parameters={ "faas_total_instance_count": None, "faas_active_instance_count": None, }, ) def check_gcp_function_execution( item: str, params: Mapping[str, Any], section_gcp_service_cloud_functions: Optional[gcp.Section], section_gcp_assets: Optional[gcp.AssetSection], ) -> CheckResult: if section_gcp_service_cloud_functions is None: return
# Agent sections feeding the CPU check; each v1 raw section is parsed into the
# canonical parsed_section_name consumed by the check plugin below.
register.agent_section(
    name="kube_performance_cpu_v1",
    parsed_section_name="kube_performance_cpu",
    parse_function=parse_performance_usage,
)

register.agent_section(
    name="kube_cpu_resources_v1",
    parsed_section_name="kube_cpu_resources",
    parse_function=parse_resources,
)

register.agent_section(
    name="kube_allocatable_cpu_resource_v1",
    parsed_section_name="kube_allocatable_cpu_resource",
    parse_function=parse_allocatable_resource,
)

# The check combines usage, requests/limits and allocatable CPU sections.
register.check_plugin(
    name="kube_cpu",
    service_name="CPU resources",
    sections=[
        "kube_performance_cpu", "kube_cpu_resources",
        "kube_allocatable_cpu_resource"
    ],
    check_ruleset_name="kube_cpu",
    discovery_function=discovery_kube_cpu,
    check_function=check_kube_cpu,
    check_default_parameters=DEFAULT_PARAMS,
)
def check(params: KubeContainersLevelsUpperLower, section: ContainerCount) -> CheckResult:
    """Computes `total` and uses `check_levels` for each section element,
    setting levels from `params` individually"""
    counts = section.dict()
    counts["total"] = sum(counts.values())
    for kind, count in counts.items():
        yield from check_levels(
            count,
            levels_upper=params.get(f"{kind}_upper"),
            levels_lower=params.get(f"{kind}_lower"),
            metric_name=f"kube_node_container_count_{kind}",
            label=f"{kind.title()}",
        )


register.agent_section(
    name="kube_node_container_count_v1",
    parsed_section_name="kube_node_container_count",
    parse_function=parse,
)

register.check_plugin(
    name="kube_node_container_count",
    service_name="Containers",
    discovery_function=discovery,
    check_function=check,
    check_ruleset_name="kube_node_container_count",
    check_default_parameters={},
)
name="synology_info", detect=synology.detect(), parse_function=parse, fetch=SNMPTree( base=".1.3.6.1.4.1.6574.1.5", oids=[ "1", # Model "2", # SerialNumber "3", # OS Version ], ), ) def discovery(section: Section) -> DiscoveryResult: yield Service() def check(section: Section) -> CheckResult: summary = f"Model: {section.model}, S/N: {section.serialnumber}, OS Version: {section.os}" yield Result(state=State.OK, summary=summary) register.check_plugin( name="synology_info", sections=["synology_info"], service_name="Info", discovery_function=discovery, check_function=check, )
# NOTE(review): chunk starts mid-function — the enclosing check function
# header (and the condition guarding this `return`) are outside this view.
    return
section = section_gcp_service_filestore
metrics = {
    "fs_used_percent": gcp.MetricSpec(
        "file.googleapis.com/nfs/server/used_bytes_percent", "Usage", render.percent, scale=1e2
    ),
    "disk_read_ios": gcp.MetricSpec(
        "file.googleapis.com/nfs/server/read_ops_count", "Read operations", str
    ),
    "disk_write_ios": gcp.MetricSpec(
        "file.googleapis.com/nfs/server/write_ops_count", "Write operations", str
    ),
}
# NOTE(review): section[item] raises KeyError for unknown items (no .get
# fallback here) — confirm discovery guarantees the item exists.
timeseries = section[item].rows
yield from gcp.generic_check(metrics, timeseries, params)

# Defaults of None: no levels configured for any metric.
register.check_plugin(
    name="gcp_filestore_disk",
    sections=["gcp_service_filestore", "gcp_assets"],
    service_name="GCP Filestore %s",
    check_ruleset_name="gcp_filestore_disk",
    discovery_function=discover,
    check_function=check,
    check_default_parameters={
        "fs_used_percent": None,
        "disk_read_ios": None,
        "disk_write_ios": None,
    },
)
# NOTE(review): chunk starts mid-signature — the `def` line introducing these
# parameters is outside this view.
    params: Mapping[str, Any],
    section_gcp_service_filestore: Optional[gcp.Section],
    section_gcp_assets: Optional[gcp.AssetSection],
) -> CheckResult:
    # Without Filestore data there is nothing to check.
    if section_gcp_service_filestore is None:
        return
    section = section_gcp_service_filestore
    metrics = {
        "fs_used_percent": gcp.MetricSpec("file.googleapis.com/nfs/server/used_bytes_percent",
                                          render.percent,
                                          scale=1e2),
        "disk_read_ios": gcp.MetricSpec("file.googleapis.com/nfs/server/read_ops_count", str),
        "disk_write_ios": gcp.MetricSpec("file.googleapis.com/nfs/server/write_ops_count", str),
    }
    # NOTE(review): section[item] raises KeyError for unknown items — confirm
    # discovery guarantees the item exists.
    timeseries = section[item].rows
    yield from gcp.generic_check(metrics, timeseries, params)


register.check_plugin(
    name="gcp_filestore_disk",
    sections=["gcp_service_filestore", "gcp_assets"],
    service_name="GCP Filestore %s",
    check_ruleset_name="gcp_filestore_disk",
    discovery_function=discover,
    check_function=check,
    check_default_parameters={},
)
def check_kube_pod_info(section: PodInfo) -> CheckResult:
    """Render the pod's static API fields as info output.

    To get an understanding of the API objects this check deals with, take a
    look at PodInfo and the definition of its fields.

    Raises KubernetesError when the namespace or creation timestamp is absent.
    """
    if section.namespace is None:
        raise KubernetesError("Pod has no namespace")
    if section.creation_timestamp is None:
        raise KubernetesError("Pod has no creation timestamp")
    info = {
        "node": section.node,
        "name": section.name,
        "namespace": section.namespace,
        "creation_timestamp": section.creation_timestamp,
        "qos_class": section.qos_class,
        "uid": section.uid,
        "restart_policy": section.restart_policy,
        "control_chain": section.controllers,
    }
    yield from check_info(info)


register.check_plugin(
    name="kube_pod_info",
    service_name="Info",
    discovery_function=discovery_kube_pod_info,
    check_function=check_kube_pod_info,
)
def parse_kube_pod_lifecycle(string_table: StringTable) -> PodLifeCycle:
    """Deserialize the agent's JSON payload into a PodLifeCycle.

    >>> parse_kube_pod_lifecycle([['{"phase": "running"}']])
    PodLifeCycle(phase=<Phase.RUNNING: 'running'>)
    """
    raw = string_table[0][0]
    return PodLifeCycle(**json.loads(raw))


register.agent_section(
    name="kube_pod_lifecycle_v1",
    parsed_section_name="kube_pod_lifecycle",
    parse_function=parse_kube_pod_lifecycle,
)


def discovery_kube_pod_phase(section: PodLifeCycle) -> DiscoveryResult:
    """Discover exactly one item-less service."""
    yield Service()


def check_kube_pod_phase(section: PodLifeCycle) -> CheckResult:
    """Always-OK result showing the title-cased pod phase."""
    phase_text = section.phase.title()
    yield Result(state=State.OK, summary=phase_text)


register.check_plugin(
    name="kube_pod_phase",
    service_name="Phase",
    sections=["kube_pod_lifecycle"],
    discovery_function=discovery_kube_pod_phase,
    check_function=check_kube_pod_phase,
)
# NOTE(review): chunk starts mid-call — the Metric/check call these arguments
# close, and the enclosing check function, are outside this view.
    total_bytes,
    boundaries=(0, None),
)
yield Metric(
    "fs_used_percent",
    100.0 * used_bytes / total_bytes,
    levels=(warn, crit),
    boundaries=(0.0, 100.0),
)
# State derives from absolute byte thresholds; the summary shows the
# percentage and human-readable sizes.
yield Result(state=(State.CRIT if used_bytes >= crit_bytes else
                    State.WARN if used_bytes >= warn_bytes else
                    State.OK),
             summary="%s used (%s of %s)" %
             (render.percent(100.0 * used_bytes / total_bytes),
              render.disksize(used_bytes), render.disksize(total_bytes)))

register.agent_section(
    name="proxmox_ve_disk_usage",
    parse_function=parse_proxmox_ve_disk_usage,
)

register.check_plugin(
    name="proxmox_ve_disk_usage",
    service_name="Proxmox VE Disk Usage",
    discovery_function=discover_single,
    check_function=check_proxmox_ve_disk_usage,
    check_ruleset_name="proxmox_ve_disk_percentage_used",
    check_default_parameters={"levels": (80., 90.)},
)
# NOTE(review): chunk starts mid-call — the opening of the SNMPTree /
# register.snmp_section(...) call these brackets close is outside this view.
        ],
    ),
)


def discover_checkpoint_connections(section: Section) -> DiscoveryResult:
    """Discover exactly one item-less service."""
    yield Service()


def check_checkpoint_connections(
    params,
    section: Section,
) -> CheckResult:
    """Check the current connection count against the configured upper levels."""
    yield from check_levels(
        value=section.current,
        levels_upper=params["levels"],
        metric_name="connections",
        label="Current connections",
        render_func=str,
    )


# Default levels: WARN at 40000, CRIT at 50000 connections.
register.check_plugin(
    name="checkpoint_connections",
    service_name="Connections",
    discovery_function=discover_checkpoint_connections,
    check_function=check_checkpoint_connections,
    check_default_parameters={"levels": (40000, 50000)},
    check_ruleset_name="checkpoint_connections",
)