def _check_inventory_tree(
    trees: InventoryTrees,
    old_tree: Optional[StructuredDataNode],
    sw_missing: ServiceState,
    sw_changes: ServiceState,
    hw_changes: ServiceState,
) -> ActiveCheckResult:
    """Rate the freshly computed inventory trees.

    Args:
        trees: The inventory and status data trees of the host.
        old_tree: The previous inventory tree, or None if there is nothing
            to compare against.
        sw_missing: State to report if the software packages table is empty.
            A falsy value disables this check.
        sw_changes: State to report if the "software" subtree changed.
        hw_changes: State to report if the "hardware" subtree changed.

    Returns:
        One result whose state is the worst state found and whose summary
        lists every finding.
    """
    inventory = trees.inventory
    if inventory.is_empty() and trees.status_data.is_empty():
        return ActiveCheckResult(0, ("Found no data", ), (), ())

    worst_state = 0
    messages = [f"Found {inventory.count_entries()} inventory entries"]

    packages = inventory.get_table(["software", "packages"])
    if packages is not None and packages.is_empty() and sw_missing:
        messages.append("software packages information is missing" + state_markers[sw_missing])
        worst_state = max(worst_state, sw_missing)

    if old_tree is not None:
        comparisons = (
            ("software", "software changes", sw_changes),
            ("hardware", "hardware changes", hw_changes),
        )
        for subtree, text, change_state in comparisons:
            if _tree_nodes_are_equal(old_tree, inventory, subtree):
                continue
            messages.append(text + state_markers[change_state])
            worst_state = max(worst_state, change_state)

    if not trees.status_data.is_empty():
        messages.append(f"Found {trees.status_data.count_entries()} status entries")

    return ActiveCheckResult(worst_state, messages, (), ())
def _check_plugins_missing_data(
    plugins_missing_data: List[CheckPluginName],
    exit_spec: ExitSpec,
    some_success: bool,
) -> Iterable[ActiveCheckResult]:
    """Yield results for check plugins that did not receive monitoring data.

    Args:
        plugins_missing_data: Names of the plugins without data.
        exit_spec: Mapping with the configured states. The keys read here are
            "empty_output", "specific_missing_sections" (pairs of regex
            pattern and state) and "missing_sections".
        some_success: Whether anything at all was received from the host.

    Yields:
        Nothing if no plugin is missing data. A single "Got no information
        from host" result if nothing succeeded. Otherwise one result listing
        all plugins not covered by a specific pattern (only if there are
        any), followed by one result per plugin that matched a specific
        pattern, carrying that pattern's state.
    """
    if not plugins_missing_data:
        return

    if not some_success:
        yield ActiveCheckResult(exit_spec.get("empty_output", 2), "Got no information from host")
        return

    # key is a legacy name, kept for compatibility.
    specific_plugins_missing_data_spec = exit_spec.get("specific_missing_sections", [])

    specific_plugins, generic_plugins = set(), set()
    for check_plugin_name in plugins_missing_data:
        plugin_name = str(check_plugin_name)
        for pattern, status in specific_plugins_missing_data_spec:
            # The first matching pattern wins.
            if regex(pattern).match(plugin_name):
                specific_plugins.add((check_plugin_name, status))
                break
        else:  # no break
            generic_plugins.add(plugin_name)

    # Only report the generic state if there really is a plugin that is not
    # covered by a specific rule. Otherwise an empty plugin list would be
    # reported and its state could mask the specifically configured ones.
    if generic_plugins:
        # key is a legacy name, kept for compatibility.
        missing_status = exit_spec.get("missing_sections", 1)
        plugin_list = ", ".join(sorted(generic_plugins))
        yield ActiveCheckResult(
            missing_status,
            f"Missing monitoring data for plugins: {plugin_list}",
        )

    yield from (
        ActiveCheckResult(status, str(plugin)) for plugin, status in sorted(specific_plugins)
    )
def _check_inventory_tree(
    trees: InventoryTrees,
    old_tree: Optional[StructuredDataNode],
    sw_missing: ServiceState,
    sw_changes: ServiceState,
    hw_changes: ServiceState,
) -> ActiveCheckResult:
    """Rate the freshly computed inventory trees.

    Args:
        trees: The inventory and status data trees of the host.
        old_tree: The previous inventory tree, or None if there is nothing
            to compare against.
        sw_missing: State to report if the 'software' node has no 'packages'
            edge. A falsy value disables this check.
        sw_changes: State to report if the "software" edge changed.
        hw_changes: State to report if the "hardware" edge changed.

    Returns:
        One result whose state is the worst state found and whose summary
        lists every finding.
    """
    inventory = trees.inventory
    if inventory.is_empty() and trees.status_data.is_empty():
        return ActiveCheckResult(0, ("Found no data", ), (), ())

    worst_state = 0
    messages = [f"Found {inventory.count_entries()} inventory entries"]

    # Node 'software' is always there because _do_inv_for creates this node for cluster info
    software_node = inventory.get_node(['software'])
    packages_missing = software_node is not None and not software_node.has_edge('packages')
    if packages_missing and sw_missing:
        messages.append("software packages information is missing" + state_markers[sw_missing])
        worst_state = max(worst_state, sw_missing)

    if old_tree is not None:
        comparisons = (
            ("software", "software changes", sw_changes),
            ("hardware", "hardware changes", hw_changes),
        )
        for edge, text, change_state in comparisons:
            if old_tree.is_equal(inventory, edges=[edge]):
                continue
            messages.append(text + state_markers[change_state])
            worst_state = max(worst_state, change_state)

    if not trees.status_data.is_empty():
        messages.append(f"Found {trees.status_data.count_entries()} status entries")

    return ActiveCheckResult(worst_state, messages, (), ())
def _check_transport(
    self,
    ssh_transport: bool,
    controller_present: bool,
    legacy_pull_mode: Optional[str],
) -> Optional[ActiveCheckResult]:
    """Report how the agent data is transported.

    Returns an OK result for SSH transport. Returns None if there is no agent
    controller, or if the legacy pull mode is unset, empty or "no". Otherwise
    reports unused TLS with the state configured under the "legacy_pull_mode"
    key of the exit spec (default 1).
    """
    if ssh_transport:
        return ActiveCheckResult(0, "Transport via SSH")

    tls_available_but_unused = (
        controller_present and bool(legacy_pull_mode) and legacy_pull_mode != "no"
    )
    if not tls_available_but_unused:
        return None

    details = (
        "The hosts agent supports TLS, but it is not being used.",
        "We strongly recommend to enable TLS by registering the host to the site "
        "(using the `cmk-agent-ctl register` command on the monitored host).",
        "However you can configure missing TLS to be OK in the setting "
        '"State in case of available but not enabled TLS" of the ruleset '
        '"Status of the Checkmk services".',
    )
    return ActiveCheckResult(
        self.exit_spec.get("legacy_pull_mode", 1),
        "TLS is not activated on monitored host (see details)",
        details,
    )
def summarize_success(
    self,
    host_sections: HostSections[AgentRawDataSection],
    *,
    mode: Mode,
) -> Sequence[ActiveCheckResult]:
    """Summarize the piggyback data found for this host.

    Nothing is reported outside of checking mode. If no piggyback data is
    found for the host name or IP address, "Missing data" is reported only
    when `self.always` is set. Otherwise one result is returned per source
    that carries a reason.
    """
    if mode is not Mode.CHECKING:
        return []

    # TODO(ml): The code uses `get_piggyback_raw_data()` instead of
    #           `HostSections.piggyback_raw_data` because this allows it to
    #           sneakily use cached data.  At minimum, we should group all cache
    #           handling performed after the parser.
    sources: Final[Sequence[PiggybackRawDataInfo]] = [
        raw_data_info
        for origin in (self.hostname, self.ipaddress)
        for raw_data_info in get_piggyback_raw_data(origin, self.time_settings)
    ]

    if sources:
        return [ActiveCheckResult(src.reason_status, src.reason) for src in sources if src.reason]

    return [ActiveCheckResult(1, "Missing data")] if self.always else []
def _timing_results(
    total_times: Snapshot,
    fetcher_messages: Sequence[FetcherMessage],
) -> ActiveCheckResult:
    """Build the execution time result of a check run.

    The duration of every fetcher message is added to `total_times`. The
    summary shows the elapsed process time. If
    `config.check_mk_perfdata_with_times` is set, the metrics additionally
    contain the user/system times and, per phase, the idle time of the
    fetchers grouped by fetcher type.
    """
    for msg in fetcher_messages:
        total_times += msg.stats.duration

    process = total_times.process
    infotext = "execution time %.1f sec" % process.elapsed

    if not config.check_mk_perfdata_with_times:
        return ActiveCheckResult(0, infotext, (), ("execution_time=%.3f" % process.elapsed, ))

    perfdata = [
        "execution_time=%.3f" % process.elapsed,
        "user_time=%.3f" % process.user,
        "system_time=%.3f" % process.system,
        "children_user_time=%.3f" % process.children_user,
        "children_system_time=%.3f" % process.children_system,
    ]

    # Fetcher types without an entry here do not contribute to any phase.
    phase_of_fetcher = {
        FetcherType.PIGGYBACK: "agent",
        FetcherType.PROGRAM: "ds",
        FetcherType.SNMP: "snmp",
        FetcherType.TCP: "agent",
    }
    time_per_phase: DefaultDict[str, Snapshot] = defaultdict(Snapshot.null)
    for msg in fetcher_messages:
        with suppress(KeyError):
            time_per_phase[phase_of_fetcher[msg.fetcher_type]] += msg.stats.duration

    perfdata.extend(
        "cmk_time_%s=%.3f" % (phase, duration.idle) for phase, duration in time_per_phase.items()
    )
    return ActiveCheckResult(0, infotext, (), perfdata)
def test_active_check_result():
    """Combining subresults takes the worst state and concatenates the texts."""
    ok_part = ActiveCheckResult(0, ("Ok",), ("We're good",), ("metric1",))
    crit_part = ActiveCheckResult(2, ("Critical",), ("We're doomed",), ("metric2",))

    combined = ActiveCheckResult.from_subresults(ok_part, crit_part)

    expected = ActiveCheckResult(
        2,
        ["Ok", "Critical"],
        ["We're good", "We're doomed"],
        ["metric1", "metric2"],
    )
    assert combined == expected
def check_sources(
    *,
    source_results: SourceResults,
    mode: Mode,
    include_ok_results: bool = False,
    override_non_ok_state: Optional[ServiceState] = None,
) -> Iterable[ActiveCheckResult]:
    """Yield the summarized results of all data sources.

    A source is skipped if all of its subresults are OK, unless
    `include_ok_results` is set. The summary of a source's first subresult is
    prefixed with the source ID. If `override_non_ok_state` is given, it
    replaces the state of every yielded subresult.
    """
    for source, host_sections in source_results:
        subresults = source.summarize(host_sections, mode=mode)

        if not include_ok_results and all(sub.state == 0 for sub in subresults):
            continue

        for position, sub in enumerate(subresults):
            state = sub.state if override_non_ok_state is None else override_non_ok_state
            # Only the first subresult carries the source ID.
            summary = f"[{source.id}] {sub.summary}" if position == 0 else sub.summary
            yield ActiveCheckResult(state, summary, sub.details, sub.metrics)
def summarize_check_mk_section(
    self,
    cmk_section: Optional[Sequence[AgentRawDataSection]],
    *,
    mode: Mode,
) -> Sequence[ActiveCheckResult]:
    """Summarize the agent information of the Check_MK section.

    For non-cluster hosts the agent version and OS are reported if present.
    In checking mode with a non-empty section, the version, only-from,
    agent update, Python plugin and transport checks are run as well, and
    every result they produce is appended.
    """
    agent_info = self._get_agent_info(cmk_section)

    subresults = []

    if not self.is_cluster and agent_info["version"] is not None:
        subresults.append(ActiveCheckResult(0, "Version: %s" % agent_info["version"]))

    if not self.is_cluster and agent_info["agentos"] is not None:
        subresults.append(ActiveCheckResult(0, "OS: %s" % agent_info["agentos"]))

    if mode is Mode.CHECKING and cmk_section:
        subresults.extend(r for r in [
            self._check_version(agent_info.get("version")),
            self._check_only_from(agent_info.get("onlyfrom")),
            self._check_agent_update(
                agent_info.get("updatefailed"),
                agent_info.get("updaterecoveraction"),
            ),
            self._check_python_plugins(
                agent_info.get("failedpythonplugins"),
                agent_info.get("failedpythonreason"),
            ),
            # _check_transport takes the SSH flag as its first argument.
            # NOTE(review): the key "sshclient" is assumed from the lowercase
            # keys used above - confirm against _get_agent_info.
            self._check_transport(
                bool(agent_info.get("sshclient")),
                bool(agent_info.get("agentcontroller")),
                agent_info.get("legacypullmode"),
            ),
        ] if r)

    return subresults
def _check_only_from(
    self,
    agent_only_from: Optional[str],
) -> Optional[ActiveCheckResult]:
    """Compare the IP restriction reported by the agent with the configured one.

    Args:
        agent_only_from: The allowed addresses as reported by the agent, or
            None if the agent did not report any.

    Returns:
        None if either side is not available. An OK result if both normalized
        sets are equal. Otherwise a result with the state configured under
        "restricted_address_mismatch" in the exit spec (default 1), naming
        the exceeding and missing entries.
    """
    if agent_only_from is None:
        return None

    config_only_from = self.only_from
    if config_only_from is None:
        return None

    allowed_nets = set(cmk.utils.misc.normalize_ip_addresses(agent_only_from))
    expected_nets = set(cmk.utils.misc.normalize_ip_addresses(config_only_from))
    if allowed_nets == expected_nets:
        # Sets have no defined iteration order: sort to get a stable summary,
        # like it is done for the mismatch texts below.
        return ActiveCheckResult(0, f"Allowed IP ranges: {' '.join(sorted(allowed_nets))}")

    infotexts = []

    exceeding = allowed_nets - expected_nets
    if exceeding:
        infotexts.append("exceeding: %s" % " ".join(sorted(exceeding)))

    missing = expected_nets - allowed_nets
    if missing:
        infotexts.append("missing: %s" % " ".join(sorted(missing)))

    mismatch_state = self.exit_spec.get("restricted_address_mismatch", 1)
    return ActiveCheckResult(
        mismatch_state,
        f"Unexpected allowed IP ranges ({', '.join(infotexts)})",
    )
def test_set_version_and_os(self, summarizer, mode):
    """Version and OS lines of the section are each reported as OK result."""
    section = [
        ["version:", "42"],
        ["agentos:", "BeOS", "or", "Haiku", "OS"],
    ]
    expected = [
        ActiveCheckResult(0, "Version: 42"),
        ActiveCheckResult(0, "OS: BeOS or Haiku OS"),
    ]

    assert summarizer.summarize_check_mk_section(section, mode=mode) == expected
def test_check_parsing_errors_with_errors_() -> None:
    """Each error yields one result: state 1 by default, `error_state` if given."""
    errors = ("error - message", )

    default_results = check_parsing_errors(errors)
    assert default_results == [ActiveCheckResult(1, "error", ("error - message", ))]

    crit_results = check_parsing_errors(errors, error_state=2)
    assert crit_results == [ActiveCheckResult(2, "error", ("error - message", ))]
def active_check_inventory(hostname: HostName, options: Dict[str, int]) -> ActiveCheckResult:
    """Run the HW/SW inventory for a host and rate the outcome.

    Args:
        hostname: The host to inventorize.
        options: States for the individual findings. The keys read are
            "hw-changes", "sw-changes", "sw-missing" (default 0 each) and
            "inv-fail-status" (default 1).

    Returns:
        The combination of the tree update result, the rating of the tree
        contents, the data source results and the parsing errors.
    """
    hw_changes_state = options.get("hw-changes", 0)
    sw_changes_state = options.get("sw-changes", 0)
    sw_missing_state = options.get("sw-missing", 0)
    fail_state = options.get("inv-fail-status", 1)

    host_config = config.HostConfig.make_host_config(hostname)
    retentions_tracker = RetentionsTracker(host_config.inv_retention_intervals)

    inv_result = _inventorize_host(
        host_config=host_config,
        selected_sections=NO_SELECTION,
        run_plugin_names=EVERYTHING,
        retentions_tracker=retentions_tracker,
    )
    trees = inv_result.trees

    retentions = Retentions(
        retentions_tracker,
        trees.inventory,
        # If no intervals are configured then remove all known retentions
        do_update=bool(host_config.inv_retention_intervals),
    )

    if not inv_result.safe_to_write:
        # Without a written tree there is nothing to compare against.
        old_tree, sources_state = None, 1
        update_result = ActiveCheckResult(
            sources_state,
            (f"Cannot update tree{state_markers[sources_state]}", ),
            (),
            (),
        )
    else:
        old_tree = _save_inventory_tree(hostname, trees.inventory, retentions)
        update_result = ActiveCheckResult(0, (), (), ())

    _run_inventory_export_hooks(host_config, trees.inventory)

    tree_result = _check_inventory_tree(
        trees,
        old_tree,
        sw_missing_state,
        sw_changes_state,
        hw_changes_state,
    )
    source_checks = list(
        check_sources(
            source_results=inv_result.source_results,
            mode=Mode.INVENTORY,
            # Do not use source states which would overwrite "State when inventory fails" in the
            # ruleset "Do hardware/software Inventory". These are handled by the "Check_MK" service
            override_non_ok_state=fail_state,
        ))
    parsing_result = check_parsing_errors(
        errors=inv_result.parsing_errors,
        error_state=fail_state,
    )

    return ActiveCheckResult.from_subresults(
        update_result,
        tree_result,
        *source_checks,
        parsing_result,
    )
def test_mismatch(self, summarizer, mode):
    """An agent version differing from the expected one adds a WARN result."""
    section = [
        ["version:", "69"],
        ["agentos:"],
    ]
    expected = [
        ActiveCheckResult(0, "Version: 69"),
        ActiveCheckResult(1, "unexpected agent version 69 (should be 42)"),
    ]

    assert summarizer.summarize_check_mk_section(section, mode=mode) == expected
def test_check_parsing_errors_with_errors_() -> None:
    """Errors give one result whose summary carries the state marker."""
    errors = ("error - message",)

    default_result = check_parsing_errors(errors)
    assert default_result == ActiveCheckResult(1, ["error(!)"], ("error - message",), ())

    crit_result = check_parsing_errors(errors, error_state=2)
    assert crit_result == ActiveCheckResult(2, ["error(!!)"], ("error - message",), ())
def _check_host_labels(
    host_labels: QualifiedDiscovery[HostLabel],
    severity_new_host_label: int,
    discovery_mode: DiscoveryMode,
) -> Tuple[ActiveCheckResult, bool]:
    """Rate newly discovered host labels.

    Returns:
        A pair of the check result and a flag. Without new host labels the
        result is OK and the flag is False. Otherwise the result has the
        given severity and the flag is True for the discovery modes NEW,
        FIXALL and REFRESH.
    """
    if not host_labels.new:
        return ActiveCheckResult(0, ["no new host labels"], [], []), False

    result = ActiveCheckResult(
        severity_new_host_label,
        [f"{len(host_labels.new)} new host labels"],
        [],
        [],
    )
    flag = discovery_mode in (DiscoveryMode.NEW, DiscoveryMode.FIXALL, DiscoveryMode.REFRESH)
    return result, flag
def _schedule_rediscovery(
    *,
    host_config: config.HostConfig,
    need_rediscovery: bool,
) -> ActiveCheckResult:
    """Add the host to the autodiscovery queue if a rediscovery is needed.

    For a cluster with nodes, every node is queued instead of the cluster
    host itself. The returned result is always OK; its summary is empty if
    nothing was scheduled.
    """
    if not need_rediscovery:
        return ActiveCheckResult(0, (), (), ())

    queue = _AutodiscoveryQueue()
    if host_config.is_cluster and host_config.nodes:
        targets = host_config.nodes
    else:
        targets = [host_config.hostname]

    for target in targets:
        queue.add(target)

    return ActiveCheckResult(0, ("rediscovery scheduled", ), (), ())
def summarize_success(
    self,
    host_sections: HostSections[SNMPRawDataSection],
    *,
    mode: Mode,
) -> Sequence[ActiveCheckResult]:
    """Return a single OK "Success" result, regardless of the arguments."""
    success = ActiveCheckResult(0, "Success")
    return [success]
def check_parsing_errors(
    errors: Sequence[str],
    *,
    error_state: ServiceState = 1,
) -> Sequence[ActiveCheckResult]:
    """Turn parsing error messages into check results.

    Each message yields one result with state `error_state`. The summary is
    the part of the message before the first " - ", the details hold the
    complete message.
    """
    state = error_state if errors else 0

    results = []
    for message in errors:
        headline = message.split(" - ")[0]
        results.append(ActiveCheckResult(state, headline, (message,)))
    return results
def test_check_parsing_errors_are_ok() -> None:
    """With `error_state=0` the errors are reported as OK without a marker."""
    errors = ("error - message",)

    result = check_parsing_errors(errors, error_state=0)

    assert result == ActiveCheckResult(0, ["error"], ("error - message",), ())
def summarize_failure(
    self,
    exc: Exception,
    *,
    mode: Mode,
) -> Sequence[ActiveCheckResult]:
    """Return one result for a failed source.

    The state is derived from the exception via `_extract_status`, the
    summary is the exception text.
    """
    status = self._extract_status(exc)
    return [ActiveCheckResult(status, str(exc))]
def test_allowed(self, summarizer, mode):
    """A matching only-from restriction is reported as OK."""
    section = [
        ["onlyfrom:", "deep_space"],
    ]
    expected = [ActiveCheckResult(0, "Allowed IP ranges: deep_space")]

    assert summarizer.summarize_check_mk_section(section, mode=mode) == expected
def test_summarize_existing_data_with_always_option(
    self,
    summarizer,
    monkeypatch,
):
    """Existing piggyback data is summarized with its own status and message."""

    def fake_get_piggyback_raw_data(source_hostname, time_settings):
        # No data for an unset origin, one successful entry otherwise.
        if not source_hostname:
            return ()
        file_info = PiggybackFileInfo(
            source_hostname=source_hostname,
            file_path=Path("/dev/null"),
            successfully_processed=True,
            message="success",
            status=0,
        )
        return [PiggybackRawDataInfo(file_info, raw_data=AgentRawData(b""))]

    monkeypatch.setattr(summarizer, "always", True)
    monkeypatch.setattr(
        cmk.core_helpers.piggyback,
        "get_piggyback_raw_data",
        fake_get_piggyback_raw_data,
    )

    if summarizer.hostname is None and summarizer.ipaddress is None:
        return pytest.skip()

    expected = ActiveCheckResult(0, "success")
    for result in summarizer.summarize_success():
        assert result == expected
def test_at_least_dict_empty(self, summarizer, mode):
    """Only the version is reported for this summarizer fixture."""
    section = [
        ["version:", "69"],
        ["agentos:"],
    ]
    expected = [ActiveCheckResult(0, "Version: 69")]

    assert summarizer.summarize_check_mk_section(section, mode=mode) == expected
def test_match(self, summarizer, mode):
    """A matching agent version yields only the version result."""
    section = [
        ["version:", "42"],
        ["agentos:"],
    ]
    expected = [ActiveCheckResult(0, "Version: 42")]

    assert summarizer.summarize_check_mk_section(section, mode=mode) == expected
def test_no_tls_but_ssh(self, summarizer, mode):
    """With an SSH client line, SSH transport is reported instead of missing TLS."""
    section = [
        ["AgentController:", "cmk-agent-ctl 0.1.0"],
        ["LegacyPullMode:", "yes"],
        ["SSHClient:", "1.2.3.4"],
    ]
    expected = [ActiveCheckResult(0, "Transport via SSH")]

    assert summarizer.summarize_check_mk_section(section, mode=mode) == expected
def check_parsing_errors(
    errors: Sequence[str],
    *,
    error_state: ServiceState = 1,
) -> ActiveCheckResult:
    """Combine parsing error messages into a single check result.

    The state is `error_state` if there are errors and 0 otherwise. Per
    message, the summary holds the part before the first " - " followed by
    the state marker. The details are the unmodified messages.
    """
    state = error_state if errors else 0

    summaries = []
    for message in errors:
        headline = message.split(' - ')[0]
        summaries.append(f"{headline}{state_markers[state]}")

    return ActiveCheckResult(state, summaries, errors, ())
def test_exceeding(self, summarizer, mode):
    """Addresses allowed by the agent but not expected are reported as WARN."""
    section = [
        ["onlyfrom:", "deep_space somewhere_else"],
    ]
    expected = [
        ActiveCheckResult(1, "Unexpected allowed IP ranges (exceeding: somewhere_else)"),
    ]

    assert summarizer.summarize_check_mk_section(section, mode=mode) == expected
def summarize_success(self) -> Sequence[ActiveCheckResult]:
    """Summarize the piggyback data found for this host.

    If no piggyback data is found for the host name or IP address,
    "Missing data" is reported only when `self.always` is set. Otherwise one
    result is returned per source that carries a message.
    """
    # TODO(ml): The code uses `get_piggyback_raw_data()` instead of
    #           `HostSections.piggyback_raw_data` because this allows it to
    #           sneakily use cached data.  At minimum, we should group all cache
    #           handling performed after the parser.
    sources: Final[Sequence[PiggybackRawDataInfo]] = [
        raw_data_info
        for origin in (self.hostname, self.ipaddress)
        for raw_data_info in get_piggyback_raw_data(origin, self.time_settings)
    ]

    if sources:
        return [
            ActiveCheckResult(src.info.status, src.info.message)
            for src in sources
            if src.info.message
        ]

    return [ActiveCheckResult(1, "Missing data")] if self.always else []
def test_summarize_missing_data_with_always_option(
    self,
    summarizer,
    monkeypatch,
):
    """With `always` set, the summary is a WARN "Missing data" result."""
    monkeypatch.setattr(summarizer, "always", True)

    expected = [ActiveCheckResult(1, "Missing data")]

    assert summarizer.summarize_success() == expected