def _execute_walks_for_dump(snmp_config, oids):
    # type: (SNMPHostConfig, List[OID]) -> Iterable[SNMPRowInfoForStoredWalk]
    """Walk each OID on the host and yield the rows for a stored-walk dump.

    A failing walk is logged and skipped so the remaining OIDs are still
    processed; with debugging enabled the exception propagates instead.
    """
    for oid in oids:
        try:
            # Fix: the message was missing its trailing newline; console.verbose
            # does not append one itself (compare the backend-based sibling).
            console.verbose("Walk on \"%s\"...\n" % oid)
            yield walk_for_export(snmp_config, oid)
        except Exception as e:
            console.error("Error: %s\n" % e)
            if cmk.utils.debug.enabled():
                raise
def _execute_walks_for_dump(oids: List[OID], *,
                            backend: ABCSNMPBackend) -> Iterable[SNMPRowInfoForStoredWalk]:
    """Yield one stored-walk row info per OID, walking via the given backend.

    Errors during a single walk are reported and that OID is skipped, so the
    remaining OIDs still get processed; in debug mode the exception is
    re-raised instead.
    """
    for current_oid in oids:
        try:
            console.verbose("Walk on \"%s\"...\n" % current_oid)
            yield walk_for_export(current_oid, backend=backend)
        except Exception as exc:
            console.error("Error: %s\n" % exc)
            if cmk.utils.debug.enabled():
                raise
def wrapped_check_func(hostname: HostName, *args: Any, **kwargs: Any) -> int:
    """Run check_func for the host and emit an active-check style result.

    Exceptions are mapped to service states via the host's exit code spec;
    unexpected ones are turned into a crash dump. Returns the final state.
    """
    host_config = config.get_config_cache().get_host_config(hostname)
    exit_spec = host_config.exit_code_spec()

    status, infotexts, long_infotexts, perfdata = 0, [], [], []
    try:
        status, infotexts, long_infotexts, perfdata = check_func(hostname, *args, **kwargs)
    except MKTimeout:
        if _in_keepalive_mode():
            raise
        infotexts.append("Timed out")
        status = max(status, exit_spec.get("timeout", 2))
    except (MKAgentError, MKFetcherError, MKSNMPError, MKIPAddressLookupError) as e:
        infotexts.append("%s" % e)
        status = exit_spec.get("connection", 2)
    except MKGeneralException as e:
        infotexts.append("%s" % e)
        status = max(status, exit_spec.get("exception", 3))
    except Exception:
        if cmk.utils.debug.enabled():
            raise
        crash_output = cmk.base.crash_reporting.create_check_crash_dump(
            hostname, check_plugin_name, {}, False, description)
        # Keep the whole crash dump on a single line of plugin output.
        infotexts.append(crash_output.replace("Crash dump:\n", "Crash dump:\\n"))
        status = max(status, exit_spec.get("exception", 3))

    # Produce the service check result output
    output_txt = ", ".join(infotexts)
    if perfdata:
        output_txt += " | %s" % " ".join(perfdata)
    if long_infotexts:
        output_txt = "%s\n%s" % (output_txt, "\n".join(long_infotexts))
    output_txt += "\n"

    if _in_keepalive_mode():
        if not cmk_version.is_raw_edition():
            import cmk.base.cee.keepalive as keepalive  # pylint: disable=no-name-in-module
        else:
            # NOTE(review): on the raw edition keepalive stays None and the
            # call below would fail -- presumably keepalive mode is CEE-only;
            # confirm before relying on this path.
            keepalive = None  # type: ignore[assignment]

        keepalive.add_active_check_result(hostname, output_txt)
        console.verbose(ensure_str(output_txt))
    else:
        out.output(ensure_str(output_txt))

    return status
def service_outside_check_period(config_cache: config.ConfigCache, hostname: HostName,
                                 description: ServiceName) -> bool:
    """Tell whether the service must be skipped because its check period is inactive."""
    period = config_cache.check_period_of_service(hostname, description)

    # No configured period means the service is always checked.
    if period is None:
        return False

    if not cmk.base.core.check_timeperiod(period):
        console.verbose("Skipping service %s: currently not in timeperiod %s.\n", description,
                        period)
        return True

    console.vverbose("Service %s: timeperiod %s is currently active.\n", description, period)
    return False
def walk(self,
         oid: str,
         check_plugin_name: Optional[str] = None,
         table_base_oid: Optional[str] = None,
         context_name: Optional[str] = None) -> SNMPRowInfo:
    """Execute the snmpwalk command for the OID and return the parsed rows.

    Raises MKSNMPError if the walk command exits non-zero; on MKTimeout the
    child process is terminated before the exception propagates.
    """
    protospec = self._snmp_proto_spec()

    ipaddress = self.config.ipaddress
    if self.config.is_ipv6_primary:
        ipaddress = "[" + ipaddress + "]"

    portspec = self._snmp_port_spec()
    command = self._snmp_walk_command(context_name)
    command += ["-OQ", "-OU", "-On", "-Ot", "%s%s%s" % (protospec, ipaddress, portspec), oid]

    console.vverbose("Running '%s'\n" % subprocess.list2cmdline(command))

    snmp_process = None
    exitstatus = None
    rowinfo: SNMPRowInfo = []
    # Initialized up front so the error path below can never hit an
    # unbound name if stderr was unavailable.
    error = ""
    try:
        # Fix: "stdin=open(os.devnull)" leaked a file object that was never
        # closed; subprocess.DEVNULL achieves the same without the leak.
        snmp_process = subprocess.Popen(command,
                                        close_fds=True,
                                        stdin=subprocess.DEVNULL,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        encoding="utf-8")
        rowinfo = self._get_rowinfo_from_snmp_process(snmp_process)
    except MKTimeout:
        # On timeout exception try to stop the process to prevent child process "leakage"
        if snmp_process:
            os.kill(snmp_process.pid, signal.SIGTERM)
            snmp_process.wait()
        raise
    finally:
        # The stdout and stderr pipe are not closed correctly on a MKTimeout
        # Normally these pipes getting closed after p.communicate finishes
        # Closing them a second time in a OK scenario won't hurt neither..
        if snmp_process:
            exitstatus = snmp_process.wait()
            if snmp_process.stderr:
                error = snmp_process.stderr.read()
                snmp_process.stderr.close()
            if snmp_process.stdout:
                snmp_process.stdout.close()

    if exitstatus:
        console.verbose(tty.red + tty.bold + "ERROR: " + tty.normal +
                        "SNMP error: %s\n" % ensure_str(error).strip())
        raise MKSNMPError("SNMP Error on %s: %s (Exit-Code: %d)" %
                          (ipaddress, ensure_str(error).strip(), exitstatus))
    return rowinfo
def _get_cached_snmpwalk(hostname, fetchoid):
    # type: (HostName, OID) -> Optional[SNMPRowInfo]
    """Return the cached walk rows for fetchoid, or None when the cache is unusable."""
    cache_path = _snmpwalk_cache_path(hostname, fetchoid)
    try:
        console.vverbose("  Loading %s from walk cache %s\n" % (fetchoid, cache_path))
        return store.load_object_from_file(cache_path)
    except Exception:
        if cmk.utils.debug.enabled():
            raise
        # A broken cache file is not fatal -- fall back to a live walk.
        console.verbose("  Failed loading walk cache from %s. Continue without it.\n" % cache_path)
        return None
def section_step(text: str, add_info: str = "", verbose: bool = True) -> None:
    """Print a highlighted step headline, optionally with extra detail in parens."""
    # Additional information, not titlecased
    suffix = " (%s)" % add_info if add_info else ""
    emit = console.verbose if verbose else console.info
    emit("%s+%s %s%s\n", tty.yellow, tty.normal, text.upper(), suffix)
def _execute_check( parsed_sections_broker: ParsedSectionsBroker, host_config: config.HostConfig, ipaddress: Optional[HostAddress], service: Service, *, dry_run: bool, show_perfdata: bool, value_store_manager: value_store.ValueStoreManager, ) -> bool: plugin = agent_based_register.get_check_plugin(service.check_plugin_name) # check if we must use legacy mode. remove this block entirely one day if (plugin is not None and host_config.is_cluster and plugin.cluster_check_function.__name__ == "cluster_legacy_mode_from_hell"): submittable = _legacy_mode.get_aggregated_result( parsed_sections_broker, host_config.hostname, ipaddress, service, used_params=( # time_resolved_check_parameters(service.parameters) # if isinstance(service.parameters, cmk.base.config.TimespecificParamList) else service.parameters), value_store_manager=value_store_manager, ) else: # This is the new, shiny, 'normal' case. submittable = get_aggregated_result( parsed_sections_broker, host_config, ipaddress, service, plugin, lambda: _final_read_only_check_parameters(service.parameters), value_store_manager=value_store_manager, ) if submittable.submit: _submit_to_core.check_result( host_name=host_config.hostname, service_name=service.description, result=submittable.result, cache_info=submittable.cache_info, dry_run=dry_run, show_perfdata=show_perfdata, ) else: console.verbose( f"{service.description:20} PEND - {submittable.result[1]}\n") return submittable.data_received
def cached_dns_lookup(hostname, family):
    # type: (HostName, int) -> Optional[str]
    """Look up the IPv4/IPv6 address of hostname with two layers of caching.

    family is 4 or 6. Returns None for no-IP hosts; raises
    MKIPAddressLookupError when DNS fails and no cached address exists.
    """
    # First layer: per-process in-memory cache.
    cache = _config_cache.get_dict("cached_dns_lookup")
    cache_id = hostname, family

    # Address has already been resolved in prior call to this function?
    try:
        return cache[cache_id]
    except KeyError:
        pass

    # Second layer: the persisted on-disk lookup cache.
    ip_lookup_cache = _get_ip_lookup_cache()

    cached_ip = ip_lookup_cache.get(cache_id)
    if cached_ip and config.use_dns_cache:
        cache[cache_id] = cached_ip
        return cached_ip

    host_config = config.get_config_cache().get_host_config(hostname)

    if host_config.is_no_ip_host:
        cache[cache_id] = None
        return None

    # Now do the actual DNS lookup
    try:
        ipa = socket.getaddrinfo(hostname, None,
                                 family == 4 and socket.AF_INET or socket.AF_INET6)[0][4][0]

        # Update our cached address if that has changed or was missing
        if ipa != cached_ip:
            console.verbose("Updating IPv%d DNS cache for %s: %s\n" % (family, hostname, ipa))
            ip_lookup_cache.update_cache(cache_id, ipa)

        cache[cache_id] = ipa  # Update in-memory-cache
        return ipa

    except (MKTerminate, MKTimeout):
        # We should be more specific with the exception handler below, then we
        # could drop this special handling here
        raise

    except Exception as e:
        # DNS failed. Use cached IP address if present, even if caching
        # is disabled.
        if cached_ip:
            cache[cache_id] = cached_ip
            return cached_ip
        cache[cache_id] = None
        raise MKIPAddressLookupError("Failed to lookup IPv%d address of %s via DNS: %s" %
                                     (family, hostname, e))
def _output_check_result(servicedesc: ServiceName, state: ServiceState, infotext: ServiceDetails,
                         perftexts: List[str]) -> None:
    """Print one check result line; appends perfdata when _show_perfdata is set."""
    if _show_perfdata:
        perf_suffix = ' (%s)' % (" ".join(perftexts))
        # Pad the infotext so the perfdata column lines up.
        fmt = "%-20s %s%s" + "%-56s" + "%s%s\n"
    else:
        perf_suffix = ''
        fmt = "%-20s %s%s" + "%s" + "%s%s\n"

    first_line = infotext.split('\n')[0]
    console.verbose(fmt, ensure_str(servicedesc), tty.bold, tty.states[state],
                    ensure_str(first_line), tty.normal, ensure_str(perf_suffix))
def _discover_marked_host_exists(config_cache: config.ConfigCache, host_name: HostName) -> bool:
    """Check the host is still configured; if not, remove its autodiscovery mark."""
    if host_name in config_cache.all_configured_hosts():
        return True

    mark_path = os.path.join(_get_autodiscovery_dir(), host_name)
    try:
        os.remove(mark_path)
    except OSError:
        # Mark file may already be gone -- nothing to clean up then.
        pass
    console.verbose(
        f"  Skipped. Host {host_name} does not exist in configuration. Removing mark.\n")
    return False
def _cleanup_dump_folder(self):
    # type: () -> None
    """Delete all but the newest self._keep_num_dumps dump files."""
    by_mtime = sorted(((dump.stat().st_mtime, dump)
                       for dump in self.dump_folder.glob("*%s" % SUFFIX)),
                      key=lambda entry: entry[0])
    # Oldest first; everything except the last _keep_num_dumps is outdated.
    outdated = by_mtime[:-self._keep_num_dumps]

    console.verbose("Cleanup dump folder (remove old dumps, keep the last %s dumps):\n" %
                    self._keep_num_dumps)
    for _mtime, filepath in outdated:
        console.verbose("  '%s'\n" % _get_short_filepath(filepath))
        self._remove_file(filepath)
def __exit__(
    self,
    exc_type: Optional[Type[BaseException]],
    exc_val: Optional[BaseException],
    exc_tb: Optional[TracebackType],
) -> bool:
    """Disarm the alarm; swallow the internal _Timeout so iteration just stops."""
    signal.alarm(0)
    if isinstance(exc_val, _Timeout):
        # Fix: the message lacked its trailing newline; console.verbose does
        # not append one itself (the other call sites all end in "\n").
        console.verbose(f"  Timeout of {self.limit} seconds reached. "
                        f"Let's do the remaining {self.label} next time.\n")
        # Returning True suppresses the _Timeout exception.
        return True
    return False
def _get_filepaths(self):
    # type: () -> List[Path]
    """Collect the diagnostics files of all elements into the tmp dump folder.

    Elements that have nothing to contribute are reported verbosely and
    skipped; the paths of the collected files are returned.
    """
    out.output("Collect diagnostics information:\n")
    filepaths = []
    for element in self.elements:
        filepath = element.add_or_get_file(self.tmp_dump_folder)
        if filepath is None:
            # Fix: grammar of the user-visible message ("informations" is not
            # a valid English plural).
            console.verbose("  %s: No information\n" % element.ident)
            continue
        out.output("  %s\n" % element.description)
        filepaths.append(filepath)
    return filepaths
def wrapped_check_func(hostname, *args, **kwargs):
    # type: (HostName, Any, Any) -> int
    """Run check_func for the host and emit an active-check style result line.

    Exceptions are mapped to service states via the host's exit code spec;
    unexpected ones produce a crash dump. Returns the final state.
    """
    host_config = config.get_config_cache().get_host_config(hostname)
    exit_spec = host_config.exit_code_spec()

    status, infotexts, long_infotexts, perfdata = 0, [], [], []
    try:
        status, infotexts, long_infotexts, perfdata = check_func(hostname, *args, **kwargs)
    except MKTimeout:
        if _in_keepalive_mode():
            raise
        infotexts.append("Timed out")
        status = max(status, cast(int, exit_spec.get("timeout", 2)))
    except (MKAgentError, MKSNMPError, MKIPAddressLookupError) as e:
        infotexts.append("%s" % e)
        status = cast(int, exit_spec.get("connection", 2))
    except MKGeneralException as e:
        infotexts.append("%s" % e)
        status = max(status, cast(int, exit_spec.get("exception", 3)))
    except Exception:
        if cmk.utils.debug.enabled():
            raise
        crash_output = cmk.base.crash_reporting.create_check_crash_dump(
            hostname, check_plugin_name, None, False, None, description, [])
        # Keep the crash dump on a single line of plugin output.
        infotexts.append(crash_output.replace("Crash dump:\n", "Crash dump:\\n"))
        status = max(status, cast(int, exit_spec.get("exception", 3)))

    # Produce the service check result output
    output_txt = "%s - %s" % (defines.short_service_state_name(status), ", ".join(infotexts))
    if perfdata:
        output_txt += " | %s" % " ".join(perfdata)
    if long_infotexts:
        output_txt = "%s\n%s" % (output_txt, "\n".join(long_infotexts))
    output_txt += "\n"

    if _in_keepalive_mode():
        keepalive.add_keepalive_active_check_result(hostname, output_txt)
        console.verbose(six.ensure_str(output_txt))
    else:
        out.output(six.ensure_str(output_txt))

    return status
def update_dns_cache(
    *,
    host_configs: Iterable[_HostConfigLike],
    configured_ipv4_addresses: Mapping[HostName, HostAddress],
    configured_ipv6_addresses: Mapping[HostName, HostAddress],
    # Do these two even make sense? If either is set, this function
    # will just clear the cache.
    simulation_mode: bool,
    override_dns: Optional[HostAddress],
) -> UpdateDNSCacheResult:
    """Rebuild the persisted DNS cache for all given hosts.

    Returns the number of cache entries and the list of hosts whose lookup
    failed.
    """
    failed = []

    ip_lookup_cache = _get_ip_lookup_cache()
    # Defer persisting until the end -- one write instead of one per host.
    ip_lookup_cache.persist_on_update = False

    console.verbose("Cleaning up existing DNS cache...\n")
    ip_lookup_cache.clear()

    console.verbose("Updating DNS cache...\n")
    for host_config, family in _annotate_family(host_configs):
        console.verbose(f"{host_config.hostname} ({family})...")
        try:
            ip = lookup_ip_address(
                host_config=host_config,
                family=family,
                # Fix: the else-branch wrongly used configured_ipv4_addresses
                # as well, so explicitly configured IPv6 addresses were
                # silently ignored here.
                configured_ip_address=(configured_ipv4_addresses if family is socket.AF_INET else
                                       configured_ipv6_addresses).get(host_config.hostname),
                simulation_mode=simulation_mode,
                override_dns=override_dns,
                use_dns_cache=False,  # it's cleared anyway
            )
            console.verbose(f"{ip}\n")
        except (MKTerminate, MKTimeout):
            # We should be more specific with the exception handler below, then we
            # could drop this special handling here
            raise
        except Exception as e:
            failed.append(host_config.hostname)
            console.verbose("lookup failed: %s\n" % e)
            if cmk.utils.debug.enabled():
                raise
            continue

    ip_lookup_cache.persist_on_update = True
    ip_lookup_cache.save_persisted()

    return len(ip_lookup_cache), failed
def _cleanup_dump_folder(self) -> None:
    """Drop stale dumps, keeping only the newest self._keep_num_dumps files."""
    if not self.tarfile_created:
        # Remove empty tarfile path
        self._remove_file(self.tarfile_path)

    dumps_by_age = sorted(((dump.stat().st_mtime, dump)
                           for dump in self.dump_folder.glob("*%s" % SUFFIX)),
                          key=lambda entry: entry[0])
    # Oldest first; everything except the last _keep_num_dumps is stale.
    stale_dumps = dumps_by_age[:-self._keep_num_dumps]

    section.section_step("Cleanup dump folder",
                         add_info="keep last %d dumps" % self._keep_num_dumps)
    for _mtime, filepath in stale_dumps:
        console.verbose("%s\n", _format_filepath(filepath))
        self._remove_file(filepath)
def _do_discovery_for(
    host_name: HostName,
    ipaddress: Optional[HostAddress],
    parsed_sections_broker: ParsedSectionsBroker,
    run_plugin_names: Container[CheckPluginName],
    only_new: bool,
    *,
    load_labels: bool,
    only_host_labels: bool,
    on_error: OnError,
) -> None:
    """Discover and persist host labels and (unless only_host_labels) services."""
    section.section_step("Analyse discovered host labels")

    host_labels = analyse_node_labels(
        host_name=host_name,
        ipaddress=ipaddress,
        parsed_sections_broker=parsed_sections_broker,
        load_labels=load_labels,
        save_labels=True,
        on_error=on_error,
    )

    count = len(host_labels.new) if host_labels.new else ("no new" if only_new else "no")
    section.section_success(f"Found {count} host labels")

    if only_host_labels:
        return

    section.section_step("Analyse discovered services")

    service_result = analyse_discovered_services(
        host_name=host_name,
        ipaddress=ipaddress,
        parsed_sections_broker=parsed_sections_broker,
        run_plugin_names=run_plugin_names,
        only_new=only_new,
        on_error=on_error,
    )

    # TODO (mo): for the labels the corresponding code is in _host_labels.
    # We should put the persisting in one place.
    autochecks.save_autochecks_file(host_name, service_result.present)

    # Report one line per plugin with the number of newly found services.
    new_per_plugin = Counter(s.check_plugin_name for s in service_result.new)
    for name, count in sorted(new_per_plugin.items()):
        console.verbose("%s%3d%s %s\n" % (tty.green + tty.bold, count, tty.normal, name))

    count = len(service_result.new) if service_result.new else ("no new" if only_new else "no")
    section.section_success(f"Found {count} services")
def execute_check( multi_host_sections: MultiHostSections, host_config: config.HostConfig, ipaddress: Optional[HostAddress], service: Service, *, submit_to_core: bool, show_perfdata: bool, ) -> bool: plugin = agent_based_register.get_check_plugin(service.check_plugin_name) # check if we must use legacy mode. remove this block entirely one day if (plugin is not None and host_config.is_cluster and plugin.cluster_check_function.__name__ == "cluster_legacy_mode_from_hell"): with _service_context(service): return _execute_check_legacy_mode( multi_host_sections, host_config.hostname, ipaddress, service, submit_to_core=submit_to_core, show_perfdata=show_perfdata, ) submit, data_received, result = get_aggregated_result( multi_host_sections, host_config, ipaddress, service, plugin, lambda: determine_check_params(service.parameters), ) if submit: _submit_check_result( host_config.hostname, service.description, result, multi_host_sections.get_cache_info(plugin.sections) if plugin else None, submit_to_core=submit_to_core, show_perfdata=show_perfdata, ) elif data_received: console.verbose("%-20s PEND - %s\n", ensure_str(service.description), result[1]) return data_received
def walk(
    self,
    oid: str,
    section_name: Optional[SectionName] = None,
    table_base_oid: Optional[str] = None,
    context_name: Optional[str] = None,
) -> SNMPRowInfo:
    """Run the snmpwalk command for the OID and return the parsed rows.

    Raises MKSNMPError when the command exits non-zero; on MKTimeout the
    child process is killed before the exception propagates.
    """
    protospec = self._snmp_proto_spec()

    ipaddress = self.config.ipaddress
    if self.config.is_ipv6_primary:
        ipaddress = "[" + ipaddress + "]"

    portspec = self._snmp_port_spec()
    command = self._snmp_walk_command(context_name)
    command += [
        "-OQ", "-OU", "-On", "-Ot",
        "%s%s%s" % (protospec, ipaddress, portspec), oid
    ]

    console.vverbose("Running '%s'\n" % subprocess.list2cmdline(command))

    rowinfo: SNMPRowInfo = []
    with subprocess.Popen(
            command,
            close_fds=True,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
    ) as snmp_process:
        # Both pipes were requested above, so the handles must exist.
        assert snmp_process.stdout
        assert snmp_process.stderr
        try:
            rowinfo = self._get_rowinfo_from_walk_output(snmp_process.stdout)
            error = snmp_process.stderr.read()
        except MKTimeout:
            snmp_process.kill()
            raise

    if snmp_process.returncode:
        console.verbose(tty.red + tty.bold + "ERROR: " + tty.normal +
                        "SNMP error: %s\n" % error.strip())
        raise MKSNMPError("SNMP Error on %s: %s (Exit-Code: %d)" % (
            ipaddress,
            error.strip(),
            snmp_process.returncode,
        ))
    return rowinfo
def _run_inventory_export_hooks(host_config: config.HostConfig,
                                inventory_tree: StructuredDataNode) -> None:
    """Run every configured inventory export hook on the serialized tree."""
    import cmk.base.inventory_plugins as inventory_plugins  # pylint: disable=import-outside-toplevel
    for hookname, params in host_config.inventory_export_hooks:
        console.verbose("Execute export hook: %s%s%s%s" %
                        (tty.blue, tty.bold, hookname, tty.normal))
        try:
            export_function = inventory_plugins.inv_export[hookname]["export_function"]
            export_function(host_config.hostname, params, inventory_tree.serialize())
        except Exception as e:
            if cmk.utils.debug.enabled():
                raise
            raise MKGeneralException("Failed to execute export hook %s: %s" % (hookname, e))
def try_get_activation_lock():
    # type: () -> bool
    """Acquire the restart lock on main.mk when restart locking is configured.

    Returns True when the lock could NOT be taken (only possible in "abort"
    mode, where we try a non-blocking lock), False otherwise.
    """
    global _restart_lock_fd
    # In some bizarr cases (as cmk -RR) we need to avoid duplicate locking!
    if not config.restart_locking or _restart_lock_fd is not None:
        return False

    lock_file = cmk.utils.paths.default_config_dir + "/main.mk"
    _restart_lock_fd = os.open(lock_file, os.O_RDONLY)
    # Make sure that open file is not inherited to monitoring core!
    fcntl.fcntl(_restart_lock_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
    try:
        console.verbose("Waiting for exclusive lock on %s.\n" % lock_file, stream=sys.stderr)
        lock_mode = fcntl.LOCK_EX
        if config.restart_locking == "abort":
            lock_mode |= fcntl.LOCK_NB
        fcntl.flock(_restart_lock_fd, lock_mode)
    except Exception:
        return True
    return False
def _do_inv_for_realhost(
    host_config: config.HostConfig,
    ipaddress: Optional[HostAddress],
    *,
    multi_host_sections: MultiHostSections,
    run_only_plugin_names: Optional[Set[InventoryPluginName]],
) -> InventoryTrees:
    """Run all (selected) inventory plugins for a real host and return the trees."""
    tree_aggregator = _TreeAggregator()
    _set_cluster_property(tree_aggregator.trees.inventory, host_config)

    section.section_step("Executing inventory plugins")
    for inventory_plugin in agent_based_register.iter_all_inventory_plugins():
        if run_only_plugin_names and inventory_plugin.name not in run_only_plugin_names:
            continue

        kwargs = multi_host_sections.get_section_kwargs(
            HostKey(host_config.hostname, ipaddress, SourceType.HOST),
            inventory_plugin.sections,
        )
        if not kwargs:
            console.vverbose(" %s%s%s%s: skipped (no data)\n", tty.yellow, tty.bold,
                             inventory_plugin.name, tty.normal)
            continue

        # Inventory functions can optionally have a second argument: parameters.
        # These are configured via rule sets (much like check parameters).
        if inventory_plugin.inventory_ruleset_name is not None:
            kwargs["params"] = host_config.inventory_parameters(
                str(inventory_plugin.inventory_ruleset_name))  # TODO (mo): keep type!

        exception = tree_aggregator.aggregate_results(
            inventory_plugin.inventory_function(**kwargs),
            inventory_plugin.name,
        )
        if exception:
            console.warning(" %s%s%s%s: failed: %s", tty.red, tty.bold, inventory_plugin.name,
                            tty.normal, exception)
        else:
            console.verbose(" %s%s%s%s", tty.green, tty.bold, inventory_plugin.name, tty.normal)
            console.vverbose(": ok\n")
    console.verbose("\n")

    tree_aggregator.trees.inventory.normalize_nodes()
    tree_aggregator.trees.status_data.normalize_nodes()
    return tree_aggregator.trees
def _do_inv_for_realhost(
    host_config: config.HostConfig,
    ipaddress: Optional[HostAddress],
    *,
    parsed_sections_broker: ParsedSectionsBroker,
    run_plugin_names: Container[InventoryPluginName],
) -> InventoryTrees:
    """Run the selected inventory plugins for a real host and return the trees.

    Plugins are tried for both the host and the management board data source.
    """
    tree_aggregator = TreeAggregator()
    _set_cluster_property(tree_aggregator.trees.inventory, host_config)

    section.section_step("Executing inventory plugins")
    for inventory_plugin in agent_based_register.iter_all_inventory_plugins():
        if inventory_plugin.name not in run_plugin_names:
            continue

        for source_type in (SourceType.HOST, SourceType.MANAGEMENT):
            kwargs = get_section_kwargs(
                parsed_sections_broker,
                HostKey(host_config.hostname, ipaddress, source_type),
                inventory_plugin.sections,
            )
            if not kwargs:
                console.vverbose(" %s%s%s%s: skipped (no data)\n", tty.yellow, tty.bold,
                                 inventory_plugin.name, tty.normal)
                continue

            # Inventory functions can optionally have a second argument: parameters.
            # These are configured via rule sets (much like check parameters).
            if inventory_plugin.inventory_ruleset_name is not None:
                kwargs["params"] = host_config.inventory_parameters(
                    inventory_plugin.inventory_ruleset_name)

            exception = tree_aggregator.aggregate_results(
                inventory_plugin.inventory_function(**kwargs),
            )
            if exception:
                console.warning(" %s%s%s%s: failed: %s", tty.red, tty.bold,
                                inventory_plugin.name, tty.normal, exception)
            else:
                console.verbose(" %s%s%s%s", tty.green, tty.bold, inventory_plugin.name,
                                tty.normal)
                console.vverbose(": ok\n")
    console.verbose("\n")

    tree_aggregator.trees.inventory.normalize_nodes()
    tree_aggregator.trees.status_data.normalize_nodes()
    return tree_aggregator.trees
def execute_check(multi_host_sections: MultiHostSections, host_config: config.HostConfig, ipaddress: Optional[HostAddress], service: Service) -> bool: plugin = agent_based_register.get_check_plugin(service.check_plugin_name) # Make a bit of context information globally available, so that functions # called by checks know this context. set_service is needed for predictive levels! # TODO: This should be a context manager, similar to value_store (f.k.a. item_state) # This is used for both legacy and agent_based API. check_api_utils.set_service(str(service.check_plugin_name), service.description) # check if we must use legacy mode. remove this block entirely one day if (plugin is not None and host_config.is_cluster and plugin.cluster_check_function.__name__ == "cluster_legacy_mode_from_hell"): return _execute_check_legacy_mode( multi_host_sections, host_config.hostname, ipaddress, service, ) submit, data_received, result = get_aggregated_result( multi_host_sections, host_config, ipaddress, service, plugin, lambda: determine_check_params(service.parameters), ) if submit: _submit_check_result( host_config.hostname, service.description, result, multi_host_sections.get_cache_info(plugin.sections) if plugin else None, ) elif data_received: console.verbose("%-20s PEND - %s\n", ensure_str(service.description), result[1]) return data_received
def _make_host_sections(
    nodes: Iterable[Tuple[HostName, Optional[HostAddress], DataSources]],
    *,
    max_cachefile_age: int,
    selected_raw_sections: Optional[SelectedRawSections],
) -> MultiHostSections:
    """Gather ALL host info data for any host (hosts, nodes, clusters) in Check_MK.

    Returns a dictionary object of already parsed HostSections() constructs for each related host.
    For single hosts it's just a single entry in the dictionary. For cluster hosts it contains one
    HostSections() entry for each related node.

    Communication errors are not raised through by this functions. All agent related errors are
    caught by the source.run() method and saved in it's _exception attribute. The caller should
    use source.get_summary_result() to get the state, output and perfdata of the agent excecution
    or source.exception() to get the exception object.
    """
    console.verbose("%s+%s %s\n", tty.yellow, tty.normal, "Fetching data".upper())
    # Special agents can produce data for the same check_plugin_name on the same host, in this case
    # the section lines need to be extended
    multi_host_sections = MultiHostSections()
    for hostname, ipaddress, sources in nodes:
        for source in sources:
            source.configurator.file_cache.max_age = max_cachefile_age
            host_sections = multi_host_sections.setdefault(
                HostKey(hostname, ipaddress, source.configurator.source_type),
                source.default_host_sections,
            )
            host_sections.update(
                # TODO: Select agent / snmp sources before passing
                source.run(selected_raw_sections=selected_raw_sections))

        # Store piggyback information received from all sources of this host. This
        # also implies a removal of piggyback files received during previous calls.
        host_sections = multi_host_sections.setdefault(
            HostKey(hostname, ipaddress, SourceType.HOST),
            AgentHostSections(),
        )
        cmk.utils.piggyback.store_piggyback_raw_data(
            hostname,
            host_sections.piggybacked_raw_data,
        )

    return multi_host_sections
def discover_marked_hosts(core: MonitoringCore) -> None:
    """Run discovery for all hosts marked by the discovery check.

    If any host's services changed, restart/reload the monitoring core with
    the updated configuration.
    """
    console.verbose("Doing discovery for all marked hosts:\n")
    autodiscovery_dir = _get_autodiscovery_dir()

    if not os.path.exists(autodiscovery_dir):
        # there is obviously nothing to do
        console.verbose("  Nothing to do. %s is missing.\n" % autodiscovery_dir)
        return

    config_cache = config.get_config_cache()

    oldest_queued = _queue_age()
    hosts = os.listdir(autodiscovery_dir)
    if not hosts:
        console.verbose("  Nothing to do. No hosts marked by discovery check.\n")
        # Fix: this early-out was missing its "return" -- without it we still
        # queried livestatus for host states and set up the time limit filter
        # just to iterate an empty list.
        return

    # Fetch host state information from livestatus
    host_states = _fetch_host_states()
    activation_required = False
    rediscovery_reference_time = time.time()

    with TimeLimitFilter(limit=120, grace=10, label="hosts") as time_limited:
        for host_name in time_limited(hosts):
            host_config = config_cache.get_host_config(host_name)

            if not _discover_marked_host_exists(config_cache, host_name):
                continue

            # Only try to discover hosts with UP state
            if host_states and host_states.get(host_name) != 0:
                continue

            if _discover_marked_host(config_cache, host_config, rediscovery_reference_time,
                                     oldest_queued):
                activation_required = True

    if activation_required:
        console.verbose("\nRestarting monitoring core with updated configuration...\n")
        with config.set_use_core_config(use_core_config=False):
            try:
                _config_cache.clear_all()
                config.get_config_cache().initialize()

                if config.monitoring_core == "cmc":
                    cmk.base.core.do_reload(core)
                else:
                    cmk.base.core.do_restart(core)
            finally:
                # Always restore the regular core config, even if the reload failed.
                _config_cache.clear_all()
                config.get_config_cache().initialize()
def wrapped_check_func(hostname: HostName, *args: Any, **kwargs: Any) -> int:
    """Run check_func for the host and emit an active-check style result.

    Exceptions are mapped to service states via the host's exit code spec;
    unexpected ones produce a crash dump. Returns the final state.
    """
    host_config = config.get_config_cache().get_host_config(hostname)
    exit_spec = host_config.exit_code_spec()

    try:
        status, output_text = _combine_texts(check_func(hostname, *args, **kwargs))

    except MKTimeout:
        if _in_keepalive_mode():
            raise
        status = exit_spec.get("timeout", 2)
        output_text = "Timed out\n"

    except (MKAgentError, MKFetcherError, MKSNMPError, MKIPAddressLookupError) as e:
        status = exit_spec.get("connection", 2)
        output_text = f"{e}\n"

    except MKGeneralException as e:
        status = exit_spec.get("exception", 3)
        output_text = f"{e}\n"

    except Exception:
        if cmk.utils.debug.enabled():
            raise
        status = exit_spec.get("exception", 3)
        # Keep the whole crash dump on a single line of plugin output.
        output_text = cmk.base.crash_reporting.create_check_crash_dump(
            host_name=hostname,
            service_name=description,
            plugin_name=check_plugin_name,
            plugin_kwargs={},
            is_manual=False,
        ).replace("Crash dump:\n", "Crash dump:\\n")

    if _in_keepalive_mode():
        import cmk.base.cee.keepalive as keepalive  # pylint: disable=no-name-in-module
        keepalive.add_active_check_result(hostname, output_text)
        console.verbose(output_text)
    else:
        out.output(output_text)

    return status
def _run_inventory_export_hooks(host_config, inventory_tree):
    # type: (config.HostConfig, StructuredDataTree) -> None
    """Execute every configured inventory export hook on the raw tree."""
    import cmk.base.inventory_plugins as inventory_plugins  # pylint: disable=import-outside-toplevel
    hooks = host_config.inventory_export_hooks

    if not hooks:
        return

    section.section_step("Execute inventory export hooks")
    for hookname, params in hooks:
        console.verbose("Execute export hook: %s%s%s%s" %
                        (tty.blue, tty.bold, hookname, tty.normal))
        try:
            export_function = inventory_plugins.inv_export[hookname]["export_function"]
            export_function(host_config.hostname, params, inventory_tree.get_raw_tree())
        except Exception as e:
            if cmk.utils.debug.enabled():
                raise
            raise MKGeneralException("Failed to execute export hook %s: %s" % (hookname, e))
def fetch_all(
    *,
    sources: Iterable[Source],
    file_cache_max_age: file_cache.MaxAge,
    mode: Mode,
) -> Iterator[FetcherMessage]:
    """Fetch raw data from every source, yielding one FetcherMessage per source."""
    console.verbose("%s+%s %s\n", tty.yellow, tty.normal, "Fetching data".upper())
    for source in sources:
        console.vverbose("  Source: %s/%s\n" % (source.source_type, source.fetcher_type))

        source.file_cache_max_age = file_cache_max_age

        # Track the CPU time spent on the fetch itself.
        with CPUTracker() as tracker:
            fetched = source.fetch(mode)
        yield FetcherMessage.from_raw_data(
            fetched,
            tracker.duration,
            source.fetcher_type,
        )