def check_icmp_arguments_of(config_cache: ConfigCache, hostname: HostName, add_defaults: bool = True, family: Optional[int] = None) -> str:
    """Build the command line options for the active ICMP (ping) check of a host.

    The configured ``ping_levels`` rules of the host are translated into
    check_icmp options; ``rta`` and ``loss`` fall back to built-in defaults.
    Returns the options as one space-separated string; returns "" when
    defaults are suppressed and no levels are configured.
    """
    cfg = config_cache.get_host_config(hostname)
    levels = cfg.ping_levels
    if not (add_defaults or levels):
        return ""

    # Derive the address family from the host's primary family if not forced.
    if family is None:
        family = 6 if cfg.is_ipv6_primary else 4

    opts = ["-6"] if family == 6 else []

    # Defaults: (warn, crit) for round trip average and packet loss.
    rta_levels = 200.0, 500.0
    loss_levels = 80.0, 100.0
    for key, value in levels.items():
        if key == "timeout":
            if not isinstance(value, int):
                raise TypeError()
            opts.append("-t %d" % value)
        elif key == "packets":
            if not isinstance(value, int):
                raise TypeError()
            opts.append("-n %d" % value)
        elif key == "rta":
            if not isinstance(value, tuple):
                raise TypeError()
            rta_levels = value
        elif key == "loss":
            if not isinstance(value, tuple):
                raise TypeError()
            loss_levels = value

    opts.append("-w %.2f,%.2f%%" % (rta_levels[0], loss_levels[0]))
    opts.append("-c %.2f,%.2f%%" % (rta_levels[1], loss_levels[1]))
    return " ".join(opts)
def _verify_cluster_address_family(nodes: List[str], config_cache: config.ConfigCache, host_config: config.HostConfig) -> None:
    """Warn when the nodes of a cluster do not all share the cluster's primary address family.

    Collects a "<host>: <family>" entry for the cluster itself and every node,
    and emits a single warning listing all of them if any node's primary
    family differs from the cluster's.
    """
    cluster_host_family = "IPv6" if host_config.is_ipv6_primary else "IPv4"
    address_families = [
        "%s: %s" % (host_config.hostname, cluster_host_family),
    ]
    # Fixed: the previous implementation kept a separate `address_family`
    # variable that was initialized to `cluster_host_family` (never None), so
    # its `if address_family is None` branch was unreachable dead code.
    # Comparing each node directly against the cluster family is equivalent.
    mixed = False
    for nodename in nodes:
        node_config = config_cache.get_host_config(nodename)
        family = "IPv6" if node_config.is_ipv6_primary else "IPv4"
        address_families.append("%s: %s" % (nodename, family))
        if family != cluster_host_family:
            mixed = True
    if mixed:
        warning("Cluster '%s' has different primary address families: %s" %
                (host_config.hostname, ", ".join(address_families)))
def _get_monitoring_data_kwargs(
    parsed_sections_broker: ParsedSectionsBroker,
    host_config: config.HostConfig,
    config_cache: config.ConfigCache,
    ipaddress: Optional[HostAddress],
    service: ConfiguredService,
    sections: Sequence[ParsedSectionName],
    source_type: Optional[SourceType] = None,
) -> Tuple[Mapping[str, object], ServiceCheckResult]:
    """Collect the section keyword arguments needed to check *service*.

    Returns the kwargs mapping together with the "no data received" result
    appropriate for the host kind (plain host vs. cluster).
    """
    # Management-board plugins read from the MANAGEMENT source by default.
    if source_type is None:
        if service.check_plugin_name.is_management_name():
            source_type = SourceType.MANAGEMENT
        else:
            source_type = SourceType.HOST

    if not host_config.is_cluster:
        host_key = HostKey(host_config.hostname, ipaddress, source_type)
        kwargs = get_section_kwargs(
            parsed_sections_broker,
            host_key,
            sections,
        )
        return kwargs, ServiceCheckResult.received_no_data()

    # Clustered service: gather data from all nodes the service lives on.
    nodes = config_cache.get_clustered_service_node_keys(
        host_config,
        source_type,
        service.description,
    )
    cluster_kwargs = get_section_cluster_kwargs(
        parsed_sections_broker,
        nodes,
        sections,
    )
    return cluster_kwargs, ServiceCheckResult.cluster_received_no_data(nodes)
def _make_piggyback_nodes(
    mode: Mode,
    config_cache: config.ConfigCache,
    host_config: HostConfig,
) -> Sequence[Tuple[HostName, Optional[HostAddress], Sequence[Source]]]:
    """Resolve a cluster host into per-node (name, address, sources) triples.

    Must only be called for cluster hosts (``host_config.nodes`` is set).
    """
    assert host_config.nodes is not None

    node_specs = []
    for node_name in host_config.nodes:
        node_config = config_cache.get_host_config(node_name)
        node_ip = ip_lookup.lookup_ip_address(
            node_config,
            family=node_config.default_address_family,
        )
        node_sources = make_sources(
            HostConfig.make_host_config(node_name),
            node_ip,
            mode=mode,
        )
        node_specs.append((node_name, node_ip, node_sources))
    return node_specs
def test_manager_get_autochecks_of(
    test_config: config.ConfigCache,
    autochecks_content: str,
    expected_result: Sequence[ConfiguredService],
) -> None:
    """The autochecks manager parses the on-disk file and applies the callbacks."""
    store_path = Path(cmk.utils.paths.autochecks_dir, "host.mk")
    store_path.write_text(autochecks_content, encoding="utf-8")

    result = test_config._autochecks_manager.get_autochecks_of(
        HostName("host"),
        lambda *a: _COMPUTED_PARAMETERS_SENTINEL,
        lambda _host, check, item: f"{check}-{item}",
        lambda hostname, _desc: hostname,
    )
    assert result == expected_result

    # see that compute_check_parameters has been called:
    assert result[0].parameters is _COMPUTED_PARAMETERS_SENTINEL

    # Check that the ConfigCache method also returns the correct data
    assert test_config.get_autochecks_of(HostName("host")) == result
def _keep_service(
    config_cache: config.ConfigCache,
    host_config: config.HostConfig,
    service: Service,
    filter_mode: Optional[Literal["only_clustered", "include_clustered"]],
    skip_ignored: bool,
) -> bool:
    """Decide whether *service* belongs in the service list of this host.

    ``filter_mode`` selects between "only my services" (None),
    "also the clustered ones" ("include_clustered") and
    "only the ones owned by a cluster" ("only_clustered").
    """
    node_name = host_config.hostname

    # Services of unknown plugins are never kept.
    if agent_based_register.get_check_plugin(service.check_plugin_name) is None:
        return False

    if skip_ignored and config.service_ignored(
            node_name, service.check_plugin_name, service.description):
        return False

    if filter_mode == "include_clustered":
        return True

    # Host is not clustered at all: keep unless only clustered ones are wanted.
    if not host_config.part_of_clusters:
        return filter_mode != "only_clustered"

    owner = config_cache.host_of_clustered_service(
        node_name,
        service.description,
        part_of_clusters=host_config.part_of_clusters,
    )
    if filter_mode is None:
        return node_name == owner
    # filter_mode == "only_clustered"
    return node_name != owner
def _discover_marked_host(
    *,
    config_cache: config.ConfigCache,
    host_config: config.HostConfig,
    autodiscovery_queue: _AutodiscoveryQueue,
    reference_time: float,
    oldest_queued: float,
) -> bool:
    """Run rediscovery for one host that was queued for autodiscovery.

    Returns True when the discovery changed something AND the host's
    rediscovery settings require an activation of the changes; False
    otherwise (including when the host was skipped or the discovery failed).
    """
    host_name = host_config.hostname
    console.verbose(f"{tty.bold}{host_name}{tty.normal}:\n")

    # Without an enabled discovery check there are no rediscovery parameters.
    if host_config.discovery_check_parameters is None:
        console.verbose(" failed: discovery check disabled\n")
        return False
    rediscovery_parameters = host_config.discovery_check_parameters.get(
        "inventory_rediscovery", {})

    # Honor the configured rediscovery time windows / grouping constraints.
    reason = _may_rediscover(
        rediscovery_parameters=rediscovery_parameters,
        reference_time=reference_time,
        oldest_queued=oldest_queued,
    )
    if reason:
        console.verbose(f" skipped: {reason}\n")
        return False

    result = automation_discovery(
        config_cache=config_cache,
        host_config=host_config,
        mode=DiscoveryMode(rediscovery_parameters.get("mode")),
        service_filters=_ServiceFilters.from_settings(rediscovery_parameters),
        on_error=OnError.IGNORE,
        use_cached_snmp_data=True,
        # autodiscovery is run every 5 minutes (see
        # omd/packages/check_mk/skel/etc/cron.d/cmk_discovery)
        # make sure we may use the file the active discovery check left behind:
        max_cachefile_age=config.max_cachefile_age(discovery=600),
    )
    if result.error_text is not None:
        # for offline hosts the error message is empty. This is to remain
        # compatible with the automation code
        console.verbose(f" failed: {result.error_text or 'host is offline'}\n")
        # delete the file even in error case, otherwise we might be causing the same error
        # every time the cron job runs
        autodiscovery_queue.remove(host_name)
        return False

    # Anything other than "all services kept, nothing new/removed anywhere"
    # counts as a change.
    something_changed = (result.self_new != 0 or
                         result.self_removed != 0 or
                         result.self_kept != result.self_total or
                         result.clustered_new != 0 or
                         result.clustered_vanished != 0 or
                         result.self_new_host_labels != 0)
    if not something_changed:
        console.verbose(" nothing changed.\n")
        activation_required = False
    else:
        console.verbose(
            f" {result.self_new} new, {result.self_removed} removed, "
            f"{result.self_kept} kept, {result.self_total} total services "
            f"and {result.self_new_host_labels} new host labels. "
            f"clustered new {result.clustered_new}, clustered vanished "
            f"{result.clustered_vanished}")

        # Note: Even if the actual mark-for-discovery flag may have been created by a cluster host,
        # the activation decision is based on the discovery configuration of the node
        activation_required = bool(rediscovery_parameters["activation"])

        # Enforce base code creating a new host config object after this change
        config_cache.invalidate_host_config(host_name)

        # Now ensure that the discovery service is updated right after the changes
        schedule_discovery_check(host_name)

    autodiscovery_queue.remove(host_name)
    return activation_required
def automation_discovery(
    *,
    config_cache: config.ConfigCache,
    host_config: config.HostConfig,
    mode: DiscoveryMode,
    service_filters: Optional[_ServiceFilters],
    on_error: OnError,
    use_cached_snmp_data: bool,
    max_cachefile_age: cmk.core_helpers.cache.MaxAge,
) -> DiscoveryResult:
    """Perform a service/host-label discovery on a single host.

    Fetches data via the broker, updates the host's autochecks and host
    labels according to *mode*, and returns a DiscoveryResult summarizing
    counts and a human-readable diff. Errors (except MKTimeout) are caught
    and reported in ``result.error_text`` unless debug mode is on.
    """
    console.verbose(" Doing discovery with mode '%s'...\n" % mode)

    host_name = host_config.hostname
    result = DiscoveryResult()
    # Inactive hosts get an empty error text -- callers treat this as "offline".
    if host_name not in config_cache.all_active_hosts():
        result.error_text = ""
        return result

    cmk.core_helpers.cache.FileCacheFactory.use_outdated = True
    cmk.core_helpers.cache.FileCacheFactory.maybe = use_cached_snmp_data

    try:
        # in "refresh" mode we first need to remove all previously discovered
        # checks of the host, so that _get_host_services() does show us the
        # new discovered check parameters.
        if mode is DiscoveryMode.REFRESH:
            result.self_removed += host_config.remove_autochecks()  # this is cluster-aware!

        # Clusters have no address of their own; data comes from the nodes.
        if host_config.is_cluster:
            ipaddress = None
        else:
            ipaddress = config.lookup_ip_address(host_config)

        parsed_sections_broker, _source_results = make_broker(
            config_cache=config_cache,
            host_config=host_config,
            ip_address=ipaddress,
            mode=Mode.DISCOVERY,
            selected_sections=NO_SELECTION,
            file_cache_max_age=max_cachefile_age,
            fetcher_messages=(),
            force_snmp_cache_refresh=not use_cached_snmp_data,
            on_scan_error=on_error,
        )

        if mode is not DiscoveryMode.REMOVE:
            host_labels = analyse_host_labels(
                host_config=host_config,
                ipaddress=ipaddress,
                parsed_sections_broker=parsed_sections_broker,
                load_labels=True,
                save_labels=True,
                on_error=on_error,
            )
            result.self_new_host_labels = len(host_labels.new)
            result.self_total_host_labels = len(host_labels.present)

        if mode is DiscoveryMode.ONLY_HOST_LABELS:
            # This is the result of a refactoring, and the following code was added
            # to ensure a compatible behaviour. I don't think it is particularly
            # sensible. We used to only compare service descriptions of old and new
            # services, so `make_object_diff` was always comparing two identical objects
            # if the mode was DiscoveryMode.ONLY_HOST_LABEL.
            # We brainlessly mimic that behaviour, for now.
            result.diff_text = make_object_diff(set(), set())
            return result

        # Compute current state of new and existing checks
        services = _get_host_services(
            host_config,
            ipaddress,
            parsed_sections_broker,
            on_error=on_error,
        )

        old_services = services.get("old", [])

        # Create new list of checks
        new_services = _get_post_discovery_services(
            host_name, services, service_filters or _ServiceFilters.accept_all(), result, mode)
        host_config.set_autochecks(new_services)

        # If old_services == new_services, make_object_diff will return
        # something along the lines of "nothing changed".
        # I guess this was written before discovered host labels were invented.
        result.diff_text = make_object_diff(
            {x.service.description for x in old_services},
            {x.service.description for x in new_services},
        )

    except MKTimeout:
        raise  # let general timeout through

    except Exception as e:
        if cmk.utils.debug.enabled():
            raise
        result.error_text = str(e)

    result.self_total = result.self_new + result.self_kept
    return result
def get_host_attributes(hostname: HostName, config_cache: ConfigCache) -> ObjectAttributes:
    """Compute the object attributes (custom macros) for *hostname*.

    Combines the explicitly configured extra attributes with tag, label,
    address, WATO-path, icon/action and (managed edition) customer
    information. Returns the attribute mapping used when generating the
    monitoring configuration.
    """
    host_config = config_cache.get_host_config(hostname)
    attrs = host_config.extra_host_attributes

    # Pre 1.6 legacy attribute. We have changed our whole code to use the
    # livestatus column "tags" which is populated by all attributes starting with
    # "__TAG_" instead. We may deprecate this is one day.
    # (Fixed: reuse the host_config fetched above instead of performing a
    # second, redundant config_cache.get_host_config() lookup.)
    attrs["_TAGS"] = " ".join(sorted(host_config.tags))
    attrs.update(_get_tag_attributes(host_config.tag_groups, "TAG"))
    attrs.update(_get_tag_attributes(host_config.labels, "LABEL"))
    attrs.update(_get_tag_attributes(host_config.label_sources, "LABELSOURCE"))

    if "alias" not in attrs:
        attrs["alias"] = host_config.alias

    # Now lookup configured IP addresses
    v4address: Optional[str] = None
    if host_config.is_ipv4_host:
        v4address = ip_address_of(host_config, 4)
    if v4address is None:
        v4address = ""
    attrs["_ADDRESS_4"] = v4address

    v6address: Optional[str] = None
    if host_config.is_ipv6_host:
        v6address = ip_address_of(host_config, 6)
    if v6address is None:
        v6address = ""
    attrs["_ADDRESS_6"] = v6address

    # "address" mirrors whichever family is the host's primary one.
    ipv6_primary = host_config.is_ipv6_primary
    if ipv6_primary:
        attrs["address"] = attrs["_ADDRESS_6"]
        attrs["_ADDRESS_FAMILY"] = "6"
    else:
        attrs["address"] = attrs["_ADDRESS_4"]
        attrs["_ADDRESS_FAMILY"] = "4"

    # Additional addresses get both a joined list and numbered single macros.
    add_ipv4addrs, add_ipv6addrs = host_config.additional_ipaddresses
    if add_ipv4addrs:
        attrs["_ADDRESSES_4"] = " ".join(add_ipv4addrs)
        for nr, ipv4_address in enumerate(add_ipv4addrs):
            key = "_ADDRESS_4_%s" % (nr + 1)
            attrs[key] = ipv4_address
    if add_ipv6addrs:
        attrs["_ADDRESSES_6"] = " ".join(add_ipv6addrs)
        for nr, ipv6_address in enumerate(add_ipv6addrs):
            key = "_ADDRESS_6_%s" % (nr + 1)
            attrs[key] = ipv6_address

    # Add the optional WATO folder path
    path = config.host_paths.get(hostname)
    if path:
        attrs["_FILENAME"] = path

    # Add custom user icons and actions
    actions = host_config.icons_and_actions
    if actions:
        attrs["_ACTIONS"] = ",".join(actions)

    if cmk_version.is_managed_edition():
        attrs["_CUSTOMER"] = config.current_customer  # type: ignore[attr-defined]

    return attrs
def _discover_marked_host(config_cache: config.ConfigCache, host_config: config.HostConfig,
                          now_ts: float, oldest_queued: float) -> bool:
    """Run rediscovery for one host whose autodiscovery flag file exists.

    Returns True only when the discovery changed something and the host's
    rediscovery settings request activation of the changes. The flag file is
    always removed after a discovery attempt (even on error), so a failing
    host does not trigger the same error on every cron run.
    """
    host_name = host_config.hostname
    something_changed = False

    console.verbose(f"{tty.bold}{host_name}{tty.normal}:\n")
    host_flag_path = os.path.join(_get_autodiscovery_dir(), host_name)

    # Without an enabled discovery check there is nothing to rediscover.
    params = host_config.discovery_check_parameters
    if params is None:
        console.verbose(" failed: discovery check disabled\n")
        return False

    reason = _may_rediscover(params, now_ts, oldest_queued)
    if not reason:
        result = discover_on_host(
            config_cache=config_cache,
            host_config=host_config,
            mode=DiscoveryMode(_get_rediscovery_parameters(params).get("mode")),
            service_filters=_ServiceFilters.from_settings(_get_rediscovery_parameters(params)),
            on_error="ignore",
            use_cached_snmp_data=True,
            # autodiscovery is run every 5 minutes (see
            # omd/packages/check_mk/skel/etc/cron.d/cmk_discovery)
            # make sure we may use the file the active discovery check left behind:
            max_cachefile_age=600,
        )
        if result.error_text is not None:
            if result.error_text:
                console.verbose(f"failed: {result.error_text}\n")
            else:
                # for offline hosts the error message is empty. This is to remain
                # compatible with the automation code
                console.verbose(" failed: host is offline\n")
        else:
            # "Nothing changed" means: every service kept, nothing new or
            # removed, no new host labels -- locally or on the cluster.
            if result.self_new == 0 and\
               result.self_removed == 0 and\
               result.self_kept == result.self_total and\
               result.clustered_new == 0 and\
               result.clustered_vanished == 0 and\
               result.self_new_host_labels == 0:
                console.verbose(" nothing changed.\n")
            else:
                console.verbose(f" {result.self_new} new, {result.self_removed} removed, "
                                f"{result.self_kept} kept, {result.self_total} total services "
                                f"and {result.self_new_host_labels} new host labels. "
                                f"clustered new {result.clustered_new}, clustered vanished "
                                f"{result.clustered_vanished}")

                # Note: Even if the actual mark-for-discovery flag may have been created by a cluster host,
                # the activation decision is based on the discovery configuration of the node
                if _get_rediscovery_parameters(params)["activation"]:
                    something_changed = True

                # Enforce base code creating a new host config object after this change
                config_cache.invalidate_host_config(host_name)

                # Now ensure that the discovery service is updated right after the changes
                schedule_discovery_check(host_name)

        # delete the file even in error case, otherwise we might be causing the same error
        # every time the cron job runs
        try:
            os.remove(host_flag_path)
        except OSError:
            pass
    else:
        console.verbose(f" skipped: {reason}\n")

    return something_changed
def discover_on_host(
    *,
    config_cache: config.ConfigCache,
    host_config: config.HostConfig,
    mode: DiscoveryMode,
    service_filters: Optional[_ServiceFilters],
    on_error: str,
    use_cached_snmp_data: bool,
    max_cachefile_age: int,
) -> DiscoveryResult:
    """Perform a service/host-label discovery on a single host.

    Fetches data via the broker, updates the host's autochecks according to
    *mode* and returns a DiscoveryResult with counts and a diff text. Errors
    other than MKTimeout are caught and stored in ``result.error_text``
    (unless debug mode is enabled).
    """
    console.verbose(" Doing discovery with mode '%s'...\n" % mode)

    host_name = host_config.hostname
    result = DiscoveryResult()
    # Labels are neither loaded nor saved when services are being removed.
    discovery_parameters = DiscoveryParameters(
        on_error=on_error,
        load_labels=(mode is not DiscoveryMode.REMOVE),
        save_labels=(mode is not DiscoveryMode.REMOVE),
        only_host_labels=(mode is DiscoveryMode.ONLY_HOST_LABELS),
    )

    # Inactive hosts get an empty error text -- callers treat this as "offline".
    if host_name not in config_cache.all_active_hosts():
        result.error_text = ""
        return result

    _set_cache_opts_of_checkers(use_cached_snmp_data=use_cached_snmp_data)

    try:
        # in "refresh" mode we first need to remove all previously discovered
        # checks of the host, so that _get_host_services() does show us the
        # new discovered check parameters.
        if mode is DiscoveryMode.REFRESH:
            result.self_removed += host_config.remove_autochecks()  # this is cluster-aware!

        # Clusters have no address of their own; data comes from the nodes.
        if host_config.is_cluster:
            ipaddress = None
        else:
            ipaddress = config.lookup_ip_address(host_config)

        parsed_sections_broker, _source_results = make_broker(
            config_cache=config_cache,
            host_config=host_config,
            ip_address=ipaddress,
            mode=Mode.DISCOVERY,
            selected_sections=NO_SELECTION,
            file_cache_max_age=max_cachefile_age,
            fetcher_messages=(),
            force_snmp_cache_refresh=not use_cached_snmp_data,
            on_scan_error=on_error,
        )

        # Compute current state of new and existing checks
        services, host_labels = _get_host_services(
            host_config,
            ipaddress,
            parsed_sections_broker,
            discovery_parameters,
        )

        old_services = services.get("old", [])

        # Create new list of checks
        new_services = _get_post_discovery_services(host_name, services,
                                                    service_filters or _ServiceFilters.accept_all(),
                                                    result, mode)
        host_config.set_autochecks(new_services)

        result.diff_text = make_object_diff(
            _make_services_audit_log_object([x.service for x in old_services]),
            _make_services_audit_log_object([x.service for x in new_services]))

    except MKTimeout:
        raise  # let general timeout through

    except Exception as e:
        if cmk.utils.debug.enabled():
            raise
        result.error_text = str(e)

    else:
        # Only reached on success: host_labels is bound in the try block.
        if mode is not DiscoveryMode.REMOVE:
            result.self_new_host_labels = len(host_labels.new)
            result.self_total_host_labels = len(host_labels.present)

    result.self_total = result.self_new + result.self_kept
    return result
def scan_parents_of(config_cache: config.ConfigCache,
                    hosts: List[HostName],
                    silent: bool = False,
                    settings: Optional[Dict[str, int]] = None) -> Gateways:
    """Determine the network gateway ("parent") of each host via traceroute.

    Launches one traceroute process per host in parallel, then parses the
    outputs. For every host a (gateway, state, skipped-gateway-count,
    message) entry is appended to the returned Gateways list. *settings*
    may tune timeout/probes/max_ttl/ping_probes.
    """
    if settings is None:
        settings = {}

    # The monitoring host itself acts as the root of the parent tree.
    if config.monitoring_host:
        host_config = config_cache.get_host_config(config.monitoring_host)
        nagios_ip = ip_lookup.lookup_ipv4_address(host_config)
    else:
        nagios_ip = None

    # Force a locale-independent traceroute output for parsing.
    os.putenv("LANG", "")
    os.putenv("LC_ALL", "")

    # Start processes in parallel
    procs: List[Tuple[HostName, Optional[HostAddress], Union[str, subprocess.Popen]]] = []
    for host in hosts:
        console.verbose("%s " % host)
        host_config = config_cache.get_host_config(host)
        try:
            ip = ip_lookup.lookup_ipv4_address(host_config)
            if ip is None:
                raise RuntimeError()
            command = [
                "traceroute", "-w",
                "%d" % settings.get("timeout", 8), "-q",
                "%d" % settings.get("probes", 2), "-m",
                "%d" % settings.get("max_ttl", 10), "-n", ip
            ]
            console.vverbose("Running '%s'\n" % subprocess.list2cmdline(command))
            procs.append((host, ip,
                          subprocess.Popen(command,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.STDOUT,
                                           close_fds=True,
                                           encoding="utf-8")))
        except Exception as e:
            if cmk.utils.debug.enabled():
                raise
            # Keep the error string in place of the process object; handled below.
            procs.append((host, None, "ERROR: %s" % e))

    # Output marks with status of each single scan
    def dot(color: str, dot: str = 'o') -> None:
        if not silent:
            out.output(tty.bold + color + dot + tty.normal)

    # Now all run and we begin to read the answers. For each host
    # we add a triple to gateways: the gateway, a scan state and a diagnostic output
    gateways: Gateways = []
    for host, ip, proc_or_error in procs:
        if isinstance(proc_or_error, str):
            lines = [proc_or_error]
            exitstatus = 1
        else:
            exitstatus = proc_or_error.wait()
            if proc_or_error.stdout is None:
                raise RuntimeError()
            lines = [l.strip() for l in proc_or_error.stdout.readlines()]

        if exitstatus:
            dot(tty.red, '*')
            gateways.append(
                (None, "failed", 0, "Traceroute failed with exit code %d" % (exitstatus & 255)))
            continue

        # A single "ERROR: ..." line is our own marker from the launch loop.
        if len(lines) == 1 and lines[0].startswith("ERROR:"):
            message = lines[0][6:].strip()
            console.verbose("%s: %s\n", host, message, stream=sys.stderr)
            dot(tty.red, "D")
            gateways.append((None, "dnserror", 0, message))
            continue

        if len(lines) == 0:
            if cmk.utils.debug.enabled():
                raise MKGeneralException(
                    "Cannot execute %s. Is traceroute installed? Are you root?" % command)
            dot(tty.red, '!')
            continue

        if len(lines) < 2:
            if not silent:
                console.error("%s: %s\n" % (host, ' '.join(lines)))
            gateways.append((None, "garbled", 0,
                             "The output of traceroute seem truncated:\n%s" % ("".join(lines))))
            dot(tty.blue)
            continue

        # Parse output of traceroute:
        # traceroute to 8.8.8.8 (8.8.8.8), 30 hops max, 40 byte packets
        #  1  * * *
        #  2  10.0.0.254  0.417 ms  0.459 ms  0.670 ms
        #  3  172.16.0.254  0.967 ms  1.031 ms  1.544 ms
        #  4  217.0.116.201  23.118 ms  25.153 ms  26.959 ms
        #  5  217.0.76.134  32.103 ms  32.491 ms  32.337 ms
        #  6  217.239.41.106  32.856 ms  35.279 ms  36.170 ms
        #  7  74.125.50.149  45.068 ms  44.991 ms *
        #  8  * 66.249.94.86  41.052 ms 66.249.94.88  40.795 ms
        #  9  209.85.248.59  43.739 ms  41.106 ms 216.239.46.240  43.208 ms
        # 10  216.239.48.53  45.608 ms  47.121 ms 64.233.174.29  43.126 ms
        # 11  209.85.255.245  49.265 ms  40.470 ms  39.870 ms
        # 12  8.8.8.8  28.339 ms  28.566 ms  28.791 ms
        routes: List[Optional[str]] = []
        for line in lines[1:]:
            parts = line.split()
            route = parts[1]
            if route.count('.') == 3:
                routes.append(route)
            elif route == '*':
                routes.append(None)  # No answer from this router
            else:
                if not silent:
                    console.error("%s: invalid output line from traceroute: '%s'\n" % (host, line))

        if len(routes) == 0:
            error = "incomplete output from traceroute. No routes found."
            console.error("%s: %s\n" % (host, error))
            gateways.append((None, "garbled", 0, error))
            dot(tty.red)
            continue

        # Only one entry -> host is directly reachable and gets nagios as parent -
        # if nagios is not the parent itself. Problem here: How can we determine
        # if the host in question is the monitoring host? The user must configure
        # this in monitoring_host.
        if len(routes) == 1:
            if ip == nagios_ip:
                gateways.append((None, "root", 0, ""))  # We are the root-monitoring host
                dot(tty.white, 'N')
            elif config.monitoring_host:
                gateways.append(((config.monitoring_host, nagios_ip, None), "direct", 0, ""))
                dot(tty.cyan, 'L')
            else:
                gateways.append((None, "direct", 0, ""))
            continue

        # Try far most route which is not identical with host itself
        ping_probes = settings.get("ping_probes", 5)
        skipped_gateways = 0
        this_route: Optional[HostAddress] = None
        for r in routes[::-1]:
            if not r or (r == ip):
                continue
            # Do (optional) PING check in order to determine if that
            # gateway can be monitored via the standard host check
            if ping_probes:
                if not gateway_reachable_via_ping(r, ping_probes):
                    console.verbose("(not using %s, not reachable)\n", r, stream=sys.stderr)
                    skipped_gateways += 1
                    continue
            this_route = r
            break
        if not this_route:
            error = "No usable routing information"
            if not silent:
                console.error("%s: %s\n" % (host, error))
            gateways.append((None, "notfound", 0, error))
            dot(tty.blue)
            continue

        # TTLs already have been filtered out)
        gateway_ip = this_route
        gateway = _ip_to_hostname(config_cache, this_route)
        if gateway:
            console.verbose("%s(%s) ", gateway, gateway_ip)
        else:
            console.verbose("%s ", gateway_ip)

        # Try to find DNS name of host via reverse DNS lookup
        dns_name = _ip_to_dnsname(gateway_ip)
        gateways.append(((gateway, gateway_ip, dns_name), "gateway", skipped_gateways, ""))
        dot(tty.green, 'G')

    return gateways