def _do_active_inventory_for(
    *,
    host_config: config.HostConfig,
    run_only_plugin_names: Optional[Set[InventoryPluginName]],
    selected_sections: SectionNameCollection,
) -> ActiveInventoryResult:
    """Run the HW/SW inventory for one host and return the resulting trees.

    Clusters get an inventory computed purely from configuration
    (no sources are contacted), which is why their result is always
    considered safe to write.
    """
    if host_config.is_cluster:
        return ActiveInventoryResult(
            trees=_do_inv_for_cluster(host_config),
            source_results=[],
            safe_to_write=True,
        )

    ipaddress = config.lookup_ip_address(host_config)
    config_cache = config.get_config_cache()
    parsed_sections_broker, source_results = _fetch_parsed_sections_broker_for_inv(
        config_cache,
        host_config,
        ipaddress,
        selected_sections,
    )

    return ActiveInventoryResult(
        trees=_do_inv_for_realhost(
            host_config,
            ipaddress,
            parsed_sections_broker=parsed_sections_broker,
            run_only_plugin_names=run_only_plugin_names,
        ),
        source_results=source_results,
        # Only persist the tree if all sources succeeded AND no section
        # filtering was in effect (a partial fetch must not overwrite a
        # previously complete inventory).
        safe_to_write=_safe_to_write_tree(source_results) and selected_sections is NO_SELECTION,
    )
def get_check_preview(
    *,
    host_name: HostName,
    max_cachefile_age: int,
    use_cached_snmp_data: bool,
    on_error: OnError,
) -> Tuple[CheckPreviewTable, QualifiedDiscovery[HostLabel]]:
    """Get the list of service of a host or cluster and guess the current state of
    all services if possible

    Returns the preview table rows together with the discovered host labels.
    Host labels are analysed with load_labels=True / save_labels=False, i.e.
    nothing is persisted by this preview.
    """
    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(host_name)

    # Clusters have no address of their own; sections come from the nodes.
    ip_address = None if host_config.is_cluster else config.lookup_ip_address(host_config)

    _set_cache_opts_of_checkers(use_cached_snmp_data=use_cached_snmp_data)

    parsed_sections_broker, _source_results = make_broker(
        config_cache=config_cache,
        host_config=host_config,
        ip_address=ip_address,
        mode=Mode.DISCOVERY,
        file_cache_max_age=max_cachefile_age,
        selected_sections=NO_SELECTION,
        fetcher_messages=(),
        force_snmp_cache_refresh=not use_cached_snmp_data,
        on_scan_error=on_error,
    )

    host_labels = analyse_host_labels(
        host_config=host_config,
        ipaddress=ip_address,
        parsed_sections_broker=parsed_sections_broker,
        load_labels=True,
        save_labels=False,
        on_error=on_error,
    )

    grouped_services = _get_host_services(
        host_config,
        ip_address,
        parsed_sections_broker,
        on_error,
    )

    # store_changes=False: the preview must not mutate the host's value store.
    with load_host_value_store(host_name, store_changes=False) as value_store_manager:
        table = [
            _check_preview_table_row(
                host_config=host_config,
                ip_address=ip_address,
                service=service,
                check_source=check_source,
                parsed_sections_broker=parsed_sections_broker,
                found_on_nodes=found_on_nodes,
                value_store_manager=value_store_manager,
            )
            for check_source, services_with_nodes in grouped_services.items()
            for service, found_on_nodes in services_with_nodes
        ]

    return table, host_labels
def analyse_cluster_labels(
    *,
    host_config: config.HostConfig,
    ipaddress: Optional[str],
    parsed_sections_broker: ParsedSectionsBroker,
    load_labels: bool,
    save_labels: bool,
    on_error: OnError,
) -> QualifiedDiscovery[HostLabel]:
    """Discovers and processes host labels per cluster host

    Side effects:
      * may write to disk
      * may reset ruleset optimizer

    If specified in the discovery_parameters, the host labels after the
    discovery are persisted on disk.

    Some plugins discover services based on host labels, so the ruleset
    optimizer caches have to be cleared if new host labels are found.
    """
    if not host_config.nodes:
        # Not a cluster (or a cluster without nodes): nothing to aggregate.
        return QualifiedDiscovery.empty()

    nodes_host_labels: Dict[str, HostLabel] = {}
    config_cache = config.get_config_cache()

    for node in host_config.nodes:
        node_config = config_cache.get_host_config(node)
        node_ipaddress = config.lookup_ip_address(node_config)

        node_result = analyse_node_labels(
            host_name=node,
            ipaddress=node_ipaddress,
            parsed_sections_broker=parsed_sections_broker,
            load_labels=load_labels,
            save_labels=save_labels,
            on_error=on_error,
        )

        # keep the latest for every label.name: "present" labels override
        # "vanished" ones, and later nodes override earlier ones.
        nodes_host_labels.update({
            # TODO (mo): According to unit tests, this is what was done prior to refactoring.
            # I'm not sure this is desired. If it is, it should be explained.
            # Whenever we do not load the host labels, vanished will be empty.
            **{l.name: l for l in node_result.vanished},
            **{l.name: l for l in node_result.present},
        })

    return _analyse_host_labels(
        host_name=host_config.hostname,
        discovered_host_labels=list(nodes_host_labels.values()),
        existing_host_labels=_load_existing_host_labels(host_config.hostname)
        if load_labels else (),
        save_labels=save_labels,
    )
def do_discovery(
    arg_hostnames: Set[HostName],
    *,
    selected_sections: SectionNameCollection,
    run_plugin_names: Container[CheckPluginName],
    arg_only_new: bool,
    only_host_labels: bool = False,
) -> None:
    """Run service/host-label discovery for the given hosts.

    An empty ``arg_hostnames`` means "all hosts" and additionally enables
    cache usage (see ``use_caches`` below). Errors per host are reported to
    the console unless debug mode is enabled, in which case they are raised.
    """
    config_cache = config.get_config_cache()
    # With no explicit host list we fall back to cached agent data.
    use_caches = not arg_hostnames or cmk.core_helpers.cache.FileCacheFactory.maybe
    on_error = "raise" if cmk.utils.debug.enabled() else "warn"

    discovery_parameters = DiscoveryParameters(
        on_error=on_error,
        load_labels=arg_only_new,
        save_labels=True,
        only_host_labels=only_host_labels,
    )

    host_names = _preprocess_hostnames(arg_hostnames, config_cache, only_host_labels)

    # Explicit section selection forces fetching those sections.
    mode = Mode.DISCOVERY if selected_sections is NO_SELECTION else Mode.FORCE_SECTIONS

    # Now loop through all hosts
    for host_name in sorted(host_names):
        host_config = config_cache.get_host_config(host_name)
        section.section_begin(host_name)
        try:
            ipaddress = config.lookup_ip_address(host_config)
            parsed_sections_broker, _results = make_broker(
                config_cache=config_cache,
                host_config=host_config,
                ip_address=ipaddress,
                mode=mode,
                selected_sections=selected_sections,
                file_cache_max_age=config.discovery_max_cachefile_age() if use_caches else 0,
                fetcher_messages=(),
                force_snmp_cache_refresh=False,
                on_scan_error=on_error,
            )
            _do_discovery_for(
                host_name,
                ipaddress,
                parsed_sections_broker,
                run_plugin_names,
                arg_only_new,
                discovery_parameters,
            )
        except Exception as e:
            if cmk.utils.debug.enabled():
                raise
            section.section_error("%s" % e)
        finally:
            cmk.utils.cleanup.cleanup_globals()
def _ip_address_for_dump_host(
    host_config: config.HostConfig,
    *,
    family: socket.AddressFamily,
) -> Optional[str]:
    """Best-effort IP address lookup for dumping a host.

    When the lookup raises, clusters get an empty string and real hosts
    get the family-specific fallback address.
    """
    try:
        return config.lookup_ip_address(host_config, family=family)
    except Exception:
        pass
    if host_config.is_cluster:
        return ""
    return ip_lookup.fallback_ip_for(family)
def commandline_discovery(
    arg_hostnames: Set[HostName],
    *,
    selected_sections: SectionNameCollection,
    run_plugin_names: Container[CheckPluginName],
    arg_only_new: bool,
    only_host_labels: bool = False,
) -> None:
    """Implementing cmk -I and cmk -II

    This is directly called from the main option parsing code.
    The list of hostnames is already prepared by the main code.
    If it is empty then we use all hosts and switch to using cache files.
    """
    config_cache = config.get_config_cache()
    on_error = OnError.RAISE if cmk.utils.debug.enabled() else OnError.WARN

    host_names = _preprocess_hostnames(arg_hostnames, config_cache, only_host_labels)

    # Explicit section selection forces fetching those sections.
    mode = Mode.DISCOVERY if selected_sections is NO_SELECTION else Mode.FORCE_SECTIONS

    # Now loop through all hosts
    for host_name in sorted(host_names):
        host_config = config_cache.get_host_config(host_name)
        section.section_begin(host_name)
        try:
            ipaddress = config.lookup_ip_address(host_config)
            parsed_sections_broker, _results = make_broker(
                config_cache=config_cache,
                host_config=host_config,
                ip_address=ipaddress,
                mode=mode,
                selected_sections=selected_sections,
                file_cache_max_age=config.max_cachefile_age(),
                fetcher_messages=(),
                force_snmp_cache_refresh=False,
                on_scan_error=on_error,
            )
            _commandline_discovery_on_host(
                host_name,
                ipaddress,
                parsed_sections_broker,
                run_plugin_names,
                arg_only_new,
                load_labels=arg_only_new,
                only_host_labels=only_host_labels,
                on_error=on_error,
            )
        except Exception as e:
            if cmk.utils.debug.enabled():
                raise
            section.section_error("%s" % e)
        finally:
            cmk.utils.cleanup.cleanup_globals()
def ip_address_of(host_config: config.HostConfig, family: socket.AddressFamily) -> Optional[str]:
    """Look up a host's IP address, recording failures for later reporting."""
    try:
        return config.lookup_ip_address(host_config, family=family)
    except Exception as e:
        lookup_error = e

    # Clusters have no address of their own.
    if host_config.is_cluster:
        return ""

    _failed_ip_lookups.append(host_config.hostname)
    if not _ignore_ip_lookup_failures:
        warning("Cannot lookup IP address of '%s' (%s). "
                "The host will not be monitored correctly." % (host_config.hostname, lookup_error))
    return ip_lookup.fallback_ip_for(family)
def make_cluster_sources(
    config_cache: config.ConfigCache,
    host_config: HostConfig,
) -> Sequence[Source]:
    """Abstract clusters/nodes/hosts

    Builds the flat list of data sources of all cluster nodes, in node order.
    """
    assert host_config.nodes is not None

    all_sources = []
    for node_name in host_config.nodes:
        node_sources = make_sources(
            HostConfig.make_host_config(node_name),
            config.lookup_ip_address(config_cache.get_host_config(node_name)),
            force_snmp_cache_refresh=False,
        )
        all_sources.extend(node_sources)
    return all_sources
def _inventorize_host(
    *,
    host_config: config.HostConfig,
    run_plugin_names: Container[InventoryPluginName],
    selected_sections: SectionNameCollection,
    retentions_tracker: RetentionsTracker,
) -> ActiveInventoryResult:
    """Fetch data and run the HW/SW inventory for a single host.

    Clusters are inventorized from configuration only (no fetching), so
    their processing can never fail.
    """
    if host_config.is_cluster:
        return ActiveInventoryResult(
            trees=_do_inv_for_cluster(host_config),
            source_results=(),
            parsing_errors=(),
            processing_failed=False,
        )

    ipaddress = config.lookup_ip_address(host_config)
    config_cache = config.get_config_cache()

    fetched = fetch_all(
        sources=make_sources(
            config_cache,
            host_config,
            ipaddress,
            selected_sections=selected_sections,
            force_snmp_cache_refresh=False,
            on_scan_error=OnError.RAISE,
        ),
        file_cache_max_age=host_config.max_cachefile_age,
        mode=(Mode.INVENTORY if selected_sections is NO_SELECTION else Mode.FORCE_SECTIONS),
    )
    broker, results = make_broker(
        fetched=fetched,
        selected_sections=selected_sections,
        file_cache_max_age=host_config.max_cachefile_age,
    )

    parsing_errors = broker.parsing_errors()
    return ActiveInventoryResult(
        trees=_do_inv_for_realhost(
            host_config,
            parsed_sections_broker=broker,
            run_plugin_names=run_plugin_names,
            retentions_tracker=retentions_tracker,
        ),
        source_results=results,
        parsing_errors=parsing_errors,
        # Failed sources or unparsable sections mark the whole run as failed.
        processing_failed=(_sources_failed(results) or bool(parsing_errors)),
    )
def _inventorize_host(
    *,
    host_config: config.HostConfig,
    run_plugin_names: Container[InventoryPluginName],
    selected_sections: SectionNameCollection,
    retentions_tracker: RetentionsTracker,
) -> ActiveInventoryResult:
    """Fetch data and run the HW/SW inventory for a single host.

    Clusters are inventorized from configuration only (no fetching), so
    their tree is always safe to write.
    """
    if host_config.is_cluster:
        return ActiveInventoryResult(
            trees=_do_inv_for_cluster(host_config),
            source_results=(),
            parsing_errors=(),
            safe_to_write=True,
        )

    ipaddress = config.lookup_ip_address(host_config)
    config_cache = config.get_config_cache()

    broker, results = make_broker(
        config_cache=config_cache,
        host_config=host_config,
        ip_address=ipaddress,
        selected_sections=selected_sections,
        mode=(Mode.INVENTORY if selected_sections is NO_SELECTION else Mode.FORCE_SECTIONS),
        file_cache_max_age=host_config.max_cachefile_age,
        fetcher_messages=(),
        force_snmp_cache_refresh=False,
        on_scan_error=OnError.RAISE,
    )

    parsing_errors = broker.parsing_errors()
    return ActiveInventoryResult(
        trees=_do_inv_for_realhost(
            host_config,
            ipaddress,
            parsed_sections_broker=broker,
            run_plugin_names=run_plugin_names,
            retentions_tracker=retentions_tracker,
        ),
        source_results=results,
        parsing_errors=parsing_errors,
        # Only persist the tree for a complete, error-free run: all sources
        # OK, no section filtering and no parsing errors.
        safe_to_write=(
            _safe_to_write_tree(results)  #
            and selected_sections is NO_SELECTION  #
            and run_plugin_names is EVERYTHING  #
            and not parsing_errors  #
        ),
    )
def _ip_to_hostname(config_cache: config.ConfigCache,
                    ip: Optional[HostAddress]) -> Optional[HostName]:
    """Reverse-map an IPv4 address to the host name it belongs to.

    The mapping over all active real hosts is built once and memoized in
    ``_config_cache`` under the key "ip_to_hostname"; subsequent calls hit
    the cached dict. Returns None for unknown addresses.
    """
    # The cache dict is fetched exactly once (the original duplicated this
    # call in both branches); population only happens on the first call.
    needs_population = "ip_to_hostname" not in _config_cache
    cache = _config_cache.get("ip_to_hostname")

    if needs_population:
        for host in config_cache.all_active_realhosts():
            host_config = config_cache.get_host_config(host)
            try:
                cache[config.lookup_ip_address(host_config, family=socket.AF_INET)] = host
            except Exception:
                # Best effort: hosts whose IPv4 lookup fails are simply
                # absent from the mapping.
                continue

    return cache.get(ip)
def _get_cluster_services(
    host_config: config.HostConfig,
    ipaddress: Optional[str],
    parsed_sections_broker: ParsedSectionsBroker,
    on_error: OnError,
) -> ServicesTable:
    """Compute the service table of a cluster from the services of its nodes."""
    if not host_config.nodes:
        return {}

    cluster_items: ServicesTable = {}
    config_cache = config.get_config_cache()

    # Get services of the nodes. We are only interested in "old", "new" and "vanished"
    # From the states and parameters of these we construct the final state per service.
    for node in host_config.nodes:
        node_config = config_cache.get_host_config(node)
        node_ipaddress = config.lookup_ip_address(node_config)
        services = analyse_discovered_services(
            host_name=node,
            ipaddress=node_ipaddress,
            parsed_sections_broker=parsed_sections_broker,
            run_plugin_names=EVERYTHING,
            only_new=True,
            on_error=on_error,
        )
        for check_source, service in itertools.chain(
            (("vanished", s) for s in services.vanished),
            (("old", s) for s in services.old),
            (("new", s) for s in services.new),
        ):
            cluster_items.update(
                _cluster_service_entry(
                    check_source=check_source,
                    host_name=host_config.hostname,
                    node_name=node,
                    services_cluster=config_cache.host_of_clustered_service(
                        node, service.description),
                    service=service,
                    # An earlier node may already have contributed this
                    # service; the entry helper merges the two.
                    existing_entry=cluster_items.get(service.id()),
                ))

    return cluster_items
def _make_cluster_nodes(
    config_cache: config.ConfigCache,
    host_config: HostConfig,
) -> Sequence[Tuple[HostName, Optional[HostAddress], Sequence[Source]]]:
    """Abstract clusters/nodes/hosts

    Returns one (name, address, sources) triple per cluster node, in node order.
    """
    assert host_config.nodes is not None

    def _node_entry(node_name):
        # One address lookup per node, shared by the triple and the sources.
        address = config.lookup_ip_address(config_cache.get_host_config(node_name))
        node_sources = make_sources(
            HostConfig.make_host_config(node_name),
            address,
            force_snmp_cache_refresh=False,
        )
        return (node_name, address, node_sources)

    return [_node_entry(node_name) for node_name in host_config.nodes]
def _do_active_inventory_for(
    *,
    host_config: config.HostConfig,
    run_plugin_names: Container[InventoryPluginName],
    selected_sections: SectionNameCollection,
) -> ActiveInventoryResult:
    """Run the HW/SW inventory for one host and return the resulting trees.

    Clusters get an inventory computed purely from configuration, which is
    why their result is always considered safe to write.
    """
    if host_config.is_cluster:
        return ActiveInventoryResult(
            trees=_do_inv_for_cluster(host_config),
            source_results=[],
            safe_to_write=True,
        )

    ipaddress = config.lookup_ip_address(host_config)
    config_cache = config.get_config_cache()

    broker, results = make_broker(
        config_cache=config_cache,
        host_config=host_config,
        ip_address=ipaddress,
        selected_sections=selected_sections,
        mode=(Mode.INVENTORY if selected_sections is NO_SELECTION else Mode.FORCE_SECTIONS),
        file_cache_max_age=host_config.max_cachefile_age,
        fetcher_messages=(),
        force_snmp_cache_refresh=False,
        on_scan_error="raise",
    )

    return ActiveInventoryResult(
        trees=_do_inv_for_realhost(
            host_config,
            ipaddress,
            parsed_sections_broker=broker,
            run_plugin_names=run_plugin_names,
        ),
        source_results=results,
        # Only persist the tree for a complete run: all sources OK, no
        # section filtering, all plugins executed.
        safe_to_write=(
            _safe_to_write_tree(results) and  #
            selected_sections is NO_SELECTION and  #
            run_plugin_names is EVERYTHING),
    )
def commandline_checking(
    host_name: HostName,
    ipaddress: Optional[HostAddress],
    *,
    run_plugin_names: Container[CheckPluginName] = EVERYTHING,
    selected_sections: SectionNameCollection = NO_SELECTION,
    dry_run: bool = False,
    show_perfdata: bool = False,
) -> ActiveCheckResult:
    """Entry point for checking a host from the command line (cmk)."""
    console.vverbose("Checkmk version %s\n", cmk_version.__version__)
    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(host_name)

    # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
    # address is unknown). When called as non keepalive ipaddress may be None or
    # is already an address (2nd argument)
    if ipaddress is None and not host_config.is_cluster:
        ipaddress = config.lookup_ip_address(host_config)

    fetched = fetch_all(
        sources=make_sources(
            config_cache,
            host_config,
            ipaddress,
            selected_sections=selected_sections,
            force_snmp_cache_refresh=False,
            on_scan_error=OnError.RAISE,
        ),
        file_cache_max_age=host_config.max_cachefile_age,
        mode=Mode.CHECKING if selected_sections is NO_SELECTION else Mode.FORCE_SECTIONS,
    )
    return _execute_checkmk_checks(
        hostname=host_name,
        ipaddress=ipaddress,
        fetched=fetched,
        run_plugin_names=run_plugin_names,
        selected_sections=selected_sections,
        dry_run=dry_run,
        show_perfdata=show_perfdata,
    )
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    *,
    # The following arguments *must* remain optional for Nagios and the `DiscoCheckExecutor`.
    # See Also: `cmk.base.discovery.check_discovery()`
    fetcher_messages: Sequence[FetcherMessage] = (),
    run_plugin_names: Container[CheckPluginName] = EVERYTHING,
    selected_sections: SectionNameCollection = NO_SELECTION,
    dry_run: bool = False,
    show_perfdata: bool = False,
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    """Execute all checks on a host and build the "Check_MK" service output.

    Returns the (status, infotexts, long_infotexts, perfdata) tuple the
    monitoring core expects.
    """
    console.vverbose("Checkmk version %s\n", cmk_version.__version__)
    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    mode = Mode.CHECKING if selected_sections is NO_SELECTION else Mode.FORCE_SECTIONS

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        license_usage.try_history_update()

        # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
        # address is unknown). When called as non keepalive ipaddress may be None or
        # is already an address (2nd argument)
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = config.lookup_ip_address(host_config)

        services_to_check = _get_services_to_check(
            config_cache=config_cache,
            host_name=hostname,
            run_plugin_names=run_plugin_names,
        )

        # The tracker measures the time spent fetching/parsing/checking;
        # fetcher durations are added separately below.
        with CPUTracker() as tracker:
            broker, source_results = make_broker(
                config_cache=config_cache,
                host_config=host_config,
                ip_address=ipaddress,
                mode=mode,
                selected_sections=selected_sections,
                file_cache_max_age=host_config.max_cachefile_age,
                fetcher_messages=fetcher_messages,
                force_snmp_cache_refresh=False,
                on_scan_error="raise",
            )
            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                parsed_sections_broker=broker,
                services=services_to_check,
                dry_run=dry_run,
                show_perfdata=show_perfdata,
            )

            # Inventory actions only run for a full check cycle, not when a
            # plugin subset was selected.
            if run_plugin_names is EVERYTHING:
                inventory.do_inventory_actions_during_checking_for(
                    config_cache,
                    host_config,
                    ipaddress,
                    parsed_sections_broker=broker,
                )

            for source, host_sections in source_results:
                source_state, source_output = source.summarize(host_sections)
                if source_output != "":
                    status = worst_service_state(status, source_state, default=3)
                    infotexts.append("[%s] %s" % (source.id, source_output))

            if plugins_missing_data:
                missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_data_status)
                infotexts.append(missing_data_infotext)

        # Total runtime: our own CPU time plus the duration reported by
        # every external fetcher.
        total_times = tracker.duration
        for msg in fetcher_messages:
            total_times += msg.stats.duration

        infotexts.append("execution time %.1f sec" % total_times.process.elapsed)

        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % total_times.process.elapsed,
                "user_time=%.3f" % total_times.process.user,
                "system_time=%.3f" % total_times.process.system,
                "children_user_time=%.3f" % total_times.process.children_user,
                "children_system_time=%.3f" % total_times.process.children_system,
            ]
            # Aggregate fetcher durations by transport type; PIGGYBACK and
            # TCP both count as "agent".
            summary: DefaultDict[str, Snapshot] = defaultdict(Snapshot.null)
            for msg in fetcher_messages if fetcher_messages else ():
                if msg.fetcher_type in (
                        FetcherType.PIGGYBACK,
                        FetcherType.PROGRAM,
                        FetcherType.SNMP,
                        FetcherType.TCP,
                ):
                    summary[{
                        FetcherType.PIGGYBACK: "agent",
                        FetcherType.PROGRAM: "ds",
                        FetcherType.SNMP: "snmp",
                        FetcherType.TCP: "agent",
                    }[msg.fetcher_type]] += msg.stats.duration
            for phase, duration in summary.items():
                perfdata.append("cmk_time_%s=%.3f" % (phase, duration.idle))
        else:
            perfdata.append("execution_time=%.3f" % total_times.process.elapsed)

        return status, infotexts, long_infotexts, perfdata
    finally:
        _submit_to_core.finalize()
def check_discovery(
    host_name: HostName,
    ipaddress: Optional[HostAddress],
    *,
    # The next argument *must* remain optional for the DiscoCheckExecutor.
    # See Also: `cmk.base.agent_based.checking.do_check()`.
    fetcher_messages: Sequence[FetcherMessage] = (),
) -> Tuple[int, List[str], List[str], List[Tuple]]:
    """Implement the "Check_MK Discovery" active check.

    Compares discovered vs. configured services and host labels and may
    schedule an automatic rediscovery.
    """
    # Note: '--cache' is set in core_cmc, nagios template or even on CL and means:
    # 1. use caches as default:
    #    - Set FileCacheFactory.maybe = True (set max_cachefile_age, else 0)
    #    - Set FileCacheFactory.use_outdated = True
    # 2. Then these settings are used to read cache file or not
    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(host_name)

    discovery_parameters = DiscoveryParameters(
        on_error="raise",
        load_labels=True,
        save_labels=False,
        only_host_labels=False,
    )

    params = host_config.discovery_check_parameters
    if params is None:
        params = host_config.default_discovery_check_parameters()

    discovery_mode = DiscoveryMode(_get_rediscovery_parameters(params).get("mode"))

    # In case of keepalive discovery we always have an ipaddress. When called as non keepalive
    # ipaddress is always None
    if ipaddress is None and not host_config.is_cluster:
        ipaddress = config.lookup_ip_address(host_config)

    parsed_sections_broker, source_results = make_broker(
        config_cache=config_cache,
        host_config=host_config,
        ip_address=ipaddress,
        mode=Mode.DISCOVERY,
        fetcher_messages=fetcher_messages,
        selected_sections=NO_SELECTION,
        file_cache_max_age=(config.discovery_max_cachefile_age()
                            if cmk.core_helpers.cache.FileCacheFactory.maybe else 0),
        force_snmp_cache_refresh=False,
        on_scan_error=discovery_parameters.on_error,
    )

    services, host_label_discovery_result = _get_host_services(
        host_config,
        ipaddress,
        parsed_sections_broker,
        discovery_parameters,
    )

    status, infotexts, long_infotexts, perfdata, need_rediscovery = _aggregate_subresults(
        _check_service_lists(host_name, services, params, discovery_mode),
        _check_host_labels(
            host_label_discovery_result,
            int(params.get("severity_new_host_label", 1)),
            discovery_mode,
        ),
        _check_data_sources(source_results),
    )

    if need_rediscovery:
        # For clusters the rediscovery flag is set on every node, since the
        # nodes hold the actual autochecks.
        if host_config.is_cluster and host_config.nodes:
            for nodename in host_config.nodes:
                _set_rediscovery_flag(nodename)
        else:
            _set_rediscovery_flag(host_name)
        infotexts.append(u"rediscovery scheduled")

    return status, infotexts, long_infotexts, perfdata
def discover_on_host(
    *,
    config_cache: config.ConfigCache,
    host_config: config.HostConfig,
    mode: DiscoveryMode,
    service_filters: Optional[_ServiceFilters],
    on_error: str,
    use_cached_snmp_data: bool,
    max_cachefile_age: int,
) -> DiscoveryResult:
    """Run discovery on one host with the given mode and update its autochecks.

    Errors are captured in ``result.error_text`` (unless debug mode is on,
    in which case they are raised); MKTimeout always propagates.
    """
    console.verbose(" Doing discovery with mode '%s'...\n" % mode)

    host_name = host_config.hostname
    result = DiscoveryResult()
    discovery_parameters = DiscoveryParameters(
        on_error=on_error,
        load_labels=(mode is not DiscoveryMode.REMOVE),
        save_labels=(mode is not DiscoveryMode.REMOVE),
        only_host_labels=(mode is DiscoveryMode.ONLY_HOST_LABELS),
    )

    if host_name not in config_cache.all_active_hosts():
        # Inactive host: return an "empty error" result.
        result.error_text = ""
        return result

    _set_cache_opts_of_checkers(use_cached_snmp_data=use_cached_snmp_data)

    try:
        # in "refresh" mode we first need to remove all previously discovered
        # checks of the host, so that _get_host_services() does show us the
        # new discovered check parameters.
        if mode is DiscoveryMode.REFRESH:
            result.self_removed += host_config.remove_autochecks()  # this is cluster-aware!

        if host_config.is_cluster:
            ipaddress = None
        else:
            ipaddress = config.lookup_ip_address(host_config)

        parsed_sections_broker, _source_results = make_broker(
            config_cache=config_cache,
            host_config=host_config,
            ip_address=ipaddress,
            mode=Mode.DISCOVERY,
            selected_sections=NO_SELECTION,
            file_cache_max_age=max_cachefile_age,
            fetcher_messages=(),
            force_snmp_cache_refresh=not use_cached_snmp_data,
            on_scan_error=on_error,
        )

        # Compute current state of new and existing checks
        services, host_labels = _get_host_services(
            host_config,
            ipaddress,
            parsed_sections_broker,
            discovery_parameters,
        )

        old_services = services.get("old", [])

        # Create new list of checks
        new_services = _get_post_discovery_services(host_name, services,
                                                    service_filters or
                                                    _ServiceFilters.accept_all(), result, mode)
        host_config.set_autochecks(new_services)

        # Audit-log diff between the previous and the new service list.
        result.diff_text = make_object_diff(
            _make_services_audit_log_object([x.service for x in old_services]),
            _make_services_audit_log_object([x.service for x in new_services]))

    except MKTimeout:
        raise  # let general timeout through

    except Exception as e:
        if cmk.utils.debug.enabled():
            raise
        result.error_text = str(e)

    else:
        if mode is not DiscoveryMode.REMOVE:
            result.self_new_host_labels = len(host_labels.new)
            result.self_total_host_labels = len(host_labels.present)

    result.self_total = result.self_new + result.self_kept
    return result
def _execute_checkmk_checks(
    *,
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    fetcher_messages: Sequence[FetcherMessage] = (),
    run_plugin_names: Container[CheckPluginName],
    selected_sections: SectionNameCollection,
    dry_run: bool,
    show_perfdata: bool,
) -> ActiveCheckResult:
    """Run all checks on a host and assemble the aggregated active-check result."""
    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)
    exit_spec = host_config.exit_code_spec()
    mode = Mode.CHECKING if selected_sections is NO_SELECTION else Mode.FORCE_SECTIONS
    try:
        license_usage.try_history_update()
        # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
        # address is unknown). When called as non keepalive ipaddress may be None or
        # is already an address (2nd argument)
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = config.lookup_ip_address(host_config)
        services = config.resolve_service_dependencies(
            host_name=hostname,
            services=sorted(
                check_table.get_check_table(hostname).values(),
                key=lambda service: service.description,
            ),
        )
        with CPUTracker() as tracker:
            broker, source_results = make_broker(
                config_cache=config_cache,
                host_config=host_config,
                ip_address=ipaddress,
                mode=mode,
                selected_sections=selected_sections,
                file_cache_max_age=host_config.max_cachefile_age,
                fetcher_messages=fetcher_messages,
                force_snmp_cache_refresh=False,
                on_scan_error=OnError.RAISE,
            )
            num_success, plugins_missing_data = check_host_services(
                config_cache=config_cache,
                host_config=host_config,
                ipaddress=ipaddress,
                parsed_sections_broker=broker,
                services=services,
                run_plugin_names=run_plugin_names,
                dry_run=dry_run,
                show_perfdata=show_perfdata,
            )
            # Inventory actions only run for a full check cycle, not when a
            # plugin subset was selected.
            if run_plugin_names is EVERYTHING:
                inventory.do_inventory_actions_during_checking_for(
                    config_cache,
                    host_config,
                    ipaddress,
                    parsed_sections_broker=broker,
                )
            timed_results = [
                *check_sources(
                    source_results=source_results,
                    mode=mode,
                    include_ok_results=True,
                ),
                *check_parsing_errors(errors=broker.parsing_errors(),),
                *_check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                ),
            ]
        return ActiveCheckResult.from_subresults(
            *timed_results,
            _timing_results(tracker, fetcher_messages),
        )
    finally:
        _submit_to_core.finalize()
def scan_parents_of(
    config_cache: config.ConfigCache,
    hosts: List[HostName],
    silent: bool = False,
    settings: Optional[Dict[str, int]] = None,
) -> Gateways:
    """Determine the network gateway ("parent") of each host via traceroute.

    Launches one traceroute process per host in parallel, then parses each
    output to pick the last reachable hop as the gateway. Returns one
    Gateways entry per host: (gateway-triple-or-None, state, number of
    skipped gateways, diagnostic message).
    """
    if settings is None:
        settings = {}

    # The IPv4 address of the monitoring host itself is needed to detect
    # the "we are the root" and "directly reachable" cases below.
    if config.monitoring_host:
        host_config = config_cache.get_host_config(config.monitoring_host)
        nagios_ip = config.lookup_ip_address(host_config, family=socket.AF_INET)
    else:
        nagios_ip = None

    os.putenv("LANG", "")
    os.putenv("LC_ALL", "")

    # Start processes in parallel
    procs: List[Tuple[HostName, Optional[HostAddress], Union[str, subprocess.Popen]]] = []
    for host in hosts:
        console.verbose("%s " % host)
        host_config = config_cache.get_host_config(host)
        try:
            ip = config.lookup_ip_address(host_config, family=socket.AF_INET)
            if ip is None:
                raise RuntimeError()
            command = [
                "traceroute",
                "-w", "%d" % settings.get("timeout", 8),
                "-q", "%d" % settings.get("probes", 2),
                "-m", "%d" % settings.get("max_ttl", 10),
                "-n", ip,
            ]
            console.vverbose("Running '%s'\n" % subprocess.list2cmdline(command))
            procs.append(
                (
                    host,
                    ip,
                    subprocess.Popen(
                        command,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT,
                        close_fds=True,
                        encoding="utf-8",
                    ),
                )
            )
        except Exception as e:
            if cmk.utils.debug.enabled():
                raise
            # Lookup/spawn failure: store the error string in place of the
            # process handle; it is recognized below.
            procs.append((host, None, "ERROR: %s" % e))

    # Output marks with status of each single scan
    def dot(color: str, dot: str = "o") -> None:
        if not silent:
            out.output(tty.bold + color + dot + tty.normal)

    # Now all run and we begin to read the answers. For each host
    # we add a triple to gateways: the gateway, a scan state and a diagnostic output
    gateways: Gateways = []
    for host, ip, proc_or_error in procs:
        if isinstance(proc_or_error, str):
            lines = [proc_or_error]
            exitstatus = 1
        else:
            exitstatus = proc_or_error.wait()
            if proc_or_error.stdout is None:
                raise RuntimeError()
            lines = [l.strip() for l in proc_or_error.stdout.readlines()]

        if exitstatus:
            dot(tty.red, "*")
            gateways.append(
                (None, "failed", 0, "Traceroute failed with exit code %d" % (exitstatus & 255))
            )
            continue

        if len(lines) == 1 and lines[0].startswith("ERROR:"):
            message = lines[0][6:].strip()
            console.verbose("%s: %s\n", host, message, stream=sys.stderr)
            dot(tty.red, "D")
            gateways.append((None, "dnserror", 0, message))
            continue

        if len(lines) == 0:
            if cmk.utils.debug.enabled():
                # NOTE(review): `command` here refers to the last command
                # built in the spawn loop above, not necessarily this host's
                # command, and is unbound if every spawn failed — verify.
                raise MKGeneralException(
                    "Cannot execute %s. Is traceroute installed? Are you root?" % command
                )
            dot(tty.red, "!")
            continue

        if len(lines) < 2:
            if not silent:
                console.error("%s: %s\n" % (host, " ".join(lines)))
            gateways.append(
                (
                    None,
                    "garbled",
                    0,
                    "The output of traceroute seem truncated:\n%s" % ("".join(lines)),
                )
            )
            dot(tty.blue)
            continue

        # Parse output of traceroute:
        # traceroute to 8.8.8.8 (8.8.8.8), 30 hops max, 40 byte packets
        #  1  * * *
        #  2  10.0.0.254  0.417 ms  0.459 ms  0.670 ms
        #  3  172.16.0.254  0.967 ms  1.031 ms  1.544 ms
        #  4  217.0.116.201  23.118 ms  25.153 ms  26.959 ms
        #  5  217.0.76.134  32.103 ms  32.491 ms  32.337 ms
        #  6  217.239.41.106  32.856 ms  35.279 ms  36.170 ms
        #  7  74.125.50.149  45.068 ms  44.991 ms *
        #  8  * 66.249.94.86  41.052 ms 66.249.94.88  40.795 ms
        #  9  209.85.248.59  43.739 ms  41.106 ms 216.239.46.240  43.208 ms
        # 10  216.239.48.53  45.608 ms  47.121 ms 64.233.174.29  43.126 ms
        # 11  209.85.255.245  49.265 ms  40.470 ms  39.870 ms
        # 12  8.8.8.8  28.339 ms  28.566 ms  28.791 ms
        routes: List[Optional[str]] = []
        for line in lines[1:]:
            parts = line.split()
            route = parts[1]
            if route.count(".") == 3:
                routes.append(route)
            elif route == "*":
                routes.append(None)  # No answer from this router
            else:
                if not silent:
                    console.error("%s: invalid output line from traceroute: '%s'\n" %
                                  (host, line))

        if len(routes) == 0:
            error = "incomplete output from traceroute. No routes found."
            console.error("%s: %s\n" % (host, error))
            gateways.append((None, "garbled", 0, error))
            dot(tty.red)
            continue

        # Only one entry -> host is directly reachable and gets nagios as parent -
        # if nagios is not the parent itself. Problem here: How can we determine
        # if the host in question is the monitoring host? The user must configure
        # this in monitoring_host.
        if len(routes) == 1:
            if ip == nagios_ip:
                gateways.append((None, "root", 0, ""))  # We are the root-monitoring host
                dot(tty.white, "N")
            elif config.monitoring_host:
                gateways.append(((config.monitoring_host, nagios_ip, None), "direct", 0, ""))
                dot(tty.cyan, "L")
            else:
                gateways.append((None, "direct", 0, ""))
            continue

        # Try far most route which is not identical with host itself
        ping_probes = settings.get("ping_probes", 5)
        skipped_gateways = 0
        this_route: Optional[HostAddress] = None
        for r in routes[::-1]:
            if not r or (r == ip):
                continue
            # Do (optional) PING check in order to determine if that
            # gateway can be monitored via the standard host check
            if ping_probes:
                if not gateway_reachable_via_ping(r, ping_probes):
                    console.verbose("(not using %s, not reachable)\n", r, stream=sys.stderr)
                    skipped_gateways += 1
                    continue
            this_route = r
            break
        if not this_route:
            error = "No usable routing information"
            if not silent:
                console.error("%s: %s\n" % (host, error))
            gateways.append((None, "notfound", 0, error))
            dot(tty.blue)
            continue

        # TTLs already have been filtered out)
        gateway_ip = this_route
        gateway = _ip_to_hostname(config_cache, this_route)
        if gateway:
            console.verbose("%s(%s) ", gateway, gateway_ip)
        else:
            console.verbose("%s ", gateway_ip)

        # Try to find DNS name of host via reverse DNS lookup
        dns_name = _ip_to_dnsname(gateway_ip)
        gateways.append(((gateway, gateway_ip, dns_name), "gateway", skipped_gateways, ""))
        dot(tty.green, "G")

    return gateways
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    *,
    # The following arguments *must* remain optional for Nagios and the `DiscoCheckExecutor`.
    # See Also: `cmk.base.discovery.check_discovery()`
    fetcher_messages: Sequence[FetcherMessage] = (),
    run_only_plugin_names: Optional[Set[CheckPluginName]] = None,
    selected_sections: SectionNameCollection = NO_SELECTION,
    dry_run: bool = False,
    show_perfdata: bool = False,
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    """Execute the "Check_MK" service for a host: fetch data, run all checks, build output.

    Returns a tuple of (status, info texts, long info texts, perfdata strings) suitable
    for the monitoring core. `dry_run` and `show_perfdata` are forwarded to the per-service
    check execution; `selected_sections` other than NO_SELECTION forces fetching of exactly
    those sections (Mode.FORCE_SECTIONS).
    """
    console.verbose("Checkmk version %s\n", cmk_version.__version__)
    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    # NO_SELECTION means "regular checking"; any explicit selection switches the fetch mode.
    mode = Mode.CHECKING if selected_sections is NO_SELECTION else Mode.FORCE_SECTIONS

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        license_usage.try_history_update()

        # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
        # address is unknown). When called as non keepalive ipaddress may be None or
        # is already an address (2nd argument)
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = config.lookup_ip_address(host_config)

        # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching all
        # information from the monitored host and cache the result for the cluster checks to be
        # performed on the cached information.
        #
        # This means that in case of SNMP nodes, they need to take the clustered services of the
        # node into account, fetch the needed sections and cache them for the cluster host.
        #
        # But later, when checking the node services, the node has to only deal with the unclustered
        # services.
        #
        # TODO: clean this up. The fetched sections are computed in the checkers
        #       _make_configured_snmp_sections now.
        #
        belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

        services_to_fetch = _get_services_to_fetch(
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            config_cache=config_cache,
        )

        services_to_check = _filter_clustered_services(
            config_cache=config_cache,
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            services=services_to_fetch,
            run_only_plugin_names=run_only_plugin_names,
        )

        nodes = sources.make_nodes(
            config_cache,
            host_config,
            ipaddress,
            mode,
            sources.make_sources(
                host_config,
                ipaddress,
                mode=mode,
                selected_sections=selected_sections,
            ),
        )

        if not fetcher_messages:
            # Note: `fetch_all(sources)` is almost always called in similar
            #       code in discovery and inventory. The only other exception
            #       is `cmk.base.discovery.check_discovery(...)`. This does
            #       not seem right.
            fetcher_messages = list(
                sources.fetch_all(
                    nodes,
                    max_cachefile_age=host_config.max_cachefile_age,
                    host_config=host_config,
                ))

        # Everything inside this tracker counts towards the reported execution time;
        # fetcher durations are added separately below from the fetcher messages.
        with CPUTracker() as tracker:
            broker = ParsedSectionsBroker()
            result = sources.update_host_sections(
                broker,
                nodes,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                fetcher_messages=fetcher_messages,
                selected_sections=selected_sections,
            )
            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                parsed_sections_broker=broker,
                services=services_to_check,
                dry_run=dry_run,
                show_perfdata=show_perfdata,
            )

            # Inventory piggybacks on regular checking, but not when the caller
            # restricted the run to specific plugins.
            if run_only_plugin_names is None:
                inventory.do_inventory_actions_during_checking_for(
                    config_cache,
                    host_config,
                    ipaddress,
                    parsed_sections_broker=broker,
                )

            # Fold per-source summaries into the overall state/output.
            for source, host_sections in result:
                source_state, source_output = source.summarize(host_sections)
                if source_output != "":
                    status = max(status, source_state)
                    infotexts.append("[%s] %s" % (source.id, source_output))

            if plugins_missing_data:
                missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_data_status)
                infotexts.append(missing_data_infotext)

        # Total time = tracked checking time + the duration reported by each fetcher.
        total_times = tracker.duration
        for msg in fetcher_messages:
            total_times += msg.stats.duration

        infotexts.append("execution time %.1f sec" % total_times.process.elapsed)

        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % total_times.process.elapsed,
                "user_time=%.3f" % total_times.process.user,
                "system_time=%.3f" % total_times.process.system,
                "children_user_time=%.3f" % total_times.process.children_user,
                "children_system_time=%.3f" % total_times.process.children_system,
            ]
            # Aggregate fetcher durations per transport kind (piggyback and TCP both
            # count as "agent") and emit one cmk_time_* metric per kind.
            summary: DefaultDict[str, Snapshot] = defaultdict(Snapshot.null)
            for msg in fetcher_messages if fetcher_messages else ():
                if msg.fetcher_type in (
                        FetcherType.PIGGYBACK,
                        FetcherType.PROGRAM,
                        FetcherType.SNMP,
                        FetcherType.TCP,
                ):
                    summary[{
                        FetcherType.PIGGYBACK: "agent",
                        FetcherType.PROGRAM: "ds",
                        FetcherType.SNMP: "snmp",
                        FetcherType.TCP: "agent",
                    }[msg.fetcher_type]] += msg.stats.duration

            for phase, duration in summary.items():
                perfdata.append("cmk_time_%s=%.3f" % (phase, duration.idle))
        else:
            perfdata.append("execution_time=%.3f" % total_times.process.elapsed)

        return status, infotexts, long_infotexts, perfdata
    finally:
        # Always flush pending check results to the core, even on error/timeout.
        _submit_to_core.finalize()
def automation_discovery(
    *,
    config_cache: config.ConfigCache,
    host_config: config.HostConfig,
    mode: DiscoveryMode,
    service_filters: Optional[_ServiceFilters],
    on_error: OnError,
    use_cached_snmp_data: bool,
    max_cachefile_age: cmk.core_helpers.cache.MaxAge,
) -> DiscoveryResult:
    """Run service/host-label discovery for one host on behalf of an automation call.

    Depending on `mode`, previously discovered checks may be removed first (REFRESH),
    only host labels may be processed (ONLY_HOST_LABELS), or services may be
    added/kept/removed. Returns a DiscoveryResult with counters and a diff text.
    Errors other than MKTimeout are captured in `result.error_text` (unless debug
    mode is enabled, in which case they are re-raised).
    """
    console.verbose(" Doing discovery with mode '%s'...\n" % mode)

    host_name = host_config.hostname
    result = DiscoveryResult()

    if host_name not in config_cache.all_active_hosts():
        # NOTE(review): empty string (rather than None) appears to mark
        # "host not monitored" as a distinct error state — confirm with callers.
        result.error_text = ""
        return result

    # Discovery is allowed to work from (possibly outdated) cached agent data;
    # SNMP caching is controlled by the caller.
    cmk.core_helpers.cache.FileCacheFactory.use_outdated = True
    cmk.core_helpers.cache.FileCacheFactory.maybe = use_cached_snmp_data

    try:
        # in "refresh" mode we first need to remove all previously discovered
        # checks of the host, so that _get_host_services() does show us the
        # new discovered check parameters.
        if mode is DiscoveryMode.REFRESH:
            result.self_removed += host_config.remove_autochecks()  # this is cluster-aware!

        if host_config.is_cluster:
            ipaddress = None
        else:
            ipaddress = config.lookup_ip_address(host_config)

        parsed_sections_broker, _source_results = make_broker(
            config_cache=config_cache,
            host_config=host_config,
            ip_address=ipaddress,
            mode=Mode.DISCOVERY,
            selected_sections=NO_SELECTION,
            file_cache_max_age=max_cachefile_age,
            fetcher_messages=(),
            force_snmp_cache_refresh=not use_cached_snmp_data,
            on_scan_error=on_error,
        )

        if mode is not DiscoveryMode.REMOVE:
            host_labels = analyse_host_labels(
                host_config=host_config,
                ipaddress=ipaddress,
                parsed_sections_broker=parsed_sections_broker,
                load_labels=True,
                save_labels=True,
                on_error=on_error,
            )
            result.self_new_host_labels = len(host_labels.new)
            result.self_total_host_labels = len(host_labels.present)

        if mode is DiscoveryMode.ONLY_HOST_LABELS:
            # This is the result of a refactoring, and the following code was added
            # to ensure a compatible behaviour. I don't think it is particularly
            # sensible. We used to only compare service descriptions of old and new
            # services, so `make_object_diff` was always comparing two identical objects
            # if the mode was DiscoveryMode.ONLY_HOST_LABEL.
            # We brainlessly mimic that behaviour, for now.
            result.diff_text = make_object_diff(set(), set())
            return result

        # Compute current state of new and existing checks
        services = _get_host_services(
            host_config,
            ipaddress,
            parsed_sections_broker,
            on_error=on_error,
        )

        old_services = services.get("old", [])

        # Create new list of checks
        new_services = _get_post_discovery_services(host_name, services, service_filters or
                                                    _ServiceFilters.accept_all(), result, mode)
        host_config.set_autochecks(new_services)

        # If old_services == new_services, make_object_diff will return
        # something along the lines of "nothing changed".
        # I guess this was written before discovered host labels were invented.
        result.diff_text = make_object_diff(
            {x.service.description for x in old_services},
            {x.service.description for x in new_services},
        )

    except MKTimeout:
        raise  # let general timeout through

    except Exception as e:
        if cmk.utils.debug.enabled():
            raise
        result.error_text = str(e)

    result.self_total = result.self_new + result.self_kept
    return result
def active_check_discovery(
    host_name: HostName,
    ipaddress: Optional[HostAddress],
    *,
    # The next argument *must* remain optional for the DiscoCheckExecutor.
    # See Also: `cmk.base.agent_based.checking.active_check_checking()`.
    fetcher_messages: Sequence[FetcherMessage] = (),
) -> ActiveCheckResult:
    """Run the "Check_MK Discovery" active check for one host.

    Compares the discovered services and host labels against the configured
    ones, checks the data sources and parsing errors, and (if configured and
    clean) schedules a rediscovery. Returns the aggregated check result.
    """
    # Note: '--cache' is set in core_cmc, nagios template or even on CL and means:
    # 1. use caches as default:
    #    - Set FileCacheFactory.maybe = True (set max_cachefile_age, else 0)
    #    - Set FileCacheFactory.use_outdated = True
    # 2. Then these settings are used to read cache file or not
    cfg_cache = config.get_config_cache()
    host_cfg = cfg_cache.get_host_config(host_name)

    raw_params = host_cfg.discovery_check_parameters
    params = (host_cfg.default_discovery_check_parameters()
              if raw_params is None else raw_params)
    rediscovery_parameters = params.get("inventory_rediscovery", {})
    discovery_mode = DiscoveryMode(rediscovery_parameters.get("mode"))

    # In case of keepalive discovery we always have an ipaddress. When called as non keepalive
    # ipaddress is always None
    if ipaddress is None and not host_cfg.is_cluster:
        ipaddress = config.lookup_ip_address(host_cfg)

    # Honour the '--cache' setting: with caching enabled use the configured
    # discovery cache age, otherwise force age 0 (i.e. no cached files).
    cache_age = config.max_cachefile_age(
        discovery=None if cmk.core_helpers.cache.FileCacheFactory.maybe else 0)

    parsed_sections_broker, source_results = make_broker(
        config_cache=cfg_cache,
        host_config=host_cfg,
        ip_address=ipaddress,
        mode=Mode.DISCOVERY,
        fetcher_messages=fetcher_messages,
        selected_sections=NO_SELECTION,
        file_cache_max_age=cache_age,
        force_snmp_cache_refresh=False,
        on_scan_error=OnError.RAISE,
    )

    host_labels = analyse_host_labels(
        host_config=host_cfg,
        ipaddress=ipaddress,
        parsed_sections_broker=parsed_sections_broker,
        load_labels=True,
        save_labels=False,
        on_error=OnError.RAISE,
    )
    discovered_services = _get_host_services(
        host_cfg,
        ipaddress,
        parsed_sections_broker,
        on_error=OnError.RAISE,
    )

    services_result, services_need_rediscovery = _check_service_lists(
        host_name=host_name,
        services_by_transition=discovered_services,
        params=params,
        service_filters=_ServiceFilters.from_settings(rediscovery_parameters),
        discovery_mode=discovery_mode,
    )

    host_labels_result, host_labels_need_rediscovery = _check_host_labels(
        host_labels,
        int(params.get("severity_new_host_label", 1)),
        discovery_mode,
    )

    parsing_errors_result = check_parsing_errors(parsed_sections_broker.parsing_errors())

    # Only trigger a rediscovery when nothing failed to parse.
    rediscovery_needed = ((services_need_rediscovery or host_labels_need_rediscovery)
                          and parsing_errors_result.state == 0)

    return ActiveCheckResult.from_subresults(
        services_result,
        host_labels_result,
        *check_sources(source_results=source_results, mode=Mode.DISCOVERY),
        parsing_errors_result,
        _schedule_rediscovery(
            host_config=host_cfg,
            need_rediscovery=rediscovery_needed,
        ),
    )
def get_check_preview(
    *,
    host_name: HostName,
    max_cachefile_age: int,
    use_cached_snmp_data: bool,
    # NOTE(review): this takes a plain string ('raise'/'warn'/'ignore'?), while other
    # discovery entry points in this module use the OnError enum — confirm callers.
    on_error: str,
) -> Tuple[CheckPreviewTable, QualifiedDiscovery[HostLabel]]:
    """Get the list of service of a host or cluster and guess the current state of
    all services if possible

    Returns the preview table (one tuple per service) together with the host
    label discovery result. Active/legacy/custom checks cannot be evaluated
    offline and are reported in a WAITING state.
    """
    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(host_name)

    # Clusters have no own address; data comes from the nodes.
    ip_address = None if host_config.is_cluster else config.lookup_ip_address(host_config)
    discovery_parameters = DiscoveryParameters(
        on_error=on_error,
        load_labels=True,
        save_labels=False,
        only_host_labels=False,
    )

    _set_cache_opts_of_checkers(use_cached_snmp_data=use_cached_snmp_data)

    parsed_sections_broker, _source_results = make_broker(
        config_cache=config_cache,
        host_config=host_config,
        ip_address=ip_address,
        mode=Mode.DISCOVERY,
        file_cache_max_age=max_cachefile_age,
        selected_sections=NO_SELECTION,
        fetcher_messages=(),
        force_snmp_cache_refresh=not use_cached_snmp_data,
        on_scan_error=on_error,
    )

    grouped_services, host_label_result = _get_host_services(
        host_config,
        ip_address,
        parsed_sections_broker,
        discovery_parameters,
    )

    table: CheckPreviewTable = []
    for check_source, services_with_nodes in grouped_services.items():
        for service, found_on_nodes in services_with_nodes:
            plugin = agent_based_register.get_check_plugin(service.check_plugin_name)
            params = _preview_params(host_name, service, plugin, check_source)

            if check_source in ['legacy', 'active', 'custom']:
                # These check types cannot be executed offline; report WAITING.
                exitcode = None
                output = u"WAITING - %s check, cannot be done offline" % check_source.title()
                ruleset_name: Optional[RulesetName] = None
            else:
                ruleset_name = (str(plugin.check_ruleset_name)
                                if plugin and plugin.check_ruleset_name else None)
                wrapped_params = (Parameters(wrap_parameters(params)) if plugin
                                  and plugin.check_default_parameters is not None else None)

                # The default argument binds wrapped_params per iteration (avoids
                # the late-binding closure pitfall inside this loop).
                exitcode, output, _perfdata = checking.get_aggregated_result(
                    parsed_sections_broker,
                    host_config,
                    ip_address,
                    service,
                    plugin,
                    lambda p=wrapped_params: p,  # type: ignore[misc]  # "type of lambda"
                ).result

            # Service discovery never uses the perfdata in the check table. That entry
            # is constantly discarded, yet passed around(back and forth) as part of the
            # discovery result in the request elements. Some perfdata VALUES are not parsable
            # by ast.literal_eval such as "inf" it lead to ValueErrors. Thus keep perfdata empty
            perfdata: List[MetricTuple] = []
            table.append((
                _preview_check_source(host_name, service, check_source),
                str(service.check_plugin_name),
                ruleset_name,
                service.item,
                service.parameters,
                params,
                service.description,
                exitcode,
                output,
                perfdata,
                service.service_labels.to_dict(),
                found_on_nodes,
            ))

    return table, host_label_result