def _do_all_checks_on_host(
    services: List[Service],
    sources: data_sources.DataSources,
    host_config: config.HostConfig,
    ipaddress: Optional[HostAddress],
    only_check_plugins: Optional[Set[CheckPluginName]] = None,
) -> Tuple[int, List[CheckPluginName]]:
    """Execute the given services on one host and trigger the inventory hooks.

    The host sections are gathered from ``sources`` first, then every entry of
    ``services`` is handed to ``execute_check``.

    Returns a tuple of
      * the number of services for which ``execute_check`` returned a truthy value,
      * the sorted plugin names of all services for which it did not.

    ``only_check_plugins`` is accepted for interface compatibility but is not
    evaluated by this function.
    """
    hostname: HostName = host_config.hostname
    check_api_utils.set_hostname(hostname)

    # Gather the data from the sources
    host_nodes = sources.make_nodes(host_config)
    multi_host_sections = sources.get_host_sections(
        host_nodes,
        max_cachefile_age=host_config.max_cachefile_age,
    )

    succeeded = 0
    failed_plugins: Set[CheckPluginName] = set()
    for service in services:
        if execute_check(multi_host_sections, host_config, ipaddress, service):
            succeeded += 1
            continue
        # TODO (mo): centralize maincheckify: CMK-4295
        failed_plugins.add(CheckPluginName(maincheckify(service.check_plugin_name)))

    # Imported here rather than at module level (see the pylint marker).
    import cmk.base.inventory as inventory  # pylint: disable=import-outside-toplevel
    inventory.do_inventory_actions_during_checking_for(
        sources,
        multi_host_sections,
        host_config,
        ipaddress,
    )

    return succeeded, sorted(failed_plugins)
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    only_check_plugin_names: Optional[Set[CheckPluginName]] = None,
    fetcher_messages: Optional[Sequence[FetcherMessage]] = None
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    """Check one host and summarize the outcome.

    Steps, in order: look up the IP address if none was passed (non-cluster
    hosts only), load the item state, determine the services to fetch and to
    check, update the host sections from the sources, execute the checks, run
    the inventory hooks, and fold the per-source summaries, the missing-data
    information and the timing figures into the result.

    Returns ``(status, infotexts, long_infotexts, perfdata)``. ``status`` is the
    maximum over the source states (for sources with non-empty output) and the
    missing-data state; ``long_infotexts`` is never filled by this function.
    """
    console.verbose("Checkmk version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)
    exit_spec = host_config.exit_code_spec()

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        with cpu_tracking.execute(), cpu_tracking.phase("busy"):
            license_usage.try_history_update()

            # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
            # address is unknown). When called as non keepalive ipaddress may be None or
            # is already an address (2nd argument)
            if ipaddress is None and not host_config.is_cluster:
                ipaddress = ip_lookup.lookup_ip_address(host_config)

            item_state.load(hostname)

            # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching all
            # information from the monitored host and cache the result for the cluster checks to be
            # performed on the cached information.
            #
            # This means that in case of SNMP nodes, they need to take the clustered services of the
            # node into account, fetch the needed sections and cache them for the cluster host.
            #
            # But later, when checking the node services, the node has to only deal with the
            # unclustered services.
            belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

            services_to_fetch = _get_services_to_fetch(
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                config_cache=config_cache,
                only_check_plugins=only_check_plugin_names,
            )
            services_to_check = _filter_clustered_services(
                config_cache=config_cache,
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                services=services_to_fetch,
            )

            # see which raw sections we may need
            selected_raw_sections = agent_based_register.get_relevant_raw_sections(
                check_plugin_names=(svc.check_plugin_name for svc in services_to_fetch),
                consider_inventory_plugins=host_config.do_status_data_inventory,
            )

            sources = checkers.make_sources(
                host_config,
                ipaddress,
                mode=checkers.Mode.CHECKING,
            )

            mhs = MultiHostSections()
            nodes = checkers.make_nodes(
                config_cache,
                host_config,
                ipaddress,
                checkers.Mode.CHECKING,
                sources,
            )
            result = checkers.update_host_sections(
                mhs,
                nodes,
                selected_raw_sections=selected_raw_sections,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                fetcher_messages=fetcher_messages,
            )

            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                multi_host_sections=mhs,
                services=services_to_check,
                only_check_plugins=only_check_plugin_names,
            )
            inventory.do_inventory_actions_during_checking_for(
                config_cache,
                host_config,
                ipaddress,
                sources=sources,
                multi_host_sections=mhs,
            )

            if _submit_to_core:
                item_state.save(hostname)

            # Sources without any output contribute neither state nor perfdata.
            for source, host_sections in result:
                state, output, source_perf = source.summarize(host_sections)
                if output == "":
                    continue
                status = max(status, state)
                infotexts.append("[%s] %s" % (source.id, output))
                perfdata.extend([_convert_perf_data(p) for p in source_perf])

            if plugins_missing_data:
                missing_state, missing_text = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_state)
                infotexts.append(missing_text)

            # NOTE(review): the block nesting was reconstructed from flattened source.
            # Confirm whether merging the fetcher statistics belongs inside or directly
            # after the tracking context.
            for msg in fetcher_messages or ():
                cpu_tracking.update(msg.stats.cpu_times)

        phase_times = cpu_tracking.get_times()
        busy_times = phase_times["busy"]

        infotexts.append("execution time %.1f sec" % busy_times.run_time)
        if not config.check_mk_perfdata_with_times:
            perfdata.append("execution_time=%.3f" % busy_times.run_time)
        else:
            perfdata.extend([
                "execution_time=%.3f" % busy_times.run_time,
                "user_time=%.3f" % busy_times.process.user,
                "system_time=%.3f" % busy_times.process.system,
                "children_user_time=%.3f" % busy_times.process.children_user,
                "children_system_time=%.3f" % busy_times.process.children_system,
            ])

            for phase, times in phase_times.items():
                if phase not in ("agent", "snmp", "ds"):
                    continue
                waited = times.run_time - sum(times.process[:4])  # real time - CPU time
                perfdata.append("cmk_time_%s=%.3f" % (phase, waited))

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # "ipaddress is not None": At least when working with a cluster host it seems the ipaddress
        # may be None.  This needs to be understood in detail and cleaned up. As the InlineSNMP
        # stats feature is a very rarely used debugging feature, the analyzation and fix is
        # postponed now.
        if config.record_inline_snmp_stats and ipaddress is not None:
            if host_config.snmp_config(ipaddress).snmp_backend == "inline":
                inline.snmp_stats_save()
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    *,
    # The following arguments *must* remain optional for Nagios and the `DiscoCheckExecutor`.
    #   See Also: `cmk.base.discovery.check_discovery()`
    fetcher_messages: Sequence[FetcherMessage] = (),
    run_only_plugin_names: Optional[Set[CheckPluginName]] = None,
    selected_sections: checkers.SectionNameCollection = checkers.NO_SELECTION,
    submit_to_core: bool = True,
    show_perfdata: bool = False,
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    """Check one host and summarize the outcome.

    The mode is ``Mode.CHECKING`` unless a section selection was passed, in which
    case ``Mode.FORCE_SECTIONS`` is used. If no ``fetcher_messages`` are passed,
    they are obtained via ``checkers.fetch_all``. The inventory hooks are only
    run when ``run_only_plugin_names`` is ``None``.

    Returns ``(status, infotexts, long_infotexts, perfdata)``. ``status`` is the
    maximum over the source states (for sources with non-empty output) and the
    missing-data state; ``long_infotexts`` is never filled by this function.
    """
    console.verbose("Checkmk version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)
    exit_spec = host_config.exit_code_spec()

    if selected_sections is checkers.NO_SELECTION:
        mode = checkers.Mode.CHECKING
    else:
        mode = checkers.Mode.FORCE_SECTIONS

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        license_usage.try_history_update()

        # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
        # address is unknown). When called as non keepalive ipaddress may be None or
        # is already an address (2nd argument)
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = ip_lookup.lookup_ip_address(host_config)

        # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching all
        # information from the monitored host and cache the result for the cluster checks to be
        # performed on the cached information.
        #
        # This means that in case of SNMP nodes, they need to take the clustered services of the
        # node into account, fetch the needed sections and cache them for the cluster host.
        #
        # But later, when checking the node services, the node has to only deal with the
        # unclustered services.
        #
        # TODO: clean this up. The fetched sections are computed in the checkers
        #       _make_configured_snmp_sections now.
        #
        belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

        services_to_fetch = _get_services_to_fetch(
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            config_cache=config_cache,
        )
        services_to_check = _filter_clustered_services(
            config_cache=config_cache,
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            services=services_to_fetch,
            run_only_plugin_names=run_only_plugin_names,
        )

        sources = checkers.make_sources(
            host_config,
            ipaddress,
            mode=mode,
            selected_sections=selected_sections,
        )
        nodes = checkers.make_nodes(
            config_cache,
            host_config,
            ipaddress,
            mode,
            sources,
        )

        if not fetcher_messages:
            # Note: `fetch_all(sources)` is almost always called in similar
            #       code in discovery and inventory.  The only other exception
            #       is `cmk.base.discovery.check_discovery(...)`.  This does
            #       not seem right.
            fetcher_messages = list(
                checkers.fetch_all(
                    nodes,
                    max_cachefile_age=host_config.max_cachefile_age,
                    host_config=host_config,
                ))

        with CPUTracker() as tracker:
            mhs = MultiHostSections()
            result = checkers.update_host_sections(
                mhs,
                nodes,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                fetcher_messages=fetcher_messages,
                selected_sections=selected_sections,
            )

            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                multi_host_sections=mhs,
                services=services_to_check,
                submit_to_core=submit_to_core,
                show_perfdata=show_perfdata,
            )

            if run_only_plugin_names is None:
                inventory.do_inventory_actions_during_checking_for(
                    config_cache,
                    host_config,
                    ipaddress,
                    multi_host_sections=mhs,
                )

            # Sources without any output contribute neither state nor perfdata.
            for source, host_sections in result:
                state, output, source_perf = source.summarize(host_sections)
                if output == "":
                    continue
                status = max(status, state)
                infotexts.append("[%s] %s" % (source.id, output))
                perfdata.extend([_convert_perf_data(p) for p in source_perf])

            if plugins_missing_data:
                missing_state, missing_text = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_state)
                infotexts.append(missing_text)

        # Own processing time plus whatever the fetchers reported.
        total_times = tracker.duration
        for msg in fetcher_messages:
            total_times += msg.stats.duration

        infotexts.append("execution time %.1f sec" % total_times.process.elapsed)
        if not config.check_mk_perfdata_with_times:
            perfdata.append("execution_time=%.3f" % total_times.process.elapsed)
        else:
            perfdata.extend([
                "execution_time=%.3f" % total_times.process.elapsed,
                "user_time=%.3f" % total_times.process.user,
                "system_time=%.3f" % total_times.process.system,
                "children_user_time=%.3f" % total_times.process.children_user,
                "children_system_time=%.3f" % total_times.process.children_system,
            ])

            # Fetcher types that are reported, keyed to the name used in the perfdata.
            phase_of_fetcher = {
                FetcherType.PIGGYBACK: "agent",
                FetcherType.PROGRAM: "ds",
                FetcherType.SNMP: "snmp",
                FetcherType.TCP: "agent",
            }
            summary: DefaultDict[str, Snapshot] = defaultdict(Snapshot.null)
            for msg in fetcher_messages or ():
                if msg.fetcher_type not in phase_of_fetcher:
                    continue
                summary[phase_of_fetcher[msg.fetcher_type]] += msg.stats.duration

            for phase, duration in summary.items():
                perfdata.append("cmk_time_%s=%.3f" % (phase, duration.idle))

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()
def _do_all_checks_on_host(sources, host_config, ipaddress, only_check_plugin_names=None):
    # type: (data_sources.DataSources, config.HostConfig, Optional[HostAddress], Optional[List[str]]) -> Tuple[int, List[SectionName]]
    """Execute the checks of one host and trigger the inventory hooks.

    The services come from the precompiled check table of the host. If
    ``only_check_plugin_names`` is given, only those plugins are enforced on the
    sources and executed; otherwise all plugins of the check table are used.
    For hosts that belong to a cluster, services whose clustered host differs
    from this host are skipped.

    Returns the number of successful checks and the sorted section names of the
    checks that returned a falsy result other than ``None``.
    """
    hostname = host_config.hostname  # type: HostName
    config_cache = config.get_config_cache()

    check_api_utils.set_hostname(hostname)

    belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0
    services = check_table.get_precompiled_check_table(
        hostname,
        remove_duplicates=True,
        filter_mode="include_clustered" if belongs_to_cluster else None,
    )

    # When check types are specified via command line, enforce them. Otherwise use the
    # list of checks defined by the check table.
    if only_check_plugin_names is not None:
        only_check_plugins = set(only_check_plugin_names)
    else:
        only_check_plugins = {service.check_plugin_name for service in services}

    sources.enforce_check_plugin_names(only_check_plugins)

    # Gather the data from the sources
    multi_host_sections = sources.get_host_sections()

    # Filter out check types which are not used on the node: drop every plugin
    # that has services assigned elsewhere but none assigned to this host.
    if belongs_to_cluster:
        used_elsewhere = set()
        used_here = set()
        for service in services:
            if hostname != config_cache.host_of_clustered_service(hostname,
                                                                  service.description):
                used_elsewhere.add(service.check_plugin_name)
            else:
                used_here.add(service.check_plugin_name)
        only_check_plugins -= (used_elsewhere - used_here)

    succeeded = 0
    sections_without_data = set()
    for service in services:
        if service.check_plugin_name not in only_check_plugins:
            continue

        if belongs_to_cluster and hostname != config_cache.host_of_clustered_service(
                hostname, service.description):
            continue

        success = execute_check(
            config_cache,
            multi_host_sections,
            hostname,
            ipaddress,
            service.check_plugin_name,
            service.item,
            service.parameters,
            service.description,
        )
        if success:
            succeeded += 1
        elif success is not None:
            # A result of None is deliberately ignored here (original note: "if the
            # service is in any timeperiod" we want neither to increase the success
            # counter nor to add to the missing sections).
            sections_without_data.add(
                cmk.base.check_utils.section_name_of(service.check_plugin_name))

    import cmk.base.inventory as inventory  # pylint: disable=import-outside-toplevel
    inventory.do_inventory_actions_during_checking_for(sources, multi_host_sections, host_config,
                                                       ipaddress)

    return succeeded, sorted(sections_without_data)
def _do_all_checks_on_host(sources, host_config, ipaddress, only_check_plugin_names=None):
    # type: (data_sources.DataSources, config.HostConfig, Optional[HostAddress], Optional[List[str]]) -> Tuple[int, List[SectionName]]
    """Execute the checks of one host and trigger the inventory hooks.

    The services come from the precompiled check table of the host. If
    ``only_check_plugin_names`` is given, only those plugins are enforced on the
    sources and executed; otherwise all plugins of the check table are used.
    For hosts that belong to a cluster, services whose clustered host differs
    from this host are skipped, as are services for which
    ``service_outside_check_period`` returns a truthy value.

    Returns the number of checks for which ``execute_check`` returned a truthy
    value and the sorted section names of the checks for which it did not.
    """
    hostname = host_config.hostname  # type: HostName
    config_cache = config.get_config_cache()

    num_success, missing_sections = 0, set()

    check_api_utils.set_hostname(hostname)

    belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

    services = check_table.get_precompiled_check_table(
        hostname,
        remove_duplicates=True,
        filter_mode="include_clustered" if belongs_to_cluster else None,
    )

    # When check types are specified via command line, enforce them. Otherwise use the
    # list of checks defined by the check table.
    if only_check_plugin_names is None:
        only_check_plugins = {service.check_plugin_name for service in services}
    else:
        only_check_plugins = set(only_check_plugin_names)

    sources.enforce_check_plugin_names(only_check_plugins)

    # Gather the data from the sources
    multi_host_sections = sources.get_host_sections()

    def _is_not_of_host(host_name, service):
        # Use the argument rather than the enclosing `hostname`, so the helper
        # answers the question for the host it was actually asked about.
        return host_name != config_cache.host_of_clustered_service(host_name,
                                                                   service.description)

    # Filter out check types which are not used on the node: a plugin is removed
    # when none of its services is assigned to this host.
    if belongs_to_cluster:
        removed_plugins = {
            plugin for plugin in only_check_plugins if all(
                _is_not_of_host(hostname, service)
                for service in services
                if service.check_plugin_name == plugin)
        }
        only_check_plugins -= removed_plugins

    for service in services:
        if service.check_plugin_name not in only_check_plugins:
            continue
        if belongs_to_cluster and _is_not_of_host(hostname, service):
            continue
        if service_outside_check_period(config_cache, hostname, service.description):
            continue

        success = execute_check(multi_host_sections, hostname, ipaddress, service)
        if success:
            num_success += 1
        else:
            missing_sections.add(cmk.base.check_utils.section_name_of(service.check_plugin_name))

    import cmk.base.inventory as inventory  # pylint: disable=import-outside-toplevel
    inventory.do_inventory_actions_during_checking_for(sources, multi_host_sections, host_config,
                                                       ipaddress)

    missing_section_list = sorted(missing_sections)
    return num_success, missing_section_list
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    only_check_plugin_names: Optional[Set[CheckPluginName]] = None
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    """Check one host and summarize the outcome.

    Steps, in order: start CPU tracking, look up the IP address if none was
    passed (non-cluster hosts only), load the item state, determine the
    services, build the host sections from the sources, execute the checks, run
    the inventory hooks, and fold the per-source summaries, the missing-data
    information and the timing figures into the result.

    Returns ``(status, infotexts, long_infotexts, perfdata)``. ``status`` is the
    maximum over the source states (for sources with non-empty output) and the
    missing-data state; ``long_infotexts`` is never filled by this function.
    """
    cpu_tracking.start("busy")
    console.verbose("Check_MK version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)
    exit_spec = host_config.exit_code_spec()

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
        # address is unknown). When called as non keepalive ipaddress may be None or
        # is already an address (2nd argument)
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = ip_lookup.lookup_ip_address(host_config)

        item_state.load(hostname)

        services = _get_filtered_services(
            host_name=hostname,
            belongs_to_cluster=len(config_cache.clusters_of(hostname)) > 0,
            config_cache=config_cache,
            only_check_plugins=only_check_plugin_names,
        )

        # see which raw sections we may need
        selected_raw_sections = _get_relevant_raw_sections(services, host_config)

        sources = data_sources.make_sources(
            host_config,
            ipaddress,
        )
        mhs = data_sources.make_host_sections(
            config_cache,
            host_config,
            ipaddress,
            sources=sources,
            selected_raw_sections=selected_raw_sections,
            max_cachefile_age=host_config.max_cachefile_age,
        )

        num_success, plugins_missing_data = _do_all_checks_on_host(
            config_cache,
            host_config,
            ipaddress,
            multi_host_sections=mhs,
            services=services,
            only_check_plugins=only_check_plugin_names,
        )
        inventory.do_inventory_actions_during_checking_for(
            config_cache,
            host_config,
            ipaddress,
            sources=sources,
            multi_host_sections=mhs,
        )

        if _submit_to_core:
            item_state.save(hostname)

        # Sources without any output contribute neither state nor perfdata.
        for source in sources:
            state, output, source_perf = source.get_summary_result_for_checking()
            if output == "":
                continue
            status = max(status, state)
            infotexts.append("[%s] %s" % (source.id, output))
            perfdata.extend([_convert_perf_data(p) for p in source_perf])

        if plugins_missing_data:
            missing_state, missing_text = _check_plugins_missing_data(
                plugins_missing_data,
                exit_spec,
                bool(num_success),
            )
            status = max(status, missing_state)
            infotexts.append(missing_text)

        cpu_tracking.end()
        phase_times = cpu_tracking.get_times()
        total_times = phase_times["TOTAL"]
        # Index 4 is used as the run time; indices 0-3 are reported below as
        # user, system, children user and children system time.
        run_time = total_times[4]

        infotexts.append("execution time %.1f sec" % run_time)
        if not config.check_mk_perfdata_with_times:
            perfdata.append("execution_time=%.3f" % run_time)
        else:
            perfdata.extend([
                "execution_time=%.3f" % run_time,
                "user_time=%.3f" % total_times[0],
                "system_time=%.3f" % total_times[1],
                "children_user_time=%.3f" % total_times[2],
                "children_system_time=%.3f" % total_times[3],
            ])

            for phase, times in phase_times.items():
                if phase not in ("agent", "snmp", "ds"):
                    continue
                waited = times[4] - sum(times[:4])  # real time - CPU time
                perfdata.append("cmk_time_%s=%.3f" % (phase, waited))

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # "ipaddress is not None": At least when working with a cluster host it seems the ipaddress
        # may be None.  This needs to be understood in detail and cleaned up. As the InlineSNMP
        # stats feature is a very rarely used debugging feature, the analyzation and fix is
        # postponed now.
        if config.record_inline_snmp_stats and ipaddress is not None:
            if host_config.snmp_config(ipaddress).is_inline_snmp_host:
                inline.snmp_stats_save()