def test_cpu_tracking_simple(monkeypatch):
    """Track one "busy" phase across exactly one second of faked clock time."""

    def freeze_clock(now):
        # Pin time.time() to a constant so the measured duration is exact.
        monkeypatch.setattr("time.time", lambda: now)

    freeze_clock(0.0)
    cpu_tracking.start("busy")
    # Between start() and end() no times are expected to be reported.
    assert cpu_tracking.get_times() == {}

    freeze_clock(1.0)
    cpu_tracking.end()

    recorded = cpu_tracking.get_times()
    # One entry for the phase itself plus the "TOTAL" entry.
    assert len(recorded) == 2

    total = recorded["TOTAL"]
    assert len(total) == 5
    # The last of the five values reflects the faked clock difference.
    assert total[4] == 1.0
    assert recorded["busy"][4] == 1.0
def _run_fetchers_from_file(file_name: Path, mode: Mode, timeout: int) -> None:
    """Run all fetchers configured in a JSON file and write out their answers.

    The file (expected to be serial/host_name.json) is parsed as JSON and its
    "fetchers" list is processed entry by entry with `run_fetcher`, guarded by
    `timeout_control(timeout)`.

    Output:
      * One payload answer built from *all* fetcher messages (successful and
        failed ones alike) is handed to `write_bytes`.
      * Every message whose payload type is ERROR is additionally logged with
        the status stored in its header.

    If `MKTimeout` is raised while the fetchers run, the entries that did not
    produce a message yet are represented by timeout messages instead.
    """
    with file_name.open() as config_file:
        config_data = json.load(config_file)

    fetcher_entries = config_data["fetchers"]

    # CONTEXT: At the moment the fetcher executors are called sequentially (for several reasons).
    # Possibilities:
    # Sequential: a slow fetcher may block the other fetchers.
    # Asyncio: every fetcher must be asyncio-aware. This is ok, but even estimation requires time.
    # Threading: some fetchers may not be thread safe (snmp, for example). May be dangerous.
    # Multiprocessing: CPU and memory (at least in terms of kernel) hungry. Also duplicates
    # functionality of the Microcore.
    messages: List[FetcherMessage] = []
    with cpu_tracking.execute(), cpu_tracking.phase("fetchers"), timeout_control(timeout):
        try:
            # Collect as many messages as possible before the timeout exception is raised.
            for entry in fetcher_entries:
                messages.append(run_fetcher(entry, mode))
        except MKTimeout as exc:
            # Every entry without a message so far is answered with a timeout error.
            stats = L3Stats(cpu_tracking.get_times())
            unfinished_entries = fetcher_entries[len(messages):]
            timeout_messages = [
                _make_fetcher_timeout_message(
                    FetcherType[entry["fetcher_type"]],
                    stats,
                    exc,
                )
                for entry in unfinished_entries
            ]
            messages.extend(timeout_messages)

    log.logger.debug("Produced %d messages:", len(messages))
    for message in messages:
        log.logger.debug(" message: %s", message.header)

    write_bytes(make_payload_answer(*messages))

    for msg in messages:
        if msg.header.payload_type is not PayloadType.ERROR:
            continue
        log.logger.log(
            msg.header.status,
            "Error in %s fetcher: %s",
            msg.header.fetcher_type.name,
            msg.raw_data.error,
        )
def test_cpu_tracking_add_times(monkeypatch):
    """Entering the "agent" phase twice adds both durations under one key."""

    def freeze_clock(now):
        # Pin time.time() to a constant so every measured duration is exact.
        monkeypatch.setattr("time.time", lambda: now)

    freeze_clock(0.0)
    cpu_tracking.start("busy")

    freeze_clock(2.0)
    cpu_tracking.push_phase("agent")

    # First "agent" visit lasts from 2.0 to 5.0 ...
    freeze_clock(5.0)
    cpu_tracking.pop_phase()
    cpu_tracking.push_phase("agent")

    # ... and the second one, entered immediately afterwards, from 5.0 to 7.0.
    freeze_clock(7.0)
    cpu_tracking.pop_phase()
    cpu_tracking.end()

    recorded = cpu_tracking.get_times()
    assert len(recorded) == 3

    expected_elapsed = (
        ("TOTAL", 7.0),
        ("busy", 2.0),
        ("agent", 5.0),
    )
    for phase_name, elapsed in expected_elapsed:
        assert recorded[phase_name][4] == elapsed
def run_fetcher(entry: Dict[str, Any], mode: Mode) -> FetcherMessage:
    """Execute the fetcher described by `entry` and return its message.

    Args:
        entry: Fetcher description; the keys "fetcher_type" and
            "fetcher_params" are read from it.
        mode: Passed through to the fetcher's `fetch` call.

    Returns:
        A `FetcherMessage`. If "fetcher_params" is missing, an ERROR message
        with status `logging.CRITICAL` and empty stats is returned. Any
        exception raised while creating or running the fetcher is wrapped in
        `result.Error` and delivered inside the message instead of propagating.

    Raises:
        RuntimeError: If looking up the fetcher type raises `KeyError`.
    """
    try:
        fetcher_type = FetcherType[entry["fetcher_type"]]
    except KeyError as exc:
        raise RuntimeError from exc

    log.logger.debug("Executing fetcher: %s", entry["fetcher_type"])

    try:
        fetcher_params = entry["fetcher_params"]
    except KeyError as exc:
        # Without parameters the fetcher cannot be built: answer with an error message.
        empty_stats = L3Stats({})
        error_payload = ErrorPayload(exc)
        error_header = FetcherHeader(
            fetcher_type,
            PayloadType.ERROR,
            status=logging.CRITICAL,
            payload_length=len(error_payload),
            stats_length=len(empty_stats),
        )
        return FetcherMessage(error_header, error_payload, empty_stats)

    try:
        with cpu_tracking.phase(fetcher_type.name):
            with fetcher_type.from_json(fetcher_params) as fetcher:
                raw_data = fetcher.fetch(mode)
    except Exception as exc:
        # Deliberately broad: every failure is transported as an error result.
        raw_data = result.Error(exc)

    stats = L3Stats(cpu_tracking.get_times())
    return FetcherMessage.from_raw_data(raw_data, stats, fetcher_type)
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    only_check_plugin_names: Optional[Set[CheckPluginName]] = None,
    fetcher_messages: Optional[Sequence[FetcherMessage]] = None
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    """Run the checks of one host and build the overall check result.

    Args:
        hostname: Host whose services are checked.
        ipaddress: Address of the host. If None and the host is not a cluster,
            it is looked up via `ip_lookup.lookup_ip_address`.
        only_check_plugin_names: Optional restriction that is passed on when
            selecting the services and when executing the checks.
        fetcher_messages: Optional fetcher messages. They are handed to
            `checkers.update_host_sections` and their CPU times are passed to
            `cpu_tracking.update`.

    Returns:
        A tuple `(status, infotexts, long_infotexts, perfdata)`. `status` is the
        maximum of the source summary states and the missing-data state;
        `long_infotexts` is not filled in this function.
    """
    console.verbose("Checkmk version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    # Accumulators for the result tuple returned at the end.
    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        with cpu_tracking.execute(), cpu_tracking.phase("busy"):
            license_usage.try_history_update()

            # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
            # address is unknown). When called as non keepalive ipaddress may be None or
            # is already an address (2nd argument)
            if ipaddress is None and not host_config.is_cluster:
                ipaddress = ip_lookup.lookup_ip_address(host_config)

            item_state.load(hostname)

            # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching all
            # information from the monitored host and cache the result for the cluster checks to be
            # performed on the cached information.
            #
            # This means that in case of SNMP nodes, they need to take the clustered services of the
            # node into account, fetch the needed sections and cache them for the cluster host.
            #
            # But later, when checking the node services, the node has to only deal with the unclustered
            # services.
            belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

            services_to_fetch = _get_services_to_fetch(
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                config_cache=config_cache,
                only_check_plugins=only_check_plugin_names,
            )

            services_to_check = _filter_clustered_services(
                config_cache=config_cache,
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                services=services_to_fetch,
            )

            # see which raw sections we may need
            selected_raw_sections = agent_based_register.get_relevant_raw_sections(
                check_plugin_names=(s.check_plugin_name for s in services_to_fetch),
                consider_inventory_plugins=host_config.do_status_data_inventory,
            )

            sources = checkers.make_sources(
                host_config,
                ipaddress,
                mode=checkers.Mode.CHECKING,
            )
            mhs = MultiHostSections()

            # `mhs` is passed in here and reused below for the checks and the inventory actions.
            result = checkers.update_host_sections(
                mhs,
                checkers.make_nodes(
                    config_cache,
                    host_config,
                    ipaddress,
                    checkers.Mode.CHECKING,
                    sources,
                ),
                selected_raw_sections=selected_raw_sections,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                fetcher_messages=fetcher_messages,
            )

            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                multi_host_sections=mhs,
                services=services_to_check,
                only_check_plugins=only_check_plugin_names,
            )
            inventory.do_inventory_actions_during_checking_for(
                config_cache,
                host_config,
                ipaddress,
                sources=sources,
                multi_host_sections=mhs,
            )

            if _submit_to_core:
                item_state.save(hostname)

            # Fold the summary of every source into the overall result; sources
            # with an empty summary text do not contribute.
            for source, host_sections in result:
                source_state, source_output, source_perfdata = source.summarize(host_sections)
                if source_output != "":
                    status = max(status, source_state)
                    infotexts.append("[%s] %s" % (source.id, source_output))
                    perfdata.extend([_convert_perf_data(p) for p in source_perfdata])

            if plugins_missing_data:
                missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_data_status)
                infotexts.append(missing_data_infotext)

        # NOTE(review): the tracking context is assumed to be closed at this point, i.e.
        # before the recorded times are read - confirm against the original layout.
        # Hand the CPU times carried by the fetcher messages to the tracker;
        # a missing or empty `fetcher_messages` is treated as "no messages".
        for msg in fetcher_messages if fetcher_messages else ():
            cpu_tracking.update(msg.stats.cpu_times)

        phase_times = cpu_tracking.get_times()
        total_times = phase_times["busy"]

        infotexts.append("execution time %.1f sec" % total_times.run_time)
        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % total_times.run_time,
                "user_time=%.3f" % total_times.process.user,
                "system_time=%.3f" % total_times.process.system,
                "children_user_time=%.3f" % total_times.process.children_user,
                "children_system_time=%.3f" % total_times.process.children_system,
            ]

            # Only these phases get an additional "cmk_time_<phase>" metric.
            for phase, times in phase_times.items():
                if phase in ["agent", "snmp", "ds"]:
                    t = times.run_time - sum(times.process[:4])  # real time - CPU time
                    perfdata.append("cmk_time_%s=%.3f" % (phase, t))
        else:
            perfdata.append("execution_time=%.3f" % total_times.run_time)

        return status, infotexts, long_infotexts, perfdata
    finally:
        # Runs on success and on error alike.
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # "ipaddress is not None": At least when working with a cluster host it seems the ipaddress
        # may be None.  This needs to be understood in detail and cleaned up. As the InlineSNMP
        # stats feature is a very rarely used debugging feature, the analyzation and fix is
        # postponed now.
        if config.record_inline_snmp_stats and ipaddress is not None and host_config.snmp_config(
                ipaddress).snmp_backend == "inline":
            inline.snmp_stats_save()
def do_check(hostname, ipaddress, only_check_plugin_names=None):
    """Run the checks of one host and build the overall check result.

    Args:
        hostname: Host whose services are checked.
        ipaddress: Address of the host. If None and the host is not a cluster,
            it is looked up via `ip_lookup.lookup_ip_address`.
        only_check_plugin_names: Optional restriction that is passed on to
            `_do_all_checks_on_host`.

    Returns:
        A tuple `(status, infotexts, long_infotexts, perfdata)`.
        `long_infotexts` is not filled in this function.
    """
    # Time tracking starts before anything else; it is ended further below,
    # right before the recorded times are read.
    cpu_tracking.start("busy")
    console.verbose("Check_MK version %s\n" % cmk.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    # Accumulators for the result tuple returned at the end.
    status, infotexts, long_infotexts, perfdata = 0, [], [], []
    try:
        # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
        # address is unknown). When called as non keepalive ipaddress may be None or
        # is already an address (2nd argument)
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = ip_lookup.lookup_ip_address(hostname)

        item_state.load(hostname)

        sources = data_sources.DataSources(hostname, ipaddress)

        num_success, missing_sections = \
            _do_all_checks_on_host(sources, host_config, ipaddress, only_check_plugin_names)

        if _submit_to_core:
            item_state.save(hostname)

        # Fold the summary of every data source into the overall result; sources
        # with an empty summary text do not contribute.
        for source in sources.get_data_sources():
            source_state, source_output, source_perfdata = source.get_summary_result_for_checking(
            )
            if source_output != "":
                status = max(status, source_state)
                infotexts.append("[%s] %s" % (source.id(), source_output))
                perfdata.extend(source_perfdata)

        # Some sections missing but at least one check succeeded: report the
        # missing sections. Nothing succeeded at all: report "no information",
        # using the "empty_output" state of the exit spec (default 2).
        if missing_sections and num_success > 0:
            missing_sections_status, missing_sections_infotext = \
                _check_missing_sections(missing_sections, exit_spec)
            status = max(status, missing_sections_status)
            infotexts.append(missing_sections_infotext)

        elif missing_sections:
            infotexts.append("Got no information from host")
            status = max(status, exit_spec.get("empty_output", 2))

        cpu_tracking.end()
        phase_times = cpu_tracking.get_times()
        total_times = phase_times["TOTAL"]
        # Indices 0-3 are reported below as user/system/children times, index 4
        # as the execution time.
        run_time = total_times[4]

        infotexts.append("execution time %.1f sec" % run_time)
        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % run_time,
                "user_time=%.3f" % total_times[0],
                "system_time=%.3f" % total_times[1],
                "children_user_time=%.3f" % total_times[2],
                "children_system_time=%.3f" % total_times[3],
            ]

            # Only these phases get an additional "cmk_time_<phase>" metric.
            for phase, times in phase_times.items():
                if phase in ["agent", "snmp", "ds"]:
                    t = times[4] - sum(times[:4])  # real time - CPU time
                    perfdata.append("cmk_time_%s=%.3f" % (phase, t))
        else:
            perfdata.append("execution_time=%.3f" % run_time)

        return status, infotexts, long_infotexts, perfdata
    finally:
        # Runs on success and on error alike.
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # NOTE(review): `ipaddress` may still be None here (e.g. for cluster hosts,
        # where no lookup happens above) - confirm that snmp_config() accepts that.
        if config.record_inline_snmp_stats \
           and host_config.snmp_config(ipaddress).is_inline_snmp_host:
            inline_snmp.save_snmp_stats()
def test_cpu_tracking_initial_times():
    """get_times() is expected to return an empty dict when this test has started no tracking."""
    initial_times = cpu_tracking.get_times()
    assert initial_times == {}