Example #1
0
def test_cpu_tracking_simple(monkeypatch):
    """A single "busy" phase running from t=0 to t=1 is reported with a value of 1.0.

    `time.time` is replaced by a fixed clock so the recorded value at index 4
    of each entry is deterministic.
    """
    def set_clock(now):
        # Freeze time.time() at the given value.
        monkeypatch.setattr("time.time", lambda: now)

    set_clock(0.0)
    cpu_tracking.start("busy")
    # While the tracking has not been ended, no times are expected.
    assert cpu_tracking.get_times() == {}
    set_clock(1.0)
    cpu_tracking.end()

    recorded = cpu_tracking.get_times()

    # Expect the tracked phase plus the "TOTAL" entry.
    assert len(recorded) == 2
    total = recorded["TOTAL"]
    assert len(total) == 5
    assert total[4] == 1.0
    assert recorded["busy"][4] == 1.0
Example #2
0
def _run_fetchers_from_file(file_name: Path, mode: Mode, timeout: int) -> None:
    """Run all fetchers listed in `file_name` and emit their answers.

    The JSON file must contain a "fetchers" list; every entry is handed to
    `run_fetcher`. The outcome is:

    Count Type            Content                     Action
    ----- -----           -------                     ------
    1     Success Answer  Fetcher Blob                Written via `write_bytes`
    0..n  Failure Answer  Exception of failed fetcher Logged

    *) The fetcher blob contains the messages of all fetchers, including failed ones.
    **) `file_name` is presumably serial/host_name.json (taken from earlier notes,
        not enforced here).
    ***) `timeout` is passed to `timeout_control`; when `MKTimeout` is raised, every
         fetcher that has not produced a message yet gets a timeout error message.

    NOTE(review): earlier documentation also listed a final, empty "Waiting Answer"
    that ends the IO. Nothing in this function emits it - confirm where it is sent.
    """
    with file_name.open() as config_file:
        config_data = json.load(config_file)

    fetcher_entries = config_data["fetchers"]

    # CONTEXT: At the moment the fetchers are executed sequentially (for several reasons).
    # Alternatives and their drawbacks:
    # Sequential: a slow fetcher may block the other fetchers.
    # Asyncio: every fetcher must be asyncio-aware. This is ok, but even an estimation
    # requires time.
    # Threading: some fetchers may not be thread safe (snmp, for example). May be dangerous.
    # Multiprocessing: CPU and memory (at least in terms of kernel) hungry. Also duplicates
    # functionality of the Microcore.

    messages: List[FetcherMessage] = []
    with cpu_tracking.execute(), cpu_tracking.phase("fetchers"), timeout_control(timeout):
        try:
            # Collect as many messages as possible before a timeout is raised.
            for entry in fetcher_entries:
                messages.append(run_fetcher(entry, mode))
        except MKTimeout as exc:
            # Every entry without a message so far is answered with a timeout error.
            stats = L3Stats(cpu_tracking.get_times())
            unfinished = fetcher_entries[len(messages):]
            messages += [
                _make_fetcher_timeout_message(FetcherType[entry["fetcher_type"]], stats, exc)
                for entry in unfinished
            ]

    log.logger.debug("Produced %d messages:", len(messages))
    for message in messages:
        log.logger.debug("  message: %s", message.header)

    write_bytes(make_payload_answer(*messages))

    # Report every failed fetcher with the log level stored in its header.
    for msg in messages:
        if msg.header.payload_type is PayloadType.ERROR:
            log.logger.log(
                msg.header.status,
                "Error in %s fetcher: %s",
                msg.header.fetcher_type.name,
                msg.raw_data.error,
            )
Example #3
0
def test_cpu_tracking_add_times(monkeypatch):
    """Entering the same phase twice must add up its times.

    Timeline of the mocked clock:
      0 -> 2  "busy"
      2 -> 5  "agent" (first push/pop)
      5 -> 7  "agent" (second push/pop)
    """
    def set_clock(now):
        # Freeze time.time() at the given value.
        monkeypatch.setattr("time.time", lambda: now)

    set_clock(0.0)
    cpu_tracking.start("busy")
    set_clock(2.0)

    # Both "agent" rounds: 3 seconds first, 2 seconds afterwards.
    for phase_end in (5.0, 7.0):
        cpu_tracking.push_phase("agent")
        set_clock(phase_end)
        cpu_tracking.pop_phase()

    cpu_tracking.end()

    recorded = cpu_tracking.get_times()
    assert len(recorded) == 3

    assert recorded["TOTAL"][4] == 7.0
    assert recorded["busy"][4] == 2.0
    assert recorded["agent"][4] == 5.0
Example #4
0
def run_fetcher(entry: Dict[str, Any], mode: Mode) -> FetcherMessage:
    """Execute the fetcher described by `entry` and wrap its outcome in a message.

    Args:
        entry: Fetcher description; "fetcher_type" and "fetcher_params" are read.
        mode: Passed on to the `fetch` call of the fetcher.

    Returns:
        A `FetcherMessage`. If "fetcher_params" is missing it is an ERROR message
        with status `logging.CRITICAL` and empty stats. Otherwise it is built from
        the fetch result, where any exception raised while creating or running the
        fetcher is converted to `result.Error`.

    Raises:
        RuntimeError: If "fetcher_type" is missing in `entry` or is not a member
            name of `FetcherType`.
    """
    try:
        type_name = entry["fetcher_type"]
        fetcher_type = FetcherType[type_name]
    except KeyError as lookup_error:
        raise RuntimeError from lookup_error

    log.logger.debug("Executing fetcher: %s", type_name)

    try:
        params = entry["fetcher_params"]
    except KeyError as missing_params:
        # Without parameters the fetcher cannot be created: answer with an error message.
        stats = L3Stats({})
        payload = ErrorPayload(missing_params)
        header = FetcherHeader(
            fetcher_type,
            PayloadType.ERROR,
            status=logging.CRITICAL,
            payload_length=len(payload),
            stats_length=len(stats),
        )
        return FetcherMessage(header, payload, stats)

    try:
        with cpu_tracking.phase(fetcher_type.name):
            with fetcher_type.from_json(params) as fetcher:
                raw_data = fetcher.fetch(mode)
    except Exception as fetch_error:
        # Failures are transported to the caller as part of the message.
        raw_data = result.Error(fetch_error)

    stats = L3Stats(cpu_tracking.get_times())
    return FetcherMessage.from_raw_data(raw_data, stats, fetcher_type)
Example #5
0
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    only_check_plugin_names: Optional[Set[CheckPluginName]] = None,
    fetcher_messages: Optional[Sequence[FetcherMessage]] = None
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    """Execute the checks of a host and build the summary result.

    Args:
        hostname: Name of the host to check.
        ipaddress: Address of the host. If it is None and the host is not a cluster,
            the address is looked up via `ip_lookup`.
        only_check_plugin_names: Passed on as `only_check_plugins` when selecting the
            services to fetch and when executing the checks.
        fetcher_messages: Passed on to `checkers.update_host_sections`. The CPU times
            of these messages are also merged into the CPU tracking.

    Returns:
        A tuple (status, infotexts, long_infotexts, perfdata). `status` is the maximum
        of the states of all data source summaries and of the "missing data" result.
        `long_infotexts` is never filled by this function and is always returned empty.

    In any case (also when an exception is raised) the check result file is closed if
    it is open, and the inline SNMP statistics are saved if recording is enabled.
    """
    console.verbose("Checkmk version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        # Everything in this block is accounted to the "busy" phase of the CPU tracking.
        with cpu_tracking.execute(), cpu_tracking.phase("busy"):
            license_usage.try_history_update()

            # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
            # address is unknown). When called as non keepalive ipaddress may be None or
            # is already an address (2nd argument)
            if ipaddress is None and not host_config.is_cluster:
                ipaddress = ip_lookup.lookup_ip_address(host_config)

            item_state.load(hostname)

            # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching all
            # information from the monitored host and cache the result for the cluster checks to be
            # performed on the cached information.
            #
            # This means that in case of SNMP nodes, they need to take the clustered services of the
            # node into account, fetch the needed sections and cache them for the cluster host.
            #
            # But later, when checking the node services, the node has to only deal with the unclustered
            # services.
            belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

            services_to_fetch = _get_services_to_fetch(
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                config_cache=config_cache,
                only_check_plugins=only_check_plugin_names,
            )

            services_to_check = _filter_clustered_services(
                config_cache=config_cache,
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                services=services_to_fetch,
            )

            # see which raw sections we may need
            selected_raw_sections = agent_based_register.get_relevant_raw_sections(
                check_plugin_names=(s.check_plugin_name for s in services_to_fetch),
                consider_inventory_plugins=host_config.do_status_data_inventory,
            )

            sources = checkers.make_sources(
                host_config,
                ipaddress,
                mode=checkers.Mode.CHECKING,
            )
            mhs = MultiHostSections()

            # Fills `mhs`; `result` is iterated below as (source, host_sections) pairs.
            result = checkers.update_host_sections(
                mhs,
                checkers.make_nodes(
                    config_cache,
                    host_config,
                    ipaddress,
                    checkers.Mode.CHECKING,
                    sources,
                ),
                selected_raw_sections=selected_raw_sections,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                fetcher_messages=fetcher_messages,
            )

            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                multi_host_sections=mhs,
                services=services_to_check,
                only_check_plugins=only_check_plugin_names,
            )
            inventory.do_inventory_actions_during_checking_for(
                config_cache,
                host_config,
                ipaddress,
                sources=sources,
                multi_host_sections=mhs,
            )

            # The item state loaded above is only written back when submitting to the core.
            if _submit_to_core:
                item_state.save(hostname)

            # Add the summary of every data source that has something to report.
            for source, host_sections in result:
                source_state, source_output, source_perfdata = source.summarize(host_sections)
                if source_output != "":
                    status = max(status, source_state)
                    infotexts.append("[%s] %s" % (source.id, source_output))
                    perfdata.extend([_convert_perf_data(p) for p in source_perfdata])

            if plugins_missing_data:
                missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_data_status)
                infotexts.append(missing_data_infotext)

        # The tracking context has been left at this point. Merge the CPU times
        # reported by the fetcher messages (if any) before reading the times.
        for msg in fetcher_messages if fetcher_messages else ():
            cpu_tracking.update(msg.stats.cpu_times)

        phase_times = cpu_tracking.get_times()
        # The reported execution time is taken from the "busy" phase.
        total_times = phase_times["busy"]

        infotexts.append("execution time %.1f sec" % total_times.run_time)
        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % total_times.run_time,
                "user_time=%.3f" % total_times.process.user,
                "system_time=%.3f" % total_times.process.system,
                "children_user_time=%.3f" % total_times.process.children_user,
                "children_system_time=%.3f" % total_times.process.children_system,
            ]

            # Only these phases get an additional "cmk_time_<phase>" metric.
            for phase, times in phase_times.items():
                if phase in ["agent", "snmp", "ds"]:
                    # NOTE(review): presumably the first four fields of `process` are the
                    # user/system/children CPU times used above - confirm the field order.
                    t = times.run_time - sum(times.process[:4])  # real time - CPU time
                    perfdata.append("cmk_time_%s=%.3f" % (phase, t))
        else:
            perfdata.append("execution_time=%.3f" % total_times.run_time)

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # "ipaddress is not None": At least when working with a cluster host it seems the ipaddress
        # may be None.  This needs to be understood in detail and cleaned up. As the InlineSNMP
        # stats feature is a very rarely used debugging feature, the analysis and fix is
        # postponed now.
        if config.record_inline_snmp_stats and ipaddress is not None and host_config.snmp_config(
                ipaddress).snmp_backend == "inline":
            inline.snmp_stats_save()
Example #6
0
def do_check(hostname, ipaddress, only_check_plugin_names=None):
    """Execute the checks of a host and build the summary result.

    The CPU tracking is started with the phase "busy" at the beginning and is
    ended right before the times are read.

    Returns a tuple (status, infotexts, long_infotexts, perfdata). `status` is
    the maximum of the data source summary states and the state caused by
    missing sections. `long_infotexts` is never filled here and is always
    returned empty.

    In any case (also when an exception is raised) the check result file is
    closed if it is open, and the inline SNMP statistics are saved if recording
    is enabled for an inline SNMP host.
    """
    cpu_tracking.start("busy")
    console.verbose("Check_MK version %s\n" % cmk.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)
    exit_spec = host_config.exit_code_spec()

    status = 0
    infotexts = []
    long_infotexts = []
    perfdata = []
    try:
        # In keepalive mode an ipaddress is always given (0.0.0.0 or :: when the address
        # is unknown). Otherwise it may be None or already an address (2nd argument).
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = ip_lookup.lookup_ip_address(hostname)

        item_state.load(hostname)

        sources = data_sources.DataSources(hostname, ipaddress)

        num_success, missing_sections = _do_all_checks_on_host(
            sources, host_config, ipaddress, only_check_plugin_names)

        # The item state loaded above is only written back when submitting to the core.
        if _submit_to_core:
            item_state.save(hostname)

        # Add the summary of every data source that has something to report.
        for source in sources.get_data_sources():
            summary = source.get_summary_result_for_checking()
            source_state, source_output, source_perfdata = summary
            if source_output != "":
                status = max(status, source_state)
                infotexts.append("[%s] %s" % (source.id(), source_output))
                perfdata.extend(source_perfdata)

        if missing_sections:
            if num_success > 0:
                # Some checks succeeded: report which sections are missing.
                missing_sections_status, missing_sections_infotext = _check_missing_sections(
                    missing_sections, exit_spec)
                status = max(status, missing_sections_status)
                infotexts.append(missing_sections_infotext)
            else:
                infotexts.append("Got no information from host")
                status = max(status, exit_spec.get("empty_output", 2))

        cpu_tracking.end()
        phase_times = cpu_tracking.get_times()
        total_times = phase_times["TOTAL"]
        run_time = total_times[4]

        infotexts.append("execution time %.1f sec" % run_time)
        if not config.check_mk_perfdata_with_times:
            perfdata.append("execution_time=%.3f" % run_time)
        else:
            perfdata.extend([
                "execution_time=%.3f" % run_time,
                "user_time=%.3f" % total_times[0],
                "system_time=%.3f" % total_times[1],
                "children_user_time=%.3f" % total_times[2],
                "children_system_time=%.3f" % total_times[3],
            ])

            # Only these phases get an additional "cmk_time_<phase>" metric.
            for phase, times in phase_times.items():
                if phase not in ["agent", "snmp", "ds"]:
                    continue
                waiting_time = times[4] - sum(times[:4])  # real time - CPU time
                perfdata.append("cmk_time_%s=%.3f" % (phase, waiting_time))

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # NOTE(review): for cluster hosts no address is looked up above, so ipaddress may
        # still be None here - confirm that snmp_config() copes with that.
        if config.record_inline_snmp_stats:
            if host_config.snmp_config(ipaddress).is_inline_snmp_host:
                inline_snmp.save_snmp_stats()
Example #7
0
def test_cpu_tracking_initial_times():
    """Without any tracking having been done, no times are expected."""
    initial_times = cpu_tracking.get_times()
    assert initial_times == {}