Example #1
def _do_all_checks_on_host(
    services: List[Service],
    sources: data_sources.DataSources,
    host_config: config.HostConfig,
    ipaddress: Optional[HostAddress],
    only_check_plugins: Optional[Set[CheckPluginName]] = None,
) -> Tuple[int, List[CheckPluginName]]:
    hostname: HostName = host_config.hostname

    num_success = 0
    plugins_missing_data: Set[CheckPluginName] = set()

    check_api_utils.set_hostname(hostname)

    # Gather the data from the sources
    nodes = sources.make_nodes(host_config)
    multi_host_sections = sources.get_host_sections(nodes,
                                                    max_cachefile_age=host_config.max_cachefile_age)

    for service in services:

        success = execute_check(multi_host_sections, host_config, ipaddress, service)
        if success:
            num_success += 1
        else:
            # TODO (mo): centralize maincheckify: CMK-4295
            plugins_missing_data.add(CheckPluginName(maincheckify(service.check_plugin_name)))

    import cmk.base.inventory as inventory  # pylint: disable=import-outside-toplevel
    inventory.do_inventory_actions_during_checking_for(
        sources,
        multi_host_sections,
        host_config,
        ipaddress,
    )

    return num_success, sorted(plugins_missing_data)
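
A minimal, self-contained sketch of the bookkeeping pattern above (count successes, collect plugins that produced no data in a set, return a deterministically sorted list). FakeService and fake_execute_check are hypothetical stand-ins for the Checkmk types, not the real API:

from typing import List, NamedTuple, Set, Tuple


class FakeService(NamedTuple):
    # Hypothetical stand-in for cmk's Service objects.
    check_plugin_name: str


def fake_execute_check(service: FakeService) -> bool:
    # Hypothetical stand-in: pretend every plugin except "df" produced data.
    return service.check_plugin_name != "df"


def run_checks(services: List[FakeService]) -> Tuple[int, List[str]]:
    num_success = 0
    plugins_missing_data: Set[str] = set()
    for service in services:
        if fake_execute_check(service):
            num_success += 1
        else:
            plugins_missing_data.add(service.check_plugin_name)
    # Sorting the set yields a deterministic list for the summary output.
    return num_success, sorted(plugins_missing_data)


print(run_checks([FakeService("df"), FakeService("cpu"), FakeService("df")]))
# -> (1, ['df'])
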
Example #2
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    only_check_plugin_names: Optional[Set[CheckPluginName]] = None,
    fetcher_messages: Optional[Sequence[FetcherMessage]] = None
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    console.verbose("Checkmk version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        with cpu_tracking.execute(), cpu_tracking.phase("busy"):
            license_usage.try_history_update()

            # In keepalive mode we always have an ipaddress (it can be 0.0.0.0 or :: when
            # the address is unknown). When called without keepalive, ipaddress may be None
            # or may already be an address (2nd argument).
            if ipaddress is None and not host_config.is_cluster:
                ipaddress = ip_lookup.lookup_ip_address(host_config)

            item_state.load(hostname)

            # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching
            # all information from the monitored host and for caching the result, so that the
            # cluster checks can be performed on the cached information.
            #
            # This means that SNMP nodes need to take the clustered services of the node into
            # account, fetch the needed sections and cache them for the cluster host.
            #
            # Later, when checking the node's own services, the node only has to deal with the
            # unclustered services.
            belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

            services_to_fetch = _get_services_to_fetch(
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                config_cache=config_cache,
                only_check_plugins=only_check_plugin_names,
            )

            services_to_check = _filter_clustered_services(
                config_cache=config_cache,
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                services=services_to_fetch,
            )

            # see which raw sections we may need
            selected_raw_sections = agent_based_register.get_relevant_raw_sections(
                check_plugin_names=(s.check_plugin_name for s in services_to_fetch),
                consider_inventory_plugins=host_config.do_status_data_inventory,
            )

            sources = checkers.make_sources(
                host_config,
                ipaddress,
                mode=checkers.Mode.CHECKING,
            )
            mhs = MultiHostSections()

            result = checkers.update_host_sections(
                mhs,
                checkers.make_nodes(
                    config_cache,
                    host_config,
                    ipaddress,
                    checkers.Mode.CHECKING,
                    sources,
                ),
                selected_raw_sections=selected_raw_sections,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                fetcher_messages=fetcher_messages,
            )

            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                multi_host_sections=mhs,
                services=services_to_check,
                only_check_plugins=only_check_plugin_names,
            )
            inventory.do_inventory_actions_during_checking_for(
                config_cache,
                host_config,
                ipaddress,
                sources=sources,
                multi_host_sections=mhs,
            )

            if _submit_to_core:
                item_state.save(hostname)

            for source, host_sections in result:
                source_state, source_output, source_perfdata = source.summarize(host_sections)
                if source_output != "":
                    status = max(status, source_state)
                    infotexts.append("[%s] %s" % (source.id, source_output))
                    perfdata.extend([_convert_perf_data(p) for p in source_perfdata])

            if plugins_missing_data:
                missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_data_status)
                infotexts.append(missing_data_infotext)

        for msg in fetcher_messages if fetcher_messages else ():
            cpu_tracking.update(msg.stats.cpu_times)

        phase_times = cpu_tracking.get_times()
        total_times = phase_times["busy"]

        infotexts.append("execution time %.1f sec" % total_times.run_time)
        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % total_times.run_time,
                "user_time=%.3f" % total_times.process.user,
                "system_time=%.3f" % total_times.process.system,
                "children_user_time=%.3f" % total_times.process.children_user,
                "children_system_time=%.3f" % total_times.process.children_system,
            ]

            for phase, times in phase_times.items():
                if phase in ["agent", "snmp", "ds"]:
                    t = times.run_time - sum(times.process[:4])  # real time - CPU time
                    perfdata.append("cmk_time_%s=%.3f" % (phase, t))
        else:
            perfdata.append("execution_time=%.3f" % total_times.run_time)

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # "ipaddress is not None": At least when working with a cluster host it seems the ipaddress
        # may be None.  This needs to be understood in detail and cleaned up. As the InlineSNMP
        # stats feature is a very rarely used debugging feature, the analyzation and fix is
        # postponed now.
        if config.record_inline_snmp_stats and ipaddress is not None and host_config.snmp_config(
                ipaddress).snmp_backend == "inline":
            inline.snmp_stats_save()
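
The per-source summary loop in Example #2 (the "for source, host_sections in result" block) folds each source's state into the overall status with max(), so the numerically highest state wins, and prefixes every summary line with the source id. A reduced sketch of that aggregation, with made-up (source_id, state, output) tuples standing in for real Source objects:

from typing import List, Tuple


def summarize_sources(results: List[Tuple[str, int, str]]) -> Tuple[int, List[str]]:
    # results: hypothetical (source_id, state, output) tuples standing in for
    # what source.summarize(host_sections) returns in Example #2.
    status = 0
    infotexts: List[str] = []
    for source_id, state, output in results:
        if output != "":
            status = max(status, state)  # numerically highest state wins
            infotexts.append("[%s] %s" % (source_id, output))
    return status, infotexts


print(summarize_sources([("agent", 0, "Success"), ("piggyback", 1, "Missing data")]))
# -> (1, ['[agent] Success', '[piggyback] Missing data'])
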
Example #3
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    *,
    # The following arguments *must* remain optional for Nagios and the `DiscoCheckExecutor`.
    #   See Also: `cmk.base.discovery.check_discovery()`
    fetcher_messages: Sequence[FetcherMessage] = (),
    run_only_plugin_names: Optional[Set[CheckPluginName]] = None,
    selected_sections: checkers.SectionNameCollection = checkers.NO_SELECTION,
    submit_to_core: bool = True,
    show_perfdata: bool = False,
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails],
           List[str]]:
    console.verbose("Checkmk version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    mode = checkers.Mode.CHECKING if selected_sections is checkers.NO_SELECTION else checkers.Mode.FORCE_SECTIONS

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        license_usage.try_history_update()

        # In keepalive mode we always have an ipaddress (it can be 0.0.0.0 or :: when
        # the address is unknown). When called without keepalive, ipaddress may be None
        # or may already be an address (2nd argument).
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = ip_lookup.lookup_ip_address(host_config)

        # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching
        # all information from the monitored host and for caching the result, so that the
        # cluster checks can be performed on the cached information.
        #
        # This means that SNMP nodes need to take the clustered services of the node into
        # account, fetch the needed sections and cache them for the cluster host.
        #
        # Later, when checking the node's own services, the node only has to deal with the
        # unclustered services.
        #
        # TODO: clean this up. The fetched sections are computed in the checkers
        #       _make_configured_snmp_sections now.
        #
        belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

        services_to_fetch = _get_services_to_fetch(
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            config_cache=config_cache,
        )

        services_to_check = _filter_clustered_services(
            config_cache=config_cache,
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            services=services_to_fetch,
            run_only_plugin_names=run_only_plugin_names,
        )

        nodes = checkers.make_nodes(
            config_cache,
            host_config,
            ipaddress,
            mode,
            checkers.make_sources(
                host_config,
                ipaddress,
                mode=mode,
                selected_sections=selected_sections,
            ),
        )

        if not fetcher_messages:
            # Note: `fetch_all(sources)` is almost always called in similar
            #       code in discovery and inventory.  The only other exception
            #       is `cmk.base.discovery.check_discovery(...)`.  This does
            #       not seem right.
            fetcher_messages = list(
                checkers.fetch_all(
                    nodes,
                    max_cachefile_age=host_config.max_cachefile_age,
                    host_config=host_config,
                ))

        with CPUTracker() as tracker:
            mhs = MultiHostSections()
            result = checkers.update_host_sections(
                mhs,
                nodes,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                fetcher_messages=fetcher_messages,
                selected_sections=selected_sections,
            )

            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                multi_host_sections=mhs,
                services=services_to_check,
                submit_to_core=submit_to_core,
                show_perfdata=show_perfdata,
            )

            if run_only_plugin_names is None:
                inventory.do_inventory_actions_during_checking_for(
                    config_cache,
                    host_config,
                    ipaddress,
                    multi_host_sections=mhs,
                )

            for source, host_sections in result:
                source_state, source_output, source_perfdata = source.summarize(
                    host_sections)
                if source_output != "":
                    status = max(status, source_state)
                    infotexts.append("[%s] %s" % (source.id, source_output))
                    perfdata.extend(
                        [_convert_perf_data(p) for p in source_perfdata])

            if plugins_missing_data:
                missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_data_status)
                infotexts.append(missing_data_infotext)

        total_times = tracker.duration
        for msg in fetcher_messages:
            total_times += msg.stats.duration

        infotexts.append("execution time %.1f sec" %
                         total_times.process.elapsed)
        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % total_times.process.elapsed,
                "user_time=%.3f" % total_times.process.user,
                "system_time=%.3f" % total_times.process.system,
                "children_user_time=%.3f" % total_times.process.children_user,
                "children_system_time=%.3f" %
                total_times.process.children_system,
            ]
            summary: DefaultDict[str, Snapshot] = defaultdict(Snapshot.null)
            for msg in fetcher_messages if fetcher_messages else ():
                if msg.fetcher_type in (
                        FetcherType.PIGGYBACK,
                        FetcherType.PROGRAM,
                        FetcherType.SNMP,
                        FetcherType.TCP,
                ):
                    summary[{
                        FetcherType.PIGGYBACK: "agent",
                        FetcherType.PROGRAM: "ds",
                        FetcherType.SNMP: "snmp",
                        FetcherType.TCP: "agent",
                    }[msg.fetcher_type]] += msg.stats.duration
            for phase, duration in summary.items():
                perfdata.append("cmk_time_%s=%.3f" % (phase, duration.idle))
        else:
            perfdata.append("execution_time=%.3f" %
                            total_times.process.elapsed)

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()
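
Example #3 attributes fetcher run time to the perfdata phases "agent", "ds" and "snmp" by mapping each FetcherType to a phase name and accumulating the per-message durations in a defaultdict. A reduced sketch of that accumulation, with plain (fetcher_type, seconds) tuples standing in for FetcherMessage objects and a float standing in for the Snapshot duration type:

from collections import defaultdict
from typing import DefaultDict, List, Tuple

# Mapping as used in Example #3: TCP and PIGGYBACK both count as "agent" time.
_PHASE_OF = {"PIGGYBACK": "agent", "PROGRAM": "ds", "SNMP": "snmp", "TCP": "agent"}


def phase_perfdata(messages: List[Tuple[str, float]]) -> List[str]:
    summary: DefaultDict[str, float] = defaultdict(float)
    for fetcher_type, seconds in messages:
        if fetcher_type in _PHASE_OF:
            summary[_PHASE_OF[fetcher_type]] += seconds
    return ["cmk_time_%s=%.3f" % (phase, t) for phase, t in summary.items()]


print(phase_perfdata([("TCP", 0.12), ("PIGGYBACK", 0.03), ("SNMP", 0.40)]))
# -> ['cmk_time_agent=0.150', 'cmk_time_snmp=0.400']
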
Example #4
def _do_all_checks_on_host(sources,
                           host_config,
                           ipaddress,
                           only_check_plugin_names=None):
    # type: (data_sources.DataSources, config.HostConfig, Optional[HostAddress], Optional[List[str]]) -> Tuple[int, List[SectionName]]
    hostname = host_config.hostname  # type: HostName
    config_cache = config.get_config_cache()

    num_success, missing_sections = 0, set()

    check_api_utils.set_hostname(hostname)

    filter_mode = None

    belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0
    if belongs_to_cluster:
        filter_mode = "include_clustered"

    services = check_table.get_precompiled_check_table(hostname,
                                                       remove_duplicates=True,
                                                       filter_mode=filter_mode)

    # When check types are specified via the command line, enforce them. Otherwise use the
    # list of checks defined by the check table.
    if only_check_plugin_names is None:
        only_check_plugins = {
            service.check_plugin_name
            for service in services
        }
    else:
        only_check_plugins = set(only_check_plugin_names)

    sources.enforce_check_plugin_names(only_check_plugins)

    # Gather the data from the sources
    multi_host_sections = sources.get_host_sections()

    # Filter out check types which are not used on the node
    if belongs_to_cluster:
        pos_match = set()
        neg_match = set()
        for service in services:
            if hostname != config_cache.host_of_clustered_service(
                    hostname, service.description):
                pos_match.add(service.check_plugin_name)
            else:
                neg_match.add(service.check_plugin_name)
        only_check_plugins -= (pos_match - neg_match)

    for service in services:
        if only_check_plugins is not None and service.check_plugin_name not in only_check_plugins:
            continue

        if belongs_to_cluster and hostname != config_cache.host_of_clustered_service(
                hostname, service.description):
            continue

        success = execute_check(config_cache, multi_host_sections, hostname,
                                ipaddress, service.check_plugin_name,
                                service.item, service.parameters,
                                service.description)
        if success:
            num_success += 1
        elif success is None:
            # The check was skipped due to its configured timeperiod, so we do not want to
            # - increase num_success or
            # - add it to the missing sections
            continue
        else:
            missing_sections.add(
                cmk.base.check_utils.section_name_of(
                    service.check_plugin_name))

    import cmk.base.inventory as inventory  # pylint: disable=import-outside-toplevel
    inventory.do_inventory_actions_during_checking_for(sources,
                                                       multi_host_sections,
                                                       host_config, ipaddress)

    missing_section_list = sorted(missing_sections)
    return num_success, missing_section_list
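
The cluster filtering in Example #4 collects pos_match (plugins needed by services that are clustered away from this node) and neg_match (plugins needed by services that stay on the node) and removes only the plugins that appear exclusively in pos_match. A small self-contained sketch of that set arithmetic, using a hypothetical Svc stand-in with a clustered flag instead of config_cache.host_of_clustered_service:

from typing import List, NamedTuple, Set


class Svc(NamedTuple):
    # Hypothetical stand-in for the precompiled check table entries.
    check_plugin_name: str
    clustered: bool  # True if the service belongs to a cluster, not to this node


def filter_node_plugins(services: List[Svc], plugins: Set[str]) -> Set[str]:
    pos_match = {s.check_plugin_name for s in services if s.clustered}
    neg_match = {s.check_plugin_name for s in services if not s.clustered}
    # Drop plugins that are needed exclusively by clustered services.
    return plugins - (pos_match - neg_match)


services = [Svc("df", clustered=True), Svc("cpu", clustered=False)]
print(filter_node_plugins(services, {"df", "cpu"}))
# -> {'cpu'}: "df" is only used by a clustered service and is dropped.
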
Example #5
def _do_all_checks_on_host(sources,
                           host_config,
                           ipaddress,
                           only_check_plugin_names=None):
    # type: (data_sources.DataSources, config.HostConfig, Optional[HostAddress], Optional[List[str]]) -> Tuple[int, List[SectionName]]
    hostname = host_config.hostname  # type: HostName
    config_cache = config.get_config_cache()

    num_success, missing_sections = 0, set()

    check_api_utils.set_hostname(hostname)

    belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

    services = check_table.get_precompiled_check_table(
        hostname,
        remove_duplicates=True,
        filter_mode="include_clustered" if belongs_to_cluster else None,
    )

    # When check types are specified via the command line, enforce them. Otherwise use the
    # list of checks defined by the check table.
    if only_check_plugin_names is None:
        only_check_plugins = {
            service.check_plugin_name
            for service in services
        }
    else:
        only_check_plugins = set(only_check_plugin_names)

    sources.enforce_check_plugin_names(only_check_plugins)

    # Gather the data from the sources
    multi_host_sections = sources.get_host_sections()

    def _is_not_of_host(host_name, service):
        return host_name != config_cache.host_of_clustered_service(
            host_name, service.description)

    # Filter out check types which are not used on the node
    if belongs_to_cluster:
        removed_plugins = {
            plugin
            for plugin in only_check_plugins if all(
                _is_not_of_host(hostname, service) for service in services
                if service.check_plugin_name == plugin)
        }
        only_check_plugins -= removed_plugins

    for service in services:
        if service.check_plugin_name not in only_check_plugins:
            continue
        if belongs_to_cluster and _is_not_of_host(hostname, service):
            continue
        if service_outside_check_period(config_cache, hostname,
                                        service.description):
            continue

        success = execute_check(multi_host_sections, hostname, ipaddress,
                                service)
        if success:
            num_success += 1
        else:
            missing_sections.add(
                cmk.base.check_utils.section_name_of(
                    service.check_plugin_name))

    import cmk.base.inventory as inventory  # pylint: disable=import-outside-toplevel
    inventory.do_inventory_actions_during_checking_for(sources,
                                                       multi_host_sections,
                                                       host_config, ipaddress)

    missing_section_list = sorted(missing_sections)
    return num_success, missing_section_list
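
Example #5 folds the same node-side filtering into one comprehension: a plugin is removed when all services using it are clustered away from this node. A sketch with the same hypothetical Svc stand-in as after Example #4. Note that all() over an empty iterable is True, so this formulation would also drop a plugin for which no matching service exists at all, whereas the pos_match/neg_match version keeps it:

from typing import List, NamedTuple, Set


class Svc(NamedTuple):
    # Hypothetical stand-in, as in the sketch after Example #4.
    check_plugin_name: str
    clustered: bool


def filter_node_plugins(services: List[Svc], plugins: Set[str]) -> Set[str]:
    removed_plugins = {
        plugin for plugin in plugins
        if all(s.clustered for s in services if s.check_plugin_name == plugin)
    }
    return plugins - removed_plugins


services = [Svc("df", clustered=True), Svc("cpu", clustered=False)]
print(filter_node_plugins(services, {"df", "cpu"}))
# -> {'cpu'}, matching the pos_match/neg_match sketch for the same input.
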
Example #6
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    only_check_plugin_names: Optional[Set[CheckPluginName]] = None
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    cpu_tracking.start("busy")
    console.verbose("Check_MK version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        # In keepalive mode we always have an ipaddress (it can be 0.0.0.0 or :: when
        # the address is unknown). When called without keepalive, ipaddress may be None
        # or may already be an address (2nd argument).
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = ip_lookup.lookup_ip_address(host_config)

        item_state.load(hostname)

        services = _get_filtered_services(
            host_name=hostname,
            belongs_to_cluster=len(config_cache.clusters_of(hostname)) > 0,
            config_cache=config_cache,
            only_check_plugins=only_check_plugin_names,
        )

        # see which raw sections we may need
        selected_raw_sections = _get_relevant_raw_sections(services, host_config)

        sources = data_sources.make_sources(
            host_config,
            ipaddress,
        )
        mhs = data_sources.make_host_sections(
            config_cache,
            host_config,
            ipaddress,
            sources=sources,
            selected_raw_sections=selected_raw_sections,
            max_cachefile_age=host_config.max_cachefile_age,
        )
        num_success, plugins_missing_data = _do_all_checks_on_host(
            config_cache,
            host_config,
            ipaddress,
            multi_host_sections=mhs,
            services=services,
            only_check_plugins=only_check_plugin_names,
        )
        inventory.do_inventory_actions_during_checking_for(
            config_cache,
            host_config,
            ipaddress,
            sources=sources,
            multi_host_sections=mhs,
        )

        if _submit_to_core:
            item_state.save(hostname)

        for source in sources:
            source_state, source_output, source_perfdata = source.get_summary_result_for_checking()
            if source_output != "":
                status = max(status, source_state)
                infotexts.append("[%s] %s" % (source.id, source_output))
                perfdata.extend([_convert_perf_data(p) for p in source_perfdata])

        if plugins_missing_data:
            missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                plugins_missing_data,
                exit_spec,
                bool(num_success),
            )
            status = max(status, missing_data_status)
            infotexts.append(missing_data_infotext)

        cpu_tracking.end()
        phase_times = cpu_tracking.get_times()
        total_times = phase_times["TOTAL"]
        run_time = total_times[4]

        infotexts.append("execution time %.1f sec" % run_time)
        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % run_time,
                "user_time=%.3f" % total_times[0],
                "system_time=%.3f" % total_times[1],
                "children_user_time=%.3f" % total_times[2],
                "children_system_time=%.3f" % total_times[3],
            ]

            for phase, times in phase_times.items():
                if phase in ["agent", "snmp", "ds"]:
                    t = times[4] - sum(times[:4])  # real time - CPU time
                    perfdata.append("cmk_time_%s=%.3f" % (phase, t))
        else:
            perfdata.append("execution_time=%.3f" % run_time)

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # "ipaddress is not None": At least when working with a cluster host it seems the ipaddress
        # may be None.  This needs to be understood in detail and cleaned up. As the InlineSNMP
        # stats feature is a very rarely used debugging feature, the analyzation and fix is
        # postponed now.
        if config.record_inline_snmp_stats \
           and ipaddress is not None \
           and host_config.snmp_config(ipaddress).is_inline_snmp_host:
            inline.snmp_stats_save()
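
Example #6 reads the cpu_tracking times as five-element sequences (user, system, children_user, children_system, run time) and reports, for each fetch phase, the wall-clock time not spent on CPU (run time minus the sum of the four CPU components), which is roughly the time spent waiting on I/O. A reduced sketch of that arithmetic, with hypothetical sample tuples standing in for the tracker's data:

from typing import Dict, List, Tuple

# Hypothetical sample data: phase -> (user, system, children_user,
# children_system, run_time), mirroring the layout used in Example #6.
phase_times: Dict[str, Tuple[float, float, float, float, float]] = {
    "TOTAL": (0.50, 0.10, 0.02, 0.01, 2.30),
    "agent": (0.05, 0.01, 0.00, 0.00, 1.20),
    "snmp": (0.08, 0.02, 0.00, 0.00, 0.70),
}


def build_perfdata(times: Dict[str, Tuple[float, float, float, float, float]]) -> List[str]:
    perfdata = ["execution_time=%.3f" % times["TOTAL"][4]]
    for phase, t in times.items():
        if phase in ("agent", "snmp", "ds"):
            # real time - CPU time for this phase
            perfdata.append("cmk_time_%s=%.3f" % (phase, t[4] - sum(t[:4])))
    return perfdata


print(build_perfdata(phase_times))
# -> ['execution_time=2.300', 'cmk_time_agent=1.140', 'cmk_time_snmp=0.600']
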