Exemple #1
0
def check_mk_remote_automation(site_id,
                               command,
                               args,
                               indata,
                               stdin_data=None,
                               timeout=None,
                               sync=True):
    """Run a Check_MK automation command on a remote site and return the response.

    Raises MKGeneralException if the site configuration has no "secret" entry
    or if its "replication" setting is falsy. When ``sync`` is true,
    sync_changes_before_remote_automation() is called for the site before the
    command is sent.
    """
    site_spec = config.site(site_id)

    # Pick the message of the first failing precondition (if any)
    problem = None
    if "secret" not in site_spec:
        problem = _("Cannot connect to site \"%s\": The site is not logged in")
    elif not site_spec.get("replication"):
        problem = _("Cannot connect to site \"%s\": The replication is disabled")

    if problem is not None:
        raise MKGeneralException(problem % site_spec.get("alias", site_id))

    if sync:
        sync_changes_before_remote_automation(site_id)

    # Now do the actual remote command
    target_site = config.site(site_id)
    request_vars = [
        ("automation", command),  # The Check_MK automation command
        ("arguments", mk_repr(args)),  # The arguments for the command
        ("indata", mk_repr(indata)),  # The input data
        ("stdin_data", mk_repr(stdin_data)),  # The input data for stdin
        ("timeout", mk_repr(timeout)),  # The timeout
    ]
    return do_remote_automation(target_site, "checkmk-automation", request_vars)
Exemple #2
0
    def page(self):
        """Synchronize the profile of the current user to the requested site.

        The site ID is read from the "site" key of the web API request.
        Raises MKUserError when the site ID is missing or unknown and
        MKGeneralException when the site is marked as dead or when
        _synchronize_profile() returns anything other than True. In the
        latter case a profile replication change is recorded first.
        """
        site_id = self.webapi_request().get("site")
        if not site_id:
            raise MKUserError(None, "The site_id is missing")
        if site_id not in config.sitenames():
            raise MKUserError(None, _("The requested site does not exist"))

        site_status = cmk.gui.sites.states().get(site_id, cmk.gui.sites.SiteStatus({}))
        if site_status.get("state", "unknown") == "dead":
            raise MKGeneralException(
                _('The site is marked as dead. Not trying to replicate.'))

        site = config.site(site_id)
        assert config.user.id is not None
        result = self._synchronize_profile(site_id, site, config.user.id)

        if result is True:
            return _("Replication completed successfully.")

        # Anything else than True is treated as the error message
        assert result is not False
        _add_profile_replication_change(site_id, result)
        raise MKGeneralException(result)
Exemple #3
0
    def _get_diagnostics_dump_file(self, site: str, tarfile_name: str) -> bytes:
        """Return the diagnostics dump file from the local or the remote site.

        For a local site the module level _get_diagnostics_dump_file() is
        called, otherwise the "diagnostics-dump-get-file" remote automation.
        """
        if not config.site_is_local(site):
            return do_remote_automation(
                config.site(site),
                "diagnostics-dump-get-file",
                [("tarfile_name", tarfile_name)],
            )

        return _get_diagnostics_dump_file(tarfile_name)
Exemple #4
0
def _do_check_mk_remote_automation_in_background_job(
        site_id: SiteId, automation_request: CheckmkAutomationRequest) -> Any:
    """Execute the automation in a background job on the remote site

    The job is started with a single call. Afterwards the status of the job
    is requested in a loop (without any delay between the requests) until the
    remote site reports the job as not active anymore. The result found in
    that last status response is returned."""
    site_config = config.site(site_id)
    job_id = _start_remote_automation_job(site_config, automation_request)

    auto_logger.info("Waiting for job completion")
    while True:
        status = CheckmkAutomationGetStatusResponse(*do_remote_automation(
            site_config,
            "checkmk-remote-automation-get-status",
            [("request", repr(job_id))],
        ))
        auto_logger.debug("Job status: %r", status)

        if status.job_status["is_active"]:
            continue  # Still running: ask again

        outcome = status.result
        auto_logger.debug(
            "Job is not active anymore. Return the result: %s", outcome)
        return outcome
Exemple #5
0
def _synchronize_profiles_to_sites(logger, profiles_to_synchronize):
    if not profiles_to_synchronize:
        return

    remote_sites = [(site_id, config.site(site_id))
                    for site_id in config.get_login_slave_sites()]

    logger.info('Credentials changed for %s. Trying to sync to %d sites' %
                (", ".join(profiles_to_synchronize.keys()), len(remote_sites)))

    states = sites.states()

    pool = ThreadPool()
    jobs = []
    for site_id, site in remote_sites:
        jobs.append(
            pool.apply_async(_sychronize_profile_worker,
                             (states, site_id, site, profiles_to_synchronize)))

    results = []
    start_time = time.time()
    while time.time() - start_time < 30:
        for job in jobs[:]:
            try:
                results.append(job.get(timeout=0.5))
                jobs.remove(job)
            except mp_TimeoutError:
                pass
        if not jobs:
            break

    contacted_sites = {x[0] for x in remote_sites}
    working_sites = {result.site_id for result in results}
    for site_id in contacted_sites - working_sites:
        results.append(
            SynchronizationResult(
                site_id,
                error_text=_("No response from update thread"),
                failed=True))

    for result in results:
        if result.error_text:
            logger.info('  FAILED [%s]: %s' %
                        (result.site_id, result.error_text))
            if config.wato_enabled:
                add_change("edit-users",
                           _('Password changed (sync failed: %s)') %
                           result.error_text,
                           add_user=False,
                           sites=[result.site_id],
                           need_restart=False)

    pool.terminate()
    pool.join()

    num_failed = sum([1 for result in results if result.failed])
    num_disabled = sum([1 for result in results if result.disabled])
    num_succeeded = sum([1 for result in results if result.succeeded])
    logger.info('  Disabled: %d, Succeeded: %d, Failed: %d' %
                (num_disabled, num_succeeded, num_failed))
Exemple #6
0
 def execute(self):
     # type: () -> Iterator[ACResult]
     """Yield the results of _check_site() for all sites from config.sitenames().

     Each result gets the ID of the checked site assigned to its `site_id`
     attribute before it is yielded.
     """
     for current_site_id in config.sitenames():
         site_results = self._check_site(current_site_id, config.site(current_site_id))
         for site_result in site_results:
             site_result.site_id = current_site_id
             yield site_result
Exemple #7
0
    def _get_job_status(self) -> Dict:
        """Return the status of the fetch agent output job.

        For hosts of the local site get_fetch_agent_job_status() is called,
        otherwise the "fetch-agent-output-get-status" remote automation of the
        site of the host.
        """
        site_id = self._request.host.site_id()
        if config.site_is_local(site_id):
            return get_fetch_agent_job_status(self._request)

        return watolib.do_remote_automation(
            config.site(site_id),
            "fetch-agent-output-get-status",
            [("request", repr(self._request.serialize()))],
        )
Exemple #8
0
    def _get_agent_output_file(self) -> bytes:
        """Return the agent output file fetched for the requested host.

        For hosts of the local site get_fetch_agent_output_file() is called,
        otherwise the "fetch-agent-output-get-file" remote automation of the
        site of the host.
        """
        site_id = self._request.host.site_id()
        if not config.site_is_local(site_id):
            return watolib.do_remote_automation(
                config.site(site_id),
                "fetch-agent-output-get-file",
                [("request", repr(self._request.serialize()))],
            )

        return get_fetch_agent_output_file(self._request)
Exemple #9
0
    def _collect_sites_data(cls) -> List[ABCElement]:
        """Build one element per site from config.sorted_sites().

        Sites whose state is not "online" (a site without a state is treated
        as "missing") are represented by an IconElement. Online sites get a
        SiteElement with one Part per host category plus a total Part.
        """
        sites.update_site_states_from_dead_sites()

        site_states = sites.states()
        site_state_titles = sites.site_state_titles()
        site_stats = cls._get_site_stats()

        elements: List[ABCElement] = []
        for site_id, _sitealias in config.sorted_sites():
            site_spec = config.site(site_id)
            state: Optional[str] = site_states.get(site_id, sites.SiteStatus({})).get("state")
            if state is None:
                state = "missing"

            if state != "online":
                # No host statistics to show: only display the state
                offline_element = IconElement(
                    title=site_spec["alias"],
                    css_class="site_%s" % state,
                    tooltip=site_state_titles[state],
                )
                elements.append(offline_element)
                continue

            stats = site_stats[site_id]
            part_specs = [
                (_("hosts are down or have critical services"), "critical",
                 stats.hosts_down_or_have_critical),
                (_("hosts are unreachable or have unknown services"), "unknown",
                 stats.hosts_unreachable_or_have_unknown),
                (_("hosts are up but have services in warning state"), "warning",
                 stats.hosts_up_and_have_warning),
                (_("hosts are in scheduled downtime"), "downtime", stats.hosts_in_downtime),
                (_("hosts are up and have no service problems"), "ok",
                 stats.hosts_up_without_problem),
            ]
            parts = [
                Part(title=part_title, css_class=part_class, count=part_count)
                for part_title, part_class, part_count in part_specs
            ]
            total = sum(part_count for _part_title, _part_class, part_count in part_specs)
            total_part = Part(title=_("Total number of hosts"), css_class="", count=total)

            online_element = SiteElement(
                title=site_spec["alias"],
                url_add_vars={
                    "name": "site",
                    "site": site_id,
                },
                parts=parts,
                total=total_part,
                tooltip=cls._render_tooltip(site_spec["alias"], parts, total_part),
            )
            elements.append(online_element)

        return elements
Exemple #10
0
def execute_host_label_sync(host_name: HostName, site_id: SiteId) -> None:
    """Contacts the given remote site to synchronize the labels of the given host

    The updated host labels contained in the result of the site sync are
    handed over to save_updated_host_label_files()."""
    site_spec = config.site(site_id)
    sync_request = SiteRequest(
        newest_host_labels=0.0,
        enforce_host=EnforcedHostRequest(site_id, host_name),
    )
    sync_result = _execute_site_sync(site_id, site_spec, sync_request)
    save_updated_host_label_files(sync_result.updated_host_labels)
Exemple #11
0
    def _start_fetch(self) -> None:
        """Start the job on the site the host is monitored by

        Local sites start the job via start_fetch_agent_job(), other sites via
        the "fetch-agent-output-start" remote automation."""
        site_id = self._request.host.site_id()
        if not config.site_is_local(site_id):
            watolib.do_remote_automation(
                config.site(site_id),
                "fetch-agent-output-start",
                [("request", repr(self._request.serialize()))],
            )
            return

        start_fetch_agent_job(self._request)
Exemple #12
0
def check_mk_remote_automation(site_id,
                               command,
                               args,
                               indata,
                               stdin_data=None,
                               timeout=None,
                               sync=True,
                               non_blocking_http=False):
    # type: (SiteId, str, Optional[Sequence[Union[str, Text]]], Any, Optional[str], Optional[int], bool, bool) -> Any
    """Run a Check_MK automation command on a remote site and return the result.

    Raises MKGeneralException if the site configuration has no "secret" entry
    or if its "replication" setting is falsy. When ``sync`` is true,
    sync_changes_before_remote_automation() is called for the site first.
    With ``non_blocking_http`` the command is handed over to
    _do_check_mk_remote_automation_in_background_job(), otherwise it is sent
    with a single "checkmk-automation" request.
    """
    site_spec = config.site(site_id)

    # Pick the message of the first failing precondition (if any)
    problem = None
    if "secret" not in site_spec:
        problem = _("Cannot connect to site \"%s\": The site is not logged in")
    elif not site_spec.get("replication"):
        problem = _("Cannot connect to site \"%s\": The replication is disabled")

    if problem is not None:
        raise MKGeneralException(problem % site_spec.get("alias", site_id))

    if sync:
        sync_changes_before_remote_automation(site_id)

    if non_blocking_http:
        # This will start a background job process on the remote site to execute the automation
        # asynchronously. It then polls the remote site, waiting for completion of the job.
        automation_request = CheckmkAutomationRequest(command, args, indata, stdin_data, timeout)
        return _do_check_mk_remote_automation_in_background_job(site_id, automation_request)

    # Synchronous execution of the actual remote command in a single blocking HTTP request
    target_site = config.site(site_id)
    request_vars = [
        ("automation", command),  # The Check_MK automation command
        ("arguments", mk_repr(args)),  # The arguments for the command
        ("indata", mk_repr(indata)),  # The input data
        ("stdin_data", mk_repr(stdin_data)),  # The input data for stdin
        ("timeout", mk_repr(timeout)),  # The timeout
    ]
    return do_remote_automation(target_site, "checkmk-automation", request_vars)
Exemple #13
0
    def _choices(self):
        """Return (site ID, alias) pairs of all online sites, sorted by alias.

        The sorting ignores the case of the alias. Unless `self.enforce` is
        set, an empty choice ("", "") is part of the list.
        """
        choices = [] if self.enforce else [("", "")]
        choices.extend((site_id, config.site(site_id)["alias"])
                       for site_id, status in sites.states().items()
                       if status["state"] == "online")
        choices.sort(key=lambda choice: choice[1].lower())
        return choices
Exemple #14
0
    def _execute_remote_automation(self, request):
        """Execute the requested command on the requested site.

        `request` needs the keys "site_id", "command" and "command_args".
        Raises MKUserError if the site is unknown or not listed in
        config.wato_slave_sites().
        """
        site_id = request["site_id"]

        if site_id not in config.sitenames():
            raise MKUserError("site_id", _("This site does not exist."))

        if site_id not in dict(config.wato_slave_sites()):
            raise MKUserError("site_id",
                              _("This site is not a distributed WATO site."))

        return cmk.gui.watolib.automations.do_remote_automation(
            config.site(site_id),
            request["command"],
            request["command_args"],
        )
Exemple #15
0
def host_service_graph_popup_pnp(site, host_name, service_description):
    """Write the PNP4Nagios popup URL of the given service to the HTML output.

    The URL starts with the "url_prefix" of the site. A different PNP4Nagios
    path is used when `html.mobile` is set.
    """
    pnp_host = cmk.utils.pnp_cleanup(host_name)
    pnp_svc = cmk.utils.pnp_cleanup(service_description)
    url_prefix = config.site(site)["url_prefix"]

    if html.mobile:
        path_template = "pnp4nagios/index.php?kohana_uri=/mobile/popup/%s/%s"
    else:
        path_template = "pnp4nagios/index.php/popup?host=%s&srv=%s"

    url = url_prefix + (path_template % (html.urlencode(pnp_host), html.urlencode(pnp_svc)))
    html.write(url)
Exemple #16
0
def _get_check_table_from_remote(request):
    """Gathers the check table from a remote site

    Cares about pre 1.6 sites that do not support the new service-discovery-job API call.
    Falls back to the previously existing try-inventory and inventory automation calls
    when the remote site rejects the command as invalid. Any other automation
    exception is re-raised.
    """
    try:
        sync_changes_before_remote_automation(request.host.site_id())

        raw_result = watolib.do_remote_automation(
            config.site(request.host.site_id()),
            "service-discovery-job",
            [
                ("host_name", request.host.name()),
                ("options", json.dumps(request.options._asdict())),
            ],
        )
        return _deserialize_remote_result(raw_result)
    except watolib.MKAutomationException as e:
        if "Invalid automation command: service-discovery-job" not in "%s" % e:
            raise

        # Compatibility for pre 1.6 remote sites.
        # TODO: Replace with helpful exception in 1.7.
        action = request.options.action
        if action == DiscoveryAction.REFRESH:
            _counts, _failed_hosts = check_mk_automation(
                request.host.site_id(), "inventory",
                ["@scan", "refresh", request.host.name()])

        options = ["@scan" if action == DiscoveryAction.SCAN else "@noscan"]
        if not request.options.ignore_errors:
            options.append("@raiseerrors")
        options.append(request.host.name())

        check_table = check_mk_automation(request.host.site_id(), "try-inventory", options)

        # The legacy calls know nothing about jobs and labels: use fixed values
        legacy_job_status = {
            "is_active": False,
            "state": JobStatusStates.INITIALIZED,
        }
        return DiscoveryResult(
            job_status=legacy_job_status,
            check_table=check_table,
            check_table_created=int(time.time()),
            host_labels={},
            new_labels={},
            vanished_labels={},
            changed_labels={},
        )
Exemple #17
0
    def _perform_tests_for_site(
            self, site_id: SiteId,
            result_queue: 'multiprocessing.Queue[Tuple[SiteId, str]]') -> None:
        """Run the configuration analysis for one site and report via the queue.

        The outcome is put into `result_queue` as (site_id, repr(result)).
        `result` is a dict with "state" 0 and the test results as "response"
        on success, or with "state" 1 and an HTML formatted traceback as
        "response" when an exception occurred.

        NOTE(review): the final result_queue.join() needs a queue object that
        offers join() - presumably a JoinableQueue; verify against the caller.
        """
        self._logger.debug("[%s] Starting" % site_id)
        try:
            # Would be better to clean all open fds that are not needed, but we don't
            # know the FDs of the result_queue pipe. Can we find it out somehow?
            # (An earlier approach closed the fds 3..255 and ignored EBADF errors.)

            # Reinitialize logging targets
            log.init_logging()  # NOTE: We run in a subprocess!

            if not config.site_is_local(site_id):
                results_data = watolib.do_remote_automation(
                    config.site(site_id),
                    "check-analyze-config", [],
                    timeout=html.request.request_timeout - 10)
            else:
                automation = AutomationCheckAnalyzeConfig()
                results_data = automation.execute(automation.get_request())

            self._logger.debug("[%s] Finished" % site_id)
            result = {"state": 0, "response": results_data}

        except Exception:
            self._logger.exception("[%s] Failed" % site_id)
            html_traceback = traceback.format_exc().replace("\n", "<br>\n")
            result = {"state": 1, "response": "Traceback:<br>%s" % html_traceback}
        finally:
            result_queue.put((site_id, repr(result)))
            result_queue.close()
            result_queue.join_thread()
            result_queue.join()
Exemple #18
0
    def show(self) -> None:
        """Render a table with one row per site from config.sorted_sites().

        Each row shows the site (its ID for sites without a state, its alias
        otherwise) and a status label. Sites with a known state get a button
        which requests switch_site.py to toggle the site.
        """
        html.open_table(cellspacing="0", class_="sitestate")

        sites.update_site_states_from_dead_sites()

        for sitename, _sitealias in config.sorted_sites():
            site = config.site(sitename)
            state = sites.states().get(sitename, sites.SiteStatus({})).get("state")

            if state is None:
                state, switch, text = "missing", "missing", sitename
            elif state == "disabled":
                switch, text = "on", site["alias"]
            else:
                switch = "off"
                text = render_link(site["alias"],
                                   "view.py?view_name=sitehosts&site=%s" % sitename)

            html.open_tr()

            html.open_td(class_="left")
            html.write(text)
            html.close_td()

            # Note: This cell is not closed explicitly before the row is closed
            html.open_td(class_="state")
            if switch == "missing":
                html.status_label(content=state, status=state, title=_("Site is missing"))
            else:
                switch_vars = [("_site_switch", "%s:%s" % (sitename, switch))]
                url = makeactionuri_contextless(request,
                                                transactions,
                                                switch_vars,
                                                filename="switch_site.py")
                if state == "disabled":
                    button_title = _("enable this site")
                else:
                    button_title = _("disable this site")
                html.status_label_button(
                    content=state,
                    status=state,
                    title=button_title,
                    onclick="cmk.sidebar.switch_site(%s)" % (json.dumps(url)))
            html.close_tr()

        html.close_table()
Exemple #19
0
def host_service_graph_dashlet_pnp(graph_identification):
    """Write the PNP4Nagios image URL of a dashlet graph to the HTML output.

    The second element of `graph_identification` has to provide the keys
    "site", "graph_index", "host_name" and "service_description". The theme
    "classic" is passed to PNP4Nagios as "multisite".
    """
    graph_spec = graph_identification[1]
    site = graph_spec["site"]
    source = int(graph_spec["graph_index"])

    pnp_host = cmk.utils.pnp_cleanup(graph_spec["host_name"])
    pnp_svc = cmk.utils.pnp_cleanup(graph_spec["service_description"])
    url_prefix = config.site(site)["url_prefix"]

    pnp_theme = html.get_theme()
    if pnp_theme == "classic":
        pnp_theme = "multisite"

    # NOTE(review): the "timerange" request variable is inserted without URL encoding
    image_path = "pnp4nagios/index.php/image?host=%s&srv=%s&source=%d&view=%s&theme=%s" % (
        html.urlencode(pnp_host),
        html.urlencode(pnp_svc),
        source,
        html.request.var("timerange"),
        pnp_theme,
    )
    html.write(url_prefix + image_path)
Exemple #20
0
 def render(self, what, row, tags, custom_vars):
     """Return icon name, title and URL of the prediction graph of a service.

     Only services are handled. The first performance data entry that starts
     with "predict_" determines the linked data source. Returns None if there
     is no such entry or if `what` is not "service".
     """
     # TODO: At least for interfaces we have 2 predictive values. But this icon
     # only creates a link to the first one. Add multiple icons or add a navigation
     # element to the prediction page.
     if what != "service":
         return None

     for perf_entry in row[what + "_perf_data"].split():
         if not perf_entry.startswith("predict_"):
             continue

         varname, _value = perf_entry.split("=")
         dsname = varname[8:]  # Strip the leading "predict_"
         url_prefix = config.site(row["site"])["url_prefix"]
         url_vars = [
             ("host", row["host_name"]),
             ("service", row["service_description"]),
             ("dsname", dsname),
         ]
         url = url_prefix + "check_mk/prediction_graph.py?" + html.urlencode_vars(url_vars)
         title = _("Analyse predictive monitoring for this service")
         return 'prediction', title, url

     return None
Exemple #21
0
    def _call_activate_changes_automation(self):
        """Activate the pending changes of the site and return the response.

        For the local site execute_activate_changes() is called directly. For
        other sites the "activate-changes" remote automation is used.

        Raises MKGeneralException when the remote site does not know the
        "activate-changes" command. Other automation exceptions are re-raised
        unchanged.
        """
        domains = self._get_domains_needing_activation()

        if config.site_is_local(self._site_id):
            return execute_activate_changes(domains)

        try:
            return cmk.gui.watolib.automations.do_remote_automation(
                config.site(self._site_id), "activate-changes", [
                    ("domains", repr(domains)),
                    ("site_id", self._site_id),
                ])
        except cmk.gui.watolib.automations.MKAutomationException as e:
            if "Invalid automation command: activate-changes" in "%s" % e:
                # Insert the original error into the message instead of
                # showing the bare "%s" placeholder to the user
                raise MKGeneralException(
                    "Activate changes failed (%s). The version of this site may be too old." % e)
            raise
Exemple #22
0
    def _push_snapshot_to_site(self):
        """Calls a remote automation call push-snapshot which is handled by AutomationPushSnapshot()

        The upload is sent to the automation.py of the site and the response
        text is parsed as Python literal. Raises MKAutomationException when
        the response text can not be parsed."""
        site = config.site(self._site_id)

        url = html.makeuri_contextless(
            [
                ("command", "push-snapshot"),
                ("secret", site["secret"]),
                ("siteid", site["id"]),
                ("debug", "1" if config.debug else ""),
            ],
            filename=site["multisiteurl"] + "automation.py",
        )

        response_text = self._upload_file(url, site.get('insecure', False))

        try:
            return ast.literal_eval(response_text)
        except (SyntaxError, ValueError, TypeError):
            # literal_eval() does not only raise SyntaxError: Text that is valid
            # Python but not a literal (e.g. a bare name) results in a ValueError.
            raise cmk.gui.watolib.automations.MKAutomationException(
                _("Garbled automation response: <pre>%s</pre>") % (html.attrencode(response_text)))
Exemple #23
0
 def cmp(self, r1, r2):
     """Compare two rows by the alias of their sites. Returns -1, 0 or 1."""
     alias1 = config.site(r1["site"])["alias"]
     alias2 = config.site(r2["site"])["alias"]
     return (alias1 > alias2) - (alias1 < alias2)
Exemple #24
0
    def render(self, what, row, tags, custom_vars):
        """Return icon name, title and URL of the event view of a mkevents service.

        Returns None when the event console is disabled, when the row is not
        a service using the "check_mk_active-mkevents" check command or when
        no host can be extracted from the arguments of the check command.
        """
        if not config.mkeventd_enabled:
            return

        # show for services based on the mkevents active check
        command = row[what + '_check_command']
        if not (what == 'service' and command.startswith('check_mk_active-mkevents')):
            return

        # Split command by the parts (COMMAND!ARG0!...) Beware: Do not split by escaped exclamation mark.
        command_parts = re.split(r'(?<!\\)!', command)

        # All arguments are space separated in ARG0
        if len(command_parts) != 2:
            return

        # Extract parameters from check_command:
        # TODO: Use better argument string splitting (shlex.split())
        args = command_parts[1].split()
        if not args:
            return

        # Handle -a and -H options. Sorry for the hack. We currently
        # have no better idea
        if len(args) >= 2 and args[0] == '-H':
            args = args[2:]  # skip two arguments
        if args and args[0] == '-a':
            args = args[1:]

        host = None
        if args:
            host_arg = args[0]
            if host_arg == '$HOSTNAME$':
                host = row['host_name']
            elif host_arg == '$HOSTADDRESS$':
                host = row['host_address']
            else:
                host = host_arg

        # If we have no host then the command line from the check_command seems
        # to be garbled. Better show nothing in this case.
        if not host:
            return

        # It is possible to have a central event console, this is the default case.
        # Another possible architecture is to have an event console in each site in
        # a distributed environment. For the latter case the base url needs to be
        # constructed here
        url_prefix = ''
        if getattr(config, 'mkeventd_distributed', False):
            url_prefix = config.site(row["site"])['url_prefix'] + 'check_mk/'

        url_vars = [
            ("view_name", "ec_events_of_monhost"),
            ("site", row["site"]),
            ("host", row["host_name"]),
        ]

        title = _('Events of Host %s') % (row["host_name"])

        if len(args) >= 2:
            # The second argument is the application: remove quotes and escaping
            app = args[1].strip('\'').replace("\\\\", "\\").replace("\\!", "!")
            title = _('Events of Application "%s" on Host %s') % (app, host)
            url_vars.append(("event_application", app))

        url = 'view.py?' + html.urlencode_vars(url_vars)
        return 'mkeventd', title, url_prefix + url
Exemple #25
0
 def cmp(self, r1, r2):
     """Compare two rows by the alias of their sites. Returns -1, 0 or 1.

     The comparison does not rely on the builtin cmp(), which is not
     available in Python 3.
     """
     alias1 = config.site(r1["site"])["alias"]
     alias2 = config.site(r2["site"])["alias"]
     return (alias1 > alias2) - (alias1 < alias2)
Exemple #26
0
def filter_cre_heading_info():
    """Return the alias of the site given in the "site" request variable.

    Returns None if the variable is not set or empty.
    """
    current_value = html.request.var("site")
    if not current_value:
        return None
    return config.site(current_value)["alias"]
Exemple #27
0
def filter_cre_choices():
    """Return (site ID, alias) pairs of all online sites, sorted by alias.

    The sorting ignores the case of the alias.
    """
    choices = []
    for sitename, state in sites.states().items():
        if state["state"] != "online":
            continue
        choices.append((sitename, config.site(sitename)["alias"]))

    return sorted(choices, key=lambda choice: choice[1].lower())
Exemple #28
0
def execute_network_scan_job() -> None:
    """Execute the network scan of the folder found by find_folder_to_scan().

    Does nothing on WATO slave sites or when there is no folder to scan. The
    scan is executed locally or via the "network-scan" remote automation,
    depending on the site of the folder. The found hosts are added to the
    folder and the scan result is saved to the folder, also when the scan
    failed.

    Raises MKGeneralException if the "run_as" user of the scan does not exist.
    """
    init_wato_datastructures(with_wato_lock=True)

    if watolib.is_wato_slave_site():
        return  # Don't execute this job on slaves.

    folder = find_folder_to_scan()
    if not folder:
        return  # Nothing to do.

    # We need to have the context of the user. The jobs are executed when
    # config.set_user_by_id() has not been executed yet. So there is no user context
    # available. Use the run_as attribute from the job config and revert
    # the previous state after completion.
    old_user = config.user.id
    run_as = folder.attribute("network_scan")["run_as"]
    if not userdb.user_exists(run_as):
        raise MKGeneralException(
            _("The user %s used by the network "
              "scan of the folder %s does not exist.") % (run_as, folder.title()))
    config.set_user_by_id(run_as)

    try:
        result: NetworkScanResult = {
            "start": time.time(),
            "end": True,  # means currently running
            "state": None,
            "output": "The scan is currently running.",
        }

        # Mark the scan in progress: Is important in case the request takes longer than
        # the interval of the cron job (1 minute). Otherwise the scan might be started
        # a second time before the first one finished.
        save_network_scan_result(folder, result)

        try:
            if config.site_is_local(folder.site_id()):
                found = cmk.gui.watolib.network_scan.do_network_scan(folder)
            else:
                found = watolib.do_remote_automation(config.site(folder.site_id()),
                                                     "network-scan", [("folder", folder.path())])

            if not isinstance(found, list):
                raise MKGeneralException(_("Received an invalid network scan result: %r") % found)

            add_scanned_hosts_to_folder(folder, found)

            result.update({
                "state": True,
                "output": _("The network scan found %d new hosts.") % len(found),
            })
        except Exception as e:
            # A failed scan must not abort the job: Report it in the scan result
            result.update({
                "state": False,
                "output": _("An exception occured: %s") % e,
            })
            logger.error("Exception in network scan:\n%s", traceback.format_exc())

        result["end"] = time.time()

        save_network_scan_result(folder, result)
    finally:
        # Revert to the previous user, also when saving the result failed
        if old_user:
            config.set_user_by_id(old_user)
Exemple #29
0
 def execute(self):
     """Yield the results of _check_site() for all sites from config.sitenames().

     Each result gets the ID of the checked site assigned to its `site_id`
     attribute before it is yielded.
     """
     for current_site_id in config.sitenames():
         site_results = self._check_site(current_site_id, config.site(current_site_id))
         for site_result in site_results:
             site_result.site_id = current_site_id
             yield site_result
Exemple #30
0
def user_sync_config() -> UserSyncConfig:
    """Return the "user_sync" setting of the site config.omd_site() refers to.

    Falls back to user_sync_default_config() if the site has no such setting.
    """
    site_id = config.omd_site()

    # use global option as default for reading legacy options and on remote site
    # for reading the value set by the WATO master site
    default_cfg = user_sync_default_config(site_id)

    site_spec = config.site(site_id)
    return site_spec.get("user_sync", default_cfg)