Example #1
0
def _synchronize_profiles_to_sites(logger, profiles_to_synchronize):
    if not profiles_to_synchronize:
        return

    remote_sites = [(site_id, get_site_config(site_id))
                    for site_id in get_login_slave_sites()]

    logger.info('Credentials changed for %s. Trying to sync to %d sites' %
                (", ".join(profiles_to_synchronize.keys()), len(remote_sites)))

    states = sites.states()

    pool = ThreadPool()
    jobs = []
    for site_id, site in remote_sites:
        jobs.append(
            pool.apply_async(_sychronize_profile_worker,
                             (states, site_id, site, profiles_to_synchronize)))

    results = []
    start_time = time.time()
    while time.time() - start_time < 30:
        for job in jobs[:]:
            try:
                results.append(job.get(timeout=0.5))
                jobs.remove(job)
            except mp_TimeoutError:
                pass
        if not jobs:
            break

    contacted_sites = {x[0] for x in remote_sites}
    working_sites = {result.site_id for result in results}
    for site_id in contacted_sites - working_sites:
        results.append(
            SynchronizationResult(
                site_id,
                error_text=_("No response from update thread"),
                failed=True))

    for result in results:
        if result.error_text:
            logger.info('  FAILED [%s]: %s' %
                        (result.site_id, result.error_text))
            if config.wato_enabled:
                add_change("edit-users",
                           _('Password changed (sync failed: %s)') %
                           result.error_text,
                           add_user=False,
                           sites=[result.site_id],
                           need_restart=False)

    pool.terminate()
    pool.join()

    num_failed = sum([1 for result in results if result.failed])
    num_disabled = sum([1 for result in results if result.disabled])
    num_succeeded = sum([1 for result in results if result.succeeded])
    logger.info('  Disabled: %d, Succeeded: %d, Failed: %d' %
                (num_disabled, num_succeeded, num_failed))
Example #2
0
    def page(self):
        """Replicate the profile of the current user to the requested site.

        The site is taken from the "site" key of the web API request.

        Returns the success message. Raises MKUserError when the site is
        missing from the request or unknown, and MKGeneralException when the
        site is marked as dead or the synchronization did not return True.
        """
        requested_site = self.webapi_request().get("site")
        if not requested_site:
            raise MKUserError(None, "The site_id is missing")
        if requested_site not in sitenames():
            raise MKUserError(None, _("The requested site does not exist"))

        site_status = cmk.gui.sites.states().get(requested_site,
                                                 cmk.gui.sites.SiteStatus({}))
        if site_status.get("state", "unknown") == "dead":
            raise MKGeneralException(
                _("The site is marked as dead. Not trying to replicate."))

        site_spec = get_site_config(requested_site)
        assert user.id is not None
        outcome = self._synchronize_profile(requested_site, site_spec, user.id)

        if outcome is True:
            return _("Replication completed successfully.")

        # Anything else than True is recorded as a change and raised as error
        assert outcome is not False
        _add_profile_replication_change(requested_site, outcome)
        raise MKGeneralException(outcome)
Example #3
0
def _do_check_mk_remote_automation_in_background_job(
        site_id: SiteId, automation_request: CheckmkAutomationRequest) -> Any:
    """Run the automation as a background job on the remote site and wait for it

    The job is started with a single call. Afterwards the remote site is asked
    for the job status again and again until the job is reported as not active
    anymore. The result of that last status response is returned."""
    site_config = get_site_config(site_id)
    job_id = _start_remote_automation_job(site_config, automation_request)

    auto_logger.info("Waiting for job completion")
    # NOTE(review): There is no delay between two status requests in this loop.
    while True:
        response = CheckmkAutomationGetStatusResponse(*do_remote_automation(
            site_config,
            "checkmk-remote-automation-get-status",
            [("request", repr(job_id))],
        ))
        auto_logger.debug("Job status: %r", response)

        if response.job_status["is_active"]:
            continue  # Still running, ask again

        job_result = response.result
        auto_logger.debug("Job is not active anymore. Return the result: %s",
                          job_result)
        return job_result
Example #4
0
def filter_cre_choices():
    """Return (site name, alias) pairs of all online sites, sorted by alias

    The sorting ignores the case of the alias."""
    online_choices = []
    for sitename, state in sites.states().items():
        if state["state"] != "online":
            continue
        online_choices.append((sitename, get_site_config(sitename)["alias"]))

    online_choices.sort(key=lambda choice: choice[1].lower())
    return online_choices
Example #5
0
def check_mk_remote_automation_serialized(
    *,
    site_id: SiteId,
    command: str,
    args: Optional[Sequence[str]],
    indata: Any,
    stdin_data: Optional[str] = None,
    timeout: Optional[int] = None,
    sync: bool = True,
    non_blocking_http: bool = False,
) -> SerializedResult:
    """Execute a Checkmk automation command on a remote site

    The site needs to have a "secret" configured and replication enabled,
    otherwise a MKGeneralException is raised. With ``sync`` set, the pending
    changes are synchronized to the site first. With ``non_blocking_http`` set,
    the command is executed as background job on the remote site, otherwise it
    is executed in a single blocking HTTP request."""
    site = get_site_config(site_id)

    if "secret" not in site:
        raise MKGeneralException(
            _('Cannot connect to site "%s": The site is not logged in') %
            site.get("alias", site_id))

    if not site.get("replication"):
        raise MKGeneralException(
            _('Cannot connect to site "%s": The replication is disabled') %
            site.get("alias", site_id))

    if sync:
        sync_changes_before_remote_automation(site_id)

    if non_blocking_http:
        # Starts a background job process on the remote site which executes the
        # automation asynchronously. The remote site is then polled until the job
        # has completed.
        automation_request = CheckmkAutomationRequest(command, args, indata,
                                                      stdin_data, timeout)
        return _do_check_mk_remote_automation_in_background_job_serialized(
            site_id, automation_request)

    # Synchronous execution of the remote command in one blocking HTTP request
    target_site = get_site_config(site_id)
    http_vars = [
        ("automation", command),  # The Checkmk automation command
        ("arguments", mk_repr(args)),  # The arguments for the command
        ("indata", mk_repr(indata)),  # The input data
        ("stdin_data", mk_repr(stdin_data)),  # The input data for stdin
        ("timeout", mk_repr(timeout)),  # The timeout
    ]
    serialized = _do_remote_automation_serialized(
        site=target_site,
        command="checkmk-automation",
        vars_=http_vars,
    )
    return SerializedResult(serialized)
Example #6
0
def cre_sites_options() -> Options:
    """Return (site name, alias) pairs of all online sites, sorted by alias

    The sorting ignores the case of the alias."""
    online_site_names = (name for name, state in sites.states().items()
                         if state["state"] == "online")
    options = [(name, sites.get_site_config(name)["alias"])
               for name in online_site_names]
    return sorted(options, key=lambda option: option[1].lower())
Example #7
0
    def _get_diagnostics_dump_file(self, site: str,
                                   tarfile_name: str) -> bytes:
        """Return the diagnostics dump file from the given site

        For a local site the module level helper of the same name is used,
        otherwise the "diagnostics-dump-get-file" automation is executed on
        the remote site."""
        if not site_is_local(site):
            return do_remote_automation(
                get_site_config(site),
                "diagnostics-dump-get-file",
                [("tarfile_name", tarfile_name)],
            )

        return _get_diagnostics_dump_file(tarfile_name)
Example #8
0
    def _get_agent_output_file(self) -> bytes:
        """Return the agent output file from the site of the requested host

        Remote sites are asked with the "fetch-agent-output-get-file"
        automation call."""
        if not site_is_local(self._request.host.site_id()):
            return watolib.do_remote_automation(
                get_site_config(self._request.host.site_id()),
                "fetch-agent-output-get-file",
                [("request", repr(self._request.serialize()))],
            )

        return get_fetch_agent_output_file(self._request)
Example #9
0
    def _get_job_status(self) -> Dict:
        """Return the status of the fetch job from the site of the requested host

        Remote sites are asked with the "fetch-agent-output-get-status"
        automation call."""
        host_is_local = site_is_local(self._request.host.site_id())
        if host_is_local:
            return get_fetch_agent_job_status(self._request)

        remote_site = get_site_config(self._request.host.site_id())
        serialized_request = repr(self._request.serialize())
        return watolib.do_remote_automation(
            remote_site,
            "fetch-agent-output-get-status",
            [("request", serialized_request)],
        )
    def _execute_remote_automation(self, request):
        """Execute the requested automation command on the requested site

        The request provides the keys "site_id", "command" and "command_args".
        Raises MKUserError when the site is unknown or not a distributed WATO
        site."""
        if request["site_id"] not in sitenames():
            raise MKUserError("site_id", _("This site does not exist."))

        if request["site_id"] not in wato_slave_sites():
            raise MKUserError("site_id", _("This site is not a distributed WATO site."))

        site_spec = get_site_config(request["site_id"])
        return cmk.gui.watolib.automations.do_remote_automation(
            site_spec,
            request["command"],
            request["command_args"],
        )
Example #11
0
def execute_host_label_sync(host_name: HostName, site_id: SiteId) -> None:
    """Synchronize the labels of the given host from the given remote site

    The site sync is executed with a request enforcing the given host. The
    updated host label files of the result are saved afterwards."""
    site_spec = get_site_config(site_id)
    sync_request = SiteRequest(
        newest_host_labels=0.0,
        enforce_host=EnforcedHostRequest(site_id, host_name),
    )
    sync_result = _execute_site_sync(site_id, site_spec, sync_request)
    save_updated_host_label_files(sync_result.updated_host_labels)
Example #12
0
    def _start_fetch(self) -> None:
        """Start the fetch job on the site the host is monitored by

        Remote sites are triggered with the "fetch-agent-output-start"
        automation call."""
        if not site_is_local(self._request.host.site_id()):
            watolib.do_remote_automation(
                get_site_config(self._request.host.site_id()),
                "fetch-agent-output-start",
                [("request", repr(self._request.serialize()))],
            )
            return

        start_fetch_agent_job(self._request)
Example #13
0
def _get_check_table_from_remote(api_request):
    """Gathers the check table from a remote site

    Takes care of pre 1.6 sites that do not support the service-discovery-job
    automation call by falling back to the previously existing inventory and
    try-inventory automation calls.
    """
    try:
        sync_changes_before_remote_automation(api_request.host.site_id())

        remote_site = get_site_config(api_request.host.site_id())
        raw_result = watolib.do_remote_automation(
            remote_site,
            "service-discovery-job",
            [
                ("host_name", api_request.host.name()),
                ("options", json.dumps(api_request.options._asdict())),
            ],
        )
        return _deserialize_remote_result(raw_result)
    except watolib.MKAutomationException as e:
        # Only the "unknown command" error triggers the fallback below
        if "Invalid automation command: service-discovery-job" not in str(e):
            raise

        # Compatibility for pre 1.6 remote sites.
        # TODO: Replace with helpful exception in 1.7.
        if api_request.options.action == DiscoveryAction.TABULA_RASA:
            _counts, _failed_hosts = check_mk_automation(
                api_request.host.site_id(),
                "inventory",
                ["@scan", "refresh", api_request.host.name()],
            )

        scan_mode = ("@scan" if api_request.options.action
                     == DiscoveryAction.REFRESH else "@noscan")
        options = [scan_mode]

        if not api_request.options.ignore_errors:
            options.append("@raiseerrors")

        options.append(api_request.host.name())

        check_table = check_mk_automation(api_request.host.site_id(),
                                          "try-inventory", options)

        fallback_job_status = {
            "is_active": False,
            "state": JobStatusStates.INITIALIZED,
        }
        return DiscoveryResult(
            job_status=fallback_job_status,
            check_table=check_table,
            check_table_created=int(time.time()),
            host_labels={},
            new_labels={},
            vanished_labels={},
            changed_labels={},
        )
Example #14
0
    def _perform_tests_for_site(
            self, site_id: SiteId,
            result_queue: "multiprocessing.Queue[Tuple[SiteId, str]]") -> None:
        """Execute the configuration analysis for one site and report the result

        A (site_id, repr(result)) pair is put into the result queue. The result
        is a dict with "state" 0 and the analysis data as "response" on success
        or "state" 1 and a HTML formatted traceback as "response" on failure.
        """
        self._logger.debug("[%s] Starting", site_id)
        try:
            # It would be better to close all open file descriptors that are not
            # needed (e.g. the resources of the apache), but the FDs of the
            # result_queue pipe are not known here.

            # Reinitialize logging targets
            log.init_logging()  # NOTE: We run in a subprocess!

            if not site_is_local(site_id):
                results_data = watolib.do_remote_automation(
                    get_site_config(site_id),
                    "check-analyze-config",
                    [],
                    timeout=request.request_timeout - 10,
                )
            else:
                automation = AutomationCheckAnalyzeConfig()
                results_data = automation.execute(automation.get_request())

            self._logger.debug("[%s] Finished", site_id)

            result = {"state": 0, "response": results_data}

        except Exception:
            self._logger.exception("[%s] Failed", site_id)
            html_traceback = traceback.format_exc().replace("\n", "<br>\n")
            result = {
                "state": 1,
                "response": "Traceback:<br>%s" % html_traceback,
            }
        finally:
            result_queue.put((site_id, repr(result)))
            result_queue.close()
            result_queue.join_thread()
            result_queue.join()
Example #15
0
    def show(self) -> None:
        """Render the site state table

        Each site gets one row showing its alias (or its name, when no state
        is known) and a status label. Sites with a known state get a button
        for enabling / disabling the site instead of the plain label."""
        html.open_table(cellspacing="0", class_="sitestate")

        sites.update_site_states_from_dead_sites()

        for sitename, _sitealias in sites.sorted_sites():
            site = sites.get_site_config(sitename)
            state = sites.states().get(sitename,
                                       sites.SiteStatus({})).get("state")

            if state is None:
                state = switch = "missing"
                text = escape_html_permissive(sitename)
            elif state == "disabled":
                switch = "on"
                text = escape_html_permissive(site["alias"])
            else:
                switch = "off"
                text = render_link(
                    site["alias"],
                    "view.py?view_name=sitehosts&site=%s" % sitename)

            html.open_tr()
            html.td(text, class_="left")
            html.open_td(class_="state")
            if switch != "missing":
                url = makeactionuri_contextless(
                    request,
                    transactions,
                    [("_site_switch", "%s:%s" % (sitename, switch))],
                    filename="switch_site.py",
                )
                if state == "disabled":
                    button_title = _("enable this site")
                else:
                    button_title = _("disable this site")
                html.status_label_button(
                    content=state,
                    status=state,
                    title=button_title,
                    onclick="cmk.sidebar.switch_site(%s)" % (json.dumps(url)),
                )
            else:
                html.status_label(content=state,
                                  status=state,
                                  title=_("Site is missing"))
            html.close_tr()
        html.close_table()
Example #16
0
def get_check_table(
        discovery_request: StartDiscoveryRequest) -> DiscoveryResult:
    """Gathers the check table of the requested host

    For the TABULA_RASA action a "refresh-autochecks" service change is
    recorded first.

    Hosts of the local site are handled by execute_discovery_job(). For hosts
    of a remote site the pending changes are synchronized, then the
    "service-discovery-job" automation is executed on the remote site and its
    response is deserialized into a DiscoveryResult:

    central site -> automation service-discovery-job -> remote site
    """
    host = discovery_request.host

    if discovery_request.options.action == DiscoveryAction.TABULA_RASA:
        watolib.add_service_change(
            host,
            "refresh-autochecks",
            _("Refreshed check configuration of host '%s'") % host.name(),
        )

    if site_is_local(host.site_id()):
        return execute_discovery_job(discovery_request)

    sync_changes_before_remote_automation(host.site_id())

    remote_site = get_site_config(host.site_id())
    raw_result = watolib.do_remote_automation(
        remote_site,
        "service-discovery-job",
        [
            ("host_name", host.name()),
            ("options", json.dumps(discovery_request.options._asdict())),
        ],
    )
    return DiscoveryResult.deserialize(raw_result)
Example #17
0
def get_page_heading() -> str:
    """Return the configured page heading

    A "%s" in the configured heading is replaced by the alias of the local
    site, falling back to "GUI" when the site has no alias."""
    if "%s" not in config.page_heading:
        return config.page_heading

    site_alias = get_site_config(omd_site()).get('alias', _("GUI"))
    return config.page_heading % (site_alias)
Example #18
0
    def render(self, what, row, tags, custom_vars):
        """Return the icon linking a mkevents service to its Event Console events

        Returns None when the Event Console is disabled, the row is not a
        service based on the mkevents active check or the check command can
        not be parsed. Otherwise a tuple (icon name, title, URL) is returned.
        """
        if not config.mkeventd_enabled:
            return

        # Only shown for services based on the mkevents active check
        command = row[what + "_check_command"]
        if not (what == "service" and
                command.startswith("check_mk_active-mkevents")):
            return

        # The command has the form COMMAND!ARG0!... Exclamation marks escaped
        # with a backslash must not be used for splitting.
        command_parts = re.split(r"(?<!\\)!", command)

        # All arguments are expected space separated in ARG0
        if len(command_parts) != 2:
            return

        # Extract the parameters from the check command
        args = shlex.split(command_parts[1])
        if not args:
            return

        # Handle the -H and -a options. This is a hack, but there is currently
        # no better idea.
        if len(args) >= 2 and args[0] == "-H":
            args = args[2:]  # skip the option and its value
        if len(args) >= 1 and args[0] == "-a":
            args = args[1:]

        host = _get_hostname(args, row) if len(args) >= 1 else None

        # Without a host the command line of the check command seems to be
        # garbled. Better show nothing in this case.
        if not host:
            return

        # The default case is one central event console. In a distributed
        # environment it is also possible to have one event console per site.
        # For the latter case the base URL needs to be constructed here.
        url_prefix = ""
        if getattr(config, "mkeventd_distributed", False):
            url_prefix = get_site_config(row["site"])["url_prefix"] + "check_mk/"

        url_vars = [
            ("view_name", "ec_events_of_monhost"),
            ("site", row["site"]),
            ("host", row["host_name"]),
        ]

        title = _("Events of Host %s") % (row["host_name"])

        if len(args) >= 2:
            # Strip the quoting and unescape backslashes and exclamation marks
            app = args[1].strip("'").replace("\\\\", "\\").replace("\\!", "!")
            title = _('Events of Application "%s" on Host %s') % (app, host)
            url_vars.append(("event_application", app))

        view_url = "view.py?" + urlencode_vars(url_vars)
        return "mkeventd", title, url_prefix + view_url
Example #19
0
 def cmp(self, r1, r2):
     """Compare two rows by the alias of their site

     Returns 1, 0 or -1 when the alias of r1 is greater than, equal to or
     less than the alias of r2."""
     def alias_of(row):
         return get_site_config(row["site"])["alias"]

     is_greater = alias_of(r1) > alias_of(r2)
     is_less = alias_of(r1) < alias_of(r2)
     return is_greater - is_less
Example #20
0
def execute_network_scan_job() -> None:
    """Find the next folder to scan and execute its network scan

    Executed by the multisite cron job once a minute, but only on the central
    site. The scan is executed locally or via WATO automation on the site of
    the folder. The result is written to the folder in the master site.

    Raises MKGeneralException when the user configured for the scan does not
    exist."""
    init_wato_datastructures(with_wato_lock=True)

    if is_wato_slave_site():
        return  # Don't execute this job on slaves.

    folder = _find_folder_to_scan()
    if not folder:
        return  # Nothing to do.

    scan_user = folder.attribute("network_scan")["run_as"]
    if not userdb.user_exists(scan_user):
        raise MKGeneralException(
            _("The user %s used by the network "
              "scan of the folder %s does not exist.") %
            (scan_user, folder.title()))

    with UserContext(scan_user):
        result: NetworkScanResult = {
            "start": time.time(),
            "end": True,  # means currently running
            "state": None,
            "output": "The scan is currently running.",
        }

        # Persist the "in progress" marker first: The request may take longer than
        # the interval of the cron job (1 minute). Without the marker the scan might
        # be started a second time before the first one finished.
        _save_network_scan_result(folder, result)

        try:
            if not site_is_local(folder.site_id()):
                found = do_remote_automation(
                    get_site_config(folder.site_id()),
                    "network-scan",
                    [("folder", folder.path())],
                )
            else:
                found = _do_network_scan(folder)

            if not isinstance(found, list):
                raise MKGeneralException(
                    _("Received an invalid network scan result: %r") % found)

            _add_scanned_hosts_to_folder(folder, found)

            result.update({
                "state": True,
                "output": _("The network scan found %d new hosts.") % len(found),
            })
        except Exception as e:
            # A failed scan is recorded in the result instead of being raised
            result.update({
                "state": False,
                "output": _("An exception occured: %s") % e,
            })
            logger.error("Exception in network scan:\n%s",
                         traceback.format_exc())

        result["end"] = time.time()

        _save_network_scan_result(folder, result)
Example #21
0
def filter_cre_heading_info(value: FilterHTTPVariables) -> Optional[str]:
    """Return the alias of the site selected in the filter variables

    Returns None when the "site" variable is missing or empty."""
    selected_site = value.get("site")
    if not selected_site:
        return None
    return get_site_config(selected_site)["alias"]
Example #22
0
 def execute(self) -> Iterator[ACResult]:
     """Yield the results of the site check for every configured site

     Each result is tagged with the ID of the site it was produced for."""
     for site_id in sitenames():
         site_results = self._check_site(site_id, get_site_config(site_id))
         for site_result in site_results:
             site_result.site_id = site_id
             yield site_result
Example #23
0
def user_sync_config() -> UserSyncConfig:
    """Return the user sync configuration of the local site

    The "user_sync" setting of the site configuration wins. The global option
    serves as default for reading legacy options and, on remote sites, for
    reading the value set by the WATO master site."""
    fallback_cfg = user_sync_default_config(omd_site())
    local_site_config = get_site_config(omd_site())
    return local_site_config.get("user_sync", fallback_cfg)