예제 #1
0
def remove_device(lib_env: LibraryEnvironment, skip_offline_nodes=False):
    """
    Remove the quorum device from corosync.conf and push the updated config

    If corosync.conf is live, the corosync-qdevice service is disabled and
    stopped on the cluster nodes and model specific cleanup is run before the
    new config is pushed.

    lib_env -- library environment
    skip_offline_nodes -- continue even if not all nodes are accessible
    """
    corosync_conf = lib_env.get_corosync_conf()
    if not corosync_conf.has_quorum_device():
        raise LibraryError(
            ReportItem.error(reports.messages.QdeviceNotDefined())
        )
    # Remember the model before the device is dropped from the config.
    device_model = corosync_conf.get_quorum_device_model()
    corosync_conf.remove_quorum_device()

    if lib_env.is_corosync_conf_live:
        reporter = lib_env.report_processor
        # Pcs is unable to communicate with nodes missing names. It cannot
        # send new corosync.conf to them. That might break the cluster.
        # Hence we error out.
        node_names, names_reports = get_existing_nodes_names(
            corosync_conf, error_on_missing_name=True
        )
        if reporter.report_list(names_reports).has_errors:
            raise LibraryError()
        target_list = lib_env.get_node_target_factory().get_target_list(
            node_names, skip_non_existing=skip_offline_nodes
        )

        def _run_on_nodes(command):
            # Send the given communication command to all cluster nodes.
            command.set_targets(target_list)
            run_and_raise(lib_env.get_node_communicator(), command)

        # fix quorum options for SBD to work properly
        if sbd.atb_has_to_be_enabled(lib_env.cmd_runner(), corosync_conf):
            atb_message = (
                reports.messages.CorosyncQuorumAtbWillBeEnabledDueToSbd()
            )
            reporter.report(ReportItem.warning(atb_message))
            corosync_conf.set_quorum_options({"auto_tie_breaker": "1"})

        # disable qdevice first, then stop it
        for service_action, command_class in (
            (reports.const.SERVICE_ACTION_DISABLE, qdevice_com.Disable),
            (reports.const.SERVICE_ACTION_STOP, qdevice_com.Stop),
        ):
            reporter.report(
                ReportItem.info(
                    reports.messages.ServiceActionStarted(
                        service_action, "corosync-qdevice"
                    )
                )
            )
            _run_on_nodes(command_class(reporter, skip_offline_nodes))

        # handle model specific configuration
        if device_model == "net":
            reporter.report(
                ReportItem.info(
                    reports.messages.QdeviceCertificateRemovalStarted()
                )
            )
            _run_on_nodes(
                qdevice_net_com.ClientDestroy(reporter, skip_offline_nodes)
            )

    lib_env.push_corosync_conf(corosync_conf, skip_offline_nodes)
예제 #2
0
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Build the full cluster status and return it as one plaintext string

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements

    # validation: CIB and corosync.conf must be either both live or both not
    cib_is_live = env.is_cib_live
    corosync_is_live = env.is_corosync_conf_live
    if corosync_is_live and not cib_is_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.CIB],
                    [file_type_codes.COROSYNC_CONF],
                )
            )
        )
    if cib_is_live and not corosync_is_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.COROSYNC_CONF],
                    [file_type_codes.CIB],
                )
            )
        )

    # initialization
    runner = env.cmd_runner()
    report_processor = env.report_processor
    live = cib_is_live and corosync_is_live
    is_sbd_running = False

    # load status, cib, corosync.conf
    status_text, warning_list = get_cluster_status_text(
        runner, hide_inactive_resources, verbose
    )
    # If we are live on a remote node, we have no corosync.conf.
    # TODO Use the new file framework so the path is not exposed.
    if not live or os.path.exists(settings.corosync_conf_file):
        corosync_conf = env.get_corosync_conf()
    else:
        corosync_conf = None
    cib = env.get_cib()
    if verbose:
        ticket_text, ticket_stderr, ticket_retval = get_ticket_status_text(
            runner
        )

    # get extra info if live
    if live:
        try:
            is_sbd_running = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            # Keep the default (SBD considered not running) when the check
            # itself fails.
            pass
        local_services_status = _get_local_services_status(runner)
        if verbose and corosync_conf:
            node_name_list, names_reports = get_existing_nodes_names(
                corosync_conf
            )
            report_processor.report_list(names_reports)
            node_reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_name_list,
            )

    # check stonith configuration
    warning_list = list(warning_list) + list(
        _stonith_warnings(cib, is_sbd_running)
    )

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    if corosync_conf:
        cluster_name = corosync_conf.get_cluster_name()
    else:
        # Without corosync.conf, take the name from the CIB properties.
        cluster_name = nvpair.get_value(
            "cluster_property_set", get_crm_config(cib), "cluster-name", ""
        )

    parts = [f"Cluster name: {cluster_name}"]
    if warning_list:
        parts += ["", "WARNINGS:", *warning_list, ""]
    parts.append(status_text)

    if verbose:
        parts += ["", "Tickets:"]
        if ticket_retval == 0:
            parts.extend(indent(ticket_text.splitlines()))
        else:
            ticket_warning = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_stderr:
                ticket_warning.extend(indent(ticket_stderr.splitlines()))
            parts.extend(indent(ticket_warning))

    if live:
        if verbose and corosync_conf:
            parts += ["", "PCSD Status:"]
            reachability_lines = _format_node_reachability(
                node_name_list, node_reachability
            )
            parts.extend(indent(reachability_lines))
        parts += ["", "Daemon Status:"]
        services_lines = _format_local_services_status(local_services_status)
        parts.extend(indent(services_lines))

    return "\n".join(parts)
예제 #3
0
def add_device(
    lib_env: LibraryEnvironment,
    model,
    model_options,
    generic_options,
    heuristics_options,
    force_model=False,
    force_options=False,
    skip_offline_nodes=False,
):
    # pylint: disable=too-many-locals
    """
    Put a quorum device into corosync.conf and push the updated config

    If corosync.conf is live, model specific setup is done and the
    corosync-qdevice service is enabled on the nodes before the config is
    pushed, and started once the config has been pushed.

    string model -- quorum device model
    dict model_options -- model specific options
    dict generic_options -- generic quorum device options
    dict heuristics_options -- heuristics options
    bool force_model -- continue even if the model is not valid
    bool force_options -- continue even if options are not valid
    bool skip_offline_nodes -- continue even if not all nodes are accessible
    """
    corosync_conf = lib_env.get_corosync_conf()
    if corosync_conf.has_quorum_device():
        raise LibraryError(
            ReportItem.error(reports.messages.QdeviceAlreadyDefined())
        )

    reporter = lib_env.report_processor

    def _announce_service_action(service_action):
        # Tell the user which action on the qdevice service is starting.
        reporter.report(
            ReportItem.info(
                reports.messages.ServiceActionStarted(
                    service_action, "corosync-qdevice"
                )
            )
        )

    def _run_on_nodes(command):
        # Send the given communication command to all cluster nodes.
        command.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), command)

    validation_reports = corosync_conf_validators.add_quorum_device(
        model,
        model_options,
        generic_options,
        heuristics_options,
        [node.nodeid for node in corosync_conf.get_nodes()],
        force_model=force_model,
        force_options=force_options,
    )
    reporter.report_list(validation_reports)

    if lib_env.is_corosync_conf_live:
        # Pcs is unable to communicate with nodes missing names. It cannot
        # send new corosync.conf to them. That might break the cluster.
        # Hence we error out.
        node_names, names_reports = get_existing_nodes_names(
            corosync_conf, error_on_missing_name=True
        )
        reporter.report_list(names_reports)

    if reporter.has_errors:
        raise LibraryError()

    corosync_conf.add_quorum_device(
        model, model_options, generic_options, heuristics_options
    )
    if corosync_conf.is_quorum_device_heuristics_enabled_with_no_exec():
        reporter.report(
            ReportItem.warning(
                reports.messages.CorosyncQuorumHeuristicsEnabledWithNoExec()
            )
        )

    # First setup certificates for qdevice, then send corosync.conf to nodes.
    # If anything fails, nodes will not have corosync.conf with qdevice in it,
    # so there is no effect on the cluster.
    if lib_env.is_corosync_conf_live:
        target_factory = lib_env.get_node_target_factory()
        target_list = target_factory.get_target_list(
            node_names, skip_non_existing=skip_offline_nodes
        )
        # Do model specific configuration.
        # If the model is not known to pcs and was forced, do not configure
        # anything else than corosync.conf, as we do not know what to do
        # anyway.
        if model == "net":
            runner = lib_env.cmd_runner()
            communicator_factory = lib_env.communicator_factory
            # We are sure the "host" key is there, it has been validated
            # above.
            qnetd_target = target_factory.get_target_from_hostname(
                model_options["host"]
            )
            cluster_name = corosync_conf.get_cluster_name()
            qdevice_net.set_up_client_certificates(
                runner,
                reporter,
                communicator_factory,
                qnetd_target,
                cluster_name,
                target_list,
                skip_offline_nodes,
            )

        _announce_service_action(reports.const.SERVICE_ACTION_ENABLE)
        _run_on_nodes(qdevice_com.Enable(reporter, skip_offline_nodes))

    # everything set up, it's safe to tell the nodes to use qdevice
    lib_env.push_corosync_conf(corosync_conf, skip_offline_nodes)

    # Now, when corosync.conf has been reloaded, we can start qdevice service.
    if lib_env.is_corosync_conf_live:
        _announce_service_action(reports.const.SERVICE_ACTION_START)
        _run_on_nodes(qdevice_com.Start(reporter, skip_offline_nodes))
예제 #4
0
def config_sync(
    env: LibraryEnvironment, instance_name=None, skip_offline_nodes=False,
):
    """
    Distribute the local booth config (and its authfile) to cluster nodes.

    The target nodes are the ones named in the local corosync.conf.

    env
    string instance_name -- booth instance name
    skip_offline_nodes -- if True offline nodes will be skipped
    """
    reporter = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    if not env.is_cib_live:
        live_required = reports.messages.LiveEnvironmentRequired(
            [file_type_codes.CIB]
        )
        raise LibraryError(ReportItem.error(live_required))

    node_names, names_reports = get_existing_nodes_names(
        env.get_corosync_conf()
    )
    if not node_names:
        names_reports.append(
            ReportItem.error(reports.messages.CorosyncConfigNoNodesDefined())
        )
    reporter.report_list(names_reports)

    # Load the config and the authfile; any failure is turned into reports
    # and evaluated below.
    try:
        raw_config = booth_env.config.read_raw()
        config_facade = booth_env.config.raw_to_facade(raw_config)
        if not isinstance(booth_env.config.raw_file, GhostFile):
            key_name, key_data, key_reports = (
                config_files.get_authfile_name_and_data(config_facade)
            )
            reporter.report_list(key_reports)
        else:
            key_data = booth_env.key.read_raw()
            key_path = config_facade.get_authfile()
            if key_path:
                key_name = os.path.basename(key_path)
            else:
                key_name = None
    except RawFileError as e:
        reporter.report(raw_file_error_report(e))
    except ParserErrorException as e:
        reporter.report_list(
            booth_env.config.parser_exception_to_report_list(e)
        )
    if reporter.has_errors:
        raise LibraryError()

    send_command = BoothSendConfig(
        reporter,
        booth_env.instance_name,
        raw_config,
        authfile=key_name,
        authfile_data=key_data,
        skip_offline_targets=skip_offline_nodes,
    )
    target_list = env.get_node_target_factory().get_target_list(
        node_names, skip_non_existing=skip_offline_nodes,
    )
    send_command.set_targets(target_list)
    run_and_raise(env.get_node_communicator(), send_command)
예제 #5
0
def pull_config(env: LibraryEnvironment, node_name, instance_name=None):
    """
    Get config from specified node and save it on local system. It will
    rewrite existing files.

    env
    string node_name -- name of the node from which the config should be fetched
    string instance_name -- booth instance name

    Raises LibraryError if the node's response lacks expected keys, if the
    received authfile name does not pass validation, or if any error has been
    reported while saving the files.
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    # Use the instance name as resolved by the booth environment from now on.
    instance_name = booth_env.instance_name
    _ensure_live_env(env, booth_env)

    report_processor.report(
        ReportItem.info(
            reports.messages.BoothFetchingConfigFromNode(
                node_name, config=instance_name,
            )
        )
    )
    com_cmd = BoothGetConfig(report_processor, instance_name)
    com_cmd.set_targets(
        [env.get_node_target_factory().get_target_from_hostname(node_name)]
    )
    # pylint: disable=unsubscriptable-object
    # In general, pylint is right. And it cannot know in this case code is OK.
    # It is covered by tests.
    output = run_and_raise(env.get_node_communicator(), com_cmd)[0][1]
    try:
        # TODO adapt to new file transfer framework once it is written
        if (
            output["authfile"]["name"] is not None
            and output["authfile"]["data"]
        ):
            authfile_name = output["authfile"]["name"]
            # The name comes from another node; validate it before it is used
            # to create a local file.
            report_list = config_validators.check_instance_name(authfile_name)
            if report_list:
                raise LibraryError(*report_list)
            booth_key = FileInstance.for_booth_key(authfile_name)
            booth_key.write_raw(
                base64.b64decode(output["authfile"]["data"].encode("utf-8")),
                can_overwrite=True,
            )
        booth_env.config.write_raw(
            output["config"]["data"].encode("utf-8"), can_overwrite=True
        )
        report_processor.report(
            ReportItem.info(
                reports.messages.BoothConfigAcceptedByNode(
                    name_list=[instance_name]
                )
            )
        )
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    except KeyError as e:
        # A missing key means the node did not answer in the expected format.
        # Chain the original error so the missing key stays visible in the
        # traceback.
        raise LibraryError(
            ReportItem.error(reports.messages.InvalidResponseFormat(node_name))
        ) from e
    if report_processor.has_errors:
        raise LibraryError()
예제 #6
0
파일: pcsd.py 프로젝트: kmalyjur/pcs
def synchronize_ssl_certificate(env: LibraryEnvironment, skip_offline=False):
    """
    Send the local pcsd SSL cert and key to all full nodes in the local cluster.

    Consider the pcs Web UI is accessed via an IP running as a resource in the
    cluster. When the IP is moved, the user's browser connects to the new node
    and we want it to get the same certificate to make the transition a
    seamless experience (otherwise the browser displays a warning that the
    certificate has changed).
    Using pcsd Web UI on remote and guest nodes is not supported (pcs/pcsd
    depends on the corosync.conf file being present on the local node) so we
    send the cert only to corosync (== full stack) nodes.
    """
    reporter = env.report_processor
    target_factory = env.get_node_target_factory()
    node_names, names_reports = get_existing_nodes_names(
        env.get_corosync_conf()
    )
    if not node_names:
        names_reports.append(
            ReportItem.error(reports.messages.CorosyncConfigNoNodesDefined())
        )
    reporter.report_list(names_reports)

    # Read failures are only reported here; they are evaluated together with
    # all the other reports below.
    try:
        with open(settings.pcsd_cert_location) as cert_file:
            ssl_cert = cert_file.read()
    except OSError as e:
        cert_read_error = reports.messages.FileIoError(
            file_type_codes.PCSD_SSL_CERT,
            RawFileError.ACTION_READ,
            format_environment_error(e),
            file_path=settings.pcsd_cert_location,
        )
        reporter.report(ReportItem.error(cert_read_error))
    try:
        with open(settings.pcsd_key_location) as key_file:
            ssl_key = key_file.read()
    except OSError as e:
        key_read_error = reports.messages.FileIoError(
            file_type_codes.PCSD_SSL_KEY,
            RawFileError.ACTION_READ,
            format_environment_error(e),
            file_path=settings.pcsd_key_location,
        )
        reporter.report(ReportItem.error(key_read_error))

    target_reports, target_list = target_factory.get_target_list_with_reports(
        node_names, skip_non_existing=skip_offline
    )
    reporter.report_list(target_reports)

    if reporter.has_errors:
        raise LibraryError()

    target_labels = sorted([target.label for target in target_list])
    reporter.report(
        ReportItem.info(
            reports.messages.PcsdSslCertAndKeyDistributionStarted(
                target_labels
            )
        )
    )

    send_command = SendPcsdSslCertAndKey(reporter, ssl_cert, ssl_key)
    send_command.set_targets(target_list)
    run_and_raise(env.get_node_communicator(), send_command)
예제 #7
0
def update_scsi_devices(
    env: LibraryEnvironment,
    stonith_id: str,
    set_device_list: Iterable[str],
    force_flags: Container[reports.types.ForceCode] = (),
) -> None:
    """
    Update scsi fencing devices without restart and affecting other resources.

    env -- provides all for communication with externals
    stonith_id -- id of stonith resource
    set_device_list -- paths to the scsi devices that would be set for stonith
        resource
    force_flags -- list of flags codes

    Raises LibraryError if the restartless update is not supported or if any
    error has been reported during validation or node lookup.
    """
    if not is_getting_resource_digest_supported(env.cmd_runner()):
        raise LibraryError(
            ReportItem.error(
                reports.messages.StonithRestartlessUpdateOfScsiDevicesNotSupported()
            )
        )
    cib = env.get_cib()
    # The devices are checked for emptiness and then iterated twice below.
    # Materialize them once so a one-shot iterable (e.g. a generator) is
    # neither mistaken for a non-empty value nor exhausted before unfencing.
    device_list = list(set_device_list)
    if not device_list:
        env.report_processor.report(
            ReportItem.error(
                reports.messages.InvalidOptionValue(
                    "devices", "", None, cannot_be_empty=True
                )
            )
        )
    (
        stonith_el,
        report_list,
    ) = stonith.validate_stonith_restartless_update(cib, stonith_id)
    if env.report_processor.report_list(report_list).has_errors:
        raise LibraryError()
    # for mypy, this should not happen because exception would be raised
    if stonith_el is None:
        raise AssertionError("stonith element is None")

    stonith.update_scsi_devices_without_restart(
        env.cmd_runner(),
        env.get_cluster_state(),
        stonith_el,
        IdProvider(cib),
        device_list,
    )

    # Unfencing
    cluster_nodes_names, nodes_report_list = get_existing_nodes_names(
        env.get_corosync_conf(),
        error_on_missing_name=True,
    )
    env.report_processor.report_list(nodes_report_list)
    (
        target_report_list,
        cluster_nodes_target_list,
    ) = env.get_node_target_factory().get_target_list_with_reports(
        cluster_nodes_names,
        allow_skip=False,
    )
    env.report_processor.report_list(target_report_list)
    if env.report_processor.has_errors:
        raise LibraryError()
    # First find out which targets are online in corosync, then unfence the
    # devices on those targets only.
    com_cmd: AllSameDataMixin = GetCorosyncOnlineTargets(
        env.report_processor,
        skip_offline_targets=reports.codes.SKIP_OFFLINE_NODES in force_flags,
    )
    com_cmd.set_targets(cluster_nodes_target_list)
    online_corosync_target_list = run_and_raise(
        env.get_node_communicator(), com_cmd
    )
    com_cmd = Unfence(env.report_processor, sorted(device_list))
    com_cmd.set_targets(online_corosync_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    # The CIB is pushed only after unfencing has been run.
    env.push_cib()
예제 #8
0
def set_recovery_site(env: LibraryEnvironment, node_name: str) -> None:
    """
    Set up disaster recovery with the local cluster being the primary site

    env
    node_name -- a known host from the recovery site

    Raises LibraryError if the environment is not live, if any error has been
    reported during validation, or if corosync.conf fetched from the remote
    node cannot be parsed.
    """
    # pylint: disable=too-many-locals
    if env.ghost_file_codes:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentRequired(
                    env.ghost_file_codes)))
    report_processor = env.report_processor
    dr_env = env.get_dr_env()
    if dr_env.config.raw_file.exists():
        report_processor.report(
            ReportItem.error(reports.messages.DrConfigAlreadyExist()))
    target_factory = env.get_node_target_factory()

    local_nodes, report_list = get_existing_nodes_names(
        env.get_corosync_conf(), error_on_missing_name=True)
    report_processor.report_list(report_list)

    if node_name in local_nodes:
        report_processor.report(
            ReportItem.error(reports.messages.NodeInLocalCluster(node_name)))

    report_list, local_targets = target_factory.get_target_list_with_reports(
        local_nodes, allow_skip=False, report_none_host_found=False)
    report_processor.report_list(report_list)

    report_list, remote_targets = target_factory.get_target_list_with_reports(
        [node_name], allow_skip=False, report_none_host_found=False)
    report_processor.report_list(report_list)

    if report_processor.has_errors:
        raise LibraryError()

    # TODO The new file framework doesn't support network communication yet.
    com_cmd = GetCorosyncConf(env.report_processor)
    com_cmd.set_targets(remote_targets)
    corosync_conf_instance = FileInstance.for_corosync_conf()
    # Fetch the remote corosync.conf outside of the try block so only the
    # parsing is guarded by the parser error handler.
    remote_corosync_conf_text = run_and_raise(
        env.get_node_communicator(), com_cmd)
    try:
        remote_cluster_nodes, report_list = get_existing_nodes_names(
            cast(
                CorosyncConfigFacade,
                corosync_conf_instance.raw_to_facade(
                    remote_corosync_conf_text.encode("utf-8")),
            ),
            error_on_missing_name=True,
        )
    except ParserErrorException as e:
        report_processor.report_list(
            corosync_conf_instance.toolbox.parser.exception_to_report_list(
                e,
                file_type_codes.COROSYNC_CONF,
                None,
                force_code=None,
                is_forced_or_warning=False,
            ))
        # Stop right here: there is no list of remote nodes to continue with,
        # and falling through would report the stale report_list from the
        # previous step once more.
        raise LibraryError() from e

    if report_processor.report_list(report_list).has_errors:
        raise LibraryError()

    # ensure we have tokens for all nodes of remote cluster
    report_list, remote_targets = target_factory.get_target_list_with_reports(
        remote_cluster_nodes, allow_skip=False, report_none_host_found=False)
    if report_processor.report_list(report_list).has_errors:
        raise LibraryError()
    dr_config_exporter = get_file_toolbox(
        file_type_codes.PCS_DR_CONFIG).exporter
    # create dr config for remote cluster
    remote_dr_cfg = dr_env.create_facade(DrRole.RECOVERY)
    remote_dr_cfg.add_site(DrRole.PRIMARY, local_nodes)
    # send config to all node of remote cluster
    distribute_file_cmd = DistributeFilesWithoutForces(
        env.report_processor,
        node_communication_format.pcs_dr_config_file(
            dr_config_exporter.export(remote_dr_cfg.config)),
    )
    distribute_file_cmd.set_targets(remote_targets)
    run_and_raise(env.get_node_communicator(), distribute_file_cmd)
    # create new dr config, with local cluster as primary site
    local_dr_cfg = dr_env.create_facade(DrRole.PRIMARY)
    local_dr_cfg.add_site(DrRole.RECOVERY, remote_cluster_nodes)
    distribute_file_cmd = DistributeFilesWithoutForces(
        env.report_processor,
        node_communication_format.pcs_dr_config_file(
            dr_config_exporter.export(local_dr_cfg.config)),
    )
    distribute_file_cmd.set_targets(local_targets)
    run_and_raise(env.get_node_communicator(), distribute_file_cmd)
예제 #9
0
def status_all_sites_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> List[Mapping[str, Any]]:
    """
    Collect plaintext status of the local site and of all remote sites

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """

    # The command does not provide an option to skip offline / unreachable /
    # misbehaving nodes.
    # The point of such skipping is to stop a command if it is unable to make
    # changes on all nodes. The user can then decide to proceed anyway and
    # make changes on the skipped nodes later manually.
    # This command only reads from nodes so it automatically asks other nodes
    # if one is offline / misbehaving.
    class SiteData:
        # Holds one site's targets and the status obtained from them.
        def __init__(
            self,
            local: bool,
            role: DrRole,
            target_list: Iterable[RequestTarget],
        ) -> None:
            self.local = local
            self.role = role
            self.target_list = target_list
            self.status_loaded = False
            self.status_plaintext = ""

    if env.ghost_file_codes:
        live_required = reports.messages.LiveEnvironmentRequired(
            env.ghost_file_codes
        )
        raise LibraryError(ReportItem.error(live_required))

    reporter = env.report_processor
    dr_reports, dr_config = _load_dr_config(env.get_dr_env().config)
    reporter.report_list(dr_reports)
    if reporter.has_errors:
        raise LibraryError()

    target_factory = env.get_node_target_factory()

    # get local nodes
    local_nodes, names_reports = get_existing_nodes_names(
        env.get_corosync_conf()
    )
    reporter.report_list(names_reports)
    target_reports, local_targets = (
        target_factory.get_target_list_with_reports(
            local_nodes, skip_non_existing=True
        )
    )
    reporter.report_list(target_reports)
    sites = [SiteData(True, dr_config.local_role, local_targets)]

    # get remote sites' nodes
    for remote_site in dr_config.get_remote_site_list():
        target_reports, remote_targets = (
            target_factory.get_target_list_with_reports(
                remote_site.node_name_list, skip_non_existing=True
            )
        )
        reporter.report_list(target_reports)
        sites.append(SiteData(False, remote_site.role, remote_targets))
    if reporter.has_errors:
        raise LibraryError()

    # get all statuses
    for site in sites:
        status_command = GetFullClusterStatusPlaintext(
            reporter,
            hide_inactive_resources=hide_inactive_resources,
            verbose=verbose,
        )
        status_command.set_targets(site.target_list)
        loaded, plaintext = run_com_cmd(
            env.get_node_communicator(), status_command
        )
        site.status_loaded = loaded
        site.status_plaintext = plaintext

    result = []
    for site in sites:
        site_dto = DrSiteStatusDto(
            local_site=site.local,
            site_role=site.role,
            status_plaintext=site.status_plaintext,
            status_successfully_obtained=site.status_loaded,
        )
        result.append(dto.to_dict(site_dto))
    return result
예제 #10
0
def set_recovery_site(env: LibraryEnvironment, node_name: str) -> None:
    """
    Configure disaster recovery, the local cluster being the primary site

    env
    node_name -- a known host from the recovery site
    """
    if env.ghost_file_codes:
        live_required = reports.messages.LiveEnvironmentRequired(
            env.ghost_file_codes
        )
        raise LibraryError(ReportItem.error(live_required))

    reporter = env.report_processor
    dr_env = env.get_dr_env()
    if dr_env.config.raw_file.exists():
        reporter.report(
            ReportItem.error(reports.messages.DrConfigAlreadyExist())
        )
    target_factory = env.get_node_target_factory()

    def _get_targets(node_names):
        # Look up targets of the given nodes; returns (reports, targets).
        return target_factory.get_target_list_with_reports(
            node_names, allow_skip=False, report_none_host_found=False
        )

    def _send_dr_config(dr_facade, targets):
        # Export the given DR config and distribute it to the given targets.
        command = DistributeFilesWithoutForces(
            env.report_processor,
            node_communication_format.pcs_dr_config_file(
                dr_config_exporter.export(dr_facade.config)
            ),
        )
        command.set_targets(targets)
        run_and_raise(env.get_node_communicator(), command)

    local_nodes, names_reports = get_existing_nodes_names(
        env.get_corosync_conf(), error_on_missing_name=True
    )
    reporter.report_list(names_reports)

    if node_name in local_nodes:
        reporter.report(
            ReportItem.error(reports.messages.NodeInLocalCluster(node_name))
        )

    target_reports, local_targets = _get_targets(local_nodes)
    reporter.report_list(target_reports)

    target_reports, remote_targets = _get_targets([node_name])
    reporter.report_list(target_reports)

    if reporter.has_errors:
        raise LibraryError()

    # Ask the given remote node for its corosync.conf to learn the names of
    # the nodes defined in it.
    fetch_command = GetCorosyncConf(env.report_processor)
    fetch_command.set_targets(remote_targets)
    remote_conf_text = run_and_raise(
        env.get_node_communicator(), fetch_command
    )
    remote_cluster_nodes, names_reports = get_existing_nodes_names(
        CorosyncConfigFacade.from_string(remote_conf_text),
        error_on_missing_name=True,
    )
    if reporter.report_list(names_reports).has_errors:
        raise LibraryError()

    # ensure we have tokens for all nodes of remote cluster
    target_reports, remote_targets = _get_targets(remote_cluster_nodes)
    if reporter.report_list(target_reports).has_errors:
        raise LibraryError()

    dr_config_exporter = get_file_toolbox(
        file_type_codes.PCS_DR_CONFIG
    ).exporter

    # create dr config for remote cluster and send it to all its nodes
    remote_dr_cfg = dr_env.create_facade(DrRole.RECOVERY)
    remote_dr_cfg.add_site(DrRole.PRIMARY, local_nodes)
    _send_dr_config(remote_dr_cfg, remote_targets)

    # create new dr config, with local cluster as primary site
    local_dr_cfg = dr_env.create_facade(DrRole.PRIMARY)
    local_dr_cfg.add_site(DrRole.RECOVERY, remote_cluster_nodes)
    _send_dr_config(local_dr_cfg, local_targets)