Beispiel #1
0
    def test_corosync_conf_not_set_need_offline_success(
        self, mock_get_corosync, mock_distribute, mock_is_running, mock_reload,
        mock_check_offline, mock_qdevice_reload
    ):
        corosync_data = open(rc("corosync.conf")).read()
        new_corosync_data = corosync_data.replace("version: 2", "version: 3")
        mock_get_corosync.return_value = corosync_data
        mock_is_running.return_value = False
        env = LibraryEnvironment(self.mock_logger, self.mock_reporter)

        self.assertTrue(env.is_corosync_conf_live)

        self.assertEqual(corosync_data, env.get_corosync_conf_data())
        self.assertEqual(corosync_data, env.get_corosync_conf().config.export())
        self.assertEqual(2, mock_get_corosync.call_count)

        conf_facade = CorosyncConfigFacade.from_string(new_corosync_data)
        conf_facade._need_stopped_cluster = True
        env.push_corosync_conf(conf_facade)
        mock_check_offline.assert_called_once_with(
            "mock node communicator",
            self.mock_reporter,
            "mock node list",
            False
        )
        mock_distribute.assert_called_once_with(
            "mock node communicator",
            self.mock_reporter,
            "mock node list",
            new_corosync_data,
            False
        )
        mock_reload.assert_not_called()
        mock_qdevice_reload.assert_not_called()
Beispiel #2
0
    def test_corosync_conf_set(
        self, mock_get_corosync, mock_distribute, mock_reload,
        mock_check_offline, mock_qdevice_reload
    ):
        corosync_data = "totem {\n    version: 2\n}\n"
        new_corosync_data = "totem {\n    version: 3\n}\n"
        env = LibraryEnvironment(
            self.mock_logger,
            self.mock_reporter,
            corosync_conf_data=corosync_data
        )

        self.assertFalse(env.is_corosync_conf_live)

        self.assertEqual(corosync_data, env.get_corosync_conf_data())
        self.assertEqual(corosync_data, env.get_corosync_conf().config.export())
        self.assertEqual(0, mock_get_corosync.call_count)

        env.push_corosync_conf(
            CorosyncConfigFacade.from_string(new_corosync_data)
        )
        self.assertEqual(0, mock_distribute.call_count)

        self.assertEqual(new_corosync_data, env.get_corosync_conf_data())
        self.assertEqual(0, mock_get_corosync.call_count)
        mock_check_offline.assert_not_called()
        mock_reload.assert_not_called()
        mock_qdevice_reload.assert_not_called()
Beispiel #3
0
def update_device(
    lib_env: LibraryEnvironment,
    model_options,
    generic_options,
    heuristics_options,
    force_options=False,
    skip_offline_nodes=False,
):
    """
    Change quorum device settings, distribute and reload configs if live

    dict model_options -- model specific options
    dict generic_options -- generic quorum device options
    dict heuristics_options -- heuristics options
    bool force_options -- continue even if options are not valid
    bool skip_offline_nodes -- continue even if not all nodes are accessible
    """
    cfg = lib_env.get_corosync_conf()
    if not cfg.has_quorum_device():
        raise LibraryError(reports.qdevice_not_defined())
    if lib_env.report_processor.report_list(
            corosync_conf_validators.update_quorum_device(
                cfg.get_quorum_device_model(),
                model_options,
                generic_options,
                heuristics_options, [node.nodeid for node in cfg.get_nodes()],
                force_options=force_options)).has_errors:
        raise LibraryError()
    cfg.update_quorum_device(model_options, generic_options,
                             heuristics_options)
    if cfg.is_quorum_device_heuristics_enabled_with_no_exec():
        lib_env.report_processor.report(
            reports.corosync_quorum_heuristics_enabled_with_no_exec())
    lib_env.push_corosync_conf(cfg, skip_offline_nodes)
Beispiel #4
0
    def test_corosync_conf_not_set_need_offline_success(
            self, mock_get_corosync, mock_distribute, mock_is_running,
            mock_reload, mock_check_offline, mock_qdevice_reload):
        corosync_data = open(rc("corosync.conf")).read()
        new_corosync_data = corosync_data.replace("version: 2", "version: 3")
        mock_get_corosync.return_value = corosync_data
        mock_is_running.return_value = False
        env = LibraryEnvironment(self.mock_logger, self.mock_reporter)

        self.assertTrue(env.is_corosync_conf_live)

        self.assertEqual(corosync_data, env.get_corosync_conf_data())
        self.assertEqual(corosync_data,
                         env.get_corosync_conf().config.export())
        self.assertEqual(2, mock_get_corosync.call_count)

        conf_facade = CorosyncConfigFacade.from_string(new_corosync_data)
        conf_facade._need_stopped_cluster = True
        env.push_corosync_conf(conf_facade)
        mock_check_offline.assert_called_once_with("mock node communicator",
                                                   self.mock_reporter,
                                                   "mock node list", False)
        mock_distribute.assert_called_once_with("mock node communicator",
                                                self.mock_reporter,
                                                "mock node list",
                                                new_corosync_data, False)
        mock_reload.assert_not_called()
        mock_qdevice_reload.assert_not_called()
Beispiel #5
0
def set_options(
    lib_env: LibraryEnvironment,
    options,
    skip_offline_nodes=False,
    force=False,
):
    """
    Set corosync quorum options, distribute and reload corosync.conf if live

    LibraryEnvironment lib_env
    dict options -- quorum options
    bool skip_offline_nodes -- continue even if not all nodes are accessible
    bool force -- force changes
    """
    cfg = lib_env.get_corosync_conf()
    if lib_env.report_processor.report_list(
            corosync_conf_validators.update_quorum_options(
                options, cfg.has_quorum_device(),
                cfg.get_quorum_options())).has_errors:
        raise LibraryError()
    cfg.set_quorum_options(options)
    if lib_env.is_corosync_conf_live:
        _check_if_atb_can_be_disabled(
            lib_env.service_manager,
            lib_env.report_processor,
            cfg,
            cfg.is_enabled_auto_tie_breaker(),
            force,
        )
    lib_env.push_corosync_conf(cfg, skip_offline_nodes)
Beispiel #6
0
    def test_corosync_conf_not_set(
        self, mock_get_corosync, mock_distribute, mock_reload
    ):
        corosync_data = open(rc("corosync.conf")).read()
        new_corosync_data = corosync_data.replace("version: 2", "version: 3")
        mock_get_corosync.return_value = corosync_data
        env = LibraryEnvironment(self.mock_logger, self.mock_reporter)

        self.assertTrue(env.is_corosync_conf_live)

        self.assertEqual(corosync_data, env.get_corosync_conf_data())
        self.assertEqual(corosync_data, env.get_corosync_conf().config.export())
        self.assertEqual(2, mock_get_corosync.call_count)

        env.push_corosync_conf(
            CorosyncConfigFacade.from_string(new_corosync_data)
        )
        mock_distribute.assert_called_once_with(
            "mock node communicator",
            self.mock_reporter,
            "mock node list",
            new_corosync_data,
            False
        )
        mock_reload.assert_called_once_with("mock cmd runner")
Beispiel #7
0
    def test_corosync_conf_set(self, mock_get_corosync, mock_distribute,
                               mock_reload, mock_check_offline,
                               mock_qdevice_reload):
        corosync_data = "totem {\n    version: 2\n}\n"
        new_corosync_data = "totem {\n    version: 3\n}\n"
        env = LibraryEnvironment(self.mock_logger,
                                 self.mock_reporter,
                                 corosync_conf_data=corosync_data)

        self.assertFalse(env.is_corosync_conf_live)

        self.assertEqual(corosync_data, env.get_corosync_conf_data())
        self.assertEqual(corosync_data,
                         env.get_corosync_conf().config.export())
        self.assertEqual(0, mock_get_corosync.call_count)

        env.push_corosync_conf(
            CorosyncConfigFacade.from_string(new_corosync_data))
        self.assertEqual(0, mock_distribute.call_count)

        self.assertEqual(new_corosync_data, env.get_corosync_conf_data())
        self.assertEqual(0, mock_get_corosync.call_count)
        mock_check_offline.assert_not_called()
        mock_reload.assert_not_called()
        mock_qdevice_reload.assert_not_called()
Beispiel #8
0
def _unfencing_scsi_devices(
        env: LibraryEnvironment,
        stonith_el: _Element,
        original_devices: Iterable[str],
        updated_devices: Iterable[str],
        force_flags: Container[reports.types.ForceCode] = (),
) -> None:
    """
    Unfence scsi devices provided in device_list if it is possible to connect
    to pcsd and corosync is running.

    env -- provides all for communication with externals
    original_devices -- devices before update
    updated_devices -- devices after update
    force_flags -- list of flags codes
    """
    devices_to_unfence = set(updated_devices) - set(original_devices)
    if not devices_to_unfence:
        return
    cluster_nodes_names, nodes_report_list = get_existing_nodes_names(
        env.get_corosync_conf(),
        error_on_missing_name=True,
    )
    env.report_processor.report_list(nodes_report_list)
    (
        target_report_list,
        cluster_nodes_target_list,
    ) = env.get_node_target_factory().get_target_list_with_reports(
        cluster_nodes_names,
        allow_skip=False,
    )
    env.report_processor.report_list(target_report_list)
    if env.report_processor.has_errors:
        raise LibraryError()
    com_cmd: AllSameDataMixin = GetCorosyncOnlineTargets(
        env.report_processor,
        skip_offline_targets=reports.codes.SKIP_OFFLINE_NODES in force_flags,
    )
    com_cmd.set_targets(cluster_nodes_target_list)
    online_corosync_target_list = run_and_raise(env.get_node_communicator(),
                                                com_cmd)
    if stonith_el.get("type") == "fence_mpath":
        com_cmd = UnfenceMpath(
            env.report_processor,
            original_devices=sorted(original_devices),
            updated_devices=sorted(updated_devices),
            node_key_map=resource.stonith.get_node_key_map_for_mpath(
                stonith_el,
                [target.label for target in online_corosync_target_list],
            ),
        )
    else:  # fence_scsi
        com_cmd = Unfence(
            env.report_processor,
            original_devices=sorted(original_devices),
            updated_devices=sorted(updated_devices),
        )
    com_cmd.set_targets(online_corosync_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)
Beispiel #9
0
def synchronize_ssl_certificate(env: LibraryEnvironment, skip_offline=False):
    """
    Send the local pcsd SSL cert and key to all full nodes in the local cluster.

    Consider the pcs Web UI is accessed via an IP running as a resource in the
    cluster. When the IP is moved, the user's browser connects to the new node
    and we want it to get the same certificate to make the transition a
    seamless experience (otherwise the browser display a warning that the
    certificate has changed).
    Using pcsd Web UI on remote and guest nodes is not supported (pcs/pcsd
    depends on the corosanc.conf file being present on the local node) so we
    send the cert only to corossync (== full stack) nodes.
    """
    report_processor = env.report_processor
    target_factory = env.get_node_target_factory()
    cluster_nodes_names, report_list = get_existing_nodes_names(
        env.get_corosync_conf())
    if not cluster_nodes_names:
        report_list.append(reports.corosync_config_no_nodes_defined())
    report_processor.report_list(report_list)

    try:
        with open(settings.pcsd_cert_location, "r") as file:
            ssl_cert = file.read()
    except EnvironmentError as e:
        report_processor.report(
            reports.file_io_error(
                file_type_codes.PCSD_SSL_CERT,
                RawFileError.ACTION_READ,
                format_environment_error(e),
                file_path=settings.pcsd_cert_location,
            ))
    try:
        with open(settings.pcsd_key_location, "r") as file:
            ssl_key = file.read()
    except EnvironmentError as e:
        report_processor.report(
            reports.file_io_error(
                file_type_codes.PCSD_SSL_KEY,
                RawFileError.ACTION_READ,
                format_environment_error(e),
                file_path=settings.pcsd_key_location,
            ))

    target_report_list, target_list = (
        target_factory.get_target_list_with_reports(
            cluster_nodes_names, skip_non_existing=skip_offline))
    report_processor.report_list(target_report_list)

    if report_processor.has_errors:
        raise LibraryError()

    env.report_processor.report(
        reports.pcsd_ssl_cert_and_key_distribution_started(
            [target.label for target in target_list]))

    com_cmd = SendPcsdSslCertAndKey(env.report_processor, ssl_cert, ssl_key)
    com_cmd.set_targets(target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)
Beispiel #10
0
def remove_device(lib_env: LibraryEnvironment, skip_offline_nodes=False):
    """
    Stop using quorum device, distribute and reload configs if live
    skip_offline_nodes continue even if not all nodes are accessible
    """
    cfg = lib_env.get_corosync_conf()
    if not cfg.has_quorum_device():
        raise LibraryError(reports.qdevice_not_defined())
    model = cfg.get_quorum_device_model()
    cfg.remove_quorum_device()

    if lib_env.is_corosync_conf_live:
        report_processor = lib_env.report_processor
        # get nodes for communication
        cluster_nodes_names, report_list = get_existing_nodes_names(
            cfg,
            # Pcs is unable to communicate with nodes missing names. It cannot
            # send new corosync.conf to them. That might break the cluster.
            # Hence we error out.
            error_on_missing_name=True)
        if report_processor.report_list(report_list).has_errors:
            raise LibraryError()
        target_list = lib_env.get_node_target_factory().get_target_list(
            cluster_nodes_names,
            skip_non_existing=skip_offline_nodes,
        )
        # fix quorum options for SBD to work properly
        if sbd.atb_has_to_be_enabled(lib_env.cmd_runner(), cfg):
            lib_env.report_processor.report(
                reports.corosync_quorum_atb_will_be_enabled_due_to_sbd())
            cfg.set_quorum_options({"auto_tie_breaker": "1"})

        # disable qdevice
        lib_env.report_processor.report(
            reports.service_disable_started("corosync-qdevice"))
        com_cmd_disable = qdevice_com.Disable(lib_env.report_processor,
                                              skip_offline_nodes)
        com_cmd_disable.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd_disable)
        # stop qdevice
        lib_env.report_processor.report(
            reports.service_stop_started("corosync-qdevice"))
        com_cmd_stop = qdevice_com.Stop(lib_env.report_processor,
                                        skip_offline_nodes)
        com_cmd_stop.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd_stop)
        # handle model specific configuration
        if model == "net":
            lib_env.report_processor.report(
                reports.qdevice_certificate_removal_started())
            com_cmd_client_destroy = qdevice_net_com.ClientDestroy(
                lib_env.report_processor, skip_offline_nodes)
            com_cmd_client_destroy.set_targets(target_list)
            run_and_raise(lib_env.get_node_communicator(),
                          com_cmd_client_destroy)

    lib_env.push_corosync_conf(cfg, skip_offline_nodes)
Beispiel #11
0
def config_sync(
    env: LibraryEnvironment,
    instance_name=None,
    skip_offline_nodes=False,
):
    """
    Send specified local booth configuration to all nodes in the local cluster.

    env
    string instance_name -- booth instance name
    skip_offline_nodes -- if True offline nodes will be skipped
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    if not env.is_cib_live:
        raise LibraryError(
            reports.live_environment_required([file_type_codes.CIB], ))

    cluster_nodes_names, report_list = get_existing_nodes_names(
        env.get_corosync_conf())
    if not cluster_nodes_names:
        report_list.append(reports.corosync_config_no_nodes_defined())
    report_processor.report_list(report_list)

    try:
        booth_conf_data = booth_env.config.read_raw()
        booth_conf = booth_env.config.raw_to_facade(booth_conf_data)
        if isinstance(booth_env.config.raw_file, GhostFile):
            authfile_data = booth_env.key.read_raw()
            authfile_path = booth_conf.get_authfile()
            authfile_name = (os.path.basename(authfile_path)
                             if authfile_path else None)
        else:
            authfile_name, authfile_data, authfile_report_list = (
                config_files.get_authfile_name_and_data(booth_conf))
            report_processor.report_list(authfile_report_list)
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    except ParserErrorException as e:
        report_processor.report_list(
            booth_env.config.parser_exception_to_report_list(e))
    if report_processor.has_errors:
        raise LibraryError()

    com_cmd = BoothSendConfig(env.report_processor,
                              booth_env.instance_name,
                              booth_conf_data,
                              authfile=authfile_name,
                              authfile_data=authfile_data,
                              skip_offline_targets=skip_offline_nodes)
    com_cmd.set_targets(env.get_node_target_factory().get_target_list(
        cluster_nodes_names,
        skip_non_existing=skip_offline_nodes,
    ))
    run_and_raise(env.get_node_communicator(), com_cmd)
Beispiel #12
0
def destroy(
        env: LibraryEnvironment,
        force_flags: Container[reports.types.ForceCode] = (),
) -> None:
    """
    Destroy disaster-recovery configuration on all sites
    """
    if env.ghost_file_codes:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentRequired(
                    env.ghost_file_codes)))

    report_processor = env.report_processor
    skip_offline = report_codes.SKIP_OFFLINE_NODES in force_flags

    report_list, dr_config = _load_dr_config(env.get_dr_env().config)
    report_processor.report_list(report_list)

    if report_processor.has_errors:
        raise LibraryError()

    local_nodes, report_list = get_existing_nodes_names(
        env.get_corosync_conf())
    report_processor.report_list(report_list)

    if report_processor.has_errors:
        raise LibraryError()

    remote_nodes: List[str] = []
    for conf_remote_site in dr_config.get_remote_site_list():
        remote_nodes.extend(conf_remote_site.node_name_list)

    target_factory = env.get_node_target_factory()
    report_list, targets = target_factory.get_target_list_with_reports(
        remote_nodes + local_nodes,
        skip_non_existing=skip_offline,
    )
    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    com_cmd = RemoveFilesWithoutForces(
        env.report_processor,
        {
            "pcs disaster-recovery config": {
                "type": "pcs_disaster_recovery_conf",
            },
        },
    )
    com_cmd.set_targets(targets)
    run_and_raise(env.get_node_communicator(), com_cmd)
Beispiel #13
0
    def test_corosync_conf_not_set_need_offline_fail(
        self, mock_get_corosync, mock_distribute, mock_reload,
        mock_check_offline, mock_qdevice_reload
    ):
        corosync_data = open(rc("corosync.conf")).read()
        new_corosync_data = corosync_data.replace("version: 2", "version: 3")
        mock_get_corosync.return_value = corosync_data
        def raiser(dummy_communicator, dummy_reporter, dummy_nodes, dummy_force):
            raise LibraryError(
                reports.corosync_not_running_check_node_error("test node")
            )
        mock_check_offline.side_effect = raiser
        env = LibraryEnvironment(self.mock_logger, self.mock_reporter)

        self.assertTrue(env.is_corosync_conf_live)

        self.assertEqual(corosync_data, env.get_corosync_conf_data())
        self.assertEqual(corosync_data, env.get_corosync_conf().config.export())
        self.assertEqual(2, mock_get_corosync.call_count)

        conf_facade = CorosyncConfigFacade.from_string(new_corosync_data)
        conf_facade._need_stopped_cluster = True
        assert_raise_library_error(
            lambda: env.push_corosync_conf(conf_facade),
            (
                severity.ERROR,
                report_codes.COROSYNC_NOT_RUNNING_CHECK_NODE_ERROR,
                {"node": "test node"}
            )
        )
        mock_check_offline.assert_called_once_with(
            "mock node communicator",
            self.mock_reporter,
            "mock node list",
            False
        )
        mock_distribute.assert_not_called()
        mock_reload.assert_not_called()
        mock_qdevice_reload.assert_not_called()
Beispiel #14
0
    def test_corosync_conf_not_set_need_offline_fail(self, mock_get_corosync,
                                                     mock_distribute,
                                                     mock_reload,
                                                     mock_check_offline,
                                                     mock_qdevice_reload):
        corosync_data = open(rc("corosync.conf")).read()
        new_corosync_data = corosync_data.replace("version: 2", "version: 3")
        mock_get_corosync.return_value = corosync_data

        def raiser(dummy_communicator, dummy_reporter, dummy_nodes,
                   dummy_force):
            raise LibraryError(
                reports.corosync_not_running_check_node_error("test node"))

        mock_check_offline.side_effect = raiser
        env = LibraryEnvironment(self.mock_logger, self.mock_reporter)

        self.assertTrue(env.is_corosync_conf_live)

        self.assertEqual(corosync_data, env.get_corosync_conf_data())
        self.assertEqual(corosync_data,
                         env.get_corosync_conf().config.export())
        self.assertEqual(2, mock_get_corosync.call_count)

        conf_facade = CorosyncConfigFacade.from_string(new_corosync_data)
        conf_facade._need_stopped_cluster = True
        assert_raise_library_error(
            lambda: env.push_corosync_conf(conf_facade),
            (severity.ERROR,
             report_codes.COROSYNC_NOT_RUNNING_CHECK_NODE_ERROR, {
                 "node": "test node"
             }))
        mock_check_offline.assert_called_once_with("mock node communicator",
                                                   self.mock_reporter,
                                                   "mock node list", False)
        mock_distribute.assert_not_called()
        mock_reload.assert_not_called()
        mock_qdevice_reload.assert_not_called()
Beispiel #15
0
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Return full cluster status as plaintext

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements

    # validation
    if not env.is_cib_live and env.is_corosync_conf_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.CIB], [file_type_codes.COROSYNC_CONF],
                )
            )
        )
    if env.is_cib_live and not env.is_corosync_conf_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.COROSYNC_CONF], [file_type_codes.CIB],
                )
            )
        )

    # initialization
    runner = env.cmd_runner()
    report_processor = env.report_processor
    live = env.is_cib_live and env.is_corosync_conf_live
    is_sbd_running = False

    # load status, cib, corosync.conf
    status_text, warning_list = get_cluster_status_text(
        runner, hide_inactive_resources, verbose
    )
    corosync_conf = None
    # If we are live on a remote node, we have no corosync.conf.
    # TODO Use the new file framework so the path is not exposed.
    if not live or os.path.exists(settings.corosync_conf_file):
        corosync_conf = env.get_corosync_conf()
    cib = env.get_cib()
    if verbose:
        (
            ticket_status_text,
            ticket_status_stderr,
            ticket_status_retval,
        ) = get_ticket_status_text(runner)
    # get extra info if live
    if live:
        try:
            is_sbd_running = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            pass
        local_services_status = _get_local_services_status(runner)
        if verbose and corosync_conf:
            node_name_list, node_names_report_list = get_existing_nodes_names(
                corosync_conf
            )
            report_processor.report_list(node_names_report_list)
            node_reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_name_list,
            )

    # check stonith configuration
    warning_list = list(warning_list)
    warning_list.extend(_stonith_warnings(cib, is_sbd_running))

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    cluster_name = (
        corosync_conf.get_cluster_name()
        if corosync_conf
        else nvpair.get_value(
            "cluster_property_set", get_crm_config(cib), "cluster-name", ""
        )
    )
    parts = []
    parts.append(f"Cluster name: {cluster_name}")
    if warning_list:
        parts.extend(["", "WARNINGS:"] + warning_list + [""])
    parts.append(status_text)
    if verbose:
        parts.extend(["", "Tickets:"])
        if ticket_status_retval != 0:
            ticket_warning_parts = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_status_stderr:
                ticket_warning_parts.extend(
                    indent(ticket_status_stderr.splitlines())
                )
            parts.extend(indent(ticket_warning_parts))
        else:
            parts.extend(indent(ticket_status_text.splitlines()))
    if live:
        if verbose and corosync_conf:
            parts.extend(["", "PCSD Status:"])
            parts.extend(
                indent(
                    _format_node_reachability(node_name_list, node_reachability)
                )
            )
        parts.extend(["", "Daemon Status:"])
        parts.extend(
            indent(_format_local_services_status(local_services_status))
        )
    return "\n".join(parts)
Beispiel #16
0
def set_recovery_site(env: LibraryEnvironment, node_name: str) -> None:
    """
    Set up disaster recovery with the local cluster being the primary site

    env
    node_name -- a known host from the recovery site
    """
    if env.ghost_file_codes:
        raise LibraryError(
            reports.live_environment_required(env.ghost_file_codes))
    report_processor = SimpleReportProcessor(env.report_processor)
    dr_env = env.get_dr_env()
    if dr_env.config.raw_file.exists():
        report_processor.report(reports.dr_config_already_exist())
    target_factory = env.get_node_target_factory()

    local_nodes, report_list = get_existing_nodes_names(
        env.get_corosync_conf(), error_on_missing_name=True)
    report_processor.report_list(report_list)

    if node_name in local_nodes:
        report_processor.report(reports.node_in_local_cluster(node_name))

    report_list, local_targets = target_factory.get_target_list_with_reports(
        local_nodes, allow_skip=False, report_none_host_found=False)
    report_processor.report_list(report_list)

    report_list, remote_targets = (target_factory.get_target_list_with_reports(
        [node_name], allow_skip=False, report_none_host_found=False))
    report_processor.report_list(report_list)

    if report_processor.has_errors:
        raise LibraryError()

    com_cmd = GetCorosyncConf(env.report_processor)
    com_cmd.set_targets(remote_targets)
    remote_cluster_nodes, report_list = get_existing_nodes_names(
        CorosyncConfigFacade.from_string(
            run_and_raise(env.get_node_communicator(), com_cmd)),
        error_on_missing_name=True)
    if report_processor.report_list(report_list):
        raise LibraryError()

    # ensure we have tokens for all nodes of remote cluster
    report_list, remote_targets = target_factory.get_target_list_with_reports(
        remote_cluster_nodes, allow_skip=False, report_none_host_found=False)
    if report_processor.report_list(report_list):
        raise LibraryError()
    dr_config_exporter = (get_file_toolbox(
        file_type_codes.PCS_DR_CONFIG).exporter)
    # create dr config for remote cluster
    remote_dr_cfg = dr_env.create_facade(DrRole.RECOVERY)
    remote_dr_cfg.add_site(DrRole.PRIMARY, local_nodes)
    # send config to all node of remote cluster
    distribute_file_cmd = DistributeFilesWithoutForces(
        env.report_processor,
        node_communication_format.pcs_dr_config_file(
            dr_config_exporter.export(remote_dr_cfg.config)))
    distribute_file_cmd.set_targets(remote_targets)
    run_and_raise(env.get_node_communicator(), distribute_file_cmd)
    # create new dr config, with local cluster as primary site
    local_dr_cfg = dr_env.create_facade(DrRole.PRIMARY)
    local_dr_cfg.add_site(DrRole.RECOVERY, remote_cluster_nodes)
    distribute_file_cmd = DistributeFilesWithoutForces(
        env.report_processor,
        node_communication_format.pcs_dr_config_file(
            dr_config_exporter.export(local_dr_cfg.config)))
    distribute_file_cmd.set_targets(local_targets)
    run_and_raise(env.get_node_communicator(), distribute_file_cmd)
Beispiel #17
0
def status_all_sites_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> List[Mapping[str, Any]]:
    """
    Return local site's and all remote sites' status as plaintext

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """

    # The command does not provide an option to skip offline / unreacheable /
    # misbehaving nodes.
    # The point of such skipping is to stop a command if it is unable to make
    # changes on all nodes. The user can then decide to proceed anyway and
    # make changes on the skipped nodes later manually.
    # This command only reads from nodes so it automatically asks other nodes
    # if one is offline / misbehaving.
    class SiteData():
        local: bool
        role: DrRole
        target_list: Iterable[RequestTarget]
        status_loaded: bool
        status_plaintext: str

        def __init__(self, local, role, target_list):
            self.local = local
            self.role = role
            self.target_list = target_list
            self.status_loaded = False
            self.status_plaintext = ""

    if env.ghost_file_codes:
        raise LibraryError(
            reports.live_environment_required(env.ghost_file_codes))

    report_processor = SimpleReportProcessor(env.report_processor)
    report_list, dr_config = _load_dr_config(env.get_dr_env().config)
    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    site_data_list = []
    target_factory = env.get_node_target_factory()

    # get local nodes
    local_nodes, report_list = get_existing_nodes_names(
        env.get_corosync_conf())
    report_processor.report_list(report_list)
    report_list, local_targets = target_factory.get_target_list_with_reports(
        local_nodes,
        skip_non_existing=True,
    )
    report_processor.report_list(report_list)
    site_data_list.append(SiteData(True, dr_config.local_role, local_targets))

    # get remote sites' nodes
    for conf_remote_site in dr_config.get_remote_site_list():
        report_list, remote_targets = (
            target_factory.get_target_list_with_reports(
                conf_remote_site.node_name_list,
                skip_non_existing=True,
            ))
        report_processor.report_list(report_list)
        site_data_list.append(
            SiteData(False, conf_remote_site.role, remote_targets))
    if report_processor.has_errors:
        raise LibraryError()

    # get all statuses
    for site_data in site_data_list:
        com_cmd = GetFullClusterStatusPlaintext(
            report_processor,
            hide_inactive_resources=hide_inactive_resources,
            verbose=verbose,
        )
        com_cmd.set_targets(site_data.target_list)
        site_data.status_loaded, site_data.status_plaintext = run_com_cmd(
            env.get_node_communicator(), com_cmd)

    return [
        DrSiteStatusDto(
            site_data.local,
            site_data.role,
            site_data.status_plaintext,
            site_data.status_loaded,
        ).to_dict() for site_data in site_data_list
    ]
Beispiel #18
0
def node_add_guest(
    env: LibraryEnvironment,
    node_name,
    resource_id,
    options,
    skip_offline_nodes=False,
    allow_incomplete_distribution=False,
    allow_pacemaker_remote_service_fail=False,
    wait: WaitType = False,
):
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    """
    Make a guest node from the specified resource

    LibraryEnvironment env -- provides all for communication with externals
    string node_name -- name of the guest node
    string resource_id -- specifies resource that should become a guest node
    dict options -- guest node options (remote-port, remote-addr,
        remote-connect-timeout)
    bool skip_offline_nodes -- if True, ignore when some nodes are offline
    bool allow_incomplete_distribution -- if True, allow this command to
        finish successfully even if file distribution did not succeed
    bool allow_pacemaker_remote_service_fail -- if True, allow this command to
        finish successfully even if starting/enabling pacemaker_remote did not
        succeed
    mixed wait -- a flag for controlling waiting for pacemaker idle mechanism
    """
    wait_timeout = env.ensure_wait_satisfiable(wait)

    report_processor = env.report_processor
    cib = env.get_cib()
    id_provider = IdProvider(cib)
    corosync_conf: Optional[CorosyncConfigFacade]
    if env.is_cib_live:
        corosync_conf = env.get_corosync_conf()
    else:
        corosync_conf = None
        report_processor.report(
            ReportItem.info(
                reports.messages.CorosyncNodeConflictCheckSkipped(
                    reports.const.REASON_NOT_LIVE_CIB, )))
    (
        existing_nodes_names,
        existing_nodes_addrs,
        report_list,
    ) = get_existing_nodes_names_addrs(corosync_conf, cib)
    if env.is_cib_live:
        # We just reported corosync checks are going to be skipped so we
        # shouldn't complain about errors related to corosync nodes
        report_processor.report_list(report_list)

    existing_target_list = []
    if env.is_cib_live:
        target_factory = env.get_node_target_factory()
        existing_target_list, new_target_list = _get_targets_for_add(
            target_factory,
            report_processor,
            existing_nodes_names,
            [node_name],
            skip_offline_nodes,
        )
        new_target = new_target_list[0] if new_target_list else None
        # default remote-addr to an address from known-hosts
        if "remote-addr" not in options or options["remote-addr"] is None:
            if new_target:
                new_addr = new_target.first_addr
                new_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS)
            else:
                new_addr = node_name
                new_addr_source = reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME
            options["remote-addr"] = new_addr
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, new_addr, new_addr_source)))
    else:
        # default remote-addr to an address from known-hosts
        if "remote-addr" not in options or options["remote-addr"] is None:
            known_hosts = env.get_known_hosts([node_name])
            if known_hosts:
                new_addr = known_hosts[0].dest.addr
                new_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS)
            else:
                new_addr = node_name
                new_addr_source = reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME
            options["remote-addr"] = new_addr
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, new_addr, new_addr_source)))

    # validate inputs
    report_list = guest_node.validate_set_as_guest(cib, existing_nodes_names,
                                                   existing_nodes_addrs,
                                                   node_name, options)
    searcher = ElementSearcher(primitive.TAG, resource_id, get_resources(cib))
    if searcher.element_found():
        resource_element = searcher.get_element()
        report_list.extend(guest_node.validate_is_not_guest(resource_element))
    else:
        report_list.extend(searcher.get_errors())

    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    # everything validated, let's set it up
    guest_node.set_as_guest(
        resource_element,
        id_provider,
        node_name,
        options.get("remote-addr", None),
        options.get("remote-port", None),
        options.get("remote-connect-timeout", None),
    )

    if env.is_cib_live:
        _prepare_pacemaker_remote_environment(
            env,
            report_processor,
            existing_target_list,
            new_target,
            node_name,
            skip_offline_nodes,
            allow_incomplete_distribution,
            allow_pacemaker_remote_service_fail,
        )
    else:
        report_processor.report_list(
            _reports_skip_new_node(node_name, "not_live_cib"))

    env.push_cib(wait_timeout=wait_timeout)
    if wait_timeout >= 0:
        _ensure_resource_running(env, resource_id)
Beispiel #19
0
def node_add_remote(
    env: LibraryEnvironment,
    node_name: str,
    node_addr: Optional[str],
    operations: Iterable[Mapping[str, str]],
    meta_attributes: Mapping[str, str],
    instance_attributes: Mapping[str, str],
    skip_offline_nodes: bool = False,
    allow_incomplete_distribution: bool = False,
    allow_pacemaker_remote_service_fail: bool = False,
    allow_invalid_operation: bool = False,
    allow_invalid_instance_attributes: bool = False,
    use_default_operations: bool = True,
    wait: WaitType = False,
):
    # pylint: disable=too-many-arguments
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    """
    create an ocf:pacemaker:remote resource and use it as a remote node

    env -- provides all for communication with externals
    node_name -- the name of the new node
    node_addr -- the address of the new node or None for default
    operations -- attributes for each entered operation
    meta_attributes -- attributes for primitive/meta_attributes
    instance_attributes -- attributes for primitive/instance_attributes
    skip_offline_nodes -- if True, ignore when some nodes are offline
    allow_incomplete_distribution -- if True, allow this command to
        finish successfully even if file distribution did not succeed
    allow_pacemaker_remote_service_fail -- if True, allow this command to
        finish successfully even if starting/enabling pacemaker_remote did not
        succeed
    allow_invalid_operation -- if True, allow to use operations that
        are not listed in a resource agent metadata
    allow_invalid_instance_attributes -- if True, allow to use instance
        attributes that are not listed in a resource agent metadata and allow to
        omit required instance_attributes
    use_default_operations -- if True, add operations specified in
        a resource agent metadata to the resource
    wait -- a flag for controlling waiting for pacemaker idle mechanism
    """
    wait_timeout = env.ensure_wait_satisfiable(wait)

    report_processor = env.report_processor
    cib = env.get_cib(
        minimal_version=get_required_cib_version_for_primitive(operations))
    id_provider = IdProvider(cib)
    if env.is_cib_live:
        corosync_conf: Optional[CorosyncConfigFacade] = env.get_corosync_conf()
    else:
        corosync_conf = None
        report_processor.report(
            ReportItem.info(
                reports.messages.CorosyncNodeConflictCheckSkipped(
                    reports.const.REASON_NOT_LIVE_CIB, )))
    (
        existing_nodes_names,
        existing_nodes_addrs,
        report_list,
    ) = get_existing_nodes_names_addrs(corosync_conf, cib)
    if env.is_cib_live:
        # We just reported corosync checks are going to be skipped so we
        # shouldn't complain about errors related to corosync nodes
        report_processor.report_list(report_list)

    try:
        resource_agent_facade = ResourceAgentFacadeFactory(
            env.cmd_runner(),
            report_processor).facade_from_parsed_name(remote_node.AGENT_NAME)
    except ResourceAgentError as e:
        report_processor.report(resource_agent_error_to_report_item(e))
        raise LibraryError() from e

    existing_target_list = []
    if env.is_cib_live:
        target_factory = env.get_node_target_factory()
        existing_target_list, new_target_list = _get_targets_for_add(
            target_factory,
            report_processor,
            existing_nodes_names,
            [node_name],
            skip_offline_nodes,
        )
        new_target = new_target_list[0] if new_target_list else None
        # default node_addr to an address from known-hosts
        if node_addr is None:
            if new_target:
                node_addr = new_target.first_addr
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS)
            else:
                node_addr = node_name
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME)
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, node_addr, node_addr_source)))
    else:
        # default node_addr to an address from known-hosts
        if node_addr is None:
            known_hosts = env.get_known_hosts([node_name])
            if known_hosts:
                node_addr = known_hosts[0].dest.addr
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS)
            else:
                node_addr = node_name
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME)
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, node_addr, node_addr_source)))

    # validate inputs
    report_list = remote_node.validate_create(
        existing_nodes_names,
        existing_nodes_addrs,
        resource_agent_facade.metadata,
        node_name,
        node_addr,
        instance_attributes,
    )
    if report_processor.report_list(report_list).has_errors:
        raise LibraryError()
    # validation + cib setup
    # TODO extract the validation to a separate function
    try:
        remote_resource_element = remote_node.create(
            env.report_processor,
            resource_agent_facade,
            get_resources(cib),
            id_provider,
            node_addr,
            node_name,
            operations,
            meta_attributes,
            instance_attributes,
            allow_invalid_operation,
            allow_invalid_instance_attributes,
            use_default_operations,
        )
    except LibraryError as e:
        # Check unique id conflict with check against nodes. Until validation
        # resource create is not separated, we need to make unique post
        # validation.
        already_exists = []
        unified_report_list = []
        for report_item in report_list + list(e.args):
            # pylint: disable=no-member
            dto_obj = report_item.message.to_dto()
            if dto_obj.code not in (
                    reports.codes.ID_ALREADY_EXISTS,
                    reports.codes.RESOURCE_INSTANCE_ATTR_VALUE_NOT_UNIQUE,
            ):
                unified_report_list.append(report_item)
            elif ("id" in dto_obj.payload
                  and dto_obj.payload["id"] not in already_exists):
                unified_report_list.append(report_item)
                already_exists.append(dto_obj.payload["id"])
        report_list = unified_report_list

    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    # everything validated, let's set it up
    if env.is_cib_live:
        _prepare_pacemaker_remote_environment(
            env,
            report_processor,
            existing_target_list,
            new_target,
            node_name,
            skip_offline_nodes,
            allow_incomplete_distribution,
            allow_pacemaker_remote_service_fail,
        )
    else:
        report_processor.report_list(
            _reports_skip_new_node(node_name, "not_live_cib"))

    env.push_cib(wait_timeout=wait_timeout)
    if wait_timeout >= 0:
        _ensure_resource_running(env, remote_resource_element.attrib["id"])
Beispiel #20
0
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Return full cluster status as plaintext

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals

    # validation
    if not env.is_cib_live and env.is_corosync_conf_live:
        raise LibraryError(
            reports.live_environment_not_consistent(
                [file_type_codes.CIB],
                [file_type_codes.COROSYNC_CONF],
            ))
    if env.is_cib_live and not env.is_corosync_conf_live:
        raise LibraryError(
            reports.live_environment_not_consistent(
                [file_type_codes.COROSYNC_CONF],
                [file_type_codes.CIB],
            ))

    # initialization
    runner = env.cmd_runner()
    report_processor = SimpleReportProcessor(env.report_processor)
    live = env.is_cib_live and env.is_corosync_conf_live
    is_sbd_running = False

    # load status, cib, corosync.conf
    status_text, warning_list = get_cluster_status_text(
        runner, hide_inactive_resources, verbose)
    corosync_conf = env.get_corosync_conf()
    cib = env.get_cib()
    if verbose:
        ticket_status_text, ticket_status_stderr, ticket_status_retval = (
            get_ticket_status_text(runner))
    # get extra info if live
    if live:
        try:
            is_sbd_running = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            pass
        local_services_status = _get_local_services_status(runner)
        if verbose:
            node_name_list, node_names_report_list = get_existing_nodes_names(
                corosync_conf)
            report_processor.report_list(node_names_report_list)
            node_reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_name_list,
            )

    # check stonith configuration
    warning_list = list(warning_list)
    warning_list.extend(_stonith_warnings(cib, is_sbd_running))

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    parts = []
    parts.append(f"Cluster name: {corosync_conf.get_cluster_name()}")
    if warning_list:
        parts.extend(["", "WARNINGS:"] + warning_list + [""])
    parts.append(status_text)
    if verbose:
        parts.extend(["", "Tickets:"])
        if ticket_status_retval != 0:
            ticket_warning_parts = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_status_stderr:
                ticket_warning_parts.extend(
                    indent(ticket_status_stderr.splitlines()))
            parts.extend(indent(ticket_warning_parts))
        else:
            parts.extend(indent(ticket_status_text.splitlines()))
    if live:
        if verbose:
            parts.extend(["", "PCSD Status:"])
            parts.extend(
                indent(
                    _format_node_reachability(node_name_list,
                                              node_reachability)))
        parts.extend(["", "Daemon Status:"])
        parts.extend(
            indent(_format_local_services_status(local_services_status)))
    return "\n".join(parts)
Beispiel #21
0
def add_device(
    lib_env: LibraryEnvironment,
    model,
    model_options,
    generic_options,
    heuristics_options,
    force_model=False,
    force_options=False,
    skip_offline_nodes=False,
):
    # pylint: disable=too-many-locals
    """
    Add a quorum device to a cluster, distribute and reload configs if live

    string model -- quorum device model
    dict model_options -- model specific options
    dict generic_options -- generic quorum device options
    dict heuristics_options -- heuristics options
    bool force_model -- continue even if the model is not valid
    bool force_options -- continue even if options are not valid
    bool skip_offline_nodes -- continue even if not all nodes are accessible
    """
    cfg = lib_env.get_corosync_conf()
    if cfg.has_quorum_device():
        raise LibraryError(
            ReportItem.error(reports.messages.QdeviceAlreadyDefined()))

    report_processor = lib_env.report_processor
    report_processor.report_list(
        corosync_conf_validators.add_quorum_device(
            model,
            model_options,
            generic_options,
            heuristics_options,
            [node.nodeid for node in cfg.get_nodes()],
            force_model=force_model,
            force_options=force_options,
        ))

    if lib_env.is_corosync_conf_live:
        cluster_nodes_names, report_list = get_existing_nodes_names(
            cfg,
            # Pcs is unable to communicate with nodes missing names. It cannot
            # send new corosync.conf to them. That might break the cluster.
            # Hence we error out.
            error_on_missing_name=True,
        )
        report_processor.report_list(report_list)

    if report_processor.has_errors:
        raise LibraryError()

    cfg.add_quorum_device(
        model,
        model_options,
        generic_options,
        heuristics_options,
    )
    if cfg.is_quorum_device_heuristics_enabled_with_no_exec():
        lib_env.report_processor.report(
            ReportItem.warning(
                reports.messages.CorosyncQuorumHeuristicsEnabledWithNoExec()))

    # First setup certificates for qdevice, then send corosync.conf to nodes.
    # If anything fails, nodes will not have corosync.conf with qdevice in it,
    # so there is no effect on the cluster.
    if lib_env.is_corosync_conf_live:
        target_factory = lib_env.get_node_target_factory()
        target_list = target_factory.get_target_list(
            cluster_nodes_names,
            skip_non_existing=skip_offline_nodes,
        )
        # Do model specific configuration.
        # If the model is not known to pcs and was forced, do not configure
        # anything else than corosync.conf, as we do not know what to do
        # anyway.
        if model == "net":
            qdevice_net.set_up_client_certificates(
                lib_env.cmd_runner(),
                lib_env.report_processor,
                lib_env.communicator_factory,
                # We are sure the "host" key is there, it has been validated
                # above.
                target_factory.get_target_from_hostname(model_options["host"]),
                cfg.get_cluster_name(),
                target_list,
                skip_offline_nodes,
            )

        lib_env.report_processor.report(
            ReportItem.info(
                reports.messages.ServiceActionStarted(
                    reports.const.SERVICE_ACTION_ENABLE, "corosync-qdevice")))
        com_cmd = qdevice_com.Enable(lib_env.report_processor,
                                     skip_offline_nodes)
        com_cmd.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd)

    # everything set up, it's safe to tell the nodes to use qdevice
    lib_env.push_corosync_conf(cfg, skip_offline_nodes)

    # Now, when corosync.conf has been reloaded, we can start qdevice service.
    if lib_env.is_corosync_conf_live:
        lib_env.report_processor.report(
            ReportItem.info(
                reports.messages.ServiceActionStarted(
                    reports.const.SERVICE_ACTION_START, "corosync-qdevice")))
        com_cmd_start = qdevice_com.Start(lib_env.report_processor,
                                          skip_offline_nodes)
        com_cmd_start.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd_start)
Beispiel #22
0
def set_recovery_site(env: LibraryEnvironment, node_name: str) -> None:
    """
    Set up disaster recovery with the local cluster being the primary site

    env
    node_name -- a known host from the recovery site
    """
    # pylint: disable=too-many-locals
    if env.ghost_file_codes:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentRequired(
                    env.ghost_file_codes)))
    report_processor = env.report_processor
    dr_env = env.get_dr_env()
    if dr_env.config.raw_file.exists():
        report_processor.report(
            ReportItem.error(reports.messages.DrConfigAlreadyExist()))
    target_factory = env.get_node_target_factory()

    local_nodes, report_list = get_existing_nodes_names(
        env.get_corosync_conf(), error_on_missing_name=True)
    report_processor.report_list(report_list)

    if node_name in local_nodes:
        report_processor.report(
            ReportItem.error(reports.messages.NodeInLocalCluster(node_name)))

    report_list, local_targets = target_factory.get_target_list_with_reports(
        local_nodes, allow_skip=False, report_none_host_found=False)
    report_processor.report_list(report_list)

    report_list, remote_targets = target_factory.get_target_list_with_reports(
        [node_name], allow_skip=False, report_none_host_found=False)
    report_processor.report_list(report_list)

    if report_processor.has_errors:
        raise LibraryError()

    # TODO The new file framework doesn't support network communication yet.
    com_cmd = GetCorosyncConf(env.report_processor)
    com_cmd.set_targets(remote_targets)
    corosync_conf_instance = FileInstance.for_corosync_conf()
    try:
        remote_cluster_nodes, report_list = get_existing_nodes_names(
            cast(
                CorosyncConfigFacade,
                corosync_conf_instance.raw_to_facade(
                    run_and_raise(env.get_node_communicator(),
                                  com_cmd).encode("utf-8")),
            ),
            error_on_missing_name=True,
        )
    except ParserErrorException as e:
        report_processor.report_list(
            corosync_conf_instance.toolbox.parser.exception_to_report_list(
                e,
                file_type_codes.COROSYNC_CONF,
                None,
                force_code=None,
                is_forced_or_warning=False,
            ))

    if report_processor.report_list(report_list).has_errors:
        raise LibraryError()

    # ensure we have tokens for all nodes of remote cluster
    report_list, remote_targets = target_factory.get_target_list_with_reports(
        remote_cluster_nodes, allow_skip=False, report_none_host_found=False)
    if report_processor.report_list(report_list).has_errors:
        raise LibraryError()
    dr_config_exporter = get_file_toolbox(
        file_type_codes.PCS_DR_CONFIG).exporter
    # create dr config for remote cluster
    remote_dr_cfg = dr_env.create_facade(DrRole.RECOVERY)
    remote_dr_cfg.add_site(DrRole.PRIMARY, local_nodes)
    # send config to all node of remote cluster
    distribute_file_cmd = DistributeFilesWithoutForces(
        env.report_processor,
        node_communication_format.pcs_dr_config_file(
            dr_config_exporter.export(remote_dr_cfg.config)),
    )
    distribute_file_cmd.set_targets(remote_targets)
    run_and_raise(env.get_node_communicator(), distribute_file_cmd)
    # create new dr config, with local cluster as primary site
    local_dr_cfg = dr_env.create_facade(DrRole.PRIMARY)
    local_dr_cfg.add_site(DrRole.RECOVERY, remote_cluster_nodes)
    distribute_file_cmd = DistributeFilesWithoutForces(
        env.report_processor,
        node_communication_format.pcs_dr_config_file(
            dr_config_exporter.export(local_dr_cfg.config)),
    )
    distribute_file_cmd.set_targets(local_targets)
    run_and_raise(env.get_node_communicator(), distribute_file_cmd)
Beispiel #23
0
def update_scsi_devices(
    env: LibraryEnvironment,
    stonith_id: str,
    set_device_list: Iterable[str],
    force_flags: Container[reports.types.ForceCode] = (),
) -> None:
    """
    Update scsi fencing devices without restart and affecting other resources.

    env -- provides all for communication with externals
    stonith_id -- id of stonith resource
    set_device_list -- paths to the scsi devices that would be set for stonith
        resource
    force_flags -- list of flags codes
    """
    if not is_getting_resource_digest_supported(env.cmd_runner()):
        raise LibraryError(
            ReportItem.error(
                reports.messages.StonithRestartlessUpdateOfScsiDevicesNotSupported()
            )
        )
    cib = env.get_cib()
    if not set_device_list:
        env.report_processor.report(
            ReportItem.error(
                reports.messages.InvalidOptionValue(
                    "devices", "", None, cannot_be_empty=True
                )
            )
        )
    (
        stonith_el,
        report_list,
    ) = stonith.validate_stonith_restartless_update(cib, stonith_id)
    if env.report_processor.report_list(report_list).has_errors:
        raise LibraryError()
    # for mypy, this should not happen because exeption would be raised
    if stonith_el is None:
        raise AssertionError("stonith element is None")

    stonith.update_scsi_devices_without_restart(
        env.cmd_runner(),
        env.get_cluster_state(),
        stonith_el,
        IdProvider(cib),
        set_device_list,
    )

    # Unfencing
    cluster_nodes_names, nodes_report_list = get_existing_nodes_names(
        env.get_corosync_conf(),
        error_on_missing_name=True,
    )
    env.report_processor.report_list(nodes_report_list)
    (
        target_report_list,
        cluster_nodes_target_list,
    ) = env.get_node_target_factory().get_target_list_with_reports(
        cluster_nodes_names,
        allow_skip=False,
    )
    env.report_processor.report_list(target_report_list)
    if env.report_processor.has_errors:
        raise LibraryError()
    com_cmd: AllSameDataMixin = GetCorosyncOnlineTargets(
        env.report_processor,
        skip_offline_targets=reports.codes.SKIP_OFFLINE_NODES in force_flags,
    )
    com_cmd.set_targets(cluster_nodes_target_list)
    online_corosync_target_list = run_and_raise(
        env.get_node_communicator(), com_cmd
    )
    com_cmd = Unfence(env.report_processor, sorted(set_device_list))
    com_cmd.set_targets(online_corosync_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    env.push_cib()