 def test_qdevice_tie_breaker_none(self):
     assert_report_item_list_equal(
         config_validators.remove_nodes(
             ["node4"], self.fixture_nodes, ("net", {}, None, None)
         ),
         [],
     )
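For context when reading these snippets: the last argument passed to config_validators.remove_nodes mirrors what corosync_conf.get_quorum_device_settings() returns in Example #12 below, i.e. a (model, model_options, generic_options, heuristics_options) tuple, and self.fixture_nodes is a list of node objects carrying a name and a numeric id. A minimal sketch of such a fixture, using a hypothetical stand-in for the real pcs node class:

from collections import namedtuple

# Hypothetical stand-in for the pcs corosync node objects; the real tests
# build these through pcs test fixtures. Only the attributes the validator
# is assumed to read (name, nodeid) are modeled here.
FixtureNode = namedtuple("FixtureNode", ["name", "nodeid"])

fixture_nodes = [FixtureNode(name=f"node{i}", nodeid=i) for i in range(1, 5)]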
Example #2
 def test_all_nodes(self):
     assert_report_item_list_equal(
         config_validators.remove_nodes(
             ["node3", "node1", "node2", "node4"],
             self.fixture_nodes,
             (None, None, None, None),
         ),
         [fixture.error(report_codes.CANNOT_REMOVE_ALL_CLUSTER_NODES)],
     )
 def test_qdevice_tie_breaker_generic(self):
     assert_report_item_list_equal(
         config_validators.remove_nodes(
             ["node4"],
             self.fixture_nodes,
             ("net", {"tie_breaker": "highest"}, None, None),
         ),
         [],
     )
Example #4
 def test_qdevice_tie_breaker_kept(self):
     assert_report_item_list_equal(
         config_validators.remove_nodes(
             ["node4"],
             self.fixture_nodes,
             ("net", {"tie_breaker": "3"}, None, None)
         ),
         [
         ]
     )
Example #6
 def test_qdevice_tie_breaker_removed(self):
     assert_report_item_list_equal(
         config_validators.remove_nodes(
             ["node4"],
             self.fixture_nodes,
             ("net", {"tie_breaker": "4"}, None, None)
         ),
         [
             fixture.error(
                 report_codes.NODE_USED_AS_TIE_BREAKER,
                 node="node4",
                 node_id=4
             ),
         ]
     )
Example #10
 def test_nonexisting_nodes(self):
     assert_report_item_list_equal(
         config_validators.remove_nodes(
             ["node3", "nodeX", "nodeY", "node4"],
             self.fixture_nodes,
             (None, None, None, None)
         ),
         [
             fixture.error(
                 report_codes.NODE_NOT_FOUND,
                 node=node,
                 searched_types=[]
             )
             for node in ["nodeX", "nodeY"]
         ]
     )
Example #11
 def test_more_errors(self):
     assert_report_item_list_equal(
         config_validators.remove_nodes(
             ["node3", "node1", "node2", "node4", "nodeX"],
             self.fixture_nodes,
             ("net", {"tie_breaker": "4"}, None, None)
         ),
         [
             fixture.error(
                 report_codes.NODE_NOT_FOUND,
                 node="nodeX",
                 searched_types=[]
             ),
             fixture.error(report_codes.CANNOT_REMOVE_ALL_CLUSTER_NODES),
             fixture.error(
                 report_codes.NODE_USED_AS_TIE_BREAKER,
                 node="node4",
                 node_id=4
             ),
         ]
     )
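test_more_errors shows that the validator accumulates reports instead of failing fast: removing every cluster node, a nonexistent node, and the configured tie breaker in a single call yields all three errors at once. A sketch of driving the validator directly to check that behavior, reusing the hypothetical fixture_nodes sketched after the first snippet and assuming each report item carries the code that fixture.error(...) matches on:

# Hypothetical direct invocation; fixture_nodes is the sketch above,
# not the real pcs fixture.
report_list = config_validators.remove_nodes(
    ["node1", "node2", "node3", "node4", "nodeX"],
    fixture_nodes,
    ("net", {"tie_breaker": "4"}, None, None),
)
# One report per problem: unknown node, no nodes left, tie breaker removed.
assert len(report_list) == 3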
Example #12
File: cluster.py Project: bashims/pcs
def remove_nodes(env, node_list, force_quorum_loss=False, skip_offline=False):
    """
    Remove nodes from a cluster.

    env LibraryEnvironment
    node_list iterable -- names of nodes to remove
    force_quorum_loss bool -- treat quorum loss as a warning if True
    skip_offline bool -- treat unreachable nodes as warnings if True
    """
    _ensure_live_env(env)  # raises if env is not live

    report_processor = SimpleReportProcessor(env.report_processor)
    target_factory = env.get_node_target_factory()
    corosync_conf = env.get_corosync_conf()
    cluster_nodes_names = corosync_conf.get_nodes_names()

    # validations
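    # quorum_device_settings is assumed to be the same (model, model_options,
    # generic_options, heuristics_options) tuple that the validator tests
    # above construct by hand.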

    report_processor.report_list(
        config_validators.remove_nodes(
            node_list,
            corosync_conf.get_nodes(),
            corosync_conf.get_quorum_device_settings(),
        ))
    if report_processor.has_errors:
        # If there is an error, there is usually not much sense in doing other
        # validations:
        # - if there would be no node left in the cluster, it's pointless
        #   to check for quorum loss or if at least one remaining node is online
        # - if only one node is being removed and it doesn't exist, it's again
        #   pointless to check for other issues
        raise LibraryError()

    target_report_list, cluster_nodes_target_list = (
        target_factory.get_target_list_with_reports(
            cluster_nodes_names,
            skip_non_existing=skip_offline,
        ))
    known_nodes = {target.label for target in cluster_nodes_target_list}
    unknown_nodes = {
        name for name in cluster_nodes_names if name not in known_nodes
    }
    report_processor.report_list(target_report_list)

    com_cmd = GetOnlineTargets(
        report_processor,
        ignore_offline_targets=skip_offline,
    )
    com_cmd.set_targets(cluster_nodes_target_list)
    online_target_list = run_com(env.get_node_communicator(), com_cmd)
    offline_target_list = [
        target for target in cluster_nodes_target_list
        if target not in online_target_list
    ]
    staying_online_target_list = [
        target for target in online_target_list
        if target.label not in node_list
    ]
    targets_to_remove = [
        target for target in cluster_nodes_target_list
        if target.label in node_list
    ]
    if not staying_online_target_list:
        report_processor.report(
            reports.unable_to_connect_to_any_remaining_node())
        # If no remaining node is online, there is no point in checking quorum
        # loss or anything as we would just get errors.
        raise LibraryError()

    if skip_offline:
        staying_offline_nodes = ([
            target.label
            for target in offline_target_list if target.label not in node_list
        ] + [name for name in unknown_nodes if name not in node_list])
        if staying_offline_nodes:
            report_processor.report(
                reports.unable_to_connect_to_all_remaining_node(
                    staying_offline_nodes))

    atb_has_to_be_enabled = sbd.atb_has_to_be_enabled(env.cmd_runner(),
                                                      corosync_conf,
                                                      -len(node_list))
    if atb_has_to_be_enabled:
        report_processor.report(
            reports.corosync_quorum_atb_will_be_enabled_due_to_sbd())
        com_cmd = CheckCorosyncOffline(
            report_processor,
            allow_skip_offline=False,
        )
        com_cmd.set_targets(staying_online_target_list)
        run_com(env.get_node_communicator(), com_cmd)
    else:
        # Check if removing the nodes would cause quorum loss. We ask the nodes
        # to be removed for their view of quorum. If they are all stopped or
        # not in a quorate partition, their removal cannot cause quorum loss.
        # That's why we ask them and not the remaining nodes.
        # example: 5-node cluster, 3 online nodes, removing one online node,
        # results in 4-node cluster with 2 online nodes => quorum lost
        # Check for quorum loss only if ATB does not need to be enabled. If it
        # is required, the cluster has to be turned off and therefore loses
        # quorum anyway.
        forceable_report_creator = reports.get_problem_creator(
            report_codes.FORCE_QUORUM_LOSS, force_quorum_loss)
        com_cmd = cluster.GetQuorumStatus(report_processor)
        com_cmd.set_targets(targets_to_remove)
        failures, quorum_status = run_com(env.get_node_communicator(), com_cmd)
        if quorum_status:
            if quorum_status.stopping_nodes_cause_quorum_loss(node_list):
                report_processor.report(
                    forceable_report_creator(
                        reports.corosync_quorum_will_be_lost))
        elif failures or not targets_to_remove:
            report_processor.report(
                forceable_report_creator(
                    reports.corosync_quorum_loss_unable_to_check))

    if report_processor.has_errors:
        raise LibraryError()

    # validations done

    unknown_to_remove = [name for name in unknown_nodes if name in node_list]
    if unknown_to_remove:
        report_processor.report(
            reports.nodes_to_remove_unreachable(unknown_to_remove))
    if targets_to_remove:
        com_cmd = cluster.DestroyWarnOnFailure(report_processor)
        com_cmd.set_targets(targets_to_remove)
        run_and_raise(env.get_node_communicator(), com_cmd)

    corosync_conf.remove_nodes(node_list)
    if atb_has_to_be_enabled:
        corosync_conf.set_quorum_options(dict(auto_tie_breaker="1"))

    com_cmd = DistributeCorosyncConf(
        env.report_processor,
        corosync_conf.config.export(),
        allow_skip_offline=False,
    )
    com_cmd.set_targets(staying_online_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    com_cmd = ReloadCorosyncConf(env.report_processor)
    com_cmd.set_targets(staying_online_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    # Try to remove the nodes from pacemaker using crm_node -R <node> --force;
    # if that is not successful, remove them directly from the CIB file on all
    # nodes in parallel.
    com_cmd = RemoveNodesFromCib(env.report_processor, node_list)
    com_cmd.set_targets(staying_online_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)
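A hedged sketch of a call site for this command, assuming the module path pcs.lib.commands.cluster (inferred from the file name shown above, not confirmed by it) and a LibraryEnvironment already constructed by the caller:

# Hypothetical call site; constructing `env` is pcs-internal and omitted here.
from pcs.lib.commands.cluster import remove_nodes  # assumed module path

remove_nodes(
    env,
    ["node4"],
    force_quorum_loss=True,  # per the docstring, quorum loss becomes a warning
    skip_offline=True,       # per the docstring, unreachable nodes become warnings
)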