def test_error(self):
    """A non-zero, non-1 exit code raises QuorumStatusReadException."""
    self.mock_runner.run.return_value = ("some info", "status error", 2)
    with self.assertRaises(lib.QuorumStatusReadException) as cm:
        lib.get_quorum_status_text(self.mock_runner)
    self.mock_runner.run.assert_called_once_with([self.quorum_tool, "-p"])
    self.assertEqual(cm.exception.reason, "status error")
def status_text(lib_env):
    """
    Get quorum runtime status in plain text

    lib_env LibraryEnvironment

    Raise LibraryError if the quorum status cannot be read.
    """
    try:
        return corosync_live.get_quorum_status_text(lib_env.cmd_runner())
    except corosync_live.QuorumStatusReadException as e:
        # chain the original exception (`from e`) so the root cause is
        # preserved in tracebacks instead of being masked by LibraryError
        raise LibraryError(
            reports.corosync_quorum_get_status_error(e.reason)
        ) from e
def test_error(self):
    """An error status (exit code 2) is reported as a library error."""
    self.mock_runner.run.return_value = ("some info", "status error", 2)
    assert_raise_library_error(
        lambda: lib.get_quorum_status_text(self.mock_runner),
        (
            severity.ERROR,
            report_codes.COROSYNC_QUORUM_GET_STATUS_ERROR,
            {"reason": "status error"},
        ),
    )
    self.mock_runner.run.assert_called_once_with([self.quorum_tool, "-p"])
def test_success_with_retval_1(self):
    """Exit code 1 from the quorum tool is still treated as success."""
    self.mock_runner.run.return_value = ("status info", "", 1)
    result = lib.get_quorum_status_text(self.mock_runner)
    self.assertEqual(result, "status info")
    self.mock_runner.run.assert_called_once_with([self.quorum_tool, "-p"])
def status_text(lib_env):
    """
    Get quorum runtime status in plain text
    """
    try:
        return corosync_live.get_quorum_status_text(lib_env.cmd_runner())
    except corosync_live.QuorumStatusReadException as e:
        # wrap the low-level read failure in a library-level error report,
        # chaining the original exception for traceback context
        report = ReportItem.error(
            reports.messages.CorosyncQuorumGetStatusError(e.reason)
        )
        raise LibraryError(report) from e
def test_error(self):
    """Exit code 2 with stderr output produces a status-error report."""
    self.mock_runner.run.return_value = ("some info", "status error", 2)
    expected_report = (
        severity.ERROR,
        report_codes.COROSYNC_QUORUM_GET_STATUS_ERROR,
        {"reason": "status error"},
    )
    assert_raise_library_error(
        lambda: lib.get_quorum_status_text(self.mock_runner),
        expected_report,
    )
    self.mock_runner.run.assert_called_once_with([self.quorum_tool, "-p"])
def status_text(lib_env):
    """
    Get quorum runtime status in plain text
    """
    # refuse to proceed on CMAN-based clusters before querying the tool
    __ensure_not_cman(lib_env)
    runner = lib_env.cmd_runner()
    return corosync_live.get_quorum_status_text(runner)
def remove_nodes(env, node_list, force_quorum_loss=False, skip_offline=False):
    """
    Remove nodes from a cluster.

    env LibraryEnvironment
    node_list iterable -- names of nodes to remove
    force_quorum_loss bool -- treat quorum loss as a warning if True
    skip_offline bool -- treat unreachable nodes as warnings if True
    """
    _ensure_live_env(env)  # raises if env is not live
    report_processor = SimpleReportProcessor(env.report_processor)
    target_factory = env.get_node_target_factory()
    corosync_conf = env.get_corosync_conf()
    cluster_nodes_names = corosync_conf.get_nodes_names()

    # validations

    # basic config-level checks: nodes exist, at least one node remains, qdevice
    # settings stay consistent
    report_processor.report_list(config_validators.remove_nodes(
        node_list,
        corosync_conf.get_nodes(),
        corosync_conf.get_quorum_device_settings(),
    ))
    if report_processor.has_errors:
        # If there is an error, there is usually not much sense in doing other
        # validations:
        # - if there would be no node left in the cluster, it's pointless
        # to check for quorum loss or if at least one remaining node is online
        # - if only one node is being removed and it doesn't exist, it's again
        # pointless to check for other issues
        raise LibraryError()

    # resolve node names to communication targets; with skip_offline, names
    # without a known address are tolerated and tracked as unknown_nodes
    target_report_list, cluster_nodes_target_list = (
        target_factory.get_target_list_with_reports(
            cluster_nodes_names,
            skip_non_existing=skip_offline,
        )
    )
    known_nodes = set([target.label for target in cluster_nodes_target_list])
    unknown_nodes = set([
        name for name in cluster_nodes_names
        if name not in known_nodes
    ])
    report_processor.report_list(target_report_list)

    # find out which cluster nodes are reachable right now
    com_cmd = GetOnlineTargets(
        report_processor, ignore_offline_targets=skip_offline,
    )
    com_cmd.set_targets(cluster_nodes_target_list)
    online_target_list = run_com(env.get_node_communicator(), com_cmd)
    offline_target_list = [
        target for target in cluster_nodes_target_list
        if target not in online_target_list
    ]
    # online nodes that will remain in the cluster after the removal; these are
    # the nodes the new corosync.conf must be pushed to
    staying_online_target_list = [
        target for target in online_target_list
        if target.label not in node_list
    ]
    if not staying_online_target_list:
        report_processor.report(
            reports.unable_to_connect_to_any_remaining_node()
        )
        # If no remaining node is online, there is no point in checking quorum
        # loss or anything as we would just get errors.
        raise LibraryError()

    if skip_offline:
        # warn about remaining nodes we cannot reach -- they will not get the
        # updated corosync.conf
        staying_offline_nodes = (
            [
                target.label for target in offline_target_list
                if target.label not in node_list
            ]
            +
            [name for name in unknown_nodes if name not in node_list]
        )
        if staying_offline_nodes:
            report_processor.report(
                reports.unable_to_connect_to_all_remaining_node(
                    staying_offline_nodes
                )
            )

    # shrinking the cluster may require auto_tie_breaker for SBD to stay safe
    atb_has_to_be_enabled = sbd.atb_has_to_be_enabled(
        env.cmd_runner(), corosync_conf, -len(node_list)
    )
    if atb_has_to_be_enabled:
        report_processor.report(
            reports.corosync_quorum_atb_will_be_enabled_due_to_sbd()
        )
        # changing auto_tie_breaker requires the cluster to be stopped, so make
        # sure corosync is not running on the remaining nodes
        com_cmd = CheckCorosyncOffline(
            report_processor, allow_skip_offline=False,
        )
        com_cmd.set_targets(staying_online_target_list)
        run_com(env.get_node_communicator(), com_cmd)
    else:
        # quorum check - local
        # example: 5-node cluster, 3 online nodes, removing one online node,
        # results in 4-node cluster with 2 online nodes => quorum lost
        # Check quorum loss only if ATB does not need to be enabled. If it is
        # required, cluster has to be turned off and therefore it loses quorum.
        forceable_report_creator = reports.get_problem_creator(
            report_codes.FORCE_QUORUM_LOSS, force_quorum_loss
        )
        try:
            if corosync_live.QuorumStatus.from_string(
                corosync_live.get_quorum_status_text(env.cmd_runner())
            ).stopping_nodes_cause_quorum_loss(node_list):
                report_processor.report(
                    forceable_report_creator(
                        reports.corosync_quorum_will_be_lost
                    )
                )
        except corosync_live.QuorumStatusException as e:
            # cannot determine quorum impact -- forceable, same as actual loss
            report_processor.report(
                forceable_report_creator(
                    reports.corosync_quorum_loss_unable_to_check,
                    reason=e.reason,
                )
            )

    if report_processor.has_errors:
        raise LibraryError()

    # validations done

    unknown_to_remove = [name for name in unknown_nodes if name in node_list]
    if unknown_to_remove:
        report_processor.report(
            reports.nodes_to_remove_unreachable(unknown_to_remove)
        )
    # destroy cluster configuration on the nodes being removed (best effort:
    # failures are warnings)
    targets_to_remove = [
        target for target in cluster_nodes_target_list
        if target.label in node_list
    ]
    if targets_to_remove:
        com_cmd = cluster.DestroyWarnOnFailure(report_processor)
        com_cmd.set_targets(targets_to_remove)
        run_and_raise(env.get_node_communicator(), com_cmd)

    # update the local corosync.conf model and distribute it to the nodes that
    # stay in the cluster
    corosync_conf.remove_nodes(node_list)
    if atb_has_to_be_enabled:
        corosync_conf.set_quorum_options(dict(auto_tie_breaker="1"))
    com_cmd = DistributeCorosyncConf(
        env.report_processor,
        corosync_conf.config.export(),
        allow_skip_offline=False,
    )
    com_cmd.set_targets(staying_online_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    com_cmd = ReloadCorosyncConf(env.report_processor)
    com_cmd.set_targets(staying_online_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    # try to remove nodes from pcmk using crm_node -R <node> --force and if not
    # successful remove it directly from CIB file on all nodes in parallel
    com_cmd = RemoveNodesFromCib(env.report_processor, node_list)
    com_cmd.set_targets(staying_online_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)