Example #1
0
def destroy_cluster(argv):
    """
    Destroy the cluster on the nodes named in argv.

    Pacemaker is stopped on all the given nodes first, then the cluster is
    destroyed on them. Errors from the destroy step are reported through
    utils.err. Nothing is done when argv is empty.

    argv -- names of the nodes to destroy the cluster on
    """
    if not argv:
        return
    nodes = argv
    # Stop pacemaker and resources while the cluster is still quorate.
    # The result is intentionally not inspected: we proceed with destroy
    # regardless of errors, since destroy will stop any remaining cluster
    # daemons.
    parallel_for_nodes(
        utils.repeat_if_timeout(utils.stopPacemaker),
        nodes,
        quiet=True,
    )
    node_errors = parallel_for_nodes(utils.destroyCluster, nodes, quiet=True)
    if node_errors:
        utils.err(
            "unable to destroy cluster\n" + "\n".join(node_errors.values())
        )
Example #2
0
def wait_for_nodes_started(node_list, timeout=None):
    """
    Wait until the local node or the given remote nodes have started.

    node_list -- names of remote nodes to wait for; when empty, the local
        node is waited for instead
    timeout -- number of seconds after which waiting stops; 15 minutes are
        used when None
    """
    if timeout is None:
        timeout = 60 * 15
    # passed through to the wait helpers; presumably the polling period in
    # seconds -- confirm against wait_for_*_node_started
    interval = 2
    deadline = datetime.timedelta(seconds=timeout)
    stop_at = datetime.datetime.now() + deadline
    print("Waiting for node(s) to start...")

    if node_list:
        # remote nodes are checked in parallel; any reported error means we
        # could not confirm that all of them are running
        failures = parallel_for_nodes(
            wait_for_remote_node_started, node_list, stop_at, interval
        )
        if failures:
            utils.err("unable to verify all nodes have started")
        return

    # no nodes specified, wait for the local node only
    code, output = wait_for_local_node_started(stop_at, interval)
    if code == 0:
        print(output)
    else:
        utils.err(output)
Example #3
0
def start_cluster_nodes(nodes):
    """
    Start the cluster on all the given nodes in parallel.

    Errors reported for individual nodes are joined and passed to utils.err.

    nodes -- names of the nodes to start the cluster on
    """
    # Starting a large cluster takes longer, so the request timeout grows by
    # one default timeout for every started group of 8 nodes:
    #    1 -  8 nodes: 1 * timeout
    #    9 - 16 nodes: 2 * timeout
    #   17 - 24 nodes: 3 * timeout
    #   ...
    # Users can override this and set their own timeout by specifying
    # the --request-timeout option (see utils.sendHTTPRequest).
    base_timeout = settings.default_request_timeout
    node_groups = math.ceil(len(nodes) / 8.0)
    timeout = int(base_timeout * node_groups)

    failures = parallel_for_nodes(
        utils.startCluster, nodes, quiet=True, timeout=timeout
    )
    if not failures:
        return
    details = "\n".join(failures.values())
    utils.err("unable to start all nodes\n" + details)
Example #4
0
def stop_cluster_nodes(nodes):
    """
    Stop the cluster on the given nodes: pacemaker first, then corosync.

    Steps, in order:
    1. Report an error if a requested node is not among the node names
       returned by the corosync.conf facade.
    2. Unless "--force" is in utils.pcs_options or the request covers every
       configured node, ask the requested nodes for their quorum status and
       report an error if stopping the nodes would cause a loss of quorum or
       if that cannot be determined.
    3. Stop pacemaker on all requested nodes in parallel.
    4. Stop corosync only on the nodes for which stopping pacemaker reported
       no error.

    nodes -- names of the nodes to stop the cluster on

    NOTE(review): the flow below appears to rely on utils.err() terminating
    the program unless it is called with exit_after_error=False -- confirm
    in utils.
    """
    all_nodes = utils.get_corosync_conf_facade().get_nodes_names()
    unknown_nodes = set(nodes) - set(all_nodes)
    if unknown_nodes:
        utils.err("nodes '%s' do not appear to exist in configuration" %
                  "', '".join(unknown_nodes))

    # True when the request covers every configured node; the quorum check
    # below is skipped in that case.
    stopping_all = set(nodes) >= set(all_nodes)
    if "--force" not in utils.pcs_options and not stopping_all:
        # Per-node problems collected while looking for a node able to answer
        # the quorum question; discarded once a usable answer is obtained.
        error_list = []
        for node in nodes:
            retval, data = utils.get_remote_quorumtool_output(node)
            if retval != 0:
                # No quorumtool output from this node, remember the error and
                # try the next node.
                error_list.append(node + ": " + data)
                continue
            try:
                quorum_status = corosync_live.QuorumStatus.from_string(data)
                if not quorum_status.is_quorate:
                    # Get quorum status from a quorate node, non-quorate nodes
                    # may provide inaccurate info. If no node is quorate, there
                    # is no quorum to be lost and therefore no error to be
                    # reported.
                    continue
                if quorum_status.stopping_nodes_cause_quorum_loss(nodes):
                    utils.err(
                        "Stopping the node(s) will cause a loss of the quorum"
                        + ", use --force to override")
                else:
                    # We have the info, no need to print errors
                    error_list = []
                    break
            except corosync_live.QuorumStatusException:
                # The output could not be turned into a quorum status. This
                # is only an error if the node does not look offline.
                if not utils.is_node_offline_by_quorumtool_output(data):
                    error_list.append(node + ": Unable to get quorum status")
                # else the node seems to be stopped already
        if error_list:
            # No node gave a usable answer and at least one of them failed.
            utils.err(
                "Unable to determine whether stopping the nodes will cause " +
                "a loss of the quorum, use --force to override\n" +
                "\n".join(error_list))

    # Remembers a failure of the pacemaker phase so that an error is still
    # reported at the very end, after corosync has been handled.
    was_error = False
    node_errors = parallel_for_nodes(utils.repeat_if_timeout(
        utils.stopPacemaker),
                                     nodes,
                                     quiet=True)
    # Nodes for which stopping pacemaker reported no error; only these get
    # corosync stopped below.
    accessible_nodes = [
        node for node in nodes if node not in node_errors.keys()
    ]
    if node_errors:
        # Exit right away only when there is no node left to continue with.
        utils.err("unable to stop all nodes\n" +
                  "\n".join(node_errors.values()),
                  exit_after_error=not accessible_nodes)
        was_error = True

    for node in node_errors:
        print("{0}: Not stopping cluster - node is unreachable".format(node))

    node_errors = parallel_for_nodes(utils.stopCorosync,
                                     accessible_nodes,
                                     quiet=True)
    if node_errors:
        utils.err("unable to stop all nodes\n" +
                  "\n".join(node_errors.values()))
    if was_error:
        # Reached when pacemaker could not be stopped on some nodes while
        # stopping corosync on the remaining nodes reported no error.
        utils.err("unable to stop all nodes")