Example #1
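# NOTE: excerpt from a Salt execution module; `__salt__` is injected at
# runtime, and `warn`, `debug`, `MIN_RECOMMENDED_MEMBER_COUNT` and
# `_optimal_etcd_number` are assumed to be defined elsewhere in the module.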
def get_cluster_size(**kwargs):
    '''
    Determines the optimal/desired (but feasible) etcd cluster size.

    Determines the desired number of cluster members: it defaults to
    the value supplied in the `etcd:masters` pillar, falls back to the
    number of nodes with the kube-master role, and if that is less
    than 3, bumps it to 3 (or to the total number of nodes available,
    if fewer than 3 exist).

    Optional arguments:

      * `masters`: list of current kubernetes masters
      * `minions`: list of current kubernetes minions

    '''
    member_count = __salt__['pillar.get']('etcd:masters', None)

    masters = __salt__['caasp_nodes.get_from_args_or_with_expr'](
        'masters', kwargs, 'G@roles:kube-master')
    minions = __salt__['caasp_nodes.get_from_args_or_with_expr'](
        'minions', kwargs, 'G@roles:kube-minion')

    if not member_count:
        # No value has been set in the pillar; calculate a "good" number
        # for the user.
        num_masters = len(masters)

        member_count = _optimal_etcd_number(num_masters)
        if member_count < MIN_RECOMMENDED_MEMBER_COUNT:
            # Attempt to increase the number of etcd members to 3;
            # if we don't have 3 nodes in total, match the number
            # of nodes we have instead.
            increased_member_count = len(masters) + len(minions)
            increased_member_count = min(MIN_RECOMMENDED_MEMBER_COUNT,
                                         increased_member_count)

            # ... but make sure we are using an odd number
            # (otherwise we could have some leader election problems)
            member_count = _optimal_etcd_number(increased_member_count)

            warn("etcd member count too low (%d), increasing to %d",
                 num_masters, increased_member_count)

            # TODO: go deeper and look for candidates in nodes with
            #       no role (as get_replacement_for_member() does)
    else:
        # A value has been set in the pillar; respect the user's choice
        # even if it's not a "good" number.
        member_count = int(member_count)

        if member_count < MIN_RECOMMENDED_MEMBER_COUNT:
            warn(
                "etcd member count too low (%d), consider increasing "
                "to %d", member_count, MIN_RECOMMENDED_MEMBER_COUNT)

    member_count = max(1, member_count)
    debug("using member count = %d", member_count)
    return member_count
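The helper `_optimal_etcd_number` is not shown here; judging from the comment about leader election, it plausibly rounds a node count down to the nearest odd number. A minimal sketch under that assumption (not the actual implementation):

def _optimal_etcd_number(num_nodes):
    # etcd needs a majority quorum, so an even-sized cluster tolerates
    # no more failures than one with a single member fewer; round down
    # to the nearest odd number, never going below 1
    if num_nodes <= 1:
        return 1
    return num_nodes if num_nodes % 2 == 1 else num_nodes - 1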
Example #2
def warn_or_abort_on_replacement_provided(msg, *args):
    if replacement_provided:
        abort("the user provided replacement cannot be used: " + msg, *args)
    else:
        warn(msg, *args)
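This is the nested helper from Example #3 shown on its own: it closes over `replacement_provided` from the enclosing scope, so a problem with an explicitly user-provided replacement aborts the run, while the same problem with an auto-discovered candidate only produces a warning. A self-contained sketch of the pattern, with stand-in `warn`/`abort` helpers (assumptions, not the module's own):

import logging

def abort(msg, *args):
    # the module's abort() presumably raises; model that with an exception
    raise RuntimeError(msg % args)

def warn(msg, *args):
    logging.warning(msg, *args)

def make_checker(replacement_provided):
    def warn_or_abort_on_replacement_provided(msg, *args):
        if replacement_provided:
            abort("the user provided replacement cannot be used: " + msg, *args)
        else:
            warn(msg, *args)
    return warn_or_abort_on_replacement_provided

make_checker(False)("%s is already an etcd member", "node-2")      # warns
try:
    make_checker(True)("%s is already an etcd member", "node-2")   # aborts
except RuntimeError as exc:
    print(exc)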
Example #3
def get_replacement_for(target, replacement="", **kwargs):
    """
    When removing a node `target`, try to get a `replacement` (and the new roles that
    must be assigned) for all the roles that were running there.

    If the user provides an explicit `replacement`, verify that the replacement is valid.
    If the user-provided replacement is not valid, raise an exception (aborting the execution).

    If no replacement can be found, we are fine as long as we keep a minimum number
    of nodes with that role (i.e., for masters, we are fine as long as at least one master remains).
    """
    assert target

    excluded = kwargs.get("excluded", [])

    replacement_provided = replacement != ""
    replacement_roles = []

    def warn_or_abort_on_replacement_provided(msg, *args):
        if replacement_provided:
            abort("the user provided replacement cannot be used: " + msg, *args)
        else:
            warn(msg, *args)

    # preparations

    # check: we cannot try to remove some 'virtual' nodes
    forbidden = get_from_args_or_with_expr("forbidden", kwargs, "P@roles:(admin|ca)")
    if target in forbidden:
        abort('%s cannot be removed: it has a "ca" or "admin" role', target)
    elif replacement_provided and replacement in forbidden:
        abort(
            '%s cannot be replaced by %s: the replacement has a "ca" or "admin" role',
            target,
            replacement,
        )
    elif replacement_provided and replacement in excluded:
        abort(
            "%s cannot be replaced by %s: the replacement is in the list of nodes excluded",
            target,
            replacement,
        )

    masters = get_from_args_or_with_expr("masters", kwargs, "G@roles:kube-master")
    minions = get_from_args_or_with_expr("minions", kwargs, "G@roles:kube-minion")
    etcd_members = get_from_args_or_with_expr("etcd_members", kwargs, "G@roles:etcd")

    #
    # replacement for etcd members
    #
    if target in etcd_members:
        etcd_replacement = replacement
        if not etcd_replacement:
            debug("looking for replacement for etcd at %s", target)
            # we must choose another node and promote it to be an etcd member
            etcd_replacement = _get_one_for_role("etcd", excluded=excluded)

        # check if the replacement provided is valid
        if etcd_replacement:
            bootstrapped_etcd_members = get_from_args_or_with_expr(
                "booted_etcd_members", kwargs, "G@roles:etcd", booted=True
            )

            if etcd_replacement in bootstrapped_etcd_members:
                warn_or_abort_on_replacement_provided(
                    "the replacement for the etcd server %s cannot be %s: another etcd server is already running there",
                    target,
                    etcd_replacement,
                )
                etcd_replacement = ""
            # the etcd replacement can run on bootstrapped masters/minions,
            # so we are done with the incompatibility checks...

        if etcd_replacement:
            debug(
                "setting %s as the replacement for the etcd member %s",
                etcd_replacement,
                target,
            )
            replacement = etcd_replacement
            replacement_roles.append("etcd")

        if "etcd" not in replacement_roles:
            if len(etcd_members) <= _MIN_ETCD_MEMBERS_AFTER_REMOVAL:
                # we need at least one etcd server
                abort(
                    "cannot remove etcd member %s: too few etcd members, and no replacement found or provided",
                    target,
                )
            else:
                warn(
                    "number of etcd members will be reduced to %d, as no replacement for the etcd server in %s has been found (or provided)",
                    len(etcd_members) - 1,
                    target,
                )

    #
    # replacement for k8s masters
    #
    if target in masters:
        master_replacement = replacement
        if not master_replacement:
            # NOTE: even if no `replacement` was provided in the pillar,
            #       we probably have one at this point: if the master was
            #       running etcd as well, we have already tried to find
            #       a replacement in the previous step...
            #       however, we must verify that the etcd replacement
            #       is a valid k8s master replacement too.
            #       (ideally we should find the union of etcd and
            #       masters candidates)
            debug("looking for replacement for kubernetes master at %s", target)
            master_replacement = _get_one_for_role("kube-master", excluded=excluded)

        # check if the replacement provided/found is valid
        if master_replacement:
            bootstrapped_masters = get_from_args_or_with_expr(
                "booted_masters", kwargs, "G@roles:kube-master", booted=True
            )
            if master_replacement in bootstrapped_masters:
                warn_or_abort_on_replacement_provided(
                    "will not replace the k8s master %s: the replacement %s is already running a k8s master",
                    target,
                    master_replacement,
                )
                master_replacement = ""
            elif master_replacement in minions:
                warn_or_abort_on_replacement_provided(
                    "will not replace the k8s master at %s: the replacement found/provided is the k8s minion %s",
                    target,
                    master_replacement,
                )
                master_replacement = ""

        if master_replacement:
            # so far we do not support having two replacements for two roles,
            # so we check if the new replacement is compatible with any previous
            # replacement found so far. If it is not, keep the previous one and
            # warn the user
            if not replacement:
                replacement = master_replacement

            assert len(replacement) > 0
            if replacement == master_replacement:
                debug(
                    "setting %s as replacement for the kubernetes master %s",
                    replacement,
                    target,
                )
                replacement_roles.append("kube-master")
            else:
                warn(
                    "the k8s master replacement (%s) is not the same as the current replacement (%s) "
                    + "(it will run %s) so we cannot use it for running the k8s master too",
                    master_replacement,
                    replacement,
                    ",".join(replacement_roles),
                )

        if "kube-master" not in replacement_roles:
            # stability check: verify it is OK not to run the k8s master on the replacement
            if len(masters) <= _MIN_MASTERS_AFTER_REMOVAL:
                # we need at least one master (for running the k8s API at all times)
                abort(
                    "cannot remove k8s master %s: too few k8s masters, and no replacement found or provided",
                    target,
                )
            else:
                warn(
                    "number of k8s masters will be reduced to %d, as no replacement for the k8s master in %s has been found (or provided)",
                    len(masters) - 1,
                    target,
                )

    #
    # replacement for k8s minions
    #
    if target in minions:
        minion_replacement = replacement
        if not minion_replacement:
            debug("looking for replacement for kubernetes minion at %s", target)
            minion_replacement = _get_one_for_role("kube-minion", excluded=excluded)

        # check if the replacement provided/found is valid
        # NOTE: maybe the new role has already been assigned in Velum...
        if minion_replacement:
            bootstrapped_minions = get_from_args_or_with_expr(
                "booted_minions", kwargs, "G@roles:kube-minion", booted=True
            )
            if minion_replacement in bootstrapped_minions:
                warn_or_abort_on_replacement_provided(
                    "will not replace minion %s: the replacement %s is already running a k8s minion",
                    target,
                    minion_replacement,
                )
                minion_replacement = ""

            elif minion_replacement in masters:
                warn_or_abort_on_replacement_provided(
                    "will not replace the k8s minion %s: the replacement %s is already a k8s master",
                    target,
                    minion_replacement,
                )
                minion_replacement = ""

            elif "kube-master" in replacement_roles:
                warn_or_abort_on_replacement_provided(
                    "will not replace the k8s minion %s: the replacement found/provided, %s, is already scheduled for being a new k8s master",
                    target,
                    minion_replacement,
                )
                minion_replacement = ""

        if minion_replacement:
            # once again, check if the new replacement is compatible with any previous one
            if not replacement:
                replacement = minion_replacement

            assert len(replacement) > 0
            if replacement == minion_replacement:
                debug(
                    "setting %s as replacement for the k8s minion %s",
                    replacement,
                    target,
                )
                replacement_roles.append("kube-minion")
            else:
                warn(
                    "the k8s minion replacement (%s) is not the same as the current replacement (%s) "
                    + "(it will run %s) so we cannot use it for running the k8s minion too",
                    minion_replacement,
                    replacement,
                    ",".join(replacement_roles),
                )

        if "kube-minion" not in replacement_roles:
            # stability check: verify it is OK not to run the k8s minion on the replacement
            if len(minions) <= _MIN_MINIONS_AFTER_REMOVAL:
                # we need at least one minion (for running dex, kube-dns, etc.)
                abort(
                    "cannot remove k8s minion %s: too few k8s minions, and no replacement found or provided",
                    target,
                )
            else:
                warn(
                    "number of k8s minions will be reduced to %d, as no replacement for the k8s minion in %s has been found (or provided)",
                    len(minions) - 1,
                    target,
                )

    # other consistency checks...
    if replacement:
        # consistency check: if there is a replacement, it must have some (new) role(s)
        if not replacement_roles:
            abort("internal error: replacement %s has no roles assigned", replacement)
    else:
        # if no valid replacement has been found, clear the roles too
        replacement_roles = []

    return replacement, replacement_roles
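Example #3 relies on several module-level names that are not shown (`get_from_args_or_with_expr`, `_get_one_for_role`, `warn`, `debug`, `abort`, and the `_MIN_*_AFTER_REMOVAL` thresholds). The inline comments ("we need at least one etcd server / master / minion") suggest those thresholds are simply 1; a hedged sketch of the assumed values and of the function's contract from the caller's side, with made-up node names:

# Assumed threshold values (the inline comments above suggest "at least
# one" node of each role must remain after the removal):
_MIN_ETCD_MEMBERS_AFTER_REMOVAL = 1
_MIN_MASTERS_AFTER_REMOVAL = 1
_MIN_MINIONS_AFTER_REMOVAL = 1

# Hypothetical call, passing the node lists explicitly so no grain
# lookups are needed (the "node-*" names are made up):
#
#   new_node, new_roles = get_replacement_for(
#       "node-1",
#       masters=["node-1", "node-2"],
#       minions=["node-3", "node-4"],
#       etcd_members=["node-1", "node-2"],
#   )
#
# new_roles lists every role the replacement must take over, e.g.
# ["etcd", "kube-master"]; if no usable replacement exists, the
# function returns ("", []).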