def get_cluster_size(**kwargs):
    '''
    Determine the optimal/desired (but possible) etcd cluster size.

    Determines the desired number of cluster members, defaulting to the
    value supplied in the etcd:masters pillar, falling back to match the
    number of nodes with the kube-master role, and if this is less than 3,
    it will bump it to 3 (or the number of nodes available if the number
    of nodes is less than 3).

    Optional arguments:

      * `masters`: list of current kubernetes masters
      * `minions`: list of current kubernetes minions

    Returns an int >= 1.
    '''
    member_count = __salt__['pillar.get']('etcd:masters', None)

    masters = __salt__['caasp_nodes.get_from_args_or_with_expr'](
        'masters', kwargs, 'G@roles:kube-master')
    minions = __salt__['caasp_nodes.get_from_args_or_with_expr'](
        'minions', kwargs, 'G@roles:kube-minion')

    if not member_count:
        # A value has not been set in the pillar, calculate a "good" number
        # for the user.
        num_masters = len(masters)
        member_count = _optimal_etcd_number(num_masters)
        if member_count < MIN_RECOMMENDED_MEMBER_COUNT:
            # Attempt to increase the number of etcd masters to 3,
            # however, if we don't have 3 nodes in total,
            # then match the number of nodes we have.
            increased_member_count = len(masters) + len(minions)
            increased_member_count = min(MIN_RECOMMENDED_MEMBER_COUNT,
                                         increased_member_count)
            # ... but make sure we are using an odd number
            # (otherwise we could have some leader election problems)
            member_count = _optimal_etcd_number(increased_member_count)
            # BUG FIX: report the count actually used (member_count);
            # _optimal_etcd_number() can round increased_member_count
            # down to an odd number, so the original message could log
            # a value different from the one returned.
            warn("etcd member count too low (%d), increasing to %d",
                 num_masters, member_count)
            # TODO: go deeper and look for candidates in nodes with
            # no role (as get_replacement_for_member() does)
    else:
        # A value has been set in the pillar, respect the users choice
        # even it's not a "good" number.
        member_count = int(member_count)
        if member_count < MIN_RECOMMENDED_MEMBER_COUNT:
            warn("etcd member count too low (%d), consider increasing "
                 "to %d", member_count, MIN_RECOMMENDED_MEMBER_COUNT)

    # never return less than one member
    member_count = max(1, member_count)
    debug("using member count = %d", member_count)
    return member_count
def warn_or_abort_on_replacement_provided(msg, *args):
    # NOTE(review): this module-level function is a copy of the helper
    # nested inside get_replacement_for(), but `replacement_provided`
    # is a free variable that is not defined at module scope — calling
    # this function as-is raises NameError. Presumably a leftover from
    # a refactor; confirm whether it has any callers and remove or fix.
    if replacement_provided:
        abort("the user provided replacement cannot be used: " + msg,
              *args)
    else:
        warn(msg, *args)
def get_replacement_for(target, replacement="", **kwargs):
    """
    When removing a node `target`, try to get a `replacement` (and the
    new roles that must be assigned) for all the roles that were running
    there.

    If the user provides an explicit `replacement`, verify that that
    replacement is valid. In case the user-provided is not valid, raise
    an exception (aborting the execution).

    If no replacement can be found, we are fine as long as we have a
    minimum number of nodes with that role (ie, for masters, we are fine
    as long as we have at least one master).

    Returns a `(replacement, replacement_roles)` tuple: `replacement` is
    "" when no usable replacement exists, and `replacement_roles` lists
    the roles ("etcd", "kube-master", "kube-minion") the replacement
    node will take over.
    """
    assert target

    excluded = kwargs.get("excluded", [])

    replacement_provided = replacement != ""
    replacement_roles = []

    def warn_or_abort_on_replacement_provided(msg, *args):
        # a problem in a replacement we found ourselves is only worth a
        # warning, but an unusable user-provided replacement is fatal
        if replacement_provided:
            abort("the user provided replacement cannot be used: " + msg,
                  *args)
        else:
            warn(msg, *args)

    # preparations

    # check: we cannot try to remove some 'virtual' nodes
    forbidden = get_from_args_or_with_expr("forbidden", kwargs,
                                           "P@roles:(admin|ca)")
    if target in forbidden:
        abort('%s cannot be removed: it has a "ca" or "admin" role',
              target)
    elif replacement_provided and replacement in forbidden:
        abort('%s cannot be replaced by %s: the replacement has a "ca" or "admin" role',
              target, replacement)
    elif replacement_provided and replacement in excluded:
        abort("%s cannot be replaced by %s: the replacement is in the list of nodes excluded",
              target, replacement)

    masters = get_from_args_or_with_expr("masters", kwargs,
                                         "G@roles:kube-master")
    minions = get_from_args_or_with_expr("minions", kwargs,
                                         "G@roles:kube-minion")
    etcd_members = get_from_args_or_with_expr("etcd_members", kwargs,
                                              "G@roles:etcd")

    #
    # replacement for etcd members
    #
    if target in etcd_members:
        etcd_replacement = replacement
        if not etcd_replacement:
            debug("looking for replacement for etcd at %s", target)
            # we must choose another node and promote it to be an etcd member
            etcd_replacement = _get_one_for_role("etcd", excluded=excluded)

        # check if the replacement provided is valid
        if etcd_replacement:
            # BUG FIX: query nodes already running etcd with the etcd
            # grain. The original used "G@roles:kube-master" here, which
            # contradicts both the "booted_etcd_members" cache key and
            # the etcd_members lookup above, so the check tested for a
            # running master instead of a running etcd server.
            bootstrapped_etcd_members = get_from_args_or_with_expr(
                "booted_etcd_members", kwargs, "G@roles:etcd", booted=True)
            if etcd_replacement in bootstrapped_etcd_members:
                warn_or_abort_on_replacement_provided(
                    "the replacement for the etcd server %s cannot be %s: another etcd server is already running there",
                    target, etcd_replacement)
                etcd_replacement = ""
            # the etcd replacement can be run in bootstrapped masters/minions,
            # so we are done with the incompatibility checks...

        if etcd_replacement:
            debug("setting %s as the replacement for the etcd member %s",
                  etcd_replacement, target)
            replacement = etcd_replacement
            replacement_roles.append("etcd")

        if "etcd" not in replacement_roles:
            # stability check: is it ok to simply lose this etcd member?
            if len(etcd_members) <= _MIN_ETCD_MEMBERS_AFTER_REMOVAL:
                # we need at least one etcd server
                abort("cannot remove etcd member %s: too few etcd members, and no replacement found or provided",
                      target)
            else:
                # NOTE(review): the "%d" here (and in the equivalent
                # master/minion messages below) is the count *before*
                # removal — confirm whether "len(...) - 1" was intended.
                warn("number of etcd members will be reduced to %d, as no replacement for etcd server in %s has been found (or provided)",
                     len(etcd_members), target)

    #
    # replacement for k8s masters
    #
    if target in masters:
        master_replacement = replacement
        if not master_replacement:
            # NOTE: even if no `replacement` was provided in the pillar,
            #       we probably have one at this point: if the master was
            #       running etcd as well, we have already tried to find
            #       a replacement in the previous step...
            #       however, we must verify that the etcd replacement
            #       is a valid k8s master replacement too.
            #       (ideally we should find the union of etcd and
            #       masters candidates)
            debug("looking for replacement for kubernetes master at %s",
                  target)
            master_replacement = _get_one_for_role("kube-master",
                                                   excluded=excluded)

        # check if the replacement provided/found is valid
        if master_replacement:
            bootstrapped_masters = get_from_args_or_with_expr(
                "booted_masters", kwargs, "G@roles:kube-master", booted=True)
            if master_replacement in bootstrapped_masters:
                warn_or_abort_on_replacement_provided(
                    "will not replace the k8s master %s: the replacement %s is already running a k8s master",
                    target, master_replacement)
                master_replacement = ""
            elif master_replacement in minions:
                # a node cannot be master and minion at the same time
                warn_or_abort_on_replacement_provided(
                    "will not replace the k8s master at %s: the replacement found/provided is the k8s minion %s",
                    target, master_replacement)
                master_replacement = ""

        if master_replacement:
            # so far we do not support having two replacements for two roles,
            # so we check if the new replacement is compatible with any previous
            # replacement found so far. If it is not, keep the previous one and
            # warn the user
            if not replacement:
                replacement = master_replacement
            assert len(replacement) > 0
            if replacement == master_replacement:
                debug("setting %s as replacement for the kubernetes master %s",
                      replacement, target)
                replacement_roles.append("kube-master")
            else:
                warn("the k8s master replacement (%s) is not the same as the current replacement (%s) " +
                     "(it will run %s) so we cannot use it for running the k8s master too",
                     master_replacement, replacement,
                     ",".join(replacement_roles))

        if "kube-master" not in replacement_roles:
            # stability check: check if it is ok not to run the k8s master in the replacement
            if len(masters) <= _MIN_MASTERS_AFTER_REMOVAL:
                # we need at least one master (for runing the k8s API at all times)
                abort("cannot remove k8s master %s: too few k8s masters, and no replacement found or provided",
                      target)
            else:
                warn("number of k8s masters will be reduced to %d, as no replacement for the k8s master in %s has been found (or provided)",
                     len(masters), target)

    #
    # replacement for k8s minions
    #
    if target in minions:
        minion_replacement = replacement
        if not minion_replacement:
            debug("looking for replacement for kubernetes minion at %s",
                  target)
            minion_replacement = _get_one_for_role("kube-minion",
                                                   excluded=excluded)

        # check if the replacement provided/found is valid
        # NOTE: maybe the new role has already been assigned in Velum...
        if minion_replacement:
            bootstrapped_minions = get_from_args_or_with_expr(
                "booted_minions", kwargs, "G@roles:kube-minion", booted=True)
            if minion_replacement in bootstrapped_minions:
                warn_or_abort_on_replacement_provided(
                    "will not replace minion %s: the replacement %s is already running a k8s minion",
                    target, minion_replacement)
                minion_replacement = ""
            elif minion_replacement in masters:
                warn_or_abort_on_replacement_provided(
                    "will not replace the k8s minion %s: the replacement %s is already a k8s master",
                    target, minion_replacement)
                minion_replacement = ""
            elif "kube-master" in replacement_roles:
                # the replacement is already earmarked to become a master
                warn_or_abort_on_replacement_provided(
                    "will not replace the k8s minion %s: the replacement found/provided, %s, is already scheduled for being a new k8s master",
                    target, minion_replacement)
                minion_replacement = ""

        if minion_replacement:
            # once again, check if the new replacement is compatible with any previous one
            if not replacement:
                replacement = minion_replacement
            assert len(replacement) > 0
            if replacement == minion_replacement:
                debug("setting %s as replacement for the k8s minion %s",
                      replacement, target)
                replacement_roles.append("kube-minion")
            else:
                warn("the k8s minion replacement (%s) is not the same as the current replacement (%s) " +
                     "(it will run %s) so we cannot use it for running the k8s minion too",
                     minion_replacement, replacement,
                     ",".join(replacement_roles))

        if "kube-minion" not in replacement_roles:
            # stability check: check if it is ok not to run the k8s minion in the replacement
            if len(minions) <= _MIN_MINIONS_AFTER_REMOVAL:
                # we need at least one minion (for running dex, kube-dns, etc..)
                abort("cannot remove k8s minion %s: too few k8s minions, and no replacement found or provided",
                      target)
            else:
                # BUG FIX: report the minion count — the original passed
                # len(masters) in a message about the number of minions.
                warn("number of k8s minions will be reduced to %d, as no replacement for the k8s minion in %s has been found (or provided)",
                     len(minions), target)

    # other consistency checks...
    if replacement:
        # consistency check: if there is a replacement, it must have some (new) role(s)
        if not replacement_roles:
            abort("internal error: replacement %s has no roles assigned",
                  replacement)
    else:
        # if no valid replacement has been found, clear the roles too
        replacement_roles = []

    return replacement, replacement_roles