def serialize_config(self):
    with open(self.config_filename, mode='r') as local_dump_file:
        try:
            return yaml.safe_load(local_dump_file)
        except yaml.YAMLError as yaml_error:
            print(crayons.red(f'Error: failed to load from {self.config_filename}'))
            print(crayons.red(f'{yaml_error}'))
            return None
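# Illustrative only: serialize_config() returns the parsed kubeconfig as a plain dict.
# Exact values depend on the cluster; the top-level keys below are the standard kubeconfig
# layout that the merging logic in add_local_cluster_config() further down relies on.
#
#   {
#       'apiVersion': 'v1',
#       'kind': 'Config',
#       'clusters': [{'name': 'kubernetes', 'cluster': {'server': 'https://10.0.0.1:6443'}}],
#       'users': [{'name': 'kubernetes-admin', 'user': {...}}],
#       'contexts': [{'name': 'kubernetes-admin@kubernetes',
#                     'context': {'cluster': 'kubernetes', 'user': 'kubernetes-admin'}}],
#       'current-context': 'kubernetes-admin@kubernetes',
#   }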
def metallb_install(self, file='', version='0.12.0', interface='vmbr0'):
    prepend = f'HOME={self.home}'
    if not file:
        values_file = 'values.yaml'
        local_workdir = os.path.join(WORKDIR, '.metallb')
        local_values_path = os.path.join(local_workdir, values_file)
        metallb_range = pve_cluster_config_client.loadbalancer_ip_range_to_string_or_list()
        if not metallb_range:
            logging.error(crayons.red('Could not deploy MetalLB with given cluster values.'))
            return
        common_iface = self.get_bridge_common_interface(interface)
        if not common_iface:
            logging.error(crayons.red(f'Interface {interface} not found as common bridge in PVE Cluster nodes.'))
            return
        template_values = (
            'configInline:',
            '  address-pools:',
            f'    - name: {common_iface}',
            '      protocol: layer2',
            '      addresses:',
            f'      - {metallb_range}',
            'controller:',
            '  tolerations:',
            '    - effect: NoExecute',
            '      key: node.kubernetes.io/not-ready',
            '      operator: Exists',
            '      tolerationSeconds: 60',
            '    - effect: NoExecute',
            '      key: node.kubernetes.io/unreachable',
            '      operator: Exists',
            '      tolerationSeconds: 60',
        )
        self.local.run(f'mkdir -p {local_workdir}')
        _, file, error = KubeProvisioner.write_config_to_file(
            file=values_file,
            local_file_path=local_values_path,
            script_body=template_values
        )
        if error:
            # Logging executes in write_config_to_file
            return
    if not self.helm_exists(prepend):
        return

    print(crayons.cyan('Deploying MetalLB'))
    metal_install = self.local.run(f'{prepend} helm install --name metallb -f {file} stable/metallb --version={version}')
    if metal_install.failed:
        logging.error(crayons.red('MetalLB installation failed. Rolling back.'))
        rollback = self.local.run('helm delete metallb --purge')
        if rollback.ok:
            print(crayons.green('Rollback completed'))
        return
    print(crayons.green('MetalLB installed'))
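# For reference, with an assumed bridge name of 'vmbr0' and an assumed address range of
# '192.168.1.200-192.168.1.220', the template_values above render to a values file roughly
# like the following (a MetalLB layer2 address pool plus controller tolerations):
#
#   configInline:
#     address-pools:
#       - name: vmbr0
#         protocol: layer2
#         addresses:
#         - 192.168.1.200-192.168.1.220
#   controller:
#     tolerations:
#       - effect: NoExecute
#         key: node.kubernetes.io/not-ready
#         operator: Exists
#         tolerationSeconds: 60
#       - effect: NoExecute
#         key: node.kubernetes.io/unreachable
#         operator: Exists
#         tolerationSeconds: 60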
def bootstrap_control_plane(self, version='1.16'):
    cni_definitions = self.supported_cnis(networking=self.control_plane.networking)
    print(crayons.cyan(f'Building K8s Control-Plane using High Availability: {self.control_plane.ha_masters}'))
    if self.control_plane.ha_masters:
        if not self.control_plane.apiserver_ip:
            logging.warning(crayons.yellow('API Server IP must be provided for the HA Control Plane option'))
            return None

    print(crayons.cyan(f'Getting Container Networking Definitions for CNI: {self.control_plane.networking}'))
    self.instance.self_node_sudo.execute(f'mkdir -p {self.remote_path}')
    self.instance.self_node_sudo.execute(f'chown -R $USER:$USER {self.remote_path}')
    if self.control_plane.networking == 'calico':
        self.instance.self_node.execute(f'wget {cni_definitions.cni_url} -O {os.path.join(self.remote_path, cni_definitions.file)}')

    print(crayons.cyan('Pulling Required Images from gcr.io'))
    has_patch, _, _, _ = semver_has_patch_suffix(version)
    image_version = version if has_patch else f'stable-{version}'
    version_snippet = (
        f"--kubernetes-version {image_version.split('-')[0]}"
        if '-' in image_version
        else f'--kubernetes-version {image_version}'
    )
    try:
        self.instance.self_node_sudo.execute(f'kubeadm config images pull {version_snippet}', warn=True)
    except invoke.exceptions.UnexpectedExit:
        logging.warning(crayons.yellow(f'Version: {version_snippet} does not exist.'))
        version_snippet = ''
        self.instance.self_node_sudo.execute('kubeadm config images pull')

    print(crayons.blue('Running pre-flight checks & deploying Control Plane'))
    init_command = (
        f'kubeadm init --control-plane-endpoint '
        f'"{self.control_plane.apiserver_ip}:{self.control_plane.apiserver_port}" '
        f'--upload-certs {cni_definitions.networking_option} {version_snippet}'
    ) if self.control_plane.ha_masters else (
        f'kubeadm init {cni_definitions.networking_option} {version_snippet}'
    )
    deployed = self.instance.self_node_sudo.execute(init_command)
    if deployed.failed:
        logging.error(crayons.red(f'Master {self.instance.vm_attributes.name} initialization was not performed correctly.'))
        self.rollback_node()
        return None

    print(crayons.green('Initial master deployment success.'))
    time.sleep(60)
    self.post_install_steps()
    if not self.deploy_container_networking(cni_definitions):
        logging.error(crayons.red(f'Container networking {self.control_plane.networking} failed to deploy correctly.'))
        return None

    master_executor = KubeExecutor(wrapper=self.instance.self_node)
    master_executor.wait_for_running_system_status(namespace='kube-system', remote=True)
    if self.control_plane.ha_masters:
        return self.get_certificate_key(deployed)
    return None
def get_join_token(self, control_plane_node=False, certificate_key=''):
    join_token = ''
    if self.control_plane.ha_masters:
        token_list = self.instance.self_node_sudo.execute("kubeadm token list").stdout.split('\n')
        for line in token_list:
            if 'authentication,signing' in line:
                print(crayons.white(line))
                join_token = line.split()[0].strip()
        join_url = f'{self.control_plane.apiserver_ip}:{self.control_plane.apiserver_port}'
    else:
        join_url = f'{self.instance.allowed_ip}:{self.control_plane.apiserver_port}'
        join_token = self.instance.self_node_sudo.execute("kubeadm token list | awk '{print $1}'").stdout.split('TOKEN')[-1].strip()
    while not join_token:
        logging.warning(crayons.yellow('Join Token not found on master. Creating new join token...'))
        self.instance.self_node_sudo.execute("kubeadm token create")
        join_token = self.instance.self_node_sudo.execute("kubeadm token list | awk '{print $1}'").stdout.split('TOKEN')[-1].strip()

    cert_hash = self.instance.self_node.execute(
        "openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | "
        "openssl rsa -pubin -outform der 2>/dev/null | "
        "openssl dgst -sha256 -hex | sed 's/^.* //'"
    ).stdout.strip()
    if not join_token or not cert_hash:
        logging.error(crayons.red('Unable to retrieve join-token or cert-hash'))
        return None
    return (
        f'kubeadm join {join_url} --token {join_token} '
        f'--discovery-token-ca-cert-hash sha256:{cert_hash} '
        f'--control-plane --certificate-key {certificate_key}'
    ) if control_plane_node else (
        f'kubeadm join {join_url} --token {join_token} --discovery-token-ca-cert-hash sha256:{cert_hash}'
    )
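# Illustrative output of get_join_token() (token, hash, and certificate key values are made up):
#
#   worker join (control_plane_node=False):
#       kubeadm join 10.0.0.100:6443 --token abcdef.0123456789abcdef \
#           --discovery-token-ca-cert-hash sha256:1234...cdef
#
#   additional control-plane join (control_plane_node=True):
#       kubeadm join 10.0.0.100:6443 --token abcdef.0123456789abcdef \
#           --discovery-token-ca-cert-hash sha256:1234...cdef \
#           --control-plane --certificate-key 5678...ef90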
@staticmethod
def write_config_to_file(file, local_file_path, script_body):
    try:
        with open(local_file_path, mode='w') as local_script_fp:
            local_script_fp.write('\n'.join(script_body))
            return file, local_file_path, None
    except (OSError, IOError) as file_write_error:
        logging.error(crayons.red(file_write_error))
        return None, None, file_write_error
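# Minimal usage sketch (paths and values are hypothetical): callers unpack the 3-tuple and
# only need to bail out on error, since write_config_to_file() already logs the failure.
#
#   _, values_path, error = KubeProvisioner.write_config_to_file(
#       file='values.yaml',
#       local_file_path='/tmp/.metallb/values.yaml',
#       script_body=('key: value', 'other: value'),
#   )
#   if error:
#       return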
def helm_install_v2(self, patch=True, helm=True, tiller=True):
    prepend = f'HOME={self.home}'
    helm_script = 'https://git.io/get_helm.sh'
    current_context = self.get_current_context()
    if not current_context:
        return

    if helm:
        print(crayons.cyan('Installing Helm locally'))
        install = self.local.run(f'{prepend} curl -L {helm_script} | bash')
        if not install.ok:
            logging.error(crayons.red('Helm installation failed'))
            return
        self.local.run(f'echo "source <(helm completion bash)" >> {self.home}/.bashrc')
        print(crayons.green('Helm installed locally'))

    if tiller:
        if not patch:
            logging.warning(crayons.yellow('No-Patch (K8s versions > 1.16.*) installation is not implemented.'))
            return
        print(crayons.cyan('Bootstrapping Tiller with patch for K8s versions > 1.16.*'))
        bootstrap = self.tiller_install_v2_patch()
        if not bootstrap.ok:
            logging.error(crayons.red('Helm initialization with Tiller failed'))
            logging.warning(crayons.yellow('Rolling back installation'))
            rollback = self.local.run(f'{prepend} helm reset --force --remove-helm-home')
            if rollback.ok:
                print(crayons.green('Rollback completed'))
            return

        tiller_ready = ''
        while not tiller_ready:
            print(crayons.white('Ping for tiller ready'))
            tiller_ready = self.local.run(
                f'{prepend} kubectl get pod --namespace kube-system -l app=helm,name=tiller '
                '--field-selector=status.phase=Running'
            ).stdout.strip()
            time.sleep(1)
        print(crayons.green(f'Helm initialized with Tiller for context: {current_context}'))
        self.wait_for_running_system_status()
        time.sleep(10)
        print(crayons.magenta('You might need to run "helm init --client-only" to initialize repos'))
        self.local.run(f'{prepend} helm init --client-only')
def rollback_node(self):
    logging.warning(crayons.yellow(f'Performing Node {self.instance.vm_attributes.name} Rollback.'))
    rollback = self.instance.self_node_sudo.execute('kubeadm reset -f --v=5')
    config_reset = self.instance.self_node.execute('rm -f $HOME/.kube/config', warn=True)
    iptables_reset = self.instance.self_node_sudo.execute(
        "su - root -c 'iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X'"
    )
    if rollback.ok:
        print(crayons.green('Rollback completed.'))
    else:
        logging.error(crayons.red('Rollback failed'))
        return
    if config_reset.ok:
        print(crayons.green('Config removed.'))
    else:
        logging.warning(crayons.yellow('Config removal not performed.'))
    if iptables_reset.ok:
        print(crayons.green('IPTables reset completed.'))
    else:
        logging.error(crayons.red('IPTables reset failed.'))
        return
@staticmethod
def supported_cnis(networking='weave'):
    if networking not in CNI:
        logging.error(crayons.red(f'CNI option: {networking} not supported'))
        return None
    if networking == 'calico':
        return CNIDefinitions(
            cni_url=CNI.get(networking),
            pod_network_cidr='192.168.0.0/16',
            networking_option='--pod-network-cidr=192.168.0.0/16',
            file='calico.yaml'
        )
    return CNIDefinitions(cni_url=CNI.get(networking))
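# Assumed shape of CNIDefinitions (defined elsewhere in the project): the usage above implies a
# simple container with defaults for everything except cni_url, e.g. a NamedTuple along these
# lines. CNI is assumed to be a dict mapping a supported CNI name ('weave', 'calico', ...) to
# its manifest URL.
#
#   from typing import NamedTuple
#
#   class CNIDefinitions(NamedTuple):
#       cni_url: str
#       pod_network_cidr: str = ''
#       networking_option: str = ''
#       file: str = ''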
def deploy_dashboard(self, remote_path='/opt/kube', local=True):
    """
    The remote_path argument must already exist on the target hosts, created by a previous
    run of the install_kube() method.
    """
    prepend = f'HOME={self.home} ' if local else ''
    run = self.local.run if local else self.wrapper.execute
    bootstrap_path = os.path.join(BASE_PATH, 'bootstrap') if local else os.path.join(remote_path, 'bootstrap')
    user_creation = f'{bootstrap_path}/dashboard-adminuser.yaml'

    print(crayons.cyan('Deploying Dashboard'))
    dashboard = run(command=f'{prepend}kubectl apply -f {KUBE_DASHBOARD_URL}')
    if dashboard.ok:
        print(crayons.green('Kubernetes Dashboard deployed successfully.'))
        print(crayons.cyan('Deploying User admin-user with role-binding: cluster-admin'))
        time.sleep(30)
        user_role = run(command=f'{prepend}kubectl apply -f {user_creation}')
        if user_role.ok:
            print(crayons.green('User admin-user created successfully.'))
        else:
            logging.error(crayons.red('User admin-user was not created correctly.'))
    else:
        logging.error(crayons.red('Dashboard was not deployed correctly.'))
def join_node(self, leader: InstanceClone, control_plane_node=False, certificate_key=''):
    leader_provisioner = KubeProvisioner.kube_provisioner_factory(os_type=leader.vm_attributes.os_type)(
        instance=leader,
        control_plane=self.control_plane,
        remote_path=self.remote_path
    )
    if not certificate_key or not leader.allowed_ip:
        join_command = leader_provisioner.get_join_token_v2(control_plane_node)
    else:
        join_command = leader_provisioner.get_join_token(control_plane_node, certificate_key)
    if not join_command:
        logging.error(crayons.red('Node Join command not generated. Abort.'))
        return

    print(crayons.white(f'Join command: {join_command}'))
    print(crayons.cyan(f'Joining Node: {self.instance.vm_attributes.name} to the cluster'))
    join = self.instance.self_node_sudo.execute(join_command)
    if join.failed:
        logging.error(crayons.red(f'Joining Node: {self.instance.vm_attributes.name} failed. Performing Rollback.'))
        self.rollback_node()
        return
    if control_plane_node:
        self.post_install_steps()
    print(crayons.green(f'Node: {self.instance.vm_attributes.name} has joined the cluster.'))
def wait_for_running_system_status(self, namespace='kube-system', remote=False, poll_interval=1):
    runner = LOCAL.run if not remote else self.wrapper.execute
    prepend = f'HOME={self.home} ' if not remote else ''
    awk_table_1 = "awk '{print $1}'"
    awk_table_2 = "awk '{print $2}'"
    non_running_command = f'kubectl get pods -n {namespace} --field-selector=status.phase!=Running'
    running_command = f'kubectl get pods -n {namespace} --field-selector=status.phase=Running | {awk_table_1}'
    running_command_current_desired = f'kubectl get pods -n {namespace} --field-selector=status.phase=Running | {awk_table_2}'

    pods_not_ready = 'initial'
    while pods_not_ready:
        print(crayons.white(f'Wait for all {namespace} pods to enter "Running" phase'))
        pods_not_ready = runner(command=f'{prepend}{non_running_command}').stdout.strip()
        time.sleep(poll_interval)
    print(crayons.green(f'All {namespace} pods entered "Running" phase.'))

    all_complete = False
    while not all_complete:
        print(crayons.white(f'Wait for all {namespace} pods to reach "Desired" state'))
        names_table = runner(command=f'{prepend}{running_command}', hide=True).stdout.strip()
        current_to_desired_table = runner(command=f'{prepend}{running_command_current_desired}', hide=True).stdout.strip()
        # Drop the header tokens ('NAME' and 'READY') that kubectl prints before the pod rows.
        clean_table = current_to_desired_table.split()[1:]
        names = names_table.split()[1:]
        complete_table = []
        state_table = [
            {
                'name': names[clean_table.index(entry)],
                'current': int(entry.split('/')[0]),
                'desired': int(entry.split('/')[1]),
                'complete': False
            }
            for entry in clean_table
        ]
        for state_entry in state_table:
            complete = state_entry.get('current') == state_entry.get('desired')
            state_entry['complete'] = complete
            complete_table.append(complete)
            print(
                crayons.white(f'Name: {state_entry.get("name")} Complete: ') + (
                    crayons.green(f'{complete}') if complete else crayons.red(f'{complete}')
                )
            )
        all_complete = all(complete_table)
        time.sleep(poll_interval)
    print(crayons.green(f'All {namespace} pods reached "Desired" state.'))
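# For context: `kubectl get pods` prints a header row plus one row per pod, e.g.
#
#   NAME                       READY   STATUS    RESTARTS   AGE
#   coredns-5c98db65d4-2xvzq   1/1     Running   0          5m
#
# awk '{print $1}' yields the NAME column and awk '{print $2}' the READY column, which is why
# the loop above slices [1:] to drop the header tokens and splits each READY entry on '/' to
# compare current vs desired container counts.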
def install_control_plane_loadbalancer(self, is_leader=False):
    if not self.control_plane.ha_masters:
        logging.warning(crayons.yellow('Skipping keepalived install. Control Plane is not deployed in High Availability mode.'))
        return None

    local = LOCAL
    host = self.instance.vm_attributes.name
    remote_path = '/etc/keepalived'
    state = 'MASTER' if is_leader else 'BACKUP'
    priority = '101' if is_leader else '100'
    interface = self.get_instance_interface()
    virtual_ip = self.get_control_plane_virtual_ip()
    script_file, local_script_path, script_error = self.generate_keepalived_healthcheck(virtual_ip)
    config_file, local_config_path, config_error = self.generate_keepalived_config(
        virtual_ip=virtual_ip,
        interface=interface,
        state=state,
        priority=priority
    )
    if script_error or config_error:
        logging.error(crayons.red(f'Abort keepalived install on {host}'))
        return None

    print(crayons.cyan(f'Sending config files to {host}'))
    sent1 = local.run(f'scp {local_config_path} {host}:~')
    sent2 = local.run(f'scp {local_script_path} {host}:~')
    if sent1.ok and sent2.ok:
        self.wait_pkg_lock()
        print(crayons.blue(f'Installing keepalived service on {host}'))
        self.check_install_prerequisites()
        self.instance.self_node.execute(f'sudo mv {config_file} {remote_path} && sudo mv {script_file} {remote_path}')
        self.instance.self_node_sudo.execute(f'chmod 0666 {remote_path}/{config_file}')
        restart = self.instance.self_node_sudo.execute('systemctl restart keepalived')
        if restart.ok:
            self.instance.self_node_sudo.execute('systemctl status keepalived')
            test_connection = f'if nc -v -w 5 {virtual_ip} {self.control_plane.apiserver_port}; then echo "Success"; fi'
            output = self.instance.self_node.execute(test_connection).stderr
            # A 'Connection refused' from the virtual IP means the address is reachable
            # (keepalived holds the VIP) even though the API server is not listening yet.
            if 'Connection refused' in output.strip():
                print(crayons.green(f'Keepalived running on {host} with virtual ip: {virtual_ip}'))
    return virtual_ip
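# Illustrative only: generate_keepalived_config() is not shown in this section, but a
# keepalived.conf wired to the parameters above (state, priority, interface, virtual_ip,
# health-check script) typically looks roughly like this; the actual generated template may differ.
#
#   vrrp_script check_apiserver {
#       script "/etc/keepalived/check_apiserver.sh"
#       interval 3
#   }
#   vrrp_instance VI_1 {
#       state MASTER            # BACKUP on non-leader masters
#       interface vmbr0         # value of get_instance_interface()
#       virtual_router_id 51
#       priority 101            # 100 on BACKUP nodes
#       virtual_ipaddress {
#           10.0.0.100          # value of get_control_plane_virtual_ip()
#       }
#       track_script {
#           check_apiserver
#       }
#   }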
def add_local_cluster_config(
        self,
        custom_cluster_name=None,
        custom_context=None,
        custom_user_name=None,
        set_current_context=True
):
    # TODO: Needs further refactoring.
    local_kube = os.path.join(self.home, '.kube')
    remote_kube = os.path.join(self.remote, '.kube')
    local_config_base = os.path.join(local_kube, 'config')
    remote_config_base = os.path.join(remote_kube, 'config')
    local_dump_folder = os.path.join(WORKDIR, 'dump')
    local_dump = os.path.join(local_dump_folder, self.host, 'config.yaml')

    self.local.run(f'mkdir -p {local_dump_folder} && mkdir -p {os.path.join(local_dump_folder, self.host)}')
    self.local.run(f'scp {self.host}:{remote_config_base} {local_dump}')

    remote_kube_config = KubeConfig(
        config_filename=local_dump,
        custom_cluster_name=custom_cluster_name,
        custom_context=custom_context,
        custom_user_name=custom_user_name,
        set_current_context=set_current_context
    )
    remote_config_yaml = remote_kube_config.serialize_config()
    self.local.run(f'rm -rf {local_dump_folder}')

    current_context = remote_kube_config.current_context
    print(crayons.cyan('Current Context: ') + f'{current_context}')
    remote_cluster = remote_kube_config.cluster_first
    remote_user = remote_kube_config.user_first
    remote_context = remote_kube_config.context_first
    remote_cluster_name, _, _ = KubeConfig.get_cluster_data(remote_cluster)
    remote_user_name, _, _ = KubeConfig.get_user_data(remote_user)

    self.local.run(f'cp {local_config_base} {local_config_base}.bak')
    local_kube_config = KubeConfig(
        config_filename=local_config_base,
        custom_cluster_name=custom_cluster_name,
        custom_context=custom_context,
        custom_user_name=custom_user_name,
        set_current_context=set_current_context
    )
    local_config_yaml = local_kube_config.serialize_config()
    local_kube_config.update_current_context(current_context)

    custom_cluster_name_used = bool(custom_cluster_name)
    custom_user_name_used = bool(custom_user_name)
    local_kube_config.get_or_create_custom_cluster_user_values(
        new_cluster_name=remote_cluster_name,
        new_user_name=remote_user_name
    )
    local_cluster_entries = local_kube_config.clusters
    local_context_entries = local_kube_config.contexts
    local_user_entries = local_kube_config.users

    for cluster_entry in local_cluster_entries:
        if cluster_entry.get('name') == remote_cluster_name or custom_cluster_name_used:
            remote_cluster['name'] = custom_cluster_name
    for user_entry in local_user_entries:
        if user_entry.get('name') == remote_user_name or custom_user_name_used:
            remote_user['name'] = custom_user_name
    for context_entry in local_context_entries:
        if context_entry.get('context').get('cluster') == remote_cluster_name or custom_cluster_name_used:
            remote_context['context']['cluster'] = custom_cluster_name
        if context_entry.get('context').get('user') == remote_user_name or custom_user_name_used:
            remote_context['context']['user'] = custom_user_name
    remote_context['name'] = (
        custom_context
        if custom_context
        else f'{remote_user["name"]}@{remote_cluster["name"]}'
    )

    if not local_kube_config.clusters:
        local_config_yaml['clusters'] = remote_config_yaml.get('clusters')
    else:
        local_config_yaml['clusters'].append(remote_cluster)
    if not local_kube_config.users:
        local_config_yaml['users'] = remote_config_yaml.get('users')
    else:
        local_config_yaml['users'].append(remote_user)
    if not local_kube_config.contexts:
        local_config_yaml['contexts'] = remote_config_yaml.get('contexts')
    else:
        local_config_yaml['contexts'].append(remote_context)
    local_config_yaml['current-context'] = (
        custom_context
        if set_current_context and custom_context
        else local_kube_config.current_context
    )

    try:
        with open(local_config_base, mode='w') as local_config_file_mutated:
            try:
                yaml.safe_dump(local_config_yaml, stream=local_config_file_mutated)
            except yaml.YAMLError as yaml_error:
                logging.error(crayons.red(f'Error: failed to dump config to {local_config_base}'))
                logging.error(crayons.red(f'{yaml_error}'))
                print(crayons.blue(f'Performing rollback of {local_config_base}'))
                self.local.run(f'mv {local_config_base}.bak {local_config_base}')
                print(crayons.green('Rollback complete'))
                return
    except Exception as generic:
        logging.error(crayons.red(f'Error during writing to kube config {local_config_base}: {generic}'))
        print(crayons.blue(f'Performing rollback of {local_config_base}'))
        self.local.run(f'mv {local_config_base}.bak {local_config_base}')
        print(crayons.green('Rollback complete'))