def deploy(self, log_cli_level="DEBUG"):
    """
    Deploy an OCP cluster on the Bare Metal platform.

    Pushes dnsmasq/PXE boot configuration to the helper node, PXE-boots
    every cluster machine over IPMI, then waits for the openshift-installer
    bootstrap and install phases to finish and prepares the nodes for OCS.

    Args:
        log_cli_level (str): openshift installer's log level
            (default: "DEBUG")
    """
    # Uploading pxe files
    logger.info("Deploying OCP cluster for Bare Metal platform")
    logger.info(
        f"Openshift-installer will be using log level:{log_cli_level}")
    # Push the shared dnsmasq config to the helper node over SSH
    upload_file(
        self.host,
        constants.COMMON_CONF_FILE,
        os.path.join(self.helper_node_details["bm_dnsmasq_dir"],
                     "dnsmasq.common.conf"),
        self.user,
        key_file=self.private_key,
    )
    logger.info("Uploading PXE files")
    ocp_version = get_ocp_version()
    float_ocp_version = float(ocp_version)
    # Generate and upload a per-machine PXE config for every entry that
    # belongs to a cluster (or is marked as an extra node). The remote
    # file name is the PXE convention: 01-<mac-with-dashes>.
    for machine in self.mgmt_details:
        if self.mgmt_details[machine].get(
                "cluster_name") or self.mgmt_details[machine].get(
                "extra_node"):
            pxe_file_path = self.create_pxe_files(
                ocp_version=float_ocp_version,
                role=self.mgmt_details[machine].get("role"),
            )
            upload_file(
                server=self.host,
                localpath=pxe_file_path,
                remotepath=f"{self.helper_node_details['bm_tftp_dir']}"
                f"/pxelinux.cfg/01-{self.mgmt_details[machine]['mac'].replace(':', '-')}",
                user=self.user,
                key_file=self.private_key,
            )
    # Applying Permission
    cmd = f"chmod 755 -R {self.helper_node_details['bm_tftp_dir']}"
    self.helper_node_handler.exec_cmd(cmd=cmd)
    # Applying Permission
    cmd = f"chmod 755 -R {self.helper_node_details['bm_path_to_upload']}"
    self.helper_node_handler.exec_cmd(cmd=cmd)
    # Restarting dnsmasq service
    cmd = "systemctl restart dnsmasq"
    assert self.helper_node_handler.exec_cmd(
        cmd=cmd), "Failed to restart dnsmasq service"
    # Rebooting Machine with pxe boot
    for machine in self.mgmt_details:
        if (self.mgmt_details[machine].get("cluster_name") ==
                constants.BM_DEFAULT_CLUSTER_NAME):
            # IPMI credentials are passed to run_cmd as secrets so they
            # are masked in the logs
            secrets = [
                self.mgmt_details[machine]["mgmt_username"],
                self.mgmt_details[machine]["mgmt_password"],
            ]
            # Change boot priority to pxe
            cmd = (
                f"ipmitool -I lanplus -U {self.mgmt_details[machine]['mgmt_username']} "
                f"-P {self.mgmt_details[machine]['mgmt_password']} "
                f"-H {self.mgmt_details[machine]['mgmt_console']} chassis bootdev pxe"
            )
            run_cmd(cmd=cmd, secrets=secrets)
            # Brief pause before issuing the power command
            sleep(2)
            # Power On Machine: try a power cycle first; the `||` fallback
            # powers the machine on when the cycle fails (machine was off)
            cmd = (
                f"ipmitool -I lanplus -U {self.mgmt_details[machine]['mgmt_username']} "
                f"-P {self.mgmt_details[machine]['mgmt_password']} "
                f"-H {self.mgmt_details[machine]['mgmt_console']} chassis power cycle || "
                f"ipmitool -I lanplus -U {self.mgmt_details[machine]['mgmt_username']} "
                f"-P {self.mgmt_details[machine]['mgmt_password']} "
                f"-H {self.mgmt_details[machine]['mgmt_console']} chassis power on"
            )
            run_cmd(cmd=cmd, secrets=secrets)
    logger.info("waiting for bootstrap to complete")
    try:
        run_cmd(
            f"{self.installer} wait-for bootstrap-complete "
            f"--dir {self.cluster_path} "
            f"--log-level {log_cli_level}",
            timeout=3600,
        )
    except CommandFailed as e:
        # On a recognized bootstrap failure, collect debug data first
        # (best-effort), then always re-raise the original failure
        if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
            try:
                gather_bootstrap()
            except Exception as ex:
                logger.error(ex)
        raise e
    OCP.set_kubeconfig(self.kubeconfig)
    wait_for_all_nodes_csr_and_approve()
    # wait for image registry to show-up
    co = "image-registry"
    wait_for_co(co)
    # patch image registry to null
    self.configure_storage_for_image_registry(self.kubeconfig)
    # wait for install to complete
    logger.info("waiting for install to complete")
    run_cmd(
        f"{self.installer} wait-for install-complete "
        f"--dir {self.cluster_path} "
        f"--log-level {log_cli_level}",
        timeout=1800,
    )
    # Approving CSRs here in-case if any exists
    approve_pending_csr()
    self.test_cluster()
    logger.info("Performing Disk cleanup")
    clean_disk()
    # We need NTP for OCS cluster to become clean
    configure_chrony_and_wait_for_machineconfig_status(node_type="all")
def deploy(self, log_cli_level='DEBUG'):
    """
    Deployment specific to OCP cluster on this platform

    Args:
        log_cli_level (str): openshift installer's log level
            (default: "DEBUG")
    """
    logger.info("Deploying OCP cluster for vSphere platform")
    logger.info(
        f"Openshift-installer will be using loglevel:{log_cli_level}"
    )
    # Provision the infrastructure with terraform from its data dir,
    # then return to the directory we started from
    os.chdir(self.terraform_data_dir)
    self.terraform.initialize()
    self.terraform.apply(self.terraform_var)
    os.chdir(self.previous_dir)
    logger.info("waiting for bootstrap to complete")
    bootstrap_wait_cmd = (
        f"{self.installer} wait-for bootstrap-complete "
        f"--dir {self.cluster_path} "
        f"--log-level {log_cli_level}"
    )
    try:
        run_cmd(bootstrap_wait_cmd, timeout=3600)
    except CommandFailed as err:
        # Best-effort debug collection on a recognized bootstrap
        # failure; the original error is always re-raised
        if constants.GATHER_BOOTSTRAP_PATTERN in str(err):
            try:
                gather_bootstrap()
            except Exception as gather_err:
                logger.error(gather_err)
        raise err
    if not config.DEPLOYMENT['preserve_bootstrap_node']:
        logger.info("removing bootstrap node")
        os.chdir(self.terraform_data_dir)
        self.terraform.apply(
            self.terraform_var, bootstrap_complete=True
        )
        os.chdir(self.previous_dir)
    OCP.set_kubeconfig(self.kubeconfig)
    # wait for all nodes to generate CSR
    # From OCP version 4.4 and above, we have to approve CSR manually
    # for all the nodes
    installed_version = get_ocp_version()
    if Version.coerce(installed_version) >= Version.coerce('4.4'):
        wait_for_all_nodes_csr_and_approve(timeout=1200, sleep=30)
    # wait for image registry to show-up
    wait_for_co("image-registry")
    # patch image registry to null
    self.configure_storage_for_image_registry(self.kubeconfig)
    # wait for install to complete
    logger.info("waiting for install to complete")
    install_wait_cmd = (
        f"{self.installer} wait-for install-complete "
        f"--dir {self.cluster_path} "
        f"--log-level {log_cli_level}"
    )
    run_cmd(install_wait_cmd, timeout=1800)
    # Approving CSRs here in-case if any exists
    approve_pending_csr()
    self.test_cluster()
def deploy(self, log_cli_level="DEBUG"):
    """
    Deploy an OCP cluster on the Bare Metal platform with AWS Route53 DNS.

    Pushes dnsmasq/PXE configuration to the helper node, PXE-boots the
    bootstrap/master/worker machines, creates the api/api-int/*.apps DNS
    records in the cluster's hosted zone, waits for the installer's
    bootstrap and install phases, removes the bootstrap IP from the DNS
    records, and prepares the nodes for OCS (disk cleanup, chrony/NTP).

    Args:
        log_cli_level (str): openshift installer's log level
            (default: "DEBUG")
    """
    # Uploading pxe files
    master_count = 0
    worker_count = 0
    logger.info("Deploying OCP cluster for Bare Metal platform")
    logger.info(
        f"Openshift-installer will be using log level:{log_cli_level}")
    # Push the shared dnsmasq config to the helper node over SSH
    upload_file(
        self.host,
        constants.COMMON_CONF_FILE,
        os.path.join(self.helper_node_details["bm_dnsmasq_dir"],
                     "dnsmasq.common.conf"),
        self.user,
        key_file=self.private_key,
    )
    logger.info("Uploading PXE files")
    ocp_version = get_ocp_version()
    float_ocp_version = float(ocp_version)
    # Generate and upload a per-machine PXE config for every entry that
    # belongs to a cluster (or is marked as an extra node). The remote
    # file name is the PXE convention: 01-<mac-with-dashes>.
    for machine in self.mgmt_details:
        if self.mgmt_details[machine].get(
                "cluster_name") or self.mgmt_details[machine].get(
                "extra_node"):
            pxe_file_path = self.create_pxe_files(
                ocp_version=float_ocp_version,
                role=self.mgmt_details[machine].get("role"),
            )
            upload_file(
                server=self.host,
                localpath=pxe_file_path,
                remotepath=f"{self.helper_node_details['bm_tftp_dir']}"
                f"/pxelinux.cfg/01-{self.mgmt_details[machine]['mac'].replace(':', '-')}",
                user=self.user,
                key_file=self.private_key,
            )
    # Applying Permission
    cmd = f"chmod 755 -R {self.helper_node_details['bm_tftp_dir']}"
    self.helper_node_handler.exec_cmd(cmd=cmd)
    # Applying Permission
    cmd = f"chmod 755 -R {self.helper_node_details['bm_path_to_upload']}"
    self.helper_node_handler.exec_cmd(cmd=cmd)
    # Restarting dnsmasq service
    cmd = "systemctl restart dnsmasq"
    assert self.helper_node_handler.exec_cmd(
        cmd=cmd), "Failed to restart dnsmasq service"
    # Rebooting Machine with pxe boot
    api_record_ip_list = []
    apps_record_ip_list = []
    response_list = []
    # Fix: initialize bootstrap_ip so the post-install DNS cleanup is
    # skipped instead of raising NameError when the management details
    # contain no machine with the bootstrap role
    bootstrap_ip = None
    cluster_name = f"{constants.BM_DEFAULT_CLUSTER_NAME}"
    # Clear stale records from the hosted zone, keeping the zone itself
    self.aws.delete_hosted_zone(cluster_name=cluster_name,
                                delete_zone=False)
    for machine in self.mgmt_details:
        if (self.mgmt_details[machine].get("cluster_name") ==
                constants.BM_DEFAULT_CLUSTER_NAME):
            if (self.mgmt_details[machine]["role"] ==
                    constants.BOOTSTRAP_MACHINE):
                self.set_pxe_boot_and_reboot(machine)
                bootstrap_ip = self.mgmt_details[machine]["ip"]
                api_record_ip_list.append(
                    self.mgmt_details[machine]["ip"])
            elif (self.mgmt_details[machine]["role"] ==
                    constants.MASTER_MACHINE and
                    master_count < config.ENV_DATA["master_replicas"]):
                self.set_pxe_boot_and_reboot(machine)
                api_record_ip_list.append(
                    self.mgmt_details[machine]["ip"])
                master_count += 1
            elif (self.mgmt_details[machine]["role"] ==
                    constants.WORKER_MACHINE and
                    worker_count < config.ENV_DATA["worker_replicas"]):
                self.set_pxe_boot_and_reboot(machine)
                apps_record_ip_list.append(
                    self.mgmt_details[machine]["ip"])
                worker_count += 1
    logger.info("Configuring DNS records")
    zone_id = self.aws.get_hosted_zone_id(cluster_name=cluster_name)
    # On a compact (no-worker) cluster the apps records point at the
    # same nodes as the api records
    if config.ENV_DATA["worker_replicas"] == 0:
        apps_record_ip_list = api_record_ip_list
    for ip in api_record_ip_list:
        response_list.append(
            self.aws.update_hosted_zone_record(
                zone_id=zone_id,
                record_name=f"api-int.{cluster_name}",
                data=ip,
                type="A",
                operation_type="Add",
            ))
        response_list.append(
            self.aws.update_hosted_zone_record(
                zone_id=zone_id,
                record_name=f"api.{cluster_name}",
                data=ip,
                type="A",
                operation_type="Add",
            ))
    for ip in apps_record_ip_list:
        response_list.append(
            self.aws.update_hosted_zone_record(
                zone_id=zone_id,
                record_name=f"*.apps.{cluster_name}",
                data=ip,
                type="A",
                operation_type="Add",
            ))
    logger.info("Waiting for Record Response")
    self.aws.wait_for_record_set(response_list=response_list)
    logger.info("Records Created Successfully")
    logger.info("waiting for bootstrap to complete")
    try:
        run_cmd(
            f"{self.installer} wait-for bootstrap-complete "
            f"--dir {self.cluster_path} "
            f"--log-level {log_cli_level}",
            timeout=3600,
        )
    except CommandFailed as e:
        # Best-effort debug collection on a recognized bootstrap
        # failure; the original error is always re-raised
        if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
            try:
                gather_bootstrap()
            except Exception as ex:
                logger.error(ex)
        raise e
    OCP.set_kubeconfig(self.kubeconfig)
    wait_for_all_nodes_csr_and_approve()
    # wait for image registry to show-up
    co = "image-registry"
    wait_for_co(co)
    # patch image registry to null
    self.configure_storage_for_image_registry(self.kubeconfig)
    # wait for install to complete
    logger.info("waiting for install to complete")
    run_cmd(
        f"{self.installer} wait-for install-complete "
        f"--dir {self.cluster_path} "
        f"--log-level {log_cli_level}",
        timeout=1800,
    )
    # The bootstrap node is retired after install; drop its IP from the
    # api records (skipped when no bootstrap machine was PXE-booted)
    if bootstrap_ip:
        logger.info("Removing Bootstrap Ip for DNS Records")
        self.aws.update_hosted_zone_record(
            zone_id=zone_id,
            record_name=f"api-int.{cluster_name}",
            data=bootstrap_ip,
            type="A",
            operation_type="Delete",
        )
        self.aws.update_hosted_zone_record(
            zone_id=zone_id,
            record_name=f"api.{cluster_name}",
            data=bootstrap_ip,
            type="A",
            operation_type="Delete",
        )
    # Approving CSRs here in-case if any exists
    approve_pending_csr()
    self.test_cluster()
    logger.info("Performing Disk cleanup")
    clean_disk()
    # We need NTP for OCS cluster to become clean; scale the wait with
    # whichever node pool is larger (400s per node)
    worker_timeout = 400 * config.ENV_DATA["worker_replicas"]
    master_timeout = 400 * config.ENV_DATA["master_replicas"]
    chrony_timeout = max(master_timeout, worker_timeout)
    configure_chrony_and_wait_for_machineconfig_status(
        node_type="all", timeout=chrony_timeout)