def process_cluster_cli_params(config):
    """
    Process cluster related cli parameters

    Args:
        config (pytest.config): Pytest config object

    Raises:
        ClusterPathNotProvidedError: If a cluster path is missing
        ClusterNameNotProvidedError: If a cluster name is missing
        ClusterNameLengthError: If a cluster name is too short or too long

    """
    cluster_path = get_cli_param(config, 'cluster_path')
    if not cluster_path:
        raise ClusterPathNotProvidedError()
    cluster_path = os.path.expanduser(cluster_path)
    if not os.path.exists(cluster_path):
        os.makedirs(cluster_path)
    # Import here because by the time this function is invoked the config is
    # already loaded, so the import is safe.
    from ocs_ci.ocs.openshift_ops import OCP
    OCP.set_kubeconfig(
        os.path.join(cluster_path, ocsci_config.RUN['kubeconfig_location'])
    )
    cluster_name = get_cli_param(config, 'cluster_name')
    ocsci_config.RUN['cli_params']['teardown'] = get_cli_param(
        config, "teardown", default=False
    )
    ocsci_config.RUN['cli_params']['deploy'] = get_cli_param(
        config, "deploy", default=False
    )
    live_deployment = get_cli_param(config, "live_deploy", default=False)
    ocsci_config.DEPLOYMENT['live_deployment'] = live_deployment or (
        ocsci_config.DEPLOYMENT.get('live_deployment', False)
    )
    ocsci_config.RUN['cli_params']['io_in_bg'] = get_cli_param(
        config, "io_in_bg", default=False
    )
    upgrade_ocs_version = get_cli_param(config, "upgrade_ocs_version")
    if upgrade_ocs_version:
        ocsci_config.UPGRADE['upgrade_ocs_version'] = upgrade_ocs_version
    ocs_registry_image = get_cli_param(config, "ocs_registry_image")
    if ocs_registry_image:
        ocsci_config.DEPLOYMENT['ocs_registry_image'] = ocs_registry_image
    upgrade_ocs_registry_image = get_cli_param(
        config, "upgrade_ocs_registry_image"
    )
    if upgrade_ocs_registry_image:
        ocsci_config.UPGRADE[
            'upgrade_ocs_registry_image'
        ] = upgrade_ocs_registry_image
    ocsci_config.ENV_DATA['cluster_name'] = cluster_name
    ocsci_config.ENV_DATA['cluster_path'] = cluster_path
    get_cli_param(config, 'collect-logs')
    if ocsci_config.RUN.get("cli_params").get("deploy"):
        if not cluster_name:
            raise ClusterNameNotProvidedError()
        if (
            len(cluster_name) < CLUSTER_NAME_MIN_CHARACTERS
            or len(cluster_name) > CLUSTER_NAME_MAX_CHARACTERS
        ):
            raise ClusterNameLengthError(cluster_name)
    if get_cli_param(config, 'email') and not get_cli_param(config, '--html'):
        pytest.exit("--html option must be provided to send email reports")
    get_cli_param(config, '-m')
    osd_size = get_cli_param(config, '--osd-size')
    if osd_size:
        ocsci_config.ENV_DATA['device_size'] = osd_size
    ocp_version = get_cli_param(config, '--ocp-version')
    if ocp_version:
        version_config_file = f"ocp-{ocp_version}-config.yaml"
        version_config_file_path = os.path.join(
            OCP_VERSION_CONF_DIR, version_config_file
        )
        load_config_file(version_config_file_path)
def deploy(self, log_cli_level="DEBUG"): """ Deploy """ # Uploading pxe files master_count = 0 worker_count = 0 logger.info("Deploying OCP cluster for Bare Metal platform") logger.info( f"Openshift-installer will be using log level:{log_cli_level}") upload_file( self.host, constants.COMMON_CONF_FILE, os.path.join(self.helper_node_details["bm_dnsmasq_dir"], "dnsmasq.common.conf"), self.user, key_file=self.private_key, ) logger.info("Uploading PXE files") ocp_version = get_ocp_version() float_ocp_version = float(ocp_version) for machine in self.mgmt_details: if self.mgmt_details[machine].get( "cluster_name") or self.mgmt_details[machine].get( "extra_node"): pxe_file_path = self.create_pxe_files( ocp_version=float_ocp_version, role=self.mgmt_details[machine].get("role"), ) upload_file( server=self.host, localpath=pxe_file_path, remotepath=f"{self.helper_node_details['bm_tftp_dir']}" f"/pxelinux.cfg/01-{self.mgmt_details[machine]['mac'].replace(':', '-')}", user=self.user, key_file=self.private_key, ) # Applying Permission cmd = f"chmod 755 -R {self.helper_node_details['bm_tftp_dir']}" self.helper_node_handler.exec_cmd(cmd=cmd) # Applying Permission cmd = f"chmod 755 -R {self.helper_node_details['bm_path_to_upload']}" self.helper_node_handler.exec_cmd(cmd=cmd) # Restarting dnsmasq service cmd = "systemctl restart dnsmasq" assert self.helper_node_handler.exec_cmd( cmd=cmd), "Failed to restart dnsmasq service" # Rebooting Machine with pxe boot api_record_ip_list = [] apps_record_ip_list = [] response_list = [] cluster_name = f"{constants.BM_DEFAULT_CLUSTER_NAME}" self.aws.delete_hosted_zone(cluster_name=cluster_name, delete_zone=False) for machine in self.mgmt_details: if (self.mgmt_details[machine].get("cluster_name") == constants.BM_DEFAULT_CLUSTER_NAME): if (self.mgmt_details[machine]["role"] == constants.BOOTSTRAP_MACHINE): self.set_pxe_boot_and_reboot(machine) bootstrap_ip = self.mgmt_details[machine]["ip"] api_record_ip_list.append( self.mgmt_details[machine]["ip"]) elif (self.mgmt_details[machine]["role"] == constants.MASTER_MACHINE and master_count < config.ENV_DATA["master_replicas"]): self.set_pxe_boot_and_reboot(machine) api_record_ip_list.append( self.mgmt_details[machine]["ip"]) master_count += 1 elif (self.mgmt_details[machine]["role"] == constants.WORKER_MACHINE and worker_count < config.ENV_DATA["worker_replicas"]): self.set_pxe_boot_and_reboot(machine) apps_record_ip_list.append( self.mgmt_details[machine]["ip"]) worker_count += 1 logger.info("Configuring DNS records") zone_id = self.aws.get_hosted_zone_id(cluster_name=cluster_name) if config.ENV_DATA["worker_replicas"] == 0: apps_record_ip_list = api_record_ip_list for ip in api_record_ip_list: response_list.append( self.aws.update_hosted_zone_record( zone_id=zone_id, record_name=f"api-int.{cluster_name}", data=ip, type="A", operation_type="Add", )) response_list.append( self.aws.update_hosted_zone_record( zone_id=zone_id, record_name=f"api.{cluster_name}", data=ip, type="A", operation_type="Add", )) for ip in apps_record_ip_list: response_list.append( self.aws.update_hosted_zone_record( zone_id=zone_id, record_name=f"*.apps.{cluster_name}", data=ip, type="A", operation_type="Add", )) logger.info("Waiting for Record Response") self.aws.wait_for_record_set(response_list=response_list) logger.info("Records Created Successfully") logger.info("waiting for bootstrap to complete") try: run_cmd( f"{self.installer} wait-for bootstrap-complete " f"--dir {self.cluster_path} " f"--log-level {log_cli_level}", timeout=3600, ) except 
CommandFailed as e: if constants.GATHER_BOOTSTRAP_PATTERN in str(e): try: gather_bootstrap() except Exception as ex: logger.error(ex) raise e OCP.set_kubeconfig(self.kubeconfig) wait_for_all_nodes_csr_and_approve() # wait for image registry to show-up co = "image-registry" wait_for_co(co) # patch image registry to null self.configure_storage_for_image_registry(self.kubeconfig) # wait for install to complete logger.info("waiting for install to complete") run_cmd( f"{self.installer} wait-for install-complete " f"--dir {self.cluster_path} " f"--log-level {log_cli_level}", timeout=1800, ) logger.info("Removing Bootstrap Ip for DNS Records") self.aws.update_hosted_zone_record( zone_id=zone_id, record_name=f"api-int.{cluster_name}", data=bootstrap_ip, type="A", operation_type="Delete", ) self.aws.update_hosted_zone_record( zone_id=zone_id, record_name=f"api.{cluster_name}", data=bootstrap_ip, type="A", operation_type="Delete", ) # Approving CSRs here in-case if any exists approve_pending_csr() self.test_cluster() logger.info("Performing Disk cleanup") clean_disk()
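# The self.aws.update_hosted_zone_record() calls above batch A-record changes
# for the api, api-int and *.apps names, then the bootstrap IP is deleted
# once the install completes. For orientation only, a hedged sketch of what
# such a helper typically looks like when built on boto3's Route53 client;
# _update_record_sketch is a hypothetical name, not ocs-ci's actual
# implementation, and the real helper's signature differs.
import boto3


def _update_record_sketch(zone_id, record_name, ip, action="UPSERT"):
    """Add ("UPSERT") or remove ("DELETE") a single A record in Route53."""
    client = boto3.client("route53")
    response = client.change_resource_record_sets(
        HostedZoneId=zone_id,
        ChangeBatch={
            "Changes": [
                {
                    "Action": action,
                    "ResourceRecordSet": {
                        "Name": record_name,
                        "Type": "A",
                        "TTL": 60,
                        "ResourceRecords": [{"Value": ip}],
                    },
                }
            ]
        },
    )
    # The returned change id can be polled with client.get_change() until it
    # reports INSYNC, which is presumably what wait_for_record_set() does.
    return response["ChangeInfo"]["Id"]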
def is_cluster_running(cluster_path):
    """
    Check cluster access by pointing the OCP kubeconfig at the given
    cluster path. Returns a truthy value only when a cluster path was
    provided on the CLI and the kubeconfig under it grants access to a
    running cluster.
    """
    from ocs_ci.ocs.openshift_ops import OCP
    return config.RUN['cli_params'].get('cluster_path') and OCP.set_kubeconfig(
        os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))
    )
def process_cluster_cli_params(config):
    """
    Process cluster related cli parameters

    Args:
        config (pytest.config): Pytest config object

    Raises:
        ClusterPathNotProvidedError: If a cluster path is missing
        ClusterNameNotProvidedError: If a cluster name is missing
        ClusterNameLengthError: If a cluster name is too short or too long

    """
    cluster_path = get_cli_param(config, "cluster_path")
    if not cluster_path:
        raise ClusterPathNotProvidedError()
    cluster_path = os.path.expanduser(cluster_path)
    if not os.path.exists(cluster_path):
        os.makedirs(cluster_path)
    # Import here because by the time this function is invoked the config is
    # already loaded, so the import is safe.
    from ocs_ci.ocs.openshift_ops import OCP

    OCP.set_kubeconfig(
        os.path.join(cluster_path, ocsci_config.RUN["kubeconfig_location"])
    )
    cluster_name = get_cli_param(config, "cluster_name")
    ocsci_config.RUN["cli_params"]["teardown"] = get_cli_param(
        config, "teardown", default=False
    )
    ocsci_config.RUN["cli_params"]["deploy"] = get_cli_param(
        config, "deploy", default=False
    )
    live_deployment = get_cli_param(config, "live_deploy", default=False)
    ocsci_config.DEPLOYMENT["live_deployment"] = live_deployment or (
        ocsci_config.DEPLOYMENT.get("live_deployment", False)
    )
    io_in_bg = get_cli_param(config, "io_in_bg")
    if io_in_bg:
        ocsci_config.RUN["io_in_bg"] = True
    io_load = get_cli_param(config, "io_load")
    if io_load:
        ocsci_config.RUN["io_load"] = io_load
    log_utilization = get_cli_param(config, "log_cluster_utilization")
    if log_utilization:
        ocsci_config.RUN["log_utilization"] = True
    upgrade_ocs_version = get_cli_param(config, "upgrade_ocs_version")
    if upgrade_ocs_version:
        ocsci_config.UPGRADE["upgrade_ocs_version"] = upgrade_ocs_version
    ocs_registry_image = get_cli_param(config, "ocs_registry_image")
    if ocs_registry_image:
        ocsci_config.DEPLOYMENT["ocs_registry_image"] = ocs_registry_image
    upgrade_ocs_registry_image = get_cli_param(config, "upgrade_ocs_registry_image")
    if upgrade_ocs_registry_image:
        ocsci_config.UPGRADE["upgrade_ocs_registry_image"] = upgrade_ocs_registry_image
    ocsci_config.ENV_DATA["cluster_name"] = cluster_name
    ocsci_config.ENV_DATA["cluster_path"] = cluster_path
    get_cli_param(config, "collect-logs")
    if ocsci_config.RUN.get("cli_params").get("deploy"):
        if not cluster_name:
            raise ClusterNameNotProvidedError()
        if (
            len(cluster_name) < CLUSTER_NAME_MIN_CHARACTERS
            or len(cluster_name) > CLUSTER_NAME_MAX_CHARACTERS
        ):
            raise ClusterNameLengthError(cluster_name)
    elif not cluster_name:
        try:
            ocsci_config.ENV_DATA["cluster_name"] = get_cluster_name(cluster_path)
        except FileNotFoundError:
            raise ClusterNameNotProvidedError()
    if get_cli_param(config, "email") and not get_cli_param(config, "--html"):
        pytest.exit("--html option must be provided to send email reports")
    get_cli_param(config, "squad_analysis")
    get_cli_param(config, "-m")
    osd_size = get_cli_param(config, "--osd-size")
    if osd_size:
        ocsci_config.ENV_DATA["device_size"] = osd_size
    ocp_version = get_cli_param(config, "--ocp-version")
    if ocp_version:
        version_config_file = f"ocp-{ocp_version}-config.yaml"
        version_config_file_path = os.path.join(
            OCP_VERSION_CONF_DIR, version_config_file
        )
        load_config_file(version_config_file_path)
    upgrade_ocp_version = get_cli_param(config, "--upgrade-ocp-version")
    if upgrade_ocp_version:
        version_config_file = f"ocp-{upgrade_ocp_version}-upgrade.yaml"
        version_config_file_path = os.path.join(
            OCP_VERSION_CONF_DIR, version_config_file
        )
        load_config_file(version_config_file_path)
    upgrade_ocp_image = get_cli_param(config, "--upgrade-ocp-image")
    if upgrade_ocp_image:
        ocp_image = upgrade_ocp_image.rsplit(":", 1)
        ocsci_config.UPGRADE["ocp_upgrade_path"] = ocp_image[0]
        ocsci_config.UPGRADE["ocp_upgrade_version"] = ocp_image[1]
    ocp_installer_version = get_cli_param(config, "--ocp-installer-version")
    if ocp_installer_version:
        ocsci_config.DEPLOYMENT["installer_version"] = ocp_installer_version
        ocsci_config.RUN["client_version"] = ocp_installer_version
    csv_change = get_cli_param(config, "--csv-change")
    if csv_change:
        csv_change = csv_change.split("::")
        ocsci_config.DEPLOYMENT["csv_change_from"] = csv_change[0]
        ocsci_config.DEPLOYMENT["csv_change_to"] = csv_change[1]
    collect_logs_on_success_run = get_cli_param(config, "collect_logs_on_success_run")
    if collect_logs_on_success_run:
        ocsci_config.REPORTING["collect_logs_on_success_run"] = True
    get_cli_param(config, "dev_mode")
    ceph_debug = get_cli_param(config, "ceph_debug")
    if ceph_debug:
        ocsci_config.DEPLOYMENT["ceph_debug"] = True
def deploy(self, log_cli_level="DEBUG"): """ Deployment specific to OCP cluster on this platform Args: log_cli_level (str): openshift installer's log level (default: "DEBUG") """ logger.info("Deploying OCP cluster for vSphere platform") logger.info(f"Openshift-installer will be using loglevel:{log_cli_level}") os.chdir(self.terraform_data_dir) self.terraform.initialize() self.terraform.apply(self.terraform_var) os.chdir(self.previous_dir) logger.info("waiting for bootstrap to complete") try: run_cmd( f"{self.installer} wait-for bootstrap-complete " f"--dir {self.cluster_path} " f"--log-level {log_cli_level}", timeout=3600, ) except CommandFailed as e: if constants.GATHER_BOOTSTRAP_PATTERN in str(e): try: gather_bootstrap() except Exception as ex: logger.error(ex) raise e if self.folder_structure: # comment bootstrap module comment_bootstrap_in_lb_module() # remove bootstrap IP in load balancer and # restart haproxy lb = LoadBalancer() lb.remove_boostrap_in_proxy() lb.restart_haproxy() # remove bootstrap node if not config.DEPLOYMENT["preserve_bootstrap_node"]: logger.info("removing bootstrap node") os.chdir(self.terraform_data_dir) if self.folder_structure: self.terraform.destroy_module( self.terraform_var, constants.BOOTSTRAP_MODULE ) else: self.terraform.apply(self.terraform_var, bootstrap_complete=True) os.chdir(self.previous_dir) OCP.set_kubeconfig(self.kubeconfig) # wait for all nodes to generate CSR # From OCP version 4.4 and above, we have to approve CSR manually # for all the nodes ocp_version = get_ocp_version() if Version.coerce(ocp_version) >= Version.coerce("4.4"): wait_for_all_nodes_csr_and_approve(timeout=1500, sleep=30) # wait for image registry to show-up co = "image-registry" wait_for_co(co) # patch image registry to null self.configure_storage_for_image_registry(self.kubeconfig) # wait for install to complete logger.info("waiting for install to complete") run_cmd( f"{self.installer} wait-for install-complete " f"--dir {self.cluster_path} " f"--log-level {log_cli_level}", timeout=1800, ) # Approving CSRs here in-case if any exists approve_pending_csr() self.test_cluster()
def cluster(request):
    log.info(f"All logs located at {log_path}")
    log.info("Running OCS basic installation")
    cluster_path = config.ENV_DATA['cluster_path']
    deploy = config.RUN['cli_params']['deploy']
    teardown = config.RUN['cli_params']['teardown']
    # Add a finalizer to teardown the cluster after test execution is finished
    if teardown:
        request.addfinalizer(cluster_teardown)
        log.info("Will teardown cluster because --teardown was provided")
    # Test cluster access; if the cluster is running, skip the deployment.
    if is_cluster_running(cluster_path):
        log.info("The installation is skipped because the cluster is running")
        return
    elif teardown and not deploy:
        log.info(
            "Attempting teardown of non-accessible cluster: %s", cluster_path
        )
        return
    elif not deploy and not teardown:
        msg = "The given cluster cannot be connected to: {}. ".format(
            cluster_path
        )
        msg += (
            "Provide a valid --cluster-path or use --deploy to deploy a new "
            "cluster"
        )
        pytest.fail(msg)
    elif not system.is_path_empty(cluster_path) and deploy:
        msg = "The given cluster path is not empty: {}. ".format(cluster_path)
        msg += (
            "Provide an empty --cluster-path and --deploy to deploy a new "
            "cluster"
        )
        pytest.fail(msg)
    else:
        log.info(
            "A testing cluster will be deployed and cluster information "
            "stored at: %s", cluster_path
        )

    # Generate install-config from template
    log.info("Generating install-config")
    run_cmd(f"mkdir -p {cluster_path}")
    pull_secret_path = os.path.join(constants.TOP_DIR, "data", "pull-secret")

    # TODO: check for supported platform and raise the exception if not
    # supported. Currently we support just AWS.

    _templating = templating.Templating()
    install_config_str = _templating.render_template(
        "install-config.yaml.j2", config.ENV_DATA
    )
    # Log the install config *before* adding the pull secret, so we don't
    # leak sensitive data.
    log.info(f"Install config: \n{install_config_str}")
    # Parse the rendered YAML so that we can manipulate the object directly
    install_config_obj = yaml.safe_load(install_config_str)
    with open(pull_secret_path, "r") as f:
        # Parse, then unparse, the JSON file.
        # We do this for two reasons: to ensure it is well-formatted, and
        # also to ensure it ends up as a single line.
        install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
    install_config_str = yaml.safe_dump(install_config_obj)
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(install_config_str)

    # Download installer
    installer = get_openshift_installer(
        config.DEPLOYMENT['installer_version']
    )
    # Download client
    get_openshift_client()

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(
        f"{installer} create cluster "
        f"--dir {cluster_path} "
        f"--log-level debug"
    )
    # Test cluster access
    if not OCP.set_kubeconfig(
        os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))
    ):
        pytest.fail("Cluster is not available!")

    # TODO: Create cluster object, add to config.ENV_DATA for other tests to
    # utilize.

    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=config.ENV_DATA['region'])

    # Render templates and create resources
    create_oc_resource(
        'common.yaml', cluster_path, _templating, config.ENV_DATA
    )
    run_cmd(
        f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
        f'"openshift.io/cluster-monitoring=true"'
    )
    run_cmd(
        f"oc policy add-role-to-user view "
        f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
        f"-n {config.ENV_DATA['cluster_namespace']}"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_rbd.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_rbd.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_cephfs.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/cephfs/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_cephfs.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/cephfs/"
    )
    # Increased to 15 seconds as 10 is not enough
    # TODO: use a sampler function and check if the resource exists
    wait_time = 15
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'operator-openshift-with-csi.yaml', cluster_path, _templating,
        config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-operator "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-discover "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    create_oc_resource(
        'cluster.yaml', cluster_path, _templating, config.ENV_DATA
    )

    POD = ocp.OCP(
        kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
    )
    CFS = ocp.OCP(
        kind=constants.CEPHFILESYSTEM,
        namespace=config.ENV_DATA['cluster_namespace']
    )

    # Check for the Running status of Ceph Pods
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-agent "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr', timeout=600
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )

    create_oc_resource(
        'toolbox.yaml', cluster_path, _templating, config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'storage-manifest.yaml', cluster_path, _templating, config.ENV_DATA
    )
    create_oc_resource(
        "service-monitor.yaml", cluster_path, _templating, config.ENV_DATA
    )
    create_oc_resource(
        "prometheus-rules.yaml", cluster_path, _templating, config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)

    # Create MDS pods for CephFileSystem
    fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
    fs_data['metadata']['namespace'] = config.ENV_DATA['cluster_namespace']

    ceph_obj = OCS(**fs_data)
    ceph_obj.create()
    assert POD.wait_for_resource(
        condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
        resource_count=2, timeout=600
    )

    # Check for CephFilesystem creation in ocp
    cfs_data = CFS.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']

    if helpers.validate_cephfilesystem(cfs_name):
        log.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        log.error("MDS deployment failed! Please check logs!")

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
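# The fixture above paces itself with fixed time.sleep() calls and leaves a
# TODO to poll for resources instead. A minimal sketch of such a polling
# helper, for illustration only (ocs-ci's real TimeoutSampler in
# ocs_ci.utility.utils differs in detail; wait_for_condition is a
# hypothetical name):
import time


def wait_for_condition(check_fn, timeout=120, sleep=5):
    """Poll check_fn until it returns a truthy value or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        result = check_fn()
        if result:
            return result
        time.sleep(sleep)
    raise TimeoutError(f"Condition {check_fn.__name__} not met in {timeout}s")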
def deploy(self, log_cli_level='DEBUG'):
    """
    Deployment specific to OCP cluster on this platform

    Args:
        log_cli_level (str): openshift installer's log level
            (default: "DEBUG")

    """
    logger.info("Deploying OCP cluster for vSphere platform")
    logger.info(
        f"Openshift-installer will be using log level: {log_cli_level}"
    )
    os.chdir(self.terraform_data_dir)
    self.terraform.initialize()
    self.terraform.apply(self.terraform_var)
    os.chdir(self.previous_dir)
    logger.info("Waiting for bootstrap to complete")
    try:
        run_cmd(
            f"{self.installer} wait-for bootstrap-complete "
            f"--dir {self.cluster_path} "
            f"--log-level {log_cli_level}",
            timeout=3600
        )
    except CommandFailed as e:
        if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
            try:
                gather_bootstrap()
            except Exception as ex:
                logger.error(ex)
        raise e

    if not config.DEPLOYMENT['preserve_bootstrap_node']:
        logger.info("Removing bootstrap node")
        os.chdir(self.terraform_data_dir)
        self.terraform.apply(self.terraform_var, bootstrap_complete=True)
        os.chdir(self.previous_dir)

    OCP.set_kubeconfig(self.kubeconfig)
    approve_pending_csr()

    # Wait for the image-registry cluster operator to show up
    co = "image-registry"
    wait_for_co(co)

    # Patch image registry to null
    self.configure_storage_for_image_registry(self.kubeconfig)

    # Wait for the install to complete
    logger.info("Waiting for install to complete")
    run_cmd(
        f"{self.installer} wait-for install-complete "
        f"--dir {self.cluster_path} "
        f"--log-level {log_cli_level}",
        timeout=1800
    )

    # Wait for all nodes to generate CSRs.
    # From OCP version 4.4 and above, we have to approve CSRs manually
    # for all the nodes.
    ocp_version = get_ocp_version()
    if Version.coerce(ocp_version) >= Version.coerce('4.4'):
        wait_for_all_nodes_csr_and_approve()

    self.test_cluster()
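# approve_pending_csr() and wait_for_all_nodes_csr_and_approve() come from
# ocs-ci's node utilities and are not shown in this excerpt. For orientation
# only, a hedged sketch of what approving pending CSRs boils down to at the
# `oc` level (reusing run_cmd from this module; the column parsing is
# illustrative and _approve_pending_csr_sketch is a hypothetical name):


def _approve_pending_csr_sketch():
    """Approve every CertificateSigningRequest still in Pending state."""
    csr_table = run_cmd("oc get csr --no-headers")
    for line in csr_table.splitlines():
        fields = line.split()
        # The last column is the CONDITION, which reads "Pending" until the
        # CSR is approved and issued.
        if fields and fields[-1] == "Pending":
            run_cmd(f"oc adm certificate approve {fields[0]}")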
def deploy(self, log_cli_level="DEBUG"): """ Deploy """ # Uploading pxe files logger.info("Deploying OCP cluster for Bare Metal platform") logger.info( f"Openshift-installer will be using log level:{log_cli_level}") upload_file( self.host, constants.COMMON_CONF_FILE, os.path.join(self.helper_node_details["bm_dnsmasq_dir"], "dnsmasq.common.conf"), self.user, key_file=self.private_key, ) logger.info("Uploading PXE files") ocp_version = get_ocp_version() for machine in self.mgmt_details: if self.mgmt_details[machine].get( "cluster_name") or self.mgmt_details[machine].get( "extra_node"): pxe_file_path = self.create_pxe_files( ocp_version=ocp_version, role=self.mgmt_details[machine].get("role"), ) upload_file( server=self.host, localpath=pxe_file_path, remotepath=f"{self.helper_node_details['bm_tftp_dir']}" f"/pxelinux.cfg/01-{self.mgmt_details[machine]['mac'].replace(':', '-')}", user=self.user, key_file=self.private_key, ) # Applying Permission cmd = f"chmod 755 -R {self.helper_node_details['bm_tftp_dir']}" self.helper_node_handler.exec_cmd(cmd=cmd) # Applying Permission cmd = f"chmod 755 -R {self.helper_node_details['bm_path_to_upload']}" self.helper_node_handler.exec_cmd(cmd=cmd) # Restarting dnsmasq service cmd = "systemctl restart dnsmasq" assert self.helper_node_handler.exec_cmd( cmd=cmd), "Failed to restart dnsmasq service" # Rebooting Machine with pxe boot for machine in self.mgmt_details: if (self.mgmt_details[machine].get("cluster_name") == constants.BM_DEFAULT_CLUSTER_NAME): secrets = [ self.mgmt_details[machine]["mgmt_username"], self.mgmt_details[machine]["mgmt_password"], ] # Changes boot prioriy to pxe cmd = ( f"ipmitool -I lanplus -U {self.mgmt_details[machine]['mgmt_username']} " f"-P {self.mgmt_details[machine]['mgmt_password']} " f"-H {self.mgmt_details[machine]['mgmt_console']} chassis bootdev pxe" ) run_cmd(cmd=cmd, secrets=secrets) sleep(2) # Power On Machine cmd = ( f"ipmitool -I lanplus -U {self.mgmt_details[machine]['mgmt_username']} " f"-P {self.mgmt_details[machine]['mgmt_password']} " f"-H {self.mgmt_details[machine]['mgmt_console']} chassis power cycle || " f"ipmitool -I lanplus -U {self.mgmt_details[machine]['mgmt_username']} " f"-P {self.mgmt_details[machine]['mgmt_password']} " f"-H {self.mgmt_details[machine]['mgmt_console']} chassis power on" ) run_cmd(cmd=cmd, secrets=secrets) logger.info("waiting for bootstrap to complete") try: run_cmd( f"{self.installer} wait-for bootstrap-complete " f"--dir {self.cluster_path} " f"--log-level {log_cli_level}", timeout=3600, ) except CommandFailed as e: if constants.GATHER_BOOTSTRAP_PATTERN in str(e): try: gather_bootstrap() except Exception as ex: logger.error(ex) raise e OCP.set_kubeconfig(self.kubeconfig) wait_for_all_nodes_csr_and_approve() # wait for image registry to show-up co = "image-registry" wait_for_co(co) # patch image registry to null self.configure_storage_for_image_registry(self.kubeconfig) # wait for install to complete logger.info("waiting for install to complete") run_cmd( f"{self.installer} wait-for install-complete " f"--dir {self.cluster_path} " f"--log-level {log_cli_level}", timeout=1800, ) # Approving CSRs here in-case if any exists approve_pending_csr() self.test_cluster() logger.info("Performing Disk cleanup") clean_disk() # We need NTP for OCS cluster to become clean configure_chrony_and_wait_for_machineconfig_status(node_type="all")
def deploy(self, log_cli_level="DEBUG"): """ Deploy """ # Uploading pxe files master_count = 0 worker_count = 0 logger.info("Deploying OCP cluster for Bare Metal platform") logger.info(f"Openshift-installer will be using log level:{log_cli_level}") upload_file( self.host, constants.COMMON_CONF_FILE, os.path.join( self.helper_node_details["bm_dnsmasq_dir"], "dnsmasq.common.conf" ), self.user, key_file=self.private_key, ) logger.info("Uploading PXE files") ocp_version = get_ocp_version() float_ocp_version = float(ocp_version) for machine in self.mgmt_details: if self.mgmt_details[machine].get("cluster_name") or self.mgmt_details[ machine ].get("extra_node"): pxe_file_path = self.create_pxe_files( ocp_version=float_ocp_version, role=self.mgmt_details[machine].get("role"), ) upload_file( server=self.host, localpath=pxe_file_path, remotepath=f"{self.helper_node_details['bm_tftp_dir']}" f"/pxelinux.cfg/01-{self.mgmt_details[machine]['mac'].replace(':', '-')}", user=self.user, key_file=self.private_key, ) # Applying Permission cmd = f"chmod 755 -R {self.helper_node_details['bm_tftp_dir']}" self.helper_node_handler.exec_cmd(cmd=cmd) # Applying Permission cmd = f"chmod 755 -R {self.helper_node_details['bm_path_to_upload']}" self.helper_node_handler.exec_cmd(cmd=cmd) # Restarting dnsmasq service cmd = "systemctl restart dnsmasq" assert self.helper_node_handler.exec_cmd( cmd=cmd ), "Failed to restart dnsmasq service" # Rebooting Machine with pxe boot for machine in self.mgmt_details: if ( self.mgmt_details[machine].get("cluster_name") == constants.BM_DEFAULT_CLUSTER_NAME ): if ( self.mgmt_details[machine]["role"] == constants.BOOTSTRAP_MACHINE ): self.set_pxe_boot_and_reboot(machine) elif ( self.mgmt_details[machine]["role"] == constants.MASTER_MACHINE and master_count < config.ENV_DATA["master_replicas"] ): self.set_pxe_boot_and_reboot(machine) master_count += 1 elif ( self.mgmt_details[machine]["role"] == constants.WORKER_MACHINE and worker_count < config.ENV_DATA["worker_replicas"] ): self.set_pxe_boot_and_reboot(machine) worker_count += 1 logger.info("waiting for bootstrap to complete") try: run_cmd( f"{self.installer} wait-for bootstrap-complete " f"--dir {self.cluster_path} " f"--log-level {log_cli_level}", timeout=3600, ) except CommandFailed as e: if constants.GATHER_BOOTSTRAP_PATTERN in str(e): try: gather_bootstrap() except Exception as ex: logger.error(ex) raise e OCP.set_kubeconfig(self.kubeconfig) wait_for_all_nodes_csr_and_approve() # wait for image registry to show-up co = "image-registry" wait_for_co(co) # patch image registry to null self.configure_storage_for_image_registry(self.kubeconfig) # wait for install to complete logger.info("waiting for install to complete") run_cmd( f"{self.installer} wait-for install-complete " f"--dir {self.cluster_path} " f"--log-level {log_cli_level}", timeout=1800, ) # Approving CSRs here in-case if any exists approve_pending_csr() self.test_cluster() logger.info("Performing Disk cleanup") clean_disk() # We need NTP for OCS cluster to become clean worker_timeout = 400 * config.ENV_DATA["worker_replicas"] master_timeout = 400 * config.ENV_DATA["master_replicas"] if master_timeout <= worker_timeout: chrony_timeout = worker_timeout else: chrony_timeout = master_timeout configure_chrony_and_wait_for_machineconfig_status( node_type="all", timeout=chrony_timeout )