def test_deployment(pvc_factory, pod_factory):
    deploy = config.RUN['cli_params'].get('deploy')
    teardown = config.RUN['cli_params'].get('teardown')
    if not teardown or deploy:
        log.info("Verifying OCP cluster is running")
        assert is_cluster_running(config.ENV_DATA['cluster_path'])
        if not config.ENV_DATA['skip_ocs_deployment']:
            ocs_registry_image = config.DEPLOYMENT.get('ocs_registry_image')
            ocs_install_verification(ocs_registry_image=ocs_registry_image)

            # Check basic cluster functionality by creating resources
            # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
            # run IO and delete the resources
            if config.DEPLOYMENT['external_mode']:
                sanity_helpers = SanityExternalCluster()
            else:
                sanity_helpers = Sanity()
            sanity_helpers.health_check()
            sanity_helpers.create_resources(pvc_factory, pod_factory)
            sanity_helpers.delete_resources()

    if teardown:
        log.info(
            "Cluster will be destroyed during teardown part of this test."
        )
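# --- Added illustrative sketch (not from the original source) ---
# The tests above read ``deploy``/``teardown`` from config.RUN['cli_params'].
# A minimal, hypothetical way to wire such flags from pytest options into that
# dict could look like this; the hook names are real pytest hooks, everything
# else (option names, config import path) is an assumption for illustration.
def pytest_addoption(parser):
    parser.addoption("--deploy", action="store_true", default=False,
                     help="Deploy a new cluster before running tests")
    parser.addoption("--teardown", action="store_true", default=False,
                     help="Destroy the cluster after the test run")


def pytest_configure(config):
    # ``config`` here is pytest's Config object; the framework config is
    # imported under a different name to avoid shadowing it (assumed path).
    from ocs_ci.framework import config as framework_config
    framework_config.RUN.setdefault("cli_params", {})
    framework_config.RUN["cli_params"]["deploy"] = config.getoption("--deploy")
    framework_config.RUN["cli_params"]["teardown"] = config.getoption("--teardown")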
def deploy_cluster(self, log_cli_level='DEBUG'):
    """
    We are handling both OCP and OCS deployment here based on flags

    Args:
        log_cli_level (str): log level for installer (default: DEBUG)

    """
    if not config.ENV_DATA['skip_ocp_deployment']:
        if is_cluster_running(self.cluster_path):
            logger.warning(
                "OCP cluster is already running, skipping installation"
            )
        else:
            try:
                self.deploy_ocp(log_cli_level)
                self.post_ocp_deploy()
            except Exception as e:
                logger.error(e)
                if config.REPORTING['gather_on_deploy_failure']:
                    collect_ocs_logs('deployment', ocs=False)
                raise

    if not config.ENV_DATA['skip_ocs_deployment']:
        try:
            self.deploy_ocs()
        except Exception as e:
            logger.error(e)
            if config.REPORTING['gather_on_deploy_failure']:
                # Let's do the collections separately to guard against one
                # of them failing
                collect_ocs_logs('deployment', ocs=False)
                collect_ocs_logs('deployment', ocp=False)
            raise
    else:
        logger.warning("OCS deployment will be skipped")
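# --- Added illustrative sketch (not from the original source) ---
# How the flag-driven deployer above might be exercised: reuse an already
# installed OCP cluster (skip OCP) while still deploying OCS. ``Deployment``
# stands in for whatever concrete class provides deploy_ocp()/deploy_ocs();
# the class name and the flag values are assumptions, not taken from the source.
def run_ocs_only_deployment():
    config.ENV_DATA["skip_ocp_deployment"] = True
    config.ENV_DATA["skip_ocs_deployment"] = False
    deployer = Deployment()  # hypothetical concrete deployer class
    deployer.deploy_cluster(log_cli_level="INFO")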
def test_deployment():
    deploy = config.RUN['cli_params'].get('deploy')
    teardown = config.RUN['cli_params'].get('teardown')
    if not teardown or deploy:
        assert is_cluster_running(config.ENV_DATA['cluster_path'])
    if teardown:
        log.info(
            "Cluster will be destroyed during teardown part of this test."
        )
def test_deployment():
    deploy = config.RUN['cli_params'].get('deploy')
    teardown = config.RUN['cli_params'].get('teardown')
    if not teardown or deploy:
        log.info("Verifying OCP cluster is running")
        assert is_cluster_running(config.ENV_DATA['cluster_path'])
        if not config.ENV_DATA['skip_ocs_deployment']:
            ocs_install_verification()
    if teardown:
        log.info(
            "Cluster will be destroyed during teardown part of this test."
        )
def test_deployment(pvc_factory, pod_factory):
    deploy = config.RUN["cli_params"].get("deploy")
    teardown = config.RUN["cli_params"].get("teardown")
    if not teardown or deploy:
        log.info("Verifying OCP cluster is running")
        assert is_cluster_running(config.ENV_DATA["cluster_path"])
        if not config.ENV_DATA["skip_ocs_deployment"]:
            if config.multicluster:
                restore_ctx_index = config.cur_index
                for cluster in get_non_acm_cluster_config():
                    config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
                    log.info(
                        f"Sanity check for cluster: {cluster.ENV_DATA['cluster_name']}"
                    )
                    sanity_helpers = Sanity()
                    sanity_helpers.health_check()
                    sanity_helpers.delete_resources()
                config.switch_ctx(restore_ctx_index)
            else:
                ocs_registry_image = config.DEPLOYMENT.get("ocs_registry_image")
                if config.ENV_DATA["mcg_only_deployment"]:
                    mcg_only_install_verification(
                        ocs_registry_image=ocs_registry_image
                    )
                    return
                else:
                    ocs_install_verification(ocs_registry_image=ocs_registry_image)

                # Check basic cluster functionality by creating resources
                # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
                # run IO and delete the resources
                if config.DEPLOYMENT["external_mode"]:
                    sanity_helpers = SanityExternalCluster()
                else:
                    sanity_helpers = Sanity()
                if (
                    config.ENV_DATA["platform"].lower()
                    in constants.MANAGED_SERVICE_PLATFORMS
                ):
                    try:
                        sanity_helpers.health_check()
                    except exceptions.ResourceWrongStatusException as err_msg:
                        log.warning(err_msg)
                else:
                    sanity_helpers.health_check()
                sanity_helpers.delete_resources()

                # Verify ceph health
                log.info("Verifying ceph health after deployment")
                assert ceph_health_check(tries=10, delay=30)

    if teardown:
        log.info(
            "Cluster will be destroyed during teardown part of this test."
        )
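# --- Added illustrative sketch (not from the original source) ---
# The multicluster branch above switches contexts and restores the original
# index afterwards. A small context manager (an assumed helper, not part of
# the snippet) would guarantee the restore even if a sanity check raises.
from contextlib import contextmanager


@contextmanager
def switched_ctx(index):
    restore_index = config.cur_index
    config.switch_ctx(index)
    try:
        yield
    finally:
        config.switch_ctx(restore_index)


# Usage sketch, mirroring the loop above:
# for cluster in get_non_acm_cluster_config():
#     with switched_ctx(cluster.MULTICLUSTER["multicluster_index"]):
#         Sanity().health_check()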
def deploy_cluster(self, log_cli_level='DEBUG'):
    """
    We are handling both OCP and OCS deployment here based on flags

    Args:
        log_cli_level (str): log level for installer (default: DEBUG)

    """
    if not config.ENV_DATA['skip_ocp_deployment']:
        if is_cluster_running(self.cluster_path):
            logger.warning(
                "OCP cluster is already running, skipping installation"
            )
        else:
            self.deploy_ocp(log_cli_level)

    if not config.ENV_DATA['skip_ocs_deployment']:
        self.deploy_ocs()
    else:
        logger.warning("OCS deployment will be skipped")
def test_cluster_is_running():
    assert is_cluster_running(config.ENV_DATA['cluster_path'])
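# --- Added illustrative sketch (not from the original source) ---
# A hypothetical stand-in for the is_cluster_running() helper used throughout
# these snippets: treat the cluster as running if the kubeconfig exists and
# ``oc cluster-info`` succeeds against it. This is an assumption for
# illustration, not the real ocs-ci implementation.
import os
import subprocess


def is_cluster_running_sketch(cluster_path):
    kubeconfig = os.path.join(cluster_path, "auth", "kubeconfig")
    if not os.path.isfile(kubeconfig):
        return False
    result = subprocess.run(
        ["oc", "--kubeconfig", kubeconfig, "cluster-info"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return result.returncode == 0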
def cluster(request):
    log.info(f"All logs located at {log_path}")
    log.info("Running OCS basic installation")
    cluster_path = config.ENV_DATA['cluster_path']
    deploy = config.RUN['cli_params']['deploy']
    teardown = config.RUN['cli_params']['teardown']
    # Add a finalizer to tear down the cluster after test execution is finished
    if teardown:
        request.addfinalizer(cluster_teardown)
        log.info("Will teardown cluster because --teardown was provided")

    # Test cluster access; if the cluster is already running, skip the deployment.
    if is_cluster_running(cluster_path):
        log.info("The installation is skipped because the cluster is running")
        return
    elif teardown and not deploy:
        log.info("Attempting teardown of non-accessible cluster: %s", cluster_path)
        return
    elif not deploy and not teardown:
        msg = "The given cluster can not be connected to: {}. ".format(cluster_path)
        msg += "Provide a valid --cluster-path or use --deploy to deploy a new cluster"
        pytest.fail(msg)
    elif not system.is_path_empty(cluster_path) and deploy:
        msg = "The given cluster path is not empty: {}. ".format(cluster_path)
        msg += "Provide an empty --cluster-path and --deploy to deploy a new cluster"
        pytest.fail(msg)
    else:
        log.info(
            "A testing cluster will be deployed and cluster information stored at: %s",
            cluster_path
        )

    # Generate install-config from template
    log.info("Generating install-config")
    pull_secret_path = os.path.join(constants.TOP_DIR, "data", "pull-secret")

    # TODO: check for a supported platform and raise an exception if it is not
    # supported. Currently only AWS is supported.

    _templating = templating.Templating()
    install_config_str = _templating.render_template(
        "install-config.yaml.j2", config.ENV_DATA
    )
    # Log the install config *before* adding the pull secret, so we don't leak
    # sensitive data.
    log.info(f"Install config: \n{install_config_str}")
    # Parse the rendered YAML so that we can manipulate the object directly
    install_config_obj = yaml.safe_load(install_config_str)
    with open(pull_secret_path, "r") as f:
        # Parse, then unparse, the JSON file.
        # We do this for two reasons: to ensure it is well-formatted, and
        # also to ensure it ends up as a single line.
        install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
    install_config_str = yaml.safe_dump(install_config_obj)
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(install_config_str)

    # Download installer
    installer = get_openshift_installer(config.DEPLOYMENT['installer_version'])
    # Download client
    get_openshift_client()

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(
        f"{installer} create cluster "
        f"--dir {cluster_path} "
        f"--log-level debug"
    )

    # Test cluster access
    if not OCP.set_kubeconfig(
        os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))
    ):
        pytest.fail("Cluster is not available!")

    # TODO: Create cluster object, add to config.ENV_DATA for other tests to
    # utilize.
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=config.ENV_DATA['region'])

    # render templates and create resources
    create_oc_resource('common.yaml', cluster_path, _templating, config.ENV_DATA)
    run_cmd(
        f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
        f'"openshift.io/cluster-monitoring=true"'
    )
    run_cmd(
        f"oc policy add-role-to-user view "
        f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
        f"-n {config.ENV_DATA['cluster_namespace']}"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_rbd.yaml', cluster_path, _templating,
        config.ENV_DATA, template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_rbd.yaml', cluster_path, _templating,
        config.ENV_DATA, template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_cephfs.yaml', cluster_path, _templating,
        config.ENV_DATA, template_dir="ocs-deployment/csi/cephfs/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_cephfs.yaml', cluster_path, _templating,
        config.ENV_DATA, template_dir="ocs-deployment/csi/cephfs/"
    )
    # Increased to 15 seconds as 10 is not enough
    # TODO: do the sampler function and check if resource exist
    wait_time = 15
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'operator-openshift-with-csi.yaml', cluster_path, _templating,
        config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-operator "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-discover "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    create_oc_resource('cluster.yaml', cluster_path, _templating, config.ENV_DATA)

    POD = ocp.OCP(
        kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
    )
    CFS = ocp.OCP(
        kind=constants.CEPHFILESYSTEM,
        namespace=config.ENV_DATA['cluster_namespace']
    )

    # Check for the Running status of Ceph Pods
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-agent "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr', timeout=600
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )

    create_oc_resource('toolbox.yaml', cluster_path, _templating, config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'storage-manifest.yaml', cluster_path, _templating, config.ENV_DATA
    )
    create_oc_resource(
        "service-monitor.yaml", cluster_path, _templating, config.ENV_DATA
    )
    create_oc_resource(
        "prometheus-rules.yaml", cluster_path, _templating, config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)

    # Create MDS pods for CephFileSystem
    fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
    fs_data['metadata']['namespace'] = config.ENV_DATA['cluster_namespace']

    ceph_obj = OCS(**fs_data)
    ceph_obj.create()
    assert POD.wait_for_resource(
        condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
        resource_count=2, timeout=600
    )

    # Check for CephFilesystem creation in ocp
    cfs_data = CFS.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']

    if helpers.validate_cephfilesystem(cfs_name):
        log.info(f"MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        log.error(f"MDS deployment Failed! Please check logs!")

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
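# --- Added illustrative sketch (not from the original source) ---
# The "TODO: do the sampler function" comment above refers to replacing the
# fixed time.sleep() calls with polling. A minimal polling helper could look
# like this; the helper name and the assumption that ocp_obj.get() raises or
# returns an empty result while the resource is absent are illustrative.
import time


def wait_for_resource_to_exist(ocp_obj, resource_name, timeout=120, interval=5):
    end_time = time.time() + timeout
    while time.time() < end_time:
        try:
            if ocp_obj.get(resource_name=resource_name):
                return True
        except Exception:
            # Resource not created yet (or transient API error); retry until timeout.
            pass
        time.sleep(interval)
    return False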
def deploy_cluster(self, log_cli_level="DEBUG"):
    """
    We are handling both OCP and OCS deployment here based on flags

    Args:
        log_cli_level (str): log level for installer (default: DEBUG)

    """
    if not config.ENV_DATA["skip_ocp_deployment"]:
        if is_cluster_running(self.cluster_path):
            logger.warning(
                "OCP cluster is already running, skipping installation"
            )
        else:
            try:
                self.deploy_ocp(log_cli_level)
                self.post_ocp_deploy()
            except Exception as e:
                config.RUN["is_ocp_deployment_failed"] = True
                logger.error(e)
                if config.REPORTING["gather_on_deploy_failure"]:
                    collect_ocs_logs("deployment", ocs=False)
                raise

    # Deployment of network split scripts via machineconfig API happens
    # before OCS deployment.
    if config.DEPLOYMENT.get("network_split_setup"):
        master_zones = config.ENV_DATA.get("master_availability_zones")
        worker_zones = config.ENV_DATA.get("worker_availability_zones")
        # Special external zone, defined directly by an IP address list. Such a
        # zone can represent external services, access to which we can block
        # via an ax-bx-cx network split.
        if config.DEPLOYMENT.get("network_split_zonex_addrs") is not None:
            x_addr_list = config.DEPLOYMENT["network_split_zonex_addrs"].split(",")
        else:
            x_addr_list = None
        if config.DEPLOYMENT.get("arbiter_deployment"):
            arbiter_zone = self.get_arbiter_location()
            logger.debug("detected arbiter zone: %s", arbiter_zone)
        else:
            arbiter_zone = None
        # TODO: use a temporary directory for all temporary files of the OCS
        # deployment, not just in this particular case
        tmp_path = Path(tempfile.mkdtemp(prefix="ocs-ci-deployment-"))
        logger.debug("created temporary directory %s", tmp_path)
        setup_netsplit(
            tmp_path, master_zones, worker_zones, x_addr_list, arbiter_zone
        )

    if not config.ENV_DATA["skip_ocs_deployment"]:
        try:
            self.deploy_ocs()
            if config.REPORTING["collect_logs_on_success_run"]:
                collect_ocs_logs("deployment", ocp=False, status_failure=False)
        except Exception as e:
            logger.error(e)
            if config.REPORTING["gather_on_deploy_failure"]:
                # Let's do the collections separately to guard against one
                # of them failing
                collect_ocs_logs("deployment", ocs=False)
                collect_ocs_logs("deployment", ocp=False)
            raise
    else:
        logger.warning("OCS deployment will be skipped")
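# --- Added illustrative sketch (not from the original source) ---
# The TODO above asks for a shared temporary directory for deployment
# artifacts. If the files are only needed while setup_netsplit() runs, a
# self-cleaning directory is sufficient; this is a sketch under that
# assumption (the wrapper name is illustrative).
import tempfile
from pathlib import Path


def setup_netsplit_with_tmpdir(master_zones, worker_zones, x_addr_list, arbiter_zone):
    # Files are removed automatically when the block exits; keep using
    # tempfile.mkdtemp() (as above) if they must outlive the call.
    with tempfile.TemporaryDirectory(prefix="ocs-ci-deployment-") as tmp_dir:
        setup_netsplit(
            Path(tmp_dir), master_zones, worker_zones, x_addr_list, arbiter_zone
        )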