def cluster(request):
    """
    Deploy an OCP cluster (when one is not already running) and install the
    OCS/rook resources on top of it.

    Args:
        request: pytest fixture request object; used to register the
            ``cluster_teardown`` finalizer when --teardown was passed.

    The function skips deployment when the cluster at ``cluster_path`` is
    reachable, and fails the run on invalid CLI parameter combinations.
    """
    log.info(f"All logs located at {log_path}")
    log.info("Running OCS basic installation")
    cluster_path = config.ENV_DATA['cluster_path']
    deploy = config.RUN['cli_params']['deploy']
    teardown = config.RUN['cli_params']['teardown']
    # Add a finalizer to teardown the cluster after test execution is finished
    if teardown:
        request.addfinalizer(cluster_teardown)
        log.info("Will teardown cluster because --teardown was provided")
    # Test cluster access and if exist just skip the deployment.
    if is_cluster_running(cluster_path):
        log.info("The installation is skipped because the cluster is running")
        return
    elif teardown and not deploy:
        # Cluster is not reachable, but teardown was requested; the finalizer
        # registered above will still attempt the teardown.
        log.info(
            "Attempting teardown of non-accessible cluster: %s", cluster_path
        )
        return
    elif not deploy and not teardown:
        msg = "The given cluster can not be connected to: {}. ".format(
            cluster_path
        )
        msg += (
            "Provide a valid --cluster-path or use --deploy to deploy a new "
            "cluster"
        )
        pytest.fail(msg)
    elif not system.is_path_empty(cluster_path) and deploy:
        # Refuse to deploy into a non-empty directory to avoid clobbering an
        # existing (possibly broken) cluster installation.
        msg = "The given cluster path is not empty: {}. ".format(cluster_path)
        msg += (
            "Provide an empty --cluster-path and --deploy to deploy a new "
            "cluster"
        )
        pytest.fail(msg)
    else:
        log.info(
            "A testing cluster will be deployed and cluster information "
            "stored at: %s",
            cluster_path
        )

    # Generate install-config from template
    log.info("Generating install-config")
    run_cmd(f"mkdir -p {cluster_path}")
    pull_secret_path = os.path.join(
        constants.TOP_DIR,
        "data",
        "pull-secret"
    )

    # TODO: check for supported platform and raise the exception if not
    # supported. Currently we support just AWS.
    _templating = templating.Templating()
    install_config_str = _templating.render_template(
        "install-config.yaml.j2", config.ENV_DATA
    )
    # Log the install config *before* adding the pull secret, so we don't leak
    # sensitive data.
    log.info(f"Install config: \n{install_config_str}")
    # Parse the rendered YAML so that we can manipulate the object directly
    install_config_obj = yaml.safe_load(install_config_str)
    with open(pull_secret_path, "r") as f:
        # Parse, then unparse, the JSON file.
        # We do this for two reasons: to ensure it is well-formatted, and
        # also to ensure it ends up as a single line.
        install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
    install_config_str = yaml.safe_dump(install_config_obj)
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(install_config_str)

    # Download installer
    installer = get_openshift_installer(
        config.DEPLOYMENT['installer_version']
    )
    # Download client
    get_openshift_client()

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(
        f"{installer} create cluster "
        f"--dir {cluster_path} "
        f"--log-level debug"
    )

    # Test cluster access
    if not OCP.set_kubeconfig(
        os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))
    ):
        pytest.fail("Cluster is not available!")

    # TODO: Create cluster object, add to config.ENV_DATA for other tests to
    # utilize.
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=config.ENV_DATA['region'])

    # render templates and create resources
    create_oc_resource(
        'common.yaml', cluster_path, _templating, config.ENV_DATA
    )
    run_cmd(
        f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
        f'"openshift.io/cluster-monitoring=true"'
    )
    run_cmd(
        f"oc policy add-role-to-user view "
        f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
        f"-n {config.ENV_DATA['cluster_namespace']}"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_rbd.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_rbd.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_cephfs.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/cephfs/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_cephfs.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/cephfs/"
    )
    # Increased to 15 seconds as 10 is not enough
    # TODO: do the sampler function and check if resource exist
    wait_time = 15
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'operator-openshift-with-csi.yaml', cluster_path, _templating,
        config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-operator "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-discover "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    create_oc_resource(
        'cluster.yaml', cluster_path, _templating, config.ENV_DATA
    )

    # OCP wrappers used below to poll pod and CephFilesystem resources.
    POD = ocp.OCP(
        kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
    )
    CFS = ocp.OCP(
        kind=constants.CEPHFILESYSTEM,
        namespace=config.ENV_DATA['cluster_namespace']
    )
    # Check for the Running status of Ceph Pods
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-agent "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr',
        timeout=600
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )
    create_oc_resource(
        'toolbox.yaml', cluster_path, _templating, config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'storage-manifest.yaml', cluster_path, _templating, config.ENV_DATA
    )
    create_oc_resource(
        "service-monitor.yaml", cluster_path, _templating, config.ENV_DATA
    )
    create_oc_resource(
        "prometheus-rules.yaml", cluster_path, _templating, config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)

    # Create MDS pods for CephFileSystem
    fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
    fs_data['metadata']['namespace'] = config.ENV_DATA['cluster_namespace']
    ceph_obj = OCS(**fs_data)
    ceph_obj.create()
    assert POD.wait_for_resource(
        condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
        resource_count=2, timeout=600
    )

    # Check for CephFilesystem creation in ocp
    cfs_data = CFS.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        log.info(f"MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        log.error(
            f"MDS deployment Failed! Please check logs!"
        )

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Deploys OCS via the operator, waits for the core Ceph pods (mon, mgr,
    osd, tools) to reach Running, validates PVC backing and PDBs, optionally
    reconfigures the OCP monitoring stack and image registry to use OCS, and
    finally asserts cluster health.
    """
    ceph_cluster = ocp.OCP(kind='CephCluster', namespace=self.namespace)
    try:
        ceph_cluster.get().get('items')[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: no CephCluster items yet; CommandFailed: the CRD or
        # namespace is not present — either way proceed with installation.
        logger.info("Running OCS basic installation")
    self.deploy_ocs_via_operator()
    pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
    cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
    # Check for Ceph pods
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr',
        timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )

    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()

    # validate PDB creation of MON, MDS, OSD pods
    validate_pdb_creation()

    # Creating toolbox pod
    setup_ceph_toolbox()

    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector='app=rook-ceph-tools',
        resource_count=1, timeout=600
    )

    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        logger.error("MDS deployment Failed! Please check logs!")

    # Change monitoring backend to OCS
    if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
        'persistent-monitoring'
    ):
        sc = helpers.default_storage_class(
            interface_type=constants.CEPHBLOCKPOOL
        )
        # Get the list of monitoring pods
        pods_list = get_all_pods(
            namespace=defaults.OCS_MONITORING_NAMESPACE,
            selector=['prometheus', 'alertmanager']
        )
        # Create configmap cluster-monitoring-config and reconfigure
        # storage class and telemeter server (if the url is specified in a
        # config file)
        create_configmap_cluster_monitoring_pod(
            sc_name=sc.name,
            telemeter_server_url=config.ENV_DATA.get("telemeter_server_url")
        )
        # Take some time to respin the pod
        waiting_time = 45
        logger.info(f"Waiting {waiting_time} seconds...")
        time.sleep(waiting_time)
        # Validate the pods are respinned and in running state
        retry(
            (CommandFailed, ResourceWrongStatusException), tries=3, delay=15
        )(validate_pods_are_respinned_and_running_state)(pods_list)
        # Validate the pvc is created on monitoring pods
        validate_pvc_created_and_bound_on_monitoring_pods()
        # Validate the pvc are mounted on pods
        retry(
            (CommandFailed, AssertionError), tries=3, delay=15
        )(validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
    elif config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
        "telemeter_server_url"
    ):
        # Create configmap cluster-monitoring-config to reconfigure
        # telemeter server url when 'persistent-monitoring' is False
        create_configmap_cluster_monitoring_pod(
            telemeter_server_url=config.ENV_DATA["telemeter_server_url"]
        )

    # Change registry backend to OCS CEPHFS RWX PVC
    registry.change_registry_backend_to_ocs()

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=self.namespace)
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Supports external-mode RHCS deployments (short-circuits to
    ``deploy_with_external_mode``), otherwise installs via the operator,
    validates the core Ceph pods, reconfigures monitoring/registry backends,
    performs a health check with clock-skew remediation, and adjusts NooBaa
    endpoint auto-scaling based on node specs.
    """
    ceph_cluster = ocp.OCP(kind='CephCluster', namespace=self.namespace)
    try:
        ceph_cluster.get().get('items')[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: no CephCluster items yet; CommandFailed: the CRD or
        # namespace is not present — either way proceed with installation.
        logger.info("Running OCS basic installation")

    if config.DEPLOYMENT['external_mode']:
        # External mode consumes a pre-existing RHCS cluster; nothing else
        # in this method applies.
        logger.info("Deploying OCS on external mode RHCS")
        return self.deploy_with_external_mode()
    self.deploy_ocs_via_operator()
    pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
    cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
    # Check for Ceph pods
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr',
        timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )

    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()

    # validate PDB creation of MON, MDS, OSD pods
    validate_pdb_creation()

    # Creating toolbox pod
    setup_ceph_toolbox()

    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector='app=rook-ceph-tools',
        resource_count=1, timeout=600
    )

    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        logger.error("MDS deployment Failed! Please check logs!")

    # Change monitoring backend to OCS
    if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
        'persistent-monitoring'
    ):
        sc = helpers.default_storage_class(
            interface_type=constants.CEPHBLOCKPOOL
        )
        # Get the list of monitoring pods
        pods_list = get_all_pods(
            namespace=defaults.OCS_MONITORING_NAMESPACE,
            selector=['prometheus', 'alertmanager']
        )
        # Create configmap cluster-monitoring-config and reconfigure
        # storage class and telemeter server (if the url is specified in a
        # config file)
        create_configmap_cluster_monitoring_pod(
            sc_name=sc.name,
            telemeter_server_url=config.ENV_DATA.get("telemeter_server_url")
        )
        # Take some time to respin the pod
        waiting_time = 45
        logger.info(f"Waiting {waiting_time} seconds...")
        time.sleep(waiting_time)
        # Validate the pods are respinned and in running state
        retry(
            (CommandFailed, ResourceWrongStatusException), tries=3, delay=15
        )(validate_pods_are_respinned_and_running_state)(pods_list)
        # Validate the pvc is created on monitoring pods
        validate_pvc_created_and_bound_on_monitoring_pods()
        # Validate the pvc are mounted on pods
        retry(
            (CommandFailed, AssertionError), tries=3, delay=15
        )(validate_pvc_are_mounted_on_monitoring_pods)(pods_list)
    elif config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
        "telemeter_server_url"
    ):
        # Create configmap cluster-monitoring-config to reconfigure
        # telemeter server url when 'persistent-monitoring' is False
        create_configmap_cluster_monitoring_pod(
            telemeter_server_url=config.ENV_DATA["telemeter_server_url"]
        )

    # Change registry backend to OCS CEPHFS RWX PVC
    registry.change_registry_backend_to_ocs()

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    try:
        ceph_health_check(namespace=self.namespace, tries=30, delay=10)
    except CephHealthException as ex:
        err = str(ex)
        logger.warning(f"Ceph health check failed with {err}")
        if "clock skew detected" in err:
            # Clock skew is a known transient on fresh nodes; switch NTP
            # servers and re-check instead of failing immediately.
            logger.info(f"Changing NTP on compute nodes to"
                        f" {constants.RH_NTP_CLOCK}")
            update_ntp_compute_nodes()
        # Re-check with a longer budget; any remaining failure fails here.
        assert ceph_health_check(namespace=self.namespace, tries=60, delay=10)

    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()

    # Modify Noobaa endpoint auto scale values according to the cluster specs
    if check_nodes_specs(min_cpu=constants.MIN_NODE_CPU,
                         min_memory=constants.MIN_NODE_MEMORY):
        logger.info("The cluster specs meet the minimum requirements and "
                    "therefore, NooBaa auto scale will be enabled")
        min_nb_eps = config.DEPLOYMENT.get('min_noobaa_endpoints')
        max_nb_eps = config.DEPLOYMENT.get('max_noobaa_endpoints')
        change_noobaa_endpoints_count(min_nb_eps=min_nb_eps,
                                      max_nb_eps=max_nb_eps)
    else:
        logger.warning(
            "The cluster specs do not meet the minimum requirements and "
            "therefore, NooBaa auto scale will remain with its default values"
        )
        # Defaults depend on the OCS version: a single endpoint pre-4.6,
        # min 1 / max 2 from 4.6 onward.
        min_eps = 1
        max_eps = 1 if float(config.ENV_DATA['ocs_version']) < 4.6 else 2
        logger.info(
            f"The Noobaa endpoint auto scale values: min: {min_eps}, "
            f"max: {max_eps}"
        )
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Two installation paths are supported, selected by
    ``self.ocs_operator_deployment``: manual creation of the rook resources
    from rendered templates, or installation via the OCS operator. Both
    paths then wait for the core Ceph pods and validate the CephFilesystem.
    """
    _templating = templating.Templating()
    ceph_cluster = ocp.OCP(kind='CephCluster', namespace=self.namespace)
    try:
        ceph_cluster.get().get('items')[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: no CephCluster items yet; CommandFailed: the CRD or
        # namespace is not present — either way proceed with installation.
        logger.info("Running OCS basic installation")

    if not self.ocs_operator_deployment:
        create_oc_resource(
            'common.yaml', self.cluster_path, _templating, config.ENV_DATA
        )
        run_cmd(
            f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
            f'"openshift.io/cluster-monitoring=true"'
        )
        run_cmd(
            f"oc policy add-role-to-user view "
            f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
            f"-n {self.namespace}"
        )
        # HACK: If you would like to drop this hack, make sure that you
        # also updated docs and write appropriate unit/integration tests
        # for config processing.
        if config.ENV_DATA.get('monitoring_enabled') in (
            "true", "True", True
        ):
            # RBAC rules for monitoring, based on documentation change in
            # rook:
            # https://github.com/rook/rook/commit/1b6fe840f6ae7372a9675ba727ecc65326708aa8
            # HACK: This should be dropped when OCS is managed by OLM
            apply_oc_resource(
                'rbac.yaml', self.cluster_path, _templating,
                config.ENV_DATA, template_dir="monitoring"
            )
        # Increased to 15 seconds as 10 is not enough
        # TODO: do the sampler function and check if resource exist
        wait_time = 15
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        create_oc_resource(
            'operator-openshift.yaml', self.cluster_path, _templating,
            config.ENV_DATA
        )
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        run_cmd(
            f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-operator "
            f"-n {self.namespace} "
            f"--timeout=120s"
        )
        run_cmd(
            f"oc wait --for condition=ready pod "
            f"-l app=rook-discover "
            f"-n {self.namespace} "
            f"--timeout=120s"
        )
        create_oc_resource(
            'cluster.yaml', self.cluster_path, _templating, config.ENV_DATA
        )
    else:
        self.deploy_ocs_via_operator()

    pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
    cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
    # Check for Ceph pods
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr',
        timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )

    if not self.ocs_operator_deployment:
        # Creating toolbox pod
        create_oc_resource(
            'toolbox.yaml', self.cluster_path, _templating, config.ENV_DATA,
        )

    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector='app=rook-ceph-tools',
        resource_count=1, timeout=600
    )

    if not self.ocs_operator_deployment:
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        # HACK: This should be dropped (including service-monitor.yaml and
        # prometheus-rules.yaml files) when OCS is managed by OLM
        if config.ENV_DATA.get('monitoring_enabled') not in (
            "true", "True", True
        ):
            # HACK: skip creation of rook-ceph-mgr service monitor when
            # monitoring is enabled (if this were not skipped, the step
            # would fail because rook would create the service monitor at
            # this point already)
            create_oc_resource(
                "service-monitor.yaml", self.cluster_path, _templating,
                config.ENV_DATA
            )
            # HACK: skip creation of prometheus-rules, rook-ceph is
            # concerned with it's setup now, based on clarification from
            # Umanga Chapagain
            create_oc_resource(
                "prometheus-rules.yaml", self.cluster_path, _templating,
                config.ENV_DATA
            )
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)

        # Create MDS pods for CephFileSystem
        fs_data = templating.load_yaml(constants.CEPHFILESYSTEM_YAML)
        fs_data['metadata']['namespace'] = self.namespace
        ceph_obj = OCS(**fs_data)
        ceph_obj.create()
        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
            resource_count=2, timeout=600
        )

    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info(f"MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        logger.error(f"MDS deployment Failed! Please check logs!")

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=self.namespace)
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Deploys OCS via the operator; when ``ui_deployment`` is requested the
    remaining steps are deferred (``skip_ocs_deployment`` is set and the
    method returns early). Otherwise waits for the core Ceph pods, validates
    the CephFilesystem, and reconfigures monitoring/registry backends.
    """
    ceph_cluster = ocp.OCP(kind='CephCluster', namespace=self.namespace)
    try:
        ceph_cluster.get().get('items')[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: no CephCluster items yet; CommandFailed: the CRD or
        # namespace is not present — either way proceed with installation.
        logger.info("Running OCS basic installation")
    self.deploy_ocs_via_operator()
    if config.DEPLOYMENT.get('ui_deployment'):
        # UI-driven deployment finishes the OCS setup itself; mark the rest
        # as skipped and stop here.
        config.ENV_DATA['skip_ocs_deployment'] = True
        return
    pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
    cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)
    # Check for Ceph pods
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr',
        timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )

    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()

    # Creating toolbox pod
    setup_ceph_toolbox()

    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector='app=rook-ceph-tools',
        resource_count=1, timeout=600
    )

    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info(f"MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        logger.error(f"MDS deployment Failed! Please check logs!")

    # Change monitoring backend to OCS
    if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get(
        'persistent-monitoring'
    ):
        sc_name = (
            f"{config.ENV_DATA['storage_cluster_name']}"
            f"-{constants.DEFAULT_SC_RBD}"
        )
        # Get the list of monitoring pods
        pods_list = get_all_pods(
            namespace=defaults.OCS_MONITORING_NAMESPACE,
            selector=['prometheus', 'alertmanager']
        )
        # Create configmap cluster-monitoring-config
        create_configmap_cluster_monitoring_pod(sc_name)
        # Take some time to respin the pod
        waiting_time = 45
        logger.info(f"Waiting {waiting_time} seconds...")
        time.sleep(waiting_time)
        # Validate the pods are respinned and in running state
        validate_pods_are_respinned_and_running_state(pods_list)
        # Validate the pvc is created on monitoring pods
        validate_pvc_created_and_bound_on_monitoring_pods()
        # Validate the pvc are mounted on pods
        validate_pvc_are_mounted_on_monitoring_pods(pods_list)

    # Change registry backend to OCS CEPHFS RWX PVC
    registry.change_registry_backend_to_ocs()

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=self.namespace)
    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Manually creates the rook/CSI resources from rendered templates, waits
    for the core Ceph pods, creates the CephFilesystem MDS pods, verifies
    cluster health, and patches the AWS gp2 storage class to non-default.
    """
    _templating = templating.Templating()
    ceph_cluster = ocp.OCP(
        kind='CephCluster', namespace=config.ENV_DATA['cluster_namespace']
    )
    try:
        ceph_cluster.get().get('items')[0]
        logger.warning("OCS cluster already exists")
        return
    except (IndexError, CommandFailed):
        # IndexError: no CephCluster items yet; CommandFailed: the CRD or
        # namespace is not present — either way proceed with installation.
        logger.info("Running OCS basic installation")
    create_oc_resource(
        'common.yaml', self.cluster_path, _templating, config.ENV_DATA
    )
    run_cmd(
        f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
        f'"openshift.io/cluster-monitoring=true"'
    )
    run_cmd(
        f"oc policy add-role-to-user view "
        f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
        f"-n {config.ENV_DATA['cluster_namespace']}"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_rbd.yaml', self.cluster_path, _templating,
        config.ENV_DATA, template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_rbd.yaml', self.cluster_path, _templating,
        config.ENV_DATA, template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_cephfs.yaml', self.cluster_path, _templating,
        config.ENV_DATA, template_dir="ocs-deployment/csi/cephfs/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_cephfs.yaml', self.cluster_path, _templating,
        config.ENV_DATA, template_dir="ocs-deployment/csi/cephfs/"
    )
    # Increased to 15 seconds as 10 is not enough
    # TODO: do the sampler function and check if resource exist
    wait_time = 15
    logger.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'operator-openshift-with-csi.yaml', self.cluster_path, _templating,
        config.ENV_DATA
    )
    logger.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-operator "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-discover "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    create_oc_resource(
        'cluster.yaml', self.cluster_path, _templating, config.ENV_DATA
    )
    pod = ocp.OCP(
        kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
    )
    cfs = ocp.OCP(
        kind=constants.CEPHFILESYSTEM,
        namespace=config.ENV_DATA['cluster_namespace']
    )
    # Check for the Running status of Ceph Pods
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-agent "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr',
        timeout=600
    )
    assert pod.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )
    create_oc_resource(
        'toolbox.yaml', self.cluster_path, _templating, config.ENV_DATA
    )
    logger.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'storage-manifest.yaml', self.cluster_path, _templating,
        config.ENV_DATA
    )
    create_oc_resource(
        "service-monitor.yaml", self.cluster_path, _templating,
        config.ENV_DATA
    )
    create_oc_resource(
        "prometheus-rules.yaml", self.cluster_path, _templating,
        config.ENV_DATA
    )
    logger.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)

    # Create MDS pods for CephFileSystem
    fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
    fs_data['metadata']['namespace'] = config.ENV_DATA['cluster_namespace']
    ceph_obj = OCS(**fs_data)
    ceph_obj.create()
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
        resource_count=2, timeout=600
    )

    # Check for CephFilesystem creation in ocp
    cfs_data = cfs.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info(f"MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        logger.error(f"MDS deployment Failed! Please check logs!")

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(
        namespace=config.ENV_DATA['cluster_namespace']
    )

    # patch gp2 (EBS) storage class as 'non-default'
    logger.info("Patch gp2 storageclass as non-default")
    patch = " '{\"metadata\": {\"annotations\":{\"storageclass.kubernetes.io/is-default-class\":\"false\"}}}' "
    run_cmd(
        f"oc patch storageclass gp2 "
        f"-p {patch} "
        f"--request-timeout=120s"
    )
def test_deployment(self):
    """
    Deploy a fresh OCP cluster (unless one is already reachable at
    --cluster-path) and install the OCS/rook resources on top of it.

    The test skips itself when the cluster is already running. On a fresh
    deployment it renders install-config.yaml, runs the OpenShift installer,
    creates EBS volumes for the workers, applies the rook/CSI resources,
    waits for the core Ceph pods, creates the CephFilesystem MDS pods, and
    finally asserts cluster health.
    """
    log.info("Running OCS basic installation")
    cluster_path = config.ENV_DATA['cluster_path']
    # Test cluster access and if exist just skip the deployment.
    if config.RUN['cli_params'].get('cluster_path') and OCP.set_kubeconfig(
        os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))
    ):
        pytest.skip(
            "The installation is skipped cause the cluster is running"
        )

    # Generate install-config from template
    log.info("Generating install-config")
    run_cmd(f"mkdir -p {cluster_path}")
    pull_secret_path = os.path.join(
        TOP_DIR,
        "data",
        "pull-secret"
    )

    # TODO: check for supported platform and raise the exception if not
    # supported. Currently we support just AWS.
    _templating = templating.Templating()
    install_config_str = _templating.render_template(
        "install-config.yaml.j2", config.ENV_DATA
    )
    # Log the install config *before* adding the pull secret, so we don't
    # leak the credentials into the test logs (previously this was logged
    # after the secret was injected).
    log.info(f"Install config: \n{install_config_str}")
    # Parse the rendered YAML so that we can manipulate the object directly
    install_config_obj = yaml.safe_load(install_config_str)
    with open(pull_secret_path, "r") as f:
        # Parse, then unparse, the JSON file.
        # We do this for two reasons: to ensure it is well-formatted, and
        # also to ensure it ends up as a single line.
        install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
    install_config_str = yaml.safe_dump(install_config_obj)
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(install_config_str)

    # Download installer
    installer = get_openshift_installer(
        config.DEPLOYMENT['installer_version']
    )
    # Download client
    get_openshift_client()

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(
        f"{installer} create cluster "
        f"--dir {cluster_path} "
        f"--log-level debug"
    )

    # Test cluster access
    if not OCP.set_kubeconfig(
        os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))
    ):
        pytest.fail("Cluster is not available!")

    # TODO: Create cluster object, add to config.ENV_DATA for other tests to
    # utilize.
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=config.ENV_DATA['region'])

    # render templates and create resources
    create_oc_resource(
        'common.yaml', cluster_path, _templating, config.ENV_DATA
    )
    run_cmd(
        f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
        f'"openshift.io/cluster-monitoring=true"'
    )
    run_cmd(
        f"oc policy add-role-to-user view "
        f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
        f"-n {config.ENV_DATA['cluster_namespace']}"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_rbd.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_rbd.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_cephfs.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/cephfs/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_cephfs.yaml',
        cluster_path,
        _templating,
        config.ENV_DATA,
        template_dir="ocs-deployment/csi/cephfs/"
    )
    # Increased to 15 seconds as 10 is not enough
    # TODO: do the sampler function and check if resource exist
    wait_time = 15
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'operator-openshift-with-csi.yaml', cluster_path, _templating,
        config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-operator "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-discover "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    create_oc_resource(
        'cluster.yaml', cluster_path, _templating, config.ENV_DATA
    )

    # Check for the Running status of Ceph Pods.
    # NOTE(review): POD and CFS are not defined in this method — presumably
    # module-level OCP handles; verify against the enclosing module.
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-agent "
        f"-n {config.ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mon',
        resource_count=3, timeout=600
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-mgr',
        timeout=600
    )
    assert POD.wait_for_resource(
        condition='Running', selector='app=rook-ceph-osd',
        resource_count=3, timeout=600
    )
    create_oc_resource(
        'toolbox.yaml', cluster_path, _templating, config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'storage-manifest.yaml', cluster_path, _templating, config.ENV_DATA
    )
    create_oc_resource(
        "service-monitor.yaml", cluster_path, _templating, config.ENV_DATA
    )
    create_oc_resource(
        "prometheus-rules.yaml", cluster_path, _templating, config.ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)

    # Create MDS pods for CephFileSystem
    self.fs_data = copy.deepcopy(defaults.CEPHFILESYSTEM_DICT)
    self.fs_data['metadata']['namespace'] = (
        config.ENV_DATA['cluster_namespace']
    )
    global CEPH_OBJ
    CEPH_OBJ = OCS(**self.fs_data)
    CEPH_OBJ.create()
    assert POD.wait_for_resource(
        condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
        resource_count=2, timeout=600
    )

    # Check for CephFilesystem creation in ocp
    cfs_data = CFS.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']
    if helpers.validate_cephfilesystem(cfs_name):
        log.info("MDS deployment is successful!")
    else:
        log.error("MDS deployment Failed! Please check logs!")

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])