def wait_for_mirroring_status_ok(replaying_images=None, timeout=300):
    """
    Wait for the mirroring status to reach health OK and the expected number
    of replaying images on each of the ODF clusters

    Args:
        replaying_images (int): Expected number of images in replaying state
        timeout (int): Time in seconds to wait for the mirroring status to reach OK

    Returns:
        bool: True if the status contains the expected health and state values

    Raises:
        AssertionError: In case of unexpected mirroring status

    """
    for cluster in get_non_acm_cluster_config():
        config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
        logger.info(
            f"Validating mirroring status on cluster {cluster.ENV_DATA['cluster_name']}"
        )
        sample = TimeoutSampler(
            timeout=timeout,
            sleep=5,
            func=check_mirroring_status_ok,
            replaying_images=replaying_images,
        )
        assert sample.wait_for_func_status(result=True), (
            "The mirroring status does not have expected values within the time"
            f" limit on cluster {cluster.ENV_DATA['cluster_name']}"
        )
    return True
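# Behavioural sketch of the TimeoutSampler polling used above (an illustrative
# re-implementation, not the ocs-ci class): call `func` every `sleep` seconds
# and report whether it returned the expected result before `timeout` elapsed.
import time


def poll_for_status(func, result=True, timeout=300, sleep=5, **func_kwargs):
    """Return True once func(**func_kwargs) == result, False on timeout."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if func(**func_kwargs) == result:
            return True
        time.sleep(sleep)
    return False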
def do_rdr_acm_ocp_deploy(self):
    """
    Specific to Regional DR OCP cluster deployments

    """
    factory = acm_ui.ACMOCPDeploymentFactory()
    self.ui_driver = acm.login_to_acm()

    if self.deploy_sync_mode == "async":
        rdr_clusters = get_non_acm_cluster_config()
        for c in rdr_clusters:
            logger.info(
                f"{c.ENV_DATA['cluster_name']}: "
                f"{c.ENV_DATA['platform']}/{c.ENV_DATA['deployment_type']}"
            )
        for cluster_conf in rdr_clusters:
            deployer = factory.get_platform_instance(self.ui_driver, cluster_conf)
            deployer.create_cluster_prereq()
            deployer.create_cluster()
            self.deployment_cluster_list.append(deployer)
        # At this point the deployment of all non-ACM OCP clusters has been
        # triggered; we need to wait for all of them to succeed
        self.wait_for_all_clusters_async()
        # Download kubeconfig to the respective directories
        for cluster in self.deployment_cluster_list:
            cluster.download_cluster_conf_files()
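# Interface sketch (hypothetical, inferred only from the calls above): the
# object returned by ACMOCPDeploymentFactory.get_platform_instance() needs to
# provide just these three methods for do_rdr_acm_ocp_deploy() to work, so a
# new platform backend must implement at least:
from abc import ABC, abstractmethod


class ACMOCPPlatformDeployer(ABC):
    """Minimal contract assumed by do_rdr_acm_ocp_deploy(); name is illustrative."""

    @abstractmethod
    def create_cluster_prereq(self):
        """Prepare platform prerequisites (credentials, networks, etc.)."""

    @abstractmethod
    def create_cluster(self):
        """Trigger OCP cluster creation through the ACM UI."""

    @abstractmethod
    def download_cluster_conf_files(self):
        """Download kubeconfig and auth files for the deployed cluster."""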
def submariner_configure_upstream(self):
    """
    Deploy and configure upstream Submariner

    Raises:
        DRPrimaryNotFoundException: If there is no designated primary cluster found

    """
    if self.designated_broker_cluster_index < 0:
        raise DRPrimaryNotFoundException("Designated primary cluster not found")

    # Deploy the broker on the designated cluster.
    # Follow this config switch statement carefully to be mindful
    # about the context with which we are performing the operations.
    config.switch_ctx(self.designated_broker_cluster_index)
    logger.info(f"Switched context: {config.cluster_ctx.ENV_DATA['cluster_name']}")

    deploy_broker_cmd = "deploy-broker"
    try:
        run_subctl_cmd(deploy_broker_cmd)
    except CommandFailed:
        logger.exception("Failed to deploy submariner broker")
        raise

    # Label the gateway nodes on all non-ACM clusters
    restore_index = config.cur_index
    for cluster in get_non_acm_cluster_config():
        config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
        gateway_node = self.get_default_gateway_node()
        label_nodes([gateway_node], constants.SUBMARINER_GATEWAY_NODE_LABEL)
    config.switch_ctx(restore_index)

    # Join all the clusters (except the ACM cluster in case of hub deployment)
    for cluster in config.clusters:
        cluster_index = cluster.MULTICLUSTER["multicluster_index"]
        if cluster_index != config.get_acm_index():
            join_cmd = (
                f"join --kubeconfig {cluster.RUN['kubeconfig']} "
                f"{config.ENV_DATA['submariner_info_file']} "
                f"--clusterid c{self.cluster_seq} --natt=false"
            )
            try:
                run_subctl_cmd(join_cmd)
                logger.info(
                    f"Subctl join succeeded for {cluster.ENV_DATA['cluster_name']}"
                )
            except CommandFailed:
                logger.exception("Cluster failed to join")
                raise
            self.cluster_seq = self.cluster_seq + 1
            self.dr_only_list.append(cluster_index)

    # Verify Submariner connectivity between the clusters (excluding ACM)
    kubeconf_list = []
    for i in self.dr_only_list:
        kubeconf_list.append(config.clusters[i].RUN["kubeconfig"])
    connect_check = f"verify {' '.join(kubeconf_list)} --only connectivity"
    run_subctl_cmd(connect_check)
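# Hedged sketch: the save/switch/restore pattern around config.switch_ctx()
# recurs throughout these functions. A small context manager (a hypothetical
# helper, not present in the codebase) would make the restore automatic even
# when the body raises:
from contextlib import contextmanager


@contextmanager
def switched_ctx(index):
    """Temporarily switch the framework config context to `index`."""
    restore_index = config.cur_index
    config.switch_ctx(index)
    try:
        yield
    finally:
        config.switch_ctx(restore_index)


# Example use, mirroring the gateway-labelling loop above:
#
#     for cluster in get_non_acm_cluster_config():
#         with switched_ctx(cluster.MULTICLUSTER["multicluster_index"]):
#             gateway_node = self.get_default_gateway_node()
#             label_nodes([gateway_node], constants.SUBMARINER_GATEWAY_NODE_LABEL)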
def wait_for_mirroring_status_ok(replaying_images=None, timeout=300):
    """
    Wait for the mirroring status to reach health OK and the expected number
    of replaying images on each of the ODF clusters

    Args:
        replaying_images (int): Expected number of images in replaying state
        timeout (int): Time in seconds to wait for the mirroring status to reach OK

    Returns:
        bool: True if the status contains the expected health and state values

    Raises:
        TimeoutExpiredError: In case of unexpected mirroring status

    """
    restore_index = config.cur_index
    if not replaying_images:
        replaying_images = 0
        for cluster in get_non_acm_cluster_config():
            config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
            replaying_images += len(
                get_all_pvcs_in_storageclass(constants.CEPHBLOCKPOOL_SC)
            )
        replaying_images -= 2  # Ignore db-noobaa-db-pg-0 PVCs (one per cluster)
    for cluster in get_non_acm_cluster_config():
        config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
        logger.info(
            f"Validating mirroring status on cluster {cluster.ENV_DATA['cluster_name']}"
        )
        sample = TimeoutSampler(
            timeout=timeout,
            sleep=5,
            func=check_mirroring_status_ok,
            replaying_images=replaying_images,
        )
        if not sample.wait_for_func_status(result=True):
            error_msg = (
                "The mirroring status does not have expected values within the time"
                f" limit on cluster {cluster.ENV_DATA['cluster_name']}"
            )
            logger.error(error_msg)
            raise TimeoutExpiredError(error_msg)
    config.switch_ctx(restore_index)
    return True
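# Usage sketch for wait_for_mirroring_status_ok(); the import path follows the
# usual ocs-ci layout but is an assumption, adjust to wherever the helper lives:
#
#     from ocs_ci.helpers.dr_helpers import wait_for_mirroring_status_ok
#
#     # Block until every non-ACM cluster reports mirroring health OK; the
#     # expected replaying-image count is derived from the RBD PVCs when the
#     # argument is omitted:
#     wait_for_mirroring_status_ok(timeout=600)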
def test_deployment(pvc_factory, pod_factory):
    deploy = config.RUN["cli_params"].get("deploy")
    teardown = config.RUN["cli_params"].get("teardown")
    if not teardown or deploy:
        log.info("Verifying OCP cluster is running")
        assert is_cluster_running(config.ENV_DATA["cluster_path"])
        if not config.ENV_DATA["skip_ocs_deployment"]:
            if config.multicluster:
                restore_ctx_index = config.cur_index
                for cluster in get_non_acm_cluster_config():
                    config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
                    log.info(
                        f"Sanity check for cluster: {cluster.ENV_DATA['cluster_name']}"
                    )
                    sanity_helpers = Sanity()
                    sanity_helpers.health_check()
                    sanity_helpers.delete_resources()
                config.switch_ctx(restore_ctx_index)
            else:
                ocs_registry_image = config.DEPLOYMENT.get("ocs_registry_image")
                if config.ENV_DATA["mcg_only_deployment"]:
                    mcg_only_install_verification(
                        ocs_registry_image=ocs_registry_image
                    )
                    return
                else:
                    ocs_install_verification(ocs_registry_image=ocs_registry_image)

                # Check basic cluster functionality by creating resources
                # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
                # run IO and delete the resources
                if config.DEPLOYMENT["external_mode"]:
                    sanity_helpers = SanityExternalCluster()
                else:
                    sanity_helpers = Sanity()
                if (
                    config.ENV_DATA["platform"].lower()
                    in constants.MANAGED_SERVICE_PLATFORMS
                ):
                    try:
                        sanity_helpers.health_check()
                    except exceptions.ResourceWrongStatusException as err_msg:
                        log.warning(err_msg)
                else:
                    sanity_helpers.health_check()
                sanity_helpers.delete_resources()

                # Verify ceph health
                log.info("Verifying ceph health after deployment")
                assert ceph_health_check(tries=10, delay=30)

    if teardown:
        log.info("Cluster will be destroyed during teardown part of this test.")
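# Invocation sketch: this test is normally driven through ocs-ci's run-ci
# wrapper, which is what populates config.RUN["cli_params"] with the
# `deploy`/`teardown` flags checked above. Illustrative command line only;
# exact flags and paths depend on the local setup:
#
#     run-ci -m deployment --deploy \
#         --cluster-name my-cluster --cluster-path /tmp/my-cluster \
#         --ocsci-conf conf/my-conf.yaml \
#         tests/ecosystem/deployment/test_deployment.py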
def post_deploy_ops(self):
    """
    1. Install ingress certificates on OCP clusters deployed through ACM
    2. Run post_ocp_deploy on OCP clusters

    """
    prev = config.cur_index
    for cluster in get_non_acm_cluster_config():
        config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"])
        ssl_key = config.DEPLOYMENT.get("ingress_ssl_key")
        ssl_cert = config.DEPLOYMENT.get("ingress_ssl_cert")
        # Remove stale key/cert files left over from a previous cluster so
        # that fresh certificates are generated and installed for this one
        for f in (ssl_key, ssl_cert):
            if f and os.path.exists(f):
                os.unlink(f)
        logger.info("Running post ocp deploy ops")
        self.post_ocp_deploy()
    config.switch_ctx(prev)
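# Equivalent sketch for the stale-file cleanup above, using pathlib's
# unlink(missing_ok=True) (Python 3.8+) to avoid the exists()/unlink() race:
#
#     from pathlib import Path
#
#     for f in (ssl_key, ssl_cert):
#         if f:
#             Path(f).unlink(missing_ok=True)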
def destroy_cluster(self, log_cli_level=None):
    """
    Teardown OCP clusters deployed through ACM

    """
    self.ui_driver = acm.login_to_acm()
    cluster_list = list()
    rdr_clusters = get_non_acm_cluster_config()
    logger.info("The following ACM-deployed OCP clusters will be destroyed")
    for cluster in rdr_clusters:
        logger.info(
            f"[{cluster.ENV_DATA['cluster_name']}_"
            f"{cluster.ENV_DATA['platform']}_"
            f"{cluster.ENV_DATA['deployment_type']}]"
        )
    for cluster_conf in rdr_clusters:
        destroyer = self.factory.get_platform_instance(self.ui_driver, cluster_conf)
        destroyer.destroy_cluster()
        cluster_list.append(destroyer)
    self.wait_for_all_cluster_async_destroy(cluster_list)
    self.post_destroy_ops(cluster_list)