def check_local_volume_local_volume_set():
    """
    Function to check if LocalVolume and LocalVolumeSet are present or not

    Returns:
        dict: dict with presence flags for localvolume and localvolumeset

    """
    lv_or_lvs_dict = {}
    logger.info("Checking if Local Volume is Present")
    if csv.get_csvs_start_with_prefix(
        csv_prefix=defaults.LOCAL_STORAGE_OPERATOR_NAME,
        namespace=config.ENV_DATA["local_storage_namespace"],
    ):
        ocp_obj = OCP()
        command = f"get localvolume local-block -n {config.ENV_DATA['local_storage_namespace']} "
        try:
            ocp_obj.exec_oc_cmd(command, out_yaml_format=False)
            lv_or_lvs_dict["localvolume"] = True
        except CommandFailed as ex:
            logger.debug(f"Local volume does not exist! Exception: {ex}")
            logger.info("No Local volume found")
            lv_or_lvs_dict["localvolume"] = False

    logger.info("Checking if Local Volume Set is Present")
    if csv.get_csvs_start_with_prefix(
        csv_prefix=defaults.LOCAL_STORAGE_OPERATOR_NAME,
        namespace=config.ENV_DATA["local_storage_namespace"],
    ):
        ocp_obj = OCP()
        command = (
            f"get {constants.LOCAL_VOLUME_SET} {constants.LOCAL_BLOCK_RESOURCE} "
            f"-n {config.ENV_DATA['local_storage_namespace']} "
        )
        try:
            ocp_obj.exec_oc_cmd(command, out_yaml_format=False)
            lv_or_lvs_dict["localvolumeset"] = True
        except CommandFailed as ex:
            logger.debug(f"Local volume set does not exist! Exception: {ex}")
            lv_or_lvs_dict["localvolumeset"] = False

    return lv_or_lvs_dict
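# A minimal usage sketch, not from the source: the dict above may lack keys
# when the local storage operator CSV is absent, so callers should read it
# with dict.get() and a default. The test function name is hypothetical.
import pytest

def example_skip_without_lso():
    presence = check_local_volume_local_volume_set()
    if not presence.get("localvolume", False) and not presence.get(
        "localvolumeset", False
    ):
        pytest.skip("Neither LocalVolume nor LocalVolumeSet is deployed")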
def check_local_volume():
    """
    Function to check if Local-volume is present or not

    Returns:
        bool: True if LV present, False if LV not present

    """
    if csv.get_csvs_start_with_prefix(
        csv_prefix=defaults.LOCAL_STORAGE_OPERATOR_NAME,
        namespace=constants.LOCAL_STORAGE_NAMESPACE,
    ):
        ocp_obj = OCP()
        command = "get localvolume local-block -n local-storage "
        status = ocp_obj.exec_oc_cmd(command, out_yaml_format=False)
        return "No resources found" not in status
def check_local_volume():
    """
    Function to check if Local-volume is present or not

    Returns:
        bool: True if LV present, False if LV not present

    """
    if csv.get_csvs_start_with_prefix(
        csv_prefix=defaults.LOCAL_STORAGE_OPERATOR_NAME,
        namespace=config.ENV_DATA["local_storage_namespace"],
    ):
        ocp_obj = OCP()
        command = f"get localvolume local-block -n {config.ENV_DATA['local_storage_namespace']} "
        try:
            status = ocp_obj.exec_oc_cmd(command, out_yaml_format=False)
        except CommandFailed as ex:
            logger.debug(f"Local volume does not exist! Exception: {ex}")
            return False
        return "No resources found" not in status
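# A hedged usage sketch, not from the source: check_local_volume() can be
# polled with TimeoutSampler (used the same way in setup_quay_operator below)
# to wait for the LocalVolume CR after deploying the local storage operator.
# The function name and the timeout/sleep values are illustrative.
def example_wait_for_local_volume():
    for present in TimeoutSampler(300, 10, check_local_volume):
        if present:
            logger.info("LocalVolume local-block is present")
            break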
def get_ocs_build_number():
    """
    Gets the build number for ocs operator

    Returns:
        str: build number for ocs operator version

    """
    # Importing here to avoid circular dependency
    from ocs_ci.ocs.resources.csv import get_csvs_start_with_prefix

    build_num = ""
    if config.REPORTING['us_ds'] == 'DS':
        build_str = get_csvs_start_with_prefix(
            defaults.OCS_OPERATOR_NAME,
            defaults.ROOK_CLUSTER_NAMESPACE,
        )
        try:
            return build_str[0]['metadata']['name'].partition('.')[2]
        except (IndexError, AttributeError):
            logging.warning("No version info found for OCS operator")
    return build_num
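# A hedged aside with an illustrative CSV name, not from the source:
# str.partition(".") splits on the first dot only, so the function above
# returns everything past the operator-name prefix:
#   "ocs-operator.v4.9.0".partition(".")      -> ("ocs-operator", ".", "v4.9.0")
#   "ocs-operator.v4.9.0".partition(".")[2]   -> "v4.9.0"
# An empty CSV list raises IndexError, which is caught and logged above.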
def setup_quay_operator(self):
    """
    Deploys Quay operator

    """
    quay_operator_data = templating.load_yaml(file=constants.QUAY_SUB)
    self.quay_operator = OCS(**quay_operator_data)
    logger.info(f"Installing Quay operator: {self.quay_operator.name}")
    self.quay_operator.create()
    for quay_pod in TimeoutSampler(
        300, 10, get_pod_name_by_pattern, constants.QUAY_OPERATOR, self.namespace
    ):
        if quay_pod:
            self.quay_pod_obj.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=quay_pod[0],
                sleep=30,
                timeout=600,
            )
            break
    self.quay_operator_csv = get_csvs_start_with_prefix(
        csv_prefix=constants.QUAY_OPERATOR,
        namespace=self.namespace,
    )[0]["metadata"]["name"]
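# A hedged teardown sketch mirroring setup_quay_operator(), not from the
# source: the method name is hypothetical, and OCS.delete() is assumed to
# exist on the resource object created above (it follows the create()/delete()
# pattern of the framework's resource classes).
def teardown_quay_operator(self):
    """
    Deletes the Quay operator subscription created by setup_quay_operator
    """
    logger.info(f"Deleting Quay operator: {self.quay_operator.name}")
    self.quay_operator.delete()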
def test_pod_disruptions(self, create_pvcs_and_pods):
    """
    Test to perform pod disruptions in the consumer and provider clusters

    """
    # Lists of pods to be disrupted. Separate lists for consumer and provider
    # keep the implementation simple
    pods_on_consumer = [
        "alertmanager_managed_ocs_alertmanager",
        "ocs_osd_controller_manager",
        "prometheus_managed_ocs_prometheus",
        "prometheus_operator",
        "ocs_operator",
    ]
    pods_on_provider = [
        "alertmanager_managed_ocs_alertmanager",
        "ocs_osd_controller_manager",
        "prometheus_managed_ocs_prometheus",
        "prometheus_operator",
        "ocs_provider_server",
        "ocs_operator",
    ]
    disruption_on_consumer = []
    disruption_on_provider = []

    # Start I/O
    log.info("Starting fio on all pods")
    for pod_obj in self.io_pods:
        if pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
            storage_type = "block"
            direct = 1
        else:
            storage_type = "fs"
            direct = 0
        pod_obj.run_io(
            storage_type=storage_type,
            size="10G",
            fio_filename=f"{pod_obj.name}",
            runtime=320,
            end_fsync=1,
            direct=direct,
            invalidate=0,
            fio_installed=True,
        )

    consumer_index_iter = cycle(self.consumer_indexes)

    # Create Disruptions instance for each pod to be disrupted on consumer
    for pod_type in pods_on_consumer:
        consumer_index = next(consumer_index_iter)
        config.switch_ctx(consumer_index)
        disruption_obj = disruption_helpers.Disruptions()
        # Select each pod to be disrupted from different consumers
        disruption_obj.set_resource(resource=pod_type, cluster_index=consumer_index)
        disruption_obj.index_of_consumer = consumer_index
        disruption_on_consumer.append(disruption_obj)

    # Create Disruptions instance for each pod to be disrupted on provider
    config.switch_to_provider()
    for pod_type in pods_on_provider:
        disruption_obj = disruption_helpers.Disruptions()
        disruption_obj.set_resource(
            resource=pod_type, cluster_index=self.provider_cluster_index
        )
        disruption_on_provider.append(disruption_obj)

    # Delete pods on consumer one at a time
    log.info("Starting pod disruptions on consumer clusters")
    for disruptions_obj in disruption_on_consumer:
        disruptions_obj.delete_resource()
        # ocs-operator respin will trigger rook-ceph-tools pod respin.
        # Patch rook-ceph-tools pod to run ceph commands.
        if disruptions_obj.resource == "ocs_operator":
            config.switch_ctx(disruptions_obj.index_of_consumer)
            patch_consumer_toolbox()

    # Delete pods on provider one at a time
    log.info("Starting pod disruptions on provider cluster")
    for disruptions_obj in disruption_on_provider:
        disruptions_obj.delete_resource()

    log.info("Wait for IO to complete on pods")
    for pod_obj in self.io_pods:
        pod_obj.get_fio_results()
        log.info(f"Verified IO on pod {pod_obj.name}")
    log.info("IO is successful on all pods")

    # Perform different checks in the clusters
    for cluster_index in [self.provider_cluster_index] + self.consumer_indexes:
        config.switch_ctx(cluster_index)

        # Verify managedocs components are Ready
        log.info("Verifying managedocs components state")
        managedocs_obj = OCP(
            kind="managedocs",
            resource_name="managedocs",
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
        )
        for component in {"alertmanager", "prometheus", "storageCluster"}:
            assert (
                managedocs_obj.get()["status"]["components"][component]["state"]
                == "Ready"
            ), f"{component} status is {managedocs_obj.get()['status']['components'][component]['state']}"

        # Verify storagecluster status
        log.info("Verifying storagecluster status")
        verify_storage_cluster()

        # Verify CSV status
        for managed_csv in {
            constants.OCS_CSV_PREFIX,
            constants.OSD_DEPLOYER,
            constants.OSE_PROMETHEUS_OPERATOR,
        }:
            csvs = csv.get_csvs_start_with_prefix(
                managed_csv, constants.OPENSHIFT_STORAGE_NAMESPACE
            )
            assert (
                len(csvs) == 1
            ), f"Unexpected number of CSVs with {managed_csv} prefix: {len(csvs)}"
            csv_name = csvs[0]["metadata"]["name"]
            csv_obj = csv.CSV(
                resource_name=csv_name,
                namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            )
            log.info(f"Check if {csv_name} is in Succeeded phase.")
            csv_obj.wait_for_phase(phase="Succeeded", timeout=600)

        # Verify the phase of ceph cluster
        log.info("Verify the phase of ceph cluster")
        cephcluster = OCP(
            kind="CephCluster", namespace=constants.OPENSHIFT_STORAGE_NAMESPACE
        )
        cephcluster_yaml = cephcluster.get().get("items")[0]
        expected_phase = "Connected"
        if cluster_index == self.provider_cluster_index:
            expected_phase = "Ready"
        assert (
            cephcluster_yaml["status"]["phase"] == expected_phase
        ), f"Status of cephcluster {cephcluster_yaml['metadata']['name']} is {cephcluster_yaml['status']['phase']}"

    # Create PVC and pods on all consumer clusters
    log.info("Creating new PVCs and pods")
    pods = list()
    for cluster_index in self.consumer_indexes:
        config.switch_ctx(cluster_index)
        consumer_cluster_kubeconfig = os.path.join(
            config.clusters[cluster_index].ENV_DATA["cluster_path"],
            config.clusters[cluster_index].RUN.get("kubeconfig_location"),
        )
        pvcs, io_pods = create_pvcs_and_pods(
            pvc_size=self.pvc_size,
            replica_count=1,
            pod_dict_path=constants.PERF_POD_YAML,
        )
        for pvc_obj in pvcs:
            pvc_obj.ocp.cluster_kubeconfig = consumer_cluster_kubeconfig
        for io_pod in io_pods:
            io_pod.ocp.cluster_kubeconfig = consumer_cluster_kubeconfig
        pvcs[0].project.cluster_kubeconfig = consumer_cluster_kubeconfig
        pods.extend(io_pods)

    # Run I/O on new pods
    log.info("Running I/O on new pods")
    for pod_obj in pods:
        if pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
            storage_type = "block"
            direct = 1
        else:
            storage_type = "fs"
            direct = 0
        pod_obj.run_io(
            storage_type=storage_type,
            size="10G",
            fio_filename=f"{pod_obj.name}",
            runtime=320,
            end_fsync=1,
            direct=direct,
            invalidate=0,
            fio_installed=True,
        )

    log.info("Wait for I/O to complete on new pods")
    for pod_obj in pods:
        pod_obj.get_fio_results()
        log.info(f"Verified IO on the new pod {pod_obj.name}")
    log.info("IO is successful on new pods")
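# A minimal standalone sketch, not from the source: the disruption flow used
# in the test above reduces to set_resource() followed by delete_resource(),
# both called with the same arguments as in the test. The function name and
# the choice of "ocs_operator" as the target are illustrative.
def example_single_disruption(cluster_index):
    disruption = disruption_helpers.Disruptions()
    disruption.set_resource(resource="ocs_operator", cluster_index=cluster_index)
    # Deletes the selected pod, as done per-pod in the loops above
    disruption.delete_resource()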
def verify_managed_service_resources():
    """
    Verify creation and status of resources specific to OSD and ROSA deployments:
    1. ocs-operator, ocs-osd-deployer, ose-prometheus-operator csvs are Succeeded
    2. 1 prometheus pod and 3 alertmanager pods are in Running state
    3. Managedocs components alertmanager, prometheus, storageCluster are in Ready state
    4. Verify that noobaa-operator replicas is set to 0
    5. Verify managed ocs secrets
    6. If the cluster is a provider, verify resources specific to provider clusters
    7. [temporarily left out] Verify Networkpolicy and EgressNetworkpolicy creation

    """
    # Verify CSV status
    for managed_csv in {
        constants.OCS_CSV_PREFIX,
        constants.OSD_DEPLOYER,
        constants.OSE_PROMETHEUS_OPERATOR,
    }:
        csvs = csv.get_csvs_start_with_prefix(
            managed_csv, constants.OPENSHIFT_STORAGE_NAMESPACE
        )
        assert (
            len(csvs) == 1
        ), f"Unexpected number of CSVs with {managed_csv} prefix: {len(csvs)}"
        csv_name = csvs[0]["metadata"]["name"]
        csv_obj = csv.CSV(
            resource_name=csv_name, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE
        )
        log.info(f"Check if {csv_name} is in Succeeded phase.")
        csv_obj.wait_for_phase(phase="Succeeded", timeout=600)

    # Verify alerting secrets creation
    verify_managed_secrets()

    # Verify alerting pods are Running
    pod_obj = OCP(
        kind="pod",
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for alert_pod in {
        (constants.MANAGED_PROMETHEUS_LABEL, 1),
        (constants.MANAGED_ALERTMANAGER_LABEL, 3),
    }:
        pod_obj.wait_for_resource(
            condition="Running", selector=alert_pod[0], resource_count=alert_pod[1]
        )

    # Verify managedocs components are Ready
    log.info("Getting managedocs components data")
    managedocs_obj = OCP(
        kind="managedocs",
        resource_name="managedocs",
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for component in {"alertmanager", "prometheus", "storageCluster"}:
        assert (
            managedocs_obj.get()["status"]["components"][component]["state"]
            == "Ready"
        ), f"{component} status is {managedocs_obj.get()['status']['components'][component]['state']}"

    # Verify that noobaa-operator replicas is set to 0
    noobaa_deployment = deployment.get_deployments_having_label(
        "operators.coreos.com/mcg-operator.openshift-storage=",
        constants.OPENSHIFT_STORAGE_NAMESPACE,
    )[0]
    log.info(f"Noobaa replicas count: {noobaa_deployment.replicas}")
    assert noobaa_deployment.replicas == 0

    # Verify attributes specific to cluster types
    sc = get_storage_cluster()
    sc_data = sc.get()["items"][0]
    if config.ENV_DATA["cluster_type"].lower() == "provider":
        verify_provider_storagecluster(sc_data)
        verify_provider_resources()
    else:
        verify_consumer_storagecluster(sc_data)
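# A hedged refactoring sketch, not from the source: the CSV check above is
# repeated verbatim in several verifiers in this module, so it could be
# extracted into a helper built from the same calls. The helper name is
# hypothetical.
def wait_for_single_csv_succeeded(csv_prefix, namespace, timeout=600):
    csvs = csv.get_csvs_start_with_prefix(csv_prefix, namespace)
    assert (
        len(csvs) == 1
    ), f"Unexpected number of CSVs with {csv_prefix} prefix: {len(csvs)}"
    csv_name = csvs[0]["metadata"]["name"]
    log.info(f"Check if {csv_name} is in Succeeded phase.")
    csv.CSV(resource_name=csv_name, namespace=namespace).wait_for_phase(
        phase="Succeeded", timeout=timeout
    )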
def ocs_install_verification(timeout=600):
    """
    Perform steps necessary to verify a successful OCS installation

    Args:
        timeout (int): Number of seconds for timeout used by the checks
            in this function.

    """
    log.info("Verifying OCS installation")
    namespace = config.ENV_DATA['cluster_namespace']

    # Verify Local Storage CSV is in Succeeded phase
    log.info("Verifying Local Storage CSV")
    # There is BZ opened:
    # https://bugzilla.redhat.com/show_bug.cgi?id=1770183
    # which makes this check problematic as the current CSV is not the
    # currently installed one.
    local_storage_csvs = get_csvs_start_with_prefix(
        csv_prefix=constants.LOCAL_STORAGE_CSV_PREFIX,
        namespace=namespace,
    )
    assert len(local_storage_csvs) == 1, (
        f"Expected exactly one local storage CSV, found: {local_storage_csvs}"
    )
    local_storage_name = local_storage_csvs[0]['metadata']['name']
    log.info(
        f"Check if local storage operator: {local_storage_name} is in "
        f"Succeeded phase"
    )
    local_storage_csv = CSV(resource_name=local_storage_name, namespace=namespace)
    local_storage_csv.wait_for_phase("Succeeded", timeout=timeout)

    # Verify OCS CSV is in Succeeded phase
    log.info("Verifying OCS CSV")
    ocs_package_manifest = PackageManifest(resource_name=defaults.OCS_OPERATOR_NAME)
    ocs_csv_name = ocs_package_manifest.get_current_csv()
    ocs_csv = CSV(resource_name=ocs_csv_name, namespace=namespace)
    log.info(f"Check if OCS operator: {ocs_csv_name} is in Succeeded phase.")
    ocs_csv.wait_for_phase(phase="Succeeded", timeout=timeout)

    # Verify OCS Cluster Service (ocs-storagecluster) is Ready
    log.info("Verifying OCS Cluster service")
    storage_clusters = StorageCluster(namespace=namespace)
    for item in storage_clusters.get()['items']:
        storage_cluster_name = item['metadata']['name']
        storage_cluster = StorageCluster(
            resource_name=storage_cluster_name, namespace=namespace
        )
        log.info("Checking status of %s", storage_cluster_name)
        log.info(
            f"Check if StorageCluster: {storage_cluster_name} is in Ready phase"
        )
        storage_cluster.wait_for_phase(phase='Ready', timeout=timeout)

    # Verify pods in running state and proper counts
    log.info("Verifying pod states and counts")
    pod = OCP(kind=constants.POD, namespace=namespace)
    # ocs-operator
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.OCS_OPERATOR_LABEL,
        timeout=timeout,
    )
    # rook-ceph-operator
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.OPERATOR_LABEL,
        timeout=timeout,
    )
    # noobaa
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.NOOBAA_APP_LABEL,
        resource_count=2,
        timeout=timeout,
    )
    # local-storage-operator
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.LOCAL_STORAGE_OPERATOR_LABEL,
        timeout=timeout,
    )
    # mons
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.MON_APP_LABEL,
        resource_count=3,
        timeout=timeout,
    )
    # csi-cephfsplugin
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.CSI_CEPHFSPLUGIN_LABEL,
        resource_count=3,
        timeout=timeout,
    )
    # csi-cephfsplugin-provisioner
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL,
        resource_count=2,
        timeout=timeout,
    )
    # csi-rbdplugin
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.CSI_RBDPLUGIN_LABEL,
        resource_count=3,
        timeout=timeout,
    )
    # csi-rbdplugin-provisioner
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL,
        resource_count=2,
        timeout=timeout,
    )
    # osds
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.OSD_APP_LABEL,
        resource_count=3,
        timeout=timeout,
    )
    # mgr
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.MGR_APP_LABEL,
        timeout=timeout,
    )
    # mds
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.MDS_APP_LABEL,
        resource_count=2,
        timeout=timeout,
    )

    # Verify ceph health
    log.info("Verifying ceph health")
    assert utils.ceph_health_check(namespace=namespace)

    # Verify StorageClasses (1 ceph-fs, 1 ceph-rbd)
    log.info("Verifying storage classes")
    storage_class = OCP(kind=constants.STORAGECLASS, namespace=namespace)
    storage_cluster_name = config.ENV_DATA['storage_cluster_name']
    required_storage_classes = {
        f'{storage_cluster_name}-cephfs',
        f'{storage_cluster_name}-ceph-rbd',
    }
    storage_classes = storage_class.get()
    storage_class_names = {
        item['metadata']['name'] for item in storage_classes['items']
    }
    assert required_storage_classes.issubset(storage_class_names)

    # Verify OSDs are distributed
    log.info("Verifying OSDs are distributed evenly across worker nodes")
    ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
    osds = ocp_pod_obj.get(selector=constants.OSD_APP_LABEL)['items']
    node_names = [osd['spec']['nodeName'] for osd in osds]
    for node in node_names:
        assert not node_names.count(node) > 1, (
            "OSDs are not distributed evenly across worker nodes"
        )
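# An equivalent hedged sketch, not from the source: the per-node check above
# is quadratic with list.count(); collections.Counter computes the same result
# in one pass and names the offending node on failure. Assumes at least one
# OSD; the helper name is hypothetical.
from collections import Counter

def assert_osds_evenly_distributed(node_names):
    node_name, osd_count = Counter(node_names).most_common(1)[0]
    assert osd_count <= 1, (
        f"OSDs are not distributed evenly: node {node_name} runs {osd_count} OSDs"
    )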
def verify_managed_service_resources():
    """
    Verify creation and status of resources specific to OSD and ROSA deployments:
    1. ocs-operator, ocs-osd-deployer, ose-prometheus-operator csvs are Succeeded
    2. ocs-converged-pagerduty, ocs-converged-smtp, ocs-converged-deadmanssnitch
       secrets exist in openshift-storage namespace
    3. 1 prometheus pod and 3 alertmanager pods are in Running state
    4. Managedocs components alertmanager, prometheus, storageCluster are in Ready state
    5. Networkpolicy and EgressNetworkpolicy resources are present

    """
    # Verify CSV status
    for managed_csv in {
        constants.OCS_CSV_PREFIX,
        constants.OSD_DEPLOYER,
        constants.OSE_PROMETHEUS_OPERATOR,
    }:
        csvs = csv.get_csvs_start_with_prefix(
            managed_csv, constants.OPENSHIFT_STORAGE_NAMESPACE
        )
        assert (
            len(csvs) == 1
        ), f"Unexpected number of CSVs with {managed_csv} prefix: {len(csvs)}"
        csv_name = csvs[0]["metadata"]["name"]
        csv_obj = csv.CSV(
            resource_name=csv_name, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE
        )
        log.info(f"Check if {csv_name} is in Succeeded phase.")
        csv_obj.wait_for_phase(phase="Succeeded", timeout=600)

    # Verify alerting secrets creation
    secret_ocp_obj = OCP(
        kind="secret", namespace=constants.OPENSHIFT_STORAGE_NAMESPACE
    )
    for secret_name in {
        constants.MANAGED_SMTP_SECRET,
        constants.MANAGED_PAGERDUTY_SECRET,
        constants.MANAGED_DEADMANSSNITCH_SECRET,
    }:
        assert secret_ocp_obj.is_exist(
            resource_name=secret_name
        ), f"{secret_name} does not exist in openshift-storage namespace"

    # Verify alerting pods are Running
    pod_obj = OCP(
        kind="pod",
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for alert_pod in {
        (constants.MANAGED_PROMETHEUS_LABEL, 1),
        (constants.MANAGED_ALERTMANAGER_LABEL, 3),
    }:
        pod_obj.wait_for_resource(
            condition="Running", selector=alert_pod[0], resource_count=alert_pod[1]
        )

    # Verify managedocs components are Ready
    log.info("Getting managedocs components data")
    managedocs_obj = OCP(
        kind="managedocs",
        resource_name="managedocs",
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    for component in {"alertmanager", "prometheus", "storageCluster"}:
        assert (
            managedocs_obj.get()["status"]["components"][component]["state"]
            == "Ready"
        ), f"{component} status is {managedocs_obj.get()['status']['components'][component]['state']}"

    # Verify Networkpolicy and EgressNetworkpolicy creation
    for policy in {
        ("Networkpolicy", "ceph-ingress-rule"),
        ("EgressNetworkpolicy", "egress-rule"),
    }:
        policy_obj = OCP(
            kind=policy[0],
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
        )
        assert policy_obj.is_exist(
            resource_name=policy[1]
        ), f"{policy[0]} {policy[1]} does not exist in openshift-storage namespace"
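# A hedged call-site sketch, not from the source: a post-deployment test might
# run this verifier once per managed cluster context, following the
# switch_ctx() pattern used in test_pod_disruptions above. The test name and
# its argument are hypothetical.
def test_managed_service_post_deployment(cluster_indexes):
    for cluster_index in cluster_indexes:
        config.switch_ctx(cluster_index)
        verify_managed_service_resources()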