def setup_persistent_monitoring():
    """
    Change monitoring backend to OCS.

    Reconfigures the cluster-monitoring-config configmap with the default
    RBD storage class (and the telemeter server URL, when one is present in
    the config), waits for the monitoring pods to respin and then validates
    the pods and their PVCs.
    """
    rbd_sc = helpers.default_storage_class(interface_type=constants.CEPHBLOCKPOOL)

    # Collect the monitoring pods before the configmap is changed; the same
    # list is handed to the validation helpers below.
    monitoring_pods = get_all_pods(
        namespace=defaults.OCS_MONITORING_NAMESPACE,
        selector=["prometheus", "alertmanager"],
    )

    # Create configmap cluster-monitoring-config and reconfigure the storage
    # class and the telemeter server (None when no URL is configured)
    create_configmap_cluster_monitoring_pod(
        sc_name=rbd_sc.name,
        telemeter_server_url=config.ENV_DATA.get("telemeter_server_url"),
    )

    # Give the pods some time to respin
    waiting_time = 45
    logger.info(f"Waiting {waiting_time} seconds...")
    time.sleep(waiting_time)

    # Validate the pods are respinned and in running state
    validate_respin = retry(
        (CommandFailed, ResourceWrongStatusException), tries=3, delay=15
    )(validate_pods_are_respinned_and_running_state)
    validate_respin(monitoring_pods)

    # Validate the pvc is created on monitoring pods
    validate_pvc_created_and_bound_on_monitoring_pods()

    # Validate the pvc are mounted on pods
    validate_mounts = retry((CommandFailed, AssertionError), tries=3, delay=15)(
        validate_pvc_are_mounted_on_monitoring_pods
    )
    validate_mounts(monitoring_pods)
def setup(
    self,
    storageclass_factory,
    project_factory,
    snapshot_restore_factory,
    pvc_clone_factory,
    create_pvcs_and_pods,
    pvc_create_sc_type,
    restore_sc_type,
):
    """
    Create Storage Class, PVCs and pods.

    Args:
        storageclass_factory: Fixture used to create the thick-provisioned
            RBD storage class when "thick" is requested
        project_factory: Fixture requested by this setup (not used directly)
        snapshot_restore_factory: Fixture requested by this setup (not used
            directly)
        pvc_clone_factory: Fixture requested by this setup (not used
            directly)
        create_pvcs_and_pods: Fixture that creates the PVCs and pods
        pvc_create_sc_type (str): "thin" or "thick"; selects the storage
            class stored in self.pvc_create_sc
        restore_sc_type (str): "thin" or "thick"; selects the storage class
            stored in self.restore_sc

    """
    self.pvc_size = 2

    thick_requested = "thick" in (pvc_create_sc_type, restore_sc_type)
    if thick_requested:
        # Thick provisioning is applicable only for RBD, so no CephFS PVC
        # is requested in this case
        thick_sc = storageclass_factory(
            interface=constants.CEPHBLOCKPOOL,
            new_rbd_pool=False,
            rbd_thick_provision=True,
        )
        thin_sc = default_storage_class(constants.CEPHBLOCKPOOL)
        cephfs_access_modes = None
        cephfs_pvc_count = 0
    else:
        thick_sc = None
        thin_sc = default_storage_class(constants.CEPHFILESYSTEM)
        cephfs_access_modes = [constants.ACCESS_MODE_RWO]
        cephfs_pvc_count = 1

    sc_by_type = {"thin": thin_sc, "thick": thick_sc}
    self.pvc_create_sc = sc_by_type[pvc_create_sc_type]
    self.restore_sc = sc_by_type[restore_sc_type]

    created = create_pvcs_and_pods(
        pvc_size=self.pvc_size,
        access_modes_rbd=[constants.ACCESS_MODE_RWO],
        access_modes_cephfs=cephfs_access_modes,
        num_of_rbd_pvc=1,
        num_of_cephfs_pvc=cephfs_pvc_count,
        sc_rbd=self.pvc_create_sc,
    )
    self.pvcs, self.pods = created
def storageclass(self, storageclass_factory, reclaim_policy):
    """
    Create storage class if reclaim policy is not "Delete".

    Stores the reclaim policy in self.reclaim_policy and the storage class
    in self.sc_obj. For the "Delete" policy the default RBD storage class is
    used; otherwise a new RBD storage class is created with that policy.

    Args:
        storageclass_factory: Fixture to create a new storage class
        reclaim_policy (str): Reclaim policy for the storage class

    """
    self.reclaim_policy = reclaim_policy

    if reclaim_policy == constants.RECLAIM_POLICY_DELETE:
        self.sc_obj = default_storage_class(constants.CEPHBLOCKPOOL)
    else:
        self.sc_obj = storageclass_factory(
            interface=constants.CEPHBLOCKPOOL,
            reclaim_policy=self.reclaim_policy,
        )
def amq_setup(self, amq_factory_fixture):
    """
    Creates amq cluster and run benchmarks.

    Stores the objects returned by the factory in self.amq and self.threads
    and creates a Sanity helper in self.sanity_helpers.

    Args:
        amq_factory_fixture: Fixture called with the name of the default
            RBD storage class

    """
    # The factory takes the storage class name, not the object
    rbd_sc = default_storage_class(interface_type=constants.CEPHBLOCKPOOL)
    factory_result = amq_factory_fixture(sc_name=rbd_sc.name)
    self.amq, self.threads = factory_result

    # Initialize Sanity instance
    self.sanity_helpers = Sanity()
def test_install_amq_backed_by_ocs(self, interface, test_fixture_amq):
    """
    Create amq cluster and run open messages on it.

    Args:
        interface (str): Interface type whose default storage class backs
            the amq cluster
        test_fixture_amq: Fixture object used to set up the amq cluster and
            drive the messaging workload

    """
    # Deploy amq cluster on the default storage class of the interface
    storage_class = default_storage_class(interface_type=interface)
    test_fixture_amq.setup_amq_cluster(storage_class.name)

    # Run open messages
    test_fixture_amq.create_messaging_on_amq()

    # Wait for some time to generate msg
    waiting_time = 60
    log.info(f"Waiting for {waiting_time}sec to generate msg")
    time.sleep(waiting_time)

    # Check messages are sent and received; each background job is waited
    # on with a 1800 second timeout
    for background_job in test_fixture_amq.run_in_bg():
        background_job.result(timeout=1800)
def setup(self, interface_type, reclaim_policy, storageclass_factory):
    """
    Provide a storage class for the given interface and reclaim policy and
    fetch all worker nodes.

    The default storage class of the interface is used when the reclaim
    policy is "Delete"; otherwise a new storage class is created.

    Args:
        interface_type (str): The type of the interface
            (e.g. CephBlockPool, CephFileSystem)
        reclaim_policy (str): The type of reclaim policy
            (eg., 'Delete', 'Retain')
        storageclass_factory: A fixture to create new storage class

    Returns:
        tuple: containing the storage class instance and list of worker nodes

    """
    if reclaim_policy == constants.RECLAIM_POLICY_DELETE:
        storage_class = default_storage_class(interface_type)
    else:
        # Create storage class if reclaim policy is not "Delete"
        storage_class = storageclass_factory(
            interface=interface_type, reclaim_policy=reclaim_policy
        )

    return storage_class, node.get_worker_nodes()
def create_multi_pvc_pod(
    self, pods_per_iter=5, io_runtime=3600, start_io=False, pvc_size=None
):
    """
    Create PVCs of different types, attach them to pods and optionally
    start IO on the pods.

    Args:
        pods_per_iter (int): Number of PVC-POD to be created per PVC type
            Example, If 2 then 8 PVC+POD will be created with 2 each of 4
            PVC types
        io_runtime (sec): Fio run time in seconds
        start_io (bool): If True start IO else don't
        pvc_size (Gi): size of PVC; when not given, a random size between
            15Gi and 100Gi (in steps of 5) is used

    Returns:
        pod_objs (obj): Objs of all the PODs created
        pvc_objs (obj): Objs of all the PVCs created

    """
    rbd_sc = helpers.default_storage_class(constants.CEPHBLOCKPOOL)
    cephfs_sc = helpers.default_storage_class(constants.CEPHFILESYSTEM)
    pvc_size = pvc_size or f"{random.randrange(15, 105, 5)}Gi"
    fio_size = get_size_based_on_cls_usage()
    fio_rate = get_rate_based_on_cls_iops()
    logging.info(f"Create {pods_per_iter * 4} PVCs and PODs")

    # Create PVCs: CephFS first, then RBD, both with RWO and RWX modes
    cephfs_pvcs = helpers.create_multiple_pvc_parallel(
        sc_obj=cephfs_sc,
        namespace=self.namespace,
        number_of_pvc=pods_per_iter,
        size=pvc_size,
        access_modes=[constants.ACCESS_MODE_RWO, constants.ACCESS_MODE_RWX],
    )
    rbd_pvcs = helpers.create_multiple_pvc_parallel(
        sc_obj=rbd_sc,
        namespace=self.namespace,
        number_of_pvc=pods_per_iter,
        size=pvc_size,
        access_modes=[constants.ACCESS_MODE_RWO, constants.ACCESS_MODE_RWX],
    )
    pvc_objs = list(cephfs_pvcs + rbd_pvcs)

    # Create pods with above pvc list
    cephfs_pods = helpers.create_pods_parallel(
        cephfs_pvcs,
        self.namespace,
        constants.CEPHFS_INTERFACE,
        pod_dict_path=self.pod_dict_path,
        sa_name=self.sa_name,
        dc_deployment=self.dc_deployment,
        node_selector=self.node_selector,
    )

    # Split the RBD PVCs by access mode: RWX ones are attached as raw
    # block volumes below, everything else as a regular volume
    rbd_rwo_pvc = []
    rbd_rwx_pvc = []
    for rbd_pvc in rbd_pvcs:
        if rbd_pvc.get_pvc_access_mode == constants.ACCESS_MODE_RWX:
            rbd_rwx_pvc.append(rbd_pvc)
        else:
            rbd_rwo_pvc.append(rbd_pvc)

    rbd_rwo_pods = helpers.create_pods_parallel(
        rbd_rwo_pvc,
        self.namespace,
        constants.CEPHBLOCKPOOL,
        pod_dict_path=self.pod_dict_path,
        sa_name=self.sa_name,
        dc_deployment=self.dc_deployment,
        node_selector=self.node_selector,
    )
    rbd_rwx_pods = helpers.create_pods_parallel(
        rbd_rwx_pvc,
        self.namespace,
        constants.CEPHBLOCKPOOL,
        pod_dict_path=self.pod_dict_path,
        sa_name=self.sa_name,
        dc_deployment=self.dc_deployment,
        raw_block_pv=True,
        node_selector=self.node_selector,
    )

    # Pods with a filesystem volume vs. all pods
    fs_pods = list(cephfs_pods + rbd_rwo_pods)
    pod_objs = list(fs_pods + rbd_rwx_pods)

    # Start IO
    if start_io:
        io_threads = []
        # "fs" IO on the filesystem-volume pods first, then "block" IO on
        # the raw block pods
        for pod_group, storage_type in ((fs_pods, "fs"), (rbd_rwx_pods, "block")):
            for pod_obj in pod_group:
                io_thread = threading.Thread(
                    target=pod_obj.run_io,
                    kwargs={
                        "storage_type": storage_type,
                        "size": fio_size,
                        "runtime": io_runtime,
                        "rate": fio_rate,
                    },
                )
                io_thread.start()
                io_threads.append(io_thread)
                time.sleep(30)
        for io_thread in io_threads:
            io_thread.join()

    return pod_objs, pvc_objs
def test_rgw_kafka_notifications(self, bucket_factory):
    """
    Test to verify rgw kafka notifications.

    Deploys an amq cluster with a kafka topic and kafkadrop, puts objects
    to an RGW bucket, runs the notify script and then checks that:
    the rgw log has no "failed to create push endpoint" error, kafkadrop
    reports messages on the topic and, for OCS >= 4.8, the eventTime of the
    first consumed record matches the expected timestamp format.

    Args:
        bucket_factory: Fixture used to create the RGW bucket

    Raises:
        Exception: If kafkadrop does not report messages on the topic
        ValueError: If the event timestamp does not match the format

    """
    # Get sc
    sc = default_storage_class(interface_type=constants.CEPHBLOCKPOOL)

    # Deploy amq cluster
    self.amq.setup_amq_cluster(sc.name)

    # Create topic
    self.kafka_topic = self.amq.create_kafka_topic()

    # Create Kafkadrop pod. The three returned objects need distinct
    # targets: previously the first two were both unpacked into
    # self.kafkadrop_pod, which silently discarded the first object.
    # NOTE(review): the second object is presumably the kafkadrop service -
    # confirm against create_kafkadrop() and the teardown of this class.
    (
        self.kafkadrop_pod,
        self.kafkadrop_svc,
        self.kafkadrop_route,
    ) = self.amq.create_kafkadrop()

    # Get the kafkadrop route
    kafkadrop_host = self.kafkadrop_route.get().get("spec").get("host")

    # Create bucket
    bucketname = bucket_factory(amount=1, interface="RGW-OC")[0].name

    # Get RGW credentials; only the endpoint is needed, the keys used below
    # come from the OBC
    rgw_obj = RGW()
    rgw_endpoint, _, _ = rgw_obj.get_credentials()

    # Clone notify repo
    notify_path = clone_notify()

    # Initialise to put objects
    data = "A random string data to write on created rgw bucket"
    obc_obj = OBC(bucketname)
    s3_resource = boto3.resource(
        "s3",
        verify=retrieve_verification_mode(),
        endpoint_url=rgw_endpoint,
        aws_access_key_id=obc_obj.access_key_id,
        aws_secret_access_key=obc_obj.access_key,
    )
    s3_client = s3_resource.meta.client

    # Initialize notify command to run
    notify_cmd = (
        f"python {notify_path} -e {rgw_endpoint} -a {obc_obj.access_key_id} "
        f"-s {obc_obj.access_key} -b {bucketname} -ke {constants.KAFKA_ENDPOINT} -t {self.kafka_topic.name}"
    )
    # NOTE(review): this logs the bucket credentials as part of the command
    log.info(f"Running cmd {notify_cmd}")

    # Put objects to bucket
    assert s3_client.put_object(
        Bucket=bucketname, Key="key-1", Body=data
    ), "Failed: Put object: key-1"
    exec_cmd(notify_cmd)

    # Validate rgw logs notification are sent
    # No errors are seen
    pattern = "ERROR: failed to create push endpoint"
    rgw_pod_obj = get_rgw_pods()
    rgw_log = get_pod_logs(pod_name=rgw_pod_obj[0].name, container="rgw")
    assert re.search(pattern=pattern, string=rgw_log) is None, (
        f"Error: {pattern} msg found in the rgw logs. "
        f"Validate {pattern} found on rgw logs and also "
        f"rgw bucket notification is working correctly"
    )
    assert s3_client.put_object(
        Bucket=bucketname, Key="key-2", Body=data
    ), "Failed: Put object: key-2"
    exec_cmd(notify_cmd)

    # Validate message are received Kafka side using curl command
    # A temporary way to check from Kafka side, need to check from UI
    curl_command = (
        f"curl -X GET {kafkadrop_host}/topic/{self.kafka_topic.name} "
        "-H 'content-type: application/vnd.kafka.json.v2+json'"
    )
    json_output = run_cmd(cmd=curl_command)
    new_string = json_output.split()
    # The token following "messages</td>" in the page carries the count
    messages = new_string[new_string.index("messages</td>") + 1]
    if "1" not in messages:
        raise Exception(
            "Error: Messages are not recieved from Kafka side. "
            "RGW bucket notification is not working as expected."
        )

    # Validate the timestamp events
    ocs_version = config.ENV_DATA["ocs_version"]
    if Version.coerce(ocs_version) >= Version.coerce("4.8"):
        cmd = (
            f"bin/kafka-console-consumer.sh --bootstrap-server {constants.KAFKA_ENDPOINT} "
            f"--topic {self.kafka_topic.name} --from-beginning --timeout-ms 20000"
        )
        pod_list = get_pod_name_by_pattern(
            pattern="my-cluster-zookeeper", namespace=constants.AMQ_NAMESPACE
        )
        zookeeper_obj = get_pod_obj(
            name=pod_list[0], namespace=constants.AMQ_NAMESPACE
        )
        event_obj = zookeeper_obj.exec_cmd_on_pod(command=cmd)
        log.info(f"Event obj: {event_obj}")
        event_time = event_obj.get("Records")[0].get("eventTime")
        format_string = "%Y-%m-%dT%H:%M:%S.%fZ"
        try:
            datetime.strptime(event_time, format_string)
        except ValueError:
            log.error(
                f"Timestamp event {event_time} doesnt match the pattern {format_string}"
            )
            # Bare raise keeps the original traceback intact
            raise

        log.info(f"Timestamp event {event_time} matches the pattern {format_string}")
def test_rgw_kafka_notifications(self, bucket_factory):
    """
    Test to verify rgw kafka notifications.

    Deploys an amq cluster with a kafka topic and kafkadrop, puts objects
    to an RGW bucket, runs the notify script and then checks that the rgw
    log has no "failed to create push endpoint" error and that kafkadrop
    reports messages on the topic.

    Args:
        bucket_factory: Fixture used to create the RGW bucket

    Raises:
        Exception: If kafkadrop does not report messages on the topic

    """
    # Get sc
    sc = default_storage_class(interface_type=constants.CEPHBLOCKPOOL)

    # Deploy amq cluster
    self.amq.setup_amq_cluster(sc.name)

    # Create topic
    self.kafka_topic = self.amq.create_kafka_topic()

    # Create Kafkadrop pod. The three returned objects need distinct
    # targets: previously the first two were both unpacked into
    # self.kafkadrop_pod, which silently discarded the first object.
    # NOTE(review): the second object is presumably the kafkadrop service -
    # confirm against create_kafkadrop() and the teardown of this class.
    (
        self.kafkadrop_pod,
        self.kafkadrop_svc,
        self.kafkadrop_route,
    ) = self.amq.create_kafkadrop()

    # Get the kafkadrop route
    kafkadrop_host = self.kafkadrop_route.get().get("spec").get("host")

    # Create bucket
    bucketname = bucket_factory(amount=1, interface="RGW-OC")[0].name

    # Get RGW credentials; only the endpoint is needed, the keys used below
    # come from the OBC
    rgw_obj = RGW()
    rgw_endpoint, _, _ = rgw_obj.get_credentials()

    # Clone notify repo
    notify_path = clone_notify()

    # Initialise to put objects
    data = "A random string data to write on created rgw bucket"
    obc_obj = OBC(bucketname)
    s3_resource = boto3.resource(
        "s3",
        verify=retrieve_verification_mode(),
        endpoint_url=rgw_endpoint,
        aws_access_key_id=obc_obj.access_key_id,
        aws_secret_access_key=obc_obj.access_key,
    )
    s3_client = s3_resource.meta.client

    # Initialize notify command to run
    notify_cmd = (
        f"python {notify_path} -e {rgw_endpoint} -a {obc_obj.access_key_id} "
        f"-s {obc_obj.access_key} -b {bucketname} -ke {constants.KAFKA_ENDPOINT} -t {self.kafka_topic.name}"
    )
    # NOTE(review): this logs the bucket credentials as part of the command
    log.info(f"Running cmd {notify_cmd}")

    # Put objects to bucket
    assert s3_client.put_object(
        Bucket=bucketname, Key="key-1", Body=data
    ), "Failed: Put object: key-1"
    exec_cmd(notify_cmd)

    # Validate rgw logs notification are sent
    # No errors are seen
    pattern = "ERROR: failed to create push endpoint"
    rgw_pod_obj = get_rgw_pods()
    rgw_log = get_pod_logs(pod_name=rgw_pod_obj[0].name, container="rgw")
    assert re.search(pattern=pattern, string=rgw_log) is None, (
        f"Error: {pattern} msg found in the rgw logs. "
        f"Validate {pattern} found on rgw logs and also "
        f"rgw bucket notification is working correctly"
    )
    assert s3_client.put_object(
        Bucket=bucketname, Key="key-2", Body=data
    ), "Failed: Put object: key-2"
    exec_cmd(notify_cmd)

    # Validate message are received Kafka side using curl command
    # A temporary way to check from Kafka side, need to check from UI
    curl_command = (
        f"curl -X GET {kafkadrop_host}/topic/{self.kafka_topic.name} "
        "-H 'content-type: application/vnd.kafka.json.v2+json'"
    )
    json_output = run_cmd(cmd=curl_command)
    new_string = json_output.split()
    # The token following "messages</td>" in the page carries the count
    messages = new_string[new_string.index("messages</td>") + 1]
    if "1" not in messages:
        raise Exception(
            "Error: Messages are not recieved from Kafka side. "
            "RGW bucket notification is not working as expected."
        )
def base_setup(self, interface, pvc_factory, pod_factory):
    """
    A setup phase for the test: get all the ceph pods information,
    create maxsize pvc, pod and run IO.

    The PVC size is half of the usable capacity (total bytes reported by
    "ceph df" divided by the pool replica size). The IO size is 40% of the
    PVC size, capped at 400G. The storage class, PVC and pod are stored in
    self.sc, self.pvc_obj and self.pod_obj.

    Args:
        interface (str): Interface type (constants.CEPHBLOCKPOOL or
            constants.CEPHFILESYSTEM)
        pvc_factory: Fixture to create the PVC
        pod_factory: Fixture to create the pod

    Returns:
        list: Pods of the rook cluster namespace, fetched with the
            "noobaa", "rook-ceph-osd-prepare" and "rook-ceph-drain-canary"
            selectors passed as an exclusion (exclude_selector=True)

    Raises:
        ValueError: In external mode, if the interface is neither
            CephBlockPool nor CephFileSystem

    """
    ceph_pod = Pod.get_ceph_tools_pod()
    external = config.DEPLOYMENT["external_mode"]
    if external:
        # In external mode the pool name is read from the storage class
        if interface == constants.CEPHBLOCKPOOL:
            resource_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD
        elif interface == constants.CEPHFILESYSTEM:
            resource_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_CEPHFS
        else:
            # Previously this fell through and failed with an
            # UnboundLocalError on resource_name
            raise ValueError(
                f"Unsupported interface for external mode: {interface}"
            )
        ocp_obj = ocp.OCP()
        cmd = f"get sc {resource_name} -o yaml"
        pool_data = ocp_obj.exec_oc_cmd(cmd)
        pool = pool_data["parameters"]["pool"]
    else:
        pool = (
            constants.DEFAULT_BLOCKPOOL
            if interface == constants.CEPHBLOCKPOOL
            else constants.DATA_POOL
        )

    ceph_replica = ceph_pod.exec_ceph_cmd(ceph_cmd=f"ceph osd pool get {pool} size")
    replica = ceph_replica["size"]

    ceph_status = ceph_pod.exec_ceph_cmd(ceph_cmd="ceph df")
    ceph_capacity = int(ceph_status["stats"]["total_bytes"]) / replica / constants.GB
    pvc_size_gb = int(ceph_capacity * 0.5)

    # Setting the io_size_gb to 40% of the total PVC capacity, at most 400
    io_size_gb = min(int(pvc_size_gb * 0.4), 400)

    pod_objs = get_all_pods(
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
        selector=["noobaa", "rook-ceph-osd-prepare", "rook-ceph-drain-canary"],
        exclude_selector=True,
    )

    # Create maxsize pvc, app pod and run ios
    self.sc = default_storage_class(interface_type=interface)
    self.pvc_obj = pvc_factory(
        interface=interface,
        storageclass=self.sc,
        size=pvc_size_gb,
    )
    self.pod_obj = pod_factory(interface=interface, pvc=self.pvc_obj)

    log.info(f"Running FIO to fill PVC size: {io_size_gb}G")
    self.pod_obj.run_io(
        "fs", size=f"{io_size_gb}G", io_direction="write", runtime=480
    )

    log.info("Waiting for IO results")
    self.pod_obj.get_fio_results()

    return pod_objs
def deploy_ocs(self):
    """
    Handle OCS deployment, since OCS deployment steps are common to any
    platform, implementing OCS deployment here in base class.

    Returns early when a CephCluster already exists, and delegates to
    deploy_with_external_mode() when external mode is configured. Otherwise
    deploys via the operator, validates the Ceph pods and CephFilesystem,
    optionally reconfigures monitoring, switches the registry backend and
    waits for the cluster to become healthy.
    """
    ceph_cluster = ocp.OCP(kind="CephCluster", namespace=self.namespace)
    try:
        ceph_cluster.get().get("items")[0]
    except (IndexError, CommandFailed):
        logger.info("Running OCS basic installation")
    else:
        logger.warning("OCS cluster already exists")
        return

    if config.DEPLOYMENT["external_mode"]:
        logger.info("Deploying OCS on external mode RHCS")
        return self.deploy_with_external_mode()

    self.deploy_ocs_via_operator()
    pod = ocp.OCP(kind=constants.POD, namespace=self.namespace)
    cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=self.namespace)

    # Check for Ceph pods: mon and osd are waited for with an explicit
    # count of 3, mgr without an explicit count
    ceph_pod_checks = (
        ("app=rook-ceph-mon", {"resource_count": 3}),
        ("app=rook-ceph-mgr", {}),
        ("app=rook-ceph-osd", {"resource_count": 3}),
    )
    for pod_selector, extra_kwargs in ceph_pod_checks:
        assert pod.wait_for_resource(
            condition="Running",
            selector=pod_selector,
            timeout=600,
            **extra_kwargs,
        )

    # validate ceph mon/osd volumes are backed by pvc
    validate_cluster_on_pvc()

    # validate PDB creation of MON, MDS, OSD pods
    validate_pdb_creation()

    # Creating toolbox pod
    setup_ceph_toolbox()
    assert pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-tools",
        resource_count=1,
        timeout=600,
    )

    # Check for CephFilesystem creation in ocp
    cfs_name = cfs.get()["items"][0]["metadata"]["name"]
    if helpers.validate_cephfilesystem(cfs_name):
        logger.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        logger.error("MDS deployment Failed! Please check logs!")

    if config.ENV_DATA.get("monitoring_enabled"):
        if config.ENV_DATA.get("persistent-monitoring"):
            # Change monitoring backend to OCS
            rbd_sc = helpers.default_storage_class(
                interface_type=constants.CEPHBLOCKPOOL
            )

            # Get the list of monitoring pods
            monitoring_pods = get_all_pods(
                namespace=defaults.OCS_MONITORING_NAMESPACE,
                selector=["prometheus", "alertmanager"],
            )

            # Create configmap cluster-monitoring-config and reconfigure
            # storage class and telemeter server (if the url is specified
            # in a config file)
            create_configmap_cluster_monitoring_pod(
                sc_name=rbd_sc.name,
                telemeter_server_url=config.ENV_DATA.get("telemeter_server_url"),
            )

            # Take some time to respin the pod
            waiting_time = 45
            logger.info(f"Waiting {waiting_time} seconds...")
            time.sleep(waiting_time)

            # Validate the pods are respinned and in running state
            validate_respin = retry(
                (CommandFailed, ResourceWrongStatusException), tries=3, delay=15
            )(validate_pods_are_respinned_and_running_state)
            validate_respin(monitoring_pods)

            # Validate the pvc is created on monitoring pods
            validate_pvc_created_and_bound_on_monitoring_pods()

            # Validate the pvc are mounted on pods
            validate_mounts = retry(
                (CommandFailed, AssertionError), tries=3, delay=15
            )(validate_pvc_are_mounted_on_monitoring_pods)
            validate_mounts(monitoring_pods)
        elif config.ENV_DATA.get("telemeter_server_url"):
            # Create configmap cluster-monitoring-config to reconfigure
            # telemeter server url when 'persistent-monitoring' is False
            create_configmap_cluster_monitoring_pod(
                telemeter_server_url=config.ENV_DATA["telemeter_server_url"]
            )

    # Change registry backend to OCS CEPHFS RWX PVC
    registry.change_registry_backend_to_ocs()

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    logger.info("Done creating rook resources, waiting for HEALTH_OK")
    try:
        ceph_health_check(namespace=self.namespace, tries=30, delay=10)
    except CephHealthException as health_err:
        err = str(health_err)
        logger.warning(f"Ceph health check failed with {err}")
        # Only a clock skew failure gets a second, longer health check
        # (after an NTP update on vSphere); any other health failure is
        # logged as a warning above and deployment continues
        if "clock skew detected" in err:
            logger.info(
                f"Changing NTP on compute nodes to" f" {constants.RH_NTP_CLOCK}"
            )
            if self.platform == constants.VSPHERE_PLATFORM:
                update_ntp_compute_nodes()
            assert ceph_health_check(namespace=self.namespace, tries=60, delay=10)

    # patch gp2/thin storage class as 'non-default'
    self.patch_default_sc_to_non_default()