def create_connection(self, cld_mgr, platform, conn_name=None):
    """
    Creates a new NooBaa connection to an AWS backend

    Args:
        cld_mgr (obj): A cloud manager instance
        platform (str): Platform to use for new connection
        conn_name (str): The connection name to be used
            If None provided then the name will be generated

    Returns:
        bool: True if the connection was created successfully,
            False if the connection creation failed

    Raises:
        UnsupportedPlatformError: If ``platform`` is not AWS, Azure or RGW

    """
    if conn_name is None:
        conn_name = create_unique_resource_name(f"{platform}-connection", "mcgconn")

    # Build the RPC parameters for the requested platform; credentials are
    # pulled from the cloud manager's per-platform client object.
    if platform == constants.AWS_PLATFORM:
        params = {
            "auth_method": "AWS_V4",
            "endpoint": constants.MCG_NS_AWS_ENDPOINT,
            "endpoint_type": "AWS",
            "identity": get_attr_chain(cld_mgr, "aws_client.access_key"),
            "name": conn_name,
            "secret": get_attr_chain(cld_mgr, "aws_client.secret_key"),
        }
    elif platform == constants.AZURE_PLATFORM:
        params = {
            "endpoint": constants.MCG_NS_AZURE_ENDPOINT,
            "endpoint_type": "AZURE",
            "identity": get_attr_chain(cld_mgr, "azure_client.account_name"),
            "name": conn_name,
            "secret": get_attr_chain(cld_mgr, "azure_client.credential"),
        }
    elif platform == constants.RGW_PLATFORM:
        params = {
            "auth_method": "AWS_V4",
            "endpoint": get_attr_chain(cld_mgr, "rgw_client.endpoint"),
            "endpoint_type": "S3_COMPATIBLE",
            "identity": get_attr_chain(cld_mgr, "rgw_client.access_key"),
            "name": conn_name,
            "secret": get_attr_chain(cld_mgr, "rgw_client.secret_key"),
        }
    else:
        raise UnsupportedPlatformError(f"Unsupported Platform: {platform}")

    try:
        # Newly created IAM credentials may take a while to propagate on the
        # provider side, so retry the RPC until it stops reporting an error.
        for resp in TimeoutSampler(
            30,
            3,
            self.send_rpc_query,
            "account_api",
            "add_external_connection",
            params,
        ):
            if "error" not in resp.text:
                logger.info(f"Connection {conn_name} created successfully")
                return True
            else:
                logger.info(
                    f"{platform} IAM {conn_name} did not yet propagate: {resp.text}"
                )
    except TimeoutExpiredError:
        # Return False per the documented contract instead of `assert False`,
        # which is stripped when Python runs with -O and raises AssertionError
        # rather than signalling a failed creation.
        logger.error(f"Could not create connection {conn_name}")
        return False
def attach_pgsql_pod_to_claim_pvc(self, pvc_objs, postgres_name, run_benchmark=True, pgbench_name=None):
    """
    Attaches pgsql pod to created claim PVC

    Args:
        pvc_objs (list): List of PVC objs which needs to attached to pod
        postgres_name (str): Name of the postgres pod
        run_benchmark (bool): On true, runs pgbench benchmark on postgres pod
        pgbench_name (str): Name of pgbench benchmark

    Returns:
        pgsql_obj_list (list): List of pod objs created

    Raises:
        CommandFailed / CalledProcessError: If creation of a postgres
            resource fails

    """
    pgsql_obj_list = []
    for pvc_obj in pvc_objs:
        try:
            # Load the postgres StatefulSet template and drop its volume
            # claim templates -- the pre-created PVC is attached instead.
            pgsql_sset = templating.load_yaml(
                constants.PGSQL_STATEFULSET_YAML)
            del pgsql_sset["spec"]["volumeClaimTemplates"]
            # Suffix the StatefulSet name with the PVC's index so each PVC
            # gets its own uniquely named postgres instance.
            pgsql_sset["metadata"]["name"] = (f"{postgres_name}" +
                                              f"{pvc_objs.index(pvc_obj)}")
            pgsql_sset["spec"]["template"]["spec"]["containers"][0][
                "volumeMounts"][0]["name"] = pvc_obj.name
            pgsql_sset["spec"]["template"]["spec"]["volumes"] = [{
                "name": f"{pvc_obj.name}",
                "persistentVolumeClaim": {
                    "claimName": f"{pvc_obj.name}"
                },
            }]
            pgsql_sset = OCS(**pgsql_sset)
            pgsql_sset.create()
            pgsql_obj_list.append(pgsql_sset)

            self.wait_for_postgres_status(status=constants.STATUS_RUNNING,
                                          timeout=300)

            if run_benchmark:
                pg_data = templating.load_yaml(
                    constants.PGSQL_BENCHMARK_YAML)
                # Use the caller-provided benchmark name (indexed per PVC)
                # or generate a unique one.
                pg_data["metadata"]["name"] = (
                    f"{pgbench_name}" + f"{pvc_objs.index(pvc_obj)}"
                    if pgbench_name else
                    create_unique_resource_name("benchmark", "pgbench"))
                # Point pgbench at the matching StatefulSet pod's service:
                # <postgres_name><index>-0.postgres
                pg_data["spec"]["workload"]["args"]["databases"][0][
                    "host"] = (f"{postgres_name}" +
                               f"{pvc_objs.index(pvc_obj)}-0" + ".postgres")
                pg_obj = OCS(**pg_data)
                pg_obj.create()
                pgsql_obj_list.append(pg_obj)

                wait_time = 120
                log.info(f"Wait {wait_time} seconds before mounting pod")
                time.sleep(wait_time)

        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during creation of postgres pod")
            raise cf

    if run_benchmark:
        log.info("Checking all pgbench benchmark reached Completed state")
        self.wait_for_pgbench_status(status=constants.STATUS_COMPLETED,
                                     timeout=1800)

    return pgsql_obj_list
def _create_backingstore(method, uls_dict):
    """
    Tracks creation and cleanup of all the backing stores that were created
    in the scope

    Args:
        method (str): String for selecting method of backing store creation
            (CLI/OC)
        uls_dict (dict): Dictionary containing storage provider as key and a
            list of tuples as value.
            Cloud backing stores form - 'CloudName': [(amount, region), (amount, region)]
            i.e. - 'aws': [(3, us-west-1),(2, eu-west-2)]
            PV form - 'pv': [(amount, size_in_gb, storagecluster), ...]
            i.e. - 'pv': [(3, 32, ocs-storagecluster-ceph-rbd),(2, 100, ocs-storagecluster-ceph-rbd)]

    Returns:
        list: A list of backingstore names.

    Raises:
        RuntimeError: If the creation method or the cloud type is not
            supported by the cmdMap dispatch table

    """
    current_call_created_backingstores = []
    if method.lower() not in cmdMap:
        raise RuntimeError(f"Invalid method type received: {method}. "
                           f'available types: {", ".join(cmdMap.keys())}')
    for cloud, uls_lst in uls_dict.items():
        for uls_tup in uls_lst:
            # TODO: Replace multiple .append calls, create names in advance,
            # according to amount
            if cloud.lower() not in cmdMap[method.lower()]:
                raise RuntimeError(
                    f"Invalid cloud type received: {cloud}. "
                    f'available types: {", ".join(cmdMap[method.lower()].keys())}'
                )
            if cloud == "pv":
                vol_num, size, storagecluster = uls_tup
                # On independent-mode clusters the default RBD storage class
                # is replaced with its external-mode counterpart
                if (storagecluster == constants.DEFAULT_STORAGECLASS_RBD
                        and storagecluster_independent_check()):
                    storagecluster = (
                        constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD)
                backingstore_name = create_unique_resource_name(
                    resource_description="backingstore",
                    resource_type=cloud.lower())
                backingstore_obj = BackingStore(
                    name=backingstore_name,
                    method=method.lower(),
                    type="pv",
                    mcg_obj=mcg_obj,
                    vol_num=vol_num,
                    vol_size=size,
                )
                current_call_created_backingstores.append(backingstore_obj)
                created_backingstores.append(backingstore_obj)
                if method.lower() == "cli":
                    cmdMap[method.lower()][cloud.lower()](
                        mcg_obj, backingstore_name, vol_num, size,
                        storagecluster)
                else:
                    cmdMap[method.lower()][cloud.lower()](
                        backingstore_name, vol_num, size, storagecluster)
            else:
                _, region = uls_tup
                # Store the factory result under a distinct name -- the
                # original code rebound the iterated `uls_dict` parameter
                # here, shadowing it mid-iteration.
                created_uls = cloud_uls_factory({cloud: [uls_tup]})
                for uls_name in created_uls[cloud.lower()]:
                    backingstore_name = create_unique_resource_name(
                        resource_description="backingstore",
                        resource_type=cloud.lower(),
                    )
                    backingstore_obj = BackingStore(
                        name=backingstore_name,
                        method=method.lower(),
                        type="cloud",
                        uls_name=uls_name,
                        mcg_obj=mcg_obj,
                    )
                    current_call_created_backingstores.append(
                        backingstore_obj)
                    created_backingstores.append(backingstore_obj)
                    if method.lower() == "cli":
                        cmdMap[method.lower()][cloud.lower()](
                            mcg_obj, cld_mgr, backingstore_name, uls_name,
                            region)
                    elif method.lower() == "oc":
                        cmdMap[method.lower()][cloud.lower()](
                            cld_mgr, backingstore_name, uls_name, region)
                    mcg_obj.check_backingstore_state(
                        backingstore_name, constants.BS_OPTIMAL)
                    # TODO: Verify OC\CLI BS health by using the appropriate methods

    return current_call_created_backingstores
def request_aws_credentials(self):
    """
    Uses a CredentialsRequest CR to create an AWS IAM that allows the program
    to interact with S3

    Returns:
        tuple: The CredentialsRequest resource (OCS), the AWS access key id
            (str) and the AWS secret access key (str)

    Raises:
        CredReqSecretNotFound: If the secret created for the
            CredentialsRequest cannot be retrieved

    """
    awscreds_data = templating.load_yaml(constants.MCG_AWS_CREDS_YAML)
    req_name = create_unique_resource_name("awscredreq", "credentialsrequests")
    awscreds_data["metadata"]["name"] = req_name
    awscreds_data["metadata"]["namespace"] = self.namespace
    awscreds_data["spec"]["secretRef"]["name"] = req_name
    awscreds_data["spec"]["secretRef"]["namespace"] = self.namespace
    creds_request = create_resource(**awscreds_data)
    # Give the credentials operator a moment to create the backing secret
    sleep(5)
    secret_ocp_obj = OCP(kind="secret", namespace=self.namespace)
    try:
        cred_req_secret_dict = secret_ocp_obj.get(
            resource_name=creds_request.name, retry=5)
    except CommandFailed:
        logger.error("Failed to retrieve credentials request secret")
        raise CredReqSecretNotFound(
            "Please make sure that the cluster used is an AWS cluster, "
            "or that the `platform` var in your config is correct.")
    # The secret stores the generated credentials base64-encoded
    aws_access_key_id = base64.b64decode(
        cred_req_secret_dict.get("data").get("aws_access_key_id")).decode(
            "utf-8")
    aws_access_key = base64.b64decode(
        cred_req_secret_dict.get("data").get(
            "aws_secret_access_key")).decode("utf-8")

    def _check_aws_credentials():
        # Probe STS with the new credentials; True once they are active
        try:
            sts = boto3.client(
                "sts",
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_access_key,
            )
            sts.get_caller_identity()
            return True
        except ClientError:
            logger.info("Credentials are still not active. Retrying...")
            return False

    try:
        # Freshly created IAM credentials can take a while to propagate
        for api_test_result in TimeoutSampler(120, 5, _check_aws_credentials):
            if api_test_result:
                logger.info("AWS credentials created successfully.")
                break
    except TimeoutExpiredError:
        logger.error("Failed to create credentials")
        assert False

    return creds_request, aws_access_key_id, aws_access_key
def test_create_resize_delete_pvc(
    self,
    project_factory,
    teardown_factory,
    setup_ui,
    sc_name,
    access_mode,
    pvc_size,
    vol_mode,
):
    """
    Test create, resize and delete pvc via UI

    Args:
        project_factory: Fixture that creates a test project via CLI
        teardown_factory: Fixture that deletes created resources on teardown
        setup_ui: Fixture providing the browser/driver session
        sc_name (str): Storage class to create the PVC with
        access_mode (str): PVC access mode
        pvc_size (str): Requested PVC size in GiB
        vol_mode (str): PVC volume mode (Filesystem/Block)

    """
    # Creating a test project via CLI
    pro_obj = project_factory()
    project_name = pro_obj.namespace

    pvc_ui_obj = PvcUI(setup_ui)

    # Creating PVC via UI
    pvc_name = create_unique_resource_name("test", "pvc")
    pvc_ui_obj.create_pvc_ui(project_name, sc_name, pvc_name, access_mode,
                             pvc_size, vol_mode)

    # Cross-check the UI-created PVC via the CLI-facing objects
    pvc_objs = get_all_pvc_objs(namespace=project_name)
    pvc = [pvc_obj for pvc_obj in pvc_objs if pvc_obj.name == pvc_name]

    assert pvc[0].size == int(pvc_size), (
        f"size error| expected size:{pvc_size} \n "
        f"actual size:{str(pvc[0].size)}")

    assert pvc[0].get_pvc_access_mode == access_mode, (
        f"access mode error| expected access mode:{access_mode} "
        f"\n actual access mode:{pvc[0].get_pvc_access_mode}")

    assert pvc[0].backed_sc == sc_name, (
        f"storage class error| expected storage class:{sc_name} "
        f"\n actual storage class:{pvc[0].backed_sc}")

    assert pvc[0].get_pvc_vol_mode == vol_mode, (
        f"volume mode error| expected volume mode:{vol_mode} "
        f"\n actual volume mode:{pvc[0].get_pvc_vol_mode}")

    # Verifying PVC via UI
    logger.info("Verifying PVC Details via UI")
    pvc_ui_obj.verify_pvc_ui(
        pvc_size=pvc_size,
        access_mode=access_mode,
        vol_mode=vol_mode,
        sc_name=sc_name,
        pvc_name=pvc_name,
        project_name=project_name,
    )
    logger.info("PVC Details Verified via UI..!!")

    # Creating Pod via CLI
    logger.info("Creating Pod")
    if sc_name in (
        constants.DEFAULT_STORAGECLASS_RBD_THICK,
        constants.DEFAULT_STORAGECLASS_RBD,
    ):
        interface_type = constants.CEPHBLOCKPOOL
    else:
        interface_type = constants.CEPHFILESYSTEM

    new_pod = helpers.create_pod(
        interface_type=interface_type,
        pvc_name=pvc_name,
        namespace=project_name,
        raw_block_pv=vol_mode == constants.VOLUME_MODE_BLOCK,
    )

    logger.info(f"Waiting for Pod: state= {constants.STATUS_RUNNING}")
    wait_for_resource_state(resource=new_pod, state=constants.STATUS_RUNNING)

    # Calling the Teardown Factory Method to make sure Pod is deleted
    teardown_factory(new_pod)

    # Expanding the PVC
    logger.info("Pvc Resizing")
    new_size = int(pvc_size) + 3
    pvc_ui_obj.pvc_resize_ui(pvc_name=pvc_name, new_size=new_size,
                             project_name=project_name)

    assert new_size > int(
        pvc_size
    ), f"New size of the PVC cannot be less than existing size: new size is {new_size})"

    ocp_version = get_ocp_version()
    self.pvc_loc = locators[ocp_version]["pvc"]

    # Verifying PVC expansion
    logger.info("Verifying PVC resize")
    expected_capacity = f"{new_size} GiB"
    pvc_resize = pvc_ui_obj.verify_pvc_resize_ui(
        project_name=project_name,
        pvc_name=pvc_name,
        expected_capacity=expected_capacity,
    )

    assert pvc_resize, "PVC resize failed"
    logger.info("Pvc resize verified..!!"
                f"New Capacity after PVC resize is {expected_capacity}")

    # Running FIO
    logger.info("Execute FIO on a Pod")
    if vol_mode == constants.VOLUME_MODE_BLOCK:
        storage_type = constants.WORKLOAD_STORAGE_TYPE_BLOCK
    else:
        storage_type = constants.WORKLOAD_STORAGE_TYPE_FS

    new_pod.run_io(storage_type, size=(new_size - 1), invalidate=0,
                   rate="1000m")

    get_fio_rw_iops(new_pod)
    logger.info("FIO execution on Pod successfully completed..!!")

    # Checking if the Pod is deleted or not
    new_pod.delete(wait=True)
    new_pod.ocp.wait_for_delete(resource_name=new_pod.name)

    # Deleting the PVC via UI
    logger.info(f"Delete {pvc_name} pvc")
    pvc_ui_obj.delete_pvc_ui(pvc_name, project_name)

    pvc[0].ocp.wait_for_delete(pvc_name, timeout=120)

    pvc_objs = get_all_pvc_objs(namespace=project_name)
    pvcs = [pvc_obj for pvc_obj in pvc_objs if pvc_obj.name == pvc_name]
    # The original check was `assert f"PVC ... does not deleted"` inside an
    # `if` -- asserting a non-empty string is always truthy, so the test
    # could never fail. Assert the actual deletion condition instead.
    assert len(pvcs) == 0, f"PVC {pvc_name} is not deleted"
def install_submariner_ui(self):
    """
    Installs the Submariner on the ACM Hub cluster and expects 2 OCP clusters
    to be already imported on the Hub Cluster to create a link between them

    """
    self.navigate_clusters_page()
    self.page_has_loaded(retries=15, sleep_time=5)
    self.do_click(locator=self.acm_page_nav["Clusters_page"])
    log.info("Click on Cluster sets")
    self.do_click(self.page_nav["cluster-sets"])
    self.page_has_loaded(retries=15, sleep_time=5)
    log.info("Click on Create cluster set")
    self.do_click(self.page_nav["create-cluster-set"])
    # Kept global so later steps/tests can refer to the cluster set
    # created here
    global cluster_set_name
    cluster_set_name = create_unique_resource_name("submariner", "clusterset")
    log.info(f"Send Cluster set name '{cluster_set_name}'")
    self.do_send_keys(self.page_nav["cluster-set-name"], text=cluster_set_name)
    log.info("Click on Create")
    self.do_click(self.page_nav["click-create"], enable_screenshot=True)
    time.sleep(1)
    log.info("Click on Manage resource assignments")
    self.do_click(self.page_nav["click-manage-resource-assignments"],
                  enable_screenshot=True)
    # Assign both imported clusters to the new cluster set
    log.info(f"Search and select cluster '{cluster_name_a}'")
    self.do_send_keys(self.page_nav["search-cluster"], text=cluster_name_a)
    self.do_click(self.page_nav["select-first-checkbox"],
                  enable_screenshot=True)
    log.info("Clear search by clicking on cross mark")
    self.do_click(self.page_nav["clear-search"])
    log.info(f"Search and select cluster '{cluster_name_b}'")
    self.do_send_keys(self.page_nav["search-cluster"], text=cluster_name_b)
    self.do_click(self.page_nav["select-first-checkbox"],
                  enable_screenshot=True)
    log.info("Clear search by clicking on cross mark [2]")
    self.do_click(self.page_nav["clear-search"])
    log.info("Click on 'Review'")
    self.do_click(self.page_nav["review-btn"], enable_screenshot=True)
    log.info("Click on 'Save' to confirm the changes")
    self.do_click(self.page_nav["confirm-btn"], enable_screenshot=True)
    time.sleep(3)
    log.info("Click on 'Submariner add-ons' tab")
    self.do_click(self.page_nav["submariner-tab"])
    log.info("Click on 'Install Submariner add-ons' button")
    self.do_click(self.page_nav["install-submariner-btn"])
    log.info("Click on 'Target clusters'")
    self.do_click(self.page_nav["target-clusters"])
    log.info(f"Select 1st cluster which is {cluster_name_a}")
    self.do_click(
        format_locator(
            locator=self.page_nav["cluster-name-selection"],
            string_to_insert=cluster_name_a,
        ))
    log.info(f"Select 2nd cluster which is {cluster_name_b}")
    self.do_click(
        format_locator(
            locator=self.page_nav["cluster-name-selection"],
            string_to_insert=cluster_name_b,
        ),
        enable_screenshot=True,
    )
    log.info("Click on Next button")
    self.do_click(self.page_nav["next-btn"])
    # NOTE(review): the NAT-T/gateway-count steps below are repeated twice
    # ("[2]") -- presumably the wizard shows the same form once per selected
    # cluster; confirm against the ACM Submariner add-on wizard.
    log.info("Click on 'Enable NAT-T' to uncheck it")
    self.do_click(self.page_nav["nat-t-checkbox"])
    log.info(
        "Increase the gateway count to 3 by clicking twice on the gateway count add button"
    )
    self.do_click(self.page_nav["gateway-count-btn"])
    self.do_click(self.page_nav["gateway-count-btn"])
    log.info("Click on Next button")
    self.do_click(self.page_nav["next-btn"])
    log.info("Click on 'Enable NAT-T' to uncheck it [2]")
    self.do_click(self.page_nav["nat-t-checkbox"])
    log.info(
        "Increase the gateway count to 3 by clicking twice on the gateway count add button [2]"
    )
    self.do_click(self.page_nav["gateway-count-btn"])
    self.do_click(self.page_nav["gateway-count-btn"])
    log.info("Click on Next button [2]")
    self.do_click(self.page_nav["next-btn"])
    self.take_screenshot()
    log.info("Click on 'Install'")
    self.do_click(self.page_nav["install-btn"])
def measure_corrupt_pg(measurement_dir):
    """
    Create Ceph pool and corrupt Placement Group on one of OSDs, measures the
    time when it was corrupted and records alerts that were triggered during
    this event.

    Returns:
        dict: Contains information about `start` and `stop` time for
        corrupting Ceph Placement Group
    """
    oc = ocp.OCP(
        kind=constants.DEPLOYMENT, namespace=config.ENV_DATA.get("cluster_namespace")
    )
    # Pick the first OSD deployment as the corruption target
    osd_deployments = oc.get(selector=constants.OSD_APP_LABEL).get("items")
    osd_deployment = osd_deployments[0].get("metadata").get("name")
    ct_pod = pod.get_ceph_tools_pod()
    pool_name = helpers.create_unique_resource_name("corrupted", "pool")
    ct_pod.exec_ceph_cmd(f"ceph osd pool create {pool_name} 1 1")
    # noout keeps Ceph from rebalancing while the OSD is taken down
    logger.info("Setting osd noout flag")
    ct_pod.exec_ceph_cmd("ceph osd set noout")
    logger.info(f"Put object into {pool_name}")
    pool_object = "test_object"
    ct_pod.exec_ceph_cmd(f"rados -p {pool_name} put {pool_object} /etc/passwd")

    logger.info(f"Looking for Placement Group with {pool_object} object")
    pg = ct_pod.exec_ceph_cmd(f"ceph osd map {pool_name} {pool_object}")["pgid"]
    logger.info(f"Found Placement Group: {pg}")

    # Dummy OSD deployment used to run ceph-objectstore-tool against the
    # OSD's data while the real OSD deployment is down
    dummy_deployment, dummy_pod = helpers.create_dummy_osd(osd_deployment)

    def corrupt_pg():
        """
        Corrupt PG on one OSD in Ceph pool for 12 minutes and measure it.
        There should be only CephPGRepairTakingTooLong Pending alert as
        it takes 2 hours for it to become Firing.
        This configuration of alert can be observed in ceph-mixins which
        is used in the project:
            https://github.com/ceph/ceph-mixins/blob/d22afe8c0da34490cb77e52a202eefcf4f62a869/config.libsonnet#L23
        There should be also CephClusterErrorState alert that takes 10
        minutes to start firing.

        Returns:
            str: Name of corrupted deployment
        """
        # run_time of operation
        run_time = 60 * 12
        nonlocal oc
        nonlocal pool_name
        nonlocal pool_object
        nonlocal dummy_pod
        nonlocal pg
        nonlocal osd_deployment
        nonlocal dummy_deployment
        logger.info(f"Corrupting {pg} PG on {osd_deployment}")
        # Overwrite the object's on-disk bytes directly via
        # ceph-objectstore-tool inside the dummy OSD pod
        dummy_pod.exec_sh_cmd_on_pod(
            f"ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-"
            f"{osd_deployment.split('-')[-1]} --pgid {pg} {pool_object} "
            f"set-bytes /etc/shadow --no-mon-config"
        )
        logger.info("Unsetting osd noout flag")
        ct_pod.exec_ceph_cmd("ceph osd unset noout")
        # Trigger a deep scrub so Ceph detects the corruption
        ct_pod.exec_ceph_cmd(f"ceph pg deep-scrub {pg}")
        # Swap the dummy OSD out and bring the real OSD back up
        oc.exec_oc_cmd(f"scale --replicas=0 deployment/{dummy_deployment}")
        oc.exec_oc_cmd(f"scale --replicas=1 deployment/{osd_deployment}")
        logger.info(f"Waiting for {run_time} seconds")
        time.sleep(run_time)
        return osd_deployment

    test_file = os.path.join(measurement_dir, "measure_corrupt_pg.json")
    measured_op = measure_operation(corrupt_pg, test_file)
    logger.info(f"Deleting pool {pool_name}")
    ct_pod.exec_ceph_cmd(
        f"ceph osd pool delete {pool_name} {pool_name} "
        f"--yes-i-really-really-mean-it"
    )
    logger.info(f"Checking that pool {pool_name} is deleted")

    logger.info(f"Deleting deployment {dummy_deployment}")
    oc.delete(resource_name=dummy_deployment)

    # wait for ceph to return into HEALTH_OK state after osd deployment
    # is returned back to normal
    ceph_health_check(tries=20, delay=15)

    return measured_op
def measure_noobaa_exceed_bucket_quota(measurement_dir, request, mcg_obj, awscli_pod):
    """
    Create NooBaa bucket, set its capacity quota to 2GB and fill it with data.

    Returns:
        dict: Contains information about `start` and `stop` time for
        exceeding the NooBaa bucket quota
    """
    bucket_name = create_unique_resource_name(
        resource_description="bucket", resource_type="s3"
    )
    bucket = MCGS3Bucket(bucket_name, mcg=mcg_obj)
    # Cap the bucket at 2 GiB via the NooBaa RPC API
    mcg_obj.send_rpc_query(
        "bucket_api",
        "update_bucket",
        {"name": bucket_name, "quota": {"unit": "GIGABYTE", "size": 2}},
    )
    bucket_info = mcg_obj.get_bucket_info(bucket.name)
    logger.info(f"Bucket {bucket.name} storage: {bucket_info['storage']}")
    logger.info(f"Bucket {bucket.name} data: {bucket_info['data']}")

    def teardown():
        """
        Delete test bucket.
        """
        bucket.delete()

    request.addfinalizer(teardown)

    def exceed_bucket_quota():
        """
        Upload 5 files with 500MB size into bucket that has quota set to 2GB.

        Returns:
            str: Name of utilized bucket
        """
        nonlocal mcg_obj
        nonlocal bucket_name
        nonlocal awscli_pod
        # run_time of operation
        run_time = 60 * 14
        awscli_pod.exec_cmd_on_pod("dd if=/dev/zero of=/tmp/testfile bs=1M count=500")
        for i in range(1, 6):
            # Credentials are passed as secrets so they are masked in logs
            awscli_pod.exec_cmd_on_pod(
                craft_s3_command(
                    f"cp /tmp/testfile s3://{bucket_name}/testfile{i}", mcg_obj
                ),
                out_yaml_format=False,
                secrets=[
                    mcg_obj.access_key_id,
                    mcg_obj.access_key,
                    mcg_obj.s3_endpoint,
                ],
            )
        logger.info(f"Waiting for {run_time} seconds")
        time.sleep(run_time)
        return bucket_name

    test_file = os.path.join(
        measurement_dir, "measure_noobaa_exceed__bucket_quota.json"
    )
    measured_op = measure_operation(exceed_bucket_quota, test_file)
    bucket_info = mcg_obj.get_bucket_info(bucket.name)
    logger.info(f"Bucket {bucket.name} storage: {bucket_info['storage']}")
    logger.info(f"Bucket {bucket.name} data: {bucket_info['data']}")
    logger.info(f"Deleting data from bucket {bucket_name}")
    for i in range(1, 6):
        awscli_pod.exec_cmd_on_pod(
            craft_s3_command(f"rm s3://{bucket_name}/testfile{i}", mcg_obj),
            out_yaml_format=False,
            secrets=[mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint],
        )
    return measured_op
def measure_corrupt_pg(request, measurement_dir):
    """
    Create Ceph pool and corrupt Placement Group on one of OSDs, measures the
    time when it was corrupted and records alerts that were triggered during
    this event.

    Returns:
        dict: Contains information about `start` and `stop` time for
        corrupting Ceph Placement Group
    """
    osd_deployment = deployment.get_osd_deployments()[0]
    # Remembered so teardown can roll the deployment back after corruption
    original_deployment_revision = osd_deployment.revision
    ct_pod = pod.get_ceph_tools_pod()
    pool_name = helpers.create_unique_resource_name("corrupted", "pool")
    ct_pod.exec_ceph_cmd(f"ceph osd pool create {pool_name} 1 1")
    ct_pod.exec_ceph_cmd(f"ceph osd pool application enable {pool_name} rbd")

    def teardown():
        """
        Make sure that corrupted pool is deleted and ceph health is ok
        """
        nonlocal pool_name
        nonlocal osd_deployment
        nonlocal original_deployment_revision
        logger.info(f"Deleting pool {pool_name}")
        ct_pod.exec_ceph_cmd(f"ceph osd pool delete {pool_name} {pool_name} "
                             f"--yes-i-really-really-mean-it")
        logger.info("Unsetting osd noout flag")
        ct_pod.exec_ceph_cmd("ceph osd unset noout")
        logger.info("Unsetting osd noscrub flag")
        ct_pod.exec_ceph_cmd("ceph osd unset noscrub")
        logger.info("Unsetting osd nodeep-scrub flag")
        ct_pod.exec_ceph_cmd("ceph osd unset nodeep-scrub")
        logger.info(f"Checking that pool {pool_name} is deleted")
        logger.info(
            f"Restoring deployment {osd_deployment.name} "
            f"to its original revision: {original_deployment_revision}")
        if original_deployment_revision:
            osd_deployment.set_revision(original_deployment_revision)
            # unset original_deployment_revision because revision number is
            # deleted when used
            original_deployment_revision = False
        # wait for ceph to return into HEALTH_OK state after osd deployment
        # is returned back to normal
        ceph_health_check(tries=20, delay=15)

    request.addfinalizer(teardown)
    # noout keeps Ceph from rebalancing while the OSD is manipulated
    logger.info("Setting osd noout flag")
    ct_pod.exec_ceph_cmd("ceph osd set noout")
    logger.info(f"Put object into {pool_name}")
    pool_object = "test_object"
    ct_pod.exec_ceph_cmd(f"rados -p {pool_name} put {pool_object} /etc/passwd")

    def corrupt_pg():
        """
        Corrupt PG on one OSD in Ceph pool for 14 minutes and measure it.
        There should be only CephPGRepairTakingTooLong Pending alert as
        it takes 2 hours for it to become Firing.
        This configuration of alert can be observed in ceph-mixins which
        is used in the project:
            https://github.com/ceph/ceph-mixins/blob/d22afe8c0da34490cb77e52a202eefcf4f62a869/config.libsonnet#L23
        There should be also CephClusterErrorState alert that takes 10
        minutes to start firing.

        Returns:
            str: Name of corrupted deployment
        """
        # run_time of operation
        run_time = 60 * 14
        nonlocal pool_name
        nonlocal pool_object
        nonlocal osd_deployment
        logger.info(f"Corrupting pool {pool_name} on {osd_deployment.name}")
        rados_utils.corrupt_pg(osd_deployment, pool_name, pool_object)
        logger.info(f"Waiting for {run_time} seconds")
        time.sleep(run_time)
        return osd_deployment.name

    test_file = os.path.join(measurement_dir, "measure_corrupt_pg.json")
    if config.ENV_DATA["platform"].lower(
    ) in constants.MANAGED_SERVICE_PLATFORMS:
        # It seems that it takes longer to propagate incidents to PagerDuty.
        # Adding 3 extra minutes
        measured_op = measure_operation(corrupt_pg, test_file,
                                        minimal_time=60 * 17)
    else:
        measured_op = measure_operation(corrupt_pg, test_file)
    # teardown runs here and again as the registered finalizer; the
    # original_deployment_revision=False guard prevents a second rollback --
    # presumably the pool delete tolerates repetition (TODO confirm)
    teardown()

    return measured_op
def _create_bucket_class(bucket_class_dict):
    """
    Creates and deletes all bucket classes that were created as part of the test

    Args:
        bucket_class_dict (dict): Dictionary containing the description of the
            bucket class.
            Possible keys and values are:
            - interface (str): The interface to use for creation of buckets.
                OC | CLI
            - placement_policy (str): The Placement policy for this bucket class.
                Spread | Mirror
            - backingstore_dict (dict): A dictionary compatible with the
                backing store factory requirements. (Described in
                backingstore.py, under _create_backingstore)
            - namespace_policy_dict (dict): A dictionary compatible with the
                namespace store factory. Needs to contain the following keys
                and values:
                - type (str): Single | Multi | Cache
                - namespacestore_dict (dict): Identical format to
                    backingstore_dict, contains data that's forwarded to
                    cloud_uls_factory.

    Returns:
        BucketClass: A Bucket Class object.

    Raises:
        RuntimeError: If an unsupported interface type is requested

    """
    if "interface" in bucket_class_dict:
        interface = bucket_class_dict["interface"]
        # Membership test directly on the mapping; `.keys()` was redundant
        if interface.lower() not in interfaces:
            raise RuntimeError(
                f"Invalid interface type received: {interface}. "
                f'available types: {", ".join(interfaces)}')
    else:
        interface = "OC"

    namespace_policy = {}
    backingstores = None
    namespacestores = None

    if "namespace_policy_dict" in bucket_class_dict:
        if "namespacestore_dict" in bucket_class_dict[
                "namespace_policy_dict"]:
            nss_dict = bucket_class_dict["namespace_policy_dict"][
                "namespacestore_dict"]
            namespacestores = namespace_store_factory(interface, nss_dict)
            namespace_policy["type"] = bucket_class_dict[
                "namespace_policy_dict"]["type"]
            # All namespace stores are read resources; the first one also
            # serves as the single write resource
            namespace_policy["read_resources"] = [
                nss.name for nss in namespacestores
            ]
            namespace_policy["write_resource"] = namespacestores[0].name
    elif "backingstore_dict" in bucket_class_dict:
        # list(...) instead of a pass-through comprehension
        backingstores = list(
            backingstore_factory(interface,
                                 bucket_class_dict["backingstore_dict"]))
    else:
        # Fall back to the default NooBaa backing store
        backingstores = [
            BackingStore(constants.DEFAULT_NOOBAA_BACKINGSTORE, method="oc")
        ]

    # Placement policy defaults to Spread when not specified
    placement_policy = bucket_class_dict.get("placement_policy", "Spread")

    bucket_class_name = create_unique_resource_name(
        resource_description="bucketclass", resource_type=interface.lower())

    # Create the bucket class through the selected interface (OC/CLI)
    interfaces[interface.lower()](
        name=bucket_class_name,
        backingstores=backingstores,
        placement=placement_policy,
        namespace_policy=namespace_policy,
    )
    bucket_class_object = BucketClass(
        bucket_class_name,
        backingstores,
        namespacestores,
        placement_policy,
        namespace_policy,
    )
    # Track for cleanup at the end of the scope
    created_bucket_classes.append(bucket_class_object)
    return bucket_class_object
def test_pvc_snapshot(self, interface, teardown_factory):
    """
    1. Run I/O on a pod file.
    2. Calculate md5sum of the file.
    3. Take a snapshot of the PVC.
    4. Create a new PVC out of that snapshot.
    5. Attach a new pod to it.
    6. Verify that the file is present on the new pod also.
    7. Verify that the md5sum of the file on the new pod matches
       with the md5sum of the file on the original pod.

    Args:
        interface(str): The type of the interface
            (e.g. CephBlockPool, CephFileSystem)
        teardown_factory: A fixture to destroy objects

    """
    log.info(f"Running IO on pod {self.pod_obj.name}")
    file_name = self.pod_obj.name
    log.info(f"File created during IO {file_name}")
    self.pod_obj.run_io(storage_type="fs", size="1G", fio_filename=file_name)

    # Wait for fio to finish
    fio_result = self.pod_obj.get_fio_results()
    err_count = fio_result.get("jobs")[0].get("error")
    assert err_count == 0, (f"IO error on pod {self.pod_obj.name}. "
                            f"FIO result: {fio_result}")
    log.info(f"Verified IO on pod {self.pod_obj.name}.")

    # Verify presence of the file
    file_path = pod.get_file_path(self.pod_obj, file_name)
    log.info(f"Actual file path on the pod {file_path}")
    assert pod.check_file_existence(
        self.pod_obj, file_path), f"File {file_name} doesn't exist"
    log.info(f"File {file_name} exists in {self.pod_obj.name}")

    # Calculate md5sum before snapshotting, for the later data-integrity
    # comparison on the restored pod
    orig_md5_sum = pod.cal_md5sum(self.pod_obj, file_name)

    # Take a snapshot
    snap_yaml = constants.CSI_RBD_SNAPSHOT_YAML
    if interface == constants.CEPHFILESYSTEM:
        snap_yaml = constants.CSI_CEPHFS_SNAPSHOT_YAML
    snap_name = helpers.create_unique_resource_name("test", "snapshot")
    snap_obj = pvc.create_pvc_snapshot(
        self.pvc_obj.name,
        snap_yaml,
        snap_name,
        helpers.default_volumesnapshotclass(interface).name,
    )
    snap_obj.ocp.wait_for_resource(
        condition="true",
        resource_name=snap_obj.name,
        column=constants.STATUS_READYTOUSE,
        timeout=60,
    )
    teardown_factory(snap_obj)

    # Same Storage class of the original PVC
    sc_name = self.pvc_obj.backed_sc

    # Size should be same as of the original PVC
    pvc_size = str(self.pvc_obj.size) + "Gi"

    # Create pvc out of the snapshot
    # Both, the snapshot and the restore PVC should be in same namespace
    restore_pvc_name = helpers.create_unique_resource_name(
        "test", "restore-pvc")
    restore_pvc_yaml = constants.CSI_RBD_PVC_RESTORE_YAML
    if interface == constants.CEPHFILESYSTEM:
        restore_pvc_yaml = constants.CSI_CEPHFS_PVC_RESTORE_YAML

    restore_pvc_obj = pvc.create_restore_pvc(
        sc_name=sc_name,
        snap_name=snap_obj.name,
        namespace=snap_obj.namespace,
        size=pvc_size,
        pvc_name=restore_pvc_name,
        restore_pvc_yaml=restore_pvc_yaml,
    )
    helpers.wait_for_resource_state(restore_pvc_obj, constants.STATUS_BOUND)
    restore_pvc_obj.reload()
    teardown_factory(restore_pvc_obj)

    # Create and attach pod to the pvc
    restore_pod_obj = helpers.create_pod(
        interface_type=interface,
        pvc_name=restore_pvc_obj.name,
        namespace=snap_obj.namespace,
        pod_dict_path=constants.NGINX_POD_YAML,
    )

    # Confirm that the pod is running
    helpers.wait_for_resource_state(resource=restore_pod_obj,
                                    state=constants.STATUS_RUNNING)
    restore_pod_obj.reload()
    teardown_factory(restore_pod_obj)

    # Verify that the file is present on the new pod
    log.info(f"Checking the existence of {file_name} "
             f"on restore pod {restore_pod_obj.name}")
    assert pod.check_file_existence(
        restore_pod_obj, file_path), f"File {file_name} doesn't exist"
    log.info(f"File {file_name} exists in {restore_pod_obj.name}")

    # Verify that the md5sum matches
    log.info(f"Verifying that md5sum of {file_name} "
             f"on pod {self.pod_obj.name} matches with md5sum "
             f"of the same file on restore pod {restore_pod_obj.name}")
    assert pod.verify_data_integrity(
        restore_pod_obj, file_name,
        orig_md5_sum), "Data integrity check failed"
    log.info("Data integrity check passed, md5sum are same")

    log.info("Running IO on new pod")
    # Run IO on new pod
    restore_pod_obj.run_io(storage_type="fs", size="1G", runtime=20)

    # Wait for fio to finish
    restore_pod_obj.get_fio_results()
    # Fixed log message typo ("o new pod" -> "on new pod")
    log.info("IO finished on new pod")
def _create_bucket_class(bucket_class_dict):
    """
    Creates and deletes all bucket classes that were created as part of the test

    Args:
        bucket_class_dict (dict): Dictionary containing the description of the
            bucket class. Possible keys and values are:
            - interface (str): The interface to use for creation of buckets.
                OC | CLI
            - placement_policy (str): The Placement policy for this bucket class.
                Spread | Mirror
            - backingstore_dict (dict): A dictionary compatible with the backing
                store factory requirements. (Described in backingstore.py,
                under _create_backingstore)
            - namespace_policy_dict (dict): A dictionary compatible with the
                namespace store factory. Needs to contain the following keys
                and values:
                - type (str): Single | Multi | Cache
                - namespacestore_dict (dict): Identical format to
                    backingstore_dict, contains data that's forwarded to
                    cloud_uls_factory.
                - namespacestores (list): If namespacestores list is provided
                    instead of namespacestore_dict then NamespaceStore instances
                    provided in the list are used. First NamespaceStore is used
                    as write resource. All of them are used as read resources.
                for cache bucket a required field of ttl (int), the behavior for
                this field is after the amount of ms has passed noobaa will go to
                the underlying storage and check if the etag of the file has
                changed. **Very important** ttl field is in ms not seconds!!

    Returns:
        BucketClass: A Bucket Class object.
    """
    if "interface" in bucket_class_dict:
        interface = bucket_class_dict["interface"]
        if interface.lower() not in interfaces.keys():
            raise RuntimeError(
                f"Invalid interface type received: {interface}. "
                f'available types: {", ".join(interfaces)}'
            )
    else:
        interface = "OC"

    namespace_policy = {}
    backingstores = None
    namespacestores = None

    if "namespace_policy_dict" in bucket_class_dict:
        ns_policy_dict = bucket_class_dict["namespace_policy_dict"]
        # Resolve the namespacestores either by creating them through the
        # factory, or by using pre-created instances supplied by the caller.
        if "namespacestore_dict" in ns_policy_dict:
            namespacestores = namespace_store_factory(
                interface, ns_policy_dict["namespacestore_dict"]
            )
        elif "namespacestores" in ns_policy_dict:
            namespacestores = ns_policy_dict["namespacestores"]
        # The policy structure is identical regardless of how the
        # namespacestores were obtained, so it is built only once here
        # (previously duplicated in both branches above).
        if namespacestores is not None:
            namespace_policy["type"] = ns_policy_dict["type"]
            if namespace_policy["type"] == "Cache":
                namespace_policy["cache"] = {
                    "hubResource": namespacestores[0].name,
                    # NOTE: ttl is in milliseconds, not seconds
                    "caching": {"ttl": ns_policy_dict["ttl"]},
                }
            else:
                # TODO: Implement support for Single-tiered NS bucketclass
                namespace_policy["read_resources"] = [
                    nss.name for nss in namespacestores
                ]
                namespace_policy["write_resource"] = namespacestores[0].name
    elif "backingstore_dict" in bucket_class_dict:
        backingstores = [
            backingstore
            for backingstore in backingstore_factory(
                interface, bucket_class_dict["backingstore_dict"]
            )
        ]
    else:
        # Fall back to the default NooBaa backing store
        backingstores = [
            BackingStore(constants.DEFAULT_NOOBAA_BACKINGSTORE, method="oc")
        ]

    # "Spread" is the NooBaa default placement when none is requested
    placement_policy = bucket_class_dict.get("placement_policy", "Spread")

    bucket_class_name = create_unique_resource_name(
        resource_description="bucketclass", resource_type=interface.lower()
    )
    # Dispatch creation to the OC- or CLI-specific implementation
    interfaces[interface.lower()](
        name=bucket_class_name,
        backingstores=backingstores,
        placement=placement_policy,
        namespace_policy=namespace_policy,
    )
    bucket_class_object = BucketClass(
        bucket_class_name,
        backingstores,
        namespacestores,
        placement_policy,
        namespace_policy,
    )
    # Track for teardown by the enclosing fixture
    created_bucket_classes.append(bucket_class_object)
    return bucket_class_object
def test_duplicate_noobaa_secrets( self, backingstore_factory, cloud_uls_factory, mcg_obj, teardown_factory, cld_mgr, ): """ Objective of this test is: * Create a secret with the same credentials and see if the duplicates are allowed when BS created """ # create secret with the same credentials to check if duplicates are allowed first_bs_obj = backingstore_factory( method="oc", uls_dict={"aws": [(1, constants.AWS_REGION)]} )[0] aws_secret_obj = cld_mgr.aws_client.create_s3_secret( cld_mgr.aws_client.secret_prefix, cld_mgr.aws_client.data_prefix ) logger.info(f"New secret created: {aws_secret_obj.name}") teardown_factory(aws_secret_obj) cloud = "aws" uls_tup = (1, constants.AWS_REGION) uls_name = list(cloud_uls_factory({cloud: [uls_tup]})["aws"])[0] logger.info(f"ULS dict: {type(uls_name)}") second_bs_name = create_unique_resource_name( resource_description="backingstore", resource_type=cloud.lower(), ) bs_data = templating.load_yaml(constants.MCG_BACKINGSTORE_YAML) bs_data["metadata"]["name"] = second_bs_name bs_data["metadata"]["namespace"] = config.ENV_DATA["cluster_namespace"] bs_data["spec"] = { "type": "aws-s3", "awsS3": { "targetBucket": uls_name, "region": constants.AWS_REGION, "secret": { "name": aws_secret_obj.name, "namespace": bs_data["metadata"]["namespace"], }, }, } second_bs_obj = create_resource(**bs_data) teardown_factory(second_bs_obj) # Check if the duplicate secrets are allowed first_bs_dict = OCP( namespace=config.ENV_DATA["cluster_namespace"], kind="backingstore" ).get(resource_name=first_bs_obj.name) second_bs_dict = OCP( namespace=config.ENV_DATA["cluster_namespace"], kind="backingstore" ).get(resource_name=second_bs_name) assert ( first_bs_dict["spec"]["awsS3"]["secret"]["name"] == second_bs_dict["spec"]["awsS3"]["secret"]["name"] ), "Backingstores are not referring to the same secrets when secrets with duplicate credentials are created!!" logger.info( "Duplicate secrets are not allowed! only the first secret is being referred" )
def test_noobaa_secret_deletion_method2(self, teardown_factory, mcg_obj, cleanup):
    """
    Objectives of this tests are:
        1) create first backingstore using CLI passing credentials, which creates secret as well
        2) create second backingstore using CLI passing credentials, which recognizes the duplicates
           and uses the secret created above
        3) Modify the existing secret credentials see if the owned BS/NS is getting reconciled
        4) delete the first backingstore and make sure secret is not deleted
        5) check for the ownerReference see if its removed for the above backingstore deletion
        6) delete the second backingstore and make sure secret is now deleted

    Args:
        teardown_factory: Fixture used to register resources for cleanup
            (unused directly here — TODO confirm it is needed)
        mcg_obj: MCG object fixture used for CLI backingstore operations
        cleanup: Fixture that registers backingstore objects for deletion
    """
    # create ULS
    # Load AWS credentials: prefer ocs-ci-data (QE environments), fall back
    # to the local auth.yaml configuration
    try:
        logger.info(
            "Trying to load credentials from ocs-ci-data. "
            "This flow is only relevant when running under OCS-QE environments."
        )
        secret_dict = update_config_from_s3().get("AUTH")
    except (AttributeError, EndpointConnectionError):
        logger.warning(
            "Failed to load credentials from ocs-ci-data.\n"
            "Your local AWS credentials might be misconfigured.\n"
            "Trying to load credentials from local auth.yaml instead"
        )
        secret_dict = load_auth_config().get("AUTH", {})
    access_key = secret_dict["AWS"]["AWS_ACCESS_KEY_ID"]
    secret_key = secret_dict["AWS"]["AWS_SECRET_ACCESS_KEY"]
    # Create the underlying S3 bucket for the first backingstore
    first_uls_name = create_unique_resource_name(
        resource_description="uls", resource_type="aws"
    )
    client = boto3.resource(
        "s3",
        verify=True,
        endpoint_url="https://s3.amazonaws.com",
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )
    client.create_bucket(
        Bucket=first_uls_name,
        CreateBucketConfiguration={"LocationConstraint": "eu-central-1"},
    )
    # Create the first backingstore via CLI, passing credentials explicitly;
    # this is expected to create the linked secret as well
    first_bs_name = create_unique_resource_name(
        resource_description="backingstore", resource_type="aws"
    )
    create_aws_bs_using_cli(
        mcg_obj=mcg_obj,
        backingstore_name=first_bs_name,
        access_key=access_key,
        secret_key=secret_key,
        uls_name=first_uls_name,
        region="eu-central-1",
    )
    mcg_obj.check_backingstore_state(
        backingstore_name=first_bs_name, desired_state=constants.BS_OPTIMAL
    )
    first_bs_obj = BackingStore(
        name=first_bs_name,
        method="cli",
        type="cloud",
        uls_name=first_uls_name,
        mcg_obj=mcg_obj,
    )
    cleanup(first_bs_obj)

    # create second backingstore using CLI and pass the secret credentials;
    # with identical credentials the CLI is expected to reuse the first secret
    second_uls_name = create_unique_resource_name(
        resource_description="uls", resource_type="aws"
    )
    client.create_bucket(
        Bucket=second_uls_name,
        CreateBucketConfiguration={"LocationConstraint": "eu-central-1"},
    )
    second_bs_name = create_unique_resource_name(
        resource_description="backingstore", resource_type="aws"
    )
    create_aws_bs_using_cli(
        mcg_obj=mcg_obj,
        backingstore_name=second_bs_name,
        access_key=access_key,
        secret_key=secret_key,
        uls_name=second_uls_name,
        region="eu-central-1",
    )
    mcg_obj.check_backingstore_state(
        backingstore_name=second_bs_name, desired_state=constants.BS_OPTIMAL
    )
    second_bs_obj = BackingStore(
        name=second_bs_name,
        method="cli",
        type="cloud",
        uls_name=second_uls_name,
        mcg_obj=mcg_obj,
    )
    cleanup(second_bs_obj)

    # Modify the secret credentials to wrong one and see if the backingstores get rejected
    secret_name = OCP(
        namespace=config.ENV_DATA["cluster_namespace"], kind="backingstore"
    ).get(resource_name=second_bs_name)["spec"]["awsS3"]["secret"]["name"]
    # base64 of "wrongaccesskey" — deliberately invalid credentials
    wrong_access_key_patch = {
        "data": {"AWS_ACCESS_KEY_ID": "d3JvbmdhY2Nlc3NrZXk="}
    }  # Invalid Access Key
    OCP(namespace=config.ENV_DATA["cluster_namespace"], kind="secret").patch(
        resource_name=secret_name,
        params=json.dumps(wrong_access_key_patch),
        format_type="merge",
    )
    logger.info("Patched wrong access key!")
    # Both backingstores share the secret, so patching it should drive the
    # backingstore back into the "Creating" phase (reconciliation)
    assert OCP(
        namespace=config.ENV_DATA["cluster_namespace"], kind="backingstore"
    ).wait_for_resource(
        resource_name=second_bs_name,
        condition="Creating",
        column="PHASE",
    ), "Backingstores are not getting reconciled after changing linked secret credentials!"
    logger.info("Backingstores getting reconciled!")

    # delete first backingstore; the shared secret must survive since the
    # second backingstore still references it
    first_bs_obj.delete()
    logger.info(f"First backingstore {first_bs_name} deleted!")
    assert (
        OCP(namespace=config.ENV_DATA["cluster_namespace"], kind="secret").get(
            resource_name=secret_name, dont_raise=True
        )
        is not None
    ), "[Not expected] Secret got deleted along when first backingstore deleted!!"
    logger.info("Secret exists after the first backingstore deletion!")

    # check for the owner reference: the deleted backingstore must no longer
    # be listed among the secret's ownerReferences
    secret_owner_ref = OCP(
        namespace=config.ENV_DATA["cluster_namespace"], kind="secret"
    ).get(resource_name=secret_name)["metadata"]["ownerReferences"]
    for owner in secret_owner_ref:
        assert owner["name"] != first_bs_name, (
            f"Owner reference for {first_bs_name} still exists in the secret {secret_name} "
            f"even after backingstore {first_bs_name} got deleted!"
        )
    logger.info(
        f"Owner reference for first backingstore {first_bs_name} is deleted in {secret_name} !!"
    )

    # delete second backingstore; with no owners left the secret should be
    # garbage-collected
    second_bs_obj.delete()
    logger.info(f"Second backingstore {second_bs_name} deleted!")
    assert (
        OCP(namespace=config.ENV_DATA["cluster_namespace"], kind="secret").get(
            resource_name=secret_name, dont_raise=True
        )
        is None
    ), "[Not expected] Secret still exists even after all backingstores linked are deleted!"
    logger.info(
        "Secret got deleted after the all the linked backingstores are deleted!"
    )
def test_clone_pvc(
    self,
    project_factory,
    teardown_factory,
    setup_ui,
    sc_name,
    access_mode,
    clone_access_mode,
):
    """
    Verify via the UI that a PVC can be created and then cloned,
    and that both the original and the clone show the expected
    details (size, access mode, volume mode, storage class).
    """
    pvc_size = "1"
    vol_mode = constants.VOLUME_MODE_FILESYSTEM

    # Project is created through the CLI fixture; everything else goes
    # through the UI page object
    namespace = project_factory().namespace
    ui = PvcUI(setup_ui)

    # Create the source PVC from the UI and register it for teardown
    pvc_name = create_unique_resource_name("test", "pvc")
    ui.create_pvc_ui(namespace, sc_name, pvc_name, access_mode, pvc_size, vol_mode)
    source_pvc = get_pvc_objs(pvc_names=[pvc_name], namespace=namespace)[0]
    teardown_factory(source_pvc)

    # Check the source PVC's details as rendered in the UI
    logger.info("Verifying PVC details in UI")
    ui.verify_pvc_ui(
        pvc_size=pvc_size,
        access_mode=access_mode,
        vol_mode=vol_mode,
        sc_name=sc_name,
        pvc_name=pvc_name,
        project_name=namespace,
    )
    logger.info("Verified PVC details in UI")

    # Clone the PVC from the UI, possibly with a different access mode
    clone_pvc_name = f"{pvc_name}-clone"
    ui.pvc_clone_ui(
        project_name=namespace,
        pvc_name=pvc_name,
        cloned_pvc_access_mode=clone_access_mode,
        cloned_pvc_name=clone_pvc_name,
    )
    cloned_pvc = get_pvc_objs(pvc_names=[clone_pvc_name], namespace=namespace)[0]
    teardown_factory(cloned_pvc)

    # Check the clone's details as rendered in the UI
    logger.info("Verifying cloned PVC details in UI")
    ui.verify_pvc_ui(
        pvc_size=pvc_size,
        access_mode=clone_access_mode,
        vol_mode=vol_mode,
        sc_name=sc_name,
        pvc_name=clone_pvc_name,
        project_name=namespace,
    )
    logger.info("Verified cloned PVC details in UI")