def test_mcg_data_deduplication(
    self, mcg_obj, awscli_pod_session, bucket_factory, bucketclass_dict
):
    """
    Test data deduplication mechanics

    Args:
        mcg_obj (obj): An object representing the current state of the MCG
            in the cluster
        awscli_pod_session (pod): A pod running the AWSCLI tools
        bucket_factory: Calling this fixture creates a new bucket(s)

    """
    download_dir = AWSCLI_TEST_OBJ_DIR
    file_size = int(
        awscli_pod_session.exec_cmd_on_pod(
            command=f"stat -c %s {download_dir}danny.webm", out_yaml_format=False
        )
    )
    bucketname = bucket_factory(1, bucketclass=bucketclass_dict)[0].name

    for i in range(3):
        awscli_pod_session.exec_cmd_on_pod(
            command=craft_s3_command(
                f"cp {download_dir}danny.webm s3://{bucketname}/danny{i}.webm",
                mcg_obj=mcg_obj,
            ),
            out_yaml_format=False,
        )
    mcg_obj.check_data_reduction(bucketname, 2 * file_size)

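# The assertion above boils down to simple arithmetic: uploading the same object
# N times should be stored roughly once by the deduplication layer, so at least
# (N - 1) * file_size bytes of reduction are expected. A minimal sketch of that
# calculation (illustrative only; `expected_dedup_reduction` is not an existing
# ocs-ci helper):
def expected_dedup_reduction(copies, object_size):
    """Minimum reduction expected when `copies` identical objects are stored once."""
    return (copies - 1) * object_size


# For the three uploads above: expected_dedup_reduction(3, file_size) == 2 * file_size
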
def test_check_multi_object_integrity(
    self, mcg_obj, awscli_pod, bucket_factory, amount, file_type
):
    """
    Test write multiple files to bucket and check integrity
    """
    original_dir = "/original"
    result_dir = "/result"
    if file_type == 'large':
        public_bucket = PUBLIC_BUCKET
        obj_key = LARGE_FILE_KEY
    elif file_type == 'small':
        public_bucket = constants.TEST_FILES_BUCKET
        obj_key = 'random1.txt'
    elif file_type == 'large_small':
        public_bucket = PUBLIC_BUCKET
        obj_key = LARGE_FILE_KEY.rsplit('/', 1)[0]

    # Download the file to pod
    awscli_pod.exec_cmd_on_pod(
        command=f'mkdir {original_dir} {result_dir}')
    public_s3_client = retrieve_anon_s3_resource().meta.client
    download_files = []
    # Use obj_key as prefix to download multiple files for the large_small
    # case; it also works with a single file
    for obj in public_s3_client.list_objects(
            Bucket=public_bucket, Prefix=obj_key).get('Contents'):
        # Skip the extra file in the large file type
        if file_type == 'large' and obj["Key"] != obj_key:
            continue
        logger.info(
            f'Downloading {obj["Key"]} from AWS bucket {public_bucket}')
        download_obj_cmd = f'cp s3://{public_bucket}/{obj["Key"]} {original_dir}'
        awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(download_obj_cmd),
            out_yaml_format=False)
        download_files.append(obj['Key'].split('/')[-1])

    # Write downloaded objects to the new bucket and check integrity
    bucketname = bucket_factory(1)[0].name
    base_path = f"s3://{bucketname}"
    for i in range(amount):
        full_object_path = base_path + f"/{i}/"
        sync_object_directory(awscli_pod, original_dir, full_object_path, mcg_obj)

        # Retrieve all objects from MCG bucket to result dir in Pod
        logger.info('Downloading objects from MCG bucket to awscli pod')
        sync_object_directory(awscli_pod, full_object_path, result_dir, mcg_obj)

        # Checksum is compared between original and result object
        for obj in download_files:
            assert verify_s3_object_integrity(
                original_object_path=f'{original_dir}/{obj}',
                result_object_path=f'{result_dir}/{obj}',
                awscli_pod=awscli_pod
            ), ('Checksum comparison between original and result object '
                'failed')

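# verify_s3_object_integrity presumably compares checksums of the original and the
# re-downloaded copy. A self-contained sketch of that idea using hashlib (an
# illustration under that assumption, not the ocs-ci implementation, which runs
# its commands inside the awscli pod):
import hashlib


def files_match(path_a, path_b, chunk_size=1024 * 1024):
    """Return True when both files hash to the same MD5 digest."""
    digests = []
    for path in (path_a, path_b):
        md5 = hashlib.md5()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                md5.update(chunk)
        digests.append(md5.hexdigest())
    return digests[0] == digests[1]
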
def test_write_multi_files_to_bucket(
    self, mcg_obj, awscli_pod, bucket_factory, amount, file_type
):
    """
    Test write multiple files to bucket
    """
    data_dir = "/data"
    if file_type == "large":
        public_bucket = PUBLIC_BUCKET
        obj_key = LARGE_FILE_KEY
    elif file_type == "small":
        public_bucket = constants.TEST_FILES_BUCKET
        obj_key = "random1.txt"
    elif file_type == "large_small":
        public_bucket = PUBLIC_BUCKET
        obj_key = LARGE_FILE_KEY.rsplit("/", 1)[0]

    # Download the file to pod
    awscli_pod.exec_cmd_on_pod(command=f"mkdir {data_dir}")
    public_s3_client = retrieve_anon_s3_resource().meta.client
    download_files = []
    # Use obj_key as prefix to download multiple files for the large_small
    # case; it also works with a single file
    for obj in public_s3_client.list_objects(
            Bucket=public_bucket, Prefix=obj_key).get("Contents"):
        # Skip the extra file in the large file type
        if file_type == "large" and obj["Key"] != obj_key:
            continue
        logger.info(
            f'Downloading {obj["Key"]} from AWS bucket {public_bucket}')
        download_obj_cmd = f'cp s3://{public_bucket}/{obj["Key"]} {data_dir}'
        awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(download_obj_cmd),
            out_yaml_format=False)
        download_files.append(obj["Key"])

    # Write all downloaded objects to the new bucket
    bucketname = bucket_factory(1)[0].name
    base_path = f"s3://{bucketname}"
    for i in range(amount):
        full_object_path = base_path + f"/{i}/"
        sync_object_directory(awscli_pod, data_dir, full_object_path, mcg_obj)

    obj_list = list(
        obj.key.split("/")[-1]
        for obj in mcg_obj.s3_list_all_objects_in_bucket(bucketname))

    # Check that the total number of copied files matches
    if file_type == "large_small":
        assert len(obj_list) == 2 * amount, "Total file amount does not match"
    else:
        assert len(obj_list) == amount, "Total file amount does not match"

    # Check that the deduplicated set of file names matches
    test_set = set([i.split("/")[-1] for i in download_files])
    assert test_set == set(obj_list), "File name set does not match"

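# s3_list_all_objects_in_bucket most likely follows listing pagination so that
# buckets holding more than one page of objects (1000 keys per page by default)
# are counted correctly. A boto3-based sketch of the same idea (the endpoint and
# credential parameters are placeholders, not values taken from this test):
import boto3


def list_all_object_keys(bucket_name, endpoint_url, access_key, secret_key):
    """Yield every object key in the bucket, following pagination."""
    s3 = boto3.client(
        "s3",
        endpoint_url=endpoint_url,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket_name):
        for obj in page.get("Contents", []):
            yield obj["Key"]
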
def upgrade_buckets(bucket_factory_session, awscli_pod_session, mcg_obj_session):
    """
    Additional NooBaa buckets that are created for upgrade testing. The first
    bucket is populated with data and a quota of 1 PB is set on it.

    Returns:
        list: list of buckets that should survive OCS and OCP upgrade.
            The first one has its bucket quota set to 1 PB and is populated
            with 3 GB of data.

    """
    buckets = bucket_factory_session(amount=3)

    # add quota to the first bucket
    mcg_obj_session.send_rpc_query(
        "bucket_api",
        "update_bucket",
        {
            "name": buckets[0].name,
            "quota": {
                "unit": "PETABYTE",
                "size": 1
            }
        },
    )

    # add some data to the first bucket
    awscli_pod_session.exec_cmd_on_pod(
        "dd if=/dev/urandom of=/tmp/testfile bs=1M count=500")
    for i in range(1, 7):
        awscli_pod_session.exec_cmd_on_pod(
            craft_s3_command(
                f"cp /tmp/testfile s3://{buckets[0].name}/testfile{i}",
                mcg_obj_session),
            out_yaml_format=False,
            secrets=[
                mcg_obj_session.access_key_id,
                mcg_obj_session.access_key,
                mcg_obj_session.s3_endpoint,
            ],
        )
    return buckets

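# A hedged sketch of how this session-scoped fixture could be consumed by a
# post-upgrade check (the test below is hypothetical and not part of ocs-ci;
# it only reuses names that already appear elsewhere in this section):
def test_buckets_survive_upgrade(upgrade_buckets, mcg_obj_session):
    """After the upgrade, the first pre-created bucket should still hold its
    six 500MB objects."""
    first_bucket = upgrade_buckets[0]
    objects = list(
        mcg_obj_session.s3_list_all_objects_in_bucket(first_bucket.name)
    )
    assert len(objects) == 6, "Objects written before the upgrade are missing"
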
def test_mcg_data_compression(
    self, mcg_obj, awscli_pod_session, bucket_factory, bucketclass_dict
):
    """
    Test data reduction mechanics

    Args:
        mcg_obj (obj): An object representing the current state of the MCG
            in the cluster
        awscli_pod_session (pod): A pod running the AWSCLI tools
        bucket_factory: Calling this fixture creates a new bucket(s)

    """
    download_dir = AWSCLI_TEST_OBJ_DIR
    bucketname = bucket_factory(1, bucketclass=bucketclass_dict)[0].name
    full_object_path = f"s3://{bucketname}"
    awscli_pod_session.exec_cmd_on_pod(
        command=craft_s3_command(
            f"cp {download_dir}enwik8 {full_object_path}", mcg_obj),
        out_yaml_format=False,
    )
    # For this test, enwik8 is used in conjunction with Snappy compression
    # utilized by NooBaa. Snappy consistently compresses 35MB of the file.
    mcg_obj.check_data_reduction(bucketname, 35 * 1024 * 1024)

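# The 35MB figure can be sanity-checked outside the cluster by Snappy-compressing
# enwik8 locally and measuring the bytes saved. Sketch only, assuming the
# third-party python-snappy package and a local copy of the enwik8 benchmark file:
import snappy


def snappy_reduction(path="enwik8"):
    """Return the number of bytes saved by Snappy-compressing the given file."""
    with open(path, "rb") as f:
        data = f.read()
    return len(data) - len(snappy.compress(data))
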
def test_mcg_data_compression(self, mcg_obj, awscli_pod, bucket_factory):
    """
    Test data reduction mechanics

    Args:
        mcg_obj (obj): An object representing the current state of the MCG
            in the cluster
        awscli_pod (pod): A pod running the AWSCLI tools
        bucket_factory: Calling this fixture creates a new bucket(s)

    """
    download_dir = "/aws/compression/"
    awscli_pod.exec_cmd_on_pod(
        command=craft_s3_command(
            f"cp s3://{constants.TEST_FILES_BUCKET}/enwik8 {download_dir}"
        ),
        out_yaml_format=False,
    )
    bucket = bucket_factory(amount=1)[0]
    bucketname = bucket.name
    full_object_path = f"s3://{bucketname}"
    sync_object_directory(awscli_pod, download_dir, full_object_path, mcg_obj)
    # For this test, enwik8 is used in conjunction with Snappy compression
    # utilized by NooBaa. Snappy consistently compresses 35MB of the file.
    mcg_obj.check_data_reduction(bucketname, 35 * 1024 * 1024)

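# sync_object_directory presumably wraps `aws s3 sync` the same way the copy calls
# above wrap `aws s3 cp`. A minimal sketch expressed with helpers already used in
# this section (illustrative only; the real ocs-ci helper may do more, such as
# validating the command output):
def sync_dir_sketch(pod, src, dst, mcg=None):
    """Run `aws s3 sync <src> <dst>` inside the given awscli pod."""
    return pod.exec_cmd_on_pod(
        command=craft_s3_command(f"sync {src} {dst}", mcg),
        out_yaml_format=False,
    )
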
def measure_noobaa_exceed_bucket_quota(
    measurement_dir, request, mcg_obj, awscli_pod
):
    """
    Create NooBaa bucket, set its capacity quota to 2GB and fill it with data.

    Returns:
        dict: Contains information about `start` and `stop` time for
            exceeding the NooBaa bucket quota

    """
    bucket_name = create_unique_resource_name(
        resource_description="bucket", resource_type="s3"
    )
    bucket = MCGS3Bucket(bucket_name, mcg=mcg_obj)
    mcg_obj.send_rpc_query(
        "bucket_api",
        "update_bucket",
        {"name": bucket_name, "quota": {"unit": "GIGABYTE", "size": 2}},
    )
    bucket_info = mcg_obj.get_bucket_info(bucket.name)
    logger.info(f"Bucket {bucket.name} storage: {bucket_info['storage']}")
    logger.info(f"Bucket {bucket.name} data: {bucket_info['data']}")

    def teardown():
        """
        Delete test bucket.
        """
        bucket.delete()

    request.addfinalizer(teardown)

    def exceed_bucket_quota():
        """
        Upload 5 files with 500MB size into bucket that has quota set to 2GB.

        Returns:
            str: Name of utilized bucket

        """
        nonlocal mcg_obj
        nonlocal bucket_name
        nonlocal awscli_pod
        # run_time of operation
        run_time = 60 * 14
        awscli_pod.exec_cmd_on_pod(
            "dd if=/dev/zero of=/tmp/testfile bs=1M count=500"
        )
        for i in range(1, 6):
            awscli_pod.exec_cmd_on_pod(
                craft_s3_command(
                    f"cp /tmp/testfile s3://{bucket_name}/testfile{i}", mcg_obj
                ),
                out_yaml_format=False,
                secrets=[
                    mcg_obj.access_key_id,
                    mcg_obj.access_key,
                    mcg_obj.s3_endpoint,
                ],
            )
        logger.info(f"Waiting for {run_time} seconds")
        time.sleep(run_time)
        return bucket_name

    test_file = os.path.join(
        measurement_dir, "measure_noobaa_exceed__bucket_quota.json"
    )
    measured_op = measure_operation(exceed_bucket_quota, test_file)

    bucket_info = mcg_obj.get_bucket_info(bucket.name)
    logger.info(f"Bucket {bucket.name} storage: {bucket_info['storage']}")
    logger.info(f"Bucket {bucket.name} data: {bucket_info['data']}")

    logger.info(f"Deleting data from bucket {bucket_name}")
    for i in range(1, 6):
        awscli_pod.exec_cmd_on_pod(
            craft_s3_command(f"rm s3://{bucket_name}/testfile{i}", mcg_obj),
            out_yaml_format=False,
            secrets=[mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint],
        )
    return measured_op
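# measure_operation is used here as a timing harness: run the callable, remember
# when it started and stopped, and persist that window to the given JSON file so
# later checks (for example, alert verification) can correlate against it. A
# minimal sketch of such a wrapper (illustrative; the real ocs-ci helper is more
# involved, e.g. it can reuse a previously recorded result file):
import json
import time


def measure_operation_sketch(operation, result_file):
    """Time `operation()` and dump start/stop timestamps plus its result to JSON."""
    start = time.time()
    result = operation()
    stop = time.time()
    record = {"start": start, "stop": stop, "result": result}
    with open(result_file, "w") as f:
        json.dump(record, f)
    return record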