def test_check_object_integrity(self, mcg_obj, awscli_pod, bucket_factory):
    """
    Test object integrity using md5sum
    """
    downloaded_files = []
    original_dir = "/aws/original"
    result_dir = "/aws/result"
    # Retrieve a list of all objects on the test-objects bucket and
    # download them to the pod
    awscli_pod.exec_cmd_on_pod(command=f'mkdir {original_dir} {result_dir}')
    public_s3 = boto3.resource('s3', region_name=mcg_obj.region)
    for obj in public_s3.Bucket(constants.TEST_FILES_BUCKET).objects.all():
        logger.info(f'Downloading {obj.key} from aws test bucket')
        awscli_pod.exec_cmd_on_pod(
            command=f'sh -c "cd {original_dir} && '
            f'wget https://{constants.TEST_FILES_BUCKET}.s3.'
            f'{mcg_obj.region}.amazonaws.com/{obj.key}"'
        )
        downloaded_files.append(obj.key)

    bucket_name = bucket_factory(1)[0].name

    # Write all downloaded objects from original_dir to the MCG bucket
    logger.info('Uploading all pod objects to MCG bucket')
    bucket_path = f's3://{bucket_name}'
    copy_cmd = f'cp --recursive {original_dir} {bucket_path}'
    assert 'Completed' in awscli_pod.exec_cmd_on_pod(
        command=craft_s3_command(mcg_obj, copy_cmd),
        out_yaml_format=False,
        secrets=[
            mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint
        ]
    ), 'Failed to upload objects to MCG bucket'

    # Retrieve all objects from the MCG bucket to the result dir on the pod
    logger.info('Downloading all objects from MCG bucket to awscli pod')
    retrieve_cmd = f'cp --recursive {bucket_path} {result_dir}'
    assert 'Completed' in awscli_pod.exec_cmd_on_pod(
        command=craft_s3_command(mcg_obj, retrieve_cmd),
        out_yaml_format=False,
        secrets=[
            mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint
        ]
    ), 'Failed to download objects from MCG bucket'

    # Compare checksums between each original and retrieved object
    for obj in downloaded_files:
        assert mcg_obj.verify_s3_object_integrity(
            original_object_path=f'{original_dir}/{obj}',
            result_object_path=f'{result_dir}/{obj}',
            awscli_pod=awscli_pod
        ), 'Checksum comparison between original and result object failed'
def test_write_file_to_bucket(self, mcg_obj, awscli_pod, bucket_factory,
                              uploaded_objects):
    """
    Test object IO using the S3 SDK
    """
    # Retrieve a list of all objects on the test-objects bucket and
    # download them to the pod
    downloaded_files = []
    public_s3 = boto3.resource('s3', region_name=mcg_obj.region)
    for obj in public_s3.Bucket(constants.TEST_FILES_BUCKET).objects.all():
        # Download test object(s)
        logger.info(f'Downloading {obj.key}')
        awscli_pod.exec_cmd_on_pod(
            command=f'wget https://{constants.TEST_FILES_BUCKET}.s3.'
            f'{mcg_obj.region}.amazonaws.com/{obj.key}'
        )
        downloaded_files.append(obj.key)

    bucketname = bucket_factory(1)[0].name

    # Write all downloaded objects to the new bucket
    logger.info('Writing objects to bucket')
    for obj_name in downloaded_files:
        full_object_path = f"s3://{bucketname}/{obj_name}"
        copycommand = f"cp {obj_name} {full_object_path}"
        assert 'Completed' in awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(mcg_obj, copycommand),
            out_yaml_format=False,
            secrets=[
                mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.endpoint
            ]
        )
        uploaded_objects.append(full_object_path)

    assert set(downloaded_files).issubset(
        obj.key for obj in mcg_obj.s3_list_all_objects_in_bucket(bucketname)
    )
def test_data_reduction(self, mcg_obj, awscli_pod, bucket_factory):
    """
    Test data reduction mechanics
    """
    # TODO: Privatize test bucket
    download_dir = '/aws/downloaded'
    synccmd = (
        f"aws s3 sync s3://{constants.TEST_FILES_BUCKET} {download_dir} "
        f"--no-sign-request"
    )
    assert 'download' in awscli_pod.exec_cmd_on_pod(
        command=synccmd, out_yaml_format=False
    ), 'Failed to download test files'

    bucketname = None
    for bucket in bucket_factory(5):
        synccmd = f'sync {download_dir} s3://{bucket.name}'
        awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(mcg_obj, synccmd),
            out_yaml_format=False,
            secrets=[
                mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint
            ]
        )
        bucketname = bucket.name

    assert mcg_obj.check_data_reduction(
        bucketname
    ), 'Data reduction did not work as anticipated.'
def test_write_file_to_bucket(self, noobaa_obj, awscli_pod, created_buckets,
                              uploaded_objects):
    """
    Test object IO using the S3 SDK
    """
    # Retrieve a list of all objects on the test-objects bucket and
    # download them to the pod
    downloaded_files = []
    public_s3 = boto3.resource('s3', region_name=noobaa_obj.region)
    for obj in public_s3.Bucket(constants.TEST_FILES_BUCKET).objects.all():
        # Download test object(s)
        logger.info(f'Downloading {obj.key}')
        awscli_pod.exec_cmd_on_pod(
            command=f'wget https://{constants.TEST_FILES_BUCKET}.s3.'
            f'{noobaa_obj.region}.amazonaws.com/{obj.key}'
        )
        downloaded_files.append(obj.key)

    bucketname = create_unique_resource_name(
        self.__class__.__name__.lower(), 's3-bucket'
    )
    logger.info(f'Creating the test bucket - {bucketname}')
    created_buckets.append(noobaa_obj.s3_create_bucket(bucketname=bucketname))

    # Write all downloaded objects to the new bucket
    logger.info('Writing objects to bucket')
    for obj_name in downloaded_files:
        full_object_path = f"s3://{bucketname}/{obj_name}"
        copycommand = f"cp {obj_name} {full_object_path}"
        assert 'Completed' in awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(noobaa_obj, copycommand),
            out_yaml_format=False,
            secrets=[
                noobaa_obj.access_key_id, noobaa_obj.access_key,
                noobaa_obj.endpoint
            ]
        )
        uploaded_objects.append(full_object_path)
def write_individual_s3_objects(mcg_obj, awscli_pod, bucket_factory,
                                downloaded_files, target_dir,
                                bucket_name=None):
    """
    Writes objects one by one to an s3 bucket

    Args:
        mcg_obj (obj): An MCG object containing the MCG S3 connection
            credentials
        awscli_pod (pod): A pod running the AWSCLI tools
        bucket_factory: Calling this fixture creates a new bucket(s)
        downloaded_files (list): List of downloaded object keys
        target_dir (str): The fully qualified path of the download target
            folder
        bucket_name (str): Name of the bucket (default: none)

    """
    bucketname = bucket_name or bucket_factory(1)[0].name
    logger.info('Writing objects to bucket')
    for obj_name in downloaded_files:
        full_object_path = f"s3://{bucketname}/{obj_name}"
        copycommand = f"cp {target_dir}{obj_name} {full_object_path}"
        assert 'Completed' in awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(mcg_obj, copycommand),
            out_yaml_format=False,
            secrets=[
                mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint
            ]
        )
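# Illustrative only: a minimal sketch of calling write_individual_s3_objects
# from a test, assuming the mcg_obj, awscli_pod and bucket_factory fixtures
# used elsewhere in this module. The object keys and the pod-local directory
# are hypothetical placeholders.
def example_write_individual_objects(mcg_obj, awscli_pod, bucket_factory):
    downloaded_files = ['random1.txt', 'random2.txt']  # hypothetical keys
    write_individual_s3_objects(
        mcg_obj,
        awscli_pod,
        bucket_factory,
        downloaded_files,
        # Note the trailing slash: the helper appends each object key
        # directly to target_dir when building the copy command
        target_dir='/aws/original/',
        # Leaving bucket_name unset makes the helper create a new bucket
        # via bucket_factory
        bucket_name=None
    )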
def object_cleanup():
    for uploaded_filename in uploaded_objects_paths:
        logger.info(f'Deleting object {uploaded_filename}')
        awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(mcg_obj, "rm " + uploaded_filename),
            secrets=[
                mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint
            ]
        )
def upload_parts(mcg_obj, awscli_pod, bucketname, object_key, body_path,
                 upload_id, uploaded_parts):
    """
    Uploads individual parts to a bucket

    Args:
        mcg_obj (obj): An MCG object containing the MCG S3 connection
            credentials
        awscli_pod (pod): A pod running the AWSCLI tools
        bucketname (str): Name of the bucket to upload parts on
        object_key (str): Unique object Identifier
        body_path (str): Path of the directory on the aws pod which contains
            the parts to be uploaded
        upload_id (str): Multipart Upload-ID
        uploaded_parts (list): list containing the name of the parts to be
            uploaded

    Returns:
        list: List containing the ETag of the parts

    """
    parts = []
    secrets = [mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint]
    for count, part in enumerate(uploaded_parts, 1):
        upload_cmd = (
            f'upload-part --bucket {bucketname} --key {object_key}'
            f' --part-number {count} --body {body_path}/{part}'
            f' --upload-id {upload_id}'
        )
        # upload_cmd returns the ETag, upload_id etc., which is then split
        # to get just the ETag
        part = awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(upload_cmd, mcg_obj, api=True),
            out_yaml_format=False,
            secrets=secrets
        ).split("\"")[-3].split("\\")[0]
        parts.append({"PartNumber": count, "ETag": f'"{part}"'})
    return parts
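# Illustrative only: a minimal sketch of where upload_parts sits in a full
# multipart upload flow. It assumes mcg_obj exposes a boto3 S3 client as
# mcg_obj.s3_client (adjust if the attribute is named differently); the
# object key, part directory and part file names are hypothetical
# placeholders.
def example_multipart_upload(mcg_obj, awscli_pod, bucketname):
    object_key = 'multipart-demo.bin'       # hypothetical object key
    body_path = '/aws/parts'                # hypothetical dir holding part files
    part_files = ['part-000', 'part-001']   # hypothetical part file names

    # 1. Start the multipart upload and capture its UploadId
    upload_id = mcg_obj.s3_client.create_multipart_upload(
        Bucket=bucketname, Key=object_key
    )['UploadId']

    # 2. Upload each part from the awscli pod and collect PartNumber/ETag pairs
    parts = upload_parts(
        mcg_obj, awscli_pod, bucketname, object_key, body_path,
        upload_id, part_files
    )

    # 3. Complete the upload by handing the collected parts back to S3
    mcg_obj.s3_client.complete_multipart_upload(
        Bucket=bucketname,
        Key=object_key,
        UploadId=upload_id,
        MultipartUpload={'Parts': parts}
    )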
def test_write_multi_files_to_bucket(self, mcg_obj, awscli_pod,
                                     bucket_factory, amount, file_type):
    """
    Test write multiple files to bucket
    """
    data_dir = '/data'
    if file_type == 'large':
        public_bucket = PUBLIC_BUCKET
        obj_key = LARGE_FILE_KEY
    elif file_type == 'small':
        public_bucket = constants.TEST_FILES_BUCKET
        obj_key = 'random1.txt'
    elif file_type == 'large_small':
        public_bucket = PUBLIC_BUCKET
        obj_key = LARGE_FILE_KEY.rsplit('/', 1)[0]

    # Download the files to the pod
    awscli_pod.exec_cmd_on_pod(command=f'mkdir {data_dir}')
    public_s3_client = retrieve_anon_s3_resource().meta.client
    download_files = []
    # Use obj_key as a prefix to download multiple files for the
    # 'large_small' case; it also works with a single file
    for obj in public_s3_client.list_objects(
        Bucket=public_bucket, Prefix=obj_key
    ).get('Contents'):
        # Skip the extra file for the 'large' file type
        if file_type == 'large' and obj["Key"] != obj_key:
            continue
        logger.info(
            f'Downloading {obj["Key"]} from AWS bucket {public_bucket}'
        )
        download_obj_cmd = f'cp s3://{public_bucket}/{obj["Key"]} {data_dir}'
        awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(download_obj_cmd),
            out_yaml_format=False
        )
        download_files.append(obj['Key'])

    # Write all downloaded objects to the new bucket
    bucketname = bucket_factory(1)[0].name
    base_path = f"s3://{bucketname}"
    for i in range(amount):
        full_object_path = base_path + f"/{i}/"
        helpers.sync_object_directory(
            awscli_pod, data_dir, full_object_path, mcg_obj
        )

    obj_list = list(
        obj.key.split('/')[-1]
        for obj in mcg_obj.s3_list_all_objects_in_bucket(bucketname)
    )

    # Check that the total number of copied files matches
    if file_type == 'large_small':
        assert len(obj_list) == 2 * amount, (
            "Total file amount does not match"
        )
    else:
        assert len(obj_list) == amount, "Total file amount does not match"

    # Check that the deduplicated file-name sets are the same
    test_set = set([i.split('/')[-1] for i in download_files])
    assert test_set == set(obj_list), "File name set does not match"
def del_objects(uploaded_objects_paths, awscli_pod, mcg_obj):
    """
    Deleting objects from bucket

    Args:
        uploaded_objects_paths (list): List of object paths
        awscli_pod (pod): A pod running the AWSCLI tools
        mcg_obj (obj): An MCG object containing the MCG S3 connection
            credentials

    """
    for uploaded_filename in uploaded_objects_paths:
        logger.info(f'Deleting object {uploaded_filename}')
        awscli_pod.exec_cmd_on_pod(
            command=craft_s3_command(mcg_obj, "rm " + uploaded_filename),
            secrets=[
                mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint
            ]
        )
def rm_object_recursive(podobj, target, mcg_obj, option=''):
    """
    Remove bucket objects with --recursive option

    Args:
        podobj (OCS): The pod on which to execute the commands and download
            the objects to
        target (str): Fully qualified bucket target path
        mcg_obj (MCG): The MCG object to use in case the target or source
            are in an MCG
        option (str): Extra s3 remove command option

    """
    rm_command = f"rm s3://{target} --recursive {option}"
    podobj.exec_cmd_on_pod(
        command=craft_s3_command(mcg_obj, rm_command),
        out_yaml_format=False,
        secrets=[
            mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint
        ]
    )
def sync_object_directory(podobj, src, target, mcg_obj=None):
    """
    Syncs objects between source and target directories

    Args:
        podobj (OCS): The pod on which to execute the commands and download
            the objects to
        src (str): Fully qualified object source path
        target (str): Fully qualified object target path
        mcg_obj (MCG, optional): The MCG object to use in case the target or
            source are in an MCG

    """
    logger.info(f'Syncing all objects and directories from {src} to {target}')
    retrieve_cmd = f'sync {src} {target}'
    if mcg_obj:
        secrets = [
            mcg_obj.access_key_id, mcg_obj.access_key, mcg_obj.s3_endpoint
        ]
    else:
        secrets = None
    podobj.exec_cmd_on_pod(
        command=craft_s3_command(mcg_obj, retrieve_cmd),
        out_yaml_format=False,
        secrets=secrets
    )
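# Illustrative only: a minimal sketch of using sync_object_directory together
# with rm_object_recursive, assuming the awscli_pod, mcg_obj and
# bucket_factory fixtures used elsewhere in this module. The pod-local
# directories are hypothetical placeholders.
def example_sync_and_cleanup(mcg_obj, awscli_pod, bucket_factory):
    bucketname = bucket_factory(1)[0].name
    local_dir = '/aws/original'  # hypothetical pod-local dir with test data

    # Push the whole directory into the MCG bucket...
    sync_object_directory(
        awscli_pod, local_dir, f's3://{bucketname}', mcg_obj
    )
    # ...pull it back into a second directory (e.g. for checksum comparison)...
    sync_object_directory(
        awscli_pod, f's3://{bucketname}', '/aws/result', mcg_obj
    )
    # ...and finally remove everything in the bucket recursively.
    rm_object_recursive(awscli_pod, bucketname, mcg_obj)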
def upgrade_buckets(bucket_factory_session, awscli_pod_session,
                    mcg_obj_session):
    """
    Additional NooBaa buckets that are created for upgrade testing.
    The first bucket is populated with data and has its quota set to 1 PB.

    Returns:
        list: list of buckets that should survive OCS and OCP upgrade.
            The first one has its bucket quota set to 1 PB and is populated
            with about 3 GB of data.

    """
    buckets = bucket_factory_session(amount=3)

    # add quota to the first bucket
    mcg_obj_session.send_rpc_query(
        'bucket_api',
        'update_bucket',
        {
            'name': buckets[0].name,
            'quota': {
                'unit': 'PETABYTE',
                'size': 1
            }
        }
    )

    # add some data to the first bucket
    awscli_pod_session.exec_cmd_on_pod(
        'dd if=/dev/urandom of=/tmp/testfile bs=1M count=500'
    )
    for i in range(1, 7):
        awscli_pod_session.exec_cmd_on_pod(
            helpers.craft_s3_command(
                mcg_obj_session,
                f"cp /tmp/testfile s3://{buckets[0].name}/testfile{i}"
            ),
            out_yaml_format=False,
            secrets=[
                mcg_obj_session.access_key_id,
                mcg_obj_session.access_key,
                mcg_obj_session.s3_endpoint
            ]
        )
    return buckets
def measure_noobaa_exceed_bucket_quota(
    measurement_dir, request, mcg_obj, awscli_pod
):
    """
    Create NooBaa bucket, set its capacity quota to 2GB and fill it with data.

    Returns:
        dict: Contains information about `start` and `stop` time for
            exceeding the NooBaa bucket quota

    """
    bucket_name = create_unique_resource_name(
        resource_description='bucket',
        resource_type='s3'
    )
    bucket = S3Bucket(
        mcg_obj,
        bucket_name
    )
    mcg_obj.send_rpc_query(
        'bucket_api',
        'update_bucket',
        {
            'name': bucket_name,
            'quota': {
                'unit': 'GIGABYTE',
                'size': 2
            }
        }
    )

    def teardown():
        """
        Delete test bucket.
        """
        bucket.delete()

    request.addfinalizer(teardown)

    def exceed_bucket_quota():
        """
        Upload 5 files with 500MB size into bucket that has quota set to 2GB.

        Returns:
            str: Name of utilized bucket

        """
        nonlocal mcg_obj
        nonlocal bucket_name
        nonlocal awscli_pod
        # run_time of operation
        run_time = 60 * 11
        awscli_pod.exec_cmd_on_pod(
            'dd if=/dev/zero of=/tmp/testfile bs=1M count=500'
        )
        for i in range(1, 6):
            awscli_pod.exec_cmd_on_pod(
                helpers.craft_s3_command(
                    mcg_obj,
                    f"cp /tmp/testfile s3://{bucket_name}/testfile{i}"
                ),
                out_yaml_format=False,
                secrets=[
                    mcg_obj.access_key_id,
                    mcg_obj.access_key,
                    mcg_obj.s3_endpoint
                ]
            )
        logger.info(f"Waiting for {run_time} seconds")
        time.sleep(run_time)
        return bucket_name

    test_file = os.path.join(
        measurement_dir, 'measure_noobaa_exceed__bucket_quota.json'
    )
    measured_op = measure_operation(exceed_bucket_quota, test_file)

    logger.info(f"Deleting data from bucket {bucket_name}")
    for i in range(1, 6):
        awscli_pod.exec_cmd_on_pod(
            helpers.craft_s3_command(
                mcg_obj,
                f"rm s3://{bucket_name}/testfile{i}"
            ),
            out_yaml_format=False,
            secrets=[
                mcg_obj.access_key_id,
                mcg_obj.access_key,
                mcg_obj.s3_endpoint
            ]
        )
    return measured_op