def test_large_copy(self, num_parts=LAMBDA_PARALLELIZATION_FACTOR + 1):
    test_bucket = infra.get_env("DSS_S3_BUCKET_TEST")
    test_src_key = infra.generate_test_key()
    s3_client = boto3.client("s3")

    # upload the source object as a multipart upload with enough parts that
    # the copy must be spread across more than one lambda
    mpu = s3_client.create_multipart_upload(Bucket=test_bucket, Key=test_src_key)
    with ThreadPoolExecutor(max_workers=8) as tpe:
        parts_futures = tpe.map(
            lambda part_id: TestS3ParallelCopy.upload_part(
                test_bucket, test_src_key, mpu['UploadId'], part_id),
            range(1, num_parts + 1))

    parts = [
        dict(ETag=part_etag, PartNumber=part_id)
        for part_id, part_etag in parts_futures
    ]

    src_etag = s3_client.complete_multipart_upload(
        Bucket=test_bucket,
        Key=test_src_key,
        MultipartUpload=dict(Parts=parts),
        UploadId=mpu['UploadId'],
    )['ETag'].strip('"')

    # kick off the parallel copy via the step function and verify that the
    # destination object ends up with the same ETag as the source
    test_dst_key = infra.generate_test_key()
    state = s3copyclient.copy_sfn_event(test_bucket, test_src_key,
                                        test_bucket, test_dst_key)
    execution_id = str(uuid.uuid4())
    stepfunctions.step_functions_invoke("dss-s3-copy-sfn-{stage}", execution_id, state)

    self._check_dst_key_etag(test_bucket, test_dst_key, src_etag)
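
# upload_part is defined elsewhere in this test class. A minimal sketch of
# what it might look like, assuming each part is filled with null bytes at
# the 5 MiB S3 minimum part size (the body below is an assumption for
# illustration, not the project's actual helper):
#
#     @staticmethod
#     def upload_part(bucket: str, key: str, upload_id: str, part_id: int):
#         s3_client = boto3.client("s3")
#         response = s3_client.upload_part(
#             Bucket=bucket,
#             Key=key,
#             PartNumber=part_id,
#             UploadId=upload_id,
#             Body=b"\0" * (5 * 1024 * 1024),  # parts before the last must be >= 5 MiB
#         )
#         return part_id, response['ETag'].strip('"')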
def parallel_copy(replica: Replica, source_bucket: str, source_key: str,
                  destination_bucket: str, destination_key: str) -> str:
    log.debug(f"Copy file from bucket {source_bucket} with key {source_key} to "
              f"bucket {destination_bucket} with key {destination_key}")

    # pick the copy state machine for the requested replica
    if replica == Replica.aws:
        state = s3copyclient.copy_sfn_event(
            source_bucket, source_key,
            destination_bucket, destination_key,
        )
        state_machine_name_template = "dss-s3-copy-sfn-{stage}"
    elif replica == Replica.gcp:
        state = gscopyclient.copy_sfn_event(source_bucket, source_key,
                                            destination_bucket, destination_key)
        state_machine_name_template = "dss-gs-copy-sfn-{stage}"
    else:
        raise ValueError("Unsupported replica")

    execution_id = get_execution_id()
    stepfunctions.step_functions_invoke(state_machine_name_template, execution_id, state)
    return execution_id
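
# A minimal usage sketch for parallel_copy; the bucket and key names below
# are hypothetical:
#
#     execution_id = parallel_copy(
#         Replica.aws,
#         "source-bucket", "blobs/source-key",
#         "destination-bucket", "blobs/destination-key",
#     )
#     # the returned execution id identifies the step function execution,
#     # which performs the copy asynchronously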
def _test_aws_cache(self, src_data, content_type, checkout_bucket):
    replica = Replica.aws
    checkout_bucket = checkout_bucket if checkout_bucket else replica.checkout_bucket
    test_src_key = infra.generate_test_key()
    s3_blobstore = Config.get_blobstore_handle(Replica.aws)

    # upload the source blob
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        fh.seek(0)
        s3_blobstore.upload_file_handle(replica.bucket, test_src_key, fh, content_type)

    # check out the blob by running the copy task directly
    test_dst_key = infra.generate_test_key()
    event = s3copyclient.copy_sfn_event(replica.bucket, test_src_key,
                                        checkout_bucket, test_dst_key)
    event = s3copyclient.implementation.setup_copy_task(event, None)
    spoof_context = self.SpoofContext()
    # the trailing copy_worker argument is arbitrary; it is passed only
    # because the signature requires it
    event = s3copyclient.implementation.copy_worker(event, spoof_context, 10)

    # verify: fetch the tagging of the checked-out object
    tagging = s3_blobstore.get_user_metadata(checkout_bucket, test_dst_key)

    # cleanup
    s3_blobstore.delete(replica.bucket, test_src_key)
    s3_blobstore.delete(checkout_bucket, test_dst_key)
    return tagging
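
# SpoofContext stands in for the AWS Lambda context object that copy_worker
# normally receives. A minimal sketch, assuming copy_worker only consults the
# standard remaining-time accessor (the real class is defined on the test
# case and may differ):
#
#     class SpoofContext:
#         @staticmethod
#         def get_remaining_time_in_millis():
#             return 15 * 60 * 1000  # pretend a full 15-minute window remains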
def test_zero_copy(self):
    test_bucket = infra.get_env("DSS_S3_BUCKET_TEST")
    test_src_key = infra.generate_test_key()
    s3_blobstore = Config.get_blobstore_handle(Replica.aws)

    # upload a zero-byte source object
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.seek(0)
        s3_blobstore.upload_file_handle(test_bucket, test_src_key, fh)
    src_etag = s3_blobstore.get_cloud_checksum(test_bucket, test_src_key)

    # copy it via the step function and verify the destination ETag matches
    test_dst_key = infra.generate_test_key()
    state = s3copyclient.copy_sfn_event(test_bucket, test_src_key,
                                        test_bucket, test_dst_key)
    execution_id = str(uuid.uuid4())
    stepfunctions.step_functions_invoke("dss-s3-copy-sfn-{stage}", execution_id, state)
    self._check_dst_key_etag(test_bucket, test_dst_key, src_etag)
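
# For reference, the checksum compared here is well defined: a zero-byte S3
# object uploaded with a single PUT has an ETag equal to the MD5 of the empty
# string:
#
#     >>> import hashlib
#     >>> hashlib.md5(b"").hexdigest()
#     'd41d8cd98f00b204e9800998ecf8427e'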