class S3Bucket:
    """Convenience wrapper binding a boto3 S3 client to a single bucket.

    The constructor builds both a low-level client and a
    ``TransferManager`` on top of it. Object keys are derived from the
    supplied paths by dropping their first character (``path[1:]``).
    """

    def __init__(self, bucket, endpoint, id, key, region):
        """Create the client and transfer manager for ``bucket``.

        :param bucket: Name of the bucket all operations target.
        :param endpoint: Passed to boto3 as ``endpoint_url``.
        :param id: Passed to boto3 as ``aws_access_key_id``.
        :param key: Passed to boto3 as ``aws_secret_access_key``.
        :param region: Passed to boto3 as ``region_name``.
        """
        self.bucket = bucket
        self.service_endpoint = endpoint
        self.aws_access_key_id = id
        self.aws_secret_access_key = key
        self.region_name = region
        self.client = boto3.client(
            's3',
            endpoint_url=self.service_endpoint,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=self.region_name)
        self.transfer = TransferManager(self.client, None, None, None)

    def manager_upload(self, file):
        """Submit ``file`` to the transfer manager under the key ``file[1:]``.

        The future returned by the transfer manager is not waited on.
        """
        self.transfer.upload(file, self.bucket, file[1:], None, None)

    def upload(self, file, filekey):
        """Upload ``file`` unless an object with the target key exists.

        :param file: Path of the local file to upload.
        :param filekey: Destination path; the object key is ``filekey[1:]``.
        :return: ``True`` if the file was uploaded, ``False`` if an object
            with that exact key is already present in the bucket.
        :raises Exception: Any error raised while opening or uploading the
            file is printed with ``traceback.print_exc()`` and re-raised.
        """
        object_key = filekey[1:]
        listing = self.client.list_objects(
            Bucket=self.bucket, Prefix=object_key)
        # The listing response is a dict and therefore always truthy, so
        # its ``Contents`` entries must be inspected. Only an exact key
        # match counts: ``Prefix`` also matches longer keys that merely
        # start with ``object_key``.
        existing = listing.get('Contents') or []
        if any(entry.get('Key') == object_key for entry in existing):
            return False
        try:
            with open(file, 'rb') as f:
                self.client.upload_fileobj(f, self.bucket, object_key)
        except Exception:
            traceback.print_exc()
            raise
        return True
def test_cntrl_c_in_context_manager_cancels_incomplete_transfers(self):
    # Checks that when the body of the context manager raises, transfers
    # that have not completed are cancelled, and the CancelledError they
    # surface carries the value of the raised exception.
    # NOTE: delete() is used only because it is the easiest operation to
    # stub; which operation is used does not matter for this test.
    num_transfers = 100
    for _ in range(num_transfers):
        self.stubber.add_response('delete_object', {})

    config = TransferConfig(
        max_request_concurrency=1, max_submission_concurrency=1)
    manager = TransferManager(self.client, config)

    futures = []
    try:
        with manager:
            for _ in range(num_transfers):
                pending = manager.delete('mybucket', 'mykey')
                futures.append(pending)
            raise KeyboardInterrupt()
    except KeyboardInterrupt:
        # At least one submitted future is expected to have been cancelled.
        with self.assertRaisesRegexp(
                CancelledError, 'KeyboardInterrupt()'):
            for future in futures:
                future.result()
def test_enable_disable_callbacks_only_ever_registered_once(self):
    body = CallbackEnablingBody()
    request_spec = {
        'method': 'PUT',
        'url': 'https://s3.amazonaws.com',
        'body': body,
        'headers': {},
        'context': {},
    }
    request = create_request_object(request_spec)

    # Build two TransferManagers on top of the very same client.
    for _ in range(2):
        TransferManager(self.client)

    self.client.meta.events.emit(
        'request-created.s3', request=request, operation_name='PutObject')

    # Even though the client backs two different TransferManagers, the
    # enable/disable callback handlers should be registered only once.
    self.assertEqual(
        body.enable_callback_call_count, 1,
        'The enable_callback() should have only ever been registered once')
    self.assertEqual(
        body.disable_callback_call_count, 1,
        'The disable_callback() should have only ever been registered '
        'once')
def test_cntrl_c_in_context_manager_cancels_incomplete_transfers(self):
    # An exception raised inside the ``with manager`` body must cancel the
    # transfers that are still incomplete; each cancelled future raises a
    # CancelledError wrapping the value of that exception.
    # NOTE: delete() was picked arbitrarily because it is the simplest
    # operation to set up with the stubber.
    num_transfers = 100
    for _ in range(num_transfers):
        self.stubber.add_response('delete_object', {})

    single_threaded = TransferConfig(
        max_request_concurrency=1, max_submission_concurrency=1)
    manager = TransferManager(self.client, single_threaded)

    submitted = []
    try:
        with manager:
            for _ in range(num_transfers):
                submitted.append(manager.delete('mybucket', 'mykey'))
            raise KeyboardInterrupt()
    except KeyboardInterrupt:
        # At least one of the submitted futures should have been
        # cancelled, so draining the results must raise.
        with self.assertRaisesRegexp(CancelledError, 'KeyboardInterrupt()'):
            for future in submitted:
                future.result()
def test_can_disable_bucket_validation(self):
    # With the validation flag switched off, delete() is called with an
    # S3 Object Lambda ARN as the bucket value; the test passes as long
    # as that call does not raise.
    manager = TransferManager(self.client, TransferConfig())
    manager.VALIDATE_SUPPORTED_BUCKET_VALUES = False

    s3_object_lambda_arn = (
        'arn:aws:s3-object-lambda:us-west-2:123456789012:'
        'accesspoint:my-accesspoint'
    )
    manager.delete(s3_object_lambda_arn, 'my-key')
class TestDeleteObject(BaseGeneralInterfaceTest):
    """General-interface tests specialised for ``TransferManager.delete``."""

    __test__ = True

    def setUp(self):
        super(TestDeleteObject, self).setUp()
        self.bucket = 'mybucket'
        self.key = 'mykey'
        self.manager = TransferManager(self.client)

    @property
    def method(self):
        """The transfer manager method under test (here: delete())."""
        return self.manager.delete

    def create_call_kwargs(self):
        """Keyword arguments handed to the transfer manager method."""
        return dict(bucket=self.bucket, key=self.key)

    def create_invalid_extra_args(self):
        """Extra args containing a key that is not an allowed delete arg."""
        return dict(BadKwargs=True)

    def create_stubbed_responses(self):
        """Stubbed responses that let the request succeed.

        Each element is a dictionary used as keyword arguments to
        botocore.Stubber.add_response(). For example::

            [{'method': 'put_object', 'service_response': {}}]
        """
        expected_params = {'Bucket': self.bucket, 'Key': self.key}
        delete_response = {
            'method': 'delete_object',
            'service_response': {},
            'expected_params': expected_params,
        }
        return [delete_response]

    def create_expected_progress_callback_info(self):
        """This test case expects no progress callback information."""
        return []

    def test_known_allowed_args_in_input_shape(self):
        service_model = self.client.meta.service_model
        op_model = service_model.operation_model('DeleteObject')
        input_members = op_model.input_shape.members
        for allowed_arg in self.manager.ALLOWED_DELETE_ARGS:
            self.assertIn(allowed_arg, input_members)

    def test_raise_exception_on_s3_object_lambda_resource(self):
        s3_object_lambda_arn = (
            'arn:aws:s3-object-lambda:us-west-2:123456789012:'
            'accesspoint:my-accesspoint'
        )
        with self.assertRaisesRegexp(ValueError, 'methods do not support'):
            self.manager.delete(s3_object_lambda_arn, self.key)
def __init__(self, bucket, endpoint, id, key, region):
    """Store the connection settings and build the S3 client and manager.

    :param bucket: Name of the bucket to operate on.
    :param endpoint: Passed to boto3 as ``endpoint_url``.
    :param id: Passed to boto3 as ``aws_access_key_id``.
    :param key: Passed to boto3 as ``aws_secret_access_key``.
    :param region: Passed to boto3 as ``region_name``.
    """
    self.bucket = bucket
    self.service_endpoint = endpoint
    self.aws_access_key_id = id
    self.aws_secret_access_key = key
    self.region_name = region

    client_options = {
        'endpoint_url': self.service_endpoint,
        'aws_access_key_id': self.aws_access_key_id,
        'aws_secret_access_key': self.aws_secret_access_key,
        'region_name': self.region_name,
    }
    self.client = boto3.client('s3', **client_options)
    self.transfer = TransferManager(self.client, None, None, None)
def test_uses_bandwidth_limiter(self):
    self.content = b'a' * 1024 * 1024
    self.stream = six.BytesIO(self.content)
    self.config = TransferConfig(
        max_request_concurrency=1,
        max_bandwidth=len(self.content) / 2,
    )
    self._manager = TransferManager(self.client, self.config)

    self.add_head_object_response()
    self.add_successful_get_object_responses()

    began_at = time.time()
    download_future = self.manager.download(
        self.bucket, self.key, self.filename, self.extra_args)
    download_future.result()

    # Smoke test only: it checks that the limiter is in use, not that it
    # is exact. The cap is len(content)/2 bytes per second, so the
    # download should in theory take over two seconds. Rate tracking is
    # imprecise right at the start of a transfer, so only half of the
    # theoretical duration is required.
    self.assertGreaterEqual(time.time() - began_at, 1)

    # The downloaded file must hold exactly the expected bytes.
    with open(self.filename, 'rb') as downloaded:
        self.assertEqual(self.content, downloaded.read())
def __init__(self, s3_client, bucket_name,
             prefix=None,
             kms_key_id=None,
             force_upload=False,
             transfer_manager=None):
    """Record the upload settings and pick the transfer manager to use.

    :param s3_client: S3 client, stored as ``self.s3``.
    :param bucket_name: Destination bucket name.
    :param prefix: Optional key prefix.
    :param kms_key_id: Optional KMS key id; falsy values become ``None``.
    :param force_upload: Stored as ``self.force_upload``.
    :param transfer_manager: Transfer manager to reuse; when falsy a new
        ``TransferManager`` is built around ``s3_client``.
    """
    self.s3 = s3_client
    self.bucket_name = bucket_name
    self.prefix = prefix
    self.kms_key_id = kms_key_id or None
    self.force_upload = force_upload
    self.transfer_manager = transfer_manager or TransferManager(self.s3)
def __call__(self, client, result_queue):
    """Build a fully wired S3TransferHandler

    :type client: botocore.client.Client
    :param client: The client to power the S3TransferHandler

    :type result_queue: queue.Queue
    :param result_queue: The result queue to be used to process results
        for the S3TransferHandler

    :returns: A S3TransferHandler instance
    """
    config = create_transfer_config_from_runtime_config(
        self._runtime_config)
    # The same in-memory chunk limit applies to uploads and downloads.
    config.max_in_memory_upload_chunks = self.MAX_IN_MEMORY_CHUNKS
    config.max_in_memory_download_chunks = self.MAX_IN_MEMORY_CHUNKS
    manager = TransferManager(client, config)

    LOGGER.debug(
        "Using a multipart threshold of %s and a part size of %s",
        config.multipart_threshold, config.multipart_chunksize
    )

    recorder = ResultRecorder()
    handlers = [recorder]
    # _add_result_printer receives the handler list and may add to it.
    self._add_result_printer(recorder, handlers)
    processor = ResultProcessor(result_queue, handlers)
    command_recorder = CommandResultRecorder(
        result_queue, recorder, processor)

    return S3TransferHandler(manager, self._cli_params, command_recorder)
def test_limits_in_memory_chunks_for_fileobj(self):
    # Allow many request threads but only a single in-memory chunk. The
    # data is partitioned into chunks sequentially and only one chunk may
    # be held in memory at a time, so the parts have to be uploaded one
    # after another even though several threads are available.
    self.config.max_request_concurrency = 10
    self.config.max_in_memory_upload_chunks = 1
    self._manager = TransferManager(self.client, self.config)

    # Stub the default responses in part-number order. If the parts were
    # not uploaded sequentially, the stubber would raise for mismatching
    # partNumber parameters once upload() is called, and that error would
    # propagate through future.result().
    self.add_create_multipart_response_with_default_expected_params()
    self.add_upload_part_responses_with_default_expected_params()
    self.add_complete_multipart_response_with_default_expected_params()

    with open(self.filename, 'rb') as fileobj:
        upload_future = self.manager.upload(
            fileobj, self.bucket, self.key, self.extra_args)
        upload_future.result()

    # Every stubbed response must have been consumed.
    self.assert_expected_client_calls_were_correct()
    # The bodies sent over the wire must be in sequential order.
    self.assert_upload_part_bodies_were_correct()
def __init__(self, client=None, config=None, osutil=None, manager=None):
    """Wrap an existing transfer manager, or build one from a client.

    :param client: S3 client used to build a new ``TransferManager``.
    :param config: Transfer config; defaults to ``TransferConfig()``.
    :param osutil: OS utility object; defaults to ``OSUtils()``.
    :param manager: Ready-made transfer manager. Mutually exclusive with
        ``client``, ``config`` and ``osutil``.
    :raises ValueError: If neither ``client`` nor ``manager`` is given, or
        if ``manager`` is combined with any of the other arguments.
    """
    if not (client or manager):
        raise ValueError(
            'Either a boto3.Client or s3transfer.manager.TransferManager '
            'must be provided')
    if manager and (client or config or osutil):
        raise ValueError(
            'Manager cannot be provided with client, config, '
            'nor osutil. These parameters are mutually exclusive.')

    config = TransferConfig() if config is None else config
    osutil = OSUtils() if osutil is None else osutil

    self._manager = (
        manager if manager else TransferManager(client, config, osutil))
def setUp(self):
    super(TestMultipartUpload, self).setUp()
    # Chunk size of 4 with a multipart threshold of 1.
    self.chunksize = 4
    config_kwargs = {
        'max_request_concurrency': 1,
        'multipart_threshold': 1,
        'multipart_chunksize': self.chunksize,
    }
    self.config = TransferConfig(**config_kwargs)
    self._manager = TransferManager(self.client, self.config)
    self.multipart_id = 'my-upload-id'
def __init__(self, s3_bucket, s3_key, boto3_s3_client=None):
    """Prepare a streaming upload to a temporary key next to ``s3_key``.

    :param s3_bucket: Bucket holding both the temporary and final object.
    :param s3_key: Final key of the object.
    :param boto3_s3_client: Optional S3 client; when ``None`` a default
        ``boto3.client('s3')`` is created.
    """
    import boto3
    from s3transfer.manager import TransferManager, TransferConfig

    self.s3_bucket = s3_bucket
    self.s3_key = s3_key
    self.s3_client = boto3_s3_client
    if self.s3_client is None:
        self.s3_client = boto3.client('s3')
    self._internal_queue = BlockingReaderWriterByteStream()
    self._boto3_multipart_upload_workaround_buffer = b''
    # randrange() needs integer bounds: float arguments such as 1e10 are
    # deprecated since Python 3.10 and raise TypeError from 3.12 on.
    self.temp_s3_key = self.s3_key + '-{:0>10}-tmp'.format(
        random.randrange(0, 10 ** 10))
    # don't start the upload until we've written at least
    # boto3.TransferConfig.multipart_threshold bytes
    self._transfer_manager = TransferManager(self.s3_client,
                                             TransferConfig())
    self._upload_future = None
def test_uses_provided_osutil(self):
    # Swap in a transfer manager backed by the recording OS utility.
    recording_osutil = RecordingOSUtils()
    self._manager = TransferManager(
        self.client, self.config, recording_osutil)

    self.add_put_object_response_with_default_expected_params()

    upload_future = self.manager.upload(
        self.filename, self.bucket, self.key)
    upload_future.result()

    # The upload must have gone through the provided utility: the opens
    # it recorded are expected to be exactly one binary read of the file.
    expected_opens = [(self.filename, 'rb')]
    self.assertEqual(recording_osutil.open_records, expected_opens)
def call(self, files):
    """Run the stream transfer described by the first entry of ``files``.

    Uses ``self._manager`` when it is set, otherwise builds a
    ``TransferManager`` from the file's client and ``self.config``.
    Returns the result of ``_upload``/``_download``; any other operation
    name falls through and returns ``None``.
    """
    # There is only ever one file in a stream transfer.
    fileinfo = files[0]

    manager = self._manager
    if manager is None:
        manager = TransferManager(fileinfo.client, self.config)

    operation = fileinfo.operation_name
    if operation == 'upload':
        bucket, key = find_bucket_key(fileinfo.dest)
        return self._upload(manager, bucket, key)
    if operation == 'download':
        bucket, key = find_bucket_key(fileinfo.src)
        return self._download(manager, bucket, key)
def test_uses_provided_osutil(self):
    # Swap in a transfer manager backed by the recording OS utility.
    recording_osutil = RecordingOSUtils()
    self._manager = TransferManager(
        self.client, self.config, recording_osutil)

    self.add_head_object_response()
    self.add_successful_get_object_responses()

    download_future = self.manager.download(**self.create_call_kwargs())
    download_future.result()

    # open() should have been invoked once for the temporary file, and
    # rename_file() once when that file was moved to its final location.
    open_count = len(recording_osutil.open_records)
    rename_count = len(recording_osutil.rename_records)
    self.assertEqual(open_count, 1)
    self.assertEqual(rename_count, 1)
def setUp(self):
    super(BaseCopyTest, self).setUp()
    self.config = TransferConfig(max_request_concurrency=1)
    self._manager = TransferManager(self.client, self.config)

    # Default arguments shared by the copy tests.
    self.bucket, self.key = 'mybucket', 'mykey'
    self.copy_source = dict(Bucket='mysourcebucket', Key='mysourcekey')
    self.extra_args = {}
    self.subscribers = []

    self.content = b'my content'
def setUp(self):
    super(BaseCopyTest, self).setUp()
    self.config = TransferConfig(
        max_request_concurrency=1,
        multipart_chunksize=MIN_UPLOAD_CHUNKSIZE,
        multipart_threshold=MIN_UPLOAD_CHUNKSIZE * 4,
    )
    self._manager = TransferManager(self.client, self.config)

    # Default arguments shared by the copy tests.
    self.bucket, self.key = 'mybucket', 'mykey'
    self.copy_source = dict(Bucket='mysourcebucket', Key='mysourcekey')
    self.extra_args = {}
    self.subscribers = []

    # The content is two and a half minimum-sized chunks long.
    self.half_chunksize = int(MIN_UPLOAD_CHUNKSIZE / 2)
    content_length = 2 * MIN_UPLOAD_CHUNKSIZE + self.half_chunksize
    self.content = b'0' * content_length
def test_retry_failure(self):
    self.add_head_object_response()

    attempts_allowed = 3
    self.config.num_download_attempts = attempts_allowed
    self._manager = TransferManager(self.client, self.config)

    # Queue exactly as many retryable failures as there are attempts.
    self.add_n_retryable_get_object_responses(attempts_allowed)

    download_future = self.manager.download(**self.create_call_kwargs())

    # With every attempt failing, the retries-exceeded error must surface.
    with self.assertRaises(RetriesExceededError):
        download_future.result()

    # No stubbed response may remain: all retries were used up.
    self.stubber.assert_no_pending_responses()
def setUp(self):
    super(BaseDownloadTest, self).setUp()
    self.config = TransferConfig(max_request_concurrency=1)
    self._manager = TransferManager(self.client, self.config)

    # Temporary directory that downloads are written into.
    self.tempdir = tempfile.mkdtemp()
    self.filename = os.path.join(self.tempdir, 'myfile')

    # Default arguments shared by the download tests.
    self.bucket, self.key = 'mybucket', 'mykey'
    self.extra_args = {}
    self.subscribers = []

    # In-memory stream holding the content to read from.
    self.content = b'my content'
    self.stream = six.BytesIO(self.content)
def create_transfer_manager(client, config, osutil=None):
    """Build a transfer manager that honours the given configuration

    :type client: boto3.client
    :param client: The S3 client to use

    :type config: boto3.s3.transfer.TransferConfig
    :param config: The transfer config to use

    :type osutil: s3transfer.utils.OSUtils
    :param osutil: The os utility to use

    :rtype: s3transfer.manager.TransferManager
    :returns: A transfer manager based on parameters provided
    """
    # Leave the executor unspecified (None) unless threads are disabled,
    # in which case the non-threaded executor is passed explicitly.
    executor_cls = None if config.use_threads else NonThreadedExecutor
    return TransferManager(client, config, osutil, executor_cls)
def test_sigv4_progress_callbacks_invoked_once(self):
    # Rebuild the client and manager so that sigv4 signing is used.
    sigv4_client_args = {'config': Config(signature_version='s3v4')}
    self.reset_stubber_with_new_client(sigv4_client_args)
    self.client.meta.events.register(
        'before-parameter-build.s3.*', self.collect_body)
    self._manager = TransferManager(self.client, self.config)

    # Stub the single put_object call.
    self.add_put_object_response_with_default_expected_params()

    recorder = RecordingSubscriber()
    upload_future = self.manager.upload(
        self.filename, self.bucket, self.key, subscribers=[recorder])
    upload_future.result()
    self.assert_expected_client_calls_were_correct()

    # The bytes reported to the subscriber must equal the file size.
    self.assertEqual(recorder.calculate_bytes_seen(), len(self.content))
def __init__(self, client=None, config=None, osutil=None, manager=None):
    """Set up the underlying transfer manager.

    Either ``manager`` is used as-is, or a new ``TransferManager`` is
    built from ``client``, ``config`` and ``osutil`` (the latter two
    default to ``TransferConfig()`` and ``OSUtils()``).

    :raises ValueError: If neither ``client`` nor ``manager`` is given, or
        if ``manager`` is combined with ``client``, ``config`` or
        ``osutil``.
    """
    has_source = bool(client) or bool(manager)
    if not has_source:
        raise ValueError(
            'Either a boto3.Client or s3transfer.manager.TransferManager '
            'must be provided'
        )

    conflicting = any((client, config, osutil))
    if manager and conflicting:
        raise ValueError(
            'Manager cannot be provided with client, config, '
            'nor osutil. These parameters are mutually exclusive.'
        )

    if config is None:
        config = TransferConfig()
    if osutil is None:
        osutil = OSUtils()

    if not manager:
        manager = TransferManager(client, config, osutil)
    self._manager = manager
def __call__(
    self, client: S3Client, result_queue: "Queue[Any]"
) -> S3TransferHandler:
    """Build a fully wired S3TransferHandler.

    Args:
        client: The client to power the S3TransferHandler.
        result_queue: The result queue to be used to process results
            for the S3TransferHandler.
    """
    config = create_transfer_config_from_runtime_config(
        self._runtime_config
    )
    # The same in-memory chunk limit applies to uploads and downloads.
    config.max_in_memory_upload_chunks = self.MAX_IN_MEMORY_CHUNKS
    config.max_in_memory_download_chunks = self.MAX_IN_MEMORY_CHUNKS
    manager = TransferManager(client, config)

    LOGGER.debug(
        "Using a multipart threshold of %s and a part size of %s",
        config.multipart_threshold,
        config.multipart_chunksize,
    )

    recorder = ResultRecorder()
    handlers: List[Any] = [recorder]
    # _add_result_printer receives the handler list and may add to it.
    self._add_result_printer(recorder, handlers)

    processor = ResultProcessor(
        result_queue=result_queue, result_handlers=handlers
    )
    command_recorder = CommandResultRecorder(
        result_queue=result_queue,
        result_recorder=recorder,
        result_processor=processor,
    )

    return S3TransferHandler(
        transfer_manager=manager,
        config_params=self._config_params,
        result_command_recorder=command_recorder,
    )
def test_upload_with_bandwidth_limiter(self):
    self.content = b'a' * 1024 * 1024
    with open(self.filename, 'wb') as source:
        source.write(self.content)

    self.config = TransferConfig(
        max_request_concurrency=1,
        max_bandwidth=len(self.content) / 2,
    )
    self._manager = TransferManager(self.client, self.config)

    self.add_put_object_response_with_default_expected_params()

    began_at = time.time()
    upload_future = self.manager.upload(
        self.filename, self.bucket, self.key)
    upload_future.result()

    # Smoke test only: it checks that the limiter is in use, not that it
    # is exact. The cap is len(content)/2 bytes per second, so the upload
    # should in theory take over two seconds. Rate tracking is imprecise
    # right at the start of a transfer, so only half of the theoretical
    # duration is required.
    self.assertGreaterEqual(time.time() - began_at, 1)

    self.assert_expected_client_calls_were_correct()
    self.assert_put_object_body_was_correct()
def setUp(self):
    super(BaseUploadTest, self).setUp()
    self.config = TransferConfig(max_request_concurrency=1)
    self._manager = TransferManager(self.client, self.config)

    # Temporary directory containing the file the uploads read from.
    self.tempdir = tempfile.mkdtemp()
    self.filename = os.path.join(self.tempdir, 'myfile')
    self.content = b'my content'
    with open(self.filename, 'wb') as source:
        source.write(self.content)

    # Default arguments shared by the upload tests.
    self.bucket, self.key = 'mybucket', 'mykey'
    self.extra_args = {}
    self.subscribers = []

    # Every body sent over the wire is collected here, in order.
    self.sent_bodies = []
    self.client.meta.events.register(
        'before-parameter-build.s3.*', self.collect_body)
def setUp(self):
    super(BaseUploadTest, self).setUp()

    # TODO: The real MIN_UPLOAD_CHUNKSIZE should not be used while parts
    # are being adjusted: it is wasteful and fails CI builds because
    # self.contents would normally need 10MB+ of memory.
    # Until there is an API to configure this, a minimum size of 1 is
    # patched in. MIN_UPLOAD_CHUNKSIZE cannot be patched directly because
    # it is already bound as a default value inside the chunksize
    # adjuster, so the adjuster class itself is patched out instead.
    def make_small_adjuster():
        return ChunksizeAdjuster(min_size=1)

    self.adjuster_patch = mock.patch(
        's3transfer.upload.ChunksizeAdjuster', make_small_adjuster)
    self.adjuster_patch.start()

    self.config = TransferConfig(max_request_concurrency=1)
    self._manager = TransferManager(self.client, self.config)

    # Temporary directory containing the file the uploads read from.
    self.tempdir = tempfile.mkdtemp()
    self.filename = os.path.join(self.tempdir, 'myfile')
    self.content = b'my content'
    with open(self.filename, 'wb') as source:
        source.write(self.content)

    # Default arguments shared by the upload tests.
    self.bucket, self.key = 'mybucket', 'mykey'
    self.extra_args = {}
    self.subscribers = []

    # Every body sent over the wire is collected here, in order.
    self.sent_bodies = []
    self.client.meta.events.register(
        'before-parameter-build.s3.*', self.collect_body)
class AtomicRemoteWritableS3File(object):
    """
    An S3 file that writes to a remote temp object on S3; copies to the true
    key on close. This class requires boto3 v1.4.0+ for its non-seekable
    file object upload ability.

    Useful for performing operations on large S3 objects when you don't have
    sufficient space on local drives.

    Works around AWS S3's multipart transfer size requirements and boto3's
    idiosyncratic implementation that requires an initial buffer size larger
    than the multipart transfer threshold in order to correctly select the
    'read-until-empty' behavior needed for a streaming upload.
    """

    _boto3_default_multipart_threshold = 8 * 1024 * 1024

    # Set once the writer has been finalised (or aborted), so that a later
    # close() -- typically the one issued by __del__ -- does not copy and
    # delete the temporary object a second time. Kept as a class-level
    # default so the flag is readable even if __init__ did not complete.
    _closed = False

    def __init__(self, s3_bucket, s3_key, boto3_s3_client=None):
        """
        Prepares a streaming upload to a temporary key next to ``s3_key``.

        :param s3_bucket: Bucket holding the temporary and the final object.
        :param s3_key: Final key of the object.
        :param boto3_s3_client: Optional S3 client; when ``None`` a default
            ``boto3.client('s3')`` is created.
        """
        import boto3
        from s3transfer.manager import TransferManager, TransferConfig

        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.s3_client = boto3_s3_client
        if self.s3_client is None:
            self.s3_client = boto3.client('s3')
        self._internal_queue = BlockingReaderWriterByteStream()
        self._boto3_multipart_upload_workaround_buffer = b''
        # randrange() needs integer bounds: float arguments such as 1e10
        # are deprecated since Python 3.10 and raise TypeError from 3.12.
        self.temp_s3_key = self.s3_key + '-{:0>10}-tmp'.format(
            random.randrange(0, 10 ** 10))
        # don't start the upload until we've written at least
        # boto3.TransferConfig.multipart_threshold bytes
        self._transfer_manager = TransferManager(self.s3_client,
                                                 TransferConfig())
        self._upload_future = None

    def write(self, some_bytes):
        """
        Writes bytes to S3. This method may not be safely called by multiple
        writers in different threads.
        """
        self._write(some_bytes)

    def _write(self, some_bytes, close_and_flush=False):
        """
        Buffers writes until they're large enough to be safely sent to boto3.

        The buffer is pushed to the internal queue once it reaches the
        multipart threshold, or unconditionally when ``close_and_flush`` is
        set. The upload is submitted on the first such flush.
        """
        buffer_write = (len(self._boto3_multipart_upload_workaround_buffer) +
                        len(some_bytes) <
                        self._boto3_default_multipart_threshold)
        self._boto3_multipart_upload_workaround_buffer += some_bytes
        if not buffer_write or close_and_flush:
            self._internal_queue.write(
                self._boto3_multipart_upload_workaround_buffer)
            self._boto3_multipart_upload_workaround_buffer = b''
            if not self._upload_future:
                self._submit_upload()

    def _submit_upload(self):
        """Starts the upload of the internal queue to the temporary key."""
        self._upload_future = self._transfer_manager.upload(
            fileobj=self._internal_queue,
            bucket=self.s3_bucket,
            key=self.temp_s3_key)

    def close(self):
        """
        Closes the writer, so that it will flush to the reader.

        This method will block until the file has been fully flushed to S3,
        and until it has been properly moved to its final destination.
        Calling it again after it has succeeded is a no-op.
        """
        if self._closed:
            return
        self._write(b'', close_and_flush=True)
        self._internal_queue.close()
        # wait for upload to complete before moving
        self._upload_future.result()
        self._move_to_final_destination()
        # Only flag success at the very end, so a failed close() can still
        # be retried.
        self._closed = True

    def _move_to_final_destination(self):
        """Copies the temporary object to the final key, then deletes it."""
        self.s3_client.copy_object(Bucket=self.s3_bucket,
                                   Key=self.s3_key,
                                   CopySource={
                                       'Bucket': self.s3_bucket,
                                       'Key': self.temp_s3_key
                                   })
        self.s3_client.delete_object(Bucket=self.s3_bucket,
                                     Key=self.temp_s3_key)

    def __del__(self):
        # close() is a no-op when the writer was already finalised/aborted.
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type:
            # The write is being aborted: nothing must be finalised later,
            # not even by __del__.
            self._closed = True
            self._internal_queue.error('Pipe not properly closed.')
            if self._upload_future:
                self._upload_future.result()
            self.s3_client.delete_object(Bucket=self.s3_bucket,
                                         Key=self.temp_s3_key)
            return
        else:
            self.close()
def test_use_custom_executor_implementation(self):
    # A mocked executor class is injected; after a delete() call its
    # instance's submit() is expected to have been called.
    executor_cls = mock.Mock(BaseExecutor)
    manager = TransferManager(self.client, executor_cls=executor_cls)
    manager.delete('bucket', 'key')

    executor_instance = executor_cls.return_value
    self.assertTrue(executor_instance.submit.called)
class S3Uploader(object):
    """
    Helper for uploading local files to an S3 bucket, with optional key
    prefixing, KMS encryption and checksum-based de-duplication.

    NOTE(review): earlier documentation stated that this class turns on
    bucket versioning; nothing in this class does so.
    """

    def __init__(self, s3_client, bucket_name, region,
                 prefix=None,
                 kms_key_id=None,
                 force_upload=False,
                 transfer_manager=None):
        self.s3 = s3_client
        self.bucket_name = bucket_name
        self.region = region
        self.prefix = prefix
        self.kms_key_id = kms_key_id or None
        self.force_upload = force_upload
        # Build a transfer manager around the client unless one was given.
        self.transfer_manager = transfer_manager or TransferManager(self.s3)

    def upload(self, file_name, remote_path):
        """
        Uploads the given file to S3, unless it is already there

        :param file_name: Path to the file that will be uploaded
        :param remote_path: Key of the object; the configured prefix, if
            any, is prepended to it
        :return: ``s3://`` URL of the object
        """
        if self.prefix and len(self.prefix) > 0:
            remote_path = "{0}/{1}".format(self.prefix, remote_path)

        # Skip the transfer when the object is already present, unless an
        # upload is being forced.
        if not self.force_upload and self.file_exists(remote_path):
            LOG.debug("File with same data is already exists at {0}. "
                      "Skipping upload".format(remote_path))
            return self.make_url(remote_path)

        try:
            # Regular server-side encryption is the default; a customer
            # supplied KMS key switches to KMS encryption.
            if self.kms_key_id:
                additional_args = {
                    "ServerSideEncryption": "aws:kms",
                    "SSEKMSKeyId": self.kms_key_id,
                }
            else:
                additional_args = {"ServerSideEncryption": "AES256"}

            progress = ProgressPercentage(file_name, remote_path)
            future = self.transfer_manager.upload(
                file_name, self.bucket_name, remote_path,
                additional_args, [progress])
            future.result()
            return self.make_url(remote_path)
        except botocore.exceptions.ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchBucket":
                raise exceptions.NoSuchBucketError(
                    bucket_name=self.bucket_name)
            raise ex

    def upload_with_dedup(self, file_name, extension=None):
        """
        Uploads the file under a key derived from its MD5 checksum

        :param file_name: file to upload
        :param extension: String of file extension to append to the object
        :return: S3 URL of the uploaded object
        """
        # Deriving the key from the content is what prevents duplicate
        # uploads: upload() checks whether the key exists and transfers
        # only when necessary, so a file referenced from several places
        # is uploaded once.
        remote_path = self.file_checksum(file_name)
        if extension:
            remote_path = remote_path + "." + extension
        return self.upload(file_name, remote_path)

    def file_exists(self, remote_path):
        """
        Checks whether an object already exists at the given key

        :param remote_path: Key to look up
        :return: True, if file exists. False, otherwise
        """
        try:
            self.s3.head_object(Bucket=self.bucket_name, Key=remote_path)
        except botocore.exceptions.ClientError:
            # Either the object does not exist or the information could
            # not be retrieved.
            return False
        return True

    def make_url(self, obj_path):
        """Returns the ``s3://bucket/key`` URL for the given key"""
        return "s3://{0}/{1}".format(self.bucket_name, obj_path)

    def file_checksum(self, file_name):
        """Returns the hex MD5 digest of the file's content"""
        digest = hashlib.md5()
        # The file is consumed in chunks of 4096 bytes
        chunk_size = 4096
        with open(file_name, "rb") as stream:
            # Remember the cursor, hash from the start, then restore it
            start_offset = stream.tell()
            stream.seek(0)
            for chunk in iter(lambda: stream.read(chunk_size), b""):
                digest.update(chunk)
            stream.seek(start_offset)
        return digest.hexdigest()

    def to_path_style_s3_url(self, key, version=None):
        """
        This link describes the format of Path Style URLs
        http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html#access-bucket-intro
        """
        if self.region and self.region != "us-east-1":
            base = "https://s3-{0}.amazonaws.com".format(self.region)
        else:
            base = "https://s3.amazonaws.com"

        url = "{0}/{1}/{2}".format(base, self.bucket_name, key)
        if version:
            url = "{0}?versionId={1}".format(url, version)
        return url
class S3Uploader(object):
    """
    Helper for uploading local files to an S3 bucket, with optional key
    prefixing, KMS encryption and checksum-based de-duplication.

    NOTE(review): earlier documentation stated that this class turns on
    bucket versioning; nothing in this class does so.
    """

    def __init__(self, s3_client, bucket_name, region,
                 prefix=None,
                 kms_key_id=None,
                 force_upload=False,
                 transfer_manager=None):
        self.s3 = s3_client
        self.bucket_name = bucket_name
        self.region = region
        self.prefix = prefix
        self.kms_key_id = kms_key_id or None
        self.force_upload = force_upload
        # Build a transfer manager around the client unless one was given.
        self.transfer_manager = transfer_manager or TransferManager(self.s3)

    def upload(self, file_name, remote_path):
        """
        Uploads the given file to S3, unless it is already there

        :param file_name: Path to the file that will be uploaded
        :param remote_path: Key of the object; the configured prefix, if
            any, is prepended to it
        :return: ``s3://`` URL of the object
        """
        if self.prefix and len(self.prefix) > 0:
            remote_path = "{0}/{1}".format(self.prefix, remote_path)

        # Skip the transfer when the object is already present, unless an
        # upload is being forced.
        if not self.force_upload and self.file_exists(remote_path):
            LOG.debug("File with same data is already exists at {0}. "
                      "Skipping upload".format(remote_path))
            return self.make_url(remote_path)

        try:
            # Regular server-side encryption is the default; a customer
            # supplied KMS key switches to KMS encryption.
            if self.kms_key_id:
                additional_args = {
                    "ServerSideEncryption": "aws:kms",
                    "SSEKMSKeyId": self.kms_key_id,
                }
            else:
                additional_args = {"ServerSideEncryption": "AES256"}

            progress = ProgressPercentage(file_name, remote_path)
            future = self.transfer_manager.upload(
                file_name, self.bucket_name, remote_path,
                additional_args, [progress])
            future.result()
            return self.make_url(remote_path)
        except botocore.exceptions.ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchBucket":
                raise exceptions.NoSuchBucketError(
                    bucket_name=self.bucket_name)
            raise ex

    def upload_with_dedup(self, file_name, extension=None):
        """
        Uploads the file under a key derived from its MD5 checksum

        :param file_name: file to upload
        :param extension: String of file extension to append to the object
        :return: S3 URL of the uploaded object
        """
        # Deriving the key from the content is what prevents duplicate
        # uploads: upload() checks whether the key exists and transfers
        # only when necessary, so a file referenced from several places
        # is uploaded once.
        remote_path = self.file_checksum(file_name)
        if extension:
            remote_path = remote_path + "." + extension
        return self.upload(file_name, remote_path)

    def file_exists(self, remote_path):
        """
        Checks whether an object already exists at the given key

        :param remote_path: Key to look up
        :return: True, if file exists. False, otherwise
        """
        try:
            self.s3.head_object(Bucket=self.bucket_name, Key=remote_path)
        except botocore.exceptions.ClientError:
            # Either the object does not exist or the information could
            # not be retrieved.
            return False
        return True

    def make_url(self, obj_path):
        """Returns the ``s3://bucket/key`` URL for the given key"""
        return "s3://{0}/{1}".format(self.bucket_name, obj_path)

    def file_checksum(self, file_name):
        """Returns the hex MD5 digest of the file's content"""
        digest = hashlib.md5()
        # The file is consumed in chunks of 4096 bytes
        chunk_size = 4096
        with open(file_name, "rb") as stream:
            # Remember the cursor, hash from the start, then restore it
            start_offset = stream.tell()
            stream.seek(0)
            for chunk in iter(lambda: stream.read(chunk_size), b""):
                digest.update(chunk)
            stream.seek(start_offset)
        return digest.hexdigest()

    def to_path_style_s3_url(self, key, version=None):
        """
        This link describes the format of Path Style URLs
        http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html#access-bucket-intro
        """
        if self.region and self.region != "us-east-1":
            base = "https://s3-{0}.amazonaws.com".format(self.region)
        else:
            base = "https://s3.amazonaws.com"

        url = "{0}/{1}/{2}".format(base, self.bucket_name, key)
        if version:
            url = "{0}?versionId={1}".format(url, version)
        return url
def setUp(self):
    super(TestRangedDownload, self).setUp()
    # Multipart threshold of 1 with a chunk size of 4.
    ranged_settings = {
        'max_request_concurrency': 1,
        'multipart_threshold': 1,
        'multipart_chunksize': 4,
    }
    self.config = TransferConfig(**ranged_settings)
    self._manager = TransferManager(self.client, self.config)
class S3Transfer(object):
    """File-based upload/download facade on top of a ``TransferManager``."""

    ALLOWED_DOWNLOAD_ARGS = TransferManager.ALLOWED_DOWNLOAD_ARGS
    ALLOWED_UPLOAD_ARGS = TransferManager.ALLOWED_UPLOAD_ARGS

    def __init__(self, client=None, config=None, osutil=None, manager=None):
        if not (client or manager):
            raise ValueError(
                'Either a boto3.Client or s3transfer.manager.TransferManager '
                'must be provided'
            )
        if manager and (client or config or osutil):
            raise ValueError(
                'Manager cannot be provided with client, config, '
                'nor osutil. These parameters are mutually exclusive.'
            )

        config = TransferConfig() if config is None else config
        osutil = OSUtils() if osutil is None else osutil

        self._manager = (
            manager if manager else TransferManager(client, config, osutil))

    def upload_file(self, filename, bucket, key,
                    callback=None, extra_args=None):
        """Upload a file to an S3 object.

        Variants have also been injected into S3 client, Bucket and Object.
        You don't have to use S3Transfer.upload_file() directly.
        """
        if not isinstance(filename, six.string_types):
            raise ValueError('Filename must be a string')

        upload_future = self._manager.upload(
            filename, bucket, key, extra_args,
            self._get_subscribers(callback))
        try:
            upload_future.result()
        except ClientError as e:
            # Backwards compatibility layer: a client error is re-raised
            # as S3UploadFailedError. That error used to be thrown only
            # for upload_parts, but can now result from any related
            # client error.
            raise S3UploadFailedError(
                "Failed to upload %s to %s: %s" % (
                    filename, '/'.join([bucket, key]), e))

    def download_file(self, bucket, key, filename, extra_args=None,
                      callback=None):
        """Download an S3 object to a file.

        Variants have also been injected into S3 client, Bucket and Object.
        You don't have to use S3Transfer.download_file() directly.
        """
        if not isinstance(filename, six.string_types):
            raise ValueError('Filename must be a string')

        download_future = self._manager.download(
            bucket, key, filename, extra_args,
            self._get_subscribers(callback))
        try:
            download_future.result()
        except S3TransferRetriesExceededError as e:
            # Backwards compatibility: existing users catch boto3's
            # RetriesExceededError (to run their own retries) rather than
            # the one built into s3transfer, so it is translated here.
            raise RetriesExceededError(e.last_exception)

    def _get_subscribers(self, callback):
        # No callback means no subscribers at all (None, not a list).
        return [ProgressCallbackInvoker(callback)] if callback else None

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self._manager.__exit__(*args)