class TestAdjustChunksize(unittest.TestCase):
    """Checks for ``ChunksizeAdjuster.adjust_chunksize``.

    The first group of tests passes both a requested chunksize and a file
    size; the ``test_unknown_file_size_*`` group passes only the chunksize.
    """

    def setUp(self):
        self.adjuster = ChunksizeAdjuster()

    def test_valid_chunksize(self):
        # The requested chunksize is expected to come back unchanged.
        requested = 7 * (1024**2)
        adjusted = self.adjuster.adjust_chunksize(requested, 8 * (1024**2))
        self.assertEqual(adjusted, requested)

    def test_chunksize_below_minimum(self):
        # One byte under the minimum is expected to be raised to the minimum.
        total_size = 3 * MIN_UPLOAD_CHUNKSIZE
        adjusted = self.adjuster.adjust_chunksize(
            MIN_UPLOAD_CHUNKSIZE - 1, total_size)
        self.assertEqual(adjusted, MIN_UPLOAD_CHUNKSIZE)

    def test_chunksize_above_maximum(self):
        # One byte over the maximum is expected to be lowered to the maximum.
        total_size = MAX_SINGLE_UPLOAD_SIZE * 2
        adjusted = self.adjuster.adjust_chunksize(
            MAX_SINGLE_UPLOAD_SIZE + 1, total_size)
        self.assertEqual(adjusted, MAX_SINGLE_UPLOAD_SIZE)

    def test_chunksize_too_small(self):
        # Uploading a 5TB file with the requested 7MB chunksize is expected
        # to force the part size up to 896MB, which must keep the part count
        # within MAX_PARTS.
        requested = 7 * (1024**2)
        total_size = 5 * (1024**4)
        adjusted = self.adjuster.adjust_chunksize(requested, total_size)
        self.assertEqual(adjusted, 896 * (1024**2))
        self.assertLessEqual(total_size / adjusted, MAX_PARTS)

    def test_unknown_file_size_with_valid_chunksize(self):
        # No file size supplied: the chunksize is expected back unchanged.
        requested = 7 * (1024**2)
        self.assertEqual(
            self.adjuster.adjust_chunksize(requested), requested)

    def test_unknown_file_size_below_minimum(self):
        # No file size supplied: the minimum is still expected to apply.
        adjusted = self.adjuster.adjust_chunksize(MIN_UPLOAD_CHUNKSIZE - 1)
        self.assertEqual(adjusted, MIN_UPLOAD_CHUNKSIZE)

    def test_unknown_file_size_above_maximum(self):
        # No file size supplied: the maximum is still expected to apply.
        adjusted = self.adjuster.adjust_chunksize(MAX_SINGLE_UPLOAD_SIZE + 1)
        self.assertEqual(adjusted, MAX_SINGLE_UPLOAD_SIZE)
def setUp(self):
    """Build the transfer manager, a small source file, and body capture.

    Sets ``adjuster_patch``, ``config``, ``_manager``, ``tempdir``,
    ``filename``, ``content``, ``bucket``, ``key``, ``extra_args``,
    ``subscribers`` and ``sent_bodies`` on the test instance. Nothing is
    torn down here; NOTE(review): stopping the patch and removing the
    temporary directory is presumably done in ``tearDown`` -- confirm.
    """
    super(BaseUploadTest, self).setUp()

    # TODO: The real MIN_UPLOAD_CHUNKSIZE should not be used when parts are
    # being adjusted: it is wasteful and fails CI builds because
    # self.contents would normally use 10MB+ of memory.
    # Until an API exists to configure this, a minimum size of 1 is patched
    # in. MIN_UPLOAD_CHUNKSIZE itself cannot be patched because it is
    # already bound as a default value in the chunksize adjuster, so the
    # adjuster class is swapped out for a factory instead.
    def _adjuster_with_min_size_one():
        return ChunksizeAdjuster(min_size=1)

    self.adjuster_patch = mock.patch(
        'ibm_s3transfer.upload.ChunksizeAdjuster',
        _adjuster_with_min_size_one)
    self.adjuster_patch.start()

    self.config = TransferConfig(max_request_concurrency=1)
    self._manager = TransferManager(self.client, self.config)

    # Source file on disk for the tests to read from.
    self.tempdir = tempfile.mkdtemp()
    self.filename = os.path.join(self.tempdir, 'myfile')
    self.content = b'my content'
    with open(self.filename, 'wb') as source_file:
        source_file.write(self.content)

    # Default arguments shared by the tests.
    self.bucket, self.key = 'mybucket', 'mykey'
    self.extra_args = {}
    self.subscribers = []

    # Every body sent over the wire is recorded here, in order.
    self.sent_bodies = []
    self.client.meta.events.register(
        'before-parameter-build.s3.*', self.collect_body)
def setUp(self):
    """Create the ``ChunksizeAdjuster`` the tests use as ``self.adjuster``.

    The adjuster is constructed with no arguments, so whatever defaults
    ``ChunksizeAdjuster`` defines are in effect.
    """
    self.adjuster = ChunksizeAdjuster()
def _submit_multipart_request(self, client, config, osutil,
                              request_executor, transfer_future,
                              upload_input_manager):
    """Submit the create, upload-part and complete tasks for an upload.

    Tasks are handed to ``self._transfer_coordinator`` in this order: one
    ``CreateMultipartUploadTask``, one ``UploadPartTask`` per body yielded
    by ``upload_input_manager``, and a final ``CompleteMultipartUploadTask``.

    :param client: Client passed through to every task.
    :param config: Supplies ``multipart_chunksize``, the requested part size.
    :param osutil: Not used by this method.
    :param request_executor: Executor every task is submitted with.
    :param transfer_future: Provides ``meta.call_args`` (bucket, key,
        extra_args) and ``meta.size``.
    :param upload_input_manager: Yields ``(part_number, fileobj)`` pairs and
        is consulted for the tag attached to each upload-part task.
    """
    coordinator = self._transfer_coordinator
    call_args = transfer_future.meta.call_args

    # The create request must not carry any blacklisted argument (the ones
    # used for copy part).
    create_extra_args = {
        name: value
        for name, value in call_args.extra_args.items()
        if name not in self.CREATE_MULTIPART_ARGS_BLACKLIST
    }

    # Submit the request to create a multipart upload. The returned future
    # is handed to the later tasks as their pending 'upload_id'.
    upload_id_future = coordinator.submit(
        request_executor,
        CreateMultipartUploadTask(
            transfer_coordinator=coordinator,
            main_kwargs={
                'client': client,
                'bucket': call_args.bucket,
                'key': call_args.key,
                'extra_args': create_extra_args,
            },
        ),
    )

    part_extra_args = self._extra_upload_part_args(call_args.extra_args)

    # Tag, if any, to associate with each task that uploads data.
    part_tag = self._get_upload_task_tag(
        upload_input_manager, 'upload_part')

    # The configured chunksize is adjusted against the transfer size before
    # the input manager starts yielding part bodies.
    total_size = transfer_future.meta.size
    chunksize = ChunksizeAdjuster().adjust_chunksize(
        config.multipart_chunksize, total_size)
    part_bodies = upload_input_manager.yield_upload_part_bodies(
        transfer_future, chunksize)

    # Submit one upload-part task per yielded body, in iteration order.
    part_futures = [
        coordinator.submit(
            request_executor,
            UploadPartTask(
                transfer_coordinator=coordinator,
                main_kwargs={
                    'client': client,
                    'fileobj': fileobj,
                    'bucket': call_args.bucket,
                    'key': call_args.key,
                    'part_number': part_number,
                    'extra_args': part_extra_args
                },
                pending_main_kwargs={
                    'upload_id': upload_id_future
                },
            ),
            tag=part_tag,
        )
        for part_number, fileobj in part_bodies
    ]

    complete_extra_args = self._extra_complete_multipart_args(
        call_args.extra_args)

    # Submit the request to complete the multipart upload; it is marked as
    # the final task and receives every part future.
    coordinator.submit(
        request_executor,
        CompleteMultipartUploadTask(
            transfer_coordinator=coordinator,
            main_kwargs={
                'client': client,
                'bucket': call_args.bucket,
                'key': call_args.key,
                'extra_args': complete_extra_args,
            },
            pending_main_kwargs={
                'upload_id': upload_id_future,
                'parts': part_futures
            },
            is_final=True,
        ),
    )
def _submit_multipart_request(self, client, config, osutil,
                              request_executor, transfer_future):
    """Submit the create, copy-part and complete tasks for a copy.

    Tasks are handed to ``self._transfer_coordinator`` in this order: one
    ``CreateMultipartUploadTask``, one ``CopyPartTask`` per part, and a
    final ``CompleteMultipartUploadTask``.

    :param client: Client passed through to every task.
    :param config: Supplies ``multipart_chunksize``, the requested part size.
    :param osutil: Not used by this method.
    :param request_executor: Executor every task is submitted with.
    :param transfer_future: Provides ``meta.call_args`` (copy_source,
        bucket, key, extra_args) and ``meta.size``.
    """
    coordinator = self._transfer_coordinator
    call_args = transfer_future.meta.call_args

    # The create request must not carry any blacklisted argument (the ones
    # used for copy part).
    create_extra_args = {
        name: value
        for name, value in call_args.extra_args.items()
        if name not in self.CREATE_MULTIPART_ARGS_BLACKLIST
    }

    # The returned future is handed to the later tasks as their pending
    # 'upload_id'.
    upload_id_future = coordinator.submit(
        request_executor,
        CreateMultipartUploadTask(
            transfer_coordinator=coordinator,
            main_kwargs={
                'client': client,
                'bucket': call_args.bucket,
                'key': call_args.key,
                'extra_args': create_extra_args,
            },
        ),
    )

    # Work out the part count from the transfer size and the adjusted
    # chunksize, rounding up so a trailing partial part is counted.
    requested_chunksize = config.multipart_chunksize
    total_size = transfer_future.meta.size
    part_size = ChunksizeAdjuster().adjust_chunksize(
        requested_chunksize, total_size)
    num_parts = int(math.ceil(total_size / float(part_size)))

    progress_callbacks = get_callbacks(transfer_future, 'progress')

    # Submit one copy-part task per part.
    part_futures = []
    for part_index in range(num_parts):
        # Range and size helpers take a zero-based index, while the part
        # number sent with the request starts at 1.
        part_number = part_index + 1

        # Built afresh on every iteration because the range entry is
        # written into it below.
        part_extra_args = self._extra_upload_part_args(call_args.extra_args)
        part_extra_args['CopySourceRange'] = calculate_range_parameter(
            part_size, part_index, num_parts, total_size)

        # Size of this part's copy, passed along with the progress
        # callbacks.
        part_length = self._get_transfer_size(
            part_size, part_index, num_parts, total_size)

        copy_task = CopyPartTask(
            transfer_coordinator=coordinator,
            main_kwargs={
                'client': client,
                'copy_source': call_args.copy_source,
                'bucket': call_args.bucket,
                'key': call_args.key,
                'part_number': part_number,
                'extra_args': part_extra_args,
                'callbacks': progress_callbacks,
                'size': part_length
            },
            pending_main_kwargs={
                'upload_id': upload_id_future
            },
        )
        part_futures.append(coordinator.submit(request_executor, copy_task))

    complete_extra_args = self._extra_complete_multipart_args(
        call_args.extra_args)

    # Submit the request to complete the multipart upload; it is marked as
    # the final task and receives every part future.
    coordinator.submit(
        request_executor,
        CompleteMultipartUploadTask(
            transfer_coordinator=coordinator,
            main_kwargs={
                'client': client,
                'bucket': call_args.bucket,
                'key': call_args.key,
                'extra_args': complete_extra_args,
            },
            pending_main_kwargs={
                'upload_id': upload_id_future,
                'parts': part_futures
            },
            is_final=True,
        ),
    )