def _enqueue_multipart_upload_tasks(self, filename,
                                    remove_local_file=False):
    # First we need to create a CreateMultipartUpload task,
    # then create UploadTask objects for each of the parts.
    # And finally enqueue a CompleteMultipartUploadTask.
    chunksize = find_chunksize(filename.size, self.chunksize)
    num_uploads = int(math.ceil(filename.size / float(chunksize)))
    upload_context = tasks.MultipartUploadContext(
        expected_parts=num_uploads)
    create_multipart_upload_task = tasks.CreateMultipartUploadTask(
        session=self.session, filename=filename,
        parameters=self.params,
        result_queue=self.result_queue, upload_context=upload_context)
    self.executer.submit(create_multipart_upload_task)
    for i in range(1, num_uploads + 1):
        task = tasks.UploadPartTask(
            part_number=i, chunk_size=chunksize,
            result_queue=self.result_queue,
            upload_context=upload_context, filename=filename)
        self.executer.submit(task)
    complete_multipart_upload_task = tasks.CompleteMultipartUploadTask(
        session=self.session, filename=filename,
        parameters=self.params,
        result_queue=self.result_queue, upload_context=upload_context)
    self.executer.submit(complete_multipart_upload_task)
    self._multipart_uploads.append((upload_context, filename))
    if remove_local_file:
        remove_task = tasks.RemoveFileTask(
            local_filename=filename.src, upload_context=upload_context)
        self.executer.submit(remove_task)
    return num_uploads
def _enqueue_range_download_tasks(self, filename,
                                  remove_remote_file=False):
    chunksize = find_chunksize(filename.size, self.chunksize)
    num_downloads = int(filename.size / chunksize)
    context = tasks.MultipartDownloadContext(num_downloads)
    create_file_task = tasks.CreateLocalFileTask(
        context=context, filename=filename)
    self.executor.submit(create_file_task)
    for i in range(num_downloads):
        task = tasks.DownloadPartTask(
            part_number=i, chunk_size=chunksize,
            result_queue=self.result_queue, service=filename.service,
            filename=filename, context=context,
            io_queue=self.write_queue)
        self.executor.submit(task)
    complete_file_task = tasks.CompleteDownloadTask(
        context=context, filename=filename,
        result_queue=self.result_queue, params=self.params,
        io_queue=self.write_queue)
    self.executor.submit(complete_file_task)
    self._multipart_downloads.append((context, filename.dest))
    if remove_remote_file:
        remove_task = tasks.RemoveRemoteObjectTask(
            filename=filename, context=context)
        self.executor.submit(remove_task)
    return num_downloads
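# Note the asymmetry above: the upload path sizes its part count with
# math.ceil, so a trailing partial chunk becomes its own part, while
# this download path truncates with int(), presumably leaving the
# final DownloadPartTask to absorb the remainder of the object. A
# purely illustrative example of the two counts:
import math

size = 10 * (1024 ** 2) + 1   # 10 MiB plus one byte
chunksize = 5 * (1024 ** 2)   # 5 MiB parts

num_uploads = int(math.ceil(size / float(chunksize)))   # 3 parts
num_downloads = int(size / chunksize)                   # 2 ranges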
def test_small_chunk(self):
    """
    This test ensures that if the ``chunksize`` is below the minimum
    threshold, it is automatically raised to the minimum.
    """
    chunksize = MIN_UPLOAD_CHUNKSIZE - 1
    size = 3 * MIN_UPLOAD_CHUNKSIZE
    self.assertEqual(find_chunksize(size, chunksize),
                     MIN_UPLOAD_CHUNKSIZE)
def test_large_chunk(self):
    """
    This test ensures that the ``chunksize`` adapts to an appropriate
    size when the original ``chunksize`` is too small.
    """
    chunksize = 7 * (1024 ** 2)
    size = 8 * (1024 ** 3)
    self.assertEqual(find_chunksize(size, chunksize), chunksize * 2)
def test_small_chunk(self):
    """
    This test ensures that if the ``chunksize`` is appropriate to
    begin with, it does not change.
    """
    chunksize = 7 * (1024 ** 2)
    size = 8 * (1024 ** 2)
    self.assertEqual(find_chunksize(size, chunksize), chunksize)
def test_super_chunk(self):
    """
    This test ensures that the ``chunksize`` can never be larger
    than ``MAX_SINGLE_UPLOAD_SIZE``.
    """
    chunksize = MAX_SINGLE_UPLOAD_SIZE + 1
    size = MAX_SINGLE_UPLOAD_SIZE * 2
    self.assertEqual(find_chunksize(size, chunksize),
                     MAX_SINGLE_UPLOAD_SIZE)
def test_large_chunk(self):
    """
    This test ensures that the ``chunksize`` adapts to an appropriate
    size when the original ``chunksize`` is too small.
    """
    chunksize = 7 * (1024 ** 2)
    size = 5 * (1024 ** 4)
    # If we try to upload a 5TB file, we'll need to use 896MB part
    # sizes.
    self.assertEqual(find_chunksize(size, chunksize), 896 * (1024 ** 2))
def _enqueue_multipart_copy_tasks(self, filename,
                                  remove_remote_file=False):
    chunksize = find_chunksize(filename.size, self.chunksize)
    num_uploads = int(math.ceil(filename.size / float(chunksize)))
    upload_context = self._enqueue_upload_start_task(
        chunksize, num_uploads, filename)
    self._enqueue_upload_tasks(
        num_uploads, chunksize, upload_context, filename,
        tasks.CopyPartTask)
    self._enqueue_upload_end_task(filename, upload_context)
    if remove_remote_file:
        remove_task = tasks.RemoveRemoteObjectTask(
            filename=filename, context=upload_context)
        self.executor.submit(remove_task)
    return num_uploads
def _enqueue_multipart_upload_tasks(self, filename,
                                    remove_local_file=False):
    # First we need to create a CreateMultipartUpload task,
    # then create UploadTask objects for each of the parts.
    # And finally enqueue a CompleteMultipartUploadTask.
    chunksize = find_chunksize(filename.size, self.chunksize)
    num_uploads = int(math.ceil(filename.size / float(chunksize)))
    upload_context = self._enqueue_upload_start_task(
        chunksize, num_uploads, filename)
    self._enqueue_upload_tasks(
        num_uploads, chunksize, upload_context, filename,
        tasks.UploadPartTask)
    self._enqueue_upload_end_task(filename, upload_context)
    if remove_local_file:
        remove_task = tasks.RemoveFileTask(
            local_filename=filename.src, upload_context=upload_context)
        self.executor.submit(remove_task)
    return num_uploads
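# A minimal sketch of the start/end helpers used by the refactored
# methods above, inferred from the earlier inline version of
# _enqueue_multipart_upload_tasks; the exact signatures and the
# placement of the _multipart_uploads bookkeeping are assumptions.
def _enqueue_upload_start_task(self, chunksize, num_uploads, filename):
    # Create the shared context and submit the task that performs the
    # CreateMultipartUpload call; part tasks synchronize on the context.
    upload_context = tasks.MultipartUploadContext(
        expected_parts=num_uploads)
    create_multipart_upload_task = tasks.CreateMultipartUploadTask(
        session=self.session, filename=filename,
        parameters=self.params,
        result_queue=self.result_queue, upload_context=upload_context)
    self.executor.submit(create_multipart_upload_task)
    return upload_context

def _enqueue_upload_end_task(self, filename, upload_context):
    # Submit the CompleteMultipartUpload call and track the upload so
    # that it can be aborted if something goes wrong.
    complete_multipart_upload_task = tasks.CompleteMultipartUploadTask(
        session=self.session, filename=filename,
        parameters=self.params,
        result_queue=self.result_queue, upload_context=upload_context)
    self.executor.submit(complete_multipart_upload_task)
    self._multipart_uploads.append((upload_context, filename))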
def _upload(self, manager, bucket, key):
    """
    Upload stdin to the specified location.

    :type manager: s3transfer.manager.TransferManager
    :param manager: The transfer manager to use for the upload.

    :type bucket: str
    :param bucket: The bucket to upload the stream to.

    :type key: str
    :param key: The name of the key to upload the stream to.

    :return: A CommandResult representing the upload status.
    """
    expected_size = self.params.get('expected_size', None)
    subscribers = None
    if expected_size is not None:
        # `expected_size` comes in as a string
        expected_size = int(expected_size)
        # Set the size of the transfer if we know it ahead of time.
        subscribers = [ProvideSizeSubscriber(expected_size)]

        # TODO: remove when this happens in s3transfer
        # If we have the expected size, we can calculate an appropriate
        # chunksize based on max parts and chunksize limits.
        chunksize = find_chunksize(expected_size,
                                   self.config.multipart_chunksize)
    else:
        # TODO: remove when this happens in s3transfer
        # Otherwise, we can still adjust for chunksize limits.
        chunksize = adjust_chunksize_to_upload_limits(
            self.config.multipart_chunksize)
    self.config.multipart_chunksize = chunksize

    params = {}
    RequestParamsMapper.map_put_object_params(params, self.params)

    fileobj = NonSeekableStream(binary_stdin)
    with manager:
        future = manager.upload(
            fileobj=fileobj, bucket=bucket, key=key,
            extra_args=params, subscribers=subscribers)
        return self._process_transfer(future)
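# A minimal sketch of the chunksize clamp assumed above:
# adjust_chunksize_to_upload_limits keeps the configured chunksize
# within S3's per-part bounds (5 MiB to 5 GiB). The constant names
# follow the tests in this section; any warning the real helper emits
# when it changes the value is not reproduced here.
MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2)    # S3 minimum part size
MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3)  # S3 maximum part size

def adjust_chunksize_to_upload_limits(chunksize):
    # Clamp into [MIN_UPLOAD_CHUNKSIZE, MAX_SINGLE_UPLOAD_SIZE].
    return max(min(chunksize, MAX_SINGLE_UPLOAD_SIZE),
               MIN_UPLOAD_CHUNKSIZE)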
def _enqueue_range_download_tasks(self, filename,
                                  remove_remote_file=False):
    # Create the context for the multipart download.
    chunksize = find_chunksize(filename.size, self.chunksize)
    num_downloads = int(filename.size / chunksize)
    context = tasks.MultipartDownloadContext(num_downloads)

    # No file is needed for downloading a stream. So just announce
    # that it has been made since it is required for the context to
    # begin downloading.
    context.announce_file_created()

    # Submit download part tasks to the executor.
    self._do_enqueue_range_download_tasks(
        filename=filename, chunksize=chunksize,
        num_downloads=num_downloads, context=context,
        remove_remote_file=remove_remote_file
    )
    return num_downloads
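# A minimal sketch of the _do_enqueue_range_download_tasks helper
# called above, inferred from the inline range-download version
# earlier in this section; treat the signature and body as
# assumptions rather than the actual implementation.
def _do_enqueue_range_download_tasks(self, filename, chunksize,
                                     num_downloads, context,
                                     remove_remote_file=False):
    # Submit one DownloadPartTask per range of the object.
    for i in range(num_downloads):
        task = tasks.DownloadPartTask(
            part_number=i, chunk_size=chunksize,
            result_queue=self.result_queue, service=filename.service,
            filename=filename, context=context,
            io_queue=self.write_queue)
        self.executor.submit(task)
    # Optionally delete the remote object once the download finishes.
    if remove_remote_file:
        remove_task = tasks.RemoveRemoteObjectTask(
            filename=filename, context=context)
        self.executor.submit(remove_task)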
def _enqueue_multipart_upload_tasks(self, filename, payload=None):
    # First we need to create a CreateMultipartUpload task,
    # then create UploadTask objects for each of the parts.
    # And finally enqueue a CompleteMultipartUploadTask.
    if self.params['expected_size']:
        # If we have the expected size, we can calculate an appropriate
        # chunksize based on max parts and chunksize limits.
        chunksize = find_chunksize(int(self.params['expected_size']),
                                   self.chunksize)
    else:
        # Otherwise, we can still adjust for chunksize limits.
        chunksize = adjust_chunksize_to_upload_limits(self.chunksize)

    # The total number of parts is unknown when reading from a stream,
    # so use a placeholder until all parts have been submitted.
    num_uploads = '...'

    # Submit a task to begin the multipart upload.
    upload_context = self._enqueue_upload_start_task(
        chunksize, num_uploads, filename)

    # Now submit a task to upload the initial chunk of data pulled
    # from the stream that was used to determine if a multipart
    # upload was needed.
    self._enqueue_upload_single_part_task(
        part_number=1, chunk_size=chunksize,
        upload_context=upload_context, filename=filename,
        task_class=tasks.UploadPartTask, payload=payload)

    # Submit tasks to upload the rest of the chunks of the data
    # coming in from standard input.
    num_uploads = self._enqueue_upload_tasks(
        num_uploads, chunksize, upload_context, filename,
        tasks.UploadPartTask)

    # Submit a task to notify the multipart upload is complete.
    self._enqueue_upload_end_task(filename, upload_context)
    return num_uploads
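# A stream-aware _enqueue_upload_tasks cannot know the part count up
# front, which is why num_uploads starts as the '...' placeholder; it
# presumably reads fixed-size payloads until EOF and only then knows
# the real total. A standalone sketch of that loop, where submit_part
# is a hypothetical callback standing in for task submission:
def enqueue_stream_parts(stream, chunksize, submit_part):
    # Part 1 was already consumed when deciding whether a multipart
    # upload was needed, so streaming resumes at part 2.
    part_number = 2
    while True:
        payload = stream.read(chunksize)
        if not payload:
            break
        submit_part(part_number, payload)
        part_number += 1
    # The now-known total number of parts.
    return part_number - 1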
def call(self, files):
    """
    This function pulls a ``FileInfo`` or ``TaskInfo`` object from
    the list ``files``. Each object is checked to see whether it
    requires a multipart operation, and the necessary attributes are
    added if so. Each object is then wrapped in a ``BasicTask``
    object, which is essentially a unit of work for a thread to
    execute. These tasks are then submitted to the main executer.
    """
    self.done.clear()
    self.interrupt.clear()
    try:
        self.executer.start()
        tot_files = 0
        tot_parts = 0
        for filename in files:
            num_uploads = 1
            is_larger = False
            chunksize = self.chunksize
            too_large = False
            if hasattr(filename, 'size'):
                is_larger = filename.size > self.multi_threshold
                too_large = filename.size > MAX_UPLOAD_SIZE
            if is_larger:
                if filename.operation == 'upload':
                    num_uploads = int(math.ceil(filename.size /
                                                float(chunksize)))
                    chunksize = find_chunksize(filename.size, chunksize)
                    filename.set_multi(executer=self.executer,
                                       printQueue=self.printQueue,
                                       interrupt=self.interrupt,
                                       chunksize=chunksize)
                elif filename.operation == 'download':
                    num_uploads = int(filename.size / chunksize)
                    filename.set_multi(executer=self.executer,
                                       printQueue=self.printQueue,
                                       interrupt=self.interrupt,
                                       chunksize=chunksize)
            task = BasicTask(session=self.session, filename=filename,
                             executer=self.executer, done=self.done,
                             parameters=self.params,
                             multi_threshold=self.multi_threshold,
                             chunksize=chunksize,
                             printQueue=self.printQueue,
                             interrupt=self.interrupt)
            if too_large and filename.operation == 'upload':
                warning = "Warning %s exceeds 5 TB and upload is " \
                    "being skipped" % os.path.relpath(filename.src)
                self.printQueue.put({'result': warning})
            else:
                self.executer.submit(task)
                tot_files += 1
                tot_parts += num_uploads
        self.executer.print_thread.totalFiles = tot_files
        self.executer.print_thread.totalParts = tot_parts
        self.executer.wait()
        self.printQueue.join()
    except Exception as e:
        LOGGER.debug('%s' % str(e))
    except KeyboardInterrupt:
        self.interrupt.set()
        self.printQueue.put({'result': "Cleaning up. Please wait..."})
    self.done.set()
    self.executer.join()
def test_file_too_large(self):
    """
    This test ensures that a ``size`` larger than ``MAX_UPLOAD_SIZE``
    raises a ``ValueError``.
    """
    size = MAX_UPLOAD_SIZE + 1
    chunksize = 1
    with self.assertRaises(ValueError):
        find_chunksize(size, chunksize)
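# Taken together, these tests pin down the expected behavior of
# find_chunksize: reject sizes above MAX_UPLOAD_SIZE, clamp the
# starting chunksize into the per-part bounds, and double it until
# the part count fits the parts limit. A minimal sketch consistent
# with the 5 TiB and ValueError tests above; MAX_PARTS = 10000 is an
# assumption (S3's documented limit), and the older 8 GiB test that
# expects ``chunksize * 2`` implies a smaller parts limit in that
# version of the code.
import math

MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2)    # 5 MiB minimum part size
MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3)  # 5 GiB maximum part size
MAX_UPLOAD_SIZE = 5 * (1024 ** 4)         # 5 TiB maximum object size
MAX_PARTS = 10000                         # S3 part-count limit

def find_chunksize(size, current_chunksize):
    # Refuse sizes S3 cannot store at all.
    if size > MAX_UPLOAD_SIZE:
        raise ValueError("File cannot exceed the maximum upload size.")
    # Start from a chunksize within the per-part bounds.
    chunksize = max(min(current_chunksize, MAX_SINGLE_UPLOAD_SIZE),
                    MIN_UPLOAD_CHUNKSIZE)
    # Double until the resulting part count fits under MAX_PARTS.
    while int(math.ceil(size / float(chunksize))) > MAX_PARTS:
        chunksize *= 2
    return chunksize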