Esempio n. 1
0
    def _enqueue_multipart_upload_tasks(self, filename,
                                        remove_local_file=False):
        """Submit all tasks needed to upload ``filename`` as a multipart upload.

        Submission order on ``self.executer``: one
        ``CreateMultipartUploadTask``, one ``UploadPartTask`` per part
        (part numbers start at 1), one ``CompleteMultipartUploadTask``
        and, if requested, one ``RemoveFileTask`` for ``filename.src``.
        The upload context and ``filename`` are also recorded in
        ``self._multipart_uploads``.

        :param filename: transfer description; its ``size`` and ``src``
            attributes are read here.
        :param remove_local_file: when true, also submit a
            ``RemoveFileTask`` for ``filename.src``.
        :return: the number of ``UploadPartTask`` objects submitted.
        """
        part_size = find_chunksize(filename.size, self.chunksize)
        # Round up so a trailing partial chunk still gets its own part.
        part_count = int(math.ceil(filename.size / float(part_size)))
        context = tasks.MultipartUploadContext(expected_parts=part_count)

        self.executer.submit(tasks.CreateMultipartUploadTask(
            session=self.session, filename=filename,
            parameters=self.params, result_queue=self.result_queue,
            upload_context=context))

        for part_number in range(1, part_count + 1):
            self.executer.submit(tasks.UploadPartTask(
                part_number=part_number, chunk_size=part_size,
                result_queue=self.result_queue, upload_context=context,
                filename=filename))

        self.executer.submit(tasks.CompleteMultipartUploadTask(
            session=self.session, filename=filename,
            parameters=self.params, result_queue=self.result_queue,
            upload_context=context))
        self._multipart_uploads.append((context, filename))

        if remove_local_file:
            self.executer.submit(tasks.RemoveFileTask(
                local_filename=filename.src, upload_context=context))
        return part_count
Esempio n. 2
0
 def _enqueue_range_download_tasks(self, filename, remove_remote_file=False):
     """Submit all tasks needed to download ``filename`` in ranged parts.

     Submission order on ``self.executor``: one ``CreateLocalFileTask``,
     one ``DownloadPartTask`` per part (part numbers start at 0), one
     ``CompleteDownloadTask`` and, if requested, one
     ``RemoveRemoteObjectTask``.  The download context and
     ``filename.dest`` are also recorded in ``self._multipart_downloads``.

     :param filename: transfer description; its ``size``, ``service`` and
         ``dest`` attributes are read here.
     :param remove_remote_file: when true, also submit a
         ``RemoveRemoteObjectTask`` for ``filename``.
     :return: the number of ``DownloadPartTask`` objects submitted.
     """
     part_size = find_chunksize(filename.size, self.chunksize)
     # The quotient is truncated here, not rounded up.
     part_count = int(filename.size / part_size)
     download_context = tasks.MultipartDownloadContext(part_count)

     self.executor.submit(
         tasks.CreateLocalFileTask(context=download_context, filename=filename))

     for part_index in range(part_count):
         self.executor.submit(tasks.DownloadPartTask(
             part_number=part_index,
             chunk_size=part_size,
             result_queue=self.result_queue,
             service=filename.service,
             filename=filename,
             context=download_context,
             io_queue=self.write_queue,
         ))

     self.executor.submit(tasks.CompleteDownloadTask(
         context=download_context,
         filename=filename,
         result_queue=self.result_queue,
         params=self.params,
         io_queue=self.write_queue,
     ))
     self._multipart_downloads.append((download_context, filename.dest))

     if remove_remote_file:
         self.executor.submit(tasks.RemoveRemoteObjectTask(
             filename=filename, context=download_context))
     return part_count
Esempio n. 3
0
 def _enqueue_range_download_tasks(self,
                                   filename,
                                   remove_remote_file=False):
     """Queue the tasks that download ``filename`` as a series of parts.

     The tasks go to ``self.executor`` in this order: a
     ``CreateLocalFileTask``, a ``DownloadPartTask`` for each part
     (numbered from 0), a ``CompleteDownloadTask`` and, only when
     ``remove_remote_file`` is true, a ``RemoveRemoteObjectTask``.
     ``(context, filename.dest)`` is appended to
     ``self._multipart_downloads``.

     :param filename: transfer description; ``size``, ``service`` and
         ``dest`` are read from it.
     :param remove_remote_file: when true, a ``RemoveRemoteObjectTask``
         for ``filename`` is queued last.
     :return: how many ``DownloadPartTask`` objects were queued.
     """
     bytes_per_part = find_chunksize(filename.size, self.chunksize)
     # Truncating conversion: the quotient is not rounded up.
     total_parts = int(filename.size / bytes_per_part)
     ctx = tasks.MultipartDownloadContext(total_parts)

     file_creation = tasks.CreateLocalFileTask(context=ctx, filename=filename)
     self.executor.submit(file_creation)

     for number in range(total_parts):
         part_download = tasks.DownloadPartTask(
             part_number=number,
             chunk_size=bytes_per_part,
             result_queue=self.result_queue,
             service=filename.service,
             filename=filename,
             context=ctx,
             io_queue=self.write_queue)
         self.executor.submit(part_download)

     completion = tasks.CompleteDownloadTask(
         context=ctx,
         filename=filename,
         result_queue=self.result_queue,
         params=self.params,
         io_queue=self.write_queue)
     self.executor.submit(completion)
     self._multipart_downloads.append((ctx, filename.dest))

     if remove_remote_file:
         removal = tasks.RemoveRemoteObjectTask(filename=filename, context=ctx)
         self.executor.submit(removal)
     return total_parts
Esempio n. 4
0
 def test_small_chunk(self):
     """
     A requested ``chunksize`` one below ``MIN_UPLOAD_CHUNKSIZE`` must
     come back from ``find_chunksize`` as ``MIN_UPLOAD_CHUNKSIZE``.
     """
     below_minimum = MIN_UPLOAD_CHUNKSIZE - 1
     file_size = 3 * MIN_UPLOAD_CHUNKSIZE
     adjusted = find_chunksize(file_size, below_minimum)
     self.assertEqual(adjusted, MIN_UPLOAD_CHUNKSIZE)
Esempio n. 5
0
 def test_large_chunk(self):
     """
     For a file of 8 * 1024**3 and a requested ``chunksize`` of
     7 * 1024**2, ``find_chunksize`` is expected to return twice the
     requested value because the original is too small.
     """
     mebibyte = 1024 ** 2
     requested = 7 * mebibyte
     file_size = 8 * 1024 * mebibyte
     adjusted = find_chunksize(file_size, requested)
     self.assertEqual(adjusted, requested * 2)
Esempio n. 6
0
 def test_small_chunk(self):
     """
     A ``chunksize`` that is already appropriate for the file size must
     be returned unchanged by ``find_chunksize``.
     """
     mebibyte = 1024 ** 2
     requested = 7 * mebibyte
     file_size = 8 * mebibyte
     adjusted = find_chunksize(file_size, requested)
     self.assertEqual(adjusted, requested)
Esempio n. 7
0
 def test_small_chunk(self):
     """
     ``find_chunksize`` must raise a ``chunksize`` that is below
     ``MIN_UPLOAD_CHUNKSIZE`` up to exactly ``MIN_UPLOAD_CHUNKSIZE``.
     """
     requested = MIN_UPLOAD_CHUNKSIZE - 1
     total_size = 3 * MIN_UPLOAD_CHUNKSIZE
     result = find_chunksize(total_size, requested)
     self.assertEqual(result, MIN_UPLOAD_CHUNKSIZE)
Esempio n. 8
0
 def test_small_chunk(self):
     """
     ``find_chunksize`` must leave the ``chunksize`` alone when it is
     already appropriate for the given size.
     """
     unit = 1024**2
     starting_chunksize = 7 * unit
     total_size = 8 * unit
     result = find_chunksize(total_size, starting_chunksize)
     self.assertEqual(result, starting_chunksize)
Esempio n. 9
0
 def test_super_chunk(self):
     """
     A requested ``chunksize`` above ``MAX_SINGLE_UPLOAD_SIZE`` must be
     capped at ``MAX_SINGLE_UPLOAD_SIZE``.
     """
     above_maximum = MAX_SINGLE_UPLOAD_SIZE + 1
     file_size = MAX_SINGLE_UPLOAD_SIZE * 2
     adjusted = find_chunksize(file_size, above_maximum)
     self.assertEqual(adjusted, MAX_SINGLE_UPLOAD_SIZE)
Esempio n. 10
0
 def test_large_chunk(self):
     """
     When the starting ``chunksize`` (7 * 1024**2) is too small for the
     size (8 * 1024**3), ``find_chunksize`` is expected to return double
     the starting value.
     """
     starting_chunksize = 7 * (1024 ** 2)
     total_size = 8 * (1024 ** 3)
     expected = starting_chunksize * 2
     self.assertEqual(find_chunksize(total_size, starting_chunksize),
                      expected)
Esempio n. 11
0
 def test_super_chunk(self):
     """
     ``find_chunksize`` must never return a ``chunksize`` larger than
     ``MAX_SINGLE_UPLOAD_SIZE``, even when a larger one is requested.
     """
     requested = MAX_SINGLE_UPLOAD_SIZE + 1
     total_size = MAX_SINGLE_UPLOAD_SIZE * 2
     result = find_chunksize(total_size, requested)
     self.assertEqual(result, MAX_SINGLE_UPLOAD_SIZE)
Esempio n. 12
0
 def test_large_chunk(self):
     """
     When the starting ``chunksize`` is too small for the file,
     ``find_chunksize`` must adapt it to an appropriate size.
     """
     mebibyte = 1024**2
     requested = 7 * mebibyte
     file_size = 5 * (1024**4)
     # A 5TB upload is expected to need 896MB parts.
     expected = 896 * mebibyte
     self.assertEqual(find_chunksize(file_size, requested), expected)
Esempio n. 13
0
 def _enqueue_multipart_copy_tasks(self, filename, remove_remote_file=False):
     """Queue the start, part and end tasks for a multipart copy.

     The part tasks are created through ``self._enqueue_upload_tasks``
     with ``tasks.CopyPartTask`` as the task class.  When
     ``remove_remote_file`` is true, a ``RemoveRemoteObjectTask`` for
     ``filename`` is submitted to ``self.executor`` afterwards.

     :param filename: transfer description; its ``size`` is read here.
     :param remove_remote_file: whether to queue the removal task.
     :return: the part count, i.e. ``size`` divided by the chunksize
         from ``find_chunksize``, rounded up.
     """
     part_size = find_chunksize(filename.size, self.chunksize)
     part_count = int(math.ceil(filename.size / float(part_size)))
     context = self._enqueue_upload_start_task(part_size, part_count, filename)
     self._enqueue_upload_tasks(
         part_count, part_size, context, filename, tasks.CopyPartTask)
     self._enqueue_upload_end_task(filename, context)
     if not remove_remote_file:
         return part_count
     self.executor.submit(
         tasks.RemoveRemoteObjectTask(filename=filename, context=context))
     return part_count
Esempio n. 14
0
 def test_large_chunk(self):
     """
     ``find_chunksize`` must grow a starting ``chunksize`` that is too
     small for the size being transferred.
     """
     starting_chunksize = 7 * (1024 ** 2)
     total_size = 5 * (1024 ** 4)
     # For a 5TB file the expected part size is 896MB.
     result = find_chunksize(total_size, starting_chunksize)
     self.assertEqual(result, 896 * (1024 ** 2))
Esempio n. 15
0
 def _enqueue_multipart_upload_tasks(self, filename, remove_local_file=False):
     """Queue the start, part and end tasks for a multipart upload.

     The part tasks are created through ``self._enqueue_upload_tasks``
     with ``tasks.UploadPartTask`` as the task class.  When
     ``remove_local_file`` is true, a ``RemoveFileTask`` for
     ``filename.src`` is submitted to ``self.executor`` afterwards.

     :param filename: transfer description; its ``size`` and ``src``
         attributes are read here.
     :param remove_local_file: whether to queue the removal task.
     :return: the part count, i.e. ``size`` divided by the chunksize
         from ``find_chunksize``, rounded up.
     """
     part_size = find_chunksize(filename.size, self.chunksize)
     part_count = int(math.ceil(filename.size / float(part_size)))
     context = self._enqueue_upload_start_task(part_size, part_count, filename)
     self._enqueue_upload_tasks(
         part_count, part_size, context, filename, tasks.UploadPartTask)
     self._enqueue_upload_end_task(filename, context)
     if not remove_local_file:
         return part_count
     self.executor.submit(tasks.RemoveFileTask(
         local_filename=filename.src, upload_context=context))
     return part_count
Esempio n. 16
0
 def _enqueue_multipart_copy_tasks(self, filename,
                                   remove_remote_file=False):
     """Enqueue everything a multipart copy of ``filename`` needs.

     Calls, in order, ``_enqueue_upload_start_task``,
     ``_enqueue_upload_tasks`` (with ``tasks.CopyPartTask``) and
     ``_enqueue_upload_end_task``.  A ``RemoveRemoteObjectTask`` is
     submitted last, but only when ``remove_remote_file`` is true.

     :param filename: transfer description; ``size`` is read from it.
     :param remove_remote_file: when true, also submit the
         ``RemoveRemoteObjectTask``.
     :return: the number of parts (rounded-up ``size`` / chunksize).
     """
     bytes_per_part = find_chunksize(filename.size, self.chunksize)
     total_parts = int(math.ceil(filename.size / float(bytes_per_part)))
     ctx = self._enqueue_upload_start_task(
         bytes_per_part, total_parts, filename)
     self._enqueue_upload_tasks(
         total_parts, bytes_per_part, ctx, filename, tasks.CopyPartTask)
     self._enqueue_upload_end_task(filename, ctx)
     if remove_remote_file:
         self.executor.submit(
             tasks.RemoveRemoteObjectTask(filename=filename, context=ctx))
     return total_parts
Esempio n. 17
0
    def _upload(self, manager, bucket, key):
        """
        Upload the data read from stdin to the specified location.

        Before the upload starts, ``self.config.multipart_chunksize`` is
        overwritten with an adjusted chunksize.

        :type manager: s3transfer.manager.TransferManager
        :param manager: The transfer manager to use for the upload.

        :type bucket: str
        :param bucket: The bucket to upload the stream to.

        :type key: str
        :param key: The name of the key to upload the stream to.

        :return: Whatever ``self._process_transfer`` returns for the
            upload future (described by the original author as a
            CommandResult representing the upload status).
        """
        subscribers = None
        expected_size = self.params.get('expected_size', None)
        if expected_size is None:
            # TODO: remove when this happens in s3transfer
            # Size unknown: only the chunksize limits can be applied.
            chunksize = adjust_chunksize_to_upload_limits(
                self.config.multipart_chunksize)
        else:
            # `expected_size` comes in as a string
            expected_size = int(expected_size)

            # Announce the transfer size since it is known ahead of time.
            subscribers = [ProvideSizeSubscriber(expected_size)]

            # TODO: remove when this happens in s3transfer
            # With a known size, an appropriate chunksize can be derived
            # from the max parts and chunksize limits.
            chunksize = find_chunksize(expected_size,
                                       self.config.multipart_chunksize)
        self.config.multipart_chunksize = chunksize

        extra_args = {}
        RequestParamsMapper.map_put_object_params(extra_args, self.params)

        stream = NonSeekableStream(binary_stdin)
        with manager:
            upload_future = manager.upload(
                fileobj=stream, bucket=bucket, key=key,
                extra_args=extra_args, subscribers=subscribers)

            return self._process_transfer(upload_future)
Esempio n. 18
0
 def _enqueue_multipart_upload_tasks(self,
                                     filename,
                                     remove_local_file=False):
     """Enqueue everything a multipart upload of ``filename`` needs.

     Calls, in order, ``_enqueue_upload_start_task``,
     ``_enqueue_upload_tasks`` (with ``tasks.UploadPartTask``) and
     ``_enqueue_upload_end_task``.  A ``RemoveFileTask`` for
     ``filename.src`` is submitted last, but only when
     ``remove_local_file`` is true.

     :param filename: transfer description; ``size`` and ``src`` are
         read from it.
     :param remove_local_file: when true, also submit the
         ``RemoveFileTask``.
     :return: the number of parts (rounded-up ``size`` / chunksize).
     """
     bytes_per_part = find_chunksize(filename.size, self.chunksize)
     total_parts = int(math.ceil(filename.size / float(bytes_per_part)))
     ctx = self._enqueue_upload_start_task(
         bytes_per_part, total_parts, filename)
     self._enqueue_upload_tasks(
         total_parts, bytes_per_part, ctx, filename, tasks.UploadPartTask)
     self._enqueue_upload_end_task(filename, ctx)
     if remove_local_file:
         self.executor.submit(tasks.RemoveFileTask(
             local_filename=filename.src, upload_context=ctx))
     return total_parts
Esempio n. 19
0
    def _enqueue_range_download_tasks(self, filename, remove_remote_file=False):
        """Queue the ranged download tasks for a streamed download.

        A ``MultipartDownloadContext`` is created and immediately told
        that the file exists, then the actual task submission is
        delegated to ``self._do_enqueue_range_download_tasks``.

        :param filename: transfer description; ``size`` is read from it.
        :param remove_remote_file: forwarded unchanged to
            ``_do_enqueue_range_download_tasks``.
        :return: the number of downloads, i.e. the truncated quotient of
            ``size`` and the chunksize from ``find_chunksize``.
        """
        part_size = find_chunksize(filename.size, self.chunksize)
        part_count = int(filename.size / part_size)
        download_context = tasks.MultipartDownloadContext(part_count)

        # A stream needs no local file, but the context must still be
        # told that one was created before downloading can begin.
        download_context.announce_file_created()

        # Hand the part tasks over to the executor.
        self._do_enqueue_range_download_tasks(
            filename=filename,
            chunksize=part_size,
            num_downloads=part_count,
            context=download_context,
            remove_remote_file=remove_remote_file,
        )
        return part_count
Esempio n. 20
0
    def _enqueue_range_download_tasks(self, filename, remove_remote_file=False):
        """Set up the download context for a stream and queue its parts.

        The context is marked as having its file created right away,
        and ``self._do_enqueue_range_download_tasks`` then submits the
        tasks.

        :param filename: transfer description; ``size`` is read from it.
        :param remove_remote_file: passed through to
            ``_do_enqueue_range_download_tasks``.
        :return: ``int(size / chunksize)``, the number of downloads.
        """
        bytes_per_part = find_chunksize(filename.size, self.chunksize)
        total_parts = int(filename.size / bytes_per_part)
        ctx = tasks.MultipartDownloadContext(total_parts)

        # Downloading a stream does not need a file on disk; announcing
        # the creation anyway is required for the context to begin
        # downloading.
        ctx.announce_file_created()

        enqueue_kwargs = {
            'filename': filename,
            'chunksize': bytes_per_part,
            'num_downloads': total_parts,
            'context': ctx,
            'remove_remote_file': remove_remote_file,
        }
        self._do_enqueue_range_download_tasks(**enqueue_kwargs)
        return total_parts
Esempio n. 21
0
    def _enqueue_multipart_upload_tasks(self,
                                        filename,
                                        remove_local_file=False):
        """Queue the tasks that upload ``filename`` as a multipart upload.

        The tasks go to ``self.executer`` in this order: a
        ``CreateMultipartUploadTask``, an ``UploadPartTask`` for each
        part (numbered from 1), a ``CompleteMultipartUploadTask`` and,
        only when ``remove_local_file`` is true, a ``RemoveFileTask``
        for ``filename.src``.  ``(upload_context, filename)`` is
        appended to ``self._multipart_uploads``.

        :param filename: transfer description; ``size`` and ``src`` are
            read from it.
        :param remove_local_file: when true, a ``RemoveFileTask`` is
            queued last.
        :return: how many ``UploadPartTask`` objects were queued.
        """
        bytes_per_part = find_chunksize(filename.size, self.chunksize)
        # Ceiling division: a partial last chunk counts as a full part.
        total_parts = int(math.ceil(filename.size / float(bytes_per_part)))
        ctx = tasks.MultipartUploadContext(expected_parts=total_parts)

        start_task = tasks.CreateMultipartUploadTask(
            session=self.session, filename=filename, parameters=self.params,
            result_queue=self.result_queue, upload_context=ctx)
        self.executer.submit(start_task)

        for number in range(1, total_parts + 1):
            part_task = tasks.UploadPartTask(
                part_number=number, chunk_size=bytes_per_part,
                result_queue=self.result_queue, upload_context=ctx,
                filename=filename)
            self.executer.submit(part_task)

        end_task = tasks.CompleteMultipartUploadTask(
            session=self.session, filename=filename, parameters=self.params,
            result_queue=self.result_queue, upload_context=ctx)
        self.executer.submit(end_task)
        self._multipart_uploads.append((ctx, filename))

        if remove_local_file:
            cleanup_task = tasks.RemoveFileTask(
                local_filename=filename.src, upload_context=ctx)
            self.executer.submit(cleanup_task)
        return total_parts
Esempio n. 22
0
    def _enqueue_multipart_upload_tasks(self, filename, payload=None):
        """Queue the tasks for a multipart upload whose part count is unknown.

        The chunksize comes from ``find_chunksize`` when
        ``self.params['expected_size']`` is truthy, and from
        ``adjust_chunksize_to_upload_limits`` otherwise.  The start task
        is given the placeholder string ``'...'`` as the number of
        uploads; the real number is whatever ``_enqueue_upload_tasks``
        returns.

        :param filename: passed through to the enqueue helpers.
        :param payload: data for part number 1, forwarded to
            ``_enqueue_upload_single_part_task``.
        :return: the value returned by ``_enqueue_upload_tasks``.
        """
        expected_size = self.params['expected_size']
        if not expected_size:
            # No size to work from: only the chunksize limits apply.
            part_size = adjust_chunksize_to_upload_limits(self.chunksize)
        else:
            # A known size allows an appropriate chunksize to be
            # calculated from the max parts and chunksize limits.
            part_size = find_chunksize(int(expected_size), self.chunksize)

        unknown_part_count = '...'

        # Begin the multipart upload.
        context = self._enqueue_upload_start_task(
            part_size, unknown_part_count, filename)

        # Part 1 is the initial chunk that was already pulled from the
        # stream to decide whether a multipart upload was needed.
        self._enqueue_upload_single_part_task(
            part_number=1,
            chunk_size=part_size,
            upload_context=context,
            filename=filename,
            task_class=tasks.UploadPartTask,
            payload=payload,
        )

        # The remaining chunks of data coming in from standard input.
        part_count = self._enqueue_upload_tasks(
            unknown_part_count, part_size, context, filename,
            tasks.UploadPartTask)

        # Notify that the multipart upload is complete.
        self._enqueue_upload_end_task(filename, context)

        return part_count
Esempio n. 23
0
    def _enqueue_multipart_upload_tasks(self, filename, payload=None):
        """Enqueue a multipart upload for data of unknown total length.

        ``find_chunksize`` picks the chunksize when
        ``self.params['expected_size']`` is truthy; otherwise
        ``adjust_chunksize_to_upload_limits`` does.  The number of
        uploads is the string ``'...'`` until ``_enqueue_upload_tasks``
        returns the actual value.

        :param filename: forwarded to every enqueue helper called here.
        :param payload: forwarded to
            ``_enqueue_upload_single_part_task`` for part number 1.
        :return: the value returned by ``_enqueue_upload_tasks``.
        """
        size_hint = self.params['expected_size']
        if size_hint:
            # The expected size lets us calculate an appropriate
            # chunksize based on max parts and chunksize limits.
            bytes_per_part = find_chunksize(int(size_hint), self.chunksize)
        else:
            # Without it, the chunksize limits can still be applied.
            bytes_per_part = adjust_chunksize_to_upload_limits(self.chunksize)

        placeholder_count = '...'

        # Task that begins the multipart upload.
        ctx = self._enqueue_upload_start_task(
            bytes_per_part, placeholder_count, filename)

        # Task for the initial chunk, which was pulled from the stream
        # to determine whether a multipart upload was needed.
        first_part_kwargs = {
            'part_number': 1,
            'chunk_size': bytes_per_part,
            'upload_context': ctx,
            'filename': filename,
            'task_class': tasks.UploadPartTask,
            'payload': payload,
        }
        self._enqueue_upload_single_part_task(**first_part_kwargs)

        # Tasks for the rest of the chunks coming in from standard input.
        total_parts = self._enqueue_upload_tasks(
            placeholder_count, bytes_per_part, ctx,
            filename, tasks.UploadPartTask)

        # Task that notifies the multipart upload is complete.
        self._enqueue_upload_end_task(filename, ctx)

        return total_parts
Esempio n. 24
0
    def call(self, files):
        """
        Wrap every object in ``files`` in a ``BasicTask`` and run them.

        For each object that has a ``size`` above ``self.multi_threshold``
        and whose ``operation`` is ``'upload'`` or ``'download'``,
        ``set_multi`` is called on it and the number of parts is
        counted.  Every object is then wrapped in a ``BasicTask`` and
        submitted to ``self.executer``, except uploads whose ``size``
        exceeds ``MAX_UPLOAD_SIZE``: those are skipped and a warning is
        put on ``self.printQueue`` instead.  The file and part totals
        are stored on ``self.executer.print_thread`` before waiting for
        the executer and the print queue to finish.

        Exceptions raised while doing this are logged at debug level and
        not re-raised; a ``KeyboardInterrupt`` sets ``self.interrupt``.
        In every case ``self.done`` is set and the executer is joined
        before returning.

        :param files: iterable of ``FileInfo`` or ``TaskInfo`` objects
            (per the original documentation of this method).
        """
        self.done.clear()
        self.interrupt.clear()
        try:
            self.executer.start()
            tot_files = 0
            tot_parts = 0
            for filename in files:
                num_uploads = 1
                is_larger = False
                chunksize = self.chunksize
                too_large = False
                if hasattr(filename, 'size'):
                    is_larger = filename.size > self.multi_threshold
                    too_large = filename.size > MAX_UPLOAD_SIZE
                if is_larger:
                    if filename.operation == 'upload':
                        # Settle on the final chunksize first: the part
                        # count must be derived from the chunksize that
                        # is actually handed to the upload, otherwise
                        # the total reported to the print thread is
                        # wrong whenever the chunksize gets adjusted.
                        chunksize = find_chunksize(filename.size, chunksize)
                        num_uploads = int(math.ceil(filename.size /
                                                    float(chunksize)))
                        filename.set_multi(executer=self.executer,
                                           printQueue=self.printQueue,
                                           interrupt=self.interrupt,
                                           chunksize=chunksize)
                    elif filename.operation == 'download':
                        # The quotient is truncated here, not rounded up.
                        num_uploads = int(filename.size / chunksize)
                        filename.set_multi(executer=self.executer,
                                           printQueue=self.printQueue,
                                           interrupt=self.interrupt,
                                           chunksize=chunksize)
                task = BasicTask(session=self.session, filename=filename,
                                 executer=self.executer, done=self.done,
                                 parameters=self.params,
                                 multi_threshold=self.multi_threshold,
                                 chunksize=chunksize,
                                 printQueue=self.printQueue,
                                 interrupt=self.interrupt)
                if too_large and filename.operation == 'upload':
                    warning = "Warning %s exceeds 5 TB and upload is " \
                              "being skipped" % os.path.relpath(filename.src)
                    self.printQueue.put({'result': warning})
                else:
                    self.executer.submit(task)
                # Skipped files are still counted in both totals.
                tot_files += 1
                tot_parts += num_uploads
            self.executer.print_thread.totalFiles = tot_files
            self.executer.print_thread.totalParts = tot_parts
            self.executer.wait()
            self.printQueue.join()

        except Exception as e:
            # Best-effort: the failure is only logged, but keep the
            # traceback so that it can be diagnosed from the debug log.
            LOGGER.debug('%s', e, exc_info=True)
        except KeyboardInterrupt:
            self.interrupt.set()
            self.printQueue.put({'result': "Cleaning up. Please wait..."})

        self.done.set()
        self.executer.join()
Esempio n. 25
0
 def test_file_too_large(self):
     """
     ``find_chunksize`` must raise ``ValueError`` for a size that is one
     more than ``MAX_UPLOAD_SIZE``.
     """
     oversized = MAX_UPLOAD_SIZE + 1
     requested = 1
     self.assertRaises(ValueError, find_chunksize, oversized, requested)
Esempio n. 26
0
 def test_file_too_large(self):
     """
     A size of ``MAX_UPLOAD_SIZE + 1`` is too large: ``find_chunksize``
     is expected to raise ``ValueError`` for it.
     """
     arguments = (MAX_UPLOAD_SIZE + 1, 1)
     with self.assertRaises(ValueError):
         find_chunksize(*arguments)