Example #1
0
class TransferManager(object):
    ALLOWED_DOWNLOAD_ARGS = ALLOWED_DOWNLOAD_ARGS

    ALLOWED_UPLOAD_ARGS = [
        'ACL',
        'CacheControl',
        'ContentDisposition',
        'ContentEncoding',
        'ContentLanguage',
        'ContentType',
        'Expires',
        'GrantFullControl',
        'GrantRead',
        'GrantReadACP',
        'GrantWriteACP',
        'Metadata',
        'RequestPayer',
        'ServerSideEncryption',
        'StorageClass',
        'SSECustomerAlgorithm',
        'SSECustomerKey',
        'SSECustomerKeyMD5',
        'SSEKMSKeyId',
        'WebsiteRedirectLocation',
        'RetentionExpirationDate',
        'RetentionLegalHoldId',
        'RetentionPeriod',
    ]

    ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [
        'CopySourceIfMatch', 'CopySourceIfModifiedSince',
        'CopySourceIfNoneMatch', 'CopySourceIfUnmodifiedSince',
        'CopySourceSSECustomerAlgorithm', 'CopySourceSSECustomerKey',
        'CopySourceSSECustomerKeyMD5', 'MetadataDirective'
    ]

    ALLOWED_DELETE_ARGS = [
        'MFA',
        'VersionId',
        'RequestPayer',
    ]

    def __init__(self, client, config=None, osutil=None, executor_cls=None):
        """A transfer manager interface for Amazon S3

        :param client: Client to be used by the manager
        :param config: TransferConfig to associate specific configurations
        :param osutil: OSUtils object to use for os-related behavior when
            using with transfer manager.

        :type executor_cls: ibm_s3transfer.futures.BaseExecutor
        :param executor_cls: The class of executor to use with the transfer
            manager. By default, concurrent.futures.ThreadPoolExecutor is used.
        """
        self._client = client
        self._config = config
        if config is None:
            self._config = TransferConfig()
        self._osutil = osutil
        if osutil is None:
            self._osutil = OSUtils()
        self._coordinator_controller = TransferCoordinatorController()
        # A counter to create unique id's for each transfer submitted.
        self._id_counter = 0

        # The executor responsible for making S3 API transfer requests
        self._request_executor = BoundedExecutor(
            max_size=self._config.max_request_queue_size,
            max_num_threads=self._config.max_request_concurrency,
            tag_semaphores={
                IN_MEMORY_UPLOAD_TAG:
                TaskSemaphore(self._config.max_in_memory_upload_chunks),
                IN_MEMORY_DOWNLOAD_TAG:
                SlidingWindowSemaphore(
                    self._config.max_in_memory_download_chunks)
            },
            executor_cls=executor_cls)

        # The executor responsible for submitting the necessary tasks to
        # perform the desired transfer
        self._submission_executor = BoundedExecutor(
            max_size=self._config.max_submission_queue_size,
            max_num_threads=self._config.max_submission_concurrency,
            executor_cls=executor_cls)

        # There is one thread available for writing to disk. It will handle
        # downloads for all files.
        self._io_executor = BoundedExecutor(
            max_size=self._config.max_io_queue_size,
            max_num_threads=1,
            executor_cls=executor_cls)

        # The component responsible for limiting bandwidth usage if it
        # is configured.
        self._bandwidth_limiter = None
        if self._config.max_bandwidth is not None:
            logger.debug('Setting max_bandwidth to %s',
                         self._config.max_bandwidth)
            leaky_bucket = LeakyBucket(self._config.max_bandwidth)
            self._bandwidth_limiter = BandwidthLimiter(leaky_bucket)

        self._register_handlers()

    def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
        """Uploads a file to S3

        :type fileobj: str or seekable file-like object
        :param fileobj: The name of a file to upload or a seekable file-like
            object to upload. It is recommended to use a filename because
            file-like objects may result in higher memory usage.

        :type bucket: str
        :param bucket: The name of the bucket to upload to

        :type key: str
        :param key: The name of the key to upload to

        :type extra_args: dict
        :param extra_args: Extra arguments that may be passed to the
            client operation

        :type subscribers: list(ibm_s3transfer.subscribers.BaseSubscriber)
        :param subscribers: The list of subscribers to be invoked in the
            order provided based on the event emit during the process of
            the transfer request.

        :rtype: ibm_s3transfer.futures.TransferFuture
        :returns: Transfer future representing the upload
        """
        if extra_args is None:
            extra_args = {}
        if subscribers is None:
            subscribers = []
        self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
        call_args = CallArgs(fileobj=fileobj,
                             bucket=bucket,
                             key=key,
                             extra_args=extra_args,
                             subscribers=subscribers)
        extra_main_kwargs = {}
        if self._bandwidth_limiter:
            extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
        return self._submit_transfer(call_args, UploadSubmissionTask,
                                     extra_main_kwargs)

    def download(self,
                 bucket,
                 key,
                 fileobj,
                 extra_args=None,
                 subscribers=None):
        """Downloads a file from S3

        :type bucket: str
        :param bucket: The name of the bucket to download from

        :type key: str
        :param key: The name of the key to download from

        :type fileobj: str or seekable file-like object
        :param fileobj: The name of a file to download or a seekable file-like
            object to download. It is recommended to use a filename because
            file-like objects may result in higher memory usage.

        :type extra_args: dict
        :param extra_args: Extra arguments that may be passed to the
            client operation

        :type subscribers: list(ibm_s3transfer.subscribers.BaseSubscriber)
        :param subscribers: The list of subscribers to be invoked in the
            order provided based on the event emit during the process of
            the transfer request.

        :rtype: ibm_s3transfer.futures.TransferFuture
        :returns: Transfer future representing the download
        """
        if extra_args is None:
            extra_args = {}
        if subscribers is None:
            subscribers = []
        self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
        call_args = CallArgs(bucket=bucket,
                             key=key,
                             fileobj=fileobj,
                             extra_args=extra_args,
                             subscribers=subscribers)
        extra_main_kwargs = {'io_executor': self._io_executor}
        if self._bandwidth_limiter:
            extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
        return self._submit_transfer(call_args, DownloadSubmissionTask,
                                     extra_main_kwargs)

    def copy(self,
             copy_source,
             bucket,
             key,
             extra_args=None,
             subscribers=None,
             source_client=None):
        """Copies a file in S3

        :type copy_source: dict
        :param copy_source: The name of the source bucket, key name of the
            source object, and optional version ID of the source object. The
            dictionary format is:
            ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note
            that the ``VersionId`` key is optional and may be omitted.

        :type bucket: str
        :param bucket: The name of the bucket to copy to

        :type key: str
        :param key: The name of the key to copy to

        :type extra_args: dict
        :param extra_args: Extra arguments that may be passed to the
            client operation

        :type subscribers: a list of subscribers
        :param subscribers: The list of subscribers to be invoked in the
            order provided based on the event emit during the process of
            the transfer request.

        :type source_client: ibm_botocore or ibm_boto3 Client
        :param source_client: The client to be used for operation that
            may happen at the source object. For example, this client is
            used for the head_object that determines the size of the copy.
            If no client is provided, the transfer manager's client is used
            as the client for the source object.

        :rtype: ibm_s3transfer.futures.TransferFuture
        :returns: Transfer future representing the copy
        """
        if extra_args is None:
            extra_args = {}
        if subscribers is None:
            subscribers = []
        if source_client is None:
            source_client = self._client
        self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS)
        call_args = CallArgs(copy_source=copy_source,
                             bucket=bucket,
                             key=key,
                             extra_args=extra_args,
                             subscribers=subscribers,
                             source_client=source_client)
        return self._submit_transfer(call_args, CopySubmissionTask)

    def delete(self, bucket, key, extra_args=None, subscribers=None):
        """Delete an S3 object.

        :type bucket: str
        :param bucket: The name of the bucket.

        :type key: str
        :param key: The name of the S3 object to delete.

        :type extra_args: dict
        :param extra_args: Extra arguments that may be passed to the
            DeleteObject call.

        :type subscribers: list
        :param subscribers: A list of subscribers to be invoked during the
            process of the transfer request.  Note that the ``on_progress``
            callback is not invoked during object deletion.

        :rtype: ibm_s3transfer.futures.TransferFuture
        :return: Transfer future representing the deletion.

        """
        if extra_args is None:
            extra_args = {}
        if subscribers is None:
            subscribers = []
        self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS)
        call_args = CallArgs(bucket=bucket,
                             key=key,
                             extra_args=extra_args,
                             subscribers=subscribers)
        return self._submit_transfer(call_args, DeleteSubmissionTask)

    def _validate_all_known_args(self, actual, allowed):
        for kwarg in actual:
            if kwarg not in allowed:
                raise ValueError("Invalid extra_args key '%s', "
                                 "must be one of: %s" %
                                 (kwarg, ', '.join(allowed)))

    def _submit_transfer(self,
                         call_args,
                         submission_task_cls,
                         extra_main_kwargs=None):
        if not extra_main_kwargs:
            extra_main_kwargs = {}

        # Create a TransferFuture to return back to the user
        transfer_future, components = self._get_future_with_components(
            call_args)

        # Add any provided done callbacks to the created transfer future
        # to be invoked on the transfer future being complete.
        for callback in get_callbacks(transfer_future, 'done'):
            components['coordinator'].add_done_callback(callback)

        # Get the main kwargs needed to instantiate the submission task
        main_kwargs = self._get_submission_task_main_kwargs(
            transfer_future, extra_main_kwargs)

        # Submit a SubmissionTask that will submit all of the necessary
        # tasks needed to complete the S3 transfer.
        self._submission_executor.submit(
            submission_task_cls(transfer_coordinator=components['coordinator'],
                                main_kwargs=main_kwargs))

        # Increment the unique id counter for future transfer requests
        self._id_counter += 1

        return transfer_future

    def _get_future_with_components(self, call_args):
        transfer_id = self._id_counter
        # Creates a new transfer future along with its components
        transfer_coordinator = TransferCoordinator(transfer_id=transfer_id)
        # Track the transfer coordinator for transfers to manage.
        self._coordinator_controller.add_transfer_coordinator(
            transfer_coordinator)
        # Also make sure that the transfer coordinator is removed once
        # the transfer completes so it does not stick around in memory.
        transfer_coordinator.add_done_callback(
            self._coordinator_controller.remove_transfer_coordinator,
            transfer_coordinator)
        components = {
            'meta': TransferMeta(call_args, transfer_id=transfer_id),
            'coordinator': transfer_coordinator
        }
        transfer_future = TransferFuture(**components)
        return transfer_future, components

    def _get_submission_task_main_kwargs(self, transfer_future,
                                         extra_main_kwargs):
        main_kwargs = {
            'client': self._client,
            'config': self._config,
            'osutil': self._osutil,
            'request_executor': self._request_executor,
            'transfer_future': transfer_future
        }
        main_kwargs.update(extra_main_kwargs)
        return main_kwargs

    def _register_handlers(self):
        # Register handlers to enable/disable callbacks on uploads.
        event_name = 'request-created.s3'
        self._client.meta.events.register_first(
            event_name,
            signal_not_transferring,
            unique_id='s3upload-not-transferring')
        self._client.meta.events.register_last(
            event_name, signal_transferring, unique_id='s3upload-transferring')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, *args):
        cancel = False
        cancel_msg = ''
        cancel_exc_type = FatalError
        # If a exception was raised in the context handler, signal to cancel
        # all of the inprogress futures in the shutdown.
        if exc_type:
            cancel = True
            cancel_msg = six.text_type(exc_value)
            if not cancel_msg:
                cancel_msg = repr(exc_value)
            # If it was a KeyboardInterrupt, the cancellation was initiated
            # by the user.
            if isinstance(exc_value, KeyboardInterrupt):
                cancel_exc_type = CancelledError
        self._shutdown(cancel, cancel_msg, cancel_exc_type)

    def shutdown(self, cancel=False, cancel_msg=''):
        """Shutdown the TransferManager

        It will wait till all transfers complete before it completely shuts
        down.

        :type cancel: boolean
        :param cancel: If True, calls TransferFuture.cancel() for
            all in-progress in transfers. This is useful if you want the
            shutdown to happen quicker.

        :type cancel_msg: str
        :param cancel_msg: The message to specify if canceling all in-progress
            transfers.
        """
        self._shutdown(cancel, cancel, cancel_msg)

    def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError):
        if cancel:
            # Cancel all in-flight transfers if requested, before waiting
            # for them to complete.
            self._coordinator_controller.cancel(cancel_msg, exc_type)
        try:
            # Wait until there are no more in-progress transfers. This is
            # wrapped in a try statement because this can be interrupted
            # with a KeyboardInterrupt that needs to be caught.
            self._coordinator_controller.wait()
        except KeyboardInterrupt:
            # If not errors were raised in the try block, the cancel should
            # have no coordinators it needs to run cancel on. If there was
            # an error raised in the try statement we want to cancel all of
            # the inflight transfers before shutting down to speed that
            # process up.
            self._coordinator_controller.cancel('KeyboardInterrupt()')
            raise
        finally:
            # Shutdown all of the executors.
            self._submission_executor.shutdown()
            self._request_executor.shutdown()
            self._io_executor.shutdown()
Example #2
0
 def test_replace_underlying_executor(self):
     mocked_executor_cls = mock.Mock(BaseExecutor)
     executor = BoundedExecutor(10, 1, {}, mocked_executor_cls)
     executor.submit(self.get_task(ReturnFooTask))
     self.assertTrue(mocked_executor_cls.return_value.submit.called)