Example 1
 def test_create_transfer_manager_with_no_threads(self):
     client = object()
     config = TransferConfig()
     config.use_threads = False
     with mock.patch('ibm_boto3.s3.transfer.TransferManager') as manager:
         create_transfer_manager(client, config)
         assert manager.call_args == mock.call(client, config, None,
                                               NonThreadedExecutor)
Example 2
    def test_alias_max_io_queue(self):
        ref_value = 10
        config = TransferConfig(max_io_queue=ref_value)
        self.assert_value_of_actual_and_alias(config, 'max_io_queue_size',
                                              'max_io_queue', ref_value)

        # Set a new value using the alias
        new_value = 15
        config.max_io_queue = new_value
        # Make sure it sets the value for both the alias and the actual
        # value that will be used in the TransferManager
        self.assert_value_of_actual_and_alias(config, 'max_io_queue_size',
                                              'max_io_queue', new_value)
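
The alias behaviour asserted above can be modelled with a property that delegates to the renamed attribute. Below is a minimal illustrative sketch, not ibm_boto3's actual implementation; only the two attribute names are taken from the test.

class AliasedConfig:
    """Illustrative config object with a backwards-compatible alias."""

    def __init__(self, max_io_queue=100):
        # The real attribute consulted by the transfer machinery.
        self.max_io_queue_size = max_io_queue

    @property
    def max_io_queue(self):
        # Reading the alias returns the real value.
        return self.max_io_queue_size

    @max_io_queue.setter
    def max_io_queue(self, value):
        # Writing the alias updates the real attribute, so both names
        # always agree, which is exactly what the test asserts.
        self.max_io_queue_size = value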
Example 3
    def test_download_progress(self):
        self.contents = b'A' * 55
        self.stub_multipart_download(contents=self.contents,
                                     part_size=5,
                                     num_parts=11)
        transfer_config = TransferConfig(multipart_chunksize=5,
                                         multipart_threshold=1,
                                         max_concurrency=1)

        def progress_callback(amount):
            self.progress += amount
            self.progress_times_called += 1

        with self.stubber:
            self.s3.meta.client.download_fileobj(Bucket=self.bucket,
                                                 Key=self.key,
                                                 Fileobj=self.fileobj,
                                                 Config=transfer_config,
                                                 Callback=progress_callback)

        # Assert that the progress callback was called the correct number of
        # times with the correct amounts.
        self.assertEqual(self.progress_times_called, 11)
        self.assertEqual(self.progress, 55)
        self.stubber.assert_no_pending_responses()
Example 4
 def test_create_transfer_manager(self):
     client = object()
     config = TransferConfig()
     osutil = OSUtils()
     with mock.patch('ibm_boto3.s3.transfer.TransferManager') as manager:
         create_transfer_manager(client, config, osutil)
         assert manager.call_args == mock.call(client, config, osutil, None)
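
Read together with Example 1, these two tests pin down the factory's contract: osutil defaults to None, and use_threads=False selects NonThreadedExecutor in the final positional slot. A sketch consistent with both tests follows; it is illustrative rather than the library's actual source, and the import paths assume the ibm_s3transfer fork mirrors upstream s3transfer.

from ibm_s3transfer.futures import NonThreadedExecutor
from ibm_s3transfer.manager import TransferManager


def create_transfer_manager(client, config, osutil=None):
    # Illustrative factory: pick a serial executor only when
    # threading has been disabled on the config.
    executor_cls = None
    if not config.use_threads:
        executor_cls = NonThreadedExecutor
    return TransferManager(client, config, osutil, executor_cls)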
Example 5
    def test_copy_progress(self):
        chunksize = 8 * (1024**2)
        self.stub_multipart_copy(chunksize, 3)
        transfer_config = TransferConfig(
            multipart_chunksize=chunksize,
            multipart_threshold=1,
            max_concurrency=1,
        )

        def progress_callback(amount):
            self.progress += amount
            self.progress_times_called += 1

        with self.stubber:
            self.s3.meta.client.copy(
                Bucket=self.bucket,
                Key=self.key,
                CopySource=self.copy_source,
                Config=transfer_config,
                Callback=progress_callback,
            )

        # Assert that the progress callback was called the correct number of
        # times with the correct amounts.
        assert self.progress_times_called == 3
        assert self.progress == chunksize * 3
Example 6
def download_fileobj(self, Bucket, Key, Fileobj, ExtraArgs=None,
                     Callback=None, Config=None):
    """Download an object from S3 to a file-like object.

    The file-like object must be in binary mode.

    This is a managed transfer which will perform a multipart download in
    multiple threads if necessary.

    Usage::

        import ibm_boto3
        s3 = ibm_boto3.client('s3')

        with open('filename', 'wb') as data:
            s3.download_fileobj('mybucket', 'mykey', data)

    :type Fileobj: a file-like object
    :param Fileobj: A file-like object to download into. At a minimum, it must
        implement the `write` method and must accept bytes.

    :type Bucket: str
    :param Bucket: The name of the bucket to download from.

    :type Key: str
    :param Key: The name of the key to download from.

    :type ExtraArgs: dict
    :param ExtraArgs: Extra arguments that may be passed to the
        client operation.

    :type Callback: function
    :param Callback: A function that takes the number of bytes transferred,
        called periodically during the download.

    :type Config: ibm_boto3.s3.transfer.TransferConfig
    :param Config: The transfer configuration to be used when performing the
        download.
    """
    if not hasattr(Fileobj, 'write'):
        raise ValueError('Fileobj must implement write')

    subscribers = None
    if Callback is not None:
        subscribers = [ProgressCallbackInvoker(Callback)]

    config = Config
    if config is None:
        config = TransferConfig()

    with create_transfer_manager(self, config) as manager:
        future = manager.download(
            bucket=Bucket, key=Key, fileobj=Fileobj,
            extra_args=ExtraArgs, subscribers=subscribers)
        return future.result()
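
Combining the Callback and Config parameters from the docstring above, a download with progress reporting might look like the following sketch (bucket, key, and file names are placeholders):

import threading

import ibm_boto3
from ibm_boto3.s3.transfer import TransferConfig


class ProgressCounter:
    """Thread-safe byte counter usable as a Callback."""

    def __init__(self):
        self._seen = 0
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        # Called periodically by the transfer manager with the number
        # of bytes transferred since the previous call.
        with self._lock:
            self._seen += bytes_amount
            print('{} bytes transferred'.format(self._seen))


s3 = ibm_boto3.client('s3')
config = TransferConfig(multipart_threshold=8 * 1024 ** 2)

with open('filename', 'wb') as data:
    s3.download_fileobj('mybucket', 'mykey', data,
                        Callback=ProgressCounter(), Config=config)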
Example 7
    def __init__(self):
        try:
            with open(os.path.join(sys.path[0], "config.json")) as config_file:
                config_json = json.loads(config_file.read())
        except IOError:
            raise ConversionOperationException("INIT", "Config file not found")
        except json.JSONDecodeError:
            raise ConversionOperationException("INIT", "Malformed config file")

        for required in [
                "IMAGE_FILE_NAME", "DESTINATION_IMAGE_FORMAT",
                "IBM_API_KEY_ID", "IAM_SERVICE_ID", "S3_ENDPOINT",
                "BUCKET_NAME"
        ]:
            if required not in config_json:
                raise ConversionOperationException(
                    "INIT", "Required key {} not found in config file".format(
                        required))

        self.bucket_name = config_json["BUCKET_NAME"]
        self.image_file_name = config_json["IMAGE_FILE_NAME"]

        self.source_image_format = None

        dest_format = config_json["DESTINATION_IMAGE_FORMAT"]
        if dest_format not in self.SUPPORTED_DEST_FORMATS:
            raise ConversionOperationException(
                "INIT",
                "Conversion to {} format not supported".format(dest_format))
        self.destination_image_format = dest_format

        self.converted_image_name = self.clear_extension(
            self.image_file_name) + "." + self.destination_image_format

        self.image_path_download = config_json.get("DOWNLOAD_PATH") or './'
        if self.image_path_download[-1] != "/":
            self.image_path_download += "/"

        self.image_path_convert = config_json.get("CONVERT_PATH") or './'
        if self.image_path_convert[-1] != "/":
            self.image_path_convert += "/"

        client = ibm_boto3.client(
            service_name='s3',
            ibm_api_key_id=config_json["IBM_API_KEY_ID"],
            ibm_service_instance_id=config_json["IAM_SERVICE_ID"],
            ibm_auth_endpoint="https://iam.cloud.ibm.com/identity/token",
            config=Config(signature_version="oauth"),
            endpoint_url=config_json["S3_ENDPOINT"])

        self.transfer_manager = TransferManager(client, TransferConfig())
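
For reference, a config.json satisfying the required keys checked in __init__ might look like the following. Every value is a placeholder: whether "jpeg" is accepted depends on SUPPORTED_DEST_FORMATS, which is not shown, and DOWNLOAD_PATH and CONVERT_PATH are optional (they default to './').

{
    "IMAGE_FILE_NAME": "photo.png",
    "DESTINATION_IMAGE_FORMAT": "jpeg",
    "IBM_API_KEY_ID": "<api-key>",
    "IAM_SERVICE_ID": "<service-instance-id>",
    "S3_ENDPOINT": "https://s3.us-south.cloud-object-storage.appdomain.cloud",
    "BUCKET_NAME": "my-bucket",
    "DOWNLOAD_PATH": "./downloads/",
    "CONVERT_PATH": "./converted/"
}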
Example 8
    def test_multipart_download(self):
        self.contents = b'A' * 55
        self.stub_multipart_download(
            contents=self.contents, part_size=5, num_parts=11)
        transfer_config = TransferConfig(
            multipart_chunksize=5, multipart_threshold=1,
            max_concurrency=1)

        with self.stubber:
            self.s3.meta.client.download_fileobj(
                Bucket=self.bucket, Key=self.key, Fileobj=self.fileobj,
                Config=transfer_config)

        self.assertEqual(self.fileobj.getvalue(), self.contents)
        self.stubber.assert_no_pending_responses()
Example 9
    def test_multipart_upload(self):
        chunksize = 8 * (1024 ** 2)
        contents = six.BytesIO(b'0' * (chunksize * 3))
        self.stub_multipart_upload(num_parts=3)
        transfer_config = TransferConfig(
            multipart_chunksize=chunksize, multipart_threshold=1,
            max_concurrency=1)

        with self.stubber:
            # The stubber will assert that all the right parameters are called.
            self.s3.meta.client.upload_fileobj(
                Fileobj=contents, Bucket=self.bucket, Key=self.key,
                Config=transfer_config)

        self.stubber.assert_no_pending_responses()
Example 10
def _upload_if_needed(src_path,
                      storage,
                      sm_storage,
                      storage_type,
                      s3_client=None,
                      use_db_mutex=True):
    """
    Uploads the object from `src_path` if it doesn't already exist in its translated COS path.
    Returns a CloudObject for the COS object
    """
    bucket, key = _choose_cos_location(src_path, sm_storage, storage_type)

    with ExitStack() as stack:
        if use_db_mutex:
            # Lock during upload to prevent parallel jobs from uploading
            # the same file simultaneously
            stack.enter_context(DBMutex().lock(bucket + key, timeout=1200))

        try:
            storage.head_object(bucket, key)
            logger.debug(f'{src_path} already uploaded')
            return CloudObject(storage.backend, bucket, key)
        except StorageNoSuchKeyError:
            logger.info(f'Uploading {src_path}...')
            if src_path.startswith('s3a://'):
                assert s3_client, 'S3 client must be supplied to support s3a:// paths'
                src_bucket, src_key = split_s3_path(src_path)

                obj = s3_client.get_object(Bucket=src_bucket, Key=src_key)
                if hasattr(storage.get_client(), 'upload_fileobj'):
                    # Try streaming upload to IBM COS
                    transfer_config = TransferConfig(
                        multipart_chunksize=20 * MB,
                        max_concurrency=20,
                        io_chunksize=1 * MB)
                    storage.get_client().upload_fileobj(Fileobj=obj['Body'],
                                                        Bucket=bucket,
                                                        Key=key,
                                                        Config=transfer_config)
                    cobject = CloudObject(storage.backend, bucket, key)
                else:
                    # Fall back to buffering the entire object in memory for other backends
                    cobject = storage.put_cloudobject(obj['Body'].read(),
                                                      bucket, key)
            else:
                cobject = storage.put_cloudobject(open(src_path, 'rb'), bucket,
                                                  key)
            logger.info(f'Uploading {src_path}...Done')
            return cobject
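
The split_s3_path helper used for s3a:// sources is not shown. Judging purely from its call site, it presumably splits the path into a bucket and a key; a hypothetical sketch:

def split_s3_path(path):
    # Hypothetical helper: 's3a://bucket/some/key' -> ('bucket', 'some/key').
    path = path.replace('s3a://', '', 1)
    bucket, _, key = path.partition('/')
    return bucket, key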
Example 11
 def test_transferconfig_parameters(self):
     config = TransferConfig(
         multipart_threshold=8 * MB,
         max_concurrency=10,
         multipart_chunksize=8 * MB,
         num_download_attempts=5,
         max_io_queue=100,
         io_chunksize=256 * KB,
         use_threads=True,
         max_bandwidth=1024 * KB,
     )
     assert config.multipart_threshold == 8 * MB
     assert config.multipart_chunksize == 8 * MB
     assert config.max_request_concurrency == 10
     assert config.num_download_attempts == 5
     assert config.max_io_queue_size == 100
     assert config.io_chunksize == 256 * KB
     assert config.use_threads is True
     assert config.max_bandwidth == 1024 * KB
Example 12
 def test_can_create_with_extra_configurations(self):
     transfer = S3Transfer(client=mock.Mock(),
                           config=TransferConfig(),
                           osutil=OSUtils())
     assert isinstance(transfer, S3Transfer)
Example 13
def copy(self,
         CopySource,
         Bucket,
         Key,
         ExtraArgs=None,
         Callback=None,
         SourceClient=None,
         Config=None):
    """Copy an object from one S3 location to another.

    This is a managed transfer which will perform a multipart copy in
    multiple threads if necessary.

    Usage::

        import ibm_boto3
        s3 = ibm_boto3.resource('s3')
        copy_source = {
            'Bucket': 'mybucket',
            'Key': 'mykey'
        }
        s3.meta.client.copy(copy_source, 'otherbucket', 'otherkey')

    :type CopySource: dict
    :param CopySource: The name of the source bucket and the key name of
        the source object. The dictionary format is:
        ``{'Bucket': 'bucket', 'Key': 'key'}``.

    :type Bucket: str
    :param Bucket: The name of the bucket to copy to

    :type Key: str
    :param Key: The name of the key to copy to

    :type ExtraArgs: dict
    :param ExtraArgs: Extra arguments that may be passed to the
        client operation

    :type Callback: function
    :param Callback: A function that takes the number of bytes transferred,
        called periodically during the copy.

    :type SourceClient: ibm_botocore or ibm_boto3 Client
    :param SourceClient: The client to be used for operation that
        may happen at the source object. For example, this client is
        used for the head_object that determines the size of the copy.
        If no client is provided, the current client is used as the client
        for the source object.

    :type Config: ibm_boto3.s3.transfer.TransferConfig
    :param Config: The transfer configuration to be used when performing the
        copy.
    """
    subscribers = None
    if Callback is not None:
        subscribers = [ProgressCallbackInvoker(Callback)]

    config = Config
    if config is None:
        config = TransferConfig()

    with create_transfer_manager(self, config) as manager:
        future = manager.copy(copy_source=CopySource,
                              bucket=Bucket,
                              key=Key,
                              extra_args=ExtraArgs,
                              subscribers=subscribers,
                              source_client=SourceClient)
        return future.result()
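
When the source object sits behind a different endpoint or set of credentials, the SourceClient parameter lets the size-probing head_object go to the right place while the copy itself runs against the destination client. A sketch with placeholder endpoints and names:

import ibm_boto3

# Client for the destination; performs the actual copy.
s3 = ibm_boto3.client('s3', endpoint_url='https://<destination-endpoint>')
# Client pointed at the source object's location; used for head_object.
source_s3 = ibm_boto3.client('s3', endpoint_url='https://<source-endpoint>')

copy_source = {'Bucket': 'sourcebucket', 'Key': 'sourcekey'}
s3.copy(copy_source, 'destbucket', 'destkey', SourceClient=source_s3)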
Example 14
 def download_file(self, bucket, key, filename):
     tc = TransferConfig()
     self.cos_resource.Bucket(bucket).download_file(Key=key,
                                                    Filename=filename,
                                                    Config=tc)
Example 15
 def upload_file(self, filename, bucket, key):
     tc = TransferConfig()
     self.cos_client.upload_file(filename, bucket, key, Config=tc)
Example 16
def upload_fileobj(self,
                   Fileobj,
                   Bucket,
                   Key,
                   ExtraArgs=None,
                   Callback=None,
                   Config=None):
    """Upload a file-like object to S3.

    The file-like object must be in binary mode.

    This is a managed transfer which will perform a multipart upload in
    multiple threads if necessary.

    Usage::

        import ibm_boto3
        s3 = ibm_boto3.client('s3')

        with open('filename', 'rb') as data:
            s3.upload_fileobj(data, 'mybucket', 'mykey')

    :type Fileobj: a file-like object
    :param Fileobj: A file-like object to upload. At a minimum, it must
        implement the `read` method, and must return bytes.

    :type Bucket: str
    :param Bucket: The name of the bucket to upload to.

    :type Key: str
    :param Key: The name of the key to upload to.

    :type ExtraArgs: dict
    :param ExtraArgs: Extra arguments that may be passed to the
        client operation. For allowed upload arguments see
        ibm_boto3.s3.transfer.S3Transfer.ALLOWED_UPLOAD_ARGS.

    :type Callback: function
    :param Callback: A function that takes the number of bytes transferred,
        called periodically during the upload.

    :type Config: ibm_boto3.s3.transfer.TransferConfig
    :param Config: The transfer configuration to be used when performing the
        upload.
    """
    if not hasattr(Fileobj, 'read'):
        raise ValueError('Fileobj must implement read')

    subscribers = None
    if Callback is not None:
        subscribers = [ProgressCallbackInvoker(Callback)]

    config = Config
    if config is None:
        config = TransferConfig()

    with create_transfer_manager(self, config) as manager:
        future = manager.upload(
            fileobj=Fileobj,
            bucket=Bucket,
            key=Key,
            extra_args=ExtraArgs,
            subscribers=subscribers,
        )
        return future.result()
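
As a complement to the docstring's usage block, the sketch below passes both ExtraArgs and a tuned TransferConfig; the threshold forces a multipart upload for anything over 5 MB, and ContentType is one of the allowed upload arguments. Bucket, key, and file names are placeholders.

import ibm_boto3
from ibm_boto3.s3.transfer import TransferConfig

s3 = ibm_boto3.client('s3')
# Anything above 5 MB is uploaded in parts, at most four in flight.
config = TransferConfig(multipart_threshold=5 * 1024 ** 2,
                        max_concurrency=4)

with open('filename', 'rb') as data:
    s3.upload_fileobj(data, 'mybucket', 'mykey',
                      ExtraArgs={'ContentType': 'application/octet-stream'},
                      Config=config)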