Example #1
def test_upload_exceptions(caplog):
    """Exceptions raised from upload are expressed in error logging"""

    items = [
        BucketItem("tests/test_data/somefile3.txt"),
        BucketItem("tests/test_data/somefile2.txt"),
        BucketItem("tests/test_data/somefile.txt"),
    ]

    client = MockedClient()
    # Uploading fails twice before succeeding
    client._session.resource().Bucket().upload_file.side_effect = [
        S3UploadFailedError("Error uploading somefile3.txt"),
        S3UploadFailedError("Error uploading somefile2.txt"),
        mock.DEFAULT,
    ]

    with caplog.at_level(logging.DEBUG):
        client.upload(items, "test_bucket")

    for msg in [
            "One or more exceptions occurred during upload",
            "Error uploading somefile3.txt",
            "Error uploading somefile2.txt",
    ]:
        assert msg in caplog.text
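For readers unfamiliar with the mock.DEFAULT trick used above: when side_effect is a list, each call consumes the next entry, exceptions are raised, and mock.DEFAULT falls through to the mock's normal return value. A standalone sketch (not part of the test above):

from unittest import mock

m = mock.Mock(return_value="ok")
m.side_effect = [ValueError("boom"), mock.DEFAULT]
try:
    m()                 # first call raises ValueError("boom")
except ValueError:
    pass
assert m() == "ok"      # mock.DEFAULT falls back to return_value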
Example #2
def test_upload_file_with_s3_upload_failed_error():
    """Tests uploading a file with S3UploadFailedError, which could indicate that the AWS token has expired."""
    upload_file = MagicMock(
        side_effect=S3UploadFailedError(
            "An error occurred (ExpiredToken) when calling the "
            "CreateMultipartUpload operation: The provided token has expired."))
    client = Mock()
    client.Object.return_value = MagicMock(
        metadata=defaultdict(str), upload_file=upload_file)
    initial_parallel = 100
    upload_meta = {
        'no_sleeping_time': True,
        'parallel': initial_parallel,
        'put_callback': None,
        'put_callback_output_stream': None,
        'existing_files': [],
        SHA256_DIGEST: '123456789abcdef',
        'stage_info': {
            'location': 'sfc-teststage/rwyitestacco/users/1234/',
            'locationType': 'S3',
        },
        'client': client,
        'dst_file_name': 'data1.txt.gz',
        'src_file_name': path.join(THIS_DIR, '../data', 'put_get_1.txt'),
        'overwrite': True,
    }
    upload_meta['real_src_file_name'] = upload_meta['src_file_name']
    upload_meta[
        'upload_size'] = os.stat(upload_meta['src_file_name']).st_size

    akey = SnowflakeRemoteStorageUtil.upload_one_file(upload_meta)
    assert akey is None
    assert upload_meta['result_status'] == ResultStatus.RENEW_TOKEN
Example #3
    def upload_file(self,
                    filename,
                    bucket,
                    key,
                    callback=None,
                    extra_args=None):
        """Upload a file to an S3 object.

        Variants have also been injected into S3 client, Bucket and Object.
        You don't have to use S3Transfer.upload_file() directly.

        .. seealso::
            :py:meth:`S3.Client.upload_file`
            :py:meth:`S3.Client.upload_fileobj`
        """
        if not isinstance(filename, six.string_types):
            raise ValueError('Filename must be a string')

        subscribers = self._get_subscribers(callback)
        future = self._manager.upload(filename, bucket, key, extra_args,
                                      subscribers)
        try:
            future.result()
        # If a client error was raised, add the backwards compatibility layer
        # that raises a S3UploadFailedError. These specific errors were only
        # ever thrown for upload_parts but now can be thrown for any related
        # client error.
        except ClientError as e:
            raise S3UploadFailedError("Failed to upload %s to %s: %s" %
                                      (filename, '/'.join([bucket, key]), e))
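A minimal caller sketch for the wrapper above, assuming only that boto3 is configured with credentials; the bucket and file names are placeholders. The same S3UploadFailedError surfaces through the upload_file variant injected into the S3 client:

import boto3
from boto3.exceptions import S3UploadFailedError

s3 = boto3.client("s3")
try:
    s3.upload_file("report.csv", "my-bucket", "reports/report.csv")
except S3UploadFailedError as exc:
    # The wrapped ClientError text (e.g. ExpiredToken) is embedded in str(exc).
    print("upload failed:", exc)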
Example #4
def test_upload_failed_error(caplog):
    """Tests whether token expiry error is handled as expected when uploading."""
    caplog.set_level(logging.DEBUG, "snowflake.connector")
    mock_resource, mock_object = MagicMock(), MagicMock()
    mock_resource.Object.return_value = mock_object
    mock_object.upload_file.side_effect = S3UploadFailedError("ExpiredToken")
    client_meta = {
        "cloud_client": mock_resource,
        "stage_info": {
            "location": "loc"
        },
    }
    meta = {
        "name": "f",
        "src_file_name": "f",
        "stage_location_type": "S3",
        "client_meta": SFResourceMeta(**client_meta),
        "sha256_digest": "asd",
        "dst_file_name": "f",
        "put_callback": None,
    }
    meta = SnowflakeFileMeta(**meta)
    with mock.patch(
            "snowflake.connector.s3_util.SnowflakeS3Util.extract_bucket_name_and_path"
    ):
        assert SnowflakeS3Util.upload_file("f", meta, {}, 4, 67108864) is None
    assert (
        "snowflake.connector.s3_util",
        logging.DEBUG,
        "Failed to upload a file: f, err: ExpiredToken. Renewing AWS Token and Retrying",
    ) in caplog.record_tuples
    assert meta.result_status == ResultStatus.RENEW_TOKEN
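The assertion above relies on pytest's caplog.record_tuples, which exposes each captured record as a (logger_name, level, message) tuple. A self-contained sketch of that shape, using a throwaway logger name:

import logging

def test_record_tuples_shape(caplog):
    caplog.set_level(logging.DEBUG, "demo")
    logging.getLogger("demo").debug("hello")
    assert ("demo", logging.DEBUG, "hello") in caplog.record_tuples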
Example #5
    def upload_file(self, path, Config=None):
        if self.bucket_name not in self.meta.client.mock_s3_fs:
            # upload_file() is a higher-order operation, has fancy errors
            raise S3UploadFailedError(
                'Failed to upload %s to %s/%s: %s' %
                (path, self.bucket_name, self.key,
                 str(_no_such_bucket_error('PutObject'))))

        mock_keys = self._mock_bucket_keys('PutObject')
        with open(path, 'rb') as f:
            mock_keys[self.key] = (f.read(), _boto3_now())
Example #6
def test_upload_failed_error(caplog):
    """Tests whether token expiry error is handled as expected when uploading."""
    mock_resource, mock_object = MagicMock(), MagicMock()
    mock_resource.Object.return_value = mock_object
    mock_object.upload_file.side_effect = S3UploadFailedError('ExpiredToken')
    meta = {'client': mock_resource,
            'sha256_digest': 'asd',
            'stage_info': {'location': 'loc'},
            'dst_file_name': 'f',
            'put_callback': None}
    with mock.patch('snowflake.connector.s3_util.SnowflakeS3Util.extract_bucket_name_and_path'):
        assert SnowflakeS3Util.upload_file('f', meta, {}, 4) is None
    assert ('snowflake.connector.s3_util',
            logging.DEBUG,
            'Failed to upload a file: f, err: ExpiredToken. Renewing AWS Token and Retrying') in caplog.record_tuples
    assert meta['result_status'] == ResultStatus.RENEW_TOKEN
Example #7
    def upload_file(self, filename, bucket, key, callback, extra_args):
        response = self._client.create_multipart_upload(
            Bucket=bucket, Key=key, **extra_args)
        upload_id = response['UploadId']
        try:
            parts = self._upload_parts(
                upload_id, filename, bucket, key, callback, extra_args)
        except Exception as e:
            logger.debug("Exception raised while uploading parts, "
                         "aborting multipart upload.", exc_info=True)
            self._client.abort_multipart_upload(
                Bucket=bucket, Key=key, UploadId=upload_id)
            raise S3UploadFailedError(
                "Failed to upload %s to %s: %s" % (
                    filename, '/'.join([bucket, key]), e))
        self._client.complete_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id,
            MultipartUpload={'Parts': parts})
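Aborting the multipart upload on failure matters because otherwise the already-uploaded parts linger in the bucket and keep accruing storage cost. A hedged cleanup sketch for uploads whose abort never ran (for example, a process killed mid-transfer); the bucket name is a placeholder:

import boto3

s3 = boto3.client("s3")
resp = s3.list_multipart_uploads(Bucket="my-bucket")
for upload in resp.get("Uploads", []):
    # Abandon each in-progress upload so its parts are discarded.
    s3.abort_multipart_upload(Bucket="my-bucket",
                              Key=upload["Key"],
                              UploadId=upload["UploadId"])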
Example #8
    def upload_backup(self):

        if 's3_bucket' not in self.bkp_conf:
            self.info('no s3 bucket specified, ignore backup to s3')
            return

        self.info_r('backup to s3://{s3_bucket}/{s3_key} ...')

        bc = boto_client(self.bkp_conf['s3_host'],
                         self.bkp_conf['s3_access_key'],
                         self.bkp_conf['s3_secret_key'])

        # boto adds Content-MD5 automatically
        extra_args = {'Metadata': self.bkp_conf['backup_tgz_des3_meta']}

        try:
            boto_put(bc, self.render('{backup_tgz_des3}'),
                     self.render('{s3_bucket}'), self.render('{s3_key}'),
                     extra_args)

        except S3UploadFailedError as e:

            self.info(repr(e) + ' while uploading {backup_tgz_des3} to s2 cloud')

            try:
                resp = boto_head(bc, self.render('{s3_bucket}'),
                                 self.render('{s3_key}'))
            except ClientError as ee:
                self.error(
                    repr(ee) +
                    ' backup file: {backup_tgz_des3} not found in s2 cloud')
                raise

            if resp['ResponseMetadata']['HTTPStatusCode'] == 200:
                self.info('backup file: {backup_tgz_des3} already in s2 cloud')
            else:
                self.error(
                    repr(e) + ' error getting backup file: {backup_tgz_des3}')
                raise S3UploadFailedError(
                    repr(e) + ' while uploading backup file')
Example #9
    def upload_file(self, path, Config=None):
        if self.bucket_name not in self.meta.client.mock_s3_fs:
            # upload_file() is a higher-order operation, has fancy errors
            raise S3UploadFailedError(
                'Failed to upload %s to %s/%s: %s' %
                (path, self.bucket_name, self.key,
                 str(_no_such_bucket_error('PutObject'))))

        # verify that config doesn't have empty part size (see #2033)
        #
        # config is a boto3.s3.transfer.TransferConfig (we don't mock it),
        # which is actually part of s3transfer. Very old versions of s3transfer
        # (e.g. 0.10.0) disallow initializing TransferConfig with part sizes
        # that are zero or None
        if Config and not (Config.multipart_chunksize
                           and Config.multipart_threshold):
            raise TypeError('part size may not be 0 or None')

        mock_keys = self._mock_bucket_keys('PutObject')
        with open(path, 'rb') as f:
            mock_keys[self.key] = dict(body=f.read(),
                                       time_modified=_boto3_now())
Example #10
def test_upload_file_simulating_S3UploadFailedError(tmp_dir_fixture):  # NOQA
    """
    Mock scenario where upload fails with a S3UploadFailedError exception.
    """

    from dtool_s3.storagebroker import _upload_file  # NOQA
    import boto3
    from boto3.exceptions import S3UploadFailedError

    s3client = boto3.client("s3")
    s3client.upload_file = MagicMock(
        side_effect=S3UploadFailedError()
    )

    value = _upload_file(
        s3client,
        "dummy_fpath",
        "dummy_bucket",
        "dummy_dest_path",
        "dummy_extra_args",
    )

    assert value is False
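The test above only pins down the contract of _upload_file, not its implementation. A minimal sketch of a wrapper with that contract (this is not the actual dtool_s3 code, just the behaviour the assertion implies: return False when boto3 raises S3UploadFailedError):

from boto3.exceptions import S3UploadFailedError

def _upload_file(s3client, fpath, bucket, dest_path, extra_args):
    try:
        s3client.upload_file(fpath, bucket, dest_path, ExtraArgs=extra_args)
    except S3UploadFailedError:
        return False
    return True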
Example #11
def test_upload_file_with_s3_upload_failed_error():
    """Tests Upload file with S3UploadFailedError, which could indicate AWS token expires."""
    upload_file = MagicMock(side_effect=S3UploadFailedError(
        "An error occurred (ExpiredToken) when calling the "
        "CreateMultipartUpload operation: The provided token has expired."))
    client = Mock()
    client.Object.return_value = MagicMock(metadata=defaultdict(str),
                                           upload_file=upload_file)
    initial_parallel = 100
    client_meta = {
        "stage_info": {
            "location": "sfc-teststage/rwyitestacco/users/1234/",
            "locationType": "S3",
        },
        "cloud_client": client,
    }
    upload_meta = {
        "name": "data1.txt.gz",
        "stage_location_type": "S3",
        "no_sleeping_time": True,
        "parallel": initial_parallel,
        "put_callback": None,
        "put_callback_output_stream": None,
        SHA256_DIGEST: "123456789abcdef",
        "client_meta": SFResourceMeta(**client_meta),
        "dst_file_name": "data1.txt.gz",
        "src_file_name": path.join(THIS_DIR, "../data", "put_get_1.txt"),
        "overwrite": True,
    }
    upload_meta["real_src_file_name"] = upload_meta["src_file_name"]
    upload_meta["upload_size"] = os.stat(upload_meta["src_file_name"]).st_size
    meta = SnowflakeFileMeta(**upload_meta)

    akey = SnowflakeRemoteStorageUtil.upload_one_file(meta)
    assert akey is None
    assert meta.result_status == ResultStatus.RENEW_TOKEN
Example #12
def s3_upload(
    bucket: str,
    local_filepath: Union[str, List[str]],
    s3_filepath: Union[str, List[str]],
    profile_name: str = "default",
    region_name: str = "us-west-2",
    multipart_threshold: int = 8388608,
    multipart_chunksize: int = 8388608,
) -> None:
    """Uploads a file or collection of files to S3

    Parameters
    ----------
    bucket : str
        name of S3 bucket
    local_filepath : str or list
        path and filename(s) to be uploaded
    s3_filepath : str or list
        path and filename(s) within the bucket for the file to be uploaded
    region_name : str
        name of AWS region (default value 'us-west-2')
    profile_name : str
        profile name for credentials (default 'default' or organization-specific)
    multipart_threshold : int
        minimum file size to initiate multipart upload
    multipart_chunksize : int
        chunksize for multipart upload

    Returns
    -------
    None

    Example use
    -----------

    # Uploading a single file to S3:
    s3_upload(
        bucket='my_bucket',
        local_filepath='../data/my_file.csv',
        s3_filepath='tmp/my_file.csv')

    # Uploading with a profile name:
    s3_upload(
        bucket='my_bucket',
        profile_name='my-profile-name',
        local_filepath='../data/my_file.csv',
        s3_filepath='tmp/my_file.csv')

    # Uploading a list of files to S3 (will not upload contents of subdirectories):
    s3_upload(
        bucket='my_bucket',
        local_filepath=['../data/my_file1.csv', '../data/my_file2.csv', '../img.png'],
        s3_filepath=['tmp/my_file1.csv', 'tmp/my_file2.csv', 'img.png'])

    # Uploading files matching a pattern to S3 (will not upload contents of subdirectories):
    s3_upload(
        bucket='my_bucket',
        local_filepath='../data/*.csv',
        s3_filepath='tmp/')

    # Uploading all files in a directory to S3 (will not upload contents of subdirectories):
    s3_upload(
        bucket='my_bucket',
        local_filepath='../data/*',
        s3_filepath='tmp/')
    """
    _download_upload_filepath_validator(s3_filepath=s3_filepath,
                                        local_filepath=local_filepath)
    my_bucket = s3_get_bucket(bucket=bucket,
                              profile_name=profile_name,
                              region_name=region_name)
    # multipart_threshold and multipart_chunksize, defaults = Amazon defaults
    config = TransferConfig(multipart_threshold=multipart_threshold,
                            multipart_chunksize=multipart_chunksize)
    if isinstance(local_filepath, str):
        if "*" in local_filepath:
            items = glob.glob(local_filepath)
            # filter out directories
            local_filepath = [item for item in items if os.path.isfile(item)]
            tmp_s3_filepath = [
                s3_filepath + f.split("/")[-1] for f in local_filepath
            ]
            s3_filepath = tmp_s3_filepath
        else:
            local_filepath = [local_filepath]
            s3_filepath = [s3_filepath]
    # upload all files to S3
    for local_file, s3_key in zip(local_filepath, s3_filepath):
        try:
            my_bucket.upload_file(local_file, s3_key, Config=config)
        except boto3.exceptions.S3UploadFailedError as e:
            raise S3UploadFailedError(str(e))
    return
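A tiny standalone illustration of the key-mapping rule used in the glob branch above: each uploaded object's key is the S3 prefix plus the local file's basename (subdirectory structure is not preserved):

local_files = ["../data/a.csv", "../data/b.csv"]
s3_prefix = "tmp/"
keys = [s3_prefix + f.split("/")[-1] for f in local_files]
assert keys == ["tmp/a.csv", "tmp/b.csv"]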
Example #13
def upload_file(**kwargs):
    """Upload manager."""
    filename = kwargs.pop('file')
    client = CosmosIdS3Client(**kwargs)
    config = TransferConfig()
    osutil = OSUtils()
    transfer_manager = CosmosIdTransferManager(client,
                                               config=config,
                                               osutil=osutil)

    subscribers = None

    _, file_name = os.path.split(filename)
    try:
        response = requests.get(client.base_url +
                                '/api/metagenid/v1/files/upload_init',
                                json=dict(file_name=file_name),
                                headers=client.header)
        if response.status_code == 403:
            raise AuthenticationFailed('Authentication Failed. Wrong API Key.')
        if response.status_code == requests.codes.ok:
            sources = response.json()
            future = transfer_manager.upload(filename,
                                             sources['upload_source'],
                                             sources['upload_key'], None,
                                             subscribers)
            s3path, _ = os.path.split(sources['upload_key'])
            data = dict(path=s3path,
                        size=str(os.stat(filename)[6]),
                        name=file_name,
                        parent='')
        else:
            logger.error(
                "File upload initialisation failed. Response code: {}".format(
                    response.status_code))
            raise UploadException(
                "File upload initialisation failed. Response code: {}".format(
                    response.status_code))

        future.result()
        create_response = requests.post(client.base_url +
                                        '/api/metagenid/v1/files',
                                        json=data,
                                        headers=client.header)
        if create_response.status_code == 201:
            return create_response.json()
        else:
            raise UploadException(
                'Failed to upload file: {}'.format(file_name))
    # If a client error was raised, add the backwards compatibility layer
    # that raises a S3UploadFailedError. These specific errors were only
    # ever thrown for upload_parts but now can be thrown for any related
    # client error.
    except ClientError as e:
        raise S3UploadFailedError(
            "Failed to upload %s to %s: %s" % (filename, '/'.join(
                [sources['upload_source'], sources['upload_key']]), e))
    except AuthenticationFailed as ae:
        logger.error('{}'.format(ae))
        return False
    except UploadException as ue:
        logger.error("File Upload Failed. Error: {}".format(ue))
        return False
Example #14
    def upload_file(self, filename, bucket, key,
                    callback=None, extra_args=None):
        """Upload a file to an S3 object.

        Variants have also been injected into S3 client, Bucket and Object.
        You don't have to use S3Transfer.upload_file() directly.

        .. seealso::
            :py:meth:`S3.Client.upload_file`
            :py:meth:`S3.Client.upload_fileobj`
        """
        if not isinstance(filename, six.string_types):
            raise ValueError('Filename must be a string')

        subscribers = self._get_subscribers(callback)
        future = self._manager.upload(
            filename, bucket, key, extra_args, subscribers)
        try:
            future.result()
        # If a client error was raised, add the backwards compatibility layer
        # that raises a S3UploadFailedError. These specific errors were only
        # ever thrown for upload_parts but now can be thrown for any related
        # client error.
        except ClientError as e:
            raise S3UploadFailedError(
                "Failed to upload %s to %s: %s" % (
                    filename, '/'.join([bucket, key]), e))

    def download_file(self, bucket, key, filename, extra_args=None,
                      callback=None):
        """Download an S3 object to a file.

        Variants have also been injected into S3 client, Bucket and Object.
        You don't have to use S3Transfer.download_file() directly.

        .. seealso::
            :py:meth:`S3.Client.download_file`
            :py:meth:`S3.Client.download_fileobj`
        """
        if not isinstance(filename, six.string_types):
            raise ValueError('Filename must be a string')

        subscribers = self._get_subscribers(callback)
        future = self._manager.download(
            bucket, key, filename, extra_args, subscribers)
        try:
            future.result()
        # This is for backwards compatibility where when retries are
        # exceeded we need to throw the same error from boto3 instead of
        # s3transfer's built in RetriesExceededError as current users are
        # catching the boto3 one instead of the s3transfer exception to do
        # their own retries.
        except S3TransferRetriesExceededError as e:
            raise RetriesExceededError(e.last_exception)

    def _get_subscribers(self, callback):
        if not callback:
            return None
        return [ProgressCallbackInvoker(callback)]

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self._manager.__exit__(*args)


class ProgressCallbackInvoker(BaseSubscriber):
    """A back-compat wrapper to invoke a provided callback via a subscriber

    :param callback: A callable that takes a single positional argument for
        how many bytes were transferred.
    """

    def __init__(self, callback):
        self._callback = callback

    def on_progress(self, bytes_transferred, **kwargs):
        self._callback(bytes_transferred)
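A hedged usage sketch for the progress plumbing above: boto3's high-level upload accepts a Callback that is invoked with the number of bytes transferred per chunk, which is exactly what ProgressCallbackInvoker forwards. File, bucket and key names are placeholders:

import os
import boto3

def make_progress_printer(path):
    total = os.path.getsize(path)
    seen = 0

    def on_bytes(n):
        nonlocal seen
        seen += n
        print("{}/{} bytes".format(seen, total))

    return on_bytes

s3 = boto3.client("s3")
s3.upload_file("big.bin", "my-bucket", "backups/big.bin",
               Callback=make_progress_printer("big.bin"))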
Example #15
def upload_file(**kwargs):
    """Upload manager."""
    filename = kwargs.pop('file')
    parent_id = kwargs.pop('parent_id', None)
    multipart_chunksize = file_size = os.stat(filename)[6]  # file size in bytes
    client = kwargs['client']

    if file_size > MULTIPART_THRESHOLD:  # bigger than 1 GB
        multipart_chunksize = min(int(file_size / 10), int(MAX_CHUNK_SIZE))
        multipart_chunksize = max(multipart_chunksize, int(MIN_CHUNK_SIZE))
        LOGGER.info('File size: %s MB', file_size / MB)
        LOGGER.info('Chunk size: %s MB', int(multipart_chunksize / MB))
    config = TransferConfig(multipart_threshold=MULTIPART_THRESHOLD,
                            max_concurrency=MAX_CONCURRENCY,
                            multipart_chunksize=multipart_chunksize)
    osutil = OSUtilsWithCallbacks()
    # Check if given parent folder exists
    if parent_id:
        fl_obj = Files(base_url=kwargs['base_url'], api_key=kwargs['api_key'])
        res = fl_obj.get_list(parent_id=parent_id)
        if not res['status']:
            raise NotFoundException('Parent folder for upload does '
                                    'not exist.')

    transfer_manager = TransferManager(client, config=config, osutil=osutil)

    subscribers = [
        ProgressSubscriber(filename),
    ]

    _, file_name = os.path.split(filename)
    try:
        init_url = client.base_url + urls.UPLOAD_INIT_URL
        response = requests_retry_session().put(init_url,
                                                json=dict(file_name=file_name),
                                                headers=client.header)
        if response.status_code == 402:
            raise NotEnoughCredits('Insufficient credits for upload.')
        if response.status_code == 403:
            raise AuthenticationFailed('Authentication Failed. Wrong API Key.')
        if response.status_code == requests.codes.ok:
            sources = response.json()
            future = transfer_manager.upload(filename,
                                             sources['upload_source'],
                                             sources['upload_key'],
                                             extra_args=None,
                                             subscribers=subscribers)
        else:
            LOGGER.error(
                'File upload initialisation failed. '
                'Response code: %s', response.status_code)
            raise UploadException('File upload initialisation failed. '
                                  'Response code: %s' % response.status_code)
        try:
            future.result()
        except KeyboardInterrupt:
            do_not_retry_event.set()
            return
        return sources['upload_key']

        # If a client error was raised, add the backwards compatibility layer
        # that raises a S3UploadFailedError. These specific errors were only
        # ever thrown for upload_parts but now can be thrown for any related
        # client error.

    except ClientError as error:
        raise S3UploadFailedError("Failed to upload {} to {}: {}".format(
            filename,
            '/'.join([sources['upload_source'],
                      sources['upload_key']]), error))
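The chunk-size arithmetic above aims for roughly ten parts per large file, clamped between a minimum and a maximum part size. A standalone sketch of that rule with assumed constants (the real MULTIPART_THRESHOLD, MIN_CHUNK_SIZE and MAX_CHUNK_SIZE values are not shown in this example):

MB = 1024 ** 2
GB = 1024 ** 3
MULTIPART_THRESHOLD = 1 * GB   # assumption, per the "bigger than 1 GB" comment
MIN_CHUNK_SIZE = 8 * MB        # assumption
MAX_CHUNK_SIZE = 5 * GB        # assumption (S3's hard per-part ceiling)

def pick_chunksize(file_size):
    if file_size <= MULTIPART_THRESHOLD:
        return file_size
    chunk = min(int(file_size / 10), int(MAX_CHUNK_SIZE))
    return max(chunk, int(MIN_CHUNK_SIZE))

assert pick_chunksize(10 * GB) == 1 * GB   # ~10 parts of 1 GB each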
Example #16
def s3_upload(bucket,
              s3_filepath,
              local_filepath,
              permission=None,
              region_name='us-west-2',
              environment=None,
              profile_name=None,
              multipart_threshold=8388608,
              multipart_chunksize=8388608):
    """
    Uploads a file to an S3 bucket and allows you to set permissions on upload.

    If running locally you should have the AWSCREDS running.

    Parameters
    ----------
    bucket : str
        S3 bucket name
    s3_filepath : str or list
        path and filename within the bucket for the file to be uploaded
    local_filepath : str or list
        path and filename for file to be uploaded
    permission : str
        'private'|'public-read'|'public-read-write'|'authenticated-read'
        'aws-exec-read'|'bucket-owner-read'|'bucket-owner-full-control'
    region_name : str
        name of AWS region (default value 'us-west-2')
    environment : str
        'aws' or 'local' depending on whether running locally or in AWS
    profile_name : str
        profile name for credential purposes when running locally,
        typically 'nordstrom-federated'
    multipart_threshold : int
        minimum filesize to initiate multipart upload
    multipart_chunksize : int
        chunksize for multipart upload

    Returns
    -------
    None

    Example use
    -----------
    # to upload a single file
    s3_upload(bucket='nordypy',
              s3_filepath='tmp/myfile.csv',
              local_filepath='../data/myfile.csv',
              environment='local')

    # to upload all files in a directory (will not upload contents of subdirectories)
    s3_upload(bucket='nordypy',
              s3_filepath='tmp/',
              local_filepath='../data/*',
              environment='local')

    # to upload all files in a directory matching a wildcard (will not upload contents of subdirectories)
    s3_upload(bucket='nordypy',
              s3_filepath='tmp/',
              local_filepath='../data/*.csv')
    """
    # TODO check that permission is a proper type
    if type(s3_filepath) == list:
        if len(s3_filepath) != len(local_filepath):
            raise ValueError(
                'Length of s3_filepath arguments must equal length of local_filepath arguments'
            )

    mybucket = s3_get_bucket(bucket, region_name, environment, profile_name)
    # multipart_threshold and multipart_chunksize defaults = Amazon defaults
    config = TransferConfig(multipart_threshold=multipart_threshold,
                            multipart_chunksize=multipart_chunksize)
    if '*' in local_filepath:
        items = glob.glob(local_filepath)
        # filter out directories
        filepaths = [item for item in items if os.path.isfile(item)]
        filenames = [f.split('/')[-1] for f in filepaths]
    else:
        filepaths = [local_filepath]
        filenames = ['']
    for i, filepath in enumerate(filepaths):
        try:
            mybucket.upload_file(filepath,
                                 s3_filepath + filenames[i],
                                 Config=config)
            if permission:
                obj = mybucket.Object(s3_filepath + filenames[i])
                obj.Acl().put(ACL=permission)
        except boto3.exceptions.S3UploadFailedError as e:
            if '(ExpiredToken)' in str(e):
                raise S3UploadFailedError(
                    'If running locally, you must run awscreds in the background. '
                    + str(e))
            else:
                raise e
        print('{} upload complete'.format(filepath))
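As a design note, the two-step upload-then-Acl().put() above can be collapsed into a single call: Bucket.upload_file accepts ExtraArgs, and 'ACL' is one of the allowed upload arguments. A hedged sketch with placeholder bucket and file names:

import boto3
from boto3.s3.transfer import TransferConfig

bucket = boto3.resource('s3').Bucket('my-bucket')
config = TransferConfig(multipart_threshold=8388608,
                        multipart_chunksize=8388608)
bucket.upload_file('../data/myfile.csv', 'tmp/myfile.csv',
                   ExtraArgs={'ACL': 'bucket-owner-full-control'},
                   Config=config)

Setting the ACL at upload time avoids the brief window in which the object exists with only the bucket's default permissions.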