def test_upload_exceptions(caplog):
    """Exceptions raised from upload are expressed in error logging"""
    items = [
        BucketItem("tests/test_data/somefile3.txt"),
        BucketItem("tests/test_data/somefile2.txt"),
        BucketItem("tests/test_data/somefile.txt"),
    ]

    client = MockedClient()

    # Uploading fails twice before succeeding
    client._session.resource().Bucket().upload_file.side_effect = [
        S3UploadFailedError("Error uploading somefile3.txt"),
        S3UploadFailedError("Error uploading somefile2.txt"),
        mock.DEFAULT,
    ]

    with caplog.at_level(logging.DEBUG):
        client.upload(items, "test_bucket")

    for msg in [
        "One or more exceptions occurred during upload",
        "Error uploading somefile3.txt",
        "Error uploading somefile2.txt",
    ]:
        assert msg in caplog.text
def test_upload_file_with_s3_upload_failed_error():
    """Tests upload_file() with S3UploadFailedError, which could indicate an expired AWS token."""
    upload_file = MagicMock(
        side_effect=S3UploadFailedError(
            "An error occurred (ExpiredToken) when calling the "
            "CreateMultipartUpload operation: The provided token has expired."))
    client = Mock()
    client.Object.return_value = MagicMock(
        metadata=defaultdict(str),
        upload_file=upload_file)
    initial_parallel = 100
    upload_meta = {
        'no_sleeping_time': True,
        'parallel': initial_parallel,
        'put_callback': None,
        'put_callback_output_stream': None,
        'existing_files': [],
        SHA256_DIGEST: '123456789abcdef',
        'stage_info': {
            'location': 'sfc-teststage/rwyitestacco/users/1234/',
            'locationType': 'S3',
        },
        'client': client,
        'dst_file_name': 'data1.txt.gz',
        'src_file_name': path.join(THIS_DIR, '../data', 'put_get_1.txt'),
        'overwrite': True,
    }
    upload_meta['real_src_file_name'] = upload_meta['src_file_name']
    upload_meta['upload_size'] = os.stat(upload_meta['src_file_name']).st_size

    akey = SnowflakeRemoteStorageUtil.upload_one_file(upload_meta)
    assert akey is None
    assert upload_meta['result_status'] == ResultStatus.RENEW_TOKEN
def upload_file(self, filename, bucket, key,
                callback=None, extra_args=None):
    """Upload a file to an S3 object.

    Variants have also been injected into S3 client, Bucket and Object.
    You don't have to use S3Transfer.upload_file() directly.

    .. seealso::
        :py:meth:`S3.Client.upload_file`
        :py:meth:`S3.Client.upload_fileobj`
    """
    if not isinstance(filename, six.string_types):
        raise ValueError('Filename must be a string')

    subscribers = self._get_subscribers(callback)
    future = self._manager.upload(
        filename, bucket, key, extra_args, subscribers)
    try:
        future.result()
    # If a client error was raised, add the backwards compatibility layer
    # that raises a S3UploadFailedError. These specific errors were only
    # ever thrown for upload_parts but now can be thrown for any related
    # client error.
    except ClientError as e:
        raise S3UploadFailedError(
            "Failed to upload %s to %s: %s" % (
                filename, '/'.join([bucket, key]), e))
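# A minimal usage sketch (not part of the snippet above, and bucket/key names
# are placeholders): the same upload path is reachable either through
# S3Transfer directly or through the injected client method, and any
# ClientError surfaces as S3UploadFailedError.
import boto3
from boto3.exceptions import S3UploadFailedError
from boto3.s3.transfer import S3Transfer

client = boto3.client("s3")
try:
    # injected variant on the client
    client.upload_file("local.txt", "example-bucket", "path/local.txt")
    # equivalent call through S3Transfer
    S3Transfer(client).upload_file("local.txt", "example-bucket", "path/local.txt")
except S3UploadFailedError as err:
    print(f"upload failed: {err}")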
def test_upload_failed_error(caplog):
    """Tests whether token expiry error is handled as expected when uploading."""
    caplog.set_level(logging.DEBUG, "snowflake.connector")
    mock_resource, mock_object = MagicMock(), MagicMock()
    mock_resource.Object.return_value = mock_object
    mock_object.upload_file.side_effect = S3UploadFailedError("ExpiredToken")
    client_meta = {
        "cloud_client": mock_resource,
        "stage_info": {"location": "loc"},
    }
    meta = {
        "name": "f",
        "src_file_name": "f",
        "stage_location_type": "S3",
        "client_meta": SFResourceMeta(**client_meta),
        "sha256_digest": "asd",
        "dst_file_name": "f",
        "put_callback": None,
    }
    meta = SnowflakeFileMeta(**meta)
    with mock.patch(
        "snowflake.connector.s3_util.SnowflakeS3Util.extract_bucket_name_and_path"
    ):
        assert SnowflakeS3Util.upload_file("f", meta, {}, 4, 67108864) is None

    assert (
        "snowflake.connector.s3_util",
        logging.DEBUG,
        "Failed to upload a file: f, err: ExpiredToken. Renewing AWS Token and Retrying",
    ) in caplog.record_tuples
    assert meta.result_status == ResultStatus.RENEW_TOKEN
def upload_file(self, path, Config=None):
    if self.bucket_name not in self.meta.client.mock_s3_fs:
        # upload_file() is a higher-order operation, has fancy errors
        raise S3UploadFailedError(
            'Failed to upload %s to %s/%s: %s' % (
                path, self.bucket_name, self.key,
                str(_no_such_bucket_error('PutObject'))))

    mock_keys = self._mock_bucket_keys('PutObject')

    with open(path, 'rb') as f:
        mock_keys[self.key] = (f.read(), _boto3_now())
def test_upload_failed_error(caplog):
    """Tests whether token expiry error is handled as expected when uploading."""
    mock_resource, mock_object = MagicMock(), MagicMock()
    mock_resource.Object.return_value = mock_object
    mock_object.upload_file.side_effect = S3UploadFailedError('ExpiredToken')
    meta = {'client': mock_resource,
            'sha256_digest': 'asd',
            'stage_info': {'location': 'loc'},
            'dst_file_name': 'f',
            'put_callback': None}
    with mock.patch('snowflake.connector.s3_util.SnowflakeS3Util.extract_bucket_name_and_path'):
        assert SnowflakeS3Util.upload_file('f', meta, {}, 4) is None

    assert ('snowflake.connector.s3_util',
            logging.DEBUG,
            'Failed to upload a file: f, err: ExpiredToken. Renewing AWS Token and Retrying') in caplog.record_tuples
    assert meta['result_status'] == ResultStatus.RENEW_TOKEN
def upload_file(self, filename, bucket, key,
                callback, extra_args):
    response = self._client.create_multipart_upload(Bucket=bucket,
                                                    Key=key, **extra_args)
    upload_id = response['UploadId']
    try:
        parts = self._upload_parts(upload_id, filename, bucket, key,
                                   callback, extra_args)
    except Exception as e:
        logger.debug("Exception raised while uploading parts, "
                     "aborting multipart upload.", exc_info=True)
        self._client.abort_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id)
        raise S3UploadFailedError(
            "Failed to upload %s to %s: %s" % (
                filename, '/'.join([bucket, key]), e))
    self._client.complete_multipart_upload(
        Bucket=bucket, Key=key, UploadId=upload_id,
        MultipartUpload={'Parts': parts})
def upload_backup(self):
    if 's3_bucket' not in self.bkp_conf:
        self.info('no s3 bucket specified, ignore backup to s3')
        return

    self.info_r('backup to s3://{s3_bucket}/{s3_key} ...')

    bc = boto_client(self.bkp_conf['s3_host'],
                     self.bkp_conf['s3_access_key'],
                     self.bkp_conf['s3_secret_key'])

    # boto adds Content-MD5 automatically
    extra_args = {'Metadata': self.bkp_conf['backup_tgz_des3_meta']}

    try:
        boto_put(bc, self.render('{backup_tgz_des3}'),
                 self.render('{s3_bucket}'),
                 self.render('{s3_key}'),
                 extra_args)
    except S3UploadFailedError as e:
        self.info(repr(e) + ' while uploading {backup_tgz_des3} to s2 cloud')

        try:
            resp = boto_head(bc,
                             self.render('{s3_bucket}'),
                             self.render('{s3_key}'))
        except ClientError as ee:
            self.error(repr(ee) +
                       ' backup file: {backup_tgz_des3} not found in s2 cloud')
            raise

        if resp['ResponseMetadata']['HTTPStatusCode'] == 200:
            self.info('backup file: {backup_tgz_des3} already in s2 cloud')
        else:
            self.error(repr(e) + ' get backup file: {backup_tgz_des3} error')
            raise S3UploadFailedError(
                repr(e) + ' while uploading backup file failed')
def upload_file(self, path, Config=None):
    if self.bucket_name not in self.meta.client.mock_s3_fs:
        # upload_file() is a higher-order operation, has fancy errors
        raise S3UploadFailedError(
            'Failed to upload %s to %s/%s: %s' % (
                path, self.bucket_name, self.key,
                str(_no_such_bucket_error('PutObject'))))

    # verify that config doesn't have empty part size (see #2033)
    #
    # config is a boto3.s3.transfer.TransferConfig (we don't mock it),
    # which is actually part of s3transfer. Very old versions of s3transfer
    # (e.g. 0.10.0) disallow initializing TransferConfig with part sizes
    # that are zero or None
    if Config and not (Config.multipart_chunksize and
                       Config.multipart_threshold):
        raise TypeError('part size may not be 0 or None')

    mock_keys = self._mock_bucket_keys('PutObject')

    with open(path, 'rb') as f:
        mock_keys[self.key] = dict(body=f.read(),
                                   time_modified=_boto3_now())
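# A minimal sketch, not from the original repo: building a TransferConfig with
# explicit non-zero part sizes, which is the shape the guard above expects.
# The sizes are arbitrary (8 MB, matching boto3's documented defaults).
from boto3.s3.transfer import TransferConfig

config = TransferConfig(multipart_threshold=8 * 1024 * 1024,
                        multipart_chunksize=8 * 1024 * 1024)
# Both attributes are non-zero, so the mocked upload_file() above would accept it.
assert config.multipart_chunksize and config.multipart_threshold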
def test_upload_file_simulating_S3UploadFailedError(tmp_dir_fixture):  # NOQA
    """
    Mock scenario where upload fails with a S3UploadFailedError exception.
    """
    from dtool_s3.storagebroker import _upload_file  # NOQA

    import boto3
    from boto3.exceptions import S3UploadFailedError

    s3client = boto3.client("s3")
    s3client.upload_file = MagicMock(
        side_effect=S3UploadFailedError()
    )

    value = _upload_file(
        s3client,
        "dummy_fpath",
        "dummy_bucket",
        "dummy_dest_path",
        "dummy_extra_args",
    )

    assert value is False
def test_upload_file_with_s3_upload_failed_error():
    """Tests upload_file() with S3UploadFailedError, which could indicate an expired AWS token."""
    upload_file = MagicMock(side_effect=S3UploadFailedError(
        "An error occurred (ExpiredToken) when calling the "
        "CreateMultipartUpload operation: The provided token has expired."))
    client = Mock()
    client.Object.return_value = MagicMock(metadata=defaultdict(str),
                                           upload_file=upload_file)
    initial_parallel = 100
    client_meta = {
        "stage_info": {
            "location": "sfc-teststage/rwyitestacco/users/1234/",
            "locationType": "S3",
        },
        "cloud_client": client,
    }
    upload_meta = {
        "name": "data1.txt.gz",
        "stage_location_type": "S3",
        "no_sleeping_time": True,
        "parallel": initial_parallel,
        "put_callback": None,
        "put_callback_output_stream": None,
        SHA256_DIGEST: "123456789abcdef",
        "client_meta": SFResourceMeta(**client_meta),
        "dst_file_name": "data1.txt.gz",
        "src_file_name": path.join(THIS_DIR, "../data", "put_get_1.txt"),
        "overwrite": True,
    }
    upload_meta["real_src_file_name"] = upload_meta["src_file_name"]
    upload_meta["upload_size"] = os.stat(upload_meta["src_file_name"]).st_size
    meta = SnowflakeFileMeta(**upload_meta)

    akey = SnowflakeRemoteStorageUtil.upload_one_file(meta)
    assert akey is None
    assert meta.result_status == ResultStatus.RENEW_TOKEN
def s3_upload(
    bucket: str,
    local_filepath: Union[str, List[str]],
    s3_filepath: Union[str, List[str]],
    profile_name: str = "default",
    region_name: str = "us-west-2",
    multipart_threshold: int = 8388608,
    multipart_chunksize: int = 8388608,
) -> None:
    """Uploads a file or collection of files to S3

    Parameters
    ----------
    bucket : str
        name of S3 bucket
    local_filepath : str or list
        path and filename(s) to be uploaded
    s3_filepath : str or list
        path and filename(s) within the bucket for the file to be uploaded
    region_name : str
        name of AWS region (default value 'us-west-2')
    profile_name : str
        profile name for credentials (default 'default' or organization-specific)
    multipart_threshold : int
        minimum file size to initiate multipart upload
    multipart_chunksize : int
        chunksize for multipart upload

    Returns
    -------
    None

    Example use
    -----------
    # Uploading a single file to S3:
    s3_upload(
        bucket='my_bucket',
        local_filepath='../data/my_file.csv',
        s3_filepath='tmp/my_file.csv')

    # Uploading with a profile name:
    s3_upload(
        bucket='my_bucket',
        profile_name='my-profile-name',
        local_filepath='../data/my_file.csv',
        s3_filepath='tmp/my_file.csv')

    # Uploading a list of files to S3 (will not upload contents of subdirectories):
    s3_upload(
        bucket='my_bucket',
        local_filepath=['../data/my_file1.csv', '../data/my_file2.csv', '../img.png'],
        s3_filepath=['tmp/my_file1.csv', 'tmp/my_file2.csv', 'img.png'])

    # Uploading files matching a pattern to S3 (will not upload contents of subdirectories):
    s3_upload(
        bucket='my_bucket',
        local_filepath='../data/*.csv',
        s3_filepath='tmp/')

    # Uploading all files in a directory to S3 (will not upload contents of subdirectories):
    s3_upload(
        bucket='my_bucket',
        local_filepath='../data/*',
        s3_filepath='tmp/')
    """
    _download_upload_filepath_validator(s3_filepath=s3_filepath,
                                        local_filepath=local_filepath)
    my_bucket = s3_get_bucket(bucket=bucket,
                              profile_name=profile_name,
                              region_name=region_name)
    # multipart_threshold and multipart_chunksize, defaults = Amazon defaults
    config = TransferConfig(multipart_threshold=multipart_threshold,
                            multipart_chunksize=multipart_chunksize)
    if isinstance(local_filepath, str):
        if "*" in local_filepath:
            items = glob.glob(local_filepath)
            # filter out directories
            local_filepath = [item for item in items if os.path.isfile(item)]
            tmp_s3_filepath = [
                s3_filepath + f.split("/")[-1] for f in local_filepath
            ]
            s3_filepath = tmp_s3_filepath
        else:
            local_filepath = [local_filepath]
            s3_filepath = [s3_filepath]
    # upload all files to S3
    for local_file, s3_key in zip(local_filepath, s3_filepath):
        try:
            my_bucket.upload_file(local_file, s3_key, Config=config)
        except boto3.exceptions.S3UploadFailedError as e:
            raise S3UploadFailedError(str(e))
    return
def upload_file(**kwargs):
    """Upload manager."""
    filename = kwargs.pop('file')
    client = CosmosIdS3Client(**kwargs)
    config = TransferConfig()
    osutil = OSUtils()
    transfer_manager = CosmosIdTransferManager(client, config=config, osutil=osutil)
    subscribers = None

    _, file_name = os.path.split(filename)
    try:
        response = requests.get(client.base_url + '/api/metagenid/v1/files/upload_init',
                                json=dict(file_name=file_name),
                                headers=client.header)
        if response.status_code == 403:
            raise AuthenticationFailed('Authentication Failed. Wrong API Key.')
        if response.status_code == requests.codes.ok:
            sources = response.json()
            future = transfer_manager.upload(filename,
                                             sources['upload_source'],
                                             sources['upload_key'],
                                             None, subscribers)
            s3path, _ = os.path.split(sources['upload_key'])
            data = dict(path=s3path,
                        size=str(os.stat(filename)[6]),
                        name=file_name,
                        parent='')
        else:
            logger.error(
                "File upload initialisation failed. Response code: {}".format(
                    response.status_code))
            raise UploadException(
                "File upload initialisation failed. Response code: {}".format(
                    response.status_code))
        future.result()
        create_response = requests.post(client.base_url + '/api/metagenid/v1/files',
                                        json=data,
                                        headers=client.header)
        if create_response.status_code == 201:
            return create_response.json()
        else:
            raise UploadException(
                'Failed to upload file: {}'.format(file_name))
    # If a client error was raised, add the backwards compatibility layer
    # that raises a S3UploadFailedError. These specific errors were only
    # ever thrown for upload_parts but now can be thrown for any related
    # client error.
    except ClientError as e:
        raise S3UploadFailedError(
            "Failed to upload %s to %s: %s" % (
                filename,
                '/'.join([sources['upload_source'], sources['upload_key']]), e))
    except AuthenticationFailed as ae:
        logger.error('{}'.format(ae))
        return False
    except UploadException as ue:
        logger.error("File Upload Failed. Error: {}".format(ue))
        return False
def upload_file(self, filename, bucket, key,
                callback=None, extra_args=None):
    # Upload a file to an S3 object.
    #
    # Variants have also been injected into S3 client, Bucket and Object.
    # You don't have to use S3Transfer.upload_file() directly.
    #
    # .. seealso::
    #     :py:meth:`S3.Client.upload_file`
    #     :py:meth:`S3.Client.upload_fileobj`
    if not isinstance(filename, six.string_types):
        raise ValueError('Filename must be a string')

    subscribers = self._get_subscribers(callback)
    future = self._manager.upload(
        filename, bucket, key, extra_args, subscribers)
    try:
        future.result()
    # If a client error was raised, add the backwards compatibility layer
    # that raises a S3UploadFailedError. These specific errors were only
    # ever thrown for upload_parts but now can be thrown for any related
    # client error.
    except ClientError as e:
        raise S3UploadFailedError(
            "Failed to upload %s to %s: %s" % (
                filename, '/'.join([bucket, key]), e))

def download_file(self, bucket, key, filename, extra_args=None,
                  callback=None):
    # Download an S3 object to a file.
    #
    # Variants have also been injected into S3 client, Bucket and Object.
    # You don't have to use S3Transfer.download_file() directly.
    #
    # .. seealso::
    #     :py:meth:`S3.Client.download_file`
    #     :py:meth:`S3.Client.download_fileobj`
    if not isinstance(filename, six.string_types):
        raise ValueError('Filename must be a string')

    subscribers = self._get_subscribers(callback)
    future = self._manager.download(
        bucket, key, filename, extra_args, subscribers)
    try:
        future.result()
    # This is for backwards compatibility where when retries are
    # exceeded we need to throw the same error from boto3 instead of
    # s3transfer's built in RetriesExceededError as current users are
    # catching the boto3 one instead of the s3transfer exception to do
    # their own retries.
    except S3TransferRetriesExceededError as e:
        raise RetriesExceededError(e.last_exception)

def _get_subscribers(self, callback):
    if not callback:
        return None
    return [ProgressCallbackInvoker(callback)]

def __enter__(self):
    return self

def __exit__(self, *args):
    self._manager.__exit__(*args)


class ProgressCallbackInvoker(BaseSubscriber):
    """A back-compat wrapper to invoke a provided callback via a subscriber

    :param callback: A callable that takes a single positional argument
        for how many bytes were transferred.
    """

    def __init__(self, callback):
        self._callback = callback

    def on_progress(self, bytes_transferred, **kwargs):
        self._callback(bytes_transferred)
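# A minimal sketch, assuming a real bucket and local file exist: the callback
# passed to upload_file() is wrapped in ProgressCallbackInvoker, so it receives
# the number of bytes transferred for each chunk. File and bucket names here
# are placeholders.
import boto3
from boto3.s3.transfer import S3Transfer

def report_progress(bytes_transferred):
    # invoked by ProgressCallbackInvoker.on_progress per transferred chunk
    print(f"transferred {bytes_transferred} bytes")

with S3Transfer(boto3.client("s3")) as transfer:
    transfer.upload_file("big_file.bin", "example-bucket", "uploads/big_file.bin",
                         callback=report_progress)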
def upload_file(**kwargs):
    """Upload manager."""
    filename = kwargs.pop('file')
    parent_id = kwargs.pop('parent_id', None)
    multipart_chunksize = file_size = os.stat(filename)[6]  # get size of file in bytes
    client = kwargs['client']
    if file_size > MULTIPART_THRESHOLD:  # bigger than 1GB
        multipart_chunksize = min(int(file_size / 10), int(MAX_CHUNK_SIZE))
        multipart_chunksize = max(multipart_chunksize, int(MIN_CHUNK_SIZE))
        LOGGER.info('File size: %s MB', file_size / MB)
        LOGGER.info('Chunk size: %s MB', int(multipart_chunksize / MB))
    config = TransferConfig(multipart_threshold=MULTIPART_THRESHOLD,
                            max_concurrency=MAX_CONCURRENCY,
                            multipart_chunksize=multipart_chunksize)
    osutil = OSUtilsWithCallbacks()

    # Check if given parent folder exists
    if parent_id:
        fl_obj = Files(base_url=kwargs['base_url'], api_key=kwargs['api_key'])
        res = fl_obj.get_list(parent_id=parent_id)
        if not res['status']:
            raise NotFoundException('Parent folder for upload does not exist.')

    transfer_manager = TransferManager(client, config=config, osutil=osutil)
    subscribers = [ProgressSubscriber(filename)]

    _, file_name = os.path.split(filename)
    try:
        init_url = client.base_url + urls.UPLOAD_INIT_URL
        response = requests_retry_session().put(init_url,
                                                json=dict(file_name=file_name),
                                                headers=client.header)
        if response.status_code == 402:
            raise NotEnoughCredits('Insufficient credits for upload.')
        if response.status_code == 403:
            raise AuthenticationFailed('Authentication Failed. Wrong API Key.')
        if response.status_code == requests.codes.ok:
            sources = response.json()
            future = transfer_manager.upload(filename,
                                             sources['upload_source'],
                                             sources['upload_key'],
                                             extra_args=None,
                                             subscribers=subscribers)
        else:
            LOGGER.error('File upload initialisation failed. '
                         'Response code: %s', response.status_code)
            raise UploadException('File upload initialisation failed. '
                                  'Response code: %s' % response.status_code)

        try:
            future.result()
        except KeyboardInterrupt:
            do_not_retry_event.set()
            return
        return sources['upload_key']
    # If a client error was raised, add the backwards compatibility layer
    # that raises a S3UploadFailedError. These specific errors were only
    # ever thrown for upload_parts but now can be thrown for any related
    # client error.
    except ClientError as error:
        raise S3UploadFailedError("Failed to upload {} to {}: {}".format(
            filename,
            '/'.join([sources['upload_source'], sources['upload_key']]),
            error))
def s3_upload(bucket, s3_filepath, local_filepath, permission=None,
              region_name='us-west-2', environment=None, profile_name=None,
              multipart_threshold=8388608, multipart_chunksize=8388608):
    """
    Uploads a file to an S3 bucket, allows you to set permissions on upload.

    If running locally you should have the AWSCREDS running.

    Parameters
    ----------
    bucket : str
        S3 bucket name
    s3_filepath : str or list
        path and filename within the bucket for the file to be uploaded
    local_filepath : str or list
        path and filename for file to be uploaded
    permission : str
        'private'|'public-read'|'public-read-write'|'authenticated-read'
        'aws-exec-read'|'bucket-owner-read'|'bucket-owner-full-control'
    region_name : str
        name of AWS region (default value 'us-west-2')
    environment : str
        'aws' or 'local' depending on whether running locally or in AWS
    profile_name : str
        profile name for credential purposes when running locally,
        typically 'nordstrom-federated'
    multipart_threshold : int
        minimum file size to initiate multipart upload
    multipart_chunksize : int
        chunksize for multipart upload

    Returns
    -------
    None

    Example use
    -----------
    # to upload a single file
    s3_upload(bucket='nordypy',
              s3_filepath='tmp/myfile.csv',
              local_filepath='../data/myfile.csv',
              environment='local')

    # to upload all files in a directory (will not upload contents of subdirectories)
    s3_upload(bucket='nordypy',
              s3_filepath='tmp/',
              local_filepath='../data/*',
              environment='local')

    # to upload all files in a directory matching a wildcard (will not upload contents of subdirectories)
    s3_upload(bucket='nordypy',
              s3_filepath='tmp/',
              local_filepath='../data/*.csv')
    """
    # TODO check that permission is a proper type
    if type(s3_filepath) == list:
        if len(s3_filepath) != len(local_filepath):
            raise ValueError(
                'Length of s3_filepath arguments must equal length of local_filepath arguments'
            )
    mybucket = s3_get_bucket(bucket, region_name, environment, profile_name)
    # multipart_threshold and multipart_chunksize defaults = Amazon defaults
    config = TransferConfig(multipart_threshold=multipart_threshold,
                            multipart_chunksize=multipart_chunksize)
    if '*' in local_filepath:
        items = glob.glob(local_filepath)
        # filter out directories
        filepaths = [item for item in items if os.path.isfile(item)]
        filenames = [f.split('/')[-1] for f in filepaths]
    else:
        filepaths = [local_filepath]
        filenames = ['']
    for i, filepath in enumerate(filepaths):
        try:
            mybucket.upload_file(filepath, s3_filepath + filenames[i], Config=config)
            if permission:
                obj = mybucket.Object(s3_filepath + filenames[i])
                obj.Acl().put(ACL=permission)
        except boto3.exceptions.S3UploadFailedError as e:
            if '(ExpiredToken)' in str(e):
                raise S3UploadFailedError(
                    'If running locally, you must run awscreds in the background. ' + str(e))
            else:
                raise e
        print('{} upload complete'.format(filepath))