def test_retry_on_failures_from_stream_reads(self):
    # If we get an exception during a call to the response body's
    # .read() method, we should retry the request.
    client = mock.Mock()
    response_body = b'foobarbaz'
    stream_with_errors = mock.Mock()
    stream_with_errors.read.side_effect = [
        socket.error("fake error"),
        response_body
    ]
    client.get_object.return_value = {'Body': stream_with_errors}
    config = TransferConfig(multipart_threshold=4,
                            multipart_chunksize=4)
    downloader = MultipartDownloader(client, config,
                                     InMemoryOSLayer({}),
                                     SequentialExecutor)
    downloader.download_file('bucket', 'key', 'filename',
                             len(response_body), {})

    # We're storing these in **extra because the assertEqual
    # below is really about verifying we have the correct value
    # for the Range param.
    extra = {'Bucket': 'bucket', 'Key': 'key'}
    self.assertEqual(
        client.get_object.call_args_list,
        # The first call to range=0-3 fails because of the
        # side_effect above where we make the .read() raise a
        # socket.error.
        # The second call to range=0-3 then succeeds.
        [mock.call(Range='bytes=0-3', **extra),
         mock.call(Range='bytes=0-3', **extra),
         mock.call(Range='bytes=4-7', **extra),
         mock.call(Range='bytes=8-', **extra)])
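# For context: the behavior exercised above is, in essence, a bounded
# retry loop around the body read.  A minimal sketch, assuming a
# hypothetical max_attempts parameter and the RetriesExceededError the
# downloader raises once retries are exhausted:
def download_range_with_retries(client, bucket, key, range_param,
                                max_attempts=5):
    last_exception = None
    for _ in range(max_attempts):
        response = client.get_object(Bucket=bucket, Key=key,
                                     Range=range_param)
        try:
            return response['Body'].read()
        except socket.error as e:
            # A failed .read() triggers a fresh GetObject for the same
            # byte range, which is why the assertion above expects two
            # calls with Range='bytes=0-3'.
            last_exception = e
    raise RetriesExceededError(last_exception)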
def test_multipart_upload_uses_correct_client_calls(self):
    client = mock.Mock()
    uploader = MultipartUploader(client, TransferConfig(),
                                 InMemoryOSLayer({'filename': b'foobar'}),
                                 SequentialExecutor)
    client.create_multipart_upload.return_value = {'UploadId': 'upload_id'}
    client.upload_part.return_value = {'ETag': 'first'}

    uploader.upload_file('filename', 'bucket', 'key', None, {})

    # We need to check both the sequence of calls (create/upload/complete)
    # as well as the params passed between the calls, including:
    # 1. The upload_id was plumbed through.
    # 2. The collected etags were added to the complete call.
    client.create_multipart_upload.assert_called_with(Bucket='bucket',
                                                      Key='key')
    # With the default chunksize, b'foobar' fits in a single part.
    client.upload_part.assert_called_with(Body=mock.ANY, Bucket='bucket',
                                          UploadId='upload_id', Key='key',
                                          PartNumber=1)
    client.complete_multipart_upload.assert_called_with(
        MultipartUpload={'Parts': [{'PartNumber': 1, 'ETag': 'first'}]},
        Bucket='bucket', UploadId='upload_id', Key='key')
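# The assertions above pin down the standard S3 multipart protocol:
# CreateMultipartUpload -> UploadPart (collecting ETags) ->
# CompleteMultipartUpload.  An illustrative sketch of that flow with
# plain client calls (parts_data is a hypothetical iterable of bytes):
def multipart_upload_sketch(client, bucket, key, parts_data):
    upload_id = client.create_multipart_upload(
        Bucket=bucket, Key=key)['UploadId']
    parts = []
    for part_number, body in enumerate(parts_data, start=1):
        response = client.upload_part(
            Body=body, Bucket=bucket, Key=key,
            UploadId=upload_id, PartNumber=part_number)
        # Each part's ETag must be echoed back on completion.
        parts.append({'PartNumber': part_number, 'ETag': response['ETag']})
    client.complete_multipart_upload(
        Bucket=bucket, Key=key, UploadId=upload_id,
        MultipartUpload={'Parts': parts})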
def test_multipart_download_with_multiple_parts(self):
    client = mock.Mock()
    response_body = b'foobarbaz'
    client.get_object.return_value = {'Body': six.BytesIO(response_body)}
    # For testing purposes, we're testing with a multipart threshold
    # of 4 bytes and a chunksize of 4 bytes.  Given b'foobarbaz',
    # this should result in 3 calls.  In python slices this would be:
    # r[0:4], r[4:8], r[8:9].  But the Range param will be slightly
    # different because they use inclusive ranges.
    config = TransferConfig(multipart_threshold=4,
                            multipart_chunksize=4)
    downloader = MultipartDownloader(client, config,
                                     InMemoryOSLayer({}),
                                     SequentialExecutor)
    downloader.download_file('bucket', 'key', 'filename',
                             len(response_body), {})

    # We're storing these in **extra because the assertEqual
    # below is really about verifying we have the correct value
    # for the Range param.
    extra = {'Bucket': 'bucket', 'Key': 'key'}
    self.assertEqual(
        client.get_object.call_args_list,
        # Note these are inclusive ranges.
        [mock.call(Range='bytes=0-3', **extra),
         mock.call(Range='bytes=4-7', **extra),
         mock.call(Range='bytes=8-', **extra)])
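# How the inclusive Range headers fall out of the slice math; an
# illustrative helper, not part of s3transfer's public API:
def byte_range_for_part(part_index, chunksize, num_parts):
    # HTTP Range headers use inclusive offsets, so the slice r[0:4]
    # becomes 'bytes=0-3'.  The last part is left open-ended so any
    # trailing bytes are included.
    start = part_index * chunksize
    if part_index == num_parts - 1:
        return 'bytes=%s-' % start
    return 'bytes=%s-%s' % (start, start + chunksize - 1)

# For a 9-byte object with a 4-byte chunksize:
#   byte_range_for_part(0, 4, 3) -> 'bytes=0-3'
#   byte_range_for_part(1, 4, 3) -> 'bytes=4-7'
#   byte_range_for_part(2, 4, 3) -> 'bytes=8-'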
def test_multipart_upload_injects_proper_kwargs(self):
    client = mock.Mock()
    uploader = MultipartUploader(
        client, TransferConfig(),
        InMemoryOSLayer({'filename': b'foobar'}),
        SequentialExecutor)
    client.create_multipart_upload.return_value = {'UploadId': 'upload_id'}
    client.upload_part.return_value = {'ETag': 'first'}

    extra_args = {
        'SSECustomerKey': 'fakekey',
        'SSECustomerAlgorithm': 'AES256',
        'StorageClass': 'REDUCED_REDUNDANCY',
    }
    uploader.upload_file('filename', 'bucket', 'key', None, extra_args)

    client.create_multipart_upload.assert_called_with(
        Bucket='bucket', Key='key',
        # The initial call should inject all the storage class params.
        SSECustomerKey='fakekey',
        SSECustomerAlgorithm='AES256',
        StorageClass='REDUCED_REDUNDANCY')
    client.upload_part.assert_called_with(
        Body=mock.ANY, Bucket='bucket',
        UploadId='upload_id', Key='key', PartNumber=1,
        # We only have to forward certain **extra_args in subsequent
        # UploadPart calls.
        SSECustomerKey='fakekey',
        SSECustomerAlgorithm='AES256')
    client.complete_multipart_upload.assert_called_with(
        MultipartUpload={'Parts': [{'PartNumber': 1, 'ETag': 'first'}]},
        Bucket='bucket', UploadId='upload_id', Key='key')
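# The forwarding behavior asserted above amounts to an allowlist:
# CreateMultipartUpload receives every extra arg (e.g. StorageClass),
# but only SSE-C style params are repeated on each UploadPart call.
# A sketch with a hypothetical allowlist tuple:
UPLOAD_PART_ARGS = ('SSECustomerKey', 'SSECustomerAlgorithm',
                    'SSECustomerKeyMD5', 'RequestPayer')

def extra_upload_part_args(extra_args):
    return {key: value for key, value in extra_args.items()
            if key in UPLOAD_PART_ARGS}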
def test_uses_multipart_upload_when_over_threshold(self):
    with mock.patch('s3transfer.MultipartUploader') as uploader:
        fake_files = {
            'smallfile': b'foobar',
        }
        osutil = InMemoryOSLayer(fake_files)
        config = TransferConfig(multipart_threshold=2,
                                multipart_chunksize=2)
        transfer = S3Transfer(self.client, osutil=osutil, config=config)
        transfer.upload_file('smallfile', 'bucket', 'key')

        uploader.return_value.upload_file.assert_called_with(
            'smallfile', 'bucket', 'key', None, {})
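# The dispatch being patched around is a size check against the
# configured multipart_threshold.  A hedged sketch, assuming the
# osutil exposes a get_file_size() helper like the one these tests
# stub out (names are illustrative, not s3transfer internals):
def upload_file_dispatch(client, config, osutil, filename, bucket, key):
    if osutil.get_file_size(filename) >= config.multipart_threshold:
        uploader = MultipartUploader(client, config, osutil,
                                     SequentialExecutor)
        uploader.upload_file(filename, bucket, key, None, {})
    else:
        with osutil.open(filename, 'rb') as body:
            client.put_object(Bucket=bucket, Key=key, Body=body)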
def test_exception_raised_on_exceeded_retries(self):
    client = mock.Mock()
    response_body = b'foobarbaz'
    stream_with_errors = mock.Mock()
    stream_with_errors.read.side_effect = socket.error("fake error")
    client.get_object.return_value = {'Body': stream_with_errors}
    config = TransferConfig(multipart_threshold=4,
                            multipart_chunksize=4)
    downloader = MultipartDownloader(client, config,
                                     InMemoryOSLayer({}),
                                     SequentialExecutor)

    with self.assertRaises(RetriesExceededError):
        downloader.download_file('bucket', 'key', 'filename',
                                 len(response_body), {})
def test_multipart_download_uses_correct_client_calls(self):
    client = mock.Mock()
    response_body = b'foobarbaz'
    client.get_object.return_value = {'Body': six.BytesIO(response_body)}
    downloader = MultipartDownloader(client, TransferConfig(),
                                     InMemoryOSLayer({}),
                                     SequentialExecutor)
    downloader.download_file('bucket', 'key', 'filename',
                             len(response_body), {})

    client.get_object.assert_called_with(Range='bytes=0-',
                                         Bucket='bucket', Key='key')
def test_multipart_upload_is_aborted_on_error(self):
    # If the create_multipart_upload succeeds and any upload_part
    # fails, then abort_multipart_upload will be called.
    client = mock.Mock()
    uploader = MultipartUploader(
        client, TransferConfig(),
        InMemoryOSLayer({'filename': b'foobar'}),
        SequentialExecutor)
    client.create_multipart_upload.return_value = {'UploadId': 'upload_id'}
    client.upload_part.side_effect = Exception(
        "Some kind of error occurred.")

    with self.assertRaises(S3UploadFailedError):
        uploader.upload_file('filename', 'bucket', 'key', None, {})

    client.abort_multipart_upload.assert_called_with(
        Bucket='bucket', Key='key', UploadId='upload_id')
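# The abort behavior under test, sketched with plain calls: once
# CreateMultipartUpload succeeds, any later failure must abort the
# upload so S3 does not accumulate orphaned (and billed) parts.
# upload_parts is a hypothetical callable standing in for the part
# upload loop:
def upload_with_abort_on_error(client, bucket, key, upload_parts):
    upload_id = client.create_multipart_upload(
        Bucket=bucket, Key=key)['UploadId']
    try:
        upload_parts(upload_id)
    except Exception as e:
        client.abort_multipart_upload(Bucket=bucket, Key=key,
                                      UploadId=upload_id)
        raise S3UploadFailedError(
            "Failed to upload %s/%s: %s" % (bucket, key, e))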
def test_io_thread_failure_triggers_shutdown(self):
    client = mock.Mock()
    response_body = b'foobarbaz'
    client.get_object.return_value = {'Body': six.BytesIO(response_body)}
    os_layer = mock.Mock()
    mock_fileobj = mock.MagicMock()
    mock_fileobj.__enter__.return_value = mock_fileobj
    mock_fileobj.write.side_effect = Exception("fake IO error")
    os_layer.open.return_value = mock_fileobj

    downloader = MultipartDownloader(client, TransferConfig(),
                                     os_layer, SequentialExecutor)
    # We're verifying that the exception raised from the IO future
    # propagates back up via download_file().
    with self.assertRaisesRegexp(Exception, "fake IO error"):
        downloader.download_file('bucket', 'key', 'filename',
                                 len(response_body), {})
def test_download_futures_fail_triggers_shutdown(self):
    class FailedDownloadParts(SequentialExecutor):
        def __init__(self, max_workers):
            self.is_first = True

        def submit(self, function):
            future = futures.Future()
            if self.is_first:
                # This is the download_parts_thread.
                future.set_exception(
                    Exception("fake download parts error"))
                self.is_first = False
            return future

    client = mock.Mock()
    response_body = b'foobarbaz'
    client.get_object.return_value = {'Body': six.BytesIO(response_body)}
    downloader = MultipartDownloader(client, TransferConfig(),
                                     InMemoryOSLayer({}),
                                     FailedDownloadParts)

    with self.assertRaisesRegexp(Exception, "fake download parts error"):
        downloader.download_file('bucket', 'key', 'filename',
                                 len(response_body), {})
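# Why the exception surfaces at all: futures.Future.result() re-raises
# whatever exception was set on the future, so the downloader only has
# to call result() on its worker futures.  A minimal, runnable
# illustration:
from concurrent import futures

future = futures.Future()
future.set_exception(Exception("fake download parts error"))
try:
    future.result()
except Exception as e:
    print("propagated:", e)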
log.info("Executing the process") init_result = subprocess.run(init_cmd) ### Check if the process was successful if init_result.returncode != 0: log.fatal("Non-zero exit code {} from '{}'".format(init_result.returncode, init_result.args)) raise SystemExit(init_result.returncode) log.info("Success, recording results") metadata["finished_at"] = datetime.now().strftime(MD_DATEFORMAT) ### Upload the files to S3 s3 = botocore.session.get_session().create_client("s3") transfer = S3Transfer( s3, config=TransferConfig(multipart_chunksize=1024 * 1024 * 1024)) # 1 GiB chunk should be enough s3_upload_success = True for dirname, subdirs, files in os.walk(SPACEMESH_DATADIR): # Get the path relative to data dir subdir = os.path.relpath(dirname, SPACEMESH_DATADIR) # Iterate over files for f in files: fullpath = os.path.join(dirname, f) if SPACEMESH_ID is None and f == "key.bin": SPACEMESH_ID = subdir log.info("Found client miner id '{}' from path '{}'".format( SPACEMESH_ID, fullpath)) s3_key = os.path.join(SPACEMESH_S3_PREFIX, subdir, f)
endpoint = "s3_endpoint" # e.g. https://s3.eu-central-1.amazonaws.com bucket = "s3_bucket" # ---------------------------------- # load all log files from local folder base_path = Path(__file__).parent fs = canedge_browser.LocalFileSystem(base_path=base_path) log_files = canedge_browser.get_log_files(fs, devices) print(f"Found a total of {len(log_files)} log files") s3 = boto3.client( "s3", endpoint_url=endpoint, aws_access_key_id=key, aws_secret_access_key=secret, config=Config(signature_version="s3v4"), ) transfer = S3Transfer(s3, TransferConfig(multipart_threshold=9999999999999999, max_concurrency=10, num_download_attempts=10,)) # for each log file, extract header information, create S3 key and upload for log_file in log_files: with fs.open(log_file, "rb") as handle: mdf_file = mdf_iter.MdfFile(handle) header = "HDComment.Device Information" device_id = mdf_file.get_metadata()[f"{header}.serial number"]["value_raw"] session = mdf_file.get_metadata()[f"{header}.File Information.session"]["value_raw"] session = f"{(int(session) + session_offset):08}" split = int(mdf_file.get_metadata()[f"{header}.File Information.split"]["value_raw"]) split = f"{split:08}" ext = log_file.split(".")[-1]