def complete_multipart_upload(self, request):
  """Finalizes a fake multipart upload by validating and joining its parts."""
  MIN_PART_SIZE = 5 * 2**10  # 5 KiB
  received = self.multipart_uploads[request.upload_id]

  # The caller must confirm exactly the set of parts that were uploaded
  confirmed = {part['PartNumber'] for part in request.parts}
  if confirmed != set(received.keys()):
    raise messages.S3ClientError(
        'One or more of the specified parts could not be found', 400)

  # Reassemble the object's contents in ascending part-number order
  ordered_chunks = [received[number] for number in sorted(received)]

  # Every part except the final one must meet the minimum part size
  if any(len(chunk) < MIN_PART_SIZE for chunk in ordered_chunks[:-1]):
    e_message = """
All parts but the last must be larger than %d bytes
""" % MIN_PART_SIZE
    raise messages.S3ClientError(e_message, 400)

  final_contents = b''.join(ordered_chunks)

  # Register the completed object with the fake store
  etag = '"%s-%d"' % ('x' * 32, len(received))
  file_ = FakeFile(request.bucket, request.object, final_contents, etag=etag)
  self.add_file(file_)
def delete_batch(self, request):
  r"""Deletes a batch of objects from a bucket in a single request.

  Args:
    request: (DeleteBatchRequest) input message

  Returns:
    (DeleteBatchResponse) The response message, carrying the keys that
    were deleted, the keys that failed, and an S3ClientError per failure.
  """
  aws_request = {
      'Bucket': request.bucket,
      'Delete': {
          # Renamed loop variable: 'object' shadows the builtin
          'Objects': [{'Key': key} for key in request.objects]
      }
  }

  try:
    aws_response = self.client.delete_objects(**aws_request)
  except Exception as e:
    raise messages.S3ClientError(str(e), get_http_error_code(e))

  deleted = [obj['Key'] for obj in aws_response.get('Deleted', [])]
  failed = [obj['Key'] for obj in aws_response.get('Errors', [])]
  errors = [
      messages.S3ClientError(obj['Message'], obj['Code'])
      for obj in aws_response.get('Errors', [])
  ]

  return messages.DeleteBatchResponse(deleted, failed, errors)
def delete_batch(self, request):
  r"""Deletes a batch of objects from a bucket in a single request.

  Args:
    request: (DeleteBatchRequest) input message

  Returns:
    (DeleteBatchResponse) The response message, carrying the keys that
    were deleted, the keys that failed, and an S3ClientError per failure.
  """
  aws_request = {
      'Bucket': request.bucket,
      'Delete': {
          # Renamed loop variable: 'object' shadows the builtin
          'Objects': [{'Key': key} for key in request.objects]
      }
  }

  try:
    aws_response = self.client.delete_objects(**aws_request)
  except Exception as e:
    # Extract the human-readable message and HTTP status from the boto error
    message = e.response['Error']['Message']
    code = int(e.response['ResponseMetadata']['HTTPStatusCode'])
    raise messages.S3ClientError(message, code)

  deleted = [obj['Key'] for obj in aws_response.get('Deleted', [])]
  failed = [obj['Key'] for obj in aws_response.get('Errors', [])]
  errors = [
      messages.S3ClientError(obj['Message'], obj['Code'])
      for obj in aws_response.get('Errors', [])
  ]

  return messages.DeleteBatchResponse(deleted, failed, errors)
def upload_part(self, request):
  r"""Stores one part of a fake multipart upload.

  Args:
    request: (UploadPartRequest) input message

  Returns:
    (UploadPartResponse) The response message.
  """
  upload_id, part_number = request.upload_id, request.part_number
  # Check the type BEFORE comparing: with the original ordering a non-int
  # part number made 'part_number < 0' raise TypeError instead of producing
  # the intended 400 client error
  if not isinstance(part_number, int) or part_number < 0:
    raise messages.S3ClientError(
        'Param validation failed on part number', 400)
  if upload_id not in self.multipart_uploads:
    raise messages.S3ClientError('The specified upload does not exist', 404)

  # Save off bytes passed to internal data store
  self.multipart_uploads[upload_id][part_number] = request.bytes

  etag = '"%s"' % ('x' * 32)
  return messages.UploadPartResponse(etag, part_number)
def get_object_metadata(self, request):
  r"""Retrieves an object's metadata.

  Args:
    request: (GetRequest) input message

  Returns:
    (Object) The response message.
  """
  try:
    boto_response = self.client.head_object(
        Bucket=request.bucket, Key=request.object)
  except Exception as e:
    # Surface the boto error's message and HTTP status as an S3ClientError
    error, meta = e.response['Error'], e.response['ResponseMetadata']
    raise messages.S3ClientError(error['Message'], meta['HTTPStatusCode'])

  return messages.Item(
      boto_response['ETag'],
      request.object,
      boto_response['LastModified'],
      boto_response['ContentLength'],
      boto_response['ContentType'])
def test_delete_error(self, unused_mock_arg):
  """Batch delete surfaces per-path failures as a BeamIOError."""
  # Stub out S3IO so the filesystem talks to our mock instead of AWS.
  s3io_mock = mock.MagicMock()
  s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]

  problematic_directory = 's3://nonexistent-bucket/tree/'
  exception = messages.S3ClientError('Not found', 404)

  files = [
      problematic_directory,
      's3://bucket/object1',
      's3://bucket/object2',
  ]
  # Only the directory path fails; the two objects delete cleanly.
  s3io_mock.delete_paths.return_value = {
      problematic_directory: exception,
      's3://bucket/object1': None,
      's3://bucket/object2': None,
  }
  s3io_mock.size.return_value = 0

  expected_results = {problematic_directory: exception}

  # Issue batch delete and verify only the failing path is reported.
  with self.assertRaises(BeamIOError) as error:
    self.fs.delete(files)
  self.assertIn('Delete operation failed', str(error.exception))
  self.assertEqual(error.exception.exception_details, expected_results)
  s3io_mock.delete_paths.assert_called()
def get_range(self, request, start, end):
  r"""Retrieves an object's contents.

  Args:
    request: (GetRequest) request
    start: (int) start offset
    end: (int) end offset (exclusive)

  Returns:
    (bytes) The response message.
  """
  attempts = 2
  for attempt in range(attempts):
    try:
      stream = self.get_stream(request, start)
      data = stream.read(end - start)
      self._download_pos += len(data)
      return data
    except Exception as e:
      # Drop the cached stream so a retry opens a fresh connection
      self._download_stream = None
      self._download_request = None
      if attempt < attempts - 1:
        # Read errors are likely with long-lived connections, retry
        # immediately if a read fails once
        continue
      if isinstance(e, messages.S3ClientError):
        raise e
      raise messages.S3ClientError(str(e), get_http_error_code(e))
def get_stream(self, request, start):
  """Opens a stream object starting at the given position.

  Reuses the cached download stream when it is already positioned at
  'start' for the same bucket/object; otherwise opens a new one.

  Args:
    request: (GetRequest) request
    start: (int) start offset

  Returns:
    (Stream) Boto3 stream object.
  """
  # The cached stream is only valid if it is positioned at exactly the
  # requested offset for the same bucket and object; otherwise discard it.
  if self._download_request and (
      start != self._download_pos or
      request.bucket != self._download_request.bucket or
      request.object != self._download_request.object):
    self._download_stream.close()
    self._download_stream = None

  # _raw_stream is botocore-internal, hence the inspection suppression;
  # a closed raw stream also forces a re-open.
  # noinspection PyProtectedMember
  if not self._download_stream or self._download_stream._raw_stream.closed:
    try:
      # Open-ended Range request: bytes from 'start' to the end of the
      # object, consumed incrementally by subsequent sequential reads.
      self._download_stream = self.client.get_object(
          Bucket=request.bucket,
          Key=request.object,
          Range='bytes={}-'.format(start))['Body']
      self._download_request = request
      self._download_pos = start
    except Exception as e:
      raise messages.S3ClientError(str(e), get_http_error_code(e))

  return self._download_stream
def copy(self, request):
  """Copies an object from source to destination within S3."""
  try:
    source = {'Bucket': request.src_bucket, 'Key': request.src_key}
    self.client.copy(source, request.dest_bucket, request.dest_key)
  except Exception as e:
    # Surface the boto error's message and HTTP status as an S3ClientError
    error, meta = e.response['Error'], e.response['ResponseMetadata']
    raise messages.S3ClientError(error['Message'], meta['HTTPStatusCode'])
def delete(self, request):
  """Deletes an object from the fake store, mimicking S3 semantics."""
  if request.bucket not in self.known_buckets:
    raise messages.S3ClientError('The specified bucket does not exist', 404)

  # S3 doesn't raise an error if you try to delete a nonexistent file from
  # an extant bucket, so only act when the file actually exists
  if (request.bucket, request.object) in self.files:
    self.delete_file(request.bucket, request.object)
def delete(self, request):
  r"""Deletes given object from bucket

  Args:
    request: (DeleteRequest) input message

  Returns:
    (void) Void, otherwise will raise if an error occurs
  """
  kwargs = {'Bucket': request.bucket, 'Key': request.object}
  try:
    self.client.delete_object(**kwargs)
  except Exception as err:
    # Wrap any boto failure in the project's client error type
    raise messages.S3ClientError(str(err), get_http_error_code(err))
def list(self, request):
  r"""Retrieves a list of objects matching the criteria.

  Args:
    request: (ListRequest) input message

  Returns:
    (ListResponse) The response message.
  """
  kwargs = {'Bucket': request.bucket, 'Prefix': request.prefix}
  if request.continuation_token is not None:
    kwargs['ContinuationToken'] = request.continuation_token

  try:
    boto_response = self.client.list_objects_v2(**kwargs)
  except Exception as e:
    error, meta = e.response['Error'], e.response['ResponseMetadata']
    raise messages.S3ClientError(error['Message'], meta['HTTPStatusCode'])

  # An empty listing is treated as a nonexistent path by this client
  if boto_response['KeyCount'] == 0:
    message = 'Tried to list nonexistent S3 path: s3://%s/%s' % (
        request.bucket, request.prefix)
    raise messages.S3ClientError(message, 404)

  items = [
      messages.Item(
          etag=content['ETag'],
          key=content['Key'],
          last_modified=content['LastModified'],
          size=content['Size']) for content in boto_response['Contents']
  ]

  # NextContinuationToken is only present when more results are available
  next_token = boto_response.get('NextContinuationToken')

  return messages.ListResponse(items, next_token)
def delete(self, request):
  r"""Deletes given object from bucket

  Args:
    request: (DeleteRequest) input message

  Returns:
    (void) Void, otherwise will raise if an error occurs
  """
  kwargs = {'Bucket': request.bucket, 'Key': request.object}
  try:
    self.client.delete_object(**kwargs)
  except Exception as e:
    # Surface the boto error's message and HTTP status as an S3ClientError
    error, meta = e.response['Error'], e.response['ResponseMetadata']
    raise messages.S3ClientError(error['Message'], meta['HTTPStatusCode'])
def copy_paths(self, src_dest_pairs):
  """Copies the given S3 objects from src to dest. This can handle directory
  or file paths.

  Args:
    src_dest_pairs: list of (src, dest) tuples of s3://<bucket>/<name> file
                    paths to copy from src to dest

  Returns: List of tuples of (src, dest, exception) in the same order as the
           src_dest_pairs argument, where exception is None if the operation
           succeeded or the relevant exception if the operation failed.
  """
  if not src_dest_pairs:
    return []

  results = []
  for src_path, dest_path in src_dest_pairs:
    src_is_dir = src_path.endswith('/')
    dest_is_dir = dest_path.endswith('/')

    if src_is_dir and dest_is_dir:
      # Directory-to-directory copies delegate to copy_tree
      try:
        results.extend(self.copy_tree(src_path, dest_path))
      except messages.S3ClientError as err:
        results.append((src_path, dest_path, err))
    elif not src_is_dir and not dest_is_dir:
      # Individual file copies go through a single CopyRequest
      src_bucket, src_key = parse_s3_path(src_path)
      dest_bucket, dest_key = parse_s3_path(dest_path)
      request = messages.CopyRequest(
          src_bucket, src_key, dest_bucket, dest_key)
      try:
        self.client.copy(request)
        results.append((src_path, dest_path, None))
      except messages.S3ClientError as err:
        results.append((src_path, dest_path, err))
    else:
      # Mismatched paths (one directory, one non-directory) get an error result
      err = messages.S3ClientError(
          "Can't copy mismatched paths (one directory, one non-directory):" +
          ' %s, %s' % (src_path, dest_path), 400)
      results.append((src_path, dest_path, err))

  return results
def complete_multipart_upload(self, request):
  r"""Completes a multipart upload to S3

  Args:
    request: (UploadPartRequest) input message

  Returns:
    (Void) The response message.
  """
  try:
    self.client.complete_multipart_upload(
        Bucket=request.bucket,
        Key=request.object,
        UploadId=request.upload_id,
        MultipartUpload={'Parts': request.parts})
  except Exception as err:
    # Wrap any boto failure in the project's client error type
    raise messages.S3ClientError(str(err), get_http_error_code(err))
def create_multipart_upload(self, request):
  r"""Initates a multipart upload to S3 for a given object

  Args:
    request: (UploadRequest) input message

  Returns:
    (UploadResponse) The response message.
  """
  kwargs = {
      'Bucket': request.bucket,
      'Key': request.object,
      'ContentType': request.mime_type,
  }
  try:
    boto_response = self.client.create_multipart_upload(**kwargs)
    response = messages.UploadResponse(boto_response['UploadId'])
  except Exception as err:
    # Wrap any boto failure in the project's client error type
    raise messages.S3ClientError(str(err), get_http_error_code(err))
  return response
def complete_multipart_upload(self, request):
  r"""Completes a multipart upload to S3

  Args:
    request: (UploadPartRequest) input message

  Returns:
    (Void) The response message.
  """
  try:
    self.client.complete_multipart_upload(
        Bucket=request.bucket,
        Key=request.object,
        UploadId=request.upload_id,
        MultipartUpload={'Parts': request.parts})
  except Exception as e:
    # Surface the boto error's message and HTTP status as an S3ClientError
    error, meta = e.response['Error'], e.response['ResponseMetadata']
    raise messages.S3ClientError(error['Message'], meta['HTTPStatusCode'])
def get_range(self, request, start, end):
  r"""Retrieves a byte range of an object's contents.

  Args:
    request: (GetRequest) request
    start: (int) start offset of the read, inclusive
    end: (int) end offset of the read, exclusive

  Returns:
    (bytes) The response message.
  """
  try:
    # HTTP Range headers are inclusive on both ends, hence end - 1
    boto_response = self.client.get_object(Bucket=request.bucket,
                                           Key=request.object,
                                           Range='bytes={}-{}'.format(
                                               start, end - 1))
  except Exception as e:
    message = e.response['Error']['Message']
    code = e.response['ResponseMetadata']['HTTPStatusCode']
    raise messages.S3ClientError(message, code)

  return boto_response['Body'].read()  # A bytes object
def create_multipart_upload(self, request):
  r"""Initates a multipart upload to S3 for a given object

  Args:
    request: (UploadRequest) input message

  Returns:
    (UploadResponse) The response message.
  """
  kwargs = {
      'Bucket': request.bucket,
      'Key': request.object,
      'ContentType': request.mime_type,
  }
  try:
    boto_response = self.client.create_multipart_upload(**kwargs)
    response = messages.UploadResponse(boto_response['UploadId'])
  except Exception as e:
    # Surface the boto error's message and HTTP status as an S3ClientError
    error, meta = e.response['Error'], e.response['ResponseMetadata']
    raise messages.S3ClientError(error['Message'], meta['HTTPStatusCode'])
  return response
def upload_part(self, request):
  r"""Uploads part of a file to S3 during a multipart upload

  Args:
    request: (UploadPartRequest) input message

  Returns:
    (UploadPartResponse) The response message.
  """
  kwargs = {
      'Body': request.bytes,
      'Bucket': request.bucket,
      'Key': request.object,
      'PartNumber': request.part_number,
      'UploadId': request.upload_id,
  }
  try:
    boto_response = self.client.upload_part(**kwargs)
    return messages.UploadPartResponse(
        boto_response['ETag'], request.part_number)
  except Exception as err:
    # Wrap any boto failure in the project's client error type
    raise messages.S3ClientError(str(err), get_http_error_code(err))
def list(self, request):
  """Lists fake objects under a prefix with simple 5-per-page pagination."""
  bucket = request.bucket
  prefix = request.prefix or ''

  # Collect metadata for every file in this bucket whose name matches the
  # prefix, in sorted (bucket, name) order
  matching_files = [
      self.get_file(file_bucket, file_name).get_metadata()
      for file_bucket, file_name in sorted(iter(self.files))
      if bucket == file_bucket and file_name.startswith(prefix)
  ]

  if not matching_files:
    message = 'Tried to list nonexistent S3 path: s3://%s/%s' % (
        bucket, prefix)
    raise messages.S3ClientError(message, 404)

  # Handle pagination.
  items_per_page = 5
  if not request.continuation_token:
    range_start = 0
  else:
    if request.continuation_token not in self.list_continuation_tokens:
      raise ValueError('Invalid page token.')
    # Tokens are single-use: consume the entry when resuming a listing
    range_start = self.list_continuation_tokens[request.continuation_token]
    del self.list_continuation_tokens[request.continuation_token]

  result = messages.ListResponse(
      items=matching_files[range_start:range_start + items_per_page])

  # If more items remain, mint a continuation token for the next page
  if range_start + items_per_page < len(matching_files):
    next_range_start = range_start + items_per_page
    next_continuation_token = '_page_token_%s_%s_%d' % (
        bucket, prefix, next_range_start)
    self.list_continuation_tokens[next_continuation_token] = next_range_start
    result.next_token = next_continuation_token

  return result
def upload_part(self, request):
  r"""Uploads part of a file to S3 during a multipart upload

  Args:
    request: (UploadPartRequest) input message

  Returns:
    (UploadPartResponse) The response message.
  """
  kwargs = {
      'Body': request.bytes,
      'Bucket': request.bucket,
      'Key': request.object,
      'PartNumber': request.part_number,
      'UploadId': request.upload_id,
  }
  try:
    boto_response = self.client.upload_part(**kwargs)
    return messages.UploadPartResponse(
        boto_response['ETag'], request.part_number)
  except Exception as e:
    # Surface the boto error's message and HTTP status as an S3ClientError
    error, meta = e.response['Error'], e.response['ResponseMetadata']
    raise messages.S3ClientError(error['Message'], meta['HTTPStatusCode'])
def get_object_metadata(self, request):
  """Retrieves an object's metadata.

  Args:
    request: (GetRequest) input message

  Returns:
    (Object) The response message.
  """
  try:
    boto_response = self.client.head_object(
        Bucket=request.bucket, Key=request.object)
  except Exception as err:
    # Wrap any boto failure in the project's client error type
    raise messages.S3ClientError(str(err), get_http_error_code(err))

  return messages.Item(
      boto_response['ETag'],
      request.object,
      boto_response['LastModified'],
      boto_response['ContentLength'],
      boto_response['ContentType'])
def get_file(self, bucket, obj):
  """Looks up a fake file by bucket and object key.

  Args:
    bucket: (str) bucket name
    obj: (str) object key

  Returns:
    The stored file object for (bucket, obj).

  Raises:
    messages.S3ClientError: 404 if the object does not exist.
  """
  try:
    return self.files[bucket, obj]
  except KeyError:
    # A bare 'except:' here would also swallow KeyboardInterrupt and
    # SystemExit; only a missing key should translate to a 404
    raise messages.S3ClientError('Not Found', 404)
def copy(self, request):
  """Copies an object from source to destination within S3."""
  try:
    source = {'Bucket': request.src_bucket, 'Key': request.src_key}
    self.client.copy(source, request.dest_bucket, request.dest_key)
  except Exception as err:
    # Wrap any boto failure in the project's client error type
    raise messages.S3ClientError(str(err), get_http_error_code(err))