def execute(self, callback=None): """Copies file by downloading and uploading in parallel.""" # TODO (b/168712813): Add option to use the Data Transfer component. daisy_chain_stream = QueuingStream(self._source_resource.size) # Perform download in a separate thread so that upload can be performed # simultaneously. download_thread = threading.Thread( target=self._run_download, args=(daisy_chain_stream,)) download_thread.start() destination_client = api_factory.get_api( self._destination_resource.storage_url.scheme) request_config = cloud_api.RequestConfig(size=self._source_resource.size) try: destination_client.upload_object( daisy_chain_stream.readable_stream, self._destination_resource, request_config=request_config) except _AbruptShutdownError: # Not raising daisy_chain_stream.exception_raised here because we want # to wait for the download thread to finish. pass except Exception as e: # pylint: disable=broad-except # For all the other errors raised during upload, we want to to make # sure that the download thread is terminated before we re-reaise. # Hence we catch any exception and store it to be re-raised later. daisy_chain_stream.shutdown(e) download_thread.join() if daisy_chain_stream.exception_raised: raise daisy_chain_stream.exception_raised
def upload_object(self,
                  source_stream,
                  destination_resource,
                  progress_callback=None,
                  request_config=None):
  """See super class."""
  # TODO(b/160998556): Implement resumable upload.
  del progress_callback

  if request_config is None:
    request_config = cloud_api.RequestConfig()

  kwargs = {
      'Bucket': destination_resource.storage_url.bucket_name,
      'Key': destination_resource.storage_url.object_name,
      'Body': source_stream.read(),
  }
  if request_config.predefined_acl_string:
    kwargs['ACL'] = _translate_predefined_acl_string_to_s3(
        request_config.predefined_acl_string)
  if request_config.md5_hash:
    kwargs['ContentMD5'] = request_config.md5_hash

  response = self.client.put_object(**kwargs)
  return _get_object_resource_from_s3_response(
      response, destination_resource.storage_url.bucket_name,
      destination_resource.storage_url.object_name)
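# A minimal sketch of the _translate_predefined_acl_string_to_s3 helper,
# assuming a simple lookup from gsutil-style predefined ACL names to the
# canned ACL strings S3 accepts. The error message matches the test below;
# the specific mapping entries are assumptions.
_GSUTIL_TO_S3_PREDEFINED_ACL = {
    'private': 'private',
    'publicRead': 'public-read',
    'publicReadWrite': 'public-read-write',
    'authenticatedRead': 'authenticated-read',
    'bucketOwnerRead': 'bucket-owner-read',
    'bucketOwnerFullControl': 'bucket-owner-full-control',
}


def _translate_predefined_acl_string_to_s3(predefined_acl_string):
  """Returns the S3 canned ACL matching a predefined ACL name."""
  if predefined_acl_string not in _GSUTIL_TO_S3_PREDEFINED_ACL:
    raise ValueError('Could not translate predefined_acl_string {} to'
                     ' AWS-accepted ACL.'.format(predefined_acl_string))
  return _GSUTIL_TO_S3_PREDEFINED_ACL[predefined_acl_string]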
def execute(self, task_status_queue=None):
  """Performs upload."""
  progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
      status_queue=task_status_queue,
      size=self._length,
      source_url=self._source_resource.storage_url,
      destination_url=self._destination_resource.storage_url,
      component_number=self._component_number,
      total_components=self._total_components,
      operation_name=task_status.OperationName.UPLOADING,
      process_id=os.getpid(),
      thread_id=threading.get_ident(),
  )

  source_stream = files.BinaryFileReader(
      self._source_resource.storage_url.object_name)
  provider = self._destination_resource.storage_url.scheme

  with file_part.FilePart(source_stream, self._offset,
                          self._length) as upload_stream:
    api_factory.get_api(provider).upload_object(
        upload_stream,
        self._destination_resource,
        request_config=cloud_api.RequestConfig(
            md5_hash=self._source_resource.md5_hash, size=self._length),
        progress_callback=progress_callback)
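# A minimal sketch of the FilePart context manager used above, assuming it
# exposes a read-only window of `length` bytes starting at `offset` within a
# seekable stream. The real class's seek and error-handling semantics are
# assumptions.
import io


class _SketchFilePart(io.RawIOBase):
  """Restricts reads on an underlying stream to [offset, offset + length)."""

  def __init__(self, stream, offset, length):
    super().__init__()
    self._stream = stream
    self._end = offset + length
    self._stream.seek(offset)

  def read(self, size=-1):
    remaining = self._end - self._stream.tell()
    if remaining <= 0:
      return b''
    if size is None or size < 0:
      size = remaining
    return self._stream.read(min(size, remaining))

  def close(self):
    # Closing the part also closes the wrapped stream, matching the
    # `with ... as upload_stream` usage in execute().
    self._stream.close()
    super().close()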
def test_copy_handles_predefined_acl(self, arg_acl, translated_acl):
  source_resource = resource_reference.UnknownResource(
      storage_url.storage_url_from_string('gs://b/o'))
  destination_resource = resource_reference.UnknownResource(
      storage_url.storage_url_from_string('gs://b/o2'))
  params = {
      'Bucket': destination_resource.storage_url.bucket_name,
      'Key': destination_resource.storage_url.object_name,
      'CopySource': {
          'Bucket': source_resource.storage_url.bucket_name,
          'Key': source_resource.storage_url.object_name
      },
      'ACL': translated_acl
  }
  response = {
      'CopyObjectResult': {'ETag': ETAG, 'LastModified': LAST_MODIFIED}
  }
  self.stubber.add_response(
      'copy_object', service_response=response, expected_params=params)
  expected_resource = self.s3_api._get_object_resource_from_s3_response(
      response, destination_resource.storage_url.bucket_name,
      destination_resource.storage_url.object_name)

  with self.stubber:
    observed_resource = self.s3_api.copy_object(
        source_resource,
        destination_resource,
        request_config=cloud_api.RequestConfig(arg_acl))

  self.assertEqual(observed_resource, expected_resource)
def test_upload_raises_error_for_unrecognized_predefined_acl(self):
  upload_resource = resource_reference.UnknownResource(
      storage_url.CloudUrl(SCHEME, BUCKET_NAME, OBJECT_NAME))
  with self.assertRaisesRegex(
      ValueError,
      ('Could not translate predefined_acl_string fake_acl to'
       ' AWS-accepted ACL.')):
    self.s3_api.upload_object(
        io.BytesIO(BINARY_DATA),
        upload_resource,
        request_config=cloud_api.RequestConfig('fake_acl'))
def upload_object(self,
                  source_stream,
                  destination_resource,
                  progress_callback=None,
                  request_config=None,
                  serialization_data=None,
                  tracker_callback=None,
                  upload_strategy=cloud_api.UploadStrategy.SIMPLE):
  """See super class."""
  del progress_callback, serialization_data, tracker_callback

  if upload_strategy != cloud_api.UploadStrategy.SIMPLE:
    raise command_errors.Error(
        'Invalid upload strategy: {}.'.format(upload_strategy.value))

  if request_config is None:
    request_config = cloud_api.RequestConfig()

  # All fields common to both put_object and upload_fileobj are added to the
  # extra_args dict.
  extra_args = {}
  if request_config.predefined_acl_string:
    extra_args['ACL'] = _translate_predefined_acl_string_to_s3(
        request_config.predefined_acl_string)

  if request_config.md5_hash:
    # The upload_fileobj method can perform multipart uploads, so it cannot
    # validate with user-provided MD5 hashes. Hence we use the put_object API
    # method if MD5 validation is requested.
    if request_config.size > MAX_PUT_OBJECT_SIZE:
      raise errors.S3ApiError(
          'Cannot upload to destination: {url} because MD5 validation can'
          ' only be performed for file size <= {maxsize} Bytes. Current file'
          ' size is {filesize} Bytes. You can remove the MD5 validation'
          ' requirement to complete the upload.'.format(
              url=destination_resource.storage_url.url_string,
              maxsize=MAX_PUT_OBJECT_SIZE,
              filesize=request_config.size))
    extra_args['ContentMD5'] = request_config.md5_hash
    return self._upload_using_put_object(source_stream, destination_resource,
                                         extra_args)
  else:
    # We default to the upload_fileobj method provided by boto3, a
    # managed-transfer utility that can perform multipart uploads
    # automatically. It can be used for non-seekable source_streams as well.
    return self._upload_using_managed_transfer_utility(
        source_stream, destination_resource, extra_args)
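# Minimal sketches of the two helpers dispatched to above, assuming a boto3
# client at self.client. The helper names appear in the method above, but
# these bodies are assumptions; in particular, boto3's upload_fileobj returns
# None, so the second sketch issues a head_object call to build a resource.
def _upload_using_put_object(self, source_stream, destination_resource,
                             extra_args):
  """Uploads in a single request so S3 can verify the ContentMD5 header."""
  response = self.client.put_object(
      Bucket=destination_resource.storage_url.bucket_name,
      Key=destination_resource.storage_url.object_name,
      Body=source_stream.read(),
      **extra_args)
  return _get_object_resource_from_s3_response(
      response, destination_resource.storage_url.bucket_name,
      destination_resource.storage_url.object_name)


def _upload_using_managed_transfer_utility(self, source_stream,
                                           destination_resource, extra_args):
  """Uploads via boto3's managed transfer, which may use multiple parts."""
  self.client.upload_fileobj(
      source_stream,
      destination_resource.storage_url.bucket_name,
      destination_resource.storage_url.object_name,
      ExtraArgs=extra_args)
  # Fetch the uploaded object's metadata to build the returned resource.
  response = self.client.head_object(
      Bucket=destination_resource.storage_url.bucket_name,
      Key=destination_resource.storage_url.object_name)
  return _get_object_resource_from_s3_response(
      response, destination_resource.storage_url.bucket_name,
      destination_resource.storage_url.object_name)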
def execute(self, callback=None):
  destination_url = self._destination_resource.storage_url
  provider = destination_url.scheme
  source_stream = files.BinaryFileReader(
      self._source_resource.storage_url.object_name)

  with file_part.FilePart(source_stream, self._offset,
                          self._length) as upload_stream:
    api_factory.get_api(provider).upload_object(
        upload_stream,
        self._destination_resource,
        request_config=cloud_api.RequestConfig(
            md5_hash=self._source_resource.md5_hash, size=self._length))
def test_upload_translates_predefined_acl(self, arg_acl, translated_acl):
  params = {
      'Bucket': BUCKET_NAME,
      'Key': OBJECT_NAME,
      'Body': BINARY_DATA,
      'ACL': translated_acl
  }
  response = {'ETag': ETAG}
  self.stubber.add_response(
      'put_object', service_response=response, expected_params=params)
  expected_resource = self.s3_api._get_object_resource_from_s3_response(
      response, BUCKET_NAME, OBJECT_NAME)
  upload_resource = resource_reference.UnknownResource(
      storage_url.CloudUrl(SCHEME, BUCKET_NAME, OBJECT_NAME))

  with self.stubber:
    observed_resource = self.s3_api.upload_object(
        io.BytesIO(BINARY_DATA),
        upload_resource,
        request_config=cloud_api.RequestConfig(arg_acl))

  self.assertEqual(observed_resource, expected_resource)