def test_token_callback(server):
    original_creds = {
        "bucket": "my-bucket",
        "baseKey": "directupload/",
        "endpoint": None,
        "expiration": 1621345126000,
        "usePathStyleAccess": False,
        "region": "eu-west-1",
        "useS3Accelerate": False,
        "awsSecretKeyId": "...",
        "awsSessionToken": "...",
        "awsSecretAccessKey": "...",
    }
    batch = Batch(batchId=str(uuid4()), extraInfo=original_creds)
    check = {batch.uid: None}

    def callback(my_batch, creds):
        check[my_batch.uid] = creds.copy()

    # Using the default upload provider
    # => the callback is not even called
    creds = server.uploads.refresh_token(batch, token_callback=callback)
    assert creds == {}
    assert check[batch.uid] is None

    url = f"{server.client.host}{server.uploads.endpoint}/{batch.uid}/refreshToken"
    batch.provider = UP_AMAZON_S3

    # Using the S3 third-party upload provider, with credentials not expired
    # => new credentials are then the same as the current ones
    with responses.RequestsMock() as rsps:
        rsps.add(responses.POST, url, json=original_creds)
        creds = server.uploads.refresh_token(batch, token_callback=callback)
        assert creds == original_creds
        assert check[batch.uid] is None

    # Using the S3 third-party upload provider, with credentials expired
    # => new credentials are received
    with responses.RequestsMock() as rsps:
        new_creds = {
            "awsSecretKeyId": "updated 1",
            "awsSessionToken": "updated 2",
            "awsSecretAccessKey": "updated 3",
        }
        rsps.add(responses.POST, url, json=new_creds)
        creds = server.uploads.refresh_token(batch, token_callback=callback)
        assert creds == new_creds
        assert check[batch.uid] == new_creds
        assert batch.extraInfo["awsSecretKeyId"] == "updated 1"
        assert batch.extraInfo["awsSessionToken"] == "updated 2"
        assert batch.extraInfo["awsSecretAccessKey"] == "updated 3"
def test_data_leak_with_mutable_properties():
    from nuxeo.models import Batch

    # Testing the Batch.blobs property
    assert isinstance(Batch.__slots__["blobs"], dict)

    # It is empty by default
    assert not Batch.__slots__["blobs"]

    # Mimic an old behavior where the class property could be altered
    Batch.__slots__["blobs"] = {0: "a blob"}

    # Check that the property is not leaked to new instances
    batch = Batch(batchId="1234")
    assert not batch.blobs

    batch = Batch(batchId="1234", blobs={1: "my own blob"})
    assert batch.blobs == {1: "my own blob"}
def _complete_upload(batch: Batch, blob: FileBlob) -> Tuple[FileBlob, Batch]:
    """Helper to complete an upload."""
    # Set those attributes as FileBlob does not have them
    # and they are required for step 2 of .upload_impl()
    blob.batch_id = batch.uid
    blob.fileIdx = 0
    batch.upload_idx = 1

    if not batch.blobs or not batch.blobs[0]:
        batch.blobs[0] = blob

    # Complete the upload on the S3 side
    if batch.is_s3():
        batch.complete(timeout=TX_TIMEOUT)

    return blob, batch
def refresh(batch: Batch, **kwargs: Any) -> Any:
    # Call the original method
    try:
        return meth_orig(batch, **kwargs)
    finally:
        # Save changes in the database
        log.debug("Batch.extraInfo has been updated")
        transfer.batch = batch.as_dict()
        self.dao.update_upload(transfer)
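# Sketch (assumption, not confirmed by the source): how a closure like refresh()
# above is typically installed. It assumes the enclosing helper receives the
# uploader and the transfer row, captures the original uploads-service
# refresh_token bound method as meth_orig, and monkey-patches the service so every
# token renewal also persists the updated batch details. "uploader.service",
# "self.dao" and "log" are names taken from the surrounding snippets.
def _patch_refresh_token(self, uploader: Any, transfer: Any) -> None:
    """Wrap the token refresh so each renewal is also saved in the database."""
    meth_orig = uploader.service.refresh_token  # original refresh_token() call

    def refresh(batch: Batch, **kwargs: Any) -> Any:
        try:
            return meth_orig(batch, **kwargs)
        finally:
            # Persist the refreshed credentials stored in Batch.extraInfo
            transfer.batch = batch.as_dict()
            self.dao.update_upload(transfer)

    # From now on, chunked uploaders transparently use the wrapper
    uploader.service.refresh_token = refresh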
def test_batch_is_s3(kwargs, expected):
    batch = Batch(**kwargs)
    assert batch.is_s3() is expected
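# Sketch (assumption): test_batch_is_s3 above reads like a parametrized test whose
# decorator was not included here. The parameter sets below are illustrative only;
# they assume Batch.is_s3() keys off the "provider" value, with UP_AMAZON_S3 == "s3"
# as used elsewhere in these snippets.
import pytest

from nuxeo.models import Batch

S3_CASES = [
    ({"batchId": "1234"}, False),                       # no provider set
    ({"batchId": "1234", "provider": "other"}, False),  # non-S3 provider
    ({"batchId": "1234", "provider": "s3"}, True),      # Amazon S3 provider
]


@pytest.mark.parametrize("kwargs, expected", S3_CASES)
def test_batch_is_s3_sketch(kwargs, expected):
    batch = Batch(**kwargs)
    assert batch.is_s3() is expected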
def upload_chunks(
    self,
    file_path: Path,
    filename: str = None,
    mime_type: str = None,
    **kwargs: Any,
) -> Tuple[FileBlob, Batch]:
    """Upload a blob by chunks or in one go."""

    engine_uid = kwargs.get("engine_uid", None)
    is_direct_edit = kwargs.pop("is_direct_edit", False)
    is_direct_transfer = kwargs.get("is_direct_transfer", False)
    remote_parent_path = kwargs.pop("remote_parent_path", "")
    remote_parent_ref = kwargs.pop("remote_parent_ref", "")

    blob = FileBlob(str(file_path))
    action = self.upload_action(
        file_path, blob.size, reporter=QApplication.instance(), engine=engine_uid
    )

    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch: Optional[Batch] = None
    chunk_size = None

    # See if there is already a transfer for this file
    transfer = self.get_upload(file_path)

    try:
        if transfer:
            log.debug(f"Retrieved transfer for {file_path!r}: {transfer}")
            if transfer.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(transfer.uid or -1)

            # When fetching a possibly existing batch, specifying the file index
            # is not possible for S3 as there is no blob at the current index
            # until the S3 upload itself is done and batch.complete() has been called.
            file_idx = None if transfer.batch.get("provider", "") == "s3" else 0

            # Check if the associated batch still exists server-side
            try:
                self.remote.uploads.get(
                    transfer.batch["batchId"], file_idx=file_idx
                )
            except HTTPError as exc:
                if exc.status != 404:
                    raise
                log.debug("No associated batch found, restarting from zero")
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(service=self.remote.uploads, **transfer.batch)
                chunk_size = transfer.chunk_size

                # The transfer was already completed on the third-party provider
                if batch.etag:
                    return self._complete_upload(batch, blob)

        if not batch:
            # .uploads.handlers() result is cached, so it is cheap to call it each time here
            # in case the server did not answer correctly the previous time and thus S3 would
            # be completely disabled because of a one-time server error.
            handler = "s3" if Feature.s3 and self.remote.uploads.has_s3() else ""

            # Create a new batch and save it in the DB
            batch = self.remote.uploads.batch(handler=handler)

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini.
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by default
        # is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        action.is_direct_transfer = is_direct_transfer

        try:
            uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.remote.upload_callback,
            )
        except ClientError as exc:
            if exc.response["Error"]["Code"] != "NoSuchUpload":
                raise
            log.warning(
                "Either the upload ID does not exist or the upload was already completed."
            )
            return self._complete_upload(batch, blob)

        log.debug(f"Using {type(uploader).__name__!r} uploader")

        if not transfer:
            # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
            self.dao.remove_transfer("upload", file_path)

            # Add an upload entry in the database
            transfer = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                filesize=blob.size,
                batch=batch.as_dict(),
                chunk_size=chunk_size,
                is_direct_transfer=is_direct_transfer,
                remote_parent_path=remote_parent_path,
                remote_parent_ref=remote_parent_ref,
            )
            self.dao.save_upload(transfer)
        elif transfer.batch["batchId"] != batch.uid:
            # The upload was not a fresh one but its batch ID had expired.
            # Before NXDRIVE-2183, the batch ID was not updated and so the second step
            # of the upload (attaching the blob to a document) was failing.
            transfer.batch["batchId"] = batch.uid
            self.dao.update_upload(transfer)

        if uploader.chunked:
            # Update the progress on chunked uploads only, as the first call to
            # action.progress would set the action.uploaded attribute to True for
            # empty files. This is not what we want: empty files are legitimate.
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

            # Store the chunk size and start time for later transfer speed computation
            action.chunk_size = chunk_size
            action.chunk_transfer_start_time_ns = monotonic_ns()

            if batch.is_s3():
                self._patch_refresh_token(uploader, transfer)

            # If there is an UploadError, we catch it from the processor
            for _ in uploader.iter_upload():
                action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                # Save the progression
                transfer.progress = action.get_percent()
                self.dao.set_transfer_progress("upload", transfer)

                # Handle status changes every time a chunk is sent
                _transfer = self.get_upload(file_path)
                if _transfer and _transfer.status not in (
                    TransferStatus.ONGOING,
                    TransferStatus.DONE,
                ):
                    raise UploadPaused(transfer.uid or -1)
        else:
            uploader.upload()

            # For empty files, this will set action.uploaded to True,
            # telling us that the file was correctly sent to the server.
            action.progress += blob.size

            transfer.progress = action.get_percent()

        if batch.is_s3():
            if not batch.blobs:
                # This may happen when resuming an upload with all parts sent.
                # Trigger upload() that will complete the MPU and fill required
                # attributes like the Batch ETag, blob index, etc.
                uploader.upload()

            # Save the final ETag in the database to prevent a future issue if
            # the FileManager throws an error
            transfer.batch = batch.as_dict()
            self.dao.update_upload(transfer)

        self._complete_upload(batch, blob)

        # Transfer is completed, update the status in the database
        transfer.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", transfer)

        return blob, batch
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if transfer:
                transfer.progress = percent
                self.dao.set_transfer_progress("upload", transfer)

        action.finish_action()

        if blob.fd:
            blob.fd.close()
    # When fetching a possibly existing batch, specifying the file index
    # is not possible for S3 as there is no blob at the current index
    # until the S3 upload itself is done and batch.complete() has been called.
    file_idx = None if transfer.batch.get("provider", "") == UP_AMAZON_S3 else 0

    # Check if the associated batch still exists server-side
    try:
        uploads.get(transfer.batch["batchId"], file_idx=file_idx)
    except HTTPError as exc:
        if exc.status != 404:
            raise
        log.debug("No associated batch found, restarting from zero")
    else:
        log.debug("Associated batch found, resuming the upload")
        batch = Batch(service=uploads, **transfer.batch)

if not batch:
    # .uploads.handlers() result is cached, so it is cheap to call it each time here
    # in case the server did not answer correctly the previous time and thus S3 would
    # be completely disabled because of a one-time server error.
    handler = UP_AMAZON_S3 if Feature.s3 and uploads.has_s3() else ""

    # Create a new batch
    batch = uploads.batch(handler=handler)

if not transfer:
    # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
    self.dao.remove_transfer("upload", doc_pair=doc_pair, path=file_path)
def upload_chunks(
    self,
    file_path: Path,
    filename: str = None,
    mime_type: str = None,
    **params: Any,
) -> FileBlob:
    """Upload a blob by chunks or in one go."""

    action = UploadAction(file_path, reporter=QApplication.instance())
    blob = FileBlob(str(file_path))
    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch = None
    chunk_size = None
    upload: Optional[Upload] = None

    try:
        # See if there is already a transfer for this file
        upload = self.dao.get_upload(path=file_path)

        if upload:
            log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
            if upload.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(upload.uid or -1)

            # Check if the associated batch still exists server-side
            try:
                self.uploads.get(upload.batch, upload.idx)
            except Exception:
                log.debug(
                    "No associated batch found, restarting from zero",
                    exc_info=True,
                )
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(batchId=upload.batch, service=self.uploads)
                batch.upload_idx = upload.idx
                chunk_size = upload.chunk_size

        if not batch:
            # Create a new batch and save it in the DB
            batch = self.uploads.batch()

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini.
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by default
        # is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        engine_uid = params.pop("engine_uid", None)
        is_direct_edit = params.pop("is_direct_edit", False)

        if not upload:
            # Add an upload entry in the database
            upload = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                batch=batch.uid,
                idx=batch.upload_idx,
                chunk_size=chunk_size,
            )
            self.dao.save_upload(upload)

        # Set those attributes as FileBlob does not have them
        # and they are required for step 2 of .upload()
        blob.batch_id = upload.batch
        blob.fileIdx = upload.idx

        uploader: Uploader = batch.get_uploader(
            blob,
            chunked=chunked,
            chunk_size=chunk_size,
            callback=self.upload_callback,
        )

        # Update the progress on chunked uploads only, as the first call to
        # action.progress would set the action.uploaded attribute to True for
        # empty files. This is not what we want: empty files are legitimate.
        if uploader.chunked:
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

        log.debug(
            f"Upload progression is {action.get_percent():.2f}% "
            f"(data length is {sizeof_fmt(blob.size)}, "
            f"chunked is {chunked}, chunk size is {sizeof_fmt(chunk_size)})"
        )

        if action.get_percent() < 100.0 or not action.uploaded:
            if uploader.chunked:
                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    # Here 0 may happen when doing a single upload
                    action.progress += uploader.chunk_size or 0

                    # Save the progression
                    upload.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", upload)

                    # Handle status changes every time a chunk is sent
                    transfer = self.dao.get_upload(path=file_path)
                    if transfer and transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size

                upload.progress = action.get_percent()

        # Transfer is completed, update the status in the database
        upload.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", upload)

        return blob
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if upload:
                upload.progress = percent
                self.dao.set_transfer_progress("upload", upload)

        UploadAction.finish_action()

        if blob.fd:
            blob.fd.close()
def _get_transfer(self, file_path: Path, blob: FileBlob, **kwargs: Any) -> Upload:
    """Get and instantiate a new transfer."""

    # See if there is already a transfer for this file
    transfer = self.get_upload(file_path)
    batch: Optional[Batch] = None

    if transfer:
        if transfer.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
            log.debug(f"Retrieved paused transfer {transfer}, keeping it paused")
            raise UploadPaused(transfer.uid or -1)

        log.debug(f"Retrieved ongoing transfer {transfer}")

        # When fetching a possibly existing batch, specifying the file index
        # is not possible for S3 as there is no blob at the current index
        # until the S3 upload itself is done and batch.complete() has been called.
        file_idx = None if transfer.batch.get("provider", "") == "s3" else 0

        # Check if the associated batch still exists server-side
        try:
            self.remote.uploads.get(transfer.batch["batchId"], file_idx=file_idx)
        except HTTPError as exc:
            if exc.status != 404:
                raise
            log.debug("No associated batch found, restarting from zero")
        else:
            log.debug("Associated batch found, resuming the upload")
            batch = Batch(service=self.remote.uploads, **transfer.batch)

    if not batch:
        # .uploads.handlers() result is cached, so it is cheap to call it each time here
        # in case the server did not answer correctly the previous time and thus S3 would
        # be completely disabled because of a one-time server error.
        handler = "s3" if Feature.s3 and self.remote.uploads.has_s3() else ""

        # Create a new batch and save it in the DB
        batch = self.remote.uploads.batch(handler=handler)

    if not transfer:
        # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
        self.dao.remove_transfer("upload", file_path)

        # Add an upload entry in the database
        transfer = Upload(
            None,
            file_path,
            self._get_upload_status(kwargs.get("session", None)),
            batch=batch.as_dict(),
            chunk_size=Options.chunk_size * 1024 * 1024,
            engine=kwargs.get("engine_uid", None),
            filesize=blob.size,
            is_direct_edit=kwargs.get("is_direct_edit", False),
            is_direct_transfer=kwargs.get("is_direct_transfer", False),
            remote_parent_path=kwargs.pop("remote_parent_path", ""),
            remote_parent_ref=kwargs.pop("remote_parent_ref", ""),
            doc_pair=kwargs.pop("doc_pair", None),
        )
        log.debug(f"Instantiated transfer {transfer}")
        self.dao.save_upload(transfer)
    elif transfer.batch["batchId"] != batch.uid:
        # The upload was not a fresh one but its batch ID had expired.
        # Before NXDRIVE-2183, the batch ID was not updated and so the second step
        # of the upload (attaching the blob to a document) was failing.
        log.debug(
            f"Updating the batchId from {transfer.batch['batchId']} to {batch.uid}"
        )
        transfer.batch["batchId"] = batch.uid
        self.dao.update_upload(transfer)

    transfer.batch_obj = batch
    return transfer
    # When fetching a possibly existing batch, specifying the file index
    # is not possible for S3 as there is no blob at the current index
    # until the S3 upload itself is done and batch.complete() has been called.
    file_idx = None if transfer.batch.get("provider", "") == UP_AMAZON_S3 else 0

    # Check if the associated batch still exists server-side
    try:
        self.remote.uploads.get(transfer.batch["batchId"], file_idx=file_idx)
    except HTTPError as exc:
        if exc.status != 404:
            raise
        log.debug("No associated batch found, restarting from zero")
    else:
        log.debug("Associated batch found, resuming the upload")
        batch = Batch(service=self.remote.uploads, **transfer.batch)

if not batch:
    # .uploads.handlers() result is cached, so it is cheap to call it each time here
    # in case the server did not answer correctly the previous time and thus S3 would
    # be completely disabled because of a one-time server error.
    handler = (
        UP_AMAZON_S3 if Feature.s3 and self.remote.uploads.has_s3() else ""
    )

    # Create a new batch
    batch = self.remote.uploads.batch(handler=handler)

if not transfer:
    # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
    self.dao.remove_transfer("upload", doc_pair=doc_pair, path=file_path)
def upload_chunks(
    self,
    file_path: Path,
    filename: str = None,
    mime_type: str = None,
    **params: Any,
) -> FileBlob:
    """Upload a blob by chunks or in one go."""

    action = UploadAction(file_path, reporter=QApplication.instance())
    blob = FileBlob(str(file_path))
    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch: Optional[Batch] = None
    chunk_size = None
    upload: Optional[Upload] = None

    try:
        # See if there is already a transfer for this file
        upload = self.dao.get_upload(path=file_path)

        if upload:
            log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
            if upload.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(upload.uid or -1)

            # When fetching a possibly existing batch, specifying the file index
            # is not possible for S3 as there is no blob at the current index
            # until the S3 upload itself is done and batch.complete() has been called.
            file_idx = (
                None
                if upload.batch.get("provider", "") == "s3"
                else upload.batch["upload_idx"]
            )

            # Check if the associated batch still exists server-side
            try:
                self.uploads.get(upload.batch["batchId"], file_idx=file_idx)
            except Exception:
                log.debug(
                    "No associated batch found, restarting from zero", exc_info=True
                )
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(service=self.uploads, **upload.batch)
                chunk_size = upload.chunk_size

                if batch.is_s3():
                    token_ttl = self._aws_token_ttl(
                        batch.extraInfo["expiration"] / 1000
                    )
                    if token_ttl.total_seconds() < 1:
                        batch = None
                        upload = None
                        log.warning("AWS token has expired, restarting from zero")

        if not batch:
            # .uploads.handlers() result is cached, so it is cheap to call it each time here
            # in case the server did not answer correctly the previous time and thus S3 would
            # be completely disabled because of a one-time server error.
            handler = "s3" if Feature.s3 and self.uploads.has_s3() else ""

            # Create a new batch and save it in the DB
            batch = self.uploads.batch(handler=handler)

            if batch.is_s3():
                self._aws_token_ttl(batch.extraInfo["expiration"] / 1000)

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini.
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by default
        # is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        engine_uid = params.pop("engine_uid", None)
        is_direct_edit = params.pop("is_direct_edit", False)

        # Set those attributes as FileBlob does not have them
        # and they are required for step 2 of .upload()
        blob.batch_id = batch.uid
        blob.fileIdx = batch.upload_idx

        uploader: Uploader = batch.get_uploader(
            blob,
            chunked=chunked,
            chunk_size=chunk_size,
            callback=self.upload_callback,
        )
        log.debug(f"Using {type(uploader).__name__!r} uploader")

        if not upload:
            # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
            self.dao.remove_transfer("upload", file_path)

            # Add an upload entry in the database
            upload = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                batch=batch.as_dict(),
                chunk_size=chunk_size,
            )
            self.dao.save_upload(upload)

        # Update the progress on chunked uploads only, as the first call to
        # action.progress would set the action.uploaded attribute to True for
        # empty files. This is not what we want: empty files are legitimate.
        if uploader.chunked:
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

        if action.get_percent() < 100.0 or not action.uploaded:
            if uploader.chunked:
                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    # Here 0 may happen when doing a single upload
                    action.progress += uploader.chunk_size or 0

                    # Save the progression
                    upload.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", upload)

                    # Handle status changes every time a chunk is sent
                    transfer = self.dao.get_upload(path=file_path)
                    if transfer and transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size

                upload.progress = action.get_percent()

        if batch.is_s3():
            if not batch.blobs:
                # This may happen when resuming an upload with all parts sent.
                # Trigger upload() that will complete the MPU and fill required
                # attributes like the Batch ETag, blob index, etc.
                uploader.upload()

            # Complete the S3 upload
            # (setting a big timeout to handle big files)
            batch.complete(timeout=(TX_TIMEOUT, TX_TIMEOUT))

        # Transfer is completed, update the status in the database
        upload.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", upload)

        return blob
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if upload:
                upload.progress = percent
                self.dao.set_transfer_progress("upload", upload)

        UploadAction.finish_action()

        if blob.fd:
            blob.fd.close()
def token_callback(self, batch: Batch, _: Dict[str, Any]) -> None:
    """Callback triggered when the token is refreshed."""
    self.batch = batch.as_dict()
    self.is_dirty = True
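# Illustrative only: how token_callback() above can be handed to the uploads service
# so refreshed S3 credentials are written back to the database. It assumes
# token_callback() is a method of the Upload row ("transfer"), that "uploads" is the
# uploads API exercised by test_token_callback earlier, and that "dao" is the
# persistence layer used elsewhere in these snippets.
creds = uploads.refresh_token(
    transfer.batch_obj, token_callback=transfer.token_callback
)
if transfer.is_dirty:
    # Persist the new "batch" details (hypothetical follow-up step)
    dao.update_upload(transfer)
    transfer.is_dirty = False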
def cancel_batch(self, batch_details: Dict[str, Any]) -> None:
    """Cancel an uploaded Batch."""
    batch = Batch(service=self.uploads, **batch_details)
    with suppress(Exception):
        batch.cancel()
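# Usage sketch (assumed wiring): cancel_batch() takes the raw dict stored in the
# database rather than a live Batch object, so a stalled transfer can be cleaned up
# even after a restart. "remote" and "transfer" are hypothetical names; errors are
# suppressed inside cancel_batch() itself, so this call is best-effort.
remote.cancel_batch(transfer.batch)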