def make_file(
    self, parent: str, name: str, content: bytes = None, file_path: Path = None
) -> str:
    """
    Create a document with the given *name* and *content* using the FileManager.
    If *file_path* points to a local file, it will be used instead of *content*.

    Note: if *content* is seen as plain text by the FileManager, the created
    document will be a Note. If this is not what you want, use make_file_with_blob().
    """
    tmp_created = file_path is None
    if not file_path:
        file_path = make_tmp_file(self.upload_tmp_dir, content)
    try:
        file_blob = FileBlob(str(file_path))
        file_blob.name = safe_filename(name)
        blob = self.uploads.batch().upload(file_blob)
        return self.file_manager_import(self.check_ref(parent), blob)
    finally:
        if tmp_created:
            file_path.unlink()
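# Illustrative usage only (not part of the original code): two hypothetical calls
# to make_file() above, assuming `remote` is an instance of the client class and
# `parent_ref` references an existing folderish document.
from pathlib import Path

def example_make_file(remote, parent_ref: str) -> str:
    # In-memory content: plain text content will end up as a Note document.
    doc_ref = remote.make_file(parent_ref, "notes.txt", content=b"hello world")
    # An existing local file is uploaded as-is; no temporary file is created.
    remote.make_file(parent_ref, "report.csv", file_path=Path("/tmp/report.csv"))
    return doc_ref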
def upload_chunks(
    self,
    file_path: Path,
    filename: str = None,
    mime_type: str = None,
    **params: Any,
) -> FileBlob:
    """Upload a blob by chunks or in one go."""

    action = UploadAction(file_path, reporter=QApplication.instance())
    blob = FileBlob(str(file_path))
    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch = None
    chunk_size = None
    upload: Optional[Upload] = None

    try:
        # See if there is already a transfer for this file
        upload = self.dao.get_upload(path=file_path)

        if upload:
            log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
            if upload.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(upload.uid or -1)

            # Check if the associated batch still exists server-side
            try:
                self.uploads.get(upload.batch, upload.idx)
            except Exception:
                log.debug(
                    "No associated batch found, restarting from zero",
                    exc_info=True,
                )
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(batchId=upload.batch, service=self.uploads)
                batch.upload_idx = upload.idx
                chunk_size = upload.chunk_size

        if not batch:
            # Create a new batch and save it in the DB
            batch = self.uploads.batch()

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by default
        # is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        engine_uid = params.pop("engine_uid", None)
        is_direct_edit = params.pop("is_direct_edit", False)

        if not upload:
            # Add an upload entry in the database
            upload = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                batch=batch.uid,
                idx=batch.upload_idx,
                chunk_size=chunk_size,
            )
            self.dao.save_upload(upload)

        # Set those attributes as FileBlob does not have them
        # and they are required for step 2 of .upload()
        blob.batch_id = upload.batch
        blob.fileIdx = upload.idx

        uploader: Uploader = batch.get_uploader(
            blob,
            chunked=chunked,
            chunk_size=chunk_size,
            callback=self.upload_callback,
        )

        # Update the progress on chunked uploads only, as the first call to
        # action.progress will set the action.uploaded attr to True for
        # empty files. This is not what we want: empty files are legit.
        if uploader.chunked:
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

        log.debug(
            f"Upload progression is {action.get_percent():.2f}% "
            f"(data length is {sizeof_fmt(blob.size)}, "
            f"chunked is {chunked}, chunk size is {sizeof_fmt(chunk_size)})"
        )

        if action.get_percent() < 100.0 or not action.uploaded:
            if uploader.chunked:
                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    # Here 0 may happen when doing a single upload
                    action.progress += uploader.chunk_size or 0

                    # Save the progression
                    upload.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", upload)

                    # Handle status changes every time a chunk is sent
                    transfer = self.dao.get_upload(path=file_path)
                    if transfer and transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size
                upload.progress = action.get_percent()

        # Transfer is completed, update the status in the database
        upload.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", upload)

        return blob
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if upload:
                upload.progress = percent
                self.dao.set_transfer_progress("upload", upload)

        UploadAction.finish_action()
        if blob.fd:
            blob.fd.close()
def upload_chunks(
    self,
    file_path: Path,
    filename: str = None,
    mime_type: str = None,
    **kwargs: Any,
) -> Tuple[FileBlob, Batch]:
    """Upload a blob by chunks or in one go."""

    engine_uid = kwargs.get("engine_uid", None)
    is_direct_edit = kwargs.pop("is_direct_edit", False)
    is_direct_transfer = kwargs.get("is_direct_transfer", False)
    remote_parent_path = kwargs.pop("remote_parent_path", "")
    remote_parent_ref = kwargs.pop("remote_parent_ref", "")

    blob = FileBlob(str(file_path))
    action = self.upload_action(
        file_path, blob.size, reporter=QApplication.instance(), engine=engine_uid
    )
    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch: Optional[Batch] = None
    chunk_size = None

    # See if there is already a transfer for this file
    transfer = self.get_upload(file_path)

    try:
        if transfer:
            log.debug(f"Retrieved transfer for {file_path!r}: {transfer}")
            if transfer.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(transfer.uid or -1)

            # When fetching an eventual batch, specifying the file index
            # is not possible for S3 as there is no blob at the current index
            # until the S3 upload itself is done and the call to
            # batch.complete() made.
            file_idx = None if transfer.batch.get("provider", "") == "s3" else 0

            # Check if the associated batch still exists server-side
            try:
                self.remote.uploads.get(transfer.batch["batchId"], file_idx=file_idx)
            except HTTPError as exc:
                if exc.status != 404:
                    raise
                log.debug("No associated batch found, restarting from zero")
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(service=self.remote.uploads, **transfer.batch)
                chunk_size = transfer.chunk_size

                # The transfer was already completed on the third-party provider
                if batch.etag:
                    return self._complete_upload(batch, blob)

        if not batch:
            # .uploads.handlers() result is cached, so it is convenient to call it each time here
            # in case the server did not answer correctly the previous time and thus S3 would
            # be completely disabled because of a one-time server error.
            handler = "s3" if Feature.s3 and self.remote.uploads.has_s3() else ""

            # Create a new batch and save it in the DB
            batch = self.remote.uploads.batch(handler=handler)

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by default
        # is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        action.is_direct_transfer = is_direct_transfer

        try:
            uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.remote.upload_callback,
            )
        except ClientError as exc:
            if exc.response["Error"]["Code"] != "NoSuchUpload":
                raise
            log.warning(
                "Either the upload ID does not exist or the upload was already completed."
            )
            return self._complete_upload(batch, blob)

        log.debug(f"Using {type(uploader).__name__!r} uploader")

        if not transfer:
            # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
            self.dao.remove_transfer("upload", file_path)

            # Add an upload entry in the database
            transfer = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                filesize=blob.size,
                batch=batch.as_dict(),
                chunk_size=chunk_size,
                is_direct_transfer=is_direct_transfer,
                remote_parent_path=remote_parent_path,
                remote_parent_ref=remote_parent_ref,
            )
            self.dao.save_upload(transfer)
        elif transfer.batch["batchId"] != batch.uid:
            # The upload was not a fresh one but its batch ID had expired.
            # Before NXDRIVE-2183, the batch ID was not updated and so the second step
            # of the upload (attaching the blob to a document) was failing.
            transfer.batch["batchId"] = batch.uid
            self.dao.update_upload(transfer)

        if uploader.chunked:
            # Update the progress on chunked uploads only, as the first call to
            # action.progress will set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legit.
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

            # Store the chunk size and start time for later transfer speed computation
            action.chunk_size = chunk_size
            action.chunk_transfer_start_time_ns = monotonic_ns()

            if batch.is_s3():
                self._patch_refresh_token(uploader, transfer)

            # If there is an UploadError, we catch it from the processor
            for _ in uploader.iter_upload():
                action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                # Save the progression
                transfer.progress = action.get_percent()
                self.dao.set_transfer_progress("upload", transfer)

                # Handle status changes every time a chunk is sent
                _transfer = self.get_upload(file_path)
                if _transfer and _transfer.status not in (
                    TransferStatus.ONGOING,
                    TransferStatus.DONE,
                ):
                    raise UploadPaused(transfer.uid or -1)
        else:
            uploader.upload()

            # For empty files, this will set action.uploaded to True,
            # telling us that the file was correctly sent to the server.
            action.progress += blob.size
            transfer.progress = action.get_percent()

        if batch.is_s3():
            if not batch.blobs:
                # This may happen when resuming an upload with all parts sent.
                # Trigger upload() that will complete the MPU and fill required
                # attributes like the Batch ETag, blob index, etc.
                uploader.upload()

            # Save the final ETag in the database to prevent future issues if
            # the FileManager throws an error
            transfer.batch = batch.as_dict()
            self.dao.update_upload(transfer)

        self._complete_upload(batch, blob)

        # Transfer is completed, update the status in the database
        transfer.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", transfer)

        return blob, batch
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if transfer:
                transfer.progress = percent
                self.dao.set_transfer_progress("upload", transfer)

        action.finish_action()
        if blob.fd:
            blob.fd.close()
    If the error was raised at step 1, the upload will not start from zero:
    it will resume from the next chunk based on which chunks were previously sent.
    This depends on the chunk TTL configured on the server (it must be large
    enough to handle big files).

    If the error was raised at step 2, step 1 will be checked to ensure the blob
    was successfully uploaded. But in most cases, nothing will be uploaded twice.

    Also, if the error is an HTTP 502 or 503, the Processor will check for the
    file existence to bypass errors happening *after* the operation was successful.
    If it exists, the error is skipped and the upload is seen as a success.
    """

    # Step 0: tweak the blob
    blob = FileBlob(str(file_path))
    if filename:
        blob.name = filename

    # Step 0.5: retrieve or instantiate a new transfer
    transfer = self._get_transfer(file_path, blob, **kwargs)
    self._handle_transfer_status(transfer)

    # Step 0.75: delete superfluous arguments that would raise a BadQuery error later
    kwargs.pop("doc_pair", None)
    kwargs.pop("engine_uid", None)
    kwargs.pop("is_direct_edit", None)
    kwargs.pop("is_direct_transfer", None)
    kwargs.pop("remote_parent_path", None)
    kwargs.pop("remote_parent_ref", None)

    # For the upload to be chunked, Options.chunk_upload must be True
    # and the blob must be bigger than Options.chunk_limit, which by default
def upload_impl(
    self,
    file_path: Path,
    command: str,
    filename: str = None,
    mime_type: str = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    """
    Upload flow implementation.
    If command is not None, the operation is executed with the batch as an input.

    If an exception happens at step 1 or 2, the upload will be continued the next
    time the Processor handles the document (it will be postponed due to the error).

    If the error was raised at step 1, the upload will not start from zero:
    it will resume from the next chunk based on which chunks were previously sent.
    This depends on the chunk TTL configured on the server (it must be large
    enough to handle big files).

    If the error was raised at step 2, step 1 will be checked to ensure the blob
    was successfully uploaded. But in most cases, nothing will be uploaded twice.

    Also, if the error is an HTTP 502 or 503, the Processor will check for the
    file existence to bypass errors happening *after* the operation was successful.
    If it exists, the error is skipped and the upload is seen as a success.
    """

    # Step 0: tweak the blob
    blob = FileBlob(str(file_path))
    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    # Step 0.5: retrieve or instantiate a new transfer
    transfer = self._get_transfer(file_path, blob, **kwargs)
    self._handle_session_status(kwargs.pop("session", None), transfer)

    # Step 0.75: delete superfluous arguments that would raise a BadQuery error later
    kwargs.pop("doc_pair", None)
    kwargs.pop("engine_uid", None)
    kwargs.pop("is_direct_edit", None)
    kwargs.pop("is_direct_transfer", None)
    kwargs.pop("remote_parent_path", None)
    kwargs.pop("remote_parent_ref", None)

    # Step 1: upload the blob
    if transfer.status is not TransferStatus.DONE:
        try:
            self.upload_chunks(transfer, blob)
        finally:
            if blob.fd:
                blob.fd.close()

        # Step 1.5: complete the upload on the third-party provider
        self._complete_upload(transfer, blob)

        # The data was transferred, save the status for eventual future retries
        self._set_transfer_status(transfer, TransferStatus.DONE)
    else:
        # Ensure the blob has all required attributes
        self._complete_upload(transfer, blob)

    # Step 2: link the uploaded blob to the document
    doc: Dict[str, Any] = self._link_blob_to_doc(command, transfer, blob, **kwargs)

    # Lastly, we need to remove the batch as the "X-Batch-No-Drop" header was used in link_blob_to_doc()
    try:
        transfer.batch_obj.delete(0)
    except Exception:
        log.warning(
            f"Cannot delete the batchId {transfer.batch_obj.uid!r}", exc_info=True
        )

    return doc
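# A hedged sketch (ours, not the project's) of the caller-side behaviour the
# docstring above describes: a pause keeps the transfer entry so the next attempt
# resumes from the chunks already sent, and an HTTP 502/503 may hide an operation
# that actually succeeded. `document_exists()` and `postpone()` are hypothetical
# helpers standing in for the Processor's own logic.
def example_process(remote, file_path: Path, name: str) -> None:
    try:
        remote.upload_impl(file_path, "FileManager.Import", filename=name)
    except UploadPaused:
        # The user paused the transfer: leave the database entry untouched.
        return
    except HTTPError as exc:
        if exc.status in (502, 503) and document_exists(name):
            # The operation went through server-side; treat it as a success.
            return
        # Otherwise the document is retried later, resuming from saved progress.
        postpone(file_path)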
def upload(
    self,
    file_path: Path,
    command: str,
    filename: str = None,
    mime_type: str = None,
    **params: Any,
) -> Dict[str, Any]:
    """
    Upload a file with a batch.
    If command is not None, the operation is executed with the batch as an input.
    """
    with self.upload_lock:
        tick = time.time()
        action = FileAction(
            "Upload", file_path, filename, reporter=QApplication.instance()
        )
        try:
            # Init the resumable upload by getting a batch generated by the
            # server. This batch is to be used as a resumable session
            batch = self.uploads.batch()

            blob = FileBlob(str(file_path))
            if filename:
                blob.name = filename
            if mime_type:
                blob.mimetype = mime_type

            # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = Options.chunk_size * 1024 * 1024

            # For the upload to be chunked, Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by default
            # is equal to Options.chunk_size.
            chunked = (
                Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
            )

            uploader = batch.get_uploader(blob, chunked=chunked, chunk_size=chunk_size)
            if uploader.chunked:
                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    # Here 0 may happen when doing a single upload
                    action.progress += uploader.chunk_size or 0
            else:
                uploader.upload()
            upload_result = uploader.response
            blob.fd.close()

            upload_duration = int(time.time() - tick)
            action.transfer_duration = upload_duration

            # Use upload duration * 2 as the Nuxeo transaction timeout
            tx_timeout = max(TX_TIMEOUT, upload_duration * 2)
            log.debug(
                f"Using {tx_timeout} seconds [max({TX_TIMEOUT}, "
                f"2 * upload time={upload_duration})] as Nuxeo "
                f"transaction timeout for batch execution of {command!r} "
                f"with file {file_path!r}"
            )

            if upload_duration > 0:
                size = os.stat(file_path).st_size
                log.debug(
                    f"Speed for {size / 1000} kilobytes is {upload_duration} sec:"
                    f" {size / upload_duration / 1024} KiB/s"
                )

            headers = {"Nuxeo-Transaction-Timeout": str(tx_timeout)}
            return self.execute(
                command=command, input_obj=upload_result, headers=headers, **params
            )
        finally:
            FileAction.finish_action()
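# A standalone restatement of the chunking decision used above, assuming the
# config.ini values are expressed in MiB as the comments state. Handy for quick
# reasoning about which files will be chunked; the helper name is ours.
def would_be_chunked(blob_size: int, chunk_upload: bool = True, chunk_limit_mib: int = 20) -> bool:
    """Return True when a blob of `blob_size` bytes would be uploaded by chunks."""
    return chunk_upload and blob_size > chunk_limit_mib * 1024 * 1024

# Example: with the default 20 MiB limit, a 25 MiB file is chunked, a 5 MiB one is not.
assert would_be_chunked(25 * 1024 * 1024) and not would_be_chunked(5 * 1024 * 1024)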
def upload(
    self,
    file_path: str,
    filename: str = None,
    mime_type: str = None,
    command: str = None,
    **params: Any,
):
    """
    Upload a file with a batch.
    If command is not None, the operation is executed with the batch as an input.
    """
    with self.upload_lock:
        tick = time.time()
        action = FileAction("Upload", file_path, filename)
        try:
            # Init the resumable upload by getting a batch generated by the
            # server. This batch is to be used as a resumable session
            batch = self.uploads.batch()

            blob = FileBlob(file_path)
            if filename:
                blob.name = filename
            if mime_type:
                blob.mimetype = mime_type

            upload_result = batch.upload(blob)
            blob.fd.close()

            upload_duration = int(time.time() - tick)
            action.transfer_duration = upload_duration

            # Use upload duration * 2 as the Nuxeo transaction timeout
            tx_timeout = max(TX_TIMEOUT, upload_duration * 2)
            log.trace(
                "Using %d seconds [max(%d, 2 * upload time=%d)] as "
                "Nuxeo transaction timeout for batch execution of %r "
                "with file %r",
                tx_timeout,
                TX_TIMEOUT,
                upload_duration,
                command,
                file_path,
            )

            if upload_duration > 0:
                size = os.stat(file_path).st_size
                log.trace(
                    "Speed for %d bytes is %d sec: %f bytes/sec",
                    size,
                    upload_duration,
                    size / upload_duration,
                )

            if command:
                headers = {"Nuxeo-Transaction-Timeout": str(tx_timeout)}
                return self.operations.execute(
                    command=command,
                    input_obj=upload_result,
                    headers=headers,
                    **params,
                )
        finally:
            FileAction.finish_action()
def upload_chunks(
    self,
    file_path: Path,
    filename: str = None,
    mime_type: str = None,
    **params: Any,
) -> FileBlob:
    """Upload a blob by chunks or in one go."""

    action = UploadAction(file_path, reporter=QApplication.instance())
    blob = FileBlob(str(file_path))
    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch: Optional[Batch] = None
    chunk_size = None
    upload: Optional[Upload] = None

    try:
        # See if there is already a transfer for this file
        upload = self.dao.get_upload(path=file_path)

        if upload:
            log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
            if upload.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(upload.uid or -1)

            # When fetching an eventual batch, specifying the file index
            # is not possible for S3 as there is no blob at the current index
            # until the S3 upload itself is done and the call to
            # batch.complete() made.
            file_idx = (
                None
                if upload.batch.get("provider", "") == "s3"
                else upload.batch["upload_idx"]
            )

            # Check if the associated batch still exists server-side
            try:
                self.uploads.get(upload.batch["batchId"], file_idx=file_idx)
            except Exception:
                log.debug(
                    "No associated batch found, restarting from zero", exc_info=True
                )
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(service=self.uploads, **upload.batch)
                chunk_size = upload.chunk_size

                if batch.is_s3():
                    token_ttl = self._aws_token_ttl(
                        batch.extraInfo["expiration"] / 1000
                    )
                    if token_ttl.total_seconds() < 1:
                        batch = None
                        upload = None
                        log.warning("AWS token has expired, restarting from zero")

        if not batch:
            # .uploads.handlers() result is cached, so it is convenient to call it each time here
            # in case the server did not answer correctly the previous time and thus S3 would
            # be completely disabled because of a one-time server error.
            handler = "s3" if Feature.s3 and self.uploads.has_s3() else ""

            # Create a new batch and save it in the DB
            batch = self.uploads.batch(handler=handler)
            if batch.is_s3():
                self._aws_token_ttl(batch.extraInfo["expiration"] / 1000)

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by default
        # is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        engine_uid = params.pop("engine_uid", None)
        is_direct_edit = params.pop("is_direct_edit", False)

        # Set those attributes as FileBlob does not have them
        # and they are required for step 2 of .upload()
        blob.batch_id = batch.uid
        blob.fileIdx = batch.upload_idx

        uploader: Uploader = batch.get_uploader(
            blob,
            chunked=chunked,
            chunk_size=chunk_size,
            callback=self.upload_callback,
        )
        log.debug(f"Using {type(uploader).__name__!r} uploader")

        if not upload:
            # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
            self.dao.remove_transfer("upload", file_path)

            # Add an upload entry in the database
            upload = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                batch=batch.as_dict(),
                chunk_size=chunk_size,
            )
            self.dao.save_upload(upload)

        # Update the progress on chunked uploads only, as the first call to
        # action.progress will set the action.uploaded attr to True for
        # empty files. This is not what we want: empty files are legit.
        if uploader.chunked:
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

        if action.get_percent() < 100.0 or not action.uploaded:
            if uploader.chunked:
                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    # Here 0 may happen when doing a single upload
                    action.progress += uploader.chunk_size or 0

                    # Save the progression
                    upload.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", upload)

                    # Handle status changes every time a chunk is sent
                    transfer = self.dao.get_upload(path=file_path)
                    if transfer and transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size
                upload.progress = action.get_percent()

        if batch.is_s3():
            if not batch.blobs:
                # This may happen when resuming an upload with all parts sent.
                # Trigger upload() that will complete the MPU and fill required
                # attributes like the Batch ETag, blob index, etc.
                uploader.upload()

            # Complete the S3 upload
            # (setting a big timeout to handle big files)
            batch.complete(timeout=(TX_TIMEOUT, TX_TIMEOUT))

        # Transfer is completed, update the status in the database
        upload.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", upload)

        return blob
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if upload:
                upload.progress = percent
                self.dao.set_transfer_progress("upload", upload)

        UploadAction.finish_action()
        if blob.fd:
            blob.fd.close()
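# A minimal sketch, assuming the S3 batch's extraInfo["expiration"] is an epoch
# timestamp in milliseconds (hence the division by 1000 in the function above).
# The real _aws_token_ttl() helper is not shown in this section; this only
# illustrates the computation the resume path relies on.
from datetime import datetime, timedelta, timezone

def aws_token_ttl(expiration_epoch_s: float) -> timedelta:
    """Return how long the temporary AWS credentials remain valid."""
    expires_at = datetime.fromtimestamp(expiration_epoch_s, tz=timezone.utc)
    return expires_at - datetime.now(tz=timezone.utc)

# A TTL below one second means the token expired and the upload restarts from zero.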