Example #1
    def upload_chunks(
        self,
        file_path: Path,
        filename: str = None,
        mime_type: str = None,
        **kwargs: Any,
    ) -> Tuple[FileBlob, Batch]:
        """Upload a blob by chunks or in one go."""

        engine_uid = kwargs.get("engine_uid", None)
        is_direct_edit = kwargs.pop("is_direct_edit", False)
        is_direct_transfer = kwargs.get("is_direct_transfer", False)
        remote_parent_path = kwargs.pop("remote_parent_path", "")
        remote_parent_ref = kwargs.pop("remote_parent_ref", "")

        blob = FileBlob(str(file_path))
        action = self.upload_action(
            file_path, blob.size, reporter=QApplication.instance(), engine=engine_uid
        )
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch: Optional[Batch] = None
        chunk_size = None

        # See if there is already a transfer for this file
        transfer = self.get_upload(file_path)

        try:
            if transfer:
                log.debug(f"Retrieved transfer for {file_path!r}: {transfer}")
                if transfer.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                    raise UploadPaused(transfer.uid or -1)

                # When looking up an existing batch, specifying the file index
                # is not possible for S3, as there is no blob at the current
                # index until the S3 upload itself is done and batch.complete()
                # has been called.
                file_idx = None if transfer.batch.get("provider", "") == "s3" else 0

                # Check if the associated batch still exists server-side
                try:
                    self.remote.uploads.get(
                        transfer.batch["batchId"], file_idx=file_idx
                    )
                except HTTPError as exc:
                    if exc.status != 404:
                        raise
                    log.debug("No associated batch found, restarting from zero")
                else:
                    log.debug("Associated batch found, resuming the upload")
                    batch = Batch(service=self.remote.uploads, **transfer.batch)
                    chunk_size = transfer.chunk_size

                    # The transfer was already completed on the third-party provider
                    if batch.etag:
                        return self._complete_upload(batch, blob)

            if not batch:
                # .uploads.handlers() result is cached, so it is convenient to call it each time here
                # in case the server did not answer correctly the previous time and thus S3 would
                # be completely disabled because of a one-time server error.
                handler = "s3" if Feature.s3 and self.remote.uploads.has_s3() else ""

                # Create a new batch and save it in the DB
                batch = self.remote.uploads.batch(handler=handler)

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, the Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by default
            # is equal to Options.chunk_size.
            chunked = (
                Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
            )

            action.is_direct_transfer = is_direct_transfer

            try:
                uploader = batch.get_uploader(
                    blob,
                    chunked=chunked,
                    chunk_size=chunk_size,
                    callback=self.remote.upload_callback,
                )
            except ClientError as exc:
                if exc.response["Error"]["Code"] != "NoSuchUpload":
                    raise

                log.warning(
                    "Either the upload ID does not exist or the upload was already completed."
                )
                return self._complete_upload(batch, blob)

            log.debug(f"Using {type(uploader).__name__!r} uploader")

            if not transfer:
                # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
                self.dao.remove_transfer("upload", file_path)

                # Add an upload entry in the database
                transfer = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    filesize=blob.size,
                    batch=batch.as_dict(),
                    chunk_size=chunk_size,
                    is_direct_transfer=is_direct_transfer,
                    remote_parent_path=remote_parent_path,
                    remote_parent_ref=remote_parent_ref,
                )
                self.dao.save_upload(transfer)
            elif transfer.batch["batchId"] != batch.uid:
                # The upload was not a fresh one but its batch ID had expired.
                # Before NXDRIVE-2183, the batch ID was not updated and so the second step
                # of the upload (attaching the blob to a document) was failing.
                transfer.batch["batchId"] = batch.uid
                self.dao.update_upload(transfer)

            if uploader.chunked:
                # Update the progress on chunked upload only as the first call to
                # action.progress will set the action.uploaded attr to True for
                # empty files. This is not what we want: empty files are legitimate.
                action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                if batch.is_s3():
                    self._patch_refresh_token(uploader, transfer)

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                    # Save the progression
                    transfer.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", transfer)

                    # Handle status changes every time a chunk is sent
                    _transfer = self.get_upload(file_path)
                    if _transfer and _transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size

                transfer.progress = action.get_percent()

            if batch.is_s3():
                if not batch.blobs:
                    # This may happen when resuming an upload with all parts sent.
                    # Trigger upload() that will complete the MPU and fill required
                    # attributes like the Batch ETag, blob index, etc.
                    uploader.upload()

                # Save the final ETag in the database to prevent future issue if
                # the FileManager throws an error
                transfer.batch = batch.as_dict()
                self.dao.update_upload(transfer)

            self._complete_upload(batch, blob)

            # Transfer is completed, update the status in the database
            transfer.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", transfer)

            return blob, batch
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if transfer:
                    transfer.progress = percent
                    self.dao.set_transfer_progress("upload", transfer)

            action.finish_action()

            if blob.fd:
                blob.fd.close()
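
Example #1 returns both the blob and the batch, and signals pause/resume through UploadPaused. A minimal caller sketch follows, assuming only the upload_chunks() and UploadPaused shown above; every other name is hypothetical and for illustration.

from pathlib import Path

# Minimal caller sketch for the upload_chunks() of example #1. `remote` is
# assumed to expose that method; UploadPaused is the project's exception
# used above. The engine UID and the final print are illustrative only.
def process_upload(remote, file_path: Path) -> None:
    try:
        blob, batch = remote.upload_chunks(
            file_path,
            engine_uid="engine-1",  # hypothetical engine UID
            is_direct_transfer=False,
        )
    except UploadPaused:
        # The transfer was paused from the UI: the database entry is left
        # untouched so a later run resumes from the saved batch.
        return

    # Step 2 (not part of example #1) would attach `blob` to a document
    # using the returned `batch`.
    print(f"Uploaded {blob.name} in batch {batch.uid}")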
Example #2
    def upload_chunks(
        self,
        file_path: Path,
        filename: str = None,
        mime_type: str = None,
        **params: Any,
    ) -> FileBlob:
        """Upload a blob by chunks or in one go."""

        action = UploadAction(file_path, reporter=QApplication.instance())
        blob = FileBlob(str(file_path))
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch = None
        chunk_size = None
        upload: Optional[Upload] = None

        try:
            # See if there is already a transfer for this file
            upload = self.dao.get_upload(path=file_path)
            if upload:
                log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
                if upload.status not in (TransferStatus.ONGOING,
                                         TransferStatus.DONE):
                    raise UploadPaused(upload.uid or -1)

                # Check if the associated batch still exists server-side
                try:
                    self.uploads.get(upload.batch, upload.idx)
                except Exception:
                    log.debug(
                        f"No associated batch found, restarting from zero",
                        exc_info=True,
                    )
                else:
                    log.debug(f"Associated batch found, resuming the upload")
                    batch = Batch(batchId=upload.batch, service=self.uploads)
                    batch.upload_idx = upload.idx
                    chunk_size = upload.chunk_size

            if not batch:
                # Create a new batch and save it in the DB
                batch = self.uploads.batch()

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, the Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by default
            # is equal to Options.chunk_size.
            chunked = (Options.chunk_upload
                       and blob.size > Options.chunk_limit * 1024 * 1024)

            engine_uid = params.pop("engine_uid", None)
            is_direct_edit = params.pop("is_direct_edit", False)

            if not upload:
                # Add an upload entry in the database
                upload = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    batch=batch.uid,
                    idx=batch.upload_idx,
                    chunk_size=chunk_size,
                )
                self.dao.save_upload(upload)

            # Set those attributes as FileBlob does not have them
            # and they are required for the step 2 of .upload()
            blob.batch_id = upload.batch
            blob.fileIdx = upload.idx

            uploader: Uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.upload_callback,
            )

            # Update the progress on chunked upload only as the first call to
            # action.progress will set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legitimate.
            if uploader.chunked:
                action.progress = chunk_size * len(
                    uploader.blob.uploadedChunkIds)

            log.debug(
                f"Upload progression is {action.get_percent():.2f}% "
                f"(data length is {sizeof_fmt(blob.size)}, "
                f"chunked is {chunked}, chunk size is {sizeof_fmt(chunk_size)})"
            )

            if action.get_percent() < 100.0 or not action.uploaded:
                if uploader.chunked:
                    # Store the chunk size and start time for later transfer speed computation
                    action.chunk_size = chunk_size
                    action.chunk_transfer_start_time_ns = monotonic_ns()

                    # If there is an UploadError, we catch it from the processor
                    for _ in uploader.iter_upload():
                        # Here 0 may happen when doing a single upload
                        action.progress += uploader.chunk_size or 0

                        # Save the progression
                        upload.progress = action.get_percent()
                        self.dao.set_transfer_progress("upload", upload)

                        # Handle status changes every time a chunk is sent
                        transfer = self.dao.get_upload(path=file_path)
                        if transfer and transfer.status not in (
                                TransferStatus.ONGOING,
                                TransferStatus.DONE,
                        ):
                            raise UploadPaused(transfer.uid or -1)
                else:
                    uploader.upload()

                    # For empty files, this will set action.uploaded to True,
                    # telling us that the file was correctly sent to the server.
                    action.progress += blob.size

                    upload.progress = action.get_percent()

            # Transfer is completed, update the status in the database
            upload.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", upload)

            return blob
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if upload:
                    upload.progress = percent
                    self.dao.set_transfer_progress("upload", upload)

            UploadAction.finish_action()

            if blob.fd:
                blob.fd.close()
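
The chunking decision commented in the examples is plain arithmetic on the Options values. A small standalone sketch, assuming the documented defaults (chunk_size and chunk_limit both 20 MiB), makes the threshold explicit; the real code reads these values from Options and config.ini.

# Standalone sketch of the chunking decision described above. The defaults
# mirror the comments (chunk_size=20, chunk_limit=20, both in MiB); the real
# code reads them from Options.
def chunking_plan(blob_size: int, chunk_size_mib: int = 20,
                  chunk_limit_mib: int = 20, chunk_upload: bool = True) -> tuple:
    chunk_size = chunk_size_mib * 1024 * 1024
    chunked = chunk_upload and blob_size > chunk_limit_mib * 1024 * 1024
    return chunked, chunk_size

# A 25 MiB file is chunked (one 20 MiB part plus one 5 MiB part), while a
# 20 MiB file, not strictly bigger than the limit, is sent in one go.
print(chunking_plan(25 * 1024 * 1024))  # (True, 20971520)
print(chunking_plan(20 * 1024 * 1024))  # (False, 20971520)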
Example #3
    def upload_impl(
        self,
        file_path: Path,
        command: str,
        filename: str = None,
        mime_type: str = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """
        Upload flow implementation.
        If command is not None, the operation is executed with the batch as an input.

        If an exception happens at step 1 or 2, the upload will be continued the next
        time the Processor handles the document (it will be postponed due to the error).

        If the error was raised at step 1, the upload will not start from zero: it will
        resume from the next chunk, based on which chunks were already sent.
        This depends on the chunk TTL configured on the server (it must be large enough
        to handle big files).

        If the error was raised at step 2, step 1 will be checked to ensure the blob
        was successfully uploaded. But in most cases, nothing will be uploaded twice.
        Also, if the error is an HTTP 502 or 503, the Processor will check whether the
        file exists on the server to bypass errors happening *after* the operation was
        successful. If it does, the error is skipped and the upload is seen as a success.
        """

        # Step 0: tweak the blob
        blob = FileBlob(str(file_path))
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        # Step 0.5: retrieve or instantiate a new transfer
        transfer = self._get_transfer(file_path, blob, **kwargs)
        self._handle_session_status(kwargs.pop("session", None), transfer)

        # Step 0.75: delete superfluous arguments that would raise a BadQuery error later
        kwargs.pop("doc_pair", None),
        kwargs.pop("engine_uid", None)
        kwargs.pop("is_direct_edit", None)
        kwargs.pop("is_direct_transfer", None)
        kwargs.pop("remote_parent_path", None)
        kwargs.pop("remote_parent_ref", None)

        # Step 1: upload the blob
        if transfer.status is not TransferStatus.DONE:
            try:
                self.upload_chunks(transfer, blob)
            finally:
                if blob.fd:
                    blob.fd.close()

            # Step 1.5: complete the upload on the third-party provider
            self._complete_upload(transfer, blob)

            # The data was transferred, save the status for eventual future retries
            self._set_transfer_status(transfer, TransferStatus.DONE)
        else:
            # Ensure the blob has all required attributes
            self._complete_upload(transfer, blob)

        # Step 2: link the uploaded blob to the document
        doc: Dict[str, Any] = self._link_blob_to_doc(command, transfer, blob,
                                                     **kwargs)

        # Lastly, we need to remove the batch as the "X-Batch-No-Drop" header was used in link_blob_to_doc()
        try:
            transfer.batch_obj.delete(0)
        except Exception:
            log.warning(
                f"Cannot delete the batchId {transfer.batch_obj.uid!r}",
                exc_info=True)

        return doc
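
The docstring of upload_impl() describes a retry model where a failed document is postponed and picked up again later. A schematic sketch of such a caller, assuming a hypothetical queue with a postpone() method; only upload_impl() itself comes from the code above.

# Schematic sketch of the retry behaviour described in the docstring of
# upload_impl(): on error the document is postponed and the next pass resumes
# from the persisted transfer. `queue`, `doc_pair` and postpone() are
# hypothetical names used only for illustration.
def process_document(remote, queue, doc_pair) -> None:
    try:
        remote.upload_impl(
            doc_pair.local_path,
            "FileManager.Import",  # example command; any operation works
            engine_uid=doc_pair.engine_uid,
        )
    except Exception:
        # Step 1 failures resume from the chunks already sent (as long as the
        # server-side chunk TTL has not elapsed); step 2 failures re-check
        # step 1 and rarely re-upload anything.
        queue.postpone(doc_pair)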
Example #4
    def upload(
        self,
        file_path: Path,
        command: str,
        filename: str = None,
        mime_type: str = None,
        **params: Any,
    ) -> Dict[str, Any]:
        """ Upload a file with a batch.

        If command is not None, the operation is executed
        with the batch as an input.
        """
        with self.upload_lock:
            tick = time.time()
            action = FileAction(
                "Upload", file_path, filename, reporter=QApplication.instance()
            )
            try:
                # Init resumable upload getting a batch generated by the
                # server. This batch is to be used as a resumable session
                batch = self.uploads.batch()

                blob = FileBlob(str(file_path))
                if filename:
                    blob.name = filename
                if mime_type:
                    blob.mimetype = mime_type

                # By default, Options.chunk_size is 20, so chunks will be 20MiB.
                # It can be set to a value between 1 and 20 through the config.ini
                chunk_size = Options.chunk_size * 1024 * 1024
                # For the upload to be chunked, the Options.chunk_upload must be True
                # and the blob must be bigger than Options.chunk_limit, which by default
                # is equal to Options.chunk_size.
                chunked = (
                    Options.chunk_upload
                    and blob.size > Options.chunk_limit * 1024 * 1024
                )

                uploader = batch.get_uploader(
                    blob, chunked=chunked, chunk_size=chunk_size
                )

                if uploader.chunked:
                    # If there is an UploadError, we catch it from the processor
                    for _ in uploader.iter_upload():
                        # Here 0 may happen when doing a single upload
                        action.progress += uploader.chunk_size or 0
                else:
                    uploader.upload()

                upload_result = uploader.response
                blob.fd.close()

                upload_duration = int(time.time() - tick)
                action.transfer_duration = upload_duration
                # Use upload duration * 2 as Nuxeo transaction timeout
                tx_timeout = max(TX_TIMEOUT, upload_duration * 2)
                log.debug(
                    f"Using {tx_timeout} seconds [max({TX_TIMEOUT}, "
                    f"2 * upload time={upload_duration})] as Nuxeo "
                    f"transaction timeout for batch execution of {command!r} "
                    f"with file {file_path!r}"
                )

                if upload_duration > 0:
                    size = os.stat(file_path).st_size
                    log.debug(
                        f"Speed for {size / 1000} kilobytes is {upload_duration} sec:"
                        f" {size / upload_duration / 1024} Kib/s"
                    )

                headers = {"Nuxeo-Transaction-Timeout": str(tx_timeout)}
                return self.execute(
                    command=command, input_obj=upload_result, headers=headers, **params
                )
            finally:
                FileAction.finish_action()
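
The transaction timeout heuristic above is independent of the upload itself. A minimal sketch, assuming TX_TIMEOUT is a constant number of seconds (300 is used here only as a plausible placeholder):

# Minimal sketch of the Nuxeo transaction timeout heuristic used above.
# TX_TIMEOUT is assumed to be a constant number of seconds; 300 is a placeholder.
TX_TIMEOUT = 300

def transaction_timeout_header(upload_duration: int) -> dict:
    # Never go below the fixed floor, but leave twice the measured upload
    # time for the server-side transaction on slow transfers.
    tx_timeout = max(TX_TIMEOUT, upload_duration * 2)
    return {"Nuxeo-Transaction-Timeout": str(tx_timeout)}

print(transaction_timeout_header(10))   # {'Nuxeo-Transaction-Timeout': '300'}
print(transaction_timeout_header(400))  # {'Nuxeo-Transaction-Timeout': '800'}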
Example #5
    def upload(
        self,
        file_path: str,
        filename: str = None,
        mime_type: str = None,
        command: str = None,
        **params: Any,
    ):
        """ Upload a file with a batch.

        If command is not None, the operation is executed
        with the batch as an input.
        """
        with self.upload_lock:
            tick = time.time()
            action = FileAction("Upload", file_path, filename)
            try:
                # Init resumable upload getting a batch generated by the
                # server. This batch is to be used as a resumable session
                batch = self.uploads.batch()

                blob = FileBlob(file_path)
                if filename:
                    blob.name = filename
                if mime_type:
                    blob.mimetype = mime_type
                upload_result = batch.upload(blob)
                blob.fd.close()

                upload_duration = int(time.time() - tick)
                action.transfer_duration = upload_duration
                # Use upload duration * 2 as Nuxeo transaction timeout
                tx_timeout = max(TX_TIMEOUT, upload_duration * 2)
                log.trace(
                    "Using %d seconds [max(%d, 2 * upload time=%d)] as "
                    "Nuxeo transaction timeout for batch execution of %r "
                    "with file %r",
                    tx_timeout,
                    TX_TIMEOUT,
                    upload_duration,
                    command,
                    file_path,
                )

                if upload_duration > 0:
                    size = os.stat(file_path).st_size
                    log.trace(
                        "Speed for %d bytes is %d sec: %f bytes/sec",
                        size,
                        upload_duration,
                        size / upload_duration,
                    )

                if command:
                    headers = {"Nuxeo-Transaction-Timeout": str(tx_timeout)}
                    return self.operations.execute(
                        command=command,
                        input_obj=upload_result,
                        headers=headers,
                        **params,
                    )
            finally:
                FileAction.finish_action()
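
The speed log above divides a byte count by a whole-second duration and skips sub-second uploads. A minimal sketch of that computation with explicit units:

# Minimal sketch of the speed log emitted above: size is in bytes, duration
# in whole seconds, so the rate is plain bytes per second. Sub-second uploads
# are skipped, matching the guard in the code.
def log_speed(size_bytes: int, duration_s: int) -> None:
    if duration_s > 0:
        print(f"Speed for {size_bytes} bytes is {duration_s} sec: "
              f"{size_bytes / duration_s:.0f} bytes/sec")

log_speed(10 * 1024 * 1024, 4)  # Speed for 10485760 bytes is 4 sec: 2621440 bytes/sec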
Example #6
    def upload_chunks(
        self,
        file_path: Path,
        filename: str = None,
        mime_type: str = None,
        **params: Any,
    ) -> FileBlob:
        """Upload a blob by chunks or in one go."""

        action = UploadAction(file_path, reporter=QApplication.instance())
        blob = FileBlob(str(file_path))
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch: Optional[Batch] = None
        chunk_size = None
        upload: Optional[Upload] = None

        try:
            # See if there is already a transfer for this file
            upload = self.dao.get_upload(path=file_path)

            if upload:
                log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
                if upload.status not in (TransferStatus.ONGOING,
                                         TransferStatus.DONE):
                    raise UploadPaused(upload.uid or -1)

                # When looking up an existing batch, specifying the file index
                # is not possible for S3, as there is no blob at the current
                # index until the S3 upload itself is done and batch.complete()
                # has been called.
                file_idx = (None if upload.batch.get("provider", "") == "s3"
                            else upload.batch["upload_idx"])

                # Check if the associated batch still exists server-side
                try:
                    self.uploads.get(upload.batch["batchId"],
                                     file_idx=file_idx)
                except Exception:
                    log.debug(
                        "No associated batch found, restarting from zero",
                        exc_info=True)
                else:
                    log.debug("Associated batch found, resuming the upload")
                    batch = Batch(service=self.uploads, **upload.batch)
                    chunk_size = upload.chunk_size

                    if batch.is_s3():
                        token_ttl = self._aws_token_ttl(
                            batch.extraInfo["expiration"] / 1000)
                        if token_ttl.total_seconds() < 1:
                            batch = None
                            upload = None
                            log.warning(
                                "AWS token has expired, restarting from zero")

            if not batch:
                # .uploads.handlers() result is cached, so it is convenient to call it each time here
                # in case the server did not answer correctly the previous time and thus S3 would
                # be completely disabled because of a one-time server error.
                handler = "s3" if Feature.s3 and self.uploads.has_s3() else ""

                # Create a new batch and save it in the DB
                batch = self.uploads.batch(handler=handler)

                if batch.is_s3():
                    self._aws_token_ttl(batch.extraInfo["expiration"] / 1000)

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, the Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by default
            # is equal to Options.chunk_size.
            chunked = (Options.chunk_upload
                       and blob.size > Options.chunk_limit * 1024 * 1024)

            engine_uid = params.pop("engine_uid", None)
            is_direct_edit = params.pop("is_direct_edit", False)

            # Set those attributes as FileBlob does not have them
            # and they are required for the step 2 of .upload()
            blob.batch_id = batch.uid
            blob.fileIdx = batch.upload_idx

            uploader: Uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.upload_callback,
            )
            log.debug(f"Using {type(uploader).__name__!r} uploader")

            if not upload:
                # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
                self.dao.remove_transfer("upload", file_path)

                # Add an upload entry in the database
                upload = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    batch=batch.as_dict(),
                    chunk_size=chunk_size,
                )
                self.dao.save_upload(upload)

            # Update the progress on chunked upload only as the first call to
            # action.progress will set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legitimate.
            if uploader.chunked:
                action.progress = chunk_size * len(
                    uploader.blob.uploadedChunkIds)

            if action.get_percent() < 100.0 or not action.uploaded:
                if uploader.chunked:
                    # Store the chunk size and start time for later transfer speed computation
                    action.chunk_size = chunk_size
                    action.chunk_transfer_start_time_ns = monotonic_ns()

                    # If there is an UploadError, we catch it from the processor
                    for _ in uploader.iter_upload():
                        # Here 0 may happen when doing a single upload
                        action.progress += uploader.chunk_size or 0

                        # Save the progression
                        upload.progress = action.get_percent()
                        self.dao.set_transfer_progress("upload", upload)

                        # Handle status changes every time a chunk is sent
                        transfer = self.dao.get_upload(path=file_path)
                        if transfer and transfer.status not in (
                                TransferStatus.ONGOING,
                                TransferStatus.DONE,
                        ):
                            raise UploadPaused(transfer.uid or -1)
                else:
                    uploader.upload()

                    # For empty files, this will set action.uploaded to True,
                    # telling us that the file was correctly sent to the server.
                    action.progress += blob.size

                    upload.progress = action.get_percent()

            if batch.is_s3():
                if not batch.blobs:
                    # This may happen when resuming an upload with all parts sent.
                    # Trigger upload() that will complete the MPU and fill required
                    # attributes like the Batch ETag, blob index, etc.
                    uploader.upload()

                # Complete the S3 upload
                # (setting a big timeout to handle big files)
                batch.complete(timeout=(TX_TIMEOUT, TX_TIMEOUT))

            # Transfer is completed, update the status in the database
            upload.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", upload)

            return blob
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if upload:
                    upload.progress = percent
                    self.dao.set_transfer_progress("upload", upload)

            UploadAction.finish_action()

            if blob.fd:
                blob.fd.close()
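
Example #6 drops the resumed batch when the stored AWS token has expired, via a _aws_token_ttl() helper that is not shown. A minimal sketch of what such a helper could compute, assuming extraInfo["expiration"] holds a Unix timestamp in milliseconds (which the / 1000 above suggests); this is an illustrative guess, not the project's actual implementation.

from datetime import datetime, timedelta, timezone

# Illustrative sketch only: how long an AWS token remains valid, given an
# expiration Unix timestamp in seconds (the caller above divides the
# millisecond value from extraInfo["expiration"] by 1000 first).
def aws_token_ttl(expiration_epoch_s: float) -> timedelta:
    expiration = datetime.fromtimestamp(expiration_epoch_s, tz=timezone.utc)
    return expiration - datetime.now(tz=timezone.utc)

# The batch is restarted from zero when less than one second remains:
#     if aws_token_ttl(batch.extraInfo["expiration"] / 1000).total_seconds() < 1: ...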