Ejemplo n.º 1
0
    def _complete_upload(transfer: Upload, blob: FileBlob) -> None:
        """Helper to complete an upload."""

        # Set those attributes as FileBlob does not have them
        # and they are required for the step 2 of .upload_impl()
        blob.batch_id = transfer.batch_obj.uid
        blob.fileIdx = 0
        transfer.batch_obj.upload_idx = 1

        if not transfer.batch_obj.blobs or not transfer.batch_obj.blobs[0]:
            transfer.batch_obj.blobs[0] = blob

        # Complete the upload on the S3 side
        if transfer.batch_obj.is_s3(
        ) and transfer.status is not TransferStatus.DONE:
            transfer.batch_obj.complete(timeout=TX_TIMEOUT)
Ejemplo n.º 2
0
    def _complete_upload(transfer: Upload, blob: FileBlob, /) -> None:
        """Helper to complete an upload."""

        # Set those attributes as FileBlob does not have them and they are required to complete the upload
        blob.batchId = transfer.batch_obj.uid
        blob.fileIdx = 0
        transfer.batch_obj.upload_idx = 1

        if not transfer.batch_obj.blobs or not transfer.batch_obj.blobs[0]:
            transfer.batch_obj.blobs[0] = blob

        # Complete the upload
        if transfer.status is not TransferStatus.DONE:
            timeout = TX_TIMEOUT
            headers = {"Nuxeo-Transaction-Timeout": str(timeout)}
            transfer.batch_obj.complete(headers=headers, timeout=timeout)
Ejemplo n.º 3
0
    def _complete_upload(batch: Batch, blob: FileBlob) -> Tuple[FileBlob, Batch]:
        """Helper to complete an upload."""

        # Set those attributes as FileBlob does not have them
        # and they are required for the step 2 of .upload_impl()
        blob.batch_id = batch.uid
        blob.fileIdx = 0
        batch.upload_idx = 1

        if not batch.blobs or not batch.blobs[0]:
            batch.blobs[0] = blob

        # Complete the upload on the S3 side
        if batch.is_s3():
            batch.complete(timeout=TX_TIMEOUT)

        return blob, batch
Ejemplo n.º 4
0
    def upload_chunks(
        self,
        file_path: Path,
        filename: str = None,
        mime_type: str = None,
        **params: Any,
    ) -> FileBlob:
        """Upload a blob by chunks or in one go."""

        action = UploadAction(file_path, reporter=QApplication.instance())
        blob = FileBlob(str(file_path))
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch = None
        chunk_size = None
        upload: Optional[Upload] = None

        try:
            # See if there is already a transfer for this file
            upload = self.dao.get_upload(path=file_path)
            if upload:
                log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
                if upload.status not in (TransferStatus.ONGOING,
                                         TransferStatus.DONE):
                    raise UploadPaused(upload.uid or -1)

                # Check if the associated batch still exists server-side
                try:
                    self.uploads.get(upload.batch, upload.idx)
                except Exception:
                    log.debug(
                        f"No associated batch found, restarting from zero",
                        exc_info=True,
                    )
                else:
                    log.debug(f"Associated batch found, resuming the upload")
                    batch = Batch(batchId=upload.batch, service=self.uploads)
                    batch.upload_idx = upload.idx
                    chunk_size = upload.chunk_size

            if not batch:
                # Create a new batch and save it in the DB
                batch = self.uploads.batch()

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, the Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by default
            # is equal to Options.chunk_size.
            chunked = (Options.chunk_upload
                       and blob.size > Options.chunk_limit * 1024 * 1024)

            engine_uid = params.pop("engine_uid", None)
            is_direct_edit = params.pop("is_direct_edit", False)

            if not upload:
                # Add an upload entry in the database
                upload = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    batch=batch.uid,
                    idx=batch.upload_idx,
                    chunk_size=chunk_size,
                )
                self.dao.save_upload(upload)

            # Set those attributes as FileBlob does not have them
            # and they are required for the step 2 of .upload()
            blob.batch_id = upload.batch
            blob.fileIdx = upload.idx

            uploader: Uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.upload_callback,
            )

            # Update the progress on chunked upload only as the first call to
            # action.progress will set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legits.
            if uploader.chunked:
                action.progress = chunk_size * len(
                    uploader.blob.uploadedChunkIds)

            log.debug(
                f"Upload progression is {action.get_percent():.2f}% "
                f"(data length is {sizeof_fmt(blob.size)}, "
                f"chunked is {chunked}, chunk size is {sizeof_fmt(chunk_size)})"
            )

            if action.get_percent() < 100.0 or not action.uploaded:
                if uploader.chunked:
                    # Store the chunck size and start time for later transfer speed computation
                    action.chunk_size = chunk_size
                    action.chunk_transfer_start_time_ns = monotonic_ns()

                    # If there is an UploadError, we catch it from the processor
                    for _ in uploader.iter_upload():
                        # Here 0 may happen when doing a single upload
                        action.progress += uploader.chunk_size or 0

                        # Save the progression
                        upload.progress = action.get_percent()
                        self.dao.set_transfer_progress("upload", upload)

                        # Handle status changes every time a chunk is sent
                        transfer = self.dao.get_upload(path=file_path)
                        if transfer and transfer.status not in (
                                TransferStatus.ONGOING,
                                TransferStatus.DONE,
                        ):
                            raise UploadPaused(transfer.uid or -1)
                else:
                    uploader.upload()

                    # For empty files, this will set action.uploaded to True,
                    # telling us that the file was correctly sent to the server.
                    action.progress += blob.size

                    upload.progress = action.get_percent()

            # Transfer is completed, update the status in the database
            upload.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", upload)

            return blob
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if upload:
                    upload.progress = percent
                    self.dao.set_transfer_progress("upload", upload)

            UploadAction.finish_action()

            if blob.fd:
                blob.fd.close()
Ejemplo n.º 5
0
    def upload_chunks(
        self,
        file_path: Path,
        filename: str = None,
        mime_type: str = None,
        **params: Any,
    ) -> FileBlob:
        """Upload a blob by chunks or in one go."""

        action = UploadAction(file_path, reporter=QApplication.instance())
        blob = FileBlob(str(file_path))
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch: Optional[Batch] = None
        chunk_size = None
        upload: Optional[Upload] = None

        try:
            # See if there is already a transfer for this file
            upload = self.dao.get_upload(path=file_path)

            if upload:
                log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
                if upload.status not in (TransferStatus.ONGOING,
                                         TransferStatus.DONE):
                    raise UploadPaused(upload.uid or -1)

                # When fetching for an eventual batch, specifying the file index
                # is not possible for S3 as there is no blob at the current index
                # until the S3 upload is done itself and the call to
                # batch.complete() done.
                file_idx = (None if upload.batch.get("provider", "") == "s3"
                            else upload.batch["upload_idx"])

                # Check if the associated batch still exists server-side
                try:
                    self.uploads.get(upload.batch["batchId"],
                                     file_idx=file_idx)
                except Exception:
                    log.debug(
                        "No associated batch found, restarting from zero",
                        exc_info=True)
                else:
                    log.debug("Associated batch found, resuming the upload")
                    batch = Batch(service=self.uploads, **upload.batch)
                    chunk_size = upload.chunk_size

                    if batch.is_s3():
                        token_ttl = self._aws_token_ttl(
                            batch.extraInfo["expiration"] / 1000)
                        if token_ttl.total_seconds() < 1:
                            batch = None
                            upload = None
                            log.warning(
                                "AWS token has expired, restarting from zero")

            if not batch:
                # .uploads.handlers() result is cached, so it is convenient to call it each time here
                # in case the server did not answer correctly the previous time and thus S3 would
                # be completely disabled because of a one-time server error.
                handler = "s3" if Feature.s3 and self.uploads.has_s3() else ""

                # Create a new batch and save it in the DB
                batch = self.uploads.batch(handler=handler)

                if batch.is_s3():
                    self._aws_token_ttl(batch.extraInfo["expiration"] / 1000)

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, the Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by default
            # is equal to Options.chunk_size.
            chunked = (Options.chunk_upload
                       and blob.size > Options.chunk_limit * 1024 * 1024)

            engine_uid = params.pop("engine_uid", None)
            is_direct_edit = params.pop("is_direct_edit", False)

            # Set those attributes as FileBlob does not have them
            # and they are required for the step 2 of .upload()
            blob.batch_id = batch.uid
            blob.fileIdx = batch.upload_idx

            uploader: Uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.upload_callback,
            )
            log.debug(f"Using {type(uploader).__name__!r} uploader")

            if not upload:
                # Remove eventual obsolete upload (it happens when an upload using S3 has invalid metadatas)
                self.dao.remove_transfer("upload", file_path)

                # Add an upload entry in the database
                upload = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    batch=batch.as_dict(),
                    chunk_size=chunk_size,
                )
                self.dao.save_upload(upload)

            # Update the progress on chunked upload only as the first call to
            # action.progress will set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legits.
            if uploader.chunked:
                action.progress = chunk_size * len(
                    uploader.blob.uploadedChunkIds)

            if action.get_percent() < 100.0 or not action.uploaded:
                if uploader.chunked:
                    # Store the chunk size and start time for later transfer speed computation
                    action.chunk_size = chunk_size
                    action.chunk_transfer_start_time_ns = monotonic_ns()

                    # If there is an UploadError, we catch it from the processor
                    for _ in uploader.iter_upload():
                        # Here 0 may happen when doing a single upload
                        action.progress += uploader.chunk_size or 0

                        # Save the progression
                        upload.progress = action.get_percent()
                        self.dao.set_transfer_progress("upload", upload)

                        # Handle status changes every time a chunk is sent
                        transfer = self.dao.get_upload(path=file_path)
                        if transfer and transfer.status not in (
                                TransferStatus.ONGOING,
                                TransferStatus.DONE,
                        ):
                            raise UploadPaused(transfer.uid or -1)
                else:
                    uploader.upload()

                    # For empty files, this will set action.uploaded to True,
                    # telling us that the file was correctly sent to the server.
                    action.progress += blob.size

                    upload.progress = action.get_percent()

            if batch.is_s3():
                if not batch.blobs:
                    # This may happen when resuming an upload with all parts sent.
                    # Trigger upload() that will complete the MPU and fill required
                    # attributes like the Batch ETag, blob index, etc..
                    uploader.upload()

                # Complete the S3 upload
                # (setting a big timeout to handle big files)
                batch.complete(timeout=(TX_TIMEOUT, TX_TIMEOUT))

            # Transfer is completed, update the status in the database
            upload.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", upload)

            return blob
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if upload:
                    upload.progress = percent
                    self.dao.set_transfer_progress("upload", upload)

            UploadAction.finish_action()

            if blob.fd:
                blob.fd.close()