def make_file(
    self,
    parent: str,
    name: str,
    content: bytes = None,
    file_path: Path = None,
) -> str:
    """
    Create a document with the given *name* and *content* using the FileManager.
    If *file_path* points to a local file, it will be used instead of *content*.

    Note: if *content* is "seen" as plain text by the FileManager, the created
    document will be a Note. If this is not what you want, use make_file_with_blob().
    """
    tmp_created = file_path is None
    if not file_path:
        file_path = make_tmp_file(self.upload_tmp_dir, content)

    try:
        file_blob = FileBlob(str(file_path))
        file_blob.name = safe_filename(name)
        blob = self.uploads.batch().upload(file_blob)
        return self.file_manager_import(self.check_ref(parent), blob)
    finally:
        if tmp_created:
            file_path.unlink()
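# Hedged usage sketch for make_file() above. The `remote` object is assumed to be an
# instance of the class defining make_file() (e.g. a test client wrapping the Nuxeo
# uploads API); the target workspace path and file names are illustrative only.
from pathlib import Path

# From in-memory bytes: a temporary file is created, uploaded, then removed.
ref1 = remote.make_file("/default-domain/workspaces", "data.bin", content=b"\x00\x01\x02")

# From an existing local file: the file is uploaded as-is and kept on disk.
ref2 = remote.make_file("/default-domain/workspaces", "report.pdf", file_path=Path("report.pdf"))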
def test_upload_chunked_error(tmp_path, s3, batch, server):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024 * 5) + b"\x00")
    blob = FileBlob(str(file_in))

    def upload_part(*args, **kwargs):
        raise HTTPError(409, "Conflict", "Mock'ed error")

    def get_uploader():
        return ChunkUploaderS3(server.uploads, batch, blob, 256 * 1024, s3_client=s3)

    # Simulate a new upload that fails after the first uploaded part
    uploader = get_uploader()
    assert uploader.chunk_count == 2
    assert uploader._data_packs == []
    assert len(uploader.blob.uploadedChunkIds) == 0

    iterator = uploader.iter_upload()
    with patch.object(uploader.s3_client, "upload_part", new=upload_part):
        with pytest.raises(UploadError):
            next(iterator)
    assert not uploader.is_complete()

    # Retrying should work
    uploader.upload()
    assert uploader.is_complete()
    assert uploader.batch.etag is not None
def test_upload_resume(server):
    close_server = threading.Event()

    with SwapAttr(server.client, 'host', 'http://localhost:8081/nuxeo/'):
        try:
            serv = Server.upload_response_server(
                wait_to_close_event=close_server,
                port=8081,
                requests_to_handle=20,
                fail_args={'fail_at': 4, 'fail_number': 3},
            )

            file_in = 'test_in'
            with serv:
                batch = server.uploads.batch()
                with open(file_in, 'wb') as f:
                    f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')
                blob = FileBlob(file_in, mimetype='application/octet-stream')

                with pytest.raises(UploadError) as e:
                    batch.upload(blob, chunked=True, chunk_size=256 * 1024)
                assert text(e.value)

                batch.upload(blob, chunked=True, chunk_size=256 * 1024)

                close_server.set()  # release server block
        finally:
            try:
                os.remove(file_in)
            except OSError:
                pass
def test_bad_mimetype(bad_mimetype, expected_mimetype, tmp_path, server):
    file_in = tmp_path / "file.pdf"
    file_in.write_bytes(b"0" * 42)

    blob = FileBlob(str(file_in), mimetype=bad_mimetype)
    assert blob.mimetype == (bad_mimetype or expected_mimetype)

    doc = server.documents.create(new_doc, parent_path=WORKSPACE_ROOT)
    try:
        # Upload the blob
        batch = server.uploads.batch()
        uploader = batch.get_uploader(blob)
        uploader.upload()

        # Attach the blob to the doc
        operation = server.operations.new("Blob.AttachOnDocument")
        operation.params = {"document": doc.path}
        operation.input_obj = batch.get(0)
        operation.execute(void_op=True)

        # Fetch doc metadata
        operation = server.operations.new("Document.Fetch")
        operation.params = {"value": doc.path}
        info = operation.execute()

        # Check the mimetype set by the server is correct
        mimetype = info["properties"]["file:content"]["mime-type"]
        assert mimetype == expected_mimetype
    finally:
        doc.delete()
def test_upload_not_chunked(tmp_path, batch, bucket, server, s3):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(os.urandom(1024 * 1024 * 5))
    blob = FileBlob(str(file_in))

    # Simulate a new single upload
    uploader = UploaderS3(server.uploads, batch, blob, 1024 * 1024 * 10, s3_client=s3)
    assert uploader.chunk_count == 1
    uploader.upload()
    assert uploader.is_complete()
    assert uploader.batch.etag is not None

    # Complete the upload
    batch.service = server.uploads

    # Simple check for additional arguments
    with pytest.raises(requests.exceptions.ConnectionError) as exc:
        batch.complete(timeout=(0.000001, 0.000001))
    error = str(exc.value)
    assert "timed out" in error

    # This will not work as there is no real batch ID existing.
    # It is only here to improve coverage.
    with pytest.raises(HTTPError):
        batch.complete()
def test_upload_resume(tmp_path, server):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")

    with patch.object(server.client, "host", new="http://localhost:8081/nuxeo/"):
        close_server = threading.Event()
        serv = Server.upload_response_server(
            wait_to_close_event=close_server,
            port=8081,
            requests_to_handle=20,
            fail_args={"fail_at": 4, "fail_number": 1},
        )
        with serv:
            batch = server.uploads.batch()
            blob = FileBlob(str(file_in), mimetype="application/octet-stream")
            with pytest.raises(UploadError) as e:
                batch.upload(blob, chunked=True, chunk_size=256 * 1024)
            assert str(e.value)

            # Resume the upload
            batch.upload(blob, chunked=True, chunk_size=256 * 1024)

            # No-op
            batch.complete()

            # Release the server block
            close_server.set()
def _complete_upload(transfer: Upload, blob: FileBlob) -> None:
    """Helper to complete an upload."""
    # Set those attributes as FileBlob does not have them
    # and they are required for the step 2 of .upload_impl()
    blob.batch_id = transfer.batch_obj.uid
    blob.fileIdx = 0
    transfer.batch_obj.upload_idx = 1

    if not transfer.batch_obj.blobs or not transfer.batch_obj.blobs[0]:
        transfer.batch_obj.blobs[0] = blob

    # Complete the upload on the S3 side
    if transfer.batch_obj.is_s3() and transfer.status is not TransferStatus.DONE:
        transfer.batch_obj.complete(timeout=TX_TIMEOUT)
def test_upload_error(tmp_path, server):
    batch = server.uploads.batch()
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")
    blob = FileBlob(str(file_in), mimetype="application/octet-stream")
    assert repr(blob)

    uploader = batch.get_uploader(blob, chunked=True, chunk_size=256 * 1024)
    gen = uploader.iter_upload()

    # Upload chunks 0 and 1
    next(gen)
    next(gen)

    # Retry chunk 0, it should end in an error
    backup = uploader._to_upload
    uploader._to_upload = [0]
    with pytest.raises(UploadError) as e:
        next(gen)
    assert e.value
    assert "already exists" in e.value.info

    # Finish the upload, it must succeed
    uploader._to_upload = backup
    list(uploader.iter_upload())
def test_upload_chunk_timeout(tmp_path, chunked, server):
    chunk_size = 1024
    file_size = 4096 if chunked else chunk_size
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" * file_size)
    blob = FileBlob(str(file_in), mimetype="application/octet-stream")

    batch = server.uploads.batch()
    uploader = batch.get_uploader(blob, chunked=chunked, chunk_size=chunk_size)

    assert uploader.timeout(-1) == 60.0
    assert uploader.timeout(0.000001) == 60.0
    assert uploader.timeout(0) == 60.0
    assert uploader.timeout(1) == 60.0
    assert uploader.timeout(1024) == 60.0
    assert uploader.timeout(1024 * 1024) == 60.0 * 1  # 1 MiB
    assert uploader.timeout(1024 * 1024 * 5) == 60.0 * 5  # 5 MiB
    assert uploader.timeout(1024 * 1024 * 10) == 60.0 * 10  # 10 MiB
    assert uploader.timeout(1024 * 1024 * 20) == 60.0 * 20  # 20 MiB

    uploader._timeout = 0.00001
    assert uploader.timeout(chunk_size) == 0.00001

    with pytest.raises(ConnectionError) as exc:
        uploader.upload()
    error = str(exc.value)
    assert "timed out" in error
def test_uploaderror(server):
    batch = server.uploads.batch()
    file_in = 'test_in'
    with open(file_in, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')

    try:
        blob = FileBlob(file_in, mimetype='application/octet-stream')
        assert repr(blob)

        uploader = batch.get_uploader(blob, chunked=True, chunk_size=256 * 1024)
        gen = uploader.iter_upload()
        next(gen)
        next(gen)

        backup = uploader._to_upload
        uploader._to_upload = {0}
        with pytest.raises(UploadError) as e:
            next(gen)
        assert e.value
        assert "already exists" in e.value.info.message

        uploader._to_upload = backup
        for _ in uploader.iter_upload():
            pass
    finally:
        try:
            os.remove(file_in)
        except OSError:
            pass
def test_get_uploader(server):
    def callback(*args):
        assert args

    batch = server.uploads.batch()
    file_in = 'test_in'
    with open(file_in, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')

    try:
        blob = FileBlob(file_in, mimetype='application/octet-stream')
        uploader = batch.get_uploader(
            blob, chunked=True, chunk_size=256 * 1024, callback=callback)
        assert str(uploader)
        for idx, _ in enumerate(uploader.iter_upload(), 1):
            assert idx == len(uploader.blob.uploadedChunkIds)
        assert batch.get(0)
    finally:
        try:
            os.remove(file_in)
        except OSError:
            pass
def test_upload(chunked, server):
    def callback(*args):
        assert args

    batch = server.uploads.batch()
    file_in, file_out = 'test_in', 'test_out'
    with open(file_in, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')

    doc = server.documents.create(new_doc, parent_path=pytest.ws_root_path)
    try:
        blob = FileBlob(file_in, mimetype='application/octet-stream')
        assert repr(blob)
        assert batch.upload(blob, chunked=chunked, callback=callback)

        operation = server.operations.new('Blob.AttachOnDocument')
        operation.params = {'document': pytest.ws_root_path + '/Document'}
        operation.input_obj = batch.get(0)
        operation.execute(void_op=True)

        operation = server.operations.new('Document.Fetch')
        operation.params = {'value': pytest.ws_root_path + '/Document'}
        info = operation.execute()
        digest = info['properties']['file:content']['digest']

        operation = server.operations.new('Blob.Get')
        operation.input_obj = pytest.ws_root_path + '/Document'
        file_out = operation.execute(file_out=file_out, digest=digest)
    finally:
        doc.delete()
        for file_ in (file_in, file_out):
            try:
                os.remove(file_)
            except OSError:
                pass
def _complete_upload(transfer: Upload, blob: FileBlob, /) -> None:
    """Helper to complete an upload."""
    # Set those attributes as FileBlob does not have them
    # and they are required to complete the upload
    blob.batchId = transfer.batch_obj.uid
    blob.fileIdx = 0
    transfer.batch_obj.upload_idx = 1

    if not transfer.batch_obj.blobs or not transfer.batch_obj.blobs[0]:
        transfer.batch_obj.blobs[0] = blob

    # Complete the upload
    if transfer.status is not TransferStatus.DONE:
        timeout = TX_TIMEOUT
        headers = {"Nuxeo-Transaction-Timeout": str(timeout)}
        transfer.batch_obj.complete(headers=headers, timeout=timeout)
def test_upload_chunked(tmp_path, s3, batch, server):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024 * 5) + b"\x00")
    blob = FileBlob(str(file_in))

    def callback1(uploader):
        assert isinstance(uploader, UploaderS3)

    def callback2(uploader):
        assert isinstance(uploader, UploaderS3)

    def get_uploader():
        callbacks = [callback1, callback2]
        return ChunkUploaderS3(
            server.uploads, batch, blob, 256 * 1024, s3_client=s3, callback=callbacks
        )

    # Simulate a chunked upload
    uploader = get_uploader()
    assert uploader.chunk_count == 2
    assert uploader._data_packs == []
    assert len(uploader.blob.uploadedChunkIds) == 0

    uploader.upload()
    assert uploader.is_complete()
    assert uploader.batch.etag is not None
def _complete_upload(batch: Batch, blob: FileBlob) -> Tuple[FileBlob, Batch]:
    """Helper to complete an upload."""
    # Set those attributes as FileBlob does not have them
    # and they are required for the step 2 of .upload_impl()
    blob.batch_id = batch.uid
    blob.fileIdx = 0
    batch.upload_idx = 1

    if not batch.blobs or not batch.blobs[0]:
        batch.blobs[0] = blob

    # Complete the upload on the S3 side
    if batch.is_s3():
        batch.complete(timeout=TX_TIMEOUT)

    return blob, batch
def test_mimetype():
    test = 'test.bmp'
    with open(test, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')
    try:
        blob = FileBlob(test)
        assert blob.mimetype in ['image/bmp', 'image/x-ms-bmp']
    finally:
        os.remove(test)
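# A minimal sketch of the mimetype detection the test above exercises, assuming FileBlob
# relies on the standard-library `mimetypes` module (an assumption, not the verified
# implementation). It shows why both 'image/bmp' and 'image/x-ms-bmp' are accepted:
# the guess is platform-dependent.
import mimetypes

def guess_mimetype(filename: str) -> str:
    """Return the guessed mimetype, falling back to a generic binary type."""
    mimetype, _ = mimetypes.guess_type(filename)
    return mimetype or "application/octet-stream"

# On most platforms, ".bmp" maps to one of these two values.
assert guess_mimetype("test.bmp") in ("image/bmp", "image/x-ms-bmp")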
def test_wrong_multipart_upload_id(tmp_path, s3, batch, server):
    file_in = tmp_path / "file_in"
    MiB = 1024 * 1024
    file_in.write_bytes(os.urandom(6 * MiB))
    blob = FileBlob(str(file_in))

    batch.multiPartUploadId = "1234"

    with pytest.raises(KeyError):
        ChunkUploaderS3(server.uploads, batch, blob, 1024 * 1024 * 5, s3_client=s3)
def test_data(tmp_path):
    blob = BufferBlob(data="data", name="Test.txt", mimetype="text/plain")
    with blob:
        assert blob.data

    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")
    blob = FileBlob(str(file_in))
    with blob:
        assert blob.data
def test_upload_several_callbacks(tmp_path, chunked, server):
    check = 0

    def callback1(upload):
        nonlocal check
        check += 1

    def callback2(upload):
        assert upload
        assert isinstance(upload.blob.uploadedChunkIds, list)
        assert isinstance(upload.blob.uploadedSize, int)

        if not chunked:
            assert upload.blob.uploadedSize == file_size
            assert upload.blob.uploadType == "normal"
        else:
            # In chunked mode, we should see 1024, 2048, 3072 and 4096 respectively
            sizes = {1: 1024, 2: 1024 * 2, 3: 1024 * 3, 4: 1024 * 4}
            assert upload.blob.uploadedSize == sizes[len(upload.blob.uploadedChunkIds)]
            assert upload.blob.uploadType == "chunked"

    batch = server.uploads.batch()
    chunk_size = 1024
    file_size = 4096 if chunked else 1024
    file_in, file_out = tmp_path / "file_in", tmp_path / "file_out"
    file_in.write_bytes(b"\x00" * file_size)

    callbacks = [callback1, callback2, "callback3"]
    doc = server.documents.create(new_doc, parent_path=WORKSPACE_ROOT)
    try:
        blob = FileBlob(str(file_in), mimetype="application/octet-stream")
        assert repr(blob)
        assert batch.upload(
            blob, chunked=chunked, callback=callbacks, chunk_size=chunk_size
        )

        operation = server.operations.new("Blob.AttachOnDocument")
        operation.params = {"document": WORKSPACE_ROOT + "/Document"}
        operation.input_obj = batch.get(0)
        operation.execute(void_op=True)

        operation = server.operations.new("Document.Fetch")
        operation.params = {"value": WORKSPACE_ROOT + "/Document"}
        info = operation.execute()
        digest = info["properties"]["file:content"]["digest"]

        operation = server.operations.new("Blob.Get")
        operation.input_obj = WORKSPACE_ROOT + "/Document"
        file_out = operation.execute(file_out=file_out, digest=digest)
    finally:
        doc.delete()

    # Check the callback count (1 when not chunked)
    assert check == (4 if chunked else 1)
def test_upload_chunked_resume(tmp_path, s3, batch, server):
    file_in = tmp_path / "file_in"
    MiB = 1024 * 1024
    file_in.write_bytes(os.urandom(25 * MiB))
    blob = FileBlob(str(file_in))

    def get_uploader():
        return ChunkUploaderS3(
            server.uploads, batch, blob, 5 * MiB, s3_client=s3, max_parts=2
        )

    # Simulate a new upload that will fail
    uploader = get_uploader()
    assert uploader.chunk_count == 5
    assert uploader._data_packs == []
    assert len(uploader.blob.uploadedChunkIds) == 0

    iterator = uploader.iter_upload()

    # Upload 4 parts (out of 5) and then fail
    uploaded_parts = []
    for part in range(1, 5):
        next(iterator)
        uploaded_parts.append(part)
        assert uploader.blob.uploadedChunkIds == uploaded_parts
        assert len(uploader._data_packs) == len(uploaded_parts)
        for data_pack in uploader._data_packs:
            assert isinstance(data_pack, dict)
            assert data_pack["PartNumber"] in uploaded_parts
            assert "ETag" in data_pack
    assert not uploader.is_complete()
    assert uploader.batch.etag is None

    # Ask for new tokens, the upload should continue without issue
    # TODO: cannot be tested until using a real server configured with S3
    # old_info = batch.extraInfo.copy()
    # uploader.refresh_token()
    # new_info = batch.extraInfo.copy()
    # for key in ("awsSecretKeyId", "awsSecretAccessKey", "awsSessionToken"):
    #     assert old_info[key] != new_info[key]
    # assert old_info["expiration"] <= new_info["expiration"]

    # Simulate a resume of the same upload, it should succeed
    # (AWS details are stored into the *batch* object, that's why it works)
    uploader = get_uploader()
    assert uploader.chunk_count == 5
    assert len(uploader._data_packs) == 4
    assert uploader.blob.uploadedChunkIds == [1, 2, 3, 4]

    uploader.upload()
    assert uploader.blob.uploadedChunkIds == [1, 2, 3, 4, 5]
    assert uploader.is_complete()
    assert uploader.batch.etag is not None
def test_upload_blob_with_bad_characters(tmp_path, batch, bucket, server, s3):
    file_in = tmp_path / "file_in (1).bin"
    file_in.write_bytes(os.urandom(1024 * 1024 * 5))
    blob = FileBlob(str(file_in))

    # Simulate a single upload
    uploader = UploaderS3(server.uploads, batch, blob, 1024 * 1024 * 10, s3_client=s3)

    # Upload the file, it must work
    uploader.upload()
    assert uploader.batch.etag is not None
def test_data():
    blob = BufferBlob(data='data', name='Test.txt', mimetype='text/plain')
    with blob:
        assert blob.data

    test = 'test_file'
    with open(test, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')
    try:
        blob = FileBlob(test)
        with blob:
            assert blob.data
    finally:
        os.remove(test)
def create_doc(self, path, name, type, properties, file_path=""):
    new_doc = Document(
        name=name,
        type=type,
        properties=properties,  # a dict of property name: value
    )
    ws = self.nuxeo.documents.create(new_doc, parent_path=path)

    if file_path:
        blob = FileBlob(file_path)
        batch = self.nuxeo.uploads.batch()
        # batch.upload() delegates to self.nuxeo.uploads.upload(batch, blob),
        # so a single call is enough to add the blob to the batch
        uploaded = batch.upload(blob)

        operation = self.nuxeo.operations.new('Blob.AttachOnDocument')
        operation.params = {'document': path + "/" + name}
        operation.input_obj = uploaded
        operation.execute()

    ws.save()
    return ws
def upload_file(server, filename):
    batch = server.uploads.batch()
    batch.upload(FileBlob(filename, mimetype="application/octet-stream"))
    doc = server.documents.create(
        Document(name=filename, type="File", properties={"dc:title": filename}),
        parent_path="/default-domain/workspaces",
    )
    try:
        operation = server.operations.new("Blob.AttachOnDocument")
        operation.params = {"document": doc.path}
        operation.input_obj = batch.get(0)
        operation.execute(void_op=True)
    except Exception:
        # Clean up the half-created document and propagate the error
        doc.delete()
        raise
    return doc
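# Hedged usage sketch for upload_file() above, assuming `server` is a connected
# nuxeo.client.Nuxeo instance and "report.pdf" exists in the working directory
# (the file name is illustrative).
doc = upload_file(server, "report.pdf")
print(doc.path)  # path of the created File document with the blob attached
doc.delete()     # clean up once done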
def test_upload_not_chunked_error(tmp_path, batch, bucket, server, s3):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(os.urandom(1024 * 1024 * 5))
    blob = FileBlob(str(file_in))

    def put_object(*args, **kwargs):
        raise HTTPError(409, "Conflict", "Mock'ed error")

    # Simulate a single upload that failed
    uploader = UploaderS3(server.uploads, batch, blob, 1024 * 1024 * 10, s3_client=s3)
    with patch.object(uploader.s3_client, "put_object", new=put_object):
        with pytest.raises(UploadError):
            uploader.upload()
    assert uploader.batch.etag is None
def test_get_uploader(tmp_path, server):
    def callback(*args):
        assert args

    batch = server.uploads.batch()
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")
    blob = FileBlob(str(file_in), mimetype="application/octet-stream")
    uploader = batch.get_uploader(
        blob, chunked=True, chunk_size=256 * 1024, callback=callback
    )
    assert str(uploader)
    for idx, _ in enumerate(uploader.iter_upload(), 1):
        assert idx == len(uploader.blob.uploadedChunkIds)
    assert batch.get(0)
def test_upload_retry(tmp_path, retry_server):
    server = retry_server
    close_server = threading.Event()

    file_in = tmp_path / "χρυσαφὶ"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")

    with patch.object(server.client, "host", new="http://localhost:8081/nuxeo/"):
        serv = Server.upload_response_server(
            wait_to_close_event=close_server,
            port=8081,
            requests_to_handle=20,
            fail_args={"fail_at": 4, "fail_number": 1},
        )
        with serv:
            batch = server.uploads.batch()
            blob = FileBlob(str(file_in), mimetype="application/octet-stream")
            batch.upload(blob, chunked=True, chunk_size=256 * 1024)

            close_server.set()  # release server block
def test_idempotent_requests(tmp_path, server):
    """
    - upload a file in chunked mode
    - call the FileManager.Import operation 5 times (concurrently) with that file
    - check there are both conflict errors and only one created document
    """
    file_in = tmp_path / "file_in"
    file_in.write_bytes(os.urandom(1024 * 1024 * 10))

    batch = server.uploads.batch()
    blob = FileBlob(str(file_in))
    batch.upload(blob, chunked=True, chunk_size=1024 * 1024)

    idempotency_key = str(uuid.uuid4())
    res = defaultdict(int)

    def func():
        try:
            op = server.operations.execute(
                command="FileManager.Import",
                context={"currentDocument": doc.path},
                input_obj=blob,
                headers={
                    IDEMPOTENCY_KEY: idempotency_key,
                    "X-Batch-No-Drop": "true",
                },
            )
            res[op["uid"]] += 1
        except OngoingRequestError as exc:
            res[str(exc)] += 1

    # Create a folder
    name = str(uuid.uuid4())
    folder = Document(name=name, type="Folder", properties={"dc:title": name})
    doc = server.documents.create(folder, parent_path=WORKSPACE_ROOT)

    try:
        # Concurrent calls to the same endpoint
        threads = [threading.Thread(target=func) for _ in range(5)]
        threads[0].start()
        threads[0].join(0.001)
        for thread in threads[1:]:
            thread.start()
        for thread in threads:
            thread.join()

        # Checks
        # 1 doc UID + 1 error (both can be present multiple times)
        assert len(res.keys()) == 2
        error = (
            "OngoingRequestError: a request with the idempotency key"
            f" {idempotency_key!r} is already being processed."
        )
        assert error in res

        # Ensure there is only 1 doc on the server
        children = server.documents.get_children(path=doc.path)
        assert len(children) == 1
        assert children[0].title == file_in.name

        # Calling the same request with the same idempotency key must always return the same result
        current_identical_doc = res[children[0].uid]
        current_identical_errors = res[error]
        for _ in range(10):
            func()
        assert res[error] == current_identical_errors
        assert res[children[0].uid] == current_identical_doc + 10
    finally:
        doc.delete()
def upload(
    self,
    file_path: Path,
    command: str,
    filename: str = None,
    mime_type: str = None,
    **params: Any,
) -> Dict[str, Any]:
    """
    Upload a file with a batch.

    If command is not None, the operation is executed with the batch as an input.
    """
    with self.upload_lock:
        tick = time.time()
        action = FileAction(
            "Upload", file_path, filename, reporter=QApplication.instance()
        )
        try:
            # Init the resumable upload by getting a batch generated by the
            # server. This batch is to be used as a resumable session.
            batch = self.uploads.batch()

            blob = FileBlob(str(file_path))
            if filename:
                blob.name = filename
            if mime_type:
                blob.mimetype = mime_type

            # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = Options.chunk_size * 1024 * 1024

            # For the upload to be chunked, Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by
            # default is equal to Options.chunk_size.
            chunked = (
                Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
            )

            uploader = batch.get_uploader(blob, chunked=chunked, chunk_size=chunk_size)
            if uploader.chunked:
                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    # Here 0 may happen when doing a single upload
                    action.progress += uploader.chunk_size or 0
            else:
                uploader.upload()
            upload_result = uploader.response

            blob.fd.close()

            upload_duration = int(time.time() - tick)
            action.transfer_duration = upload_duration

            # Use upload duration * 2 as Nuxeo transaction timeout
            tx_timeout = max(TX_TIMEOUT, upload_duration * 2)
            log.debug(
                f"Using {tx_timeout} seconds [max({TX_TIMEOUT}, "
                f"2 * upload time={upload_duration})] as Nuxeo "
                f"transaction timeout for batch execution of {command!r} "
                f"with file {file_path!r}"
            )

            if upload_duration > 0:
                size = os.stat(file_path).st_size
                log.debug(
                    f"Speed for {size / 1000} kilobytes is {upload_duration} sec:"
                    f" {size / upload_duration / 1024} KiB/s"
                )

            headers = {"Nuxeo-Transaction-Timeout": str(tx_timeout)}
            return self.execute(
                command=command, input_obj=upload_result, headers=headers, **params
            )
        finally:
            FileAction.finish_action()
def upload_chunks(
    self,
    file_path: Path,
    filename: str = None,
    mime_type: str = None,
    **params: Any,
) -> FileBlob:
    """Upload a blob by chunks or in one go."""
    action = UploadAction(file_path, reporter=QApplication.instance())

    blob = FileBlob(str(file_path))
    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch = None
    chunk_size = None
    upload: Optional[Upload] = None
    try:
        # See if there is already a transfer for this file
        upload = self.dao.get_upload(path=file_path)
        if upload:
            log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
            if upload.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(upload.uid or -1)

            # Check if the associated batch still exists server-side
            try:
                self.uploads.get(upload.batch, upload.idx)
            except Exception:
                log.debug(
                    "No associated batch found, restarting from zero", exc_info=True
                )
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(batchId=upload.batch, service=self.uploads)
                batch.upload_idx = upload.idx
                chunk_size = upload.chunk_size

        if not batch:
            # Create a new batch and save it in the DB
            batch = self.uploads.batch()

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by
        # default is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        engine_uid = params.pop("engine_uid", None)
        is_direct_edit = params.pop("is_direct_edit", False)

        if not upload:
            # Add an upload entry in the database
            upload = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                batch=batch.uid,
                idx=batch.upload_idx,
                chunk_size=chunk_size,
            )
            self.dao.save_upload(upload)

        # Set those attributes as FileBlob does not have them
        # and they are required for step 2 of .upload()
        blob.batch_id = upload.batch
        blob.fileIdx = upload.idx

        uploader: Uploader = batch.get_uploader(
            blob,
            chunked=chunked,
            chunk_size=chunk_size,
            callback=self.upload_callback,
        )

        # Update the progress on chunked upload only, as the first call to
        # action.progress will set the action.uploaded attr to True for
        # empty files. This is not what we want: empty files are legit.
        if uploader.chunked:
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

        log.debug(
            f"Upload progression is {action.get_percent():.2f}% "
            f"(data length is {sizeof_fmt(blob.size)}, "
            f"chunked is {chunked}, chunk size is {sizeof_fmt(chunk_size)})"
        )

        if action.get_percent() < 100.0 or not action.uploaded:
            if uploader.chunked:
                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    # Here 0 may happen when doing a single upload
                    action.progress += uploader.chunk_size or 0

                    # Save the progression
                    upload.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", upload)

                    # Handle status changes every time a chunk is sent
                    transfer = self.dao.get_upload(path=file_path)
                    if transfer and transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size

                upload.progress = action.get_percent()

        # Transfer is completed, update the status in the database
        upload.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", upload)

        return blob
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if upload:
                upload.progress = percent
                self.dao.set_transfer_progress("upload", upload)

        UploadAction.finish_action()
        if blob.fd:
            blob.fd.close()
def upload_chunks(
    self,
    file_path: Path,
    filename: str = None,
    mime_type: str = None,
    **kwargs: Any,
) -> Tuple[FileBlob, Batch]:
    """Upload a blob by chunks or in one go."""
    engine_uid = kwargs.get("engine_uid", None)
    is_direct_edit = kwargs.pop("is_direct_edit", False)
    is_direct_transfer = kwargs.get("is_direct_transfer", False)
    remote_parent_path = kwargs.pop("remote_parent_path", "")
    remote_parent_ref = kwargs.pop("remote_parent_ref", "")

    blob = FileBlob(str(file_path))
    action = self.upload_action(
        file_path, blob.size, reporter=QApplication.instance(), engine=engine_uid
    )

    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch: Optional[Batch] = None
    chunk_size = None

    # See if there is already a transfer for this file
    transfer = self.get_upload(file_path)

    try:
        if transfer:
            log.debug(f"Retrieved transfer for {file_path!r}: {transfer}")
            if transfer.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(transfer.uid or -1)

            # When fetching for an eventual batch, specifying the file index
            # is not possible for S3 as there is no blob at the current index
            # until the S3 upload itself is done and the call to
            # batch.complete() made.
            file_idx = None if transfer.batch.get("provider", "") == "s3" else 0

            # Check if the associated batch still exists server-side
            try:
                self.remote.uploads.get(transfer.batch["batchId"], file_idx=file_idx)
            except HTTPError as exc:
                if exc.status != 404:
                    raise
                log.debug("No associated batch found, restarting from zero")
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(service=self.remote.uploads, **transfer.batch)
                chunk_size = transfer.chunk_size

                # The transfer was already completed on the third-party provider
                if batch.etag:
                    return self._complete_upload(batch, blob)

        if not batch:
            # .uploads.handlers() result is cached, so it is convenient to call it each time here
            # in case the server did not answer correctly the previous time and thus S3 would
            # be completely disabled because of a one-time server error.
            handler = "s3" if Feature.s3 and self.remote.uploads.has_s3() else ""

            # Create a new batch and save it in the DB
            batch = self.remote.uploads.batch(handler=handler)

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by
        # default is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        action.is_direct_transfer = is_direct_transfer

        try:
            uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.remote.upload_callback,
            )
        except ClientError as exc:
            if exc.response["Error"]["Code"] != "NoSuchUpload":
                raise
            log.warning(
                "Either the upload ID does not exist or the upload was already completed."
            )
            return self._complete_upload(batch, blob)

        log.debug(f"Using {type(uploader).__name__!r} uploader")

        if not transfer:
            # Remove an eventual obsolete upload (it happens when an upload using S3 has invalid metadata)
            self.dao.remove_transfer("upload", file_path)

            # Add an upload entry in the database
            transfer = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                filesize=blob.size,
                batch=batch.as_dict(),
                chunk_size=chunk_size,
                is_direct_transfer=is_direct_transfer,
                remote_parent_path=remote_parent_path,
                remote_parent_ref=remote_parent_ref,
            )
            self.dao.save_upload(transfer)
        elif transfer.batch["batchId"] != batch.uid:
            # The upload was not a fresh one but its batch ID had expired.
            # Before NXDRIVE-2183, the batch ID was not updated and so the second step
            # of the upload (attaching the blob to a document) was failing.
            transfer.batch["batchId"] = batch.uid
            self.dao.update_upload(transfer)

        if uploader.chunked:
            # Update the progress on chunked upload only, as the first call to
            # action.progress will set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legit.
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

            # Store the chunk size and start time for later transfer speed computation
            action.chunk_size = chunk_size
            action.chunk_transfer_start_time_ns = monotonic_ns()

            if batch.is_s3():
                self._patch_refresh_token(uploader, transfer)

            # If there is an UploadError, we catch it from the processor
            for _ in uploader.iter_upload():
                action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                # Save the progression
                transfer.progress = action.get_percent()
                self.dao.set_transfer_progress("upload", transfer)

                # Handle status changes every time a chunk is sent
                _transfer = self.get_upload(file_path)
                if _transfer and _transfer.status not in (
                    TransferStatus.ONGOING,
                    TransferStatus.DONE,
                ):
                    raise UploadPaused(transfer.uid or -1)
        else:
            uploader.upload()

            # For empty files, this will set action.uploaded to True,
            # telling us that the file was correctly sent to the server.
            action.progress += blob.size

            transfer.progress = action.get_percent()

        if batch.is_s3():
            if not batch.blobs:
                # This may happen when resuming an upload with all parts sent.
                # Trigger upload() that will complete the MPU and fill required
                # attributes like the Batch ETag, blob index, etc.
                uploader.upload()

            # Save the final ETag in the database to prevent future issues if
            # the FileManager throws an error
            transfer.batch = batch.as_dict()
            self.dao.update_upload(transfer)

            self._complete_upload(batch, blob)

        # Transfer is completed, update the status in the database
        transfer.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", transfer)

        return blob, batch
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if transfer:
                transfer.progress = percent
                self.dao.set_transfer_progress("upload", transfer)

        action.finish_action()
        if blob.fd:
            blob.fd.close()