def make_file(
        self,
        parent: str,
        name: str,
        content: bytes = None,
        file_path: Path = None,
    ) -> str:
        """
        Create a document with the given *name* and *content* using the FileManager.
        If *file_path* points to a local file, it will be used instead of *content*.

        Note: if *content* is "seen" as plain text by the FileManager, the created document
              will be a Note. If this is not what you want, use make_file_with_blob().
        """
        tmp_created = file_path is None
        if not file_path:
            file_path = make_tmp_file(self.upload_tmp_dir, content)

        try:
            file_blob = FileBlob(str(file_path))
            file_blob.name = safe_filename(name)
            blob = self.uploads.batch().upload(file_blob)
            return self.file_manager_import(self.check_ref(parent), blob)
        finally:
            if tmp_created:
                file_path.unlink()
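
A minimal usage sketch of the method above; `client` is a hypothetical instance of the class defining make_file(), and the parent path and file names are illustrative assumptions:

from pathlib import Path

# `client` is assumed to be an instance of the class defining make_file() above.
doc_ref = client.make_file(
    "/default-domain/workspaces/ws",
    "report.bin",
    content=b"\x00binary payload\x00",  # raw bytes are written to a temporary file, then uploaded
)

# Alternatively, point to an existing local file instead of passing raw bytes.
other_ref = client.make_file(
    "/default-domain/workspaces/ws",
    "report.pdf",
    file_path=Path("/tmp/report.pdf"),
)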
Example #2
def test_upload_chunked_error(tmp_path, s3, batch, server):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024 * 5) + b"\x00")

    blob = FileBlob(str(file_in))

    def upload_part(*args, **kwargs):
        raise HTTPError(409, "Conflict", "Mock'ed error")

    def get_uploader():
        return ChunkUploaderS3(server.uploads, batch, blob, 256 * 1024, s3_client=s3)

    # Simulate a new upload that failed after the first uploaded part
    uploader = get_uploader()
    assert uploader.chunk_count == 2
    assert uploader._data_packs == []
    assert len(uploader.blob.uploadedChunkIds) == 0

    iterator = uploader.iter_upload()

    with patch.object(uploader.s3_client, "upload_part", new=upload_part):
        with pytest.raises(UploadError):
            next(iterator)
    assert not uploader.is_complete()

    # Retry should work
    uploader.upload()
    assert uploader.is_complete()
    assert uploader.batch.etag is not None
Example #3
def test_upload_resume(server):
    close_server = threading.Event()
    with SwapAttr(server.client, 'host', 'http://localhost:8081/nuxeo/'):
        try:
            serv = Server.upload_response_server(
                wait_to_close_event=close_server,
                port=8081,
                requests_to_handle=20,
                fail_args={
                    'fail_at': 4,
                    'fail_number': 3
                })
            file_in = 'test_in'

            with serv:
                batch = server.uploads.batch()
                with open(file_in, 'wb') as f:
                    f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')
                blob = FileBlob(file_in, mimetype='application/octet-stream')
                with pytest.raises(UploadError) as e:
                    batch.upload(blob, chunked=True, chunk_size=256 * 1024)
                assert text(e.value)
                batch.upload(blob, chunked=True, chunk_size=256 * 1024)
                close_server.set()  # release server block

        finally:
            try:
                os.remove(file_in)
            except OSError:
                pass
Example #4
def test_bad_mimetype(bad_mimetype, expected_mimetype, tmp_path, server):
    file_in = tmp_path / "file.pdf"
    file_in.write_bytes(b"0" * 42)
    blob = FileBlob(str(file_in), mimetype=bad_mimetype)
    assert blob.mimetype == (bad_mimetype or expected_mimetype)

    doc = server.documents.create(new_doc, parent_path=WORKSPACE_ROOT)
    try:
        # Upload the blob
        batch = server.uploads.batch()
        uploader = batch.get_uploader(blob)
        uploader.upload()

        # Attach the blob to the doc
        operation = server.operations.new("Blob.AttachOnDocument")
        operation.params = {"document": doc.path}
        operation.input_obj = batch.get(0)
        operation.execute(void_op=True)

        # Fetch doc metadata
        operation = server.operations.new("Document.Fetch")
        operation.params = {"value": doc.path}
        info = operation.execute()

        # Check the mimetype set by the server is correct
        mimetype = info["properties"]["file:content"]["mime-type"]
        assert mimetype == expected_mimetype
    finally:
        doc.delete()
Example #5
def test_upload_not_chunked(tmp_path, batch, bucket, server, s3):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(os.urandom(1024 * 1024 * 5))

    blob = FileBlob(str(file_in))

    # Simulate a new single upload
    uploader = UploaderS3(server.uploads, batch, blob, 1024 * 1024 * 10, s3_client=s3)

    assert uploader.chunk_count == 1
    uploader.upload()
    assert uploader.is_complete()
    assert uploader.batch.etag is not None

    # Complete the upload
    batch.service = server.uploads
    # Simple check for additional arguments
    with pytest.raises(requests.exceptions.ConnectionError) as exc:
        batch.complete(timeout=(0.000001, 0.000001))
    error = str(exc.value)
    assert "timed out" in error

    # This will not work as no real batch ID exists.
    # It is only here to improve coverage.
    with pytest.raises(HTTPError):
        batch.complete()
Example #6
def test_upload_resume(tmp_path, server):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")

    with patch.object(server.client, "host", new="http://localhost:8081/nuxeo/"):
        close_server = threading.Event()
        serv = Server.upload_response_server(
            wait_to_close_event=close_server,
            port=8081,
            requests_to_handle=20,
            fail_args={"fail_at": 4, "fail_number": 1},
        )

        with serv:
            batch = server.uploads.batch()
            blob = FileBlob(str(file_in), mimetype="application/octet-stream")
            with pytest.raises(UploadError) as e:
                batch.upload(blob, chunked=True, chunk_size=256 * 1024)
            assert str(e.value)

            # Resume the upload
            batch.upload(blob, chunked=True, chunk_size=256 * 1024)

            # No-op
            batch.complete()

            # Release the server block
            close_server.set()
Example #7
    def _complete_upload(transfer: Upload, blob: FileBlob) -> None:
        """Helper to complete an upload."""

        # Set those attributes as FileBlob does not have them
        # and they are required for the step 2 of .upload_impl()
        blob.batch_id = transfer.batch_obj.uid
        blob.fileIdx = 0
        transfer.batch_obj.upload_idx = 1

        if not transfer.batch_obj.blobs or not transfer.batch_obj.blobs[0]:
            transfer.batch_obj.blobs[0] = blob

        # Complete the upload on the S3 side
        if transfer.batch_obj.is_s3() and transfer.status is not TransferStatus.DONE:
            transfer.batch_obj.complete(timeout=TX_TIMEOUT)
Example #8
def test_upload_error(tmp_path, server):
    batch = server.uploads.batch()
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")

    blob = FileBlob(str(file_in), mimetype="application/octet-stream")
    assert repr(blob)
    uploader = batch.get_uploader(blob, chunked=True, chunk_size=256 * 1024)
    gen = uploader.iter_upload()

    # Upload chunks 0 and 1
    next(gen)
    next(gen)

    # Retry chunk 0, it should end in an error
    backup = uploader._to_upload
    uploader._to_upload = [0]
    with pytest.raises(UploadError) as e:
        next(gen)
    assert e.value
    assert "already exists" in e.value.info

    # Finish the upload, it must succeed
    uploader._to_upload = backup
    list(uploader.iter_upload())
Example #9
def test_upload_chunk_timeout(tmp_path, chunked, server):

    chunk_size = 1024
    file_size = 4096 if chunked else chunk_size
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" * file_size)

    blob = FileBlob(str(file_in), mimetype="application/octet-stream")

    batch = server.uploads.batch()
    uploader = batch.get_uploader(blob, chunked=chunked, chunk_size=chunk_size)

    assert uploader.timeout(-1) == 60.0
    assert uploader.timeout(0.000001) == 60.0
    assert uploader.timeout(0) == 60.0
    assert uploader.timeout(1) == 60.0
    assert uploader.timeout(1024) == 60.0
    assert uploader.timeout(1024 * 1024) == 60.0 * 1  # 1 MiB
    assert uploader.timeout(1024 * 1024 * 5) == 60.0 * 5  # 5 MiB
    assert uploader.timeout(1024 * 1024 * 10) == 60.0 * 10  # 10 MiB
    assert uploader.timeout(1024 * 1024 * 20) == 60.0 * 20  # 20 MiB

    uploader._timeout = 0.00001
    assert uploader.timeout(chunk_size) == 0.00001

    with pytest.raises(ConnectionError) as exc:
        uploader.upload()
    error = str(exc.value)
    assert "timed out" in error
Example #10
def test_uploaderror(server):
    batch = server.uploads.batch()
    file_in = 'test_in'
    with open(file_in, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')

    try:
        blob = FileBlob(file_in, mimetype='application/octet-stream')
        assert repr(blob)
        uploader = batch.get_uploader(blob,
                                      chunked=True,
                                      chunk_size=256 * 1024)
        gen = uploader.iter_upload()

        next(gen)
        next(gen)
        backup = uploader._to_upload
        uploader._to_upload = {0}
        with pytest.raises(UploadError) as e:
            next(gen)
        assert e.value
        assert "already exists" in e.value.info.message
        uploader._to_upload = backup

        for _ in uploader.iter_upload():
            pass

    finally:
        try:
            os.remove(file_in)
        except OSError:
            pass
Example #11
def test_get_uploader(server):
    def callback(*args):
        assert args

    batch = server.uploads.batch()
    file_in = 'test_in'
    with open(file_in, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')

    try:
        blob = FileBlob(file_in, mimetype='application/octet-stream')
        uploader = batch.get_uploader(blob,
                                      chunked=True,
                                      chunk_size=256 * 1024,
                                      callback=callback)
        assert str(uploader)
        for idx, _ in enumerate(uploader.iter_upload(), 1):
            assert idx == len(uploader.blob.uploadedChunkIds)

        assert batch.get(0)
    finally:
        try:
            os.remove(file_in)
        except OSError:
            pass
Example #12
def test_upload(chunked, server):
    def callback(*args):
        assert args

    batch = server.uploads.batch()
    file_in, file_out = 'test_in', 'test_out'
    with open(file_in, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')

    doc = server.documents.create(new_doc, parent_path=pytest.ws_root_path)
    try:
        blob = FileBlob(file_in, mimetype='application/octet-stream')
        assert repr(blob)
        assert batch.upload(blob, chunked=chunked, callback=callback)
        operation = server.operations.new('Blob.AttachOnDocument')
        operation.params = {'document': pytest.ws_root_path + '/Document'}
        operation.input_obj = batch.get(0)
        operation.execute(void_op=True)

        operation = server.operations.new('Document.Fetch')
        operation.params = {'value': pytest.ws_root_path + '/Document'}
        info = operation.execute()
        digest = info['properties']['file:content']['digest']

        operation = server.operations.new('Blob.Get')
        operation.input_obj = pytest.ws_root_path + '/Document'
        file_out = operation.execute(file_out=file_out, digest=digest)
    finally:
        doc.delete()
        for file_ in (file_in, file_out):
            try:
                os.remove(file_)
            except OSError:
                pass
Example #13
    def _complete_upload(transfer: Upload, blob: FileBlob, /) -> None:
        """Helper to complete an upload."""

        # Set those attributes as FileBlob does not have them and they are required to complete the upload
        blob.batchId = transfer.batch_obj.uid
        blob.fileIdx = 0
        transfer.batch_obj.upload_idx = 1

        if not transfer.batch_obj.blobs or not transfer.batch_obj.blobs[0]:
            transfer.batch_obj.blobs[0] = blob

        # Complete the upload
        if transfer.status is not TransferStatus.DONE:
            timeout = TX_TIMEOUT
            headers = {"Nuxeo-Transaction-Timeout": str(timeout)}
            transfer.batch_obj.complete(headers=headers, timeout=timeout)
Example #14
def test_upload_chunked(tmp_path, s3, batch, server):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024 * 5) + b"\x00")

    blob = FileBlob(str(file_in))

    def callback1(uploader):
        assert isinstance(uploader, UploaderS3)

    def callback2(uploader):
        assert isinstance(uploader, UploaderS3)

    def get_uploader():
        callbacks = [callback1, callback2]
        return ChunkUploaderS3(
            server.uploads, batch, blob, 256 * 1024, s3_client=s3, callback=callbacks
        )

    # Simulate a chunked upload
    uploader = get_uploader()
    assert uploader.chunk_count == 2
    assert uploader._data_packs == []
    assert len(uploader.blob.uploadedChunkIds) == 0
    uploader.upload()
    assert uploader.is_complete()
    assert uploader.batch.etag is not None
Example #15
    def _complete_upload(batch: Batch, blob: FileBlob) -> Tuple[FileBlob, Batch]:
        """Helper to complete an upload."""

        # Set those attributes as FileBlob does not have them
        # and they are required for the step 2 of .upload_impl()
        blob.batch_id = batch.uid
        blob.fileIdx = 0
        batch.upload_idx = 1

        if not batch.blobs or not batch.blobs[0]:
            batch.blobs[0] = blob

        # Complete the upload on the S3 side
        if batch.is_s3():
            batch.complete(timeout=TX_TIMEOUT)

        return blob, batch
Example #16
def test_mimetype():
    test = 'test.bmp'
    with open(test, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')
    try:
        blob = FileBlob(test)
        assert blob.mimetype in ['image/bmp', 'image/x-ms-bmp']
    finally:
        os.remove(test)
Example #17
def test_wrong_multipart_upload_id(tmp_path, s3, batch, server):
    file_in = tmp_path / "file_in"
    MiB = 1024 * 1024
    file_in.write_bytes(os.urandom(6 * MiB))

    blob = FileBlob(str(file_in))

    batch.multiPartUploadId = "1234"
    with pytest.raises(KeyError):
        ChunkUploaderS3(server.uploads, batch, blob, 1024 * 1024 * 5, s3_client=s3)
Example #18
def test_data(tmp_path):
    blob = BufferBlob(data="data", name="Test.txt", mimetype="text/plain")
    with blob:
        assert blob.data

    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")
    blob = FileBlob(str(file_in))
    with blob:
        assert blob.data
Example #19
def test_upload_several_callbacks(tmp_path, chunked, server):
    check = 0

    def callback1(upload):
        nonlocal check
        check += 1

    def callback2(upload):
        assert upload
        assert isinstance(upload.blob.uploadedChunkIds, list)
        assert isinstance(upload.blob.uploadedSize, int)

        if not chunked:
            assert upload.blob.uploadedSize == file_size
            assert upload.blob.uploadType == "normal"
        else:
            # In chunked mode, we should have 1024, 2048, 3072 and 4096 respectively
            sizes = {1: 1024, 2: 1024 * 2, 3: 1024 * 3, 4: 1024 * 4}
            assert upload.blob.uploadedSize == sizes[len(upload.blob.uploadedChunkIds)]
            assert upload.blob.uploadType == "chunked"

    batch = server.uploads.batch()

    chunk_size = 1024
    file_size = 4096 if chunked else 1024
    file_in, file_out = tmp_path / "file_in", tmp_path / "file_out"
    file_in.write_bytes(b"\x00" * file_size)

    callbacks = [callback1, callback2, "callback3"]
    doc = server.documents.create(new_doc, parent_path=WORKSPACE_ROOT)
    try:
        blob = FileBlob(str(file_in), mimetype="application/octet-stream")
        assert repr(blob)
        assert batch.upload(
            blob, chunked=chunked, callback=callbacks, chunk_size=chunk_size
        )
        operation = server.operations.new("Blob.AttachOnDocument")
        operation.params = {"document": WORKSPACE_ROOT + "/Document"}
        operation.input_obj = batch.get(0)
        operation.execute(void_op=True)

        operation = server.operations.new("Document.Fetch")
        operation.params = {"value": WORKSPACE_ROOT + "/Document"}
        info = operation.execute()
        digest = info["properties"]["file:content"]["digest"]

        operation = server.operations.new("Blob.Get")
        operation.input_obj = WORKSPACE_ROOT + "/Document"
        file_out = operation.execute(file_out=file_out, digest=digest)
    finally:
        doc.delete()

    # Check the callback count (1 when not chunked)
    assert check == (4 if chunked else 1)
Example #20
def test_upload_chunked_resume(tmp_path, s3, batch, server):
    file_in = tmp_path / "file_in"
    MiB = 1024 * 1024
    file_in.write_bytes(os.urandom(25 * MiB))

    blob = FileBlob(str(file_in))

    def get_uploader():
        return ChunkUploaderS3(
            server.uploads, batch, blob, 5 * MiB, s3_client=s3, max_parts=2
        )

    # Simulate a new upload that will fail
    uploader = get_uploader()
    assert uploader.chunk_count == 5
    assert uploader._data_packs == []
    assert len(uploader.blob.uploadedChunkIds) == 0

    iterator = uploader.iter_upload()

    # Upload 4 parts (out of 5), then stop to simulate an interrupted upload
    uploaded_parts = []
    for part in range(1, 5):
        next(iterator)
        uploaded_parts.append(part)
        assert uploader.blob.uploadedChunkIds == uploaded_parts
        assert len(uploader._data_packs) == len(uploaded_parts)
        for data_pack in uploader._data_packs:
            assert isinstance(data_pack, dict)
            assert data_pack["PartNumber"] in uploaded_parts
            assert "ETag" in data_pack
        assert not uploader.is_complete()
        assert uploader.batch.etag is None

    # Ask for new tokens, the upload should continue without issue
    # TODO: cannot be tested until using a real server configured with S3
    # old_info = batch.extraInfo.copy()
    # uploader.refresh_token()
    # new_info = batch.extraInfo.copy()
    # for key in ("awsSecretKeyId", "awsSecretAccessKey", "awsSessionToken"):
    #     assert old_info[key] != new_info[key]
    # assert old_info["expiration"] <= new_info["expiration"]

    # Simulate a resume of the same upload, it should succeed
    # (AWS details are stored into the *batch* object, that's why it works)
    uploader = get_uploader()
    assert uploader.chunk_count == 5
    assert len(uploader._data_packs) == 4
    assert uploader.blob.uploadedChunkIds == [1, 2, 3, 4]
    uploader.upload()
    assert uploader.blob.uploadedChunkIds == [1, 2, 3, 4, 5]
    assert uploader.is_complete()
    assert uploader.batch.etag is not None
Example #21
def test_upload_blob_with_bad_characters(tmp_path, batch, bucket, server, s3):
    file_in = tmp_path / "file_in (1).bin"
    file_in.write_bytes(os.urandom(1024 * 1024 * 5))

    blob = FileBlob(str(file_in))

    # Simulate a single upload of a file whose name contains special characters
    uploader = UploaderS3(server.uploads, batch, blob, 1024 * 1024 * 10, s3_client=s3)

    # Upload the file, it must work
    uploader.upload()
    assert uploader.batch.etag is not None
Example #22
def test_data():
    blob = BufferBlob(data='data', name='Test.txt', mimetype='text/plain')
    with blob:
        assert blob.data

    test = 'test_file'
    with open(test, 'wb') as f:
        f.write(b'\x00' + os.urandom(1024 * 1024) + b'\x00')
    try:
        blob = FileBlob(test)
        with blob:
            assert blob.data
    finally:
        os.remove(test)
Example #23
    def create_doc(self, path, name, type, properties, file_path=""):
        new_doc = Document(
            name=name, type=type,
            properties=properties)  # a dict of propertyname: value
        ws = self.nuxeo.documents.create(new_doc, parent_path=path)
        if file_path:
            blob = FileBlob(file_path)
            batch = self.nuxeo.uploads.batch()
            uploaded = batch.upload(blob)
            operation = self.nuxeo.operations.new('Blob.AttachOnDocument')
            operation.params = {'document': path + "/" + name}
            operation.input_obj = uploaded
            operation.execute()
        ws.save()
        return ws
Example #24
def upload_file(server, filename):
    batch = server.uploads.batch()
    batch.upload(FileBlob(filename, mimetype="application/octet-stream"))
    doc = server.documents.create(
        Document(name=filename, type="File", properties={"dc:title":
                                                         filename}),
        parent_path="/default-domain/workspaces",
    )
    try:
        operation = server.operations.new("Blob.AttachOnDocument")
        operation.params = {"document": doc.path}
        operation.input_obj = batch.get(0)
        operation.execute(void_op=True)
    except Exception:
        doc.delete()
        raise
    return doc
Example #25
def test_upload_not_chunked_error(tmp_path, batch, bucket, server, s3):
    file_in = tmp_path / "file_in"
    file_in.write_bytes(os.urandom(1024 * 1024 * 5))

    blob = FileBlob(str(file_in))

    def put_object(*args, **kwargs):
        raise HTTPError(409, "Conflict", "Mock'ed error")

    # Simulate a single upload that failed
    uploader = UploaderS3(server.uploads, batch, blob, 1024 * 1024 * 10, s3_client=s3)

    with patch.object(uploader.s3_client, "put_object", new=put_object):
        with pytest.raises(UploadError):
            uploader.upload()
        assert uploader.batch.etag is None
Example #26
def test_get_uploader(tmp_path, server):
    def callback(*args):
        assert args

    batch = server.uploads.batch()
    file_in = tmp_path / "file_in"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")

    blob = FileBlob(str(file_in), mimetype="application/octet-stream")
    uploader = batch.get_uploader(
        blob, chunked=True, chunk_size=256 * 1024, callback=callback
    )
    assert str(uploader)
    for idx, _ in enumerate(uploader.iter_upload(), 1):
        assert idx == len(uploader.blob.uploadedChunkIds)

    assert batch.get(0)
Example #27
def test_upload_retry(tmp_path, retry_server):
    server = retry_server
    close_server = threading.Event()

    file_in = tmp_path / "χρυσαφὶ"
    file_in.write_bytes(b"\x00" + os.urandom(1024 * 1024) + b"\x00")

    with patch.object(server.client, "host", new="http://localhost:8081/nuxeo/"):
        serv = Server.upload_response_server(
            wait_to_close_event=close_server,
            port=8081,
            requests_to_handle=20,
            fail_args={"fail_at": 4, "fail_number": 1},
        )

        with serv:
            batch = server.uploads.batch()
            blob = FileBlob(str(file_in), mimetype="application/octet-stream")
            batch.upload(blob, chunked=True, chunk_size=256 * 1024)
            close_server.set()  # release server block
Example #28
def test_idempotent_requests(tmp_path, server):
    """
    - upload a file in chunked mode
    - call the FileManager.Import operation 5 times, concurrently, with that file
    - check that there are conflict errors and that only one document is created
    """
    file_in = tmp_path / "file_in"
    file_in.write_bytes(os.urandom(1024 * 1024 * 10))

    batch = server.uploads.batch()
    blob = FileBlob(str(file_in))
    batch.upload(blob, chunked=True, chunk_size=1024 * 1024)

    idempotency_key = str(uuid.uuid4())
    res = defaultdict(int)

    def func():
        try:
            op = server.operations.execute(
                command="FileManager.Import",
                context={"currentDocument": doc.path},
                input_obj=blob,
                headers={
                    IDEMPOTENCY_KEY: idempotency_key,
                    "X-Batch-No-Drop": "true",
                },
            )
            res[op["uid"]] += 1
        except OngoingRequestError as exc:
            res[str(exc)] += 1

    # Create a folder
    name = str(uuid.uuid4())
    folder = Document(name=name, type="Folder", properties={"dc:title": name})
    doc = server.documents.create(folder, parent_path=WORKSPACE_ROOT)

    try:
        # Concurrent calls to the same endpoint
        threads = [threading.Thread(target=func) for _ in range(5)]
        threads[0].start()
        threads[0].join(0.001)

        for thread in threads[1:]:
            thread.start()
        for thread in threads:
            thread.join()

        # Checks
        # 1 docid + 1 error (both can be present multiple times)
        assert len(res.keys()) == 2
        error = (
            "OngoingRequestError: a request with the idempotency key"
            f" {idempotency_key!r} is already being processed."
        )
        assert error in res

        # Ensure there is only 1 doc on the server
        children = server.documents.get_children(path=doc.path)
        assert len(children) == 1
        assert children[0].title == file_in.name

        # Check that calling the same request with the same idempotency key always returns the same result
        current_identical_doc = res[children[0].uid]
        current_identical_errors = res[error]
        for _ in range(10):
            func()
        assert res[error] == current_identical_errors
        assert res[children[0].uid] == current_identical_doc + 10
    finally:
        doc.delete()
Example #29
    def upload(
        self,
        file_path: Path,
        command: str,
        filename: str = None,
        mime_type: str = None,
        **params: Any,
    ) -> Dict[str, Any]:
        """ Upload a file with a batch.

        If command is not None, the operation is executed
        with the batch as an input.
        """
        with self.upload_lock:
            tick = time.time()
            action = FileAction(
                "Upload", file_path, filename, reporter=QApplication.instance()
            )
            try:
                # Init resumable upload getting a batch generated by the
                # server. This batch is to be used as a resumable session
                batch = self.uploads.batch()

                blob = FileBlob(str(file_path))
                if filename:
                    blob.name = filename
                if mime_type:
                    blob.mimetype = mime_type

                # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
                # It can be set to a value between 1 and 20 through the config.ini
                chunk_size = Options.chunk_size * 1024 * 1024
                # For the upload to be chunked, the Options.chunk_upload must be True
                # and the blob must be bigger than Options.chunk_limit, which by default
                # is equal to Options.chunk_size.
                chunked = (
                    Options.chunk_upload
                    and blob.size > Options.chunk_limit * 1024 * 1024
                )

                uploader = batch.get_uploader(
                    blob, chunked=chunked, chunk_size=chunk_size
                )

                if uploader.chunked:
                    # If there is an UploadError, we catch it from the processor
                    for _ in uploader.iter_upload():
                        # Here 0 may happen when doing a single upload
                        action.progress += uploader.chunk_size or 0
                else:
                    uploader.upload()

                upload_result = uploader.response
                blob.fd.close()

                upload_duration = int(time.time() - tick)
                action.transfer_duration = upload_duration
                # Use upload duration * 2 as Nuxeo transaction timeout
                tx_timeout = max(TX_TIMEOUT, upload_duration * 2)
                log.debug(
                    f"Using {tx_timeout} seconds [max({TX_TIMEOUT}, "
                    f"2 * upload time={upload_duration})] as Nuxeo "
                    f"transaction timeout for batch execution of {command!r} "
                    f"with file {file_path!r}"
                )

                if upload_duration > 0:
                    size = os.stat(file_path).st_size
                    log.debug(
                        f"Speed for {size / 1000} kilobytes is {upload_duration} sec:"
                        f" {size / upload_duration / 1024} Kib/s"
                    )

                headers = {"Nuxeo-Transaction-Timeout": str(tx_timeout)}
                return self.execute(
                    command=command, input_obj=upload_result, headers=headers, **params
                )
            finally:
                FileAction.finish_action()
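
A hedged usage sketch of upload() above: `remote` stands for an instance of the owning class, and the operation name and extra parameters mirror the FileManager.Import usage seen in other examples here; the exact keyword names are assumptions, not part of the original code.

from pathlib import Path

# `remote` is assumed to be an instance of the class defining upload() above.
result = remote.upload(
    Path("/tmp/big_file.bin"),
    "FileManager.Import",  # executed with the uploaded batch as input
    filename="big_file.bin",
    mime_type="application/octet-stream",
    # Extra keyword arguments are forwarded to execute() via **params.
    context={"currentDocument": "/default-domain/workspaces"},
)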
Example #30
    def upload_chunks(
        self,
        file_path: Path,
        filename: str = None,
        mime_type: str = None,
        **params: Any,
    ) -> FileBlob:
        """Upload a blob by chunks or in one go."""

        action = UploadAction(file_path, reporter=QApplication.instance())
        blob = FileBlob(str(file_path))
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch = None
        chunk_size = None
        upload: Optional[Upload] = None

        try:
            # See if there is already a transfer for this file
            upload = self.dao.get_upload(path=file_path)
            if upload:
                log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
                if upload.status not in (TransferStatus.ONGOING,
                                         TransferStatus.DONE):
                    raise UploadPaused(upload.uid or -1)

                # Check if the associated batch still exists server-side
                try:
                    self.uploads.get(upload.batch, upload.idx)
                except Exception:
                    log.debug(
                        f"No associated batch found, restarting from zero",
                        exc_info=True,
                    )
                else:
                    log.debug(f"Associated batch found, resuming the upload")
                    batch = Batch(batchId=upload.batch, service=self.uploads)
                    batch.upload_idx = upload.idx
                    chunk_size = upload.chunk_size

            if not batch:
                # Create a new batch and save it in the DB
                batch = self.uploads.batch()

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, the Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by default
            # is equal to Options.chunk_size.
            chunked = (Options.chunk_upload
                       and blob.size > Options.chunk_limit * 1024 * 1024)

            engine_uid = params.pop("engine_uid", None)
            is_direct_edit = params.pop("is_direct_edit", False)

            if not upload:
                # Add an upload entry in the database
                upload = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    batch=batch.uid,
                    idx=batch.upload_idx,
                    chunk_size=chunk_size,
                )
                self.dao.save_upload(upload)

            # Set those attributes as FileBlob does not have them
            # and they are required for the step 2 of .upload()
            blob.batch_id = upload.batch
            blob.fileIdx = upload.idx

            uploader: Uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.upload_callback,
            )

            # Update the progress on chunked upload only as the first call to
            # action.progress will set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legit.
            if uploader.chunked:
                action.progress = chunk_size * len(
                    uploader.blob.uploadedChunkIds)

            log.debug(
                f"Upload progression is {action.get_percent():.2f}% "
                f"(data length is {sizeof_fmt(blob.size)}, "
                f"chunked is {chunked}, chunk size is {sizeof_fmt(chunk_size)})"
            )

            if action.get_percent() < 100.0 or not action.uploaded:
                if uploader.chunked:
                    # Store the chunk size and start time for later transfer speed computation
                    action.chunk_size = chunk_size
                    action.chunk_transfer_start_time_ns = monotonic_ns()

                    # If there is an UploadError, we catch it from the processor
                    for _ in uploader.iter_upload():
                        # Here 0 may happen when doing a single upload
                        action.progress += uploader.chunk_size or 0

                        # Save the progression
                        upload.progress = action.get_percent()
                        self.dao.set_transfer_progress("upload", upload)

                        # Handle status changes every time a chunk is sent
                        transfer = self.dao.get_upload(path=file_path)
                        if transfer and transfer.status not in (
                                TransferStatus.ONGOING,
                                TransferStatus.DONE,
                        ):
                            raise UploadPaused(transfer.uid or -1)
                else:
                    uploader.upload()

                    # For empty files, this will set action.uploaded to True,
                    # telling us that the file was correctly sent to the server.
                    action.progress += blob.size

                    upload.progress = action.get_percent()

            # Transfer is completed, update the status in the database
            upload.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", upload)

            return blob
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if upload:
                    upload.progress = percent
                    self.dao.set_transfer_progress("upload", upload)

            UploadAction.finish_action()

            if blob.fd:
                blob.fd.close()
Example #31
    def upload_chunks(
        self,
        file_path: Path,
        filename: str = None,
        mime_type: str = None,
        **kwargs: Any,
    ) -> Tuple[FileBlob, Batch]:
        """Upload a blob by chunks or in one go."""

        engine_uid = kwargs.get("engine_uid", None)
        is_direct_edit = kwargs.pop("is_direct_edit", False)
        is_direct_transfer = kwargs.get("is_direct_transfer", False)
        remote_parent_path = kwargs.pop("remote_parent_path", "")
        remote_parent_ref = kwargs.pop("remote_parent_ref", "")

        blob = FileBlob(str(file_path))
        action = self.upload_action(
            file_path, blob.size, reporter=QApplication.instance(), engine=engine_uid
        )
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch: Optional[Batch] = None
        chunk_size = None

        # See if there is already a transfer for this file
        transfer = self.get_upload(file_path)

        try:
            if transfer:
                log.debug(f"Retrieved transfer for {file_path!r}: {transfer}")
                if transfer.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                    raise UploadPaused(transfer.uid or -1)

                # When looking up an existing batch, the file index cannot be
                # specified for S3: there is no blob at the current index until
                # the S3 upload itself is done and batch.complete() has been called.
                file_idx = None if transfer.batch.get("provider", "") == "s3" else 0

                # Check if the associated batch still exists server-side
                try:
                    self.remote.uploads.get(
                        transfer.batch["batchId"], file_idx=file_idx
                    )
                except HTTPError as exc:
                    if exc.status != 404:
                        raise
                    log.debug("No associated batch found, restarting from zero")
                else:
                    log.debug("Associated batch found, resuming the upload")
                    batch = Batch(service=self.remote.uploads, **transfer.batch)
                    chunk_size = transfer.chunk_size

                    # The transfer was already completed on the third-party provider
                    if batch.etag:
                        return self._complete_upload(batch, blob)

            if not batch:
                # The .uploads.handlers() result is cached, so it is cheap to call it each time here.
                # This covers the case where the server did not answer correctly the previous time,
                # which would otherwise leave S3 completely disabled because of a one-time server error.
                handler = "s3" if Feature.s3 and self.remote.uploads.has_s3() else ""

                # Create a new batch and save it in the DB
                batch = self.remote.uploads.batch(handler=handler)

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, the Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by default
            # is equal to Options.chunk_size.
            chunked = (
                Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
            )

            action.is_direct_transfer = is_direct_transfer

            try:
                uploader = batch.get_uploader(
                    blob,
                    chunked=chunked,
                    chunk_size=chunk_size,
                    callback=self.remote.upload_callback,
                )
            except ClientError as exc:
                if exc.response["Error"]["Code"] != "NoSuchUpload":
                    raise

                log.warning(
                    "Either the upload ID does not exist, either the upload was already completed."
                )
                return self._complete_upload(batch, blob)

            log.debug(f"Using {type(uploader).__name__!r} uploader")

            if not transfer:
                # Remove any obsolete upload (this can happen when an upload using S3 has invalid metadata)
                self.dao.remove_transfer("upload", file_path)

                # Add an upload entry in the database
                transfer = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    filesize=blob.size,
                    batch=batch.as_dict(),
                    chunk_size=chunk_size,
                    is_direct_transfer=is_direct_transfer,
                    remote_parent_path=remote_parent_path,
                    remote_parent_ref=remote_parent_ref,
                )
                self.dao.save_upload(transfer)
            elif transfer.batch["batchId"] != batch.uid:
                # The upload was not a fresh one but its batch ID had expired.
                # Before NXDRIVE-2183, the batch ID was not updated and so the second step
                # of the upload (attaching the blob to a document) was failing.
                transfer.batch["batchId"] = batch.uid
                self.dao.update_upload(transfer)

            if uploader.chunked:
                # Update the progress on chunked upload only as the first call to
                # action.progress will set the action.uploaded attr to True for
                # empty files. This is not what we want: empty files are legit.
                action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                if batch.is_s3():
                    self._patch_refresh_token(uploader, transfer)

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                    # Save the progression
                    transfer.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", transfer)

                    # Handle status changes every time a chunk is sent
                    _transfer = self.get_upload(file_path)
                    if _transfer and _transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size

                transfer.progress = action.get_percent()

            if batch.is_s3():
                if not batch.blobs:
                    # This may happen when resuming an upload with all parts sent.
                    # Trigger upload(), which will complete the MPU and fill required
                    # attributes like the Batch ETag, blob index, etc.
                    uploader.upload()

                # Save the final ETag in the database to prevent future issue if
                # the FileManager throws an error
                transfer.batch = batch.as_dict()
                self.dao.update_upload(transfer)

            self._complete_upload(batch, blob)

            # Transfer is completed, update the status in the database
            transfer.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", transfer)

            return blob, batch
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if transfer:
                    transfer.progress = percent
                    self.dao.set_transfer_progress("upload", transfer)

            action.finish_action()

            if blob.fd:
                blob.fd.close()
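
For reference, a minimal end-to-end sketch distilled from the test examples above (create a batch, upload a blob in chunks, attach it to a new document); `server` is the same assumed client fixture used throughout, and the imports follow the nuxeo Python client layout:

from nuxeo.models import Document, FileBlob

def upload_and_attach(server, workspace_root, file_path):
    """Upload *file_path* in chunks and attach it to a new File document."""
    # 1. Create the target document
    doc_model = Document(name="Document", type="File", properties={"dc:title": "Document"})
    doc = server.documents.create(doc_model, parent_path=workspace_root)

    # 2. Upload the blob through a batch, in 1 MiB chunks
    batch = server.uploads.batch()
    blob = FileBlob(str(file_path), mimetype="application/octet-stream")
    batch.upload(blob, chunked=True, chunk_size=1024 * 1024)

    # 3. Attach the uploaded blob to the document
    operation = server.operations.new("Blob.AttachOnDocument")
    operation.params = {"document": doc.path}
    operation.input_obj = batch.get(0)
    operation.execute(void_op=True)
    return doc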