Example #1
def test_put(client, db, bucket, permissions, multipart, multipart_url,
             get_md5, get_json):
    """Test part upload."""
    cases = [
        (None, 404),
        ('auth', 404),
        ('objects', 404),  # TODO - use 403 instead
        ('bucket', 200),
        ('location', 200),
    ]

    for user, expected in cases:
        login_user(client, permissions[user])

        data = b'a' * multipart.chunk_size
        res = client.put(
            multipart_url + '&partNumber={0}'.format(1),
            input_stream=BytesIO(data),
        )
        assert res.status_code == expected

        if res.status_code == 200:
            assert res.get_etag()[0] == get_md5(data)

            # Assert content
            with open(multipart.file.uri, 'rb') as fp:
                fp.seek(multipart.chunk_size)
                content = fp.read(multipart.chunk_size)
            assert content == data
            assert Part.count(multipart) == 1
            assert Part.get_or_none(multipart, 1).checksum == get_md5(data)
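Example #1 relies on a get_md5 fixture that the excerpt never defines. A minimal sketch, assuming the fixture returns checksums in the same 'md5:<hexdigest>' form that Part.checksum uses above (the exact signature is an assumption, inferred from the test's usage):

import hashlib

import pytest


@pytest.fixture()
def get_md5():
    """Return a callable computing an 'md5:<hexdigest>' checksum (assumed helper)."""
    def inner(data):
        # Same prefix form as the Part.checksum values asserted above.
        return 'md5:{0}'.format(hashlib.md5(data).hexdigest())
    return inner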
Example #2
def test_post_by_reference_segmented(api, users, location, task_delay):
    with api.test_request_context(), api.test_client() as client:
        # Assemble a segmented upload from parts, and complete it
        segmented_upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create(
            {}
        )
        multipart_object = MultipartObject.create(
            bucket=segmented_upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(multipart_object, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(multipart_object, 1, stream=io.BytesIO(b"klmno"))
        multipart_object.complete()

        login(client)

        ttl = (
            datetime.datetime.now(tz=datetime.timezone.utc)
            + datetime.timedelta(hours=1)
        ).isoformat()

        response = client.post(
            "/sword/service-document",
            data=json.dumps(
                {
                    "@context": JSON_LD_CONTEXT,
                    "@type": "ByReference",
                    "byReferenceFiles": [
                        {
                            "@id": f"http://localhost/sword/staging/{segmented_upload_record.id}",
                            "contentDisposition": "attachment; filename=some-resource.json",
                            "contentType": "application/json",
                            "dereference": True,
                            "ttl": ttl,
                        }
                    ],
                }
            ),
            headers={
                "Content-Disposition": "attachment; by-reference=true",
                "Content-Type": "application/ld+json",
            },
        )

        assert response.status_code == HTTPStatus.CREATED

        object_version = ObjectVersion.query.one()
        tags = TagManager(object_version)

        assert tags == {
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.ByReferenceTemporaryID: str(segmented_upload_record.id),
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTTL: ttl,
        }
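This test (like test_by_reference_sets_tag below) takes a task_delay fixture that never appears in the excerpt. A plausible sketch, assuming its only job is to run Celery tasks synchronously during the test; the patch target is a guess, not confirmed by the source:

from unittest import mock

import pytest


@pytest.fixture()
def task_delay():
    """Make Task.delay call the task inline instead of queueing it (assumption)."""
    with mock.patch(
        'celery.app.task.Task.delay',
        lambda self, *args, **kwargs: self(*args, **kwargs),
    ):
        yield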
Example #4
def test_by_reference_sets_tag(api, users, location, task_delay):
    with api.test_request_context():
        # Assemble a segmented upload from parts, and complete it
        segmented_upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create(
            {}
        )
        multipart_object = MultipartObject.create(
            bucket=segmented_upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(multipart_object, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(multipart_object, 1, stream=io.BytesIO(b"klmno"))
        multipart_object.complete()

        record: SWORDDeposit = SWORDDeposit.create({})
        record.set_by_reference_files(
            [
                ByReferenceFileDefinition(
                    temporary_id=segmented_upload_record.id,
                    content_disposition="attachment; filename=something.txt",
                    content_type="text/plain",
                    packaging=PackagingFormat.Binary,
                    dereference=True,
                ),
            ],
            lambda *args: True,
            "http://localhost/",
        )

        object_version = ObjectVersion.query.one()
        tags = TagManager(object_version)

        assert tags == {
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTemporaryID: str(segmented_upload_record.id),
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
        }

        tasks.dereference_object(record.id, object_version.version_id)

        assert object_version.file.storage().open().read() == b"abcdefghijklmno"
Example #5
def test_multipart_full(app, db, bucket):
    """Test full multipart object."""
    app.config.update(
        dict(
            FILES_REST_MULTIPART_CHUNKSIZE_MIN=5 * 1024 * 1024,
            FILES_REST_MULTIPART_CHUNKSIZE_MAX=5 * 1024 * 1024 * 1024,
        ))

    # Initial parameters
    chunks = 20
    chunk_size = 5 * 1024 * 1024  # 5 MiB
    last_chunk = 1024 * 1024  # 1 MiB
    size = (chunks - 1) * chunk_size + last_chunk

    # Initiate
    mp = MultipartObject.create(bucket,
                                'testfile',
                                size=size,
                                chunk_size=chunk_size)
    db.session.commit()

    # Create parts
    for i in range(chunks):
        part_size = chunk_size if i < chunks - 1 else last_chunk
        Part.create(mp, i, stream=make_stream(part_size))
        db.session.commit()

    # Complete
    mp.complete()
    db.session.commit()

    # Merge parts.
    pre_size = mp.bucket.size
    mp.merge_parts()
    db.session.commit()

    # Test size update
    bucket = Bucket.get(bucket.id)
    assert bucket.size == pre_size

    app.config.update(
        dict(
            FILES_REST_MULTIPART_CHUNKSIZE_MIN=2,
            FILES_REST_MULTIPART_CHUNKSIZE_MAX=20,
        ))
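test_multipart_full reads each part from a make_stream helper that is not shown in the excerpt. A minimal sketch, assuming the tests only need a readable stream of the requested length (the zero-byte payload is an arbitrary choice):

from io import BytesIO


def make_stream(size):
    """Return an in-memory stream of ``size`` bytes (assumed helper)."""
    # The content is irrelevant to the size bookkeeping under test; zeros suffice.
    return BytesIO(b'\x00' * size)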
Example #7
def test_get(client, db, bucket, permissions, multipart, multipart_url,
             get_json):
    """Test get parts."""
    Part.create(multipart, 0)
    Part.create(multipart, 1)
    Part.create(multipart, 3)
    db.session.commit()

    cases = [
        (None, 404),
        ('auth', 404),
        ('objects', 404),
        ('bucket', 200),
        ('location', 200),
    ]

    for user, expected in cases:
        login_user(client, permissions[user])

        res = client.get(multipart_url)
        assert res.status_code == expected

        if res.status_code == 200:
            data = get_json(res)
            assert len(data['parts']) == 3
Example #9
def parts(db, multipart):
    """All parts for a multipart object."""
    items = []
    for i in range(multipart.last_part_number + 1):
        chunk_size = (
            multipart.chunk_size
            if i != multipart.last_part_number
            else multipart.last_part_size
        )
        p = Part.create(
            multipart,
            i,
            stream=BytesIO(u'{0}'.format(i).encode('ascii') * chunk_size),
        )
        items.append(p)

    db.session.commit()
    return items
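A quick way to sanity-check the fixture is to compare it against the Part bookkeeping it populates; a hypothetical usage sketch (the test name and assertions are illustrative, not from the source):

def test_parts_fixture(db, multipart, parts):
    """Hypothetical check that the fixture created one Part per part number."""
    assert len(parts) == multipart.last_part_number + 1
    assert Part.count(multipart) == multipart.last_part_number + 1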
Example #11
def test_part_creation(app, db, bucket, get_sha256):
    """Test part creation."""
    assert bucket.size == 0
    mp = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    assert bucket.size == 5

    Part.create(mp, 2, stream=BytesIO(b'p'))
    Part.create(mp, 0, stream=BytesIO(b'p1'))
    Part.create(mp, 1, stream=BytesIO(b'p2'))
    db.session.commit()
    assert bucket.size == 5

    mp.complete()
    db.session.commit()
    assert bucket.size == 5

    # Assert checksum of part.
    m = hashlib.sha256()
    m.update(b'p2')
    assert "sha256:{0}".format(m.hexdigest()) == \
        Part.get_or_none(mp, 1).checksum

    obj = mp.merge_parts()
    db.session.commit()
    assert bucket.size == 5

    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0

    assert obj.file.size == 5
    assert obj.file.checksum == get_sha256(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True

    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id
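Example #11 additionally assumes a get_sha256 fixture mirroring get_md5; a minimal sketch under that assumption:

import hashlib

import pytest


@pytest.fixture()
def get_sha256():
    """Return a callable computing a 'sha256:<hexdigest>' checksum (assumed helper)."""
    def inner(data):
        return 'sha256:{0}'.format(hashlib.sha256(data).hexdigest())
    return inner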
Example #12
def test_part_creation(app, db, bucket, get_md5):
    """Test part creation."""
    assert bucket.size == 0
    mp = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    assert bucket.size == 5

    Part.create(mp, 2, stream=BytesIO(b'p'))
    Part.create(mp, 0, stream=BytesIO(b'p1'))
    Part.create(mp, 1, stream=BytesIO(b'p2'))
    db.session.commit()
    assert bucket.size == 5

    mp.complete()
    db.session.commit()
    assert bucket.size == 5

    # Assert checksum of part.
    m = hashlib.md5()
    m.update(b'p2')
    assert "md5:{0}".format(m.hexdigest()) == Part.get_or_none(mp, 1).checksum

    obj = mp.merge_parts()
    db.session.commit()
    assert bucket.size == 5

    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0

    assert obj.file.size == 5
    assert obj.file.checksum == get_md5(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True

    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id