def test_dereference_missing_upload(api, users, location, task_delay):
    """Dereferencing a by-reference file whose upload was never made fails.

    A multipart object is declared for the segmented upload, but no parts are
    created and it is never completed. ``tasks.dereference_object`` is then
    expected to raise ``ValueError`` and the file-state tag is expected to
    read ``FileState.Error``.
    """
    with api.test_request_context():
        # Declare the segmented upload only: no Part is created and
        # complete() is never called on the multipart object.
        upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create({})
        MultipartObject.create(
            bucket=upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )

        deposit: SWORDDeposit = SWORDDeposit.create({})
        file_definition = ByReferenceFileDefinition(
            temporary_id=upload_record.id,
            content_disposition="attachment; filename=something.txt",
            content_type="text/plain",
            packaging=PackagingFormat.Binary,
            dereference=True,
        )
        deposit.set_by_reference_files(
            [file_definition],
            lambda *args: True,
            "http://localhost/",
        )

        version = ObjectVersion.query.one()

        with pytest.raises(ValueError):
            tasks.dereference_object(deposit.id, version.version_id)

        # Checked after the failed task, outside the raises() block.
        file_state = TagManager(version)[ObjectTagKey.FileState]
        assert file_state == FileState.Error
def test_multipart_last_part(app, db, bucket):
    """Check last-part size and number for an even and an uneven split."""
    chunk_size = 20
    cases = (
        # (total size, expected last_part_size, expected last_part_number)
        (100, 0, 4),
        (101, 1, 5),
    )
    for total_size, expected_size, expected_number in cases:
        upload = MultipartObject.create(
            bucket, 'test.txt', total_size, chunk_size)
        assert upload.last_part_size == expected_size
        assert upload.last_part_number == expected_number
def test_post_by_reference_segmented(api, users, location, task_delay):
    """POST a by-reference deposit that points at a completed segmented upload.

    The request is expected to return ``201 Created`` and the single resulting
    object version is expected to carry the by-reference tags, with the file
    state still ``FileState.Pending``.
    """
    with api.test_request_context(), api.test_client() as client:
        # Build a 15-byte segmented upload out of two parts and complete it.
        segmented_upload_record: SegmentedUploadRecord = (
            SegmentedUploadRecord.create({})
        )
        multipart_object = MultipartObject.create(
            bucket=segmented_upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(multipart_object, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(multipart_object, 1, stream=io.BytesIO(b"klmno"))
        multipart_object.complete()

        login(client)

        # TTL one hour from now, as a timezone-aware ISO 8601 string.
        now = datetime.datetime.now(tz=datetime.timezone.utc)
        ttl = (now + datetime.timedelta(seconds=3600)).isoformat()

        by_reference_file = {
            "@id": f"http://localhost/sword/staging/{segmented_upload_record.id}",
            "contentDisposition": "attachment; filename=some-resource.json",
            "contentType": "application/json",
            "dereference": True,
            "ttl": ttl,
        }
        payload = {
            "@context": JSON_LD_CONTEXT,
            "@type": "ByReference",
            "byReferenceFiles": [by_reference_file],
        }
        request_headers = {
            "Content-Disposition": "attachment; by-reference=true",
            "Content-Type": "application/ld+json",
        }

        response = client.post(
            "/sword/service-document",
            data=json.dumps(payload),
            headers=request_headers,
        )
        assert response.status_code == HTTPStatus.CREATED

        object_version = ObjectVersion.query.one()
        tags = TagManager(object_version)
        expected_tags = {
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.ByReferenceTemporaryID: str(segmented_upload_record.id),
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTTL: ttl,
        }
        assert tags == expected_tags
def test_multipart_creation(app, db, bucket):
    """Check the attributes of a newly created, committed multipart object."""
    total_size, chunk_size = 100, 20

    upload = MultipartObject.create(bucket, 'test.txt', total_size, chunk_size)
    db.session.commit()

    assert upload.upload_id
    assert upload.size == total_size
    assert upload.chunk_size == chunk_size
    assert upload.completed is False
    # The bucket size already equals the declared total size.
    assert upload.bucket.size == total_size
    assert exists(upload.file.uri)
def test_by_reference_sets_tag(api, users, location, task_delay):
    """Setting by-reference files tags the object version; dereferencing fills it.

    After ``set_by_reference_files`` the single object version is expected to
    carry the by-reference tags with state ``FileState.Pending``. After
    ``tasks.dereference_object`` its stored content is expected to equal the
    concatenation of the two uploaded parts.
    """
    with api.test_request_context():
        # Build a 15-byte segmented upload out of two parts and complete it.
        upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create({})
        upload = MultipartObject.create(
            bucket=upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(upload, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(upload, 1, stream=io.BytesIO(b"klmno"))
        upload.complete()

        deposit: SWORDDeposit = SWORDDeposit.create({})
        file_definition = ByReferenceFileDefinition(
            temporary_id=upload_record.id,
            content_disposition="attachment; filename=something.txt",
            content_type="text/plain",
            packaging=PackagingFormat.Binary,
            dereference=True,
        )
        deposit.set_by_reference_files(
            [file_definition],
            lambda *args: True,
            "http://localhost/",
        )

        version = ObjectVersion.query.one()
        tags = TagManager(version)
        expected_tags = {
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTemporaryID: str(upload_record.id),
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
        }
        assert tags == expected_tags

        tasks.dereference_object(deposit.id, version.version_id)

        stored_bytes = version.file.storage().open().read()
        assert stored_bytes == b"abcdefghijklmno"
def test_multipart_full(app, db, bucket):
    """Upload, complete and merge a 20-part multipart object.

    The multipart chunk-size limits in ``app.config`` are raised for the
    duration of the test and set back to 2 and 20 afterwards. The reset runs
    in a ``finally`` clause so that a failing assertion or an exception inside
    the test does not leave the raised limits in place for later tests.
    """
    app.config.update(
        dict(
            FILES_REST_MULTIPART_CHUNKSIZE_MIN=5 * 1024 * 1024,
            FILES_REST_MULTIPART_CHUNKSIZE_MAX=5 * 1024 * 1024 * 1024,
        ))
    try:
        # Initial parameters
        chunks = 20
        chunk_size = 5 * 1024 * 1024  # 5 MiB
        last_chunk = 1024 * 1024  # 1 MiB
        size = (chunks - 1) * chunk_size + last_chunk

        # Initiate
        mp = MultipartObject.create(
            bucket, 'testfile', size=size, chunk_size=chunk_size)
        db.session.commit()

        # Create parts: every part is full-sized except the last one.
        for i in range(chunks):
            part_size = chunk_size if i < chunks - 1 else last_chunk
            Part.create(mp, i, stream=make_stream(part_size))
            db.session.commit()

        # Complete
        mp.complete()
        db.session.commit()

        # Merge parts.
        pre_size = mp.bucket.size
        mp.merge_parts()
        db.session.commit()

        # Merging must leave the bucket size unchanged.
        bucket = Bucket.get(bucket.id)
        assert bucket.size == pre_size
    finally:
        # Always put the chunk-size limits back, even when the test fails.
        app.config.update(
            dict(
                FILES_REST_MULTIPART_CHUNKSIZE_MIN=2,
                FILES_REST_MULTIPART_CHUNKSIZE_MAX=20,
            ))
def test_multipart_full(app, db, bucket):
    """Upload, complete and merge a 20-part multipart object.

    The multipart chunk-size limits in ``app.config`` are raised for the
    duration of the test and set back to 2 and 20 afterwards. The reset runs
    in a ``finally`` clause so that a failing assertion or an exception inside
    the test does not leave the raised limits in place for later tests.
    """
    app.config.update(dict(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=5 * 1024 * 1024,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=5 * 1024 * 1024 * 1024,
    ))
    try:
        # Initial parameters
        chunks = 20
        chunk_size = 5 * 1024 * 1024  # 5 MiB
        last_chunk = 1024 * 1024  # 1 MiB
        size = (chunks - 1) * chunk_size + last_chunk

        # Initiate
        mp = MultipartObject.create(
            bucket, 'testfile', size=size, chunk_size=chunk_size)
        db.session.commit()

        # Create parts: every part is full-sized except the last one.
        for i in range(chunks):
            part_size = chunk_size if i < chunks - 1 else last_chunk
            Part.create(mp, i, stream=make_stream(part_size))
            db.session.commit()

        # Complete
        mp.complete()
        db.session.commit()

        # Merge parts.
        pre_size = mp.bucket.size
        mp.merge_parts()
        db.session.commit()

        # Merging must leave the bucket size unchanged.
        bucket = Bucket.get(bucket.id)
        assert bucket.size == pre_size
    finally:
        # Always put the chunk-size limits back, even when the test fails.
        app.config.update(dict(
            FILES_REST_MULTIPART_CHUNKSIZE_MIN=2,
            FILES_REST_MULTIPART_CHUNKSIZE_MAX=20,
        ))
def test_part_creation(app, db, bucket, get_sha256):
    """Create parts out of order, then complete and merge them (SHA-256).

    The bucket size is expected to stay at the declared total of 5 at every
    step, and the merged file is expected to contain the parts concatenated
    in part-number order.
    """
    assert bucket.size == 0
    upload = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    assert bucket.size == 5

    # Parts are created in the order 2, 0, 1.
    for part_number, payload in ((2, b'p'), (0, b'p1'), (1, b'p2')):
        Part.create(upload, part_number, stream=BytesIO(payload))
    db.session.commit()
    assert bucket.size == 5

    upload.complete()
    db.session.commit()
    assert bucket.size == 5

    # The stored checksum of part 1 is the prefixed digest of its payload.
    expected_checksum = "sha256:{0}".format(hashlib.sha256(b'p2').hexdigest())
    assert expected_checksum == Part.get_or_none(upload, 1).checksum

    merged = upload.merge_parts()
    db.session.commit()
    assert bucket.size == 5

    # After the merge no multipart object or part rows remain.
    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0

    merged_file = merged.file
    assert merged_file.size == 5
    assert merged_file.checksum == get_sha256(b'p1p2p')
    assert merged_file.storage().open().read() == b'p1p2p'
    assert merged_file.writable is False
    assert merged_file.readable is True

    latest = ObjectVersion.get(bucket, 'test.txt')
    assert merged.version_id == latest.version_id
def test_part_creation(app, db, bucket, get_md5):
    """Create parts out of order, then complete and merge them (MD5).

    The bucket size is expected to stay at the declared total of 5 at every
    step, and the merged file is expected to contain the parts concatenated
    in part-number order.
    """
    assert bucket.size == 0
    upload = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    assert bucket.size == 5

    # Parts are created in the order 2, 0, 1.
    for part_number, payload in ((2, b'p'), (0, b'p1'), (1, b'p2')):
        Part.create(upload, part_number, stream=BytesIO(payload))
    db.session.commit()
    assert bucket.size == 5

    upload.complete()
    db.session.commit()
    assert bucket.size == 5

    # The stored checksum of part 1 is the prefixed digest of its payload.
    expected_checksum = "md5:{0}".format(hashlib.md5(b'p2').hexdigest())
    assert expected_checksum == Part.get_or_none(upload, 1).checksum

    merged = upload.merge_parts()
    db.session.commit()
    assert bucket.size == 5

    # After the merge no multipart object or part rows remain.
    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0

    merged_file = merged.file
    assert merged_file.size == 5
    assert merged_file.checksum == get_md5(b'p1p2p')
    assert merged_file.storage().open().read() == b'p1p2p'
    assert merged_file.writable is False
    assert merged_file.readable is True

    latest = ObjectVersion.get(bucket, 'test.txt')
    assert merged.version_id == latest.version_id
def multipart(db, bucket):
    """Create, commit and return a multipart object for key ``mykey``.

    The object is created in ``bucket`` with a size of 110 and a chunk size
    of 20.
    """
    upload = MultipartObject.create(bucket, 'mykey', 110, 20)
    db.session.commit()
    return upload