def test_dereference_missing_upload(api, users, location, task_delay):
    with api.test_request_context():
        # Start a segmented upload but never supply any parts, so the
        # staged content cannot be dereferenced later.
        staging_record: SegmentedUploadRecord = SegmentedUploadRecord.create({})
        MultipartObject.create(
            bucket=staging_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        deposit: SWORDDeposit = SWORDDeposit.create({})
        deposit.set_by_reference_files(
            [
                ByReferenceFileDefinition(
                    temporary_id=staging_record.id,
                    content_disposition="attachment; filename=something.txt",
                    content_type="text/plain",
                    packaging=PackagingFormat.Binary,
                    dereference=True,
                ),
            ],
            lambda *args: True,
            "http://localhost/",
        )
        object_version = ObjectVersion.query.one()
        # Dereferencing must fail, and the file must be flagged as errored.
        with pytest.raises(ValueError):
            tasks.dereference_object(deposit.id, object_version.version_id)
        assert TagManager(object_version)[ObjectTagKey.FileState] == FileState.Error
def test_multipart_last_part(app, db, bucket):
    """Test last-part bookkeeping on multipart creation."""
    # 100 bytes in 20-byte chunks: five full parts, no remainder.
    exact = MultipartObject.create(bucket, 'test.txt', 100, 20)
    assert exact.last_part_size == 0
    assert exact.last_part_number == 4
    # One extra byte forces a sixth, one-byte trailing part.
    uneven = MultipartObject.create(bucket, 'test.txt', 101, 20)
    assert uneven.last_part_size == 1
    assert uneven.last_part_number == 5
def delete(self, *args, **kwargs):
    """Delete the deposit."""
    # Published deposits cannot be deleted.
    if self['_deposit'].get('pid'):
        raise PIDInvalidAction()
    # Drop the reserved recid, if it was never registered.
    reserved_recid = PersistentIdentifier.get(
        pid_type='recid', pid_value=self['recid'])
    if reserved_recid.status == PIDStatus.RESERVED:
        db.session.delete(reserved_recid)
    # Completely remove bucket
    link_query = RecordsBuckets.query.filter_by(record_id=self.id)
    bucket = link_query.one().bucket
    with db.session.begin_nested():
        # Remove Record-Bucket link
        link_query.delete()
        multipart_query = MultipartObject.query_by_bucket(bucket)
        # Remove multipart objects
        upload_ids = multipart_query.with_entities(
            MultipartObject.upload_id).subquery()
        Part.query.filter(Part.upload_id.in_(upload_ids)).delete(
            synchronize_session='fetch')
        multipart_query.delete(synchronize_session='fetch')
    bucket.remove()
    return super(ZenodoDeposit, self).delete(*args, **kwargs)
def delete(self, delete_published=False, *args, **kwargs):
    """Delete the deposit.

    :param delete_published: If True, even deposit of a published record
        will be deleted (usually used by admin operations).
    :type delete_published: bool
    """
    is_published = self['_deposit'].get('pid')
    if is_published and not delete_published:
        raise PIDInvalidAction()
    # Delete the recid
    recid = PersistentIdentifier.get(
        pid_type='recid', pid_value=self['recid'])
    # Detach the recid from its version chain before deleting it.
    versioning = PIDVersioning(child=recid)
    if versioning.exists:
        # If this deposit is the current draft child, unlink it.
        if versioning.draft_child and \
                self.pid == versioning.draft_child_deposit:
            versioning.remove_draft_child()
        # Re-index the remaining siblings so version listings stay correct.
        if versioning.last_child:
            index_siblings(versioning.last_child,
                           children=versioning.children.all(),
                           include_pid=True,
                           neighbors_eager=True,
                           with_deposits=True)
    if recid.status == PIDStatus.RESERVED:
        db.session.delete(recid)
    # Drop an unused concept recid reservation as well, if present.
    if 'conceptrecid' in self:
        concept_recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['conceptrecid'])
        if concept_recid.status == PIDStatus.RESERVED:
            db.session.delete(concept_recid)
    # Completely remove bucket
    bucket = self.files.bucket
    with db.session.begin_nested():
        # Remove Record-Bucket link
        RecordsBuckets.query.filter_by(record_id=self.id).delete()
        mp_q = MultipartObject.query_by_bucket(bucket)
        # Remove multipart objects (parts first, then their uploads).
        Part.query.filter(
            Part.upload_id.in_(
                mp_q.with_entities(
                    MultipartObject.upload_id).subquery())).delete(
            synchronize_session='fetch')
        mp_q.delete(synchronize_session='fetch')
        # Unlock so the bucket can actually be removed.
        bucket.locked = False
        bucket.remove()
    # Delete the deposit PID itself (caller may pass one explicitly).
    depid = kwargs.get('pid', self.pid)
    if depid:
        depid.delete()
    # NOTE: We call the parent of Deposit, invenio_records.api.Record since
    # we need to completely override everything that the Deposit.delete
    # method does.
    return super(Deposit, self).delete(*args, **kwargs)
def delete(self, *args, **kwargs):
    """Delete the deposit."""
    # Deposits that have been published cannot be removed.
    if self['_deposit'].get('pid'):
        raise PIDInvalidAction()
    # Delete reserved recid.
    recid_pid = PersistentIdentifier.get(
        pid_type='recid', pid_value=self['recid'])
    if recid_pid.status == PIDStatus.RESERVED:
        db.session.delete(recid_pid)
    # Completely remove the bucket together with any in-flight
    # multipart uploads targeting it.
    records_buckets = RecordsBuckets.query.filter_by(record_id=self.id)
    bucket = records_buckets.one().bucket
    with db.session.begin_nested():
        records_buckets.delete()
        multiparts = MultipartObject.query_by_bucket(bucket)
        pending_uploads = multiparts.with_entities(
            MultipartObject.upload_id).subquery()
        Part.query.filter(
            Part.upload_id.in_(pending_uploads)
        ).delete(synchronize_session='fetch')
        multiparts.delete(synchronize_session='fetch')
    bucket.remove()
    return super(ZenodoDeposit, self).delete(*args, **kwargs)
def test_post_by_reference_segmented(api, users, location, task_delay):
    with api.test_request_context(), api.test_client() as client:
        # Assemble a segmented upload from two parts and complete it.
        staging_record: SegmentedUploadRecord = SegmentedUploadRecord.create({})
        mp = MultipartObject.create(
            bucket=staging_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(mp, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(mp, 1, stream=io.BytesIO(b"klmno"))
        mp.complete()
        login(client)
        # TTL one hour from now, as an ISO-8601 timestamp.
        expiry = datetime.datetime.now(tz=datetime.timezone.utc) \
            + datetime.timedelta(seconds=3600)
        ttl = expiry.isoformat()
        payload = {
            "@context": JSON_LD_CONTEXT,
            "@type": "ByReference",
            "byReferenceFiles": [
                {
                    "@id": f"http://localhost/sword/staging/{staging_record.id}",
                    "contentDisposition": "attachment; filename=some-resource.json",
                    "contentType": "application/json",
                    "dereference": True,
                    "ttl": ttl,
                }
            ],
        }
        resp = client.post(
            "/sword/service-document",
            data=json.dumps(payload),
            headers={
                "Content-Disposition": "attachment; by-reference=true",
                "Content-Type": "application/ld+json",
            },
        )
        assert resp.status_code == HTTPStatus.CREATED
        # The by-reference deposit should be recorded as a single pending
        # object version carrying the full set of by-reference tags.
        object_version = ObjectVersion.query.one()
        expected_tags = {
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.ByReferenceTemporaryID: str(staging_record.id),
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTTL: ttl,
        }
        assert TagManager(object_version) == expected_tags
def delete(self, delete_published=False, *args, **kwargs):
    """Delete the deposit.

    :param delete_published: If True, even deposit of a published record
        will be deleted (usually used by admin operations).
    :type delete_published: bool
    """
    if self['_deposit'].get('pid') and not delete_published:
        raise PIDInvalidAction()
    # Remove the record PID and detach it from its version chain.
    record_pid = PersistentIdentifier.get(
        pid_type='recid', pid_value=self['recid'])
    pid_versioning = PIDVersioning(child=record_pid)
    if pid_versioning.exists:
        if (pid_versioning.draft_child
                and self.pid == pid_versioning.draft_child_deposit):
            pid_versioning.remove_draft_child()
        if pid_versioning.last_child:
            index_siblings(pid_versioning.last_child,
                           children=pid_versioning.children.all(),
                           include_pid=True,
                           neighbors_eager=True,
                           with_deposits=True)
    if record_pid.status == PIDStatus.RESERVED:
        db.session.delete(record_pid)
    # An unused concept recid reservation is dropped as well.
    if 'conceptrecid' in self:
        concept_pid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['conceptrecid'])
        if concept_pid.status == PIDStatus.RESERVED:
            db.session.delete(concept_pid)
    # Completely remove the bucket plus any multipart uploads into it.
    bucket = self.files.bucket
    with db.session.begin_nested():
        RecordsBuckets.query.filter_by(record_id=self.id).delete()
        multiparts = MultipartObject.query_by_bucket(bucket)
        Part.query.filter(
            Part.upload_id.in_(multiparts.with_entities(
                MultipartObject.upload_id).subquery())
        ).delete(synchronize_session='fetch')
        multiparts.delete(synchronize_session='fetch')
        bucket.locked = False
        bucket.remove()
    depid = kwargs.get('pid', self.pid)
    if depid:
        depid.delete()
    # NOTE: We call the parent of Deposit, invenio_records.api.Record,
    # since we need to completely override everything that the
    # Deposit.delete method does.
    return super(Deposit, self).delete(*args, **kwargs)
def test_multipart_creation(app, db, bucket):
    """Test that creating a multipart object initializes all its state."""
    multipart = MultipartObject.create(bucket, 'test.txt', 100, 20)
    db.session.commit()
    assert multipart.upload_id
    assert multipart.size == 100
    assert multipart.chunk_size == 20
    assert multipart.completed is False
    # The bucket is charged for the full upload size up front.
    assert multipart.bucket.size == 100
    # A backing file has been allocated on storage.
    assert exists(multipart.file.uri)
def test_by_reference_sets_tag(api, users, location, task_delay):
    with api.test_request_context():
        # Assemble a segmented upload from two parts and complete it.
        staging_record: SegmentedUploadRecord = SegmentedUploadRecord.create({})
        mp = MultipartObject.create(
            bucket=staging_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(mp, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(mp, 1, stream=io.BytesIO(b"klmno"))
        mp.complete()
        deposit: SWORDDeposit = SWORDDeposit.create({})
        deposit.set_by_reference_files(
            [
                ByReferenceFileDefinition(
                    temporary_id=staging_record.id,
                    content_disposition="attachment; filename=something.txt",
                    content_type="text/plain",
                    packaging=PackagingFormat.Binary,
                    dereference=True,
                ),
            ],
            lambda *args: True,
            "http://localhost/",
        )
        object_version = ObjectVersion.query.one()
        expected_tags = {
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTemporaryID: str(staging_record.id),
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
        }
        assert TagManager(object_version) == expected_tags
        # Dereferencing should pull the staged bytes into the object.
        tasks.dereference_object(deposit.id, object_version.version_id)
        assert object_version.file.storage().open().read() == b"abcdefghijklmno"
def test_multipart_full(app, db, bucket):
    """Test full multipart object."""
    app.config.update(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=5 * 1024 * 1024,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=5 * 1024 * 1024 * 1024,
    )
    # Initial parameters
    n_parts = 20
    part_bytes = 5 * 1024 * 1024  # 5 MiB
    final_bytes = 1024 * 1024  # 1 MiB
    total = (n_parts - 1) * part_bytes + final_bytes
    # Initiate
    multipart = MultipartObject.create(
        bucket, 'testfile', size=total, chunk_size=part_bytes)
    db.session.commit()
    # Upload every part; only the final one is short.
    for index in range(n_parts):
        length = part_bytes if index < n_parts - 1 else final_bytes
        Part.create(multipart, index, stream=make_stream(length))
        db.session.commit()
    # Complete
    multipart.complete()
    db.session.commit()
    # Merging the parts must not change the bucket's accounted size.
    size_before = multipart.bucket.size
    multipart.merge_parts()
    db.session.commit()
    bucket = Bucket.get(bucket.id)
    assert bucket.size == size_before
    # Restore small chunk limits for subsequent tests.
    app.config.update(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=2,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=20,
    )
def test_multipart_full(app, db, bucket):
    """Test full multipart object."""
    app.config.update(dict(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=5 * 1024 * 1024,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=5 * 1024 * 1024 * 1024,
    ))
    # Shape of the upload: 19 full 5 MiB chunks plus a 1 MiB tail.
    chunk_count = 20
    chunk_bytes = 5 * 1024 * 1024  # 5 MiB
    tail_bytes = 1024 * 1024  # 1 MiB
    total_size = (chunk_count - 1) * chunk_bytes + tail_bytes
    # Initiate the multipart upload.
    mp = MultipartObject.create(
        bucket, 'testfile', size=total_size, chunk_size=chunk_bytes)
    db.session.commit()
    # Create all parts, committing after each one.
    for part_no in range(chunk_count):
        is_last = part_no == chunk_count - 1
        Part.create(
            mp, part_no,
            stream=make_stream(tail_bytes if is_last else chunk_bytes))
        db.session.commit()
    # Mark the upload complete.
    mp.complete()
    db.session.commit()
    # Merge parts; the bucket's accounted size must be unchanged.
    pre_size = mp.bucket.size
    mp.merge_parts()
    db.session.commit()
    bucket = Bucket.get(bucket.id)
    assert bucket.size == pre_size
    # Reset the chunk-size limits for the remaining tests.
    app.config.update(dict(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=2,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=20,
    ))
def test_part_creation(app, db, bucket, get_sha256):
    """Test part creation."""
    assert bucket.size == 0
    multipart = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    # The full size is reserved in the bucket immediately.
    assert bucket.size == 5
    # Parts may arrive out of order.
    Part.create(multipart, 2, stream=BytesIO(b'p'))
    Part.create(multipart, 0, stream=BytesIO(b'p1'))
    Part.create(multipart, 1, stream=BytesIO(b'p2'))
    db.session.commit()
    assert bucket.size == 5
    multipart.complete()
    db.session.commit()
    assert bucket.size == 5
    # Each part records its own checksum.
    expected_checksum = "sha256:{0}".format(
        hashlib.sha256(b'p2').hexdigest())
    assert expected_checksum == Part.get_or_none(multipart, 1).checksum
    obj = multipart.merge_parts()
    db.session.commit()
    assert bucket.size == 5
    # Merging consumes the multipart object and all its parts.
    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0
    assert obj.file.size == 5
    assert obj.file.checksum == get_sha256(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True
    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id
def test_part_creation(app, db, bucket, get_md5):
    """Test part creation."""
    assert bucket.size == 0
    mp = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    # Creating the multipart charges the bucket for the full size.
    assert bucket.size == 5
    # Upload the three parts out of order.
    for part_no, payload in ((2, b'p'), (0, b'p1'), (1, b'p2')):
        Part.create(mp, part_no, stream=BytesIO(payload))
    db.session.commit()
    assert bucket.size == 5
    mp.complete()
    db.session.commit()
    assert bucket.size == 5
    # Assert checksum of part.
    digest = hashlib.md5(b'p2').hexdigest()
    assert "md5:{0}".format(digest) == Part.get_or_none(mp, 1).checksum
    obj = mp.merge_parts()
    db.session.commit()
    assert bucket.size == 5
    # The multipart object and its parts are gone after merging.
    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0
    assert obj.file.size == 5
    assert obj.file.checksum == get_md5(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True
    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id
def delete(self, force=True, pid=None): """Delete deposit. Status required: ``'draft'``. :param force: Force deposit delete. (Default: ``True``) :param pid: Force pid object. (Default: ``None``) :returns: A new Deposit object. """ # Delete the recid recid = PersistentIdentifier.get(pid_type='recid', pid_value=self.pid.pid_value) if recid.status == PIDStatus.RESERVED: db.session.delete(recid) # if this item has been deleted self.delete_es_index_attempt(recid) # Completely remove bucket bucket = self.files.bucket with db.session.begin_nested(): # Remove Record-Bucket link RecordsBuckets.query.filter_by(record_id=self.id).delete() mp_q = MultipartObject.query_by_bucket(bucket) # Remove multipart objects Part.query.filter( Part.upload_id.in_( mp_q.with_entities( MultipartObject.upload_id).subquery())).delete( synchronize_session='fetch') mp_q.delete(synchronize_session='fetch') bucket.locked = False bucket.remove() return super(Deposit, self).delete()
def multipart(db, bucket):
    """Multipart object fixture (110 bytes in 20-byte chunks)."""
    multipart_object = MultipartObject.create(bucket, 'mykey', 110, 20)
    db.session.commit()
    return multipart_object
def multipart_files(self):
    """Get all multipart files."""
    deposit_bucket = self.files.bucket
    return MultipartObject.query_by_bucket(deposit_bucket)