def test_download_to_object_version(db, bucket):
    """Test download to object version task."""
    with mock.patch('requests.get') as mock_request:
        obj = ObjectVersion.create(bucket=bucket, key='test.pdf')
        bid = bucket.id
        db.session.commit()

        # Fake a successful HTTP response carrying 1 KiB of zero bytes.
        content_length = 1024
        mock_request.return_value = type(
            'Response', (object, ), {
                'raw': BytesIO(b'\x00' * content_length),
                'headers': {'Content-Length': content_length}
            })

        assert obj.file is None

        signature = DownloadTask().s(
            'http://example.com/test.pdf', version_id=obj.version_id)

        # Run the download task.
        task = signature.delay()

        assert ObjectVersion.query.count() == 1
        obj = ObjectVersion.query.first()
        assert obj.key == 'test.pdf'
        assert str(obj.version_id) == task.result
        assert obj.file
        assert obj.file.size == content_length
        assert Bucket.get(bid).size == content_length
        assert FileInstance.query.count() == 1

        # Undo: the object version goes away, the file instance stays.
        DownloadTask().clean(version_id=obj.version_id)

        assert ObjectVersion.query.count() == 0
        assert FileInstance.query.count() == 1
        assert Bucket.get(bid).size == 0
def test_bucket_sync(app, db, dummy_location):
    """Test that a bucket is correctly synced."""
    src = Bucket.create()
    dst = Bucket.create()
    ObjectVersion.create(src, "filename1").set_location("b1v11", 1, "achecksum")
    ObjectVersion.create(src, "filename2").set_location("b1v12", 1, "achecksum")
    ObjectVersion.create(src, "filename3").set_location("b1v13", 1, "achecksum")
    ObjectVersion.create(dst, "extra1").set_location("b2v11", 1, "achecksum")
    db.session.commit()

    # A plain sync keeps extra objects already present in the destination.
    src.sync(dst)
    assert ObjectVersion.get_by_bucket(src).count() == 3
    assert ObjectVersion.get_by_bucket(dst).count() == 4

    ObjectVersion.delete(src, "filename1")
    ObjectVersion.create(dst, "extra2").set_location("b2v12", 1, "achecksum")
    ObjectVersion.create(dst, "extra3").set_location("b2v13", 1, "achecksum")
    ObjectVersion.delete(dst, "extra3")
    db.session.commit()

    # With delete_extras the destination-only objects are removed too.
    src.sync(dst, delete_extras=True)
    assert ObjectVersion.get_by_bucket(src).count() == 2
    assert ObjectVersion.get_by_bucket(dst).count() == 2
def test_bucket_sync_deleted(app, db, dummy_location):
    """Test bucket sync of a deleted bucket."""
    deleted_bucket = Bucket.create()
    deleted_bucket.deleted = True
    db.session.commit()

    # Syncing from a deleted bucket must fail with a non-empty error body.
    with pytest.raises(InvalidOperationError) as excinfo:
        deleted_bucket.sync(Bucket.create())
    assert excinfo.value.get_body() != {}
def remove_bucket(self, force=False):
    """Remove the bucket."""
    if not self.bucket:
        return
    bucket = self.bucket
    # TODO: not sure this makes sense???
    self.unset_bucket()
    if force:
        bucket.remove()
    else:
        Bucket.delete(bucket.id)
def test_object_set_contents(app, db, dummy_location):
    """Test object set contents."""
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        assert obj.file_id is None
        assert FileInstance.query.count() == 0
        # Save a file.
        with open('LICENSE', 'rb') as fp:
            obj.set_contents(fp)

    # Assert size, location and checksum
    assert obj.file_id is not None
    assert obj.file.uri is not None
    assert obj.file.size == getsize('LICENSE')
    assert obj.file.checksum is not None
    assert b1.size == obj.file.size

    # Try to overwrite: a version's contents can only be set once.
    with db.session.begin_nested():
        with open('LICENSE', 'rb') as fp:
            pytest.raises(FileInstanceAlreadySetError, obj.set_contents, fp)

    # Save a new version with different content
    with db.session.begin_nested():
        obj2 = ObjectVersion.create(b1, "LICENSE")
        with open('README.rst', 'rb') as fp:
            obj2.set_contents(fp)

    # The new version gets its own FileInstance; bucket size is the sum.
    assert obj2.file_id is not None and obj2.file_id != obj.file_id
    assert obj2.file.size == getsize('README.rst')
    assert obj2.file.uri != obj.file.uri
    assert Bucket.get(b1.id).size == obj.file.size + obj2.file.size

    # A successful verification records a timestamp and last_check=True.
    obj2.file.verify_checksum()
    assert obj2.file.last_check_at
    assert obj2.file.last_check is True

    # A checksum mismatch is reported as False.
    old_checksum = obj2.file.checksum
    obj2.file.checksum = "md5:invalid"
    assert obj2.file.verify_checksum() is False
    previous_last_check = obj2.file.last_check
    previous_last_check_date = obj2.file.last_check_at

    # Point the file at a non-resolvable URI to force a hard failure.
    with db.session.begin_nested():
        obj2.file.checksum = old_checksum
        obj2.file.uri = 'invalid'
    # With the default throws=True the error propagates and the previous
    # check results are left untouched...
    pytest.raises(ResourceNotFoundError, obj2.file.verify_checksum)
    assert obj2.file.last_check == previous_last_check
    assert obj2.file.last_check_at == previous_last_check_date
    # ...while throws=False records the failure as last_check=None.
    obj2.file.verify_checksum(throws=False)
    assert obj2.file.last_check is None
    assert obj2.file.last_check_at != previous_last_check_date
def post_delete(self, record, force=False):
    """Called after a record is deleted."""
    if not self._delete:
        return
    files = getattr(record, self.attr_name)
    if files is None or not record.bucket:
        return
    bucket = record.bucket
    if force:
        # Detach the bucket from the record before destroying it.
        record.bucket = None
        record.bucket_id = None
        bucket.remove()
    else:
        Bucket.delete(bucket.id)
def test_bucket_sync_new_object(app, db, dummy_location):
    """Test that a new file in src in synced to dest."""
    src = Bucket.create()
    dst = Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dst).count() == 0

    # Syncing copies the new object over to the destination bucket.
    src.sync(dst)

    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dst).count() == 1
    assert ObjectVersion.get(dst, "filename")
def test_object_snapshot_deleted(app, db, dummy_location):
    """Deleted bucket."""
    live = Bucket.create()
    dead = Bucket.create()
    dead.deleted = True
    db.session.commit()

    # Snapshotting a live bucket yields a fresh, unlocked copy.
    snap = live.snapshot()
    assert snap.id != live.id
    assert snap.locked is False

    # dead is deleted: snapshotting it is rejected.
    pytest.raises(InvalidOperationError, dead.snapshot)
def files():
    """Load files."""
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    # Start from an empty data directory.
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0: one object per source file.
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1: keys with multiple versions; the second create on the same
    # key adds a new version, and the final delete leaves a delete marker.
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2: intentionally left empty.
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'
    db.session.commit()
def test_bucket_retrieval(app, db, dummy_location):
    """Test bucket get/create."""
    # Create two buckets
    with db.session.begin_nested():
        bucket = Bucket.create()
        Bucket.create()
    assert Bucket.all().count() == 2

    # Soft-deleted buckets are excluded from Bucket.all().
    with db.session.begin_nested():
        bucket.deleted = True
    assert Bucket.all().count() == 1
def test_bucket_sync_delete_extras(app, db, dummy_location):
    """Test that an extra object in dest is deleted when syncing."""
    src = Bucket.create()
    dst = Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(dst, "filename").set_location("b2v1", 1, "achecksum")
    ObjectVersion.create(dst, "extra-deleted").set_location("b3v1", 1, "asum")
    db.session.commit()

    # delete_extras removes destination objects with no source counterpart.
    src.sync(dst, delete_extras=True)

    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dst).count() == 1
    assert not ObjectVersion.get(dst, "extra-deleted")
def files():
    """Load files.

    Fix: the demo ``Location`` was created but never added to the session,
    so the following commit dropped it silently (compare the sibling
    fixture, which does ``db.session.add(loc)``).
    """
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    # Start from an empty data directory.
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location — must be added to the session before committing.
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0: one object per source file.
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1: keys with multiple versions; the final delete leaves a
    # delete marker on 'RELEASE-NOTES.rst'.
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2: intentionally left empty.
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'
    db.session.commit()
def test_object_snapshot_deleted(app, db, dummy_location):
    """Test snapshot creation of a deleted bucket."""
    live = Bucket.create()
    dead = Bucket.create()
    dead.deleted = True
    db.session.commit()

    # Snapshotting a live bucket yields a fresh, unlocked copy.
    snap = live.snapshot()
    assert snap.id != live.id
    assert snap.locked is False

    # dead is deleted: snapshotting raises with a non-empty error body.
    with pytest.raises(InvalidOperationError) as excinfo:
        dead.snapshot()
    assert excinfo.value.get_body() != {}
def test_transfer_cp(db):
    """Test factories.transfer_cp function.

    Fix: read the copied file back in binary mode — ``content`` is
    ``bytes``, so a text-mode read would never compare equal on Python 3.
    """
    # First we create a record.
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid', '1337', object_type='rec', object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)

    # We set up a file storage.
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()

    # We add a file to the record.
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)

    # Test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    with open(join(rec_dir, 'crab.txt'), "rb") as f:
        assert f.read() == content

    # Finalization.
    rmtree(tmppath)
def test_SIP_files(db):
    """Test the files methods of API SIP."""
    # Create a SIP model and wrap it with the API class.
    sip_model = SIP_.create()
    db.session.commit()
    api_sip = SIP(sip_model)
    assert len(api_sip.files) == 0

    # Set up a temporary file storage location.
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()

    # Create a file object and attach it to the SIP.
    content = b'test lol\n'
    bucket = Bucket.create()
    obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content))
    db.session.commit()
    api_sip.attach_file(obj)
    db.session.commit()

    # Attachment is visible through both the API and the model.
    assert len(api_sip.files) == 1
    assert api_sip.files[0].filepath == 'test.txt'
    assert sip_model.sip_files[0].filepath == 'test.txt'

    # Finalization.
    rmtree(tmppath)
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage."""
    # Create bucket and object
    with db.session.begin_nested():
        bucket = Bucket.create()
        obj = ObjectVersion.create(bucket, "LICENSE")
        obj.file = FileInstance.create()
    storage = PyFilesystemStorage(obj.file, base_uri=obj.bucket.location.uri)

    progress = dict(size=0)

    def callback(total, size):
        progress['size'] = size

    data = b("this is some content")
    loc, size, checksum = storage.save(
        BytesIO(data), progress_callback=callback)

    # Verify checksum, size and location.
    digest = hashlib.md5(data).hexdigest()
    assert checksum == "md5:{0}".format(digest)
    assert size == len(data)
    assert loc == join(dummy_location.uri, str(obj.file.id), "data")
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage.

    Fix: remove a duplicated ``size == getsize('LICENSE')`` assertion.
    """
    # Create bucket and object
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, "LICENSE")
        obj.file = FileInstance()
        db.session.add(obj.file)

    storage = PyFilesystemStorage(obj, obj.file)
    with open('LICENSE', 'rb') as fp:
        loc, size, checksum = storage.save(fp)

    # Verify checksum, size and location.
    with open('LICENSE', 'rb') as fp:
        m = hashlib.md5()
        m.update(fp.read())
    assert "md5:{0}".format(m.hexdigest()) == checksum
    assert size == getsize('LICENSE')
    assert loc == \
        join(
            dummy_location.uri,
            str(b.id),
            str(obj.version_id),
            "data")
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user,
                                  test_users):
    """Check that the storage class will redirect pid files.

    Fix: capture the original permission factory *before* entering the
    ``try`` block — previously, a failure before the assignment would make
    the ``finally`` clause raise ``NameError`` and mask the real error.
    """
    pid = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        # Create a bucket/object whose FileInstance URI *is* the PID.
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            bucket = Bucket.create(tmp_location, storage_class='B')
            pid_file = FileInstance.create()
            pid_file.set_uri(pid, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', pid_file.id)
        db.session.commit()
        url = url_for('invenio_files_rest.object_api',
                      bucket_id=bucket.id,
                      key='test.txt')
    # Remember the original permission factory so it can always be restored.
    with app.app_context():
        permission = current_files_rest.permission_factory
    try:
        with app.app_context():
            # Disable access control for this test.
            current_files_rest.permission_factory = allow_all
            # Check that accessing the file redirects to the PID.
            with app.test_client() as client:
                resp = client.get(url)
            assert resp.headers['Location'] == pid
            assert resp.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    bucket = Bucket.create(
        default_location=Location.get_default()
    )
    # Derive the schema name from the part after '/schemas/'. A missing or
    # malformed "$schema" aborts creation by returning None.
    try:
        schema = data.get("$schema", None) \
            .split('/schemas/', 1)[1]
    except (IndexError, AttributeError):
        return None
    if schema:
        # Find the deposit group whose schema list contains this schema.
        # NOTE(review): `.iteritems()` is Python 2 only; also
        # `_deposit_group` is None when no group matches, which makes the
        # `.get(...)` below raise AttributeError — confirm intent.
        _deposit_group = \
            next(
                (depgroup
                 for dg, depgroup
                 in current_app.config.get('DEPOSIT_GROUPS').iteritems()
                 if schema in depgroup['schema']
                 ),
                None
            )
        data["_experiment"] = _deposit_group.get("experiment", "Unknown")
    deposit = super(CAPDeposit, cls).create(data, id_=id_)
    add_owner_permissions(deposit.id)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    return deposit
def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification."""
    b1 = Bucket.create()
    with open('README.rst', 'rb') as fp:
        obj = ObjectVersion.create(b1, 'README.rst', stream=fp)
    db.session.commit()
    file_id = obj.file_id

    # A successful verification records a timestamp and last_check=True.
    verify_checksum(str(file_id))

    f = FileInstance.query.get(file_id)
    assert f.last_check_at
    assert f.last_check is True

    # Point the file at a non-resolvable URI to force failures below.
    f.uri = 'invalid'
    db.session.add(f)
    db.session.commit()
    # With throws=True the error propagates and last_check is untouched.
    pytest.raises(ResourceNotFoundError, verify_checksum, str(file_id),
                  throws=True)

    f = FileInstance.query.get(file_id)
    assert f.last_check is True

    # With throws=False the failure is recorded as last_check=None.
    verify_checksum(str(file_id), throws=False)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None

    f.last_check = True
    db.session.add(f)
    db.session.commit()
    # Pessimistic mode raises and resets last_check to None.
    with pytest.raises(ResourceNotFoundError):
        verify_checksum(str(file_id), pessimistic=True)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None
def attach_files(obj, eng):
    # Attach the files listed in the workflow object's extra data to the
    # already-existing record identified by its control number.
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Only create a bucket if the record has no files yet.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                # Remote file: stream it over HTTP.
                # NOTE(review): urllib2 is Python 2 only — confirm runtime.
                request = urllib2.Request(file_['url'],
                                          headers=file_.get('headers', {}))
                f = urllib2.urlopen(request)
            else:
                # Local path: read straight from disk.
                f = open(file_['url'])
            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        # NOTE(review): sibling variants call __halt_and_notify(msg, eng)
        # without the workflow object — confirm the expected signature.
        __halt_and_notify('No files found.', obj, eng)
def test_admin_views(app, db, dummy_location):
    """Test admin views."""
    app.config['SECRET_KEY'] = 'CHANGEME'
    InvenioAdmin(app, permission_factory=None, view_class_factory=lambda x: x)

    bucket = Bucket.create(location=dummy_location)
    obj = ObjectVersion.create(bucket, 'test').set_location(
        'placeuri', 1, 'chk')
    db.session.commit()

    # Each admin list view must render and mention the fixture's identifier.
    views = [
        ('/admin/bucket/', str(bucket.id)),
        ('/admin/fileinstance/', str(obj.file_id)),
        ('/admin/location/', str(bucket.location.name)),
        ('/admin/objectversion/', str(obj.version_id)),
    ]
    with app.test_client() as client:
        for endpoint, expected in views:
            res = client.get(endpoint)
            assert res.status_code == 200
            assert expected in res.get_data(as_text=True)
def test_object_delete(app, db, dummy_location):
    """Test object creation."""
    # Create three versions, with latest being a delete marker.
    with db.session.begin_nested():
        bucket = Bucket.create()
        ObjectVersion.create(bucket, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(bucket, "test").set_location(
            "b1test2", 1, "achecksum")
        marker = ObjectVersion.delete(bucket, "test")

    assert ObjectVersion.query.count() == 3
    # The delete marker hides the key from plain lookups and listings.
    assert ObjectVersion.get(bucket, "test") is None
    assert ObjectVersion.get_by_bucket(bucket).count() == 0

    # The marker itself is still reachable by explicit version id.
    obj = ObjectVersion.get(bucket, "test", version_id=marker.version_id)
    assert obj.deleted
    assert obj.file_id is None

    # A new version on the same key makes it visible again.
    ObjectVersion.create(bucket, "test").set_location(
        "b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(bucket.id, "test") is not None
    assert ObjectVersion.get_by_bucket(bucket.id).count() == 1
def attach_files(obj, eng):
    # Attach the files listed in the workflow object's extra data to the
    # already-existing record identified by its control number.
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Only create a bucket if the record has no files yet.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                headers = file_.get('headers', {})
                data = requests_retry_session().get(file_['url'],
                                                    headers=headers)
                # Halt the workflow on any non-200 response.
                if data.status_code != 200:
                    __halt_and_notify(
                        "Error during acquiring files.\nHTTP status: %d\nUrl: %s\nHeaders:%s" % (
                            data.status_code, file_['url'], headers), eng)
                # NOTE(review): ``data.content`` is bytes — StringIO works
                # on Python 2 only; Python 3 would need BytesIO. Confirm
                # the target runtime.
                f = StringIO(data.content)
            else:
                # Local path: read straight from disk.
                f = open(file_['url'])
            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def create(cls, data, id_=None, **kwargs):
    """Create a CDS deposit.

    Adds bucket creation immediately on deposit creation.
    """
    # Mint a deposit PID if the payload does not carry one yet.
    if '_deposit' not in data:
        id_ = id_ or uuid.uuid4()
        cls.deposit_minter(id_, data)
    # One bucket per deposit; its location is selectable via
    # ``bucket_location`` (defaults to the 'default' location).
    bucket = Bucket.create(location=Location.get_by_name(
        kwargs.get('bucket_location', 'default')))
    data['_buckets'] = {'deposit': str(bucket.id)}
    data.setdefault('_cds', {})
    data['_cds'].setdefault('state', {})
    data.setdefault('keywords', [])
    # Default license points to CERN copyright.
    data.setdefault('license', [{
        'license': 'CERN',
        'material': '',
        'url': 'http://copyright.web.cern.ch',
    }])
    if '_access' not in data:
        data.setdefault('_access', {})
    deposit = super(CDSDeposit, cls).create(
        data, id_=id_, validator=PartialDraft4Validator)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    return deposit
def test_scheduling(app, test_communities, login_user):
    """Test that scheduling files happens properly."""
    with app.app_context():
        bucket = Bucket.create()
        created = [
            ObjectVersion.create(bucket, str(i), stream=BytesIO(b'test'))
            for i in range(10)
        ]
        db.session.commit()

        # Corrupt the first file on disk.
        corrupted = created[0].file
        with open(corrupted.uri, 'w') as fh:
            fh.write('modified content')

        # Schedule every file for a checksum run.
        schedule_all_files_for_checksum()

        # The corrupted file fails its check; all the others pass.
        assert not corrupted.last_check
        for ov in created[1:]:
            assert ov.file.last_check

        # Simulate a file whose previous check crashed.
        crashed = created[1].file
        crashed.last_check = None

        # Re-schedule only the failed checks.
        schedule_failed_checksum_files()

        # The crashed check is retried; the mismatch is not.
        assert crashed.last_check
        assert not corrupted.last_check
def test_verify_checksum(app, tmp_location):
    """Test that verify_checksum sets last_check=False if the checksum is
    different and last_check=None if it failed to calculate the checksum,
    e.g. an exception was raised."""
    with app.app_context():
        bucket = Bucket.create(tmp_location)
        created = [
            ObjectVersion.create(bucket, str(i), stream=BytesIO(b'test'))
            for i in range(10)
        ]
        db.session.commit()

        # All intact files verify successfully.
        for ov in created:
            verify_checksum.apply([str(ov.file_id)])
            assert ov.file.last_check

        # A checksum mismatch is recorded as last_check=False.
        corrupted = created[0].file
        with open(corrupted.uri, 'w') as fh:
            fh.write('modified content')
        verify_checksum.apply([str(corrupted.id)])
        assert corrupted.last_check is False

        # An exception while checksumming leaves last_check=None.
        failed = created[1].file
        with patch.object(FileStorage, 'checksum') as mock_check:
            mock_check.side_effect = KeyError()
            verify_checksum.apply_async(
                args=[str(failed.id)], kwargs={'throws': False})
        assert failed.last_check is None
def put_file_into_bucket(bucket_id, key, stream, content_length):
    """Store ``stream`` as a new object version ``key`` in the bucket."""
    # TODO: refactor invenio_files_rest to have a proper API and use that one here
    from invenio_db import db
    from invenio_files_rest.models import Bucket, ObjectVersion
    from invenio_files_rest.views import need_bucket_permission
    from invenio_files_rest.errors import FileSizeError

    bucket = Bucket.get(bucket_id)
    if bucket is None:
        abort(404, 'Bucket does not exist.')

    # WARNING: this function should be isomorphic with
    # invenio_files_rest.views:ObjectResource.create_object
    @need_bucket_permission('bucket-update')
    def create_object(bucket, key):
        size_limit = bucket.size_limit
        # ``content_length`` may be None; coerce to 0 before comparing.
        if size_limit and int(content_length or 0) > size_limit:
            desc = 'File size limit exceeded.' \
                if isinstance(size_limit, int) else size_limit.reason
            raise FileSizeError(description=desc)
        with db.session.begin_nested():
            obj = ObjectVersion.create(bucket, key)
            obj.set_contents(
                stream, size=content_length, size_limit=size_limit)
        db.session.commit()
        return obj

    return create_object(key=key, bucket=bucket)
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    # Bucket quota and per-file size limits come from the Zenodo config.
    bucket = Bucket.create(
        quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
        max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
    )
    data['_buckets'] = {'deposit': str(bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)

    # Resolve the record, concept and deposit PIDs carried by the payload.
    recid = PersistentIdentifier.get(
        'recid', str(data['recid']))
    conceptrecid = PersistentIdentifier.get(
        'recid', str(data['conceptrecid']))
    depid = PersistentIdentifier.get(
        'depid', str(data['_deposit']['id']))
    # Register the new recid as a draft child of the concept recid and
    # link the draft record to its deposit.
    PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
    RecordDraft.link(recid, depid)
    return deposit
def attach_files(obj, eng):
    # Attach the files listed in the workflow object's extra data to the
    # already-existing record identified by its control number.
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Only create a bucket if the record has no files yet.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                data = requests_retry_session().get(
                    file_['url'], headers=file_.get('headers', {}))
                # NOTE(review): no status-code check here (a sibling
                # variant halts on non-200) and ``data.content`` is bytes —
                # StringIO is Python 2 only. Confirm the target runtime.
                f = StringIO(data.content)
            else:
                # Local path: read straight from disk.
                f = open(file_['url'])
            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = \
                file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def put_file_into_bucket(bucket_id, key, stream, content_length):
    """Store ``stream`` as a new object version ``key`` in the bucket.

    Fix: ``content_length`` may be ``None`` (no Content-Length header);
    comparing ``None > size_limit`` raises ``TypeError`` on Python 3.
    Coerce with ``int(content_length or 0)`` like the sibling
    implementation does.
    """
    # TODO: refactor invenio_files_rest to have a proper API and use that one here
    from invenio_db import db
    from invenio_files_rest.models import Bucket, ObjectVersion
    from invenio_files_rest.views import need_bucket_permission
    from invenio_files_rest.errors import FileSizeError

    bucket = Bucket.get(bucket_id)
    if bucket is None:
        abort(404, 'Bucket does not exist.')

    # WARNING: this function should be isomorphic with
    # invenio_files_rest.views:ObjectResource.create_object
    @need_bucket_permission('bucket-update')
    def create_object(bucket, key):
        size_limit = bucket.size_limit
        if size_limit and int(content_length or 0) > size_limit:
            desc = 'File size limit exceeded.' \
                if isinstance(size_limit, int) else size_limit.reason
            raise FileSizeError(description=desc)
        with db.session.begin_nested():
            obj = ObjectVersion.create(bucket, key)
            obj.set_contents(
                stream, size=content_length, size_limit=size_limit)
        db.session.commit()
        return obj

    return create_object(key=key, bucket=bucket)
def test_object_restore(app, db, dummy_location):
    """Restore object."""
    file_a = FileInstance(uri="f1", size=1, checksum="mychecksum")
    file_b = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add(file_a)
    db.session.add(file_b)
    bucket = Bucket.create()

    first = ObjectVersion.create(bucket, "test").set_file(file_a)
    ObjectVersion.create(bucket, "test").set_file(file_b)
    marker = ObjectVersion.delete(bucket, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3
    # Cannot restore a delete marker.
    pytest.raises(InvalidOperationError, marker.restore)

    # Restoring the first version creates a brand new head version that
    # points at the same file.
    restored = first.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert restored.is_head is True
    assert restored.version_id != first.version_id
    assert restored.key == first.key
    assert restored.file_id == first.file_id
    assert restored.bucket == first.bucket
def test_object_delete(app, db, dummy_location):
    """Test object creation."""
    # Create three versions, with latest being a delete marker.
    with db.session.begin_nested():
        bucket = Bucket.create()
        ObjectVersion.create(bucket, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(bucket, "test").set_location(
            "b1test2", 1, "achecksum")
        marker = ObjectVersion.delete(bucket, "test")

    assert ObjectVersion.query.count() == 3
    # The delete marker hides the key from plain lookups and listings.
    assert ObjectVersion.get(bucket, "test") is None
    assert ObjectVersion.get_by_bucket(bucket).count() == 0

    # The marker itself is still reachable by explicit version id.
    obj = ObjectVersion.get(bucket, "test", version_id=marker.version_id)
    assert obj.is_deleted
    assert obj.file_id is None

    # A new version on the same key makes it visible again.
    ObjectVersion.create(bucket, "test").set_location(
        "b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(bucket.id, "test") is not None
    assert ObjectVersion.get_by_bucket(bucket.id).count() == 1
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    bucket = Bucket.create()
    target = ObjectVersion.create(
        bucket, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(bucket, "do-not-touch", stream=BytesIO(b('na')))
    bucket.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    # Copy the contents into a brand new FileInstance.
    fnew = FileInstance.create()
    fnew.copy_contents(target.file, default_location=bucket.location.uri)
    db.session.commit()

    fold = target.file
    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 0

    # After relinking, every version points at the new file instance.
    ObjectVersion.relink_all(target.file, fnew)
    db.session.commit()

    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 2
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    bucket = Bucket.create()
    target = ObjectVersion.create(
        bucket, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(bucket, "do-not-touch", stream=BytesIO(b('na')))
    bucket.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    # Copy the contents into a brand new FileInstance.
    fnew = FileInstance.create()
    fnew.copy_contents(target.file, location=bucket.location)
    db.session.commit()

    fold = target.file
    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 0

    # After relinking, every version points at the new file instance.
    ObjectVersion.relink_all(target.file, fnew)
    db.session.commit()

    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 2
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:
        click.echo('Loading datasets from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                # One bucket per record.
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    # Register the (external) file URI; the object key is
                    # the last path segment of the URI.
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)

                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def create_bucket(cls, data):
    """Create a bucket for this record.

    Override this method to provide more advanced bucket creation
    capabilities. This method may return a new or existing bucket, or may
    return None, in case no bucket should be created.
    """
    # Default implementation: always a brand new bucket; ``data`` is
    # intentionally unused here but available to overrides.
    return Bucket.create()
def test_object_get_by_bucket(app, db, dummy_location):
    """Test object listing."""
    b1 = Bucket.create()
    b2 = Bucket.create()

    # First version of object
    obj1_first = ObjectVersion.create(b1, "test")
    obj1_first.set_location("b1test1", 1, "achecksum")
    # Intermediate version which is a delete marker.
    obj1_intermediate = ObjectVersion.create(b1, "test")
    obj1_intermediate.set_location("b1test2", 1, "achecksum")
    # Latest version of object
    obj1_latest = ObjectVersion.create(b1, "test")
    obj1_latest.set_location("b1test3", 1, "achecksum")
    # Create objects in/not in same bucket using different key.
    ObjectVersion.create(b1, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(b2, "test").set_location("b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    assert ObjectVersion.get(b1, "test")
    assert ObjectVersion.get(b1, "another")
    assert ObjectVersion.get(b2, "test")

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b2, versions=True).count() == 1

    # Assert order of returned objects (alphabetical)
    objs = ObjectVersion.get_by_bucket(b1.id).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"

    # Assert order of returned object versions: alphabetical by key, and
    # within a key the newest version first (see assertions below).
    objs = ObjectVersion.get_by_bucket(b1.id, versions=True).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"
    assert objs[1].version_id == obj1_latest.version_id
    assert objs[2].key == "test"
    assert objs[2].version_id == obj1_intermediate.version_id
    assert objs[3].key == "test"
    assert objs[3].version_id == obj1_first.version_id
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record
    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:
        click.echo('Loading data-policies from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                # One bucket per record.
                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    # Register the (external) file URI; the object key is
                    # the last path segment of the URI.
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket, filename, _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def delete_buckets(record):
    """Mark every bucket referenced by the record's files as deleted."""
    # Collect the distinct bucket ids first so each bucket is touched once.
    bucket_ids = {f.get('bucket') for f in record.get('files', [])}
    for bucket_id in bucket_ids:
        bucket = Bucket.get(bucket_id)
        bucket.deleted = True
def test_object_create(app, db, dummy_location):
    """Test object creation, head tracking and delete markers."""
    with db.session.begin_nested():
        b = Bucket.create()

        # Create one object version
        obj1 = ObjectVersion.create(b, "test")
        assert obj1.bucket_id == b.id
        assert obj1.key == 'test'
        assert obj1.version_id
        assert obj1.file_id is None
        assert obj1.is_head is True
        assert obj1.bucket == b

        # Set fake location.
        obj1.set_location("file:///tmp/obj1", 1, "checksum")

        # Create a second version for the same key; it becomes the new head.
        obj2 = ObjectVersion.create(b, "test")
        assert obj2.bucket_id == b.id
        assert obj2.key == 'test'
        assert obj2.version_id != obj1.version_id
        assert obj2.file_id is None
        assert obj2.is_head is True
        assert obj2.bucket == b

        # Set fake location
        obj2.set_location("file:///tmp/obj2", 2, "checksum")

        # Create a new object version for a different object with no
        # location, i.e. it is considered a delete marker.
        obj3 = ObjectVersion.create(b, "deleted_obj")

    # Object __repr__ format: "<bucket>:<version>:<key>"
    assert str(obj1) == \
        "{0}:{1}:{2}".format(obj1.bucket_id, obj1.version_id, obj1.key)

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # Assert that obj2 is the head version; obj1 was demoted.
    obj = ObjectVersion.get(b.id, "test", version_id=obj1.version_id)
    assert obj.version_id == obj1.version_id
    assert obj.is_head is False
    obj = ObjectVersion.get(b.id, "test", version_id=obj2.version_id)
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that getting latest version gets obj2
    obj = ObjectVersion.get(b.id, "test")
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # A delete marker is not retrievable without specifying a version id...
    assert ObjectVersion.get(b.id, "deleted_obj") is None

    # ...but *is* retrievable when specifying its version id.
    assert \
        ObjectVersion.get(b.id, "deleted_obj",
                          version_id=obj3.version_id) == \
        obj3
def test_object_set_file(app, db, dummy_location):
    """Test attaching an existing file instance to an object version."""
    bucket = Bucket.create()
    instance = FileInstance(uri="f1", size=1, checksum="mychecksum")
    obj = ObjectVersion.create(bucket, "test").set_file(instance)
    db.session.commit()

    assert obj.file == instance
    # Setting a file twice on the same object version must be rejected.
    assert pytest.raises(FileInstanceAlreadySetError, obj.set_file, instance)
def test_bucket_create_object(app, db):
    """Test bucket creation with default and explicit locations."""
    with db.session.begin_nested():
        l1 = Location(name='test1', uri='file:///tmp/1', default=False)
        l2 = Location(name='test2', uri='file:///tmp/2', default=True)
        db.session.add(l1)
        db.session.add(l2)

    assert Location.query.count() == 2

    # Simple create: bucket picks up the default location (l2) and the
    # app-configured default storage class.
    with db.session.begin_nested():
        b = Bucket.create()
        assert b.id
        assert b.default_location == Location.get_default().id
        assert b.location == Location.get_default()
        assert b.default_storage_class == \
            app.config['FILES_REST_DEFAULT_STORAGE_CLASS']
        assert b.size == 0
        assert b.quota_size is None
        assert b.max_file_size is None
        assert b.deleted is False

    # __repr__ test: a bucket renders as its id.
    assert str(b) == str(b.id)

    # Retrieve one
    assert Bucket.get(b.id).id == b.id

    # Create with location (as Location instance) and storage class.
    with db.session.begin_nested():
        b = Bucket.create(location=l1, storage_class='A')
        assert b.default_location == Location.get_by_name('test1').id
        assert b.default_storage_class == 'A'

        # Create using location name instead
        b = Bucket.create(location=l2.name, storage_class='A')
        assert b.default_location == Location.get_by_name('test2').id

    # Retrieve one
    assert Bucket.all().count() == 3

    # Invalid storage class.
    pytest.raises(ValueError, Bucket.create, storage_class='X')
def test_object_create_with_fileid(app, db, dummy_location):
    """Test object creation reusing an existing file instance."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        first = ObjectVersion.create(bucket, 'test', stream=BytesIO(b'test'))

    # The 4-byte stream is accounted against the bucket once...
    assert bucket.size == 4

    # NOTE(review): this passes the FileInstance itself (first.file) as
    # ``_file_id`` rather than ``first.file_id`` — confirm the API accepts
    # an instance as well as an id.
    ObjectVersion.create(bucket, 'test', _file_id=first.file)
    # ...and a second time when the same file backs a new version.
    assert bucket.size == 8
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    # b1: "versioned" has two versions, "deleted" ends as a delete marker,
    # "undeleted" is deleted then re-created, "simple" has one version.
    ObjectVersion.create(b1, "versioned").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b1, "versioned").set_location("b1v2", 1, "achecksum")
    ObjectVersion.create(b1, "deleted").set_location("b1d1", 1, "achecksum")
    ObjectVersion.delete(b1, "deleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u1", 1, "achecksum")
    ObjectVersion.delete(b1, "undeleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u2", 1, "achecksum")
    ObjectVersion.create(b1, "simple").set_location("b1s1", 1, "achecksum")
    ObjectVersion.create(b2, "another").set_location("b2a1", 1, "achecksum")
    db.session.commit()

    # 9 versions total (7 with files + 2 delete markers), 7 file instances.
    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    # Heads only: delete markers don't count, so b1 has 3 live keys.
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    # check that for 'undeleted' key there is only one HEAD, and it is the
    # re-created version (b1u2), not the deleted one.
    heads = [o for o in ObjectVersion.query.filter_by(
        bucket_id=b1.id, key='undeleted').all() if o.is_head]
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    b3 = b1.snapshot(lock=True)
    db.session.commit()

    # Must be locked as requested; the source bucket stays unlocked.
    assert b1.locked is False
    assert b3.locked is True

    assert Bucket.query.count() == 3
    # Snapshot adds 3 new head versions but NO new file instances:
    # the snapshot shares the original files.
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7

    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b3).count() == 3
    # Full history stays in b1 (8 = 9 minus the shadowed "undeleted" marker's
    # head bookkeeping); b3 only carries the 3 snapshotted heads.
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(b3, versions=True).count() == 3
def create_files(cls, record, files, existing_files):
    """Create files.

    This method is currently limited to a single bucket per record.

    :param record: record the files belong to; its ``_files`` metadata is
        rebuilt from scratch.
    :param files: mapping of object key -> file metadata, passed to
        ``cls.create_file``.
    :param existing_files: previous file metadata, scanned only for an
        existing bucket id to reuse.
    :returns: one-element list with the bucket used.
    """
    default_bucket = None
    # Look for bucket id in existing files.
    for f in existing_files:
        if 'bucket' in f:
            default_bucket = f['bucket']
            break

    # Create a bucket in default location if none is found.
    if default_bucket is None:
        b = Bucket.create()
        BucketTag.create(b, 'record', str(record.id))
        default_bucket = str(b.id)
        db.session.commit()
    else:
        b = Bucket.get(default_bucket)

    record['_files'] = []
    for key, meta in files.items():
        obj = cls.create_file(b, key, meta)
        # File extension, lower-cased, without the leading dot.
        ext = splitext(obj.key)[1].lower()
        if ext.startswith('.'):
            ext = ext[1:]
        record['_files'].append(dict(
            bucket=str(obj.bucket.id),
            key=obj.key,
            version_id=str(obj.version_id),
            size=obj.file.size,
            checksum=obj.file.checksum,
            type=ext,
        ))
    db.session.add(
        RecordsBuckets(record_id=record.id, bucket_id=b.id)
    )
    record.commit()
    db.session.commit()
    return [b]
def image_object(database, location, image_path):
    """Get ObjectVersion of test image."""
    bucket = Bucket.create()
    database.session.commit()

    # Stream the image file into a fresh object version.
    with open(image_path, 'rb') as image_stream:
        obj = ObjectVersion.create(
            bucket,
            'test.jpg',
            stream=image_stream,
            size=getsize(image_path),
        )
    database.session.commit()
    return obj
def test_RecordSIP_create(db, mocker): """Test create method from the API class RecordSIP.""" # we setup a file storage tmppath = tempfile.mkdtemp() db.session.add(Location(name='default', uri=tmppath, default=True)) # setup metadata mtype = SIPMetadataType(title='JSON Test', name='json-test', format='json', schema='url://to/schema') db.session.add(mtype) db.session.commit() # first we create a record recid = uuid.uuid4() pid = PersistentIdentifier.create( 'recid', '1337', object_type='rec', object_uuid=recid, status=PIDStatus.REGISTERED) mocker.patch('invenio_records.api.RecordBase.validate', return_value=True, autospec=True) record = Record.create( {'title': 'record test', '$schema': 'url://to/schema'}, recid) # we add a file to the record bucket = Bucket.create() content = b'Test file\n' RecordsBuckets.create(record=record.model, bucket=bucket) record.files['test.txt'] = BytesIO(content) db.session.commit() # Let's create a SIP user = create_test_user('*****@*****.**') agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'} rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent) db.session.commit() # test! assert RecordSIP_.query.count() == 1 assert SIP_.query.count() == 1 assert SIPFile.query.count() == 1 assert SIPMetadata.query.count() == 1 assert len(rsip.sip.files) == 1 assert len(rsip.sip.metadata) == 1 metadata = rsip.sip.metadata[0] assert metadata.type.format == 'json' assert '"title": "record test"' in metadata.content assert rsip.sip.archivable is True # we try with no files rsip = RecordSIP.create(pid, record, True, create_sip_files=False, user_id=user.id, agent=agent) assert SIPFile.query.count() == 1 assert SIPMetadata.query.count() == 2 assert len(rsip.sip.files) == 0 assert len(rsip.sip.metadata) == 1 # finalization rmtree(tmppath)
def test_object_set_location(app, db, dummy_location):
    """Test object set contents."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        obj = ObjectVersion.create(bucket, "LICENSE")
        # No file instance exists until a location is set.
        assert obj.file_id is None
        assert FileInstance.query.count() == 0
        obj.set_location("b1test1", 1, "achecksum")
        assert FileInstance.query.count() == 1
        # A location can only be set once per object version.
        pytest.raises(
            FileInstanceAlreadySetError, obj.set_location, "b1test1", 1,
            "achecksum")
def test_object_multibucket(app, db, dummy_location):
    """Test object creation in multiple buckets."""
    with db.session.begin_nested():
        # Two buckets, each holding one object under the same key.
        first_bucket = Bucket.create()
        second_bucket = Bucket.create()
        first_obj = ObjectVersion.create(first_bucket, "test")
        first_obj.set_location("file:///tmp/obj1", 1, "checksum")
        second_obj = ObjectVersion.create(second_bucket, "test")
        second_obj.set_location("file:///tmp/obj2", 2, "checksum")

    # Sanity check
    assert ObjectVersion.query.count() == 2

    # Each bucket resolves the shared key to its own head version.
    for bucket, expected in ((first_bucket, first_obj),
                             (second_bucket, second_obj)):
        obj = ObjectVersion.get(bucket.id, "test")
        assert obj.is_head is True
        assert obj.version_id == expected.version_id
def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification."""
    bucket = Bucket.create()
    with open('README.rst', 'rb') as readme:
        obj = ObjectVersion.create(bucket, 'README.rst', stream=readme)
    db.session.commit()

    verify_checksum(str(obj.file_id))

    # The task must stamp the file instance with a passing check.
    file_instance = FileInstance.query.get(obj.file_id)
    assert file_instance.last_check_at
    assert file_instance.last_check is True