def test_bucket_sync(app, db, dummy_location):
    """Test that a bucket is correctly synced."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename1").set_location("b1v11", 1, "achecksum")
    ObjectVersion.create(b1, "filename2").set_location("b1v12", 1, "achecksum")
    ObjectVersion.create(b1, "filename3").set_location("b1v13", 1, "achecksum")
    ObjectVersion.create(b2, "extra1").set_location("b2v11", 1, "achecksum")
    db.session.commit()

    b1.sync(b2)

    # Default sync copies b1's three objects into b2 but keeps b2's extra.
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 4

    ObjectVersion.delete(b1, "filename1")
    ObjectVersion.create(b2, "extra2").set_location("b2v12", 1, "achecksum")
    ObjectVersion.create(b2, "extra3").set_location("b2v13", 1, "achecksum")
    ObjectVersion.delete(b2, "extra3")
    db.session.commit()

    b1.sync(b2, delete_extras=True)

    # With delete_extras=True, objects missing from b1 are removed from b2.
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b2).count() == 2
def test_bucket_sync_deleted(app, db, dummy_location):
    """Test bucket sync of a deleted bucket."""
    b1 = Bucket.create()
    b1.deleted = True
    db.session.commit()

    # Syncing from a soft-deleted bucket must be rejected.
    with pytest.raises(InvalidOperationError) as excinfo:
        b1.sync(Bucket.create())
    # The raised error carries a non-empty body for the REST response.
    assert excinfo.value.get_body() != {}
def test_object_snapshot_deleted(app, db, dummy_location):
    """Deleted bucket."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    b2.deleted = True
    db.session.commit()

    # Snapshotting a live bucket yields a new, unlocked bucket.
    b3 = b1.snapshot()
    assert b3.id != b1.id
    assert b3.locked is False

    # b2 is deleted.
    pytest.raises(InvalidOperationError, b2.snapshot)
def test_bucket_sync_new_object(app, db, dummy_location):
    """Syncing must copy an object that exists only in the source bucket."""
    src = Bucket.create()
    dest = Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    db.session.commit()

    # Before syncing only the source holds the object.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 0

    src.sync(dest)

    # After syncing the object is visible in both buckets.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    assert ObjectVersion.get(dest, "filename")
def test_bucket_sync_delete_extras(app, db, dummy_location):
    """Syncing with delete_extras removes objects only present in dest."""
    src = Bucket.create()
    dest = Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(dest, "filename").set_location("b2v1", 1, "achecksum")
    ObjectVersion.create(dest, "extra-deleted").set_location("b3v1", 1, "asum")
    db.session.commit()

    src.sync(dest, delete_extras=True)

    # The shared key survives; the destination-only key is gone.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    assert not ObjectVersion.get(dest, "extra-deleted")
def test_bucket_retrieval(app, db, dummy_location): """Test bucket get/create.""" # Create two buckets with db.session.begin_nested(): b1 = Bucket.create() Bucket.create() assert Bucket.all().count() == 2 # Mark one as deleted. with db.session.begin_nested(): b1.deleted = True assert Bucket.all().count() == 1
def files():
    """Load files."""
    # Demo fixture: wipes DATADIR and all file-related tables, then loads a
    # known set of buckets/objects from the repository's own files.
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1: two versions per key, second key ends as a delete marker.
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'
    db.session.commit()
def files():
    """Load files.

    Demo fixture: wipes DATADIR and all file-related tables, then loads a
    known set of buckets/objects from the repository's own files.
    """
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location
    loc = Location(name='local', uri=d, default=True)
    # Bug fix: the new Location was never added to the session, so the
    # commit below silently persisted nothing (cf. the sibling files()
    # fixture, which does add it).
    db.session.add(loc)
    db.session.commit()

    # Bucket 0
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1: two versions per key, second key ends as a delete marker.
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'
    db.session.commit()
def test_object_snapshot_deleted(app, db, dummy_location):
    """Test snapshot creation of a deleted bucket."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    b2.deleted = True
    db.session.commit()

    # Snapshotting a live bucket yields a new, unlocked bucket.
    b3 = b1.snapshot()
    assert b3.id != b1.id
    assert b3.locked is False

    # b2 is deleted.
    with pytest.raises(InvalidOperationError) as excinfo:
        b2.snapshot()
    # The raised error carries a non-empty body for the REST response.
    assert excinfo.value.get_body() != {}
def attach_files(obj, eng):
    # Workflow task: attach the files listed in the workflow object's extra
    # data to the already-published record found via its control number.
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Only create (and link) a bucket if the record has no files yet.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                # Remote file: fetch over HTTP with optional extra headers.
                request = urllib2.Request(
                    file_['url'], headers=file_.get('headers', {}))
                f = urllib2.urlopen(request)
            else:
                # Local file path.
                # NOTE(review): handle is never explicitly closed —
                # presumably released by GC after streaming; confirm.
                f = open(file_['url'])
            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', obj, eng)
def test_pyfilesystemstorage(app, db, dummy_location): """Test pyfs storage.""" # Create bucket and object with db.session.begin_nested(): b1 = Bucket.create() obj = ObjectVersion.create(b1, "LICENSE") obj.file = FileInstance.create() storage = PyFilesystemStorage(obj.file, base_uri=obj.bucket.location.uri) counter = dict(size=0) def callback(total, size): counter['size'] = size data = b("this is some content") stream = BytesIO(data) loc, size, checksum = storage.save(stream, progress_callback=callback) # Verify checksum, size and location. m = hashlib.md5() m.update(data) assert "md5:{0}".format(m.hexdigest()) == checksum assert size == len(data) assert loc == join( dummy_location.uri, str(obj.file.id), "data")
def test_SIP_files(db): """Test the files methods of API SIP.""" # we create a SIP model sip = SIP_.create() db.session.commit() # We create an API SIP on top of it api_sip = SIP(sip) assert len(api_sip.files) == 0 # we setup a file storage tmppath = tempfile.mkdtemp() db.session.add(Location(name='default', uri=tmppath, default=True)) db.session.commit() # we create a file content = b'test lol\n' bucket = Bucket.create() obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content)) db.session.commit() # we attach it to the SIP sf = api_sip.attach_file(obj) db.session.commit() assert len(api_sip.files) == 1 assert api_sip.files[0].filepath == 'test.txt' assert sip.sip_files[0].filepath == 'test.txt' # finalization rmtree(tmppath)
def test_object_delete(app, db, dummy_location): """Test object creation.""" # Create three versions, with latest being a delete marker. with db.session.begin_nested(): b1 = Bucket.create() ObjectVersion.create(b1, "test").set_location( "b1test1", 1, "achecksum") ObjectVersion.create(b1, "test").set_location( "b1test2", 1, "achecksum") obj_deleted = ObjectVersion.delete(b1, "test") assert ObjectVersion.query.count() == 3 assert ObjectVersion.get(b1, "test") is None assert ObjectVersion.get_by_bucket(b1).count() == 0 obj = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id) assert obj.is_deleted assert obj.file_id is None ObjectVersion.create(b1, "test").set_location( "b1test4", 1, "achecksum") assert ObjectVersion.query.count() == 4 assert ObjectVersion.get(b1.id, "test") is not None assert ObjectVersion.get_by_bucket(b1.id).count() == 1
def test_object_restore(app, db, dummy_location):
    """Restore object."""
    f1 = FileInstance(uri="f1", size=1, checksum="mychecksum")
    f2 = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add(f1)
    db.session.add(f2)
    b1 = Bucket.create()

    obj1 = ObjectVersion.create(b1, "test").set_file(f1)
    ObjectVersion.create(b1, "test").set_file(f2)
    obj_deleted = ObjectVersion.delete(b1, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3

    # Cannot restore a deleted version.
    pytest.raises(InvalidOperationError, obj_deleted.restore)

    # Restore first version
    obj_new = obj1.restore()
    db.session.commit()

    # Restoring creates a brand-new head version pointing at the old file.
    assert ObjectVersion.query.count() == 4
    assert obj_new.is_head is True
    assert obj_new.version_id != obj1.version_id
    assert obj_new.key == obj1.key
    assert obj_new.file_id == obj1.file_id
    assert obj_new.bucket == obj1.bucket
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    bucket = Bucket.create(
        default_location=Location.get_default()
    )
    # Extract the schema name from the URL part after '/schemas/'.
    try:
        schema = data.get("$schema", None) \
            .split('/schemas/', 1)[1]
    except (IndexError, AttributeError):
        # No usable $schema: abort deposit creation.
        # NOTE(review): the bucket created above is left orphaned on this
        # path — confirm whether that is intentional.
        return None
    if schema:
        # Find the deposit group whose schema list contains this schema.
        _deposit_group = \
            next(
                (depgroup
                 for dg, depgroup
                 in current_app.config.get('DEPOSIT_GROUPS').iteritems()
                 if schema in depgroup['schema']
                 ),
                None
            )
        # NOTE(review): if no group matches, _deposit_group is None and the
        # .get() below raises AttributeError — confirm inputs guarantee a
        # match.
        data["_experiment"] = _deposit_group.get("experiment", "Unknown")
    deposit = super(CAPDeposit, cls).create(data, id_=id_)
    add_owner_permissions(deposit.id)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    return deposit
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    bucket = Bucket.create(
        quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
        max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
    )
    data['_buckets'] = {'deposit': str(bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)

    # Resolve the PIDs minted for this deposit.
    recid = PersistentIdentifier.get(
        'recid', str(data['recid']))
    conceptrecid = PersistentIdentifier.get(
        'recid', str(data['conceptrecid']))
    depid = PersistentIdentifier.get(
        'depid', str(data['_deposit']['id']))

    # Register the new record as a draft child in the concept's version
    # chain and link the record PID to its deposit PID.
    PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
    RecordDraft.link(recid, depid)

    return deposit
def attach_files(obj, eng):
    # Workflow task: attach the files listed in the workflow object's extra
    # data to the already-published record found via its control number.
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Only create (and link) a bucket if the record has no files yet.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                # Remote file: fetch with retries; halt the engine on
                # any non-200 response.
                headers = file_.get('headers', {})
                data = requests_retry_session().get(
                    file_['url'], headers=headers)
                if data.status_code != 200:
                    __halt_and_notify(
                        "Error during acquiring files.\nHTTP status: %d\nUrl: %s\nHeaders:%s" % (
                            data.status_code, file_['url'], headers),
                        eng)
                f = StringIO(data.content)
            else:
                # Local file path.
                f = open(file_['url'])
            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def test_admin_views(app, db, dummy_location):
    """Test admin views."""
    app.config['SECRET_KEY'] = 'CHANGEME'
    # Disable permission checks so the views are reachable anonymously.
    InvenioAdmin(app, permission_factory=None, view_class_factory=lambda x: x)

    b1 = Bucket.create(location=dummy_location)
    obj = ObjectVersion.create(b1, 'test').set_location('placeuri', 1, 'chk')
    db.session.commit()

    with app.test_client() as client:
        # Each admin list view must render and include the fixture's ids.
        res = client.get('/admin/bucket/')
        assert res.status_code == 200
        assert str(b1.id) in res.get_data(as_text=True)

        res = client.get('/admin/fileinstance/')
        assert res.status_code == 200
        assert str(obj.file_id) in res.get_data(as_text=True)

        res = client.get('/admin/location/')
        assert res.status_code == 200
        assert str(b1.location.name) in res.get_data(as_text=True)

        res = client.get('/admin/objectversion/')
        assert res.status_code == 200
        assert str(obj.version_id) in res.get_data(as_text=True)
def test_transfer_cp(db):
    """Test factories.transfer_cp function."""
    # first we create a record
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid', '1337', object_type='rec', object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)
    # test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    # Bug fix: open in binary mode — `content` is bytes, and a text-mode
    # read returns str, so the equality below could never hold on Python 3.
    with open(join(rec_dir, 'crab.txt'), "rb") as f:
        assert f.read() == content
    # finalization
    rmtree(tmppath)
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    b1 = Bucket.create()
    obj1 = ObjectVersion.create(
        b1, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(b1, "do-not-touch", stream=BytesIO(b('na')))
    b1.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    # Copy the file contents into a fresh FileInstance.
    fnew = FileInstance.create()
    fnew.copy_contents(obj1.file, default_location=b1.location.uri)
    db.session.commit()

    fold = obj1.file

    # Both the original and snapshot versions still point at the old file.
    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 0

    ObjectVersion.relink_all(obj1.file, fnew)
    db.session.commit()

    # After relinking, every version points at the new file.
    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 2
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user,
                                  test_users):
    """Check that the storage class will redirect pid files."""
    pid = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        # Disable access control for this test
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            # storage_class 'B' marks the object as a PID-backed file.
            bucket = Bucket.create(tmp_location, storage_class='B')
            pid_file = FileInstance.create()
            pid_file.set_uri(pid, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', pid_file.id)
        db.session.commit()
        url = url_for('invenio_files_rest.object_api',
                      bucket_id=bucket.id,
                      key='test.txt')
    try:
        with app.app_context():
            # Temporarily allow all requests through the files REST layer.
            permission = current_files_rest.permission_factory
            current_files_rest.permission_factory = allow_all
            # Check that accessing the file redirects to the PID
            with app.test_client() as client:
                resp = client.get(url)
                assert resp.headers['Location'] == pid
                assert resp.status_code == 302
    finally:
        with app.app_context():
            # Restore the original permission factory.
            # NOTE(review): if the try block fails before `permission` is
            # bound, this raises NameError — confirm acceptable in a test.
            current_files_rest.permission_factory = permission
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage."""
    # Create bucket and object
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, "LICENSE")
        obj.file = FileInstance()
        db.session.add(obj.file)

    storage = PyFilesystemStorage(obj, obj.file)
    with open('LICENSE', 'rb') as fp:
        loc, size, checksum = storage.save(fp)

    # Verify checksum, size and location.
    with open('LICENSE', 'rb') as fp:
        m = hashlib.md5()
        m.update(fp.read())
        assert "md5:{0}".format(m.hexdigest()) == checksum

    # Fixed: the size assertion was duplicated verbatim; one copy removed.
    assert size == getsize('LICENSE')
    assert loc == \
        join(
            dummy_location.uri,
            str(b.id),
            str(obj.version_id),
            "data")
def attach_files(obj, eng):
    # Workflow task: attach the files listed in the workflow object's extra
    # data to the already-published record found via its control number.
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Only create (and link) a bucket if the record has no files yet.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                # Remote file: fetch with retries.
                # NOTE(review): no status-code check here — a failed
                # download would be stored as the file body; confirm.
                data = requests_retry_session().get(
                    file_['url'], headers=file_.get('headers', {}))
                f = StringIO(data.content)
            else:
                # Local file path.
                f = open(file_['url'])
            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    b1 = Bucket.create()
    obj1 = ObjectVersion.create(
        b1, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(b1, "do-not-touch", stream=BytesIO(b('na')))
    b1.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    # Copy the file contents into a fresh FileInstance.
    fnew = FileInstance.create()
    fnew.copy_contents(obj1.file, location=b1.location)
    db.session.commit()

    fold = obj1.file

    # Both the original and snapshot versions still point at the old file.
    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 0

    ObjectVersion.relink_all(obj1.file, fnew)
    db.session.commit()

    # After relinking, every version points at the new file.
    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 2
def test_verify_checksum(app, tmp_location):
    """Test verify_checksum's last_check outcomes.

    Sets last_check=False if the checksum is different and last_check=None
    if it failed to calculate the checksum, e.g. an exception was raised.
    """
    with app.app_context():
        b1 = Bucket.create(tmp_location)
        objects = []
        for i in range(10):
            objects.append(
                ObjectVersion.create(b1, str(i), stream=BytesIO(b'test')))
        db.session.commit()

        # All untouched files verify successfully.
        for obj in objects:
            verify_checksum.apply([str(obj.file_id)])
            assert obj.file.last_check

        # assert that mismatches in md5 checksums are caught
        corrupted_file = objects[0].file
        with open(corrupted_file.uri, 'w') as file_writer:
            file_writer.write('modified content')
        verify_checksum.apply([str(corrupted_file.id)])
        assert corrupted_file.last_check is False

        # assert that when exceptions occur last_check=None
        failed_file = objects[1].file
        with patch.object(FileStorage, 'checksum') \
                as mock_check:
            mock_check.side_effect = KeyError()
            verify_checksum.apply_async(args=[str(failed_file.id)],
                                        kwargs={'throws': False})
        assert failed_file.last_check is None
def test_scheduling(app, test_communities, login_user):
    """Test that scheduling files happens properly."""
    with app.app_context():
        b1 = Bucket.create()
        objects = []
        for i in range(10):
            objects.append(
                ObjectVersion.create(b1, str(i), stream=BytesIO(b'test')))
        db.session.commit()

        # corrupt 1 file
        corrupted_file = objects[0].file
        with open(corrupted_file.uri, 'w') as file_writer:
            file_writer.write('modified content')

        # schedule all files
        schedule_all_files_for_checksum()

        # assert that all will be checked
        assert not corrupted_file.last_check
        for o in objects[1:]:
            assert o.file.last_check

        # make 1 file fail
        failed_file = objects[1].file
        failed_file.last_check = None

        # schedule all failed
        schedule_failed_checksum_files()

        # assert that 1 wiil run again
        assert failed_file.last_check
        assert not corrupted_file.last_check
def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification."""
    b1 = Bucket.create()
    with open('README.rst', 'rb') as fp:
        obj = ObjectVersion.create(b1, 'README.rst', stream=fp)
    db.session.commit()
    file_id = obj.file_id

    # A healthy file verifies and records the check time.
    verify_checksum(str(file_id))

    f = FileInstance.query.get(file_id)
    assert f.last_check_at
    assert f.last_check is True

    # Point the instance at a missing URI to provoke failures.
    f.uri = 'invalid'
    db.session.add(f)
    db.session.commit()

    # throws=True propagates the storage error; last_check is untouched.
    pytest.raises(ResourceNotFoundError, verify_checksum, str(file_id),
                  throws=True)
    f = FileInstance.query.get(file_id)
    assert f.last_check is True

    # throws=False swallows the error and records last_check=None.
    verify_checksum(str(file_id), throws=False)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None

    f.last_check = True
    db.session.add(f)
    db.session.commit()

    # pessimistic mode raises but still resets last_check to None.
    with pytest.raises(ResourceNotFoundError):
        verify_checksum(str(file_id), pessimistic=True)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None
def create(cls, data, id_=None, **kwargs):
    """Create a CDS deposit.

    Adds bucket creation immediately on deposit creation.
    """
    # Mint a deposit PID when the payload does not already carry one.
    if '_deposit' not in data:
        id_ = id_ or uuid.uuid4()
        cls.deposit_minter(id_, data)

    # The bucket location may be overridden via kwargs.
    bucket = Bucket.create(location=Location.get_by_name(
        kwargs.get('bucket_location', 'default')))
    data['_buckets'] = {'deposit': str(bucket.id)}

    # Fill in defaults expected by the CDS data model.
    data.setdefault('_cds', {})
    data['_cds'].setdefault('state', {})
    data.setdefault('keywords', [])
    data.setdefault('license', [{
        'license': 'CERN',
        'material': '',
        'url': 'http://copyright.web.cern.ch',
    }])
    if '_access' not in data:
        data.setdefault('_access', {})

    deposit = super(CDSDeposit, cls).create(
        data, id_=id_, validator=PartialDraft4Validator)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    return deposit
def test_object_delete(app, db, dummy_location): """Test object creation.""" # Create three versions, with latest being a delete marker. with db.session.begin_nested(): b1 = Bucket.create() ObjectVersion.create(b1, "test").set_location( "b1test1", 1, "achecksum") ObjectVersion.create(b1, "test").set_location( "b1test2", 1, "achecksum") obj_deleted = ObjectVersion.delete(b1, "test") assert ObjectVersion.query.count() == 3 assert ObjectVersion.get(b1, "test") is None assert ObjectVersion.get_by_bucket(b1).count() == 0 obj = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id) assert obj.deleted assert obj.file_id is None ObjectVersion.create(b1, "test").set_location( "b1test4", 1, "achecksum") assert ObjectVersion.query.count() == 4 assert ObjectVersion.get(b1.id, "test") is not None assert ObjectVersion.get_by_bucket(b1.id).count() == 1
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:
        click.echo('Loading datasets from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                # Attach a bucket and register the listed files by URI.
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)
                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def create_bucket(cls, data):
    """Create a bucket for this record.

    Override this method to provide more advanced bucket creation
    capabilities. This method may return a new or existing bucket, or may
    return None, in case no bucket should be created.
    """
    bucket = Bucket.create()
    return bucket
def test_bucket_sync_same_object(app, db, dummy_location):
    """A key already present in both buckets is left untouched by sync."""
    src = Bucket.create()
    dest = Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    src.sync(dest)
    db.session.commit()

    src_version = ObjectVersion.get(src, "filename").version_id
    dest_version = ObjectVersion.get(dest, "filename").version_id

    # A second sync must not create new versions on either side.
    src.sync(dest)

    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    assert ObjectVersion.get(src, "filename").version_id == src_version
    assert ObjectVersion.get(dest, "filename").version_id == dest_version
def _resolve_bucket(cls, deposit, record):
    """Build bucket."""
    logging.debug('Creating new buckets, record and deposit.')
    # Create the deposit bucket in the 'videos' location and link it.
    bucket = Bucket.create(location=Location.get_by_name('videos'))
    deposit['_buckets'] = {'deposit': str(bucket.id)}
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    # The record mirrors the deposit's bucket references.
    record['_buckets'] = deepcopy(deposit['_buckets'])
    db.session.commit()
def test_object_get_by_bucket(app, db, dummy_location):
    """Test object listing."""
    b1 = Bucket.create()
    b2 = Bucket.create()

    # First version of object
    obj1_first = ObjectVersion.create(b1, "test")
    obj1_first.set_location("b1test1", 1, "achecksum")
    # Intermediate version which is a delete marker.
    obj1_intermediate = ObjectVersion.create(b1, "test")
    obj1_intermediate.set_location("b1test2", 1, "achecksum")
    # Latest version of object
    obj1_latest = ObjectVersion.create(b1, "test")
    obj1_latest.set_location("b1test3", 1, "achecksum")
    # Create objects in/not in same bucket using different key.
    ObjectVersion.create(b1, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(b2, "test").set_location("b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    assert ObjectVersion.get(b1, "test")
    assert ObjectVersion.get(b1, "another")
    assert ObjectVersion.get(b2, "test")

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b2, versions=True).count() == 1

    # Assert order of returned objects (alphabetical)
    objs = ObjectVersion.get_by_bucket(b1.id).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"

    # Assert order of returned objects verions (creation date ascending)
    objs = ObjectVersion.get_by_bucket(b1.id, versions=True).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"
    assert objs[1].version_id == obj1_latest.version_id
    assert objs[2].key == "test"
    assert objs[2].version_id == obj1_intermediate.version_id
    assert objs[3].key == "test"
    assert objs[3].version_id == obj1_first.version_id
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:
        click.echo('Loading data-policies from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                # Attach a bucket and register the listed files by URI.
                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)
                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "versioned").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b1, "versioned").set_location("b1v2", 1, "achecksum")
    ObjectVersion.create(b1, "deleted").set_location("b1d1", 1, "achecksum")
    ObjectVersion.delete(b1, "deleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u1", 1, "achecksum")
    ObjectVersion.delete(b1, "undeleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u2", 1, "achecksum")
    ObjectVersion.create(b1, "simple").set_location("b1s1", 1, "achecksum")
    ObjectVersion.create(b2, "another").set_location("b2a1", 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    # check that for 'undeleted' key there is only one HEAD
    heads = [
        o for o in ObjectVersion.query.filter_by(bucket_id=b1.id,
                                                 key='undeleted').all()
        if o.is_head
    ]
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    b3 = b1.snapshot(lock=True)
    db.session.commit()

    # Must be locked as requested.
    assert b1.locked is False
    assert b3.locked is True

    # The snapshot copies only head versions and reuses file instances.
    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b3).count() == 3
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(b3, versions=True).count() == 3
def test_object_set_file(app, db, dummy_location):
    """Attaching a file instance a second time must raise an error."""
    bucket = Bucket.create()
    instance = FileInstance(uri="f1", size=1, checksum="mychecksum")
    obj = ObjectVersion.create(bucket, "test").set_file(instance)
    db.session.commit()

    assert obj.file == instance
    # A second set_file on the same object version is rejected.
    assert pytest.raises(FileInstanceAlreadySetError, obj.set_file, instance)
def test_object_create(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        b = Bucket.create()

        # Create one object version
        obj1 = ObjectVersion.create(b, "test")
        assert obj1.bucket_id == b.id
        assert obj1.key == 'test'
        assert obj1.version_id
        assert obj1.file_id is None
        assert obj1.is_head is True
        assert obj1.bucket == b

        # Set fake location.
        obj1.set_location("file:///tmp/obj1", 1, "checksum")

        # Create one object version for same object key
        obj2 = ObjectVersion.create(b, "test")
        assert obj2.bucket_id == b.id
        assert obj2.key == 'test'
        assert obj2.version_id != obj1.version_id
        assert obj2.file_id is None
        assert obj2.is_head is True
        assert obj2.bucket == b

        # Set fake location
        obj2.set_location("file:///tmp/obj2", 2, "checksum")

        # Create a new object version for a different object with no location.
        # I.e. it is considered a delete marker.
        obj3 = ObjectVersion.create(b, "deleted_obj")

    # Object __repr__
    assert str(obj1) == \
        "{0}:{1}:{2}".format(obj1.bucket_id, obj1.version_id, obj1.key)

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # Assert that obj2 is the head version
    obj = ObjectVersion.get(b.id, "test", version_id=obj1.version_id)
    assert obj.version_id == obj1.version_id
    assert obj.is_head is False
    obj = ObjectVersion.get(b.id, "test", version_id=obj2.version_id)
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that getting latest version gets obj2
    obj = ObjectVersion.get(b.id, "test")
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that obj3 is not retrievable (without specifying version id).
    assert ObjectVersion.get(b.id, "deleted_obj") is None
    # Assert that obj3 *is* retrievable (when specifying version id).
    assert \
        ObjectVersion.get(b.id, "deleted_obj", version_id=obj3.version_id) == \
        obj3
def create_eitem_with_bucket_for_document(document_pid):
    """Create an open-access EItem together with its dedicated file bucket."""
    eitem = create_eitem(document_pid, open_access=True)
    with db.session.begin_nested():
        # Link the freshly created bucket to the EItem record.
        file_bucket = Bucket.create()
        eitem["bucket_id"] = str(file_bucket.id)
        eitem.commit()
    db.session.commit()
    return eitem, file_bucket
def test_bucket_create_object(app, db):
    """Test bucket creation.

    Covers defaults, creation by Location object and by location name,
    and rejection of unknown storage classes.
    """
    with db.session.begin_nested():
        l1 = Location(name='test1', uri='file:///tmp/1', default=False)
        l2 = Location(name='test2', uri='file:///tmp/2', default=True)
        db.session.add(l1)
        db.session.add(l2)

    assert Location.query.count() == 2

    # Simple create
    with db.session.begin_nested():
        b = Bucket.create()
        assert b.id
        # With no explicit location, the default location is used.
        assert b.default_location == Location.get_default().id
        assert b.location == Location.get_default()
        assert b.default_storage_class == \
            app.config['FILES_REST_DEFAULT_STORAGE_CLASS']
        assert b.size == 0
        assert b.quota_size is None
        assert b.max_file_size is None
        assert b.deleted is False

    # __repr__ test
    assert str(b) == str(b.id)

    # Retrieve one
    assert Bucket.get(b.id).id == b.id

    # Create with location_name and storage class
    with db.session.begin_nested():
        b = Bucket.create(location=l1, storage_class='A')
        assert b.default_location == Location.get_by_name('test1').id
        assert b.default_storage_class == 'A'

        # Create using location name instead
        b = Bucket.create(location=l2.name, storage_class='A')
        assert b.default_location == Location.get_by_name('test2').id

    # Retrieve one
    assert Bucket.all().count() == 3

    # Invalid storage class.
    pytest.raises(ValueError, Bucket.create, storage_class='X')
def test_object_create_with_fileid(app, db, dummy_location):
    """Creating a version via ``_file_id`` reuses an existing file instance."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        first = ObjectVersion.create(bucket, 'test', stream=BytesIO(b'test'))
        assert bucket.size == 4
        # Re-use the first version's file; the bucket size still grows,
        # i.e. it counts per version, not per distinct file instance.
        ObjectVersion.create(bucket, 'test', _file_id=first.file)
        assert bucket.size == 8
def test_object_snapshot(app, db, dummy_location):
    """Snapshotting copies only the HEAD versions, sharing file instances."""
    src = Bucket.create()
    other = Bucket.create()

    # Build up a history in ``src``: a key with two versions, a key that
    # ends deleted, a key deleted then re-created, and a plain key.
    ObjectVersion.create(src, "versioned").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(src, "versioned").set_location("b1v2", 1, "achecksum")
    ObjectVersion.create(src, "deleted").set_location("b1d1", 1, "achecksum")
    ObjectVersion.delete(src, "deleted")
    ObjectVersion.create(src, "undeleted").set_location("b1u1", 1, "achecksum")
    ObjectVersion.delete(src, "undeleted")
    ObjectVersion.create(src, "undeleted").set_location("b1u2", 1, "achecksum")
    ObjectVersion.create(src, "simple").set_location("b1s1", 1, "achecksum")
    # An unrelated bucket that the snapshot must not touch.
    ObjectVersion.create(other, "another").set_location("b2a1", 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(src).count() == 3
    assert ObjectVersion.get_by_bucket(other).count() == 1

    # check that for 'undeleted' key there is only one HEAD
    undeleted_heads = [
        version
        for version in ObjectVersion.query.filter_by(
            bucket_id=src.id, key='undeleted').all()
        if version.is_head
    ]
    assert len(undeleted_heads) == 1
    assert undeleted_heads[0].file.uri == 'b1u2'

    snapshot = src.snapshot(lock=True)
    db.session.commit()

    # Must be locked as requested.
    assert src.locked is False
    assert snapshot.locked is True

    # No new file instances — the snapshot shares them with the source ...
    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    # ... and carries exactly the source's HEAD versions.
    assert ObjectVersion.get_by_bucket(src).count() == 3
    assert ObjectVersion.get_by_bucket(other).count() == 1
    assert ObjectVersion.get_by_bucket(snapshot).count() == 3
    assert ObjectVersion.get_by_bucket(src, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(snapshot, versions=True).count() == 3
def test_object_set_contents(app, db, dummy_location):
    """Test object set contents.

    Also exercises ``FileInstance.verify_checksum`` bookkeeping
    (``last_check`` / ``last_check_at``) on success, mismatch, and
    missing-file errors.
    """
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        assert obj.file_id is None
        assert FileInstance.query.count() == 0
        # Save a file.
        with open('LICENSE', 'rb') as fp:
            obj.set_contents(fp)

    # Assert size, location and checksum
    assert obj.file_id is not None
    assert obj.file.uri is not None
    assert obj.file.size == getsize('LICENSE')
    assert obj.file.checksum is not None
    assert b1.size == obj.file.size

    # Try to overwrite
    with db.session.begin_nested():
        with open('LICENSE', 'rb') as fp:
            pytest.raises(FileInstanceAlreadySetError, obj.set_contents, fp)

    # Save a new version with different content
    with db.session.begin_nested():
        obj2 = ObjectVersion.create(b1, "LICENSE")
        with open('README.rst', 'rb') as fp:
            obj2.set_contents(fp)

    # New version got its own file instance; bucket size is cumulative.
    assert obj2.file_id is not None and obj2.file_id != obj.file_id
    assert obj2.file.size == getsize('README.rst')
    assert obj2.file.uri != obj.file.uri
    assert Bucket.get(b1.id).size == obj.file.size + obj2.file.size

    # A successful verification records a passing check with a timestamp.
    obj2.file.verify_checksum()
    assert obj2.file.last_check_at
    assert obj2.file.last_check is True

    # A checksum mismatch returns False rather than raising.
    old_checksum = obj2.file.checksum
    obj2.file.checksum = "md5:invalid"
    assert obj2.file.verify_checksum() is False
    previous_last_check = obj2.file.last_check
    previous_last_check_date = obj2.file.last_check_at

    # Restore the checksum but break the URI: verification now raises,
    # and by default leaves the previous check results untouched.
    with db.session.begin_nested():
        obj2.file.checksum = old_checksum
        obj2.file.uri = 'invalid'
    pytest.raises(ResourceNotFoundError, obj2.file.verify_checksum)
    assert obj2.file.last_check == previous_last_check
    assert obj2.file.last_check_at == previous_last_check_date

    # With throws=False the error is swallowed and the check is reset.
    obj2.file.verify_checksum(throws=False)
    assert obj2.file.last_check is None
    assert obj2.file.last_check_at != previous_last_check_date
def test_RecordSIP_create(db, mocker): """Test create method from the API class RecordSIP.""" # we setup a file storage tmppath = tempfile.mkdtemp() db.session.add(Location(name='default', uri=tmppath, default=True)) # setup metadata mtype = SIPMetadataType(title='JSON Test', name='json-test', format='json', schema='url://to/schema') db.session.add(mtype) db.session.commit() # first we create a record recid = uuid.uuid4() pid = PersistentIdentifier.create( 'recid', '1337', object_type='rec', object_uuid=recid, status=PIDStatus.REGISTERED) mocker.patch('invenio_records.api.RecordBase.validate', return_value=True, autospec=True) record = Record.create( {'title': 'record test', '$schema': 'url://to/schema'}, recid) # we add a file to the record bucket = Bucket.create() content = b'Test file\n' RecordsBuckets.create(record=record.model, bucket=bucket) record.files['test.txt'] = BytesIO(content) db.session.commit() # Let's create a SIP user = create_test_user('*****@*****.**') agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'} rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent) db.session.commit() # test! assert RecordSIP_.query.count() == 1 assert SIP_.query.count() == 1 assert SIPFile.query.count() == 1 assert SIPMetadata.query.count() == 1 assert len(rsip.sip.files) == 1 assert len(rsip.sip.metadata) == 1 metadata = rsip.sip.metadata[0] assert metadata.type.format == 'json' assert '"title": "record test"' in metadata.content assert rsip.sip.archivable is True # we try with no files rsip = RecordSIP.create(pid, record, True, create_sip_files=False, user_id=user.id, agent=agent) assert SIPFile.query.count() == 1 assert SIPMetadata.query.count() == 2 assert len(rsip.sip.files) == 0 assert len(rsip.sip.metadata) == 1 # finalization rmtree(tmppath)
def image_object(database, location, image_path):
    """Fixture: an ObjectVersion holding the test image."""
    bucket = Bucket.create()
    database.session.commit()

    with open(image_path, 'rb') as stream:
        image_version = ObjectVersion.create(
            bucket, 'test.jpg', stream=stream, size=getsize(image_path)
        )
    database.session.commit()
    return image_version
def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification."""
    bucket = Bucket.create()
    with open('README.rst', 'rb') as stream:
        version = ObjectVersion.create(bucket, 'README.rst', stream=stream)
    db.session.commit()

    # Run the task synchronously; it should record a passing check.
    verify_checksum(str(version.file_id))

    checked = FileInstance.query.get(version.file_id)
    assert checked.last_check_at
    assert checked.last_check is True
def test_object_set_location(app, db, dummy_location):
    """Setting a location creates a FileInstance and may happen only once."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        version = ObjectVersion.create(bucket, "LICENSE")
        assert version.file_id is None
        assert FileInstance.query.count() == 0

        # Setting the location creates the backing file instance ...
        version.set_location("b1test1", 1, "achecksum")
        assert FileInstance.query.count() == 1
        # ... and a second attempt on the same version is rejected.
        pytest.raises(
            FileInstanceAlreadySetError,
            version.set_location, "b1test1", 1, "achecksum")
def files():
    """Load example files and create single- and multi-file records.

    Creates a default ``Location`` pointing at the test ``data`` folder,
    one single-file record per example file, and one multi-file record
    referencing all of them.
    """
    data_path = os.path.join(os.path.dirname(__file__), 'data')

    # Create location.
    # Bug fix: the Location was instantiated but never added to the
    # session, so the commit below persisted nothing at that point (it
    # only got flushed later via cascade from Bucket.create).  Add it
    # explicitly so the commit does what it says.
    loc = Location(name='local', uri=data_path, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket
    bucket = Bucket.create(loc)

    # Example files from the data folder
    example_files = (
        'markdown.md',
        'csvfile.csv',
        'zipfile.zip',
        'jsonfile.json',
        'xmlfile.xml',
        'notebook.ipynb',
        'jpgfile.jpg',
        'pngfile.png',
    )

    # Create single file records
    for f in example_files:
        with open(os.path.join(data_path, f), 'rb') as fp:
            create_object(bucket, f, fp)

    # Create a multi-file record
    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec',
                                       object_uuid=rec_uuid)
    data = {
        'pid_value': provider.pid.pid_value,
        'files': []
    }

    # Template to create different files
    template_file = {
        'uri': '/files/{0}/{1}',
        'key': '',
        'bucket': str(bucket.id),
        'local': True
    }

    for filename in example_files:
        file_data = template_file.copy()
        file_data['uri'] = file_data['uri'].format(str(bucket.id), filename)
        file_data['key'] = filename
        data['files'].append(file_data)

    Record.create(data, id_=rec_uuid)
    db.session.commit()
def test_object_multibucket(app, db, dummy_location):
    """The same key in two buckets yields two independent objects."""
    with db.session.begin_nested():
        # Create two buckets, each holding an object under the same key.
        first_bucket = Bucket.create()
        second_bucket = Bucket.create()
        first_version = ObjectVersion.create(first_bucket, "test")
        first_version.set_location("file:///tmp/obj1", 1, "checksum")
        second_version = ObjectVersion.create(second_bucket, "test")
        second_version.set_location("file:///tmp/obj2", 2, "checksum")

    # Sanity check
    assert ObjectVersion.query.count() == 2

    # Each bucket resolves "test" to its own head version.
    head = ObjectVersion.get(first_bucket.id, "test")
    assert head.is_head is True
    assert head.version_id == first_version.version_id
    head = ObjectVersion.get(second_bucket.id, "test")
    assert head.is_head is True
    assert head.version_id == second_version.version_id
def test_bucket_tags(app, db, dummy_location):
    """Test bucket tags."""
    bucket = Bucket.create()
    BucketTag.create(bucket, "mykey", "testvalue")
    BucketTag.create(bucket, "another_key", "another value")
    db.session.commit()

    # Creating a tag with an already-used key must fail.
    pytest.raises(Exception, BucketTag.create, bucket, "mykey", "newvalue")

    # Tags can be looked up by bucket object or by bucket id.
    assert BucketTag.query.count() == 2
    assert BucketTag.get(bucket.id, "mykey").value == "testvalue"
    assert BucketTag.get_value(bucket, "another_key") == "another value"
    assert BucketTag.get_value(bucket.id, "invalid") is None

    # Deleting removes the tag; deleting a missing key is a no-op.
    BucketTag.delete(bucket, "mykey")
    assert BucketTag.query.count() == 1
    BucketTag.delete(bucket, "invalid")
    assert BucketTag.query.count() == 1

    # create_or_update either overwrites an existing key or inserts a new one.
    BucketTag.create_or_update(bucket, "another_key", "newval")
    BucketTag.create_or_update(bucket, "newkey", "testval")
    db.session.commit()
    assert BucketTag.get_value(bucket, "another_key") == "newval"
    assert BucketTag.get_value(bucket, "newkey") == "testval"

    # All tags of a bucket are available as a dictionary.
    assert bucket.get_tags() == dict(another_key="newval", newkey="testval")

    empty_bucket = Bucket.create()
    assert empty_bucket.get_tags() == dict()

    # Deleting buckets cascades to their tags.
    Bucket.query.delete()
    db.session.commit()
    assert BucketTag.query.count() == 0
def test_object_copy(app, db, dummy_location):
    """Copying versions onto self, to a new key, and to another bucket."""
    fi = FileInstance(uri="f1", size=1, checksum="mychecksum")
    db.session.add(fi)
    db.session.commit()
    src_bucket = Bucket.create()
    dst_bucket = Bucket.create()

    # Delete markers cannot be copied
    marker = ObjectVersion.create(src_bucket, "deleted")
    assert pytest.raises(InvalidOperationError, marker.copy, dst_bucket)

    # Copy onto self: same key and bucket, shared file, new version id,
    # and the copy becomes the latest version.
    original = ObjectVersion.create(src_bucket, "selftest").set_file(fi)
    db.session.commit()
    self_copy = original.copy()
    db.session.commit()
    assert self_copy.version_id != original.version_id
    assert self_copy.key == original.key
    assert self_copy.bucket == original.bucket
    assert self_copy.file_id == original.file_id
    history = ObjectVersion.get_versions(src_bucket, "selftest").all()
    assert history[0] == self_copy
    assert history[1] == original

    # Copy under a new key within the same bucket.
    renamed_copy = self_copy.copy(key='newkeytest')
    db.session.commit()
    assert renamed_copy.version_id != self_copy.version_id
    assert renamed_copy.key == "newkeytest"
    assert renamed_copy.bucket == self_copy.bucket
    assert renamed_copy.file_id == self_copy.file_id

    # Copy into a different bucket, still sharing the file instance.
    moved_copy = renamed_copy.copy(bucket=dst_bucket)
    assert moved_copy.version_id != renamed_copy.version_id
    assert moved_copy.key == renamed_copy.key
    assert moved_copy.bucket == dst_bucket
    assert moved_copy.file_id == renamed_copy.file_id