def test_get_empty_bucket(db, client, headers, bucket, objects, permissions,
                          get_json):
    """Test getting objects from an empty bucket."""
    # Delete the objects created in the fixtures to have an empty bucket with
    # permissions set up.
    for obj in objects:
        ObjectVersion.delete(obj.bucket_id, obj.key)
    db.session.commit()

    cases = [
        (None, 404),
        ('auth', 404),
        ('objects', 404),  # TODO - return 403 instead
        ('bucket', 200),
        ('location', 200),
    ]

    for user, expected in cases:
        login_user(client, permissions[user])
        resp = client.get(
            url_for('invenio_files_rest.bucket_api', bucket_id=bucket.id),
            headers=headers
        )
        assert resp.status_code == expected
        if resp.status_code == 200:
            assert get_json(resp)['contents'] == []

def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user,
                                  test_users):
    """Check that the storage class will redirect pid files."""
    pid = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            bucket = Bucket.create(tmp_location, storage_class='B')
            pid_file = FileInstance.create()
            pid_file.set_uri(pid, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', pid_file.id)
        db.session.commit()
        url = url_for('invenio_files_rest.object_api',
                      bucket_id=bucket.id, key='test.txt')
    try:
        with app.app_context():
            # Disable access control for this test.
            permission = current_files_rest.permission_factory
            current_files_rest.permission_factory = allow_all
            # Check that accessing the file redirects to the PID.
            with app.test_client() as client:
                resp = client.get(url)
                assert resp.headers['Location'] == pid
                assert resp.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission

def test_object_restore(app, db, dummy_location):
    """Restore object."""
    f1 = FileInstance(uri="f1", size=1, checksum="mychecksum")
    f2 = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add(f1)
    db.session.add(f2)
    b1 = Bucket.create()

    obj1 = ObjectVersion.create(b1, "test").set_file(f1)
    ObjectVersion.create(b1, "test").set_file(f2)
    obj_deleted = ObjectVersion.delete(b1, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3

    # Cannot restore a deleted version.
    pytest.raises(InvalidOperationError, obj_deleted.restore)

    # Restore first version
    obj_new = obj1.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert obj_new.is_head is True
    assert obj_new.version_id != obj1.version_id
    assert obj_new.key == obj1.key
    assert obj_new.file_id == obj1.file_id
    assert obj_new.bucket == obj1.bucket

def test_record_publish_adds_no_handles_for_external_files(
        app, records_data_with_external_pids, test_records_data):
    """Test that no handle PIDs are created for external files."""
    for metadata in test_records_data:
        with app.app_context():
            app.config.update({'FAKE_EPIC_PID': True})
            external_pids = records_data_with_external_pids['external_pids']
            external_dict = {x['key']: x['ePIC_PID'] for x in external_pids}
            data = deepcopy(metadata)
            data['external_pids'] = deepcopy(external_pids)
            record_uuid = uuid.uuid4()
            b2share_deposit_uuid_minter(record_uuid, data=data)
            deposit = Deposit.create(data, id_=record_uuid)
            ObjectVersion.create(deposit.files.bucket, 'real_file_1.txt',
                                 stream=BytesIO(b'mycontent'))
            ObjectVersion.create(deposit.files.bucket, 'real_file_2.txt',
                                 stream=BytesIO(b'mycontent'))
            deposit.submit()
            deposit.publish()
            deposit.commit()
            _, record = deposit.fetch_published()
            # external files don't get a handle PID, they already have one
            # which is stored in record['_deposit']['external_pids']
            for f in record.files:
                if f['key'] in external_dict:
                    assert f.get('ePIC_PID') is None
                else:
                    assert '0000' in f['ePIC_PID']  # is a new fake PID

def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    b1 = Bucket.create()
    obj1 = ObjectVersion.create(
        b1, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(b1, "do-not-touch", stream=BytesIO(b('na')))
    b1.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    fnew = FileInstance.create()
    fnew.copy_contents(obj1.file, location=b1.location)
    db.session.commit()

    fold = obj1.file

    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 0

    ObjectVersion.relink_all(obj1.file, fnew)
    db.session.commit()

    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 2

def _create_record_from_filepath(path, rec_uuid, indexer, versions, verbose):
    """Create a published record from a JSON file, plus extra versions."""
    with open(path) as record_file:
        record_str = record_file.read()
    record_str = resolve_community_id(record_str)
    record_str = resolve_block_schema_id(record_str)
    json_data = json.loads(record_str)
    b2share_deposit_uuid_minter(rec_uuid, data=json_data)
    deposit = Deposit.create(json_data, id_=rec_uuid)
    ObjectVersion.create(deposit.files.bucket, 'myfile',
                         stream=BytesIO(b'mycontent'))
    deposit.publish()
    pid, record = deposit.fetch_published()
    indexer.index(record)
    if verbose > 0:
        click.secho('created new record: {}'.format(str(rec_uuid)))

    last_id = pid.pid_value
    for i in range(2 * versions):
        rec_uuid = uuid4()
        json_data = json.loads(record_str)
        b2share_deposit_uuid_minter(rec_uuid, data=json_data)
        deposit2 = Deposit.create(json_data, id_=rec_uuid,
                                  version_of=last_id)
        ObjectVersion.create(deposit2.files.bucket, 'myfile-ver{}'.format(i),
                             stream=BytesIO(b'mycontent'))
        deposit2.publish()
        pid, record2 = deposit2.fetch_published()
        indexer.index(record2)
        last_id = pid.pid_value
        if verbose > 0:
            click.secho('created new version: {}'.format(str(rec_uuid)))
    return record, deposit

def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record
    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:
        click.echo('Loading data-policies from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])
                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)
                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()

def objects(db, bucket):
    """Objects in the bucket."""
    obj1 = ObjectVersion.create(
        bucket, 'LICENSE', stream=BytesIO(b('license file')))
    obj2 = ObjectVersion.create(
        bucket, 'README.rst', stream=BytesIO(b('readme file')))
    db.session.commit()
    yield [obj1, obj2]

def attach_file(file_id, pid_type1, pid_value1, key1, pid_type2, pid_value2,
                key2):
    """Attach a file to a record or deposit.

    You must provide the information which will determine the first file,
    i.e.: either 'file-id' OR 'pid-type1', 'pid-value1' and 'key1'.
    Additionally you need to specify the information on the target
    record/deposit, i.e.: 'pid-type2', 'pid-value2' and 'key2'.
    """
    assert ((file_id or (pid_type1 and pid_value1 and key1)) and
            (pid_type2 and pid_value2 and key2))
    msg = u"PID type must be 'recid' or 'depid'."
    if pid_type1:
        assert pid_type1 in ('recid', 'depid', ), msg
    assert pid_type2 in ('recid', 'depid', ), msg

    if not file_id:
        resolver = record_resolver if pid_type1 == 'recid' \
            else deposit_resolver
        pid1, record1 = resolver.resolve(pid_value1)
        bucket1 = record1.files.bucket
        obj1 = ObjectVersion.get(bucket1, key1)
        if obj1 is None:
            click.echo(click.style(u'File with key "{key}" not found.'.format(
                key=key1), fg='red'))
            return
        file_id = obj1.file.id

    resolver = record_resolver if pid_type2 == 'recid' else deposit_resolver
    pid2, record2 = resolver.resolve(pid_value2)
    bucket2 = record2.files.bucket
    obj2 = ObjectVersion.get(bucket2, key2)
    if obj2 is not None:
        click.echo(click.style(u'File with key "{key}" already exists on'
                               u' bucket {bucket}.'.format(
                                   key=key2, bucket=bucket2.id), fg='red'))
        return

    if click.confirm(u'Attaching file "{file_id}" to bucket {bucket2}'
                     u' as "{key2}". Continue?'.format(
                         file_id=file_id, key2=key2, bucket2=bucket2.id)):
        record2.files.bucket.locked = False
        ObjectVersion.create(bucket2, key2, _file_id=file_id)
        if pid_type2 == 'recid':
            record2.files.bucket.locked = True
        record2.files.flush()
        record2.commit()
        db.session.commit()
        click.echo(click.style(u'File attached successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file attaching.', fg='green'))

def test_exporter(app, db, es, exporter_bucket, record_with_files_creation):
    """Test record exporter."""
    pid, record, record_url = record_with_files_creation
    RecordIndexer().index_by_id(record.id)
    current_search.flush_and_refresh('records')
    with app.app_context():
        assert ObjectVersion.get_by_bucket(exporter_bucket).count() == 0
        export_job(job_id='records')
        assert ObjectVersion.get_by_bucket(exporter_bucket).count() == 1

def test_object_remove_marker(app, db, bucket, objects):
    """Test object remove."""
    obj = objects[0]

    assert ObjectVersion.query.count() == 4
    obj = ObjectVersion.delete(obj.bucket, obj.key)
    db.session.commit()
    assert ObjectVersion.query.count() == 5
    obj = ObjectVersion.get(obj.bucket, obj.key, version_id=obj.version_id)
    obj.remove()
    assert ObjectVersion.query.count() == 4

def test_object_create_with_fileid(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, 'test', stream=BytesIO(b'test'))

    assert b.size == 4

    ObjectVersion.create(b, 'test', _file_id=obj.file)
    assert b.size == 8

def test_object_set_contents(app, db, dummy_location):
    """Test object set contents."""
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        assert obj.file_id is None
        assert FileInstance.query.count() == 0

        # Save a file.
        with open('LICENSE', 'rb') as fp:
            obj.set_contents(fp)

    # Assert size, location and checksum
    assert obj.file_id is not None
    assert obj.file.uri is not None
    assert obj.file.size == getsize('LICENSE')
    assert obj.file.checksum is not None
    assert b1.size == obj.file.size

    # Try to overwrite
    with db.session.begin_nested():
        with open('LICENSE', 'rb') as fp:
            pytest.raises(FileInstanceAlreadySetError, obj.set_contents, fp)

    # Save a new version with different content
    with db.session.begin_nested():
        obj2 = ObjectVersion.create(b1, "LICENSE")
        with open('README.rst', 'rb') as fp:
            obj2.set_contents(fp)

    assert obj2.file_id is not None and obj2.file_id != obj.file_id
    assert obj2.file.size == getsize('README.rst')
    assert obj2.file.uri != obj.file.uri
    assert Bucket.get(b1.id).size == obj.file.size + obj2.file.size

    obj2.file.verify_checksum()
    assert obj2.file.last_check_at
    assert obj2.file.last_check is True

    old_checksum = obj2.file.checksum
    obj2.file.checksum = "md5:invalid"
    assert obj2.file.verify_checksum() is False

    previous_last_check = obj2.file.last_check
    previous_last_check_date = obj2.file.last_check_at
    with db.session.begin_nested():
        obj2.file.checksum = old_checksum
        obj2.file.uri = 'invalid'
    pytest.raises(ResourceNotFoundError, obj2.file.verify_checksum)
    assert obj2.file.last_check == previous_last_check
    assert obj2.file.last_check_at == previous_last_check_date

    obj2.file.verify_checksum(throws=False)
    assert obj2.file.last_check is None
    assert obj2.file.last_check_at != previous_last_check_date

def _create_bucket(deposit, record_json, directory, logfile):
    """Load the downloaded files of a record into the deposit bucket."""
    for index, file_dict in enumerate(record_json.get('files', [])):
        click.secho(' Load file "{}"'.format(file_dict.get('name')))
        filepath = os.path.join(directory, 'file_{}'.format(index))
        if int(os.path.getsize(filepath)) != int(file_dict.get('size')):
            logfile.write("\n********************")
            logfile.write(
                "\nERROR: downloaded file size differs for file {}: "
                "{} instead of {}".format(filepath,
                                          os.path.getsize(filepath),
                                          file_dict.get('size')))
            logfile.write("\n********************")
        else:
            with open(filepath, 'r+b') as f:
                ObjectVersion.create(deposit.files.bucket,
                                     file_dict['name'],
                                     stream=BytesIO(f.read()))

def test_object_mimetype(app, db, dummy_location):
    """Test object MIME type."""
    b = Bucket.create()
    db.session.commit()
    obj1 = ObjectVersion.create(b, "test.pdf", stream=BytesIO(b'pdfdata'))
    obj2 = ObjectVersion.create(b, "README", stream=BytesIO(b'pdfdata'))

    assert obj1.mimetype == "application/pdf"
    assert obj2.mimetype == "application/octet-stream"

    # Override computed MIME type.
    obj2.mimetype = "text/plain"
    db.session.commit()
    assert ObjectVersion.get(b, "README").mimetype == "text/plain"

def rename(self, old_key, new_key):
    """Rename a file."""
    assert new_key not in self
    file_ = self[old_key]

    # create a new version with the new name
    obj = ObjectVersion.create(
        bucket=self.bucket, key=new_key,
        _file_id=file_.obj.file_id
    )
    self.record['_files'][self.keys.index(old_key)]['key'] = new_key

    # delete the old version
    ObjectVersion.delete(bucket=self.bucket, key=old_key)
    return obj

def objects(db, bucket):
    """Objects in the bucket."""
    data_bytes = b('license file')
    obj1 = ObjectVersion.create(
        bucket, 'LICENSE', stream=BytesIO(data_bytes),
        size=len(data_bytes)
    )
    data_bytes2 = b('readme file')
    obj2 = ObjectVersion.create(
        bucket, 'README.rst', stream=BytesIO(data_bytes2),
        size=len(data_bytes2)
    )
    db.session.commit()
    yield [obj1, obj2]

def save_and_validate_logo(logo_stream, logo_filename, community_id):
    """Validate the community logo extension and save the logo."""
    cfg = current_app.config
    logos_bucket_id = cfg['COMMUNITIES_BUCKET_UUID']
    logos_bucket = Bucket.query.get(logos_bucket_id)
    ext = os.path.splitext(logo_filename)[1]
    ext = ext[1:] if ext.startswith('.') else ext
    if ext in cfg['COMMUNITIES_LOGO_EXTENSIONS']:
        key = "{0}/logo.{1}".format(community_id, ext)
        ObjectVersion.create(logos_bucket, key, stream=logo_stream)
        return ext
    else:
        return None

def test_new_record(app, db, dummy_location, record_dumps, resolver):
    """Test creation of new record."""
    RecordDumpLoader.create(record_dumps)
    pid, record = resolver.resolve('11783')
    created = datetime(2011, 10, 13, 8, 27, 47)

    # Basic test that the record exists
    assert record['title']
    assert record.created == created

    # Test that this is a completely new record
    assert len(record.revisions) == 3

    # check revisions
    assert record.revisions[2].created == created
    assert record.revisions[2].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[1].created == created
    assert record.revisions[1].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[0].created == created
    assert record.revisions[0].updated == datetime(2011, 10, 13, 8, 27, 47)

    pytest.raises(IntegrityError, RecordIdentifier.insert, 11783)

    # Test the PIDs are extracted and created
    assert PersistentIdentifier.get('doi', '10.5281/zenodo.11783')

    assert len(record['_files']) == 1
    f = record['_files'][0]
    obj = ObjectVersion.get(f['bucket'], f['key'])
    assert obj.file.checksum == f['checksum']
    assert obj.file.size == f['size']
    assert BucketTag.get_value(f['bucket'], 'record') == str(record.id)

def versions(objects):
    """Get objects with all their versions."""
    versions = []
    for obj in objects:
        versions.extend(ObjectVersion.get_versions(obj.bucket, obj.key))
    yield versions

def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage."""
    # Create bucket and object
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, "LICENSE")
        obj.file = FileInstance()
        db.session.add(obj.file)

    storage = PyFilesystemStorage(obj, obj.file)
    with open('LICENSE', 'rb') as fp:
        loc, size, checksum = storage.save(fp)

    # Verify checksum, size and location.
    with open('LICENSE', 'rb') as fp:
        m = hashlib.md5()
        m.update(fp.read())
        assert "md5:{0}".format(m.hexdigest()) == checksum

    assert size == getsize('LICENSE')
    assert loc == \
        join(
            dummy_location.uri,
            str(b.id),
            str(obj.version_id),
            "data")

def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification."""
    b1 = Bucket.create()
    with open('README.rst', 'rb') as fp:
        obj = ObjectVersion.create(b1, 'README.rst', stream=fp)
    db.session.commit()
    file_id = obj.file_id

    verify_checksum(str(file_id))

    f = FileInstance.query.get(file_id)
    assert f.last_check_at
    assert f.last_check is True

    f.uri = 'invalid'
    db.session.add(f)
    db.session.commit()
    pytest.raises(ResourceNotFoundError, verify_checksum, str(file_id),
                  throws=True)

    f = FileInstance.query.get(file_id)
    assert f.last_check is True

    verify_checksum(str(file_id), throws=False)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None

    f.last_check = True
    db.session.add(f)
    db.session.commit()
    with pytest.raises(ResourceNotFoundError):
        verify_checksum(str(file_id), pessimistic=True)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None

def handle_record_files(data, bucket, files, skip_files):
    """Handles record files."""
    for file in files:
        if skip_files:
            break
        assert 'uri' in file
        assert 'size' in file
        assert 'checksum' in file

        try:
            f = FileInstance.create()
            filename = file.get("uri").split('/')[-1:][0]
            f.set_uri(file.get("uri"), file.get("size"),
                      file.get("checksum"))
            obj = ObjectVersion.create(
                bucket,
                filename,
                _file_id=f.id
            )
            file.update({
                'bucket': str(obj.bucket_id),
                'checksum': obj.file.checksum,
                'key': obj.key,
                'version_id': str(obj.version_id),
            })
        except Exception as e:
            click.echo(
                'Recid {0} file {1} could not be loaded due '
                'to {2}.'.format(data.get('recid'), filename, str(e)))
            continue

def index_attachments(sender, json=None, record=None, index=None,
                      doc_type=None):
    """Load and index attached files for given record.

    It iterates over the ``_files`` field in ``record`` and checks if the
    ``_attachment`` subfield has been configured with one of the following
    values:

    * ``True``/``False`` simply enables/disables automatic fulltext indexing
      for the given file instance;
    * Alternatively, one can provide a ``dict`` instance with all
      configuration options as defined in the Elasticsearch guide on
      https://www.elastic.co/guide/en/elasticsearch/ (search for
      mapper-attachment).

    .. note::

        Make sure that the ``mapper-attachment`` plugin is installed and
        running in Elasticsearch when using this signal handler.
    """
    for index, data in enumerate(record['_files']):
        attachment = json['_files'][index].pop('_attachment', None)
        if attachment:
            obj = ObjectVersion.get(data['bucket'], data['key'],
                                    version_id=data.get('version_id'))
            attachment = attachment if isinstance(attachment, dict) else {}
            attachment.setdefault('_content', base64.b64encode(
                obj.file.storage().open().read()
            ).decode('utf-8'))
            json['_files'][index]['_attachment'] = attachment

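# Hedged illustration (not from the original code) of the two `_attachment`
# forms that index_attachments() above understands. All keys and values are
# made-up sample data; `_indexed_chars` is one of the options accepted by the
# Elasticsearch mapper-attachments plugin.
sample_files = [
    # Boolean form: just enable automatic fulltext indexing for this file.
    {'bucket': '...', 'key': 'paper.pdf', '_attachment': True},
    # Dict form: options are passed through to Elasticsearch; `_content` is
    # filled in automatically from the file contents by the handler above.
    {'bucket': '...', 'key': 'notes.pdf',
     '_attachment': {'_indexed_chars': -1}},
]
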
def test_SIP_files(db):
    """Test the files methods of API SIP."""
    # we create a SIP model
    sip = SIP_.create()
    db.session.commit()
    # We create an API SIP on top of it
    api_sip = SIP(sip)
    assert len(api_sip.files) == 0
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we create a file
    content = b'test lol\n'
    bucket = Bucket.create()
    obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content))
    db.session.commit()
    # we attach it to the SIP
    sf = api_sip.attach_file(obj)
    db.session.commit()
    assert len(api_sip.files) == 1
    assert api_sip.files[0].filepath == 'test.txt'
    assert sip.sip_files[0].filepath == 'test.txt'
    # finalization
    rmtree(tmppath)

def test_update_record(app, db, dummy_location, record_dump, record_db,
                       resolver, record_file):
    """Test update of a record."""
    # Smoke test
    record_db['files'] = [record_file]
    record_db.commit()
    db.session.commit()

    pytest.raises(IntegrityError, RecordIdentifier.insert, 11782)

    # Update the record instead of creating a new one
    RecordDumpLoader.create(record_dump)
    pid, record = resolver.resolve('11782')

    # Basic test that the record exists
    assert record['title']
    assert record.created == datetime(2014, 10, 13, 8, 27, 47)

    # Test that old revisions are kept
    assert len(record.revisions) == 4

    # Test the PIDs are extracted and created
    assert PersistentIdentifier.get('doi', '10.5281/zenodo.11782')

    assert Bucket.query.count() == 1
    assert ObjectVersion.query.filter_by(is_head=True).count() == 1
    assert FileInstance.query.count() == 2
    assert len(record['files']) == 1
    f = record['files'][0]
    obj = ObjectVersion.get(f['bucket'], f['filename'])
    assert obj.file.checksum != record_file['checksum']
    assert obj.file.size != record_file['size']

def test_migrate_file(app, db, dummy_location, extra_location, bucket,
                      objects):
    """Test file migration."""
    obj = objects[0]

    # Test pre-condition
    old_uri = obj.file.uri
    assert exists(old_uri)
    assert old_uri == join(dummy_location.uri, str(obj.file.id)[0:2],
                           str(obj.file.id)[2:4], str(obj.file.id)[4:],
                           'data')
    assert FileInstance.query.count() == 4

    # Migrate file
    with patch('invenio_files_rest.tasks.verify_checksum') as verify_checksum:
        migrate_file(
            obj.file_id, location_name=extra_location.name,
            post_fixity_check=True)
        assert verify_checksum.delay.called

    # Get object again
    obj = ObjectVersion.get(bucket, obj.key)
    new_uri = obj.file.uri
    assert exists(old_uri)
    assert exists(new_uri)
    assert new_uri != old_uri
    assert FileInstance.query.count() == 5

def sorted_files_from_bucket(bucket, keys=None):
    """Return files from bucket sorted by given keys."""
    keys = keys or []
    total = len(keys)
    sortby = dict(zip(keys, range(total)))
    values = ObjectVersion.get_by_bucket(bucket).all()
    return sorted(values, key=lambda x: sortby.get(x.key, total))

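# Hedged usage sketch for sorted_files_from_bucket() above (not from the
# original module): files whose keys appear in `keys` come first, in the
# given order, and any remaining files sort after them. Assumes a bucket
# populated with 'LICENSE' and 'README.rst' as in the fixtures above.
files = sorted_files_from_bucket(bucket, keys=['README.rst', 'LICENSE'])
assert [f.key for f in files] == ['README.rst', 'LICENSE']
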
def __delitem__(self, key):
    """Delete a file from the deposit."""
    obj = ObjectVersion.delete(bucket=self.bucket, key=key)
    self.record['_files'] = [file_ for file_ in self.record['_files']
                             if file_['key'] != key]
    if obj is None:
        raise KeyError(key)

def test_import_record(app, db, dummy_location, record_dump, records_json,
                       resolver):
    """Test import record celery task."""
    assert RecordMetadata.query.count() == 0
    import_record(records_json[0], source_type='json')
    assert RecordMetadata.query.count() == 1
    pid, record = resolver.resolve('11782')
    assert record['_collections'] == []
    assert len(record['_files']) == 1
    assert ObjectVersion.get(
        record['_files'][0]['bucket'], record['_files'][0]['key'])

    import_record(records_json[1], source_type='marcxml')
    assert RecordMetadata.query.count() == 2
    pid, record = resolver.resolve('10')
    assert record['_collections'] == [
        "ALEPH Papers",
        "Articles & Preprints",
        "Experimental Physics (EP)",
        "CERN Divisions",
        "Atlantis Institute of Fictive Science",
        "CERN Experiments",
        "Preprints",
        "ALEPH",
    ]
    assert len(record['_files']) == 2

def _update_tag_master(cls, record):
    """Update the 'master' tag of the files that depend on the master file."""
    bucket = cls._get_bucket(record=record)
    master_video = CDSVideosFilesIterator.get_master_video_file(record)
    for obj in ObjectVersion.get_by_bucket(bucket=bucket):
        if obj.get_tags()['context_type'] in cls.dependent_objs:
            ObjectVersionTag.create_or_update(
                obj, 'master', master_video['version_id'])

def local_file(db, bucket, location, online_video):
    """A local file."""
    response = requests.get(online_video, stream=True)
    object_version = ObjectVersion.create(
        bucket, "test.mp4", stream=response.raw)
    version_id = object_version.version_id
    db.session.commit()
    return version_id

def _update_timestamp(cls, deposit):
    """Update timestamp from percentage to seconds."""
    duration = float(deposit['_cds']['extracted_metadata']['duration'])
    bucket = CDSRecordDumpLoader._get_bucket(record=deposit)
    for obj in ObjectVersion.get_by_bucket(bucket=bucket):
        if 'timestamp' in obj.get_tags().keys():
            timestamp = duration * float(obj.get_tags()['timestamp']) / 100
            ObjectVersionTag.create_or_update(obj, 'timestamp', timestamp)

def check_deposit_record_files(deposit, deposit_expected, record,
                               record_expected):
    """Check that deposit and record files match the expected keys."""
    # check deposit
    deposit_objs = [
        obj.key for obj in ObjectVersion.query_heads_by_bucket(
            deposit.files.bucket).all()
    ]
    assert sorted(deposit_expected) == sorted(deposit_objs)
    assert deposit.files.bucket.locked is False

    # check record
    record_objs = [
        obj.key for obj in ObjectVersion.query_heads_by_bucket(
            record.files.bucket).all()
    ]
    assert sorted(record_expected) == sorted(record_objs)
    assert record.files.bucket.locked is True

def delete_file(bucket_id):
    """Delete a file from a bucket."""
    key = ''
    deleted_file = ObjectVersion.delete(bucket_id, key)
    if deleted_file:
        return jsonify({"status": "ok"})
    return jsonify({"error": "not found"}), 404

def delete(self, key):
    """Delete a file."""
    rf = self[key]
    ov = rf.object_version

    # Delete the entire row
    rf.delete(force=True)

    if ov:
        # TODO: Should we also remove the FileInstance? Configurable?
        ObjectVersion.delete(ov.bucket, key)

    del self._entries[key]

    # Unset the default preview if the file is removed
    if self.default_preview == key:
        self.default_preview = None

    if key in self._order:
        self._order.remove(key)

    return rf

def open(self):
    """Open the bucket for writing."""
    self.obj = ObjectVersion.create(
        self.bucket_id,
        self.key() if callable(self.key) else self.key
    )
    db.session.commit()
    return self

def software(skip_files):
    """Load demo software records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.softid import \
        cernopendata_softid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/software-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/software')
    software_json = glob.glob(os.path.join(data, '*.json'))

    for filename in software_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_softid_minter(id, data)
                record = Record.create(data, id_=id)
                record['$schema'] = schema
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)

                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()

def __setitem__(self, key, stream):
    """Add file inside a deposit."""
    with db.session.begin_nested():
        # save the file
        obj = ObjectVersion.create(
            bucket=self.bucket, key=key, stream=stream)
        self.filesmap[key] = self.file_cls(obj, {}).dumps()
        self.flush()

def test_get_webhook_event_view_when_release_event(
        m_gitlab, deposit, client, gitlab_release_webhook_sub, git_repo_tar):
    """Test the webhook event view for a GitLab release event."""

    class MockBranchManager:
        def get(self, name):
            m = Mock(commit=dict(id='mybranchsha'))
            m.name = 'mybranch'
            return m

    class MockProjectManager:
        def get(self, name, lazy):
            return Mock(branches=MockBranchManager(), id='12345')

    m_gitlab.return_value = Mock(projects=MockProjectManager())

    responses.add(
        responses.GET,
        ('https://gitlab.cern.ch/api/v4/projects/12345/'
         'repository/archive?sha=mybranchsha'),
        body=git_repo_tar,
        content_type='application/octet_stream',
        headers={
            'Transfer-Encoding': 'binary',
            'Content-Length': '287'
        },
        stream=True,
        status=200)

    resp = client.post('/repos/event',
                       headers=tag_push_headers,
                       data=json.dumps(tag_push_payload_shortened))

    assert resp.status_code == 200
    assert resp.json == {'message': 'Snapshot of repository was saved.'}
    assert responses.calls[0].request.headers['Private-Token'] == 'some-token'

    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/gitlab.cern.ch/owner_name/myrepository/v3.0.0.tar.gz')
    tar_obj = tarfile.open(obj.file.uri)
    repo_file_name = tar_obj.getmembers()[1]
    repo_content = tar_obj.extractfile(repo_file_name).read()
    assert repo_content == b'test repo for cap\n'

    snapshot = gitlab_release_webhook_sub.snapshots[0]
    assert obj.snapshot_id == snapshot.id
    assert GitSnapshot.query.count() == 1
    assert snapshot.payload == {
        'event_type': 'release',
        'author': {
            'name': 'owner_name',
            'id': 1
        },
        'link': 'https://gitlab.com/owner_name/myrepository/tags/v3.0.0',
        'release': {
            'tag': 'v3.0.0',
            'name': 'My release'
        }
    }

def sync_buckets(src_bucket, dest_bucket, delete_extras=False):
    """Sync source bucket ObjectVersions to the destination bucket.

    The source bucket is fully mirrored into the destination bucket
    following this logic:

    * identical ObjectVersions are not touched
    * new ObjectVersions are added to the destination
    * deleted ObjectVersions are deleted in the destination
    * extra ObjectVersions in the destination are deleted if the
      ``delete_extras`` param is True

    :param src_bucket: Source bucket.
    :param dest_bucket: Destination bucket.
    :param delete_extras: Delete extra ObjectVersions in destination if True.
    :returns: The bucket with an exact copy of the ObjectVersions in
        ``src_bucket``.
    """
    assert not dest_bucket.locked

    src_ovs = ObjectVersion.query.filter(
        ObjectVersion.bucket_id == src_bucket.id,
        ObjectVersion.is_head.is_(True)).all()
    dest_ovs = ObjectVersion.query.filter(
        ObjectVersion.bucket_id == dest_bucket.id,
        ObjectVersion.is_head.is_(True)).all()

    # transform into a dict { key: object version }
    src_keys = {ov.key: ov for ov in src_ovs}
    dest_keys = {ov.key: ov for ov in dest_ovs}

    for key, ov in src_keys.items():
        if not ov.deleted:
            if key not in dest_keys or \
                    ov.file_id != dest_keys[key].file_id:
                ov.copy(bucket=dest_bucket)
        elif key in dest_keys and not dest_keys[key].deleted:
            ObjectVersion.delete(dest_bucket, key)

    if delete_extras:
        for key, ov in dest_keys.items():
            if key not in src_keys:
                ObjectVersion.delete(dest_bucket, key)

    return dest_bucket

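# Hedged usage sketch for sync_buckets() above, not part of the original
# module. It assumes an Invenio app context with a configured default
# Location; keys and file contents are made up.
def _sync_buckets_example():
    from io import BytesIO

    from invenio_db import db
    from invenio_files_rest.models import Bucket, ObjectVersion

    src = Bucket.create()
    dest = Bucket.create()
    ObjectVersion.create(src, 'kept.txt', stream=BytesIO(b'payload'))
    ObjectVersion.create(dest, 'extra.txt', stream=BytesIO(b'stale'))
    db.session.commit()

    # Mirror src into dest: 'kept.txt' is copied over, while 'extra.txt'
    # gets a delete marker because delete_extras=True.
    sync_buckets(src, dest, delete_extras=True)
    db.session.commit()
    assert {o.key for o in ObjectVersion.get_by_bucket(dest)} == {'kept.txt'}
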
def test_bucket_sync_delete_extras(app, db, dummy_location):
    """Test that an extra object in dest is deleted when syncing."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b2, "filename").set_location("b2v1", 1, "achecksum")
    ObjectVersion.create(b2, "extra-deleted").set_location("b3v1", 1, "asum")
    db.session.commit()

    b1.sync(b2, delete_extras=True)

    assert ObjectVersion.get_by_bucket(b1).count() == 1
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert not ObjectVersion.get(b2, "extra-deleted")

def create_b2safe_file(external_pids, bucket):
    """Create a FileInstance which contains a PID in its uri."""
    validate_schema(external_pids, {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'ePIC_PID': {'type': 'string'},
                'key': {'type': 'string'}
            },
            'additionalProperties': False,
            'required': ['ePIC_PID', 'key']
        }
    })

    keys_list = [e['key'] for e in external_pids]
    keys_set = set(keys_list)
    if len(keys_list) != len(keys_set):
        raise InvalidDepositError(
            [FieldError('external_pids',
                        'Field external_pids contains duplicate keys.')])
    for external_pid in external_pids:
        if not external_pid['ePIC_PID'].startswith('http://hdl.handle.net/'):
            external_pid['ePIC_PID'] = 'http://hdl.handle.net/' + \
                external_pid['ePIC_PID']
        if external_pid['key'].startswith('/'):
            raise InvalidDepositError(
                [FieldError('external_pids',
                            'File key cannot start with a "/".')])
        try:
            # Create the file instance if it does not already exist
            file_instance = FileInstance.get_by_uri(external_pid['ePIC_PID'])
            if file_instance is None:
                file_instance = FileInstance.create()
                file_instance.set_uri(
                    external_pid['ePIC_PID'], 1, 0, storage_class='B')
            assert file_instance.storage_class == 'B'
            # Add the file to the bucket if it is not already in it
            current_version = ObjectVersion.get(bucket, external_pid['key'])
            if not current_version or \
                    current_version.file_id != file_instance.id:
                ObjectVersion.create(bucket, external_pid['key'],
                                     file_instance.id)
        except IntegrityError as e:
            raise InvalidDepositError(
                [FieldError('external_pids', 'File URI already exists.')])

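# Hedged example payload for create_b2safe_file() above; the handle values
# are made up and `bucket` is assumed to exist. Note that a bare PID is
# prefixed with the handle resolver URL by the function, and keys must not
# start with '/'.
external_pids = [
    {'ePIC_PID': '11304/0000-0000-0000-0000', 'key': 'data/file1.txt'},
    {'ePIC_PID': 'http://hdl.handle.net/11304/0000-0000-0000-0001',
     'key': 'file2.txt'},
]
create_b2safe_file(external_pids, bucket)
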
def test_object_set_file(app, db, dummy_location):
    """Test object set file."""
    b = Bucket.create()
    f = FileInstance(uri="f1", size=1, checksum="mychecksum")
    obj = ObjectVersion.create(b, "test").set_file(f)
    db.session.commit()
    assert obj.file == f

    assert pytest.raises(FileInstanceAlreadySetError, obj.set_file, f)

def get_master_object(bucket):
    """Get master ObjectVersion from a bucket."""
    # TODO do as we do in `get_master_video_file()`?
    return ObjectVersion.get_by_bucket(bucket).join(
        ObjectVersionTag
    ).filter(
        ObjectVersionTag.key == 'context_type',
        ObjectVersionTag.value == 'master'
    ).one_or_none()

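# Hedged sketch (not from the original module): an object is found by
# get_master_object() once it carries a 'context_type' = 'master' tag, as
# set by the _create_object() helpers elsewhere in this section. Assumes an
# existing bucket and the usual invenio_files_rest imports.
obj = ObjectVersion.create(bucket, 'video.mp4', stream=BytesIO(b'bits'))
ObjectVersionTag.create(obj, 'context_type', 'master')
db.session.commit()
assert get_master_object(bucket).version_id == obj.version_id
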
def test_SIP_create(app, db, mocker):
    """Test the create method from SIP API."""
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we create a file
    content = b'test lol\n'
    bucket = Bucket.create()
    obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content))
    db.session.commit()
    files = [obj]
    # setup metadata
    mjson = SIPMetadataType(title='JSON Test', name='json-test',
                            format='json', schema='url')
    marcxml = SIPMetadataType(title='MARC XML Test', name='marcxml-test',
                              format='xml', schema='uri')
    db.session.add(mjson)
    db.session.add(marcxml)
    metadata = {
        'json-test': json.dumps({'this': 'is', 'not': 'sparta'}),
        'marcxml-test': '<record></record>'
    }
    # Let's create a SIP
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    sip = SIP.create(True, files=files, metadata=metadata, user_id=user.id,
                     agent=agent)
    db.session.commit()
    assert SIP_.query.count() == 1
    assert len(sip.files) == 1
    assert len(sip.metadata) == 2
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert sip.user.id == user.id
    assert sip.agent == agent
    # we mock the user and the agent to test if the creation works
    app.config['SIPSTORE_AGENT_JSONSCHEMA_ENABLED'] = False
    mock_current_user = mocker.patch('invenio_sipstore.api.current_user')
    type(mock_current_user).is_anonymous = mocker.PropertyMock(
        return_value=True)
    sip = SIP.create(True, files=files, metadata=metadata)
    assert sip.model.user_id is None
    assert sip.user is None
    assert sip.agent == {}
    # finalization
    rmtree(tmppath)

def test_file_listener(db, document_with_file):
    """Test file listener when file is modified."""
    # Remove files
    document_with_file['_files'] = []
    document_with_file.commit()
    db.session.commit()

    # Reload record
    record = DocumentRecord.get_record_by_pid(document_with_file['pid'])
    assert not record['_files']

    object_version = ObjectVersion.get_by_bucket(
        document_with_file['_bucket'])
    file_uploaded_listener(object_version)

    assert len(document_with_file.files) == 3

    object_version = ObjectVersion.get_by_bucket(
        document_with_file['_bucket'])
    file_deleted_listener(object_version)

def get_version(self, version_id=None):
    """Return specific version ``ObjectVersion`` instance or HEAD.

    :param version_id: Version ID of the object.
    :returns: :class:`~invenio_files_rest.models.ObjectVersion` instance or
        HEAD of the stored object.
    """
    return ObjectVersion.get(bucket=self.obj.bucket, key=self.obj.key,
                             version_id=version_id)

def _create_object(cls, bucket, key, stream, size, media_type, context_type,
                   master_id, **tags):
    """Create object versions with given type and tags."""
    obj = ObjectVersion.create(
        bucket=bucket, key=key, stream=stream, size=size)
    ObjectVersionTag.create(obj, 'master', str(master_id))
    ObjectVersionTag.create(obj, 'media_type', media_type)
    ObjectVersionTag.create(obj, 'context_type', context_type)
    for k, v in tags.items():
        ObjectVersionTag.create(obj, k, v)

def create_object(key, media_type, context_type, **tags):
    """Create object versions with given type and tags."""
    obj = ObjectVersion.create(bucket=self.object.bucket, key=key,
                               stream=open(in_output(key), 'rb'))
    ObjectVersionTag.create(obj, 'master', self.obj_id)
    ObjectVersionTag.create(obj, 'media_type', media_type)
    ObjectVersionTag.create(obj, 'context_type', context_type)
    for k, v in tags.items():
        ObjectVersionTag.create(obj, k, v)

def _force_sync_deposit_bucket(record):
    """Replace deposit bucket with a copy of the record bucket."""
    deposit = Video.get_record(record.depid.object_uuid)
    # if deposit['_deposit']['status'] == 'draft':
    #     raise RuntimeError('Deposit in edit mode: {0}'.format(deposit.id))
    deposit_old_bucket = deposit.files.bucket

    # create a copy of record bucket
    new_bucket = record.files.bucket.snapshot()
    new_bucket.locked = False
    db.session.commit()

    rb = RecordsBuckets.query.filter(
        RecordsBuckets.bucket_id == deposit_old_bucket.id).one()
    rb.bucket = new_bucket
    db.session.add(rb)
    db.session.commit()

    # Put tags correctly pointing to the right object
    master_file = CDSVideosFilesIterator.get_master_video_file(record)
    if master_file:
        master_deposit_obj = ObjectVersion.get(new_bucket,
                                               master_file['key'])
        for slave in ObjectVersion.query_heads_by_bucket(
                bucket=new_bucket).join(ObjectVersion.tags).filter(
                    ObjectVersion.file_id.isnot(None),
                    ObjectVersionTag.key == 'master'):
            ObjectVersionTag.create_or_update(
                slave, 'master', str(master_deposit_obj.version_id))
            db.session.add(slave)
    db.session.commit()

    # Delete the old bucket
    deposit_old_bucket.locked = False
    _ = deposit_old_bucket.remove()

    deposit['_buckets']['deposit'] = str(new_bucket.id)
    record['_buckets']['deposit'] = str(new_bucket.id)
    record['_deposit'] = deposit['_deposit']
    deposit['_files'] = deposit.files.dumps()
    deposit.commit()
    record.commit()
    db.session.commit()
    return deposit_old_bucket.id, new_bucket.id

def test_tag_manager_update(api, users, location, es, update_style):
    """Test updating tags through TagManager."""
    with api.test_request_context():
        bucket = Bucket.create()
        object_version = ObjectVersion.create(bucket=bucket, key="hello")
        ObjectVersionTag.create(
            object_version=object_version,
            key=ObjectTagKey.Packaging.value,
            value="old-packaging",
        )
        ObjectVersionTag.create(
            object_version=object_version,
            key=ObjectTagKey.MetadataFormat.value,
            value="old-metadata",
        )

        tags = TagManager(object_version)

        assert (
            ObjectVersionTag.query.filter_by(
                object_version=object_version).count() == 2
        )
        assert tags == {
            ObjectTagKey.Packaging: "old-packaging",
            ObjectTagKey.MetadataFormat: "old-metadata",
        }

        if update_style == "dict":
            tags.update(
                {
                    ObjectTagKey.MetadataFormat: "new-metadata",
                    ObjectTagKey.DerivedFrom: "new-derived-from",
                }
            )
        elif update_style == "kwargs":
            tags.update(
                **{
                    ObjectTagKey.MetadataFormat.value: "new-metadata",
                    ObjectTagKey.DerivedFrom.value: "new-derived-from",
                }
            )

        assert tags == {
            ObjectTagKey.Packaging: "old-packaging",
            ObjectTagKey.MetadataFormat: "new-metadata",
            ObjectTagKey.DerivedFrom: "new-derived-from",
        }
        assert (
            ObjectVersionTag.query.filter_by(
                object_version=object_version).count() == 3
        )

        db.session.refresh(object_version)
        assert object_version.get_tags() == {
            ObjectTagKey.Packaging.value: "old-packaging",
            ObjectTagKey.MetadataFormat.value: "new-metadata",
            ObjectTagKey.DerivedFrom.value: "new-derived-from",
        }

def test_object(db, bucket):
    """A test object in the bucket."""
    data_bytes = b('test object')
    obj = ObjectVersion.create(bucket, 'test.txt',
                               stream=BytesIO(data_bytes),
                               size=len(data_bytes))
    db.session.commit()
    return obj

def _copy_files(self, source_record, target_record, source_record_context,
                target_record_context):
    """Copy files from the source record to the target record."""
    draft_by_key = {x['key']: x for x in source_record.get('_files', [])}
    published_files = []
    for ov in ObjectVersion.get_by_bucket(bucket=source_record.bucket):
        file_md = copy.copy(draft_by_key.get(ov.key, {}))
        if self._copy_file(source_record, ov, target_record, file_md,
                           source_record_context, target_record_context):
            published_files.append(file_md)
    target_record['_files'] = published_files

def _create_bucket(deposit, record_json, directory, logfile, verbose):
    """Load the downloaded files of a record into the deposit bucket."""
    for index, file_dict in enumerate(record_json.get('files', [])):
        if not file_dict.get('name'):
            click.secho(' Ignore file with no name "{}"'.format(
                file_dict.get('url')), fg='red')
        else:
            if verbose:
                click.secho(' Load file "{}"'.format(
                    file_dict.get('name')))
            filepath = os.path.join(directory, 'file_{}'.format(index))
            if int(os.path.getsize(filepath)) != int(file_dict.get('size')):
                logfile.write(
                    "***** downloaded file size differs, {} ******".format(
                        filepath))
            else:
                with open(filepath, 'r+b') as f:
                    ObjectVersion.create(deposit.files.bucket,
                                         file_dict['name'],
                                         stream=BytesIO(f.read()))

def test_deposit_poster_tags(api_app, db, api_project, users):
    """Test poster tag generation."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    master_video_filename = 'test.mp4'
    poster_filename = 'poster.jpg'
    poster_filename2 = 'poster.png'

    # insert a master file inside the video
    add_master_to_video(
        video_deposit=video_1,
        filename=master_video_filename,
        stream=BytesIO(b'1234'),
        video_duration='15'
    )
    # insert a new poster frame object
    obj = ObjectVersion.create(
        video_1._bucket, key=poster_filename, stream=BytesIO(b'hello'))

    # publish the video
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()

    # check tags
    check_object_tags(obj, video_1, content_type='jpg',
                      context_type='poster', media_type='image')

    # edit the video
    video_1 = video_1.edit()

    # delete the old poster frame and substitute it with a new one
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=obj.key)
    obj2 = ObjectVersion.create(
        video_1._bucket, key=poster_filename2, stream=BytesIO(b'hello'))

    # publish again the video
    video_1 = video_1.publish()

    # check tags
    check_object_tags(obj2, video_1, content_type='png',
                      context_type='poster', media_type='image')

def test_bucket_modification(app, db, location, record):
    """Test direct modification of bucket."""
    record.files['hello.txt'] = BytesIO(b'Hello world!')
    record.files['hello.txt']['type'] = 'txt'

    # Modify bucket outside of record.files property
    ObjectVersion.create(
        record.bucket, 'second.txt', stream=BytesIO(b'Second'))

    # Bucket and record are out of sync:
    assert len(record.files) == 2
    assert len(record['_files']) == 1

    # Flush changes to ensure they are in sync.
    record.files.flush()
    assert len(record['_files']) == 2

    # Check that extra metadata is not overwritten.
    assert [f.get('type') for f in record.files] == ['txt', None]

def test_non_binary_doesnt_shortcut_unpack(
    api, location, es, packaging_cls: Type[Packaging]
):
    """Test that a non-binary file does not short-circuit unpacking."""
    with api.test_request_context():
        record = SWORDDeposit.create({})
        object_version = ObjectVersion.create(
            bucket=record.bucket, key="some-file.txt",
            stream=io.BytesIO(b"data")
        )
        packaging = packaging_cls(record)
        assert packaging.shortcut_unpack(object_version) == NotImplemented

def __delitem__(self, key):
    """Delete a file from the deposit."""
    obj = ObjectVersion.delete(bucket=self.bucket, key=key)

    if obj is None:
        raise KeyError(key)

    if key in self.filesmap:
        del self.filesmap[key]
        self.flush()