def test_object_delete(app, db, dummy_location):
    """Test object creation."""
    # Create three versions, with latest being a delete marker.
    with db.session.begin_nested():
        b1 = Bucket.create()
        ObjectVersion.create(b1, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(b1, "test").set_location(
            "b1test2", 1, "achecksum")
        obj_deleted = ObjectVersion.delete(b1, "test")

    assert ObjectVersion.query.count() == 3
    # The delete marker hides the key from normal lookups.
    assert ObjectVersion.get(b1, "test") is None
    assert ObjectVersion.get_by_bucket(b1).count() == 0

    # The delete marker itself is still reachable by version id.
    marker = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id)
    assert marker.is_deleted
    assert marker.file_id is None

    # Uploading again resurrects the key.
    ObjectVersion.create(b1, "test").set_location(
        "b1test4", 1, "achecksum")
    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(b1.id, "test") is not None
    assert ObjectVersion.get_by_bucket(b1.id).count() == 1
def test_bucket_sync(app, db, dummy_location):
    """Test that a bucket is correctly synced."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    # Seed the source bucket with three files and the target with one extra.
    for key, uri in (("filename1", "b1v11"),
                     ("filename2", "b1v12"),
                     ("filename3", "b1v13")):
        ObjectVersion.create(b1, key).set_location(uri, 1, "achecksum")
    ObjectVersion.create(b2, "extra1").set_location("b2v11", 1, "achecksum")
    db.session.commit()

    # Plain sync copies source objects but keeps extras in the target.
    b1.sync(b2)
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 4

    ObjectVersion.delete(b1, "filename1")
    ObjectVersion.create(b2, "extra2").set_location("b2v12", 1, "achecksum")
    ObjectVersion.create(b2, "extra3").set_location("b2v13", 1, "achecksum")
    ObjectVersion.delete(b2, "extra3")
    db.session.commit()

    # With delete_extras, target-only objects are removed as well.
    b1.sync(b2, delete_extras=True)
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b2).count() == 2
def test_object_delete(app, db, dummy_location):
    """Test object creation."""
    # Create three versions, with latest being a delete marker.
    with db.session.begin_nested():
        b1 = Bucket.create()
        ObjectVersion.create(b1, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(b1, "test").set_location(
            "b1test2", 1, "achecksum")
        obj_deleted = ObjectVersion.delete(b1, "test")

    assert ObjectVersion.query.count() == 3
    # A delete marker hides the key from the default accessors.
    assert ObjectVersion.get(b1, "test") is None
    assert ObjectVersion.get_by_bucket(b1).count() == 0

    # The marker is still addressable through its version id.
    marker = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id)
    assert marker.deleted
    assert marker.file_id is None

    # A fresh upload makes the key visible again.
    ObjectVersion.create(b1, "test").set_location(
        "b1test4", 1, "achecksum")
    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(b1.id, "test") is not None
    assert ObjectVersion.get_by_bucket(b1.id).count() == 1
def test_exporter(app, db, es, exporter_bucket, record_with_files_creation):
    """Test record exporter."""
    pid, record, record_url = record_with_files_creation
    RecordIndexer().index_by_id(record.id)
    current_search.flush_and_refresh('records')
    with app.app_context():
        # The bucket starts empty; the export job must add exactly one object.
        assert ObjectVersion.get_by_bucket(exporter_bucket).count() == 0
        export_job(job_id='records')
        assert ObjectVersion.get_by_bucket(exporter_bucket).count() == 1
def test_bucket_sync_new_object(app, db, dummy_location):
    """Test that a new file in src is synced to dest."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename").set_location("b1v1", 1, "achecksum")
    db.session.commit()

    # Before syncing, only the source bucket holds the object.
    assert ObjectVersion.get_by_bucket(b1).count() == 1
    assert ObjectVersion.get_by_bucket(b2).count() == 0

    b1.sync(b2)

    assert ObjectVersion.get_by_bucket(b1).count() == 1
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get(b2, "filename")
def test_bucket_sync_delete_extras(app, db, dummy_location):
    """Test that an extra object in dest is deleted when syncing."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b2, "filename").set_location("b2v1", 1, "achecksum")
    ObjectVersion.create(b2, "extra-deleted").set_location("b3v1", 1, "asum")
    db.session.commit()

    b1.sync(b2, delete_extras=True)

    assert ObjectVersion.get_by_bucket(b1).count() == 1
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    # The object absent from the source must be gone from the destination.
    assert not ObjectVersion.get(b2, "extra-deleted")
def get(key, record):
    # Dump every object of the record's bucket and project out one field.
    bucket = CDSRecordDumpLoader._get_bucket(record=record)
    dumps = (
        dump_object(obj)
        for obj in ObjectVersion.get_by_bucket(bucket=bucket)
    )
    return [dump[key] for dump in dumps]
def check_files(video):
    """Check the dumped file entries of *video* for the required fields.

    Verifies that every dumped object references a resolvable bucket and an
    existing ``FileInstance``, carries the expected metadata keys, and that
    the extracted metadata contains at least one known key.
    """
    bucket = CDSRecordDumpLoader._get_bucket(record=video)
    files = [
        dump_object(obj)
        for obj in ObjectVersion.get_by_bucket(bucket=bucket)
    ]
    # Deduplicated list of required keys (the original asserted
    # 'content_type'/'context_type' twice).
    required_keys = (
        'checksum', 'content_type', 'context_type', 'key', 'links',
        'media_type', 'tags',
    )
    for file_ in files:
        assert as_bucket(file_['bucket_id']) is not None
        for key in required_keys:
            assert key in file_
        # Bug fix: a Query object is never None, so the original assertion
        # was vacuous — actually fetch the row to verify it exists.
        assert FileInstance.query.filter_by(
            id=file_['file_id']).first() is not None
    # check extracted metadata
    master_video = CDSVideosFilesIterator.get_master_video_file(video)
    assert any(
        key in master_video['tags'] for key in ExtractMetadataTask._all_keys)
    assert any(
        key in video['_cds']['extracted_metadata']
        for key in ExtractMetadataTask._all_keys)
def dumps(self):
    """Generate IIIF manifest using the IIIF Image API.

    :returns: the manifest as a JSON-serializable dict, or ``{}`` when the
        deposit bucket contains no previewable images.
    """
    bucket = ''
    if '_buckets' in self.record:
        if 'deposit' in self.record['_buckets']:
            bucket = self.record['_buckets']['deposit']
    images = [
        obj for obj in ObjectVersion.get_by_bucket(bucket).all()
        if can_preview(PreviewFile(None, None, obj))
    ]
    if not images:
        return {}  # Didn't find any image inside the bucket
    sequence = self.manifest.sequence()
    for page, obj in enumerate(images):
        canvas = sequence.canvas(ident=f'page-{page}', label=f'page-{page}')
        anno = canvas.annotation()
        # Use a distinct name for the IIIF resource instead of shadowing the
        # loop variable, and size the canvas from the image dimensions.
        iiif_image = anno.image(ident=iiif_image_key(obj), iiif=True)
        iiif_image.set_hw_from_iiif()
        canvas.height = iiif_image.height
        canvas.width = iiif_image.width
    return self.manifest.toJSON(top=True)
def sorted_files_from_bucket(bucket, keys=None):
    """Return files from bucket sorted by given keys."""
    keys = keys or []
    total = len(keys)
    # Map each wanted key to its position; unknown keys sort last.
    order = {key: position for position, key in enumerate(keys)}
    objects = ObjectVersion.get_by_bucket(bucket).all()
    return sorted(objects, key=lambda obj: order.get(obj.key, total))
def test_bucket_sync_same_object(app, db, dummy_location):
    """Test that an existing file in src and dest is not changed."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename").set_location("b1v1", 1, "achecksum")
    b1.sync(b2)
    db.session.commit()

    b1_version_id = ObjectVersion.get(b1, "filename").version_id
    b2_version_id = ObjectVersion.get(b2, "filename").version_id

    # A second sync must not create new versions on either side.
    b1.sync(b2)

    assert ObjectVersion.get_by_bucket(b1).count() == 1
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get(b1, "filename").version_id == b1_version_id
    assert ObjectVersion.get(b2, "filename").version_id == b2_version_id
def _update_timestamp(cls, deposit):
    """Update timestamp from percentage to seconds.

    The dumped ``timestamp`` tag is a percentage of the video duration;
    rewrite it as an absolute number of seconds.
    """
    duration = float(deposit['_cds']['extracted_metadata']['duration'])
    bucket = CDSRecordDumpLoader._get_bucket(record=deposit)
    for obj in ObjectVersion.get_by_bucket(bucket=bucket):
        # Fetch the tags once per object (the original called get_tags()
        # twice and used a redundant .keys()).
        tags = obj.get_tags()
        if 'timestamp' in tags:
            timestamp = duration * float(tags['timestamp']) / 100
            ObjectVersionTag.create_or_update(obj, 'timestamp', timestamp)
def check_tag_master(record):
    # Every dumped object tagged 'master' must point at the master video.
    bucket = CDSRecordDumpLoader._get_bucket(record=record)
    master = CDSVideosFilesIterator.get_master_video_file(record)
    dumps = [
        dump_object(obj)
        for obj in ObjectVersion.get_by_bucket(bucket=bucket)
        if obj.get_tags().get('master')
    ]
    for dump in dumps:
        assert dump['tags']['master'] == master['version_id']
def _update_tag_master(cls, record):
    """Update tag master of files dependent from master."""
    bucket = cls._get_bucket(record=record)
    master_video = CDSVideosFilesIterator.get_master_video_file(record)
    master_id = master_video['version_id']
    for obj in ObjectVersion.get_by_bucket(bucket=bucket):
        # Only objects derived from the master (e.g. subformats) get the tag.
        if obj.get_tags()['context_type'] in cls.dependent_objs:
            ObjectVersionTag.create_or_update(obj, 'master', master_id)
def test_file_listener(db, document_with_file):
    """Test file listener when file is modified."""
    # Remove files from the record metadata.
    document_with_file['_files'] = []
    document_with_file.commit()
    db.session.commit()

    # Reload record and verify the files are gone from the metadata.
    record = DocumentRecord.get_record_by_pid(document_with_file['pid'])
    assert not record['_files']

    bucket = document_with_file['_bucket']
    file_uploaded_listener(ObjectVersion.get_by_bucket(bucket))
    assert len(document_with_file.files) == 3

    file_deleted_listener(ObjectVersion.get_by_bucket(bucket))
def get_master_object(bucket):
    """Get master ObjectVersion from a bucket."""
    # TODO do as we do in `get_master_video_file()`?
    query = ObjectVersion.get_by_bucket(bucket).join(ObjectVersionTag)
    query = query.filter(
        ObjectVersionTag.key == 'context_type',
        ObjectVersionTag.value == 'master',
    )
    return query.one_or_none()
def _copy_files(self, source_record, target_record, source_record_context,
                target_record_context):
    # Index the draft's file metadata by key for quick lookup.
    metadata_by_key = {
        meta['key']: meta for meta in source_record.get('_files', [])
    }
    published_files = []
    for obj in ObjectVersion.get_by_bucket(bucket=source_record.bucket):
        file_md = copy.copy(metadata_by_key.get(obj.key, {}))
        copied = self._copy_file(
            source_record, obj, target_record, file_md,
            source_record_context, target_record_context)
        if copied:
            published_files.append(file_md)
    target_record['_files'] = published_files
def delete_old_file_index(self):
    """Delete old file index before file upload when edit an item."""
    if self.is_edit:
        # Non-head versions are the stale ones left over from the edit.
        objects = ObjectVersion.get_by_bucket(
            self.files.bucket, True).filter_by(is_head=False).all()
        # Comprehension instead of a manual append loop.
        file_ids = [obj.file_id for obj in objects if obj.file_id]
        if file_ids:
            self.indexer.delete_file_index(file_ids, self.pid.object_uuid)
def _resolve_dumps(cls, record):
    """Build files dump."""
    bucket = cls._get_bucket(record=record)
    files = []
    for obj in ObjectVersion.get_by_bucket(bucket=bucket):
        # skip for dependent objs (like subformats)
        if obj.get_tags()['context_type'] in cls.dependent_objs:
            continue
        dump = {}
        dump_generic_object(obj=obj, data=dump)
        if dump:
            files.append(dump)
    record['_files'] = files
def sorted_files_from_bucket(bucket, keys=None):
    """Return files from bucket sorted by given keys.

    :param bucket: :class:`~invenio_files_rest.models.Bucket` containing
        the files.
    :param keys: Keys order to be used.
    :returns: Sorted list of bucket items.
    """
    keys = keys or []
    total = len(keys)
    # Position of each requested key; anything else sorts to the end.
    rank = {key: index for index, key in enumerate(keys)}
    objects = ObjectVersion.get_by_bucket(bucket).all()
    return sorted(objects, key=lambda obj: rank.get(obj.key, total))
def _resolve_master_tag(cls, deposit):
    """Create the master tag for dependent files."""
    # build a partial files dump
    cls._resolve_dumps(record=deposit)
    # locate the master video and the deposit bucket
    master_video = CDSVideosFilesIterator.get_master_video_file(deposit)
    bucket = cls._get_bucket(record=deposit)
    master_id = master_video['version_id']
    # attach the master tag to every dependent object
    for obj in ObjectVersion.get_by_bucket(bucket=bucket):
        if obj.get_tags()['context_type'] in cls.dependent_objs:
            ObjectVersionTag.create(obj, 'master', master_id)
def test_bucket_sync_deleted_object(app, db, dummy_location):
    """Test that a deleted object in src is deleted in dest."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b2, "filename").set_location("b2v1", 1, "achecksum")
    ObjectVersion.create(b2, "extra-deleted").set_location("b3v1", 1, "asum")
    ObjectVersion.delete(b1, "filename")
    db.session.commit()

    b1.sync(b2)

    assert ObjectVersion.get_by_bucket(b1).count() == 0
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    # A plain sync (no delete_extras) leaves target-only objects alone.
    assert ObjectVersion.get(b2, "extra-deleted")

    ObjectVersion.delete(b2, "extra-deleted")
    db.session.commit()

    b1.sync(b2)

    assert ObjectVersion.get_by_bucket(b1).count() == 0
    assert ObjectVersion.get_by_bucket(b2).count() == 0
def _merge_related_objects(self, record_id, snapshot, data): """.""" # dict of version_ids in original bucket to version_ids in # snapshot bucket for the each file snapshot_obj_list = ObjectVersion.get_by_bucket(bucket=snapshot) old_to_new_version = { str(self.files[obj.key]['version_id']): str(obj.version_id) for obj in snapshot_obj_list if 'master' not in obj.get_tags() and obj.key in self.files } # list of tags with 'master' key slave_tags = [ tag for obj in snapshot_obj_list for tag in obj.tags if tag.key == 'master' ] # change master of slave videos to new master object versions for tag in slave_tags: # note: the smil file probably already point to the right # record bucket and it doesn't need update new_master_id = old_to_new_version.get(tag.value) if new_master_id: tag.value = new_master_id db.session.add_all(slave_tags) # FIXME bug when dump a different bucket backup = deepcopy(self['_files']) # Generate SMIL file data['_files'] = self.files.dumps(bucket=snapshot.id) master_video = get_master_object(snapshot) if master_video: from cds.modules.records.serializers.smil import generate_smil_file generate_smil_file(record_id, data, snapshot, master_video) # Update metadata with SMIL file information data['_files'] = self.files.dumps(bucket=snapshot.id) # FIXME bug when dump a different bucket self['_files'] = backup snapshot.locked = True yield data db.session.add( RecordsBuckets(record_id=record_id, bucket_id=snapshot.id))
def preview(pid, record, **kwargs):
    """Preview file."""
    # The filename may come from the URL path or from the query string.
    filename = request.view_args.get(
        'filename', request.args.get('filename', type=str))
    if not filename:
        # Fall back to the object tagged 'preview' in the deposit bucket.
        bucket_id = record['_buckets']['deposit']
        obj = ObjectVersion.get_by_bucket(bucket_id).join(
            ObjectVersion.tags
        ).filter(
            ObjectVersionTag.key == 'preview'
        ).one_or_none()
        if obj is None:
            abort(404)
        filename = obj.key
    return _try_previewers(pid, record, filename, **kwargs)
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "versioned").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b1, "versioned").set_location("b1v2", 1, "achecksum")
    ObjectVersion.create(b1, "deleted").set_location("b1d1", 1, "achecksum")
    ObjectVersion.delete(b1, "deleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u1", 1, "achecksum")
    ObjectVersion.delete(b1, "undeleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u2", 1, "achecksum")
    ObjectVersion.create(b1, "simple").set_location("b1s1", 1, "achecksum")
    ObjectVersion.create(b2, "another").set_location("b2a1", 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    # check that for 'undeleted' key there is only one HEAD
    undeleted = ObjectVersion.query.filter_by(
        bucket_id=b1.id, key='undeleted').all()
    heads = [o for o in undeleted if o.is_head]
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    b3 = b1.snapshot(lock=True)
    db.session.commit()

    # Must be locked as requested.
    assert b1.locked is False
    assert b3.locked is True

    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b3).count() == 3
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(b3, versions=True).count() == 3
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record."""
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            # Drop the existing bucket together with all its objects.
            files_bucket = record.files.bucket
            bucket = Bucket.get(files_bucket.id)
            for obj in ObjectVersion.get_by_bucket(bucket).all():
                obj.remove()
                obj.file.delete()
            RecordsBuckets.query.filter_by(
                record=record.model, bucket=bucket).delete()
            files_bucket.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        # Attach a fresh bucket with the new files.
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "versioned").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b1, "versioned").set_location("b1v2", 1, "achecksum")
    ObjectVersion.create(b1, "deleted").set_location("b1d1", 1, "achecksum")
    ObjectVersion.delete(b1, "deleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u1", 1, "achecksum")
    ObjectVersion.delete(b1, "undeleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u2", 1, "achecksum")
    ObjectVersion.create(b1, "simple").set_location("b1s1", 1, "achecksum")
    ObjectVersion.create(b2, "another").set_location("b2a1", 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    # check that for 'undeleted' key there is only one HEAD
    versions = ObjectVersion.query.filter_by(
        bucket_id=b1.id, key='undeleted').all()
    heads = [version for version in versions if version.is_head]
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    b3 = b1.snapshot(lock=True)
    db.session.commit()

    # Must be locked as requested.
    assert b1.locked is False
    assert b3.locked is True

    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b3).count() == 3
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(b3, versions=True).count() == 3
def test_object_get_by_bucket(app, db, dummy_location):
    """Test object listing."""
    b1 = Bucket.create()
    b2 = Bucket.create()

    # Three successive versions of the same key in b1.
    obj1_first = ObjectVersion.create(b1, "test")
    obj1_first.set_location("b1test1", 1, "achecksum")
    obj1_intermediate = ObjectVersion.create(b1, "test")
    obj1_intermediate.set_location("b1test2", 1, "achecksum")
    obj1_latest = ObjectVersion.create(b1, "test")
    obj1_latest.set_location("b1test3", 1, "achecksum")

    # Create objects in/not in same bucket using different key.
    ObjectVersion.create(b1, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(b2, "test").set_location("b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    assert ObjectVersion.get(b1, "test")
    assert ObjectVersion.get(b1, "another")
    assert ObjectVersion.get(b2, "test")

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b2, versions=True).count() == 1

    # Returned objects are ordered alphabetically by key.
    objs = ObjectVersion.get_by_bucket(b1.id).all()
    assert [o.key for o in objs] == ["another", "test"]

    # With versions, entries for each key are returned newest first.
    objs = ObjectVersion.get_by_bucket(b1.id, versions=True).all()
    assert [o.key for o in objs] == ["another", "test", "test", "test"]
    assert objs[1].version_id == obj1_latest.version_id
    assert objs[2].version_id == obj1_intermediate.version_id
    assert objs[3].version_id == obj1_first.version_id
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record."""
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            # Remove every object of the old bucket, then the bucket itself.
            old_bucket = record.files.bucket
            bucket = Bucket.get(old_bucket.id)
            for obj in ObjectVersion.get_by_bucket(bucket).all():
                obj.remove()
                obj.file.delete()
            RecordsBuckets.query.filter_by(
                record=record.model,
                bucket=bucket,
            ).delete()
            old_bucket.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        # Create and link a new bucket holding the updated files.
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "versioned").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b1, "versioned").set_location("b1v2", 1, "achecksum")
    ObjectVersion.create(b1, "deleted").set_location("b1d1", 1, "achecksum")
    ObjectVersion.delete(b1, "deleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u1", 1, "achecksum")
    ObjectVersion.delete(b1, "undeleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u2", 1, "achecksum")
    ObjectVersion.create(b1, "simple").set_location("b1s1", 1, "achecksum")
    ObjectVersion.create(b2, "another").set_location("b2a1", 1, "achecksum")
    db.session.commit()

    # State before snapshotting.
    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    b3 = b1.snapshot(lock=True)
    db.session.commit()

    # Must be locked as requested.
    assert b1.locked is False
    assert b3.locked is True

    # The snapshot copies heads only; file instances are shared, not copied.
    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b3).count() == 3
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(b3, versions=True).count() == 3
def __contains__(self, key):
    """Test if file exists.

    :param key: Object key to look up (converted to ``str``).
    :returns: ``True`` if an object with that key is in the bucket.
    """
    # Return a proper bool instead of the raw row count (the `in`
    # operator coerces anyway, but direct callers get a cleaner value).
    return bool(
        ObjectVersion.get_by_bucket(
            self.bucket).filter_by(key=str(key)).count())
def checked_files():
    # Count objects in b1 (closure) whose file has been integrity-checked.
    return sum(
        1
        for obj in ObjectVersion.get_by_bucket(b1)
        if obj.file.last_check_at
    )
def __len__(self):
    """Get number of files."""
    objects = ObjectVersion.get_by_bucket(self.bucket)
    return objects.count()
def get_bucket_keys():
    """Return the keys of all objects in ``bucket`` (closure variable)."""
    # No need to materialize the query into a list before iterating.
    return [obj.key for obj in ObjectVersion.get_by_bucket(bucket)]