def create_slave(key):
    """Create a slave object version linked to the master object.

    Uses the enclosing scope's ``bucket_id``, ``video`` (path to the
    video file) and ``master_obj`` (master ``ObjectVersion``).

    :param key: object key for the new slave.
    :returns: the created ``ObjectVersion``.
    """
    # FIX: use a context manager so the file handle is always closed,
    # even if ObjectVersion.create raises (the original leaked it).
    with open(video, 'rb') as stream:
        slave = ObjectVersion.create(bucket=bucket_id, key=key,
                                     stream=stream)
    # Link the slave to its master via the 'master' tag.
    ObjectVersionTag.create(slave, 'master', str(master_obj.version_id))
    return slave
def _copy_file(self, source_record, ov, target_record, file_md,
               source_record_context, target_record_context):
    """Copy one object version into the target record's bucket.

    Sends the ``file_copied`` signal first; if any receiver returns
    ``False`` the file is skipped. On success ``file_md`` is updated
    in place with the new bucket/file/version ids.

    :returns: ``True`` if the file was copied, ``False`` if skipped.
    """
    bucket = target_record.bucket
    # Reuse the same FileInstance (_file_id) instead of copying bytes.
    new_ob = ObjectVersion.create(bucket, ov.key, _file_id=ov.file_id)
    tags = {tag.key: tag.value for tag in ov.tags}
    for _, res in file_copied.send(
            source_record,
            source_record=source_record,
            target_record=target_record,
            object_version=ov,
            tags=tags,
            metadata=file_md,
            source_record_context=source_record_context,
            target_record_context=target_record_context):
        if res is False:
            return False  # a signal receiver vetoed copying this file
    # BUG FIX: iterating a dict yields keys only; unpacking each string
    # key into (key, value) raised ValueError. Use .items() for pairs.
    for key, value in tags.items():
        ObjectVersionTag.create_or_update(object_version=new_ob,
                                          key=key, value=value)
    file_md['bucket'] = str(bucket.id)
    file_md['file_id'] = str(new_ob.file_id)
    file_md['version_id'] = str(new_ob.version_id)
    return True
def _init_object_version(event):
    """Create, if doesn't exists, the version object.

    Resolves (or creates) the ``ObjectVersion`` for a webhook event,
    tags it with the event id and standard video tags, and records the
    version id in ``event.response``.
    """
    event_id = str(event.id)
    with db.session.begin_nested():
        # create a object version if doesn't exists
        if 'version_id' in event.payload:
            # An object already exists: resolve it by its version id.
            version_id = event.payload['version_id']
            object_version = as_object_version(version_id)
        else:
            # No existing object: create one and remember its origin URI.
            object_version = ObjectVersion.create(
                bucket=event.payload['bucket_id'],
                key=event.payload['key'])
            ObjectVersionTag.create(object_version, 'uri_origin',
                                    event.payload['uri'])
            version_id = str(object_version.version_id)
        # add tag with corresponding event
        ObjectVersionTag.create_or_update(object_version, '_event_id',
                                          event_id)
        # add tag for preview
        ObjectVersionTag.create_or_update(object_version, 'preview', True)
        # add tags for file type
        ObjectVersionTag.create_or_update(object_version, 'media_type',
                                          'video')
        ObjectVersionTag.create_or_update(object_version, 'context_type',
                                          'master')
    # Expose the resolved version id to the event response payload.
    event.response['version_id'] = version_id
    return object_version
def save_file(self, content, filename, size, failed=False):
    """Persist a file in the deposit bucket.

    The object version is created even when downloading the content
    failed; in that case it is tagged ``status=failed`` and no bytes
    are attached.

    :param content: stream with the file data
    :param filename: key under which the file is stored
    :param size: size of the content in bytes
    :param failed: whether downloading the content failed
    :returns: the created ``ObjectVersion``
    """
    obj = ObjectVersion.create(bucket=self.files.bucket, key=filename)
    obj.file = FileInstance.create()
    self.files.flush()
    if failed:
        # Mark the empty object so callers can detect the failed fetch.
        ObjectVersionTag.create(object_version=obj, key='status',
                                value='failed')
        print('File {} not saved.\n'.format(filename))
    else:
        self.files[filename].file.set_contents(
            content,
            default_location=self.files.bucket.location.uri,
            size=size)
        print('File {} saved ({}b).\n'.format(filename, size))
    self.files.flush()
    db.session.commit()
    return obj
def _resolve_file(cls, bucket, file_):
    """Resolve file.

    Creates the object version for a migrated file and attaches its
    tags. Returns ``None`` (file skipped) when no ``preset_quality``
    can be determined, otherwise the new object's version id.
    """
    def progress_callback(size, total):
        # NOTE(review): the message interpolates (total, size) in that
        # order — looks intentional ("file X of Y") but confirm against
        # the callback contract of _get_migration_file_stream_and_size.
        logging.debug('Moving file {0} of {1}'.format(total, size))
    # resolve preset info
    tags_to_guess_preset = file_.get('tags_to_guess_preset', {})
    if tags_to_guess_preset:
        # obj=None: preset is guessed from clues only, before the
        # object version exists.
        file_['tags'].update(
            **cls._resolve_preset(obj=None, clues=tags_to_guess_preset))
    # we cannot deal with it now delete the file
    if 'preset_quality' not in file_['tags']:
        return None
    # create object
    stream, size = cls._get_migration_file_stream_and_size(file_=file_)
    obj = ObjectVersion.create(bucket=bucket, key=file_['key'],
                               stream=stream, size=size,
                               progress_callback=progress_callback)
    tags_to_transform = file_.get('tags_to_transform', {})
    # resolve timestamp
    if 'timestamp' in tags_to_transform:
        file_['tags']['timestamp'] = tags_to_transform['timestamp']
    # Add DFS path to run ffmpeg without copying the file
    file_['tags']['dfs_path'] = cls._get_full_path(
        filepath=file_['filepath'])
    # create tags
    for key, value in file_.get('tags', {}).items():
        ObjectVersionTag.create(obj, key, value)
    db.session.commit()
    return obj.version_id
def _update_timestamp(cls, deposit):
    """Update timestamp from percentage to seconds.

    Rewrites every object's ``timestamp`` tag from a percentage of the
    video duration into an absolute number of seconds.
    """
    duration = float(deposit['_cds']['extracted_metadata']['duration'])
    bucket = CDSRecordDumpLoader._get_bucket(record=deposit)
    for obj in ObjectVersion.get_by_bucket(bucket=bucket):
        # Fetch the tags once per object instead of twice.
        tags = obj.get_tags()
        if 'timestamp' not in tags:
            continue
        seconds = duration * float(tags['timestamp']) / 100
        ObjectVersionTag.create_or_update(obj, 'timestamp', seconds)
def __setitem__(self, key: ObjectTagKey, value: Union[str, Enum]):  # type: ignore
    """Set a tag value, coercing and persisting it.

    The locally stored value is coerced through the key's declared
    enum type when one exists, otherwise through ``str``; the raw
    enum's ``.value`` is what gets persisted as the tag.
    """
    # Coerce and store locally: enum type if declared for this key,
    # str() otherwise.
    super().__setitem__(key, self.enum_keys.get(ObjectTagKey(key), str)(value))
    if key in self.enum_keys:
        # Check this is a valid value (raises ValueError if not a
        # member of the declared enum).
        self.enum_keys[key](value)
    if isinstance(value, Enum):
        # Persist the underlying value, not the Enum instance.
        value = value.value
    ObjectVersionTag.create_or_update(self._object_version, key.value, value)
def _update_tag_master(cls, record):
    """Update tag master of files dependent from master.

    Repoints the ``master`` tag of every dependent object version in
    the record's bucket at the current master video's version id.
    """
    bucket = cls._get_bucket(record=record)
    master_video = CDSVideosFilesIterator.get_master_video_file(record)
    master_version = master_video['version_id']
    for obj in ObjectVersion.get_by_bucket(bucket=bucket):
        # Only objects whose context_type marks them as dependent on
        # the master get the tag update.
        if obj.get_tags()['context_type'] not in cls.dependent_objs:
            continue
        ObjectVersionTag.create_or_update(obj, 'master', master_version)
def multipart_uploader(record, key, files, pid, request, endpoint,
                       resolver, ctype, size=None, multipart=False,
                       **kwargs):
    """Multipart upload handler.

    When ``multipart`` is requested and a size is known, registers an
    S3 multipart upload for ``key`` and stores its configuration and
    expiration as tags on the object version; otherwise falls back to
    a plain streamed upload.
    """
    # Local import to avoid a circular dependency with the views module.
    from oarepo_s3.views import MultipartUploadAbortResource, \
        MultipartUploadCompleteResource
    expiration = current_app.config['S3_MULTIPART_UPLOAD_EXPIRATION']
    date_expiry = datetime.utcnow() + timedelta(seconds=expiration)
    if multipart and size:
        mu = MultipartUpload(key=key, base_uri=files.bucket.location.uri,
                             expires=expiration, size=size,
                             content_type=ctype)
        # Assigning the MultipartUpload triggers creation of the
        # underlying file object; re-read it to get the stored record.
        files[key] = mu
        file_obj = files[key]
        # Resolve the complete/abort endpoint URLs for this upload.
        complete = resolver(MultipartUploadCompleteResource.view_name,
                            key=key,
                            upload_id=mu.response['upload_id'])
        abort = resolver(MultipartUploadAbortResource.view_name,
                         key=key,
                         upload_id=mu.response['upload_id'])
        mu.complete_url = complete
        mu.abort_url = abort
        with db.session.begin_nested():
            # create tags with multipart upload configuration
            mc_tag = ObjectVersionTag(object_version=file_obj.obj,
                                      key=MULTIPART_CONFIG_TAG,
                                      value=json.dumps(
                                          dict(
                                              **mu.response,
                                              complete_url=mu.complete_url,
                                              abort_url=mu.abort_url,
                                          )))
            db.session.add(mc_tag)
            # Expiration tag lets cleanup jobs find stale uploads.
            exp_tag = ObjectVersionTag(object_version=file_obj.obj,
                                       key=MULTIPART_EXPIRATION_TAG,
                                       value=date_expiry.isoformat())
            db.session.add(exp_tag)
    else:
        # Plain (non-multipart) upload: store the request stream.
        files[key] = request.stream
    return create_multipart_upload_response_factory(files[key])
def test_tag_manager_update(api, users, location, es, update_style):
    # Verifies TagManager.update() persists new and changed tags while
    # leaving untouched tags intact, for both dict and kwargs styles.
    with api.test_request_context():
        bucket = Bucket.create()
        object_version = ObjectVersion.create(bucket=bucket, key="hello")
        ObjectVersionTag.create(
            object_version=object_version,
            key=ObjectTagKey.Packaging.value,
            value="old-packaging",
        )
        ObjectVersionTag.create(
            object_version=object_version,
            key=ObjectTagKey.MetadataFormat.value,
            value="old-metadata",
        )
        tags = TagManager(object_version)
        # Two tags exist in the DB before the update.
        assert (
            ObjectVersionTag.query.filter_by(
                object_version=object_version).count() == 2
        )
        assert tags == {
            ObjectTagKey.Packaging: "old-packaging",
            ObjectTagKey.MetadataFormat: "old-metadata",
        }
        if update_style == "dict":
            # dict style: enum keys map directly to values.
            tags.update(
                {
                    ObjectTagKey.MetadataFormat: "new-metadata",
                    ObjectTagKey.DerivedFrom: "new-derived-from",
                }
            )
        elif update_style == "kwargs":
            # kwargs style: keyword names are the enum *values*.
            tags.update(
                **{
                    ObjectTagKey.MetadataFormat.value: "new-metadata",
                    ObjectTagKey.DerivedFrom.value: "new-derived-from",
                }
            )
        assert tags == {
            ObjectTagKey.Packaging: "old-packaging",
            ObjectTagKey.MetadataFormat: "new-metadata",
            ObjectTagKey.DerivedFrom: "new-derived-from",
        }
        # One new tag was added, one updated in place.
        assert (
            ObjectVersionTag.query.filter_by(
                object_version=object_version).count() == 3
        )
        db.session.refresh(object_version)
        # The database view agrees with the in-memory manager.
        assert object_version.get_tags() == {
            ObjectTagKey.Packaging.value: "old-packaging",
            ObjectTagKey.MetadataFormat.value: "new-metadata",
            ObjectTagKey.DerivedFrom.value: "new-derived-from",
        }
def test_put_fileset_url(api, users, location, es, task_delay):
    # PUT to the fileset URL replaces the previous file set: the old
    # file's ObjectVersion is delete-marked and a new one is created.
    with api.test_request_context(), api.test_client() as client:
        client.post(
            url_for_security("login"),
            data={
                "email": users[0]["email"],
                "password": "******"
            },
        )
        record = SWORDDeposit.create({})
        record.commit()
        object_version = ObjectVersion.create(
            record.bucket,
            key="old-file.txt",
            stream=io.BytesIO(b"hello"),
            mimetype="text/plain",
        )
        ObjectVersionTag.create(
            object_version=object_version,
            key=ObjectTagKey.FileSetFile.value,
            value="true",
        )
        db.session.commit()
        response = client.put(
            url_for("invenio_sword.depid_fileset",
                    pid_value=record.pid.pid_value),
            data=b"hello again",
            headers={
                "Content-Disposition": "attachment; filename=new-file.txt",
                "Content-Type": "text/plain",
            },
        )
        assert response.status_code == HTTPStatus.NO_CONTENT
        # The deletion runs asynchronously; execute the captured task.
        assert task_delay.call_count == 1
        task_self = task_delay.call_args[0][0]
        task_self.apply()
        # Check original ObjectVersion is marked deleted
        original_object_versions = list(
            ObjectVersion.query.filter_by(
                bucket=record.bucket,
                key="old-file.txt").order_by("created"))
        assert len(original_object_versions) == 2
        assert not original_object_versions[0].is_head
        # The head version with file=None is the delete marker.
        assert original_object_versions[1].is_head
        assert original_object_versions[1].file is None
        # Check new object has been created
        new_object_version = ObjectVersion.query.filter_by(
            bucket=record.bucket, key="new-file.txt").one()
        assert new_object_version.is_head
def _resolve_master_tag(cls, deposit):
    """Create the master tag for dependent files.

    After rebuilding the files dump, tags every dependent object in the
    deposit bucket with the master video's version id.
    """
    # build a partial files dump
    cls._resolve_dumps(record=deposit)
    # locate the master video and the deposit bucket
    master_video = CDSVideosFilesIterator.get_master_video_file(deposit)
    bucket = cls._get_bucket(record=deposit)
    master_version = master_video['version_id']
    # attach the master tag to every dependent object
    for obj in ObjectVersion.get_by_bucket(bucket=bucket):
        if obj.get_tags()['context_type'] not in cls.dependent_objs:
            continue
        ObjectVersionTag.create(obj, 'master', master_version)
def _upload_video_and_publish(video):
    """Upload a fake master file into the video deposit and publish it.

    :param video: video deposit to receive the file and be published.
    :returns: tuple ``(published_video, record_published_video)``.
    """
    bucket_id = video['_buckets']['deposit']
    payload = ('fake video file ' + _random_string()).encode('utf-8')
    master = ObjectVersion.create(bucket=bucket_id, key='master.mp4',
                                  stream=BytesIO(payload))
    # Mark the uploaded object as the master video.
    ObjectVersionTag.create(master, 'context_type', 'master')
    prepare_videos_for_publish([video])
    published_video = video.publish()
    _, record_published_video = published_video.fetch_published()
    return published_video, record_published_video
def video_extract_frames(self, object_version, frames_start=5,
                         frames_end=95, frames_gap=1, **kwargs):
    """Extract images from some frames of the video.

    Each of the frame images generates an ``ObjectVersion`` tagged as
    "frame" using ``ObjectVersionTags``.

    :param object_version: master video to extract frames from.
    :param frames_start: start percentage, default 5.
    :param frames_end: end percentage, default 95.
    :param frames_gap: percentage between frames from start to end,
        default 10.
    """
    object_version = as_object_version(object_version)
    self._base_payload = dict()
    output_folder = tempfile.mkdtemp()

    def progress_updater(seconds, duration):
        """Report frame-extraction progress to the task state."""
        meta = dict(
            payload=dict(
                size=duration,
                # BUG FIX: '/' binds tighter than 'or', so the original
                # 'seconds or 0.0 / duration * 100' evaluated to just
                # 'seconds or 0.0' and never produced a percentage.
                percentage=(seconds or 0.0) / duration * 100,
            ),
            message='Extracting frames {0} of {1} seconds'.format(
                seconds, duration),
        )
        self.update_state(state=STARTED, meta=meta)

    try:
        ff_frames(object_version.file.uri, frames_start, frames_end,
                  frames_gap,
                  os.path.join(output_folder, 'frame-%d.jpg'),
                  progress_callback=progress_updater)
        for filename in os.listdir(output_folder):
            # Close each frame file promptly instead of leaking the
            # handle (the original never closed the opened streams).
            with open(os.path.join(output_folder, filename), 'rb') as f:
                obj = ObjectVersion.create(
                    bucket=object_version.bucket, key=filename, stream=f)
            ObjectVersionTag.create(obj, 'master',
                                    object_version.version_id)
    finally:
        # Always remove the scratch directory, even if extraction or
        # object creation fails.
        shutil.rmtree(output_folder)
    db.session.commit()
def transcode_task(bucket, filesize, filename, preset_qualities):
    """Get a transcode task.

    Creates a dummy video object of ``filesize`` zero bytes and builds
    one ``TranscodeVideoTask`` signature per requested preset quality.

    :returns: tuple of (version id, list of task signatures).
    """
    obj = ObjectVersion.create(bucket, key=filename,
                               stream=BytesIO(b'\x00' * filesize))
    ObjectVersionTag.create(obj, 'display_aspect_ratio', '16:9')
    version_id = str(obj.version_id)
    db.session.commit()
    signatures = [
        TranscodeVideoTask().s(version_id=version_id,
                               preset_quality=quality,
                               sleep_time=0)
        for quality in preset_qualities
    ]
    return (version_id, signatures)
def _resolve_file(cls, deposit, bucket, file_):
    """Resolve file.

    Creates the object version for a migrated file, enriches its tags
    (preset guess, timestamp transform) and persists them.
    """
    # create the object version from the migration stream
    stream = cls._get_migration_file_stream(file_=file_)
    obj = ObjectVersion.create(bucket=bucket, key=file_['key'],
                               stream=stream)
    # guess preset info from the created object, if clues are provided
    preset_clues = file_.get('tags_to_guess_preset', {})
    if preset_clues:
        file_['tags'].update(
            **cls._resolve_preset(obj=obj, clues=preset_clues))
    transforms = file_.get('tags_to_transform', {})
    # carry the timestamp over from the transform tags, when present
    if 'timestamp' in transforms:
        file_['tags']['timestamp'] = transforms['timestamp']
    # persist all tags on the object version
    for tag_key, tag_value in file_.get('tags', {}).items():
        ObjectVersionTag.create(obj, tag_key, tag_value)
def create_metadata_tags(cls, object_, keys, uri=None):
    """Extract metadata from the video and create corresponding tags.

    :param object_: ``ObjectVersion`` to attach the tags to.
    :param keys: whitelist of metadata keys to persist as tags.
    :param uri: optional URI of the media to probe.
    :returns: the full extracted metadata dictionary.
    """
    extracted_dict = cls.get_metadata_tags(object_=object_, uri=uri)
    # Add technical information to the ObjectVersion as Tags.
    # IDIOM FIX: use a plain loop for side effects instead of building
    # and discarding a list comprehension.
    for k, v in extracted_dict.items():
        if k in keys:
            ObjectVersionTag.create_or_update(object_, k, v)
    # Refresh so the freshly created tags are visible on the object.
    db.session.refresh(object_)
    return extracted_dict
def create_video_tags(obj, context_type, bitrate=None, smil=True):
    """Create video tags.

    :param obj: object version to tag.
    :param context_type: value for the ``context_type`` tag.
    :param bitrate: optional ``video_bitrate`` value (default 123456).
    :param smil: whether to also add the ``smil`` tag.
    """
    tags = [
        ('width', 1000),
        ('height', 1000),
        ('bit_rate', 123456),
        ('video_bitrate', bitrate or 123456),
        ('media_type', 'video'),
        ('context_type', context_type),
    ]
    # Append smil tag
    if smil:
        tags.append(('smil', True))
    # IDIOM FIX: plain loop instead of a discarded list comprehension
    # used only for its side effects.
    for key, val in tags:
        ObjectVersionTag.create(obj, key, val)
def test_transcode_and_undo(db, cds_depid, mock_sorenson):
    """Test TranscodeVideoTask task."""
    def get_bucket_keys():
        # Current set of object keys in the deposit bucket.
        return [o.key for o in list(ObjectVersion.get_by_bucket(bucket))]
    bucket = deposit_project_resolver(cds_depid).files.bucket
    filesize = 1024
    filename = 'test.mp4'
    preset_quality = '480p'
    new_filename = '{0}.mp4'.format(preset_quality)
    obj = ObjectVersion.create(bucket, key=filename,
                               stream=BytesIO(b'\x00' * filesize))
    ObjectVersionTag.create(obj, 'display_aspect_ratio', '16:9')
    obj_id = str(obj.version_id)
    db.session.commit()
    # Only the source file exists before transcoding.
    assert get_bucket_keys() == [filename]
    assert bucket.size == filesize
    task_s = TranscodeVideoTask().s(version_id=obj_id,
                                    preset_quality=preset_quality,
                                    sleep_time=0)
    # Transcode
    task_s.delay(deposit_id=cds_depid)
    db.session.add(bucket)
    keys = get_bucket_keys()
    # Transcoding adds the preset-named output next to the source.
    assert len(keys) == 2
    assert filename in keys
    assert new_filename in keys
    assert bucket.size == 2 * filesize
    # Undo
    TranscodeVideoTask().clean(version_id=obj_id,
                               preset_quality=preset_quality)
    db.session.add(bucket)
    keys = get_bucket_keys()
    # Only the source key remains visible after clean().
    assert len(keys) == 1
    assert filename in keys
    assert new_filename not in keys
    # file size doesn't change
    assert bucket.size == 2 * filesize
def _force_sync_deposit_bucket(record):
    """Replace deposit bucket with a copy of the record bucket.

    Snapshots the record bucket, rewires the deposit's RecordsBuckets
    to it, repairs all 'master' tags to point at the new master object,
    removes the old deposit bucket and updates both JSON documents.

    :returns: tuple ``(old_bucket_id, new_bucket_id)``.
    """
    deposit = Video.get_record(record.depid.object_uuid)
    # if deposit['_deposit']['status'] == 'draft':
    #     raise RuntimeError('Deposit in edit mode: {0}'.format(deposit.id))
    deposit_old_bucket = deposit.files.bucket
    # create a copy of record bucket
    new_bucket = record.files.bucket.snapshot()
    # Snapshot of a record bucket is locked; unlock so the deposit can
    # modify it.
    new_bucket.locked = False
    db.session.commit()
    # Rewire the deposit's bucket link to the snapshot.
    rb = RecordsBuckets.query.filter(
        RecordsBuckets.bucket_id == deposit_old_bucket.id).one()
    rb.bucket = new_bucket
    db.session.add(rb)
    db.session.commit()
    # Put tags correctly pointing to the right object
    master_file = CDSVideosFilesIterator.get_master_video_file(record)
    if master_file:
        master_deposit_obj = ObjectVersion.get(new_bucket,
                                               master_file['key'])
        # All head objects with a 'master' tag must now reference the
        # master object inside the NEW bucket.
        for slave in ObjectVersion.query_heads_by_bucket(
                bucket=new_bucket).join(ObjectVersion.tags).filter(
                    ObjectVersion.file_id.isnot(None),
                    ObjectVersionTag.key == 'master'):
            ObjectVersionTag.create_or_update(
                slave, 'master', str(master_deposit_obj.version_id))
            db.session.add(slave)
        db.session.commit()
    # Delete the old bucket
    deposit_old_bucket.locked = False
    _ = deposit_old_bucket.remove()
    # Update both JSON documents to reference the new deposit bucket.
    deposit['_buckets']['deposit'] = str(new_bucket.id)
    record['_buckets']['deposit'] = str(new_bucket.id)
    record['_deposit'] = deposit['_deposit']
    deposit['_files'] = deposit.files.dumps()
    deposit.commit()
    record.commit()
    db.session.commit()
    return deposit_old_bucket.id, new_bucket.id
def test_legacy_embed(previewer_app, db, api_project, video, users):
    """Test backwards-compatibility with legacy embed URL for videos."""
    project, video_1, _ = api_project
    filename = 'test.mp4'
    bucket_id = video_1['_buckets']['deposit']
    # NOTE(review): the opened stream is never closed — acceptable in a
    # test, but worth tidying with a context manager.
    obj = ObjectVersion.create(bucket=bucket_id, key=filename,
                               stream=open(video, 'rb'))
    ObjectVersionTag.create(obj, 'context_type', 'master')
    ObjectVersionTag.create(obj, 'preview', True)
    login_user(User.query.get(users[0]))
    prepare_videos_for_publish([video_1])
    video_1 = video_1.publish()
    with previewer_app.test_client() as client:
        # The legacy /video/<report_number> URL must redirect to the
        # canonical embed view for the record.
        res = client.get('/video/{0}'.format(video_1.report_number))
        assert res.location.endswith(url_for(
            'invenio_records_ui.recid_embed_default',
            pid_value=video_1['recid'],
        ))
def create_object(key, media_type, context_type, **tags):
    """Create object versions with given type and tags.

    Uses the enclosing scope's ``self`` for the target bucket and the
    master object id. Extra keyword arguments become additional tags.
    """
    # FIX: close the input stream deterministically instead of leaking
    # the file handle.
    with open(in_output(key), 'rb') as stream:
        obj = ObjectVersion.create(bucket=self.object.bucket, key=key,
                                   stream=stream)
    ObjectVersionTag.create(obj, 'master', self.obj_id)
    ObjectVersionTag.create(obj, 'media_type', media_type)
    ObjectVersionTag.create(obj, 'context_type', context_type)
    # IDIOM FIX: plain loop instead of a discarded list comprehension.
    for k, v in tags.items():
        ObjectVersionTag.create(obj, k, v)
def _create_object(cls, bucket, key, stream, size, media_type,
                   context_type, master_id, **tags):
    """Create object versions with given type and tags.

    :param bucket: target bucket.
    :param key: object key.
    :param stream: readable stream with the content.
    :param size: content size in bytes.
    :param media_type: value for the ``media_type`` tag.
    :param context_type: value for the ``context_type`` tag.
    :param master_id: version id of the master object.
    :param tags: additional tag key/value pairs.
    """
    obj = ObjectVersion.create(bucket=bucket, key=key, stream=stream,
                               size=size)
    ObjectVersionTag.create(obj, 'master', str(master_id))
    ObjectVersionTag.create(obj, 'media_type', media_type)
    ObjectVersionTag.create(obj, 'context_type', context_type)
    # IDIOM FIX: plain loop (with .items()) instead of a discarded
    # list comprehension used only for side effects.
    for k, v in tags.items():
        ObjectVersionTag.create(obj, k, v)
def _resolve_file(cls, bucket, file_):
    """Resolve file.

    Creates the object version for a migrated file (object first, then
    tags — the preset is resolved against the created object, unlike
    the variant that resolves presets beforehand).
    """
    def progress_callback(size, total):
        # NOTE(review): interpolates (total, size) in that order —
        # confirm against the stream helper's callback contract.
        logging.debug('Moving file {0} of {1}'.format(total, size))
    # create object
    stream, size = cls._get_migration_file_stream_and_size(file_=file_)
    obj = ObjectVersion.create(
        bucket=bucket, key=file_['key'], stream=stream, size=size,
        progress_callback=progress_callback)
    # resolve preset info
    tags_to_guess_preset = file_.get('tags_to_guess_preset', {})
    if tags_to_guess_preset:
        file_['tags'].update(**cls._resolve_preset(
            obj=obj, clues=tags_to_guess_preset))
    tags_to_transform = file_.get('tags_to_transform', {})
    # resolve timestamp
    if 'timestamp' in tags_to_transform:
        file_['tags']['timestamp'] = tags_to_transform['timestamp']
    # create tags
    for key, value in file_.get('tags', {}).items():
        ObjectVersionTag.create(obj, key, value)
def _process_files(record, files_metadata):
    """Attach files to a record with a given metadata.

    Assumptions:
    - The source must be a URL pointing to a tar file.
    - All files listed in the metadata are inside the source tar.
    - Master files are listed before slaves.
    - The reference from the slave to master is done via key.
    """
    if not files_metadata:
        return
    bucket = Bucket.create(location=Location.get_by_name('videos'))
    RecordsBuckets.create(record=record.model, bucket=bucket)
    # NOTE(review): verify=False disables TLS certificate verification
    # — confirm the source host is trusted/internal before keeping this.
    response = requests.get(
        files_metadata['source'], stream=True, verify=False)
    # Throw an error for bad status codes
    response.raise_for_status()
    # Download the tar to a named temp file (delete=False so it can be
    # reopened by tarfile below).
    with tempfile.NamedTemporaryFile(suffix='.tar', delete=False) as f:
        for chunk in response:
            f.write(chunk)
    # NOTE(review): extractall() on an untrusted tar can escape the
    # target directory (path traversal) — confirm source is trusted.
    tar = tarfile.open(name=f.name)
    tar.extractall(path=tempfile.gettempdir())
    files_base_dir = os.path.join(tempfile.gettempdir(),
                                  tar.getnames()[0])
    tar.close()
    os.remove(f.name)
    for f in files_metadata['metadata']:
        obj = ObjectVersion.create(bucket, f['key'])
        with open(os.path.join(files_base_dir, f['key']), 'rb') as fp:
            obj.set_contents(fp)
        for k, v in f['tags'].items():
            # Slave tags reference their master by key; resolve it to
            # the master object's version id (masters come first).
            if k == 'master':
                v = ObjectVersion.get(bucket, v).version_id
            ObjectVersionTag.create(obj, k, v)
    shutil.rmtree(files_base_dir)
    record['_files'] = record.files.dumps()
def create_metadata_tags(cls, object_, keys, uri=None):
    """Extract metadata from the video and create corresponding tags.

    :param object_: ``ObjectVersion`` to attach the tags to.
    :param keys: whitelist of metadata keys to persist as tags.
    :param uri: optional URI to probe; defaults to the object's file.
    :returns: the full extracted metadata dictionary.
    """
    uri = uri or object_.file.uri
    # Extract video's metadata using `ff_probe`
    metadata = ff_probe_all(uri)
    # Merge container ('format') and first-stream metadata into one dict.
    extracted_dict = dict(metadata['format'], **metadata['streams'][0])
    # Add technical information to the ObjectVersion as Tags.
    # IDIOM FIX: plain loop instead of a discarded list comprehension.
    for k, v in extracted_dict.items():
        if k in keys:
            ObjectVersionTag.create(object_, k, v)
    db.session.refresh(object_)
    return extracted_dict
def clean(self, deposit_id, version_id, *args, **kwargs): """Undo metadata extraction.""" # 1. Revert patch on record recid = str(PersistentIdentifier.get('depid', deposit_id).object_uuid) patch = [{ 'op': 'remove', 'path': '/_cds/extracted_metadata', }] validator = 'cds.modules.records.validators.PartialDraft4Validator' try: patch_record(recid=recid, patch=patch, validator=validator) except jsonpatch.JsonPatchConflict as c: logger.warning( 'Failed to apply JSON Patch to deposit {0}: {1}'.format( recid, c)) # Delete tmp file if any obj = as_object_version(version_id) temp_location = obj.get_tags().get('temp_location', None) if temp_location: shutil.rmtree(temp_location) ObjectVersionTag.delete(obj, 'temp_location') db.session.commit()
def test_put_status_document(api, users, location, es):
    # An empty PUT to the deposit URL resets the deposit: the existing
    # file gets a delete marker instead of remaining the head version.
    with api.test_request_context(), api.test_client() as client:
        client.post(
            url_for_security("login"),
            data={
                "email": users[0]["email"],
                "password": "******"
            },
        )
        record = SWORDDeposit.create({})
        record.commit()
        db.session.commit()
        object_version = ObjectVersion.create(
            record.bucket,
            "file.n3",
            mimetype="text/n3",
            stream=io.BytesIO(b"1 _:a 2 ."),
        )
        ObjectVersionTag.create(
            object_version=object_version,
            key=ObjectTagKey.FileSetFile.value,
            value="true",
        )
        response = client.put(
            "/sword/deposit/{}".format(record.pid.pid_value), data=b"")
        assert response.status_code == HTTPStatus.OK
        # This should have removed the previous file, as the empty PUT is a reset.
        object_versions = list(
            ObjectVersion.query.filter_by(
                bucket=record.bucket).order_by("created"))
        assert len(object_versions) == 2
        assert not object_versions[0].is_head
        # Head version with file=None is the delete marker.
        assert object_versions[1].is_head
        assert object_versions[1].file is None
def post(self, pid, record, key, files, file_rec, multipart_config,
         upload_id, parts):
    """Complete an S3 multipart upload for a record file.

    Validates the upload id, finalizes the upload on S3, clears the
    multipart bookkeeping tags, stores the resulting ETag as checksum
    and commits the record.
    """
    # Reject completion requests for a different upload session.
    if multipart_config['upload_id'] != upload_id:
        abort(404)
    before_upload_complete.send(file_rec, record=record, file=file_rec,
                                multipart_config=multipart_config)
    # Finalize the upload on S3; returns the combined ETag.
    res = current_s3.client.complete_multipart_upload(
        bucket=multipart_config['bucket'],
        key=multipart_config['key'],
        upload_id=upload_id,
        parts=parts)
    with db.session.begin_nested():
        # The multipart bookkeeping tags are no longer needed.
        ObjectVersionTag.delete(file_rec.obj, MULTIPART_CONFIG_TAG)
        ObjectVersionTag.delete(file_rec.obj, MULTIPART_EXPIRATION_TAG)
        # Record the S3 ETag as the file checksum (prefixed 'etag:').
        etag = 'etag:{}'.format(res['ETag'])
        file_rec.obj.file.checksum = etag
        file_rec['checksum'] = etag
    after_upload_complete.send(file_rec, record=record, file=file_rec,
                               files=files)
    files.flush()
    record.commit()
    db.session.commit()
    return jsonify({
        'location': file_rec.data['url'],
        'checksum': file_rec['checksum']
    })
def test_tag_manager_delitem(api, users, location, es):
    # Deleting a key from TagManager must remove both the in-memory
    # entry and the persisted ObjectVersionTag row.
    value = "http://example.org/"
    with api.test_request_context():
        bucket = Bucket.create()
        object_version = ObjectVersion.create(bucket=bucket, key="hello")
        ObjectVersionTag.create(
            object_version=object_version,
            key=ObjectTagKey.Packaging.value,
            value=value
        )
        tags = TagManager(object_version)
        assert tags == {ObjectTagKey.Packaging: value}
        assert tags[ObjectTagKey.Packaging] == value
        del tags[ObjectTagKey.Packaging]
        assert tags == {}
        # Subsequent lookups raise, as for a plain dict.
        with pytest.raises(KeyError):
            _ = tags[ObjectTagKey.Packaging]
        # We've deleted the database object
        assert (
            ObjectVersionTag.query.filter_by(
                object_version=object_version).count() == 0
        )