Beispiel #1
0
def dispose_object_version(object_version):
    """Delete the given ObjectVersion; a falsy argument is ignored."""
    if not object_version:
        return
    resolved = as_object_version(object_version)
    # Delete the object addressed by the resolved version's bucket and key.
    ObjectVersion.delete(bucket=resolved.bucket, key=resolved.key)
Beispiel #2
0
 def clean(self, version_id, preset_quality, *args, **kwargs):
     """Dispose of the object stored under the preset's subformat key."""
     master = as_object_version(version_id)
     subformat_key = self._build_subformat_key(preset_quality=preset_quality)
     # Look the subformat up in the same bucket as the given version.
     subformat = ObjectVersion.query.filter_by(
         bucket_id=master.bucket_id, key=subformat_key).first()
     dispose_object_version(subformat)
Beispiel #3
0
 def _init_object_version(event):
     """Return the event's ObjectVersion, creating it when none is given.

     When the payload carries ``version_id`` that version is resolved;
     otherwise a new ObjectVersion is created from ``bucket_id`` and ``key``
     and tagged with the payload's ``uri``. In both cases the workflow tags
     are set and the version id is stored in the event response.
     """
     event_id = str(event.id)
     with db.session.begin_nested():
         payload = event.payload
         if 'version_id' not in payload:
             # No pre-existing version: create one and remember its origin
             object_version = ObjectVersion.create(
                 bucket=payload['bucket_id'], key=payload['key'])
             ObjectVersionTag.create(object_version, 'uri_origin',
                                     payload['uri'])
             version_id = str(object_version.version_id)
         else:
             version_id = payload['version_id']
             object_version = as_object_version(version_id)

         # Event link, preview flag and file type tags, in that order
         workflow_tags = (
             ('_event_id', event_id),
             ('preview', True),
             ('media_type', 'video'),
             ('context_type', 'master'),
         )
         for tag_key, tag_value in workflow_tags:
             ObjectVersionTag.create_or_update(object_version, tag_key,
                                               tag_value)
         event.response['version_id'] = version_id
     return object_version
Beispiel #4
0
def on_download_rename_file(sender, obj):
    """Prefix a derived file's key with its master's extension-less key."""
    if not obj:
        return
    master_version_id = obj.get_tags().get('master')
    if not master_version_id:
        # Object has no 'master' tag: leave its key untouched.
        return
    master_key = as_object_version(master_version_id).key
    base_name = splitext(master_key)[0]
    obj.key = '{}-{}'.format(base_name, obj.key)
Beispiel #5
0
 def _update_event_response(event, version_id):
     """Store links, key, version id and tags in the event response."""
     event_id = str(event.id)
     master = as_object_version(version_id)
     tags = master.get_tags()
     key = master.key
     bucket_id = str(master.bucket_id)
     with db.session.begin_nested():
         # Link to the stored object itself
         self_link = url_for(
             'invenio_files_rest.object_api',
             bucket_id=bucket_id,
             key=key,
             _external=True,
         )
         # Link to the webhook event item of the 'avc' receiver
         cancel_link = url_for(
             'invenio_webhooks.event_item',
             receiver_id='avc',
             event_id=event_id,
             _external=True,
         )
         event.response.update(
             links={'self': self_link, 'cancel': cancel_link},
             key=key,
             version_id=version_id,
             tags=tags,
         )
         for attribute in ('response', 'response_headers'):
             flag_modified(event, attribute)
Beispiel #6
0
def get_relative_path(object_version):
    """Return the file path of an ObjectVersion relative to its location."""
    resolved = as_object_version(object_version)
    root = resolved.bucket.location.uri
    directory, basename = split(resolved.file.uri)
    # Only the directory part is made relative; the file name is re-attached.
    return join(relpath(directory, root), basename)
Beispiel #7
0
 def _rename_master_file(self, master_file):
     """Set the master file's key to ``<report_number>.<suffix>``."""
     master_obj = as_object_version(master_file['version_id'])
     report_number = self['report_number'][0]
     suffix = master_file.get('content_type')
     if not suffix:
         # Fall back to the lower-cased extension of the current key
         suffix = splitext(master_file['key'])[1][1:].lower()
     master_obj.key = '{}.{}'.format(report_number, suffix)
     db.session.add(master_obj)
Beispiel #8
0
 def _resolve_extracted_metadata(cls, deposit, record):
     """Copy the master video's metadata tags into deposit and record."""
     master_file = CDSVideosFilesIterator.get_master_video_file(deposit)
     master_obj = as_object_version(master_file['version_id'])
     metadata = ExtractMetadataTask.create_metadata_tags(
         object_=master_obj, keys=ExtractMetadataTask._all_keys)
     # Deposit first, then record; both receive the same metadata object
     for target in (deposit, record):
         target['_cds']['extracted_metadata'] = metadata
Beispiel #9
0
 def _resolve_extracted_metadata(cls, deposit, record):
     """Run metadata extraction on the master video and store the result."""
     master_file = CDSVideosFilesIterator.get_master_video_file(deposit)
     master_obj = as_object_version(master_file['version_id'])
     metadata = cls._run_extracted_metadata(master=master_obj)
     message = 'Adding extracted metadata {0}'.format(metadata)
     logging.debug(message)
     # Deposit first, then record; both receive the same metadata object
     for target in (deposit, record):
         target['_cds']['extracted_metadata'] = metadata
Beispiel #10
0
 def _rename_subtitles(self):
     """Rename subtitle files after the record's report number.

     Every subtitle whose key ends in ``_<xx>.vtt`` (``xx`` being two ASCII
     letters) gets the key ``<report_number>_<xx>.vtt``. Subtitles whose key
     does not match the pattern are left untouched.
     """
     # Raw string so that ``\.`` reaches the regex engine as an escaped dot
     # instead of relying on Python tolerating an invalid string escape.
     pattern = re.compile(r'.*_(?P<iso_lang>[a-zA-Z]{2})\.vtt$')
     subtitles = CDSVideosFilesIterator.get_video_subtitles(self)
     for subtitle_file in subtitles:
         subtitle_obj = as_object_version(subtitle_file['version_id'])
         match = pattern.match(subtitle_file['key'])
         if match:
             subtitle_obj.key = '{}_{}.vtt'.format(self['report_number'][0],
                                                   match.group('iso_lang'))
             db.session.add(subtitle_obj)
Beispiel #11
0
 def _create_or_update_frames(cls, record, master_file):
     """Rebuild the record's frames when fewer than the minimum exist."""
     all_files = record.get('_files', [])

     def _is_frame(file_):
         """Tell whether a file entry is tagged as an image frame."""
         return (file_['tags']['media_type'] == 'image'
                 and file_['tags']['context_type'] == 'frame')

     non_frames = [entry for entry in all_files if not _is_frame(entry)]
     frame_count = len(all_files) - len(non_frames)
     if frame_count >= cls._get_minimum_frames():
         return
     # Drop whatever frames exist before appending the new ones
     record['_files'] = non_frames
     new_frames = cls._create_frame(object_=as_object_version(master_file))
     record['_files'] = record['_files'] + new_frames
Beispiel #12
0
def video_extract_frames(self,
                         object_version,
                         frames_start=5,
                         frames_end=95,
                         frames_gap=1,
                         **kwargs):
    """Extract images from some frames of the video.

    Each extracted image is stored as a new ``ObjectVersion`` in the master's
    bucket and gets a ``master`` ``ObjectVersionTag`` holding the master's
    version id.

    :param object_version: master video to extract frames from.
    :param frames_start: start percentage, default 5.
    :param frames_end: end percentage, default 95.
    :param frames_gap: percentage between frames from start to end, default 1.
    """
    object_version = as_object_version(object_version)

    self._base_payload = dict()

    input_file = object_version.file.uri
    output_folder = tempfile.mkdtemp()

    def progress_updater(seconds, duration):
        """Progress reporter."""
        # Parentheses matter: ``seconds or 0.0 / duration * 100`` would
        # report the raw seconds instead of a percentage. A falsy duration
        # is reported as 0% rather than dividing by zero.
        percentage = (seconds or 0.0) / duration * 100 if duration else 0.0
        meta = dict(
            payload=dict(
                size=duration,
                percentage=percentage, ),
            message='Extracting frames {0} of {1} seconds'.format(
                seconds, duration), )

        self.update_state(state=STARTED, meta=meta)

    try:
        ff_frames(
            input_file,
            frames_start,
            frames_end,
            frames_gap,
            os.path.join(output_folder, 'frame-%d.jpg'),
            progress_callback=progress_updater)

        for filename in os.listdir(output_folder):
            # Close every frame file as soon as it has been handed over
            with open(os.path.join(output_folder, filename), 'rb') as stream:
                obj = ObjectVersion.create(
                    bucket=object_version.bucket,
                    key=filename,
                    stream=stream)
            ObjectVersionTag.create(obj, 'master', object_version.version_id)
    finally:
        # Remove the temporary folder even when extraction or storing fails
        shutil.rmtree(output_folder)

    db.session.commit()
Beispiel #13
0
    def __call__(self, *args, **kwargs):
        """Prepare the task state from the keyword arguments and run it."""
        kwargs = self._extract_call_arguments(
            ['event_id', 'deposit_id', 'key'], **kwargs)

        with self.app.flask_app.app_context():
            clean_requested = kwargs.get('_clean', False)
            if clean_requested:
                self.clean(*args, **kwargs)

            # 'version_id' is consumed here and not forwarded to run()
            version_id = kwargs.pop('version_id', None)
            self.object = as_object_version(version_id)
            if self.object:
                self.obj_id = str(self.object.version_id)
            self.set_base_payload()
            return self.run(*args, **kwargs)
Beispiel #14
0
def generate_smil_file(record_id, record, bucket, master_object, **kwargs):
    """Create the SMIL playlist object for a video record."""
    master = as_object_version(master_object)

    # The SMIL key reuses the master key with its last extension replaced
    base_name = master.key.rsplit('.', 1)[0]
    smil_key = '{0}.smil'.format(base_name)
    smil_content = SmilSerializer.serialize(record_id, record, **kwargs)

    with db.session.begin_nested():
        smil_stream = BytesIO(smil_content.encode())
        obj = ObjectVersion.create(bucket=bucket,
                                   key=smil_key,
                                   stream=smil_stream)
        smil_tags = (
            ('master', str(master.version_id)),
            ('context_type', 'playlist'),
            ('media_type', 'text'),
        )
        for tag_key, tag_value in smil_tags:
            ObjectVersionTag.create(obj, tag_key, tag_value)
0
def download_to_object_version(self, uri, object_version, **kwargs):
    r"""Download file from a URL.

    :param uri: URL of the file to download.
    :param object_version: ``ObjectVersion`` instance or object version id.
    :param \**kwargs: ``event_id`` and ``deposit_id``, when given, are copied
        into the task's base payload.
    :returns: the version id of the object version, as a string.
    """
    object_version = as_object_version(object_version)

    self._base_payload = dict(
        key=object_version.key,
        version_id=str(object_version.version_id),
        tags=object_version.get_tags(),
        event_id=kwargs.get('event_id', None),
        deposit_id=kwargs.get('deposit_id', None), )

    # Make HTTP request
    response = requests.get(uri, stream=True)

    try:
        if 'Content-Length' in response.headers:
            headers_size = int(response.headers.get('Content-Length'))
        else:
            headers_size = None

        def progress_updater(size, total):
            """Progress reporter."""
            size = size or headers_size or 0
            # With neither a reported size nor a Content-Length header the
            # size is 0; report 0% instead of raising ZeroDivisionError.
            percentage = total * 100 / size if size else 0
            meta = dict(
                payload=dict(
                    size=size,
                    total=total,
                    percentage=percentage, ),
                message='Downloading {0} of {1}'.format(total, size),
            )

            self.update_state(state=STARTED, meta=meta)

        object_version.set_contents(
            response.raw,
            progress_callback=progress_updater,
            size=headers_size)
    finally:
        # A streamed response keeps its connection until explicitly closed
        response.close()

    db.session.commit()

    return str(object_version.version_id)
0
 def delete(self, event):
     """Delete the workflow's tasks together with what they produced."""
     super(AVCWorkflow, self).delete(event)
     self.clean_task(event=event, task_name='file_video_extract_frames')
     for quality in get_available_preset_qualities():
         self.clean_task(event=event,
                         task_name='file_transcode',
                         preset_quality=quality)
     self.clean_task(event=event,
                     task_name='file_video_metadata_extraction')
     if 'version_id' in event.payload:
         # The ObjectVersion pre-existed the event: only strip these tags
         master = as_object_version(event.payload['version_id'])
         event_tag_keys = ['_event_id', 'preview', 'media_type',
                           'context_type']
         stale_tags = ObjectVersionTag.query.filter(
             ObjectVersionTag.object_version == master,
             ObjectVersionTag.key.in_(event_tag_keys))
         stale_tags.delete(synchronize_session=False)
     else:
         self.clean_task(event=event, task_name='file_download')
0
    def clean(self, deposit_id, version_id, *args, **kwargs):
        """Remove extracted metadata from the record and any temp folder."""
        # 1. Revert patch on record
        pid = PersistentIdentifier.get('depid', deposit_id)
        recid = str(pid.object_uuid)
        remove_metadata = {
            'op': 'remove',
            'path': '/_cds/extracted_metadata',
        }
        try:
            patch_record(
                recid=recid,
                patch=[remove_metadata],
                validator='cds.modules.records.validators.'
                          'PartialDraft4Validator')
        except jsonpatch.JsonPatchConflict as c:
            # A patch conflict is only logged; cleaning continues
            message = 'Failed to apply JSON Patch to deposit {0}: {1}'.format(
                recid, c)
            logger.warning(message)

        # 2. Delete tmp file if any
        obj = as_object_version(version_id)
        temp_location = obj.get_tags().get('temp_location', None)
        if not temp_location:
            return
        shutil.rmtree(temp_location)
        ObjectVersionTag.delete(obj, 'temp_location')
        db.session.commit()
Beispiel #18
0
def subformats_integrity_report(start_date=None, end_date=None):
    """Send a report of all corrupted subformats to CDS admins.

    Probes the master file and every subformat of the selected records and
    e-mails the collected issues. The cached start date is only advanced
    when every probed file was accessible.

    :param start_date: lower bound passed to the record filter; defaults to
        the cached start date (4 days ago when nothing is cached).
    :param end_date: upper bound passed to the record filter; defaults to
        2 days ago.
    """
    report = []
    update_cache = True

    def _probe_video_file(obj, record):
        """Run ffmpeg on a video file.

        Return a tuple containing (report, accessible). ``report`` is an
        empty dict when no issue was found.
        """
        file_report = {}
        path = obj.file.uri.replace(
            current_app.config['VIDEOS_XROOTD_ENDPOINT'], '')

        if not os.path.exists(path):
            # Check if the file exists on disk. No exception is available
            # in this branch, hence the entry carries no 'error' key.
            file_report = {
                'file_name': obj.key,
                'message': 'The file cannot be accessed'
            }

            # Return the file report and the file accessibility
            return (file_report, False)

        try:
            # Expecting the storage to be mounted on the machine
            probe = ff_probe_all(path)

            if not probe.get('streams'):
                file_report = {
                    'file_name': obj.key,
                    'message': 'No video stream'
                }

        except Exception as e:
            file_report = {
                'file_name': obj.key,
                'message': 'Error while running ff_probe_all',
                'error': repr(e)
            }

        # Return the file report and the file accessibility
        return (file_report, True)

    def _format_report(report):
        """Format the email body for the subformats integrity report."""
        lines = []
        for entry in report:
            lines.append(u'Record: {}'.format(
                format_pid_link(current_app.config['RECORDS_UI_ENDPOINT'],
                                entry.get('recid'))))
            lines.append('Message: {}'.format(entry.get('message')))

            if entry.get('report_number'):
                lines.append('Report number: {}'.format(
                    entry.get('report_number')))

            subreports = entry.get('subreports')
            if subreports:
                lines.append(('-' * 10) + '\n')

                for subreport in subreports:
                    lines.append('  File name: {}'.format(
                        subreport.get('file_name')))
                    lines.append('  Message: {}'.format(
                        subreport.get('message')))

                    if subreport.get('error'):
                        lines.append('  Error: {}'.format(
                            subreport.get('error')))

            lines.append(('-' * 80) + '\n')

        return '\n'.join(lines)

    cache = current_cache.get('task_subformats_integrity:details') or {}
    two_days_ago = datetime.utcnow() - timedelta(days=2)
    if 'start_date' not in cache:
        # Set the start date to 4 days ago
        cache['start_date'] = datetime.utcnow() - timedelta(days=4)

    record_uuids = _filter_by_last_created(_get_all_records_with_bucket(),
                                           start_date or cache['start_date'],
                                           end_date or two_days_ago)

    for record_uuid in record_uuids:
        record = CDSRecord.get_record(record_uuid.id)
        master = CDSVideosFilesIterator.get_master_video_file(record)

        if not master:
            report.append({
                'recid': record['recid'],
                'message': 'No master video found for the given record',
                'report_number': record['report_number'][0]
            })
            continue

        master_obj = as_object_version(master['version_id'])
        subreport_master, accessible = _probe_video_file(master_obj, record)

        if not accessible:
            update_cache = False

        if subreport_master:
            report.append({
                'recid': record['recid'],
                'message': 'Master file issue report',
                'report_number': record['report_number'][0],
                # _format_report iterates 'subreports' as a list of dicts
                'subreports': [subreport_master]
            })

        subformats = CDSVideosFilesIterator.get_video_subformats(master)
        if not subformats:
            report.append({
                'recid': record['recid'],
                'message': 'No subformats found'
            })
            continue

        subformats_subreport = []
        for subformat in subformats:
            subformat_obj = as_object_version(subformat['version_id'])
            subformat_subreport, accessible = _probe_video_file(
                subformat_obj, record)

            if not accessible:
                update_cache = False

            if subformat_subreport:
                subformats_subreport.append(subformat_subreport)

        if subformats_subreport:
            report.append({
                'recid': record['recid'],
                'message': 'Subformats issues found',
                'report_number': record['report_number'][0],
                'subreports': subformats_subreport
            })

    if update_cache:
        # Set the start date for next time when the task will run
        cache['start_date'] = two_days_ago
        current_cache.set('task_subformats_integrity:details',
                          cache,
                          timeout=-1)

    if report:
        # Format and send the email
        subject = u'[CDS Videos] Subformats integrity report [{}]'.format(
            datetime.now())
        body = _format_report(report)
        sender = current_app.config['NOREPLY_EMAIL']
        recipients = [current_app.config['CDS_ADMIN_EMAIL']]
        _send_email(subject, body, sender, recipients)
Beispiel #19
0
def video_transcode(self,
                    object_version,
                    video_presets=None,
                    sleep_time=5,
                    **kwargs):
    """Launch video transcoding.

    For each preset a new ``ObjectVersion`` is created in the master's
    bucket, keyed ``<master base name>-<preset><extension>`` and tagged with
    ``master``, ``preset`` and ``_sorenson_job_id``. The started jobs are
    then polled until each one reports completion.

    :param object_version: Master video.
    :param video_presets: List of presets to use for transcoding. If ``None``
        all keys of the ``CDS_SORENSON_PRESETS`` configuration are used.
    :param sleep_time: the time interval between requests for Sorenson status
    """
    object_version = as_object_version(object_version)

    self._base_payload = dict(
        object_version=str(object_version.version_id),
        video_presets=video_presets,
        tags=object_version.get_tags(),
        deposit_id=kwargs.get('deposit_id', None),
        event_id=kwargs.get('event_id', None),
    )

    job_ids = deque()

    # Set handler for canceling all jobs
    def handler(signum, frame):
        # TODO handle better file deleting and ObjectVersion cleaning
        # Explicit loop over a snapshot: a bare ``map`` is lazy on Python 3
        # and would never call ``stop_encoding``.
        for queued in list(job_ids):
            stop_encoding(queued['job_id'])
    signal.signal(signal.SIGTERM, handler)

    # Get master file's bucket_id
    bucket_id = object_version.bucket_id
    bucket_location = object_version.bucket.location.uri

    preset_config = current_app.config['CDS_SORENSON_PRESETS']
    for preset in video_presets or preset_config.keys():
        with db.session.begin_nested():
            # Create FileInstance and get generated UUID
            file_instance = FileInstance.create()
            # Create ObjectVersion
            base_name = object_version.key.rsplit('.', 1)[0]
            new_extension = preset_config[preset][1]
            obj = ObjectVersion.create(
                bucket=bucket_id,
                key='{0}-{1}{2}'.format(base_name, preset, new_extension)
            )
            obj.set_file(file_instance)
            ObjectVersionTag.create(
                obj, 'master', str(object_version.version_id))
            ObjectVersionTag.create(obj, 'preset', preset)

            # Extract new location
            storage = file_instance.storage(default_location=bucket_location)
            directory, filename = storage._get_fs()

            # Start Sorenson
            input_file = object_version.file.uri
            output_file = os.path.join(directory.root_path, filename)

            job_id = start_encoding(input_file, preset, output_file)
            ObjectVersionTag.create(obj, '_sorenson_job_id', job_id)
            job_info = dict(
                preset=preset,
                job_id=job_id,
                file_instance=str(file_instance.id),
                uri=output_file,
                object_version=str(obj.version_id),
                key=obj.key,
                tags=obj.get_tags(),
            )
        db.session.commit()

        self.update_state(
            state=STARTED,
            meta=dict(
                payload=dict(job_info=job_info),
                message='Started transcoding.'
            )
        )
        job_ids.append(job_info)

    # Monitor jobs and report accordingly
    while job_ids:
        info = job_ids.popleft()

        # Get job status
        status = get_encoding_status(info['job_id'])['Status']
        percentage = 100 if status['TimeFinished'] else status['Progress']
        info['percentage'] = percentage

        # Update task's state for each individual preset; report the job
        # that was just polled, not the last one that was started.
        self.update_state(
            state=STARTED,
            meta=dict(
                payload=dict(job_info=info),
                message='Transcoding {0}'.format(percentage),
            )
        )

        # Set file's location for completed jobs
        if percentage == 100:
            with db.session.begin_nested():
                uri = info['uri']
                # Hash in chunks so the whole video is never held in memory
                md5 = hashlib.md5()
                with open(uri, 'rb') as transcoded_file:
                    for chunk in iter(
                            lambda: transcoded_file.read(1024 * 1024), b''):
                        md5.update(chunk)
                digest = md5.hexdigest()
                size = os.path.getsize(uri)
                checksum = '{0}:{1}'.format('md5', digest)
                FileInstance.get(
                    info['file_instance']).set_uri(uri, size, checksum)
            db.session.commit()
        else:
            # Not finished yet: queue the job again for another poll
            job_ids.append(info)

        time.sleep(sleep_time)
Beispiel #20
0
def video_metadata_extraction(self, uri, object_version, deposit_id,
                              *args, **kwargs):
    """Extract metadata from given video file.

    Selected technical metadata (e.g. bit rate) is attached to the object
    version as ``ObjectVersionTags``, and the complete extracted metadata is
    patched into the record under ``/_deposit/extracted_metadata``.

    :param uri: the video's URI
    :param object_version: the object version that (will) contain the actual
           video
    :param deposit_id: the ID of the deposit
    """
    with db.session.begin_nested():
        object_version = as_object_version(object_version)

        self._base_payload = dict(
            object_version=str(object_version.version_id),
            uri=uri,
            tags=object_version.get_tags(),
            deposit_id=deposit_id,
            event_id=kwargs.get('event_id', None), )

        recid = str(PersistentIdentifier.get('depid', deposit_id).object_uuid)

        # Extract video's metadata using `ff_probe`
        metadata = json.loads(ff_probe_all(uri))

        # Add technical information to the ObjectVersion as Tags
        format_keys = [
            'duration',
            'bit_rate',
            'size',
        ]
        stream_keys = [
            'avg_frame_rate',
            'codec_name',
            'width',
            'height',
            'nb_frames',
            'display_aspect_ratio',
            'color_range',
        ]

        # Build the lookup once instead of on every iteration
        wanted_keys = set(format_keys + stream_keys)
        # Values of the first stream override same-named format values
        technical = dict(metadata['format'], **metadata['streams'][0])
        for key, value in technical.items():
            if key in wanted_keys:
                ObjectVersionTag.create(object_version, key, value)

    db.session.commit()

    # Insert metadata into deposit's metadata
    patch = [{
        'op': 'add',
        'path': '/_deposit/extracted_metadata',
        'value': metadata
    }]
    result = update_record.s(recid, patch).apply_async()
    # Wait for the patch task before reporting success
    result.get()

    # Update state
    self.update_state(
        state=SUCCESS,
        meta=dict(
            payload=dict(
                tags=object_version.get_tags(), ),
            message='Attached video metadata'))
Beispiel #21
0
    def run(self, event):
        """Run AVC workflow for video transcoding.

        Steps:
          * Download the video file (if not done yet).
          * Extract metadata from the video.
          * Run video transcoding.
          * Extract frames from the video.

        Mandatory fields in the payload:
          * uri, if the video needs to be downloaded.
          * bucket_id, only if URI is provided.
          * key, only if URI is provided.
          * version_id, if the video has been downloaded via HTTP (the previous
            fields are not needed in this case).
          * deposit_id

        Optional:
          * sse_channel, if set all the tasks will publish their status update
            to it.
          * video_presets, if not set the default presets will be used.
          * frames_start, if not set the default value will be used.
          * frames_end, if not set the default value will be used.
          * frames_gap, if not set the default value will be used.

        For more info see the tasks used in the workflow:
          * :func: `~cds.modules.webhooks.tasks.download_to_object_version`
          * :func: `~cds.modules.webhooks.tasks.video_metadata_extraction`
          * :func: `~cds.modules.webhooks.tasks.video_extract_frames`
          * :func: `~cds.modules.webhooks.tasks.video_transcode`

        :param event: the webhook event; its ``payload`` is read and its
            ``response`` is updated with key, version id and tags.
        """
        # Either a download source (uri + bucket_id + key) or an existing
        # version_id must be present in the payload.
        assert ('uri' in event.payload and 'bucket_id' in event.payload
                and 'key' in event.payload) or ('version_id' in event.payload)
        assert 'deposit_id' in event.payload

        event_id = str(event.id)

        with db.session.begin_nested():
            if 'version_id' in event.payload:
                # File already stored: only metadata extraction runs first
                object_version = as_object_version(event.payload['version_id'])
                first_step = video_metadata_extraction.si(
                    uri=object_version.file.uri,
                    object_version=str(object_version.version_id),
                    deposit_id=event.payload['deposit_id'])
            else:
                # File still has to be fetched: create the target object and
                # record where it comes from
                object_version = ObjectVersion.create(
                    bucket=event.payload['bucket_id'],
                    key=event.payload['key'])
                ObjectVersionTag.create(object_version, 'uri_origin',
                                        event.payload['uri'])
                # The whole payload is forwarded as keyword arguments, which
                # is how 'uri' and 'deposit_id' reach both tasks
                first_step = group(
                    download_to_object_version.si(
                        #  event.payload['uri'],
                        object_version=str(object_version.version_id),
                        event_id=event_id,
                        **event.payload),
                    video_metadata_extraction.si(
                        #  event.payload['uri'],
                        object_version=str(object_version.version_id),
                        event_id=event_id,
                        **event.payload),
                )

            ObjectVersionTag.create(object_version, '_event_id', event_id)

        # Read the values needed later into plain locals before committing
        mypayload = event.payload
        obj_id = str(object_version.version_id)
        obj_key = object_version.key
        obj_tags = object_version.get_tags()
        # NOTE(review): the event is detached here and re-added further down,
        # presumably to keep it out of this commit - confirm
        db.session.expunge(event)
        db.session.commit()

        # First step, then transcoding and frame extraction as one group
        result = chain(
            first_step,
            group(
                video_transcode.si(object_version=obj_id,
                                   event_id=event_id,
                                   **mypayload),
                video_extract_frames.si(object_version=str(obj_id),
                                        event_id=event_id,
                                        **mypayload),
            ),
        ).apply_async()

        with db.session.begin_nested():
            self._serialize_result(event=event, result=result)

            event.response.update(
                links=dict(),
                key=object_version.key,
                version_id=obj_id,
                tags=obj_tags,
            )
            # Mark both attributes as changed so they are written on commit
            flag_modified(event, 'response')
            flag_modified(event, 'response_headers')
            db.session.add(event)
        db.session.commit()
Beispiel #22
0
    def run(self, preset_quality, sleep_time=5, *args, **kwargs):
        """Launch video transcoding.

        Creates a new ``ObjectVersion`` for the given preset quality in the
        master's bucket, starts the encoding job, polls it until it reports
        ``Finished`` and finally attaches the transcoded file to the new
        object version.

        :param self: reference to instance of task base class
        :param preset_quality: preset quality to use for transcoding.
        :param sleep_time: time interval between requests for the Sorenson
            status.
        :raises RuntimeError: when the job status is ``Error``.
        :raises Ignore: when the encoder rejects the master's resolution.
        """
        self._base_payload.update(preset_quality=preset_quality)

        # Get master file's bucket_id
        bucket_id = self.object.bucket_id
        bucket_location = self.object.bucket.location.uri
        # Get master file's key
        master_key = self.object.key

        tags = self.object.get_tags()
        # Get master file's aspect ratio
        aspect_ratio = tags['display_aspect_ratio']
        # Get master file's width x height
        width = int(tags['width']) if 'width' in tags else None
        height = int(tags['height']) if 'height' in tags else None

        with db.session.begin_nested():
            # Create FileInstance
            file_instance = FileInstance.create()

            # Create ObjectVersion
            obj_key = self._build_slave_key(preset_quality=preset_quality,
                                            master_key=master_key)
            obj = ObjectVersion.create(bucket=bucket_id, key=obj_key)

            # Extract new location
            storage = file_instance.storage(default_location=bucket_location)
            directory, filename = storage._get_fs()

            input_file = self.object.file.uri
            output_file = os.path.join(directory.root_path, filename)

            try:
                # Start Sorenson
                job_id = start_encoding(input_file,
                                        output_file,
                                        preset_quality,
                                        aspect_ratio,
                                        max_height=height,
                                        max_width=width)
            except (InvalidResolutionError, TooHighResolutionError) as e:
                # Report the rejection in the task state, then stop the task
                exception = self._meta_exception_envelope(exc=e)
                self.update_state(state=REVOKED, meta=exception)
                raise Ignore()

            # Set revoke handler, in case of an abrupt execution halt.
            self.set_revoke_handler(partial(stop_encoding, job_id))

            # Create ObjectVersionTags
            ObjectVersionTag.create(obj, 'master', self.obj_id)
            ObjectVersionTag.create(obj, '_sorenson_job_id', job_id)
            ObjectVersionTag.create(obj, 'preset_quality', preset_quality)
            ObjectVersionTag.create(obj, 'media_type', 'video')
            ObjectVersionTag.create(obj, 'context_type', 'subformat')
            preset_info = get_preset_info(aspect_ratio, preset_quality)
            for key, value in preset_info.items():
                ObjectVersionTag.create(obj, key, value)

            # Information necessary for monitoring
            job_info = dict(
                preset_quality=preset_quality,
                job_id=job_id,
                file_instance=str(file_instance.id),
                uri=output_file,
                version_id=str(obj.version_id),
                key=obj_key,
                tags=obj.get_tags(),
                percentage=0,
            )

        db.session.commit()

        self.update_state(state=STARTED,
                          meta=dict(payload=dict(**job_info),
                                    message='Started transcoding.'))

        status = ''
        # Monitor job and report accordingly
        while status != 'Finished':
            # Get job status
            status, percentage = get_encoding_status(job_id)
            if status == 'Error':
                raise RuntimeError('Error transcoding')
            job_info['percentage'] = percentage

            # Update task's state for this preset
            self.update_state(
                state=STARTED,
                meta=dict(payload=dict(**job_info),
                          message='Transcoding {0}'.format(percentage)))

            time.sleep(sleep_time)

        # Set file's location, if job has completed
        self._clean_file_name(output_file)
        with db.session.begin_nested():
            uri = output_file
            # Hash in chunks so the whole video is never held in memory
            md5 = hashlib.md5()
            with open(uri, 'rb') as transcoded_file:
                for chunk in iter(
                        lambda: transcoded_file.read(1024 * 1024), b''):
                    md5.update(chunk)
            digest = md5.hexdigest()
            size = os.path.getsize(uri)
            checksum = '{0}:{1}'.format('md5', digest)
            file_instance.set_uri(uri, size, checksum)
            as_object_version(job_info['version_id']).set_file(file_instance)
        db.session.commit()
Beispiel #23
0
def dispose_object_version(object_version):
    """Remove the given ObjectVersion.

    The argument is resolved through ``as_object_version`` before removal.
    A falsy argument (e.g. ``None``) makes the call a no-op.
    """
    if not object_version:
        return
    as_object_version(object_version).remove()
Beispiel #24
0
    def run(self, event):
        """Register a new object version and queue its download.

        The event payload must contain:
          * uri, location to download the file from.
          * bucket_id
          * key, file name.
          * deposit_id

        The event payload may contain:
          * sse_channel, if set all the tasks will publish their status update
            to it.

        The download itself is delegated to the task
        :func:`~cds.modules.webhooks.tasks.download_to_object_version`, which
        receives the new version id, the event id and the whole payload as
        keyword arguments.
        """
        for required in ('bucket_id', 'uri', 'key', 'deposit_id'):
            assert required in event.payload

        event_id = str(event.id)

        # Create the object version and tag it with its origin and event.
        with db.session.begin_nested():
            object_version = ObjectVersion.create(
                bucket=event.payload['bucket_id'],
                key=event.payload['key'],
            )
            initial_tags = (
                ('uri_origin', event.payload['uri']),
                ('_event_id', event_id),
            )
            for tag_key, tag_value in initial_tags:
                ObjectVersionTag.create(object_version, tag_key, tag_value)
            # The event leaves the session here; it is added back at the end,
            # once its response has been filled in.
            db.session.expunge(event)
        db.session.commit()

        # Build the task signature, send it and store the async result.
        signature = download_to_object_version.s(
            object_version=str(object_version.version_id),
            event_id=event_id,
            **event.payload)
        async_result = signature.apply_async()
        self._serialize_result(event=event, result=async_result)

        with db.session.begin_nested():
            object_version = as_object_version(object_version.version_id)
            bucket_id = str(object_version.bucket_id)
            version_id = str(object_version.version_id)
            object_endpoint = 'invenio_files_rest.object_api'

            links = {
                'self': url_for(
                    object_endpoint,
                    bucket_id=bucket_id,
                    key=object_version.key,
                    _external=True,
                ),
                'version': url_for(
                    object_endpoint,
                    bucket_id=bucket_id,
                    key=object_version.key,
                    versionId=version_id,
                    _external=True,
                ),
                'cancel': url_for(
                    'invenio_webhooks.event_list',
                    receiver_id='downloader',
                    _external=True,
                ),
            }
            event.response.update(
                links=links,
                key=object_version.key,
                version_id=version_id,
                tags=object_version.get_tags(),
            )

            # Flag the mutated fields and put the event back in the session.
            for field in ('response', 'response_headers'):
                flag_modified(event, field)
            db.session.add(event)
        db.session.commit()