Ejemplo n.º 1
0
    def check_smil(video):
        """Assert the published master file exposes the expected SMIL entry.

        The first item of the master's ``playlist`` must be ``test.smil``
        with the expected type fields and a ``master`` tag equal to the
        version id of the published master. The master file of ``video``
        itself must have a different version id than the published one.
        """
        _pid, published = video.fetch_published()
        published_master = CDSVideosFilesIterator.get_master_video_file(
            published)
        smil = published_master['playlist'][0]

        expected_fields = (
            ('key', 'test.smil'),
            ('content_type', 'smil'),
            ('context_type', 'playlist'),
            ('media_type', 'text'),
        )
        for field, expected in expected_fields:
            assert smil[field] == expected
        assert smil['tags']['master'] == published_master['version_id']

        # the master of the video passed in must not share its version id
        # with the master of the published record
        own_master = CDSVideosFilesIterator.get_master_video_file(video)
        assert own_master['version_id'] != published_master['version_id']
Ejemplo n.º 2
0
    def check_files(video):
        """Assert every file in the video bucket is fully dumped.

        For each object version in the bucket resolved from ``video`` this
        checks that its bucket resolves, that a ``FileInstance`` row exists
        for its ``file_id`` and that the dumped dictionary carries all the
        expected keys. It then checks that at least one extracted-metadata
        key is present both in the master file tags and in
        ``video['_cds']['extracted_metadata']``.
        """
        bucket = CDSRecordDumpLoader._get_bucket(record=video)
        files = [
            dump_object(obj)
            for obj in ObjectVersion.get_by_bucket(bucket=bucket)
        ]
        required_keys = (
            'checksum',
            'content_type',
            'context_type',
            'key',
            'links',
            'media_type',
            'tags',
        )
        for file_ in files:
            assert as_bucket(file_['bucket_id']) is not None
            # Execute the query: a bare Query object is never None, so
            # comparing it to None would always pass.
            assert FileInstance.query.filter_by(
                id=file_['file_id']).first() is not None
            for key in required_keys:
                assert key in file_

        # check extracted metadata
        master_video = CDSVideosFilesIterator.get_master_video_file(video)
        assert any(
            key in master_video['tags']
            for key in ExtractMetadataTask._all_keys
        )
        assert any(
            key in video['_cds']['extracted_metadata']
            for key in ExtractMetadataTask._all_keys
        )
Ejemplo n.º 3
0
    def _get_master_video(record):
        """Look up the master video file of ``record`` with its geometry.

        :returns: tuple ``(master, display_aspect_ratio, width, height)``
            where width and height are converted to ``int``.
        :raises Exception: when the record has no master video file.
        """
        master_file = CDSVideosFilesIterator.get_master_video_file(record)
        if master_file:
            tags = master_file['tags']
            return (
                master_file,
                tags['display_aspect_ratio'],
                int(tags['width']),
                int(tags['height']),
            )

        raise Exception("No master video found for the given record")
Ejemplo n.º 4
0
 def check_tag_master(record):
     """Assert every ``master``-tagged file points at the record's master.

     All objects in the record bucket that carry a truthy ``master`` tag
     must have that tag equal to the version id of the master video file.
     """
     bucket = CDSRecordDumpLoader._get_bucket(record=record)
     master = CDSVideosFilesIterator.get_master_video_file(record)

     tagged_files = []
     for obj in ObjectVersion.get_by_bucket(bucket=bucket):
         if obj.get_tags().get('master'):
             tagged_files.append(dump_object(obj))

     # collect the offenders instead of folding into a single boolean
     mismatched = [
         tagged for tagged in tagged_files
         if tagged['tags']['master'] != master['version_id']
     ]
     assert not mismatched
Ejemplo n.º 5
0
 def check_gif(video, mock_gif):
     """Assert the first recorded call on ``mock_gif`` targets the deposit.

     The keyword arguments of the first recorded call must reference the
     master file and bucket of the video deposit, carry exactly 10 frames
     and include an ``output_dir``.
     """
     # first recorded call is a (name, args, kwargs) triple
     _name, _positional, gif_kwargs = mock_gif.mock_calls[0]

     # rebuild the record, then resolve the deposit it belongs to
     record = CDSRecord(dict(video), video.model)
     deposit = deposit_video_resolver(record['_deposit']['id'])
     deposit_master = CDSVideosFilesIterator.get_master_video_file(deposit)

     assert gif_kwargs['master_id'] == deposit_master['version_id']
     assert str(deposit.files.bucket.id) == gif_kwargs['bucket']
     assert len(gif_kwargs['frames']) == 10
     assert 'output_dir' in gif_kwargs
Ejemplo n.º 6
0
def extract_frames(recid, depid):
    """Re-trigger the extract frames task.

    When ``recid`` is given, the record is resolved and its deposit id
    (``record['_deposit']['id']``) replaces ``depid``. The master video file
    of the resolved deposit is then handed to ``ExtractFramesTask``, which is
    scheduled asynchronously.

    :param recid: record identifier; takes precedence over ``depid``.
    :param depid: deposit identifier, used when ``recid`` is falsy.
    :raises ClickException: when neither identifier is given, or when the
        deposit has no master video file.
    """
    if not recid and not depid:
        raise ClickException('Missing option "--recid" or "--depid"')

    if recid:
        _, record = record_resolver.resolve(recid)
        depid = record['_deposit']['id']

    video_deposit = deposit_video_resolver(depid)
    master = CDSVideosFilesIterator.get_master_video_file(video_deposit)
    if not master:
        # Fail with a readable message instead of a TypeError on
        # ``master['version_id']`` below.
        raise ClickException(
            'No master video found for deposit "{0}"'.format(depid))

    ExtractFramesTask().si(version_id=master['version_id'],
                           deposit_id=depid).apply_async()
Ejemplo n.º 7
0
    def _force_sync_deposit_bucket(record):
        """Replace deposit bucket with a copy of the record bucket.

        Steps, in order:

        1. Snapshot the record bucket, unlock the snapshot and commit.
        2. Re-point the ``RecordsBuckets`` row of the old deposit bucket to
           the snapshot.
        3. Re-write the ``master`` tag of the tagged objects in the snapshot
           so that it holds the version id of the snapshot's master object.
        4. Unlock and remove the old deposit bucket.
        5. Update the ``_buckets``, ``_deposit`` and ``_files`` metadata and
           commit both deposit and record.

        :param record: record whose bucket is copied; the code reads
            ``record.depid``, ``record.files`` and ``record['_buckets']``.
        :returns: tuple ``(old deposit bucket id, new bucket id)``.
        """
        # Load the deposit linked to this record through its depid.
        deposit = Video.get_record(record.depid.object_uuid)
        # if deposit['_deposit']['status'] == 'draft':
        #     raise RuntimeError('Deposit in edit mode: {0}'.format(deposit.id))
        deposit_old_bucket = deposit.files.bucket
        # create a copy of record bucket
        new_bucket = record.files.bucket.snapshot()
        # The snapshot is explicitly unlocked before being attached to the
        # deposit.
        new_bucket.locked = False
        db.session.commit()
        # ``.one()`` expects exactly one RecordsBuckets row for the old
        # deposit bucket and raises otherwise.
        rb = RecordsBuckets.query.filter(
            RecordsBuckets.bucket_id == deposit_old_bucket.id).one()
        rb.bucket = new_bucket
        db.session.add(rb)
        db.session.commit()

        # Put tags correctly pointing to the right object
        master_file = CDSVideosFilesIterator.get_master_video_file(record)
        if master_file:
            # NOTE(review): assumes the snapshot contains an object with the
            # master's key; a missing object would fail on ``.version_id``
            # below -- confirm.
            master_deposit_obj = ObjectVersion.get(new_bucket,
                                                   master_file['key'])

            # Head objects of the new bucket that have a file and carry a
            # ``master`` tag.
            for slave in ObjectVersion.query_heads_by_bucket(
                    bucket=new_bucket).join(ObjectVersion.tags).filter(
                        ObjectVersion.file_id.isnot(None),
                        ObjectVersionTag.key == 'master'):
                ObjectVersionTag.create_or_update(
                    slave, 'master', str(master_deposit_obj.version_id))
                db.session.add(slave)
                # Committed once per re-tagged object.
                db.session.commit()

        # Delete the old bucket
        # The bucket is unlocked first (presumably a locked bucket cannot be
        # removed -- TODO confirm); the return value of ``remove()`` is
        # intentionally discarded.
        deposit_old_bucket.locked = False
        _ = deposit_old_bucket.remove()

        # Point both metadata documents to the new bucket and align the
        # record's ``_deposit`` section with the deposit's.
        deposit['_buckets']['deposit'] = str(new_bucket.id)
        record['_buckets']['deposit'] = str(new_bucket.id)
        record['_deposit'] = deposit['_deposit']
        # Refresh the serialized file list of the deposit.
        deposit['_files'] = deposit.files.dumps()
        deposit.commit()
        record.commit()
        db.session.commit()

        return deposit_old_bucket.id, new_bucket.id
Ejemplo n.º 8
0
def test_video_name_after_publish(api_app, db, api_project, users):
    """Check the published master file key is ``<report number>.mp4``."""
    _project, first_video, _second_video = api_project
    depid = first_video['_deposit']['id']
    uploaded_name = 'test.mp4'

    # attach a master file to the first video deposit
    add_master_to_video(
        video_deposit=first_video,
        filename=uploaded_name,
        stream=BytesIO(b'1234'),
        video_duration='15',
    )

    # publish the video as the first user
    prepare_videos_for_publish([first_video])
    resolved_deposit = deposit_video_resolver(depid)
    login_user(User.query.get(users[0]))
    published_deposit = resolved_deposit.publish()

    # the master key of the published record is compared against the
    # first report number of that record
    _pid, published_record = published_deposit.fetch_published()
    published_master = CDSVideosFilesIterator.get_master_video_file(
        published_record)
    actual_key = published_master['key']
    expected_key = '{}.mp4'.format(published_record['report_number'][0])
    assert actual_key == expected_key
Ejemplo n.º 9
0
def create_all_missing_subformats(id_type, id_value):
    """Schedule transcoding for every missing subformat of a video.

    The qualities already present on the master's subformats are subtracted
    from all distinct qualities; what remains is kept only if
    ``can_be_transcoded`` accepts it for the master's aspect ratio, width
    and height.

    :returns: list of the qualities for which a transcoding task was
        scheduled (empty when nothing is missing).
    """
    _validate(id_type=id_type)

    video_deposit, dep_uuid = _resolve_deposit(id_type, id_value)
    master, aspect_ratio, width, height = _get_master_video(video_deposit)

    existing_qualities = {
        subformat['tags']['preset_quality']
        for subformat in CDSVideosFilesIterator.get_video_subformats(master)
    }
    pending_qualities = set(get_all_distinct_qualities()) - existing_qualities
    transcodables = [
        quality for quality in pending_qualities
        if can_be_transcoded(quality, aspect_ratio, width, height)
    ]

    if not transcodables:
        return transcodables

    # sequential (and immutable) transcoding to avoid MergeConflicts on bucket
    signatures = [
        MaintenanceTranscodeVideoTask().si(
            version_id=master['version_id'],
            preset_quality=quality,
            deposit_id=dep_uuid,
        )
        for quality in transcodables
    ]
    chain(signatures).apply_async()

    return transcodables
Ejemplo n.º 10
0
def missing_subformats_report(start_date=None, end_date=None):
    """Send a report of missing subformats to CDS admins.

    For every record selected by ``_filter_by_last_created`` the report
    collects: records without a master video, records whose master is
    missing transcodable subformats, and files (subformats, frames,
    subtitles) stored in a different bucket than the master. When the report
    is not empty it is e-mailed to ``CDS_ADMIN_EMAIL``.

    :param start_date: forwarded as-is to ``_filter_by_last_created``.
    :param end_date: forwarded to ``_filter_by_last_created``; when falsy the
        ``end_date`` stored in the cache is used (7 days ago when the cache
        has none).
    """
    report = []

    def _get_master_video(record):
        """Return ``(master, aspect ratio, width, height)`` for a record.

        Returns four ``None`` values when the record has no master video, so
        that the caller can report the record and continue with the others
        instead of aborting the whole run.
        """
        master = CDSVideosFilesIterator.get_master_video_file(record)
        if not master:
            return None, None, None, None

        tags = master['tags']
        return master, tags['display_aspect_ratio'], \
            int(tags['width']), int(tags['height'])

    def _get_missing_subformats(subformats, ar, w, h):
        """Return missing and transcodable subformats."""
        dones = [
            subformat['tags']['preset_quality'] for subformat in subformats
        ]
        missing = set(get_all_distinct_qualities()) - set(dones)
        return [q for q in missing if can_be_transcoded(q, ar, w, h)]

    def _format_report(report):
        """Format the email body for the missing subformats report."""
        lines = []
        for entry in report:
            lines.append('Message: {}'.format(entry.get('message')))
            lines.append(u'Record: {}'.format(
                format_pid_link(current_app.config['RECORDS_UI_ENDPOINT'],
                                entry.get('recid'))))
            lines.append('Report number: {}'.format(
                entry.get('report_number')))
            lines.append('Missing subformats: {}'.format(
                entry.get('missing_subformats')))
            if entry.get('buckets'):
                # Bucket mismatch details are collected below; show them.
                lines.append('Buckets: {}'.format(entry.get('buckets')))
            lines.append(('-' * 80) + '\n')

        return '\n'.join(lines)

    cache = current_cache.get('task_missing_subformats:details') or {}
    if 'end_date' not in cache:
        # Set the end date to 7 days ago
        cache['end_date'] = datetime.utcnow() - timedelta(days=7)

    record_uuids = _filter_by_last_created(_get_all_records_with_bucket(),
                                           start_date,
                                           end_date or cache['end_date'])

    for record_uuid in record_uuids:
        record = CDSRecord.get_record(record_uuid.id)
        master, ar, w, h = _get_master_video(record)

        if not master:
            report.append({
                'message': 'No master video found for the given record',
                'recid': record.get('recid'),
                'report_number': record['report_number'][0]
            })
            continue

        # check missing subformats
        subformats = CDSVideosFilesIterator.get_video_subformats(master)
        missing = _get_missing_subformats(subformats, ar, w, h)
        if missing:
            report.append({
                'message': 'Missing subformats for the given record',
                'recid': record.get('recid'),
                'report_number': record['report_number'][0],
                'missing_subformats': missing
            })

        # check bucket ids consistency
        bucket_id = master['bucket_id']
        for f in \
            subformats + CDSVideosFilesIterator.get_video_frames(master) + \
                CDSVideosFilesIterator.get_video_subtitles(record):

            if f['bucket_id'] != bucket_id:
                report.append({
                    'message':
                    'Different buckets in the same record',
                    'recid':
                    record.get('recid'),
                    'report_number':
                    record['report_number'][0],
                    'buckets':
                    'Master: {0} - {1}: {2}'.format(bucket_id, f['key'],
                                                    f['bucket_id'])
                })

    # Remember when this run happened for the next execution.
    cache['end_date'] = datetime.utcnow()
    current_cache.set('task_missing_subformats:details', cache, timeout=-1)

    if report:
        # Format and send the email
        subject = u'[CDS Videos] Missing subformats report [{}]'.format(
            datetime.now())
        body = _format_report(report)
        sender = current_app.config['NOREPLY_EMAIL']
        recipients = [current_app.config['CDS_ADMIN_EMAIL']]
        _send_email(subject, body, sender, recipients)
Ejemplo n.º 11
0
def subformats_integrity_report(start_date=None, end_date=None):
    """Send a report of all corrupted subformats to CDS admins.

    For every record selected by ``_filter_by_last_created`` the master file
    and each of its subformats are probed with ``ff_probe_all``. Records
    without master or without subformats, and files that cannot be accessed
    or probed, are collected and e-mailed to ``CDS_ADMIN_EMAIL``.

    The cached start date is only advanced when every probed file was
    accessible on disk, so that inaccessible files are checked again on the
    next run.

    :param start_date: lower bound forwarded to ``_filter_by_last_created``;
        defaults to the cached start date (4 days ago when not cached).
    :param end_date: upper bound forwarded to ``_filter_by_last_created``;
        defaults to two days ago.
    """
    report = []
    update_cache = True

    def _probe_video_file(obj):
        """Run ffprobe on a video file.

        Return a tuple ``(report, accessible)``: ``report`` is an empty dict
        when no problem was found, ``accessible`` is ``False`` only when the
        file path does not exist on disk.
        """
        file_report = {}
        path = obj.file.uri.replace(
            current_app.config['VIDEOS_XROOTD_ENDPOINT'], '')

        if not os.path.exists(path):
            # The file is not on disk: there is no exception to report here,
            # only the missing file itself.
            file_report = {
                'file_name': obj.key,
                'message': 'The file cannot be accessed'
            }

            # Return the file report and the file accessibility
            return (file_report, False)

        try:
            # Expecting the storage to be mounted on the machine
            probe = ff_probe_all(path)

            if not probe.get('streams'):
                file_report = {
                    'file_name': obj.key,
                    'message': 'No video stream'
                }

        except Exception as e:
            # Best effort: any probing failure is reported, not raised.
            file_report = {
                'file_name': obj.key,
                'message': 'Error while running ff_probe_all',
                'error': repr(e)
            }

        # Return the file report and the file accessibility
        return (file_report, True)

    def _format_report(report):
        """Format the email body for the subformats integrity report."""
        lines = []
        for entry in report:
            lines.append(u'Record: {}'.format(
                format_pid_link(current_app.config['RECORDS_UI_ENDPOINT'],
                                entry.get('recid'))))
            lines.append('Message: {}'.format(entry.get('message')))

            if entry.get('report_number'):
                lines.append('Report number: {}'.format(
                    entry.get('report_number')))

            # ``subreports`` is always a list of per-file report dicts.
            subreports = entry.get('subreports')
            if subreports:
                lines.append(('-' * 10) + '\n')

                for subreport in subreports:
                    lines.append('  File name: {}'.format(
                        subreport.get('file_name')))
                    lines.append('  Message: {}'.format(
                        subreport.get('message')))

                    if subreport.get('error'):
                        lines.append('  Error: {}'.format(
                            subreport.get('error')))

            lines.append(('-' * 80) + '\n')

        return '\n'.join(lines)

    cache = current_cache.get('task_subformats_integrity:details') or {}
    two_days_ago = datetime.utcnow() - timedelta(days=2)
    if 'start_date' not in cache:
        # Set the start date to 4 days ago
        cache['start_date'] = datetime.utcnow() - timedelta(days=4)

    record_uuids = _filter_by_last_created(_get_all_records_with_bucket(),
                                           start_date or cache['start_date'],
                                           end_date or two_days_ago)

    for record_uuid in record_uuids:
        record = CDSRecord.get_record(record_uuid.id)
        master = CDSVideosFilesIterator.get_master_video_file(record)

        if not master:
            report.append({
                'recid': record['recid'],
                'message': 'No master video found for the given record',
                'report_number': record['report_number'][0]
            })
            continue

        master_obj = as_object_version(master['version_id'])
        subreport_master, accessible = _probe_video_file(master_obj)

        if not accessible:
            update_cache = False

        if subreport_master:
            report.append({
                'recid': record['recid'],
                'message': 'Master file issue report',
                'report_number': record['report_number'][0],
                # Wrapped in a list: the formatter iterates over subreports.
                'subreports': [subreport_master]
            })

        subformats = CDSVideosFilesIterator.get_video_subformats(master)
        if not subformats:
            report.append({
                'recid': record['recid'],
                'message': 'No subformats found'
            })
            continue

        subformats_subreport = []
        for subformat in subformats:
            subformat_obj = as_object_version(subformat['version_id'])
            subformat_subreport, accessible = _probe_video_file(
                subformat_obj)

            if not accessible:
                update_cache = False

            if subformat_subreport:
                subformats_subreport.append(subformat_subreport)

        if subformats_subreport:
            report.append({
                'recid': record['recid'],
                'message': 'Subformats issues found',
                'report_number': record['report_number'][0],
                'subreports': subformats_subreport
            })

    if update_cache:
        # Set the start date for next time when the task will run
        cache['start_date'] = two_days_ago
        current_cache.set('task_subformats_integrity:details',
                          cache,
                          timeout=-1)

    if report:
        # Format and send the email
        subject = u'[CDS Videos] Subformats integrity report [{}]'.format(
            datetime.now())
        body = _format_report(report)
        sender = current_app.config['NOREPLY_EMAIL']
        recipients = [current_app.config['CDS_ADMIN_EMAIL']]
        _send_email(subject, body, sender, recipients)