Example #1
def _fix_variation(fdoc, variation, nice_name):
    """Compute a human-friendly filename for one variation and look up its blob.

    The new name is ``{nice_name}{var_type}{suffix}``, where ``suffix`` is the
    extension of the variation's ``file_path`` and ``var_type`` is whatever
    ``_var_type_re`` matches in the stem of that path (empty when nothing
    matches).

    :param fdoc: file document; its ``backend`` and ``project`` select the bucket.
    :param variation: variation sub-document; only ``file_path`` is read here.
    :param nice_name: base name to use for the new filename.
    """
    from pillar.api.file_storage_backends import Bucket

    # See if we can reuse the bucket we already had.
    # NOTE(review): the visible code always constructs a new bucket object;
    # any reuse logic is not part of this excerpt.
    backend = fdoc['backend']
    pid_str = str(fdoc['project'])
    bucket_cls = Bucket.for_backend(backend)
    bucket = bucket_cls(pid_str)

    var_path = PurePosixPath(variation["file_path"])
    # NOTE: this breaks for variations with double extensions
    var_stem = var_path.stem
    m = _var_type_re.search(var_stem)
    var_type = m.group(0) if m else ''
    var_name = f'{nice_name}{var_type}{var_path.suffix}'
    # NOTE(review): the f-prefix on this format string has no placeholders; the
    # values are supplied through the logger's lazy %-arguments.
    log.info(f'    - %s → %s', variation["file_path"], var_name)

    blob = bucket.blob(variation['file_path'])
    if not blob.exists():
        log.warning('Blob %s does not exist', blob)
        # NOTE(review): no early return is visible here; confirm whether a
        # missing blob should stop processing.

    # NOTE(review): the ``try:`` block this handler belongs to is missing from
    # this excerpt, so the function is not valid Python as shown. Judging by
    # the log message it presumably updated the blob's filename to ``var_name``
    # -- restore it from the upstream source.
    except Exception:
        log.warning('Unable to update blob %s filename to %r', blob, var_name, exc_info=True)
Example #2
def move_to_bucket(file_id: ObjectId, dest_project_id: ObjectId, *, skip_storage=False):
    """Move a file + variations from its own bucket to the new project_id bucket.

    The blobs are copied to the destination bucket with ``copy_blob``; nothing
    in this function removes them from the source bucket. Afterwards the file
    document's ``project`` is switched in MongoDB and its links are regenerated.

    :param file_id: ID of the file to move.
    :param dest_project_id: Project to move to.
    :param skip_storage: If True, the storage bucket will not be touched.
        Only use this when you know what you're doing.
    :raises ValueError: when no file document with this ID exists.
    :raises RuntimeError: when the MongoDB update does not match exactly one
        document.
    """

    files_coll = current_app.db('files')
    f = files_coll.find_one(file_id)
    if f is None:
        raise ValueError(f'File with _id: {file_id} not found')

    # Move file and variations to the new bucket.
    if skip_storage:
        log.warning('NOT ACTUALLY MOVING file %s on storage, just updating MongoDB', file_id)
    else:
        from pillar.api.file_storage_backends import Bucket
        bucket_class = Bucket.for_backend(f['backend'])
        src_bucket = bucket_class(str(f['project']))
        dst_bucket = bucket_class(str(dest_project_id))

        # The main file first, then each of its variations.
        src_blob = src_bucket.get_blob(f['file_path'])
        src_bucket.copy_blob(src_blob, dst_bucket)

        for var in f.get('variations', []):
            src_blob = src_bucket.get_blob(var['file_path'])
            src_bucket.copy_blob(src_blob, dst_bucket)

    # Update the file document after moving was successful.
    # No need to update _etag or _updated, since that'll be done when
    # the links are regenerated at the end of this function.
    log.info('Switching file %s to project %s', file_id, dest_project_id)
    update_result = files_coll.update_one({'_id': file_id},
                                          {'$set': {'project': dest_project_id}})
    if update_result.matched_count != 1:
        raise RuntimeError(
            'Unable to update file %s in MongoDB: matched_count=%i; modified_count=%i' % (
                file_id, update_result.matched_count, update_result.modified_count))

    log.info('Switching file %s: matched_count=%i; modified_count=%i',
             file_id, update_result.matched_count, update_result.modified_count)

    # Regenerate the links for this file, using the in-memory copy of the
    # document with the project already switched.
    f['project'] = dest_project_id
    generate_all_links(f, now=utils.utcnow())
Example #3
def _process_image(bucket: Bucket, file_id: ObjectId,
                   local_file: tempfile._TemporaryFileWrapper, src_file: dict):
    """Record the image dimensions on ``src_file`` and generate its thumbnails.

    :param bucket: storage bucket the thumbnails are sent to.
    :param file_id: ID of the file being processed; in the visible code it is
        only used in log messages.
    :param local_file: local copy of the image. It is opened with PIL and its
        ``name`` is used as the path thumbnails are generated from.
    :param src_file: the file document, updated in place: ``width``,
        ``height``, ``variations`` and ``status`` are set.
    """
    from PIL import Image

    im = Image.open(local_file)
    res = im.size
    src_file['width'] = res[0]
    src_file['height'] = res[1]

    # Generate previews
    log.info('Generating thumbnails for file %s', file_id)
    local_path = pathlib.Path(local_file.name)
    name_base = pathlib.Path(src_file['name']).stem
    src_file['variations'] = imaging.generate_local_thumbnails(
        name_base, local_path)

    # Send those previews to Google Cloud Storage.
    # NOTE(review): the opening of this logging call is missing from this
    # excerpt; only its arguments remain, so the block is not valid Python.
        'Uploading %i thumbnails for file %s to Google Cloud Storage '
        '(GCS)', len(src_file['variations']), file_id)

    # TODO: parallelize this at some point.
    for variation in src_file['variations']:
        fname = variation['file_path']
        if current_app.config['TESTING']:
            log.warning('  - NOT sending thumbnail %s to %s', fname, bucket)
            # NOTE(review): an ``else:`` presumably separated the "NOT sending"
            # message above from the upload path below; the upload call itself
            # is also missing from this excerpt.
            blob = bucket.blob(fname)
            log.debug('  - Sending thumbnail %s to %s', fname, blob)

            # NOTE(review): the body of this condition is missing; what happens
            # to variations of size 't' cannot be told from here.
            if variation.get('size') == 't':

        # NOTE(review): the ``try:`` and the statement it guards are missing.
        # Judging by the message below it presumably deleted the local
        # thumbnail at variation['local_path'] -- confirm against upstream.
        except OSError:
                'Unable to unlink %s, ignoring this but it will need '
                'cleanup later.', variation['local_path'])

        # The local path only makes sense on this machine, so it is dropped
        # from the variation before the document is used further.
        del variation['local_path']

    log.info('Done processing file %s', file_id)
    src_file['status'] = 'complete'
Example #4
def generate_link(backend,
                  file_path: str,
                  project_id: str = None,
                  is_public=False) -> str:
    """Hook to check the backend of a file resource, to build an appropriate link
    that can be used by the client to retrieve the actual file.

    :param backend: name of the storage backend. The visible code handles
        ``'gcs'``, ``'local'``, ``'pillar'``, ``'cdnsun'`` and ``'unittest'``.
    :param file_path: path of the file within the storage.
    :param project_id: passed to the bucket class for the ``'gcs'`` and
        ``'local'`` backends.
    :param is_public: forwarded to ``blob.get_url()`` for the ``'gcs'`` and
        ``'local'`` backends.
    :returns: the link, or an empty string when the blob cannot be found or
        the backend is unknown.
    """

    # TODO: replace config['TESTING'] with mocking GCS.
    if backend == 'gcs' and current_app.config['TESTING']:
        # NOTE(review): the rest of this log call (end of the message, its
        # arguments and the closing parenthesis) is missing from this excerpt.
        log.info('Skipping GCS link generation, and returning a fake link '
        return '/path/to/testing/gcs/%s' % file_path

    if backend in {'gcs', 'local'}:
        from ..file_storage_backends import Bucket

        bucket_cls = Bucket.for_backend(backend)
        storage = bucket_cls(project_id)
        blob = storage.get_blob(file_path)

        if blob is None:
            # NOTE(review): the opening of this logging call is missing from
            # this excerpt; only its arguments remain.
                'generate_link(%r, %r): unable to find blob for file'
                ' path, returning empty link.', backend, file_path)
            return ''

        return blob.get_url(is_public=is_public)

    if backend == 'pillar':  # obsolete, replace with local.
        # NOTE(review): the remaining url_for() arguments and its closing
        # parenthesis are missing from this excerpt.
        return url_for('file_storage.index',
    if backend == 'cdnsun':
        return hash_file_path(file_path, None)
    if backend == 'unittest':
        # Fake but deterministic link: the MD5 hex digest of the file path.
        return 'https://unit.test/%s' % md5(file_path.encode()).hexdigest()

    # NOTE(review): the opening of this logging call is missing from this
    # excerpt; only its arguments remain.
        'generate_link(): Unknown backend %r, returning empty string '
        'as new link.', backend)
    return ''
Example #5
    def storage_backend(self):
        """Return the bucket class registered for the ``'local'`` backend."""
        # Imported at call time, not at module import time.
        from pillar.api import file_storage_backends

        local_bucket_cls = file_storage_backends.Bucket.for_backend('local')
        return local_bucket_cls
Example #6
def zencoder_notifications():
    """Handle a job notification sent by Zencoder.

    See: https://app.zencoder.com/docs/guides/getting-started/notifications#api_version_2

    Unless the app runs with ``DEBUG`` enabled, the request has to carry the
    configured ``ZENCODER_NOTIFICATIONS_SECRET`` in its
    ``X-Zencoder-Notification-Secret`` header. The file document is found via
    the Zencoder job ID; its processing status is updated and, for every
    encoded output, the matching variation is renamed on the storage and
    updated with the output's properties.

    :returns: a ``(body, status)`` tuple: 204 on success, 200 for a failed job
        (the notification itself was handled), 200/404 for an unknown job
        (200 only when ``DEBUG`` is set), 500 when saving the file document
        fails. Aborts with 403 when the app is not configured for Zencoder
        and with 401 when the secret is missing or wrong.
    """

    if current_app.config['ENCODING_BACKEND'] != 'zencoder':
        log.warning('Received notification from Zencoder but app not configured for Zencoder.')
        return abort(403)

    if not current_app.config['DEBUG']:
        # If we are in production, look for the Zencoder header secret
        try:
            notification_secret_request = request.headers[
                'X-Zencoder-Notification-Secret']
        except KeyError:
            log.warning('Received Zencoder notification without secret.')
            return abort(401)
        # If the header is found, check it against the one in the config
        notification_secret = current_app.config['ZENCODER_NOTIFICATIONS_SECRET']
        if notification_secret_request != notification_secret:
            log.warning('Received Zencoder notification with incorrect secret.')
            return abort(401)

    # Cast request data into a dict
    data = request.get_json()

    if log.isEnabledFor(logging.DEBUG):
        from pprint import pformat
        log.debug('Zencoder job JSON: %s', pformat(data))

    files_collection = current_app.data.driver.db['files']
    # Find the file object based on processing backend and job_id
    zencoder_job_id = data['job']['id']
    lookup = {'processing.backend': 'zencoder',
              'processing.job_id': str(zencoder_job_id)}
    file_doc = files_collection.find_one(lookup)
    if not file_doc:
        log.warning('Unknown Zencoder job id %r', zencoder_job_id)
        # Return 200 OK when debugging, or Zencoder will keep trying and trying and trying...
        # which is what we want in production.
        return "Not found, but that's okay.", (200 if current_app.config['DEBUG'] else 404)

    file_id = ObjectId(file_doc['_id'])
    # Remove internal keys (so that we can run put internal)
    file_doc = utils.remove_private_keys(file_doc)

    # Update processing status
    job_state = data['job']['state']
    file_doc['processing']['status'] = job_state

    if job_state == 'failed':
        log.warning('Zencoder job %s for file %s failed: %s', zencoder_job_id, file_id,
                    json.dumps(data, sort_keys=True, indent=4))

        file_doc['status'] = 'failed'
        current_app.put_internal('files', file_doc, _id=file_id)

        # This is 'okay' because we handled the Zencoder notification properly.
        return "You failed, but that's okay.", 200

    log.info('Zencoder job %s for file %s completed with status %s.', zencoder_job_id, file_id,
             job_state)

    # For every variation encoded, try to update the file object
    storage_name, _ = os.path.splitext(file_doc['file_path'])
    # NOTE(review): ``nice_name`` and ``new_blob`` (below) are not used anywhere
    # in the code that was available; presumably a follow-up step on the
    # renamed blob was lost. Confirm against upstream before removing them.
    nice_name, _ = os.path.splitext(file_doc['filename'])

    bucket_class = Bucket.for_backend(file_doc['backend'])
    bucket = bucket_class(str(file_doc['project']))

    for output in data['outputs']:
        video_format = output['format']
        # Change the zencoder 'mpeg4' format to 'mp4' used internally
        video_format = 'mp4' if video_format == 'mpeg4' else video_format

        # Find a variation matching format and resolution. This has to compare
        # against video_format; comparing against the builtin ``format`` never
        # matches. The width is read with .get() so that variations without a
        # width simply fall through to the format-only match below.
        variation = next((v for v in file_doc['variations']
                          if v['format'] == video_format
                          and v.get('width') == output['width']), None)
        # Fall back to a variation matching just the format
        if variation is None:
            variation = next((v for v in file_doc['variations']
                              if v['format'] == video_format), None)
        if variation is None:
            log.warning('Unable to find variation for video format %s for file %s',
                        video_format, file_id)
            # Nothing to rename or update for this output.
            continue

        # Rename the file to include the now-known size descriptor.
        size = size_descriptor(output['width'], output['height'])
        new_fname = f'{storage_name}-{size}.{video_format}'

        # Rename the file on the storage. This is best-effort: when it fails
        # the variation keeps pointing at the old, still existing, name.
        blob = bucket.blob(variation['file_path'])
        try:
            new_blob = bucket.rename_blob(blob, new_fname)
        except Exception:
            log.warning('Unable to rename blob %r to %r. Keeping old name.',
                        blob, new_fname, exc_info=True)
        else:
            variation['file_path'] = new_fname

        # TODO: calculate md5 on the storage
        variation.update({
            'height': output['height'],
            'width': output['width'],
            'length': output['file_size_in_bytes'],
            'duration': data['input']['duration_in_ms'] / 1000,
            'md5': output['md5_checksum'] or '',  # they don't do MD5 for GCS...
            'size': size,
        })

    file_doc['status'] = 'complete'

    # Force an update of the links on the next load of the file.
    file_doc['link_expires'] = utils.utcnow() - datetime.timedelta(days=1)

    r, _, _, status = current_app.put_internal('files', file_doc, _id=file_id)
    if status != 200:
        log.error('unable to save file %s after Zencoder notification: %s', file_id, r)
        return json.dumps(r), 500

    return '', 204
Example #7
    def test_actual_notification(self):
        """Test with actual file and notification documents."""
        # NOTE(review): most of this file document (its keys, the variation
        # contents and all closing braces) is missing from this excerpt, so
        # the test is not valid Python as shown.
        file_doc = {
            "4. pose-library-previews.mkv",
            "variations": [{
            "processing": {
                "status": "processing",
                "job_id": "447043841",
                "backend": "zencoder"

        # Make sure the to-be-renamed file exists on the local storage bucket.
        from pillar.api.file_storage_backends import Bucket, local
        bucket_class = Bucket.for_backend('local')
        bucket = bucket_class(str(file_doc['project']))
        # NOTE(review): the arguments of this call, and whatever creates the
        # blob's content, are missing from this excerpt.
        blob: local.LocalBlob = bucket.blob(

        files_coll = self.app.db('files')
        file_id = file_doc['_id']

        # Notification payload with 'input', 'job' and 'outputs' sections.
        # NOTE(review): the closing braces of each section are missing.
        notif = {
            'input': {
                'audio_bitrate_in_kbps': None,
                'audio_codec': None,
                'audio_sample_rate': None,
                'channels': None,
                'duration_in_ms': 100840,
                'file_size_in_bytes': 39283494,
                'format': 'matroska',
                'frame_rate': 25.0,
                'height': 1080,
                'id': 447014781,
                'md5_checksum': None,
                'state': 'finished',
                'total_bitrate_in_kbps': None,
                'video_bitrate_in_kbps': 3054,
                'video_codec': 'h264',
                'width': 1920
            'job': {
                'created_at': '2018-01-23T15:16:17Z',
                # Same number as processing.job_id in the file document above.
                'id': 447043841,
                'pass_through': None,
                'state': 'finished',
                'submitted_at': '2018-01-23T15:16:17Z',
                'test': False,
                'updated_at': '2018-01-23T15:16:42Z'
            'outputs': [{
                'audio_bitrate_in_kbps': None,
                'audio_codec': None,
                'audio_sample_rate': None,
                'channels': None,
                'duration_in_ms': 100840,
                'file_size_in_bytes': 6050358,
                'format': 'mpeg4',
                'fragment_duration_in_ms': None,
                'frame_rate': 25.0,
                'height': 1080,
                'id': 1656104422,
                'label': None,
                'md5_checksum': None,
                'rfc_6381_audio_codec': None,
                'rfc_6381_video_codec': 'avc1.420028',
                'state': 'finished',
                'total_bitrate_in_kbps': 479,
                'type': 'standard',
                'url': 'gcs://59d69c94f488551661254569/_/02-mp4.mp4',
                'video_bitrate_in_kbps': 479,
                'video_codec': 'h264',
                'width': 1920

        # NOTE(review): the call that sends ``notif`` to the application is
        # missing from this excerpt; only its ``headers`` argument remains.
                  headers={'X-Zencoder-Notification-Secret': self.secret},

        # The stored file must be marked complete, and its processing status
        # must equal the 'finished' job state from the notification.
        db_file = files_coll.find_one(file_id)
        self.assertEqual('complete', db_file['status'])
        self.assertEqual('finished', db_file['processing']['status'])