def _fix_variation(fdoc, variation, nice_name):
    from pillar.api.file_storage_backends import Bucket

    # See if we can reuse the bucket we already had.
    backend = fdoc['backend']
    pid_str = str(fdoc['project'])
    bucket_cls = Bucket.for_backend(backend)
    bucket = bucket_cls(pid_str)

    var_path = PurePosixPath(variation['file_path'])
    # NOTE: this breaks for variations with double extensions
    var_stem = var_path.stem
    m = _var_type_re.search(var_stem)
    var_type = m.group(0) if m else ''
    var_name = f'{nice_name}{var_type}{var_path.suffix}'
    log.info(' - %s → %s', variation['file_path'], var_name)

    blob = bucket.blob(variation['file_path'])
    if not blob.exists():
        log.warning('Blob %s does not exist', blob)
        return

    try:
        blob.update_filename(var_name)
    except Exception:
        log.warning('Unable to update blob %s filename to %r',
                    blob, var_name, exc_info=True)

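# A minimal sketch of the filename derivation above. `_var_type_re` is defined
# elsewhere in this module; here it is *assumed* to match a trailing
# '-<size/type>' suffix such as '-1080p' (assumption, for illustration only).
import re
from pathlib import PurePosixPath

_var_type_re_sketch = re.compile(r'-[a-z0-9]+$')  # assumed pattern, not the real one

def _variation_name_sketch(file_path: str, nice_name: str) -> str:
    """Derive the user-visible filename for a variation, mirroring _fix_variation."""
    var_path = PurePosixPath(file_path)
    m = _var_type_re_sketch.search(var_path.stem)
    var_type = m.group(0) if m else ''
    return f'{nice_name}{var_type}{var_path.suffix}'

# _variation_name_sketch('02a877a1-1080p.mp4', 'pose-library-previews')
# → 'pose-library-previews-1080p.mp4'
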
def move_to_bucket(file_id: ObjectId, dest_project_id: ObjectId, *, skip_storage=False):
    """Move a file + variations from its own bucket to the new project_id bucket.

    :param file_id: ID of the file to move.
    :param dest_project_id: Project to move to.
    :param skip_storage: If True, the storage bucket will not be touched.
        Only use this when you know what you're doing.
    """
    files_coll = current_app.db('files')
    f = files_coll.find_one(file_id)
    if f is None:
        raise ValueError(f'File with _id: {file_id} not found')

    # Move file and variations to the new bucket.
    if skip_storage:
        log.warning('NOT ACTUALLY MOVING file %s on storage, just updating MongoDB', file_id)
    else:
        from pillar.api.file_storage_backends import Bucket

        bucket_class = Bucket.for_backend(f['backend'])
        src_bucket = bucket_class(str(f['project']))
        dst_bucket = bucket_class(str(dest_project_id))

        src_blob = src_bucket.get_blob(f['file_path'])
        src_bucket.copy_blob(src_blob, dst_bucket)

        for var in f.get('variations', []):
            src_blob = src_bucket.get_blob(var['file_path'])
            src_bucket.copy_blob(src_blob, dst_bucket)

    # Update the file document after moving was successful.
    # No need to update _etag or _updated, since that'll be done when
    # the links are regenerated at the end of this function.
    log.info('Switching file %s to project %s', file_id, dest_project_id)
    update_result = files_coll.update_one({'_id': file_id},
                                          {'$set': {'project': dest_project_id}})
    if update_result.matched_count != 1:
        raise RuntimeError(
            'Unable to update file %s in MongoDB: matched_count=%i; modified_count=%i' % (
                file_id, update_result.matched_count, update_result.modified_count))

    log.info('Switching file %s: matched_count=%i; modified_count=%i',
             file_id, update_result.matched_count, update_result.modified_count)

    # Regenerate the links for this file.
    f['project'] = dest_project_id
    generate_all_links(f, now=utils.utcnow())

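# Hypothetical call site for move_to_bucket, e.g. from a CLI maintenance
# command. It requires an application context, and both ObjectIds below are
# made up for illustration. skip_storage=True rehearses the MongoDB update and
# link regeneration without touching the storage backend.
from bson import ObjectId

def _move_example():
    move_to_bucket(ObjectId('5a6751b33bea6a01fdfd59f0'),
                   ObjectId('5a6751b33bea6a01fdfd59aa'),
                   skip_storage=True)
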
def _process_image(bucket: Bucket,
                   file_id: ObjectId,
                   local_file: tempfile._TemporaryFileWrapper,
                   src_file: dict):
    from PIL import Image

    im = Image.open(local_file)
    res = im.size
    src_file['width'] = res[0]
    src_file['height'] = res[1]

    # Generate previews.
    log.info('Generating thumbnails for file %s', file_id)
    local_path = pathlib.Path(local_file.name)
    name_base = pathlib.Path(src_file['name']).stem
    src_file['variations'] = imaging.generate_local_thumbnails(name_base, local_path)

    # Send those previews to Google Cloud Storage.
    log.info('Uploading %i thumbnails for file %s to Google Cloud Storage (GCS)',
             len(src_file['variations']), file_id)

    # TODO: parallelize this at some point.
    for variation in src_file['variations']:
        fname = variation['file_path']
        if current_app.config['TESTING']:
            log.warning(' - NOT sending thumbnail %s to %s', fname, bucket)
        else:
            blob = bucket.blob(fname)
            log.debug(' - Sending thumbnail %s to %s', fname, blob)
            blob.upload_from_path(pathlib.Path(variation['local_path']),
                                  content_type=variation['content_type'])

            if variation.get('size') == 't':
                blob.make_public()

        try:
            os.unlink(variation['local_path'])
        except OSError:
            log.warning('Unable to unlink %s, ignoring this but it will need '
                        'cleanup later.', variation['local_path'])

        del variation['local_path']

    log.info('Done processing file %s', file_id)
    src_file['status'] = 'complete'

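# For reference, the shape of a single variation dict that the upload loop
# above relies on. The field names are the ones the code reads; the values
# are illustrative only:
_example_variation = {
    'file_path': 'picture-t.jpg',        # destination path in the bucket
    'local_path': '/tmp/picture-t.jpg',  # unlinked and removed after upload
    'content_type': 'image/jpeg',
    'size': 't',                         # the 't' (thumbnail) size is made public
    'width': 160,
    'height': 90,
}
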
def generate_link(backend, file_path: str, project_id: str = None, is_public=False) -> str:
    """Hook to check the backend of a file resource, to build an appropriate
    link that can be used by the client to retrieve the actual file.
    """
    # TODO: replace config['TESTING'] with mocking GCS.
    if backend == 'gcs' and current_app.config['TESTING']:
        log.info('Skipping GCS link generation, and returning a fake link instead.')
        return '/path/to/testing/gcs/%s' % file_path

    if backend in {'gcs', 'local'}:
        from ..file_storage_backends import Bucket

        bucket_cls = Bucket.for_backend(backend)
        storage = bucket_cls(project_id)
        blob = storage.get_blob(file_path)

        if blob is None:
            log.warning('generate_link(%r, %r): unable to find blob for file path, '
                        'returning empty link.', backend, file_path)
            return ''

        return blob.get_url(is_public=is_public)

    if backend == 'pillar':  # obsolete, replace with local.
        return url_for('file_storage.index', file_name=file_path,
                       _external=True, _scheme=current_app.config['SCHEME'])

    if backend == 'cdnsun':
        return hash_file_path(file_path, None)

    if backend == 'unittest':
        return 'https://unit.test/%s' % md5(file_path.encode()).hexdigest()

    log.warning('generate_link(): Unknown backend %r, returning empty string as new link.',
                backend)
    return ''

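# The 'unittest' branch above is pure (it never touches current_app), so the
# link it produces can be sketched standalone; the helper name is ours, for
# illustration only:
from hashlib import md5 as _md5

def _expected_unittest_link(file_path: str) -> str:
    return 'https://unit.test/%s' % _md5(file_path.encode()).hexdigest()

# generate_link('unittest', 'img.png') == _expected_unittest_link('img.png')
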
def storage_backend(self):
    from pillar.api.file_storage_backends import Bucket

    return Bucket.for_backend('local')

def zencoder_notifications():
    """Handle a notification from Zencoder about an encoding job.

    See: https://app.zencoder.com/docs/guides/getting-started/notifications#api_version_2
    """
    if current_app.config['ENCODING_BACKEND'] != 'zencoder':
        log.warning('Received notification from Zencoder but app not configured for Zencoder.')
        return abort(403)

    if not current_app.config['DEBUG']:
        # If we are in production, look for the Zencoder header secret.
        try:
            notification_secret_request = request.headers['X-Zencoder-Notification-Secret']
        except KeyError:
            log.warning('Received Zencoder notification without secret.')
            return abort(401)

        # If the header is found, check it against the one in the config.
        notification_secret = current_app.config['ZENCODER_NOTIFICATIONS_SECRET']
        if notification_secret_request != notification_secret:
            log.warning('Received Zencoder notification with incorrect secret.')
            return abort(401)

    # Cast request data into a dict.
    data = request.get_json()

    if log.isEnabledFor(logging.DEBUG):
        from pprint import pformat
        log.debug('Zencoder job JSON: %s', pformat(data))

    files_collection = current_app.data.driver.db['files']
    # Find the file object based on processing backend and job_id.
    zencoder_job_id = data['job']['id']
    lookup = {'processing.backend': 'zencoder',
              'processing.job_id': str(zencoder_job_id)}
    file_doc = files_collection.find_one(lookup)
    if not file_doc:
        log.warning('Unknown Zencoder job id %r', zencoder_job_id)
        # Return 200 OK when debugging, or Zencoder will keep trying and trying and trying...
        # which is what we want in production.
        return "Not found, but that's okay.", 200 if current_app.config['DEBUG'] else 404

    file_id = ObjectId(file_doc['_id'])
    # Remove internal keys (so that we can run put internal).
    file_doc = utils.remove_private_keys(file_doc)

    # Update the processing status.
    job_state = data['job']['state']
    file_doc['processing']['status'] = job_state

    if job_state == 'failed':
        log.warning('Zencoder job %s for file %s failed: %s', zencoder_job_id, file_id,
                    json.dumps(data, sort_keys=True, indent=4))
        file_doc['status'] = 'failed'
        current_app.put_internal('files', file_doc, _id=file_id)

        # This is 'okay' because we handled the Zencoder notification properly.
        return "You failed, but that's okay.", 200

    log.info('Zencoder job %s for file %s completed with status %s.',
             zencoder_job_id, file_id, job_state)

    # For every variation encoded, try to update the file object.
    storage_name, _ = os.path.splitext(file_doc['file_path'])
    nice_name, _ = os.path.splitext(file_doc['filename'])

    bucket_class = Bucket.for_backend(file_doc['backend'])
    bucket = bucket_class(str(file_doc['project']))

    for output in data['outputs']:
        video_format = output['format']
        # Change Zencoder's 'mpeg4' format to the 'mp4' we use internally.
        video_format = 'mp4' if video_format == 'mpeg4' else video_format

        # Find a variation matching both format and resolution.
        variation = next((v for v in file_doc['variations']
                          if v['format'] == video_format and v['width'] == output['width']),
                         None)
        # Fall back to a variation matching just the format.
        if variation is None:
            variation = next((v for v in file_doc['variations']
                              if v['format'] == video_format), None)
        if variation is None:
            log.warning('Unable to find variation for video format %s for file %s',
                        video_format, file_id)
            continue

        # Rename the file to include the now-known size descriptor.
        size = size_descriptor(output['width'], output['height'])
        new_fname = f'{storage_name}-{size}.{video_format}'

        # Rename the file on the storage.
        blob = bucket.blob(variation['file_path'])
        try:
            new_blob = bucket.rename_blob(blob, new_fname)
            new_blob.update_filename(f'{nice_name}-{size}.{video_format}')
        except Exception:
            log.warning('Unable to rename blob %r to %r. Keeping old name.',
                        blob, new_fname, exc_info=True)
        else:
            variation['file_path'] = new_fname

        # TODO: calculate md5 on the storage
        variation.update({
            'height': output['height'],
            'width': output['width'],
            'length': output['file_size_in_bytes'],
            'duration': data['input']['duration_in_ms'] / 1000,
            'md5': output['md5_checksum'] or '',  # they don't do MD5 for GCS...
            'size': size,
        })

    file_doc['status'] = 'complete'

    # Force an update of the links on the next load of the file.
    file_doc['link_expires'] = utils.utcnow() - datetime.timedelta(days=1)

    r, _, _, status = current_app.put_internal('files', file_doc, _id=file_id)
    if status != 200:
        log.error('unable to save file %s after Zencoder notification: %s', file_id, r)
        return json.dumps(r), 500

    return '', 204

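# For orientation, a sketch of what size_descriptor (defined elsewhere in this
# module) is expected to produce, based on the test below: a height-derived
# label such as '1080p' for 1920x1080 output. Illustration only, not the real
# implementation.
def _size_descriptor_sketch(width: int, height: int) -> str:
    return f'{height}p'

# _size_descriptor_sketch(1920, 1080) → '1080p', so the storage path becomes
# '<storage_name>-1080p.mp4' and the visible filename '<nice_name>-1080p.mp4'.
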
def test_actual_notification(self):
    """Test with actual file and notification documents."""

    self.ensure_project_exists()

    file_doc = {
        "_id": ObjectId("5a6751b33bea6a01fdfd59f0"),
        "name": "02a877a1d9da45509cdba97e283ef0bc.mkv",
        "filename": "4. pose-library-previews.mkv",
        "file_path": "02a877a1d9da45509cdba97e283ef0bc.mkv",
        "user": ctd.EXAMPLE_PROJECT_OWNER_ID,
        "backend": "local",
        "md5": "",
        "content_type": "video/x-matroska",
        "length": 39283494,
        "project": ctd.EXAMPLE_PROJECT_ID,
        "status": "processing",
        "length_aggregate_in_bytes": 45333852,
        "format": "x-matroska",
        "variations": [{
            "format": "mp4",
            "content_type": "video/mp4",
            "file_path": "02a877a1d9da45509cdba97e283ef0bc-1080p.mp4",
            "size": "1080p",
            "duration": 100,
            "width": 1920,
            "height": 1080,
            "length": 6050358,
            "md5": "",
            "link": "https://storage.googleapis.com/59d69c94f4/_%2F02-1080p.mp4",
        }],
        "processing": {
            "status": "processing",
            "job_id": "447043841",
            "backend": "zencoder",
        },
        "link_expires": dateutil.parser.parse("2018-01-27T06:24:31.827+0100"),
        "_updated": dateutil.parser.parse("2018-01-26T07:24:54.000+0100"),
        "_created": dateutil.parser.parse("2018-01-23T16:16:03.000+0100"),
        "_deleted": False,
        "_etag": "54f1d65326f4d856b740480dc52edefa96476d8a",
        "link": "https://storage.googleapis.com/59d69c94f4/_%2F02.mkv",
    }

    # Make sure the to-be-renamed file exists on the local storage bucket.
    from pillar.api.file_storage_backends import Bucket, local

    bucket_class = Bucket.for_backend('local')
    bucket = bucket_class(str(file_doc['project']))
    blob: local.LocalBlob = bucket.blob('02a877a1d9da45509cdba97e283ef0bc-1080p.mp4')
    blob.touch()

    files_coll = self.app.db('files')
    files_coll.insert_one(file_doc)
    file_id = file_doc['_id']

    notif = {
        'input': {
            'audio_bitrate_in_kbps': None,
            'audio_codec': None,
            'audio_sample_rate': None,
            'channels': None,
            'duration_in_ms': 100840,
            'file_size_in_bytes': 39283494,
            'format': 'matroska',
            'frame_rate': 25.0,
            'height': 1080,
            'id': 447014781,
            'md5_checksum': None,
            'state': 'finished',
            'total_bitrate_in_kbps': None,
            'video_bitrate_in_kbps': 3054,
            'video_codec': 'h264',
            'width': 1920,
        },
        'job': {
            'created_at': '2018-01-23T15:16:17Z',
            'id': 447043841,
            'pass_through': None,
            'state': 'finished',
            'submitted_at': '2018-01-23T15:16:17Z',
            'test': False,
            'updated_at': '2018-01-23T15:16:42Z',
        },
        'outputs': [{
            'audio_bitrate_in_kbps': None,
            'audio_codec': None,
            'audio_sample_rate': None,
            'channels': None,
            'duration_in_ms': 100840,
            'file_size_in_bytes': 6050358,
            'format': 'mpeg4',
            'fragment_duration_in_ms': None,
            'frame_rate': 25.0,
            'height': 1080,
            'id': 1656104422,
            'label': None,
            'md5_checksum': None,
            'rfc_6381_audio_codec': None,
            'rfc_6381_video_codec': 'avc1.420028',
            'state': 'finished',
            'total_bitrate_in_kbps': 479,
            'type': 'standard',
            'url': 'gcs://59d69c94f488551661254569/_/02-mp4.mp4',
            'video_bitrate_in_kbps': 479,
            'video_codec': 'h264',
            'width': 1920,
        }],
    }

    self.post('/api/encoding/zencoder/notifications',
              json=notif,
              headers={'X-Zencoder-Notification-Secret': self.secret},
              expected_status=204)

    db_file = files_coll.find_one(file_id)
    self.assertEqual('complete', db_file['status'])
    self.assertEqual('finished', db_file['processing']['status'])