def add_media(config, media):
    """Move a media asset to its storage location and measure it.

    A media whose status is ``Media.PENDING`` is left untouched and reported as ``(0, None)``.
    Otherwise the asset is renamed to the path generated by ``config`` (when it differs from the
    current one) and ``media.uri`` is updated accordingly.

    Returns:
        A ``(size, duration)`` tuple; the size is measured on the directory containing the asset.

    Raises:
        NotImplementedError: ``config`` gives no source path for the media.
        IndexError: the rename still fails after five attempts.
        ValueError: the size or the duration of the stored asset cannot be detected.
    """
    if media.status == Media.PENDING:
        return (0, None)

    source_path = config.storage_medias_path(media, generate=False)
    if not source_path:
        raise NotImplementedError(to_bytes(u'FIXME Add of external URI not implemented.'))

    target_path = config.storage_medias_path(media, generate=True)
    if target_path != source_path:
        # Generate media storage uri and move it to media storage path + set permissions
        media.uri = config.storage_medias_uri(media)
        try_makedirs(os.path.dirname(target_path))
        for _attempt in xrange(5):
            try:
                os.rename(source_path, target_path)
                # FIXME chown chmod
            except OSError as error:
                # Remember the failure and wait one second before the next attempt.
                last_error = error
                time.sleep(1)
            else:
                break
        else:
            # Every attempt failed: report the last error seen.
            raise IndexError(to_bytes(u'An error occured : {0} ({1} -> {2}).'.format(
                last_error, source_path, target_path)))

    try:
        size = get_size(os.path.dirname(target_path))
    except OSError:
        raise ValueError(to_bytes(u'Unable to detect size of media asset {0}.'.format(target_path)))

    duration = get_media_duration(target_path)
    if duration is None:
        raise ValueError(to_bytes(u'Unable to detect duration of media asset {0}.'.format(target_path)))
    return (size, duration)
def main():
    """Archive local files to S3 as described by a YAML configuration file.

    Command line options:

    * ``--config``: path to the YAML configuration (required),
    * ``--simulate``: report what would be done without uploading or deleting anything,
    * ``--verbosity``: 0, 1 or 2; gates the messages emitted through ``log_it``.

    The configuration must define ``enabled`` and ``transfers``; each transfer defines ``name``,
    ``bucket``, ``delete``, ``directory``, ``prefix`` (may contain ``{host_fqdn}``) and ``patterns``.

    For every matching file the MD5 of the local file is compared to the ETag of the S3 object.
    Files that differ are uploaded; identical files are counted as skipped and not uploaded again.
    When ``delete`` is set, the local file is removed afterwards (never in simulation mode).

    Any exception is logged and swallowed so that the final "Process ended" message is always emitted.
    """
    signal.signal(signal.SIGINT, lambda *args: sys.exit(0))
    log = setup_log()
    parser = argparse.ArgumentParser(epilog='Archive stuff on S3.')
    parser.add_argument('--config', action=FullPaths, required=True, type=is_file)
    parser.add_argument('--simulate', action='store_true')
    parser.add_argument('--verbosity', choices=(0, 1, 2), default=0, type=int)
    args = parser.parse_args()

    def log_it(verbosity, level, message, **extra):
        """Log ``message`` with ``extra`` fields when the requested verbosity is high enough."""
        if args.verbosity >= verbosity:
            extra['level'] = LEVEL_MAP[level]
            getattr(log, level)(message, extra=extra)

    s3 = boto3.client('s3')
    log_it(1, 'info', 'Process started')
    try:
        with open(args.config) as config_file:
            # safe_load instead of load: yaml.load() without an explicit Loader can build arbitrary
            # Python objects and is rejected by recent PyYAML releases. Plain mappings, lists and
            # scalars (all this script reads) are loaded the same way.
            config = yaml.safe_load(config_file)
        if config['enabled']:
            log_it(1, 'info', 'Its time to transfer!')
            if args.simulate:
                log_it(1, 'warning', 'Simulation mode enabled')
            for transfer in config['transfers']:
                name = transfer['name']
                log_it(1, 'info', 'Handling transfer', transfer=name)
                bucket = transfer['bucket']
                delete = transfer['delete']
                directory = transfer['directory']
                prefix = transfer['prefix'].format(host_fqdn=socket.getfqdn())
                processed_bytes = processed_count = skipped_bytes = skipped_count = 0
                for source_path in filesystem.find_recursive(
                    directory, transfer['patterns'], unix_wildcards=False
                ):
                    target_path = os.path.join(prefix, os.path.relpath(source_path, directory))
                    target_obj = aws.s3.load_object_meta(s3, bucket, target_path, fail=False)
                    with open(source_path, 'rb') as source_file:
                        # Retrieve metadata from source and target
                        source_size = filesystem.get_size(source_path)
                        target_size = None if target_obj is None else target_obj['ContentLength']
                        target_md5 = None if target_obj is None else target_obj['ETag'].strip('"')
                        source_md5 = crypto.checksum(
                            source_path, is_path=True, algorithm='md5', chunk_size=1024 * 1024)
                        changed = source_md5 != target_md5
                        log_it(
                            2, 'info', 'File',
                            transfer=name,
                            changed=changed,
                            source_md5=source_md5,
                            source_path=source_path,
                            source_size=source_size,
                            target_md5=target_md5,
                            target_path=target_path,
                            target_size=target_size)
                        if changed:
                            processed_bytes += source_size
                            processed_count += 1
                            # Only upload what differs from the bucket: a file counted as
                            # "skipped" must really be skipped.
                            if not args.simulate:
                                aws.s3.write_object(s3, bucket, target_path, source_file)
                        else:
                            skipped_bytes += source_size
                            skipped_count += 1
                    # Remove the local copy once its handle is closed (removing an open file
                    # fails on some platforms) and never while simulating.
                    if delete and not args.simulate:
                        filesystem.remove(source_path)
                log_it(
                    1, 'info', 'Summary',
                    transfer=name,
                    processed_bytes=processed_bytes,
                    processed_count=processed_count,
                    skipped_bytes=skipped_bytes,
                    skipped_count=skipped_count)
        else:
            log.warning('Process is disabled')
    except Exception as e:
        # Top-level boundary: record the failure (with traceback) instead of crashing the script.
        log.exception(e)
    finally:
        log_it(1, 'info', 'Process ended')
def size(self):
    """Return the size of this resource.

    An explicitly set ``_size`` always wins. Otherwise a pipe is reported as 0 and anything else
    is measured with ``filesystem.get_size`` on ``self.path``.
    """
    if self._size is not None:
        return self._size
    if self.is_pipe:
        return 0
    return filesystem.get_size(self.path)
def transform_task(media_in_json, media_out_json, profile_json, callback_json):
    """Transform an input media asset into an output media asset and report the outcome.

    The four arguments are JSON documents describing the input media, the output media, the
    transformation profile and the callback used to notify the orchestrator.

    The work done depends on ``profile.encoder_name``:

    * ``copy``: recursive copy of the input directory into the output directory,
    * ``ffmpeg``: transcoding with an FFmpeg subprocess, followed by quality measures,
    * ``from_git``: an encoder is cloned and built from git, FFmpeg output is piped into it,
    * ``dashcast``: processing with a DashCast subprocess.

    Progress is published through ``transform_task.update_state``. The final status and the
    collected measures are posted through the callback.

    Returns:
        A dict with hostname, start date, elapsed time, input/output sizes and durations, and
        ``percent`` set to 100.

    Raises:
        NotImplementedError: a media is not on the shared storage, or the encoder is unknown.
        ValueError: the number of frames of the input cannot be estimated (DashCast).
        OSError: the encoder failed or its output is missing.

    Any exception is first reported through the callback, then re-raised.
    """
    import shutil  # Local import: only needed to clean up the temporary clone of 'from_git'.

    def copy_callback(start_date, elapsed_time, eta_time, src_size, dst_size, ratio):
        """Publish the progress of the file copy."""
        transform_task.update_state(state=TransformTask.PROGRESS, meta={
            u'hostname': request.hostname, 'start_date': start_date, u'elapsed_time': elapsed_time,
            u'eta_time': eta_time, u'media_in_size': src_size, u'media_out_size': dst_size,
            u'percent': int(100 * ratio)})

    def transform_callback(status, measures):
        """Post the task status and measures to the orchestrator (or print them if no callback)."""
        data_json = object2json({u'task_id': request.id, u'status': status, u'measures': measures},
                                include_properties=False)
        if callback is None:
            print(u'{0} [ERROR] Unable to callback orchestrator: {1}'.format(request.id, data_json))
        else:
            r = callback.post(data_json)
            print(u'{0} Code {1} {2} : {3}'.format(request.id, r.status_code, r.reason, r._content))

    # ------------------------------------------------------------------------------------------------------------------

    RATIO_DELTA, TIME_DELTA = 0.01, 1  # Update status if at least 1% of progress and 1 second elapsed.
    MAX_TIME_DELTA = 5                 # Also ensure status update every 5 seconds.
    DASHCAST_TIMEOUT_TIME = 10

    try:
        # Avoid 'referenced before assignment'
        callback = dashcast_conf = None
        encoder_out, request = u'', current_task.request

        # Let's the task begin !
        print(u'{0} Transformation task started'.format(request.id))

        # Read current configuration to translate files uri to local paths
        local_config = TransformLocalConfig.read(LOCAL_CONFIG_FILENAME, inspect_constructor=False)
        print(object2json(local_config, include_properties=True))

        # Load and check task parameters
        callback = Callback.from_json(callback_json, inspect_constructor=True)
        callback.is_valid(True)

        # Update callback socket according to configuration
        if local_config.api_nat_socket and len(local_config.api_nat_socket) > 0:
            callback.replace_netloc(local_config.api_nat_socket)

        media_in = Media.from_json(media_in_json, inspect_constructor=True)
        media_out = Media.from_json(media_out_json, inspect_constructor=True)
        profile = TransformProfile.from_json(profile_json, inspect_constructor=True)
        media_in.is_valid(True)
        media_out.is_valid(True)
        profile.is_valid(True)

        # Verify that media file can be accessed and create output path
        media_in_path = local_config.storage_medias_path(media_in, generate=False)
        if not media_in_path:
            raise NotImplementedError(to_bytes(u'Input media asset will not be readed from shared storage : {0}'.format(
                                      media_in.uri)))
        media_out_path = local_config.storage_medias_path(media_out, generate=True)
        if not media_out_path:
            raise NotImplementedError(to_bytes(u'Output media asset will not be written to shared storage : {0}'.format(
                                      media_out.uri)))
        media_in_root = dirname(media_in_path)
        media_out_root = dirname(media_out_path)
        try_makedirs(media_out_root)

        # Get input media duration and frames to be able to estimate ETA
        media_in_duration = get_media_duration(media_in_path)

        # Keep potential PSNR status
        measures = {}

        # NOT A REAL TRANSFORM : FILE COPY -----------------------------------------------------------------------------
        if profile.encoder_name == u'copy':
            # The total elapsed time is computed from start_time once the branch is done, so it
            # must be defined here as in the other branches.
            start_time = time.time()
            infos = recursive_copy(media_in_root, media_out_root, copy_callback, RATIO_DELTA, TIME_DELTA)
            media_out_tmp = media_in_path.replace(media_in_root, media_out_root)
            os.rename(media_out_tmp, media_out_path)
            start_date = infos[u'start_date']
            media_in_size = infos[u'src_size']

        # A REAL TRANSFORM : TRANSCODE WITH FFMPEG ---------------------------------------------------------------------
        elif profile.encoder_name == u'ffmpeg':

            start_date, start_time = datetime_now(), time.time()
            prev_ratio = prev_time = 0

            # Get input media size to be able to estimate ETA
            media_in_size = get_size(media_in_root)

            # Create FFmpeg subprocess
            cmd = u'ffmpeg -y -i "{0}" {1} "{2}"'.format(media_in_path, profile.encoder_string, media_out_path)
            print(cmd)
            ffmpeg = Popen(shlex.split(cmd), stderr=PIPE, close_fds=True)
            make_async(ffmpeg.stderr)

            while True:
                # Wait for data to become available
                select.select([ffmpeg.stderr], [], [])
                chunk = ffmpeg.stderr.read()
                encoder_out += chunk
                elapsed_time = time.time() - start_time
                match = FFMPEG_REGEX.match(chunk)
                if match:
                    stats = match.groupdict()
                    media_out_duration = stats[u'time']
                    try:
                        ratio = total_seconds(media_out_duration) / total_seconds(media_in_duration)
                        ratio = 0.0 if ratio < 0.0 else 1.0 if ratio > 1.0 else ratio
                    except ZeroDivisionError:
                        ratio = 1.0
                    delta_time = elapsed_time - prev_time
                    if (ratio - prev_ratio > RATIO_DELTA and delta_time > TIME_DELTA) or delta_time > MAX_TIME_DELTA:
                        prev_ratio, prev_time = ratio, elapsed_time
                        eta_time = int(elapsed_time * (1.0 - ratio) / ratio) if ratio > 0 else 0
                        transform_task.update_state(
                            state=TransformTask.PROGRESS,
                            meta={u'hostname': request.hostname,
                                  u'start_date': start_date,
                                  u'elapsed_time': elapsed_time,
                                  u'eta_time': eta_time,
                                  u'media_in_size': media_in_size,
                                  u'media_in_duration': media_in_duration,
                                  u'media_out_size': get_size(media_out_root),
                                  u'media_out_duration': media_out_duration,
                                  u'percent': int(100 * ratio),
                                  u'encoding_frame': stats[u'frame'],
                                  u'encoding_fps': stats[u'fps'],
                                  u'encoding_bitrate': stats[u'bitrate'],
                                  u'encoding_quality': stats[u'q']})
                returncode = ffmpeg.poll()
                if returncode is not None:
                    break

            # FFmpeg output sanity check
            if returncode != 0:
                raise OSError(to_bytes(u'FFmpeg return code is {0}, encoding probably failed.'.format(returncode)))

            # compute stats about the video
            measures['psnr'] = get_media_psnr(media_in_path, media_out_path)
            measures['ssim'] = get_media_ssim(media_in_path, media_out_path)

            # measures of the data and its metadata
            measures['bitrate'] = get_media_bitrate(media_out_path)

            # FIXME: fake git url, commit
            measures['git_url'] = 'https://github.com/videolan/x265'
            measures['git_commit'] = 'd2051f9544434612a105d2f5267db23018cb3454'

            # TODO Output media file sanity check: compare the output duration to the input duration
            # (the draft rejected ratios above 1.5 or below 0.8).

        elif profile.encoder_name == u'from_git':

            start_date, start_time = datetime_now(), time.time()

            # Get input media size to be able to estimate ETA
            media_in_size = get_size(media_in_root)
            metadata = media_out.metadata
            dirpath = tempfile.mkdtemp()
            try:
                # NOTE(review): the commands below are built from the task metadata and run through
                # a shell; confirm the metadata always comes from a trusted source.
                prepare_cmd = u'git clone --depth=1 "{0}" "{1}" && cd "{1}" && git checkout "{2}" && {3}'.format(
                    metadata['git_url'], dirpath, metadata['git_commit'], metadata['build_cmds'])
                check_call(prepare_cmd, shell=True)

                # Templated parameter
                encoder_string = profile.encoder_string.replace(u"BITRATE", str(metadata['input_bitrate']))

                cmd = u'cd "{0}" && ffmpeg -y -i "{1}" -f yuv4mpegpipe - | {2} "{3}"'.format(
                    dirpath, media_in_path, encoder_string, media_out_path)
                returncode = call(cmd, shell=True)
            finally:
                # The clone is only needed while encoding; do not leave it behind, even on failure.
                shutil.rmtree(dirpath, ignore_errors=True)

            if returncode != 0:
                raise OSError(to_bytes(u'Encoding return code is {0}, encoding probably failed.'.format(returncode)))

            # compute stats about the video
            measures['psnr'] = get_media_psnr(media_in_path, media_out_path)
            measures['ssim'] = get_media_ssim(media_in_path, media_out_path)

            # measures of the data and its metadata
            measures['bitrate'] = get_media_bitrate(media_out_path)

            # FIXME: don't put this in measures
            measures['git_url'] = metadata['git_url']
            measures['git_commit'] = metadata['git_commit']

        # A REAL TRANSFORM : TRANSCODE WITH DASHCAST -------------------------------------------------------------------
        elif profile.encoder_name == u'dashcast':

            start_date, start_time = datetime_now(), time.time()
            prev_ratio = prev_time = 0

            # Get input media size and frames to be able to estimate ETA
            media_in_size = get_size(media_in_root)
            try:
                media_in_frames = int(get_media_tracks(media_in_path)[u'video'][u'0:0'][u'estimated_frames'])
                media_out_frames = 0
            except Exception:
                # Not a bare except: interpreter exit requests must not be turned into a ValueError.
                raise ValueError(to_bytes(u'Unable to estimate # frames of input media asset'))

            # Create DashCast configuration file and subprocess
            dashcast_conf = u'dashcast_{0}.conf'.format(uuid.uuid4())
            with open(dashcast_conf, u'w', u'utf-8') as f:
                f.write(profile.dash_config)
            cmd = u'DashCast -conf {0} -av "{1}" {2} -out "{3}" -mpd "{4}"'.format(
                dashcast_conf, media_in_path, profile.dash_options, media_out_root, media_out.filename)
            print(cmd)
            dashcast = Popen(shlex.split(cmd), stdout=PIPE, stderr=PIPE, close_fds=True)
            make_async(dashcast.stdout.fileno())
            make_async(dashcast.stderr.fileno())

            while True:
                # Wait for data to become available
                select.select([dashcast.stdout.fileno()], [], [])
                stdout, stderr = read_async(dashcast.stdout), read_async(dashcast.stderr)
                elapsed_time = time.time() - start_time
                match = DASHCAST_REGEX.match(stdout)
                if match:
                    stats = match.groupdict()
                    media_out_frames = int(stats[u'frame'])
                    try:
                        ratio = float(media_out_frames) / media_in_frames
                        ratio = 0.0 if ratio < 0.0 else 1.0 if ratio > 1.0 else ratio
                    except ZeroDivisionError:
                        ratio = 1.0
                    delta_time = elapsed_time - prev_time
                    if (ratio - prev_ratio > RATIO_DELTA and delta_time > TIME_DELTA) or delta_time > MAX_TIME_DELTA:
                        prev_ratio, prev_time = ratio, elapsed_time
                        eta_time = int(elapsed_time * (1.0 - ratio) / ratio) if ratio > 0 else 0
                        transform_task.update_state(
                            state=TransformTask.PROGRESS,
                            meta={u'hostname': request.hostname,
                                  u'start_date': start_date,
                                  u'elapsed_time': elapsed_time,
                                  u'eta_time': eta_time,
                                  u'media_in_size': media_in_size,
                                  u'media_in_duration': media_in_duration,
                                  u'media_out_size': get_size(media_out_root),
                                  u'percent': int(100 * ratio),
                                  u'encoding_frame': media_out_frames})
                match = DASHCAST_SUCCESS_REGEX.match(stdout)
                returncode = dashcast.poll()
                if returncode is not None or match:
                    encoder_out = u'stdout: {0}\nstderr: {1}'.format(stdout, stderr)
                    break
                if media_out_frames == 0 and elapsed_time > DASHCAST_TIMEOUT_TIME:
                    encoder_out = u'stdout: {0}\nstderr: {1}'.format(stdout, stderr)
                    raise OSError(to_bytes(u'DashCast does not output frame number, encoding probably failed.'))

            # DashCast output sanity check
            if not exists(media_out_path):
                raise OSError(to_bytes(u'Output media asset not found, DashCast encoding probably failed.'))
            # NOTE(review): if the loop ended on the success pattern before the process exited,
            # returncode is still None and the check below raises - confirm this is intended.
            if returncode != 0:
                raise OSError(to_bytes(u'DashCast return code is {0}, encoding probably failed.'.format(returncode)))
            # FIXME check duration too !

        else:
            # Fail with an explicit message instead of an unbound-variable error further down.
            raise NotImplementedError(to_bytes(u'Encoder {0} is not supported.'.format(profile.encoder_name)))

        # Here all seem okay -------------------------------------------------------------------------------------------
        elapsed_time = time.time() - start_time
        media_out_size = get_size(media_out_root)
        media_out_duration = get_media_duration(media_out_path)
        print(u'{0} Transformation task successful, output media asset {1}'.format(request.id, media_out.filename))
        transform_callback(TransformTask.SUCCESS, measures)
        return {u'hostname': request.hostname,
                u'start_date': start_date,
                u'elapsed_time': elapsed_time,
                u'eta_time': 0,
                u'media_in_size': media_in_size,
                u'media_in_duration': media_in_duration,
                u'media_out_size': media_out_size,
                u'media_out_duration': media_out_duration,
                u'percent': 100}

    except Exception as error:

        # Here something went wrong
        print(u'{0} Transformation task failed '.format(request.id))
        transform_callback(u'ERROR\n{0}\n\nOUTPUT\n{1}'.format(unicode(error), encoder_out), {})
        raise

    finally:
        # The DashCast configuration file is temporary: remove it whatever the outcome.
        if dashcast_conf:
            try_remove(dashcast_conf)
def transform_task(media_in_json, media_out_json, profile_json, callback_json):
    """Transform an input media asset into an output media asset and report the outcome.

    The four arguments are JSON documents describing the input media, the output media, the
    transformation profile and the callback used to notify the orchestrator.

    The work done depends on ``profile.encoder_name``:

    * ``copy``: recursive copy of the input directory into the output directory,
    * ``ffmpeg``: transcoding with an FFmpeg subprocess,
    * ``dashcast``: processing with a DashCast subprocess.

    Progress is published through ``transform_task.update_state`` and the final status is posted
    through the callback.

    Returns:
        A dict with hostname, start date, elapsed time, input/output sizes and durations, and
        ``percent`` set to 100.

    Raises:
        NotImplementedError: a media is not on the shared storage, or the encoder is unknown.
        ValueError: the number of frames of the input cannot be estimated (DashCast).
        OSError: the encoder failed or its output is missing.

    Any exception is first reported through the callback, then re-raised.
    """

    def copy_callback(start_date, elapsed_time, eta_time, src_size, dst_size, ratio):
        """Publish the progress of the file copy."""
        transform_task.update_state(
            state=TransformTask.PROGRESS,
            meta={
                "hostname": request.hostname,
                "start_date": start_date,
                "elapsed_time": elapsed_time,
                "eta_time": eta_time,
                "media_in_size": src_size,
                "media_out_size": dst_size,
                "percent": int(100 * ratio),
            },
        )

    def transform_callback(status):
        """Post the task status to the orchestrator (or print it if there is no callback)."""
        data_json = object2json({"task_id": request.id, "status": status}, include_properties=False)
        if callback is None:
            print("{0} [ERROR] Unable to callback orchestrator: {1}".format(request.id, data_json))
        else:
            r = callback.post(data_json)
            print("{0} Code {1} {2} : {3}".format(request.id, r.status_code, r.reason, r._content))

    # ------------------------------------------------------------------------------------------------------------------

    RATIO_DELTA, TIME_DELTA = 0.01, 1  # Update status if at least 1% of progress and 1 second elapsed.
    MAX_TIME_DELTA = 5  # Also ensure status update every 5 seconds.
    DASHCAST_TIMEOUT_TIME = 10

    try:
        # Avoid 'referenced before assignment'
        callback = dashcast_conf = None
        encoder_out, request = "", current_task.request

        # Let's the task begin !
        print("{0} Transformation task started".format(request.id))

        # Read current configuration to translate files uri to local paths
        local_config = TransformLocalConfig.read(LOCAL_CONFIG_FILENAME, inspect_constructor=False)
        print(object2json(local_config, include_properties=True))

        # Load and check task parameters
        callback = Callback.from_json(callback_json, inspect_constructor=True)
        callback.is_valid(True)

        # Update callback socket according to configuration
        if local_config.api_nat_socket and len(local_config.api_nat_socket) > 0:
            callback.replace_netloc(local_config.api_nat_socket)

        media_in = Media.from_json(media_in_json, inspect_constructor=True)
        media_out = Media.from_json(media_out_json, inspect_constructor=True)
        profile = TransformProfile.from_json(profile_json, inspect_constructor=True)
        media_in.is_valid(True)
        media_out.is_valid(True)
        profile.is_valid(True)

        # Verify that media file can be accessed and create output path
        media_in_path = local_config.storage_medias_path(media_in, generate=False)
        if not media_in_path:
            raise NotImplementedError(
                to_bytes("Input media asset will not be readed from shared storage : {0}".format(media_in.uri))
            )
        media_out_path = local_config.storage_medias_path(media_out, generate=True)
        if not media_out_path:
            raise NotImplementedError(
                to_bytes("Output media asset will not be written to shared storage : {0}".format(media_out.uri))
            )
        media_in_root = dirname(media_in_path)
        media_out_root = dirname(media_out_path)
        try_makedirs(media_out_root)

        # Get input media duration and frames to be able to estimate ETA
        media_in_duration = get_media_duration(media_in_path)

        # NOT A REAL TRANSFORM : FILE COPY -----------------------------------------------------------------------------
        if profile.encoder_name == "copy":
            infos = recursive_copy(media_in_root, media_out_root, copy_callback, RATIO_DELTA, TIME_DELTA)
            media_out_tmp = media_in_path.replace(media_in_root, media_out_root)
            os.rename(media_out_tmp, media_out_path)
            start_date = infos["start_date"]
            elapsed_time = infos["elapsed_time"]
            media_in_size = infos["src_size"]

        # A REAL TRANSFORM : TRANSCODE WITH FFMPEG ---------------------------------------------------------------------
        elif profile.encoder_name == "ffmpeg":

            start_date, start_time = datetime_now(), time.time()
            prev_ratio = prev_time = 0

            # Get input media size to be able to estimate ETA
            media_in_size = get_size(media_in_root)

            # Create FFmpeg subprocess
            cmd = 'ffmpeg -y -i "{0}" {1} "{2}"'.format(media_in_path, profile.encoder_string, media_out_path)
            print(cmd)
            ffmpeg = Popen(shlex.split(cmd), stderr=PIPE, close_fds=True)
            make_async(ffmpeg.stderr)

            while True:
                # Wait for data to become available
                select.select([ffmpeg.stderr], [], [])
                chunk = ffmpeg.stderr.read()
                encoder_out += chunk
                elapsed_time = time.time() - start_time
                match = FFMPEG_REGEX.match(chunk)
                if match:
                    stats = match.groupdict()
                    media_out_duration = stats["time"]
                    try:
                        ratio = total_seconds(media_out_duration) / total_seconds(media_in_duration)
                        ratio = 0.0 if ratio < 0.0 else 1.0 if ratio > 1.0 else ratio
                    except ZeroDivisionError:
                        ratio = 1.0
                    delta_time = elapsed_time - prev_time
                    if (ratio - prev_ratio > RATIO_DELTA and delta_time > TIME_DELTA) or delta_time > MAX_TIME_DELTA:
                        prev_ratio, prev_time = ratio, elapsed_time
                        eta_time = int(elapsed_time * (1.0 - ratio) / ratio) if ratio > 0 else 0
                        transform_task.update_state(
                            state=TransformTask.PROGRESS,
                            meta={
                                "hostname": request.hostname,
                                "start_date": start_date,
                                "elapsed_time": elapsed_time,
                                "eta_time": eta_time,
                                "media_in_size": media_in_size,
                                "media_in_duration": media_in_duration,
                                "media_out_size": get_size(media_out_root),
                                "media_out_duration": media_out_duration,
                                "percent": int(100 * ratio),
                                "encoding_frame": stats["frame"],
                                "encoding_fps": stats["fps"],
                                "encoding_bitrate": stats["bitrate"],
                                "encoding_quality": stats["q"],
                            },
                        )
                returncode = ffmpeg.poll()
                if returncode is not None:
                    break

            # FFmpeg output sanity check
            if returncode != 0:
                raise OSError(to_bytes("FFmpeg return code is {0}, encoding probably failed.".format(returncode)))

            # TODO Output media file sanity check: compare the output duration to the input duration
            # (the draft rejected ratios above 1.5 or below 0.8).

        # A REAL TRANSFORM : TRANSCODE WITH DASHCAST -------------------------------------------------------------------
        elif profile.encoder_name == "dashcast":

            start_date, start_time = datetime_now(), time.time()
            prev_ratio = prev_time = 0

            # Get input media size and frames to be able to estimate ETA
            media_in_size = get_size(media_in_root)
            try:
                media_in_frames = int(get_media_tracks(media_in_path)["video"]["0:0"]["estimated_frames"])
                media_out_frames = 0
            except Exception:
                # Not a bare except: interpreter exit requests must not be turned into a ValueError.
                raise ValueError(to_bytes("Unable to estimate # frames of input media asset"))

            # Create DashCast configuration file and subprocess
            dashcast_conf = "dashcast_{0}.conf".format(uuid.uuid4())
            with open(dashcast_conf, "w", "utf-8") as f:
                f.write(profile.dash_config)
            cmd = 'DashCast -conf {0} -av "{1}" {2} -out "{3}" -mpd "{4}"'.format(
                dashcast_conf, media_in_path, profile.dash_options, media_out_root, media_out.filename
            )
            print(cmd)
            dashcast = Popen(shlex.split(cmd), stdout=PIPE, stderr=PIPE, close_fds=True)
            make_async(dashcast.stdout.fileno())
            make_async(dashcast.stderr.fileno())

            while True:
                # Wait for data to become available
                select.select([dashcast.stdout.fileno()], [], [])
                stdout, stderr = read_async(dashcast.stdout), read_async(dashcast.stderr)
                elapsed_time = time.time() - start_time
                match = DASHCAST_REGEX.match(stdout)
                if match:
                    stats = match.groupdict()
                    media_out_frames = int(stats["frame"])
                    try:
                        ratio = float(media_out_frames) / media_in_frames
                        ratio = 0.0 if ratio < 0.0 else 1.0 if ratio > 1.0 else ratio
                    except ZeroDivisionError:
                        ratio = 1.0
                    delta_time = elapsed_time - prev_time
                    if (ratio - prev_ratio > RATIO_DELTA and delta_time > TIME_DELTA) or delta_time > MAX_TIME_DELTA:
                        prev_ratio, prev_time = ratio, elapsed_time
                        eta_time = int(elapsed_time * (1.0 - ratio) / ratio) if ratio > 0 else 0
                        transform_task.update_state(
                            state=TransformTask.PROGRESS,
                            meta={
                                "hostname": request.hostname,
                                "start_date": start_date,
                                "elapsed_time": elapsed_time,
                                "eta_time": eta_time,
                                "media_in_size": media_in_size,
                                "media_in_duration": media_in_duration,
                                "media_out_size": get_size(media_out_root),
                                "percent": int(100 * ratio),
                                "encoding_frame": media_out_frames,
                            },
                        )
                match = DASHCAST_SUCCESS_REGEX.match(stdout)
                returncode = dashcast.poll()
                if returncode is not None or match:
                    encoder_out = "stdout: {0}\nstderr: {1}".format(stdout, stderr)
                    break
                if media_out_frames == 0 and elapsed_time > DASHCAST_TIMEOUT_TIME:
                    encoder_out = "stdout: {0}\nstderr: {1}".format(stdout, stderr)
                    raise OSError(to_bytes("DashCast does not output frame number, encoding probably failed."))

            # DashCast output sanity check
            if not exists(media_out_path):
                raise OSError(to_bytes("Output media asset not found, DashCast encoding probably failed."))
            # NOTE(review): if the loop ended on the success pattern before the process exited,
            # returncode is still None and the check below raises - confirm this is intended.
            if returncode != 0:
                raise OSError(to_bytes("DashCast return code is {0}, encoding probably failed.".format(returncode)))
            # FIXME check duration too !

        else:
            # Without this guard an unknown encoder reached the success callback below and only
            # then crashed on unbound variables, so the orchestrator got SUCCESS followed by ERROR.
            raise NotImplementedError(to_bytes("Encoder {0} is not supported.".format(profile.encoder_name)))

        # Here all seem okay -------------------------------------------------------------------------------------------
        media_out_size = get_size(media_out_root)
        media_out_duration = get_media_duration(media_out_path)
        print("{0} Transformation task successful, output media asset {1}".format(request.id, media_out.filename))
        transform_callback(TransformTask.SUCCESS)
        return {
            "hostname": request.hostname,
            "start_date": start_date,
            "elapsed_time": elapsed_time,
            "eta_time": 0,
            "media_in_size": media_in_size,
            "media_in_duration": media_in_duration,
            "media_out_size": media_out_size,
            "media_out_duration": media_out_duration,
            "percent": 100,
        }

    except Exception as error:

        # Here something went wrong
        print("{0} Transformation task failed ".format(request.id))
        transform_callback("ERROR\n{0}\n\nOUTPUT\n{1}".format(unicode(error), encoder_out))
        raise

    finally:
        # The DashCast configuration file is temporary: remove it whatever the outcome.
        if dashcast_conf:
            try_remove(dashcast_conf)