def ffmpeg_mix(tracks, width=None, height=None, audio_only=False, scale=1):
    """Mixes all tracks, returning a list of output streams.

    Args:
        tracks: list of track dicts. Keys read here: 'path', optional
            'has_audio'/'has_video' flags, geometry ('left', 'top', 'width',
            'height'), optional 'alignment_analysis', and 'filters' (whose
            'loudness' entry may carry a measured level 'L' and a target
            'program' level).
        width, height: canvas size in unscaled units; when falsy, the canvas
            is sized to the bounding box of all video tracks.
        audio_only: when True, skip all video processing entirely.
        scale: multiplier applied to every pixel dimension/position.

    Returns:
        A list of streams: the mixed audio stream (if any track has audio)
        followed by the xstack'd video stream (if any track has video and
        not audio_only).
    """
    audio_streams = []
    video_streams = []
    xstack_layout = []

    # FFMPEG filters
    for t in tracks:
        stream = ffmpeg.input(t['path'])
        if t.get('has_audio'):
            audio = stream.audio
            if t.get('alignment_analysis'):
                audio = audio.align_audio(t['alignment_analysis'])
            # Volume adjust
            in_loudness = t['filters'].get('loudness', {}).get('L')
            # Fix: compare against None rather than truthiness — a measured
            # loudness of exactly 0 is a valid reading and must still be
            # adjusted toward the target level.
            if in_loudness is not None:
                # From https://github.com/mltframework/mlt/blob/7da01504d6844412b6e26c03b7c98214a1730343/src/modules/plus/filter_loudness.c#L160-L169
                target_db = t['filters']['loudness']['program']
                delta_db = target_db - in_loudness
                audio = audio.volume(dB=delta_db)
            audio_streams.append(audio)
        if t.get('has_video') and not audio_only:
            video = stream.video
            if t.get('alignment_analysis'):
                video = video.align_video(t['alignment_analysis'])
            video = video.scale(w=int(t['width'] * scale),
                                h=int(t['height'] * scale))
            video_streams.append(video)
            xstack_layout.append((int(t['left'] * scale), int(t['top'] * scale)))

    # background for xstack
    if video_streams:
        video_tracks = [t for t in tracks if t.get('has_video')]
        w = int((width or max(t['left'] + t['width'] for t in video_tracks)) * scale)
        h = int((height or max(t['top'] + t['height'] for t in video_tracks)) * scale)
        background = ffmpeg.input(f'color=black:size={w}x{h}:duration=1:rate=30',
                                  format='lavfi')
        video_streams.insert(0, background)
        xstack_layout.insert(0, (0, 0))

    # Combine outputs and return all the streams
    streams = []
    if audio_streams:
        streams.append(ffmpeg.amix(audio_streams, dropout_transition=1000)
                       # amix reduces volume of each track to `1/n`, so increase the
                       # result by a factor of `n` to get it back to normal. Hopefully
                       # we've already applied some volume normalization, so this
                       # shouldn't be too loud. AFAICT Shotcut doesn't do any kind of
                       # volume reduction when it mixes tracks, so this should be
                       # closer to what it will sound like in Shotcut anyways.
                       .volume(len(audio_streams)))
    if video_streams:
        streams.append(ffmpeg.xstack(video_streams, layout=xstack_layout))
    return streams
def load_bitmap(filename, h): (stdout, _) = (ffmpeg.input(filename, ss=10).scale( w=-2, h=h).output('pipe:', vframes=1, format='image2pipe', vcodec='png').run(capture_stdout=True)) return stdout
def write_aligned_video(in_file, out_file, analysis, cfg): stream = ffmpeg.input(in_file) # Video filters if ffmpeg.probe(in_file).video: video = stream.video # Detect crop at 20 seconds into the video crop = ffmpeg.run_cropdetect(in_file, ss=20) if crop: video = video.crop(**crop) if cfg.get('resize'): video = video.scale(**cfg['resize']) video = video.align_video(analysis) else: video = None # Audio filters audio = stream.audio audio = audio.align_audio(analysis) if analysis.get('loudnorm'): audio = audio.loudnorm(analysis['loudnorm'], resample=cfg.get('samplerate')) elif cfg.get('samplerate'): audio = audio.resample(cfg.get('samplerate')) # Output output_args = {} if cfg.get('framerate'): output_args['r'] = cfg.get('framerate') output_args['movflags'] = '+faststart' # allow re-encoding on the fly output_args['ac'] = 1 streams = [audio, video] if video else [audio] return ffmpeg.output(*streams, out_file, **output_args).run(overwrite_output=True)
def extract_audio_to_file(in_file, out_file, cfg): return (ffmpeg.input(in_file).audio.filter( 'aresample', cfg.get('samplerate', 48000), first_pts=0).filter('asetpts', 'PTS-STARTPTS').output( out_file, acodec=cfg.get('codec', 'aac'), audio_bitrate=cfg.get('bitrate', '128k'), ac=1, ).run(overwrite_output=True))
def write_aligned_audio(in_file, out_file, analysis, cfg): audio = ffmpeg.input(in_file).audio.align_audio(analysis) if analysis.get('loudnorm'): audio = audio.loudnorm(analysis['loudnorm'], resample=cfg.get('samplerate', 48000)) return audio.output(out_file, ac=1).run(overwrite_output=True)