def _first_timestamp_ms(ts_file):
    """Return the first timestamp found in a ``timestamps_v2`` text file.

    The first line of the file is a header and is skipped; the second line
    is parsed as a number of milliseconds.

    Raises:
        ValueError: if the second line is empty or not numeric.
    """
    with open(ts_file, 'r') as f:
        f.readline()  # skip header
        # float() rather than int(): accepts plain integers as before and
        # also tolerates a fractional millisecond value.
        return float(f.readline().strip())


def extract_audio():
    """Extract a start-aligned mono 44.1 kHz WAV next to every video file.

    For each path returned by ``find_all_video_files(output_dir)``:

    1. Remux the video into a temporary ``temp.mkv`` with ``mkvmerge``.
    2. Dump the timestamps of track 0 and track 1 with ``mkvextract`` and
       read the first timestamp of each (track 0 is treated as video and
       track 1 as audio -- assumes that track order; TODO confirm).
    3. Decode the audio to a temporary ``temp.wav`` with ``ffmpeg``.
    4. Pad the start of the audio (audio begins later than video) or trim
       it (audio begins earlier) by the difference, and write the result
       to ``<video basename>.wav``.

    Temporary files live in the video's own directory and are removed even
    when a step fails.
    """
    all_videos = find_all_video_files(output_dir)
    for video in tqdm(all_videos):
        video_dir = os.path.dirname(video)
        mkvfile = os.path.join(video_dir, 'temp.mkv')
        video_ts_file = os.path.join(video_dir, 'video_ts.txt')
        audio_ts_file = os.path.join(video_dir, 'audio_ts.txt')
        audio_tmp = os.path.join(video_dir, 'temp.wav')

        try:
            # Argument lists (no shell) so that paths containing spaces or
            # shell metacharacters are passed through verbatim.
            subprocess.call(['mkvmerge', '-o', mkvfile, video])
            subprocess.call(
                ['mkvextract', mkvfile, 'timestamps_v2', '0:' + video_ts_file])
            subprocess.call(
                ['mkvextract', mkvfile, 'timestamps_v2', '1:' + audio_ts_file])

            video_start = _first_timestamp_ms(video_ts_file)
            audio_start = _first_timestamp_ms(audio_ts_file)
            offset_ms = audio_start - video_start

            # extract audio
            subprocess.call(
                ['ffmpeg', '-i', video, '-ar', '44100', '-ac', '1', '-y',
                 audio_tmp])

            # use the offset to pad the audio with zeros, or trim the audio
            audio_name = os.path.splitext(video)[0] + '.wav'
            tfm = Transformer()
            if offset_ms >= 0:
                tfm.pad(start_duration=offset_ms / 1000)
            else:
                tfm.trim(start_time=-offset_ms / 1000)
            tfm.build(audio_tmp, audio_name)
        finally:
            # Always clean up; a file may be missing if an earlier step
            # failed before creating it.
            for tmp_path in (mkvfile, audio_tmp, video_ts_file, audio_ts_file):
                try:
                    os.remove(tmp_path)
                except FileNotFoundError:
                    pass
def _processSamples(sample_list):
    """Run every sample in ``sample_list`` through a Transformer pipeline.

    Each sample is renamed via ``_renameSample`` and written under
    ``out_path``; the output path is recorded in ``processed_samples``.
    The output is converted to 44.1 kHz, 2 channels, 16 bit. The optional
    steps are controlled by the module-level flags ``NORMALIZE``,
    ``SILENCE`` and ``PADDING``.
    """
    for source_path in sample_list:
        target_path = join(out_path, _renameSample(source_path))
        # Recorded before processing, so the entry exists even if the
        # build below fails.
        processed_samples.append(target_path)

        # Sox processing using Transform instance
        transformer = Transformer()
        transformer.convert(samplerate=44100, n_channels=2, bitdepth=16)

        if NORMALIZE:
            transformer.norm(db_level=-3)

        if SILENCE:
            transformer.silence(
                location=-1,
                silence_threshold=0.05,
                min_silence_duration=0.1,
            )

        if PADDING:
            # PADDING doubles as the flag and the second positional
            # argument to pad(); the first is fixed at 0.
            transformer.pad(0, PADDING)

        transformer.build(source_path, target_path)