def speed(self, speed):
    """
    Time-stretch the wrapped audio segment without altering its pitch.

    The segment is exported to an in-memory WAV, run through a phase
    vocoder, and ``self._segment`` is replaced by the stretched audio.
    A factor of exactly 1 leaves the segment untouched.

    :param speed: Percentage to increase/decrease speed without changing pitch
    :type speed: float
    """
    if speed == 1:
        return

    logger.info("Setting speed to %f", speed)

    logger.debug("Export file to BytesIO")
    source_buffer = self._segment.export(BytesIO(), format="wav")
    source_buffer.seek(0)

    logger.debug("Initializing reader and writer")
    with WavReader(source_buffer) as reader:
        stretched_buffer = BytesIO()
        with WavWriter(
            stretched_buffer, reader.channels, reader.samplerate
        ) as writer:
            logger.debug("Adjusting speed with vocoder")
            vocoder = phasevocoder(reader.channels, speed=speed)
            vocoder.run(reader, writer)

            logger.debug("Reload audio segment")
            # skip metadata and start at first sample
            stretched_buffer.seek(44)
            current = self._segment
            self._segment = AudioSegment.from_raw(
                stretched_buffer,
                sample_width=current.sample_width,
                channels=current.channels,
                frame_rate=current.frame_rate,
            )
def change_pitch(self, source_filepath):
    """
    Apply the configured pitch shift and speed change to an audio file.

    Pitch is shifted by re-labelling the raw samples with a frame rate scaled
    by ``2 ** self.octaves``; the phase vocoder then sets the tempo,
    compensating for the duration change the re-labelling caused.

    :param source_filepath: Path of the source audio (loaded with
        ``AudioSegment.from_mp3`` when a pitch shift is applied).
    :return: Path of the processed file, or ``source_filepath`` itself when
        both adjustments are within the 0.1 dead band.
    """
    import os

    # Ratio original_rate / shifted_rate. Stays 1.0 when no pitch shift was
    # applied, so the speed-only path no longer hits undefined names.
    rate_ratio = 1.0

    if abs(self.octaves) > 0.1:
        import pydub

        fd, sampled_filepath = tempfile.mkstemp(suffix='.wav', dir=self.temp_dir)
        os.close(fd)  # mkstemp returns an open descriptor; only the path is used
        sound = pydub.AudioSegment.from_mp3(source_filepath)
        sample_rate = int(sound.frame_rate * (2**self.octaves))
        modified = sound._spawn(sound.raw_data, overrides={"frame_rate": sample_rate})
        modified.export(sampled_filepath, format="wav")
        rate_ratio = sound.frame_rate / sample_rate
    else:
        sampled_filepath = source_filepath

    if abs(self.speed - 1) <= 0.1:
        return sampled_filepath

    from audiotsm import phasevocoder
    from audiotsm.io.wav import WavReader, WavWriter

    # NOTE(review): without a pitch shift, sampled_filepath is still the
    # original source file, which WavReader can only open if it is a WAV.
    fd, output_filepath = tempfile.mkstemp(suffix='.wav', dir=self.temp_dir)
    os.close(fd)
    with WavReader(sampled_filepath) as reader:
        with WavWriter(output_filepath, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=self.speed * rate_ratio)
            tsm.run(reader, writer)
    return output_filepath
def test_data(data_file, speed, tsm_name, save):
    """Run a TSM procedure on a real data file.

    With ``save`` set, the input is copied under ``EXAMPLES_DIR/orig`` (if not
    already there) and the result is written to a WAV file under a
    ``speed-<x.xx>`` directory; otherwise the output goes to an in-memory
    ``ArrayWriter``.
    """
    reader = None
    writer = None

    def ensure_parent_dir(path):
        # Create the directory holding *path* when it does not exist yet.
        parent = os.path.dirname(path)
        if not os.path.isdir(parent):
            os.makedirs(parent)

    try:
        # Create the reader
        reader = WavReader(data_file)

        # Create the writer
        if not save:
            writer = ArrayWriter(reader.channels)
        else:
            # pylint: disable=no-member
            relative = os.path.relpath(data_file, pytest.DATA_DIR)
            # pylint: enable=no-member

            # Copy original file to "orig" directory
            original_copy = os.path.join(EXAMPLES_DIR, "orig", relative)
            ensure_parent_dir(original_copy)
            if not os.path.isfile(original_copy):
                shutil.copy2(data_file, original_copy)

            # Generate output file path
            stem = os.path.splitext(relative)[0]
            target = os.path.join(
                EXAMPLES_DIR,
                "speed-{:.2f}".format(speed),
                "{}_{}.wav".format(stem, tsm_name),
            )
            ensure_parent_dir(target)

            writer = WavWriter(target, reader.channels, reader.samplerate)

        # Create and run the TSM
        create_tsm(tsm_name, reader.channels, speed).run(reader, writer)
    finally:
        # Close files (only the file-backed writer is closed)
        if reader:
            reader.close()
        if save and writer:
            writer.close()
def 音频变速(wav音频数据列表, 声道数, 采样率, 目标速度, 临时文件夹):
    """
    Change the tempo of WAV sample data.

    Uses the external ``soundstretch`` executable when it can be found and
    falls back to audiotsm's phase vocoder otherwise. If the soundstretch
    output cannot be read back, the input and a fresh soundstretch attempt
    are saved into the temp folder for inspection and the unchanged input
    is returned.

    :param wav音频数据列表: Sample data as accepted by ``wavfile.write``.
    :param 声道数: Channel count (not used by this function).
    :param 采样率: Sample rate passed to ``wavfile.write``.
    :param 目标速度: Target speed factor; exactly ``1.0`` returns the input as is.
    :param 临时文件夹: Directory that receives diagnostic files on failure.
    :return: The time-stretched sample data, or the original data when no
        change was needed or soundstretch failed.
    """
    if 目标速度 == 1.0:
        return wav音频数据列表

    if 查找可执行程序('soundstretch') is not None:
        内存音频二进制缓存区 = io.BytesIO()
        wavfile.write(内存音频二进制缓存区, 采样率, wav音频数据列表)

        def 运行soundstretch(输出文件):
            # An argument list (instead of one command string) is required
            # for Popen to work on POSIX without a shell, and it also copes
            # with spaces or quotes in the output path.
            变速命令 = ['soundstretch', 'stdin', 输出文件, f'-tempo={(目标速度 - 1) * 100}']
            变速线程 = subprocess.Popen(变速命令, stdout=subprocess.PIPE,
                                    stdin=subprocess.PIPE, stderr=subprocess.PIPE)
            变速线程.communicate(内存音频二进制缓存区.getvalue())

        fd, soundstretch临时输出文件 = tempfile.mkstemp()
        os.close(fd)
        运行soundstretch(soundstretch临时输出文件)
        try:
            采样率, 音频区间处理后的数据 = wavfile.read(soundstretch临时输出文件)
        except Exception:
            # Keep evidence of the failure: the untouched input, a copy of
            # the unreadable output, and the result of a second attempt.
            出错时间 = int(time.time())

            fd, 原始数据存放位置 = tempfile.mkstemp(dir=临时文件夹, prefix=f'原始-{出错时间}-', suffix='.wav')
            os.close(fd)
            wavfile.write(原始数据存放位置, 采样率, wav音频数据列表)

            fd, 出错文件 = tempfile.mkstemp(dir=临时文件夹, prefix=f'变速-{出错时间}-', suffix='.wav')
            os.close(fd)
            try:
                copy(soundstretch临时输出文件, 出错文件)
            except OSError:
                # Best effort only: the diagnostic copy is optional.
                pass

            fd, soundstretch临时输出文件 = tempfile.mkstemp(dir=临时文件夹, prefix=f'变速-{出错时间}-', suffix='.wav')
            os.close(fd)
            运行soundstretch(soundstretch临时输出文件)

            print(f'Soundstretch 音频变速出错了,请前往查看详情\n 原始音频数据:{原始数据存放位置} \n 变速音频数据:{soundstretch临时输出文件}\n')
            print(f'出错的音频信息:\n 音频采样数:{len(wav音频数据列表)}\n 目标速度:{目标速度}\n 目标采样数:{len(wav音频数据列表) / 目标速度}')
            return wav音频数据列表

        os.remove(soundstretch临时输出文件)
    else:
        print(
            '检测到没有安装 SoundTouch 的 soundstretch,所以使用 phasevocoder 的音频变速方法。建议到 http://www.surina.net/soundtouch 下载系统对应的 soundstretch,放到系统环境变量下,可以获得更好的音频变速效果\n')
        sFile = io.BytesIO()
        wavfile.write(sFile, 采样率, wav音频数据列表)
        # Re-wrap the bytes so the reader starts from position 0.
        sFile = io.BytesIO(sFile.getvalue())
        eFile = io.BytesIO()
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=目标速度)
                tsm.run(reader, writer)
        _, 音频区间处理后的数据 = wavfile.read(io.BytesIO(eFile.getvalue()))

    return 音频区间处理后的数据
def getStretchedData(low, sf):
    """Time-stretch a placeholder WAV by ``1 / sf`` and return its samples.

    With ``low`` set, the low placeholder file is used and the playback speed
    is additionally multiplied by ``LOW_FACTOR``; otherwise the regular
    placeholder is used and the returned samples are scaled by 0.81.
    The stretched audio is written to ``STRECH`` and read back from there.
    """
    if low:
        source_path = LOW_PLACEHOLDER_WAV_AUX
        rate = (1 / sf) * LOW_FACTOR
    else:
        source_path = PLACEHOLDER_WAV_AUX
        rate = 1 / sf

    with WavReader(source_path) as reader, WavWriter(
        STRECH, reader.channels, reader.samplerate
    ) as writer:
        phasevocoder(reader.channels, speed=rate).run(reader, writer)

    _, samples = wavfile.read(STRECH)

    # Accumulate into a fresh zero array, as the result is built that way.
    stretched = np.zeros(samples.shape)
    stretched += samples if low else samples * 0.81
    return stretched
def getStretchedData(low, sf):
    """Return the samples of a placeholder WAV stretched to speed ``1 / sf``.

    ``low`` selects ``lowPlaceholder.wav`` (speed further multiplied by
    ``LOW_FACTOR``, samples returned unscaled); otherwise ``placeholder.wav``
    is used and the samples are scaled by 0.81. The intermediate result is
    stored in ``stretchholder.wav``.
    """
    base_speed = 1/sf
    wav_name, play_speed = (
        ("lowPlaceholder.wav", base_speed * LOW_FACTOR)
        if low
        else ("placeholder.wav", base_speed)
    )

    with WavReader(wav_name) as src:
        with WavWriter("stretchholder.wav", src.channels, src.samplerate) as dst:
            vocoder = phasevocoder(src.channels, speed=play_speed)
            vocoder.run(src, dst)

    _, raw = wavfile.read("stretchholder.wav")

    result = np.zeros(raw.shape)
    if not low:
        result += raw*0.81
    else:
        result += raw
    return result
def resampling(x, coef=1., fs=16000):
    """
    Time-stretch ``x`` with WSOLA and resample the result to ``len(x)``.

    The signal is written to a temporary 16-bit PCM WAV, stretched by
    ``coef`` into a second temporary WAV, loaded back and resampled to the
    original number of samples.

    :param x: Input signal; ``len(x)`` and ``x.dtype`` determine the length
        and dtype of the result.
    :param coef: Speed factor handed to ``wsola``.
    :param fs: Sample rate used when writing the temporary input file.
    :return: Array of ``len(x)`` samples with dtype ``x.dtype``.
    """
    # Both temporary files live in one ``with`` so they are closed (and
    # removed) even when writing, stretching or loading raises.
    with tempfile.NamedTemporaryFile(mode="r", suffix=".wav") as fn_r, \
            tempfile.NamedTemporaryFile(mode="w", suffix=".wav") as fn_w:
        sf.write(fn_r.name, x, fs, "PCM_16")
        with WavReader(fn_r.name) as fr:
            with WavWriter(fn_w.name, fr.channels, fr.samplerate) as fw:
                tsm = wsola(
                    channels=fr.channels,
                    speed=coef,
                    frame_length=256,
                    synthesis_hop=int(fr.samplerate / 70.0),
                )
                tsm.run(fr, fw)
        # Load while the temporary file still exists.
        y = resample(librosa.load(fn_w.name)[0], len(x)).astype(x.dtype)
    return y
def change_bar_speed(self, audio_slice_id, target_bpm=120.0):
    """Time-stretch one audio slice so that its bar plays at ``target_bpm``.

    The slice is read from ``c.LF_SLICE`` and the result is written under
    ``c.LF_CH_BPM`` in a per-audio directory, which is created on demand.
    Returns 0 without doing any work when the output file already exists.
    The bar's current BPM is derived from the two ``self.beat_track``
    entries selected by the number after the first ``_`` in
    ``audio_slice_id``.
    """
    output_dir = c.LF_CH_BPM + self._audio_id

    if os.path.isdir(output_dir):
        if os.path.isfile(output_dir + "/" + audio_slice_id + ".wav"):
            return 0
    else:
        try:
            os.mkdir(output_dir)
        except FileExistsError:
            pass

    beat_index = int(audio_slice_id.split("_")[1])
    beat_span = self.beat_track[beat_index + 1] - self.beat_track[beat_index]
    bar_bpm = 60.00 / (beat_span / 8)

    source_path = "{}{}/{}.wav".format(c.LF_SLICE, self._audio_id, audio_slice_id)
    target_path = "{}{}/{}.wav".format(c.LF_CH_BPM, self._audio_id, audio_slice_id)
    with WavReader(source_path) as r, WavWriter(
        target_path, r.channels, r.samplerate
    ) as w:
        vocoder = phasevocoder(r.channels, speed=target_bpm / bar_bpm)
        vocoder.run(r, w)

    print("only came " + audio_slice_id)
def main(args):
    """
    Re-time a video so quiet and loud stretches play at different speeds.

    Frames and audio are extracted with ffmpeg into a ``TEMP`` folder, the
    audio is split into chunks of consecutive loud / quiet frames, each chunk
    is time-stretched with a phase vocoder, the matching video frames are
    copied onto the new timeline, and ffmpeg finally combines frames and
    audio into the output file. ``TEMP`` is removed at the end.

    :param args: Object exposing ``frame_rate``, ``sample_rate``,
        ``silent_threshold``, ``frame_margin``, ``silent_speed``,
        ``sounded_speed``, ``frame_quality``, ``url``, ``input`` and
        ``output`` (presumably an argparse namespace; confirm with caller).
    """
    frameRate = args.frame_rate
    SAMPLE_RATE = args.sample_rate
    SILENT_THRESHOLD = args.silent_threshold
    FRAME_SPREADAGE = args.frame_margin
    # Index 0: speed for quiet chunks, index 1: speed for loud chunks.
    NEW_SPEED = [args.silent_speed, args.sounded_speed]
    FRAME_QUALITY = args.frame_quality

    # smooth out transition's audio by quickly fading in/out
    AUDIO_FADE_ENVELOPE_SIZE = 400

    TEMP_FOLDER = "TEMP"
    if not os.path.isdir(TEMP_FOLDER):
        os.makedirs(TEMP_FOLDER)

    if args.url is not None:
        INPUT_FILE = downloadFile(args.url)
    else:
        INPUT_FILE = args.input

    if len(args.output) >= 1:
        OUTPUT_FILE = args.output
    else:
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

    # Commands are passed as argument lists (no shell) so file names with
    # spaces or shell metacharacters are handled safely.
    subprocess.call([
        "ffmpeg", "-i", INPUT_FILE, "-qscale:v", str(FRAME_QUALITY),
        TEMP_FOLDER + "/frame%06d.jpg", "-hide_banner",
    ])
    subprocess.call([
        "ffmpeg", "-i", INPUT_FILE, "-ab", "160k", "-ac", "2",
        "-ar", str(SAMPLE_RATE), "-vn", TEMP_FOLDER + "/audio.wav",
    ])

    # NOTE(review): this probes TEMP/input.mp4, which nothing in this
    # function creates; probing INPUT_FILE looks like the intent - confirm.
    with open(TEMP_FOLDER + "/params.txt", "w") as f:
        subprocess.call(
            ["ffmpeg", "-i", TEMP_FOLDER + "/input.mp4"],
            stdout=f,
            stderr=subprocess.STDOUT,
        )

    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)

    with open(TEMP_FOLDER + "/params.txt", 'r+') as f:
        pre_params = f.read()
    params = pre_params.split('\n')
    for line in params:
        # An fps value found in the ffmpeg output overrides args.frame_rate.
        m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
        if m is not None:
            frameRate = float(m.group(1))

    samplesPerFrame = sampleRate / frameRate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    # 1 for every frame whose peak volume, relative to the overall maximum,
    # reaches the threshold.
    hasLoudAudio = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    # chunks holds [start_frame, end_frame, flag]; a new chunk starts each
    # time the (margin-widened) loudness flag flips.
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])

    # The trailing chunk runs to the end of the audio, so it carries the
    # last frame's flag (indexing i - 1 was wrong when the flip happened on
    # the very last frame).
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[-1]])
    chunks = chunks[1:]

    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0

    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]

        sFile = TEMP_FOLDER + "/tempStart.wav"
        eFile = TEMP_FOLDER + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData = np.concatenate(
            (outputAudioData, alteredAudioData / maxAudioVolume))

        # smooth out transition's audio by quickly fading in/out
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            # Shorter than the fade envelope: silence the chunk.
            outputAudioData[outputPointer:endPointer] = 0
        else:
            premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
            # make the fade-envelope mask stereo
            mask = np.repeat(premask[:, np.newaxis], 2, axis=1)
            outputAudioData[outputPointer:outputPointer + AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer - AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] * (outputFrame - startOutputFrame))
            didItWork = copyFrame(TEMP_FOLDER, inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                # Reuse the most recent frame that could be copied.
                copyFrame(TEMP_FOLDER, lastExistingFrame, outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)

    subprocess.call([
        "ffmpeg", "-framerate", str(frameRate),
        "-i", TEMP_FOLDER + "/newFrame%06d.jpg",
        "-i", TEMP_FOLDER + "/audioNew.wav",
        "-strict", "-2", OUTPUT_FILE,
    ])

    rmtree(TEMP_FOLDER, ignore_errors=False)
def process_and_concatenate(self):
    """
    Build the re-timed audio track and select the video frames to keep.

    The audio saved by ``self.save_audio()`` is split into chunks of
    consecutive loud / quiet frames, each chunk is time-stretched with the
    speed from ``NEW_SPEED``, and the source frames that map onto the new
    timeline are marked. Marked frames are copied with consecutive output
    indices, the new audio is written to ``audioNew.wav`` in the temp folder,
    ``self.final_concatenation()`` is called and the temp folder is deleted.
    """
    audio_fade_envelope_size = 400  # smooth out transition's audio by quickly fading in/out

    self.save_audio()
    sample_rate, audio_data = wavfile.read(self.temp_folder + "/audio.wav")
    audio_sample_count = audio_data.shape[0]
    max_audio_volume = get_max_volume(audio_data)
    samples_per_frame = sample_rate / self.fps
    audio_frame_count = int(math.ceil(audio_sample_count / samples_per_frame))

    # 1 for every frame whose relative peak volume reaches the threshold.
    has_loud_audio = np.zeros(audio_frame_count)
    for i in range(audio_frame_count):
        start = int(i * samples_per_frame)
        end = min(int((i + 1) * samples_per_frame), audio_sample_count)
        audio_chunks = audio_data[start:end]
        max_chunks_volume = float(get_max_volume(audio_chunks)) / max_audio_volume
        if max_chunks_volume >= SILENT_THRESHOLD:
            has_loud_audio[i] = 1

    # chunks holds [start_frame, end_frame, flag]; a new chunk starts each
    # time the (margin-widened) loudness flag flips.
    chunks = [[0, 0, 0]]
    should_include_frame = np.zeros(audio_frame_count)
    for i in range(audio_frame_count):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audio_frame_count, i + 1 + FRAME_SPREADAGE))
        should_include_frame[i] = np.max(has_loud_audio[start:end])
        if i >= 1 and should_include_frame[i] != should_include_frame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, should_include_frame[i - 1]])

    # The trailing chunk runs to the end of the audio, so it must carry the
    # flag of the last frame rather than that of an earlier frame.
    chunks.append([chunks[-1][1], audio_frame_count, should_include_frame[-1]])
    chunks = chunks[1:]

    output_audio_data = np.zeros((0, audio_data.shape[1]))
    output_pointer = 0
    last_existing_frame = None

    duration = self.get_duration()
    frames_num = int(float(duration) * self.fps)
    signed_frames = [False] * frames_num  # True for each source frame to keep

    for chunk in chunks:
        audio_chunk = audio_data[int(chunk[0] * samples_per_frame):int(chunk[1] * samples_per_frame)]

        s_file = self.temp_folder + "/tempStart.wav"
        e_file = self.temp_folder + "/tempEnd.wav"
        wavfile.write(s_file, SAMPLE_RATE, audio_chunk)
        with WavReader(s_file) as reader:
            with WavWriter(e_file, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, altered_audio_data = wavfile.read(e_file)
        leng = altered_audio_data.shape[0]
        end_pointer = output_pointer + leng
        output_audio_data = np.concatenate(
            (output_audio_data, altered_audio_data / max_audio_volume))

        if leng < audio_fade_envelope_size:
            # Shorter than the fade envelope: silence the chunk.
            output_audio_data[output_pointer:end_pointer] = 0
        else:
            pre_mask = np.arange(audio_fade_envelope_size) / audio_fade_envelope_size
            mask = np.repeat(pre_mask[:, np.newaxis], 2, axis=1)  # two-channel mask
            output_audio_data[output_pointer:output_pointer + audio_fade_envelope_size] *= mask
            output_audio_data[end_pointer - audio_fade_envelope_size:end_pointer] *= 1 - mask

        start_output_frame = int(math.ceil(output_pointer / samples_per_frame))
        end_output_frame = int(math.ceil(end_pointer / samples_per_frame))
        for outputFrame in range(start_output_frame, end_output_frame):
            input_frame = int(chunk[0] + NEW_SPEED[int(chunk[2])] * (outputFrame - start_output_frame))
            if input_frame < frames_num - 2:
                signed_frames[input_frame + 1] = True
                last_existing_frame = input_frame
            elif last_existing_frame is not None:
                # Past the end of the video: fall back to the last usable
                # frame. Nothing to mark when no frame was usable so far.
                signed_frames[last_existing_frame] = True

        output_pointer = end_pointer

    # Copy the kept frames to consecutive output positions.
    j = 0
    for i, frame_sign in enumerate(signed_frames):
        if frame_sign:
            self.copy_frame(i, j)
            j += 1

    wavfile.write(self.temp_folder + "/audioNew.wav", SAMPLE_RATE, output_audio_data)
    self.final_concatenation()
    delete_path(self.temp_folder)
# Synthesize the text with gTTS and store it as an MP3 in the temp directory.
source_filename = f"{abbr(txt)}-{lang}"
source_filepath = os.path.join(temp_dir, source_filename + ".mp3")
voice = gtts.gTTS(txt, lang=lang)
voice.save(source_filepath)

if abs(octaves) > 0.1:
    import pydub
    from audiotsm import phasevocoder
    from audiotsm.io.wav import WavReader, WavWriter

    # Shift the pitch by re-labelling the raw samples with a scaled frame rate.
    sound = pydub.AudioSegment.from_mp3(source_filepath)
    sample_rate = int(sound.frame_rate * (2**octaves))
    modified = sound._spawn(sound.raw_data, overrides={"frame_rate": sample_rate})

    # Signed octave label used in the file names, e.g. "+0.5" or "-0.5".
    oct_str = str(octaves)
    oct_str = oct_str if oct_str[0] == '-' else '+' + oct_str

    sampled_filename = f"tmp_{source_filename}{oct_str}.wav"
    sampled_filepath = os.path.join(temp_dir, sampled_filename)
    modified.export(sampled_filepath, format="wav")

    # Time-stretch so the tempo ends up at `speed` despite the rate change.
    output_filepath = f"{os.path.basename(source_filename)}{oct_str}_{speed}.wav"
    with WavReader(sampled_filepath) as reader, WavWriter(
        output_filepath, reader.channels, reader.samplerate
    ) as writer:
        tsm = phasevocoder(
            reader.channels, speed=speed * (sound.frame_rate / sample_rate)
        )
        tsm.run(reader, writer)
def splitVideo(chunks, NEW_SPEED, frameRate, zooms, samplesPerFrame,
               SAMPLE_RATE, audioData, extension, VERBOSE):
    """
    This function is responsible for outputting a new image sequence in the
    correct order.

    splitVideo is also responsible for creating Renames.txt. Copying every
    jpeg is computationally expensive, renaming the file is less so, but we
    still need to create the cache folder and we can't do that if the program
    has no idea what it renamed and where. Renames.txt will be read in
    originalMethod.py to recreate the original image sequence.

    To avoid desyncing issues with the audio, we need to have audioData and go
    along roughly the same way originalAudio.py does it. Specifically, get the
    length of the new audio chunk.

    If the speed set is 1, this is easy. If not, we have to create a new file
    modified to be the new speed with audiotsm, then read that file to get the
    length.

    Chunks whose speed is 99999 or more produce no output frames. Frames are
    read from ``CACHE`` and written to ``TEMP``; the finished sequence is
    encoded by ffmpeg to ``TEMP/output<extension>``.

    :param chunks: Sequence of ``[start_frame, end_frame, flag]`` entries;
        ``int(flag)`` indexes ``NEW_SPEED``.
    :param NEW_SPEED: Speeds indexed by the chunk flag.
    :param frameRate: Frame rate passed to ffmpeg.
    :param zooms: Mapping from input frame number to the argument handed to
        ``resize`` for that frame.
    :param samplesPerFrame: Number of audio samples per video frame.
    :param SAMPLE_RATE: Sample rate used for the temporary chunk files.
    :param audioData: Audio samples, sliced per chunk.
    :param extension: File extension (including the dot) of the output video.
    :param VERBOSE: When falsy, ffmpeg is asked to suppress its output.
    :raises IOError: If a needed frame does not exist and no earlier frame
        can stand in for it.
    """
    print('Creating new video.')
    num = 0
    chunk_len = str(len(chunks))
    outputPointer = 0
    Renames = []  # flat list: src, dst, src, dst, ...
    lastExisting = None
    for chunk in chunks:
        # The flag may be a float, so always convert before indexing
        # (indexing a list with a float raises TypeError).
        speed = NEW_SPEED[int(chunk[2])]
        if speed < 99999:
            audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]
            if speed == 1:
                leng = len(audioChunk)
            else:
                sFile = TEMP + '/tempStart2.wav'
                eFile = TEMP + '/tempEnd2.wav'
                wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                        phasevocoder(reader.channels, speed=speed).run(reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]

            endPointer = outputPointer + leng

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + speed * (outputFrame - startOutputFrame))

                src = ''.join([CACHE, '/frame{:06d}'.format(inputFrame + 1), '.jpg'])
                dst = ''.join([TEMP, '/newFrame{:06d}'.format(outputFrame + 1), '.jpg'])
                if os.path.isfile(src):
                    lastExisting = inputFrame
                    if inputFrame in zooms:
                        resize(src, dst, zooms[inputFrame])
                    else:
                        os.rename(src, dst)
                        Renames.extend([src, dst])
                else:
                    if lastExisting is None:
                        print(src + ' does not exist.')
                        raise IOError('Fatal Error! No existing frame exist.')
                    # Fall back to the most recent frame that did exist.
                    src = ''.join([CACHE, '/frame{:06d}'.format(lastExisting + 1), '.jpg'])
                    if os.path.isfile(src):
                        if lastExisting in zooms:
                            resize(src, dst, zooms[lastExisting])
                        else:
                            os.rename(src, dst)
                            Renames.extend([src, dst])
                    else:
                        # uh oh, we need to find the file we just renamed!
                        myFile = None
                        for i in range(0, len(Renames), 2):
                            if Renames[i] == src:
                                myFile = Renames[i + 1]
                                break
                        if myFile is not None:
                            copyfile(myFile, dst)
                        else:
                            raise IOError(f'Error! The file {src} does not exist.')

            outputPointer = endPointer

        num += 1
        if num % 10 == 0:
            print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))
    print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))

    with open(f'{TEMP}/Renames.txt', 'w') as f:
        for item in Renames:
            f.write(f"{item}\n")

    print('Creating finished video. (This can take a while)')
    cmd = [
        'ffmpeg', '-y', '-framerate', str(frameRate),
        '-i', f'{TEMP}/newFrame%06d.jpg', f'{TEMP}/output{extension}',
    ]
    if not VERBOSE:
        cmd.extend(['-nostats', '-loglevel', '0'])
    subprocess.call(cmd)
def process(self):
    """
    Download a video and re-time it so quiet and loud parts play at
    different speeds.

    Steps: extract frames and audio with ffmpeg into ``TEMP_FOLDER``, split
    the audio into loud / quiet chunks, time-stretch each chunk, copy the
    matching frames onto the new timeline, and let ffmpeg assemble the
    output file. Size and length of the result are appended to
    ``self.output_parameters``. The module-level ``isCanceld`` flag is
    checked between steps and aborts processing when set. Any exception is
    reported on stdout and the temp folder is deleted.
    """
    global isCanceld, gain, dir, stpTimer
    try:
        if os.path.exists(TEMP_FOLDER):
            msg = input(
                'Warning: Are you sure you want to Delete videocuts_tmp folder'
            )
            if msg.lower() in ('yes', 'y'):
                deletePath(TEMP_FOLDER)

        Thread(target=self.timer).start()
        gain = 1.2
        self.new_video_size = 'N/A'
        self.new_video_length = 'N/A'
        Extras = ""
        frameRate = float(60)
        SAMPLE_RATE = int(self.frame_rate)
        SILENT_THRESHOLD = float(self.silence_threshold)
        FRAME_SPREADAGE = int(self.frame_margin)
        # Index 0: speed for quiet chunks, index 1: speed for loud chunks.
        NEW_SPEED = [float(self.silent_speed), float(self.play_speed)]
        gain = 0.6

        INPUT_FILE = self.downloadFile(str(self.video_url))
        if INPUT_FILE == '':
            return
        FRAME_QUALITY = self.frame_quality

        assert INPUT_FILE is not None, "You did not specify an input file.  You must specify an input file without spaces."

        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

        # smooth out transition's audio by quickly fading in/out
        AUDIO_FADE_ENVELOPE_SIZE = 400

        createPath(TEMP_FOLDER)
        dir = os.getcwd()

        # NOTE(review): the ffmpeg calls pass an argument list together with
        # shell=True and a literal '2>&1' element, which only behaves as a
        # redirection under the Windows shell - confirm the target platform.
        if isCanceld:
            return
        print(
            ' Step 1 - Frame quality has been assessed and is processing ')
        cmdary = [
            resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '-qscale:v',
            str(FRAME_QUALITY), TEMP_FOLDER + "/frame%06d.jpg", '-hide_banner'
        ]
        subprocess.call(cmdary, cwd=dir, shell=True)
        if isCanceld:
            return
        print(
            ' Step 1 - Frame quality processing has successfully completed '
        )
        time.sleep(2)

        if isCanceld:
            return
        print(' Step 2 - Sample Rate has been assessed and is processing ')
        cmdary = [
            resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '2>&1', '-ab',
            '160k', '-ac', '2', '-ar', str(SAMPLE_RATE), '-vn',
            TEMP_FOLDER + "/audio.wav"
        ]
        subprocess.call(cmdary, cwd=dir, shell=True)
        if isCanceld:
            return
        print(
            ' Step 2 - Sample Rate processing has successfully completed ')
        time.sleep(2)

        if isCanceld:
            return
        print(
            ' Step 3 - Video Frames are processing. This might take a while... '
        )
        cmdary = [resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '2>&1']
        # Create (or truncate) params.txt and release the handle right away;
        # the ffmpeg call below does not write into it.
        with open(TEMP_FOLDER + "/params.txt", "w"):
            pass
        subprocess.call(cmdary, cwd=dir, shell=True)
        if isCanceld:
            return
        print(
            ' Step 3 - Video Frames processing has successfully completed '
        )
        time.sleep(2)

        sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
        audioSampleCount = audioData.shape[0]
        maxAudioVolume = getMaxVolume(audioData)

        cap = cv2.VideoCapture(INPUT_FILE)
        fps = cap.get(cv2.CAP_PROP_FPS)

        with open(TEMP_FOLDER + "/params.txt", 'r+') as f:
            pre_params = f.read()
        params = pre_params.split('\n')
        for line in params:
            # Each line either supplies an fps value or resets the frame
            # rate to the one reported by OpenCV; the last line wins.
            m = re.search(' ([0-9]*.[0-9]*) fps,', line)
            if m is None:
                frameRate = float(fps)
            if m is not None:
                frameRate = float(m.group(1))

        samplesPerFrame = sampleRate / frameRate
        audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

        # 1 for every frame whose relative peak volume reaches the threshold.
        hasLoudAudio = np.zeros(audioFrameCount)
        for i in range(audioFrameCount):
            start = int(i * samplesPerFrame)
            end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
            audiochunks = audioData[start:end]
            maxchunksVolume = float(
                getMaxVolume(audiochunks)) / maxAudioVolume
            if maxchunksVolume >= SILENT_THRESHOLD:
                hasLoudAudio[i] = 1

        # chunks holds [start_frame, end_frame, flag]; a new chunk starts
        # each time the (margin-widened) loudness flag flips.
        chunks = [[0, 0, 0]]
        shouldIncludeFrame = np.zeros(audioFrameCount)
        for i in range(audioFrameCount):
            start = int(max(0, i - FRAME_SPREADAGE))
            end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
            shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
            if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
                chunks.append(
                    [chunks[-1][1], i, shouldIncludeFrame[i - 1]])

        # The trailing chunk runs to the end of the audio, so it carries the
        # last frame's flag (indexing i - 1 was wrong when the flip happened
        # on the very last frame).
        chunks.append(
            [chunks[-1][1], audioFrameCount, shouldIncludeFrame[-1]])
        chunks = chunks[1:]

        outputAudioData = np.zeros((0, audioData.shape[1]))
        outputPointer = 0

        lastExistingFrame = None
        for chunk in chunks:
            audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]

            sFile = TEMP_FOLDER + "/tempStart.wav"
            eFile = TEMP_FOLDER + "/tempEnd.wav"
            wavfile.write(sFile, SAMPLE_RATE, audioChunk)
            with WavReader(sFile) as reader:
                with WavWriter(eFile, reader.channels,
                               reader.samplerate) as writer:
                    tsm = phasevocoder(reader.channels,
                                       speed=NEW_SPEED[int(chunk[2])])
                    tsm.run(reader, writer)
            _, alteredAudioData = wavfile.read(eFile)
            leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng
            outputAudioData = np.concatenate(
                (outputAudioData, alteredAudioData / maxAudioVolume))

            # smooth out transition's audio by quickly fading in/out
            if leng < AUDIO_FADE_ENVELOPE_SIZE:
                # Shorter than the fade envelope: silence the chunk.
                outputAudioData[outputPointer:endPointer] = 0
            else:
                premask = np.arange(
                    AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
                # make the fade-envelope mask stereo
                mask = np.repeat(premask[:, np.newaxis], 2, axis=1)
                outputAudioData[outputPointer:outputPointer +
                                AUDIO_FADE_ENVELOPE_SIZE] *= mask
                outputAudioData[
                    endPointer -
                    AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

            startOutputFrame = int(
                math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                 (outputFrame - startOutputFrame))
                didItWork = copyFrame(inputFrame, outputFrame)
                if didItWork:
                    lastExistingFrame = inputFrame
                else:
                    # Reuse the most recent frame that could be copied.
                    copyFrame(lastExistingFrame, outputFrame)

            outputPointer = endPointer

        wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE,
                      outputAudioData)

        if isCanceld:
            return
        print(' Step 4 - Finalizing.... Please wait')
        cmdary = [
            resource_path('ffmpeg.exe'), '-framerate',
            str(frameRate), "-i", TEMP_FOLDER + "/newFrame%06d.jpg", '-i',
            TEMP_FOLDER + "/audioNew.wav", '-strict', '-2' + str(Extras),
            OUTPUT_FILE
        ]
        subprocess.call(cmdary, cwd=dir, shell=True)

        if isCanceld:
            return
        print(' Video processing finished successfully.')

        deletePath(TEMP_FOLDER)
        stpTimer = True
        self.new_video_size = get_size(OUTPUT_FILE) + ' MB'
        self.output_parameters.append(self.new_video_size)
        self.new_video_length = str(self.get_length(OUTPUT_FILE))
        self.output_parameters.append(self.new_video_length)

    except Exception as e:
        print(' Processing Video Failed! ')
        if str(e) != 'main thread is not in main loop':
            print('error message.', str(e))
        deletePath(TEMP_FOLDER)

    print(self.output_parameters)
def videoProcess(frame_rate, sample_rate, silent_threshold, frame_margin,
                 silent_speed, sounded_speed, url, input_file, output_file,
                 frame_quality):
    """
    Re-time a video so quiet and loud stretches play at different speeds.

    Frames and audio are extracted with ffmpeg into the ``TEMP`` directory,
    the audio is split into loud / quiet chunks, each chunk is time-stretched
    with a phase vocoder, matching frames are copied onto the new timeline
    and ffmpeg assembles the output video. ``TEMP`` is removed afterwards.

    :param frame_rate: Frame rate passed to the final ffmpeg call.
    :param sample_rate: Audio sample rate for extraction and output.
    :param silent_threshold: Relative peak volume from which a frame counts
        as loud.
    :param frame_margin: Number of neighbouring frames on each side that
        make a frame count as loud as well.
    :param silent_speed: Speed for quiet chunks.
    :param sounded_speed: Speed for loud chunks.
    :param url: When truthy, the video is downloaded and the path of the
        downloaded file is returned immediately.
    :param input_file: Path of the local input video.
    :param output_file: Output path; when empty, ``NEWVIDEO`` is inserted
        before the input file's extension.
    :param frame_quality: Value for ffmpeg's ``-qscale:v``.
    :return: The downloaded file name when ``url`` is given, ``"done"``
        after successful processing, ``" nothing"`` if anything failed.
    """
    try:
        print(frame_rate, sample_rate, silent_threshold, frame_margin,
              silent_speed, sounded_speed, url, input_file, output_file,
              frame_quality)

        # Index 0: speed for quiet chunks, index 1: speed for loud chunks.
        New_Speed_silent_and_sounded = [silent_speed, sounded_speed]

        if url:
            # NOTE(review): this returns right after the download, so a
            # video given by URL is never processed - confirm the intent.
            name = YouTube(url).streams.first().download()
            renamed = name.replace(' ', '_')
            os.rename(name, renamed)
            return renamed
        else:
            Input_Video = input_file

        # Explicit raises instead of asserts: asserts vanish under -O.
        if Input_Video is None:
            raise ValueError("enter input video")

        if len(output_file) >= 1:
            Output_Video = output_file
        else:
            # Derive the name from the input path (this referenced an
            # undefined name `filename` before).
            dot_position = Input_Video.rfind(".")
            Output_Video = Input_Video[:dot_position] + "NEWVIDEO" + Input_Video[dot_position:]

        Audio_fade_envelope_size = 400

        try:
            os.mkdir(TEMP)
        except OSError:
            raise RuntimeError("Directory Already existing")

        # Commands are passed as argument lists (no shell) so file names
        # with spaces or shell metacharacters are handled safely.
        subprocess.call([
            "ffmpeg", "-i", Input_Video, "-qscale:v", str(frame_quality),
            TEMP + "/old_frame%06d.jpg", "-hide_banner",
        ])
        subprocess.call([
            "ffmpeg", "-i", Input_Video, "-ab", "160k", "-ac", "2",
            "-ar", str(sample_rate), "-vn", TEMP + "/audio.wav",
        ])

        sampleRate, audioData = wavfile.read(TEMP + "/audio.wav")
        audioSampleCount = audioData.shape[0]
        maxAudioVolume = GetVolume(audioData)

        # NOTE(review): fixed value, independent of sample_rate and
        # frame_rate (equals 44100 / 30) - confirm this is intended.
        samplesPerFrame = 1470
        audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

        # 1 for every frame whose relative peak volume reaches the threshold.
        hasLoudAudio = numpy.zeros((audioFrameCount))
        for i in range(audioFrameCount):
            start = int(i * samplesPerFrame)
            end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
            audiochunks = audioData[start:end]
            maxchunksVolume = float(GetVolume(audiochunks)) / maxAudioVolume
            if maxchunksVolume >= silent_threshold:
                hasLoudAudio[i] = 1

        # chunks holds [start_frame, end_frame, flag]; a new chunk starts
        # each time the (margin-widened) loudness flag flips.
        chunks = [[0, 0, 0]]
        shouldIncludeFrame = numpy.zeros((audioFrameCount))
        for i in range(audioFrameCount):
            start = int(max(0, i - frame_margin))
            end = int(min(audioFrameCount, i + 1 + frame_margin))
            shouldIncludeFrame[i] = numpy.max(hasLoudAudio[start:end])
            if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:
                chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])

        # The trailing chunk runs to the end of the audio, so it carries the
        # last frame's flag (indexing i - 1 was wrong when the flip happened
        # on the very last frame).
        chunks.append(
            [chunks[-1][1], audioFrameCount, shouldIncludeFrame[-1]])
        chunks = chunks[1:]

        outputAudioData = numpy.zeros((0, audioData.shape[1]))
        outputPointer = 0

        lastExistingFrame = None
        for chunk in chunks:
            audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]

            sFile = TEMP + "/tempStart.wav"
            eFile = TEMP + "/tempEnd.wav"
            wavfile.write(sFile, sample_rate, audioChunk)
            with WavReader(sFile) as reader:
                with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                    tsm = phasevocoder(
                        reader.channels,
                        speed=New_Speed_silent_and_sounded[int(chunk[2])])
                    tsm.run(reader, writer)
            _, alteredAudioData = wavfile.read(eFile)
            leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng
            outputAudioData = numpy.concatenate(
                (outputAudioData, alteredAudioData / maxAudioVolume))

            if leng < Audio_fade_envelope_size:
                # Shorter than the fade envelope: silence the chunk.
                outputAudioData[outputPointer:endPointer] = 0
            else:
                premask = numpy.arange(
                    Audio_fade_envelope_size) / Audio_fade_envelope_size
                mask = numpy.repeat(premask[:, numpy.newaxis], 2, axis=1)  # two-channel mask
                outputAudioData[outputPointer:outputPointer + Audio_fade_envelope_size] *= mask
                outputAudioData[endPointer - Audio_fade_envelope_size:endPointer] *= 1 - mask

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for op_frame in range(startOutputFrame, endOutputFrame):
                ip_frame = int(chunk[0] +
                               New_Speed_silent_and_sounded[int(chunk[2])] *
                               (op_frame - startOutputFrame))
                didItWork = FRameCopy(ip_frame, op_frame)
                if didItWork:
                    lastExistingFrame = ip_frame
                else:
                    # Reuse the most recent frame that could be copied.
                    FRameCopy(lastExistingFrame, op_frame)

            outputPointer = endPointer

        wavfile.write(TEMP + "/audioNew.wav", sample_rate, outputAudioData)

        subprocess.call([
            "ffmpeg", "-framerate", str(frame_rate),
            "-i", TEMP + "/new_frame%06d.jpg",
            "-i", TEMP + "/audioNew.wav",
            "-strict", "-2", Output_Video,
        ])

        try:
            rmtree(TEMP, ignore_errors=False)
        except OSError:
            print("Delete failed")

        return "done"
    except Exception:
        # Callers receive this sentinel for every failure; the bare
        # `except:` used before also swallowed KeyboardInterrupt/SystemExit.
        return " nothing"
# coding: utf-8

# In[11]:

from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader, WavWriter

# Write a half-speed copy of qbhexamples.wav to qbh_half.wav.
with WavReader('qbhexamples.wav') as reader:
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print("{} {}".format(reader.channels, reader.samplerate))
    with WavWriter('qbh_half.wav', reader.channels, reader.samplerate) as writer:
        tsm = phasevocoder(reader.channels, speed=0.5)
        tsm.run(reader, writer)

# The `with` blocks above already closed both files; the former explicit
# close(reader) / close(writer) calls referenced an undefined name.
print("Finished, closing files.")

# In[ ]:
def process_video(args):
    """
    Build the re-timed audio track and frame sequence for an extracted video.

    Reads ``audio.wav`` from the temp folder, splits it into chunks of
    consecutive loud / quiet frames, time-stretches every chunk with the
    speed selected by its flag, copies the matching frames onto the new
    timeline via ``copyFrame`` and writes the result to ``audioNew.wav`` in
    the same folder.

    :param args: 7-item sequence ``(TEMP_FOLDER, frameRate, SAMPLE_RATE,
        NEW_SPEED, SILENT_THRESHOLD, FRAME_SPREADAGE,
        AUDIO_FADE_ENVELOPE_SIZE)``.
    """
    (TEMP_FOLDER, frameRate, SAMPLE_RATE, NEW_SPEED, SILENT_THRESHOLD,
     FRAME_SPREADAGE, AUDIO_FADE_ENVELOPE_SIZE) = args

    sampleRate, audioData = wavfile.read(TEMP_FOLDER+"/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)

    samplesPerFrame = sampleRate/frameRate
    audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))

    # 1 for every frame whose relative peak volume reaches the threshold.
    hasLoudAudio = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(i*samplesPerFrame)
        end = min(int((i+1)*samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    # chunks holds [start_frame, end_frame, flag]; a new chunk starts each
    # time the (margin-widened) loudness flag flips.
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i-FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i+1+FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i-1]])

    # The trailing chunk runs to the end of the audio, so it carries the
    # last frame's flag (indexing i-1 was wrong when the flip happened on
    # the very last frame).
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[-1]])
    chunks = chunks[1:]

    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0

    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]

        sFile = TEMP_FOLDER+"/tempStart.wav"
        eFile = TEMP_FOLDER+"/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer+leng
        outputAudioData = np.concatenate((outputAudioData, alteredAudioData/maxAudioVolume))

        # smooth out transition's audio by quickly fading in/out
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            # Shorter than the fade envelope: silence the chunk.
            outputAudioData[outputPointer:endPointer] = 0
        else:
            premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE)/AUDIO_FADE_ENVELOPE_SIZE
            # make the fade-envelope mask stereo
            mask = np.repeat(premask[:, np.newaxis], 2, axis=1)
            outputAudioData[outputPointer:outputPointer+AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer-AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1-mask

        startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0]+NEW_SPEED[int(chunk[2])]*(outputFrame-startOutputFrame))
            didItWork = copyFrame(TEMP_FOLDER, inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                # Reuse the most recent frame that could be copied.
                copyFrame(TEMP_FOLDER, lastExistingFrame, outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER+"/audioNew.wav", SAMPLE_RATE, outputAudioData)
def jumpcutter(input_file, frame_rate):
    """Re-time ``input_file`` by loudness and encode the result.

    Frames and audio are extracted with ffmpeg into ``TEMP_FOLDER``. Every
    video frame is marked loud or silent from the peak volume of its audio
    slice, frames are grouped into runs, each run is time-stretched with the
    phase vocoder at ``NEW_SPEED[0]`` (silent) or ``NEW_SPEED[1]`` (loud),
    the matching frames are copied with ``copyFrame``, and ffmpeg muxes
    everything into ``inputToOutputFilename(input_file)``. ``TEMP_FOLDER`` is
    deleted at the end.

    :param input_file: path of the video to process.
    :param frame_rate: frame rate to use when ffmpeg's stream description of
        ``input_file`` contains no integer ``<n> fps`` entry.
    """
    # Number of samples faded in/out at each chunk edge (arbitrary).
    AUDIO_FADE_ENVELOPE_SIZE = 400

    createPath(TEMP_FOLDER)

    # Argument lists instead of shell strings, so paths containing spaces or
    # shell metacharacters are passed to ffmpeg intact.
    subprocess.call([
        "ffmpeg", "-i", input_file, "-qscale:v", str(FRAME_QUALITY),
        TEMP_FOLDER + "/frame%06d.jpg", "-hide_banner",
    ])
    subprocess.call([
        "ffmpeg", "-i", input_file, "-af", "afftdn", "-ab", "160k",
        "-ac", "2", "-ar", str(SAMPLE_RATE), "-vn", TEMP_FOLDER + "/audio.wav",
    ])

    # Capture ffmpeg's stream description (printed on stderr). The original
    # probed TEMP_FOLDER/input.mp4, a file this function never creates, so
    # the frame-rate detection below could never match.
    with open(TEMP_FOLDER + "/params.txt", "w") as f:
        subprocess.call(["ffmpeg", "-i", input_file],
                        stdout=f, stderr=subprocess.STDOUT)

    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)
    print("maxAudioVolume:", maxAudioVolume)

    with open(TEMP_FOLDER + "/params.txt", "r") as f:
        params = f.read().split('\n')
    for line in params:
        m = re.search(r'Stream #.*Video.* ([0-9]*) fps', line)
        if m is not None:
            frame_rate = float(m.group(1))

    samplesPerFrame = sampleRate / frame_rate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    # 1 for every frame whose peak volume, relative to the whole track,
    # reaches the threshold.
    hasLoudAudio = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        chunkVolume = float(getMaxVolume(audiochunks))
        maxchunksVolume = chunkVolume / maxAudioVolume
        print("maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume",
              maxchunksVolume, chunkVolume, maxAudioVolume)
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    # Widen loud regions by FRAME_SPREADAGE frames and cut the timeline into
    # [first_frame, end_frame, flag] runs. The dummy head entry only seeds
    # chunks[-1][1] and is dropped afterwards.
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]

    # Fade envelope, built once; the trailing axis broadcasts over channels.
    fadeIn = (np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE)[:, np.newaxis]
    fadeOut = 1 - fadeIn

    # Pieces are joined once at the end; concatenating inside the loop
    # re-copies the whole output for every chunk.
    pieces = [np.zeros((0, audioData.shape[1]))]
    outputPointer = 0
    lastExistingFrame = None
    sFile = TEMP_FOLDER + "/tempStart.wav"
    eFile = TEMP_FOLDER + "/tempEnd.wav"

    for chunk in chunks:
        speed = NEW_SPEED[int(chunk[2])]
        audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]

        # audiotsm works on files: write the chunk, stretch it, read it back.
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=speed)
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng

        piece = (alteredAudioData / maxAudioVolume).astype(np.float64, copy=False)
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            # Shorter than the fade envelope: silence it entirely.
            piece[:] = 0
        else:
            # Smooth the transition by quickly fading in and out.
            piece[:AUDIO_FADE_ENVELOPE_SIZE] *= fadeIn
            piece[leng - AUDIO_FADE_ENVELOPE_SIZE:] *= fadeOut
        pieces.append(piece)

        # Map every output frame covered by this piece back to an input frame.
        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + speed * (outputFrame - startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                # Source frame missing: repeat the last one that existed.
                copyFrame(lastExistingFrame, outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, np.concatenate(pieces))

    output_file = inputToOutputFilename(input_file)
    subprocess.call([
        "ffmpeg", "-framerate", str(frame_rate),
        "-i", TEMP_FOLDER + "/newFrame%06d.jpg",
        "-i", TEMP_FOLDER + "/audioNew.wav",
        "-strict", "-2", output_file,
    ])

    deletePath(TEMP_FOLDER)
def process(pid, threads, INPUT_FILE, OUTPUT_FILE, FRAME_RATE, SAMPLE_RATE,
            SILENT_THRESHOLD, FRAME_SPREADAGE, NEW_SPEED, FRAME_QUALITY):
    """Re-time ``INPUT_FILE`` by loudness and encode it to ``OUTPUT_FILE``.

    Works in a private ``TEMP_<pid>`` folder. Frames and audio are extracted
    with ffmpeg, every video frame is marked loud or silent from the peak
    volume of its audio slice, runs of equal frames are time-stretched with
    the phase vocoder at ``NEW_SPEED[0]`` (silent) or ``NEW_SPEED[1]`` (loud),
    the matching frames are copied with ``misc_func.copyFrame`` and ffmpeg
    muxes the result. Any exception is printed rather than raised.

    :param pid: identifier used to name the temporary folder.
    :param threads: value passed to ffmpeg's ``-threads`` option.
    :param FRAME_RATE: frame rate, or ``None`` to ask
        ``misc_func.getFrameRate(INPUT_FILE)``.
    :param SAMPLE_RATE: audio sample rate used for extraction and output.
    :param SILENT_THRESHOLD: relative peak volume at or above which a frame
        counts as loud.
    :param FRAME_SPREADAGE: number of neighbouring frames a loud frame also
        marks as loud.
    :param NEW_SPEED: ``[silent_speed, sounded_speed]``.
    :param FRAME_QUALITY: value passed to ffmpeg's ``-qscale:v``.
    """
    try:
        TEMP_FOLDER = "TEMP_" + str(pid)
        # Number of samples faded in/out at each chunk edge (arbitrary).
        AUDIO_FADE_ENVELOPE_SIZE = 400

        misc_func.createPath(TEMP_FOLDER)

        # Options shared by the ffmpeg invocations below. Argument lists are
        # used instead of shell strings so no quoting of paths is needed.
        ffmpegBase = ["ffmpeg", "-v", "quiet", "-threads", str(threads)]

        # image extraction
        subprocess.call(ffmpegBase + [
            "-thread_queue_size", "512", "-i", INPUT_FILE,
            "-qscale:v", str(FRAME_QUALITY),
            TEMP_FOLDER + "/frame%06d.jpg", "-hide_banner",
        ])

        # audio extraction; DEVNULL replaces the Windows-only "> NUL", which
        # creates a file literally named NUL on other platforms.
        subprocess.call(ffmpegBase + [
            "-thread_queue_size", "512", "-i", INPUT_FILE,
            "-ab", "160k", "-ac", "2", "-ar", str(SAMPLE_RATE), "-vn",
            TEMP_FOLDER + "/audio.wav",
        ], stdout=subprocess.DEVNULL)

        # original parameter extraction: ffmpeg's stream description goes to
        # params.txt. Nothing in this function reads it back.
        with open(TEMP_FOLDER + "/params.txt", "w") as f:
            subprocess.call(["ffmpeg", "-i", INPUT_FILE],
                            stdout=f, stderr=subprocess.STDOUT)

        sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
        audioSampleCount = audioData.shape[0]
        maxAudioVolume = misc_func.getMaxVolume(audioData)

        if FRAME_RATE is None:
            FRAME_RATE = misc_func.getFrameRate(INPUT_FILE)

        samplesPerFrame = sampleRate / FRAME_RATE
        audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

        # 1 for every frame whose peak volume, relative to the whole track,
        # reaches the threshold.
        hasLoudAudio = np.zeros((audioFrameCount))
        for i in range(audioFrameCount):
            start = int(i * samplesPerFrame)
            end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
            audiochunks = audioData[start:end]
            maxchunksVolume = float(
                misc_func.getMaxVolume(audiochunks)) / maxAudioVolume
            if maxchunksVolume >= SILENT_THRESHOLD:
                hasLoudAudio[i] = 1

        # Widen loud regions by FRAME_SPREADAGE frames and cut the timeline
        # into [first_frame, end_frame, flag] runs. The dummy head entry only
        # seeds chunks[-1][1] and is dropped afterwards.
        chunks = [[0, 0, 0]]
        shouldIncludeFrame = np.zeros((audioFrameCount))
        for i in range(audioFrameCount):
            start = int(max(0, i - FRAME_SPREADAGE))
            end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
            shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
            if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
                chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
        chunks.append(
            [chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
        chunks = chunks[1:]

        # Fade envelope, built once; the trailing axis broadcasts over
        # channels.
        fadeIn = (np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE)[:, np.newaxis]
        fadeOut = 1 - fadeIn

        # Pieces are joined once at the end; concatenating inside the loop
        # re-copies the whole output for every chunk.
        pieces = [np.zeros((0, audioData.shape[1]))]
        outputPointer = 0
        lastExistingFrame = None
        sFile = TEMP_FOLDER + "/tempStart.wav"
        eFile = TEMP_FOLDER + "/tempEnd.wav"

        for chunk in chunks:
            speed = NEW_SPEED[int(chunk[2])]
            audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]

            # audiotsm works on files: write the chunk, stretch it, read it
            # back.
            wavfile.write(sFile, SAMPLE_RATE, audioChunk)
            with WavReader(sFile) as reader:
                with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                    tsm = phasevocoder(reader.channels, speed=speed)
                    tsm.run(reader, writer)
            _, alteredAudioData = wavfile.read(eFile)
            leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng

            piece = (alteredAudioData / maxAudioVolume).astype(np.float64, copy=False)
            if leng < AUDIO_FADE_ENVELOPE_SIZE:
                # Shorter than the fade envelope: silence it entirely.
                piece[:] = 0
            else:
                # Smooth the transition by quickly fading in and out.
                piece[:AUDIO_FADE_ENVELOPE_SIZE] *= fadeIn
                piece[leng - AUDIO_FADE_ENVELOPE_SIZE:] *= fadeOut
            pieces.append(piece)

            # Map every output frame covered by this piece back to an input
            # frame.
            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + speed * (outputFrame - startOutputFrame))
                didItWork = misc_func.copyFrame(inputFrame, outputFrame, TEMP_FOLDER)
                if didItWork:
                    lastExistingFrame = inputFrame
                else:
                    # Source frame missing: repeat the last one that existed.
                    misc_func.copyFrame(lastExistingFrame, outputFrame, TEMP_FOLDER)

            outputPointer = endPointer

        wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE,
                      np.concatenate(pieces))

        subprocess.call(ffmpegBase + [
            "-thread_queue_size", "1024", "-framerate", str(FRAME_RATE),
            "-i", TEMP_FOLDER + "/newFrame%06d.jpg",
            "-i", TEMP_FOLDER + "/audioNew.wav",
            "-strict", "-2", OUTPUT_FILE,
        ])

        misc_func.deletePath(TEMP_FOLDER)
    except Exception as e:
        # Best effort, as in the original: report the failure and return.
        print(e)
def execute(input_file="", url="", output_file="", silent_threshold=0.03,
            sounded_speed=1.00, silent_speed=5.00, frame_margin=1,
            sample_rate=44100, frame_quality=3):
    """Re-time a video by loudness and encode the result.

    The input is ``downloadFile(url)`` when ``url`` is given, otherwise
    ``input_file``. Frames and audio are extracted with ffmpeg into
    ``TEMP_FOLDER``, every video frame is marked loud or silent from the peak
    volume of its audio slice, runs of equal frames are time-stretched with
    the phase vocoder, the matching frames are copied with ``copyFrame`` and
    ffmpeg muxes the result. ``TEMP_FOLDER`` is deleted at the end.

    :param input_file: path of the video to process (used when ``url`` is
        empty).
    :param url: location handed to ``downloadFile``; takes precedence over
        ``input_file``.
    :param output_file: destination path; when empty or ``None`` the name
        comes from ``inputToOutputFilename``.
    :param silent_threshold: relative peak volume at or above which a frame
        counts as loud.
    :param sounded_speed: playback speed of loud runs.
    :param silent_speed: playback speed of silent runs.
    :param frame_margin: number of neighbouring frames a loud frame also
        marks as loud.
    :param sample_rate: audio sample rate used for extraction and output.
    :param frame_quality: value passed to ffmpeg's ``-qscale:v``.
    """
    SAMPLE_RATE = sample_rate
    SILENT_THRESHOLD = silent_threshold
    FRAME_SPREADAGE = frame_margin
    NEW_SPEED = [silent_speed, sounded_speed]  # indexed by the loud flag
    if url != "" and url is not None:
        INPUT_FILE = downloadFile(url)
    else:
        INPUT_FILE = input_file
    FRAME_QUALITY = frame_quality

    assert INPUT_FILE != "" and INPUT_FILE is not None, "why u put no input file, that dum"

    # Truthiness instead of len(): output_file=None no longer raises.
    if output_file:
        OUTPUT_FILE = output_file
    else:
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)
    print("Saving to: " + OUTPUT_FILE)

    # Number of samples faded in/out at each chunk edge (arbitrary).
    AUDIO_FADE_ENVELOPE_SIZE = 400

    createPath(TEMP_FOLDER)
    checkForFFMPEG()

    # Argument lists instead of shell strings, so paths containing spaces or
    # shell metacharacters are passed to ffmpeg intact.
    subprocess.call([
        "ffmpeg", "-i", INPUT_FILE, "-qscale:v", str(FRAME_QUALITY),
        TEMP_FOLDER + "/frame%06d.jpg", "-hide_banner",
    ])
    subprocess.call([
        "ffmpeg", "-i", INPUT_FILE, "-ab", "160k", "-ac", "2",
        "-ar", str(SAMPLE_RATE), "-vn", TEMP_FOLDER + "/audio.wav",
    ])

    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)

    frameRate = findFramerate(INPUT_FILE)

    samplesPerFrame = sampleRate / frameRate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    # 1 for every frame whose peak volume, relative to the whole track,
    # reaches the threshold.
    hasLoudAudio = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    # Widen loud regions by FRAME_SPREADAGE frames and cut the timeline into
    # [first_frame, end_frame, flag] runs. The dummy head entry only seeds
    # chunks[-1][1] and is dropped afterwards.
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]

    # Fade envelope, built once; the trailing axis broadcasts over channels.
    fadeIn = (np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE)[:, np.newaxis]
    fadeOut = 1 - fadeIn

    # Pieces are joined once at the end; concatenating inside the loop
    # re-copies the whole output for every chunk.
    pieces = [np.zeros((0, audioData.shape[1]))]
    outputPointer = 0
    lastExistingFrame = None
    sFile = TEMP_FOLDER + "/tempStart.wav"
    eFile = TEMP_FOLDER + "/tempEnd.wav"

    for chunk in chunks:
        speed = NEW_SPEED[int(chunk[2])]
        audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]

        # audiotsm works on files: write the chunk, stretch it, read it back.
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=speed)
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng

        piece = (alteredAudioData / maxAudioVolume).astype(np.float64, copy=False)
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            # Shorter than the fade envelope: silence it entirely.
            piece[:] = 0
        else:
            # Smooth the transition by quickly fading in and out.
            piece[:AUDIO_FADE_ENVELOPE_SIZE] *= fadeIn
            piece[leng - AUDIO_FADE_ENVELOPE_SIZE:] *= fadeOut
        pieces.append(piece)

        # Map every output frame covered by this piece back to an input frame.
        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + speed * (outputFrame - startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                # Source frame missing: repeat the last one that existed.
                copyFrame(lastExistingFrame, outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, np.concatenate(pieces))

    subprocess.call([
        "ffmpeg", "-y", "-framerate", str(frameRate),
        "-i", TEMP_FOLDER + "/newFrame%06d.jpg",
        "-i", TEMP_FOLDER + "/audioNew.wav",
        "-strict", "-2", OUTPUT_FILE,
    ])

    deletePath(TEMP_FOLDER)
def splitAudio(filename, chunks, samplesPerFrame, NEW_SPEED, audioData,
               SAMPLE_RATE, maxAudioVolume):
    """
    Build the re-timed audio track from the chunk list and write it to
    ``filename``.

    Each chunk is ``[first_frame, end_frame, flag]``; ``NEW_SPEED[flag]`` is
    its playback speed. Chunks whose speed is 99999 or more contribute no
    audio, chunks at speed 1 are passed through unchanged, and every other
    chunk is time-stretched with the phase vocoder. Samples are divided by
    ``maxAudioVolume`` and each piece is faded in and out over ``FADE_SIZE``
    samples (pieces shorter than that are silenced).

    Raises IOError if ``filename`` does not exist after writing.
    """
    samples = []
    writePos = 0
    fade = [step / FADE_SIZE for step in range(FADE_SIZE)]
    done = 0
    total = str(len(chunks))

    def silence(pos):
        # Stereo samples are [left, right] lists; mono samples are plain
        # numbers, for which item assignment raises TypeError.
        try:
            samples[pos][0] = 0
            samples[pos][1] = 0
        except TypeError:
            samples[pos] = 0

    def scale(pos, factor):
        # Same stereo/mono distinction as silence().
        try:
            samples[pos][0] *= factor
            samples[pos][1] *= factor
        except TypeError:
            samples[pos] *= factor

    for chunk in chunks:
        speed = NEW_SPEED[chunk[2]]
        if speed < 99999:
            first = int(chunk[0] * samplesPerFrame)
            last = int(chunk[1] * samplesPerFrame)
            audioChunk = audioData[first:last]

            sFile = TEMP + '/tempStart.wav'
            eFile = TEMP + '/tempEnd.wav'
            wavfile.write(sFile, SAMPLE_RATE, audioChunk)

            if speed == 1:
                # Unchanged speed: reuse the chunk that was just written.
                __, samefile = wavfile.read(sFile)
                leng = len(audioChunk)
                samples.extend((samefile / maxAudioVolume).tolist())
            else:
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels,
                                   reader.samplerate) as writer:
                        stretcher = phasevocoder(reader.channels, speed=speed)
                        stretcher.run(reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]
                samples.extend((alteredAudioData / maxAudioVolume).tolist())

            endPos = writePos + leng

            # smooth out transition's audio by quickly fading in/out
            if leng < FADE_SIZE:
                for pos in range(writePos, endPos):
                    silence(pos)
            else:
                for offset in range(FADE_SIZE):
                    scale(writePos + offset, fade[offset])
                for offset in range(FADE_SIZE):
                    scale(endPos - FADE_SIZE + offset, 1 - fade[offset])

            writePos = endPos

        done += 1
        if done % 10 == 0:
            print(f'{done}/{total} audio chunks done.')

    print(f'{done}/{total} audio chunks done.')

    wavfile.write(filename, SAMPLE_RATE, np.asarray(samples))

    if os.path.isfile(filename):
        print('Audio finished.')
    else:
        raise IOError(f'Error: The file {filename} was not created.')
def timeStretch(input_filename, output_filename, rate, samplerate):
    """Time-stretch a WAV file with the phase vocoder.

    :param input_filename: WAV file to read.
    :param output_filename: WAV file to write.
    :param rate: passed to ``phasevocoder`` as its second positional argument.
    :param samplerate: accepted but not used; the output is written with the
        sample rate reported by the reader.
    """
    with WavReader(input_filename) as source:
        sink = WavWriter(output_filename, source.channels, source.samplerate)
        with sink as target:
            phasevocoder(source.channels, rate).run(source, target)
# Indices where the smoothed signal/no-signal indicator changes value.
signalborders = np.where(signalpos_smoothed[:np.size(signalpos_smoothed)-1] != signalpos_smoothed[1:])[0]
del signalpos_smoothed

# Add the start and the end of the recording as outer borders.
signalborders = np.insert(signalborders, 0, 0)
signalborders = np.append(signalborders, np.size(content[:,0]))

# Segments are collected and joined once after the loop; np.append inside the
# loop re-copies everything gathered so far on every iteration. The empty
# int16 seed keeps the dtype promotion of the original np.append chain and
# takes its channel count from `content` instead of assuming two.
acc_parts = [np.empty((0, content.shape[1]), dtype=np.int16)]
for i in (np.arange(1, np.size(signalborders))):
    # Keep only segments whose mean absolute amplitude exceeds the threshold.
    if np.mean(np.abs(content[signalborders[i-1]:signalborders[i],:])) > threschold:
        # Pad the segment by rate/15 samples on each side, clamped to the data.
        lborder = int(np.max([signalborders[i-1]-rate/15, 0]))
        uborder = int(np.min([signalborders[i]+rate/15, np.size(content[:,0])]))
        acc_size = int(np.floor((uborder-lborder)/acc_rate0))
        nonacc_part = content[lborder:uborder,:]
        # Resample by picking every acc_rate0-th sample (index floored).
        acc_part = nonacc_part[np.floor(np.arange(acc_size) * acc_rate0).astype(int),:]
        acc_parts.append(acc_part)
newcontent = np.concatenate(acc_parts, axis=0)

wf.write('output_temp.wav', rate, newcontent)
try:
    with WavReader('output_temp.wav') as reader:
        with WavWriter('output.wav', reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=acc_rate)
            tsm.run(reader, writer)
finally:
    # Remove the intermediate file even when the time stretch fails.
    os.remove('output_temp.wav')
def processVideo(inputFile, outputFile, tempDir):
    """Re-time ``inputFile`` by loudness and encode it to ``outputFile``.

    Frames and audio are extracted with ffmpeg into ``tempDir``, every video
    frame is marked loud or silent from the peak volume of its audio slice,
    runs of equal frames are time-stretched with ``audio_stretch_algorithm``
    at ``NEW_SPEED[0]`` (silent) or ``NEW_SPEED[1]`` (loud), the matching
    frames are copied with ``copyFrame`` and ffmpeg encodes the result with
    libx264.

    The module-level ``frameRate`` is filled in from
    ``getFrameRate(inputFile)`` when it is ``None``.

    :param inputFile: path of the video to process.
    :param outputFile: path of the encoded result.
    :param tempDir: existing directory for intermediate frames and audio.
    """
    global frameRate

    # Argument lists instead of shell strings: the original single-quoted the
    # paths, which breaks as soon as a path contains an apostrophe.
    subprocess.call([
        "ffmpeg", "-i", inputFile, "-qscale:v", str(FRAME_QUALITY),
        tempDir + "/frame%06d.jpg", "-hide_banner",
    ])
    subprocess.call([
        "ffmpeg", "-i", inputFile, "-ab", "160k", "-ac", "2",
        "-ar", str(SAMPLE_RATE), "-vn", tempDir + "/audio.wav",
    ])

    sampleRate, audioData = wavfile.read(tempDir + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)
    # Guard against an all-silent track. The original applied this guard to
    # the loudness test only, not to the output scaling further down.
    safeMaxVolume = max(maxAudioVolume, 1e-10)

    if frameRate is None:
        frameRate = getFrameRate(inputFile)

    samplesPerFrame = sampleRate / frameRate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    # 1 for every frame whose peak volume, relative to the whole track,
    # reaches the threshold.
    hasLoudAudio = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / safeMaxVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    # Widen loud regions by FRAME_SPREADAGE frames and cut the timeline into
    # [first_frame, end_frame, flag] runs. The dummy head entry only seeds
    # chunks[-1][1] and is dropped afterwards.
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]

    # Audio envelope mask; the trailing axis broadcasts over channels.
    fadeIn = (np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE)[:, np.newaxis]
    fadeOut = 1 - fadeIn

    # Pieces stay numpy arrays and are joined once at the end, replacing the
    # per-sample Python loops over nested lists.
    pieces = []
    outputPointer = 0
    lastExistingFrame = None
    sFile = tempDir + "/tempStart.wav"
    eFile = tempDir + "/tempEnd.wav"

    for chunk in chunks:
        speed = NEW_SPEED[int(chunk[2])]
        audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]

        # audiotsm works on files: write the chunk, stretch it, read it back.
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = audio_stretch_algorithm(reader.channels, speed=speed)
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng

        piece = (alteredAudioData / safeMaxVolume).astype(np.float64, copy=False)

        # Smoothing the audio
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            # Shorter than the fade envelope: silence every channel. The
            # original replaced each [left, right] sample with a scalar 0,
            # which made the final array ragged.
            piece[:] = 0
        else:
            piece[:AUDIO_FADE_ENVELOPE_SIZE] *= fadeIn
            piece[leng - AUDIO_FADE_ENVELOPE_SIZE:] *= fadeOut
        pieces.append(piece)

        # Map every output frame covered by this piece back to an input frame.
        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + speed * (outputFrame - startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame, tempDir)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                # Source frame missing: repeat the last one that existed.
                copyFrame(lastExistingFrame, outputFrame, tempDir)

        outputPointer = endPointer

    outputAudioData = np.concatenate(pieces)
    wavfile.write(tempDir + "/audioNew.wav", SAMPLE_RATE, outputAudioData)

    subprocess.call([
        "ffmpeg", "-framerate", str(frameRate),
        "-i", f"{tempDir}/newFrame%06d.jpg",
        "-i", f"{tempDir}/audioNew.wav",
        "-strict", "-2", "-c:v", "libx264",
        "-preset", str(H264_PRESET), "-crf", str(H264_CRF),
        "-pix_fmt", "yuvj420p", outputFile,
    ])
# Re-time every non-empty chunk: the audio goes through the phase vocoder,
# the video through moviepy's speedx, and the two are joined again.
i = 0
for i, (i_start, i_end, silence) in enumerate(chunks, start=1):
    if i_start != i_end:
        speed = SOUNDED_SPEED if silence == 0 else SILENT_SPEED

        # Chunk bounds are divided by 1000 for subclip() and used directly to
        # slice `audio`.
        sub_clip = clip.subclip(i_start/1000, i_end/1000)

        src = os.path.join(tempPath, "sub_clip.wav")
        out = os.path.join(tempPath, "sub_clip-reg{0}.wav".format(i))
        audio[i_start:i_end].export(src, format='wav')

        with WavReader(src) as reader:
            with WavWriter(out, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=speed)
                tsm.run(reader, writer)

        sub_clip = sub_clip.fx(vfx.speedx, speed).set_audio(AudioFileClip(out))
        clips.append(sub_clip)

    if i % 5 == 0:
        print(f"Modifying Chunks: {round(i / len(chunks) * 100, 2)}% Complete.")

# Join the re-timed clips and encode the final video.
params = ['-crf', '25']
output_clip = concatenate_videoclips(clips)
output_clip.write_videofile(
    os.path.join(outputPath, video),
    codec='libx264',
    ffmpeg_params=params,
    threads=8,
    preset='ultrafast',
)
print("Success!")
print("Output is stored in: " + os.path.join(outputPath, video))
chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]]) chunks = chunks[1:] outputAudioData = np.zeros((0, audioData.shape[1])) outputPointer = 0 lastExistingFrame = None for chunk in chunks: audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)] sFile = TEMP_FOLDER + "/tempStart.wav" eFile = TEMP_FOLDER + "/tempEnd.wav" wavfile.write(sFile, SAMPLE_RATE, audioChunk) with WavReader(sFile) as reader: with WavWriter(eFile, reader.channels, reader.samplerate) as writer: tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])]) tsm.run(reader, writer) _, alteredAudioData = wavfile.read(eFile) leng = alteredAudioData.shape[0] endPointer = outputPointer + leng outputAudioData = np.concatenate( (outputAudioData, alteredAudioData / maxAudioVolume)) #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume # smooth out transitiion's audio by quickly fading in/out if leng < AUDIO_FADE_ENVELOPE_SIZE: outputAudioData[
def run_process():
    """Run the loudness-based re-timing with the settings from the GUI.

    Reads the input (a URL handed to ``downloadFile`` or a file path) and all
    numeric settings from widgets on the enclosing ``self``. With an empty
    input path it plays the system exclamation sound and returns. Otherwise
    it extracts frames and audio with ffmpeg into ``TEMP``, marks every video
    frame as loud or silent, time-stretches each run of equal frames with the
    phase vocoder, copies the matching frames with ``copyFrame``, muxes the
    result into ``inputToOutputFilename(INPUT_FILE)`` and deletes ``TEMP``.
    """
    URL = self.URLLineEdit.text()
    if self.URLRadio.isChecked():
        INPUT_FILE = downloadFile(URL)
    else:
        INPUT_FILE = self.fileLineEdit.text()

    if INPUT_FILE == '':
        winsound.PlaySound('SystemExclamation', winsound.SND_ALIAS)
        return

    frameRate = self.frameRateSlider.value()
    SAMPLE_RATE = self.sampleRateSlider.value()
    SILENT_THRESHOLD = float(self.thresholdLineEdit.text())
    FRAME_SPREADAGE = float(self.frameMarginSlider.value())
    SILENT_SPEED = float(self.silentSpeedLineEdit.text())
    SOUNDED_SPEED = float(self.soundedSpeedLineEdit.text())
    NEW_SPEED = [SILENT_SPEED, SOUNDED_SPEED]  # indexed by the loud flag
    print(NEW_SPEED)

    FRAME_QUALITY = float(self.frameQualitySlider.value())

    assert INPUT_FILE is not None, "why u put no input file, that dum"

    OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

    TEMP_FOLDER = "TEMP"
    # Number of samples faded in/out at each chunk edge (arbitrary).
    AUDIO_FADE_ENVELOPE_SIZE = 400

    createPath(TEMP_FOLDER)

    # Argument lists instead of shell strings, so paths containing spaces or
    # shell metacharacters are passed to ffmpeg intact.
    subprocess.call([
        "ffmpeg", "-i", INPUT_FILE, "-qscale:v", str(FRAME_QUALITY),
        TEMP_FOLDER + "/frame%06d.jpg", "-hide_banner",
    ])
    subprocess.call([
        "ffmpeg", "-i", INPUT_FILE, "-ab", "160k", "-ac", "2",
        "-ar", str(SAMPLE_RATE), "-vn", TEMP_FOLDER + "/audio.wav",
    ])

    # Capture ffmpeg's stream description (printed on stderr). The original
    # probed TEMP/input.mp4, a file this function never creates, so the
    # frame-rate detection below could never match.
    with open(TEMP_FOLDER + "/params.txt", "w") as f:
        subprocess.call(["ffmpeg", "-i", INPUT_FILE],
                        stdout=f, stderr=subprocess.STDOUT)

    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)

    # An integer "<n> fps" entry in the stream description overrides the
    # slider value.
    with open(TEMP_FOLDER + "/params.txt", "r") as f:
        params = f.read().split('\n')
    for line in params:
        m = re.search(r'Stream #.*Video.* ([0-9]*) fps', line)
        if m is not None:
            frameRate = float(m.group(1))

    samplesPerFrame = sampleRate / frameRate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    # 1 for every frame whose peak volume, relative to the whole track,
    # reaches the threshold.
    hasLoudAudio = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    # Widen loud regions by FRAME_SPREADAGE frames and cut the timeline into
    # [first_frame, end_frame, flag] runs. The dummy head entry only seeds
    # chunks[-1][1] and is dropped afterwards.
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]

    # Fade envelope, built once; the trailing axis broadcasts over channels.
    fadeIn = (np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE)[:, np.newaxis]
    fadeOut = 1 - fadeIn

    # Pieces are joined once at the end; concatenating inside the loop
    # re-copies the whole output for every chunk.
    pieces = [np.zeros((0, audioData.shape[1]))]
    outputPointer = 0
    lastExistingFrame = None
    sFile = TEMP_FOLDER + "/tempStart.wav"
    eFile = TEMP_FOLDER + "/tempEnd.wav"

    for chunk in chunks:
        speed = NEW_SPEED[int(chunk[2])]
        audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]

        # audiotsm works on files: write the chunk, stretch it, read it back.
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=speed)
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng

        piece = (alteredAudioData / maxAudioVolume).astype(np.float64, copy=False)
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            # Shorter than the fade envelope: silence it entirely.
            piece[:] = 0
        else:
            # Smooth the transition by quickly fading in and out.
            piece[:AUDIO_FADE_ENVELOPE_SIZE] *= fadeIn
            piece[leng - AUDIO_FADE_ENVELOPE_SIZE:] *= fadeOut
        pieces.append(piece)

        # Map every output frame covered by this piece back to an input frame.
        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + speed * (outputFrame - startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame, TEMP_FOLDER)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                # Source frame missing: repeat the last one that existed.
                copyFrame(lastExistingFrame, outputFrame, TEMP_FOLDER)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, np.concatenate(pieces))

    subprocess.call([
        "ffmpeg", "-framerate", str(frameRate),
        "-i", TEMP_FOLDER + "/newFrame%06d.jpg",
        "-i", TEMP_FOLDER + "/audioNew.wav",
        "-strict", "-2", OUTPUT_FILE,
    ])

    deletePath(TEMP_FOLDER)
def main():
    """Change the speed of an audio file without changing its pitch.

    Parses the command line, checks that the input file exists, collects the
    TSM parameters that were given and runs the selected TSM procedure from
    the input WAV file into the writer returned by ``create_writer``.

    Exits through ``argparse`` (status 2) when ``-i`` is missing or names a
    file that does not exist.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description=(
        "Change the speed of an audio file without changing its pitch."))
    parser.add_argument(
        '-s', '--speed', metavar="S", type=float, default=1.,
        help="Set the speed ratio (e.g 0.5 to play at half speed)")
    parser.add_argument(
        '-m', '--method', type=str, default="wsola",
        help="Select the TSM method (ola, wsola, or phasevocoder)")
    parser.add_argument(
        '-l', '--frame-length', metavar='N', type=int, default=None,
        help="Set the frame length to N.")
    parser.add_argument(
        '-a', '--analysis-hop', metavar='N', type=int, default=None,
        help="Set the analysis hop to N.")
    parser.add_argument(
        '--synthesis-hop', metavar='N', type=int, default=None,
        help="Set the synthesis hop to N.")
    parser.add_argument(
        '-t', '--tolerance', metavar='N', type=int, default=None,
        help="Set the tolerance to N (only used when method is set to wsola).")
    # NOTE: a '-p/--phase-locking' option (none or identity, phasevocoder
    # only) existed here but is disabled.
    parser.add_argument(
        '-o', '--output', metavar='FILENAME', type=str, default=None,
        help="Write the output in the wav file FILENAME.")
    # required=True: without it a missing -i left input_filename as None and
    # os.path.isfile(None) raised TypeError instead of a usage error.
    parser.add_argument(
        '-i', '--input_filename', metavar='INPUT_FILENAME', type=str,
        required=True, help="The audio input file")

    args = parser.parse_args()

    if not os.path.isfile(args.input_filename):
        parser.error('The input file "{}" does not exist.'.format(
            args.input_filename))

    # Get TSM method parameters; options left unset (or zero) are omitted.
    parameters = {}
    if args.speed:
        parameters['speed'] = args.speed
    if args.frame_length:
        parameters['frame_length'] = args.frame_length
    if args.analysis_hop:
        parameters['analysis_hop'] = args.analysis_hop
    if args.synthesis_hop:
        parameters['synthesis_hop'] = args.synthesis_hop
    if args.tolerance is not None and args.method == "wsola":
        parameters['tolerance'] = args.tolerance

    # Run the TSM procedure
    with WavReader(args.input_filename) as reader:
        with create_writer(args.output, reader) as writer:
            tsm = create_tsm(args.method, reader.channels, parameters)
            tsm.run(reader, writer)
def splitVideo(chunks, NEW_SPEED, frameRate, zooms, samplesPerFrame,
               SAMPLE_RATE, audioData, extension, VERBOSE):
    """Lay out the output frame sequence and encode it with ffmpeg.

    For every chunk whose speed is below 99999, the matching slice of
    ``audioData`` is measured (after running it through the phase vocoder
    when the speed is not 1) to find how many output frames the chunk
    occupies. Each of those output frames is produced from the cached input
    frame ``CACHE/frameNNNNNN.jpg`` and stored as ``TEMP/newFrameNNNNNN.jpg``.
    Chunks with a speed of 99999 or more contribute no frames.

    Cached frames are moved with ``os.rename`` unless they have an entry in
    ``zooms``, in which case ``resize`` writes the destination. Every move is
    written to ``TEMP/Renames.txt`` with source and destination on
    alternating lines. When an input frame is missing, the most recent frame
    that did exist is reused, copying it from where it was moved to if
    necessary.

    Args:
        chunks: Sequence of indexable items; ``[0]`` and ``[1]`` are the
            start and end input frame, ``[2]`` selects an entry of
            ``NEW_SPEED``.
        NEW_SPEED: Indexable collection of speed factors.
        frameRate: Value passed to ffmpeg's ``-framerate`` option.
        zooms: Mapping from input frame number to the value handed to
            ``resize`` for that frame.
        samplesPerFrame: Number of audio samples per video frame.
        SAMPLE_RATE: Sample rate used for the temporary wav file.
        audioData: Sliceable audio samples (presumably a NumPy array, since
            it is passed to ``wavfile.write`` -- confirm against the caller).
        extension: Suffix appended to ``TEMP/output`` for the encoded file.
        VERBOSE: When falsy, ffmpeg is told to suppress stats and logging.

    Raises:
        IOError: If a frame is missing and there is no earlier frame to
            reuse, or the earlier frame can no longer be located.
    """
    print('Creating new video.')

    num = 0
    chunk_len = str(len(chunks))
    outputPointer = 0
    Renames = []       # flat src, dst, src, dst ... log written to disk below
    moved_to = {}      # src -> first dst it was moved to, for O(1) lookup
    lastExisting = None

    def place_frame(src, dst, frame):
        """Create dst from src: resize if the frame is zoomed, else move."""
        if frame in zooms:
            resize(src, dst, zooms[frame])
        else:
            os.rename(src, dst)
            Renames.extend([src, dst])
            moved_to.setdefault(src, dst)

    for chunk in chunks:
        speed = NEW_SPEED[int(chunk[2])]
        if speed < 99999:
            audioChunk = audioData[int(chunk[0] * samplesPerFrame):
                                   int(chunk[1] * samplesPerFrame)]
            if speed == 1:
                leng = len(audioChunk)
            else:
                # Only the stretched length is needed here, so the audio is
                # round-tripped through two scratch wav files.
                sFile = TEMP + '/tempStart2.wav'
                eFile = TEMP + '/tempEnd2.wav'
                wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels,
                                   reader.samplerate) as writer:
                        phasevocoder(reader.channels, speed=speed).run(
                            reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]

            endPointer = outputPointer + leng

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(
                    chunk[0] + speed * (outputFrame - startOutputFrame))
                src = f'{CACHE}/frame{inputFrame + 1:06d}.jpg'
                dst = f'{TEMP}/newFrame{outputFrame + 1:06d}.jpg'
                if os.path.isfile(src):
                    lastExisting = inputFrame
                    place_frame(src, dst, inputFrame)
                    continue

                # The wanted frame is gone: fall back to the last one seen.
                if lastExisting is None:
                    print(src + ' does not exist.')
                    raise IOError('Fatal Error! No existing frame exist.')
                src = f'{CACHE}/frame{lastExisting + 1:06d}.jpg'
                if os.path.isfile(src):
                    place_frame(src, dst, lastExisting)
                elif src in moved_to:
                    # The fallback frame was already moved; copy it from
                    # its new location instead.
                    copyfile(moved_to[src], dst)
                else:
                    raise IOError(f'Error! The file {src} does not exist.')

            outputPointer = endPointer

        num += 1
        if num % 10 == 0:
            print(f'{num}/{chunk_len} frame chunks done.')

    print(f'{num}/{chunk_len} frame chunks done.')

    with open(f'{TEMP}/Renames.txt', 'w') as f:
        f.writelines(f"{item}\n" for item in Renames)

    print('Creating finished video. (This can take a while)')
    cmd = [
        'ffmpeg', '-y', '-framerate', str(frameRate),
        '-i', f'{TEMP}/newFrame%06d.jpg',
        f'{TEMP}/output{extension}',
    ]
    if not VERBOSE:
        cmd.extend(['-nostats', '-loglevel', '0'])
    subprocess.call(cmd)