def create_tsm(name, channels, speed):
    """Create a TSM object given the method name and its parameters."""
    if name == "ola":
        return ola(channels, speed)
    if name == "wsola":
        return wsola(channels, speed)
    if name == "phasevocoder":
        return phasevocoder(channels, speed, phase_locking=PhaseLocking.NONE)
    if name == "phasevocoder_identity":
        return phasevocoder(channels, speed, phase_locking=PhaseLocking.IDENTITY)
    raise ValueError("unknown TSM method: {}".format(name))
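A minimal usage sketch for the dispatcher above, assuming the standard audiotsm layout (ola, wsola and phasevocoder exported from audiotsm, PhaseLocking from audiotsm.phasevocoder); the file names are placeholders:

from audiotsm import ola, wsola, phasevocoder
from audiotsm.phasevocoder import PhaseLocking
from audiotsm.io.wav import WavReader, WavWriter

# Placeholder paths; speed=1.5 plays the audio 1.5x faster without changing pitch.
with WavReader("in.wav") as reader:
    with WavWriter("out.wav", reader.channels, reader.samplerate) as writer:
        create_tsm("wsola", reader.channels, 1.5).run(reader, writer)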
def speed(self, speed):
    """
    Adjusts speed by the given factor without changing pitch.

    :param speed: Factor by which to increase/decrease speed without changing pitch
    :type speed: float
    """
    if speed != 1:
        logger.info("Setting speed to %f", speed)
        logger.debug("Export file to BytesIO")
        wav_in = BytesIO()
        wav_in = self._segment.export(wav_in, format="wav")
        wav_in.seek(0)
        logger.debug("Initializing reader and writer")
        with WavReader(wav_in) as reader:
            wav_out = BytesIO()
            with WavWriter(wav_out, reader.channels, reader.samplerate) as writer:
                logger.debug("Adjusting speed with vocoder")
                tsm = phasevocoder(reader.channels, speed=speed)
                tsm.run(reader, writer)
                logger.debug("Reload audio segment")
                wav_out.seek(44)  # skip the WAV header and start at the first sample
                self._segment = AudioSegment.from_raw(
                    wav_out,
                    sample_width=self._segment.sample_width,
                    channels=self._segment.channels,
                    frame_rate=self._segment.frame_rate,
                )
def change_pitch(self, source_filepath):
    import tempfile
    import pydub
    from audiotsm.io.wav import WavReader, WavWriter
    from audiotsm import phasevocoder

    # Load the sound up front so `sound` and `sample_rate` are defined even when
    # the octave shift is skipped (the original only set them inside that branch).
    sound = pydub.AudioSegment.from_mp3(source_filepath)
    sample_rate = sound.frame_rate
    if abs(self.octaves) > 0.1:
        _, sampled_filepath = tempfile.mkstemp(suffix='.wav', dir=self.temp_dir)
        sample_rate = int(sound.frame_rate * (2 ** self.octaves))
        modified = sound._spawn(sound.raw_data, overrides={"frame_rate": sample_rate})
        modified.export(sampled_filepath, format="wav")
    else:
        sampled_filepath = source_filepath

    if abs(self.speed - 1) > 0.1:
        # output_filepath = f"{os.path.basename(source_filepath)}{self.oct_str}_{self.speed}.wav"
        _, output_filepath = tempfile.mkstemp(suffix='.wav', dir=self.temp_dir)
        with WavReader(sampled_filepath) as reader:
            with WavWriter(output_filepath, reader.channels, reader.samplerate) as writer:
                # Compensate for the tempo change introduced by the resampling step above.
                tsm = phasevocoder(reader.channels,
                                   speed=self.speed * (sound.frame_rate / sample_rate))
                tsm.run(reader, writer)
        return output_filepath
    else:
        return sampled_filepath
def create_tsm(name, channels, parameters):
    """Create a TSM object given the method name and its parameters."""
    if name == "ola":
        return ola(channels, **parameters)
    if name == "wsola":
        return wsola(channels, **parameters)
    if name == "phasevocoder":
        return phasevocoder(channels, **parameters)
    raise ValueError("unknown TSM method: {}".format(name))
def create_tsm(self, channels):
    parameters = {}
    if self.frame_length > 0:
        parameters['frame_length'] = self.frame_length
    if self.synthesis_hop > 0:
        parameters['synthesis_hop'] = self.synthesis_hop
    if self.phase_locking >= 0:
        parameters['phase_locking'] = self.phase_locking
    return phasevocoder(channels, **parameters)
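The builder above treats a non-positive frame_length or synthesis_hop (and a negative phase_locking) as "use audiotsm's default". A hypothetical driver for it; the settings object and its values are illustrative only:

from types import SimpleNamespace

# Sentinel values (0, or -1 for phase_locking) fall back to audiotsm's defaults.
settings = SimpleNamespace(frame_length=2048, synthesis_hop=0, phase_locking=-1)
tsm = create_tsm(settings, channels=2)  # call the unbound method directly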
def 音频变速(wav音频数据列表, 声道数, 采样率, 目标速度, 临时文件夹):
    if 目标速度 == 1.0:
        return wav音频数据列表
    if 查找可执行程序('soundstretch') is not None:
        内存音频二进制缓存区 = io.BytesIO()
        fd, soundstretch临时输出文件 = tempfile.mkstemp()
        os.close(fd)
        wavfile.write(内存音频二进制缓存区, 采样率, wav音频数据列表)
        变速命令 = f'soundstretch stdin "{soundstretch临时输出文件}" -tempo={(目标速度 - 1) * 100}'
        变速线程 = subprocess.Popen(变速命令, stdout=subprocess.PIPE,
                                    stdin=subprocess.PIPE, stderr=subprocess.PIPE)
        变速线程.communicate(内存音频二进制缓存区.getvalue())
        try:
            采样率, 音频区间处理后的数据 = wavfile.read(soundstretch临时输出文件)
        except Exception:
            出错时间 = int(time.time())
            fd, 原始数据存放位置 = tempfile.mkstemp(dir=临时文件夹, prefix=f'原始-{出错时间}-', suffix='.wav')
            os.close(fd)
            wavfile.write(原始数据存放位置, 采样率, wav音频数据列表)
            fd, 出错文件 = tempfile.mkstemp(dir=临时文件夹, prefix=f'变速-{出错时间}-', suffix='.wav')
            os.close(fd)
            try:
                copy(soundstretch临时输出文件, 出错文件)
            except OSError:
                pass
            fd, soundstretch临时输出文件 = tempfile.mkstemp(dir=临时文件夹, prefix=f'变速-{出错时间}-', suffix='.wav')
            os.close(fd)
            变速命令 = f'soundstretch stdin "{soundstretch临时输出文件}" -tempo={(目标速度 - 1) * 100}'
            变速线程 = subprocess.Popen(变速命令, stdout=subprocess.PIPE,
                                        stdin=subprocess.PIPE, stderr=subprocess.PIPE)
            变速线程.communicate(内存音频二进制缓存区.getvalue())
            print(f'Soundstretch time-stretching failed; see the files below for details.\n'
                  f'  Original audio data: {原始数据存放位置}\n'
                  f'  Stretched audio data: {soundstretch临时输出文件}\n')
            print(f'Details of the failing audio:\n'
                  f'  Sample count: {len(wav音频数据列表)}\n'
                  f'  Target speed: {目标速度}\n'
                  f'  Target sample count: {len(wav音频数据列表) / 目标速度}')
            return wav音频数据列表
        os.remove(soundstretch临时输出文件)
    else:
        print("SoundTouch's soundstretch was not found, so falling back to the phasevocoder "
              "method for time-stretching. For better results, download the soundstretch "
              "binary for your system from http://www.surina.net/soundtouch and put it on "
              "your PATH.\n")
        sFile = io.BytesIO()
        wavfile.write(sFile, 采样率, wav音频数据列表)
        sFile = io.BytesIO(sFile.getvalue())
        eFile = io.BytesIO()
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=目标速度)
                tsm.run(reader, writer)
        _, 音频区间处理后的数据 = wavfile.read(io.BytesIO(eFile.getvalue()))
    return 音频区间处理后的数据
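One detail worth noting in the snippet above: soundstretch's -tempo flag takes a percentage change, while audiotsm's phasevocoder takes a speed ratio, hence the (目标速度 - 1) * 100 conversion. A quick arithmetic check of that mapping:

# soundstretch wants a tempo change in percent; phasevocoder wants a ratio.
def speed_to_tempo_percent(speed):
    return (speed - 1) * 100

assert speed_to_tempo_percent(1.0) == 0    # unchanged
assert speed_to_tempo_percent(2.0) == 100  # twice as fast
assert speed_to_tempo_percent(0.5) == -50  # half speed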
def change_bar_speed(self, audio_slice_id, target_bpm=120.0):
    if not os.path.isdir(c.LF_CH_BPM + self._audio_id):
        try:
            os.mkdir(c.LF_CH_BPM + self._audio_id)
        except FileExistsError:
            pass
    else:
        if os.path.isfile(c.LF_CH_BPM + self._audio_id + "/" + audio_slice_id + ".wav"):
            return 0
    bar_bpm = 60.00 / (
        (self.beat_track[int(audio_slice_id.split("_")[1]) + 1]
         - self.beat_track[int(audio_slice_id.split("_")[1])]) / 8)
    with WavReader("{}{}/{}.wav".format(c.LF_SLICE, self._audio_id, audio_slice_id)) as r:
        with WavWriter("{}{}/{}.wav".format(c.LF_CH_BPM, self._audio_id, audio_slice_id),
                       r.channels, r.samplerate) as w:
            phasevocoder(r.channels, speed=target_bpm / bar_bpm).run(r, w)
    print("only came " + audio_slice_id)
def getStretchedData(low, sf): s = "placeholder.wav" playSpeed = 1/sf if low: s = "lowPlaceholder.wav" playSpeed *= LOW_FACTOR with WavReader(s) as reader: with WavWriter("stretchholder.wav", reader.channels, reader.samplerate) as writer: tsm = phasevocoder(reader.channels, speed=playSpeed) tsm.run(reader, writer) _, s = wavfile.read("stretchholder.wav") d = np.zeros(s.shape) if low: d += s else: d += s*0.81 return d
def getStretchedData(low, sf):
    s = PLACEHOLDER_WAV_AUX
    playSpeed = 1 / sf
    if low:
        s = LOW_PLACEHOLDER_WAV_AUX
        playSpeed *= LOW_FACTOR
    with WavReader(s) as reader:
        with WavWriter(STRECH, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=playSpeed)
            tsm.run(reader, writer)
    _, s = wavfile.read(STRECH)
    d = np.zeros(s.shape)
    if low:
        d += s
    else:
        d += s * 0.81
    return d
def execute(self):
    # get values of audio frames: 0 for silence, 1 for loudness
    has_loud_audio = self.get_loud_frame()
    # get edit points of silence and loudness
    edit_points = self.get_edit_points(has_loud_audio)
    start_frame = 0
    output = self.get_output()
    for edit_point in edit_points:
        audio_chunk = self.parameter.audio_data[
            int(edit_point.start_frame * self.parameter.samples_per_frame):
            int(edit_point.end_frame * self.parameter.samples_per_frame)
        ]
        # ArrayReader needs (channels, frames), so transpose the data first
        reader = ArrayReader(np.transpose(audio_chunk))
        writer = ArrayWriter(reader.channels)
        tsm = phasevocoder(reader.channels,
                           speed=self.parameter.new_speed[int(edit_point.should_keep)])
        tsm.run(reader, writer)
        altered_audio_data = np.transpose(writer.data)
        altered_audio_data_length = altered_audio_data.shape[0]
        if altered_audio_data_length < self.parameter.audio_fade_envelope_size:
            altered_audio_data[:] = 0  # audio is less than 0.01 sec, let's just remove it
        else:
            self.fade_out_silence(altered_audio_data)
        end_frame = start_frame + altered_audio_data_length
        start_output_frame = int(math.ceil(start_frame / self.parameter.samples_per_frame))
        end_output_frame = int(math.ceil(end_frame / self.parameter.samples_per_frame))
        output.apply_edit_point(edit_point, altered_audio_data,
                                start_output_frame, end_output_frame)
        start_frame = end_frame
    output.close()
def fast_video_function(videoFile, NEW_SPEEDfloat, silentThreshold, frameMargin):
    global NEW_SPEED
    NEW_SPEED = [NEW_SPEEDfloat, 1]
    global startTime
    startTime = time.time()
    global cap
    cap = cv2.VideoCapture(videoFile)
    # In case files were left behind
    try:
        os.remove('output.wav')
        os.remove('spedup.mp4')
        os.remove('spedupAudio.wav')
    except OSError:
        pass
    global width, height, fourcc, fps, extractAudio
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = round(cap.get(cv2.CAP_PROP_FPS))
    extractAudio = 'ffmpeg -i "{}" -ab 160k -ac 2 -ar 44100 -vn output.wav'.format(videoFile)
    subprocess.call(extractAudio, shell=True)
    global out
    out = cv2.VideoWriter('spedup.mp4', fourcc, fps, (width, height))
    sampleRate, audioData = wavfile.read('output.wav')
    global skipped
    skipped = 0
    global nFrames
    nFrames = 0
    global channels
    channels = int(audioData.shape[1])
    framesProcessed = 0

    def getMaxVolume(s):
        maxv = np.max(s)
        minv = np.min(s)
        return max(maxv, -minv)

    def writeFrames(frames, nAudio, speed, samplePerSecond, writer):
        numAudioChunks = round(nAudio / samplePerSecond * fps)
        global nFrames
        numWrites = numAudioChunks - nFrames
        # a = [1, 2, 3]: len(a) == 3 but a[3] is an IndexError
        limit = len(frames) - 1
        for i in range(numWrites):
            frameIndex = round(i * speed)
            if frameIndex > limit:
                writer.write(frames[-1])
            else:
                writer.write(frames[frameIndex])
            nFrames += 1

    global normal
    normal = 0  # 0 for silent, 1 for normal
    global switchStart
    switchStart = 0
    global maxVolume
    maxVolume = getMaxVolume(audioData)

    # not used:
    # fadeInSamples = 400
    # preMask = np.arange(fadeInSamples)/fadeInSamples
    # mask = np.repeat(preMask[:, np.newaxis], 2, axis=1)

    global y, yPointer, frameBuffer
    y = np.zeros_like(audioData, dtype=np.int16)
    yPointer = 0
    frameBuffer = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # since the sample rate is per second, convert the position to seconds as well
        currentTime = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
        audioSampleStart = math.floor(currentTime * sampleRate)
        # more accurate frame counting
        framesProcessed += 1
        # audioSampleStart + one frame's worth of samples
        audioSampleEnd = min(audioSampleStart + (sampleRate // fps) * frameMargin, len(audioData))
        switchEnd = audioSampleStart + (sampleRate // fps)
        audioChunkMod = audioData[audioSampleStart:switchEnd]
        audioChunk = audioData[audioSampleStart:audioSampleEnd]
        # if it's quiet
        if getMaxVolume(audioChunk) / maxVolume < silentThreshold:
            skipped += 1
            # if the frame is 'switched'
            frameBuffer.append(frame)
            normal = 0
        else:  # if it's 'loud'
            # and the last frame is 'loud'
            if normal:
                out.write(frame)
                nFrames += 1
                switchStart = switchEnd
                yPointerEnd = yPointer + audioChunkMod.shape[0]
                y[yPointer:yPointerEnd] = audioChunkMod
                yPointer = yPointerEnd
            else:
                spedChunk = audioData[switchStart:switchEnd]
                spedupAudio = np.zeros((0, 2), dtype=np.int16)
                with ArrReader(spedChunk, channels, sampleRate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, sampleRate, 2) as writer:
                        tsm = phasevocoder(reader.channels, speed=NEW_SPEED[normal])
                        tsm.run(reader, writer)
                        spedupAudio = writer.output
                yPointerEnd = yPointer + spedupAudio.shape[0]
                y[yPointer:yPointerEnd] = spedupAudio
                yPointer = yPointerEnd
                writeFrames(frameBuffer, yPointerEnd, NEW_SPEED[normal], sampleRate, out)
                frameBuffer = []
                switchStart = switchEnd
                normal = 1
        if framesProcessed % 500 == 0:
            print("{} frames processed".format(framesProcessed))

    print("{} frames skipped".format(skipped))
    y = y[:yPointer]
    wavfile.write("spedupAudio.wav", sampleRate, y)
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    outFile = "{}_faster{}".format(videoFile[:videoFile.rfind('.')], videoFile[videoFile.rfind('.'):])
    command = "ffmpeg -y -i spedup.mp4 -i spedupAudio.wav -c:v copy -c:a aac {}".format(outFile)
    subprocess.call(command, shell=True)
    os.remove('output.wav')
    os.remove('spedup.mp4')
    os.remove('spedupAudio.wav')
    timeLength = round(time.time() - startTime, 2)
    minutes = timedelta(seconds=round(timeLength))
    print('Finished.')
    print(f'Took {timeLength} seconds ({minutes})')
    print(f'Removed {math.floor(skipped / fps)} seconds from a {math.floor(framesProcessed / fps)} second video.')
def splitVideo(chunks, NEW_SPEED, frameRate, zooms, samplesPerFrame, SAMPLE_RATE,
               audioData, extension, VERBOSE):
    """
    This function is responsible for outputting a new image sequence in the
    correct order. splitVideo is also responsible for creating Renames.txt.

    Copying every jpeg is computationally expensive; renaming the file is less
    so, but we still need to create the cache folder, and we can't do that if
    the program has no idea what it renamed and where. Renames.txt will be read
    in originalMethod.py to recreate the original image sequence.

    To avoid desyncing issues with the audio, we need to have audioData and go
    along roughly the same way originalAudio.py does it. Specifically, get the
    length of the new audio chunk. If the speed set is 1, this is easy. If not,
    we have to create a new file modified to be the new speed with audiotsm,
    then read that file to get the length.
    """
    print('Creating new video.')
    num = 0
    chunk_len = str(len(chunks))
    outputPointer = 0
    Renames = []
    lastExisting = None
    for chunk in chunks:
        if NEW_SPEED[int(chunk[2])] < 99999:
            audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]
            if NEW_SPEED[chunk[2]] == 1:
                leng = len(audioChunk)
            else:
                sFile = TEMP + '/tempStart2.wav'
                eFile = TEMP + '/tempEnd2.wav'
                wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                        phasevocoder(reader.channels, speed=NEW_SPEED[chunk[2]]).run(reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + NEW_SPEED[chunk[2]] * (outputFrame - startOutputFrame))
                src = ''.join([CACHE, '/frame{:06d}'.format(inputFrame + 1), '.jpg'])
                dst = ''.join([TEMP, '/newFrame{:06d}'.format(outputFrame + 1), '.jpg'])
                if os.path.isfile(src):
                    lastExisting = inputFrame
                    if inputFrame in zooms:
                        resize(src, dst, zooms[inputFrame])
                    else:
                        os.rename(src, dst)
                        Renames.extend([src, dst])
                else:
                    if lastExisting is None:
                        print(src + ' does not exist.')
                        raise IOError('Fatal Error! No existing frames exist.')
                    src = ''.join([CACHE, '/frame{:06d}'.format(lastExisting + 1), '.jpg'])
                    if os.path.isfile(src):
                        if lastExisting in zooms:
                            resize(src, dst, zooms[lastExisting])
                        else:
                            os.rename(src, dst)
                            Renames.extend([src, dst])
                    else:
                        # uh oh, we need to find the file we just renamed!
                        myFile = None
                        for i in range(0, len(Renames), 2):
                            if Renames[i] == src:
                                myFile = Renames[i + 1]
                                break
                        if myFile is not None:
                            copyfile(myFile, dst)
                        else:
                            raise IOError(f'Error! The file {src} does not exist.')
            outputPointer = endPointer
        num += 1
        if num % 10 == 0:
            print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))
    print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))
    with open(f'{TEMP}/Renames.txt', 'w') as f:
        for item in Renames:
            f.write(f"{item}\n")
    print('Creating finished video. (This can take a while)')
    cmd = ['ffmpeg', '-y', '-framerate', str(frameRate), '-i',
           f'{TEMP}/newFrame%06d.jpg', f'{TEMP}/output{extension}']
    if not VERBOSE:
        cmd.extend(['-nostats', '-loglevel', '0'])
    subprocess.call(cmd)
outputAudioData = np.zeros((0, audioData.shape[1]))
outputPointer = 0
lastExistingFrame = None
for chunk in chunks:
    audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]
    sFile = TEMP_FOLDER + "/tempStart.wav"
    eFile = TEMP_FOLDER + "/tempEnd.wav"
    wavfile.write(sFile, SAMPLE_RATE, audioChunk)
    with WavReader(sFile) as reader:
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
            tsm.run(reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer + leng
    outputAudioData = np.concatenate((outputAudioData, alteredAudioData / maxAudioVolume))
    # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

    # smooth out transition's audio by quickly fading in/out
    if leng < AUDIO_FADE_ENVELOPE_SIZE:
        outputAudioData[outputPointer:endPointer] = 0  # audio is less than 0.01 sec, let's just remove it
def process(self):
    global isCanceld
    try:
        if os.path.exists(TEMP_FOLDER):
            msg = input('Warning: Are you sure you want to delete the videocuts_tmp folder? ')
            if msg.lower() in ('yes', 'y'):
                deletePath(TEMP_FOLDER)
        Thread(target=self.timer).start()
        global gain
        gain = 1.2
        self.new_video_size = 'N/A'
        self.new_video_length = 'N/A'
        Extras = ""
        frameRate = float(60)
        SAMPLE_RATE = int(self.frame_rate)
        SILENT_THRESHOLD = float(self.silence_threshold)
        FRAME_SPREADAGE = int(self.frame_margin)
        NEW_SPEED = [float(self.silent_speed), float(self.play_speed)]
        gain = 0.6
        INPUT_FILE = self.downloadFile(str(self.video_url))
        if INPUT_FILE == '':
            return
        FRAME_QUALITY = self.frame_quality
        assert INPUT_FILE is not None, \
            "You did not specify an input file. You must specify an input file without spaces."
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)
        # smooth out transition's audio by quickly fading in/out (arbitrary magic number)
        AUDIO_FADE_ENVELOPE_SIZE = 400
        createPath(TEMP_FOLDER)
        global dir
        dir = os.getcwd()
        if isCanceld:
            return
        print(' Step 1 - Frame quality has been assessed and is processing ')
        cmdary = [resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '-qscale:v',
                  str(FRAME_QUALITY), TEMP_FOLDER + "/frame%06d.jpg", '-hide_banner']
        subprocess.call(cmdary, cwd=dir, shell=True)
        if isCanceld:
            return
        print(' Step 1 - Frame quality processing has successfully completed ')
        time.sleep(2)
        if isCanceld:
            return
        print(' Step 2 - Sample rate has been assessed and is processing ')
        cmdary = [resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '2>&1', '-ab', '160k',
                  '-ac', '2', '-ar', str(SAMPLE_RATE), '-vn', TEMP_FOLDER + "/audio.wav"]
        subprocess.call(cmdary, cwd=dir, shell=True)
        if isCanceld:
            return
        print(' Step 2 - Sample rate processing has successfully completed ')
        time.sleep(2)
        if isCanceld:
            return
        print(' Step 3 - Video frames are processing. This might take a while... ')
        cmdary = [resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '2>&1']
        open(TEMP_FOLDER + "/params.txt", "w")
        subprocess.call(cmdary, cwd=dir, shell=True)
        if isCanceld:
            return
        print(' Step 3 - Video frames processing has successfully completed ')
        time.sleep(2)
        sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
        audioSampleCount = audioData.shape[0]
        maxAudioVolume = getMaxVolume(audioData)
        cap = cv2.VideoCapture(INPUT_FILE)
        fps = cap.get(cv2.CAP_PROP_FPS)
        f = open(TEMP_FOLDER + "/params.txt", 'r+')
        pre_params = f.read()
        f.close()
        params = pre_params.split('\n')
        for line in params:
            m = re.search(' ([0-9]*.[0-9]*) fps,', line)
            if m is None:
                frameRate = float(fps)
            if m is not None:
                frameRate = float(m.group(1))
        samplesPerFrame = sampleRate / frameRate
        audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))
        hasLoudAudio = np.zeros(audioFrameCount)
        for i in range(audioFrameCount):
            start = int(i * samplesPerFrame)
            end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
            audiochunks = audioData[start:end]
            maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
            if maxchunksVolume >= SILENT_THRESHOLD:
                hasLoudAudio[i] = 1
        chunks = [[0, 0, 0]]
        shouldIncludeFrame = np.zeros(audioFrameCount)
        for i in range(audioFrameCount):
            start = int(max(0, i - FRAME_SPREADAGE))
            end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
            shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
            if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
                chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
        chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
        chunks = chunks[1:]
        outputAudioData = np.zeros((0, audioData.shape[1]))
        outputPointer = 0
        lastExistingFrame = None
        for chunk in chunks:
            audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]
            sFile = TEMP_FOLDER + "/tempStart.wav"
            eFile = TEMP_FOLDER + "/tempEnd.wav"
            wavfile.write(sFile, SAMPLE_RATE, audioChunk)
            with WavReader(sFile) as reader:
                with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                    tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                    tsm.run(reader, writer)
            _, alteredAudioData = wavfile.read(eFile)
            leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng
            outputAudioData = np.concatenate((outputAudioData, alteredAudioData / maxAudioVolume))
            # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

            # smooth out transition's audio by quickly fading in/out
            if leng < AUDIO_FADE_ENVELOPE_SIZE:
                outputAudioData[outputPointer:endPointer] = 0  # audio is less than 0.01 sec, let's just remove it
            else:
                premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
                mask = np.repeat(premask[:, np.newaxis], 2, axis=1)  # make the fade-envelope mask stereo
                outputAudioData[outputPointer:outputPointer + AUDIO_FADE_ENVELOPE_SIZE] *= mask
                outputAudioData[endPointer - AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask
            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] * (outputFrame - startOutputFrame))
                didItWork = copyFrame(inputFrame, outputFrame)
                if didItWork:
                    lastExistingFrame = inputFrame
                else:
                    copyFrame(lastExistingFrame, outputFrame)
            outputPointer = endPointer
        wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)
        '''
        outputFrame = math.ceil(outputPointer/samplesPerFrame)
        for endGap in range(outputFrame,audioFrameCount):
            copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
        '''
        if isCanceld:
            return
        print(' Step 4 - Finalizing.... Please wait')
        cmdary = [resource_path('ffmpeg.exe'), '-framerate', str(frameRate), "-i",
                  TEMP_FOLDER + "/newFrame%06d.jpg", '-i', TEMP_FOLDER + "/audioNew.wav",
                  '-strict', '-2' + str(Extras), OUTPUT_FILE]
        subprocess.call(cmdary, cwd=dir, shell=True)
        if isCanceld:
            return
        print(' Video processing finished successfully.')
        deletePath(TEMP_FOLDER)
        path = os.path.dirname(INPUT_FILE)
        global stpTimer
        stpTimer = True
        self.new_video_size = get_size(OUTPUT_FILE) + ' MB'
        self.output_parameters.append(self.new_video_size)
        self.new_video_length = str(self.get_length(OUTPUT_FILE))
        self.output_parameters.append(self.new_video_length)
    except Exception as e:
        print(' Processing Video Failed! ')
        if str(e) != 'main thread is not in main loop':
            print('error message.', str(e))
        deletePath(TEMP_FOLDER)
        print(self.output_parameters)
def run_process():
    URL = self.URLLineEdit.text()
    if self.URLRadio.isChecked():
        INPUT_FILE = downloadFile(URL)
    else:
        INPUT_FILE = self.fileLineEdit.text()
    if INPUT_FILE == '':
        winsound.PlaySound('SystemExclamation', winsound.SND_ALIAS)
    else:
        frameRate = self.frameRateSlider.value()
        SAMPLE_RATE = self.sampleRateSlider.value()
        SILENT_THRESHOLD = float(self.thresholdLineEdit.text())
        FRAME_SPREADAGE = float(self.frameMarginSlider.value())
        SILENT_SPEED = float(self.silentSpeedLineEdit.text())
        SOUNDED_SPEED = float(self.soundedSpeedLineEdit.text())
        NEW_SPEED = [SILENT_SPEED, SOUNDED_SPEED]
        print(NEW_SPEED)
        FRAME_QUALITY = float(self.frameQualitySlider.value())
        assert INPUT_FILE is not None, "why u put no input file, that dum"
        """
        if len(args.output_file) >= 1:
            OUTPUT_FILE = args.output_file
        else:
        """
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)
        TEMP_FOLDER = "TEMP"
        # smooth out transition's audio by quickly fading in/out (arbitrary magic number)
        AUDIO_FADE_ENVELOPE_SIZE = 400
        createPath(TEMP_FOLDER)
        command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(FRAME_QUALITY) + \
            " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
        subprocess.call(command, shell=True)
        command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(SAMPLE_RATE) + \
            " -vn " + TEMP_FOLDER + "/audio.wav"
        subprocess.call(command, shell=True)
        command = "ffmpeg -i " + TEMP_FOLDER + "/input.mp4 2>&1"
        f = open(TEMP_FOLDER + "/params.txt", "w")
        subprocess.call(command, shell=True, stdout=f)
        sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
        audioSampleCount = audioData.shape[0]
        maxAudioVolume = getMaxVolume(audioData)
        f = open(TEMP_FOLDER + "/params.txt", 'r+')
        pre_params = f.read()
        f.close()
        params = pre_params.split('\n')
        for line in params:
            m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
            if m is not None:
                frameRate = float(m.group(1))
        samplesPerFrame = sampleRate / frameRate
        audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))
        hasLoudAudio = np.zeros(audioFrameCount)
        for i in range(audioFrameCount):
            start = int(i * samplesPerFrame)
            end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
            audiochunks = audioData[start:end]
            maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
            if maxchunksVolume >= SILENT_THRESHOLD:
                hasLoudAudio[i] = 1
        chunks = [[0, 0, 0]]
        shouldIncludeFrame = np.zeros(audioFrameCount)
        for i in range(audioFrameCount):
            start = int(max(0, i - FRAME_SPREADAGE))
            end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
            shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
            if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
                chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
        chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
        chunks = chunks[1:]
        outputAudioData = np.zeros((0, audioData.shape[1]))
        outputPointer = 0
        lastExistingFrame = None
        for chunk in chunks:
            audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]
            sFile = TEMP_FOLDER + "/tempStart.wav"
            eFile = TEMP_FOLDER + "/tempEnd.wav"
            wavfile.write(sFile, SAMPLE_RATE, audioChunk)
            with WavReader(sFile) as reader:
                with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                    tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                    tsm.run(reader, writer)
            _, alteredAudioData = wavfile.read(eFile)
            leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng
            outputAudioData = np.concatenate((outputAudioData, alteredAudioData / maxAudioVolume))
            # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

            # smooth out transition's audio by quickly fading in/out
            if leng < AUDIO_FADE_ENVELOPE_SIZE:
                outputAudioData[outputPointer:endPointer] = 0  # audio is less than 0.01 sec, let's just remove it
            else:
                premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
                mask = np.repeat(premask[:, np.newaxis], 2, axis=1)  # make the fade-envelope mask stereo
                outputAudioData[outputPointer:outputPointer + AUDIO_FADE_ENVELOPE_SIZE] *= mask
                outputAudioData[endPointer - AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask
            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] * (outputFrame - startOutputFrame))
                didItWork = copyFrame(inputFrame, outputFrame, TEMP_FOLDER)
                if didItWork:
                    lastExistingFrame = inputFrame
                else:
                    copyFrame(lastExistingFrame, outputFrame, TEMP_FOLDER)
            outputPointer = endPointer
        wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)
        '''
        outputFrame = math.ceil(outputPointer/samplesPerFrame)
        for endGap in range(outputFrame,audioFrameCount):
            copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
        '''
        command = "ffmpeg -framerate " + str(frameRate) + " -i " + TEMP_FOLDER + \
            "/newFrame%06d.jpg -i " + TEMP_FOLDER + "/audioNew.wav -strict -2 " + OUTPUT_FILE
        subprocess.call(command, shell=True)
        deletePath(TEMP_FOLDER)
outputAudioData = np.zeros((0, audioData.shape[1]))
outputPointer = 0
lastExistingFrame = None
sFile = os.path.join(TEMP_FOLDER, "tempStart.wav")
eFile = os.path.join(TEMP_FOLDER, "tempEnd.wav")
outputFrame = 0
for chunk in chunks:
    audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]
    wavfile.write(sFile, SAMPLE_RATE, audioChunk)
    with WavReader(sFile) as reader:
        # Use a context manager for the writer too, so the WAV header is
        # finalized before the file is read back (the original passed the
        # writer inline and never closed it).
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])]).run(reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer + leng
    outputAudioData = np.concatenate((outputAudioData, alteredAudioData / maxAudioVolume))
    # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

    # smooth out transition's audio by quickly fading in/out
    if leng < AUDIO_FADE_ENVELOPE_SIZE:
        outputAudioData[outputPointer:endPointer] = 0  # audio is less than 0.01 sec, let's just remove it
    else:
signalborders = np.where(signalpos_smoothed[:np.size(signalpos_smoothed) - 1]
                         != signalpos_smoothed[1:])[0]
del signalpos_smoothed
# signalpos = np.where(np.abs(content) > threschold)
# signalpos_norepeat = np.unique(signalpos[0])
# signalpos_norepeat = np.append(signalpos_norepeat, np.shape(content)[0])
# signalborders = signalpos_norepeat[np.gradient(signalpos_norepeat) > 2000]
signalborders = np.insert(signalborders, 0, 0)
signalborders = np.append(signalborders, np.size(content[:, 0]))
newcontent = np.empty((0, 2), dtype=np.int16)
for i in np.arange(1, np.size(signalborders)):
    if np.mean(np.abs(content[signalborders[i - 1]:signalborders[i], :])) > threschold:
        lborder = int(np.max([signalborders[i - 1] - rate / 15, 0]))
        uborder = int(np.min([signalborders[i] + rate / 15, np.size(content[:, 0])]))
        acc_size = int(np.floor((uborder - lborder) / acc_rate0))
        nonacc_part = content[lborder:uborder, :]
        acc_part = nonacc_part[np.floor(np.arange(acc_size) * acc_rate0).astype(int), :]
        newcontent = np.append(newcontent, acc_part, axis=0)
wf.write('output_temp.wav', rate, newcontent)
with WavReader('output_temp.wav') as reader:
    with WavWriter('output.wav', reader.channels, reader.samplerate) as writer:
        tsm = phasevocoder(reader.channels, speed=acc_rate)
        tsm.run(reader, writer)
os.remove('output_temp.wav')
def splitAudio(filename, chunks, samplesPerFrame, NEW_SPEED, audioData, SAMPLE_RATE,
               maxAudioVolume):
    """
    This function creates new audio based on the chunk data and the numpy audio data.
    """
    outputAudioData = []
    outputPointer = 0
    mask = [x / FADE_SIZE for x in range(FADE_SIZE)]
    num = 0
    chunk_len = str(len(chunks))
    for chunk in chunks:
        if NEW_SPEED[chunk[2]] < 99999:
            start = int(chunk[0] * samplesPerFrame)
            end = int(chunk[1] * samplesPerFrame)
            audioChunk = audioData[start:end]
            sFile = ''.join([TEMP, '/tempStart.wav'])
            eFile = ''.join([TEMP, '/tempEnd.wav'])
            wavfile.write(sFile, SAMPLE_RATE, audioChunk)
            if NEW_SPEED[chunk[2]] == 1:
                __, samefile = wavfile.read(sFile)
                leng = len(audioChunk)
                outputAudioData.extend((samefile / maxAudioVolume).tolist())
            else:
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                        phasevocoder(reader.channels, speed=NEW_SPEED[chunk[2]]).run(reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]
                outputAudioData.extend((alteredAudioData / maxAudioVolume).tolist())
            endPointer = outputPointer + leng

            # smooth out transition's audio by quickly fading in/out
            if leng < FADE_SIZE:
                for i in range(outputPointer, endPointer):
                    try:
                        outputAudioData[i][0] = 0
                        outputAudioData[i][1] = 0
                    except TypeError:
                        outputAudioData[i] = 0
            else:
                for i in range(outputPointer, outputPointer + FADE_SIZE):
                    try:
                        outputAudioData[i][0] *= mask[i - outputPointer]
                        outputAudioData[i][1] *= mask[i - outputPointer]
                    except TypeError:
                        outputAudioData[i] *= mask[i - outputPointer]
                for i in range(endPointer - FADE_SIZE, endPointer):
                    try:
                        outputAudioData[i][0] *= (1 - mask[i - endPointer + FADE_SIZE])
                        outputAudioData[i][1] *= (1 - mask[i - endPointer + FADE_SIZE])
                    except TypeError:
                        outputAudioData[i] *= (1 - mask[i - endPointer + FADE_SIZE])
            outputPointer = endPointer
        num += 1
        if num % 10 == 0:
            print(''.join([str(num), '/', chunk_len, ' audio chunks done.']))
    print(''.join([str(num), '/', chunk_len, ' audio chunks done.']))
    outputAudioData = np.asarray(outputAudioData)
    wavfile.write(filename, SAMPLE_RATE, outputAudioData)
    if not os.path.isfile(filename):
        raise IOError(f'Error: The file {filename} was not created.')
    else:
        print('Audio finished.')
def execute(input_file="", url="", output_file="", silent_threshold=0.03,
            sounded_speed=1.00, silent_speed=5.00, frame_margin=1,
            sample_rate=44100, frame_quality=3):
    SAMPLE_RATE = sample_rate
    SILENT_THRESHOLD = silent_threshold
    FRAME_SPREADAGE = frame_margin
    NEW_SPEED = [silent_speed, sounded_speed]
    if url != "" and url is not None:
        INPUT_FILE = downloadFile(url)
    else:
        INPUT_FILE = input_file
    FRAME_QUALITY = frame_quality
    assert INPUT_FILE != "" and INPUT_FILE is not None, "why u put no input file, that dum"
    if len(output_file) >= 1:
        OUTPUT_FILE = output_file
    else:
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)
    print("Saving to: " + OUTPUT_FILE)
    # smooth out transition's audio by quickly fading in/out (arbitrary magic number)
    AUDIO_FADE_ENVELOPE_SIZE = 400
    createPath(TEMP_FOLDER)
    checkForFFMPEG()
    command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(FRAME_QUALITY) + \
        " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
    subprocess.call(command, shell=True)
    command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(SAMPLE_RATE) + \
        " -vn " + TEMP_FOLDER + "/audio.wav"
    subprocess.call(command, shell=True)
    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)
    frameRate = findFramerate(INPUT_FILE)
    samplesPerFrame = sampleRate / frameRate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))
    hasLoudAudio = np.zeros(audioFrameCount)
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros(audioFrameCount)
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]
    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0
    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]
        sFile = TEMP_FOLDER + "/tempStart.wav"
        eFile = TEMP_FOLDER + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData = np.concatenate((outputAudioData, alteredAudioData / maxAudioVolume))
        # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

        # smooth out transition's audio by quickly fading in/out
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[outputPointer:endPointer] = 0  # audio is less than 0.01 sec, let's just remove it
        else:
            premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis], 2, axis=1)  # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer + AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer - AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask
        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] * (outputFrame - startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(lastExistingFrame, outputFrame)
        outputPointer = endPointer
    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)
    '''
    outputFrame = math.ceil(outputPointer/samplesPerFrame)
    for endGap in range(outputFrame,audioFrameCount):
        copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
    '''
    command = "ffmpeg -y -framerate " + str(frameRate) + " -i " + TEMP_FOLDER + \
        "/newFrame%06d.jpg -i " + TEMP_FOLDER + "/audioNew.wav -strict -2 " + OUTPUT_FILE
    subprocess.call(command, shell=True)
    deletePath(TEMP_FOLDER)
def process_video(args):
    (TEMP_FOLDER, frameRate, SAMPLE_RATE, NEW_SPEED, SILENT_THRESHOLD,
     FRAME_SPREADAGE, AUDIO_FADE_ENVELOPE_SIZE) = args
    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)
    samplesPerFrame = sampleRate / frameRate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))
    hasLoudAudio = np.zeros(audioFrameCount)
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros(audioFrameCount)
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]
    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0
    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]
        sFile = TEMP_FOLDER + "/tempStart.wav"
        eFile = TEMP_FOLDER + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData = np.concatenate((outputAudioData, alteredAudioData / maxAudioVolume))
        # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

        # smooth out transition's audio by quickly fading in/out
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[outputPointer:endPointer] = 0  # audio is less than 0.01 sec, let's just remove it
        else:
            premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis], 2, axis=1)  # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer + AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer - AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask
        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] * (outputFrame - startOutputFrame))
            didItWork = copyFrame(TEMP_FOLDER, inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(TEMP_FOLDER, lastExistingFrame, outputFrame)
        outputPointer = endPointer
    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)
if normal:
    out.write(frame)
    nFrames += 1
    switchStart = switchEnd
    yPointerEnd = yPointer + audioChunk.shape[0]
    y[yPointer:yPointerEnd] = audioChunk
    yPointer = yPointerEnd
else:
    spedChunk = audioData[switchStart:switchEnd]
    spedupAudio = np.zeros((0, 2), dtype=np.int16)
    # ArrReader(array, channels, samplerate, samplewidth)
    with ArrReader(spedChunk, channels, sampleRate, 2) as reader:  # 2 as sampleWidth for now
        with ArrWriter(spedupAudio, channels, sampleRate, 2) as writer:
            tsm = phasevocoder(reader.channels, speed=silentSpeed)
            tsm.run(reader, writer)
            spedupAudio = writer.output
    yPointerEnd = yPointer + spedupAudio.shape[0]
    y[yPointer:yPointerEnd] = spedupAudio
    yPointer = yPointerEnd
    writeFrames(frameBuffer, yPointerEnd, silentSpeed, sampleRate, out)
    frameBuffer = []
    switchStart = switchEnd
    normal = 1
if skipped % 1000 == 0:
    print("{} frames inspected".format(skipped))
skipped += 1
def process_and_concatenate(self):
    audio_fade_envelope_size = 400  # smooth out transition's audio by quickly fading in/out
    self.save_audio()
    sample_rate, audio_data = wavfile.read(self.temp_folder + "/audio.wav")
    audio_sample_count = audio_data.shape[0]
    max_audio_volume = get_max_volume(audio_data)
    samples_per_frame = sample_rate / self.fps
    audio_frame_count = int(math.ceil(audio_sample_count / samples_per_frame))
    has_loud_audio = np.zeros(audio_frame_count)
    for i in range(audio_frame_count):
        start = int(i * samples_per_frame)
        end = min(int((i + 1) * samples_per_frame), audio_sample_count)
        audio_chunks = audio_data[start:end]
        max_chunks_volume = float(get_max_volume(audio_chunks)) / max_audio_volume
        if max_chunks_volume >= SILENT_THRESHOLD:
            has_loud_audio[i] = 1
    chunks = [[0, 0, 0]]
    should_include_frame = np.zeros(audio_frame_count)
    last_idx = 0
    for i in range(audio_frame_count):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audio_frame_count, i + 1 + FRAME_SPREADAGE))
        should_include_frame[i] = np.max(has_loud_audio[start:end])
        if i >= 1 and should_include_frame[i] != should_include_frame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, should_include_frame[i - 1]])
            last_idx = i
    chunks.append([chunks[-1][1], audio_frame_count, should_include_frame[last_idx - 1]])
    chunks = chunks[1:]
    output_audio_data = np.zeros((0, audio_data.shape[1]))
    output_pointer = 0
    last_existing_frame = None
    duration = self.get_duration()
    frames_num = int(float(duration) * self.fps)
    signed_frames = [False for _ in range(frames_num)]
    output_frames = []
    for chunk in chunks:
        audio_chunk = audio_data[int(chunk[0] * samples_per_frame):int(chunk[1] * samples_per_frame)]
        s_file = self.temp_folder + "/tempStart.wav"
        e_file = self.temp_folder + "/tempEnd.wav"
        wavfile.write(s_file, SAMPLE_RATE, audio_chunk)
        with WavReader(s_file) as reader:
            with WavWriter(e_file, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, altered_audio_data = wavfile.read(e_file)
        leng = altered_audio_data.shape[0]
        end_pointer = output_pointer + leng
        output_audio_data = np.concatenate((output_audio_data, altered_audio_data / max_audio_volume))
        if leng < audio_fade_envelope_size:
            output_audio_data[output_pointer:end_pointer] = 0
        else:
            pre_mask = np.arange(audio_fade_envelope_size) / audio_fade_envelope_size
            mask = np.repeat(pre_mask[:, np.newaxis], 2, axis=1)
            output_audio_data[output_pointer:output_pointer + audio_fade_envelope_size] *= mask
            output_audio_data[end_pointer - audio_fade_envelope_size:end_pointer] *= 1 - mask
        start_output_frame = int(math.ceil(output_pointer / samples_per_frame))
        end_output_frame = int(math.ceil(end_pointer / samples_per_frame))
        for outputFrame in range(start_output_frame, end_output_frame):
            input_frame = int(chunk[0] + NEW_SPEED[int(chunk[2])] * (outputFrame - start_output_frame))
            if input_frame < frames_num - 2:
                signed_frames[input_frame + 1] = True
                last_existing_frame = input_frame
            else:
                signed_frames[last_existing_frame] = True
            output_frames.append(outputFrame)
        output_pointer = end_pointer
    j = 0
    for i, frame_sign in enumerate(signed_frames):
        if frame_sign:
            self.copy_frame(i, j)
            j += 1
    wavfile.write(self.temp_folder + "/audioNew.wav", SAMPLE_RATE, output_audio_data)
    self.final_concatenation()
    delete_path(self.temp_folder)
def timeStretch(input_filename, output_filename, rate, samplerate):
    with WavReader(input_filename) as reader:
        with WavWriter(output_filename, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, rate)
            tsm.run(reader, writer)
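A usage sketch for timeStretch; the paths are placeholders. In audiotsm, a speed above 1 shortens the output, and note that the samplerate parameter is unused here since the output inherits reader.samplerate:

# Placeholder paths; rate=1.25 plays the audio 25% faster.
timeStretch("voice.wav", "voice_fast.wav", rate=1.25, samplerate=44100)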
def main(args):
    frameRate = args.frame_rate
    SAMPLE_RATE = args.sample_rate
    SILENT_THRESHOLD = args.silent_threshold
    FRAME_SPREADAGE = args.frame_margin
    NEW_SPEED = [args.silent_speed, args.sounded_speed]
    FRAME_QUALITY = args.frame_quality
    # smooth out transition's audio by quickly fading in/out (arbitrary magic number)
    AUDIO_FADE_ENVELOPE_SIZE = 400
    TEMP_FOLDER = "TEMP"
    if not os.path.isdir(TEMP_FOLDER):
        os.makedirs(TEMP_FOLDER)
    if args.url is not None:
        INPUT_FILE = downloadFile(args.url)
    else:
        INPUT_FILE = args.input
    if len(args.output) >= 1:
        OUTPUT_FILE = args.output
    else:
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)
    command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(FRAME_QUALITY) + \
        " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
    subprocess.call(command, shell=True)
    command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(SAMPLE_RATE) + \
        " -vn " + TEMP_FOLDER + "/audio.wav"
    subprocess.call(command, shell=True)
    command = "ffmpeg -i " + TEMP_FOLDER + "/input.mp4 2>&1"
    f = open(TEMP_FOLDER + "/params.txt", "w")
    subprocess.call(command, shell=True, stdout=f)
    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)
    f = open(TEMP_FOLDER + "/params.txt", 'r+')
    pre_params = f.read()
    f.close()
    params = pre_params.split('\n')
    for line in params:
        m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
        if m is not None:
            frameRate = float(m.group(1))
    samplesPerFrame = sampleRate / frameRate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))
    hasLoudAudio = np.zeros(audioFrameCount)
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros(audioFrameCount)
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]
    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0
    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)]
        sFile = TEMP_FOLDER + "/tempStart.wav"
        eFile = TEMP_FOLDER + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData = np.concatenate((outputAudioData, alteredAudioData / maxAudioVolume))

        # smooth out transition's audio by quickly fading in/out
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[outputPointer:endPointer] = 0  # audio is less than 0.01 sec, let's just remove it
        else:
            premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis], 2, axis=1)  # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer + AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer - AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask
        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] * (outputFrame - startOutputFrame))
            didItWork = copyFrame(TEMP_FOLDER, inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(TEMP_FOLDER, lastExistingFrame, outputFrame)
        outputPointer = endPointer
    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)
    command = "ffmpeg -framerate " + str(frameRate) + " -i " + TEMP_FOLDER + \
        "/newFrame%06d.jpg -i " + TEMP_FOLDER + "/audioNew.wav -strict -2 " + OUTPUT_FILE
    subprocess.call(command, shell=True)
    rmtree(TEMP_FOLDER, ignore_errors=False)
def speed_up_video(input_file: str,
                   output_file: str = None,
                   frame_rate: float = 30,
                   sample_rate: int = 44100,
                   silent_threshold: float = 0.03,
                   silent_speed: float = 5.0,
                   sounded_speed: float = 1.0,
                   frame_spreadage: int = 1,
                   audio_fade_envelope_size: int = 400,
                   temp_folder: str = 'TEMP') -> None:
    """
    Speeds up a video file with different speeds for the silent and loud sections in the video.

    :param input_file: The file name of the video to be sped up.
    :param output_file: The file name of the output file. If not given will be 'input_file'_ALTERED.ext.
    :param frame_rate: The frame rate of the given video. Only needed if not extractable through ffmpeg.
    :param sample_rate: The sample rate of the audio in the video.
    :param silent_threshold: The threshold when a chunk counts towards being a silent chunk.
                             Value ranges from 0 (nothing) - 1 (max volume).
    :param silent_speed: The speed of the silent chunks.
    :param sounded_speed: The speed of the loud chunks.
    :param frame_spreadage: How many silent frames adjacent to sounded frames should be included
                            to provide context.
    :param audio_fade_envelope_size: Audio transition smoothing duration in samples.
    :param temp_folder: The file path of the temporary working folder.
    """
    # Set output file name based on input file name if none was given
    if output_file is None:
        output_file = _input_to_output_filename(input_file)

    # Create Temp Folder
    if os.path.exists(temp_folder):
        _delete_path(temp_folder)
    _create_path(temp_folder)

    # Find out framerate and duration of the input video
    command = 'ffprobe -i "{}" -hide_banner -loglevel error -select_streams v' \
              ' -show_entries format=duration:stream=avg_frame_rate'.format(input_file)
    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         bufsize=1, universal_newlines=True, shell=True)
    std_out, err = p.communicate()
    match_frame_rate = re.search(r'frame_rate=(\d*)/(\d*)', str(std_out))
    if match_frame_rate is not None:
        frame_rate = float(match_frame_rate.group(1)) / float(match_frame_rate.group(2))
        # print(f'Found Framerate {frame_rate}')
    match_duration = re.search(r'duration=([\d.]*)', str(std_out))
    original_duration = 0.0
    if match_duration is not None:
        original_duration = float(match_duration.group(1))
        # print(f'Found Duration {original_duration}')

    # Extract the audio
    command = 'ffmpeg -i "{}" -ab 160k -ac 2 -ar {} -vn {} -hide_banner' \
        .format(input_file, sample_rate, temp_folder + '/audio.wav')
    _run_timed_ffmpeg_command(command, total=int(original_duration * frame_rate),
                              unit='frames', desc='Extracting audio:')

    wav_sample_rate, audio_data = wavfile.read(temp_folder + "/audio.wav")
    audio_sample_count = audio_data.shape[0]
    max_audio_volume = _get_max_volume(audio_data)
    samples_per_frame = wav_sample_rate / frame_rate
    audio_frame_count = int(math.ceil(audio_sample_count / samples_per_frame))

    # Find frames with loud audio
    has_loud_audio = np.zeros(audio_frame_count, dtype=bool)
    for i in range(audio_frame_count):
        start = int(i * samples_per_frame)
        end = min(int((i + 1) * samples_per_frame), audio_sample_count)
        audio_chunk = audio_data[start:end]
        chunk_max_volume = float(_get_max_volume(audio_chunk)) / max_audio_volume
        if chunk_max_volume >= silent_threshold:
            has_loud_audio[i] = True

    # Chunk the frames together that are quiet or loud
    chunks = [[0, 0, 0]]
    should_include_frame = np.zeros(audio_frame_count, dtype=bool)
    for i in tqdm(range(audio_frame_count), desc='Finding chunks:', unit='frames'):
        start = int(max(0, i - frame_spreadage))
        end = int(min(audio_frame_count, i + 1 + frame_spreadage))
        should_include_frame[i] = np.any(has_loud_audio[start:end])
        if i >= 1 and should_include_frame[i] != should_include_frame[i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, should_include_frame[i - 1]])
    chunks.append([chunks[-1][1], audio_frame_count, should_include_frame[audio_frame_count - 1]])
    chunks = chunks[1:]

    # Generate audio data with varying speed for each chunk
    new_speeds = [silent_speed, sounded_speed]
    output_pointer = 0
    audio_buffers = []
    for index, chunk in tqdm(enumerate(chunks), total=len(chunks),
                             desc='Changing audio:', unit='chunks'):
        audio_chunk = audio_data[int(chunk[0] * samples_per_frame):int(chunk[1] * samples_per_frame)]
        reader = ArrayReader(np.transpose(audio_chunk))
        writer = ArrayWriter(reader.channels)
        tsm = phasevocoder(reader.channels, speed=new_speeds[int(chunk[2])])
        tsm.run(reader, writer)
        altered_audio_data = np.transpose(writer.data)

        # smooth out transition's audio by quickly fading in/out
        if altered_audio_data.shape[0] < audio_fade_envelope_size:
            altered_audio_data[:] = 0  # audio is less than 0.01 sec, let's just remove it
        else:
            premask = np.arange(audio_fade_envelope_size) / audio_fade_envelope_size
            mask = np.repeat(premask[:, np.newaxis], 2, axis=1)  # make the fade-envelope mask stereo
            altered_audio_data[:audio_fade_envelope_size] *= mask
            altered_audio_data[-audio_fade_envelope_size:] *= 1 - mask

        audio_buffers.append(altered_audio_data / max_audio_volume)
        end_pointer = output_pointer + altered_audio_data.shape[0]
        start_output_frame = int(math.ceil(output_pointer / samples_per_frame))
        end_output_frame = int(math.ceil(end_pointer / samples_per_frame))
        chunks[index] = chunk[:2] + [start_output_frame, end_output_frame]
        output_pointer = end_pointer
    # print(chunks)

    output_audio_data = np.concatenate(audio_buffers)
    wavfile.write(temp_folder + "/audioNew.wav", sample_rate, output_audio_data)

    # Cut the video parts to length
    expression = _get_tree_expression(chunks)
    filter_graph_file = open(temp_folder + "/filterGraph.txt", 'w')
    filter_graph_file.write(f'fps=fps={frame_rate},setpts=')
    filter_graph_file.write(expression.replace(',', '\\,'))
    filter_graph_file.close()
    command = 'ffmpeg -i "{}" -i "{}" -filter_script:v "{}" -map 0 -map -0:a -map 1:a -c:a aac "{}"' \
              ' -loglevel warning -stats -y -hide_banner' \
        .format(input_file, temp_folder + '/audioNew.wav',
                temp_folder + '/filterGraph.txt', output_file)
    _run_timed_ffmpeg_command(command, total=chunks[-1][3], unit='frames', desc='Generating final:')

    _delete_path(temp_folder)
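Given the docstring above, a minimal invocation might look like this; the paths are placeholders and the speeds match the documented defaults:

speed_up_video("lecture.mp4", output_file="lecture_fast.mp4",
               silent_speed=5.0, sounded_speed=1.0)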
def apply_speed_to_audio(audio, speed):
    # `audio` is expected in (channels, samples) layout, as ArrayReader requires.
    reader = ArrayReader(audio)
    writer = ArrayWriter(2)
    tsm = phasevocoder(reader.channels, speed)
    tsm.run(reader, writer)
    return writer.data
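A round-trip sketch for apply_speed_to_audio, assuming a 16-bit stereo WAV and audiotsm's array convention of (channels, samples) float data in [-1, 1]; the paths are placeholders:

import numpy as np
from scipy.io import wavfile

rate, data = wavfile.read("in.wav")                       # int16, shape (samples, 2)
floats = np.transpose(data).astype(np.float32) / 32768.0  # shape (2, samples)
stretched = apply_speed_to_audio(floats, 2.0)             # play twice as fast
wavfile.write("out.wav", rate, np.transpose(stretched * 32768.0).astype(np.int16))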
# and the last frame is 'loud'
if normal:
    out.write(frame)
    nFrames += 1
    switchStart = switchEnd
    yPointerEnd = yPointer + audioChunkMod.shape[0]
    y[yPointer:yPointerEnd] = audioChunkMod
    yPointer = yPointerEnd
else:
    spedChunk = audioData[switchStart:switchEnd]
    spedupAudio = np.zeros((0, 2), dtype=np.int16)
    with ArrReader(spedChunk, channels, sampleRate, 2) as reader:
        with ArrWriter(spedupAudio, channels, sampleRate, 2) as writer:
            tsm = phasevocoder(reader.channels, speed=NEW_SPEED[normal])
            tsm.run(reader, writer)
            spedupAudio = writer.output
    yPointerEnd = yPointer + spedupAudio.shape[0]
    y[yPointer:yPointerEnd] = spedupAudio
    yPointer = yPointerEnd
    writeFrames(frameBuffer, yPointerEnd, NEW_SPEED[normal], sampleRate, out)
    frameBuffer = []
    switchStart = switchEnd
    normal = 1
if framesProcessed % 500 == 0:
    print("{} frames processed".format(framesProcessed))
import os
import math
import subprocess
from shutil import rmtree

import numpy
from scipy.io import wavfile
from pytube import YouTube
from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader, WavWriter

# TEMP (the working directory name), GetVolume and FRameCopy are module-level
# helpers defined elsewhere in this project.


def videoProcess(frame_rate, sample_rate, silent_threshold, frame_margin,
                 silent_speed, sounded_speed, url, input_file, output_file,
                 frame_quality):
    try:
        print(frame_rate, sample_rate, silent_threshold, frame_margin,
              silent_speed, sounded_speed, url, input_file, output_file,
              frame_quality)
        New_Speed_silent_and_sounded = [silent_speed, sounded_speed]

        if url:
            name = YouTube(url).streams.first().download()
            renamed = name.replace(' ', '_')
            os.rename(name, renamed)
            return renamed
        else:
            Input_Video = input_file
        assert Input_Video is not None, "enter input video"

        if len(output_file) >= 1:
            Output_Video = output_file
        else:
            dot_position = Input_Video.rfind(".")
            Output_Video = (Input_Video[:dot_position] + "NEWVIDEO"
                            + Input_Video[dot_position:])

        Audio_fade_envelope_size = 400

        try:
            os.mkdir(TEMP)
        except OSError:
            assert False, "directory already exists"

        # Dump the video frames and extract the audio with ffmpeg.
        command = ("ffmpeg -i " + Input_Video + " -qscale:v " + str(frame_quality)
                   + " " + TEMP + "/old_frame%06d.jpg -hide_banner")
        subprocess.call(command, shell=True)
        command = ("ffmpeg -i " + Input_Video + " -ab 160k -ac 2 -ar "
                   + str(sample_rate) + " -vn " + TEMP + "/audio.wav")
        subprocess.call(command, shell=True)

        sampleRate, audioData = wavfile.read(TEMP + "/audio.wav")
        audioSampleCount = audioData.shape[0]
        maxAudioVolume = GetVolume(audioData)

        samplesPerFrame = 1470  # 44100 Hz / 30 fps at the assumed defaults
        audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

        # Mark each frame-sized audio window as loud or silent.
        hasLoudAudio = numpy.zeros((audioFrameCount))
        for i in range(audioFrameCount):
            start = int(i * samplesPerFrame)
            end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
            audiochunks = audioData[start:end]
            maxchunksVolume = float(GetVolume(audiochunks)) / maxAudioVolume
            if maxchunksVolume >= silent_threshold:
                hasLoudAudio[i] = 1

        # Group frames into (start, end, is_sounded) chunks, padding loud
        # regions by frame_margin frames on each side.
        chunks = [[0, 0, 0]]
        shouldIncludeFrame = numpy.zeros((audioFrameCount))
        for i in range(audioFrameCount):
            start = int(max(0, i - frame_margin))
            end = int(min(audioFrameCount, i + 1 + frame_margin))
            shouldIncludeFrame[i] = numpy.max(hasLoudAudio[start:end])
            if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]:
                chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
        chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
        chunks = chunks[1:]

        outputAudioData = numpy.zeros((0, audioData.shape[1]))
        outputPointer = 0
        lastExistingFrame = None
        for chunk in chunks:
            audioChunk = audioData[int(chunk[0] * samplesPerFrame):
                                   int(chunk[1] * samplesPerFrame)]

            # Time-stretch this chunk at the silent or sounded speed.
            sFile = TEMP + "/tempStart.wav"
            eFile = TEMP + "/tempEnd.wav"
            wavfile.write(sFile, sample_rate, audioChunk)
            with WavReader(sFile) as reader:
                with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                    tsm = phasevocoder(
                        reader.channels,
                        speed=New_Speed_silent_and_sounded[int(chunk[2])])
                    tsm.run(reader, writer)
            _, alteredAudioData = wavfile.read(eFile)
            leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng
            outputAudioData = numpy.concatenate(
                (outputAudioData, alteredAudioData / maxAudioVolume))

            # Smooth the chunk edges to avoid clicks.
            if leng < Audio_fade_envelope_size:
                outputAudioData[outputPointer:endPointer] = 0
            else:
                premask = numpy.arange(Audio_fade_envelope_size) / Audio_fade_envelope_size
                mask = numpy.repeat(premask[:, numpy.newaxis], 2, axis=1)
                outputAudioData[outputPointer:outputPointer + Audio_fade_envelope_size] *= mask
                outputAudioData[endPointer - Audio_fade_envelope_size:endPointer] *= 1 - mask

            # Copy the video frames that correspond to the new audio timeline.
            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for op_frame in range(startOutputFrame, endOutputFrame):
                ip_frame = int(chunk[0] + New_Speed_silent_and_sounded[int(chunk[2])]
                               * (op_frame - startOutputFrame))
                didItWork = FRameCopy(ip_frame, op_frame)
                if didItWork:
                    lastExistingFrame = ip_frame
                else:
                    FRameCopy(lastExistingFrame, op_frame)
            outputPointer = endPointer

        wavfile.write(TEMP + "/audioNew.wav", sample_rate, outputAudioData)
        command = ("ffmpeg -framerate " + str(frame_rate) + " -i " + TEMP
                   + "/new_frame%06d.jpg -i " + TEMP + "/audioNew.wav -strict -2 "
                   + Output_Video)
        subprocess.call(command, shell=True)

        try:
            rmtree(TEMP, ignore_errors=False)
        except OSError:
            print("Delete failed")
        return "done"  # not sure about this return value
    except Exception:
        return "nothing"
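# The envelope logic in videoProcess silences chunks shorter than the fade
# window and linearly ramps the edges of longer ones so chunk boundaries do
# not click. A standalone sketch of that step; the function name and default
# are illustrative:

import numpy as np

def fade_edges(chunk, fade_size=400):
    # Too short to fade in and out: silence the whole chunk.
    if chunk.shape[0] < fade_size:
        return np.zeros_like(chunk)
    ramp = np.arange(fade_size) / fade_size
    mask = np.repeat(ramp[:, np.newaxis], chunk.shape[1], axis=1)
    out = chunk.astype(np.float64)
    out[:fade_size] *= mask        # fade in
    out[-fade_size:] *= 1 - mask   # fade out
    return out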
import os
import sys
import subprocess
from shutil import rmtree
from time import time

import cv2
import numpy as np
from scipy.io import wavfile
from audiotsm import phasevocoder

# vidTracks, getAudioChunks, preview, progressBar, ArrReader and ArrWriter are
# project-local helpers defined elsewhere in this repository.

nFrames = 0  # module-level counter used by writeFrames below


def fastVideoPlus(videoFile, outFile, silentThreshold, frameMargin, SAMPLE_RATE,
                  AUD_BITRATE, VERBOSE, videoSpeed, silentSpeed, cutByThisTrack,
                  keepTracksSep):
    print('Running from fastVideoPlus.py')

    if not os.path.isfile(videoFile):
        print('Could not find file:', videoFile)
        sys.exit()

    TEMP = '.TEMP'
    FADE_SIZE = 400
    NEW_SPEED = [silentSpeed, videoSpeed]

    cap = cv2.VideoCapture(videoFile)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = round(cap.get(cv2.CAP_PROP_FPS))

    try:
        os.mkdir(TEMP)
    except OSError:
        rmtree(TEMP)
        os.mkdir(TEMP)

    tracks = vidTracks(videoFile)
    if cutByThisTrack >= tracks:
        print("Error: You chose a track that doesn't exist.")
        print(f'There are only {tracks} tracks, numbered 0 to {tracks - 1}.')
        sys.exit()

    # Extract every audio track to its own wav file.
    for trackNumber in range(tracks):
        cmd = ['ffmpeg', '-i', videoFile, '-ab', AUD_BITRATE, '-ac', '2', '-ar',
               str(SAMPLE_RATE), '-map', f'0:a:{trackNumber}',
               f'{TEMP}/{trackNumber}.wav']
        if not VERBOSE:
            cmd.extend(['-nostats', '-loglevel', '0'])
        else:
            cmd.extend(['-hide_banner'])
        subprocess.call(cmd)

    sampleRate, audioData = wavfile.read(f'{TEMP}/{cutByThisTrack}.wav')
    chunks = getAudioChunks(audioData, sampleRate, fps, silentThreshold, 2,
                            frameMargin)

    # Over-allocate the output buffers based on the previewed output length.
    hmm = preview(chunks, NEW_SPEED, fps)
    estLeng = int((hmm * SAMPLE_RATE) * 1.5) + int(SAMPLE_RATE * 2)

    oldAudios = []
    newAudios = []
    for i in range(tracks):
        __, audioData = wavfile.read(f'{TEMP}/{i}.wav')
        oldAudios.append(audioData)
        newAudios.append(np.zeros((estLeng, 2), dtype=np.int16))

    out = cv2.VideoWriter(f'{TEMP}/spedup.mp4', fourcc, fps, (width, height))

    channels = 2
    switchStart = 0
    needChange = False
    preve = None
    endMargin = 0
    yPointer = 0
    frameBuffer = []

    def writeFrames(frames, nAudio, speed, samplePerSecond, writer):
        numAudioChunks = round(nAudio / samplePerSecond * fps)
        global nFrames
        numWrites = numAudioChunks - nFrames
        nFrames += numWrites  # if sync issue exists, change this back
        limit = len(frames) - 1
        for i in range(numWrites):
            frameIndex = round(i * speed)
            if frameIndex > limit:
                writer.write(frames[-1])
            else:
                writer.write(frames[frameIndex])

    totalFrames = chunks[len(chunks) - 1][1]
    outFrame = 0
    beginTime = time()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        cframe = int(cap.get(cv2.CAP_PROP_POS_FRAMES))  # current frame
        currentTime = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
        audioSampleStart = int(currentTime * sampleRate)
        audioSampleEnd = min(audioSampleStart + sampleRate // fps * frameMargin,
                             len(audioData))
        switchEnd = audioSampleStart + sampleRate // fps
        audioChunk = audioData[audioSampleStart:audioSampleEnd]

        # Look up this frame's silent/sounded state.
        state = None
        for chunk in chunks:
            if cframe >= chunk[0] and cframe <= chunk[1]:
                state = chunk[2]
                break

        if state == 0:
            if endMargin < 1:
                isSilent = 1
            else:
                isSilent = 0
                endMargin -= 1
        else:
            isSilent = 0
            endMargin = frameMargin
        if preve is not None and preve != isSilent:
            needChange = True

        preve = isSilent
        if not needChange:
            frameBuffer.append(frame)
        else:
            theSpeed = NEW_SPEED[isSilent]
            if theSpeed < 99999:
                # Time-stretch every audio track over the buffered span.
                for i, oneAudioData in enumerate(oldAudios):
                    spedChunk = oneAudioData[switchStart:switchEnd]
                    spedupAudio = np.zeros((0, 2), dtype=np.int16)
                    with ArrReader(spedChunk, channels, sampleRate, 2) as reader:
                        with ArrWriter(spedupAudio, channels, sampleRate, 2) as writer:
                            phasevocoder(reader.channels,
                                         speed=theSpeed).run(reader, writer)
                            spedupAudio = writer.output

                    yPointerEnd = yPointer + spedupAudio.shape[0]
                    newAudios[i][yPointer:yPointerEnd] = spedupAudio
                yPointer = yPointerEnd
            else:
                yPointerEnd = yPointer

            writeFrames(frameBuffer, yPointerEnd, NEW_SPEED[isSilent],
                        sampleRate, out)
            frameBuffer = []
            switchStart = switchEnd
            needChange = False

        progressBar(cframe, totalFrames, beginTime)

    # finish audio
    for i, newData in enumerate(newAudios):
        newData = newData[:yPointer]
        wavfile.write(f'{TEMP}/new{i}.wav', sampleRate, newData)
        if not os.path.isfile(f'{TEMP}/new{i}.wav'):
            raise IOError('audio file not created.')

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    first = videoFile[:videoFile.rfind('.')]
    extension = videoFile[videoFile.rfind('.'):]
    if outFile == '':
        outFile = f'{first}_ALTERED{extension}'

    # Now mix the new audio track(s) and the new video.
    if keepTracksSep:
        cmd = ['ffmpeg', '-y']
        for i in range(tracks):
            cmd.extend(['-i', f'{TEMP}/new{i}.wav'])
        cmd.extend(['-i', f'{TEMP}/spedup.mp4'])  # add input video
        for i in range(tracks):
            cmd.extend(['-map', f'{i}:a:0'])
        cmd.extend(['-map', f'{tracks}:v:0', '-c:v', 'copy', '-movflags',
                    '+faststart', outFile])
        if not VERBOSE:
            cmd.extend(['-nostats', '-loglevel', '0'])
    else:
        if tracks > 1:
            # Merge all processed tracks into a single stereo file.
            cmd = ['ffmpeg']
            for i in range(tracks):
                cmd.extend(['-i', f'{TEMP}/new{i}.wav'])
            cmd.extend(['-filter_complex', f'amerge=inputs={tracks}', '-ac', '2',
                        f'{TEMP}/newAudioFile.wav'])
            if not VERBOSE:
                cmd.extend(['-nostats', '-loglevel', '0'])
            subprocess.call(cmd)
        else:
            os.rename(f'{TEMP}/new0.wav', f'{TEMP}/newAudioFile.wav')

        cmd = ['ffmpeg', '-y', '-i', f'{TEMP}/newAudioFile.wav', '-i',
               f'{TEMP}/spedup.mp4', '-c:v', 'copy', '-movflags', '+faststart',
               outFile]
        if not VERBOSE:
            cmd.extend(['-nostats', '-loglevel', '0'])
    subprocess.call(cmd)

    return outFile
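# fastVideoPlus depends on several project-local helpers (vidTracks,
# getAudioChunks, preview, progressBar, ArrReader, ArrWriter), so it cannot
# run in isolation. Assuming those are importable, a call might look like
# this; every parameter value below is illustrative, not a project default:

fastVideoPlus(
    videoFile='talk.mp4',
    outFile='',            # '' means output defaults to talk_ALTERED.mp4
    silentThreshold=0.04,
    frameMargin=4,
    SAMPLE_RATE=44100,
    AUD_BITRATE='160k',
    VERBOSE=False,
    videoSpeed=1.0,
    silentSpeed=99999,     # speeds >= 99999 drop audio for silent runs entirely
    cutByThisTrack=0,      # detect silence using the first audio track
    keepTracksSep=False,   # True keeps each processed track separate in the output
)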
# coding: utf-8

# In[11]:

from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader, WavWriter

with WavReader('qbhexamples.wav') as reader:
    print(reader.channels, reader.samplerate)
    with WavWriter('qbh_half.wav', reader.channels, reader.samplerate) as writer:
        tsm = phasevocoder(reader.channels, speed=0.5)
        tsm.run(reader, writer)

# Both files are closed automatically when the with blocks exit.
print("Finished, closing files.")