def processVideo(inputFile, outputFile, tempDir):
    """Re-render *inputFile* into *outputFile* with silent stretches re-timed.

    Pipeline: extract frames and audio with ffmpeg, classify each video frame
    as loud/silent against SILENT_THRESHOLD, cut the timeline into contiguous
    loud/silent chunks, time-stretch each chunk's audio at NEW_SPEED, copy or
    duplicate frames to stay in sync, then mux frames + new audio back together.

    Parameters:
        inputFile:  path of the source video.
        outputFile: path the re-encoded video is written to.
        tempDir:    scratch directory for frames and intermediate wav files.

    Side effects: writes many files under *tempDir*, runs ffmpeg, and may set
    the module-level ``frameRate`` if it is still None.

    Relies on module-level config (FRAME_QUALITY, SAMPLE_RATE, SILENT_THRESHOLD,
    FRAME_SPREADAGE, NEW_SPEED, AUDIO_FADE_ENVELOPE_SIZE, H264_PRESET, H264_CRF)
    and helpers (getMaxVolume, getFrameRate, copyFrame, WavReader, WavWriter,
    audio_stretch_algorithm) defined elsewhere in this file.
    """
    global frameRate

    # Extract video frames and the audio track.  Argument-list form (no shell)
    # replaces the old string-concatenated shell=True commands, which broke on
    # file names containing quotes and were open to shell injection.
    subprocess.call([
        "ffmpeg", "-i", inputFile,
        "-qscale:v", str(FRAME_QUALITY),
        tempDir + "/frame%06d.jpg", "-hide_banner",
    ])
    subprocess.call([
        "ffmpeg", "-i", inputFile,
        "-ab", "160k", "-ac", "2", "-ar", str(SAMPLE_RATE),
        "-vn", tempDir + "/audio.wav",
    ])

    sampleRate, audioData = wavfile.read(tempDir + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)
    # Guard against an all-silent input (peak volume 0): every division by the
    # peak below uses this floored value, matching the per-chunk guard.
    safeMaxVolume = max(maxAudioVolume, 1e-10)

    if frameRate is None:
        frameRate = getFrameRate(inputFile)
    samplesPerFrame = sampleRate / frameRate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    # Mark every video frame whose audio peak exceeds the silence threshold
    # (volumes are normalized by the whole clip's peak).
    hasLoudAudio = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / safeMaxVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    # Spread loudness FRAME_SPREADAGE frames in both directions, then cut the
    # timeline into [startFrame, endFrame, isLoud] chunks at each transition.
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]):  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]  # drop the [0, 0, 0] sentinel

    outputAudioData = []
    outputPointer = 0
    # Linear 0->1 ramp used to fade chunk edges and avoid clicks.
    mask = [
        x / AUDIO_FADE_ENVELOPE_SIZE for x in range(AUDIO_FADE_ENVELOPE_SIZE)
    ]  # Create audio envelope mask
    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] *
                                                                   samplesPerFrame)]

        # Time-stretch this chunk's audio at the speed configured for its
        # loudness class (chunk[2] is 0.0/1.0, used to index NEW_SPEED).
        sFile = tempDir + "/tempStart.wav"
        eFile = tempDir + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = audio_stretch_algorithm(reader.channels,
                                              speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData.extend((alteredAudioData / safeMaxVolume).tolist())

        # Smoothing the audio: silence chunks shorter than the envelope,
        # otherwise fade in over the first AUDIO_FADE_ENVELOPE_SIZE samples
        # and fade out over the last.
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            for i in range(outputPointer, endPointer):
                outputAudioData[i] = 0
        else:
            for i in range(outputPointer,
                           outputPointer + AUDIO_FADE_ENVELOPE_SIZE):
                outputAudioData[i][0] *= mask[i - outputPointer]
                outputAudioData[i][1] *= mask[i - outputPointer]
            for i in range(endPointer - AUDIO_FADE_ENVELOPE_SIZE, endPointer):
                outputAudioData[i][0] *= (
                    1 - mask[i - endPointer + AUDIO_FADE_ENVELOPE_SIZE])
                outputAudioData[i][1] *= (
                    1 - mask[i - endPointer + AUDIO_FADE_ENVELOPE_SIZE])

        # Select/duplicate source frames so video length matches the
        # stretched audio for this chunk.
        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                             (outputFrame - startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame, tempDir)
            if didItWork:
                lastExistingFrame = inputFrame
            elif lastExistingFrame is not None:
                # Source frame missing: reuse the last frame that did exist.
                # (The None guard prevents a crash if the very first copy fails.)
                copyFrame(lastExistingFrame, outputFrame, tempDir)

        outputPointer = endPointer

    outputAudioData = np.asarray(outputAudioData)
    wavfile.write(tempDir + "/audioNew.wav", SAMPLE_RATE, outputAudioData)

    # Mux the (renamed) frames and the rebuilt audio into the final video.
    subprocess.call([
        "ffmpeg", "-framerate", str(frameRate),
        "-i", tempDir + "/newFrame%06d.jpg",
        "-i", tempDir + "/audioNew.wav",
        "-strict", "-2", "-c:v", "libx264",
        "-preset", str(H264_PRESET), "-crf", str(H264_CRF),
        "-pix_fmt", "yuvj420p", outputFile,
    ])
for chunk in chunks: if EDL: if (chunk[2] == True): edlFrameNumber += 1 writeELD(chunk[0], chunk[1], edlFrameNumber) continue audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)] sFile = TEMP_FOLDER.name + "/tempStart.wav" eFile = TEMP_FOLDER.name + "/tempEnd.wav" wavfile.write(sFile, SAMPLE_RATE, audioChunk) with WavReader(sFile) as reader: with WavWriter(eFile, reader.channels, reader.samplerate) as writer: tsm = audio_stretch_algorithm(reader.channels, speed=NEW_SPEED[int(chunk[2])]) tsm.run(reader, writer) _, alteredAudioData = wavfile.read(eFile) leng = alteredAudioData.shape[0] endPointer = outputPointer + leng outputAudioData.extend((alteredAudioData / maxAudioVolume).tolist()) # Smoothing the audio if leng < AUDIO_FADE_ENVELOPE_SIZE: for i in range(outputPointer, endPointer): outputAudioData[i] = 0 else: for i in range(outputPointer, outputPointer + AUDIO_FADE_ENVELOPE_SIZE): outputAudioData[i][0] *= mask[i - outputPointer] outputAudioData[i][1] *= mask[i - outputPointer]