Example No. 1
def create_tsm(name, channels, speed):
    """Create a TSM object given the method name and its parameters."""
    if name == "ola":
        return ola(channels, speed)
    if name == "wsola":
        return wsola(channels, speed)
    if name == "phasevocoder":
        return phasevocoder(channels, speed, phase_locking=PhaseLocking.NONE)
    if name == "phasevocoder_identity":
        return phasevocoder(channels, speed,
                            phase_locking=PhaseLocking.IDENTITY)

    raise ValueError("unknown TSM method: {}".format(name))
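For context, a minimal driver for a factory like this might look as follows (the file names and the speed value are illustrative, not part of the example above):

# Hypothetical usage of create_tsm; "in.wav"/"out.wav" are placeholders.
from audiotsm.io.wav import WavReader, WavWriter

with WavReader("in.wav") as reader:
    with WavWriter("out.wav", reader.channels, reader.samplerate) as writer:
        tsm = create_tsm("wsola", reader.channels, 0.5)  # half speed
        tsm.run(reader, writer)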
Example No. 2
    def speed(self, speed):
        """
        Adjusts speed to given percentage without changing pitch

        :param speed: Percentage to increase/decrease speed without changing
                      pitch
        :type speed: float
        """
        if speed != 1:
            logger.info("Setting speed to %f", speed)
            logger.debug("Export file to BytesIO")
            wav_in = BytesIO()
            wav_in = self._segment.export(wav_in, format="wav")
            wav_in.seek(0)
            logger.debug("Initializing reader and writer")
            with WavReader(wav_in) as reader:
                wav_out = BytesIO()
                with WavWriter(wav_out, reader.channels,
                               reader.samplerate) as writer:
                    logger.debug("Adjusting speed with vocoder")
                    tsm = phasevocoder(reader.channels, speed=speed)
                    tsm.run(reader, writer)
                    logger.debug("Reload audio segment")
                    wav_out.seek(44)  # skip the 44-byte canonical WAV header, start at the first sample
                    self._segment = AudioSegment.from_raw(
                        wav_out,
                        sample_width=self._segment.sample_width,
                        channels=self._segment.channels,
                        frame_rate=self._segment.frame_rate,
                    )
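The seek(44) above assumes a canonical 44-byte WAV header. If that assumption feels fragile, audiotsm also provides array-based I/O; a sketch of the same stretch with no header handling at all (the function name and the float-array convention are ours, not part of the example):

from audiotsm import phasevocoder
from audiotsm.io.array import ArrayReader, ArrayWriter

def stretch(samples, speed):
    """samples: float array of shape (channels, length), values in [-1, 1]."""
    reader = ArrayReader(samples)
    writer = ArrayWriter(reader.channels)
    phasevocoder(reader.channels, speed=speed).run(reader, writer)
    return writer.data  # same (channels, length) convention as the input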
Example No. 3
	def change_pitch(self, source_filepath):
		import pydub
		from audiotsm.io.wav import WavReader, WavWriter
		from audiotsm import phasevocoder
		# Resampling changes pitch and tempo together; remember the ratio so
		# the time-scale step below can compensate for the tempo change.
		rate_ratio = 1.0
		if abs(self.octaves) > 0.1:
			_, sampled_filepath = tempfile.mkstemp(suffix='.wav', dir=self.temp_dir)
			sound = pydub.AudioSegment.from_mp3(source_filepath)
			sample_rate = int(sound.frame_rate * (2 ** self.octaves))
			modified = sound._spawn(sound.raw_data, overrides={"frame_rate": sample_rate})
			modified.export(sampled_filepath, format="wav")
			rate_ratio = sound.frame_rate / sample_rate
		else:
			sampled_filepath = source_filepath
		if abs(self.speed - 1) > 0.1:
			_, output_filepath = tempfile.mkstemp(suffix='.wav', dir=self.temp_dir)
			with WavReader(sampled_filepath) as reader:
				with WavWriter(output_filepath, reader.channels, reader.samplerate) as writer:
					tsm = phasevocoder(reader.channels, speed=self.speed * rate_ratio)
					tsm.run(reader, writer)
			return output_filepath
		return sampled_filepath
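The speed compensation on the phasevocoder call is what keeps the tempo intact: resampling to sound.frame_rate * 2**octaves shifts pitch but also scales duration by 2**-octaves, so multiplying the TSM speed by sound.frame_rate / sample_rate (i.e. 2**-octaves) cancels the duration change. A quick numeric check (the octaves value is illustrative):

octaves = 1.0                  # one octave up
ratio = 1 / (2 ** octaves)     # sound.frame_rate / sample_rate == 0.5
# Resampling alone would halve the duration; running the phasevocoder at
# speed = desired_speed * ratio stretches it back to the intended length.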
Example No. 4
def create_tsm(name, channels, parameters):
    """Create a TSM object given the method name and its parameters."""
    if name == "ola":
        return ola(channels, **parameters)
    if name == "wsola":
        return wsola(channels, **parameters)
    if name == "phasevocoder":
        return phasevocoder(channels, **parameters)

    raise ValueError("unknown TSM method: {}".format(name))
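A hypothetical call, with the dict keys mirroring audiotsm's keyword arguments:

# "parameters" maps directly onto phasevocoder's keyword arguments.
tsm = create_tsm("phasevocoder", channels=2,
                 parameters={"speed": 1.5, "frame_length": 2048})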
Example No. 5
    def create_tsm(self, channels):
        parameters = {}
        if self.frame_length > 0:
            parameters['frame_length'] = self.frame_length
        if self.synthesis_hop > 0:
            parameters['synthesis_hop'] = self.synthesis_hop
        if self.phase_locking >= 0:
            parameters['phase_locking'] = self.phase_locking

        return phasevocoder(channels, **parameters)
Example No. 6
def 音频变速(wav音频数据列表, 声道数, 采样率, 目标速度, 临时文件夹):
    # "Change audio speed": args are (wav sample array, channel count,
    # sample rate, target speed, temp folder).
    if 目标速度 == 1.0:
        return wav音频数据列表
    if 查找可执行程序('soundstretch') is not None:  # 查找可执行程序: find executable on PATH
        内存音频二进制缓存区 = io.BytesIO()
        fd, soundstretch临时输出文件 = tempfile.mkstemp()
        os.close(fd)
        wavfile.write(内存音频二进制缓存区, 采样率, wav音频数据列表)
        变速命令 = f'soundstretch stdin "{soundstretch临时输出文件}" -tempo={(目标速度 - 1) * 100}'
        变速线程 = subprocess.Popen(变速命令, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
        变速线程.communicate(内存音频二进制缓存区.getvalue())
        try:
            采样率, 音频区间处理后的数据 = wavfile.read(soundstretch临时输出文件)
        except Exception as e:
            出错时间 = int(time.time())

            fd, 原始数据存放位置 = tempfile.mkstemp(dir=临时文件夹, prefix=f'原始-{出错时间}-', suffix='.wav')
            os.close(fd)
            wavfile.write(原始数据存放位置, 采样率, wav音频数据列表)

            fd, 出错文件 = tempfile.mkstemp(dir=临时文件夹, prefix=f'变速-{出错时间}-', suffix='.wav')
            os.close(fd)
            try:
                copy(soundstretch临时输出文件, 出错文件)
            except OSError:
                pass

            fd, soundstretch临时输出文件 = tempfile.mkstemp(dir=临时文件夹, prefix=f'变速-{出错时间}-', suffix='.wav')
            os.close(fd)
            变速命令 = f'soundstretch stdin "{soundstretch临时输出文件}" -tempo={(目标速度 - 1) * 100}'
            变速线程 = subprocess.Popen(变速命令, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
            变速线程.communicate(内存音频二进制缓存区.getvalue())

            print(f'Soundstretch failed to change the audio speed; see these files for details\n    original audio: {原始数据存放位置}\n    stretched audio: {soundstretch临时输出文件}\n')
            print(f'Details of the failing audio:\n    input sample count: {len(wav音频数据列表)}\n    target speed: {目标速度}\n    expected output sample count: {len(wav音频数据列表) / 目标速度}')

            return wav音频数据列表

        os.remove(soundstretch临时输出文件)
    else:
        print(
            'soundstretch (from SoundTouch) was not found, so falling back to the phasevocoder method. For better speed-change quality, download soundstretch for your platform from http://www.surina.net/soundtouch and put it on your PATH.\n')
        sFile = io.BytesIO()
        wavfile.write(sFile, 采样率, wav音频数据列表)
        sFile = io.BytesIO(sFile.getvalue())
        eFile = io.BytesIO()
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=目标速度)
                tsm.run(reader, writer)
        _, 音频区间处理后的数据 = wavfile.read(io.BytesIO(eFile.getvalue()))

    return 音频区间处理后的数据
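Note the -tempo argument: soundstretch expects a tempo change in percent rather than a speed factor, hence the (speed - 1) * 100 conversion. A quick illustration (the speed values are hypothetical):

# speed factor -> soundstretch -tempo percentage
for speed in (0.8, 1.0, 1.25):
    print(f"-tempo={(speed - 1) * 100:+.0f}")   # -20, +0, +25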
Example No. 7
    def change_bar_speed(self, audio_slice_id, target_bpm=120.0):
        if not os.path.isdir(c.LF_CH_BPM + self._audio_id):
            try:
                os.mkdir(c.LF_CH_BPM + self._audio_id)
            except FileExistsError:
                pass
        else:
            if os.path.isfile(c.LF_CH_BPM + self._audio_id + "/" +
                              audio_slice_id + ".wav"):
                return 0

        bar_bpm = 60.00 / (
            (self.beat_track[int(audio_slice_id.split("_")[1]) + 1] -
             self.beat_track[int(audio_slice_id.split("_")[1])]) / 8)
        with WavReader("{}{}/{}.wav".format(c.LF_SLICE, self._audio_id,
                                            audio_slice_id)) as r:
            with WavWriter(
                    "{}{}/{}.wav".format(c.LF_CH_BPM, self._audio_id,
                                         audio_slice_id), r.channels,
                    r.samplerate) as w:
                phasevocoder(r.channels, speed=target_bpm / bar_bpm).run(r, w)
        print("only came " + audio_slice_id)
Example No. 8
def getStretchedData(low, sf):
    s = "placeholder.wav"
    playSpeed = 1/sf
    if low:
        s = "lowPlaceholder.wav"
        playSpeed *= LOW_FACTOR
    with WavReader(s) as reader:
        with WavWriter("stretchholder.wav", reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=playSpeed)
            tsm.run(reader, writer)
    _, s = wavfile.read("stretchholder.wav")
    d = np.zeros(s.shape)
    if low:
        d += s
    else:
        d += s*0.81
    return d
Example No. 9
def getStretchedData(low, sf):
    s = PLACEHOLDER_WAV_AUX
    playSpeed = 1 / sf
    if low:
        s = LOW_PLACEHOLDER_WAV_AUX
        playSpeed *= LOW_FACTOR
    with WavReader(s) as reader:
        with WavWriter(STRECH, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=playSpeed)
            tsm.run(reader, writer)
    _, samples = wavfile.read(STRECH)
    d = np.zeros(samples.shape)
    if low:
        d += samples
    else:
        d += samples * 0.81
    return d
Example No. 10
    def execute(self):
        # get values of audio frames, 0 for silence, 1 for loudness.
        has_loud_audio = self.get_loud_frame()
        # get edit points of silence and loudness.
        edit_points = self.get_edit_points(has_loud_audio)

        start_frame = 0
        output = self.get_output()
        for edit_point in edit_points:
            audio_chunk = self.parameter.audio_data[
                          int(edit_point.start_frame * self.parameter.samples_per_frame):
                          int(edit_point.end_frame * self.parameter.samples_per_frame)
                          ]

            # need channels * frames, transpose data first.
            reader = ArrayReader(np.transpose(audio_chunk))
            writer = ArrayWriter(reader.channels)
            tsm = phasevocoder(reader.channels, speed=self.parameter.new_speed[int(edit_point.should_keep)])
            tsm.run(reader, writer)
            altered_audio_data = np.transpose(writer.data)

            altered_audio_data_length = altered_audio_data.shape[0]
            if altered_audio_data_length < self.parameter.audio_fade_envelope_size:
                altered_audio_data[:] = 0  # audio is less than 0.01 sec, let's just remove it.
            else:
                self.fade_out_silence(altered_audio_data)
            end_frame = start_frame + altered_audio_data_length

            start_output_frame = int(math.ceil(start_frame / self.parameter.samples_per_frame))
            end_output_frame = int(math.ceil(end_frame / self.parameter.samples_per_frame))

            output.apply_edit_point(edit_point, altered_audio_data, start_output_frame, end_output_frame)

            start_frame = end_frame

        output.close()
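The transposes around the TSM call are required because scipy's wavfile returns audio as (samples, channels) while audiotsm's array I/O expects (channels, samples). As a standalone sketch (the array shapes are the point; the data here is made up):

import numpy as np
from audiotsm import phasevocoder
from audiotsm.io.array import ArrayReader, ArrayWriter

audio = np.zeros((44100, 2))                 # 1 s of stereo, (samples, channels)
reader = ArrayReader(np.transpose(audio))    # audiotsm wants (channels, samples)
writer = ArrayWriter(reader.channels)
phasevocoder(reader.channels, speed=2.0).run(reader, writer)
result = np.transpose(writer.data)           # back to (samples, channels)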
Example No. 11
def fast_video_function(videoFile, NEW_SPEEDfloat, silentThreshold, frameMargin):
    global NEW_SPEED
    NEW_SPEED = [NEW_SPEEDfloat, 1]
    global startTime
    startTime = time.time()
    global cap
    cap = cv2.VideoCapture(videoFile)
    # In case files were left behind by a previous run
    try:
        os.remove('output.wav')
        os.remove('spedup.mp4')
        os.remove('spedupAudio.wav')
    except OSError:
        pass

    global width
    global height
    global fourcc
    global fps
    global extractAudio
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = round(cap.get(cv2.CAP_PROP_FPS))
    extractAudio = 'ffmpeg -i "{}" -ab 160k -ac 2 -ar 44100 -vn output.wav'.format(videoFile)
    subprocess.call(extractAudio, shell=True)

    global out
    out = cv2.VideoWriter('spedup.mp4', fourcc, fps, (width, height))
    sampleRate, audioData = wavfile.read('output.wav')

    global skipped
    skipped = 0
    global nFrames
    nFrames = 0
    global channels
    channels = int(audioData.shape[1])
    framesProcessed = 0
    def getMaxVolume(s):
        maxv = np.max(s)
        minv = np.min(s)
        return max(maxv,-minv)


    def writeFrames(frames, nAudio, speed, samplePerSecond, writer):
        numAudioChunks = round(nAudio / samplePerSecond * fps)
        global nFrames
        numWrites = numAudioChunks - nFrames
        # e.g. len(frames) == 3 means the last valid index is 2
        limit = len(frames) - 1
        for i in range(numWrites):
            frameIndex = round(i * speed)
            if frameIndex > limit:
                writer.write(frames[-1])
            else:
                writer.write(frames[frameIndex])
            nFrames += 1

    global normal
    normal = 0
    # 0 for silent, 1 for normal
    global switchStart
    switchStart = 0
    global maxVolume
    maxVolume = getMaxVolume(audioData)

    # not used:
    # fadeInSamples = 400
    # preMask = np.arange(fadeInSamples)/fadeInSamples
    # mask = np.repeat(preMask[:, np.newaxis], 2, axis = 1)

    global y
    global yPointer
    global frameBuffer
    y = np.zeros_like(audioData, dtype=np.int16)
    yPointer = 0
    frameBuffer = []


    while (cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        # the sample rate is per second, so convert the frame timestamp to seconds too
        currentTime = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
        audioSampleStart = math.floor(currentTime * sampleRate)
        
        # more accurate frame counting
        framesProcessed += 1
        # audioSampleStart + one frame worth of samples
        audioSampleEnd = min((audioSampleStart + ((sampleRate // fps) * frameMargin)),(len(audioData)))
        switchEnd = (audioSampleStart + ((sampleRate // fps)))
        audioChunkMod = audioData[audioSampleStart:switchEnd]
        audioChunk = audioData[audioSampleStart:audioSampleEnd]

        # if it's quiet
        if getMaxVolume(audioChunk) / maxVolume < silentThreshold:
            skipped += 1
            # if the frame is 'switched'
            frameBuffer.append(frame)
            normal = 0
        else: # if it's 'loud'

            # and the last frame is 'loud'
            if normal:
                out.write(frame)
                nFrames += 1
                switchStart = switchEnd

                yPointerEnd = yPointer + audioChunkMod.shape[0]
                y[yPointer : yPointerEnd] = audioChunkMod
                yPointer = yPointerEnd
            else:
                spedChunk = audioData[switchStart:switchEnd]
                spedupAudio = np.zeros((0,2), dtype=np.int16)
                with ArrReader(spedChunk, channels, sampleRate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, sampleRate, 2) as writer:
                        tsm = phasevocoder(reader.channels, speed=NEW_SPEED[normal])
                        tsm.run(reader, writer)
                        spedupAudio = writer.output

                yPointerEnd = yPointer + spedupAudio.shape[0]
                y[yPointer : yPointerEnd] = spedupAudio
                yPointer = yPointerEnd

                writeFrames(frameBuffer, yPointerEnd, NEW_SPEED[normal], sampleRate, out)
                frameBuffer = []
                switchStart = switchEnd

            normal = 1
        if framesProcessed % 500 == 0:
            print("{} frames processed".format(framesProcessed))
            print("{} frames skipped".format(skipped))

    y = y[:yPointer]
    wavfile.write("spedupAudio.wav", sampleRate, y)

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    outFile = "{}_faster{}".format(videoFile[:videoFile.rfind('.')],videoFile[videoFile.rfind('.'):])
    command = "ffmpeg -y -i spedup.mp4 -i spedupAudio.wav -c:v copy -c:a aac {}".format(outFile)
    subprocess.call(command, shell=True)

    os.remove('output.wav')
    os.remove('spedup.mp4')
    os.remove('spedupAudio.wav')
    timeLength = round(time.time() - startTime, 2)
    minutes = timedelta(seconds=(round(timeLength)))
    print('Finished.')
    print(f'Took {timeLength} seconds ({minutes})')
    print(f'Removed {math.floor(skipped / fps)} seconds from a {math.floor(framesProcessed / fps)} second video.')
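The synchronization in writeFrames is worth spelling out: the number of video frames that should exist so far is derived from the number of audio samples already written, and the difference from the frames already written is made up by duplicating or dropping buffered frames. With illustrative numbers:

sampleRate, fps = 44100, 30
nAudio = 88200                                     # audio samples written so far
numAudioChunks = round(nAudio / sampleRate * fps)  # 2 s of audio -> 60 frames
# numWrites = numAudioChunks - nFrames frames are still owed to the video.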
Example No. 12
def splitVideo(chunks, NEW_SPEED, frameRate, zooms, samplesPerFrame,
               SAMPLE_RATE, audioData, extension, VERBOSE):
    """
    This function is responsible for outputting a new image sequence in the correct order.

    splitVideo is also responsible for creating Renames.txt. copying every jpeg is
    computationally expensive, renaming the file is less so, but we still need to create
    the cache folder and we can't do that if the program has no idea what it renamed
    and where. Renames.txt will be read in originalMethod.py to recreate the original
    image sequence.

    To avoid desyncing issues with the audio, we need to have audioData and go along
    roughly the same way originalAudio.py does it. Specifically, get the length of the
    new audio chunk.

    If the speed set is 1, this is easy. If not, we have to create a new file modified
    to be the new speed with audiotsm, then read that file to get the length.
    """
    print('Creating new video.')
    num = 0
    chunk_len = str(len(chunks))
    outputPointer = 0
    Renames = []
    lastExisting = None
    for chunk in chunks:
        if (NEW_SPEED[int(chunk[2])] < 99999):
            audioChunk = audioData[int(chunk[0] *
                                       samplesPerFrame):int(chunk[1] *
                                                            samplesPerFrame)]
            if (NEW_SPEED[chunk[2]] == 1):
                leng = len(audioChunk)
            else:
                sFile = TEMP + '/tempStart2.wav'
                eFile = TEMP + '/tempEnd2.wav'
                wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels,
                                   reader.samplerate) as writer:
                        phasevocoder(reader.channels,
                                     speed=NEW_SPEED[chunk[2]]).run(
                                         reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]

            endPointer = outputPointer + leng

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + NEW_SPEED[chunk[2]] *
                                 (outputFrame - startOutputFrame))

                src = ''.join(
                    [CACHE, '/frame{:06d}'.format(inputFrame + 1), '.jpg'])
                dst = ''.join(
                    [TEMP, '/newFrame{:06d}'.format(outputFrame + 1), '.jpg'])
                if (os.path.isfile(src)):
                    lastExisting = inputFrame
                    if (inputFrame in zooms):
                        resize(src, dst, zooms[inputFrame])
                    else:
                        os.rename(src, dst)
                        Renames.extend([src, dst])
                else:
                    if lastExisting is None:
                        print(src + ' does not exist.')
                        raise IOError('Fatal error: no existing frame found.')
                    src = ''.join([
                        CACHE, '/frame{:06d}'.format(lastExisting + 1), '.jpg'
                    ])
                    if (os.path.isfile(src)):
                        if (lastExisting in zooms):
                            resize(src, dst, zooms[lastExisting])
                        else:
                            os.rename(src, dst)
                            Renames.extend([src, dst])
                    else:
                        # uh oh, we need to find the file we just renamed!
                        myFile = None
                        for i in range(0, len(Renames), 2):
                            if (Renames[i] == src):
                                myFile = Renames[i + 1]
                                break
                        if (myFile is not None):
                            copyfile(myFile, dst)
                        else:
                            raise IOError(
                                f'Error! The file {src} does not exist.')

            outputPointer = endPointer

        num += 1
        if (num % 10 == 0):
            print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))
    print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))

    with open(f'{TEMP}/Renames.txt', 'w') as f:
        for item in Renames:
            f.write(f"{item}\n")

    print('Creating finished video. (This can take a while)')
    cmd = [
        'ffmpeg', '-y', '-framerate',
        str(frameRate), '-i', f'{TEMP}/newFrame%06d.jpg',
        f'{TEMP}/output{extension}'
    ]
    if (not VERBOSE):
        cmd.extend(['-nostats', '-loglevel', '0'])
    subprocess.call(cmd)
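Renames stores flat src/dst pairs, so the reverse pass hinted at in the docstring can be reconstructed as a sketch (this is an assumption about what originalMethod.py does, not its actual code):

import os

with open(f'{TEMP}/Renames.txt') as f:
    entries = [line.rstrip('\n') for line in f]
for src, dst in zip(entries[::2], entries[1::2]):
    os.rename(dst, src)   # move each renamed frame back to the cache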
Example No. 13
outputAudioData = np.zeros((0, audioData.shape[1]))
outputPointer = 0

lastExistingFrame = None
for chunk in chunks:
    audioChunk = audioData[int(chunk[0] *
                               samplesPerFrame):int(chunk[1] *
                                                    samplesPerFrame)]

    sFile = TEMP_FOLDER + "/tempStart.wav"
    eFile = TEMP_FOLDER + "/tempEnd.wav"
    wavfile.write(sFile, SAMPLE_RATE, audioChunk)
    with WavReader(sFile) as reader:
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
            tsm.run(reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer + leng
    outputAudioData = np.concatenate(
        (outputAudioData, alteredAudioData / maxAudioVolume))

    #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

    # smooth out transition's audio by quickly fading in/out

    if leng < AUDIO_FADE_ENVELOPE_SIZE:
        outputAudioData[
            outputPointer:
            endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
Example No. 14
    def process(self):
        global isCanceld
        try:
            if os.path.exists(TEMP_FOLDER):
                msg = input(
                    'Warning: are you sure you want to delete the videocuts_tmp folder? '
                )
                if msg.lower() in ('yes', 'y'):
                    deletePath(TEMP_FOLDER)

            Thread(target=self.timer).start()
            global gain
            gain = 1.2
            self.new_video_size = 'N/A'
            self.new_video_length = 'N/A'
            Extras = ""
            frameRate = float(60)
            SAMPLE_RATE = int(self.frame_rate)
            SILENT_THRESHOLD = float(self.silence_threshold)
            FRAME_SPREADAGE = int(self.frame_margin)
            NEW_SPEED = [float(self.silent_speed), float(self.play_speed)]
            gain = 0.6
            INPUT_FILE = self.downloadFile(str(self.video_url))
            if INPUT_FILE == '':
                return
            FRAME_QUALITY = self.frame_quality

            assert INPUT_FILE is not None, "You did not specify an input file.  You must specify an input file without spaces."

            OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

            AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition's audio by quickly fading in/out (arbitrary magic number)
            createPath(TEMP_FOLDER)

            global dir
            dir = os.getcwd()
            if isCanceld:
                return

            print(
                ' Step 1 - Frame quality has been assessed and is processing ')
            cmdary = [
                resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '-qscale:v',
                str(FRAME_QUALITY), TEMP_FOLDER + "/frame%06d.jpg",
                '-hide_banner'
            ]
            subprocess.call(cmdary, cwd=dir, shell=True)
            if isCanceld:
                return
            print(
                ' Step 1 - Frame quality processing has successfully completed '
            )

            time.sleep(2)
            if isCanceld:
                return
            print(' Step 2 - Sample Rate has been assessed and is processing ')
            cmdary = [
                resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '-ab',
                '160k', '-ac', '2', '-ar',
                str(SAMPLE_RATE), '-vn', TEMP_FOLDER + "/audio.wav"
            ]
            subprocess.call(cmdary, cwd=dir, shell=True)

            if isCanceld:
                return

            print(
                ' Step 2 - Sample Rate processing has successfully completed ')

            time.sleep(2)
            if isCanceld:
                return
            print(
                ' Step 3 - Video Frames are processing. This might take a while... '
            )
            cmdary = [resource_path('ffmpeg.exe'), "-i", INPUT_FILE]
            # ffmpeg prints stream info to stderr; capture it into params.txt.
            with open(TEMP_FOLDER + "/params.txt", "w") as f:
                subprocess.call(cmdary, cwd=dir, shell=True, stderr=f)
            if isCanceld:
                return
            print(
                ' Step 3 - Video Frames processing has successfully completed '
            )
            time.sleep(2)
            sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
            audioSampleCount = audioData.shape[0]
            maxAudioVolume = getMaxVolume(audioData)

            cap = cv2.VideoCapture(INPUT_FILE)
            fps = cap.get(cv2.CAP_PROP_FPS)
            f = open(TEMP_FOLDER + "/params.txt", 'r+')
            pre_params = f.read()
            f.close()
            params = pre_params.split('\n')
            for line in params:
                m = re.search(r' ([0-9]*\.[0-9]*) fps,', line)
                if m is None:
                    frameRate = float(fps)
                if m is not None:
                    frameRate = float(m.group(1))

            samplesPerFrame = sampleRate / frameRate

            audioFrameCount = int(math.ceil(audioSampleCount /
                                            samplesPerFrame))

            hasLoudAudio = np.zeros(audioFrameCount)

            for i in range(audioFrameCount):
                start = int(i * samplesPerFrame)
                end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
                audiochunks = audioData[start:end]
                maxchunksVolume = float(
                    getMaxVolume(audiochunks)) / maxAudioVolume
                if maxchunksVolume >= SILENT_THRESHOLD:
                    hasLoudAudio[i] = 1

            chunks = [[0, 0, 0]]
            shouldIncludeFrame = np.zeros(audioFrameCount)
            for i in range(audioFrameCount):
                start = int(max(0, i - FRAME_SPREADAGE))
                end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
                shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
                if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[
                        i - 1]:  # Did we flip?
                    chunks.append(
                        [chunks[-1][1], i, shouldIncludeFrame[i - 1]])

            chunks.append(
                [chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
            chunks = chunks[1:]

            outputAudioData = np.zeros((0, audioData.shape[1]))
            outputPointer = 0

            lastExistingFrame = None
            for chunk in chunks:
                audioChunk = audioData[int(chunk[0] * samplesPerFrame
                                           ):int(chunk[1] * samplesPerFrame)]

                sFile = TEMP_FOLDER + "/tempStart.wav"
                eFile = TEMP_FOLDER + "/tempEnd.wav"
                wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels,
                                   reader.samplerate) as writer:
                        tsm = phasevocoder(reader.channels,
                                           speed=NEW_SPEED[int(chunk[2])])
                        tsm.run(reader, writer)
                _, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]
                endPointer = outputPointer + leng
                outputAudioData = np.concatenate(
                    (outputAudioData, alteredAudioData / maxAudioVolume))

                # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

                # smooth out transition's audio by quickly fading in/out

                if leng < AUDIO_FADE_ENVELOPE_SIZE:
                    outputAudioData[
                        outputPointer:
                        endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
                else:
                    premask = np.arange(
                        AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
                    mask = np.repeat(
                        premask[:, np.newaxis], 2,
                        axis=1)  # make the fade-envelope mask stereo
                    outputAudioData[outputPointer:outputPointer +
                                    AUDIO_FADE_ENVELOPE_SIZE] *= mask
                    outputAudioData[
                        endPointer -
                        AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

                startOutputFrame = int(
                    math.ceil(outputPointer / samplesPerFrame))
                endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
                for outputFrame in range(startOutputFrame, endOutputFrame):
                    inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                     (outputFrame - startOutputFrame))
                    didItWork = copyFrame(inputFrame, outputFrame)
                    if didItWork:
                        lastExistingFrame = inputFrame
                    else:
                        copyFrame(lastExistingFrame, outputFrame)

                outputPointer = endPointer

            wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE,
                          outputAudioData)
            '''
            outputFrame = math.ceil(outputPointer/samplesPerFrame)
            for endGap in range(outputFrame,audioFrameCount):
                copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
            '''

            if isCanceld:
                return
            print(' Step 4 - Finalizing.... Please wait')
            cmdary = [
                resource_path('ffmpeg.exe'), '-framerate',
                str(frameRate), "-i", TEMP_FOLDER + "/newFrame%06d.jpg", '-i',
                TEMP_FOLDER + "/audioNew.wav", '-strict', '-2' + str(Extras),
                OUTPUT_FILE
            ]
            subprocess.call(cmdary, cwd=dir, shell=True)

            if isCanceld:
                return
            print(' Video processing finished successfully.')

            deletePath(TEMP_FOLDER)
            path = os.path.dirname(INPUT_FILE)

            global stpTimer
            stpTimer = True
            self.new_video_size = get_size(OUTPUT_FILE) + ' MB'
            self.output_parameters.append(self.new_video_size)
            self.new_video_length = str(self.get_length(OUTPUT_FILE))
            self.output_parameters.append(self.new_video_length)

        except Exception as e:
            print(' Processing Video Failed! ')
            if str(e) != 'main thread is not in main loop':
                print('error message.', str(e))
            deletePath(TEMP_FOLDER)

        print(self.output_parameters)
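Parsing the frame rate out of ffmpeg's banner with a regex, as above, is brittle. A sketch of a sturdier probe, assuming ffprobe is installed alongside ffmpeg:

import subprocess

def probe_framerate(path):
    # Ask ffprobe for the video stream's frame rate, e.g. "30000/1001".
    out = subprocess.check_output([
        'ffprobe', '-v', 'error', '-select_streams', 'v:0',
        '-show_entries', 'stream=r_frame_rate',
        '-of', 'default=noprint_wrappers=1:nokey=1', path,
    ]).decode().strip()
    num, den = out.split('/')
    return float(num) / float(den)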
Example No. 15
        def run_process():

            URL = self.URLLineEdit.text()
            if (self.URLRadio.isChecked()):
                INPUT_FILE = downloadFile(URL)
            else:
                INPUT_FILE = self.fileLineEdit.text()

            if (INPUT_FILE == ''):
                winsound.PlaySound('SystemExclamation', winsound.SND_ALIAS)

            else:
                frameRate = self.frameRateSlider.value()
                SAMPLE_RATE = self.sampleRateSlider.value()
                SILENT_THRESHOLD = float(self.thresholdLineEdit.text())
                FRAME_SPREADAGE = float(self.frameMarginSlider.value())
                SILENT_SPEED = float(self.silentSpeedLineEdit.text())
                SOUNDED_SPEED = float(self.soundedSpeedLineEdit.text())
                NEW_SPEED = [SILENT_SPEED, SOUNDED_SPEED]
                print(NEW_SPEED)

                FRAME_QUALITY = float(self.frameQualitySlider.value())

                assert INPUT_FILE is not None, "You must specify an input file."
                """   
                if len(args.output_file) >= 1:
                    OUTPUT_FILE = args.output_file
                else:
                """

                OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

                TEMP_FOLDER = "TEMP"
                AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition's audio by quickly fading in/out (arbitrary magic number)

                createPath(TEMP_FOLDER)

                command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(
                    FRAME_QUALITY
                ) + " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
                subprocess.call(command, shell=True)

                command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(
                    SAMPLE_RATE) + " -vn " + TEMP_FOLDER + "/audio.wav"

                subprocess.call(command, shell=True)

                command = "ffmpeg -i " + TEMP_FOLDER + "/input.mp4 2>&1"
                f = open(TEMP_FOLDER + "/params.txt", "w")
                subprocess.call(command, shell=True, stdout=f)

                sampleRate, audioData = wavfile.read(TEMP_FOLDER +
                                                     "/audio.wav")
                audioSampleCount = audioData.shape[0]
                maxAudioVolume = getMaxVolume(audioData)

                f = open(TEMP_FOLDER + "/params.txt", 'r+')
                pre_params = f.read()
                f.close()
                params = pre_params.split('\n')
                for line in params:
                    m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
                    if m is not None:
                        frameRate = float(m.group(1))

                samplesPerFrame = sampleRate / frameRate

                audioFrameCount = int(
                    math.ceil(audioSampleCount / samplesPerFrame))

                hasLoudAudio = np.zeros((audioFrameCount))

                for i in range(audioFrameCount):
                    start = int(i * samplesPerFrame)
                    end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
                    audiochunks = audioData[start:end]
                    maxchunksVolume = float(
                        getMaxVolume(audiochunks)) / maxAudioVolume
                    if maxchunksVolume >= SILENT_THRESHOLD:
                        hasLoudAudio[i] = 1

                chunks = [[0, 0, 0]]
                shouldIncludeFrame = np.zeros((audioFrameCount))
                for i in range(audioFrameCount):
                    start = int(max(0, i - FRAME_SPREADAGE))
                    end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
                    shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
                    if (i >= 1 and shouldIncludeFrame[i] !=
                            shouldIncludeFrame[i - 1]):  # Did we flip?
                        chunks.append(
                            [chunks[-1][1], i, shouldIncludeFrame[i - 1]])

                chunks.append([
                    chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]
                ])
                chunks = chunks[1:]

                outputAudioData = np.zeros((0, audioData.shape[1]))
                outputPointer = 0

                lastExistingFrame = None
                for chunk in chunks:
                    audioChunk = audioData[int(chunk[0] * samplesPerFrame
                                               ):int(chunk[1] *
                                                     samplesPerFrame)]

                    sFile = TEMP_FOLDER + "/tempStart.wav"
                    eFile = TEMP_FOLDER + "/tempEnd.wav"
                    wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                    with WavReader(sFile) as reader:
                        with WavWriter(eFile, reader.channels,
                                       reader.samplerate) as writer:
                            tsm = phasevocoder(reader.channels,
                                               speed=NEW_SPEED[int(chunk[2])])
                            tsm.run(reader, writer)
                    _, alteredAudioData = wavfile.read(eFile)
                    leng = alteredAudioData.shape[0]
                    endPointer = outputPointer + leng
                    outputAudioData = np.concatenate(
                        (outputAudioData, alteredAudioData / maxAudioVolume))

                    #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

                    # smooth out transition's audio by quickly fading in/out

                    if leng < AUDIO_FADE_ENVELOPE_SIZE:
                        outputAudioData[
                            outputPointer:
                            endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
                    else:
                        premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE
                                            ) / AUDIO_FADE_ENVELOPE_SIZE
                        mask = np.repeat(
                            premask[:, np.newaxis], 2,
                            axis=1)  # make the fade-envelope mask stereo
                        outputAudioData[outputPointer:outputPointer +
                                        AUDIO_FADE_ENVELOPE_SIZE] *= mask
                        outputAudioData[
                            endPointer -
                            AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

                    startOutputFrame = int(
                        math.ceil(outputPointer / samplesPerFrame))
                    endOutputFrame = int(
                        math.ceil(endPointer / samplesPerFrame))
                    for outputFrame in range(startOutputFrame, endOutputFrame):
                        inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                         (outputFrame - startOutputFrame))
                        didItWork = copyFrame(inputFrame, outputFrame,
                                              TEMP_FOLDER)
                        if didItWork:
                            lastExistingFrame = inputFrame
                        else:
                            copyFrame(lastExistingFrame, outputFrame,
                                      TEMP_FOLDER)

                    outputPointer = endPointer

                wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE,
                              outputAudioData)
                '''
                outputFrame = math.ceil(outputPointer/samplesPerFrame)
                for endGap in range(outputFrame,audioFrameCount):
                    copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
                '''

                command = "ffmpeg -framerate " + str(
                    frameRate
                ) + " -i " + TEMP_FOLDER + "/newFrame%06d.jpg -i " + TEMP_FOLDER + "/audioNew.wav -strict -2 " + OUTPUT_FILE
                subprocess.call(command, shell=True)

                deletePath(TEMP_FOLDER)
Example No. 16
outputAudioData = np.zeros((0, audioData.shape[1]))
outputPointer = 0

lastExistingFrame = None
sFile = os.path.join(TEMP_FOLDER, "tempStart.wav")
eFile = os.path.join(TEMP_FOLDER, "tempEnd.wav")
outputFrame = 0
for chunk in chunks:
    audioChunk = audioData[int(chunk[0] *
                               samplesPerFrame):int(chunk[1] *
                                                    samplesPerFrame)]

    wavfile.write(sFile, SAMPLE_RATE, audioChunk)
    with WavReader(sFile) as reader:
        # WavWriter must be closed to finalize the WAV header, so use it as a
        # context manager instead of passing a bare instance to run().
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])]).run(
                reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer + leng
    outputAudioData = np.concatenate(
        (outputAudioData, alteredAudioData / maxAudioVolume))

    # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

    # smooth out transition's audio by quickly fading in/out

    if leng < AUDIO_FADE_ENVELOPE_SIZE:
        outputAudioData[
            outputPointer:
            endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
    else:
Example No. 17
signalborders = np.where(signalpos_smoothed[:np.size(signalpos_smoothed)-1] != signalpos_smoothed[1:])[0]
del signalpos_smoothed
#signalpos = np.where(np.abs(content) > threschold)
#signalpos_norepeat = np.unique(signalpos[0])
#signalpos_norepeat = np.append(signalpos_norepeat, np.shape(content)[0])
#signalborders = signalpos_norepeat[np.gradient(signalpos_norepeat) > 2000]
signalborders = np.insert(signalborders, 0, 0)
signalborders = np.append(signalborders, np.size(content[:,0]))

newcontent = np.empty((0,2), dtype=np.int16)

for i in (np.arange(1, np.size(signalborders))):
    if np.mean(np.abs(content[signalborders[i-1]:signalborders[i],:])) > threschold:
        lborder = int(np.max([signalborders[i-1]-rate/15, 0]))
        uborder = int(np.min([signalborders[i]+rate/15, np.size(content[:,0])]))
        acc_size = int(np.floor((uborder - lborder) / acc_rate0))
        nonacc_part = content[lborder:uborder, :]
        # naive decimation by acc_rate0 (index stepping, no anti-alias filter)
        acc_part = nonacc_part[np.floor(np.arange(acc_size) * acc_rate0).astype(int), :]
        newcontent = np.append(newcontent, acc_part, axis=0)
        
wf.write('output_temp.wav', rate, newcontent)

with WavReader('output_temp.wav') as reader:
    with WavWriter('output.wav', reader.channels, reader.samplerate) as writer:
        tsm = phasevocoder(reader.channels, speed=acc_rate)
        tsm.run(reader, writer)
        
os.remove('output_temp.wav')
        
Example No. 18
def splitAudio(filename, chunks, samplesPerFrame, NEW_SPEED, audioData,
               SAMPLE_RATE, maxAudioVolume):
    """
    This function creates new audio based on the chunk date and the numpy audio data.
    """

    outputAudioData = []
    outputPointer = 0
    mask = [x / FADE_SIZE for x in range(FADE_SIZE)]
    num = 0
    chunk_len = str(len(chunks))
    for chunk in chunks:
        if (NEW_SPEED[chunk[2]] < 99999):
            start = int(chunk[0] * samplesPerFrame)
            end = int(chunk[1] * samplesPerFrame)
            audioChunk = audioData[start:end]

            sFile = ''.join([TEMP, '/tempStart.wav'])
            eFile = ''.join([TEMP, '/tempEnd.wav'])
            wavfile.write(sFile, SAMPLE_RATE, audioChunk)
            if (NEW_SPEED[chunk[2]] == 1):
                __, samefile = wavfile.read(sFile)
                leng = len(audioChunk)

                outputAudioData.extend((samefile / maxAudioVolume).tolist())
            else:
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels,
                                   reader.samplerate) as writer:
                        phasevocoder(reader.channels,
                                     speed=NEW_SPEED[chunk[2]]).run(
                                         reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]

                outputAudioData.extend(
                    (alteredAudioData / maxAudioVolume).tolist())
            endPointer = outputPointer + leng

            # smooth out transition's audio by quickly fading in/out
            if (leng < FADE_SIZE):
                for i in range(outputPointer, endPointer):
                    try:
                        outputAudioData[i][0] = 0
                        outputAudioData[i][1] = 0
                    except TypeError:
                        outputAudioData[i] = 0
            else:
                for i in range(outputPointer, outputPointer + FADE_SIZE):
                    try:
                        outputAudioData[i][0] *= mask[i - outputPointer]
                        outputAudioData[i][1] *= mask[i - outputPointer]
                    except TypeError:
                        outputAudioData[i] *= mask[i - outputPointer]
                for i in range(endPointer - FADE_SIZE, endPointer):
                    try:
                        outputAudioData[i][0] *= (
                            1 - mask[i - endPointer + FADE_SIZE])
                        outputAudioData[i][1] *= (
                            1 - mask[i - endPointer + FADE_SIZE])
                    except TypeError:
                        outputAudioData[i] *= (
                            1 - mask[i - endPointer + FADE_SIZE])
            outputPointer = endPointer
        num += 1
        if (num % 10 == 0):
            print(''.join([str(num), '/', chunk_len, ' audio chunks done.']))

    print(''.join([str(num), '/', chunk_len, ' audio chunks done.']))
    outputAudioData = np.asarray(outputAudioData)
    wavfile.write(filename, SAMPLE_RATE, outputAudioData)

    if (not os.path.isfile(filename)):
        raise IOError(f'Error: The file {filename} was not created.')
    else:
        print('Audio finished.')
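The per-sample loops with their try/except around indexing are a list-based version of the numpy fade used in the other examples; the vectorized equivalent, as a sketch for stereo data:

import numpy as np

FADE_SIZE = 400
# "chunk" stands in for one stretched audio chunk: float, shape (n, 2), n >= FADE_SIZE.
chunk = np.ones((1000, 2))
ramp = np.arange(FADE_SIZE) / FADE_SIZE            # 0 -> 1
mask = np.repeat(ramp[:, np.newaxis], 2, axis=1)   # stereo envelope
chunk[:FADE_SIZE] *= mask                          # fade in
chunk[-FADE_SIZE:] *= 1 - mask                     # fade out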
Example No. 19
def execute(input_file="",
            url="",
            output_file="",
            silent_threshold=0.03,
            sounded_speed=1.00,
            silent_speed=5.00,
            frame_margin=1,
            sample_rate=44100,
            frame_quality=3):
    SAMPLE_RATE = sample_rate
    SILENT_THRESHOLD = silent_threshold
    FRAME_SPREADAGE = frame_margin
    NEW_SPEED = [silent_speed, sounded_speed]
    if url != "" and url != None:
        INPUT_FILE = downloadFile(url)
    else:
        INPUT_FILE = input_file
    FRAME_QUALITY = frame_quality

    assert INPUT_FILE != "" and INPUT_FILE != None, "why u put no input file, that dum"

    if len(output_file) >= 1:
        OUTPUT_FILE = output_file
    else:
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)
    print("Saving to: " + OUTPUT_FILE)

    AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition's audio by quickly fading in/out (arbitrary magic number)

    createPath(TEMP_FOLDER)

    checkForFFMPEG()

    command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(
        FRAME_QUALITY) + " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
    subprocess.call(command, shell=True)

    command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(
        SAMPLE_RATE) + " -vn " + TEMP_FOLDER + "/audio.wav"

    subprocess.call(command, shell=True)

    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)

    frameRate = findFramerate(INPUT_FILE)

    samplesPerFrame = sampleRate / frameRate

    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    hasLoudAudio = np.zeros((audioFrameCount))

    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] !=
                shouldIncludeFrame[i - 1]):  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])

    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]

    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0

    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] *
                                   samplesPerFrame):int(chunk[1] *
                                                        samplesPerFrame)]

        sFile = TEMP_FOLDER + "/tempStart.wav"
        eFile = TEMP_FOLDER + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels,
                           reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels,
                                   speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData = np.concatenate(
            (outputAudioData, alteredAudioData / maxAudioVolume))

        # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

        # smooth out transition's audio by quickly fading in/out

        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[
                outputPointer:
                endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
        else:
            premask = np.arange(
                AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis], 2,
                             axis=1)  # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer +
                            AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer -
                            AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                             (outputFrame - startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(lastExistingFrame, outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)
    '''
    outputFrame = math.ceil(outputPointer/samplesPerFrame)
    for endGap in range(outputFrame,audioFrameCount):
        copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
    '''

    command = "ffmpeg -y -framerate " + str(
        frameRate
    ) + " -i " + TEMP_FOLDER + "/newFrame%06d.jpg -i " + TEMP_FOLDER + "/audioNew.wav -strict -2 " + OUTPUT_FILE
    subprocess.call(command, shell=True)

    deletePath(TEMP_FOLDER)
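Most of these jumpcutter-style examples share the same chunking scheme: chunks is a list of [start_frame, end_frame, is_loud] triples, built by spreading loudness over neighbouring frames and cutting at every flip. Isolated as a sketch (the helper name is ours, not from the examples):

import numpy as np

def build_chunks(has_loud_audio, frame_margin):
    """Return [start, end, is_loud] triples over frame indices."""
    n = len(has_loud_audio)
    include = np.zeros(n)
    for i in range(n):
        lo, hi = max(0, i - frame_margin), min(n, i + 1 + frame_margin)
        include[i] = np.max(has_loud_audio[lo:hi])
    chunks = [[0, 0, 0]]
    for i in range(1, n):
        if include[i] != include[i - 1]:          # did we flip?
            chunks.append([chunks[-1][1], i, include[i - 1]])
    chunks.append([chunks[-1][1], n, include[n - 1]])
    return chunks[1:]                             # drop the [0, 0, 0] seed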
Example No. 20
def process_video(args):
    TEMP_FOLDER,frameRate,SAMPLE_RATE,NEW_SPEED,SILENT_THRESHOLD,FRAME_SPREADAGE,AUDIO_FADE_ENVELOPE_SIZE = args    


    sampleRate, audioData = wavfile.read(TEMP_FOLDER+"/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)


    samplesPerFrame = sampleRate/frameRate

    audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))

    hasLoudAudio = np.zeros((audioFrameCount))

    for i in range(audioFrameCount):
        start = int(i*samplesPerFrame)
        end = min(int((i+1)*samplesPerFrame),audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    chunks = [[0,0,0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0,i-FRAME_SPREADAGE))
        end = int(min(audioFrameCount,i+1+FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]): # Did we flip?
            chunks.append([chunks[-1][1],i,shouldIncludeFrame[i-1]])

    chunks.append([chunks[-1][1],audioFrameCount,shouldIncludeFrame[i-1]])
    chunks = chunks[1:]

    outputAudioData = np.zeros((0,audioData.shape[1]))
    outputPointer = 0

    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]
        
        sFile = TEMP_FOLDER+"/tempStart.wav"
        eFile = TEMP_FOLDER+"/tempEnd.wav"
        wavfile.write(sFile,SAMPLE_RATE,audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer+leng
        outputAudioData = np.concatenate((outputAudioData,alteredAudioData/maxAudioVolume))

        #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

        # smooth out transition's audio by quickly fading in/out
        
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[outputPointer:endPointer] = 0 # audio is less than 0.01 sec, let's just remove it.
        else:
            premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE)/AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis],2,axis=1) # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer+AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer-AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1-mask

        startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0]+NEW_SPEED[int(chunk[2])]*(outputFrame-startOutputFrame))
            didItWork = copyFrame(TEMP_FOLDER,inputFrame,outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(TEMP_FOLDER,lastExistingFrame,outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER+"/audioNew.wav",SAMPLE_RATE,outputAudioData)

    '''
Example No. 21
        if normal:
            out.write(frame)
            nFrames += 1
            switchStart = switchEnd

            yPointerEnd = yPointer + audioChunk.shape[0]
            y[yPointer:yPointerEnd] = audioChunk
            yPointer = yPointerEnd
        else:
            spedChunk = audioData[switchStart:switchEnd]
            spedupAudio = np.zeros((0, 2), dtype=np.int16)
            # ArrReader (array, channels, samplerate, samplewidth)
            with ArrReader(spedChunk, channels, sampleRate, 2) as reader:
                # 2 as sampleWidth for now
                with ArrWriter(spedupAudio, channels, sampleRate, 2) as writer:
                    tsm = phasevocoder(reader.channels, speed=silentSpeed)
                    tsm.run(reader, writer)
                    spedupAudio = writer.output

            yPointerEnd = yPointer + spedupAudio.shape[0]
            y[yPointer:yPointerEnd] = spedupAudio
            yPointer = yPointerEnd

            writeFrames(frameBuffer, yPointerEnd, silentSpeed, sampleRate, out)
            frameBuffer = []
            switchStart = switchEnd

        normal = 1
    if skipped % 1000 == 0:
        print("{} frames inspected".format(skipped))
    skipped += 1
Example No. 22
    def process_and_concatenate(self):
        audio_fade_envelope_size = 400  # smooth out transition's audio by quickly fading in/out

        self.save_audio()

        sample_rate, audio_data = wavfile.read(self.temp_folder + "/audio.wav")
        audio_sample_count = audio_data.shape[0]
        max_audio_volume = get_max_volume(audio_data)

        samples_per_frame = sample_rate / self.fps

        audio_frame_count = int(
            math.ceil(audio_sample_count / samples_per_frame))

        has_loud_audio = np.zeros(audio_frame_count)

        for i in range(audio_frame_count):
            start = int(i * samples_per_frame)
            end = min(int((i + 1) * samples_per_frame), audio_sample_count)
            audio_chunks = audio_data[start:end]
            max_chunks_volume = float(
                get_max_volume(audio_chunks)) / max_audio_volume
            if max_chunks_volume >= SILENT_THRESHOLD:
                has_loud_audio[i] = 1

        chunks = [[0, 0, 0]]
        should_include_frame = np.zeros(audio_frame_count)

        last_idx = 0
        for i in range(audio_frame_count):
            start = int(max(0, i - FRAME_SPREADAGE))
            end = int(min(audio_frame_count, i + 1 + FRAME_SPREADAGE))
            should_include_frame[i] = np.max(has_loud_audio[start:end])
            if i >= 1 and should_include_frame[i] != should_include_frame[
                    i - 1]:  # Did we flip?
                chunks.append([chunks[-1][1], i, should_include_frame[i - 1]])
            last_idx = i

        chunks.append([
            chunks[-1][1], audio_frame_count,
            should_include_frame[last_idx - 1]
        ])
        chunks = chunks[1:]

        output_audio_data = np.zeros((0, audio_data.shape[1]))
        output_pointer = 0

        last_existing_frame = None

        duration = self.get_duration()
        frames_num = int(float(duration) * self.fps)
        signed_frames = [False for _ in range(frames_num)]
        output_frames = []

        for chunk in chunks:
            audio_chunk = audio_data[int(chunk[0] * samples_per_frame
                                         ):int(chunk[1] * samples_per_frame)]

            s_file = self.temp_folder + "/tempStart.wav"
            e_file = self.temp_folder + "/tempEnd.wav"
            wavfile.write(s_file, SAMPLE_RATE, audio_chunk)
            with WavReader(s_file) as reader:
                with WavWriter(e_file, reader.channels,
                               reader.samplerate) as writer:
                    tsm = phasevocoder(reader.channels,
                                       speed=NEW_SPEED[int(chunk[2])])
                    tsm.run(reader, writer)
            _, altered_audio_data = wavfile.read(e_file)
            leng = altered_audio_data.shape[0]
            end_pointer = output_pointer + leng
            output_audio_data = np.concatenate(
                (output_audio_data, altered_audio_data / max_audio_volume))

            if leng < audio_fade_envelope_size:
                output_audio_data[output_pointer:end_pointer] = 0
            else:
                pre_mask = np.arange(
                    audio_fade_envelope_size) / audio_fade_envelope_size
                mask = np.repeat(pre_mask[:, np.newaxis], 2, axis=1)
                output_audio_data[output_pointer:output_pointer +
                                  audio_fade_envelope_size] *= mask
                output_audio_data[
                    end_pointer -
                    audio_fade_envelope_size:end_pointer] *= 1 - mask

            start_output_frame = int(
                math.ceil(output_pointer / samples_per_frame))
            end_output_frame = int(math.ceil(end_pointer / samples_per_frame))

            for output_frame in range(start_output_frame, end_output_frame):
                input_frame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                  (output_frame - start_output_frame))
                if input_frame < frames_num - 2:
                    signed_frames[input_frame + 1] = True
                    last_existing_frame = input_frame
                else:
                    signed_frames[last_existing_frame] = True
                output_frames.append(output_frame)

            output_pointer = end_pointer

        j = 0
        for i, frame_sign in enumerate(signed_frames):
            if frame_sign:
                self.copy_frame(i, j)
                j += 1
        wavfile.write(self.temp_folder + "/audioNew.wav", SAMPLE_RATE,
                      output_audio_data)

        self.final_concatenation()
        delete_path(self.temp_folder)
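Most of these examples build the same `chunks` list of [start_frame, end_frame, was_loud] triples by watching the loudness flag flip. A standalone trace of that loop on a made-up six-frame loudness array makes the bookkeeping easier to follow:

import numpy as np

has_loud_audio = np.array([0, 0, 1, 1, 0, 0])
frame_count = len(has_loud_audio)
spreadage = 0  # no frame margin, to keep the trace obvious

chunks = [[0, 0, 0]]
should_include = np.zeros(frame_count)
for i in range(frame_count):
    start = max(0, i - spreadage)
    end = min(frame_count, i + 1 + spreadage)
    should_include[i] = np.max(has_loud_audio[start:end])
    if i >= 1 and should_include[i] != should_include[i - 1]:  # did we flip?
        chunks.append([chunks[-1][1], i, float(should_include[i - 1])])
chunks.append([chunks[-1][1], frame_count, float(should_include[i - 1])])
chunks = chunks[1:]
print(chunks)  # [[0, 2, 0.0], [2, 4, 1.0], [4, 6, 0.0]]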
Ejemplo n.º 23
0
def timeStretch(input_filename, output_filename, rate, samplerate):
    with WavReader(input_filename) as reader:
        with WavWriter(output_filename, reader.channels,
                       reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=rate)
            tsm.run(reader, writer)
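A usage sketch for the helper above (the file names are placeholders, not from the original source). Note that the samplerate argument is accepted but never used: the writer simply reuses reader.samplerate.

# Halve the speed, i.e. double the duration, without changing pitch.
timeStretch('speech.wav', 'speech_slow.wav', rate=0.5, samplerate=44100)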
Ejemplo n.º 24
0
def main(args):
    frameRate = args.frame_rate
    SAMPLE_RATE = args.sample_rate
    SILENT_THRESHOLD = args.silent_threshold
    FRAME_SPREADAGE = args.frame_margin
    NEW_SPEED = [args.silent_speed, args.sounded_speed]
    FRAME_QUALITY = args.frame_quality
    AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition's audio by quickly fading in/out (arbitrary magic number)
    TEMP_FOLDER = "TEMP"
    if not os.path.isdir(TEMP_FOLDER):
        os.makedirs(TEMP_FOLDER)

    if args.url is not None:
        INPUT_FILE = downloadFile(args.url)
    else:
        INPUT_FILE = args.input

    if len(args.output) >= 1:
        OUTPUT_FILE = args.output
    else:
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

    command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(
        FRAME_QUALITY) + " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
    subprocess.call(command, shell=True)

    command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(
        SAMPLE_RATE) + " -vn " + TEMP_FOLDER + "/audio.wav"
    subprocess.call(command, shell=True)

    command = "ffmpeg -i " + TEMP_FOLDER + "/input.mp4 2>&1"
    f = open(TEMP_FOLDER + "/params.txt", "w")
    subprocess.call(command, shell=True, stdout=f)

    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)

    f = open(TEMP_FOLDER + "/params.txt", 'r')
    pre_params = f.read()
    f.close()
    params = pre_params.split('\n')
    for line in params:
        m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
        if m is not None:
            frameRate = float(m.group(1))

    samplesPerFrame = sampleRate / frameRate

    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    hasLoudAudio = np.zeros((audioFrameCount))

    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] !=
                shouldIncludeFrame[i - 1]):  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])

    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]

    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0

    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] *
                                   samplesPerFrame):int(chunk[1] *
                                                        samplesPerFrame)]

        sFile = TEMP_FOLDER + "/tempStart.wav"
        eFile = TEMP_FOLDER + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels,
                           reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels,
                                   speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData = np.concatenate(
            (outputAudioData, alteredAudioData / maxAudioVolume))

        # smooth out transition's audio by quickly fading in/out

        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[
                outputPointer:
                endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
        else:
            premask = np.arange(
                AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis], 2,
                             axis=1)  # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer +
                            AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer -
                            AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                             (outputFrame - startOutputFrame))
            didItWork = copyFrame(TEMP_FOLDER, inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(TEMP_FOLDER, lastExistingFrame, outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)

    command = "ffmpeg -framerate " + str(
        frameRate
    ) + " -i " + TEMP_FOLDER + "/newFrame%06d.jpg -i " + TEMP_FOLDER + "/audioNew.wav -strict -2 " + OUTPUT_FILE
    subprocess.call(command, shell=True)

    rmtree(TEMP_FOLDER, ignore_errors=False)
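One recurring weakness in the example above (and in Ejemplo n.º 28 below) is building ffmpeg commands by string concatenation with shell=True, which breaks as soon as the input path contains spaces or shell metacharacters. A hedged sketch of the same frame-extraction call built as an argument list instead, assuming only that ffmpeg is on the PATH:

import subprocess

command = [
    "ffmpeg", "-i", INPUT_FILE,
    "-qscale:v", str(FRAME_QUALITY),
    TEMP_FOLDER + "/frame%06d.jpg",
    "-hide_banner",
]
subprocess.call(command)  # no shell=True, so spaces in INPUT_FILE are safe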
Ejemplo n.º 25
0
def speed_up_video(input_file: str,
                   output_file: str = None,
                   frame_rate: float = 30,
                   sample_rate: int = 44100,
                   silent_threshold: float = 0.03,
                   silent_speed: float = 5.0,
                   sounded_speed: float = 1.0,
                   frame_spreadage: int = 1,
                   audio_fade_envelope_size: int = 400,
                   temp_folder: str = 'TEMP') -> None:
    """
    Speeds up a video file with different speeds for the silent and loud sections in the video.

    :param input_file: The file name of the video to be sped up.
    :param output_file: The file name of the output file. If not given, it defaults to 'input_file'_ALTERED.ext.
    :param frame_rate: The frame rate of the given video. Only needed if not extractable through ffmpeg.
    :param sample_rate: The sample rate of the audio in the video.
    :param silent_threshold: The volume threshold below which a chunk counts as silent.
                             Values range from 0 (silence) to 1 (max volume).
    :param silent_speed: The speed of the silent chunks.
    :param sounded_speed: The speed of the loud chunks.
    :param frame_spreadage: How many silent frames adjacent to sounded frames should be included to provide context.
    :param audio_fade_envelope_size: Audio transition smoothing duration in samples.
    :param temp_folder: The file path of the temporary working folder.
    """
    # Set output file name based on input file name if none was given
    if output_file is None:
        output_file = _input_to_output_filename(input_file)

    # Create Temp Folder
    if os.path.exists(temp_folder):
        _delete_path(temp_folder)
    _create_path(temp_folder)

    # Find out framerate and duration of the input video
    command = 'ffprobe -i "{}" -hide_banner -loglevel error -select_streams v' \
              ' -show_entries format=duration:stream=avg_frame_rate'.format(input_file)
    p = subprocess.Popen(command,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         bufsize=1,
                         universal_newlines=True,
                         shell=True)
    std_out, err = p.communicate()
    match_frame_rate = re.search(r'frame_rate=(\d*)/(\d*)', str(std_out))
    if match_frame_rate is not None:
        frame_rate = float(match_frame_rate.group(1)) / float(
            match_frame_rate.group(2))
        # print(f'Found Framerate {frame_rate}')

    match_duration = re.search(r'duration=([\d.]*)', str(std_out))
    original_duration = 0.0
    if match_duration is not None:
        original_duration = float(match_duration.group(1))
        # print(f'Found Duration {original_duration}')

    # Extract the audio
    command = 'ffmpeg -i "{}" -ab 160k -ac 2 -ar {} -vn {} -hide_banner' \
        .format(input_file,
                sample_rate,
                temp_folder + '/audio.wav')

    _run_timed_ffmpeg_command(command,
                              total=int(original_duration * frame_rate),
                              unit='frames',
                              desc='Extracting audio:')

    wav_sample_rate, audio_data = wavfile.read(temp_folder + "/audio.wav")
    audio_sample_count = audio_data.shape[0]
    max_audio_volume = _get_max_volume(audio_data)
    samples_per_frame = wav_sample_rate / frame_rate
    audio_frame_count = int(math.ceil(audio_sample_count / samples_per_frame))

    # Find frames with loud audio
    has_loud_audio = np.zeros(audio_frame_count, dtype=bool)

    for i in range(audio_frame_count):
        start = int(i * samples_per_frame)
        end = min(int((i + 1) * samples_per_frame), audio_sample_count)
        audio_chunk = audio_data[start:end]
        chunk_max_volume = float(
            _get_max_volume(audio_chunk)) / max_audio_volume
        if chunk_max_volume >= silent_threshold:
            has_loud_audio[i] = True

    # Chunk the frames together that are quiet or loud
    chunks = [[0, 0, 0]]
    should_include_frame = np.zeros(audio_frame_count, dtype=bool)
    for i in tqdm(range(audio_frame_count),
                  desc='Finding chunks:',
                  unit='frames'):
        start = int(max(0, i - frame_spreadage))
        end = int(min(audio_frame_count, i + 1 + frame_spreadage))
        should_include_frame[i] = np.any(has_loud_audio[start:end])
        if i >= 1 and should_include_frame[i] != should_include_frame[
                i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, should_include_frame[i - 1]])

    chunks.append([
        chunks[-1][1], audio_frame_count,
        should_include_frame[audio_frame_count - 1]
    ])
    chunks = chunks[1:]

    # Generate audio data with varying speed for each chunk
    new_speeds = [silent_speed, sounded_speed]
    output_pointer = 0
    audio_buffers = []
    for index, chunk in tqdm(enumerate(chunks),
                             total=len(chunks),
                             desc='Changing audio:',
                             unit='chunks'):
        audio_chunk = audio_data[int(chunk[0] *
                                     samples_per_frame):int(chunk[1] *
                                                            samples_per_frame)]

        reader = ArrayReader(np.transpose(audio_chunk))
        writer = ArrayWriter(reader.channels)
        tsm = phasevocoder(reader.channels, speed=new_speeds[int(chunk[2])])
        tsm.run(reader, writer)
        altered_audio_data = np.transpose(writer.data)

        # smooth out transition's audio by quickly fading in/out
        if altered_audio_data.shape[0] < audio_fade_envelope_size:
            altered_audio_data[:] = 0  # audio is less than 0.01 sec, let's just remove it.
        else:
            premask = np.arange(
                audio_fade_envelope_size) / audio_fade_envelope_size
            mask = np.repeat(premask[:, np.newaxis], 2,
                             axis=1)  # make the fade-envelope mask stereo
            altered_audio_data[:audio_fade_envelope_size] *= mask
            altered_audio_data[-audio_fade_envelope_size:] *= 1 - mask

        audio_buffers.append(altered_audio_data / max_audio_volume)

        end_pointer = output_pointer + altered_audio_data.shape[0]
        start_output_frame = int(math.ceil(output_pointer / samples_per_frame))
        end_output_frame = int(math.ceil(end_pointer / samples_per_frame))
        chunks[index] = chunk[:2] + [start_output_frame, end_output_frame]

        output_pointer = end_pointer

    # print(chunks)

    output_audio_data = np.concatenate(audio_buffers)
    wavfile.write(temp_folder + "/audioNew.wav", sample_rate,
                  output_audio_data)

    # Cut the video parts to length
    expression = _get_tree_expression(chunks)

    filter_graph_file = open(temp_folder + "/filterGraph.txt", 'w')
    filter_graph_file.write(f'fps=fps={frame_rate},setpts=')
    filter_graph_file.write(expression.replace(',', '\\,'))
    filter_graph_file.close()

    command = 'ffmpeg -i "{}" -i "{}" -filter_script:v "{}" -map 0 -map -0:a -map 1:a -c:a aac "{}"' \
              ' -loglevel warning -stats -y -hide_banner' \
        .format(input_file,
                temp_folder + '/audioNew.wav',
                temp_folder + '/filterGraph.txt',
                output_file)

    _run_timed_ffmpeg_command(command,
                              total=chunks[-1][3],
                              unit='frames',
                              desc='Generating final:')

    _delete_path(temp_folder)
Ejemplo n.º 26
0
def apply_speed_to_audio(audio, speed):
    # `audio` must be channel-major: shape (channels, samples)
    reader = ArrayReader(audio)
    writer = ArrayWriter(reader.channels)  # match the input's channel count
    tsm = phasevocoder(reader.channels, speed=speed)
    tsm.run(reader, writer)
    return writer.data
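A caveat for the helper above: audiotsm's ArrayReader expects channel-major data of shape (channels, samples), while scipy.io.wavfile.read returns (samples, channels), so the input normally has to be transposed first (Ejemplo n.º 25 does exactly this). A hedged usage sketch with a placeholder file name:

import numpy as np
from scipy.io import wavfile

sample_rate, data = wavfile.read('input.wav')  # placeholder path
data = data.astype(np.float32) / 32768.0       # int16 -> floats in [-1, 1]
doubled = apply_speed_to_audio(data.T, speed=2.0)
print(doubled.shape)  # (channels, ~samples/2): twice as fast, half as long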
Ejemplo n.º 27
0
        # and the last frame is 'loud'
        if normal:
            out.write(frame)
            nFrames += 1
            switchStart = switchEnd

            yPointerEnd = yPointer + audioChunkMod.shape[0]
            y[yPointer:yPointerEnd] = audioChunkMod
            yPointer = yPointerEnd
        else:
            spedChunk = audioData[switchStart:switchEnd]
            spedupAudio = np.zeros((0, 2), dtype=np.int16)
            with ArrReader(spedChunk, channels, sampleRate, 2) as reader:
                with ArrWriter(spedupAudio, channels, sampleRate, 2) as writer:
                    tsm = phasevocoder(reader.channels,
                                       speed=NEW_SPEED[normal])
                    tsm.run(reader, writer)
                    spedupAudio = writer.output

            yPointerEnd = yPointer + spedupAudio.shape[0]
            y[yPointer:yPointerEnd] = spedupAudio
            yPointer = yPointerEnd

            writeFrames(frameBuffer, yPointerEnd, NEW_SPEED[normal],
                        sampleRate, out)
            frameBuffer = []
            switchStart = switchEnd

        normal = 1
        if framesProcessed % 500 == 0:
            print("{} frames processed".format(framesProcessed))
Ejemplo n.º 28
0
def videoProcess(frame_rate, sample_rate, silent_threshold, frame_margin,
                 silent_speed, sounded_speed, url, input_file, output_file,
                 frame_quality):
    try:
        print(frame_rate, sample_rate, silent_threshold, frame_margin,
              silent_speed, sounded_speed, url, input_file, output_file,
              frame_quality)

        New_Speed_silent_and_sounded = [silent_speed, sounded_speed]

        if url:
            name = YouTube(url).streams.first().download()
            renamed = name.replace(' ', '_')
            os.rename(name, renamed)
            Input_Video = renamed  # keep processing the downloaded file
        else:
            Input_Video = input_file

        assert Input_Video is not None, "enter an input video"

        if len(output_file) >= 1:
            Output_Video = output_file
        else:
            dot_position = Input_Video.rfind(".")
            Output_Video = (Input_Video[:dot_position] + "NEWVIDEO" +
                            Input_Video[dot_position:])

        # print ( Output_Video)
        Audio_fade_envelope_size = 400

        try:
            os.mkdir(TEMP)
        except OSError:
            assert False, "Directory already exists"

        command = "ffmpeg -i " + Input_Video + " -qscale:v " + str(
            frame_quality) + " " + TEMP + "/old_frame%06d.jpg -hide_banner"
        subprocess.call(command, shell=True)

        command = "ffmpeg -i " + Input_Video + " -ab 160k -ac 2 -ar " + str(
            sample_rate) + " -vn " + TEMP + "/audio.wav"

        subprocess.call(command, shell=True)

        sampleRate, audioData = wavfile.read(TEMP + "/audio.wav")
        audioSampleCount = audioData.shape[0]
        maxAudioVolume = GetVolume(audioData)

        samplesPerFrame = 1470  # assumes 44100 Hz audio at 30 fps (44100 / 30)

        audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

        hasLoudAudio = numpy.zeros((audioFrameCount))

        for i in range(audioFrameCount):
            start = int(i * samplesPerFrame)
            end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
            audiochunks = audioData[start:end]
            maxchunksVolume = float(GetVolume(audiochunks)) / maxAudioVolume
            if maxchunksVolume >= silent_threshold:
                hasLoudAudio[i] = 1

        chunks = [[0, 0, 0]]
        shouldIncludeFrame = numpy.zeros((audioFrameCount))
        for i in range(audioFrameCount):
            start = int(max(0, i - frame_margin))
            end = int(min(audioFrameCount, i + 1 + frame_margin))
            shouldIncludeFrame[i] = numpy.max(hasLoudAudio[start:end])
            if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]):
                chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])

        chunks.append(
            [chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
        chunks = chunks[1:]

        outputAudioData = numpy.zeros((0, audioData.shape[1]))
        outputPointer = 0

        lastExistingFrame = None
        for chunk in chunks:
            audioChunk = audioData[int(chunk[0] *
                                       samplesPerFrame):int(chunk[1] *
                                                            samplesPerFrame)]

            sFile = TEMP + "/tempStart.wav"
            eFile = TEMP + "/tempEnd.wav"
            wavfile.write(sFile, sample_rate, audioChunk)
            with WavReader(sFile) as reader:
                with WavWriter(eFile, reader.channels,
                               reader.samplerate) as writer:
                    tsm = phasevocoder(reader.channels,
                                       speed=New_Speed_silent_and_sounded[int(
                                           chunk[2])])
                    tsm.run(reader, writer)
            _, alteredAudioData = wavfile.read(eFile)
            leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng
            outputAudioData = numpy.concatenate(
                (outputAudioData, alteredAudioData / maxAudioVolume))

            if leng < Audio_fade_envelope_size:
                outputAudioData[outputPointer:endPointer] = 0
            else:
                premask = numpy.arange(
                    Audio_fade_envelope_size) / Audio_fade_envelope_size
                mask = numpy.repeat(premask[:, numpy.newaxis], 2, axis=1)
                outputAudioData[outputPointer:outputPointer +
                                Audio_fade_envelope_size] *= mask
                outputAudioData[
                    endPointer -
                    Audio_fade_envelope_size:endPointer] *= 1 - mask

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for op_frame in range(startOutputFrame, endOutputFrame):
                ip_frame = int(chunk[0] +
                               New_Speed_silent_and_sounded[int(chunk[2])] *
                               (op_frame - startOutputFrame))
                didItWork = FRameCopy(ip_frame, op_frame)
                if didItWork:
                    lastExistingFrame = ip_frame
                else:
                    FRameCopy(lastExistingFrame, op_frame)

            outputPointer = endPointer

        wavfile.write(TEMP + "/audioNew.wav", sample_rate, outputAudioData)

        command = "ffmpeg -framerate " + str(
            frame_rate
        ) + " -i " + TEMP + "/new_frame%06d.jpg -i " + TEMP + "/audioNew.wav -strict -2 " + Output_Video
        subprocess.call(command, shell=True)

        try:
            rmtree(TEMP, ignore_errors=False)
        except OSError:
            print("Delete failed")

        return "done"
    except Exception:
        return "nothing"
Ejemplo n.º 29
0
def fastVideoPlus(videoFile, outFile, silentThreshold, frameMargin,
                  SAMPLE_RATE, AUD_BITRATE, VERBOSE, videoSpeed, silentSpeed,
                  cutByThisTrack, keepTracksSep):

    print('Running from fastVideoPlus.py')

    if (not os.path.isfile(videoFile)):
        print('Could not find file:', videoFile)
        sys.exit()

    TEMP = '.TEMP'
    FADE_SIZE = 400
    NEW_SPEED = [silentSpeed, videoSpeed]

    cap = cv2.VideoCapture(videoFile)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = round(cap.get(cv2.CAP_PROP_FPS))

    try:
        os.mkdir(TEMP)
    except OSError:
        rmtree(TEMP)
        os.mkdir(TEMP)

    tracks = vidTracks(videoFile)

    if (cutByThisTrack >= tracks):
        print("Error: You choose a track that doesn't exist.")
        print(f'There are only {tracks-1} tracks. (starting from 0)')
        sys.exit()
    for trackNumber in range(tracks):
        cmd = [
            'ffmpeg', '-i', videoFile, '-ab', AUD_BITRATE, '-ac', '2', '-ar',
            str(SAMPLE_RATE), '-map', f'0:a:{trackNumber}',
            f'{TEMP}/{trackNumber}.wav'
        ]
        if (not VERBOSE):
            cmd.extend(['-nostats', '-loglevel', '0'])
        else:
            cmd.extend(['-hide_banner'])
        subprocess.call(cmd)

    sampleRate, audioData = wavfile.read(f'{TEMP}/{cutByThisTrack}.wav')
    chunks = getAudioChunks(audioData, sampleRate, fps, silentThreshold, 2,
                            frameMargin)

    hmm = preview(chunks, NEW_SPEED, fps)
    estLeng = int((hmm * SAMPLE_RATE) * 1.5) + int(SAMPLE_RATE * 2)

    oldAudios = []
    newAudios = []
    for i in range(tracks):
        __, audioData = wavfile.read(f'{TEMP}/{i}.wav')
        oldAudios.append(audioData)
        newAudios.append(np.zeros((estLeng, 2), dtype=np.int16))

    yPointer = 0

    out = cv2.VideoWriter(f'{TEMP}/spedup.mp4', fourcc, fps, (width, height))

    channels = 2

    switchStart = 0
    needChange = False
    preve = None
    endMargin = 0

    yPointer = 0
    frameBuffer = []

    def writeFrames(frames, nAudio, speed, samplePerSecond, writer):
        numAudioChunks = round(nAudio / samplePerSecond * fps)
        global nFrames
        numWrites = numAudioChunks - nFrames
        nFrames += numWrites  # if sync issue exists, change this back
        limit = len(frames) - 1
        for i in range(numWrites):
            frameIndex = round(i * speed)
            if (frameIndex > limit):
                writer.write(frames[-1])
            else:
                writer.write(frames[frameIndex])

    totalFrames = chunks[-1][1]
    outFrame = 0
    beginTime = time()

    while cap.isOpened():
        ret, frame = cap.read()
        if (not ret):
            break

        cframe = int(cap.get(cv2.CAP_PROP_POS_FRAMES))  # current frame

        currentTime = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
        audioSampleStart = int(currentTime * sampleRate)

        audioSampleEnd = min(
            audioSampleStart + sampleRate // fps * frameMargin, len(audioData))
        switchEnd = audioSampleStart + sampleRate // fps

        audioChunk = audioData[audioSampleStart:audioSampleEnd]

        state = None
        for chunk in chunks:
            if (cframe >= chunk[0] and cframe <= chunk[1]):
                state = chunk[2]
                break

        if (state == 0):
            if (endMargin < 1):
                isSilent = 1
            else:
                isSilent = 0
                endMargin -= 1
        else:
            isSilent = 0
            endMargin = frameMargin
        if (preve is not None and preve != isSilent):
            needChange = True

        preve = isSilent

        if (not needChange):
            frameBuffer.append(frame)
        else:
            theSpeed = NEW_SPEED[isSilent]
            if (theSpeed < 99999):

                # handle audio tracks
                for i, oneAudioData in enumerate(oldAudios):
                    spedChunk = oneAudioData[switchStart:switchEnd]
                    spedupAudio = np.zeros((0, 2), dtype=np.int16)
                    with ArrReader(spedChunk, channels, sampleRate,
                                   2) as reader:
                        with ArrWriter(spedupAudio, channels, sampleRate,
                                       2) as writer:
                            phasevocoder(reader.channels,
                                         speed=theSpeed).run(reader, writer)
                            spedupAudio = writer.output

                    yPointerEnd = yPointer + spedupAudio.shape[0]

                    newAudios[i][yPointer:yPointerEnd] = spedupAudio
                yPointer = yPointerEnd

            else:
                yPointerEnd = yPointer

            writeFrames(frameBuffer, yPointerEnd, NEW_SPEED[isSilent],
                        sampleRate, out)
            frameBuffer = []
            switchStart = switchEnd
            needChange = False

        progressBar(cframe, totalFrames, beginTime)

    # finish audio
    for i, newData in enumerate(newAudios):
        newData = newData[:yPointer]
        wavfile.write(f'{TEMP}/new{i}.wav', sampleRate, newData)

        if (not os.path.isfile(f'{TEMP}/new{i}.wav')):
            raise IOError('audio file not created.')

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    first = videoFile[:videoFile.rfind('.')]
    extension = videoFile[videoFile.rfind('.'):]

    if (outFile == ''):
        outFile = f'{first}_ALTERED{extension}'

    # Now mix new audio(s) and the new video.

    if (keepTracksSep):
        cmd = ['ffmpeg', '-y']
        for i in range(tracks):
            cmd.extend(['-i', f'{TEMP}/new{i}.wav'])
        cmd.extend(['-i', f'{TEMP}/spedup.mp4'])  # add input video
        for i in range(tracks):
            cmd.extend(['-map', f'{i}:a:0'])
        cmd.extend([
            '-map', f'{tracks}:v:0', '-c:v', 'copy', '-movflags', '+faststart',
            outFile
        ])
        if (not VERBOSE):
            cmd.extend(['-nostats', '-loglevel', '0'])
    else:
        if (tracks > 1):
            cmd = ['ffmpeg']
            for i in range(tracks):
                cmd.extend(['-i', f'{TEMP}/new{i}.wav'])
            cmd.extend([
                '-filter_complex', f'amerge=inputs={tracks}', '-ac', '2',
                f'{TEMP}/newAudioFile.wav'
            ])
            if (not VERBOSE):
                cmd.extend(['-nostats', '-loglevel', '0'])
            subprocess.call(cmd)
        else:
            os.rename(f'{TEMP}/new0.wav', f'{TEMP}/newAudioFile.wav')

        cmd = [
            'ffmpeg', '-y', '-i', f'{TEMP}/newAudioFile.wav', '-i',
            f'{TEMP}/spedup.mp4', '-c:v', 'copy', '-movflags', '+faststart',
            outFile
        ]
        if (not VERBOSE):
            cmd.extend(['-nostats', '-loglevel', '0'])
        subprocess.call(cmd)

    return outFile
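The audio/video synchronisation in writeFrames above is proportional bookkeeping: once nAudio output samples exist, the output video should contain round(nAudio / sampleRate * fps) frames, and only the shortfall against the global nFrames counter actually gets written. The arithmetic with concrete numbers:

sample_rate = 44100
fps = 30
n_audio = 88200        # output samples written so far (2.0 s of audio)
n_frames_written = 45  # frames already emitted

target_frames = round(n_audio / sample_rate * fps)  # 2.0 s * 30 fps = 60
num_writes = target_frames - n_frames_written       # 15 frames still owed
print(target_frames, num_writes)                    # 60 15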
Ejemplo n.º 30
0
# coding: utf-8


from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader, WavWriter

with WavReader('qbhexamples.wav') as reader:
    print(reader.channels, reader.samplerate)
    with WavWriter('qbh_half.wav', reader.channels, reader.samplerate) as writer:
        tsm = phasevocoder(reader.channels, speed=0.5)
        tsm.run(reader, writer)
print("Finished; the with blocks close both files.")


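Since speed=0.5 halves the playback rate without changing pitch, qbh_half.wav should come out roughly twice as long as the input. A quick sanity check (file names as in the snippet above):

import wave

with wave.open('qbhexamples.wav') as src, wave.open('qbh_half.wav') as dst:
    src_len = src.getnframes() / src.getframerate()
    dst_len = dst.getnframes() / dst.getframerate()
    print(dst_len / src_len)  # expect a ratio close to 2.0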