Example #1
    def speed(self, speed):
        """
        Adjusts speed to given percentage without changing pitch

        :param speed: Percentage to increase/decrease speed without changing
                      pitch
        :type speed: float
        """
        if speed != 1:
            logger.info("Setting speed to %f", speed)
            logger.debug("Export file to BytesIO")
            wav_in = BytesIO()
            wav_in = self._segment.export(wav_in, format="wav")
            wav_in.seek(0)
            logger.debug("Initializing reader and writer")
            with WavReader(wav_in) as reader:
                wav_out = BytesIO()
                with WavWriter(wav_out, reader.channels,
                               reader.samplerate) as writer:
                    logger.debug("Adjusting speed with vocoder")
                    tsm = phasevocoder(reader.channels, speed=speed)
                    tsm.run(reader, writer)
                    logger.debug("Reload audio segment")
                    wav_out.seek(44)  # skip metadata and start at first sample
                    self._segment = AudioSegment.from_raw(
                        wav_out,
                        sample_width=self._segment.sample_width,
                        channels=self._segment.channels,
                        frame_rate=self._segment.frame_rate,
                    )
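The same in-memory round trip can be exercised outside the class. Below is a minimal standalone sketch with illustrative file names, assuming pydub and audiotsm are installed; as the method above shows, WavReader and WavWriter accept file-like objects, so no temporary files are needed.

# Minimal standalone sketch of the in-memory speed change above
# (illustrative names; assumes pydub and audiotsm are installed).
from io import BytesIO

from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader, WavWriter
from pydub import AudioSegment

segment = AudioSegment.from_wav("input.wav")  # hypothetical input file
wav_in = BytesIO()
segment.export(wav_in, format="wav")
wav_in.seek(0)

wav_out = BytesIO()
with WavReader(wav_in) as reader:
    with WavWriter(wav_out, reader.channels, reader.samplerate) as writer:
        phasevocoder(reader.channels, speed=1.5).run(reader, writer)

wav_out.seek(44)  # skip the 44-byte WAV header; what remains is raw PCM
faster = AudioSegment.from_raw(
    wav_out,
    sample_width=segment.sample_width,
    channels=segment.channels,
    frame_rate=segment.frame_rate,
)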
Example #2
	def change_pitch(self, source_filepath):
		import pydub
		from audiotsm.io.wav import WavReader, WavWriter
		from audiotsm import phasevocoder
		if abs(self.octaves) > 0.1:
			_, sampled_filepath = tempfile.mkstemp(suffix='.wav', dir=self.temp_dir)

			sound = pydub.AudioSegment.from_mp3(source_filepath)
			sample_rate = int(sound.frame_rate * (2**self.octaves))
			# Overriding frame_rate raises pitch and speed together; keep the
			# ratio so the time-scale step below can restore the duration.
			rate_ratio = sound.frame_rate / sample_rate
			modified = sound._spawn(sound.raw_data, overrides={"frame_rate": sample_rate})

			modified.export(sampled_filepath, format="wav")
		else:
			sampled_filepath = source_filepath
			rate_ratio = 1.0  # no pitch shift, so no speed compensation needed
		if abs(self.speed - 1) > 0.1:
			#output_filepath = f"{os.path.basename(source_filepath)}{self.oct_str}_{self.speed}.wav"
			_, output_filepath = tempfile.mkstemp(suffix='.wav', dir=self.temp_dir) 
			with WavReader(sampled_filepath) as reader:
				with WavWriter(output_filepath, reader.channels, reader.samplerate) as writer:
				tsm = phasevocoder(reader.channels, speed=self.speed * rate_ratio)
					tsm.run(reader, writer)
			return output_filepath
		else:
			return sampled_filepath
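For intuition on the compensation above: raising the frame rate by 2**octaves makes playback both higher-pitched and faster, so the vocoder speed is multiplied by the inverse ratio to restore the original duration. A worked example with illustrative numbers:

# One octave up at 44.1 kHz (illustrative numbers only).
frame_rate = 44100
octaves = 1.0
sample_rate = int(frame_rate * (2 ** octaves))  # 88200: pitch up, 2x faster
rate_ratio = frame_rate / sample_rate           # 0.5
desired_speed = 1.0
tsm_speed = desired_speed * rate_ratio          # 0.5 restores the duration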
Example #3
def test_data(data_file, speed, tsm_name, save):
    """Test the TSM procedures on real data."""
    reader = None
    writer = None

    try:
        # Create the reader
        reader = WavReader(data_file)

        # Create the writer
        if save:
            # pylint: disable=no-member
            rel_path = os.path.relpath(data_file, pytest.DATA_DIR)
            # pylint: enable=no-member

            # Copy original file to "orig" directory
            orig_file = os.path.join(EXAMPLES_DIR, "orig", rel_path)
            orig_dir = os.path.dirname(orig_file)
            if not os.path.isdir(orig_dir):
                os.makedirs(orig_dir)
            if not os.path.isfile(orig_file):
                shutil.copy2(data_file, orig_file)

            # Generate output file path
            speed_dir = "speed-{:.2f}".format(speed)
            name = os.path.splitext(rel_path)[0]
            output_name = "{}_{}.wav".format(name, tsm_name)
            output_file = os.path.join(EXAMPLES_DIR, speed_dir, output_name)
            output_dir = os.path.dirname(output_file)
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

            writer = WavWriter(output_file, reader.channels, reader.samplerate)
        else:
            writer = ArrayWriter(reader.channels)

        # Create and run the TSM
        tsm = create_tsm(tsm_name, reader.channels, speed)
        tsm.run(reader, writer)

    finally:
        # Close files
        if reader:
            reader.close()
        if save and writer:
            writer.close()
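When save is false, the test runs the TSM into audiotsm's in-memory ArrayWriter instead of a file. A minimal sketch of that path, assuming audiotsm is installed and using an illustrative input file:

# Run a phase vocoder into an in-memory ArrayWriter (no output file).
from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader
from audiotsm.io.array import ArrayWriter

with WavReader("example.wav") as reader:  # hypothetical input file
    writer = ArrayWriter(reader.channels)
    phasevocoder(reader.channels, speed=1.5).run(reader, writer)
    samples = writer.data  # numpy array holding the time-stretched output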
Example #4
def 音频变速(wav音频数据列表, 声道数, 采样率, 目标速度, 临时文件夹):
    if 目标速度 == 1.0:
        return wav音频数据列表
    if 查找可执行程序('soundstretch') is not None:
        内存音频二进制缓存区 = io.BytesIO()
        fd, soundstretch临时输出文件 = tempfile.mkstemp()
        os.close(fd)
        wavfile.write(内存音频二进制缓存区, 采样率, wav音频数据列表)
        变速命令 = f'soundstretch stdin "{soundstretch临时输出文件}" -tempo={(目标速度 - 1) * 100}'
        变速线程 = subprocess.Popen(变速命令, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
        变速线程.communicate(内存音频二进制缓存区.getvalue())
        try:
            采样率, 音频区间处理后的数据 = wavfile.read(soundstretch临时输出文件)
        except Exception as e:
            出错时间 = int(time.time())

            fd, 原始数据存放位置 = tempfile.mkstemp(dir=临时文件夹, prefix=f'原始-{出错时间}-', suffix='.wav')
            os.close(fd)
            wavfile.write(原始数据存放位置, 采样率, wav音频数据列表)

            fd, 出错文件 = tempfile.mkstemp(dir=临时文件夹, prefix=f'变速-{出错时间}-', suffix='.wav')
            os.close(fd)
            try:
                copy(soundstretch临时输出文件, 出错文件)
            except OSError:
                pass

            fd, soundstretch临时输出文件 = tempfile.mkstemp(dir=临时文件夹, prefix=f'变速-{出错时间}-', suffix='.wav')
            os.close(fd)
            变速命令 = f'soundstretch stdin "{soundstretch临时输出文件}" -tempo={(目标速度 - 1) * 100}'
            变速线程 = subprocess.Popen(变速命令, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
            变速线程.communicate(内存音频二进制缓存区.getvalue())

            print(f'Soundstretch speed change failed; see these files for details:\n    original audio: {原始数据存放位置}\n    speed-changed audio: {soundstretch临时输出文件}\n')
            print(f'Details of the failing audio:\n    sample count: {len(wav音频数据列表)}\n    target speed: {目标速度}\n    target sample count: {len(wav音频数据列表) / 目标速度}')

            return wav音频数据列表

        os.remove(soundstretch临时输出文件)
    else:
        print(
            "SoundTouch's soundstretch was not found, so falling back to phasevocoder for the speed change. For better results, download the soundstretch build for your system from http://www.surina.net/soundtouch and put it on your PATH.\n")
        sFile = io.BytesIO()
        wavfile.write(sFile, 采样率, wav音频数据列表)
        sFile = io.BytesIO(sFile.getvalue())
        eFile = io.BytesIO()
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=目标速度)
                tsm.run(reader, writer)
        _, 音频区间处理后的数据 = wavfile.read(io.BytesIO(eFile.getvalue()))

    return 音频区间处理后的数据
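Note how the speed factor maps onto soundstretch's -tempo switch, which expects a percentage change rather than a ratio. A minimal sketch of a file-to-file invocation, assuming soundstretch is on the PATH and using list arguments to avoid shell quoting (helper name is illustrative):

import subprocess

def soundstretch_speed(in_wav, out_wav, speed):
    # speed 1.5 -> -tempo=50 (+50%); speed 0.8 -> -tempo=-20 (-20%)
    tempo = (speed - 1.0) * 100.0
    subprocess.run(["soundstretch", in_wav, out_wav, f"-tempo={tempo}"],
                   check=True)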
Example #5
def getStretchedData(low, sf):
    s = PLACEHOLDER_WAV_AUX
    playSpeed = 1 / sf
    if low:
        s = LOW_PLACEHOLDER_WAV_AUX
        playSpeed *= LOW_FACTOR
    with WavReader(s) as reader:
        with WavWriter(STRECH, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=playSpeed)
            tsm.run(reader, writer)
    _, s = wavfile.read(STRECH)
    d = np.zeros(s.shape)
    if low:
        d += s
    else:
        d += s * 0.81
    return d
Example #6
def getStretchedData(low, sf):
    s = "placeholder.wav"
    playSpeed = 1/sf
    if low:
        s = "lowPlaceholder.wav"
        playSpeed *= LOW_FACTOR
    with WavReader(s) as reader:
        with WavWriter("stretchholder.wav", reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=playSpeed)
            tsm.run(reader, writer)
    _, s = wavfile.read("stretchholder.wav")
    d = np.zeros(s.shape)
    if low:
        d += s
    else:
        d += s*0.81
    return d
Example #7
def resampling(x, coef=1., fs=16000):
    fn_r = tempfile.NamedTemporaryFile(mode="r", suffix=".wav")
    fn_w = tempfile.NamedTemporaryFile(mode="w", suffix=".wav")

    sf.write(fn_r.name, x, fs, "PCM_16")
    with WavReader(fn_r.name) as fr:
        with WavWriter(fn_w.name, fr.channels, fr.samplerate) as fw:
            tsm = wsola(channels=fr.channels,
                        speed=coef,
                        frame_length=256,
                        synthesis_hop=int(fr.samplerate / 70.0))
            tsm.run(fr, fw)

    y = resample(librosa.load(fn_w.name)[0], len(x)).astype(x.dtype)
    fn_r.close()
    fn_w.close()

    return y
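A usage sketch for resampling above, fed a synthetic sine; it assumes the same imports as the example (soundfile as sf, librosa, scipy.signal.resample, audiotsm's wsola) plus numpy:

import numpy as np

fs = 16000
t = np.linspace(0, 1.0, fs, endpoint=False)
x = (0.5 * np.sin(2 * np.pi * 220 * t)).astype(np.float32)

# Slow down by 20%, then resample back to the original length, so the
# output aligns sample-for-sample with the input.
y = resampling(x, coef=0.8, fs=fs)
assert y.shape == x.shape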
Example #8
    def change_bar_speed(self, audio_slice_id, target_bpm=120.0):
        if not os.path.isdir(c.LF_CH_BPM + self._audio_id):
            try:
                os.mkdir(c.LF_CH_BPM + self._audio_id)
            except FileExistsError:
                pass
        else:
            if os.path.isfile(c.LF_CH_BPM + self._audio_id + "/" +
                              audio_slice_id + ".wav"):
                return 0

        bar_bpm = 60.00 / (
            (self.beat_track[int(audio_slice_id.split("_")[1]) + 1] -
             self.beat_track[int(audio_slice_id.split("_")[1])]) / 8)
        with WavReader("{}{}/{}.wav".format(c.LF_SLICE, self._audio_id,
                                            audio_slice_id)) as r:
            with WavWriter(
                    "{}{}/{}.wav".format(c.LF_CH_BPM, self._audio_id,
                                         audio_slice_id), r.channels,
                    r.samplerate) as w:
                phasevocoder(r.channels, speed=target_bpm / bar_bpm).run(r, w)
        print("only came " + audio_slice_id)
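Worked example of the tempo arithmetic above, with illustrative values: if successive beat_track entries are 4.0 seconds apart and that span is treated as 8 beats, each beat lasts 0.5 s, so bar_bpm is 120 and the vocoder runs at target_bpm / bar_bpm.

gap_seconds = 4.0                   # beat_track[i + 1] - beat_track[i]
bar_bpm = 60.0 / (gap_seconds / 8)  # -> 120.0
speed = 120.0 / bar_bpm             # target_bpm / bar_bpm -> 1.0, no change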
Example #9
File: speedup.py  Project: spanth/SpeedUp
def main(args):
    frameRate = args.frame_rate
    SAMPLE_RATE = args.sample_rate
    SILENT_THRESHOLD = args.silent_threshold
    FRAME_SPREADAGE = args.frame_margin
    NEW_SPEED = [args.silent_speed, args.sounded_speed]
    FRAME_QUALITY = args.frame_quality
    AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition audio by quickly fading in/out (arbitrary magic number)
    TEMP_FOLDER = "TEMP"
    if not os.path.isdir(TEMP_FOLDER):
        os.makedirs(TEMP_FOLDER)

    if args.url is not None:
        INPUT_FILE = downloadFile(args.url)
    else:
        INPUT_FILE = args.input

    if len(args.output) >= 1:
        OUTPUT_FILE = args.output
    else:
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

    command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(
        FRAME_QUALITY) + " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
    subprocess.call(command, shell=True)

    command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(
        SAMPLE_RATE) + " -vn " + TEMP_FOLDER + "/audio.wav"
    subprocess.call(command, shell=True)

    command = "ffmpeg -i " + INPUT_FILE + " 2>&1"
    f = open(TEMP_FOLDER + "/params.txt", "w")
    subprocess.call(command, shell=True, stdout=f)
    f.close()

    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)

    f = open(TEMP_FOLDER + "/params.txt", 'r+')
    pre_params = f.read()
    f.close()
    params = pre_params.split('\n')
    for line in params:
        m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
        if m is not None:
            frameRate = float(m.group(1))

    samplesPerFrame = sampleRate / frameRate

    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    hasLoudAudio = np.zeros((audioFrameCount))

    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] !=
                shouldIncludeFrame[i - 1]):  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])

    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]

    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0

    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] *
                                   samplesPerFrame):int(chunk[1] *
                                                        samplesPerFrame)]

        sFile = TEMP_FOLDER + "/tempStart.wav"
        eFile = TEMP_FOLDER + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels,
                           reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels,
                                   speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData = np.concatenate(
            (outputAudioData, alteredAudioData / maxAudioVolume))

        # smooth out transition audio by quickly fading in/out

        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[
                outputPointer:
                endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
        else:
            premask = np.arange(
                AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis], 2,
                             axis=1)  # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer +
                            AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer -
                            AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                             (outputFrame - startOutputFrame))
            didItWork = copyFrame(TEMP_FOLDER, inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(TEMP_FOLDER, lastExistingFrame, outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)

    command = "ffmpeg -framerate " + str(
        frameRate
    ) + " -i " + TEMP_FOLDER + "/newFrame%06d.jpg -i " + TEMP_FOLDER + "/audioNew.wav -strict -2 " + OUTPUT_FILE
    subprocess.call(command, shell=True)

    rmtree(TEMP_FOLDER, ignore_errors=False)
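The fade-envelope trick above recurs in the later examples: a linear ramp is applied at both ends of every re-timed chunk to avoid clicks at chunk boundaries. A minimal sketch of just that masking step, with illustrative sizes:

import numpy as np

FADE = 400  # samples, roughly 9 ms at 44.1 kHz
ramp = np.arange(FADE) / FADE                     # 0.0 .. just under 1.0
mask = np.repeat(ramp[:, np.newaxis], 2, axis=1)  # stereo envelope

chunk = np.ones((44100, 2))       # stand-in for one re-timed audio chunk
chunk[:FADE] *= mask              # fade in
chunk[-FADE:] *= 1 - mask         # fade out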
Example #10
    def process_and_concatenate(self):
        audio_fade_envelope_size = 400  # smooth out transition's audio by quickly fading in/out

        self.save_audio()

        sample_rate, audio_data = wavfile.read(self.temp_folder + "/audio.wav")
        audio_sample_count = audio_data.shape[0]
        max_audio_volume = get_max_volume(audio_data)

        samples_per_frame = sample_rate / self.fps

        audio_frame_count = int(
            math.ceil(audio_sample_count / samples_per_frame))

        has_loud_audio = np.zeros(audio_frame_count)

        for i in range(audio_frame_count):
            start = int(i * samples_per_frame)
            end = min(int((i + 1) * samples_per_frame), audio_sample_count)
            audio_chunks = audio_data[start:end]
            max_chunks_volume = float(
                get_max_volume(audio_chunks)) / max_audio_volume
            if max_chunks_volume >= SILENT_THRESHOLD:
                has_loud_audio[i] = 1

        chunks = [[0, 0, 0]]
        should_include_frame = np.zeros(audio_frame_count)

        last_idx = 0
        for i in range(audio_frame_count):
            start = int(max(0, i - FRAME_SPREADAGE))
            end = int(min(audio_frame_count, i + 1 + FRAME_SPREADAGE))
            should_include_frame[i] = np.max(has_loud_audio[start:end])
            if i >= 1 and should_include_frame[i] != should_include_frame[
                    i - 1]:  # Did we flip?
                chunks.append([chunks[-1][1], i, should_include_frame[i - 1]])
            last_idx = i

        chunks.append([
            chunks[-1][1], audio_frame_count,
            should_include_frame[last_idx - 1]
        ])
        chunks = chunks[1:]

        output_audio_data = np.zeros((0, audio_data.shape[1]))
        output_pointer = 0

        last_existing_frame = None

        duration = self.get_duration()
        frames_num = int(float(duration) * self.fps)
        signed_frames = [False for _ in range(frames_num)]
        output_frames = []

        for chunk in chunks:
            audio_chunk = audio_data[int(chunk[0] * samples_per_frame
                                         ):int(chunk[1] * samples_per_frame)]

            s_file = self.temp_folder + "/tempStart.wav"
            e_file = self.temp_folder + "/tempEnd.wav"
            wavfile.write(s_file, SAMPLE_RATE, audio_chunk)
            with WavReader(s_file) as reader:
                with WavWriter(e_file, reader.channels,
                               reader.samplerate) as writer:
                    tsm = phasevocoder(reader.channels,
                                       speed=NEW_SPEED[int(chunk[2])])
                    tsm.run(reader, writer)
            _, altered_audio_data = wavfile.read(e_file)
            leng = altered_audio_data.shape[0]
            end_pointer = output_pointer + leng
            output_audio_data = np.concatenate(
                (output_audio_data, altered_audio_data / max_audio_volume))

            if leng < audio_fade_envelope_size:
                output_audio_data[output_pointer:end_pointer] = 0
            else:
                pre_mask = np.arange(
                    audio_fade_envelope_size) / audio_fade_envelope_size
                mask = np.repeat(pre_mask[:, np.newaxis], 2, axis=1)
                output_audio_data[output_pointer:output_pointer +
                                  audio_fade_envelope_size] *= mask
                output_audio_data[
                    end_pointer -
                    audio_fade_envelope_size:end_pointer] *= 1 - mask

            start_output_frame = int(
                math.ceil(output_pointer / samples_per_frame))
            end_output_frame = int(math.ceil(end_pointer / samples_per_frame))

            for outputFrame in range(start_output_frame, end_output_frame):
                input_frame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                  (outputFrame - start_output_frame))
                if input_frame < frames_num - 2:
                    signed_frames[input_frame + 1] = True
                    last_existing_frame = input_frame
                else:
                    signed_frames[last_existing_frame] = True
                output_frames.append(outputFrame)

            output_pointer = end_pointer

        j = 0
        for i, frame_sign in enumerate(signed_frames):
            if frame_sign:
                self.copy_frame(i, j)
                j += 1
        wavfile.write(self.temp_folder + "/audioNew.wav", SAMPLE_RATE,
                      output_audio_data)

        self.final_concatenation()
        delete_path(self.temp_folder)
Example #11
source_filename = f"{abbr(txt)}-{lang}"
source_filepath = os.path.join(temp_dir, source_filename + ".mp3")

voice = gtts.gTTS(txt, lang=lang)
voice.save(source_filepath)

if abs(octaves) > 0.1:
    import pydub
    sound = pydub.AudioSegment.from_mp3(source_filepath)
    sample_rate = int(sound.frame_rate * (2**octaves))
    modified = sound._spawn(sound.raw_data,
                            overrides={"frame_rate": sample_rate})

    oct_str = str(octaves)
    if oct_str[0] != '-':
        oct_str = '+' + oct_str
    sampled_filename = f"tmp_{source_filename}{oct_str}.wav"
    sampled_filepath = os.path.join(temp_dir, sampled_filename)
    modified.export(sampled_filepath, format="wav")

    from audiotsm import phasevocoder
    from audiotsm.io.wav import WavReader, WavWriter

    output_filepath = f"{os.path.basename(source_filename)}{oct_str}_{speed}.wav"
    with WavReader(sampled_filepath) as reader:
        with WavWriter(output_filepath, reader.channels,
                       reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels,
                               speed=speed * (sound.frame_rate / sample_rate))
            tsm.run(reader, writer)
Example #12
def splitVideo(chunks, NEW_SPEED, frameRate, zooms, samplesPerFrame,
               SAMPLE_RATE, audioData, extension, VERBOSE):
    """
    This function is responsible for outputting a new image sequence in the correct order.

    splitVideo is also responsible for creating Renames.txt. Copying every jpeg is
    computationally expensive; renaming the file is less so, but we still need to
    create the cache folder, and we can't do that if the program has no idea what
    it renamed and where. Renames.txt will be read in originalMethod.py to
    recreate the original image sequence.

    To avoid desyncing the audio, we need audioData and must proceed roughly the
    same way originalAudio.py does. Specifically, get the length of the new audio
    chunk.

    If the speed set is 1, this is easy. If not, we have to create a new file
    modified to the new speed with audiotsm, then read that file to get the
    length.
    """
    print('Creating new video.')
    num = 0
    chunk_len = str(len(chunks))
    outputPointer = 0
    Renames = []
    lastExisting = None
    for chunk in chunks:
        if (NEW_SPEED[int(chunk[2])] < 99999):
            audioChunk = audioData[int(chunk[0] *
                                       samplesPerFrame):int(chunk[1] *
                                                            samplesPerFrame)]
            if (NEW_SPEED[int(chunk[2])] == 1):
                leng = len(audioChunk)
            else:
                sFile = TEMP + '/tempStart2.wav'
                eFile = TEMP + '/tempEnd2.wav'
                wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels,
                                   reader.samplerate) as writer:
                        phasevocoder(reader.channels,
                                     speed=NEW_SPEED[int(chunk[2])]).run(
                                         reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]

            endPointer = outputPointer + leng

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                 (outputFrame - startOutputFrame))

                src = ''.join(
                    [CACHE, '/frame{:06d}'.format(inputFrame + 1), '.jpg'])
                dst = ''.join(
                    [TEMP, '/newFrame{:06d}'.format(outputFrame + 1), '.jpg'])
                if (os.path.isfile(src)):
                    lastExisting = inputFrame
                    if (inputFrame in zooms):
                        resize(src, dst, zooms[inputFrame])
                    else:
                        os.rename(src, dst)
                        Renames.extend([src, dst])
                else:
                    if lastExisting is None:
                        print(src + ' does not exist.')
                        raise IOError('Fatal error: no existing frame exists.')
                    src = ''.join([
                        CACHE, '/frame{:06d}'.format(lastExisting + 1), '.jpg'
                    ])
                    if (os.path.isfile(src)):
                        if (lastExisting in zooms):
                            resize(src, dst, zooms[lastExisting])
                        else:
                            os.rename(src, dst)
                            Renames.extend([src, dst])
                    else:
                        # uh oh, we need to find the file we just renamed!
                        myFile = None
                        for i in range(0, len(Renames), 2):
                            if (Renames[i] == src):
                                myFile = Renames[i + 1]
                                break
                        if (myFile is not None):
                            copyfile(myFile, dst)
                        else:
                            raise IOError(
                                f'Error! The file {src} does not exist.')

            outputPointer = endPointer

        num += 1
        if (num % 10 == 0):
            print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))
    print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))

    with open(f'{TEMP}/Renames.txt', 'w') as f:
        for item in Renames:
            f.write(f"{item}\n")

    print('Creating finished video. (This can take a while)')
    cmd = [
        'ffmpeg', '-y', '-framerate',
        str(frameRate), '-i', f'{TEMP}/newFrame%06d.jpg',
        f'{TEMP}/output{extension}'
    ]
    if (not VERBOSE):
        cmd.extend(['-nostats', '-loglevel', '0'])
    subprocess.call(cmd)
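Renames.txt stores flat src/dst pairs, one path per line, appended as [src, dst]. A hypothetical sketch of how originalMethod.py could read it back and undo the renames; only the file format comes from the code above, the reader itself is illustrative:

import os

with open(f'{TEMP}/Renames.txt') as f:  # TEMP as in splitVideo above
    paths = [line.rstrip('\n') for line in f]

# Pair the flat list back up as (src, dst) and reverse each move.
for src, dst in zip(paths[0::2], paths[1::2]):
    os.rename(dst, src)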
Example #13
    def process(self):
        global isCanceld
        try:
            if os.path.exists(TEMP_FOLDER):
                msg = input(
                    'Warning: are you sure you want to delete the videocuts_tmp folder? '
                )
                if msg.lower() in ('yes', 'y'):
                    deletePath(TEMP_FOLDER)

            Thread(target=self.timer).start()
            global gain
            gain = 1.2
            self.new_video_size = 'N/A'
            self.new_video_length = 'N/A'
            Extras = ""
            frameRate = float(60)
            SAMPLE_RATE = int(self.frame_rate)
            SILENT_THRESHOLD = float(self.silence_threshold)
            FRAME_SPREADAGE = int(self.frame_margin)
            NEW_SPEED = [float(self.silent_speed), float(self.play_speed)]
            gain = 0.6
            INPUT_FILE = self.downloadFile(str(self.video_url))
            if INPUT_FILE == '':
                return
            FRAME_QUALITY = self.frame_quality

            assert INPUT_FILE is not None, "You did not specify an input file. You must specify an input file without spaces."

            OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

            AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition audio by quickly fading in/out (arbitrary magic number)
            createPath(TEMP_FOLDER)

            global dir
            dir = os.getcwd()
            if isCanceld:
                return

            print(
                ' Step 1 - Frame quality has been assessed and is processing ')
            cmdary = [
                resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '-qscale:v',
                str(FRAME_QUALITY), TEMP_FOLDER + "/frame%06d.jpg",
                '-hide_banner'
            ]
            subprocess.call(cmdary, cwd=dir, shell=True)
            if isCanceld:
                return
            print(
                ' Step 1 - Frame quality processing has successfully completed '
            )

            time.sleep(2)
            if isCanceld:
                return
            print(' Step 2 - Sample Rate has been assessed and is processing ')
            cmdary = [
                resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '2>&1', '-ab',
                '160k', '-ac', '2', '-ar',
                str(SAMPLE_RATE), '-vn', TEMP_FOLDER + "/audio.wav"
            ]
            subprocess.call(cmdary, cwd=dir, shell=True)

            if isCanceld:
                return

            print(
                ' Step 2 - Sample Rate processing has successfully completed ')

            time.sleep(2)
            if isCanceld:
                return
            print(
                ' Step 3 - Video Frames are processing. This might take a while... '
            )
            cmdary = [resource_path('ffmpeg.exe'), "-i", INPUT_FILE, '2>&1']
            open(TEMP_FOLDER + "/params.txt", "w")
            subprocess.call(cmdary, cwd=dir, shell=True)
            if isCanceld:
                return
            print(
                ' Step 3 - Video Frames processing has successfully completed '
            )
            time.sleep(2)
            sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
            audioSampleCount = audioData.shape[0]
            maxAudioVolume = getMaxVolume(audioData)

            cap = cv2.VideoCapture(INPUT_FILE)
            fps = cap.get(cv2.CAP_PROP_FPS)
            f = open(TEMP_FOLDER + "/params.txt", 'r+')
            pre_params = f.read()
            f.close()
            params = pre_params.split('\n')
            for line in params:
                m = re.search(' ([0-9]*\.[0-9]*) fps,', line)
                if m is None:
                    frameRate = float(fps)
                if m is not None:
                    frameRate = float(m.group(1))

            samplesPerFrame = sampleRate / frameRate

            audioFrameCount = int(math.ceil(audioSampleCount /
                                            samplesPerFrame))

            hasLoudAudio = np.zeros(audioFrameCount)

            for i in range(audioFrameCount):
                start = int(i * samplesPerFrame)
                end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
                audiochunks = audioData[start:end]
                maxchunksVolume = float(
                    getMaxVolume(audiochunks)) / maxAudioVolume
                if maxchunksVolume >= SILENT_THRESHOLD:
                    hasLoudAudio[i] = 1

            chunks = [[0, 0, 0]]
            shouldIncludeFrame = np.zeros(audioFrameCount)
            for i in range(audioFrameCount):
                start = int(max(0, i - FRAME_SPREADAGE))
                end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
                shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
                if i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[
                        i - 1]:  # Did we flip?
                    chunks.append(
                        [chunks[-1][1], i, shouldIncludeFrame[i - 1]])

            chunks.append(
                [chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
            chunks = chunks[1:]

            outputAudioData = np.zeros((0, audioData.shape[1]))
            outputPointer = 0

            lastExistingFrame = None
            for chunk in chunks:
                audioChunk = audioData[int(chunk[0] * samplesPerFrame
                                           ):int(chunk[1] * samplesPerFrame)]

                sFile = TEMP_FOLDER + "/tempStart.wav"
                eFile = TEMP_FOLDER + "/tempEnd.wav"
                wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels,
                                   reader.samplerate) as writer:
                        tsm = phasevocoder(reader.channels,
                                           speed=NEW_SPEED[int(chunk[2])])
                        tsm.run(reader, writer)
                _, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]
                endPointer = outputPointer + leng
                outputAudioData = np.concatenate(
                    (outputAudioData, alteredAudioData / maxAudioVolume))

                # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

                # smooth out transition audio by quickly fading in/out

                if leng < AUDIO_FADE_ENVELOPE_SIZE:
                    outputAudioData[
                        outputPointer:
                        endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
                else:
                    premask = np.arange(
                        AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
                    mask = np.repeat(
                        premask[:, np.newaxis], 2,
                        axis=1)  # make the fade-envelope mask stereo
                    outputAudioData[outputPointer:outputPointer +
                                    AUDIO_FADE_ENVELOPE_SIZE] *= mask
                    outputAudioData[
                        endPointer -
                        AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

                startOutputFrame = int(
                    math.ceil(outputPointer / samplesPerFrame))
                endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
                for outputFrame in range(startOutputFrame, endOutputFrame):
                    inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                     (outputFrame - startOutputFrame))
                    didItWork = copyFrame(inputFrame, outputFrame)
                    if didItWork:
                        lastExistingFrame = inputFrame
                    else:
                        copyFrame(lastExistingFrame, outputFrame)

                outputPointer = endPointer

            wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE,
                          outputAudioData)
            '''
            outputFrame = math.ceil(outputPointer/samplesPerFrame)
            for endGap in range(outputFrame,audioFrameCount):
                copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
            '''

            if isCanceld:
                return
            print(' Step 4 - Finalizing.... Please wait')
            cmdary = [
                resource_path('ffmpeg.exe'), '-framerate',
                str(frameRate), "-i", TEMP_FOLDER + "/newFrame%06d.jpg", '-i',
                TEMP_FOLDER + "/audioNew.wav", '-strict', '-2' + str(Extras),
                OUTPUT_FILE
            ]
            subprocess.call(cmdary, cwd=dir, shell=True)

            if isCanceld:
                return
            print(' Video processing finished successfully.')

            deletePath(TEMP_FOLDER)
            path = os.path.dirname(INPUT_FILE)

            global stpTimer
            stpTimer = True
            self.new_video_size = get_size(OUTPUT_FILE) + ' MB'
            self.output_parameters.append(self.new_video_size)
            self.new_video_length = str(self.get_length(OUTPUT_FILE))
            self.output_parameters.append(self.new_video_length)

        except Exception as e:
            print(' Processing Video Failed! ')
            if str(e) != 'main thread is not in main loop':
                print('error message.', str(e))
            deletePath(TEMP_FOLDER)

        print(self.output_parameters)
Example #14
def videoProcess(frame_rate, sample_rate, silent_threshold, frame_margin,
                 silent_speed, sounded_speed, url, input_file, output_file,
                 frame_quality):
    try:
        print(frame_rate, sample_rate, silent_threshold, frame_margin,
              silent_speed, sounded_speed, url, input_file, output_file,
              frame_quality)

        New_Speed_silent_and_sounded = [silent_speed, sounded_speed]

        if url:
            name = YouTube(url).streams.first().download()
            renamed = name.replace(' ', '_')
            os.rename(name, renamed)
            Input_Video = renamed
        else:
            Input_Video = input_file

        assert Input_Video is not None, "enter an input video"

        if len(output_file) >= 1:
            Output_Video = output_file

        else:
            dot_position = Input_Video.rfind(".")
            Output_Video = Input_Video[:dot_position] + "NEWVIDEO" + Input_Video[
                dot_position:]

        # print ( Output_Video)
        Audio_fade_envelope_size = 400

        try:
            os.mkdir(TEMP)
        except OSError:
            assert False, "Directory already exists"

        command = "ffmpeg -i " + Input_Video + " -qscale:v " + str(
            frame_quality) + " " + TEMP + "/old_frame%06d.jpg -hide_banner"
        subprocess.call(command, shell=True)

        command = "ffmpeg -i " + Input_Video + " -ab 160k -ac 2 -ar " + str(
            sample_rate) + " -vn " + TEMP + "/audio.wav"

        subprocess.call(command, shell=True)

        sampleRate, audioData = wavfile.read(TEMP + "/audio.wav")
        audioSampleCount = audioData.shape[0]
        maxAudioVolume = GetVolume(audioData)

        # print("  please  ")
        samplesPerFrame = 1470  # hardcoded 44100 Hz / 30 fps

        audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

        hasLoudAudio = numpy.zeros((audioFrameCount))

        for i in range(audioFrameCount):
            start = int(i * samplesPerFrame)
            end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
            audiochunks = audioData[start:end]
            maxchunksVolume = float(GetVolume(audiochunks)) / maxAudioVolume
            if maxchunksVolume >= silent_threshold:
                hasLoudAudio[i] = 1

        chunks = [[0, 0, 0]]
        shouldIncludeFrame = numpy.zeros((audioFrameCount))
        for i in range(audioFrameCount):
            start = int(max(0, i - frame_margin))
            end = int(min(audioFrameCount, i + 1 + frame_margin))
            shouldIncludeFrame[i] = numpy.max(hasLoudAudio[start:end])
            if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i - 1]):
                chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])

        chunks.append(
            [chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
        chunks = chunks[1:]

        outputAudioData = numpy.zeros((0, audioData.shape[1]))
        outputPointer = 0

        lastExistingFrame = None
        for chunk in chunks:
            audioChunk = audioData[int(chunk[0] *
                                       samplesPerFrame):int(chunk[1] *
                                                            samplesPerFrame)]

            sFile = TEMP + "/tempStart.wav"
            eFile = TEMP + "/tempEnd.wav"
            wavfile.write(sFile, sample_rate, audioChunk)
            with WavReader(sFile) as reader:
                with WavWriter(eFile, reader.channels,
                               reader.samplerate) as writer:
                    tsm = phasevocoder(reader.channels,
                                       speed=New_Speed_silent_and_sounded[int(
                                           chunk[2])])
                    tsm.run(reader, writer)
            _, alteredAudioData = wavfile.read(eFile)
            leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng
            outputAudioData = numpy.concatenate(
                (outputAudioData, alteredAudioData / maxAudioVolume))

            if leng < Audio_fade_envelope_size:
                outputAudioData[outputPointer:endPointer] = 0
            else:
                premask = numpy.arange(
                    Audio_fade_envelope_size) / Audio_fade_envelope_size
                mask = numpy.repeat(premask[:, numpy.newaxis], 2, axis=1)
                outputAudioData[outputPointer:outputPointer +
                                Audio_fade_envelope_size] *= mask
                outputAudioData[
                    endPointer -
                    Audio_fade_envelope_size:endPointer] *= 1 - mask

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for op_frame in range(startOutputFrame, endOutputFrame):
                ip_frame = int(chunk[0] +
                               New_Speed_silent_and_sounded[int(chunk[2])] *
                               (op_frame - startOutputFrame))
                didItWork = FRameCopy(ip_frame, op_frame)
                if didItWork:
                    lastExistingFrame = ip_frame
                else:
                    FRameCopy(lastExistingFrame, op_frame)

            outputPointer = endPointer

        wavfile.write(TEMP + "/audioNew.wav", sample_rate, outputAudioData)

        command = "ffmpeg -framerate " + str(
            frame_rate
        ) + " -i " + TEMP + "/new_frame%06d.jpg -i " + TEMP + "/audioNew.wav -strict -2 " + Output_Video
        subprocess.call(command, shell=True)

        try:
            rmtree(TEMP, ignore_errors=False)
        except OSError:
            print("Delete failed")

        return "done"
    except Exception:
        return "nothing"
Example #15
# coding: utf-8

from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader, WavWriter

with WavReader('qbhexamples.wav') as reader:
    print(reader.channels, reader.samplerate)
    with WavWriter('qbh_half.wav', reader.channels, reader.samplerate) as writer:
        tsm = phasevocoder(reader.channels, speed=0.5)
        tsm.run(reader, writer)
    print("Finished; the with blocks close both files.")


Example #16
def process_video(args):
    (TEMP_FOLDER, frameRate, SAMPLE_RATE, NEW_SPEED, SILENT_THRESHOLD,
     FRAME_SPREADAGE, AUDIO_FADE_ENVELOPE_SIZE) = args


    sampleRate, audioData = wavfile.read(TEMP_FOLDER+"/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)


    samplesPerFrame = sampleRate/frameRate

    audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))

    hasLoudAudio = np.zeros((audioFrameCount))

    for i in range(audioFrameCount):
        start = int(i*samplesPerFrame)
        end = min(int((i+1)*samplesPerFrame),audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    chunks = [[0,0,0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0,i-FRAME_SPREADAGE))
        end = int(min(audioFrameCount,i+1+FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]): # Did we flip?
            chunks.append([chunks[-1][1],i,shouldIncludeFrame[i-1]])

    chunks.append([chunks[-1][1],audioFrameCount,shouldIncludeFrame[i-1]])
    chunks = chunks[1:]

    outputAudioData = np.zeros((0,audioData.shape[1]))
    outputPointer = 0

    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]
        
        sFile = TEMP_FOLDER+"/tempStart.wav"
        eFile = TEMP_FOLDER+"/tempEnd.wav"
        wavfile.write(sFile,SAMPLE_RATE,audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer+leng
        outputAudioData = np.concatenate((outputAudioData,alteredAudioData/maxAudioVolume))

        #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

        # smooth out transition audio by quickly fading in/out
        
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[outputPointer:endPointer] = 0 # audio is less than 0.01 sec, let's just remove it.
        else:
            premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE)/AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis],2,axis=1) # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer+AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer-AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1-mask

        startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0]+NEW_SPEED[int(chunk[2])]*(outputFrame-startOutputFrame))
            didItWork = copyFrame(TEMP_FOLDER,inputFrame,outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(TEMP_FOLDER,lastExistingFrame,outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER+"/audioNew.wav",SAMPLE_RATE,outputAudioData)

Example #17
def jumpcutter(input_file, frame_rate):
    AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition audio by quickly fading in/out (arbitrary magic number)

    createPath(TEMP_FOLDER)

    command = "ffmpeg -i "+input_file+" -qscale:v "+str(FRAME_QUALITY)+" "+TEMP_FOLDER+"/frame%06d.jpg -hide_banner"
    subprocess.call(command, shell=True)

    noise_reduction = " -af afftdn"
    command = "ffmpeg -i "+input_file+noise_reduction+" -ab 160k -ac 2 -ar "+str(SAMPLE_RATE)+" -vn "+TEMP_FOLDER+"/audio.wav"

    subprocess.call(command, shell=True)

    command = "ffmpeg -i "+input_file+" 2>&1"
    f = open(TEMP_FOLDER+"/params.txt", "w")
    subprocess.call(command, shell=True, stdout=f)



    sampleRate, audioData = wavfile.read(TEMP_FOLDER+"/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)
    print("maxAudioVolume:", maxAudioVolume)

    f = open(TEMP_FOLDER+"/params.txt", 'r+')
    pre_params = f.read()
    f.close()
    params = pre_params.split('\n')
    for line in params:
        m = re.search('Stream #.*Video.* ([0-9]*) fps',line)
        if m is not None:
            frame_rate = float(m.group(1))

    samplesPerFrame = sampleRate/frame_rate

    audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))

    hasLoudAudio = np.zeros((audioFrameCount))



    for i in range(audioFrameCount):
        start = int(i*samplesPerFrame)
        end = min(int((i+1)*samplesPerFrame),audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
        print("maxchunksVolume:", maxchunksVolume, "chunk max:", float(getMaxVolume(audiochunks)), "overall max:", maxAudioVolume)
        #maxchunksVolume = float(getMaxVolume(audiochunks))
        #if maxchunksVolume >= SILENT_THRESHOLD_ABS:
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    chunks = [[0,0,0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0,i-FRAME_SPREADAGE))
        end = int(min(audioFrameCount,i+1+FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]): # Did we flip?
            chunks.append([chunks[-1][1],i,shouldIncludeFrame[i-1]])

    chunks.append([chunks[-1][1],audioFrameCount,shouldIncludeFrame[i-1]])
    chunks = chunks[1:]

    outputAudioData = np.zeros((0,audioData.shape[1]))
    outputPointer = 0

    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]
        
        sFile = TEMP_FOLDER+"/tempStart.wav"
        eFile = TEMP_FOLDER+"/tempEnd.wav"
        wavfile.write(sFile,SAMPLE_RATE,audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer+leng
        outputAudioData = np.concatenate((outputAudioData,alteredAudioData/maxAudioVolume))

        #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

        # smooth out transition audio by quickly fading in/out
        
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[outputPointer:endPointer] = 0 # audio is less than 0.01 sec, let's just remove it.
        else:
            premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE)/AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis],2,axis=1) # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer+AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer-AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1-mask

        startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0]+NEW_SPEED[int(chunk[2])]*(outputFrame-startOutputFrame))
            didItWork = copyFrame(inputFrame,outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(lastExistingFrame,outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER+"/audioNew.wav",SAMPLE_RATE,outputAudioData)

    '''
    outputFrame = math.ceil(outputPointer/samplesPerFrame)
    for endGap in range(outputFrame,audioFrameCount):
        copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
    '''
    output_file = inputToOutputFilename(input_file)
    command = "ffmpeg -framerate "+str(frame_rate)+" -i "+TEMP_FOLDER+"/newFrame%06d.jpg -i "+TEMP_FOLDER+"/audioNew.wav -strict -2 "+output_file
    subprocess.call(command, shell=True)

    deletePath(TEMP_FOLDER)
Example #18
def process(pid, threads, INPUT_FILE, OUTPUT_FILE, FRAME_RATE, SAMPLE_RATE,
            SILENT_THRESHOLD, FRAME_SPREADAGE, NEW_SPEED, FRAME_QUALITY):
    try:
        TEMP_FOLDER = "TEMP_" + str(pid)
        AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition audio by quickly fading in/out (arbitrary magic number)
        misc_func.createPath(TEMP_FOLDER)

        #image extraction
        command = 'ffmpeg -v quiet -threads ' + str(
            threads
        ) + ' -thread_queue_size 512 -i "' + INPUT_FILE + '" -qscale:v ' + str(
            FRAME_QUALITY) + ' ' + TEMP_FOLDER + '/frame%06d.jpg -hide_banner'
        subprocess.call(command, shell=True)

        #audio extraction
        command = 'ffmpeg -v quiet -threads ' + str(
            threads
        ) + ' -thread_queue_size 512 -i "' + INPUT_FILE + '" -ab 160k -ac 2 -ar ' + str(
            SAMPLE_RATE) + " -vn " + TEMP_FOLDER + "/audio.wav > NUL"
        subprocess.call(command, shell=True)

        #original parameter extraction
        command = 'ffmpeg -i "' + INPUT_FILE + '" 2>&1'
        f = open(TEMP_FOLDER + "/params.txt", "w")
        subprocess.call(command, shell=True, stdout=f)
        f.close()

        sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
        audioSampleCount = audioData.shape[0]
        maxAudioVolume = misc_func.getMaxVolume(audioData)

        if FRAME_RATE is None:
            FRAME_RATE = misc_func.getFrameRate(INPUT_FILE)

        f = open(TEMP_FOLDER + "/params.txt", 'r+')
        pre_params = f.read()
        f.close()
        params = pre_params.split('\n')

        samplesPerFrame = sampleRate / FRAME_RATE

        audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

        hasLoudAudio = np.zeros((audioFrameCount))

        for i in range(audioFrameCount):
            start = int(i * samplesPerFrame)
            end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
            audiochunks = audioData[start:end]
            maxchunksVolume = float(
                misc_func.getMaxVolume(audiochunks)) / maxAudioVolume
            if maxchunksVolume >= SILENT_THRESHOLD:
                hasLoudAudio[i] = 1

        chunks = [[0, 0, 0]]
        shouldIncludeFrame = np.zeros((audioFrameCount))
        for i in range(audioFrameCount):
            start = int(max(0, i - FRAME_SPREADAGE))
            end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
            shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
            if (i >= 1 and shouldIncludeFrame[i] !=
                    shouldIncludeFrame[i - 1]):  # Did we flip?
                chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])

        chunks.append(
            [chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
        chunks = chunks[1:]

        outputAudioData = np.zeros((0, audioData.shape[1]))
        outputPointer = 0

        lastExistingFrame = None
        for chunk in chunks:
            audioChunk = audioData[int(chunk[0] *
                                       samplesPerFrame):int(chunk[1] *
                                                            samplesPerFrame)]

            sFile = TEMP_FOLDER + "/tempStart.wav"
            eFile = TEMP_FOLDER + "/tempEnd.wav"
            wavfile.write(sFile, SAMPLE_RATE, audioChunk)
            with WavReader(sFile) as reader:
                with WavWriter(eFile, reader.channels,
                               reader.samplerate) as writer:
                    tsm = phasevocoder(reader.channels,
                                       speed=NEW_SPEED[int(chunk[2])])
                    tsm.run(reader, writer)
            _, alteredAudioData = wavfile.read(eFile)
            leng = alteredAudioData.shape[0]
            endPointer = outputPointer + leng
            outputAudioData = np.concatenate(
                (outputAudioData, alteredAudioData / maxAudioVolume))

            # smooth out transition audio by quickly fading in/out

            if leng < AUDIO_FADE_ENVELOPE_SIZE:
                outputAudioData[
                    outputPointer:
                    endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
            else:
                premask = np.arange(
                    AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
                mask = np.repeat(premask[:, np.newaxis], 2,
                                 axis=1)  # make the fade-envelope mask stereo
                outputAudioData[outputPointer:outputPointer +
                                AUDIO_FADE_ENVELOPE_SIZE] *= mask
                outputAudioData[
                    endPointer -
                    AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                 (outputFrame - startOutputFrame))
                didItWork = misc_func.copyFrame(inputFrame, outputFrame,
                                                TEMP_FOLDER)
                if didItWork:
                    lastExistingFrame = inputFrame
                else:
                    misc_func.copyFrame(lastExistingFrame, outputFrame,
                                        TEMP_FOLDER)

            outputPointer = endPointer

        wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE,
                      outputAudioData)

        command = 'ffmpeg -v quiet -threads ' + str(
            threads
        ) + ' -thread_queue_size 1024 -framerate ' + str(
            FRAME_RATE
        ) + ' -i ' + TEMP_FOLDER + '/newFrame%06d.jpg -i ' + TEMP_FOLDER + '/audioNew.wav -strict -2 "' + OUTPUT_FILE + '"'
        subprocess.call(command, shell=True)
        misc_func.deletePath(TEMP_FOLDER)

    except Exception as e:
        print(e)
Example #19
def execute(input_file="",
            url="",
            output_file="",
            silent_threshold=0.03,
            sounded_speed=1.00,
            silent_speed=5.00,
            frame_margin=1,
            sample_rate=44100,
            frame_quality=3):
    SAMPLE_RATE = sample_rate
    SILENT_THRESHOLD = silent_threshold
    FRAME_SPREADAGE = frame_margin
    NEW_SPEED = [silent_speed, sounded_speed]
    if url:
        INPUT_FILE = downloadFile(url)
    else:
        INPUT_FILE = input_file
    FRAME_QUALITY = frame_quality

    assert INPUT_FILE, "You must specify an input file."

    if len(output_file) >= 1:
        OUTPUT_FILE = output_file
    else:
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)
    print("Saving to: " + OUTPUT_FILE)

    AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition's audio by quickly fading in/out (arbitrary magic number whatever)

    createPath(TEMP_FOLDER)

    checkForFFMPEG()

    command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(
        FRAME_QUALITY) + " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
    subprocess.call(command, shell=True)

    command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(
        SAMPLE_RATE) + " -vn " + TEMP_FOLDER + "/audio.wav"

    subprocess.call(command, shell=True)

    sampleRate, audioData = wavfile.read(TEMP_FOLDER + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)

    frameRate = findFramerate(INPUT_FILE)

    samplesPerFrame = sampleRate / frameRate

    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))

    hasLoudAudio = np.zeros((audioFrameCount))

    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1

    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] !=
                shouldIncludeFrame[i - 1]):  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])

    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]

    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0

    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] *
                                   samplesPerFrame):int(chunk[1] *
                                                        samplesPerFrame)]

        sFile = TEMP_FOLDER + "/tempStart.wav"
        eFile = TEMP_FOLDER + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels,
                           reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels,
                                   speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData = np.concatenate(
            (outputAudioData, alteredAudioData / maxAudioVolume))

        # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

        # smooth out transition's audio by quickly fading in/out

        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[
                outputPointer:
                endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
        else:
            premask = np.arange(
                AUDIO_FADE_ENVELOPE_SIZE) / AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis], 2,
                             axis=1)  # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer +
                            AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer -
                            AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                             (outputFrame - startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(lastExistingFrame, outputFrame)

        outputPointer = endPointer

    wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE, outputAudioData)
    '''
    outputFrame = math.ceil(outputPointer/samplesPerFrame)
    for endGap in range(outputFrame,audioFrameCount):
        copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
    '''

    command = "ffmpeg -y -framerate " + str(
        frameRate
    ) + " -i " + TEMP_FOLDER + "/newFrame%06d.jpg -i " + TEMP_FOLDER + "/audioNew.wav -strict -2 " + OUTPUT_FILE
    subprocess.call(command, shell=True)

    deletePath(TEMP_FOLDER)
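A hypothetical invocation of this pipeline (it assumes ffmpeg is on the PATH and that the helpers used above — downloadFile, inputToOutputFilename, createPath, checkForFFMPEG, getMaxVolume, findFramerate, copyFrame, deletePath — are defined elsewhere in the module; the file names are placeholders):

execute(input_file="lecture.mp4",
        output_file="lecture_jumpcut.mp4",
        silent_threshold=0.03,
        sounded_speed=1.00,
        silent_speed=8.00)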
Example #20
def splitAudio(filename, chunks, samplesPerFrame, NEW_SPEED, audioData,
               SAMPLE_RATE, maxAudioVolume):
    """
    This function creates new audio based on the chunk data and the numpy audio data.
    """

    outputAudioData = []
    outputPointer = 0
    mask = [x / FADE_SIZE for x in range(FADE_SIZE)]
    num = 0
    chunk_len = str(len(chunks))
    for chunk in chunks:
        if (NEW_SPEED[chunk[2]] < 99999):
            start = int(chunk[0] * samplesPerFrame)
            end = int(chunk[1] * samplesPerFrame)
            audioChunk = audioData[start:end]

            sFile = ''.join([TEMP, '/tempStart.wav'])
            eFile = ''.join([TEMP, '/tempEnd.wav'])
            wavfile.write(sFile, SAMPLE_RATE, audioChunk)
            if (NEW_SPEED[chunk[2]] == 1):
                __, samefile = wavfile.read(sFile)
                leng = len(audioChunk)

                outputAudioData.extend((samefile / maxAudioVolume).tolist())
            else:
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels,
                                   reader.samplerate) as writer:
                        phasevocoder(reader.channels,
                                     speed=NEW_SPEED[chunk[2]]).run(
                                         reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]

                outputAudioData.extend(
                    (alteredAudioData / maxAudioVolume).tolist())
            endPointer = outputPointer + leng

            # smooth out transition's audio by quickly fading in/out
            if (leng < FADE_SIZE):
                for i in range(outputPointer, endPointer):
                    try:
                        outputAudioData[i][0] = 0
                        outputAudioData[i][1] = 0
                    except TypeError:
                        outputAudioData[i] = 0
            else:
                for i in range(outputPointer, outputPointer + FADE_SIZE):
                    try:
                        outputAudioData[i][0] *= mask[i - outputPointer]
                        outputAudioData[i][1] *= mask[i - outputPointer]
                    except TypeError:
                        outputAudioData[i] *= mask[i - outputPointer]
                for i in range(endPointer - FADE_SIZE, endPointer):
                    try:
                        outputAudioData[i][0] *= (
                            1 - mask[i - endPointer + FADE_SIZE])
                        outputAudioData[i][1] *= (
                            1 - mask[i - endPointer + FADE_SIZE])
                    except TypeError:
                        outputAudioData[i] *= (
                            1 - mask[i - endPointer + FADE_SIZE])
            outputPointer = endPointer
        num += 1
        if (num % 10 == 0):
            print(''.join([str(num), '/', chunk_len, ' audio chunks done.']))

    print(''.join([str(num), '/', chunk_len, ' audio chunks done.']))
    outputAudioData = np.asarray(outputAudioData)
    wavfile.write(filename, SAMPLE_RATE, outputAudioData)

    if (not os.path.isfile(filename)):
        raise IOError(f'Error: The file {filename} was not created.')
    else:
        print('Audio finished.')
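A sketch of how splitAudio might be driven, assuming chunks, samplesPerFrame, audioData, and maxAudioVolume come from an earlier silence-detection pass like the one in Example #19, and that the globals TEMP and FADE_SIZE are set (all argument values here are assumptions). Note that a speed of 99999 or more marks a chunk to be dropped outright:

splitAudio(os.path.join(TEMP, 'audioNew.wav'), chunks, samplesPerFrame,
           [99999, 1.0], audioData, 44100, maxAudioVolume)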
Example #21
def timeStretch(input_filename, output_filename, rate, samplerate):
    with WavReader(input_filename) as reader:
        with WavWriter(output_filename, reader.channels,
                       reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, rate)
            tsm.run(reader, writer)
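A minimal usage sketch (file names are placeholders). Note that the samplerate parameter is never used: WavWriter takes its rate from the reader, so the output keeps the input's sample rate:

timeStretch('input.wav', 'output.wav', 0.5, 44100)  # half speed, pitch preserved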
Example #22
signalborders = np.where(signalpos_smoothed[:np.size(signalpos_smoothed)-1] != signalpos_smoothed[1:])[0]
del signalpos_smoothed
#signalpos = np.where(np.abs(content) > threschold)
#signalpos_norepeat = np.unique(signalpos[0])
#signalpos_norepeat = np.append(signalpos_norepeat, np.shape(content)[0])
#signalborders = signalpos_norepeat[np.gradient(signalpos_norepeat) > 2000]
signalborders = np.insert(signalborders, 0, 0)
signalborders = np.append(signalborders, np.size(content[:,0]))

newcontent = np.empty((0,2), dtype=np.int16)

for i in (np.arange(1, np.size(signalborders))):
    if np.mean(np.abs(content[signalborders[i-1]:signalborders[i],:])) > threschold:
        lborder = int(np.max([signalborders[i-1]-rate/15, 0]))
        uborder = int(np.min([signalborders[i]+rate/15, np.size(content[:,0])]))
        acc_size = int(np.floor((uborder-lborder)/acc_rate0))
        nonacc_part = content[lborder:uborder,:]
        acc_part = nonacc_part[np.floor(np.arange(acc_size) * acc_rate0).astype(int),:]
        newcontent = np.append(newcontent, acc_part, axis=0)
        
wf.write('output_temp.wav', rate, newcontent)

with WavReader('output_temp.wav') as reader:
    with WavWriter('output.wav', reader.channels, reader.samplerate) as writer:
        tsm = phasevocoder(reader.channels, speed=acc_rate)
        tsm.run(reader, writer)
        
os.remove('output_temp.wav')
        
Example #23
def processVideo(inputFile, outputFile, tempDir):
    global frameRate
    command = "ffmpeg -i '" + inputFile + "' -qscale:v " + str(
        FRAME_QUALITY) + " " + tempDir + "/frame%06d.jpg -hide_banner"
    subprocess.call(command, shell=True)
    command = "ffmpeg -i '" + inputFile + "' -ab 160k -ac 2 -ar " + str(
        SAMPLE_RATE) + " -vn " + tempDir + "/audio.wav"
    subprocess.call(command, shell=True)
    sampleRate, audioData = wavfile.read(tempDir + "/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)
    if frameRate is None:
        frameRate = getFrameRate(inputFile)
    samplesPerFrame = sampleRate / frameRate
    audioFrameCount = int(math.ceil(audioSampleCount / samplesPerFrame))
    hasLoudAudio = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(i * samplesPerFrame)
        end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks)) / max(
            maxAudioVolume, 1e-10)
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i - FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] !=
                shouldIncludeFrame[i - 1]):  # Did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i - 1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
    chunks = chunks[1:]
    outputAudioData = []
    outputPointer = 0
    mask = [
        x / AUDIO_FADE_ENVELOPE_SIZE for x in range(AUDIO_FADE_ENVELOPE_SIZE)
    ]  # Create audio envelope mask
    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0] *
                                   samplesPerFrame):int(chunk[1] *
                                                        samplesPerFrame)]

        sFile = tempDir + "/tempStart.wav"
        eFile = tempDir + "/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels,
                           reader.samplerate) as writer:
                tsm = audio_stretch_algorithm(reader.channels,
                                              speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer + leng
        outputAudioData.extend((alteredAudioData / maxAudioVolume).tolist())

        # Smoothing the audio
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            for i in range(outputPointer, endPointer):
                outputAudioData[i] = [0, 0]  # zero both channels; samples are [L, R] lists after tolist()
        else:
            for i in range(outputPointer,
                           outputPointer + AUDIO_FADE_ENVELOPE_SIZE):
                outputAudioData[i][0] *= mask[i - outputPointer]
                outputAudioData[i][1] *= mask[i - outputPointer]
            for i in range(endPointer - AUDIO_FADE_ENVELOPE_SIZE, endPointer):
                outputAudioData[i][0] *= (
                    1 - mask[i - endPointer + AUDIO_FADE_ENVELOPE_SIZE])
                outputAudioData[i][1] *= (
                    1 - mask[i - endPointer + AUDIO_FADE_ENVELOPE_SIZE])

        startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                             (outputFrame - startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame, tempDir)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(lastExistingFrame, outputFrame, tempDir)

        outputPointer = endPointer
    outputAudioData = np.asarray(outputAudioData)
    wavfile.write(tempDir + "/audioNew.wav", SAMPLE_RATE, outputAudioData)
    command = f"ffmpeg -framerate {frameRate} -i {tempDir}/newFrame%06d.jpg -i {tempDir}/audioNew.wav -strict -2 -c:v libx264 -preset {H264_PRESET} -crf {H264_CRF} -pix_fmt yuvj420p '{outputFile}'"
    subprocess.call(command, shell=True)
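A hypothetical call, assuming the module-level configuration this function relies on (FRAME_QUALITY, SAMPLE_RATE, SILENT_THRESHOLD, FRAME_SPREADAGE, NEW_SPEED, AUDIO_FADE_ENVELOPE_SIZE, audio_stretch_algorithm, H264_PRESET, H264_CRF) has been set first:

frameRate = None  # let processVideo probe the input via getFrameRate
processVideo('input.mp4', 'output.mp4', 'TEMP')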
Example #24
        i = 0
        for i_start, i_end, silence in chunks:
            i += 1
            if i_start != i_end:
                if silence == 0:
                    speed = SOUNDED_SPEED
                else:
                    speed = SILENT_SPEED

                sub_clip = clip.subclip(i_start/1000, i_end/1000)

                audio[i_start:i_end].export(os.path.join(tempPath, "sub_clip.wav"), format='wav')

                src = os.path.join(tempPath, "sub_clip.wav")
                out = os.path.join(tempPath, "sub_clip-reg{0}.wav".format(i))
                with WavReader(src) as reader:
                    with WavWriter(out, reader.channels, reader.samplerate) as writer:
                        tsm = phasevocoder(reader.channels, speed=speed)
                        tsm.run(reader, writer)

                sub_clip = sub_clip.fx(vfx.speedx, speed)
                sub_clip = sub_clip.set_audio(AudioFileClip(out))
                clips.append(sub_clip)
                if i % 5 == 0:
                    print("Modifying Chunks: " + str(round((i / len(chunks) * 100), 2)) + "% Complete.")
        
        params = ['-crf', '25']
        output_clip = concatenate_videoclips(clips)
        output_clip.write_videofile(os.path.join(outputPath, video), codec='libx264', ffmpeg_params=params, threads=8, preset='ultrafast')
        print("Success!")
        print("Output is stored in: " + os.path.join(outputPath, video))
Example #25
chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
chunks = chunks[1:]

outputAudioData = np.zeros((0, audioData.shape[1]))
outputPointer = 0

lastExistingFrame = None
for chunk in chunks:
    audioChunk = audioData[int(chunk[0] *
                               samplesPerFrame):int(chunk[1] *
                                                    samplesPerFrame)]

    sFile = TEMP_FOLDER + "/tempStart.wav"
    eFile = TEMP_FOLDER + "/tempEnd.wav"
    wavfile.write(sFile, SAMPLE_RATE, audioChunk)
    with WavReader(sFile) as reader:
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
            tsm.run(reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer + leng
    outputAudioData = np.concatenate(
        (outputAudioData, alteredAudioData / maxAudioVolume))

    #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

    # smooth out transition's audio by quickly fading in/out

    if leng < AUDIO_FADE_ENVELOPE_SIZE:
        outputAudioData[outputPointer:endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
Example #26
        def run_process():

            URL = self.URLLineEdit.text()
            if (self.URLRadio.isChecked()):
                INPUT_FILE = downloadFile(URL)
            else:
                INPUT_FILE = self.fileLineEdit.text()

            if (INPUT_FILE == ''):
                winsound.PlaySound('SystemExclamation', winsound.SND_ALIAS)

            else:
                frameRate = self.frameRateSlider.value()
                SAMPLE_RATE = self.sampleRateSlider.value()
                SILENT_THRESHOLD = float(self.thresholdLineEdit.text())
                FRAME_SPREADAGE = float(self.frameMarginSlider.value())
                SILENT_SPEED = float(self.silentSpeedLineEdit.text())
                SOUNDED_SPEED = float(self.soundedSpeedLineEdit.text())
                NEW_SPEED = [SILENT_SPEED, SOUNDED_SPEED]
                print(NEW_SPEED)

                FRAME_QUALITY = float(self.frameQualitySlider.value())

                assert INPUT_FILE is not None, "why u put no input file, that dum"
                """   
                if len(args.output_file) >= 1:
                    OUTPUT_FILE = args.output_file
                else:
                """

                OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

                TEMP_FOLDER = "TEMP"
                AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out transition's audio by quickly fading in/out (arbitrary magic number whatever)

                createPath(TEMP_FOLDER)

                command = "ffmpeg -i " + INPUT_FILE + " -qscale:v " + str(
                    FRAME_QUALITY
                ) + " " + TEMP_FOLDER + "/frame%06d.jpg -hide_banner"
                subprocess.call(command, shell=True)

                command = "ffmpeg -i " + INPUT_FILE + " -ab 160k -ac 2 -ar " + str(
                    SAMPLE_RATE) + " -vn " + TEMP_FOLDER + "/audio.wav"

                subprocess.call(command, shell=True)

                command = "ffmpeg -i " + TEMP_FOLDER + "/input.mp4 2>&1"
                f = open(TEMP_FOLDER + "/params.txt", "w")
                subprocess.call(command, shell=True, stdout=f)

                sampleRate, audioData = wavfile.read(TEMP_FOLDER +
                                                     "/audio.wav")
                audioSampleCount = audioData.shape[0]
                maxAudioVolume = getMaxVolume(audioData)

                with open(TEMP_FOLDER + "/params.txt", "r") as f:
                    pre_params = f.read()
                params = pre_params.split('\n')
                for line in params:
                    m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
                    if m is not None:
                        frameRate = float(m.group(1))

                samplesPerFrame = sampleRate / frameRate

                audioFrameCount = int(
                    math.ceil(audioSampleCount / samplesPerFrame))

                hasLoudAudio = np.zeros((audioFrameCount))

                for i in range(audioFrameCount):
                    start = int(i * samplesPerFrame)
                    end = min(int((i + 1) * samplesPerFrame), audioSampleCount)
                    audiochunks = audioData[start:end]
                    maxchunksVolume = float(
                        getMaxVolume(audiochunks)) / maxAudioVolume
                    if maxchunksVolume >= SILENT_THRESHOLD:
                        hasLoudAudio[i] = 1

                chunks = [[0, 0, 0]]
                shouldIncludeFrame = np.zeros((audioFrameCount))
                for i in range(audioFrameCount):
                    start = int(max(0, i - FRAME_SPREADAGE))
                    end = int(min(audioFrameCount, i + 1 + FRAME_SPREADAGE))
                    shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
                    if (i >= 1 and shouldIncludeFrame[i] !=
                            shouldIncludeFrame[i - 1]):  # Did we flip?
                        chunks.append(
                            [chunks[-1][1], i, shouldIncludeFrame[i - 1]])

                chunks.append([
                    chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]
                ])
                chunks = chunks[1:]

                outputAudioData = np.zeros((0, audioData.shape[1]))
                outputPointer = 0

                lastExistingFrame = None
                for chunk in chunks:
                    audioChunk = audioData[int(chunk[0] * samplesPerFrame
                                               ):int(chunk[1] *
                                                     samplesPerFrame)]

                    sFile = TEMP_FOLDER + "/tempStart.wav"
                    eFile = TEMP_FOLDER + "/tempEnd.wav"
                    wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                    with WavReader(sFile) as reader:
                        with WavWriter(eFile, reader.channels,
                                       reader.samplerate) as writer:
                            tsm = phasevocoder(reader.channels,
                                               speed=NEW_SPEED[int(chunk[2])])
                            tsm.run(reader, writer)
                    _, alteredAudioData = wavfile.read(eFile)
                    leng = alteredAudioData.shape[0]
                    endPointer = outputPointer + leng
                    outputAudioData = np.concatenate(
                        (outputAudioData, alteredAudioData / maxAudioVolume))

                    #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

                    # smooth out transition's audio by quickly fading in/out

                    if leng < AUDIO_FADE_ENVELOPE_SIZE:
                        outputAudioData[
                            outputPointer:
                            endPointer] = 0  # audio is less than 0.01 sec, let's just remove it.
                    else:
                        premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE
                                            ) / AUDIO_FADE_ENVELOPE_SIZE
                        mask = np.repeat(
                            premask[:, np.newaxis], 2,
                            axis=1)  # make the fade-envelope mask stereo
                        outputAudioData[outputPointer:outputPointer +
                                        AUDIO_FADE_ENVELOPE_SIZE] *= mask
                        outputAudioData[
                            endPointer -
                            AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1 - mask

                    startOutputFrame = int(
                        math.ceil(outputPointer / samplesPerFrame))
                    endOutputFrame = int(
                        math.ceil(endPointer / samplesPerFrame))
                    for outputFrame in range(startOutputFrame, endOutputFrame):
                        inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                         (outputFrame - startOutputFrame))
                        didItWork = copyFrame(inputFrame, outputFrame,
                                              TEMP_FOLDER)
                        if didItWork:
                            lastExistingFrame = inputFrame
                        else:
                            copyFrame(lastExistingFrame, outputFrame,
                                      TEMP_FOLDER)

                    outputPointer = endPointer

                wavfile.write(TEMP_FOLDER + "/audioNew.wav", SAMPLE_RATE,
                              outputAudioData)
                '''
                outputFrame = math.ceil(outputPointer/samplesPerFrame)
                for endGap in range(outputFrame,audioFrameCount):
                    copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
                '''

                command = "ffmpeg -framerate " + str(
                    frameRate
                ) + " -i " + TEMP_FOLDER + "/newFrame%06d.jpg -i " + TEMP_FOLDER + "/audioNew.wav -strict -2 " + OUTPUT_FILE
                subprocess.call(command, shell=True)

                deletePath(TEMP_FOLDER)
Example #27
def main():
    """Change the speed of an audio file without changing its pitch."""

    # Parse command line arguments
    parser = argparse.ArgumentParser(description=(
        "Change the speed of an audio file without changing its pitch."))
    parser.add_argument(
        '-s',
        '--speed',
        metavar="S",
        type=float,
        default=1.,
        help="Set the speed ratio (e.g 0.5 to play at half speed)")
    parser.add_argument(
        '-m',
        '--method',
        type=str,
        default="wsola",
        help="Select the TSM method (ola, wsola, or phasevocoder)")
    parser.add_argument('-l',
                        '--frame-length',
                        metavar='N',
                        type=int,
                        default=None,
                        help="Set the frame length to N.")
    parser.add_argument('-a',
                        '--analysis-hop',
                        metavar='N',
                        type=int,
                        default=None,
                        help="Set the analysis hop to N.")
    parser.add_argument('--synthesis-hop',
                        metavar='N',
                        type=int,
                        default=None,
                        help="Set the synthesis hop to N.")
    parser.add_argument(
        '-t',
        '--tolerance',
        metavar='N',
        type=int,
        default=None,
        help="Set the tolerance to N (only used when method is set to wsola).")
    '''parser.add_argument(
        '-p', '--phase-locking', metavar='S', type=str, default=None,
        help=("Set the phase locking strategy (none or identity; "
              "only used when method is set to phasevocoder)."))'''
    parser.add_argument('-o',
                        '--output',
                        metavar='FILENAME',
                        type=str,
                        default=None,
                        help="Write the output in the wav file FILENAME.")
    parser.add_argument('-i',
                        '--input_filename',
                        metavar='INPUT_FILENAME',
                        type=str,
                        help="The audio input file")

    args = parser.parse_args()

    if not os.path.isfile(args.input_filename):
        parser.error('The input file "{}" does not exist.'.format(
            args.input_filename))

    # Get TSM method parameters
    parameters = {}
    if args.speed:
        parameters['speed'] = args.speed
    if args.frame_length:
        parameters['frame_length'] = args.frame_length
    if args.analysis_hop:
        parameters['analysis_hop'] = args.analysis_hop
    if args.synthesis_hop:
        parameters['synthesis_hop'] = args.synthesis_hop
    if args.tolerance is not None and args.method == "wsola":
        parameters['tolerance'] = args.tolerance
    '''if args.phase_locking and args.method == "phasevocoder":
        parameters['phase_locking'] = PhaseLocking.from_str(args.phase_locking)'''

    # Run the TSM procedure
    with WavReader(args.input_filename) as reader:
        with create_writer(args.output, reader) as writer:
            tsm = create_tsm(args.method, reader.channels, parameters)
            tsm.run(reader, writer)
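Assuming this script were saved as, say, audiotsmcli.py, a typical invocation might look like:

python audiotsmcli.py -m wsola -s 0.5 -i input.wav -o output.wav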
Example #28
def splitVideo(chunks, NEW_SPEED, frameRate, zooms, samplesPerFrame,
               SAMPLE_RATE, audioData, extension, VERBOSE):
    print('Creating new video.')
    num = 0
    chunk_len = str(len(chunks))
    outputPointer = 0
    Renames = []
    lastExisting = None
    for chunk in chunks:
        if (NEW_SPEED[int(chunk[2])] < 99999):
            audioChunk = audioData[int(chunk[0] *
                                       samplesPerFrame):int(chunk[1] *
                                                            samplesPerFrame)]
            if (NEW_SPEED[int(chunk[2])] == 1):
                leng = len(audioChunk)
            else:
                sFile = TEMP + '/tempStart2.wav'
                eFile = TEMP + '/tempEnd2.wav'
                wavfile.write(sFile, SAMPLE_RATE, audioChunk)
                with WavReader(sFile) as reader:
                    with WavWriter(eFile, reader.channels,
                                   reader.samplerate) as writer:
                        phasevocoder(reader.channels,
                                     speed=NEW_SPEED[int(chunk[2])]).run(
                                         reader, writer)
                __, alteredAudioData = wavfile.read(eFile)
                leng = alteredAudioData.shape[0]

            endPointer = outputPointer + leng

            startOutputFrame = int(math.ceil(outputPointer / samplesPerFrame))
            endOutputFrame = int(math.ceil(endPointer / samplesPerFrame))
            for outputFrame in range(startOutputFrame, endOutputFrame):
                inputFrame = int(chunk[0] + NEW_SPEED[int(chunk[2])] *
                                 (outputFrame - startOutputFrame))

                src = ''.join(
                    [CACHE, '/frame{:06d}'.format(inputFrame + 1), '.jpg'])
                dst = ''.join(
                    [TEMP, '/newFrame{:06d}'.format(outputFrame + 1), '.jpg'])
                if (os.path.isfile(src)):
                    lastExisting = inputFrame
                    if (inputFrame in zooms):
                        resize(src, dst, zooms[inputFrame])
                    else:
                        os.rename(src, dst)
                        Renames.extend([src, dst])
                else:
                    if (lastExisting is None):
                        print(src + ' does not exist.')
                        raise IOError('Fatal Error! No existing frame exists.')
                    src = ''.join([
                        CACHE, '/frame{:06d}'.format(lastExisting + 1), '.jpg'
                    ])
                    if (os.path.isfile(src)):
                        if (lastExisting in zooms):
                            resize(src, dst, zooms[lastExisting])
                        else:
                            os.rename(src, dst)
                            Renames.extend([src, dst])
                    else:
                        # uh oh, we need to find the file we just renamed!
                        myFile = None
                        for i in range(0, len(Renames), 2):
                            if (Renames[i] == src):
                                myFile = Renames[i + 1]
                                break
                        if (myFile is not None):
                            copyfile(myFile, dst)
                        else:
                            raise IOError(
                                f'Error! The file {src} does not exist.')

            outputPointer = endPointer

        num += 1
        if (num % 10 == 0):
            print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))
    print(''.join([str(num), '/', chunk_len, ' frame chunks done.']))

    with open(f'{TEMP}/Renames.txt', 'w') as f:
        for item in Renames:
            f.write(f"{item}\n")

    print('Creating finished video. (This can take a while)')
    cmd = ['ffmpeg', '-y', '-framerate',
           str(frameRate), '-i', f'{TEMP}/newFrame%06d.jpg']
    if (not VERBOSE):
        cmd.extend(['-nostats', '-loglevel', '0'])  # quiet flags must come before the output path
    cmd.append(f'{TEMP}/output{extension}')
    subprocess.call(cmd)
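A hypothetical driver call (chunks, zooms, audioData, and the globals TEMP, CACHE, and resize are assumed to come from earlier stages; as the < 99999 guard above shows, a NEW_SPEED entry of 99999 marks silent chunks to be dropped entirely):

splitVideo(chunks, [99999, 1.0], 29.97, {}, 44100 / 29.97,
           44100, audioData, '.mp4', False)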