Esempio n. 1
0
def test_invalid_files():
    """Run ``process_files`` on inputs it is not expected to be able to handle.

    Two batches are passed in: file names this test never creates, and empty
    placeholder files with ``.ape`` / ``.aac`` extensions (the batch the
    original test labels as unsupported).  There are no explicit assertions;
    the test passes when ``process_files`` returns without raising for both.
    """
    target_dbfs = -13.5

    # This test does not create these files before handing them over.
    missing_files = ['temp.wav', 'temp.mp3']
    process_files(missing_files, target_dbfs)

    # Create empty placeholders so the paths exist on disk.
    unsupported_files = ['audio1.ape', 'audio2.aac']
    for path in unsupported_files:
        with open(path, 'w'):
            pass

    try:
        process_files(unsupported_files, target_dbfs)
    finally:
        # Clean up even when process_files raises, so a failing run does not
        # leave the placeholder files behind in the working directory.
        cleanup_on_finish(unsupported_files, '/')
Esempio n. 2
0
    def convert_single_wav_to_input(self, audio_filename):
        """Build the scaled log-power STFT feature for a single demo wav file.

        The wav is looked up at ``<self.src_dir>/demo_wav/<audio_filename>``.
        ``pynormalize.process_files`` is asked to write a volume-normalised
        copy into ``<self.src_dir>/raw_wav``; the wav is then loaded, reduced
        to its first channel when it has more than one, and passed through an
        STFT.  The log-power spectrogram is divided by its maximum value and
        truncated to a whole number of video frames, minus the last two.

        Args:
            audio_filename: Name of the wav file inside the ``demo_wav`` folder.

        Returns:
            A one-element list containing the tuple
            ``(features, None, (0, audio_filename, 0))`` where ``features`` is
            the truncated spectrogram, transposed.

        Raises:
            AssertionError: If the wav file's sample rate is not 16000.
        """
        # Default parameters.
        target_audio_dbfs = -20.0
        stft_window_size = {'25': 320, '29.97': 356}
        stft_window_step = {'25': 4, '29.97': 3}
        fps_key = str(25)

        audio_file = os.path.join(self.src_dir, 'demo_wav', audio_filename)

        # Step 1 : Normalize the volume
        pynormalize.process_files(Files=[audio_file],
                                  target_dbfs=target_audio_dbfs,
                                  directory=os.path.join(
                                      self.src_dir, 'raw_wav'))

        # Step 2 : load wav file
        # NOTE(review): the normalised copy is written under raw_wav, but the
        # file read here is still the original demo_wav path - confirm that
        # reading the un-normalised audio is intended.
        sample_rate, samples = wav.read(audio_file)
        assert (sample_rate == 16000)
        if len(samples.shape) > 1:
            samples = samples[:, 0]  # pick mono

        # Step 3 : STFT,
        # 1 frame = 1/25 * 16k = 640 samples => windowsize=320,  overlap=160
        # 1 frame = 1/29.97 * 16k = 533.86 samples => windowsize=356,
        #   overlap=178, (mis-align = 4.2sample / 1s)
        _, _, Zxx = stft(samples,
                         fs=sample_rate,
                         nperseg=stft_window_size[fps_key])

        # Log-power spectrogram; the epsilon keeps log() finite on zero bins.
        stft_abs = np.log(np.abs(Zxx) ** 2 + 1e-10)
        stft_abs /= np.max(stft_abs)

        # Step 4 : align AV (drop last 2 frames of V)
        columns_per_frame = stft_window_step[fps_key]
        fl_length = stft_abs.shape[1] // columns_per_frame
        # Clamp at zero: for very short clips ``fl_length - 2`` is negative,
        # and a negative slice end would count from the end of the array and
        # keep a stray partial slice instead of nothing.
        kept_frames = max(fl_length - 2, 0)
        audio_stft_length = kept_frames * columns_per_frame
        stft_abs = stft_abs[:, 0:audio_stft_length]

        return [(stft_abs.T, None, (0, audio_filename, 0))]
Esempio n. 3
0
def test_process_files():
    """Check that ``process_files`` writes a fresh output file into ``STORE``.

    A 3000 ms silent clip is exported as ``temp.wav`` and processed.  The test
    then requires a file of the same name to be listed in ``STORE`` and
    ``get_modified_time_diff`` for that file to be below 100.
    """
    files = ['temp.wav']
    target_dbfs = -13.5

    silent_clip = AudioSegment.silent(duration=3000)
    silent_clip.export(files[0], format='wav')

    try:
        process_files(files, target_dbfs)

        assert files[0] in os.listdir(STORE), \
            '%s was not found in %s' % (files[0], STORE)
        output_path = os.path.join(STORE, files[0])
        assert get_modified_time_diff(output_path) < 100, \
            '%s was not modified recently enough' % output_path
    finally:
        # Run the cleanup on failure too, so a failed assertion does not leave
        # the temporary files behind for later tests.
        cleanup_on_finish(files, STORE)
Esempio n. 4
0
def test_process_files_different_directory():
    """Check that ``process_files`` honours an explicit ``directory`` argument.

    A 3000 ms silent clip is exported as ``temp.wav`` and processed with
    ``directory='_TEMP'``.  The test then requires a file of the same name to
    be listed in that directory and ``get_modified_time_diff`` for it to be
    below 100.
    """
    files = ['temp.wav']
    target_dbfs = -13.5
    edited_store = '_TEMP'

    silent_clip = AudioSegment.silent(duration=3000)
    silent_clip.export(files[0], format='wav')

    try:
        process_files(Files=files, target_dbfs=target_dbfs,
                      directory=edited_store)

        assert files[0] in os.listdir(edited_store), \
            '%s was not found in %s' % (files[0], edited_store)
        output_path = os.path.join(edited_store, files[0])
        assert get_modified_time_diff(output_path) < 100, \
            '%s was not modified recently enough' % output_path
    finally:
        # Run the cleanup on failure too, so a failed assertion does not leave
        # the temporary files behind for later tests.
        cleanup_on_finish(files, edited_store)