def load_audio(audio_path, mode='train', win_length=400, sr=16000, hop_length=160, n_fft=512, spec_len=257):
    """Load an audio file and return a normalized magnitude spectrogram.

    The waveform is loaded at sample rate ``sr``, extended to twice its
    length, transformed with an STFT, cropped to ``spec_len`` frames and
    normalized per frame.

    :param audio_path: path of the audio file, passed to ``librosa.load``.
    :param mode: ``'train'`` appends a copy of the signal (and reverses the
        result with probability 0.3) and takes a random crop; ``'infer'``
        additionally runs ``remove_silence`` and ``remove_noise`` first.
        Every mode other than ``'train'`` appends the reversed signal and
        crops from the first frame.
    :param win_length: STFT window length, forwarded to ``librosa.stft``.
    :param sr: sample rate used for loading and for the cleanup helpers.
    :param hop_length: STFT hop length, forwarded to ``librosa.stft``.
    :param n_fft: FFT size, forwarded to ``librosa.stft``.
    :param spec_len: number of time frames kept in the output.
    :return: array of shape ``(1, freq_bins, spec_len)``.
    :raises AssertionError: if the cleaned signal is empty (``'infer'``
        mode) or the spectrogram has fewer than ``spec_len`` frames.
    """
    # Read the audio data; the returned sample rate is not needed.
    wav, _ = librosa.load(audio_path, sr=sr)
    # Inference data has silence and noise removed before feature extraction.
    if mode == 'infer':
        wav = remove_silence(wav, sr)
        wav = remove_noise(wav, sr)
        assert len(wav) > 0, "音频经过去除噪声和静音片段后,得到的结果为空,该音频不可用!"
    # Extend the signal to twice its length.
    if mode == 'train':
        extended_wav = np.append(wav, wav)
        if np.random.random() < 0.3:
            extended_wav = extended_wav[::-1]
    else:
        extended_wav = np.append(wav, wav[::-1])
    # Short-time Fourier transform; the magnitude is taken element-wise, so
    # no transposition is needed around magphase.
    linear = librosa.stft(extended_wav, n_fft=n_fft, win_length=win_length, hop_length=hop_length)
    mag, _ = librosa.magphase(linear)
    freq, freq_time = mag.shape
    # The message says the non-silent part must not be shorter than 1.3s.
    assert freq_time >= spec_len, "非静音部分长度不能低于1.3s"
    if mode == 'train':
        # Random crop. randint's upper bound is exclusive, so "+ 1" makes the
        # last valid start reachable and avoids a ValueError when
        # freq_time == spec_len (the only valid start is then 0).
        rand_time = np.random.randint(0, freq_time - spec_len + 1)
        spec_mag = mag[:, rand_time:rand_time + spec_len]
    else:
        spec_mag = mag[:, :spec_len]
    # Normalize each time frame across the frequency axis; the epsilon keeps
    # the division finite for constant frames.
    mean = np.mean(spec_mag, 0, keepdims=True)
    std = np.std(spec_mag, 0, keepdims=True)
    spec_mag = (spec_mag - mean) / (std + 1e-5)
    # Add a leading axis of size one.
    spec_mag = spec_mag[np.newaxis, :]
    return spec_mag
Exemple #2
0
def remove_noise_and_silence(wav, sr=_sr):
    """Remove noise and silence from a waveform.

    The signal is passed through ``aukit.remove_noise``, scaled so that its
    largest absolute sample is 1, and then passed through
    ``aukit.remove_silence_wave`` and ``aukit.strip_silence_wave``.

    :param wav: input waveform, forwarded to ``aukit.remove_noise``.
    :param sr: sample rate forwarded to every aukit call.
    :return: the processed waveform returned by ``aukit.strip_silence_wave``.
    """
    out = aukit.remove_noise(wav, sr=sr)
    # Peak-normalize, but skip the division for an all-zero signal: dividing
    # by a zero peak would fill the waveform with NaN before the silence
    # detectors run.
    peak = np.max(np.abs(out))
    if peak > 0:
        out = out / peak
    out = aukit.remove_silence_wave(out, sr=sr, keep_silence_len=50, min_silence_len=100, silence_thresh=-32)
    out = aukit.strip_silence_wave(out, sr=sr, keep_silence_len=20, min_silence_len=40, silence_thresh=-32)
    return out
Exemple #3
0
    def preprocess(self):
        """Clean the selected utterance and draw its spectrogram.

        The utterance waveform is passed through ``aukit.remove_noise`` and
        ``trim_long_silences``; the spectrogram of the result is drawn, with
        its first axis reversed, under the "current" label.
        """
        utterance_wav = self.ui.selected_utterance.wav
        cleaned = aukit.remove_noise(utterance_wav, sr=Synthesizer.sample_rate)

        # Settings handed to trim_long_silences, assigned in this order.
        vad_settings = (
            ("vad_window_length", 10),  # milliseconds
            ("vad_moving_average_width", 2),
            ("vad_max_silence_length", 2),
            ("audio_norm_target_dBFS", -32),
            ("sample_rate", 16000),
            ("int16_max", (2**15) - 1),
        )
        hparams = aukit.Dict2Obj({})
        for key, value in vad_settings:
            hparams[key] = value
        cleaned = trim_long_silences(cleaned, hparams=hparams)

        spectrogram = Synthesizer.make_spectrogram(cleaned)
        self.ui.draw_align(spectrogram[::-1], "current")
Exemple #4
0
def run_noise_remover():
    """Load ``hello.wav``, run it through aukit's noise remover, and play the result."""
    import aukit

    source_wav = aukit.load_wav(r"hello.wav")
    denoised = aukit.remove_noise(source_wav)
    aukit.play_audio(denoised)
Exemple #5
0
def remove_noise_audio(inpath, outpath):
    """Denoise an audio file.

    Loads ``inpath`` at 16000 Hz, runs ``aukit.remove_noise`` on it, and
    saves the result to ``outpath`` at the same sample rate.
    """
    import aukit

    sample_rate = 16000
    noisy = aukit.load_wav(inpath, sr=sample_rate)
    denoised = aukit.remove_noise(noisy, sr=sample_rate)
    aukit.save_wav(denoised, outpath, sr=sample_rate)
Exemple #6
0
def run_noise_remover():
    """Load a fixed local WAV file, run it through aukit's noise remover, and play the result."""
    import aukit

    source_wav = aukit.load_wav(r"E:\data\temp\01.wav")
    denoised = aukit.remove_noise(source_wav)
    aukit.play_audio(denoised)