Example #1
def run(audio_file: str, out_path: str, model_name: str, pretrained_path: str, lowpass_freq: int = 0):
    wav, sr = librosa.load(audio_file, sr=settings.SAMPLE_RATE)
    wav = preemphasis(wav)

    if wav.dtype != np.float32:
        wav = wav.astype(np.float32)

    # load model
    model = __load_model(model_name, pretrained_path)

    # convert the wav to a (1, T) cuda tensor
    wav = torch.FloatTensor(wav).unsqueeze(0).cuda()

    # inference
    print('Inference ...')
    with torch.no_grad():
        out_wav = model(wav)
        out_wav = out_wav[0].cpu().numpy()

    if lowpass_freq:
        out_wav = lowpass(out_wav, frequency=lowpass_freq)

    # save wav
    librosa.output.write_wav(out_path, inv_preemphasis(out_wav).clip(-1., 1.), settings.SAMPLE_RATE)

    print('Finish !')
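The preemphasis / inv_preemphasis helpers used above are not shown in these examples. Below is a minimal sketch of the usual first-order pre-emphasis filter and its inverse, assuming a coefficient of 0.97; the project's actual coefficient and implementation may differ.

import numpy as np
from scipy.signal import lfilter

def preemphasis_sketch(wav: np.ndarray, coef: float = 0.97) -> np.ndarray:
    # y[t] = x[t] - coef * x[t - 1]
    return lfilter([1.0, -coef], [1.0], wav)

def inv_preemphasis_sketch(wav: np.ndarray, coef: float = 0.97) -> np.ndarray:
    # inverse filter: y[t] = x[t] + coef * y[t - 1]
    return lfilter([1.0], [1.0, -coef], wav)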
Example #2
def __getitem__(self, idx: int) -> List[Any]:
    res = super().__getitem__(idx)
    # polarity augmentation: flip the sign of both signals half the time
    if np.random.randint(2):
        res[0] = res[0] * -1
        res[1] = res[1] * -1
    # augmentation with audioset data (applied two out of three times)
    if np.random.randint(3):
        rand_amp = np.random.rand() * 0.5 + 0.5  # 0.5 ~ 1.0
        res[0] = augment(res[1], amp=rand_amp)
    # volume augmentation
    rand_vol = np.random.rand() + 0.5  # 0.5 ~ 1.5
    res[0] = np.clip(res[0] * rand_vol, -1, 1)
    res[1] = np.clip(res[1] * rand_vol, -1, 1)
    # pre-emphasis
    res[0] = preemphasis(res[0])
    res[1] = preemphasis(res[1])
    return res
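The override above relies on a parent __getitem__ that is not shown; it is assumed to return a [noisy, clean] pair of float waveforms in [-1, 1], where res[0] is the model input and res[1] the target. A hypothetical minimal base class for illustration only (the class name, file lists, and loading details are assumptions):

import librosa
import numpy as np
from typing import Any, List
from torch.utils.data import Dataset

class PairedAudioDataset(Dataset):
    def __init__(self, noisy_files: List[str], clean_files: List[str], sample_rate: int = 22050):
        self.noisy_files = noisy_files
        self.clean_files = clean_files
        self.sample_rate = sample_rate

    def __len__(self) -> int:
        return len(self.noisy_files)

    def __getitem__(self, idx: int) -> List[Any]:
        # res[0]: noisy input, res[1]: clean target
        noisy, _ = librosa.load(self.noisy_files[idx], sr=self.sample_rate)
        clean, _ = librosa.load(self.clean_files[idx], sr=self.sample_rate)
        return [noisy, clean]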
Example #3
def test_dir(in_dir: str, out_dir: str, model_name: str, pretrained_path: str):

    # list .wav files in the input directory
    print('List up wave files in given directory ...')
    file_list = glob.glob(os.path.join(in_dir, '*.wav'))

    # load model
    model = __load_model(model_name, pretrained_path)

    # mkdir
    os.makedirs(out_dir, exist_ok=True)

    # loop all
    print('Process files ...')
    noise_all = []
    results = []

    # TODO: Convert it to batch mode
    for file_path in tqdm(file_list):
        # load wave
        origin_wav, _ = librosa.load(file_path, sr=settings.SAMPLE_RATE)

        # apply pre-emphasis
        wav = preemphasis(origin_wav)

        # wave to cuda tensor
        wav = torch.FloatTensor(wav).unsqueeze(0).cuda()

        # inference
        with torch.no_grad():
            clean_hat = model(wav)

        noise_all.append(origin_wav)
        results.append(clean_hat.squeeze())

    # write all
    print('Write all result into {} ...'.format(out_dir))
    for file_path, clean_hat, noise in zip(file_list, results, noise_all):
        file_name = os.path.basename(file_path).split('.')[0]
        noise_out_path = os.path.join(out_dir, '{}_noise.wav'.format(file_name))
        clean_out_path = os.path.join(out_dir, '{}_pred.wav'.format(file_name))

        librosa.output.write_wav(noise_out_path, noise, settings.SAMPLE_RATE)
        librosa.output.write_wav(clean_out_path, inv_preemphasis(clean_hat.cpu().numpy()), settings.SAMPLE_RATE)

    print('Finish !')
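One way to approach the batch-mode TODO above: zero-pad groups of pre-emphasized wavs to a common length and run the model once per group. This is only a sketch; the batch size, padding strategy, and the assumption that the model accepts and returns (batch, samples) tensors are not confirmed by the examples here.

import numpy as np
import torch

def run_batched(model, wavs, batch_size=8):
    results = []
    for i in range(0, len(wavs), batch_size):
        chunk = wavs[i:i + batch_size]
        max_len = max(len(w) for w in chunk)
        # zero-pad every wav in the chunk to the longest one
        batch = np.stack([np.pad(w, (0, max_len - len(w))) for w in chunk])
        with torch.no_grad():
            out = model(torch.FloatTensor(batch).cuda())
        # trim the padding back off each output
        results.extend(out[j, :len(w)].cpu().numpy() for j, w in enumerate(chunk))
    return results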
Example #4
def run(*audio_files: str,
        out_path: str,
        model_name: str,
        pretrained_path: str,
        lowpass_freq: int = 0,
        sample_rate: int = 22050):
    # load the model once, before looping over the input files
    model = __load_model(model_name, pretrained_path)

    for audio_file in audio_files:
        print('Loading audio file...')
        wav, sr = librosa.load(audio_file, sr=sample_rate)
        wav = preemphasis(wav)

        if wav.dtype != np.float32:
            wav = wav.astype(np.float32)

        # convert the wav to a (1, T) tensor
        wav = torch.FloatTensor(wav).unsqueeze(0)

        # inference
        print('Inference ...')
        with torch.no_grad():
            out_wav = model(wav)
            out_wav = out_wav[0].cpu().numpy()

        if lowpass_freq:
            out_wav = lowpass(out_wav, frequency=lowpass_freq)

        # save wav
        librosa.output.write_wav(
            os.path.join(
                out_path,
                os.path.basename(audio_file).rsplit(".", 1)[0] + "_out.wav"),
            inv_preemphasis(out_wav).clip(-1., 1.), sample_rate)

    print('Finish !')
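The lowpass helper called when lowpass_freq is set is also not shown in these examples. Here is a sketch of a zero-phase Butterworth low-pass filter with a matching call shape; the filter order, default sample rate, and filtering method are assumptions:

import numpy as np
from scipy.signal import butter, sosfiltfilt

def lowpass_sketch(wav: np.ndarray, frequency: int, sample_rate: int = 22050, order: int = 5) -> np.ndarray:
    # design against the Nyquist frequency and filter forward/backward to avoid phase shift
    sos = butter(order, frequency / (sample_rate / 2), btype='low', output='sos')
    return sosfiltfilt(sos, wav)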
Example #5
def __getitem__(self, idx):
    wav = librosa.load(self.wav_list[idx],
                       sr=self.sample_rate)[0].squeeze()
    # drop clips longer than max_len seconds by replacing them with a single zero sample
    if len(wav) > self.sample_rate * self.max_len:
        wav = np.zeros(1)
    return [preemphasis(wav), np.array([len(wav)])]
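Since the items above are variable length (and carry their own length array), a DataLoader over this dataset needs a padding collate function. A hypothetical sketch; the project's actual collate and batch layout are not shown here:

import numpy as np
import torch
from torch.utils.data import DataLoader

def pad_collate(batch):
    wavs, lengths = zip(*batch)
    max_len = max(len(w) for w in wavs)
    # zero-pad every wav to the longest one in the batch
    padded = np.stack([np.pad(w, (0, max_len - len(w))) for w in wavs])
    return torch.FloatTensor(padded), torch.LongTensor(np.concatenate(lengths))

# loader = DataLoader(dataset, batch_size=16, collate_fn=pad_collate)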