# Example 1
def run(audio_file: str, out_path: str, model_name: str, pretrained_path: str, lowpass_freq: int = 0):
    """Denoise a single audio file with a pretrained model and write the result.

    :param audio_file: path of the input wav file
    :param out_path: path of the output wav file
    :param model_name: name of the model to build
    :param pretrained_path: pretrained checkpoint file path
    :param lowpass_freq: if non-zero, apply a low-pass filter at this cutoff frequency
    """
    # load (resampled to the project sample rate) and pre-emphasize; the
    # returned sample rate is unused since resampling is forced
    wav, _ = librosa.load(audio_file, sr=settings.SAMPLE_RATE)
    wav = preemphasis(wav)

    # model expects float32 input
    if wav.dtype != np.float32:
        wav = wav.astype(np.float32)

    # load model
    model = __load_model(model_name, pretrained_path)

    # add a batch dimension and move to GPU
    wav = torch.FloatTensor(wav).unsqueeze(0).cuda()

    # inference
    print('Inference ...')
    with torch.no_grad():
        out_wav = model(wav)
        out_wav = out_wav[0].cpu().numpy()

    # optional low-pass filtering of the enhanced signal
    if lowpass_freq:
        out_wav = lowpass(out_wav, frequency=lowpass_freq)

    # undo pre-emphasis and clip to the valid [-1, 1] wav range before writing.
    # NOTE(review): librosa.output.write_wav was removed in librosa 0.8 —
    # this code requires librosa < 0.8 (or a switch to soundfile.write).
    librosa.output.write_wav(out_path, inv_preemphasis(out_wav).clip(-1., 1.), settings.SAMPLE_RATE)

    print('Finish !')
# Example 2
def validate(meta_dir: str, out_dir: str, model_name: str, pretrained_path: str, batch_size: int = 64,
             num_workers: int = 16,):
    """Run the model over the voice-bank validation set and dump wav triples.

    For every validation sample, writes ``{idx}_noise.wav`` (input, de-emphasized
    on save), ``{idx}_pred.wav`` (model output) and ``{idx}_clean.wav`` (ground
    truth) into *out_dir*.

    :param meta_dir: voice bank meta directory
    :param out_dir: output directory for the result wav files
    :param model_name: name of the model to build
    :param pretrained_path: pretrained checkpoint file path
    :param batch_size: batch size of the validation loader
    :param num_workers: workers of the data loader
    """
    preemp = PreEmphasis().cuda()

    # load model
    model = __load_model(model_name, pretrained_path)

    # load validation data loader
    _, valid_loader = voice_bank.get_datasets(
        meta_dir, batch_size=batch_size, num_workers=num_workers, fix_len=None, audio_mask=True
    )

    # make sure the output directory exists
    os.makedirs(out_dir, exist_ok=True)

    # inference loop
    print('Process Validation Dataset ...')
    noise_all = []
    clean_all = []
    results = []
    for noise, clean, *others in tqdm(valid_loader):
        noise = noise.cuda()
        noise = preemp(noise.unsqueeze(1)).squeeze(1)
        with torch.no_grad():
            clean_hat = model(noise)
        # accumulate on CPU so GPU memory does not grow with the size of the
        # validation set; the later per-sample .cpu() calls are then no-ops
        noise_all.append(noise.cpu())
        clean_all.append(clean)
        results.append(clean_hat.cpu())

    # write all results
    print('Write all result into {} ...'.format(out_dir))
    for idx, (batch_clean_hat, batch_noise, batch_clean) in tqdm(enumerate(zip(results, noise_all, clean_all))):
        for in_idx, (clean_hat, noise, clean) in enumerate(zip(batch_clean_hat, batch_noise, batch_clean)):
            # global sample index keeps file names unique across batches
            noise_out_path = os.path.join(out_dir, '{}_noise.wav'.format(idx * batch_size + in_idx))
            pred_out_path = os.path.join(out_dir, '{}_pred.wav'.format(idx * batch_size + in_idx))
            clean_out_path = os.path.join(out_dir, '{}_clean.wav'.format(idx * batch_size + in_idx))

            librosa.output.write_wav(clean_out_path, clean.cpu().numpy(), settings.SAMPLE_RATE)
            librosa.output.write_wav(noise_out_path,
                                     inv_preemphasis(noise.cpu().numpy()), settings.SAMPLE_RATE)
            librosa.output.write_wav(pred_out_path,
                                     inv_preemphasis(clean_hat.cpu().numpy()).clip(-1., 1.), settings.SAMPLE_RATE)

    print('Finish !')
# Example 3
def test_worker(out_wav, file_path, in_dir, out_dir, sample_rate, wav_len):
    try:
        if wav_len == 1:
            return
        # make output path
        sub_dir = os.path.dirname(file_path).replace(in_dir, '')
        file_out_dir = os.path.join(out_dir, sub_dir)
        os.makedirs(file_out_dir, exist_ok=True)
        out_file_path = os.path.join(file_out_dir, os.path.basename(file_path))
        out_wav = out_wav[:wav_len]
        out_wav = inv_preemphasis(out_wav.squeeze())
        librosa.output.write_wav(out_file_path, out_wav, sample_rate)
    except Exception:
        print(f'{file_path} has an error')
# Example 4
def test_dir(in_dir: str, out_dir: str, model_name: str, pretrained_path: str):
    """Denoise every ``*.wav`` file in *in_dir* and write results to *out_dir*.

    For each input file ``name.wav`` writes ``name_noise.wav`` (the original)
    and ``name_pred.wav`` (the model output) into *out_dir*.

    :param in_dir: directory containing input wav files (non-recursive)
    :param out_dir: output directory
    :param model_name: name of the model to build
    :param pretrained_path: pretrained checkpoint file path
    """
    # list up wave files in the given directory (top level only)
    print('List up wave files in given directory ...')
    file_list = glob.glob(os.path.join(in_dir, '*.wav'))

    # load model
    model = __load_model(model_name, pretrained_path)

    # make sure the output directory exists
    os.makedirs(out_dir, exist_ok=True)

    # inference loop
    print('Process files ...')
    noise_all = []
    results = []

    # TODO: Convert it to batch mode
    for file_path in tqdm(file_list):
        # load wave, resampled to the project sample rate
        origin_wav, _ = librosa.load(file_path, sr=settings.SAMPLE_RATE)

        # pre-emphasize before feeding the model
        wav = preemphasis(origin_wav)

        # add a batch dimension and move to GPU
        wav = torch.FloatTensor(wav).unsqueeze(0).cuda()

        # inference
        with torch.no_grad():
            clean_hat = model(wav)

        noise_all.append(origin_wav)
        # accumulate on CPU so GPU memory does not grow with the file count
        results.append(clean_hat.squeeze().cpu())

    # write all results
    print('Write all result into {} ...'.format(out_dir))
    for file_path, clean_hat, noise in zip(file_list, results, noise_all):
        # splitext keeps dots inside the name ('a.b.wav' -> 'a.b', not 'a')
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        noise_out_path = os.path.join(out_dir, '{}_noise.wav'.format(file_name))
        clean_out_path = os.path.join(out_dir, '{}_pred.wav'.format(file_name))

        librosa.output.write_wav(noise_out_path, noise, settings.SAMPLE_RATE)
        librosa.output.write_wav(clean_out_path, inv_preemphasis(clean_hat.cpu().numpy()), settings.SAMPLE_RATE)

    print('Finish !')
# Example 5
def run(*audio_files: str,
        out_path: str,
        model_name: str,
        pretrained_path: str,
        lowpass_freq: int = 0,
        sample_rate: int = 22050):
    """Denoise one or more audio files (CPU inference) and write the results.

    Each input file ``name.ext`` is written as ``name_out.wav`` inside the
    *out_path* directory.

    :param audio_files: paths of input audio files
    :param out_path: output directory for the result wav files
    :param model_name: name of the model to build
    :param pretrained_path: pretrained checkpoint file path
    :param lowpass_freq: if non-zero, apply a low-pass filter at this cutoff frequency
    :param sample_rate: sample rate used for loading and writing
    """
    # preserve original behavior on empty input: no model is loaded
    if not audio_files:
        print('Finish !')
        return

    # load the model once — it is invariant across input files
    model = __load_model(model_name, pretrained_path)

    for audio_file in audio_files:
        print('Loading audio file...')
        # resampling is forced, so the returned sample rate is unused
        wav, _ = librosa.load(audio_file, sr=sample_rate)
        wav = preemphasis(wav)

        # model expects float32 input
        if wav.dtype != np.float32:
            wav = wav.astype(np.float32)

        # add a batch dimension (stays on CPU)
        wav = torch.FloatTensor(wav).unsqueeze(0)

        # inference
        print('Inference ...')
        with torch.no_grad():
            out_wav = model(wav)
            out_wav = out_wav[0].cpu().numpy()

        # optional low-pass filtering of the enhanced signal
        if lowpass_freq:
            out_wav = lowpass(out_wav, frequency=lowpass_freq)

        # undo pre-emphasis, clip to [-1, 1] and save as <basename>_out.wav
        librosa.output.write_wav(
            os.path.join(
                out_path,
                os.path.basename(audio_file).rsplit(".", 1)[0] + "_out.wav"),
            inv_preemphasis(out_wav).clip(-1., 1.), sample_rate)

    print('Finish !')
# Example 6
def validate(meta_dir: str,
             model_name: str,
             pretrained_path: str,
             out_dir: str = '',
             batch_size: int = 64,
             num_workers: int = 16,
             sr: int = 22050):
    """
    Evaluation on validation dataset. It calculates PESQ. If you wanna get validation audio files, put out_dir.
    :param meta_dir: voice bank meta directory
    :param model_name: model name
    :param pretrained_path: pretrained checkpoint file path
    :param out_dir: output directory
    :param batch_size: batch size for evaluating datasets
    :param num_workers: workers of data loader
    :param sr: training sample rate
    """

    preemp = PreEmphasis()

    # load model
    model = __load_model(model_name, pretrained_path)

    # load validation data loader
    _, valid_loader = voice_bank.get_datasets(meta_dir,
                                              batch_size=batch_size,
                                              num_workers=num_workers,
                                              fix_len=0,
                                              audio_mask=True)

    # loop all
    print('Process Validation Dataset (with PESQ) ...')
    pesq_score = 0.
    count = 0

    # accumulate batches only when audio files were requested
    if out_dir:
        noise_all = []
        clean_all = []
        results = []

    for noise, clean, *others in tqdm(valid_loader, desc='validate'):
        # CPU inference: pre-emphasize via the module (expects a channel dim)
        noise = preemp(noise.unsqueeze(1)).squeeze(1)
        with torch.no_grad():
            clean_hat = model(noise)

        clean = clean.cpu().numpy()
        clean_hat = clean_hat.cpu().numpy()

        # calculate PESQ per sample
        for clean_sample, clean_hat_sample in zip(clean, clean_hat):
            # PESQ (wideband) is defined at 16 kHz, so resample first.
            # NOTE(review): positional (y, orig_sr, target_sr) args — this is
            # the librosa < 0.10 resample signature; newer librosa requires
            # keyword arguments.
            clean_sample = librosa.core.resample(clean_sample, sr, 16000)
            clean_hat_sample = librosa.core.resample(clean_hat_sample, sr,
                                                     16000)

            item_score = pesq(16000, clean_sample,
                              inv_preemphasis(clean_hat_sample).clip(-1., 1.),
                              'wb')
            pesq_score += item_score
            count += 1

        if out_dir:
            noise_all.append(noise.cpu().numpy())
            clean_all.append(clean)
            results.append(clean_hat)

    print(f'PESQ Score : {pesq_score / count}')

    if out_dir:
        # make sure the output directory exists
        os.makedirs(out_dir, exist_ok=True)
        # write all results
        print('Write all result into {} ...'.format(out_dir))
        for idx, (batch_clean_hat, batch_noise, batch_clean) in tqdm(
                enumerate(zip(results, noise_all, clean_all))):
            for in_idx, (clean_hat, noise, clean) in enumerate(
                    zip(batch_clean_hat, batch_noise, batch_clean)):
                # global sample index keeps file names unique across batches
                noise_out_path = os.path.join(
                    out_dir, '{}_noise.wav'.format(idx * batch_size + in_idx))
                pred_out_path = os.path.join(
                    out_dir, '{}_pred.wav'.format(idx * batch_size + in_idx))
                clean_out_path = os.path.join(
                    out_dir, '{}_clean.wav'.format(idx * batch_size + in_idx))

                librosa.output.write_wav(clean_out_path, clean,
                                         settings.SAMPLE_RATE)
                librosa.output.write_wav(noise_out_path,
                                         inv_preemphasis(noise),
                                         settings.SAMPLE_RATE)
                librosa.output.write_wav(
                    pred_out_path,
                    inv_preemphasis(clean_hat).clip(-1., 1.),
                    settings.SAMPLE_RATE)

        print('Finish writing files.')