Exemple #1
0
def eval(ref_name, enh_name, nsy_name, results):
    """Score one utterance and append its metrics to ``results``.

    Loads the three signals named by the arguments, trims them to their
    common length, and computes PESQ, STOI, SI-SDR and SDR twice: once for
    the ``nsy`` signal and once for the ``enh`` signal, both against ``ref``.

    Args:
        ref_name: Path of the reference file; its last ``/``-separated
            component is used as the utterance id.
        enh_name: Path of the enhanced file.
        nsy_name: Path of the noisy file.
        results: Collector with an ``append`` method. On success one entry
            ``[utt_id, {metric: [nsy_score, enh_score]}]`` is appended.

    If loading or scoring fails, the exception is printed and nothing is
    appended (previously the function crashed on unbound locals).
    """
    try:
        utt_id = ref_name.split('/')[-1]
        ref, sr = audioread(ref_name)
        enh, sr = audioread(enh_name)
        nsy, sr = audioread(nsy_name)

        # Trim all three signals to the shortest one so every metric sees
        # equally long inputs, whichever file happens to be longer.
        common_len = min(ref.shape[0], enh.shape[0], nsy.shape[0])
        ref = ref[:common_len]
        enh = enh[:common_len]
        nsy = nsy[:common_len]

        scores = {
            'pesq': [pesq(ref, nsy, sr), pesq(ref, enh, sr)],
            'stoi': [
                stoi(ref, nsy, sr, extended=False),
                stoi(ref, enh, sr, extended=False),
            ],
            'si_sdr': [si_sdr(nsy, ref), si_sdr(enh, ref)],
            'sdr': [sdr(nsy, ref), sdr(enh, ref)],
        }
    except Exception as e:
        # Best effort: report the failure and skip this utterance.
        print(e)
        return

    results.append([utt_id, scores])
Exemple #2
0
    def eval(self, audio_est, audio_ref):
        """Score an estimated audio file against a reference audio file.

        Both files are read with ``sf.read``, trimmed to the shorter of the
        two, and scored with the metric selected by ``self.metric``.

        Args:
            audio_est: File holding the estimated audio.
            audio_ref: File holding the reference audio.

        Returns:
            A single score for 'rmse', 'pesq', 'stoi' and 'estoi'; a
            ``(rmse, pesq, stoi)`` tuple for 'all'.

        Raises:
            ValueError: If the two files have different sampling rates or
                ``self.metric`` is not one of the supported names.
        """
        x_est, fs_est = sf.read(audio_est)
        x_ref, fs_ref = sf.read(audio_ref)

        # Fail fast: no point aligning signals that cannot be compared.
        if fs_est != fs_ref:
            raise ValueError(
                'Sampling rate is different among estimated audio and reference audio'
            )

        # Align by trimming both signals to the shorter length.
        len_x = min(len(x_est), len(x_ref))
        x_est = x_est[:len_x]
        x_ref = x_ref[:len_x]

        if self.metric == 'rmse':
            return compute_rmse(x_est, x_ref)
        elif self.metric == 'pesq':
            return pesq(x_ref, x_est, fs_est)
        elif self.metric == 'stoi':
            return stoi(x_ref, x_est, fs_est, extended=False)
        elif self.metric == 'estoi':
            return stoi(x_ref, x_est, fs_est, extended=True)
        elif self.metric == 'all':
            score_rmse = compute_rmse(x_est, x_ref)
            score_pesq = pesq(x_ref, x_est, fs_est)
            score_stoi = stoi(x_ref, x_est, fs_est, extended=False)
            return score_rmse, score_pesq, score_stoi
        else:
            raise ValueError(
                'Evaluation only support: rmse, pesq, (e)stoi, all')
Exemple #3
0
    def calc_true_pesq(self, x, y, s, mask, fs=16000):
        """Compute per-item PESQ of ``x``, ``y`` and ``s`` against ``s``.

        For every index ``i`` along the first axis of ``x``, the items
        ``x[i]``, ``y[i]`` and ``s[i]`` are cropped along their second axis
        to ``int(sum(mask[i]))`` entries, inverted with ``torch.istft``
        (n_fft=512, win_length=512, hop_length=128, normalized) and scored
        with ``pesq(reference=s, degraded=..., fs)``.

        Returns:
            ``torch.tensor`` with one ``[score_x, score_y, score_s]`` row per
            item, where ``score_s`` is the score of ``s`` against itself.
        """

        def to_waveform(spec):
            # Shared inverse-STFT settings for all three signals.
            wave = torch.istft(spec,
                               n_fft=512,
                               win_length=512,
                               hop_length=128,
                               normalized=True)
            return wave.detach().cpu().numpy()

        rows = []
        for idx in range(x.shape[0]):
            valid = int(torch.sum(mask[idx]))
            wav_x = to_waveform(x[idx][:, :valid])
            wav_y = to_waveform(y[idx][:, :valid])
            wav_s = to_waveform(s[idx][:, :valid])
            rows.append([
                pesq(wav_s, wav_x, fs),
                pesq(wav_s, wav_y, fs),
                pesq(wav_s, wav_s, fs),
            ])
        return torch.tensor(rows)
def eval_pesq(predicted_path,
              noisy_test,
              clean_test,
              out_path,
              img_path='/u/anakuzne/data/snr0_test_img/',
              fs=16000):
    '''
    Compute PESQ of the predicted signals against the clean and the noisy
    references and write the scores to ``out_path + 'PESQ.csv'``.

    Params:
        predicted_path: directory of .npy STFTs predicted by the model
        noisy_test: directory of .npy STFTs of the noise mixture
        clean_test: root passed to ``collect_paths`` to find the clean,
            unmixed test signals
        out_path: prefix the string 'PESQ.csv' is appended to
        img_path: directory of .npy arrays added to the prediction before
            inversion (padded with ``pad(..., 1339)``)
        fs: sampling rate used for resampling the clean signal and for PESQ

    Raises:
        ValueError: if the .npy file derived from a clean path is missing
            from one of the three directories.
    '''
    # Sets give O(1) membership tests; the file name itself is the key, so
    # no positional index is shared between the different directories.
    noisy_ref = set(os.listdir(noisy_test))
    clean_ref = collect_paths(clean_test)
    predicted = set(os.listdir(predicted_path))
    imag = set(os.listdir(img_path))

    scores_clean_ref = []
    scores_noisy_ref = []

    print('Calculating PESQ...')
    for p in tqdm(clean_ref):
        clean_r, sr = librosa.load(p, mono=True)
        clean_r = librosa.core.resample(clean_r, orig_sr=sr, target_sr=fs)

        # <parent dir>_<file stem>.npy is the name shared by all directories.
        parts = p.split('/')
        fname = parts[-2] + '_' + parts[-1].split('.')[0] + '.npy'
        for listing, directory in ((predicted, predicted_path),
                                   (imag, img_path),
                                   (noisy_ref, noisy_test)):
            if fname not in listing:
                raise ValueError('%s is not in %s' % (fname, directory))

        pred_spec = np.load(os.path.join(predicted_path, fname))

        # Compare to clean signal
        imag_num = pad(np.load(os.path.join(img_path, fname)), 1339)
        pred = librosa.istft(pred_spec + imag_num,
                             hop_length=256,
                             win_length=512)
        pred = pred[:clean_r.shape[0]]
        scores_clean_ref.append(pesq(clean_r, pred, fs))

        # Compare to degraded signal. The same prediction file is reused;
        # the old code indexed the predicted listing with a position found
        # in the noisy listing, which could select a different file.
        noisy_r = np.load(os.path.join(noisy_test, fname))
        noisy_r = librosa.istft(noisy_r, hop_length=256, win_length=512)
        pred = librosa.istft(pred_spec, hop_length=256, win_length=512)
        pred = pred[:noisy_r.shape[0]]
        scores_noisy_ref.append(pesq(noisy_r, pred, fs))

    wav_names = [n.split('/')[-1] for n in clean_ref]
    data = {
        'fname': wav_names,
        'PESQ_clean_ref': scores_clean_ref,
        'PESQ_noisy_ref': scores_noisy_ref
    }
    df = pd.DataFrame(data,
                      columns=['fname', 'PESQ_clean_ref', 'PESQ_noisy_ref'])
    df.to_csv(out_path + 'PESQ.csv')
Exemple #5
0
def getPESQ(name, clean_speech_dir, noise_num):
    """Print per-utterance and average PESQ for mixed and enhanced audio.

    Files are taken, sorted by path, from
    ``exp/data_for_ac/<clean_speech_dir>``,
    ``exp/data_for_ac/<name>/enhanced_wav`` and
    ``exp/data_for_ac/<name>/mixed_wav``. Every clean file is repeated
    ``noise_num`` times and paired positionally with the enhanced and mixed
    files.

    Args:
        name: Experiment sub-directory under ``exp/data_for_ac/``.
        clean_speech_dir: Clean-speech sub-directory under
            ``exp/data_for_ac/``.
        noise_num: Number of consecutive enhanced/mixed files that belong to
            each clean file.
    """

    def sorted_paths(directory):
        # Full paths of the directory entries, in sorted order.
        return sorted(
            os.path.join(directory, entry) for entry in os.listdir(directory))

    clean_dir_list = sorted_paths('exp/data_for_ac/' + clean_speech_dir)
    enhanced_dir_list = sorted_paths('exp/data_for_ac/' + name +
                                     '/enhanced_wav')
    mixed_dir_list = sorted_paths('exp/data_for_ac/' + name + '/mixed_wav')

    clean_dir_list_long = [
        clean_dir for clean_dir in clean_dir_list for _ in range(noise_num)
    ]

    sum_score_raw = 0.0
    sum_score_en = 0.0
    sum_score_imp = 0.0
    n_scored = 0
    for i, (clean_wave, enhanced_wave, mixed_wave) in enumerate(
            zip(clean_dir_list_long, enhanced_dir_list, mixed_dir_list)):
        ref, sr = sf.read(clean_wave)
        mixed, sr = sf.read(mixed_wave)
        enhanced, sr = sf.read(enhanced_wave)

        score_raw = pesq(ref, mixed, sr)
        score_en = pesq(ref, enhanced, sr)
        print(
            str(i % noise_num + 1) + "_score_raw, score_en:", score_raw,
            score_en)
        sum_score_raw += score_raw
        sum_score_en += score_en
        sum_score_imp += (score_en - score_raw)
        n_scored += 1

    # zip() stops at the shortest list, so average over what was actually
    # scored rather than over the expected number of pairs.
    if n_scored == 0:
        print('No utterances were scored.')
        return

    avg_score_raw = sum_score_raw / n_scored
    avg_score_en = sum_score_en / n_scored
    avg_score_imp = sum_score_imp / n_scored
    print('avg_score_raw: %f,\navg_score_en: %f,\nimp: %f' %
          (avg_score_raw, avg_score_en, avg_score_imp))
Exemple #6
0
def separate_sample(sess, model, config, mix, c1, c2):
    """Run the model on one mixture and score the best channel permutation.

    Args:
        sess: Session object whose ``run`` evaluates ``model.data_out``.
        model: Object exposing ``mix_input`` and ``data_out``.
        config: Nested dict; reads ``config['training']['n_output']``,
            ``config['training']['n_speaker']`` and ``config['model']['hop']``.
        mix: 1-D mixture signal (it is padded along its only axis).
        c1, c2: The two reference signals.

    Returns:
        ``(_sdr, _sisnr, _pesq, perm_output)``: SDR from
        ``mir_eval.separation.bss_eval_sources``, the result of
        ``signal_to_distortion_ratio`` on the mean-removed signals, a list of
        two PESQ scores computed at 8000 Hz, and the chosen permutation as a
        nested list.
    """
    n_channel = config['training']['n_output']
    n_speaker = config['training']['n_speaker']
    stride = config['model']['hop']

    # Pad the mixture up to the next multiple of the stride. When the length
    # is already a multiple, a full extra stride is added (as before); the
    # padding is cut off the output again below.
    num_pad_values = stride - mix.shape[0] % stride
    mix = np.pad(mix, (0, num_pad_values), mode='constant', constant_values=0)

    output, = sess.run(fetches=[model.data_out],
                       feed_dict={model.mix_input: np.expand_dims(mix, 0)})[0]

    output = np.array(output)
    if num_pad_values != 0:
        output = output[:, :-num_pad_values]

    clean_wav = np.array([c1, c2])

    # Enumerate every assignment of output channels to speakers and keep the
    # one with the largest summed score.
    perms = np.array(list(itertools.permutations(range(n_channel), n_speaker)))
    perms_onehot = (np.arange(perms.max() + 1) == perms[..., None]).astype(int)

    cross_sdr = signal_to_distortion_ratio(np.expand_dims(clean_wav, 1),
                                           np.expand_dims(output, 0))
    loss_sets = np.einsum('ij,pij->p', cross_sdr, perms_onehot)
    best_perm = perms[np.argmax(loss_sets)]
    pit_output = output[best_perm]

    # SDR
    _sdr, _sir, _sar, _perm = mir_eval.separation.bss_eval_sources(
        clean_wav, pit_output)
    # SISNR: remove the mean of each signal before scoring
    clean_wav_norm = clean_wav - np.mean(clean_wav, axis=-1, keepdims=True)
    pit_output_norm = pit_output - np.mean(pit_output, axis=-1, keepdims=True)
    _sisnr = signal_to_distortion_ratio(clean_wav_norm, pit_output_norm)
    # PESQ
    _pesq = [
        pesq(clean_wav[0], pit_output[0], 8000),
        pesq(clean_wav[1], pit_output[1], 8000)
    ]

    perm_output = np.expand_dims(best_perm, -1).tolist()

    return _sdr, _sisnr, _pesq, perm_output
Exemple #7
0
def evaluation(ref, est, mix):
	"""
	Wrapper function for evaluating the output of a NN. Metrics are PESQ,
	STOI, eSTOI and SSR.

	:param ref: Path to the original (reference point) file.
	:param est: Path to the estimated file.
	:param mix: Path to the mixture file (used for the SSR score).
	:return: Nothing; prints the PESQ, STOI, eSTOI and SSR values to stdout.
	"""
	reference_sources, sr_r = librosa.load(ref, sr=None)
	estimated_sources, sr_e = librosa.load(est, sr=None)
	mix_sources, sr_m = librosa.load(mix, sr=None)

	if sr_r != 16000 or sr_e != 16000 or sr_m != 16000:
		print("\nResampling at 16k...")
		# Each signal is resampled from its OWN native rate.
		ref_16k = librosa.resample(reference_sources, orig_sr=sr_r, target_sr=16000)
		est_16k = librosa.resample(estimated_sources, orig_sr=sr_e, target_sr=16000)
		mix_16k = librosa.resample(mix_sources, orig_sr=sr_m, target_sr=16000)
	else:
		ref_16k = reference_sources
		est_16k = estimated_sources
		mix_16k = mix_sources

	# All signals are at 16 kHz from here on, so every metric gets 16000
	# rather than the reference file's original rate.
	pesq_score = round(pesq(ref_16k, est_16k, 16000), 3)
	stoi_score = round(stoi(ref_16k, est_16k, 16000, extended=False), 2)
	estoi_score = round(stoi(ref_16k, est_16k, 16000, extended=True), 2)
	ssr_score = round(SSR(est_16k, mix_16k), 3)

	print("PESQ\t STOI\t eSTOI\t   SSR")
	print(pesq_score,"\t",stoi_score,"\t",estoi_score,"\t",ssr_score)
Exemple #8
0
def PESQ(ref_wav, deg_wav):
    """Return ``pesq(ref_wav, deg_wav, 16000)``.

    Args:
        ref_wav: reference wav
        deg_wav: degraded wav
    """
    sample_rate = 16000
    return pesq(ref_wav, deg_wav, sample_rate)
def calc_metrics(loader, actor, device):
    """Return the mean PESQ and mean STOI of ``actor`` over ``loader``.

    Each batch is a mapping with "noisy", "clean" and "mask" entries. The
    actor's two outputs are transposed on axes 1 and 2, combined with the
    noisy input by ``predict`` and turned into target/prediction pairs by
    ``inverse``; every pair is scored with PESQ and STOI at 16000 Hz.

    Returns:
        ``(PESQ, STOI)`` as 0-dim tensors (NaN when nothing was scored).
    """
    pesq_scores = []
    stoi_scores = []
    for batch in loader:
        noisy = batch["noisy"].unsqueeze(1).to(device)
        clean = batch["clean"].unsqueeze(1).to(device)
        mask = batch["mask"].to(device)

        real_out, imag_out = actor(noisy)
        real_out = torch.transpose(real_out, 1, 2)
        imag_out = torch.transpose(imag_out, 1, 2)
        estimate = predict(noisy.squeeze(1), (real_out, imag_out))

        targets, preds = inverse(clean.squeeze(), estimate, mask.squeeze(),
                                 noisy)

        for idx in range(len(targets)):
            target_np = targets[idx].detach().cpu().numpy()
            pred_np = preds[idx].detach().cpu().numpy()
            pesq_scores.append(pesq(target_np, pred_np, 16000))
            stoi_scores.append(stoi(target_np, pred_np, 16000))

    mean_pesq = torch.mean(torch.tensor(pesq_scores))
    mean_stoi = torch.mean(torch.tensor(stoi_scores))
    return mean_pesq, mean_stoi
Exemple #10
0
def cal_score(clean, enhanced):
    """Peak-normalise both signals and score them with PESQ and STOI.

    Each signal is divided by its own maximum absolute value, then scored
    at 16000 Hz.

    Returns:
        ``(pesq, stoi)``, each rounded to 5 decimals.
    """
    clean_norm = clean / abs(clean).max()
    enhanced_norm = enhanced / abs(enhanced).max()

    stoi_value = stoi(clean_norm, enhanced_norm, 16000)
    pesq_value = pesq(clean_norm, enhanced_norm, 16000)

    return round(pesq_value, 5), round(stoi_value, 5)
def main():
    """Evaluate two separated tracks found in ``--results_dir``.

    Reads ``audio1_separated.wav``, ``audio2_separated.wav``, ``audio1.wav``,
    ``audio2.wav`` and ``audio_mixed.wav`` from the results directory at
    ``--audio_sampling_rate`` and writes SDR, mixture SDR, SDR improvement,
    SIR, SAR, mean PESQ and mean STOI to ``eval.txt`` in the same directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--results_dir', type=str, required=True)
    parser.add_argument('--audio_sampling_rate', type=int, default=16000)
    args = parser.parse_args()

    def load(filename):
        # Load one file from the results directory at the requested rate.
        wave, _ = librosa.load(os.path.join(args.results_dir, filename),
                               sr=args.audio_sampling_rate)
        return wave

    audio1 = load('audio1_separated.wav')
    audio2 = load('audio2_separated.wav')
    audio1_gt = load('audio1.wav')
    audio2_gt = load('audio2.wav')
    audio_mix = load('audio_mixed.wav')

    # SDR, SIR, SAR
    sdr, sir, sar = getSeparationMetrics(audio1, audio2, audio1_gt, audio2_gt)
    sdr_mixed, _, _ = getSeparationMetrics(audio_mix, audio_mix, audio1_gt,
                                           audio2_gt)

    # PESQ: ground truth goes first (reference), the separated track second
    # (degraded), matching the argument order of the STOI calls below.
    pesq_score1 = pesq(audio1_gt, audio1, args.audio_sampling_rate)
    pesq_score2 = pesq(audio2_gt, audio2, args.audio_sampling_rate)
    pesq_score = (pesq_score1 + pesq_score2) / 2

    # STOI
    stoi_score1 = stoi(audio1_gt,
                       audio1,
                       args.audio_sampling_rate,
                       extended=False)
    stoi_score2 = stoi(audio2_gt,
                       audio2,
                       args.audio_sampling_rate,
                       extended=False)
    stoi_score = (stoi_score1 + stoi_score2) / 2

    # The context manager closes the file even if formatting fails.
    with open(os.path.join(args.results_dir, 'eval.txt'), 'w') as output_file:
        output_file.write(
            "%3f %3f %3f %3f %3f %3f %3f" %
            (sdr, sdr_mixed, sdr - sdr_mixed, sir, sar, pesq_score,
             stoi_score))
Exemple #12
0
 def forward(self, clean, enhanced):
     """Return min-max normalised PESQ scores for a batch of signal pairs.

     Args:
         clean: Sequence of reference signals (tensors or array-likes).
         enhanced: Sequence of signals to score, same length as ``clean``.

     Returns:
         1-D tensor on ``self._device`` holding ``(pesq + 0.5) / 5`` for
         every pair, with PESQ evaluated at ``self.sr``.
     """
     assert len(clean) == len(enhanced)

     def as_array(signal):
         # pesq works on numpy data; tensors are detached and moved to the
         # CPU first, anything else is viewed as an array.
         if torch.is_tensor(signal):
             return signal.detach().cpu().numpy()
         return np.asarray(signal)

     scores = []
     for c, e in zip(clean, enhanced):
         q = pesq(as_array(c), as_array(e), self.sr)
         q = (q + 0.5) / (4.5 + 0.5)  # (q-min)/(max-min)
         scores.append(q)
     return torch.tensor(scores, device=self._device)
Exemple #13
0
def compute_PESQ(clean_signal, noisy_signal, sr=16000):
    """Compute PESQ.

    Args:
        clean_signal: clean speech signal
        noisy_signal: noisy speech signal
        sr: sampling rate
    """
    score = pesq(clean_signal, noisy_signal, sr)
    return score
def calculate_pesq(real, fake, sr):
    """Return the 8 kHz PESQ of ``fake`` against ``real``, or 0.0 on failure.

    Args:
        real: Reference signal.
        fake: Signal to score.
        sr: Sampling rate of both signals; anything other than 8000 is
            resampled to 8000 before scoring.

    Returns:
        The PESQ score, or ``0.0`` if resampling or scoring raised.
    """
    # PESQ only works with sampling rates 8000 or 16000, so we must resample
    try:
        if sr != 8000:
            real = librosa.resample(real, orig_sr=sr, target_sr=8000)
            fake = librosa.resample(fake, orig_sr=sr, target_sr=8000)
        return pesq(real, fake, 8000)
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit still propagate.
        return 0.0
def inference(clean_path, noisy_path, model_path, out_path):
    """Enhance the test set, save the outputs and report mean PESQ / STOI.

    Loads ``actor_best.pth`` from ``model_path`` into a ``DataParallel``
    ``Actor`` on ``cuda:1``, runs it over the 'Test' split built from
    ``clean_path`` / ``noisy_path``, writes every prediction to ``out_path``
    under the names listed in ``noisy_path`` and stores the average scores in
    ``test_scores.txt`` inside ``model_path``.

    Args:
        clean_path: Directory passed to ``Data`` as the clean set.
        noisy_path: Directory passed to ``Data`` as the noisy set; its
            listing provides the output file names.
        model_path: Directory holding ``actor_best.pth``.
        out_path: Directory the enhanced files are written to.
    """
    device = torch.device("cuda:1")
    model = Actor()
    model = nn.DataParallel(model, device_ids=[1, 2])
    model.load_state_dict(
        torch.load(os.path.join(model_path, 'actor_best.pth')))
    model = model.to(device)
    # NOTE(review): the model is never switched to eval() mode; confirm
    # whether that is intended before changing it.

    dataset = Data(clean_path, noisy_path, mode='Test')
    loader = data.DataLoader(dataset,
                             batch_size=32,
                             shuffle=False,
                             collate_fn=collate_custom)

    # NOTE(review): assumes os.listdir order matches the dataset order.
    fnames = os.listdir(noisy_path)

    print("Num files:", len(fnames))

    pesq_all = []
    stoi_all = []
    fcount = 0

    # No gradients are needed for scoring; skipping autograd saves memory.
    with torch.no_grad():
        for batch in tqdm(loader):
            x = batch["noisy"].unsqueeze(1).to(device)
            t = batch["clean"].unsqueeze(1).to(device)
            m = batch["mask"].to(device)
            out_r, out_i = model(x)
            out_r = torch.transpose(out_r, 1, 2)
            out_i = torch.transpose(out_i, 1, 2)
            y = predict(x.squeeze(1), (out_r, out_i))
            t = t.squeeze()
            m = m.squeeze()
            x = x.squeeze()
            source, targets, preds = inverse(t, y, m, x)

            for j in range(len(targets)):
                t_j = targets[j].detach().cpu().numpy()
                p_j = preds[j].detach().cpu().numpy()
                # Rescale the prediction to norm 10; an all-zero prediction
                # is left as is instead of being turned into NaNs.
                norm = np.linalg.norm(p_j)
                if norm > 0:
                    p_j = 10 * (p_j / norm)
                curr_pesq = pesq(t_j, p_j, 16000)
                curr_stoi = stoi(t_j, p_j, 16000)
                pesq_all.append(curr_pesq)
                stoi_all.append(curr_stoi)
                try:
                    sf.write(os.path.join(out_path, fnames[fcount]), p_j,
                             16000)
                except IndexError:
                    print("Fcount:", fcount, len(fnames))
                fcount += 1

    PESQ = torch.mean(torch.tensor(pesq_all))
    STOI = torch.mean(torch.tensor(stoi_all))

    print("PESQ: ", PESQ, "STOI: ", STOI)

    with open(os.path.join(model_path, 'test_scores.txt'), 'w') as fo:
        fo.write("Avg PESQ: " + str(float(PESQ)) + " Avg STOI: " +
                 str(float(STOI)))
 def __call__(self, a, b):
     """
     Score two equally long 1-D signals with PESQ at ``self.sr``.

     :param a: time-domain signal
     :param b: time-domain signal
     :return: the value of ``pesq(a, b, self.sr)``
     """
     # Both checks are kept as assertions, exactly as callers expect.
     assert len(a.shape) == 1
     assert len(a) == len(b)
     return pesq(a, b, self.sr)
Exemple #17
0
def pesq_by_inout(in_file, out_file):
    """Return the PESQ of the file ``out_file`` against the file ``in_file``.

    Only the first channel of multi-channel data is used. The shorter of the
    two signals is zero-padded at the end so both have the same length; the
    sampling rate reported for ``out_file`` is the one given to ``pesq``.

    Args:
        in_file: File read as the reference signal.
        out_file: File read as the degraded signal.
    """
    sr, ref = read(in_file)
    sr, deg = read(out_file)

    if len(ref.shape) >= 2:
        ref = ref[:, 0]
    if len(deg.shape) >= 2:
        deg = deg[:, 0]

    # Pad whichever signal is shorter; a negative pad width (reference
    # longer than the degraded signal) would make np.pad raise.
    length_gap = len(deg) - len(ref)
    if length_gap >= 0:
        ref = np.pad(ref, (0, length_gap), "constant", constant_values=0)
    else:
        deg = np.pad(deg, (0, -length_gap), "constant", constant_values=0)
    return pesq(ref, deg, sr)
Exemple #18
0
def compute_PESQ(clean_signal, noisy_signal, sr=16000):
    """
    Compute the PESQ metric by calling ``pesq`` directly.

    Notes:
        The original note on this function says that pypesq wraps the
        official PESQ C code, which on some utterances calls exit() and
        thereby stops the Python script without any catchable exception, and
        it describes running the computation in a subprocess with a timeout
        (https://stackoverflow.com/a/29378611), returning -1 on failure.
        NOTE(review): this body does none of that; it is a plain in-process
        call with no fallback value.
    Returns:
        The value returned by ``pesq(clean_signal, noisy_signal, sr)``.
    """
    score = pesq(clean_signal, noisy_signal, sr)
    return score
Exemple #19
0
def pesq_metric(y_hat, y_true):
    """Return the mean PESQ of ``y_hat`` against ``y_true``.

    Both tensors are moved to the CPU and converted to numpy; items are
    paired along the first axis and scored at ``SAMPLE_RATE``.
    """
    with torch.no_grad():
        estimates = y_hat.cpu().data.numpy()
        references = y_true.cpu().data.numpy()

        n_items = len(references)
        total = sum(
            pesq(references[idx], estimates[idx], SAMPLE_RATE)
            for idx in range(n_items))
        return total / n_items
def eval(ref_name, enh_name, nsy_name, results):
    """Score one utterance and append its metrics to ``results``.

    Computes PESQ, STOI and SI-SDR for the ``nsy`` signal and for the ``enh``
    signal, both against ``ref``.

    Args:
        ref_name: Path of the reference file; its last ``/``-separated
            component is used as the utterance id.
        enh_name: Path of the enhanced file.
        nsy_name: Path of the noisy file.
        results: Collector with an ``append`` method. On success one entry
            ``[utt_id, {metric: [nsy_score, enh_score]}]`` is appended.

    If loading or scoring fails, the exception is printed and nothing is
    appended (previously the function crashed on unbound locals).
    """
    try:
        utt_id = ref_name.split('/')[-1]
        ref, sr = audioread(ref_name)
        enh, sr = audioread(enh_name)
        nsy, sr = audioread(nsy_name)
        scores = {
            'pesq': [pesq(ref, nsy, sr), pesq(ref, enh, sr)],
            'stoi': [
                stoi(ref, nsy, sr, extended=False),
                stoi(ref, enh, sr, extended=False),
            ],
            'si_sdr': [si_sdr(nsy, ref), si_sdr(enh, ref)],
        }
    except Exception as e:
        # Best effort: report the failure and skip this utterance.
        print(e)
        return

    results.append([utt_id, scores])
Exemple #21
0
    def pesq_score(self, ref_audio: 'Audio') -> float:
        """
        Compute the PESQ score of this audio against a reference.

        Args:
            ref_audio: Reference audio of type `Audio`; its sample rate must
                equal this object's sample rate.

        Returns:
            float -- value of ``pypesq.pesq(ref_audio.wave, self.wave, self.sr)``.
        """
        assert ref_audio.sr == self.sr, 'Sample rate should be same'
        reference_wave = ref_audio.wave
        degraded_wave = self.wave
        return pypesq.pesq(reference_wave, degraded_wave, self.sr)
Exemple #22
0
def pesq_metric(y_hat, bd):
    """Return the mean PESQ of ``y_hat`` against ``bd['y']`` as a tensor.

    Both are moved to the CPU and converted to numpy; for every index ``i``
    along the first axis, element ``[i, 0]`` of the target is scored against
    element ``[i, 0]`` of the estimate at ``SAMPLE_RATE``.
    """
    with torch.no_grad():
        estimates = y_hat.cpu().numpy()
        targets = bd['y'].cpu().numpy()  # target signal

        n_items = len(targets)
        total = 0
        for idx in range(n_items):
            total += pesq(targets[idx, 0], estimates[idx, 0], SAMPLE_RATE)

        return torch.tensor(total / n_items)
Exemple #23
0
def _eval(batch,
          metrics,
          including='output',
          sample_rate=8000,
          use_pypesq=False):
    """Compute the requested metrics for one batch.

    'estoi' and 'wer' (and 'pesq' when ``use_pypesq`` is set) are computed
    here; every other metric name is forwarded to ``get_metrics``.

    Args:
        batch: Mapping with 'mix', 'clean', 'enh' and 'snr' entries, plus
            'clean_text' and 'transcription' when 'wer' is requested.
        metrics: Iterable of metric names.
        including: Forwarded to ``get_metrics``; with 'input', every
            ``input_<metric>`` entry is renamed to ``<metric>``.
        sample_rate: Sampling rate handed to the metric functions.
        use_pypesq: Compute 'pesq' with ``pesq`` instead of ``get_metrics``.

    Returns:
        Dict of results, including ``'snr'`` taken from ``batch['snr'][0]``.
    """
    has_estoi = 'estoi' in metrics
    has_wer = 'wer' in metrics

    # Names handled locally must not be passed on to get_metrics.
    handled_here = []
    if use_pypesq:
        handled_here.append('pesq')
    if has_estoi:
        handled_here.append('estoi')
    if has_wer:
        handled_here.append('wer')
    if handled_here:
        metrics = [m for m in metrics if m not in handled_here]

    mixture = batch['mix']
    reference = batch['clean']
    enhanced = batch['enh']
    snr_values = batch['snr']

    res = get_metrics(mixture.numpy(),
                      reference.numpy(),
                      enhanced.numpy(),
                      sample_rate=sample_rate,
                      metrics_list=metrics,
                      including=including)

    if use_pypesq:
        res['pesq'] = pesq(reference.flatten(), enhanced.flatten(),
                           sample_rate)

    if has_estoi:
        res['estoi'] = stoi(reference.flatten(),
                            enhanced.flatten(),
                            sample_rate,
                            extended=True)

    if has_wer:
        res['wer'] = jiwer.wer(batch['clean_text'],
                               batch['transcription'],
                               truth_transform=_wer_trans,
                               hypothesis_transform=_wer_trans)

    if including == 'input':
        for name in metrics:
            res[name] = res.pop('input_' + name)

    res['snr'] = snr_values[0].item()
    return res
def generate_curriculum(clean_path, noisy_path, model_path):
    """Write the files of ``clean_path`` sorted by PESQ to ``train_sort.tsv``.

    Every file name listed in ``clean_path`` is loaded at 16 kHz from both
    ``noisy_path`` and ``clean_path`` and scored with
    ``pesq(clean, noisy)``. The resulting table (columns ``fname`` and
    ``pesq``), sorted by ascending score, is saved tab-separated as
    ``train_sort.tsv`` inside ``model_path``.
    """
    names = []
    scores = []
    for fname in tqdm(os.listdir(clean_path)):
        wav_noisy, _ = librosa.core.load(os.path.join(noisy_path, fname),
                                         sr=16000)
        wav_clean, _ = librosa.core.load(os.path.join(clean_path, fname),
                                         sr=16000)
        file_score = pesq(wav_clean, wav_noisy)
        names.append(fname)
        scores.append(file_score)

    table = pd.DataFrame.from_dict({"fname": names, "pesq": scores})
    table = table.sort_values(by=['pesq'])
    table.to_csv(os.path.join(model_path, "train_sort.tsv"), sep='\t')
Exemple #25
0
def loss_pesq(step, source, estimate_source):
    """Return the mean PESQ of a batch and save every estimate to disk.

    Both tensors are moved to the CPU. Item ``i`` is scored with
    ``pesq(source[i], estimate_source[i], sr)`` and the estimate is saved
    with ``torchaudio.save`` (sample_rate=16000) under
    ``<dns_home>/predict_folder/<test[step * batch_size + i]>``.
    ``sr``, ``dns_home``, ``test`` and ``batch_size`` are module-level names.
    """
    references = source.cpu()
    estimates = estimate_source.cpu()
    n_items = references.shape[0]

    total = 0
    for idx in range(n_items):
        total = total + pesq(references[idx], estimates[idx], sr)

        out_file = os.path.join(dns_home, 'predict_folder',
                                test[step * batch_size + idx])
        torchaudio.save(out_file,
                        estimates[idx].unsqueeze(0),
                        sample_rate=16000)

    return total / n_items
Exemple #26
0
def cal_PESQi(src_ref, src_est, mix):
    """Calculate average PESQ and PESQ improvement (PESQi) at 8000 Hz.

    Args:
        src_ref: iterable of reference signals
        src_est: iterable of estimated signals, paired with ``src_ref`` in
            order
        mix: mixture signal, scored against every reference
    Returns:
        (average PESQ of the estimates, average PESQ of the mixture,
        average per-source difference between the two)
    Raises:
        ZeroDivisionError: if there is no (reference, estimate) pair.
    """
    est_scores = []
    mix_scores = []
    for ref, est in zip(src_ref, src_est):
        est_scores.append(pesq(ref, est, 8000))
        mix_scores.append(pesq(ref, mix, 8000))

    num = len(est_scores)
    gains = [new - old for new, old in zip(est_scores, mix_scores)]
    return sum(est_scores) / num, sum(mix_scores) / num, sum(gains) / num
Exemple #27
0
def get_pesq(filepath_1, filepath_2):
    '''
    Score a generated file against the original clean file.

    argument:
        filepath_1: original clean .wav file path
        filepath_2: generated clean .wav file path
    return:
        (PESQ score, signal power of the original clean file)
    '''
    reference, reference_sr = librosa.load(filepath_1, sr=16000)
    generated, _ = librosa.load(filepath_2, sr=16000)

    # NOTE(review): this is 20*log10 of the mean SQUARE; a power in dB is
    # usually 10*log10 of it. Left unchanged; confirm what callers expect.
    mean_square = np.mean(np.square(reference))
    signal_power = 20 * log10(mean_square)

    score = pesq(reference, generated, reference_sr)

    return score, signal_power
Exemple #28
0
def main():
    """Print the mean PESQ and STOI of an enhanced test set.

    For every file in ``--test_clean_folder`` the matching enhanced file is
    ``enhanced_<name with 'clean' replaced by 'mix'>`` in
    ``--enhanced_folder``. Both are loaded at 16 kHz, trimmed to their
    common length and scored.
    """
    parser = argparse.ArgumentParser(description='Calculate performance index')
    parser.add_argument('--test_mix_folder',
                        default='../test-mix-2-babble',
                        type=str,
                        help='test-set-mix')
    parser.add_argument('--test_clean_folder',
                        default='../test-clean-2-babble',
                        type=str,
                        help='test-set-clean')
    parser.add_argument('--enhanced_folder',
                        default='../test-result',
                        type=str,
                        help='test-set-enhanced')

    opt = parser.parse_args()
    CLEAN_FOLDER = opt.test_clean_folder
    ENHANCED_FOLDER = opt.enhanced_folder

    pesqs = []
    stois = []

    for cleanfile in os.listdir(CLEAN_FOLDER):
        enhancedfile = 'enhanced_' + cleanfile.replace('clean', 'mix')

        ref, _ = librosa.load(os.path.join(CLEAN_FOLDER, cleanfile), sr=16000)
        deg_enh, _ = librosa.load(os.path.join(ENHANCED_FOLDER, enhancedfile),
                                  sr=16000)

        # Trim BOTH signals to the shorter one; trimming only the enhanced
        # signal leaves mismatched lengths when it is the shorter of the two.
        common_len = min(len(ref), len(deg_enh))
        ref = ref[:common_len]
        deg_enh = deg_enh[:common_len]

        pesqs.append(pesq.pesq(ref, deg_enh))
        stois.append(stoi(ref, deg_enh, fs_sig=16000))

    if not pesqs:
        print('No files found in', CLEAN_FOLDER)
        return

    print('Epesq:', np.mean(pesqs), "Estoi:", np.mean(stois))
Exemple #29
0
def cal_score(clean, enhanced):
    """Peak-normalise both signals and score them with PESQ and STOI.

    Each signal is divided by its own maximum absolute value, then scored
    at 16000 Hz.

    Returns:
        ``(pesq, stoi)``, each rounded to 5 decimals.
    """
    clean_norm = clean / abs(clean).max()
    enhanced_norm = enhanced / abs(enhanced).max()

    stoi_value = stoi(clean_norm, enhanced_norm, 16000)
    pesq_value = pesq(clean_norm, enhanced_norm, 16000)

    return round(pesq_value, 5), round(stoi_value, 5)

    #def get_filepaths(directory,folders='BabyCry.wav,cafeteria_babble.wav',ftype='.wav'):
    #    file_paths = []
    #    folders = folders.split(',')
    #    with open(directory, 'r') as f:
    #        for line in f:
    #            if str(line.split('/')[-3]) in folders:
    #                file_paths.append(line[:-1])
    #    return file_paths
    '''
Exemple #30
0
def sample_main():
    """Separate a synthetic mixture with FastICA and print quality metrics.

    Mixes the five sample files with ``mix_audio``, unmixes them with
    FastICA, writes every estimated source to ``result/<n>.wav`` and prints
    SDR / SIR / SAR and per-source PESQ.
    """
    sr = 16000
    wav_list = ['data/1.wav', 'data/2.wav', 'data/3.wav', 'data/4.wav', 'data/5.wav']
    src, mixed = mix_audio(wav_list)
    from sklearn.decomposition import FastICA
    W = FastICA(n_components=len(wav_list))
    S = W.fit_transform(mixed)
    ## write audio into file
    for i in range(len(wav_list)):
        output_name = "result/%d.wav"%(i+1)
        wavfile.write(output_name, sr, S[:, i])

    # Calculate sdr, sir, sar. bss_eval_sources takes the REFERENCE sources
    # first and the estimates second, both shaped (channels, samples).
    from mir_eval.separation import bss_eval_sources
    sdr, sir, sar, perm = bss_eval_sources(src.T, S.T)
    print("SDR: ", sdr)  ## np.array(channels, )
    print("SIR: ", sir)
    print("SAR: ",sar)

    # PESQ works on one 1-D signal pair at a time. ICA returns the sources
    # in arbitrary order, so estimate perm[j] is paired with true source j.
    # Assumes src is (samples, channels) like S -- TODO confirm in mix_audio.
    from pypesq import pesq
    pesq_score = [
        pesq(src[:, j], S[:, perm[j]], fs=sr) for j in range(len(perm))
    ]
    print("PESQ: ", pesq_score)