Beispiel #1
0
def main(conf):
    """Evaluate the best checkpoint on the simulated test set.

    Loads ``best_model.ckpt`` from ``conf["exp_dir"]`` into a ``TasNet``,
    runs the network once per source on every item of an
    ``OnlineSimulationDataset`` and prints the median of
    ``output SDR - input SDR`` over all (item, source) pairs.

    Args:
        conf: Configuration mapping. Reads ``conf["exp_dir"]`` and
            ``conf["train_conf"]["net"]["n_src"]``. ``conf["use_gpu"]`` is
            overwritten with ``False`` before it is read, so the model stays
            on the device it was loaded to (CPU).
    """
    model_path = os.path.join(conf["exp_dir"], "best_model.ckpt")
    # NOTE(review): torch.load unpickles the file; only load trusted
    # checkpoints.
    pretrain = torch.load(model_path, map_location="cpu")
    model = TasNet()
    model.load_state_dict(pretrain)

    # GPU use is explicitly switched off here, which makes the branch below
    # inactive; it is kept so re-enabling the GPU is a one-line change.
    conf["use_gpu"] = False
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    test_set = OnlineSimulationDataset(vctk_audio, ms_snsd, 48,
                                       simulation_config_test, truncator,
                                       "./test_online", 50)
    n_src = conf['train_conf']['net']['n_src']

    input_sdr_list = []
    output_sdr_list = []
    model.eval()
    # Scope gradient disabling to the evaluation loop instead of entering the
    # context manager by hand and never leaving it.
    with torch.no_grad():
        for item_idx in tqdm(range(len(test_set))):
            sample = test_set[item_idx]
            mix = sample[0]
            fusion = Prep(sample)
            mix = np.expand_dims(mix, axis=0)  # 1 * channel * length
            mix = torch.from_numpy(mix).to(model_device).float()
            ref = torch.tensor(sample[3] * MAX_INT16,
                               dtype=torch.float32, device=model_device)

            # One forward pass per source, each with its own fusion input.
            est_list = [model(mix, fusion[i]) for i in range(n_src)]
            spks = torch.cat(est_list, dim=1)

            ref = center_trim(ref, spks).transpose(1, 0)
            spks = spks.detach().cpu().numpy().squeeze()
            ref = ref.detach().cpu().numpy()

            # The first channel of the mixture is the "unprocessed" signal
            # for the input SDR. Convert it to NumPy once so compute_sdr gets
            # the same array type as the reference and the estimates.
            mix_first_channel = mix[0, 0, :].cpu().numpy() * MAX_INT16

            for src_idx, samps in enumerate(spks):
                target = ref[0, src_idx]
                input_sdr_list.append(compute_sdr(target, mix_first_channel))
                output_sdr_list.append(
                    compute_sdr(target, samps * MAX_INT16))

    input_sdr_array = np.array(input_sdr_list)
    output_sdr_array = np.array(output_sdr_list)
    result = np.median(output_sdr_array - input_sdr_array)
    print("The SNR: " + str(result))
Beispiel #2
0
    def get_sdr(self, fusion_list, mix_list, ref_list):
        """Print and return the median SDR improvement over the given items.

        For every index of ``fusion_list`` the network ``self.nnet`` is run
        once per source (``n_spks`` times) on the matching mixture; the SDR
        of each estimate and of the first mixture channel are computed
        against the reference, and the median of their difference is
        reported.

        Note: the network is switched to eval mode and left in that mode.

        Args:
            fusion_list: Per-item sequences indexed by source, passed as the
                second argument to ``self.nnet``.
            mix_list: Per-item NumPy mixtures (channel * length per the
                original comment); indexed in step with ``fusion_list``.
            ref_list: Per-item reference signals; scaled by ``MAX_INT16``
                before comparison.

        Returns:
            Median of ``output SDR - input SDR`` over all (item, source)
            pairs.
        """
        input_sdr_list = []
        output_sdr_list = []

        with th.no_grad():
            self.nnet.eval()
            for item_idx, fusion in enumerate(fusion_list):
                mix = np.expand_dims(mix_list[item_idx], axis=0)  # 1 * channel * length
                mix = th.from_numpy(mix).to(device=self.device).float()
                ref = th.tensor(ref_list[item_idx] * MAX_INT16,
                                dtype=th.float32, device=self.device)

                # One forward pass per source, each with its own fusion
                # input. n_spks is not defined in this method; it is
                # expected to be available from the enclosing scope.
                est_list = [self.nnet(mix, fusion[i]) for i in range(n_spks)]
                spks = th.cat(est_list, dim=1)

                ref = center_trim(ref, spks).transpose(1, 0)
                spks = spks.detach().cpu().numpy().squeeze()
                ref = ref.detach().cpu().numpy()

                # Move the first mixture channel to host memory once per
                # item: the tensor may live on a non-CPU device, and the
                # reference/estimates handed to compute_sdr are NumPy arrays.
                mix_first_channel = mix[0, 0, :].cpu().numpy() * MAX_INT16

                for src_idx, samps in enumerate(spks):
                    target = ref[0, src_idx]
                    input_sdr_list.append(
                        compute_sdr(target, mix_first_channel))
                    output_sdr_list.append(
                        compute_sdr(target, samps * MAX_INT16))

            input_sdr_array = np.array(input_sdr_list)
            output_sdr_array = np.array(output_sdr_list)
            result = np.median(output_sdr_array - input_sdr_array)
            print("The SNR: " + str(result))

        return result
Beispiel #3
0
    for i, batch in enumerate(dl2):
        IDX = batch.numpy()
        x = stft_mix_test[IDX]
        vocal_results, accom_results = cass.test(x)
        b_r.append(vocal_results)
        c_r.append(accom_results)
    b_r = np.concatenate(b_r, axis=0)
    c_r = np.concatenate(c_r, axis=0)
    results = [b_r, c_r]

    # compute error
    vocal_error = compute_lx_error(stft_vocal_test, results[0],
                                   stft_mix_test_pha)
    accom_error = compute_lx_error(stft_accom_test, results[1],
                                   stft_mix_test_pha)
    vocal_sdr, vocal_std, vocal_med, vocal_min, vocal_max = compute_sdr(
        stft_vocal_test, results[0], stft_mix_test_pha, stft_mix_test)
    accom_sdr, accom_std, accom_med, accom_min, accom_max = compute_sdr(
        stft_accom_test, results[1], stft_mix_test_pha, stft_mix_test)
    curr_error = 0.5 * (vocal_error + accom_error)
    curr_sdr = 0.5 * (vocal_sdr + accom_sdr)
    print(10 * "=")
    print("Epoch {}, vocal error: {:.4f}, accom error: {:.4f}".format(
        j, vocal_error, accom_error))
    print("Curr Best: {:.4f}, Avg Error: {:.4f}, Avg sdr: {:.4f}".format(
        best_result, 0.5 * (vocal_error + accom_error),
        0.5 * (vocal_sdr + accom_sdr)))
    print("     vocal sdr: {:.4f}, accom sdr: {:.4f}".format(
        vocal_sdr, accom_sdr))
    print("STD: vocal sdr: {:.4f}, accom sdr: {:.4f}".format(
        vocal_std, accom_std))
    print("MED: vocal sdr: {:.4f}, accom sdr: {:.4f}".format(
Beispiel #4
0
        a = stft_bass[IDX]
        b = stft_sax[IDX]
        c = stft_clar[IDX]
        d = stft_vio[IDX]

        enc_losses, dec_losses, dis_losses = cass.train(x, [a, b, c, d])

    results = cass.test(stft_mix_test)
    # compute error
    bass_error = compute_lx_error(stft_bass_test, results[0],
                                  stft_mix_test_pha)
    sax_error = compute_lx_error(stft_sax_test, results[1], stft_mix_test_pha)
    clar_error = compute_lx_error(stft_clar_test, results[2],
                                  stft_mix_test_pha)
    vio_error = compute_lx_error(stft_vio_test, results[3], stft_mix_test_pha)
    bass_sdr, bass_std, bass_med, bass_min, bass_max = compute_sdr(
        stft_bass_test, results[0], stft_mix_test_pha, stft_mix_test)
    sax_sdr, sax_std, sax_med, sax_min, sax_max = compute_sdr(
        stft_sax_test, results[1], stft_mix_test_pha, stft_mix_test)
    clar_sdr, clar_std, clar_med, clar_min, clar_max = compute_sdr(
        stft_clar_test, results[2], stft_mix_test_pha, stft_mix_test)
    vio_sdr, vio_std, vio_med, vio_min, vio_max = compute_sdr(
        stft_vio_test, results[3], stft_mix_test_pha, stft_mix_test)
    curr_error = 0.25 * (bass_error + sax_error + clar_error + vio_error)
    print(10 * "=")
    print(
        "Epoch {}, Bass error: {:.4f}, Sax error: {:.4f}, Clar error: {:.4f}, Vio error: {:.4f}"
        .format(j, bass_error, sax_error, clar_error, vio_error))
    print("Curr Best: {:.4f}, Avg Error: {:.4f}, Avg sdr: {:.4f}".format(
        best_result, curr_error,
        0.25 * (bass_sdr + sax_sdr + clar_sdr + vio_sdr)))
    print(