Code example #1
def test_get_metrics_multichannel():
    mix = np.random.randn(2, 16000)
    clean = np.random.randn(2, 16000)
    est = np.random.randn(2, 16000)
    get_metrics(mix,
                clean,
                est,
                sample_rate=8000,
                metrics_list="si_sdr",
                average=False)
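With average=False, get_metrics returns one value per source instead of a single average, and the mean of those values matches the averaged call (code example #3 below verifies this). A minimal standalone sketch of recovering the mean by hand:

import numpy as np
from asteroid.metrics import get_metrics

mix = np.random.randn(2, 16000)
clean = np.random.randn(2, 16000)
est = np.random.randn(2, 16000)
per_src = get_metrics(mix, clean, est, sample_rate=8000,
                      metrics_list="si_sdr", average=False)
mean_si_sdr = float(np.mean(per_src["si_sdr"]))  # equals the average=True value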
Code example #2
File: metrics_test.py Project: zwb0626/asteroid
def test_all_metrics():
    # This is separated because very slow (sdr, pesq, stoi)
    mix = np.random.randn(1, 4000)
    clean = np.random.randn(1, 4000)
    est = np.random.randn(1, 4000)
    metrics_dict = get_metrics(mix,
                               clean,
                               est,
                               sample_rate=8000,
                               metrics_list='all')
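As the comment notes, sdr, pesq and stoi dominate the runtime. To my understanding, metrics_list="all" is shorthand for the explicit list below (the exact set is an assumption); dropping the slow names speeds test runs up considerably:

import numpy as np
from asteroid.metrics import get_metrics

mix = np.random.randn(1, 4000)
clean = np.random.randn(1, 4000)
est = np.random.randn(1, 4000)
# Presumed expansion of "all"; pass a shorter list to skip slow metrics.
metrics_dict = get_metrics(mix, clean, est, sample_rate=8000,
                           metrics_list=["si_sdr", "sdr", "sir", "sar", "stoi", "pesq"])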
Code example #3
def test_get_metrics(fs):
    mix = np.random.randn(1, 16000)
    clean = np.random.randn(2, 16000)
    est = np.random.randn(2, 16000)
    metrics_dict = get_metrics(mix,
                               clean,
                               est,
                               sample_rate=fs,
                               metrics_list="si_sdr")
    # Test no average & squeezing
    metrics_dict_bis = get_metrics(mix[0],
                                   clean,
                                   est,
                                   sample_rate=fs,
                                   metrics_list="si_sdr",
                                   average=False)
    assert float(np.mean(metrics_dict_bis["si_sdr"])) == metrics_dict["si_sdr"]
    assert float(np.mean(
        metrics_dict_bis["input_si_sdr"])) == metrics_dict["input_si_sdr"]
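Besides the output metrics, get_metrics reports the same metrics computed on the raw mixture under an input_ prefix (used in the asserts above); the evaluation scripts below subtract the two to report improvements. A standalone sketch:

import numpy as np
from asteroid.metrics import get_metrics

mix = np.random.randn(1, 8000)
clean = np.random.randn(1, 8000)
est = np.random.randn(1, 8000)
d = get_metrics(mix, clean, est, sample_rate=8000, metrics_list="si_sdr")
si_sdr_imp = d["si_sdr"] - d["input_si_sdr"]  # output metric minus input metric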
Code example #4
File: evaluate.py Project: flyingleafe/asteroid
def _eval(batch,
          metrics,
          including='output',
          sample_rate=8000,
          use_pypesq=False):
    if use_pypesq:
        metrics = [m for m in metrics if m != 'pesq']

    has_estoi = False
    if 'estoi' in metrics:
        metrics = [m for m in metrics if m != 'estoi']
        has_estoi = True

    has_wer = False
    if 'wer' in metrics:
        metrics = [m for m in metrics if m != 'wer']
        has_wer = True

    mix = batch['mix']
    clean = batch['clean']
    estimate = batch['enh']
    snr = batch['snr']

    res = get_metrics(mix.numpy(),
                      clean.numpy(),
                      estimate.numpy(),
                      sample_rate=sample_rate,
                      metrics_list=metrics,
                      including=including)

    if use_pypesq:
        res['pesq'] = pesq(clean.flatten(), estimate.flatten(), sample_rate)

    if has_estoi:
        res['estoi'] = stoi(clean.flatten(),
                            estimate.flatten(),
                            sample_rate,
                            extended=True)

    if has_wer:
        res['wer'] = jiwer.wer(batch['clean_text'],
                               batch['transcription'],
                               truth_transform=_wer_trans,
                               hypothesis_transform=_wer_trans)

    if including == 'input':
        for m in metrics:
            res[m] = res['input_' + m]
            del res['input_' + m]

    res['snr'] = snr[0].item()
    return res
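A hypothetical call to _eval, with the batch layout inferred from the function body (the tensors and SNR value are placeholders, not from the source):

import torch

batch = {
    "mix": torch.randn(1, 4000),
    "clean": torch.randn(1, 4000),
    "enh": torch.randn(1, 4000),
    "snr": torch.tensor([0.0]),
}
res = _eval(batch, metrics=["si_sdr"], including="output", sample_rate=8000)
print(res["si_sdr"], res["snr"])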
Code example #5
def test_error_msg(filename):
    mix = np.random.randn(1, 4000)
    clean = np.random.randn(1, 4000)
    est = np.random.randn(1, 4000)
    expected_msg = f".+si_sdr.+{filename or '<unknown file>'}"
    with mock.patch("pb_bss_eval.evaluation.si_sdr",
                    side_effect=RuntimeError("Fatal error")), pytest.raises(
                        RuntimeError, match=expected_msg):
        metrics_dict = get_metrics(mix,
                                   clean,
                                   est,
                                   sample_rate=8000,
                                   metrics_list=["si_sdr", "pesq"],
                                   filename=filename)
Code example #6
def test_ignore_errors(filename, average):
    mix = np.random.randn(1, 4000)
    clean = np.random.randn(1, 4000)
    est = np.random.randn(1, 4000)
    expected_msg = f".+si_sdr.+{filename or '<unknown file>'}.+Fatal error"
    with mock.patch("pb_bss_eval.evaluation.si_sdr",
                    side_effect=RuntimeError("Fatal error")), pytest.warns(
                        RuntimeWarning, match=expected_msg):
        metrics_dict = get_metrics(
            mix,
            clean,
            est,
            sample_rate=8000,
            metrics_list=["si_sdr", "pesq"],
            ignore_metrics_errors=True,
            average=average,
            filename=filename,
        )
    assert metrics_dict["si_sdr"] is None
    assert metrics_dict["pesq"] is not None
Code example #7
def evaluate(dict_list, model, conf, save_dir=None):
    model_device = next(model.parameters()).device
    # Randomly choose the indexes of sentences to save.
    if save_dir is None:
        conf["n_save_ex"] = 0
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(dict_list)
    save_idx = random.sample(range(len(dict_list)), conf["n_save_ex"])
    series_list = []
    for idx, wav_dic in enumerate(tqdm(dict_list)):
        # Forward the network on the mixture.
        noisy_np, clean_np, fs = load_wav_dic(wav_dic)
        with torch.no_grad():
            net_input = torch.tensor(noisy_np)[None, None].to(model_device)
            est_clean_np = model.denoise(
                net_input).squeeze().cpu().data.numpy()

        utt_metrics = get_metrics(
            mix=noisy_np,
            clean=clean_np,
            estimate=est_clean_np,
            sample_rate=fs,
            metrics_list=COMPUTE_METRICS,
        )
        utt_metrics["noisy_path"] = wav_dic["noisy"]
        utt_metrics["clean_path"] = wav_dic["clean"]
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "noisy.wav", noisy_np, fs)
            sf.write(local_save_dir + "clean.wav", clean_np, fs)
            sf.write(local_save_dir + "estimate.wav", est_clean_np, fs)
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    return all_metrics_df
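A hypothetical driver for evaluate above; the wav paths and model are placeholders, and load_wav_dic is expected to read the "noisy" and "clean" entries of each dict:

dict_list = [
    {"noisy": "data/noisy/ex1.wav", "clean": "data/clean/ex1.wav"},
]
conf = {"n_save_ex": 0}
all_metrics_df = evaluate(dict_list, model, conf, save_dir=None)
print(all_metrics_df.mean(numeric_only=True))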
Code example #8
def get_all_metrics_from_model(model, test_sets, model_name=None):
    series_list = []
    torch.no_grad().__enter__()
    model = model.cuda()
    for snr, test_set in test_sets.items():
        # Make a dir per model, with a separate subdir for each SNR.
        os.makedirs(f'{save_enhanced_dir}/{str(model_name)}/{snr}dB/data/', exist_ok=True)
        denoised_file_paths = []
        print(f'SNR: {snr}dB')
        loader = DataLoader(test_set, num_workers=0)

        for i, (mix, clean, path) in tqdm(enumerate(loader)):
            mix = mix.cuda()
            estimate = model(mix).detach().flatten().cpu().numpy()

            denoised_file_name = path[0].split('/')[-1]
            # Prefix the denoised file name with the model name.
            denoised_file_path = f'{save_enhanced_dir}/{str(model_name)}/{snr}dB/data/{model_name}_{denoised_file_name}'
            denoised_file_paths.append(denoised_file_path)
            sf.write(denoised_file_path, estimate, samplerate=SAMPLE_RATE)

            # Compute PESQ only, and keep the metadata alongside it.
            metrics_dict = get_metrics(mix.cpu().numpy(), clean.numpy(), estimate,
                                       sample_rate=SAMPLE_RATE, metrics_list=["pesq"])
            metrics_dict["mix_path"] = path
            metrics_dict["snr"] = snr
            series_list.append(pd.Series(metrics_dict))
            all_metrics_df = pd.DataFrame(series_list)
            if i == 30:
                break

        csv_path_tmp = csv_path_dict[str(snr)]
        df = pd.read_csv(csv_path_tmp)
        denoised_file_paths = pd.Series(denoised_file_paths)
        df['denoised_path'] = denoised_file_paths
        df_csv_path = f'{save_enhanced_dir}/{str(model_name)}/{snr}dB/{model_name}_snr{snr}dB.csv'
        df.to_csv(df_csv_path)
    return None
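Several of these scripts enter inference mode with torch.no_grad().__enter__(), which never exits the context; the idiomatic form is a with block or a decorator, e.g.:

import torch

@torch.no_grad()  # autograd disabled for the whole call
def denoise_all(model, loader):
    return [model(mix.cuda()).cpu() for mix, clean, path in loader]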
Code example #9
File: eval.py Project: zmolikova/asteroid
def main(conf):
    compute_metrics = update_compute_metrics(conf["compute_wer"],
                                             COMPUTE_METRICS)
    anno_df = pd.read_csv(
        Path(conf["test_dir"]).parent.parent.parent / "test_annotations.csv")
    wer_tracker = (MockWERTracker() if not conf["compute_wer"] else WERTracker(
        ASR_MODEL_PATH, anno_df))
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = LibriMix(
        csv_dir=conf["test_dir"],
        task=conf["task"],
        sample_rate=conf["sample_rate"],
        n_src=conf["train_conf"]["data"]["n_src"],
        segment=None,
        return_id=True,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources, ids = test_set[idx]
        mix, sources = tensors_to_device([mix, sources], device=model_device)
        est_sources = model(mix.unsqueeze(0))
        loss, reordered_sources = loss_func(est_sources,
                                            sources[None],
                                            return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path,
        # the input and output metrics
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=COMPUTE_METRICS,
        )
        utt_metrics["mix_path"] = test_set.mixture_path
        est_sources_np_normalized = normalize_estimates(est_sources_np, mix_np)
        utt_metrics.update(**wer_tracker(
            mix=mix_np,
            clean=sources_np,
            estimate=est_sources_np_normalized,
            wav_id=ids,
            sample_rate=conf["sample_rate"],
        ))
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np,
                     conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                         conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np_normalized):
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()

    print("Overall metrics :")
    pprint(final_results)
    if conf["compute_wer"]:
        print("\nWER report")
        wer_card = wer_tracker.final_report_as_markdown()
        print(wer_card)
        # Save the report
        with open(os.path.join(eval_save_dir, "final_wer.md"), "w") as f:
            f.write(wer_card)

    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
Code example #10
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=None,
        segment=None,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        _, indxs = torch.sort(torch.sqrt(torch.mean(est_sources**2, dim=-1)),
                              descending=True)
        indxs = indxs[:, :2]
        # we know a-priori that there are 2 sources in WHAM-clean (WSJ0-2mix clean)
        # so we sort the estimated signals and take only the two with highest energy.
        est_sources = est_sources.gather(
            1,
            indxs.unsqueeze(-1).repeat(1, 1, est_sources.shape[-1]))
        loss, reordered_sources = loss_func(est_sources,
                                            sources[None],
                                            return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        utt_metrics["mix_path"] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0],
                     conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src,
                         conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
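The energy-based selection of the two loudest estimated sources, shown above with sort and gather, on a toy tensor (shapes are illustrative):

import torch

est = torch.randn(1, 3, 8)  # (batch, n_src, time): 3 candidate sources
energy = torch.sqrt(torch.mean(est ** 2, dim=-1))
_, order = torch.sort(energy, descending=True)
top2 = est.gather(1, order[:, :2].unsqueeze(-1).repeat(1, 1, est.shape[-1]))
assert top2.shape == (1, 2, 8)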
Code example #11
File: expert.py Project: 18445864529/s3prl
    def forward(self, mode, features, uttname_list, source_attr, source_wav,
                target_attr, target_wav_list, feat_length, wav_length, records,
                **kwargs):
        """
        Args:
            mode: string
                'train', 'dev' or 'test' for this forward step

            features:
                list of unpadded features [feat1, feat2, ...]
                each feat is in torch.FloatTensor and already
                put in the device assigned by command-line args

            uttname_list:
                list of utterance names

            source_attr:
                source_attr is a dict containing the STFT information 
                for the mixture. source_attr['magnitude'] stores the STFT
                magnitude, source_attr['phase'] stores the STFT phase and
                source_attr['stft'] stores the raw STFT feature. The shape
                is [bs, max_length, feat_dim]

            source_wav:
                source_wav contains the raw waveform for the mixture,
                and it has the shape of [bs, max_wav_length]

            target_attr:
                similar to source_attr, it contains the STFT information
                for individual sources. It only has two keys ('magnitude' and 'phase')
                target_attr['magnitude'] is a list of length n_srcs, and
                target_attr['magnitude'][i] has the shape [bs, max_length, feat_dim]

            target_wav_list:
                target_wav_list contains the raw waveform for the individual
                sources, and it is a list of length n_srcs. target_wav_list[0]
                has the shape [bs, max_wav_length]

            feat_length:
                length of STFT features

            wav_length:
                length of raw waveform

            records:
                defaultdict(list), by appending contents into records,
                these contents can be averaged and logged on Tensorboard
                later by self.log_records every log_step

        Return:
            loss:
                the loss to be optimized, should not be detached
        """

        # match the feature length to STFT feature length
        features = match_length(features, feat_length)
        features = pack_sequence(features)
        mask = self.model(features)

        # evaluate the separation quality of predict sources
        if mode == 'dev' or mode == 'test':
            predict_stfts = [
                torch.squeeze(m * source_attr['stft'].to(device)) for m in mask
            ]
            predict_stfts_np = [
                np.transpose(s.data.cpu().numpy()) for s in predict_stfts
            ]

            assert len(wav_length) == 1
            # reconstruct the signal using iSTFT
            predict_srcs_np = [
                librosa.istft(stft_mat,
                              hop_length=self.upstream_rate,
                              win_length=self.datarc['win_length'],
                              window=self.datarc['window'],
                              center=self.datarc['center'],
                              length=wav_length[0])
                for stft_mat in predict_stfts_np
            ]
            predict_srcs_np = np.stack(predict_srcs_np, 0)
            gt_srcs_np = torch.cat(target_wav_list, 0).data.cpu().numpy()
            mix_np = source_wav.data.cpu().numpy()

            utt_metrics = get_metrics(
                mix_np,
                gt_srcs_np,
                np.maximum(predict_srcs_np, EPS),
                sample_rate=self.datarc['rate'],
                metrics_list=COMPUTE_METRICS,
                compute_permutation=True,
            )

            for metric in COMPUTE_METRICS:
                input_metric = "input_" + metric
                assert metric in utt_metrics and input_metric in utt_metrics
                imp = utt_metrics[metric] - utt_metrics[input_metric]
                if metric not in records:
                    records[metric] = []
                records[metric].append(imp)

            assert 'batch_id' in kwargs
            # Save the prediction every 1000 examples.
            if kwargs['batch_id'] % 1000 == 0:
                records['mix'].append(mix_np)
                records['hypo'].append(predict_srcs_np)
                records['ref'].append(gt_srcs_np)
                records['uttname'].append(uttname_list[0])

        if self.loss_type == "MSE":  # mean square loss
            loss = self.objective.compute_loss(mask, feat_length, source_attr,
                                               target_attr)
        elif self.loss_type == "SISDR":  # end-to-end SI-SNR loss
            loss = self.objective.compute_loss(mask, feat_length, source_attr,
                                               wav_length, target_wav_list)
        else:
            raise ValueError("Loss type not defined.")

        records["loss"].append(loss.item())
        return loss
Code example #12
File: eval.py Project: saurjya/asteroid
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    #model = ConvTasNet.from_pretrained(model_path)
    model = DCUNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    test_set = BBCSODataset(
        conf["json_dir"],
        conf["n_src"],
        conf["sample_rate"],
        conf["batch_size"],
        220500,
        train=False,
    )
    # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        mix = mix.unsqueeze(0)
        sources = sources.unsqueeze(0)
        est_sources = model(mix)
        loss, reordered_sources = loss_func(est_sources, sources, return_est=True)
        #mix_np = mix.squeeze(0).cpu().data.numpy()
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.squeeze(0).cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        #utt_metrics["mix_path"] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))
        
        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            #print(mix_np.shape)
            sf.write(local_save_dir + "mixture.wav", np.swapaxes(mix_np,0,1), conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src, conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
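The in-loop rescaling applied before writing estimates in several of these scripts, factored into a helper for clarity (a sketch, not project code):

import numpy as np

def normalize_to_mix(est_src, mix):
    """Scale est_src so its peak matches the mixture's peak."""
    return est_src * np.max(np.abs(mix)) / np.max(np.abs(est_src))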
Code example #13
File: eval.py Project: xmpx/signalProcessing
def main(conf):
    model = load_best_model(conf["train_conf"], conf["exp_dir"])
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamRDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=model.n_src,
        segment=None,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        utt_metrics["mix_path"] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src, conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)
Code example #14
def main(conf):
    # Make the model
    model, _ = make_model_and_optimizer(conf['train_conf'])
    # Load best model
    with open(os.path.join(conf['exp_dir'], 'best_k_models.json'), "r") as f:
        best_k = json.load(f)
    best_model_path = min(best_k, key=best_k.get)
    # Load checkpoint
    checkpoint = torch.load(best_model_path, map_location='cpu')
    state = checkpoint['state_dict']
    state_copy = state.copy()
    # Remove unwanted keys
    for keys, values in state.items():
        if keys.startswith('loss'):
            del state_copy[keys]
            print(keys)
    model = torch_utils.load_state_dict_in(state_copy, model)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = LibriMix(csv_dir=conf['test_dir'],
                        task=conf['task'],
                        sample_rate=conf['sample_rate'],
                        n_src=conf['train_conf']['data']['n_src'],
                        segment=None)  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf['exp_dir'], conf['out_dir'])
    ex_save_dir = os.path.join(eval_save_dir, 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf['n_save_ex'])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix.unsqueeze(0))
        loss, reordered_sources = loss_func(est_sources,
                                            sources[None],
                                            return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.squeeze().cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze().cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path,
        # the input and output metrics
        utt_metrics = get_metrics(mix_np,
                                  sources_np,
                                  est_sources_np,
                                  sample_rate=conf['sample_rate'])
        utt_metrics['mix_path'] = test_set.mixture_path
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np,
                     conf['sample_rate'])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                         conf['sample_rate'])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(local_save_dir + "s{}_estimate.wav".format(src_idx),
                         est_src, conf['sample_rate'])
            # Write local metrics to the example folder.
            with open(local_save_dir + 'metrics.json', 'w') as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, 'all_metrics.csv'))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(eval_save_dir, 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)
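How the best checkpoint is picked above: best_k_models.json maps checkpoint paths to validation losses, and min with key=best_k.get returns the lowest-loss path. A toy check:

best_k = {"ckpt_a.ckpt": 0.31, "ckpt_b.ckpt": 0.27}
assert min(best_k, key=best_k.get) == "ckpt_b.ckpt"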
Code example #15
def main(conf):
    model_path = os.path.join(conf['exp_dir'], 'best_model.pth')
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(conf['test_dir'], conf['task'],
                           sample_rate=conf['sample_rate'],
                           nondefault_nsrc=model.masker.n_src,
                           segment=None)  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf['n_save_ex'])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        loss, reordered_sources = loss_func(est_sources, sources[None],
                                            return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.squeeze().cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze().cpu().data.numpy()
        utt_metrics = get_metrics(mix_np, sources_np, est_sources_np,
                                  sample_rate=conf['sample_rate'])
        utt_metrics['mix_path'] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0],
                     conf['sample_rate'])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx+1), src,
                         conf['sample_rate'])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(local_save_dir + "s{}_estimate.wav".format(src_idx+1),
                         est_src, conf['sample_rate'])
            # Write local metrics to the example folder.
            with open(local_save_dir + 'metrics.json', 'w') as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf['exp_dir'], 'all_metrics.csv'))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(conf['exp_dir'], 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)
    model_dict = torch.load(model_path, map_location='cpu')

    publishable = save_publishable(
        os.path.join(conf['exp_dir'], 'publish_dir'), model_dict,
        metrics=final_results, train_conf=train_conf
    )
Code example #16
def main(conf):
    model_path = os.path.join(conf["exp_dir"], conf["ckpt_path"])

    # all resulting files would be saved in eval_save_dir
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    os.makedirs(eval_save_dir, exist_ok=True)

    if not os.path.exists(os.path.join(eval_save_dir, "final_metrics.json")):
        if conf["ckpt_path"] == "best_model.pth":
            # serialized checkpoint
            model = getattr(asteroid,
                            conf["model"]).from_pretrained(model_path)
        else:
            # non-serialized checkpoint, _ckpt_epoch_{i}.ckpt, keys would start with
            # "model.", which need to be removed
            model = getattr(asteroid,
                            conf["model"])(**conf["train_conf"]["filterbank"],
                                           **conf["train_conf"]["masknet"])
            all_states = torch.load(model_path, map_location="cpu")
            state_dict = {
                k.split('.', 1)[1]: all_states["state_dict"][k]
                for k in all_states["state_dict"]
            }
            model.load_state_dict(state_dict)
            # model.load_state_dict(all_states["state_dict"], strict=False)

        # Handle device placement
        if conf["use_gpu"]:
            model.cuda()
        model_device = next(model.parameters()).device
        test_set = make_test_dataset(
            corpus=conf["corpus"],
            test_dir=conf["test_dir"],
            task=conf["task"],
            sample_rate=conf["sample_rate"],
            n_src=conf["train_conf"]["data"]["n_src"],
        )
        # Used to reorder sources only
        loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

        # Randomly choose the indexes of sentences to save.
        ex_save_dir = os.path.join(eval_save_dir, "examples/")
        if conf["n_save_ex"] == -1:
            conf["n_save_ex"] = len(test_set)
        save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])

        series_list = []
        torch.no_grad().__enter__()
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx],
                                             device=model_device)
            est_sources = model(mix.unsqueeze(0))

            # When running inference for multi-task training, exclude the
            # last channel. This does not affect single-task training
            # models (from_scratch, pre+FT).
            est_sources = est_sources[:, :sources.shape[0]]

            loss, reordered_sources = loss_func(est_sources,
                                                sources[None],
                                                return_est=True)
            mix_np = mix.cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            # For each utterance, we get a dictionary with the mixture path,
            # the input and output metrics
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
            )
            if hasattr(test_set, "mixture_path"):
                utt_metrics["mix_path"] = test_set.mixture_path
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir,
                                              "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np,
                         conf["sample_rate"])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                             conf["sample_rate"])
                for src_idx, est_src in enumerate(est_sources_np):
                    est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

        # Save all metrics to the experiment folder.
        all_metrics_df = pd.DataFrame(series_list)
        all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

        # Print and save summary metrics
        final_results = {}
        for metric_name in compute_metrics:
            input_metric_name = "input_" + metric_name
            ldf = all_metrics_df[metric_name] - all_metrics_df[
                input_metric_name]
            final_results[metric_name] = all_metrics_df[metric_name].mean()
            final_results[metric_name + "_imp"] = ldf.mean()
        print("Overall metrics :")
        pprint(final_results)
        with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
            json.dump(final_results, f, indent=0)
    else:
        with open(os.path.join(eval_save_dir, "final_metrics.json"), "r") as f:
            final_results = json.load(f)

    if conf["publishable"]:
        assert conf["ckpt_path"] == "best_model.pth"
        model_dict = torch.load(model_path, map_location="cpu")
        os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"),
                    exist_ok=True)
        publishable = save_publishable(
            os.path.join(conf["exp_dir"], "publish_dir"),
            model_dict,
            metrics=final_results,
            train_conf=train_conf,
        )
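The key renaming applied to the non-serialized checkpoint above, on toy data (it strips the "model." prefix mentioned in the comment):

state_dict = {"model.encoder.weight": 0, "model.decoder.weight": 1}
stripped = {k.split(".", 1)[1]: v for k, v in state_dict.items()}
assert list(stripped) == ["encoder.weight", "decoder.weight"]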
Code example #17
File: eval.py Project: zmolikova/asteroid
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = ConvTasNet.from_pretrained(model_path)
    model = LambdaOverlapAdd(
        nnet=model,  # function to apply to each segment.
        n_src=2,  # number of sources in the output of nnet
        window_size=64000,  # Size of segmenting window
        hop_size=None,  # segmentation hop size
        window="hanning",  # Type of the window (see scipy.signal.get_window
        reorder_chunks=False,  # Whether to reorder each consecutive segment.
        enable_grad=
        False,  # Set gradient calculation on of off (see torch.set_grad_enabled)
    )

    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()

    model_device = next(model.parameters()).device

    # Evaluation is done using the 'remix' mixture
    dataset_kwargs = {
        "root_path": Path(conf["train_conf"]["data"]["root_path"]),
        "task": conf["train_conf"]["data"]["task"],
        "sample_rate": conf["train_conf"]["data"]["sample_rate"],
        "num_workers": conf["train_conf"]["training"]["num_workers"],
        "mixture": "remix",
    }

    test_set = DAMPVSEPDataset(split="test", **dataset_kwargs)

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = test_set[idx]
        mix = mix.to(model_device)
        est_sources = model.forward(mix.unsqueeze(0).unsqueeze(1))
        mix_np = mix.squeeze(0).cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = est_sources.squeeze(0).cpu().data.numpy()

        # For each utterance, we get a dictionary with the mixture path,
        # the input and output metrics
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
            average=False,
        )
        utt_metrics = split_metric_dict(utt_metrics)
        utt_metrics["mix_path"] = test_set.mixture_path
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np / max(abs(mix_np)),
                     conf["sample_rate"])

            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                         conf["sample_rate"])

            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        for s in ["", "_s0", "_s1"]:
            input_metric_name = "input_" + f"{metric_name}{s}"
            ldf = all_metrics_df[f"{metric_name}{s}"] - all_metrics_df[
                input_metric_name]
            final_results[f"{metric_name}{s}"] = all_metrics_df[
                f"{metric_name}{s}"].mean()
            final_results[f"{metric_name}{s}" + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
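split_metric_dict is project code not shown here; judging from the summary loop (a bare key plus _s0/_s1 suffixes per source), it presumably behaves roughly like this hedged sketch:

import numpy as np

def split_metric_dict(metric_dict):
    # Hypothetical reimplementation: keep the mean under the bare key and
    # expose each per-source value under a _s{i} suffix.
    out = {}
    for key, val in metric_dict.items():
        arr = np.atleast_1d(val)
        out[key] = float(arr.mean())
        for i, v in enumerate(arr):
            out[f"{key}_s{i}"] = float(v)
    return out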
Code example #18
File: eval.py Project: ChokJohn/SpeechX
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = TransMask.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    if conf['file_path'] == '':
        test_set = LibriMix(
            csv_dir=conf["test_dir"],
            task=conf["task"],
            sample_rate=conf["sample_rate"],
            n_src=conf["train_conf"]["masknet"]["n_src"],
            segment=None,
        )  # Uses all segment length
        # Used to reorder sources only
        loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1 and conf['file_path'] == '':
        conf["n_save_ex"] = len(test_set)
        save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    else:
        save_idx = 0
    series_list = []
    torch.no_grad().__enter__()
    sdr = 0
    rtf = 0
    if conf['file_path'] != '':
        file_path = conf['file_path']
        if os.path.isdir(file_path):
            wavs = [
                os.path.join(file_path, wav) for wav in os.listdir(file_path)
                if '.wav' in wav
            ]
            for wav in wavs:
                inference_wav(wav, conf, model_device, model, ex_save_dir)
        else:
            inference_wav(file_path, conf, model_device, model, ex_save_dir)
        return

    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)

        # Lengthen the inputs 8x so the timing below runs on longer audio.
        mul = 8
        mix = mix.view(-1, 1).repeat(1, mul).view(-1)
        sources = sources.repeat(1, mul)

        start = time()
        est_sources = model(mix.unsqueeze(0))
        dur = time() - start
        audio_seconds = len(mix) / 8000
        rtf += dur / audio_seconds
        print(rtf / (idx + 1))  # running average real-time factor

        loss, reordered_sources = loss_func(est_sources,
                                            sources[None],
                                            return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path,
        # the input and output metrics
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )

        sdr += utt_metrics['sdr']
        print(sdr / (idx + 1))

        utt_metrics["mix_path"] = test_set.mixture_path
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np,
                     conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                         conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    # publishable = save_publishable(
    save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
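The running real-time-factor bookkeeping in the loop above, isolated into a helper (8000 Hz is the script's sample rate):

from time import time

def real_time_factor(model, mix, sample_rate=8000):
    """Processing time divided by audio duration; lower is faster."""
    start = time()
    model(mix.unsqueeze(0))
    return (time() - start) / (len(mix) / sample_rate)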
Code example #19
def main(conf):
    model = load_best_model(conf['train_conf'], conf['exp_dir'])
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = Wsj0mixDataset(conf['test_dir'],
                              n_src=conf['n_src'],
                              segment=None)
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, mode='pairwise')

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf['n_save_ex'])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        if conf['train_conf']['training']['loss_alpha'] == 1:
            # If Deep clustering only, use DC masks.
            est_sources, dic_out = model.dc_head_separate(mix[None, None])
        else:
            # If Chimera, use mask-inference head masks
            est_sources, dic_out = model.separate(mix[None, None])

        loss, reordered_sources = loss_func(est_sources,
                                            sources[None],
                                            return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.squeeze().cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze().cpu().data.numpy()
        utt_metrics = get_metrics(mix_np,
                                  sources_np,
                                  est_sources_np,
                                  sample_rate=conf['sample_rate'],
                                  metrics_list=compute_metrics)
        utt_metrics['mix_path'] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0],
                     conf['sample_rate'])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src,
                         conf['sample_rate'])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src, conf['sample_rate'])
            # Write local metrics to the example folder.
            with open(local_save_dir + 'metrics.json', 'w') as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf['exp_dir'], 'all_metrics.csv'))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(conf['exp_dir'], 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)
Code example #20
File: eval.py  Project: ChokJohn/SpeechX
        mix, sources, ids = test_set[idx]
        mix, sources = tensors_to_device([mix, sources], device=model_device)
        est_sources = model(mix.unsqueeze(0))
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path,
        # the input and output metrics
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=COMPUTE_METRICS,
        )
        utt_metrics["mix_path"] = test_set.mixture_path
        est_sources_np_normalized = normalize_estimates(est_sources_np, mix_np)
        utt_metrics.update(
            **wer_tracker(
                mix=mix_np,
                clean=sources_np,
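The snippet above is cut off mid-call, and normalize_estimates itself is not shown; it is presumably asteroid's asteroid.dsp.normalization helper, which rescales each estimate to the mixture's peak level (the same rescaling example #22 below applies inline before writing wavs). A rough, self-contained stand-in under that assumption:

import numpy as np

def normalize_estimates_sketch(est_np, mix_np, eps=1e-14):
    # Rescale each estimated source so its peak matches the mixture's peak
    # (assumed behavior; avoids clipped or overly quiet saved wav files).
    mix_peak = np.max(np.abs(mix_np))
    return np.stack(
        [est * mix_peak / (np.max(np.abs(est)) + eps) for est in est_np]
    )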
Code example #21
def main(conf):
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(conf['main_args']['cuda'])
    model_dir = conf['main_args']['model_dir']
    exp_dir = conf['main_args']['exp_dir']
    # Define Dataloader
    test_gens = dict()
    for dataset_name, info in TEST.items():
        test_set = LibriMix(csv_path=info[1],
                            sample_rate=conf['data']['sample_rate'],
                            n_src=info[0],
                            segment=None)
        test_gen = DataLoader(test_set,
                              shuffle=False,
                              batch_size=1,
                              num_workers=conf['training']['num_workers'],
                              drop_last=True)
        test_gens.update({dataset_name: test_gen})
    SPKID = LIBRISPEECH_SPKID['test']

    # Define model, optimizer + scheduler
    # if conf['main_args']['stage'] == 1:
    #     model = CAE(conf['cae'])
    #     model_path = os.path.join(exp_dir, 'cae', config_cae_path(conf['cae']))
    # elif conf['main_args']['stage'] == 2:
    model_path = os.path.join(exp_dir, model_dir)
    model = CAE_DANet.load_model(model_path, model_state='best')

    # else:
    #     raise ValueError('Training stage should be either 1 or 2!')
    model = torch.nn.DataParallel(model).cuda()

    # Used to reorder sources only
    # loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    loss_func = PermInvariantSISDR(backward_loss=False,
                                   improvement=True,
                                   return_individual_results=True,
                                   pit=True)

    # Test
    model.eval()
    model.module.danet.add_kmeans(kmeans_type='hard',
                                  alpha=10,
                                  iter=20,
                                  dist_type='cos',
                                  n_init=5)
    with torch.no_grad():
        for set_name, test_gen in test_gens.items():  # different test sets
            series_list = []
            spk_centroids = dict()
            for data in tqdm(test_gen,
                             desc='Testing {}'.format(set_name),
                             ncols=100):
                m1wavs = data[0].unsqueeze(1).cuda()
                clean_wavs = data[-1].cuda()
                speaker_id = data[1]

                if conf['main_args']['stage'] == 1:
                    recon_sources, _, _ = model.module(m1wavs, clean_wavs)
                    reordered_sources = recon_sources
                elif conf['main_args']['stage'] == 2:
                    estimated_masks, _, enc_mixture, _, phase = model(
                        m1wavs,
                        clean_wavs,
                        train=True,
                        n_sources=clean_wavs.shape[1])
                    V = estimated_masks[1]  # V: (B, K, F*T), enc_masks: (B, C, F*T)
                    A = estimated_masks[2]  # A: (B, nspk, K)
                    estimated_masks = estimated_masks[0]  # (B, nspk, F*T)
                    recon_sources = model.module.get_rec_sources(
                        estimated_masks.view(m1wavs.shape[0],
                                             estimated_masks.shape[1],
                                             model.module.input_dim, -1),
                        enc_mixture,
                        phase=phase)  # recovered waveform

                    # loss, reordered_sources = loss_func(recon_sources, clean_wavs, return_est=True)
                    test_sisdri = loss_func(recon_sources,
                                            clean_wavs,
                                            initial_mixtures=m1wavs)
                    reordered_sources = torch.zeros(recon_sources.shape).cuda()
                    for j in range(A.shape[0]):
                        reordered_sources[j] = recon_sources[:, loss_func.best_perm[j], :]
                        for z, k in enumerate(loss_func.best_perm[j]):
                            spk_id = speaker_id[z][j]
                            if spk_id in spk_centroids:
                                spk_centroids[spk_id] = torch.cat(
                                    (spk_centroids[spk_id],
                                     A[j, k].unsqueeze(0).detach().cpu()),
                                    dim=0)
                            else:
                                spk_centroids[spk_id] = A[j, k].unsqueeze(0).detach().cpu()

                m1wavs = m1wavs[:, :, :recon_sources.shape[2]]
                clean_wavs = clean_wavs[:, :, :recon_sources.shape[2]]

                mix_np = m1wavs.squeeze(0).cpu().data.numpy()
                sources_np = clean_wavs.squeeze(0).cpu().data.numpy()
                est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
                utt_metrics = get_metrics(
                    mix_np,
                    sources_np,
                    est_sources_np,
                    sample_rate=conf['data']['sample_rate'],
                    metrics_list=compute_metrics)

                # utt_metrics['mix_path'] = test_set.mix[idx][0]
                series_list.append(pd.Series(utt_metrics))
                # pprint(utt_metrics)

            # Save all metrics to the experiment folder.
            all_metrics_df = pd.DataFrame(series_list)
            all_metrics_df.to_csv(
                os.path.join(model_path,
                             '{}_all_metrics.csv'.format(set_name)))
            torch.save(spk_centroids,
                       os.path.join(model_path, '{}_spkC.pt'.format(set_name)))

            # Print and save summary metrics
            final_results = {}
            for metric_name in compute_metrics:
                input_metric_name = 'input_' + metric_name
                ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
                final_results[metric_name] = all_metrics_df[metric_name].mean()
                final_results[metric_name + '_imp'] = ldf.mean()
            print('Overall metrics :')
            pprint(final_results)
            with open(
                    os.path.join(model_path,
                                 '{}_final_metrics.json'.format(set_name)),
                    'w') as f:
                json.dump(final_results, f, indent=0)
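The per-speaker bookkeeping in example #21 (grouping attractor vectors A[j, k] by speaker id, following the best permutation) can be isolated into a small helper. A sketch under the same shape assumptions: A is (B, nspk, K), best_perm[j] is the source permutation of batch item j, and speaker_id[z][j] is the id of the z-th reordered source; collect_speaker_vectors is a hypothetical name.

from collections import defaultdict

import torch


def collect_speaker_vectors(A, best_perm, speaker_id):
    # Group attractor vectors by speaker id across the batch
    # (hypothetical helper restating the bookkeeping loop above).
    buckets = defaultdict(list)
    for j in range(A.shape[0]):
        for z, k in enumerate(best_perm[j]):
            buckets[speaker_id[z][j]].append(A[j, k].detach().cpu())
    return {spk: torch.stack(vecs) for spk, vecs in buckets.items()}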
Code example #22
File: test_real.py  Project: nschmidtg/thesis
def main(conf):
    compute_metrics = COMPUTE_METRICS
    wer_tracker = MockWERTracker()
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    if conf["target_model"] == "UNet":
        sys.path.append('UNet_model')
        AsteroidModelModule = my_import("unet_model.UNet")
    else:
        sys.path.append('ConvTasNet_model')
        AsteroidModelModule = my_import("conv_tasnet_norm.ConvTasNetNorm")
    model = AsteroidModelModule.from_pretrained(
        model_path, sample_rate=conf["sample_rate"])
    print("model_path", model_path)
    # model = ConvTasNet
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    test_set = PodcastLoader(conf["test_dir"], sample_rate=44100, segment=18)
    # Used to reorder sources only

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []

    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = test_set[idx]

        if conf["target_model"] == "UNet":
            mix = mix.unsqueeze(0)
        # get audio representations, pass the mix to the unet, it will normalize
        # it, create the masks, pass them to audio, unnormalize them and return
        est_sources = model(mix)

        mix_np = mix.cpu().data.numpy()
        if conf["target_model"] == "UNet":
            mix_np = mix_np.squeeze(0)
        sources_np = sources.cpu().data.numpy()
        est_sources_np = est_sources.squeeze(0).cpu().data.numpy()

        # For each utterance, we get a dictionary with the mixture path,
        # the input and output metrics
        utt_metrics = get_metrics(mix_np,
                                  sources_np,
                                  est_sources_np,
                                  sample_rate=conf["sample_rate"],
                                  metrics_list=COMPUTE_METRICS,
                                  average=False)
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir,
                                          "ex_{}/".format(idx + 1))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np,
                     conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                         conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )

            # Write local metrics to the example folder. This must stay inside
            # the `if idx in save_idx:` block: local_save_dir is only defined
            # there, and metrics should only be written for saved examples.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump({k: v.tolist() for k, v in utt_metrics.items()},
                          f,
                          indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()

    print("Overall metrics :")
    print(final_results)
    if conf["compute_wer"]:
        print("\nWER report")
        wer_card = wer_tracker.final_report_as_markdown()
        print(wer_card)
        # Save the report
        with open(os.path.join(eval_save_dir, "final_wer.md"), "w") as f:
            f.write(wer_card)

    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump({k: v.tolist()
                   for k, v in final_results.items()},
                  f,
                  indent=0)
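Example #22 calls get_metrics with average=False, so each metric comes back as a numpy array holding one value per source rather than a single float, and numpy arrays are not JSON-serializable; hence the v.tolist() conversions above. A minimal illustration:

import json

import numpy as np

utt_metrics = {
    "si_sdr": np.array([4.2, 3.7]),        # one value per estimated source
    "input_si_sdr": np.array([0.3, -0.1]),
}
serializable = {k: v.tolist() for k, v in utt_metrics.items()}
print(json.dumps(serializable, indent=0))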