def test_get_metrics_multichannel():
    """Smoke-test ``get_metrics`` on two-row inputs with averaging disabled.

    The test only checks that the call completes; nothing is asserted on
    the returned values.
    """
    shape = (2, 16000)
    # Draw order (mix, clean, est) is kept so seeded runs see the same data.
    mix = np.random.randn(*shape)
    clean = np.random.randn(*shape)
    est = np.random.randn(*shape)
    get_metrics(
        mix,
        clean,
        est,
        sample_rate=8000,
        metrics_list="si_sdr",
        average=False,
    )
def test_all_metrics():
    """Run ``get_metrics`` with ``metrics_list='all'`` on short signals.

    This is separated from the other tests because it is very slow
    (sdr, pesq, stoi). Only successful completion is checked.
    """
    n_samples = 4000
    # Generator is consumed left to right: mix, then clean, then est.
    mix, clean, est = (np.random.randn(1, n_samples) for _ in range(3))
    all_metrics = get_metrics(
        mix,
        clean,
        est,
        sample_rate=8000,
        metrics_list='all',
    )
def test_get_metrics(fs):
    """Check that averaged metrics equal the mean of the per-source metrics.

    Args:
        fs: sample rate forwarded to ``get_metrics`` (presumably supplied by
            a pytest parametrization that is not visible here).
    """
    n_samples = 16000
    mix = np.random.randn(1, n_samples)
    clean = np.random.randn(2, n_samples)
    est = np.random.randn(2, n_samples)
    averaged = get_metrics(mix, clean, est, sample_rate=fs, metrics_list="si_sdr")
    # Test no average & squeezing: the mixture is passed without its
    # leading axis this time.
    per_source = get_metrics(
        mix[0],
        clean,
        est,
        sample_rate=fs,
        metrics_list="si_sdr",
        average=False,
    )
    for key in ("si_sdr", "input_si_sdr"):
        assert float(np.mean(per_source[key])) == averaged[key]
def _eval(batch, metrics, including='output', sample_rate=8000, use_pypesq=False):
    """Compute the requested metrics for one batch entry.

    'estoi' and 'wer' (and 'pesq' when ``use_pypesq`` is set) are removed
    from the list handed to ``get_metrics`` and computed separately.

    Args:
        batch: mapping with 'mix', 'clean', 'enh' and 'snr' entries, plus
            'clean_text' and 'transcription' when 'wer' is requested.
        metrics: iterable of metric names.
        including: forwarded to ``get_metrics``; when equal to 'input' the
            ``input_<name>`` results are renamed to ``<name>``.
        sample_rate: sample rate forwarded to the metric functions.
        use_pypesq: compute 'pesq' with ``pesq`` instead of ``get_metrics``.

    Returns:
        The metrics dict, with an extra 'snr' entry taken from ``snr[0]``.
    """
    # Strip the metrics that are handled outside of get_metrics.
    if use_pypesq:
        metrics = [name for name in metrics if name != 'pesq']
    has_estoi = 'estoi' in metrics
    if has_estoi:
        metrics = [name for name in metrics if name != 'estoi']
    has_wer = 'wer' in metrics
    if has_wer:
        metrics = [name for name in metrics if name != 'wer']

    mix, clean, estimate, snr = (
        batch['mix'],
        batch['clean'],
        batch['enh'],
        batch['snr'],
    )
    res = get_metrics(
        mix.numpy(),
        clean.numpy(),
        estimate.numpy(),
        sample_rate=sample_rate,
        metrics_list=metrics,
        including=including,
    )

    # NOTE(review): the three extra metrics below always score the estimate,
    # even when including == 'input' -- confirm this is intended.
    if use_pypesq:
        res['pesq'] = pesq(clean.flatten(), estimate.flatten(), sample_rate)
    if has_estoi:
        res['estoi'] = stoi(clean.flatten(), estimate.flatten(), sample_rate,
                            extended=True)
    if has_wer:
        res['wer'] = jiwer.wer(
            batch['clean_text'],
            batch['transcription'],
            truth_transform=_wer_trans,
            hypothesis_transform=_wer_trans,
        )

    if including == 'input':
        # Expose the input-side scores under the plain metric names.
        for name in metrics:
            res[name] = res.pop('input_' + name)
    res['snr'] = snr[0].item()
    return res
def test_error_msg(filename):
    """Check that a failing metric raises with the metric and file in the message.

    ``si_sdr`` is patched to raise ``RuntimeError("Fatal error")``; the
    error raised by ``get_metrics`` must match a pattern containing
    ``si_sdr`` followed by ``filename`` (or ``<unknown file>`` when
    ``filename`` is falsy).
    """
    shape = (1, 4000)
    mix = np.random.randn(*shape)
    clean = np.random.randn(*shape)
    est = np.random.randn(*shape)
    expected_msg = f".+si_sdr.+{filename or '<unknown file>'}"
    failing_si_sdr = mock.patch(
        "pb_bss_eval.evaluation.si_sdr",
        side_effect=RuntimeError("Fatal error"),
    )
    with failing_si_sdr:
        with pytest.raises(RuntimeError, match=expected_msg):
            result = get_metrics(
                mix,
                clean,
                est,
                sample_rate=8000,
                metrics_list=["si_sdr", "pesq"],
                filename=filename,
            )
def test_ignore_errors(filename, average):
    """Check that ``ignore_metrics_errors=True`` turns a failure into a warning.

    ``si_sdr`` is patched to raise; ``get_metrics`` must emit a
    ``RuntimeWarning`` matching the expected pattern, report ``None`` for
    the failed metric and still return a value for ``pesq``.
    """
    shape = (1, 4000)
    mix = np.random.randn(*shape)
    clean = np.random.randn(*shape)
    est = np.random.randn(*shape)
    expected_msg = f".+si_sdr.+{filename or '<unknown file>'}.+Fatal error"
    failing_si_sdr = mock.patch(
        "pb_bss_eval.evaluation.si_sdr",
        side_effect=RuntimeError("Fatal error"),
    )
    with failing_si_sdr:
        with pytest.warns(RuntimeWarning, match=expected_msg):
            result = get_metrics(
                mix,
                clean,
                est,
                sample_rate=8000,
                metrics_list=["si_sdr", "pesq"],
                ignore_metrics_errors=True,
                average=average,
                filename=filename,
            )
    assert result["si_sdr"] is None
    assert result["pesq"] is not None
def evaluate(dict_list, model, conf, save_dir=None):
    """Denoise every utterance, score it and optionally save some examples.

    Args:
        dict_list: sequence of dicts; each one is passed to ``load_wav_dic``
            and its "noisy" and "clean" entries are stored in the results.
        model: torch module exposing ``denoise``.
        conf: dict; ``conf["n_save_ex"]`` is the number of examples to save
            (-1 means all of them). It is only read when ``save_dir`` is
            given and is never modified.
        save_dir: folder receiving the saved examples; ``None`` disables
            saving.

    Returns:
        pd.DataFrame with one row of metrics per utterance.
    """
    model_device = next(model.parameters()).device
    # Randomly choose the indexes of sentences to save.
    # The count is kept local: writing it back into ``conf`` would leak the
    # value computed for this list into later calls made with the same conf
    # (e.g. "-1" frozen to the length of the first evaluated list).
    n_save_ex = 0 if save_dir is None else conf["n_save_ex"]
    if n_save_ex == -1:
        n_save_ex = len(dict_list)
    save_idx = set(random.sample(range(len(dict_list)), n_save_ex))
    series_list = []
    for idx, wav_dic in enumerate(tqdm(dict_list)):
        # Forward the network on the mixture.
        noisy_np, clean_np, fs = load_wav_dic(wav_dic)
        with torch.no_grad():
            net_input = torch.tensor(noisy_np)[None, None].to(model_device)
            est_clean_np = model.denoise(net_input).squeeze().cpu().data.numpy()

        utt_metrics = get_metrics(
            mix=noisy_np,
            clean=clean_np,
            estimate=est_clean_np,
            sample_rate=fs,
            metrics_list=COMPUTE_METRICS,
        )
        utt_metrics["noisy_path"] = wav_dic["noisy"]
        utt_metrics["clean_path"] = wav_dic["clean"]
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "noisy.wav", noisy_np, fs)
            sf.write(local_save_dir + "clean.wav", clean_np, fs)
            sf.write(local_save_dir + "estimate.wav", est_clean_np, fs)
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)
    # Gather all metrics; saving them is left to the caller.
    all_metrics_df = pd.DataFrame(series_list)
    return all_metrics_df
def get_all_metrics_from_model(model, test_sets, model_name=None):
    """Enhance each test set with ``model``, write the audio and update the CSVs.

    For every ``(snr, test_set)`` pair the denoised files are written under
    ``{save_enhanced_dir}/{model_name}/{snr}dB/data/`` and a copy of the
    CSV found in ``csv_path_dict[str(snr)]`` is saved with an extra
    ``denoised_path`` column.

    Args:
        model: torch module; it is moved to the GPU with ``.cuda()``.
        test_sets: mapping from SNR to a dataset yielding
            ``(mix, clean, path)``.
        model_name: name used in the output folder and file names.

    Returns:
        None.
    """
    series_list = []
    model = model.cuda()
    # Use no_grad as a real context manager so gradient tracking is restored
    # when the function returns (it used to be entered and never exited).
    with torch.no_grad():
        for snr, test_set in test_sets.items():
            # make dirs for each model and a separate dir for each snr
            os.makedirs(f'{save_enhanced_dir}/{str(model_name)}/{snr}dB/data/',
                        exist_ok=True)
            denoised_file_paths = []
            print(f'SNR: {snr}db')
            loader = DataLoader(test_set, num_workers=0)
            for i, (mix, clean, path) in tqdm(enumerate(loader)):
                mix = mix.cuda()
                estimate = model(mix).detach().flatten().cpu().numpy()
                denoised_file_name = path[0].split('/')[-1]
                # prefix the denoised file with the model name
                denoised_file_path = f'{save_enhanced_dir}/{str(model_name)}/{snr}dB/data/{model_name}_{denoised_file_name}'
                denoised_file_paths.append(denoised_file_path)
                sf.write(denoised_file_path, estimate, samplerate=SAMPLE_RATE)
                # Metrics are collected per file together with its metadata.
                # NOTE(review): ``series_list`` is never returned or saved.
                metrics_dict = get_metrics(mix.cpu().numpy(), clean.numpy(),
                                           estimate, sample_rate=SAMPLE_RATE,
                                           metrics_list=["pesq"])
                metrics_dict["mix_path"] = path
                metrics_dict["snr"] = snr
                series_list.append(pd.Series(metrics_dict))
                # NOTE(review): only the first 31 files of each set are
                # processed -- looks like a debugging limit, confirm.
                if i == 30:
                    break
            csv_path_tmp = csv_path_dict[str(snr)]
            df = pd.read_csv(csv_path_tmp)
            denoised_file_paths = pd.Series(denoised_file_paths)
            df['denoised_path'] = denoised_file_paths
            df_csv_path = f'{save_enhanced_dir}/{str(model_name)}/{snr}dB/{model_name}_snr{snr}dB.csv'
            df.to_csv(df_csv_path)
    return None
def main(conf):
    """Evaluate ``best_model.pth`` on LibriMix and write metrics and reports.

    Per-utterance metrics go to ``all_metrics.csv``, averaged metrics and
    improvements to ``final_metrics.json`` (plus ``final_wer.md`` when WER
    is computed), all under ``conf["exp_dir"]/conf["out_dir"]``. A
    publishable model is written to ``conf["exp_dir"]/publish_dir``.

    Args:
        conf: dict reading "compute_wer", "test_dir", "exp_dir", "out_dir",
            "use_gpu", "task", "sample_rate", "n_save_ex" and "train_conf".
            ``conf["n_save_ex"]`` is replaced by the dataset length when it
            is -1.
    """
    compute_metrics = update_compute_metrics(conf["compute_wer"], COMPUTE_METRICS)
    # The annotation file is only needed by the real WER tracker, so it is
    # read only when WER is requested.
    if conf["compute_wer"]:
        anno_df = pd.read_csv(
            Path(conf["test_dir"]).parent.parent.parent / "test_annotations.csv"
        )
        wer_tracker = WERTracker(ASR_MODEL_PATH, anno_df)
    else:
        wer_tracker = MockWERTracker()
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = LibriMix(
        csv_dir=conf["test_dir"],
        task=conf["task"],
        sample_rate=conf["sample_rate"],
        n_src=conf["train_conf"]["data"]["n_src"],
        segment=None,
        return_id=True,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    # The CSV/JSON reports are written here even when no example is saved.
    os.makedirs(eval_save_dir, exist_ok=True)
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = set(random.sample(range(len(test_set)), conf["n_save_ex"]))
    series_list = []
    # Gradients are disabled for the inference loop only and restored after.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources, ids = test_set[idx]
            mix, sources = tensors_to_device([mix, sources], device=model_device)
            est_sources = model(mix.unsqueeze(0))
            _, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
            mix_np = mix.cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            # For each utterance, we get a dictionary with the mixture path,
            # the input and output metrics
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=COMPUTE_METRICS,
            )
            utt_metrics["mix_path"] = test_set.mixture_path
            est_sources_np_normalized = normalize_estimates(est_sources_np, mix_np)
            utt_metrics.update(
                **wer_tracker(
                    mix=mix_np,
                    clean=sources_np,
                    estimate=est_sources_np_normalized,
                    wav_id=ids,
                    sample_rate=conf["sample_rate"],
                )
            )
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np, conf["sample_rate"])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                             conf["sample_rate"])
                for src_idx, est_src in enumerate(est_sources_np_normalized):
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    if conf["compute_wer"]:
        print("\nWER report")
        wer_card = wer_tracker.final_report_as_markdown()
        print(wer_card)
        # Save the report
        with open(os.path.join(eval_save_dir, "final_wer.md"), "w") as f:
            f.write(wer_card)

    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    publish_dir = os.path.join(conf["exp_dir"], "publish_dir")
    os.makedirs(publish_dir, exist_ok=True)
    # The training config is taken from ``conf`` instead of relying on a
    # ``train_conf`` global that only exists when run as a script.
    publishable = save_publishable(
        publish_dir,
        model_dict,
        metrics=final_results,
        train_conf=conf["train_conf"],
    )
def main(conf):
    """Evaluate ``best_model.pth`` on a WHAM test set and write the results.

    Only the two highest-energy estimated sources are kept before scoring.
    Per-utterance metrics go to ``all_metrics.csv`` and averaged metrics to
    ``final_metrics.json`` in ``conf["exp_dir"]``; a publishable model is
    written to ``conf["exp_dir"]/publish_dir``.

    Args:
        conf: dict reading "exp_dir", "use_gpu", "test_dir", "task",
            "sample_rate" and "n_save_ex". ``conf["n_save_ex"]`` is replaced
            by the dataset length when it is -1.
    """
    # NOTE(review): ``compute_metrics`` and ``train_conf`` are not defined in
    # this function -- presumably module-level names; confirm.
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=None,
        segment=None,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = set(random.sample(range(len(test_set)), conf["n_save_ex"]))
    series_list = []
    # Gradients are disabled for the inference loop only and restored after.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx], device=model_device)
            est_sources = model(mix[None, None])
            # we know a-priori that there are 2 sources in WHAM-clean
            # (WSJ0-2mix clean) so we sort the estimated signals and take
            # only the two with highest energy.
            _, indxs = torch.sort(
                torch.sqrt(torch.mean(est_sources**2, dim=-1)), descending=True
            )
            indxs = indxs[:, :2]
            est_sources = est_sources.gather(
                1, indxs.unsqueeze(-1).repeat(1, 1, est_sources.shape[-1])
            )
            _, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
            mix_np = mix[None].cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
            )
            utt_metrics["mix_path"] = test_set.mix[idx][0]
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["sample_rate"])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src,
                             conf["sample_rate"])
                for src_idx, est_src in enumerate(est_sources_np):
                    # Rescale the estimate to the mixture's peak amplitude.
                    est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    publish_dir = os.path.join(conf["exp_dir"], "publish_dir")
    os.makedirs(publish_dir, exist_ok=True)
    publishable = save_publishable(
        publish_dir,
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
def forward(self, mode, features, uttname_list, source_attr, source_wav,
            target_attr, target_wav_list, feat_length, wav_length, records,
            **kwargs):
    """Run the separation model, log evaluation metrics and return the loss.

    In 'dev' and 'test' modes the predicted masks are applied to the mixture
    STFT, the sources are reconstructed with iSTFT and the improvement of
    each metric in COMPUTE_METRICS over the mixture is appended to
    ``records``. The loss is computed in every mode.

    Args:
        mode: string
            'train', 'dev' or 'test' for this forward step

        features:
            list of unpadded features [feat1, feat2, ...]
            each feat is in torch.FloatTensor and already
            put in the device assigned by command-line args

        uttname_list:
            list of utterance names

        source_attr:
            source_attr is a dict containing the STFT information
            for the mixture. source_attr['magnitude'] stores the STFT
            magnitude, source_attr['phase'] stores the STFT phase and
            source_attr['stft'] stores the raw STFT feature. The shape
            is [bs, max_length, feat_dim]

        source_wav:
            source_wav contains the raw waveform for the mixture,
            and it has the shape of [bs, max_wav_length]

        target_attr:
            similar to source_attr, it contains the STFT information
            for individual sources. It only has two keys ('magnitude' and
            'phase'). target_attr['magnitude'] is a list of length n_srcs,
            and target_attr['magnitude'][i] has the shape
            [bs, max_length, feat_dim]

        target_wav_list:
            target_wav_list contains the raw waveform for the individual
            sources, and it is a list of length n_srcs.
            target_wav_list[0] has the shape [bs, max_wav_length]

        feat_length:
            length of STFT features

        wav_length:
            length of raw waveform

        records:
            defaultdict(list), by appending contents into records,
            these contents can be averaged and logged on Tensorboard
            later by self.log_records every log_step

        **kwargs:
            must contain 'batch_id' in 'dev' and 'test' modes

    Return:
        loss:
            the loss to be optimized, should not be detached

    Raises:
        ValueError: if ``self.loss_type`` is neither "MSE" nor "SISDR".
    """
    # match the feature length to STFT feature length
    features = match_length(features, feat_length)
    features = pack_sequence(features)
    mask = self.model(features)

    # evaluate the separation quality of predict sources
    if mode == 'dev' or mode == 'test':
        # NOTE(review): ``device`` is not defined in this method --
        # presumably a module-level name; confirm.
        predict_stfts = [
            torch.squeeze(m * source_attr['stft'].to(device)) for m in mask
        ]
        predict_stfts_np = [
            np.transpose(s.data.cpu().numpy()) for s in predict_stfts
        ]

        # Evaluation only supports a single utterance per batch.
        assert len(wav_length) == 1
        # reconstruct the signal using iSTFT
        predict_srcs_np = [
            librosa.istft(stft_mat,
                          hop_length=self.upstream_rate,
                          win_length=self.datarc['win_length'],
                          window=self.datarc['window'],
                          center=self.datarc['center'],
                          length=wav_length[0]) for stft_mat in predict_stfts_np
        ]
        predict_srcs_np = np.stack(predict_srcs_np, 0)
        gt_srcs_np = torch.cat(target_wav_list, 0).data.cpu().numpy()
        mix_np = source_wav.data.cpu().numpy()

        # NOTE(review): np.maximum(..., EPS) replaces every sample below EPS
        # (including all negative samples) by EPS before scoring -- confirm
        # this is intended and not just a guard against all-zero estimates.
        utt_metrics = get_metrics(
            mix_np,
            gt_srcs_np,
            np.maximum(predict_srcs_np, EPS),
            sample_rate=self.datarc['rate'],
            metrics_list=COMPUTE_METRICS,
            compute_permutation=True,
        )

        # Log the improvement of each metric over the unprocessed mixture.
        for metric in COMPUTE_METRICS:
            input_metric = "input_" + metric
            assert metric in utt_metrics and input_metric in utt_metrics
            imp = utt_metrics[metric] - utt_metrics[input_metric]
            if metric not in records:
                records[metric] = []
            records[metric].append(imp)

        assert 'batch_id' in kwargs
        if kwargs['batch_id'] % 1000 == 0:  # Save the prediction every 1000 examples
            records['mix'].append(mix_np)
            records['hypo'].append(predict_srcs_np)
            records['ref'].append(gt_srcs_np)
            records['uttname'].append(uttname_list[0])

    if self.loss_type == "MSE":  # mean square loss
        loss = self.objective.compute_loss(mask, feat_length, source_attr,
                                           target_attr)
    elif self.loss_type == "SISDR":  # end-to-end SI-SNR loss
        loss = self.objective.compute_loss(mask, feat_length, source_attr,
                                           wav_length, target_wav_list)
    else:
        raise ValueError("Loss type not defined.")

    records["loss"].append(loss.item())
    return loss
def main(conf):
    """Evaluate ``best_model.pth`` (DCUNet) on the BBCSO test set.

    Per-utterance metrics go to ``all_metrics.csv`` and averaged metrics to
    ``final_metrics.json`` in ``conf["exp_dir"]``; a publishable model is
    written to ``conf["exp_dir"]/publish_dir``.

    Args:
        conf: dict reading "exp_dir", "use_gpu", "json_dir", "n_src",
            "sample_rate", "batch_size" and "n_save_ex".
            ``conf["n_save_ex"]`` is replaced by the dataset length when it
            is -1.
    """
    # NOTE(review): ``compute_metrics`` and ``train_conf`` are not defined in
    # this function -- presumably module-level names; confirm.
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    #model = ConvTasNet.from_pretrained(model_path)
    model = DCUNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = BBCSODataset(
        conf["json_dir"],
        conf["n_src"],
        conf["sample_rate"],
        conf["batch_size"],
        220500,
        train=False,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = set(random.sample(range(len(test_set)), conf["n_save_ex"]))
    series_list = []
    # Gradients are disabled for the inference loop only and restored after.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx], device=model_device)
            mix = mix.unsqueeze(0)
            sources = sources.unsqueeze(0)
            est_sources = model(mix)
            _, reordered_sources = loss_func(est_sources, sources, return_est=True)
            #mix_np = mix.squeeze(0).cpu().data.numpy()
            mix_np = mix.cpu().data.numpy()
            sources_np = sources.squeeze(0).cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
            )
            #utt_metrics["mix_path"] = test_set.mix[idx][0]
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                #print(mix_np.shape)
                # The mixture keeps its leading axis, so the first two axes
                # are swapped before writing.
                sf.write(local_save_dir + "mixture.wav", np.swapaxes(mix_np, 0, 1),
                         conf["sample_rate"])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src,
                             conf["sample_rate"])
                for src_idx, est_src in enumerate(est_sources_np):
                    # Rescale the estimate to the mixture's peak amplitude.
                    est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    publish_dir = os.path.join(conf["exp_dir"], "publish_dir")
    os.makedirs(publish_dir, exist_ok=True)
    publishable = save_publishable(
        publish_dir,
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
def main(conf):
    """Evaluate the best model of an experiment on a WHAMR test set.

    Per-utterance metrics go to ``all_metrics.csv`` and averaged metrics to
    ``final_metrics.json`` in ``conf["exp_dir"]``.

    Args:
        conf: dict reading "train_conf", "exp_dir", "use_gpu", "test_dir",
            "task", "sample_rate" and "n_save_ex". ``conf["n_save_ex"]`` is
            replaced by the dataset length when it is -1.
    """
    # NOTE(review): ``compute_metrics`` is not defined in this function --
    # presumably a module-level list of metric names; confirm.
    model = load_best_model(conf["train_conf"], conf["exp_dir"])
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamRDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=model.n_src,
        segment=None,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = set(random.sample(range(len(test_set)), conf["n_save_ex"]))
    series_list = []
    # Gradients are disabled for the inference loop only and restored after.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx], device=model_device)
            est_sources = model(mix[None, None])
            _, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
            mix_np = mix[None].cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
            )
            utt_metrics["mix_path"] = test_set.mix[idx][0]
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["sample_rate"])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src,
                             conf["sample_rate"])
                for src_idx, est_src in enumerate(est_sources_np):
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)
def main(conf):
    """Evaluate the best checkpoint of an experiment on a LibriMix test set.

    The checkpoint with the smallest value in ``best_k_models.json`` is
    loaded. Per-utterance metrics go to ``all_metrics.csv`` and averaged
    metrics to ``final_metrics.json`` under
    ``conf["exp_dir"]/conf["out_dir"]``.

    Args:
        conf: dict reading "train_conf", "exp_dir", "out_dir", "use_gpu",
            "test_dir", "task", "sample_rate" and "n_save_ex".
            ``conf["n_save_ex"]`` is replaced by the dataset length when it
            is -1.
    """
    # NOTE(review): ``compute_metrics`` is not defined in this function --
    # presumably a module-level list of metric names; confirm.
    # Make the model
    model, _ = make_model_and_optimizer(conf['train_conf'])
    # Load best model
    with open(os.path.join(conf['exp_dir'], 'best_k_models.json'), "r") as f:
        best_k = json.load(f)
    best_model_path = min(best_k, key=best_k.get)
    # Load checkpoint
    checkpoint = torch.load(best_model_path, map_location='cpu')
    state = checkpoint['state_dict']
    state_copy = state.copy()
    # Remove unwanted keys (iterate the original so the copy can be edited)
    for key in state:
        if key.startswith('loss'):
            del state_copy[key]
            print(key)
    model = torch_utils.load_state_dict_in(state_copy, model)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    # Fixed: was ``model.parameterss()``, which raises AttributeError.
    model_device = next(model.parameters()).device
    test_set = LibriMix(csv_dir=conf['test_dir'],
                        task=conf['task'],
                        sample_rate=conf['sample_rate'],
                        n_src=conf['train_conf']['data']['n_src'],
                        segment=None)  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf['exp_dir'], conf['out_dir'])
    ex_save_dir = os.path.join(eval_save_dir, 'examples/')
    # The CSV/JSON reports are written here even when no example is saved.
    os.makedirs(eval_save_dir, exist_ok=True)
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = set(random.sample(range(len(test_set)), conf['n_save_ex']))
    series_list = []
    # Gradients are disabled for the inference loop only and restored after.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx], device=model_device)
            est_sources = model(mix.unsqueeze(0))
            _, reordered_sources = loss_func(est_sources, sources[None],
                                             return_est=True)
            mix_np = mix.cpu().data.numpy()
            # Only the batch axis added above is removed: an argument-less
            # squeeze() would also drop the source axis when n_src == 1 and
            # break the per-source loops below.
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            # For each utterance, we get a dictionary with the mixture path,
            # the input and output metrics
            utt_metrics = get_metrics(mix_np, sources_np, est_sources_np,
                                      sample_rate=conf['sample_rate'])
            utt_metrics['mix_path'] = test_set.mixture_path
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np,
                         conf['sample_rate'])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                             conf['sample_rate'])
                for src_idx, est_src in enumerate(est_sources_np):
                    sf.write(local_save_dir + "s{}_estimate.wav".format(src_idx),
                             est_src, conf['sample_rate'])
                # Write local metrics to the example folder.
                with open(local_save_dir + 'metrics.json', 'w') as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, 'all_metrics.csv'))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(eval_save_dir, 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)
def main(conf):
    """Evaluate ``best_model.pth`` on a WHAM test set and write the results.

    Per-utterance metrics go to ``all_metrics.csv`` and averaged metrics to
    ``final_metrics.json`` in ``conf["exp_dir"]``; a publishable model is
    written to ``conf["exp_dir"]/publish_dir``.

    Args:
        conf: dict reading "exp_dir", "use_gpu", "test_dir", "task",
            "sample_rate" and "n_save_ex". ``conf["n_save_ex"]`` is replaced
            by the dataset length when it is -1.
    """
    # NOTE(review): ``compute_metrics`` and ``train_conf`` are not defined in
    # this function -- presumably module-level names; confirm.
    model_path = os.path.join(conf['exp_dir'], 'best_model.pth')
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(conf['test_dir'], conf['task'],
                           sample_rate=conf['sample_rate'],
                           nondefault_nsrc=model.masker.n_src,
                           segment=None)  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = set(random.sample(range(len(test_set)), conf['n_save_ex']))
    series_list = []
    # Gradients are disabled for the inference loop only and restored after.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx], device=model_device)
            est_sources = model(mix[None, None])
            _, reordered_sources = loss_func(est_sources, sources[None],
                                             return_est=True)
            mix_np = mix[None].cpu().data.numpy()
            # Only the batch axis added above is removed: an argument-less
            # squeeze() would also drop the source axis when n_src == 1 and
            # break the per-source loops below.
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            utt_metrics = get_metrics(mix_np, sources_np, est_sources_np,
                                      sample_rate=conf['sample_rate'])
            utt_metrics['mix_path'] = test_set.mix[idx][0]
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np[0],
                         conf['sample_rate'])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx+1), src,
                             conf['sample_rate'])
                for src_idx, est_src in enumerate(est_sources_np):
                    sf.write(local_save_dir + "s{}_estimate.wav".format(src_idx+1),
                             est_src, conf['sample_rate'])
                # Write local metrics to the example folder.
                with open(local_save_dir + 'metrics.json', 'w') as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf['exp_dir'], 'all_metrics.csv'))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(conf['exp_dir'], 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location='cpu')
    publish_dir = os.path.join(conf['exp_dir'], 'publish_dir')
    # Make sure the target folder exists before the model is written to it.
    os.makedirs(publish_dir, exist_ok=True)
    publishable = save_publishable(
        publish_dir, model_dict,
        metrics=final_results, train_conf=train_conf
    )
def main(conf):
    """Evaluate a checkpoint on a test set, reusing cached results if present.

    When ``final_metrics.json`` already exists in the evaluation folder the
    metrics are loaded from it; otherwise the model is loaded, every
    utterance is scored and ``all_metrics.csv`` / ``final_metrics.json``
    are written. When ``conf["publishable"]`` is set, a publishable model
    is written to ``conf["exp_dir"]/publish_dir``.

    Args:
        conf: dict reading "exp_dir", "ckpt_path", "out_dir", "model",
            "train_conf", "use_gpu", "corpus", "test_dir", "task",
            "sample_rate", "n_save_ex" and "publishable".
            ``conf["n_save_ex"]`` is replaced by the dataset length when it
            is -1.
    """
    # NOTE(review): ``compute_metrics`` is not defined in this function --
    # presumably a module-level list of metric names; confirm.
    model_path = os.path.join(conf["exp_dir"], conf["ckpt_path"])

    # all resulting files would be saved in eval_save_dir
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    os.makedirs(eval_save_dir, exist_ok=True)
    final_metrics_path = os.path.join(eval_save_dir, "final_metrics.json")

    if not os.path.exists(final_metrics_path):
        if conf["ckpt_path"] == "best_model.pth":
            # serialized checkpoint
            model = getattr(asteroid, conf["model"]).from_pretrained(model_path)
        else:
            # non-serialized checkpoint, _ckpt_epoch_{i}.ckpt, keys would
            # start with "model.", which need to be removed
            model = getattr(asteroid, conf["model"])(
                **conf["train_conf"]["filterbank"], **conf["train_conf"]["masknet"]
            )
            all_states = torch.load(model_path, map_location="cpu")
            state_dict = {
                k.split('.', 1)[1]: all_states["state_dict"][k]
                for k in all_states["state_dict"]
            }
            model.load_state_dict(state_dict)
            # model.load_state_dict(all_states["state_dict"], strict=False)

        # Handle device placement
        if conf["use_gpu"]:
            model.cuda()
        model_device = next(model.parameters()).device
        test_set = make_test_dataset(
            corpus=conf["corpus"],
            test_dir=conf["test_dir"],
            task=conf["task"],
            sample_rate=conf["sample_rate"],
            n_src=conf["train_conf"]["data"]["n_src"],
        )
        # Used to reorder sources only
        loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

        # Randomly choose the indexes of sentences to save.
        ex_save_dir = os.path.join(eval_save_dir, "examples/")
        if conf["n_save_ex"] == -1:
            conf["n_save_ex"] = len(test_set)
        save_idx = set(random.sample(range(len(test_set)), conf["n_save_ex"]))
        series_list = []
        # Gradients are disabled for the inference loop only and restored
        # afterwards.
        with torch.no_grad():
            for idx in tqdm(range(len(test_set))):
                # Forward the network on the mixture.
                mix, sources = tensors_to_device(test_set[idx], device=model_device)
                est_sources = model(mix.unsqueeze(0))

                # When inferencing separation for multi-task training,
                # exclude the last channel. Does not effect single-task
                # training models (from_scratch, pre+FT).
                est_sources = est_sources[:, :sources.shape[0]]

                _, reordered_sources = loss_func(est_sources, sources[None],
                                                 return_est=True)
                mix_np = mix.cpu().data.numpy()
                sources_np = sources.cpu().data.numpy()
                est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
                # For each utterance, we get a dictionary with the mixture
                # path, the input and output metrics
                utt_metrics = get_metrics(
                    mix_np,
                    sources_np,
                    est_sources_np,
                    sample_rate=conf["sample_rate"],
                    metrics_list=compute_metrics,
                )
                if hasattr(test_set, "mixture_path"):
                    utt_metrics["mix_path"] = test_set.mixture_path
                series_list.append(pd.Series(utt_metrics))

                # Save some examples in a folder. Wav files and metrics as
                # text.
                if idx in save_idx:
                    local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
                    os.makedirs(local_save_dir, exist_ok=True)
                    sf.write(local_save_dir + "mixture.wav", mix_np,
                             conf["sample_rate"])
                    # Loop over the sources and estimates
                    for src_idx, src in enumerate(sources_np):
                        sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                                 conf["sample_rate"])
                    for src_idx, est_src in enumerate(est_sources_np):
                        # Rescale the estimate to the mixture's peak amplitude.
                        est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                        sf.write(
                            local_save_dir + "s{}_estimate.wav".format(src_idx),
                            est_src,
                            conf["sample_rate"],
                        )
                    # Write local metrics to the example folder.
                    with open(local_save_dir + "metrics.json", "w") as f:
                        json.dump(utt_metrics, f, indent=0)

        # Save all metrics to the experiment folder.
        all_metrics_df = pd.DataFrame(series_list)
        all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

        # Print and save summary metrics
        final_results = {}
        for metric_name in compute_metrics:
            input_metric_name = "input_" + metric_name
            ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
            final_results[metric_name] = all_metrics_df[metric_name].mean()
            final_results[metric_name + "_imp"] = ldf.mean()
        print("Overall metrics :")
        pprint(final_results)
        with open(final_metrics_path, "w") as f:
            json.dump(final_results, f, indent=0)
    else:
        # Reuse the metrics of a previous run.
        with open(final_metrics_path, "r") as f:
            final_results = json.load(f)

    if conf["publishable"]:
        assert conf["ckpt_path"] == "best_model.pth"
        model_dict = torch.load(model_path, map_location="cpu")
        publish_dir = os.path.join(conf["exp_dir"], "publish_dir")
        os.makedirs(publish_dir, exist_ok=True)
        # The training config is taken from ``conf`` instead of relying on a
        # ``train_conf`` global that only exists when run as a script.
        publishable = save_publishable(
            publish_dir,
            model_dict,
            metrics=final_results,
            train_conf=conf["train_conf"],
        )
def main(conf):
    """Evaluate the pretrained ConvTasNet on the ``DAMPVSEPDataset`` test split.

    The checkpoint at ``<exp_dir>/best_model.pth`` is wrapped in
    ``LambdaOverlapAdd`` and applied to every test mixture. Per-utterance
    metrics come from ``get_metrics(..., average=False)`` followed by
    ``split_metric_dict``. A random subset of examples is written as wav files
    together with their metrics, summary metrics are saved, and the checkpoint
    is finally handed to ``save_publishable``.

    Relies on the module-level names ``compute_metrics`` and ``train_conf``.

    Args:
        conf (dict): Evaluation configuration. Keys read here: ``exp_dir``,
            ``out_dir``, ``use_gpu``, ``n_save_ex`` (``-1`` means "save every
            example"; the entry is overwritten with the resolved count),
            ``sample_rate`` and ``train_conf`` (``data`` and ``training``
            sections).
    """
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = ConvTasNet.from_pretrained(model_path)
    model = LambdaOverlapAdd(
        nnet=model,  # function to apply to each segment.
        n_src=2,  # number of sources in the output of nnet
        window_size=64000,  # size of the segmenting window
        hop_size=None,  # segmentation hop size
        window="hanning",  # type of the window (see scipy.signal.get_window)
        reorder_chunks=False,  # whether to reorder each consecutive segment.
        enable_grad=False,  # gradient calculation on or off (see torch.set_grad_enabled)
    )

    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    # Evaluation is done using the 'remix' mixture
    dataset_kwargs = {
        "root_path": Path(conf["train_conf"]["data"]["root_path"]),
        "task": conf["train_conf"]["data"]["task"],
        "sample_rate": conf["train_conf"]["data"]["sample_rate"],
        "num_workers": conf["train_conf"]["training"]["num_workers"],
        "mixture": "remix",
    }
    test_set = DAMPVSEPDataset(split="test", **dataset_kwargs)

    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    # The output folder must exist even when no example is saved, otherwise
    # writing all_metrics.csv below fails.
    os.makedirs(eval_save_dir, exist_ok=True)

    # Randomly choose the indexes of sentences to save.
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    # A set keeps the per-utterance membership test O(1).
    save_idx = set(random.sample(range(len(test_set)), conf["n_save_ex"]))

    series_list = []
    # Scope the no-grad mode to inference so it is restored afterwards.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = test_set[idx]
            mix = mix.to(model_device)
            est_sources = model.forward(mix.unsqueeze(0).unsqueeze(1))
            mix_np = mix.squeeze(0).cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = est_sources.squeeze(0).cpu().data.numpy()

            # For each utterance, we get a dictionary with the mixture path,
            # the input and output metrics
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
                average=False,
            )
            utt_metrics = split_metric_dict(utt_metrics)
            utt_metrics["mix_path"] = test_set.mixture_path
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                # Peak-normalise the mixture before writing it.
                sf.write(
                    local_save_dir + "mixture.wav",
                    mix_np / np.max(np.abs(mix_np)),
                    conf["sample_rate"],
                )
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(
                        local_save_dir + "s{}.wav".format(src_idx),
                        src,
                        conf["sample_rate"],
                    )
                for src_idx, est_src in enumerate(est_sources_np):
                    # Rescale (in place) each estimate to the mixture's peak.
                    est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        # "" is the un-suffixed column; "_s0"/"_s1" are presumably the
        # per-source columns produced by split_metric_dict (confirm there).
        for suffix in ["", "_s0", "_s1"]:
            column = f"{metric_name}{suffix}"
            improvement = all_metrics_df[column] - all_metrics_df["input_" + column]
            final_results[column] = all_metrics_df[column].mean()
            final_results[column + "_imp"] = improvement.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    # NOTE(review): `train_conf` is a module-level name, not a key of `conf`.
    save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
def main(conf):
    """Run TransMask on wav files, or evaluate it on the LibriMix test set.

    Two modes, selected by ``conf["file_path"]``:

    * non-empty: ``inference_wav`` is called on that file, or on every
      ``.wav`` file of that directory, and the function returns;
    * empty: every LibriMix test mixture is tiled 8 times (to time the model
      on longer inputs), separated, PIT-reordered and scored with
      ``get_metrics``. The running real-time factor and running mean SDR are
      printed after each utterance. Examples, per-utterance metrics and
      summary metrics are written under ``<exp_dir>/<out_dir>`` and the
      checkpoint is handed to ``save_publishable``.

    Relies on the module-level names ``compute_metrics`` (which must contain
    ``"sdr"``) and ``train_conf``.

    Args:
        conf (dict): Evaluation configuration. Keys read here: ``exp_dir``,
            ``out_dir``, ``use_gpu``, ``file_path``, ``test_dir``, ``task``,
            ``sample_rate``, ``n_save_ex`` (``-1`` means "save every
            example") and ``train_conf["masknet"]["n_src"]``.
    """
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = TransMask.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")

    # --- File / directory inference mode -----------------------------------
    file_path = conf["file_path"]
    if file_path != "":
        with torch.no_grad():
            if os.path.isdir(file_path):
                wavs = [
                    os.path.join(file_path, name)
                    for name in os.listdir(file_path)
                    if name.endswith(".wav")
                ]
                for wav in wavs:
                    inference_wav(wav, conf, model_device, model, ex_save_dir)
            else:
                inference_wav(file_path, conf, model_device, model, ex_save_dir)
        return

    # --- Dataset evaluation mode -------------------------------------------
    test_set = LibriMix(
        csv_dir=conf["test_dir"],
        task=conf["task"],
        sample_rate=conf["sample_rate"],
        n_src=conf["train_conf"]["masknet"]["n_src"],
        segment=None,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # The output folder must exist even when no example is saved.
    os.makedirs(eval_save_dir, exist_ok=True)

    # Randomly choose the indexes of sentences to save. Previously a value of
    # n_save_ex other than -1 left `save_idx = 0`, and `idx in save_idx`
    # raised TypeError on the first utterance.
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = set(random.sample(range(len(test_set)), conf["n_save_ex"]))

    series_list = []
    sdr = 0
    rtf = 0
    mul = 8  # number of times each utterance is tiled for the timing run
    on_cuda = model_device.type == "cuda"
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx], device=model_device)
            # Tile mixture and sources the same way so that the mixture stays
            # the sum of the sources (the mixture used to be sample-interleaved
            # while the sources were concatenated).
            mix = mix.reshape(-1).repeat(mul)
            sources = sources.repeat(1, mul)

            # CUDA kernels run asynchronously: synchronise around the forward
            # pass so the wall-clock measurement covers the actual compute.
            if on_cuda:
                torch.cuda.synchronize(model_device)
            start = time()
            est_sources = model(mix.unsqueeze(0))
            if on_cuda:
                torch.cuda.synchronize(model_device)
            dur = time() - start
            audio_seconds = len(mix) / conf["sample_rate"]
            rtf += dur / audio_seconds
            print(rtf / (idx + 1))

            _, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
            mix_np = mix.cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()

            # For each utterance, we get a dictionary with the mixture path,
            # the input and output metrics
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
            )
            sdr += utt_metrics["sdr"]
            print(sdr / (idx + 1))
            utt_metrics["mix_path"] = test_set.mixture_path
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np, conf["sample_rate"])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(
                        local_save_dir + "s{}.wav".format(src_idx),
                        src,
                        conf["sample_rate"],
                    )
                for src_idx, est_src in enumerate(est_sources_np):
                    # Rescale (in place) each estimate to the mixture's peak.
                    est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    # NOTE(review): `train_conf` is a module-level name, not a key of `conf`.
    save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
def main(conf):
    """Evaluate the best saved model on a ``Wsj0mixDataset`` test directory.

    Each mixture is separated with the deep-clustering head when
    ``loss_alpha == 1`` in the training configuration, and with the
    mask-inference head otherwise. Estimates are PIT-reordered and scored with
    ``get_metrics``. A random subset of examples is written to
    ``<exp_dir>/examples/``; per-utterance and summary metrics are written to
    ``<exp_dir>``.

    Relies on the module-level name ``compute_metrics``.

    Args:
        conf (dict): Evaluation configuration. Keys read here: ``train_conf``,
            ``exp_dir``, ``use_gpu``, ``test_dir``, ``n_src``,
            ``sample_rate`` and ``n_save_ex`` (``-1`` means "save every
            example"; the entry is overwritten with the resolved count).
    """
    model = load_best_model(conf['train_conf'], conf['exp_dir'])
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = Wsj0mixDataset(conf['test_dir'], n_src=conf['n_src'],
                              segment=None)
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, mode='pairwise')

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    # A set keeps the per-utterance membership test O(1).
    save_idx = set(random.sample(range(len(test_set)), conf['n_save_ex']))

    # The choice of separation head does not depend on the utterance.
    if conf['train_conf']['training']['loss_alpha'] == 1:
        # If Deep clustering only, use DC masks.
        separate = model.dc_head_separate
    else:
        # If Chimera, use mask-inference head masks
        separate = model.separate

    series_list = []
    # Scope the no-grad mode to inference so it is restored afterwards.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx],
                                             device=model_device)
            est_sources, _ = separate(mix[None, None])
            _, reordered_sources = loss_func(est_sources, sources[None],
                                             return_est=True)
            mix_np = mix[None].cpu().data.numpy()
            sources_np = sources.squeeze().cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze().cpu().data.numpy()
            utt_metrics = get_metrics(mix_np, sources_np, est_sources_np,
                                      sample_rate=conf['sample_rate'],
                                      metrics_list=compute_metrics)
            utt_metrics['mix_path'] = test_set.mix[idx][0]
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir,
                                              'ex_{}/'.format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np[0],
                         conf['sample_rate'])
                # Loop over the sources and estimates (file names are 1-based)
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx + 1),
                             src, conf['sample_rate'])
                for src_idx, est_src in enumerate(est_sources_np):
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                        est_src, conf['sample_rate'])
                # Write local metrics to the example folder.
                with open(local_save_dir + 'metrics.json', 'w') as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf['exp_dir'], 'all_metrics.csv'))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(conf['exp_dir'], 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)
sources_np = sources.cpu().data.numpy() est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy() ======= mix, sources, ids = test_set[idx] mix, sources = tensors_to_device([mix, sources], device=model_device) est_sources = model(mix.unsqueeze(0)) loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True) mix_np = mix.cpu().data.numpy() sources_np = sources.cpu().data.numpy() est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy() # For each utterance, we get a dictionary with the mixture path, # the input and output metrics >>>>>>> 210b5e4eb8ce24fe25780e008c89a4bb71bbd0ea utt_metrics = get_metrics( mix_np, sources_np, est_sources_np, sample_rate=conf["sample_rate"], <<<<<<< HEAD metrics_list=compute_metrics, ) utt_metrics["mix_path"] = test_set.mix[idx][0] ======= metrics_list=COMPUTE_METRICS, ) utt_metrics["mix_path"] = test_set.mixture_path est_sources_np_normalized = normalize_estimates(est_sources_np, mix_np) utt_metrics.update( **wer_tracker( mix=mix_np, clean=sources_np,
def main(conf):
    """Evaluate a saved CAE_DANet model on every test set listed in ``TEST``.

    For each test set the function runs the model on every batch (batch size
    1) under ``torch.no_grad()``, computes metrics with ``get_metrics``,
    accumulates rows of ``A`` per speaker id, and writes three files into
    ``model_path``: ``<set>_all_metrics.csv``, ``<set>_spkC.pt`` and
    ``<set>_final_metrics.json``.

    Relies on the module-level names ``TEST``, ``LIBRISPEECH_SPKID`` and
    ``compute_metrics``. Requires CUDA: model and data are moved with
    ``.cuda()`` unconditionally.

    Args:
        conf (dict): Configuration. Keys read here: ``main_args`` (``cuda``,
            ``model_dir``, ``exp_dir``, ``stage``), ``data["sample_rate"]``
            and ``training["num_workers"]``.
    """
    # Restrict the visible GPUs to the ones listed in the configuration.
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(conf['main_args']['cuda'])
    model_dir = conf['main_args']['model_dir']
    exp_dir = conf['main_args']['exp_dir']

    # Define Dataloader: one loader per entry of TEST.
    # presumably info = (n_src, csv_path) — inferred from the keyword
    # arguments below; confirm against the definition of TEST.
    test_gens = dict()
    for dataset_name, info in TEST.items():
        test_set = LibriMix(csv_path=info[1],
                            sample_rate=conf['data']['sample_rate'],
                            n_src=info[0],
                            segment=None)
        test_gen = DataLoader(test_set,
                              shuffle=False,
                              batch_size=1,
                              num_workers=conf['training']['num_workers'],
                              drop_last=True)
        test_gens.update({dataset_name: test_gen})
    # NOTE(review): SPKID is not used anywhere else in this function.
    SPKID = LIBRISPEECH_SPKID['test']

    # Define model. The stage-dependent loading below is disabled; the
    # CAE_DANet checkpoint is always loaded.
    # if conf['main_args']['stage'] == 1:
    #     model = CAE(conf['cae'])
    #     model_path = os.path.join(exp_dir, 'cae', config_cae_path(conf['cae']))
    # elif conf['main_args']['stage'] == 2:
    model_path = os.path.join(exp_dir, model_dir)
    model = CAE_DANet.load_model(model_path, model_state='best')
    # else:
    #     raise ValueError('Training stage should be either 1 or 2!')
    model = torch.nn.DataParallel(model).cuda()

    # Used to reorder sources only
    # loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    loss_func = PermInvariantSISDR(backward_loss=False,
                                   improvement=True,
                                   return_individual_results=True,
                                   pit=True)

    # Test
    model.eval()
    model.module.danet.add_kmeans(kmeans_type='hard',
                                  alpha=10,
                                  iter=20,
                                  dist_type='cos',
                                  n_init=5)
    with torch.no_grad():
        for set_name, test_gen in test_gens.items():  # different test sets
            series_list = []
            # Maps a speaker id to the stacked rows of A seen for it.
            spk_centroids = dict()
            for data in tqdm(test_gen,
                             desc='Testing {}'.format(set_name),
                             ncols=100):
                # presumably data = (mixture, speaker ids, ..., clean sources)
                # — inferred from the indexing only; confirm against LibriMix.
                m1wavs = data[0].unsqueeze(1).cuda()
                clean_wavs = data[-1].cuda()
                speaker_id = data[1]
                # NOTE(review): if `stage` is neither 1 nor 2, recon_sources
                # is never bound and the slicing after these branches raises
                # NameError.
                if conf['main_args']['stage'] == 1:
                    recon_sources, _, _ = model.module(m1wavs, clean_wavs)
                    reordered_sources = recon_sources
                if conf['main_args']['stage'] == 2:
                    estimated_masks, _, enc_mixture, _, phase = model(
                        m1wavs,
                        clean_wavs,
                        train=True,
                        n_sources=clean_wavs.shape[1])
                    # NOTE(review): V is not used after this assignment.
                    V = estimated_masks[
                        1]  # V (B, K, F*T), enc_masks (B, C, F*T)
                    A = estimated_masks[2]  # (B, nspk, K)
                    estimated_masks = estimated_masks[
                        0]  # estimated_masks (B, nspk, F*T)
                    recon_sources = model.module.get_rec_sources(
                        estimated_masks.view(m1wavs.shape[0],
                                             estimated_masks.shape[1],
                                             model.module.input_dim, -1),
                        enc_mixture,
                        phase=phase)  # recovered waveform
                    # loss, reordered_sources = loss_func(recon_sources, clean_wavs, return_est=True)
                    # Called so that loss_func.best_perm can be read below;
                    # the returned value itself is not used afterwards.
                    test_sisdri = loss_func(recon_sources,
                                            clean_wavs,
                                            initial_mixtures=m1wavs)
                    reordered_sources = torch.zeros(recon_sources.shape).cuda()
                    for j in range(A.shape[0]):
                        # NOTE(review): the right-hand side indexes every
                        # batch item (`:`) rather than item j; with the
                        # batch_size=1 loaders built above this concerns a
                        # single item only — confirm before changing the
                        # batch size.
                        reordered_sources[j] = recon_sources[:, loss_func.
                                                             best_perm[j], :]
                        # z counts positions within the permutation of item j.
                        z = 0
                        for k in loss_func.best_perm[j]:
                            spk_id = speaker_id[z][j]
                            # Append row A[j, k] to the rows already stored
                            # for this speaker, or start a new entry.
                            if spk_id in spk_centroids.keys():
                                spk_centroids[spk_id] = torch.cat(
                                    (spk_centroids[spk_id],
                                     A[j, k].unsqueeze(0).detach().cpu()),
                                    dim=0)
                            else:
                                spk_centroids[spk_id] = A[j, k].unsqueeze(
                                    0).detach().cpu()
                            z += 1
                # Crop mixture and references to the length (last axis) of
                # the reconstructed sources before computing metrics.
                m1wavs = m1wavs[:, :, :recon_sources.shape[2]]
                clean_wavs = clean_wavs[:, :, :recon_sources.shape[2]]
                mix_np = m1wavs.squeeze(0).cpu().data.numpy()
                sources_np = clean_wavs.squeeze(0).cpu().data.numpy()
                est_sources_np = reordered_sources.squeeze(
                    0).cpu().data.numpy()
                utt_metrics = get_metrics(
                    mix_np,
                    sources_np,
                    est_sources_np,
                    sample_rate=conf['data']['sample_rate'],
                    metrics_list=compute_metrics)
                # utt_metrics['mix_path'] = test_set.mix[idx][0]
                series_list.append(pd.Series(utt_metrics))
                # pprint(utt_metrics)

            # Save all metrics to the experiment folder.
            all_metrics_df = pd.DataFrame(series_list)
            all_metrics_df.to_csv(
                os.path.join(model_path,
                             '{}_all_metrics.csv'.format(set_name)))
            torch.save(spk_centroids,
                       os.path.join(model_path, '{}_spkC.pt'.format(set_name)))

            # Print and save summary metrics: the mean of each metric and the
            # mean difference to its "input_" counterpart.
            final_results = {}
            for metric_name in compute_metrics:
                input_metric_name = 'input_' + metric_name
                ldf = all_metrics_df[metric_name] - all_metrics_df[
                    input_metric_name]
                final_results[metric_name] = all_metrics_df[metric_name].mean()
                final_results[metric_name + '_imp'] = ldf.mean()
            print('Overall metrics :')
            pprint(final_results)
            with open(
                    os.path.join(model_path,
                                 '{}_final_metrics.json'.format(set_name)),
                    'w') as f:
                json.dump(final_results, f, indent=0)
def main(conf):
    """Evaluate a pretrained UNet or ConvTasNetNorm model on ``PodcastLoader``.

    The model class is imported dynamically according to
    ``conf["target_model"]`` and loaded from ``<exp_dir>/best_model.pth``.
    Every test item is separated and scored with
    ``get_metrics(..., average=False)``. A random subset of examples is written
    as wav files with their metrics; per-utterance and summary metrics are
    written under ``<exp_dir>/<out_dir>``. When ``conf["compute_wer"]`` is
    truthy the WER tracker's markdown report is printed and saved as well.

    Relies on the module-level names ``COMPUTE_METRICS``, ``MockWERTracker``
    and ``my_import``.

    Args:
        conf (dict): Evaluation configuration. Keys read here: ``exp_dir``,
            ``out_dir``, ``target_model``, ``sample_rate``, ``use_gpu``,
            ``test_dir``, ``compute_wer`` and ``n_save_ex`` (``-1`` means
            "save every example"; the entry is overwritten with the resolved
            count).
    """
    compute_metrics = COMPUTE_METRICS
    wer_tracker = MockWERTracker()
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")

    # Make the selected model package importable, then import its class.
    if conf["target_model"] == "UNet":
        sys.path.append('UNet_model')
        AsteroidModelModule = my_import("unet_model.UNet")
    else:
        sys.path.append('ConvTasNet_model')
        AsteroidModelModule = my_import("conv_tasnet_norm.ConvTasNetNorm")
    model = AsteroidModelModule.from_pretrained(
        model_path, sample_rate=conf["sample_rate"])
    print("model_path", model_path)

    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    # NOTE(review): the loader's sample rate and segment length are
    # hard-coded here rather than taken from `conf`.
    test_set = PodcastLoader(conf["test_dir"], sample_rate=44100, segment=18)

    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    # The output folder must exist even when no example is saved, otherwise
    # writing all_metrics.csv below fails.
    os.makedirs(eval_save_dir, exist_ok=True)

    # Randomly choose the indexes of sentences to save.
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    # A set keeps the per-utterance membership test O(1).
    save_idx = set(random.sample(range(len(test_set)), conf["n_save_ex"]))

    is_unet = conf["target_model"] == "UNet"
    series_list = []
    # Scope the no-grad mode to inference so it is restored afterwards.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = test_set[idx]
            # The model may live on the GPU; the input has to follow it,
            # otherwise the forward pass fails with a device mismatch.
            mix = mix.to(model_device)
            if is_unet:
                mix = mix.unsqueeze(0)
            # get audio representations, pass the mix to the unet, it will
            # normalize it, create the masks, pass them to audio, unnormalize
            # them and return
            est_sources = model(mix)

            mix_np = mix.cpu().data.numpy()
            if is_unet:
                # Undo the extra leading axis added for the UNet above.
                mix_np = mix_np.squeeze(0)
            sources_np = sources.cpu().data.numpy()
            est_sources_np = est_sources.squeeze(0).cpu().data.numpy()

            # For each utterance, we get a dictionary with the input and
            # output metrics
            utt_metrics = get_metrics(mix_np,
                                      sources_np,
                                      est_sources_np,
                                      sample_rate=conf["sample_rate"],
                                      metrics_list=compute_metrics,
                                      average=False)
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                # Example folders are numbered from 1.
                local_save_dir = os.path.join(ex_save_dir,
                                              "ex_{}/".format(idx + 1))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np,
                         conf["sample_rate"])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                             conf["sample_rate"])
                for src_idx, est_src in enumerate(est_sources_np):
                    # Rescale (in place) each estimate to the mixture's peak.
                    est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder. The values are
                # converted with .tolist() so that json can serialise them.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump({k: v.tolist()
                               for k, v in utt_metrics.items()},
                              f,
                              indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    print(final_results)

    if conf["compute_wer"]:
        print("\nWER report")
        wer_card = wer_tracker.final_report_as_markdown()
        print(wer_card)
        # Save the report
        with open(os.path.join(eval_save_dir, "final_wer.md"), "w") as f:
            f.write(wer_card)

    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump({k: v.tolist()
                   for k, v in final_results.items()},
                  f,
                  indent=0)