def test_transfer(tensors):
    if isinstance(tensors, torch.Tensor):
        assert_allclose(utils.tensors_to_device(tensors, "cpu"), tensors)
    if isinstance(tensors, list):
        assert list(utils.tensors_to_device(tensors, "cpu")) == list(tensors)
    if isinstance(tensors, dict):
        assert dict(utils.tensors_to_device(tensors, "cpu")) == dict(tensors)
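# A minimal sketch of the recursive behavior the test above relies on. This is
# a hypothetical re-implementation of tensors_to_device for illustration only,
# not Asteroid's actual code: tensors move to the target device, lists and
# dicts recurse, and anything else passes through unchanged.
import torch

def tensors_to_device_sketch(tensors, device):
    if isinstance(tensors, torch.Tensor):
        return tensors.to(device)
    if isinstance(tensors, list):
        return [tensors_to_device_sketch(t, device) for t in tensors]
    if isinstance(tensors, dict):
        return {k: tensors_to_device_sketch(v, device) for k, v in tensors.items()}
    return tensors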
def main(conf):
    model = get_model(conf)
    test_set = WhamDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=conf["nondefault_nsrc"],
        segment=None,
    )
    loss_func = PITLossWrapper(pairwise_neg_sisdr, mode="pairwise")
    model_device = next(model.parameters()).device
    for idx in range(len(test_set)):
        mix, sources, _ = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix)
        # PITLossWrapper expects the estimates first, then the references.
        loss, reordered_sources = loss_func(est_sources, sources, return_est=True)
        mix_np = mix.cpu().data.numpy()[0]
        sources_np = sources.cpu().data.numpy()[0]
        est_sources_np = reordered_sources.cpu().data.numpy()[0]
        # Waiting for pb_bss support to compute a subset of metrics.
        # We will probably want SI-SDR, plus an option for mir_eval SDR,
        # STOI and PESQ.
        input_metrics = InputMetrics(
            observation=mix_np,
            speech_source=sources_np,
            enable_si_sdr=True,
            sample_rate=conf["sample_rate"],
        )
        output_metrics = OutputMetrics(
            speech_prediction=est_sources_np,
            speech_source=sources_np,
            enable_si_sdr=True,
            sample_rate=conf["sample_rate"],
        )
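# Quick self-contained check of the argument order used above: PITLossWrapper
# takes the estimates first, then the references. Shapes are
# (batch, n_src, time); random tensors only, assuming asteroid is installed.
import torch
from asteroid.losses import PITLossWrapper, pairwise_neg_sisdr

loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
est = torch.randn(1, 2, 8000)
ref = torch.randn(1, 2, 8000)
loss, reordered = loss_func(est, ref, return_est=True)
print(loss.item(), reordered.shape)  # scalar loss, torch.Size([1, 2, 8000])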
def main(conf):
    perms = list(permutations(range(conf["train_conf"]["data"]["n_src"])))
    model_path = os.path.join(conf["exp_dir"], conf["ckpt_path"])
    if conf["ckpt_path"] == "best_model.pth":
        # Serialized checkpoint.
        model = getattr(asteroid, conf["model"]).from_pretrained(model_path)
    else:
        # Non-serialized checkpoint (_ckpt_epoch_{i}.ckpt): the state-dict keys
        # start with "model.", which needs to be stripped.
        model = getattr(asteroid, conf["model"])(
            **conf["train_conf"]["filterbank"], **conf["train_conf"]["masknet"]
        )
        all_states = torch.load(model_path, map_location="cpu")
        state_dict = {
            k.split(".", 1)[1]: all_states["state_dict"][k]
            for k in all_states["state_dict"]
        }
        model.load_state_dict(state_dict)
        # model.load_state_dict(all_states["state_dict"], strict=False)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = make_test_dataset(
        corpus=conf["corpus"],
        test_dir=conf["test_dir"],
        task=conf["task"],
        sample_rate=conf["sample_rate"],
        n_src=conf["train_conf"]["data"]["n_src"],
    )
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    # All resulting files are saved in eval_save_dir.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    os.makedirs(eval_save_dir, exist_ok=True)
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix.unsqueeze(0))
        # When running separation inference for a multi-task model, exclude the
        # last channel. Does not affect single-task models (from_scratch, pre+FT).
        est_sources = est_sources[:, : sources.shape[0]]
        _, best_perm_idx = loss_func.find_best_perm(
            pairwise_neg_sisdr(est_sources, sources[None]),
            conf["train_conf"]["data"]["n_src"],
        )
        utt_metrics = {}
        if hasattr(test_set, "mixture_path"):
            utt_metrics["mix_path"] = test_set.mixture_path
        utt_metrics["best_perm_idx"] = " ".join(
            [str(pidx) for pidx in perms[best_perm_idx[0]]]
        )
        series_list.append(pd.Series(utt_metrics))
    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "best_perms.csv"))
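# Illustration of how best_perm_idx indexes into the perms list built at the
# top of main. Toy numbers only; the index would normally come from
# find_best_perm, and the exact matching semantics are those of the wrapper.
from itertools import permutations

n_src = 3
perms = list(permutations(range(n_src)))
best_perm_idx = 4
print(perms[best_perm_idx])  # (2, 0, 1), one reordering of the three sources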
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    if conf["target_model"] == "UNet":
        sys.path.append("UNet_model")
        AsteroidModelModule = my_import("unet_model.UNet")
    else:
        sys.path.append("ConvTasNet_model")
        AsteroidModelModule = my_import("conv_tasnet_norm.ConvTasNetNorm")
    model = AsteroidModelModule.from_pretrained(model_path, sample_rate=conf["sample_rate"])
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = PodcastLoader(
        csv_dir=conf["test_dir"],
        sample_rate=conf["sample_rate"],
        segment=conf["segment"],
    )
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples_podcast/")
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix = test_set[idx]
        mix = tensors_to_device(mix, device=model_device)
        if conf["target_model"] == "UNet":
            est_sources = model(mix.unsqueeze(0)).squeeze(0)
        else:
            est_sources = model(mix)
        mix_np = mix.cpu().data.numpy()
        est_sources_np = est_sources.squeeze(0).cpu().data.numpy()
        # Save some examples in a folder: wav files and metrics as text.
        local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx + 1))
        os.makedirs(local_save_dir, exist_ok=True)
        sf.write(local_save_dir + "mixture.wav", mix_np, conf["sample_rate"])
        # Loop over the estimated sources.
        for src_idx, est_src in enumerate(est_sources_np):
            # Match each estimate's peak to the mixture's peak.
            est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
            sf.write(
                local_save_dir + "s{}_estimate.wav".format(src_idx),
                est_src,
                conf["sample_rate"],
            )
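# Small numpy illustration of the peak-matching rescaling applied to each
# estimate above: after scaling, the estimate's peak equals the mixture's peak.
import numpy as np

mix = np.array([0.5, -0.8, 0.2], dtype=np.float32)
est = np.array([0.1, -0.05, 0.02], dtype=np.float32)
est *= np.max(np.abs(mix)) / np.max(np.abs(est))
print(np.max(np.abs(est)))  # 0.8, the mixture's peak level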
def inference_wav(file_path, conf, model_device, model, ex_save_dir):
    wavid = os.path.basename(file_path).split(".")[0]
    mixture, _ = sf.read(file_path, dtype="float32")
    mixture = torch.from_numpy(mixture)
    mix = tensors_to_device(mixture, device=model_device)
    # Repeat each sample `mul` times (a no-op with mul=1).
    mul = 1
    mix = mix.view(-1, 1).repeat(1, mul).view(-1)
    mix_np = mix.cpu().data.numpy()
    est_sources = model(mix.unsqueeze(0))
    est_sources_np = est_sources.squeeze(0).cpu().data.numpy()
    local_save_dir = os.path.join(ex_save_dir, "ex/")
    os.makedirs(local_save_dir, exist_ok=True)
    print(local_save_dir)
    for src_idx, est_src in enumerate(est_sources_np):
        # Match each estimate's peak to the mixture's peak.
        est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
        sf.write(
            local_save_dir + "{}_s{}_estimate.wav".format(wavid, src_idx),
            est_src,
            conf["sample_rate"],
        )
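# What the view/repeat trick in inference_wav does: each sample is repeated
# `mul` times along time (a crude length multiplier; a no-op when mul == 1).
import torch

x = torch.tensor([1.0, 2.0, 3.0])
mul = 2
print(x.view(-1, 1).repeat(1, mul).view(-1))  # tensor([1., 1., 2., 2., 3., 3.])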
def main(conf):
    # Make the model.
    model, _ = make_model_and_optimizer(conf["train_conf"])
    # Load the best model.
    with open(os.path.join(conf["exp_dir"], "best_k_models.json"), "r") as f:
        best_k = json.load(f)
    best_model_path = min(best_k, key=best_k.get)
    # Load checkpoint.
    checkpoint = torch.load(best_model_path, map_location="cpu")
    state = checkpoint["state_dict"]
    state_copy = state.copy()
    # Remove unwanted keys.
    for keys, values in state.items():
        if keys.startswith("loss"):
            del state_copy[keys]
            print(keys)
    model = torch_utils.load_state_dict_in(state_copy, model)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = LibriMix(
        csv_dir=conf["test_dir"],
        task=conf["task"],
        sample_rate=conf["sample_rate"],
        n_src=conf["train_conf"]["data"]["n_src"],
        segment=None,
    )  # Uses the full segment length.
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix.unsqueeze(0))
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.squeeze().cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze().cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path
        # and the input and output metrics.
        utt_metrics = get_metrics(
            mix_np, sources_np, est_sources_np, sample_rate=conf["sample_rate"]
        )
        utt_metrics["mix_path"] = test_set.mixture_path
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder: wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np, conf["sample_rate"])
            # Loop over the sources and estimates.
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src, conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)
    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))
    # Print and save summary metrics.
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)
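# Toy view of the checkpoint selection above: best_k_models.json maps
# checkpoint paths to validation losses, so min() over the dict picks the key
# with the smallest value (assumes lower is better, as for a loss; the paths
# below are made up).
best_k = {"ckpt_epoch_3.ckpt": -12.1, "ckpt_epoch_7.ckpt": -13.4}
print(min(best_k, key=best_k.get))  # ckpt_epoch_7.ckpt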
def main(conf):
    model_path = os.path.join(conf['exp_dir'], 'best_model.pth')
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(
        conf['test_dir'],
        conf['task'],
        sample_rate=conf['sample_rate'],
        nondefault_nsrc=model.masker.n_src,
        segment=None,
    )  # Uses the full segment length.
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf['n_save_ex'])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.squeeze().cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze().cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np, sources_np, est_sources_np, sample_rate=conf['sample_rate']
        )
        utt_metrics['mix_path'] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder: wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + 'mixture.wav', mix_np[0], conf['sample_rate'])
            # Loop over the sources and estimates.
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + 's{}.wav'.format(src_idx + 1), src, conf['sample_rate'])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(
                    local_save_dir + 's{}_estimate.wav'.format(src_idx + 1),
                    est_src,
                    conf['sample_rate'],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + 'metrics.json', 'w') as f:
                json.dump(utt_metrics, f, indent=0)
    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf['exp_dir'], 'all_metrics.csv'))
    # Print and save summary metrics.
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(conf['exp_dir'], 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)
    model_dict = torch.load(model_path, map_location='cpu')
    # train_conf is expected to be loaded at module level, as in the other recipes.
    publishable = save_publishable(
        os.path.join(conf['exp_dir'], 'publish_dir'),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
def main(conf):
    model_path = os.path.join(conf["exp_dir"], conf["ckpt_path"])
    # All resulting files are saved in eval_save_dir.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    os.makedirs(eval_save_dir, exist_ok=True)
    if not os.path.exists(os.path.join(eval_save_dir, "final_metrics.json")):
        if conf["ckpt_path"] == "best_model.pth":
            # Serialized checkpoint.
            model = getattr(asteroid, conf["model"]).from_pretrained(model_path)
        else:
            # Non-serialized checkpoint (_ckpt_epoch_{i}.ckpt): the state-dict
            # keys start with "model.", which needs to be stripped.
            model = getattr(asteroid, conf["model"])(
                **conf["train_conf"]["filterbank"], **conf["train_conf"]["masknet"]
            )
            all_states = torch.load(model_path, map_location="cpu")
            state_dict = {
                k.split(".", 1)[1]: all_states["state_dict"][k]
                for k in all_states["state_dict"]
            }
            model.load_state_dict(state_dict)
            # model.load_state_dict(all_states["state_dict"], strict=False)
        # Handle device placement
        if conf["use_gpu"]:
            model.cuda()
        model_device = next(model.parameters()).device
        test_set = make_test_dataset(
            corpus=conf["corpus"],
            test_dir=conf["test_dir"],
            task=conf["task"],
            sample_rate=conf["sample_rate"],
            n_src=conf["train_conf"]["data"]["n_src"],
        )
        # Used to reorder sources only.
        loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
        # Randomly choose the indexes of sentences to save.
        ex_save_dir = os.path.join(eval_save_dir, "examples/")
        if conf["n_save_ex"] == -1:
            conf["n_save_ex"] = len(test_set)
        save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
        series_list = []
        torch.no_grad().__enter__()
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx], device=model_device)
            est_sources = model(mix.unsqueeze(0))
            # When running separation inference for a multi-task model, exclude
            # the last channel. Does not affect single-task models
            # (from_scratch, pre+FT).
            est_sources = est_sources[:, : sources.shape[0]]
            loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
            mix_np = mix.cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            # For each utterance, we get a dictionary with the mixture path
            # and the input and output metrics.
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
            )
            if hasattr(test_set, "mixture_path"):
                utt_metrics["mix_path"] = test_set.mixture_path
            series_list.append(pd.Series(utt_metrics))
            # Save some examples in a folder: wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np, conf["sample_rate"])
                # Loop over the sources and estimates.
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx), src, conf["sample_rate"])
                for src_idx, est_src in enumerate(est_sources_np):
                    # Match each estimate's peak to the mixture's peak.
                    est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)
        # Save all metrics to the experiment folder.
        all_metrics_df = pd.DataFrame(series_list)
        all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))
        # Print and save summary metrics.
        final_results = {}
        for metric_name in compute_metrics:
            input_metric_name = "input_" + metric_name
            ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
            final_results[metric_name] = all_metrics_df[metric_name].mean()
            final_results[metric_name + "_imp"] = ldf.mean()
        print("Overall metrics :")
        pprint(final_results)
        with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
            json.dump(final_results, f, indent=0)
    else:
        with open(os.path.join(eval_save_dir, "final_metrics.json"), "r") as f:
            final_results = json.load(f)
    if conf["publishable"]:
        assert conf["ckpt_path"] == "best_model.pth"
        model_dict = torch.load(model_path, map_location="cpu")
        os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
        publishable = save_publishable(
            os.path.join(conf["exp_dir"], "publish_dir"),
            model_dict,
            metrics=final_results,
            train_conf=train_conf,
        )
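# Tiny pandas example of the "_imp" summary computed above: the improvement is
# the mean gap between output and input metrics (toy SI-SDR values in dB).
import pandas as pd

df = pd.DataFrame({"si_sdr": [10.0, 12.0], "input_si_sdr": [0.5, 1.5]})
print(df["si_sdr"].mean())                         # 11.0
print((df["si_sdr"] - df["input_si_sdr"]).mean())  # 10.0 dB improvement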
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = TransMask.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    if conf["file_path"] == "":
        test_set = LibriMix(
            csv_dir=conf["test_dir"],
            task=conf["task"],
            sample_rate=conf["sample_rate"],
            n_src=conf["train_conf"]["masknet"]["n_src"],
            segment=None,
        )  # Uses the full segment length.
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1 and conf["file_path"] == "":
        conf["n_save_ex"] = len(test_set)
        save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    else:
        save_idx = 0
    series_list = []
    torch.no_grad().__enter__()
    sdr = 0
    rtf = 0
    if conf["file_path"] != "":
        file_path = conf["file_path"]
        if os.path.isdir(file_path):
            wavs = [
                os.path.join(file_path, wav)
                for wav in os.listdir(file_path)
                if ".wav" in wav
            ]
            for wav in wavs:
                inference_wav(wav, conf, model_device, model, ex_save_dir)
        else:
            inference_wav(file_path, conf, model_device, model, ex_save_dir)
        return
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        # Stretch the mixture by a factor of 8 (each sample repeated) and tile
        # the sources along time to match the new length.
        mul = 8
        mix = mix.view(-1, 1).repeat(1, mul).view(-1)
        sources = sources.repeat(1, mul)
        ss = time()
        est_sources = model(mix.unsqueeze(0))
        dur = time() - ss
        ll = len(mix) / 8000
        rtf += dur / ll
        # Running average of the real-time factor.
        print(rtf / (idx + 1))
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path
        # and the input and output metrics.
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        sdr += utt_metrics["sdr"]
        # Running average SDR.
        print(sdr / (idx + 1))
        utt_metrics["mix_path"] = test_set.mixture_path
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder: wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np, conf["sample_rate"])
            # Loop over the sources and estimates.
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src, conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                # Match each estimate's peak to the mixture's peak.
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)
    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))
    # Print and save summary metrics.
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)
    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
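# Toy illustration of the real-time-factor bookkeeping in the loop above:
# RTF = processing time / audio duration, averaged over utterances.
timings = [(0.12, 4.0), (0.20, 5.0)]  # (seconds to process, seconds of audio)
rtf = sum(proc / dur for proc, dur in timings) / len(timings)
print(rtf)  # 0.035, i.e. roughly 29x faster than real time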
def main(conf):
    best_model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    if not os.path.exists(best_model_path):
        # Make pth from checkpoint.
        model = load_best_model(
            conf["train_conf"], conf["exp_dir"], sample_rate=conf["sample_rate"]
        )
        torch.save(model.state_dict(), best_model_path)
    else:
        model, _ = make_model_and_optimizer(
            conf["train_conf"], sample_rate=conf["sample_rate"]
        )
        model.eval()
        model.load_state_dict(torch.load(best_model_path))
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_dirs = [
        conf["test_dir"].format(n_src)
        for n_src in conf["train_conf"]["masknet"]["n_srcs"]
    ]
    test_set = Wsj0mixVariable(
        json_dirs=test_dirs,
        n_srcs=conf["train_conf"]["masknet"]["n_srcs"],
        sample_rate=conf["train_conf"]["data"]["sample_rate"],
        seglen=None,
        minlen=None,
    )
    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = [
            torch.Tensor(x)
            for x in tensors_to_device(test_set[idx], device=model_device)
        ]
        est_sources = model.separate(mix[None])
        p_si_snr = Penalized_PIT_Wrapper(pairwise_neg_sisdr_loss)(est_sources, sources)
        utt_metrics = {
            "P-Si-SNR": p_si_snr.item(),
            "counting_accuracy": float(sources.size(0) == est_sources.size(0)),
        }
        utt_metrics["mix_path"] = test_set.data[idx][0]
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder: wav files and metrics as text.
        if idx in save_idx:
            mix_np = mix[None].cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = est_sources.cpu().data.numpy()
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["sample_rate"])
            # Loop over the sources and estimates.
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src, conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)
    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))
    # Print and save summary metrics.
    final_results = {}
    for metric_name in ["P-Si-SNR", "counting_accuracy"]:
        final_results[metric_name] = all_metrics_df[metric_name].mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)
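# The counting_accuracy metric above in isolation: it is 1.0 exactly when the
# model outputs as many sources as the reference contains (illustrative
# tensors only).
import torch

sources = torch.randn(3, 8000)
est_sources = torch.randn(2, 8000)
print(float(sources.size(0) == est_sources.size(0)))  # 0.0: one source missed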
def main(conf):
    model = load_best_model(conf['train_conf'], conf['exp_dir'])
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = Wsj0mixDataset(conf['test_dir'], n_src=conf['n_src'], segment=None)
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, mode='pairwise')
    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf['n_save_ex'])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        if conf['train_conf']['training']['loss_alpha'] == 1:
            # If deep clustering only, use DC masks.
            est_sources, dic_out = model.dc_head_separate(mix[None, None])
        else:
            # If Chimera, use the mask-inference head masks.
            est_sources, dic_out = model.separate(mix[None, None])
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.squeeze().cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze().cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf['sample_rate'],
            metrics_list=compute_metrics,
        )
        utt_metrics['mix_path'] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder: wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + 'mixture.wav', mix_np[0], conf['sample_rate'])
            # Loop over the sources and estimates.
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + 's{}.wav'.format(src_idx + 1), src, conf['sample_rate'])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(
                    local_save_dir + 's{}_estimate.wav'.format(src_idx + 1),
                    est_src,
                    conf['sample_rate'],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + 'metrics.json', 'w') as f:
                json.dump(utt_metrics, f, indent=0)
    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf['exp_dir'], 'all_metrics.csv'))
    # Print and save summary metrics.
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(conf['exp_dir'], 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)
def main(conf):
    model_path = os.path.join(conf['exp_dir'], 'best_model.pth')
    model = ConvTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    # Get data for evaluation. This should change in the future to work on
    # real test data that was not used for training.
    dataset = SeparationDataset(
        combination_list_path=os.path.join(conf['exp_dir'], 'combination_list.pkl')
    )
    n_val = int(
        len(dataset)
        * conf['train_conf']['data']['fraction_of_examples_to_use_for_validation']
    )
    train_set, val_set = random_split(dataset, [len(dataset) - n_val, n_val])  # noqa
    # test_set = val_set
    test_set = train_set
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf['n_save_ex'])
    # series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)  # noqa
        mix_np = to_complex(mix[None].cpu().data.numpy())
        sources_np = to_complex(sources.cpu().data.numpy())
        est_sources_np = to_complex(reordered_sources.squeeze(0).cpu().data.numpy())
        # utt_metrics = get_metrics(mix_np, sources_np, est_sources_np,
        #                           sample_rate=conf['sample_rate'],
        #                           metrics_list=compute_metrics)
        # utt_metrics['mix_path'] = test_set.mix[idx][0]
        # series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder: spectrogram plots as images.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            iq_data = mix_np[0]
            ax = plot_spectogram(iq_data, scale=False, show_plot=False)
            ax.figure.savefig(local_save_dir + 'mixture.png')
            # Loop over the sources and estimates.
            for src_idx, src in enumerate(sources_np):
                iq_data = src
                ax = plot_spectogram(iq_data, scale=False, show_plot=False)
                ax.figure.savefig(local_save_dir + 's{}.png'.format(src_idx + 1))
            for src_idx, est_src in enumerate(est_sources_np):
                # est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                iq_data = np.reshape(est_src, (32, 128)).T
                ax = plot_spectogram(iq_data, scale=False, show_plot=False)
                ax.figure.savefig(local_save_dir + 's{}_estimate.png'.format(src_idx + 1))
    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources, ids = test_set[idx]
        mix, sources = tensors_to_device([mix, sources], device=model_device)
        est_sources = model(mix.unsqueeze(0))
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path
        # and the input and output metrics.
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=None,
        segment=None,
    )  # Uses the full segment length.
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        # We know a priori that there are 2 sources in WHAM-clean (WSJ0-2mix
        # clean), so we sort the estimated signals by energy and keep only the
        # two with the highest energy.
        _, indxs = torch.sort(
            torch.sqrt(torch.mean(est_sources ** 2, dim=-1)), descending=True
        )
        indxs = indxs[:, :2]
        est_sources = est_sources.gather(
            1, indxs.unsqueeze(-1).repeat(1, 1, est_sources.shape[-1])
        )
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        utt_metrics["mix_path"] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder: wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["sample_rate"])
            # Loop over the sources and estimates.
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src, conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                # Match each estimate's peak to the mixture's peak.
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)
    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))
    # Print and save summary metrics.
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)
    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
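# Self-contained demo of the energy-based selection above: rank the estimated
# sources by RMS and keep the two most energetic ones (random tensors only).
import torch

est_sources = torch.randn(1, 4, 16000)  # (batch, n_est, time)
rms = torch.sqrt(torch.mean(est_sources ** 2, dim=-1))
_, indxs = torch.sort(rms, descending=True)
indxs = indxs[:, :2]
top2 = est_sources.gather(1, indxs.unsqueeze(-1).repeat(1, 1, est_sources.shape[-1]))
print(top2.shape)  # torch.Size([1, 2, 16000])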
def main(conf): model = load_best_model(conf["train_conf"], conf["exp_dir"]) # Handle device placement if conf["use_gpu"]: model.cuda() model_device = next(model.parameters()).device test_set = WhamRDataset( conf["test_dir"], conf["task"], sample_rate=conf["sample_rate"], nondefault_nsrc=model.n_src, segment=None, ) # Uses all segment length # Used to reorder sources only loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx") # Randomly choose the indexes of sentences to save. ex_save_dir = os.path.join(conf["exp_dir"], "examples/") if conf["n_save_ex"] == -1: conf["n_save_ex"] = len(test_set) save_idx = random.sample(range(len(test_set)), conf["n_save_ex"]) series_list = [] torch.no_grad().__enter__() for idx in tqdm(range(len(test_set))): # Forward the network on the mixture. mix, sources = tensors_to_device(test_set[idx], device=model_device) est_sources = model(mix[None, None]) loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True) mix_np = mix[None].cpu().data.numpy() sources_np = sources.cpu().data.numpy() est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy() utt_metrics = get_metrics( mix_np, sources_np, est_sources_np, sample_rate=conf["sample_rate"], metrics_list=compute_metrics, ) utt_metrics["mix_path"] = test_set.mix[idx][0] series_list.append(pd.Series(utt_metrics)) # Save some examples in a folder. Wav files and metrics as text. if idx in save_idx: local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx)) os.makedirs(local_save_dir, exist_ok=True) sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["sample_rate"]) # Loop over the sources and estimates for src_idx, src in enumerate(sources_np): sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src, conf["sample_rate"]) for src_idx, est_src in enumerate(est_sources_np): sf.write( local_save_dir + "s{}_estimate.wav".format(src_idx + 1), est_src, conf["sample_rate"], ) # Write local metrics to the example folder. with open(local_save_dir + "metrics.json", "w") as f: json.dump(utt_metrics, f, indent=0) # Save all metrics to the experiment folder. all_metrics_df = pd.DataFrame(series_list) all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv")) # Print and save summary metrics final_results = {} for metric_name in compute_metrics: input_metric_name = "input_" + metric_name ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name] final_results[metric_name] = all_metrics_df[metric_name].mean() final_results[metric_name + "_imp"] = ldf.mean() print("Overall metrics :") pprint(final_results) with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f: json.dump(final_results, f, indent=0)
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DCUNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = BBCSODataset(
        conf["json_dir"],
        conf["n_src"],
        conf["sample_rate"],
        conf["batch_size"],
        220500,
        train=False,
    )  # Uses the full segment length.
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        mix = mix.unsqueeze(0)
        sources = sources.unsqueeze(0)
        est_sources = model(mix)
        loss, reordered_sources = loss_func(est_sources, sources, return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.squeeze(0).cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder: wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(
                local_save_dir + "mixture.wav",
                np.swapaxes(mix_np, 0, 1),
                conf["sample_rate"],
            )
            # Loop over the sources and estimates.
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src, conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                # Match each estimate's peak to the mixture's peak.
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)
    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))
    # Print and save summary metrics.
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)
    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
def main(conf):
    compute_metrics = update_compute_metrics(conf["compute_wer"], COMPUTE_METRICS)
    anno_df = pd.read_csv(
        Path(conf["test_dir"]).parent.parent.parent / "test_annotations.csv"
    )
    wer_tracker = (
        MockWERTracker() if not conf["compute_wer"] else WERTracker(ASR_MODEL_PATH, anno_df)
    )
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = LibriMix(
        csv_dir=conf["test_dir"],
        task=conf["task"],
        sample_rate=conf["sample_rate"],
        n_src=conf["train_conf"]["data"]["n_src"],
        segment=None,
        return_id=True,
    )  # Uses the full segment length.
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources, ids = test_set[idx]
        mix, sources = tensors_to_device([mix, sources], device=model_device)
        est_sources = model(mix.unsqueeze(0))
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path
        # and the input and output metrics.
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=COMPUTE_METRICS,
        )
        utt_metrics["mix_path"] = test_set.mixture_path
        est_sources_np_normalized = normalize_estimates(est_sources_np, mix_np)
        utt_metrics.update(
            **wer_tracker(
                mix=mix_np,
                clean=sources_np,
                estimate=est_sources_np_normalized,
                wav_id=ids,
                sample_rate=conf["sample_rate"],
            )
        )
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder: wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np, conf["sample_rate"])
            # Loop over the sources and estimates.
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src, conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np_normalized):
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)
    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))
    # Print and save summary metrics.
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    if conf["compute_wer"]:
        print("\nWER report")
        wer_card = wer_tracker.final_report_as_markdown()
        print(wer_card)
        # Save the report.
        with open(os.path.join(eval_save_dir, "final_wer.md"), "w") as f:
            f.write(wer_card)
    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)
    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )