def test_save_and_load_dprnn(fb):
    """Serialized DPRNNTasNet must reload to a model with identical outputs."""
    original = DPRNNTasNet(
        n_src=2,
        n_repeats=2,
        bn_chan=16,
        hid_size=4,
        chunk_size=20,
        n_filters=32,
        fb_name=fb,
    )
    dummy_wav = torch.randn(1, 800)
    restored = DPRNNTasNet.from_pretrained(original.serialize())
    assert_allclose(original(dummy_wav), restored(dummy_wav))
def test_dprnntasnet_sep():
    """`separate` must mirror its input type: tensor in → tensor out, array in → array out."""
    model = DPRNNTasNet(n_src=2, n_repeats=2, bn_chan=16, hid_size=4, chunk_size=20, n_filters=32)
    # Torch input -> torch output.
    torch_mix = torch.rand(1, 800)
    assert isinstance(model.separate(torch_mix), torch.Tensor)
    # Numpy input -> numpy output.
    numpy_mix = np.random.randn(1, 800).astype("float32")
    assert isinstance(model.separate(numpy_mix), np.ndarray)
def model_fn(model_dir):
    """Load a pretrained DPRNNTasNet from a model directory.

    Args:
        model_dir: Directory containing the serialized ``best_model.pth``.

    Returns:
        The reconstructed ``DPRNNTasNet`` instance.
    """
    # Dead commented-out code (loading via an open file handle) removed;
    # ``from_pretrained`` is given the checkpoint path directly.
    model_path = os.path.join(model_dir, 'best_model.pth')
    return DPRNNTasNet.from_pretrained(model_path)
def test_save_and_load_dprnn(fb):
    """Round-trip serialization test for a small DPRNNTasNet."""
    tiny_model = DPRNNTasNet(
        n_src=2,
        n_repeats=2,
        bn_chan=16,
        hid_size=4,
        chunk_size=20,
        n_filters=32,
        fb_name=fb,
    )
    _default_test_model(tiny_model)
def test_save_and_load_dprnn(fb, sample_rate, use_mulcat):
    """Round-trip serialization across filterbank, sample-rate and mulcat options."""
    tiny_model = DPRNNTasNet(
        n_src=2,
        n_repeats=2,
        bn_chan=16,
        hid_size=4,
        chunk_size=20,
        n_filters=32,
        fb_name=fb,
        sample_rate=sample_rate,
        use_mulcat=use_mulcat,
    )
    _default_test_model(tiny_model)
def DPRNN(conf):
    """Run a pretrained DPRNN model on the mixture file given in *conf*.

    Args:
        conf: Dict with at least ``use_gpu`` (bool) and ``input_path``
            (path to an audio file readable by soundfile).

    Returns:
        The model's output for the input mixture.
    """
    sys.path.append('./asteroid')
    from asteroid.models import DPRNNTasNet
    from asteroid.utils import tensors_to_device
    from asteroid.models import save_publishable

    model_path = "./models/dprnn_usecase1"
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement.
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    mix, fs = sf.read(conf["input_path"])
    mix = torch.from_numpy(mix).type(torch.FloatTensor)
    # Fix: use a proper context manager. The original called
    # torch.no_grad().__enter__() without ever exiting, leaking the
    # no-grad state to all code running after this function.
    with torch.no_grad():
        outputs = model.float()(mix)
    return outputs
def model_fn(model_dir):
    """Load the serialized DPRNNTasNet from *model_dir* and return it.

    Fixes two defects in the original:
    - the ``return model`` statement was commented out, so the function
      always returned ``None``;
    - the checkpoint was passed as an open file handle, whereas the sibling
      loader in this codebase passes the path to ``from_pretrained``.

    Args:
        model_dir: Directory containing the serialized ``model.pth``.

    Returns:
        The reconstructed ``DPRNNTasNet`` instance.
    """
    model_path = os.path.join(model_dir, 'model.pth')
    model = DPRNNTasNet.from_pretrained(model_path)
    return model
def test_dprnntasnet_sep_from_hf():
    """A checkpoint fetched from the HuggingFace hub deserializes to a DPRNNTasNet."""
    hub_model = DPRNNTasNet.from_pretrained(HF_EXAMPLE_MODEL_IDENTIFER)
    assert isinstance(hub_model, DPRNNTasNet)
# MiniLibriMix is a tiny version of LibriMix (https://github.com/JorisCos/LibriMix),
# which is a free speech separation dataset.
from asteroid.data import LibriMix

# Asteroid's System is a convenience wrapper for PyTorch-Lightning.
from asteroid.engine import System


def _str2bool(value):
    """Parse a CLI flag as a boolean.

    argparse's ``type=bool`` is a pitfall: any non-empty string — including
    ``"False"`` — is truthy, so ``--download False`` would still be True.
    """
    return str(value).strip().lower() not in ('false', '0', 'no', '')


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--seed', type=int, default=1234)
    # Fix: was type=bool, which cannot be turned off from the command line.
    parser.add_argument('--download', type=_str2bool, default=True)
    parser.add_argument('--max_epochs', type=int, default=1)
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    parser.add_argument('--gpus', type=int, default=None)
    args = parser.parse_args()

    # This will automatically download MiniLibriMix from Zenodo on the first run.
    train_loader, val_loader = LibriMix.loaders_from_mini(task="sep_clean", batch_size=16)

    # Tell DPRNN that we want to separate to 2 sources.
    model = DPRNNTasNet(n_src=2)

    # PITLossWrapper works with any loss function.
    loss = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    system = System(model, optimizer, loss, train_loader, val_loader)

    # Train for 1 epoch using a single GPU. If you're running this on Google Colab,
    # be sure to select a GPU runtime (Runtime → Change runtime type → Hardware accelerator).
    trainer = Trainer(max_epochs=args.max_epochs, gpus=args.gpus)
    trainer.fit(system)
def _train(args):
    """Train a DPRNNTasNet on WHAM-style data and export the best weights.

    Reads hyper-parameters from ``conf.yml``, builds train/val loaders,
    fits with PyTorch-Lightning, then reloads the end-of-training DDP
    checkpoint and serializes a standalone CPU model to
    ``<model_dir>/best_model.pth``.

    Changes vs. the original: removed large spans of commented-out dead
    code and the leftover debug prints (``"!!!!!!!!!"``/``__getitem__``
    dumps). Training logic is otherwise unchanged.
    """
    train_dir = args.train
    val_dir = args.test

    with open('conf.yml') as f:
        def_conf = yaml.safe_load(f)
    pp = argparse.ArgumentParser()
    parser = prepare_parser_from_dict(def_conf, parser=pp)
    arg_dic, plain_args = parse_args_as_dict(parser, return_plain_args=True)
    print(arg_dic)
    conf = arg_dic

    train_set = WhamDataset_no_sf(
        train_dir,
        conf['data']['task'],
        sample_rate=conf['data']['sample_rate'],
        segment=conf['data']['segment'],
        nondefault_nsrc=conf['data']['nondefault_nsrc'])
    val_set = WhamDataset_no_sf(
        val_dir,
        conf['data']['task'],
        segment=conf['data']['segment'],
        sample_rate=conf['data']['sample_rate'],
        nondefault_nsrc=conf['data']['nondefault_nsrc'])

    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['training']['batch_size'],
                              num_workers=conf['training']['num_workers'],
                              drop_last=True)
    val_loader = DataLoader(val_set, shuffle=False,
                            batch_size=conf['training']['batch_size'],
                            num_workers=conf['training']['num_workers'],
                            drop_last=True)

    # Update number of sources (it depends on the task).
    conf['masknet'].update({'n_src': train_set.n_src})

    model = DPRNNTasNet(**conf['filterbank'], **conf['masknet'])
    optimizer = make_optimizer(model.parameters(), **conf['optim'])
    # Optionally halve the learning rate on a validation-loss plateau.
    scheduler = None
    if conf['training']['half_lr']:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)

    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = args.model_dir
    conf_path = os.path.join(exp_dir, 'conf.yml')
    with open(conf_path, 'w') as outfile:
        yaml.safe_dump(conf, outfile)

    # Define loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    system = System(model=model, loss_func=loss_func, optimizer=optimizer,
                    train_loader=train_loader, val_loader=val_loader,
                    scheduler=scheduler, config=conf)
    system.batch_size = 1

    # Don't ask for GPUs if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_epochs=args.epochs,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend='ddp',
        gradient_clip_val=conf['training']["gradient_clipping"])
    trainer.fit(system)

    # Reload the checkpoint written at the end of DDP training and export a
    # standalone, CPU-serialized copy of the trained model.
    best_path = os.path.join(exp_dir, "__temp_weight_ddp_end.ckpt")
    state_dict = torch.load(best_path)
    system.load_state_dict(state_dict=state_dict['state_dict'])
    system.cpu()
    to_save = system.model.serialize()
    torch.save(to_save, os.path.join(exp_dir, 'best_model.pth'))
def main(conf):
    """Evaluate a pretrained DPRNNTasNet on a WHAM test set.

    Computes per-utterance separation metrics, saves a few example wav
    files, writes per-utterance and summary metrics to ``exp_dir`` and
    prepares a publishable model directory.

    Args:
        conf: Dict with keys ``exp_dir``, ``use_gpu``, ``test_dir``,
            ``task``, ``sample_rate`` and ``n_save_ex``.
    """
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement.
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=None,
        segment=None,
    )  # Uses all segment length
    # Used to reorder sources only.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    # Fix: proper context manager. The original called
    # torch.no_grad().__enter__() without ever exiting, leaking the
    # no-grad state past this function.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx], device=model_device)
            est_sources = model(mix[None, None])
            _, indxs = torch.sort(torch.sqrt(torch.mean(est_sources**2, dim=-1)),
                                  descending=True)
            indxs = indxs[:, :2]
            # We know a-priori that there are 2 sources in WHAM-clean (WSJ0-2mix
            # clean), so we sort the estimated signals and keep the two with
            # highest energy.
            est_sources = est_sources.gather(
                1, indxs.unsqueeze(-1).repeat(1, 1, est_sources.shape[-1]))
            loss, reordered_sources = loss_func(est_sources, sources[None],
                                                return_est=True)
            mix_np = mix[None].cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
            )
            utt_metrics["mix_path"] = test_set.mix[idx][0]
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np[0],
                         conf["sample_rate"])
                # Loop over the sources and estimates.
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src,
                             conf["sample_rate"])
                for src_idx, est_src in enumerate(est_sources_np):
                    # Rescale each estimate to the mixture's peak level.
                    est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics (absolute value and improvement over input).
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    # NOTE(review): train_conf is not defined in this function — presumably a
    # module-level global set by the caller; verify before publishing.
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )