def main(conf):
    train_set = LibriMix(
        csv_dir=conf["data"]["train_dir"],
        task=conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        n_src=conf["data"]["n_src"],
        segment=conf["data"]["segment"],
    )
    val_set = LibriMix(
        csv_dir=conf["data"]["valid_dir"],
        task=conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        n_src=conf["data"]["n_src"],
        segment=conf["data"]["segment"],
    )
    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    conf["masknet"].update({"n_src": conf["data"]["n_src"]})

    model = DCCRNet(**conf["filterbank"], **conf["masknet"], sample_rate=conf["data"]["sample_rate"])
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = None
    if conf["training"]["half_lr"]:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    system = System(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    callbacks = []
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(
        checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    callbacks.append(checkpoint)
    if conf["training"]["early_stop"]:
        callbacks.append(EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True))

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    distributed_backend = "ddp" if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        callbacks=callbacks,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend=distributed_backend,
        limit_train_batches=1.0,  # Useful for fast experiment
        gradient_clip_val=5.0,
    )
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    state_dict = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=state_dict["state_dict"])
    system.cpu()

    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
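# Hedged usage sketch (not part of the training script above): once `best_model.pth`
# has been written via `serialize()`, it can typically be reloaded for inference with
# Asteroid's `from_pretrained`. The file path and the dummy input below are
# illustrative assumptions, not values taken from the recipe.
import torch
from asteroid.models import DCCRNet

model = DCCRNet.from_pretrained("exp/tmp/best_model.pth")  # assumed path
model.eval()
with torch.no_grad():
    mixture = torch.randn(1, 16000)  # dummy one-second mixture, shape (batch, time)
    est_sources = model(mixture)     # estimated sources, shape (batch, n_src, time)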
def main(conf):
    train_set = LibriMix(
        csv_dir=conf["data"]["train_dir"],
        task=conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        n_src=conf["masknet"]["n_src"],
        segment=conf["data"]["segment"],
    )
    val_set = LibriMix(
        csv_dir=conf["data"]["valid_dir"],
        task=conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        n_src=conf["masknet"]["n_src"],
        segment=conf["data"]["segment"],
    )
    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    # Update number of sources (it depends on the task). Redundant here, since
    # n_src is already read from conf["masknet"], but it keeps the saved conf.yml
    # consistent with the dataset.
    conf["masknet"].update({"n_src": train_set.n_src})

    model = DPRNNTasNet(
        **conf["filterbank"], **conf["masknet"], sample_rate=conf["data"]["sample_rate"]
    )
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = None
    if conf["training"]["half_lr"]:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    system = System(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    callbacks = []
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(
        checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    callbacks.append(checkpoint)
    if conf["training"]["early_stop"]:
        callbacks.append(EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True))

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    distributed_backend = "ddp" if torch.cuda.is_available() else None

    # Optionally resume from the most recent checkpoint. Sort by modification
    # time so that e.g. epoch=100 is not ranked before epoch=99, as it would be
    # with a lexical sort.
    resume_from = None
    if conf["training"]["cont"]:
        from glob import glob

        ckpts = glob(os.path.join(checkpoint_dir, "*.ckpt"))
        if ckpts:
            resume_from = max(ckpts, key=os.path.getmtime)
    trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        callbacks=callbacks,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend=distributed_backend,
        limit_train_batches=1.0,  # Useful for fast experiment
        gradient_clip_val=conf["training"]["gradient_clipping"],
        resume_from_checkpoint=resume_from,
    )
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    # Save best model (next PL version will make this easier)
    state_dict = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=state_dict["state_dict"])
    system.cpu()

    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
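# Hedged sketch of the nested `conf` dictionary the scripts above expect
# (normally assembled from conf.yml plus parsed CLI arguments). The keys mirror
# the lookups in main(); every value shown is an illustrative assumption, not a
# recipe default.
conf_example = {
    "data": {
        "train_dir": "data/wav8k/min/train-360",  # assumed CSV directory layout
        "valid_dir": "data/wav8k/min/dev",
        "task": "sep_clean",
        "sample_rate": 8000,
        "segment": 3,
        "n_src": 2,            # some variants read n_src from "data", others from "masknet"
    },
    "filterbank": {"n_filters": 64, "kernel_size": 16, "stride": 8},
    "masknet": {"n_src": 2},
    "optim": {"optimizer": "adam", "lr": 1e-3, "weight_decay": 0.0},
    "training": {
        "epochs": 200,
        "batch_size": 4,
        "num_workers": 4,
        "half_lr": True,
        "early_stop": True,
        "gradient_clipping": 5.0,
        "cont": False,         # set True to resume from the latest checkpoint
    },
    "main_args": {"exp_dir": "exp/tmp", "gpus": -1},
}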
def main(conf):
    train_set = LibriMix(
        csv_dir=conf["data"]["train_dir"],
        task=conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        n_src=conf["masknet"]["n_src"],
        segment=conf["data"]["segment"],
    )
    print(conf["data"]["train_dir"])
    val_set = LibriMix(
        csv_dir=conf["data"]["valid_dir"],
        task=conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        n_src=conf["masknet"]["n_src"],
        segment=conf["data"]["segment"],
    )
    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    # Update number of sources (it depends on the task).
    conf["masknet"].update({"n_src": train_set.n_src})

    # model = TransMask(**conf["filterbank"], **conf["masknet"])  # alternative architecture
    model = DPTrans(
        **conf["filterbank"], **conf["masknet"], sample_rate=conf["data"]["sample_rate"]
    )
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = None
    if conf["training"]["half_lr"]:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # TODO: a warmup schedule for the transformer (e.g. asteroid.engine.schedulers.
    # DPTNetScheduler stepped per batch) could be used instead of ReduceLROnPlateau.

    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    system = System(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    callbacks = []
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(
        checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    callbacks.append(checkpoint)
    if conf["training"]["early_stop"]:
        callbacks.append(EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True))

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    distributed_backend = "ddp" if torch.cuda.is_available() else None

    # Optionally resume from the most recent checkpoint. Sort by modification
    # time so that e.g. epoch=100 is not ranked before epoch=99, as it would be
    # with a lexical sort.
    resume_from = None
    if conf["training"]["cont"]:
        from glob import glob

        ckpts = glob(os.path.join(checkpoint_dir, "*.ckpt"))
        if ckpts:
            resume_from = max(ckpts, key=os.path.getmtime)
    trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        callbacks=callbacks,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend=distributed_backend,
        limit_train_batches=1.0,  # Useful for fast experiment
        gradient_clip_val=conf["training"]["gradient_clipping"],
        resume_from_checkpoint=resume_from,
    )
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    # Load the best checkpoint and export a standalone model.
    state_dict = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=state_dict["state_dict"])
    system.cpu()

    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf):
    train_set = LibriMix(csv_dir=conf['data']['train_dir'],
                         task=conf['data']['task'],
                         sample_rate=conf['data']['sample_rate'],
                         n_src=conf['data']['n_src'],
                         segment=conf['data']['segment'])
    val_set = LibriMix(csv_dir=conf['data']['valid_dir'],
                       task=conf['data']['task'],
                       sample_rate=conf['data']['sample_rate'],
                       n_src=conf['data']['n_src'],
                       segment=conf['data']['segment'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['training']['batch_size'],
                              num_workers=conf['training']['num_workers'],
                              drop_last=True)
    # The validation loader should not be shuffled.
    val_loader = DataLoader(val_set, shuffle=False,
                            batch_size=conf['training']['batch_size'],
                            num_workers=conf['training']['num_workers'],
                            drop_last=True)
    conf['masknet'].update({'n_src': conf['data']['n_src']})

    # Define model and optimizer in a local function (defined in the recipe),
    # so that re-instantiating the model and optimizer for retraining and
    # evaluating is straightforward.
    model, optimizer = make_model_and_optimizer(conf)

    # Define scheduler
    scheduler = None
    if conf['training']['half_lr']:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf['main_args']['exp_dir']
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, 'conf.yml')
    with open(conf_path, 'w') as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    system = System(model=model, loss_func=loss_func, optimizer=optimizer,
                    train_loader=train_loader, val_loader=val_loader,
                    scheduler=scheduler, config=conf)

    # Define callbacks
    checkpoint_dir = os.path.join(exp_dir, 'checkpoints/')
    checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss',
                                 mode='min', save_top_k=5, verbose=1)
    early_stopping = False
    if conf['training']['early_stop']:
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    # Don't ask GPU if they are not available.
    if not torch.cuda.is_available():
        print('No available GPU was found, setting gpus to None')
        conf['main_args']['gpus'] = None
    trainer = pl.Trainer(max_epochs=conf['training']['epochs'],
                         checkpoint_callback=checkpoint,
                         early_stop_callback=early_stopping,
                         default_save_path=exp_dir,
                         gpus=conf['main_args']['gpus'],
                         distributed_backend='dp',
                         train_percent_check=1.0,  # Useful for fast experiment
                         gradient_clip_val=5.)
    trainer.fit(system)

    # Cast the monitored values to float so the dict is JSON-serializable.
    best_k = {k: float(v) for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)
def main(conf):
    train_set = LibriMix(csv_dir=conf['data']['train_dir'],
                         task=conf['data']['task'],
                         sample_rate=conf['data']['sample_rate'],
                         n_src=conf['data']['n_src'],
                         segment=conf['data']['segment'])
    val_set = LibriMix(csv_dir=conf['data']['valid_dir'],
                       task=conf['data']['task'],
                       sample_rate=conf['data']['sample_rate'],
                       n_src=conf['data']['n_src'],
                       segment=conf['data']['segment'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['training']['batch_size'],
                              num_workers=conf['training']['num_workers'],
                              drop_last=True)
    val_loader = DataLoader(val_set, shuffle=False,
                            batch_size=conf['training']['batch_size'],
                            num_workers=conf['training']['num_workers'],
                            drop_last=True)
    conf['masknet'].update({'n_src': conf['data']['n_src']})

    model = ConvTasNet(**conf['filterbank'], **conf['masknet'])
    optimizer = make_optimizer(model.parameters(), **conf['optim'])
    # Define scheduler
    scheduler = None
    if conf['training']['half_lr']:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf['main_args']['exp_dir']
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, 'conf.yml')
    with open(conf_path, 'w') as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    system = System(model=model, loss_func=loss_func, optimizer=optimizer,
                    train_loader=train_loader, val_loader=val_loader,
                    scheduler=scheduler, config=conf)

    # Define callbacks
    checkpoint_dir = os.path.join(exp_dir, 'checkpoints/')
    checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss',
                                 mode='min', save_top_k=5, verbose=1)
    early_stopping = False
    if conf['training']['early_stop']:
        early_stopping = EarlyStopping(monitor='val_loss', patience=30, verbose=1)

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    trainer = pl.Trainer(max_epochs=conf['training']['epochs'],
                         checkpoint_callback=checkpoint,
                         early_stop_callback=early_stopping,
                         default_save_path=exp_dir,
                         gpus=gpus,
                         distributed_backend='dp',
                         train_percent_check=1.0,  # Useful for fast experiment
                         gradient_clip_val=5.)
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    # Save best model (next PL version will make this easier)
    best_path = [b for b, v in best_k.items() if v == min(best_k.values())][0]
    state_dict = torch.load(best_path)
    system.load_state_dict(state_dict=state_dict['state_dict'])
    system.cpu()

    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, 'best_model.pth'))
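# Hedged sketch of how the PIT loss used above behaves: PITLossWrapper with
# pit_from="pw_mtx" computes the pairwise negative SI-SDR matrix between all
# estimate/target pairs and returns the mean loss under the best permutation.
# Shapes are (batch, n_src, time); the tensors here are random placeholders.
import torch
from asteroid.losses import PITLossWrapper, pairwise_neg_sisdr

loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
est_sources = torch.randn(4, 2, 24000)  # model estimates
targets = torch.randn(4, 2, 24000)      # ground-truth sources
loss = loss_func(est_sources, targets)  # scalar, averaged over the batch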