def make_model_and_optimizer(conf): """ Function to define the model and optimizer for a config dictionary. Args: conf: Dictionary containing the output of hierachical argparse. Returns: model, optimizer. The main goal of this function is to make reloading for resuming and evaluation very simple. """ # Define building blocks for local model stft, istft = make_enc_dec('stft', **conf['filterbank']) # Because we concatenate (re, im, mag) as input and compute a complex mask. if conf['main_args']['is_complex']: inp_size = int(stft.n_feats_out * 3 / 2) output_size = stft.n_feats_out else: inp_size = output_size = int(stft.n_feats_out / 2) # Add these fields to the mask model dict conf['masknet'].update(dict(input_size=inp_size, output_size=output_size)) masker = SimpleModel(**conf['masknet']) # Make the complete model model = Model(stft, masker, istft, is_complex=conf['main_args']['is_complex']) # Define optimizer of this model optimizer = make_optimizer(model.parameters(), **conf['optim']) return model, optimizer
def make_model_and_optimizer(conf): """ Function to define the model and optimizer for a config dictionary. Args: conf: Dictionary containing the output of hierachical argparse. Returns: model, optimizer. The main goal of this function is to make reloading for resuming and evaluation very simple. """ # Define building blocks for local model # The encoder and decoder can directly be made from the dictionary. encoder, decoder = fb.make_enc_dec(**conf['filterbank']) # The input post-processing changes the dimensions of input features to # the mask network. Different type of masks impose different output # dimensions to the mask network's output. We correct for these here. nn_in = int(encoder.n_feats_out * encoder.in_chan_mul) nn_out = int(encoder.n_feats_out * encoder.out_chan_mul) masker = TDConvNet(in_chan=nn_in, out_chan=nn_out, **conf['masknet']) # Another possibility is to correct for these effects inside of Model, # but then instantiation of masker should also be done inside. model = Model(encoder, masker, decoder) # The model is defined in Container, which is passed to DataParallel. # Define optimizer : can be instantiate from dictonary as well. optimizer = make_optimizer(model.parameters(), **conf['optim']) return model, optimizer
def make_model_and_optimizer(conf, model_part='filterbank', pretrained_filterbank=None):
    """Function to define the model and optimizer for a config dictionary.

    Args:
        conf: Dictionary containing the output of hierarchical argparse.
        model_part: Either 'filterbank' (i.e. the adaptive front-end and
            back-end) or 'separator'.
        pretrained_filterbank: The separator needs a pre-trained filterbank
            in order to be initialized appropriately.

    Returns:
        model, optimizer.

    The main goal of this function is to make reloading for resuming
    and evaluation very simple.
    """
    # Define building blocks for the local model.
    if model_part == 'filterbank':
        model = AdaptiveEncoderDecoder(
            freq_res=conf['filterbank']['n_filters'],
            sample_res=conf['filterbank']['kernel_size'],
            n_sources=conf['masknet']['n_src'])
    elif model_part == 'separator':
        if pretrained_filterbank is None:
            raise ValueError('A pretrained filterbank is required for the '
                             'initialization of the separator.')
        model = Model(pretrained_filterbank, conf)
    else:
        raise ValueError('Part to train: {} is not available.'.format(model_part))
    # Define the optimizer for this model.
    optimizer = make_optimizer(
        model.parameters(),
        optimizer=conf[model_part + '_training'][model_part[0] + '_optimizer'],
        lr=conf[model_part + '_training'][model_part[0] + '_lr'])
    return model, optimizer
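# Illustrative sketch (assumption, not part of the recipe): the two-stage use
# implied by the function above. The filterbank part is trained first, then
# the trained instance is passed back in as `pretrained_filterbank` to build
# the separator. The training loops themselves are elided.
def _example_two_stage(conf):
    # Stage 1: adaptive front-end / back-end.
    filterbank, fb_optimizer = make_model_and_optimizer(conf, model_part='filterbank')
    # ... train `filterbank` with `fb_optimizer` here ...
    # Stage 2: separator initialized from the pre-trained filterbank.
    separator, sep_optimizer = make_model_and_optimizer(
        conf, model_part='separator', pretrained_filterbank=filterbank)
    # ... train `separator` with `sep_optimizer` here ...
    return separator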
def main(conf):
    # Define the data pipeline with datasets and loaders.
    train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'],
                            sample_rate=conf['data']['sample_rate'],
                            nondefault_nsrc=conf['data']['nondefault_nsrc'])
    val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'],
                          sample_rate=conf['data']['sample_rate'],
                          nondefault_nsrc=conf['data']['nondefault_nsrc'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['data']['batch_size'],
                              num_workers=conf['data']['num_workers'])
    val_loader = DataLoader(val_set, shuffle=True,
                            batch_size=conf['data']['batch_size'],
                            num_workers=conf['data']['num_workers'])
    loaders = {'train_loader': train_loader, 'val_loader': val_loader}

    # Define the model.
    # First define the encoder and the decoder.
    # This can either be done by passing a string and the config dictionary
    # (with number of filters, filter size and stride, see conf.yml) to
    # fb.make_enc_dec:
    enc, dec = fb.make_enc_dec('free', **conf['filterbank'])
    # or by instantiating the filterbank and passing it to the Encoder and
    # Decoder classes, as follows:
    # enc = fb.Encoder(fb.FreeFB(**conf['filterbank']))
    # dec = fb.Decoder(fb.FreeFB(**conf['filterbank']))
    # Define the mask network, with input and output dimensions dictated by
    # the encoder (also passing a dictionary defined in conf.yml).
    masker = TDConvNet(in_chan=enc.filterbank.n_feats_out,
                       out_chan=enc.filterbank.n_feats_out,
                       n_src=train_set.n_src, **conf['masknet'])
    # Pass the encoder, masker and decoder to the container class, which
    # handles the forward for such architectures.
    model = nn.DataParallel(Container(enc, masker, dec))
    if conf['main_args']['use_cuda']:
        model.cuda()
    # Define the loss function.
    loss_class = PITLossContainer(pairwise_neg_sisdr, n_src=train_set.n_src)
    # Define the optimizer.
    optimizer = make_optimizer(model.parameters(), **conf['optim'])
    # Pass everything to the solver, with a training dictionary defined in
    # the conf.yml file. Finally, call .train() and that's it.
    solver = Solver(loaders, model, loss_class, optimizer,
                    model_path=conf['main_args']['model_path'],
                    **conf['training'])
    solver.train()
def make_model_and_optimizer(conf, sample_rate):
    """Function to define the model and optimizer for a config dictionary.

    Args:
        conf: Dictionary containing the output of hierarchical argparse.
        sample_rate: Sampling rate of the model.

    Returns:
        model, optimizer.

    The main goal of this function is to make reloading for resuming
    and evaluation very simple.
    """
    model = MultiDecoderDPRNN(**conf["masknet"], **conf["filterbank"], sample_rate=sample_rate)
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    return model, optimizer
def make_model_and_optimizer(conf): """ Function to define the model and optimizer for a config dictionary. Args: conf: Dictionary containing the output of hierachical argparse. Returns: model, optimizer. The main goal of this function is to make reloading for resuming and evaluation very simple. """ model = TasNet(conf['filterbank'], conf['masknet']) # Define optimizer of this model optimizer = make_optimizer(model.parameters(), **conf['optim']) return model, optimizer
def make_model_and_optimizer(conf): """Function to define the model and optimizer for a config dictionary. Args: conf: Dictionary containing the output of hierachical argparse. Returns: model, optimizer. The main goal of this function is to make reloading for resuming and evaluation very simple. """ enc, dec = fb.make_enc_dec("stft", **conf["filterbank"]) masker = Chimera(enc.n_feats_out // 2, **conf["masknet"]) model = Model(enc, masker, dec) optimizer = make_optimizer(model.parameters(), **conf["optim"]) return model, optimizer
def make_generator_and_optimizer(conf):
    """Function to define the generator, optimizer and loss for a config dictionary.

    Args:
        conf: Dictionary containing the output of hierarchical argparse.

    Returns:
        model, optimizer, g_loss.

    The main goal of this function is to make reloading for resuming
    and evaluation very simple.
    """
    model = Generator()
    # Define the optimizer for this model.
    optimizer = make_optimizer(model.parameters(), **conf['optim'])
    g_loss = GeneratorLoss(conf['g_loss']['l'])
    return model, optimizer, g_loss
def make_model_and_optimizer(conf): """ Function to define the model and optimizer for a config dictionary. Args: conf: Dictionary containing the output of hierachical argparse. Returns: model, optimizer. The main goal of this function is to make reloading for resuming and evaluation very simple. """ # Define building blocks for local model enc, dec = fb.make_enc_dec('free', **conf['filterbank']) masker = DPRNN(**conf['masknet']) model = Model(enc, masker, dec) # Define optimizer of this model optimizer = make_optimizer(model.parameters(), **conf['optim']) return model, optimizer
def make_discriminator_and_optimizer(conf, train_loader):
    """Function to define the discriminator, optimizer and loss for a config dictionary.

    Args:
        conf: Dictionary containing the output of hierarchical argparse.
        train_loader: Training data loader, passed to the discriminator.

    Returns:
        model, optimizer, d_loss.

    The main goal of this function is to make reloading for resuming
    and evaluation very simple.
    """
    # Define building blocks for the local model.
    model = Discriminator(train_loader)
    # Define the optimizer for this model.
    optimizer = make_optimizer(model.parameters(), **conf['optim'])
    d_loss = DiscriminatorLoss()
    return model, optimizer, d_loss
def make_model_and_optimizer(conf): """ Function to define the model and optimizer for a config dictionary. Args: conf: Dictionary containing the output of hierachical argparse. Returns: model, optimizer. The main goal of this function is to make reloading for resuming and evaluation very simple. """ enc = fb.Encoder(fb.STFTFB(**conf['filterbank'])) masker = ChimeraPP(int(enc.filterbank.n_feats_out/2), 2, embedding_dim=20, n_layers=2, hidden_size=600, \ dropout=0, bidirectional=True) model = Model(enc, masker) optimizer = make_optimizer(model.parameters(), **conf['optim']) return model, optimizer
def init_optims(self, model, criterion, optimizer=None):
    if optimizer is None:
        self.optimizer = make_optimizer(model.parameters(), **self.conf["optim"])
    else:
        self.optimizer = optimizer
    if self.conf["train"]["half_lr"]:
        self.scheduler = ReduceLROnPlateau(
            optimizer=self.optimizer,
            factor=self.conf["scheduler"]["factor"],
            patience=self.conf["scheduler"]["patience"],
            verbose=self.conf["scheduler"]["verbose"])
    if self.conf["train"]["early_stop"]:
        self.early_stop = EarlyStopping(monitor="val_loss", patience=20, verbose=True)
    self.criterion = criterion
def make_discriminator_and_optimizer(conf): """ Function to define the model and optimizer for a config dictionary. Args: conf: Dictionary containing the output of hierachical argparse. Returns: model, optimizer. The main goal of this function is to make reloading for resuming and evaluation very simple. """ # Define building blocks for local model encoder, decoder = make_enc_dec(**conf['filterbank']) model = Discriminator(encoder, decoder) # Define optimizer of this model optimizer = make_optimizer(model.parameters(), **conf['optim']) d_loss = DiscriminatorLoss(conf['metric_to_opt']['metric'], conf['data']['rate']) return model, optimizer, d_loss
def main(conf):
    # Define the data pipeline.
    train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'],
                            sample_rate=conf['data']['sample_rate'],
                            nondefault_nsrc=conf['data']['nondefault_nsrc'])
    val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'],
                          sample_rate=conf['data']['sample_rate'],
                          nondefault_nsrc=conf['data']['nondefault_nsrc'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['data']['batch_size'],
                              num_workers=conf['data']['num_workers'])
    val_loader = DataLoader(val_set, shuffle=True,
                            batch_size=conf['data']['batch_size'],
                            num_workers=conf['data']['num_workers'])
    loaders = {'train_loader': train_loader, 'val_loader': val_loader}

    # Define the model.
    # The encoder and decoder can be made directly from the dictionary.
    encoder, decoder = filterbanks.make_enc_dec(**conf['filterbank'])
    # The input post-processing changes the dimensions of the features fed to
    # the mask network, and different types of masks impose different output
    # dimensions on the mask network. We correct for both here.
    nn_in = int(encoder.n_feats_out * encoder.in_chan_mul)
    nn_out = int(encoder.n_feats_out * encoder.out_chan_mul)
    masker = TDConvNet(in_chan=nn_in, out_chan=nn_out,
                       n_src=train_set.n_src, **conf['masknet'])
    # The model is defined in Container, which is passed to DataParallel.
    model = nn.DataParallel(Container(encoder, masker, decoder))
    if conf['main_args']['use_cuda']:
        model.cuda()
    # Define the loss function: here we use time-domain SI-SDR.
    loss_class = PITLossContainer(pairwise_neg_sisdr, n_src=train_set.n_src)
    # Define the optimizer: it can be instantiated from the dictionary as well.
    optimizer = make_optimizer(model.parameters(), **conf['optim'])
    # Pass everything to the solver and train.
    solver = Solver(loaders, model, loss_class, optimizer,
                    model_path=conf['main_args']['model_path'],
                    **conf['training'])
    # solver.train()
    solver.run_one_epoch(0, validation=True)
def make_model_and_optimizer(conf, gpu_ids=[0]): """Define model and optimizer. Args: conf: Configuration for model and optimizer. Returns: model, optimizer """ device = torch.device(conf["training"]["device"]) model = Audio_Visual_Fusion(conf["main_args"]["n_src"], device) model = model.to(device) device_count = torch.cuda.device_count() if len(gpu_ids) > 1 and device_count > 1: if len(gpu_ids) != device_count: print(f"Using {gpu_ids} GPUs") else: print(f"Using all {device_count} GPUs") model = torch.nn.DataParallel(model, device_ids=gpu_ids) optimizer = make_optimizer(model.parameters(), **conf["optim"]) return model, optimizer
def main(conf): train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'], sample_rate=conf['data']['sample_rate'], nondefault_nsrc=conf['data']['nondefault_nsrc']) val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'], sample_rate=conf['data']['sample_rate'], nondefault_nsrc=conf['data']['nondefault_nsrc']) train_loader = DataLoader(train_set, shuffle=True, batch_size=conf['training']['batch_size'], num_workers=conf['training']['num_workers'], drop_last=True) val_loader = DataLoader(val_set, shuffle=False, batch_size=conf['training']['batch_size'], num_workers=conf['training']['num_workers'], drop_last=True) # Update number of source values (It depends on the task) conf['masknet'].update({'n_src': train_set.n_src}) # Define model and optimizer model = ConvTasNet(**conf['filterbank'], **conf['masknet']) optimizer = make_optimizer(model.parameters(), **conf['optim']) # Define scheduler scheduler = None if conf['training']['half_lr']: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf['main_args']['exp_dir'] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, 'conf.yml') with open(conf_path, 'w') as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx') system = System(model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf) # Define callbacks checkpoint_dir = os.path.join(exp_dir, 'checkpoints/') checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss', mode='min', save_top_k=5, verbose=1) early_stopping = False if conf['training']['early_stop']: early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf['training']['epochs'], checkpoint_callback=checkpoint, early_stop_callback=early_stopping, default_save_path=exp_dir, gpus=gpus, distributed_backend='dp', train_percent_check=1.0, # Useful for fast experiment gradient_clip_val=5.) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) # Save best model (next PL version will make this easier) best_path = [b for b, v in best_k.items() if v == min(best_k.values())][0] state_dict = torch.load(best_path) system.load_state_dict(state_dict=state_dict['state_dict']) system.cpu() to_save = system.model.serialize() to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, 'best_model.pth'))
def main(conf): # Define dataloader using ORIGINAL mixture. dataset_kwargs = { "root_path": Path(conf["data"]["root_path"]), "sample_rate": conf["data"]["sample_rate"], "num_workers": conf["training"]["num_workers"], "mixture": conf["data"]["mixture"], "task": conf["data"]["task"], } train_set = DAMPVSEPSinglesDataset( split=f"train_{conf['data']['train_set']}", random_segments=True, segment=conf["data"]["segment"], ex_per_track=conf["data"]["ex_per_track"], **dataset_kwargs, ) val_set = DAMPVSEPSinglesDataset(split="valid", **dataset_kwargs) train_loader = DataLoader( train_set, shuffle=True, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = DataLoader( val_set, shuffle=False, batch_size=1, num_workers=conf["training"]["num_workers"] ) model = ConvTasNet(**conf["filterbank"], **conf["masknet"]) optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler scheduler = None if conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. # Combine_Loss is not complete. Needs improvement # loss_func = Combine_Loss(alpha=conf['training']['loss_alpha'], # sample_rate=conf['data']['sample_rate']) loss_func = torch.nn.L1Loss() system = System( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint( checkpoint_dir, monitor="val_loss", mode="min", save_top_k=10, verbose=True ) early_stopping = False if conf["training"]["early_stop"]: early_stopping = EarlyStopping(monitor="val_loss", patience=20, verbose=True) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], checkpoint_callback=checkpoint, early_stop_callback=early_stopping, default_root_dir=exp_dir, gpus=gpus, distributed_backend="ddp", train_percent_check=1.0, # Useful for fast experiment gradient_clip_val=5.0, ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf): train_enh_dir = conf["main_args"].get("train_enh_dir", None) resume_ckpt = conf["main_args"].get("resume_ckpt", None) train_loader, val_loader, train_set_infos = make_dataloaders( corpus=conf["main_args"]["corpus"], train_dir=conf["data"]["train_dir"], val_dir=conf["data"]["valid_dir"], train_enh_dir=train_enh_dir, task=conf["data"]["task"], sample_rate=conf["data"]["sample_rate"], n_src=conf["data"]["n_src"], segment=conf["data"]["segment"], batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], ) if conf["main_args"]["strategy"] != "multi_task": conf["masknet"].update({"n_src": conf["data"]["n_src"]}) else: conf["masknet"].update({"n_src": conf["data"]["n_src"] + 1}) model = getattr(asteroid.models, conf["main_args"]["model"])(**conf["filterbank"], **conf["masknet"]) if conf["main_args"]["strategy"] == "pretrained": if conf["main_args"]["load_path"] is not None: all_states = torch.load(conf["main_args"]["load_path"], map_location="cpu") assert "state_dict" in all_states # If the checkpoint is not the serialized "best_model.pth", its keys # would start with "model.", which should be removed to avoid none # of the parameters are loaded. for key in list(all_states["state_dict"].keys()): if key.startswith("model"): all_states["state_dict"][key.split( '.', 1)[1]] = all_states["state_dict"][key] del all_states["state_dict"][key] # For debugging, set strict=True to check whether only the following # parameters have different sizes (since n_src=1 for pre-training # and n_src=2 for fine-tuning): # for ConvTasNet: "masker.mask_net.1.*" # for DPRNNTasNet/DPTNet: "masker.first_out.1.*" if conf["main_args"]["model"] == "ConvTasNet": del all_states["state_dict"]["masker.mask_net.1.weight"] del all_states["state_dict"]["masker.mask_net.1.bias"] elif conf["main_args"]["model"] in ["DPRNNTasNet", "DPTNet"]: del all_states["state_dict"]["masker.first_out.1.weight"] del all_states["state_dict"]["masker.first_out.1.bias"] model.load_state_dict(all_states["state_dict"], strict=False) optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler scheduler = None if conf["main_args"]["model"] in [ "DPTNet", "SepFormerTasNet", "SepFormer2TasNet" ]: steps_per_epoch = len( train_loader) // conf["main_args"]["accumulate_grad_batches"] conf["scheduler"]["steps_per_epoch"] = steps_per_epoch scheduler = { "scheduler": DPTNetScheduler( optimizer=optimizer, steps_per_epoch=steps_per_epoch, d_model=model.masker.mha_in_dim, ), "interval": "batch", } elif conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. 
if conf["main_args"]["strategy"] == "multi_task": loss_func = MultiTaskLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx") else: loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx") system = System( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks callbacks = [] checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint( dirpath=checkpoint_dir, filename='{epoch}-{step}', monitor="val_loss", mode="min", save_top_k=conf["training"]["epochs"], save_last=True, verbose=True, ) callbacks.append(checkpoint) if conf["training"]["early_stop"]: callbacks.append( EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True)) loggers = [] tb_logger = pl.loggers.TensorBoardLogger(os.path.join(exp_dir, "tb_logs/"), ) loggers.append(tb_logger) if conf["main_args"]["comet"]: comet_logger = pl.loggers.CometLogger( save_dir=os.path.join(exp_dir, "comet_logs/"), experiment_key=conf["main_args"].get("comet_exp_key", None), log_code=True, log_graph=True, parse_args=True, log_env_details=True, log_git_metadata=True, log_git_patch=True, log_env_gpu=True, log_env_cpu=True, log_env_host=True, ) comet_logger.log_hyperparams(conf) loggers.append(comet_logger) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None distributed_backend = "ddp" if torch.cuda.is_available( ) else None # Don't use ddp for multi-task training trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], logger=loggers, callbacks=callbacks, # checkpoint_callback=checkpoint, # early_stop_callback=callbacks[1], default_root_dir=exp_dir, gpus=gpus, distributed_backend=distributed_backend, limit_train_batches=1.0, # Useful for fast experiment # fast_dev_run=True, # Useful for debugging # overfit_batches=0.001, # Useful for debugging gradient_clip_val=5.0, accumulate_grad_batches=conf["main_args"]["accumulate_grad_batches"], resume_from_checkpoint=resume_ckpt, deterministic=True, replace_sampler_ddp=False if conf["main_args"]["strategy"] == "multi_task" else True, ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() to_save.update(train_set_infos) torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf): train_set = WhamDataset( conf["data"]["train_dir"], conf["data"]["task"], sample_rate=conf["data"]["sample_rate"], segment=conf["data"]["segment"], nondefault_nsrc=conf["data"]["nondefault_nsrc"], ) val_set = WhamDataset( conf["data"]["valid_dir"], conf["data"]["task"], sample_rate=conf["data"]["sample_rate"], nondefault_nsrc=conf["data"]["nondefault_nsrc"], ) train_loader = DataLoader( train_set, shuffle=True, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = DataLoader( val_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) # Update number of source values (It depends on the task) conf["masknet"].update({"n_src": train_set.n_src}) model = DPRNNTasNet(**conf["filterbank"], **conf["masknet"]) optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler scheduler = None if conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx") system = System( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks checkpoint_dir = os.path.join(exp_dir, 'checkpoints/') checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss', verbose=True, mode='min', save_top_k=5) early_stopping = False if conf["training"]["early_stop"]: early_stopping = EarlyStopping(monitor="val_loss", patience=30, verbose=1) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf['training']['epochs'], checkpoint_callback=checkpoint, early_stop_callback=early_stopping, default_root_dir=exp_dir, gpus=gpus, distributed_backend='ddp', gradient_clip_val=conf['training']["gradient_clipping"]) trainer.fit(system) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict['state_dict']) system.cpu() to_save = system.model.serialize() to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf): train_set = DeMaskDataset(conf, conf["data"]["clean_train"], True, conf["data"]["rirs_train"]) val_set = DeMaskDataset(conf, conf["data"]["clean_dev"], False, conf["data"]["rirs_dev"]) train_loader = DataLoader( train_set, shuffle=True, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = DataLoader( val_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) model = DeMask(**conf["filterbank"], **conf["demask_net"]) optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler scheduler = None if conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = singlesrc_neg_sisdr system = DeMaskSystem( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint( checkpoint_dir, monitor="val_loss", mode="min", save_top_k=conf["training"]["save_top_k"], verbose=True, ) early_stopping = False if conf["training"]["early_stop"]: early_stopping = EarlyStopping(monitor="val_loss", patience=conf["training"]["patience"], verbose=True) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], checkpoint_callback=checkpoint, early_stop_callback=early_stopping, default_root_dir=exp_dir, gpus=gpus, distributed_backend="ddp", gradient_clip_val=conf["training"]["gradient_clipping"], train_percent_check=0.1, ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, "best_model.pth")) save_publishable( os.path.join(exp_dir, "publish_dir"), to_save, metrics=dict(), train_conf=conf, recipe="asteroid/demask", )
def main(conf): assert (conf["training"]["batch_size"] % 2 == 0), "Batch size must be divisible by two to run this recipe" train_set = WhamDataset( conf["data"]["train_dir"], "sep_clean", sample_rate=conf["data"]["sample_rate"], segment=conf["data"]["segment"], nondefault_nsrc=None, ) val_set = WhamDataset( conf["data"]["valid_dir"], "sep_clean", sample_rate=conf["data"]["sample_rate"], nondefault_nsrc=None, ) train_loader = DataLoader( train_set, shuffle=True, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = DataLoader( val_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) model = DPRNNTasNet(**conf["filterbank"], **conf["masknet"], sample_rate=conf["data"]["sample_rate"]) optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler scheduler = None if conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = { "pit": PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx"), "mixit": MixITLossWrapper(pairwise_neg_sisdr, generalized=True), } system = MixITSystem( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks callbacks = [] checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint(checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True) callbacks.append(checkpoint) if conf["training"]["early_stop"]: callbacks.append( EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True)) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None distributed_backend = "ddp" if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], callbacks=callbacks, default_root_dir=exp_dir, gpus=gpus, distributed_backend=distributed_backend, gradient_clip_val=conf["training"]["gradient_clipping"], ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf): # train_set = WhamDataset( # conf["data"]["train_dir"], # conf["data"]["task"], # sample_rate=conf["data"]["sample_rate"], # segment=conf["data"]["segment"], # nondefault_nsrc=conf["data"]["nondefault_nsrc"], # ) # val_set = WhamDataset( # conf["data"]["valid_dir"], # conf["data"]["task"], # sample_rate=conf["data"]["sample_rate"], # nondefault_nsrc=conf["data"]["nondefault_nsrc"], # ) train_set = LibriMix( csv_dir=conf["data"]["train_dir"], task=conf["data"]["task"], sample_rate=conf["data"]["sample_rate"], n_src=conf["masknet"]["n_src"], segment=conf["data"]["segment"], ) val_set = LibriMix( csv_dir=conf["data"]["valid_dir"], task=conf["data"]["task"], sample_rate=conf["data"]["sample_rate"], n_src=conf["masknet"]["n_src"], segment=conf["data"]["segment"], ) train_loader = DataLoader( train_set, shuffle=True, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = DataLoader( val_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) # Update number of source values (It depends on the task) # TODO: redundant conf["masknet"].update({"n_src": train_set.n_src}) model = DPRNNTasNet(**conf["filterbank"], **conf["masknet"], sample_rate=conf['data']['sample_rate']) # from torchsummary import summary # model.cuda() # summary(model, (24000,)) # import pdb # pdb.set_trace() optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler scheduler = None if conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx") system = System( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks callbacks = [] checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint(checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True) callbacks.append(checkpoint) if conf["training"]["early_stop"]: callbacks.append( EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True)) # Don't ask GPU if they are not available. 
gpus = -1 if torch.cuda.is_available() else None distributed_backend = "ddp" if torch.cuda.is_available() else None if conf["training"]["cont"]: from glob import glob ckpts = glob('%s/*.ckpt' % checkpoint_dir) ckpts.sort() latest_ckpt = ckpts[-1] trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], callbacks=callbacks, default_root_dir=exp_dir, gpus=gpus, distributed_backend=distributed_backend, limit_train_batches=1.0, # Useful for fast experiment gradient_clip_val=conf["training"]["gradient_clipping"], resume_from_checkpoint=latest_ckpt) else: trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], callbacks=callbacks, default_root_dir=exp_dir, gpus=gpus, distributed_backend=distributed_backend, limit_train_batches=1.0, # Useful for fast experiment gradient_clip_val=conf["training"]["gradient_clipping"], ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) # Save best model (next PL version will make this easier) # best_path = [b for b, v in best_k.items() if v == min(best_k.values())][0] # state_dict = torch.load(best_path) state_dict = torch.load(checkpoint.best_model_path) # state_dict = torch.load('exp/train_dprnn_130d5f9a/checkpoints/epoch=154.ckpt') system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf): train_set = LibriMix( csv_dir=conf["data"]["train_dir"], task=conf["data"]["task"], sample_rate=conf["data"]["sample_rate"], n_src=conf["data"]["n_src"], segment=conf["data"]["segment"], ) val_set = LibriMix( csv_dir=conf["data"]["valid_dir"], task=conf["data"]["task"], sample_rate=conf["data"]["sample_rate"], n_src=conf["data"]["n_src"], segment=conf["data"]["segment"], ) train_loader = DataLoader( train_set, shuffle=True, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = DataLoader( val_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) conf["masknet"].update({"n_src": conf["data"]["n_src"]}) model = ConvTasNet(**conf["filterbank"], **conf["masknet"]) optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler scheduler = None if conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx") system = System( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint( checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True ) early_stopping = False if conf["training"]["early_stop"]: early_stopping = EarlyStopping(monitor="val_loss", patience=30, verbose=True) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], checkpoint_callback=checkpoint, early_stop_callback=early_stopping, default_root_dir=exp_dir, gpus=gpus, distributed_backend="dp", train_percent_check=1.0, # Useful for fast experiment gradient_clip_val=5.0, ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf): ''' train_set = TACDataset(conf["data"]["train_json"], conf["data"]["segment"], train=True) val_set = TACDataset(conf["data"]["dev_json"], conf["data"]["segment"], train=False) train_loader = DataLoader( train_set, shuffle=True, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = DataLoader( val_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) ''' train_loader = make_dataloader(train=True, batch_size=conf['training']["batch_size"], chunk_size=conf['data']['chunk'], num_workers=conf['training']['num_workers']) val_loader = make_dataloader(train=False, batch_size=conf['training']['batch_size'], chunk_size=conf['data']['chunk'], num_workers=conf['training']['num_workers']) #Prep(train_loader) #Prep(val_loader) #for data in train_loader: #print(type(data[0])) model = TasNet() # model_parameters = filter(lambda p: p.requires_grad, model.parameters()) # params = sum([np.prod(p.size()) for p in model_parameters]) # print(params) # exit() optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler if conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=conf["training"]["patience"]) else: scheduler = None # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = MSELoss() system = AngleSystem( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks # Define callbacks callbacks = [] checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint( checkpoint_dir, monitor="val_loss", mode="min", save_top_k=conf["training"]["save_top_k"], verbose=True, ) callbacks.append(checkpoint) if conf["training"]["early_stop"]: callbacks.append( EarlyStopping(monitor="val_loss", mode="min", patience=conf["training"]["patience"], verbose=True)) # Don't ask GPU if they are not available. gpus = [-1] trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], callbacks=callbacks, default_root_dir=exp_dir, #gpus=gpus, distributed_backend="ddp", gradient_clip_val=conf["training"]["gradient_clipping"], ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() #to_save = system.model.serialize() #to_save.update(train_set.get_infos()) torch.save(system.model.state_dict(), os.path.join(exp_dir, "best_model.ckpt"))
def main(conf): exp_dir = conf["main_args"]["exp_dir"] # Define Dataloader """ total_set = MedleydbDataset( conf["data"]["json_dir"], n_src=conf["data"]["n_inst"], n_poly=conf["data"]["n_poly"], sample_rate=conf["data"]["sample_rate"], segment=conf["data"]["segment"], threshold=conf["data"]["threshold"], ) validation_size = int(conf["data"]["validation_split"] * len(total_set)) train_size = len(total_set) - validation_size torch.manual_seed(conf["training"]["random_seed"]) train_set, val_set = data.random_split(total_set, [train_size, validation_size]) """ train_set = SourceFolderDataset( train_dir, train_dir, conf["data"]["n_poly"], conf["data"]["sample_rate"], conf["training"]["batch_size"], ) val_set = SourceFolderDataset( val_dir, val_dir, conf["data"]["n_poly"], conf["data"]["sample_rate"], conf["training"]["batch_size"], ) train_loader = data.DataLoader( train_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = data.DataLoader( val_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) # Update number of source values (It depends on the task) conf["masknet"].update({"n_src": conf["data"]["n_inst"] * conf["data"]["n_poly"]}) model = DPRNNTasNet(**conf["filterbank"], **conf["masknet"]) optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler scheduler = None if conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx") system = System( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks callbacks = [] checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint( checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True ) callbacks.append(checkpoint) if conf["training"]["early_stop"]: callbacks.append(EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True)) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], callbacks=callbacks, default_root_dir=exp_dir, gpus=gpus, distributed_backend="ddp", gradient_clip_val=conf["training"]["gradient_clipping"], ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def test_make_optimizer(): optimizers.make_optimizer(global_model.parameters(), "adam", lr=1e-3)
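# Illustrative sketch (assumption, not an existing test): make_optimizer forwards
# its keyword arguments to the underlying torch optimizer, so an unpacked
# conf['optim'] section such as the one below works the same way the recipes
# above use it (weight_decay mirrors the X-UMX recipe's call).
def test_make_optimizer_from_conf():
    optim_conf = {"optimizer": "adam", "lr": 1e-3, "weight_decay": 1e-5}
    optimizers.make_optimizer(global_model.parameters(), **optim_conf)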
def main(conf): train_set = LibriVADDataset(md_file_path=conf["data"]["train_dir"]) val_set = LibriVADDataset(md_file_path=conf["data"]["valid_dir"]) train_loader = DataLoader( train_set, shuffle=True, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = DataLoader( val_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) model = VADNet(**conf["filterbank"], **conf["masknet"]) optimizer = make_optimizer(model.parameters(), **conf["optim"]) # Define scheduler scheduler = None if conf["training"]["half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = F1_loss() system = System( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, ) # Define callbacks callbacks = [] checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint(checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True) callbacks.append(checkpoint) if conf["training"]["early_stop"]: callbacks.append( EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True)) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None distributed_backend = "ddp" if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], callbacks=callbacks, default_root_dir=exp_dir, gpus=gpus, distributed_backend=distributed_backend, # limit_train_batches=0.0002, # Useful for fast experiment # limit_val_batches=0.0035, # Useful for fast experiment gradient_clip_val=5.0, ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf, args): # Set seed for random torch.manual_seed(args.seed) random.seed(args.seed) # create output dir if not exist exp_dir = Path(args.output) exp_dir.mkdir(parents=True, exist_ok=True) # Load Datasets train_dataset, valid_dataset = dataloader.load_datasets(parser, args) dataloader_kwargs = ({ "num_workers": args.num_workers, "pin_memory": True } if torch.cuda.is_available() else {}) train_sampler = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, **dataloader_kwargs) valid_sampler = torch.utils.data.DataLoader(valid_dataset, batch_size=1, **dataloader_kwargs) # Define model and optimizer if args.pretrained is not None: scaler_mean = None scaler_std = None else: scaler_mean, scaler_std = get_statistics(args, train_dataset) max_bin = bandwidth_to_max_bin(train_dataset.sample_rate, args.in_chan, args.bandwidth) x_unmix = XUMX( window_length=args.window_length, input_mean=scaler_mean, input_scale=scaler_std, nb_channels=args.nb_channels, hidden_size=args.hidden_size, in_chan=args.in_chan, n_hop=args.nhop, sources=args.sources, max_bin=max_bin, bidirectional=args.bidirectional, sample_rate=train_dataset.sample_rate, spec_power=args.spec_power, return_time_signals=True if args.loss_use_multidomain else False, ) optimizer = make_optimizer(x_unmix.parameters(), lr=args.lr, optimizer="adam", weight_decay=args.weight_decay) # Define scheduler scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, factor=args.lr_decay_gamma, patience=args.lr_decay_patience, cooldown=10) # Save config conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) es = EarlyStopping(monitor="val_loss", mode="min", patience=args.patience, verbose=True) # Define Loss function. loss_func = MultiDomainLoss( window_length=args.window_length, in_chan=args.in_chan, n_hop=args.nhop, spec_power=args.spec_power, nb_channels=args.nb_channels, loss_combine_sources=args.loss_combine_sources, loss_use_multidomain=args.loss_use_multidomain, mix_coef=args.mix_coef, ) system = XUMXManager( model=x_unmix, loss_func=loss_func, optimizer=optimizer, train_loader=train_sampler, val_loader=valid_sampler, scheduler=scheduler, config=conf, val_dur=args.val_dur, ) # Define callbacks callbacks = [] checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint(checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True) callbacks.append(checkpoint) callbacks.append(es) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None distributed_backend = "ddp" if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=args.epochs, callbacks=callbacks, default_root_dir=exp_dir, gpus=gpus, distributed_backend=distributed_backend, limit_train_batches=1.0, # Useful for fast experiment ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() to_save.update(train_dataset.get_infos()) torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf): train_set = WhamDataset( conf["data"]["train_dir"], conf["data"]["task"], sample_rate=conf["data"]["sample_rate"], segment=conf["data"]["segment"], nondefault_nsrc=conf["data"]["nondefault_nsrc"], ) val_set = WhamDataset( conf["data"]["valid_dir"], conf["data"]["task"], sample_rate=conf["data"]["sample_rate"], nondefault_nsrc=conf["data"]["nondefault_nsrc"], ) train_loader = DataLoader( train_set, shuffle=True, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) val_loader = DataLoader( val_set, shuffle=False, batch_size=conf["training"]["batch_size"], num_workers=conf["training"]["num_workers"], drop_last=True, ) # Update number of source values (It depends on the task) conf["masknet"].update({"n_src": train_set.n_src}) model = DPTNet(**conf["filterbank"], **conf["masknet"]) optimizer = make_optimizer(model.parameters(), **conf["optim"]) from asteroid.engine.schedulers import DPTNetScheduler schedulers = { "scheduler": DPTNetScheduler(optimizer, len(train_loader) // conf["training"]["batch_size"], 64), "interval": "step", } # Just after instantiating, save the args. Easy loading in the future. exp_dir = conf["main_args"]["exp_dir"] os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx") system = System( model=model, loss_func=loss_func, optimizer=optimizer, scheduler=schedulers, train_loader=train_loader, val_loader=val_loader, config=conf, ) # Define callbacks checkpoint_dir = os.path.join(exp_dir, "checkpoints/") checkpoint = ModelCheckpoint(checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True) early_stopping = False if conf["training"]["early_stop"]: early_stopping = EarlyStopping(monitor="val_loss", patience=30, verbose=True) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf["training"]["epochs"], checkpoint_callback=checkpoint, early_stop_callback=early_stopping, default_root_dir=exp_dir, gpus=gpus, distributed_backend="ddp", gradient_clip_val=conf["training"]["gradient_clipping"], ) trainer.fit(system) best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: json.dump(best_k, f, indent=0) state_dict = torch.load(checkpoint.best_model_path) system.load_state_dict(state_dict=state_dict["state_dict"]) system.cpu() to_save = system.model.serialize() to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def _train(args): train_dir = args.train val_dir = args.test with open('conf.yml') as f: def_conf = yaml.safe_load(f) pp = argparse.ArgumentParser() parser = prepare_parser_from_dict(def_conf, parser=pp) arg_dic, plain_args = parse_args_as_dict(parser, return_plain_args=True) print(arg_dic) conf = arg_dic train_set = WhamDataset_no_sf( train_dir, conf['data']['task'], sample_rate=conf['data']['sample_rate'], segment=conf['data']['segment'], nondefault_nsrc=conf['data']['nondefault_nsrc']) val_set = WhamDataset_no_sf( val_dir, conf['data']['task'], segment=conf['data']['segment'], sample_rate=conf['data']['sample_rate'], nondefault_nsrc=conf['data']['nondefault_nsrc']) train_loader = DataLoader(train_set, shuffle=True, batch_size=conf['training']['batch_size'], num_workers=conf['training']['num_workers'], drop_last=True) val_loader = DataLoader(val_set, shuffle=False, batch_size=conf['training']['batch_size'], num_workers=conf['training']['num_workers'], drop_last=True) # train_loader = DataLoader(train_set, shuffle=True, # batch_size=args.batch_size, # num_workers=conf['training']['num_workers'], # drop_last=True) # val_loader = DataLoader(val_set, shuffle=False, # batch_size=args.batch_size, # num_workers=conf['training']['num_workers'], # drop_last=True) # Update number of source values (It depends on the task) print("!!!!!!!!!") print(train_set.__getitem__(0)) print(val_set.__getitem__(0)) print("!!!!!!!!!") conf['masknet'].update({'n_src': train_set.n_src}) model = DPRNNTasNet(**conf['filterbank'], **conf['masknet']) optimizer = make_optimizer(model.parameters(), **conf['optim']) # Define scheduler scheduler = None if conf['training']['half_lr']: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. # exp_dir = conf['main_args']['exp_dir'] # os.makedirs(exp_dir, exist_ok=True) exp_dir = args.model_dir conf_path = os.path.join(exp_dir, 'conf.yml') with open(conf_path, 'w') as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx') system = System(model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf) system.batch_size = 1 # Define callbacks # checkpoint_dir = os.path.join(exp_dir, 'checkpoints/') # checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss', # mode='min', save_top_k=5, verbose=1) # early_stopping = False # if conf['training']['early_stop']: # early_stopping = EarlyStopping(monitor='val_loss', patience=10, # verbose=1) # Don't ask GPU if they are not available. 
# print("!!!!!!!{}".format(torch.cuda.is_available())) # print(torch.__version__) gpus = -1 if torch.cuda.is_available() else None # trainer = pl.Trainer(max_epochs=conf['training']['epochs'], # checkpoint_callback=checkpoint, # early_stop_callback=early_stopping, # default_root_dir=exp_dir, # gpus=gpus, # distributed_backend='ddp', # gradient_clip_val=conf['training']["gradient_clipping"]) trainer = pl.Trainer( max_epochs=args.epochs, default_root_dir=exp_dir, gpus=gpus, distributed_backend='ddp', gradient_clip_val=conf['training']["gradient_clipping"]) trainer.fit(system) # print("!!!!!!!!!!!!!!") # print(checkpoint) # print(checkpoint.best_k_models) # print(checkpoint.best_k_models.items()) # onlyfiles = [f for f in listdir(checkpoint_dir) if isfile(os.path.join(checkpoint_dir, f))] # print(onlyfiles) # best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()} # with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f: # json.dump(best_k, f, indent=0) # # Save best model (next PL version will make this easier) # best_path = [b for b, v in best_k.items() if v == min(best_k.values())][0] best_path = os.path.join(exp_dir, "__temp_weight_ddp_end.ckpt") state_dict = torch.load(best_path) system.load_state_dict(state_dict=state_dict['state_dict']) system.cpu() to_save = system.model.serialize() # to_save.update(train_set.get_infos()) torch.save(to_save, os.path.join(exp_dir, 'best_model.pth'))