import os
import json

import yaml
import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

from asteroid.losses import PITLossWrapper, PairwiseNegSDR

# Recipe-local helpers: data loading, model/optimizer construction, experiment
# path encoding and the SystemTwoStep Lightning wrapper are defined elsewhere
# in this recipe. The module name below is an assumption; adjust it to the
# actual layout of the recipe.
from model import (
    get_data_loaders,
    get_encoded_paths,
    load_best_filterbank_if_available,
    make_model_and_optimizer,
    SystemTwoStep,
)
def main(conf):
    filterbank = load_best_filterbank_if_available(conf)
    _, checkpoint_dir = get_encoded_paths(conf, "filterbank")
    if filterbank is None:
        print("There are no available filterbanks under: {}. Going to "
              "training.".format(checkpoint_dir))
        train_model_part(conf, train_part="filterbank")
        filterbank = load_best_filterbank_if_available(conf)
    else:
        print("Found available filterbank at: {}".format(checkpoint_dir))
        if not conf["filterbank_training"]["reuse_pretrained_filterbank"]:
            print("Refining filterbank...")
            train_model_part(conf, train_part="filterbank")
            filterbank = load_best_filterbank_if_available(conf)
    train_model_part(conf, train_part="separator",
                     pretrained_filterbank=filterbank)
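# For reference, a sketch of the conf.yml layout these functions expect (the
# values below are placeholders, not the recipe's actual defaults).
# train_model_part() builds its keys from the part name: "filterbank_training"
# with "f_epochs" / "f_half_lr" / "f_early_stop", and "separator_training"
# with the "s_"-prefixed equivalents; main() additionally reads
# "reuse_pretrained_filterbank" from the filterbank section.
#
#   filterbank_training:
#     f_epochs: 200                      # placeholder value
#     f_half_lr: true
#     f_early_stop: true
#     reuse_pretrained_filterbank: false
#   separator_training:
#     s_epochs: 200                      # placeholder value
#     s_half_lr: true
#     s_early_stop: true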
def train_model_part(conf, train_part="filterbank", pretrained_filterbank=None): train_loader, val_loader = get_data_loaders(conf, train_part=train_part) # Define model and optimizer in a local function (defined in the recipe). # Two advantages to this : re-instantiating the model and optimizer # for retraining and evaluating is straight-forward. model, optimizer = make_model_and_optimizer( conf, model_part=train_part, pretrained_filterbank=pretrained_filterbank) # Define scheduler scheduler = None if conf[train_part + "_training"][train_part[0] + "_half_lr"]: scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5) # Just after instantiating, save the args. Easy loading in the future. exp_dir, checkpoint_dir = get_encoded_paths(conf, train_part) os.makedirs(exp_dir, exist_ok=True) conf_path = os.path.join(exp_dir, "conf.yml") with open(conf_path, "w") as outfile: yaml.safe_dump(conf, outfile) # Define Loss function. loss_func = PITLossWrapper(PairwiseNegSDR("sisdr", zero_mean=False), pit_from="pw_mtx") system = SystemTwoStep( model=model, loss_func=loss_func, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader, scheduler=scheduler, config=conf, module=train_part, ) # Define callbacks checkpoint = ModelCheckpoint(checkpoint_dir, monitor="val_loss", mode="min", save_top_k=1, verbose=True) early_stopping = False if conf[train_part + "_training"][train_part[0] + "_early_stop"]: early_stopping = EarlyStopping(monitor="val_loss", patience=30, verbose=True) # Don't ask GPU if they are not available. gpus = -1 if torch.cuda.is_available() else None trainer = pl.Trainer( max_epochs=conf[train_part + "_training"][train_part[0] + "_epochs"], checkpoint_callback=checkpoint, early_stop_callback=early_stopping, default_root_dir=exp_dir, gpus=gpus, distributed_backend="dp", train_percent_check=1.0, # Useful for fast experiment gradient_clip_val=5.0, ) trainer.fit(system) with open(os.path.join(checkpoint_dir, "best_k_models.json"), "w") as file: json.dump(checkpoint.best_k_models, file, indent=0)