def main(args): # pylint: disable=redefined-outer-name # pylint: disable=global-variable-undefined global meta_data_train, meta_data_eval, symbols, phonemes, model_characters, speaker_mapping # Audio processor ap = AudioProcessor(**c.audio) if 'characters' in c.keys(): symbols, phonemes = make_symbols(**c.characters) # DISTRUBUTED if num_gpus > 1: init_distributed(args.rank, num_gpus, args.group_id, c.distributed["backend"], c.distributed["url"]) # set model characters model_characters = phonemes if c.use_phonemes else symbols num_chars = len(model_characters) # load data instances meta_data_train, meta_data_eval = load_meta_data(c.datasets) # set the portion of the data used for training if 'train_portion' in c.keys(): meta_data_train = meta_data_train[:int( len(meta_data_train) * c.train_portion)] if 'eval_portion' in c.keys(): meta_data_eval = meta_data_eval[:int( len(meta_data_eval) * c.eval_portion)] # parse speakers num_speakers, speaker_embedding_dim, speaker_mapping = parse_speakers( c, args, meta_data_train, OUT_PATH) # setup model model = setup_model(num_chars, num_speakers, c, speaker_embedding_dim=speaker_embedding_dim) optimizer = RAdam(model.parameters(), lr=c.lr, weight_decay=0, betas=(0.9, 0.98), eps=1e-9) criterion = GlowTTSLoss() if args.restore_path: print(f" > Restoring from {os.path.basename(args.restore_path)} ...") checkpoint = torch.load(args.restore_path, map_location='cpu') try: # TODO: fix optimizer init, model.cuda() needs to be called before # optimizer restore optimizer.load_state_dict(checkpoint['optimizer']) if c.reinit_layers: raise RuntimeError model.load_state_dict(checkpoint['model']) except: #pylint: disable=bare-except print(" > Partial model initialization.") model_dict = model.state_dict() model_dict = set_init_dict(model_dict, checkpoint['model'], c) model.load_state_dict(model_dict) del model_dict for group in optimizer.param_groups: group['initial_lr'] = c.lr print(f" > Model restored from step {checkpoint['step']:d}", flush=True) args.restore_step = checkpoint['step'] else: args.restore_step = 0 if use_cuda: model.cuda() criterion.cuda() # DISTRUBUTED if num_gpus > 1: model = DDP_th(model, device_ids=[args.rank]) if c.noam_schedule: scheduler = NoamLR(optimizer, warmup_steps=c.warmup_steps, last_epoch=args.restore_step - 1) else: scheduler = None num_params = count_parameters(model) print("\n > Model has {} parameters".format(num_params), flush=True) if args.restore_step == 0 or not args.best_path: best_loss = float('inf') print(" > Starting with inf best loss.") else: print(" > Restoring best loss from " f"{os.path.basename(args.best_path)} ...") best_loss = torch.load(args.best_path, map_location='cpu')['model_loss'] print(f" > Starting with loaded last best loss {best_loss}.") keep_all_best = c.get('keep_all_best', False) keep_after = c.get('keep_after', 10000) # void if keep_all_best False # define dataloaders train_loader = setup_loader(ap, 1, is_val=False, verbose=True) eval_loader = setup_loader(ap, 1, is_val=True, verbose=True) global_step = args.restore_step model = data_depended_init(train_loader, model) for epoch in range(0, c.epochs): c_logger.print_epoch_start(epoch, c.epochs) train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer, scheduler, ap, global_step, epoch) eval_avg_loss_dict = evaluate(eval_loader, model, criterion, ap, global_step, epoch) c_logger.print_epoch_end(epoch, eval_avg_loss_dict) target_loss = train_avg_loss_dict['avg_loss'] if c.run_eval: target_loss = eval_avg_loss_dict['avg_loss'] best_loss = save_best_model(target_loss, best_loss, model, optimizer, global_step, epoch, c.r, OUT_PATH, model_characters, keep_all_best=keep_all_best, keep_after=keep_after)
def main(args): # pylint: disable=redefined-outer-name # pylint: disable=global-variable-undefined global train_data, eval_data print(f" > Loading wavs from: {c.data_path}") if c.feature_path is not None: print(f" > Loading features from: {c.feature_path}") eval_data, train_data = load_wav_feat_data(c.data_path, c.feature_path, c.eval_split_size) else: eval_data, train_data = load_wav_data(c.data_path, c.eval_split_size) # setup audio processor ap = AudioProcessor(**c.audio) # DISTRUBUTED if num_gpus > 1: init_distributed(args.rank, num_gpus, args.group_id, c.distributed["backend"], c.distributed["url"]) # setup models model = setup_generator(c) # scaler for mixed_precision scaler = torch.cuda.amp.GradScaler() if c.mixed_precision else None # setup optimizers optimizer = Adam(model.parameters(), lr=c.lr, weight_decay=0) # schedulers scheduler = None if 'lr_scheduler' in c: scheduler = getattr(torch.optim.lr_scheduler, c.lr_scheduler) scheduler = scheduler(optimizer, **c.lr_scheduler_params) # setup criterion criterion = torch.nn.L1Loss().cuda() if args.restore_path: checkpoint = torch.load(args.restore_path, map_location='cpu') try: print(" > Restoring Model...") model.load_state_dict(checkpoint['model']) print(" > Restoring Optimizer...") optimizer.load_state_dict(checkpoint['optimizer']) if 'scheduler' in checkpoint: print(" > Restoring LR Scheduler...") scheduler.load_state_dict(checkpoint['scheduler']) # NOTE: Not sure if necessary scheduler.optimizer = optimizer if "scaler" in checkpoint and c.mixed_precision: print(" > Restoring AMP Scaler...") scaler.load_state_dict(checkpoint["scaler"]) except RuntimeError: # retore only matching layers. print(" > Partial model initialization...") model_dict = model.state_dict() model_dict = set_init_dict(model_dict, checkpoint['model'], c) model.load_state_dict(model_dict) del model_dict # reset lr if not countinuining training. for group in optimizer.param_groups: group['lr'] = c.lr print(" > Model restored from step %d" % checkpoint['step'], flush=True) args.restore_step = checkpoint['step'] else: args.restore_step = 0 if use_cuda: model.cuda() criterion.cuda() # DISTRUBUTED if num_gpus > 1: model = DDP_th(model, device_ids=[args.rank]) num_params = count_parameters(model) print(" > WaveGrad has {} parameters".format(num_params), flush=True) if 'best_loss' not in locals(): best_loss = float('inf') global_step = args.restore_step for epoch in range(0, c.epochs): c_logger.print_epoch_start(epoch, c.epochs) _, global_step = train(model, criterion, optimizer, scheduler, scaler, ap, global_step, epoch) eval_avg_loss_dict = evaluate(model, criterion, ap, global_step, epoch) c_logger.print_epoch_end(epoch, eval_avg_loss_dict) target_loss = eval_avg_loss_dict[c.target_loss] best_loss = save_best_model(target_loss, best_loss, model, optimizer, scheduler, None, None, None, global_step, epoch, OUT_PATH, model_losses=eval_avg_loss_dict, scaler=scaler.state_dict() if c.mixed_precision else None)
def main(args): # pylint: disable=redefined-outer-name # pylint: disable=global-variable-undefined global meta_data_train, meta_data_eval, symbols, phonemes, model_characters, speaker_mapping # Audio processor ap = AudioProcessor(**config.audio.to_dict()) if config.has("characters") and config.characters: symbols, phonemes = make_symbols(**config.characters.to_dict()) # DISTRUBUTED if num_gpus > 1: init_distributed(args.rank, num_gpus, args.group_id, config.distributed["backend"], config.distributed["url"]) # set model characters model_characters = phonemes if config.use_phonemes else symbols num_chars = len(model_characters) # load data instances meta_data_train, meta_data_eval = load_meta_data(config.datasets, eval_split=True) # parse speakers num_speakers, speaker_embedding_dim, speaker_mapping = parse_speakers( config, args, meta_data_train, OUT_PATH) # setup model model = setup_model(num_chars, num_speakers, config, speaker_embedding_dim=speaker_embedding_dim) optimizer = RAdam(model.parameters(), lr=config.lr, weight_decay=0, betas=(0.9, 0.98), eps=1e-9) criterion = AlignTTSLoss(config) if args.restore_path: print(f" > Restoring from {os.path.basename(args.restore_path)} ...") checkpoint = torch.load(args.restore_path, map_location="cpu") try: # TODO: fix optimizer init, model.cuda() needs to be called before # optimizer restore optimizer.load_state_dict(checkpoint["optimizer"]) if config.reinit_layers: raise RuntimeError model.load_state_dict(checkpoint["model"]) except: # pylint: disable=bare-except print(" > Partial model initialization.") model_dict = model.state_dict() model_dict = set_init_dict(model_dict, checkpoint["model"], config) model.load_state_dict(model_dict) del model_dict for group in optimizer.param_groups: group["initial_lr"] = config.lr print(" > Model restored from step %d" % checkpoint["step"], flush=True) args.restore_step = checkpoint["step"] else: args.restore_step = 0 if use_cuda: model.cuda() criterion.cuda() # DISTRUBUTED if num_gpus > 1: model = DDP_th(model, device_ids=[args.rank]) if config.noam_schedule: scheduler = NoamLR(optimizer, warmup_steps=config.warmup_steps, last_epoch=args.restore_step - 1) else: scheduler = None num_params = count_parameters(model) print("\n > Model has {} parameters".format(num_params), flush=True) if args.restore_step == 0 or not args.best_path: best_loss = float("inf") print(" > Starting with inf best loss.") else: print(" > Restoring best loss from " f"{os.path.basename(args.best_path)} ...") best_loss = torch.load(args.best_path, map_location="cpu")["model_loss"] print(f" > Starting with loaded last best loss {best_loss}.") keep_all_best = config.keep_all_best keep_after = config.keep_after # void if keep_all_best False # define dataloaders train_loader = setup_loader(ap, 1, is_val=False, verbose=True) eval_loader = setup_loader(ap, 1, is_val=True, verbose=True) global_step = args.restore_step def set_phase(): """Set AlignTTS training phase""" if isinstance(config.phase_start_steps, list): vals = [i < global_step for i in config.phase_start_steps] if not True in vals: phase = 0 else: phase = ( len(config.phase_start_steps) - [i < global_step for i in config.phase_start_steps][::-1].index(True) - 1) else: phase = None return phase for epoch in range(0, config.epochs): cur_phase = set_phase() print(f"\n > Current AlignTTS phase: {cur_phase}") c_logger.print_epoch_start(epoch, config.epochs) train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer, scheduler, ap, global_step, epoch, cur_phase) eval_avg_loss_dict = evaluate(eval_loader, model, criterion, ap, global_step, epoch, cur_phase) c_logger.print_epoch_end(epoch, eval_avg_loss_dict) target_loss = train_avg_loss_dict["avg_loss"] if config.run_eval: target_loss = eval_avg_loss_dict["avg_loss"] best_loss = save_best_model( target_loss, best_loss, model, optimizer, global_step, epoch, 1, OUT_PATH, model_characters, keep_all_best=keep_all_best, keep_after=keep_after, )
def main(args): # pylint: disable=redefined-outer-name # pylint: disable=global-variable-undefined global train_data, eval_data print(f" > Loading wavs from: {c.data_path}") if c.feature_path is not None: print(f" > Loading features from: {c.feature_path}") eval_data, train_data = load_wav_feat_data(c.data_path, c.feature_path, c.eval_split_size) else: eval_data, train_data = load_wav_data(c.data_path, c.eval_split_size) # setup audio processor ap = AudioProcessor(**c.audio) # DISTRUBUTED if num_gpus > 1: init_distributed(args.rank, num_gpus, args.group_id, c.distributed["backend"], c.distributed["url"]) # setup models model_gen = setup_generator(c) model_disc = setup_discriminator(c) # setup optimizers optimizer_gen = RAdam(model_gen.parameters(), lr=c.lr_gen, weight_decay=0) optimizer_disc = RAdam(model_disc.parameters(), lr=c.lr_disc, weight_decay=0) # schedulers scheduler_gen = None scheduler_disc = None if 'lr_scheduler_gen' in c: scheduler_gen = getattr(torch.optim.lr_scheduler, c.lr_scheduler_gen) scheduler_gen = scheduler_gen(optimizer_gen, **c.lr_scheduler_gen_params) if 'lr_scheduler_disc' in c: scheduler_disc = getattr(torch.optim.lr_scheduler, c.lr_scheduler_disc) scheduler_disc = scheduler_disc(optimizer_disc, **c.lr_scheduler_disc_params) # setup criterion criterion_gen = GeneratorLoss(c) criterion_disc = DiscriminatorLoss(c) if args.restore_path: print(f" > Restoring from {os.path.basename(args.restore_path)}...") checkpoint = torch.load(args.restore_path, map_location='cpu') try: print(" > Restoring Generator Model...") model_gen.load_state_dict(checkpoint['model']) print(" > Restoring Generator Optimizer...") optimizer_gen.load_state_dict(checkpoint['optimizer']) print(" > Restoring Discriminator Model...") model_disc.load_state_dict(checkpoint['model_disc']) print(" > Restoring Discriminator Optimizer...") optimizer_disc.load_state_dict(checkpoint['optimizer_disc']) if 'scheduler' in checkpoint: print(" > Restoring Generator LR Scheduler...") scheduler_gen.load_state_dict(checkpoint['scheduler']) # NOTE: Not sure if necessary scheduler_gen.optimizer = optimizer_gen if 'scheduler_disc' in checkpoint: print(" > Restoring Discriminator LR Scheduler...") scheduler_disc.load_state_dict(checkpoint['scheduler_disc']) scheduler_disc.optimizer = optimizer_disc except RuntimeError: # restore only matching layers. print(" > Partial model initialization...") model_dict = model_gen.state_dict() model_dict = set_init_dict(model_dict, checkpoint['model'], c) model_gen.load_state_dict(model_dict) model_dict = model_disc.state_dict() model_dict = set_init_dict(model_dict, checkpoint['model_disc'], c) model_disc.load_state_dict(model_dict) del model_dict # reset lr if not countinuining training. for group in optimizer_gen.param_groups: group['lr'] = c.lr_gen for group in optimizer_disc.param_groups: group['lr'] = c.lr_disc print(f" > Model restored from step {checkpoint['step']:d}", flush=True) args.restore_step = checkpoint['step'] else: args.restore_step = 0 if use_cuda: model_gen.cuda() criterion_gen.cuda() model_disc.cuda() criterion_disc.cuda() # DISTRUBUTED if num_gpus > 1: model_gen = DDP_th(model_gen, device_ids=[args.rank]) model_disc = DDP_th(model_disc, device_ids=[args.rank]) num_params = count_parameters(model_gen) print(" > Generator has {} parameters".format(num_params), flush=True) num_params = count_parameters(model_disc) print(" > Discriminator has {} parameters".format(num_params), flush=True) if args.restore_step == 0 or not args.best_path: best_loss = float('inf') print(" > Starting with inf best loss.") else: print(" > Restoring best loss from " f"{os.path.basename(args.best_path)} ...") best_loss = torch.load(args.best_path, map_location='cpu')['model_loss'] print(f" > Starting with best loss of {best_loss}.") keep_all_best = c.get('keep_all_best', False) keep_after = c.get('keep_after', 10000) # void if keep_all_best False global_step = args.restore_step for epoch in range(0, c.epochs): c_logger.print_epoch_start(epoch, c.epochs) _, global_step = train(model_gen, criterion_gen, optimizer_gen, model_disc, criterion_disc, optimizer_disc, scheduler_gen, scheduler_disc, ap, global_step, epoch) eval_avg_loss_dict = evaluate(model_gen, criterion_gen, model_disc, criterion_disc, ap, global_step, epoch) c_logger.print_epoch_end(epoch, eval_avg_loss_dict) target_loss = eval_avg_loss_dict[c.target_loss] best_loss = save_best_model( target_loss, best_loss, model_gen, optimizer_gen, scheduler_gen, model_disc, optimizer_disc, scheduler_disc, global_step, epoch, OUT_PATH, keep_all_best=keep_all_best, keep_after=keep_after, model_losses=eval_avg_loss_dict, )
def main(args): # pylint: disable=redefined-outer-name # pylint: disable=global-variable-undefined global train_data, eval_data print(f" > Loading wavs from: {c.data_path}") if c.feature_path is not None: print(f" > Loading features from: {c.feature_path}") eval_data, train_data = load_wav_feat_data(c.data_path, c.feature_path, c.eval_split_size) else: eval_data, train_data = load_wav_data(c.data_path, c.eval_split_size) # setup audio processor ap = AudioProcessor(**c.audio.to_dict()) # DISTRUBUTED if num_gpus > 1: init_distributed(args.rank, num_gpus, args.group_id, c.distributed["backend"], c.distributed["url"]) # setup models model_gen = setup_generator(c) model_disc = setup_discriminator(c) # setup criterion criterion_gen = GeneratorLoss(c) criterion_disc = DiscriminatorLoss(c) if use_cuda: model_gen.cuda() criterion_gen.cuda() model_disc.cuda() criterion_disc.cuda() # setup optimizers # TODO: allow loading custom optimizers optimizer_gen = None optimizer_disc = None optimizer_gen = getattr(torch.optim, c.optimizer) optimizer_gen = optimizer_gen(model_gen.parameters(), lr=c.lr_gen, **c.optimizer_params) optimizer_disc = getattr(torch.optim, c.optimizer) if c.discriminator_model == "hifigan_discriminator": optimizer_disc = optimizer_disc( itertools.chain(model_disc.msd.parameters(), model_disc.mpd.parameters()), lr=c.lr_disc, **c.optimizer_params, ) else: optimizer_disc = optimizer_disc(model_disc.parameters(), lr=c.lr_disc, **c.optimizer_params) # schedulers scheduler_gen = None scheduler_disc = None if "lr_scheduler_gen" in c: scheduler_gen = getattr(torch.optim.lr_scheduler, c.lr_scheduler_gen) scheduler_gen = scheduler_gen(optimizer_gen, **c.lr_scheduler_gen_params) if "lr_scheduler_disc" in c: scheduler_disc = getattr(torch.optim.lr_scheduler, c.lr_scheduler_disc) scheduler_disc = scheduler_disc(optimizer_disc, **c.lr_scheduler_disc_params) if args.restore_path: print(f" > Restoring from {os.path.basename(args.restore_path)}...") checkpoint = torch.load(args.restore_path, map_location="cpu") try: print(" > Restoring Generator Model...") model_gen.load_state_dict(checkpoint["model"]) print(" > Restoring Generator Optimizer...") optimizer_gen.load_state_dict(checkpoint["optimizer"]) print(" > Restoring Discriminator Model...") model_disc.load_state_dict(checkpoint["model_disc"]) print(" > Restoring Discriminator Optimizer...") optimizer_disc.load_state_dict(checkpoint["optimizer_disc"]) # restore schedulers if it is a continuing training. if args.continue_path != "": if "scheduler" in checkpoint and scheduler_gen is not None: print(" > Restoring Generator LR Scheduler...") scheduler_gen.load_state_dict(checkpoint["scheduler"]) # NOTE: Not sure if necessary scheduler_gen.optimizer = optimizer_gen if "scheduler_disc" in checkpoint and scheduler_disc is not None: print(" > Restoring Discriminator LR Scheduler...") scheduler_disc.load_state_dict( checkpoint["scheduler_disc"]) scheduler_disc.optimizer = optimizer_disc if c.lr_scheduler_disc == "ExponentialLR": scheduler_disc.last_epoch = checkpoint["epoch"] except RuntimeError: # restore only matching layers. print(" > Partial model initialization...") model_dict = model_gen.state_dict() model_dict = set_init_dict(model_dict, checkpoint["model"], c) model_gen.load_state_dict(model_dict) model_dict = model_disc.state_dict() model_dict = set_init_dict(model_dict, checkpoint["model_disc"], c) model_disc.load_state_dict(model_dict) del model_dict # reset lr if not countinuining training. if args.continue_path == "": for group in optimizer_gen.param_groups: group["lr"] = c.lr_gen for group in optimizer_disc.param_groups: group["lr"] = c.lr_disc print(f" > Model restored from step {checkpoint['step']:d}", flush=True) args.restore_step = checkpoint["step"] else: args.restore_step = 0 # DISTRUBUTED if num_gpus > 1: model_gen = DDP_th(model_gen, device_ids=[args.rank]) model_disc = DDP_th(model_disc, device_ids=[args.rank]) num_params = count_parameters(model_gen) print(" > Generator has {} parameters".format(num_params), flush=True) num_params = count_parameters(model_disc) print(" > Discriminator has {} parameters".format(num_params), flush=True) if args.restore_step == 0 or not args.best_path: best_loss = float("inf") print(" > Starting with inf best loss.") else: print(" > Restoring best loss from " f"{os.path.basename(args.best_path)} ...") best_loss = torch.load(args.best_path, map_location="cpu")["model_loss"] print(f" > Starting with best loss of {best_loss}.") keep_all_best = c.get("keep_all_best", False) keep_after = c.get("keep_after", 10000) # void if keep_all_best False global_step = args.restore_step for epoch in range(0, c.epochs): c_logger.print_epoch_start(epoch, c.epochs) _, global_step = train( model_gen, criterion_gen, optimizer_gen, model_disc, criterion_disc, optimizer_disc, scheduler_gen, scheduler_disc, ap, global_step, epoch, ) eval_avg_loss_dict = evaluate(model_gen, criterion_gen, model_disc, criterion_disc, ap, global_step, epoch) c_logger.print_epoch_end(epoch, eval_avg_loss_dict) target_loss = eval_avg_loss_dict[c.target_loss] best_loss = save_best_model( target_loss, best_loss, model_gen, optimizer_gen, scheduler_gen, model_disc, optimizer_disc, scheduler_disc, global_step, epoch, OUT_PATH, keep_all_best=keep_all_best, keep_after=keep_after, model_losses=eval_avg_loss_dict, )