def train_and_eval(rank, n_gpus, hps):
    """Per-process entry point for distributed Glow-TTS training.

    Initializes the NCCL process group, builds the data loaders, model and
    optimizer for this rank, resumes from the newest ``G_*.pth`` checkpoint
    when one exists, then runs the epoch loop.  Only rank 0 logs to
    TensorBoard, runs evaluation, and saves checkpoints.

    Args:
        rank: index of this GPU/process within the process group.
        n_gpus: total number of processes (world size).
        hps: hyper-parameter namespace with ``train``/``data``/``model`` sub-configs.
    """
    global global_step
    if rank == 0:
        # Rank 0 owns all logging / TensorBoard writers.
        logger = utils.get_logger(hps.model_dir)
        logger.info(hps)
        utils.check_git_hash(hps.model_dir)
        writer = SummaryWriter(log_dir=hps.model_dir)
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))

    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=n_gpus, rank=rank)
    torch.manual_seed(hps.train.seed)
    torch.cuda.set_device(rank)

    train_dataset = TextMelLoader(hps.data.training_files, hps.data)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=n_gpus, rank=rank, shuffle=True)
    collate_fn = TextMelCollate(1)
    train_loader = DataLoader(train_dataset, num_workers=8, shuffle=False,
                              batch_size=hps.train.batch_size, pin_memory=True,
                              drop_last=True, collate_fn=collate_fn,
                              sampler=train_sampler)
    if rank == 0:
        # Evaluation only happens on rank 0, so only it builds the val loader.
        val_dataset = TextMelLoader(hps.data.validation_files, hps.data)
        val_loader = DataLoader(val_dataset, num_workers=8, shuffle=False,
                                batch_size=hps.train.batch_size, pin_memory=True,
                                drop_last=True, collate_fn=collate_fn)

    generator = models.FlowGenerator(
        n_vocab=len(symbols),
        out_channels=hps.data.n_mel_channels,
        **hps.model).cuda(rank)
    optimizer_g = commons.Adam(generator.parameters(),
                               scheduler=hps.train.scheduler,
                               dim_model=hps.model.hidden_channels,
                               warmup_steps=hps.train.warmup_steps,
                               lr=hps.train.learning_rate,
                               betas=hps.train.betas,
                               eps=hps.train.eps)
    if hps.train.fp16_run:
        # Mixed-precision training via apex; wraps model and inner optimizer.
        generator, optimizer_g._optim = amp.initialize(
            generator, optimizer_g._optim, opt_level="O1")
    generator = DDP(generator)

    epoch_str = 1
    global_step = 0
    try:
        # Resume from the most recent generator checkpoint, if any.
        _, _, _, epoch_str = utils.load_checkpoint(
            utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"),
            generator, optimizer_g)
        epoch_str += 1
        # Fast-forward the warmup/decay schedule to the resumed position.
        optimizer_g.step_num = (epoch_str - 1) * len(train_loader)
        optimizer_g._update_learning_rate()
        global_step = (epoch_str - 1) * len(train_loader)
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt /
        # SystemExit.  Reaching here means no resumable checkpoint was found;
        # optionally start from a data-dependent initialization (DDI) snapshot.
        if hps.train.ddi and os.path.isfile(os.path.join(hps.model_dir, "ddi_G.pth")):
            _ = utils.load_checkpoint(
                os.path.join(hps.model_dir, "ddi_G.pth"), generator, optimizer_g)

    for epoch in range(epoch_str, hps.train.epochs + 1):
        if rank == 0:
            train(rank, epoch, hps, generator, optimizer_g, train_loader, logger, writer)
            evaluate(rank, epoch, hps, generator, optimizer_g, val_loader, logger, writer_eval)
            if epoch % 50 == 0:
                # Persist a full checkpoint every 50 epochs.
                utils.save_checkpoint(
                    generator, optimizer_g, hps.train.learning_rate, epoch,
                    os.path.join(hps.model_dir, "G_{}.pth".format(epoch)))
        else:
            # Non-zero ranks train without logging or evaluation.
            train(rank, epoch, hps, generator, optimizer_g, train_loader, None, None)
def init(checkpoint_path, config_path, device="cuda"):
    """Load a Glow-TTS FlowGenerator and its pronunciation dictionary.

    Reads hyper-parameters from the JSON config, restores the model weights
    from *checkpoint_path* (a file, or a directory whose newest checkpoint is
    used), and prepares the model for inference on *device*.

    Returns:
        A ``(cmu_dict, model)`` pair ready for synthesis.
    """
    hps = glow_utils.get_hparams_from_json(checkpoint_path, config_path)

    generator = models.FlowGenerator(
        len(symbols),
        out_channels=hps.data.n_mel_channels,
        **hps.model,
    ).to(device)

    # A directory argument means "use the newest checkpoint inside it".
    resolved_path = (
        glow_utils.latest_checkpoint_path(checkpoint_path)
        if os.path.isdir(checkpoint_path)
        else checkpoint_path
    )
    glow_utils.load_checkpoint(resolved_path, generator)

    # Cache inverse weights so decoding skips Jacobian computation.
    generator.decoder.store_inverse()
    generator.eval()

    return cmudict.CMUDict(hps.data.cmudict_path), generator
def main():
    """CLI entry point: synthesize speech for each line of a text file
    using Glow-TTS as the acoustic model and WaveGlow as the vocoder."""
    cli = argparse.ArgumentParser()
    cli.add_argument('-f', '--input_file', type=str,
                     help='Input file with text inside', required=True)
    cli.add_argument("-c", "--checkpoint_glow", type=str, default=None,
                     required=True, help="Path to glow checkpoint.")
    cli.add_argument("-hp", "--hyperparams", type=str, default=None,
                     required=True, help="Path to config file in JSON format")
    cli.add_argument("-o", "--output_dir", type=str, default=None,
                     required=True,
                     help="Output directory path, where plots and wavs will be put.")
    cli.add_argument("--cuda", action='store_true', help="Add to run on gpu")
    cli.add_argument("--spaces", action='store_true',
                     help="Add to add start/end spaces for glow synthesis")
    cli.add_argument("-w", "--waveglow_path", type=str, default=None,
                     required=True, help="Path to waveglow checkpoint")
    args = cli.parse_args()

    # set device
    device = torch.device('cuda') if args.cuda else torch.device('cpu')

    # loading models
    print("Loading models...")
    hps = utils.get_hparams_from_dir(args.hyperparams)
    glow_model = models.FlowGenerator(
        speaker_dim=hps.model.speaker_embedding,
        n_vocab=len(symbols),
        out_channels=hps.data.n_mel_channels,
        **hps.model,
    ).to(device)
    utils.load_checkpoint(args.checkpoint_glow, glow_model)
    # Cache inverse weights so decoding skips Jacobian computation.
    glow_model.decoder.store_inverse()
    glow_model.eval()
    print("---GLOW--- loaded")

    # handle case of no path
    waveglow, denoiser = load_waveglow_model(args.waveglow_path, device)
    print("Using waveglow neural vocoder")

    # synthesis
    print("Synthesizing...")
    speakers, audio_names = synthesize_glow(
        glow_model, device, hps, args.input_file,
        args.output_dir, waveglow, denoiser, args.spaces)
    print("Speech synthesis complete.")
import gradio as gr # load WaveGlow waveglow_path = WAVEGLOW_PATH waveglow = torch.load(waveglow_path, map_location=torch.device('cpu'))['model'] waveglow = waveglow.remove_weightnorm(waveglow) _ = waveglow.eval() # from apex import amp # waveglow, _ = amp.initialize(waveglow, [], opt_level="O3") # Try if you want to boost up synthesis speed. # If you are using a provided pretrained model hps = utils.get_hparams_from_file("./configs/base.json") checkpoint_path = PRETRAINED_GLOW_TTS_PATH model = models.FlowGenerator(len(symbols) + getattr(hps.data, "add_blank", False), out_channels=hps.data.n_mel_channels, **hps.model) utils.load_checkpoint(checkpoint_path, model) model.decoder.store_inverse() # do not calcuate jacobians for fast decoding _ = model.eval() cmu_dict = cmudict.CMUDict(hps.data.cmudict_path) # normalizing & type casting def normalize_audio(x, max_wav_value=hps.data.max_wav_value): return np.clip((x / np.abs(x).max()) * max_wav_value, -32768, 32767).astype("int16")