def voc_train_loop(paths: Paths, model: WaveRNN, loss_func, optimizer, train_set, test_set, lr, total_steps):
    """Run the WaveRNN vocoder training loop until ``total_steps`` is reached.

    Args:
        paths: Paths object providing output/log locations (voc_output, voc_log).
        model: the WaveRNN vocoder being trained (its step counter drives scheduling).
        loss_func: loss callable applied to (y_hat, y).
        optimizer: optimizer whose learning rate is overridden with ``lr``.
        train_set: iterable of (x, y, m) training batches.
        test_set: dataset passed to gen_testset at each checkpoint.
        lr: learning rate to apply to every optimizer param group.
        total_steps: global step count at which training should stop.
    """
    # Use same device as model parameters
    device = next(model.parameters()).device

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = (total_steps - model.get_step()) // total_iters + 1

    for e in range(1, epochs + 1):
        start = time.time()
        running_loss = 0.
        # Initialized here so the post-epoch model.log call below cannot hit a
        # NameError if train_set yields no batches.
        msg = ''

        for i, (x, y, m) in enumerate(train_set, 1):
            x, m, y = x.to(device), m.to(device), y.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                y_hat = data_parallel_workaround(model, x, m)
            else:
                y_hat = model(x, m)

            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = y.float()

            y = y.unsqueeze(-1)

            loss = loss_func(y_hat, y)

            optimizer.zero_grad()
            loss.backward()

            if hp.voc_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                           hp.voc_clip_grad_norm)
                # clip_grad_norm_ returns a 0-dim tensor (on the model's device)
                # in modern PyTorch; np.isnan on a CUDA tensor raises. Convert
                # to a Python float so the NaN check works for tensors and
                # plain floats alike.
                if np.isnan(float(grad_norm)):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i
            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.voc_checkpoint_every == 0:
                gen_testset(model, test_set, hp.voc_gen_at_checkpoint,
                            hp.voc_gen_batched, hp.voc_target, hp.voc_overlap,
                            paths.voc_output)
                ckpt_name = f'wave_step{k}K'
                save_checkpoint('voc', paths, model, optimizer,
                                name=ckpt_name, is_silent=True)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:.4f} | {speed:.1f} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('voc', paths, model, optimizer, is_silent=True)
        model.log(paths.voc_log, msg)
        print(' ')
def train(run_id: str, syn_dir: Path, voc_dir: Path, models_dir: Path, ground_truth: bool,
          save_every: int, backup_every: int, force_restart: bool):
    """Train the WaveRNN vocoder for a given run, saving/backing up weights periodically.

    Args:
        run_id: name of this training run; weights live under models_dir/run_id.
        syn_dir: synthesizer output dir (used when training on ground-truth mels).
        voc_dir: vocoder data dir (used when training on GTA mels).
        models_dir: root directory holding per-run model folders.
        ground_truth: if True, train on ground-truth mels from syn_dir; otherwise
            on synthesized (GTA) mels from voc_dir.
        save_every: save weights every N steps (0 disables periodic saves).
        backup_every: write a numbered checkpoint every N steps (0 disables).
        force_restart: if True, discard any existing weights and start fresh.
    """
    # Check to make sure the hop length is correctly factorised
    assert np.cumprod(hp.voc_upsample_factors)[-1] == hp.hop_length

    # Instantiate the model
    print("Initializing the model...")
    model = WaveRNN(
        rnn_dims=hp.voc_rnn_dims,
        fc_dims=hp.voc_fc_dims,
        bits=hp.bits,
        pad=hp.voc_pad,
        upsample_factors=hp.voc_upsample_factors,
        feat_dims=hp.num_mels,
        compute_dims=hp.voc_compute_dims,
        res_out_dims=hp.voc_res_out_dims,
        res_blocks=hp.voc_res_blocks,
        hop_length=hp.hop_length,
        sample_rate=hp.sample_rate,
        mode=hp.voc_mode
    ).cuda()

    # Initialize the optimizer
    optimizer = optim.Adam(model.parameters())
    for p in optimizer.param_groups:
        p["lr"] = hp.voc_lr
    loss_func = F.cross_entropy if model.mode == "RAW" else discretized_mix_logistic_loss

    # Load the weights
    model_dir = models_dir.joinpath(run_id)
    model_dir.mkdir(exist_ok=True)
    weights_fpath = model_dir.joinpath(run_id + ".pt")
    if force_restart or not weights_fpath.exists():
        print("\nStarting the training of WaveRNN from scratch\n")
        model.save(weights_fpath, optimizer)
    else:
        print("\nLoading weights at %s" % weights_fpath)
        model.load(weights_fpath, optimizer)
        print("WaveRNN weights loaded from step %d" % model.step)

    # Initialize the dataset
    metadata_fpath = syn_dir.joinpath("train.txt") if ground_truth else \
        voc_dir.joinpath("synthesized.txt")
    mel_dir = syn_dir.joinpath("mels") if ground_truth else voc_dir.joinpath("mels_gta")
    wav_dir = syn_dir.joinpath("audio")
    dataset = VocoderDataset(metadata_fpath, mel_dir, wav_dir)
    test_loader = DataLoader(dataset,
                             batch_size=1,
                             shuffle=True,
                             pin_memory=True)

    # Begin the training
    simple_table([('Batch size', hp.voc_batch_size),
                  ('LR', hp.voc_lr),
                  ('Sequence Len', hp.voc_seq_len)])

    for epoch in range(1, 350):
        data_loader = DataLoader(dataset,
                                 collate_fn=collate_vocoder,
                                 batch_size=hp.voc_batch_size,
                                 num_workers=2,
                                 shuffle=True,
                                 pin_memory=True)
        start = time.time()
        running_loss = 0.

        for i, (x, y, m) in enumerate(data_loader, 1):
            x, m, y = x.cuda(), m.cuda(), y.cuda()

            # Forward pass
            y_hat = model(x, m)
            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = y.float()
            y = y.unsqueeze(-1)
            # (Removed leftover per-iteration debug prints of y / y_hat shapes
            # that flooded stdout on every training step.)

            # Backward pass
            loss = loss_func(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            speed = i / (time.time() - start)
            avg_loss = running_loss / i

            step = model.get_step()
            k = step // 1000
            if backup_every != 0 and step % backup_every == 0:
                model.checkpoint(model_dir, optimizer)
            if save_every != 0 and step % save_every == 0:
                model.save(weights_fpath, optimizer)

            msg = f"| Epoch: {epoch} ({i}/{len(data_loader)}) | " \
                  f"Loss: {avg_loss:.4f} | {speed:.1f} " \
                  f"steps/s | Step: {k}k | "
            stream(msg)

        gen_testset(model, test_loader, hp.voc_gen_at_checkpoint,
                    hp.voc_gen_batched, hp.voc_target, hp.voc_overlap,
                    model_dir)
        print("")
def train(run_id: str, models_dir: Path, metadata_path: Path, weights_path: Path,
          ground_truth: bool, save_every: int, backup_every: int, force_restart: bool):
    """Fine-tune a WaveRNN vocoder from existing weights, logging loss to TensorBoard.

    Args:
        run_id: name of this training run; checkpoints live under models_dir/run_id.
        models_dir: root directory for model folders and the TensorBoard "logs" dir.
        metadata_path: path to the dataset metadata file fed to VocoderDataset.
        weights_path: path of the weights file to load from / save to.
        ground_truth: unused in this variant — TODO confirm whether it should
            select between GT and GTA mels as in the sibling train() above.
        save_every: currently unused here (periodic weight saves are disabled
            in favor of backup_every checkpoints); kept for interface compatibility.
        backup_every: write a numbered checkpoint every N steps (0 disables).
        force_restart: if True, start from scratch instead of loading weights.
    """
    # Check to make sure the hop length is correctly factorised
    assert np.cumprod(hp.voc_upsample_factors)[-1] == hp.hop_length

    # Instantiate the model
    print("Initializing the model...")
    model = WaveRNN(
        rnn_dims=hp.voc_rnn_dims,
        fc_dims=hp.voc_fc_dims,
        bits=hp.bits,
        pad=hp.voc_pad,
        upsample_factors=hp.voc_upsample_factors,
        feat_dims=hp.num_mels,
        compute_dims=hp.voc_compute_dims,
        res_out_dims=hp.voc_res_out_dims,
        res_blocks=hp.voc_res_blocks,
        hop_length=hp.hop_length,
        sample_rate=hp.sample_rate,
        mode=hp.voc_mode
    ).cuda()

    # Initialize the optimizer
    optimizer = optim.Adam(model.parameters())
    for p in optimizer.param_groups:
        p["lr"] = hp.voc_lr
    loss_func = F.cross_entropy if model.mode == "RAW" else discretized_mix_logistic_loss

    # Load the weights
    model_dir = models_dir.joinpath(run_id)
    model_dir.mkdir(exist_ok=True)
    weights_fpath = weights_path
    metadata_fpath = metadata_path
    if force_restart:
        print("\nStarting the training of WaveRNN from scratch\n")
        model.save(weights_fpath, optimizer)
    else:
        print("\nLoading weights at %s" % weights_fpath)
        model.load(weights_fpath, optimizer)
        print("WaveRNN weights loaded from step %d" % model.step)

    # Initialize the dataset
    dataset = VocoderDataset(metadata_fpath)
    test_loader = DataLoader(dataset,
                             batch_size=1,
                             shuffle=True,
                             pin_memory=True)

    # Begin the training
    simple_table([('Batch size', hp.voc_batch_size),
                  ('LR', hp.voc_lr),
                  ('Sequence Len', hp.voc_seq_len)])

    # NOTE(review): 428000 presumably is the step count of the pretrained
    # checkpoint and 110 a samples-per-epoch scaling constant — confirm against
    # the checkpoint actually used. Clamped to 0 so a model with fewer steps
    # than the offset doesn't start at a negative epoch number.
    epoch_start = max(0, int(
        (model.step - 428000) * 110 / dataset.get_number_of_samples()))
    num_epochs = 200

    log_path = os.path.join(models_dir, "logs")
    # exist_ok replaces the race-prone isdir-then-mkdir pair.
    os.makedirs(log_path, exist_ok=True)
    writer = SummaryWriter(log_path)
    print("Log path : " + log_path)
    print("Starting from epoch: " + str(epoch_start))

    for epoch in range(epoch_start, epoch_start + num_epochs):
        data_loader = DataLoader(dataset,
                                 collate_fn=collate_vocoder,
                                 batch_size=hp.voc_batch_size,
                                 num_workers=2,
                                 shuffle=True,
                                 pin_memory=True)
        start = time.time()
        running_loss = 0.

        for i, (x, y, m) in enumerate(data_loader, 1):
            x, m, y = x.cuda(), m.cuda(), y.cuda()

            # Forward pass
            y_hat = model(x, m)
            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = y.float()
            y = y.unsqueeze(-1)

            # Backward pass
            loss = loss_func(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            speed = i / (time.time() - start)
            avg_loss = running_loss / i

            step = model.get_step()
            k = step // 1000
            if backup_every != 0 and step % backup_every == 0:
                model.checkpoint(model_dir, optimizer)

            if step % 500 == 0:
                # global_step must be an integer: TensorBoard truncates float
                # steps to int64, which collapsed many points onto the same x
                # value with the previous round(step / 1000, 1) argument.
                writer.add_scalar('Loss/train', avg_loss, step)

            msg = f"| Epoch: {epoch} ({i}/{len(data_loader)}) | " \
                  f"Loss: {avg_loss:.4f} | {speed:.1f} " \
                  f"steps/s | Step: {k}k | "
            print(msg, flush=True)

            if step % 15000 == 0:
                gen_testset(model, test_loader, hp.voc_gen_at_checkpoint,
                            hp.voc_gen_batched, hp.voc_target, hp.voc_overlap,
                            model_dir)
                gen_meltest(model, hp.voc_gen_batched, hp.voc_target,
                            hp.voc_overlap, model_dir)