def save_checkpoint(upsample_net, model, optimizer, step, checkpoint_dir):
    """Persist the training state for `step` and record it as the latest.

    Writes a single .pt file containing the upsampling network, WaveGlow
    model, and optimizer state dicts plus the global step, then updates
    the plain-text 'checkpoint' index file that attempt_to_restore reads.
    """
    checkpoint_path = os.path.join(
        checkpoint_dir, "model.ckpt-{}.pt".format(step))
    state = {
        "upsample_net": upsample_net.state_dict(),
        "waveglow": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "global_step": step,
    }
    torch.save(state, checkpoint_path)
    logger.info("Saved checkpoint: {}".format(checkpoint_path))
    # Record the basename (without .pt) so restore can reconstruct the path.
    index_path = os.path.join(checkpoint_dir, 'checkpoint')
    with open(index_path, 'w') as f:
        f.write("model.ckpt-{}".format(step))
def attempt_to_restore(upsample_net, model, optimizer, checkpoint_dir):
    """Restore model/optimizer state from the newest checkpoint, if any.

    Reads the plain-text 'checkpoint' index file written by save_checkpoint
    to find the latest checkpoint basename, loads it, and restores all
    state dicts in place.

    Returns:
        The restored global step, or 0 when no checkpoint index exists.
    """
    checkpoint_list = os.path.join(checkpoint_dir, 'checkpoint')
    if os.path.exists(checkpoint_list):
        # Fix: use a context manager instead of a bare open().readline(),
        # which leaked the file handle.
        with open(checkpoint_list) as f:
            checkpoint_filename = f.readline().strip()
        checkpoint_path = os.path.join(
            checkpoint_dir, "{}.pt".format(checkpoint_filename))
        logger.info("Restore from {}".format(checkpoint_path))
        checkpoint = load_checkpoint(checkpoint_path, FLAGS.use_cuda)
        upsample_net.load_state_dict(checkpoint["upsample_net"])
        model.load_state_dict(checkpoint["waveglow"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        global_step = checkpoint["global_step"]
    else:
        global_step = 0
    return global_step
def main(_):
    """Synthesize a waveform from a trained WaveGlow checkpoint.

    Loads the model weights named by FLAGS.checkpoint, upsamples the local
    condition loaded from FLAGS.local_condition_file (presumably a mel
    spectrogram saved as .npy — TODO confirm), runs the reverse flow on
    Gaussian noise, and writes the result as a .wav into FLAGS.output.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    with open(FLAGS.waveglow_params, 'r') as f:
        waveglow_params = json.load(f)
    upsample_net, model = build_model(waveglow_params)
    print(upsample_net)
    print(model)
    # map_location keeps tensors on CPU at load time; both nets are moved
    # to `device` below.
    checkpoint = torch.load(FLAGS.checkpoint,
                            map_location=lambda storage, loc: storage)
    upsample_net.load_state_dict(checkpoint['upsample_net'])
    model.load_state_dict(checkpoint['waveglow'])
    upsample_net.to(device).eval()
    model.to(device).eval()
    with torch.no_grad():
        local_condition = np.load(FLAGS.local_condition_file)
        # Normalize by the file's own maximum (assumes max > 0 — TODO confirm
        # this matches the normalization used at training time).
        local_condition = local_condition / local_condition.max()
        local_condition = torch.FloatTensor(local_condition).to(device)
        # Add a batch dim and swap to (batch, channels, time) before upsampling.
        local_condition = local_condition.unsqueeze(0).transpose(1, 2)
        local_condition = upsample_net(local_condition)
        # Noise shaped (1, squeeze_factor, T) matching the upsampled condition
        # length; std 0.6 is a sampling temperature below the training prior.
        noise = torch.FloatTensor(
            1, waveglow_params['waveglow']['squeeze_factor'],
            local_condition.shape[2]).normal_(0.0, 0.6)
        noise, local_condition = noise.to(device), local_condition.to(device)
        logger.info("Generating samples...")
        # reverse=True runs the inverse flow: noise -> audio.
        waveform = model(noise, reverse=True, logdet=None,
                         local_condition=local_condition)
        # Clamp into [-1, 1] before 16-bit conversion in write_wav.
        waveform = torch.clamp(torch.clamp(waveform[0], min=-1.), max=1.)
        waveform = waveform.cpu().numpy()
        # Output .wav is named after the condition file's basename.
        wav_path = os.path.splitext(
            os.path.basename(FLAGS.local_condition_file))[0] + '.wav'
        wav_path = os.path.join(FLAGS.output, wav_path)
        write_wav(waveform, waveglow_params['waveglow']['sample_rate'],
                  wav_path)
def main(_):
    """Train WaveGlow (plus its upsampling network) on FLAGS.audio_dir.

    Builds the dataset/dataloader, optionally restores from a previous
    checkpoint, and runs the maximum-likelihood training loop, logging to
    TensorBoard and checkpointing every FLAGS.checkpoint_interval steps.
    """
    device = torch.device("cuda" if FLAGS.use_cuda else "cpu")
    with open(FLAGS.waveglow_params, 'r') as f:
        params = json.load(f)
    upsample_net, model = build_model(params)
    print(upsample_net)
    print(model)
    dataset = WaveGlowDataset(
        audio_dir=FLAGS.audio_dir,
        sample_rate=params['waveglow']['sample_rate'],
        local_condition_enabled=True,
        local_condition_dir=FLAGS.local_condition_dir)
    collate_fn = WaveGlowCollate(
        sample_size=FLAGS.sample_size,
        upsample_factor=params['upsample_net']['upsample_factor'],
        local_condition_enabled=True)
    trainloader = DataLoader(dataset, batch_size=FLAGS.batch_size,
                             shuffle=True, num_workers=FLAGS.num_workers,
                             collate_fn=collate_fn, pin_memory=True)
    if FLAGS.use_cuda:
        logger.info("Let's use {} GPUs!".format(torch.cuda.device_count()))
    model.to(device)
    upsample_net.to(device)
    # Prior with variance 0.5 (scale sqrt(0.5)) for the flow's latent space.
    normal = Normal(loc=torch.tensor([0.0]).to(device),
                    scale=torch.tensor([np.sqrt(0.5)]).to(device))
    # Renamed from `params` — the original shadowed the config dict.
    trainable_params = (list(upsample_net.parameters()) +
                        list(model.parameters()))
    optimizer = optim.Adam(trainable_params, lr=FLAGS.learning_rate)
    # Prefer resuming from save_dir; fall back to an explicit restore_from
    # checkpoint (e.g. warm start) only when save_dir has no checkpoint yet.
    if FLAGS.restore_from is not None:
        restore_step = attempt_to_restore(upsample_net, model, optimizer,
                                          FLAGS.restore_from)
    global_step = attempt_to_restore(upsample_net, model, optimizer,
                                     FLAGS.save_dir)
    if FLAGS.restore_from is not None and global_step == 0:
        global_step = restore_step
    # Per-step LR decay; last_epoch keeps the schedule aligned after resume.
    scheduler = StepLR(optimizer, step_size=FLAGS.decay_steps,
                       gamma=FLAGS.decay_rate, last_epoch=global_step - 1)
    writer = SummaryWriter(FLAGS.save_dir)
    for epoch in range(FLAGS.max_epochs):
        epoch_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            sample, local_condition = data
            sample, local_condition = (sample.to(device),
                                       local_condition.to(device))
            optimizer.zero_grad()
            if FLAGS.use_cuda:
                local_condition = data_parallel(upsample_net,
                                                (local_condition,))
                logdet = torch.zeros_like(sample[:, 0, 0])
                output, logdet = data_parallel(
                    model, (sample, logdet, False, local_condition))
            else:
                local_condition = upsample_net(local_condition)
                logdet = torch.zeros_like(sample[:, 0, 0])
                output, logdet = model(sample, logdet=logdet, reverse=False,
                                       local_condition=local_condition)
            # Negative log-likelihood under the Gaussian prior plus the
            # flow's log-determinant term, averaged over the batch.
            likelihood = torch.sum(normal.log_prob(output), (1, 2))
            loss = -(likelihood + logdet).mean()
            if (i + 1) % FLAGS.log_interval == 0:
                logger.info('[%d, %3d] loss: %.3f' %
                            (epoch + 1, i + 1, loss.item()))
            if global_step % FLAGS.summary_interval == 0:
                writer.add_scalar('loss', loss.item(), global_step)
            epoch_loss += loss.item()
            loss.backward()
            # Fix: step the optimizer BEFORE the scheduler. The original
            # called scheduler.step() first, which skips the initial learning
            # rate and is the ordering PyTorch >= 1.1 explicitly warns about.
            optimizer.step()
            scheduler.step()
            global_step += 1
            if global_step % FLAGS.checkpoint_interval == 0:
                save_checkpoint(upsample_net, model, optimizer, global_step,
                                FLAGS.save_dir)
        epoch_loss /= (i + 1)
        logger.info('[epoch %d] loss: %.3f' % (epoch + 1, epoch_loss))
        writer.add_scalar('epoch_loss', epoch_loss, epoch)
def write_wav(wav, sample_rate, filename):
    """Write a float waveform (expected in [-1, 1]) as 16-bit PCM .wav.

    Args:
        wav: numpy float array of samples; not modified by this call.
        sample_rate: output sample rate in Hz.
        filename: destination path for the .wav file.
    """
    max_value_16bit = (1 << 15) - 1
    # Fix: scale into a new array instead of `wav *= ...`, which mutated
    # the caller's array in place as a hidden side effect.
    scaled = wav * max_value_16bit
    wavfile.write(filename, sample_rate, scaled.astype(np.int16))
    logger.info('Updated wav file at {}'.format(filename))