Ejemplo n.º 1
0
def save_checkpoint(upsample_net, model, optimizer,
                    step, checkpoint_dir):
    """Persist the full training state to *checkpoint_dir*.

    Saves the upsampling network, the WaveGlow model, the optimizer
    state and the current global step to ``model.ckpt-<step>.pt``, then
    records that file's stem in the ``checkpoint`` index file so a later
    restore can find the most recent checkpoint.
    """
    checkpoint_path = os.path.join(
        checkpoint_dir, "model.ckpt-{}.pt".format(step))
    state = {
        "upsample_net": upsample_net.state_dict(),
        "waveglow": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "global_step": step,
    }
    torch.save(state, checkpoint_path)
    logger.info("Saved checkpoint: {}".format(checkpoint_path))

    # Track the latest checkpoint name for attempt_to_restore().
    with open(os.path.join(checkpoint_dir, 'checkpoint'), 'w') as f:
        f.write("model.ckpt-{}".format(step))
Ejemplo n.º 2
0
def attempt_to_restore(upsample_net, model, optimizer, checkpoint_dir):
    """Restore training state from the newest checkpoint in *checkpoint_dir*.

    Reads the ``checkpoint`` index file (written by ``save_checkpoint``)
    to find the latest checkpoint name, loads it, and restores the
    upsampling network, WaveGlow model and optimizer in place.

    Returns:
        The restored global step, or 0 when no checkpoint index exists.
    """
    checkpoint_list = os.path.join(checkpoint_dir, 'checkpoint')
    if os.path.exists(checkpoint_list):
        # BUGFIX: the original `open(...).readline()` never closed the
        # file handle; use a context manager so it is released promptly.
        with open(checkpoint_list) as f:
            checkpoint_filename = f.readline().strip()
        checkpoint_path = os.path.join(
            checkpoint_dir, "{}.pt".format(checkpoint_filename))
        logger.info("Restore from {}".format(checkpoint_path))
        checkpoint = load_checkpoint(checkpoint_path, FLAGS.use_cuda)
        upsample_net.load_state_dict(checkpoint["upsample_net"])
        model.load_state_dict(checkpoint["waveglow"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        global_step = checkpoint["global_step"]
    else:
        global_step = 0

    return global_step
Ejemplo n.º 3
0
def main(_):
    """Generate a waveform from a local-condition feature file.

    Loads a trained upsampling network + WaveGlow model from
    ``FLAGS.checkpoint``, upsamples the conditioning features from
    ``FLAGS.local_condition_file``, runs the flow in reverse on Gaussian
    noise, and writes the result as a .wav file under ``FLAGS.output``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    with open(FLAGS.waveglow_params, 'r') as f:
        waveglow_params = json.load(f)

    upsample_net, model = build_model(waveglow_params)
    print(upsample_net)
    print(model)

    # map_location keeps CPU-only machines working with GPU-saved checkpoints.
    checkpoint = torch.load(FLAGS.checkpoint,
                            map_location=lambda storage, loc: storage)
    upsample_net.load_state_dict(checkpoint['upsample_net'])
    model.load_state_dict(checkpoint['waveglow'])

    upsample_net.to(device).eval()
    model.to(device).eval()

    with torch.no_grad():
        local_condition = np.load(FLAGS.local_condition_file)
        # Normalize by the per-file maximum.
        # NOTE(review): assumes the features are non-negative and non-zero
        # somewhere — a zero/negative max would flip or blow up values.
        local_condition = local_condition / local_condition.max()

        # (time, channels) -> (1, channels, time) for the conv upsampler.
        local_condition = torch.FloatTensor(local_condition).to(device)
        local_condition = local_condition.unsqueeze(0).transpose(1, 2)
        local_condition = upsample_net(local_condition)

        # Latent noise matching the upsampled condition length; std 0.6
        # (< 1.0) trades diversity for cleaner samples at inference time.
        noise = torch.FloatTensor(
            1, waveglow_params['waveglow']['squeeze_factor'],
            local_condition.shape[2]).normal_(0.0, 0.6)
        noise, local_condition = noise.to(device), local_condition.to(device)

        logger.info("Generating samples...")
        # reverse=True runs the flow backwards: noise -> audio.
        waveform = model(noise,
                         reverse=True,
                         logdet=None,
                         local_condition=local_condition)
        # Clip to the valid [-1, 1] audio range before writing.
        waveform = torch.clamp(torch.clamp(waveform[0], min=-1.), max=1.)
        waveform = waveform.cpu().numpy()

        # Name the output after the input feature file.
        wav_path = os.path.splitext(
            os.path.basename(FLAGS.local_condition_file))[0] + '.wav'
        wav_path = os.path.join(FLAGS.output, wav_path)
        write_wav(waveform, waveglow_params['waveglow']['sample_rate'],
                  wav_path)
Ejemplo n.º 4
0
def main(_):
    """Train the upsampling network and WaveGlow model jointly.

    Builds the models from the JSON hyper-parameter file, optionally
    restores from ``FLAGS.restore_from`` or ``FLAGS.save_dir``, then runs
    maximum-likelihood training: maximize log N(z; 0, 0.5) + logdet of
    the flow. Checkpoints and TensorBoard summaries are written to
    ``FLAGS.save_dir``.
    """
    device = torch.device("cuda" if FLAGS.use_cuda else "cpu")

    with open(FLAGS.waveglow_params, 'r') as f:
        params = json.load(f)

    upsample_net, model = build_model(params)
    print(upsample_net)
    print(model)

    dataset = WaveGlowDataset(audio_dir=FLAGS.audio_dir,
                              sample_rate=params['waveglow']['sample_rate'],
                              local_condition_enabled=True,
                              local_condition_dir=FLAGS.local_condition_dir)
    collate_fn = WaveGlowCollate(sample_size=FLAGS.sample_size,
                                 upsample_factor=params['upsample_net']['upsample_factor'],
                                 local_condition_enabled=True)
    trainloader = DataLoader(dataset, batch_size=FLAGS.batch_size,
                             shuffle=True, num_workers=FLAGS.num_workers,
                             collate_fn=collate_fn, pin_memory=True)

    if FLAGS.use_cuda:
        logger.info("Let's use {} GPUs!".format(torch.cuda.device_count()))

    model.to(device)
    upsample_net.to(device)

    # Prior over the latent: N(0, 0.5) -> scale is sqrt(0.5).
    normal = Normal(loc=torch.tensor([0.0]).to(device),
                    scale=torch.tensor([np.sqrt(0.5)]).to(device))

    # Distinct name so the hyper-parameter dict `params` is not shadowed
    # (the original rebound `params` to the trainable-parameter list).
    trainable_params = list(upsample_net.parameters()) + list(model.parameters())
    optimizer = optim.Adam(trainable_params, lr=FLAGS.learning_rate)

    if FLAGS.restore_from is not None:
        restore_step = attempt_to_restore(upsample_net, model, optimizer,
                                          FLAGS.restore_from)

    # Prefer a checkpoint already in save_dir; fall back to restore_from.
    global_step = attempt_to_restore(upsample_net, model, optimizer,
                                     FLAGS.save_dir)

    if FLAGS.restore_from is not None and global_step == 0:
        global_step = restore_step

    # last_epoch realigns the LR schedule with the restored step count.
    scheduler = StepLR(optimizer, step_size=FLAGS.decay_steps,
                       gamma=FLAGS.decay_rate, last_epoch=global_step - 1)

    writer = SummaryWriter(FLAGS.save_dir)

    for epoch in range(FLAGS.max_epochs):
        epoch_loss = 0.0
        num_batches = 0
        for i, data in enumerate(trainloader, 0):
            sample, local_condition = data
            sample, local_condition = sample.to(device), local_condition.to(device)

            optimizer.zero_grad()

            if FLAGS.use_cuda:
                local_condition = data_parallel(upsample_net, (local_condition,))
                logdet = torch.zeros_like(sample[:, 0, 0])
                output, logdet = data_parallel(
                    model, (sample, logdet, False, local_condition))
            else:
                local_condition = upsample_net(local_condition)
                logdet = torch.zeros_like(sample[:, 0, 0])
                output, logdet = model(sample, logdet=logdet, reverse=False,
                                       local_condition=local_condition)

            # Negative log-likelihood of the flow:
            # -(log p(z) + log|det dz/dx|), averaged over the batch.
            likelihood = torch.sum(normal.log_prob(output), (1, 2))
            loss = -(likelihood + logdet).mean()

            if (i + 1) % FLAGS.log_interval == 0:
                logger.info('[%d, %3d] loss: %.3f' % (epoch + 1, i + 1, loss.item()))

            if global_step % FLAGS.summary_interval == 0:
                writer.add_scalar('loss', loss.item(), global_step)

            epoch_loss += loss.item()
            num_batches += 1

            loss.backward()
            # BUGFIX: optimizer.step() must precede scheduler.step()
            # (PyTorch >= 1.1 ordering); the original called the scheduler
            # first, skipping the initial LR value of the schedule.
            optimizer.step()
            scheduler.step()

            global_step += 1

            if global_step % FLAGS.checkpoint_interval == 0:
                save_checkpoint(upsample_net, model, optimizer, global_step, FLAGS.save_dir)

        # BUGFIX: guard against an empty loader — the original divided by
        # `i + 1`, which raises NameError when no batches were produced.
        if num_batches:
            epoch_loss /= num_batches
        logger.info('[epoch %d] loss: %.3f' % (epoch + 1, epoch_loss))
        writer.add_scalar('epoch_loss', epoch_loss, epoch)
Ejemplo n.º 5
0
def write_wav(wav, sample_rate, filename):
    """Write a float waveform in [-1, 1] to *filename* as 16-bit PCM.

    Args:
        wav: numpy float array of samples in [-1, 1].
        sample_rate: output sample rate in Hz.
        filename: destination .wav path.
    """
    max_value_16bit = (1 << 15) - 1
    # BUGFIX: scale a copy instead of `wav *= ...`, which mutated the
    # caller's array in place as a hidden side effect.
    wav = wav * max_value_16bit
    wavfile.write(filename, sample_rate, wav.astype(np.int16))
    logger.info('Updated wav file at {}'.format(filename))