Example #1
0
def main(args):
    """Train a CVAE on MNIST and save per-epoch samples, a loss curve and weights.

    Args:
        args: Namespace with `folder` (output dir), `seed`, `cuda`,
            `batch_size` and `epochs` attributes.
    """
    # Create the output folder if needed. makedirs handles nested paths
    # and exist_ok avoids a race between the check and the creation.
    os.makedirs(args.folder, exist_ok=True)

    # Load data
    torch.manual_seed(args.seed)
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        './data', train=True, download=True, transform=transforms.ToTensor()),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)

    # Load model. Resolve CUDA availability once so the model, the latent
    # samples and the labels all land on the same device (the original
    # moved only `sample` to CUDA, so decode() crashed on a device mismatch).
    use_cuda = torch.cuda.is_available()
    model = CVAE().cuda() if use_cuda else CVAE()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Train and generate a grid of samples every epoch
    loss_list = []
    for epoch in range(1, args.epochs + 1):
        model.train()
        _loss = train(epoch, model, train_loader, optimizer)
        loss_list.append(_loss)
        model.eval()
        with torch.no_grad():  # sampling needs no autograd graph
            sample = torch.randn(100, 20)
            # 10 rows x 10 columns: digit classes 0..9 repeated per row.
            label = torch.from_numpy(np.asarray(list(range(10)) * 10))
            if use_cuda:
                sample, label = sample.cuda(), label.cuda()
            sample = model.decode(sample, label).cpu()
        save_image(sample.view(100, 1, 28, 28),
                   os.path.join(args.folder, 'sample_' + str(epoch) + '.png'),
                   nrow=10)
    plt.plot(range(len(loss_list)), loss_list, '-o')
    plt.savefig(os.path.join(args.folder, 'cvae_loss_curve.png'))
    torch.save(model.state_dict(), os.path.join(args.folder, 'cvae.pth'))
Example #2
0
def main(**kwargs):
    """
    Main function that trains the model
    1. Retrieve arguments from kwargs
    2. Prepare data
    3. Train
    4. Display and save first batch of training set (truth and reconstructed) after every epoch
    5. If latent dimension is 2, display and save latent variable of first batch of training set after every epoch

    Args:
        dataset: Which dataset to use
        decoder_type: How to model the output pixels, Gaussian or Bernoulli
        model_sigma: In case of Gaussian decoder, whether to model the sigmas too
        epochs: How many epochs to train model
        batch_size: Size of training / testing batch
        lr: Learning rate
        latent_dim: Dimension of latent variable
        print_every: How often to print training progress
        resume_path: The path of saved model with which to resume training
        resume_epoch: In case of resuming, the number of epochs already done

    Raises:
        ValueError: If `dataset` or `decoder_type` is not a supported value.

    Notes:
        - Saves model to folder 'saved_model/' every 20 epochs and when done
        - Capable of training from scratch and resuming (provide saved model location to argument resume_path)
        - Schedules learning rate with optim.lr_scheduler.ReduceLROnPlateau
            : Decays learning rate by 1/10 when mean loss of all training data does not decrease for 10 epochs
    """
    # Retrieve arguments
    dataset = kwargs.get('dataset', defaults['dataset'])
    decoder_type = kwargs.get('decoder_type', defaults['decoder_type'])
    if decoder_type == 'Gaussian':
        model_sigma = kwargs.get('model_sigma', defaults['model_sigma'])
    else:
        # Always bind model_sigma so the `elif model_sigma` branches below
        # cannot raise NameError for non-Gaussian decoders.
        model_sigma = False
    epochs = kwargs.get('epochs', defaults['epochs'])
    batch_size = kwargs.get('batch_size', defaults['batch_size'])
    lr = kwargs.get('learning_rate', defaults['learning_rate'])
    latent_dim = kwargs.get('latent_dim', defaults['latent_dim'])
    print_every = kwargs.get('print_every', defaults['print_every'])
    resume_path = kwargs.get('resume_path', defaults['resume_path'])
    resume_epoch = kwargs.get('resume_epoch', defaults['resume_epoch'])

    # Specify dataset transform on load
    if decoder_type == 'Bernoulli':
        # Binarize pixels so the data matches the Bernoulli likelihood.
        trsf = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: (x >= 0.5).float())
        ])
    elif decoder_type == 'Gaussian':
        trsf = transforms.ToTensor()
    else:
        raise ValueError(f'Unsupported decoder_type: {decoder_type}')

    # Load dataset with transform
    if dataset == 'MNIST':
        train_data = datasets.MNIST(root='MNIST',
                                    train=True,
                                    transform=trsf,
                                    download=True)
        test_data = datasets.MNIST(root='MNIST',
                                   train=False,
                                   transform=trsf,
                                   download=True)
    elif dataset == 'CIFAR10':
        train_data = datasets.CIFAR10(root='CIFAR10',
                                      train=True,
                                      transform=trsf,
                                      download=True)
        test_data = datasets.CIFAR10(root='CIFAR10',
                                     train=False,
                                     transform=trsf,
                                     download=True)
    else:
        raise ValueError(f'Unsupported dataset: {dataset}')

    # Instantiate dataloader
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              shuffle=False)

    # Instantiate/Load model and optimizer
    if resume_path:
        autoencoder = torch.load(resume_path, map_location=device)
        optimizer = optim.Adam(autoencoder.parameters(), lr=lr)
        print('Loaded saved model at ' + resume_path)
    else:
        if decoder_type == 'Bernoulli':
            autoencoder = CVAE(latent_dim, dataset, decoder_type).to(device)
        else:
            autoencoder = CVAE(latent_dim, dataset, decoder_type,
                               model_sigma).to(device)
        optimizer = optim.Adam(autoencoder.parameters(), lr=lr)

    # Instantiate learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     verbose=True,
                                                     patience=5)

    # Announce current mode
    print(
        f'Start training CVAE with Gaussian encoder and {decoder_type} decoder on {dataset} dataset from epoch {resume_epoch+1}'
    )

    # Prepare batch to display with plt.
    # next(iter(...)) replaces the Python-2 style `.next()`, which raises
    # AttributeError on modern DataLoader iterators.
    first_test_batch, first_test_batch_label = next(iter(test_loader))
    first_test_batch, first_test_batch_label = first_test_batch.to(
        device), first_test_batch_label.to(device)

    # Display latent variable distribution before any training
    if latent_dim == 2 and resume_epoch == 0:
        autoencoder(first_test_batch, first_test_batch_label)
        display_and_save_latent(autoencoder.z, first_test_batch_label,
                                f'-{decoder_type}-z{latent_dim}-e000')

    # Train
    autoencoder.train()
    for epoch in range(resume_epoch, epochs + resume_epoch):
        loss_hist = []
        for batch_ind, (input_data, input_label) in enumerate(train_loader):
            input_data, input_label = input_data.to(device), input_label.to(
                device)

            # Forward propagation
            if decoder_type == 'Bernoulli':
                z_mu, z_sigma, p = autoencoder(input_data, input_label)
            elif model_sigma:
                z_mu, z_sigma, out_mu, out_sigma = autoencoder(
                    input_data, input_label)
            else:
                z_mu, z_sigma, out_mu = autoencoder(input_data, input_label)

            # Calculate loss: KL(q(z|x) || N(0, I)) per example
            KL_divergence_i = 0.5 * torch.sum(
                z_mu**2 + z_sigma**2 - torch.log(1e-8 + z_sigma**2) - 1.,
                dim=1)
            # Reconstruction log-likelihood per example, per decoder model
            if decoder_type == 'Bernoulli':
                reconstruction_loss_i = -torch.sum(F.binary_cross_entropy(
                    p, input_data, reduction='none'),
                                                   dim=(1, 2, 3))
            elif model_sigma:
                reconstruction_loss_i = -0.5 * torch.sum(
                    torch.log(1e-8 + 6.28 * out_sigma**2) +
                    ((input_data - out_mu)**2) / (out_sigma**2),
                    dim=(1, 2, 3))
            else:
                reconstruction_loss_i = -0.5 * torch.sum(
                    (input_data - out_mu)**2, dim=(1, 2, 3))
            ELBO_i = reconstruction_loss_i - KL_divergence_i
            loss = -torch.mean(ELBO_i)

            # Store a plain float: appending the tensor would keep every
            # batch's autograd graph alive for the whole epoch.
            loss_hist.append(loss.item())

            # Backward propagation
            optimizer.zero_grad()
            loss.backward()

            # Update parameters
            optimizer.step()

            # Print progress
            if batch_ind % print_every == 0:
                train_log = 'Epoch {:03d}/{:03d}\tLoss: {:.6f}\t\tTrain: [{}/{} ({:.0f}%)]           '.format(
                    epoch + 1, epochs + resume_epoch,
                    loss.item(), batch_ind + 1, len(train_loader),
                    100. * batch_ind / len(train_loader))
                print(train_log, end='\r')
                sys.stdout.flush()

        # Learning rate decay
        scheduler.step(sum(loss_hist) / len(loss_hist))

        # Save model every 20 epochs, except on the final epoch
        # (account for resume_epoch so resumed runs don't double-save).
        if (epoch + 1) % 20 == 0 and epoch + 1 != epochs + resume_epoch:
            PATH = f'saved_model/{dataset}-{decoder_type}-e{epoch+1}-z{latent_dim}' + datetime.datetime.now(
            ).strftime("-%b-%d-%H-%M-%p")
            torch.save(autoencoder, PATH)
            print('\vTemporarily saved model to ' + PATH)

        # Display training result with test set
        data = f'-{decoder_type}-z{latent_dim}-e{epoch+1:03d}'
        with torch.no_grad():
            autoencoder.eval()
            if decoder_type == 'Bernoulli':
                z_mu, z_sigma, p = autoencoder(first_test_batch,
                                               first_test_batch_label)
                output = torch.bernoulli(p)

                if latent_dim == 2:
                    display_and_save_latent(autoencoder.z,
                                            first_test_batch_label, data)

                display_and_save_batch("Binarized-truth",
                                       first_test_batch,
                                       data,
                                       save=(epoch == 0))
                display_and_save_batch("Mean-reconstruction",
                                       p,
                                       data,
                                       save=True)
                display_and_save_batch("Sampled-reconstruction",
                                       output,
                                       data,
                                       save=True)

            elif model_sigma:
                z_mu, z_sigma, out_mu, out_sigma = autoencoder(
                    first_test_batch, first_test_batch_label)
                output = torch.normal(out_mu, out_sigma).clamp(0., 1.)

                if latent_dim == 2:
                    display_and_save_latent(autoencoder.z,
                                            first_test_batch_label, data)

                display_and_save_batch("Truth",
                                       first_test_batch,
                                       data,
                                       save=(epoch == 0))
                display_and_save_batch("Mean-reconstruction",
                                       out_mu,
                                       data,
                                       save=True)
                # display_and_save_batch("Sampled reconstruction", output, data, save=True)

            else:
                z_mu, z_sigma, out_mu = autoencoder(first_test_batch,
                                                    first_test_batch_label)
                output = torch.normal(out_mu,
                                      torch.ones_like(out_mu)).clamp(0., 1.)

                if latent_dim == 2:
                    display_and_save_latent(autoencoder.z,
                                            first_test_batch_label, data)

                display_and_save_batch("Truth",
                                       first_test_batch,
                                       data,
                                       save=(epoch == 0))
                display_and_save_batch("Mean-reconstruction",
                                       out_mu,
                                       data,
                                       save=True)
                # display_and_save_batch("Sampled reconstruction", output, data, save=True)
            autoencoder.train()

    # Save final model
    PATH = f'saved_model/{dataset}-{decoder_type}-e{epochs+resume_epoch}-z{latent_dim}' + datetime.datetime.now(
    ).strftime("-%b-%d-%H-%M-%p")
    torch.save(autoencoder, PATH)
    print('\vSaved model to ' + PATH)
Example #3
0
def train(train_A_dir,
          train_B_dir,
          model_dir,
          model_name,
          random_seed,
          val_A_dir,
          val_B_dir,
          output_dir,
          tensorboard_dir,
          load_path,
          gen_eval=True):
    """Train a CVAE voice-conversion model between two speaker classes.

    Args:
        train_A_dir / train_B_dir: Directories of training wavs for class 0 / 1.
        model_dir: Directory for normalization statistics and checkpoints.
        model_name: Sub-directory name used for checkpoint files.
        random_seed: Seed for numpy's RNG (controls training-frame sampling).
        val_A_dir / val_B_dir: Optional validation wav directories.
        output_dir: Root directory for converted validation audio.
        tensorboard_dir: Log directory for the SummaryWriter.
        load_path: Optional path to a saved state_dict to load.
        gen_eval: If True and load_path is set, only generate evaluation
            audio for val_A_dir and terminate the process.
    """
    np.random.seed(random_seed)

    # For now, copy hyperparams used in the CycleGAN
    num_epochs = 100000
    learning_rate = 0.0002
    sampling_rate = 16000
    num_mcep = 24
    frame_period = 5.0
    n_frames = 128
    device = 'cuda'

    # Use the same pre-processing as the CycleGAN
    print("Begin Preprocessing")

    wavs_A = load_wavs(wav_dir=train_A_dir, sr=sampling_rate)
    wavs_B = load_wavs(wav_dir=train_B_dir, sr=sampling_rate)
    print("Finished Loading")

    f0s_A, timeaxes_A, sps_A, aps_A, coded_sps_A = world_encode_data(
        wavs=wavs_A,
        fs=sampling_rate,
        frame_period=frame_period,
        coded_dim=num_mcep)
    f0s_B, timeaxes_B, sps_B, aps_B, coded_sps_B = world_encode_data(
        wavs=wavs_B,
        fs=sampling_rate,
        frame_period=frame_period,
        coded_dim=num_mcep)
    print("Finished Encoding")

    log_f0s_mean_A, log_f0s_std_A = logf0_statistics(f0s_A)
    log_f0s_mean_B, log_f0s_std_B = logf0_statistics(f0s_B)

    print('Log Pitch A')
    print('Mean: %f, Std: %f' % (log_f0s_mean_A, log_f0s_std_A))
    print('Log Pitch B')
    print('Mean: %f, Std: %f' % (log_f0s_mean_B, log_f0s_std_B))

    coded_sps_A_transposed = transpose_in_list(lst=coded_sps_A)
    coded_sps_B_transposed = transpose_in_list(lst=coded_sps_B)

    coded_sps_A_norm, coded_sps_A_mean, coded_sps_A_std = coded_sps_normalization_fit_transoform(
        coded_sps=coded_sps_A_transposed)
    print("Input data fixed.")
    coded_sps_B_norm, coded_sps_B_mean, coded_sps_B_std = coded_sps_normalization_fit_transoform(
        coded_sps=coded_sps_B_transposed)

    # Persist normalization statistics so inference can undo them later.
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    np.savez(os.path.join(model_dir, 'logf0s_normalization.npz'),
             mean_A=log_f0s_mean_A,
             std_A=log_f0s_std_A,
             mean_B=log_f0s_mean_B,
             std_B=log_f0s_std_B)
    np.savez(os.path.join(model_dir, 'mcep_normalization.npz'),
             mean_A=coded_sps_A_mean,
             std_A=coded_sps_A_std,
             mean_B=coded_sps_B_mean,
             std_B=coded_sps_B_std)

    if val_A_dir is not None:
        validation_A_output_dir = os.path.join(output_dir, 'converted_A')
        if not os.path.exists(validation_A_output_dir):
            os.makedirs(validation_A_output_dir)

    if val_B_dir is not None:
        validation_B_output_dir = os.path.join(output_dir, 'converted_B')
        if not os.path.exists(validation_B_output_dir):
            os.makedirs(validation_B_output_dir)

    print("End Preprocessing")

    if load_path is not None:
        model = CVAE(num_mcep, 128, num_mcep, 2)
        model.load_state_dict(torch.load(load_path))
        model.eval()
        if device == 'cuda':
            model.cuda()
        print("Loaded Model from path %s" % load_path)
        if val_A_dir is not None and gen_eval:
            print("Generating Evaluation Data")
            for file in os.listdir(val_A_dir):
                filepath = os.path.join(val_A_dir, file)
                print(
                    "Converting {0} from Class 0 to Class 1".format(filepath))
                wav, _ = librosa.load(filepath, sr=sampling_rate, mono=True)
                wav = wav_padding(wav=wav,
                                  sr=sampling_rate,
                                  frame_period=frame_period,
                                  multiple=4)
                f0, timeaxis, sp, ap = world_decompose(
                    wav=wav, fs=sampling_rate, frame_period=frame_period)
                f0_converted = pitch_conversion(f0=f0,
                                                mean_log_src=log_f0s_mean_A,
                                                std_log_src=log_f0s_std_A,
                                                mean_log_target=log_f0s_mean_B,
                                                std_log_target=log_f0s_std_B)
                coded_sp = world_encode_spectral_envelop(sp=sp,
                                                         fs=sampling_rate,
                                                         dim=num_mcep)
                coded_sp_transposed = coded_sp.T
                coded_sp_norm = (coded_sp_transposed -
                                 coded_sps_A_mean) / coded_sps_A_std
                coded_sp_converted_norm, _, _ = model.convert(
                    np.array([coded_sp_norm]), 0, 1, device)
                coded_sp_converted_norm = coded_sp_converted_norm.cpu().numpy()
                coded_sp_converted_norm = np.squeeze(coded_sp_converted_norm)
                coded_sp_converted = coded_sp_converted_norm * coded_sps_B_std + coded_sps_B_mean
                coded_sp_converted = coded_sp_converted.T
                coded_sp_converted = np.ascontiguousarray(coded_sp_converted)
                decoded_sp_converted = world_decode_spectral_envelop(
                    coded_sp=coded_sp_converted, fs=sampling_rate)
                wav_transformed = world_speech_synthesis(
                    f0=f0_converted,
                    decoded_sp=decoded_sp_converted,
                    ap=ap,
                    fs=sampling_rate,
                    frame_period=frame_period)
                librosa.output.write_wav(
                    os.path.join(validation_A_output_dir,
                                 'eval_' + os.path.basename(file)),
                    wav_transformed, sampling_rate)
            exit(0)

    print("Begin Training")

    if load_path is None:
        model = CVAE(num_mcep, 128, num_mcep, 2)
    else:
        # Resume from the checkpoint loaded above. The original code
        # re-created the model here unconditionally, silently discarding
        # the loaded weights, so load_path never actually resumed training.
        model.train()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    writer = SummaryWriter(tensorboard_dir)

    if device == 'cuda':
        model.cuda()

    for epoch in tqdm(range(num_epochs)):
        dataset_A, dataset_B = sample_train_data(dataset_A=coded_sps_A_norm,
                                                 dataset_B=coded_sps_B_norm,
                                                 n_frames=n_frames)
        dataset_A = torch.tensor(dataset_A).to(torch.float)
        dataset_B = torch.tensor(dataset_B).to(torch.float)

        # Build per-frame one-hot class labels: class 0 for A, class 1 for B.
        n_samples, input_dim, depth = dataset_A.shape
        y_A = F.one_hot(torch.zeros(depth).to(torch.int64),
                        num_classes=2).to(torch.float).T
        y_B = F.one_hot(torch.ones(depth).to(torch.int64),
                        num_classes=2).to(torch.float).T
        (y_A, y_B) = (y_A.reshape((1, 2, depth)), y_B.reshape((1, 2, depth)))
        y_A = torch.cat([y_A] * n_samples)
        y_B = torch.cat([y_B] * n_samples)

        X = torch.cat((dataset_A, dataset_B)).to(device)
        Y = torch.cat((y_A, y_B)).to(device)

        out, z_mu, z_var = model(X, Y)

        # reduction='sum' replaces the deprecated size_average=False
        # (numerically identical).
        rec_loss = F.binary_cross_entropy(out, X, reduction='sum')
        kl_diver = -0.5 * torch.sum(1 + z_var - z_mu.pow(2) - z_var.exp())

        loss = rec_loss + kl_diver

        writer.add_scalar('Reconstruction Loss', rec_loss, epoch)
        writer.add_scalar('KL-Divergence', kl_diver, epoch)
        writer.add_scalar('Total Loss', loss, epoch)

        # Clear stale gradients before backprop; without this, gradients
        # accumulate across every epoch of the run.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if val_A_dir is not None:
            if epoch % 1000 == 0:
                print('Generating Validation Data...')
                for file in os.listdir(val_A_dir):
                    filepath = os.path.join(val_A_dir, file)
                    print("Converting {0} from Class 0 to Class 1".format(
                        filepath))
                    wav, _ = librosa.load(filepath,
                                          sr=sampling_rate,
                                          mono=True)
                    wav = wav_padding(wav=wav,
                                      sr=sampling_rate,
                                      frame_period=frame_period,
                                      multiple=4)
                    f0, timeaxis, sp, ap = world_decompose(
                        wav=wav, fs=sampling_rate, frame_period=frame_period)
                    f0_converted = pitch_conversion(
                        f0=f0,
                        mean_log_src=log_f0s_mean_A,
                        std_log_src=log_f0s_std_A,
                        mean_log_target=log_f0s_mean_B,
                        std_log_target=log_f0s_std_B)
                    coded_sp = world_encode_spectral_envelop(sp=sp,
                                                             fs=sampling_rate,
                                                             dim=num_mcep)
                    coded_sp_transposed = coded_sp.T
                    coded_sp_norm = (coded_sp_transposed -
                                     coded_sps_A_mean) / coded_sps_A_std
                    coded_sp_converted_norm, _, _ = model.convert(
                        np.array([coded_sp_norm]), 0, 1, device)
                    coded_sp_converted_norm = coded_sp_converted_norm.cpu(
                    ).numpy()
                    coded_sp_converted_norm = np.squeeze(
                        coded_sp_converted_norm)
                    coded_sp_converted = coded_sp_converted_norm * coded_sps_B_std + coded_sps_B_mean
                    coded_sp_converted = coded_sp_converted.T
                    coded_sp_converted = np.ascontiguousarray(
                        coded_sp_converted)
                    decoded_sp_converted = world_decode_spectral_envelop(
                        coded_sp=coded_sp_converted, fs=sampling_rate)
                    wav_transformed = world_speech_synthesis(
                        f0=f0_converted,
                        decoded_sp=decoded_sp_converted,
                        ap=ap,
                        fs=sampling_rate,
                        frame_period=frame_period)
                    librosa.output.write_wav(
                        os.path.join(validation_A_output_dir,
                                     str(epoch) + '_' +
                                     os.path.basename(file)), wav_transformed,
                        sampling_rate)
                    # Only convert the first validation file to keep the
                    # training loop fast.
                    break
        if epoch % 1000 == 0:
            print('Saving Checkpoint')
            filepath = os.path.join(model_dir, model_name)
            if not os.path.exists(filepath):
                os.makedirs(filepath)
            torch.save(model.state_dict(),
                       os.path.join(filepath, '{0}.ckpt'.format(epoch)))

    # Flush any buffered TensorBoard events.
    writer.close()
Example #4
0
          train_loader=train_gen,
          val_loader=val_gen,
          tqdm=tqdm.tqdm,
          device=device,
          writer=writer,
          start_time=start_time,
          dataset_name=opt.dataset,
          iter_max=opt.max_iter,
          iter_log=opt.log_iter,
          iter_save=opt.save_iter,
          mask_prob=opt.mask_prob)
elif opt.mode == 'test':
    ut.load_model_by_name(model,
                          opt.eval_model,
                          global_step=opt.eval_model_iter)
    model.eval()
    os.makedirs(os.path.join("result", opt.dataset), exist_ok=True)
    with open(os.path.join("result", opt.dataset, "result_1.txt"),
              "w") as handle:
        for i in tqdm.tqdm(range(100)):
            generated_list = model.generate_samples(opt, vocab, samples=10)
            for ind, sentence in enumerate(generated_list):
                handle.write("%s, %d\n" % (sentence, ind % 5))
                # handle.write("%s\n" % sentence)
                handle.flush()
    # generated_list = model.generate_samples(opt, vocab)
    # for ind, sentence in enumerate(generated_list):
    #     print(sentence)
elif opt.mode == 'latent':

    def init_figure():
def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.Resize((224, 224), Image.LANCZOS),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    pad_idx = vocab.word2idx['<pad>']
    sos_idx = vocab.word2idx['<start>']
    eos_idx = vocab.word2idx['<end>']
    unk_idx = vocab.word2idx['<unk>']

    # Build the models
    model = CVAE(
        vocab_size=len(vocab),
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        max_sequence_length=args.max_sequence_length,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        pad_idx=pad_idx,
        sos_idx=sos_idx,
        eos_idx=eos_idx,
        unk_idx=unk_idx
    )

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    model.load_state_dict(torch.load(args.load_checkpoint))
    print("Model loaded from {}".format(args.load_checkpoint))

    model.to(device)
    model.eval()

    # Build data loader
    train_data_loader, valid_data_loader = get_loader(args.train_image_dir, args.val_image_dir,
                                                      args.train_caption_path, args.val_caption_path, vocab,
                                                      args.batch_size,
                                                      shuffle=True, num_workers=args.num_workers)

    f1 = open('{}/results/generated_captions.txt'.format(dataset_root_dir), 'w')
    f2 = open('{}/results/ground_truth_captions.txt'.format(dataset_root_dir), 'w')
    for i, (images, captions, lengths) in enumerate(valid_data_loader):
        images = images.to(device)

        sampled_ids, z = model.inference(n=args.batch_size, c=images)

        sampled_ids_batches = sampled_ids.cpu().numpy()  # (batch_size, max_seq_length)
        captions = captions.cpu().numpy()

        # Convert word_ids to words
        for j, sampled_ids in enumerate(sampled_ids_batches):
            sampled_caption = []
            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                sampled_caption.append(word)
                if word == '<end>':
                    break
            generated_sentence = ' '.join(sampled_caption)
            generated_sentence = generated_sentence.rstrip()
            generated_sentence = generated_sentence.replace("\n", "")
            generated_sentence = "{0}\n".format(generated_sentence)
            if j == 0:
                print("RE: {}".format(generated_sentence))
            f1.write(generated_sentence)

        for g, ground_truth_ids in enumerate(captions):
            ground_truth_caption = []
            for word_id in ground_truth_ids:
                word = vocab.idx2word[word_id]
                ground_truth_caption.append(word)
                if word == '<end>':
                    break
            ground_truth_sentence = ' '.join(ground_truth_caption)
            ground_truth_sentence = ground_truth_sentence.rstrip()
            ground_truth_sentence = ground_truth_sentence.replace("\n", "")
            ground_truth_sentence = "{0}\n".format(ground_truth_sentence)
            if g == 0:
                print("GT: {}".format(ground_truth_sentence))
            f2.write(ground_truth_sentence)
        if i % 10 == 0:
            print("This is the {0}th batch".format(i))
    f1.close()
    f2.close()
Example #6
0
class CVAEInterface():
    def __init__(self, run_id=1, output_path="", env_path_root=""):
        """Wrap a CVAE model with dataset-loading, training and plotting helpers.

        Args:
            run_id: identifier forwarded to the underlying CVAE (used for
                run-specific logging inside the model).
            output_path: directory for generated plots/points; it is wiped and
                recreated on every run. Falsy values ("" or None) disable it.
            env_path_root: root directory containing per-environment map files
                (read later by visualize_map).
        """
        super().__init__()
        self.cvae = CVAE(run_id=run_id)
        self.device = torch.device('cuda' if CUDA_AVAILABLE else 'cpu')
        self.output_path = output_path
        self.env_path_root = env_path_root

        # BUG FIX: the original checked `is not None`, which let the default
        # "" through; os.path.exists("") is False and os.mkdir("") raises
        # FileNotFoundError. Treat any falsy path as "no output folder".
        if self.output_path:
            if os.path.exists(self.output_path):
                shutil.rmtree(self.output_path)
            # makedirs also creates missing parent directories.
            os.makedirs(self.output_path)

    def load_dataset(self, dataset_root, data_type="arm", mode="train"):
        """Load per-environment path data and build train/test dataloaders.

        Each numeric subdirectory of `dataset_root` is one environment and must
        contain a `data_<data_type>.txt` file. Rows are sliced by the module
        constants X_DIM / C_DIM / POINT_DIM; the exact column layout depends on
        IGNORE_START (assumed: [sample | start | condition...] — TODO confirm
        against the data generator).

        Args:
            dataset_root: directory with one numeric subdirectory per environment.
            data_type: "arm", "base", or "both". "both" builds two test
                dataloaders (arm/base) distinguished by a one-hot label pair
                inserted at column 2*POINT_DIM.
            mode: "train" fills self.train_dataloader (and, for single types,
                self.train_paths_dataset); "test" fills the test dataloader(s)
                built from the unique condition vectors.
        """
        assert (data_type == "both" or data_type == "arm"
                or data_type == "base")
        assert (mode == "train" or mode == "test")
        # Should show different count and path for different modes
        print("Loading {} dataset for mode : {}, path : {}".format(
            data_type, mode, dataset_root))
        self.data_type = data_type

        # FULL_STATE rows: [env_id, state, condition]; CONDITION_ONLY rows:
        # [env_id, condition] — used to sample x given c at test time.
        paths_dataset = PathsDataset(type="FULL_STATE")
        c_test_dataset = PathsDataset(type="CONDITION_ONLY")
        env_dir_paths = os.listdir(dataset_root)
        # Get all C vars to test sample generation on each
        all_condition_vars = []
        # Only entries whose names start with a digit are environment dirs.
        # NOTE(review): env_dir_index stays a string here; numpy later coerces
        # it to float in env_index.fill() — works only for numeric names.
        for env_dir_index in filter(lambda f: f[0].isdigit(), env_dir_paths):
            env_paths_file = os.path.join(dataset_root, env_dir_index,
                                          "data_{}.txt".format(data_type))
            env_paths = np.loadtxt(env_paths_file)
            # 4 to 16
            if IGNORE_START:
                # Drop samples closer than 5.0 to the start so the start region
                # is under-represented in the training distribution.
                start = env_paths[:, X_DIM:2 * X_DIM]
                samples = env_paths[:, :X_DIM]
                euc_dist = np.linalg.norm(start - samples, axis=1)
                far_from_start = np.where(euc_dist > 5.0)
                print(far_from_start)
                env_paths = env_paths[far_from_start[0], :]
                condition_vars = env_paths[:, 2 * X_DIM:2 * X_DIM + C_DIM]
            else:
                if mode == "train":
                    # Testing, less points near start to reduce them in sampled output
                    start = env_paths[:, X_DIM:X_DIM + POINT_DIM]
                    samples = env_paths[:, :X_DIM]
                    euc_dist = np.linalg.norm(start - samples, axis=1)
                    far_from_start = np.where(euc_dist > 2.0)
                    # print(far_from_start)
                    env_paths = env_paths[far_from_start[0], :]

                condition_vars = env_paths[:, X_DIM:X_DIM + C_DIM]
            # print(env_paths.shape)
            # Stuff for train dataloader
            # Take only required elements
            # env_paths = env_paths[:, :X_DIM + C_DIM]
            env_paths = np.hstack((env_paths[:, :X_DIM], condition_vars))
            # Uniquify to remove duplicates
            env_paths = np.unique(env_paths, axis=0)
            # Prepend the environment id as column 0 of every row.
            env_index = np.empty((env_paths.shape[0], 1))
            env_index.fill(env_dir_index)
            data = np.hstack((env_index, env_paths))
            paths_dataset.add_env_paths(data.tolist())

            # Stuff for test dataloader
            env_index = np.empty((condition_vars.shape[0], 1))
            env_index.fill(env_dir_index)
            data = np.hstack((env_index, condition_vars))
            all_condition_vars += data.tolist()
            print("Added {} states from {} environment".format(
                env_paths.shape[0], env_dir_index))

        dataloader = DataLoader(paths_dataset,
                                batch_size=TRAIN_BATCH_SIZE,
                                shuffle=True)

        if data_type != "both":

            # Depending on which dataset is being loaded, set the right variables
            if mode == "train":
                self.train_dataloader = dataloader
                self.train_paths_dataset = paths_dataset
            elif mode == "test":
                self.test_condition_vars = np.unique(all_condition_vars,
                                                     axis=0)
                print("Unique test conditions count : {}".format(
                    self.test_condition_vars.shape[0]))
                # Tile condition variables to predict given number of samples for x
                all_condition_vars_tile = np.repeat(self.test_condition_vars,
                                                    TEST_SAMPLES, 0)
                c_test_dataset.add_env_paths(all_condition_vars_tile.tolist())
                # shuffle=False: test() later relies on TEST_SAMPLES
                # consecutive rows per condition, in order.
                c_test_dataloader = DataLoader(c_test_dataset,
                                               batch_size=TEST_BATCH_SIZE,
                                               shuffle=False)
                self.test_dataloader = c_test_dataloader
        else:
            # "both": build separate condition-only test sets for arm and base
            # by re-inserting the one-hot (arm/base) label columns.
            arm_test_dataset = PathsDataset(type="CONDITION_ONLY")
            base_test_dataset = PathsDataset(type="CONDITION_ONLY")

            all_condition_vars = np.array(all_condition_vars)
            # Columns 4,5 are assumed to be the arm/base one-hot pair — drop
            # them so conditions unify across the two types. TODO confirm.
            self.test_condition_vars = np.delete(all_condition_vars, [4, 5],
                                                 axis=1)
            self.test_condition_vars = np.unique(self.test_condition_vars,
                                                 axis=0)
            print("Unique test conditions count : {}".format(
                self.test_condition_vars.shape[0]))
            # print(self.test_condition_vars)
            # Insert (0, 1) at 2*POINT_DIM for arm: second insert lands before
            # the first, producing the pair [0, 1].
            arm_condition_vars = np.insert(self.test_condition_vars,
                                           2 * POINT_DIM,
                                           1,
                                           axis=1)
            arm_condition_vars = np.insert(arm_condition_vars,
                                           2 * POINT_DIM,
                                           0,
                                           axis=1)

            arm_condition_vars = np.repeat(arm_condition_vars, TEST_SAMPLES, 0)
            arm_test_dataset.add_env_paths(arm_condition_vars.tolist())
            arm_test_dataloader = DataLoader(arm_test_dataset,
                                             batch_size=TEST_BATCH_SIZE,
                                             shuffle=False)

            # Same trick with the inserts swapped gives the base pair [1, 0].
            base_condition_vars = np.insert(self.test_condition_vars,
                                            2 * POINT_DIM,
                                            0,
                                            axis=1)
            base_condition_vars = np.insert(base_condition_vars,
                                            2 * POINT_DIM,
                                            1,
                                            axis=1)

            base_condition_vars = np.repeat(base_condition_vars, TEST_SAMPLES,
                                            0)
            base_test_dataset.add_env_paths(base_condition_vars.tolist())
            base_test_dataloader = DataLoader(base_test_dataset,
                                              batch_size=TEST_BATCH_SIZE,
                                              shuffle=False)

            if mode == "train":
                self.train_dataloader = dataloader
            elif mode == "test":
                self.arm_test_dataloader = arm_test_dataloader
                self.base_test_dataloader = base_test_dataloader

    def visualize_train_data(self, num_conditions=1):
        """Plot the training samples of `num_conditions` randomly chosen
        conditions, logging one tensorboard figure per condition.

        For each pick: choose a random dataset row, take its condition vector,
        gather every training state with exactly that condition, and render
        them with self.plot().

        Args:
            num_conditions: how many random conditions to visualize.
        """
        print("Plotting input data for {} random conditions".format(
            num_conditions))
        # Dataset rows are [env_id | state (2) | condition ...].
        all_input_paths = np.array(self.train_paths_dataset.paths)[:, 1:]
        env_ids = np.array(self.train_paths_dataset.paths)[:, :1]
        for c_i in range(num_conditions):
            rand_index = np.random.randint(0, all_input_paths.shape[0])
            condition = all_input_paths[rand_index, 2:]
            env_id = env_ids[rand_index, 0]
            # BUG FIX: the original used np.isin(...).all(axis=1), which tests
            # per-element *set membership*, so rows containing the condition's
            # values in the wrong positions also matched. Exact element-wise
            # row equality selects only rows with this precise condition.
            indices = np.where(
                (all_input_paths[:, 2:] == condition).all(axis=1))[0]
            x = all_input_paths[indices, :2]
            fig = self.plot(x, condition, env_id=env_id)
            self.cvae.tboard.add_figure('train_data/condition_{}'.format(c_i),
                                        fig, 0)
        self.cvae.tboard.flush()

    def visualize_map(self, env_id):
        """Draw the obstacles of environment `env_id` onto the current
        matplotlib axes.

        Reads `<env_path_root>/<env_id>.txt`; every line whose first field
        contains "wall" or "table" describes an axis-aligned rectangle by its
        center and size, which is added as a patch.
        """
        map_file = "{}/{}.txt".format(self.env_path_root, int(env_id))
        plt.title('Environment - {}'.format(env_id))
        with open(map_file, "r") as map_fh:
            for raw_line in map_fh:
                fields = raw_line.split(" ")
                if "wall" in fields[0] or "table" in fields[0]:
                    # fields: [name, center-x, center-y, _, length, breadth]
                    center_x = float(fields[1])
                    center_y = float(fields[2])
                    length = float(fields[4])
                    breadth = float(fields[5])
                    patch = Rectangle(
                        (center_x - length / 2, center_y - breadth / 2),
                        length, breadth)
                    plt.gca().add_patch(patch)
        plt.draw()

    def plot(self, x, c, env_id=None, suffix=0, write_file=False, show=False):
        '''
        Plot samples and environment - from train input or predicted output.

        Args:
            x: (N, 2) array of sample positions.
            c: condition vector; when IGNORE_START, c[0:2] is the goal,
               otherwise c[0:2] is the start and c[2:4] the goal.
            env_id: if given, draw the environment map via visualize_map.
            suffix: tag embedded in the output file names.
            write_file: also save the figure plus raw points / start-goal
                text files under self.output_path.
            show: call plt.show() on the figure.

        Returns:
            The created matplotlib figure.
        '''
        if IGNORE_START:
            goal = c[0:2]
        else:
            start = c[0:2]
            goal = c[2:4]
        # For given conditional, plot the samples
        fig1 = plt.figure(figsize=(10, 6), dpi=80)
        plt.scatter(x[:, 0], x[:, 1], color="green", s=70, alpha=0.1)
        if not IGNORE_START:
            plt.scatter(start[0], start[1], color="blue", s=70, alpha=0.6)
        plt.scatter(goal[0], goal[1], color="red", s=70, alpha=0.6)
        if env_id is not None:
            self.visualize_map(env_id)

        plt.xlabel('x')
        plt.ylabel('y')
        plt.xlim(0, X_MAX)
        plt.ylim(0, Y_MAX)
        if write_file:
            plt.savefig('{}/gen_points_fig_{}.png'.format(
                self.output_path, suffix))
            np.savetxt('{}/gen_points_{}.txt'.format(self.output_path, suffix),
                       x,
                       fmt="%.2f",
                       delimiter=',')
            # BUG FIX: with IGNORE_START set, `start` was never bound and the
            # original np.vstack((start, goal)) raised NameError. Save only
            # the goal row in that case.
            start_goal = (np.atleast_2d(goal)
                          if IGNORE_START else np.vstack((start, goal)))
            np.savetxt('{}/start_goal_{}.txt'.format(self.output_path, suffix),
                       start_goal,
                       fmt="%.2f",
                       delimiter=',')
        if show:
            plt.show()
        return fig1

    def load_saved_cvae(self, decoder_path):
        """Restore pretrained decoder weights into the wrapped CVAE.

        Args:
            decoder_path: path to the saved decoder checkpoint understood by
                CVAE.load_decoder.
        """
        print("Loading saved CVAE")
        self.cvae.load_decoder(decoder_path)
    def test_single(self,
                    env_id,
                    sample_size=1000,
                    c_test=None,
                    visualize=True):
        """Draw `sample_size` CVAE samples for a single condition vector.

        Args:
            env_id: environment id, used for plotting the map.
            sample_size: number of samples to draw from the decoder.
            c_test: 1-D numpy condition vector.
            visualize: if True, also plot and write the samples to disk.

        Returns:
            (sample_size, X_DIM) numpy array of generated states.
        """
        self.cvae.eval()
        # Shape the condition as a batch of one for the model.
        condition = torch.unsqueeze(
            torch.from_numpy(c_test).float().to(self.device), dim=0)
        samples = self.cvae.inference(sample_size=sample_size, c=condition)
        samples = samples.detach().cpu().numpy()

        if visualize:
            self.plot(samples,
                      c_test,
                      env_id=env_id,
                      show=False,
                      write_file=True,
                      suffix=0)
        return samples

    def test(self, epoch, dataloader, write_file=False, suffix=""):
        """Run batched CVAE inference over `dataloader` and log one scatter
        figure per unique test condition to tensorboard.

        Relies on the dataloader yielding TEST_SAMPLES consecutive rows per
        condition, in the same order as self.test_condition_vars (built by
        load_dataset with shuffle=False).

        NOTE(review): `num_epochs` in the progress print below is a free name
        — neither a parameter nor an attribute. It must exist at module level
        or this raises NameError; confirm.

        Args:
            epoch: current epoch, used only for logging/tags.
            dataloader: condition-only dataloader to sample against.
            write_file: forwarded to plot() to dump figures/points to disk.
            suffix: tag appended to the tensorboard figure name (e.g. arm/base).
        """
        x_test_predicted = []
        self.cvae.eval()
        for iteration, batch in enumerate(dataloader):
            # print(batch)
            c_test_data = batch['condition'].float().to(self.device)
            # print(c_test_data[0,:])
            x_test = self.cvae.batch_inference(c=c_test_data)
            # Accumulate on CPU as plain lists; converted to ndarray below.
            x_test_predicted += x_test.detach().cpu().numpy().tolist()
            # print(x_test.shape)
            if iteration % LOG_INTERVAL == 0 or iteration == len(
                    dataloader) - 1:
                print(
                    "Test Epoch {:02d}/{:02d} Batch {:04d}/{:d}, Iteration {}".
                    format(epoch, num_epochs, iteration,
                           len(dataloader) - 1, iteration))

        x_test_predicted = np.array(x_test_predicted)
        # print(x_test_predicted.shape)
        # Draw plot for each unique condition: rows c_i*TEST_SAMPLES ..
        # (c_i+1)*TEST_SAMPLES belong to condition c_i.
        for c_i in range(self.test_condition_vars.shape[0]):
            x_test = x_test_predicted[c_i * TEST_SAMPLES:(c_i + 1) *
                                      TEST_SAMPLES]
            # Fine because c_test is used only for plotting, we dont need arm/base label here
            c_test = self.test_condition_vars[c_i, 1:]
            env_id = self.test_condition_vars[c_i, 0]
            # print(self.test_condition_vars[c_i,:])
            fig = self.plot(x_test,
                            c_test,
                            env_id=env_id,
                            suffix=c_i,
                            write_file=write_file)
            self.cvae.tboard.add_figure(
                'test_epoch_{}/condition_{}_{}'.format(epoch, c_i, suffix),
                fig, 0)
            if c_i % LOG_INTERVAL == 0:
                print("Plotting condition : {}".format(c_i))
        self.cvae.tboard.flush()

        # for c_i in range(c_test_data.shape[0]):
        #     c_test = c_test_data[c_i,:]
        #     c_test_gpu = torch.from_numpy(c_test).float().to(device)

        #     x_test = cvae_model.inference(n=TEST_SAMPLES, c=c_test_gpu)
        #     x_test = x_test.detach().cpu().numpy()
        #     fig = plot(x_test, c_test)
        #     cvae_model.tboard.add_figure('test_epoch_{}/condition_{}'.format(epoch, c_i), fig, 0)

        #     if c_i % 50 == 0:
        #         print("Epoch : {}, Testing condition count : {} ".format(epoch, c_i))

    def train(self,
              run_id=1,
              num_epochs=1,
              initial_learning_rate=0.001,
              weight_decay=0.0001):
        """Train the wrapped CVAE on self.train_dataloader with Adam,
        periodically testing and checkpointing.

        NOTE(review): `run_id` is accepted but never used here (the CVAE was
        already constructed with a run_id in __init__) — confirm whether it
        can be dropped by callers.

        Args:
            run_id: unused; kept for interface compatibility.
            num_epochs: number of passes over the training dataloader.
            initial_learning_rate: Adam learning rate.
            weight_decay: Adam L2 penalty.
        """
        optimizer = torch.optim.Adam(self.cvae.parameters(),
                                     lr=initial_learning_rate,
                                     weight_decay=weight_decay)
        for epoch in range(num_epochs):
            for iteration, batch in enumerate(self.train_dataloader):
                # print(batch['condition'][0,:])
                self.cvae.train()
                x = batch['state'].float().to(self.device)
                c = batch['condition'].float().to(self.device)
                recon_x, mean, log_var, z = self.cvae(x, c)
                # print(recon_x.shape)

                loss = self.cvae.loss_fn(recon_x, x, mean, log_var)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Global step across epochs, used as the tensorboard x-axis
                # and as the checkpoint tag below.
                counter = epoch * len(self.train_dataloader) + iteration
                if iteration % LOG_INTERVAL == 0 or iteration == len(
                        self.train_dataloader) - 1:
                    print(
                        "Train Epoch {:02d}/{:02d} Batch {:04d}/{:d}, Iteration {}, Loss {:9.4f}"
                        .format(epoch, num_epochs, iteration,
                                len(self.train_dataloader) - 1, counter,
                                loss.item()))
                    self.cvae.tboard.add_scalar('train/loss', loss.item(),
                                                counter)

                    # cvae.eval()
                    # c_test = c[0,:]
                    # x_test = cvae.inference(n=TEST_SAMPLES, c=c_test)
                    # x_test = x_test.detach().cpu().numpy()
                    # fig = plot(x_test, c_test)
                    # cvae.tboard.add_figure('test/samples', fig, counter)

            if epoch % TEST_INTERVAL == 0 or epoch == num_epochs - 1:
                # Test CVAE for all c by drawing samples
                if self.data_type != "both":
                    self.test(epoch, self.test_dataloader)
                else:
                    self.test(epoch, self.arm_test_dataloader, suffix="arm")
                    self.test(epoch, self.base_test_dataloader, suffix="base")

            # NOTE(review): `counter` is only bound inside the batch loop; an
            # empty train_dataloader would make this raise NameError.
            if epoch % SAVE_INTERVAL == 0 and epoch > 0:
                self.cvae.save_model_weights(counter)

        self.cvae.save_model_weights('final')