Example #1
    def train(self, lr, num_epochs):
        ctx = d2l.try_gpu()
        embed_size = self.embed_size
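        # Two embedding layers: one for center words, one for context words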
        self.net = nn.Sequential()
        self.net.add(
            nn.Embedding(input_dim=len(self.idx_to_token),
                         output_dim=embed_size),
            nn.Embedding(input_dim=len(self.idx_to_token),
                         output_dim=embed_size))

        self.net.initialize(ctx=ctx, force_reinit=True)
        trainer = gluon.Trainer(self.net.collect_params(), 'adam',
                                {'learning_rate': lr})

        loss = gloss.SigmoidBinaryCrossEntropyLoss()

        for epoch in range(num_epochs):
            start, l_sum, n = time.time(), 0.0, 0
            for batch in self.data_iter:
                center, context_negative, mask, label = [
                    data.as_in_context(ctx) for data in batch
                ]
                with autograd.record():
                    pred = self.skip_gram(center, context_negative,
                                          self.net[0], self.net[1])
                    l = (loss(pred.reshape(label.shape), label, mask) *
                         mask.shape[1] / mask.sum(axis=1))

                l.backward()
                trainer.step(self.batch_size)
                # Accumulate the total loss over all tokens in the epoch
                l_sum += l.sum().asscalar()
                n += l.size
            print('epoch %d, loss %.2f, time %.2fs' %
                  (epoch + 1, l_sum / n, time.time() - start))
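Why the loss above is rescaled by `mask.shape[1] / mask.sum(axis=1)`: `SigmoidBinaryCrossEntropyLoss` averages over all positions, padded ones included, so the rescaling turns it into an average over valid positions only. A minimal self-contained sketch with toy tensors (not the training data above):

from mxnet import nd
from mxnet.gluon import loss as gloss

loss = gloss.SigmoidBinaryCrossEntropyLoss()
pred = nd.zeros((2, 4))                          # identical logits in both rows
label = nd.array([[1, 0, 0, 0], [1, 0, 0, 0]])
mask = nd.array([[1, 1, 1, 1], [1, 1, 0, 0]])    # row 2 has two padded slots
# After rescaling, both rows average over their valid positions only,
# so both print ~0.693 despite row 2 being half padding
l = loss(pred, label, mask) * mask.shape[1] / mask.sum(axis=1)
print(l)
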
Example #2
            lines = [[vocab.bos] + line + [vocab.eos] for line in lines]
        array = torch.tensor([pad(line, max_len, vocab.pad) for line in lines])
        valid_len = (array != vocab.pad).sum(1)
        return array, valid_len

    src_vocab, tgt_vocab = build_vocab(source), build_vocab(target)
    src_array, src_valid_len = build_array(source, src_vocab, max_len, True)
    tgt_array, tgt_valid_len = build_array(target, tgt_vocab, max_len, False)
    train_data = data.TensorDataset(src_array, src_valid_len, tgt_array, tgt_valid_len)
    train_iter = data.DataLoader(train_data, batch_size, shuffle=True)
    return src_vocab, tgt_vocab, train_iter
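
To make the padding step above concrete, here is a self-contained sketch with a hypothetical `pad` helper (the real helper is defined earlier in the source and is assumed to behave the same way):

import torch

def pad(line, max_len, padding_token):
    # Truncate to max_len, or right-pad with padding_token up to max_len
    if len(line) > max_len:
        return line[:max_len]
    return line + [padding_token] * (max_len - len(line))

lines = [[7, 8, 9], [5, 6]]                      # toy token ids
array = torch.tensor([pad(l, 4, 0) for l in lines])
valid_len = (array != 0).sum(1)                  # count non-pad tokens per row
print(array)      # tensor([[7, 8, 9, 0], [5, 6, 0, 0]])
print(valid_len)  # tensor([3, 2])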


embed_size, num_hiddens, num_layers, dropout = 32, 32, 2, 0.0
batch_size, num_steps = 64, 10
lr, num_epochs, ctx = 0.005, 500, d2l.try_gpu()

src_vocab, tgt_vocab, train_iter = load_data_nmt(batch_size, num_steps)
encoder = d2l.Seq2SeqEncoder(
    len(src_vocab), embed_size, num_hiddens, num_layers, dropout)
decoder = Seq2SeqAttentionDecoder(
    len(tgt_vocab), embed_size, num_hiddens, num_layers, dropout)
model = d2l.EncoderDecoder(encoder, decoder)



# Training and prediction

d2l.train_s2s_ch9(model, train_iter, lr, num_epochs, ctx)

Example #3
print('output anchors:', anchors.shape)
print('output class preds:', cls_preds.shape)
print('output bbox preds:', bbox_preds.shape)

# %% [markdown]
# ## Training
#
# ### Data Reading and Initialization
#

# %%
batch_size = 32
train_iter, _ = d2l.load_data_pikachu(batch_size)

ctx, net = d2l.try_gpu(), TinySSD(num_classes=1)
net.initialize(init=init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': 0.2,
    'wd': 5e-4
})

# %% [markdown]
# ### Define Losses

# %%
cls_loss = gloss.SoftmaxCrossEntropyLoss()
bbox_loss = gloss.L1Loss()


def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    # Class loss on the anchor category predictions plus L1 loss on the
    # bounding-box offsets; the masks zero out padded and negative anchors
    cls = cls_loss(cls_preds, cls_labels)
    bbox = bbox_loss(bbox_preds * bbox_masks, bbox_labels * bbox_masks)
    return cls + bbox
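
A toy invocation of `calc_loss` with dummy tensors (hypothetical shapes: one image, two anchors, background plus one object class), just to show how the two terms combine:

from mxnet import nd

cls_preds = nd.zeros((1, 2, 2))    # (batch, num_anchors, num_classes + 1)
cls_labels = nd.zeros((1, 2))      # (batch, num_anchors)
bbox_preds = nd.zeros((1, 8))      # (batch, num_anchors * 4)
bbox_labels = nd.zeros((1, 8))
bbox_masks = nd.ones((1, 8))       # 1 keeps an offset, 0 masks padding
print(calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks))
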
Example #4
# Import the basic packages
import time

import matplotlib.pyplot as plt
import mxnet as mx
import numpy as np
from mxnet import nd, init, gluon, autograd, image
from mxnet.gluon import data as gdata, loss as gloss, nn

import d2l

CTX = d2l.try_gpu()

# Import the DenseVAE model
import sys
sys.path.insert(0, "./models")
from DenseVAE import DenseVAE

all_features = nd.load('../project_data/anime_faces.ndy')[0].as_in_context(CTX)
all_features = nd.shuffle(all_features)

# Use 80% of the data as training data
# since the anime faces have no particular order, just take the first
# 80% as training set
# Prepare the training data and training data iterator
n_train = int(all_features.shape[0] * 0.8)
train_features = all_features[0:n_train]
test_features = all_features[n_train:]
batch_size = 64
train_iter = gdata.DataLoader(train_features,
                              batch_size,
                              shuffle=True,
                              last_batch='keep')
# Extract the training images' shape
_, n_channels, width, height = train_features.shape

Example #5

class TransformerDecoder(d2l.Decoder):
    def __init__(self, vocab_size, num_hiddens, ffn_num_hiddens, num_heads,
                 num_layers, dropout, **kwargs):
        super(TransformerDecoder, self).__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size, num_hiddens)
        self.pos_encoding = d2l.PositionalEncoding(num_hiddens, dropout)
        self.blks = nn.Sequential()
        for i in range(num_layers):
            self.blks.add(
                DecoderBlock(num_hiddens, ffn_num_hiddens, num_heads,
                             dropout, i))
        self.dense = nn.Dense(vocab_size, flatten=False)

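    # Decoder state: encoder outputs, encoder valid lengths, and a per-layer
    # cache of previously decoded representations (for incremental prediction)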
    def init_state(self, enc_outputs, enc_valid_len, *args):
        return [enc_outputs, enc_valid_len, [None]*self.num_layers]

    def forward(self, X, state):
        X = self.pos_encoding(self.embedding(X) * math.sqrt(self.num_hiddens))
        for blk in self.blks:
            X, state = blk(X, state)
        return self.dense(X), state


num_hiddens, num_layers, dropout, batch_size, num_steps = 32, 2, 0.0, 64, 10
lr, num_epochs, ctx = 0.005, 100, d2l.try_gpu()
ffn_num_hiddens, num_heads = 64, 4


# ORG src_vocab, tgt_vocab, train_iter = d2l.load_data_nmt(batch_size, num_steps)
# ToDo : Switch the data from query
"""
def ORG_load_data_nmt(batch_size, num_steps, num_examples=1000):
    text = preprocess_nmt(read_data_nmt())
    source, target = tokenize_nmt(text, num_examples)
    src_vocab = d2l.Vocab(source, min_freq=3, reserved_tokens=['<pad>', '<bos>', '<eos>'])
    tgt_vocab = d2l.Vocab(target, min_freq=3, reserved_tokens=['<pad>', '<bos>', '<eos>'])
    src_array, src_valid_len = build_array(source, src_vocab, num_steps, True)
    tgt_array, tgt_valid_len = build_array(target, tgt_vocab, num_steps, False)
    data_arrays = (src_array, src_valid_len, tgt_array, tgt_valid_len)
    data_iter = d2l.load_array(data_arrays, batch_size)
    return src_vocab, tgt_vocab, data_iter
"""
Example #6
    test_iter = gluon.data.DataLoader(
        test_set, batch_size=batch_size)

    return num_users, num_items, train_iter, test_iter


if __name__ == "__main__":

    d2l.DATA_HUB['ml-100k'] = (
        'http://files.grouplens.org/datasets/movielens/ml-100k.zip',
        'cd4dcac4241c8a4ad7badc7ca635da8a69dddb83')

    ctx = d2l.try_all_gpus()
    num_users, num_items, train_iter, test_iter = split_and_load_ml100k(
        test_ratio=0.1, batch_size=512)

    net = MF(30, num_users, num_items)
    net.initialize(ctx=ctx, force_reinit=True, init=mx.init.Normal(0.01))

    # wd: weight decay, which has the same effect as L2 regularization
    lr, num_epochs, wd, optimizer = 0.002, 20, 1e-5, 'adam'
    loss = gluon.loss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), optimizer,
                            {"learning_rate": lr, 'wd': wd})
    train_recsys_rating(net, train_iter, test_iter, loss, trainer, num_epochs,
                        ctx, evaluator)

    scores = net(np.array([20], dtype='int', ctx=d2l.try_gpu()),
                 np.array([30], dtype='int', ctx=d2l.try_gpu()))
    print(scores)
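
For context, `net(user_ids, item_ids)` above scores user-item pairs. Below is a self-contained NumPy sketch of plain matrix-factorization scoring (hypothetical `P`, `Q`, `b_u`, `b_i`; the actual `MF` block may differ in details). Plain NumPy is imported under a different name to avoid clashing with `mxnet`'s `np` used above:

import numpy as onp

rng = onp.random.default_rng(0)
num_users, num_items, k = 50, 40, 30
P = rng.normal(0, 0.01, (num_users, k))   # user latent factors
Q = rng.normal(0, 0.01, (num_items, k))   # item latent factors
b_u = onp.zeros(num_users)                # user biases
b_i = onp.zeros(num_items)                # item biases

u, i = 20, 30
score = P[u] @ Q[i] + b_u[u] + b_i[i]     # predicted rating for (user 20, item 30)
print(score)
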
Example #7
        x = self.flat(x)
        return x


class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


net = Net()
X = torch.rand(size=(1, 1, 224, 224))
for layer in net.children():
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

lr, num_epochs, batch_size, device = 0.1, 5, 128, d2l.try_gpu()


# Xavier initialization of weights
def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)


net.apply(init_weights)

# Load the Fashion-MNIST data
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

# Loss criterion
criterion = nn.CrossEntropyLoss()
d2l.train_ch5(net, train_iter, test_iter, criterion, num_epochs, batch_size,
              device, lr)

Example #8

def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk
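
`resnet_block` assumes a `Residual` module that is not shown in this snippet. Here is a minimal sketch matching the call signatures above, modeled on the standard d2l PyTorch version:

import torch.nn as nn
import torch.nn.functional as F


class Residual(nn.Module):
    def __init__(self, input_channels, num_channels, use_1x1conv=False,
                 strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3,
                               padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3,
                               padding=1)
        # Optional 1x1 convolution to match shapes on the skip connection
        self.conv3 = nn.Conv2d(input_channels, num_channels, kernel_size=1,
                               stride=strides) if use_1x1conv else None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return F.relu(Y + X)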


class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.BatchNorm2d(64), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3 = nn.Sequential(*resnet_block(64, 128, 2))
b4 = nn.Sequential(*resnet_block(128, 256, 2))
b5 = nn.Sequential(*resnet_block(256, 512, 2))
net = nn.Sequential(b1, b2, b3, b4, b5, nn.AdaptiveMaxPool2d((1, 1)),
                    Flatten(), nn.Linear(512, 10))

lr, num_epochs, batch_size, device = 0.05, 5, 256, d2l.try_gpu()


def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)


net.apply(init_weights)
criterion = nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch5(net, train_iter, test_iter, criterion, num_epochs, batch_size,
              device, lr)

Example #9

import torch
import torch.nn.functional as F

import d2l

corpus_indices, vocab = d2l.load_data_time_machine()

F.one_hot(torch.Tensor([0, 2]).long(), len(vocab))


def to_onehot(X, size):
    return F.one_hot(X.long().transpose(0, -1), size)


X = torch.arange(10).reshape((2, 5))
inputs = to_onehot(X, len(vocab))
len(inputs), inputs[0].shape

num_inputs, num_hiddens, num_outputs = len(vocab), 512, len(vocab)
ctx = d2l.try_gpu()
print('Using', ctx)


# Create the parameters of the model, initialize them and attach gradients
def get_params():
    def _one(shape):
        return torch.empty(shape, device=ctx).normal_(std=0.01)

    # Hidden layer parameters
    W_xh = _one((num_inputs, num_hiddens))
    W_hh = _one((num_hiddens, num_hiddens))
    b_h = torch.zeros(num_hiddens, device=ctx)
    # Output layer parameters
    W_hq = _one((num_hiddens, num_outputs))
    b_q = torch.zeros(num_outputs, device=ctx)
    # Attach gradients to all parameters and return them
    params = [W_xh, W_hh, b_h, W_hq, b_q]
    for param in params:
        param.requires_grad_(True)
    return params

Example #10

def train(X, contents_Y, styles_Y, ctx, lr, num_epochs, lr_decay_epoch):
    X, styles_Y_gram, trainer = get_inits(X, ctx, lr, styles_Y)
    animator = d2l.Animator(xlabel='epoch', ylabel='loss',
                            xlim=[1, num_epochs], ncols=2, figsize=(7, 2.5),
                            legend=['content', 'style', 'TV'])
    for epoch in range(num_epochs):
        with autograd.record():
            contents_Y_hat, styles_Y_hat = extract_features(
                X, content_layers, style_layers)
            contents_l, styles_l, tv_l, l = compute_loss(
                X, contents_Y_hat, styles_Y_hat, contents_Y, styles_Y_gram)
        l.backward()
        trainer.step(1)
        nd.waitall()
        if epoch % lr_decay_epoch == 0:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        if epoch % 10 == 0:
            animator.add(epoch, [nd.add_n(*contents_l).asscalar(),
                                 nd.add_n(*styles_l).asscalar(),
                                 tv_l.asscalar()])
        if epoch % 100 == 0:
            d2l.plt.imsave('neural-style' + str(epoch) + '.png',
                           postprocess(X).asnumpy())
    return X

ctx, image_shape = d2l.try_gpu(), (120, 200)
net.collect_params().reset_ctx(ctx)
content_X, contents_Y = get_contents(image_shape, ctx)
_, styles_Y = get_styles(image_shape, ctx)
output = train(content_X, contents_Y, styles_Y, ctx, 0.01, 200, 200)
output1 = postprocess(output)
output1 = output1.asnumpy()
matplotlib.image.imsave('name1.png', output1)

ctx, image_shape = d2l.try_gpu(), (758, 948) 
_, content_Y = get_contents(image_shape, ctx)
_, style_Y = get_styles(image_shape, ctx)
X = preprocess(postprocess(output) * 255, image_shape)
output = train(X, content_Y, style_Y, ctx, 0.01, 300, 300)

Example #11

net = nn.Sequential(nn.Conv2d(1, 96, kernel_size=11, stride=4),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nn.Conv2d(96, 256, kernel_size=5, padding=2),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nn.Conv2d(256, 384, kernel_size=3, padding=1),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(384, 384, kernel_size=3, padding=1),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(384, 256, kernel_size=3, padding=1),
                    nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3,
                                                        stride=2), Flatten(),
                    nn.Dropout(p=0.5, inplace=True),
                    nn.Linear(in_features=6400, out_features=4096), nn.ReLU(),
                    nn.Dropout(p=0.5, inplace=True),
                    nn.Linear(in_features=4096, out_features=4096), nn.ReLU(),
                    nn.Linear(in_features=4096, out_features=10))

X = torch.randn(size=(1, 1, 224, 224))

for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'Output shape:\t', X.shape)

batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

lr, num_epochs, device = 0.01, 5, d2l.try_gpu()


def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)


net.apply(init_weights)
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.0005)
criterion = nn.CrossEntropyLoss()
d2l.train_ch5(net, train_iter, test_iter, criterion, num_epochs, batch_size,
              device, lr)

Example #12

def train_VAE_GAN(vae_net,
                  disc_net,
                  train_features,
                  test_features,
                  test_results_dir,
                  vae_parameters_path=None,
                  batch_size=64,
                  init_lr=0.001,
                  pbp_weight=1,
                  disc_loss_mul=10,
                  n_epochs=200,
                  n_solo_epochs=0,
                  max_disc_loss=999,
                  variable_pbp_weight='constant',
                  pbp_weight_decay=1,
                  CTX=d2l.try_gpu()):

    # vae_net is a VAE network (most likely a ConvVAE with 512 latent
    # variables, 32 base channels, and a 3 * 64 * 64 output shape)

    # disc_net is a discriminator network (most likely a ResNet) whose
    # output has shape (batch_size, 1)

    # test_results_dir is the directory (must end with a slash /) that contains
    # the validation images after all epochs were run

    # vae_parameters_path is the path (directory + filename) that the trained
    # VAE's model parameters will be saved to

    # n_solo_epochs indicates the number of epochs that the VAE trains with
    # no discriminator; n_solo_epochs must be no larger than n_epochs, and
    # the number of epochs trained with the discriminator is
    # n_epochs - n_solo_epochs

    # max_disc_loss is the maximum loss beyond which the discriminator's loss
    # will not be used in updating VAE in the generator cycle

    # If variable_pbp_weight is 'constant', the pbp weight remains constant
    # for all epochs (except when training solo, in which case the pbp
    # weight is set to 1 and reverts to the specified value after the solo
    # epochs are done).
    #
    # If variable_pbp_weight is 'decay', then every 25 combo epochs the
    # pbp_weight decreases by the constant factor pbp_weight_decay
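    #
    # For example (hypothetical values), with pbp_weight=1.0 and
    # pbp_weight_decay=0.5, the weight becomes 0.5 after combo epoch 25,
    # 0.25 after combo epoch 50, and so on, per the (1 + epoch) % 25 check
    # further below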

    #############################################################################
    ## MODEL INITIALIZATION AND TRAINER
    #############################################################################
    #
    # Initialize the VAE network and get its trainer
    print(
        '[STATE]: Initializing model parameters and constructing Gluon trainers'
    )
    # Set the pbp weight to the desired value
    vae_net.pbp_weight = pbp_weight
    vae_net.collect_params().initialize(mx.init.Xavier(),
                                        force_reinit=True,
                                        ctx=CTX)
    vae_trainer = gluon.Trainer(vae_net.collect_params(), 'adam',
                                {'learning_rate': init_lr})
    # Initialize the discriminator network and get its trainer
    disc_net.collect_params().initialize(mx.init.Xavier(),
                                         force_reinit=True,
                                         ctx=CTX)
    disc_trainer = gluon.Trainer(disc_net.collect_params(), 'adam',
                                 {'learning_rate': init_lr})

    #############################################################################
    ## Output file writer initialization
    #############################################################################
    #
    # Open a file to write to for training statistics; the training statistics csv
    # will be written to the results directory
    csv_writer = None
    try:
        csv_writer = open(test_results_dir + 'training_statistics.csv', 'w')
        print('[STATE]: Writing training statistics to ' + test_results_dir +
              'training_statistics.csv')
    except:
        print(
            '[ERROR]: test results directory not valid, writing training statistics to main directory'
        )
        csv_writer = open('./training_statistics.csv', 'w')
    # CSV file needs to open with a header that is the column names
    csv_writer.write('epoch,vae_loss,disc_loss,time_consumed\n')

    # Open a file to write README.md for displaying validation images; the README
    # file will be written to the results directory
    readme_writer = None
    try:
        readme_writer = open(test_results_dir + 'README.md', 'w')
        print('[STATE]: Writing README report to ' + test_results_dir +
              'README.md')
    except:
        print(
            '[ERROR]: test results directory not valid, writing readme to main directory'
        )
        readme_writer = open('./README.md', 'w')
    # Write a few lines to the README recording the hyperparameters
    readme_writer.write('n_latent:{} \n\n'.format(vae_net.n_latent))
    readme_writer.write('n_base_channels:{} \n\n'.format(
        vae_net.n_base_channels))
    if variable_pbp_weight == 'constant':
        readme_writer.write('pixel-by-pixel loss weight:{} \n\n'.format(
            vae_net.pbp_weight))
    elif variable_pbp_weight == 'decay':
        readme_writer.write(
            'pixel-by-pixel loss weight initially {} and decay by {} every 25 combo epochs \n\n'
            .format(vae_net.pbp_weight, pbp_weight_decay))
    readme_writer.write('n_solo_epochs:{} \n\n'.format(n_solo_epochs))
    readme_writer.write('n_combo_epochs:{} \n\n'.format(n_epochs -
                                                        n_solo_epochs))
    readme_writer.write('max_disc_loss :{} \n\n'.format(max_disc_loss))

    #############################################################################
    ## Data iterator
    #############################################################################
    #
    # Load training features into an iterator
    train_iter = gdata.DataLoader(train_features,
                                  batch_size,
                                  shuffle=True,
                                  last_batch='keep')
    sample_size = train_features.shape[0]
    print('[STATE]: {} training samples loaded into iterator'.format(
        sample_size))

    #############################################################################
    ## Training parameters
    #############################################################################
    #
    # Figure out the number of epochs trained with VAE and Discriminator together
    n_combo_epochs = n_epochs - n_solo_epochs
    print(
        '[STATE]: {} solo epochs and {} combo epochs are to be trained'.format(
            n_solo_epochs, n_combo_epochs))

    #############################################################################
    ## Training
    #############################################################################
    #
    # Print a message, then start training
    print('[STATE]: Training started')

    # First train the solo rounds; during solo training the PBP weight is 1.
    # It must be reverted afterwards to the specified constant (or varied, if
    # a variable PBP weight was requested), so keep a copy of the specified
    # PBP weight to restore later
    specified_pbp_weight = vae_net.pbp_weight
    vae_net.pbp_weight = 1
    for epoch in range(n_solo_epochs):

        # Initialize a list that records the average VAE loss per batch
        batch_losses = []
        epoch_start_time = time.time()

        # Iterate through the epochs
        for batch_features in train_iter:
            # Load the batch into the appropriate context
            batch_features = batch_features.as_in_context(CTX)

            # Compute the loss and gradients, then update parameters with
            # the trainer
            with autograd.record():
                loss = vae_net(batch_features)
                loss.backward()
            vae_trainer.step(batch_features.shape[0])
            # Compute the mean loss among the batch, append it onto the batch
            # losses list
            batch_losses.append(nd.mean(loss).asscalar())

        # Compute the training loss per epoch by a mean of batch losses
        epoch_train_loss = np.mean(batch_losses)
        epoch_stop_time = time.time()
        time_consumed = epoch_stop_time - epoch_start_time

        # Generate the epoch report and write it to the README file
        # and print it
        epoch_report_str = 'Epoch{}, Training loss {:.10f}, Time used {:.2f}'.format(
            epoch, epoch_train_loss, time_consumed)
        readme_writer.write(epoch_report_str + '\n\n')
        print('[STATE]: ' + epoch_report_str)

    # Now that all solo rounds are over, revert the PBP weight of the vae back to the original
    # specified value
    vae_net.pbp_weight = specified_pbp_weight

    # Now train the combo rounds; use SigmoidBinaryCrossEntropyLoss for the
    # discriminator loss
    disc_loss_func = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)

    # Define an integer flag that is 0 when the discriminator's loss in an
    # epoch is larger than the specified max_disc_loss (the discriminator is
    # bad, so don't follow it) and 1 otherwise. Before any training, set it
    # to 1
    use_disc_loss = 1
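    # E.g. with max_disc_loss=0.5 (a hypothetical threshold), any epoch whose
    # mean discriminator loss exceeds 0.5 makes the next epoch's generator
    # update drop the adversarial term and train on the VAE loss alone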
    for epoch in range(n_solo_epochs, n_epochs):
        start_time = time.time()

        # Initialize the lists that records the average loss within each batch
        vae_batch_losses = []
        disc_batch_losses = []

        # Iterate through the batches
        for batch_features in train_iter:
            # Load the batch into the appropriate context
            batch_features = batch_features.as_in_context(CTX)
            # Record the actual batch size; the last batch may be smaller
            # than the specified batch_size (last_batch='keep')
            act_batch_size = batch_features.shape[0]

            # Generate some 1s and 0s for distinguishing genuine images from
            # generated images
            genuine_labels = nd.ones((act_batch_size, ), ctx=CTX)
            generated_labels = nd.zeros((act_batch_size, ), ctx=CTX)

            ############################################################################
            # UPDATE THE DISCRIMINATOR NETWORK
            ############################################################################
            with autograd.record():

                # Train with genuine images: make predictions on genuine images
                genuine_logit_preds = disc_net(batch_features)
                genuine_loss = disc_loss_func(genuine_logit_preds,
                                              genuine_labels)

                # Train with generated images: make predictions on generated images
                generated_features = vae_net.generate(batch_features)
                generated_logit_preds = disc_net(generated_features)
                generated_loss = disc_loss_func(generated_logit_preds,
                                                generated_labels)

                # Total loss is loss with genuine and with generated images
                disc_loss = genuine_loss + generated_loss
                disc_loss.backward()
                disc_batch_losses.append(nd.mean(disc_loss).asscalar())

            # Update the parameters in the convolutional discriminator
            disc_trainer.step(act_batch_size)

            ############################################################################
            # UPDATE THE VAE NETWORK
            ############################################################################
            with autograd.record():

                # Compute the discriminator loss by letting the discriminator network
                # make predictions on the generated images
                generated_features = vae_net.generate(batch_features)
                generated_logit_preds = disc_net(generated_features)
                batch_disc_loss = disc_loss_func(generated_logit_preds,
                                                 genuine_labels)

                # Sum the VAE loss and the discriminator loss (scaled by
                # disc_loss_mul), gating the adversarial term with
                # use_disc_loss: 1 if the discriminator was good enough in
                # the previous epoch, 0 otherwise
                gen_loss = vae_net(
                    batch_features
                ) + batch_disc_loss * disc_loss_mul * use_disc_loss
                gen_loss.backward()

                # Record the VAE batch loss's average
                vae_batch_losses.append(nd.mean(gen_loss).asscalar())

            # Update the parameters in the VAE network
            vae_trainer.step(act_batch_size)

        ############################################################################
        # NEAR THE END OF THIS EPOCH
        ############################################################################

        # Compute some summary metrics of this epoch
        stop_time = time.time()
        time_consumed = stop_time - start_time
        epoch_disc_train_loss = np.mean(disc_batch_losses)
        epoch_vae_train_loss = np.mean(vae_batch_losses)

        # If variable_pbp_weight is set to decay, then decay the pbp weight
        if variable_pbp_weight == 'decay':
            if (1 + epoch) % 25 == 0:
                vae_net.pbp_weight = vae_net.pbp_weight * pbp_weight_decay
                print('VAE PBP weight adjusted to {:.10f}'.format(
                    vae_net.pbp_weight))

        # Check if discriminator is good enough at the end of this epoch
        # if good enough, keep use_disc_loss at 1
        if epoch_disc_train_loss <= max_disc_loss:
            use_disc_loss = 1
        else:
            # Note that even if use_disc_loss is set to 0
            # discriminator will still be trained in the next epoch,
            # just its loss not used in the VAE update cycle
            use_disc_loss = 0

        # Generate the README line and the csv line, and write them
        epoch_README_report = 'Epoch{}, VAE Training loss {:.5f}, ResNet Training loss {:.10f}, Time used {:.2f}'
        epoch_README_report = epoch_README_report.format(
            epoch, epoch_vae_train_loss, epoch_disc_train_loss, time_consumed)
        epoch_CSV_report = '{},{:.10f},{:.10f},{:.2f}'.format(
            epoch, epoch_vae_train_loss, epoch_disc_train_loss, time_consumed)
        readme_writer.write(epoch_README_report + '\n\n')
        csv_writer.write(epoch_CSV_report + '\n')
        print('[STATE]: ' + epoch_README_report)

    ############################################################################
    # END OF TRAINING, now onto the validation process
    ############################################################################
    # Close the CSV writer because there is nothing left to write
    csv_writer.close()

    # Save model parameters; if vae_parameters_path is not valid, save them
    # to the main directory instead
    try:
        vae_net.save_parameters(vae_parameters_path)
    except:
        print(
            '[ERROR]: VAE parameters path is not valid; parameters will be saved to main directory'
        )
        vae_net.save_parameters('./recent_model.params')

    # Define the number of validation images to generate
    # then use the vae_net to generate them
    n_validations = 10
    img_arrays = vae_net.generate(
        test_features[0:n_validations].as_in_context(CTX)).asnumpy()

    for i in range(n_validations):
        # Write a line in the README report displaying the generated image
        readme_writer.write('![' + str(i) + '](./' + str(i) + '.png)')

        # Reshape the output from (n_channels, width, height) to (width, height, n_channels)
        # Note that the vae_net instance already has such information regarding
        # the training images
        img_array = img_arrays[i].reshape(
            (vae_net.out_width, vae_net.out_height, vae_net.n_channels))

        # Show the plot, save it. If test_results_dir is not valid,
        # save it to main directory
        plt.imshow(img_array)
        try:
            plt.savefig(test_results_dir + str(i) + '.png')
            print('[STATE]: ' + test_results_dir + str(i) + '.png' + ' saved')

        except:
            print(
                '[ERROR]: test results directory not valid, saving images to main directory'
            )
            plt.savefig('./' + str(i) + '.png')
        plt.close()

    # Close the README writer
    readme_writer.close()
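
To isolate the two-cycle update that the loop above performs, here is a self-contained toy version with hypothetical two-layer Dense nets standing in for the ConvVAE generator and the ResNet discriminator (a sketch of the pattern, not the project's actual models):

import mxnet as mx
from mxnet import nd, autograd, gluon, init
from mxnet.gluon import nn, loss as gloss

ctx = mx.cpu()
gen = nn.Sequential()
gen.add(nn.Dense(16, activation='relu'), nn.Dense(4))
disc = nn.Sequential()
disc.add(nn.Dense(16, activation='relu'), nn.Dense(1))
for net in (gen, disc):
    net.initialize(init.Xavier(), ctx=ctx)
gen_trainer = gluon.Trainer(gen.collect_params(), 'adam',
                            {'learning_rate': 1e-3})
disc_trainer = gluon.Trainer(disc.collect_params(), 'adam',
                             {'learning_rate': 1e-3})
loss_fn = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)

real = nd.random.normal(shape=(8, 4), ctx=ctx)    # toy "genuine" samples
noise = nd.random.normal(shape=(8, 4), ctx=ctx)   # toy generator input
ones, zeros = nd.ones((8,), ctx=ctx), nd.zeros((8,), ctx=ctx)

# Discriminator cycle: push genuine samples toward 1, generated toward 0
with autograd.record():
    d_loss = loss_fn(disc(real), ones) + loss_fn(disc(gen(noise)), zeros)
d_loss.backward()
disc_trainer.step(8)

# Generator cycle: push generated samples toward the "genuine" label
with autograd.record():
    g_loss = loss_fn(disc(gen(noise)), ones)
g_loss.backward()
gen_trainer.step(8)
print(d_loss.mean().asscalar(), g_loss.mean().asscalar())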