def main():
    """Encode MNIST with a restored M1 model and optionally train M2 on it.

    The M1 checkpoint at ``./model_M1/VAE.ckpt`` is restored and its encoder
    outputs (mu, logvar) are computed for the labelled, unlabelled and
    validation inputs. When the ``-train`` command-line flag is given, those
    outputs are stacked side by side and handed to ``M2_vae.train``.
    """
    # Use argparse to decide if user wants to re-train VAE
    cli = argparse.ArgumentParser()
    cli.add_argument("-train", action='store_true')
    args = cli.parse_args()

    # Experiment settings.
    num_labelled = 100
    num_batches = 100
    z_dim = 50
    epochs = 1000
    learning_rate = 0.0003
    alpha = 0.1

    # Uses anglpy module from original paper (linked at top) to split the
    # dataset for semi-supervised training
    mnist_path = 'mnist/mnist_28.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split(
        mnist_path, binarize_y=True)
    x_l, y_l, x_u, y_u = mnist.create_semisupervised(
        train_x, train_y, num_labelled)

    # Every array is transposed before use; the second axis of the
    # transposed arrays supplies x_dim / y_dim below.
    x_lab = x_l.T
    y_lab = y_l.T
    x_ulab = x_u.T
    y_ulab = y_u.T
    x_valid = valid_x.T
    y_valid = valid_y.T
    x_test = test_x.T
    y_test = test_y.T

    x_dim = x_lab.shape[1]
    y_dim = y_lab.shape[1]

    # Restore previously trained VAE, M1, and get parameters of encoded
    # latent variable z from image as input for M2
    M1_model_path = "./model_M1/VAE.ckpt"
    M1_vae = M1(x_dim=x_dim, z_dim=z_dim)

    def encode(images):
        # One forward pass through the M1 encoder; returns [mu, logvar].
        return M1_vae.session.run(
            [M1_vae.encoder_mu, M1_vae.encoder_logvar],
            feed_dict={M1_vae.x: images, M1_vae.phase: True})

    with M1_vae.session:
        M1_vae.saver.restore(M1_vae.session, M1_model_path)
        z1_mu_lab, z1_logvar_lab = encode(x_lab)
        z1_mu_ulab, z1_logvar_ulab = encode(x_ulab)
        z1_mu_valid, z1_logvar_valid = encode(x_valid)

    M2_model_path = "./model_M2/GC.ckpt"
    M2_vae = M2(
        z1_dim=z_dim,
        z2_dim=z_dim,
        y_dim=y_dim,
        num_examples=x_lab.shape[0] + x_ulab.shape[0],
        num_labelled=num_labelled,
        num_batches=num_batches,
        alpha=alpha)

    if args.train:
        # M2 receives mu and logvar concatenated along the feature axis.
        labelled_z1 = np.hstack([z1_mu_lab, z1_logvar_lab])
        unlabelled_z1 = np.hstack([z1_mu_ulab, z1_logvar_ulab])
        valid_z1 = np.hstack([z1_mu_valid, z1_logvar_valid])
        M2_vae.train(
            z1=labelled_z1,
            y=y_lab,
            unlabelled_z1=unlabelled_z1,
            epochs=epochs,
            z1_valid=valid_z1,
            y_valid=y_valid)
# --- Optimisation settings ---
# Learning rate of ADAM
learning_rate = 3e-4
# Discriminatory factor (see equation (9) of http://arxiv.org/pdf/1406.5298v2.pdf)
alpha = 0.1
# Seed for RNG
seed = 31415

# Neural Networks parameterising p(x|z,y), q(z|x,y) and q(y|x)
hidden_layers_px = [500]
hidden_layers_qz = [500]
hidden_layers_qy = [500]

####################
''' Load Dataset '''
####################

mnist_path = 'mnist/mnist_28.pkl.gz'

# Uses anglpy module from original paper (linked at top) to split the dataset
# for semi-supervised training. num_lab must already be defined above this
# section of the script.
train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split(
    mnist_path, binarize_y=True)
x_l, y_l, x_u, y_u = mnist.create_semisupervised(train_x, train_y, num_lab)

# Work with the transposed arrays from here on.
x_lab = x_l.T
y_lab = y_l.T
x_ulab = x_u.T
y_ulab = y_u.T
x_valid = valid_x.T
y_valid = valid_y.T
x_test = test_x.T
y_test = test_y.T

# Dump every split to disk with np.save, in the same order as the
# assignments above; each target name matches its variable name.
for _target, _array in (
        ('x_lab', x_lab),
        ('y_lab', y_lab),
        ('x_ulab', x_ulab),
        ('y_ulab', y_ulab),
        ('x_valid', x_valid),
        ('y_valid', y_valid),
        ('x_test', x_test),
        ('y_test', y_test),
):
    np.save(_target, _array)
# --- Optimisation settings ---
# Learning rate of ADAM
learning_rate = 3e-4
# Discriminatory factor (see equation (9) of http://arxiv.org/pdf/1406.5298v2.pdf)
alpha = 0.1
# Seed for RNG
seed = 31415

# Neural Networks parameterising p(x|z,y), q(z|x,y) and q(y|x)
hidden_layers_px = [500]
hidden_layers_qz = [500]
hidden_layers_qy = [500]

####################
''' Load Dataset '''
####################

mnist_path = 'mnist/mnist_28.pkl.gz'

# Uses anglpy module from original paper (linked at top) to split the dataset
# for semi-supervised training. num_lab must already be defined above this
# section of the script.
train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split(
    mnist_path, binarize_y=True)
x_l, y_l, x_u, y_u = mnist.create_semisupervised(train_x, train_y, num_lab)

# Work with the transposed arrays from here on.
x_lab = x_l.T
y_lab = y_l.T
x_ulab = x_u.T
y_ulab = y_u.T
x_valid = valid_x.T
y_valid = valid_y.T
x_test = test_x.T
y_test = test_y.T

################
''' Load VAE '''
################

VAE_model_path = 'models/VAE_600-600-0.0003-50.cpkt'
# Dimensions with std < min_std are removed before training with GC
min_std = 0.1

# encode_dataset is defined elsewhere; it is given the checkpoint path and
# the std threshold and yields one data object per split.
data_lab, data_ulab, data_valid, data_test = encode_dataset(
    VAE_model_path, min_std)
def main():
    """Decode random latent draws through restored M2 and M1 models into an image grid.

    Steps performed:
      1. Load MNIST and build the labelled/unlabelled split; only the array
         shapes (x_dim, y_dim, example count) are used, to rebuild the model
         graphs.
      2. Restore M2 from ``./model_M2/GC.cpkt`` and, for each grid column,
         run its decoder on a random vector concatenated with a one-hot
         label, then sample from the returned mu / logvar.
      3. Reset the default TensorFlow graph, restore M1 from
         ``./model_M1/VAE.ckpt`` and decode each column of samples to images.
      4. Save the tiled result to ``temp/analogy.png`` (the ``temp``
         directory is created if it is missing).
    """
    import os  # local import: only needed to guarantee the output directory

    num_labelled = 100
    num_batches = 100
    z_dim = 50
    alpha = 0.1

    n_rows = 10    # images per column; also the size of the one-hot identity
    n_cols = 10    # number of decoding passes, one per grid column
    img_side = 28  # width of one image tile in the final grid

    mnist_path = 'mnist/mnist_28.pkl.gz'
    # Uses anglpy module from original paper (linked at top) to split the
    # dataset for semi-supervised training
    train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split(
        mnist_path, binarize_y=True)
    x_l, y_l, x_u, y_u = mnist.create_semisupervised(
        train_x, train_y, num_labelled)
    x_lab, y_lab = x_l.T, y_l.T
    x_ulab, y_ulab = x_u.T, y_u.T
    x_dim = x_lab.shape[1]
    y_dim = y_lab.shape[1]

    # Draw n_rows random z_dim-dimensional vectors and pair them with the
    # rows of an identity matrix (one one-hot label per row).
    rand_vec = np.random.normal(scale=1.5, size=(n_rows, z_dim))
    rand_vec_tile = np.tile(rand_vec, n_cols)
    y_vec = np.eye(n_rows)
    y_vec_tile = np.tile(y_vec, n_cols)
    # NOTE(review): np.tile places copies of rand_vec / y_vec side by side,
    # so (with y_dim == 10) every per-column slice taken below is the same
    # rand_vec / y_vec. Columns therefore differ only by the sampling noise.
    # Confirm this is the intended "analogy" layout.
    z2_vec = np.zeros((n_rows, n_cols * z_dim))

    # NOTE(review): extension is spelled "cpkt" here but "ckpt" for M1 below;
    # it has to match whatever name the M2 checkpoint was saved under.
    M2_model_path = "./model_M2/GC.cpkt"
    M2_GC = M2(
        z1_dim=z_dim,
        z2_dim=z_dim,
        y_dim=y_dim,
        num_examples=x_lab.shape[0] + x_ulab.shape[0],
        num_labelled=num_labelled,
        num_batches=num_batches,
        alpha=alpha)

    with M2_GC.session:
        M2_GC.saver.restore(M2_GC.session, M2_model_path)
        for i in range(n_cols):
            z_cols = slice(i * z_dim, (i + 1) * z_dim)
            y_cols = slice(i * y_dim, (i + 1) * y_dim)
            decoder_input = np.hstack(
                (rand_vec_tile[:, z_cols], y_vec_tile[:, y_cols]))
            sample_mu, sample_logvar = M2_GC.session.run(
                [M2_GC.decoder_z1hat_mu, M2_GC.decoder_z1hat_logvar],
                feed_dict={M2_GC.decoder_combined_input: decoder_input})
            # Sample mu + eps * std, with std = exp(0.5 * logvar).
            noise = np.random.normal(size=(n_rows, z_dim))
            z2_vec[:, z_cols] = sample_mu + noise * np.exp(0.5 * sample_logvar)

    # Clear the default graph before the M1 graph is built.
    tf.reset_default_graph()

    # Plot for analogy
    plot_arr = np.zeros((n_rows * img_side, n_cols * img_side))
    M1_model_path = "./model_M1/VAE.ckpt"
    M1_vae = M1(x_dim=x_dim, z_dim=z_dim)

    with M1_vae.session:
        M1_vae.saver.restore(M1_vae.session, M1_model_path)
        for i in range(n_cols):
            cur_z = z2_vec[:, i * z_dim:(i + 1) * z_dim]
            [generated_img] = M1_vae.session.run(
                [M1_vae.decoder_xhat],
                feed_dict={M1_vae.z: cur_z, M1_vae.phase: True})
            # The n_rows decoded images are stacked vertically into one
            # img_side-wide strip that fills column i of the grid.
            plot_arr[:, i * img_side:(i + 1) * img_side] = np.reshape(
                generated_img, (n_rows * img_side, img_side))

    plt.imshow(plot_arr, cmap="gray")
    plt.title("MNIST analogies")
    # savefig does not create missing directories, so make sure it exists.
    if not os.path.isdir('temp'):
        os.makedirs('temp')
    plt.savefig('temp/analogy.png', format='png')