def run_adgmssl_mnist():
    """
    Train an auxiliary deep generative model on the MNIST dataset with 100 evenly distributed labels.
    """
    n_labeled = 100  # The total number of labeled data points.
    n_samples = 100  # The number of sampled labeled data points for each batch.
    n_batches = 600  # The number of batches.
    mnist_data = mnist.load_semi_supervised(n_batches=n_batches, n_labeled=n_labeled, n_samples=n_samples,
                                            filter_std=0.0, seed=123456, train_valid_combine=True)

    n, n_x = mnist_data[0][0].shape  # Datapoints in the dataset, input features.
    bs = n / n_batches  # The batchsize.

    # Initialize the auxiliary deep generative model.
    model = ADGMSSL(n_x=n_x, n_a=100, n_z=100, n_y=10, a_hidden=[500, 500], z_hidden=[500, 500],
                    xhat_hidden=[500, 500], y_hidden=[500, 500], trans_func=rectify, x_dist='bernoulli')

    # Get the training functions.
    f_train, f_test, f_validate, train_args, test_args, validate_args = model.build_model(*mnist_data)

    # Update the default function arguments.
    train_args['inputs']['batchsize'] = bs
    train_args['inputs']['batchsize_labeled'] = n_samples
    train_args['inputs']['beta'] = 0.01 * n
    train_args['inputs']['learningrate'] = 3e-4
    train_args['inputs']['beta1'] = 0.9
    train_args['inputs']['beta2'] = 0.999
    train_args['inputs']['samples'] = 10  # If running on a CPU, set the number of samples to 1.
    test_args['inputs']['samples'] = 1
    validate_args['inputs']['samples'] = 1

    # Evaluate the approximated classification error with 100 MC samples for a good estimate.
    def error_evaluation(*args):
        mean_evals = model.get_output(mnist_data[1][0], 100)
        t_class = np.argmax(mnist_data[1][1], axis=1)
        y_class = np.argmax(mean_evals, axis=1)
        missclass = (np.sum(y_class != t_class, dtype='float32') / len(y_class)) * 100.
        train.write_to_logger("test 100-samples: %0.2f%%." % missclass)

    # Define the training loop. Output training evaluations every epoch and the approximated
    # classification error (the good 100-sample estimate) every 10 epochs.
    train = TrainModel(model=model, anneal_lr=.75, anneal_lr_freq=200, output_freq=1,
                       pickle_f_custom_freq=10, f_custom_eval=error_evaluation)
    train.add_initial_training_notes("Training the auxiliary deep generative model with %i labels." % n_labeled)
    train.train_model(f_train, train_args, f_test, test_args, f_validate, validate_args,
                      n_train_batches=n_batches, n_epochs=3000)
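# A minimal, self-contained illustration of the misclassification-rate computation used in
# error_evaluation above. The arrays below are hypothetical (not from the repository): predictions
# and targets are probability / one-hot matrices, argmax recovers the class indices, and the error
# is the fraction of mismatches expressed as a percentage.
import numpy as np

mean_evals = np.array([[0.8, 0.1, 0.1],   # predicted class 0
                       [0.2, 0.7, 0.1],   # predicted class 1
                       [0.3, 0.3, 0.4]])  # predicted class 2
targets = np.array([[1, 0, 0],            # true class 0
                    [0, 1, 0],            # true class 1
                    [0, 1, 0]])           # true class 1
y_class = np.argmax(mean_evals, axis=1)
t_class = np.argmax(targets, axis=1)
missclass = (np.sum(y_class != t_class, dtype='float32') / len(y_class)) * 100.
print("misclassification: %0.2f%%" % missclass)  # 33.33%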
def run_adgmssl_mnist():
    """
    Evaluate an auxiliary deep generative model on the MNIST dataset with 100 evenly distributed labels.
    """
    # Load the MNIST supervised dataset for evaluation.
    (train_x, train_t), (test_x, test_t), (valid_x, valid_t) = mnist.load_supervised(filter_std=0.0,
                                                                                     train_valid_combine=True)

    # Initialize the auxiliary deep generative model.
    model = ADGMSSL(n_x=train_x.shape[-1], n_a=100, n_z=100, n_y=10, a_hidden=[500, 500], z_hidden=[500, 500],
                    xhat_hidden=[500, 500], y_hidden=[500, 500], trans_func=rectify, x_dist='bernoulli')

    model_id = 20151209002003  # Insert the trained model id here.
    model.load_model(model_id)  # Load the trained model. See configurations in the log file.

    # Evaluate the test error of the ADGM.
    mean_evals = model.get_output(test_x, 100)  # 100 MC samples to get a good estimate for the auxiliary unit.
    t_class = np.argmax(test_t, axis=1)
    y_class = np.argmax(mean_evals, axis=1)
    class_err = np.sum(y_class != t_class) / 100.  # The MNIST test set has 10000 points, so this is the error in %.
    print "test set 100-samples: %0.2f%%." % class_err

    # Evaluate the active units in the auxiliary and latent distributions.
    f_a_mu_logvar = theano.function([model.sym_x_l],
                                    get_output([model.l_a_mu, model.l_a_logvar], model.sym_x_l))
    q_a_mu, q_a_logvar = f_a_mu_logvar(test_x)
    log_pa = -0.5 * (np.log(2 * np.pi) + (q_a_mu ** 2 + np.exp(q_a_logvar)))
    log_qa_x = -0.5 * (np.log(2 * np.pi) + 1 + q_a_logvar)
    diff_pa_qa_x = (log_pa - log_qa_x).mean(axis=(1, 2))
    mean_diff_pa_qa_x = np.abs(np.mean(diff_pa_qa_x, axis=0))

    inputs = {model.l_x_in: model.sym_x_l, model.l_y_in: model.sym_t_l}
    f_z_mu_logvar = theano.function([model.sym_x_l, model.sym_t_l],
                                    get_output([model.l_z_mu, model.l_z_logvar], inputs))
    q_z_mu, q_z_logvar = f_z_mu_logvar(test_x, test_t)
    log_pz = -0.5 * (np.log(2 * np.pi) + (q_z_mu ** 2 + np.exp(q_z_logvar)))
    log_qz_x = -0.5 * (np.log(2 * np.pi) + 1 + q_z_logvar)
    diff_pz_qz_x = (log_pz - log_qz_x).mean(axis=(1, 2))
    mean_diff_pz_qz_x = np.abs(np.mean(diff_pz_qz_x, axis=0))

    plt.figure()
    plt.subplot(111, axisbg='white')
    plt.plot(sorted(mean_diff_pa_qa_x)[::-1], color="#c0392b", label=r"$\log \frac{p(a_i)}{q(a_i|x)}$")
    plt.plot(sorted(mean_diff_pz_qz_x)[::-1], color="#9b59b6", label=r"$\log \frac{p(z_i)}{q(z_i|x)}$")
    plt.grid(color='0.9', linestyle='dashed', axis="y")
    plt.xlabel("stochastic units")
    plt.ylabel(r"$\log \frac{p(\cdot)}{q(\cdot)}$")
    plt.ylim((0, 2.7))
    plt.legend()
    plt.savefig("output/diff.png", format="png")

    # Draw 100 random samples in the latent space with a fixed class y and generate xhat.
    table_size = 10
    samples = 1
    z = np.random.random_sample((table_size ** 2, 100))
    y = np.eye(10, k=0).reshape(10, 1, 10).repeat(10, axis=1).reshape((-1, 10))
    xhat = model.f_xhat(z, y, samples)

    # Arrange the 100 generated digits in a 10x10 image grid and save it.
    plt.figure(figsize=(20, 20), dpi=300)
    i = 0
    img_out = np.zeros((28 * table_size, 28 * table_size))
    for x in range(table_size):
        for y in range(table_size):
            xa, xb = x * 28, (x + 1) * 28
            ya, yb = y * 28, (y + 1) * 28
            im = np.reshape(xhat[i], (28, 28))
            img_out[xa:xb, ya:yb] = im
            i += 1
    plt.matshow(img_out, cmap=plt.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.savefig("output/mnist.png", format="png")
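# The "active units" analysis above plots, for each stochastic unit, the magnitude of
# E_q[log p(.) - log q(.|x)] averaged over the test set. For a standard-normal prior and a
# diagonal-Gaussian posterior this equals the KL divergence KL(q(.|x) || p(.)), so units whose
# posterior has collapsed to the prior score near zero while active units score high. A minimal,
# self-contained sketch with synthetic posterior statistics (all names and values here are
# hypothetical, not produced by the model):
import numpy as np

rng = np.random.RandomState(0)
n_points, n_units = 1000, 5
scales = np.array([0.0, 0.1, 0.5, 1.0, 2.0])    # spread of the posterior means per unit
q_mu = rng.randn(n_points, n_units) * scales     # synthetic q(.|x) means
q_logvar = np.full((n_points, n_units), -0.1)    # synthetic q(.|x) log-variances

log_p = -0.5 * (np.log(2 * np.pi) + q_mu ** 2 + np.exp(q_logvar))  # E_q[log N(.; 0, 1)]
log_q = -0.5 * (np.log(2 * np.pi) + 1 + q_logvar)                  # E_q[log q(.|x)], i.e. minus the entropy
activity = np.abs((log_p - log_q).mean(axis=0))                    # per-unit |E[log p - log q]| = mean KL(q||p)
print(activity)  # near zero for collapsed (inactive) units, large for active ones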