def predict(y, phi_gmm, encoder_layers, decoder_layers, seed=0):
    """Reconstruct ``y`` and pick the most probable mixture component per point.

    Builds the ops inside the ``'prediction'`` name scope. The encoder and
    decoder are created via ``vae.make_encoder`` / ``vae.make_decoder`` without
    initialisation arguments; the caller is expected to have enabled variable
    reuse so that the current parameters are used.

    Args:
        y: data to cluster and reconstruct
        phi_gmm: latent phi param, forwarded to ``e_step``
        encoder_layers: encoder NN architecture (``layerspecs`` of the encoder)
        decoder_layers: decoder NN architecture (``layerspecs`` of the decoder)
        seed: random seed used by ``e_step`` and ``subsample_x``

    Returns:
        The result of ``tf.tuple`` over (decoder mean for the sampled latent
        point, ``argmax`` of the e-step log responsibilities along axis 1).
    """
    with tf.name_scope('prediction'):
        # Encode the input (reusing current encoder parameters).
        encoder_out = vae.make_encoder(y, layerspecs=encoder_layers)

        # E-step with a single sample: cluster responsibilities and latent
        # samples; the last two return values are not needed here.
        latent_per_component, log_resp, _, _ = e_step(
            encoder_out, phi_gmm, 1, name="svae_e_step_predict", seed=seed)

        # Pick one latent sample per data point and drop the sample axis.
        picked = subsample_x(latent_per_component, log_resp, seed)
        latent = picked[:, 0, :]

        # Decode (reusing current decoder parameters); only the mean is used.
        reconstruction, _ = vae.make_decoder(latent, layerspecs=decoder_layers)

        most_probable = tf.argmax(log_resp, axis=1)
        outputs = (reconstruction, most_probable)
        return tf.tuple(outputs, name='prediction')
def inference(y, phi_gmm, encoder_layers, decoder_layers, nb_samples=10, stddev_init_nn=0.01,
              seed=0, name='inference', param_device='/gpu:0'):
    """Build the encoder -> e-step -> decoder inference graph.

    Args:
        y: input data fed to the encoder
        phi_gmm: GMM recognition parameters, forwarded to ``e_step`` and
            returned unchanged
        encoder_layers: ``layerspecs`` for ``vae.make_encoder``
        decoder_layers: ``layerspecs`` for ``vae.make_decoder``
        nb_samples: number of samples requested from ``e_step``
        stddev_init_nn: ``stddev_init`` passed to both encoder and decoder
        seed: random seed passed to encoder, decoder, ``e_step`` and
            ``subsample_x``
        name: name scope under which all ops are created
        param_device: ``param_device`` passed to both encoder and decoder

    Returns:
        Tuple ``(y_reconstruction, x_given_y_phi, x_k_samples, x_samples,
        log_z_given_y_phi, phi_gmm, phi_tilde)`` where ``y_reconstruction`` is
        whatever ``vae.make_decoder`` returns for the per-component samples and
        ``x_samples`` is ``subsample_x(...)[:, 0, :]``.
    """
    # Keyword arguments shared by the encoder and the decoder.
    nn_kwargs = dict(stddev_init=stddev_init_nn, param_device=param_device, seed=seed)

    with tf.name_scope(name):
        # Recognition network (VAE encoder).
        enc_out = vae.make_encoder(y, layerspecs=encoder_layers, **nn_kwargs)

        # E-step: update/sample the local variables. The fourth return value
        # is not used by this function.
        latents_k, log_resp, phi_tilde, _ = e_step(enc_out, phi_gmm, nb_samples, seed=seed)

        # Reconstruction from the per-component latent samples.
        reconstruction = vae.make_decoder(latents_k, layerspecs=decoder_layers, **nn_kwargs)

        # One latent sample per data point, with the sample axis dropped.
        picked = subsample_x(latents_k, log_resp, seed)
        latents = picked[:, 0, :]

        return reconstruction, enc_out, latents_k, latents, log_resp, phi_gmm, phi_tilde
def visualize_svae(ax, config, log_path, ratio_tr=0.7, nb_samples=20, grid_density=100,
                   window=((-20, 20), (-20, 20)), param_device='/cpu:0'):
    """Plot samples drawn from a trained SVAE on top of data points.

    Rebuilds the model graph, restores the latest checkpoint found in
    ``log_path + '/' + generate_log_id(config)``, draws samples from the
    expected prior, decodes them and plots one 2d histogram per mixture
    component on ``ax``. Up to 300 data points are overlaid as a scatter plot.
    Data with more than two columns is projected with a 2-component PCA fitted
    on those data points.

    Relies on the module-level names ``dark_colors``, ``bright_colors``,
    ``markers``, ``data_dot_size`` and ``make_colormap``.

    Args:
        ax: plotting target; its ``hist2d`` and ``scatter`` methods are called
        config: dict providing 'dataset', 'U', 'L', 'K' and 'seed'; it is also
            passed to ``generate_log_id``
        log_path: directory that contains the model's log directory
        ratio_tr: forwarded to ``make_minibatch``
        nb_samples: number of sampling rounds; every round yields 100 draws
        grid_density: ``bins`` argument of ``hist2d``
        window: ``range`` argument of ``hist2d``
        param_device: device used for ``tf.device`` and for the phi_gmm
            variables

    Returns:
        None. The plot is drawn on ``ax``.
    """
    with tf.device(param_device):
        if config['dataset'] in ['mnist', 'fashion']:
            binarise = True
            size_minibatch = 1024
            output_type = 'bernoulli'
        else:
            binarise = False
            size_minibatch = -1
            output_type = 'standard'

        # First we build the model graph so that we can load the learned parameters from a
        # checkpoint. Initialisations don't matter, they'll be overwritten with saver.restore().
        data, lbl, _, _ = make_minibatch(config['dataset'], path_datadir='../datasets',
                                         ratio_tr=ratio_tr, seed_split=0,
                                         size_minibatch=size_minibatch, size_testbatch=-1,
                                         binarise=binarise)

        # define nn-architecture
        encoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (config['L'], 'natparam')]
        decoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (int(data.get_shape()[1]), output_type)]
        sample_size = 100

        if config['dataset'] in ['mnist', 'fashion']:
            # Map entries equal to -1 to 0 and every other entry to 1.
            data = tf.where(tf.equal(data, -1),
                            tf.zeros_like(data, dtype=tf.float32),
                            tf.ones_like(data, dtype=tf.float32))

        with tf.name_scope('model'):
            # NOTE(review): the device is hard-coded here rather than taken from
            # `param_device` -- confirm this is intended.
            gmm_prior, theta = svae.init_mm(config['K'], config['L'], seed=config['seed'],
                                            param_device='/gpu:0')

            theta_copied = niw.natural_to_standard(tf.identity(gmm_prior[1]),
                                                   tf.identity(gmm_prior[2]),
                                                   tf.identity(gmm_prior[3]),
                                                   tf.identity(gmm_prior[4]))
            _, sigma_k = niw.expected_values(theta_copied)
            pi_k_init = tf.nn.softmax(
                tf.random_normal(shape=(config['K'], ), mean=0.0, stddev=1., seed=config['seed']))
            L_k = tf.cholesky(sigma_k)
            mu_k = tf.random_normal(shape=(config['K'], config['L']), stddev=1,
                                    seed=config['seed'])

            with tf.variable_scope('phi_gmm'):
                mu_k = variable_on_device('mu_k', shape=None, initializer=mu_k, trainable=True,
                                          device=param_device)
                L_k = variable_on_device('L_k', shape=None, initializer=L_k, trainable=True,
                                         device=param_device)
                pi_k = variable_on_device('log_pi_k', shape=None, initializer=pi_k_init,
                                          trainable=True, device=param_device)
            phi_gmm = mu_k, L_k, pi_k

            # The output is unused; the call creates the encoder variables that
            # svae.predict() reuses below.
            _ = vae.make_encoder(data, layerspecs=encoder_layers, stddev_init=.1,
                                 seed=config['seed'])

        with tf.name_scope('random_sampling'):
            # compute expected theta_pgm
            beta_k, m_k, C_k, v_k = niw.natural_to_standard(*theta[1:])
            alpha_k = dirichlet.natural_to_standard(theta[0])
            mean, cov = niw.expected_values((beta_k, m_k, C_k, v_k))
            expected_log_pi = dirichlet.expected_log_pi(alpha_k)
            pi = tf.exp(expected_log_pi)

            # Sample from the prior: latent points for every component, and
            # component indices drawn with logits log(pi).
            x_k_samples = tf.contrib.distributions.MultivariateNormalFullCovariance(
                loc=mean, covariance_matrix=cov).sample(sample_size)
            z_samples = tf.multinomial(logits=tf.reshape(tf.log(pi), (1, -1)),
                                       num_samples=sample_size, name='k_samples')
            z_samples = tf.squeeze(z_samples)

            assert z_samples.get_shape() == (sample_size, )
            assert x_k_samples.get_shape() == (sample_size, config['K'], config['L'])

            # compute reconstructions
            y_k_samples, _ = vae.make_decoder(x_k_samples, layerspecs=decoder_layers,
                                              stddev_init=.1, seed=config['seed'])
            assert y_k_samples.get_shape() == (sample_size, config['K'], data.get_shape()[1])

        with tf.name_scope('cluster_sample_data'):
            tf.get_variable_scope().reuse_variables()
            _, clustering = svae.predict(data, phi_gmm, encoder_layers, decoder_layers,
                                         seed=config['seed'])

        # load trained model
        saver = tf.train.Saver()
        model_path = log_path + '/' + generate_log_id(config)
        print(model_path)
        latest_ckpnt = tf.train.latest_checkpoint(model_path)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=sess_config)
        try:
            saver.restore(sess, latest_ckpnt)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                collected_y_samps = []
                collected_z_samps = []
                for _ in range(nb_samples):
                    y_samps, z_samps = sess.run((y_k_samples, z_samples))
                    collected_y_samps.append(y_samps)
                    collected_z_samps.append(z_samps)
                collected_y_samps = np.concatenate(collected_y_samps, axis=0)
                collected_z_samps = np.concatenate(collected_z_samps, axis=0)
                assert collected_y_samps.shape == (nb_samples * sample_size, config['K'],
                                                   data.shape[1])
                assert collected_z_samps.shape == (nb_samples * sample_size, )

                # Use (up to) 300 sample points from the dataset. `clustering` is
                # fetched in the same run but is not used by the plot below.
                data_np, lbl_np, clustering = sess.run(
                    (data[:300], lbl[:300], clustering[:300]))
            finally:
                # Stop and join the queue-runner threads so they do not outlive the call.
                coord.request_stop()
                coord.join(threads)
        finally:
            # Everything needed for plotting is a numpy array by now.
            sess.close()

    # compute PCA if necessary
    pca = PCA(n_components=2).fit(data_np) if data_np.shape[1] > 2 else None

    def _to_2d(points):
        """Project `points` with the fitted PCA, or pass them through if none was fitted."""
        return points if pca is None else pca.transform(points)

    data2d = _to_2d(data_np)

    # Keep the component index next to its samples: components without any
    # sample are skipped, and the colour must still follow the component.
    samples_2d = []
    for k in range(config['K']):
        samps_k = collected_y_samps[collected_z_samps == k, k, :]
        if samps_k.size > 0:
            samples_2d.append((k, _to_2d(samps_k)))

    # plot 2d-histogram (one histogram for each of the K components)
    from matplotlib.colors import LogNorm
    for k, samples in samples_2d:
        ax.hist2d(samples[:, 0], samples[:, 1], bins=grid_density, range=window,
                  cmap=make_colormap(dark_colors[k % len(dark_colors)]),
                  normed=True, norm=LogNorm())

    # overlay histogram with sample datapoints (coloured/marked by their label in `lbl`)
    labels = np.argmax(lbl_np, axis=1)
    for c in np.unique(labels):
        in_class_c = (labels == c)
        color = bright_colors[int(c % len(bright_colors))]
        marker = markers[int(c % len(markers))]
        ax.scatter(data2d[in_class_c, 0], data2d[in_class_c, 1], c=color, marker=marker,
                   s=data_dot_size, linewidths=0)
def visualize_vae(ax, config, log_path, ratio_tr=0.7, nb_samples=20, grid_density=100,
                  window=((-20, 20), (-20, 20)), param_device='/cpu:0'):
    """Plot samples drawn from a trained VAE on top of data points.

    Rebuilds the model graph, restores ``checkpoint-100000`` from
    ``log_path + '/' + generate_log_id(config)``, decodes samples drawn from a
    standard normal prior and plots them as a 2d histogram on ``ax``. Up to 300
    data points are overlaid as a scatter plot. Data with more than two columns
    is projected with a 2-component PCA fitted on those data points.

    Relies on the module-level names ``bright_colors``, ``markers``,
    ``data_dot_size`` and ``make_colormap``.

    Args:
        ax: plotting target; its ``hist2d`` and ``scatter`` methods are called
        config: dict providing 'dataset', 'U', 'L' and 'seed'; it is also
            passed to ``generate_log_id``
        log_path: directory that contains the model's log directory
        ratio_tr: forwarded to ``make_minibatch``
        nb_samples: number of sampling rounds (every round yields 100 draws);
            also passed to ``vae.reparam_trick_sampling``
        grid_density: ``bins`` argument of ``hist2d``
        window: ``range`` argument of ``hist2d``
        param_device: device used for ``tf.device``

    Returns:
        None. The plot is drawn on ``ax``.
    """
    with tf.device(param_device):
        data, lbl, _, _ = make_minibatch(config['dataset'], path_datadir='../datasets',
                                         ratio_tr=ratio_tr, seed_split=0, size_minibatch=-1,
                                         size_testbatch=-1)

        # First we build the model graph so that we can load the learned parameters from a
        # checkpoint. Initialisations don't matter, they'll be overwritten with saver.restore().
        encoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (config['L'], 'natparam')]
        decoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (int(data.get_shape()[1]), 'standard')]
        sample_size = 100

        with tf.name_scope('model'):
            x_mean, x_var_diag = vae.make_encoder(data, layerspecs=encoder_layers,
                                                  stddev_init=.1, seed=config['seed'])
            # The result is not used below; the call is kept so that the graph
            # is built exactly as before.
            vae.reparam_trick_sampling(x_mean, x_var_diag, nb_samples, config['seed'])

            # generate random samples from prior N(x|0,1)
            x_samples = tf.contrib.distributions.MultivariateNormalDiag(
                loc=tf.zeros((config['L'])),
                scale_diag=tf.ones((config['L']))).sample(sample_size)
            y_mean, _ = vae.make_decoder(x_samples, layerspecs=decoder_layers, stddev_init=.1,
                                         seed=config['seed'])
            assert y_mean.get_shape() == (sample_size, data.get_shape()[1])

        saver = tf.train.Saver()
        model_path = log_path + '/' + generate_log_id(config)
        print(model_path)
        # NOTE(review): the checkpoint is pinned to iteration 100000 instead of
        # tf.train.latest_checkpoint(model_path) -- confirm this is intended.
        ckpnt_path = model_path + '/checkpoint-100000'

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=sess_config)
        try:
            saver.restore(sess, ckpnt_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                collected_samples = [sess.run(y_mean) for _ in range(nb_samples)]
                # use (up to) 300 sample points from the dataset
                data_np, lbl_np = sess.run((data[:300], lbl[:300]))
            finally:
                # Stop and join the queue-runner threads so they do not outlive the call.
                coord.request_stop()
                coord.join(threads)
        finally:
            # Everything needed for plotting is a numpy array by now.
            sess.close()

    collected_samples = np.concatenate(collected_samples, axis=0)
    assert collected_samples.shape == (nb_samples * sample_size, data_np.shape[1])

    # compute PCA if necessary
    if data_np.shape[1] > 2:
        pca = PCA(n_components=2).fit(data_np)
        data2d = pca.transform(data_np)
        samples_2d = pca.transform(collected_samples)
    else:
        data2d = data_np
        samples_2d = collected_samples

    # plot 2d-histogram of the decoded prior samples
    from matplotlib.colors import LogNorm
    ax.hist2d(samples_2d[:, 0], samples_2d[:, 1], bins=grid_density, range=window,
              cmap=make_colormap('black'), normed=True, norm=LogNorm())

    # overlay histogram with sample datapoints (coloured/marked by their label in `lbl`)
    labels = np.argmax(lbl_np, axis=1)
    for c in np.unique(labels):
        in_class_c = (labels == c)
        color = bright_colors[int(c % len(bright_colors))]
        marker = markers[int(c % len(markers))]
        ax.scatter(data2d[in_class_c, 0], data2d[in_class_c, 1], c=color, marker=marker,
                   s=data_dot_size, linewidths=0)