def build_tower_graph_font(id_):
    tower_x = x[id_ * tf.shape(x)[0] // FLAGS.num_gpus:
                (id_ + 1) * tf.shape(x)[0] // FLAGS.num_gpus]
    n = tf.shape(tower_x)[0]
    x_obs = tf.tile(tf.expand_dims(tower_x, 0), [1, 1, 1])

    def log_joint(observed):
        decoder, _ = VAE(observed, n, is_training)
        log_pz_font, log_pz_char, log_px_z = decoder.local_log_prob(
            ['z_font', 'z_char', 'x'])
        return log_pz_font + log_pz_char + log_px_z

    # Train the font branch
    encoder_font, qz_samples_font = q_net_font(None, tower_x, is_training)
    encoder_char, qz_samples_char = q_net_char(None, tower_x, is_training)
    # Log-densities of the variational samples, needed by zs.sgvb below
    # (variable names assumed from the latent names used in log_joint).
    log_qz_font = encoder_font.local_log_prob('z_font')
    log_qz_char = encoder_char.local_log_prob('z_char')
    char_mean = tf.tile(tf.reduce_mean(qz_samples_char, 0),
                        (tf.shape(qz_samples_font)[0], 1))
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'x': tower_x},
                {'z_font': [qz_samples_font, log_qz_font],
                 'z_char': [char_mean, log_qz_char]},
                axis=0))
    average_loss = tf.reduce_mean(tf.square(qz_samples_char - char_mean))

    font_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='encoder_font') + \
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='decoder')
    char_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='encoder_char')
    font_grads = optimizer.compute_gradients(-lower_bound,
                                             var_list=font_var_list)
    char_grads = optimizer.compute_gradients(average_loss,
                                             var_list=char_var_list)
    return font_grads, char_grads, lower_bound, average_loss
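# The tower function above only returns per-GPU (grad, var) lists; combining
# them follows the usual TF-1.x multi-tower pattern. A minimal sketch, reusing
# the `tf`, `FLAGS`, and `optimizer` already defined in this file; the
# `average_gradients` helper and the usage lines are illustrative, not part of
# the original code.
def average_gradients(tower_grads):
    """Average a list of (grad, var) lists, one list per tower."""
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grads_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        # All towers share the same variables, so take the var from tower 0.
        averaged.append((grad, grads_and_vars[0][1]))
    return averaged

# Possible usage (illustrative):
# tower_outputs = [build_tower_graph_font(i) for i in range(FLAGS.num_gpus)]
# font_grads = average_gradients([o[0] for o in tower_outputs])
# train_font = optimizer.apply_gradients(font_grads)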
def lower_bound_and_log_likelihood(relaxed=False):
    def log_joint(observed):
        model = vae(observed, n, n_x, n_z, n_k, tau_p, n_particles, relaxed)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net({}, x, n_z, n_k, tau_q, n_particles, relaxed)
    qz_samples, log_qz = variational.query('z', outputs=True,
                                           local_log_prob=True)

    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'x': x_obs},
                {'z': [qz_samples, log_qz]}, axis=0))

    # Importance sampling estimate of the marginal log likelihood
    is_log_likelihood = tf.reduce_mean(
        zs.is_loglikelihood(log_joint, {'x': x_obs},
                            {'z': [qz_samples, log_qz]}, axis=0))

    return lower_bound, is_log_likelihood
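# tau_p and tau_q are the Gumbel-Softmax (concrete) temperatures of the prior
# and the variational posterior used when `relaxed=True`. A common choice
# (Jang et al., 2017) is to anneal the temperature exponentially during
# training. A minimal sketch; the schedule constants and the assumption that
# tau_q is fed through a placeholder are illustrative, not taken from this
# file.
import numpy as np

def annealed_tau(step, tau_init=1.0, tau_min=0.5, rate=1e-4):
    """Exponentially decayed temperature, clipped from below."""
    return max(tau_min, tau_init * np.exp(-rate * step))

# Possible usage inside a training loop (illustrative):
# _, lb = sess.run([infer, lower_bound],
#                  feed_dict={x: x_batch, tau_q: annealed_tau(global_step)})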
def log_joint(observed):
    model = vae(observed, n, n_x, n_z, n_particles, is_training)
    log_pz, log_px_z = model.local_log_prob(['z', 'x'])
    return log_pz + log_px_z

variational = q_net({}, x, n_z, n_particles, is_training)
qz_samples, log_qz = variational.query('z', outputs=True,
                                       local_log_prob=True)
# TODO: add tests for repeated calls of flows
qz_samples, log_qz = zs.planar_normalizing_flow(qz_samples, log_qz,
                                                n_iters=n_planar_flows)
qz_samples, log_qz = zs.planar_normalizing_flow(qz_samples, log_qz,
                                                n_iters=n_planar_flows)

lower_bound = tf.reduce_mean(
    zs.sgvb(log_joint, {'x': x_obs},
            {'z': [qz_samples, log_qz]}, axis=0))

# Importance sampling estimate of the log likelihood:
# fast, used for evaluation during training
is_log_likelihood = tf.reduce_mean(
    zs.is_loglikelihood(log_joint, {'x': x_obs},
                        {'z': [qz_samples, log_qz]}, axis=0))

learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
grads = optimizer.compute_gradients(-lower_bound)
infer = optimizer.apply_gradients(grads)

params = tf.trainable_variables()
for i in params:
    print(i.name, i.get_shape())
# Labeled
x_labeled_ph = tf.placeholder(tf.int32, shape=[None, n_x], name='x_l')
x_labeled_obs = tf.tile(tf.expand_dims(x_labeled_ph, 0), [n_particles, 1, 1])
y_labeled_ph = tf.placeholder(tf.int32, shape=[None, n_y], name='y_l')
y_labeled_obs = tf.tile(tf.expand_dims(y_labeled_ph, 0), [n_particles, 1, 1])
variational = qz_xy(x_labeled_ph, y_labeled_ph, n_z, n_particles)
qz_samples, log_qz = variational.query('z', outputs=True,
                                       local_log_prob=True)
labeled_lower_bound = tf.reduce_mean(
    zs.sgvb(log_joint,
            {'x': x_labeled_obs, 'y': y_labeled_obs},
            {'z': [qz_samples, log_qz]}, axis=0))

# Unlabeled
x_unlabeled_ph = tf.placeholder(tf.int32, shape=[None, n_x], name='x_u')
n = tf.shape(x_unlabeled_ph)[0]
y_diag = tf.diag(tf.ones(n_y, dtype=tf.int32))
y_u = tf.reshape(tf.tile(tf.expand_dims(y_diag, 0), [n, 1, 1]), [-1, n_y])
x_u = tf.reshape(tf.tile(tf.expand_dims(x_unlabeled_ph, 1), [1, n_y, 1]),
                 [-1, n_x])
x_unlabeled_obs = tf.tile(tf.expand_dims(x_u, 0), [n_particles, 1, 1])
y_unlabeled_obs = tf.tile(tf.expand_dims(y_u, 0), [n_particles, 1, 1])
variational = qz_xy(x_u, y_u, n_z, n_particles)
qz_samples, log_qz = variational.query('z', outputs=True,
                                       local_log_prob=True)
if __name__ == "__main__": # Build the computation graph n_particles = tf.placeholder(tf.int32, shape=[]) def log_joint(observed): model = toy2d_intractable_posterior(observed, n_particles) log_pz1, log_pz2 = model.local_log_prob(['z1', 'z2']) return log_pz1 + log_pz2 variational, z_mean, z_logstd = mean_field_variational(n_particles) [qz1_samples, log_qz1], [qz2_samples, log_qz2] = variational.query( ['z1', 'z2'], outputs=True, local_log_prob=True) lower_bound = zs.sgvb( log_joint, {}, {'z1': [qz1_samples, log_qz1], 'z2': [qz2_samples, log_qz2]}, axis=0) optimizer = tf.train.AdamOptimizer(learning_rate=0.1) infer = optimizer.minimize(-lower_bound) # Set up figure. fig = plt.figure(figsize=(8, 8), facecolor='white') ax = fig.add_subplot(111, frameon=False) plt.ion() plt.show(block=False) # Set up plotting code. def plot_isocontours(ax, func, xlimits, ylimits, numticks=101): x = np.linspace(*xlimits, num=numticks) y = np.linspace(*ylimits, num=numticks) xx, yy = np.meshgrid(x, y)
y_obs = tf.tile(tf.expand_dims(y, 0), [n_particles, 1, 1])

def log_joint(observed):
    model, _ = var_dropout(observed, x_obs, n, net_size, n_particles,
                           is_training)
    log_pe = model.local_log_prob(e_names)
    log_py_xe = model.local_log_prob('y')
    return tf.add_n(log_pe) / x_train.shape[0] + log_py_xe

variational = q({}, n, net_size, n_particles)
qe_queries = variational.query(e_names, outputs=True, local_log_prob=True)
qe_samples, log_qes = zip(*qe_queries)
log_qes = [log_qe / x_train.shape[0] for log_qe in log_qes]
e_dict = dict(zip(e_names, zip(qe_samples, log_qes)))

lower_bound = tf.reduce_mean(
    zs.sgvb(log_joint, {'y': y_obs}, e_dict, axis=0))

_, h_pred = var_dropout(dict(zip(e_names, qe_samples)),
                        x_obs, n, net_size, n_particles, is_training)
h_pred = tf.reduce_mean(h_pred, 0)
y_pred = tf.argmax(h_pred, 1)
sparse_y = tf.argmax(y, 1)
acc = tf.reduce_mean(tf.cast(tf.equal(y_pred, sparse_y), tf.float32))

learning_rate_ph = tf.placeholder(tf.float32, shape=())
optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
infer = optimizer.minimize(-lower_bound)

params = tf.trainable_variables()
for i in params:
    print('variable name = {}, shape = {}'.format(i.name, i.get_shape()))
def main():
    np.random.seed(1234)
    tf.set_random_seed(1237)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    @zs.reuse('model')
    def bayesianNN(observed, x, n_x, layer_sizes, n_particles):
        with zs.BayesianNet(observed=observed) as model:
            ws = []
            for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
                                                  layer_sizes[1:])):
                w_mu = tf.zeros([1, n_out, n_in + 1])
                ws.append(
                    zs.Normal('w' + str(i), w_mu, std=1.,
                              n_samples=n_particles, group_event_ndims=2))

            # Forward pass
            ly_x = tf.expand_dims(
                tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1]), 3)
            for i in range(len(ws)):
                w = tf.tile(ws[i], [1, tf.shape(x)[0], 1, 1])
                ly_x = tf.concat(
                    [ly_x, tf.ones([n_particles, tf.shape(x)[0], 1, 1])], 2)
                ly_x = tf.matmul(w, ly_x) / \
                    tf.sqrt(tf.to_float(tf.shape(ly_x)[2]))
                if i < len(ws) - 1:
                    ly_x = tf.nn.relu(ly_x)

            y_mean = tf.squeeze(ly_x, [2, 3])
            y_logstd = tf.get_variable(
                'y_logstd', shape=[],
                initializer=tf.constant_initializer(0.))
            y = zs.Normal('y', y_mean, logstd=y_logstd)

        return model, y_mean

    def mean_field_variational(layer_sizes, n_particles):
        with zs.BayesianNet() as variational:
            ws = []
            for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
                                                  layer_sizes[1:])):
                w_mean = tf.get_variable(
                    'w_mean_' + str(i), shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                w_logstd = tf.get_variable(
                    'w_logstd_' + str(i), shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                ws.append(
                    zs.Normal('w' + str(i), w_mean, logstd=w_logstd,
                              n_samples=n_particles, group_event_ndims=2))
        return variational

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x = tf.placeholder(tf.float32, shape=[None, n_x])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [n_x] + n_hiddens + [1]
    w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

    def log_joint(observed):
        model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
        log_pws = model.local_log_prob(w_names)
        log_py_xw = model.local_log_prob('y')
        return tf.add_n(log_pws) + log_py_xw * N

    variational = mean_field_variational(layer_sizes, n_particles)
    qw_outputs = variational.query(w_names, outputs=True, local_log_prob=True)
    latent = dict(zip(w_names, qw_outputs))
    y_obs = tf.tile(tf.expand_dims(y, 0), [n_particles, 1])
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'y': y_obs}, latent, axis=0))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    grads = optimizer.compute_gradients(-lower_bound)
    infer = optimizer.apply_gradients(grads)

    # Prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y_obs})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run(
                    [infer, lower_bound],
                    feed_dict={n_particles: lb_samples,
                               x: x_batch, y: y_batch})
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_lb, test_rmse, test_ll = sess.run(
                    [lower_bound, rmse, log_likelihood],
                    feed_dict={n_particles: ll_samples,
                               x: x_test, y: y_test})
                print('>> TEST')
                print('>> lower bound = {}, rmse = {}, log_likelihood = {}'
                      .format(test_lb, test_rmse, test_ll))
        model, _, _ = vae(ob_dict, n, n_x, n_h, n_z, n_particles)
        log_pz_i, log_ph_z_i, log_px_h = model.local_log_prob(
            ['z', 'h', 'x'])
        log_pz.append(log_pz_i)
        log_ph_z.append(log_ph_z_i)
    log_pz = tf.stack(log_pz, axis=-1)
    log_ph_z = tf.stack(log_ph_z, axis=-1)
    # p(x, h) = p(x|h) * sum_i [p(z_i) * p(h|z_i)]
    return log_px_h + tf.reduce_logsumexp(log_pz + log_ph_z, axis=-1)

variational = q_net(x, n_h, n_particles)
qh_samples, log_qh = variational.query('h', outputs=True,
                                       local_log_prob=True)
lower_bound = tf.reduce_mean(
    zs.sgvb(log_joint, {'x': x_obs},
            {'h': [qh_samples, log_qh]}, axis=0))

optimizer = tf.train.AdamOptimizer(learning_rate)
infer = optimizer.minimize(-lower_bound)

# Computation graph for generating images
n_gen = 100
n_per_class = n_gen // n_z
z_gen = np.zeros([n_gen, n_z])
for i in range(n_z):
    z_gen[i * n_per_class:(i + 1) * n_per_class, i] = 1
z_gen = np.expand_dims(z_gen, axis=0)
_, x_logits, z_onehot = vae({'z': z_gen}, n_gen, n_x, n_h,
y = tf.placeholder(tf.float32, shape=[None])
y_obs = tf.tile(tf.expand_dims(y, 0), [n_particles, 1])
layer_sizes = [n_x] + n_hiddens + [1]
w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

def log_joint(observed):
    model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    log_pws = model.local_log_prob(w_names)
    log_py_xw = model.local_log_prob('y')
    return tf.add_n(log_pws) + log_py_xw * N

variational = mean_field_variational(layer_sizes, n_particles)
qw_outputs = variational.query(w_names, outputs=True, local_log_prob=True)
latent = dict(zip(w_names, qw_outputs))
lower_bound = tf.reduce_mean(
    zs.sgvb(log_joint, {'y': y_obs}, latent, axis=0))

learning_rate_ph = tf.placeholder(tf.float32, shape=[])
optimizer = tf.train.AdamOptimizer(learning_rate_ph)
grads = optimizer.compute_gradients(-lower_bound)
infer = optimizer.apply_gradients(grads)

# Prediction: rmse & log likelihood
observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
observed.update({'y': y_obs})
model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
y_pred = tf.reduce_mean(y_mean, 0)
rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
log_py_xw = model.local_log_prob('y')
log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
    tf.log(std_y_train)
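# The `- tf.log(std_y_train)` term above undoes the target standardization:
# if y was scaled by std_y_train, then by change of variables
# log p(y_orig) = log p(y_scaled) - log(std_y_train). A small self-contained
# check of that identity with scipy; the numbers are illustrative only and not
# part of this file.
import numpy as np
from scipy import stats

std_y = 3.0
y_orig = 2.5
y_scaled = y_orig / std_y
# Density of y_orig under N(0, std_y^2) vs. rescaled density of y_scaled under N(0, 1).
lhs = stats.norm.logpdf(y_orig, loc=0.0, scale=std_y)
rhs = stats.norm.logpdf(y_scaled, loc=0.0, scale=1.0) - np.log(std_y)
assert np.isclose(lhs, rhs)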
def main():
    # Manual seed
    # seed = random.randint(0, 10000)  # random seed
    seed = 1234  # fixed seed; N=100, K=3
    print("Random Seed: ", seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # Load MNIST data
    data_path = os.path.join('../data/', 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid]).astype('float32')

    # Model parameters
    K = 10
    D = 40
    dim_z = K
    dim_h = D
    dim_x = x_train.shape[1]  # 784
    N = x_train.shape[0]

    # Define training/evaluation parameters
    resume = False
    epoches = 50  # 2000
    save_freq = 5
    batch_size = 100
    train_iters = int(np.ceil(N / batch_size))
    learning_rate = 0.001
    anneal_lr_freq = 10
    anneal_lr_rate = 0.9
    n_particles = 20
    n_gen = 100
    result_path = "./results/3_gmvae"

    @zs.reuse(scope='decoder')
    def vae(observed, n, n_particles, is_training,
            dim_h=40, dim_z=10, dim_x=784):
        """Decoder: z --> h --> x

        n: batch_size
        dim_z: K = 10
        dim_h: D = 40
        dim_x: 784
        """
        with zs.BayesianNet(observed=observed) as model:
            normalizer_params = {
                'is_training': is_training,
                'updates_collections': None
            }
            pai = tf.get_variable('pai', shape=[dim_z], dtype=tf.float32,
                                  trainable=True,
                                  initializer=tf.constant_initializer(1.0))
            n_pai = tf.tile(tf.expand_dims(pai, 0), [n, 1])
            z = zs.OnehotCategorical('z', logits=n_pai, dtype=tf.float32,
                                     n_samples=n_particles)
            mu = tf.get_variable(
                'mu', shape=[dim_z, dim_h], dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-1, 1))
            log_sigma = tf.get_variable(
                'log_sigma', shape=[dim_z, dim_h], dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-3, -2))
            h_mean = tf.reshape(
                tf.matmul(tf.reshape(z, [-1, dim_z]), mu),
                [n_particles, -1, dim_h])  # [n_particles, None, dim_h]
            h_logstd = tf.reshape(
                tf.matmul(tf.reshape(z, [-1, dim_z]), log_sigma),
                [n_particles, -1, dim_h])
            h = zs.Normal('h', mean=h_mean, logstd=h_logstd,
                          # n_samples=n_particles,
                          group_event_ndims=1)
            lx_h = layers.fully_connected(
                h, 512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params
            )
            lx_h = layers.fully_connected(
                lx_h, 512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params
            )
            x_logits = layers.fully_connected(
                lx_h, dim_x, activation_fn=None)  # the log odds of being 1
            x = zs.Bernoulli('x', x_logits,
                             # n_samples=n_particles,
                             group_event_ndims=1)
        return model, x_logits, h, z.tensor

    @zs.reuse(scope='encoder')
    def q_net(x, dim_h, n_particles, is_training):
        """Encoder: x --> h"""
        with zs.BayesianNet() as variational:
            normalizer_params = {
                'is_training': is_training,
                # 'updates_collections': None
            }
            lh_x = layers.fully_connected(
                tf.to_float(x), 512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            lh_x = tf.contrib.layers.dropout(lh_x, keep_prob=0.9,
                                             is_training=is_training)
            lh_x = layers.fully_connected(
                lh_x, 512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            lh_x = tf.contrib.layers.dropout(lh_x, keep_prob=0.9,
                                             is_training=is_training)
            h_mean = layers.fully_connected(
                lh_x, dim_h, activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            h_logstd = layers.fully_connected(
                lh_x, dim_h, activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            h = zs.Normal('h', mean=h_mean, logstd=h_logstd,
                          n_samples=n_particles, group_event_ndims=1)
        return variational

    x_ph = tf.placeholder(tf.int32, shape=[None, dim_x], name='x_ph')
    x_orig_ph = tf.placeholder(tf.float32, shape=[None, dim_x],
                               name='x_orig_ph')
    x_bin = tf.cast(
        tf.less(tf.random_uniform(tf.shape(x_orig_ph), 0, 1), x_orig_ph),
        tf.int32)
    is_training_ph = tf.placeholder(tf.bool, shape=[], name='is_training_ph')
    n = tf.shape(x_ph)[0]

    def log_joint(observed):
        z_obs = tf.eye(dim_z, batch_shape=[n_particles, n])
        z_obs = tf.transpose(z_obs, [2, 0, 1, 3])  # [K, n_p, bs, K]
        log_pz_list = []
        log_ph_z_list = []
        log_px_h = None
        for i in range(dim_z):
            observed['z'] = z_obs[i, :]  # the i-th dimension is 1
            model, _, _, _ = vae(observed, n, n_particles, is_training_ph,
                                 dim_h=dim_h, dim_z=dim_z, dim_x=dim_x)
            log_pz_i, log_ph_z_i, log_px_h = model.local_log_prob(
                ['z', 'h', 'x'])
            log_pz_list.append(log_pz_i)
            log_ph_z_list.append(log_ph_z_i)
        log_pz = tf.stack(log_pz_list, axis=0)
        log_ph_z = tf.stack(log_ph_z_list, axis=0)
        # p(x, h) = p(x|h) * sum_z [p(z) * p(h|z)]
        # log p(x, h) = log p(x|h) + logsumexp_z(log p(z) + log p(h|z))
        log_p_xh = log_px_h + tf.reduce_logsumexp(log_pz + log_ph_z, axis=0)
        return log_p_xh

    variational = q_net(x_ph, dim_h, n_particles, is_training_ph)
    qh_samples, log_qh = variational.query('h', outputs=True,
                                           local_log_prob=True)
    x_obs = tf.tile(tf.expand_dims(x_ph, 0), [n_particles, 1, 1])
    lower_bound = zs.sgvb(log_joint, observed={'x': x_obs},
                          latent={'h': [qh_samples, log_qh]}, axis=0)
    mean_lower_bound = tf.reduce_mean(lower_bound)
    with tf.name_scope('neg_lower_bound'):
        neg_lower_bound = tf.reduce_mean(-mean_lower_bound)

    train_vars = tf.trainable_variables()
    with tf.variable_scope('decoder', reuse=True):
        pai = tf.get_variable('pai')
        mu = tf.get_variable('mu')
        log_sigma = tf.get_variable('log_sigma')
    clip_pai = pai.assign(tf.clip_by_value(pai, 0.7, 1.3))

    # _, pai_var = tf.nn.moments(pai, axes=[-1])
    # _, mu_var = tf.nn.moments(mu, axes=[0, 1], keep_dims=False)
    # regularizer = tf.add_n([tf.nn.l2_loss(v) for v in train_vars
    #                         if not 'pai' in v.name and not 'mu' in v.name])
    # loss = neg_lower_bound + pai_var - mu_var  # + 1e-4 * regularizer

    # Loss
    loss = neg_lower_bound  # + 0.001 * tf.nn.l2_loss(mu - 1)

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    grads_and_vars = optimizer.compute_gradients(loss)
    clipped_gvs = [(tf.clip_by_value(grad, -5., 5.), var)
                   for grad, var in grads_and_vars]
    infer = optimizer.apply_gradients(clipped_gvs)

    # Generate images
    z_manual_feed = tf.eye(dim_z, batch_shape=[10])  # [10, K, K]
    z_manual_feed = tf.transpose(z_manual_feed, [1, 0, 2])  # [K, 10, K]
    _, x_logits, _, z_onehot = vae(
        {'z': z_manual_feed}, 10, n_particles=1, is_training=False,
        dim_h=dim_h, dim_z=dim_z, dim_x=dim_x)
    # n and n_particles do not matter here, since z is fed manually
    print('x_logits:', x_logits.shape.as_list())  # [1, 100, 784]
    x_gen = tf.reshape(tf.sigmoid(x_logits), [-1, 28, 28, 1])
    z_gen = tf.argmax(tf.reshape(z_onehot, [-1, dim_z]), axis=1)

    # TensorBoard summaries
    image_for_summ = []
    for i in range(n_gen // 10):
        tmp = [x_gen[j + i * 10, :] for j in range(10)]
        tmp = tf.concat(tmp, 1)
        image_for_summ.append(tmp)
    image_for_summ = tf.expand_dims(tf.concat(image_for_summ, 0), 0)
    print('image_for_summ:', image_for_summ.shape.as_list())
    gen_image_summ = tf.summary.image('gen_images', image_for_summ,
                                      max_outputs=100)
    lb_summ = tf.summary.scalar("lower_bound", mean_lower_bound)
    lr_summ = tf.summary.scalar("learning_rate", learning_rate_ph)
    loss_summ = tf.summary.scalar('loss', loss)
    for var in train_vars:
        tf.summary.histogram(var.name, var)
    for grad, _ in grads_and_vars:
        tf.summary.histogram(grad.name, grad)
    for i in train_vars:
        print(i.name, i.get_shape())

    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    saver = tf.train.Saver(max_to_keep=10)
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.3

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        # Restore from the latest checkpoint
        ckpt_file = tf.train.latest_checkpoint(result_path)
        begin_epoch = 1
        if ckpt_file is not None and resume:
            # Resume training
            print('Restoring model from {}...'.format(ckpt_file))
            begin_epoch = int(ckpt_file.split('.')[-2]) + 1
            saver.restore(sess, ckpt_file)

        x_train_normed = x_train  # no normalization
        x_train_normed_no_shuffle = x_train_normed

        log_dir = './log/3_gmvae/'
        if os.path.exists(log_dir):
            shutil.rmtree(log_dir)
        summary_writer = tf.summary.FileWriter(log_dir,
                                               graph=tf.get_default_graph())

        global mu_res, log_sigma_res, pai_res
        global gen_images, z_gen_res, epoch

        print('training...')
        pai_res_0, mu_res_0, log_sigma_res_0 = sess.run([pai, mu, log_sigma])
        global_step = 0
        for epoch in tqdm(range(begin_epoch, epoches + 1)):
            time_epoch = -time.time()
            if epoch % anneal_lr_freq == 0:
                learning_rate *= anneal_lr_rate
            np.random.shuffle(x_train_normed)  # shuffle training data
            lbs = []
            for t in tqdm(range(train_iters)):
                global_step += 1
                # Get batched data
                x_batch = x_train_normed[t * batch_size:(t + 1) * batch_size]
                x_batch_bin = sess.run(x_bin, feed_dict={x_orig_ph: x_batch})
                # sess.run(clip_pai)
                _, lb, merge_all = sess.run(
                    [infer, mean_lower_bound, merged_summary_op],
                    feed_dict={x_ph: x_batch_bin,
                               learning_rate_ph: learning_rate,
                               is_training_ph: True})
                lbs.append(lb)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, time_epoch, np.mean(lbs)))
            # print(grad_var_res[-3:])
            summary_writer.add_summary(merge_all, global_step=epoch)

            if epoch % save_freq == 0:
                # Save the model
                print('Saving model...')
                save_path = os.path.join(
                    result_path, "gmvae.epoch.{}.ckpt".format(epoch))
                if not os.path.exists(os.path.dirname(save_path)):
                    os.makedirs(os.path.dirname(save_path))
                saver.save(sess, save_path)

                gen_images, z_gen_res = sess.run(
                    [x_gen, z_gen])  # , feed_dict={is_training_ph: False}
                # Dump data
                pai_res, mu_res, log_sigma_res = sess.run(
                    [pai, mu, log_sigma])
                data_dump = {
                    'epoch': epoch,
                    'images': gen_images,
                    'clusters': z_gen_res,
                    'pai_0': pai_res_0,
                    'mu_0': mu_res_0,
                    'log_sigma_0': log_sigma_res_0,
                    'pai_res': pai_res,
                    'mu_res': mu_res,
                    'log_sigma_res': log_sigma_res
                }
                pickle.dump(
                    data_dump,
                    open(os.path.join(
                        result_path,
                        'gmvae_results_epoch_{}.pkl'.format(epoch)), 'wb'),
                    protocol=2)
                save_image_with_clusters(
                    gen_images, z_gen_res,
                    filename="results/3_gmvae/gmvae_epoch_{}.png".format(
                        epoch))

        print('Done')
        pai_res, mu_res, log_sigma_res = sess.run([pai, mu, log_sigma])
        print("Random Seed: ", seed)
        data_dump = {
            'epoch': epoch,
            'images': gen_images,
            'clusters': z_gen_res,
            'pai_0': pai_res_0,
            'mu_0': mu_res_0,
            'log_sigma_0': log_sigma_res_0,
            'pai_res': pai_res,
            'mu_res': mu_res,
            'log_sigma_res': log_sigma_res
        }
        pickle.dump(
            data_dump,
            open(os.path.join(
                result_path,
                'gmvae_results_epoch_{}.pkl'.format(epoch)), 'wb'),
            protocol=2)
        plot_images_and_clusters(gen_images, z_gen_res, epoch,
                                 save_path=result_path, ncol=10)
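# log_joint above marginalizes the discrete cluster variable z in closed form:
# log p(x, h) = log p(x|h) + logsumexp_k(log p(z=k) + log p(h|z=k)).
# A tiny numpy illustration of that logsumexp marginalization for a 1-D h;
# the mixture weights, means, and stds below are made up, purely illustrative.
import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

log_pi = np.log(np.array([0.3, 0.7]))   # log p(z=k)
mu_k = np.array([-1.0, 2.0])            # per-cluster means of p(h|z=k)
sigma_k = np.array([0.5, 1.0])          # per-cluster stds
h = 0.4
log_ph_zk = norm.logpdf(h, loc=mu_k, scale=sigma_k)
log_ph = logsumexp(log_pi + log_ph_zk)  # log p(h) = log sum_k pi_k N(h; mu_k, sigma_k)
# Sanity check against the direct mixture density:
assert np.isclose(np.exp(log_ph),
                  np.sum(np.exp(log_pi) * norm.pdf(h, loc=mu_k, scale=sigma_k)))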
def main():
    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.random.binomial(1, x_train, size=x_train.shape)
    n_x = x_train.shape[1]

    # Define model parameters
    n_z = 40

    @zs.reuse('model')
    def vae(observed, n, n_x, n_z):
        with zs.BayesianNet(observed=observed) as model:
            z_mean = tf.zeros([n, n_z])
            z_logstd = tf.zeros([n, n_z])
            z = zs.Normal('z', z_mean, logstd=z_logstd, group_event_ndims=1)
            lx_z = layers.fully_connected(z, 500)
            lx_z = layers.fully_connected(lx_z, 500)
            x_logits = layers.fully_connected(lx_z, n_x, activation_fn=None)
            x = zs.Bernoulli('x', x_logits, group_event_ndims=1)
        return model, x_logits

    @zs.reuse('variational')
    def q_net(x, n_z):
        with zs.BayesianNet() as variational:
            lz_x = layers.fully_connected(tf.to_float(x), 500)
            lz_x = layers.fully_connected(lz_x, 500)
            z_mean = layers.fully_connected(lz_x, n_z, activation_fn=None)
            z_logstd = layers.fully_connected(lz_x, n_z, activation_fn=None)
            z = zs.Normal('z', z_mean, logstd=z_logstd, group_event_ndims=1)
        return variational

    x = tf.placeholder(tf.int32, shape=[None, n_x], name='x')
    n = tf.shape(x)[0]

    def log_joint(observed):
        model, _ = vae(observed, n, n_x, n_z)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net(x, n_z)
    qz_samples, log_qz = variational.query('z', outputs=True,
                                           local_log_prob=True)
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, observed={'x': x},
                latent={'z': [qz_samples, log_qz]}))

    optimizer = tf.train.AdamOptimizer(0.001)
    infer = optimizer.minimize(-lower_bound)

    # Generate images
    n_gen = 100
    _, x_logits = vae({}, n_gen, n_x, n_z)
    x_gen = tf.reshape(tf.sigmoid(x_logits), [-1, 28, 28, 1])

    # Define training parameters
    epoches = 500
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 1

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epoches + 1):
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer, lower_bound],
                                 feed_dict={x: x_batch})
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % save_freq == 0:
                images = sess.run(x_gen)
                name = "results/vae/vae.epoch.{}.png".format(epoch)
                save_image_collections(images, name)
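# save_image_collections is a project-local utility that is not shown in this
# file. A minimal sketch of the idea -- tile [n, 28, 28, 1] samples into one
# grid PNG -- written as an assumption about what the helper does, not its
# actual implementation, and named differently to avoid clashing with it.
import os
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

def save_image_collections_sketch(images, filename, shape=(10, 10)):
    """Tile generated digits into a shape[0] x shape[1] grid and save a PNG."""
    images = images.reshape([-1, 28, 28])
    nrow, ncol = shape
    canvas = np.ones([nrow * 28, ncol * 28])
    for idx, img in enumerate(images[:nrow * ncol]):
        r, c = idx // ncol, idx % ncol
        canvas[r * 28:(r + 1) * 28, c * 28:(c + 1) * 28] = img
    if os.path.dirname(filename):
        os.makedirs(os.path.dirname(filename), exist_ok=True)
    plt.imsave(filename, canvas, cmap='gray')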
variational, _, _, _ = q_net({}, x, n_z_0, n_z_1, n_z_2, n_particles,
                             is_training)
qz_samples0, log_qz0 = variational.query('z_0', outputs=True,
                                         local_log_prob=True)
qz_samples1, log_qz1 = variational.query('z_1', outputs=True,
                                         local_log_prob=True)
qz_samples2, log_qz2 = variational.query('z_2', outputs=True,
                                         local_log_prob=True)
lower_bound = tf.reduce_mean(
    zs.sgvb(log_joint, {'x': x_obs},
            {'z_0': [qz_samples0, log_qz0],
             'z_1': [qz_samples1, log_qz1],
             'z_2': [qz_samples2, log_qz2]},
            axis=0))

# Importance sampling estimate of the marginal log likelihood
is_log_likelihood = tf.reduce_mean(
    zs.is_loglikelihood(log_joint, {'x': x_obs},
                        {'z_0': [qz_samples0, log_qz0],
                         'z_1': [qz_samples1, log_qz1],
                         'z_2': [qz_samples2, log_qz2]},
                        axis=0))

learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
grads = optimizer.compute_gradients(-lower_bound)