z = zs.OnehotCategorical('z', z_stacked_logits, dtype=tf.float32, n_samples=n_particles, group_event_ndims=1) return variational if __name__ == '__main__': tf.set_random_seed(1237) np.random.seed(1237) # Load MNIST data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz') x_train, t_train, x_valid, t_valid, x_test, t_test = \ dataset.load_mnist_realval(data_path) x_train = np.vstack([x_train, x_valid]).astype('float32') x_test = np.random.binomial(1, x_test, size=x_test.shape).astype('float32') # Define parameters n_z, n_k = 100, 2 # number of latent variables, categories n_x = x_train.shape[1] tau_p0 = 1.0 tau_q0 = 1.0 anneal_tau_freq = 25 anneal_tau_rate = 0.95 lb_samples = 1 ll_samples = 500 epochs = 3000
lz_x = tf.layers.dense(lz_x, 500, activation=tf.nn.relu) z_mean = tf.layers.dense(lz_x, z_dim) z_logstd = tf.layers.dense(lz_x, z_dim) z = zs.Normal('z', z_mean, logstd=z_logstd, group_ndims=1, n_samples=n_z_per_x) return variational # In[4]: # Load MNIST data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz') x_train, t_train, x_valid, t_valid, x_test, t_test = dataset.load_mnist_realval( data_path) x_train = np.vstack([x_train, x_valid]) y_train = np.vstack([t_train, t_valid]) x_test = np.random.binomial(1, x_test, size=x_test.shape) x_dim = x_train.shape[1] # In[5]: x_train.shape, t_train.shape, x_valid.shape, t_valid.shape # In[6]: # Define model parameters z_dim = 40 # In[7]:
def main():
    """Train the ``build_gen`` model on MNIST with a flow-transformed posterior.

    Samples of ``z`` drawn from ``build_q_net`` go through two successive
    ``zs.planar_normalizing_flow`` calls before entering the ELBO.  The SGVB
    cost is minimized with Adam, and every ``test_freq`` epochs the lower
    bound and an importance-sampling log-likelihood estimate are printed
    for the test set.
    """
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST; validation images are appended to the training set and the
    # test images are binarized once by Bernoulli sampling.
    mnist_file = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, _, x_valid, _, x_test, _ = \
        dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Model/inference sizes
    z_dim = 40
    n_planar_flows = 10

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
    # Dynamic binarization: a pixel becomes 1 when a uniform draw is below
    # its intensity.
    uniform_draw = tf.random_uniform(tf.shape(x_input))
    x = tf.cast(tf.less(uniform_draw, x_input), tf.int32)
    n = tf.placeholder(tf.int32, shape=[], name="n")

    model = build_gen(n, x_dim, z_dim, n_particles)
    posterior = build_q_net(x, z_dim, n_particles)
    z_samples, z_log_prob = posterior.query('z', outputs=True,
                                            local_log_prob=True)
    # TODO: add tests for repeated calls of flows
    for _ in range(2):
        z_samples, z_log_prob = zs.planar_normalizing_flow(
            z_samples, z_log_prob, n_iters=n_planar_flows)

    elbo = zs.variational.elbo(model,
                               observed={"x": x},
                               latent={"z": [z_samples, z_log_prob]},
                               axis=0)
    cost = tf.reduce_mean(elbo.sgvb())
    lower_bound = tf.reduce_mean(elbo)

    # Importance sampling estimates of marginal log likelihood
    is_estimate = zs.is_loglikelihood(
        model, {'x': x}, {'z': [z_samples, z_log_prob]}, axis=0)
    is_log_likelihood = tf.reduce_mean(is_estimate)

    adam = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = adam.minimize(cost)

    # Training/evaluation schedule
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            np.random.shuffle(x_train)
            train_bounds = []
            for step in range(iters):
                lo = step * batch_size
                _, bound = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={
                        x_input: x_train[lo:lo + batch_size],
                        n_particles: 1,
                        n: batch_size
                    })
                train_bounds.append(bound)
            elapsed = time.time() - epoch_start
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, elapsed, np.mean(train_bounds)))

            if epoch % test_freq != 0:
                continue

            # Test images are already binary, so they are fed straight into
            # ``x`` instead of going through ``x_input``.
            test_start = time.time()
            test_lbs, test_lls = [], []
            for step in range(test_iters):
                lo = step * test_batch_size
                test_x_batch = x_test[lo:lo + test_batch_size]
                test_lb = sess.run(lower_bound,
                                   feed_dict={
                                       x: test_x_batch,
                                       n_particles: 1,
                                       n: test_batch_size
                                   })
                test_ll = sess.run(is_log_likelihood,
                                   feed_dict={
                                       x: test_x_batch,
                                       n_particles: 1000,
                                       n: test_batch_size
                                   })
                test_lbs.append(test_lb)
                test_lls.append(test_ll)
            test_elapsed = time.time() - test_start
            print('>>> TEST ({:.1f}s)'.format(test_elapsed))
            print('>> Test lower bound = {}'.format(np.mean(test_lbs)))
            print('>> Test log likelihood (IS) = {}'.format(
                np.mean(test_lls)))
def main():
    """Train the ``build_gen`` / ``build_q_net`` pair on MNIST via the ELBO.

    The SGVB cost is minimized with Adam.  Every ``test_freq`` epochs the
    lower bound and an importance-sampling log-likelihood estimate are
    printed for the test set, and every ``save_freq`` epochs 100 generated
    images are written under ``result_path``.
    """
    # Load MNIST; validation images are appended to the training set and the
    # test images are binarized once by Bernoulli sampling.
    mnist_file = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_train, _, x_valid, _, x_test, _ = \
        dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Latent dimensionality
    z_dim = 40

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
    # Dynamic binarization: a pixel becomes 1 when a uniform draw is below
    # its intensity.
    uniform_draw = tf.random_uniform(tf.shape(x_input))
    x = tf.cast(tf.less(uniform_draw, x_input), tf.int32)
    n = tf.placeholder(tf.int32, shape=[], name="n")

    model = build_gen(x_dim, z_dim, n, n_particles)
    variational = build_q_net(x, z_dim, n_particles)

    elbo = zs.variational.elbo(model, {"x": x},
                               variational=variational,
                               axis=0)
    cost = tf.reduce_mean(elbo.sgvb())
    lower_bound = tf.reduce_mean(elbo)

    # Importance sampling estimates of marginal log likelihood
    is_estimate = zs.is_loglikelihood(model, {"x": x},
                                      proposal=variational,
                                      axis=0)
    is_log_likelihood = tf.reduce_mean(is_estimate)

    adam = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = adam.minimize(cost)

    # Random generation
    generated_mean = model.observe()["x_mean"]
    x_gen = tf.reshape(generated_mean, [-1, 28, 28, 1])

    # Training/evaluation schedule
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 10
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size
    result_path = "results/vae"

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            np.random.shuffle(x_train)
            train_bounds = []
            for step in range(iters):
                lo = step * batch_size
                _, bound = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={
                        x_input: x_train[lo:lo + batch_size],
                        n_particles: 1,
                        n: batch_size
                    })
                train_bounds.append(bound)
            elapsed = time.time() - epoch_start
            print("Epoch {} ({:.1f}s): Lower bound = {}".format(
                epoch, elapsed, np.mean(train_bounds)))

            if epoch % test_freq == 0:
                # Test images are already binary, so they are fed straight
                # into ``x`` instead of going through ``x_input``.
                test_start = time.time()
                test_lbs = []
                test_lls = []
                for step in range(test_iters):
                    lo = step * test_batch_size
                    test_x_batch = x_test[lo:lo + test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: 1,
                                           n: test_batch_size
                                       })
                    test_ll = sess.run(is_log_likelihood,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: 1000,
                                           n: test_batch_size
                                       })
                    test_lbs.append(test_lb)
                    test_lls.append(test_ll)
                test_elapsed = time.time() - test_start
                print(">>> TEST ({:.1f}s)".format(test_elapsed))
                print(">> Test lower bound = {}".format(np.mean(test_lbs)))
                print('>> Test log likelihood (IS) = {}'.format(
                    np.mean(test_lls)))

            if epoch % save_freq == 0:
                images = sess.run(x_gen, feed_dict={n: 100, n_particles: 1})
                file_name = "vae.epoch.{}.png".format(epoch)
                save_image_collections(
                    images, os.path.join(result_path, file_name))
def main():
    """Train ``vae`` / ``q_net`` on MNIST using a ``log_joint`` closure.

    The ELBO is built from explicit posterior samples and their log
    probabilities, and its SGVB cost is minimized with Adam.  Every
    ``test_freq`` epochs the lower bound and an importance-sampling
    log-likelihood estimate are printed for the test set; every
    ``save_freq`` epochs ``n_gen`` generated images are saved under
    ``result_path``.
    """
    # Load MNIST; validation images are appended to the training set and the
    # test images are binarized once by Bernoulli sampling.
    mnist_file = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, _, x_valid, _, x_test, _ = \
        dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Latent dimensionality
    z_dim = 40

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name='x')
    # Dynamic binarization: a pixel becomes 1 when a uniform draw is below
    # its intensity.
    uniform_draw = tf.random_uniform(tf.shape(x_input))
    x = tf.to_int32(tf.less(uniform_draw, x_input))
    n = tf.shape(x)[0]

    def log_joint(observed):
        """Return log p(z) + log p(x|z) of ``vae`` under ``observed``."""
        net = vae(observed, x_dim, z_dim, n, n_particles)
        prior_lp, likelihood_lp = net.local_log_prob(['z', 'x'])
        return prior_lp + likelihood_lp

    variational = q_net({'x': x}, x_dim, z_dim, n_particles)
    z_samples, z_log_prob = variational.query('z', outputs=True,
                                              local_log_prob=True)
    elbo = zs.variational.elbo(log_joint,
                               observed={'x': x},
                               latent={'z': [z_samples, z_log_prob]},
                               axis=0)
    cost = tf.reduce_mean(elbo.sgvb())
    lower_bound = tf.reduce_mean(elbo)

    # Importance sampling estimates of marginal log likelihood
    is_estimate = zs.is_loglikelihood(
        log_joint, {'x': x}, {'z': [z_samples, z_log_prob]}, axis=0)
    is_log_likelihood = tf.reduce_mean(is_estimate)

    adam = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = adam.minimize(cost)

    # Generate images
    n_gen = 100
    gen_net = vae({}, x_dim, z_dim, n_gen)
    x_mean = gen_net.outputs('x_mean')
    x_gen = tf.reshape(x_mean, [-1, 28, 28, 1])

    # Training/evaluation schedule
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 10
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size
    result_path = "results/vae"

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            np.random.shuffle(x_train)
            train_bounds = []
            for step in range(iters):
                lo = step * batch_size
                _, bound = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={x_input: x_train[lo:lo + batch_size],
                               n_particles: 1})
                train_bounds.append(bound)
            elapsed = time.time() - epoch_start
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, elapsed, np.mean(train_bounds)))

            if epoch % test_freq == 0:
                # Test images are already binary, so they are fed straight
                # into ``x`` instead of going through ``x_input``.
                test_start = time.time()
                test_lbs, test_lls = [], []
                for step in range(test_iters):
                    lo = step * test_batch_size
                    test_x_batch = x_test[lo:lo + test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={x: test_x_batch,
                                                  n_particles: 1})
                    test_ll = sess.run(is_log_likelihood,
                                       feed_dict={x: test_x_batch,
                                                  n_particles: 1000})
                    test_lbs.append(test_lb)
                    test_lls.append(test_ll)
                test_elapsed = time.time() - test_start
                print('>>> TEST ({:.1f}s)'.format(test_elapsed))
                print('>> Test lower bound = {}'.format(np.mean(test_lbs)))
                print('>> Test log likelihood (IS) = {}'.format(
                    np.mean(test_lls)))

            if epoch % save_freq == 0:
                images = sess.run(x_gen)
                file_name = "vae.epoch.{}.png".format(epoch)
                save_image_collections(
                    images, os.path.join(result_path, file_name))
def main():
    """Train a categorical-latent ``vae`` on MNIST through a relaxed graph.

    The objectives are built twice: once with ``relaxed=True`` (its SGVB
    cost drives the Adam updates) and once with ``relaxed=False`` (used for
    the test lower bound and the importance-sampling log likelihood).  The
    temperatures fed to ``tau_p`` / ``tau_q`` are multiplied by
    ``anneal_tau_rate`` every ``anneal_tau_freq`` epochs, with floors of
    0.5 and 0.666 respectively.
    """
    tf.set_random_seed(1237)
    np.random.seed(1237)

    # Load MNIST; validation images are appended to the training set and the
    # test images are binarized once by Bernoulli sampling.
    mnist_file = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, _, x_valid, _, x_test, _ = \
        dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid]).astype('float32')
    x_test = np.random.binomial(1, x_test,
                                size=x_test.shape).astype('float32')

    # Define parameters
    n_z, n_k = 100, 2  # number of latent variables, categories
    n_x = x_train.shape[1]
    tau_p0 = 1.0
    tau_q0 = 1.0
    anneal_tau_freq = 25
    anneal_tau_rate = 0.95
    lb_samples = 1
    ll_samples = 500
    epochs = 3000
    batch_size = 64
    iters = x_train.shape[0] // batch_size
    learning_rate = 0.0001
    test_freq = 25
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Build the computation graph
    tau_p = tf.placeholder(tf.float32, shape=[], name="tau_p")
    tau_q = tf.placeholder(tf.float32, shape=[], name="tau_q")
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x_orig = tf.placeholder(tf.float32, shape=[None, n_x], name='x')
    # Binarization is a separate sub-graph: real-valued batches go in through
    # ``x_orig`` and the sampled result is fed back through ``x``.
    uniform_draw = tf.random_uniform(tf.shape(x_orig), 0, 1)
    x_bin = tf.cast(tf.less(uniform_draw, x_orig), tf.int32)
    x = tf.placeholder(tf.int32, shape=[None, n_x], name='x')
    x_obs = tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1])
    n = tf.shape(x)[0]

    def build_objectives(relaxed=False):
        """Return (SGVB cost, mean lower bound, mean IS log likelihood)."""

        def log_joint(observed):
            net = vae(observed, n, n_x, n_z, n_k, tau_p, n_particles,
                      relaxed)
            prior_lp, likelihood_lp = net.local_log_prob(['z', 'x'])
            return prior_lp + likelihood_lp

        variational = q_net({}, x, n_z, n_k, tau_q, n_particles, relaxed)
        z_samples, z_log_prob = variational.query('z', outputs=True,
                                                  local_log_prob=True)

        elbo = zs.variational.elbo(log_joint,
                                   observed={'x': x_obs},
                                   latent={'z': [z_samples, z_log_prob]},
                                   axis=0)
        sgvb_cost = tf.reduce_mean(elbo.sgvb())
        mean_bound = tf.reduce_mean(elbo)

        # Importance sampling estimates of marginal log likelihood
        is_estimate = zs.is_loglikelihood(
            log_joint, {'x': x_obs}, {'z': [z_samples, z_log_prob]}, axis=0)
        mean_is_ll = tf.reduce_mean(is_estimate)

        return sgvb_cost, mean_bound, mean_is_ll

    # For training
    relaxed_cost, relaxed_lower_bound, _ = build_objectives(True)
    # For testing and generating
    _, lower_bound, is_log_likelihood = build_objectives(False)

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    adam = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    infer_op = adam.minimize(relaxed_cost)

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            np.random.shuffle(x_train)

            if epoch % anneal_tau_freq == 0:
                tau_p0 = max(0.5, tau_p0 * anneal_tau_rate)
                tau_q0 = max(0.666, tau_q0 * anneal_tau_rate)

            train_bounds = []
            for step in range(iters):
                lo = step * batch_size
                x_batch_bin = sess.run(
                    x_bin, feed_dict={x_orig: x_train[lo:lo + batch_size]})
                train_feed = {
                    x: x_batch_bin,
                    learning_rate_ph: learning_rate,
                    n_particles: lb_samples,
                    tau_p: tau_p0,
                    tau_q: tau_q0
                }
                _, bound = sess.run([infer_op, relaxed_lower_bound],
                                    feed_dict=train_feed)
                train_bounds.append(bound)
            elapsed = time.time() - epoch_start
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, elapsed, np.mean(train_bounds)))

            if epoch % test_freq != 0:
                continue

            test_start = time.time()
            test_lbs, test_lls = [], []
            for step in range(test_iters):
                lo = step * test_batch_size
                test_feed = {
                    x: x_test[lo:lo + test_batch_size],
                    n_particles: ll_samples,
                    tau_p: tau_p0,
                    tau_q: tau_q0
                }
                test_lb, test_ll = sess.run(
                    [lower_bound, is_log_likelihood], feed_dict=test_feed)
                test_lbs.append(test_lb)
                test_lls.append(test_ll)
            test_elapsed = time.time() - test_start
            print('>>> TEST ({:.1f}s)'.format(test_elapsed))
            print('>> Test lower bound = {}'.format(np.mean(test_lbs)))
            print('>> Test log likelihood (IS) = {}'.format(
                np.mean(test_lls)))
eps = zs.Normal('layer' + str(i) + '/eps', 1., logstd=0.5 * tf.log(alpha + 1e-10), n_samples=n_particles, group_ndims=1) return variational if __name__ == '__main__': tf.set_random_seed(1234) np.random.seed(1234) # Load MNIST data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz') x_train, y_train, x_valid, y_valid, x_test, y_test = \ dataset.load_mnist_realval(data_path, one_hot=False) x_train = np.vstack([x_train, x_valid]).astype('float32') y_train = np.concatenate([y_train, y_valid]).astype('int32') x_train, x_test, _, _ = dataset.standardize(x_train, x_test) n_x = x_train.shape[1] # Define training/evaluation parameters epochs = 500 batch_size = 1000 lb_samples = 10 ll_samples = 100 iters = int(np.floor(x_train.shape[0] / float(batch_size))) test_freq = 3 learning_rate = 0.001 anneal_lr_freq = 100 anneal_lr_rate = 0.75
def main():
    """Train ``vae_conv`` / ``q_net`` on MNIST by maximizing the ELBO.

    The SGVB cost is minimized with Adam (learning rate 1e-4, beta1 0.5).
    Every ``test_freq`` epochs the test lower bound is printed, and every
    ``save_freq`` epochs ``n_gen`` generated images are saved under
    ``result_path``.
    """
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST; validation images are appended to the training set and the
    # test images are binarized once by Bernoulli sampling.
    mnist_file = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_train, _, x_valid, _, x_test, _ = \
        dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Latent dimensionality
    z_dim = 32

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim])
    # Dynamic binarization: a pixel becomes 1 when a uniform draw does not
    # exceed its intensity.
    uniform_draw = tf.random_uniform(tf.shape(x_input))
    x = tf.to_int32(uniform_draw <= x_input)
    n = tf.shape(x)[0]

    def log_joint(observed):
        """Return log p(z) + log p(x|z) of ``vae_conv`` under ``observed``."""
        net, _ = vae_conv(observed, n, x_dim, z_dim, n_particles)
        prior_lp, likelihood_lp = net.local_log_prob(["z", "x"])
        return prior_lp + likelihood_lp

    variational = q_net(x, z_dim, n_particles)
    z_samples, z_log_prob = variational.query("z", outputs=True,
                                              local_log_prob=True)
    elbo = zs.variational.elbo(log_joint,
                               observed={"x": x},
                               latent={"z": [z_samples, z_log_prob]},
                               axis=0)
    cost = tf.reduce_mean(elbo.sgvb())
    lower_bound = tf.reduce_mean(elbo)

    adam = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5)
    infer_op = adam.minimize(cost)

    # Generate images
    n_gen = 100
    _, x_logits = vae_conv({}, n_gen, x_dim, z_dim, 1)
    x_gen = tf.reshape(tf.sigmoid(x_logits), [-1, 28, 28, 1])

    # Training/evaluation schedule
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 10
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size
    result_path = "results/vae_conv"

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            np.random.shuffle(x_train)
            train_bounds = []
            for step in range(iters):
                lo = step * batch_size
                _, bound = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={
                        x_input: x_train[lo:lo + batch_size],
                        n_particles: 1
                    })
                train_bounds.append(bound)
            elapsed = time.time() - epoch_start
            print("Epoch {} ({:.1f}s): Lower bound = {}".format(
                epoch, elapsed, np.mean(train_bounds)))

            if epoch % test_freq == 0:
                # Test images are already binary, so they are fed straight
                # into ``x`` instead of going through ``x_input``.
                test_start = time.time()
                test_lbs = []
                for step in range(test_iters):
                    lo = step * test_batch_size
                    test_lb = sess.run(
                        lower_bound,
                        feed_dict={
                            x: x_test[lo:lo + test_batch_size],
                            n_particles: 1
                        })
                    test_lbs.append(test_lb)
                test_elapsed = time.time() - test_start
                print(">>> TEST ({:.1f}s)".format(test_elapsed))
                print(">> Test lower bound = {}".format(np.mean(test_lbs)))

            if epoch % save_freq == 0:
                print("Saving images...")
                images = sess.run(x_gen)
                file_name = "vae.epoch.{}.png".format(epoch)
                save_image_collections(
                    images, os.path.join(result_path, file_name))
def main():
    """Train ``vae`` / ``q_net`` on MNIST with the REINFORCE estimator.

    The ELBO's ``reinforce`` cost uses the output of ``baseline_net`` as its
    baseline; the sum of that cost and the baseline cost is minimized with
    Adam, after any ops registered in ``UPDATE_OPS``.  Every ``test_freq``
    epochs the lower bound and an importance-sampling log-likelihood
    estimate are printed for the test set with ``is_training`` fed False.
    """
    # Load MNIST; validation images are appended to the training set and the
    # test images are binarized once by Bernoulli sampling.
    mnist_file = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, _, x_valid, _, x_test, _ = \
        dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Latent dimensionality
    z_dim = 40

    # Build the computation graph
    is_training = tf.placeholder(tf.bool, shape=[], name='is_training')
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name='x')
    # Dynamic binarization: a pixel becomes 1 when a uniform draw is below
    # its intensity.
    uniform_draw = tf.random_uniform(tf.shape(x_input))
    x = tf.to_int32(tf.less(uniform_draw, x_input))
    n = tf.shape(x)[0]

    def log_joint(observed):
        """Return log p(z) + log p(x|z) of ``vae`` under ``observed``."""
        net = vae(observed, n, x_dim, z_dim, n_particles, is_training)
        prior_lp, likelihood_lp = net.local_log_prob(['z', 'x'])
        return prior_lp + likelihood_lp

    variational = q_net(x, z_dim, n_particles, is_training)
    z_samples, z_log_prob = variational.query('z', outputs=True,
                                              local_log_prob=True)

    # Baseline output gets a leading axis of size 1 before being handed to
    # ``reinforce``.
    baseline_out = baseline_net(x)
    cx = tf.expand_dims(baseline_out, 0)
    elbo = zs.variational.elbo(log_joint,
                               observed={'x': x},
                               latent={'z': [z_samples, z_log_prob]},
                               axis=0)
    reinforce_cost, baseline_cost = elbo.reinforce(baseline=cx)
    cost = tf.reduce_mean(reinforce_cost + baseline_cost)
    lower_bound = tf.reduce_mean(elbo)

    # Importance sampling estimates of marginal log likelihood
    is_estimate = zs.is_loglikelihood(
        log_joint, {'x': x}, {'z': [z_samples, z_log_prob]}, axis=0)
    is_log_likelihood = tf.reduce_mean(is_estimate)

    adam = tf.train.AdamOptimizer(0.001)
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        infer_op = adam.minimize(cost)

    # Training/evaluation schedule
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            np.random.shuffle(x_train)
            train_bounds = []
            for step in range(iters):
                lo = step * batch_size
                _, bound = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={
                        x_input: x_train[lo:lo + batch_size],
                        is_training: True,
                        n_particles: 1
                    })
                train_bounds.append(bound)
            elapsed = time.time() - epoch_start
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, elapsed, np.mean(train_bounds)))

            if epoch % test_freq != 0:
                continue

            # Test images are already binary, so they are fed straight into
            # ``x`` instead of going through ``x_input``.
            test_start = time.time()
            test_lbs, test_lls = [], []
            for step in range(test_iters):
                lo = step * test_batch_size
                test_x_batch = x_test[lo:lo + test_batch_size]
                test_lb = sess.run(lower_bound,
                                   feed_dict={
                                       x: test_x_batch,
                                       is_training: False,
                                       n_particles: 1
                                   })
                test_ll = sess.run(is_log_likelihood,
                                   feed_dict={
                                       x: test_x_batch,
                                       is_training: False,
                                       n_particles: 1000
                                   })
                test_lbs.append(test_lb)
                test_lls.append(test_ll)
            test_elapsed = time.time() - test_start
            print('>>> TEST ({:.1f}s)'.format(test_elapsed))
            print('>> Test lower bound = {}'.format(np.mean(test_lbs)))
            print('>> Test log likelihood (IS) = {}'.format(
                np.mean(test_lls)))
def main():
    """Train ``build_gen`` / ``build_q_net`` on MNIST with an IWAE bound.

    The objective is ``zs.variational.importance_weighted_objective`` and
    its SGVB cost is minimized with Adam.  Both training and testing feed
    ``lb_samples`` to ``n_particles``; the test bound is printed every
    ``test_freq`` epochs.
    """
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST; validation images are appended to the training set and the
    # test images are binarized once by Bernoulli sampling.
    mnist_file = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_train, _, x_valid, _, x_test, _ = \
        dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Latent dimensionality
    z_dim = 40

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
    # Dynamic binarization: a pixel becomes 1 when a uniform draw is below
    # its intensity.
    uniform_draw = tf.random_uniform(tf.shape(x_input))
    x = tf.cast(tf.less(uniform_draw, x_input), tf.int32)
    n = tf.placeholder(tf.int32, shape=[], name="n")

    model = build_gen(n, x_dim, z_dim, n_particles)
    variational = build_q_net(x, z_dim, n_particles)

    iw_objective = zs.variational.importance_weighted_objective(
        model, {'x': x}, variational=variational, axis=0)
    cost = tf.reduce_mean(iw_objective.sgvb())
    lower_bound = tf.reduce_mean(iw_objective)

    adam = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = adam.minimize(cost)

    # Training/evaluation schedule
    lb_samples = 50
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            np.random.shuffle(x_train)
            train_bounds = []
            for step in range(iters):
                lo = step * batch_size
                _, bound = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={
                        x_input: x_train[lo:lo + batch_size],
                        n_particles: lb_samples,
                        n: batch_size
                    })
                train_bounds.append(bound)
            elapsed = time.time() - epoch_start
            print("Epoch {} ({:.1f}s): IWAE bound = {}".format(
                epoch, elapsed, np.mean(train_bounds)))

            if epoch % test_freq != 0:
                continue

            # Test images are already binary, so they are fed straight into
            # ``x`` instead of going through ``x_input``.
            test_start = time.time()
            test_lbs = []
            for step in range(test_iters):
                lo = step * test_batch_size
                test_lb = sess.run(
                    lower_bound,
                    feed_dict={
                        x: x_test[lo:lo + test_batch_size],
                        n_particles: lb_samples,
                        n: test_batch_size
                    })
                test_lbs.append(test_lb)
            test_elapsed = time.time() - test_start
            print(">>> TEST ({:.1f}s)".format(test_elapsed))
            print(">> Test IWAE bound = {}".format(np.mean(test_lbs)))
def main():
    """Train ``build_sbn`` on MNIST while adapting ``build_proposal``.

    Variables under scope ``"sbn"`` follow the gradient of the negated
    importance weighted objective; variables under scope ``"proposal"``
    follow the gradient of the ``klpq`` importance cost.  Both gradient
    lists are applied by one Adam optimizer in a single op.  At test time
    the same bound is evaluated with ``lb_samples`` particles (reported as
    the lower bound) and ``ll_samples`` particles (reported as the log
    likelihood).
    """
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST; validation images are appended to the training set and the
    # test images are binarized once by Bernoulli sampling.
    mnist_file = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_train, _, x_valid, _, x_test, _ = \
        dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Hidden layer width
    h_dim = 200

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
    # Dynamic binarization: a pixel becomes 1 when a uniform draw is below
    # its intensity.
    uniform_draw = tf.random_uniform(tf.shape(x_input))
    x = tf.cast(tf.less(uniform_draw, x_input), tf.int32)
    n = tf.placeholder(tf.int32, shape=[], name="n")

    model = build_sbn(n, x_dim, h_dim, n_particles)
    proposal = build_proposal(x, h_dim, n_particles)

    adam = tf.train.AdamOptimizer(learning_rate=0.001, epsilon=1e-4)

    # learning model parameters
    iw_objective = zs.variational.importance_weighted_objective(
        model, observed={"x": x}, variational=proposal, axis=0)
    lower_bound = tf.reduce_mean(iw_objective)
    model_params = tf.trainable_variables(scope="sbn")
    model_grads = adam.compute_gradients(-lower_bound, model_params)

    # adapting the proposal
    klpq_obj = zs.variational.klpq(model,
                                   observed={"x": x},
                                   variational=proposal,
                                   axis=0)
    klpq_cost = tf.reduce_mean(klpq_obj.importance())
    proposal_params = tf.trainable_variables(scope="proposal")
    klpq_grads = adam.compute_gradients(klpq_cost, proposal_params)

    infer_op = adam.apply_gradients(model_grads + klpq_grads)

    # Training/evaluation schedule
    lb_samples = 10
    ll_samples = 1000
    epochs = 3000
    batch_size = 24
    iters = x_train.shape[0] // batch_size
    test_freq = 10
    test_batch_size = 100
    test_iters = x_test.shape[0] // test_batch_size

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            np.random.shuffle(x_train)
            train_bounds = []
            for step in range(iters):
                lo = step * batch_size
                _, bound = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={
                        x_input: x_train[lo:lo + batch_size],
                        n_particles: lb_samples,
                        n: batch_size
                    })
                train_bounds.append(bound)
            elapsed = time.time() - epoch_start
            print("Epoch {} ({:.1f}s): Lower bound = {}".format(
                epoch, elapsed, np.mean(train_bounds)))

            if epoch % test_freq != 0:
                continue

            # Test images are already binary, so they are fed straight into
            # ``x`` instead of going through ``x_input``.
            test_start = time.time()
            test_lbs, test_lls = [], []
            for step in range(test_iters):
                lo = step * test_batch_size
                test_x_batch = x_test[lo:lo + test_batch_size]
                test_lb = sess.run(lower_bound,
                                   feed_dict={
                                       x: test_x_batch,
                                       n_particles: lb_samples,
                                       n: test_batch_size
                                   })
                # Same tensor, evaluated with more particles.
                test_ll = sess.run(lower_bound,
                                   feed_dict={
                                       x: test_x_batch,
                                       n_particles: ll_samples,
                                       n: test_batch_size
                                   })
                test_lbs.append(test_lb)
                test_lls.append(test_ll)
            test_elapsed = time.time() - test_start
            print(">>> TEST ({:.1f}s)".format(test_elapsed))
            print(">> Test lower bound = {}".format(np.mean(test_lbs)))
            print(">> Test log likelihood = {}".format(np.mean(test_lls)))
def main():
    """Train ``vae`` / ``q_net`` on MNIST with an importance weighted bound.

    The objective is ``zs.variational.importance_weighted_objective`` built
    from a ``log_joint`` closure and explicit posterior samples; its SGVB
    cost is minimized with Adam.  The learning rate fed to the optimizer is
    multiplied by ``anneal_lr_rate`` every ``anneal_lr_freq`` epochs, and
    the test bound is printed every ``test_freq`` epochs.
    """
    tf.set_random_seed(1237)

    # Load MNIST; validation images are appended to the training set.
    mnist_file = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, _, x_valid, _, x_test, _ = \
        dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid]).astype('float32')
    # NumPy is seeded right before the one-off binarization of the test set.
    np.random.seed(1234)
    x_test = np.random.binomial(1, x_test,
                                size=x_test.shape).astype('float32')
    n_x = x_train.shape[1]

    # Latent dimensionality
    n_z = 40

    # Training/evaluation schedule
    lb_samples = 50
    epochs = 3000
    batch_size = 1000
    iters = x_train.shape[0] // batch_size
    learning_rate = 0.001
    anneal_lr_freq = 200
    anneal_lr_rate = 0.75
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x_orig = tf.placeholder(tf.float32, shape=[None, n_x], name='x')
    # Binarization is a separate sub-graph: real-valued batches go in through
    # ``x_orig`` and the sampled result is fed back through ``x``.
    uniform_draw = tf.random_uniform(tf.shape(x_orig), 0, 1)
    x_bin = tf.cast(tf.less(uniform_draw, x_orig), tf.int32)
    x = tf.placeholder(tf.int32, shape=[None, n_x], name='x')
    x_obs = tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1])
    n = tf.shape(x)[0]

    def log_joint(observed):
        """Return log p(z) + log p(x|z) of ``vae`` under ``observed``."""
        net = vae(observed, n, n_x, n_z, n_particles)
        prior_lp, likelihood_lp = net.local_log_prob(['z', 'x'])
        return prior_lp + likelihood_lp

    variational = q_net({}, x, n_z, n_particles)
    z_samples, z_log_prob = variational.query('z', outputs=True,
                                              local_log_prob=True)
    iw_objective = zs.variational.importance_weighted_objective(
        log_joint, {'x': x_obs}, {'z': [z_samples, z_log_prob]}, axis=0)
    cost = tf.reduce_mean(iw_objective.sgvb())
    lower_bound = tf.reduce_mean(iw_objective)

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    adam = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    infer_op = adam.minimize(cost)

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            if epoch % anneal_lr_freq == 0:
                learning_rate *= anneal_lr_rate
            np.random.shuffle(x_train)
            train_bounds = []
            for step in range(iters):
                lo = step * batch_size
                x_batch_bin = sess.run(
                    x_bin, feed_dict={x_orig: x_train[lo:lo + batch_size]})
                _, bound = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={
                        x: x_batch_bin,
                        learning_rate_ph: learning_rate,
                        n_particles: lb_samples
                    })
                train_bounds.append(bound)
            elapsed = time.time() - epoch_start
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, elapsed, np.mean(train_bounds)))

            if epoch % test_freq != 0:
                continue

            test_start = time.time()
            test_lbs = []
            for step in range(test_iters):
                lo = step * test_batch_size
                test_lb = sess.run(
                    lower_bound,
                    feed_dict={
                        x: x_test[lo:lo + test_batch_size],
                        n_particles: lb_samples
                    })
                test_lbs.append(test_lb)
            test_elapsed = time.time() - test_start
            print('>>> TEST ({:.1f}s)'.format(test_elapsed))
            print('>> Test IWAE bound = {}'.format(np.mean(test_lbs)))
def main():
    """Train a weight-clipped WGAN on MNIST across ``FLAGS.num_gpus`` towers.

    Each tower takes an equal slice of the input batch and computes critic
    and generator gradients with a shared RMSProp optimizer; losses and
    gradients are averaged with ``multi_gpu`` and applied in one op, after
    which every discriminator variable is clipped to [-0.01, 0.01].  The
    negated averaged discriminator loss is printed every ``print_freq``
    iterations and generated images are saved every ``save_freq``
    iterations.
    """
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    n_xl = 28
    n_channels = 1
    # Training and validation images are stacked and reshaped to
    # (-1, n_xl, n_xl, n_channels) to match the ``x`` placeholder.
    x_train = np.vstack([x_train, x_valid]).astype(np.float32).reshape(
        (-1, n_xl, n_xl, n_channels))

    # Define model parameters
    n_z = 40

    # Define training/evaluation parameters
    epochs = 1000
    batch_size = 64 * FLAGS.num_gpus
    gen_size = 100
    iters = x_train.shape[0] // batch_size
    print_freq = 100
    save_freq = 100

    # Build the computation graph
    is_training = tf.placeholder(tf.bool, shape=[], name='is_training')
    x = tf.placeholder(tf.float32, shape=(None, n_xl, n_xl, n_channels),
                       name='x')
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0002, decay=0.5)

    def build_tower_graph(x, id_):
        """Build losses and gradients for the ``id_``-th slice of ``x``.

        Returns ``(grads, gen_loss, disc_loss)`` where ``grads`` is the
        discriminator gradient list followed by the generator gradient list.
        """
        tower_x = x[id_ * tf.shape(x)[0] // FLAGS.num_gpus:
                    (id_ + 1) * tf.shape(x)[0] // FLAGS.num_gpus]
        n = tf.shape(tower_x)[0]
        _, x_gen = generator(None, n, n_z, is_training)
        x_critic = discriminator(tower_x, is_training)
        x_gen_critic = discriminator(x_gen, is_training)
        gen_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         scope='generator')
        disc_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope='discriminator')
        disc_loss = -tf.reduce_mean(x_critic - x_gen_critic)
        gen_loss = -tf.reduce_mean(x_gen_critic)
        disc_grads = optimizer.compute_gradients(disc_loss,
                                                 var_list=disc_var_list)
        gen_grads = optimizer.compute_gradients(gen_loss,
                                                var_list=gen_var_list)
        grads = disc_grads + gen_grads
        return grads, gen_loss, disc_loss

    tower_losses = []
    tower_grads = []
    for i in range(FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('tower_%d' % i):
                grads, gen_loss, disc_loss = build_tower_graph(x, i)
                tower_losses.append([gen_loss, disc_loss])
                tower_grads.append(grads)
    gen_loss, disc_loss = multi_gpu.average_losses(tower_losses)
    # The reported distance is the negated averaged discriminator loss.
    w_distance = -disc_loss
    grads = multi_gpu.average_gradients(tower_grads)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        infer_op = optimizer.apply_gradients(grads)

    # Clip weights of the critic to ensure 1-Lipschitz
    disc_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='discriminator')
    # Clipping is ordered after the gradient step via the control dependency.
    with tf.control_dependencies([infer_op]):
        clip_op = tf.group(*[
            var.assign(tf.clip_by_value(var, -0.01, 0.01))
            for var in disc_var_list
        ])

    # Generate images
    _, eval_x_gen = generator(None, gen_size, n_z, False)

    # Run the inference
    with multi_gpu.create_session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            np.random.shuffle(x_train)
            w_losses = []
            time_train = -time.time()
            for t in range(iters):
                # 1-based counter; named ``iteration`` so the builtin
                # ``iter`` is not shadowed.
                iteration = t + 1
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, _, w_loss = sess.run([infer_op, clip_op, w_distance],
                                        feed_dict={
                                            x: x_batch,
                                            is_training: True
                                        })
                w_losses.append(w_loss)

                if iteration % print_freq == 0:
                    print('Epoch={} Iter={} ({:.3f}s/iter): '
                          'wasserstein distance = {}'.format(
                              epoch, iteration,
                              (time.time() + time_train) / print_freq,
                              np.mean(w_losses)))
                    w_losses = []

                if iteration % save_freq == 0:
                    images = sess.run(eval_x_gen)
                    name = "results/wgan/wgan.epoch.{}.iter.{}.png".format(
                        epoch, iteration)
                    save_image_collections(images, name, scale_each=True)

                # The timer restarts only after saving, so time spent writing
                # images is not counted in the next s/iter figure.
                if iteration % print_freq == 0:
                    time_train = -time.time()
def main():
    """Train a Gaussian-mixture VAE (z -> h -> x) on MNIST.

    The decoder holds categorical logits over ``K`` clusters ``z``, one
    Gaussian per cluster over the latent code ``h`` and an MLP mapping ``h``
    to Bernoulli pixel logits. The encoder is an MLP producing ``q(h|x)``.
    ``z`` is summed out explicitly inside ``log_joint``.

    Side effects: prints progress; wipes and rewrites TensorBoard summaries
    in ``./log/3_gmvae/``; writes checkpoints, pickled parameter/sample dumps
    and cluster images under ``./results/3_gmvae``; publishes the latest
    results through the module globals declared below.
    """
    # Results stay reachable from the module namespace after main() returns.
    # Declared first so that no name is used before its ``global`` statement.
    global mu_res, log_sigma_res, pai_res
    global gen_images, z_gen_res, epoch

    # Fixed seed for reproducibility; use random.randint(0, 10000) for a
    # fresh one.
    seed = 1234  # original author's note: "N=100, K=3"
    print("Random Seed: ", seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # load MNIST data ---------------------------------------------------------
    data_path = os.path.join('../data/', 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid]).astype('float32')

    # model parameters --------------------------------------------------------
    K = 10
    D = 40
    dim_z = K
    dim_h = D
    dim_x = x_train.shape[1]  # 784
    N = x_train.shape[0]

    # Define training/evaluation parameters -----------------------------------
    resume = False
    epoches = 50  # 2000
    save_freq = 5
    batch_size = 100
    # float() keeps this a true ceiling even under Python 2 integer division.
    train_iters = int(np.ceil(N / float(batch_size)))
    learning_rate = 0.001
    anneal_lr_freq = 10
    anneal_lr_rate = 0.9
    n_particles = 20
    n_gen = 100
    result_path = "./results/3_gmvae"

    @zs.reuse(scope='decoder')
    def vae(observed, n, n_particles, is_training, dim_h=40, dim_z=10,
            dim_x=784):
        """Decoder: z --> h --> x.

        n: batch size
        dim_z: number of mixture components (K = 10)
        dim_h: size of the Gaussian latent code (D = 40)
        dim_x: number of pixels (784)
        is_training: accepted for call compatibility; currently unused.

        Returns ``(model, x_logits, h, z.tensor)``.
        """
        with zs.BayesianNet(observed=observed) as model:
            # Learnable categorical logits, shared by every data point.
            pai = tf.get_variable('pai', shape=[dim_z], dtype=tf.float32,
                                  trainable=True,
                                  initializer=tf.constant_initializer(1.0))
            n_pai = tf.tile(tf.expand_dims(pai, 0), [n, 1])
            z = zs.OnehotCategorical('z', logits=n_pai, dtype=tf.float32,
                                     n_samples=n_particles)

            # One Gaussian (mean, log-std) per mixture component.
            mu = tf.get_variable(
                'mu', shape=[dim_z, dim_h], dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-1, 1))
            log_sigma = tf.get_variable(
                'log_sigma', shape=[dim_z, dim_h], dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-3, -2))

            # A one-hot z selects its component's row through the matmul;
            # both results are reshaped to [n_particles, -1, dim_h].
            h_mean = tf.reshape(
                tf.matmul(tf.reshape(z, [-1, dim_z]), mu),
                [n_particles, -1, dim_h])
            h_logstd = tf.reshape(
                tf.matmul(tf.reshape(z, [-1, dim_z]), log_sigma),
                [n_particles, -1, dim_h])
            h = zs.Normal('h', mean=h_mean, logstd=h_logstd,
                          group_event_ndims=1)

            lx_h = layers.fully_connected(h, 512)
            lx_h = layers.fully_connected(lx_h, 512)
            # the log odds of being 1
            x_logits = layers.fully_connected(lx_h, dim_x,
                                              activation_fn=None)
            zs.Bernoulli('x', x_logits, group_event_ndims=1)
        return model, x_logits, h, z.tensor

    @zs.reuse(scope='encoder')
    def q_net(x, dim_h, n_particles, is_training):
        """Encoder: x --> h, with dropout controlled by ``is_training``."""
        with zs.BayesianNet() as variational:
            lh_x = layers.fully_connected(
                tf.to_float(x), 512,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            lh_x = tf.contrib.layers.dropout(lh_x, keep_prob=0.9,
                                             is_training=is_training)
            lh_x = layers.fully_connected(
                lh_x, 512,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            lh_x = tf.contrib.layers.dropout(lh_x, keep_prob=0.9,
                                             is_training=is_training)
            h_mean = layers.fully_connected(
                lh_x, dim_h, activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            h_logstd = layers.fully_connected(
                lh_x, dim_h, activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            zs.Normal('h', mean=h_mean, logstd=h_logstd,
                      n_samples=n_particles, group_event_ndims=1)
        return variational

    x_ph = tf.placeholder(tf.int32, shape=[None, dim_x], name='x_ph')
    x_orig_ph = tf.placeholder(tf.float32, shape=[None, dim_x],
                               name='x_orig_ph')
    # Stochastic binarisation of the real-valued pixels.
    x_bin = tf.cast(
        tf.less(tf.random_uniform(tf.shape(x_orig_ph), 0, 1), x_orig_ph),
        tf.int32)
    is_training_ph = tf.placeholder(tf.bool, shape=[],
                                    name='is_training_ph')
    n = tf.shape(x_ph)[0]

    def log_joint(observed):
        """Return log p(x, h) with the cluster variable z summed out."""
        z_obs = tf.eye(dim_z, batch_shape=[n_particles, n])
        z_obs = tf.transpose(z_obs, [2, 0, 1, 3])  # [K, n_p, bs, K]
        log_pz_list = []
        log_ph_z_list = []
        log_px_h = None
        for i in range(dim_z):
            # Work on a copy so the caller's dict is not mutated.
            observed_i = dict(observed)
            observed_i['z'] = z_obs[i, :]  # the i-th dimension is 1
            model, _, _, _ = vae(observed_i, n, n_particles, is_training_ph,
                                 dim_h=dim_h, dim_z=dim_z, dim_x=dim_x)
            log_pz_i, log_ph_z_i, log_px_h = model.local_log_prob(
                ['z', 'h', 'x'])
            log_pz_list.append(log_pz_i)
            log_ph_z_list.append(log_ph_z_i)
        log_pz = tf.stack(log_pz_list, axis=0)
        log_ph_z = tf.stack(log_ph_z_list, axis=0)
        # p(X, H) = p(X|H) sum_Z(p(Z) * p(H|Z))
        # log p(X, H) = log p(X|H) + log sum_Z exp(log p(Z) + log p(H|Z))
        # log p(x|h) is taken from the last iteration only; the code relies
        # on it being the same for every observed z.
        return log_px_h + tf.reduce_logsumexp(log_pz + log_ph_z, axis=0)

    variational = q_net(x_ph, dim_h, n_particles, is_training_ph)
    qh_samples, log_qh = variational.query('h', outputs=True,
                                           local_log_prob=True)

    x_obs = tf.tile(tf.expand_dims(x_ph, 0), [n_particles, 1, 1])

    lower_bound = zs.sgvb(log_joint,
                          observed={'x': x_obs},
                          latent={'h': [qh_samples, log_qh]},
                          axis=0)

    mean_lower_bound = tf.reduce_mean(lower_bound)
    with tf.name_scope('neg_lower_bound'):
        neg_lower_bound = tf.reduce_mean(-mean_lower_bound)

    train_vars = tf.trainable_variables()
    # Handles on the mixture parameters, fetched for the result dumps.
    with tf.variable_scope('decoder', reuse=True):
        pai = tf.get_variable('pai')
        mu = tf.get_variable('mu')
        log_sigma = tf.get_variable('log_sigma')

    # loss -------------
    loss = neg_lower_bound

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    # Variables without a gradient come back as (None, var); skip them so
    # clipping and the histogram summaries below cannot fail on None.
    grads_and_vars = [(grad, var)
                      for grad, var in optimizer.compute_gradients(loss)
                      if grad is not None]
    clipped_gvs = [(tf.clip_by_value(grad, -5., 5.), var)
                   for grad, var in grads_and_vars]
    infer = optimizer.apply_gradients(clipped_gvs)

    # Generate images -----------------------------------------------------
    z_manual_feed = tf.eye(dim_z, batch_shape=[10])  # [10, K, K]
    z_manual_feed = tf.transpose(z_manual_feed, [1, 0, 2])  # [K, 10, K]
    # n and n_particles do not matter here, since z is fed manually.
    _, x_logits, _, z_onehot = vae({'z': z_manual_feed}, 10,
                                   n_particles=1, is_training=False,
                                   dim_h=dim_h, dim_z=dim_z, dim_x=dim_x)
    print('x_logits:', x_logits.shape.as_list())  # [1, 100, 784]
    x_gen = tf.reshape(tf.sigmoid(x_logits), [-1, 28, 28, 1])
    z_gen = tf.argmax(tf.reshape(z_onehot, [-1, dim_z]), axis=1)

    # tensorboard summary ---------------------------------------------------
    # Tile the generated digits into one big image, ten per row block.
    image_rows = []
    for i in range(n_gen // 10):
        row = tf.concat([x_gen[j + i * 10, :] for j in range(10)], 1)
        image_rows.append(row)
    image_for_summ = tf.expand_dims(tf.concat(image_rows, 0), 0)
    print('image_for_summ:', image_for_summ.shape.as_list())
    tf.summary.image('gen_images', image_for_summ, max_outputs=100)
    tf.summary.scalar("lower_bound", mean_lower_bound)
    tf.summary.scalar("learning_rate", learning_rate_ph)
    tf.summary.scalar('loss', loss)

    for var in train_vars:
        tf.summary.histogram(var.name, var)
    for grad, _ in grads_and_vars:
        tf.summary.histogram(grad.name, grad)

    for var in train_vars:
        print(var.name, var.get_shape())

    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    saver = tf.train.Saver(max_to_keep=10)

    def build_dump(epoch_id, images, clusters, initial, current):
        """Bundle samples with the initial and current mixture parameters."""
        pai_0, mu_0, log_sigma_0 = initial
        pai_now, mu_now, log_sigma_now = current
        return {
            'epoch': epoch_id,
            'images': images,
            'clusters': clusters,
            'pai_0': pai_0,
            'mu_0': mu_0,
            'log_sigma_0': log_sigma_0,
            'pai_res': pai_now,
            'mu_res': mu_now,
            'log_sigma_res': log_sigma_now,
        }

    def write_dump(payload):
        """Pickle ``payload`` into ``result_path``, named after its epoch."""
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        dump_file = os.path.join(
            result_path,
            'gmvae_results_epoch_{}.pkl'.format(payload['epoch']))
        # Pickle output is bytes: the file must be opened in binary mode,
        # and the context manager guarantees it is flushed and closed.
        with open(dump_file, 'wb') as handle:
            pickle.dump(payload, handle, protocol=2)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.3

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        # Restore from the latest checkpoint
        ckpt_file = tf.train.latest_checkpoint(result_path)
        begin_epoch = 1
        if ckpt_file is not None and resume:
            # resume ---------------------------------------
            print('Restoring model from {}...'.format(ckpt_file))
            begin_epoch = int(ckpt_file.split('.')[-2]) + 1
            saver.restore(sess, ckpt_file)

        log_dir = './log/3_gmvae/'
        if os.path.exists(log_dir):
            shutil.rmtree(log_dir)
        summary_writer = tf.summary.FileWriter(
            log_dir, graph=tf.get_default_graph())

        print('training...')
        initial_params = sess.run([pai, mu, log_sigma])
        pai_res_0, mu_res_0, log_sigma_res_0 = initial_params

        global_step = 0
        # Defined up front so the final dump works even if the loop body
        # never runs (e.g. resuming past the last epoch).
        epoch = begin_epoch - 1
        have_samples = False
        for epoch in tqdm(range(begin_epoch, epoches + 1)):
            time_epoch = -time.time()
            if epoch % anneal_lr_freq == 0:
                learning_rate *= anneal_lr_rate
            np.random.shuffle(x_train)  # shuffle training data in place
            lbs = []

            for t in tqdm(range(train_iters)):
                global_step += 1
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                x_batch_bin = sess.run(x_bin,
                                       feed_dict={x_orig_ph: x_batch})
                _, lb, merge_all = sess.run(
                    [infer, mean_lower_bound, merged_summary_op],
                    feed_dict={
                        x_ph: x_batch_bin,
                        learning_rate_ph: learning_rate,
                        is_training_ph: True
                    })
                lbs.append(lb)

            time_epoch += time.time()
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, time_epoch, np.mean(lbs)))

            # Only the summary of the epoch's last batch is recorded.
            summary_writer.add_summary(merge_all, global_step=epoch)

            if epoch % save_freq == 0:
                # save ---------------------------------------------------
                print('Saving model...')
                save_path = os.path.join(
                    result_path, "gmvae.epoch.{}.ckpt".format(epoch))
                if not os.path.exists(os.path.dirname(save_path)):
                    os.makedirs(os.path.dirname(save_path))
                saver.save(sess, save_path)

                gen_images, z_gen_res = sess.run([x_gen, z_gen])
                have_samples = True

                # dump data
                pai_res, mu_res, log_sigma_res = sess.run(
                    [pai, mu, log_sigma])
                write_dump(build_dump(epoch, gen_images, z_gen_res,
                                      (pai_res_0, mu_res_0,
                                       log_sigma_res_0),
                                      (pai_res, mu_res, log_sigma_res)))
                save_image_with_clusters(
                    gen_images,
                    z_gen_res,
                    filename="results/3_gmvae/gmvae_epoch_{}.png".format(
                        epoch))
                print('Done')

        # Final dump. Samples are generated here only if no save epoch
        # produced them during this run.
        if not have_samples:
            gen_images, z_gen_res = sess.run([x_gen, z_gen])
        pai_res, mu_res, log_sigma_res = sess.run([pai, mu, log_sigma])
        print("Random Seed: ", seed)
        write_dump(build_dump(epoch, gen_images, z_gen_res,
                              (pai_res_0, mu_res_0, log_sigma_res_0),
                              (pai_res, mu_res, log_sigma_res)))
        plot_images_and_clusters(gen_images, z_gen_res, epoch,
                                 save_path=result_path, ncol=10)
        summary_writer.close()
def main():
    """Train a basic VAE on binarised MNIST and save samples every epoch.

    The generative model and the inference network are both two-hidden-layer
    MLPs built with ZhuSuan; the SGVB lower bound is maximised with Adam and
    100 generated digits are written to ``results/vae/`` after each epoch.
    """
    # Load MNIST and binarise the training pixels once, up front.
    mnist_file = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    (x_train, t_train, x_valid, t_valid,
     x_test, t_test) = dataset.load_mnist_realval(mnist_file)
    x_train = np.random.binomial(1, x_train, size=x_train.shape)
    n_x = x_train.shape[1]

    # Define model parameters
    n_z = 40

    @zs.reuse('model')
    def vae(observed, n, n_x, n_z):
        """Generative net: standard-normal z -> MLP -> Bernoulli logits."""
        with zs.BayesianNet(observed=observed) as model:
            prior_shape = [n, n_z]
            z = zs.Normal('z', tf.zeros(prior_shape),
                          logstd=tf.zeros(prior_shape),
                          group_event_ndims=1)
            hidden = layers.fully_connected(z, 500)
            hidden = layers.fully_connected(hidden, 500)
            x_logits = layers.fully_connected(hidden, n_x,
                                              activation_fn=None)
            zs.Bernoulli('x', x_logits, group_event_ndims=1)
        return model, x_logits

    @zs.reuse('variational')
    def q_net(x, n_z):
        """Inference net: x -> MLP -> diagonal Gaussian over z."""
        with zs.BayesianNet() as variational:
            hidden = layers.fully_connected(tf.to_float(x), 500)
            hidden = layers.fully_connected(hidden, 500)
            mean = layers.fully_connected(hidden, n_z, activation_fn=None)
            logstd = layers.fully_connected(hidden, n_z, activation_fn=None)
            zs.Normal('z', mean, logstd=logstd, group_event_ndims=1)
        return variational

    x = tf.placeholder(tf.int32, shape=[None, n_x], name='x')
    n = tf.shape(x)[0]

    def log_joint(observed):
        """Return log p(z) + log p(x|z) for the given observations."""
        model, _ = vae(observed, n, n_x, n_z)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net(x, n_z)
    qz_samples, log_qz = variational.query('z', outputs=True,
                                           local_log_prob=True)
    per_example_bound = zs.sgvb(log_joint,
                                observed={'x': x},
                                latent={'z': [qz_samples, log_qz]})
    lower_bound = tf.reduce_mean(per_example_bound)

    optimizer = tf.train.AdamOptimizer(0.001)
    infer = optimizer.minimize(-lower_bound)

    # Generate images
    n_gen = 100
    _, gen_logits = vae({}, n_gen, n_x, n_z)
    x_gen = tf.reshape(tf.sigmoid(gen_logits), [-1, 28, 28, 1])

    # Define training parameters
    epoches = 500
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 1

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epoches + 1):
            np.random.shuffle(x_train)
            lbs = []
            # Walk over the full batches only; a trailing partial batch is
            # dropped because ``iters`` uses floor division.
            for start in range(0, iters * batch_size, batch_size):
                x_batch = x_train[start:start + batch_size]
                step_out = sess.run([infer, lower_bound],
                                    feed_dict={x: x_batch})
                lbs.append(step_out[1])
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % save_freq == 0:
                images = sess.run(x_gen)
                name = "results/vae/vae.epoch.{}.png".format(epoch)
                save_image_collections(images, name)
def main():
    """Train a VAE on MNIST with checkpointing, then generate digit images.

    After training (resumed from the newest checkpoint when one exists) the
    function writes one grid of unconditional samples and, for ten fixed test
    images, one grid of samples decoded from noisy posterior draws. Images go
    to ``results/vae_digits`` and checkpoints to ``checkpoints/vae_digits``.
    """
    # Load MNIST; only the test set is binarised here, the training batches
    # are binarised inside the graph.
    mnist_file = os.path.join(conf.data_dir, "mnist.pkl.gz")
    (x_train, t_train, x_valid, t_valid,
     x_test, t_test) = dataset.load_mnist_realval(mnist_file)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)

    # Define model parameters
    x_dim = x_train.shape[1]
    z_dim = 40

    # Build the computation graph
    # number of samples drawn per data point (more samples, tighter estimate)
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    # real-valued input pixels, stochastically binarised into ``x``
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
    uniform_noise = tf.random_uniform(tf.shape(x_input))
    x = tf.cast(tf.less(uniform_noise, x_input), tf.int32)
    # batch size
    n = tf.placeholder(tf.int32, shape=[], name="n")
    # extra noise on the variational std, used only at generation time to get
    # more varied samples; defaults to 0 during training and testing
    std_noise = tf.placeholder_with_default(0., shape=[], name="std_noise")

    # generative model and variational (inference) network over z
    model = build_gen(x_dim, z_dim, n, n_particles)
    q_model = build_q_net(x, z_dim, n_particles, std_noise)
    variational = q_model.observe()

    # ELBO: ``cost`` is the SGVB surrogate to minimise, ``lower_bound`` the
    # mean bound that gets reported
    elbo = zs.variational.elbo(model, {"x": x},
                               variational=variational, axis=0)
    cost = tf.reduce_mean(elbo.sgvb())
    lower_bound = tf.reduce_mean(elbo)

    # importance-sampling estimate of the marginal log likelihood
    is_log_likelihood = tf.reduce_mean(
        zs.is_loglikelihood(model, {"x": x}, proposal=variational, axis=0))

    # optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = optimizer.minimize(cost)

    # define training/evaluation parameters
    epochs = 1000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    test_freq = 100
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    result_path = "results/vae_digits"
    checkpoints_path = "checkpoints/vae_digits"

    # used to save checkpoints during training
    saver = tf.train.Saver(max_to_keep=10)
    save_model_freq = 100

    # run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # restore the model parameters from the latest checkpoint; the epoch
        # number is parsed out of the "vae.epoch.<N>.ckpt" file name
        latest_ckpt = tf.train.latest_checkpoint(checkpoints_path)
        begin_epoch = 1
        if latest_ckpt is not None:
            print('Restoring model from {}...'.format(latest_ckpt))
            begin_epoch = int(latest_ckpt.split('.')[-2]) + 1
            saver.restore(sess, latest_ckpt)

        # begin training
        for epoch in range(begin_epoch, epochs + 1):
            epoch_started = time.time()
            np.random.shuffle(x_train)
            lbs = []
            for start in range(0, iters * batch_size, batch_size):
                train_feed = {
                    x_input: x_train[start:start + batch_size],
                    n_particles: 1,
                    n: batch_size,
                }
                lbs.append(sess.run([infer_op, lower_bound],
                                    feed_dict=train_feed)[1])
            time_epoch = time.time() - epoch_started
            print("Epoch {} ({:.1f}s): Lower bound = {}".format(
                epoch, time_epoch, np.mean(lbs)))

            # test marginal log likelihood
            if epoch % test_freq == 0:
                test_started = time.time()
                test_lbs, test_lls = [], []
                for start in range(0, test_iters * test_batch_size,
                                   test_batch_size):
                    test_x_batch = x_test[start:start + test_batch_size]
                    # The test set is already binary, so it is fed straight
                    # into ``x``, bypassing the in-graph binarisation.
                    test_lbs.append(sess.run(lower_bound, feed_dict={
                        x: test_x_batch,
                        n_particles: 1,
                        n: test_batch_size
                    }))
                    test_lls.append(sess.run(is_log_likelihood, feed_dict={
                        x: test_x_batch,
                        n_particles: 1000,
                        n: test_batch_size
                    }))
                time_test = time.time() - test_started
                print(">>> TEST ({:.1f}s)".format(time_test))
                print(">> Test lower bound = {}".format(np.mean(test_lbs)))
                print('>> Test log likelihood (IS) = {}'.format(
                    np.mean(test_lls)))

            # save model parameters
            if epoch % save_model_freq == 0:
                print('Saving model...')
                save_path = os.path.join(
                    checkpoints_path, "vae.epoch.{}.ckpt".format(epoch))
                ckpt_dir = os.path.dirname(save_path)
                if not os.path.exists(ckpt_dir):
                    os.makedirs(ckpt_dir)
                saver.save(sess, save_path)
                print('Done')

        # random generation of images from the latent distribution
        prior_images = tf.reshape(model.observe()["x_mean"],
                                  [-1, 28, 28, 1])
        images = sess.run(prior_images, feed_dict={n: 100, n_particles: 1})
        save_image_collections(
            images, os.path.join(result_path, "random_samples.png"))

        # the following code generates 100 samples for each number:
        # ``test_n`` holds one test-set index per output image, picked so
        # that similar digits can be generated from each of them
        test_n = [3, 2, 1, 90, 95, 23, 11, 0, 84, 7]
        for i, test_index in enumerate(test_n):
            exemplar = np.expand_dims(x_test[test_index], 0)
            # latent draws from the variational net for this fixed sample,
            # with noise added to the std so the outputs differ
            z = q_model.observe(x=exemplar)['z']
            latent = sess.run(z, feed_dict={
                x_input: exemplar,
                n: 1,
                n_particles: 100,
                std_noise: 0.7
            })

            # decode the sampled latents into images with the model
            decoded = tf.reshape(model.observe(z=latent)["x_mean"],
                                 [-1, 28, 28, 1])
            images = sess.run(decoded, feed_dict={})
            save_image_collections(
                images, os.path.join(result_path, "{}.png".format(i)))