Example 1
            z = zs.OnehotCategorical('z',
                                     z_stacked_logits,
                                     dtype=tf.float32,
                                     n_samples=n_particles,
                                     group_event_ndims=1)
    return variational


if __name__ == '__main__':
    tf.set_random_seed(1237)
    np.random.seed(1237)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid]).astype('float32')
    x_test = np.random.binomial(1, x_test, size=x_test.shape).astype('float32')

    # Define parameters
    n_z, n_k = 100, 2  # number of latent variables, categories
    n_x = x_train.shape[1]

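    # tau_p0 and tau_q0 are the initial relaxation temperatures of the prior and
    # the variational posterior; they are multiplied by anneal_tau_rate every
    # anneal_tau_freq epochs during training.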
    tau_p0 = 1.0
    tau_q0 = 1.0
    anneal_tau_freq = 25
    anneal_tau_rate = 0.95

    lb_samples = 1
    ll_samples = 500
    epochs = 3000
Example 2
        lz_x = tf.layers.dense(lz_x, 500, activation=tf.nn.relu)
        z_mean = tf.layers.dense(lz_x, z_dim)
        z_logstd = tf.layers.dense(lz_x, z_dim)
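        # q(z|x) is a diagonal Gaussian; the two dense heads above produce its
        # mean and log standard deviation.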
        z = zs.Normal('z',
                      z_mean,
                      logstd=z_logstd,
                      group_ndims=1,
                      n_samples=n_z_per_x)
    return variational


# In[4]:

# Load MNIST
data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
x_train, t_train, x_valid, t_valid, x_test, t_test = dataset.load_mnist_realval(
    data_path)
x_train = np.vstack([x_train, x_valid])
y_train = np.vstack([t_train, t_valid])
x_test = np.random.binomial(1, x_test, size=x_test.shape)
x_dim = x_train.shape[1]

# In[5]:

x_train.shape, t_train.shape, x_valid.shape, t_valid.shape

# In[6]:

# Define model parameters
z_dim = 40

# In[7]:
Example 3
def main():
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Define model/inference parameters
    z_dim = 40
    n_planar_flows = 10

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
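    # Dynamic binarization: every time the graph runs, each pixel is resampled
    # from a Bernoulli whose probability is the grayscale intensity.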
    x = tf.cast(tf.less(tf.random_uniform(tf.shape(x_input)), x_input),
                tf.int32)
    n = tf.placeholder(tf.int32, shape=[], name="n")

    model = build_gen(n, x_dim, z_dim, n_particles)
    q_net = build_q_net(x, z_dim, n_particles)
    qz_samples, log_qz = q_net.query('z', outputs=True, local_log_prob=True)
    # TODO: add tests for repeated calls of flows
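    # Each call below stacks n_planar_flows planar transformations on the
    # posterior samples and returns the transformed samples together with the
    # correspondingly adjusted log density.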
    qz_samples, log_qz = zs.planar_normalizing_flow(qz_samples,
                                                    log_qz,
                                                    n_iters=n_planar_flows)
    qz_samples, log_qz = zs.planar_normalizing_flow(qz_samples,
                                                    log_qz,
                                                    n_iters=n_planar_flows)

    lower_bound = zs.variational.elbo(model,
                                      observed={"x": x},
                                      latent={"z": [qz_samples, log_qz]},
                                      axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    # Importance sampling estimates of marginal log likelihood
    is_log_likelihood = tf.reduce_mean(
        zs.is_loglikelihood(model, {'x': x}, {'z': [qz_samples, log_qz]},
                            axis=0))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = optimizer.minimize(cost)

    # Define training/evaluation parameters
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={
                                     x_input: x_batch,
                                     n_particles: 1,
                                     n: batch_size
                                 })
                lbs.append(lb)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, time_epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs = []
                test_lls = []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:(t + 1) *
                                          test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: 1,
                                           n: test_batch_size
                                       })
                    test_ll = sess.run(is_log_likelihood,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: 1000,
                                           n: test_batch_size
                                       })
                    test_lbs.append(test_lb)
                    test_lls.append(test_ll)
                time_test += time.time()
                print('>>> TEST ({:.1f}s)'.format(time_test))
                print('>> Test lower bound = {}'.format(np.mean(test_lbs)))
                print('>> Test log likelihood (IS) = {}'.format(
                    np.mean(test_lls)))
Example 4
def main():
    # Load MNIST
    data_path = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Define model parameters
    z_dim = 40

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
    x = tf.cast(tf.less(tf.random_uniform(tf.shape(x_input)), x_input),
                tf.int32)
    n = tf.placeholder(tf.int32, shape=[], name="n")

    model = build_gen(x_dim, z_dim, n, n_particles)
    variational = build_q_net(x, z_dim, n_particles)

    lower_bound = zs.variational.elbo(model, {"x": x},
                                      variational=variational,
                                      axis=0)
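    # sgvb() is a surrogate cost whose gradient is the reparameterization
    # (SGVB) estimator; minimizing it maximizes the ELBO.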
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    # Importance sampling estimates of marginal log likelihood
    is_log_likelihood = tf.reduce_mean(
        zs.is_loglikelihood(model, {"x": x}, proposal=variational, axis=0))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = optimizer.minimize(cost)

    # Random generation
    x_gen = tf.reshape(model.observe()["x_mean"], [-1, 28, 28, 1])

    # Define training/evaluation parameters
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 10
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size
    result_path = "results/vae"

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={
                                     x_input: x_batch,
                                     n_particles: 1,
                                     n: batch_size
                                 })
                lbs.append(lb)
            time_epoch += time.time()
            print("Epoch {} ({:.1f}s): Lower bound = {}".format(
                epoch, time_epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs, test_lls = [], []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:(t + 1) *
                                          test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: 1,
                                           n: test_batch_size
                                       })
                    test_ll = sess.run(is_log_likelihood,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: 1000,
                                           n: test_batch_size
                                       })
                    test_lbs.append(test_lb)
                    test_lls.append(test_ll)
                time_test += time.time()
                print(">>> TEST ({:.1f}s)".format(time_test))
                print(">> Test lower bound = {}".format(np.mean(test_lbs)))
                print('>> Test log likelihood (IS) = {}'.format(
                    np.mean(test_lls)))

            if epoch % save_freq == 0:
                images = sess.run(x_gen, feed_dict={n: 100, n_particles: 1})
                name = os.path.join(result_path,
                                    "vae.epoch.{}.png".format(epoch))
                save_image_collections(images, name)
Example 5
def main():
    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Define model parameters
    z_dim = 40

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name='x')
    x = tf.to_int32(tf.less(tf.random_uniform(tf.shape(x_input)), x_input))
    n = tf.shape(x)[0]

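    # Joint log density log p(x, z) = log p(z) + log p(x|z), evaluated at the
    # values supplied in `observed`.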
    def log_joint(observed):
        model = vae(observed, x_dim, z_dim, n, n_particles)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net({'x': x}, x_dim, z_dim, n_particles)
    qz_samples, log_qz = variational.query('z', outputs=True,
                                           local_log_prob=True)
    lower_bound = zs.variational.elbo(log_joint,
                                      observed={'x': x},
                                      latent={'z': [qz_samples, log_qz]},
                                      axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    # Importance sampling estimates of marginal log likelihood
    is_log_likelihood = tf.reduce_mean(
        zs.is_loglikelihood(log_joint, {'x': x},
                            {'z': [qz_samples, log_qz]}, axis=0))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = optimizer.minimize(cost)

    # Generate images
    n_gen = 100
    x_mean = vae({}, x_dim, z_dim, n_gen).outputs('x_mean')
    x_gen = tf.reshape(x_mean, [-1, 28, 28, 1])

    # Define training/evaluation parameters
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 10
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size
    result_path = "results/vae"

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={x_input: x_batch,
                                            n_particles: 1})
                lbs.append(lb)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, time_epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs = []
                test_lls = []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:
                                          (t + 1) * test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={x: test_x_batch,
                                                  n_particles: 1})
                    test_ll = sess.run(is_log_likelihood,
                                       feed_dict={x: test_x_batch,
                                                  n_particles: 1000})
                    test_lbs.append(test_lb)
                    test_lls.append(test_ll)
                time_test += time.time()
                print('>>> TEST ({:.1f}s)'.format(time_test))
                print('>> Test lower bound = {}'.format(np.mean(test_lbs)))
                print('>> Test log likelihood (IS) = {}'.format(
                    np.mean(test_lls)))

            if epoch % save_freq == 0:
                images = sess.run(x_gen)
                name = os.path.join(result_path,
                                    "vae.epoch.{}.png".format(epoch))
                save_image_collections(images, name)
Example 6
def main():
    tf.set_random_seed(1237)
    np.random.seed(1237)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid]).astype('float32')
    x_test = np.random.binomial(1, x_test, size=x_test.shape).astype('float32')

    # Define parameters
    n_z, n_k = 100, 2  # number of latent variables, categories
    n_x = x_train.shape[1]

    tau_p0 = 1.0
    tau_q0 = 1.0
    anneal_tau_freq = 25
    anneal_tau_rate = 0.95

    lb_samples = 1
    ll_samples = 500
    epochs = 3000
    batch_size = 64
    iters = x_train.shape[0] // batch_size
    learning_rate = 0.0001
    test_freq = 25
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Build the computation graph
    tau_p = tf.placeholder(tf.float32, shape=[], name="tau_p")
    tau_q = tf.placeholder(tf.float32, shape=[], name="tau_q")
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x_orig = tf.placeholder(tf.float32, shape=[None, n_x], name='x')
    x_bin = tf.cast(tf.less(tf.random_uniform(tf.shape(x_orig), 0, 1), x_orig),
                    tf.int32)
    x = tf.placeholder(tf.int32, shape=[None, n_x], name='x')
    x_obs = tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1])
    n = tf.shape(x)[0]

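    # Build the objective twice: with relaxed=True the discrete variables are
    # replaced by their continuous relaxation (differentiable, used for
    # training); with relaxed=False the exact discrete model is used for
    # evaluation.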
    def lower_bound_and_log_likelihood(relaxed=False):
        def log_joint(observed):
            model = vae(observed, n, n_x, n_z, n_k, tau_p, n_particles,
                        relaxed)
            log_pz, log_px_z = model.local_log_prob(['z', 'x'])
            return log_pz + log_px_z

        variational = q_net({}, x, n_z, n_k, tau_q, n_particles, relaxed)
        qz_samples, log_qz = variational.query('z',
                                               outputs=True,
                                               local_log_prob=True)

        lower_bound = zs.variational.elbo(log_joint,
                                          observed={'x': x_obs},
                                          latent={'z': [qz_samples, log_qz]},
                                          axis=0)
        cost = tf.reduce_mean(lower_bound.sgvb())
        lower_bound = tf.reduce_mean(lower_bound)

        # Importance sampling estimates of marginal log likelihood
        is_log_likelihood = tf.reduce_mean(
            zs.is_loglikelihood(log_joint, {'x': x_obs},
                                {'z': [qz_samples, log_qz]},
                                axis=0))

        return cost, lower_bound, is_log_likelihood

    # For training
    relaxed_cost, relaxed_lower_bound, _ = lower_bound_and_log_likelihood(True)
    # For testing and generating
    _, lower_bound, is_log_likelihood = lower_bound_and_log_likelihood(False)

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    infer_op = optimizer.minimize(relaxed_cost)

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_train)

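            # Anneal both temperatures toward their floors (0.5 for the prior,
            # 0.666 for the posterior).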
            if epoch % anneal_tau_freq == 0:
                tau_p0 = max(0.5, tau_p0 * anneal_tau_rate)
                tau_q0 = max(0.666, tau_q0 * anneal_tau_rate)

            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                x_batch_bin = sess.run(x_bin, feed_dict={x_orig: x_batch})
                feed_dict = {
                    x: x_batch_bin,
                    learning_rate_ph: learning_rate,
                    n_particles: lb_samples,
                    tau_p: tau_p0,
                    tau_q: tau_q0
                }
                _, lb = sess.run([infer_op, relaxed_lower_bound],
                                 feed_dict=feed_dict)
                lbs.append(lb)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, time_epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs = []
                test_lls = []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:(t + 1) *
                                          test_batch_size]
                    feed_dict = {
                        x: test_x_batch,
                        n_particles: ll_samples,
                        tau_p: tau_p0,
                        tau_q: tau_q0
                    }

                    test_lb, test_ll = sess.run(
                        [lower_bound, is_log_likelihood], feed_dict=feed_dict)

                    test_lbs.append(test_lb)
                    test_lls.append(test_ll)
                time_test += time.time()
                print('>>> TEST ({:.1f}s)'.format(time_test))
                print('>> Test lower bound = {}'.format(np.mean(test_lbs)))
                print('>> Test log likelihood (IS) = {}'.format(
                    np.mean(test_lls)))
Example 7
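            # eps is multiplicative Gaussian noise with mean 1 and standard
            # deviation sqrt(alpha), i.e. logstd = 0.5 * log(alpha).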
            eps = zs.Normal('layer' + str(i) + '/eps',
                            1.,
                            logstd=0.5 * tf.log(alpha + 1e-10),
                            n_samples=n_particles,
                            group_ndims=1)
    return variational


if __name__ == '__main__':
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_mnist_realval(data_path, one_hot=False)
    x_train = np.vstack([x_train, x_valid]).astype('float32')
    y_train = np.concatenate([y_train, y_valid]).astype('int32')
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    n_x = x_train.shape[1]

    # Define training/evaluation parameters
    epochs = 500
    batch_size = 1000
    lb_samples = 10
    ll_samples = 100
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 3
    learning_rate = 0.001
    anneal_lr_freq = 100
    anneal_lr_rate = 0.75
Example 8
def main():
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Define model parameters
    z_dim = 32

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim])
    x = tf.to_int32(tf.random_uniform(tf.shape(x_input)) <= x_input)
    n = tf.shape(x)[0]

    def log_joint(observed):
        model, _ = vae_conv(observed, n, x_dim, z_dim, n_particles)
        log_pz, log_px_z = model.local_log_prob(["z", "x"])
        return log_pz + log_px_z

    variational = q_net(x, z_dim, n_particles)
    qz_samples, log_qz = variational.query("z",
                                           outputs=True,
                                           local_log_prob=True)
    lower_bound = zs.variational.elbo(log_joint,
                                      observed={"x": x},
                                      latent={"z": [qz_samples, log_qz]},
                                      axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5)
    infer_op = optimizer.minimize(cost)

    # Generate images
    n_gen = 100
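    # Sample from the prior (nothing observed) and map the Bernoulli logits
    # through a sigmoid to obtain pixel means for visualization.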
    _, x_logits = vae_conv({}, n_gen, x_dim, z_dim, 1)
    x_gen = tf.reshape(tf.sigmoid(x_logits), [-1, 28, 28, 1])

    # Define training/evaluation parameters
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 10
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size
    result_path = "results/vae_conv"

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={
                                     x_input: x_batch,
                                     n_particles: 1
                                 })
                lbs.append(lb)
            time_epoch += time.time()
            print("Epoch {} ({:.1f}s): Lower bound = {}".format(
                epoch, time_epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs = []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:(t + 1) *
                                          test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: 1
                                       })
                    test_lbs.append(test_lb)
                time_test += time.time()
                print(">>> TEST ({:.1f}s)".format(time_test))
                print(">> Test lower bound = {}".format(np.mean(test_lbs)))

            if epoch % save_freq == 0:
                print("Saving images...")
                images = sess.run(x_gen)
                name = os.path.join(result_path,
                                    "vae.epoch.{}.png".format(epoch))
                save_image_collections(images, name)
Example 9
def main():
    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Define model parameters
    z_dim = 40

    # Build the computation graph
    is_training = tf.placeholder(tf.bool, shape=[], name='is_training')
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name='x')
    x = tf.to_int32(tf.less(tf.random_uniform(tf.shape(x_input)), x_input))
    n = tf.shape(x)[0]

    def log_joint(observed):
        model = vae(observed, n, x_dim, z_dim, n_particles, is_training)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net(x, z_dim, n_particles, is_training)
    qz_samples, log_qz = variational.query('z',
                                           outputs=True,
                                           local_log_prob=True)
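    # Score-function (REINFORCE) gradient estimator with an input-dependent
    # baseline c(x) to reduce variance; baseline_cost trains the baseline net.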
    cx = tf.expand_dims(baseline_net(x), 0)
    lower_bound = zs.variational.elbo(log_joint,
                                      observed={'x': x},
                                      latent={'z': [qz_samples, log_qz]},
                                      axis=0)
    cost, baseline_cost = lower_bound.reinforce(baseline=cx)
    cost = tf.reduce_mean(cost + baseline_cost)
    lower_bound = tf.reduce_mean(lower_bound)

    # Importance sampling estimates of marginal log likelihood
    is_log_likelihood = tf.reduce_mean(
        zs.is_loglikelihood(log_joint, {'x': x}, {'z': [qz_samples, log_qz]},
                            axis=0))

    optimizer = tf.train.AdamOptimizer(0.001)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        infer_op = optimizer.minimize(cost)

    # Define training/evaluation parameters
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={
                                     x_input: x_batch,
                                     is_training: True,
                                     n_particles: 1
                                 })
                lbs.append(lb)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, time_epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs = []
                test_lls = []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:(t + 1) *
                                          test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           is_training: False,
                                           n_particles: 1
                                       })
                    test_ll = sess.run(is_log_likelihood,
                                       feed_dict={
                                           x: test_x_batch,
                                           is_training: False,
                                           n_particles: 1000
                                       })
                    test_lbs.append(test_lb)
                    test_lls.append(test_ll)
                time_test += time.time()
                print('>>> TEST ({:.1f}s)'.format(time_test))
                print('>> Test lower bound = {}'.format(np.mean(test_lbs)))
                print('>> Test log likelihood (IS) = {}'.format(
                    np.mean(test_lls)))
Example 10
def main():
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Define model parameters
    z_dim = 40

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
    x = tf.cast(tf.less(tf.random_uniform(tf.shape(x_input)), x_input),
                tf.int32)
    n = tf.placeholder(tf.int32, shape=[], name="n")

    model = build_gen(n, x_dim, z_dim, n_particles)
    variational = build_q_net(x, z_dim, n_particles)

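    # Multi-sample importance-weighted (IWAE) bound, computed over the
    # n_particles axis (axis=0); it tightens as lb_samples grows.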
    lower_bound = zs.variational.importance_weighted_objective(
        model, {'x': x}, variational=variational, axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = optimizer.minimize(cost)

    # Define training/evaluation parameters
    lb_samples = 50
    epochs = 3000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={
                                     x_input: x_batch,
                                     n_particles: lb_samples,
                                     n: batch_size
                                 })
                lbs.append(lb)
            time_epoch += time.time()
            print("Epoch {} ({:.1f}s): IWAE bound = {}".format(
                epoch, time_epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs = []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:(t + 1) *
                                          test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: lb_samples,
                                           n: test_batch_size
                                       })
                    test_lbs.append(test_lb)
                time_test += time.time()
                print(">>> TEST ({:.1f}s)".format(time_test))
                print(">> Test IWAE bound = {}".format(np.mean(test_lbs)))
Example 11
def main():
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)
    x_dim = x_train.shape[1]

    # Define model parameters
    h_dim = 200

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
    x = tf.cast(tf.less(tf.random_uniform(tf.shape(x_input)), x_input),
                tf.int32)
    n = tf.placeholder(tf.int32, shape=[], name="n")

    model = build_sbn(n, x_dim, h_dim, n_particles)
    proposal = build_proposal(x, h_dim, n_particles)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001, epsilon=1e-4)

    # learning model parameters
    lower_bound = tf.reduce_mean(
        zs.variational.importance_weighted_objective(model,
                                                     observed={"x": x},
                                                     variational=proposal,
                                                     axis=0))
    model_params = tf.trainable_variables(scope="sbn")
    model_grads = optimizer.compute_gradients(-lower_bound, model_params)

    # adapting the proposal
    klpq_obj = zs.variational.klpq(model,
                                   observed={"x": x},
                                   variational=proposal,
                                   axis=0)
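    # klpq_obj.importance() estimates the inclusive KL(p||q) by self-normalized
    # importance sampling; minimizing it adapts the proposal toward the
    # posterior (reweighted wake-sleep style).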
    klpq_cost = tf.reduce_mean(klpq_obj.importance())
    proposal_params = tf.trainable_variables(scope="proposal")
    klpq_grads = optimizer.compute_gradients(klpq_cost, proposal_params)

    infer_op = optimizer.apply_gradients(model_grads + klpq_grads)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 1000
    epochs = 3000
    batch_size = 24
    iters = x_train.shape[0] // batch_size
    test_freq = 10
    test_batch_size = 100
    test_iters = x_test.shape[0] // test_batch_size

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={
                                     x_input: x_batch,
                                     n_particles: lb_samples,
                                     n: batch_size
                                 })
                lbs.append(lb)
            time_epoch += time.time()
            print("Epoch {} ({:.1f}s): Lower bound = {}".format(
                epoch, time_epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs = []
                test_lls = []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:(t + 1) *
                                          test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: lb_samples,
                                           n: test_batch_size
                                       })
                    test_ll = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: ll_samples,
                                           n: test_batch_size
                                       })
                    test_lbs.append(test_lb)
                    test_lls.append(test_ll)
                time_test += time.time()
                print(">>> TEST ({:.1f}s)".format(time_test))
                print(">> Test lower bound = {}".format(np.mean(test_lbs)))
                print(">> Test log likelihood = {}".format(np.mean(test_lls)))
Example 12
def main():
    tf.set_random_seed(1237)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid]).astype('float32')
    np.random.seed(1234)
    x_test = np.random.binomial(1, x_test, size=x_test.shape).astype('float32')
    n_x = x_train.shape[1]

    # Define model parameters
    n_z = 40

    # Define training/evaluation parameters
    lb_samples = 50
    epochs = 3000
    batch_size = 1000
    iters = x_train.shape[0] // batch_size
    learning_rate = 0.001
    anneal_lr_freq = 200
    anneal_lr_rate = 0.75
    test_freq = 10
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x_orig = tf.placeholder(tf.float32, shape=[None, n_x], name='x')
    x_bin = tf.cast(tf.less(tf.random_uniform(tf.shape(x_orig), 0, 1), x_orig),
                    tf.int32)
    x = tf.placeholder(tf.int32, shape=[None, n_x], name='x')
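    # Replicate the observations along a leading particle axis so they
    # broadcast against the n_particles posterior samples.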
    x_obs = tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1])
    n = tf.shape(x)[0]

    def log_joint(observed):
        model = vae(observed, n, n_x, n_z, n_particles)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net({}, x, n_z, n_particles)
    qz_samples, log_qz = variational.query('z',
                                           outputs=True,
                                           local_log_prob=True)
    lower_bound = zs.variational.importance_weighted_objective(
        log_joint, {'x': x_obs}, {'z': [qz_samples, log_qz]}, axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    infer_op = optimizer.minimize(cost)

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            if epoch % anneal_lr_freq == 0:
                learning_rate *= anneal_lr_rate
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                x_batch_bin = sess.run(x_bin, feed_dict={x_orig: x_batch})
                _, lb = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={
                        x: x_batch_bin,
                        learning_rate_ph: learning_rate,
                        n_particles: lb_samples
                    })
                lbs.append(lb)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, time_epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs = []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:(t + 1) *
                                          test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: lb_samples
                                       })
                    test_lbs.append(test_lb)
                time_test += time.time()
                print('>>> TEST ({:.1f}s)'.format(time_test))
                print('>> Test IWAE bound = {}'.format(np.mean(test_lbs)))
Example 13
def main():
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    n_xl = 28
    n_channels = 1
    x_train = np.vstack([x_train, x_valid]).astype(np.float32).reshape(
        (-1, n_xl, n_xl, n_channels))

    # Define model parameters
    n_z = 40

    # Define training/evaluation parameters
    epochs = 1000
    batch_size = 64 * FLAGS.num_gpus
    gen_size = 100
    iters = x_train.shape[0] // batch_size
    print_freq = 100
    save_freq = 100

    # Build the computation graph
    is_training = tf.placeholder(tf.bool, shape=[], name='is_training')
    x = tf.placeholder(tf.float32,
                       shape=(None, n_xl, n_xl, n_channels),
                       name='x')
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0002, decay=0.5)

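    # Each GPU tower takes an equal slice of the batch, builds the generator
    # and critic losses on that slice and returns its gradients; losses and
    # gradients are averaged over the towers below.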
    def build_tower_graph(x, id_):
        tower_x = x[id_ * tf.shape(x)[0] // FLAGS.num_gpus:(id_ + 1) *
                    tf.shape(x)[0] // FLAGS.num_gpus]
        n = tf.shape(tower_x)[0]
        gen, x_gen = generator(None, n, n_z, is_training)
        x_critic = discriminator(tower_x, is_training)
        x_gen_critic = discriminator(x_gen, is_training)
        gen_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         scope='generator')
        disc_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope='discriminator')
        disc_loss = -tf.reduce_mean(x_critic - x_gen_critic)
        gen_loss = -tf.reduce_mean(x_gen_critic)
        disc_grads = optimizer.compute_gradients(disc_loss,
                                                 var_list=disc_var_list)
        gen_grads = optimizer.compute_gradients(gen_loss,
                                                var_list=gen_var_list)
        grads = disc_grads + gen_grads
        return grads, gen_loss, disc_loss

    tower_losses = []
    tower_grads = []
    for i in range(FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('tower_%d' % i):
                grads, gen_loss, disc_loss = build_tower_graph(x, i)
                tower_losses.append([gen_loss, disc_loss])
                tower_grads.append(grads)
    gen_loss, disc_loss = multi_gpu.average_losses(tower_losses)
    w_distance = -disc_loss
    grads = multi_gpu.average_gradients(tower_grads)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        infer_op = optimizer.apply_gradients(grads)

    # Clip weights of the critic to ensure 1-Lipschitz
    disc_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='discriminator')
    with tf.control_dependencies([infer_op]):
        clip_op = tf.group(*[
            var.assign(tf.clip_by_value(var, -0.01, 0.01))
            for var in disc_var_list
        ])

    # Generate images
    _, eval_x_gen = generator(None, gen_size, n_z, False)

    # Run the inference
    with multi_gpu.create_session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epochs + 1):
            np.random.shuffle(x_train)
            w_losses = []
            time_train = -time.time()
            for t in range(iters):
                iter = t + 1
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, _, w_loss = sess.run([infer_op, clip_op, w_distance],
                                        feed_dict={
                                            x: x_batch,
                                            is_training: True
                                        })
                w_losses.append(w_loss)

                if iter % print_freq == 0:
                    print('Epoch={} Iter={} ({:.3f}s/iter): '
                          'wasserstein distance = {}'.format(
                              epoch, iter,
                              (time.time() + time_train) / print_freq,
                              np.mean(w_losses)))
                    w_losses = []

                if iter % save_freq == 0:
                    images = sess.run(eval_x_gen)
                    name = "results/wgan/wgan.epoch.{}.iter.{}.png".format(
                        epoch, iter)
                    save_image_collections(images, name, scale_each=True)

                if iter % print_freq == 0:
                    time_train = -time.time()
Example 14
def main():
    # manual seed
    #seed = random.randint(0, 10000) # fix seed
    seed = 1234  # N=100, K=3
    print("Random Seed: ", seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # load MNIST data ---------------------------------------------------------
    data_path = os.path.join('../data/', 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
            dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid]).astype('float32')

    # model parameters --------------------------------------------------------
    K = 10
    D = 40
    dim_z = K
    dim_h = D
    dim_x = x_train.shape[1]  # 784
    N = x_train.shape[0]

    # Define training/evaluation parameters ---------------------------------------------
    resume = False
    epoches = 50  # 2000
    save_freq = 5
    batch_size = 100
    train_iters = int(np.ceil(N / batch_size))

    learning_rate = 0.001
    anneal_lr_freq = 10
    anneal_lr_rate = 0.9
    n_particles = 20

    n_gen = 100

    result_path = "./results/3_gmvae"

    @zs.reuse(scope='decoder')
    def vae(observed,
            n,
            n_particles,
            is_training,
            dim_h=40,
            dim_z=10,
            dim_x=784):
        '''decoder: z-->h-->x
        n: batch_size
        dim_z: K = 10
        dim_x: 784
        dim_h: D = 40
        '''
        with zs.BayesianNet(observed=observed) as model:
            normalizer_params = {
                'is_training': is_training,
                'updates_collections': None
            }
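            # Learnable logits `pai` parameterize the categorical prior p(z)
            # over the dim_z mixture components.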
            pai = tf.get_variable('pai',
                                  shape=[dim_z],
                                  dtype=tf.float32,
                                  trainable=True,
                                  initializer=tf.constant_initializer(1.0))
            n_pai = tf.tile(tf.expand_dims(pai, 0), [n, 1])
            z = zs.OnehotCategorical('z',
                                     logits=n_pai,
                                     dtype=tf.float32,
                                     n_samples=n_particles)
            mu = tf.get_variable('mu',
                                 shape=[dim_z, dim_h],
                                 dtype=tf.float32,
                                 initializer=tf.random_uniform_initializer(
                                     -1, 1))
            log_sigma = tf.get_variable(
                'log_sigma',
                shape=[dim_z, dim_h],
                dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-3, -2))
            h_mean = tf.reshape(
                tf.matmul(tf.reshape(z, [-1, dim_z]), mu),
                [n_particles, -1, dim_h])  # [n_particles, None, dim_x]
            h_logstd = tf.reshape(
                tf.matmul(tf.reshape(z, [-1, dim_z]), log_sigma),
                [n_particles, -1, dim_h])

            h = zs.Normal(
                'h',
                mean=h_mean,
                logstd=h_logstd,
                #n_samples=n_particles,
                group_event_ndims=1)
            lx_h = layers.fully_connected(
                h,
                512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params
            )
            lx_h = layers.fully_connected(
                lx_h,
                512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params
            )
            x_logits = layers.fully_connected(
                lx_h, dim_x, activation_fn=None)  # the log odds of being 1
            x = zs.Bernoulli(
                'x',
                x_logits,
                #n_samples=n_particles,
                group_event_ndims=1)
        return model, x_logits, h, z.tensor

    @zs.reuse(scope='encoder')
    def q_net(x, dim_h, n_particles, is_training):
        '''encoder: x-->h'''
        with zs.BayesianNet() as variational:
            normalizer_params = {
                'is_training': is_training,
                # 'updates_collections': None
            }
            lh_x = layers.fully_connected(
                tf.to_float(x),
                512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            lh_x = tf.contrib.layers.dropout(lh_x,
                                             keep_prob=0.9,
                                             is_training=is_training)
            lh_x = layers.fully_connected(
                lh_x,
                512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            lh_x = tf.contrib.layers.dropout(lh_x,
                                             keep_prob=0.9,
                                             is_training=is_training)
            h_mean = layers.fully_connected(
                lh_x,
                dim_h,
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            h_logstd = layers.fully_connected(
                lh_x,
                dim_h,
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            h = zs.Normal('h',
                          mean=h_mean,
                          logstd=h_logstd,
                          n_samples=n_particles,
                          group_event_ndims=1)
        return variational

    x_ph = tf.placeholder(tf.int32, shape=[None, dim_x], name='x_ph')
    x_orig_ph = tf.placeholder(tf.float32,
                               shape=[None, dim_x],
                               name='x_orig_ph')
    x_bin = tf.cast(
        tf.less(tf.random_uniform(tf.shape(x_orig_ph), 0, 1), x_orig_ph),
        tf.int32)
    is_training_ph = tf.placeholder(tf.bool, shape=[], name='is_training_ph')

    n = tf.shape(x_ph)[0]

    def log_joint(observed):
        z_obs = tf.eye(dim_z, batch_shape=[n_particles, n])
        z_obs = tf.transpose(z_obs, [2, 0, 1, 3])  # [K, n_p, bs, K]
        log_pz_list = []
        log_ph_z_list = []
        log_px_h = None
        for i in range(dim_z):
            observed['z'] = z_obs[i, :]  # the i-th dimension is 1
            model, _, _, _ = vae(observed,
                                 n,
                                 n_particles,
                                 is_training_ph,
                                 dim_h=dim_h,
                                 dim_z=dim_z,
                                 dim_x=dim_x)
            log_pz_i, log_ph_z_i, log_px_h = model.local_log_prob(
                ['z', 'h', 'x'])
            log_pz_list.append(log_pz_i)
            log_ph_z_list.append(log_ph_z_i)
        log_pz = tf.stack(log_pz_list, axis=0)
        log_ph_z = tf.stack(log_ph_z_list, axis=0)
        # p(X, H) = p(X|H) sum_Z(p(Z) * p(H|Z))
        # log p(X, H) = log p(X|H) + log sum_Z exp(log p(Z) + log p(H|Z))
        log_p_xh = log_px_h + tf.reduce_logsumexp(log_pz + log_ph_z,
                                                  axis=0)  # log p(X, H)
        return log_p_xh

    variational = q_net(x_ph, dim_h, n_particles, is_training_ph)
    qh_samples, log_qh = variational.query('h',
                                           outputs=True,
                                           local_log_prob=True)

    x_obs = tf.tile(tf.expand_dims(x_ph, 0), [n_particles, 1, 1])

    lower_bound = zs.sgvb(log_joint,
                          observed={'x': x_obs},
                          latent={'h': [qh_samples, log_qh]},
                          axis=0)

    mean_lower_bound = tf.reduce_mean(lower_bound)
    with tf.name_scope('neg_lower_bound'):
        neg_lower_bound = tf.reduce_mean(-mean_lower_bound)

    train_vars = tf.trainable_variables()
    with tf.variable_scope('decoder', reuse=True):
        pai = tf.get_variable('pai')
        mu = tf.get_variable('mu')
        log_sigma = tf.get_variable('log_sigma')

    clip_pai = pai.assign(tf.clip_by_value(pai, 0.7, 1.3))

    # _, pai_var = tf.nn.moments(pai, axes=[-1])
    # _, mu_var = tf.nn.moments(mu, axes=[0, 1], keep_dims=False)
    # regularizer = tf.add_n([tf.nn.l2_loss(v) for v in train_vars
    #                     if not 'pai' in v.name and not 'mu' in v.name])
    # loss = neg_lower_bound + pai_var - mu_var # + 1e-4 * regularizer # loss -------------
    loss = neg_lower_bound  #+ 0.001 * tf.nn.l2_loss(mu-1)

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')

    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    grads_and_vars = optimizer.compute_gradients(loss)
    clipped_gvs = [(tf.clip_by_value(grad, -5., 5.), var)
                   for grad, var in grads_and_vars]
    infer = optimizer.apply_gradients(clipped_gvs)

    # Generate images -----------------------------------------------------
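    # Feed each of the dim_z one-hot cluster codes ten times, so the generated
    # grid contains ten samples per mixture component.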
    z_manual_feed = tf.eye(dim_z, batch_shape=[10])  # [10, K, K]
    z_manual_feed = tf.transpose(z_manual_feed, [1, 0, 2])  # [K, 10, K]
    _, x_logits, _, z_onehot = vae(
        {'z': z_manual_feed},
        10,
        n_particles=1,
        is_training=False,
        dim_h=dim_h,
        dim_z=dim_z,
        dim_x=dim_x
    )  # n and n_particles do not matter, since we have manually fed z
    print('x_logits:', x_logits.shape.as_list())  # [1, 100, 784]
    x_gen = tf.reshape(tf.sigmoid(x_logits), [-1, 28, 28, 1])
    z_gen = tf.argmax(tf.reshape(z_onehot, [-1, dim_z]), axis=1)

    # tensorboard summary ---------------------------------------------------
    image_for_summ = []
    for i in range(n_gen // 10):
        tmp = [x_gen[j + i * 10, :] for j in range(10)]
        tmp = tf.concat(tmp, 1)
        image_for_summ.append(tmp)
    image_for_summ = tf.expand_dims(tf.concat(image_for_summ, 0), 0)
    print('image_for_summ:', image_for_summ.shape.as_list())
    gen_image_summ = tf.summary.image('gen_images',
                                      image_for_summ,
                                      max_outputs=100)
    lb_summ = tf.summary.scalar("lower_bound", mean_lower_bound)
    lr_summ = tf.summary.scalar("learning_rate", learning_rate_ph)
    loss_summ = tf.summary.scalar('loss', loss)

    for var in train_vars:
        tf.summary.histogram(var.name, var)
    for grad, _ in grads_and_vars:
        tf.summary.histogram(grad.name, grad)

    for i in train_vars:
        print(i.name, i.get_shape())
    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    saver = tf.train.Saver(max_to_keep=10)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.3

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        # Restore from the latest checkpoint
        ckpt_file = tf.train.latest_checkpoint(result_path)
        begin_epoch = 1
        if ckpt_file is not None and resume:  # resume ---------------------------------------
            print('Restoring model from {}...'.format(ckpt_file))
            begin_epoch = int(ckpt_file.split('.')[-2]) + 1
            saver.restore(sess, ckpt_file)

        x_train_normed = x_train  # no normalization
        x_train_normed_no_shuffle = x_train_normed

        log_dir = './log/3_gmvae/'
        if os.path.exists(log_dir):
            shutil.rmtree(log_dir)
        summary_writer = tf.summary.FileWriter(log_dir,
                                               graph=tf.get_default_graph())

        global mu_res, log_sigma_res, pai_res
        global gen_images, z_gen_res, epoch
        print('training...')  # ----------------------------------------------
        pai_res_0, mu_res_0, log_sigma_res_0 = sess.run([pai, mu, log_sigma])
        global_step = 0
        for epoch in tqdm(range(begin_epoch, epoches + 1)):
            time_epoch = -time.time()
            if epoch % anneal_lr_freq == 0:
                learning_rate *= anneal_lr_rate
            np.random.shuffle(x_train_normed)  # shuffle training data
            lbs = []

            for t in tqdm(range(train_iters)):
                global_step += 1
                x_batch = x_train_normed[t * batch_size:(t + 1) *
                                         batch_size]  # get batched data
                x_batch_bin = sess.run(x_bin, feed_dict={x_orig_ph: x_batch})
                # sess.run(clip_pai)
                _, lb, merge_all = sess.run(
                    [infer, mean_lower_bound, merged_summary_op],
                    feed_dict={
                        x_ph: x_batch_bin,
                        learning_rate_ph: learning_rate,
                        is_training_ph: True
                    })
                lbs.append(lb)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, time_epoch, np.mean(lbs)))
            # print(grad_var_res[-3:])

            summary_writer.add_summary(merge_all, global_step=epoch)

            if epoch % save_freq == 0:  # save ---------------------------------------------------
                print('Saving model...')
                save_path = os.path.join(result_path,
                                         "gmvae.epoch.{}.ckpt".format(epoch))
                if not os.path.exists(os.path.dirname(save_path)):
                    os.makedirs(os.path.dirname(save_path))
                saver.save(sess, save_path)

                gen_images, z_gen_res = sess.run(
                    [x_gen, z_gen])  #, feed_dict={is_training_ph: False})

                # dump data
                pai_res, mu_res, log_sigma_res = sess.run([pai, mu, log_sigma])
                data_dump = {
                    'epoch': epoch,
                    'images': gen_images,
                    'clusters': z_gen_res,
                    'pai_0': pai_res_0,
                    'mu_0': mu_res_0,
                    'log_sigma_0': log_sigma_res_0,
                    'pai_res': pai_res,
                    'mu_res': mu_res,
                    'log_sigma_res': log_sigma_res
                }
                pickle.dump(
                    data_dump,
                    open(
                        os.path.join(
                            result_path,
                            'gmvae_results_epoch_{}.pkl'.format(epoch)), 'wb'),
                    protocol=2)
                save_image_with_clusters(
                    gen_images,
                    z_gen_res,
                    filename="results/3_gmvae/gmvae_epoch_{}.png".format(
                        epoch))
                print('Done')

        pai_res, mu_res, log_sigma_res = sess.run([pai, mu, log_sigma])
        print("Random Seed: ", seed)
        data_dump = {
            'epoch': epoch,
            'images': gen_images,
            'clusters': z_gen_res,
            'pai_0': pai_res_0,
            'mu_0': mu_res_0,
            'log_sigma_0': log_sigma_res_0,
            'pai_res': pai_res,
            'mu_res': mu_res,
            'log_sigma_res': log_sigma_res
        }
        pickle.dump(data_dump,
                    open(
                        os.path.join(
                            result_path,
                            'gmvae_results_epoch_{}.pkl'.format(epoch)), 'wb'),
                    protocol=2)
        plot_images_and_clusters(gen_images,
                                 z_gen_res,
                                 epoch,
                                 save_path=result_path,
                                 ncol=10)
Example n. 15
def main():
    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.random.binomial(1, x_train, size=x_train.shape)
    n_x = x_train.shape[1]

    # Define model parameters
    n_z = 40

    @zs.reuse('model')
    def vae(observed, n, n_x, n_z):
        with zs.BayesianNet(observed=observed) as model:
            z_mean = tf.zeros([n, n_z])
            z_logstd = tf.zeros([n, n_z])
            z = zs.Normal('z', z_mean, logstd=z_logstd, group_event_ndims=1)
            lx_z = layers.fully_connected(z, 500)
            lx_z = layers.fully_connected(lx_z, 500)
            x_logits = layers.fully_connected(lx_z, n_x, activation_fn=None)
            x = zs.Bernoulli('x', x_logits, group_event_ndims=1)
        return model, x_logits

    @zs.reuse('variational')
    def q_net(x, n_z):
        with zs.BayesianNet() as variational:
            lz_x = layers.fully_connected(tf.to_float(x), 500)
            lz_x = layers.fully_connected(lz_x, 500)
            z_mean = layers.fully_connected(lz_x, n_z, activation_fn=None)
            z_logstd = layers.fully_connected(lz_x, n_z, activation_fn=None)
            z = zs.Normal('z', z_mean, logstd=z_logstd, group_event_ndims=1)
        return variational

    x = tf.placeholder(tf.int32, shape=[None, n_x], name='x')
    n = tf.shape(x)[0]

    def log_joint(observed):
        model, _ = vae(observed, n, n_x, n_z)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net(x, n_z)
    qz_samples, log_qz = variational.query('z',
                                           outputs=True,
                                           local_log_prob=True)
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint,
                observed={'x': x},
                latent={'z': [qz_samples, log_qz]}))
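    # zs.sgvb estimates the evidence lower bound
    #   L(x) = E_{z ~ q(z|x)}[log p(x, z) - log q(z|x)]
    # by plugging the reparameterized samples qz_samples into log_joint and
    # subtracting log_qz, so minimizing -lower_bound below maximizes the bound.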

    optimizer = tf.train.AdamOptimizer(0.001)
    infer = optimizer.minimize(-lower_bound)

    # Generate images
    n_gen = 100
    _, x_logits = vae({}, n_gen, n_x, n_z)
    x_gen = tf.reshape(tf.sigmoid(x_logits), [-1, 28, 28, 1])

    # Define training parameters
    epochs = 500
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 1

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epochs + 1):
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer, lower_bound], feed_dict={x: x_batch})
                lbs.append(lb)

            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % save_freq == 0:
                images = sess.run(x_gen)
                name = "results/vae/vae.epoch.{}.png".format(epoch)
                save_image_collections(images, name)


def main():
    # Load MNIST
    data_path = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid])
    x_test = np.random.binomial(1, x_test, size=x_test.shape)

    # Define model parameters
    x_dim = x_train.shape[1]
    z_dim = 40

    # Build the computation graph

    # how many samples to draw from the distribution; more samples give more accurate estimates
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")

    # input data to feed the variational
    x_input = tf.placeholder(tf.float32, shape=[None, x_dim], name="x")
    x = tf.cast(tf.less(tf.random_uniform(tf.shape(x_input)), x_input),
                tf.int32)
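    # The cast above performs dynamic binarization: each pixel value in [0, 1] is
    # used as the probability of a Bernoulli draw, so a fresh binary image is
    # sampled every time the graph is run.  A NumPy sketch of the same idea:
    #   x_bin = (np.random.uniform(size=x_batch.shape) < x_batch).astype(np.int32)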

    # batch size
    n = tf.placeholder(tf.int32, shape=[], name="n")

    # add random noise to the variance of the q_model so as to
    # get more varied samples when generating new digits
    std_noise = tf.placeholder_with_default(0., shape=[], name="std_noise")
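    # (assumption: build_q_net is not shown in this snippet; presumably it adds
    # std_noise to the learned standard deviation of q(z|x), e.g.
    #   z = zs.Normal("z", z_mean, std=tf.exp(z_logstd) + std_noise, ...)
    # so a positive std_noise widens the posterior and diversifies the samples)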

    # build the generative model (decoder) and the q_model (variational posterior, i.e. the encoder)
    model = build_gen(x_dim, z_dim, n, n_particles)
    q_model = build_q_net(x, z_dim, n_particles, std_noise)
    variational = q_model.observe()

    # calculate ELBO
    lower_bound = zs.variational.elbo(model, {"x": x},
                                      variational=variational,
                                      axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)
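    # lower_bound is the ELBO averaged over the n_particles samples (axis=0);
    # lower_bound.sgvb() is the reparameterization-trick surrogate, so minimizing
    # `cost` maximizes the bound.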

    # calculate marginal log likelihood
    is_log_likelihood = tf.reduce_mean(
        zs.is_loglikelihood(model, {"x": x}, proposal=variational, axis=0))
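    # With K = n_particles proposal samples z_1..z_K ~ q(z|x), zs.is_loglikelihood
    # returns the importance-sampling estimate
    #   log p(x) ~= logsumexp_k[log p(x, z_k) - log q(z_k | x)] - log K,
    # which becomes tight as K grows (K = 1000 at test time below).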

    # optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    infer_op = optimizer.minimize(cost)

    # define training/evaluation parameters
    epochs = 1000
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    test_freq = 100
    test_batch_size = 400
    test_iters = x_test.shape[0] // test_batch_size
    result_path = "results/vae_digits"
    checkpoints_path = "checkpoints/vae_digits"

    # used to save checkpoints during training
    saver = tf.train.Saver(max_to_keep=10)
    save_model_freq = 100

    # run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # restore the model parameters from the latest checkpoint
        ckpt_file = tf.train.latest_checkpoint(checkpoints_path)
        begin_epoch = 1
        if ckpt_file is not None:
            print('Restoring model from {}...'.format(ckpt_file))
            begin_epoch = int(ckpt_file.split('.')[-2]) + 1
            saver.restore(sess, ckpt_file)

        # begin training
        for epoch in range(begin_epoch, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={
                                     x_input: x_batch,
                                     n_particles: 1,
                                     n: batch_size
                                 })
                lbs.append(lb)
            time_epoch += time.time()
            print("Epoch {} ({:.1f}s): Lower bound = {}".format(
                epoch, time_epoch, np.mean(lbs)))

            # test marginal log likelihood
            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lbs, test_lls = [], []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:(t + 1) *
                                          test_batch_size]
                    test_lb = sess.run(lower_bound,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: 1,
                                           n: test_batch_size
                                       })
                    test_ll = sess.run(is_log_likelihood,
                                       feed_dict={
                                           x: test_x_batch,
                                           n_particles: 1000,
                                           n: test_batch_size
                                       })
                    test_lbs.append(test_lb)
                    test_lls.append(test_ll)
                time_test += time.time()
                print(">>> TEST ({:.1f}s)".format(time_test))
                print(">> Test lower bound = {}".format(np.mean(test_lbs)))
                print('>> Test log likelihood (IS) = {}'.format(
                    np.mean(test_lls)))

            # save model parameters
            if epoch % save_model_freq == 0:
                print('Saving model...')
                save_path = os.path.join(checkpoints_path,
                                         "vae.epoch.{}.ckpt".format(epoch))
                if not os.path.exists(os.path.dirname(save_path)):
                    os.makedirs(os.path.dirname(save_path))
                saver.save(sess, save_path)
                print('Done')

        # random generation of images from latent distribution
        x_gen = tf.reshape(model.observe()["x_mean"], [-1, 28, 28, 1])
        images = sess.run(x_gen, feed_dict={n: 100, n_particles: 1})
        name = os.path.join(result_path, "random_samples.png")
        save_image_collections(images, name)

        # the following code generates 100 samples for each number
        test_n = [3, 2, 1, 90, 95, 23, 11, 0, 84, 7]
        # map each digit to a corresponding sample from the test set so we can generate similar digits
        for i in range(len(test_n)):
            # get the latent code from the variational, using a fixed test-set sample as input
            z = q_model.observe(x=np.expand_dims(x_test[test_n[i]], 0))['z']
            # run the computation graph adding noise to computed variance to get different output samples
            latent = sess.run(z,
                              feed_dict={
                                  x_input:
                                  np.expand_dims(x_test[test_n[i]], 0),
                                  n: 1,
                                  n_particles: 100,
                                  std_noise: 0.7
                              })
            # get the images from the model, feeding the latent samples z as observations
            x_gen = tf.reshape(
                model.observe(z=latent)["x_mean"], [-1, 28, 28, 1])
            images = sess.run(x_gen, feed_dict={})
            name = os.path.join(result_path, "{}.png".format(i))
            save_image_collections(images, name)
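

# (assumption: as in the earlier examples, the script is presumably run through a
# standard entry point, which is not part of the shown snippet)
if __name__ == "__main__":
    main()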