Code Example #1
File: step.py  Project: sdy1106/VLAE
    def build_tower_graph_font(id_):
        tower_x = x[id_ * tf.shape(x)[0] // FLAGS.num_gpus:(id_ + 1) *
                    tf.shape(x)[0] // FLAGS.num_gpus]
        n = tf.shape(tower_x)[0]
        x_obs = tf.tile(tf.expand_dims(tower_x, 0), [1, 1, 1])

        def log_joint(observed):
            decoder, _ = VAE(observed, n, is_training)
            log_pz_font, log_pz_char, log_px_z = decoder.local_log_prob(
                ['z_font', 'z_char', 'x'])
            return log_pz_font + log_pz_char + log_px_z

        # train font
        encoder_font, qz_samples_font = q_net_font(None, tower_x, is_training)
        encoder_char, qz_samples_char = q_net_char(None, tower_x, is_training)

        char_mean = tf.tile(tf.reduce_mean(qz_samples_char, 0),
                            (tf.shape(qz_samples_font)[0], 1))
        lower_bound = tf.reduce_mean(
            zs.sgvb(log_joint, {'x': tower_x}, {
                'z_font': [qz_samples_font, log_qz_font],
                'z_char': [char_mean, log_qz_char]
            },
                    axis=0))
        average_loss = tf.reduce_mean(tf.square(qz_samples_char - char_mean))

        font_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope='encoder_font') + \
                        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope='decoder')
        char_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope='encoder_char')
        font_grads = optimizer.compute_gradients(-lower_bound,
                                                 var_list=font_var_list)
        char_grads = optimizer.compute_gradients(average_loss,
                                                 var_list=char_var_list)

        return font_grads, char_grads, lower_bound, average_loss
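
The function above builds one tower of a multi-GPU graph (note the slicing of x by FLAGS.num_gpus); log_qz_font and log_qz_char are referenced but not defined inside this excerpt. The per-tower gradients it returns would normally be averaged across towers before being applied. Below is a minimal sketch of that step; the average_gradients helper and the commented call site are assumptions for illustration, not the project's actual code.

import tensorflow as tf

def average_gradients(tower_grads):
    # tower_grads: one [(grad, var), ...] list per GPU, all in the same order.
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        grads = tf.stack([g for g, _ in grads_and_vars], axis=0)
        averaged.append((tf.reduce_mean(grads, axis=0), grads_and_vars[0][1]))
    return averaged

# Hypothetical call site:
# tower_font_grads = []
# for i in range(FLAGS.num_gpus):
#     with tf.device('/gpu:{}'.format(i)):
#         font_grads, _, _, _ = build_tower_graph_font(i)
#         tower_font_grads.append(font_grads)
# train_font_op = optimizer.apply_gradients(average_gradients(tower_font_grads))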
Code Example #2
    def lower_bound_and_log_likelihood(relaxed=False):
        def log_joint(observed):
            model = vae(observed, n, n_x, n_z, n_k, tau_p, n_particles,
                        relaxed)
            log_pz, log_px_z = model.local_log_prob(['z', 'x'])
            return log_pz + log_px_z

        variational = q_net({}, x, n_z, n_k, tau_q, n_particles, relaxed)
        qz_samples, log_qz = variational.query('z',
                                               outputs=True,
                                               local_log_prob=True)

        lower_bound = tf.reduce_mean(
            zs.sgvb(log_joint, {'x': x_obs}, {'z': [qz_samples, log_qz]},
                    axis=0))

        # Importance sampling estimates of marginal log likelihood
        is_log_likelihood = tf.reduce_mean(
            zs.is_loglikelihood(log_joint, {'x': x_obs},
                                {'z': [qz_samples, log_qz]},
                                axis=0))

        return lower_bound, is_log_likelihood
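
For reference, the two quantities returned here are the SGVB (ELBO) lower bound and the importance-sampling estimate of the marginal log likelihood. With K = n_particles posterior samples along axis 0, they correspond to

    \mathcal{L}(x) = \mathbb{E}_{q(z \mid x)}\big[\log p(x, z) - \log q(z \mid x)\big] \le \log p(x)

    \log p(x) \approx \log \frac{1}{K} \sum_{k=1}^{K} \frac{p(x, z^{(k)})}{q(z^{(k)} \mid x)}, \qquad z^{(k)} \sim q(z \mid x)

where zs.sgvb averages the first estimate and zs.is_loglikelihood computes the log-mean-exp of the importance weights over the particle axis.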
Code Example #3
    def log_joint(observed):
        model = vae(observed, n, n_x, n_z, n_particles, is_training)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net({}, x, n_z, n_particles, is_training)
    qz_samples, log_qz = variational.query('z', outputs=True,
                                           local_log_prob=True)
    # TODO: add tests for repeated calls of flows
    qz_samples, log_qz = zs.planar_normalizing_flow(qz_samples, log_qz,
                                                    n_iters=n_planar_flows)
    qz_samples, log_qz = zs.planar_normalizing_flow(qz_samples, log_qz,
                                                    n_iters=n_planar_flows)

    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'x': x_obs}, {'z': [qz_samples, log_qz]}, axis=0))

    # Importance sampling estimates of log likelihood:
    # Fast, used for evaluation during training
    is_log_likelihood = tf.reduce_mean(
        zs.is_loglikelihood(log_joint, {'x': x_obs},
                            {'z': [qz_samples, log_qz]}, axis=0))

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    grads = optimizer.compute_gradients(-lower_bound)
    infer = optimizer.apply_gradients(grads)

    params = tf.trainable_variables()
    for i in params:
        print(i.name, i.get_shape())
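
The repeated zs.planar_normalizing_flow calls transform the posterior samples and update their log density with the change-of-variables correction

    z_K = f_K \circ \cdots \circ f_1(z_0), \qquad
    \log q_K(z_K) = \log q_0(z_0) - \sum_{k=1}^{K} \log \left| \det \frac{\partial f_k}{\partial z_{k-1}} \right|

which is why the function returns an updated (qz_samples, log_qz) pair that can be fed to zs.sgvb and zs.is_loglikelihood unchanged.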
Code Example #4
    # Labeled
    x_labeled_ph = tf.placeholder(tf.int32, shape=[None, n_x], name='x_l')
    x_labeled_obs = tf.tile(tf.expand_dims(x_labeled_ph, 0),
                            [n_particles, 1, 1])
    y_labeled_ph = tf.placeholder(tf.int32, shape=[None, n_y], name='y_l')
    y_labeled_obs = tf.tile(tf.expand_dims(y_labeled_ph, 0),
                            [n_particles, 1, 1])
    variational = qz_xy(x_labeled_ph, y_labeled_ph, n_z, n_particles)
    qz_samples, log_qz = variational.query('z',
                                           outputs=True,
                                           local_log_prob=True)
    labeled_lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {
            'x': x_labeled_obs,
            'y': y_labeled_obs
        }, {'z': [qz_samples, log_qz]},
                axis=0))

    # Unlabeled
    x_unlabeled_ph = tf.placeholder(tf.int32, shape=[None, n_x], name='x_u')
    n = tf.shape(x_unlabeled_ph)[0]
    y_diag = tf.diag(tf.ones(n_y, dtype=tf.int32))
    y_u = tf.reshape(tf.tile(tf.expand_dims(y_diag, 0), [n, 1, 1]), [-1, n_y])
    x_u = tf.reshape(tf.tile(tf.expand_dims(x_unlabeled_ph, 1), [1, n_y, 1]),
                     [-1, n_x])
    x_unlabeled_obs = tf.tile(tf.expand_dims(x_u, 0), [n_particles, 1, 1])
    y_unlabeled_obs = tf.tile(tf.expand_dims(y_u, 0), [n_particles, 1, 1])
    variational = qz_xy(x_u, y_u, n_z, n_particles)
    qz_samples, log_qz = variational.query('z',
                                           outputs=True,
                                           local_log_prob=True)
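
The unlabeled branch pairs every input row with each of the n_y possible one-hot labels so that the discrete label can later be summed out. A small NumPy sketch of the shapes produced by the tf.diag / tf.tile / tf.reshape construction above (the sizes are illustrative only):

import numpy as np

n, n_x, n_y = 2, 4, 3                    # 2 unlabeled rows, 4 features, 3 classes
x_unlabeled = np.arange(n * n_x).reshape(n, n_x)

y_diag = np.eye(n_y, dtype=np.int32)                                  # [n_y, n_y]
y_u = np.tile(y_diag[None, :, :], (n, 1, 1)).reshape(-1, n_y)         # [n*n_y, n_y]
x_u = np.tile(x_unlabeled[:, None, :], (1, n_y, 1)).reshape(-1, n_x)  # [n*n_y, n_x]

# Each original row now appears n_y times, once per candidate label:
# x_u[0:3] are three copies of x_unlabeled[0], paired with the three
# one-hot rows y_u[0:3].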
Code Example #5

if __name__ == "__main__":
    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[])

    def log_joint(observed):
        model = toy2d_intractable_posterior(observed, n_particles)
        log_pz1, log_pz2 = model.local_log_prob(['z1', 'z2'])
        return log_pz1 + log_pz2

    variational, z_mean, z_logstd = mean_field_variational(n_particles)
    [qz1_samples, log_qz1], [qz2_samples, log_qz2] = variational.query(
        ['z1', 'z2'], outputs=True, local_log_prob=True)
    lower_bound = zs.sgvb(
        log_joint, {}, {'z1': [qz1_samples, log_qz1],
                        'z2': [qz2_samples, log_qz2]}, axis=0)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
    infer = optimizer.minimize(-lower_bound)

    # Set up figure.
    fig = plt.figure(figsize=(8, 8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.ion()
    plt.show(block=False)

    # Set up plotting code.
    def plot_isocontours(ax, func, xlimits, ylimits, numticks=101):
        x = np.linspace(*xlimits, num=numticks)
        y = np.linspace(*ylimits, num=numticks)
        xx, yy = np.meshgrid(x, y)
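        # The excerpt is truncated here. A typical completion (an assumed
        # sketch, not necessarily this project's exact code) evaluates func on
        # the flattened grid and draws the isocontours:
        grid = np.column_stack([xx.ravel(), yy.ravel()])  # [numticks ** 2, 2]
        density = func(grid).reshape(xx.shape)
        ax.contour(xx, yy, density)
        ax.set_xticks([])
        ax.set_yticks([])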
Code Example #6
    y_obs = tf.tile(tf.expand_dims(y, 0), [n_particles, 1, 1])

    def log_joint(observed):
        model, _ = var_dropout(observed, x_obs, n, net_size, n_particles,
                               is_training)
        log_pe = model.local_log_prob(e_names)
        log_py_xe = model.local_log_prob('y')
        return tf.add_n(log_pe) / x_train.shape[0] + log_py_xe

    variational = q({}, n, net_size, n_particles)
    qe_queries = variational.query(e_names, outputs=True, local_log_prob=True)
    qe_samples, log_qes = zip(*qe_queries)
    log_qes = [log_qe / x_train.shape[0] for log_qe in log_qes]
    e_dict = dict(zip(e_names, zip(qe_samples, log_qes)))
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'y': y_obs}, e_dict, axis=0))

    _, h_pred = var_dropout(dict(zip(e_names, qe_samples)), x_obs, n, net_size,
                            n_particles, is_training)
    h_pred = tf.reduce_mean(h_pred, 0)
    y_pred = tf.argmax(h_pred, 1)
    sparse_y = tf.argmax(y, 1)
    acc = tf.reduce_mean(tf.cast(tf.equal(y_pred, sparse_y), tf.float32))

    learning_rate_ph = tf.placeholder(tf.float32, shape=())
    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    infer = optimizer.minimize(-lower_bound)

    params = tf.trainable_variables()
    for i in params:
        print('variable name = {}, shape = {}'.format(i.name, i.get_shape()))
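
Both the prior terms (log_pe) and the variational terms (log_qes) are divided by x_train.shape[0], so the quantity being maximized is a per-example lower bound: the mini-batch likelihood plus a 1/N share of the KL term. With N the training-set size and B the current mini-batch, it is roughly

    \frac{1}{N}\,\mathcal{L} \approx \mathbb{E}_{q(e)}\!\left[\frac{1}{|B|} \sum_{i \in B} \log p(y_i \mid x_i, e)\right] - \frac{1}{N}\,\mathrm{KL}\big(q(e)\,\|\,p(e)\big)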
Code Example #7
File: bayesian_nn.py  Project: trigrass2/zhusuan
def main():
    np.random.seed(1234)
    tf.set_random_seed(1237)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    @zs.reuse('model')
    def bayesianNN(observed, x, n_x, layer_sizes, n_particles):
        with zs.BayesianNet(observed=observed) as model:
            ws = []
            for i, (n_in,
                    n_out) in enumerate(zip(layer_sizes[:-1],
                                            layer_sizes[1:])):
                w_mu = tf.zeros([1, n_out, n_in + 1])
                ws.append(
                    zs.Normal('w' + str(i),
                              w_mu,
                              std=1.,
                              n_samples=n_particles,
                              group_event_ndims=2))

            # forward
            ly_x = tf.expand_dims(
                tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1]), 3)
            for i in range(len(ws)):
                w = tf.tile(ws[i], [1, tf.shape(x)[0], 1, 1])
                ly_x = tf.concat(
                    [ly_x, tf.ones([n_particles,
                                    tf.shape(x)[0], 1, 1])], 2)
                ly_x = tf.matmul(w, ly_x) / \
                    tf.sqrt(tf.to_float(tf.shape(ly_x)[2]))
                if i < len(ws) - 1:
                    ly_x = tf.nn.relu(ly_x)

            y_mean = tf.squeeze(ly_x, [2, 3])
            y_logstd = tf.get_variable('y_logstd',
                                       shape=[],
                                       initializer=tf.constant_initializer(0.))
            y = zs.Normal('y', y_mean, logstd=y_logstd)

        return model, y_mean

    def mean_field_variational(layer_sizes, n_particles):
        with zs.BayesianNet() as variational:
            ws = []
            for i, (n_in,
                    n_out) in enumerate(zip(layer_sizes[:-1],
                                            layer_sizes[1:])):
                w_mean = tf.get_variable(
                    'w_mean_' + str(i),
                    shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                w_logstd = tf.get_variable(
                    'w_logstd_' + str(i),
                    shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                ws.append(
                    zs.Normal('w' + str(i),
                              w_mean,
                              logstd=w_logstd,
                              n_samples=n_particles,
                              group_event_ndims=2))
        return variational

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x = tf.placeholder(tf.float32, shape=[None, n_x])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [n_x] + n_hiddens + [1]
    w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

    def log_joint(observed):
        model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
        log_pws = model.local_log_prob(w_names)
        log_py_xw = model.local_log_prob('y')
        return tf.add_n(log_pws) + log_py_xw * N

    variational = mean_field_variational(layer_sizes, n_particles)
    qw_outputs = variational.query(w_names, outputs=True, local_log_prob=True)
    latent = dict(zip(w_names, qw_outputs))
    y_obs = tf.tile(tf.expand_dims(y, 0), [n_particles, 1])
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'y': y_obs}, latent, axis=0))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    grads = optimizer.compute_gradients(-lower_bound)
    infer = optimizer.apply_gradients(grads)

    # prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y_obs})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y)**2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer, lower_bound],
                                 feed_dict={
                                     n_particles: lb_samples,
                                     x: x_batch,
                                     y: y_batch
                                 })
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_lb, test_rmse, test_ll = sess.run(
                    [lower_bound, rmse, log_likelihood],
                    feed_dict={
                        n_particles: ll_samples,
                        x: x_test,
                        y: y_test
                    })
                print('>> TEST')
                print('>> lower bound = {}, rmse = {}, log_likelihood = {}'.
                      format(test_lb, test_rmse, test_ll))
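
In the evaluation graph, zs.log_mean_exp averages the per-particle likelihoods inside the log, and log(std_y_train) is subtracted to undo the standardization of y, so the reported test log likelihood is approximately

    \log p(y_i \mid x_i, \mathcal{D}) \approx \log \frac{1}{K} \sum_{k=1}^{K} p\big(y_i \mid x_i, w^{(k)}\big) - \log \sigma_y^{\mathrm{train}}, \qquad w^{(k)} \sim q(w)

averaged over the test points, with K = n_particles (ll_samples at test time); the -\log \sigma_y^{\mathrm{train}} term is the density correction for mapping y back to its original scale.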
Code Example #8
File: gm_vae.py  Project: neycyanshi/statML
            model, _, _ = vae(ob_dict, n, n_x, n_h, n_z, n_particles)
            log_pz_i, log_ph_z_i, log_px_h = model.local_log_prob(
                ['z', 'h', 'x'])
            log_pz.append(log_pz_i)
            log_ph_z.append(log_ph_z_i)
        log_pz = tf.stack(log_pz, axis=-1)
        log_ph_z = tf.stack(log_ph_z, axis=-1)
        # p(X, H) = p(X|H) * sum_i[ p(z_i) * p(H|z_i) ]
        return log_px_h + tf.reduce_logsumexp(log_pz + log_ph_z, axis=-1)

    variational = q_net(x, n_h, n_particles)
    qh_samples, log_qh = variational.query('h',
                                           outputs=True,
                                           local_log_prob=True)
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'x': x_obs}, {'h': [qh_samples, log_qh]}, axis=0))

    optimizer = tf.train.AdamOptimizer(learning_rate)
    infer = optimizer.minimize(-lower_bound)

    # Computation graph of generating images
    n_gen = 100
    n_per_class = n_gen // n_z
    z_gen = np.zeros([n_gen, n_z])
    for i in range(n_z):
        z_gen[i * n_per_class:(i + 1) * n_per_class, i] = 1
    z_gen = np.expand_dims(z_gen, axis=0)
    _, x_logits, z_onehot = vae({'z': z_gen},
                                n_gen,
                                n_x,
                                n_h,
Code Example #9
    y = tf.placeholder(tf.float32, shape=[None])
    y_obs = tf.tile(tf.expand_dims(y, 0), [n_particles, 1])
    layer_sizes = [n_x] + n_hiddens + [1]
    w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

    def log_joint(observed):
        model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
        log_pws = model.local_log_prob(w_names)
        log_py_xw = model.local_log_prob('y')
        return tf.add_n(log_pws) + log_py_xw * N

    variational = mean_field_variational(layer_sizes, n_particles)
    qw_outputs = variational.query(w_names, outputs=True, local_log_prob=True)
    latent = dict(zip(w_names, qw_outputs))
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'y': y_obs}, latent, axis=0))

    learning_rate_ph = tf.placeholder(tf.float32, shape=[])
    optimizer = tf.train.AdamOptimizer(learning_rate_ph)
    grads = optimizer.compute_gradients(-lower_bound)
    infer = optimizer.apply_gradients(grads)

    # prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y_obs})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)
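
Because y was standardized before training, the RMSE computed here is rescaled back to the original units of the targets:

    \mathrm{rmse} = \sigma_y^{\mathrm{train}} \sqrt{\frac{1}{M} \sum_{i=1}^{M} \big(\bar{y}_i - y_i\big)^2}, \qquad \bar{y}_i = \frac{1}{K} \sum_{k=1}^{K} y^{(k)}_{\mathrm{mean}, i}

where \bar{y}_i is the prediction averaged over the K = n_particles sampled weight configurations.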
Code Example #10
def main():
    # manual seed
    #seed = random.randint(0, 10000) # fix seed
    seed = 1234  # N=100, K=3
    print("Random Seed: ", seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # load MNIST data ---------------------------------------------------------
    data_path = os.path.join('../data/', 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
            dataset.load_mnist_realval(data_path)
    x_train = np.vstack([x_train, x_valid]).astype('float32')

    # model parameters --------------------------------------------------------
    K = 10
    D = 40
    dim_z = K
    dim_h = D
    dim_x = x_train.shape[1]  # 784
    N = x_train.shape[0]

    # Define training/evaluation parameters ---------------------------------------------
    resume = False
    epoches = 50  # 2000
    save_freq = 5
    batch_size = 100
    train_iters = int(np.ceil(N / batch_size))

    learning_rate = 0.001
    anneal_lr_freq = 10
    anneal_lr_rate = 0.9
    n_particles = 20

    n_gen = 100

    result_path = "./results/3_gmvae"

    @zs.reuse(scope='decoder')
    def vae(observed,
            n,
            n_particles,
            is_training,
            dim_h=40,
            dim_z=10,
            dim_x=784):
        '''decoder: z-->h-->x
        n: batch_size
        dim_z: K = 10
        dim_x: 784
        dim_h: D = 40
        '''
        with zs.BayesianNet(observed=observed) as model:
            normalizer_params = {
                'is_training': is_training,
                'updates_collections': None
            }
            pai = tf.get_variable('pai',
                                  shape=[dim_z],
                                  dtype=tf.float32,
                                  trainable=True,
                                  initializer=tf.constant_initializer(1.0))
            n_pai = tf.tile(tf.expand_dims(pai, 0), [n, 1])
            z = zs.OnehotCategorical('z',
                                     logits=n_pai,
                                     dtype=tf.float32,
                                     n_samples=n_particles)
            mu = tf.get_variable('mu',
                                 shape=[dim_z, dim_h],
                                 dtype=tf.float32,
                                 initializer=tf.random_uniform_initializer(
                                     -1, 1))
            log_sigma = tf.get_variable(
                'log_sigma',
                shape=[dim_z, dim_h],
                dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-3, -2))
            h_mean = tf.reshape(
                tf.matmul(tf.reshape(z, [-1, dim_z]), mu),
                [n_particles, -1, dim_h])  # [n_particles, None, dim_h]
            h_logstd = tf.reshape(
                tf.matmul(tf.reshape(z, [-1, dim_z]), log_sigma),
                [n_particles, -1, dim_h])

            h = zs.Normal(
                'h',
                mean=h_mean,
                logstd=h_logstd,
                #n_samples=n_particles,
                group_event_ndims=1)
            lx_h = layers.fully_connected(
                h,
                512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params
            )
            lx_h = layers.fully_connected(
                lx_h,
                512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params
            )
            x_logits = layers.fully_connected(
                lx_h, dim_x, activation_fn=None)  # the log odds of being 1
            x = zs.Bernoulli(
                'x',
                x_logits,
                #n_samples=n_particles,
                group_event_ndims=1)
        return model, x_logits, h, z.tensor

    @zs.reuse(scope='encoder')
    def q_net(x, dim_h, n_particles, is_training):
        '''encoder: x-->h'''
        with zs.BayesianNet() as variational:
            normalizer_params = {
                'is_training': is_training,
                # 'updates_collections': None
            }
            lh_x = layers.fully_connected(
                tf.to_float(x),
                512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            lh_x = tf.contrib.layers.dropout(lh_x,
                                             keep_prob=0.9,
                                             is_training=is_training)
            lh_x = layers.fully_connected(
                lh_x,
                512,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params=normalizer_params,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            lh_x = tf.contrib.layers.dropout(lh_x,
                                             keep_prob=0.9,
                                             is_training=is_training)
            h_mean = layers.fully_connected(
                lh_x,
                dim_h,
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            h_logstd = layers.fully_connected(
                lh_x,
                dim_h,
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            h = zs.Normal('h',
                          mean=h_mean,
                          logstd=h_logstd,
                          n_samples=n_particles,
                          group_event_ndims=1)
        return variational

    x_ph = tf.placeholder(tf.int32, shape=[None, dim_x], name='x_ph')
    x_orig_ph = tf.placeholder(tf.float32,
                               shape=[None, dim_x],
                               name='x_orig_ph')
    x_bin = tf.cast(
        tf.less(tf.random_uniform(tf.shape(x_orig_ph), 0, 1), x_orig_ph),
        tf.int32)
    is_training_ph = tf.placeholder(tf.bool, shape=[], name='is_training_ph')

    n = tf.shape(x_ph)[0]

    def log_joint(observed):
        z_obs = tf.eye(dim_z, batch_shape=[n_particles, n])
        z_obs = tf.transpose(z_obs, [2, 0, 1, 3])  # [K, n_p, bs, K]
        log_pz_list = []
        log_ph_z_list = []
        log_px_h = None
        for i in range(dim_z):
            observed['z'] = z_obs[i, :]  # the i-th dimension is 1
            model, _, _, _ = vae(observed,
                                 n,
                                 n_particles,
                                 is_training_ph,
                                 dim_h=dim_h,
                                 dim_z=dim_z,
                                 dim_x=dim_x)
            log_pz_i, log_ph_z_i, log_px_h = model.local_log_prob(
                ['z', 'h', 'x'])
            log_pz_list.append(log_pz_i)
            log_ph_z_list.append(log_ph_z_i)
        log_pz = tf.stack(log_pz_list, axis=0)
        log_ph_z = tf.stack(log_ph_z_list, axis=0)
        # p(X, H) = p(X|H) sum_Z(p(Z) * p(H|Z))
        # log p(X, H) = log p(X|H) + log sum_Z exp(log p(Z) + log p(H|Z))
        log_p_xh = log_px_h + tf.reduce_logsumexp(log_pz + log_ph_z,
                                                  axis=0)  # log p(X, H)
        return log_p_xh

    variational = q_net(x_ph, dim_h, n_particles, is_training_ph)
    qh_samples, log_qh = variational.query('h',
                                           outputs=True,
                                           local_log_prob=True)

    x_obs = tf.tile(tf.expand_dims(x_ph, 0), [n_particles, 1, 1])

    lower_bound = zs.sgvb(log_joint,
                          observed={'x': x_obs},
                          latent={'h': [qh_samples, log_qh]},
                          axis=0)

    mean_lower_bound = tf.reduce_mean(lower_bound)
    with tf.name_scope('neg_lower_bound'):
        neg_lower_bound = -mean_lower_bound

    train_vars = tf.trainable_variables()
    with tf.variable_scope('decoder', reuse=True):
        pai = tf.get_variable('pai')
        mu = tf.get_variable('mu')
        log_sigma = tf.get_variable('log_sigma')

    clip_pai = pai.assign(tf.clip_by_value(pai, 0.7, 1.3))

    # _, pai_var = tf.nn.moments(pai, axes=[-1])
    # _, mu_var = tf.nn.moments(mu, axes=[0, 1], keep_dims=False)
    # regularizer = tf.add_n([tf.nn.l2_loss(v) for v in train_vars
    #                     if not 'pai' in v.name and not 'mu' in v.name])
    # loss = neg_lower_bound + pai_var - mu_var # + 1e-4 * regularizer # loss -------------
    loss = neg_lower_bound  #+ 0.001 * tf.nn.l2_loss(mu-1)

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')

    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    grads_and_vars = optimizer.compute_gradients(loss)
    clipped_gvs = [(tf.clip_by_value(grad, -5., 5.), var)
                   for grad, var in grads_and_vars]
    infer = optimizer.apply_gradients(clipped_gvs)

    # Generate images -----------------------------------------------------
    z_manual_feed = tf.eye(dim_z, batch_shape=[10])  # [10, K, K]
    z_manual_feed = tf.transpose(z_manual_feed, [1, 0, 2])  # [K, 10, K]
    _, x_logits, _, z_onehot = vae(
        {'z': z_manual_feed},
        10,
        n_particles=1,
        is_training=False,
        dim_h=dim_h,
        dim_z=dim_z,
        dim_x=dim_x
    )  # n and n_particles do not matter, since we have manually fed z
    print('x_logits:', x_logits.shape.as_list())  # [1, 100, 784]
    x_gen = tf.reshape(tf.sigmoid(x_logits), [-1, 28, 28, 1])
    z_gen = tf.argmax(tf.reshape(z_onehot, [-1, dim_z]), axis=1)

    # tensorboard summary ---------------------------------------------------
    image_for_summ = []
    for i in range(n_gen // 10):
        tmp = [x_gen[j + i * 10, :] for j in range(10)]
        tmp = tf.concat(tmp, 1)
        image_for_summ.append(tmp)
    image_for_summ = tf.expand_dims(tf.concat(image_for_summ, 0), 0)
    print('image_for_summ:', image_for_summ.shape.as_list())
    gen_image_summ = tf.summary.image('gen_images',
                                      image_for_summ,
                                      max_outputs=100)
    lb_summ = tf.summary.scalar("lower_bound", mean_lower_bound)
    lr_summ = tf.summary.scalar("learning_rate", learning_rate_ph)
    loss_summ = tf.summary.scalar('loss', loss)

    for var in train_vars:
        tf.summary.histogram(var.name, var)
    for grad, _ in grads_and_vars:
        tf.summary.histogram(grad.name, grad)

    for i in train_vars:
        print(i.name, i.get_shape())
    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    saver = tf.train.Saver(max_to_keep=10)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.3

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        # Restore from the latest checkpoint
        ckpt_file = tf.train.latest_checkpoint(result_path)
        begin_epoch = 1
        if ckpt_file is not None and resume:  # resume ---------------------------------------
            print('Restoring model from {}...'.format(ckpt_file))
            begin_epoch = int(ckpt_file.split('.')[-2]) + 1
            saver.restore(sess, ckpt_file)

        x_train_normed = x_train  # no normalization
        x_train_normed_no_shuffle = x_train_normed

        log_dir = './log/3_gmvae/'
        if os.path.exists(log_dir):
            shutil.rmtree(log_dir)
        summary_writer = tf.summary.FileWriter(log_dir,
                                               graph=tf.get_default_graph())

        global mu_res, log_sigma_res, pai_res
        global gen_images, z_gen_res, epoch
        print('training...')  # ------------------------------------------------
        pai_res_0, mu_res_0, log_sigma_res_0 = sess.run([pai, mu, log_sigma])
        global_step = 0
        for epoch in tqdm(range(begin_epoch, epoches + 1)):
            time_epoch = -time.time()
            if epoch % anneal_lr_freq == 0:
                learning_rate *= anneal_lr_rate
            np.random.shuffle(x_train_normed)  # shuffle training data
            lbs = []

            for t in tqdm(range(train_iters)):
                global_step += 1
                x_batch = x_train_normed[t * batch_size:(t + 1) *
                                         batch_size]  # get batched data
                x_batch_bin = sess.run(x_bin, feed_dict={x_orig_ph: x_batch})
                # sess.run(clip_pai)
                _, lb, merge_all = sess.run(
                    [infer, mean_lower_bound, merged_summary_op],
                    feed_dict={
                        x_ph: x_batch_bin,
                        learning_rate_ph: learning_rate,
                        is_training_ph: True
                    })
                lbs.append(lb)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s): Lower bound = {}'.format(
                epoch, time_epoch, np.mean(lbs)))
            # print(grad_var_res[-3:])

            summary_writer.add_summary(merge_all, global_step=epoch)

            if epoch % save_freq == 0:  # save ---------------------------------------------------
                print('Saving model...')
                save_path = os.path.join(result_path,
                                         "gmvae.epoch.{}.ckpt".format(epoch))
                if not os.path.exists(os.path.dirname(save_path)):
                    os.makedirs(os.path.dirname(save_path))
                saver.save(sess, save_path)

                gen_images, z_gen_res = sess.run(
                    [x_gen, z_gen])  #, feed_dict={is_training_ph: False})

                # dump data
                pai_res, mu_res, log_sigma_res = sess.run([pai, mu, log_sigma])
                data_dump = {
                    'epoch': epoch,
                    'images': gen_images,
                    'clusters': z_gen_res,
                    'pai_0': pai_res_0,
                    'mu_0': mu_res_0,
                    'log_sigma_0': log_sigma_res_0,
                    'pai_res': pai_res,
                    'mu_res': mu_res,
                    'log_sigma_res': log_sigma_res
                }
                pickle.dump(
                    data_dump,
                    open(
                        os.path.join(
                            result_path,
                            'gmvae_results_epoch_{}.pkl'.format(epoch)), 'wb'),
                    protocol=2)
                save_image_with_clusters(
                    gen_images,
                    z_gen_res,
                    filename="results/3_gmvae/gmvae_epoch_{}.png".format(
                        epoch))
                print('Done')

        pai_res, mu_res, log_sigma_res = sess.run([pai, mu, log_sigma])
        print("Random Seed: ", seed)
        data_dump = {
            'epoch': epoch,
            'images': gen_images,
            'clusters': z_gen_res,
            'pai_0': pai_res_0,
            'mu_0': mu_res_0,
            'log_sigma_0': log_sigma_res_0,
            'pai_res': pai_res,
            'mu_res': mu_res,
            'log_sigma_res': log_sigma_res
        }
        pickle.dump(data_dump,
                    open(
                        os.path.join(
                            result_path,
                            'gmvae_results_epoch_{}.pkl'.format(epoch)), 'wb'),
                    protocol=2)
        plot_images_and_clusters(gen_images,
                                 z_gen_res,
                                 epoch,
                                 save_path=result_path,
                                 ncol=10)
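
As in Code Example #8, the discrete cluster indicator z is summed out analytically inside log_joint: the loop evaluates the joint once for each of the K = dim_z one-hot settings of z and combines them with a logsumexp,

    \log p(x, h) = \log p(x \mid h) + \log \sum_{i=1}^{K} p(z = e_i)\, p(h \mid z = e_i)
                 = \log p(x \mid h) + \operatorname{logsumexp}_i \big[ \log p(z = e_i) + \log p(h \mid z = e_i) \big]

so only h needs a variational posterior, and zs.sgvb is given just {'h': [qh_samples, log_qh]}.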
Code Example #11
File: vae.py  Project: situgongyuan/zhusuan
def main():
    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, t_train, x_valid, t_valid, x_test, t_test = \
        dataset.load_mnist_realval(data_path)
    x_train = np.random.binomial(1, x_train, size=x_train.shape)
    n_x = x_train.shape[1]

    # Define model parameters
    n_z = 40

    @zs.reuse('model')
    def vae(observed, n, n_x, n_z):
        with zs.BayesianNet(observed=observed) as model:
            z_mean = tf.zeros([n, n_z])
            z_logstd = tf.zeros([n, n_z])
            z = zs.Normal('z', z_mean, logstd=z_logstd, group_event_ndims=1)
            lx_z = layers.fully_connected(z, 500)
            lx_z = layers.fully_connected(lx_z, 500)
            x_logits = layers.fully_connected(lx_z, n_x, activation_fn=None)
            x = zs.Bernoulli('x', x_logits, group_event_ndims=1)
        return model, x_logits

    @zs.reuse('variational')
    def q_net(x, n_z):
        with zs.BayesianNet() as variational:
            lz_x = layers.fully_connected(tf.to_float(x), 500)
            lz_x = layers.fully_connected(lz_x, 500)
            z_mean = layers.fully_connected(lz_x, n_z, activation_fn=None)
            z_logstd = layers.fully_connected(lz_x, n_z, activation_fn=None)
            z = zs.Normal('z', z_mean, logstd=z_logstd, group_event_ndims=1)
        return variational

    x = tf.placeholder(tf.int32, shape=[None, n_x], name='x')
    n = tf.shape(x)[0]

    def log_joint(observed):
        model, _ = vae(observed, n, n_x, n_z)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net(x, n_z)
    qz_samples, log_qz = variational.query('z',
                                           outputs=True,
                                           local_log_prob=True)
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint,
                observed={'x': x},
                latent={'z': [qz_samples, log_qz]}))

    optimizer = tf.train.AdamOptimizer(0.001)
    infer = optimizer.minimize(-lower_bound)

    # Generate images
    n_gen = 100
    _, x_logits = vae({}, n_gen, n_x, n_z)
    x_gen = tf.reshape(tf.sigmoid(x_logits), [-1, 28, 28, 1])

    # Define training parameters
    epoches = 500
    batch_size = 128
    iters = x_train.shape[0] // batch_size
    save_freq = 1

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, epoches + 1):
            np.random.shuffle(x_train)
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer, lower_bound], feed_dict={x: x_batch})
                lbs.append(lb)

            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % save_freq == 0:
                images = sess.run(x_gen)
                name = "results/vae/vae.epoch.{}.png".format(epoch)
                save_image_collections(images, name)
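
Unlike most other examples on this page, this zs.sgvb call uses a single reparameterized posterior sample per data point (no particle dimension and no axis argument). The gradient it provides is the standard reparameterization (SGVB) estimator

    \nabla_{\phi}\,\mathcal{L} = \mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\Big[\nabla_{\phi}\big(\log p(x, z_{\phi}(\epsilon)) - \log q_{\phi}(z_{\phi}(\epsilon) \mid x)\big)\Big], \qquad z_{\phi}(\epsilon) = \mu_{\phi}(x) + e^{\lambda_{\phi}(x)}\,\epsilon

estimated with one draw of \epsilon per training example, where \mu_{\phi} and \lambda_{\phi} are the z_mean and z_logstd heads of q_net.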
Code Example #12
File: vae.py  Project: sdy1106/VLAE
    variational, _, _, _ = q_net({}, x, n_z_0, n_z_1, n_z_2, n_particles,
                                 is_training)
    qz_samples0, log_qz0 = variational.query('z_0',
                                             outputs=True,
                                             local_log_prob=True)
    qz_samples1, log_qz1 = variational.query('z_1',
                                             outputs=True,
                                             local_log_prob=True)
    qz_samples2, log_qz2 = variational.query('z_2',
                                             outputs=True,
                                             local_log_prob=True)
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'x': x_obs}, {
            'z_0': [qz_samples0, log_qz0],
            'z_1': [qz_samples1, log_qz1],
            'z_2': [qz_samples2, log_qz2]
        },
                axis=0))

    # Importance sampling estimates of marginal log likelihood
    is_log_likelihood = tf.reduce_mean(
        zs.is_loglikelihood(log_joint, {'x': x_obs}, {
            'z_0': [qz_samples0, log_qz0],
            'z_1': [qz_samples1, log_qz1],
            'z_2': [qz_samples2, log_qz2]
        },
                            axis=0))

    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    grads = optimizer.compute_gradients(-lower_bound)
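    # The excerpt ends here; following the pattern of the other examples on
    # this page, the gradients would then be applied (assumed continuation):
    infer = optimizer.apply_gradients(grads)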