Example #1
    def vae(self, is_reparameterized=None, z_group_ndims=1, x_group_ndims=1):
        return VAE(p_z=Normal(mean=tf.zeros([BATCH_SIZE, Z_DIMS]),
                              std=tf.ones([BATCH_SIZE, Z_DIMS])),
                   p_x_given_z=Normal,
                   q_z_given_x=Normal,
                   h_for_p_x=self.h_for_p_x,
                   h_for_q_z=self.h_for_q_z,
                   is_reparameterized=is_reparameterized,
                   z_group_ndims=z_group_ndims,
                   x_group_ndims=x_group_ndims)
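A minimal usage sketch for the VAE returned by this factory (not part of the original snippet): `helper` stands for a hypothetical object exposing the `vae()` method above, and `X_DIMS` is an assumed input dimensionality; `get_training_loss`, `chain` and `variational` are the same tfsnippet VAE calls that appear in Examples #5 and #7 below.

import tensorflow as tf

X_DIMS = 784  # assumed input dimensionality; BATCH_SIZE / Z_DIMS come from the snippet above
input_x = tf.placeholder(dtype=tf.float32, shape=[BATCH_SIZE, X_DIMS],
                         name='input_x')

vae = helper.vae(is_reparameterized=True)    # `helper` is a hypothetical owner of the factory above
loss = vae.get_training_loss(input_x)        # negative ELBO, as used in Example #5
chain = vae.chain(input_x, n_z=10)           # variational chain for evaluation
q_net = vae.variational(input_x)             # q(z|x) net, as used in Example #7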
Example #2
    def __init__(self,
                 hidden_net_p_x_z,
                 hidden_net_q_z_x,
                 x_dims,
                 z_dims,
                 std_epsilon=1e-4,
                 name=None,
                 scope=None):
        if not is_integer(x_dims) or x_dims <= 0:
            raise ValueError('`x_dims` must be a positive integer')
        if not is_integer(z_dims) or z_dims <= 0:
            raise ValueError('`z_dims` must be a positive integer')

        super(Donut, self).__init__(name=name, scope=scope)
        with reopen_variable_scope(self.variable_scope):
            # Built on top of a VAE
            self._vae = VAE(
                # p(z): univariate Normal prior with zero mean and unit std over z_dims dimensions
                p_z=Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims])),
                # p(x|h(z)): univariate Normal
                p_x_given_z=Normal,
                # q(z|h(x)): univariate Normal
                q_z_given_x=Normal,
                # hidden network for p(x|h(z)): mean and std derived from the p(x|z) hidden network output
                h_for_p_x=Lambda(partial(wrap_params_net,
                                         h_for_dist=hidden_net_p_x_z,
                                         mean_layer=partial(tf.layers.dense,
                                                            units=x_dims,
                                                            name='x_mean'),
                                         std_layer=partial(softplus_std,
                                                           units=x_dims,
                                                           epsilon=std_epsilon,
                                                           name='x_std')),
                                 name='p_x_given_z'),
                # hidden network for q(z|h(x)): mean and std derived from the q(z|x) hidden network output
                h_for_q_z=Lambda(partial(wrap_params_net,
                                         h_for_dist=hidden_net_q_z_x,
                                         mean_layer=partial(tf.layers.dense,
                                                            units=z_dims,
                                                            name='z_mean'),
                                         std_layer=partial(softplus_std,
                                                           units=z_dims,
                                                           epsilon=std_epsilon,
                                                           name='z_std')),
                                 name='q_z_given_x'))
        self._x_dims = x_dims
        self._z_dims = z_dims
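A construction sketch for reference (illustrative only, not from the original source): the two hidden networks can be plain feed-forward stacks. The layer sizes, x_dims=120 (sliding-window length) and z_dims=5 are assumptions, and `Sequential` is assumed importable from tfsnippet's modules package, as used in Examples #3 and #7.

import tensorflow as tf
from tensorflow import keras as K
from tfsnippet.modules import Sequential  # import path assumed

model = Donut(
    hidden_net_p_x_z=Sequential([
        K.layers.Dense(100, activation=tf.nn.relu),
        K.layers.Dense(100, activation=tf.nn.relu),
    ]),
    hidden_net_q_z_x=Sequential([
        K.layers.Dense(100, activation=tf.nn.relu),
        K.layers.Dense(100, activation=tf.nn.relu),
    ]),
    x_dims=120,  # illustrative window length
    z_dims=5,    # illustrative latent size
)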
Example #3
    def __init__(self,
                 h_for_p_x,
                 h_for_q_z,
                 x_dims,
                 z_dims,
                 std_epsilon=1e-4,
                 name=None,
                 scope=None):
        if not is_integer(x_dims) or x_dims <= 0:
            raise ValueError('`x_dims` must be a positive integer')
        if not is_integer(z_dims) or z_dims <= 0:
            raise ValueError('`z_dims` must be a positive integer')

        super(Donut, self).__init__(name=name, scope=scope)
        with reopen_variable_scope(self.variable_scope):
            self._vae = VAE(
                p_z=Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims])),
                p_x_given_z=Normal,
                q_z_given_x=Normal,
                h_for_p_x=Sequential([
                    h_for_p_x,
                    DictMapper(
                        {
                            'mean':
                            K.layers.Dense(x_dims),
                            'std':
                            lambda x: (std_epsilon + K.layers.Dense(
                                x_dims, activation=tf.nn.softplus)(x))
                        },
                        name='p_x_given_z')
                ]),
                h_for_q_z=Sequential([
                    h_for_q_z,
                    DictMapper(
                        {
                            'mean':
                            K.layers.Dense(z_dims),
                            'std':
                            lambda z: (std_epsilon + K.layers.Dense(
                                z_dims, activation=tf.nn.softplus)(z))
                        },
                        name='q_z_given_x')
                ]),
            )
        self._x_dims = x_dims
        self._z_dims = z_dims
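Compared with Examples #2 and #4, which wrap the hidden networks with `Lambda(partial(wrap_params_net, ...))`, this variant appends a `DictMapper`: each mapped layer is applied to the same hidden features, and the resulting dict ('mean', 'std') is passed as the parameters of the Normal factory. A standalone sketch of that pattern follows, assuming tfsnippet's `Sequential`/`DictMapper` behave as used above; all sizes are illustrative.

import tensorflow as tf
from tensorflow import keras as K
from tfsnippet.modules import DictMapper, Sequential  # import path assumed

# 16-d inputs -> 100-unit hidden layer -> parameters of a 5-d Normal
params_net = Sequential([
    K.layers.Dense(100, activation=tf.nn.relu),
    DictMapper({
        'mean': K.layers.Dense(5),
        'std': lambda h: 1e-4 + K.layers.Dense(5, activation=tf.nn.softplus)(h),
    }),
])

h = tf.zeros([32, 16])
params = params_net(h)  # dict with 'mean' and 'std' tensors of shape [32, 5]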
Example #4
File: model.py Project: WenweiGu/DONUT-SMD
    def __init__(self, h_for_p_x, h_for_q_z, x_dims, z_dims, std_epsilon=1e-4,
                 name=None, scope=None):
        if not is_integer(x_dims) or x_dims <= 0:
            raise ValueError('`x_dims` must be a positive integer')
        if not is_integer(z_dims) or z_dims <= 0:
            raise ValueError('`z_dims` must be a positive integer')

        super(Donut, self).__init__(name=name, scope=scope)
        with reopen_variable_scope(self.variable_scope):
            self._vae = VAE(
                p_z=Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims])),
                p_x_given_z=Normal,
                q_z_given_x=Normal,
                h_for_p_x=Lambda(
                    partial(
                        wrap_params_net,
                        h_for_dist=h_for_p_x,
                        mean_layer=partial(
                            tf.layers.dense, units=x_dims, name='x_mean'
                        ),
                        std_layer=partial(
                            softplus_std, units=x_dims, epsilon=std_epsilon,
                            name='x_std'
                        )
                    ),
                    name='p_x_given_z'
                ),
                h_for_q_z=Lambda(
                    partial(
                        wrap_params_net,
                        h_for_dist=h_for_q_z,
                        mean_layer=partial(
                            tf.layers.dense, units=z_dims, name='z_mean'
                        ),
                        std_layer=partial(
                            softplus_std, units=z_dims, epsilon=std_epsilon,
                            name='z_std'
                        )
                    ),
                    name='q_z_given_x'
                )
            )
        self._x_dims = x_dims
        self._z_dims = z_dims
Example #5
def main():
    # load mnist data
    (x_train, y_train), (x_test, y_test) = \
        load_mnist(shape=[config.x_dim], dtype=np.float32, normalize=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, ) + x_train.shape[1:],
                             name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    vae = VAE(
        p_z=Normal(mean=tf.zeros([1, config.z_dim]),
                   std=tf.ones([1, config.z_dim])),
        p_x_given_z=Bernoulli,
        q_z_given_x=Normal,
        h_for_p_x=functools.partial(h_for_p_x, is_training=is_training),
        h_for_q_z=functools.partial(h_for_q_z, is_training=is_training),
    )

    grads = []
    losses = []
    lower_bounds = []
    test_nlls = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x] in multi_gpu.data_parallel(
            batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                with arg_scope([h_for_q_z, h_for_p_x]):
                    _ = vae.chain(dev_input_x)

            else:
                # derive the loss and lower-bound for training
                dev_vae_loss = vae.get_training_loss(dev_input_x)
                dev_loss = dev_vae_loss + regularization_loss()
                dev_lower_bound = -dev_vae_loss
                losses.append(dev_loss)
                lower_bounds.append(dev_lower_bound)

                # derive the nll and logits output for testing
                test_chain = vae.chain(dev_input_x, n_z=config.test_n_z)
                dev_test_nll = -tf.reduce_mean(
                    test_chain.vi.evaluation.is_loglikelihood())
                test_nlls.append(dev_test_nll)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, lower_bound, test_nll] = \
        multi_gpu.average([losses, lower_bounds, test_nlls], batch_size)
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(grads),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(
                                         tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plot_x'):
        x_plots = tf.reshape(
            tf.cast(255 *
                    tf.sigmoid(vae.model(n_z=100)['x'].distribution.logits),
                    dtype=tf.uint8), [-1, 28, 28])

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(images=images,
                                   filename=results.prepare_parent(
                                       'plotting/{}.png'.format(loop.epoch)),
                                   grid_size=(10, 10))

    # prepare for training and testing data
    def input_x_sampler(x):
        return session.run([sampled_x], feed_dict={sample_input_x: x})

    with tf.device('/device:CPU:0'):
        sample_input_x = tf.placeholder(dtype=tf.float32,
                                        shape=(None, config.x_dim),
                                        name='sample_input_x')
        sampled_x = sample_from_probs(sample_input_x)

    train_flow = DataFlow.arrays([x_train],
                                 config.batch_size,
                                 shuffle=True,
                                 skip_incomplete=True).map(input_x_sampler)
    test_flow = DataFlow.arrays([x_test], config.test_batch_size). \
        map(input_x_sampler)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # fix the testing flow, reducing the testing time
        test_flow = test_flow.to_arrays_flow(batch_size=config.test_batch_size)

        # train the network
        with TrainLoop(params,
                       var_groups=['h_for_p_x', 'h_for_q_z'],
                       max_epoch=config.max_epoch,
                       summary_dir=results.make_dir('train_summary'),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop,
                              train_op, [input_x],
                              train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={'loss': loss})
            anneal_after(trainer,
                         learning_rate_var,
                         epochs=config.lr_anneal_epoch_freq,
                         steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={
                                      'test_nll': test_nll,
                                      'test_lb': lower_bound
                                  },
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.commit(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # write the final test_nll and test_lb
    results.commit_and_print(evaluator.last_metrics_dict)
Example #6
    def test_construction(self):
        # test basic
        p_z = Mock(spec=Distribution)
        p_x_given_z = Mock(spec=DistributionFactory)
        q_z_given_x = Mock(spec=DistributionFactory)
        h_for_p_x = Mock(spec=Module)
        h_for_q_z = Mock(spec=Module)
        vae = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=q_z_given_x,
                  h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z, z_group_ndims=3,
                  x_group_ndims=2, is_reparameterized=False)
        self.assertIs(vae.p_z, p_z)
        self.assertIs(vae.p_x_given_z, p_x_given_z)
        self.assertIs(vae.q_z_given_x, q_z_given_x)
        self.assertIs(vae.h_for_p_x, h_for_p_x)
        self.assertIs(vae.h_for_q_z, h_for_q_z)
        self.assertEqual(vae.z_group_ndims, 3)
        self.assertEqual(vae.x_group_ndims, 2)
        self.assertFalse(vae.is_reparameterized)

        # test wrap p_x_given_z and q_z_given_x in Lambda layer
        vae = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=q_z_given_x,
                  h_for_p_x=lambda z: z, h_for_q_z=lambda x: x, z_group_ndims=3,
                  x_group_ndims=2, is_reparameterized=False)
        self.assertIsInstance(vae.h_for_q_z, Lambda)
        self.assertIsInstance(vae.h_for_p_x, Lambda)

        # test type error for `p_z`
        with pytest.raises(
                TypeError, match='`p_z` must be an instance of `Distribution`'):
            _ = VAE(p_z=123, p_x_given_z=p_x_given_z, q_z_given_x=q_z_given_x,
                    h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)

        # test `p_x_given_z` and `q_z_given_x` from :class:`Distribution`
        vae = VAE(p_z=p_z, p_x_given_z=Bernoulli, q_z_given_x=Normal,
                  h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)
        self.assertIsInstance(vae.p_x_given_z, DistributionFactory)
        self.assertIs(vae.p_x_given_z.distribution_class, Bernoulli)
        self.assertIsInstance(vae.q_z_given_x, DistributionFactory)
        self.assertIs(vae.q_z_given_x.distribution_class, Normal)

        # test type error for `p_x_given_z`
        with pytest.raises(
                TypeError,
                match='p_x_given_z must be a subclass of `Distribution` or '
                      '`zhusuan.distributions.Distribution`, or '
                      'an instance of `DistributionFactory`'):
            _ = VAE(p_z=p_z, p_x_given_z=object, q_z_given_x=q_z_given_x,
                    h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)
        with pytest.raises(
                TypeError,
                match='p_x_given_z must be a subclass of `Distribution` or '
                      '`zhusuan.distributions.Distribution`, or '
                      'an instance of `DistributionFactory`'):
            _ = VAE(p_z=p_z, p_x_given_z=object(), q_z_given_x=q_z_given_x,
                    h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)

        # test type error for `q_z_given_x`
        with pytest.raises(
                TypeError,
                match='q_z_given_x must be a subclass of `Distribution` or '
                      '`zhusuan.distributions.Distribution`, or '
                      'an instance of `DistributionFactory`'):
            _ = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=object,
                    h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)
        with pytest.raises(
                TypeError,
                match='q_z_given_x must be a subclass of `Distribution` or '
                      '`zhusuan.distributions.Distribution`, or '
                      'an instance of `DistributionFactory`'):
            _ = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=object(),
                    h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)

        # test type error for `h_for_p_x`
        with pytest.raises(
                TypeError, match='`h_for_p_x` must be an instance of `Module` '
                                 'or a callable object'):
            _ = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=q_z_given_x,
                    h_for_p_x=object(), h_for_q_z=h_for_q_z)

        # test type error for `h_for_q_z`
        with pytest.raises(
                TypeError, match='`h_for_q_z` must be an instance of `Module` '
                                 'or a callable object'):
            _ = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=q_z_given_x,
                    h_for_p_x=h_for_p_x, h_for_q_z=object())
Example #7
File: vae.py Project: Feng37/tfsnippet
def main():
    # load mnist data
    (train_x, train_y), (test_x, test_y) = datasets.load_mnist()

    # the parameters of this experiment
    x_dim = train_x.shape[1]
    z_dim = 2
    max_epoch = 10
    batch_size = 256
    valid_portion = 0.2

    # construct the graph
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        input_x = tf.placeholder(dtype=tf.float32,
                                 shape=(None, x_dim),
                                 name='input_x')
        x_binarized = tf.stop_gradient(sample_input_x(input_x))
        batch_size_tensor = tf.shape(input_x)[0]

        # derive the VAE
        z_shape = tf.stack([batch_size_tensor, z_dim])
        vae = VAE(p_z=Normal(mean=tf.zeros(z_shape), std=tf.ones(z_shape)),
                  p_x_given_z=Bernoulli,
                  q_z_given_x=Normal,
                  h_for_p_x=Sequential([
                      K.layers.Dense(100, activation=tf.nn.relu),
                      K.layers.Dense(100, activation=tf.nn.relu),
                      DictMapper(
                          {'logits': K.layers.Dense(x_dim, name='x_logits')})
                  ]),
                  h_for_q_z=Sequential([
                      tf.to_float,
                      K.layers.Dense(100, activation=tf.nn.relu),
                      K.layers.Dense(100, activation=tf.nn.relu),
                      DictMapper({
                          'mean':
                          K.layers.Dense(z_dim, name='z_mean'),
                          'logstd':
                          K.layers.Dense(z_dim, name='z_logstd'),
                      })
                  ]))

        # train the network
        train(vae.get_training_loss(x_binarized), input_x, train_x, max_epoch,
              batch_size, valid_portion)

        # plot the latent space
        q_net = vae.variational(x_binarized)
        z_posterior = q_net['z']
        z_predict = []

        for [batch_x] in DataFlow.arrays([test_x], batch_size=batch_size):
            z_predict.append(
                session.run(z_posterior, feed_dict={input_x: batch_x}))

        z_predict = np.concatenate(z_predict, axis=0)
        plt.figure(figsize=(8, 6))
        plt.scatter(z_predict[:, 0], z_predict[:, 1], c=test_y)
        plt.colorbar()
        plt.grid()
        plt.show()