def vae(self, is_reparameterized=None, z_group_ndims=1, x_group_ndims=1):
    return VAE(
        p_z=Normal(mean=tf.zeros([BATCH_SIZE, Z_DIMS]),
                   std=tf.ones([BATCH_SIZE, Z_DIMS])),
        p_x_given_z=Normal,
        q_z_given_x=Normal,
        h_for_p_x=self.h_for_p_x,
        h_for_q_z=self.h_for_q_z,
        is_reparameterized=is_reparameterized,
        z_group_ndims=z_group_ndims,
        x_group_ndims=x_group_ndims,
    )
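# A minimal usage sketch for the helper above (hypothetical names: `helper`
# stands for an instance of the enclosing fixture class, `input_x` for a
# float tensor of shape [BATCH_SIZE, X_DIMS]). `get_training_loss` and
# `variational` are the VAE methods exercised elsewhere in this section.
def _vae_usage_sketch(helper, input_x):
    model = helper.vae(is_reparameterized=True)
    loss = model.get_training_loss(input_x)  # ELBO-derived training loss
    q_net = model.variational(input_x)       # inference net q(z|x)
    return loss, q_net['z']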
def __init__(self, hidden_net_p_x_z, hidden_net_q_z_x, x_dims, z_dims,
             std_epsilon=1e-4, name=None, scope=None):
    if not is_integer(x_dims) or x_dims <= 0:
        raise ValueError('`x_dims` must be a positive integer')
    if not is_integer(z_dims) or z_dims <= 0:
        raise ValueError('`z_dims` must be a positive integer')
    super(Donut, self).__init__(name=name, scope=scope)
    with reopen_variable_scope(self.variable_scope):
        # build the model on top of a VAE
        self._vae = VAE(
            # p(z): unit diagonal Gaussian prior of dimension `z_dims`
            # (all-zero mean, all-one std)
            p_z=Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims])),
            # p(x|h(z)): diagonal Gaussian
            p_x_given_z=Normal,
            # q(z|h(x)): diagonal Gaussian
            q_z_given_x=Normal,
            # hidden network for p(x|h(z)): derives `mean` and `std`
            # from the output of the p(x|z) hidden network
            h_for_p_x=Lambda(
                partial(wrap_params_net,
                        h_for_dist=hidden_net_p_x_z,
                        mean_layer=partial(tf.layers.dense, units=x_dims,
                                           name='x_mean'),
                        std_layer=partial(softplus_std, units=x_dims,
                                          epsilon=std_epsilon,
                                          name='x_std')),
                name='p_x_given_z'),
            # hidden network for q(z|h(x)): derives `mean` and `std`
            # from the output of the q(z|x) hidden network
            h_for_q_z=Lambda(
                partial(wrap_params_net,
                        h_for_dist=hidden_net_q_z_x,
                        mean_layer=partial(tf.layers.dense, units=z_dims,
                                           name='z_mean'),
                        std_layer=partial(softplus_std, units=z_dims,
                                          epsilon=std_epsilon,
                                          name='z_std')),
                name='q_z_given_x'))
    self._x_dims = x_dims
    self._z_dims = z_dims
def __init__(self, h_for_p_x, h_for_q_z, x_dims, z_dims,
             std_epsilon=1e-4, name=None, scope=None):
    if not is_integer(x_dims) or x_dims <= 0:
        raise ValueError('`x_dims` must be a positive integer')
    if not is_integer(z_dims) or z_dims <= 0:
        raise ValueError('`z_dims` must be a positive integer')
    super(Donut, self).__init__(name=name, scope=scope)
    with reopen_variable_scope(self.variable_scope):
        self._vae = VAE(
            p_z=Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims])),
            p_x_given_z=Normal,
            q_z_given_x=Normal,
            # map the hidden net output to the `mean` and `std` of p(x|z);
            # softplus plus `std_epsilon` keeps the std strictly positive
            h_for_p_x=Sequential([
                h_for_p_x,
                DictMapper(
                    {
                        'mean': K.layers.Dense(x_dims),
                        'std': lambda x: (std_epsilon + K.layers.Dense(
                            x_dims, activation=tf.nn.softplus)(x))
                    },
                    name='p_x_given_z')
            ]),
            # likewise for the `mean` and `std` of q(z|x)
            h_for_q_z=Sequential([
                h_for_q_z,
                DictMapper(
                    {
                        'mean': K.layers.Dense(z_dims),
                        'std': lambda z: (std_epsilon + K.layers.Dense(
                            z_dims, activation=tf.nn.softplus)(z))
                    },
                    name='q_z_given_x')
            ]),
        )
    self._x_dims = x_dims
    self._z_dims = z_dims
def __init__(self, h_for_p_x, h_for_q_z, x_dims, z_dims,
             std_epsilon=1e-4, name=None, scope=None):
    if not is_integer(x_dims) or x_dims <= 0:
        raise ValueError('`x_dims` must be a positive integer')
    if not is_integer(z_dims) or z_dims <= 0:
        raise ValueError('`z_dims` must be a positive integer')
    super(Donut, self).__init__(name=name, scope=scope)
    with reopen_variable_scope(self.variable_scope):
        self._vae = VAE(
            p_z=Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims])),
            p_x_given_z=Normal,
            q_z_given_x=Normal,
            h_for_p_x=Lambda(
                partial(
                    wrap_params_net,
                    h_for_dist=h_for_p_x,
                    mean_layer=partial(
                        tf.layers.dense, units=x_dims, name='x_mean'),
                    std_layer=partial(
                        softplus_std, units=x_dims, epsilon=std_epsilon,
                        name='x_std')),
                name='p_x_given_z'),
            h_for_q_z=Lambda(
                partial(
                    wrap_params_net,
                    h_for_dist=h_for_q_z,
                    mean_layer=partial(
                        tf.layers.dense, units=z_dims, name='z_mean'),
                    std_layer=partial(
                        softplus_std, units=z_dims, epsilon=std_epsilon,
                        name='z_std')),
                name='q_z_given_x'))
    self._x_dims = x_dims
    self._z_dims = z_dims
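# A minimal construction sketch for the `Donut` model defined above. The
# sliding-window size (x_dims=120), latent size (z_dims=5) and the
# fully-connected hidden networks are illustrative assumptions, not
# prescribed values; `Sequential` and `K.layers.Dense` are used here just
# as in the other snippets of this section.
donut_model = Donut(
    h_for_p_x=Sequential([
        K.layers.Dense(100, activation=tf.nn.relu),  # hidden net of p(x|z)
        K.layers.Dense(100, activation=tf.nn.relu),
    ]),
    h_for_q_z=Sequential([
        K.layers.Dense(100, activation=tf.nn.relu),  # hidden net of q(z|x)
        K.layers.Dense(100, activation=tf.nn.relu),
    ]),
    x_dims=120,   # length of each input window (assumption)
    z_dims=5,     # latent dimensionality (assumption)
)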
def main():
    # load mnist data
    (x_train, y_train), (x_test, y_test) = \
        load_mnist(shape=[config.x_dim], dtype=np.float32, normalize=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None,) + x_train.shape[1:], name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    vae = VAE(
        p_z=Normal(mean=tf.zeros([1, config.z_dim]),
                   std=tf.ones([1, config.z_dim])),
        p_x_given_z=Bernoulli,
        q_z_given_x=Normal,
        h_for_p_x=functools.partial(h_for_p_x, is_training=is_training),
        h_for_q_z=functools.partial(h_for_q_z, is_training=is_training),
    )
    grads = []
    losses = []
    lower_bounds = []
    test_nlls = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x] in multi_gpu.data_parallel(
            batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                with arg_scope([h_for_q_z, h_for_p_x]):
                    _ = vae.chain(dev_input_x)
            else:
                # derive the loss and lower-bound for training
                dev_vae_loss = vae.get_training_loss(dev_input_x)
                dev_loss = dev_vae_loss + regularization_loss()
                dev_lower_bound = -dev_vae_loss
                losses.append(dev_loss)
                lower_bounds.append(dev_lower_bound)

                # derive the nll and logits output for testing
                test_chain = vae.chain(dev_input_x, n_z=config.test_n_z)
                dev_test_nll = -tf.reduce_mean(
                    test_chain.vi.evaluation.is_loglikelihood())
                test_nlls.append(dev_test_nll)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, lower_bound, test_nll] = \
        multi_gpu.average([losses, lower_bounds, test_nlls], batch_size)
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plot_x'):
        x_plots = tf.reshape(
            tf.cast(
                255 * tf.sigmoid(vae.model(n_z=100)['x'].distribution.logits),
                dtype=tf.uint8),
            [-1, 28, 28])

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(
                images=images,
                filename=results.prepare_parent(
                    'plotting/{}.png'.format(loop.epoch)),
                grid_size=(10, 10))

    # prepare for training and testing data
    def input_x_sampler(x):
        return session.run([sampled_x], feed_dict={sample_input_x: x})

    with tf.device('/device:CPU:0'):
        sample_input_x = tf.placeholder(
            dtype=tf.float32, shape=(None, config.x_dim),
            name='sample_input_x')
        sampled_x = sample_from_probs(sample_input_x)

    train_flow = DataFlow.arrays(
        [x_train], config.batch_size, shuffle=True,
        skip_incomplete=True).map(input_x_sampler)
    test_flow = DataFlow.arrays([x_test], config.test_batch_size). \
        map(input_x_sampler)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # fix the testing flow, reducing the testing time
        test_flow = test_flow.to_arrays_flow(
            batch_size=config.test_batch_size)

        # train the network
        with TrainLoop(params,
                       var_groups=['h_for_p_x', 'h_for_q_z'],
                       max_epoch=config.max_epoch,
                       summary_dir=results.make_dir('train_summary'),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop,
                              train_op,
                              [input_x],
                              train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={'loss': loss})
            anneal_after(trainer,
                         learning_rate_var,
                         epochs=config.lr_anneal_epoch_freq,
                         steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={
                                      'test_nll': test_nll,
                                      'test_lb': lower_bound
                                  },
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.commit(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # write the final test_nll and test_lb
    results.commit_and_print(evaluator.last_metrics_dict)
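# A hedged sketch of the `config` object that `main()` above assumes. The
# field names are exactly those referenced in the function; the values are
# illustrative defaults only, not the original experiment's settings.
class Config(object):
    x_dim = 784                  # flattened 28x28 MNIST images
    z_dim = 40                   # latent dimensionality (assumption)
    batch_size = 128
    test_batch_size = 128
    test_n_z = 500               # importance samples for the test NLL
    max_epoch = 3000
    initial_lr = 0.001
    lr_anneal_factor = 0.5       # multiply the lr by this when annealing
    lr_anneal_epoch_freq = 300   # anneal every this many epochs
    lr_anneal_step_freq = None   # or anneal by step count instead

config = Config()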
def test_construction(self):
    # test basic construction
    p_z = Mock(spec=Distribution)
    p_x_given_z = Mock(spec=DistributionFactory)
    q_z_given_x = Mock(spec=DistributionFactory)
    h_for_p_x = Mock(spec=Module)
    h_for_q_z = Mock(spec=Module)

    vae = VAE(p_z=p_z,
              p_x_given_z=p_x_given_z,
              q_z_given_x=q_z_given_x,
              h_for_p_x=h_for_p_x,
              h_for_q_z=h_for_q_z,
              z_group_ndims=3,
              x_group_ndims=2,
              is_reparameterized=False)
    self.assertIs(vae.p_z, p_z)
    self.assertIs(vae.p_x_given_z, p_x_given_z)
    self.assertIs(vae.q_z_given_x, q_z_given_x)
    self.assertIs(vae.h_for_p_x, h_for_p_x)
    self.assertIs(vae.h_for_q_z, h_for_q_z)
    self.assertEqual(vae.z_group_ndims, 3)
    self.assertEqual(vae.x_group_ndims, 2)
    self.assertFalse(vae.is_reparameterized)

    # test wrapping `h_for_p_x` and `h_for_q_z` in a Lambda layer
    vae = VAE(p_z=p_z,
              p_x_given_z=p_x_given_z,
              q_z_given_x=q_z_given_x,
              h_for_p_x=lambda z: z,
              h_for_q_z=lambda x: x,
              z_group_ndims=3,
              x_group_ndims=2,
              is_reparameterized=False)
    self.assertIsInstance(vae.h_for_q_z, Lambda)
    self.assertIsInstance(vae.h_for_p_x, Lambda)

    # test type error for `p_z`
    with pytest.raises(
            TypeError, match='`p_z` must be an instance of `Distribution`'):
        _ = VAE(p_z=123, p_x_given_z=p_x_given_z, q_z_given_x=q_z_given_x,
                h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)

    # test `p_x_given_z` and `q_z_given_x` from :class:`Distribution`
    vae = VAE(p_z=p_z,
              p_x_given_z=Bernoulli,
              q_z_given_x=Normal,
              h_for_p_x=h_for_p_x,
              h_for_q_z=h_for_q_z)
    self.assertIsInstance(vae.p_x_given_z, DistributionFactory)
    self.assertIs(vae.p_x_given_z.distribution_class, Bernoulli)
    self.assertIsInstance(vae.q_z_given_x, DistributionFactory)
    self.assertIs(vae.q_z_given_x.distribution_class, Normal)

    # test type error for `p_x_given_z`
    with pytest.raises(
            TypeError,
            match='p_x_given_z must be a subclass of `Distribution` or '
                  '`zhusuan.distributions.Distribution`, or '
                  'an instance of `DistributionFactory`'):
        _ = VAE(p_z=p_z, p_x_given_z=object, q_z_given_x=q_z_given_x,
                h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)
    with pytest.raises(
            TypeError,
            match='p_x_given_z must be a subclass of `Distribution` or '
                  '`zhusuan.distributions.Distribution`, or '
                  'an instance of `DistributionFactory`'):
        _ = VAE(p_z=p_z, p_x_given_z=object(), q_z_given_x=q_z_given_x,
                h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)

    # test type error for `q_z_given_x`
    with pytest.raises(
            TypeError,
            match='q_z_given_x must be a subclass of `Distribution` or '
                  '`zhusuan.distributions.Distribution`, or '
                  'an instance of `DistributionFactory`'):
        _ = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=object,
                h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)
    with pytest.raises(
            TypeError,
            match='q_z_given_x must be a subclass of `Distribution` or '
                  '`zhusuan.distributions.Distribution`, or '
                  'an instance of `DistributionFactory`'):
        _ = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=object(),
                h_for_p_x=h_for_p_x, h_for_q_z=h_for_q_z)

    # test type error for `h_for_p_x`
    with pytest.raises(
            TypeError,
            match='`h_for_p_x` must be an instance of `Module` '
                  'or a callable object'):
        _ = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=q_z_given_x,
                h_for_p_x=object(), h_for_q_z=h_for_q_z)

    # test type error for `h_for_q_z`
    with pytest.raises(
            TypeError,
            match='`h_for_q_z` must be an instance of `Module` '
                  'or a callable object'):
        _ = VAE(p_z=p_z, p_x_given_z=p_x_given_z, q_z_given_x=q_z_given_x,
                h_for_p_x=h_for_p_x, h_for_q_z=object())
def main():
    # load mnist data
    (train_x, train_y), (test_x, test_y) = datasets.load_mnist()

    # the parameters of this experiment
    x_dim = train_x.shape[1]
    z_dim = 2
    max_epoch = 10
    batch_size = 256
    valid_portion = 0.2

    # construct the graph
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        input_x = tf.placeholder(
            dtype=tf.float32, shape=(None, x_dim), name='input_x')
        x_binarized = tf.stop_gradient(sample_input_x(input_x))
        batch_size_tensor = tf.shape(input_x)[0]

        # derive the VAE
        z_shape = tf.stack([batch_size_tensor, z_dim])
        vae = VAE(p_z=Normal(mean=tf.zeros(z_shape), std=tf.ones(z_shape)),
                  p_x_given_z=Bernoulli,
                  q_z_given_x=Normal,
                  h_for_p_x=Sequential([
                      K.layers.Dense(100, activation=tf.nn.relu),
                      K.layers.Dense(100, activation=tf.nn.relu),
                      DictMapper(
                          {'logits': K.layers.Dense(x_dim, name='x_logits')})
                  ]),
                  h_for_q_z=Sequential([
                      tf.to_float,
                      K.layers.Dense(100, activation=tf.nn.relu),
                      K.layers.Dense(100, activation=tf.nn.relu),
                      DictMapper({
                          'mean': K.layers.Dense(z_dim, name='z_mean'),
                          'logstd': K.layers.Dense(z_dim, name='z_logstd'),
                      })
                  ]))

        # train the network
        train(vae.get_training_loss(x_binarized), input_x, train_x,
              max_epoch, batch_size, valid_portion)

        # plot the latent space
        q_net = vae.variational(x_binarized)
        z_posterior = q_net['z']
        z_predict = []
        for [batch_x] in DataFlow.arrays([test_x], batch_size=batch_size):
            z_predict.append(
                session.run(z_posterior, feed_dict={input_x: batch_x}))
        z_predict = np.concatenate(z_predict, axis=0)
        plt.figure(figsize=(8, 6))
        plt.scatter(z_predict[:, 0], z_predict[:, 1], c=test_y)
        plt.colorbar()
        plt.grid()
        plt.show()
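# A plausible implementation of the `sample_input_x` helper used in the
# snippet above: binarize pixel intensities (assumed to be scaled to
# [0, 1]) by Bernoulli sampling, which matches the Bernoulli p(x|z) of the
# model. This is an assumption for illustration, not the original helper.
def sample_input_x(input_x):
    uniform_samples = tf.random_uniform(
        shape=tf.shape(input_x), minval=0., maxval=1., dtype=input_x.dtype)
    return tf.cast(tf.less(uniform_samples, input_x), dtype=input_x.dtype)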