def get_v(n):
    ret = tf.get_variable(n + '_unused', [param.batch_size, param.rnn_size],
                          trainable=False, initializer=tf.constant_initializer())
    ret = symbolic_functions.shapeless_placeholder(ret, 0, name=n)
    return ret
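For reference, `shapeless_placeholder` (from tensorpack's `symbolic_functions`) clears the static shape of the given dimension so the resulting tensor can later be fed with a different batch size. Below is a minimal sketch of the same idea using only stock TF1 ops; `feedable_with_unknown_batch` is a hypothetical helper, not part of the library.

import tensorflow as tf

# Hypothetical stand-in (not the tensorpack implementation): wrap a default
# tensor so it can also be fed, with the batch dimension left unspecified.
def feedable_with_unknown_batch(default_value, name):
    shape = [None] + default_value.get_shape().as_list()[1:]
    return tf.placeholder_with_default(default_value, shape=shape, name=name)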
def _build_graph(self, inputs):
    image_pos, y = inputs
    image_pos = tf.expand_dims(image_pos * 2.0 - 1, -1)
    y = tf.one_hot(y, 10, name='label_onehot')

    z = tf.random_uniform([BATCH, 100], -1, 1, name='z_train')
    z = symbf.shapeless_placeholder(z, [0], name='z')

    with argscope([Conv2D, Deconv2D, FullyConnected],
                  W_init=tf.truncated_normal_initializer(stddev=0.02)):
        with tf.variable_scope('gen'):
            image_gen = self.generator(z, y)
        tf.summary.image('gen', image_gen, 30)

        with tf.variable_scope('discrim'):
            vecpos = self.discriminator(image_pos, y)
            vecneg = self.discriminator(image_gen, y)

    self.build_losses(vecpos, vecneg)
    self.collect_variables()
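`build_losses(vecpos, vecneg)` comes from the GAN base model. A rough sketch of the standard sigmoid cross-entropy GAN losses it is expected to build from the two discriminator logits is shown below; the library's actual implementation may differ in naming and add moving summaries.

# Sketch of the standard (non-saturating) GAN objective, assuming vecpos/vecneg
# are raw discriminator logits for real and generated samples respectively.
def gan_losses(logits_real, logits_fake):
    d_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits_real, labels=tf.ones_like(logits_real)))
    d_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits_fake, labels=tf.zeros_like(logits_fake)))
    d_loss = tf.add(d_loss_real, d_loss_fake, name='d_loss')
    # generator is rewarded when the discriminator labels its output as real
    g_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits_fake, labels=tf.ones_like(logits_fake)), name='g_loss')
    return d_loss, g_loss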
def _build_graph(self, inputs):
    real_sample = inputs[0]
    real_sample = tf.expand_dims(real_sample, -1)

    # sample the latent code:
    zc = symbf.shapeless_placeholder(sample_prior(BATCH), 0, name='z_code')
    z_noise = symbf.shapeless_placeholder(
        tf.random_uniform([BATCH, NOISE_DIM], -1, 1), 0, name='z_noise')
    z = tf.concat([zc, z_noise], 1, name='z')

    with argscope([Conv2D, Deconv2D, FullyConnected],
                  W_init=tf.truncated_normal_initializer(stddev=0.02)):
        with tf.variable_scope('gen'):
            fake_sample = self.generator(z)
        fake_sample_viz = tf.cast(fake_sample * 255.0, tf.uint8, name='viz')
        tf.summary.image('gen', fake_sample_viz, max_outputs=30)

        # may need to investigate how bn stats should be updated across two discrim
        with tf.variable_scope('discrim'):
            real_pred, _ = self.discriminator(real_sample)
            fake_pred, dist_param = self.discriminator(fake_sample)

    """
    Mutual information between x (i.e. zc in this case) and some information s
    (the generated samples in this case):

        I(x;s) = H(x) - H(x|s) = H(x) + E[\log P(x|s)]

    The distribution from which zc is sampled, in this case, is set to a fixed
    prior already, so the first term is a constant.
    For the second term, we can maximize its variational lower bound:

        E_{x \sim P(x|s)}[\log Q(x|s)]

    where Q(x|s) is a proposal distribution to approximate P(x|s).

    Here, Q(x|s) is assumed to be a distribution which shares the form of P,
    and whose parameters are predicted by the discriminator network.
    """
    with tf.name_scope("mutual_information"):
        with tf.name_scope('prior_entropy'):
            cat, uni = get_distributions(DIST_PRIOR_PARAM[:NUM_CLASS],
                                         DIST_PRIOR_PARAM[NUM_CLASS:])
            ents = [cat.entropy(name='cat_entropy'),
                    tf.reduce_sum(uni.entropy(), name='uni_entropy')]
            entropy = tf.add_n(ents, name='total_entropy')
            # Note that the entropy of the prior is a constant.
            # The paper mentioned it but didn't use it.

        with tf.name_scope('conditional_entropy'):
            cond_ents = entropy_from_samples(zc, dist_param)
            cond_entropy = tf.add_n(cond_ents, name="total_entropy")

        MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
        summary.add_moving_summary(entropy, cond_entropy, MI, *cond_ents)

    # default GAN objective
    self.build_losses(real_pred, fake_pred)

    # subtract mutual information for latent factors (we want to maximize them)
    self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
    self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')
    summary.add_moving_summary(self.g_loss, self.d_loss)

    # distinguish between variables of generator and discriminator updates
    self.collect_variables()
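`entropy_from_samples` and `get_distributions` are helpers defined elsewhere in this example. A hedged sketch of the conditional-entropy term for just the categorical factor, i.e. -E[log Q(x|s)] estimated as the cross-entropy between the sampled one-hot code and the softmax predicted by the discriminator, might look like the following; the real helper also covers the uniform/Gaussian factors, which are omitted here.

# Sketch only: conditional entropy of the categorical code given the
# discriminator's predicted parameters (raw logits assumed).
def categorical_conditional_entropy(zc_onehot, pred_logits):
    log_q = tf.nn.log_softmax(pred_logits)
    return tf.reduce_mean(-tf.reduce_sum(zc_onehot * log_q, axis=1),
                          name='cat_conditional_entropy')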
def _build_graph(self, inputs):
    real_sample = inputs[0]
    real_sample = tf.expand_dims(real_sample * 2.0 - 1, -1)

    # latent space is cat(10) x uni(1) x uni(1) x noise(NOISE_DIM)
    self.factors = ProductDistribution("factors", [
        CategoricalDistribution("cat", 10),
        GaussianWithUniformSample("uni_a", 1),
        GaussianWithUniformSample("uni_b", 1)])

    # prior: the assumption of how the factors are presented in the dataset
    prior = tf.constant([0.1] * 10 + [0, 0], tf.float32, [12], name='prior')
    batch_prior = tf.tile(tf.expand_dims(prior, 0), [BATCH, 1], name='batch_prior')

    # sample the latent code:
    zc = symbf.shapeless_placeholder(
        self.factors.sample(BATCH, prior), 0, name='z_code')
    z_noise = symbf.shapeless_placeholder(
        tf.random_uniform([BATCH, NOISE_DIM], -1, 1), 0, name='z_noise')
    z = tf.concat([zc, z_noise], 1, name='z')

    with argscope([Conv2D, Deconv2D, FullyConnected],
                  W_init=tf.truncated_normal_initializer(stddev=0.02)):
        with tf.variable_scope('gen'):
            fake_sample = self.generator(z)
        fake_sample_viz = tf.cast((fake_sample + 1) * 128.0, tf.uint8, name='viz')
        tf.summary.image('gen', fake_sample_viz, max_outputs=30)

        # TODO investigate how bn stats should be updated across two discrim
        with tf.variable_scope('discrim'):
            real_pred, _ = self.discriminator(real_sample)
        with tf.variable_scope('discrim', reuse=True):
            fake_pred, dist_param = self.discriminator(fake_sample)

    # post-process the output vector from the discriminator to become valid
    # distribution parameters
    encoder_activation = self.factors.encoder_activation(dist_param)

    """
    Mutual information between x (i.e. zc in this case) and some information s
    (the generated samples in this case):

        I(x;s) = H(x) - H(x|s) = H(x) + E[\log P(x|s)]

    The distribution from which zc is sampled, in this case, is set to a fixed
    prior already, so the first term is a constant.
    For the second term, we can maximize its variational lower bound:

        E_{x \sim P(x|s)}[\log Q(x|s)]

    where Q(x|s) is a proposal distribution to approximate P(x|s).

    Here, Q(x|s) is assumed to be a distribution which shares the form of
    self.factors, and whose parameters are predicted by the discriminator network.
    """
    with tf.name_scope("mutual_information"):
        ents = self.factors.entropy(zc, batch_prior)
        entropy = tf.add_n(ents, name='total_entropy')
        # Note that dropping this term has no effect because the entropy
        # of the prior is a constant. The paper mentioned it but didn't use it.
        # Adding this term may make the curve less stable because the
        # entropy estimated from the samples is not the true value.

        cond_ents = self.factors.entropy(zc, encoder_activation)
        cond_entropy = tf.add_n(cond_ents, name="total_conditional_entropy")

        MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
        summary.add_moving_summary(entropy, cond_entropy, MI, *ents)

    # default GAN objective
    self.build_losses(real_pred, fake_pred)

    # subtract mutual information for latent factors (we want to maximize them)
    self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
    self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')
    summary.add_moving_summary(self.g_loss, self.d_loss)

    # distinguish between variables of generator and discriminator updates
    self.collect_variables()
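As the comment above notes, the prior-entropy term is a constant for the fixed cat(10) prior: H = -sum_i 0.1 * log(0.1) = log(10) ≈ 2.303 nats, so including or dropping it only shifts the losses by a constant. A quick numeric check:

import numpy as np

# entropy of the fixed uniform categorical prior over 10 classes
prior_probs = np.full(10, 0.1)
print(-np.sum(prior_probs * np.log(prior_probs)))   # ~2.3026 == log(10)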
def _build_graph(self, inputs):
    real_sample = inputs[0]
    real_sample = tf.expand_dims(real_sample, -1)

    # latent space is cat(10) x uni(1) x uni(1) x noise(NOISE_DIM)
    self.factors = ProductDistribution(
        "factors", [CategoricalDistribution("cat", 10),
                    GaussianWithUniformSample("uni_a", 1),
                    GaussianWithUniformSample("uni_b", 1)])

    # prior: the assumption of how the factors are presented in the dataset
    prior = tf.constant([0.1] * 10 + [0, 0], tf.float32, [12], name='prior')
    batch_prior = tf.tile(tf.expand_dims(prior, 0), [BATCH, 1], name='batch_prior')

    # sample the latent code:
    zc = symbf.shapeless_placeholder(
        self.factors.sample(BATCH, prior), 0, name='z_code')
    z_noise = symbf.shapeless_placeholder(
        tf.random_uniform([BATCH, NOISE_DIM], -1, 1), 0, name='z_noise')
    z = tf.concat([zc, z_noise], 1, name='z')

    with argscope([Conv2D, Deconv2D, FullyConnected],
                  W_init=tf.truncated_normal_initializer(stddev=0.02)):
        with tf.variable_scope('gen'):
            fake_sample = self.generator(z)
        fake_sample_viz = tf.cast(fake_sample * 255.0, tf.uint8, name='viz')
        tf.summary.image('gen', fake_sample_viz, max_outputs=30)

        # may need to investigate how bn stats should be updated across two discrim
        with tf.variable_scope('discrim'):
            real_pred, _ = self.discriminator(real_sample)
        with tf.variable_scope('discrim', reuse=True):
            fake_pred, dist_param = self.discriminator(fake_sample)

    """
    Mutual information between x (i.e. zc in this case) and some information s
    (the generated samples in this case):

        I(x;s) = H(x) - H(x|s) = H(x) + E[\log P(x|s)]

    The distribution from which zc is sampled, in this case, is set to a fixed
    prior already, so the first term is a constant.
    For the second term, we can maximize its variational lower bound:

        E_{x \sim P(x|s)}[\log Q(x|s)]

    where Q(x|s) is a proposal distribution to approximate P(x|s).

    Here, Q(x|s) is assumed to be a distribution which shares the form of
    self.factors, and whose parameters are predicted by the discriminator network.
    """
    with tf.name_scope("mutual_information"):
        ents = self.factors.entropy(zc, batch_prior)
        entropy = tf.add_n(ents, name='total_entropy')
        # Note that dropping this term has no effect because the entropy
        # of the prior is a constant. The paper mentioned it but didn't use it.
        # Adding this term may make the curve less stable because the
        # entropy estimated from the samples is not the true value.

        # post-process the output vector from the discriminator to obtain
        # valid distribution parameters
        encoder_activation = self.factors.encoder_activation(dist_param)
        cond_ents = self.factors.entropy(zc, encoder_activation)
        cond_entropy = tf.add_n(cond_ents, name="total_conditional_entropy")

        MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
        summary.add_moving_summary(entropy, cond_entropy, MI, *ents)

    # default GAN objective
    self.build_losses(real_pred, fake_pred)

    # subtract mutual information for latent factors (we want to maximize them)
    self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
    self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')
    summary.add_moving_summary(self.g_loss, self.d_loss)

    # distinguish between variables of generator and discriminator updates
    self.collect_variables()
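`encoder_activation` is defined by the Distribution classes elsewhere in the library. As a hedged guess at what the post-processing amounts to for this cat(10) x uni(1) x uni(1) layout: split the raw discriminator output per factor, map the categorical logits through a softmax, and pass the Gaussian means through unchanged. The actual per-distribution mapping may differ from this sketch.

# Sketch only; not the library's implementation.
def encoder_activation_sketch(dist_param, num_class=10, num_uniform=2):
    cat_logits, gauss_mean = tf.split(dist_param, [num_class, num_uniform], axis=1)
    cat_probs = tf.nn.softmax(cat_logits, name='cat_prob')
    return tf.concat([cat_probs, gauss_mean], axis=1, name='encoder_activation')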