Example #1
    def get_v(n):
        # Create a dummy, non-trainable variable only to obtain a tensor of the
        # right shape, then wrap it so that axis 0 (the batch dimension) can be
        # fed with a different size later.
        ret = tf.get_variable(n + '_unused',
                              [param.batch_size, param.rnn_size],
                              trainable=False,
                              initializer=tf.constant_initializer())
        ret = symbolic_functions.shapeless_placeholder(ret, 0, name=n)
        return ret
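
What `shapeless_placeholder` buys here is that the returned tensor keeps its default value during training but can also be fed with a different batch size at inference. As a rough sketch of the idea only (not tensorpack's actual implementation), the hypothetical helper below erases the static shape of the requested axes and wraps the tensor with `tf.placeholder_with_default`:

import tensorflow as tf

def shapeless_placeholder_sketch(x, axes, name):
    """Illustration only: make the listed axes of x unknown in the static shape.

    The result defaults to x, but can also be fed through feed_dict with a
    value whose size along `axes` differs, e.g. a different batch size.
    """
    if not isinstance(axes, (list, tuple)):
        axes = [axes]
    shape = x.get_shape().as_list()
    for a in axes:
        shape[a] = None                      # forget this dimension's size
    return tf.placeholder_with_default(x, shape=shape, name=name)
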
Example #2
    def _build_graph(self, inputs):
        image_pos, y = inputs
        # map pixel values into [-1, 1] (assuming inputs in [0, 1]) and add a channel axis
        image_pos = tf.expand_dims(image_pos * 2.0 - 1, -1)
        y = tf.one_hot(y, 10, name='label_onehot')

        # noise used during training; wrapped so that user-supplied noise of
        # any batch size can be fed through 'z' at inference time
        z = tf.random_uniform([BATCH, 100], -1, 1, name='z_train')
        z = symbf.shapeless_placeholder(z, [0], name='z')

        with argscope([Conv2D, Deconv2D, FullyConnected],
                      W_init=tf.truncated_normal_initializer(stddev=0.02)):
            with tf.variable_scope('gen'):
                image_gen = self.generator(z, y)
                tf.summary.image('gen', image_gen, 30)
            with tf.variable_scope('discrim'):
                # score the real and the generated images, both conditioned on y
                vecpos = self.discriminator(image_pos, y)
                vecneg = self.discriminator(image_gen, y)

        self.build_losses(vecpos, vecneg)
        self.collect_variables()
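
Because `z` and `label_onehot` are feedable, class-conditional samples can be drawn after training. A hedged sketch of inference with tensorpack's `OfflinePredictor` follows; the class name `Model`, the checkpoint path, and the input/output tensor names (`'label'`, `'z'`, `'gen/gen'`) are assumptions for illustration and depend on the rest of the model file:

import numpy as np
from tensorpack import OfflinePredictor, PredictConfig
from tensorpack.tfutils.sessinit import SaverRestore

# All names below are assumptions: 'Model' is the ModelDesc class that owns
# _build_graph, the checkpoint path is hypothetical, and the tensor names must
# match the actual InputDesc / generator output names.
pred = OfflinePredictor(PredictConfig(
    model=Model(),
    session_init=SaverRestore('train_log/checkpoint'),
    input_names=['label', 'z'],
    output_names=['gen/gen']))

labels = np.repeat(np.arange(10), 10)            # 100 samples, 10 per class
z = np.random.uniform(-1, 1, size=(100, 100))    # our own noise, fed via 'z'
images = pred(labels, z)[0]
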
Example #3
    def _build_graph(self, inputs):
        real_sample = inputs[0]
        real_sample = tf.expand_dims(real_sample, -1)

        # sample the latent code:
        zc = symbf.shapeless_placeholder(sample_prior(BATCH), 0, name='z_code')
        z_noise = symbf.shapeless_placeholder(tf.random_uniform(
            [BATCH, NOISE_DIM], -1, 1),
                                              0,
                                              name='z_noise')
        z = tf.concat([zc, z_noise], 1, name='z')

        with argscope([Conv2D, Deconv2D, FullyConnected],
                      W_init=tf.truncated_normal_initializer(stddev=0.02)):
            with tf.variable_scope('gen'):
                fake_sample = self.generator(z)
                fake_sample_viz = tf.cast((fake_sample) * 255.0,
                                          tf.uint8,
                                          name='viz')
                tf.summary.image('gen', fake_sample_viz, max_outputs=30)

            # may need to investigate how bn stats should be updated across two discrim
            with tf.variable_scope('discrim'):
                real_pred, _ = self.discriminator(real_sample)
                fake_pred, dist_param = self.discriminator(fake_sample)
        """
        Mutual information between x (i.e. zc in this case) and some
        information s (the generated samples in this case):

                    I(x;s) = H(x) - H(x|s)
                           = H(x) + E[\log P(x|s)]

        The distribution from which zc is sampled, in this case, is set to a fixed prior already.
        So the first term is a constant.
        For the second term, we can maximize its variational lower bound:
                    E_{x \sim P(x|s)}[\log Q(x|s)]
        where Q(x|s) is a proposal distribution to approximate P(x|s).

        Here, Q(x|s) is assumed to be a distribution which shares the form
        of P, and whose parameters are predicted by the discriminator network.
        """
        with tf.name_scope("mutual_information"):
            with tf.name_scope('prior_entropy'):
                cat, uni = get_distributions(DIST_PRIOR_PARAM[:NUM_CLASS],
                                             DIST_PRIOR_PARAM[NUM_CLASS:])
                ents = [
                    cat.entropy(name='cat_entropy'),
                    tf.reduce_sum(uni.entropy(), name='uni_entropy')
                ]
                entropy = tf.add_n(ents, name='total_entropy')
                # Note that the entropy of the prior is a constant. The paper mentions it but doesn't use it.

            with tf.name_scope('conditional_entropy'):
                cond_ents = entropy_from_samples(zc, dist_param)
                cond_entropy = tf.add_n(cond_ents, name="total_entropy")

            MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
            summary.add_moving_summary(entropy, cond_entropy, MI, *cond_ents)

        # default GAN objective
        self.build_losses(real_pred, fake_pred)

        # subtract mutual information for the latent factors (we want to maximize it)
        self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
        self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')

        summary.add_moving_summary(self.g_loss, self.d_loss)

        # distinguish between variables of generator and discriminator updates
        self.collect_variables()
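
`entropy_from_samples` and `get_distributions` are helpers defined elsewhere in this file, so only their role is visible here: `cond_ents` is a Monte-Carlo estimate of -E[\log Q(x|s)], an upper bound on H(x|s). Below is a hedged sketch of that estimator for the categorical part of the code only; the function name and the assumption that `dist_param` carries raw logits are illustrative, not the file's actual helper:

import tensorflow as tf

def categorical_cond_entropy_sketch(zc_onehot, logits):
    """Illustration only: estimate -E[log Q(x|s)] for a categorical code.

    zc_onehot: the one-hot code actually fed to the generator, shape [B, K]
    logits:    the parameters of Q(x|s) predicted by the discriminator, [B, K]
    """
    # -log Q(x|s) per sample is the softmax cross-entropy between the code
    # that produced the sample and the predicted distribution. Its mean is an
    # upper bound on H(x|s); minimizing it raises the MI lower bound.
    neg_log_q = tf.nn.softmax_cross_entropy_with_logits(labels=zc_onehot,
                                                        logits=logits)
    return tf.reduce_mean(neg_log_q, name='cat_cond_entropy')
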
Example #4
    def _build_graph(self, inputs):
        real_sample = inputs[0]
        real_sample = tf.expand_dims(real_sample * 2.0 - 1, -1)

        # latent space is cat(10) x uni(1) x uni(1) x noise(NOISE_DIM)
        self.factors = ProductDistribution("factors", [
            CategoricalDistribution("cat", 10),
            GaussianWithUniformSample("uni_a", 1),
            GaussianWithUniformSample("uni_b", 1)
        ])
        # prior: the assumed distribution of the factors in the dataset
        prior = tf.constant([0.1] * 10 + [0, 0],
                            tf.float32, [12],
                            name='prior')
        batch_prior = tf.tile(tf.expand_dims(prior, 0), [BATCH, 1],
                              name='batch_prior')

        # sample the latent code:
        zc = symbf.shapeless_placeholder(self.factors.sample(BATCH, prior),
                                         0,
                                         name='z_code')
        z_noise = symbf.shapeless_placeholder(tf.random_uniform(
            [BATCH, NOISE_DIM], -1, 1),
                                              0,
                                              name='z_noise')
        z = tf.concat([zc, z_noise], 1, name='z')

        with argscope([Conv2D, Deconv2D, FullyConnected],
                      W_init=tf.truncated_normal_initializer(stddev=0.02)):
            with tf.variable_scope('gen'):
                fake_sample = self.generator(z)
                fake_sample_viz = tf.cast((fake_sample + 1) * 128.0,
                                          tf.uint8,
                                          name='viz')
                tf.summary.image('gen', fake_sample_viz, max_outputs=30)

            # TODO investigate how bn stats should be updated across two discrim
            with tf.variable_scope('discrim'):
                real_pred, _ = self.discriminator(real_sample)

            with tf.variable_scope('discrim', reuse=True):
                fake_pred, dist_param = self.discriminator(fake_sample)

        # post-process the discriminator's output vector into valid
        # distribution parameters
        encoder_activation = self.factors.encoder_activation(dist_param)
        """
        Mutual information between x (i.e. zc in this case) and some
        information s (the generated samples in this case):

                    I(x;s) = H(x) - H(x|s)
                           = H(x) + E[\log P(x|s)]

        The distribution from which zc is sampled, in this case, is already set to
        a fixed prior, so the first term is a constant.
        For the second term, we can maximize its variational lower bound:
                    E_{x \sim P(x|s)}[\log Q(x|s)]
        where Q(x|s) is a proposal distribution to approximate P(x|s).

        Here, Q(x|s) is assumed to be a distribution which shares the form
        of self.factors, and whose parameters are predicted by the discriminator network.
        """
        with tf.name_scope("mutual_information"):
            ents = self.factors.entropy(zc, batch_prior)
            entropy = tf.add_n(ents, name='total_entropy')
            # Note that dropping this term has no effect because the entropy
            # of the prior is a constant. The paper mentions it but doesn't use it.
            # Adding this term may make the curve less stable because the
            # entropy estimated from the samples is not the true value.

            cond_ents = self.factors.entropy(zc, encoder_activation)
            cond_entropy = tf.add_n(cond_ents,
                                    name="total_conditional_entropy")

            MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
            summary.add_moving_summary(entropy, cond_entropy, MI, *ents)

        # default GAN objective
        self.build_losses(real_pred, fake_pred)

        # subtract mutual information for the latent factors (we want to maximize it)
        self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
        self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')

        summary.add_moving_summary(self.g_loss, self.d_loss)

        # distinguish between variables of generator and discriminator updates
        self.collect_variables()
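
For reference, subtracting `MI` from both `self.g_loss` and `self.d_loss` is the code-level form of the InfoGAN objective (Chen et al., 2016) with the weighting coefficient fixed to 1; the Q head shares layers with the discriminator here, which is why the same term also enters `d_loss`:

    min_{G,Q} max_D  V_GAN(D, G) - \lambda L_I(G, Q)
    where L_I(G, Q) = E_{c \sim P(c), x \sim G(z, c)}[\log Q(c|x)] + H(c)  and  \lambda = 1 here.
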
Example #5
    def _build_graph(self, inputs):
        real_sample = inputs[0]
        real_sample = tf.expand_dims(real_sample, -1)

        # latent space is cat(10) x uni(1) x uni(1) x noise(NOISE_DIM)
        self.factors = ProductDistribution("factors", [CategoricalDistribution("cat", 10),
                                                       GaussianWithUniformSample("uni_a", 1),
                                                       GaussianWithUniformSample("uni_b", 1)])
        # prior: the assumed distribution of the factors in the dataset
        prior = tf.constant([0.1] * 10 + [0, 0], tf.float32, [12], name='prior')
        batch_prior = tf.tile(tf.expand_dims(prior, 0), [BATCH, 1], name='batch_prior')

        # sample the latent code:
        zc = symbf.shapeless_placeholder(
            self.factors.sample(BATCH, prior), 0, name='z_code')
        z_noise = symbf.shapeless_placeholder(
            tf.random_uniform([BATCH, NOISE_DIM], -1, 1), 0, name='z_noise')
        z = tf.concat([zc, z_noise], 1, name='z')

        with argscope([Conv2D, Deconv2D, FullyConnected],
                      W_init=tf.truncated_normal_initializer(stddev=0.02)):
            with tf.variable_scope('gen'):
                fake_sample = self.generator(z)
                fake_sample_viz = tf.cast((fake_sample) * 255.0, tf.uint8, name='viz')
                tf.summary.image('gen', fake_sample_viz, max_outputs=30)

            # may need to investigate how bn stats should be updated across two discrim
            with tf.variable_scope('discrim'):
                real_pred, _ = self.discriminator(real_sample)

            with tf.variable_scope('discrim', reuse=True):
                fake_pred, dist_param = self.discriminator(fake_sample)

        """
        Mutual information between x (i.e. zc in this case) and some
        information s (the generated samples in this case):

                    I(x;s) = H(x) - H(x|s)
                           = H(x) + E[\log P(x|s)]

        The distribution from which zc is sampled, in this case, is already set to
        a fixed prior, so the first term is a constant.
        For the second term, we can maximize its variational lower bound:
                    E_{x \sim P(x|s)}[\log Q(x|s)]
        where Q(x|s) is a proposal distribution to approximate P(x|s).

        Here, Q(x|s) is assumed to be a distribution which shares the form
        of self.factors, and whose parameters are predicted by the discriminator network.
        """
        with tf.name_scope("mutual_information"):
            ents = self.factors.entropy(zc, batch_prior)
            entropy = tf.add_n(ents, name='total_entropy')
            # Note that dropping this term has no effect because the entropy
            # of the prior is a constant. The paper mentions it but doesn't use it.
            # Adding this term may make the curve less stable because the
            # entropy estimated from the samples is not the true value.

            # post-process output vector from discriminator to obtain valid distribution parameters
            encoder_activation = self.factors.encoder_activation(dist_param)
            cond_ents = self.factors.entropy(zc, encoder_activation)
            cond_entropy = tf.add_n(cond_ents, name="total_conditional_entropy")

            MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
            summary.add_moving_summary(entropy, cond_entropy, MI, *ents)

        # default GAN objective
        self.build_losses(real_pred, fake_pred)

        # subtract mutual information for the latent factors (we want to maximize it)
        self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
        self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')

        summary.add_moving_summary(self.g_loss, self.d_loss)

        # distinguish between variables of generator and discriminator updates
        self.collect_variables()
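
The closing `collect_variables()` call is what later lets the trainer apply `g_loss` only to the generator's weights and `d_loss` only to the discriminator's. A rough sketch of such a split under the scope names used above ('gen' and 'discrim'); tensorpack's actual `GANModelDesc.collect_variables` may differ in details:

import tensorflow as tf

def collect_variables_sketch(g_scope='gen', d_scope='discrim'):
    """Illustration: partition trainable variables by variable_scope so that
    two optimizers can each update only their own sub-network."""
    g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=g_scope)
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=d_scope)
    assert g_vars and d_vars, 'variable_scope names must match the graph above'
    return g_vars, d_vars

A GAN trainer then alternates two minimization steps: `g_loss` over the generator's variables and `d_loss` over the discriminator's.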