Beispiel #1
0
    def discriminator(self, image, is_training, reuse=False):
        """Build the discriminator graph and return its prediction.

        The network is one plain convolution stage followed by three
        batch-normalised convolution stages (each with a leaky ReLU), whose
        output is flattened and fed to a single-unit fully-connected layer.

        According to the original author's comments the spatial sizes go
        [batch,256,256,1] -> 128 -> 64 -> 32 -> 31 -- not verifiable from
        this block alone, TODO confirm against ``conv2d``.

        Args:
            image: input tensor handed to the first convolution.
            is_training: flag forwarded to every ``batch_norm`` call.
            reuse: when truthy, variables of the "discriminator" scope are
                reused instead of being created again.

        Returns:
            A ``(probability, logits)`` tuple: the sigmoid of the
            fully-connected output and the raw output itself.
        """
        base_dim = self.discriminator_dim
        # (conv scope, batch-norm scope, width multiplier, extra conv kwargs)
        normalized_stages = (
            ("d_h1_conv", "d_bn_1", 2, {}),
            ("d_h2_conv", "d_bn_2", 4, {}),
            # The last stage passes sh=1 / sw=1 to conv2d.
            ("d_h3_conv", "d_bn_3", 8, {"sh": 1, "sw": 1}),
        )

        with tf.variable_scope("discriminator"):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            # First stage: convolution + leaky ReLU, no batch normalisation.
            features = lrelu(conv2d(image, base_dim, scope="d_h0_conv"))

            for conv_scope, bn_scope, multiplier, conv_kwargs in normalized_stages:
                convolved = conv2d(features,
                                   base_dim * multiplier,
                                   scope=conv_scope,
                                   **conv_kwargs)
                normalized = batch_norm(convolved, is_training, scope=bn_scope)
                features = lrelu(normalized)

            # real or fake binary output
            flattened = tf.reshape(features, [self.batch_size, -1])
            logits = fc(flattened, 1, scope="d_fc1")

            return tf.sigmoid(logits), logits
Beispiel #2
0
    def build_models(self, image):
        """Build the image discriminator graph under 'img_discriminator'.

        Four ``op.conv2d`` layers with 64, 128, 256 and 512 filters are
        stacked. Layer 1 is followed directly by a leaky ReLU; layers 2-4
        are batch-normalised before the leaky ReLU. The flattened output of
        layer 4 is mapped to a single value by ``op.linear``.

        Results are stored on the instance: ``self.img``, ``self.l1`` ..
        ``self.l4``, ``self.l2_bn`` .. ``self.l4_bn`` and ``self.score``.

        Args:
            image: input image tensor (64x64x3 according to the original
                author's note -- TODO confirm against the caller).
        """
        base_filters = 64

        with tf.variable_scope('img_discriminator'):
            self.img = image

            # layer 1: convolution + leaky ReLU, no batch normalisation
            self.l1 = op.lrelu(op.conv2d(self.img, base_filters, name='l1'))

            # layer 2
            conv_out = op.conv2d(self.l1, base_filters * 2, name='l2')
            self.l2_bn = op.batch_norm(name='l2_bn0')
            self.l2 = op.lrelu(self.l2_bn(conv_out, train=self.train))

            # layer 3
            conv_out = op.conv2d(self.l2, base_filters * 4, name='l3')
            self.l3_bn = op.batch_norm(name='l3_bn0')
            self.l3 = op.lrelu(self.l3_bn(conv_out, train=self.train))

            # layer 4
            conv_out = op.conv2d(self.l3, base_filters * 8, name='l4')
            self.l4_bn = op.batch_norm(name='l4_bn0')
            self.l4 = op.lrelu(self.l4_bn(conv_out, train=self.train))

            # final layer (the original author notes that it differs from
            # the paper): flatten and project to one score per example
            flattened = tf.reshape(self.l4, [self.batch_size, -1])
            self.score = op.linear(flattened, 1, 'final')
Beispiel #3
0
    def discriminator(self, image, is_training, reuse=False):
        """Build the discriminator graph and return its prediction.

        One plain convolution stage is followed by three batch-normalised
        convolution stages (each with a leaky ReLU). The flattened result
        goes through an 8-unit and then a 1-unit fully-connected layer.

        Args:
            image: input tensor handed to the first convolution.
            is_training: flag forwarded to every ``batch_norm`` call.
            reuse: when truthy, variables of the "discriminator" scope are
                reused instead of being created again.

        Returns:
            A ``(probability, logits)`` tuple: the sigmoid of the last
            fully-connected output and the raw output itself.
        """
        base_dim = self.discriminator_dim
        # (conv scope, batch-norm scope, width multiplier)
        normalized_stages = (
            ("d_h1_conv", "d_bn_1", 2),
            ("d_h2_conv", "d_bn_2", 4),
            ("d_h3_conv", "d_bn_3", 8),
        )

        with tf.variable_scope("discriminator"):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            # First stage: convolution + leaky ReLU, no batch normalisation.
            features = lrelu(conv2d(image, base_dim, scope="d_h0_conv"))

            for conv_scope, bn_scope, multiplier in normalized_stages:
                convolved = conv2d(features,
                                   base_dim * multiplier,
                                   scope=conv_scope)
                normalized = batch_norm(convolved, is_training, scope=bn_scope)
                features = lrelu(normalized)

            # real or fake binary output, via a small two-layer head
            flattened = tf.reshape(features, [self.batch_size, -1])
            hidden = fc(flattened, 8, scope="d_fc1")
            logits = fc(hidden, 1, scope="d_fc2")

            return tf.nn.sigmoid(logits), logits
Beispiel #4
0
    def build_image_generator(self, img_z, sen_rep):
        """Build the sentence-conditioned image generator graph.

        The sentence representation is projected to 128 units, passed
        through a leaky ReLU and concatenated with the noise vector along
        axis 1. The result is linearly projected and reshaped to a
        [batch, 4, 4, 512] map, then expanded by four ``op.deconv2d``
        layers with target shapes 8x8x256, 16x16x128, 32x32x64 and 64x64x3.
        A ``tanh`` on the last layer yields ``self.fake_img``.

        Intermediate tensors and batch-norm objects are stored on the
        instance (``self.l0`` .. ``self.l5``, ``self.l1_bn`` ..
        ``self.l4_bn``).

        Args:
            img_z: noise tensor concatenated with the sentence vector
                (presumably [batch, z_dim] -- TODO confirm).
            sen_rep: sentence representation fed to ``op.linear``.

        Raises:
            AssertionError: if ``config.generator_l1_nchannel`` is not a
                multiple of 8, or if the static shape of the generated
                image is not (64, 64, 3).
        """
        with tf.variable_scope('img_generator'):
            # Explicit check instead of ``assert cond, logger.error(...)``:
            # ``logger.error`` returns None, so the old assertion carried no
            # message, and ``assert`` is removed entirely under ``python -O``.
            # NOTE(review): generator_l1_nchannel is only validated here;
            # the layer widths below are hard-coded.
            if self.config.generator_l1_nchannel % 8 != 0:
                message = '[ERROR] Invalid channel size'
                logger.error(message)
                raise AssertionError(message)

            # target output size of every layer (height, width, channels)
            l5_h, l5_w, l5_c = 64, 64, 3
            l4_h, l4_w, l4_c = 32, 32, 64
            l3_h, l3_w, l3_c = 16, 16, 128
            l2_h, l2_w, l2_c = 8, 8, 256
            l1_h, l1_w, l1_c = 4, 4, 512

            # layer 0: combine the conditional vector with the noise vector
            # (legacy ``tf.concat(axis, values)`` argument order)
            sen_rep = op.linear(sen_rep, 128, 'conditional_vec')
            self.l0 = tf.concat(1, [img_z, op.lrelu(sen_rep)])

            # layer 1: linear projection reshaped into a spatial map
            self.l1 = op.linear(self.l0, l1_w * l1_h * l1_c, 'l0_lin')
            self.l1 = tf.reshape(self.l1, [self.batch_size, l1_h, l1_w, l1_c])
            self.l1_bn = op.batch_norm(name='l1_bn0')
            self.l1 = tf.nn.relu(self.l1_bn(self.l1, train=self.train))

            # layer 2: first deconvolution
            self.l2 = op.deconv2d(self.l1, [self.batch_size, l2_h, l2_w, l2_c],
                                  name='l2')
            self.l2_bn = op.batch_norm(name='l2_bn0')
            self.l2 = tf.nn.relu(self.l2_bn(self.l2, train=self.train))

            # layer 3: second deconvolution
            self.l3 = op.deconv2d(self.l2, [self.batch_size, l3_h, l3_w, l3_c],
                                  name='l3')
            self.l3_bn = op.batch_norm(name='l3_bn0')
            self.l3 = tf.nn.relu(self.l3_bn(self.l3, train=self.train))

            # layer 4: third deconvolution
            self.l4 = op.deconv2d(self.l3, [self.batch_size, l4_h, l4_w, l4_c],
                                  name='l4')
            self.l4_bn = op.batch_norm(name='l4_bn0')
            self.l4 = tf.nn.relu(self.l4_bn(self.l4, train=self.train))

            # layer 5: final deconvolution, no batch norm
            self.l5 = op.deconv2d(self.l4, [self.batch_size, l5_h, l5_w, l5_c],
                                  name='l5')

            self.fake_img = tf.nn.tanh(self.l5)  # [-1, 1]
            img_shape = self.fake_img.get_shape()

            # check the static size of the generated image
            shape_ok = (img_shape[1] == 64) and \
                (img_shape[2] == 64) and (img_shape[3] == 3)
            if not shape_ok:
                message = 'Wrong fake image dimension: {}'.format(img_shape)
                logger.error(message)
                raise AssertionError(message)
        return
Beispiel #5
0
    def build_models(self, image, sentence_vec):
        """Build the sentence-conditioned image discriminator graph.

        The sentence vector is projected to 128 units and passed through a
        leaky ReLU. The image goes through four ``op.conv2d`` layers with
        64, 128, 256 and 512 filters (layers 2-4 batch-normalised). The
        sentence vector is then tiled over a 4x4 grid, concatenated to the
        layer-4 output on axis 3, and followed by one more ``op.conv2d``
        layer and a single-output ``op.linear``.

        Results are stored on the instance: ``self.img``,
        ``self.sentence_vec``, ``self.l1`` .. ``self.l5``, the ``*_bn``
        batch-norm objects and ``self.score``.

        Args:
            image: input image tensor (64x64x3 according to the original
                author's note -- TODO confirm).
            sentence_vec: sentence representation fed to ``op.linear``.
        """
        # Channel count of each conv layer. Per the original author's notes
        # the spatial sizes are 32, 16, 8 and 4 respectively.
        c1, c2, c3, c4 = 64, 128, 256, 512

        with tf.variable_scope('img_discriminator'):
            self.img = image
            projected = op.linear(sentence_vec, 128, 'conditional_vec')
            self.sentence_vec = op.lrelu(projected)  # size [batch, 128]

            # layer 1: convolution + leaky ReLU, no batch normalisation
            self.l1 = op.lrelu(op.conv2d(self.img, c1, name='l1'))

            # layer 2
            conv_out = op.conv2d(self.l1, c2, name='l2')
            self.l2_bn = op.batch_norm(name='l2_bn0')
            self.l2 = op.lrelu(self.l2_bn(conv_out, train=self.train))

            # layer 3
            conv_out = op.conv2d(self.l2, c3, name='l3')
            self.l3_bn = op.batch_norm(name='l3_bn0')
            self.l3 = op.lrelu(self.l3_bn(conv_out, train=self.train))

            # layer 4
            conv_out = op.conv2d(self.l3, c4, name='l4')
            self.l4_bn = op.batch_norm(name='l4_bn0')
            self.l4 = op.lrelu(self.l4_bn(conv_out, train=self.train))

            # Attach the text information to every spatial position:
            # [batch, 128] -> [batch, 1, 1, 128] -> [batch, 4, 4, 128]
            sentence_map = tf.expand_dims(
                tf.expand_dims(self.sentence_vec, 1), 2)
            self.sentence_vec = tf.tile(sentence_map, [1, 4, 4, 1])

            # legacy ``tf.concat(axis, values)`` argument order
            self.l4 = tf.concat(3, [self.l4, self.sentence_vec])

            # layer 5
            conv_out = op.conv2d(self.l4, c4, 1, 1, 1, 1, name='l5')
            self.l5_bn = op.batch_norm(name='l5_bn0')
            self.l5 = op.lrelu(self.l5_bn(conv_out, train=self.train))

            # final layer (the original author notes that it differs from
            # the paper): flatten and project to one score per example
            flattened = tf.reshape(self.l5, [self.batch_size, -1])
            self.score = op.linear(flattened, 1, 'final')
Beispiel #6
0
 def encode_layer(x, output_filters, layer):
     """Apply leaky ReLU, convolution and batch norm to ``x``.

     ``layer`` is formatted into the variable scope names; ``is_training``
     is taken from the enclosing scope.
     """
     conv_scope = "d_e%d_conv" % layer
     bn_scope = "d_e%d_bn" % layer

     activated = lrelu(x)
     convolved = conv2d(activated,
                        output_filters=output_filters,
                        scope=conv_scope)
     return batch_norm(convolved, is_training, scope=bn_scope)
Beispiel #7
0
    def build_image_generator(self, img_z):
        """Build the unconditional image generator graph.

        The noise tensor is linearly projected and reshaped to a
        [batch, 4, 4, 512] map, then expanded by four ``op.deconv2d``
        layers with target shapes 8x8x256, 16x16x128, 32x32x64 and 64x64x3.
        A ``tanh`` on the last layer yields ``self.fake_img``.

        Intermediate tensors and batch-norm objects are stored on the
        instance (``self.l1`` .. ``self.l5``, ``self.l1_bn`` ..
        ``self.l4_bn``). The name and static shape of ``self.l1`` are
        printed while the graph is built.

        Args:
            img_z: noise tensor fed to the first ``op.linear``
                (presumably [batch, z_dim] -- TODO confirm).

        Raises:
            AssertionError: if the static shape of the generated image is
                not (64, 64, 3).
        """
        with tf.variable_scope('img_generator'):
            nf = 64
            # layer 1: project the noise into a 4 x 4 x 512 map
            self.l1 = op.linear(img_z, nf * 8 * 4 * 4)
            self.l1 = tf.reshape(
                self.l1, [self.batch_size, 4, 4, nf * 8])
            self.l1_bn = op.batch_norm(name='l1_bn0')
            self.l1 = tf.nn.relu(self.l1_bn(self.l1, train=self.train))
            print('name: {}, size: {}'.format(self.l1.name,
                                              self.l1.get_shape()))

            # layer 2: first deconvolution
            self.l2 = op.deconv2d(self.l1, [self.batch_size, 8, 8, nf * 4],
                                  name='l2')
            self.l2_bn = op.batch_norm(name='l2_bn0')
            self.l2 = tf.nn.relu(self.l2_bn(self.l2, train=self.train))

            # layer 3: second deconvolution
            self.l3 = op.deconv2d(self.l2, [self.batch_size, 16, 16, nf * 2],
                                  name='l3')
            self.l3_bn = op.batch_norm(name='l3_bn0')
            self.l3 = tf.nn.relu(self.l3_bn(self.l3, train=self.train))

            # layer 4: third deconvolution
            self.l4 = op.deconv2d(self.l3, [self.batch_size, 32, 32, nf],
                                  name='l4')
            self.l4_bn = op.batch_norm(name='l4_bn0')
            self.l4 = tf.nn.relu(self.l4_bn(self.l4, train=self.train))

            # layer 5: final deconvolution, no batch norm / ReLU
            self.l5 = op.deconv2d(self.l4, [self.batch_size, 64, 64, 3],
                                  name='l5')

            self.fake_img = tf.nn.tanh(self.l5)
            img_shape = self.fake_img.get_shape()

            # Check the static size of the generated image. Done with an
            # explicit raise instead of ``assert cond, logger.error(...)``:
            # ``logger.error`` returns None, so the old assertion carried no
            # message, and ``assert`` is removed entirely under ``python -O``.
            shape_ok = (img_shape[1] == 64) and \
                (img_shape[2] == 64) and (img_shape[3] == 3)
            if not shape_ok:
                message = 'Wrong fake image dimension: {}'.format(img_shape)
                logger.error(message)
                raise AssertionError(message)
        return
 def decode_layer(x, output_width, output_filters, layer, enc_layer, dropout=False, do_concat=True):
     """Apply ReLU and a deconvolution to ``x``, with optional extras.

     The deconvolution targets a square
     [batch, output_width, output_width, output_filters] output. Batch
     norm is applied for every layer except layer 8, dropout (0.5) when
     ``dropout`` is truthy, and ``enc_layer`` is concatenated on axis 3
     when ``do_concat`` is truthy. ``self`` and ``is_training`` come from
     the enclosing scope.
     """
     target_shape = [self.batch_size, output_width,
                     output_width, output_filters]
     dec = deconv2d(tf.nn.relu(x), target_shape,
                    scope="g_d%d_deconv" % layer)

     # IMPORTANT: layer 8 (the last layer, per the original comment) is
     # deliberately left un-normalized; the original author reports the
     # GAN is unstable otherwise.
     skip_norm = layer == 8
     if not skip_norm:
         dec = batch_norm(dec, is_training, scope="g_d%d_bn" % layer)

     dec = tf.nn.dropout(dec, 0.5) if dropout else dec
     return tf.concat([dec, enc_layer], 3) if do_concat else dec
Beispiel #9
0
 def decode_layer(x, output_width, output_filters, layer, dropout=False):
     """Apply ReLU and a deconvolution to ``x``, with optional extras.

     The deconvolution targets a square
     [batch, output_width, output_width, output_filters] output. Batch
     norm is applied for every layer except layer 8, and dropout (0.5)
     when ``dropout`` is truthy. ``self`` and ``is_training`` come from
     the enclosing scope.
     """
     target_shape = [
         self.batch_size, output_width, output_width, output_filters
     ]
     dec = deconv2d(tf.nn.relu(x), target_shape,
                    scope="d_d%d_deconv" % layer)

     # Layer 8 is the only layer that skips batch normalisation.
     skip_norm = layer == 8
     if not skip_norm:
         dec = batch_norm(dec, is_training, scope="d_d%d_bn" % layer)

     return tf.nn.dropout(dec, 0.5) if dropout else dec
Beispiel #10
0
    def build_sentence_generator(self):
        '''
            Build the sentence generator graph under the 'sen_generator'
            variable scope.

            Reads ``self.sen_z``, ``self.img_rep``, ``self.word_embedding``,
            ``self.teacher_forcing``, ``self.train``, ``self.batch_size`` and
            ``self.config``; stores ``self.l0``, ``self.l1`` (plus
            ``self.h0_w`` / ``self.h0_b``), the vocabulary matrix and bias,
            ``self.teacher_forcing_embedding`` and ``self.fake_sentence``
            (a list with one projected tensor per decoder output).

            The original author flags this part as tricky and unfinished
            ("not sure how we gonna generate the text").

            Returns:
                Always 0.
        '''
        with tf.variable_scope('sen_generator'):
            # the conditional vector: noise joined with the image
            # representation along axis 1
            # (legacy ``tf.concat(axis, values)`` argument order --
            # presumably a pre-1.0 TensorFlow; confirm the pinned version)
            self.l0 = tf.concat(1, [self.sen_z, op.lrelu(self.img_rep)])

            # layer 1, transform from the raw state to the initial state
            # (with_w=True makes op.linear return three values, unpacked
            # here as output, weight and bias)
            self.l1, self.h0_w, self.h0_b = op.linear(
                self.l0,
                self.config.text_gen_hidden_dim,
                'l0_lin',
                with_w=True)
            self.l1 = tf.reshape(
                self.l1, [self.batch_size, self.config.text_gen_hidden_dim])
            self.l1_bn = op.batch_norm(name='l1_bn0')
            self.l1 = tf.nn.relu(self.l1_bn(self.l1, train=self.train))
            # NOTE(review): self.l1 is not referenced again in this method;
            # the decoder below is given self.l0 as its initial state.
            # Confirm whether self.l1 was meant to be used instead.

            # layer 2, the rnn part
            cell = tf.nn.rnn_cell.GRUCell(self.config.text_gen_hidden_dim)

            # define the vocabulary matrix here, note that there's a diff
            # between the embedding matrix and the vocabulary matrix
            # shape: [text_gen_hidden_dim, word_embedding_space_size]
            self.vocabulary_mat = tf.get_variable(
                'voc_mat',
                initializer=tf.random_normal([
                    self.config.text_gen_hidden_dim,
                    self.config.word_embedding_space_size
                ]))
            self.vocabulary_mat_trans = tf.transpose(self.vocabulary_mat)
            # NOTE(review): the bias has text_gen_hidden_dim entries and is
            # paired with the *transposed* matrix
            # ([word_embedding_space_size, text_gen_hidden_dim]) below.
            # Applying that to the GRU outputs presumably only works when
            # both sizes are equal -- verify the intended shapes.
            self.vocabulary_bias = tf.get_variable(
                'voc_bias',
                initializer=tf.random_normal([self.config.text_gen_hidden_dim
                                              ]))

            # it's tricky when it comes to teacher forcing.
            # NOTE(review): self.teacher_forcing is used as a Python
            # condition here and as the ids of an embedding lookup further
            # down; confirm what type it is expected to be. Also confirm
            # the branch direction: the argmax-feeding loop function is
            # installed when the flag is truthy.
            if self.teacher_forcing:
                # the loop function to be called at each time step
                # (underscore-prefixed, i.e. private, seq2seq helper)
                loop = tf.nn.seq2seq._extract_argmax_and_embed(
                    self.word_embedding,
                    output_projection=(self.vocabulary_mat_trans,
                                       self.vocabulary_bias),
                    update_embedding=False)
            else:
                loop = None

            self.teacher_forcing_embedding = tf.nn.embedding_lookup(
                self.word_embedding, self.teacher_forcing)

            # NOTE(review): confirm that ``tf.nn.seq2seq.decoder`` exists in
            # the TensorFlow version this project pins.
            outputs, state = tf.nn.seq2seq.decoder(
                self.teacher_forcing_embedding,
                self.l0,
                cell,
                loop_function=loop)

            # project every decoder output with the vocabulary matrix/bias
            self.fake_sentence = [
                tf.nn.xw_plus_b(x, self.vocabulary_mat_trans,
                                self.vocabulary_bias) for x in outputs
            ]
        return 0