def decoder(self, z, is_training, reuse=False):
    """Builds the Bernoulli decoder graph for a latent code.

    Two linear layers (each followed by batch norm and ReLU) are reshaped
    into a feature map and passed through two deconv2d layers; the last one
    is squashed with a sigmoid.

    Args:
      z: latent code tensor fed to the first linear layer.
      is_training: forwarded to every batch_norm call.
      reuse: forwarded to the "decoder" variable scope.

    Returns:
      The sigmoid of the final deconv2d output, which is requested with shape
      [batch_size, input_height, input_width, c_dim].
    """
    full_h, full_w = self.input_height, self.input_width
    half_h, half_w = full_h // 2, full_w // 2
    quarter_h, quarter_w = full_h // 4, full_w // 4

    with tf.variable_scope("decoder", reuse=reuse):
        # Fully connected stem.
        hidden = linear(z, 1024, scope="de_fc1")
        hidden = batch_norm(hidden, is_training=is_training, scope="de_bn1")
        hidden = tf.nn.relu(hidden)

        hidden = linear(hidden, 128 * quarter_h * quarter_w, scope="de_fc2")
        hidden = batch_norm(hidden, is_training=is_training, scope="de_bn2")
        hidden = tf.nn.relu(hidden)

        # Switch from a flat vector to a 128-channel feature map.
        hidden = tf.reshape(
            hidden, [self.batch_size, quarter_h, quarter_w, 128])

        hidden = deconv2d(
            hidden, [self.batch_size, half_h, half_w, 64],
            4, 4, 2, 2, name="de_dc3")
        hidden = batch_norm(hidden, is_training=is_training, scope="de_bn3")
        hidden = tf.nn.relu(hidden)

        logits = deconv2d(
            hidden, [self.batch_size, full_h, full_w, self.c_dim],
            4, 4, 2, 2, name="de_dc4")
        return tf.nn.sigmoid(logits)
def generator(self, z, is_training, reuse=False):
    """Builds the generator graph that turns a latent code into an image.

    The layer stack is linear -> linear -> deconv2d -> deconv2d, with batch
    norm and lrelu after every layer except the last, whose output goes
    through a sigmoid.

    Args:
      z: latent code tensor fed to the first linear layer.
      is_training: forwarded to every batch_norm call.
      reuse: forwarded to the "generator" variable scope.

    Returns:
      The sigmoid of the final deconv2d output, which is requested with shape
      [batch_size, input_height, input_width, c_dim].
    """
    h, w = self.input_height, self.input_width
    bs = self.batch_size
    flat_units = 128 * (h // 4) * (w // 4)

    with tf.variable_scope("generator", reuse=reuse):
        features = lrelu(batch_norm(
            linear(z, 1024, scope="g_fc1"),
            is_training=is_training, scope="g_bn1"))
        features = lrelu(batch_norm(
            linear(features, flat_units, scope="g_fc2"),
            is_training=is_training, scope="g_bn2"))

        # Flat vector -> 128-channel feature map at a quarter of the output
        # resolution.
        features = tf.reshape(features, [bs, h // 4, w // 4, 128])

        features = lrelu(batch_norm(
            deconv2d(features, [bs, h // 2, w // 2, 64],
                     4, 4, 2, 2, name="g_dc3"),
            is_training=is_training, scope="g_bn3"))

        logits = deconv2d(
            features, [bs, h, w, self.c_dim], 4, 4, 2, 2, name="g_dc4")
        return tf.nn.sigmoid(logits)
def sn_generator(z, batch_size, output_height, output_width, output_c_dim,
                 is_training, reuse=False):
    """Builds the SNDCGAN generator graph and returns its output tensor.

    Architecture reference: https://openreview.net/pdf?id=B1QRgziT-.

    Args:
      z: latent code, shape [batch_size, latent_dimensionality].
      batch_size: Batch size.
      output_height: Output image height.
      output_width: Output image width.
      output_c_dim: Number of color channels.
      is_training: boolean, are we in train or eval mode.
      reuse: boolean, should params be re-used.

    Returns:
      The generated image tensor, rescaled from the tanh range to [0, 1].
    """
    # Spatial sizes at full, 1/2, 1/4 and 1/8 resolution.
    s_h, s_w = output_height, output_width
    s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
    s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
    s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)

    # (deconv name, batch-norm scope, out height, out width, out channels)
    # for the 4x4 / stride-2 stages, in the order they are applied.
    upsample_stages = [
        ("g_dc2", "g_bn2", s_h4, s_w4, 256),
        ("g_dc3", "g_bn3", s_h2, s_w2, 128),
        ("g_dc4", "g_bn4", s_h, s_w, 64),
    ]

    with tf.variable_scope("generator", reuse=reuse):
        net = linear(z, s_h8 * s_w8 * 512, scope="g_fc1")
        net = batch_norm_dcgan(net, is_training, scope="g_bn1", epsilon=2e-5)
        net = tf.nn.relu(net)
        net = tf.reshape(net, [batch_size, s_h8, s_w8, 512])

        for dc_name, bn_scope, out_h, out_w, channels in upsample_stages:
            net = deconv2d(
                net, [batch_size, out_h, out_w, channels],
                4, 4, 2, 2, name=dc_name)
            net = batch_norm_dcgan(
                net, is_training, scope=bn_scope, epsilon=2e-5)
            net = tf.nn.relu(net)

        # Final 3x3 / stride-1 layer maps to the requested channel count.
        net = deconv2d(
            net, [batch_size, s_h, s_w, output_c_dim],
            3, 3, 1, 1, name="g_dc5")
        squashed = tf.tanh(net)

        # NOTE: the rescaling below matches the current image preprocessing,
        # which normalizes real images to [0, 1]. The authors' implementation
        # uses the plain tanh output and normalizes images to [-1, 1].
        return tf.div(squashed + 1.0, 2.0)
def discriminator(self, x, is_training, reuse=False):
    """Builds the BEGAN discriminator, which is an auto-encoder.

    This does not match the architecture from the paper; it mirrors the
    "standard" discriminator (same two conv layers, lrelu activations) but
    replaces its large linear layer with a 64-unit bottleneck followed by a
    decoder.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, are we in train or eval mode.
      reuse: boolean, should params be re-used.

    Returns:
      out: sigmoid output of the last decoder layer, i.e. the reconstruction
        of `x`.
      recon_error: mean absolute difference between `out` and `x` (L1
        reconstruction error).
      code: the 64-unit bottleneck activation, after the optional batch norm
        and lrelu.
    """
    h, w = self.input_height, self.input_width
    bs = self.batch_size
    use_sn = self.discriminator_normalization == consts.SPECTRAL_NORM
    use_bn = self.discriminator_normalization == consts.BATCH_NORM

    def maybe_batch_norm(tensor, scope):
        # Batch norm is applied only when it is the selected normalization.
        if not use_bn:
            return tensor
        return batch_norm(tensor, is_training=is_training, scope=scope)

    with tf.variable_scope("discriminator", reuse=reuse):
        # Encoder: [bs, h, w, c] -> [bs, 64].
        enc = conv2d(x, 64, 4, 4, 2, 2, name="d_conv1",
                     use_sn=use_sn)  # [bs, h/2, w/2, 64]
        enc = lrelu(enc)
        enc = conv2d(enc, 128, 4, 4, 2, 2, name="d_conv2",
                     use_sn=use_sn)  # [bs, h/4, w/4, 128]
        enc = tf.reshape(enc, [bs, -1])  # [bs, h * w * 8]
        code = linear(enc, 64, scope="d_fc6", use_sn=use_sn)  # [bs, 64]
        code = lrelu(maybe_batch_norm(code, "d_bn1"))

        # Decoder: [bs, 64] -> [bs, h, w, c].
        dec = linear(code, 128 * (h // 4) * (w // 4), scope="d_fc1",
                     use_sn=use_sn)  # [bs, h/4 * w/4 * 128]
        dec = lrelu(maybe_batch_norm(dec, "d_bn2"))
        dec = tf.reshape(dec, [bs, h // 4, w // 4, 128])  # [bs, h/4, w/4, 128]
        dec = deconv2d(dec, [bs, h // 2, w // 2, 64],
                       4, 4, 2, 2, name="d_deconv1")  # [bs, h/2, w/2, 64]
        dec = lrelu(maybe_batch_norm(dec, "d_bn3"))
        dec = deconv2d(dec, [bs, h, w, self.c_dim],
                       4, 4, 2, 2, name="d_deconv2")  # [bs, h, w, c]
        out = tf.nn.sigmoid(dec)

        # Reconstruction loss.
        recon_error = tf.reduce_mean(tf.abs(out - x))
        return out, recon_error, code
def discriminator(self, x, is_training, reuse=False):
    """Builds the BEGAN discriminator, which is an auto-encoder.

    This does not match the architecture from the paper; it mirrors the
    "standard" discriminator (same two conv layers, lrelu activations) but
    replaces its large linear layer with a 64-unit bottleneck followed by a
    decoder. Batch norm layers are inserted only when
    `self.discriminator_batchnorm` is truthy.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, are we in train or eval mode.
      reuse: boolean, should params be re-used.

    Returns:
      out: sigmoid output of the last decoder layer, i.e. the reconstruction
        of `x`.
      recon_error: mean absolute difference between `out` and `x` (L1
        reconstruction error).
      code: the 64-unit bottleneck activation, after the optional batch norm
        and lrelu.
    """
    img_h = self.input_height
    img_w = self.input_width
    quarter_h, quarter_w = img_h // 4, img_w // 4
    with_bn = self.discriminator_batchnorm

    with tf.variable_scope("discriminator", reuse=reuse):
        # ---- Encoding step: [bs, h, w, c] -> [bs, 64] ----
        hidden = lrelu(
            conv2d(x, 64, 4, 4, 2, 2, name="d_conv1"))  # [bs, h/2, w/2, 64]
        hidden = conv2d(
            hidden, 128, 4, 4, 2, 2, name="d_conv2")  # [bs, h/4, w/4, 128]
        flat = tf.reshape(hidden, [self.batch_size, -1])  # [bs, h * w * 8]

        code = linear(flat, 64, scope="d_fc6")  # [bs, 64]
        if with_bn:
            code = batch_norm(code, is_training=is_training, scope="d_bn1")
        code = lrelu(code)

        # ---- Decoding step: [bs, 64] -> [bs, h, w, c] ----
        hidden = linear(
            code, 128 * quarter_h * quarter_w,
            scope="d_fc1")  # [bs, h/4 * w/4 * 128]
        if with_bn:
            hidden = batch_norm(
                hidden, is_training=is_training, scope="d_bn2")
        hidden = lrelu(hidden)
        hidden = tf.reshape(
            hidden,
            [self.batch_size, quarter_h, quarter_w, 128])  # [bs, h/4, w/4, 128]

        hidden = deconv2d(
            hidden, [self.batch_size, img_h // 2, img_w // 2, 64],
            4, 4, 2, 2, name="d_deconv1")  # [bs, h/2, w/2, 64]
        if with_bn:
            hidden = batch_norm(
                hidden, is_training=is_training, scope="d_bn3")
        hidden = lrelu(hidden)

        hidden = deconv2d(
            hidden, [self.batch_size, img_h, img_w, self.c_dim],
            4, 4, 2, 2, name="d_deconv2")  # [bs, h, w, c]
        out = tf.nn.sigmoid(hidden)

        # Reconstruction loss.
        recon_error = tf.reduce_mean(tf.abs(out - x))
        return out, recon_error, code
def generator(z, batch_size, output_height, output_width, output_c_dim,
              is_training, reuse=False):
    """Builds the DCGAN generator graph and returns its output tensor.

    Architecture reference: https://arxiv.org/abs/1511.06434. Every layer
    before the output is followed by batch norm and ReLU; the output layer
    uses tanh, rescaled to [0, 1].

    Args:
      z: latent code, shape [batch_size, latent_dimensionality].
      batch_size: Batch size.
      output_height: Output image height.
      output_width: Output image width.
      output_c_dim: Number of color channels.
      is_training: boolean, are we in train or eval mode.
      reuse: boolean, should params be re-used.

    Returns:
      net: The generated image Tensor with entries in [0, 1].
    """
    base_filters = 64  # Dimension of filters in first convolutional layer.

    with tf.variable_scope("generator", reuse=reuse):
        # Spatial sizes at full, 1/2, 1/4, 1/8 and 1/16 resolution.
        s_h, s_w = output_height, output_width
        s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
        s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
        s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)
        s_h16, s_w16 = (conv_out_size_same(s_h8, 2),
                        conv_out_size_same(s_w8, 2))

        # (batch-norm scope, deconv name, deconv output shape); each stage
        # applies batch norm + ReLU to its input and then upsamples.
        stages = [
            ("g_bn1", "g_dc1", [batch_size, s_h8, s_w8, base_filters * 4]),
            ("g_bn2", "g_dc2", [batch_size, s_h4, s_w4, base_filters * 2]),
            ("g_bn3", "g_dc3", [batch_size, s_h2, s_w2, base_filters * 1]),
            ("g_bn4", "g_dc4", [batch_size, s_h, s_w, output_c_dim]),
        ]

        net = linear(z, base_filters * 8 * s_h16 * s_w16, scope="g_fc1")
        net = tf.reshape(net, [-1, s_h16, s_w16, base_filters * 8])

        for bn_scope, dc_name, out_shape in stages:
            normalized = batch_norm_dcgan(net, is_training, scope=bn_scope)
            net = deconv2d(
                tf.nn.relu(normalized), out_shape, 5, 5, 2, 2, name=dc_name)

        # Map the tanh range [-1, 1] onto [0, 1].
        net = 0.5 * tf.nn.tanh(net) + 0.5
        return net
def decoder(self, z, is_training, reuse=False):
    """Implements the Bernoulli decoder.

    The latent code goes through two linear layers and one deconv2d layer,
    each followed by batch norm and ReLU, and then a final deconv2d layer
    followed by a sigmoid.

    Args:
      z: latent code tensor fed to the first linear layer.
      is_training: forwarded to every batch_norm call.
      reuse: forwarded to the "decoder" variable scope.

    Returns:
      The sigmoid of the final deconv2d output, which is requested with shape
      [batch_size, input_height, input_width, c_dim].
    """
    height = self.input_height
    width = self.input_width

    def bn_relu(tensor, scope):
        # Shared post-processing for every layer except the output layer.
        normalized = batch_norm(tensor, is_training=is_training, scope=scope)
        return tf.nn.relu(normalized)

    with tf.variable_scope("decoder", reuse=reuse):
        net = bn_relu(linear(z, 1024, scope="de_fc1"), "de_bn1")

        fc2_units = 128 * (height // 4) * (width // 4)
        net = bn_relu(linear(net, fc2_units, scope="de_fc2"), "de_bn2")

        # Flat vector -> 128-channel feature map.
        map_shape = [self.batch_size, height // 4, width // 4, 128]
        net = tf.reshape(net, map_shape)

        mid_shape = [self.batch_size, height // 2, width // 2, 64]
        net = bn_relu(
            deconv2d(net, mid_shape, 4, 4, 2, 2, name="de_dc3"), "de_bn3")

        out_shape = [self.batch_size, height, width, self.c_dim]
        out = tf.nn.sigmoid(
            deconv2d(net, out_shape, 4, 4, 2, 2, name="de_dc4"))
    return out