Esempio n. 1
0
 def decoder(self, z, is_training, reuse=False):
     """Builds the Bernoulli decoder that maps a latent code to an image.

     Args:
       z: latent code tensor fed into the first linear layer.
       is_training: forwarded to every batch_norm call.
       reuse: forwarded to the "decoder" variable scope.

     Returns:
       The sigmoid of the last deconvolution, whose requested output shape is
       [self.batch_size, self.input_height, self.input_width, self.c_dim].
     """
     full_h, full_w = self.input_height, self.input_width
     with tf.variable_scope("decoder", reuse=reuse):
         # Fully connected stem: linear -> batch norm -> ReLU, twice.
         fc1 = linear(z, 1024, scope="de_fc1")
         fc1 = batch_norm(fc1, is_training=is_training, scope="de_bn1")
         fc1 = tf.nn.relu(fc1)

         fc2 = linear(fc1, 128 * (full_h // 4) * (full_w // 4),
                      scope="de_fc2")
         fc2 = batch_norm(fc2, is_training=is_training, scope="de_bn2")
         fc2 = tf.nn.relu(fc2)

         # Fold the flat activations into a quarter-resolution feature map.
         feature_map = tf.reshape(
             fc2, [self.batch_size, full_h // 4, full_w // 4, 128])

         # First deconvolution targets half resolution with 64 channels.
         half_shape = [self.batch_size, full_h // 2, full_w // 2, 64]
         up1 = deconv2d(feature_map, half_shape, 4, 4, 2, 2, name="de_dc3")
         up1 = batch_norm(up1, is_training=is_training, scope="de_bn3")
         up1 = tf.nn.relu(up1)

         # Second deconvolution targets the full image shape.
         image_shape = [self.batch_size, full_h, full_w, self.c_dim]
         logits = deconv2d(up1, image_shape, 4, 4, 2, 2, name="de_dc4")
         return tf.nn.sigmoid(logits)
Esempio n. 2
0
 def generator(self, z, is_training, reuse=False):
     """Builds the generator that maps a noise tensor to an image.

     Args:
       z: noise tensor fed into the first linear layer.
       is_training: forwarded to every batch_norm call.
       reuse: forwarded to the "generator" variable scope.

     Returns:
       The sigmoid of the last deconvolution, whose requested output shape is
       [self.batch_size, self.input_height, self.input_width, self.c_dim].
     """
     full_h, full_w = self.input_height, self.input_width
     num_samples = self.batch_size
     with tf.variable_scope("generator", reuse=reuse):
         # Fully connected stem: linear -> batch norm -> lrelu, twice.
         hidden = lrelu(
             batch_norm(linear(z, 1024, scope="g_fc1"),
                        is_training=is_training,
                        scope="g_bn1"))
         hidden = lrelu(
             batch_norm(linear(hidden,
                               128 * (full_h // 4) * (full_w // 4),
                               scope="g_fc2"),
                        is_training=is_training,
                        scope="g_bn2"))

         # Fold the flat activations into a quarter-resolution feature map.
         quarter_shape = [num_samples, full_h // 4, full_w // 4, 128]
         feature_map = tf.reshape(hidden, quarter_shape)

         # First deconvolution targets half resolution with 64 channels.
         half_shape = [num_samples, full_h // 2, full_w // 2, 64]
         feature_map = lrelu(
             batch_norm(deconv2d(feature_map, half_shape, 4, 4, 2, 2,
                                 name="g_dc3"),
                        is_training=is_training,
                        scope="g_bn3"))

         # Second deconvolution targets the full image shape.
         image_shape = [num_samples, full_h, full_w, self.c_dim]
         logits = deconv2d(feature_map, image_shape, 4, 4, 2, 2,
                           name="g_dc4")
         return tf.nn.sigmoid(logits)
Esempio n. 3
0
  def discriminator(self, x, is_training, reuse=False):
    """Discriminator architecture based on InfoGAN.

    Batch normalization is applied after the second convolution and after the
    first linear layer only when `self.discriminator_batchnorm` is truthy.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, whether we are in train or eval mode.
      reuse: boolean, whether parameters should be re-used.

    Returns:
      out: sigmoid of `out_logit`, i.e. the prediction in [0, 1].
      out_logit: output of the last linear layer (the value before sigmoid).
      net: activations fed into the last linear layer.
    """
    use_bn = self.discriminator_batchnorm
    with tf.variable_scope("discriminator", reuse=reuse):
      # Convolutional part: [bs, h, w, c] -> [bs, h/4, w/4, 128].
      conv1 = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1"))
      conv2 = conv2d(conv1, 128, 4, 4, 2, 2, name="d_conv2")
      if use_bn:
        conv2 = batch_norm(conv2, is_training=is_training, scope="d_bn2")
      conv2 = lrelu(conv2)

      # Dense part: flatten to [bs, h * w * 8], then 1024 hidden units.
      flat = tf.reshape(conv2, [self.batch_size, -1])
      hidden = linear(flat, 1024, scope="d_fc3")
      if use_bn:
        hidden = batch_norm(hidden, is_training=is_training, scope="d_bn3")
      hidden = lrelu(hidden)

      # Single logit per sample: [bs, 1].
      out_logit = linear(hidden, 1, scope="d_fc4")
      return tf.nn.sigmoid(out_logit), out_logit, hidden
Esempio n. 4
0
    def discriminator(self, x, is_training, reuse=False):
        """Discriminator architecture based on InfoGAN.

        `self.discriminator_normalization` selects the normalization: when it
        equals `consts.SPECTRAL_NORM`, `use_sn=True` is passed to every conv
        and linear layer; when it equals `consts.BATCH_NORM`, batch_norm is
        applied after the second convolution and the first linear layer.

        Args:
          x: input images, shape [bs, h, w, channels].
          is_training: boolean, whether we are in train or eval mode.
          reuse: boolean, whether parameters should be re-used.

        Returns:
          out: sigmoid of `out_logit`, i.e. the prediction in [0, 1].
          out_logit: output of the last linear layer (the value before
            sigmoid).
          net: activations fed into the last linear layer.
        """
        normalization = self.discriminator_normalization
        spectral = normalization == consts.SPECTRAL_NORM
        with tf.variable_scope("discriminator", reuse=reuse):
            # Convolutional part: [bs, h, w, c] -> [bs, h/4, w/4, 128].
            conv1 = lrelu(
                conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=spectral))
            conv2 = conv2d(
                conv1, 128, 4, 4, 2, 2, name="d_conv2", use_sn=spectral)
            if self.discriminator_normalization == consts.BATCH_NORM:
                conv2 = batch_norm(
                    conv2, is_training=is_training, scope="d_bn2")
            conv2 = lrelu(conv2)

            # Dense part: flatten to [bs, h * w * 8], then 1024 hidden units.
            flat = tf.reshape(conv2, [self.batch_size, -1])
            hidden = linear(flat, 1024, scope="d_fc3", use_sn=spectral)
            if self.discriminator_normalization == consts.BATCH_NORM:
                hidden = batch_norm(
                    hidden, is_training=is_training, scope="d_bn3")
            hidden = lrelu(hidden)

            # Single logit per sample: [bs, 1].
            out_logit = linear(hidden, 1, scope="d_fc4", use_sn=spectral)
            return tf.nn.sigmoid(out_logit), out_logit, hidden
Esempio n. 5
0
  def discriminator(self, x, is_training, reuse=False):
    """BEGAN discriminator (auto-encoder).

       This implementation doesn't match the one from the paper, but is similar
       to our "standard" discriminator (same 2 conv layers, using lrelu).
       However, it still has fewer parameters (1.3M vs 8.5M) because of the
       huge linear layer in the standard discriminator.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, whether we are in train or eval mode.
      reuse: boolean, whether parameters should be re-used.

    Returns:
      out: sigmoid output of the decoding step (the reconstruction of `x`).
      recon_error: L1 reconstruction error, mean of |out - x|.
      code: the representation (bottleneck layer of the auto-encoder).
    """
    full_h, full_w = self.input_height, self.input_width
    spectral = self.discriminator_normalization == consts.SPECTRAL_NORM
    with tf.variable_scope("discriminator", reuse=reuse):
      # --- Encoder: [bs, h, w, c] -> [bs, 64] ---
      enc = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=spectral))
      enc = conv2d(enc, 128, 4, 4, 2, 2, name="d_conv2", use_sn=spectral)
      enc = tf.reshape(enc, [self.batch_size, -1])  # [bs, h * w * 8]
      bottleneck = linear(enc, 64, scope="d_fc6", use_sn=spectral)
      if self.discriminator_normalization == consts.BATCH_NORM:
        bottleneck = batch_norm(
            bottleneck, is_training=is_training, scope="d_bn1")
      bottleneck = lrelu(bottleneck)

      # --- Decoder: [bs, 64] -> [bs, h, w, c] ---
      flat_units = 128 * (full_h // 4) * (full_w // 4)
      dec = linear(bottleneck, flat_units, scope="d_fc1", use_sn=spectral)
      if self.discriminator_normalization == consts.BATCH_NORM:
        dec = batch_norm(dec, is_training=is_training, scope="d_bn2")
      dec = lrelu(dec)
      dec = tf.reshape(
          dec, [self.batch_size, full_h // 4, full_w // 4, 128])

      half_shape = [self.batch_size, full_h // 2, full_w // 2, 64]
      dec = deconv2d(dec, half_shape, 4, 4, 2, 2, name="d_deconv1")
      if self.discriminator_normalization == consts.BATCH_NORM:
        dec = batch_norm(dec, is_training=is_training, scope="d_bn3")
      dec = lrelu(dec)

      image_shape = [self.batch_size, full_h, full_w, self.c_dim]
      dec = deconv2d(dec, image_shape, 4, 4, 2, 2, name="d_deconv2")
      reconstruction = tf.nn.sigmoid(dec)

      # Mean absolute difference between reconstruction and input.
      recon_error = tf.reduce_mean(tf.abs(reconstruction - x))
      return reconstruction, recon_error, bottleneck
Esempio n. 6
0
    def attention_block(self, entities, reuse, name="attention_block"):
        """Performs non-local pairwise relational computations.

        Queries, keys and values are each computed from the flattened entities
        by a linear layer, a ReLU and a layer norm. The attention output is
        projected back to the entity dimension by two linear+ReLU layers and
        added to the input before a final layer norm.

        Args:
          entities: A tensor of shape (B, K, D) where K is the number of
            entities. K and D are read from the static shape.
          reuse: Whether to reuse the weights.
          name: The name of the block (used as the variable scope).

        Returns:
          Updated entity representation (B, K, D)
        """
        # Estimate local dimensions to support background channel.
        static_dims = entities.get_shape().as_list()
        k, z_dim = static_dims[1:3]

        flat_entities = tf.reshape(entities, [self.batch_size * k, z_dim])

        with tf.variable_scope(name, reuse=reuse):

            def embed(fc_scope, ln_scope):
                # linear -> ReLU -> layer norm, reshaped back to per-entity.
                projected = ops.linear(
                    flat_entities, self.embedding_dim, scope=fc_scope)
                normed = ops.layer_norm(
                    tf.nn.relu(projected), reuse, ln_scope)
                return tf.reshape(
                    normed, [self.batch_size, k, self.embedding_dim])

            queries = embed("q_fc", "q_ln")
            keys = embed("k_fc", "k_ln")
            values = embed("v_fc", "v_ln")

            # Scaled dot-product scores, normalized over the last axis.
            scores = tf.matmul(queries, tf.transpose(keys, [0, 2, 1]))
            scale = tf.sqrt(tf.cast(self.embedding_dim, tf.float32))
            weights = tf.nn.softmax(scores / scale, axis=2)

            attended = tf.matmul(weights, values)
            flat_attended = tf.reshape(
                attended, [self.batch_size * k, self.embedding_dim])

            # Project back to original space, with a residual connection.
            updated = tf.nn.relu(ops.linear(flat_attended, z_dim, "e_fc1"))
            updated = tf.nn.relu(ops.linear(updated, z_dim, "e_fc2"))
            updated = ops.layer_norm(updated + flat_entities, reuse, "e_ln")

            return tf.reshape(updated, [self.batch_size, k, z_dim])
Esempio n. 7
0
  def discriminator(self, x, is_training, reuse=False):
    """BEGAN discriminator (auto-encoder).

       This implementation doesn't match the one from the paper, but is similar
       to our "standard" discriminator (same 2 conv layers, using lrelu).
       However, it still has fewer parameters (1.3M vs 8.5M) because of the
       huge linear layer in the standard discriminator.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, whether we are in train or eval mode.
      reuse: boolean, whether parameters should be re-used.

    Returns:
      out: sigmoid output of the decoding step (the reconstruction of `x`).
      recon_error: L1 reconstruction error, mean of |out - x|.
      code: the representation (bottleneck layer of the auto-encoder).
    """
    full_h, full_w = self.input_height, self.input_width
    with tf.variable_scope("discriminator", reuse=reuse):
      # --- Encoder: [bs, h, w, c] -> [bs, 64] ---
      enc = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1"))
      enc = conv2d(enc, 128, 4, 4, 2, 2, name="d_conv2")
      enc = tf.reshape(enc, [self.batch_size, -1])  # [bs, h * w * 8]
      bottleneck = linear(enc, 64, scope="d_fc6")
      if self.discriminator_batchnorm:
        bottleneck = batch_norm(
            bottleneck, is_training=is_training, scope="d_bn1")
      bottleneck = lrelu(bottleneck)

      # --- Decoder: [bs, 64] -> [bs, h, w, c] ---
      flat_units = 128 * (full_h // 4) * (full_w // 4)
      dec = linear(bottleneck, flat_units, scope="d_fc1")
      if self.discriminator_batchnorm:
        dec = batch_norm(dec, is_training=is_training, scope="d_bn2")
      dec = lrelu(dec)
      dec = tf.reshape(
          dec, [self.batch_size, full_h // 4, full_w // 4, 128])

      half_shape = [self.batch_size, full_h // 2, full_w // 2, 64]
      dec = deconv2d(dec, half_shape, 4, 4, 2, 2, name="d_deconv1")
      if self.discriminator_batchnorm:
        dec = batch_norm(dec, is_training=is_training, scope="d_bn3")
      dec = lrelu(dec)

      image_shape = [self.batch_size, full_h, full_w, self.c_dim]
      dec = deconv2d(dec, image_shape, 4, 4, 2, 2, name="d_deconv2")
      reconstruction = tf.nn.sigmoid(dec)

      # Mean absolute difference between reconstruction and input.
      recon_error = tf.reduce_mean(tf.abs(reconstruction - x))
      return reconstruction, recon_error, bottleneck
Esempio n. 8
0
def resnet_cifar_generator(noise,
                           is_training,
                           reuse=None,
                           colors=3):
  """Builds a ResNet-style generator with three upscaling blocks.

  Args:
    noise: noise tensor; its static first dimension is used as batch size.
    is_training: forwarded to the generator blocks and the final norm.
    reuse: forwarded to the "generator" variable scope and to every block.
    colors: number of output channels of the final convolution.

  Returns:
    The sigmoid of the final convolution.
  """
  num_samples = noise.get_shape().as_list()[0]
  with tf.variable_scope("generator", reuse=reuse):
    # Project the noise to a 4x4x256 seed and make it rank 4.
    seed = ops.linear(noise, 4 * 4 * 256, scope="fc_noise")
    features = tf.reshape(seed, [num_samples, 4, 4, 256], name="fc_reshaped")

    # Three "up" blocks, scoped B1..B3, all with 256 channels in and out.
    for block_number in (1, 2, 3):
      features = generator_block(
          features,
          in_channels=256,
          out_channels=256,
          scale="up",
          block_scope="B%d" % block_number,
          is_training=is_training,
          reuse=reuse)

    # Output head: norm -> ReLU -> 3x3 conv -> sigmoid.
    features = batch_norm_resnet(
        features, is_training=is_training, scope="final_norm")
    features = ops.conv2d(
        tf.nn.relu(features), output_dim=colors, k_h=3, k_w=3, d_h=1, d_w=1,
        name="final_conv")
    output = tf.nn.sigmoid(features)

    print("Generator output shape: ", output)
    return output
Esempio n. 9
0
def resnet_cifar_discriminator(inputs,
                               is_training,
                               discriminator_normalization,
                               reuse=None):
  """Builds a ResNet-style discriminator made of four 128-channel blocks.

  Args:
    inputs: image tensor; its last static dimension must be 1 or 3.
    is_training: forwarded to every discriminator block.
    discriminator_normalization: forwarded to every block; spectral norm is
      enabled on the final linear layer when it equals consts.SPECTRAL_NORM.
    reuse: forwarded to the "discriminator" variable scope and every block.

  Returns:
    out: sigmoid of `out_logit`.
    out_logit: output of the final linear layer.
    None: placeholder third element.
  """
  _validate_image_inputs(inputs)
  num_colors = inputs.get_shape().as_list()[-1]
  assert num_colors in [1, 3]

  shared_kwargs = dict(
      out_channels=128,
      is_training=is_training,
      reuse=reuse,
      discriminator_normalization=discriminator_normalization)

  with tf.variable_scope("discriminator", values=[inputs], reuse=reuse):
    features = inputs
    in_channels = num_colors

    # Blocks B0 and B1 downscale; B2 and B3 keep the resolution.
    for block_idx, scale in enumerate(("down", "down", "none", "none")):
      features = discriminator_block(
          features,
          in_channels=in_channels,
          scale=scale,
          block_scope="B%d" % block_idx,
          **shared_kwargs)
      in_channels = 128

    # Final part - ReLU, then pooling over the spatial axes. This is a mean
    # rather than a sum, following the resnet5 implementation.
    pooled = tf.reduce_mean(tf.nn.relu(features), axis=[1, 2])

    # dense -> 1
    spectral = discriminator_normalization == consts.SPECTRAL_NORM
    out_logit = ops.linear(
        pooled, 1, scope="disc_final_fc", use_sn=spectral)
    return tf.nn.sigmoid(out_logit), out_logit, None
Esempio n. 10
0
def resnet5_discriminator(inputs,
                          is_training,
                          discriminator_normalization,
                          reuse=None):
  """ResNet style discriminator.

  Construct discriminator network from inputs to the final endpoint: one
  initial downscaling block followed by five more downscaling blocks, a ReLU,
  a mean over the spatial axes and a final linear layer.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels]. Must be
      floating point. The number of channels must be 1 or 3.
    is_training: Is the model currently being trained.
    discriminator_normalization: which type of normalization to apply.
    reuse: Whether or not the network variables should be reused.

  Returns:
    out: The prediction of the discriminator (in [0, 1]). Shape: [bs, 1]
    out_logit: The pre-sigmoid activations for discrimination
      real/generated, a tensor of size [batch_size, 1]
    None: placeholder third element.

  Raises:
    ValueError: If the input image shape is not 4-dimensional, if the spatial
      dimensions aren't defined at graph construction time, if the spatial
      dimensions aren't square, or if the spatial dimensions aren't a power of
      two (presumably raised by `_validate_image_inputs` -- confirm there).
  """

  _validate_image_inputs(inputs)
  num_colors = inputs.get_shape().as_list()[-1]
  assert num_colors in [1, 3]

  ch = 64
  shared_kwargs = dict(
      scale="down",
      is_training=is_training,
      reuse=reuse,
      discriminator_normalization=discriminator_normalization)

  with tf.variable_scope("discriminator", values=[inputs], reuse=reuse):
    features = discriminator_block(
        inputs, in_channels=num_colors, out_channels=ch, block_scope="B0",
        **shared_kwargs)

    # Magic in/out channel numbers copied from SN paper.
    magic = [(1, 2), (2, 4), (4, 4), (4, 8), (8, 8)]
    for block_idx, (in_mult, out_mult) in enumerate(magic):
      in_channels = ch * in_mult
      out_channels = ch * out_mult
      print("Resnet5 disc, block %d in=%d out=%d" % (
          block_idx, in_channels, out_channels))
      features = discriminator_block(
          features, in_channels=in_channels, out_channels=out_channels,
          block_scope="B%d" % (block_idx + 1), **shared_kwargs)

    # Final part: ReLU, mean over the spatial axes, then a single logit.
    pooled = tf.reduce_mean(tf.nn.relu(features), axis=[1, 2])

    spectral = discriminator_normalization == consts.SPECTRAL_NORM
    out_logit = ops.linear(
        pooled, 1, scope="disc_final_fc", use_sn=spectral)
    return tf.nn.sigmoid(out_logit), out_logit, None
def resnet5_generator(noise,
                      is_training,
                      reuse=None,
                      colors=3,
                      output_shape=128,
                      unused_ablation_type=""):
  """Builds a ResNet-style generator made of five generator blocks.

  The first log2(output_shape / 4) blocks use scale "up"; the remaining ones
  use scale "none".

  Args:
    noise: noise tensor of shape [bs, z_dim]; must have rank 2.
    is_training: forwarded to the generator blocks and the final norm.
    reuse: forwarded to the "generator" variable scope and to every block.
    colors: number of output channels of the final convolution.
    output_shape: target size; output_shape / 4 must be a power of two
      between 1 and 32.
    unused_ablation_type: not used.

  Returns:
    The sigmoid of the final convolution.
  """
  # Input is a noise tensor of shape [bs, z_dim]
  assert len(noise.get_shape().as_list()) == 2

  num_samples = noise.get_shape().as_list()[0]
  # Side length of the seed; each "up" block is expected to double it.
  seed_size = 4
  # We want the last block to have 64 channels:
  ch = 64
  seed_channels = ch * 8

  with tf.variable_scope("generator", reuse=reuse):
    # Map noise to the actual seed and make it a rank-4 Tensor.
    seed = ops.linear(
        noise, seed_channels * seed_size * seed_size, scope="fc_noise")
    features = tf.reshape(
        seed, [num_samples, seed_size, seed_size, seed_channels],
        name="fc_reshaped")

    # Magic in/out channel numbers copied from SN paper.
    magic = [(8, 8), (8, 4), (4, 4), (4, 2), (2, 1)]
    up_layers = np.log2(float(output_shape) / seed_size)
    assert up_layers.is_integer(), "log2(%d/%d) must be an integer" % (
        output_shape, seed_size)
    assert 0 <= up_layers <= 5, "Invalid output_shape %d" % (
        output_shape)
    up_layers = int(up_layers)

    for block_idx, (in_mult, out_mult) in enumerate(magic):
      in_channels = ch * in_mult
      out_channels = ch * out_mult
      print("Resnet5, block %d in=%d out=%d" % (block_idx, in_channels,
                                                out_channels))
      scale = "up" if block_idx < up_layers else "none"
      features = generator_block(
          features,
          in_channels=in_channels,
          out_channels=out_channels,
          scale=scale,
          block_scope="B%d" % (block_idx + 1),
          is_training=is_training,
          reuse=reuse)

    # Output head: norm -> ReLU -> 3x3 conv -> sigmoid.
    features = batch_norm_resnet(
        features, is_training=is_training, scope="final_norm")
    features = ops.conv2d(
        tf.nn.relu(features), output_dim=colors, k_h=3, k_w=3, d_h=1, d_w=1,
        name="final_conv")
    output = tf.nn.sigmoid(features)

    print("Generator output shape: ", output)
    return output
Esempio n. 12
0
 def decoder(self, z, is_training, reuse=False):
   """Builds the Bernoulli decoder that maps a latent code to an image.

   Args:
     z: latent code tensor fed into the first linear layer.
     is_training: forwarded to every batch_norm call.
     reuse: forwarded to the "decoder" variable scope.

   Returns:
     The sigmoid of the last deconvolution, whose requested output shape is
     [self.batch_size, self.input_height, self.input_width, self.c_dim].
   """
   full_h, full_w = self.input_height, self.input_width
   with tf.variable_scope("decoder", reuse=reuse):
     # Fully connected stem: linear -> batch norm -> ReLU, twice.
     fc1 = linear(z, 1024, scope="de_fc1")
     fc1 = batch_norm(fc1, is_training=is_training, scope="de_bn1")
     fc1 = tf.nn.relu(fc1)

     fc2 = linear(fc1, 128 * (full_h // 4) * (full_w // 4), scope="de_fc2")
     fc2 = batch_norm(fc2, is_training=is_training, scope="de_bn2")
     fc2 = tf.nn.relu(fc2)

     # Fold the flat activations into a quarter-resolution feature map.
     feature_map = tf.reshape(
         fc2, [self.batch_size, full_h // 4, full_w // 4, 128])

     # First deconvolution targets half resolution with 64 channels.
     half_shape = [self.batch_size, full_h // 2, full_w // 2, 64]
     up1 = deconv2d(feature_map, half_shape, 4, 4, 2, 2, name="de_dc3")
     up1 = batch_norm(up1, is_training=is_training, scope="de_bn3")
     up1 = tf.nn.relu(up1)

     # Second deconvolution targets the full image shape.
     image_shape = [self.batch_size, full_h, full_w, self.c_dim]
     logits = deconv2d(up1, image_shape, 4, 4, 2, 2, name="de_dc4")
     return tf.nn.sigmoid(logits)
Esempio n. 13
0
def sn_generator(z,
                 batch_size,
                 output_height,
                 output_width,
                 output_c_dim,
                 is_training,
                 reuse=False):
    """Returns the output tensor of the SNDCGAN generator.

    Details are available at https://openreview.net/pdf?id=B1QRgziT-.

    Args:
      z: latent code, shape [batch_size, latent_dimensionality]
      batch_size: Batch size.
      output_height: Output image height.
      output_width: Output image width.
      output_c_dim: Number of color channels.
      is_training: boolean, whether we are in train or eval mode.
      reuse: boolean, whether parameters should be re-used.

    Returns:
      net: The generated image Tensor with entries in [0, 1].
    """
    # Spatial sizes at full, 1/2, 1/4 and 1/8 resolution.
    heights = [output_height]
    widths = [output_width]
    for _ in range(3):
        next_h = conv_out_size_same(heights[-1], 2)
        next_w = conv_out_size_same(widths[-1], 2)
        heights.append(next_h)
        widths.append(next_w)
    s_h, s_h2, s_h4, s_h8 = heights
    s_w, s_w2, s_w4, s_w8 = widths

    # (target height, target width, channels, deconv name, batch norm scope)
    upsampling_stages = (
        (s_h4, s_w4, 256, "g_dc2", "g_bn2"),
        (s_h2, s_w2, 128, "g_dc3", "g_bn3"),
        (s_h, s_w, 64, "g_dc4", "g_bn4"),
    )

    with tf.variable_scope("generator", reuse=reuse):
        hidden = linear(z, s_h8 * s_w8 * 512, scope="g_fc1")
        hidden = batch_norm_dcgan(
            hidden, is_training, scope="g_bn1", epsilon=2e-5)
        hidden = tf.reshape(
            tf.nn.relu(hidden), [batch_size, s_h8, s_w8, 512])

        for stage_h, stage_w, channels, dc_name, bn_scope in upsampling_stages:
            hidden = deconv2d(
                hidden, [batch_size, stage_h, stage_w, channels],
                4, 4, 2, 2, name=dc_name)
            hidden = batch_norm_dcgan(
                hidden, is_training, scope=bn_scope, epsilon=2e-5)
            hidden = tf.nn.relu(hidden)

        hidden = deconv2d(
            hidden, [batch_size, s_h, s_w, output_c_dim],
            3, 3, 1, 1, name="g_dc5")
        squashed = tf.tanh(hidden)

        # NOTE: this rescaling matches the current image preprocessing, which
        # normalizes real images to the range [0, 1]. The authors'
        # implementation keeps the tanh output and normalizes images to
        # [-1, 1] instead.
        return tf.div(squashed + 1.0, 2.0)
Esempio n. 14
0
  def encoder(self, x, is_training, reuse=False):
    """Implements the Gaussian Encoder.

    Args:
      x: input tensor passed to the first convolution.
      is_training: forwarded to batch_norm.
      reuse: forwarded to the "encoder" variable scope.

    Returns:
      mean: the first `self.z_dim` columns of the last linear layer.
      stddev: softplus of the remaining columns, plus 1e-6.
    """
    latent_dim = self.z_dim
    with tf.variable_scope("encoder", reuse=reuse):
      hidden = conv2d(x, 64, 4, 4, 2, 2, name="en_conv1")
      hidden = lrelu(hidden)

      hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="en_conv2")
      # NOTE: batch norm here is toggled by the *discriminator* flag.
      if self.discriminator_batchnorm:
        hidden = batch_norm(hidden, is_training=is_training, scope="en_bn2")
      hidden = lrelu(hidden)

      flat = tf.reshape(hidden, [self.batch_size, -1])
      hidden = linear(flat, 1024, scope="en_fc3")
      if self.discriminator_batchnorm:
        hidden = batch_norm(hidden, is_training=is_training, scope="en_bn3")
      hidden = lrelu(hidden)

      # One linear layer emits both halves of the Gaussian parameters.
      gaussian_params = linear(hidden, 2 * self.z_dim, scope="en_fc4")
      mean = gaussian_params[:, :latent_dim]
      raw_stddev = gaussian_params[:, latent_dim:]
      # Softplus plus a small constant keeps the standard deviation positive.
      stddev = 1e-6 + tf.nn.softplus(raw_stddev)
    return mean, stddev
Esempio n. 15
0
    def encoder(self, x, is_training, reuse=False):
        """Implements the Gaussian Encoder.

        Args:
          x: input tensor passed to the first convolution.
          is_training: forwarded to batch_norm.
          reuse: forwarded to the "encoder" variable scope.

        Returns:
          mean: the first `self.z_dim` columns of the last linear layer.
          stddev: softplus of the remaining columns, plus 1e-6.
        """
        with tf.variable_scope("encoder", reuse=reuse):
            features = conv2d(x, 64, 4, 4, 2, 2, name="en_conv1")
            features = conv2d(
                lrelu(features), 128, 4, 4, 2, 2, name="en_conv2")
            # NOTE: batch norm here is toggled by the *discriminator* flag.
            if self.discriminator_batchnorm:
                features = batch_norm(
                    features, is_training=is_training, scope="en_bn2")
            features = lrelu(features)

            features = tf.reshape(features, [self.batch_size, -1])
            features = linear(features, 1024, scope="en_fc3")
            if self.discriminator_batchnorm:
                features = batch_norm(
                    features, is_training=is_training, scope="en_bn3")
            features = lrelu(features)

            # One linear layer emits both halves of the Gaussian parameters.
            params = linear(features, 2 * self.z_dim, scope="en_fc4")
            mean = params[:, :self.z_dim]
            # Softplus plus a small constant keeps the stddev positive.
            stddev = 1e-6 + tf.nn.softplus(params[:, self.z_dim:])
        return mean, stddev
Esempio n. 16
0
 def generator(self, z, is_training, reuse=False):
   """Builds the generator that maps a noise tensor to an image.

   Args:
     z: noise tensor fed into the first linear layer.
     is_training: forwarded to every batch_norm call.
     reuse: forwarded to the "generator" variable scope.

   Returns:
     The sigmoid of the last deconvolution, whose requested output shape is
     [self.batch_size, self.input_height, self.input_width, self.c_dim].
   """
   full_h, full_w = self.input_height, self.input_width
   num_samples = self.batch_size
   with tf.variable_scope("generator", reuse=reuse):
     # Fully connected stem: linear -> batch norm -> lrelu, twice.
     hidden = lrelu(batch_norm(
         linear(z, 1024, scope="g_fc1"),
         is_training=is_training, scope="g_bn1"))
     hidden = lrelu(batch_norm(
         linear(hidden, 128 * (full_h // 4) * (full_w // 4), scope="g_fc2"),
         is_training=is_training, scope="g_bn2"))

     # Fold the flat activations into a quarter-resolution feature map.
     quarter_shape = [num_samples, full_h // 4, full_w // 4, 128]
     feature_map = tf.reshape(hidden, quarter_shape)

     # First deconvolution targets half resolution with 64 channels.
     half_shape = [num_samples, full_h // 2, full_w // 2, 64]
     feature_map = lrelu(batch_norm(
         deconv2d(feature_map, half_shape, 4, 4, 2, 2, name="g_dc3"),
         is_training=is_training, scope="g_bn3"))

     # Second deconvolution targets the full image shape.
     image_shape = [num_samples, full_h, full_w, self.c_dim]
     logits = deconv2d(feature_map, image_shape, 4, 4, 2, 2, name="g_dc4")
     return tf.nn.sigmoid(logits)
Esempio n. 17
0
def resnet_stl_discriminator(inputs,
                             is_training,
                             discriminator_normalization,
                             reuse=None):
    """Builds a ResNet-style discriminator with five discriminator blocks.

    Blocks B0..B3 use scale "down"; the last block (B4) uses scale "none".

    Args:
      inputs: image tensor; its last static dimension must be 1 or 3.
      is_training: forwarded to every discriminator block.
      discriminator_normalization: forwarded to every block; spectral norm is
        enabled on the final linear layer when it equals
        consts.SPECTRAL_NORM.
      reuse: forwarded to the "discriminator" variable scope and every block.

    Returns:
      out: sigmoid of `out_logit`.
      out_logit: output of the final linear layer.
      None: placeholder third element.
    """
    _validate_image_inputs(inputs, validate_power2=False)
    num_colors = inputs.get_shape().as_list()[-1]
    assert num_colors in [1, 3]

    ch = 64
    shared_kwargs = dict(
        is_training=is_training,
        reuse=reuse,
        discriminator_normalization=discriminator_normalization)

    with tf.variable_scope("discriminator", values=[inputs], reuse=reuse):
        features = discriminator_block(
            inputs,
            in_channels=num_colors,
            out_channels=ch,
            scale="down",
            block_scope="B0",
            **shared_kwargs)

        # in/out channel numbers copied from SN paper.
        magic = [(1, 2), (2, 4), (4, 8), (8, 16)]
        for block_idx, (in_mult, out_mult) in enumerate(magic):
            in_channels = ch * in_mult
            out_channels = ch * out_mult
            print("Resnet5 disc, block %d in=%d out=%d" %
                  (block_idx, in_channels, out_channels))

            # Only the last of these four blocks keeps the resolution.
            scale = "down" if block_idx < 3 else "none"
            features = discriminator_block(
                features,
                in_channels=in_channels,
                out_channels=out_channels,
                scale=scale,
                block_scope="B%d" % (block_idx + 1),
                **shared_kwargs)

        # Final part: ReLU, mean over the spatial axes, then a single logit.
        pooled = tf.reduce_mean(tf.nn.relu(features), axis=[1, 2])

        spectral = discriminator_normalization == consts.SPECTRAL_NORM
        out_logit = ops.linear(
            pooled, 1, scope="disc_final_fc", use_sn=spectral)
        return tf.nn.sigmoid(out_logit), out_logit, None
Esempio n. 18
0
def resnet107_discriminator(inputs,
                            is_training,
                            discriminator_normalization,
                            reuse=None):
    """Builds a deep ResNet-style discriminator.

    After an initial 3x3 convolution there are six superblocks of five
    scale-"none" blocks each. Each of the first five superblocks is followed
    by one scale-"down" block that doubles the channel count.

    Args:
      inputs: image tensor; its last static dimension must be 1 or 3.
      is_training: forwarded to every discriminator block.
      discriminator_normalization: forwarded to every block; spectral norm is
        enabled on the final linear layer when it equals
        consts.SPECTRAL_NORM.
      reuse: forwarded to the "discriminator" variable scope and every block.

    Returns:
      out: sigmoid of `out_logit`.
      out_logit: output of the final linear layer.
      None: placeholder third element.
    """
    _validate_image_inputs(inputs)
    num_colors = inputs.get_shape().as_list()[-1]
    assert num_colors in [1, 3]

    ch = 64
    stem_channels = ch // 4
    shared_kwargs = dict(
        is_training=is_training,
        reuse=reuse,
        discriminator_normalization=discriminator_normalization)

    with tf.variable_scope("discriminator", values=[inputs], reuse=reuse):
        features = ops.conv2d(
            inputs, output_dim=stem_channels, k_h=3, k_w=3, d_h=1, d_w=1,
            name="color_conv")

        for superblock in range(6):
            # Channel width doubles with every superblock.
            width = stem_channels * 2 ** superblock
            for i in range(5):
                features = discriminator_block(
                    features,
                    in_channels=width,
                    out_channels=width,
                    scale="none",
                    block_scope="B_%d_%d" % (superblock, i),
                    **shared_kwargs)
            # We want to downscale 5 times.
            if superblock < 5:
                features = discriminator_block(
                    features,
                    in_channels=width,
                    out_channels=2 * width,
                    scale="down",
                    block_scope="B_%d_up" % superblock,
                    **shared_kwargs)

        # Final part: flatten and map to a single logit.
        flat = tf.reshape(features, [-1, 4 * 4 * 8 * ch])
        spectral = discriminator_normalization == consts.SPECTRAL_NORM
        out_logit = ops.linear(
            flat, 1, scope="disc_final_fc", use_sn=spectral)

        return tf.nn.sigmoid(out_logit), out_logit, None
Esempio n. 19
0
def discriminator(x,
                  batch_size,
                  is_training,
                  discriminator_normalization,
                  reuse=False):
    """Returns the outputs of the DCGAN discriminator.

    Details are available at https://arxiv.org/abs/1511.06434. Notable changes
    include BatchNorm in the discriminator and LeakyReLU for all layers.

    Args:
      x: input images, shape [bs, h, w, channels].
      batch_size: integer, number of samples in batch.
      is_training: boolean, whether we are in train or eval mode.
      discriminator_normalization: which type of normalization to apply; must
        be one of consts.NO_NORMALIZATION, consts.SPECTRAL_NORM or
        consts.BATCH_NORM.
      reuse: boolean, whether parameters should be re-used.

    Returns:
      out: A float (in [0, 1]) with discriminator prediction.
      out_logit: Logits (activations of the last linear layer).
      net: Activations after the last lrelu, before flattening.
    """
    assert discriminator_normalization in [
        consts.NO_NORMALIZATION, consts.SPECTRAL_NORM, consts.BATCH_NORM]
    df_dim = 64  # Dimension of filters in first convolutional layer.
    spectral = discriminator_normalization == consts.SPECTRAL_NORM

    # (filter multiplier, conv name, batch norm scope) for layers 2-4.
    later_layers = (
        (2, "d_conv2", "d_bn1"),
        (4, "d_conv3", "d_bn2"),
        (8, "d_conv4", "d_bn3"),
    )

    with tf.variable_scope("discriminator", reuse=reuse):
        # The first convolution is never batch-normalized.
        features = conv2d(
            x, df_dim, 5, 5, 2, 2, name="d_conv1", use_sn=spectral)
        features = lrelu(features)

        for multiplier, conv_name, bn_scope in later_layers:
            features = conv2d(
                features, df_dim * multiplier, 5, 5, 2, 2,
                name=conv_name, use_sn=spectral)
            if discriminator_normalization == consts.BATCH_NORM:
                features = batch_norm_dcgan(
                    features, is_training, scope=bn_scope)
            features = lrelu(features)

        flat = tf.reshape(features, [batch_size, -1])
        out_logit = linear(flat, 1, scope="d_fc4", use_sn=spectral)
        return tf.nn.sigmoid(out_logit), out_logit, features
Esempio n. 20
0
def resnet107_generator(noise, is_training, reuse=None, colors=3):
    """Builds the ResNet-107 generator graph.

    The noise is linearly projected to a 4x4 seed with 8 * 64 channels and
    then passed through six superblocks. Each superblock applies five
    `generator_block`s with scale="none"; every superblock except the last
    is followed by one `generator_block` with scale="up". The channel counts
    are halved after each superblock.

    Args:
      noise: rank-2 noise tensor of shape [bs, z_dim]. The batch size is read
        from the static shape and used for the seed reshape.
      is_training: forwarded to every `generator_block`.
      reuse: forwarded to the variable scope and to every `generator_block`.
      colors: number of output channels of the final convolution.

    Returns:
      The sigmoid of the final 3x3 convolution output.
    """
    # Input is a noise tensor of shape [bs, z_dim]
    assert len(noise.get_shape().as_list()) == 2

    # Calculate / define a few numbers.
    batch_size = noise.get_shape().as_list()[0]
    ch = 64

    with tf.variable_scope("generator", reuse=reuse):
        # Map noise to the actual seed.
        output = ops.linear(noise, 4 * 4 * 8 * ch, scope="fc_noise")

        # Reshape the seed to be a rank-4 Tensor.
        output = tf.reshape(output, [batch_size, 4, 4, 8 * ch],
                            name="fc_reshaped")

        in_channels = 8 * ch
        out_channels = 4 * ch
        for superblock in range(6):
            for i in range(5):
                block_scope = "B_%d_%d" % (superblock, i)
                output = generator_block(output,
                                         in_channels=in_channels,
                                         out_channels=in_channels,
                                         scale="none",
                                         block_scope=block_scope,
                                         is_training=is_training,
                                         reuse=reuse)
            # We want to upscale 5 times.
            if superblock < 5:
                output = generator_block(output,
                                         in_channels=in_channels,
                                         out_channels=out_channels,
                                         scale="up",
                                         block_scope="B_%d_up" % superblock,
                                         is_training=is_training,
                                         reuse=reuse)
            # Floor division keeps the channel counts integers; true division
            # would turn them into floats under Python 3. The halving is
            # exact for ch = 64 (512 -> 16 and 256 -> 8).
            in_channels //= 2
            out_channels //= 2

        output = ops.conv2d(output,
                            output_dim=colors,
                            k_h=3,
                            k_w=3,
                            d_h=1,
                            d_w=1,
                            name="final_conv")
        output = tf.nn.sigmoid(output)

        print("Generator output shape: ", output)
        return output
Esempio n. 21
0
def sn_discriminator(x, batch_size, reuse=False, use_sn=False):
    """Builds the SNDCGAN discriminator and returns its outputs.

    Architecture reference: https://openreview.net/pdf?id=B1QRgziT-.

    Args:
      x: input images, shape [bs, h, w, channels].
      batch_size: integer, number of samples in batch.
      reuse: boolean, should params be re-used.
      use_sn: forwarded as `use_sn` to every conv2d / linear layer
        (presumably toggles spectral normalization -- confirm in the ops).

    Returns:
      out: sigmoid of `out_logit`.
      out_logit: output of the last linear layer after `tf.squeeze`.
      net: the conv features flattened to [batch_size, -1], i.e. the input of
        the last linear layer.
    """
    # (output channels, kernel size, stride) for d_conv1 .. d_conv7.
    conv_specs = (
        (64, 3, 1),
        (128, 4, 2),
        (128, 3, 1),
        (256, 4, 2),
        (256, 3, 1),
        (512, 4, 2),
        (512, 3, 1),
    )

    # The compare gan preprocessing scales pixels to [0, 1] while the authors
    # used [-1, 1]; rescale the input here rather than touching preprocessing.
    net = x * 2.0 - 1.0
    with tf.variable_scope("discriminator", reuse=reuse):
        # Mapping x from [bs, h, w, c] to [bs, 1]
        weight_init = tf.random_normal_initializer
        for layer_idx, (channels, kernel, stride) in enumerate(conv_specs,
                                                               start=1):
            net = conv2d(net,
                         channels,
                         kernel,
                         kernel,
                         stride,
                         stride,
                         name="d_conv%d" % layer_idx,
                         initializer=weight_init,
                         use_sn=use_sn)
            net = lrelu(net, leak=0.1)

        net = tf.reshape(net, [batch_size, -1])
        out_logit = tf.squeeze(linear(net, 1, scope="d_fc1", use_sn=use_sn))
        out = tf.nn.sigmoid(out_logit)
        return out, out_logit, net
Esempio n. 22
0
    def aggregate_heads(self, heads, reuse, name="aggregate_heads"):
        """Returns the aggregated heads.

        The heads are concatenated along axis 2, flattened to
        [self.batch_size * k, self.n_heads * z_dim], projected to z_dim with
        a ReLU-activated linear layer, layer-normalized and reshaped to
        [self.batch_size, k, z_dim].

        Args:
          heads: sequence of tensors; dimensions 1 and 2 of the first entry
            are read as (k, z_dim). Presumably there are self.n_heads
            entries, each of shape [batch_size, k, z_dim] -- confirm against
            the caller.
          reuse: forwarded to the variable scope and to `ops.layer_norm`.
          name: name of the variable scope.

        Returns:
          Tensor of shape [self.batch_size, k, z_dim].
        """
        # Estimate local dimensions to support background channel.
        k, z_dim = heads[0].get_shape().as_list()[1:3]

        with tf.variable_scope(name, reuse=reuse):
            heads = tf.concat(heads, axis=2)
            heads_r = tf.reshape(heads,
                                 [self.batch_size * k, self.n_heads * z_dim])
            # heads_r is already one rank-2 tensor, so it is fed to the
            # linear layer directly; no further concatenation is needed.
            heads_a = tf.nn.relu(ops.linear(heads_r, z_dim, "a_fc1"))
            heads_a = ops.layer_norm(heads_a, reuse, "a_ln")
            heads_a = tf.reshape(heads_a, [self.batch_size, k, z_dim])

            return heads_a
Esempio n. 23
0
def generator(z, batch_size, output_height, output_width, output_c_dim,
              is_training, reuse=False):
    """Builds the DCGAN generator and returns its output tensor.

    Details are available at https://arxiv.org/abs/1511.06434. The latent
    code is projected to a seed at 1/16 of the output resolution, followed by
    four (batch norm, ReLU, stride-2 deconvolution) stages. The last
    deconvolution output goes through TanH and is rescaled.

    Args:
      z: latent code, shape [batch_size, latent_dimensionality]
      batch_size: Batch size.
      output_height: Output image height.
      output_width: Output image width.
      output_c_dim: Number of color channels.
      is_training: boolean, are we in train or eval model.
      reuse: boolean, should params be re-used.

    Returns:
      net: The generated image Tensor with entries in [0, 1].
    """
    base_filters = 64  # Dimension of filters in first convolutional layer.
    with tf.variable_scope("generator", reuse=reuse):
        # sizes[i] is the (height, width) after i halvings of the output
        # resolution; sizes[4] is the seed resolution.
        sizes = [(output_height, output_width)]
        for _ in range(4):
            prev_h, prev_w = sizes[-1]
            sizes.append((conv_out_size_same(prev_h, 2),
                          conv_out_size_same(prev_w, 2)))
        seed_h, seed_w = sizes[4]

        net = linear(z, base_filters * 8 * seed_h * seed_w, scope="g_fc1")
        net = tf.reshape(net, [-1, seed_h, seed_w, base_filters * 8])

        # Output channels of g_dc1 .. g_dc4.
        stage_channels = (base_filters * 4, base_filters * 2,
                          base_filters * 1, output_c_dim)
        for stage, channels in enumerate(stage_channels, start=1):
            target_h, target_w = sizes[4 - stage]
            normalized = batch_norm_dcgan(net, is_training,
                                          scope="g_bn%d" % stage)
            net = tf.nn.relu(normalized)
            net = deconv2d(net, [batch_size, target_h, target_w, channels],
                           5, 5, 2, 2, name="g_dc%d" % stage)

        # Map the TanH range [-1, 1] onto [0, 1].
        return 0.5 * tf.nn.tanh(net) + 0.5
Esempio n. 24
0
def resnet_stl_generator(noise, is_training, reuse=None, colors=3):
  """Builds the ResNet generator used for STL.

  The noise is linearly projected to a 6x6 seed with 512 channels, passed
  through three `generator_block`s with scale="up", then through
  `batch_norm_resnet`, ReLU, a 3x3 convolution and a sigmoid.

  Args:
    noise: noise tensor; its static first dimension is used as batch size.
    is_training: forwarded to the generator blocks and the final norm.
    reuse: forwarded to the variable scope and the generator blocks.
    colors: number of output channels of the final convolution.

  Returns:
    The sigmoid of the final convolution output.
  """
  num_samples = noise.get_shape().as_list()[0]
  base_ch = 64
  # (in, out) channel multipliers per block, copied from the SN paper.
  channel_multipliers = ((8, 4), (4, 2), (2, 1))

  with tf.variable_scope("generator", reuse=reuse):
    # Project the noise to the seed and make it a rank-4 Tensor.
    seed = ops.linear(noise, 6 * 6 * 512, scope="fc_noise")
    output = tf.reshape(seed, [num_samples, 6, 6, 512], name="fc_reshaped")

    for block_number, (in_mult, out_mult) in enumerate(
        channel_multipliers, start=1):
      output = generator_block(
          output,
          in_channels=base_ch * in_mult,
          out_channels=base_ch * out_mult,
          scale="up",
          block_scope="B%d" % block_number,
          is_training=is_training,
          reuse=reuse)

    # Final processing of the output.
    normalized = batch_norm_resnet(
        output, is_training=is_training, scope="final_norm")
    activated = tf.nn.relu(normalized)
    image_logits = ops.conv2d(
        activated,
        output_dim=colors,
        k_h=3,
        k_w=3,
        d_h=1,
        d_w=1,
        name="final_conv")
    output = tf.nn.sigmoid(image_logits)

    print("Generator output shape: ", output)
    return output
Esempio n. 25
0
    def discriminator(self,
                      x,
                      is_training,
                      reuse=False,
                      batch_size_multiplier=1):
        """Discriminator architecture based on InfoGAN.

        Args:
          x: input images, shape [bs, h, w, channels]
          is_training: boolean, are we in train or eval model.
          reuse: boolean, should params be re-used.
          batch_size_multiplier: factor applied to `self.batch_size` when
            the conv features are flattened.

        Returns:
          out: a float (in [0, 1]) with discriminator prediction
          out_logit: the value "out" before sigmoid
          net: the leaky-ReLU activations fed to the last linear layer
        """
        normalization = self.discriminator_normalization
        use_sn = normalization == consts.SPECTRAL_NORM
        use_bn = normalization == consts.BATCH_NORM

        def split_batch_norm(tensor, scope):
            """Applies batch_norm to each half of the batch separately.

            Both calls use the same scope; the second one passes reuse=True.
            """
            first_half, second_half = tf.split(tensor, 2, 0)
            first_half = batch_norm(first_half,
                                    is_training=is_training,
                                    scope=scope)
            second_half = batch_norm(second_half,
                                     is_training=is_training,
                                     scope=scope,
                                     reuse=True)
            return tf.concat([first_half, second_half], 0)

        with tf.variable_scope("discriminator", reuse=reuse):
            # Mapping x from [bs, h, w, c] to [bs, 1]
            # [bs, h/2, w/2, 64]
            net = lrelu(
                conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=use_sn))
            # [bs, h/4, w/4, 128]
            net = conv2d(net, 128, 4, 4, 2, 2, name="d_conv2", use_sn=use_sn)
            if use_bn:
                net = split_batch_norm(net, "d_bn2")
            net = lrelu(net)

            # [bs, h * w * 8]
            flat_shape = [self.batch_size * batch_size_multiplier, -1]
            net = tf.reshape(net, flat_shape)
            # [bs, 1024]
            net = linear(net, 1024, scope="d_fc3", use_sn=use_sn)
            if use_bn:
                net = split_batch_norm(net, "d_bn3")
            net = lrelu(net)

            # [bs, 1]
            out_logit = linear(net, 1, scope="d_fc4", use_sn=use_sn)
            out = tf.nn.sigmoid(out_logit)
            return out, out_logit, net