def apply(self, x, y, is_training):
    """Build the discriminator graph for a batch of images.

    Two convolutions are followed by two linear layers; the last linear
    layer produces one logit per example.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels. Only forwarded to `self.batch_norm`.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the logits, the logits themselves and
      the activations of the second last layer.
    """
    sn = self._spectral_norm
    # The static batch size is used to flatten the feature maps below.
    bs = x.shape.as_list()[0]

    # Shape: [bs, h/2, w/2, 64].
    hidden = conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=sn)
    hidden = lrelu(hidden)

    # Shape: [bs, h/4, w/4, 128].
    hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="d_conv2", use_sn=sn)
    hidden = lrelu(
        self.batch_norm(hidden, y=y, is_training=is_training, name="d_bn2"))

    # Flatten to [bs, h * w * 8], then project to [bs, 1024].
    hidden = tf.reshape(hidden, [bs, -1])
    hidden = linear(hidden, 1024, scope="d_fc3", use_sn=sn)
    hidden = lrelu(
        self.batch_norm(hidden, y=y, is_training=is_training, name="d_bn3"))

    # Shape: [bs, 1].
    logits = linear(hidden, 1, scope="d_fc4", use_sn=sn)
    probs = tf.nn.sigmoid(logits)
    return probs, logits, hidden
  def apply(self, z, y, is_training):
    """Build the generator graph that maps latent codes to images.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels. Ignored by this generator.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].
    """
    del y  # Labels are not used here.
    height, width, channels = self._image_shape
    batch = z.shape.as_list()[0]
    quarter_h, quarter_w = height // 4, width // 4

    # Two linear layers, each followed by batch norm and leaky ReLU.
    hidden = linear(z, 1024, scope="g_fc1")
    hidden = batch_norm(hidden, is_training=is_training, name="g_bn1")
    hidden = lrelu(hidden)
    hidden = linear(hidden, 128 * quarter_h * quarter_w, scope="g_fc2")
    hidden = batch_norm(hidden, is_training=is_training, name="g_bn2")
    hidden = lrelu(hidden)

    # Reshape to a quarter-resolution feature map and deconvolve twice.
    hidden = tf.reshape(hidden, [batch, quarter_h, quarter_w, 128])
    hidden = deconv2d(
        hidden, [batch, height // 2, width // 2, 64], 4, 4, 2, 2, name="g_dc3")
    hidden = batch_norm(hidden, is_training=is_training, name="g_bn3")
    hidden = lrelu(hidden)
    hidden = deconv2d(
        hidden, [batch, height, width, channels], 4, 4, 2, 2, name="g_dc4")
    return tf.nn.sigmoid(hidden)
Beispiel #3
0
 def f(name, x, width, n_out=None):
     """Apply two dense layers, each followed by a leaky ReLU.

     Both layers are created under the variable scope `name`, in the
     sub-scopes 'dense1' and 'dense2', without spectral norm and with bias.

     Args:
       name: Name of the enclosing variable scope.
       x: Input passed to the first dense layer.
       width: Output size of the first dense layer.
       n_out: Output size of the second dense layer. The default `None` is
         forwarded to `ops.linear` unchanged.

     Returns:
       The output of the second leaky ReLU.
     """
     dense_kwargs = dict(use_sn=False, use_bias=True)
     with tf.variable_scope(name):
         with tf.variable_scope('dense1'):
             hidden = ops.linear(x, width, **dense_kwargs)
             hidden = ops.lrelu(hidden)
         with tf.variable_scope('dense2'):
             out = ops.linear(hidden, n_out, **dense_kwargs)
             out = ops.lrelu(out)
     return out
Beispiel #4
0
    def apply(self, x, y, is_training):
        """Build the discriminator graph for a batch of images.

        Args:
          x: `Tensor` of shape [batch_size, 28, 28, ?] with real or fake
            images.
          y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
            labels.
          is_training: Boolean, whether the architecture should be constructed
            for training or inference.

        Returns:
          Tuple of 3 Tensors: the sigmoid of the logits, the logits themselves
          and the spatially averaged features the logits are computed from.

        Raises:
          ValueError: If the number of color channels is neither 1 nor 3, or if
            label projection is enabled but `y` is None.
        """
        resnet_ops.validate_image_inputs(x, False)
        colors = x.shape[3].value
        if colors not in (1, 3):
            raise ValueError(
                "Number of color channels not supported: {}".format(colors))

        # Optional WaveletDeconv layer in front of the resnet blocks.
        net = ops.waveletDeconv(x) if self._wavelet_deconv else x

        # Two downscaling blocks, making it the same as the generator.
        block_in_channels = colors
        for block_number in (1, 2):
            block = self._resnet_block(
                name="B{}".format(block_number),
                in_channels=block_in_channels,
                out_channels=128,
                scale="down")
            net = block(net, z=None, y=y, is_training=is_training)
            block_in_channels = 128

        # Final part: ReLU followed by an average over both spatial axes.
        net = tf.nn.relu(net)
        h = tf.reduce_mean(net, axis=[1, 2])

        out_logit = ops.linear(
            h, 1, scope="disc_final_fc", use_sn=self._spectral_norm)
        if self._project_y:
            if y is None:
                raise ValueError(
                    "You must provide class information y to project.")
            embedded_y = ops.linear(
                y,
                128,
                use_bias=False,
                scope="embedding_fc",
                use_sn=self._spectral_norm)
            projection = tf.reduce_sum(embedded_y * h, axis=1, keepdims=True)
            out_logit += projection
        return tf.nn.sigmoid(out_logit), out_logit, h
Beispiel #5
0
  def apply(self, z, y, is_training):
    """Build the generator graph that maps latent codes to images.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].

    Raises:
      ValueError: If log2(image_size / 4) is not an integer in [0, 5].
    """
    # Each block upscales by a factor of 2.
    seed_size = 4
    image_size = self._image_shape[0]
    seed_channels = self._ch * self._channels[0]

    # Map noise to the actual seed and reshape it to a rank-4 Tensor.
    net = ops.linear(
        z, seed_channels * seed_size * seed_size, scope="fc_noise")
    net = tf.reshape(
        net, [-1, seed_size, seed_size, seed_channels], name="fc_reshaped")

    # Number of blocks that have to upscale to reach the image size.
    num_upscales = np.log2(float(image_size) / seed_size)
    if not num_upscales.is_integer():
      raise ValueError("log2({}/{}) must be an integer.".format(
          image_size, seed_size))
    if not 0 <= num_upscales <= 5:
      raise ValueError("Invalid image_size {}.".format(image_size))
    num_upscales = int(num_upscales)

    for block_number in range(1, 6):
      # The first `num_upscales` blocks upscale, the rest keep the size.
      scale = "up" if block_number <= num_upscales else "none"
      block = self._resnet_block(
          name="B{}".format(block_number),
          in_channels=self._ch * self._channels[block_number - 1],
          out_channels=self._ch * self._channels[block_number],
          scale=scale)
      net = block(net, z=z, y=y, is_training=is_training)

    net = self.batch_norm(
        net, z=z, y=y, is_training=is_training, name="final_norm")
    net = tf.nn.relu(net)
    net = ops.conv2d(
        net,
        output_dim=self._image_shape[2],
        k_h=3,
        k_w=3,
        d_h=1,
        d_w=1,
        name="final_conv")
    return tf.nn.sigmoid(net)
Beispiel #6
0
  def apply(self, x, y, is_training):
    """Apply the discriminator on a input.

    A 3x3 convolution is followed by six groups of five resnet blocks with
    scale "none". The first five groups are each followed by a block with
    scale "down" that doubles the channel count.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors, the final prediction of the discriminator, the logits
      before the final output activation function and the flattened features
      from the second last layer.

    Raises:
      ValueError: If the number of color channels is neither 1 nor 3.
    """
    resnet_ops.validate_image_inputs(x)
    colors = x.get_shape().as_list()[-1]
    # Raise instead of asserting: asserts are stripped when Python runs with
    # -O, which would silently disable this input check.
    if colors not in [1, 3]:
      raise ValueError("Number of color channels not supported: {}".format(
          colors))
    ch = 64
    output = ops.conv2d(
        x, output_dim=ch // 4, k_h=3, k_w=3, d_h=1, d_w=1,
        name="color_conv")
    in_channels = ch // 4
    out_channels = ch // 2
    for superblock in range(6):
      for i in range(5):
        block = self._resnet_block(
            name="B_{}_{}".format(superblock, i),
            in_channels=in_channels,
            out_channels=in_channels,
            scale="none")
        output = block(output, z=None, y=y, is_training=is_training)
      # We want to downscale 5 times.
      if superblock < 5:
        # NOTE: the "_up" suffix is kept although the block downscales; the
        # name is passed to the block and presumably names its variables.
        block = self._resnet_block(
            name="B_{}_up".format(superblock),
            in_channels=in_channels,
            out_channels=out_channels,
            scale="down")
        output = block(output, z=None, y=y, is_training=is_training)
      in_channels *= 2
      out_channels *= 2

    # Final part: flatten to [batch_size, 4 * 4 * 8 * ch] and map to a logit.
    output = tf.reshape(output, [-1, 4 * 4 * 8 * ch])
    out_logit = ops.linear(output, 1, scope="disc_final_fc",
                           use_sn=self._spectral_norm)
    out = tf.nn.sigmoid(out_logit)
    return out, out_logit, output
Beispiel #7
0
    def discriminator(self,
                      x,
                      y,
                      is_training,
                      reuse=False,
                      rotation_head=False):
        """Discriminator network with an optional rotation head.

        Args:
          x: an input image tensor.
          y: Tensor with label indices.
          is_training: boolean, whether or not it is a training call.
          reuse: boolean, whether or not to reuse the variables.
          rotation_head: If True add a rotation head on top of the
            discriminator's final layer.

        Returns:
          If `rotation_head` is False, the result of the parent class'
          `discriminator` unchanged. Otherwise a tuple of:
          real_probs: the [0, 1] probability tensor of x being real images.
          real_scores: the unbounded score tensor of x being real images.
          rotation_scores: output of a linear layer with NUM_ROTATIONS units
            applied to the flattened final layer.
        """
        base_outputs = super(SSGAN, self).discriminator(
            x, y=y, is_training=is_training, reuse=reuse)
        if not rotation_head:
            return base_outputs
        real_probs, real_scores, final = base_outputs

        # Hack to find out whether the rotation head should use spectral norm.
        # Spectral norm is configured on the architecture (AbstractGenerator
        # or AbstractDiscriminator), and the layer below is part of the
        # architecture, so an instance is created just to read the setting.
        architecture_classes = {
            c.RESNET5_ARCH: resnet5.Discriminator,
            c.RESNET5_BIGGAN_ARCH: resnet5_biggan.Discriminator,
            c.RESNET_CIFAR: resnet_cifar.Discriminator,
            c.SNDCGAN_ARCH: sndcgan.Discriminator,
        }
        architecture = architecture_classes[self._architecture]()
        use_sn = architecture._spectral_norm  # pylint: disable=protected-access

        with tf.variable_scope("discriminator_rotation", reuse=reuse):
            flat_final = tf.reshape(final, (tf.shape(x)[0], -1))
            rotation_scores = linear(
                flat_final,
                NUM_ROTATIONS,
                scope="score_classify",
                use_sn=use_sn)
        return real_probs, real_scores, rotation_scores
Beispiel #8
0
  def apply(self, z, y, is_training):
    """Build the generator network for the given inputs.

    The latent code is projected to a 4x4 seed with 8 * 64 channels and passed
    through six groups of five resnet blocks with scale "none". The first five
    groups are each followed by a block with scale "up" that halves the
    channel count.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].

    Raises:
      ValueError: If `z` is not a rank-2 tensor.
    """
    z_shape = z.get_shape().as_list()
    if len(z_shape) != 2:
      raise ValueError("Expected shape [batch_size, z_dim], got %s." % z_shape)
    ch = 64
    colors = self._image_shape[2]
    # Map noise to the actual seed.
    output = ops.linear(z, 4 * 4 * 8 * ch, scope="fc_noise")
    # Reshape the seed to be a rank-4 Tensor.
    output = tf.reshape(output, [-1, 4, 4, 8 * ch], name="fc_reshaped")
    in_channels = 8 * ch
    out_channels = 4 * ch
    for superblock in range(6):
      for i in range(5):
        block = self._resnet_block(
            name="B_{}_{}".format(superblock, i),
            in_channels=in_channels,
            out_channels=in_channels,
            scale="none")
        output = block(output, z=z, y=y, is_training=is_training)
      # We want to upscale 5 times.
      if superblock < 5:
        block = self._resnet_block(
            name="B_{}_up".format(superblock),
            in_channels=in_channels,
            out_channels=out_channels,
            scale="up")
        output = block(output, z=z, y=y, is_training=is_training)
      # Floor division keeps the channel counts integers. True division (the
      # Python 3 default) would turn them into floats such as 256.0.
      in_channels //= 2
      out_channels //= 2

    output = ops.conv2d(
        output, output_dim=colors, k_h=3, k_w=3, d_h=1, d_w=1,
        name="final_conv")
    output = tf.nn.sigmoid(output)
    return output
Beispiel #9
0
  def apply(self, x, y, is_training):
    """Build the discriminator graph for a batch of images.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the logits, the logits themselves and
      the spatially averaged features the logits are computed from.

    Raises:
      ValueError: If the number of color channels is neither 1 nor 3.
    """
    resnet_ops.validate_image_inputs(x)
    colors = x.shape[3].value
    if colors not in (1, 3):
      raise ValueError("Number of color channels not supported: {}".format(
          colors))

    # Block B0 maps the color channels to `self._ch` feature channels.
    first_block = self._resnet_block(
        name="B0", in_channels=colors, out_channels=self._ch, scale="down")
    net = first_block(x, z=None, y=y, is_training=is_training)

    # Blocks B1..B5, each constructed with scale "down".
    for block_number in range(1, 6):
      block = self._resnet_block(
          name="B{}".format(block_number),
          in_channels=self._ch * self._channels[block_number - 1],
          out_channels=self._ch * self._channels[block_number],
          scale="down")
      net = block(net, z=None, y=y, is_training=is_training)

    # ReLU, then average over both spatial axes.
    pre_logits = tf.reduce_mean(tf.nn.relu(net), axis=[1, 2])
    out_logit = ops.linear(
        pre_logits, 1, scope="disc_final_fc", use_sn=self._spectral_norm)
    return tf.nn.sigmoid(out_logit), out_logit, pre_logits
    def apply(self, x, y, is_training):
        """Build the discriminator graph for a batch of images.

        Seven convolutions, each followed by a leaky ReLU with leak 0.1, feed
        a single linear layer that produces the logit.

        Args:
          x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
          y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
            labels. Unused.
          is_training: Boolean, whether the architecture should be constructed
            for training or inference. Unused.

        Returns:
          Tuple of 3 Tensors: the sigmoid of the logits, the logits themselves
          and the flattened output of the last convolutional layer.
        """
        del is_training, y
        sn = self._spectral_norm
        # In compare gan framework, the image preprocess normalize image pixel
        # to range [0, 1], while author used [-1, 1]. Rescale the input here
        # instead of changing our preprocessing function.
        x = x * 2.0 - 1.0

        # (output channels, kernel size, stride) for d_conv1 .. d_conv7.
        conv_specs = (
            (64, 3, 1),
            (128, 4, 2),
            (128, 3, 1),
            (256, 4, 2),
            (256, 3, 1),
            (512, 4, 2),
            (512, 3, 1),
        )
        net = x
        for layer_number, (channels, kernel, stride) in enumerate(
                conv_specs, start=1):
            net = conv2d(net, channels, kernel, kernel, stride, stride,
                         name="d_conv{}".format(layer_number), use_sn=sn)
            net = lrelu(net, leak=0.1)

        # Flatten using the static batch size.
        net = tf.reshape(net, [x.shape.as_list()[0], -1])
        out_logit = linear(net, 1, scope="d_fc1", use_sn=sn)
        return tf.nn.sigmoid(out_logit), out_logit, net
    def apply(self, z, y, is_training):
        """Build the generator graph that maps latent codes to images.

        The latent code is projected to a 6x6 seed with 512 channels and
        passed through three resnet blocks with scale "up".

        Args:
          z: `Tensor` of shape [batch_size, z_dim] with latent code.
          y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
            labels.
          is_training: boolean, are we in train or eval model.

        Returns:
          A tensor with `self._image_shape[2]` channels and values in [0, 1].
        """
        ch = 64
        colors = self._image_shape[2]
        batch_size = z.get_shape().as_list()[0]
        # (input, output) channel multipliers of `ch` for blocks B1..B3.
        channel_multipliers = ((8, 4), (4, 2), (2, 1))

        net = ops.linear(z, 6 * 6 * 512, scope="fc_noise")
        net = tf.reshape(net, [batch_size, 6, 6, 512], name="fc_reshaped")

        for block_number, (in_mult, out_mult) in enumerate(
                channel_multipliers, start=1):
            block = self._resnet_block(
                name="B{}".format(block_number),
                in_channels=ch * in_mult,
                out_channels=ch * out_mult,
                scale="up")
            net = block(net, z=z, y=y, is_training=is_training)

        net = self.batch_norm(
            net, z=z, y=y, is_training=is_training, scope="final_norm")
        net = tf.nn.relu(net)
        net = ops.conv2d(
            net,
            output_dim=colors,
            k_h=3,
            k_w=3,
            d_h=1,
            d_w=1,
            name="final_conv")
        return tf.nn.sigmoid(net)
Beispiel #12
0
    def discriminator_with_rotation_head(self, x, y, is_training):
        """Run the discriminator and add a rotation head on its final layer.

        Args:
          x: an input image tensor.
          y: Tensor with label indices.
          is_training: boolean, whether or not it is a training call.

        Returns:
          real_probs: the [0, 1] probability tensor of x being real images.
          real_scores: the unbounded score tensor of x being real images.
          rotation_scores: output of a linear layer with NUM_ROTATIONS units
            applied to the flattened final layer of the discriminator.
        """
        probs, scores, features = self.discriminator(
            x=x, y=y, is_training=is_training)
        # The rotation head uses the same spectral norm setting as the
        # discriminator architecture.
        spectral_norm = self._discriminator._spectral_norm  # pylint: disable=protected-access
        with tf.variable_scope("discriminator_rotation", reuse=tf.AUTO_REUSE):
            flat_features = tf.reshape(features, (tf.shape(x)[0], -1))
            rotation_scores = linear(
                flat_features,
                NUM_ROTATIONS,
                scope="score_classify",
                use_sn=spectral_norm)
        return probs, scores, rotation_scores
Beispiel #13
0
 def apply(self, x):
     """Map `x` through two linear layers with a ReLU in between.

     Args:
       x: Input of shape
         [batch_size, 2 * aux_ip_size * aux_ip_size * aux_ip_channels].

     Returns:
       The output of the second linear layer, which has
       `self._num_groups` units.
     """
     hidden = linear(x, self._aux_ip_channels, scope="aux_fc1")
     hidden = tf.nn.relu(hidden)
     return linear(hidden, self._num_groups, scope="aux_fc2")
  def apply(self, x, y, is_training):
    """Build the discriminator graph for a batch of images.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the logits, the logits themselves and
      the spatially summed features the logits are computed from.

    Raises:
      ValueError: If label projection is enabled but `y` is None.
    """
    logging.info("[Discriminator] inputs are x=%s, y=%s", x.shape,
                 None if y is None else y.shape)
    resnet_ops.validate_image_inputs(x)

    in_channels, out_channels = self._get_in_out_channels(
        colors=x.shape[-1].value, resolution=x.shape[1].value)
    num_blocks = len(in_channels)

    net = x
    for block_idx in range(num_blocks):
      block_name = "B{}".format(block_idx + 1)
      # Every block downscales except the last one.
      scale = "down" if block_idx < num_blocks - 1 else "none"
      block = self._resnet_block(
          name=block_name,
          in_channels=in_channels[block_idx],
          out_channels=out_channels[block_idx],
          scale=scale)
      net = block(net, z=None, y=y, is_training=is_training)
      if block_name in self._blocks_with_attention:
        logging.info("[Discriminator] Applying non-local block to %s",
                     net.shape)
        net = ops.non_local_block(net, "non_local_block",
                                  use_sn=self._spectral_norm)

    # Final part: ReLU, sum over both spatial axes, linear layer.
    logging.info("[Discriminator] before final processing: %s", net.shape)
    net = tf.nn.relu(net)
    h = tf.math.reduce_sum(net, axis=[1, 2])
    out_logit = ops.linear(h, 1, scope="final_fc", use_sn=self._spectral_norm)
    logging.info("[Discriminator] after final processing: %s", net.shape)

    if not self._project_y:
      return tf.nn.sigmoid(out_logit), out_logit, h

    if y is None:
      raise ValueError("You must provide class information y to project.")
    with tf.variable_scope("embedding_fc"):
      # ops.linear() is not used here since it does not have an option to
      # override the initializer.
      embedding_kernel = tf.get_variable(
          "kernel", [y.shape[1], out_channels[-1]], tf.float32,
          initializer=tf.initializers.glorot_normal())
      if self._spectral_norm:
        embedding_kernel = ops.spectral_norm(embedding_kernel)
      embedded_y = tf.matmul(y, embedding_kernel)
      logging.info("[Discriminator] embedded_y for projection: %s",
                   embedded_y.shape)
      out_logit += tf.reduce_sum(embedded_y * h, axis=1, keepdims=True)
    return tf.nn.sigmoid(out_logit), out_logit, h
  def apply(self, z, y, is_training):
    """Build the generator network for the given inputs.

    The latent code (optionally embedded and optionally split into one piece
    per block) is projected to a 4x4 seed, passed through the resnet blocks
    returned by `self._get_in_out_channels()`, each built with scale "up", and
    finally mapped to an image by batch norm, ReLU and a 3x3 convolution.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].
    """
    shape_or_none = lambda t: None if t is None else t.shape
    logging.info("[Generator] inputs are z=%s, y=%s", z.shape, shape_or_none(y))
    # Each block upscales by a factor of 2.
    seed_size = 4
    z_dim = z.shape[1].value

    in_channels, out_channels = self._get_in_out_channels()
    num_blocks = len(in_channels)

    # Optional learned linear embeddings of z (same size) and y.
    if self._embed_z:
      z = ops.linear(z, z_dim, scope="embed_z", use_sn=False,
                     use_bias=self._embed_bias)
    if self._embed_y:
      y = ops.linear(y, self._embed_y_dim, scope="embed_y", use_sn=False,
                     use_bias=self._embed_bias)
    # By default every block receives the same y (and, below, the same z).
    y_per_block = num_blocks * [y]
    if self._hierarchical_z:
      # Split z along axis 1 into num_blocks + 1 pieces: the first one seeds
      # the network, the remaining ones are handed to one block each.
      z_per_block = tf.split(z, num_blocks + 1, axis=1)
      z0, z_per_block = z_per_block[0], z_per_block[1:]
      if y is not None:
        # Each block is conditioned on its z piece concatenated with y.
        y_per_block = [tf.concat([zi, y], 1) for zi in z_per_block]
    else:
      z0 = z
      z_per_block = num_blocks * [z]

    logging.info("[Generator] z0=%s, z_per_block=%s, y_per_block=%s",
                 z0.shape, [str(shape_or_none(t)) for t in z_per_block],
                 [str(shape_or_none(t)) for t in y_per_block])

    # Map noise to the actual seed.
    net = ops.linear(
        z0,
        in_channels[0] * seed_size * seed_size,
        scope="fc_noise",
        use_sn=self._spectral_norm)
    # Reshape the seed to be a rank-4 Tensor.
    net = tf.reshape(
        net,
        [-1, seed_size, seed_size, in_channels[0]],
        name="fc_reshaped")

    for block_idx in range(num_blocks):
      name = "B{}".format(block_idx + 1)
      block = self._resnet_block(
          name=name,
          in_channels=in_channels[block_idx],
          out_channels=out_channels[block_idx],
          scale="up")
      net = block(
          net,
          z=z_per_block[block_idx],
          y=y_per_block[block_idx],
          is_training=is_training)
      # A non-local block follows every block listed in
      # `self._blocks_with_attention`.
      if name in self._blocks_with_attention:
        logging.info("[Generator] Applying non-local block to %s", net.shape)
        net = ops.non_local_block(net, "non_local_block",
                                  use_sn=self._spectral_norm)
    # Final processing of the net.
    # Use unconditional batch norm.
    logging.info("[Generator] before final processing: %s", net.shape)
    net = ops.batch_norm(net, is_training=is_training, name="final_norm")
    net = tf.nn.relu(net)
    net = ops.conv2d(net, output_dim=self._image_shape[2], k_h=3, k_w=3,
                     d_h=1, d_w=1, name="final_conv",
                     use_sn=self._spectral_norm)
    logging.info("[Generator] after final processing: %s", net.shape)
    # Rescale the tanh output from [-1, 1] to [0, 1].
    net = (tf.nn.tanh(net) + 1.0) / 2.0
    return net
 def apply(self, z, y, is_training):
     """Record the call arguments and map `z` to an image-shaped tensor.

     Args:
       z: Tensor whose first dimension is the (static) batch size.
       y: Labels; only recorded in `self.call_arg_list`.
       is_training: Only recorded in `self.call_arg_list`.

     Returns:
       A tensor of shape [batch_size] + self._image_shape holding the sigmoid
       of a single linear layer applied to `z`.
     """
     self.call_arg_list.append({"z": z, "y": y, "is_training": is_training})
     output_shape = [z.shape[0].value] + list(self._image_shape)
     flat = arch_ops.linear(z, np.prod(self._image_shape), scope="fc_noise")
     flat = tf.nn.sigmoid(flat)
     return tf.reshape(flat, output_shape)
 def apply(self, x, y, is_training):
     """Record the call arguments and score `x` with one linear layer.

     Args:
       x: Tensor that is averaged over axes 1 and 2.
       y: Labels; only recorded in `self.call_arg_list`.
       is_training: Only recorded in `self.call_arg_list`.

     Returns:
       Tuple of the sigmoid of the logits, the logits and the averaged input
       the logits are computed from.
     """
     self.call_arg_list.append({"x": x, "y": y, "is_training": is_training})
     pooled = tf.reduce_mean(x, axis=[1, 2])
     logits = arch_ops.linear(pooled, 1)
     return tf.nn.sigmoid(logits), logits, pooled
    def apply(self, z, y, is_training):
        """Build the generator graph that maps latent codes to images.

        A linear layer produces a feature map at 1/8 of the image size, which
        three deconvolutions bring to full size. A final deconvolution maps
        it to the color channels.

        Args:
          z: `Tensor` of shape [batch_size, z_dim] with latent code.
          y: `Tensor` of shape [batch_size, num_classes] of one hot encoded
            labels.
          is_training: boolean, are we in train or eval model.

        Returns:
          A tensor of size [batch_size] + self._image_shape with values in
          [0, 1].
        """
        batch_size = z.shape[0].value
        s_h, s_w, colors = self._image_shape
        # Spatial sizes at 1/2, 1/4 and 1/8 of the image size.
        s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
        s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
        s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)

        net = linear(z, s_h8 * s_w8 * 512, scope="g_fc1")
        net = self.batch_norm(
            net, z=z, y=y, is_training=is_training, name="g_bn1")
        net = tf.nn.relu(net)
        net = tf.reshape(net, [batch_size, s_h8, s_w8, 512])

        # (layer number, output shape) of the deconv + batch norm + ReLU
        # stages g_dc2/g_bn2 .. g_dc4/g_bn4.
        upsampling_stages = (
            (2, [batch_size, s_h4, s_w4, 256]),
            (3, [batch_size, s_h2, s_w2, 128]),
            (4, [batch_size, s_h, s_w, 64]),
        )
        for layer_number, output_shape in upsampling_stages:
            net = deconv2d(net, output_shape, 4, 4, 2, 2,
                           name="g_dc{}".format(layer_number))
            net = self.batch_norm(
                net,
                z=z,
                y=y,
                is_training=is_training,
                name="g_bn{}".format(layer_number))
            net = tf.nn.relu(net)

        net = deconv2d(net, [batch_size, s_h, s_w, colors], 3, 3, 1, 1,
                       name="g_dc5")
        out = tf.tanh(net)

        # This normalization from [-1, 1] to [0, 1] is introduced for
        # consistency with other models.
        return tf.div(out + 1.0, 2.0)
Beispiel #19
0
    def apply(self, z, y, is_training):
        """Build the generator graph that maps latent codes to images.

        When labels are available, `z` and the (optionally embedded) `y` are
        concatenated and the result is used both as the latent code and as
        the conditioning input of every block.

        Args:
          z: `Tensor` of shape [batch_size, z_dim] with latent code.
          y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
            labels.
          is_training: boolean, are we in train or eval model.

        Returns:
          A tensor of size [batch_size] + self._image_shape with values in
          [0, 1].
        """
        shape_or_none = lambda t: None if t is None else t.shape
        logging.info("[Generator] inputs are z=%s, y=%s", z.shape,
                     shape_or_none(y))
        seed_size = 4

        if self._embed_y:
            y = ops.linear(
                y,
                self._embed_y_dim,
                scope="embed_y",
                use_sn=False,
                use_bias=False)
        if y is not None:
            # From here on z and y are the same concatenated tensor.
            y = tf.concat([z, y], axis=1)
            z = y

        in_channels, out_channels = self._get_in_out_channels()
        num_blocks = len(in_channels)

        # Map noise to the actual seed and reshape it to a rank-4 Tensor.
        net = ops.linear(
            z,
            in_channels[0] * seed_size * seed_size,
            scope="fc_noise",
            use_sn=self._spectral_norm)
        net = tf.reshape(
            net, [-1, seed_size, seed_size, in_channels[0]],
            name="fc_reshaped")

        for block_idx in range(num_blocks):
            # Blocks alternate: even indices keep the size, odd ones upscale.
            scale = "up" if block_idx % 2 else "none"
            block = self._resnet_block(
                name="B{}".format(block_idx + 1),
                in_channels=in_channels[block_idx],
                out_channels=out_channels[block_idx],
                scale=scale)
            net = block(net, z=z, y=y, is_training=is_training)
            # At resolution 64x64 there is a self-attention block.
            if scale == "up" and net.shape[1].value == 64:
                logging.info("[Generator] Applying non-local block to %s",
                             net.shape)
                net = ops.non_local_block(
                    net, "non_local_block", use_sn=self._spectral_norm)

        # Final processing of the net.
        # Use unconditional batch norm.
        logging.info("[Generator] before final processing: %s", net.shape)
        net = ops.batch_norm(net, is_training=is_training, name="final_norm")
        net = tf.nn.relu(net)

        colors = self._image_shape[2]
        fast_conv_to_rgb = self._experimental_fast_conv_to_rgb
        # In the experimental fast mode the convolution has 128 output
        # channels, of which only the first `colors` are kept.
        net = ops.conv2d(
            net,
            output_dim=128 if fast_conv_to_rgb else colors,
            k_h=3,
            k_w=3,
            d_h=1,
            d_w=1,
            name="final_conv",
            use_sn=self._spectral_norm)
        if fast_conv_to_rgb:
            net = net[:, :, :, :colors]

        logging.info("[Generator] after final processing: %s", net.shape)
        # Rescale the tanh output from [-1, 1] to [0, 1].
        return (tf.nn.tanh(net) + 1.0) / 2.0
    def apply(self, z, y, is_training):
        """Construct the generator graph that maps latent codes to images.

        The latent code is projected by a linear layer and reshaped into a
        feature map at 1/16 of the output resolution (as computed by
        `conv_out_size_same`). Four stages of batch norm -> ReLU -> `deconv2d`
        then bring it to the shape given by `self._image_shape`.

        Args:
          z: `Tensor` of shape [batch_size, z_dim] with latent code.
          y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
            labels.
          is_training: boolean, are we in train or eval model.

        Returns:
          A tensor of size [batch_size] + self._image_shape with values in
          [0, 1].
        """
        base_filters = 64  # Dimension of filters in first convolutional layer.
        height, width, colors = self._image_shape

        # sizes[k] holds the (height, width) obtained after applying
        # conv_out_size_same(..., 2) k times to the output resolution.
        sizes = [(height, width)]
        for _ in range(4):
            prev_h, prev_w = sizes[-1]
            sizes.append((conv_out_size_same(prev_h, 2),
                          conv_out_size_same(prev_w, 2)))

        # deconv2d is given a fully specified output shape, so the static
        # batch size of z is needed here.
        batch = z.shape[0].value

        start_h, start_w = sizes[4]
        start_channels = base_filters * 8
        net = linear(z, start_channels * start_h * start_w, scope="g_fc1")
        net = tf.reshape(net, [-1, start_h, start_w, start_channels])

        # Output channels of the four deconv layers, coarsest stage first.
        stage_channels = [base_filters * 4, base_filters * 2, base_filters * 1,
                          colors]
        for stage, out_channels in enumerate(stage_channels, start=1):
            out_h, out_w = sizes[4 - stage]
            net = self.batch_norm(net,
                                  z=z,
                                  y=y,
                                  is_training=is_training,
                                  name="g_bn%d" % stage)
            net = tf.nn.relu(net)
            net = deconv2d(net, [batch, out_h, out_w, out_channels],
                           5,
                           5,
                           2,
                           2,
                           name="g_dc%d" % stage)

        # tanh yields [-1, 1]; rescale to [0, 1].
        return 0.5 * tf.nn.tanh(net) + 0.5
Beispiel #21
0
 def apply(self, x):
     """Apply the auxiliary "aux_fc" linear layer to the input.

     Args:
       x: Input tensor. According to the original author's note it has shape
         [batch_size, 2 * aux_ip_size * aux_ip_size * aux_ip_channels].

     Returns:
       The result of `linear` applied to `x` with an output size of 1.
     """
     return linear(x, 1, scope="aux_fc")
Beispiel #22
0
    def apply(self, z, y, is_training):
        """Build the generator network for 28x28 images.

        The latent code (optionally embedded and optionally split per block)
        is projected to a 7x7 feature map, passed through two "up" ResNet
        blocks, normalized, and converted to the requested number of color
        channels by a 3x3 convolution. An optional wavelet deconvolution
        layer is applied before the final sigmoid.

        Args:
          z: `Tensor` of shape [batch_size, z_dim] with latent code.
          y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
            labels.
          is_training: boolean, are we in train or eval model.

        Returns:
          A tensor of size [batch_size, 28, 28, colors] with values in [0, 1].
        """
        image_height = self._image_shape[0]
        image_width = self._image_shape[1]
        assert image_height == 28
        assert image_width == 28
        num_blocks = 2  # update network to generate 28x28 noise
        # The embedding width is taken from z before z is (optionally) replaced
        # by its embedding.
        z_dim = z.shape[1].value

        use_sn = self._spectral_norm
        if self._embed_z:
            z = ops.linear(z, z_dim, scope="embed_z", use_sn=use_sn)
        if self._embed_y:
            y = ops.linear(y, z_dim, scope="embed_y", use_sn=use_sn)

        if not self._hierarchical_z:
            # Every block, and the initial projection, sees the full z.
            z0 = z
            z_per_block = [z] * num_blocks
            y_per_block = [y] * num_blocks
        else:
            # The first chunk feeds the initial projection; each remaining
            # chunk conditions one block.
            z_chunks = tf.split(z, num_blocks + 1, axis=1)
            z0 = z_chunks[0]
            z_per_block = z_chunks[1:]
            if y is None:
                y_per_block = [y] * num_blocks
            else:
                y_per_block = [
                    tf.concat([z_chunk, y], 1) for z_chunk in z_per_block
                ]

        init_channels = 256
        output = ops.linear(z0,
                            7 * 7 * init_channels,
                            scope="fc_noise",
                            use_sn=use_sn)
        output = tf.reshape(output, [-1, 7, 7, init_channels],
                            name="fc_reshaped")

        conditioning = zip(z_per_block, y_per_block)
        for block_number, (z_block, y_block) in enumerate(conditioning,
                                                          start=1):
            block = self._resnet_block(name="B{}".format(block_number),
                                       in_channels=init_channels,
                                       out_channels=init_channels,
                                       scale="up")
            output = block(output,
                           z=z_block,
                           y=y_block,
                           is_training=is_training)

        # Final processing of the output.
        output = self.batch_norm(output,
                                 z=z,
                                 y=y,
                                 is_training=is_training,
                                 name="final_norm")
        output = tf.nn.relu(output)
        num_colors = self._image_shape[2]
        output = ops.conv2d(output,
                            output_dim=num_colors,
                            k_h=3,
                            k_w=3,
                            d_h=1,
                            d_w=1,
                            name="final_conv",
                            use_sn=use_sn)

        # Optional WaveletDeconv layer.
        if self._wavelet_deconv:
            output = ops.waveletDeconv(output)

        return tf.nn.sigmoid(output)
    def discriminator_with_additonal_heads(self, x, y, is_training):
        """Discriminator architecture with additional heads.

        Possible heads built on top of the feature representation of the
        discriminator:
        (1) Classify the image to the correct class.
        (2) Classify the rotation of the image.

        When `self._project_y` is set, a projection term (the inner product of
        a class embedding with the feature representation) is added to the
        discriminator logits. If `self._use_predictor` is also set, examples
        without a label use the class predicted by the auxiliary head instead.

        Args:
          x: An input image tensor.
          y: One-hot encoded label. Passing all zeros implies no label was
            passed.
          is_training: boolean, whether or not it is a training call.

        Returns:
          Tuple of 5 elements:
          (1) discriminator predictions (in [0, 1]),
          (2) the corresponding logits,
          (3) logits of the rotation prediction from the auxiliary head, or
              None if "rotation" is not in `self._self_supervision`,
          (4) logits of the class prediction from the auxiliary head, or None
              if the predictor head was not built,
          (5) float indicator of shape [batch_size, 1] that is 1.0 where `y`
              contains a label and 0.0 otherwise.
        """
        d_probs, d_logits, x_rep = self.discriminator(x,
                                                      y=y,
                                                      is_training=is_training)
        use_sn = self.discriminator._spectral_norm  # pylint: disable=protected-access

        # A one-hot row sums to 1, an all-zero (missing) label sums to 0.
        is_label_available = tf.cast(
            tf.cast(tf.reduce_sum(y, axis=1, keepdims=True), tf.float32) > 0.5,
            tf.float32)
        # The heads below are linear layers and need a flat representation.
        assert x_rep.shape.ndims == 2, x_rep.shape

        # Predict the rotation of the image.
        rotation_logits = None
        if "rotation" in self._self_supervision:
            with tf.variable_scope("discriminator_rotation",
                                   reuse=tf.AUTO_REUSE):
                rotation_logits = ops.linear(x_rep,
                                             NUM_ROTATIONS,
                                             scope="score_classify",
                                             use_sn=use_sn)
                logging.info("[Discriminator] rotation head %s -> %s",
                             x_rep.shape, rotation_logits.shape)

        if not self._project_y:
            return d_probs, d_logits, rotation_logits, None, is_label_available

        # Predict the class of the image.
        aux_logits = None
        if self._use_predictor:
            with tf.variable_scope("discriminator_predictor",
                                   reuse=tf.AUTO_REUSE):
                aux_logits = ops.linear(x_rep,
                                        y.shape[1],
                                        use_bias=True,
                                        scope="predictor_linear",
                                        use_sn=use_sn)
                # Apply the projection discriminator if needed.
                if self._use_soft_pred:
                    y_predicted = tf.nn.softmax(aux_logits)
                else:
                    # tf.argmax replaces the deprecated tf.arg_max alias.
                    y_predicted = tf.one_hot(tf.argmax(aux_logits, axis=1),
                                             aux_logits.shape[1])
                # Keep the given label where available, otherwise fall back to
                # the predicted one. No gradient flows through the labels.
                y = (1.0 -
                     is_label_available) * y_predicted + is_label_available * y
                y = tf.stop_gradient(y)
                # Log the input shape first, matching the "%s -> ..." format.
                logging.info(
                    "[Discriminator] %s -> aux_logits=%s, y_predicted=%s",
                    x_rep.shape, aux_logits.shape, y_predicted.shape)

        class_embedding = self.get_class_embedding(
            y=y, embedding_dim=x_rep.shape[-1].value, use_sn=use_sn)
        # Projection term: inner product of class embedding and features.
        d_logits += tf.reduce_sum(class_embedding * x_rep,
                                  axis=1,
                                  keepdims=True)
        d_probs = tf.nn.sigmoid(d_logits)
        return d_probs, d_logits, rotation_logits, aux_logits, is_label_available
    def apply(self, x, y, is_training):
        """Run the discriminator on a batch of images.

        The network is a stack of four `conv2d` layers (the last three each
        followed by batch norm), with leaky ReLU activations, and a final
        linear layer producing one logit per example.

        Args:
          x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
          y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
            labels.
          is_training: Boolean, whether the architecture should be constructed
            for training or inference.

        Returns:
          Tuple of 3 Tensors: the sigmoid of the output logit, the output
          logit itself, and the activations that feed the final linear layer
          (returned as produced by the last conv stage, before flattening).
        """
        base_filters = 64  # Dimension of filters in the first convolutional layer.
        use_sn = self._spectral_norm

        # First stage has no batch norm.
        net = conv2d(x, base_filters, 5, 5, 2, 2, name="d_conv1", use_sn=use_sn)
        net = lrelu(net)

        # Remaining stages: conv -> batch norm -> leaky ReLU. Batch norm
        # numbering starts at 1 while the conv numbering continues from 2.
        for bn_idx, multiplier in enumerate((2, 4, 8), start=1):
            net = conv2d(net,
                         base_filters * multiplier,
                         5,
                         5,
                         2,
                         2,
                         name="d_conv%d" % (bn_idx + 1),
                         use_sn=use_sn)
            net = self.batch_norm(net,
                                  y=y,
                                  is_training=is_training,
                                  name="d_bn%d" % bn_idx)
            net = lrelu(net)

        # Flatten using the static batch size of x.
        batch = x.shape[0].value
        flattened = tf.reshape(net, [batch, -1])
        out_logit = linear(flattened, 1, scope="d_fc4", use_sn=use_sn)
        return tf.nn.sigmoid(out_logit), out_logit, net