예제 #1
0
    def bottom_compress(self, inputs, name="bottom"):
        """Map RGB pixel intensities from data space into model space.

        The pixels are cast to float, converted to real numbers in the range
        -1 to 1, and reshaped so the last two input axes are merged into one.
        A convolution whose kernel and stride both span `self.num_channels`
        then combines the channel values of each pixel into a single position
        of depth `self._body_input_depth`.

        Args:
          inputs: A Tensor representing RGB pixel intensities as integers,
            [batch, ...]. Three further axes are indexed after the batch axis.
          name: string, variable scope name.

        Returns:
          A Tensor with shape [batch, ?, ?, body_input_depth].
        """
        with tf.variable_scope(name):
            pixels = tf.to_float(inputs)
            in_predict_mode = (
                self._model_hparams.mode == tf.estimator.ModeKeys.PREDICT)
            if not in_predict_mode:
                # The image summary is only emitted outside PREDICT mode.
                tf.summary.image("inputs", pixels, max_outputs=2)
            pixels = common_layers.convert_rgb_to_symmetric_real(pixels)

            # Merge the last two axes and add a trailing singleton axis so a
            # 2-D convolution can slide over the per-pixel channel values.
            dims = common_layers.shape_list(pixels)
            merged_shape = [-1, dims[1], dims[2] * dims[3], 1]
            pixels = tf.reshape(pixels, merged_shape)
            pixels.set_shape([None, None, None, 1])

            # Kernel and stride cover exactly one pixel's worth of channels.
            channel_window = (1, self.num_channels)
            compressed = tf.layers.conv2d(
                pixels,
                filters=self._body_input_depth,
                kernel_size=channel_window,
                strides=channel_window,
                padding="VALID",
                activation=tf.nn.relu,
                name="conv_input")
            compressed.set_shape(
                [None, None, None, self._body_input_depth])
            return compressed
예제 #2
0
  def testDmlLoss(self, batch, height, width, num_mixtures, reduce_sum):
    """Checks dml_loss against discretized_mix_logistic_loss on random data.

    The ratio of the two values returned by `common_layers.dml_loss` must
    match `common_layers.discretized_mix_logistic_loss` divided by the number
    of channels, with the labels first passed through
    `convert_rgb_to_symmetric_real`. When `reduce_sum` is set, the expected
    value is additionally averaged with `tf.reduce_mean`.
    """
    num_channels = 3
    spatial_dims = [batch, height, width]
    logits = tf.random_normal(spatial_dims + [num_mixtures * 10])
    int_labels = tf.random_uniform(
        spatial_dims + [num_channels], minval=0, maxval=256, dtype=tf.int32)

    loss_num, loss_den = common_layers.dml_loss(
        pred=logits, labels=int_labels, reduce_sum=reduce_sum)
    got = loss_num / loss_den

    symmetric_labels = common_layers.convert_rgb_to_symmetric_real(int_labels)
    want = common_layers.discretized_mix_logistic_loss(
        pred=logits, labels=symmetric_labels)
    want = want / num_channels
    if reduce_sum:
      want = tf.reduce_mean(want)

    # Evaluate both tensors in one call so they share the same random draw.
    got_val, want_val = self.evaluate([got, want])
    self.assertAllClose(got_val, want_val)
예제 #3
0
  def testDmlLoss(self, batch, height, width, num_mixtures, reduce_sum):
    """Compares dml_loss with the reference mixture-of-logistics loss.

    Random predictions and random integer labels in [0, 256) are generated.
    The numerator / denominator pair from `common_layers.dml_loss` is divided
    out and compared to `common_layers.discretized_mix_logistic_loss` (on
    labels converted by `convert_rgb_to_symmetric_real`) divided by the
    channel count, mean-reduced when `reduce_sum` is true.
    """
    n_channels = 3
    pred_shape = [batch, height, width, num_mixtures * 10]
    label_shape = [batch, height, width, n_channels]
    predictions = tf.random_normal(pred_shape)
    rgb_labels = tf.random_uniform(
        label_shape, minval=0, maxval=256, dtype=tf.int32)

    numerator, denominator = common_layers.dml_loss(
        pred=predictions, labels=rgb_labels, reduce_sum=reduce_sum)
    observed = numerator / denominator

    reference = common_layers.discretized_mix_logistic_loss(
        pred=predictions,
        labels=common_layers.convert_rgb_to_symmetric_real(rgb_labels))
    reference = reference / n_channels
    if reduce_sum:
      reference = tf.reduce_mean(reference)

    # A single evaluate call keeps the random inputs identical for both.
    observed_val, reference_val = self.evaluate([observed, reference])
    self.assertAllClose(observed_val, reference_val)
예제 #4
0
    def bottom_compress(self, inputs, name="bottom"):
        """Compresses channel-wise input pixels into whole-pixel representations.

        RGB pixel values are converted to real numbers in the range -1 to 1,
        and the channels of each pixel are then combined by a convolution to
        form a representation of shape [img_len, img_len].

        Args:
          inputs: Tensor representing RGB pixel intensities as integers, of
            shape [batch, img_len, img_len, channels].
          name: string, variable scope name.

        Returns:
          Tensor of shape [batch, img_len, img_len, body_input_depth].
        """
        with tf.variable_scope(name):
            pixels = tf.to_float(inputs)
            in_predict_mode = (
                self._model_hparams.mode == tf.estimator.ModeKeys.PREDICT)
            if not in_predict_mode:
                # The image summary is only emitted outside PREDICT mode.
                summary_image = common_layers.tpu_safe_image_summary(pixels)
                tf.summary.image("inputs", summary_image, max_outputs=2)
            pixels = common_layers.convert_rgb_to_symmetric_real(pixels)

            # Lay the data out as [img_len, img_len * channels] with a
            # trailing singleton axis so a 2-D convolution can be applied.
            dims = common_layers.shape_list(pixels)
            merged_shape = [-1, dims[1], dims[2] * dims[3], 1]
            pixels = tf.reshape(pixels, merged_shape)

            # Combine the channel intensities of each pixel with a convolution
            # whose kernel and stride both span one pixel's channels.
            # NOTE: this op has been seen to fail with "ValueError: Negative
            # dimension size caused by subtracting 3 from 1" when the reshaped
            # input was [?, 1, 1, 1] and the kernel was [1, 3, 1, 256].
            channel_window = (1, self.num_channels)
            return tf.layers.conv2d(
                pixels,
                filters=self._body_input_depth,
                kernel_size=channel_window,
                strides=channel_window,
                padding="VALID",
                activation=tf.nn.relu,
                name="conv_input")
예제 #5
0
  def preprocess_example(self, example, unused_mode, unused_hparams):
    """Luptonizes the example image so that t2t models can be used easily.

    The "inputs" image is run through `make_lupton_rgb` (via `tf.py_func`) to
    obtain a uint8 RGB image, which is then converted with
    `common_layers.convert_rgb_to_symmetric_real`. The result is stored as
    both "inputs" and "targets". If the problem hparams define `attributes`,
    the example entries named there are stacked into "attributes".

    Args:
      example: dict of features; must contain "inputs".
      unused_mode: not used.
      unused_hparams: not used.

    Returns:
      The same `example` dict, updated in place.
    """
    problem_hparams = self.get_hparams()
    raw_image = example["inputs"]

    def _luptonize(bands):
      # Luptonic asinh stretch; channels are passed as index 2, then 1, then 0.
      return make_lupton_rgb(
          bands[..., 2],
          bands[..., 1],
          bands[..., 0],
          Q=15,
          stretch=0.5,
          minimum=0)

    rgb_uint8 = tf.py_func(_luptonize, [raw_image], tf.uint8)
    # py_func loses static shape information, so restore it from the input.
    rgb_uint8.set_shape(raw_image.shape)
    symmetric_image = common_layers.convert_rgb_to_symmetric_real(rgb_uint8)

    if hasattr(problem_hparams, 'attributes'):
      attribute_values = [example[key] for key in problem_hparams.attributes]
      example["attributes"] = tf.stack(attribute_values)

    example["inputs"] = symmetric_image
    example["targets"] = symmetric_image
    return example
예제 #6
0
  def bottom_compress(self, inputs, name="bottom"):
    """Compresses channel-wise input pixels into whole-pixel representations.

    RGB pixel values are converted to real numbers in the range -1 to 1, and
    the channels of each pixel are then combined by a convolution to form a
    representation of shape [img_len, img_len].

    Args:
      inputs: Tensor representing RGB pixel intensities as integers, of shape
        [batch, img_len, img_len, channels].
      name: string, variable scope name.

    Returns:
      Tensor of shape
        [batch, img_len, img_len, self._model_hparams.hidden_size].
    """
    with tf.variable_scope(name):
      hparams = self._model_hparams
      pixels = tf.to_float(inputs)
      in_predict_mode = hparams.mode == tf.estimator.ModeKeys.PREDICT
      if not in_predict_mode:
        # The image summary is only emitted outside PREDICT mode.
        summary_image = common_layers.tpu_safe_image_summary(pixels)
        tf.summary.image("inputs", summary_image, max_outputs=2)
      pixels = common_layers.convert_rgb_to_symmetric_real(pixels)

      # Lay the data out as [img_len, img_len * channels] with a trailing
      # singleton axis so a 2-D convolution can be applied.
      dims = common_layers.shape_list(pixels)
      merged_shape = [-1, dims[1], dims[2] * dims[3], 1]
      pixels = tf.reshape(pixels, merged_shape)

      # Kernel and stride both span one pixel's channels, so each output
      # position combines the intensities of a single pixel.
      channel_window = (1, self.num_channels)
      return tf.layers.conv2d(
          pixels,
          filters=hparams.hidden_size,
          kernel_size=channel_window,
          strides=channel_window,
          padding="VALID",
          activation=tf.nn.relu,
          name="conv_input")
예제 #7
0
  def bottom_compress(self, inputs, name="bottom"):
    """Map RGB pixel intensities from data space into model space.

    The pixels are cast to float, converted to real numbers in the range -1
    to 1, and reshaped so the last two input axes are merged into one. A
    convolution whose kernel and stride both span `self.num_channels` then
    combines the channel values of each pixel into a single position of depth
    `self._body_input_depth`.

    Args:
      inputs: A Tensor representing RGB pixel intensities as integers,
        [batch, ...]. Three further axes are indexed after the batch axis.
      name: string, variable scope name.

    Returns:
      A Tensor with shape [batch, ?, ?, body_input_depth].
    """
    with tf.variable_scope(name):
      pixels = tf.to_float(inputs)
      in_predict_mode = (
          self._model_hparams.mode == tf.estimator.ModeKeys.PREDICT)
      if not in_predict_mode:
        # The image summary is only emitted outside PREDICT mode.
        summary_image = common_layers.tpu_safe_image_summary(pixels)
        tf.summary.image("inputs", summary_image, max_outputs=2)
      pixels = common_layers.convert_rgb_to_symmetric_real(pixels)

      # Merge the last two axes and add a trailing singleton axis so a 2-D
      # convolution can slide over the per-pixel channel values.
      dims = common_layers.shape_list(pixels)
      merged_shape = [-1, dims[1], dims[2] * dims[3], 1]
      pixels = tf.reshape(pixels, merged_shape)
      pixels.set_shape([None, None, None, 1])

      # Kernel and stride cover exactly one pixel's worth of channels.
      channel_window = (1, self.num_channels)
      compressed = tf.layers.conv2d(
          pixels,
          filters=self._body_input_depth,
          kernel_size=channel_window,
          strides=channel_window,
          padding="VALID",
          activation=tf.nn.relu,
          name="conv_input")
      compressed.set_shape([None, None, None, self._body_input_depth])
      return compressed