Example #1
def loss(self, top_out, targets):
  prediction = top_out
  prediction = tf.squeeze(prediction, axis=-1)
  prediction = common_layers.convert_rgb_to_real(prediction)
  groundtruth = common_layers.convert_rgb_to_real(targets)
  loss = tf.losses.mean_squared_error(prediction, groundtruth)
  return loss, tf.constant(1.0)
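Every example on this page routes pixels through common_layers.convert_rgb_to_real before computing a loss or feeding the model body. The helper itself is not shown on this page; below is a minimal re-implementation sketch, assuming it simply casts integer pixel intensities to float and scales [0, 255] into [0.0, 1.0] (the actual tensor2tensor helper may differ in details such as name scoping):

import tensorflow as tf  # TF 1.x-style API, matching the examples

def convert_rgb_to_real_sketch(x):
  """Assumed behavior: map [0, 255] pixel intensities into [0.0, 1.0]."""
  x = tf.to_float(x)
  return x / 255.0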
Example #2
  def bottom_compress(self, inputs, name="bottom"):
    """Transform input from data space to model space.

    Perform conversion of RGB pixel values to a real number and combine values
    for each pixel to form a representation of image_length x image_length dims.

    Args:
      inputs: A Tensor with shape [batch, ...]
      name: string, scope.
    Returns:
      body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
    """
    with tf.variable_scope(name):
      inputs = common_layers.convert_rgb_to_real(inputs)
      ishape = common_layers.shape_list(inputs)
      inputs = tf.reshape(inputs, [-1, ishape[1], ishape[2] * ishape[3], 1])
      inputs.set_shape([None, None, None, 1])
      # We compress RGB intensities for each pixel using a conv.
      x = common_layers.conv_block(
          inputs,
          self._body_input_depth, [((1, 1), (1, 3))],
          first_relu=False,
          padding="VALID",
          strides=(1, 3),
          force2d=True,
          name="conv_input")
      return x
Example #3
  def bottom_compress(self, inputs, name="bottom"):
    """Transform input from data space to model space.

    Perform conversion of RGB pixel values to a real number in the range -1 to 1
    and combine channel values for each pixel to form a representation of
    size image_length x image_length dims.

    Args:
      inputs: A Tensor representing pixel intensities as integers. [batch, ...]
      name: string, scope.
    Returns:
      body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
    """
    with tf.variable_scope(name):
      inputs = tf.to_float(inputs)
      tf.summary.image("inputs", inputs, max_outputs=2)
      inputs = common_layers.convert_rgb_to_real(inputs)
      ishape = common_layers.shape_list(inputs)
      inputs = tf.reshape(inputs, [-1, ishape[1], ishape[2] * ishape[3], 1])
      inputs.set_shape([None, None, None, 1])
      # We compress RGB intensities for each pixel using a conv.
      x = tf.layers.conv2d(
          inputs,
          self._body_input_depth, (1, 3),
          padding="VALID",
          strides=(1, 3),
          activation=tf.nn.relu,
          name="conv_input")
      x.set_shape([None, None, None, self._body_input_depth])
      return x
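The reshape-then-convolve trick in both bottom_compress variants folds the channel axis into the width axis and then applies a (1, 3) kernel with stride (1, 3), so the three RGB intensities of each pixel are compressed into a single body_input_depth-dimensional vector. A standalone shape walkthrough of that idea in the same TF 1.x style (the concrete sizes and depth are illustrative assumptions):

import tensorflow as tf

batch, height, width, depth = 2, 8, 8, 64  # illustrative sizes
images = tf.placeholder(tf.float32, [batch, height, width, 3])

# [2, 8, 8, 3] -> [2, 8, 24, 1]: RGB channels folded into the width axis.
folded = tf.reshape(images, [batch, height, width * 3, 1])

# A (1, 3) kernel with stride (1, 3) consumes exactly one pixel's RGB
# triple per step: [2, 8, 24, 1] -> [2, 8, 8, 64].
compressed = tf.layers.conv2d(
    folded, depth, (1, 3), strides=(1, 3), padding="VALID")
print(compressed.shape)  # (2, 8, 8, 64)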
Example #4
  def get_sampled_frame(self, pred_frame):
    """Samples the frame based on modality.

      If the modality is L2/L1, the predicted frame is used directly as
      the next frame and no sampling happens; in the case of Softmax
      loss, the next frame should be sampled from the predicted frame.

      This enables multi-frame target prediction with Softmax loss.

    Args:
      pred_frame: predicted frame.

    Returns:
      sampled frame.

    """
    # TODO(lukaszkaiser): the logic below heavily depends on the current
    # (a bit strange) video modalities - we should change that.

    if self.is_per_pixel_softmax:
      frame_shape = common_layers.shape_list(pred_frame)
      target_shape = frame_shape[:-1] + [self.hparams.problem.num_channels]
      sampled_frame = tf.reshape(pred_frame, target_shape + [256])
      sampled_frame = pixels_from_softmax(
          sampled_frame, temperature=self.hparams.pixel_sampling_temperature)
      # TODO(lukaszkaiser): this should be consistent with modality.bottom()
      sampled_frame = common_layers.standardize_images(sampled_frame)
    else:
      x = common_layers.convert_real_to_rgb(pred_frame)
      # Straight-through rounding: the forward pass yields tf.round(x),
      # while gradients flow through as if this were the identity.
      x = x - tf.stop_gradient(x - tf.round(x))
      x = common_layers.convert_rgb_to_real(x)
      return x
    return sampled_frame
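The else branch above uses a straight-through estimator: in the forward pass, x - tf.stop_gradient(x - tf.round(x)) evaluates to tf.round(x), but because the stop_gradient term contributes no gradient, backpropagation treats the whole expression as the identity. A minimal self-contained illustration (TF 1.x style):

import tensorflow as tf

x = tf.constant([0.2, 1.7, 250.4])
# Forward value: x - (x - round(x)) == round(x).
rounded = x - tf.stop_gradient(x - tf.round(x))
# Backward: only the leading x carries gradient, so d(rounded)/dx == 1,
# unlike tf.round, whose gradient is zero almost everywhere.
grad = tf.gradients(rounded, x)[0]

with tf.Session() as sess:
  print(sess.run(rounded))  # [  0.   2. 250.]
  print(sess.run(grad))     # [1. 1. 1.]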
Example #5
def preprocess_frame(frame):
  """Preprocess frame.

  1. Converts [0, 255] to [-0.5, 0.5]
  2. Adds uniform noise.

  Args:
    frame: 3-D Tensor representing pixels.
  Returns:
    frame: 3-D Tensor with values in between [-0.5, 0.5]
  """
  # Normalize from [0, 255] -> [0.0, 1.0], then shift to [-0.5, 0.5].
  frame = common_layers.convert_rgb_to_real(frame)
  frame = frame - 0.5
  frame, _ = glow_ops.uniform_binning_correction(frame)
  return frame
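glow_ops.uniform_binning_correction performs the dequantization step used by flow models such as Glow. Its implementation is not shown here; a hedged re-implementation sketch, assuming it adds per-value uniform noise of one bin width (1/256 for 8-bit pixels) and returns the matching log-likelihood correction (the real helper's signature and scaling may differ):

import tensorflow as tf

def uniform_binning_correction_sketch(x, n_bits=8):
  # Dequantize: add U[0, 1/n_bins) noise so discrete pixel values
  # become continuous, as required for density modeling.
  n_bins = 2.0**n_bits
  noise = tf.random_uniform(tf.shape(x), minval=0.0, maxval=1.0 / n_bins)
  # Each dequantized dimension contributes -log(n_bins) to the
  # model's log-likelihood objective.
  num_dims = tf.to_float(tf.reduce_prod(tf.shape(x)))
  objective = -tf.log(n_bins) * num_dims
  return x + noise, objective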
Example #6
def targets_bottom(self, x):  # pylint: disable=arguments-differ
  common_video.gif_summary("targets_bottom", x)
  return common_layers.convert_rgb_to_real(x)
Example #7
def bottom(self, x):
  common_video.gif_summary("inputs", x)
  return common_layers.convert_rgb_to_real(x)
Example #8
def convert_rgb_to_real(self, prediction, targets):
  """Convert prediction and target from rgb to real."""
  prediction = tf.squeeze(prediction, axis=-1)
  prediction = common_layers.convert_rgb_to_real(prediction)
  targets = common_layers.convert_rgb_to_real(targets)
  return prediction, targets
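The tf.squeeze here assumes the model's top layer leaves a trailing singleton dimension on its predictions while targets do not have one. A toy shape check with the normalization inlined (all shapes and the 255 scaling are assumptions, in keeping with the other examples):

import tensorflow as tf

prediction = tf.zeros([2, 64, 64, 3, 1])       # e.g. raw top-layer output
targets = tf.zeros([2, 64, 64, 3], tf.uint8)

prediction = tf.squeeze(prediction, axis=-1)   # -> [2, 64, 64, 3]
prediction = prediction / 255.0                # convert_rgb_to_real, inlined
targets = tf.to_float(targets) / 255.0
print(prediction.shape, targets.shape)         # (2, 64, 64, 3) (2, 64, 64, 3)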
Example #9
def targets_bottom(self, x):  # pylint: disable=arguments-differ
  common_layers.summarize_video(x, "targets_bottom")
  return common_layers.convert_rgb_to_real(x)
Example #10
def bottom(self, x):
  common_layers.summarize_video(x, "inputs")
  return common_layers.convert_rgb_to_real(x)

def targets_bottom(self, x):
  common_layers.summarize_video(x, "targets_bottom")
  return common_layers.convert_rgb_to_real(x)
Example #11
def init_preprocess_single(self, features):
  for label in ["inputs", "targets"]:
    features[label] = common_layers.convert_rgb_to_real(features[label])
  return features
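A hook like init_preprocess_single would typically be mapped over each element of a tf.data pipeline. A minimal usage sketch with the normalization inlined (the dataset contents and shapes are illustrative assumptions):

import tensorflow as tf

def preprocess_single(features):
  # Same idea as init_preprocess_single above: scale both feature
  # tensors from [0, 255] integers into [0.0, 1.0] floats.
  for label in ["inputs", "targets"]:
    features[label] = tf.to_float(features[label]) / 255.0
  return features

# Hypothetical dataset of {"inputs": ..., "targets": ...} examples.
dataset = tf.data.Dataset.from_tensor_slices({
    "inputs": tf.zeros([4, 64, 64, 3], tf.uint8),
    "targets": tf.zeros([4, 64, 64, 3], tf.uint8),
})
dataset = dataset.map(preprocess_single)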