Code Example #1
File: modalities.py  Project: kltony/tensor2tensor
  def loss(self, top_out, targets):
    # Squeeze out the trailing singleton axis, rescale prediction and target
    # from RGB space to real values, and compute a mean-squared-error loss.
    # The second return value is the loss weight.
    prediction = top_out
    prediction = tf.squeeze(prediction, axis=-1)
    prediction = common_layers.convert_rgb_to_real(prediction)
    groundtruth = common_layers.convert_rgb_to_real(targets)
    loss = tf.losses.mean_squared_error(prediction, groundtruth)
    return loss, tf.constant(1.0)
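Every example on this page centers on common_layers.convert_rgb_to_real from
tensor2tensor. The library's implementation is not shown here; as a minimal
sketch, assuming the function simply casts integer pixel intensities to float
and rescales [0, 255] down to [0.0, 1.0], a hypothetical stand-in would look
like:

import tensorflow.compat.v1 as tf

def convert_rgb_to_real_sketch(x):
  """Hypothetical stand-in for common_layers.convert_rgb_to_real.

  Assumes the real function casts pixel values to float and rescales the
  [0, 255] intensity range to [0.0, 1.0].
  """
  x = tf.to_float(x)
  return x / 255.0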
Code Example #2
File: modalities.py  Project: AranKomat/tensor2tensor
  def bottom_compress(self, inputs, name="bottom"):
    """Transform input from data space to model space.

    Converts RGB pixel values to real numbers and combines the values for
    each pixel into a representation of image_length x image_length dims.

    Args:
      inputs: A Tensor with shape [batch, ...]
      name: string, scope.
    Returns:
      body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
    """
    with tf.variable_scope(name):
      inputs = common_layers.convert_rgb_to_real(inputs)
      ishape = common_layers.shape_list(inputs)
      inputs = tf.reshape(inputs, [-1, ishape[1], ishape[2] * ishape[3], 1])
      inputs.set_shape([None, None, None, 1])
      # We compress RGB intensities for each pixel using a conv.
      x = common_layers.conv_block(
          inputs,
          self._body_input_depth, [((1, 1), (1, 3))],
          first_relu=False,
          padding="VALID",
          strides=(1, 3),
          force2d=True,
          name="conv_input")
      return x
Code Example #3
  def bottom_compress(self, inputs, name="bottom"):
    """Transform input from data space to model space.

    Converts RGB pixel values to real numbers in the range -1 to 1 and
    combines the channel values for each pixel into a representation of
    image_length x image_length dims.

    Args:
      inputs: A Tensor representing pixel intensities as integers. [batch, ...]
      name: string, scope.
    Returns:
      body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
    """
    with tf.variable_scope(name):
      inputs = tf.to_float(inputs)
      tf.summary.image("inputs", inputs, max_outputs=2)
      inputs = common_layers.convert_rgb_to_real(inputs)
      ishape = common_layers.shape_list(inputs)
      inputs = tf.reshape(inputs, [-1, ishape[1], ishape[2] * ishape[3], 1])
      inputs.set_shape([None, None, None, 1])
      # We compress RGB intensities for each pixel using a conv.
      x = tf.layers.conv2d(inputs,
                           self._body_input_depth, (1, 3),
                           padding="VALID",
                           strides=(1, 3),
                           activation=tf.nn.relu,
                           name="conv_input")
      x.set_shape([None, None, None, self._body_input_depth])
      return x
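The reshape in bottom_compress folds the channel axis into the width axis, so
the subsequent (1, 3) convolution with stride (1, 3) collapses the three RGB
values of each pixel into one body_input_depth-dimensional vector. A quick
shape check (plain TF1-style code; the depth of 8 is illustrative):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

batch, height, width, channels, depth = 2, 4, 4, 3, 8
x = tf.zeros([batch, height, width, channels])
x = tf.reshape(x, [-1, height, width * channels, 1])  # -> (2, 4, 12, 1)
x = tf.layers.conv2d(x, depth, (1, 3), strides=(1, 3), padding="VALID")
print(x.shape)  # (2, 4, 4, 8): one depth-8 vector per original pixel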
Code Example #4
File: base.py  Project: qixiuai/tensor2tensor
  def get_sampled_frame(self, pred_frame):
    """Samples the frame based on modality.

      If the modality is L2/L1, the predicted frame is used directly as the
      next frame and there is no sampling; with a Softmax loss, however, the
      next frame must be sampled from the predicted frame.

      This enables multi-frame target prediction with Softmax loss.

    Args:
      pred_frame: predicted frame.

    Returns:
      sampled frame.

    """
    # TODO(lukaszkaiser): the logic below depends heavily on the current
    # (a bit strange) video modalities - we should change that.

    if self.is_per_pixel_softmax:
      frame_shape = common_layers.shape_list(pred_frame)
      target_shape = frame_shape[:-1] + [self.hparams.problem.num_channels]
      sampled_frame = tf.reshape(pred_frame, target_shape + [256])
      sampled_frame = pixels_from_softmax(
          sampled_frame, temperature=self.hparams.pixel_sampling_temperature)
      # TODO(lukaszkaiser): this should be consistent with modality.bottom()
      sampled_frame = common_layers.standardize_images(sampled_frame)
    else:
      # Straight-through rounding: the forward pass yields tf.round(x),
      # while the gradient is that of the identity.
      x = common_layers.convert_real_to_rgb(pred_frame)
      x = x - tf.stop_gradient(x - tf.round(x))
      x = common_layers.convert_rgb_to_real(x)
      return x
    return sampled_frame
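The else branch rounds the predicted frame back to integer pixel values while
keeping the operation differentiable. This is the straight-through estimator:
the forward value is tf.round(x), but gradients flow through as if the op were
the identity. A minimal, self-contained demonstration:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant([0.2, 1.7, 3.4])
rounded = x - tf.stop_gradient(x - tf.round(x))    # forward value: tf.round(x)
grad = tf.gradients(tf.reduce_sum(rounded), x)[0]  # backward: identity

with tf.Session() as sess:
  print(sess.run(rounded))  # [0. 2. 3.]
  print(sess.run(grad))     # [1. 1. 1.]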
Code Example #5
def preprocess_frame(frame):
  """Preprocess frame.

  1. Converts [0, 255] to [-0.5, 0.5].
  2. Adds uniform noise.

  Args:
    frame: 3-D Tensor representing pixels.
  Returns:
    frame: 3-D Tensor with values in [-0.5, 0.5].
  """
  # convert_rgb_to_real maps [0, 255] -> [0.0, 1.0]; subtracting 0.5 then
  # shifts the range to [-0.5, 0.5].
  frame = common_layers.convert_rgb_to_real(frame)
  frame = frame - 0.5
  frame, _ = glow_ops.uniform_binning_correction(frame)
  return frame
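glow_ops.uniform_binning_correction is not shown on this page. Assuming it
implements the usual uniform dequantization for flow models - adding noise
u ~ U[0, 1/2**n_bits) to each value and returning a per-example log-density
correction of -log(2**n_bits) per dimension - a hypothetical stand-in would
look like:

import numpy as np
import tensorflow.compat.v1 as tf

def uniform_binning_correction_sketch(x, n_bits=8):
  """Hypothetical stand-in for glow_ops.uniform_binning_correction.

  Adds dequantization noise u ~ U[0, 1/2**n_bits) and returns the noised
  tensor together with a log-density correction per example.
  """
  n_bins = 2.0 ** n_bits
  x = x + tf.random_uniform(tf.shape(x), minval=0.0, maxval=1.0 / n_bins)
  # Each dimension contributes -log(n_bins) to the log-likelihood objective.
  num_dims = tf.to_float(tf.reduce_prod(tf.shape(x)[1:]))
  objective = -np.log(n_bins) * num_dims * tf.ones(tf.shape(x)[:1])
  return x, objective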
Code Example #6
  def targets_bottom(self, x):  # pylint: disable=arguments-differ
    common_video.gif_summary("targets_bottom", x)
    return common_layers.convert_rgb_to_real(x)
Code Example #7
  def bottom(self, x):
    common_video.gif_summary("inputs", x)
    return common_layers.convert_rgb_to_real(x)
Code Example #8
  def convert_rgb_to_real(self, prediction, targets):
    """Convert prediction and target from rgb to real."""
    prediction = tf.squeeze(prediction, axis=-1)
    prediction = common_layers.convert_rgb_to_real(prediction)
    targets = common_layers.convert_rgb_to_real(targets)
    return prediction, targets
Code Example #9
File: modalities.py  Project: kltony/tensor2tensor
  def bottom(self, x):
    common_layers.summarize_video(x, "inputs")
    return common_layers.convert_rgb_to_real(x)
Code Example #10
File: modalities.py  Project: kltony/tensor2tensor
  def targets_bottom(self, x):  # pylint: disable=arguments-differ
    common_layers.summarize_video(x, "targets_bottom")
    return common_layers.convert_rgb_to_real(x)
Code Example #11
  def init_preprocess_single(self, features):
    # Rescale both inputs and targets from RGB space to real values.
    for label in ["inputs", "targets"]:
      features[label] = common_layers.convert_rgb_to_real(features[label])
    return features
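A closing usage note: Code Example #4 pairs convert_rgb_to_real with its
counterpart convert_real_to_rgb. Assuming the two are inverse rescalings
between [0, 255] and [0.0, 1.0], a minimal round-trip check looks like:

import tensorflow.compat.v1 as tf
from tensor2tensor.layers import common_layers

tf.disable_v2_behavior()

frame = tf.constant([[0.0, 127.0, 255.0]])
real = common_layers.convert_rgb_to_real(frame)  # -> roughly [0.0, 0.498, 1.0]
rgb = common_layers.convert_real_to_rgb(real)    # back to [0, 255]

with tf.Session() as sess:
  print(sess.run(rgb))  # should reproduce the original pixel values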