Example #1
        def process_single_frame(prev_outputs, inputs):
            """Process a single frame of the video."""
            cur_image, input_reward, action = inputs
            time_step, prev_image, prev_reward, frame_buf, lstm_states = prev_outputs

            generated_items = [prev_image]
            groundtruth_items = [cur_image]
            done_warm_start = tf.greater(time_step, context_frames - 1)
            input_image, = self.get_scheduled_sample_inputs(
                done_warm_start, groundtruth_items, generated_items, ss_func)

            # Prediction
            pred_image, lstm_states = self.construct_predictive_tower(
                input_image, None, action, lstm_states, latent)

            if self.hparams.reward_prediction:
                reward_input_image = pred_image
                if self.hparams.reward_prediction_stop_gradient:
                    reward_input_image = tf.stop_gradient(reward_input_image)
                with tf.control_dependencies([time_step]):
                    frame_buf = [reward_input_image] + frame_buf[:-1]
                pred_reward = self.reward_prediction(frame_buf, None, action,
                                                     latent)
                pred_reward = common_video.decode_to_shape(
                    pred_reward, common_layers.shape_list(input_reward),
                    "reward_dec")
            else:
                pred_reward = prev_reward

            time_step += 1
            outputs = (time_step, pred_image, pred_reward, frame_buf,
                       lstm_states)

            return outputs
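
This step function is written to be threaded over the video one frame at a time, carrying (time_step, image, reward, frame buffer, LSTM states) between calls. Below is a minimal, hypothetical sketch of that rollout pattern using tf.scan; the toy shapes, the dummy predictor, and the reduced state tuple are assumptions for illustration, not taken from the original code.

import tensorflow as tf  # TF 1.x, matching the examples in this listing

# Assumed toy shapes: [time, batch, height, width, channels].
time_steps, batch, height, width, channels = 4, 2, 8, 8, 3
images = tf.zeros([time_steps, batch, height, width, channels])
rewards = tf.zeros([time_steps, batch, 1])
actions = tf.zeros([time_steps, batch, 5])

def step_fn(prev_outputs, inputs):
  """Simplified stand-in for process_single_frame."""
  cur_image, input_reward, action = inputs
  time_step, prev_image, prev_reward = prev_outputs
  # Stand-in for construct_predictive_tower; reward prediction is omitted.
  pred_image = 0.5 * (prev_image + cur_image)
  pred_reward = prev_reward
  return (time_step + 1, pred_image, pred_reward)

initializer = (tf.constant(0),
               tf.zeros_like(images[0]),
               tf.zeros_like(rewards[0]))
# tf.scan feeds the previous step's outputs back in as prev_outputs,
# mirroring the (prev_outputs, inputs) signature used above.
outputs = tf.scan(step_fn, (images, rewards, actions), initializer=initializer)
# outputs[1] holds the predicted frames stacked along the time axis.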
Example #2
  def reward_prediction(self, input_image, input_reward, action, latent):
    """Builds a reward prediction network."""
    del action
    del latent

    conv_size = self.tinyify([32, 32, 16, 4])

    with tf.variable_scope("reward_pred", reuse=tf.AUTO_REUSE):
      x = input_image
      x = tfcl.batch_norm(x, updates_collections=None,
                          is_training=self.is_training, scope="reward_bn0")
      x = tfl.conv2d(x, conv_size[1], [3, 3], strides=(2, 2),
                     padding="SAME", activation=tf.nn.relu, name="reward_conv1")
      x = tfcl.batch_norm(x, updates_collections=None,
                          is_training=self.is_training, scope="reward_bn1")
      x = tfl.conv2d(x, conv_size[2], [3, 3], strides=(2, 2),
                     padding="SAME", activation=tf.nn.relu, name="reward_conv2")
      x = tfcl.batch_norm(x, updates_collections=None,
                          is_training=self.is_training, scope="reward_bn2")
      x = tfl.conv2d(x, conv_size[3], [3, 3], strides=(2, 2),
                     padding="SAME", activation=tf.nn.relu, name="reward_conv3")

      pred_reward = common_video.decode_to_shape(
          x, input_reward.shape, "reward_dec")

      return pred_reward
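
As a sanity check on the shapes, the three stride-2 convolutions above reduce the spatial resolution by a factor of 8 before decode_to_shape projects down to the reward shape. A standalone sketch of the same downsampling stack follows; the 64x64 input and the batch size are illustrative assumptions.

import tensorflow as tf  # TF 1.x
tfl = tf.layers

x = tf.zeros([1, 64, 64, 3])                       # assumed input resolution
for i, filters in enumerate([32, 16, 4]):           # mirrors conv_size[1:]
  x = tfl.conv2d(x, filters, [3, 3], strides=(2, 2), padding="SAME",
                 activation=tf.nn.relu, name="sketch_reward_conv%d" % (i + 1))
print(x.shape)  # (1, 8, 8, 4): each stride-2 convolution halves height and width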
Example #3
    def process_single_frame(prev_outputs, inputs):
      """Process a single frame of the video."""
      cur_image, input_reward, action = inputs
      time_step, prev_image, prev_reward, frame_buf, lstm_states = prev_outputs

      # Sample from the softmax (by argmax); this is a no-op for non-softmax losses.
      prev_image = self.get_sampled_frame(prev_image)

      generated_items = [prev_image]
      groundtruth_items = [cur_image]
      done_warm_start = tf.greater(time_step, context_frames - 1)
      input_image, = self.get_scheduled_sample_inputs(
          done_warm_start, groundtruth_items, generated_items, ss_func)

      # Prediction
      pred_image, lstm_states, _ = self.construct_predictive_tower(
          input_image, None, action, lstm_states, latent)

      if self.hparams.reward_prediction:
        reward_input_image = self.get_sampled_frame(pred_image)
        if self.hparams.reward_prediction_stop_gradient:
          reward_input_image = tf.stop_gradient(reward_input_image)
        with tf.control_dependencies([time_step]):
          frame_buf = [reward_input_image] + frame_buf[:-1]
        pred_reward = self.reward_prediction(frame_buf, None, action, latent)
        pred_reward = common_video.decode_to_shape(
            pred_reward, common_layers.shape_list(input_reward), "reward_dec")
      else:
        pred_reward = prev_reward

      time_step += 1
      outputs = (time_step, pred_image, pred_reward, frame_buf, lstm_states)

      return outputs
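
The main differences from Example #1 are the get_sampled_frame calls and the three-element return of construct_predictive_tower. The sampling step collapses per-pixel softmax logits to concrete pixel values; a hypothetical sketch of that idea is below, where the 256-way channel layout is an assumption rather than the actual layout used by get_sampled_frame.

import tensorflow as tf  # TF 1.x

def sample_frame_by_argmax(frame_logits):
  """Collapses per-pixel logits to pixel values via argmax.

  Assumes frame_logits has shape [batch, height, width, 256] holding
  unnormalized logits over pixel intensities (illustrative only).
  """
  pixels = tf.argmax(frame_logits, axis=-1)            # [batch, height, width]
  return tf.cast(pixels[..., tf.newaxis], tf.float32)  # back to a float frame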
Example #4
    def construct_model(self, images, actions, rewards):
        images = tf.unstack(images, axis=0)
        actions = tf.unstack(actions, axis=0)
        rewards = tf.unstack(rewards, axis=0)

        batch_size = common_layers.shape_list(images[0])[0]
        context_frames = self.hparams.video_num_input_frames

        # Predicted images and rewards.
        gen_rewards, gen_images, latent_means, latent_stds = [], [], [], []

        # LSTM states.
        lstm_state = [None] * 7

        # Create scheduled sampling function
        ss_func = self.get_scheduled_sample_func(batch_size)

        pred_image = tf.zeros_like(images[0])
        pred_reward = tf.zeros_like(rewards[0])
        latent = None
        for timestep, image, action, reward in zip(range(len(images) - 1),
                                                   images[:-1], actions[:-1],
                                                   rewards[:-1]):
            # Scheduled Sampling
            done_warm_start = timestep > context_frames - 1
            groundtruth_items = [image, reward]
            generated_items = [pred_image, pred_reward]
            input_image, input_reward = self.get_scheduled_sample_inputs(
                done_warm_start, groundtruth_items, generated_items, ss_func)

            # Latent
            # TODO(mbz): should we use input_image instead of image?
            latent_images = tf.stack([image, images[timestep + 1]], axis=0)
            latent_mean, latent_std = self.construct_latent_tower(
                latent_images, time_axis=0)
            latent = common_video.get_gaussian_tensor(latent_mean, latent_std)
            latent_means.append(latent_mean)
            latent_stds.append(latent_std)

            # Prediction
            pred_image, lstm_state = self.construct_predictive_tower(
                input_image, input_reward, action, lstm_state, latent)

            if self.hparams.reward_prediction:
                pred_reward = self.reward_prediction(pred_image, input_reward,
                                                     action, latent)
                pred_reward = common_video.decode_to_shape(
                    pred_reward, common_layers.shape_list(input_reward),
                    "reward_dec")
            else:
                pred_reward = input_reward

            gen_images.append(pred_image)
            gen_rewards.append(pred_reward)

        gen_images = tf.stack(gen_images, axis=0)
        gen_rewards = tf.stack(gen_rewards, axis=0)

        return gen_images, gen_rewards, latent_means, latent_stds
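
get_scheduled_sample_inputs switches between ground-truth and generated inputs once the warm-start frames are exhausted. A hypothetical sketch of that per-example selection is shown below; the real helper in the codebase may anneal the sampling probability differently.

import tensorflow as tf  # TF 1.x

def scheduled_sample(groundtruth, generated, use_generated_prob):
  """Per-example choice between ground-truth and generated inputs."""
  batch_size = tf.shape(groundtruth)[0]
  use_generated = tf.random_uniform([batch_size]) < use_generated_prob
  # With a rank-1 condition, tf.where selects whole rows (batch entries).
  return tf.where(use_generated, generated, groundtruth)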
Example #5
  def construct_model(self, images, actions, rewards):
    images = tf.unstack(images, axis=0)
    actions = tf.unstack(actions, axis=0)
    rewards = tf.unstack(rewards, axis=0)

    batch_size = common_layers.shape_list(images[0])[0]
    context_frames = self.hparams.video_num_input_frames

    # Predicted images and rewards.
    gen_rewards, gen_images, latent_means, latent_stds = [], [], [], []

    # LSTM states.
    lstm_state = [None] * 7

    # Create scheduled sampling function
    ss_func = self.get_scheduled_sample_func(batch_size)

    pred_image = tf.zeros_like(images[0])
    pred_reward = tf.zeros_like(rewards[0])
    latent = None
    for timestep, image, action, reward in zip(
        range(len(images)-1), images[:-1], actions[:-1], rewards[:-1]):
      # Scheduled Sampling
      done_warm_start = timestep > context_frames - 1
      groundtruth_items = [image, reward]
      generated_items = [pred_image, pred_reward]
      input_image, input_reward = self.get_scheduled_sample_inputs(
          done_warm_start, groundtruth_items, generated_items, ss_func)

      # Latent
      # TODO(mbz): should we use input_image instead of image?
      latent_images = tf.stack([image, images[timestep+1]], axis=0)
      latent_mean, latent_std = self.construct_latent_tower(
          latent_images, time_axis=0)
      latent = common_video.get_gaussian_tensor(latent_mean, latent_std)
      latent_means.append(latent_mean)
      latent_stds.append(latent_std)

      # Prediction
      pred_image, lstm_state, _ = self.construct_predictive_tower(
          input_image, input_reward, action, lstm_state, latent)

      if self.hparams.reward_prediction:
        pred_reward = self.reward_prediction(
            pred_image, input_reward, action, latent)
        pred_reward = common_video.decode_to_shape(
            pred_reward, common_layers.shape_list(input_reward), "reward_dec")
      else:
        pred_reward = input_reward

      gen_images.append(pred_image)
      gen_rewards.append(pred_reward)

    gen_images = tf.stack(gen_images, axis=0)
    gen_rewards = tf.stack(gen_rewards, axis=0)

    return gen_images, gen_rewards, latent_means, latent_stds
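
Because construct_model also returns the latent means and (log-)variances, a variational training setup would typically add a KL regularizer on top of the frame and reward losses. A sketch of that term follows; treating the second tensor as a log-variance is an assumption of this sketch, not something stated in the code above.

import tensorflow as tf  # TF 1.x

def gaussian_kl_to_standard_normal(mean, log_var):
  """KL( N(mean, exp(log_var)) || N(0, 1) ), summed over the latent axis."""
  return 0.5 * tf.reduce_sum(
      tf.square(mean) + tf.exp(log_var) - 1.0 - log_var, axis=-1)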