Beispiel #1
0
    def pack_condition_features(episode_data, idx):
        """Store one conditioning episode's features/labels in meta_features.

        The episode is passed through episode_to_transitions.make_fixed_length
        with fixed_length, then the per-transition images, gripper poses and
        actions are stacked and written into meta_features under keys that end
        in idx. meta_features and fixed_length are not parameters; they are
        taken from the enclosing scope.

        Args:
          episode_data: List of (obs, action, rew, new_obs, done, debug)
            tuples.
          idx: Index of the conditioning episode. 0 for demo, 1 for first
            trial, etc.
        """
        # The policy is assumed to always adapt from some conditioning data,
        # whether that is a demo and/or earlier trials.
        transitions = episode_to_transitions.make_fixed_length(
            episode_data, fixed_length)

        image_key = 'condition/features/image/%d' % idx
        pose_key = 'condition/features/gripper_pose/%d' % idx
        action_key = 'condition/labels/action/%d' % idx

        # Condition features: element 0 of every transition is the observation.
        images = np.stack([step[0].image for step in transitions])
        poses = np.stack([step[0].pose for step in transitions])
        meta_features[image_key] = images.astype(np.uint8)
        meta_features[pose_key] = poses.astype(np.float32)

        # Condition label: element 1 of every transition is the action.
        actions = np.stack([step[1] for step in transitions])
        meta_features[action_key] = actions.astype(np.float32)
Beispiel #2
0
    def pack_condition_features(episode_data, idx):
        """Pack previous episode data into condition_ep* features/labels.

        The episode is passed through episode_to_transitions.make_fixed_length
        and the stacked results are written into meta_features under keys that
        end in idx. meta_features, fixed_length, vision and
        deterministic_condition are not parameters; they are taken from the
        enclosing scope.

        Args:
          episode_data: List of (obs, action, rew, new_obs, done, debug)
            tuples.
          idx: Index of the conditioning episode. 0 for demo, 1 for first
            trial, etc.
        """
        # Conditioning context: the assumption is that the policy is always
        # adapting from some conditioning data, whether it is demo and/or
        # trials.
        episode_data = episode_to_transitions.make_fixed_length(
            episode_data, fixed_length, randomized=not deterministic_condition)
        # Condition features.
        if vision:
            batch_obs = np.stack([t[0].image for t in episode_data])
            batch_gripper = np.stack([t[0].pose for t in episode_data])
            meta_features['condition/features/image/%d' %
                          idx] = batch_obs.astype(np.uint8)
            meta_features['condition/features/gripper_pose/%d' %
                          idx] = batch_gripper.astype(np.float32)
        else:
            batch_full_state = np.stack(
                [t[0].full_state_pose for t in episode_data])
            meta_features['condition/features/full_state_pose/%d' %
                          idx] = batch_full_state.astype(np.float32)
        # Condition label.
        batch_action = np.stack([t[1] for t in episode_data])
        meta_features['condition/labels/action/%d' %
                      idx] = batch_action.astype(np.float32)

        # Episode-level success flag (1.0 when the summed reward is positive),
        # repeated once per transition. The row count is taken from the stacked
        # actions rather than a hard-coded 40 so the label stays aligned with
        # the other condition tensors for any fixed_length.
        cumulative_return = np.sum([t[2] for t in episode_data])
        num_steps = batch_action.shape[0]
        success = float(cumulative_return > 0) * np.ones(
            (num_steps, 1), dtype=np.float32)
        meta_features['condition/labels/success/%d' % idx] = success
  def test_make_fixed_length(self):
    """Checks make_fixed_length output length and endpoint preservation.

    Runs one episode shorter than the target length and one longer, both with
    always_include_endpoints=True.
    """
    target_length = 10

    for episode_length in (5, 20):
      episode = [{'dummy_feature': step} for step in range(episode_length)]
      resampled = episode_to_transitions.make_fixed_length(
          episode,
          fixed_length=target_length,
          always_include_endpoints=True)

      # The result must have exactly the requested number of entries.
      self.assertLen(resampled, target_length)

      # The first and last entries of the input must survive at the ends.
      self.assertEqual(episode[0], resampled[0])
      self.assertEqual(episode[-1], resampled[-1])