Code example #1
0
def make_random_trajectory():
    """Builds a random test trajectory of shape `[1, 6]` (batch, time).

    Observations are scalar `int64` tensors sampled without bounds; actions
    are `int32` bounded to `[1, 2]`.  Policy info is an independent sample
    drawn from the same bounded spec as the actions.

    Returns:
      A `(Trajectory, time_step_spec, action_spec)` tuple.
    """
    time_step_spec = ts.time_step_spec(
        tensor_spec.TensorSpec([], tf.int64, name='observation'))
    action_spec = tensor_spec.BoundedTensorSpec([],
                                                tf.int32,
                                                minimum=1,
                                                maximum=2,
                                                name='action')
    # info and policy state specs match that of TFPolicyMock.
    outer_dims = [1, 6]  # (batch_size, time)
    traj = trajectory.Trajectory(
        observation=tensor_spec.sample_spec_nest(time_step_spec.observation,
                                                 outer_dims=outer_dims),
        action=tensor_spec.sample_bounded_spec(action_spec,
                                               outer_dims=outer_dims),
        policy_info=tensor_spec.sample_bounded_spec(action_spec,
                                                    outer_dims=outer_dims),
        reward=tf.fill(outer_dims, 0.0),
        # step_type is F M L F M L.
        step_type=tf.reshape(tf.range(0, 6) % 3, outer_dims),
        # next_step_type is M L F M L F.
        next_step_type=tf.reshape(tf.range(1, 7) % 3, outer_dims),
        discount=tf.fill(outer_dims, 1.0),
    )
    return traj, time_step_spec, action_spec
Code example #2
0
 def __call__(self, observation, state=()):
     """Epsilon-greedy selection: with probability `self._epsilon` per batch
     element, replace the actor network's action with a random sample from
     the bounded action spec.

     Returns:
       A `(action, state)` tuple; `state` is passed through unchanged.
     """
     greedy_action = self._a_network(observation)[1]
     batch_size = observation.shape[0]
     random_action = tensor_spec.sample_bounded_spec(
         self._a_network.action_spec, outer_dims=[batch_size])
     explore = tf.less(tf.random.uniform([batch_size]), self._epsilon)
     chosen = tf.compat.v2.where(explore, random_action, greedy_action)
     return chosen, state
Code example #3
0
def make_random_trajectory():
    """Creates a random trajectory for testing.

    The trajectory contains Tensors shaped `[1, 6, ...]` where `1` is the
    batch size and `6` is the number of time steps.

    Observations are unbounded but actions are bounded to take values within
    `[1, 2]`.

    Policy info is also provided, drawn independently from the same bounded
    spec as the actions.  It can be removed via:

    ```python
    traj, _, _ = make_random_trajectory()
    traj = traj.clone(policy_info=())
    ```

    Returns:
      A `(Trajectory, time_step_spec, action_spec)` tuple.
    """
    time_step_spec = ts.time_step_spec(
        tensor_spec.TensorSpec([], tf.int32, name='observation'))
    action_spec = tensor_spec.BoundedTensorSpec([],
                                                tf.int32,
                                                minimum=1,
                                                maximum=2,
                                                name='action')
    # info and policy state specs match that of TFPolicyMock.
    outer_dims = [1, 6]  # (batch_size, time)
    traj = trajectory.Trajectory(
        observation=tensor_spec.sample_spec_nest(time_step_spec.observation,
                                                 outer_dims=outer_dims),
        action=tensor_spec.sample_bounded_spec(action_spec,
                                               outer_dims=outer_dims),
        policy_info=tensor_spec.sample_bounded_spec(action_spec,
                                                    outer_dims=outer_dims),
        reward=tf.fill(outer_dims, tf.constant(0, dtype=tf.float32)),
        # step_type is F M L F M L.
        step_type=tf.reshape(tf.range(0, 6) % 3, outer_dims),
        # next_step_type is M L F M L F.
        next_step_type=tf.reshape(tf.range(1, 7) % 3, outer_dims),
        discount=tf.fill(outer_dims, tf.constant(1, dtype=tf.float32)),
    )
    return traj, time_step_spec, action_spec
Code example #4
0
 def __call__(self, observation, state=()):
     """Noisy exploration policy: Gaussian noise plus epsilon-random actions.

     The actor network's action is perturbed with zero-mean Gaussian noise
     (stddev `self._std`), clipped just inside the action-spec bounds, and
     then, with probability `self._eps` per batch element, replaced entirely
     by a random sample from the bounded action spec.

     Returns:
       A `(action, state)` tuple; `state` is passed through unchanged.
     """
     action = self._a_network(observation)[1]
     # Fix: use the TF2 API `tf.random.normal` — the TF1 alias
     # `tf.random_normal` was removed in TF 2.x, and this method already
     # uses TF2 symbols (`tf.random.uniform`, `tf.compat.v2.where`).
     noise = tf.random.normal(shape=action.shape, stddev=self._std)
     action = action + noise
     spec = self._a_network.action_spec
     # Clip slightly inside the bounds by `self._clip_eps` on each side.
     action = tf.clip_by_value(action, spec.minimum + self._clip_eps,
                               spec.maximum - self._clip_eps)
     # NOTE(review): the random action is sampled from the full
     # [minimum, maximum] range, not the clipped range — confirm this
     # asymmetry with the clipped noisy action is intended.
     rand_action = tensor_spec.sample_bounded_spec(
         self._a_network.action_spec, outer_dims=[observation.shape[0]])
     seed = tf.random.uniform([observation.shape[0]])
     is_random = tf.less(seed, self._eps)
     action = tf.compat.v2.where(is_random, rand_action, action)
     return action, state
Code example #5
0
 def __call__(self, observation, state=()):
     """Uniform-random policy: ignores the observation's values and samples
     one action per batch element from the bounded action spec.

     Returns:
       A `(action, state)` tuple; `state` is passed through unchanged.
     """
     batch_size = observation.shape[0]
     sampled_action = tensor_spec.sample_bounded_spec(
         self._action_spec, outer_dims=[batch_size])
     return sampled_action, state