def _set_default_specs(self):
    self.observation_spec = ts.TensorSpec((3, 3), torch.float32)
    self.action_spec = ts.BoundedTensorSpec([7],
                                            dtype=torch.float32,
                                            minimum=-1.0,
                                            maximum=1.0)
    self.time_step_spec = ds.time_step_spec(self.observation_spec,
                                            self.action_spec,
                                            ts.TensorSpec(()))
Example #2
def restart(observation,
            action_spec,
            reward_spec=ts.TensorSpec(()),
            env_id=None,
            env_info={},
            batched=False):
    """Returns a ``TimeStep`` with ``step_type`` set equal to ``StepType.FIRST``.

    Called by ``env.reset()``.

    Args:
        observation (nested tensors): observations of the env.
        action_spec (nested TensorSpec): tensor spec of actions.
        reward_spec (TensorSpec): a rank-1 or rank-0 (default) tensor spec.
        env_id (batched or scalar torch.int32): (optional) ID of the env.
        env_info (dict): extra info returned by the environment.
        batched (bool): (optional) whether the env is batched or not.

    Returns:
        TimeStep:
    """
    return _generate_time_step(batched=batched,
                               observation=observation,
                               step_type=StepType.FIRST,
                               discount=1.,
                               action_spec=action_spec,
                               reward_spec=reward_spec,
                               env_id=env_id,
                               env_info=env_info)
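A minimal usage sketch, not part of the original listing: it assumes ``torch`` is imported and that ``ts`` and ``restart`` resolve to ALF's tensor-spec module and the function above.

observation = torch.zeros((3, 3), dtype=torch.float32)
action_spec = ts.BoundedTensorSpec([1], torch.float32, minimum=-1.0, maximum=1.0)

# The same kind of TimeStep that env.reset() would produce:
first = restart(observation, action_spec)
# first.step_type is StepType.FIRST, first.discount is 1.0, first.reward is 0.0,
# and first.prev_action is a zero tensor of shape (1,) derived from action_spec.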
Example #3
def _generate_time_step(batched,
                        observation,
                        step_type,
                        discount,
                        prev_action=None,
                        action_spec=None,
                        reward=None,
                        reward_spec=ts.TensorSpec(()),
                        env_id=None,
                        env_info={}):
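    """Internal helper shared by ``restart``, ``transition`` and ``termination``.

    It picks numpy or torch as the backing module based on the observation's
    leaves, fills in default ``reward``, ``prev_action`` and ``env_id`` values
    when they are not provided, and builds the generated fields with a leading
    batch dimension (inferred from the observation) when ``batched`` is True.
    """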

    flat_observation = nest.flatten(observation)

    if all(map(_is_numpy_array, flat_observation)):
        md = np
        if reward is not None:
            reward = np.float32(reward)
        discount = np.float32(discount)
    else:
        assert all(
            map(torch.is_tensor,
                flat_observation)), ("Elements in observation must be Tensor")
        md = torch
        if reward is not None:
            reward = to_tensor(reward, dtype=torch.float32)
        discount = to_tensor(discount, dtype=torch.float32)

    if batched:
        batch_size = flat_observation[0].shape[0]
        outer_dims = (batch_size, )
        if env_id is None:
            env_id = md.arange(batch_size, dtype=md.int32)
        if reward is not None:
            assert reward.shape[:1] == outer_dims
        if prev_action is not None:
            flat_action = nest.flatten(prev_action)
            assert flat_action[0].shape[:1] == outer_dims
    else:
        outer_dims = ()
        if env_id is None:
            env_id = md.zeros((), dtype=md.int32)

    step_type = md.full(outer_dims, step_type, dtype=md.int32)
    if reward is None:
        reward = md.zeros(outer_dims + reward_spec.shape, dtype=md.float32)
    discount = md.ones(outer_dims, dtype=md.float32) * discount
    if prev_action is None:
        prev_action = nest.map_structure(
            lambda spec: md.zeros(outer_dims + spec.shape,
                                  dtype=getattr(
                                      md, ts.torch_dtype_to_str(spec.dtype))),
            action_spec)

    return TimeStep(step_type,
                    reward,
                    discount,
                    observation,
                    prev_action,
                    env_id,
                    env_info=env_info)
Example #4
def time_step_spec(observation_spec, action_spec, reward_spec):
    """Returns a ``TimeStep`` spec given the ``observation_spec`` and the
    ``action_spec``.
    """
    def is_valid_tensor_spec(spec):
        return isinstance(spec, ts.TensorSpec)

    assert all(map(is_valid_tensor_spec, nest.flatten(observation_spec)))
    assert all(map(is_valid_tensor_spec, nest.flatten(action_spec)))
    return TimeStep(step_type=ts.TensorSpec([], torch.int32),
                    reward=reward_spec,
                    discount=ts.BoundedTensorSpec([],
                                                  torch.float32,
                                                  minimum=0.0,
                                                  maximum=1.0),
                    observation=observation_spec,
                    prev_action=action_spec,
                    env_id=ts.TensorSpec([], torch.int32))
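A small construction sketch, assuming the same ``ts``/``torch`` context as the snippets above; the scalar reward spec matches the defaults used elsewhere in this listing.

obs_spec = ts.TensorSpec((3, 3), torch.float32)
act_spec = ts.BoundedTensorSpec([1], torch.float32, minimum=-1.0, maximum=1.0)

spec = time_step_spec(obs_spec, act_spec, ts.TensorSpec(()))
# spec.step_type and spec.env_id are scalar int32 specs, spec.discount is
# bounded to [0.0, 1.0], and spec.prev_action simply reuses act_spec.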
def test_close_no_hang_after_init(self):
    constructor = functools.partial(
        RandomAlfEnvironment,
        ts.TensorSpec((3, 3), torch.float32),
        ts.BoundedTensorSpec([1], torch.float32, minimum=-1.0,
                             maximum=1.0),
        episode_end_probability=0,
        min_duration=2,
        max_duration=2)
    env = ProcessEnvironment(constructor)
    env.start()
    env.close()
def __init__(self, crash_at_step, env_id=None):
    super(MockEnvironmentCrashInStep, self).__init__(
        observation_spec=ts.TensorSpec((3, 3), torch.float32),
        action_spec=ts.BoundedTensorSpec([1],
                                         torch.float32,
                                         minimum=-1.0,
                                         maximum=1.0),
        env_id=env_id,
        episode_end_probability=0,
        min_duration=crash_at_step + 1,
        max_duration=crash_at_step + 1)
    self._crash_at_step = crash_at_step
    self._steps = 0
Example #7
def transition(observation,
               prev_action,
               reward,
               reward_spec=ts.TensorSpec(()),
               discount=1.0,
               env_id=None,
               env_info={}):
    """Returns a ``TimeStep`` with ``step_type`` set equal to ``StepType.MID``.

    Called by ``env.step()`` if not 'Done'.

    The batch size is inferred from the shape of ``reward``.

    If ``discount`` is a scalar, and ``observation`` contains tensors,
    then ``discount`` will be broadcasted to match ``reward.shape``.

    Args:
        observation (nested tensors): current observations of the env.
        prev_action (nested tensors): previous actions to the env.
        reward (float): A scalar, or 1D NumPy array, or tensor.
        reward_spec (TensorSpec): a rank-1 or rank-0 (default) tensor spec. Used
            to tell if the transition is batched or not.
        discount (float): (optional) A scalar, or 1D NumPy array, or tensor.
        env_id (torch.int32): (optional) A scalar or 1D tensor of the environment
            ID(s).
        env_info (dict): extra info returned by the environment.

    Returns:
        TimeStep:

    Raises:
        ValueError: If observations are tensors but reward's rank
            is not 0 or 1.
    """
    return _generate_time_step(
        batched=torch.as_tensor(reward).ndim > len(reward_spec.shape),
        observation=observation,
        step_type=StepType.MID,
        discount=discount,
        prev_action=prev_action,
        reward=reward,
        reward_spec=reward_spec,
        env_id=env_id,
        env_info=env_info)
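A hedged sketch of a batched transition under the same assumed context; it illustrates how the batch flag is inferred by comparing the reward's rank with ``reward_spec``.

observation = torch.zeros((2, 3, 3), dtype=torch.float32)  # batch of 2 envs
prev_action = torch.zeros((2, 1), dtype=torch.float32)
reward = torch.zeros(2, dtype=torch.float32)

mid = transition(observation, prev_action, reward, discount=0.9)
# reward has one more dimension than the default scalar reward_spec, so the
# step is treated as batched; the scalar discount is broadcast to shape (2,)
# and env_id defaults to torch.arange(2, dtype=torch.int32).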
Example #8
def termination(observation,
                prev_action,
                reward,
                reward_spec=ts.TensorSpec(()),
                env_id=None,
                env_info={}):
    """Returns a ``TimeStep`` with ``step_type`` set to ``StepType.LAST``.

    Called by ``env.step()`` if 'Done'. ``discount`` should not be passed in;
    it will be set to 0.

    Args:
        observation (nested tensors): current observations of the env.
        prev_action (nested tensors): previous actions to the env.
        reward (float): A scalar, or 1D NumPy array, or tensor.
        reward_spec (TensorSpec): a rank-1 or rank-0 (default) tensor spec. Used
            to tell if the termination is batched or not.
        env_id (torch.int32): (optional) A scalar or 1D tensor of the environment
            ID(s).
        env_info (dict): extra info returned by the environment.

    Returns:
        TimeStep:

    Raises:
        ValueError: If observations are tensors but reward's statically known rank
            is not 0 or 1.
    """
    return _generate_time_step(
        batched=torch.as_tensor(reward).ndim > len(reward_spec.shape),
        observation=observation,
        step_type=StepType.LAST,
        discount=0.,
        prev_action=prev_action,
        reward=reward,
        reward_spec=reward_spec,
        env_id=env_id,
        env_info=env_info)
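And the terminal counterpart, in the same assumed context; unlike ``transition``, the discount is forced to zero.

last = termination(torch.ones((3, 3)), prev_action=torch.zeros(1), reward=1.0)
# last.step_type is StepType.LAST and last.discount is 0.0 for this unbatched
# step (the scalar reward matches the default scalar reward_spec).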
    def __init__(self,
                 observation_spec,
                 action_spec,
                 env_id=None,
                 episode_end_probability=0.1,
                 discount=1.0,
                 reward_fn=None,
                 batch_size=None,
                 seed=42,
                 render_size=(2, 2, 3),
                 min_duration=0,
                 max_duration=None):
        """Initializes the environment.

        Args:
            observation_spec (nested TensorSpec): tensor spec for observations
            action_spec (nested TensorSpec): tensor spec for actions.
            env_id (int): (optional) ID of the environment.
            episode_end_probability (float): Probability an episode will end when the
                environment is stepped.
            discount (float): Discount to set in time_steps.
            reward_fn (Callable): Callable that takes in step_type, action, and
                observation(s), and returns a tensor of rewards.
            batch_size (int): (Optional) Number of observations generated per call.
                If this value is not `None`, then all actions are expected to
                have an additional major axis of size `batch_size`, and all outputs
                will have an additional major axis of size `batch_size`.
            seed (int): Seed to use for rng used in observation generation.
            render_size (tuple of ints): Size of the random render image to return when calling
                render.
            min_duration (int): Number of steps at the beginning of the
                episode during which the episode can not terminate.
            max_duration (int): Optional number of steps after which the episode
                terminates regardless of the termination probability.

        Raises:
            ValueError: If the batch_size argument is not None and does not
                match the shapes of discount or reward.
        """
        self._batch_size = batch_size
        self._observation_spec = observation_spec
        self._action_spec = action_spec
        self._time_step_spec = ds.time_step_spec(
            self._observation_spec, action_spec, ts.TensorSpec(()))
        self._episode_end_probability = episode_end_probability
        discount = np.asarray(discount, dtype=np.float32)
        if env_id is None:
            self._env_id = np.int32(0)
        else:
            self._env_id = np.int32(env_id)

        if self._batch_size:
            if not discount.shape:
                discount = np.tile(discount, self._batch_size)
            if self._batch_size != len(discount):
                raise ValueError(
                    'Size of discounts must equal the batch size.')
        self._discount = discount

        if reward_fn is None:
            # Return a reward whose size matches the batch size
            if self._batch_size is None:
                self._reward_fn = lambda *_: np.float32(0)
            else:
                self._reward_fn = (
                    lambda *_: np.zeros(self._batch_size, dtype=np.float32))
        else:
            self._reward_fn = reward_fn

        self._done = True
        self._num_steps = 0
        self._min_duration = min_duration
        self._max_duration = max_duration
        self._rng = np.random.RandomState(seed)
        self._render_size = render_size
        super(RandomAlfEnvironment, self).__init__()
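A short construction sketch with hypothetical values (same assumed ``ts``/``torch`` context) illustrating the batched discount handling described in the docstring above.

env = RandomAlfEnvironment(
    ts.TensorSpec((3, 3), torch.float32),
    ts.BoundedTensorSpec([1], torch.float32, minimum=-1.0, maximum=1.0),
    batch_size=4,
    discount=0.99)
# The scalar discount is tiled to shape (4,); a discount whose length does not
# equal batch_size would raise the ValueError mentioned above.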
Example #10
def reward_spec(self):
    return ts.TensorSpec(())