Exemple #1
0
def trajectory(obs_space: gym.Space, act_space: gym.Space,
               length: int) -> types.Trajectory:
    """Build a random trajectory with `length` steps.

    Draws `length + 1` observations from `obs_space` and `length` actions from
    `act_space` through their `sample()` methods, and pairs every step with
    its own empty info dict.
    """
    num_obs = length + 1
    sampled_obs = np.array([obs_space.sample() for _ in range(num_obs)])
    sampled_acts = np.array([act_space.sample() for _ in range(length)])
    empty_infos = np.array([{} for _ in range(length)])
    return types.Trajectory(
        obs=sampled_obs,
        acts=sampled_acts,
        infos=empty_infos,
    )
Exemple #2
0
def transitions_min(
    obs_space: gym.Space, act_space: gym.Space, length: int
) -> types.TransitionsMinimal:
    """Fixture to generate minimal transitions of length `length`.

    Observations and actions are drawn independently with `sample()` from
    `obs_space` and `act_space`; every transition gets an empty info dict.
    """
    obs = np.array([obs_space.sample() for _ in range(length)])
    acts = np.array([act_space.sample() for _ in range(length)])
    # Build one dict per transition. `[{}] * length` would store the *same*
    # dict object in every slot, so mutating one info would mutate them all.
    infos = np.array([{} for _ in range(length)])
    return types.TransitionsMinimal(obs=obs, acts=acts, infos=infos)
Exemple #3
0
def transitions(obs_space: gym.Space, act_space: gym.Space,
                length: int) -> types.Transitions:
    """Fixture to generate transitions of length `length` iid sampled from spaces.

    Observations, next observations and actions are each drawn with
    `sample()`; every `dones` flag is False.
    """
    obs = np.array([obs_space.sample() for _ in range(length)])
    next_obs = np.array([obs_space.sample() for _ in range(length)])
    acts = np.array([act_space.sample() for _ in range(length)])
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    dones = np.zeros(length, dtype=bool)
    return types.Transitions(obs=obs,
                             acts=acts,
                             next_obs=next_obs,
                             dones=dones)
 def get_actions(
     self, observations: Observations, action_space: gym.Space
 ) -> Actions:
     """Get a batch of predictions (aka actions) for these observations.

     The observations are ignored: the result is a single random sample
     drawn from `action_space`.
     """
     # NOTE: the raw sample is returned as-is; it is not wrapped in
     # `self.target_setting.Actions`.
     return action_space.sample()
Exemple #5
0
    def get_actions(self, observations: ContinualRLSetting.Observations,
                    action_space: gym.Space) -> ContinualRLSetting.Actions:
        """Pick an action for `observations` using `self.model`.

        Each call preprocesses `observations.x` and appends the result to
        `self.test_buffer`. While the buffer holds fewer than 4 entries, a
        random sample from `action_space` is returned. Otherwise the buffered
        entries are stacked, reshaped and fed to `self.model`, and the argmax
        over the last dimension of its output is returned as a numpy array.

        NOTE(review): presumably `self.test_buffer` is bounded to 4 entries
        (e.g. a `deque(maxlen=4)`); if it is an unbounded list, the shape
        assertion below would fail from the fifth call on -- confirm.
        """
        state = observations.x
        # OK so the DQN model is built to handle a sequence of 4 observations?
        # something like that. So we have to do a bit of a "hack" to get it to
        # work here, where we create a buffer of size 4, and populate it with
        # random guesses at first, and once its filled, we can actually predict.
        # This assumes that we're being asked to give actions for a sequence of
        # observations.

        # Not sure in which order the DQN expects the sequence to be.
        state = ProcessFrame84.process(state)
        state = Transforms.to_tensor(state)
        state = Transforms.channels_first_if_needed(state)
        # The observation is buffered *before* the length check, so the fourth
        # call is the first one that reaches the model.
        self.test_buffer.append(state)
        if len(self.test_buffer) < 4:
            print(
                f"Returning random action since we don't yet have 4 observations in the buffer."
            )
            return action_space.sample()
        # TODO: Fix the rest.
        # return action_space.sample()

        # Stack the buffered states along a new leading dimension.
        fake_batch = torch.stack(tuple(self.test_buffer))
        assert fake_batch.shape[0] == 4
        # Fold the 4 stacked entries into dimension 1, keeping the dimensions
        # from index 2 onward.
        # NOTE(review): assumes each buffered state is (1, H, W) so the result
        # is (1, 4, H, W) -- TODO confirm.
        fake_batch = fake_batch.reshape([-1, 4, *fake_batch.shape[2:]])
        # fake_batch = fake_batches.reshape((-1, *fake_batches.shape[2:]))
        # Inference only: no gradients are tracked for the forward pass.
        with torch.no_grad():
            fake_batch = fake_batch.to(self.model.device)
            values = self.model(fake_batch)

        # Choose the index with the highest model output along the last dim.
        chosen_actions = values.argmax(dim=-1)
        return chosen_actions.cpu().numpy()
Exemple #6
0
def transitions(transitions_min: types.TransitionsMinimal,
                obs_space: gym.Space, length: int) -> types.Transitions:
    """Fixture to generate transitions of length `length` iid sampled from spaces.

    Reuses every field of `transitions_min` and adds `length` next
    observations sampled from `obs_space`, plus all-False `dones` flags.
    """
    next_obs = np.array([obs_space.sample() for _ in range(length)])
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    dones = np.zeros(length, dtype=bool)
    return types.Transitions(**dataclasses.asdict(transitions_min),
                             next_obs=next_obs,
                             dones=dones)
Exemple #7
0
def test_seeding_works(base_space: gym.Space):
    """Check that a `Sparse` space with sparsity 0 samples like its base space.

    Both spaces are seeded with the same value; their first samples must
    compare equal.
    """
    seed = 123
    wrapped_space = Sparse(base_space, sparsity=0.)

    base_space.seed(seed)
    expected = base_space.sample()

    wrapped_space.seed(seed)
    actual = wrapped_space.sample()

    assert equals(expected, actual)
Exemple #8
0
def generate_nan_observation(obs_space: gym.Space) -> Any:
    """Return the NaN observation signalling that the environment got no seed.

    A sample is drawn from `obs_space` and passed through `fill_invalid`.

    This assumes the observation is a complex structure that contains
    something float-like; otherwise the logic does not work.
    """
    return fill_invalid(obs_space.sample())
Exemple #9
0
def test_flatten(base_space: gym.Space):
    """Check that flattening agrees between a base space and its `Sparse` wrapper.

    With sparsity 0 and identical seeds, the flattened sample of the wrapped
    space must compare equal to the flattened sample of the base space.
    """
    seed = 123
    wrapped_space = Sparse(base_space, sparsity=0.)

    base_space.seed(seed)
    expected_flat = flatten(base_space, base_space.sample())

    wrapped_space.seed(seed)
    actual_flat = flatten(wrapped_space, wrapped_space.sample())

    assert equals(expected_flat, actual_flat)
Exemple #10
0
 def get_actions(self, observations: Observations, action_space: Space) -> Actions:
     """Record the batch size of `observations.x` and return a random action.

     Appends to `self.batch_sizes`: 0 when `x` has no (or an empty) shape,
     1 when `x` is one-dimensional, otherwise the size of its first dimension.
     """
     x = observations.x
     # This won't work on weirder spaces.
     if action_space.shape:
         assert x.shape[0] == action_space.shape[0]

     if not getattr(x, "shape", None):
         seen_batch_size = 0  # X isn't batched.
     elif x.ndim > 1:
         seen_batch_size = x.shape[0]
     else:
         seen_batch_size = 1
     self.batch_sizes.append(seen_batch_size)

     return action_space.sample()
Exemple #11
0
 def get_actions(self, observation: np.ndarray, action_space: Space):
     """Ignore `observation` and return a random sample from `action_space`."""
     random_action = action_space.sample()
     return random_action
Exemple #12
0
 def get_actions(self, observations: Observations,
                 action_space: gym.Space) -> Actions:
     """Return a random sample from `action_space`; `observations` is unused."""
     random_action = action_space.sample()
     return random_action
Exemple #13
0
def get_dummy_batch_for_space(
    space: gym.Space,
    batch_size: int = 32,
    fill_value: Union[float, int, str] = 0.0,
    time_size: Optional[int] = None,
    time_major: bool = False,
) -> np.ndarray:
    """Returns batched dummy data (using `batch_size`) for the given `space`.

    Note: The returned batch will not pass a `space.contains(batch)` test
    as an additional batch dimension has to be added as dim=0.

    Args:
        space (gym.Space): The space to get a dummy batch for.
        batch_size(int): The required batch size (B). Note that this can also
            be 0 (only if `time_size` is None!), which will result in a
            non-batched sample for the given space (no batch dim).
        fill_value (Union[float, int, str]): The value to fill the batch with
            or "random" for random values.
        time_size (Optional[int]): If not None, add an optional time axis
            of `time_size` size to the returned batch.
        time_major (bool): If True AND `time_size` is not None, return batch
            as shape [T x B x ...], otherwise as [B x T x ...]. If `time_size`
            is None, ignore this setting and return [B x ...].

    Returns:
        The dummy batch of size `batch_size` matching the given space. For
        Dict/Tuple spaces, a structure of such batches (one per sub-space).
    """
    # Complex spaces. Perform recursive calls of this function.
    if isinstance(space, (gym.spaces.Dict, gym.spaces.Tuple)):
        # Forward *all* settings. Passing only `batch_size` and `fill_value`
        # would silently drop the time axis for every nested sub-space.
        return tree.map_structure(
            lambda s: get_dummy_batch_for_space(
                space=s,
                batch_size=batch_size,
                fill_value=fill_value,
                time_size=time_size,
                time_major=time_major,
            ),
            get_base_struct_from_space(space),
        )
    # Primitive spaces: Box, Discrete, MultiDiscrete.
    # Random values: Use gym's sample() method.
    elif fill_value == "random":
        if time_size is not None:
            assert batch_size > 0 and time_size > 0
            if time_major:
                # Outer axis is time, inner axis is batch: [T x B x ...].
                return np.array(
                    [[space.sample() for _ in range(batch_size)]
                     for _ in range(time_size)],
                    dtype=space.dtype,
                )
            else:
                # Outer axis is batch, inner axis is time: [B x T x ...].
                return np.array(
                    [[space.sample() for _ in range(time_size)]
                     for _ in range(batch_size)],
                    dtype=space.dtype,
                )
        else:
            # batch_size == 0 yields a single, non-batched sample.
            return np.array(
                [space.sample() for _ in range(batch_size)]
                if batch_size > 0 else space.sample(),
                dtype=space.dtype,
            )
    # Fill value given: Use np.full.
    else:
        if time_size is not None:
            assert batch_size > 0 and time_size > 0
            if time_major:
                shape = [time_size, batch_size]
            else:
                shape = [batch_size, time_size]
        else:
            # batch_size == 0 means no leading batch dimension at all.
            shape = [batch_size] if batch_size > 0 else []
        return np.full(shape + list(space.shape),
                       fill_value=fill_value,
                       dtype=space.dtype)