def trajectory(obs_space: gym.Space, act_space: gym.Space, length: int) -> types.Trajectory:
    """Fixture building a trajectory of `length` steps sampled iid from the spaces.

    A trajectory holds one more observation than actions, so `length + 1`
    observations are drawn from `obs_space` and `length` actions from
    `act_space`. Every step gets its own empty info dict.
    """
    n_obs = length + 1
    sampled_obs = [obs_space.sample() for _ in range(n_obs)]
    sampled_acts = [act_space.sample() for _ in range(length)]
    empty_infos = [{} for _ in range(length)]
    return types.Trajectory(
        obs=np.array(sampled_obs),
        acts=np.array(sampled_acts),
        infos=np.array(empty_infos),
    )
def transitions_min(
    obs_space: gym.Space, act_space: gym.Space, length: int
) -> types.TransitionsMinimal:
    """Fixture building `length` minimal transitions sampled iid from the spaces.

    Args:
        obs_space: Space the observations are sampled from.
        act_space: Space the actions are sampled from.
        length: Number of transitions to generate.

    Returns:
        A `types.TransitionsMinimal` with `length` observations, actions and
        empty info dicts.
    """
    obs = np.array([obs_space.sample() for _ in range(length)])
    acts = np.array([act_space.sample() for _ in range(length)])
    # Build a fresh dict per step: `[{}] * length` would put the *same* dict
    # object in every slot, so mutating one info would mutate all of them.
    infos = np.array([{} for _ in range(length)])
    return types.TransitionsMinimal(obs=obs, acts=acts, infos=infos)
def transitions(obs_space: gym.Space, act_space: gym.Space, length: int) -> types.Transitions:
    """Fixture to generate transitions of length `length` iid sampled from spaces.

    Args:
        obs_space: Space both `obs` and `next_obs` are sampled from.
        act_space: Space the actions are sampled from.
        length: Number of transitions to generate.

    Returns:
        A `types.Transitions` whose `dones` array is all False.
    """
    obs = np.array([obs_space.sample() for _ in range(length)])
    next_obs = np.array([obs_space.sample() for _ in range(length)])
    acts = np.array([act_space.sample() for _ in range(length)])
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    dones = np.zeros(length, dtype=bool)
    return types.Transitions(obs=obs, acts=acts, next_obs=next_obs, dones=dones)
def get_actions(
    self, observations: Observations, action_space: gym.Space
) -> Actions:
    """Get a batch of predictions (aka actions) for these observations.

    The observations are ignored; the result is a random sample drawn from
    `action_space`.

    Args:
        observations: The observations to act on (unused).
        action_space: Space the action is sampled from.

    Returns:
        Whatever `action_space.sample()` returns.
    """
    # NOTE(review): the raw sample is returned as-is. A second, unreachable
    # `return` that wrapped it in `self.target_setting.Actions(...)` was
    # removed as dead code -- confirm whether callers expect the wrapped form.
    return action_space.sample()
def get_actions(self, observations: ContinualRLSetting.Observations, action_space: gym.Space) -> ContinualRLSetting.Actions:
    """Pick an action for `observations` using the wrapped model.

    The model is fed a stack of 4 processed frames, so each incoming
    observation is preprocessed and appended to `self.test_buffer`. While the
    buffer holds fewer than 4 frames a random action is returned instead.

    NOTE(review): this assumes observations arrive as a sequence and that
    `self.test_buffer` keeps at most 4 entries (the assertion below fails
    otherwise) -- confirm where the buffer is created. The frame order the
    model expects is not known.
    """
    # Preprocess the raw frame: resize/convert, to tensor, channels-first.
    frame = Transforms.channels_first_if_needed(
        Transforms.to_tensor(ProcessFrame84.process(observations.x))
    )
    self.test_buffer.append(frame)

    if len(self.test_buffer) < 4:
        print(
            f"Returning random action since we don't yet have 4 observations in the buffer."
        )
        return action_space.sample()

    # TODO: Fix the rest.
    stacked = torch.stack(tuple(self.test_buffer))
    assert stacked.shape[0] == 4
    # Fold the 4 buffered frames into the second dimension of the batch.
    model_input = stacked.reshape([-1, 4, *stacked.shape[2:]])
    with torch.no_grad():
        model_input = model_input.to(self.model.device)
        q_values = self.model(model_input)
    best = q_values.argmax(dim=-1)
    return best.cpu().numpy()
def transitions(transitions_min: types.TransitionsMinimal, obs_space: gym.Space, length: int) -> types.Transitions:
    """Fixture to generate transitions of length `length` iid sampled from spaces.

    Extends the fields of `transitions_min` with freshly sampled
    `next_obs` and an all-False `dones` array.

    Args:
        transitions_min: Minimal transitions whose fields are reused.
        obs_space: Space `next_obs` is sampled from.
        length: Number of transitions; should match `transitions_min`.

    Returns:
        A `types.Transitions` built from the minimal fields plus the new ones.
    """
    next_obs = np.array([obs_space.sample() for _ in range(length)])
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    dones = np.zeros(length, dtype=bool)
    return types.Transitions(
        **dataclasses.asdict(transitions_min),
        next_obs=next_obs,
        dones=dones,
    )
def test_seeding_works(base_space: gym.Space):
    """With sparsity 0 and the same seed, a Sparse space samples like its base."""
    seed = 123
    sparse_space = Sparse(base_space, sparsity=0.0)

    base_space.seed(seed)
    expected = base_space.sample()

    sparse_space.seed(seed)
    actual = sparse_space.sample()

    assert equals(expected, actual)
def generate_nan_observation(obs_space: gym.Space) -> Any:
    """Build the NaN observation signalling that the environment receives no seed.

    A sample is drawn from `obs_space` and passed through `fill_invalid`.
    We assume that obs is complex and there must be something like float;
    otherwise this logic doesn't work.
    """
    return fill_invalid(obs_space.sample())
def test_flatten(base_space: gym.Space):
    """With sparsity 0, flattened Sparse samples match flattened base samples."""
    seed = 123
    sparse_space = Sparse(base_space, sparsity=0.0)

    base_space.seed(seed)
    expected_flat = flatten(base_space, base_space.sample())

    sparse_space.seed(seed)
    actual_flat = flatten(sparse_space, sparse_space.sample())

    assert equals(expected_flat, actual_flat)
def get_actions(self, observations: Observations, action_space: Space) -> Actions:
    """Record the batch size of `observations.x` and return a random action.

    The value appended to `self.batch_sizes` is 0 when `x` has no (or an
    empty) `shape`, `x.shape[0]` when `x` has more than one dimension, and
    1 otherwise.
    """
    x = observations.x

    # This won't work on weirder spaces.
    if action_space.shape:
        assert x.shape[0] == action_space.shape[0]

    if not getattr(x, "shape", None):
        # X isn't batched.
        recorded = 0
    elif x.ndim > 1:
        recorded = x.shape[0]
    else:
        recorded = 1
    self.batch_sizes.append(recorded)

    return action_space.sample()
def get_actions(self, observation: np.ndarray, action_space: Space):
    """Return a random action sampled from `action_space`.

    The observation is ignored.
    """
    random_action = action_space.sample()
    return random_action
def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
    """Return a random action sampled from `action_space`.

    The observations are ignored.
    """
    random_action = action_space.sample()
    return random_action
def get_dummy_batch_for_space(
    space: gym.Space,
    batch_size: int = 32,
    fill_value: Union[float, int, str] = 0.0,
    time_size: Optional[int] = None,
    time_major: bool = False,
) -> np.ndarray:
    """Returns batched dummy data (using `batch_size`) for the given `space`.

    Note: The returned batch will not pass a `space.contains(batch)` test
    as an additional batch dimension has to be added as dim=0.

    Args:
        space (gym.Space): The space to get a dummy batch for.
        batch_size (int): The required batch size (B). Note that this can also
            be 0 (only if `time_size` is None!), which will result in a
            non-batched sample for the given space (no batch dim).
        fill_value (Union[float, int, str]): The value to fill the batch with
            or "random" for random values.
        time_size (Optional[int]): If not None, add an optional time axis
            of `time_size` size to the returned batch.
        time_major (bool): If True AND `time_size` is not None, return batch
            as shape [T x B x ...], otherwise as [B x T x ...]. If `time_size`
            is None, ignore this setting and return [B x ...].

    Returns:
        The dummy batch of size `batch_size` matching the given space. For
        Dict/Tuple spaces this is a nested structure of such batches.
    """
    # Complex spaces. Perform recursive calls of this function.
    if isinstance(space, (gym.spaces.Dict, gym.spaces.Tuple)):
        # Forward the time settings too; otherwise the leaves of a nested
        # space would silently lose the requested time axis.
        return tree.map_structure(
            lambda s: get_dummy_batch_for_space(
                s,
                batch_size=batch_size,
                fill_value=fill_value,
                time_size=time_size,
                time_major=time_major,
            ),
            get_base_struct_from_space(space),
        )

    # Primitive spaces: Box, Discrete, MultiDiscrete.
    # Random values: Use gym's sample() method.
    elif fill_value == "random":
        if time_size is not None:
            assert batch_size > 0 and time_size > 0
            if time_major:
                return np.array(
                    [
                        [space.sample() for _ in range(batch_size)]
                        for t in range(time_size)
                    ],
                    dtype=space.dtype,
                )
            else:
                return np.array(
                    [
                        [space.sample() for t in range(time_size)]
                        for _ in range(batch_size)
                    ],
                    dtype=space.dtype,
                )
        else:
            return np.array(
                [space.sample() for _ in range(batch_size)]
                if batch_size > 0
                else space.sample(),
                dtype=space.dtype,
            )

    # Fill value given: Use np.full.
    else:
        if time_size is not None:
            assert batch_size > 0 and time_size > 0
            if time_major:
                shape = [time_size, batch_size]
            else:
                shape = [batch_size, time_size]
        else:
            # batch_size == 0 means "no batch dim at all".
            shape = [batch_size] if batch_size > 0 else []
        return np.full(
            shape + list(space.shape), fill_value=fill_value, dtype=space.dtype
        )