# NOTE: assumed imports. `make_batched_env`, `EnvDataset`, `GymDataLoader` and
# `take` are project-internal helpers; their exact import paths depend on the
# package layout, so only the external dependencies are spelled out here.
import gym
import torch
from torch import Tensor
from gym.vector.utils import batch_space


def test_batched_state(env_name: str, batch_size: int):
    """Check that observations, actions and rewards all gain a leading batch
    dimension when the env is batched and wrapped in a GymDataLoader.
    """
    max_steps_per_episode = 10
    env = make_batched_env(env_name, batch_size=batch_size)
    dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)
    env: GymDataLoader = GymDataLoader(
        dataset,
        batch_size=batch_size,
    )
    # Use a temporary single env to get the unbatched shapes. (Its spaces
    # remain usable after the env is closed.)
    with gym.make(env_name) as temp_env:
        state_shape = temp_env.observation_space.shape
        action_shape = temp_env.action_space.shape

    state_shape = (batch_size, *state_shape)
    action_shape = (batch_size, *action_shape)
    reward_shape = (batch_size,)

    state = env.reset()
    assert state.shape == state_shape
    env.seed(123)

    i = 0
    for obs_batch in take(env, 5):
        assert obs_batch.shape == state_shape

        random_actions = env.action_space.sample()
        assert torch.as_tensor(random_actions).shape == action_shape
        # Each item of the batched action should be a valid single action.
        assert temp_env.action_space.contains(random_actions[0])

        reward = env.send(random_actions)
        assert reward.shape == reward_shape
        i += 1
    assert i == 5
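# These tests take `env_name` and `batch_size` as arguments, so they are
# presumably driven by pytest parametrization or fixtures defined elsewhere in
# the suite. A typical (hypothetical) setup would look like:
#
#     @pytest.mark.parametrize("env_name", ["CartPole-v0"])
#     @pytest.mark.parametrize("batch_size", [1, 2, 5])
#     def test_batched_state(env_name: str, batch_size: int): ...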
def test_spaces(env_name: str, batch_size: int):
    """Check that the GymDataLoader exposes properly batched spaces, and that
    samples from the env belong to those spaces.
    """
    dataset = EnvDataset(make_batched_env(env_name, batch_size=batch_size))

    batched_obs_space = dataset.observation_space
    # NOTE: the VectorEnv class creates its 'batched' action space as a Tuple
    # of 'N' copies of the single action space, which seems a bit weird, so
    # rather than using that space directly, build the expected batched space
    # from the single action space:
    batched_action_space = batch_space(dataset.single_action_space, batch_size)

    dataloader_env = GymDataLoader(dataset, batch_size=batch_size)
    assert dataloader_env.observation_space == batched_obs_space
    assert dataloader_env.action_space == batched_action_space

    dataloader_env.reset()
    for observation_batch in take(dataloader_env, 3):
        if isinstance(observation_batch, Tensor):
            observation_batch = observation_batch.cpu().numpy()
        assert observation_batch in batched_obs_space

        actions = dataloader_env.action_space.sample()
        assert len(actions) == batch_size
        assert actions in batched_action_space

        rewards = dataloader_env.send(actions)
        assert len(rewards) == batch_size
        assert rewards in dataloader_env.reward_space
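# For context, a minimal sketch (not part of the original suite) of how
# `gym.vector.utils.batch_space` batches simple spaces: a Box gains a leading
# batch dimension, while a Discrete becomes a MultiDiscrete of length `n`.
# The assertions below assume gym's standard behavior.
def test_batch_space_sketch():
    single_box = gym.spaces.Box(low=0.0, high=1.0, shape=(4,))
    assert batch_space(single_box, n=2).shape == (2, 4)
    assert batch_space(gym.spaces.Discrete(3), n=2) == gym.spaces.MultiDiscrete([3, 3])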
def _check_environments(self):
    """Do a quick check to make sure that interacting with the
    envs/dataloaders works correctly.
    """
    # Check that the env's spaces are batched versions of the setting's.
    from gym.vector.utils import batch_space
    from sequoia.settings.passive import PassiveEnvironment

    batch_size = self.batch_size
    for loader_method in [
        self.train_dataloader,
        self.val_dataloader,
        self.test_dataloader,
    ]:
        print(f"\n\nChecking loader method {loader_method.__name__}\n\n")
        env = loader_method(batch_size=batch_size)
        batch_size = env.batch_size

        # Comparing the spaces directly would be a bit messy and would depend
        # on the type of each space, so instead we compare batched versions of
        # the setting's spaces, and check samples from them further below.
        if batch_size:
            expected_observation_space = batch_space(
                self.observation_space, n=batch_size
            )
            expected_action_space = batch_space(self.action_space, n=batch_size)
            expected_reward_space = batch_space(self.reward_space, n=batch_size)
        else:
            expected_observation_space = self.observation_space
            expected_action_space = self.action_space
            expected_reward_space = self.reward_space

        # TODO: Batching the 'Sparse' space makes it really ugly, so for now
        # only compare the 'image' portion of the observation space.
        assert (
            env.observation_space[0].shape == expected_observation_space[0].shape
        ), (env.observation_space[0], expected_observation_space[0])
        assert env.action_space == expected_action_space, (
            env.action_space,
            expected_action_space,
        )
        assert env.reward_space == expected_reward_space, (
            env.reward_space,
            expected_reward_space,
        )

        # Check that the 'gym API' interaction is working correctly.
        reset_obs: Observations = env.reset()
        self._check_observations(env, reset_obs)

        for i in range(5):
            actions = env.action_space.sample()
            self._check_actions(env, actions)
            step_observations, step_rewards, done, info = env.step(actions)
            self._check_observations(env, step_observations)
            self._check_rewards(env, step_rewards)
            if batch_size:
                assert not any(done)
            else:
                assert not done

        for batch in take(env, 5):
            observations: Observations
            rewards: Optional[Rewards]
            if isinstance(env, PassiveEnvironment):
                observations, rewards = batch
            else:
                # In RL, the 'dataset' currently only yields the observations.
                observations, rewards = batch, None

            self._check_observations(env, observations)
            if rewards is not None:
                self._check_rewards(env, rewards)

            if batch_size:
                actions = tuple(
                    self.action_space.sample() for _ in range(batch_size)
                )
            else:
                actions = self.action_space.sample()
            rewards = env.send(actions)
            self._check_rewards(env, rewards)

        env.close()
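# For reference, the `take` helper used throughout this file is assumed to
# behave like `itertools.islice`, yielding at most the first `n` items of an
# iterable. A minimal, equivalent sketch (named differently here to avoid
# shadowing the real helper; the project's actual implementation may differ):
import itertools
from typing import Iterable, Iterator, TypeVar

T = TypeVar("T")


def take_sketch(iterable: Iterable[T], n: int) -> Iterator[T]:
    """Yield at most the first `n` items of `iterable`."""
    yield from itertools.islice(iterable, n)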