def batch_sparse_space(space: Sparse, n: int=1) -> gym.Space:
    """Create the batched version of a `Sparse` space.

    Args:
        space: The sparse space to batch.
        n: Number of copies in the batch.

    Returns:
        * When `space.sparsity == 0`: a `Sparse` space (still with sparsity
          0) wrapping `batch_space(space.base, n)`.
        * Otherwise: a `spaces.Tuple` holding `n` references to `space`.
    """
    # NOTE: This means we do something different depending on the sparsity.
    # Could that become an issue?
    sparsity = space.sparsity
    if sparsity == 0:
        # If the space has 0 sparsity, then batch it just like you would its
        # base space.
        # TODO: This is convenient, but not very consistent, as the layout of
        # the batches changes depending on the sparsity of the space.
        return Sparse(batch_space(space.base, n), sparsity=sparsity)

    # Sticking to the default behaviour from gym for now, which is to just
    # return a tuple of length n with n copies of the space.
    # (An alternative would be to push the sparsity down into the sub-spaces
    # of Tuple / Dict base spaces; the code that attempted this was placed
    # after this return and could never run, so it has been removed.)
    return spaces.Tuple(tuple(space for _ in range(n)))
def test_batching_works(base_space: gym.Space, sparsity: float, n: int = 10):
    """Check what batching a `Sparse` space produces for a given sparsity.

    Both the batched base space and the batched sparse space are seeded with
    the same value before being sampled.
    """
    seed = 123
    dense_batched = batch_space(base_space, n)
    sparse_batched = batch_space(Sparse(base_space, sparsity=sparsity), n)

    dense_batched.seed(seed)
    dense_sample = dense_batched.sample()

    sparse_batched.seed(seed)
    sparse_sample = sparse_batched.sample()

    if sparsity == 0:
        # Without sparsity, batching the sparse space gives the same samples
        # as batching the base space.
        assert equals(dense_sample, sparse_sample)
        return

    assert len(sparse_sample) == n
    if sparsity == 1:
        # Full sparsity: every entry is None.
        assert sparse_sample == tuple([None] * n)
        return

    assert isinstance(sparse_sample, tuple)
    for value in sparse_sample:
        if value is not None:
            assert value in base_space
    # There should be some sparsity, but not only `None` entries.
    some_missing = any(v is None for v in sparse_sample)
    all_missing = all(v is None for v in sparse_sample)
    assert some_missing and not all_missing, sparse_sample
def test_universal_encoder():
    """Smoke test: build an encoder for a Dict input space and run one batch.

    The parameter-count check only runs when `budget` is set (it is `None`
    here).
    """
    batch_size = 10
    budget = None

    image_space = spaces.Box(low=0, high=1, shape=[3, 32, 32])
    task_space = spaces.Discrete(2)
    input_space = spaces.Dict({"x": image_space, "t": task_space})
    output_space = spaces.Box(-np.inf, np.inf, shape=[512], dtype=np.float32)

    encoder = create_encoder(input_space, output_space, budget=budget)

    batch_input_space = batch_space(input_space, batch_size)
    batch_output_space = batch_space(output_space, batch_size)
    batch_input_space = add_tensor_support(batch_input_space)
    output_space = add_tensor_support(output_space)

    # Feed one sampled batch through the encoder; the output is not inspected.
    sample = batch_input_space.sample()
    encoder_output = encoder(sample)

    if budget:
        assert n_parameters(encoder) < budget
def __init__(self, envs):
    """Build the batched spaces from a probe env, then create every env.

    Args:
        envs: Sequence of zero-argument callables, each returning an
            environment. The first one is instantiated once to read its
            observation and action spaces, and that probe instance is
            closed again before the actual environments are created.
    """
    probe = envs[0]()
    try:
        self.observation_space = batch_space(probe.observation_space, len(envs))
        self.action_space = batch_space(probe.action_space, len(envs))
    finally:
        # Always release the probe env, even if reading or batching its
        # spaces raised.
        probe.close()
    del probe

    self.processes = [factory() for factory in envs]
def test_batching_works(base_space: gym.Space, n: int = 3):
    """Batched samples of a space and of its `Sparse` wrapper have equal length."""
    dense_batched = batch_space(base_space, n)
    sparse_batched = batch_space(Sparse(base_space), n)

    dense_sample = dense_batched.sample()
    sparse_sample = sparse_batched.sample()

    assert len(dense_sample) == len(sparse_sample)
def test_spaces(env_name: str, batch_size: int):
    """Check the spaces exposed by a `GymDataLoader` over a batched env.

    Observations yielded by the loader, sampled actions, and the rewards
    returned by `send` must all belong to the corresponding batched space.
    """
    env_dataset = EnvDataset(make_batched_env(env_name, batch_size=batch_size))

    expected_obs_space = env_dataset.observation_space
    # NOTE: the VectorEnv class creates the 'batched' action space by creating
    # a Tuple of the single action space, of length 'N', which seems a bit
    # weird, so the expected action space is batched from the single one.
    expected_action_space = batch_space(env_dataset.single_action_space, batch_size)

    loader = GymDataLoader(env_dataset, batch_size=batch_size)
    assert loader.observation_space == expected_obs_space
    assert loader.action_space == expected_action_space

    loader.reset()
    for obs_batch in take(loader, 3):
        if isinstance(obs_batch, Tensor):
            obs_batch = obs_batch.cpu().numpy()
        assert obs_batch in expected_obs_space

        action_batch = loader.action_space.sample()
        assert len(action_batch) == batch_size
        assert action_batch in expected_action_space

        reward_batch = loader.send(action_batch)
        assert len(reward_batch) == batch_size
        assert reward_batch in loader.reward_space
def __init__(self, env_fns, observation_space: Optional[Space] = None, action_space: Optional[Space] = None, copy: bool = True) -> None:
    """Initialize the parent class, then override the action space.

    All arguments are forwarded unchanged (positionally) to the parent
    initializer. Afterwards `self.action_space` is replaced by
    `batch_space(self.single_action_space, self.num_envs)`.
    """
    super().__init__(env_fns, observation_space, action_space, copy)
    batched_action_space = batch_space(self.single_action_space, self.num_envs)
    self.action_space = batched_action_space
def __init__(self, env: Env) -> None:
    """Wrap a single, non-vectorized env as a vector env of size one.

    Args:
        env: The environment to wrap. Must not already be a `VectorEnv`.
    """
    assert not isinstance(env, VectorEnv)
    num_envs = 1

    super().__init__(num_envs, env.observation_space, env.action_space)
    # Use the batched form of the single action space.
    self.action_space = batch_space(env.action_space, num_envs)
    self.env = env

    # Pre-allocated buffers, each with a leading dimension of one.
    self._observation = create_empty_array(
        env.observation_space, n=num_envs, fn=np.zeros
    )
    self._reward = np.zeros((num_envs,), dtype=np.float64)
    self._terminal = np.zeros((num_envs,), dtype=np.bool_)
    self._action = None
def __init__(self, env_fns, observation_space: Optional[Space] = None, action_space: Optional[Space] = None, shared_memory: bool = True, copy: bool = True, context: Optional = None, daemon: bool = True, worker: Optional = None) -> None:
    """Initialize the parent class, then override the action space.

    All arguments are forwarded unchanged (positionally) to the parent
    initializer. Afterwards `self.action_space` is replaced by
    `batch_space(self.single_action_space, self.num_envs)`.
    """
    super().__init__(
        env_fns,
        observation_space,
        action_space,
        shared_memory,
        copy,
        context,
        daemon,
        worker,
    )
    batched_action_space = batch_space(self.single_action_space, self.num_envs)
    self.action_space = batched_action_space
def __init__(self, env: gym.Env, done_space: Space = None):
    """Wrap `env` so that its observation space also carries a `done` entry.

    Args:
        env: The environment to wrap.
        done_space: Space of the `done` entry. When omitted, a scalar boolean
            `Box(0, 1, ())` is used; if `self.is_vectorized` is true, the
            single observation space is extended with it and the space is
            then batched to `self.env.num_envs` entries.
    """
    super().__init__(env)
    if done_space is None:
        # boolean value. (0 or 1)
        # `np.bool` was only a deprecated alias of the builtin `bool` and
        # raises AttributeError on NumPy 1.24-1.26, so use `bool` directly.
        done_space = spaces.Box(0, 1, (), dtype=bool)
        if self.is_vectorized:
            self.single_observation_space = add_done(
                self.single_observation_space, done_space
            )
            done_space = batch_space(done_space, self.env.num_envs)
    self.done_space = done_space
    self.observation_space = add_done(self.env.observation_space, self.done_space)
def __init__(self, env, batch_size: int, fomdp: bool = False, time_limit: Optional[int] = None, float32: bool = False, **kwargs):
    """Set up a batched view over a single POMDP environment.

    Args:
        env: Environment whose `unwrapped` attribute must be a `POMDP`.
        batch_size: Number of batched copies; must be positive.
        fomdp: If true, the single observation space is `env.state_space`.
            Otherwise it is a `Discrete` space with one more value than
            `env.observation_space`, and that extra value is stored as
            `self._start_obs`.
        time_limit: Value stored as `self.max_time`; any falsy value
            (including the default `None`) falls back to 3000000.
        float32: If true, the `O` and `T` arrays of the env (and `D`, when
            present) are converted to float32.
        **kwargs: Accepted but not used by this constructor.

    Raises:
        TypeError: If `env.unwrapped` is not a `POMDP`.
        ValueError: If `batch_size` is not positive.
    """
    from gym.vector.utils import batch_space

    if not isinstance(env.unwrapped, POMDP):
        raise TypeError(f'Env is not a POMDP (got {type(env)}).')
    if batch_size <= 0:
        raise ValueError(
            f'Batch size is not positive (got ({batch_size}).')

    super().__init__(env)
    self.num_envs = batch_size
    self.state = np.full([batch_size], -1, dtype=int)

    if fomdp:
        single_obs_space = env.state_space
        start_obs = None
    else:
        n_obs = env.observation_space.n
        # One extra value on top of the env's own observations.
        single_obs_space = gym.spaces.Discrete(n_obs + 1)
        start_obs = n_obs
    self.single_observation_space = single_obs_space
    self._observable = bool(fomdp)
    self._start_obs = start_obs

    self.observation_space = batch_space(self.single_observation_space,
                                         batch_size)
    self.single_action_space = self.action_space
    self.action_space = batch_space(self.single_action_space, batch_size)

    # Reduce space, make compatible with networks
    self.env.R = self.env.R.astype(np.float32)

    self.max_time = time_limit if time_limit else 3000000
    self.elapsed_time = np.zeros([batch_size], dtype=int)

    del self.env.model  # More space

    if float32:
        for array_name in ('O', 'T'):
            converted = getattr(self.env, array_name).astype(np.float32)
            setattr(self.env, array_name, converted)
        if hasattr(self.env, 'D'):
            self.env.D = self.env.D.astype(np.float32)
def test_batch_space():
    """Batching a `NamedTupleSpace` batches each field and keeps its dtype."""
    transition_space = NamedTupleSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    batched = batch_space(transition_space, n=5)

    # Box fields gain a leading batch dimension; the Discrete field becomes a
    # MultiDiscrete with one entry per batch element.
    expected = NamedTupleSpace(
        current_state=Box(0, 1, (5, 2, 2)),
        action=spaces.MultiDiscrete([2, 2, 2, 2, 2]),
        next_state=Box(0, 1, (5, 2, 2)),
        dtype=StateTransition,
    )
    assert batched == expected
def __init__(self, env: gym.Env, info_space: spaces.Space = None):
    """Wrap `env` so that its observation space also carries an `info` entry.

    Args:
        env: The environment to wrap.
        info_space: Space of the `info` entry. When omitted, an empty
            `spaces.Dict` is used, batched to `self.env.num_envs` entries
            if the unwrapped env is a `VectorEnv`.
    """
    super().__init__(env)
    self.is_vectorized = isinstance(env.unwrapped, VectorEnv)
    # TODO: Should we make 'info_space' mandatory here?
    if info_space is None:
        # TODO: There seems to be some issues if we have an empty info space
        # before the batching.
        info_space = spaces.Dict({})
        if self.is_vectorized:
            info_space = batch_space(info_space, self.env.num_envs)
    self.info_space = info_space

    # The augmented space belongs on `observation_space`. It was previously
    # assigned to `self.observation`, which left `observation_space`
    # untouched and stored a space under a name that gym observation
    # wrappers use for a method.
    self.observation_space = add_info(self.env.observation_space, self.info_space)
def __init__(self, env: gym.Env, done_space: Space = None):
    """Wrap `env` so that its observation space also carries a `done` entry.

    Args:
        env: The environment to wrap.
        done_space: Space of the `done` entry. When omitted, a scalar boolean
            `Box(0, 1, ())` is used; if the unwrapped env is a `VectorEnv`,
            the single observation space is extended with it and the space
            is then batched to `self.env.num_envs` entries.
    """
    super().__init__(env)
    # (truncated original note) ... happens in the VectorEnv, done is always
    # False!
    self.is_vectorized = isinstance(env.unwrapped, VectorEnv)
    if done_space is None:
        # boolean value. (0 or 1)
        # `np.bool` was only a deprecated alias of the builtin `bool` and
        # raises AttributeError on NumPy 1.24-1.26, so use `bool` directly.
        done_space = spaces.Box(0, 1, (), dtype=bool)
        if self.is_vectorized:
            self.single_observation_space = add_done(
                self.single_observation_space, done_space
            )
            done_space = batch_space(done_space, self.env.num_envs)
    self.done_space = done_space
    self.observation_space = add_done(self.env.observation_space, self.done_space)
def __init__(
    self,
    env_fn: Callable[[], gym.Env],
    batch_size: int,
    new_episode_length: Callable[[int], int],
    episode_lengths: Sequence[int] = None,
):
    """Create `batch_size` envs from `env_fn` and set up episode lengths.

    Args:
        env_fn: Zero-argument callable producing one environment; the same
            callable is passed `batch_size` times to the parent initializer.
        batch_size: Number of environments.
        new_episode_length: Callable taking an env index and returning an
            episode length. Stored on the instance, and used here to build
            the initial lengths when `episode_lengths` is missing.
        episode_lengths: Optional initial episode length per env. `None` or
            an empty sequence means "generate them with
            `new_episode_length`".
    """
    super().__init__([env_fn for _ in range(batch_size)])
    self.new_episode_length = new_episode_length
    self.batch_size = batch_size

    # An explicit None/empty check instead of `episode_lengths or [...]`:
    # the truth value of a multi-element numpy array is ambiguous and raises.
    if episode_lengths is None or len(episode_lengths) == 0:
        episode_lengths = [new_episode_length(i) for i in range(self.num_envs)]
    self.episode_lengths = np.array(episode_lengths)
    self.steps_left_in_episode = self.episode_lengths.copy()

    # Scalar reward space bounded by `self.reward_range`, plus its batched
    # counterpart.
    reward_space = spaces.Box(*self.reward_range, shape=())
    self.single_reward_space = reward_space
    self.reward_space = batch_space(reward_space, batch_size)
def _batch_typed_dict_space(space: TypedDictSpace, n: int = 1) -> spaces.Dict:
    """Batch every sub-space of `space`, keeping its concrete type and dtype.

    The result is built with `type(space)`, so it is an instance of the same
    class as the input.
    """
    batched_subspaces = {}
    for key, subspace in space.spaces.items():
        batched_subspaces[key] = batch_space(subspace, n=n)
    space_type = type(space)
    return space_type(batched_subspaces, dtype=space.dtype)
def __init__(
    self,
    dataset: Union[IterableDataset, Dataset],
    split_batch_fn: Callable[
        [Tuple[Any, ...]], Tuple[ObservationType, ActionType]
    ] = None,
    observation_space: gym.Space = None,
    action_space: gym.Space = None,
    reward_space: gym.Space = None,
    n_classes: int = None,
    pretend_to_be_active: bool = False,
    strict: bool = False,
    **kwargs,
):
    """Creates the DataLoader/Environment for the given dataset.

    Parameters
    ----------
    dataset : Union[IterableDataset, Dataset]
        The dataset to iterate on. Should ideally be indexable (a Map-style
        dataset).

    split_batch_fn : Callable[ [Tuple[Any, ...]], Tuple[ObservationType, ActionType] ], optional
        A function to call on each item in the dataset in order to split it into
        Observations and Rewards, by default None, in which case we assume that the
        dataset items are tuples of length 2.

    observation_space : gym.Space, optional
        The single (non-batched) observation space. Defaults to `None`, in which
        case this will try to infer the shape of the space using the first item in
        the dataset.

    action_space : gym.Space, optional
        The non-batched action space. Defaults to None, in which case the
        `n_classes` argument must be passed, and the action space is assumed to be
        discrete (i.e. that the loader is for a classification dataset).

    reward_space : gym.Space, optional
        The non-batched reward (label) space. Defaults to `None`, in which case it
        will be the same as the action space (as is the case in classification).

    n_classes : int, optional
        Number of classes in the dataset. Used in case `action_space` isn't passed.
        Defaults to `None`.

    pretend_to_be_active : bool, optional
        Whether to withhold the rewards (labels) from the batches when being
        iterated on like the usual dataloader, and to only give them back
        after an action is received through the 'send' method. False by default,
        in which case this behaves exactly as a normal dataloader when being
        iterated on.

        When False, the batches yielded by this dataloader will be of the form
        `Tuple[Observations, Rewards]` (as usual in SL).
        However, when set to True, the batches will be `Tuple[Observations, None]`!
        Rewards will then be returned by the environment when an action is passed to
        the Send method.

    strict : bool, optional
        Stored as `self._strict`; this constructor does not otherwise use it.
        By default False.

    **kwargs
        Forwarded to the parent class initializer.

    # Examples:
    ```python
    train_env = PassiveEnvironment(MNIST("data"), batch_size=32, n_classes=10)

    # The usual Dataloader-style:
    for x, y in train_env:
        # train as usual
        (...)

    # OpenAI Gym style:
    for episode in range(5):
        # NOTE: "episode" in RL is an "epoch" in SL:
        obs = train_env.reset()
        done = False
        while not done:
            actions = train_env.action_space.sample()
            obs, rewards, done, info = train_env.step(actions)
    ```
    """
    super().__init__(dataset=dataset, **kwargs)
    self.split_batch_fn = split_batch_fn

    # NOTE: The "was this space passed?" checks below use `is None` rather
    # than truthiness, because composite spaces that define `__len__` are
    # falsy when empty and would wrongly be treated as missing.

    # TODO: When the spaces aren't passed explicitly, assumes a classification dataset.
    if observation_space is None:
        # NOTE: Assuming min/max of 0 and 1 respectively, but could actually use
        # min_max of the dataset samples too.
        first_item = self.dataset[0]
        if isinstance(first_item, tuple):
            x, *_ = first_item
        else:
            assert isinstance(first_item, (np.ndarray, Tensor))
            x = first_item
        observation_space = Image(0.0, 1.0, x.shape)

    if action_space is None:
        assert n_classes, "must pass either `action_space`, or `n_classes` for now"
        action_space = spaces.Discrete(n_classes)
    elif isinstance(action_space, spaces.Discrete):
        n_classes = action_space.n

    if reward_space is None:
        # Assuming a classification dataset by default:
        # (action space = reward space = Discrete(n_classes))
        reward_space = action_space

    assert observation_space is not None
    assert action_space is not None
    assert reward_space is not None

    self.single_observation_space: Space = observation_space
    self.single_action_space: Space = action_space
    self.single_reward_space: Space = reward_space

    if self.batch_size:
        observation_space = batch_space(observation_space, self.batch_size)
        action_space = batch_space(action_space, self.batch_size)
        reward_space = batch_space(reward_space, self.batch_size)

    self.observation_space: gym.Space = add_tensor_support(observation_space)
    self.action_space: gym.Space = add_tensor_support(action_space)
    self.reward_space: gym.Space = add_tensor_support(reward_space)

    self.pretend_to_be_active = pretend_to_be_active
    self._strict = strict
    self._reward_queue = deque(maxlen=10)

    self.n_classes: Optional[int] = n_classes
    self._iterator: Optional[_BaseDataLoaderIter] = None
    # NOTE: These here are never processed with self.observation or self.reward.
    self._previous_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._current_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._next_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._done: Optional[bool] = None
    self._closed: bool = False

    self._action: Optional[ActionType] = None

    # from gym.envs.classic_control.rendering import SimpleImageViewer
    self.viewer = None
def batch_namedtuple_space(space: NamedTupleSpace, n: int = 1):
    """Batch each named field of `space` and rebuild a `NamedTupleSpace`.

    The `dtype` of the input space is passed through to the result.
    """
    batched_fields = {}
    for key in space.names:
        batched_fields[key] = batch_space(space[key], n)
    return NamedTupleSpace(**batched_fields, dtype=space.dtype)
def __init__(self,
             env: Union[EnvDataset, PolicyEnv] = None,
             dataset: Union[EnvDataset, PolicyEnv] = None,
             batch_size: int = None,
             num_workers: int = None,
             **kwargs):
    """Create a DataLoader that is also a gym wrapper around `env`.

    Args:
        env: The environment to iterate on; must be an `IterableDataset`.
        dataset: Alternative name for `env`. Exactly one of `env` and
            `dataset` must be given.
        batch_size: Ignored (with a warning when it differs) if the
            unwrapped env is a `VectorEnv`, in which case `env.num_envs`
            is used instead.
        num_workers: Always overwritten here: `env.n_workers` for a
            `BatchedVectorEnv`, `env.num_envs` for an `AsyncVectorEnv`,
            and 0 otherwise.
        **kwargs: Forwarded to the DataLoader initializer.

    Raises:
        RuntimeError: If the given env is not an `IterableDataset`.
    """
    assert not (
        env is None and dataset is None
    ), "One of the `dataset` or `env` arguments must be passed."
    assert not (
        env is not None and dataset is not None
    ), "Only one of the `dataset` and `env` arguments can be used."

    # Both names refer to the same object. Previously `dataset` was validated
    # above but never used, so passing only `dataset=` always raised the
    # RuntimeError below.
    if env is None:
        env = dataset

    if not isinstance(env, IterableDataset):
        raise RuntimeError(
            f"The env {env} isn't an interable dataset! (You can use the "
            f"EnvDataset or PolicyEnv wrappers to make an IterableDataset "
            f"from a gym environment.")

    if isinstance(env.unwrapped, VectorEnv):
        if batch_size is not None and batch_size != env.num_envs:
            logger.warning(
                UserWarning(
                    f"The provided batch size {batch_size} will be ignored, since "
                    f"the provided env is vectorized with a batch_size of "
                    f"{env.unwrapped.num_envs}."))
        batch_size = env.num_envs

    if isinstance(env.unwrapped, BatchedVectorEnv):
        num_workers = env.n_workers
    elif isinstance(env.unwrapped, AsyncVectorEnv):
        num_workers = env.num_envs
    else:
        num_workers = 0

    self.env = env
    # TODO: We could also perhaps let those parameters through to the
    # constructor of DataLoader, because in __iter__ we're not using the
    # DataLoader iterator anyway! This would have the benefit that the
    # batch_size and num_workers attributes would reflect the actual state
    # of the iterator, and things like pytorch-lightning would stop warning
    # us that the num_workers is too low.
    super().__init__(
        dataset=self.env,
        # For a vectorized env this is its number of envs (the VecEnv takes
        # care of doing the batching for us).
        batch_size=batch_size,
        num_workers=num_workers,
        # collate_fn=None,
        **kwargs,
    )
    Wrapper.__init__(self, env=self.env)
    assert not isinstance(
        self.env, GymDataLoader), "Something very wrong is happening."

    # self.max_epochs: int = max_epochs
    self.observation_space: gym.Space = self.env.observation_space
    self.action_space: gym.Space = self.env.action_space

    if isinstance(env.unwrapped, VectorEnv):
        batch_size = env.num_envs
        # TODO: Overwriting the action space to be the 'batched' version of
        # the single action space, rather than a Tuple(Discrete, ...) as is
        # done in the gym.vector.VectorEnv.
        self.action_space = batch_space(env.single_action_space, batch_size)

    if not hasattr(self.env, "reward_space"):
        # Build a scalar reward space from the env's reward range.
        self.reward_space = spaces.Box(
            low=self.env.reward_range[0],
            high=self.env.reward_range[1],
            shape=(),
        )
        if isinstance(self.env.unwrapped, VectorEnv):
            # Same here, we use a 'batched' space rather than Tuple.
            self.reward_space = batch_space(self.reward_space, batch_size)
def _check_environments(self):
    """Quick sanity check that the envs/dataloaders can be interacted with.

    For each of the train/val/test loader methods, this creates the env,
    compares its spaces with the (possibly batched) spaces of the setting,
    takes five steps through the gym API, iterates over five batches while
    sending actions, then closes the env.
    """
    # Check that the env's spaces are batched versions of the settings'.
    from gym.vector.utils import batch_space
    from sequoia.settings.passive import PassiveEnvironment

    batch_size = self.batch_size
    loader_methods = (
        self.train_dataloader,
        self.val_dataloader,
        self.test_dataloader,
    )
    for loader_method in loader_methods:
        print(f"\n\nChecking loader method {loader_method.__name__}\n\n")
        env = loader_method(batch_size=batch_size)

        # NOTE: the batch size reported by the env is also the one that gets
        # passed to the next loader method.
        batch_size = env.batch_size

        def _expected(space):
            # Batched version of `space` when a batch size is in use.
            return batch_space(space, n=batch_size) if batch_size else space

        # We could compare the spaces directly, but that's a bit messy, and
        # would depend on the type of spaces for each. Instead, we check
        # samples from such spaces on how the spaces are batched.
        expected_observation_space = _expected(self.observation_space)
        expected_action_space = _expected(self.action_space)
        expected_reward_space = _expected(self.reward_space)

        # TODO: Batching the 'Sparse' makes it really ugly, so just
        # comparing the 'image' portion of the space for now.
        actual_image_space = env.observation_space[0]
        expected_image_space = expected_observation_space[0]
        assert actual_image_space.shape == expected_image_space.shape, (
            env.observation_space[0],
            expected_observation_space[0],
        )
        assert env.action_space == expected_action_space, (
            env.action_space,
            expected_action_space,
        )
        assert env.reward_space == expected_reward_space, (
            env.reward_space,
            expected_reward_space,
        )

        # Check that the 'gym API' interaction is working correctly.
        reset_obs: Observations = env.reset()
        self._check_observations(env, reset_obs)

        for _ in range(5):
            actions = env.action_space.sample()
            self._check_actions(env, actions)
            step_observations, step_rewards, done, info = env.step(actions)
            self._check_observations(env, step_observations)
            self._check_rewards(env, step_rewards)
            # `done` is treated as one flag per env when batched.
            assert not (any(done) if batch_size else done)

        # Check the 'dataloader API': iterate on the env and send actions.
        for batch in take(env, 5):
            if isinstance(env, PassiveEnvironment):
                observations, rewards = batch
            else:
                # in RL atm, the 'dataset' gives back only the observations.
                observations, rewards = batch, None

            self._check_observations(env, observations)
            if rewards is not None:
                self._check_rewards(env, rewards)

            if batch_size:
                actions = tuple(
                    self.action_space.sample() for _ in range(batch_size)
                )
            else:
                actions = self.action_space.sample()
            # actions = self.Actions(torch.as_tensor(actions))
            rewards = env.send(actions)
            self._check_rewards(env, rewards)

        env.close()
def __init__(self, envs, *, method=None, timeout=15): ctx = mp # .get_context(method or 'forkserver') # poll an environment for its specs # XXX what if the environments are different? env = envs[0]() self.observation_space = batch_space(env.observation_space, len(envs)) self.action_space = batch_space(env.action_space, len(envs)) env.close() del env # construct shared memory buffers from env's space specs # for efficient exchange of large objects of fixed type. shm_obs = from_space(self.observation_space, ctx=ctx) shm_act = from_space(self.action_space, ctx=ctx) self.shared = shm_obs, shm_act # setup producer-consumer synchronization: a common Barrier that we # and all workers wait at for mutual synchronization and signaling # a complete batch. self.finished, self.timeout = ctx.Barrier(1 + len(envs)), timeout # self.sem_act, self.sem_obs = ctx.Semaphore(0), ctx.Semaphore(0) # spawn a worker processes for each environment state, self._errors = [], ctx.Queue() for j, env in enumerate(envs): # Create unidirectional (non-duplex) pipes, and connect them in # `crossover` mode between us and them (the worker). This set-up # avoids rare occasions where the main process or the worker read # back their own just issued message. ut_rx, ut_tx = ctx.Pipe(duplex=False) tu_rx, tu_tx = ctx.Pipe(duplex=False) # The connection end points have the following meanigs: # * `ut` and `tu` stand for `us-them` and `them-us`, respectively; # * [their] `ut_rx` (read) and `tu_tx` (write) ends are used by the # worker to receive signals and yield results, respectively; # * [our] `ut_tx` (write) and `tu_rx` (read) ends are used by the # main process to issue commands and read back responses. 
our, their = Endpoint(tu_rx, ut_tx), Endpoint(ut_rx, tu_tx) # crossover handles: share the us-them rx and them-us tx ends p = ctx.Process( args=(j, CloudpickleSpawner(env), their, our, self.shared), kwargs=dict(errors=self._errors, barrier=self.finished), target=vecEnvWorker.target, daemon=True, ) p.start() # close handles unused by us: them-us tx (write), us-them rx (read) # XXX pipes and connections are commonly implemented through file # descriptors, which imposes a limit on their max number. Also # connections are closed when garbage collected (`__del__`). their.tx.close() # tu_tx their.rx.close() # ut_rx state.append((p, our)) # rebuild numpy buffers, assign processes and establish communcations self.buf_obs, self.buf_act = from_shared(self.shared) self.processes, self.comm = zip(*state) # wait until all workers have started if not all(self._wait()._recv()): raise RuntimeError('Failed to launch all worker subprocesses.')
def test_loop(self, method: Method) -> "IncrementalAssumption.Results":
    """ WIP: Continual test loop.

    Puts `method` in testing mode and calls `method.test` on the test
    environment. If that raises `NotImplementedError`, the environment is
    stepped manually instead, asking the method for actions at each step.
    The results taken from the test environment are logged to wandb when a
    run is active, and returned.
    """
    test_env = self.test_dataloader()
    test_env: TestEnvironment

    was_training = method.training
    method.set_testing()

    try:
        # If the Method has `test` defined, use it.
        method.test(test_env)
        test_env.close()
        test_env: TestEnvironment
        # Get the metrics from the test environment
        test_results: Results = test_env.get_results()

    except NotImplementedError:
        logger.debug(f"Will query the method for actions at each step, "
                     f"since it doesn't implement a `test` method.")
        obs = test_env.reset()

        # TODO: Do we always have a maximum number of steps? or of episodes?
        # Will it work the same for Supervised and Reinforcement learning?
        max_steps: int = getattr(test_env, "step_limit", None)

        # Reset on the last step is causing trouble, since the env is closed.
        pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
        episode = 0

        for step in pbar:
            if obs is None:
                break
            # NOTE: The env might not be closed, while `obs` is actually still there.
            # if test_env.is_closed():
            #     logger.debug(f"Env is closed")
            #     break
            # logger.debug(f"At step {step}")

            # BUG: Need to pass an action space that actually reflects the batch
            # size, even for the last batch!

            # BUG: This doesn't work if the env isn't batched.
            action_space = test_env.action_space
            # `num_envs` takes precedence over `batch_size`; 0 when the env
            # has neither attribute.
            batch_size = getattr(test_env, "num_envs",
                                 getattr(test_env, "batch_size", 0))
            env_is_batched = batch_size is not None and batch_size >= 1
            if env_is_batched:
                # NOTE: Need to pass an action space that actually reflects the batch
                # size, even for the last batch!
                obs_batch_size = obs.x.shape[0] if obs.x.shape else None
                action_space_batch_size = (test_env.action_space.shape[0]
                                           if test_env.action_space.shape else None)
                if (obs_batch_size is not None
                        and obs_batch_size != action_space_batch_size):
                    # Re-batch the single action space to the size of this
                    # batch of observations.
                    action_space = batch_space(
                        test_env.single_action_space, obs_batch_size)

            action = method.get_actions(obs, action_space)

            # logger.debug(f"action: {action}")
            # TODO: Remove this:
            if isinstance(action, Actions):
                action = action.y_pred
            if isinstance(action, Tensor):
                action = action.detach().cpu().numpy()

            if test_env.is_closed():
                break

            obs, reward, done, info = test_env.step(action)

            if done and not test_env.is_closed():
                # logger.debug(f"end of test episode {episode}")
                obs = test_env.reset()
                episode += 1

        test_env.close()
        test_results: Results = test_env.get_results()

    if wandb.run:
        d = add_prefix(test_results.to_log_dict(), prefix="Test", sep="/")
        # d = add_prefix(test_metrics.to_log_dict(), prefix="Test", sep="/")
        # d["current_task"] = task_id
        wandb.log(d)

    # Restore 'training' mode, if it was set at the start.
    # NOTE(review): this is skipped if anything above raises (other than the
    # handled NotImplementedError) — consider a try/finally.
    if was_training:
        method.set_training()

    return test_results
def test_check_iterate_and_step(
    self,
    setting_kwargs: Dict[str, Any],
    batch_size: Optional[int],
):
    """ Test that the observations are of the right type and shape, regardless
    of whether we iterate on the env by calling 'step' or by using it as a
    DataLoader.

    The train and val environments are checked with the given `batch_size`;
    the test environment is checked with `batch_size=None`.
    """
    # Use a temporary env to get the expected (single-env) spaces.
    with gym.make(setting_kwargs["dataset"]) as temp_env:
        expected_x_space = temp_env.observation_space
        expected_action_space = temp_env.action_space

    setting = self.Setting(**setting_kwargs, num_workers=0)

    if batch_size is not None:
        expected_batched_x_space = batch_space(expected_x_space, batch_size)
        expected_batched_action_space = batch_space(
            setting.action_space, batch_size
        )
    else:
        expected_batched_x_space = expected_x_space
        expected_batched_action_space = expected_action_space

    assert setting.observation_space.x == expected_x_space
    assert setting.action_space == expected_action_space

    # TODO: This is changing:
    assert setting.train_transforms == []
    # assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels]

    # NOTE: The two helpers below read the `expected_*` variables at call
    # time, so rebinding those variables further down changes what they check.
    def check_env_spaces(env: gym.Env) -> None:
        if env.batch_size is not None:
            # TODO: This might not be totally accurate, for example because the
            # TransformObservation wrapper applied to a VectorEnv doesn't change the
            # single_observation_space, AFAIR.
            assert env.single_observation_space.x == expected_x_space
            assert env.single_action_space == expected_action_space

            assert isinstance(env.observation_space, TypedDictSpace), (env, env.observation_space)
            assert env.observation_space.x == expected_batched_x_space
            assert env.action_space == expected_batched_action_space
        else:
            assert env.observation_space.x == expected_x_space
            assert env.action_space == expected_action_space

    # FIXME: Move this to an instance method on the test class so that subclasses
    # can change stuff in it.
    def check_obs(obs: ContinualRLSetting.Observations) -> None:
        if isinstance(self.Setting, partial):
            # NOTE: This happens when we sneakily switch out the self.Setting
            # attribute in other tests (for the SettingProxy for example).
            assert isinstance(obs, self.Setting.args[0].Observations)
        else:
            assert isinstance(obs, self.Setting.Observations)
        assert obs.x in expected_batched_x_space
        # In this particular case here, the task labels should be None.
        # FIXME: For IncrementalRL, this isn't correct! TestIncrementalRL should
        # therefore have its own version of this function.
        if self.Setting is ContinualRLSetting:
            assert obs.task_labels is None or all(
                task_label == None for task_label in obs.task_labels
            )

    with setting.train_dataloader(batch_size=batch_size, num_workers=0) as env:
        assert env.batch_size == batch_size
        check_env_spaces(env)

        obs = env.reset()
        # BUG: TODO: The observation space that we use should actually check with
        # isinstance and over the fields that fit in the space. Here there is a bug
        # because the env observations also have a `done` field, while the space
        # doesn't.
        # assert obs in env.observation_space
        assert obs.x in env.observation_space.x  # this works though.

        # BUG: This doesn't currently work: (would need a tuple value rather than an
        # array.
        # assert obs.task_labels in env.observation_space.task_labels

        if batch_size:
            # FIXME: This differs between ContinualRL and IncrementalRL:
            if not setting.known_task_boundaries_at_train_time:
                assert obs.task_labels[0] in setting.task_label_space
                assert tuple(obs.task_labels) in env.observation_space.task_labels
            else:
                assert obs.task_labels[0] in setting.task_label_space
                assert obs.task_labels in env.observation_space.task_labels
                assert (
                    np.array(obs.task_labels) in env.observation_space.task_labels
                )
        else:
            assert obs.task_labels in env.observation_space.task_labels

        reset_obs = env.reset()
        check_obs(reset_obs)

        # BUG: Environment is closed? (batch_size = 3, dataset = 'CartPole-v0')
        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            _ = env.send(env.action_space.sample())

    with setting.val_dataloader(batch_size=batch_size, num_workers=0) as env:
        assert env.batch_size == batch_size
        check_env_spaces(env)

        reset_obs = env.reset()
        check_obs(reset_obs)

        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            _ = env.send(env.action_space.sample())

    # NOTE: Limiting the batch size at test time to None (i.e. a single env)
    # because of how the Monitor class works atm.
    batch_size = None
    # From here on, the helpers above compare against the un-batched spaces.
    expected_batched_x_space = expected_x_space
    expected_batched_action_space = expected_action_space

    with setting.test_dataloader(batch_size=batch_size, num_workers=0) as env:
        assert env.batch_size is None
        check_env_spaces(env)

        reset_obs = env.reset()
        check_obs(reset_obs)

        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        # NOTE: Can't do this here, unless the episode is over, because the Monitor
        # doesn't want us to end an episode early!
        # for iter_obs in take(env, 3):
        #     check_obs(iter_obs)
        #     _ = env.send(env.action_space.sample())

    with setting.test_dataloader(batch_size=batch_size) as env:
        # NOTE: Can't do this here, unless the episode is over, because the Monitor
        # doesn't want us to end an episode early!
        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            _ = env.send(env.action_space.sample())