def test_deterministic(space: Space, n: int, base_seed: int):
    """Check that a seeded space and its deep copy batch into spaces that sample identically."""
    # Seed the space, clone it, and make sure the clone carries an equal
    # random number generator that is nevertheless a distinct object.
    space.seed(base_seed)
    clone = copy.deepcopy(space)
    assert_rng_equal(space.np_random, clone.np_random)
    assert space.np_random is not clone.np_random

    # The same must hold for the batched versions of both spaces.
    batched = batch_space(space, n)
    batched_clone = batch_space(clone, n)
    assert_rng_equal(batched.np_random, batched_clone.np_random)
    assert batched.np_random is not batched_clone.np_random

    # A batched space must not share its generator with the space it came from.
    assert space.np_random is not batched.np_random

    # Sampling from the original space must not affect the generators of
    # either batched space: both are still expected to produce equal samples.
    space.sample()
    batched_draw = batched.sample()
    clone_draw = batched_clone.sample()
    paired_items = zip(
        iterate(batched, batched_draw),
        iterate(batched_clone, clone_draw),
    )
    for item, clone_item in paired_items:
        if not isinstance(item, tuple):
            assert_array_equal(item, clone_item)
            continue
        # Tuple samples are compared element by element.
        assert len(item) == len(clone_item)
        for part, clone_part in zip(item, clone_item):
            assert_array_equal(part, clone_part)
def __init__(self, num_envs, observation_space, action_space):
    """Store the per-environment spaces and build their batched counterparts.

    Args:
        num_envs: Number of copies passed as `n` to `batch_space`.
        observation_space: Observation space of a single environment.
        action_space: Action space of a single environment.
    """
    # Bookkeeping state.
    self.num_envs = num_envs
    self.is_vector_env = True
    self.closed = False
    self.viewer = None

    # Spaces of one individual environment, kept as given.
    self.single_observation_space = observation_space
    self.single_action_space = action_space

    # Spaces of the whole batch of `num_envs` environments.
    self.observation_space = batch_space(observation_space, n=num_envs)
    self.action_space = batch_space(action_space, n=num_envs)
def fit(self, train_env: Environment, valid_env: Environment):
    """Iterate over `train_env`, sending randomly sampled actions when rewards are withheld.

    Each item yielded by `train_env` is either an `Observations` object (no
    rewards yet) or an `(observations, rewards)` pair. In the first case an
    action is sampled from an action space and sent back through
    `train_env.send`. `valid_env` is not used.

    Args:
        train_env: Environment to iterate over.
        valid_env: Unused.
    """
    for i, batch in enumerate(train_env):
        if isinstance(batch, Observations):
            observations, rewards = batch, None
        else:
            assert isinstance(batch, tuple) and len(batch) == 2
            observations, rewards = batch

        # NOTE: this draw is never read: it is replaced below when rewards are
        # withheld and ignored otherwise. The call is kept as-is; presumably it
        # still consumes the action space's random state (TODO confirm).
        y_preds = train_env.action_space.sample()

        if rewards is not None:
            # Rewards came with the batch: nothing has to be sent back.
            continue

        action_space = train_env.action_space
        if train_env.action_space.shape:
            # The last batch may have a different batch dimension than the
            # env's action space (only happens on the last batch in supervised
            # learning), so the space that is sampled from has to be resized.
            # TODO: Should we perhaps drop the last batch?
            fallback_size = getattr(train_env, "batch_size", 0)
            batch_size = getattr(train_env, "num_envs", fallback_size)
            env_is_batched = batch_size is not None and batch_size >= 1
            if env_is_batched:
                obs_shape = observations.x.shape
                obs_batch_size = obs_shape[0] if obs_shape else None

                env_action_shape = train_env.action_space.shape
                action_space_batch_size = (
                    env_action_shape[0] if env_action_shape else None
                )

                sizes_differ = obs_batch_size != action_space_batch_size
                if obs_batch_size is not None and sizes_differ:
                    # Build an action space matching the observed batch size.
                    action_space = batch_space(
                        train_env.single_action_space, obs_batch_size
                    )

        y_preds = action_space.sample()
        rewards = train_env.send(Actions(y_pred=y_preds))
def __init__(self, num_envs, observation_space, action_space):
    """Initialise the vectorized environment's spaces.

    The observation space is batched with `batch_space`, while the action
    space is a `Tuple` holding `num_envs` references to `action_space`.

    Args:
        num_envs: Number of environments.
        observation_space: Observation space of a single environment.
        action_space: Action space of a single environment.
    """
    super(VectorEnv, self).__init__()
    self.num_envs = num_envs

    # Spaces of one individual environment, kept as given.
    self.single_observation_space = observation_space
    self.single_action_space = action_space

    # Spaces covering all `num_envs` environments at once.
    self.observation_space = batch_space(observation_space, n=num_envs)
    per_env_action_spaces = (action_space,) * num_envs
    self.action_space = Tuple(per_env_action_spaces)
def __init__(self, num_envs: int, observation_space: gym.Space, action_space: gym.Space):
    """Set up the common state of a vectorized environment.

    Args:
        num_envs: Number of environments in the vectorized environment.
        observation_space: Observation space of a single environment.
        action_space: Action space of a single environment.
    """
    # Plain bookkeeping attributes.
    self.num_envs = num_envs
    self.is_vector_env = True
    self.closed = False
    self.viewer = None

    # The spaces of a single environment stay available under their own names.
    self.single_observation_space = observation_space
    self.single_action_space = action_space

    # The public spaces describe the whole batch of environments.
    self.observation_space = batch_space(observation_space, n=num_envs)
    self.action_space = batch_space(action_space, n=num_envs)
def test_rng_different_at_each_index(space: Space, n: int, base_seed: int):
    """Check that a batched sample is not the same value repeated at every index.

    Identical values at every index would be the symptom of the random number
    generator having been copied for each subspace.
    """
    space.seed(base_seed)
    batched_space = batch_space(space, n)

    # The batched space owns its own generator, equal in state to the original.
    assert space.np_random is not batched_space.np_random
    assert_rng_equal(space.np_random, batched_space.np_random)

    per_index = list(iterate(batched_space, batched_space.sample()))

    # At least one element has to differ from the first one.
    assert any(
        not np.all(element == per_index[0]) for element in per_index
    ), per_index
def fit(self, train_env: Environment, valid_env: Environment):
    """Iterate over `train_env`, sending randomly sampled actions when rewards are withheld.

    Each item yielded by `train_env` is either an `Observations` object (no
    rewards yet) or an `(observations, rewards)` pair. In the first case an
    action is sampled and sent back through `train_env.send`. `valid_env` is
    not used.

    Args:
        train_env: Environment to iterate over.
        valid_env: Unused.
    """
    for i, batch in enumerate(train_env):
        if isinstance(batch, Observations):
            observations, rewards = batch, None
        else:
            assert isinstance(batch, tuple) and len(batch) == 2
            observations, rewards = batch

        # NOTE: the result of this draw is never read. The call is kept as-is;
        # presumably it still consumes the action space's random state
        # (TODO confirm).
        y_preds = train_env.action_space.sample()

        if rewards is not None:
            # Rewards came with the batch: nothing has to be sent back.
            continue

        action_space = train_env.action_space
        if train_env.action_space.shape:
            observed_size = observations.x.shape[0]
            # BUG: Fix the `batch_size` attribute on `Batch` so it works
            # even when task labels are None, by checking whether there are
            # one or more shapes and, if so, that their first dimensions match.
            expected_size = action_space.shape[0]
            if observed_size != expected_size:
                # Sample from a space that matches the observed batch size.
                action_space = batch_space(
                    train_env.single_action_space, observed_size
                )

        rewards = train_env.send(Actions(action_space.sample()))
def test_batch_space_custom_space(space, expected_batch_space_4):
    """Check that batching `space` four times gives the expected batched space."""
    assert batch_space(space, n=4) == expected_batch_space_4
def test_loop(self, method: Method) -> "IncrementalSetting.Results":
    """(WIP) Evaluate `method` on the test environment and return its results.

    The intent is for this loop to be the same on the RL and the CL side of
    the tree.

    When `self.known_task_boundaries_at_test_time` is truthy and
    `self.nb_tasks > 1`, the test environment is wrapped in a
    `StepCallbackWrapper`. Its callback reacts to every step listed in
    `test_env.boundary_steps`: it calls `method.on_task_switch` with the
    position of that step among the sorted boundary steps when
    `self.task_labels_at_test_time` is truthy, and with `None` otherwise. If
    the method has no `on_task_switch` attribute, a warning is logged instead.

    `method.test(test_env)` is tried first. If that raises
    `NotImplementedError`, the environment is stepped here, asking
    `method.get_actions` for an action at every step.

    The method is switched to testing mode at the start, and back to training
    mode at the end if `method.training` was truthy on entry.
    """
    test_env: TestEnvironment = self.test_dataloader()

    was_training = method.training
    method.set_testing()

    if self.known_task_boundaries_at_test_time and self.nb_tasks > 1:

        def _on_task_switch(step: int, *arg) -> None:
            # NOTE: `test_env` is looked up when the callback runs, so this
            # sees whatever `test_env` is bound to at that point.
            # TODO: This attribute isn't on IncrementalSetting itself, it's
            # defined on ContinualRLSetting.
            if step not in test_env.boundary_steps:
                return

            if not hasattr(method, "on_task_switch"):
                logger.warning(
                    UserWarning(
                        "On a task boundary, but since your method doesn't "
                        "have an `on_task_switch` method, it won't know about "
                        "it! "
                    )
                )
                return

            if self.task_labels_at_test_time:
                # TODO: Should this 'test boundary' step depend on the batch
                # size?
                # TODO: If the ordering of tasks were different (shuffled
                # tasks for example), then this wouldn't work, we'd need a
                # list of the task ids or something like that.
                task_id = sorted(test_env.boundary_steps).index(step)
                message = (
                    f"Calling `method.on_task_switch({task_id})` "
                    "since task labels are available at test-time."
                )
            else:
                task_id = None
                message = (
                    "Calling `method.on_task_switch(None)` "
                    "since task labels aren't available at "
                    "test-time, but task boundaries are known."
                )
            logger.debug(message)
            method.on_task_switch(task_id)

        test_env = StepCallbackWrapper(test_env, callbacks=[_on_task_switch])

    try:
        # Let the Method run its own test loop if it defines one.
        method.test(test_env)
        test_env.close()
        # Get the metrics from the test environment.
        test_results: Results = test_env.get_results()
    except NotImplementedError:
        logger.debug(
            "Will query the method for actions at each step, "
            "since it doesn't implement a `test` method."
        )
        obs = test_env.reset()

        # TODO: Do we always have a maximum number of steps? or of episodes?
        # Will it work the same for Supervised and Reinforcement learning?
        max_steps: int = getattr(test_env, "step_limit", None)

        # Reset on the last step is causing trouble, since the env is closed.
        progress = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
        episodes_done = 0

        for _ in progress:
            # NOTE: The env might not be closed, while `obs` is actually
            # still there.
            if obs is None:
                break

            # BUG: Need to pass an action space that actually reflects the
            # batch size, even for the last batch!
            # BUG: This doesn't work if the env isn't batched.
            action_space = test_env.action_space
            fallback_size = getattr(test_env, "batch_size", 0)
            batch_size = getattr(test_env, "num_envs", fallback_size)
            env_is_batched = batch_size is not None and batch_size >= 1

            if env_is_batched:
                obs_shape = obs.x.shape
                obs_batch_size = obs_shape[0] if obs_shape else None

                env_action_shape = test_env.action_space.shape
                action_space_batch_size = (
                    env_action_shape[0] if env_action_shape else None
                )

                sizes_differ = obs_batch_size != action_space_batch_size
                if obs_batch_size is not None and sizes_differ:
                    # Hand the method a space matching the observed batch.
                    action_space = batch_space(
                        test_env.single_action_space, obs_batch_size
                    )

            action = method.get_actions(obs, action_space)

            # TODO: Remove this:
            if isinstance(action, Actions):
                action = action.y_pred
            if isinstance(action, Tensor):
                action = action.cpu().numpy()

            if test_env.is_closed():
                break

            obs, _reward, done, _info = test_env.step(action)

            if done and not test_env.is_closed():
                obs = test_env.reset()
                episodes_done += 1

        test_env.close()
        test_results: TaskSequenceResults = test_env.get_results()

    # Restore 'training' mode, if it was set at the start.
    if was_training:
        method.set_training()

    return test_results
def test_loop(self, method: Method) -> "IncrementalSetting.Results":
    """(WIP) Run the test loop on the test environment and return its results.

    The idea is that this loop should be exactly the same, regardless of if
    you're on the RL or the CL side of the tree.

    The method is put in testing mode, the test environment is stepped with
    the actions returned by `method.get_actions` until the environment reports
    being closed, and the results are read from the environment. The method is
    set back to training mode if `method.training` was truthy on entry.

    NOTE: Informing the method of task boundaries (calling its
    `on_task_switch`) is not implemented in this loop yet: the
    `known_task_boundaries_at_test_time` / `nb_tasks` check below is a
    placeholder and `task_labels_at_test_time` is read but not used.

    Args:
        method: Object providing `training`, `set_testing()`,
            `set_training()` and `get_actions(obs, action_space)`.

    Returns:
        Whatever `test_env.get_results()` returns.
    """
    nb_tasks = self.get_attribute("nb_tasks")
    known_task_boundaries_at_test_time = self.get_attribute(
        "known_task_boundaries_at_test_time"
    )
    task_labels_at_test_time = self.get_attribute("task_labels_at_test_time")

    was_training = method.training
    method.set_testing()
    test_env = self.test_dataloader()

    if known_task_boundaries_at_test_time and nb_tasks > 1:
        # TODO: We need to have a way to inform the Method of task boundaries,
        # if the Setting allows it.
        # Not sure how to do this. It might be simpler to just do something
        # like `obs, rewards, done, info, task_switched = <endpoint>.step(actions)`?
        # A wrapper calling `on_task_switch` at the right step could be added
        # here, e.g.:
        # test_env = StepCallbackWrapper(test_env, callbacks=[_on_task_switch])
        pass

    obs = test_env.reset()

    # `batch_size` may be falsy (e.g. None), in which case steps are counted
    # one by one.
    batch_size = test_env.batch_size
    max_steps: int = self.get_attribute("test_steps") // (batch_size or 1)

    # Reset on the last step is causing trouble, since the env is closed.
    pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
    episode = 0

    for step in pbar:
        if test_env.is_closed():
            logger.debug("Env is closed")
            break

        # BUG: This doesn't work if the env isn't batched.
        action_space = test_env.action_space

        # `num_envs` may be missing or set to None; comparing None with an
        # int would raise a TypeError, so both cases count as "not batched".
        num_envs = getattr(test_env, "num_envs", None)
        env_is_batched = num_envs is not None and num_envs >= 1

        if env_is_batched:
            # NOTE: Need to pass an action space that actually reflects the
            # batch size, even for the last batch!
            obs_batch_size = obs.x.shape[0] if obs.x.shape else None
            action_space_batch_size = (
                test_env.action_space.shape[0]
                if test_env.action_space.shape
                else None
            )
            if (
                obs_batch_size is not None
                and obs_batch_size != action_space_batch_size
            ):
                action_space = batch_space(
                    test_env.single_action_space, obs_batch_size
                )

        action = method.get_actions(obs, action_space)

        obs, reward, done, info = test_env.step(action)

        # TODO: Add something to `info` that indicates when a task boundary is
        # reached, so that we can call the `on_task_switch` method on the
        # Method ourselves.
        if done and not test_env.is_closed():
            obs = test_env.reset()
            episode += 1

    test_env.close()
    test_results = test_env.get_results()

    # Restore 'training' mode, if it was set at the start.
    if was_training:
        method.set_training()

    return test_results