Example #1
def batch_sparse_space(space: Sparse, n: int=1) -> gym.Space:
    # NOTE: This means we do something different depending on the sparsity.
    # Could that become an issue?
    # assert _is_singledispatch(batch_space)

    sparsity = space.sparsity
    if sparsity == 0: #or sparsity == 1:
        # If the space has 0 sparsity, then batch it just like you would its
        # base space.
        # TODO: This is convenient, but not very consistent, as the length of
    # the batches changes depending on the sparsity of the space.
        return Sparse(batch_space(space.base, n), sparsity=sparsity)
    # elif sparsity == 1.:
        
    # Sticking to the default behaviour from gym for now, which is to just
    # return a tuple of length n with n copies of the space.
    return spaces.Tuple(tuple(space for _ in range(n)))

    # (Unreachable alternative, kept for reference:) we could also make the
    # sub-spaces sparse instead:
    # batch_space(Sparse<Tuple<A, B>>) -> Tuple<batch_space(Sparse<A>), batch_space(Sparse<B>)>

    if isinstance(space.base, spaces.Tuple):
        return spaces.Tuple([
            spaces.Tuple([Sparse(sub_space, sparsity) for _ in range(n)])
            for sub_space in space.base.spaces
        ])
    if isinstance(space.base, spaces.Dict):
        return spaces.Dict({
            name: Sparse(batch_space(sub_space, n), sparsity)
            for name, sub_space in space.base.spaces.items()
        })

    return batch_space(space.base, n)
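A quick, hedged sketch of how the handler above behaves, assuming `Sparse` comes from the same project and the function is registered with gym's `batch_space` singledispatch: sparsity 0 batches the base space as usual, while any other sparsity falls back to gym's default Tuple-of-copies behaviour.

from gym import spaces
from gym.vector.utils import batch_space

base = spaces.Discrete(2)

# sparsity == 0: the result wraps the batched base space.
dense = batch_space(Sparse(base, sparsity=0.0), n=4)
# -> Sparse(MultiDiscrete([2 2 2 2]), sparsity=0.0)

# sparsity > 0: gym's default for custom spaces, a Tuple of n copies.
sparse = batch_space(Sparse(base, sparsity=0.5), n=4)
# -> Tuple(Sparse(Discrete(2)), ..., Sparse(Discrete(2)))  (4 copies)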
Example #2
def test_batching_works(base_space: gym.Space, sparsity: float, n: int = 10):
    batched_base_space = batch_space(base_space, n)

    sparse_space = Sparse(base_space, sparsity=sparsity)
    batched_sparse_space = batch_space(sparse_space, n)

    batched_base_space.seed(123)
    base_batch = batched_base_space.sample()

    batched_sparse_space.seed(123)
    sparse_batch = batched_sparse_space.sample()

    if sparsity == 0:
        # When there is no sparsity, the batching is the same as batching the
        # base space directly.
        assert equals(base_batch, sparse_batch)
    elif sparsity == 1:
        assert len(sparse_batch) == n
        assert sparse_batch == tuple([None] * n)
    else:
        assert len(sparse_batch) == n
        assert isinstance(sparse_batch, tuple)

        for i, value in enumerate(sparse_batch):
            if value is not None:
                assert value in base_space

        # There should be some sparsity.
        assert (any(v is None for v in sparse_batch)
                and not all(v is None for v in sparse_batch)), sparse_batch
Example #3
def test_universal_encoder():
    batch_size = 10
    budget = None
    input_space = spaces.Dict({
        "x": spaces.Box(low=0, high=1, shape=[3, 32, 32]),
        "t": spaces.Discrete(2),
    })
    output_space = spaces.Box(
        -np.inf,
        np.inf,
        shape=[
            512,
        ],
        dtype=np.float32,
    )

    encoder = create_encoder(input_space, output_space, budget=budget)
    batch_input_space = batch_space(input_space, batch_size)
    batch_output_space = batch_space(output_space, batch_size)

    batch_input_space = add_tensor_support(batch_input_space)
    output_space = add_tensor_support(output_space)

    sample = batch_input_space.sample()
    encoder_output = encoder(sample)

    if budget:
        assert n_parameters(encoder) < budget
Example #4
File: vec.py Project: ivannz/rlplay
    def __init__(self, envs):
        env = envs[0]()
        self.observation_space = batch_space(env.observation_space, len(envs))
        self.action_space = batch_space(env.action_space, len(envs))
        env.close()
        del env

        self.processes = [factory() for factory in envs]
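The constructor above probes a single throwaway instance to build the batched spaces before creating the workers. A hedged usage sketch, where `MyVecEnv` is a hypothetical class exposing this constructor:

import gym

# Each entry is a zero-argument factory; `MyVecEnv` is a hypothetical name.
env_fns = [lambda: gym.make("CartPole-v1") for _ in range(4)]
vec = MyVecEnv(env_fns)

print(vec.observation_space)  # Box with shape (4, 4): 4 envs x 4 CartPole features
print(vec.action_space)       # MultiDiscrete([2 2 2 2])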
Example #5
def test_batching_works(base_space: gym.Space, n: int = 3):
    batched_base_space = batch_space(base_space, n)
    sparse_space = Sparse(base_space)

    batched_sparse_space = batch_space(sparse_space, n)

    base_batch = batched_base_space.sample()
    sparse_batch = batched_sparse_space.sample()
    assert len(base_batch) == len(sparse_batch)
Example #6
def test_spaces(env_name: str, batch_size: int):
    dataset = EnvDataset(make_batched_env(env_name, batch_size=batch_size))

    batched_obs_space = dataset.observation_space
    # NOTE: the VectorEnv class creates the 'batched' action space by creating a
    # Tuple of the single action space, of length 'N', which seems a bit weird.
    # batched_action_space = vector_env.action_space
    batched_action_space = batch_space(dataset.single_action_space, batch_size)

    dataloader_env = GymDataLoader(dataset, batch_size=batch_size)
    assert dataloader_env.observation_space == batched_obs_space
    assert dataloader_env.action_space == batched_action_space

    dataloader_env.reset()
    for observation_batch in take(dataloader_env, 3):
        if isinstance(observation_batch, Tensor):
            observation_batch = observation_batch.cpu().numpy()
        assert observation_batch in batched_obs_space

        actions = dataloader_env.action_space.sample()
        assert len(actions) == batch_size
        assert actions in batched_action_space

        rewards = dataloader_env.send(actions)
        assert len(rewards) == batch_size
        assert rewards in dataloader_env.reward_space
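The NOTE in the test above points at a quirk worth seeing side by side. A minimal, hedged sketch (plain gym spaces only) contrasting the Tuple-of-copies style that gym's VectorEnv used for `action_space` with what `batch_space` returns:

from gym import spaces
from gym.vector.utils import batch_space

single_action_space = spaces.Discrete(2)
n = 4

# Tuple-of-copies style (what the NOTE above describes for VectorEnv.action_space):
tuple_style = spaces.Tuple(tuple(single_action_space for _ in range(n)))
# `batch_space` style:
batched = batch_space(single_action_space, n)  # MultiDiscrete([2 2 2 2])

print(tuple_style.sample())  # e.g. (0, 1, 1, 0) -- a tuple of scalars
print(batched.sample())      # e.g. [1 0 1 1]    -- a single integer array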
Example #7
 def __init__(self,
              env_fns,
              observation_space: Optional[Space] = None,
              action_space: Optional[Space] = None,
              copy: bool = True) -> None:
     super().__init__(env_fns, observation_space, action_space, copy)
     self.action_space = batch_space(self.single_action_space, self.num_envs)
Example #8
 def __init__(self, env: Env) -> None:
     assert not isinstance(env, VectorEnv)
     super().__init__(1, env.observation_space, env.action_space)
     self.action_space = batch_space(env.action_space, 1)
     self.env = env
     self._observation = create_empty_array(env.observation_space, n=1, fn=np.zeros)
     self._reward = np.zeros((1,), dtype=np.float64)
     self._terminal = np.zeros((1,), dtype=np.bool_)
     self._action = None
Example #9
 def __init__(self,
              env_fns,
              observation_space: Optional[Space] = None,
              action_space: Optional[Space] = None,
              shared_memory: bool = True,
              copy: bool = True,
              context: Optional = None,
              daemon: bool = True,
              worker: Optional = None) -> None:
     super().__init__(env_fns, observation_space, action_space, shared_memory, copy, context, daemon, worker)
     self.action_space = batch_space(self.single_action_space, self.num_envs)
Example #10
 def __init__(self, env: gym.Env, done_space: Space = None):
     super().__init__(env)
     # The `done` value is boolean (0 or 1).
     if done_space is None:
         done_space = spaces.Box(0, 1, (), dtype=np.bool_)
         if self.is_vectorized:
             self.single_observation_space = add_done(
                 self.single_observation_space, done_space)
             done_space = batch_space(done_space, self.env.num_envs)
     self.done_space = done_space
     self.observation_space = add_done(self.env.observation_space,
                                       self.done_space)
Example #11
    def __init__(self,
                 env,
                 batch_size: int,
                 fomdp: bool = False,
                 time_limit: Optional[int] = None,
                 float32: bool = False,
                 **kwargs):
        from gym.vector.utils import batch_space
        if not isinstance(env.unwrapped, POMDP):
            raise TypeError(f'Env is not a POMDP (got {type(env)}).')
        if batch_size <= 0:
            raise ValueError(
                f'Batch size is not positive (got {batch_size}).')

        super().__init__(env)
        self.num_envs = batch_size
        self.state = np.full([batch_size], -1, dtype=int)
        if fomdp:
            self.single_observation_space = env.state_space
            self._observable = True
            self._start_obs = None
        else:
            self.single_observation_space = gym.spaces.Discrete(
                env.observation_space.n + 1)
            self._observable = False
            self._start_obs = env.observation_space.n
        self.observation_space = batch_space(self.single_observation_space,
                                             batch_size)
        self.single_action_space = self.action_space
        self.action_space = batch_space(self.single_action_space, batch_size)
        self.env.R = self.env.R.astype(
            np.float32)  # Reduce space, make compatible with networks
        self.max_time = time_limit or int(3000000)
        self.elapsed_time = np.zeros([batch_size], dtype=int)
        del self.env.model  # More space
        if float32:
            self.env.O = self.env.O.astype(np.float32)
            self.env.T = self.env.T.astype(np.float32)
            if hasattr(self.env, 'D'):
                self.env.D = self.env.D.astype(np.float32)
Example #12
def test_batch_space():
    named_tuple_space = NamedTupleSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    assert batch_space(named_tuple_space, n=5) == NamedTupleSpace(
        current_state=Box(0, 1, (5, 2, 2)),
        action=spaces.MultiDiscrete([2, 2, 2, 2, 2]),
        next_state=Box(0, 1, (5, 2, 2)),
        dtype=StateTransition,
    )
Example #13
 def __init__(self,
              env: gym.Env,
              info_space: spaces.Space = None):
     super().__init__(env)
     self.is_vectorized = isinstance(env.unwrapped, VectorEnv)
     # TODO: Should we make 'info_space' mandatory here?
     if info_space is None:
         # TODO: There seems to be some issues if we have an empty info space
         # before the batching.
         info_space = spaces.Dict({})
         if self.is_vectorized:
             info_space = batch_space(info_space, self.env.num_envs)
     self.info_space = info_space
     self.observation_space = add_info(self.env.observation_space, self.info_space)
Example #14
 def __init__(self, env: gym.Env, done_space: Space = None):
     super().__init__(env)
     # happens in the VectorEnv, done is always False!
     self.is_vectorized = isinstance(env.unwrapped, VectorEnv)
     # The `done` value is boolean (0 or 1).
     if done_space is None:
         done_space = spaces.Box(0, 1, (), dtype=np.bool_)
         if self.is_vectorized:
             self.single_observation_space = add_done(
                 self.single_observation_space, done_space)
             done_space = batch_space(done_space, self.env.num_envs)
     self.done_space = done_space
     self.observation_space = add_done(self.env.observation_space,
                                       self.done_space)
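For the scalar boolean `done_space` built above, `batch_space` just adds a leading batch dimension. A minimal sketch of what the wrapper ends up with when the env is vectorized (plain gym spaces, nothing project-specific assumed):

import numpy as np
from gym import spaces
from gym.vector.utils import batch_space

done_space = spaces.Box(0, 1, (), dtype=np.bool_)
batched_done_space = batch_space(done_space, n=8)

print(done_space.shape)          # ()
print(batched_done_space.shape)  # (8,)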
Example #15
    def __init__(
        self,
        env_fn: Callable[[], gym.Env],
        batch_size: int,
        new_episode_length: Callable[[int], int],
        episode_lengths: Sequence[int] = None,
    ):
        super().__init__([env_fn for _ in range(batch_size)])
        self.new_episode_length = new_episode_length
        self.batch_size = batch_size
        self.episode_lengths = np.array(
            episode_lengths or [new_episode_length(i) for i in range(self.num_envs)]
        )
        self.steps_left_in_episode = self.episode_lengths.copy()

        reward_space = spaces.Box(*self.reward_range, shape=())
        self.single_reward_space = reward_space
        self.reward_space = batch_space(reward_space, batch_size)
Example #16
def _batch_typed_dict_space(space: TypedDictSpace, n: int = 1) -> spaces.Dict:
    return type(space)(
        {key: batch_space(subspace, n=n) for (key, subspace) in space.spaces.items()},
        dtype=space.dtype,
    )
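In the gym versions these snippets appear to target, `batch_space` is a functools.singledispatch function (see the commented-out assert in Example #1), so a handler like the one above would typically be hooked up by registering it for the custom space type. A minimal, hedged sketch, assuming `TypedDictSpace` is importable from the surrounding project:

from gym.vector.utils import batch_space

# `TypedDictSpace` and `_batch_typed_dict_space` are the project-specific names used above.
batch_space.register(TypedDictSpace, _batch_typed_dict_space)

# After registration, batch_space(some_typed_dict_space, n=32) dispatches to the handler.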
Example #17
    def __init__(
        self,
        dataset: Union[IterableDataset, Dataset],
        split_batch_fn: Callable[
            [Tuple[Any, ...]], Tuple[ObservationType, ActionType]
        ] = None,
        observation_space: gym.Space = None,
        action_space: gym.Space = None,
        reward_space: gym.Space = None,
        n_classes: int = None,
        pretend_to_be_active: bool = False,
        strict: bool = False,
        **kwargs,
    ):
        """Creates the DataLoader/Environment for the given dataset.
        
        Parameters
        ----------
        dataset : Union[IterableDataset, Dataset]
            The dataset to iterate on. Should ideally be indexable (a Map-style
            dataset).

        split_batch_fn : Callable[ [Tuple[Any, ...]], Tuple[ObservationType, ActionType] ], optional
            A function to call on each item in the dataset in order to split it into
            Observations and Rewards, by default None, in which case we assume that the
            dataset items are tuples of length 2.

        observation_space : gym.Space, optional
            The single (non-batched) observation space. Default to `None`, in which case
            this will try to infer the shape of the space using the first item in the
            dataset.

        action_space : gym.Space, optional
            The non-batched action space. Defaults to None, in which case the
            `n_classes` argument must be passed, and the action space is assumed to be
            discrete (i.e. that the loader is for a classification dataset).

        reward_space : gym.Space, optional
            The non-batched reward (label) space. Defaults to `None`, in which case it
            will be the same as the action space (as is the case in classification).

        n_classes : int, optional
            Number of classes in the dataset. Used in case `action_space` isn't passed.
            Defaults to `None`.

        pretend_to_be_active : bool, optional
            Whether to withhold the rewards (labels) from the batches when being
            iterated on like the usual dataloader, and to only give them back
            after an action is received through the 'send' method. False by
            default, in which case this behaves exactly as a normal dataloader
            when being iterated on.
            
            When False, the batches yielded by this dataloader will be of the form
            `Tuple[Observations, Rewards]` (as usual in SL).
            However, when set to True, the batches will be `Tuple[Observations, None]`!
            Rewards will then be returned by the environment when an action is passed to
            the `send` method.

        strict : bool, optional
            [description], by default False
            
        # Examples:
        ```python
        train_env = PassiveEnvironment(MNIST("data"), batch_size=32, n_classes=10)
        
        # The usual Dataloader-style:
        for x, y in train_env:
            # train as usual
            (...)
        
        # OpenAI Gym style:
        for episode in range(5):
            # NOTE: "episode" in RL is an "epoch" in SL:
            obs = train_env.reset()
            done = False
            while not done:
                actions = train_env.action_space.sample()
                obs, rewards, done, info = train_env.step(actions)
        ```
        
        """
        
        super().__init__(dataset=dataset, **kwargs)
        self.split_batch_fn = split_batch_fn

        # TODO: When the spaces aren't passed explicitly, assumes a classification dataset.
        if not observation_space:
            # NOTE: Assuming min/max of 0 and 1 respectively, but could actually use
            # min_max of the dataset samples too.
            first_item = self.dataset[0]
            if isinstance(first_item, tuple):
                x, *_ = first_item
            else:
                assert isinstance(first_item, (np.ndarray, Tensor))
                x = first_item
            observation_space = Image(0.0, 1.0, x.shape)
        if not action_space:
            assert n_classes, "must pass either `action_space`, or `n_classes` for now"
            action_space = spaces.Discrete(n_classes)
        elif isinstance(action_space, spaces.Discrete):
            n_classes = action_space.n

        if not reward_space:
            # Assuming a classification dataset by default:
            # (action space = reward space = Discrete(n_classes))
            reward_space = action_space

        assert observation_space
        assert action_space
        assert reward_space

        self.single_observation_space: Space = observation_space 
        self.single_action_space: Space = action_space 
        self.single_reward_space: Space = reward_space

        if self.batch_size:
            observation_space = batch_space(observation_space, self.batch_size)
            action_space = batch_space(action_space, self.batch_size)
            reward_space = batch_space(reward_space, self.batch_size)
    
        self.observation_space: gym.Space = add_tensor_support(observation_space)
        self.action_space: gym.Space = add_tensor_support(action_space)
        self.reward_space: gym.Space = add_tensor_support(reward_space)

        self.pretend_to_be_active = pretend_to_be_active
        self._strict = strict
        self._reward_queue = deque(maxlen=10)

        self.n_classes: Optional[int] = n_classes
        self._iterator: Optional[_BaseDataLoaderIter] = None
        # NOTE: These here are never processed with self.observation or self.reward.
        self._previous_batch: Optional[Tuple[ObservationType, RewardType]] = None
        self._current_batch: Optional[Tuple[ObservationType, RewardType]] = None
        self._next_batch: Optional[Tuple[ObservationType, RewardType]] = None
        self._done: Optional[bool] = None
        self._closed: bool = False

        self._action: Optional[ActionType] = None

        # from gym.envs.classic_control.rendering import SimpleImageViewer
        self.viewer = None
Example #18
def batch_namedtuple_space(space: NamedTupleSpace, n: int = 1):
    return NamedTupleSpace(**{
        key: batch_space(space[key], n)
        for key in space.names
    },
                           dtype=space.dtype)
Example #19
    def __init__(self,
                 env: Union[EnvDataset, PolicyEnv] = None,
                 dataset: Union[EnvDataset, PolicyEnv] = None,
                 batch_size: int = None,
                 num_workers: int = None,
                 **kwargs):
        assert not (
            env is None and dataset is None
        ), "One of the `dataset` or `env` arguments must be passed."
        assert not (
            env is not None and dataset is not None
        ), "Only one of the `dataset` and `env` arguments can be used."

        # Allow passing the environment either as `env` or as `dataset`.
        env = env if env is not None else dataset
        if not isinstance(env, IterableDataset):
            raise RuntimeError(
                f"The env {env} isn't an interable dataset! (You can use the "
                f"EnvDataset or PolicyEnv wrappers to make an IterableDataset "
                f"from a gym environment.")

        if isinstance(env.unwrapped, VectorEnv):
            if batch_size is not None and batch_size != env.num_envs:
                logger.warning(
                    UserWarning(
                        f"The provided batch size {batch_size} will be ignored, since "
                        f"the provided env is vectorized with a batch_size of "
                        f"{env.unwrapped.num_envs}."))
            batch_size = env.num_envs

        if isinstance(env.unwrapped, BatchedVectorEnv):
            num_workers = env.n_workers
        elif isinstance(env.unwrapped, AsyncVectorEnv):
            num_workers = env.num_envs
        else:
            num_workers = 0

        self.env = env
        # TODO: We could also perhaps let those parameters through to the
        # constructor of DataLoader, because in __iter__ we're not using the
        # DataLoader iterator anyway! This would have the benefit that the
        # batch_size and num_workers attributes would reflect the actual state
        # of the iterator, and things like pytorch-lightning would stop warning
        # us that the num_workers is too low.
        super().__init__(
            dataset=self.env,
            # The batch size is None, because the VecEnv takes care of
            # doing the batching for us.
            batch_size=batch_size,
            num_workers=num_workers,
            # collate_fn=None,
            **kwargs,
        )
        Wrapper.__init__(self, env=self.env)
        assert not isinstance(
            self.env, GymDataLoader), "Something very wrong is happening."

        # self.max_epochs: int = max_epochs
        self.observation_space: gym.Space = self.env.observation_space
        self.action_space: gym.Space = self.env.action_space
        self.reward_space: gym.Space
        if isinstance(env.unwrapped, VectorEnv):
            env: VectorEnv
            batch_size = env.num_envs
            # TODO: Overwriting the action space to be the 'batched' version of
            # the single action space, rather than a Tuple(Discrete, ...) as is
            # done in the gym.vector.VectorEnv.
            self.action_space = batch_space(env.single_action_space,
                                            batch_size)

        if not hasattr(self.env, "reward_space"):
            self.reward_space = spaces.Box(
                low=self.env.reward_range[0],
                high=self.env.reward_range[1],
                shape=(),
            )
            if isinstance(self.env.unwrapped, VectorEnv):
                # Same here, we use a 'batched' space rather than Tuple.
                self.reward_space = batch_space(self.reward_space, batch_size)
Example #20
    def _check_environments(self):
        """ Do a quick check to make sure that interacting with the envs/dataloaders
        works correctly.
        """
        # Check that the env's spaces are batched versions of the settings'.
        from gym.vector.utils import batch_space
        from sequoia.settings.passive import PassiveEnvironment

        batch_size = self.batch_size
        for loader_method in [
            self.train_dataloader,
            self.val_dataloader,
            self.test_dataloader,
        ]:
            print(f"\n\nChecking loader method {loader_method.__name__}\n\n")
            env = loader_method(batch_size=batch_size)

            batch_size = env.batch_size

            # We could compare the spaces directly, but that's a bit messy and
            # would depend on the type of space used for each. Instead, we check
            # how samples from these spaces are batched.
            if batch_size:
                expected_observation_space = batch_space(
                    self.observation_space, n=batch_size
                )
                expected_action_space = batch_space(self.action_space, n=batch_size)
                expected_reward_space = batch_space(self.reward_space, n=batch_size)
            else:
                expected_observation_space = self.observation_space
                expected_action_space = self.action_space
                expected_reward_space = self.reward_space

            # TODO: Batching the 'Sparse' makes it really ugly, so just
            # comparing the 'image' portion of the space for now.
            assert (
                env.observation_space[0].shape == expected_observation_space[0].shape
            ), (env.observation_space[0], expected_observation_space[0])

            assert env.action_space == expected_action_space, (
                env.action_space,
                expected_action_space,
            )
            assert env.reward_space == expected_reward_space, (
                env.reward_space,
                expected_reward_space,
            )

            # Check that the 'gym API' interaction is working correctly.
            reset_obs: Observations = env.reset()
            self._check_observations(env, reset_obs)

            for i in range(5):
                actions = env.action_space.sample()
                self._check_actions(env, actions)
                step_observations, step_rewards, done, info = env.step(actions)
                self._check_observations(env, step_observations)
                self._check_rewards(env, step_rewards)
                if batch_size:
                    assert not any(done)
                else:
                    assert not done
                # assert not (done if isinstance(done, bool) else any(done))

            for batch in take(env, 5):
                observations: Observations
                rewards: Optional[Rewards]

                if isinstance(env, PassiveEnvironment):
                    observations, rewards = batch
                else:
                    # In RL atm, the 'dataset' gives back only the observations.
                    observations, rewards = batch, None

                self._check_observations(env, observations)
                if rewards is not None:
                    self._check_rewards(env, rewards)

                if batch_size:
                    actions = tuple(
                        self.action_space.sample() for _ in range(batch_size)
                    )
                else:
                    actions = self.action_space.sample()
                # actions = self.Actions(torch.as_tensor(actions))
                rewards = env.send(actions)
                self._check_rewards(env, rewards)

            env.close()
Example #21
File: vec.py Project: ivannz/rlplay
    def __init__(self, envs, *, method=None, timeout=15):
        ctx = mp  # .get_context(method or 'forkserver')

        # poll an environment for its specs
        # XXX what if the environments are different?
        env = envs[0]()
        self.observation_space = batch_space(env.observation_space, len(envs))
        self.action_space = batch_space(env.action_space, len(envs))
        env.close()
        del env

        # construct shared memory buffers from env's space specs
        #  for efficient exchange of large objects of fixed type.
        shm_obs = from_space(self.observation_space, ctx=ctx)
        shm_act = from_space(self.action_space, ctx=ctx)
        self.shared = shm_obs, shm_act

        # setup producer-consumer synchronization: a common Barrier that we
        #  and all workers wait at for mutual synchronization and signaling
        #  a complete batch.
        self.finished, self.timeout = ctx.Barrier(1 + len(envs)), timeout
        # self.sem_act, self.sem_obs = ctx.Semaphore(0), ctx.Semaphore(0)

        # spawn a worker processes for each environment
        state, self._errors = [], ctx.Queue()
        for j, env in enumerate(envs):
            # Create unidirectional (non-duplex) pipes, and connect them in
            # `crossover` mode between us and them (the worker). This set-up
            #  avoids rare occasions where the main process or the worker read
            #  back their own just issued message.
            ut_rx, ut_tx = ctx.Pipe(duplex=False)
            tu_rx, tu_tx = ctx.Pipe(duplex=False)

            # The connection endpoints have the following meanings:
            # * `ut` and `tu` stand for `us-them` and `them-us`, respectively;
            # * [their] `ut_rx` (read) and `tu_tx` (write) ends are used by the
            #   worker to receive signals and yield results, respectively;
            # * [our] `ut_tx` (write) and `tu_rx` (read) ends are used by the
            #   main process to issue commands and read back responses.
            our, their = Endpoint(tu_rx, ut_tx), Endpoint(ut_rx, tu_tx)

            # crossover handles: share the us-them rx and them-us tx ends
            p = ctx.Process(
                args=(j, CloudpickleSpawner(env), their, our, self.shared),
                kwargs=dict(errors=self._errors, barrier=self.finished),
                target=vecEnvWorker.target,
                daemon=True,
            )
            p.start()

            # close handles unused by us: them-us tx (write), us-them rx (read)
            # XXX pipes and connections are commonly implemented through file
            # descriptors, which imposes a limit on their max number. Also
            # connections are closed when garbage collected (`__del__`).
            their.tx.close()  # tu_tx
            their.rx.close()  # ut_rx

            state.append((p, our))

        # rebuild numpy buffers, assign processes and establish communications
        self.buf_obs, self.buf_act = from_shared(self.shared)
        self.processes, self.comm = zip(*state)

        # wait until all workers have started
        if not all(self._wait()._recv()):
            raise RuntimeError('Failed to launch all worker subprocesses.')
Example #22
    def test_loop(self, method: Method) -> "IncrementalAssumption.Results":
        """ WIP: Continual test loop.
        """
        test_env = self.test_dataloader()

        test_env: TestEnvironment

        was_training = method.training
        method.set_testing()

        try:
            # If the Method has `test` defined, use it.
            method.test(test_env)
            test_env.close()
            test_env: TestEnvironment
            # Get the metrics from the test environment
            test_results: Results = test_env.get_results()

        except NotImplementedError:
            logger.debug(f"Will query the method for actions at each step, "
                         f"since it doesn't implement a `test` method.")
            obs = test_env.reset()

            # TODO: Do we always have a maximum number of steps? or of episodes?
            # Will it work the same for Supervised and Reinforcement learning?
            max_steps: int = getattr(test_env, "step_limit", None)

            # Reset on the last step is causing trouble, since the env is closed.
            pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
            episode = 0

            for step in pbar:
                if obs is None:
                    break
                # NOTE: The env might not be closed, while `obs` is actually still there.
                # if test_env.is_closed():
                #     logger.debug(f"Env is closed")
                #     break
                # logger.debug(f"At step {step}")

                # BUG: Need to pass an action space that actually reflects the batch
                # size, even for the last batch!

                # BUG: This doesn't work if the env isn't batched.
                action_space = test_env.action_space
                batch_size = getattr(test_env, "num_envs",
                                     getattr(test_env, "batch_size", 0))
                env_is_batched = batch_size is not None and batch_size >= 1
                if env_is_batched:
                    # NOTE: Need to pass an action space that actually reflects the batch
                    # size, even for the last batch!
                    obs_batch_size = obs.x.shape[0] if obs.x.shape else None
                    action_space_batch_size = (test_env.action_space.shape[0]
                                               if test_env.action_space.shape
                                               else None)
                    if (obs_batch_size is not None
                            and obs_batch_size != action_space_batch_size):
                        action_space = batch_space(
                            test_env.single_action_space, obs_batch_size)

                action = method.get_actions(obs, action_space)

                # logger.debug(f"action: {action}")
                # TODO: Remove this:
                if isinstance(action, Actions):
                    action = action.y_pred
                if isinstance(action, Tensor):
                    action = action.detach().cpu().numpy()

                if test_env.is_closed():
                    break

                obs, reward, done, info = test_env.step(action)

                if done and not test_env.is_closed():
                    # logger.debug(f"end of test episode {episode}")
                    obs = test_env.reset()
                    episode += 1

            test_env.close()
            test_results: Results = test_env.get_results()

        if wandb.run:
            d = add_prefix(test_results.to_log_dict(), prefix="Test", sep="/")
            # d = add_prefix(test_metrics.to_log_dict(), prefix="Test", sep="/")
            # d["current_task"] = task_id
            wandb.log(d)

        # Restore 'training' mode, if it was set at the start.
        if was_training:
            method.set_training()

        return test_results
Example #23
    def test_check_iterate_and_step(
        self, setting_kwargs: Dict[str, Any], batch_size: Optional[int],
    ):
        """ Test that the observations are of the right type and shape, regardless
        of whether we iterate on the env by calling 'step' or by using it as a
        DataLoader.
        """
        with gym.make(setting_kwargs["dataset"]) as temp_env:
            expected_x_space = temp_env.observation_space
            expected_action_space = temp_env.action_space

        setting = self.Setting(**setting_kwargs, num_workers=0)

        if batch_size is not None:
            expected_batched_x_space = batch_space(expected_x_space, batch_size)
            expected_batched_action_space = batch_space(
                setting.action_space, batch_size
            )
        else:
            expected_batched_x_space = expected_x_space
            expected_batched_action_space = expected_action_space

        assert setting.observation_space.x == expected_x_space
        assert setting.action_space == expected_action_space

        # TODO: This is changing:
        assert setting.train_transforms == []
        # assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels]

        def check_env_spaces(env: gym.Env) -> None:
            if env.batch_size is not None:
                # TODO: This might not be totally accurate, for example because the
                # TransformObservation wrapper applied to a VectorEnv doesn't change the
                # single_observation_space, AFAIR.
                assert env.single_observation_space.x == expected_x_space
                assert env.single_action_space == expected_action_space
                assert isinstance(env.observation_space, TypedDictSpace), (env, env.observation_space)
                assert env.observation_space.x == expected_batched_x_space
                assert env.action_space == expected_batched_action_space
            else:
                assert env.observation_space.x == expected_x_space
                assert env.action_space == expected_action_space

        # FIXME: Move this to an instance method on the test class so that subclasses
        # can change stuff in it.
        def check_obs(obs: ContinualRLSetting.Observations) -> None:
            if isinstance(self.Setting, partial):
                # NOTE: This happens when we sneakily switch out the self.Setting
                # attribute in other tests (for the SettingProxy for example).
                assert isinstance(obs, self.Setting.args[0].Observations)
            else:
                assert isinstance(obs, self.Setting.Observations)
            assert obs.x in expected_batched_x_space
            # In this particular case here, the task labels should be None.
            # FIXME: For IncrementalRL, this isn't correct! TestIncrementalRL should
            # therefore have its own version of this function.
            if self.Setting is ContinualRLSetting:
                assert obs.task_labels is None or all(
                    task_label is None for task_label in obs.task_labels
                )

        with setting.train_dataloader(batch_size=batch_size, num_workers=0) as env:
            assert env.batch_size == batch_size
            check_env_spaces(env)

            obs = env.reset()
            # BUG: TODO: The observation space that we use should actually check with
            # isinstance and over the fields that fit in the space. Here there is a bug
            # because the env observations also have a `done` field, while the space
            # doesn't.
            # assert obs in env.observation_space
            assert obs.x in env.observation_space.x  # this works though.

            # BUG: This doesn't currently work (it would need a tuple value rather than
            # an array).
            # assert obs.task_labels in env.observation_space.task_labels

            if batch_size:
                # FIXME: This differs between ContinualRL and IncrementalRL:
                if not setting.known_task_boundaries_at_train_time:
                    assert obs.task_labels[0] in setting.task_label_space
                    assert tuple(obs.task_labels) in env.observation_space.task_labels
                else:
                    assert obs.task_labels[0] in setting.task_label_space
                    assert obs.task_labels in env.observation_space.task_labels
                    assert (
                        np.array(obs.task_labels) in env.observation_space.task_labels
                    )
            else:
                assert obs.task_labels in env.observation_space.task_labels

            reset_obs = env.reset()
            check_obs(reset_obs)

            # BUG: Environment is closed? (batch_size = 3, dataset = 'CartPole-v0')
            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs)

            for iter_obs in take(env, 3):
                check_obs(iter_obs)
                _ = env.send(env.action_space.sample())

        with setting.val_dataloader(batch_size=batch_size, num_workers=0) as env:
            assert env.batch_size == batch_size
            check_env_spaces(env)

            reset_obs = env.reset()
            check_obs(reset_obs)

            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs)

            for iter_obs in take(env, 3):
                check_obs(iter_obs)
                _ = env.send(env.action_space.sample())

        # NOTE: Limiting the batch size at test time to None (i.e. a single env)
        # because of how the Monitor class works atm.
        batch_size = None
        expected_batched_x_space = expected_x_space
        expected_batched_action_space = expected_action_space
        with setting.test_dataloader(batch_size=batch_size, num_workers=0) as env:
            assert env.batch_size is None
            check_env_spaces(env)

            reset_obs = env.reset()
            check_obs(reset_obs)

            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs)

            # NOTE: Can't do this here, unless the episode is over, because the Monitor
            # doesn't want us to end an episode early!
            # for iter_obs in take(env, 3):
            #     check_obs(iter_obs)
            #     _ = env.send(env.action_space.sample())

        with setting.test_dataloader(batch_size=batch_size) as env:
            # NOTE: Can't do this here, unless the episode is over, because the Monitor
            # doesn't want us to end an episode early!
            for iter_obs in take(env, 3):
                check_obs(iter_obs)
                _ = env.send(env.action_space.sample())