Example #1
def _convert_state(self, state):
    # Numeric scalars and numpy arrays become torch tensors;
    # dict observations are converted recursively, key by key.
    if isinstance(state, (float, int)):
        state = ch.totensor(state)
    if isinstance(state, dict):
        state = {k: self._convert_state(state[k]) for k in state}
    if isinstance(state, np.ndarray):
        state = ch.totensor(state)
    # Vectorized envs prepend a batch axis; squeeze(0) drops it when it has size 1.
    if self.is_vectorized and isinstance(state, th.Tensor):
        state = state.squeeze(0)
    return state
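
This helper appears to belong to cherry's `Torch` environment wrapper (the same `envs.Torch` used in the test code further down). A hedged usage sketch, assuming `cherry` and the older `gym` API are installed; the environment id is illustrative:

import gym
import cherry.envs as envs

# Wrapping an env in envs.Torch routes observations through _convert_state,
# so reset()/step() hand back torch tensors instead of numpy arrays.
env = envs.Torch(gym.make('CartPole-v0'))
state = env.reset()
print(type(state))  # expected: <class 'torch.Tensor'>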
Example #2
def _convert_state(self, state):
    if isinstance(state, (float, int)):
        state = ch.totensor(state)
    if isinstance(state, dict):
        state = {k: self._convert_state(state[k]) for k in state}
    if isinstance(state, np.ndarray):
        state = ch.totensor(state)
    # We need to check for num_envs because self.is_vectorized returns
    # False when num_envs == 1, but the state still needs squeezing.
    if hasattr(self, 'num_envs') and isinstance(state, th.Tensor):
        state = state.squeeze(0)
    return state
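
The comment in this variant concerns the leading batch axis that vectorized environments prepend. A small torch-only illustration (values are made up) of why a `num_envs=1` state still needs `squeeze(0)`:

import torch as th

# A vectorized env with a single worker still returns a batched observation,
# e.g. shape (1, 4) for CartPole; squeeze(0) removes that leading axis.
batched = th.zeros(1, 4)
print(batched.squeeze(0).shape)         # torch.Size([4])

# With more than one worker the leading axis is not of size 1,
# so squeeze(0) leaves the shape untouched.
print(th.zeros(8, 4).squeeze(0).shape)  # torch.Size([8, 4])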
Example #3
        def test_config(n_envs, base_env, use_torch, use_logger, return_info):
            config = 'n_envs' + str(n_envs) + '-base_env' + str(base_env) \
                    + '-torch' + str(use_torch) + '-logger' + str(use_logger) \
                    + '-info' + str(return_info)
            if isinstance(base_env, str):
                env = vec_env = gym.vector.make(base_env, num_envs=n_envs)
            else:

                def make_env():
                    env = base_env()
                    return env

                env_fns = [make_env for _ in range(n_envs)]
                env = vec_env = AsyncVectorEnv(env_fns)

            if use_logger:
                env = envs.Logger(env, interval=5, logger=self.logger)

            if use_torch:
                env = envs.Torch(env)
                policy = lambda x: ch.totensor(vec_env.action_space.sample())
            else:
                policy = lambda x: vec_env.action_space.sample()

            if return_info:
                agent = lambda x: (policy(x), {'policy': policy(x)[0]})
            else:
                agent = policy

            # Gather experience
            env = envs.Runner(env)
            replay = env.run(agent, steps=NUM_STEPS)

            # Pre-compute some shapes
            shape = (NUM_STEPS, n_envs)
            state_shape = vec_env.observation_space.sample()[0]
            if isinstance(state_shape, (int, float)):
                state_shape = tuple()
            else:
                state_shape = state_shape.shape
            action_shape = vec_env.action_space.sample()[0]
            if isinstance(action_shape, (int, float)):
                action_shape = (1, )
            else:
                action_shape = action_shape.shape
            done_shape = tuple()

            # Check shapes
            states = replay.state()
            self.assertEqual(states.shape, shape + state_shape, config)
            actions = replay.action()
            self.assertEqual(actions.shape, shape + action_shape, config)
            dones = replay.done()
            self.assertEqual(dones.shape, shape + done_shape, config)
            if return_info:
                policies = replay.policy()
                self.assertEqual(policies.shape, (NUM_STEPS, ) + action_shape,
                                 config)
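
The helper above is a nested test function that closes over `self.logger` and `NUM_STEPS`, so it only runs inside the same test method. A hypothetical driver loop over a few configurations could look like this (parameter values and the environment id are illustrative, not taken from the original suite):

from itertools import product

for n_envs, use_torch, use_logger, return_info in product(
        [1, 4], [False, True], [False, True], [False, True]):
    test_config(n_envs,
                base_env='CartPole-v1',
                use_torch=use_torch,
                use_logger=use_logger,
                return_info=return_info)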
Example #4
    def append(self,
               state=None,
               action=None,
               reward=None,
               next_state=None,
               done=None,
               **infos):
        """
        **Description**

        Appends a new transition to the ExperienceReplay.

        **Arguments**

        * **state** (tensor/ndarray/list) - Originating state.
        * **action** (tensor/ndarray/list) - Executed action.
        * **reward** (tensor/ndarray/list) - Observed reward.
        * **next_state** (tensor/ndarray/list) - Resulting state.
        * **done** (tensor/bool) - Is `next_state` a terminal (absorbing)
          state?
        * **infos** (dict, *optional*, default=None) - Additional information
          on the transition.

        **Example**
        ~~~python
        replay.append(state, action, reward, next_state, done, info={
            'density': density,
            'log_prob': density.log_prob(action),
        })
        ~~~
        """
        for key in infos:
            if _istensorable(infos[key]):
                infos[key] = ch.totensor(infos[key])
        sars = Transition(ch.totensor(state), ch.totensor(action),
                          ch.totensor(reward), ch.totensor(next_state),
                          ch.totensor(done), **infos)
        self._storage.append(sars.to(self.device))
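
A short usage sketch for `append`, assuming `cherry` and the older 4-tuple `gym` step API used elsewhere in these examples; the environment id, step count, and the `log_prob` info value are illustrative:

import gym
import cherry as ch

env = gym.make('CartPole-v0')
replay = ch.ExperienceReplay()
state = env.reset()
for _ in range(10):
    action = env.action_space.sample()
    next_state, reward, done, _ = env.step(action)
    # Extra keyword infos (here a dummy log_prob) are stored as tensors too.
    replay.append(state, action, reward, next_state, done, log_prob=0.0)
    state = env.reset() if done else next_state
print(len(replay))  # 10 transitions stored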
Example #5
        def test_config(n_envs, n_episodes, base_env, use_torch, use_logger,
                        return_info, retry):
            config = 'n_envs' + str(n_envs) + '-n_eps' + str(n_episodes) \
                    + '-base_env' + str(base_env) \
                    + '-torch' + str(use_torch) + '-logger' + str(use_logger) \
                    + '-info' + str(return_info)
            if isinstance(base_env, str):
                env = vec_env = gym.vector.make(base_env, num_envs=n_envs)
            else:

                def make_env():
                    env = base_env()
                    return env

                env_fns = [make_env for _ in range(n_envs)]
                env = vec_env = AsyncVectorEnv(env_fns)

            if use_logger:
                env = envs.Logger(env, interval=5, logger=self.logger)

            if use_torch:
                env = envs.Torch(env)
                policy = lambda x: ch.totensor(vec_env.action_space.sample())
            else:
                policy = lambda x: vec_env.action_space.sample()

            if return_info:
                agent = lambda x: (policy(x), {
                    'policy': policy(x)[0],
                    'act': policy(x)
                })
            else:
                agent = policy

            # Gather experience
            env = envs.Runner(env)
            replay = env.run(agent, episodes=n_episodes)
            if retry:
                replay = env.run(agent, episodes=n_episodes)

            # Pre-compute some shapes
            shape = (len(replay), )
            state_shape = vec_env.observation_space.sample().shape[1:]
            action_shape = np.array(vec_env.action_space.sample())[0].shape
            if len(action_shape) == 0:
                action_shape = (1, )
            done_shape = (1, )

            # Check shapes
            states = replay.state()
            self.assertEqual(states.shape, shape + state_shape, config)
            actions = replay.action()
            self.assertEqual(actions.shape, shape + action_shape, config)
            dones = replay.done()
            self.assertEqual(dones.shape, shape + done_shape, config)
            if return_info:
                policies = replay.policy()
                self.assertEqual(policies.shape, shape + action_shape, config)
                acts = replay.act()
                self.assertEqual(acts.shape,
                                 (len(replay), n_envs) + action_shape, config)
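
The last assertions rely on the extra info keys returned by the agent (`policy`, `act`) becoming retrievable as `replay.policy()` and `replay.act()`. A tiny hedged illustration of that pattern with a hand-built replay (values are made up):

import cherry as ch

replay = ch.ExperienceReplay()
for step in range(3):
    # The extra `policy` keyword ends up as a per-transition info field.
    replay.append(0.0, 1, 1.0, 0.0, False, policy=float(step))
print(replay.policy())  # expected: tensor holding the stored 'policy' values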