Example #1
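This test checks that ResidualWrapper exposes a Tuple observation of (observation, base action) and that the residual action passed to step() is added to the base actor's action and clipped before execution. The fragment omits its module-level imports; a plausible set, assuming the project's layout (the RandomActor/ResidualWrapper import path is a guess), would be:

import gym
import numpy as np
from gym.spaces import Tuple
from dl.rl import ensure_vec_env
from residual_shared_autonomy import RandomActor, ResidualWrapper  # import path assumed, adjust to the project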
        def test_random_actor(self):
            """Test."""
            env = gym.make("LunarLanderContinuous-v2")
            env = ensure_vec_env(env)
            actor = RandomActor(env.action_space)
            env = ResidualWrapper(env, actor)
            # reset() on the wrapped env returns both the observation and the
            # base actor's action, each with a leading batch dimension of 1.
            ob, ac = env.reset()
            # The wrapped observation space is a Tuple of
            # (original observation space, base action space).
            assert isinstance(env.observation_space, Tuple)
            assert ac.shape == (1, *env.action_space.shape)
            assert ob.shape == (1, *env.observation_space.spaces[0].shape)
            assert ac.shape == (1, *env.observation_space.spaces[1].shape)

            for _ in range(10):
                # Step with a random residual action; the wrapper adds it to
                # the base actor's action and clips to the action bounds.
                residual_ac = [env.action_space.sample()]
                (ob, ac_next), _, _, infos = env.step(residual_ac)
                rac = np.clip(residual_ac[0] + ac[0], -1., 1.)
                assert np.allclose(infos[0]['action'], rac)
                ac = ac_next
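The step() call follows the old four-tuple Gym API; infos[0]['action'] holds the action that was actually executed, which the assertion compares against the clipped sum of the base and residual actions.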
Example #2
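This fragment is the tail of a joystick-controlled human actor: the closing lines of the method that returns the human's current input (time is assumed to be imported at module level, and the latest joystick reading is assumed to live in self.human_agent_action), its reset(), and a __main__ demo that flies DroneReacherHuman-v0 with the joystick actor.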
        # Timestamp the step, render so the human can see the scene, then
        # return the current joystick reading as the action.
        self.t = time.time()
        self.env.render()
        action = self._get_human_action()
        return action

    def reset(self):
        # Clear the stored human action at the start of an episode.
        self.human_agent_action[:] = 0.


if __name__ == '__main__':
    import gym
    import residual_shared_autonomy.drone_sim  # importing registers the drone envs with gym
    from dl.rl import ensure_vec_env

    env = gym.make("DroneReacherHuman-v0")
    env = ensure_vec_env(env)

    actor = DroneJoystickActor(env)

    # Run 10 human-controlled episodes and print the return of each one.
    for _ in range(10):
        ob = env.reset()
        actor.reset()
        env.render()
        done = False
        reward = 0.0

        while not done:
            ob, r, done, _ = env.step(actor(ob))
            reward += r
        print(reward)
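Running the demo presumably requires a connected joystick for DroneJoystickActor to read, and it relies on the residual_shared_autonomy.drone_sim import above to register DroneReacherHuman-v0 before gym.make is called.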