    def test_random_actor(self):
        """Test that RandomActor actions are exposed and combined by ResidualWrapper."""
        env = gym.make("LunarLanderContinuous-v2")
        env = ensure_vec_env(env)
        actor = RandomActor(env.action_space)
        env = ResidualWrapper(env, actor)
        # The wrapped observation is an (observation, base_action) tuple.
        ob, ac = env.reset()
        assert ac.shape == (1, *env.action_space.shape)
        assert ob.shape == (1, *env.observation_space.spaces[0].shape)
        assert ac.shape == (1, *env.observation_space.spaces[1].shape)
        assert isinstance(env.observation_space, Tuple)
        for _ in range(10):
            residual_ac = [env.action_space.sample()]
            (ob, ac_next), _, _, infos = env.step(residual_ac)
            # The executed action is the residual plus the base action, clipped to [-1, 1].
            rac = np.minimum(np.maximum(residual_ac[0] + ac[0], -1), 1)
            assert np.allclose(infos[0]['action'], rac)
            ac = ac_next
        self.t = time.time()
        self.env.render()
        action = self._get_human_action()
        return action

    def reset(self):
        """Zero out the stored human action."""
        self.human_agent_action[:] = 0.


if __name__ == '__main__':
    import gym
    import residual_shared_autonomy.drone_sim
    from dl.rl import ensure_vec_env

    env = gym.make("DroneReacherHuman-v0")
    env = ensure_vec_env(env)
    actor = DroneJoystickActor(env)

    # Roll out a few episodes driven entirely by the joystick actor.
    for _ in range(10):
        ob = env.reset()
        actor.reset()
        env.render()
        done = False
        reward = 0.0
        while not done:
            ob, r, done, _ = env.step(actor(ob))
            reward += r
        print(reward)