def test_render_heading_control(self):
    """
    Runs the turn heading control task with a random agent, rendering with
    mode='flightgear' and printing a status report at regular intervals.

    Rendering happens once after each reset and then every `render_every`
    steps; the report is printed every `report_every` steps, and the total
    episode reward is printed when each episode finishes.
    """
    self.setUp(plane=aircraft.a320,
               task_type=tasks.TurnHeadingControlTask,
               shaping=tasks.Shaping.EXTRA_SEQUENTIAL)
    agent = RandomAgent(self.env.action_space)
    render_every = 5
    report_every = 20
    num_episodes = 50

    for _ in range(num_episodes):
        ep_reward = 0
        step_number = 0
        state = self.env.reset()
        self.env.render(mode='flightgear')

        # every episode takes at least one step, so loop until the env
        # reports the episode as done
        while True:
            action = agent.act(state)
            state, reward, done, info = self.env.step(action)
            ep_reward += reward

            is_render_step = step_number % render_every == 0
            is_report_step = step_number % report_every == 0

            if is_render_step:
                self.env.render(mode='flightgear')

            if is_report_step:
                heading_target = tasks.HeadingControlTask.target_track_deg
                print(f'time:\t{self.env.sim.get_sim_time()} s')
                print(f'last reward:\t{reward}')
                print(f'episode reward:\t{ep_reward}')
                print(f'gear status:\t{self.env.sim[prp.gear]}')
                print(f'thrust eng0:\t{self.env.sim[prp.engine_thrust_lbs]}')
                print(f'thrust eng1:\t {self.env.sim[prp.Property("propulsion/engine[1]/thrust-lbs", "")]}')
                print(f'heading:\t{self.env.sim[prp.heading_deg]}')
                print(f'target heading:\t{self.env.sim[heading_target]}')
                print('\n')

            step_number += 1
            if done:
                break

        print(f'***\n'
              f'EPISODE REWARD: {ep_reward}\n'
              f'***\n')
def test_render_steady_level_flight_random(self):
    """
    Runs a single episode of HeadingControlTask with a random agent,
    rendering with mode='human' every `render_every` steps.
    """
    self.setUp(task_type=tasks.HeadingControlTask)
    agent = RandomAgent(self.env.action_space)
    render_every = 5
    ep_reward = 0
    step_number = 0

    state = self.env.reset()
    # the episode always takes at least one step; stop once the env says done
    while True:
        state, reward, done, info = self.env.step(agent.act(state))
        ep_reward += reward

        is_render_step = step_number % render_every == 0
        if is_render_step:
            self.env.render(mode='human')

        step_number += 1
        if done:
            break
def init_and_reset_env(self, env: JsbSimEnv):
    """
    Checks the configuration of a heading control env, resets it, then
    closes it and verifies that the simulation instance has been released.

    The env is closed in a `finally` clause so that a failing assertion
    does not leave the simulation open for the rest of the test run.

    :param env: the environment under test; it is closed by this method
    """
    try:
        self.assertIsInstance(env.task, HeadingControlTask)

        # we interact at 5 Hz, so we expect the sim to run 12 timesteps per
        #   interaction since it runs at 120 Hz
        self.assertEqual(12, env.sim_steps_per_agent_step)

        # we init a random agent on the env's action space
        agent = RandomAgent(action_space=env.action_space)
        self.assertEqual(env.action_space, agent.action_space)

        # this task has an action space of three controls: aileron, elevator, rudder
        expected_num_actions = 3
        self.assertEqual(expected_num_actions, len(agent.action_space.low))

        # we see that the action space has the correct low and high range of +-1.0
        expect_low = np.array([-1.0] * expected_num_actions)
        expect_high = np.array([1.0] * expected_num_actions)
        np.testing.assert_array_almost_equal(expect_high, env.action_space.high)
        np.testing.assert_array_almost_equal(expect_low, env.action_space.low)

        # we reset the env and receive the first state; the env is now ready
        state = env.reset()
        self.assertEqual(len(env.observation_space.low), len(state))
    finally:
        # always close the env, even when one of the checks above fails
        env.close()

    # closing the env is expected to close JSBSim with it
    self.assertIsNone(env.sim.jsbsim)
class TestRandomAgent(unittest.TestCase):
    """Tests RandomAgent against the action space of a FlightTaskStub."""

    def setUp(self):
        """Builds the stub task's action space and a RandomAgent on top of it."""
        space = FlightTaskStub().get_action_space()
        self.action_space = space
        self.agent = RandomAgent(action_space=space)

    def test_act_generates_valid_actions(self):
        """Each action returned by act() must be contained in the action space."""
        num_test_actions = 5

        for _ in range(num_test_actions):
            # the state argument is passed as None in this test
            sampled = self.agent.act(None)
            is_valid = self.action_space.contains(sampled)
            self.assertTrue(is_valid)
def test_run_episode_steady_level_flight_no_render(self):
    """
    Runs several episodes of HeadingControlTask with a random agent and no
    rendering, printing time and reward every `report_every` steps.
    """
    self.setUp(task_type=tasks.HeadingControlTask)
    agent = RandomAgent(self.env.action_space)
    report_every = 20
    num_episodes = 10

    for _ in range(num_episodes):
        ep_reward = 0
        step_number = 0
        state = self.env.reset()

        # every episode takes at least one step; stop once the env says done
        while True:
            state, reward, done, info = self.env.step(agent.act(state))
            ep_reward += reward

            is_report_step = step_number % report_every == 0
            if is_report_step:
                print(f'time:\t{self.env.sim.get_sim_time()} s')
                print(f'last reward:\t{reward}')
                print(f'episode reward:\t{ep_reward}')

            step_number += 1
            if done:
                break
def take_step_with_random_agent(self, env: JsbSimEnv):
    """
    Resets the env, takes one step with a random agent and checks the
    resulting state, simulation time, terminal flag and engine thrust.

    The env is closed in a `finally` clause so that a failing assertion
    does not leave the simulation open for the rest of the test run.

    :param env: the environment under test; it is closed by this method
    """
    try:
        agent = RandomAgent(action_space=env.action_space)

        # we set up for a loop through one episode
        first_state = env.reset()

        # we take a single step
        action = agent.act(first_state)
        state, _, done, _ = env.step(action)

        # we see the state has changed
        self.assertEqual(first_state.shape, state.shape)
        self.assertTrue(np.any(np.not_equal(first_state, state)),
                        msg='state should have changed after simulation step')

        # one agent step should advance the sim clock by
        #   sim_steps_per_agent_step / JSBSIM_DT_HZ
        expected_time_step_size = env.sim_steps_per_agent_step / env.JSBSIM_DT_HZ
        self.assertAlmostEqual(expected_time_step_size, env.sim.get_sim_time())
        self.assertFalse(done, msg='episode is terminal after only a single step')

        # the aircraft engines are running, as per initial conditions
        self.assertNotAlmostEqual(env.sim[prp.engine_thrust_lbs], 0)
    finally:
        # always close the env, even when one of the checks above fails
        env.close()
def setUp(self):
    """Builds the stub task's action space and a RandomAgent on top of it."""
    space = FlightTaskStub().get_action_space()
    self.action_space = space
    self.agent = RandomAgent(action_space=space)