def test_higher_level_step():
    """Tests that the higher level environment steps correctly"""
    hiro_agent = HIRO(config)
    ll_env = hiro_agent.lower_level_agent.environment
    h_env = hiro_agent.higher_level_agent.environment
    h_env.reset()
    state_before = hiro_agent.higher_level_state
    assert hiro_agent.higher_level_next_state is None
    next_state, reward, done, _ = h_env.step(np.array([-1.0, 2.0, 3.0]))
    # after one higher level step, the goal must equal the transition of the
    # higher level action from the state before the step to the state after it
    assert np.allclose(hiro_agent.goal,
                       HIRO.goal_transition(state_before, np.array([-1.0, 2.0, 3.0]), next_state))
    assert all(hiro_agent.higher_level_state == next_state)
    assert all(hiro_agent.higher_level_next_state == next_state)
    assert hiro_agent.higher_level_reward == reward
    assert hiro_agent.higher_level_done == done
    assert next_state.shape[0] == 3
    assert isinstance(reward, float)
    assert not done
    for _ in range(200):
        next_state, reward, done, _ = h_env.step(np.array([-1.0, 2.0, 3.0]))
        assert all(hiro_agent.higher_level_state == next_state)
        assert all(hiro_agent.higher_level_next_state == next_state)
        assert hiro_agent.higher_level_reward == reward
        assert hiro_agent.higher_level_done == done
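
# The np.allclose check above holds even though several lower level steps may
# happen inside one higher level step, because h(s, g, s') = s + g - s'
# telescopes: composing it along a trajectory s_0, ..., s_T gives
# g_T = s_0 + g_0 - s_T whatever the intermediate states were, so per-step goal
# updates and a single update from the pre-step state to the post-step state
# agree. A minimal standalone sketch of that property, assuming only that
# HIRO.goal_transition implements s + g - s' (which test_goal_transition below
# checks numerically):

def test_goal_transition_telescopes():
    """Composing goal transitions along a trajectory equals one transition from start to end"""
    states = [np.random.uniform(-5.0, 5.0, 3) for _ in range(6)]
    goal = np.array([-1.0, 2.0, 3.0])
    for state, next_state in zip(states[:-1], states[1:]):
        goal = HIRO.goal_transition(state, goal, next_state)
    assert np.allclose(goal,
                       HIRO.goal_transition(states[0], np.array([-1.0, 2.0, 3.0]), states[-1]))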
def test_changing_max_lower_timesteps():
    """Tests that changing the max lower level timesteps works"""
    config2 = copy.deepcopy(config)
    config2.hyperparameters["LOWER_LEVEL"]["max_lower_level_timesteps"] = 1
    hiro_agent2 = HIRO(config2)
    h_env2 = hiro_agent2.higher_level_agent.environment
    h_env2.reset()
    next_state, reward, done, _ = h_env2.step(np.array([-1.0, 2.0, 3.0]))
    assert not done
    assert hiro_agent2.lower_level_done
    assert reward == hiro_agent2.higher_level_reward
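
# A companion check, assuming the "max_lower_level_timesteps" hyperparameter
# surfaces on the lower level environment as max_sub_policy_timesteps, which is
# what test_environment_resets verifies for the default config's value of 3:

def test_changed_max_lower_timesteps_propagates():
    """The overridden timestep limit should be visible on the lower level environment"""
    config3 = copy.deepcopy(config)
    config3.hyperparameters["LOWER_LEVEL"]["max_lower_level_timesteps"] = 1
    hiro_agent3 = HIRO(config3)
    assert hiro_agent3.lower_level_agent.environment.max_sub_policy_timesteps == 1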
def test_goal_transition():
    """Tests environment does goal transitions properly"""
    # the goal transition is h(s, g, s') = s + g - s', so 2 + 9 - 3 == 8
    hiro_agent.higher_level_state = 2
    hiro_agent.goal = 9
    next_state = 3
    assert HIRO.goal_transition(hiro_agent.higher_level_state, hiro_agent.goal, next_state) == 8

    # update_goal applies the same transition in place to the agent's goal
    hiro_agent.higher_level_state = 2
    hiro_agent.goal = 9
    next_state = 3
    ll_env.update_goal(next_state)
    assert hiro_agent.goal == 8

    # the goal should also transition when stepping the lower level environment
    h_env.reset()
    hiro_agent.goal = np.array([2.0, 4.0, -3.0])
    hiro_agent.higher_level_reward = 0
    ll_env.reset()
    state = hiro_agent.higher_level_state
    next_state, reward, done, _ = ll_env.step(np.array([random.random()]))
    assert all(hiro_agent.goal == state + np.array([2.0, 4.0, -3.0]) - next_state[0:3])
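
# The invariant checked above (s + g keeps pointing at a fixed absolute target)
# is also what defines the lower level's intrinsic reward in the HIRO paper,
# r = -||s + g - s'||_2. A pure-numpy sketch of that formula; this illustrates
# the paper's definition and is not an assertion about this repo's reward
# implementation:

def paper_intrinsic_reward(state, goal, next_state):
    """Negative L2 distance between the goal-implied target and the next state"""
    return -np.linalg.norm(state + goal - next_state)

def test_paper_intrinsic_reward_is_zero_when_goal_reached():
    """Reward is maximal (zero) exactly when the next state hits state + goal"""
    state = np.array([2.0, 4.0, -3.0])
    goal = np.array([1.0, 0.0, 1.0])
    assert paper_intrinsic_reward(state, goal, state + goal) == 0.0
    assert paper_intrinsic_reward(state, goal, state) == -np.linalg.norm(goal)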
}, "batch_size": 256, "discount_rate": 0.9, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 20, "learning_updates_per_learning_session": 10, "number_goal_candidates": 8, "clip_rewards": False }, } hiro_agent = HIRO(config) ll_env = hiro_agent.lower_level_agent.environment h_env = hiro_agent.higher_level_agent.environment def test_environment_resets(): """Tests created environments reset properly""" lower_level_state = ll_env.reset() assert lower_level_state.shape[0] == 6 assert ll_env.max_sub_policy_timesteps == 3 assert ll_env.lower_level_timesteps == 0 hiro_agent.higher_level_state = np.array([0., 1.0, 2.0]) hiro_agent.goal = np.array([1.0, 4.0, -22.]) assert all(ll_env.reset() == np.array([0.0, 1.0, 2.0, 1.0, 4.0, -22.0]))