Example #1
0
def test_higher_level_step():
    """Tests environment for higher level steps correctly.

    Steps the higher-level environment with a fixed goal and checks that the
    values returned by ``step`` agree with the attributes kept on the HIRO
    agent, first for a single step and then over 200 further steps.
    """
    # A fresh array is built from this tuple for every call so that no array
    # object is shared between the test and the agent.
    goal_values = (-1.0, 2.0, 3.0)

    hiro_agent = HIRO(config)
    h_env = hiro_agent.higher_level_agent.environment
    h_env.reset()
    state_before = hiro_agent.higher_level_state
    # Checked before the first step is taken.
    assert hiro_agent.higher_level_next_state is None
    next_state, reward, done, _ = h_env.step(np.array(goal_values))

    # The goal held by the agent after the step must match a goal transition
    # from the pre-step state to the returned next state.
    assert np.allclose(
        hiro_agent.goal,
        HIRO.goal_transition(state_before, np.array(goal_values), next_state))

    assert all(hiro_agent.higher_level_state == next_state)
    assert all(hiro_agent.higher_level_next_state == next_state)
    assert hiro_agent.higher_level_reward == reward
    assert hiro_agent.higher_level_done == done

    assert next_state.shape[0] == 3
    assert isinstance(reward, float)
    assert not done

    # The returned values must keep matching the agent's attributes on every
    # subsequent step.
    for _ in range(200):
        next_state, reward, done, _ = h_env.step(np.array(goal_values))
        assert all(hiro_agent.higher_level_next_state == next_state)
        assert hiro_agent.higher_level_reward == reward
        assert hiro_agent.higher_level_done == done
Example #2
0
def test_changing_max_lower_timesteps():
    """Checks a HIRO agent configured with a lower-level timestep limit of 1.

    Works on a deep copy of the module-level ``config`` so the override does
    not leak into other tests. After one higher-level step the episode must
    not be done, the agent's ``lower_level_done`` flag must be set, and the
    returned reward must equal the agent's ``higher_level_reward``.
    """
    single_step_config = copy.deepcopy(config)
    lower_level_params = single_step_config.hyperparameters["LOWER_LEVEL"]
    lower_level_params["max_lower_level_timesteps"] = 1

    agent = HIRO(single_step_config)
    higher_env = agent.higher_level_agent.environment
    higher_env.reset()

    _, reward, done, _ = higher_env.step(np.array([-1.0, 2.0, 3.0]))

    assert not done
    assert agent.lower_level_done
    assert reward == agent.higher_level_reward
Example #3
0
def test_goal_transition():
    """Checks goal transitions via the static helper and the lower-level env.

    Uses the module-level ``hiro_agent``, ``ll_env`` and ``h_env`` and
    overwrites attributes on the shared agent, so statement order matters.
    """
    # Scenario 1: call HIRO.goal_transition directly with scalar inputs.
    hiro_agent.higher_level_state = 2
    hiro_agent.goal = 9
    following_state = 3
    transitioned_goal = HIRO.goal_transition(
        hiro_agent.higher_level_state, hiro_agent.goal, following_state)
    assert transitioned_goal == 8

    # Scenario 2: same scalars, but the goal is updated through the
    # lower-level environment's update_goal.
    hiro_agent.higher_level_state = 2
    hiro_agent.goal = 9
    following_state = 3
    ll_env.update_goal(following_state)
    assert hiro_agent.goal == 8

    # Scenario 3: a real lower-level step with a vector goal. A fresh array
    # is built each time so the agent's goal and the expected value never
    # share an array object.
    goal_values = (2.0, 4.0, -3.0)
    h_env.reset()
    hiro_agent.goal = np.array(goal_values)
    hiro_agent.higher_level_reward = 0
    ll_env.reset()
    state_at_reset = hiro_agent.higher_level_state
    observation, _, _, _ = ll_env.step(np.array([random.random()]))

    # Only the first three entries of the returned observation are compared.
    expected_goal = state_at_reset + np.array(goal_values) - observation[0:3]
    assert all(hiro_agent.goal == expected_goal)
Example #4
0
        },
        "batch_size": 256,
        "discount_rate": 0.9,
        "mu": 0.0,  # for O-H noise
        "theta": 0.15,  # for O-H noise
        "sigma": 0.25,  # for O-H noise
        "action_noise_std": 0.2,  # for TD3
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 20,
        "learning_updates_per_learning_session": 10,
        "number_goal_candidates": 8,
        "clip_rewards": False
    },
}

# Module-level fixtures: a single HIRO agent built from `config`, together with
# the environments held by its lower-level and higher-level agents. Test
# functions in this module use these objects directly and assign to attributes
# on `hiro_agent`, so state set by one test is visible to later ones.
hiro_agent = HIRO(config)
ll_env = hiro_agent.lower_level_agent.environment
h_env = hiro_agent.higher_level_agent.environment


def test_environment_resets():
    """Checks what the lower-level environment reports after a reset.

    Uses the module-level ``ll_env`` and ``hiro_agent``. The first reset must
    yield a length-6 observation with the expected timestep settings. The
    second reset, taken after pinning the agent's higher-level state and
    goal, must return those two vectors joined end to end.
    """
    initial_observation = ll_env.reset()
    assert initial_observation.shape[0] == 6
    assert ll_env.max_sub_policy_timesteps == 3
    assert ll_env.lower_level_timesteps == 0

    # Pin state and goal so the next reset's output is fully determined.
    hiro_agent.higher_level_state = np.array([0., 1.0, 2.0])
    hiro_agent.goal = np.array([1.0, 4.0, -22.])

    observed = ll_env.reset()
    expected = np.array([0.0, 1.0, 2.0, 1.0, 4.0, -22.0])
    assert all(observed == expected)