Esempio n. 1
0
    def sample_average_reward_func(env, n):
        """Return the mean per-episode reward sum over ``n`` rollouts.

        Each rollout resets ``env`` and then consumes
        ``until_done(env, env.action_space.sample)``, adding up the second
        field of every 4-tuple it yields (presumably the step reward, with
        ``until_done`` stepping the env until the episode ends -- confirm
        against its definition).

        Raises:
            ZeroDivisionError: if ``n`` is 0.
        """
        cumulative = 0
        for _episode in range(n):
            env.reset()
            rollout = until_done(env, env.action_space.sample)
            cumulative += sum(reward for _, reward, _, _ in rollout)

        return cumulative / n
Esempio n. 2
0
def test_raise_an_error_when_stepping_past_done(make_ready_env, datums):
    """Once ``until_done`` is exhausted, ``idle_step`` must raise.

    The expected exception is ``PortfolioResetNeeded``.
    """
    datums.add().rows([1], [2])
    env = make_ready_env()
    # Exhaust the iterator; the yielded items themselves are not needed.
    for _ in until_done(env, [1, 0]):
        pass
    with pytest.raises(PortfolioResetNeeded):
        idle_step(env)
def test_reset_environment_is_not_done(env, direction):
    """After an exhausted episode, ``reset`` makes the env steppable again.

    Runs ``until_done`` to exhaustion, resets, checks that the next step is
    not reported as done (via ``unpack_done``), then runs ``until_done`` to
    exhaustion once more to confirm that this also completes without error.
    """
    # Drain with explicit loops rather than ``all(...)``: ``all`` stops at
    # the first falsy item, which would silently leave the episode unfinished.
    for _ in until_done(env, direction):
        pass
    env.reset()
    assert not unpack_done(env.step(direction))
    for _ in until_done(env, direction):
        pass
def test_environment_has_a_max_episode_len(env):
    """``until_done`` with action 0 yields exactly ``env.max_len`` items."""
    steps_taken = 0
    for _ in until_done(env, 0):
        steps_taken += 1
    assert steps_taken == env.max_len
def test_render_agent_pos_in_green_when_reaching_goal(env, make_walk_string,
                                                      walk_len, capstdout):
    """Rendering after a finished action-1 episode shows the agent in green.

    The captured output must be the ``"(Right)"`` line followed by the walk
    string built with the agent at index ``walk_len - 1`` and color
    ``'green'``, plus a trailing newline.
    """
    # Drain with an explicit loop rather than ``all(...)``: ``all`` stops at
    # the first falsy item, which would silently leave the episode unfinished.
    for _ in until_done(env, 1):
        pass
    env.render()
    # Read the captured output before building the expectation, keeping the
    # same evaluation order as a direct ``read() == expected`` comparison.
    rendered = capstdout.read()
    expected = "(Right)\n" + make_walk_string(
        agent_pos=walk_len - 1, color='green') + "\n"
    assert rendered == expected
def test_walking_left_until_max_length_is_reached_achieves_minimum_reward(env):
    """Summed reward under constant action 0 equals ``env.reward_range[0]``."""
    episode = until_done(env, 0)
    # The second field of each yielded 4-tuple is the value being summed.
    total_reward = sum(reward for _, reward, _, _ in episode)
    assert total_reward == env.reward_range[0]
def test_walking_right_achieves_maximum_reward(env):
    """Summed reward under constant action 1 equals ``env.reward_range[1]``."""
    episode = until_done(env, 1)
    # The second field of each yielded 4-tuple is the value being summed.
    total_reward = sum(reward for _, reward, _, _ in episode)
    assert total_reward == env.reward_range[1]
def test_reaching_goal_on_the_right_returns_reward(env):
    """``last`` of the action-1 episode's rewards equals ``env.reward``.

    NOTE(review): ``last`` is assumed to return the final item of the
    iterable -- confirm against its definition.
    """
    rewards = (reward for _, reward, _, _ in until_done(env, 1))
    final_reward = last(rewards)
    assert final_reward == env.reward
Esempio n. 9
0
def test_has_a_max_episode_length(env, idle):
    """After a reset, an all-``idle`` episode yields ``env.max_len`` items.

    The action is supplied to ``until_done`` as a zero-argument callable that
    always returns ``idle``.
    """
    env.reset()
    step_count = 0
    for _ in until_done(env, lambda: idle):
        step_count += 1
    assert step_count == env.max_len