def sample_average_reward_func(env, n):
    """Estimate the mean episode reward over n episodes of random actions."""
    total = 0
    for _ in range(n):
        env.reset()
        total += sum(
            r for _, r, _, _ in until_done(env, env.action_space.sample))
    return total / n

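# The tests below lean on a few helpers (until_done, unpack_done, last,
# idle_step) whose real definitions live elsewhere in this repo, along with
# project-specific fixtures (env, direction, idle, make_walk_string, ...).
# A minimal sketch of how until_done, unpack_done, and last are assumed to
# behave, given the classic Gym step() 4-tuple (obs, reward, done, info);
# the actual implementations may differ:
#
#     def until_done(env, action):
#         """Step env until done, yielding each (obs, reward, done, info).
#
#         `action` may be a fixed action (e.g. 0, 1, or [1, 0]) or a
#         callable (e.g. env.action_space.sample) evaluated once per step.
#         """
#         done = False
#         while not done:
#             step = env.step(action() if callable(action) else action)
#             done = unpack_done(step)
#             yield step
#
#     def unpack_done(step):
#         _, _, done, _ = step
#         return done
#
#     def last(iterable):
#         """Return the final item of an iterable (assumed non-empty)."""
#         item = None
#         for item in iterable:
#             pass
#         return item
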
def test_raise_an_error_when_stepping_past_done(make_ready_env, datums):
    datums.add().rows([1], [2])
    env = make_ready_env()
    # Run the episode to completion, then step again without resetting.
    list(until_done(env, [1, 0]))
    with pytest.raises(PortfolioResetNeeded):
        idle_step(env)

def test_reset_environment_is_not_done(env, direction):
    # Exhaust one episode, reset, and confirm the next step is not done.
    all(until_done(env, direction))
    env.reset()
    assert not unpack_done(env.step(direction))
    # The reset environment can be run through a full episode again.
    all(until_done(env, direction))

def test_environment_has_a_max_episode_len(env):
    assert sum(1 for _ in until_done(env, 0)) == env.max_len

def test_render_agent_pos_in_green_when_reaching_goal(
        env, make_walk_string, walk_len, capstdout):
    all(_ for _ in until_done(env, 1))
    env.render()
    assert capstdout.read() == "(Right)\n" + make_walk_string(
        agent_pos=walk_len - 1, color='green') + "\n"

def test_walking_left_until_max_length_is_reached_achieves_minimum_reward(env):
    assert sum(r for _, r, _, _ in until_done(env, 0)) == env.reward_range[0]

def test_walking_right_achieves_maximum_reward(env):
    assert sum(r for _, r, _, _ in until_done(env, 1)) == env.reward_range[1]

def test_reaching_goal_on_the_right_returns_reward(env):
    assert last(r for _, r, _, _ in until_done(env, 1)) == env.reward

def test_has_a_max_episode_length(env, idle):
    env.reset()
    assert sum(1 for _ in until_done(env, lambda: idle)) == env.max_len