Esempio n. 1
0
def test_bounds_velocity():
    """Check that stepping at a velocity bound keeps the velocity on that bound.

    Two fresh environments are exercised: one starting at ``BOUND_MIN_V`` and
    stepped with action 0, the other starting at ``BOUND_MAX_V`` and stepped
    with action 2.
    """
    # NOTE(review): actions 0 and 2 presumably push towards the lower and
    # upper velocity bound respectively -- confirm against MountainCar.step.
    cases = (("BOUND_MIN_V", 0), ("BOUND_MAX_V", 2))
    for bound_name, action in cases:
        car = MountainCar()
        car.v = getattr(car, bound_name)
        car.step(action)

        # The bound is read from the environment again after the step.
        assert car.v == getattr(car, bound_name)
Esempio n. 2
0
def test_prepares_observation_space():
    """Check shape, bounds and continuity of both observation dimensions.

    Each of the two entries of ``obs_spec`` must have shape ``(1,)``; entry 0
    must span ``[-1.2, 0.5]`` and entry 1 must span ``[-0.07, 0.07]``. Both
    entries must report themselves as continuous.
    """
    env = MountainCar()
    spec = env.obs_spec

    # (lo, hi) expected for each observation dimension, in index order.
    expected_bounds = ((-1.2, 0.5), (-0.07, 0.07))
    for index, (lo, hi) in enumerate(expected_bounds):
        dimension = spec[index]
        assert dimension.shape == (1, )
        assert dimension.lo == lo
        assert dimension.hi == hi

    # Continuity is checked last, after all shape/bound checks have passed.
    for index, _ in enumerate(expected_bounds):
        assert spec[index].is_continuous()
Esempio n. 3
0
def test_runs_in_mountain_car_environment():
    """Smoke test: run a SemiGradientSarsa agent in the MountainCar environment.

    There are no assertions; the test passes when ``core.Run(...).start()``
    returns without raising.
    """

    def perceive(env):
        # Observation, reward and termination flag read from the environment.
        return env.obs, env.reward, env.done()

    def act(action, env):
        # Apply the chosen action by stepping the environment.
        return env.step(action)

    car = MountainCar()
    program = SemiGradientSarsa(
        car.act_spec, car.obs_spec, alpha=0.5, epsilon=0.0, gamma=1.0)
    learner = core.Agent(program, perceive, act)

    core.Run(learner, car).start()
Esempio n. 4
0
def test_true_sarsa_lambda_runs_in_mountain_car_environment():
    """Smoke test: run a TrueOnlineSarsaLambda agent in MountainCar.

    There are no assertions; the test passes when ``core.Run(...).start()``
    returns without raising.
    """

    def perceive(env):
        # Observation, reward and termination flag read from the environment.
        return env.obs, env.reward, env.done()

    def act(action, env):
        # Apply the chosen action by stepping the environment.
        return env.step(action)

    car = MountainCar()
    program = TrueOnlineSarsaLambda(
        car.act_spec,
        car.obs_spec,
        alpha=0.2,
        epsilon=0.0,
        gamma=1.0,
        lmbda=0.9,
    )
    learner = core.Agent(program, perceive, act)

    core.Run(learner, car).start()
Esempio n. 5
0
def test_bounds_position():
    """Check that one step from p=-1.19 at minimum velocity hits the position bound.

    After stepping with action 0 the position must equal ``BOUND_MIN_P``, the
    velocity must be 0.0, and the reward must be -1.
    """
    car = MountainCar()

    # NOTE(review): -1.19 is presumably just inside the minimum position
    # bound, so a single step at BOUND_MIN_V overshoots it -- confirm.
    car.p = -1.19
    car.v = car.BOUND_MIN_V
    car.step(0)

    assert car.p == car.BOUND_MIN_P
    assert car.v == 0.0
    assert car.reward == -1
Esempio n. 6
0
def test_actor_critic_runs_in_mountain_car_environment():
    """Smoke test: run an ActorCriticLambda agent in MountainCar.

    There are no assertions; the test passes when ``core.Run(...).start()``
    returns without raising.
    """

    def perceive(env):
        # Observation, reward and termination flag read from the environment.
        return env.obs, env.reward, env.done()

    def act(action, env):
        # Apply the chosen action by stepping the environment.
        return env.step(action)

    car = MountainCar()
    program = ActorCriticLambda(
        car.act_spec,
        car.obs_spec,
        alpha_w=0.1,
        alpha_theta=0.01,
        gamma=1.0,
        lambda_w=0.9,
        lambda_theta=0.9,
    )
    learner = core.Agent(program, perceive, act)

    core.Run(learner, car).start()
Esempio n. 7
0
def test_prepares_action_space():
    """Check that the first action dimension has shape (1,) and spans [-1, 1]."""
    car = MountainCar()
    action_dimension = car.act_spec[0]

    assert action_dimension.shape == (1, )
    assert action_dimension.lo == -1
    assert action_dimension.hi == 1
Esempio n. 8
0
def test_is_done_when_top_of_the_mountain_is_reached():
    """Check that the environment reports done once the position is set to 0.5."""
    car = MountainCar()

    # Place the car directly at position 0.5 instead of driving it there.
    car.p = 0.5
    finished = car.done()

    assert finished