def test_bounds_velocity():
    """Check that a step taken at a velocity bound leaves ``v`` on that bound.

    Two scenarios are exercised, each on a freshly constructed environment:
    action 0 starting from ``BOUND_MIN_V`` and action 2 starting from
    ``BOUND_MAX_V``.
    """
    # (name of the bound attribute, action passed to step)
    scenarios = (
        ("BOUND_MIN_V", 0),
        ("BOUND_MAX_V", 2),
    )
    for bound_attr, action in scenarios:
        car = MountainCar()
        car.v = getattr(car, bound_attr)
        car.step(action)
        # The bound is re-read after the step rather than cached beforehand.
        assert car.v == getattr(car, bound_attr)
def test_prepares_observation_space():
    """Check shape, bounds and continuity of both observation-spec entries.

    Entry 0 must span [-1.2, 0.5] and entry 1 must span [-0.07, 0.07]
    (presumably position and velocity -- confirm against MountainCar).
    Both must have shape ``(1,)`` and report themselves as continuous.
    """
    car = MountainCar()

    # Expected (lo, hi) per observation entry, indexed like obs_spec.
    expected_bounds = (
        (-1.2, 0.5),
        (-0.07, 0.07),
    )
    for index, (low, high) in enumerate(expected_bounds):
        assert car.obs_spec[index].shape == (1, )
        assert car.obs_spec[index].lo == low
        assert car.obs_spec[index].hi == high

    # Continuity is checked last, after all shape/bound checks.
    for index in (0, 1):
        assert car.obs_spec[index].is_continuous()
def test_runs_in_mountain_car_environment():
    """Smoke test: drive MountainCar with a SemiGradientSarsa agent program.

    There are no explicit assertions; the test passes when
    ``core.Run(...).start()`` returns without raising.
    """
    car = MountainCar()

    def perceive(env):
        # Percept handed to the agent: (observation, reward, done flag).
        return (env.obs, env.reward, env.done())

    def act(action, env):
        # Apply the chosen action to the environment.
        return env.step(action)

    learner = SemiGradientSarsa(
        car.act_spec,
        car.obs_spec,
        alpha=0.5,
        epsilon=0.0,
        gamma=1.0,
    )
    runner = core.Run(core.Agent(learner, perceive, act), car)
    runner.start()
def test_true_sarsa_lambda_runs_in_mountain_car_environment():
    """Smoke test: drive MountainCar with a TrueOnlineSarsaLambda program.

    There are no explicit assertions; the test passes when
    ``core.Run(...).start()`` returns without raising.
    """
    car = MountainCar()

    def perceive(env):
        # Percept handed to the agent: (observation, reward, done flag).
        return (env.obs, env.reward, env.done())

    def act(action, env):
        # Apply the chosen action to the environment.
        return env.step(action)

    learner = TrueOnlineSarsaLambda(
        car.act_spec,
        car.obs_spec,
        alpha=0.2,
        epsilon=0.0,
        gamma=1.0,
        lmbda=0.9,
    )
    runner = core.Run(core.Agent(learner, perceive, act), car)
    runner.start()
def test_bounds_position():
    """Check the state after stepping with action 0 from ``p = -1.19``.

    Starting at the minimum velocity, one step with action 0 must leave the
    position equal to ``BOUND_MIN_P``, the velocity equal to 0.0 and the
    reward equal to -1.
    """
    # Arrange: position -1.19, velocity at its minimum.
    car = MountainCar()
    car.p = -1.19
    car.v = car.BOUND_MIN_V

    # Act
    car.step(0)

    # Assert
    assert car.p == car.BOUND_MIN_P
    assert car.v == 0.0
    assert car.reward == -1
def test_actor_critic_runs_in_mountain_car_environment():
    """Smoke test: drive MountainCar with an ActorCriticLambda program.

    There are no explicit assertions; the test passes when
    ``core.Run(...).start()`` returns without raising.
    """
    car = MountainCar()

    def perceive(env):
        # Percept handed to the agent: (observation, reward, done flag).
        return (env.obs, env.reward, env.done())

    def act(action, env):
        # Apply the chosen action to the environment.
        return env.step(action)

    learner = ActorCriticLambda(
        car.act_spec,
        car.obs_spec,
        alpha_w=0.1,
        alpha_theta=0.01,
        gamma=1.0,
        lambda_w=0.9,
        lambda_theta=0.9,
    )
    runner = core.Run(core.Agent(learner, perceive, act), car)
    runner.start()
def test_prepares_action_space():
    """Check the first action-spec entry: shape ``(1,)``, lo -1 and hi 1."""
    car = MountainCar()

    # (attribute on act_spec[0], expected value), checked in this order.
    expectations = (
        ("shape", (1, )),
        ("lo", -1),
        ("hi", 1),
    )
    for attribute, expected in expectations:
        assert getattr(car.act_spec[0], attribute) == expected
def test_is_done_when_top_of_the_mountain_is_reached():
    """Check that ``done()`` is truthy once the position is set to 0.5."""
    car = MountainCar()
    car.p = 0.5  # position the test name treats as the top of the mountain

    finished = car.done()
    assert finished