"""Demo script: train an AVECPPO agent on MountainCar, optionally rendering a rollout."""
from rlberry.agents import AVECPPOAgent
from rlberry.envs.classic_control import MountainCar

# Toggle to visualize a greedy rollout after training.
render = False

# zip over parallel lists keeps the env / budget / horizon triples aligned;
# currently only one configuration is exercised.
for env, n_episodes, horizon in zip([MountainCar()], [40000], [256]):
    print("Running AVECPPO on %s" % env.name)
    agent = AVECPPOAgent(env,
                         n_episodes=n_episodes,
                         horizon=horizon,
                         gamma=0.99,
                         learning_rate=0.00025,
                         eps_clip=0.2,
                         k_epochs=4)
    agent.fit()

    if render:
        # NOTE(review): the original file was whitespace-mangled; the rollout
        # is assumed to sit inside `if render:` (as in the sibling A2C demo) —
        # confirm against the upstream rlberry example.
        env.enable_rendering()
        state = env.reset()
        for tt in range(200):
            action = agent.policy(state)
            next_state, reward, done, _ = env.step(action)
            state = next_state
        env.render()
"""
===============================
A demo of MountainCar environment
===============================
Illustration of MountainCar environment

.. video:: ../../video_plot_montain_car.mp4
   :width: 600
"""
# sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_montain_car.jpg'

from rlberry.agents.mbqvi import MBQVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.wrappers import DiscretizeStateWrapper

# MBQVI is a tabular method, so the continuous state space is discretized
# (20 bins per dimension) before training.
_env = MountainCar()
env = DiscretizeStateWrapper(_env, 20)
agent = MBQVIAgent(env, n_samples=40, gamma=0.99)
agent.fit()

# Roll out the learned policy for 200 steps and record the frames.
env.enable_rendering()
state = env.reset()
for tt in range(200):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state

video = env.save_video("_video/video_plot_montain_car.mp4")
"""Demo script: train RS-UCBVI on MountainCar and PBall2D, then render a rollout."""
from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.envs.benchmarks.ball_exploration import PBall2D

# Each environment gets its own planning horizon (170 for MountainCar, 50 for PBall2D).
for env, horizon in zip([MountainCar(), PBall2D()], [170, 50]):
    print("Running RS-UCBVI on %s" % env.name)
    agent = RSUCBVIAgent(env,
                         n_episodes=1000,
                         gamma=0.99,
                         horizon=horizon,
                         bonus_scale_factor=0.1)
    agent.fit()

    # Roll out the learned policy for 200 steps, then display the recording.
    env.enable_rendering()
    state = env.reset()
    for tt in range(200):
        action = agent.policy(state)
        next_state, reward, done, _ = env.step(action)
        state = next_state
    env.render()
"""
==================================================
A demo of RSUCBVI algorithm in MountainCar environment
==================================================
Illustration of how to set up an RSUCBVI algorithm in rlberry.
The environment chosen here is MountainCar environment.

.. video:: ../../video_plot_rsucbvi.mp4
   :width: 600
"""
# sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_rsucbvi.jpg'

from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar

env = MountainCar()
horizon = 170
print("Running RS-UCBVI on %s" % env.name)
agent = RSUCBVIAgent(env,
                     gamma=0.99,
                     horizon=horizon,
                     bonus_scale_factor=0.1)
agent.fit(budget=500)

# Roll out the learned policy for 200 steps and record the frames.
env.enable_rendering()
state = env.reset()
for tt in range(200):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state

video = env.save_video("_video/video_plot_rsucbvi.mp4")
"""Demo script: train A2C on PBall2D and MountainCar with fixed seeding, optionally rendering."""
from rlberry.agents import A2CAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.seeding import seeding
from rlberry.utils.logging import configure_logging

# Toggle to visualize a greedy rollout after training.
render = True

# Fix the global seed for reproducibility and enable verbose logging.
seeding.set_global_seed(1223)
configure_logging("DEBUG")

# zip keeps each env aligned with its episode budget and horizon:
# PBall2D -> (400 episodes, horizon 256), MountainCar -> (40000 episodes, horizon 512).
for env, n_episodes, horizon in zip([PBall2D(), MountainCar()],
                                    [400, 40000],
                                    [256, 512]):
    print("Running A2C on %s" % env.name)
    agent = A2CAgent(env,
                     n_episodes=n_episodes,
                     horizon=horizon,
                     gamma=0.99,
                     learning_rate=0.001,
                     k_epochs=4)
    agent.fit()

    if render:
        # NOTE(review): the original file was whitespace-mangled; the rollout
        # is assumed to sit inside `if render:` — confirm against the upstream
        # rlberry example.
        env.enable_rendering()
        state = env.reset()
        for tt in range(200):
            action = agent.policy(state)
            next_state, reward, done, _ = env.step(action)
            state = next_state
        env.render()