Example 1
from rlberry.agents import AVECPPOAgent
from rlberry.envs.classic_control import MountainCar
render = False  # set to True to visualize the trained policy

# Train AVECPPO on each (environment, budget, horizon) configuration.
for env, n_episodes, horizon in zip([MountainCar()], [40000], [256]):
    print("Running AVECPPO on %s" % env.name)
    agent = AVECPPOAgent(env,
                         n_episodes=n_episodes,
                         horizon=horizon,
                         gamma=0.99,
                         learning_rate=0.00025,
                         eps_clip=0.2,
                         k_epochs=4)
    agent.fit()

    if render:
        # Roll out the trained policy and render the trajectory.
        env.enable_rendering()
        state = env.reset()
        for tt in range(200):
            action = agent.policy(state)
            next_state, reward, done, _ = env.step(action)
            state = next_state

        env.render()
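Rendering alone does not tell you whether training worked. A minimal sanity check, reusing the agent and env from the loop above and the same step interface, is to accumulate the return of one greedy rollout (a sketch, not part of the original demo):

# Evaluate the trained agent: accumulate the return of one greedy rollout.
state = env.reset()
episode_return = 0.0
for tt in range(200):
    action = agent.policy(state)
    state, reward, done, _ = env.step(action)
    episode_return += reward
    if done:
        break
print("Episodic return: %.2f" % episode_return)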
"""
===============================
A demo of MountainCar environment
===============================
 Illustration of MountainCar environment

.. video:: ../../video_plot_montain_car.mp4
   :width: 600

"""
# sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_montain_car.jpg'

from rlberry.agents.mbqvi import MBQVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.wrappers import DiscretizeStateWrapper

_env = MountainCar()
# MBQVI needs a finite state space, so discretize each state dimension into 20 bins.
env = DiscretizeStateWrapper(_env, 20)
agent = MBQVIAgent(env, n_samples=40, gamma=0.99)
agent.fit()

# Render a rollout of the learned policy.
env.enable_rendering()
state = env.reset()
for tt in range(200):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state

video = env.save_video("_video/video_plot_montain_car.mp4")
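Since MBQVI is tabular, the grid resolution chosen in DiscretizeStateWrapper directly trades off accuracy against sample cost. A sketch of how one might compare bin counts (the values 10/20/40 are illustrative; everything else reuses the constructors above):

# Compare discretization granularities; bin counts are illustrative.
for n_bins in [10, 20, 40]:
    env = DiscretizeStateWrapper(MountainCar(), n_bins)
    agent = MBQVIAgent(env, n_samples=40, gamma=0.99)
    agent.fit()

    # Accumulate the return of one greedy rollout.
    state = env.reset()
    episode_return = 0.0
    for tt in range(200):
        action = agent.policy(state)
        state, reward, done, _ = env.step(action)
        episode_return += reward
        if done:
            break
    print("n_bins=%d, return=%.2f" % (n_bins, episode_return))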
Example 3
from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.envs.benchmarks.ball_exploration import PBall2D

# Train RS-UCBVI on each environment with its own planning horizon.
for env, horizon in zip([MountainCar(), PBall2D()], [170, 50]):
    print("Running RS-UCBVI on %s" % env.name)
    agent = RSUCBVIAgent(env,
                         n_episodes=1000,
                         gamma=0.99,
                         horizon=horizon,
                         bonus_scale_factor=0.1)
    agent.fit()

    # Render a rollout of the learned policy.
    env.enable_rendering()
    state = env.reset()
    for tt in range(200):
        action = agent.policy(state)
        next_state, reward, done, _ = env.step(action)
        state = next_state

    env.render()
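A single rendered rollout is a noisy measure of performance. A minimal sketch of averaging the greedy-policy return over several evaluation episodes (it reuses the last agent/env pair from the loop above; n_eval_episodes is illustrative):

# Average the greedy-policy return over several evaluation episodes.
n_eval_episodes = 10
total_return = 0.0
for _ in range(n_eval_episodes):
    state = env.reset()
    for tt in range(200):
        action = agent.policy(state)
        state, reward, done, _ = env.step(action)
        total_return += reward
        if done:
            break
print("Mean return over %d episodes: %.2f"
      % (n_eval_episodes, total_return / n_eval_episodes))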
Example 4
"""
===============================================================
A demo of the RSUCBVI algorithm in the MountainCar environment
===============================================================
Illustration of how to set up the RSUCBVI algorithm in rlberry.
The environment chosen here is the MountainCar environment.

.. video:: ../../video_plot_rsucbvi.mp4
   :width: 600

"""
# sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_rsucbvi.jpg'

from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar

env = MountainCar()
horizon = 170
print("Running RS-UCBVI on %s" % env.name)
agent = RSUCBVIAgent(env, gamma=0.99, horizon=horizon, bonus_scale_factor=0.1)
agent.fit(budget=500)  # training budget: number of episodes

# Render a rollout of the learned policy.
env.enable_rendering()
state = env.reset()
for tt in range(200):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state

video = env.save_video("_video/video_plot_rsucbvi.mp4")
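The budget argument suggests training can be extended after the fact. Assuming fit() resumes rather than restarts when called again (an assumption about the agent interface, not verified here), a follow-up round of training could look like this:

# Hypothetical continuation: train for 500 more episodes, then re-evaluate.
agent.fit(budget=500)  # assumes a second fit() call continues training

state = env.reset()
episode_return = 0.0
for tt in range(200):
    action = agent.policy(state)
    state, reward, done, _ = env.step(action)
    episode_return += reward
    if done:
        break
print("Return after extra training: %.2f" % episode_return)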
Example 5
from rlberry.agents import A2CAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.seeding import seeding
from rlberry.utils.logging import configure_logging

render = True
seeding.set_global_seed(1223)  # fix the global seed for reproducibility
configure_logging("DEBUG")     # verbose logging for the training loops

# Train A2C on each (environment, budget, horizon) configuration.
for env, n_episodes, horizon in zip([PBall2D(), MountainCar()], [400, 40000],
                                    [256, 512]):
    print("Running A2C on %s" % env.name)
    agent = A2CAgent(env,
                     n_episodes=n_episodes,
                     horizon=horizon,
                     gamma=0.99,
                     learning_rate=0.001,
                     k_epochs=4)
    agent.fit()

    if render:
        # Roll out the trained policy and render the trajectory.
        env.enable_rendering()
        state = env.reset()
        for tt in range(200):
            action = agent.policy(state)
            next_state, reward, done, _ = env.step(action)
            state = next_state

        env.render()
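Because set_global_seed is called at the top, re-running the script should reproduce the same run. A sketch of checking this on a short random rollout (it assumes the global seed also drives freshly created environments and that states are NumPy arrays; rollout_states is a hypothetical helper):

# Reproducibility check: two identically seeded rollouts should coincide.
def rollout_states(seed, n_steps=20):
    seeding.set_global_seed(seed)
    env = PBall2D()
    state = env.reset()
    states = [state]
    for _ in range(n_steps):
        state, _, done, _ = env.step(env.action_space.sample())
        states.append(state)
        if done:
            break
    return states

first = rollout_states(1223)
second = rollout_states(1223)
print("Identical rollouts:", all((a == b).all() for a, b in zip(first, second)))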