from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.wrappers.vis2d import Vis2dWrapper  # assumed import path for Vis2dWrapper


def test_vis2dwrapper():
    env = MountainCar()
    env = Vis2dWrapper(env, n_bins_obs=20, memory_size=200)

    agent = RSUCBVIAgent(
        env,
        gamma=0.99,
        horizon=200,
        bonus_scale_factor=0.1,
        copy_env=False,
        min_dist=0.1,
    )

    agent.fit(budget=15)
    env.plot_trajectories(show=False)
    env.plot_trajectory_actions(show=False)
from rlberry.envs import Acrobot
from rlberry.agents import RSUCBVIAgent
from rlberry.utils.logging import configure_logging
from rlberry.wrappers import RescaleRewardWrapper

configure_logging("DEBUG")

env = Acrobot()
# rescale rewards to [0, 1]
env = RescaleRewardWrapper(env, (0.0, 1.0))

agent = RSUCBVIAgent(env,
                     n_episodes=500,
                     gamma=0.99,
                     horizon=300,
                     bonus_scale_factor=0.01,
                     min_dist=0.25)
agent.fit()

env.enable_rendering()
state = env.reset()
for tt in range(4 * agent.horizon):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state

env.save_video("acrobot.mp4")
from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.utils.logging import configure_logging

configure_logging("DEBUG")

for env, horizon in zip([MountainCar(), PBall2D()], [170, 50]):
    print("Running RS-UCBVI on %s" % env.name)
    agent = RSUCBVIAgent(env,
                         n_episodes=1000,
                         gamma=0.99,
                         horizon=horizon,
                         bonus_scale_factor=0.1)
    agent.fit()

    env.enable_rendering()
    state = env.reset()
    for tt in range(200):
        action = agent.policy(state)
        next_state, reward, done, _ = env.step(action)
        state = next_state

    env.render()
""" ===================== Demo: demo_gym_wrapper ===================== """ from rlberry.envs import gym_make from rlberry.agents import RSUCBVIAgent from rlberry.wrappers import RescaleRewardWrapper env = gym_make("Acrobot-v1") env.reward_range = (-1.0, 0.0) # missing in gym implementation # rescake rewards to [0, 1] env = RescaleRewardWrapper(env, (0.0, 1.0)) agent = RSUCBVIAgent(env, gamma=0.99, horizon=200, bonus_scale_factor=0.1, min_dist=0.2) agent.fit(budget=10) state = env.reset() for tt in range(200): action = agent.policy(state) next_state, reward, done, _ = env.step(action) state = next_state env.render() env.close()
        n_bins_obs=20,
        memory_size=100,
        state_preprocess_fn=get_nroom_state_coord)
    agent = ValueIterationAgent(env.unwrapped, gamma=0.99, horizon=200, copy_env=False)
else:
    env = MountainCar()
    env = Vis2dWrapper(env, n_bins_obs=20, memory_size=200)
    agent = RSUCBVIAgent(
        env,
        gamma=0.99,
        horizon=200,
        bonus_scale_factor=0.1,
        copy_env=False,
        min_dist=0.1,
    )

agent.fit(budget=100)

env.enable_rendering()
for ep in range(3):
    state = env.reset()
    for tt in range(agent.horizon):
        action = agent.policy(state)
        next_s, _, _, _ = env.step(action)
        state = next_s

try:
""" ================================================== A demo of RSUCBVI algorithm in MountainCar environment ================================================== Illustration of how to set up an RSUCBVI algorithm in rlberry. The environment chosen here is MountainCar environment. .. video:: ../../video_plot_rsucbvi.mp4 :width: 600 """ # sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_rsucbvi.jpg' from rlberry.agents import RSUCBVIAgent from rlberry.envs.classic_control import MountainCar env = MountainCar() horizon = 170 print("Running RS-UCBVI on %s" % env.name) agent = RSUCBVIAgent(env, gamma=0.99, horizon=horizon, bonus_scale_factor=0.1) agent.fit(budget=500) env.enable_rendering() state = env.reset() for tt in range(200): action = agent.policy(state) next_state, reward, done, _ = env.step(action) state = next_state video = env.save_video("_video/video_plot_rsucbvi.mp4")
# See https://numpy.org/doc/stable/reference/random/generator.html to check the
# methods available in rng.
seeder.rng.integers(5)
seeder.rng.normal()
print(type(seeder.rng))
# etc.

# Environments and agents should be seeded using a single seeder,
# to ensure that their random number generators are independent.
from rlberry.envs import gym_make
from rlberry.agents import RSUCBVIAgent

env = gym_make("MountainCar-v0")
env.reseed(seeder)

agent = RSUCBVIAgent(env)
agent.reseed(seeder)

# Environments and Agents have their own seeder and rng.
# When writing your own agents and inheriting from the Agent class,
# you should use agent.rng whenever you need to generate random numbers;
# the same applies to your environments.
# This is necessary to ensure reproducibility.
print("env seeder: ", env.seeder)
print("random sample from env rng: ", env.rng.normal())
print("agent seeder: ", agent.seeder)
print("random sample from agent rng: ", agent.rng.normal())

# A seeder can spawn other seeders that are independent from it.
# This is useful to seed two different threads, using seeder1
# in the first thread, and seeder2 in the second thread.
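# A minimal sketch of the spawning step described above. It assumes that
# Seeder.spawn(n) returns a list of n independent seeders; check the rlberry
# seeding documentation for the exact signature in your version.
seeder1, seeder2 = seeder.spawn(2)
print("sample from seeder1.rng: ", seeder1.rng.normal())
print("sample from seeder2.rng: ", seeder2.rng.normal())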