from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.wrappers.vis2d import Vis2dWrapper  # assumed import path for Vis2dWrapper


def test_vis2dwrapper():
    env = MountainCar()
    env = Vis2dWrapper(env, n_bins_obs=20, memory_size=200)

    agent = RSUCBVIAgent(
        env,
        gamma=0.99,
        horizon=200,
        bonus_scale_factor=0.1,
        copy_env=False,
        min_dist=0.1,
    )

    agent.fit(budget=15)
    env.plot_trajectories(show=False)
    env.plot_trajectory_actions(show=False)
from rlberry.envs import Acrobot
from rlberry.agents import RSUCBVIAgent
from rlberry.utils.logging import configure_logging
from rlberry.wrappers import RescaleRewardWrapper

configure_logging("DEBUG")

env = Acrobot()
# rescale rewards to [0, 1]
env = RescaleRewardWrapper(env, (0.0, 1.0))

agent = RSUCBVIAgent(env,
                     n_episodes=500,
                     gamma=0.99,
                     horizon=300,
                     bonus_scale_factor=0.01,
                     min_dist=0.25)
agent.fit()

env.enable_rendering()
state = env.reset()
for tt in range(4 * agent.horizon):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state

env.save_video("acrobot.mp4")
from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.utils.logging import configure_logging

configure_logging("DEBUG")

for env, horizon in zip([MountainCar(), PBall2D()], [170, 50]):
    print("Running RS-UCBVI on %s" % env.name)
    agent = RSUCBVIAgent(env,
                         n_episodes=1000,
                         gamma=0.99,
                         horizon=horizon,
                         bonus_scale_factor=0.1)
    agent.fit()

    env.enable_rendering()
    state = env.reset()
    for tt in range(200):
        action = agent.policy(state)
        next_state, reward, done, _ = env.step(action)
        state = next_state

    env.render()
""" ===================== Demo: demo_gym_wrapper ===================== """ from rlberry.envs import gym_make from rlberry.agents import RSUCBVIAgent from rlberry.wrappers import RescaleRewardWrapper env = gym_make("Acrobot-v1") env.reward_range = (-1.0, 0.0) # missing in gym implementation # rescake rewards to [0, 1] env = RescaleRewardWrapper(env, (0.0, 1.0)) agent = RSUCBVIAgent(env, gamma=0.99, horizon=200, bonus_scale_factor=0.1, min_dist=0.2) agent.fit(budget=10) state = env.reset() for tt in range(200): action = agent.policy(state) next_state, reward, done, _ = env.step(action) state = next_state env.render() env.close()
        n_bins_obs=20,
        memory_size=100,
        state_preprocess_fn=get_nroom_state_coord)
    agent = ValueIterationAgent(env.unwrapped, gamma=0.99, horizon=200, copy_env=False)
else:
    env = MountainCar()
    env = Vis2dWrapper(env, n_bins_obs=20, memory_size=200)
    agent = RSUCBVIAgent(
        env,
        gamma=0.99,
        horizon=200,
        bonus_scale_factor=0.1,
        copy_env=False,
        min_dist=0.1,
    )

agent.fit(budget=100)

env.enable_rendering()
for ep in range(3):
    state = env.reset()
    for tt in range(agent.horizon):
        action = agent.policy(state)
        next_s, _, _, _ = env.step(action)
        state = next_s

try:
""" ================================================== A demo of RSUCBVI algorithm in MountainCar environment ================================================== Illustration of how to set up an RSUCBVI algorithm in rlberry. The environment chosen here is MountainCar environment. .. video:: ../../video_plot_rsucbvi.mp4 :width: 600 """ # sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_rsucbvi.jpg' from rlberry.agents import RSUCBVIAgent from rlberry.envs.classic_control import MountainCar env = MountainCar() horizon = 170 print("Running RS-UCBVI on %s" % env.name) agent = RSUCBVIAgent(env, gamma=0.99, horizon=horizon, bonus_scale_factor=0.1) agent.fit(budget=500) env.enable_rendering() state = env.reset() for tt in range(200): action = agent.policy(state) next_state, reward, done, _ = env.step(action) state = next_state video = env.save_video("_video/video_plot_rsucbvi.mp4")
# See https://numpy.org/doc/stable/reference/random/generator.html to check the
# methods available in rng.
seeder.rng.integers(5)
seeder.rng.normal()
print(type(seeder.rng))
# etc.

# Environments and agents should be seeded using a single seeder,
# to ensure that their random number generators are independent.
from rlberry.envs import gym_make
from rlberry.agents import RSUCBVIAgent

env = gym_make("MountainCar-v0")
env.reseed(seeder)

agent = RSUCBVIAgent(env)
agent.reseed(seeder)

# Environments and Agents have their own seeder and rng.
# When writing your own agents and inheriting from the Agent class,
# you should use agent.rng whenever you need to generate random numbers;
# the same applies to your environments.
# This is necessary to ensure reproducibility.
print("env seeder: ", env.seeder)
print("random sample from env rng: ", env.rng.normal())
print("agent seeder: ", agent.seeder)
print("random sample from agent rng: ", agent.rng.normal())

# A seeder can spawn other seeders that are independent from it.
# This is useful to seed two different threads, using seeder1
# in the first thread, and seeder2 in the second thread.
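# A minimal sketch of the spawning step described above. It assumes that
# Seeder.spawn(n) returns a list of n independent seeders; check the rlberry
# seeding documentation for the exact signature in your version.
seeder1, seeder2 = seeder.spawn(2)
print("sample from seeder1.rng: ", seeder1.rng.normal())
print("sample from seeder2.rng: ", seeder2.rng.normal())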