import numpy as np

from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom
from rlberry.exploration_tools.online_discretization_counter import (
    OnlineDiscretizationCounter,
)


def test_continuous_state_env_3():
    env = NRoom(nrooms=3, array_observation=True)
    counter = OnlineDiscretizationCounter(
        env.observation_space, env.action_space, rate_power=0.5, min_dist=0.0
    )
    for N in range(10, 20):
        # A freshly reset counter has seen no states.
        assert counter.get_n_visited_states() == 0
        assert counter.get_entropy() == 0.0
        for ss in range(env.discrete_observation_space.n):
            for aa in range(env.action_space.n):
                for _ in range(N):
                    ns, rr, _, _ = env.sample(ss, aa)
                    continuous_ss = env._convert_index_to_float_coord(ss)
                    counter.update(continuous_ss, aa, None, rr)
                assert counter.N_sa[ss, aa] == N
                assert counter.count(continuous_ss, aa) == N
                # With rate_power=0.5, the measure decays as n(s, a) ** (-1/2).
                assert np.allclose(counter.measure(continuous_ss, aa), np.sqrt(1.0 / N))
        # Every discrete state was visited equally often, so the visitation
        # distribution is uniform and its entropy is log2(n).
        assert counter.get_n_visited_states() == env.discrete_observation_space.n
        assert np.allclose(counter.get_entropy(), np.log2(env.discrete_observation_space.n))
        counter.reset()
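
# A minimal usage sketch of the counter API exercised by the test above,
# assuming the same rlberry imports; the printed values follow directly from
# the assertions in the test (with rate_power=0.5, measure = 1/sqrt(count)).
import numpy as np

from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom
from rlberry.exploration_tools.online_discretization_counter import (
    OnlineDiscretizationCounter,
)

env = NRoom(nrooms=3, array_observation=True)
counter = OnlineDiscretizationCounter(
    env.observation_space, env.action_space, rate_power=0.5, min_dist=0.0
)

state = env.reset()
for _ in range(4):
    counter.update(state, 0, None, 0.0)  # (state, action, next_state, reward)

print(counter.count(state, 0))    # 4 visits to this (state, action) pair
print(counter.measure(state, 0))  # sqrt(1/4) = 0.5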
import numpy as np
import pytest

from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom


# NOTE: the parametrize grid below is illustrative; the original decorator was
# not preserved in this snippet.
@pytest.mark.parametrize("reward_free", [True, False])
@pytest.mark.parametrize("array_observation", [True, False])
@pytest.mark.parametrize("initial_state_distribution", ["center", "uniform"])
def test_n_room(reward_free, array_observation, initial_state_distribution):
    env = NRoom(
        reward_free=reward_free,
        array_observation=array_observation,
        initial_state_distribution=initial_state_distribution,
    )
    initial_state = env.reset()
    next_state, reward, _, _ = env.step(1)

    if initial_state_distribution == "uniform":
        # The uniform distribution assigns mass 1/n to every state.
        assert env.initial_state_distribution[0] == 1.0 / env.observation_space.n

    assert env.observation_space.contains(initial_state)
    assert env.observation_space.contains(next_state)

    if reward_free:
        # The reward-free variant has no reward states.
        assert env.reward_at == {}

    if array_observation:
        assert isinstance(initial_state, np.ndarray)
        assert isinstance(next_state, np.ndarray)
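
# Quick illustration of the `array_observation` flag checked above: a sketch,
# assuming the same NRoom constructor as in the test.
from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom

env = NRoom(nrooms=3, array_observation=False)
print(env.reset())  # a discrete state index (int)

env = NRoom(nrooms=3, array_observation=True)
print(env.reset())  # a numpy array of continuous 2D coordinates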
""" ===================== Demo: demo_vis2d ===================== """ from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom, get_nroom_state_coord from rlberry.envs.classic_control import MountainCar from rlberry.wrappers.vis2d import Vis2dWrapper from rlberry.agents import RSUCBVIAgent from rlberry.agents.dynprog import ValueIterationAgent CHOICE = 1 if CHOICE == 0: env = NRoom(nrooms=5, array_observation=False, reward_free=True) env = Vis2dWrapper(env, n_bins_obs=20, memory_size=100, state_preprocess_fn=get_nroom_state_coord) agent = ValueIterationAgent(env.unwrapped, gamma=0.99, horizon=200, copy_env=False) else: env = MountainCar() env = Vis2dWrapper(env, n_bins_obs=20, memory_size=200) agent = RSUCBVIAgent( env, gamma=0.99,
"""
===============================
Illustration of NRooms environment
===============================

.. video:: ../../video_plot_rooms.mp4
   :width: 600
"""
# sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_rooms.jpg'
from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom
from rlberry.agents.dynprog import ValueIterationAgent

env = NRoom(
    nrooms=9,
    remove_walls=False,
    room_size=9,
    initial_state_distribution="center",
    include_traps=True,
)
horizon = env.observation_space.n

agent = ValueIterationAgent(env, gamma=0.999, horizon=horizon)
print("fitting...")
info = agent.fit()
print(info)

env.enable_rendering()

for _ in range(10):
    state = env.reset()
    for tt in range(horizon):
        # The snippet is truncated here; the rollout body below is completed
        # after the pattern in the script that follows, using the fitted
        # value-iteration policy.
        action = agent.policy(state)
        next_s, _, done, _ = env.step(action)
        if done:
            break
        state = next_s
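
# The docstring above references video_plot_rooms.mp4; rendering-enabled
# rlberry environments can dump the frames collected after enable_rendering()
# to a video file. The output path below is a hypothetical example.
env.save_video("_video/video_plot_rooms.mp4")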
from rlberry.envs.benchmarks.grid_exploration.four_room import FourRoom
from rlberry.envs.benchmarks.grid_exploration.six_room import SixRoom
from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom
from rlberry.agents.dynprog import ValueIterationAgent

# env = FourRoom(reward_free=False, difficulty=0, array_observation=False)
# env = SixRoom(reward_free=False, array_observation=False)
env = NRoom(
    nrooms=9,
    remove_walls=False,
    room_size=9,
    initial_state_distribution='center',
    include_traps=True,
)
horizon = env.observation_space.n

agent = ValueIterationAgent(env, gamma=0.999, horizon=horizon)
print("fitting...")
info = agent.fit()
print(info)

env.enable_rendering()

for _ in range(10):
    state = env.reset()
    for tt in range(horizon):
        # action = agent.policy(state)
        action = env.action_space.sample()
        next_s, _, done, _ = env.step(action)
        if done:
            break
        state = next_s
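
# Since rendering was enabled above, the recorded rollouts can be displayed
# once the loops finish; calling render() here is an assumption based on
# rlberry's rendering interface.
env.render()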