def test_continuous_state_env_3():
    """Check OnlineDiscretizationCounter statistics on NRoom with array obs.

    For several visit counts, every (state, action) pair is sampled the same
    number of times; the counter must then report exact counts, the
    rate_power=0.5 exploration measure 1/sqrt(count), full state coverage,
    and maximal (log2 n) entropy.
    """
    env = NRoom(nrooms=3, array_observation=True)
    counter = OnlineDiscretizationCounter(env.observation_space,
                                          env.action_space,
                                          rate_power=0.5,
                                          min_dist=0.0)

    n_states = env.discrete_observation_space.n
    n_actions = env.action_space.n

    for n_visits in range(10, 20):
        # counter starts (or was just reset) empty
        assert counter.get_n_visited_states() == 0
        assert counter.get_entropy() == 0.0

        for state in range(n_states):
            for action in range(n_actions):
                # continuous coordinate of the discrete state index
                coord = env._convert_index_to_float_coord(state)
                for _ in range(n_visits):
                    _, reward, _, _ = env.sample(state, action)
                    counter.update(coord, action, None, reward)
                # each pair was visited exactly n_visits times
                assert counter.N_sa[state, action] == n_visits
                assert counter.count(coord, action) == n_visits
                # rate_power=0.5 -> measure is 1 / sqrt(count)
                assert np.allclose(counter.measure(coord, action),
                                   np.sqrt(1.0 / n_visits))

        # all states visited uniformly -> maximal entropy log2(n_states)
        assert counter.get_n_visited_states() == n_states
        assert np.allclose(counter.get_entropy(), np.log2(n_states))

        counter.reset()
# --- Example 2 ---
def test_n_room(reward_free, array_observation, initial_state_distribution):
    """Smoke-test NRoom construction, reset, and a single step.

    Parametrized over the reward-free flag, the observation format, and the
    initial-state distribution; checks the invariants each option implies.
    """
    env = NRoom(reward_free=reward_free,
                array_observation=array_observation,
                initial_state_distribution=initial_state_distribution)

    obs_initial = env.reset()
    obs_next, _, _, _ = env.step(1)

    # a uniform initial distribution assigns 1/n probability to every state
    if initial_state_distribution == 'uniform':
        expected_prob = 1.0 / env.observation_space.n
        assert env.initial_state_distribution[0] == expected_prob

    # both observations must belong to the declared observation space
    for obs in (obs_initial, obs_next):
        assert env.observation_space.contains(obs)

    # the reward-free variant exposes no reward locations
    if reward_free:
        assert env.reward_at == {}

    # array observations come back as numpy arrays instead of indices
    if array_observation:
        assert isinstance(obs_initial, np.ndarray)
        assert isinstance(obs_next, np.ndarray)
# --- Example 3 ---
""" 
 ===================== 
 Demo: demo_vis2d 
 =====================
"""
from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom, get_nroom_state_coord
from rlberry.envs.classic_control import MountainCar
from rlberry.wrappers.vis2d import Vis2dWrapper
from rlberry.agents import RSUCBVIAgent
from rlberry.agents.dynprog import ValueIterationAgent

# Demo selector: 0 -> NRoom + ValueIteration; anything else -> MountainCar + RSUCBVI.
CHOICE = 1

if CHOICE == 0:
    env = NRoom(nrooms=5, array_observation=False, reward_free=True)
    # Wrap the env for 2D trajectory visualization; state_preprocess_fn maps
    # discrete NRoom states to coordinates before binning into n_bins_obs bins.
    env = Vis2dWrapper(env,
                       n_bins_obs=20,
                       memory_size=100,
                       state_preprocess_fn=get_nroom_state_coord)
    # The agent plans on the unwrapped env; copy_env=False avoids copying it.
    agent = ValueIterationAgent(env.unwrapped,
                                gamma=0.99,
                                horizon=200,
                                copy_env=False)

else:
    env = MountainCar()
    env = Vis2dWrapper(env, n_bins_obs=20, memory_size=200)

    # NOTE(review): this call is truncated in this excerpt — the remaining
    # RSUCBVIAgent arguments are not visible here.
    agent = RSUCBVIAgent(
        env,
        gamma=0.99,
# --- Example 4 ---
===============================
 Illustration of NRooms environment

.. video:: ../../video_plot_rooms.mp4
   :width: 600

"""
# sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_rooms.jpg'

from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom
from rlberry.agents.dynprog import ValueIterationAgent

# 9 rooms of size 9x9, walls kept, traps included, starting from the center.
env = NRoom(
    nrooms=9,
    remove_walls=False,
    room_size=9,
    initial_state_distribution="center",
    include_traps=True,
)
# Use one planning step per discrete state.
horizon = env.observation_space.n

agent = ValueIterationAgent(env, gamma=0.999, horizon=horizon)
print("fitting...")
info = agent.fit()
print(info)

env.enable_rendering()

# NOTE(review): the rollout loop body is truncated in this excerpt.
for _ in range(10):
    state = env.reset()
    for tt in range(horizon):
# --- Example 5 ---
from rlberry.envs.benchmarks.grid_exploration.four_room import FourRoom
from rlberry.envs.benchmarks.grid_exploration.six_room import SixRoom
from rlberry.envs.benchmarks.grid_exploration.nroom import NRoom
from rlberry.agents.dynprog import ValueIterationAgent


# FourRoom / SixRoom are imported above as alternative environments that can
# be swapped in for NRoom.
env = NRoom(
    nrooms=9,
    remove_walls=False,
    room_size=9,
    initial_state_distribution='center',
    include_traps=True,
)
# One planning step per discrete state.
horizon = env.observation_space.n

agent = ValueIterationAgent(env, gamma=0.999, horizon=horizon)
print("fitting...")
info = agent.fit()
print(info)

env.enable_rendering()

# Roll out 10 episodes with uniformly random actions, rendering each one.
for _ in range(10):
    obs = env.reset()
    for _step in range(horizon):
        action = env.action_space.sample()
        obs, _, done, _ = env.step(action)
        if done:
            break