Example 1
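The snippets below use MountainCar, DiscreteCounter, OnlineDiscretizationCounter, RandomNetworkDistillation and DQNAgent without showing their imports. These names match the exploration tools and agents of the rlberry library; assuming that layout (module paths differ between rlberry versions, so treat this as a sketch rather than the library's exact import lines), a header along these lines makes the examples runnable:

import numpy as np
import pytest

# Assumed rlberry import paths (not shown in the snippets; may vary by version):
from rlberry.envs.classic_control import MountainCar
from rlberry.exploration_tools.discrete_counter import DiscreteCounter
from rlberry.exploration_tools.online_discretization_counter import OnlineDiscretizationCounter
from rlberry.exploration_tools.torch.rnd import RandomNetworkDistillation
from rlberry.agents.dqn import DQNAgent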
def test_continuous_state_env_2():
    # After N updates of the same (state, action) pair, the online
    # discretization counter should report exactly N visits.
    env = MountainCar()
    counter = OnlineDiscretizationCounter(env.observation_space,
                                          env.action_space)

    for N in [10, 20, 30]:
        for _ in range(100):
            ss = env.observation_space.sample()
            aa = env.action_space.sample()
            for _ in range(N):
                ns, rr, _, _ = env.sample(ss, aa)
                counter.update(ss, aa, ns, rr)
            assert counter.count(ss, aa) == N
            counter.reset()
Example 2
def test_continuous_state_env():
    # On a continuous-state env, DiscreteCounter.count should agree with
    # its internal N_sa table indexed by the discretized state.
    env = MountainCar()
    counter = DiscreteCounter(env.observation_space, env.action_space)

    for N in [10, 20, 30]:
        for _ in range(100):
            ss = env.observation_space.sample()
            aa = env.action_space.sample()
            for _ in range(N):
                ns, rr, _, _ = env.sample(ss, aa)
                counter.update(ss, aa, ns, rr)

            dss = counter.state_discretizer.discretize(ss)
            assert counter.N_sa[dss, aa] == N
            assert counter.count(ss, aa) == N
            counter.reset()
Example 3
@pytest.mark.parametrize("rate_power", [1, 0.5])  # decorator not shown in the snippet; assumed so the test is runnable
def test_continuous_state_env_2(rate_power):
    # The exploration measure should decay as N ** (-rate_power) with the visit count N.
    env = MountainCar()
    counter = OnlineDiscretizationCounter(env.observation_space,
                                          env.action_space,
                                          rate_power=rate_power)

    for N in [10, 20]:
        for _ in range(50):
            ss = env.observation_space.sample()
            aa = env.action_space.sample()
            for _ in range(N):
                ns, rr, _, _ = env.sample(ss, aa)
                counter.update(ss, aa, ns, rr)
            assert counter.count(ss, aa) == N
            if rate_power == pytest.approx(1):
                assert np.allclose(counter.measure(ss, aa), 1.0 / N)
            elif rate_power == pytest.approx(0.5):
                assert np.allclose(counter.measure(ss, aa), np.sqrt(1.0 / N))
            counter.reset()
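As the assertions above indicate, the counter's measure behaves like N(s, a) ** (-rate_power): with N=20 visits it equals 1/20 = 0.05 for rate_power=1 and 1/sqrt(20) ≈ 0.224 for rate_power=0.5.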
Example 4
def test_dqn_agent_rnd():
    # DQN with a Random Network Distillation exploration bonus should fit
    # for a couple of episodes and then return an action from policy().
    env = MountainCar()

    def uncertainty_estimator_fn(observation_space, action_space):
        counter = RandomNetworkDistillation(observation_space, action_space)
        return counter

    agent = DQNAgent(env,
                     n_episodes=2,
                     use_bonus=True,
                     uncertainty_estimator_kwargs=dict(
                         uncertainty_estimator_fn=uncertainty_estimator_fn,
                         bonus_scale_factor=1.0))
    agent.fit()
    agent.policy(env.observation_space.sample())
Example 5
def test_dqn_agent():
    # DQN with a count-based bonus (online discretization counter) should
    # fit for a couple of episodes and then return an action from policy().
    env = MountainCar()

    def uncertainty_estimator_fn(observation_space, action_space):
        counter = OnlineDiscretizationCounter(observation_space,
                                              action_space,
                                              min_dist=0.25)
        return counter

    agent = DQNAgent(env,
                     n_episodes=2,
                     use_bonus=True,
                     uncertainty_estimator_kwargs=dict(
                         uncertainty_estimator_fn=uncertainty_estimator_fn,
                         bonus_scale_factor=1.0))
    agent.fit()
    agent.policy(env.observation_space.sample())
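All five examples are ordinary pytest functions; assuming they are saved in a single module (the file name test_exploration_examples.py below is only illustrative), they can be run from Python with pytest.main:

import pytest

# Run the examples in this module; "-q" keeps the output short.
pytest.main(["-q", "test_exploration_examples.py"])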