# Tests for rlberry's exploration tools (counters, RND) and their use as
# uncertainty estimators in DQN. Import paths below assume rlberry's module
# layout for these classes.
import numpy as np
import pytest

from rlberry.agents.dqn import DQNAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.exploration_tools.discrete_counter import DiscreteCounter
from rlberry.exploration_tools.online_discretization_counter import (
    OnlineDiscretizationCounter,
)
from rlberry.exploration_tools.torch.rnd import RandomNetworkDistillation


def test_continuous_state_env_online_counter():
    env = MountainCar()
    counter = OnlineDiscretizationCounter(env.observation_space, env.action_space)
    for N in [10, 20, 30]:
        for _ in range(100):
            ss = env.observation_space.sample()
            aa = env.action_space.sample()
            for _ in range(N):
                ns, rr, _, _ = env.sample(ss, aa)
                counter.update(ss, aa, ns, rr)
            # After N updates of the same (state, action) pair, the counter
            # must report exactly N visits.
            assert counter.count(ss, aa) == N
            counter.reset()

def test_continuous_state_env():
    env = MountainCar()
    counter = DiscreteCounter(env.observation_space, env.action_space)
    for N in [10, 20, 30]:
        for _ in range(100):
            ss = env.observation_space.sample()
            aa = env.action_space.sample()
            for _ in range(N):
                ns, rr, _, _ = env.sample(ss, aa)
                counter.update(ss, aa, ns, rr)
            # The counter's internal table N_sa is indexed by the discretized
            # state, so the raw table and count() must agree on N visits.
            dss = counter.state_discretizer.discretize(ss)
            assert counter.N_sa[dss, aa] == N
            assert counter.count(ss, aa) == N
            counter.reset()

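# Both counters above map a continuous state space to finitely many cells
# before counting. A rough illustration of the grid-based variant (a minimal
# sketch, not rlberry's implementation; the class name, `n_bins`, and the
# method names are assumptions made for this example only):
class _ToyGridCounter:
    """Counts (state, action) visits on a uniform grid over a Box space."""

    def __init__(self, low, high, n_actions, n_bins=10):
        self.low = np.asarray(low, dtype=float)
        self.high = np.asarray(high, dtype=float)
        self.n_bins = n_bins
        # One visit count per (flattened grid cell, action).
        self.n_sa = np.zeros((n_bins ** len(self.low), n_actions))

    def _discretize(self, state):
        # Map each coordinate to a bin index in [0, n_bins - 1], then
        # flatten the multi-index into a single cell id.
        ratios = (np.asarray(state) - self.low) / (self.high - self.low)
        bins = np.clip((ratios * self.n_bins).astype(int), 0, self.n_bins - 1)
        return int(np.ravel_multi_index(bins, (self.n_bins,) * len(bins)))

    def update(self, state, action):
        self.n_sa[self._discretize(state), action] += 1

    def count(self, state, action):
        return self.n_sa[self._discretize(state), action]
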
@pytest.mark.parametrize("rate_power", [0.5, 1])
def test_continuous_state_env_2(rate_power):
    env = MountainCar()
    counter = OnlineDiscretizationCounter(
        env.observation_space, env.action_space, rate_power=rate_power
    )
    for N in [10, 20]:
        for _ in range(50):
            ss = env.observation_space.sample()
            aa = env.action_space.sample()
            for _ in range(N):
                ns, rr, _, _ = env.sample(ss, aa)
                counter.update(ss, aa, ns, rr)
            assert counter.count(ss, aa) == N
            if rate_power == pytest.approx(1):
                assert np.allclose(counter.measure(ss, aa), 1.0 / N)
            elif rate_power == pytest.approx(0.5):
                assert np.allclose(counter.measure(ss, aa), np.sqrt(1.0 / N))
            counter.reset()

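# The branches above pin down the exploration measure as
# measure(s, a) = N(s, a) ** (-rate_power): the inverse count for
# rate_power=1 and the classical 1/sqrt(N) bonus rate for rate_power=0.5.
# A quick standalone check of that closed form (the helper below is local
# to this file, not rlberry API):
def _expected_measure(n_visits, rate_power):
    return n_visits ** (-rate_power)


def test_expected_measure_formula():
    assert _expected_measure(10, 1) == pytest.approx(0.1)
    assert _expected_measure(16, 0.5) == pytest.approx(0.25)
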
def test_dqn_agent_rnd():
    env = MountainCar()

    def uncertainty_estimator_fn(observation_space, action_space):
        return RandomNetworkDistillation(observation_space, action_space)

    agent = DQNAgent(
        env,
        n_episodes=2,
        use_bonus=True,
        uncertainty_estimator_kwargs=dict(
            uncertainty_estimator_fn=uncertainty_estimator_fn,
            bonus_scale_factor=1.0,
        ),
    )
    agent.fit()
    agent.policy(env.observation_space.sample())

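# RandomNetworkDistillation trains a predictor network to match a fixed,
# randomly initialized target network; the prediction error is large on
# unfamiliar observations and shrinks with training, which makes it usable
# as an exploration bonus. A minimal sketch of the idea in plain PyTorch
# (network sizes and names here are assumptions, not rlberry's internals):
import torch
import torch.nn as nn


class _ToyRND:
    def __init__(self, obs_dim, hidden=32, lr=1e-3):
        self.target = nn.Sequential(
            nn.Linear(obs_dim, hidden), nn.ReLU(), nn.Linear(hidden, hidden)
        )
        self.predictor = nn.Sequential(
            nn.Linear(obs_dim, hidden), nn.ReLU(), nn.Linear(hidden, hidden)
        )
        # The target network stays fixed; only the predictor is trained.
        for param in self.target.parameters():
            param.requires_grad_(False)
        self.optimizer = torch.optim.Adam(self.predictor.parameters(), lr=lr)

    def bonus(self, obs):
        # Novelty estimate: squared prediction error on this observation.
        obs = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            return ((self.predictor(obs) - self.target(obs)) ** 2).mean().item()

    def update(self, obs):
        obs = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0)
        loss = ((self.predictor(obs) - self.target(obs)) ** 2).mean()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
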
def test_dqn_agent():
    env = MountainCar()

    def uncertainty_estimator_fn(observation_space, action_space):
        return OnlineDiscretizationCounter(
            observation_space, action_space, min_dist=0.25
        )

    agent = DQNAgent(
        env,
        n_episodes=2,
        use_bonus=True,
        uncertainty_estimator_kwargs=dict(
            uncertainty_estimator_fn=uncertainty_estimator_fn,
            bonus_scale_factor=1.0,
        ),
    )
    agent.fit()
    agent.policy(env.observation_space.sample())

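# In both DQN tests, use_bonus=True tells the agent to combine the
# uncertainty estimator's output with the environment reward when it builds
# learning targets. Schematically, the usual count-based recipe looks like
# the helper below (a sketch of the general idea, not DQNAgent's exact
# update; the helper name is an assumption made for illustration):
def _bonus_shaped_reward(reward, counter, state, action, bonus_scale_factor=1.0):
    # Rarely visited (state, action) pairs get a larger exploration bonus.
    return reward + bonus_scale_factor * counter.measure(state, action)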