def test_vec_env_monitor_kwargs():
    """
    Check that ``monitor_kwargs`` given to the env factories is visible on the created env.

    Both ``make_vec_env`` and ``make_atari_env`` are exercised, first with
    ``allow_early_resets=False`` and then with ``allow_early_resets=True``.
    """
    factories = (
        (make_vec_env, "MountainCarContinuous-v0"),
        (make_atari_env, "BreakoutNoFrameskip-v4"),
    )
    for early_resets in (False, True):
        for build_env, env_id in factories:
            env = build_env(
                env_id,
                n_envs=1,
                seed=0,
                monitor_kwargs={"allow_early_resets": early_resets},
            )
            # The attribute must be the exact boolean that was passed in
            assert env.get_attr("allow_early_resets")[0] is early_resets
def test_custom_vec_env(tmp_path):
    """
    Stand alone test for a special case (passing a custom VecEnv class) to avoid
    doubling the number of tests.

    :param tmp_path: base directory under which the monitor folder is created
    """
    monitor_dir = tmp_path / "test_make_vec_env/"
    env = make_vec_env(
        "CartPole-v1",
        n_envs=1,
        monitor_dir=monitor_dir,
        seed=0,
        vec_env_cls=SubprocVecEnv,
        vec_env_kwargs={"start_method": None},
    )

    try:
        assert env.num_envs == 1
        assert isinstance(env, SubprocVecEnv)
        assert os.path.isdir(monitor_dir)
    finally:
        # Kill subprocess, even when one of the checks above fails,
        # so that a failing test does not leave a worker process behind
        env.close()

    # Cleanup folder
    shutil.rmtree(monitor_dir)

    # This should fail because DummyVecEnv does not have any keyword argument
    with pytest.raises(TypeError):
        make_vec_env("CartPole-v1", n_envs=1, vec_env_kwargs={"dummy": False})
def test_make_vec_env(env_id, n_envs, vec_env_cls, wrapper_class):
    """
    Check the type and size of the vectorized env built by ``make_vec_env``.

    :param env_id: environment identifier forwarded to ``make_vec_env``
    :param n_envs: number of environments requested; must match ``env.num_envs``
    :param vec_env_cls: VecEnv class to use; when ``None`` the result is expected
        to be a ``DummyVecEnv``, otherwise a ``SubprocVecEnv``
    :param wrapper_class: optional wrapper; when ``None`` the first sub-env is
        expected to be a ``Monitor``, otherwise an instance of this class
    """
    env = make_vec_env(
        env_id,
        n_envs,
        vec_env_cls=vec_env_cls,
        wrapper_class=wrapper_class,
        monitor_dir=None,
        seed=0,
    )

    try:
        assert env.num_envs == n_envs

        if vec_env_cls is None:
            assert isinstance(env, DummyVecEnv)
            # The sub-envs are only inspected in the DummyVecEnv case
            if wrapper_class is not None:
                assert isinstance(env.envs[0], wrapper_class)
            else:
                assert isinstance(env.envs[0], Monitor)
        else:
            assert isinstance(env, SubprocVecEnv)
    finally:
        # Kill subprocesses, even when one of the checks above fails
        env.close()
def test_callbacks(tmp_path, model_class, policy_class):
    """
    Train a small model with several callbacks and check their bookkeeping.

    Checkpoint, evaluation, event-driven and max-episodes callbacks are combined
    in a ``CallbackList``. After training, the test checks that the child
    callbacks share the same locals and that the event callback counters match
    the model counters. Training is then repeated with ``None``, a plain list
    and a bare function as callback. For A2C and PPO, ``StopTrainingOnMaxEpisodes``
    is additionally checked with two environments.
    """

    def _keep_training(_locals, _globals):
        # Old-style functional callback: returning True lets training continue
        return True

    save_dir = tmp_path / "logs/callbacks/"
    # DQN only support discrete actions
    env_id = select_env(model_class)
    # Create RL model, with a small network for fast test
    model = model_class(policy_class, env_id, policy_kwargs={"net_arch": [32]})

    periodic_checkpoint = CheckpointCallback(save_freq=100, save_path=save_dir)

    evaluation_env = gym.make(env_id)
    # Stop training if the performance is good enough
    reward_stopper = StopTrainingOnRewardThreshold(reward_threshold=-1200, verbose=1)
    evaluator = EvalCallback(
        evaluation_env,
        callback_on_new_best=reward_stopper,
        best_model_save_path=save_dir,
        log_path=save_dir,
        eval_freq=int(N_STEPS_SMALL / 5),
        warn=False,
    )

    # Equivalent to `periodic_checkpoint`
    # but here in an event-driven manner
    event_checkpoint = CheckpointCallback(save_freq=1, save_path=save_dir, name_prefix="event")
    every_500_steps = EveryNTimesteps(n_steps=500, callback=event_checkpoint)

    # Stop training if max number of episodes is reached
    episode_stopper = StopTrainingOnMaxEpisodes(max_episodes=100, verbose=1)

    combined = CallbackList(
        [periodic_checkpoint, evaluator, every_500_steps, episode_stopper]
    )
    model.learn(N_STEPS_SMALL, callback=combined)

    # Check access to local variables
    latest_obs = combined.locals["new_obs"]
    assert model.env.observation_space.contains(latest_obs[0])
    # Check that the child callbacks were called
    for child in (periodic_checkpoint, every_500_steps, event_checkpoint):
        assert child.locals["new_obs"] is latest_obs
    # Check that internal callback counters match models' counters
    assert every_500_steps.num_timesteps == model.num_timesteps
    assert every_500_steps.n_calls == model.num_timesteps

    model.learn(N_STEPS_SMALL, callback=None)
    # Transform callback into a callback list automatically
    model.learn(N_STEPS_SMALL, callback=[periodic_checkpoint, evaluator])
    # Automatic wrapping, old way of doing callbacks
    model.learn(N_STEPS_SMALL, callback=_keep_training)

    # Testing models that support multiple envs
    if model_class in (A2C, PPO):
        n_envs = 2
        episode_budget = 1
        # Pendulum-v0 has a timelimit of 200 timesteps
        steps_per_episode = 200

        vec_env = make_vec_env(env_id, n_envs=n_envs, seed=0)
        model = model_class(policy_class, vec_env, policy_kwargs={"net_arch": [32]})

        episode_limiter = StopTrainingOnMaxEpisodes(max_episodes=episode_budget, verbose=1)
        model.learn(steps_per_episode * 2, callback=CallbackList([episode_limiter]))

        # Check that the actual number of episodes and timesteps per env matches the expected one
        assert episode_limiter.n_episodes // n_envs == episode_budget
        assert model.num_timesteps // n_envs == steps_per_episode

    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
def test_vec_env_kwargs():
    """Check that ``env_kwargs`` given to ``make_vec_env`` is readable on the created env."""
    expected_velocity = 0.11
    env = make_vec_env(
        "MountainCarContinuous-v0",
        n_envs=1,
        seed=0,
        env_kwargs={"goal_velocity": expected_velocity},
    )
    # Only the first (and only) sub-environment is inspected
    first_env_velocity = env.get_attr("goal_velocity")[0]
    assert first_env_velocity == expected_velocity
def test_discrete_obs_space(model_class, policy_class, env):
    """
    Smoke test: build ``model_class`` on two copies of ``env`` and train briefly.

    The test passes as long as construction and the short ``learn`` call do not raise.
    """
    vec_env = make_vec_env(env, n_envs=2, seed=0)
    model = model_class(policy_class, vec_env, n_steps=256)
    model.learn(100)