def test_mutliproc_single_proc_equivalency():
    """Compare the num_cpus=0 and num_cpus=4 vector envs built from CartPole.

    Both vector environments wrap the same base env with the same number of
    copies and are handed to ``check_vec_env_equivalency``.
    """
    n_copies = 3
    base_env = gym.make("CartPole-v1")

    # num_cpus=0: single threaded vector environment
    serial_venv = concat_vec_envs_v1(base_env, n_copies, num_cpus=0)
    # num_cpus=4: multiprocessing vector environment
    parallel_venv = concat_vec_envs_v1(base_env, n_copies, num_cpus=4)

    check_vec_env_equivalency(serial_venv, parallel_venv)
def test_multiagent_mutliproc_single_proc_equivalency():
    """Compare the num_cpus=0 and num_cpus=4 vector envs for simple_spread.

    The parallel PettingZoo env (``max_cycles=10``) is first converted with
    ``pettingzoo_env_to_vec_env_v1``; both concatenated variants are then
    handed to ``check_vec_env_equivalency``.
    """
    n_copies = 3
    agent_venv = pettingzoo_env_to_vec_env_v1(
        simple_spread_v2.parallel_env(max_cycles=10)
    )

    # num_cpus=0: single threaded vector environment
    serial_venv = concat_vec_envs_v1(agent_venv, n_copies, num_cpus=0)
    # num_cpus=4: multiprocessing vector environment
    parallel_venv = concat_vec_envs_v1(agent_venv, n_copies, num_cpus=4)

    check_vec_env_equivalency(serial_venv, parallel_venv)
def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False) -> VecEnv:
    """Build the wrapped, vectorized pistonball environment.

    :param n_envs: number of copies passed to ``ss.concat_vec_envs_v1``.
    :param eval_env: forwarded to ``self._maybe_normalize``.
    :param no_log: not read anywhere in this method body.
    :return: the monitored (and possibly normalized / transposed) vec env.
    """
    base = pistonball_v5.parallel_env()
    base = ss.color_reduction_v0(base, mode="B")
    base = ss.resize_v0(base, x_size=84, y_size=84, linear_interp=True)
    base = ss.frame_stack_v1(base, 3)
    base = ss.pettingzoo_env_to_vec_env_v1(base)

    # NOTE(review): unconditional print, unlike the verbose-gated message
    # below -- looks like leftover debug output; confirm before removing.
    print(n_envs)

    vec_env = ss.concat_vec_envs_v1(base, n_envs, num_cpus=4, base_class="stable_baselines3")
    vec_env = VecMonitor(vec_env)
    vec_env = self._maybe_normalize(vec_env, eval_env)

    # Only image observations that are not already channels-first get wrapped.
    obs_space = vec_env.observation_space
    if not is_image_space(obs_space) or is_image_space_channels_first(obs_space):
        return vec_env

    if self.verbose > 0:
        print("Wrapping into a VecTransposeImage")
    return VecTransposeImage(vec_env)
def test_multiproc_buffer():
    """Step a 2-copy, num_cpus=2 CartPole vec env and check old observations stay intact.

    Before each step the current observations are deep-copied; after the step
    the string form of the copy must still match the string form of the
    observations that were held.
    """
    n_copies = 2
    venv = concat_vec_envs_v1(gym.make("CartPole-v1"), n_copies, num_cpus=2)

    held_obs = venv.reset()
    for _step in range(55):
        actions = [venv.action_space.sample() for _ in range(venv.num_envs)]

        # Check we're not passing a thing that gets mutated
        snapshot = copy.deepcopy(held_obs)
        next_obs, _rews, _dones, _infos = venv.step(actions)
        assert hash(str(snapshot)) == hash(str(held_obs))

        held_obs = next_obs
def test_good_vecenv():
    """Step a 2-copy simple_spread vec env and check output sizes and done flags.

    Every step output must have ``len(possible_agents) * num_envs`` entries,
    held observations must not change across a step, and the done flags must
    be either all set or all clear.
    """
    n_copies = 2
    parallel = simple_spread_v2.parallel_env()
    expected_len = len(parallel.possible_agents) * n_copies

    venv = concat_vec_envs_v1(pettingzoo_env_to_vec_env_v1(parallel), n_copies)

    held_obs = venv.reset()
    for _step in range(55):
        actions = [venv.action_space.sample() for _ in range(venv.num_envs)]

        # Check we're not passing a thing that gets mutated
        snapshot = copy.deepcopy(held_obs)
        next_obs, rews, dones, infos = venv.step(actions)
        assert hash(str(snapshot)) == hash(str(held_obs))

        for batch in (next_obs, rews, dones, infos):
            assert len(batch) == expected_len

        # no agent death, only env death
        assert not any(dones) or all(dones)

        held_obs = next_obs
def image_transpose(env):
    """Return ``env`` wrapped in ``VecTransposeImage`` when it needs transposing.

    The wrap happens only if the observation space is an image space that is
    not already channels-first; otherwise ``env`` is returned unchanged.
    """
    space = env.observation_space
    if not is_image_space(space) or is_image_space_channels_first(space):
        return env
    return VecTransposeImage(env)


def _make_pistonball_vec_env(num_copies):
    """Build one monitored pistonball vec env with ``num_copies`` copies."""
    wrapped = pistonball_v5.parallel_env()
    wrapped = ss.color_reduction_v0(wrapped, mode="B")
    wrapped = ss.resize_v0(wrapped, x_size=84, y_size=84)
    wrapped = ss.frame_stack_v1(wrapped, 3)
    wrapped = ss.pettingzoo_env_to_vec_env_v1(wrapped)
    wrapped = ss.concat_vec_envs_v1(
        wrapped, num_copies, num_cpus=1, base_class="stable_baselines3"
    )
    return image_transpose(VecMonitor(wrapped))


# Training env first, then a single-copy evaluation env with the same wrappers.
env = _make_pistonball_vec_env(n_envs)
eval_env = _make_pistonball_vec_env(1)

eval_freq = int(n_timesteps / n_evaluations)
import shutil import traci if __name__ == '__main__': RESOLUTION = (3200, 1800) env = sumo_rl.grid4x4(use_gui=True, out_csv_name='outputs/grid4x4/ppo_test', virtual_display=RESOLUTION) max_time = env.unwrapped.env.sim_max_time delta_time = env.unwrapped.env.delta_time print("Environment created") env = ss.pettingzoo_env_to_vec_env_v1(env) env = ss.concat_vec_envs_v1(env, 2, num_cpus=1, base_class='stable_baselines3') env = VecMonitor(env) model = PPO("MlpPolicy", env, verbose=3, gamma=0.95, n_steps=256, ent_coef=0.0905168, learning_rate=0.00062211, vf_coef=0.042202, max_grad_norm=0.9, gae_lambda=0.99, n_epochs=5, clip_range=0.3, batch_size=256,
def test_inital_state_dissimilarity():
    """Check that the two CartPole copies do not reset to identical observations."""
    venv = concat_vec_envs_v1(gym.make("CartPole-v1"), 2)
    initial_obs = venv.reset()

    all_equal = np.equal(initial_obs[0], initial_obs[1]).all()
    assert not all_equal
def test_gym_supersuit_equivalency():
    """Compare ``concat_vec_envs_v1`` with ``gym_vec_env_v0`` on MountainCarContinuous.

    Both vector envs are built from the same base env with three copies and
    handed to ``check_vec_env_equivalency``.
    """
    n_copies = 3
    base_env = gym.make("MountainCarContinuous-v0")

    concat_venv = concat_vec_envs_v1(base_env, n_copies)
    gym_venv = gym_vec_env_v0(base_env, n_copies)

    check_vec_env_equivalency(concat_venv, gym_venv)
# Base pistonball environment.
env = pistonball_v6.parallel_env(
    n_pistons=20,
    time_penalty=-0.1,
    continuous=True,
    random_drop=True,
    random_rotate=True,
    ball_mass=0.75,
    ball_friction=0.3,
    ball_elasticity=1.5,
    max_cycles=125,
)

# Observation preprocessing: color reduction (mode "B"), resize to 84x84,
# then a frame stack of 3.
env = ss.color_reduction_v0(env, mode="B")
env = ss.resize_v1(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)

# Convert to a vec env and concatenate 8 copies for Stable-Baselines3.
env = ss.pettingzoo_env_to_vec_env_v1(env)
env = ss.concat_vec_envs_v1(env, 8, num_cpus=4, base_class="stable_baselines3")

# PPO keyword arguments, kept together so they can be read at a glance.
ppo_hyperparams = {
    "gamma": 0.95,
    "n_steps": 256,
    "ent_coef": 0.0905168,
    "learning_rate": 0.00062211,
    "vf_coef": 0.042202,
    "max_grad_norm": 0.9,
    "gae_lambda": 0.99,
    "n_epochs": 5,
    "clip_range": 0.3,
    "batch_size": 256,
}
model = PPO(CnnPolicy, env, verbose=3, **ppo_hyperparams)
def test_multi_threaded_concatenate():
    """Run ``dict_vec_env_test`` on a 2-copy, num_cpus=2 concatenation of ``make_env()``."""
    agent_venv = pettingzoo_env_to_vec_env_v1(make_env())
    concatenated = concat_vec_envs_v1(agent_venv, 2, num_cpus=2)
    dict_vec_env_test(concatenated)
def test_env_is_wrapped_true():
    """Check ``env_is_wrapped(frame_skip_gym)`` is True for each of the three copies.

    The base MountainCarContinuous env is wrapped with ``frame_skip_gym``
    before being concatenated.
    """
    n_copies = 3
    skipped_env = frame_skip_gym(gym.make("MountainCarContinuous-v0"), 4)
    venv = concat_vec_envs_v1(skipped_env, n_copies)

    wrapped_flags = venv.env_is_wrapped(frame_skip_gym)
    assert wrapped_flags == [True] * 3
def test_env_is_wrapped_pettingzoo():
    """Check ``env_is_wrapped(frame_skip_gym)`` is all False for simple_spread.

    No ``frame_skip_gym`` wrapper is applied here, so every reported entry is
    expected to be False.
    """
    n_copies = 3
    agent_venv = pettingzoo_env_to_vec_env_v1(simple_spread_v2.parallel_env())
    venv = concat_vec_envs_v1(agent_venv, n_copies)

    wrapped_flags = venv.env_is_wrapped(frame_skip_gym)
    # 9 entries: presumably 3 copies x 3 agents per env -- confirm against
    # the simple_spread default agent count.
    assert wrapped_flags == [False] * 9