def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False) -> VecEnv:
    """Build the vectorized Pistonball environment.

    The ``pistonball_v5`` parallel environment is reduced to its blue
    channel, resized to 84x84 (linear interpolation), frame-stacked with
    3 frames, converted to a vector environment, replicated ``n_envs``
    times for Stable-Baselines3, and wrapped in ``VecMonitor``.

    :param n_envs: number of copies passed to ``ss.concat_vec_envs_v1``.
    :param eval_env: forwarded to ``self._maybe_normalize``.
    :param no_log: accepted for interface compatibility; not used here.
    :return: the fully wrapped vectorized environment.
    """
    env = pistonball_v5.parallel_env()
    env = ss.color_reduction_v0(env, mode="B")
    env = ss.resize_v0(env, x_size=84, y_size=84, linear_interp=True)
    env = ss.frame_stack_v1(env, 3)
    env = ss.pettingzoo_env_to_vec_env_v1(env)

    # Report the environment count only when verbose output is requested,
    # consistent with the other message printed by this method.
    if self.verbose > 0:
        print(n_envs)

    env = ss.concat_vec_envs_v1(env, n_envs, num_cpus=4, base_class="stable_baselines3")
    env = VecMonitor(env)
    env = self._maybe_normalize(env, eval_env)

    # Channel-last image observations are transposed to channel-first.
    if is_image_space(env.observation_space) and not is_image_space_channels_first(
        env.observation_space
    ):
        if self.verbose > 0:
            print("Wrapping into a VecTransposeImage")
        env = VecTransposeImage(env)

    return env
def env_creator():
    """Create the ``pistonball_v4`` environment with its preprocessing chain.

    The wrappers are applied in this order: blue-channel color reduction,
    cast to ``float32``, resize to 84x84, observation normalization
    (``env_min=0``, ``env_max=1``) and stacking of 3 frames.
    """
    pistonball_options = dict(
        n_pistons=20,
        local_ratio=0,
        time_penalty=-0.1,
        continuous=True,
        random_drop=True,
        random_rotate=True,
        ball_mass=0.75,
        ball_friction=0.3,
        ball_elasticity=1.5,
        max_cycles=125,
    )
    raw_env = pistonball_v4.env(**pistonball_options)

    blue_only = ss.color_reduction_v0(raw_env, mode='B')
    as_float = ss.dtype_v0(blue_only, 'float32')
    resized = ss.resize_v0(as_float, x_size=84, y_size=84)
    scaled = ss.normalize_obs_v0(resized, env_min=0, env_max=1)
    return ss.frame_stack_v1(scaled, 3)
def env_creator(args):
    """Create the environment from ``env_constr`` and wrap it as a ``PettingZooEnv``.

    ``env_constr`` and ``model`` are not parameters; they are read from the
    enclosing scope. ``args`` is accepted but not used in this function.

    :param args: unused.
    :return: the wrapped environment as a ``PettingZooEnv``.
    """
    env = env_constr.env()  # killable_knights=False, killable_archers=False)

    # Observations are resized to 84x84 when no model is selected and to
    # 32x32 otherwise. ``is None`` is the identity test for the None
    # singleton (the original used ``== None``).
    resize_size = 84 if model is None else 32
    env = supersuit.resize_v0(env, resize_size, resize_size, linear_interp=True)
    env = supersuit.color_reduction_v0(env)
    env = supersuit.pad_action_space_v0(env)
    env = supersuit.pad_observations_v0(env)
    # env = supersuit.frame_stack_v0(env,2)
    env = supersuit.dtype_v0(env, np.float32)
    env = supersuit.normalize_obs_v0(env)

    # The "MLPModelV2" model additionally gets flattened observations.
    if model == "MLPModelV2":
        env = supersuit.flatten_v0(env)

    env = PettingZooEnv(env)
    return env
def unwrapped_check(env):
    """Stack every applicable wrapper on ``env`` and check ``unwrapped``.

    Wrappers are chosen from the environment's current observation and
    action spaces, which are re-read after each stage because earlier
    wrappers may change them. After all wrappers are applied,
    ``env.unwrapped`` must still be a ``DummyEnv``.

    :param env: environment exposing ``observation_space``, ``action_space``
        and ``unwrapped``.
    :raises AssertionError: if ``env.unwrapped`` is not a ``DummyEnv``.
    """
    # image observations
    obs_space = env.observation_space
    if isinstance(obs_space, spaces.Box):
        # The original compared the shape tuple with the int 3 (always
        # False) and called len() on the int ``shape[2]``, so this branch
        # could never run. Test the rank and the channel count instead.
        is_image = (
            len(obs_space.shape) == 3
            and obs_space.shape[2] == 3
            and (obs_space.low == 0).all()
            and (obs_space.high == 255).all()
        )
        if is_image:
            env = max_observation_v0(env, 2)
            env = color_reduction_v0(env, mode="full")
            # NOTE(review): confirm normalize_obs_v0 accepts the dtype of the
            # image spaces this check is run against.
            env = normalize_obs_v0(env)

    # box action spaces
    if isinstance(env.action_space, spaces.Box):
        env = clip_actions_v0(env)
        env = scale_actions_v0(env, 0.5)

    # stackable observations
    if isinstance(env.observation_space, (spaces.Box, spaces.Discrete)):
        env = frame_stack_v1(env, 2)

    # not discrete and not multibinary observations
    if not isinstance(env.observation_space, (spaces.Discrete, spaces.MultiBinary)):
        env = dtype_v0(env, np.float16)
        env = flatten_v0(env)
        env = frame_skip_v0(env, 2)

    # everything else
    env = clip_reward_v0(env, lower_bound=-1, upper_bound=1)
    env = delay_observations_v0(env, 2)
    env = sticky_actions_v0(env, 0.5)
    env = nan_random_v0(env)
    env = nan_zeros_v0(env)

    assert env.unwrapped.__class__ == DummyEnv, f"Failed to unwrap {env}"
def unwrapped_check(env):
    """Apply every applicable wrapper to ``env`` and check ``unwrapped``.

    The environment is reset first and its agent list captured once. Each
    predicate is evaluated on the environment as wrapped so far, and the
    matching wrappers are applied in a fixed order. After all wrappers are
    applied, ``env.unwrapped`` must still be a ``DummyEnv``.

    :param env: multi-agent environment exposing ``reset``, ``agents`` and
        ``unwrapped``.
    :raises AssertionError: if ``env.unwrapped`` is not a ``DummyEnv``.
    """

    def apply_in_order(target, *wrappers):
        # Fold the wrappers over the environment, left to right.
        for wrap in wrappers:
            target = wrap(target)
        return target

    env.reset()
    agent_ids = env.agents

    if image_observation(env, agent_ids):
        env = apply_in_order(
            env,
            lambda e: max_observation_v0(e, 2),
            lambda e: color_reduction_v0(e, mode="full"),
            normalize_obs_v0,
        )

    if box_action(env, agent_ids):
        env = apply_in_order(
            env,
            clip_actions_v0,
            lambda e: scale_actions_v0(e, 0.5),
        )

    if observation_homogenizable(env, agent_ids):
        env = apply_in_order(
            env,
            pad_observations_v0,
            lambda e: frame_stack_v1(e, 2),
            agent_indicator_v0,
            black_death_v3,
        )

    # all() over a generator keeps the original short-circuit evaluation.
    flattenable_checks = (
        not_dict_observation,
        not_discrete_observation,
        not_multibinary_observation,
    )
    if all(check(env, agent_ids) for check in flattenable_checks):
        env = apply_in_order(
            env,
            lambda e: dtype_v0(e, np.float16),
            flatten_v0,
            lambda e: frame_skip_v0(e, 2),
        )

    if action_homogenizable(env, agent_ids):
        env = pad_action_space_v0(env)

    # Wrappers that are applied regardless of the spaces.
    env = apply_in_order(
        env,
        lambda e: clip_reward_v0(e, lower_bound=-1, upper_bound=1),
        lambda e: delay_observations_v0(e, 2),
        lambda e: sticky_actions_v0(e, 0.5),
        nan_random_v0,
        nan_zeros_v0,
    )

    assert env.unwrapped.__class__ == DummyEnv, f"Failed to unwrap {env}"
name=experiment_name, monitor_gym=True, save_code=True) writer = SummaryWriter(f"/tmp/{experiment_name}") # TRY NOT TO MODIFY: seeding device = torch.device( 'cuda' if torch.cuda.is_available() and args.cuda else 'cpu') random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.backends.cudnn.deterministic = args.torch_deterministic # petting zoo env = pistonball_v4.parallel_env() env = ss.color_reduction_v0(env, mode='B') env = ss.resize_v0(env, x_size=84, y_size=84) env = ss.frame_stack_v1(env, 3) env = ss.pettingzoo_env_to_vec_env_v0(env) envs = ss.concat_vec_envs_v0(env, args.num_envs, num_cpus=0, base_class='stable_baselines3') envs = VecMonitor(envs) if args.capture_video: envs = VecVideoRecorder(envs, f'videos/{experiment_name}', record_video_trigger=lambda x: x % 150000 == 0, video_length=400) envs = VecPyTorch(envs, device) args.num_envs = envs.num_envs
assert obs.shape == (64, 3) first_obs, _, _, _ = env.step(5) assert np.all(np.equal(first_obs, base_obs.reshape([64, 3]))) def new_continuous_dummy(): base_act_spaces = Box(low=np.float32(0.0), high=np.float32(10.0), shape=[3]) return DummyEnv(base_obs, base_obs_space, base_act_spaces) def new_dummy(): return DummyEnv(base_obs, base_obs_space, base_act_spaces) wrappers = [ supersuit.color_reduction_v0(new_dummy(), "R"), supersuit.resize_v0(dtype_v0(new_dummy(), np.uint8), x_size=5, y_size=10), supersuit.resize_v0(dtype_v0(new_dummy(), np.uint8), x_size=5, y_size=10, linear_interp=True), supersuit.dtype_v0(new_dummy(), np.int32), supersuit.flatten_v0(new_dummy()), supersuit.reshape_v0(new_dummy(), (64, 3)), supersuit.normalize_obs_v0(new_dummy(), env_min=-1, env_max=5.0), supersuit.frame_stack_v1(new_dummy(), 8), supersuit.reward_lambda_v0(new_dummy(), lambda x: x / 10), supersuit.clip_reward_v0(new_dummy()), supersuit.clip_actions_v0(new_continuous_dummy()), supersuit.frame_skip_v0(new_dummy(), 4), supersuit.frame_skip_v0(new_dummy(), (4, 6)), supersuit.sticky_actions_v0(new_dummy(), 0.75), supersuit.delay_observations_v0(new_dummy(), 1), ]
def env_creator(config):
    """Return a ``pistonball_v6`` environment with preprocessed observations.

    Observations are cast to ``np.float32``, reduced to the red channel and
    then normalized, in that order. ``config`` is accepted but not used.
    """
    return normalize_obs_v0(
        color_reduction_v0(
            dtype_v0(pistonball_v6.env(), dtype=np.float32),
            mode="R",
        )
    )
def env_creator(config):
    """Return a ``pistonball_v4`` environment with preprocessed observations.

    ``local_ratio`` is taken from ``config`` and defaults to 0.2 when the
    key is absent. Observations are cast to ``float32``, reduced to the red
    channel and then normalized, in that order.
    """
    local_ratio = config.get("local_ratio", 0.2)
    raw_env = pistonball_v4.env(local_ratio=local_ratio)

    as_float = dtype_v0(raw_env, dtype=float32)
    red_only = color_reduction_v0(as_float, mode="R")
    return normalize_obs_v0(red_only)
# Runs the API test on simple_world_comm_v2 wrapped with pad_action_space_v0,
# then the seed test on a factory that wraps a fresh environment with
# sticky_actions_v0.
def test_pettinzoo_pad_action_space():
    _env = simple_world_comm_v2.env()
    wrapped_env = pad_action_space_v0(_env)
    api_test.api_test(wrapped_env)
    # The seed test receives a factory (lambda) rather than an instance.
    seed_test.seed_test( lambda: sticky_actions_v0(simple_world_comm_v2.env(), 0.5), 100)


# Runs the parallel play test on the parallel variant of simple_world_comm_v2
# wrapped with pad_action_space_v0.
def test_pettingzoo_parallel_env():
    _env = simple_world_comm_v2.parallel_env()
    wrapped_env = pad_action_space_v0(_env)
    parallel_test.parallel_play_test(wrapped_env)


# Wrapped knights_archers_zombies_v4 environments, one entry per wrapper
# configuration; each entry is built on a fresh environment instance.
wrappers = [
    supersuit.color_reduction_v0(knights_archers_zombies_v4.env(), "R"),
    # resize_v0 entries are fed an environment first cast to uint8.
    supersuit.resize_v0(dtype_v0(knights_archers_zombies_v4.env(), np.uint8), x_size=5, y_size=10),
    supersuit.resize_v0(dtype_v0(knights_archers_zombies_v4.env(), np.uint8), x_size=5, y_size=10, linear_interp=True),
    supersuit.dtype_v0(knights_archers_zombies_v4.env(), np.int32),
    supersuit.flatten_v0(knights_archers_zombies_v4.env()),
    supersuit.reshape_v0(knights_archers_zombies_v4.env(), (512 * 512, 3)),
    # normalize_obs_v0 is fed an environment first cast to float32.
    supersuit.normalize_obs_v0(dtype_v0(knights_archers_zombies_v4.env(), np.float32), env_min=-1, env_max=5.0),
    supersuit.frame_stack_v1(knights_archers_zombies_v4.env(), 8),
# Evaluation interval: total timesteps split into n_evaluations chunks, then
# divided by the number of (environment, agent) pairs, with a floor of 1.
eval_freq = int(n_timesteps / n_evaluations)
eval_freq = max(eval_freq // (n_envs*n_agents), 1)

# PPO with an MLP policy and fixed hyperparameters.
model = PPO("MlpPolicy", env, verbose=3, gamma=0.95, n_steps=256, ent_coef=0.0905168, learning_rate=0.00062211, vf_coef=0.042202, max_grad_norm=0.9, gae_lambda=0.99, n_epochs=5, clip_range=0.3, batch_size=256)
# The callback evaluates on eval_env every eval_freq and writes both the best
# model and its logs under ./logs/.
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=eval_freq, deterministic=True, render=False)
model.learn(total_timesteps=n_timesteps, callback=eval_callback)

# Reload the best checkpoint saved by the callback and score it over 10 episodes.
model = PPO.load("./logs/best_model")
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10)
print(mean_reward)
print(std_reward)

# Rendering environment with the same color reduction, resize and frame
# stacking as applied above.
# NOTE(review): render_env is created via parallel_env() but is driven with
# agent_iter()/last() below — confirm the environment API matches.
render_env = base_env.copy().parallel_env()
render_env = ss.color_reduction_v0(render_env, mode='B')
render_env = ss.resize_v0(render_env, x_size=84, y_size=84)
render_env = ss.frame_stack_v1(render_env, 3)

obs_list = []
i = 0  # counts environment steps taken in the loop below
render_env.reset()
while True:
    for agent in render_env.agent_iter():
        observation, _, done, _ = render_env.last()
        # An agent that is done is stepped with None instead of a prediction.
        action = model.predict(observation, deterministic=True)[0] if not done else None
        render_env.step(action)
        i += 1
# Load the hyperparameter set selected by `num` from its JSON file.
with open("./hyperparameter_jsons/" + "hyperparameters_" + num + ".json") as f:
    params = json.load(f)

print(params)


# Wraps `env` in VecTransposeImage when its observation space is an image
# space that is not already channels-first; otherwise returns it unchanged.
def image_transpose(env):
    if is_image_space(env.observation_space) and not is_image_space_channels_first(
        env.observation_space
    ):
        env = VecTransposeImage(env)
    return env


# Training environment: blue-channel color reduction, 84x84 resize, 3-frame
# stack, conversion to a vector env, n_envs copies, monitoring and (if needed)
# image transposition.
env = pistonball_v5.parallel_env()
env = ss.color_reduction_v0(env, mode="B")
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v1(env)
env = ss.concat_vec_envs_v1(env, n_envs, num_cpus=1, base_class="stable_baselines3")
env = VecMonitor(env)
env = image_transpose(env)

# Evaluation environment: same preprocessing, with a single copy.
eval_env = pistonball_v5.parallel_env()
eval_env = ss.color_reduction_v0(eval_env, mode="B")
eval_env = ss.resize_v0(eval_env, x_size=84, y_size=84)
eval_env = ss.frame_stack_v1(eval_env, 3)
eval_env = ss.pettingzoo_env_to_vec_env_v1(eval_env)
eval_env = ss.concat_vec_envs_v1(
    eval_env, 1, num_cpus=1, base_class="stable_baselines3"
)
def env_creator(config):
    """Return a ``zoo_yaniv`` environment with preprocessed observations.

    ``config`` is forwarded to ``zoo_yaniv.env``. Observations are cast to
    ``float32``, reduced to the red channel and then normalized, in that
    order.
    """
    # (wrapper, keyword arguments) pairs, applied first to last.
    pipeline = (
        (dtype_v0, {"dtype": float32}),
        (color_reduction_v0, {"mode": "R"}),
        (normalize_obs_v0, {}),
    )

    wrapped = zoo_yaniv.env(config=config)
    for wrap, options in pipeline:
        wrapped = wrap(wrapped, **options)
    return wrapped
# Runs the API test on simple_world_comm_v2 wrapped with pad_action_space_v0,
# then the seed test on a factory that wraps a fresh environment with
# sticky_actions_v0.
def test_pettingzoo_pad_action_space():
    _env = simple_world_comm_v2.env()
    wrapped_env = pad_action_space_v0(_env)
    api_test(wrapped_env)
    # The seed test receives a factory (lambda) rather than an instance.
    seed_test(lambda: sticky_actions_v0(simple_world_comm_v2.env(), 0.5), 100)


# Runs the parallel API test on the parallel variant of simple_world_comm_v2
# wrapped with pad_action_space_v0.
def test_pettingzoo_parallel_env():
    _env = simple_world_comm_v2.parallel_env()
    wrapped_env = pad_action_space_v0(_env)
    parallel_test.parallel_api_test(wrapped_env)


# Wrapped knights_archers_zombies_v10 environments, one entry per wrapper
# configuration; the first three entries pass vector_state=False.
wrappers = [
    supersuit.color_reduction_v0(
        knights_archers_zombies_v10.env(vector_state=False), "R"),
    # resize_v1 entries are fed an environment first cast to uint8.
    supersuit.resize_v1(
        dtype_v0(knights_archers_zombies_v10.env(vector_state=False), np.uint8), x_size=5, y_size=10, ),
    supersuit.resize_v1(
        dtype_v0(knights_archers_zombies_v10.env(vector_state=False), np.uint8), x_size=5, y_size=10, linear_interp=True, ),
    supersuit.dtype_v0(knights_archers_zombies_v10.env(), np.int32),
    supersuit.flatten_v0(knights_archers_zombies_v10.env()),