Ejemplo n.º 1
0
def test_frame_stack():
    """Exercise frame_stack_v1 with a stack size of 4 on two dummy environments.

    The first environment uses Box observations of shape (2, 3); the second
    reuses ``base_act_spaces`` as its observation spaces and checks the
    stacked discrete observation values.
    """
    # --- Box observations -------------------------------------------------
    base_obs_space = {}
    base_obs = {}
    for idx in range(2):
        agent = f"a{idx}"
        base_obs_space[agent] = Box(low=np.float32(0.0), high=np.float32(10.0), shape=[2, 3])
        base_obs[agent] = np.zeros([2, 3]) + np.arange(3) + idx
    env = frame_stack_v1(DummyEnv(base_obs, base_obs_space, base_act_spaces), 4)
    env.reset()
    obs, _, _, _ = env.last()
    assert obs.shape == (2, 3, 4)
    env.step(2)
    first_obs, _, _, _ = env.last()
    newest_frame = first_obs[:, :, -1]
    print(newest_frame)
    # The newest slot holds agent a1's observation; every older slot is zero.
    assert (newest_frame == base_obs["a1"]).all()
    assert (first_obs[:, :, :-1] == 0).all()

    # --- observation spaces taken from base_act_spaces ---------------------
    base_obs = {f"a{idx}": idx + 3 for idx in range(2)}
    env = frame_stack_v1(DummyEnv(base_obs, base_act_spaces, base_act_spaces), 4)
    env.reset()
    obs, _, _, _ = env.last()
    assert env.observation_spaces[env.agent_selection].n == 5 ** 4
    env.step(2)
    first_obs, _, _, _ = env.last()
    assert first_obs == 4
    env.step(2)
    second_obs, _, _, _ = env.last()
    assert second_obs == 3 + 3 * 5
    for _step in range(8):
        env.step(2)
        nth_obs, _, _, _ = env.last()
    # After enough steps all four slots hold the value 3 (base-5 encoding).
    assert nth_obs == ((3 * 5 + 3) * 5 + 3) * 5 + 3
Ejemplo n.º 2
0
    def env_creator():
        """Build the environment for ``args.game`` with every wrapper applied.

        Atari games are created with game-specific keyword arguments and get
        frame skipping and frame stacking. All games are then cast to float32,
        padded (observations and action space), optionally flattened, and
        finally wrapped in ``LatentGaussianAugmentedEnvWrapper``.
        """
        if args.game.__package__.endswith('atari'):
            # These pong variants are created with an explicit player count.
            wants_two_players = args.game_name.startswith(
                ('foozpong', 'basketball_pong', 'volleyball_pong'))
            atari_kwargs = {
                'obs_type': args.atari_obs_type,
                'max_cycles': args.max_steps['atari'],
                'full_action_space': False,
            }
            if wants_two_players:
                atari_kwargs['num_players'] = 2
            skipped = frame_skip_v0(args.game.env(**atari_kwargs),
                                    args.atari_frame_skip_num)
            env = frame_stack_v1(skipped, args.atari_frame_stack_num)
        else:
            env = args.game.env()

        if args.game_name.startswith('rps'):
            env = one_hot_obs_wrapper(env)

        env = dtype_v0(env, dtype=float32)
        env = pad_action_space_v0(pad_observations_v0(env))

        if args.game_name.startswith(('connect_four', 'tictactoe')):
            env = FlattenEnvWrapper(env)

        GAUSSIAN_STD = 1.0
        assert abs(GAUSSIAN_STD - 1.0) < 1e-5, "must be 1.0, otherwise simple ensemble implementation is wrong"
        return LatentGaussianAugmentedEnvWrapper(
            env,
            latent_parameter_dim=args.latent_para_dim,
            gaussian_std=GAUSSIAN_STD,
            use_dict_obs_space=args.use_dict_obs_space)
Ejemplo n.º 3
0
def atari_preprocessing(
    env: Union[ParallelEnvWrapper, SequentialEnvWrapper]
) -> Union[ParallelEnvWrapper, SequentialEnvWrapper]:
    """Wrap ``env`` in the Atari preprocessing chain of SuperSuit wrappers.

    Args:
        env: the environment to preprocess.

    Returns:
        ``env`` wrapped, innermost first, with ``max_observation_v0(2)``,
        ``sticky_actions_v0(0.25)``, ``frame_skip_v0(4)``,
        ``resize_v0(84, 84)``, ``frame_stack_v1(4)`` and
        ``dtype_v0(np.float32)``.
    """
    max_pooled = supersuit.max_observation_v0(env, 2)

    # A repeat probability of 0.25 introduces non-determinism to the system.
    sticky = supersuit.sticky_actions_v0(max_pooled, repeat_action_probability=0.25)

    # Skip frames for faster processing and less control.
    # To be compatible with gym, use frame_skip(env, (2,5)) instead.
    skipped = supersuit.frame_skip_v0(sticky, 4)

    # Downscale observations for faster processing.
    resized = supersuit.resize_v0(skipped, 84, 84)

    # Stack frames so the agent can see everything on the screen despite
    # Atari's flickering screen problem.
    stacked = supersuit.frame_stack_v1(resized, 4)

    return supersuit.dtype_v0(stacked, np.float32)
Ejemplo n.º 4
0
def run_parallel2(args):
    """Smoke-test parallel mode with a chain of supersuit wrappers.

    Creates the parallel environment named by ``args.env``, wraps it, seeds
    it with 1, and steps it for 500 cycles with every agent taking action 1,
    rendering after each step.
    """
    # NOTE(review): eval() executes whatever string is in args.env; only safe
    # while that value comes from a trusted source.
    raw_env = eval(args.env).parallel_env()

    # As per openai baseline's MaxAndSkip wrapper, max over the last 2 frames
    # to deal with frame flickering.
    wrapped = supersuit.max_observation_v0(raw_env, 2)
    # A repeat probability of 0.25 introduces non-determinism to the system.
    wrapped = supersuit.sticky_actions_v0(wrapped, repeat_action_probability=0.25)
    # Skip frames for faster processing and less control
    # (to be compatible with gym, use frame_skip(env, (2,5))).
    wrapped = supersuit.frame_skip_v0(wrapped, 4)
    # Downscale observations for faster processing.
    wrapped = supersuit.resize_v0(wrapped, 84, 84)
    # Stack frames so the agent sees everything despite Atari's flickering.
    parallel_env = supersuit.frame_stack_v1(wrapped, 4)

    parallel_env.seed(1)
    parallel_env.reset()
    print(parallel_env.agents)

    max_cycles = 500
    for _ in range(max_cycles):
        actions = dict.fromkeys(parallel_env.agents, 1)
        parallel_env.step(actions)
        parallel_env.render()
Ejemplo n.º 5
0
    def create_envs(self,
                    n_envs: int,
                    eval_env: bool = False,
                    no_log: bool = False) -> VecEnv:
        """Build the vectorized pistonball environment.

        Args:
            n_envs: number of environment copies passed to
                ``concat_vec_envs_v1`` (also printed).
            eval_env: forwarded to ``self._maybe_normalize``.
            no_log: not referenced by this method.

        Returns:
            The monitored, optionally normalized vector environment, wrapped
            in ``VecTransposeImage`` when its observation space is an image
            space that is not channels-first.
        """
        raw = pistonball_v5.parallel_env()
        reduced = ss.color_reduction_v0(raw, mode="B")
        resized = ss.resize_v0(reduced, x_size=84, y_size=84, linear_interp=True)
        stacked = ss.frame_stack_v1(resized, 3)
        single_vec = ss.pettingzoo_env_to_vec_env_v1(stacked)
        print(n_envs)
        multi_vec = ss.concat_vec_envs_v1(single_vec,
                                          n_envs,
                                          num_cpus=4,
                                          base_class="stable_baselines3")

        env = self._maybe_normalize(VecMonitor(multi_vec), eval_env)

        if not is_image_space(env.observation_space):
            return env
        if is_image_space_channels_first(env.observation_space):
            return env

        if self.verbose > 0:
            print("Wrapping into a VecTransposeImage")
        return VecTransposeImage(env)
Ejemplo n.º 6
0
def env_creator():
    """Create the pistonball_v4 environment with its observation wrappers.

    Returns:
        The environment after colour reduction (mode ``'B'``), float32 cast,
        resize to 84x84, normalization with bounds 0 and 1, and a stack of
        3 frames.
    """
    piston_settings = dict(
        n_pistons=20,
        local_ratio=0,
        time_penalty=-0.1,
        continuous=True,
        random_drop=True,
        random_rotate=True,
        ball_mass=0.75,
        ball_friction=0.3,
        ball_elasticity=1.5,
        max_cycles=125,
    )
    raw = pistonball_v4.env(**piston_settings)
    reduced = ss.color_reduction_v0(raw, mode='B')
    as_float = ss.dtype_v0(reduced, 'float32')
    resized = ss.resize_v0(as_float, x_size=84, y_size=84)
    normalized = ss.normalize_obs_v0(resized, env_min=0, env_max=1)
    return ss.frame_stack_v1(normalized, 3)
Ejemplo n.º 7
0
def env_fn():
    """Create SpaceInvaders with 4 stacked frames and transposed observations.

    Observations are transposed with axes (2, 0, 1). dtype_v0 and
    normalize_obs_v0 are not applied here.
    """

    def move_last_axis_first(obs):
        return np.transpose(obs, axes=(2, 0, 1))

    atari_env = AtariWrapper(gym.make("SpaceInvadersNoFrameskip-v4"),
                             clip_reward=False)
    stacked = supersuit.frame_stack_v1(atari_env, 4)
    return supersuit.observation_lambda_v0(stacked, move_last_axis_first)
Ejemplo n.º 8
0
 def env_creator(args):
     """Create the grayscale ``game_env`` environment with its wrappers.

     ``args`` is accepted but not used. clip_reward_v0 and
     color_reduction_v0 are not applied here.
     """
     raw = game_env.env(obs_type='grayscale_image')
     sticky = sticky_actions_v0(raw, repeat_action_probability=0.25)
     resized = resize_v0(sticky, 84, 84)
     skipped = frame_skip_v0(resized, 4)
     stacked = frame_stack_v1(skipped, 4)
     return agent_indicator_v0(stacked, type_only=False)
Ejemplo n.º 9
0
 def get_env(config):
     """Import the PettingZoo Atari game named by ``env_name`` and wrap it.

     Args:
         config: mapping read for ``'random_action'`` and
             ``'random_action_probability'``.

     Returns:
         A ``ParallelPettingZooEnv`` around the grayscale parallel env after
         frame skip 4, resize to 84x84, a stack of 4 frames and the agent
         indicator wrapper.
     """
     # Module names use underscores where env_name uses dashes.
     module_name = env_name.replace('-', '_')
     game_module = __import__(f'pettingzoo.atari.{module_name}', fromlist=[None])
     raw = game_module.parallel_env(obs_type='grayscale_image')
     skipped = frame_skip_v0(raw, 4)
     resized = resize_v0(skipped, 84, 84)
     stacked = frame_stack_v1(resized, 4)
     wrapped = agent_indicator_v0(stacked)
     return ParallelPettingZooEnv(
         wrapped,
         random_action=config['random_action'],
         random_proba=config['random_action_probability'])
Ejemplo n.º 10
0
def make_env(env_name='boxing_v1', seed=1, obs_type='rgb_image'):
    """Create and seed either the SlimeVolley env or a PettingZoo parallel env.

    See https://www.pettingzoo.ml/atari for the Atari environments.

    Args:
        env_name: ``'slimevolley_v0'`` or the name of a PettingZoo module that
            is in scope.
        seed: value passed to ``env.seed``.
        obs_type: forwarded to ``parallel_env``; ``'rgb_image'`` enables the
            image preprocessing chain, anything else only gets frame skip.

    Returns:
        The seeded environment. PettingZoo envs also get ``observation_space``
        and ``action_space`` set to the first agent's spaces.
    """
    if env_name == 'slimevolley_v0':
        slime_env = SlimeVolleyWrapper(gym.make("SlimeVolley-v0"))
        slime_env.seed(seed)
        return slime_env

    # PettingZoo envs.
    # NOTE(review): eval() runs arbitrary code if env_name is not trusted.
    env = eval(env_name).parallel_env(obs_type=obs_type)

    if obs_type != 'rgb_image':
        env = supersuit.frame_skip_v0(env, 4)
    else:
        # As per openai baseline's MaxAndSkip wrapper, max over the last 2
        # frames to deal with frame flickering.
        env = supersuit.max_observation_v0(env, 2)
        # A repeat probability of 0.25 introduces non-determinism.
        env = supersuit.sticky_actions_v0(env, repeat_action_probability=0.25)
        # Skip frames for faster processing and less control
        # (to be compatible with gym, use frame_skip(env, (2,5))).
        env = supersuit.frame_skip_v0(env, 4)
        # Downscale observations for faster processing.
        env = supersuit.resize_v0(env, 84, 84)
        # Stack frames so the agent sees everything despite Atari's flickering.
        env = supersuit.frame_stack_v1(env, 4)

    # A PettingZooWrapper, if added, needs to be put at the end.
    if env_name in AtariEnvs:
        # Normalize Atari observations for both image and RAM. uint8 must be
        # cast to float first: https://github.com/PettingZoo-Team/SuperSuit
        as_float = supersuit.dtype_v0(env, 'float32')
        env = supersuit.normalize_obs_v0(as_float, env_min=0, env_max=1)

    # Expose the first agent's spaces as the env-level spaces.
    env.observation_space = list(env.observation_spaces.values())[0]
    env.action_space = list(env.action_spaces.values())[0]

    env.seed(seed)
    return env
Ejemplo n.º 11
0
def unwrapped_check(env):
    """Stack every applicable wrapper on ``env`` and check it still unwraps.

    Wrappers are chosen from the type and shape of the (progressively
    wrapped) env's observation and action spaces. After all wrappers are
    applied, ``env.unwrapped`` must be a ``DummyEnv``.

    Args:
        env: a gym-style environment exposing ``observation_space``,
            ``action_space`` and ``unwrapped``.

    Raises:
        AssertionError: if ``env.unwrapped`` is not a ``DummyEnv`` instance.
    """
    # Image observations: a rank-3 Box with 3 channels on the last axis and
    # bounds [0, 255]. The previous test compared the shape tuple itself to 3
    # (always False) and called len() on an int, so this branch never ran.
    # NOTE(review): this branch is now reachable for image envs; confirm that
    # normalize_obs_v0 accepts the dtype produced by color_reduction_v0.
    image_space = env.observation_space
    if (isinstance(image_space, spaces.Box)
            and len(image_space.shape) == 3
            and image_space.shape[2] == 3
            and (image_space.low == 0).all()
            and (image_space.high == 255).all()):
        env = max_observation_v0(env, 2)
        env = color_reduction_v0(env, mode="full")
        env = normalize_obs_v0(env)

    # Box action spaces.
    if isinstance(env.action_space, spaces.Box):
        env = clip_actions_v0(env)
        env = scale_actions_v0(env, 0.5)

    # Stackable observations.
    if isinstance(env.observation_space, (spaces.Box, spaces.Discrete)):
        env = frame_stack_v1(env, 2)

    # Neither Discrete nor MultiBinary observations.
    if not isinstance(env.observation_space,
                      (spaces.Discrete, spaces.MultiBinary)):
        env = dtype_v0(env, np.float16)
        env = flatten_v0(env)
        env = frame_skip_v0(env, 2)

    # Wrappers that apply to everything.
    env = clip_reward_v0(env, lower_bound=-1, upper_bound=1)
    env = delay_observations_v0(env, 2)
    env = sticky_actions_v0(env, 0.5)
    env = nan_random_v0(env)
    env = nan_zeros_v0(env)

    assert env.unwrapped.__class__ == DummyEnv, f"Failed to unwrap {env}"
Ejemplo n.º 12
0
def wrap_env(env, obs_type='ram'):
    """Create the parallel env from ``env`` and apply the preprocessing chain.

    Args:
        env: object providing ``parallel_env(obs_type=...)``.
        obs_type: ``'rgb_image'`` enables the image preprocessing chain; any
            other value only gets frame skipping.

    Returns:
        A ``Dict2TupleWrapper`` around the float32, normalized environment,
        with ``observation_space`` / ``action_space`` set to the first
        agent's spaces and ``agents`` copied from the unwrapped env.
    """
    parallel = env.parallel_env(obs_type=obs_type)
    # Captured before wrapping and re-assigned on the wrapped env below.
    original_agents = parallel.unwrapped.agents

    if obs_type != 'rgb_image':
        # RAM version also needs frame skip, essential for boxing-v1, etc.
        parallel = supersuit.frame_skip_v0(parallel, 4)
    else:
        # As per openai baseline's MaxAndSkip wrapper, max over the last 2
        # frames to deal with frame flickering.
        parallel = supersuit.max_observation_v0(parallel, 2)
        # A repeat probability of 0.25 introduces non-determinism.
        parallel = supersuit.sticky_actions_v0(
            parallel, repeat_action_probability=0.25)
        # Skip frames for faster processing and less control
        # (to be compatible with gym, use frame_skip(env, (2,5))).
        parallel = supersuit.frame_skip_v0(parallel, 4)
        # Downscale observations for faster processing.
        parallel = supersuit.resize_v0(parallel, 84, 84)
        # Stack frames so the agent sees everything despite Atari's flickering.
        parallel = supersuit.frame_stack_v1(parallel, 4)

    # Normalize Atari observations for both image and RAM. uint8 must be cast
    # to float first: https://github.com/PettingZoo-Team/SuperSuit
    as_float = supersuit.dtype_v0(parallel, 'float32')
    normalized = supersuit.normalize_obs_v0(as_float, env_min=0, env_max=1)

    normalized.observation_space = list(normalized.observation_spaces.values())[0]
    normalized.action_space = list(normalized.action_spaces.values())[0]
    normalized.agents = original_agents

    return Dict2TupleWrapper(normalized)
Ejemplo n.º 13
0
def unwrapped_check(env):
    """Stack every applicable wrapper on ``env`` and check it still unwraps.

    The env is reset and its agent list is read once; each predicate helper
    is then evaluated against the env as wrapped so far, so the order of the
    sections below matters. Finally ``env.unwrapped`` must be a ``DummyEnv``.
    """
    env.reset()
    agents = env.agents

    # Image observations.
    if image_observation(env, agents):
        env = max_observation_v0(env, 2)
        env = color_reduction_v0(env, mode="full")
        env = normalize_obs_v0(env)

    # Box action spaces.
    if box_action(env, agents):
        env = clip_actions_v0(env)
        env = scale_actions_v0(env, 0.5)

    # Observations that can be homogenized across agents.
    if observation_homogenizable(env, agents):
        env = pad_observations_v0(env)
        env = frame_stack_v1(env, 2)
        env = agent_indicator_v0(env)
        env = black_death_v3(env)

    # Observations that are neither dict, discrete nor multibinary.
    if (not_dict_observation(env, agents)
            and not_discrete_observation(env, agents)
            and not_multibinary_observation(env, agents)):
        env = dtype_v0(env, np.float16)
        env = flatten_v0(env)
        env = frame_skip_v0(env, 2)

    # Action spaces that can be homogenized across agents.
    if action_homogenizable(env, agents):
        env = pad_action_space_v0(env)

    # Wrappers applied unconditionally.
    env = clip_reward_v0(env, lower_bound=-1, upper_bound=1)
    env = delay_observations_v0(env, 2)
    env = sticky_actions_v0(env, 0.5)
    env = nan_random_v0(env)
    env = nan_zeros_v0(env)

    assert env.unwrapped.__class__ == DummyEnv, f"Failed to unwrap {env}"
Ejemplo n.º 14
0
    return DummyEnv(base_obs, base_obs_space, base_act_spaces)


def new_dummy():
    """Return a new DummyEnv built from the module-level observations and spaces."""
    dummy_env = DummyEnv(base_obs, base_obs_space, base_act_spaces)
    return dummy_env


# One wrapped environment per supersuit wrapper under test. Each entry gets
# its own dummy env from new_dummy() (or new_continuous_dummy() for
# clip_actions_v0); the list is fed to pytest.mark.parametrize below.
# resize_v0 is given a uint8-cast env in both of its entries.
wrappers = [
    supersuit.color_reduction_v0(new_dummy(), "R"),
    supersuit.resize_v0(dtype_v0(new_dummy(), np.uint8), x_size=5, y_size=10),
    supersuit.resize_v0(dtype_v0(new_dummy(), np.uint8), x_size=5, y_size=10, linear_interp=True),
    supersuit.dtype_v0(new_dummy(), np.int32),
    supersuit.flatten_v0(new_dummy()),
    supersuit.reshape_v0(new_dummy(), (64, 3)),
    supersuit.normalize_obs_v0(new_dummy(), env_min=-1, env_max=5.0),
    supersuit.frame_stack_v1(new_dummy(), 8),
    supersuit.reward_lambda_v0(new_dummy(), lambda x: x / 10),
    supersuit.clip_reward_v0(new_dummy()),
    supersuit.clip_actions_v0(new_continuous_dummy()),
    supersuit.frame_skip_v0(new_dummy(), 4),
    # frame_skip_v0 also accepts a (4, 6) tuple here.
    supersuit.frame_skip_v0(new_dummy(), (4, 6)),
    supersuit.sticky_actions_v0(new_dummy(), 0.75),
    supersuit.delay_observations_v0(new_dummy(), 1),
]


@pytest.mark.parametrize("env", wrappers)
def test_basic_wrappers(env):
    env.seed(5)
    obs = env.reset()
    act_space = env.action_space
Ejemplo n.º 15
0
               save_code=True)
    writer = SummaryWriter(f"/tmp/{experiment_name}")

# TRY NOT TO MODIFY: seeding
# Use CUDA only when it is both available and requested via args.cuda.
device = torch.device(
    'cuda' if torch.cuda.is_available() and args.cuda else 'cpu')
# Seed Python, NumPy and PyTorch from the same args.seed.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.backends.cudnn.deterministic = args.torch_deterministic

# petting zoo
# Pistonball parallel env: colour reduction (mode 'B'), resize to 84x84,
# stack of 3 frames, then conversion to a vector env.
env = pistonball_v4.parallel_env()
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v0(env)
# Concatenate args.num_envs copies into one stable_baselines3-style vec env.
envs = ss.concat_vec_envs_v0(env,
                             args.num_envs,
                             num_cpus=0,
                             base_class='stable_baselines3')
envs = VecMonitor(envs)
if args.capture_video:
    # The trigger fires whenever its argument is a multiple of 150000;
    # each recording has video_length=400.
    envs = VecVideoRecorder(envs,
                            f'videos/{experiment_name}',
                            record_video_trigger=lambda x: x % 150000 == 0,
                            video_length=400)
envs = VecPyTorch(envs, device)
# Overwrite num_envs with the count reported by the vectorized env, then
# derive the batch and minibatch sizes from it.
args.num_envs = envs.num_envs
args.batch_size = int(args.num_envs * args.num_steps)
args.minibatch_size = int(args.batch_size // args.n_minibatch)
    generated_agents_env_v0,
)

import supersuit
from supersuit import dtype_v0
import pytest

# One wrapped environment per supersuit wrapper under test, each built on its
# own generated_agents_parallel_v0.env(); the list is fed to
# pytest.mark.parametrize below.
wrappers = [
    supersuit.dtype_v0(generated_agents_parallel_v0.env(), np.int32),
    supersuit.flatten_v0(generated_agents_parallel_v0.env()),
    # normalize_obs_v0 is given a float32-cast env.
    supersuit.normalize_obs_v0(
        dtype_v0(generated_agents_parallel_v0.env(), np.float32),
        env_min=-1,
        env_max=5.0,
    ),
    supersuit.frame_stack_v1(generated_agents_parallel_v0.env(), 8),
    supersuit.reward_lambda_v0(generated_agents_parallel_v0.env(),
                               lambda x: x / 10),
    supersuit.clip_reward_v0(generated_agents_parallel_v0.env()),
    supersuit.nan_noop_v0(generated_agents_parallel_v0.env(), 0),
    supersuit.nan_zeros_v0(generated_agents_parallel_v0.env()),
    supersuit.nan_random_v0(generated_agents_parallel_v0.env()),
    supersuit.frame_skip_v0(generated_agents_parallel_v0.env(), 4),
    supersuit.sticky_actions_v0(generated_agents_parallel_v0.env(), 0.75),
    supersuit.delay_observations_v0(generated_agents_parallel_v0.env(), 3),
    supersuit.max_observation_v0(generated_agents_parallel_v0.env(), 3),
]


@pytest.mark.parametrize("env", wrappers)
def test_pettingzoo_aec_api_par_gen(env):
Ejemplo n.º 17
0
def test_pettinzoo_frame_stack():
    """Run the PettingZoo API test on simple_push_v2 wrapped by frame_stack_v1.

    frame_stack_v1 is called with its default arguments.
    """
    api_test.api_test(frame_stack_v1(simple_push_v2.env()))
print(params)


def image_transpose(env):
    """Wrap ``env`` in VecTransposeImage when its images are not channels-first.

    Returns ``env`` unchanged when its observation space is not an image
    space or is already channels-first.
    """
    if not is_image_space(env.observation_space):
        return env
    if is_image_space_channels_first(env.observation_space):
        return env
    return VecTransposeImage(env)


# Training env: pistonball with colour reduction (mode "B"), resize to 84x84
# and a stack of 3 frames, converted to a vector env and concatenated into
# n_envs copies, then monitored and transposed if needed.
env = pistonball_v5.parallel_env()
env = ss.color_reduction_v0(env, mode="B")
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v1(env)
env = ss.concat_vec_envs_v1(env, n_envs, num_cpus=1, base_class="stable_baselines3")
env = VecMonitor(env)
env = image_transpose(env)

# Evaluation env: the same wrapper chain, but with a single copy instead of
# n_envs.
eval_env = pistonball_v5.parallel_env()
eval_env = ss.color_reduction_v0(eval_env, mode="B")
eval_env = ss.resize_v0(eval_env, x_size=84, y_size=84)
eval_env = ss.frame_stack_v1(eval_env, 3)
eval_env = ss.pettingzoo_env_to_vec_env_v1(eval_env)
eval_env = ss.concat_vec_envs_v1(
    eval_env, 1, num_cpus=1, base_class="stable_baselines3"
)
eval_env = VecMonitor(eval_env)
eval_env = image_transpose(eval_env)
Ejemplo n.º 19
0
# Experiment constants: how many evaluations to run over the whole training,
# and the agent / environment-copy counts used to scale eval_freq below.
n_evaluations = 20
n_agents = 2
n_envs = 4
n_timesteps = 8000000

# n agents, n timesteps, docs, make, PZ import in test file
# The main class SumoEnvironment inherits MultiAgentEnv from RLlib.

base_env = make_env(net_file='nets/4x4-Lucas/4x4.net.xml',
                    route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
                    out_csv_name='outputs/4x4grid/test',
                    use_gui=False,
                    num_seconds=80000)

# Training env: a copy of base_env with a stack of 3 frames, converted to a
# vector env and concatenated into n_envs copies.
env = base_env.copy().parallel_env()
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v0(env)
env = ss.concat_vec_envs_v0(env, n_envs, num_cpus=1, base_class='stable_baselines3')
env = VecMonitor(env)

# Evaluation env: same chain on a separate copy, with a single env copy.
eval_env = base_env.copy().parallel_env()
eval_env = ss.frame_stack_v1(eval_env, 3)
eval_env = ss.pettingzoo_env_to_vec_env_v0(eval_env)
eval_env = ss.concat_vec_envs_v0(eval_env, 1, num_cpus=1, base_class='stable_baselines3')
eval_env = VecMonitor(eval_env)

# Timesteps between evaluations, then divided by n_envs * n_agents and
# floored at 1.
eval_freq = int(n_timesteps / n_evaluations)
eval_freq = max(eval_freq // (n_envs*n_agents), 1)

model = PPO("MlpPolicy", env, verbose=3, gamma=0.95, n_steps=256, ent_coef=0.0905168, learning_rate=0.00062211, vf_coef=0.042202, max_grad_norm=0.9, gae_lambda=0.99, n_epochs=5, clip_range=0.3, batch_size=256)
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=eval_freq, deterministic=True, render=False)
 supersuit.resize_v1(
     dtype_v0(knights_archers_zombies_v10.env(vector_state=False),
              np.uint8),
     x_size=5,
     y_size=10,
     linear_interp=True,
 ),
 supersuit.dtype_v0(knights_archers_zombies_v10.env(), np.int32),
 supersuit.flatten_v0(knights_archers_zombies_v10.env()),
 supersuit.reshape_v0(knights_archers_zombies_v10.env(vector_state=False),
                      (512 * 512, 3)),
 supersuit.normalize_obs_v0(dtype_v0(knights_archers_zombies_v10.env(),
                                     np.float32),
                            env_min=-1,
                            env_max=5.0),
 supersuit.frame_stack_v1(combined_arms_v6.env(), 8),
 supersuit.pad_observations_v0(simple_world_comm_v2.env()),
 supersuit.pad_action_space_v0(simple_world_comm_v2.env()),
 supersuit.black_death_v3(combined_arms_v6.env()),
 supersuit.agent_indicator_v0(knights_archers_zombies_v10.env(), True),
 supersuit.agent_indicator_v0(knights_archers_zombies_v10.env(), False),
 supersuit.reward_lambda_v0(knights_archers_zombies_v10.env(),
                            lambda x: x / 10),
 supersuit.clip_reward_v0(combined_arms_v6.env()),
 supersuit.nan_noop_v0(knights_archers_zombies_v10.env(), 0),
 supersuit.nan_zeros_v0(knights_archers_zombies_v10.env()),
 supersuit.nan_random_v0(chess_v5.env()),
 supersuit.nan_random_v0(knights_archers_zombies_v10.env()),
 supersuit.frame_skip_v0(combined_arms_v6.env(), 4),
 supersuit.sticky_actions_v0(combined_arms_v6.env(), 0.75),
 supersuit.delay_observations_v0(combined_arms_v6.env(), 3),
Ejemplo n.º 21
0
    supersuit.color_reduction_v0(knights_archers_zombies_v4.env(), "R"),
    supersuit.resize_v0(dtype_v0(knights_archers_zombies_v4.env(), np.uint8),
                        x_size=5,
                        y_size=10),
    supersuit.resize_v0(dtype_v0(knights_archers_zombies_v4.env(), np.uint8),
                        x_size=5,
                        y_size=10,
                        linear_interp=True),
    supersuit.dtype_v0(knights_archers_zombies_v4.env(), np.int32),
    supersuit.flatten_v0(knights_archers_zombies_v4.env()),
    supersuit.reshape_v0(knights_archers_zombies_v4.env(), (512 * 512, 3)),
    supersuit.normalize_obs_v0(dtype_v0(knights_archers_zombies_v4.env(),
                                        np.float32),
                               env_min=-1,
                               env_max=5.0),
    supersuit.frame_stack_v1(knights_archers_zombies_v4.env(), 8),
    supersuit.pad_observations_v0(knights_archers_zombies_v4.env()),
    supersuit.pad_action_space_v0(knights_archers_zombies_v4.env()),
    supersuit.black_death_v0(knights_archers_zombies_v4.env()),
    supersuit.agent_indicator_v0(knights_archers_zombies_v4.env(), True),
    supersuit.agent_indicator_v0(knights_archers_zombies_v4.env(), False),
    supersuit.reward_lambda_v0(knights_archers_zombies_v4.env(),
                               lambda x: x / 10),
    supersuit.clip_reward_v0(knights_archers_zombies_v4.env()),
    supersuit.clip_actions_v0(prison_v2.env(continuous=True)),
    supersuit.frame_skip_v0(knights_archers_zombies_v4.env(), 4),
    supersuit.sticky_actions_v0(knights_archers_zombies_v4.env(), 0.75),
    supersuit.delay_observations_v0(knights_archers_zombies_v4.env(), 3),
]

Ejemplo n.º 22
0
def create_single_env(args):
    """Create one environment for ``args.env`` and wrap it for NFSP training.

    The environment family is chosen from the name, in this order:
    SlimeVolley, PettingZoo Atari (``AtariEnvs``), PettingZoo Classic
    (``ClassicEnvs``), LaserTag, and plain gym as the fallback.
    See https://www.pettingzoo.ml/atari for the Atari environments.

    Args:
        args: namespace read for ``env``, ``num_envs`` and ``seed``, plus
            ``against_baseline`` (SlimeVolley) and ``ram`` (Atari).

    Returns:
        The wrapped environment, seeded with ``args.seed``.

    Raises:
        Exception: whatever ``gym.make`` raises when the fallback gym
            environment cannot be created (re-raised after printing).
    """
    env_name = args.env
    # keep_info True to maintain dict type for parallel envs (otherwise the
    # env cannot pass the VectorEnv wrapper).
    keep_info = args.num_envs > 1

    if "slimevolley" in env_name or "SlimeVolley" in env_name:
        print(f'Load SlimeVolley env: {env_name}')
        env = gym.make(env_name)
        if env_name in [
                'SlimeVolleySurvivalNoFrameskip-v0',
                'SlimeVolleyNoFrameskip-v0', 'SlimeVolleyPixel-v0'
        ]:
            # For image-based envs, apply the following wrappers (from gym
            # atari) to achieve a pettingzoo style env, or use supersuit
            # (requires the input env to be either a pettingzoo or gym env).
            # Same as: https://github.com/hardmaru/slimevolleygym/blob/master/training_scripts/train_ppo_pixel.py
            # TODO Note: this cannot handle the two observations in the above
            # SlimeVolley envs, since the wrappers are for a single agent.
            if env_name != 'SlimeVolleyPixel-v0':
                env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)
            env = WarpFrame(env)
            # ClipRewardEnv is not applied here.
            env = FrameStack(env, 4)

        # slimevolley to pettingzoo style
        env = SlimeVolleyWrapper(env, args.against_baseline)
        # pettingzoo to nfsp style
        env = NFSPPettingZooWrapper(env, keep_info=keep_info)

    elif env_name in AtariEnvs:  # PettingZoo Atari envs
        print(f'Load PettingZoo Atari env: {env_name}')
        obs_type = 'ram' if args.ram else 'rgb_image'

        # NOTE(review): eval() executes the string; it is only reached for
        # names contained in AtariEnvs, so keep that collection trusted.
        env = eval(env_name).parallel_env(obs_type=obs_type)
        # This cannot go through the supersuit wrappers, so get it first and
        # reassign it afterwards.
        env_agents = env.unwrapped.agents

        if obs_type == 'rgb_image':
            # As per openai baseline's MaxAndSkip wrapper, max over the last
            # 2 frames to deal with frame flickering.
            env = supersuit.max_observation_v0(env, 2)

            # A repeat probability of 0.25 introduces non-determinism.
            env = supersuit.sticky_actions_v0(env,
                                              repeat_action_probability=0.25)

            # Skip frames for faster processing and less control
            # (to be compatible with gym, use frame_skip(env, (2,5))).
            env = supersuit.frame_skip_v0(env, 4)

            # Downscale observations for faster processing.
            env = supersuit.resize_v0(env, 84, 84)

            # Stack frames so the agent sees everything on the screen
            # despite Atari's flickering screen problem.
            env = supersuit.frame_stack_v1(env, 4)

        else:
            # RAM version also needs frame skip, essential for boxing-v1, etc.
            env = supersuit.frame_skip_v0(env, 4)

        # Normalize Atari observations for both image and RAM. uint8 must be
        # cast to float first: https://github.com/PettingZoo-Team/SuperSuit
        env = supersuit.dtype_v0(env, 'float32')
        env = supersuit.normalize_obs_v0(env, env_min=0, env_max=1)

        # Assign observation and action spaces from the first agent.
        env.observation_space = list(env.observation_spaces.values())[0]
        env.action_space = list(env.action_spaces.values())[0]
        env.agents = env_agents
        # pettingzoo to nfsp style; must be the outermost wrapper.
        env = NFSPPettingZooWrapper(env, keep_info=keep_info)

    elif env_name in ClassicEnvs:  # PettingZoo Classic envs
        print(f'Load PettingZoo Classic env: {env_name}')
        # NOTE(review): eval() is only reached for names in ClassicEnvs.
        if env_name in ['rps_v1', 'rpsls_v1']:
            env = eval(env_name).parallel_env()
            env = PettingzooClassicWrapper(env, observation_mask=1.)
        else:
            # Other Classic games do not support the Parallel API yet, so an
            # iterate-to-parallel adapter is used instead.
            env = eval(env_name).env()
            env = PettingzooClassic_Iterate2Parallel(env,
                                                     observation_mask=None)

        env = NFSPPettingZooWrapper(env, keep_info=keep_info)

    elif "LaserTag" in env_name:  # LaserTag: https://github.com/younggyoseo/pytorch-nfsp
        print(f'Load LaserTag env: {env_name}')
        env = gym.make(env_name)
        env = wrap_pytorch(env)

    else:  # gym env
        print(f'Load Gym env: {env_name}')
        try:
            env = gym.make(env_name)
        except Exception:
            # A bare ``except`` used to print and fall through, which then
            # failed below on an unbound ``env`` and hid the real error.
            print(f"Error: No such env: {env_name}!")
            raise
        # May need more wrappers here, e.g. Pong-ram-v0 needs scaled
        # observations.
        # Ref: https://towardsdatascience.com/deep-q-network-dqn-i-bce08bdf2af
        env = NFSPAtariWrapper(env, keep_info=keep_info)

    env.seed(args.seed)
    return env