예제 #1
0
def test_identical():
    """Check that two vectorized knights-archers-zombies envs stay in lockstep.

    Two vector envs of ``n_envs`` copies are built, one without a
    ``num_cpus`` argument and one with ``num_cpus=1``.  Both are seeded with
    42, reset, and then stepped with the same sampled actions.  On every
    iteration the selected agent, observations, done flags, pass flags,
    per-agent rewards and infos of the two envs are asserted equal.

    The test returns early once ``n_envs + 1`` sub-environment completions
    have been counted; otherwise it ends when ``agent_iter(200000)`` is
    exhausted.
    """
    n_envs = 2
    env1 = vectorize_aec_env_v0(knights_archers_zombies_v7.env(), n_envs)
    env2 = vectorize_aec_env_v0(knights_archers_zombies_v7.env(),
                                n_envs,
                                num_cpus=1)
    env1.seed(42)
    env2.seed(42)
    env1.reset()
    env2.reset()

    def policy(obs, agent):
        """Sample one action per sub-environment from ``agent``'s space."""
        return [
            env1.action_spaces[agent].sample() for _ in range(env1.num_envs)
        ]

    envs_done = 0
    for agent in env1.agent_iter(200000):
        assert env1.agent_selection == env2.agent_selection
        agent = env1.agent_selection
        obs1, rew1, agent_done1, env_done1, agent_passes1, infos1 = env1.last()
        obs2, rew2, agent_done2, env_done2, agent_passes2, infos2 = env2.last()
        assert np.all(np.equal(obs1, obs2))
        assert np.all(np.equal(agent_done1, agent_done2))
        assert np.all(np.equal(agent_passes1, agent_passes2))
        assert np.all(np.equal(env_done1, env_done2))
        assert all(
            np.all(np.equal(r1, r2))
            for r1, r2 in zip(env1.rewards.values(), env2.rewards.values()))
        assert infos1 == infos2

        # Feed the *same* actions to both envs so any divergence comes from
        # the envs themselves, not from the sampled policy.
        actions = policy(obs1, agent)
        env1.step(actions)
        env2.step(actions)

        # Inspect every sub-environment (one entry per vectorized copy).
        for j in range(n_envs):
            if rew1[j] != 0:
                print(j, agent, rew1, agent_done1[j])
            if env_done1[j]:
                print(j, "done")
                envs_done += 1
                if envs_done == n_envs + 1:
                    print("test passed")
                    return
예제 #2
0
def test_all():
    """Smoke-test ``vectorize_aec_env_v0`` on several environments.

    Every check below is run twice, with ``num_cpus`` set to 0 and then 1,
    each time on a freshly built vector env holding ``NUM_ENVS`` copies.
    """
    NUM_ENVS = 5

    def test_vec_env(vec_env):
        """Check batch sizes of observations and the ``observe=False`` path."""
        vec_env.reset()
        obs, _, _, _, _, _ = vec_env.last()
        print(np.asarray(obs).shape)
        assert len(obs) == NUM_ENVS
        act_space = vec_env.action_spaces[vec_env.agent_selection]
        # last() and observe() must agree for the currently selected agent.
        assert np.all(np.equal(obs, vec_env.observe(vec_env.agent_selection)))
        assert len(vec_env.observe(vec_env.agent_selection)) == NUM_ENVS
        vec_env.step([act_space.sample() for _ in range(NUM_ENVS)])
        obs, _, _, _, _, _ = vec_env.last(observe=False)
        assert obs is None

    def test_infos(vec_env):
        """Check that the infos entry at index 1 has truthy ``legal_moves``."""
        vec_env.reset()
        infos = vec_env.infos[vec_env.agent_selection]
        assert infos[1]["legal_moves"]

    def test_some_done(vec_env):
        """Check that one sampled step yields some done flag and some reward."""
        vec_env.reset()
        act_space = vec_env.action_spaces[vec_env.agent_selection]
        assert not any(done for dones in vec_env.dones.values() for done in dones)
        vec_env.step([act_space.sample() for _ in range(NUM_ENVS)])
        assert any(done for dones in vec_env.dones.values() for done in dones)
        assert any(rew != 0 for rews in vec_env.rewards.values() for rew in rews)

    for num_cpus in [0, 1]:
        test_vec_env(vectorize_aec_env_v0(rps_v1.env(), NUM_ENVS, num_cpus=num_cpus))
        test_vec_env(vectorize_aec_env_v0(mahjong_maker(), NUM_ENVS, num_cpus=num_cpus))
        test_infos(vectorize_aec_env_v0(hanabi_maker(), NUM_ENVS, num_cpus=num_cpus))
        test_some_done(vectorize_aec_env_v0(mahjong_maker(), NUM_ENVS, num_cpus=num_cpus))
        test_vec_env(vectorize_aec_env_v0(knights_archers_zombies_v7.env(), NUM_ENVS, num_cpus=num_cpus))
        test_vec_env(vectorize_aec_env_v0(simple_world_comm_v2.env(), NUM_ENVS, num_cpus=num_cpus))
        generated_agents_parallel_v0.parallel_env(), 3),
    supersuit.max_observation_v0(generated_agents_parallel_v0.parallel_env(),
                                 3),
]


@pytest.mark.parametrize("env", parallel_wrappers)
def test_pettingzoo_parallel_api_gen(env):
    """Run ``parallel_api_test`` on one wrapped parallel env for 50 cycles."""
    cycles = 50
    parallel_test.parallel_api_test(env, num_cycles=cycles)


# Zero-argument factories: each lambda builds its env and wrapper only when
# called, so creating this list does not construct any environment.
wrapper_fns = [
    # Wrappers applied to the result of ``.env()``.
    lambda: supersuit.pad_action_space_v0(
        generated_agents_parallel_v0.env()
    ),
    lambda: supersuit.pad_observations_v0(
        generated_agents_parallel_v0.env()
    ),
    lambda: supersuit.agent_indicator_v0(
        generated_agents_parallel_v0.env()
    ),
    lambda: supersuit.vectorize_aec_env_v0(
        generated_agents_parallel_v0.env(), 2
    ),
    # Wrappers applied to the result of ``.parallel_env()``.
    lambda: supersuit.pad_action_space_v0(
        generated_agents_parallel_v0.parallel_env()
    ),
    lambda: supersuit.pad_observations_v0(
        generated_agents_parallel_v0.parallel_env()
    ),
    lambda: supersuit.agent_indicator_v0(
        generated_agents_parallel_v0.parallel_env()
    ),
    lambda: supersuit.pettingzoo_env_to_vec_env_v1(
        generated_agents_parallel_v0.parallel_env()
    ),
]


@pytest.mark.parametrize("wrapper_fn", wrapper_fns)
def test_pettingzoo_missing_optional_error_message(wrapper_fn):
    """Expect calling ``wrapper_fn`` to raise an AssertionError matching " must have "."""
    expected_failure = pytest.raises(AssertionError, match=" must have ")
    with expected_failure:
        wrapper_fn()