def test_sanitize_action_one_agent_done(mock_env):
    """Check agent ordering when one of five agents finishes in a step.

    The environment is set up with agents 0-4. The received step reports six
    agents (ids 0-5) with id 2 flagged as done. The test expects the
    sanitized result to list id 5 in the position previously held by id 2.
    """
    mock_spec = create_mock_group_spec(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_step = create_mock_vector_step_result(num_agents=5)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=True)

    received_step_result = create_mock_vector_step_result(num_agents=6)
    received_step_result.agent_id = np.array(range(6))
    # agent #3 (id = 2) is Done
    received_step_result.done = np.array([False] * 2 + [True] + [False] * 3)

    sanitized_result = env._sanitize_info(received_step_result)

    # Compare the whole sequence rather than zip()-ing element by element:
    # zip() stops at the shorter input, so a result with missing agents
    # would otherwise pass unnoticed.
    expected_agent_ids = [0, 1, 5, 3, 4]
    assert list(sanitized_result.agent_id) == expected_agent_ids
def test_sanitize_action_single_agent_multiple_done(mock_env):
    """Check the single-agent case when two agents finish in the same step.

    The environment is set up with one agent (id 0) and ``multiagent=False``.
    The received step reports three agents (ids 0-2) with ids 0 and 1 both
    flagged as done. The test expects only id 2 in the sanitized result.
    """
    mock_spec = create_mock_group_spec(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_step = create_mock_vector_step_result(num_agents=1)
    mock_step.agent_id = np.array(range(1))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=False)

    received_step_result = create_mock_vector_step_result(num_agents=3)
    received_step_result.agent_id = np.array(range(3))
    # original agent (id = 0) is Done
    # so is the "new" agent (id = 1)
    done = [True, True, False]
    received_step_result.done = np.array(done)

    sanitized_result = env._sanitize_info(received_step_result)

    # Compare the whole sequence: with a zip()-based loop an empty result
    # would execute zero assertions and the test would pass vacuously.
    expected_agent_ids = [2]
    assert list(sanitized_result.agent_id) == expected_agent_ids
def test_sanitize_action_shuffled_id(mock_env):
    """Check that a step result with shuffled agent ids is put back in order.

    The environment is set up with agents 0-4. The received step lists the
    same five ids in the order [4, 2, 3, 1, 0], and each agent's reward is
    set equal to its id so the reordering of per-agent data can be checked.
    The test expects both rewards and ids to come back as 0, 1, 2, 3, 4.
    """
    mock_spec = create_mock_group_spec(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_step = create_mock_vector_step_result(num_agents=5)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=True)

    shuffled_step_result = create_mock_vector_step_result(num_agents=5)
    shuffled_order = [4, 2, 3, 1, 0]
    # Reward mirrors the agent id, so rewards must be permuted together
    # with the ids for the assertions below to hold.
    shuffled_step_result.reward = np.array(shuffled_order)
    shuffled_step_result.agent_id = np.array(shuffled_order)

    sanitized_result = env._sanitize_info(shuffled_step_result)

    # Compare whole sequences rather than zip()-ing element by element:
    # zip() stops at the shorter input and would hide missing entries.
    expected_order = list(range(5))
    assert list(sanitized_result.reward) == expected_order
    assert list(sanitized_result.agent_id) == expected_order
def test_sanitize_action_new_agent_done(mock_env):
    """Check agent ordering when an original and a newly seen agent both finish.

    The environment is set up with agents 0-4. The received step reports
    seven agents (ids 0-6) with id 2 and id 5 flagged as done. The test
    expects the sanitized result to list id 6 in the position previously
    held by id 2.
    """
    mock_spec = create_mock_group_spec(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    # The mock step must describe as many agents as the ids assigned to it;
    # building it with fewer rows than ids leaves the per-agent arrays
    # inconsistent with agent_id.
    mock_step = create_mock_vector_step_result(num_agents=5)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=True)

    received_step_result = create_mock_vector_step_result(num_agents=7)
    received_step_result.agent_id = np.array(range(7))
    # agent #3 (id = 2) is Done
    # so is the "new" agent (id = 5)
    done = [False] * 7
    done[2] = True
    done[5] = True
    received_step_result.done = np.array(done)

    sanitized_result = env._sanitize_info(received_step_result)

    # Compare the whole sequence rather than zip()-ing element by element:
    # zip() stops at the shorter input, so a result with missing agents
    # would otherwise pass unnoticed.
    expected_agent_ids = [0, 1, 6, 3, 4]
    assert list(sanitized_result.agent_id) == expected_agent_ids