Example #1
def test_process_visual_observation():
    in_array_1 = np.random.rand(128, 64, 3)
    proto_obs_1 = generate_compressed_proto_obs(in_array_1)
    in_array_2 = np.random.rand(128, 64, 3)
    in_array_2_mapping = [0, 1, 2]
    proto_obs_2 = generate_compressed_proto_obs_with_mapping(
        in_array_2, in_array_2_mapping)

    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap2 = AgentInfoProto()
    ap2.observations.extend([proto_obs_2])
    ap_list = [ap1, ap2]
    arr = _process_maybe_compressed_observation(
        0, create_observation_specs_with_shapes([(128, 64, 3)])[0], ap_list)
    assert list(arr.shape) == [2, 128, 64, 3]
    assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
    assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)
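The test above tolerates a small reconstruction error (atol=0.01) because the observation is quantized to 8-bit channels before PNG encoding. A minimal standalone sketch of that round trip, using PIL directly rather than the ML-Agents test helpers, shows where the tolerance comes from:

# Standalone sketch of the lossy round trip assumed by the atol=0.01 checks
# (illustration only, not the generate_compressed_proto_obs helper).
import io
import numpy as np
from PIL import Image

in_array = np.random.rand(128, 64, 3)
buffer = io.BytesIO()
Image.fromarray((in_array * 255).astype(np.uint8)).save(buffer, format="PNG")
buffer.seek(0)
decoded = np.asarray(Image.open(buffer)).astype(np.float32) / 255.0

# PNG itself is lossless; the only error is the 8-bit quantization,
# which is bounded by 1/255 < 0.01 per pixel.
assert np.allclose(decoded, in_array, atol=0.01)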
Example #2
def test_process_visual_observation_bad_shape():
    in_array_1 = np.random.rand(128, 64, 3)
    proto_obs_1 = generate_compressed_proto_obs(in_array_1)
    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap_list = [ap1]
    with pytest.raises(UnityObservationException):
        _process_visual_observation(0, (128, 42, 3), ap_list)
def test_process_visual_observation_grayscale():
    in_array_1 = np.random.rand(128, 64, 3)
    proto_obs_1 = generate_compressed_proto_obs(in_array_1, grayscale=True)
    expected_out_array_1 = np.mean(in_array_1, axis=2, keepdims=True)
    in_array_2 = np.random.rand(128, 64, 3)
    in_array_2_mapping = [0, 0, 0]
    proto_obs_2 = generate_compressed_proto_obs_with_mapping(
        in_array_2, in_array_2_mapping)
    expected_out_array_2 = np.mean(in_array_2, axis=2, keepdims=True)

    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap2 = AgentInfoProto()
    ap2.observations.extend([proto_obs_2])
    ap_list = [ap1, ap2]
    arr = _process_visual_observation(0, (128, 64, 1), ap_list)
    assert list(arr.shape) == [2, 128, 64, 1]
    assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
    assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)
Example #4
def test_process_visual_observation_bad_shape():
    in_array_1 = np.random.rand(128, 64, 3)
    proto_obs_1 = generate_compressed_proto_obs(in_array_1)
    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap_list = [ap1]

    shape = (128, 42, 3)
    obs_spec = create_observation_specs_with_shapes([shape])[0]

    with pytest.raises(UnityObservationException):
        _process_maybe_compressed_observation(0, obs_spec, ap_list)
def generate_list_agent_proto(
    n_agent: int,
    shape: List[Tuple[int]],
    infinite_rewards: bool = False,
    nan_observations: bool = False,
) -> List[AgentInfoProto]:
    result = []
    for agent_index in range(n_agent):
        ap = AgentInfoProto()
        ap.reward = float("inf") if infinite_rewards else agent_index
        ap.done = agent_index % 2 == 0
        ap.max_step_reached = agent_index % 4 == 0
        ap.id = agent_index
        ap.action_mask.extend([True, False] * 5)
        obs_proto_list = []
        for obs_index in range(len(shape)):
            obs_proto = ObservationProto()
            obs_proto.shape.extend(list(shape[obs_index]))
            obs_proto.compression_type = NONE
            obs_proto.float_data.data.extend(
                ([float("nan")] if nan_observations else [0.1]) *
                np.prod(shape[obs_index]))
            obs_proto_list.append(obs_proto)
        ap.observations.extend(obs_proto_list)
        result.append(ap)
    return result
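A quick usage sketch for the helper above; the expected values follow directly from its body:

# Usage sketch for generate_list_agent_proto: three agents, each carrying
# a 3-dim vector observation and a (2, 2) observation.
protos = generate_list_agent_proto(3, [(3,), (2, 2)])
assert len(protos) == 3
assert protos[0].done and not protos[1].done        # even agent indices are done
assert len(protos[1].observations) == 2             # one ObservationProto per shape
assert len(protos[1].observations[0].float_data.data) == 3
assert len(protos[1].observations[1].float_data.data) == 4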
def test_process_visual_observation_padded_channels():
    in_array_1 = np.random.rand(128, 64, 12)
    in_array_1_mapping = [0, 1, 2, 3, -1, -1, 4, 5, 6, 7, -1, -1]
    proto_obs_1 = generate_compressed_proto_obs_with_mapping(
        in_array_1, in_array_1_mapping)
    expected_out_array_1 = np.take(in_array_1, [0, 1, 2, 3, 6, 7, 8, 9],
                                   axis=2)

    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap_list = [ap1]
    arr = _process_visual_observation(0, (128, 64, 8), ap_list)
    assert list(arr.shape) == [1, 128, 64, 8]
    assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
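The mapping convention assumed by the test above appears to be: mapping[i] is the destination channel for source channel i, and -1 marks a padding channel that is dropped. Under that assumption, the expected indices [0, 1, 2, 3, 6, 7, 8, 9] can be derived mechanically:

# Sketch of the channel-mapping convention inferred from the test above
# (an assumption, not taken from the ML-Agents implementation):
# mapping[i] = destination channel for source channel i, -1 = dropped padding.
def source_channels_for(mapping):
    n_out = max(mapping) + 1
    # For each destination channel, find the source channel that feeds it.
    return [mapping.index(dst) for dst in range(n_out)]

assert source_channels_for([0, 1, 2, 3, -1, -1, 4, 5, 6, 7, -1, -1]) == [
    0, 1, 2, 3, 6, 7, 8, 9
]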
Example #7
def proto_from_batched_step_result(
    batched_step_result: BatchedStepResult
) -> List[AgentInfoProto]:
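    """
    Build one AgentInfoProto per agent in the BatchedStepResult: reward, done,
    max_step_reached and the flattened action mask are copied per agent; 3-D
    observations are re-encoded as uncompressed image protos and 1-D
    observations as raw float data.
    """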
    agent_info_protos: List[AgentInfoProto] = []
    for agent_id in batched_step_result.agent_id:
        agent_id_index = batched_step_result.agent_id_to_index[agent_id]
        reward = batched_step_result.reward[agent_id_index]
        done = batched_step_result.done[agent_id_index]
        max_step_reached = batched_step_result.max_step[agent_id_index]
        agent_mask = None
        if batched_step_result.action_mask is not None:
            agent_mask = []  # type: ignore
            for _branch in batched_step_result.action_mask:
                agent_mask = np.concatenate(
                    (agent_mask, _branch[agent_id_index, :]), axis=0
                )
        observations: List[ObservationProto] = []
        for all_observations_of_type in batched_step_result.obs:
            observation = all_observations_of_type[agent_id_index]
            if len(observation.shape) == 3:
                observations.append(generate_uncompressed_proto_obs(observation))
            else:
                observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(data=observation),
                        shape=[len(observation)],
                        compression_type=NONE,
                    )
                )

        agent_info_proto = AgentInfoProto(
            reward=reward,
            done=done,
            id=agent_id,
            max_step_reached=max_step_reached,
            action_mask=agent_mask,
            observations=observations,
        )
        agent_info_protos.append(agent_info_proto)
    return agent_info_protos
def proto_from_steps(decision_steps: DecisionSteps,
                     terminal_steps: TerminalSteps) -> List[AgentInfoProto]:
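    """
    Convert DecisionSteps and TerminalSteps back into a flat list of
    AgentInfoProto: agents from the DecisionSteps come first (done=False),
    followed by agents from the TerminalSteps (done=True, with
    max_step_reached taken from `interrupted` and no action mask).
    """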
    agent_info_protos: List[AgentInfoProto] = []
    # Take care of the DecisionSteps first
    for agent_id in decision_steps.agent_id:
        agent_id_index = decision_steps.agent_id_to_index[agent_id]
        reward = decision_steps.reward[agent_id_index]
        done = False
        max_step_reached = False
        agent_mask: Any = None
        if decision_steps.action_mask is not None:
            agent_mask = []
            for _branch in decision_steps.action_mask:
                agent_mask = np.concatenate(
                    (agent_mask, _branch[agent_id_index, :]), axis=0)
            agent_mask = agent_mask.astype(np.bool_).tolist()
        observations: List[ObservationProto] = []
        for all_observations_of_type in decision_steps.obs:
            observation = all_observations_of_type[agent_id_index]
            if len(observation.shape) == 3:
                observations.append(
                    generate_uncompressed_proto_obs(observation))
            else:
                observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(
                            data=observation),
                        shape=[len(observation)],
                        compression_type=NONE,
                    ))
        agent_info_proto = AgentInfoProto(
            reward=reward,
            done=done,
            id=agent_id,
            max_step_reached=bool(max_step_reached),
            action_mask=agent_mask,
            observations=observations,
        )
        agent_info_protos.append(agent_info_proto)
    # Take care of the TerminalSteps second
    for agent_id in terminal_steps.agent_id:
        agent_id_index = terminal_steps.agent_id_to_index[agent_id]
        reward = terminal_steps.reward[agent_id_index]
        done = True
        max_step_reached = terminal_steps.interrupted[agent_id_index]

        final_observations: List[ObservationProto] = []
        for all_observations_of_type in terminal_steps.obs:
            observation = all_observations_of_type[agent_id_index]
            if len(observation.shape) == 3:
                final_observations.append(
                    generate_uncompressed_proto_obs(observation))
            else:
                final_observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(
                            data=observation),
                        shape=[len(observation)],
                        compression_type=NONE,
                    ))
        agent_info_proto = AgentInfoProto(
            reward=reward,
            done=done,
            id=agent_id,
            max_step_reached=bool(max_step_reached),
            action_mask=None,
            observations=final_observations,
        )
        agent_info_protos.append(agent_info_proto)

    return agent_info_protos