Example #1
def test_process_visual_observation():
    in_array_1 = np.random.rand(128, 128, 3)
    proto_obs_1 = generate_compressed_proto_obs(in_array_1)
    in_array_2 = np.random.rand(128, 128, 3)
    proto_obs_2 = generate_compressed_proto_obs(in_array_2)
    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap2 = AgentInfoProto()
    ap2.observations.extend([proto_obs_2])
    ap_list = [ap1, ap2]
    arr = _process_visual_observation(0, (128, 128, 3), ap_list)
    assert list(arr.shape) == [2, 128, 128, 3]
    # The compressed round trip is lossy, so compare with a small tolerance.
    assert (np.abs(arr[0, :, :, :] - in_array_1) < 0.01).all()
    assert (np.abs(arr[1, :, :, :] - in_array_2) < 0.01).all()
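The helper generate_compressed_proto_obs is not shown in this excerpt; below is a minimal sketch of what such a helper might look like, assuming it scales the float array to 8-bit and PNG-encodes it via PIL (the implementation details here are assumptions, not the project's actual code):

import io

import numpy as np
from PIL import Image

def generate_compressed_proto_obs(in_array: np.ndarray) -> ObservationProto:
    # Assumed behavior: scale [0, 1] floats to 8-bit, PNG-encode, and wrap
    # the bytes in a compressed ObservationProto.
    obs_proto = ObservationProto()
    obs_proto.compression_type = COMPRESSION_TYPE_PNG
    obs_proto.shape.extend(in_array.shape)
    image = Image.fromarray((in_array * 255).astype(np.uint8))
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    obs_proto.compressed_data = buffer.getvalue()
    return obs_proto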
Example #2
    def _get_agent_infos(self):
        dict_agent_info = {}
        list_agent_info = []
        vector_obs = [1, 2, 3]

        observations = [
            ObservationProto(
                compressed_data=None,
                shape=[30, 40, 3],
                compression_type=COMPRESSION_TYPE_PNG,
            )
            for _ in range(self.visual_inputs)
        ]
        vector_obs_proto = ObservationProto(
            float_data=ObservationProto.FloatData(data=vector_obs),
            shape=[len(vector_obs)],
            compression_type=COMPRESSION_TYPE_NONE,
        )
        observations.append(vector_obs_proto)

        for i in range(self.num_agents):
            list_agent_info.append(
                AgentInfoProto(
                    reward=1,
                    done=(i == 2),
                    max_step_reached=False,
                    id=i,
                    observations=observations,
                )
            )
        dict_agent_info["RealFakeBrain"] = UnityRLOutputProto.ListAgentInfoProto(
            value=list_agent_info
        )
        return dict_agent_info
Example #3
def _make_agent_info_proto(vector_obs: List[float]) -> AgentInfoProto:
    obs = ObservationProto(
        float_data=ObservationProto.FloatData(data=vector_obs),
        shape=[len(vector_obs)],
        compression_type=COMPRESSION_TYPE_NONE,
    )
    agent_info_proto = AgentInfoProto(observations=[obs])
    return agent_info_proto
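A quick usage check of the helper above; the expected values follow directly from its definition:

proto = _make_agent_info_proto([1.0, 2.0, 3.0])
assert list(proto.observations[0].shape) == [3]
assert proto.observations[0].float_data.data == [1.0, 2.0, 3.0]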
Example #4
# mock_warning and mock_nan_to_num are presumably injected by mock.patch
# decorators that were omitted from this excerpt.
def test_from_agent_proto_nan(mock_warning, mock_nan_to_num):
    agent_info_proto = AgentInfoProto()
    agent_info_proto.stacked_vector_observation.extend(
        [1.0, 2.0, float("nan")])

    brain_info = BrainInfo.from_agent_proto(1, [agent_info_proto], test_brain)
    # nan gets set to 0.0
    expected = [1.0, 2.0, 0.0]
    assert (brain_info.vector_observations == expected).all()
    mock_nan_to_num.assert_called()
    mock_warning.assert_called()
Example #5
def test_from_agent_proto_fast_path(mock_warning, mock_nan_to_num):
    """
    Check that all finite values skips the nan_to_num call
    """
    agent_info_proto = AgentInfoProto()
    agent_info_proto.stacked_vector_observation.extend([1.0, 2.0, 3.0])

    brain_info = BrainInfo.from_agent_proto(1, [agent_info_proto], test_brain)
    expected = [1.0, 2.0, 3.0]
    assert (brain_info.vector_observations == expected).all()
    mock_nan_to_num.assert_not_called()
    mock_warning.assert_not_called()
Example #6
def test_from_agent_proto_inf(mock_warning, mock_nan_to_num):
    agent_info_proto = AgentInfoProto()
    agent_info_proto.stacked_vector_observation.extend(
        [1.0, float("inf"), 0.0])

    brain_info = BrainInfo.from_agent_proto(1, [agent_info_proto], test_brain)
    # inf should get set to float_max
    expected = [1.0, sys.float_info.max, 0.0]
    assert (brain_info.vector_observations == expected).all()
    mock_nan_to_num.assert_called()
    # We don't warn on inf, just NaN
    mock_warning.assert_not_called()
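The three tests above rely on numpy's nan_to_num semantics; a standalone illustration of the substitutions they expect:

import numpy as np

arr = np.array([1.0, float("nan"), float("inf")])
cleaned = np.nan_to_num(arr)  # NaN -> 0.0, +inf -> largest finite float
assert cleaned[1] == 0.0
assert cleaned[2] == np.finfo(np.float64).max  # same value as sys.float_info.max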
Example #7
    def reset(
        self,
        config: Dict[str, float] = None,
        train_mode: bool = True,
        custom_reset_parameters: Any = None,
    ) -> AllBrainInfo:  # type: ignore
        self._reset_agent()

        agent_info = AgentInfoProto(
            stacked_vector_observation=[self.goal] * OBS_SIZE,
            done=False,
            max_step_reached=False,
        )
        return {
            BRAIN_NAME: BrainInfo.from_agent_proto(
                0, [agent_info], self._brains[BRAIN_NAME]
            )
        }
Example #8
    def step(
        self,
        vector_action: Dict[str, Any] = None,
        memory: Dict[str, Any] = None,
        value: Dict[str, Any] = None,
    ) -> AllBrainInfo:
        assert vector_action is not None

        if self.discrete:
            act = vector_action[BRAIN_NAME][0][0]
            delta = 1 if act else -1
        else:
            delta = vector_action[BRAIN_NAME][0][0]
        delta = clamp(delta, -STEP_SIZE, STEP_SIZE)
        self.position += delta
        self.position = clamp(self.position, -1, 1)
        self.step_count += 1
        done = self.position >= 1.0 or self.position <= -1.0
        if done:
            reward = SUCCESS_REWARD * self.position * self.goal
        else:
            reward = -TIME_PENALTY

        vector_obs = [self.goal] * OBS_SIZE
        vector_obs_proto = ObservationProto(
            float_data=ObservationProto.FloatData(data=vector_obs),
            shape=[len(vector_obs)],
            compression_type=COMPRESSION_TYPE_NONE,
        )
        agent_info = AgentInfoProto(
            reward=reward,
            done=bool(done),
            observations=[vector_obs_proto],
        )

        if done:
            self._reset_agent()

        return {
            BRAIN_NAME: BrainInfo.from_agent_proto(
                0, [agent_info], self._brains[BRAIN_NAME]
            )
        }
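The clamp helper used by these step/reset environments is not shown; a minimal sketch of the one-liner it presumably is:

def clamp(x, min_val, max_val):
    # Restrict x to the closed interval [min_val, max_val].
    return max(min_val, min(x, max_val))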
Example #9
    def _get_agent_infos(self):
        dict_agent_info = {}
        if self.is_discrete:
            vector_action = [1]
        else:
            vector_action = [1, 2]
        list_agent_info = []
        if self.num_stacks == 1:
            observation = [1, 2, 3]
        else:
            observation = [1, 2, 3, 1, 2, 3]

        compressed_obs = [
            CompressedObservationProto(
                data=None,
                shape=[30, 40, 3],
                compression_type=CompressionTypeProto.PNG,
            )
            for _ in range(self.visual_inputs)
        ]

        for i in range(self.num_agents):
            list_agent_info.append(
                AgentInfoProto(
                    stacked_vector_observation=observation,
                    reward=1,
                    stored_vector_actions=vector_action,
                    stored_text_actions="",
                    text_observation="",
                    memories=[],
                    done=(i == 2),
                    max_step_reached=False,
                    id=i,
                    compressed_observations=compressed_obs,
                )
            )
        dict_agent_info["RealFakeBrain"] = UnityRLOutputProto.ListAgentInfoProto(
            value=list_agent_info
        )
        return dict_agent_info
Example #10
    def reset(
        self,
        config: Dict[str, float] = None,
        train_mode: bool = True,
        custom_reset_parameters: Any = None,
    ) -> AllBrainInfo:  # type: ignore
        self._reset_agent()

        vector_obs = [self.goal] * OBS_SIZE
        vector_obs_proto = ObservationProto(
            float_data=ObservationProto.FloatData(data=vector_obs),
            shape=[len(vector_obs)],
            compression_type=COMPRESSION_TYPE_NONE,
        )
        agent_info = AgentInfoProto(
            done=False,
            max_step_reached=False,
            observations=[vector_obs_proto],
        )

        return {
            BRAIN_NAME: BrainInfo.from_agent_proto(
                0, [agent_info], self._brains[BRAIN_NAME]
            )
        }
Example #11
    def step(
        self,
        vector_action: Dict[str, Any] = None,
        memory: Dict[str, Any] = None,
        text_action: Dict[str, Any] = None,
        value: Dict[str, Any] = None,
    ) -> AllBrainInfo:
        assert vector_action is not None

        if self.discrete:
            act = vector_action[BRAIN_NAME][0][0]
            delta = 1 if act else -1
        else:
            delta = vector_action[BRAIN_NAME][0][0]
        delta = clamp(delta, -STEP_SIZE, STEP_SIZE)
        self.position += delta
        self.position = clamp(self.position, -1, 1)
        self.step_count += 1
        done = self.position >= 1.0 or self.position <= -1.0
        if done:
            reward = SUCCESS_REWARD * self.position * self.goal
        else:
            reward = -TIME_PENALTY

        agent_info = AgentInfoProto(
            stacked_vector_observation=[self.goal] * OBS_SIZE,
            reward=reward,
            done=done,
        )

        if done:
            self._reset_agent()

        return {
            BRAIN_NAME: BrainInfo.from_agent_proto(
                0, [agent_info], self._brains[BRAIN_NAME]
            )
        }
Example #12
def generate_list_agent_proto(n_agent: int,
                              shape: List[Tuple[int, ...]]) -> List[AgentInfoProto]:
    result = []
    for agent_index in range(n_agent):
        ap = AgentInfoProto()
        ap.reward = agent_index
        ap.done = agent_index % 2 == 0
        ap.max_step_reached = agent_index % 2 == 1
        ap.id = agent_index
        ap.action_mask.extend([True, False] * 5)
        obs_proto_list = []
        for obs_index in range(len(shape)):
            obs_proto = ObservationProto()
            obs_proto.shape.extend(list(shape[obs_index]))
            obs_proto.compression_type = NONE
            obs_proto.float_data.data.extend([0.1] * np.prod(shape[obs_index]))
            obs_proto_list.append(obs_proto)
        ap.observations.extend(obs_proto_list)
        result.append(ap)
    return result
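An illustrative call to the generator above, with two agents and two arbitrary observation shapes (a length-3 vector and an 84x84x3 visual observation); the assertions follow from the function's definition:

ap_list = generate_list_agent_proto(2, [(3,), (84, 84, 3)])
assert len(ap_list) == 2
assert len(ap_list[0].observations) == 2
assert ap_list[0].done and not ap_list[1].done  # even agent indices are marked done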
Example #13
def load_demonstration(file_path: str) -> Tuple[BrainParameters, List[BrainInfo], int]:
    """
    Loads and parses a demonstration file.
    :param file_path: Location of demonstration file (.demo).
    :return: BrainParameter and list of BrainInfos containing demonstration data.
    """

    # The first 32 bytes of the file are reserved for meta-data, so parsing
    # of the next message resumes at byte 33.
    INITIAL_POS = 33
    file_paths = []
    if os.path.isdir(file_path):
        all_files = os.listdir(file_path)
        for _file in all_files:
            if _file.endswith(".demo"):
                file_paths.append(os.path.join(file_path, _file))
        if not file_paths:
            raise ValueError("There are no '.demo' files in the provided directory.")
    elif os.path.isfile(file_path):
        file_paths.append(file_path)
        file_extension = pathlib.Path(file_path).suffix
        if file_extension != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension."
            )
    else:
        raise FileNotFoundError(
            "The demonstration file or directory {} does not exist.".format(file_path)
        )

    brain_params = None
    brain_param_proto = None
    brain_infos = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            data = fp.read()
        next_pos, pos, obs_decoded = 0, 0, 0
        while pos < len(data):
            next_pos, pos = _DecodeVarint32(data, pos)
            if obs_decoded == 0:
                meta_data_proto = DemonstrationMetaProto()
                meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                total_expected += meta_data_proto.number_steps
                pos = INITIAL_POS
            if obs_decoded == 1:
                brain_param_proto = BrainParametersProto()
                brain_param_proto.ParseFromString(data[pos : pos + next_pos])

                pos += next_pos
            if obs_decoded > 1:
                agent_info = AgentInfoProto()
                agent_info.ParseFromString(data[pos : pos + next_pos])
                if brain_params is None:
                    brain_params = BrainParameters.from_proto(
                        brain_param_proto, agent_info
                    )
                brain_info = BrainInfo.from_agent_proto(0, [agent_info], brain_params)
                brain_infos.append(brain_info)
                if len(brain_infos) == total_expected:
                    break
                pos += next_pos
            obs_decoded += 1
    return brain_params, brain_infos, total_expected
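A hypothetical invocation of load_demonstration (the path is illustrative; a real .demo file or directory is required):

brain_params, brain_infos, total_expected = load_demonstration("demos/expert.demo")
print(brain_params, len(brain_infos), total_expected)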