def step(
    self,
    vector_action: Dict[str, Any] = None,
    memory: Dict[str, Any] = None,
    text_action: Dict[str, Any] = None,
    value: Dict[str, Any] = None,
) -> AllBrainInfo:
    """Apply one action to the single agent and return the resulting BrainInfo.

    The agent lives on a 1-D line in [-1, 1]; the single continuous action
    moves it by at most STEP_SIZE per call. Reaching either end of the line
    ends the episode.

    :param vector_action: required; maps BRAIN_NAME to a [[delta]] action.
    :param memory: unused, kept for interface compatibility.
    :param text_action: unused, kept for interface compatibility.
    :param value: unused, kept for interface compatibility.
    :return: dict mapping BRAIN_NAME to the post-step BrainInfo.
    :raises ValueError: if no vector_action was provided.
    """
    if vector_action is None:
        # Explicit check instead of `assert`: asserts are stripped under
        # `python -O`, which would defer the failure to a confusing
        # TypeError on the subscript below.
        raise ValueError("step() requires a vector_action")
    delta = vector_action[BRAIN_NAME][0][0]
    # Bound the move, then keep the agent on the [-1, 1] line.
    delta = clamp(delta, -STEP_SIZE, STEP_SIZE)
    self.position += delta
    self.position = clamp(self.position, -1, 1)
    self.step_count += 1
    # Episode ends when an end of the line is reached (position is exactly
    # +/-1 after clamping), so the terminal reward is signed by the side hit;
    # otherwise a small per-step penalty is applied.
    done = self.position >= 1.0 or self.position <= -1.0
    reward = SUCCESS_REWARD * self.position if done else -TIME_PENALTY
    agent_info = AgentInfoProto(
        stacked_vector_observation=[self.position] * OBS_SIZE,
        reward=reward,
        done=done,
    )
    if done:
        # Reset only after the observation was built, so the trainer still
        # sees the terminal state for this step.
        self._reset_agent()
    return {
        BRAIN_NAME: BrainInfo.from_agent_proto(
            0, [agent_info], self._brains[BRAIN_NAME]
        )
    }
def reset(
    self,
    config: Dict[str, float] = None,
    train_mode: bool = True,
    custom_reset_parameters: Any = None,
) -> AllBrainInfo:  # type: ignore
    """Begin a fresh episode and return the initial observation.

    All parameters are accepted for interface compatibility only; none of
    them is read by this implementation.

    :return: dict mapping BRAIN_NAME to the initial BrainInfo.
    """
    self._reset_agent()
    # Initial observation: the (freshly reset) position repeated OBS_SIZE times.
    initial_info = AgentInfoProto(
        stacked_vector_observation=[self.position] * OBS_SIZE,
        done=False,
        max_step_reached=False,
    )
    brain_info = BrainInfo.from_agent_proto(
        0, [initial_info], self._brains[BRAIN_NAME]
    )
    return {BRAIN_NAME: brain_info}