def step(
        self,
        vector_action: Dict[str, Any] = None,
        memory: Dict[str, Any] = None,
        text_action: Dict[str, Any] = None,
        value: Dict[str, Any] = None,
    ) -> AllBrainInfo:
        """Apply one action to the agent and report the outcome.

        The action is read from ``vector_action[BRAIN_NAME][0][0]``, limited
        to ``[-STEP_SIZE, STEP_SIZE]`` and added to ``self.position``, which
        is itself kept inside ``[-1, 1]``. The episode is done once the
        position reaches either end of that interval.

        Args:
            vector_action: Mapping keyed by brain name; must not be ``None``.
            memory: Not used by this method.
            text_action: Not used by this method.
            value: Not used by this method.

        Returns:
            A dict with the single key ``BRAIN_NAME`` whose value is built by
            ``BrainInfo.from_agent_proto`` from this step's agent info.

        Raises:
            AssertionError: If ``vector_action`` is ``None``.
        """
        assert vector_action is not None

        # Limit the requested move first, then keep the position in bounds.
        requested_move = vector_action[BRAIN_NAME][0][0]
        limited_move = clamp(requested_move, -STEP_SIZE, STEP_SIZE)
        self.position = clamp(self.position + limited_move, -1, 1)
        self.step_count += 1

        done = abs(self.position) >= 1.0
        # Finishing pays a reward scaled by the final position; every other
        # step costs the time penalty.
        reward = SUCCESS_REWARD * self.position if done else -TIME_PENALTY

        observation = [self.position] * OBS_SIZE
        agent_info = AgentInfoProto(
            stacked_vector_observation=observation,
            reward=reward,
            done=done,
        )

        # The proto above already captured the final position, so the agent
        # can be reset before the result is assembled.
        if done:
            self._reset_agent()

        brain_info = BrainInfo.from_agent_proto(
            0, [agent_info], self._brains[BRAIN_NAME]
        )
        return {BRAIN_NAME: brain_info}
    def reset(
        self,
        config: Dict[str, float] = None,
        train_mode: bool = True,
        custom_reset_parameters: Any = None,
    ) -> AllBrainInfo:  # type: ignore
        """Reset the agent and return its initial observation.

        Args:
            config: Not used by this method.
            train_mode: Not used by this method.
            custom_reset_parameters: Not used by this method.

        Returns:
            A dict with the single key ``BRAIN_NAME`` whose value is built by
            ``BrainInfo.from_agent_proto`` from a not-done agent info holding
            the post-reset position repeated ``OBS_SIZE`` times.
        """
        self._reset_agent()

        # The observation is taken after the reset, so it reflects the
        # starting position.
        observation = [self.position] * OBS_SIZE
        initial_info = AgentInfoProto(
            stacked_vector_observation=observation,
            done=False,
            max_step_reached=False,
        )

        brain_info = BrainInfo.from_agent_proto(
            0, [initial_info], self._brains[BRAIN_NAME]
        )
        return {BRAIN_NAME: brain_info}