Example #1
0
    def step(self):
        """Run one environment/agent exchange and report what happened.

        If no action is pending (``self._prevact`` is ``None``),
        ``self.start()`` is invoked first; it is presumably responsible for
        producing the first action -- confirm against ``start``.

        The pending action is handed to the environment, and the reward it
        reports is added to ``self._total_reward``.

        * Terminal transition (``terminal == 1``): the episode counter is
          bumped, the agent is told the final reward via ``end``, and the
          pending action is cleared.  Whatever ``end`` returns is stored in
          the ``converged`` field of the result, with ``None`` replaced by
          ``0``.  The ``a`` field is not assigned here in this case.
        * Otherwise: the step counter is bumped and the agent picks the next
          action, which is remembered for the following call and stored in
          the ``a`` field of the result.  The ``converged`` field is not
          assigned here in this case.

        Returns:
            A ``Reward_observation_action_terminal`` carrying the
            environment's observation (``o``), reward (``r``) and terminal
            flag, plus the branch-specific field described above.
        """
        # No pending action: kick things off before touching the environment.
        if self._prevact is None:
            self.start()

        env_result = self._env.step(self._prevact)

        # Mirror the environment's reply into the record handed to the caller.
        outcome = Reward_observation_action_terminal()
        outcome.o = env_result.o
        reward = env_result.r
        outcome.r = reward
        outcome.terminal = env_result.terminal

        self._total_reward += reward

        if env_result.terminal == 1:
            # Episode finished: notify the agent and drop the pending action.
            self._episode_count += 1
            verdict = self._agent.end(reward)
            if verdict is None:
                # The agent gave no answer; report 0 instead of None.
                verdict = 0
            outcome.converged = verdict
            self._prevact = None
            return outcome

        # Episode continues: let the agent choose the next action.
        self._step_count += 1
        self._prevact = self._agent.step(reward, env_result.o)
        outcome.a = self._prevact
        return outcome