Example #1
0
 def step(self):
     """Request one step over the network and return the decoded reply.

     Sends the ``network.kRLStep`` message with no parameters, waits for
     the matching reply, and then unpacks the reply into a fresh
     ``Reward_observation_action_terminal`` instance.

     Returns:
         The populated ``Reward_observation_action_terminal`` object with
         its ``terminal``, ``r``, ``o`` and ``a`` attributes filled in.
     """
     self.do_call_with_no_params(network.kRLStep)
     self.do_standard_recv(network.kRLStep)

     result = Reward_observation_action_terminal()
     buffer = self._network

     # The fields are consumed from the buffer sequentially, so the read
     # order below (int, double, observation, action) must not change.
     # NOTE(review): presumably this mirrors the sender's write order.
     result.terminal = buffer.get_int()
     result.r = buffer.get_double()
     result.o = buffer.get_Observation()
     result.a = buffer.get_Action()
     return result
Example #2
0
    def step(self):
        """Advance the environment/agent loop by a single step.

        If no previous action is stored (``self._prevact is None``),
        ``self.start()`` is invoked first.
        NOTE(review): presumably ``start()`` populates ``self._prevact``;
        confirm against its definition.

        The environment is stepped with the stored action, and the reward
        it reports is added to ``self._total_reward``.

        * When the environment reports ``terminal == 1``: the episode
          counter is incremented, the agent's ``end`` hook receives the
          final reward, its return value (or ``0`` when it returns
          ``None``) is stored in ``converged``, and the stored action is
          cleared. The ``a`` attribute is not assigned in this case.
        * Otherwise: the step counter is incremented and the agent picks
          the next action, which is both remembered for the next call and
          stored in ``a``. ``converged`` is not assigned in this case.

        Returns:
            A ``Reward_observation_action_terminal`` carrying the
            observation, reward and terminal flag from the environment,
            plus the fields described above.
        """
        if self._prevact is None:
            self.start()

        outcome = self._env.step(self._prevact)
        reward = outcome.r

        result = Reward_observation_action_terminal()
        result.o = outcome.o
        result.r = reward
        result.terminal = outcome.terminal

        self._total_reward += reward

        if outcome.terminal == 1:
            # Episode finished: notify the agent and reset the stored action
            # so that the next call goes through start() again.
            self._episode_count += 1
            flag = self._agent.end(reward)
            if flag is None:
                flag = 0
            result.converged = flag
            self._prevact = None
            return result

        # Episode continues: let the agent choose the next action.
        self._step_count += 1
        next_action = self._agent.step(reward, outcome.o)
        self._prevact = next_action
        result.a = next_action
        return result