def step(self):
    """Run one environment/agent step over the network protocol.

    Sends a parameterless kRLStep request, waits for the standard
    reply, then unpacks the payload into a
    Reward_observation_action_terminal.

    Returns:
        Reward_observation_action_terminal with ``terminal``, ``r``,
        ``o`` and ``a`` populated from the network buffer.

    NOTE(review): the read order below must match the wire format
    exactly: terminal flag (int), reward (double), observation,
    action — do not reorder these calls.
    """
    self.do_call_with_no_params(network.kRLStep)
    self.do_standard_recv(network.kRLStep)
    roat = Reward_observation_action_terminal()
    roat.terminal = self._network.get_int()
    roat.r = self._network.get_double()
    roat.o = self._network.get_Observation()
    roat.a = self._network.get_Action()
    return roat
def step(self):
    """Advance the local agent/environment loop by one step.

    If there is no pending action (first call, or the previous episode
    just ended), a new episode is started first. The environment is
    stepped with the previously chosen action, bookkeeping counters are
    updated, and the agent is given the resulting reward/observation.

    Returns:
        Reward_observation_action_terminal where ``a`` is the agent's
        next action — note it is ``None`` on terminal steps, since the
        episode ended and no new action was chosen (original behavior,
        preserved).
    """
    # No pending action means we are at an episode boundary: start one.
    if self._prevact is None:
        self.start()
    rot = self._env.step(self._prevact)
    roat = Reward_observation_action_terminal()
    roat.o = rot.o
    roat.r = rot.r
    roat.terminal = rot.terminal
    self._total_reward += rot.r
    if rot.terminal == 1:
        self._episode_count += 1
        converged = self._agent.end(rot.r)
        # agent.end may return None; normalize to 0 for callers.
        roat.converged = converged if converged is not None else 0
        # Clearing the pending action forces start() on the next call.
        self._prevact = None
    else:
        self._step_count += 1
        self._prevact = self._agent.step(rot.r, rot.o)
    roat.a = self._prevact
    return roat