def RL_step():
    """Issue the ``Network.kRLStep`` command and unpack the reply.

    Calls ``doCallWithNoParams`` and then ``doStandardRecv`` with
    ``Network.kRLStep``, then reads four values from the module-level
    ``network`` object: an int, a double, an observation and an action.

    Returns:
        A ``Reward_observation_action_terminal`` whose ``terminal``, ``r``,
        ``o`` and ``a`` attributes hold the values read, in that order.
    """
    doCallWithNoParams(Network.kRLStep)
    doStandardRecv(Network.kRLStep)

    # The reads below are sequential calls on ``network``; presumably the
    # order mirrors the layout of the reply, so keep it as-is.
    step_result = Reward_observation_action_terminal()
    step_result.terminal = network.getInt()
    step_result.r = network.getDouble()
    step_result.o = network.getObservation()
    step_result.a = network.getAction()
    return step_result
def RL_step(self):
    """Run one environment step followed by the matching agent call.

    When ``self.prevact`` is ``None``, ``self.RL_start()`` is invoked first.
    The step counter is incremented, ``self.env.env_step`` is called with
    the stored action, and the terminal flag it reports is copied to
    ``self.exitStatus``.

    If the terminal flag equals 1, ``self.agent.agent_end`` receives the
    reward, the result carries the action that was just executed, and
    ``self.prevact`` is reset to ``None``. Otherwise
    ``self.agent.agent_step`` is called with the reward and observation;
    its return value becomes both ``self.prevact`` and the action in the
    result.

    In both cases the reward is added to ``self.reward_return``.

    Returns:
        A ``Reward_observation_action_terminal`` holding the terminal flag,
        reward and observation from the environment plus the action
        described above.
    """
    if self.prevact is None:
        self.RL_start()

    self.step_count += 1
    env_outcome = self.env.env_step(self.prevact)

    step_result = Reward_observation_action_terminal()
    step_result.terminal = env_outcome.terminal
    self.exitStatus = env_outcome.terminal

    if env_outcome.terminal == 1:
        self.agent.agent_end(env_outcome.r)
        # Report the action that led to the terminal state before clearing it.
        step_result.a = self.prevact
        self.prevact = None
    else:
        self.prevact = self.agent.agent_step(env_outcome.r, env_outcome.o)
        step_result.a = self.prevact

    self.reward_return += env_outcome.r
    step_result.r = env_outcome.r
    step_result.o = env_outcome.o
    return step_result