Ejemplo n.º 1
0
def RL_step():
	"""Request one RL step over the network and return the decoded reply.

	Issues the ``Network.kRLStep`` call with no parameters, performs the
	standard receive for that same message id, and then populates a
	``Reward_observation_action_terminal`` from values read off ``network``.

	Returns:
		A ``Reward_observation_action_terminal`` with its ``terminal``,
		``r``, ``o`` and ``a`` attributes set.
	"""
	doCallWithNoParams(Network.kRLStep)
	doStandardRecv(Network.kRLStep)

	step_result = Reward_observation_action_terminal()
	# Keep the reads in this exact sequence: int, double, observation, action.
	# NOTE(review): presumably this mirrors the layout of the reply -- confirm
	# against the sending side before reordering.
	step_result.terminal = network.getInt()
	step_result.r = network.getDouble()
	step_result.o = network.getObservation()
	step_result.a = network.getAction()
	return step_result
Ejemplo n.º 2
0
def RL_step():
	"""Request one RL step over the network and return the decoded reply.

	Sends the ``Network.kRLStep`` call without parameters, runs the standard
	receive for the same message id, then reads four values from ``network``
	and stores them on a fresh ``Reward_observation_action_terminal``.

	Returns:
		A ``Reward_observation_action_terminal`` with its ``terminal``,
		``r``, ``o`` and ``a`` attributes set.
	"""
	doCallWithNoParams(Network.kRLStep)
	doStandardRecv(Network.kRLStep)

	reply = Reward_observation_action_terminal()

	# The four reads happen in a fixed sequence (int, double, observation,
	# action). NOTE(review): presumably this matches the reply layout --
	# confirm against the sender before changing the order.
	terminal_flag = network.getInt()
	reward = network.getDouble()
	observation = network.getObservation()
	action = network.getAction()

	reply.terminal = terminal_flag
	reply.r = reward
	reply.o = observation
	reply.a = action
	return reply
Ejemplo n.º 3
0
    def RL_step(self):
        """Run one environment/agent step and return the combined result.

        If no previous action is stored (``self.prevact is None``),
        ``self.RL_start()`` is called first. The step counter is then
        incremented, the stored action is handed to ``self.env.env_step``,
        and the agent is consulted:

        * when the environment reports ``terminal == 1``, the agent receives
          the final reward through ``agent_end`` and the stored action is
          cleared;
        * otherwise ``agent_step`` receives the reward and observation and
          its return value becomes the new stored action.

        The step's reward is added to ``self.reward_return`` and
        ``self.exitStatus`` mirrors the terminal flag.

        Returns:
            A ``Reward_observation_action_terminal`` carrying the terminal
            flag, reward ``r``, observation ``o`` and action ``a``.
        """
        if self.prevact is None:
            # NOTE(review): RL_start() is presumably what supplies the first
            # action -- confirm in RL_start.
            self.RL_start()
        self.step_count += 1

        env_result = self.env.env_step(self.prevact)
        step_result = Reward_observation_action_terminal()
        step_result.terminal = env_result.terminal
        self.exitStatus = env_result.terminal

        if env_result.terminal == 1:
            self.agent.agent_end(env_result.r)
            # Report the action that was just passed to env_step, then drop
            # it so the next call goes through RL_start() again.
            step_result.a, self.prevact = self.prevact, None
        else:
            self.prevact = self.agent.agent_step(env_result.r, env_result.o)
            step_result.a = self.prevact

        self.reward_return += env_result.r
        step_result.r = env_result.r
        step_result.o = env_result.o
        return step_result
Ejemplo n.º 4
0
	def RL_step(self):
		"""Run one environment/agent step and return the combined result.

		When ``self.prevact`` is ``None``, ``self.RL_start()`` is invoked
		before anything else. Afterwards the step counter is bumped, the
		stored action is passed to ``self.env.env_step``, and depending on
		the terminal flag either ``agent_end`` (flag equals 1) or
		``agent_step`` (any other value) is called on the agent.

		Side effects: updates ``self.step_count``, ``self.exitStatus``,
		``self.prevact`` and ``self.reward_return``.

		Returns:
			A ``Reward_observation_action_terminal`` carrying the terminal
			flag, reward ``r``, observation ``o`` and action ``a``.
		"""
		if self.prevact is None:
			# NOTE(review): RL_start() is presumably what supplies the first
			# action -- confirm in RL_start.
			self.RL_start()
		self.step_count += 1

		outcome = self.env.env_step(self.prevact)
		combined = Reward_observation_action_terminal()
		combined.terminal = outcome.terminal
		self.exitStatus = outcome.terminal

		if outcome.terminal == 1:
			self.agent.agent_end(outcome.r)
			# Hand back the action that was just given to env_step and clear
			# the stored one, so the next call starts over via RL_start().
			combined.a, self.prevact = self.prevact, None
		else:
			self.prevact = self.agent.agent_step(outcome.r, outcome.o)
			combined.a = self.prevact

		self.reward_return += outcome.r
		combined.r = outcome.r
		combined.o = outcome.o
		return combined