obs[-2 + self.extraRandoms] = 0.1 * sin(angle1) + cartpos if self.numPoles == 2: if self.markov: angle2 = obs[3] else: angle2 = obs[1] obs[-3 + self.extraRandoms] = 0.05 * cos(angle2) + cartpos obs[-4 + self.extraRandoms] = 0.05 * sin(angle2) + cartpos if self.extraRandoms > 0: obs[-self.extraRandoms:] = randn(self.extraRandoms) if self.verbose: print('obs', obs) return obs def performAction(self, action): if self.verbose: print('act', action) impl.performAction(action[0]) self.addReward() if __name__ == '__main__': from pybrain.rl import EpisodicExperiment from pybrain.rl.agents import FlatNetworkAgent x = FastCartPoleTask() a = FlatNetworkAgent(x.outdim, x.indim) e = EpisodicExperiment(x, a) e.doEpisodes(2)
# queued version # experiment._fillQueue(30) # while True: # experiment._stepQueueLoop() # # rewards.append(mean(agent.history.getSumOverSequences('reward'))) # print agent.module.getParameters(), # print mean(agent.history.getSumOverSequences('reward')) # clf() # plot(rewards) # episodic version x = 0 batch = 30 #number of samples per gradient estimate (was: 20; more here due to stochastic setting) while x < 5000: #while True: experiment.doEpisodes(batch) x += batch reward = mean( agent.history.getSumOverSequences('reward')) * task.rewardscale if useGraphics: pl.addData(0, x, reward) print agent.module.params print reward #if reward > 3: # pass agent.learn() agent.reset() if useGraphics: pl.update() if len(sys.argv) > 2:
        # NOTE(review): fragment of an observation-building method -- the
        # method header, angle2's sibling angle and cartpos are defined above,
        # outside this chunk. Python 2 print statements throughout.
        if self.numPoles == 2:
            # The second pole's angle sits at a different index depending on
            # whether the full (Markov) state is observable.
            if self.markov:
                angle2 = obs[3]
            else:
                angle2 = obs[1]
            obs[-3 + self.extraRandoms] = 0.05 * cos(angle2) + cartpos
            obs[-4 + self.extraRandoms] = 0.05 * sin(angle2) + cartpos
        if self.extraRandoms > 0:
            # Pad the tail of the observation with Gaussian noise dimensions.
            obs[-self.extraRandoms:] = randn(self.extraRandoms)
        if self.verbose:
            print "obs", obs
        return obs

    def performAction(self, action):
        # Forward the single-component action to the fast C cart-pole
        # implementation and accumulate this step's reward.
        if self.verbose:
            print "act", action
        impl.performAction(action[0])
        self.addReward()


if __name__ == "__main__":
    # Smoke test: run two episodes with a flat network agent on the task.
    from pybrain.rl import EpisodicExperiment
    from pybrain.rl.agents import FlatNetworkAgent
    x = FastCartPoleTask()
    a = FlatNetworkAgent(x.outdim, x.indim)
    e = EpisodicExperiment(x, a)
    e.doEpisodes(2)
# queued version # experiment._fillQueue(30) # while True: # experiment._stepQueueLoop() # # rewards.append(mean(agent.history.getSumOverSequences('reward'))) # print agent.module.getParameters(), # print mean(agent.history.getSumOverSequences('reward')) # clf() # plot(rewards) # episodic version x = 0 batch = 30 #number of samples per gradient estimate (was: 20; more here due to stochastic setting) while x<5000: #while True: experiment.doEpisodes(batch) x += batch reward = mean(agent.history.getSumOverSequences('reward'))*task.rewardscale if useGraphics: pl.addData(0,x,reward) print agent.module.params print reward #if reward > 3: # pass agent.learn() agent.reset() if useGraphics: pl.update() if len(sys.argv) > 2: