Example #1
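            # Project the first pole's angle into cart-relative tip
            # coordinates (the 0.1 / 0.05 factors appear to be pole lengths).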
            obs[-2 + self.extraRandoms] = 0.1 * sin(angle1) + cartpos    
            if self.numPoles == 2:
                if self.markov:
                    angle2 = obs[3]
                else:
                    angle2 = obs[1]
                obs[-3 + self.extraRandoms] = 0.05 * cos(angle2) + cartpos
                obs[-4 + self.extraRandoms] = 0.05 * sin(angle2) + cartpos
        
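        # Fill the trailing observation slots with standard-normal noise
        # (distractor inputs).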
        if self.extraRandoms > 0:
            obs[-self.extraRandoms:] = randn(self.extraRandoms)
            
        if self.verbose:
            print('obs', obs)
        return obs
        
    def performAction(self, action):
        if self.verbose:
            print('act', action)
        impl.performAction(action[0])
        self.addReward()
        
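# Smoke test: couple the task to a flat network agent and run two episodes.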
if __name__ == '__main__':
    from pybrain.rl import EpisodicExperiment
    from pybrain.rl.agents import FlatNetworkAgent
    x = FastCartPoleTask()
    a = FlatNetworkAgent(x.outdim, x.indim)
    e = EpisodicExperiment(x, a)
    e.doEpisodes(2)
    
Example #2
# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
#     # rewards.append(mean(agent.history.getSumOverSequences('reward')))
#     print(agent.module.getParameters(), end=' ')
#     print(mean(agent.history.getSumOverSequences('reward')))
#     clf()
#     plot(rewards)

# episodic version
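# Collect `batch` episodes per iteration, average the per-episode return,
# take one learning step, and stop once 5000 episodes have been run.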
x = 0
batch = 30  # number of samples per gradient estimate (was: 20; more here due to stochastic setting)
while x < 5000:
    #while True:
    experiment.doEpisodes(batch)
    x += batch
    reward = mean(
        agent.history.getSumOverSequences('reward')) * task.rewardscale
    if useGraphics:
        pl.addData(0, x, reward)
    print(agent.module.params)
    print(reward)
    #if reward > 3:
    #    pass
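    # One learning step on the batch just collected, then reset the agent.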
    agent.learn()
    agent.reset()
    if useGraphics:
        pl.update()

if len(sys.argv) > 2: