obs[-2 + self.extraRandoms] = 0.1 * sin(angle1) + cartpos if self.numPoles == 2: if self.markov: angle2 = obs[3] else: angle2 = obs[1] obs[-3 + self.extraRandoms] = 0.05 * cos(angle2) + cartpos obs[-4 + self.extraRandoms] = 0.05 * sin(angle2) + cartpos if self.extraRandoms > 0: obs[-self.extraRandoms:] = randn(self.extraRandoms) if self.verbose: print('obs', obs) return obs def performAction(self, action): if self.verbose: print('act', action) impl.performAction(action[0]) self.addReward() if __name__ == '__main__': from pybrain.rl import EpisodicExperiment from pybrain.rl.agents import FlatNetworkAgent x = FastCartPoleTask() a = FlatNetworkAgent(x.outdim, x.indim) e = EpisodicExperiment(x, a) e.doEpisodes(2)
# queued version # experiment._fillQueue(30) # while True: # experiment._stepQueueLoop() # # rewards.append(mean(agent.history.getSumOverSequences('reward'))) # print agent.module.getParameters(), # print mean(agent.history.getSumOverSequences('reward')) # clf() # plot(rewards) # episodic version x = 0 batch = 30 #number of samples per gradient estimate (was: 20; more here due to stochastic setting) while x < 5000: #while True: experiment.doEpisodes(batch) x += batch reward = mean( agent.history.getSumOverSequences('reward')) * task.rewardscale if useGraphics: pl.addData(0, x, reward) print agent.module.params print reward #if reward > 3: # pass agent.learn() agent.reset() if useGraphics: pl.update() if len(sys.argv) > 2:
        # NOTE(review): fragment of an observation-building method -- the
        # method header, angle2's sibling angle and cartpos are defined above,
        # outside this chunk. Python 2 print statements throughout.
        if self.numPoles == 2:
            # The second pole's angle sits at a different index depending on
            # whether the full (Markov) state is observable.
            if self.markov:
                angle2 = obs[3]
            else:
                angle2 = obs[1]
            obs[-3 + self.extraRandoms] = 0.05 * cos(angle2) + cartpos
            obs[-4 + self.extraRandoms] = 0.05 * sin(angle2) + cartpos
        if self.extraRandoms > 0:
            # Pad the tail of the observation with Gaussian noise dimensions.
            obs[-self.extraRandoms:] = randn(self.extraRandoms)
        if self.verbose:
            print "obs", obs
        return obs

    def performAction(self, action):
        # Forward the single-component action to the fast C cart-pole
        # implementation and accumulate this step's reward.
        if self.verbose:
            print "act", action
        impl.performAction(action[0])
        self.addReward()


if __name__ == "__main__":
    # Smoke test: run two episodes with a flat network agent on the task.
    from pybrain.rl import EpisodicExperiment
    from pybrain.rl.agents import FlatNetworkAgent
    x = FastCartPoleTask()
    a = FlatNetworkAgent(x.outdim, x.indim)
    e = EpisodicExperiment(x, a)
    e.doEpisodes(2)
# queued version # experiment._fillQueue(30) # while True: # experiment._stepQueueLoop() # # rewards.append(mean(agent.history.getSumOverSequences('reward'))) # print agent.module.getParameters(), # print mean(agent.history.getSumOverSequences('reward')) # clf() # plot(rewards) # episodic version x = 0 batch = 30 #number of samples per gradient estimate (was: 20; more here due to stochastic setting) while x<5000: #while True: experiment.doEpisodes(batch) x += batch reward = mean(agent.history.getSumOverSequences('reward'))*task.rewardscale if useGraphics: pl.addData(0,x,reward) print agent.module.params print reward #if reward > 3: # pass agent.learn() agent.reset() if useGraphics: pl.update() if len(sys.argv) > 2: