Example No. 1
            obs[-2 + self.extraRandoms] = 0.1 * sin(angle1) + cartpos  # pole 1: scaled sine of its angle, offset by the cart position
            if self.numPoles == 2:
                # the second pole's angle sits at a different index depending on
                # whether the task exposes the full (Markov) observation or not
                if self.markov:
                    angle2 = obs[3]
                else:
                    angle2 = obs[1]
                obs[-3 + self.extraRandoms] = 0.05 * cos(angle2) + cartpos
                obs[-4 + self.extraRandoms] = 0.05 * sin(angle2) + cartpos
        
        if self.extraRandoms > 0:
            # the last extraRandoms slots are filled with fresh Gaussian noise
            obs[-self.extraRandoms:] = randn(self.extraRandoms)
            
        if self.verbose:
            print('obs', obs)
        return obs
        
    def performAction(self, action):
        if self.verbose:
            print('act', action)
        impl.performAction(action[0])
        self.addReward()
        
if __name__ == '__main__':
    from pybrain.rl import EpisodicExperiment
    from pybrain.rl.agents import FlatNetworkAgent
    x = FastCartPoleTask()
    a = FlatNetworkAgent(x.outdim, x.indim)
    e = EpisodicExperiment(x, a)
    e.doEpisodes(2)
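
The index arithmetic in the excerpt above is compact: the scaled sine/cosine of each pole angle, offset by the cart position, is written into slots addressed from the end of the observation vector, and the trailing extraRandoms entries are then overwritten with Gaussian noise. Below is a minimal standalone sketch of that post-processing on a plain NumPy array (the function name decorate_observation is illustrative, not part of PyBrain):

from numpy import sin, cos
from numpy.random import randn

def decorate_observation(obs, cartpos, angle1, angle2=None, extra_randoms=0):
    """Mirror the observation post-processing shown in Example No. 1."""
    # pole 1: scaled sine of its angle, shifted by the cart position
    obs[-2 + extra_randoms] = 0.1 * sin(angle1) + cartpos
    if angle2 is not None:
        # pole 2: scaled cosine/sine components, same cart-position offset
        obs[-3 + extra_randoms] = 0.05 * cos(angle2) + cartpos
        obs[-4 + extra_randoms] = 0.05 * sin(angle2) + cartpos
    if extra_randoms > 0:
        # trailing slots are replaced with Gaussian noise
        obs[-extra_randoms:] = randn(extra_randoms)
    return obs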
    
Example No. 2
#net.initParams(0.0)

# create agent
agent = StateDependentAgent(net, ENAC())
agent.learner.gd.rprop = True
# only relevant for RPROP
agent.learner.gd.deltamin = 0.0001
#agent.learner.gd.deltanull = 0.05
# only relevant for plain backprop (rprop = False)
agent.learner.gd.alpha = 0.01
agent.learner.gd.momentum = 0.9

agent.actaspg = False

# create experiment
experiment = EpisodicExperiment(task, agent)

# print weights at beginning
print(agent.module.params)

rewards = []
if useGraphics:
    figure()
    ion()
    pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
    pl.setLineStyle(linewidth=2)

# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
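
The listing stops just before the training loop. In the PyBrain example scripts, an ENAC setup like this is typically driven by a batched episodic loop roughly like the following (a sketch, not the original script's loop; the batch size and the number of updates are arbitrary choices):

batch = 2                      # episodes collected per gradient update (arbitrary)
for update in range(100):
    # doEpisodes returns one list of per-step rewards for each episode in the batch
    episode_rewards = experiment.doEpisodes(batch)
    returns = [sum(r) for r in episode_rewards]
    rewards.append(sum(returns) / float(len(returns)))
    # one ENAC update on the collected experience, then clear the agent's history
    agent.learn()
    agent.reset()
    print(rewards[-1])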
Example No. 3
            if self.numPoles == 2:
                if self.markov:
                    angle2 = obs[3]
                else:
                    angle2 = obs[1]
                obs[-3 + self.extraRandoms] = 0.05 * cos(angle2) + cartpos
                obs[-4 + self.extraRandoms] = 0.05 * sin(angle2) + cartpos

        if self.extraRandoms > 0:
            obs[-self.extraRandoms :] = randn(self.extraRandoms)

        if self.verbose:
            print "obs", obs
        return obs

    def performAction(self, action):
        if self.verbose:
            print "act", action
        impl.performAction(action[0])
        self.addReward()


if __name__ == "__main__":
    from pybrain.rl import EpisodicExperiment
    from pybrain.rl.agents import FlatNetworkAgent

    x = FastCartPoleTask()
    a = FlatNetworkAgent(x.outdim, x.indim)
    e = EpisodicExperiment(x, a)
    e.doEpisodes(2)
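
As in Example No. 1, the module's self-test just runs two episodes. EpisodicExperiment.doEpisodes returns one list of per-step rewards per episode, so the call can be captured and summarised; a usage sketch building on the experiment e defined in the __main__ block above:

all_rewards = e.doEpisodes(2)
for i, episode in enumerate(all_rewards):
    print("episode", i, "steps", len(episode), "return", sum(episode))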
Example No. 4

# create agent
agent = StateDependentAgent(net, ENAC())
agent.learner.gd.rprop = True
# only relevant for RPROP
agent.learner.gd.deltamin = 0.0001
#agent.learner.gd.deltanull = 0.05
# only relevant for plain backprop (rprop = False)
agent.learner.gd.alpha = 0.01
agent.learner.gd.momentum = 0.9

agent.actaspg = False

# create experiment
experiment = EpisodicExperiment(task, agent)

# print weights at beginning
print(agent.module.params)

rewards = []
if useGraphics:
    figure()
    ion()
    pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
    pl.setLineStyle(linewidth=2)

# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
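
When useGraphics is set, the plotter created above is normally refreshed once per training batch from inside a loop like the one sketched after Example No. 2. The snippet below assumes the addData(line_index, x, y) and update() methods that the shipped PyBrain example scripts call on MultilinePlotter; check those names against your PyBrain version:

# inside the training loop, after each batch of episodes
# (update is the batch index, mean_return the average episodic return of that batch)
if useGraphics:
    pl.addData(0, update, mean_return)   # append a point to line 0: mean return vs. batch
    pl.update()                          # redraw the interactive matplotlib figure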