Example #1
File: xor.py  Project: zyx061212/Kaggle
def runExp(gamma=0, epsilon=0.1, xor=False, lr=0.02):
    if xor:
        print("Attempting the XOR task")
    else:
        print("Attempting the AND task")

    # XORTask and LinFA_QAgent are assumed to be defined earlier in this file.
    task = XORTask()
    task.and_task = not xor

    # Q-learning with linear function approximation over the task's senses.
    l = Q_LinFA(task.nactions, task.nsenses)
    l.rewardDiscount = gamma
    l.learningRate = lr

    # Epsilon-greedy agent wrapping the learner.
    agent = LinFA_QAgent(l)
    agent.epsilon = epsilon
    exp = Experiment(task, agent)

    # Print the reward accumulated in each block of 100 interactions,
    # decaying the learning rate after every block.
    sofar = 0
    for i in range(30):
        exp.doInteractions(100)
        print(exp.task.cumreward - sofar, end=' ')
        if i % 10 == 9:
            print()
        sofar = exp.task.cumreward
        l._decayLearningRate()
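
A minimal way to exercise this function is to call runExp directly with different discount and exploration settings. The sketch below is illustrative only; the particular gamma and epsilon values are assumptions, not values taken from the original project.

if __name__ == '__main__':
    # Illustrative settings only: modest exploration on the easier AND task ...
    runExp(gamma=0.9, epsilon=0.1, xor=False)
    # ... and more exploration on the harder XOR task.
    runExp(gamma=0.9, epsilon=0.3, xor=True)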
Example #2
"""Using the agent found in the xor example, rather than in linearfa.py.

"""
from pybrain.rl.learners.valuebased.linearfa import Q_LinFA
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from agents import LinFA_QAgent

task = LinearFATileCoding3456BalanceTask()
learner = Q_LinFA(task.nactions, task.outdim)
task.discount = learner.rewardDiscount
agent = LinFA_QAgent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
agent.learning = True
# A separate greedy agent (no exploration, no learning) used only to evaluate
# the value function learned so far.
performance_agent = LinFA_QAgent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.epsilon = 0.0
performance_agent.learning = False
experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays more slowly.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the