from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks_old import LinearFATileCoding3456BalanceTaskRewardBipolar
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTaskRewardBipolar()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 2000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

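# Aside (illustration only, not part of the original script): if I am reading
# PyBrain's LinearFALearner.newEpisode correctly, the learning rate is
# multiplied by (n0 + t) / (n0 + t + 1) at the start of each episode, where
# n0 is learningRateDecay and t counts episodes, so it telescopes to roughly
# alpha_0 * n0 / (n0 + t). The toy loop below (made-up values) prints the
# schedule for the default n0 = 100 against n0 = 2000, showing why a larger
# learningRateDecay means a slower decay.
alphas = {100: 0.5, 2000: 0.5}
for t in range(1, 5001):
    for n0 in alphas:
        alphas[n0] *= (n0 + t) / (n0 + t + 1.0)
    if t % 1000 == 0:
        print(t, alphas[100], alphas[2000])
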
from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTaskRewardPower8
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTaskRewardPower8()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 2000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

# Run a greedy agent, with trained balance weights loaded from disk, in the
# interactive Game with noise.
from numpy import loadtxt

from pybrain.rl.agents.linearfa import LinearFA_Agent

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces
from game import Game

task = LinearFATileCoding3456BalanceTask()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
#theta = loadtxt('/home/fitze/Documents/agent-bicycle/data/balance_sarsalambda_linfa_replacetrace_anneal_112217H56M04S/theta_3800.dat')
theta = loadtxt('theta_linearfatilecoding3456balancetask_balance_works.dat')
learner._theta = theta

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

Game(performance_agent, task, noise_mag=0.2).run()

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTask(only_steer=True, max_time=8.0)
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim,
        learningRateDecay=1000,
        rewardDiscount=0.99,
        randomInit=False,
        )
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
agent.epsilonGreedy = True
#agent.init_exploration = 0.5

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower; in this script it
# is set via the learningRateDecay argument to the learner constructor above.

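# Sketch (an assumption, not the original training.py code): the
# LinearFATraining helper imported above presumably alternates learning
# episodes with greedy evaluation episodes, roughly as below; num_episodes
# and eval_interval are made-up names.
num_episodes = 2000
eval_interval = 50
for iep in range(num_episodes):
    experiment.doEpisodes(1)
    if iep % eval_interval == 0:
        # Temporarily swap in the greedy, non-learning agent to measure how
        # the current weights do without exploration.
        experiment.agent = performance_agent
        r = experiment.doEpisodes(1)
        print(iep, sum(r[0]))
        experiment.agent = agent
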
import numpy as np

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.utilities import one_to_n

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTask()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

# Step through the theta vectors saved during a training run.
for i in np.arange(2000, 3800, 50):
    theta = np.loadtxt(
            '/home/fitze/Documents/agent-bicycle/data/balance_sarsalambda_linfa_replacetrace_anneal_112217H56M04S/theta_%i.dat'
            % i)
    learner._theta = theta
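    # Sketch (not in the original script, which stops after loading theta):
    # one way to use each checkpoint is a single greedy rollout, mirroring
    # the performance agents in the training scripts above.
    performance_agent = LinearFA_Agent(learner)
    performance_agent.logging = False
    performance_agent.greedy = True
    performance_agent.learning = False
    rewards = EpisodicExperiment(task, performance_agent).doEpisodes(1)
    print(i, sum(rewards[0]))
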
from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces
from renderer import Renderer

task = LinearFATileCoding3456BalanceTask()

renderer = Renderer()
task.env.setRenderer(renderer)
renderer.start()

learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim,
        learningRateDecay=1500,
        randomInit=False,
        )
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.

import numpy as np
import pylab as pl

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.utilities import one_to_n

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTask()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

# For each saved theta, plot the Q value of every action in the zeroed-out
# state bin, to see how the action preferences evolve over training.
for i in np.arange(2000, 3800, 50):
    theta = np.loadtxt(
            '/home/fitze/Documents/agent-bicycle/data/balance_sarsalambda_linfa_replacetrace_anneal_112217H56M04S/theta_%i.dat'
            % i)
    learner._theta = theta
    Q = learner._qValues(one_to_n(task.getBin(0, 0, 0, 0, 0), task.outdim))
    pl.plot(Q, label='%s' % i)

# Display the per-checkpoint curves.
pl.legend()
pl.show()