task = LinearFATileCoding3456BalanceTaskRewardPower8()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 2000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining(
    'balance_sarsalambda_linfa_replacetrace_anneal_RewardPower8_take1',
    experiment, performance_agent, verbose=True)

tr.train(7000, performance_interval=1, n_performance_episodes=1,
    serialization_interval=50)
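# For reference, a minimal sketch of epsilon-greedy action selection with a
# decaying exploration rate, i.e. the behavior that agent.epsilonGreedy and
# agent.init_exploration presumably enable. This is NOT PyBrain's
# LinearFA_Agent implementation, and epsilon0/decay/q_values are made-up names
# for illustration only.
import numpy as np

def epsilon_greedy_action(q_values, episode, epsilon0=0.5, decay=0.99):
    """Pick argmax of q_values, except with probability epsilon pick uniformly."""
    epsilon = epsilon0 * decay ** episode  # exploration anneals over episodes
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))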
import numpy as np

task = LSPIBalanceTask(only_steer=True)
learner = LSPI(task.nactions, task.outdim)
theta = np.loadtxt('/home/fitze/Dropbox/stanford/21quarter/229cs/proj/data/balance_lspi_experimental_112011H17M18S/theta_800.dat')
learner._theta = theta
# TODO this LSPI does not have eligibility traces.
#learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
#learner.exploring = True

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining('balance_lspi_experimental', experiment,
    performance_agent, verbose=True)

tr.train(55000, performance_interval=10, n_performance_episodes=1)
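# Sketch of the weight serialization round trip assumed above: theta_800.dat is
# read back with np.loadtxt, so it is presumably just a plain-text array. Only
# the loadtxt side appears in the script above; the savetxt step and the array
# shape here are assumptions about how such a checkpoint could be written.
import numpy as np

theta = np.random.randn(9, 3456)        # placeholder weights; shape is illustrative
np.savetxt('theta_800.dat', theta)      # how a checkpoint like the one above could be written
restored = np.loadtxt('theta_800.dat')  # what the script above does to resume from it
assert np.allclose(theta, restored)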
agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 2000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining(
    'balance_sarsalambda_linfa_replacetrace_anneal_RewardBipolar_take2',
    experiment, performance_agent, verbose=True)

tr.train(7000, performance_interval=1, n_performance_episodes=1,
    serialization_interval=50)
task = LinearFATileCoding3456GoToTask(only_steer=True, max_time=50.0)
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
agent.epsilonGreedy = True

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining('goto_sarsalambda_linfa_replacetrace_onlysteer',
    experiment, performance_agent, verbose=True)

tr.train(200000, performance_interval=10, n_performance_episodes=5)
agent = LinearFA_Agent(learner)
#agent.epsilonGreedy = True
#agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining('balance_recover3800', experiment, performance_agent,
    verbose=True)

tr.train(7000, n_episodes_per_rehearsal=100, performance_interval=1,
    n_performance_episodes=1, serialization_interval=50,
    plot_action_history=True)
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456GoToTask(only_steer=True, max_time=50.0)
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
agent.epsilonGreedy = True

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining('goto_sarsalambda_linfa_replacetrace_onlysteer',
    experiment, performance_agent, verbose=True)

tr.train(200000, performance_interval=10, n_performance_episodes=5)
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
agent.epsilonGreedy = True
#agent.init_exploration = 0.5

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
#learner.learningRateDecay = 1000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining('balance_sarsalambda_linfa_replacetrace_onlysteer',
    experiment, performance_agent, verbose=True)

tr.train(200000, performance_interval=10, n_performance_episodes=5,
    plot_action_history=True)
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTask()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining('balance_sarsalambda_linfa_replacetrace_noperform',
    experiment, agent, verbose=False)

tr.train(2000000, performance_interval=50, n_performance_episodes=5,
    serialization_interval=200)
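# The learningRateDecay comments above refer to PyBrain's annealing of the step
# size. A minimal sketch of a hyperbolic schedule that behaves the way the
# comment describes (a larger decay constant means a slower decay); the exact
# form is an assumption, not a copy of PyBrain's code.
def annealed_learning_rate(alpha0, decay, step):
    """Step size after `step` updates; stays near alpha0 while step << decay."""
    return alpha0 * decay / (decay + step)

# With the default decay of 100 the rate is halved after 100 steps; with
# 100000 it is still ~99.9% of alpha0 at step 100, which is consistent with the
# NOTE above that a larger value kept learning from stalling.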
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim,
    learningRateDecay=1500, randomInit=False)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining('balance_vis', experiment, performance_agent,
    verbose=True)

tr.train(7000, n_episodes_per_rehearsal=10, performance_interval=1,
    n_performance_episodes=1, serialization_interval=50,
    plot_action_history=True)
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTask(only_steer=True, max_time=8.0)
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim,
    learningRateDecay=1000, rewardDiscount=0.99, randomInit=False)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
agent.epsilonGreedy = True
#agent.init_exploration = 0.5

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
#learner.learningRateDecay = 1000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

tr = LinearFATraining('balance_sarsalambda_linfa_replacetrace_onlysteer',
    experiment, performance_agent, verbose=True)

tr.train(200000, performance_interval=10, n_performance_episodes=5,
    plot_action_history=True)