from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTaskRewardPower8
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTaskRewardPower8()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 2000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.
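# The script above wires everything up but stops before running any episodes
# (LinearFATraining is imported but never used). The loop below is only a
# sketch of how training could be driven directly through PyBrain's
# EpisodicExperiment; the episode count, evaluation interval, and the separate
# evaluation experiment are illustrative assumptions, not part of the original
# script or of LinearFATraining.
performance_experiment = EpisodicExperiment(task, performance_agent)
for episode in range(5000):
    # One learning episode; per the comment above, learning is done online, so
    # no explicit learn() call is needed afterwards.
    experiment.doEpisodes(1)
    if episode % 50 == 0:
        # Periodically roll out the greedy, non-learning agent to track
        # progress.
        rewards = performance_experiment.doEpisodes(1)
        print('episode %i: greedy return %f' % (episode, sum(rewards[0])))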
from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks_old import LinearFATileCoding3456BalanceTaskRewardBipolar
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTaskRewardBipolar()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 2000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.
import numpy as np

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.learners.valuebased.linearfa import LSPI
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LSPIBalanceTask
from training import LinearFATraining

task = LSPIBalanceTask(only_steer=True)
learner = LSPI(task.nactions, task.outdim)
theta = np.loadtxt('/home/fitze/Dropbox/stanford/21quarter/229cs/proj/data/balance_lspi_experimental_112011H17M18S/theta_800.dat')
learner._theta = theta
# TODO this LSPI does not have eligibility traces.
#learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
#learner.exploring = True

performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.
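# The loaded theta is never exercised above. A minimal, assumed check of how
# well the saved LSPI policy balances: roll out a few greedy episodes with the
# non-learning performance_agent. The episode count and the use of a second
# EpisodicExperiment are illustrative, not from the original script.
evaluation = EpisodicExperiment(task, performance_agent)
episode_rewards = evaluation.doEpisodes(5)
for i, rewards in enumerate(episode_rewards):
    # doEpisodes returns one list of per-step rewards per episode, so the
    # list's length is the number of steps the bicycle stayed up.
    print('episode %i: %i steps, return %f' % (i, len(rewards), sum(rewards)))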
# Loads a sweep of saved theta checkpoints from a SARSA(lambda) balance run
# into the learner; the fuller version of this script below also plots the
# resulting Q-values.
import numpy as np

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.utilities import one_to_n

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTask()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

for i in np.arange(2000, 3800, 50):
    theta = np.loadtxt(
        '/home/fitze/Documents/agent-bicycle/data/balance_sarsalambda_linfa_replacetrace_anneal_112217H56M04S/theta_%i.dat' % i)
    learner._theta = theta
import numpy as np
import pylab as pl

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.utilities import one_to_n

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

task = LinearFATileCoding3456BalanceTask()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

for i in np.arange(2000, 3800, 50):
    theta = np.loadtxt('/home/fitze/Documents/agent-bicycle/data/balance_sarsalambda_linfa_replacetrace_anneal_112217H56M04S/theta_%i.dat' % i)
    learner._theta = theta
    # Q-values of every action in the bin corresponding to the all-zero state.
    Q = learner._qValues(one_to_n(task.getBin(0, 0, 0, 0, 0), task.outdim))
    pl.plot(Q, label='%s' % i)
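# The loop above adds one curve per saved theta but never renders the figure;
# a small, assumed finishing step (the axis labels are illustrative).
pl.xlabel('action index')
pl.ylabel('Q value of the all-zero state bin')
pl.legend()
pl.show()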
import numpy as np
import matplotlib.pyplot as plt

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.learners.valuebased.linearfa import LSPI
from pybrain.rl.experiments import EpisodicExperiment

# The fragment below appears to be the tail of the task's reward computation
# (the class and method definitions, and the earlier lines that set `target`,
# `xf`, and `yf`, are not part of this excerpt). It penalizes any growth in
# squared tilt and, more weakly, any growth in the front wheel's distance to
# the goal.
        dist_to_goal = np.linalg.norm(target - np.array([xf, yf]))
        delta_tilt = self.env.getTilt()**2 - self.env.last_omega**2
        last_xf = self.env.last_xf
        last_yf = self.env.last_yf
        dist_to_goal_last = np.linalg.norm(target - np.array([last_xf, last_yf]))
        delta_dist = dist_to_goal - dist_to_goal_last
        return -delta_tilt - delta_dist * 0.01

# LSPI_task is defined in the project; its definition is not shown here.
task = LSPI_task()
learner = LSPI(9, 20)  # 9 actions, 20-dimensional feature vector.
task.rewardDiscount = 0.8
learner.rewardDiscount = 0.8

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
exp = EpisodicExperiment(task, agent)
learner.learningRateDecay = 3000

max_agent = LinearFA_Agent(learner)
max_agent.learning = False
max_agent.greedy = True

task.env.saveWheelContactTrajectories(True)

plt.ion()
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(1, 2, 1)
ax2 = plt.subplot(1, 2, 2)
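# The script above switches on interactive plotting and wheel-contact
# recording but stops before running any episodes. The loop below is only a
# sketch: get_xfhist()/get_yfhist() are hypothetical stand-ins for however the
# project's Environment actually exposes the recorded front-wheel contact
# points, and the episode counts and plot layout are illustrative.
returns = []
for episode in range(1000):
    rewards = exp.doEpisodes(1)
    returns.append(sum(rewards[0]))
    if episode % 10 == 0:
        ax1.cla()
        # Hypothetical accessors; substitute the Environment's real ones.
        ax1.plot(task.env.get_xfhist(), task.env.get_yfhist())
        ax1.set_title('front-wheel contact trajectory')
        ax2.cla()
        ax2.plot(returns)
        ax2.set_title('return per episode')
        plt.draw()
        plt.pause(0.001)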