Esempi in Python per LinearFA_Agent

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: pybrain.rl.agents.linearfa

Classe/tipologia: LinearFA_Agent

Esempi su hotexamples.com: 6

LinearFA_Agent in Python: 6 esempi trovati. Questi sono i migliori esempi reali in Python per pybrain.rl.agents.linearfa.LinearFA_Agent, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

LinearFA_Agent(2)

greedy(2)

logging(2)

reset(2)

epsilonGreedy(1)

init_exploration(1)

learnerning(1)

learning(1)

Esempio n. 1

Mostra file

File: balance_sarsalambda_linfa_replacetrace_anneal_RewardPower8.py Progetto: chrisdembia/agent-bicycle

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTaskRewardPower8
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

# Build the balance task and a SARSALambda_LinFA_ReplacingTraces learner
# constructed from the task's `nactions` and `outdim`.
task = LinearFATileCoding3456BalanceTaskRewardPower8()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
# Make the task use the same discount value as the learner.
task.discount = learner.rewardDiscount
# Training agent: epsilonGreedy is enabled and init_exploration is set to 0.5.
agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
# Second agent wrapping the SAME learner object, flagged greedy with learning
# and logging switched off. Presumably used to measure the learned policy
# without exploration -- confirm against the training code.
performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False
# Only the training agent is passed to the experiment here.
experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 2000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

Esempio n. 2

Mostra file

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks_old import LinearFATileCoding3456BalanceTaskRewardBipolar
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

# Build the bipolar-reward balance task (imported from tasks_old) and a
# SARSALambda_LinFA_ReplacingTraces learner constructed from the task's
# `nactions` and `outdim`.
task = LinearFATileCoding3456BalanceTaskRewardBipolar()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
# Make the task use the same discount value as the learner.
task.discount = learner.rewardDiscount
# Training agent: epsilonGreedy is enabled and init_exploration is set to 0.5.
agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
agent.init_exploration = 0.5
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
# Second agent wrapping the SAME learner object, flagged greedy with learning
# and logging switched off. Presumably used to measure the learned policy
# without exploration -- confirm against the training code.
performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False
# Only the training agent is passed to the experiment here.
experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 2000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

Esempio n. 3

Mostra file

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.learners.valuebased.linearfa import LSPI
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LSPIBalanceTask
from training import LinearFATraining

# Build the LSPI balance task (steering only) and an LSPI learner constructed
# from the task's `nactions` and `outdim`.
task = LSPIBalanceTask(only_steer=True)
learner = LSPI(task.nactions, task.outdim)
# Start from previously saved weights instead of a fresh learner.
# NOTE(review): `np` is not imported in the visible part of this script --
# confirm numpy is imported as `np` elsewhere in the file.
# NOTE(review): the path below is an absolute path on one specific machine;
# the script will fail anywhere else unless the file is made available.
theta = np.loadtxt('/home/fitze/Dropbox/stanford/21quarter/229cs/proj/data/balance_lspi_experimental_112011H17M18S/theta_800.dat')
learner._theta = theta
# TODO this LSPI does not have eligibility traces.
#learner._lambda = 0.95
# Make the task use the same discount value as the learner.
task.discount = learner.rewardDiscount
agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
#learner.exploring = True
# Second agent wrapping the SAME learner object, flagged greedy with learning
# and logging switched off. Presumably used to measure the learned policy
# without exploration -- confirm against the training code.
performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False
# Only the training agent is passed to the experiment here.
experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen.
# NOTE(review): the original note is cut off after "what got the" in this
# excerpt; the ending above follows the wording used in the sibling balance
# scripts -- confirm against the full file.

Esempio n. 4

Mostra file

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.utilities import one_to_n

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

# Build the balance task and a SARSALambda_LinFA_ReplacingTraces learner
# constructed from the task's `nactions` and `outdim`.
task = LinearFATileCoding3456BalanceTask()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
# Make the task use the same discount value as the learner.
task.discount = learner.rewardDiscount
agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

# Walk over the saved weight files theta_2000.dat, theta_2050.dat, ...,
# theta_3750.dat (np.arange excludes the stop value 3800) and install each
# one as the learner's weights in turn.
# NOTE(review): `np` is not imported in the visible part of this script, and
# the data directory is an absolute path on one specific machine -- confirm
# both before running elsewhere.
for i in np.arange(2000, 3800, 50):
    theta = np.loadtxt(
        '/home/fitze/Documents/agent-bicycle/data/balance_sarsalambda_linfa_replacetrace_anneal_112217H56M04S/theta_%i.dat'
        % i)
    learner._theta = theta

Esempio n. 5

Mostra file

File: analyze_linearfa.py Progetto: chrisdembia/agent-bicycle

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.utilities import one_to_n

from environment import Environment
from tasks import LinearFATileCoding3456BalanceTask
from training import LinearFATraining
from learners import SARSALambda_LinFA_ReplacingTraces

# Build the balance task and a SARSALambda_LinFA_ReplacingTraces learner
# constructed from the task's `nactions` and `outdim`.
task = LinearFATileCoding3456BalanceTask()
learner = SARSALambda_LinFA_ReplacingTraces(task.nactions, task.outdim)
learner._lambda = 0.95
# Make the task use the same discount value as the learner.
task.discount = learner.rewardDiscount
agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

# For each saved weight file theta_2000.dat, theta_2050.dat, ...,
# theta_3750.dat (np.arange excludes the stop value 3800): install the weights
# in the learner, query its Q-values for one fixed state, and add them to the
# plot as a curve labelled with the file index.
# NOTE(review): `np` and `pl` are not imported in the visible part of this
# script, and the data directory is an absolute path on one specific machine
# -- confirm both before running elsewhere.
for i in np.arange(2000, 3800, 50):
    theta = np.loadtxt('/home/fitze/Documents/agent-bicycle/data/balance_sarsalambda_linfa_replacetrace_anneal_112217H56M04S/theta_%i.dat' % i)
    learner._theta = theta
    # The state is the bin returned by task.getBin(0, 0, 0, 0, 0), passed
    # through one_to_n with task.outdim as the encoding length.
    Q = learner._qValues(one_to_n(task.getBin(0, 0, 0, 0, 0), task.outdim))
    pl.plot(Q, label='%s' % i)

Esempio n. 6

Mostra file

File: lspi_test.py Progetto: Nicolas99-9/Rl_projet

        dist_to_goal = np.linalg.norm(target - np.array([xf, yf]))
        delta_tilt = self.env.getTilt()**2 - self.env.last_omega**2
        last_xf = self.env.last_xf
        last_yf = self.env.last_yf
        dist_to_goal_last = np.linalg.norm(target -
                                           np.array([last_xf, last_yf]))
        delta_dist = dist_to_goal - dist_to_goal_last
        return -delta_tilt - delta_dist * 0.01


# Wire up LSPI on the project's LSPI_task: one agent with epsilonGreedy
# enabled drives the experiment, and a second greedy, non-learning agent
# shares the same learner object.
task = LSPI_task()
# NOTE(review): 9 and 20 are hard-coded here; presumably the number of actions
# and the feature dimension -- confirm they match LSPI_task.
learner = LSPI(9, 20)
# Use the same discount value (0.8) on both the task and the learner.
task.rewardDiscount = 0.8
learner.rewardDiscount = 0.8

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
exp = EpisodicExperiment(task, agent)
learner.learningRateDecay = 3000
max_agent = LinearFA_Agent(learner)
# Fixed: this used to assign to the misspelled attribute `learnerning`, which
# only created a new, unused attribute and left the agent's real `learning`
# flag untouched, so the greedy agent was never switched out of learning mode.
max_agent.learning = False
max_agent.greedy = True

# NOTE(review): presumably tells the environment to record wheel contact
# trajectories for plotting -- confirm against the environment class.
task.env.saveWheelContactTrajectories(True)
# Interactive plotting on, with one 8x4 figure holding two side-by-side axes.
plt.ion()
plt.figure(figsize=(8, 4))

ax1 = plt.subplot(1, 2, 1)
ax2 = plt.subplot(1, 2, 2)