import numpy as np

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.learners.valuebased.linearfa import LSPI
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LSPIBalanceTask
from training import LinearFATraining

# Balance-only task: the agent controls steering only.
task = LSPIBalanceTask(only_steer=True)
learner = LSPI(task.nactions, task.outdim)

# Warm-start the learner from a previously saved weight vector.
# NOTE(review): hard-coded absolute path; consider a command-line argument.
theta = np.loadtxt('/home/fitze/Dropbox/stanford/21quarter/229cs/proj/data/balance_lspi_experimental_112011H17M18S/theta_800.dat')
learner._theta = theta

# TODO this LSPI does not have eligibility traces.
#learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
#learner.exploring = True

# Greedy, non-learning copy of the agent used to measure performance.
performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# NOTE(review): LSPI and LinearFA_Agent are used below but were not imported
# in this chunk; imports added to make the script self-contained.
from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.learners.valuebased.linearfa import LSPI
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LSPIGotoTask
from training import LinearFATraining

# Goal location for the goto task.
x_g = 10
y_g = 30

task = LSPIGotoTask(
    butt_disturbance_amplitude=0.0000,
    randomInitState=False,
    five_actions=True,
    rewardType=1,
    x_goal=x_g,
    y_goal=y_g)
learner = LSPI(task.nactions, task.outdim, randomInit=False)

# TODO this LSPI does not have eligibility traces.
#learner._lambda = 0.95

# lagoudakis uses 0.8 discount factor
learner.rewardDiscount = 0.8
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
agent.epsilonGreedy = True
#learner.exploring = True

performance_agent = LinearFA_Agent(learner)
from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.learners.valuebased.linearfa import LSPI
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LSPIBalanceTask
from training import LinearFATraining

# Build the balance task and an LSPI learner sized to it.
task = LSPIBalanceTask()
learner = LSPI(task.nactions, task.outdim)
# TODO this LSPI does not have eligibility traces.
#learner._lambda = 0.95
task.discount = learner.rewardDiscount

# Online learning agent.
agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

# Greedy, non-learning copy of the agent for performance evaluation.
performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False

experiment = EpisodicExperiment(task, agent)

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.
from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.learners.valuebased.linearfa import LSPI
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import LSPIGotoTask
from training import LinearFATraining

# Goal location for the goto task.
x_g = 10
y_g = 30

task = LSPIGotoTask(
    butt_disturbance_amplitude=0.0000,
    randomInitState=False,
    five_actions=True,
    rewardType=1,
    x_goal=x_g,
    y_goal=y_g)
learner = LSPI(task.nactions, task.outdim, randomInit=False)

# TODO this LSPI does not have eligibility traces.
#learner._lambda = 0.95

# lagoudakis uses 0.8 discount factor
learner.rewardDiscount = 0.8
task.discount = learner.rewardDiscount

# Online learning agent; epsilon-greedy exploration.
agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False
agent.epsilonGreedy = True
#learner.exploring = True

# Greedy, non-learning copy of the agent for performance evaluation.
performance_agent = LinearFA_Agent(learner)
performance_agent.logging = False
performance_agent.greedy = True
performance_agent.learning = False
import numpy as np

from pybrain.rl.agents.linearfa import LinearFA_Agent
from pybrain.rl.learners.valuebased.linearfa import LSPI
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.utilities import one_to_n

from environment import Environment
from tasks import LSPIBalanceTask
from training import LinearFATraining

task = LSPIBalanceTask()
learner = LSPI(task.nactions, task.outdim)
# TODO this LSPI does not have eligibility traces.
#learner._lambda = 0.95
task.discount = learner.rewardDiscount

agent = LinearFA_Agent(learner)
# The state has a huge number of dimensions, and the logging causes me to run
# out of memory. We needn't log, since learning is done online.
agent.logging = False

# TODO PyBrain says that the learning rate needs to decay, but I don't see that
# described in Randlov's paper.
# A higher number here means the learning rate decays slower.
learner.learningRateDecay = 100000
# NOTE increasing this number above from the default of 100 is what got the
# learning to actually happen, and fixed the bug/issue where the performance
# agent's performance stopped improving.

# Sweep over saved weight-vector checkpoints (theta_0.dat .. theta_2400.dat)
# and load each into the learner in turn.
# NOTE(review): hard-coded absolute path; consider a command-line argument.
for idx in np.arange(0, 2500, 100):
    theta = np.loadtxt('/home/fitze/Dropbox/stanford/21quarter/229cs/proj/data/balance_lspi_experimental_112011H17M18S/theta_%i.dat' % idx)
    learner._theta = theta
def getReward(self):
    """Reward: penalize increases in squared tilt and in distance to a
    fixed goal point.

    NOTE(review): reads ``self.env`` — this looks like a method of a task
    class whose header is outside this chunk; confirm the enclosing class
    (and its indentation) when merging.
    """
    # Fixed goal location in the plane (units per Environment — TODO confirm).
    target = np.array([30, 50])
    # Sensor indices 5 and 6 presumably are the front-wheel contact
    # coordinates (xf, yf) — TODO confirm against Environment.sensors.
    (_, _, _, _, _, xf, yf, _, _, _) = self.env.sensors
    dist_to_goal = np.linalg.norm(target - np.array([xf, yf]))
    # Change in squared tilt relative to the previous step.
    delta_tilt = self.env.getTilt()**2 - self.env.last_omega**2
    last_xf = self.env.last_xf
    last_yf = self.env.last_yf
    dist_to_goal_last = np.linalg.norm(target - np.array([last_xf, last_yf]))
    # Positive when the bicycle moved away from the goal this step.
    delta_dist = dist_to_goal - dist_to_goal_last
    return -delta_tilt - delta_dist * 0.01


task = LSPI_task()
learner = LSPI(9, 20)
task.rewardDiscount = 0.8
learner.rewardDiscount = 0.8

agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
exp = EpisodicExperiment(task, agent)
learner.learningRateDecay = 3000

# Greedy, non-learning agent used to evaluate performance.
max_agent = LinearFA_Agent(learner)
# BUG FIX: was 'max_agent.learnerning = False' — a typo that merely created
# a dead attribute and left learning enabled on the evaluation agent.
max_agent.learning = False
max_agent.greedy = True

task.env.saveWheelContactTrajectories(True)
plt.ion()
plt.figure(figsize=(8, 4))