Example #1
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import BalanceTask
from training import NFQTraining

# Custom episodic balance task; outdim is the observation dimension and
# nactions the number of discrete actions it exposes.
task = BalanceTask()
action_value_function = ActionValueNetwork(task.outdim, task.nactions,
        name='BalanceNFQActionValueNetwork')

# Neural fitted Q iteration with a near-undiscounted return and a very
# exploratory epsilon-greedy policy.
learner = NFQ()
learner.gamma = 0.9999
learner.explorer.epsilon = 0.9
task.discount = learner.gamma

# The learning agent explores and trains; the performance agent shares the
# same network but has no learner, so it acts greedily during evaluation.
agent = LearningAgent(action_value_function, learner)
performance_agent = LearningAgent(action_value_function, None)
experiment = EpisodicExperiment(task, agent)

tr = NFQTraining('balance_nfq', experiment, performance_agent)

tr.train(7000, performance_interval=1, n_performance_episodes=1,
        plotsave_interval=10, plot_action_history=True)
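
The NFQTraining helper imported from training is not part of PyBrain. Below is a minimal sketch of what such a wrapper could look like, inferred only from how it is called in these examples; the class internals, defaults, and print format are assumptions, and the plotting options are only absorbed, not implemented.

# Hypothetical sketch of training.NFQTraining, reconstructed from its usage above.
import numpy as np


class NFQTraining(object):

    def __init__(self, name, experiment, performance_agent):
        self.name = name
        self.experiment = experiment
        self.performance_agent = performance_agent

    def train(self, n_iterations, performance_interval=10,
              n_performance_episodes=1, n_episodes_per_rehearsal=1, **kwargs):
        # kwargs would carry plotting options such as plotsave_interval and
        # plot_action_history; plotting is omitted from this sketch.
        learning_agent = self.experiment.agent
        for i in range(n_iterations):
            # Collect experience with the exploring agent, then run one batch
            # of neural fitted Q iteration on the recorded episodes.
            self.experiment.doEpisodes(n_episodes_per_rehearsal)
            learning_agent.learn()
            learning_agent.reset()
            if i % performance_interval == 0:
                # Swap in the learner-free agent to measure greedy performance.
                self.experiment.agent = self.performance_agent
                rewards = self.experiment.doEpisodes(n_performance_episodes)
                mean_return = np.mean([sum(r) for r in rewards])
                print('%s iteration %i: mean return %.3f' % (self.name, i, mean_return))
                self.performance_agent.reset()
                self.experiment.agent = learning_agent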

Example #2
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.experiments import EpisodicExperiment

from training import NFQTraining

# Standard PyBrain cart-pole balancing task, capped at 100 steps per episode.
task = DiscreteBalanceTask(CartPoleEnvironment(), 100)
# The network maps the 4-dimensional cart-pole state to Q-values for the
# task's 3 discrete actions.
action_value_function = ActionValueNetwork(4, 3,
        name='CartPoleNFQActionValueNetwork')

learner = NFQ()
#learner.gamma = 0.99   # NFQ's default discount factor is used instead
learner.explorer.epsilon = 0.4
task.discount = learner.gamma

# As in Example #1: the learning agent trains, while the learner-free
# performance agent acts greedily with the shared network for evaluation.
agent = LearningAgent(action_value_function, learner)
performance_agent = LearningAgent(action_value_function, None)
experiment = EpisodicExperiment(task, agent)

tr = NFQTraining('cartpole_nfq', experiment, performance_agent)

# Evaluate over 5 greedy episodes after every learning iteration.
tr.train(7000, performance_interval=1, n_performance_episodes=5)
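
After training, the shared ActionValueNetwork can also be queried directly; getActionValues and getMaxAction are methods PyBrain defines on that class. A short illustrative continuation of the script above (not part of the original example, and assuming the task's observation matches the network's 4-dimensional input):

# Inspect the learned Q-values for the current cart-pole observation.
task.reset()
state = task.getObservation()
print(action_value_function.getActionValues(state))  # one Q-value per discrete action
print(action_value_function.getMaxAction(state))     # index of the greedy action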

Example #3
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.experiments import EpisodicExperiment

from environment import Environment
from tasks import BalanceTask
from training import NFQTraining

# Same custom balance task and network setup as in Example #1.
task = BalanceTask()
action_value_function = ActionValueNetwork(task.outdim, task.nactions,
        name='BalanceNFQActionValueNetwork')

# Lower discount factor and less aggressive exploration than in Example #1.
learner = NFQ()
learner.gamma = 0.99
learner.explorer.epsilon = 0.5
task.discount = learner.gamma

agent = LearningAgent(action_value_function, learner)
performance_agent = LearningAgent(action_value_function, None)
experiment = EpisodicExperiment(task, agent)

tr = NFQTraining('balance_nfq_many_episodes_per_learn', experiment, performance_agent)

# Gather 5 episodes of experience before each batch NFQ update, as the run
# name suggests.
tr.train(7000, performance_interval=1, n_performance_episodes=1,
        n_episodes_per_rehearsal=5)