from pybrain.rl.agents import LearningAgent from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork from pybrain.rl.experiments import EpisodicExperiment from environment import Environment from tasks import BalanceTask from training import NFQTraining task = BalanceTask() action_value_function = ActionValueNetwork(task.outdim, task.nactions, name='BalanceNFQActionValueNetwork') learner = NFQ() learner.gamma = 0.9999 learner.explorer.epsilon = 0.9 task.discount = learner.gamma agent = LearningAgent(action_value_function, learner) performance_agent = LearningAgent(action_value_function, None) experiment = EpisodicExperiment(task, agent) tr = NFQTraining('balance_nfq', experiment, performance_agent) tr.train(7000, performance_interval=1, n_performance_episodes=1, plotsave_interval=10, plot_action_history=True)
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask from pybrain.rl.agents import LearningAgent from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork from pybrain.rl.experiments import EpisodicExperiment from training import NFQTraining task = DiscreteBalanceTask(CartPoleEnvironment(), 100) action_value_function = ActionValueNetwork(4, 3, name='CartPoleNFQActionValueNetwork') learner = NFQ() #learner.gamma = 0.99 learner.explorer.epsilon = 0.4 task.discount = learner.gamma agent = LearningAgent(action_value_function, learner) performance_agent = LearningAgent(action_value_function, None) experiment = EpisodicExperiment(task, agent) tr = NFQTraining('cartpole_nfq', experiment, performance_agent) tr.train(7000, performance_interval=1, n_performance_episodes=5)
from pybrain.rl.agents import LearningAgent from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork from pybrain.rl.experiments import EpisodicExperiment from environment import Environment from tasks import BalanceTask from training import NFQTraining task = BalanceTask() action_value_function = ActionValueNetwork(task.outdim, task.nactions, name='BalanceNFQActionValueNetwork') learner = NFQ() learner.gamma = 0.99 learner.explorer.epsilon = 0.5 task.discount = learner.gamma agent = LearningAgent(action_value_function, learner) performance_agent = LearningAgent(action_value_function, None) experiment = EpisodicExperiment(task, agent) tr = NFQTraining('balance_nfq_many_episodes_per_learn', experiment, performance_agent) tr.train(7000, performance_interval=1, n_performance_episodes=1, n_episodes_per_rehearsal=5)