def testAugmented():
    """Convert a rigid-Zelda level to an MDP, print it, then play one episode.

    The game is parsed from `rigidzelda_game`, the level is built from the
    hard-coded map below, and `MDPconverter.convert()` is run on it.  The
    converter's states, the first element of the transition result and the
    reward result are printed.  Afterwards an agent obtained from
    `PolicyDrivenAgent.buildOptimal` plays a single visualized episode.
    """
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent

    # NOTE(review): the literal is kept exactly as it appears in this file.
    # A level map would normally span several rows -- confirm the row breaks
    # against the original source.
    zelda_level2 = """ wwwwwwwwwwwww wA wwk1ww w ww ww 1 w ww wwww+w wwwww1ww www wwwww 0 Gww wwwwwwwwwwwww """
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game

    game = VGDLParser().parseGame(rigidzelda_game)
    game.buildLevel(zelda_level2)
    env = GameEnvironment(game, visualize=False,
                          recordingEnabled=True, actionDelay=150)

    # Dump the MDP view of the level.
    converter = MDPconverter(game, env=env, verbose=True)
    transitions, rewards, _ = converter.convert()
    print(converter.states)
    print(transitions[0])
    print(rewards)

    # Build the agent on a freshly reset environment, then switch the
    # display on and reset again before the episode is played.
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    experiment = EpisodicExperiment(GameTask(env), agent)
    experiment.doEpisodes(1)
def testRecordingToGif(human=False):
    """Play one recorded episode of the polar maze and pass it to makeGifVideo.

    human -- when true, the environment is visualized and an
             `InteractiveAgent` is used; otherwise the agent comes from
             `PolicyDrivenAgent.buildOptimal`.

    The episode result and the list of recorded actions are printed before
    `makeGifVideo` is called with the environment's initial state.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent, InteractiveAgent
    from tools import makeGifVideo

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)
    env = GameEnvironment(game, visualize=human,
                          recordingEnabled=True, actionDelay=200)
    task = GameTask(env)

    agent = InteractiveAgent() if human else PolicyDrivenAgent.buildOptimal(env)

    experiment = EpisodicExperiment(task, agent)
    outcome = experiment.doEpisodes(1)
    print(outcome)

    # Each recorded event is a triple; the action is its middle element.
    actions = [action for _, action, _ in env._allEvents]
    print(actions)
    makeGifVideo(env, actions, initstate=env._initstate)
def testAugmented():
    """Convert a rigid-Zelda level to an MDP, print it, then play one episode.

    The game is parsed from `rigidzelda_game`, the level is built from the
    hard-coded map below, and `MDPconverter.convert()` is run on it.  The
    converter's states, the first element of the transition result and the
    reward result are printed.  Afterwards an agent obtained from
    `PolicyDrivenAgent.buildOptimal` plays a single visualized episode.
    """
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from mdpmap import MDPconverter
    from agents import PolicyDrivenAgent

    # NOTE(review): the literal is kept exactly as it appears in this file.
    # A level map would normally span several rows -- confirm the row breaks
    # against the original source.
    zelda_level2 = """ wwwwwwwwwwwww wA wwk1ww w ww ww 1 w ww wwww+w wwwww1ww www wwwww 0 Gww wwwwwwwwwwwww """
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game

    game = VGDLParser().parseGame(rigidzelda_game)
    game.buildLevel(zelda_level2)
    env = GameEnvironment(game, visualize=False,
                          recordingEnabled=True, actionDelay=150)

    # Dump the MDP view of the level.
    converter = MDPconverter(game, env=env, verbose=True)
    transitions, rewards, _ = converter.convert()
    print(converter.states)
    print(transitions[0])
    print(rewards)

    # Build the agent on a freshly reset environment, then switch the
    # display on and reset again before the episode is played.
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    experiment = EpisodicExperiment(GameTask(env), agent)
    experiment.doEpisodes(1)
def __init__(self, task, agent):
    '''Initialise the experiment by delegating to EpisodicExperiment.

    task and agent are forwarded unchanged to
    EpisodicExperiment.__init__; no additional state is set up here.
    '''
    EpisodicExperiment.__init__(self, task, agent)
class QAlgorithm:
    """Q-learning bot whose run/pause/quit behaviour follows `self.state`.

    `self.state` is read by `Pause` and `CheckState` (0 = start, 1 = pause,
    2 = quit) and `self.sr` is read by `GameOver`, but neither is assigned
    anywhere in this class.
    NOTE(review): presumably both are set by the owning menu/GUI code before
    the bot is started -- confirm against the caller.
    """

    def __init__(self, rewardBox, box, gameOver, endGame, scoreArea):
        """Build environment, controller, learner, agent, task and experiment.

        rewardBox -- stored as `self.reward`.
        box       -- stored as `self.bbox` and passed to `TEnviroment`.
        gameOver  -- stored as `self.gameOver` (read by `GameOver`).
        endGame   -- stored as `self.endBox` (read by `GameOver`).
        scoreArea -- passed to `TTask`.

        A previously saved controller is loaded from "bot.txt" when that
        file exists; otherwise a fresh `ActionValueNetwork` is created.
        """
        self.reward = rewardBox
        self.bbox = box
        # Previously this argument was dropped although GameOver() reads
        # self.gameOver.
        self.gameOver = gameOver
        self.environment = TEnviroment(box)  # Custom environment class
        if os.path.isfile("bot.txt"):
            # NOTE(review): unpickling executes arbitrary code; only load
            # "bot.txt" files this program wrote itself.
            with open("bot.txt", "rb") as saved:
                self.controller = pickle.load(saved)
        else:
            # Arguments (framerate*maxPlaytime, number of actions)
            self.controller = ActionValueNetwork(50**2, 4)
        self.learner = Q()
        gf = {0: self.GameOver}
        self.agent = LearningAgent(self.controller, self.learner)
        self.task = TTask(self.environment, scoreArea, gf)  # needs custom task
        self.experiment = EpisodicExperiment(self.task, self.agent)
        self.process = None
        self.endBox = endGame

    def Pause(self):
        """Block (polling every 0.05 s) while the menu state is 1; return True."""
        while self.state == 1:
            time.sleep(.05)
        return True

    def Quit(self):
        """Terminate the worker process if there is one; always return False."""
        # self.process stays None unless something outside this class
        # assigns it, so guard against calling terminate() on None.
        if self.process is not None:
            self.process.terminate()
        return False

    def Start(self):
        """Run the bot synchronously if no process is recorded; return True."""
        if self.process is None:
            self.runBot()
            # self.process = multiprocessing.Process(target=self.runBot, args= [])
            # self.process.start()
        return True

    def CheckState(self):
        """Dispatch on the menu state: 0 -> Start, 1 -> Pause, 2 -> Quit.

        The handlers' return values are discarded, so this always returns
        None.
        """
        if self.state == 0:
            self.Start()
        elif self.state == 1:
            self.Pause()
        elif self.state == 2:
            self.Quit()

    def GameOver(self):
        """Apply the menu state, then report whether the game has ended.

        Because `CheckState` returns None, the result is whatever
        `self.sr.checkEndGame(self.endBox, self.gameOver)` returns.
        """
        return self.CheckState() or self.sr.checkEndGame(self.endBox, self.gameOver)

    def runBot(self):
        """Run the experiment once, learn, reset the agent, save the controller."""
        self.experiment.doEpisodes()
        self.agent.learn()
        self.agent.reset()
        # Close the file deterministically so the pickle is fully flushed.
        with open("bot.txt", "wb+") as out:
            pickle.dump(self.controller, out)
def test4():
    """Animated run: five visualized episodes on the stochastic windy maze.

    The agent comes from `PolicyDrivenAgent.buildOptimal`; the value
    returned by `doEpisodes(5)` is printed.
    """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(windy_stoch_game)
    game.buildLevel(windy_level)
    env = GameEnvironment(game, visualize=True, actionDelay=100)
    task = GameTask(env)
    policy_agent = PolicyDrivenAgent.buildOptimal(env)
    experiment = EpisodicExperiment(task, policy_agent)
    outcome = experiment.doEpisodes(5)
    print(outcome)
def test4():
    """Animated run: five visualized episodes on the stochastic windy maze.

    The agent comes from `PolicyDrivenAgent.buildOptimal`; the value
    returned by `doEpisodes(5)` is printed.
    """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(windy_stoch_game)
    game.buildLevel(windy_level)
    env = GameEnvironment(game, visualize=True, actionDelay=100)
    task = GameTask(env)
    policy_agent = PolicyDrivenAgent.buildOptimal(env)
    experiment = EpisodicExperiment(task, policy_agent)
    outcome = experiment.doEpisodes(5)
    print(outcome)
def playSubjectiveGame(game_str, map_str):
    """Let an InteractiveAgent play one episode in a SubjectiveGame.

    game_str -- game description handed to `VGDLParser().parseGame`.
    map_str  -- level description handed to `buildLevel`.

    A `UserTiredException` raised during the episode is swallowed on
    purpose, so the function then simply returns.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameTask
    from vgdl.subjective import SubjectiveGame
    from vgdl.agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(game_str)
    game.buildLevel(map_str)
    subjective_env = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    experiment = EpisodicExperiment(GameTask(subjective_env), InteractiveAgent())
    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Raised while the episode is running; ending early is not an error.
        pass
def playSubjectiveGame(game_str, map_str):
    """Let an InteractiveAgent play one episode in a SubjectiveGame.

    game_str -- game description handed to `VGDLParser().parseGame`.
    map_str  -- level description handed to `buildLevel`.

    A `UserTiredException` raised during the episode is swallowed on
    purpose, so the function then simply returns.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from interfaces import GameTask
    from subjective import SubjectiveGame
    from agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(game_str)
    game.buildLevel(map_str)
    subjective_env = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    experiment = EpisodicExperiment(GameTask(subjective_env), InteractiveAgent())
    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Raised while the episode is running; ending early is not an error.
        pass
def combinedScore(agent, task=None):
    """Let the agent act on a number of levels and return the combined score.

    agent -- agent to evaluate; `agent.name` is used when a task has to be
             created.
    task  -- task to run on; when None, a `MarioTask` is created for the
             agent.

    Both loops currently cover `range(1)`, so exactly one episode
    (difficulty 0, seed 0) is played.  The per-episode `task.reward` is
    printed and summed; the sum is returned.
    """
    # Identity test: a task object with a custom __eq__ must not be
    # mistaken for "no task given".
    if task is None:
        task = MarioTask(agentName=agent.name)
    exp = EpisodicExperiment(task, agent)
    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            exp.doEpisodes(1)
            print('Difficulty: %d, Seed: %d, Fitness: %.2f' % (difficulty, seed, task.reward))
            res += task.reward
    return res
def combinedScore(agent, task=None):
    """Let the agent act on a number of levels and return the combined score.

    agent -- agent to evaluate; `agent.name` is used when a task has to be
             created.
    task  -- task to run on; when None, a `MarioTask` is created for the
             agent.

    Both loops currently cover `range(1)`, so exactly one episode
    (difficulty 0, seed 0) is played.  The per-episode `task.reward` is
    printed and summed; the sum is returned.
    """
    # Identity test: a task object with a custom __eq__ must not be
    # mistaken for "no task given".
    if task is None:
        task = MarioTask(agentName=agent.name)
    exp = EpisodicExperiment(task, agent)
    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            exp.doEpisodes(1)
            print('Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward))
            res += task.reward
    return res
def testPolicyAgent():
    """Run two visualized polar-maze episodes with a policy-driven agent.

    The environment is created without visualization, the agent is built
    with `PolicyDrivenAgent.buildOptimal`, and only then is visualization
    switched on and the environment reset.  The result of `doEpisodes(2)`
    is printed.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)
    env = GameEnvironment(game, visualize=False, actionDelay=100)
    task = GameTask(env)
    policy_agent = PolicyDrivenAgent.buildOptimal(env)

    env.visualize = True
    env.reset()
    experiment = EpisodicExperiment(task, policy_agent)
    outcome = experiment.doEpisodes(2)
    print(outcome)
def testPolicyAgent():
    """Run two visualized polar-maze episodes with a policy-driven agent.

    The environment is created without visualization, the agent is built
    with `PolicyDrivenAgent.buildOptimal`, and only then is visualization
    switched on and the environment reset.  The result of `doEpisodes(2)`
    is printed.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)
    env = GameEnvironment(game, visualize=False, actionDelay=100)
    task = GameTask(env)
    policy_agent = PolicyDrivenAgent.buildOptimal(env)

    env.visualize = True
    env.reset()
    experiment = EpisodicExperiment(task, policy_agent)
    outcome = experiment.doEpisodes(2)
    print(outcome)
def mlDriver(cv, stateTransfer, actionTransfer):
    """Train an NFQ agent on the Settle environment in an endless loop.

    cv, stateTransfer and actionTransfer are forwarded unchanged to
    `SettleEnv`.  Each cycle plays 10 episodes, lets the agent learn and
    resets it, printing a progress line after every step.  The loop has no
    exit condition, so this function does not return.
    """
    # parameter setup
    state_dim = 352   # dimensionality of state argument (could be less than stateTransfer)
    num_moves = 361   # Number of moves possible

    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(state_dim, num_moves, env)

    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    while True:
        experiment.doEpisodes(10)
        print("Done with experiments")
        agent.learn()
        print("Learned")
        agent.reset()
        print("Cycled")
def mlDriver(cv, stateTransfer, actionTransfer):
    """Train an NFQ agent on the Settle environment in an endless loop.

    cv, stateTransfer and actionTransfer are forwarded unchanged to
    `SettleEnv`.  Each cycle plays 10 episodes, lets the agent learn and
    resets it, printing a progress line after every step.  The loop has no
    exit condition, so this function does not return.
    """
    # parameter setup
    state_dim = 352   # dimensionality of state argument (could be less than stateTransfer)
    num_moves = 361   # Number of moves possible

    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(state_dim, num_moves, env)

    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    while True:
        experiment.doEpisodes(10)
        print("Done with experiments")
        agent.learn()
        print("Learned")
        agent.reset()
        print("Cycled")
def test3():
    """Play one interactive polar-maze episode and print the recorded events.

    The episode runs in a `SubjectiveGame` with recording enabled.  A
    `UserTiredException` raised during the episode is swallowed; the
    environment's `_allEvents` is printed either way.
    """
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from interfaces import GameTask
    from agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1b)
    senv = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    # Alternative environment:
    # senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True)
    experiment = EpisodicExperiment(GameTask(senv), InteractiveAgent())
    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Ending the episode early is not an error here.
        pass
    print(senv._allEvents)
def test3():
    """Play one interactive polar-maze episode and print the recorded events.

    The episode runs in a `SubjectiveGame` with recording enabled.  A
    `UserTiredException` raised during the episode is swallowed; the
    environment's `_allEvents` is printed either way.
    """
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameTask
    from vgdl.agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1b)
    senv = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    # Alternative environment:
    # senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True)
    experiment = EpisodicExperiment(GameTask(senv), InteractiveAgent())
    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Ending the episode early is not an error here.
        pass
    print(senv._allEvents)
def testRecordingToGif(human=False):
    """Play one recorded episode of the polar maze and pass it to makeGifVideo.

    human -- when true, the environment is visualized and an
             `InteractiveAgent` is used; otherwise the agent comes from
             `PolicyDrivenAgent.buildOptimal`.

    The episode result and the list of recorded actions are printed before
    `makeGifVideo` is called with the environment's initial state.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent, InteractiveAgent
    from vgdl.tools import makeGifVideo

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)
    env = GameEnvironment(game, visualize=human,
                          recordingEnabled=True, actionDelay=200)
    task = GameTask(env)

    agent = InteractiveAgent() if human else PolicyDrivenAgent.buildOptimal(env)

    experiment = EpisodicExperiment(task, agent)
    outcome = experiment.doEpisodes(1)
    print(outcome)

    # Each recorded event is a triple; the action is its middle element.
    actions = [action for _, action, _ in env._allEvents]
    print(actions)
    makeGifVideo(env, actions, initstate=env._initstate)
def testInteractions():
    """Run two visualized polar-maze episodes with a uniformly random agent.

    The local `DummyAgent` picks an integer action in [0, total - 1] on
    every call.  The result of `doEpisodes(2)` is printed.
    """
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.rl.agents.agent import Agent

    class DummyAgent(Agent):
        # Number of distinct actions to choose from.
        total = 4

        def getAction(self):
            return randint(0, self.total - 1)

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1)
    env = GameEnvironment(game, visualize=True, actionDelay=100)
    experiment = EpisodicExperiment(GameTask(env), DummyAgent())
    outcome = experiment.doEpisodes(2)
    print(outcome)
def main():
    """Save a fresh MLP Mario agent's network and test it on harder levels.

    The agent's name is printed and its module is written to an XML file
    under ../temp/.  For seed 0 the difficulties 0, 3, 5 and 10 are played
    in order; the sequence stops at the first episode whose reward is below
    4000.  The number of episodes that reached 4000 is printed, followed by
    the module's input buffer.
    """
    agent = SimpleMLPMarioAgent(10, inGridSize=3)
    print(agent.name)
    NetworkWriter.writeToFile(agent.module, "../temp/MarioNetwork-" + agent.name + ".xml")

    task = MarioTask(agent.name, timeLimit=200)
    experiment = EpisodicExperiment(task, agent)

    cleared = 0
    total_fitness = 0   # accumulated but not reported
    for seed in [0]:
        for difficulty in [0, 3, 5, 10]:
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            experiment.doEpisodes(1)
            print('Difficulty: %d, Seed: %d, Fitness: %.2f' % (difficulty, seed, task.reward))
            total_fitness += task.reward
            if task.reward < 4000:
                break
            cleared += 1
    print(cleared)
    print(agent.module.inputbuffer * 1.)
def main():
    """Run a ForwardAgent under three initMarioMode settings and print rewards.

    Three episodes are played with the mode given at task creation (1),
    then one episode each after setting `task.env.initMarioMode` to 2 and
    to 0.  `task.reward` is printed after each stage.
    """
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=1)
    experiment = EpisodicExperiment(task, agent)
    print('Task Ready')

    experiment.doEpisodes(3)
    print('mm 1: %s' % (task.reward,))

    task.env.initMarioMode = 2
    experiment.doEpisodes(1)
    print('mm 2: %s' % (task.reward,))

    task.env.initMarioMode = 0
    experiment.doEpisodes(1)
    print('mm 0: %s' % (task.reward,))

    print("finished")
def __init__(self, rewardBox, box, gameOver, endGame, scoreArea):
    """Build environment, controller, learner, agent, task and experiment.

    rewardBox -- stored as `self.reward`.
    box       -- stored as `self.bbox` and passed to `TEnviroment`.
    gameOver  -- stored as `self.gameOver`.
    endGame   -- stored as `self.endBox`.
    scoreArea -- passed to `TTask`.

    A previously saved controller is loaded from "bot.txt" when that file
    exists; otherwise a fresh `ActionValueNetwork` is created.
    """
    self.reward = rewardBox
    self.bbox = box
    # Previously this argument was accepted but never stored.
    self.gameOver = gameOver
    self.environment = TEnviroment(box)  # Custom environment class
    if os.path.isfile("bot.txt"):
        # NOTE(review): unpickling executes arbitrary code; only load
        # "bot.txt" files this program wrote itself.
        with open("bot.txt", "rb") as saved:
            self.controller = pickle.load(saved)
    else:
        # Arguments (framerate*maxPlaytime, number of actions)
        self.controller = ActionValueNetwork(50**2, 4)
    self.learner = Q()
    gf = {0: self.GameOver}
    self.agent = LearningAgent(self.controller, self.learner)
    self.task = TTask(self.environment, scoreArea, gf)  # needs custom task
    self.experiment = EpisodicExperiment(self.task, self.agent)
    self.process = None
    self.endBox = endGame
def testInteractions():
    """Run two visualized polar-maze episodes with a uniformly random agent.

    The local `DummyAgent` picks an integer action in [0, total - 1] on
    every call.  The result of `doEpisodes(2)` is printed.
    """
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.rl.agents.agent import Agent

    class DummyAgent(Agent):
        # Number of distinct actions to choose from.
        total = 4

        def getAction(self):
            return randint(0, self.total - 1)

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1)
    env = GameEnvironment(game, visualize=True, actionDelay=100)
    experiment = EpisodicExperiment(GameTask(env), DummyAgent())
    outcome = experiment.doEpisodes(2)
    print(outcome)
def f(self, x):
    """ Evaluate a module or an agent on this episodic task.

    x may be a Module (driven directly: observation in, action out, until
    the episode is finished) or an Agent (run through an
    EpisodicExperiment).  The total reward is averaged over
    `self.batchSize` evaluations.  Any other type raises ValueError. """
    accumulated = 0.
    for _ in range(self.batchSize):
        if isinstance(x, Module):
            x.reset()
            self.reset()
            while not self.isFinished():
                observation = self.getObservation()
                self.performAction(x.activate(observation))
        elif isinstance(x, Agent):
            EpisodicExperiment(self, x).doEpisodes()
        else:
            raise ValueError('%s cannot evaluate the fitness of %s'
                             % (self.__class__.__name__, str(type(x))))
        accumulated += self.getTotalReward()
    return accumulated / float(self.batchSize)
def __call__(self, module):
    """ Evaluate a module or an agent on this episodic task.

    A Module is driven directly for one episode and the total reward is
    returned.  An Agent is run for `self.batchSize` episodes through an
    EpisodicExperiment and the total reward divided by the batch size is
    returned.  Any other type raises NotImplementedError. """
    if isinstance(module, Module):
        module.reset()
        self.reset()
        while not self.isFinished():
            observation = self.getObservation()
            self.performAction(module.activate(observation))
        return self.getTotalReward()

    if isinstance(module, Agent):
        EpisodicExperiment(self, module).doEpisodes(self.batchSize)
        return self.getTotalReward() / float(self.batchSize)

    raise NotImplementedError('Missing implementation for ' +
                              module.__class__.__name__ + ' evaluation')
def main():
    """Run a ForwardAgent under three initMarioMode settings and print rewards.

    Three episodes are played with the mode given at task creation (1),
    then one episode each after setting `task.env.initMarioMode` to 2 and
    to 0.  `task.reward` is printed after each stage.
    """
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=1)
    experiment = EpisodicExperiment(task, agent)
    print("Task Ready")

    experiment.doEpisodes(3)
    print("mm 1: %s" % (task.reward,))

    task.env.initMarioMode = 2
    experiment.doEpisodes(1)
    print("mm 2: %s" % (task.reward,))

    task.env.initMarioMode = 0
    experiment.doEpisodes(1)
    print("mm 0: %s" % (task.reward,))

    print("finished")
# create agent learner = ENAC() learner.gd.rprop = True # only relevant for RP learner.gd.deltamin = 0.0001 #agent.learner.gd.deltanull = 0.05 # only relevant for BP learner.gd.alpha = 0.01 learner.gd.momentum = 0.9 agent = LearningAgent(net, learner) agent.actaspg = False # create experiment experiment = EpisodicExperiment(task, agent) # print weights at beginning print(agent.module.params) rewards = [] if useGraphics: figure() ion() pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1]) pl.setLineStyle(linewidth=2) # queued version # experiment._fillQueue(30) # while True: # experiment._stepQueueLoop()
def main(): """ Main program for automatic asset allocation problem. """ # Directories input_data_dir = '../../Data/Input/' output_data_dir = '../../Data/Output/' # Experiment parameters batch = 1 # Number of samples per learning step prnts = 100 # Learning steps before printing results nEpisodes = 100/batch/prnts # Number of rollouts nExperiments = 1 # Number of experiments et = ExTools(batch, prnts) # Tool for printing and plotting # Paramenters X = 0.0 / 252 # Daily risk-free rate deltaP = 0.00 # Proportional transaction costs deltaF = 0.0 # Fixed transaction costs deltaS = 0.00 # Short-selling borrowing costs P = 5 # Number of past days the agent considers discount = 0.95 # Discount factor # Evaluation interval sizes start = P + 1 trainingIntervalLength = 70 testIntervalLength = 30 # Initialize the market environment market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P) nSamples = len(market.data) nPeriods = (nSamples - start + 1) / (trainingIntervalLength + testIntervalLength) # Initialize the asset allocation tasks task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount) # Initialize controller module module = buildNetwork(market.outdim, # Input layer market.indim, # Output layer outclass=SoftmaxLayer) # Output activation function # Initialize learner module learner = PGPE(storeAllEvaluations=True, learningRate=0.01, sigmaLearningRate=0.01, batchSize=batch, # momentum=0.05, # epsilon=6.0, rprop=False) # Initialize learning agent agent = OptimizationAgent(module, learner) et.agent = agent for period in xrange(5): # nPeriods): # Set initial and final time steps for training initialTimeStep = start finalTimeStep = start + trainingIntervalLength task.setEvaluationInterval(initialTimeStep, finalTimeStep) task.trainingMode() # Initialize experiment experiment = EpisodicExperiment(task, agent) # Train the agent for episode in xrange(nEpisodes): for i in xrange(prnts): experiment.doEpisodes(batch) 
et.printResults((agent.learner._allEvaluations)[-50:-1], 1, episode) # Set initial and final time steps for training initialTimeStep = start + trainingIntervalLength finalTimeStep = initialTimeStep + testIntervalLength task.setEvaluationInterval(initialTimeStep, finalTimeStep) task.backtestMode() # Initialize experiment experiment = EpisodicExperiment(task, agent) # Test the agent experiment.doEpisodes(batch) # Slide evaluation window start += testIntervalLength # Print allocations task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE') plt.ylim(0.0, 1.0) plt.xlabel('Date') plt.ylabel('Portfolio Allocation') plt.show() # Print cumulative log-returns buyHold = market.data.ix[task.report.index, 'SPY'] buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0) ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0) cumLogReturns = pd.DataFrame(index=task.report.index) cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns cumLogReturns['PGPE'] = ptfCumLogReturns cumLogReturns.plot(title='Cumulative Log-Returns - PGPE', lw=2, grid=True) plt.xlabel('Date') plt.ylabel('Cumulative Log-Returns') plt.show()
def __init__(self, task, agent):
    '''Initialise the experiment by delegating to EpisodicExperiment.

    task and agent are forwarded unchanged to
    EpisodicExperiment.__init__; no additional state is set up here.
    '''
    EpisodicExperiment.__init__(self, task, agent)
# create agent learner = ENAC() learner.gd.rprop = True # only relevant for RP learner.gd.deltamin = 0.0001 #agent.learner.gd.deltanull = 0.05 # only relevant for BP learner.gd.alpha = 0.01 learner.gd.momentum = 0.9 agent = LearningAgent(net, learner) agent.actaspg = False # create experiment experiment = EpisodicExperiment(task, agent) # print weights at beginning print(agent.module.params) rewards = [] if useGraphics: figure() ion() pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1]) pl.setLineStyle(linewidth=2) # queued version # experiment._fillQueue(30) # while True: # experiment._stepQueueLoop()
from scipy import *
import sys
import time

from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task

from tasktest import TestTask
from envtest import TestEnv

# Wire environment -> task -> NFQ learning agent -> episodic experiment.
env = TestEnv()
task = TestTask(env)
controller = ActionValueNetwork(200, 3)
learner = NFQ()
agent = LearningAgent(controller, learner)
experiment = EpisodicExperiment(task, agent)

# Endless training loop: 10 episodes per cycle, then learn and reset.
i = 0
while True:
    experiment.doEpisodes(10)
    print("Learning")
    agent.learn()
    agent.reset()
    i += 1
    print("Cycle: %d" % i)
    # After 60 cycles learning is switched off; the loop keeps running.
    if i > 60:
        agent.learning = False
import sys
import time

from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task

from tasktest import TestTask
from envtest import TestEnv

# Wire environment -> task -> NFQ learning agent -> episodic experiment.
env = TestEnv()
task = TestTask(env)
controller = ActionValueNetwork(200, 3)
learner = NFQ()
agent = LearningAgent(controller, learner)
experiment = EpisodicExperiment(task, agent)

# Endless training loop: 10 episodes per cycle, then learn and reset.
i = 0
while True:
    experiment.doEpisodes(10)
    print("Learning")
    agent.learn()
    agent.reset()
    i += 1
    print("Cycle: %d" % i)
    # After 60 cycles learning is switched off; the loop keeps running.
    if i > 60:
        agent.learning = False
import timeit

# Set up the Euphoria game, an NFQ learning agent and a random opponent,
# then run episodes and dump the collected rewards to a CSV file.
# `csv`, `NFQ`, `EpisodicExperiment` and the Euphoria* names are expected
# to be in scope from elsewhere in the script (not visible here).
environment = EuphoriaGame()
controller = euActionValueNetwork(582,113)
learner = NFQ()
agent = EuphoriaLearningAgent(controller,learner)
agentOp = EuphoriaRandomPlayer(environment)
task = EuphoriaTask(agentOp)
experiment = EpisodicExperiment(task, agent)
i = 0
reward = []
# NOTE(review): `i` is not incremented anywhere in the visible text, so this
# loop would not terminate as shown; the snippet may be truncated.  The
# nesting of the statements below is reconstructed -- confirm it.
while i<1:
    tic=timeit.default_timer()  # start time; not read in the visible text
    r = experiment.doEpisodes(3)
    # Keep the last entry of each element returned by doEpisodes.
    for ri in r:
        reward.append(ri[-1])
    # Write all rewards collected so far as a single CSV row.
    with open('rewardList_'+str(i)+'.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerows([reward])
    # print reward
# coding=utf-8
from pybrain.optimization.hillclimber import HillClimber
from pybrain.rl.agents.optimization import OptimizationAgent
from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.tools.shortcuts import buildNetwork

# Balance task with a network sized from the task's outdim/indim and a
# single hidden layer of 3 units.
task = BalanceTask()
net = buildNetwork(task.outdim, 3, task.indim)

# The agent optimizes the network with a hill climber.
agent = OptimizationAgent(net, HillClimber())

# Run 100 episodes, then print the experiment object.
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)
print(exp)