def testAugmented():
    """Convert a rigid-Zelda level with MDPconverter, print the result, then play it.

    Prints the converter's states, the first element of the returned
    transition structure and the returned rewards, then runs one visualized
    episode with the agent produced by PolicyDrivenAgent.buildOptimal.
    """
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game

    # NOTE(review): this literal is reproduced exactly as it appears in this
    # view; the row breaks and space runs of the level look collapsed --
    # confirm the layout against the original level definition.
    level_text = """ wwwwwwwwwwwww wA wwk1ww w ww ww 1 w ww wwww+w wwwww1ww www wwwww 0 Gww wwwwwwwwwwwww """

    game = VGDLParser().parseGame(rigidzelda_game)
    game.buildLevel(level_text)
    env = GameEnvironment(game, visualize=False, recordingEnabled=True,
                          actionDelay=150)

    converter = MDPconverter(game, env=env, verbose=True)
    transitions, rewards, _ = converter.convert()
    print(converter.states)
    print(transitions[0])
    print(rewards)

    # Reset before building the agent, then again once visualization is on.
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()

    experiment = EpisodicExperiment(GameTask(env), agent)
    experiment.doEpisodes(1)
def testAugmented():
    """Convert a rigid-Zelda level with MDPconverter, print the result, then play it.

    Prints the converter's states, the first element of the returned
    transition structure and the returned rewards, then runs one visualized
    episode with the agent produced by PolicyDrivenAgent.buildOptimal.
    """
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from mdpmap import MDPconverter
    from agents import PolicyDrivenAgent
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game

    # NOTE(review): this literal is reproduced exactly as it appears in this
    # view; the row breaks and space runs of the level look collapsed --
    # confirm the layout against the original level definition.
    level_text = """ wwwwwwwwwwwww wA wwk1ww w ww ww 1 w ww wwww+w wwwww1ww www wwwww 0 Gww wwwwwwwwwwwww """

    game = VGDLParser().parseGame(rigidzelda_game)
    game.buildLevel(level_text)
    env = GameEnvironment(game, visualize=False, recordingEnabled=True,
                          actionDelay=150)

    converter = MDPconverter(game, env=env, verbose=True)
    transitions, rewards, _ = converter.convert()
    print(converter.states)
    print(transitions[0])
    print(rewards)

    # Reset before building the agent, then again once visualization is on.
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()

    experiment = EpisodicExperiment(GameTask(env), agent)
    experiment.doEpisodes(1)
def main():
    """Run a SimpleMLPMarioAgent on seed 0 over difficulties 0, 3, 5 and 10.

    The agent's module is written to ../temp/ first. Each level is played for
    one episode and its fitness printed; the difficulty sweep stops at the
    first reward below 4000. Finally prints how many levels reached that
    threshold and the module's input buffer multiplied by 1.0.
    """
    agent = SimpleMLPMarioAgent(10, inGridSize=3)
    print(agent.name)

    network_path = "../temp/MarioNetwork-" + agent.name + ".xml"
    NetworkWriter.writeToFile(agent.module, network_path)

    task = MarioTask(agent.name, timeLimit=200)
    experiment = EpisodicExperiment(task, agent)

    passed = 0
    total_fitness = 0  # accumulated but not reported
    for seed in (0,):
        for difficulty in (0, 3, 5, 10):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            experiment.doEpisodes(1)
            print('Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward))
            total_fitness += task.reward
            if task.reward < 4000:
                # Only leaves the difficulty loop for the current seed.
                break
            passed += 1

    print(passed)
    print(agent.module.inputbuffer * 1.)
class QAlgorithm:
    """Q-learning bot wrapper driven by an external menu state.

    The methods read ``self.state`` (0 = start, 1 = pause, 2 = quit) and
    ``self.sr``; neither is assigned inside this class.
    NOTE(review): presumably both are set by the owning menu/screen-reader
    code before the bot runs -- confirm against the caller.
    """

    def __init__(self, rewardBox, box, gameOver, endGame, scoreArea):
        """Build the environment, controller, learner, agent, task and experiment.

        The controller is restored from ``bot.txt`` when that file exists,
        otherwise a fresh ActionValueNetwork is created.
        """
        self.reward = rewardBox
        self.bbox = box
        # Previously this argument was dropped even though GameOver() reads
        # self.gameOver, which raised AttributeError on the first check.
        self.gameOver = gameOver
        self.endBox = endGame
        self.process = None
        self.environment = TEnviroment(box)  # Custom environment class
        if os.path.isfile("bot.txt"):
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only load bot.txt files this program wrote itself.
            with open("bot.txt", "rb") as saved:
                self.controller = pickle.load(saved)
        else:
            # Arguments (framerate*maxPlaytime, number of actions)
            self.controller = ActionValueNetwork(50 ** 2, 4)
        self.learner = Q()
        gf = {0: self.GameOver}
        self.agent = LearningAgent(self.controller, self.learner)
        self.task = TTask(self.environment, scoreArea, gf)  # needs custom task
        self.experiment = EpisodicExperiment(self.task, self.agent)

    def Pause(self):
        """Block, polling every 0.05 s, while the menu state is 1 (pause); return True."""
        while self.state == 1:
            time.sleep(.05)
        return True

    def Quit(self):
        """Terminate the bot process if one exists; always return False."""
        # self.process stays None while the multiprocessing launch in Start()
        # is disabled, so guard against calling terminate() on None.
        if self.process is not None:
            self.process.terminate()
        return False

    def Start(self):
        """Run the bot when no process has been recorded; always return True."""
        if self.process is None:
            self.runBot()
            # self.process = multiprocessing.Process(target=self.runBot, args=[])
            # self.process.start()
        return True

    def CheckState(self):
        """Dispatch on the menu state: 0 starts, 1 pauses, 2 quits.

        The handlers' return values are discarded, so this returns None.
        """
        if self.state == 0:
            self.Start()
        elif self.state == 1:
            self.Pause()
        elif self.state == 2:
            self.Quit()

    def GameOver(self):
        """Apply the menu state, then report whether the game has ended.

        CheckState() returns None, so the result is always the value of
        ``self.sr.checkEndGame(self.endBox, self.gameOver)``.
        """
        return self.CheckState() or self.sr.checkEndGame(self.endBox, self.gameOver)

    def runBot(self):
        """Run the experiment's episodes, learn, reset, and save the controller."""
        self.experiment.doEpisodes()
        self.agent.learn()
        self.agent.reset()
        # Close the handle so the pickled controller is flushed to disk.
        with open("bot.txt", "wb+") as out:
            pickle.dump(self.controller, out)
def playSubjectiveGame(game_str, map_str):
    """Play one episode of the given game/level with an InteractiveAgent.

    The game is parsed from ``game_str``, the level built from ``map_str``,
    and the episode runs inside a SubjectiveGame with recording enabled.
    A UserTiredException raised during the episode is swallowed.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from interfaces import GameTask
    from subjective import SubjectiveGame
    from agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(game_str)
    game.buildLevel(map_str)
    subjective_env = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    experiment = EpisodicExperiment(GameTask(subjective_env), InteractiveAgent())
    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Deliberately ignored: the episode simply ends early.
        pass
def playSubjectiveGame(game_str, map_str):
    """Play one episode of the given game/level with an InteractiveAgent.

    The game is parsed from ``game_str``, the level built from ``map_str``,
    and the episode runs inside a SubjectiveGame with recording enabled.
    A UserTiredException raised during the episode is swallowed.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameTask
    from vgdl.subjective import SubjectiveGame
    from vgdl.agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(game_str)
    game.buildLevel(map_str)
    subjective_env = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    experiment = EpisodicExperiment(GameTask(subjective_env), InteractiveAgent())
    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Deliberately ignored: the episode simply ends early.
        pass
def combinedScore(agent, task=None):
    """ Let the agent act on a number of levels of increasing difficulty.
    Return the combined score.

    :param agent: agent to evaluate; its ``name`` is used to build the
        default task.
    :param task: task to run on; when None a ``MarioTask`` is created for
        the agent.
    :return: sum of ``task.reward`` over all played levels. With the current
        ranges only difficulty 0 / seed 0 is played.
    """
    # Identity test: `== None` would go through a custom __eq__ on the task.
    if task is None:
        task = MarioTask(agentName=agent.name)
    exp = EpisodicExperiment(task, agent)
    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            exp.doEpisodes(1)
            print('Difficulty: %d, Seed: %d, Fitness: %.2f' % (difficulty, seed, task.reward))
            res += task.reward
    return res
def testRecordingToGif(human=False):
    """Record one episode on polarmaze / maze_level_2 and pass it to makeGifVideo.

    :param human: when true an InteractiveAgent plays and the environment is
        visualized; otherwise the agent comes from
        PolicyDrivenAgent.buildOptimal.

    Prints the result of the episode and the recorded actions.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent, InteractiveAgent
    from tools import makeGifVideo

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)
    env = GameEnvironment(game, visualize=human, recordingEnabled=True,
                          actionDelay=200)
    task = GameTask(env)
    agent = InteractiveAgent() if human else PolicyDrivenAgent.buildOptimal(env)

    experiment = EpisodicExperiment(task, agent)
    print(experiment.doEpisodes(1))

    # Each recorded event is a 3-tuple; the action is its middle element.
    actions = []
    for _, action, _ in env._allEvents:
        actions.append(action)
    print(actions)
    makeGifVideo(env, actions, initstate=env._initstate)
def combinedScore(agent, task=None):
    """ Let the agent act on a number of levels of increasing difficulty.
    Return the combined score.

    :param agent: agent to evaluate; its ``name`` is used to build the
        default task.
    :param task: task to run on; when None a ``MarioTask`` is created for
        the agent.
    :return: sum of ``task.reward`` over all played levels. With the current
        ranges only difficulty 0 / seed 0 is played.
    """
    # Identity test: `== None` would go through a custom __eq__ on the task.
    if task is None:
        task = MarioTask(agentName=agent.name)
    exp = EpisodicExperiment(task, agent)
    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            exp.doEpisodes(1)
            print('Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward))
            res += task.reward
    return res
def mlDriver(cv, stateTransfer, actionTransfer):
    """Train an NFQ agent on the Settle environment in an endless loop.

    ``cv``, ``stateTransfer`` and ``actionTransfer`` are handed unchanged to
    SettleEnv. Every cycle plays 10 episodes, learns from them and resets
    the agent. This function never returns.
    """
    # Parameter setup
    state_dim = 352   # dimensionality of state argument (could be less than stateTransfer)
    move_count = 361  # number of moves possible

    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(state_dim, move_count, env)

    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)

    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    while True:
        experiment.doEpisodes(10)
        print("Done with experiments")
        agent.learn()
        print("Learned")
        agent.reset()
        print("Cycled")
def test3():
    """Play one interactive episode of polarmaze / maze_level_1b and print its events.

    The episode runs in a SubjectiveGame with recording enabled; a
    UserTiredException ends it silently. Afterwards the environment's
    ``_allEvents`` is printed.
    """
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameTask
    from vgdl.agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1b)
    subjective_env = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    # Alternative, non-subjective environment:
    # subjective_env = GameEnvironment(game, actionDelay=100, recordingEnabled=True, visualize=True)
    experiment = EpisodicExperiment(GameTask(subjective_env), InteractiveAgent())
    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Deliberately ignored: the episode simply ends early.
        pass
    print(subjective_env._allEvents)
def mlDriver(cv, stateTransfer, actionTransfer):
    """Train an NFQ agent on the Settle environment in an endless loop.

    ``cv``, ``stateTransfer`` and ``actionTransfer`` are handed unchanged to
    SettleEnv. Every cycle plays 10 episodes, learns from them and resets
    the agent. This function never returns.
    """
    # Parameter setup
    state_dim = 352   # dimensionality of state argument (could be less than stateTransfer)
    move_count = 361  # number of moves possible

    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(state_dim, move_count, env)

    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)

    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    while True:
        experiment.doEpisodes(10)
        print("Done with experiments")
        agent.learn()
        print("Learned")
        agent.reset()
        print("Cycled")
def test3():
    """Play one interactive episode of polarmaze / maze_level_1b and print its events.

    The episode runs in a SubjectiveGame with recording enabled; a
    UserTiredException ends it silently. Afterwards the environment's
    ``_allEvents`` is printed.
    """
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from interfaces import GameTask
    from agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1b)
    subjective_env = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    # Alternative, non-subjective environment:
    # subjective_env = GameEnvironment(game, actionDelay=100, recordingEnabled=True, visualize=True)
    experiment = EpisodicExperiment(GameTask(subjective_env), InteractiveAgent())
    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Deliberately ignored: the episode simply ends early.
        pass
    print(subjective_env._allEvents)
def test4():
    """ Same thing, but animated.

    Runs five visualized episodes of the stochastic windy game with the agent
    from PolicyDrivenAgent.buildOptimal and prints what doEpisodes returns.
    """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(windy_stoch_game)
    game.buildLevel(windy_level)
    env = GameEnvironment(game, visualize=True, actionDelay=100)
    task = GameTask(env)
    experiment = EpisodicExperiment(task, PolicyDrivenAgent.buildOptimal(env))
    print(experiment.doEpisodes(5))
def main():
    """Run a SimpleMLPMarioAgent on seed 0 over difficulties 0, 3, 5 and 10.

    The agent's module is written to ../temp/ first. Each level is played for
    one episode and its fitness printed; the difficulty sweep stops at the
    first reward below 4000. Finally prints how many levels reached that
    threshold and the module's input buffer multiplied by 1.0.
    """
    agent = SimpleMLPMarioAgent(10, inGridSize=3)
    print(agent.name)

    network_path = "../temp/MarioNetwork-" + agent.name + ".xml"
    NetworkWriter.writeToFile(agent.module, network_path)

    task = MarioTask(agent.name, timeLimit=200)
    experiment = EpisodicExperiment(task, agent)

    passed = 0
    total_fitness = 0  # accumulated but not reported
    for seed in (0,):
        for difficulty in (0, 3, 5, 10):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            experiment.doEpisodes(1)
            print('Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward))
            total_fitness += task.reward
            if task.reward < 4000:
                # Only leaves the difficulty loop for the current seed.
                break
            passed += 1

    print(passed)
    print(agent.module.inputbuffer * 1.)
def test4():
    """ Same thing, but animated.

    Runs five visualized episodes of the stochastic windy game with the agent
    from PolicyDrivenAgent.buildOptimal and prints what doEpisodes returns.
    """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(windy_stoch_game)
    game.buildLevel(windy_level)
    env = GameEnvironment(game, visualize=True, actionDelay=100)
    task = GameTask(env)
    experiment = EpisodicExperiment(task, PolicyDrivenAgent.buildOptimal(env))
    print(experiment.doEpisodes(5))
def testPolicyAgent():
    """Run two visualized episodes of polarmaze / maze_level_2 with a policy agent.

    The agent is built by PolicyDrivenAgent.buildOptimal while visualization
    is off; visualization is then switched on and the environment reset
    before the episodes run. Prints what doEpisodes returns.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)
    env = GameEnvironment(game, visualize=False, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)

    env.visualize = True
    env.reset()

    experiment = EpisodicExperiment(task, agent)
    print(experiment.doEpisodes(2))
def testPolicyAgent():
    """Run two visualized episodes of polarmaze / maze_level_2 with a policy agent.

    The agent is built by PolicyDrivenAgent.buildOptimal while visualization
    is off; visualization is then switched on and the environment reset
    before the episodes run. Prints what doEpisodes returns.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)
    env = GameEnvironment(game, visualize=False, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)

    env.visualize = True
    env.reset()

    experiment = EpisodicExperiment(task, agent)
    print(experiment.doEpisodes(2))
def main(): agent = ForwardAgent() task = MarioTask(agent.name, initMarioMode=1) exp = EpisodicExperiment(task, agent) print 'Task Ready' exp.doEpisodes(3) print 'mm 1:', task.reward task.env.initMarioMode = 2 exp.doEpisodes(1) print 'mm 2:', task.reward task.env.initMarioMode = 0 exp.doEpisodes(1) print 'mm 0:', task.reward print "finished"
def main(): agent = ForwardAgent() task = MarioTask(agent.name, initMarioMode=1) exp = EpisodicExperiment(task, agent) print "Task Ready" exp.doEpisodes(3) print "mm 1:", task.reward task.env.initMarioMode = 2 exp.doEpisodes(1) print "mm 2:", task.reward task.env.initMarioMode = 0 exp.doEpisodes(1) print "mm 0:", task.reward print "finished"
def testInteractions():
    """Run two visualized episodes of polarmaze / maze_level_1 with a random agent.

    The agent picks a uniformly random integer action in [0, 3] at every
    step. Prints what doEpisodes returns.
    """
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.rl.agents.agent import Agent

    class DummyAgent(Agent):
        """Agent returning a random action index below ``total``."""
        total = 4

        def getAction(self):
            # randint is inclusive on both ends, hence total - 1.
            return randint(0, self.total - 1)

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1)
    env = GameEnvironment(game, visualize=True, actionDelay=100)
    experiment = EpisodicExperiment(GameTask(env), DummyAgent())
    print(experiment.doEpisodes(2))
def testRecordingToGif(human=False):
    """Record one episode on polarmaze / maze_level_2 and pass it to makeGifVideo.

    :param human: when true an InteractiveAgent plays and the environment is
        visualized; otherwise the agent comes from
        PolicyDrivenAgent.buildOptimal.

    Prints the result of the episode and the recorded actions.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent, InteractiveAgent
    from vgdl.tools import makeGifVideo

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)
    env = GameEnvironment(game, visualize=human, recordingEnabled=True,
                          actionDelay=200)
    task = GameTask(env)
    agent = InteractiveAgent() if human else PolicyDrivenAgent.buildOptimal(env)

    experiment = EpisodicExperiment(task, agent)
    print(experiment.doEpisodes(1))

    # Each recorded event is a 3-tuple; the action is its middle element.
    actions = []
    for _, action, _ in env._allEvents:
        actions.append(action)
    print(actions)
    makeGifVideo(env, actions, initstate=env._initstate)
def testInteractions():
    """Run two visualized episodes of polarmaze / maze_level_1 with a random agent.

    The agent picks a uniformly random integer action in [0, 3] at every
    step. Prints what doEpisodes returns.
    """
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.rl.agents.agent import Agent

    class DummyAgent(Agent):
        """Agent returning a random action index below ``total``."""
        total = 4

        def getAction(self):
            # randint is inclusive on both ends, hence total - 1.
            return randint(0, self.total - 1)

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1)
    env = GameEnvironment(game, visualize=True, actionDelay=100)
    experiment = EpisodicExperiment(GameTask(env), DummyAgent())
    print(experiment.doEpisodes(2))
# coding=utf-8 from pybrain.optimization.hillclimber import HillClimber from pybrain.rl.agents.optimization import OptimizationAgent from pybrain.rl.environments.cartpole.balancetask import BalanceTask from pybrain.rl.experiments.episodic import EpisodicExperiment from pybrain.tools.shortcuts import buildNetwork task = BalanceTask() net = buildNetwork(task.outdim, 3, task.indim) agent = OptimizationAgent(net, HillClimber()) exp = EpisodicExperiment(task, agent) exp.doEpisodes(100) print(exp)
def main():
    """ Main program for automatic asset allocation problem.

    Trains a PGPE-optimized softmax network on a sliding window of daily
    returns and backtests it on the window that follows. For each of 5
    periods the agent trains on ``trainingIntervalLength`` steps starting at
    ``start``, is evaluated on the next ``testIntervalLength`` steps, and
    ``start`` then advances by ``testIntervalLength``. Finally plots the
    portfolio allocation and the cumulative log-returns against a
    buy-and-hold of the 'SPY' column.
    """

    # Directories
    input_data_dir = '../../Data/Input/'
    output_data_dir = '../../Data/Output/'

    # Experiment parameters
    batch = 1                         # Number of samples per learning step
    prnts = 100                       # Learning steps before printing results
    nEpisodes = 100 // batch // prnts  # Number of rollouts (integer division)
    nExperiments = 1                  # Number of experiments
    et = ExTools(batch, prnts)        # Tool for printing and plotting

    # Parameters
    X = 0.0 / 252    # Daily risk-free rate
    deltaP = 0.00    # Proportional transaction costs
    deltaF = 0.0     # Fixed transaction costs
    deltaS = 0.00    # Short-selling borrowing costs
    P = 5            # Number of past days the agent considers
    discount = 0.95  # Discount factor

    # Evaluation interval sizes
    start = P + 1
    trainingIntervalLength = 70
    testIntervalLength = 30

    # Initialize the market environment
    market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P)
    nSamples = len(market.data)
    nPeriods = (nSamples - start + 1) // (trainingIntervalLength + testIntervalLength)

    # Initialize the asset allocation tasks
    task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount)

    # Initialize controller module
    module = buildNetwork(market.outdim,           # Input layer
                          market.indim,            # Output layer
                          outclass=SoftmaxLayer)   # Output activation function

    # Initialize learner module
    learner = PGPE(storeAllEvaluations=True,
                   learningRate=0.01,
                   sigmaLearningRate=0.01,
                   batchSize=batch,
                   # momentum=0.05,
                   # epsilon=6.0,
                   rprop=False)

    # Initialize learning agent
    agent = OptimizationAgent(module, learner)
    et.agent = agent

    for period in xrange(5):  # nPeriods):

        # Set initial and final time steps for training
        initialTimeStep = start
        finalTimeStep = start + trainingIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.trainingMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Train the agent
        for episode in xrange(nEpisodes):
            for i in xrange(prnts):
                experiment.doEpisodes(batch)
            et.printResults((agent.learner._allEvaluations)[-50:-1], 1, episode)

        # Set initial and final time steps for testing (backtest window
        # immediately follows the training window)
        initialTimeStep = start + trainingIntervalLength
        finalTimeStep = initialTimeStep + testIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.backtestMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Test the agent
        experiment.doEpisodes(batch)

        # Slide evaluation window
        start += testIntervalLength

    # Print allocations (every report column except the last one)
    task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE')
    plt.ylim(0.0, 1.0)
    plt.xlabel('Date')
    plt.ylabel('Portfolio Allocation')
    plt.show()

    # Print cumulative log-returns
    # Label-based selection; `.loc` replaces the deprecated `.ix` indexer.
    buyHold = market.data.loc[task.report.index, 'SPY']
    buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0)
    ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0)
    cumLogReturns = pd.DataFrame(index=task.report.index)
    cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns
    cumLogReturns['PGPE'] = ptfCumLogReturns
    cumLogReturns.plot(title='Cumulative Log-Returns - PGPE',
                       lw=2, grid=True)
    plt.xlabel('Date')
    plt.ylabel('Cumulative Log-Returns')
    plt.show()
# queued version # experiment._fillQueue(30) # while True: # experiment._stepQueueLoop() # # rewards.append(mean(agent.history.getSumOverSequences('reward'))) # print agent.module.getParameters(), # print mean(agent.history.getSumOverSequences('reward')) # clf() # plot(rewards) # episodic version x = 0 batch = 30 #number of samples per gradient estimate (was: 20; more here due to stochastic setting) while x < 5000: #while True: experiment.doEpisodes(batch) x += batch reward = mean( agent.history.getSumOverSequences('reward')) * task.rewardscale if useGraphics: pl.addData(0, x, reward) print(agent.module.params) print(reward) #if reward > 3: # pass agent.learn() agent.reset() if useGraphics: pl.update() if len(sys.argv) > 2:
# Script: train an NFQ agent on TestTask/TestEnv in an endless loop of
# 10-episode batches.
import sys, time
from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task
from tasktest import TestTask
from envtest import TestEnv

env = TestEnv()
task = TestTask(env)
# presumably (state dimension, number of actions) -- TODO confirm
controller = ActionValueNetwork(200, 3)
learner = NFQ()
agent = LearningAgent(controller, learner)
experiment = EpisodicExperiment(task, agent)
i = 0  # completed learning cycles
# NOTE: this loop has no exit condition.
while True:
    experiment.doEpisodes(10)
    print "Learning"
    agent.learn()
    agent.reset()
    i += 1
    print "Cycle: %d" %i
    # After 60 cycles the agent's learning flag is switched off; episodes
    # keep running.
    if i > 60:
        agent.learning = False
# Script: train an NFQ agent on TestTask/TestEnv in an endless loop of
# 10-episode batches.
from scipy import *
import sys, time
from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task
from tasktest import TestTask
from envtest import TestEnv

env = TestEnv()
task = TestTask(env)
# presumably (state dimension, number of actions) -- TODO confirm
controller = ActionValueNetwork(200, 3)
learner = NFQ()
agent = LearningAgent(controller, learner)
experiment = EpisodicExperiment(task, agent)
i = 0  # completed learning cycles
# NOTE: this loop has no exit condition.
while True:
    experiment.doEpisodes(10)
    print "Learning"
    agent.learn()
    agent.reset()
    i += 1
    print "Cycle: %d" % i
    # After 60 cycles the agent's learning flag is switched off; episodes
    # keep running.
    if i > 60:
        agent.learning = False
# Script fragment: play 3 episodes against a random opponent, save the last
# reward of each episode to a CSV file and print the elapsed time.
# `controller` and `environment` are defined outside this fragment.
learner = NFQ()
agent = EuphoriaLearningAgent(controller,learner)
agentOp = EuphoriaRandomPlayer(environment)  # opponent handed to the task
task = EuphoriaTask(agentOp)
experiment = EpisodicExperiment(task, agent)
i = 0
reward = []  # last entry of each episode's result, across iterations
while i<1:  # runs exactly once with the current bound
    tic=timeit.default_timer()
    r = experiment.doEpisodes(3)
    for ri in r:
        # Keep only the final element of each episode's result.
        reward.append(ri[-1])
    # All collected rewards are written as a single CSV row.
    with open('rewardList_'+str(i)+'.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerows([reward])
    # print reward
    # agent.learn()
    # agent.reset()
    toc=timeit.default_timer()
    print toc - tic #elapsed time in seconds
    i+=1
# queued version # experiment._fillQueue(30) # while True: # experiment._stepQueueLoop() # # rewards.append(mean(agent.history.getSumOverSequences('reward'))) # print agent.module.getParameters(), # print mean(agent.history.getSumOverSequences('reward')) # clf() # plot(rewards) # episodic version x = 0 batch = 30 #number of samples per gradient estimate (was: 20; more here due to stochastic setting) while x<5000: #while True: experiment.doEpisodes(batch) x += batch reward = mean(agent.history.getSumOverSequences('reward'))*task.rewardscale if useGraphics: pl.addData(0,x,reward) print(agent.module.params) print(reward) #if reward > 3: # pass agent.learn() agent.reset() if useGraphics: pl.update() if len(sys.argv) > 2: