def test6(): """ Now with memory!""" from numpy import ndarray from examples.gridphysics.mazes import polarmaze_game from pybrain.optimization import SNES g = VGDLParser().parseGame(polarmaze_game) g.buildLevel(cheese_maze) game_env = GameEnvironment(g) net = buildNet(game_env.outdim, 10, 4, temperature=0.1, recurrent=True) algo = SNES(lambda x: someEpisodes(game_env, x, avgOver=6, maxSteps=30, exploretoo=False), net, verbose=True, desiredEvaluation=0.85) print algo.batchSize rows, cols = 2,3 episodesPerStep = 5 for i in range(rows*cols): pylab.subplot(rows, cols, i+1) algo.learn(episodesPerStep) if isinstance(algo.bestEvaluable, ndarray): net._setParameters(algo.bestEvaluable) else: net = algo.bestEvaluable plotBackground(game_env) plotTrajectories(game_env, net) pylab.title(str((i+1)*episodesPerStep)) if algo.desiredEvaluation <= algo.bestEvaluation: break print pylab.show()
def plotLSPIValues(gametype, layout, discountFactor=0.9, useTD=False, showValue=False):
    """Plot the values of the least-squares policy found from observations only.

    gametype/layout: VGDL game and level strings.
    useTD: choose the state-based (LSTD) solver instead of state-action LSPI.
    showValue: annotate the plot with the value of the initial state.
    """
    game = VGDLParser().parseGame(gametype)
    game.buildLevel(layout)
    # Turn the game into an MDP plus a state-to-observation mapping.
    converter = MDPconverter(game)
    Ts, R, fMap = converter.convert()
    # Pick the solver: LSTD works per state, LSPI per state-action pair.
    solver = LSTD_PI_policy if useTD else LSPI_policy
    _, Tlspi = solver(fMap, Ts, R, discountFactor=discountFactor)
    # Evaluate the approximated policy exactly, then plot its values.
    Vlspi = trueValues(Tlspi, R, discountFactor=discountFactor)
    featurePlot((game.width, game.height), converter.states, Vlspi)
    if showValue:
        # Expected discounted reward at the initial state.
        pylab.xlabel("V0=%.4f" % Vlspi[converter.initIndex()])
def test2():
    """Evolve a feedforward controller on the polar maze with SNES,
    plotting the best trajectory after each batch of episodes."""
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.optimization import SNES
    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1)
    env = GameEnvironment(game, actionDelay=100, recordingEnabled=True)
    net = buildNet(env.outdim, 6, 2)
    algo = SNES(lambda w: someEpisodes(env, w), net, verbose=True, desiredEvaluation=0.43)
    rows, cols = 3, 3
    episodesPerStep = 2
    # One subplot per learning round (1-based subplot indices).
    for cell in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, cell)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(env)
        plotTrajectories(env, net)
        pylab.title(str(cell * episodesPerStep))
        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
    print
    pylab.show()
def test3():
    """SNES on the consistent-corridor maze with a low-temperature controller."""
    from examples.gridphysics.mazes.simple import consistent_corridor
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES
    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(consistent_corridor)
    env = GameEnvironment(game)
    net = buildNet(env.outdim, 4, 4, temperature=0.05, recurrent=False)
    algo = SNES(lambda w: someEpisodes(env, w), net, verbose=True, desiredEvaluation=0.78)
    rows, cols = 2, 2
    episodesPerStep = 3
    for cell in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, cell)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(env)
        plotTrajectories(env, net)
        pylab.title(str(cell * episodesPerStep))
        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
    print
    pylab.show()
def testAugmented():
    """Convert the rigid-zelda game into an MDP, then replay the optimal policy.

    Prints the enumerated states, the first transition matrix, and the reward
    vector, then runs one visualized episode with a PolicyDrivenAgent.
    """
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent
    # Level layout: whitespace inside the map is significant.
    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww 1    w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    # Headless environment first, so the MDP conversion runs without a display.
    env = GameEnvironment(g, visualize=False,
                          recordingEnabled=True, actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    # Build the optimal policy on the headless env, then replay it visually.
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
def plotLSPIValues(gametype, layout, discountFactor=0.9, useTD=False, showValue=False):
    """Approximate the best policy from observations alone and plot its values."""
    # Construct the game and compile it into an MDP.
    game = VGDLParser().parseGame(gametype)
    game.buildLevel(layout)
    mdp = MDPconverter(game)
    transitions, rewards, obsMap = mdp.convert()
    # Least-squares policy approximation given only observations,
    # not the underlying state information.
    if useTD:
        # state-based
        _, policyT = LSTD_PI_policy(obsMap, transitions, rewards, discountFactor=discountFactor)
    else:
        # state-action-based
        _, policyT = LSPI_policy(obsMap, transitions, rewards, discountFactor=discountFactor)
    # Exact evaluation of the approximate policy.
    values = trueValues(policyT, rewards, discountFactor=discountFactor)
    featurePlot((game.width, game.height), mdp.states, values)
    if showValue:
        # Annotate with the expected discounted reward at the start state.
        initialValue = values[mdp.initIndex()]
        pylab.xlabel("V0=%.4f" % initialValue)
def testAugmented():
    """MDP conversion of the rigid-zelda game followed by an optimal replay.

    Side effects: prints states/transitions/rewards and opens a game window
    for the final episode.
    """
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent
    # Hand-written level; the map's internal spacing matters.
    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww 1    w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    # No visualization while converting to an MDP.
    env = GameEnvironment(g, visualize=False,
                          recordingEnabled=True, actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    # Solve, then switch visualization on for the demonstration episode.
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
def testRecordingToGif(human=False): from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.core import VGDLParser from examples.gridphysics.mazes import polarmaze_game, maze_level_2 from vgdl.agents import PolicyDrivenAgent, InteractiveAgent from vgdl.tools import makeGifVideo game_str, map_str = polarmaze_game, maze_level_2 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) env = GameEnvironment(g, visualize=human, recordingEnabled=True, actionDelay=200) task = GameTask(env) if human: agent = InteractiveAgent() else: agent = PolicyDrivenAgent.buildOptimal(env) exper = EpisodicExperiment(task, agent) res = exper.doEpisodes(1) print res actions = [a for _, a, _ in env._allEvents] print actions makeGifVideo(env, actions, initstate=env._initstate)
def test6(): """ Now with memory!""" from numpy import ndarray from examples.gridphysics.mazes import polarmaze_game from pybrain.optimization import SNES g = VGDLParser().parseGame(polarmaze_game) g.buildLevel(cheese_maze) game_env = GameEnvironment(g) net = buildNet(game_env.outdim, 10, 4, temperature=0.1, recurrent=True) algo = SNES(lambda x: someEpisodes(game_env, x, avgOver=6, maxSteps=30, exploretoo=False), net, verbose=True, desiredEvaluation=0.85) print algo.batchSize rows, cols = 2,3 episodesPerStep = 5 for i in range(rows*cols): pylab.subplot(rows, cols, i+1) algo.learn(episodesPerStep) if isinstance(algo.bestEvaluable, ndarray): net._setParameters(algo.bestEvaluable) else: net = algo.bestEvaluable plotBackground(game_env) plotTrajectories(game_env, net) pylab.title(str((i+1)*episodesPerStep)) if algo.desiredEvaluation <= algo.bestEvaluation: break print pylab.show()
def test4():
    """SNES on labyrinth2, averaging fitness over 3 episodes per evaluation."""
    from numpy import ndarray
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES, WeightGuessing
    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(labyrinth2)
    env = GameEnvironment(game)
    net = buildNet(env.outdim, 5, 4, temperature=0.1, recurrent=False)
    algo = SNES(lambda w: someEpisodes(env, w, avgOver=3), net, verbose=True, desiredEvaluation=0.75)
    # Alternative baseline optimizer:
    #algo = WeightGuessing(lambda w: someEpisodes(env, w), net, verbose=True, desiredEvaluation=0.78)
    rows, cols = 2, 2
    episodesPerStep = 4
    for cell in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, cell)
        algo.learn(episodesPerStep)
        best = algo.bestEvaluable
        if isinstance(best, ndarray):
            net._setParameters(best)
        else:
            net = best
        plotBackground(env)
        plotTrajectories(env, net)
        pylab.title(str(cell * episodesPerStep))
        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
    print
    pylab.show()
def test4():
    """Evolve a feedforward controller on labyrinth2 with SNES."""
    from numpy import ndarray
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES, WeightGuessing
    parsed = VGDLParser().parseGame(polarmaze_game)
    parsed.buildLevel(labyrinth2)
    environment = GameEnvironment(parsed)
    controller = buildNet(environment.outdim, 5, 4, temperature=0.1, recurrent=False)
    evaluate = lambda x: someEpisodes(environment, x, avgOver=3)
    algo = SNES(evaluate, controller, verbose=True, desiredEvaluation=0.75)
    # A weight-guessing baseline could be swapped in here:
    #algo = WeightGuessing(lambda x: someEpisodes(environment, x), controller, verbose=True, desiredEvaluation=0.78)
    rows, cols = 2, 2
    episodesPerStep = 4
    for i in range(rows * cols):
        pylab.subplot(rows, cols, i + 1)
        algo.learn(episodesPerStep)
        if not isinstance(algo.bestEvaluable, ndarray):
            # The optimizer returned a full network rather than weights.
            controller = algo.bestEvaluable
        else:
            controller._setParameters(algo.bestEvaluable)
        plotBackground(environment)
        plotTrajectories(environment, controller)
        pylab.title(str((i + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
    print
    pylab.show()
def testRolloutVideo(actions=None):
    """Record a fixed action roll-out in the polar maze as an animated gif.

    actions: list of action indices; defaults to a short canned sequence.
    """
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from vgdl.core import VGDLParser
    from vgdl.tools import makeGifVideo
    # Avoid the mutable-default-argument pitfall: build the default per call.
    if actions is None:
        actions = [0, 0, 2, 2, 0, 3] * 2
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    makeGifVideo(GameEnvironment(g, visualize=True), actions)
def testRollout(actions=None):
    """Visualize a fixed action sequence being rolled out in the polar maze.

    actions: list of action indices; defaults to a long canned sequence.
    """
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from vgdl.core import VGDLParser
    # Avoid the mutable-default-argument pitfall: build the default per call.
    if actions is None:
        actions = [0, 0, 2, 2, 0, 3] * 20
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    env.rollOut(actions)
def testRolloutVideo(actions=None):
    """Make a gif video of a short, fixed roll-out in the polar maze.

    actions: list of action indices; defaults to a short canned sequence.
    """
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from vgdl.core import VGDLParser
    from vgdl.tools import makeGifVideo
    # Mutable default argument replaced by a per-call default.
    if actions is None:
        actions = [0, 0, 2, 2, 0, 3] * 2
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    makeGifVideo(GameEnvironment(g, visualize=True), actions)
def _createVGDLGame(gameSpec, levelSpec):
    """Parse a VGDL game description, build its level, and tag it with a unique id.

    gameSpec: VGDL game description string.
    levelSpec: level layout string.
    Returns the constructed game object.
    """
    import uuid
    from vgdl.core import VGDLParser
    # parse, run and play.
    game = VGDLParser().parseGame(gameSpec)
    game.buildLevel(levelSpec)
    # NOTE(review): attribute name 'uiud' looks like a typo for 'uuid' —
    # confirm no callers read game.uiud before renaming.
    game.uiud = uuid.uuid4()
    return game
def testRollout(actions=None):
    """Roll out a canned action sequence in a visualized polar-maze environment.

    actions: list of action indices; defaults to a long canned sequence.
    """
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from vgdl.core import VGDLParser
    # Mutable default argument replaced by a per-call default.
    if actions is None:
        actions = [0, 0, 2, 2, 0, 3] * 20
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    env.rollOut(actions)
def testLoadSave():
    """Stress-test full-state serialization: save and restore 1000 times."""
    from vgdl.core import VGDLParser
    from examples.gridphysics.aliens import aliens_level, aliens_game
    game = VGDLParser().parseGame(aliens_game)
    game.buildLevel(aliens_level)
    # Round-trip the complete game state repeatedly.
    for _ in range(1000):
        snapshot = game.getFullState()
        game.setFullState(snapshot)
def test2():
    """Roll out the same action sequence in objective and subjective views."""
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from vgdl.core import VGDLParser
    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1)
    actions = [1, 0, 0, 3, 0, 2, 0, 2, 0, 0, 0]
    # Third-person (objective) replay first.
    env = GameEnvironment(game, visualize=True, actionDelay=100)
    env.rollOut(actions)
    env.reset()
    # Then the first-person (subjective) view, played more slowly.
    senv = SubjectiveGame(game, actionDelay=1500)
    senv.rollOut(actions)
def test2():
    """Compare an objective and a subjective replay of one action sequence."""
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from vgdl.core import VGDLParser
    game_str, map_str = polarmaze_game, maze_level_1
    parsed = VGDLParser().parseGame(game_str)
    parsed.buildLevel(map_str)
    moves = [1, 0, 0, 3, 0, 2, 0, 2, 0, 0, 0]
    objective = GameEnvironment(parsed, visualize=True, actionDelay=100)
    objective.rollOut(moves)
    objective.reset()
    subjective = SubjectiveGame(parsed, actionDelay=1500)
    subjective.rollOut(moves)
def testStochMaze(): from vgdl.core import VGDLParser from examples.gridphysics.mazes.stochastic import stoch_game, stoch_level g = VGDLParser().parseGame(stoch_game) g.buildLevel(stoch_level) C = MDPconverter(g, verbose=True) Ts, R, fMap = C.convert() print C.states print R for T in Ts: print T print fMap
def testStochMaze(): from vgdl.core import VGDLParser from examples.gridphysics.mazes.stochastic import stoch_game, stoch_level g = VGDLParser().parseGame(stoch_game) g.buildLevel(stoch_level) C = MDPconverter(g, verbose=True) Ts, R, fMap = C.convert() print C.states print R for T in Ts: print T print fMap
def testMaze(): from vgdl.core import VGDLParser from examples.gridphysics.mazes import polarmaze_game, maze_level_1 game_str, map_str = polarmaze_game, maze_level_1 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) C = MDPconverter(g, verbose=True) Ts, R, fMap = C.convert() print C.states print R for T in Ts: print T print fMap
def testMaze(): from vgdl.core import VGDLParser from examples.gridphysics.mazes import polarmaze_game, maze_level_1 game_str, map_str = polarmaze_game, maze_level_1 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) C = MDPconverter(g, verbose=True) Ts, R, fMap = C.convert() print C.states print R for T in Ts: print T print fMap
def test4(): """ Same thing, but animated. """ from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.interfaces import GameEnvironment, GameTask from vgdl.agents import PolicyDrivenAgent g = VGDLParser().parseGame(windy_stoch_game) g.buildLevel(windy_level) env = GameEnvironment(g, visualize=True, actionDelay=100) task = GameTask(env) agent = PolicyDrivenAgent.buildOptimal(env) exper = EpisodicExperiment(task, agent) res = exper.doEpisodes(5) print res
def test4(): """ Same thing, but animated. """ from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.interfaces import GameEnvironment, GameTask from vgdl.agents import PolicyDrivenAgent g = VGDLParser().parseGame(windy_stoch_game) g.buildLevel(windy_level) env = GameEnvironment(g, visualize=True, actionDelay=100) task = GameTask(env) agent = PolicyDrivenAgent.buildOptimal(env) exper = EpisodicExperiment(task, agent) res = exper.doEpisodes(5) print res
def testPolicyAgent(): from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.core import VGDLParser from examples.gridphysics.mazes import polarmaze_game, maze_level_2 from vgdl.agents import PolicyDrivenAgent game_str, map_str = polarmaze_game, maze_level_2 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) env = GameEnvironment(g, visualize=False, actionDelay=100) task = GameTask(env) agent = PolicyDrivenAgent.buildOptimal(env) env.visualize = True env.reset() exper = EpisodicExperiment(task, agent) res = exper.doEpisodes(2) print res
def test1(): from examples.gridphysics.mazes import polarmaze_game, maze_level_1 game_str, map_str = polarmaze_game, maze_level_1 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) game_env = GameEnvironment(g) print 'number of observations:', game_env.outdim net = buildNet(game_env.outdim, 2, 2) for i in range(200): net.randomize() net.reset() print someEpisodes(game_env, net), if i% 20 == 19: print
def test1(): from examples.gridphysics.mazes import polarmaze_game, maze_level_1 game_str, map_str = polarmaze_game, maze_level_1 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) game_env = GameEnvironment(g) print 'number of observations:', game_env.outdim net = buildNet(game_env.outdim, 2, 2) for i in range(200): net.randomize() net.reset() print someEpisodes(game_env, net), if i% 20 == 19: print
def testPolicyAgent(): from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.core import VGDLParser from examples.gridphysics.mazes import polarmaze_game, maze_level_2 from vgdl.agents import PolicyDrivenAgent game_str, map_str = polarmaze_game, maze_level_2 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) env = GameEnvironment(g, visualize=False, actionDelay=100) task = GameTask(env) agent = PolicyDrivenAgent.buildOptimal(env) env.visualize = True env.reset() exper = EpisodicExperiment(task, agent) res = exper.doEpisodes(2) print res
def test3(): from examples.gridphysics.mazes import polarmaze_game from examples.gridphysics.mazes.simple import maze_level_1b from vgdl.core import VGDLParser from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.interfaces import GameTask from vgdl.agents import InteractiveAgent, UserTiredException game_str, map_str = polarmaze_game, maze_level_1b g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True) #senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True) task = GameTask(senv) iagent = InteractiveAgent() exper = EpisodicExperiment(task, iagent) try: exper.doEpisodes(1) except UserTiredException: pass print senv._allEvents
def test3(): from examples.gridphysics.mazes import polarmaze_game from examples.gridphysics.mazes.simple import maze_level_1b from vgdl.core import VGDLParser from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.interfaces import GameTask from vgdl.agents import InteractiveAgent, UserTiredException game_str, map_str = polarmaze_game, maze_level_1b g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True) #senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True) task = GameTask(senv) iagent = InteractiveAgent() exper = EpisodicExperiment(task, iagent) try: exper.doEpisodes(1) except UserTiredException: pass print senv._allEvents
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    """Solve the game exactly via policy iteration and plot the state values.

    showValue: annotate the plot with the value of the initial state.
    """
    game = VGDLParser().parseGame(gametype)
    game.buildLevel(layout)
    # Compile the game into an MDP.
    converter = MDPconverter(game)
    Ts, R, _ = converter.convert()
    # Policy iteration yields the optimal transition structure.
    _, Topt = policyIteration(Ts, R, discountFactor=discountFactor)
    Vopt = trueValues(Topt, R, discountFactor=discountFactor)
    featurePlot((game.width, game.height), converter.states, Vopt, plotdirections=True)
    if showValue:
        # Expected discounted reward from the initial state.
        pylab.xlabel("V0=%.4f" % Vopt[converter.initIndex()])
def testInteractions():
    """Run the aliens game headlessly for up to 300 ticks of constant shooting."""
    from vgdl.core import VGDLParser
    from examples.gridphysics.aliens import aliens_level, aliens_game
    from pygame.locals import K_SPACE
    # from examples.gridphysics.sokoban import so
    from pybrain.rl.agents.agent import Agent

    class DummyAgent(Agent):
        total = 4
        def getAction(self):
            # res = randint(0, self.total - 1)
            return 1

    game = VGDLParser().parseGame(aliens_game)
    game.buildLevel(aliens_level)
    game._initScreen(game.screensize, headless=True)
    # Tick with the space key until the game reports a win/loss.
    for _ in range(300):
        win, _ = game.tick(K_SPACE)
        if win is not None:
            break
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    """Compute and plot the optimal state values of a VGDL game."""
    # Build the game and convert it to an MDP.
    game = VGDLParser().parseGame(gametype)
    game.buildLevel(layout)
    mdp = MDPconverter(game)
    transitions, rewards, _ = mdp.convert()
    # Exact solution by policy iteration, then exact evaluation.
    _, optimalT = policyIteration(transitions, rewards, discountFactor=discountFactor)
    optimalValues = trueValues(optimalT, rewards, discountFactor=discountFactor)
    featurePlot((game.width, game.height), mdp.states, optimalValues, plotdirections=True)
    if showValue:
        # Expected discounted reward at the initial state.
        initialValue = optimalValues[mdp.initIndex()]
        pylab.xlabel("V0=%.4f" % initialValue)
def testInteractions(): from random import randint from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.core import VGDLParser from examples.gridphysics.mazes import polarmaze_game, maze_level_1 from pybrain.rl.agents.agent import Agent class DummyAgent(Agent): total = 4 def getAction(self): res = randint(0, self.total - 1) return res game_str, map_str = polarmaze_game, maze_level_1 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) env = GameEnvironment(g, visualize=True, actionDelay=100) task = GameTask(env) agent = DummyAgent() exper = EpisodicExperiment(task, agent) res = exper.doEpisodes(2) print res
def testRecordingToGif(human=False): from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.core import VGDLParser from examples.gridphysics.mazes import polarmaze_game, maze_level_2 from vgdl.agents import PolicyDrivenAgent, InteractiveAgent from vgdl.tools import makeGifVideo game_str, map_str = polarmaze_game, maze_level_2 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) env = GameEnvironment(g, visualize=human, recordingEnabled=True, actionDelay=200) task = GameTask(env) if human: agent = InteractiveAgent() else: agent = PolicyDrivenAgent.buildOptimal(env) exper = EpisodicExperiment(task, agent) res = exper.doEpisodes(1) print res actions = [a for _, a, _ in env._allEvents] print actions makeGifVideo(env, actions, initstate=env._initstate)
def test3():
    """Evolve a feedforward controller on the consistent-corridor maze with SNES.

    Plots the best trajectory after each batch of episodes in a 2x2 grid and
    stops early once the desired evaluation is reached.
    """
    # Removed the unused import of office_layout_2.
    from examples.gridphysics.mazes.simple import consistent_corridor
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES
    g = VGDLParser().parseGame(polarmaze_game)
    g.buildLevel(consistent_corridor)
    game_env = GameEnvironment(g)
    net = buildNet(game_env.outdim, 4, 4, temperature=0.05, recurrent=False)
    algo = SNES(lambda x: someEpisodes(game_env, x), net, verbose=True, desiredEvaluation=0.78)
    rows, cols = 2, 2
    episodesPerStep = 3
    for i in range(rows * cols):
        pylab.subplot(rows, cols, i + 1)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str((i + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
    print
    pylab.show()
def testInteractions(): from random import randint from pybrain.rl.experiments.episodic import EpisodicExperiment from vgdl.core import VGDLParser from examples.gridphysics.mazes import polarmaze_game, maze_level_1 from pybrain.rl.agents.agent import Agent class DummyAgent(Agent): total = 4 def getAction(self): res = randint(0, self.total - 1) return res game_str, map_str = polarmaze_game, maze_level_1 g = VGDLParser().parseGame(game_str) g.buildLevel(map_str) env = GameEnvironment(g, visualize=True, actionDelay=100) task = GameTask(env) agent = DummyAgent() exper = EpisodicExperiment(task, agent) res = exper.doEpisodes(2) print res
def test2():
    """SNES on the polar maze; plots intermediate trajectories in a 3x3 grid."""
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.optimization import SNES
    game_str, map_str = polarmaze_game, maze_level_1
    parsed = VGDLParser().parseGame(game_str)
    parsed.buildLevel(map_str)
    game_env = GameEnvironment(parsed, actionDelay=100, recordingEnabled=True)
    controller = buildNet(game_env.outdim, 6, 2)
    evaluate = lambda x: someEpisodes(game_env, x)
    algo = SNES(evaluate, controller, verbose=True, desiredEvaluation=0.43)
    rows, cols = 3, 3
    episodesPerStep = 2
    for step in range(rows * cols):
        pylab.subplot(rows, cols, step + 1)
        algo.learn(episodesPerStep)
        controller._setParameters(algo.bestEvaluable)
        plotBackground(game_env)
        plotTrajectories(game_env, controller)
        pylab.title(str((step + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
    print
    pylab.show()
    # Tail of a corridor-building helper whose definition starts above this
    # chunk: appends the goal, a stretch of empty corridor, the avatar and
    # item rows, and the closing wall row.
    s += "w w\n"
    s += "wGw\n"
    for _ in range(length/2-2):
        s += "w w\n"
    s += "wAw\n"
    s += "w4w\n"
    s += "w1w\n"
    s += "www\n"
    return s

def ringworld(width):
    """Build a one-row ring level string: a corridor of `width` cells
    enclosed by walls, with a goal 'G' placed inside it."""
    assert width > 1
    level = ["w"]*(width+2)+["\n"]
    level += ["w"]+[" "]*width+["w\n"]
    level += ["w"]*(width+2)+["\n"]
    # Place the goal roughly in the middle of the corridor row.
    level[int(width*1.5+3.5)] = 'G'
    #level[-(width+5)] = 'A'
    level_str = ''.join(level)
    return level_str

if __name__ == "__main__":
    print ringworld(9)
    from vgdl.core import VGDLParser
    # Play the wrap-around maze on a ring, then the portal variant.
    g = VGDLParser().parseGame(wrapmaze_game)
    g.buildLevel(ringworld(19))
    g.randomizeAvatar()
    g.startGame()
    VGDLParser.playGame(portalmaze_game, portalringworld(19))
def runLunarLander():
    """Sanity-check state prediction for the lunar lander.

    Rolls out a random action sequence, perturbs one action, predicts the
    resulting final state analytically, re-runs the game, and prints the
    prediction error.
    """
    # import lunar lander
    from vgdl.examples.continuousphysics.lander import lander_game, lander_level
    # build the game
    g = VGDLParser().parseGame(lander_game)
    g.buildLevel(lander_level)
    # TODO: Determine how to not need to bring up the pygame display in order to run the game.
    g._initScreen([1, 1])
    ship = g.getAvatars()[0]
    # store initial ship state
    initState = [ship.rect.x, ship.rect.y, ship.speed, ship.orientation]
    print "starting position: " + str(ship)
    print "starting state: " + str(initState)
    # get random actions
    actions = generateInput(ACTIONS)
    states = [initState]
    # move ship based on random actions
    print actions
    for a in actions:
        # Each action is held for REPEATS ticks.
        for i in range(REPEATS):
            ship.action = a
            updateGame(g, a)
            # NOTE(review): `ended` is not defined in this function —
            # presumably a module-level flag set by updateGame(); confirm.
            if ended:
                print a, i
                break
        states.append(makeState(ship))
    endState = states[len(states)-1]
    # confirm final position
    print "first final position after actions: " + str(ship)
    print "final state: " + str(endState)
    # reroll ship back to initial state
    setState(ship, initState)
    # vary action sequence
    # first pick a point to vary
    random.seed(10466)
    varyIndex = random.randint(0, len(actions) - 1)
    # then change that action
    oldAction = actions[varyIndex]
    actions[varyIndex] = BASEDIRS[random.randint(0, len(BASEDIRS) - 1)]
    # print out the change and the full list of actions
    print "changed action " + str(varyIndex) + " to " + str(actions[varyIndex])
    print "new actions: " + str(actions)
    # predict through simple calculation how the final position should be
    predictState = predictOutcome(states, actions, oldAction, varyIndex)
    print "predicted state " + str(predictState)
    # find out where the actual final position is
    for a in actions:
        for i in range(REPEATS):
            updateGame(g, a)
            if ended:
                print a, i
                break
    endState = makeState(ship)
    print "actual ending position: " + str(ship)
    print "ending state: " + str(endState)
    # get error
    error = [endState[0] - predictState[0], endState[1] - predictState[1]]
    print "prediction error: " + str(error)