def testAugmented():
    """Run the MDP conversion and an optimal-policy episode on a rigid Zelda level.

    Steps, in order:

    1. Parse ``rigidzelda_game`` and build ``zelda_level2`` into it.
    2. Wrap the game in a non-visualized, recording ``GameEnvironment``.
    3. Convert it with ``MDPconverter`` and print the converter's states,
       the first element of the transitions (``Ts[0]``) and the rewards.
    4. Build an agent with ``PolicyDrivenAgent.buildOptimal``, switch
       visualization on and run one episode through ``EpisodicExperiment``.

    ``GameEnvironment`` and ``GameTask`` are not imported here; they are
    expected to be available at module scope.
    """
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from mdpmap import MDPconverter
    from agents import PolicyDrivenAgent
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game

    # NOTE(review): this literal is reproduced exactly as it appears in the
    # reviewed source, where it sits on a single line. It looks like a grid
    # whose row line breaks (and possibly runs of spaces) were lost --
    # confirm against the intended level layout before relying on it.
    zelda_level2 = """ wwwwwwwwwwwww wA wwk1ww w ww ww 1 w ww wwww+w wwwww1ww www wwwww 0 Gww wwwwwwwwwwwww """

    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g, visualize=False, recordingEnabled=True, actionDelay=150)

    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    # Single-argument print(...) calls behave the same on Python 2 and 3,
    # unlike the Python 2-only print statement.
    print(C.states)
    print(Ts[0])
    print(R)

    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)

    # Visualization is only switched on for the played episode, after the
    # conversion and policy computation are done.
    env.visualize = True
    env.reset()
    task = GameTask(env)
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
def buildOptimal(game_env, discountFactor=0.99):
    """Return an agent that follows the policy found by policy iteration.

    The environment is converted with ``MDPconverter``; the resulting
    transitions and rewards are handed to ``policyIteration`` together with
    ``discountFactor``. The environment is reset before the agent is built.

    The agent receives the computed policy and a callback that maps the
    environment's current state to its position in the converter's state
    list.
    """
    from mdpmap import MDPconverter

    converter = MDPconverter(env=game_env)
    transitions, rewards, _ = converter.convert()
    policy, _ = policyIteration(transitions, rewards, discountFactor=discountFactor)
    game_env.reset()

    def current_state_index(*_):
        """Ignore all arguments; look up the live state in the state list."""
        return converter.states.index(game_env.getState())

    return PolicyDrivenAgent(policy, current_state_index)