Example #1
def plotLSPIValues(gametype, layout, discountFactor=0.9, useTD=False, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)

    # transform into an MDP and the mapping to observations
    C = MDPconverter(g)
    Ts, R, fMap = C.convert()

    # find the best least-squares approximation to the policy,
    # given only observations, not the state information
    if useTD:
        # state-based
        _, Tlspi = LSTD_PI_policy(fMap, Ts, R, discountFactor=discountFactor)
    else:
        # state-action-based
        _, Tlspi = LSPI_policy(fMap, Ts, R, discountFactor=discountFactor)

    # evaluate the policy
    Vlspi = trueValues(Tlspi, R, discountFactor=discountFactor)

    # plot those values
    featurePlot((g.width, g.height), C.states, Vlspi)

    if showValue:
        # expected discounted reward at the initial state
        Vinit = Vlspi[C.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)
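For reference, a minimal driver for this function might look like the sketch below. The maze game and level (polarmaze_game, maze_level_1) and their import path are assumptions based on py-vgdl's examples package; substitute whatever VGDL game string and ASCII layout you have at hand.

# hypothetical usage sketch -- the imported game/level names are assumptions
from examples.gridphysics.mazes import polarmaze_game, maze_level_1
import pylab

plotLSPIValues(polarmaze_game, maze_level_1, discountFactor=0.9, showValue=True)
pylab.show()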
Example #2
def testAugmented():
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.interfaces import GameEnvironment, GameTask  # assumed location of these classes

    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww    1 w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""

    
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g, visualize=False,
                          recordingEnabled=True, actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    # inspect the converted MDP: states, first transition matrix, rewards
    print(C.states)
    print(Ts[0])
    print(R)
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    # replay with visualization switched on
    env.visualize = True
    env.reset()
    task = GameTask(env)
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
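If a single replay is not enough, the last line of testAugmented can be replaced with a small evaluation loop. A sketch, assuming pybrain's EpisodicExperiment.doEpisodes(n) returns one reward sequence per episode (verify against your installed version):

# sketch: run several episodes and average the return
# (assumes doEpisodes() returns a list with one reward sequence per episode)
all_rewards = exper.doEpisodes(5)
returns = [sum(rs) for rs in all_rewards]
print(sum(returns) / float(len(returns)))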
Example #3
def plotBackground(env, known=[[]]):
    # the mutable default argument is evaluated once, so it acts as a
    # one-slot cache for the (expensive) MDP conversion across calls
    if len(known[0]) == 0:
        from vgdl.mdpmap import MDPconverter
        g = env._game
        C = MDPconverter(g, env=env, verbose=False)
        _, R, _ = C.convert()
        size = (g.width, g.height)
        known[0].append((size, C.states, R))
    featurePlot(*known[0][0])
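Because Python evaluates default arguments only once, the nested list in known=[[]] survives across calls and memoizes the MDP conversion. The same effect can be had with an explicit cache; the function and variable names below are illustrative, not part of the library:

# illustrative alternative: an explicit module-level cache instead of a mutable default
_background_cache = []

def plotBackgroundExplicit(env):
    if not _background_cache:
        from vgdl.mdpmap import MDPconverter
        g = env._game
        C = MDPconverter(g, env=env, verbose=False)
        _, R, _ = C.convert()
        _background_cache.append(((g.width, g.height), C.states, R))
    featurePlot(*_background_cache[0])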
Example #4
    def buildOptimal(game_env, discountFactor=0.99):
        """ Given a game, find the optimal (state-based) policy and 
        return an agent that is playing accordingly. """
        from vgdl.mdpmap import MDPconverter
        C = MDPconverter(env=game_env)
        Ts, R, _ = C.convert()
        policy, _ = policyIteration(Ts, R, discountFactor=discountFactor)
        game_env.reset()

        def x(*_):
            # observation callback: map the current game state to its MDP state index
            s = game_env.getState()
            i = C.states.index(s)
            return i
        #return PolicyDrivenAgent(policy, lambda *_: C.states.index(game_env.getState()))
        return PolicyDrivenAgent(policy, x)
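A condensed usage sketch for buildOptimal, mirroring the flow of Example #2; the import path for GameEnvironment and GameTask and the game_description / level_string placeholders are assumptions:

# hedged usage sketch: build the optimal agent for a game and watch one episode
from vgdl.core import VGDLParser
from vgdl.agents import PolicyDrivenAgent
from vgdl.interfaces import GameEnvironment, GameTask  # assumed module path
from pybrain.rl.experiments.episodic import EpisodicExperiment

g = VGDLParser().parseGame(game_description)  # game_description: your VGDL game string
g.buildLevel(level_string)                    # level_string: your ASCII level layout
env = GameEnvironment(g, visualize=True)
agent = PolicyDrivenAgent.buildOptimal(env)
env.reset()
EpisodicExperiment(GameTask(env), agent).doEpisodes(1)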
Example #5
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)

    # transform into an MDP
    C = MDPconverter(g)
    Ts, R, _ = C.convert()

    # find the optimal policy
    _, Topt = policyIteration(Ts, R, discountFactor=discountFactor)

    # evaluate the policy
    Vopt = trueValues(Topt, R, discountFactor=discountFactor)

    # plot those values
    featurePlot((g.width, g.height), C.states, Vopt, plotdirections=True)

    if showValue:
        # expected discounted reward at the initial state
        Vinit = Vopt[C.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)
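Since plotOptimalValues and plotLSPIValues (Example #1) take the same game and layout arguments, a natural sanity check is to draw both value functions side by side. A sketch, assuming the plotting helpers draw into the current pylab axes and using placeholder game_description / level_string variables:

# hedged sketch: exact optimal values (left) vs. LSPI approximation (right)
import pylab
pylab.figure(figsize=(10, 4))
pylab.subplot(1, 2, 1)
plotOptimalValues(game_description, level_string, showValue=True)
pylab.subplot(1, 2, 2)
plotLSPIValues(game_description, level_string, showValue=True)
pylab.show()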