def test_gapworld():
    # Register the avatar class before the game is parsed.
    vgdl.registry.register_class(RightMovingJumpingAvatar)

    game = load_gapworld_game_and_level()
    env = VGDLPybrainEnvironment(game, GapworldObserver(game))
    task = VGDLPybrainTask(env)

    # Enumerate the reachable states and build the MDP tensors.
    # T is indexed by action first: T[a] is that action's transition matrix.
    mapper = vgdl.mdp.MDPConverter(task)
    T, R = mapper.convert_task_to_mdp()

    print('Known states:')
    print(mapper.get_observations())
    for action_i in range(T.shape[0]):
        print('Action {}:'.format(env.action_set[action_i]))
        print(T[action_i])
    print('Rewards:')
    print(R)

    from pybrain.rl.learners.modelbased import policyIteration, trueValues
    # policy is S x A
    policy, optimal_T = policyIteration(T, R, discountFactor=.9)
    # This looks wrong whether or not we allow transitions out of
    # absorbing states, but it is a useful indication.
    V = trueValues(optimal_T, R, discountFactor=.9)

    print('Optimal policy:')
    print(policy)
    print('State values:')
    print(V)
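# For intuition, a minimal self-contained sketch of the policy-evaluation
# step that trueValues performs conceptually: solve the Bellman equation
# V = R + gamma * Topt @ V for the transition matrix Topt induced by a fixed
# policy. Assumptions, not the library's verified conventions: Topt is a
# row-stochastic S x S matrix and R is a per-state reward vector.
import numpy as np

def evaluate_policy_sketch(Topt, R, discountFactor=0.9):
    # (I - gamma * Topt) V = R has a unique solution for gamma < 1.
    n = np.asarray(Topt).shape[0]
    return np.linalg.solve(np.eye(n) - discountFactor * np.asarray(Topt),
                           np.asarray(R))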
def buildOptimal(game_env, discountFactor=0.99):
    """ Given a game, find the optimal (state-based) policy and
    return an agent that plays accordingly. """
    from vgdl.mdpmap import MDPconverter
    C = MDPconverter(env=game_env)
    Ts, R, _ = C.convert()
    policy, _ = policyIteration(Ts, R, discountFactor=discountFactor)
    game_env.reset()

    def stateIndex(*_):
        # Map the environment's current state to its index in the
        # converter's state list; the policy is indexed by that row.
        return C.states.index(game_env.getState())

    return PolicyDrivenAgent(policy, stateIndex)
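# A hypothetical usage sketch for buildOptimal. The reset/performAction
# methods on the environment and the agent's getAction method are
# assumptions about the surrounding interface layer, not verified APIs.
def runOptimalAgent(game_env, steps=100):
    agent = buildOptimal(game_env)  # buildOptimal also resets the environment
    for _ in range(steps):
        # The agent looks up the greedy action for the current state index
        # and the environment executes it.
        game_env.performAction(agent.getAction())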
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)
    # transform into an MDP
    C = MDPconverter(g)
    Ts, R, _ = C.convert()
    # find the optimal policy
    _, Topt = policyIteration(Ts, R, discountFactor=discountFactor)
    # evaluate the policy
    Vopt = trueValues(Topt, R, discountFactor=discountFactor)
    # plot those values
    featurePlot((g.width, g.height), C.states, Vopt, plotdirections=True)
    if showValue:
        # expected discounted reward at the initial state
        Vinit = Vopt[C.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)
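# A usage sketch for plotOptimalValues. `game_source` and `level_source` are
# placeholders for a VGDL game description string and an ASCII level layout
# (e.g. one of the maze examples shipped with the repo); they are not
# defined here.
def showOptimalValues(game_source, level_source):
    # Plot the optimal state values and display V0, the expected
    # discounted return from the initial state.
    plotOptimalValues(game_source, level_source,
                      discountFactor=0.9, showValue=True)
    pylab.show()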