Example #1
def test_gapworld():
    # Register the avatar first
    vgdl.registry.register_class(RightMovingJumpingAvatar)
    game = load_gapworld_game_and_level()
    env = VGDLPybrainEnvironment(game, GapworldObserver(game))
    task = VGDLPybrainTask(env)
    mapper = vgdl.mdp.MDPConverter(task)
    T, R = mapper.convert_task_to_mdp()
    print('Known states:')
    print(mapper.get_observations())
    for action_i in range(T.shape[0]):
        print('Action {}:'.format(env.action_set[action_i]))
        print(T[action_i])
    print('Rewards:')
    print(R)

    from pybrain.rl.learners.modelbased import policyIteration, trueValues
    # policy is S x A
    policy, optimal_T = policyIteration(T, R, discountFactor=.9)
    # The resulting values can look off depending on whether transitions out of
    # absorbing states are allowed, but they are still a useful sanity check
    V = trueValues(optimal_T, R, discountFactor=.9)

    print('Optimal policy:')
    print(policy)
    # Drop into the debugger to inspect the policy and values interactively
    import ipdb
    ipdb.set_trace()
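The values printed above can be double-checked without pybrain by running a plain value-iteration pass over the same arrays. This is a minimal sketch only, assuming T is laid out as (num_actions, num_states, num_states) and R is a per-state reward collected on arrival in a state; the function name value_iteration and that reward convention are assumptions, not part of the example above.

import numpy as np

def value_iteration(T, R, discount=0.9, tol=1e-8, max_iters=10000):
    # T: (num_actions, num_states, num_states), R: (num_states,) -- assumed layout
    num_actions, num_states, _ = T.shape
    V = np.zeros(num_states)
    for _ in range(max_iters):
        # Q[a, s] = expected reward on arrival plus discounted value of the next state
        Q = T @ (R + discount * V)          # shape (num_actions, num_states)
        V_new = Q.max(axis=0)
        if np.max(np.abs(V_new - V)) < tol:
            V = V_new
            break
        V = V_new
    greedy_policy = Q.argmax(axis=0)        # greedy action index per state
    return V, greedy_policy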
Example #2
def plotLSPIValues(gametype, layout, discountFactor=0.9, useTD=False, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)
    
    # transform into an MDP and the mapping to observations
    C = MDPconverter(g)
    Ts, R, fMap = C.convert()    
    
    # find the best least-squares approximation to the policy,
    # given only the observations, not the full state information
    if useTD:
        # state-based
        _, Tlspi = LSTD_PI_policy(fMap, Ts, R, discountFactor=discountFactor)
    else:
        # state-action-based
        _, Tlspi = LSPI_policy(fMap, Ts, R, discountFactor=discountFactor)
    
    # evaluate the policy
    Vlspi = trueValues(Tlspi, R, discountFactor=discountFactor)
        
    # plot those values    
    featurePlot((g.width, g.height), C.states, Vlspi)
    
    if showValue:
        # expected discounted reward at initial state
        Vinit = Vlspi[C.initIndex()]
        pylab.xlabel("V0=%.4f"%Vinit)
Example #4
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)

    # transform into an MDP
    C = MDPconverter(g)
    Ts, R, _ = C.convert()

    # find the optimal policy
    _, Topt = policyIteration(Ts, R, discountFactor=discountFactor)

    # evaluate the policy
    Vopt = trueValues(Topt, R, discountFactor=discountFactor)

    # plot those values
    featurePlot((g.width, g.height), C.states, Vopt, plotdirections=True)

    if showValue:
        # expected discounted reward at initial state
        Vinit = Vopt[C.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)