def plotLSPIValues(gametype, layout, discountFactor=0.9, useTD=False, showValue=False):
    """Fit a least-squares policy from observations and plot its values.

    The game is parsed from `gametype`, built on `layout`, and converted
    to an MDP together with the state-to-observation mapping. A policy is
    then approximated from observations only (not full state information):
    state-based LSTD policy iteration when `useTD` is True, otherwise
    state-action-based LSPI. The policy's true values are plotted over the
    level grid; with `showValue` the expected discounted reward at the
    initial state is written as the x-axis label.
    """
    # construct the game instance from its description and level layout
    game = VGDLParser().parseGame(gametype)
    game.buildLevel(layout)
    # convert into an MDP plus the mapping from states to observations
    converter = MDPconverter(game)
    transitions, rewards, fMap = converter.convert()
    # find the best least-squares approximation to the policy,
    # given only observations, not the state information
    if useTD:
        # state-based
        _, Tlspi = LSTD_PI_policy(fMap, transitions, rewards, discountFactor=discountFactor)
    else:
        # state-action-based
        _, Tlspi = LSPI_policy(fMap, transitions, rewards, discountFactor=discountFactor)
    # evaluate the learned policy exactly
    Vlspi = trueValues(Tlspi, rewards, discountFactor=discountFactor)
    # draw those values on the level grid
    featurePlot((game.width, game.height), converter.states, Vlspi)
    if showValue:
        # expected discounted reward at the initial state
        Vinit = Vlspi[converter.initIndex()]
        pylab.xlabel("V0=%.4f"%Vinit)
def plotBackground(env, known=None):
    """Plot the environment's reward landscape as a plot background.

    The MDP conversion is expensive, so its result — a ``(size, states,
    rewards)`` tuple — is computed once and memoized across calls. The
    previous signature used a mutable default argument (``known=[[]]``) as
    the cache, which is the classic shared-default pitfall; the cache now
    lives explicitly on the function object. Callers may still pass their
    own ``known`` container (a list holding one list) to control caching
    themselves, exactly as before.
    """
    if known is None:
        # fall back to the shared cross-call cache
        known = plotBackground._cache
    if len(known[0]) == 0:
        # first call with this cache: build and memoize the conversion
        from vgdl.mdpmap import MDPconverter
        g = env._game
        C = MDPconverter(g, env=env, verbose=False)
        _, R, _ = C.convert()
        size = (g.width, g.height)
        known[0].append((size, C.states, R))
    featurePlot(*known[0][0])

# one-slot memoization cache: holds [(size, states, rewards)] once computed
plotBackground._cache = [[]]
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    """Compute and plot the optimal value function of a VGDL game level.

    The game is parsed and built, converted to an MDP, solved exactly by
    policy iteration, and the resulting state values are drawn on the
    level grid with policy direction arrows. When `showValue` is set, the
    expected discounted reward at the initial state is written as the
    x-axis label.
    """
    # construct the game instance
    game = VGDLParser().parseGame(gametype)
    game.buildLevel(layout)
    # convert the game into transition matrices and rewards
    converter = MDPconverter(game)
    transitions, rewards, _ = converter.convert()
    # solve for the optimal policy exactly
    _, Topt = policyIteration(transitions, rewards, discountFactor=discountFactor)
    # evaluate that policy to obtain the true state values
    Vopt = trueValues(Topt, rewards, discountFactor=discountFactor)
    # draw the values (with direction arrows) on the level grid
    featurePlot((game.width, game.height), converter.states, Vopt, plotdirections=True)
    if showValue:
        # annotate with the expected discounted return from the start state
        Vinit = Vopt[converter.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    # NOTE(review): this is a near-exact duplicate of the plotOptimalValues
    # definition earlier in this file (it differs only in spacing around the
    # final "%" operator). Being defined later, this copy is the one Python
    # keeps; one of the two should be removed.
    """Build the game, solve its MDP by policy iteration, and plot the
    optimal state values with policy direction arrows.
    """
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)
    # transform into an MDP
    C = MDPconverter(g)
    Ts, R, _ = C.convert()
    # find the optimal policy
    _, Topt = policyIteration(Ts, R, discountFactor=discountFactor)
    # evaluate the policy
    Vopt = trueValues(Topt, R, discountFactor=discountFactor)
    # plot those values
    featurePlot((g.width, g.height), C.states, Vopt, plotdirections=True)
    if showValue:
        # expected discounted reward at initial state
        Vinit = Vopt[C.initIndex()]
        pylab.xlabel("V0=%.4f"%Vinit)