def test():
    """Run LSPI on a trace from a Chainwalk instance and print the result.

    ``Chainwalk`` is imported lazily inside the function. If that import
    fails, a notice is printed and the function returns ``None`` without
    running anything else.
    """
    try:
        from gridworld.chainwalk import Chainwalk
    except ImportError:
        # Only a failed import means "test unavailable". Any other error
        # (including Ctrl-C) should surface instead of being reported as a
        # missing package.
        print("Unable to import Chainwalk for test!")
        return

    cw = Chainwalk()
    trace = cw.trace()
    # Zero vector with one entry per feature reported by the environment.
    zeros = np.zeros(cw.nfeatures())
    w = LSPI(trace, 0.0001, cw, zeros, show=True)
    print(w)
def test():
    """Run LSPI on a trace from a Chainwalk instance and print the result.

    ``Chainwalk`` is imported lazily inside the function. If that import
    fails, a notice is printed and the function returns ``None`` without
    running anything else.

    Output uses single-argument, parenthesised ``print`` calls, which
    produce the same text under Python 2 and Python 3.
    """
    try:
        from gridworld.chainwalk import Chainwalk
    except ImportError:
        # Only a failed import means "test unavailable". Any other error
        # (including Ctrl-C) should surface instead of being reported as a
        # missing package.
        print("Unable to import Chainwalk for test!")
        return

    cw = Chainwalk()
    trace = cw.trace()
    # Zero vector with one entry per feature reported by the environment.
    zeros = np.zeros(cw.nfeatures())
    w = LSPI(trace, 0.0001, cw, zeros, show=True)
    print(w)
gw.set_arrows(pi) gw.background() gw.mainloop() if test_sarsa: gw = GridworldGui(nrows=9, ncols=9, endstates=[0], walls=[]) learner = Sarsa(8, 81, 0.5, 0.9, 0.9, 0.1) learner.learn(10000, gw, verbose=True) pi = [learner.best(s) for s in range(gw.nstates)] gw.set_arrows(pi) gw.background() gw.mainloop() if test_chainwalk: cw = Chainwalk() t = cw.trace(1000) policy0 = np.zeros(cw.nfeatures()) print(LSTDQ(t, cw, policy0)) if test_scale: gw = GridworldGui(nrows=64, ncols=64, size=8, endstates=[0], walls=[]) try: t = pickle.load(open("scale_trace.pck")) except: t = gw.trace(100000) #, show = False) pickle.dump(t, open("scale_trace.pck", "w"), pickle.HIGHEST_PROTOCOL) policy0 = np.zeros(gw.nfeatures()) #w0, weights0 = LSPI(t, 0.005, gw, policy0, maxiter=1, method="alt", debug = False, timer = True, show=False, format="csr") w0, weights0 = LSPI(t, 0.005,
# NOTE(review): the three calls below are the tail of an earlier branch whose
# header (and the definitions of `gw` and `pi`) lies outside this excerpt.
# Re-indent them under that branch when merging.
gw.set_arrows(pi)
gw.background()
gw.mainloop()

if test_sarsa:
    # Train a Sarsa learner on a 9x9 grid with no walls, then pass
    # learner.best(s) for every state to the GUI as arrows.
    gw = GridworldGui(nrows=9, ncols=9, endstates=[0], walls=[])
    learner = Sarsa(8, 81, 0.5, 0.9, 0.9, 0.1)
    learner.learn(10000, gw, verbose=True)
    pi = [learner.best(s) for s in range(gw.nstates)]
    gw.set_arrows(pi)
    gw.background()
    gw.mainloop()

if test_chainwalk:
    # Print the LSTDQ result for a 1000-sample Chainwalk trace, starting
    # from an all-zero vector with one entry per feature.
    cw = Chainwalk()
    t = cw.trace(1000)
    policy0 = np.zeros(cw.nfeatures())
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(LSTDQ(t, cw, policy0))

if test_scale:
    gw = GridworldGui(nrows=64, ncols=64, size=8, endstates=[0], walls=[])
    # The trace is cached on disk in "scale_trace.pck". Pickle data is
    # binary, so the file must be opened in "rb"/"wb" mode; text mode fails
    # on Python 3 and can corrupt the data on Windows.
    # NOTE: pickle.load can execute arbitrary code, so only use a cache
    # file this script wrote itself.
    try:
        with open("scale_trace.pck", "rb") as trace_file:
            t = pickle.load(trace_file)
    except Exception:
        # Missing or unreadable cache: regenerate the trace and store it.
        # NOTE(review): indentation was lost in this excerpt; the dump is
        # assumed to belong to this fallback branch. Confirm.
        t = gw.trace(100000)  # , show = False)
        with open("scale_trace.pck", "wb") as trace_file:
            pickle.dump(t, trace_file, pickle.HIGHEST_PROTOCOL)
    policy0 = np.zeros(gw.nfeatures())
    # Alternative invocations kept for reference:
    #w0, weights0 = LSPI(t, 0.005, gw, policy0, maxiter=1, method="alt", debug = False, timer = True, show=False, format="csr")
    w0, weights0 = LSPI(
        t, 0.005, gw, policy0,
        maxiter=10, method="parallel", debug=False, timer=True,
        show=True, ncpus=6,
    )
    #w0, weights0 = LSPI(t, 0.005, gw, policy0, maxiter=10, method="sparse", debug = False, timer = True, show=True, format="csr")