Code example #1
File: lspi.py Project: stober/lspi
# np and LSPI come from earlier in lspi.py (not shown in this excerpt)
def test():
    try:
        from gridworld.chainwalk import Chainwalk
    except ImportError:
        print("Unable to import Chainwalk for test!")
        return

    cw = Chainwalk()
    trace = cw.trace()
    zeros = np.zeros(cw.nfeatures())
    w = LSPI(trace, 0.0001, cw, zeros, show=True)
    print(w)
Code example #2
File: lspi.py Project: QueensGambit/lspi
def test():
    try:
        from gridworld.chainwalk import Chainwalk
    except ImportError:
        print("Unable to import Chainwalk for test!")
        return

    cw = Chainwalk()
    trace = cw.trace()
    zeros = np.zeros(cw.nfeatures())
    w = LSPI(trace, 0.0001, cw, zeros, show=True)
    print(w)
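
Examples #1 and #2 are the same smoke test (the two forks differ only in Python 2 vs. Python 3 print syntax): build a Chainwalk environment, collect a trace, and run LSPI from zero weights with tolerance 0.0001. For orientation, here is a minimal sketch of the LSPI outer loop (Lagoudakis & Parr, 2003) that a call like LSPI(trace, 0.0001, cw, zeros) suggests; `lstdq` is passed in as a hypothetical callable (see the LSTDQ sketch under example #3), and the real stober/lspi signature may differ.

import numpy as np

def lspi_sketch(trace, epsilon, env, w0, lstdq, maxiter=50):
    # Iterate LSTDQ policy evaluation until the weight vector stops
    # moving by more than epsilon (greedy policy improvement is implicit
    # in how LSTDQ picks next actions from the current weights).
    w = np.asarray(w0, dtype=float)
    for _ in range(maxiter):
        w_next = lstdq(trace, env, w)   # evaluate the greedy policy for w
        if np.linalg.norm(w_next - w) < epsilon:
            break                       # weights (and policy) converged
        w = w_next
    return w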
Code example #3
File: test_lspi.py Project: QueensGambit/lspi
    pi = [gw.linear_policy(w0, s) for s in range(gw.nstates)]
    gw.set_arrows(pi)
    gw.background()
    gw.mainloop()

if test_sarsa:
    gw = GridworldGui(nrows=9, ncols=9, endstates=[0], walls=[])
    learner = Sarsa(8, 81, 0.5, 0.9, 0.9, 0.1)
    learner.learn(10000, gw, verbose=True)
    pi = [learner.best(s) for s in range(gw.nstates)]
    gw.set_arrows(pi)
    gw.background()
    gw.mainloop()

if test_chainwalk:
    cw = Chainwalk()
    t = cw.trace(1000)
    policy0 = np.zeros(cw.nfeatures())
    print(LSTDQ(t, cw, policy0))

if test_scale:
    gw = GridworldGui(nrows=64, ncols=64, size=8, endstates=[0], walls=[])
    try:
        t = pickle.load(open("scale_trace.pck", "rb"))  # pickle needs binary mode
    except (OSError, pickle.PickleError):
        t = gw.trace(100000)  # , show = False)
        pickle.dump(t, open("scale_trace.pck", "wb"), pickle.HIGHEST_PROTOCOL)

    policy0 = np.zeros(gw.nfeatures())
    # w0, weights0 = LSPI(t, 0.005, gw, policy0, maxiter=1, method="alt", debug=False, timer=True, show=False, format="csr")
    w0, weights0 = LSPI(t, 0.005, gw, policy0, maxiter=10, method="parallel",
                        debug=False, timer=True, show=True, ncpus=6)
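
In example #3 the chainwalk trace is fed straight to LSTDQ, which solves the linear system A w = b with A = Σ φ(s,a)(φ(s,a) − γ φ(s',a'))ᵀ and b = Σ r φ(s,a) over the samples. A minimal sketch follows, assuming samples of the form (s, a, r, s_next) and an env.phi(s, a) feature map with env.nactions discrete actions; the actual trace layout and method names in this project may differ.

import numpy as np

def lstdq_sketch(trace, env, w, gamma=0.95):
    # Accumulate the LSTDQ normal equations A w = b over the samples.
    k = len(w)
    A = np.zeros((k, k))
    b = np.zeros(k)
    for s, a, r, s_next in trace:
        phi = env.phi(s, a)
        # greedy next action under the current weight vector
        a_next = max(range(env.nactions), key=lambda ap: w @ env.phi(s_next, ap))
        phi_next = env.phi(s_next, a_next)
        A += np.outer(phi, phi - gamma * phi_next)
        b += r * phi
    return np.linalg.solve(A, b)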
Code example #4
File: test_lspi.py Project: stober/lspi
    pi = [gw.linear_policy(w0, s) for s in range(gw.nstates)]
    gw.set_arrows(pi)
    gw.background()
    gw.mainloop()

if test_sarsa:
    gw = GridworldGui(nrows=9, ncols=9, endstates=[0], walls=[])
    learner = Sarsa(8, 81, 0.5, 0.9, 0.9, 0.1)
    learner.learn(10000, gw, verbose=True)
    pi = [learner.best(s) for s in range(gw.nstates)]
    gw.set_arrows(pi)
    gw.background()
    gw.mainloop()

if test_chainwalk:
    cw = Chainwalk()
    t = cw.trace(1000)
    policy0 = np.zeros(cw.nfeatures())
    print(LSTDQ(t, cw, policy0))

if test_scale:
    gw = GridworldGui(nrows=64, ncols=64, size=8, endstates=[0], walls=[])
    try:
        t = pickle.load(open("scale_trace.pck", "rb"))  # pickle needs binary mode
    except (OSError, pickle.PickleError):
        t = gw.trace(100000)  # , show = False)
        pickle.dump(t, open("scale_trace.pck", "wb"), pickle.HIGHEST_PROTOCOL)

    policy0 = np.zeros(gw.nfeatures())
    # w0, weights0 = LSPI(t, 0.005, gw, policy0, maxiter=1, method="alt", debug=False, timer=True, show=False, format="csr")
    w0, weights0 = LSPI(t, 0.005, gw, policy0, maxiter=10, method="parallel",
                        debug=False, timer=True, show=True, ncpus=6)
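
The method="parallel", ncpus=6 arguments suggest the LSTDQ accumulation is distributed across worker processes. Since the (A, b) sums are additive over samples, the trace can be split into chunks, accumulated independently, and summed. A rough sketch of that idea, assuming samples precomputed as (phi, r, phi_next) feature rows; `accumulate_chunk` and the chunking scheme are illustrative, not this project's actual implementation.

import numpy as np
from multiprocessing import Pool

GAMMA = 0.95  # assumed discount factor for this sketch

def accumulate_chunk(chunk):
    # Partial LSTDQ sums over one chunk of (phi, r, phi_next) rows.
    k = len(chunk[0][0])
    A = np.zeros((k, k))
    b = np.zeros(k)
    for phi, r, phi_next in chunk:
        A += np.outer(phi, phi - GAMMA * phi_next)
        b += r * phi
    return A, b

def parallel_lstdq(samples, ncpus=6):
    # Round-robin split keeps the chunks balanced; drop any empty ones.
    chunks = [samples[i::ncpus] for i in range(ncpus) if samples[i::ncpus]]
    with Pool(ncpus) as pool:  # needs a __main__ guard on Windows
        parts = pool.map(accumulate_chunk, chunks)
    A = sum(p[0] for p in parts)
    b = sum(p[1] for p in parts)
    return np.linalg.solve(A, b)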
Code example #5
File: td_example.py Project: tanduong/td
"""
Author: Jeremy M. Stober
Program: TD_EXAMPLE.PY
Date: Friday, February 24 2012
Description: Examples using TD algorithms to learn value functions.
"""

from gridworld.boyan import Boyan
from gridworld.chainwalk import Chainwalk
from cartpole import CartPole
from td import TD, TDQ, TDQCmac, SarsaCmac, Sarsa, ActorCritic, ActorCriticCmac

# a simple environment
env = Boyan()
learner = TD(13, 0.1, 1.0, 0.8)
learner.learn(1000, env, env.random_policy)
print(learner.V)

env = Chainwalk()
learnerq = TDQ(2, 4, 0.1, 0.9, 0.8)

import pdb  # debugger import left over from development (unused here)

env = CartPole()
#learnerq = SarsaCmac(2,0.01,0.95,0.9,0.01)
#learnerq = Sarsa(2,170,0.001,0.95,0.5,0.01)
#learnerq = ActorCritic(2, 162, 0.5, 0.5, 0.95, 0.8, 0.9) # From an old Sutton paper -- seems to work quite well.
learnerq = ActorCriticCmac(
    2, 0.5, 1.0, 0.95, 0.8, 0.9
)  # Clearly does some learning, but not nearly as well. Policy not as stable.
learnerq.learn(1000, env)
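
The call TD(13, 0.1, 1.0, 0.8) above apparently takes (nstates, alpha, gamma, lam) for the 13-state Boyan chain. A minimal tabular TD(λ) sketch with accumulating eligibility traces; env.reset()/env.step() are placeholder names, not this project's actual environment interface.

import numpy as np

def td_lambda(env, policy, nstates, alpha, gamma, lam, episodes=1000):
    # Tabular TD(lambda) value estimation with accumulating traces.
    V = np.zeros(nstates)
    for _ in range(episodes):
        e = np.zeros(nstates)              # eligibility traces
        s = env.reset()
        done = False
        while not done:
            s_next, r, done = env.step(policy(s))
            delta = r + gamma * V[s_next] * (not done) - V[s]
            e[s] += 1.0                    # accumulate trace for s
            V += alpha * delta * e         # update all eligible states
            e *= gamma * lam               # decay traces
            s = s_next
    return V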