import numpy as np

def test():
    # Self-check: run LSPI on the chainwalk domain (assumes LSPI is defined
    # in this module, as it is called without an import).
    try:
        from gridworld.chainwalk import Chainwalk
    except ImportError:
        print("Unable to import Chainwalk for test!")
        return
    cw = Chainwalk()
    trace = cw.trace()
    zeros = np.zeros(cw.nfeatures())
    w = LSPI(trace, 0.0001, cw, zeros, show=True)
    print(w)
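# A minimal usage sketch for the test above (the __main__ guard is an
# illustrative addition, not part of the original module):
if __name__ == "__main__":
    test()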
    pi = [gw.linear_policy(w0, s) for s in range(gw.nstates)]
    gw.set_arrows(pi)
    gw.background()
    gw.mainloop()

if test_sarsa:
    gw = GridworldGui(nrows=9, ncols=9, endstates=[0], walls=[])
    learner = Sarsa(8, 81, 0.5, 0.9, 0.9, 0.1)
    learner.learn(10000, gw, verbose=True)
    pi = [learner.best(s) for s in range(gw.nstates)]
    gw.set_arrows(pi)
    gw.background()
    gw.mainloop()

if test_chainwalk:
    cw = Chainwalk()
    t = cw.trace(1000)
    policy0 = np.zeros(cw.nfeatures())
    print(LSTDQ(t, cw, policy0))

if test_scale:
    gw = GridworldGui(nrows=64, ncols=64, size=8, endstates=[0], walls=[])
    try:
        # Reuse a cached trace if one exists; pickle files need binary mode.
        t = pickle.load(open("scale_trace.pck", "rb"))
    except IOError:
        t = gw.trace(100000)  # , show=False
        pickle.dump(t, open("scale_trace.pck", "wb"), pickle.HIGHEST_PROTOCOL)
    policy0 = np.zeros(gw.nfeatures())
    # Alternative single-iteration run with the sparse "alt" solver:
    # w0, weights0 = LSPI(t, 0.005, gw, policy0, maxiter=1, method="alt",
    #                     debug=False, timer=True, show=False, format="csr")
    w0, weights0 = LSPI(t, 0.005, gw, policy0, maxiter=10, method="parallel",
                        debug=False, timer=True, show=True, ncpus=6)
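    # Hedged sketch: the policy learned in the scale test can be rendered
    # with the same linear_policy / set_arrows pattern used for the smaller
    # grids above (assuming GridworldGui scales to the 64x64 layout):
    pi = [gw.linear_policy(w0, s) for s in range(gw.nstates)]
    gw.set_arrows(pi)
    gw.background()
    gw.mainloop()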
"""
Author: Jeremy M. Stober
Program: TD_EXAMPLE.PY
Date: Friday, February 24 2012
Description: Examples using TD algorithms to learn value functions.
"""

from gridworld.boyan import Boyan
from gridworld.chainwalk import Chainwalk
from cartpole import CartPole
from td import TD, TDQ, TDQCmac, SarsaCmac, Sarsa, ActorCritic, ActorCriticCmac

# A simple environment.
env = Boyan()
learner = TD(13, 0.1, 1.0, 0.8)
learner.learn(1000, env, env.random_policy)
print(learner.V)

env = Chainwalk()
learnerq = TDQ(2, 4, 0.1, 0.9, 0.8)  # constructed but not trained in this example

env = CartPole()
# learnerq = SarsaCmac(2, 0.01, 0.95, 0.9, 0.01)
# learnerq = Sarsa(2, 170, 0.001, 0.95, 0.5, 0.01)
# learnerq = ActorCritic(2, 162, 0.5, 0.5, 0.95, 0.8, 0.9)
# From an old Sutton paper -- seems to work quite well.
learnerq = ActorCriticCmac(2, 0.5, 1.0, 0.95, 0.8, 0.9)
# Clearly does some learning, but not nearly as well; the policy is not as stable.
learnerq.learn(1000, env)
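# Hedged sketch: inspecting individual state values after TD learning.
# Assuming learner.V is an indexable array over the 13 Boyan chain states
# (only the whole vector is printed above), a single entry can be read as:
print("V(0) = %s" % learner.V[0])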