Example #1
import random

def qlearning(grid, policy, evaler, num_iter1, alpha):
    """Q-learning with a linear function approximator over the policy's features."""
    actions = grid.actions
    gamma = grid.gamma
    y = []
    # Initialize every feature weight to a small constant.
    for i in range(len(policy.theta)):
        policy.theta[i] = 0.1

    for iter1 in range(num_iter1):
        # Record the current policy's evaluation score before each episode.
        y.append(evaler.eval(policy))
        f = grid.start()
        a = random.choice(actions)  # random initial action
        t = False
        count = 0
        while not t and count < 100:
            t, f1, r = grid.receive(a)
            # Off-policy target: maximum Q-value over all actions in the next state.
            qmax = max(policy.qfunc(f1, a1) for a1 in actions)
            update(policy, f, a, r + gamma * qmax, alpha)
            f = f1
            a = policy.epsilon_greedy(f1)  # behave epsilon-greedily
            count += 1

    return policy, y
Example #2
import random

def sarsa(grid, policy, evaler, num_iter1, alpha):
    """SARSA (on-policy TD control) with a linear function approximator."""
    actions = grid.actions
    gamma = grid.gamma
    y = []
    # Initialize every feature weight to a small constant.
    for i in range(len(policy.theta)):
        policy.theta[i] = 0.1

    for iter1 in range(num_iter1):
        # Record the current policy's evaluation score before each episode.
        y.append(evaler.eval(policy))
        f = grid.start()
        a = random.choice(actions)  # random initial action
        t = False
        count = 0
        while not t and count < 100:
            t, f1, r = grid.receive(a)
            # On-policy target: Q-value of the action actually chosen next.
            a1 = policy.epsilon_greedy(f1)
            update(policy, f, a, r + gamma * policy.qfunc(f1, a1), alpha)
            f = f1
            a = a1
            count += 1

    return policy, y
Example #3
def update(policy, f, a, tvalue, alpha):
    """One gradient step on the squared TD error of a linear Q-function."""
    pvalue = policy.qfunc(f, a)           # current estimate Q(f, a)
    error = pvalue - tvalue               # TD error against the target tvalue
    fea = policy.get_fea_vec(f, a)        # feature vector = gradient of Q w.r.t. theta
    policy.theta -= alpha * error * fea   # assumes theta is a NumPy array
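The three snippets above rely on an interface that is not shown: a policy exposing theta, qfunc, get_fea_vec, and epsilon_greedy, an environment exposing actions, gamma, start, and receive, and an evaluator exposing eval. The following is a minimal, self-contained sketch of that interface so the examples can be run end to end; the names LinearPolicy, ChainGrid, and GreedyEvaler and all of their internals are hypothetical stand-ins, not the original repository's classes.

import random
import numpy as np


class LinearPolicy:
    """Q(s, a) = theta . phi(s, a) with one-hot state-action features."""
    def __init__(self, num_states, actions, epsilon=0.1):
        self.actions = actions
        self.epsilon = epsilon
        self.theta = np.zeros(num_states * len(actions))

    def get_fea_vec(self, f, a):
        phi = np.zeros_like(self.theta)
        phi[f * len(self.actions) + self.actions.index(a)] = 1.0
        return phi

    def qfunc(self, f, a):
        return float(np.dot(self.theta, self.get_fea_vec(f, a)))

    def epsilon_greedy(self, f):
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        return max(self.actions, key=lambda a: self.qfunc(f, a))


class ChainGrid:
    """Tiny chain MDP: 'left'/'right' moves, reward 1.0 at the rightmost state."""
    def __init__(self, length=5, gamma=0.9):
        self.actions = ['left', 'right']
        self.gamma = gamma
        self.length = length
        self.state = 0

    def start(self):
        self.state = 0
        return self.state

    def receive(self, a):
        step = -1 if a == 'left' else 1
        self.state = min(self.length - 1, max(0, self.state + step))
        terminal = self.state == self.length - 1
        return terminal, self.state, (1.0 if terminal else 0.0)


class GreedyEvaler:
    """Scores a policy by the discounted return of a greedy rollout."""
    def __init__(self, grid, max_steps=20):
        self.grid, self.max_steps = grid, max_steps

    def eval(self, policy):
        rollout = ChainGrid(self.grid.length, self.grid.gamma)
        f, total, discount = rollout.start(), 0.0, 1.0
        for _ in range(self.max_steps):
            a = max(rollout.actions, key=lambda act: policy.qfunc(f, act))
            t, f, r = rollout.receive(a)
            total += discount * r
            discount *= rollout.gamma
            if t:
                break
        return total


grid = ChainGrid()
policy = LinearPolicy(num_states=grid.length, actions=grid.actions)
policy, y = qlearning(grid, policy, GreedyEvaler(grid), num_iter1=50, alpha=0.1)
print(y[-1])  # evaluation score of the learned policy after training

The same objects can be passed unchanged to sarsa; the only difference between the two learners is the bootstrap target (max over actions vs. the epsilon-greedy action actually taken).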