Beispiel #1
0
def inc_Q(s, a, alpha, inc):
    """Bump the stored value for (s, a) and mirror it to the World display.

    The increment added to ``Q[s][a]`` is ``alpha * inc * E[s][a]``; the
    updated value is then passed to ``World.set_cell_score``.

    NOTE(review): ``E`` presumably holds eligibility traces and ``inc`` the
    TD error -- confirm against the caller.

    Args:
        s: State key into the module-level ``Q`` and ``E`` tables.
        a: Action key within that state's row.
        alpha: Multiplier applied to the increment.
        inc: Raw increment before scaling by ``alpha`` and ``E[s][a]``.
    """
    q_row = Q[s]
    current = q_row[a]
    q_row[a] = current + alpha * inc * E[s][a]
    World.set_cell_score(s, a, q_row[a])
Beispiel #2
0
# Action list taken from the World module.
actions = World.actions

# Every (i, j) pair with i in range(World.x) and j in range(World.y),
# i varying slowest.
states = [(col, row) for col in range(World.x) for row in range(World.y)]

# Q and E map state -> {action: float}; both start at zero for every pair.
Q = {}
E = {}
for cell in states:
    # Initial Q value is 0.0; set to 0.1 if following greedy policy.
    q_row = dict.fromkeys(actions, 0.0)
    e_row = dict.fromkeys(actions, 0.0)
    # Report each starting value to the World so it can show the cell score.
    for act in actions:
        World.set_cell_score(cell, act, q_row[act])
    Q[cell] = q_row
    E[cell] = e_row

# Each entry of World.specials is a 4-tuple; the first two items locate the
# cell and the fourth overrides Q for every action there. The third item is
# not used here.
for spec_x, spec_y, _unused, reward in World.specials:
    special_cell = (spec_x, spec_y)
    for act in actions:
        Q[special_cell][act] = reward
        World.set_cell_score(special_cell, act, reward)


def do_action(action):
    s = World.player
    r = -World.score
    if action == actions[0]:
        World.try_move(0, -1)
    elif action == actions[1]: