# Per-step weight/trace update with separate linear weights for action 1
# (hit*) and the other action (stick*, stored under key 0), followed by the
# mean-square-error bookkeeping and plots.
#
# NOTE(review): this fragment arrived with its line breaks and indentation
# collapsed, and the enclosing loop headers are not part of it.  Statement
# boundaries are restored below; the block nesting is a best guess, so
# re-indent against the surrounding loops before relying on it.

# Add the current feature vector to the trace of the action being taken.
if action == 1:
    hiteligible = [h + f for h, f in zip(hiteligible, features)]
else:
    stickeligible = [s + f for s, f in zip(stickeligible, features)]

# Take the action, then recompute the features from the resulting state.
state, reward = step(state, action)
features = linear(state)

# Reward minus each weight vector's linear estimate for the new features.
# NOTE(review): `features` already describes the post-step state here, so both
# the subtracted estimate and the actionvalue entry added below refer to the
# new state -- confirm this is the intended error term.
hitdelta = reward - sum(f * p for f, p in zip(features, hitparam))
stickdelta = reward - sum(f * p for f, p in zip(features, stickparam))

# Refresh the action-value table for the action taken and add its entry to
# that action's delta only.
if action == 1:
    actionvalue = update(actionvalue, features, action, hitparam)
    hitdelta += actionvalue[(tuple(features), 1)]
else:
    actionvalue = update(actionvalue, features, action, stickparam)
    stickdelta += actionvalue[(tuple(features), 0)]

# Both weight vectors move on every step: weight += a * delta * trace.
hitparam = [p + a * hitdelta * h for p, h in zip(hitparam, hiteligible)]
stickparam = [p + a * stickdelta * s for p, s in zip(stickparam, stickeligible)]

# Scale both traces by lamBda.
hiteligible = [lamBda * h for h in hiteligible]
stickeligible = [lamBda * s for s in stickeligible]

# Choose the next action from the new features.
action = greedy(features, actionvalue, e)

# NOTE(review): the statements below presumably run after the enclosing
# loops (per game / per lamBda value / once at the end) rather than on every
# step, and it is not clear from the collapsed text whether plot(mses, ...)
# sits inside this `if` -- verify against the loop headers and dedent.
if lamBda in (0.0, 1.0):
    mses += [(game, mse(MCactionvalue, actionvalue))]
    plot(mses, 'Game', 'Mean square error', 'Lambda ' + str(lamBda))
meansquarerror.append((lamBda, mse(MCactionvalue, actionvalue)))
plot(meansquarerror, 'Lambda', 'Mean square error', 'MSE: Lambda 0.0-1.0')
# One episode of a trace-based update of a single linear weight vector `w`,
# followed by the mean-square-error bookkeeping and plots.
#
# NOTE(review): this fragment arrived with its line breaks and indentation
# collapsed (the inline `#` comment swallowed the rest of the line), and the
# enclosing loop headers are not part of it.  Statement boundaries and the
# body of the visible `while` loop are restored below; the nesting of
# everything else is a best guess, so re-indent against the surrounding
# loops before relying on it.

# NOTE(review): this first statement may belong to a guard or loop whose
# header precedes the fragment -- confirm.
mses += [(game, mse(MCactionvalue, actionvalue))]

# Fresh trace vector of 3 * 6 * 2 = 36 zeros (presumably one per feature).
Z = [0.0] * (3 * 6 * 2)
state = State()
# With the last argument 1 the result is used as the action alone; with 0
# (inside the loop) it is unpacked into action, actionvalue and features.
action = greedysoft(state, actionvalue, w, e, 1)
features = linear(state, action)

while state.gameover == 0:
    # Accumulating traces: scale the old trace by lamBda, then add the
    # current features.  The replacing-trace alternative left in the
    # original as a comment was: Z = features; traces = 'Replaced traces'
    Z = [lamBda * z + f for z, f in zip(Z, features)]
    traces = "Accumulated traces"

    state, reward = step(state, action)
    # Reward minus the linear estimate for the features of the step just taken.
    d = reward - sum(f * v for f, v in zip(features, w))

    if state.gameover == 1:
        # Game over: apply the update without adding a next-step value.
        w = [v + a * d * z for v, z in zip(w, Z)]
        break

    action, actionvalue, features = greedysoft(state, actionvalue, w, e, 0)
    # Add the table entry for the newly selected features before updating.
    d += actionvalue[tuple(features)]
    w = [v + a * d * z for v, z in zip(w, Z)]

# NOTE(review): the statements below presumably belong to outer loop levels
# (per lamBda value / once at the end), and it is not clear from the
# collapsed text whether plot(mses, ...) sits inside this `if` -- verify
# against the loop headers and dedent.
if lamBda in (0.0, 1.0):
    mses += [(game, mse(MCactionvalue, actionvalue))]
    plot(mses, "Game", "Mean square error", "Lambda = " + str(lamBda) + " . " + traces)
meansquarerror.append((lamBda, mse(MCactionvalue, actionvalue)))
plot(meansquarerror, "Lambda", "Mean square error", "MSE: Lambda 0.0-1.0")