if action==1: hiteligible = [sum(x) for x in zip(hiteligible, features)]
                else: stickeligible = [sum(x) for x in zip(stickeligible, features)]

                # One environment step followed by the weight update for this
                # iteration. Two separate weight vectors are maintained:
                # hitparam (paired with action 1) and stickparam (paired with
                # the other action, stored under key 0 in actionvalue).
                state, reward = step(state, action)
                # features now describes the state returned by step().
                features = linear(state)

                # Error terms: reward minus the dot product of features with
                # each action's weights.
                # NOTE(review): features was overwritten just above, so the
                # prediction subtracted here belongs to the post-step state,
                # not to the state the action was taken in — confirm this is
                # intended.
                hitdelta = reward - sum([x[0]*x[1] for x in zip(features, hitparam)])
                stickdelta = reward - sum([x[0]*x[1] for x in zip(features, stickparam)])

                # Refresh the action-value table for the action just taken and
                # add the stored value for (features, action) to that action's
                # error term only. No discount factor is applied.
                if action==1:
                    actionvalue = update(actionvalue, features, action, hitparam)
                    hitdelta += actionvalue[(tuple(features), 1)]
                else:
                    actionvalue = update(actionvalue, features, action, stickparam)
                    stickdelta += actionvalue[(tuple(features), 0)]

                # Both weight vectors are moved every step, element-wise:
                # param += a * delta * eligibility.
                hitparam = [sum(x) for x in zip(hitparam, [a * hitdelta * h for h in hiteligible])]
                stickparam = [sum(x) for x in zip(stickparam, [a * stickdelta * s for s in stickeligible])]

                # Decay both eligibility vectors by lamBda.
                hiteligible = [lamBda * h for h in hiteligible]
                stickeligible = [lamBda * s for s in stickeligible]

                # Choose the action for the next iteration from the new features.
                action = greedy(features, actionvalue, e)

        # For the two extreme settings (lamBda exactly 0.0 or 1.0), append one
        # more (game, mse) point and plot the per-game error curve.
        # NOTE(review): this is an exact float membership test; it only fires
        # if lamBda is generated as exactly 0.0 / 1.0 — confirm how the lamBda
        # values are produced.
        # `game` is defined outside this fragment (presumably the variable of
        # an enclosing loop over games — header not visible here).
        if lamBda in (0.0, 1.0):
            mses += [(game, mse(MCactionvalue, actionvalue))]
            plot(mses, 'Game', 'Mean square error', 'Lambda ' + str(lamBda))

        # Record the error between MCactionvalue and actionvalue for this lamBda.
        meansquarerror.append((lamBda, mse(MCactionvalue, actionvalue)))
    # Dedented one level: runs after the enclosing block (presumably the loop
    # over lamBda values) has finished, plotting error against lamBda.
    plot(meansquarerror, 'Lambda', 'Mean square error', 'MSE: Lambda 0.0-1.0')
                mses += [(game, mse(MCactionvalue, actionvalue))]

            # --- One episode, using a single weight vector w ---
            # Z is the eligibility-trace vector: 3 * 6 * 2 = 36 zeros
            # (presumably the length of the vector returned by linear() —
            # confirm).
            Z = [0.0] * (3 * 6 * 2)
            state = State()
            # With the last argument set to 1, greedysoft's result is bound to
            # a single name here; with 0 (inside the loop) it is unpacked into
            # three values.
            action = greedysoft(state, actionvalue, w, e, 1)
            features = linear(state, action)

            while state.gameover == 0:

                # Disabled alternative (replacing traces):
                # Z = features; traces = 'Replaced traces'
                # Active rule: decay the trace by lamBda, then add the current
                # features element-wise.
                Z = [sum(x) for x in zip([lamBda * z for z in Z], features)]
                # Label describing the trace rule; used later in the plot title.
                traces = "Accumulated traces"

                state, reward = step(state, action)
                # Error so far: reward minus the dot product of w with the
                # features of the state/action that was just played (features
                # is not reassigned until greedysoft is called below).
                d = reward - sum([x[0] * x[1] for x in zip(features, w)])

                # Terminal state: there is no successor value to add, so apply
                # the update w += a * d * Z and end the episode.
                if state.gameover == 1:
                    w = [sum(x) for x in zip(w, [a * d * z for z in Z])]
                    break

                # Non-terminal: pick the next action (also yields the updated
                # actionvalue table and the next features), add the stored
                # value for those features to the error (no discount factor is
                # applied), then update w.
                action, actionvalue, features = greedysoft(state, actionvalue, w, e, 0)
                d += actionvalue[tuple(features)]
                w = [sum(x) for x in zip(w, [a * d * z for z in Z])]

        # For the two extreme settings (lamBda exactly 0.0 or 1.0), append one
        # more (game, mse) point and plot the per-game error curve; the title
        # includes the lamBda value and the `traces` label.
        # NOTE(review): this is an exact float membership test; it only fires
        # if lamBda is generated as exactly 0.0 / 1.0 — confirm how the lamBda
        # values are produced.
        # `game` and `traces` are assigned outside this fragment; `traces` is
        # only bound if the episode loop body ran at least once.
        if lamBda in (0.0, 1.0):
            mses += [(game, mse(MCactionvalue, actionvalue))]
            plot(mses, "Game", "Mean square error", "Lambda = " + str(lamBda) + " . " + traces)

        # Record the error between MCactionvalue and actionvalue for this lamBda.
        meansquarerror.append((lamBda, mse(MCactionvalue, actionvalue)))
    # Dedented one level: runs after the enclosing block (presumably the loop
    # over lamBda values) has finished, plotting error against lamBda.
    plot(meansquarerror, "Lambda", "Mean square error", "MSE: Lambda 0.0-1.0")