Beispiel #1
0
Datei: game.py Projekt: kdub0/ice
def to_instance(game):
    """Build the sparse deviation-feature matrix for a demonstrated game.

    Args:
        game: Nested pair ``((index, U), demonstrated)``.  ``index`` unpacks
            as ``(M, actions, _)`` with ``M`` outcomes and ``actions[i]``
            actions for player ``i``.  ``U[i]`` is indexed as
            ``U[i][outcome, k]`` and ``U[0].shape[1]`` gives the number of
            feature columns ``K``.

    Returns:
        Tuple ``(demonstrated, K, A)``.  ``demonstrated`` is passed through
        untouched.  ``A`` is a ``scipy.sparse.csr_matrix`` of shape
        ``(M, K * deviations)`` where ``deviations`` is the sum over players
        of ``actions[i] * (actions[i] - 1)``.  For player ``i`` currently
        playing ``x`` in ``outcome`` and an alternative ``y != x``, the entry
        at row ``outcome`` and column ``K * deviation + k`` is
        ``U[i][outcomep, k] - U[i][outcome, k]`` with
        ``outcomep = indexing.reindex(index, outcome, i, y)``.
    """
    (index, U), demonstrated = game
    M, actions, _ = index
    N = len(actions)
    K = U[0].shape[1]

    # Player i owns a contiguous run of actions[i] * (actions[i] - 1)
    # deviation slots, one per ordered pair (x, y) with x != y.
    deviation_offset = []
    deviations = 0
    for count in actions:
        deviation_offset.append(deviations)
        deviations += count * (count - 1)

    data = []
    rows = []
    cols = []
    for outcome in range(M):
        # The joint action depends only on the outcome, so decode it once
        # per outcome rather than once per player.
        # NOTE(review): assumes indexing.unindex has no side effects.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            Ui = U[i]
            for y in range(actions[i]):
                if y == x:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)

                # Within x's run of (actions[i] - 1) slots the alternatives
                # are packed with x itself skipped, so y > x shifts down by 1.
                slot = y - 1 if x < y else y
                deviation = deviation_offset[i] + (actions[i] - 1) * x + slot
                base = K * deviation
                for k in range(K):
                    data.append(Ui[outcomep, k] - Ui[outcome, k])
                    rows.append(outcome)
                    cols.append(base + k)

    return (demonstrated, K,
            scipy.sparse.csr_matrix((data, (rows, cols)),
                                    shape=(M, K * deviations)))
Beispiel #2
0
Datei: ce.py Projekt: sallen7/ice
def solve(game, c, w, opt):
    """Return the softmax outcome distribution at the point chosen by ``opt``.

    Args:
        game: Pair ``(index, U)``.  ``index`` unpacks as ``(M, actions, _)``
            with ``M`` outcomes and ``actions[i]`` actions for player ``i``.
            ``U[i][outcome]`` is dotted with ``w`` to get player ``i``'s
            scalar utility in that outcome.
        c: Per-player coefficients; ``c[i]`` weights player ``i``'s scalar
            utility when forming the per-outcome base score.
        w: Weight vector applied to every ``U[i][outcome]``.
        opt: Callable invoked as ``opt(deviations, gradient)`` where
            ``deviations`` is the number of deviation slots and
            ``gradient(x)`` returns ``dot(predict(x), R)``.  Its return value
            is used as the vector ``x`` in ``dot(R, x)``.

    Returns:
        Length-``M`` array proportional to ``exp(cp - dot(R, x))`` and
        normalised to sum to one, where ``x`` is the value returned by
        ``opt``.
    """
    (index, U) = game
    (M, actions, _) = index
    N = len(actions)

    # Player i owns a contiguous run of actions[i] * (actions[i] - 1)
    # deviation slots, one per ordered pair (x, y) with x != y.
    deviation_offset = []
    deviations = 0
    for count in actions:
        deviation_offset.append(deviations)
        deviations += count * (count - 1)

    # Uw[outcome, i] is player i's scalar utility; cp[outcome] is the
    # c-weighted sum of those utilities, accumulated in player order.
    Uw = zeros((M, N))
    cp = zeros(M)
    for outcome in range(M):
        for i in range(N):
            Uw[outcome, i] = dot(U[i][outcome], w)
            cp[outcome] += c[i] * Uw[outcome, i]

    # R[outcome, deviation] is the utility change for the deviating player.
    R = zeros((M, deviations))
    for outcome in range(M):
        # The joint action depends only on the outcome, so decode it once
        # per outcome rather than once per player.
        # NOTE(review): assumes indexing.unindex has no side effects.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if y == x:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)

                # Alternatives are packed with x itself skipped, so y > x
                # shifts down by one slot.
                slot = y - 1 if x < y else y
                deviation = deviation_offset[i] + (actions[i] - 1) * x + slot
                R[outcome, deviation] = Uw[outcomep, i] - Uw[outcome, i]

    def predict(x):
        """Softmax over outcomes of ``cp - dot(R, x)``."""
        regrets = cp - dot(R, x)
        # Subtract the maximum before exponentiating to avoid overflow.
        shift = regrets.max()
        sigma = exp(regrets - shift)
        Z = sum(sigma)
        return sigma / Z

    def gradient(x):
        """Return ``dot(predict(x), R)``."""
        sigma = predict(x)
        return dot(sigma, R)

    return predict(opt(deviations, gradient))
Beispiel #3
0
Datei: ce.py Projekt: kdub0/ice
def solve(game, c, w, opt):
    """Return the softmax outcome distribution at the point chosen by ``opt``.

    Args:
        game: Pair ``(index, U)``.  ``index`` unpacks as ``(M, actions, _)``
            with ``M`` outcomes and ``actions[i]`` actions for player ``i``.
            ``U[i][outcome]`` is dotted with ``w`` to get player ``i``'s
            scalar utility in that outcome.
        c: Per-player coefficients; ``c[i]`` weights player ``i``'s scalar
            utility when forming the per-outcome base score.
        w: Weight vector applied to every ``U[i][outcome]``.
        opt: Callable invoked as ``opt(deviations, gradient)`` where
            ``deviations`` is the number of deviation slots and
            ``gradient(x)`` returns ``dot(predict(x), R)``.  Its return value
            is used as the vector ``x`` in ``dot(R, x)``.

    Returns:
        Length-``M`` array proportional to ``exp(cp - dot(R, x))`` and
        normalised to sum to one, where ``x`` is the value returned by
        ``opt``.
    """
    (index, U) = game
    (M, actions, _) = index
    N = len(actions)

    # Each player contributes actions[i] * (actions[i] - 1) deviation slots
    # (ordered pairs x -> y with x != y), laid out player after player.
    deviation_offset = []
    deviations = 0
    for count in actions:
        deviation_offset.append(deviations)
        deviations += count * (count - 1)

    # Uw[outcome, i]: scalar utility of player i; cp[outcome]: c-weighted
    # sum of the players' utilities, accumulated in player order.
    Uw = zeros((M, N))
    cp = zeros(M)
    for outcome in range(M):
        for i in range(N):
            Uw[outcome, i] = dot(U[i][outcome], w)
            cp[outcome] += c[i] * Uw[outcome, i]

    # R[outcome, deviation]: utility change for the player who deviates.
    R = zeros((M, deviations))
    for outcome in range(M):
        # Decoding the joint action does not depend on the player, so it is
        # done once per outcome instead of once per (outcome, player).
        # NOTE(review): assumes indexing.unindex has no side effects.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if y == x:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)

                # x's own slot is skipped in the packing, so alternatives
                # above x move down by one.
                slot = y - 1 if x < y else y
                deviation = deviation_offset[i] + (actions[i] - 1) * x + slot
                R[outcome, deviation] = Uw[outcomep, i] - Uw[outcome, i]

    def predict(x):
        """Softmax over outcomes of ``cp - dot(R, x)``."""
        regrets = cp - dot(R, x)
        # Shift by the maximum so the exponentials cannot overflow.
        shift = regrets.max()
        sigma = exp(regrets - shift)
        Z = sum(sigma)
        return sigma / Z

    def gradient(x):
        """Return ``dot(predict(x), R)``."""
        sigma = predict(x)
        return dot(sigma, R)

    return predict(opt(deviations, gradient))
Beispiel #4
0
Datei: game.py Projekt: kdub0/ice
def to_instance(game):
    """Build the sparse deviation-feature matrix for a demonstrated game.

    Args:
        game: Nested pair ``((index, U), demonstrated)``.  ``index`` unpacks
            as ``(M, actions, _)`` with ``M`` outcomes and ``actions[i]``
            actions for player ``i``.  ``U[i]`` is indexed as
            ``U[i][outcome, k]`` and ``U[0].shape[1]`` gives the number of
            feature columns ``K``.

    Returns:
        Tuple ``(demonstrated, K, A)``.  ``demonstrated`` is passed through
        untouched.  ``A`` is a ``scipy.sparse.csr_matrix`` of shape
        ``(M, K * deviations)`` where ``deviations`` is the sum over players
        of ``actions[i] * (actions[i] - 1)``.  For player ``i`` currently
        playing ``x`` in ``outcome`` and an alternative ``y != x``, the entry
        at row ``outcome`` and column ``K * deviation + k`` is
        ``U[i][outcomep, k] - U[i][outcome, k]`` with
        ``outcomep = indexing.reindex(index, outcome, i, y)``.
    """
    (index, U), demonstrated = game
    M, actions, _ = index
    N = len(actions)
    K = U[0].shape[1]

    # Each player contributes actions[i] * (actions[i] - 1) deviation slots
    # (ordered pairs x -> y with x != y), laid out player after player.
    deviation_offset = []
    deviations = 0
    for count in actions:
        deviation_offset.append(deviations)
        deviations += count * (count - 1)

    data = []
    rows = []
    cols = []
    for outcome in range(M):
        # Decoding the joint action does not depend on the player, so it is
        # done once per outcome instead of once per (outcome, player).
        # NOTE(review): assumes indexing.unindex has no side effects.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            Ui = U[i]
            for y in range(actions[i]):
                if y == x:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)

                # x's own slot is skipped in the packing, so alternatives
                # above x move down by one.
                slot = y - 1 if x < y else y
                deviation = deviation_offset[i] + (actions[i] - 1) * x + slot
                base = K * deviation
                for k in range(K):
                    data.append(Ui[outcomep, k] - Ui[outcome, k])
                    rows.append(outcome)
                    cols.append(base + k)

    return (demonstrated, K,
            scipy.sparse.csr_matrix((data, (rows, cols)),
                                    shape=(M, K * deviations)))