Ejemplo n.º 1
0
Archivo: game.py Proyecto: kdub0/ice
def to_instance(game):
    """Convert a demonstrated game into a sparse deviation-gain instance.

    ``game`` is unpacked as ``((index, U), demonstrated)`` and ``index`` as
    ``(M, actions, _)``.  ``M`` is the number of outcomes that are iterated,
    ``actions[i]`` is the number of actions of player ``i`` and ``U[i]`` is
    indexed as ``U[i][outcome, k]`` with ``K = U[0].shape[1]`` columns.

    Every ordered pair ``(x, y)`` of distinct actions of one player is a
    "deviation".  Player ``i`` owns ``actions[i] * (actions[i] - 1)`` of them,
    numbered consecutively starting at ``deviation_offset[i]``: the pair
    ``(x, y)`` gets local index ``(actions[i] - 1) * x + y``, minus one when
    ``y > x`` so that the skipped ``y == x`` slot leaves no gap.

    Returns:
        ``(demonstrated, K, A)`` where ``A`` is a ``scipy.sparse.csr_matrix``
        of shape ``(M, K * deviations)``.  For the action ``x`` that player
        ``i`` takes in ``outcome`` and every other action ``y``,
        ``A[outcome, K * deviation + k]`` holds
        ``U[i][outcomep, k] - U[i][outcome, k]`` with
        ``outcomep = indexing.reindex(index, outcome, i, y)`` (presumably the
        outcome in which only player ``i`` switches to ``y`` -- confirm
        against ``indexing``).
    """
    (index, U), demonstrated = game
    M, actions, _ = index
    N = len(actions)
    K = U[0].shape[1]

    # First deviation number of each player, and the running total.
    deviation_offset = [0] * N
    offset = 0
    for i in range(N):
        deviation_offset[i] = offset
        offset += actions[i] * (actions[i] - 1)
    deviations = offset

    # Coordinate-format triplets for the sparse matrix.
    A = []
    row = []
    col = []
    for outcome in range(M):
        # The decoded joint action depends only on the outcome, so look it up
        # once here instead of once per player.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if x == y:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)

                deviation = deviation_offset[i] + (actions[i] - 1) * x + y
                if x < y:
                    # Close the gap left by the skipped y == x slot.
                    deviation -= 1
                for k in range(K):
                    A.append(U[i][outcomep, k] - U[i][outcome, k])
                    row.append(outcome)
                    col.append(K * deviation + k)

    return (demonstrated, K,
            scipy.sparse.csr_matrix((A, (row, col)),
                                    shape=(M, K * deviations)))
Ejemplo n.º 2
0
Archivo: ce.py Proyecto: sallen7/ice
def solve(game, c, w, opt):
    """Return a distribution over outcomes from an optimised deviation vector.

    ``game`` is unpacked as ``(index, U)`` and ``index`` as
    ``(M, actions, _)``: ``M`` outcomes are iterated, ``actions[i]`` is the
    number of actions of player ``i`` and ``U[i][outcome]`` is combined with
    ``w`` through ``dot`` to give that player's scalar value ``Uw[outcome, i]``.

    Args:
        game: ``(index, U)`` as described above.
        c: per-player weights; ``cp[outcome] = sum_i c[i] * Uw[outcome, i]``.
        w: weight vector applied to every ``U[i][outcome]``.
        opt: callable invoked as ``opt(deviations, gradient)``.  Its result
            ``x`` is used in ``dot(R, x)``, so it must be compatible with a
            vector of length ``deviations``.

    Returns:
        ``predict(x)`` for the ``x`` returned by ``opt``: the vector
        ``exp(cp - dot(R, x))`` normalised to sum to one, of length ``M``.

    ``R[outcome, deviation]`` is ``Uw[outcomep, i] - Uw[outcome, i]`` where
    ``outcomep = indexing.reindex(index, outcome, i, y)`` for each action
    ``y`` of player ``i`` other than the action ``x`` taken in ``outcome``.
    Deviations are numbered per player as ordered pairs ``(x, y)``, ``x != y``:
    ``deviation_offset[i] + (actions[i] - 1) * x + y``, minus one when
    ``y > x``.
    """
    index, U = game
    M, actions, _ = index
    N = len(actions)

    # First deviation number of each player, and the running total.
    deviation_offset = [0] * N
    offset = 0
    for i in range(N):
        deviation_offset[i] = offset
        offset += actions[i] * (actions[i] - 1)
    deviations = offset

    # Scalarised utilities and their c-weighted sum, filled in one pass.
    # cp[outcome] accumulates over players in the same order as before.
    Uw = zeros((M, N))
    cp = zeros(M)
    for outcome in range(M):
        for i in range(N):
            Uw[outcome, i] = dot(U[i][outcome], w)
            cp[outcome] += c[i] * Uw[outcome, i]

    R = zeros((M, deviations))
    for outcome in range(M):
        # The decoded joint action depends only on the outcome, so look it up
        # once here instead of once per player.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if x == y:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)

                deviation = deviation_offset[i] + (actions[i] - 1) * x + y
                if x < y:
                    # Close the gap left by the skipped y == x slot.
                    deviation -= 1
                R[outcome, deviation] = Uw[outcomep, i] - Uw[outcome, i]

    def predict(x):
        """Softmax of ``cp - dot(R, x)`` over outcomes."""
        regrets = cp - dot(R, x)
        # Subtract the maximum before exponentiating to avoid overflow; the
        # shift cancels in the normalisation.
        shift = regrets.max()
        sigma = exp(regrets - shift)
        Z = sum(sigma)
        return sigma / Z

    def gradient(x):
        """Per-deviation average of ``R`` under ``predict(x)``."""
        return dot(predict(x), R)

    return predict(opt(deviations, gradient))
Ejemplo n.º 3
0
Archivo: ce.py Proyecto: kdub0/ice
def solve(game, c, w, opt):
    """Return a distribution over outcomes from an optimised deviation vector.

    ``game`` is unpacked as ``(index, U)`` and ``index`` as
    ``(M, actions, _)``: ``M`` outcomes are iterated, ``actions[i]`` is the
    number of actions of player ``i`` and ``U[i][outcome]`` is combined with
    ``w`` through ``dot`` to give that player's scalar value ``Uw[outcome, i]``.

    Args:
        game: ``(index, U)`` as described above.
        c: per-player weights; ``cp[outcome] = sum_i c[i] * Uw[outcome, i]``.
        w: weight vector applied to every ``U[i][outcome]``.
        opt: callable invoked as ``opt(deviations, gradient)``.  Its result
            ``x`` is used in ``dot(R, x)``, so it must be compatible with a
            vector of length ``deviations``.

    Returns:
        ``predict(x)`` for the ``x`` returned by ``opt``: the vector
        ``exp(cp - dot(R, x))`` normalised to sum to one, of length ``M``.

    ``R[outcome, deviation]`` is ``Uw[outcomep, i] - Uw[outcome, i]`` where
    ``outcomep = indexing.reindex(index, outcome, i, y)`` for each action
    ``y`` of player ``i`` other than the action ``x`` taken in ``outcome``.
    Deviations are numbered per player as ordered pairs ``(x, y)``, ``x != y``:
    ``deviation_offset[i] + (actions[i] - 1) * x + y``, minus one when
    ``y > x``.
    """
    index, U = game
    M, actions, _ = index
    N = len(actions)

    # First deviation number of each player, and the running total.
    deviation_offset = [0] * N
    offset = 0
    for i in range(N):
        deviation_offset[i] = offset
        offset += actions[i] * (actions[i] - 1)
    deviations = offset

    # Scalarised utilities and their c-weighted sum, filled in one pass.
    # cp[outcome] accumulates over players in the same order as before.
    Uw = zeros((M, N))
    cp = zeros(M)
    for outcome in range(M):
        for i in range(N):
            Uw[outcome, i] = dot(U[i][outcome], w)
            cp[outcome] += c[i] * Uw[outcome, i]

    R = zeros((M, deviations))
    for outcome in range(M):
        # The decoded joint action depends only on the outcome, so look it up
        # once here instead of once per player.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if x == y:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)

                deviation = deviation_offset[i] + (actions[i] - 1) * x + y
                if x < y:
                    # Close the gap left by the skipped y == x slot.
                    deviation -= 1
                R[outcome, deviation] = Uw[outcomep, i] - Uw[outcome, i]

    def predict(x):
        """Softmax of ``cp - dot(R, x)`` over outcomes."""
        regrets = cp - dot(R, x)
        # Subtract the maximum before exponentiating to avoid overflow; the
        # shift cancels in the normalisation.
        shift = regrets.max()
        sigma = exp(regrets - shift)
        Z = sum(sigma)
        return sigma / Z

    def gradient(x):
        """Per-deviation average of ``R`` under ``predict(x)``."""
        return dot(predict(x), R)

    return predict(opt(deviations, gradient))
Ejemplo n.º 4
0
def create(N):
    """Build an ``N``-player game in which every player has four actions.

    Returns ``(idx, U)`` where ``idx = indexing.create([4] * N)`` and ``U`` is
    a list of ``N`` arrays of shape ``(M, 4)``, ``M`` being the first element
    of ``idx``.  ``U[i][outcome, :]`` holds the negated four-component vector
    computed for player ``i`` in that outcome.

    Each action ``a`` in ``0..3`` is read as two flags: ``a % 2 == 0`` (counted
    in ``left``) and ``a // 2 == 0`` (counted in ``back``).
    """
    assert N > 1
    actions = [4] * N
    idx = indexing.create(actions)
    M, _, _ = idx
    U = [zeros((M, 4)) for _ in range(N)]

    for outcome in range(M):
        profile = indexing.unindex(idx, outcome)
        # How many players set each of the two flags in this outcome.
        left = len([choice for choice in profile if choice % 2 == 0])
        back = len([choice for choice in profile if choice // 2 == 0])

        base = array([1.5 + .2 * N, 9.0, 1.0 / 8.0 + 0.04 * N, 7 + 0.4 * N])
        for player in range(N):
            choice = profile[player]

            # NOTE(review): the value assigned to ``cost`` by this first
            # if/else is always overwritten by the second if/else below, so
            # ``left`` never influences the returned payoffs.  If both terms
            # were meant to add up, the second pair should probably read
            # ``cost = cost + ...``.  Kept as-is to preserve behaviour --
            # confirm the intended model before changing it.
            if choice % 2 == 0:
                cost = base + array([1.0, 1.5, 1.0 / 20.0, 2.0])
            else:
                cost = base + array([
                    1.0 + 2.0 * left,
                    1.0,
                    1.0 / 20.0 + 0.04 * left,
                    1.5 + .4 * left,
                ])

            if choice // 2 == 0:
                cost = base + array([
                    6.0 + 0.5 * back,
                    12.0,
                    1.0 / 7.0 + 0.01 * back,
                    10.0 + 3.0 * back,
                ])
            else:
                cost = base + array([2.0, 20.0, 1.0 / 8.0, 15.0])

            # Stored negated.
            U[player][outcome, :] = -cost

    return (idx, U)
Ejemplo n.º 5
0
Archivo: game.py Proyecto: kdub0/ice
def to_instance(game):
    """Convert a demonstrated game into a sparse deviation-gain instance.

    ``game`` is unpacked as ``((index, U), demonstrated)`` and ``index`` as
    ``(M, actions, _)``.  ``M`` is the number of outcomes that are iterated,
    ``actions[i]`` is the number of actions of player ``i`` and ``U[i]`` is
    indexed as ``U[i][outcome, k]`` with ``K = U[0].shape[1]`` columns.

    Every ordered pair ``(x, y)`` of distinct actions of one player is a
    "deviation".  Player ``i`` owns ``actions[i] * (actions[i] - 1)`` of them,
    numbered consecutively starting at ``deviation_offset[i]``: the pair
    ``(x, y)`` gets local index ``(actions[i] - 1) * x + y``, minus one when
    ``y > x`` so that the skipped ``y == x`` slot leaves no gap.

    Returns:
        ``(demonstrated, K, A)`` where ``A`` is a ``scipy.sparse.csr_matrix``
        of shape ``(M, K * deviations)``.  For the action ``x`` that player
        ``i`` takes in ``outcome`` and every other action ``y``,
        ``A[outcome, K * deviation + k]`` holds
        ``U[i][outcomep, k] - U[i][outcome, k]`` with
        ``outcomep = indexing.reindex(index, outcome, i, y)`` (presumably the
        outcome in which only player ``i`` switches to ``y`` -- confirm
        against ``indexing``).
    """
    (index, U), demonstrated = game
    M, actions, _ = index
    N = len(actions)
    K = U[0].shape[1]

    # First deviation number of each player, and the running total.
    deviation_offset = [0] * N
    offset = 0
    for i in range(N):
        deviation_offset[i] = offset
        offset += actions[i] * (actions[i] - 1)
    deviations = offset

    # Coordinate-format triplets for the sparse matrix.
    A = []
    row = []
    col = []
    for outcome in range(M):
        # The decoded joint action depends only on the outcome, so look it up
        # once here instead of once per player.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if x == y:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)

                deviation = deviation_offset[i] + (actions[i] - 1) * x + y
                if x < y:
                    # Close the gap left by the skipped y == x slot.
                    deviation -= 1
                for k in range(K):
                    A.append(U[i][outcomep, k] - U[i][outcome, k])
                    row.append(outcome)
                    col.append(K * deviation + k)

    return (demonstrated, K,
            scipy.sparse.csr_matrix((A, (row, col)),
                                    shape=(M, K * deviations)))