Example #1
import math

import numpy as np
# `dynamics.replicator` is assumed to be OpenSpiel's
# open_spiel.python.egt.dynamics.replicator, i.e. x * (f - x . f).
from open_spiel.python.egt import dynamics


def lenient_boltzmannq(state, fitness):
    """Lenient Boltzmann Q-learning dynamics for 2x2 matrix games."""
    temperature = 50  # Exploration temperature.
    kappa = 1  # Degree of leniency; kappa = 1 recovers plain Boltzmann Q.

    # Payoff matrix A; uncomment the game you want to study.
    # Prisoner's Dilemma:
    # A = np.array([[5, 0], [10, 1]])
    # Matching Pennies (note: this line duplicates the PD matrix above):
    A = np.array([[5, 0], [10, 1]])
    # Rock-Paper-Scissors:
    # A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]])

    # First-action probabilities of the row player (x) and column player (y).
    x = [fitness[0], 1 - fitness[0]]  # Unused below.
    y = [fitness[1], 1 - fitness[1]]

    # Lenient fitness of each row action i against the opponent strategy y:
    #   f_i = sum_j A[i][j] * y[j]
    #         * ((sum_{k: A[i][k] <= A[i][j]} y[k])^kappa
    #            - (sum_{k: A[i][k] < A[i][j]} y[k])^kappa)
    #         / sum_{k: A[i][k] == A[i][j]} y[k]
    fitness_exploitation = []
    for i in range(len(fitness)):
        term = 0
        for j in range(len(fitness)):
            term += A[i][j] * y[j] * (
                math.pow(
                    sum(y[k] for k in range(2) if A[i][k] <= A[i][j]), kappa)
                - math.pow(
                    sum(y[k] for k in range(2) if A[i][k] < A[i][j]), kappa)
            ) / sum(y[k] for k in range(2) if A[i][k] == A[i][j])
        fitness_exploitation.append(term)

    # Replicator (exploitation) part scaled by 1/temperature, minus the
    # entropy-driven exploration part.
    exploitation = (1. / temperature) * dynamics.replicator(
        state, np.array(fitness_exploitation))
    exploration = np.log(state) - state.dot(np.log(state))
    return exploitation - state * exploration
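A quick way to exercise the function above. How `state` and `fitness` are interpreted is inferred from the body (each `fitness` entry holds a player's first-action probability), so treat this as a hypothetical usage sketch rather than part of the original listing:

import numpy as np

state = np.array([0.6, 0.4])  # Row player's mixed strategy.
fitness = [0.6, 0.3]  # First-action probabilities of the two players.
print(lenient_boltzmannq(state, fitness))  # Time derivative of `state`.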
Example #2
# Assumes `dynamics` is OpenSpiel's open_spiel.python.egt.dynamics module and
# that `_sum_j_x_j_ln_x_j_over_x_i` is defined as sketched below.
def _q_learning_dynamics(composition, payoff, temperature):
    r"""An equivalent implementation of `dynamics.boltzmannq`."""
    # Replicator (exploitation) term plus entropy-driven exploration term.
    return (1 / temperature * dynamics.replicator(composition, payoff)
            + composition * _sum_j_x_j_ln_x_j_over_x_i(composition))
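Example #2 relies on a helper `_sum_j_x_j_ln_x_j_over_x_i` that the listing does not show. Its name, and the exploration term in Example #1, suggest it returns, for each strategy i, sum_j x_j ln(x_j / x_i). A minimal sketch under that assumption:

import numpy as np


def _sum_j_x_j_ln_x_j_over_x_i(x):
    r"""For each i, returns \sum_j x_j ln(x_j / x_i) (assumed semantics)."""
    # Since sum_j x_j = 1, this equals sum_j x_j ln x_j - ln x_i, i.e. the
    # negative of Example #1's `exploration` term.
    return np.array([np.sum(x * np.log(x / x_i)) for x_i in x])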
Example #3
def replicator(state, fitness):
    """Thin wrapper around the module-level replicator dynamics."""
    return egt_dyn.replicator(state, fitness)
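Example #3 just forwards to `egt_dyn.replicator`. Assuming `egt_dyn` is OpenSpiel's `open_spiel.python.egt.dynamics` module, the call evaluates the single-population replicator equation x_i * (f_i - x . f); a usage sketch under that assumption:

import numpy as np
from open_spiel.python.egt import dynamics as egt_dyn  # Assumed import.

state = np.array([0.5, 0.5])  # Population composition.
fitness = np.array([1.0, 0.0])  # Per-strategy fitness.
print(replicator(state, fitness))  # x * (f - x . f) -> [0.25, -0.25]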