import math

import numpy as np

# Assumed import path: this section refers to an egt `dynamics` module (also
# aliased as `egt_dyn` below); OpenSpiel's egt package is the likely source.
from open_spiel.python.egt import dynamics
from open_spiel.python.egt import dynamics as egt_dyn


def lenient_boltzmannq(state, fitness):
  """Lenient Boltzmann Q-learning dynamics for a two-player, two-action game."""
  temperature = 50
  kappa = 1
  # Row player's payoff matrix. The active matrix is the Prisoner's Dilemma;
  # the alternatives below can be swapped in.
  A = np.array([[5, 0], [10, 1]])
  # Matching Pennies:
  # A = np.array([[1, -1], [-1, 1]])
  # Rock-Paper-Scissors:
  # A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]])
  # Mixed strategies recovered from `fitness`: each entry is read as the
  # probability of that player's first strategy.
  x = [fitness[0], 1 - fitness[0]]  # row player (unused below)
  y = [fitness[1], 1 - fitness[1]]  # column player
  n = len(y)
  fitness_exploitation = []
  for i in range(len(fitness)):
    term = 0
    for j in range(n):
      # Lenient payoff: weight A[i][j] by the probability that action j is
      # the best outcome among kappa sampled opponent actions, splitting
      # ties uniformly.
      term += A[i][j] * y[j] * (
          math.pow(sum(y[k] for k in range(n) if A[i][k] <= A[i][j]), kappa) -
          math.pow(sum(y[k] for k in range(n) if A[i][k] < A[i][j]), kappa)
      ) / sum(y[k] for k in range(n) if A[i][k] == A[i][j])
    fitness_exploitation.append(term)
  exploitation = (1. / temperature) * dynamics.replicator(
      state, np.array(fitness_exploitation))
  exploration = np.log(state) - state.dot(np.log(state).transpose())
  return exploitation - state * exploration
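# Hypothetical usage sketch (not from the original): evaluates the lenient
# dynamics at one interior point, assuming, per the body above, that `state`
# is the row player's mixed strategy and `fitness` packs the probability each
# player assigns to their first strategy. Sample numbers are illustrative.
def _demo_lenient_boltzmannq():
  state = np.array([0.6, 0.4])
  fitness = np.array([0.6, 0.3])
  return lenient_boltzmannq(state, fitness)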
def _q_learning_dynamics(composition, payoff, temperature):
  r"""An equivalent implementation of `dynamics.boltzmannq`."""
  return 1 / temperature * dynamics.replicator(composition, payoff) + \
      composition * _sum_j_x_j_ln_x_j_over_x_i(composition)
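# `_sum_j_x_j_ln_x_j_over_x_i` is referenced above but not defined in this
# section. A minimal sketch consistent with its name, assuming `x` is a
# probability vector (so sum_j x_j = 1):
def _sum_j_x_j_ln_x_j_over_x_i(x):
  # For each i: sum_j x_j * ln(x_j / x_i) = (sum_j x_j * ln(x_j)) - ln(x_i).
  return np.dot(x, np.log(x)) - np.log(x)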
def replicator(state, fitness):
  """Thin wrapper around the egt module's replicator dynamics."""
  return egt_dyn.replicator(state, fitness)
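# Hypothetical sanity check (not from the original): one evaluation of the
# replicator wrapper on the Prisoner's Dilemma matrix used above, with each
# pure strategy's fitness taken as its expected payoff against the current
# population, f = A.x.
def _demo_replicator():
  A = np.array([[5, 0], [10, 1]])
  state = np.array([0.5, 0.5])
  fitness = A.dot(state)  # expected payoff of each pure strategy
  return replicator(state, fitness)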