Beispiel #1
0
def FPI_transformation_once(strategies_profile, payoff_matrices, rate):
    """Apply one fixed-point-iteration (FPI) step to a two-player profile.

    Parameters
    ----------
    strategies_profile : tuple(np.ndarray, np.ndarray)
        (row player's mixed strategy, column player's mixed strategy).
    payoff_matrices : tuple(np.ndarray, np.ndarray)
        (row player's payoff matrix, column player's payoff matrix).
    rate : float
        Update rate forwarded to ``utils.vector_update``.

    Returns
    -------
    tuple(np.ndarray, np.ndarray)
        The updated (row, column) mixed-strategy pair.
    """
    str_row, str_col = strategies_profile
    payoff_matrix_row, payoff_matrix_col = payoff_matrices

    # Row player: expected payoff under the current profile, payoff at each
    # pure-strategy vertex, and the vertex-gain vector with negative gains
    # clipped to zero.  NOTE: np.identity(n).dot(M) == M, so the identity
    # factor in the original code was an O(n^3) no-op and has been dropped.
    payoff_row = str_row.dot(payoff_matrix_row).dot(str_col.T)
    vertex_payoff_row = payoff_matrix_row.dot(str_col.T)
    payoff_gain_row = vertex_payoff_row - payoff_row
    lambda_row = np.where(payoff_gain_row > 0, payoff_gain_row, 0)

    # Column player: symmetric computation on the transposed payoff matrix.
    payoff_col = str_col.dot(payoff_matrix_col.T).dot(str_row.T)
    vertex_payoff_col = payoff_matrix_col.T.dot(str_row.T)
    payoff_gain_col = vertex_payoff_col - payoff_col
    lambda_col = np.where(payoff_gain_col > 0, payoff_gain_col, 0)

    # The original `if False:` randomized-rate branch was dead code; the
    # effective rate is always the caller-supplied one.
    str_row = utils.vector_update(str_row, lambda_row, rate)
    str_col = utils.vector_update(str_col, lambda_col, rate)

    return (str_row, str_col)
Beispiel #2
0
    def converge(self, rate, iterations):
        """Run FPI for `iterations` steps, remembering the strategy profile
        with the smallest vertex-gain sum (VGS) seen along the way.

        Parameters
        ----------
        rate : float or sequence of two floats
            Update rate; a (row_rate, col_rate) list/tuple applies
            per-player rates.
        iterations : int
            Number of FPI steps to run.
        """
        vgss_old = 10**15  # sentinel: any real VGS will be smaller
        start_time = datetime.datetime.now()
        self.init_stats()
        for i in range(iterations):
            # Row player: expected payoff, vertex payoffs, and the clipped
            # vertex-gain vector.  np.identity(n).dot(M) == M, so the
            # identity factor in the original was a costly no-op — dropped.
            payoff_row = self.str_row.dot(self.payoff_matrix_row).dot(
                self.str_col.T)
            vertex_payoff_row = self.payoff_matrix_row.dot(self.str_col.T)
            payoff_gain_row = vertex_payoff_row - payoff_row
            self.lambda_row = np.where(payoff_gain_row > 0, payoff_gain_row, 0)

            # Column player: symmetric computation on the transposed matrix.
            payoff_col = self.str_col.dot(self.payoff_matrix_col.T).dot(
                self.str_row.T)
            vertex_payoff_col = self.payoff_matrix_col.T.dot(self.str_row.T)
            payoff_gain_col = vertex_payoff_col - payoff_col
            self.lambda_col = np.where(payoff_gain_col > 0, payoff_gain_col, 0)

            # Keep the profile with the smallest VGS seen so far.
            vgss = self.lambda_row.sum() + self.lambda_col.sum()
            if vgss < vgss_old:
                self.eqpt_row = self.str_row
                self.eqpt_col = self.str_col
                self.eqpt_lambda_row = self.lambda_row
                self.eqpt_lambda_col = self.lambda_col
                vgss_old = vgss

            # Collect stats to return.
            self.collect_stats()
            # NOTE: a commented-out randomized-rate branch was removed here —
            # with a random rate there is no fixed function to iterate, so it
            # would not be FPI anymore.

            # Apply the strengthening function `alpha` entrywise to each VGV.
            lambda_row_in_use = np.apply_along_axis(self.alpha, 0,
                                                    self.lambda_row)
            lambda_col_in_use = np.apply_along_axis(self.alpha, 0,
                                                    self.lambda_col)

            # Update mixed strategies, honoring optional per-player rates.
            if isinstance(rate, (list, tuple)):
                self.str_row = utils.vector_update(self.str_row,
                                                   lambda_row_in_use, rate[0])
                self.str_col = utils.vector_update(self.str_col,
                                                   lambda_col_in_use, rate[1])
            else:
                self.str_row = utils.vector_update(self.str_row,
                                                   lambda_row_in_use, rate)
                self.str_col = utils.vector_update(self.str_col,
                                                   lambda_col_in_use, rate)

        stop_time = datetime.datetime.now()
        print('*** rate: %s iters: %s time: %s' %
              (rate, iterations, stop_time - start_time))
Beispiel #3
0
    def run_one_iteration(self, game, rate):
        """One FPI step: recompute payoff, vertex gains, and update the
        mixed strategy (the core of everything)."""
        # Expected payoff of the current mixed strategy.
        self.payoff = self.payoff_vector.dot(game.compute_prob_dist())

        # Payoff at every pure-strategy vertex: feed each column of the
        # identity matrix through the joint-distribution helper.
        vertex_dists = np.apply_along_axis(
            game.compute_prob_dist, 0,
            np.identity(self.pure_strategies_num), self)
        vertex_payoff = self.payoff_vector.dot(vertex_dists)

        # Vertex-gain vector: negative gains are clipped to zero.
        gain = vertex_payoff - self.payoff
        self.target = np.where(gain > 0, gain, 0)

        # Move the strategy toward positive-gain vertices, then log the VGS.
        self.mixed_strategy = utils.vector_update(
            self.mixed_strategy, self.target, rate)
        self.vgs_l.append(self.target.sum())
Beispiel #4
0
 def FPI(self):
     """Run fixed-point iteration for `self.iters` steps and record the
     final strategy, vertex gain, payoff, angle, and gamma statistic.

     NOTE(review): assumes self.iters >= 1 — with zero iterations, `payoff`
     and `vertex_gain` would be unbound after the loop.
     """
     s = self.init_strategy
     for i in range(self.iters):
         payoff = s.dot(self.vertex_payoff)
         # Vertex-gain vector (VGV): positive part of vertex payoff minus
         # the current expected payoff.
         vertex_gain = np.where((self.vertex_payoff - payoff) > 0,
                                self.vertex_payoff - payoff, 0)
         # The original applied an identity lambda via np.apply_along_axis
         # here — a slow no-op left over from experimenting with
         # strengthening functions (e.g. 10**4 * x**2) — so it was removed.
         s = utils.vector_update(s, vertex_gain, self.rate)
     self.vertex_gain = vertex_gain
     self.strategy = s
     self.angle = utils.vector_angle(self.strategy, self.vertex_gain)
     self.payoff = payoff
     # gamma statistic: <VGV, VGV> / (1/rate + sum(VGV)).
     self.gamma = vertex_gain.dot(vertex_gain) / (1 / self.rate + vertex_gain.sum())
Beispiel #5
0
    def run_one_iteration(self, game, rate, player_index):
        """One FPI step for player `player_index` (the core of everything).

        Time complexity: (n-1)g^n multiplications, where g is the average
        number of pure strategies per player.
        """
        # Step 1: vertex payoff vector \vec{v} — payoff of committing to
        # each pure strategy j while the opponents keep their mixes.
        v = [
            game.compute_joint_dist_on_vertex(player_index, j).dot(
                self.payoff_vector)
            for j in np.arange(self.pure_strategies_num)
        ]
        # Step 2: expected payoff of the current mixed strategy.
        payoff = self.mixed_strategy.dot(v)
        # Step 3: vertex-gain vector (VGV), negative gains clipped to zero.
        gain = v - payoff
        self.VGV = np.where(gain > 0, gain, 0)
        # Step 4: stats — vertex-gain sum (VGS) and the strategy path.
        self.vgs_l.append(self.VGV.sum())
        self.path_l.append(self.mixed_strategy)
        # Step 5: move the strategy toward positive-gain vertices.
        self.mixed_strategy = utils.vector_update(self.mixed_strategy,
                                                  self.VGV, rate)
Beispiel #6
0
# Experiment: contraction behavior of the 2-strategy update map.
# NOTE(review): `r` (update rate), `vertex_payoff`, and the imports of
# `np` / `sys` / `utils` / `strengthen_functions` are defined outside this
# excerpt — confirm against the full file.
size = 2
iterations = 1 * 10**4
lambda_function = strengthen_functions.PF12
s1 = utils.randomize_mixed_strategy(size)
s2 = utils.randomize_mixed_strategy(size)
# s1 = np.array([0.5, 0.5, 0, 0, 0, 0])
# s2 = np.array([0, 0, 0, 0, 0.5, 0.5])
print('initial strategy:', s1)
print('initial strategy:', s2)

# Phase 1: drive two random strategies with the strengthening function and
# watch whether the distance between them shrinks at each step.
distance_l = []
distance_old = 10**8  # sentinel so the first ratio/comparison is well-defined
for i in range(iterations):
    # The target depends only on the first component; each pair stays a
    # 2-element probability vector by construction.
    target_1 = np.array([lambda_function(s1[0]), 1 - lambda_function(s1[0])])
    target_2 = np.array([lambda_function(s2[0]), 1 - lambda_function(s2[0])])
    s1 = utils.vector_update(s1, target_1, r)
    s2 = utils.vector_update(s2, target_2, r)
    distance = np.linalg.norm(s1 - s2)
    distance_l.append(distance / distance_old)
    # '+' marks an expansion step, '-' a contraction step.
    if distance > distance_old:
        print(i, '+', distance_old, distance)
    else:
        print(i, '-', distance_old, distance)
    distance_old = distance
    # print(distance)

print(distance_l[-100:])
print('final strategy:', s1.round(2), s2.round(2))
# print('final VG:', vertex_gain_1, vertex_gain_2)
# import matplotlib.pyplot as plt
# plt.plot(distance_l)
# Phase 2: continue from the phase-1 strategies using the vertex-gain (FPI)
# update and check the Lipschitz ratio; abort if the map ever expands.
distance_ratio_l = []
distance_old = 10**8
distance_ratio_old = 0
s1_l = []
s2_l = []
for i in range(iterations):
    s1_l.append(s1)
    s2_l.append(s2)
    distance = np.linalg.norm(s1 - s2)
    payoff_1 = s1.dot(vertex_payoff)
    payoff_2 = s2.dot(vertex_payoff)
    # Clipped vertex-gain vectors for each trajectory.
    vertex_gain_1 = np.where((vertex_payoff - payoff_1) > 0,
                             vertex_payoff - payoff_1, 0)
    vertex_gain_2 = np.where((vertex_payoff - payoff_2) > 0,
                             vertex_payoff - payoff_2, 0)
    s1 = utils.vector_update(s1, vertex_gain_1, r)
    s2 = utils.vector_update(s2, vertex_gain_2, r)
    # print(distance)
    distance_ratio = distance / distance_old
    distance_ratio_l.append(distance_ratio)
    # print(distance_ratio)
    # Expansion would contradict the Lipschitz K < 1 hypothesis — bail out.
    if distance >= distance_old:
        print(i, 'Lipschitz K>=1, wrong: %s >= %s' % (distance, distance_old))
        sys.exit(1)
    if distance_ratio <= distance_ratio_old:
        print(i, 'Lipschitz K decreasing, unexpected!')
        # sys.exit(1)
    distance_old = distance
    distance_ratio_old = distance_ratio

print('final strategy:', s1, s2)
Beispiel #8
0
import numpy as np
import utils

# Experiment: verify the per-step payoff gain against the gamma statistic.
r = 10**-4  # FPI update rate
size = 6
# NOTE(review): `iterations` is unused — the loop below runs 1000 steps;
# confirm which count was intended.
iterations = 1 * 10**4
s = utils.randomize_mixed_strategy(size)
vertex_payoff = utils.randomize_payoff_vector(size)
# vertex_payoff = np.array([0.1, 0.7, 0.7, 0.7, -2, -1])
vertex_payoff *= 1000  # scale payoffs so gains are non-negligible at this rate
print('initial strategy:', s)
print('payoff:', vertex_payoff)

# Each printed value is (actual one-step payoff gain) - gamma.
# The unused `payoff_old` variable from the original was removed.
for i in range(1000):
    payoff = s.dot(vertex_payoff)
    vertex_gain_origin = np.where((vertex_payoff - payoff) > 0,
                                  vertex_payoff - payoff, 0)
    # Squared strengthening function; `** 2` replaces the equivalent but
    # much slower np.apply_along_axis(lambda x: x**2, 0, ...).
    vertex_gain = vertex_gain_origin ** 2
    # vertex_gain = vertex_gain_origin  # the vanilla (identity) variant
    s = utils.vector_update(s, vertex_gain, r)
    payoff_new = s.dot(vertex_payoff)
    # gamma statistic: <VG, VG_origin> / (1/r + sum(VG)).
    gamma = vertex_gain.dot(vertex_gain_origin) / (1.0 / r + vertex_gain.sum())
    print(payoff_new - payoff - gamma)

print('final strategy:', s)
print('final VG:', vertex_gain)
Beispiel #9
0
    # t = s * p
    # t = strengthen_functions.PF12(s)
    t = strengthen_functions.PF30(s)
    # t = s * s * s + s * s + s
    # t = t / t.sum()
    return t


# Track the angle between the strategy and its update target over `iters`
# steps.  NOTE(review): `s`, `r`, `iters`, and `utils` are defined outside
# this excerpt; `target_function` is the partially shown helper above —
# confirm against the full file.
angle_old = -1  # sentinel below any real angle, so step 0 prints '+'
angle_l = []
target_l = []
for i in range(iters):
    target = target_function(s)
    # print(s, target)
    angle = utils.vector_angle(s, target)
    s = utils.vector_update(s, target, r)
    angle_l.append(angle)
    target_l.append((target**2).sum())
    # '+' marks a non-decreasing angle, '-' a decreasing one.
    if angle >= angle_old:
        print('+', angle_old, angle)
        # print('s', s)
        # print('p', p)
        # print('target', target)
    else:
        print('-', angle_old, angle)
        # print('s', s)
        # print('p', p)
        # print('target', target)
        # break
    angle_old = angle
Beispiel #10
0
import numpy as np
import strengthen_functions
import utils

# Trace the trajectory of a random 2-component mixed strategy driven by
# strengthening function PF80, then scatter-plot the visited points.
# s = utils.randomize_mixed_strategy(2)
p0 = np.random.rand()
s = np.array([p0, 1 - p0])
print(s)
s_l = []
for _ in range(10 ** 5):
    # The target is determined by the first component; the pair remains a
    # probability vector by construction.
    t0 = strengthen_functions.PF80(s[0])
    target = np.array([t0, 1 - t0])
    s = utils.vector_update(s, target, 10 ** -4)
    s_l.append(s)

import matplotlib.pyplot as plt

print(s_l[-1])
xs = [point[0] for point in s_l]
ys = [point[1] for point in s_l]
plt.scatter(xs, ys)
plt.show()