def test_lemke_howson_enumeration(self):
    """Check Lemke-Howson enumeration against known equilibria of two 2x2 games."""
    pure_equilibria = [
        (np.array([1, 0]), np.array([1, 0])),
        (np.array([0, 1]), np.array([0, 1])),
    ]
    mixed_equilibrium = (np.array([1 / 2, 1 / 2]), np.array([1 / 2, 1 / 2]))

    # (row payoffs, column payoffs, equilibria in the order they are yielded)
    scenarios = (
        (
            np.array([[3, 1], [0, 2]]),
            np.array([[2, 1], [0, 3]]),
            pure_equilibria * 2,
        ),
        (
            np.array([[3, 1], [1, 3]]),
            np.array([[1, 3], [3, 1]]),
            [mixed_equilibrium] * 4,
        ),
    )

    for row_payoffs, column_payoffs, wanted in scenarios:
        game = nash.Game(row_payoffs, column_payoffs)
        found_equilibria = game.lemke_howson_enumeration()
        for found_pair, wanted_pair in zip(found_equilibria, wanted):
            for found, target in zip(found_pair, wanted_pair):
                self.assertTrue(all(np.isclose(found, target)))
def NashEquilibriumSolver(A, B=None):
    """
    Quickly solve *one* Nash equilibrium with the lexicographic Lemke-Howson
    algorithm.

    Plain Lemke-Howson needs a non-degenerate game; the lexicographic variant
    (`lemke_howson_lex`) is used here because it also copes with degenerate
    payoff matrices.
    Ref: https://nashpy.readthedocs.io/en/stable/reference/lemke-howson.html#lemke-howson
    Ref: https://github.com/newaijj/Nashpy/blob/ffea3522706ad51f712d42023d41683c8fa740e6/tests/unit/test_lemke_howson_lex.py#L9
    TODO: the solver sometimes gives nan or wrong dimensions, see
    https://github.com/drvinceknight/Nashpy/issues/35

    Args:
        A: 2-D payoff matrix of the row player.
        B: 2-D payoff matrix of the column player. When omitted the game is
            treated as zero-sum and `-A` is used.

    Returns:
        The first equilibrium returned by `lemke_howson_lex` whose strategies
        have the right lengths and contain no NaN.

    Raises:
        ValueError: if no initial dropped label yields a valid equilibrium.
    """
    n_rows, n_cols = A.shape
    column_payoffs = -A if B is None else B

    # The Lemke-Howson implementation occasionally returns strategies of the
    # wrong shape or containing NaN, so try every initial dropped label
    # (0 .. n_rows + n_cols - 1, inclusive) until one gives a usable answer.
    for label in range(n_rows + n_cols):
        eq = lemke_howson_lex(A, column_payoffs, initial_dropped_label=label)
        row_strategy, column_strategy = eq[0], eq[1]
        has_valid_shape = (row_strategy.shape[0] == n_rows
                           and column_strategy.shape[0] == n_cols)
        if (has_valid_shape and not np.isnan(row_strategy).any()
                and not np.isnan(column_strategy).any()):
            return eq

    raise ValueError('No valid Nash equilibrium is found!')
def adaptive_strategy(p_list_1, p_list_2, n_split=40, n_rep=40):
    """Choose each player's action from the averaged game or from their own game.

    `split_and_compare_perturbed_games` is evaluated once per payoff list
    (with `player_ix=1` for the second). Only when both returned differences
    are positive are the two players' payoff estimates averaged and both
    actions taken from the welfare-optimal equilibrium of that averaged game.
    Otherwise player 1 plays their part of the equilibrium of their own game
    and player 2 plays their part of the equilibrium of theirs.

    Args:
        p_list_1: payoff list of player 1, as accepted by
            `get_payoffs_from_list`.
        p_list_2: payoff list of player 2, same format.
        n_split: forwarded to `split_and_compare_perturbed_games`.
        n_rep: forwarded to `split_and_compare_perturbed_games`.

    Returns:
        Tuple `(a1, a2, mean_diff_1, mean_diff_2)`.
    """
    mean_diff_1 = split_and_compare_perturbed_games(
        p_list_1, n_split=n_split, n_rep=n_rep)
    mean_diff_2 = split_and_compare_perturbed_games(
        p_list_2, n_split=n_split, n_rep=n_rep, player_ix=1)
    averaging_is_better_1 = mean_diff_1 > 0
    averaging_is_better_2 = mean_diff_2 > 0

    _, _, p1_1, p2_1 = get_payoffs_from_list(p_list_1)
    _, _, p1_2, p2_2 = get_payoffs_from_list(p_list_2)

    # Average like with like: the two estimates of player 1's payoffs, and the
    # two estimates of player 2's payoffs.
    p1_avg = (p1_1 + p1_2) / 2
    p2_avg = (p2_1 + p2_2) / 2

    if averaging_is_better_1 and averaging_is_better_2:
        a1, a2, _ = get_welfare_optimal_eq(nash.Game(p1_avg, p2_avg))
    else:
        a1 = get_welfare_optimal_eq(nash.Game(p1_1, p2_1))[0]
        a2 = get_welfare_optimal_eq(nash.Game(p1_2, p2_2))[1]
    return a1, a2, mean_diff_1, mean_diff_2
def _first_acceptable_value(game, equilibria):
    """Return the row player's payoff at the first acceptable equilibrium.

    An equilibrium is accepted when the payoff is finite with magnitude at
    most 100 and both strategies have an absolute sum below 1.1. Returns
    None when `equilibria` contains no such equilibrium.
    """
    for eq in equilibria:
        policy_ct = eq[0]
        policy_ad = eq[1]
        reward_ct = game[policy_ct, policy_ad][0]
        if (not (math.isnan(reward_ct) or math.isinf(reward_ct))
                and abs(reward_ct) <= 100
                and sum(abs(policy_ct)) < 1.1
                and sum(abs(policy_ad)) < 1.1):
            return reward_ct
    return None


def NashEqu(output, Game, state, j):
    """Compute the value of the matrix game at `state` and put `(j, value)` on `output`.

    The matrix comes from `Game.createGameMatrix(state)`. If `saddlePoint`
    reports success its value is used directly. Otherwise the matrix is
    solved as a single-matrix nashpy game, first by support enumeration and
    then, if that gives no acceptable equilibrium, by vertex enumeration.

    When neither method succeeds nothing is put on `output`; the matrix and
    the state are printed instead.
    """
    M = Game.createGameMatrix(state)
    flag, V = saddlePoint(M)
    if flag:
        output.put((j, V))
        return

    rps = nash.Game(M)
    # Support enumeration first; vertex enumeration only as a fallback.
    for enumerate_equilibria in (rps.support_enumeration,
                                 rps.vertex_enumeration):
        value = _first_acceptable_value(rps, enumerate_equilibria())
        if value is not None:
            output.put((j, value))
            return

    print("WDNMD")
    print("M is:", M)
    print("state is:", state)
def NashEquilibriaSolver(A, B=None):
    """
    Enumerate the Nash equilibria of a game by vertex enumeration.

    With only `A` given the game is built from that single matrix
    (zero-sum); otherwise `A` and `B` are the two players' payoff matrices.
    Returns a list: [(nash1_p1, nash1_p2), (nash2_p1, nash2_p2), ...]
    """
    payoffs = (A,) if B is None else (A, B)
    game = nash.Game(*payoffs)
    return list(game.vertex_enumeration())
def test_vertex_enumeration_for_bi_matrix(self):
    """Compare vertex enumeration with known equilibria of three bi-matrix games."""
    # Each entry: (row payoffs, column payoffs, expected equilibria in order)
    scenarios = [
        (
            np.array([[160, 205, 44], [175, 180, 45], [201, 204, 50],
                      [120, 207, 49]]),
            np.array([[2, 2, 2], [1, 0, 0], [3, 4, 1], [4, 1, 2]]),
            [(np.array([0, 0, 3 / 4, 1 / 4]),
              np.array([1 / 28, 27 / 28, 0]))],
        ),
        (
            np.array([[1, 0], [-2, 3]]),
            np.array([[3, 2], [-1, 0]]),
            [
                (np.array([1, 0]), np.array([1, 0])),
                (np.array([0, 1]), np.array([0, 1])),
                (np.array([1 / 2, 1 / 2]), np.array([1 / 2, 1 / 2])),
            ],
        ),
        (
            np.array([[2, 1], [0, 2]]),
            np.array([[2, 0], [1, 2]]),
            [
                (np.array([1, 0]), np.array([1, 0])),
                (np.array([0, 1]), np.array([0, 1])),
                (np.array([1 / 3, 2 / 3]), np.array([1 / 3, 2 / 3])),
            ],
        ),
    ]

    for row_payoffs, column_payoffs, expected_equilibria in scenarios:
        game = nash.Game(row_payoffs, column_payoffs)
        pairs = zip(game.vertex_enumeration(), expected_equilibria)
        for obtained, expected in pairs:
            for found, target in zip(obtained, expected):
                self.assertTrue(
                    all(np.isclose(found, target)),
                    msg="obtained: {} !=expected: {}".format(
                        obtained, expected),
                )
def test_zero_sum_game_init(self, A):
    """A single matrix, given as an array or as nested lists, makes a zero sum game."""
    for payoffs in (A, A.tolist()):
        game = nash.Game(payoffs)
        row_matrix = game.payoff_matrices[0]
        column_matrix = game.payoff_matrices[1]
        self.assertTrue(np.array_equal(row_matrix, np.asarray(payoffs)))
        # The column player's payoffs are the negated row player's payoffs
        self.assertTrue(np.array_equal(row_matrix, -column_matrix))
        self.assertTrue(game.zero_sum)
def test_bi_matrix_init(self, A, B):
    """A bi matrix game can be created from arrays as well as from lists."""
    game = nash.Game(A, B)
    self.assertEqual(game.payoff_matrices, (A, B))

    # The zero_sum flag must be set exactly when B is the negation of A
    check_flag = (self.assertTrue
                  if np.array_equal(A, -B) else self.assertFalse)
    check_flag(game.zero_sum)

    # Same game, this time initialised from nested lists
    rows_as_list = A.tolist()
    columns_as_list = B.tolist()
    game = nash.Game(rows_as_list, columns_as_list)
    stored_rows, stored_columns = (game.payoff_matrices[0],
                                   game.payoff_matrices[1])
    self.assertTrue(np.array_equal(stored_rows, np.asarray(rows_as_list)))
    self.assertTrue(
        np.array_equal(stored_columns, np.asarray(columns_as_list)))
def test_stochastic_fictitious_play(self, A, B, seed):
    """The Game method must reproduce the module-level stochastic fictitious play."""
    iterations = 10
    g = nash.Game(A, B)

    # Reference run straight from the learning module
    np.random.seed(seed)
    reference_run = nashpy.learning.stochastic_fictitious_play.stochastic_fictitious_play(
        *g.payoff_matrices, iterations=iterations)
    expected_outcome = tuple(reference_run)

    # Same seed again, this time through the Game wrapper
    np.random.seed(seed)
    outcome = tuple(g.stochastic_fictitious_play(iterations=iterations))

    assert len(outcome) == iterations + 1
    assert len(expected_outcome) == iterations + 1

    for step, expected_step in zip(outcome, expected_outcome):
        (row_play, column_play), (row_dist, column_dist) = step
        ((expected_row_play, expected_column_play),
         (expected_row_dist, expected_column_dist)) = expected_step
        assert np.allclose(column_dist, expected_column_dist)
        assert np.allclose(row_dist, expected_row_dist)
        assert np.allclose(column_play, expected_column_play)
        assert np.allclose(row_play, expected_row_play)
def Nash(state, Agent1, Agent2):
    '''Calculate nash equilibrium on current state and Q(s) as rewards.

    The payoff matrices are read from each agent's `q_table` row for `state`,
    indexed by the joint action `(action1, action2)`; rows follow
    `Agent1.actions` and columns follow `Agent2.actions`.

    Returns the first equilibrium from Lemke-Howson enumeration whose two
    strategies have the expected lengths and contain no NaN. If there is
    none, both players get the uniform distribution over their actions.
    '''
    Agent1.check_state_exist(state)
    Agent2.check_state_exist(state)

    n_actions_1 = len(Agent1.actions)
    n_actions_2 = len(Agent2.actions)

    q_row_1 = Agent1.q_table.loc[state]
    q_row_2 = Agent2.q_table.loc[state]
    q_1 = [[q_row_1[(action1, action2)] for action2 in Agent2.actions]
           for action1 in Agent1.actions]
    q_2 = [[q_row_2[(action1, action2)] for action2 in Agent2.actions]
           for action1 in Agent1.actions]

    game = nashpy.Game(np.array(q_1), np.array(q_2))

    # Lemke-Howson can return vectors of the wrong shape or containing NaN;
    # skip those and stop at the first usable equilibrium.
    for eq in game.lemke_howson_enumeration():
        has_valid_shape = (eq[0].shape == (n_actions_1, )
                           and eq[1].shape == (n_actions_2, ))
        if (has_valid_shape and not np.isnan(eq[0]).any()
                and not np.isnan(eq[1]).any()):
            return eq[0], eq[1]

    # No usable equilibrium: fall back to uniform play for both agents
    pi1 = np.repeat(1.0 / n_actions_1, n_actions_1)
    pi2 = np.repeat(1.0 / n_actions_2, n_actions_2)
    return pi1, pi2
def random_nash(sigma_x=1, n=10):
    """Print how often two independently perturbed games lead to a crash.

    Four batches of `n` noisy 2x2 payoff matrices are drawn; only the first
    row of each matrix receives noise (standard deviation `sigma_x`). For
    every sample, player 1's strategy comes from the welfare-optimal
    equilibrium of the first game and player 2's from that of the second.
    A crash is counted when both strategies put probability 1 on their
    first action. The mean crash indicator is printed.
    """
    u1_mean = np.array([[-10, 0], [-3, -1]])
    u2_mean = np.array([[-10, -3], [0, -1]])
    sigma_x_mat = np.array([[sigma_x, sigma_x], [0., 0.]])

    # Draw order is u1_1, u1_2, u2_1, u2_2 so the random stream is consumed
    # in a fixed sequence.
    u1_1, u1_2, u2_1, u2_2 = [
        np.random.normal(loc=mean, scale=sigma_x_mat, size=(n, 2, 2))
        for mean in (u1_mean, u2_mean, u1_mean, u2_mean)
    ]

    crash_lst = []
    for game_1_p1, game_1_p2, game_2_p1, game_2_p2 in zip(
            u1_1, u1_2, u2_1, u2_2):
        a1_1, _, _ = get_welfare_optimal_eq(nash.Game(game_1_p1, game_1_p2))
        _, a2_2, _ = get_welfare_optimal_eq(nash.Game(game_2_p1, game_2_p2))
        crash_lst.append(a1_1[0] == a2_2[0] == 1)
    print(np.mean(crash_lst))
def nash_strategy(solver, return_all=False):
    """Returns nash distribution on meta game matrix.

    This method only works for two player general-sum games.

    Args:
      solver: GenPSROSolver instance.
      return_all: if return all NE or random one.

    Returns:
      Nash distribution on strategies: the list of all equilibria found by
      support enumeration when `return_all` is true, otherwise one uniformly
      drawn equilibrium as a list `[p1_strategy, p2_strategy]`.

    Raises:
      ValueError: if the meta game has more than two players, or if a single
        equilibrium is requested and support enumeration found none.
    """
    # NOTE(review): `get_meta_game` is read as an attribute here; if it is a
    # plain method on the solver it needs to be called -- confirm.
    meta_games = solver.get_meta_game
    if not isinstance(meta_games, list):
        # A single payoff matrix is treated as a zero-sum game
        meta_games = [meta_games, -meta_games]
    if len(meta_games) > 2:
        raise ValueError(
            "Nash solver only works for two player general-sum games. Number of players > 2."
        )

    p1_payoff, p2_payoff = meta_games[0], meta_games[1]
    game = nash.Game(p1_payoff, p2_payoff)
    NE_list = list(game.support_enumeration())

    if return_all:
        return NE_list
    if not NE_list:
        raise ValueError("Support enumeration found no Nash equilibrium.")
    # np.random.choice only accepts 1-D input, and a list of strategy tuples
    # is not 1-D, so draw an index and look the equilibrium up.
    chosen = np.random.choice(len(NE_list))
    return list(NE_list[chosen])
def updateNashEquilibrium(self, number_of_games):
    """Recompute the epsilon-smoothed Nash distribution over the agents.

    Every pair of agents is evaluated with `self.game_tester.getAverageReward`
    to fill an antisymmetric reward matrix. The row strategy of the first
    equilibrium found by support enumeration on that matrix is mixed with a
    uniform distribution (weight `eps`) and stored in
    `self.nash_distribution`; `self.nash_categorical` is rebuilt from it.

    Args:
        number_of_games: forwarded to `getAverageReward` for each pairing.

    Raises:
        ValueError: if support enumeration yields no equilibrium.
    """
    n_agents = self.number_of_agents
    reward_matrix = np.zeros((n_agents, n_agents))
    for i in range(n_agents):
        for j in range(i + 1, n_agents):
            average_reward, _ = self.game_tester.getAverageReward(
                self.agents[i], self.agents[j], number_of_games)
            reward_matrix[i][j] = average_reward
            reward_matrix[j][i] = -average_reward

    for row in reward_matrix:
        print(row)

    # Only the first equilibrium is used, so stop enumerating after it
    equilibria = nash.Game(reward_matrix).support_enumeration()
    first_equilibrium = next(iter(equilibria), None)
    if first_equilibrium is None:
        raise ValueError(
            "Support enumeration found no Nash equilibrium for the reward matrix.")
    row_strategy = torch.FloatTensor(first_equilibrium[0])

    # Add epsilon to the nash equilibrium and normalise: the uniform part
    # sums to eps and the strategy to 1, so dividing the *whole* sum by
    # (1 + eps) gives a distribution that sums to 1.
    eps = 0.2
    self.nash_distribution = (eps / n_agents + row_strategy) / (1 + eps)
    print(self.nash_distribution)
    self.nash_categorical = Categorical(self.nash_distribution)
def test_incorrect_dimensions_init(self):
    """Payoff matrices of different shapes must be rejected with ValueError."""
    two_by_three = np.array([[1, 2, 3], [4, 5, 6]])
    two_by_two = np.array([[1, 2], [3, 4]])
    with pytest.raises(ValueError):
        nash.Game(two_by_three, two_by_two)
def nash_mixture_from_payoff(self, payoffa, payoffb, sgs, sds):
    """Build the generator and discriminator mixtures from one Nash equilibrium.

    The equilibrium of the game `(payoffa, payoffb)` is computed with the
    method named by `self.config.nash_method` ('support', 'lemke', anything
    else means vertex enumeration). If support or vertex enumeration fails,
    both players fall back to the pure strategy on their first entry.

    `sgs` and `sds` are accepted for interface compatibility; the lengths
    are taken from `self.sgs` and `self.sds`.

    Returns:
        `(p1, p1result, p2, p2result)`: the flattened strategies and the
        results of `self.destructive_mixture_g` / `self.destructive_mixture_d`
        applied to them, or `[None, None, None, None]` when the first
        strategy's length differs from `len(self.sgs)`.
    """

    def _update_g(p):
        p = np.reshape(p, [-1])
        result = self.destructive_mixture_g(p)
        return p, result

    def _update_d(p):
        p = np.reshape(p, [-1])
        result = self.destructive_mixture_d(p)
        return p, result

    def _pure_first_strategies():
        # u[0] feeds the generator mixture and is length-checked against
        # self.sgs below, so it is sized by sgs; u[1] is sized by sds.
        u = [
            list(np.zeros(len(self.sgs))),
            list(np.zeros(len(self.sds)))
        ]
        u[0][0] = 1.
        u[1][0] = 1.
        return u

    nash_method = self.config.nash_method
    if nash_method == 'support':
        try:
            u = next(nash.Game(payoffa, payoffb).support_enumeration())
        except StopIteration:
            print("Nashpy 'support' iteration failed. Using 1,0,0...")
            u = _pure_first_strategies()
    elif nash_method == 'lemke':
        u = next(nash.Game(payoffa, payoffb).lemke_howson_enumeration())
    else:
        try:
            u = next(nash.Game(payoffa, payoffb).vertex_enumeration())
        except (StopIteration, scipy.spatial.qhull.QhullError):
            print("Nashpy 'vertex' iteration failed. Using 1,0,0...")
            u = _pure_first_strategies()

    if len(u[0]) != len(self.sgs):
        return [None, None, None, None]
    p1, p1result = _update_g(u[0])
    p2, p2result = _update_d(u[1])
    return p1, p1result, p2, p2result
def test_support_enumeration_for_deg_bi_matrix_game_with_low_tol(self):
    """With tol=0 the all-zero 2x2 game gives 4 equilibria and a RuntimeWarning."""
    all_zero_payoffs = np.array([[0, 0], [0, 0]])
    game = nash.Game(all_zero_payoffs)
    with warnings.catch_warnings(record=True) as caught:
        equilibria = list(game.support_enumeration(tol=0))
        self.assertEqual(len(equilibria), 4)
        self.assertGreater(len(caught), 0)
        # The most recent recorded warning must be a RuntimeWarning
        self.assertEqual(caught[-1].category, RuntimeWarning)
def __init__(self, A, B):
    """Initialise the base `Game` and wrap a nashpy game built from A and B."""
    Game.__init__(self)
    # Example layout, each cell written as "A entry,B entry":
    #   5,1  3,5
    #   2,0  2,0
    # A = [[5, 3], [2, 2]]
    # B = [[1, 5], [0, 0]]
    self.game = nash.Game(A, B)
def test_replicator_dynamics(self):
    """The last replicator dynamics state must match the known end point."""
    payoffs = np.array([[3, 2], [4, 1]])
    initial_population = np.array([0.9, 0.1])
    time_grid = np.linspace(0, 10, 100)

    trajectory = nash.Game(payoffs).replicator_dynamics(
        initial_population, time_grid)

    expected_final = np.array([[0.50449178, 0.49550822]])
    assert np.allclose(trajectory[-1], expected_final)
def get_equilibria(p, c, low_cost, high_cost, commit_prior_if_committed,
                   commit_prior_if_not_committed, cost_prior):
    """Build the game from `create_payoff_matrix` and return it with its first equilibrium.

    All arguments are forwarded unchanged to `create_payoff_matrix`, which
    supplies the threatener's and the target's payoff matrices. Returns
    `(game, equilibrium)` where `equilibrium` is the first one found by
    support enumeration.
    """
    payoff_pair = create_payoff_matrix(p, c, low_cost, high_cost,
                                       commit_prior_if_committed,
                                       commit_prior_if_not_committed,
                                       cost_prior)
    payoffs_threatener, payoffs_target = payoff_pair
    game = nashpy.Game(payoffs_threatener, payoffs_target)
    # ToDo: check for multiple equilibria?
    all_equilibria = list(game.support_enumeration())
    return game, all_equilibria[0]
def solve_nashpy(game):
    """Solve a single-matrix game with nashpy's support enumeration.

    Args:
        game: payoff matrix passed to `nash.Game` as its only argument.

    Returns:
        `(y, z)`: the two strategies of the first equilibrium found.

    Raises:
        ValueError: if support enumeration finds no equilibrium.
    """
    import nashpy as nash

    rps = nash.Game(game)
    # Each equilibrium is a pair of strategies. Take both strategies from
    # the first equilibrium rather than the first two equilibria, which are
    # unrelated and need not both exist.
    first_equilibrium = next(iter(rps.support_enumeration()), None)
    if first_equilibrium is None:
        raise ValueError("Support enumeration found no Nash equilibrium.")
    y, z = first_equilibrium
    return y, z
def calculate_equilibria_vertex_enum(self, A=None, B=None):
    """
    Uses `self.dask_A` and `self.dask_B` when neither A nor B is given.

    :return: List of equilibria for the game found via vertex enumeration
    """
    use_stored_matrices = A is None and B is None
    payoffs = (self.dask_A, self.dask_B) if use_stored_matrices else (A, B)
    return list(nash.Game(*payoffs).vertex_enumeration())
def calc_NE(pa_payoff, po_payoff):
    """Return one Nash equilibrium of the bi-matrix game via Lemke-Howson.

    Initial dropped labels `0 .. pa_payoff.shape[0] - 1` are tried in turn
    and the first run that finishes without a RuntimeWarning is returned.
    If every label warns, the result for label 0 is returned anyway (with
    its warning visible), which is what callers received before.

    Args:
        pa_payoff: payoff matrix of the row player (needs `.shape`).
        po_payoff: payoff matrix of the column player.

    Returns:
        `(row_strategy, column_strategy)`.
    """
    import warnings

    game = nash.Game(pa_payoff, po_payoff)
    for label in range(pa_payoff.shape[0]):
        # Warnings are not exceptions under the default filters, so promote
        # RuntimeWarning to an error here; otherwise the except clause can
        # never trigger and a bad result for label 0 is always accepted.
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            try:
                eq = game.lemke_howson(label)
            except RuntimeWarning:
                continue
        strategies = list(eq)
        return strategies[0], strategies[1]

    # Best effort: no label ran cleanly
    strategies = list(game.lemke_howson(0))
    return strategies[0], strategies[1]
def test_lemke_howson_for_bi_matrix(self):
    """Lemke-Howson with dropped label 4 must give the known equilibrium of a 4x3 game."""
    row_payoffs = np.array([[160, 205, 44], [175, 180, 45], [201, 204, 50],
                            [120, 207, 49]])
    column_payoffs = np.array([[2, 2, 2], [1, 0, 0], [3, 4, 1], [4, 1, 2]])
    game = nash.Game(row_payoffs, column_payoffs)

    expected_row = np.array([0, 0, 3 / 4, 1 / 4])
    expected_column = np.array([1 / 28, 27 / 28, 0])

    obtained = game.lemke_howson(initial_dropped_label=4)
    for strategy, target in zip(obtained, (expected_row, expected_column)):
        self.assertTrue(all(np.isclose(strategy, target)))
def value_of_matrix1(M):
    """Return the row player's payoff at a Lemke-Howson equilibrium of M, using Nashpy.

    M is passed to `nash.Game` as a single matrix and the equilibrium is
    computed with initial dropped label 0.
    """
    zsgame = nash.Game(M)
    equilibrium = zsgame.lemke_howson(initial_dropped_label=0)
    row_strategy = equilibrium[0]
    column_strategy = equilibrium[1]
    payoffs = zsgame[row_strategy, column_strategy]
    return payoffs[0]
def game_theory():
    """Print a short nashpy walkthrough: two games and their equilibria.

    First a 3x3 game with B = -A (the variable is named `rps`): the game
    itself, the utilities of one pair of mixed strategies, and the
    equilibria found by support enumeration. Then the prisoner's dilemma
    and its equilibria.
    """
    A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]])
    B = -A
    rps = nash.Game(A, B)
    print(rps)

    # utility of a pair of strategies
    sigma_c = [1 / 2, 1 / 2, 0]
    sigma_r = [0, 1 / 2, 1 / 2]
    print(rps[sigma_r, sigma_c])

    # Nash equilibria
    eqs = rps.support_enumeration()
    print(list(eqs))

    # prisoner's dilemma
    P1 = np.array([[3, 0], [4, 1]])
    P2 = np.array([[3, 4], [0, 1]])
    prisoner_dilemma = nash.Game(P1, P2)
    print(prisoner_dilemma)
    eqs = prisoner_dilemma.support_enumeration()
    # support_enumeration returns a generator; materialise it so the
    # equilibria are printed rather than the generator object.
    print(list(eqs))
def nash_equilibrium(self):
    """Print the first-action probabilities of every support-enumeration equilibrium.

    The game is built from `self.Rowena` and `self.Collin`. For each
    equilibrium, p is the first entry of the first strategy and q the first
    entry of the second strategy.
    """
    game = nash.Game(self.Rowena, self.Collin)
    # Enumerate everything first so the header is printed after the solve
    strategy_pairs = [(eq[0], eq[1]) for eq in game.support_enumeration()]

    print("The nash equilibrium probabilities are:")
    for row_strategy, column_strategy in strategy_pairs:
        print("(p={}, q={})".format(row_strategy[0], column_strategy[0]))
def find_nash(full_matrix, disp_strategy_idx):
    """Solve the sub-game selected by `disp_strategy_idx`, or report 'Inconsistent'.

    The slice `full_matrix[disp_strategy_idx, :, :, 1:]` supplies both
    players' utilities through `extract_player_utility`. The equilibria from
    support enumeration are filtered by `skip_mixed_strategy`. If nothing
    remains, or `is_system_consistent` rejects what remains, the string
    'Inconsistent' is returned; otherwise the result of `extract_solution`.
    """
    matrix = full_matrix[disp_strategy_idx, :, :, 1:]
    A = extract_player_utility(matrix, 0)
    B = extract_player_utility(matrix, 1)

    candidates = nash.Game(A, B).support_enumeration()
    equs = skip_mixed_strategy(candidates)

    if not equs or not is_system_consistent(equs, A, B):
        return 'Inconsistent'
    return extract_solution(equs)
def get_prob_of_defection(payoff_matrix, support_enumeration=True):
    """
    Compute the Nash equilibria of the game defined by `payoff_matrix` and
    its transpose, and summarise the probability of defection.

    Args:
        payoff_matrix: numpy array containing the payoffs obtained for each
            action of the game; the second player's matrix is its transpose.
        support_enumeration: boolean; when true the support enumeration
            algorithm is used, otherwise the vertex enumeration algorithm.

    Returns:
        A dictionary with the keys "nash equilibria" (numpy array built from
        the equilibria, or from None when there are none), "least prob of
        defect" and "greatest prob of defect" (minimum and maximum of the
        last entry of the first player's strategy over all equilibria, or
        None), and "warning message" (string form of the warnings recorded
        while solving, "None" when there were none).
    """
    game = nash.Game(payoff_matrix, payoff_matrix.transpose())

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        if support_enumeration:
            nash_equilibria = list(game.support_enumeration())
        else:
            # Surface numpy floating point problems as warnings only while
            # solving; errstate restores the previous global settings.
            with np.errstate(all="warn"):
                nash_equilibria = list(game.vertex_enumeration())

    if len(w) == 0:
        warning_message = None
    else:
        warning_message = str([warning.message for warning in w])

    if len(nash_equilibria) == 0:
        nash_equilibria = None
        least_prob_of_defection_in_equilibria = None
        greatest_prob_of_defection_in_equilibria = None
    else:
        prob_of_defection_in_equilibria = [
            sigma_1[-1] for sigma_1, _ in nash_equilibria
        ]
        least_prob_of_defection_in_equilibria = min(
            prob_of_defection_in_equilibria)
        greatest_prob_of_defection_in_equilibria = max(
            prob_of_defection_in_equilibria)

    get_prob_of_defect_output_dict = {
        "nash equilibria": np.array(nash_equilibria),
        "least prob of defect": least_prob_of_defection_in_equilibria,
        "greatest prob of defect": greatest_prob_of_defection_in_equilibria,
        "warning message": str(warning_message),
    }
    return get_prob_of_defect_output_dict
def test_property_support_enumeration(self, A, B):
    """Every equilibrium strategy must be a probability vector of the right size."""
    expected_sizes = [3, 4]  # first strategy has 3 entries, second has 4
    game = nash.Game(A, B)
    for equilibrium in game.support_enumeration():
        for position, strategy in enumerate(equilibrium):
            # Sums to one, up to numerical error
            self.assertAlmostEqual(strategy.sum(), 1)
            # Has the expected number of entries
            self.assertEqual(strategy.size, expected_sizes[position])
            # Has no negative entries
            self.assertTrue(all(strategy >= 0))
def test_zero_sum_repr(self): """Test the string representation of a zero sum game""" A = np.array([[1, -1], [-1, 1]]) g = nash.Game(A) string_repr = """Zero sum game with payoff matrices: Row player: [[ 1 -1] [-1 1]] Column player: [[-1 1] [ 1 -1]]""" self.assertEqual(g.__repr__(), string_repr)