def test_constant_sum_transition_matrix(self):
    """Checks the constant-sum closed form against the general-sum path.

    The single-population transition matrix of rock-paper-scissors is
    computed once with the general-sum settings and once with the
    constant-sum settings reported by `utils.check_is_constant_sum`; both
    results must agree.
    """
    rps = pyspiel.load_matrix_game("matrix_rps")
    tables = utils.game_payoffs_array(rps)

    # Symmetry check; its second return value is the table list used for the
    # single-population analysis below.
    _, tables = utils.is_symmetric_matrix_game(tables)
    is_hpt = utils.check_payoffs_are_hpt(tables)

    m_arg = 20
    alpha_arg = 0.1
    local_selection = False

    def transition_matrix(is_constant_sum, total_payoff):
        # Both cases share every argument except the constant-sum settings.
        return alpharank._get_singlepop_transition_matrix(
            tables[0], is_hpt, m_arg, alpha_arg, is_constant_sum,
            local_selection, total_payoff)

    # Case 1) General-sum game computation (slower).
    general_c, general_rhos = transition_matrix(False, None)

    # Case 2) Constant-sum closed-form computation (faster).
    is_constant_sum, total_payoff = utils.check_is_constant_sum(
        tables[0], is_hpt)
    closed_c, closed_rhos = transition_matrix(is_constant_sum, total_payoff)

    # Both code paths must produce the same matrices.
    np.testing.assert_array_almost_equal(general_c, closed_c)
    np.testing.assert_array_almost_equal(general_rhos, closed_rhos)
def get_replicator_dynamics(game_name, iterations, learning_rate=0.01,
                            verbose=False):
    """Simulates single-population replicator dynamics from a random start.

    Args:
        game_name: Name of the game, loaded with `pyspiel.load_game`.
        iterations: Number of update steps to simulate.
        learning_rate: Step size applied to every dynamics update.
        verbose: If True, the trajectory is handed to
            `util.pretty_print_strategies`.

    Returns:
        An array of shape `(iterations, num_distinct_actions)`; row `i` holds
        the strategy after update step `i`.
    """
    game = pyspiel.load_game(game_name)
    if not isinstance(game, pyspiel.MatrixGame):
        print(
            "Game " + game_name +
            " is not a matrix game, construction of payoff matrix will take a long time..."
        )
    payoff_matrix = game_payoffs_array(game)
    dyn = dynamics.SinglePopulationDynamics(payoff_matrix, dynamics.replicator)
    legal_actions = game.num_distinct_actions()

    # The state must be an ndarray: with a plain list, `x += array` would
    # *extend* the list instead of adding element-wise.
    x = np.random.rand(legal_actions)
    x = x / x.sum()

    actions = np.zeros((iterations, legal_actions))
    for i in range(iterations):
        x = x + learning_rate * dyn(x)
        actions[i] = x
    if verbose:
        util.pretty_print_strategies(game, actions)
    return actions
def _phaseplot(games, bstreamplot=False):
    """Draws one replicator phase plot per game, side by side.

    Args:
        games: Sequence of matrix games. Games with two columns are plotted
            with multi-population dynamics on a "2x2" projection, all others
            with single-population dynamics on a "3x3" projection.
        bstreamplot: If True a stream plot is drawn, otherwise a quiver plot.
    """
    num_games = len(games)
    plt.figure(figsize=(32, num_games))
    for column, game in enumerate(games):
        # SETUP VALUES
        two_by_two = game.num_cols() == 2
        payoffs = game_payoffs_array(game)

        ## dynamics: choose between replicator, boltzmann_qlearning, boltzmann_faqlearning
        ## for lfaq: LenientMultiPopulationDynamics(payoff_tensor, boltzmann_faqlearning, k=...) (only valid for 2x2 games)
        if two_by_two:
            field = MultiPopulationDynamics(payoffs, replicator)
        else:
            field = SinglePopulationDynamics(payoffs, replicator)

        ## PLOTTING
        if two_by_two:
            axes = plt.subplot2grid(
                (1, num_games), (0, column), projection="2x2")
        else:
            axes = plt.subplot2grid(
                (1, num_games), (0, column), projection="3x3")

        if bstreamplot:
            axes.streamplot(field, density=0.75, color='black', linewidth=1)
        else:
            axes.quiver(field)

        labels = plot_labels[game.get_type().short_name]
        if two_by_two:
            axes.set_xlabel(labels[0])
            axes.set_ylabel(labels[1])
        else:
            axes.set_labels(labels)
        plt.title(game.get_type().long_name.upper())
    plt.show()
def test_plot_pi_vs_alpha(self, mock_plt):
    """Smoke-tests `plot_pi_vs_alpha` on a synthetic two-point history.

    Args:
        mock_plt: Mocked pyplot; the test asserts that its `show` was called.
    """
    # Construct game.
    rps = pyspiel.load_matrix_game("matrix_rps")
    tables = utils.game_payoffs_array(rps)
    _, tables = utils.is_symmetric_matrix_game(tables)
    is_hpt = utils.check_payoffs_are_hpt(tables)

    # Compute alpharank.
    alpha = 1e2
    _, _, pi, num_profiles, num_strats_per_population = alpharank.compute(
        tables, alpha=alpha)
    labels = utils.get_strat_profile_labels(tables, is_hpt)
    population_count = len(tables)

    # Construct synthetic pi-vs-alpha history: the same column twice.
    pi_history = np.empty((num_profiles, 0))
    alpha_history = []
    for _ in range(2):
        pi_column = np.reshape(pi, (-1, 1))
        pi_history = np.concatenate((pi_history, pi_column), axis=1)
        alpha_history.append(alpha)

    # Test plotting code (via pyplot mocking to prevent plot pop-up).
    alpharank_visualizer.plot_pi_vs_alpha(
        pi_history.T,
        alpha_history,
        population_count,
        num_strats_per_population,
        labels,
        num_strats_to_label=0)
    self.assertTrue(mock_plt.show.called)
def test_expected_payoff(self, strategy):
    """Checks that the expected payoff has one entry per strategy.

    Args:
        strategy: Strategy forwarded to the table's `expected_payoff`.
    """
    logging.info("Testing expected payoff for matrix game.")
    rps = pyspiel.load_matrix_game("matrix_rps")
    first_player_payoffs = utils.game_payoffs_array(rps)[0]
    hpt = heuristic_payoff_table.from_matrix_game(first_player_payoffs)
    payoff = hpt.expected_payoff(strategy)
    print(payoff)
    assert len(payoff) == hpt._num_strategies
def test_multi_population_rps(self):
    """Uniform play must be a fixed point of two-population replicator RPS."""
    rps = pyspiel.load_matrix_game('matrix_rps')
    payoffs = game_payoffs_array(rps)
    dyn = dynamics.MultiPopulationDynamics(payoffs, [dynamics.replicator] * 2)

    # One uniform distribution per population, laid out back to back.
    uniform_parts = []
    for num_actions in payoffs.shape[1:]:
        uniform_parts.append(np.ones(num_actions) / float(num_actions))
    joint_state = np.concatenate(uniform_parts)

    np.testing.assert_allclose(dyn(joint_state), np.zeros((6, )), atol=1e-15)
def payoff_tensor(self):
    """Returns the cached payoff tensor, building it on first use.

    On the first call the game is reloaded as a tensor game if it is not one
    already, and the payoffs are shifted by subtracting the game's minimum
    utility. The result is stored in `self.pt` and reused afterwards.
    """
    if self.pt is not None:
        return self.pt

    if not self.tensor_game:
        logging.info('reloading pyspiel game as tensor_game')
        self.game = pyspiel.load_tensor_game(self.string_specifier)
        self.tensor_game = True

    raw_payoffs = np.asarray(game_payoffs_array(self.game))
    self.pt = raw_payoffs - self.game.min_utility()
    return self.pt
def test_rd_rps_pure_fixed_points(self):
    """Every pure RPS strategy must be a fixed point of the replicator."""
    rps = pyspiel.load_matrix_game('matrix_rps')
    payoffs = game_payoffs_array(rps)
    dyn = dynamics.SinglePopulationDynamics(payoffs, dynamics.replicator)

    # Rows of the identity matrix are the three pure strategies.
    for pure_strategy in np.eye(3):
        np.testing.assert_allclose(dyn(pure_strategy), np.zeros((3, )))
def test_game_payoffs_array_pd(self):
    """Test `game_payoffs_array` for prisoners' dilemma."""
    pd_game = pyspiel.load_matrix_game("matrix_pd")
    row_player = np.array([[5., 0.], [10., 1.]])

    # Expected tensor: the second slice is the transpose of the first.
    expected = np.stack([row_player, row_player.T])

    np.testing.assert_allclose(utils.game_payoffs_array(pd_game), expected)
def test_game_payoffs_array_rps(self):
    """Test `game_payoffs_array` for rock-paper-scissors."""
    rps = pyspiel.load_matrix_game("matrix_rps")
    row_player = np.array([[0., -1., 1.], [1., 0., -1.], [-1., 1., 0.]])

    # Expected tensor: the second slice is the negation of the first.
    expected = np.stack([row_player, -1. * row_player])

    np.testing.assert_allclose(utils.game_payoffs_array(rps), expected)
def test_constant_sum_checker(self):
    """Tests if verification of constant-sum game is correct."""
    rps = pyspiel.load_matrix_game("matrix_rps")
    tables = utils.game_payoffs_array(rps)
    is_hpt = utils.check_payoffs_are_hpt(tables)

    verdict = utils.check_is_constant_sum(tables[0], is_hpt)
    is_constant_sum, total_payoff = verdict

    # Rock-paper-scissors must be reported as constant-sum with sum 0.
    self.assertTrue(is_constant_sum)
    self.assertEqual(total_payoff, 0.)
def __init__(self, game):
    """Initializes the Double Oracle solver.

    Args:
        game: pyspiel.MatrixGame (zero-sum).
    """
    assert isinstance(game, pyspiel.MatrixGame)
    utility_kind = game.get_type().utility
    assert utility_kind == pyspiel.GameType.Utility.ZERO_SUM

    # Convert matrix game to numpy.ndarray of shape [2,rows,columns].
    self.payoffs = utils.game_payoffs_array(game)
    # One (initially empty) strategy list per player.
    self.subgame_strategies = [[] for _ in range(2)]
def rps_phaseplot_boltzmann(fig):
    """Adds a rock-paper-scissors quiver plot using `lenient_boltzmannq`.

    Args:
        fig: Figure that receives the new "3x3" subplot at position 111.

    Returns:
        The created subplot.
    """
    subplot_spec = 111
    rps = pyspiel.load_game("matrix_rps")
    payoffs = game_payoffs_array(rps)
    print(payoffs[0])

    field = dynamics.SinglePopulationDynamics(payoffs, lenient_boltzmannq)
    axes = fig.add_subplot(subplot_spec, projection="3x3")
    axes.quiver(field)
    axes.set_title("Phaseplot Rock Paper Scissors")
    return axes
def matrix_rps_phaseplot(size=None, fig=None):
    """Adds a replicator quiver plot of rock-paper-scissors to a figure.

    Args:
        size: Subplot specification passed to `add_subplot`; 111 when None.
        fig: Figure to draw on; a new 10x10 figure is created when None.

    Returns:
        The created subplot.
    """
    if fig is None:
        fig = plt.figure(figsize=(10, 10))
    if size is None:
        size = 111
    assert isinstance(fig, plt.Figure)

    rps = pyspiel.load_game("matrix_rps")
    field = dynamics.SinglePopulationDynamics(
        game_payoffs_array(rps), dynamics.replicator)

    axes = fig.add_subplot(size, projection="3x3")
    axes.quiver(field)
    axes.set_title("Phaseplot Rock Paper Scissors")
    return axes
def matrix_mp_phaseplot(size=None, fig=None):
    """Adds a replicator quiver plot of matching pennies to a figure.

    Args:
        size: Subplot specification passed to `add_subplot`; 111 when None.
        fig: Figure to draw on; a new 10x10 figure is created when None.

    Returns:
        The created subplot.
    """
    if fig is None:
        fig = plt.figure(figsize=(10, 10))
    if size is None:
        size = 111
    assert isinstance(fig, plt.Figure)

    pennies = pyspiel.load_game("matrix_mp")
    field = dynamics.MultiPopulationDynamics(
        game_payoffs_array(pennies), dynamics.replicator)

    axes = fig.add_subplot(size, projection="2x2")
    axes.quiver(field)
    axes.set_title("Phaseplot Matching pennies")
    axes.set_xlabel("Player 1")
    axes.set_ylabel("Player 2")
    return axes
def __init__(self, game, enforce_symmetry=False):
    """Initializes the Double Oracle solver.

    Args:
        game: pyspiel.MatrixGame (zero-sum).
        enforce_symmetry: If True, enforces symmetry in the strategies
            appended by each player, by using the first player's best
            response for the second player as well; also asserts the game is
            symmetric and that players are seeded with identical
            initial_strategies, default: False.

    Raises:
        AssertionError: If `game` is not a zero-sum `pyspiel.MatrixGame`, or
            if `enforce_symmetry` is True but the payoffs are not symmetric.
    """
    assert isinstance(game, pyspiel.MatrixGame)
    assert game.get_type().utility == pyspiel.GameType.Utility.ZERO_SUM
    # convert matrix game to numpy.ndarray of shape [2,rows,columns]
    self.payoffs = utils.game_payoffs_array(game)
    self.subgame_strategies = [[], []]
    self.enforce_symmetry = enforce_symmetry
    if self.enforce_symmetry:
        # `is_symmetric_matrix_game` returns a 2-tuple whose first element is
        # the verdict; asserting on the tuple itself would always pass.
        is_symmetric, _ = utils.is_symmetric_matrix_game(self.payoffs)
        assert is_symmetric, (
            "enforce_symmetry is True, but payoffs are asymmetric!")
def _dynamics_kplot(k_values, games):
    """Plots a grid of quiver plots: one row per 2x2 game, one column per k.

    :param k_values: array of k-values to plot for
    :param games: array of (2x2) matrix games to plot for
    :return: None; the figure is shown with `plt.show()`
    """
    # Only games with exactly two columns are plotted.
    two_col_games = []
    for candidate in games:
        if candidate.num_cols() == 2:
            two_col_games.append(candidate)

    num_rows = len(two_col_games) + 1  # +1 row for the k-value labels
    num_cols = len(k_values) + 1  # +1 column for the game names
    grid = (num_rows, num_cols)
    plt.figure(figsize=(num_rows * 8, num_cols * 4))

    for row, game in enumerate(two_col_games):
        payoffs = game_payoffs_array(game)

        ## GAME TITLE
        ax = plt.subplot2grid(grid, (row, 0))
        plt.text(
            1,
            0.5,
            game.get_type().long_name.upper(),
            fontsize=24,
            horizontalalignment='right',
            fontweight='bold')
        plt.axis('off')

        # NOTE(review): the k value is not passed to the dynamics, so every
        # column of a row shows the same field -- confirm whether a lenient
        # dynamics with k=... was intended here.
        for col in range(1, len(k_values) + 1):
            field = MultiPopulationDynamics(payoffs, boltzmann_faqlearning)
            ax = plt.subplot2grid(grid, (row, col), projection="2x2")
            ax.quiver(field)
            # plt.title(game.get_type().long_name.upper())
            plt.xlabel("x")
            plt.ylabel("y")

    ## K-LABELS
    for offset, k_value in enumerate(k_values):
        ax = plt.subplot2grid(grid, (num_rows - 1, offset + 1))
        plt.text(
            0.5,
            1,
            "k = " + str(k_value),
            fontsize=24,
            horizontalalignment='center',
            verticalalignment='top',
            fontweight='bold')
        plt.axis('off')

    plt.show()
def _trajectoryplot(game, population_histories, k=1):
    """Draws population trajectories over the replicator field of a 2x2 game.

    Nothing is drawn when the game does not have exactly two columns.

    Args:
        game: Matrix game; must expose `num_cols()` and `get_type()`.
        population_histories: Iterable of histories. Every entry `step` of a
            history is read as `step[player][action]`.
        k: Not used by the active code; only referenced by the commented-out
            lenient dynamics alternative below.
    """
    if game.num_cols() != 2:
        return

    field = MultiPopulationDynamics(game_payoffs_array(game), replicator)
    # eps = replicator / boltz = boltzmann_qlearning / faq = boltzmann_faqlearning
    # dyn = LenientMultiPopulationDynamics(payoff_tensor, boltzmann_faqlearning, k=k) # for de lfaq plots

    figure = plt.figure(figsize=(4, 4))
    axes = figure.add_subplot(111, projection="2x2")
    axes.quiver(field)

    for history in population_histories:
        # Probability of the first action for player 1 and player 2.
        first_player = [step[0][0] for step in history]
        second_player = [step[1][0] for step in history]
        plt.plot(first_player, second_player)  # one line per population

    game_type = game.get_type()
    axis_names = plot_labels[game_type.short_name]
    plt.title(game_type.long_name.upper())
    plt.xlabel(axis_names[0])
    plt.ylabel(axis_names[1])
    plt.xlim(-0.01, 1.01)
    plt.ylim(-0.01, 1.01)
    plt.show()
    return
def main(_):
    """Loads the game named by FLAGS.game and prints its Nash equilibria.

    The game is converted to matrix form if necessary, optionally reduced by
    iterated dominance, scanned for pure equilibria, and finally handed to the
    solver selected by FLAGS.solver. All results are printed to stdout.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If the linear solver is requested with a mode other than
            "one" or for a game that is not constant-sum within FLAGS.tol.
    """
    game = pyspiel.load_game(FLAGS.game)
    print("loaded game")

    # convert game to matrix form if it isn't already a matrix game
    if not isinstance(game, pyspiel.MatrixGame):
        game = pyspiel.extensive_to_matrix_game(game)
        num_rows, num_cols = game.num_rows(), game.num_cols()
        print("converted to matrix form with shape (%d, %d)" % (num_rows, num_cols))

    # use iterated dominance to reduce the space unless the solver is LP (fast)
    if FLAGS.solver != "linear":
        if FLAGS.mode == "all":
            # Strict dominance is used when all equilibria are requested.
            game, _ = lp_solver.iterated_dominance(
                game, tol=FLAGS.tol, mode=lp_solver.DOMINANCE_STRICT)
            num_rows, num_cols = game.num_rows(), game.num_cols()
            print(
                "discarded strictly dominated actions yielding shape (%d, %d)" %
                (num_rows, num_cols))
        if FLAGS.mode == "one":
            # Very weak dominance is used when a single equilibrium suffices.
            game, _ = lp_solver.iterated_dominance(
                game, tol=FLAGS.tol, mode=lp_solver.DOMINANCE_VERY_WEAK)
            num_rows, num_cols = game.num_rows(), game.num_cols()
            print(
                "discarded very weakly dominated actions yielding shape (%d, %d)" %
                (num_rows, num_cols))

    # game is now finalized
    num_rows, num_cols = game.num_rows(), game.num_cols()
    row_actions = [game.row_action_name(row) for row in range(num_rows)]
    col_actions = [game.col_action_name(col) for col in range(num_cols)]
    row_payoffs, col_payoffs = utils.game_payoffs_array(game)
    # A cell is a pure equilibrium when the row payoff is within tol of its
    # column maximum and the column payoff is within tol of its row maximum.
    pure_nash = list(
        zip(*((row_payoffs >= row_payoffs.max(0, keepdims=True) - FLAGS.tol) &
              (col_payoffs >= col_payoffs.max(1, keepdims=True) - FLAGS.tol)
             ).nonzero()))
    if pure_nash:
        print("found %d pure equilibria" % len(pure_nash))
    if FLAGS.mode == "pure":
        # Pure mode: report every pure equilibrium and stop.
        if not pure_nash:
            print("found no pure equilibria")
            return
        print("pure equilibria:")
        for row, col in pure_nash:
            print("payoffs %f, %f:" % (row_payoffs[row, col], col_payoffs[row, col]))
            print("row action:")
            print(row_actions[row])
            print("col action:")
            print(col_actions[col])
            print("")
        return
    if FLAGS.mode == "one" and pure_nash:
        # A single equilibrium was requested and a pure one exists: report the
        # first one and skip the solvers entirely.
        print("pure equilibrium:")
        row, col = pure_nash[0]
        print("payoffs %f, %f:" % (row_payoffs[row, col], col_payoffs[row, col]))
        print("row action:")
        print(row_actions[row])
        print("col action:")
        print(col_actions[col])
        print("")
        return
    # List the remaining actions so mixture vectors printed below can be read.
    for row, action in enumerate(row_actions):
        print("row action %s:" % row)
        print(action)
    print("--")
    for col, action in enumerate(col_actions):
        print("col action %s:" % col)
        print(action)
    print("--")
    if num_rows == 1 or num_cols == 1:
        # Degenerate shape: pair every pure row strategy with every pure
        # column strategy.
        equilibria = itertools.product(np.eye(num_rows), np.eye(num_cols))
    elif FLAGS.solver == "linear":
        if FLAGS.mode != "one" or (row_payoffs + col_payoffs).max() > (
                row_payoffs + col_payoffs).min() + FLAGS.tol:
            raise ValueError(
                "can't use linear solver for non-constant-sum game or "
                "for finding all optima!")
        print("using linear solver")

        def gen():
            # The payoff differences form a zero-sum game that is passed to
            # the LP solver; the single solution is yielded as 1-D mixtures.
            p0_sol, p1_sol, _, _ = lp_solver.solve_zero_sum_matrix_game(
                pyspiel.create_matrix_game(row_payoffs - col_payoffs,
                                           col_payoffs - row_payoffs))
            yield (np.squeeze(p0_sol, 1), np.squeeze(p1_sol, 1))

        equilibria = gen()
    elif FLAGS.solver == "lrsnash":
        print("using lrsnash solver")
        equilibria = lrs_solve(row_payoffs, col_payoffs)
    elif FLAGS.solver == "nashpy":
        if FLAGS.mode == "all":
            print("using nashpy vertex enumeration")
            equilibria = nashpy.Game(row_payoffs, col_payoffs).vertex_enumeration()
        else:
            print("using nashpy Lemke-Howson solver")
            equilibria = lemke_howson_solve(row_payoffs, col_payoffs)
    # NOTE(review): `equilibria` stays unbound if FLAGS.solver matches none of
    # the branches above -- presumably the flag definition restricts the
    # values; confirm.
    print("equilibria:" if FLAGS.mode == "all" else "an equilibrium:")
    equilibria = iter(equilibria)
    # check that there's at least one equilibrium
    try:
        # Peek at the first element, then put it back in front of the rest.
        equilibria = itertools.chain([next(equilibria)], equilibria)
    except StopIteration:
        print("not found!")
    for row_mixture, col_mixture in equilibria:
        print("payoffs %f, %f for %s, %s" %
              (row_mixture.dot(row_payoffs.dot(col_mixture)), row_mixture.dot(
                  col_payoffs.dot(col_mixture)), row_mixture, col_mixture))
        if FLAGS.mode == "one":
            # Only the first equilibrium is wanted.
            return
def test_dynamics_rps_mixed_fixed_point(self, func):
    """Uniform play must be a fixed point of RPS under the given dynamics.

    Args:
        func: Dynamics function handed to `SinglePopulationDynamics`.
    """
    rps = pyspiel.load_matrix_game('matrix_rps')
    payoffs = game_payoffs_array(rps)
    dyn = dynamics.SinglePopulationDynamics(payoffs, func)
    uniform = np.ones(shape=(3, )) / 3.
    change = dyn(uniform)
    np.testing.assert_allclose(change, np.zeros((3, )), atol=1e-15)
def iterated_dominance(game_or_payoffs, mode, tol=1e-7):
    """Reduces a strategy space using iterated dominance.

    See: http://www.smallparty.com/yoram/classes/principles/nash.pdf

    Args:
        game_or_payoffs: either a pyspiel matrix- or normal-form game, or a
            payoff tensor of dimension `num_players` + 1. First dimension is
            the player, followed by the actions of all players, e.g. a 3x3
            game (2 players) has dimension [2,3,3].
        mode: DOMINANCE_STRICT, DOMINANCE_WEAK, or DOMINANCE_VERY_WEAK
        tol: tolerance

    Returns:
        A tuple (`reduced_game`, `live_actions`).
        * if `game_or_payoffs` is an instance of `pyspiel.MatrixGame`, so is
          `reduced_game`; otherwise `reduced_game` is a payoff tensor.
        * `live_actions` is a tuple of length `num_players`, where
          `live_actions[player]` is a boolean vector of shape `num_actions`;
          `live_actions[player][action]` is `True` if `action` wasn't
          dominated for `player`.
    """
    # `np.asfarray` was removed in NumPy 2.0; an explicit float64 conversion
    # is its documented equivalent.
    payoffs = utils.game_payoffs_array(game_or_payoffs) if isinstance(
        game_or_payoffs, pyspiel.NormalFormGame) else np.asarray(
            game_or_payoffs, dtype=np.float64)
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the same
    # dtype.
    live_actions = [
        np.ones(num_actions, bool) for num_actions in payoffs.shape[1:]
    ]
    progress = True
    while progress:
        progress = False
        # trying faster method first
        for method in ("pure", "mixed"):
            if progress:
                # The cheaper method already removed something; restart with
                # it before falling back to the mixed test.
                continue
            for player, live in enumerate(live_actions):
                if live.sum() == 1:
                    # one action is dominant
                    continue

                # discarding all dominated opponent actions
                payoffs_live = payoffs[player]
                for opponent in range(payoffs.shape[0]):
                    if opponent != player:
                        payoffs_live = payoffs_live.compress(
                            live_actions[opponent], opponent)

                # reshaping to (player_actions, joint_opponent_actions)
                payoffs_live = np.moveaxis(payoffs_live, player, 0)
                payoffs_live = payoffs_live.reshape(
                    (payoffs_live.shape[0], -1))

                for action in range(live.size):
                    if not live[action]:
                        continue
                    if method == "pure":
                        # mark all actions that `action` dominates
                        advantage = payoffs_live[action] - payoffs_live
                        dominated = _pure_dominated_from_advantages(
                            advantage, mode, tol)
                        dominated[action] = False
                        dominated &= live
                        if dominated.any():
                            progress = True
                            # In-place update so `live_actions[player]` sees
                            # the change.
                            live &= ~dominated
                            if live.sum() == 1:
                                break
                    if method == "mixed":
                        # test if `action` is dominated by a mixed policy
                        # (`live[:action].sum()` is the index of `action`
                        # among the live rows only)
                        mixture = is_dominated(
                            live[:action].sum(),
                            payoffs_live[live],
                            0,
                            mode,
                            tol,
                            return_mixture=True)
                        if mixture is None:
                            continue
                        # if it is, mark any other actions dominated by that
                        # policy
                        progress = True
                        advantage = mixture.dot(
                            payoffs_live[live]) - payoffs_live[live]
                        dominated = _pure_dominated_from_advantages(
                            advantage, mode, tol)
                        # Actions carrying weight in the mixture are kept.
                        dominated[mixture > tol] = False
                        assert dominated[live[:action].sum()]
                        live.put(live.nonzero()[0], ~dominated)
                        if live.sum() == 1:
                            break

    # Drop every dominated action from the payoff tensor (axis 0 is the
    # player axis, hence `player + 1`).
    for player, live in enumerate(live_actions):
        payoffs = payoffs.compress(live, player + 1)

    if isinstance(game_or_payoffs, pyspiel.MatrixGame):
        return pyspiel.MatrixGame(
            game_or_payoffs.get_type(), game_or_payoffs.get_parameters(), [
                game_or_payoffs.row_action_name(action)
                for action in live_actions[0].nonzero()[0]
            ], [
                game_or_payoffs.col_action_name(action)
                for action in live_actions[1].nonzero()[0]
            ], *payoffs), live_actions
    else:
        return payoffs, live_actions
def _build_dynamics2x2():
    """Build multi-population replicator dynamics for "matrix_pd"."""
    pd_game = pyspiel.load_game("matrix_pd")
    return dynamics.MultiPopulationDynamics(
        utils.game_payoffs_array(pd_game), dynamics.replicator)
def test_from_matrix_game(self, game):
    """Builds a heuristic payoff table from a matrix game and prints it.

    Args:
        game: Name of the matrix game to load.
    """
    loaded_game = pyspiel.load_matrix_game(game)
    tables = utils.game_payoffs_array(loaded_game)
    logging.info("Testing payoff table construction for matrix game.")
    # Only the first player's payoffs are used to build the table.
    hpt = heuristic_payoff_table.from_matrix_game(tables[0])
    print(hpt())
def payoff_tensor(self):
    """Returns the cached payoff tensor, building it on first use.

    On the first call the game's payoffs are shifted by subtracting the
    game's minimum utility; the result is stored in `self.pt` and reused
    afterwards.
    """
    if self.pt is not None:
        return self.pt

    raw_payoffs = np.asarray(game_payoffs_array(self.game))
    self.pt = raw_payoffs - self.game.min_utility()
    return self.pt
def _build_dynamics3x3():
    """Build single-population replicator dynamics for "matrix_rps"."""
    rps = pyspiel.load_game("matrix_rps")
    return dynamics.SinglePopulationDynamics(
        utils.game_payoffs_array(rps), dynamics.replicator)
def is_dominated(action,
                 game_or_payoffs,
                 player,
                 mode=DOMINANCE_STRICT,
                 tol=1e-7,
                 return_mixture=False):
    """Determines whether a pure strategy is dominated by any mixture strategies.

    Args:
        action: index of an action for `player`
        game_or_payoffs: either a pyspiel matrix- or normal-form game, or a
            payoff tensor for `player` with ndim == number of players
        player: index of the player (an integer)
        mode: dominance criterion: strict, weak, or very weak
        tol: tolerance
        return_mixture: whether to return the dominating strategy if one
            exists

    Returns:
        If `return_mixture`:
            a dominating mixture strategy if one exists, or `None`.
            the strategy is provided as a 1D numpy array of mixture weights.
        Otherwise: True if a dominating strategy exists, False otherwise.
    """
    # For more detail, please refer to Sec 4.5.2 of Shoham & Leyton-Brown, 2009:
    # Multiagent Systems: Algorithmic, Game-Theoretic, and Logical Foundations
    # http://www.masfoundations.org/mas.pdf
    assert mode in (DOMINANCE_STRICT, DOMINANCE_VERY_WEAK, DOMINANCE_WEAK)
    # `np.asfarray` was removed in NumPy 2.0; an explicit float64 conversion
    # is its documented equivalent.
    payoffs = utils.game_payoffs_array(game_or_payoffs)[player] if isinstance(
        game_or_payoffs, pyspiel.NormalFormGame) else np.asarray(
            game_or_payoffs, dtype=np.float64)

    # Reshape payoffs so rows correspond to `player` and cols to the joint
    # action of all other players
    payoffs = np.moveaxis(payoffs, player, 0)
    payoffs = payoffs.reshape((payoffs.shape[0], -1))
    num_rows, num_cols = payoffs.shape

    cvxopt.solvers.options["show_progress"] = False
    cvxopt.solvers.options["maxtol"] = tol
    cvxopt.solvers.options["feastol"] = tol
    lp = LinearProgram(OBJ_MAX)

    # One var for every row probability, fixed to 0 if inactive
    for r in range(num_rows):
        if r == action:
            lp.add_or_reuse_variable(r, lb=0, ub=0)
        else:
            lp.add_or_reuse_variable(r, lb=0)

    # For the strict LP we normalize the payoffs to be strictly positive
    if mode == DOMINANCE_STRICT:
        to_subtract = payoffs.min() - 1
    else:
        to_subtract = 0
        # For non-strict LPs the probabilities must sum to 1. (The strict LP
        # leaves the sum free: it minimizes it and tests it against 1 below.)
        lp.add_or_reuse_constraint(num_cols, CONS_TYPE_EQ)
        lp.set_cons_rhs(num_cols, 1)
        for r in range(num_rows):
            if r != action:
                lp.set_cons_coeff(num_cols, r, 1)

    # The main dominance constraint
    for c in range(num_cols):
        lp.add_or_reuse_constraint(c, CONS_TYPE_GEQ)
        lp.set_cons_rhs(c, payoffs[action, c] - to_subtract)
        for r in range(num_rows):
            if r != action:
                lp.set_cons_coeff(c, r, payoffs[r, c] - to_subtract)

    if mode == DOMINANCE_STRICT:
        # Minimize sum of probabilities
        for r in range(num_rows):
            if r != action:
                lp.set_obj_coeff(r, -1)
        mixture = lp.solve()
        # Accept only solutions whose total weight is below 1 by more than
        # tol, then rescale the weights to sum to 1.
        if mixture is not None and np.sum(mixture) < 1 - tol:
            mixture = np.squeeze(mixture, 1) / np.sum(mixture)
        else:
            mixture = None

    if mode == DOMINANCE_VERY_WEAK:
        # Check feasibility
        mixture = lp.solve()
        if mixture is not None:
            mixture = np.squeeze(mixture, 1)

    if mode == DOMINANCE_WEAK:
        # Check feasibility and whether there's any advantage
        for r in range(num_rows):
            lp.set_obj_coeff(r, payoffs[r].sum())
        mixture = lp.solve()
        if mixture is not None:
            mixture = np.squeeze(mixture, 1)
            # Reject mixtures whose total advantage does not exceed tol.
            if (np.dot(mixture, payoffs) - payoffs[action]).sum() <= tol:
                mixture = None

    return mixture if return_mixture else (mixture is not None)