def _matrix_game(self, state):
    """Sets up the stage matrix game at `state`, solves it, returns a policy.

    Rows are indexed by player 0's legal actions and columns by player 1's
    legal actions. Each cell holds the expected value (under `self._values`)
    of the successor states for that joint action; player 1 receives the
    negation of player 0's entry.

    Args:
      state: the state at which both players act.

    Returns:
      A pair `(actions, probs)`: player 0's legal actions at `state` and the
      first element of the LP solver's solution for the stage game.
    """
    state_key = str(state)
    transitions = {}
    # `transitions` is filled in place by this call; it is indexed below by
    # (state key, player 0 action, player 1 action).
    value_iteration._initialize_maps(
        {state_key: state}, self._values, transitions)

    row_utils = []  # row player (player 0)
    for row_action in state.legal_actions(0):
        q_row = []
        for col_action in state.legal_actions(1):
            outcomes = transitions[(state_key, row_action, col_action)]
            q_row.append(
                sum(prob * self._values[nxt] for nxt, prob in outcomes))
        row_utils.append(q_row)
    # col player (player 1): cell-wise negation of the row player's matrix.
    col_utils = [[-q for q in q_row] for q_row in row_utils]

    stage_game = pyspiel.create_matrix_game(row_utils, col_utils)
    solution = lp_solver.solve_zero_sum_matrix_game(stage_game)
    # NOTE(review): double check that the order of `legal_actions(0)` is
    # consistent with the order of the returned probabilities.
    return state.legal_actions(0), solution[0]
def nash_strategy(solver, return_joint=False):
    """Returns nash distribution on meta game matrix.

    This method only works for two player zero-sum games.

    Args:
      solver: GenPSROSolver instance.
      return_joint: If False (the default), only the marginals are returned.
        If True, the marginals as well as the joint probabilities are
        returned.

    Returns:
      Nash distribution on strategies: a list with one renormalized marginal
      per player, or, when `return_joint` is true, a tuple
      `(marginals, joint_strategies)`.

    Raises:
      NotImplementedError: if the meta game does not consist of exactly two
        payoff matrices.
    """
    meta_games = solver.get_meta_game()
    if not isinstance(meta_games, list):
        # A single matrix is treated as player 0's payoffs; player 1 gets the
        # negated matrix.
        meta_games = [meta_games, -meta_games]
    meta_games = [x.tolist() for x in meta_games]
    if len(meta_games) != 2:
        raise NotImplementedError(
            "nash_strategy solver works only for 2p zero-sum "
            "games, but was invoked for a {} player game".format(
                len(meta_games)))
    nash_prob_1, nash_prob_2, _, _ = lp_solver.solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(*meta_games))
    result = [
        renormalize(np.array(nash_prob_1).reshape(-1)),
        renormalize(np.array(nash_prob_2).reshape(-1)),
    ]

    if not return_joint:
        return result
    joint_strategies = get_joint_strategy_from_marginals(result)
    return result, joint_strategies
def _battle_of_the_sexes_easy():
    """Creates the Battle of the Sexes coordination game as a 2x2 matrix game."""
    # COORDINATION
    # Each cell is (row payoff, column payoff):
    #            Ballet   Movies
    #   Ballet    2,1      0,0
    #   Movies    0,0      1,2
    row_names = ["Ballet", "Movies"]
    col_names = ["Ballet", "Movies"]
    row_payoffs = [[2, 0], [0, 1]]
    col_payoffs = [[1, 0], [0, 2]]
    return pyspiel.create_matrix_game(
        "battle_of_the_sexes", "Battle of the Sexes",
        row_names, col_names, row_payoffs, col_payoffs)
def _staghunt_easy():
    """Creates the Stag Hunt coordination game as a 2x2 matrix game."""
    # COORDINATION
    # Each cell is (row payoff, column payoff):
    #            Stag      Hare
    #   Stag     1,1       0,2/3
    #   Hare     2/3,0     2/3,2/3
    hare_payoff = 2 / 3
    row_names = ["Stag", "Hare"]
    col_names = ["Stag", "Hare"]
    row_payoffs = [[1, 0], [hare_payoff, hare_payoff]]
    col_payoffs = [[1, hare_payoff], [0, hare_payoff]]
    return pyspiel.create_matrix_game(
        "staghunt", "StagHunt",
        row_names, col_names, row_payoffs, col_payoffs)
def _prisonners_dilemma_easy():
    """Creates a two-action prisoner's dilemma (non zero-sum) matrix game.

    The two payoff arguments are per-player matrices (row player's utilities
    first, then the column player's), indexed as [row action][column action].
    They encode the table below, in which "Talk" strictly dominates "Silent"
    for both players while mutual silence is better for both than mutual
    talking.

    Returns:
      The game object built by `pyspiel.create_matrix_game`.
    """
    # NON ZERO-SUM
    # Each cell is (row payoff, column payoff):
    #            Talk      Silent
    #   Talk    -6,-6      0,-12
    #   Silent  -12,0     -3,-3
    row_names = ["Talk", "Silent"]
    col_names = ["Talk", "Silent"]
    # NOTE(review): the matrices previously listed (row, col) payoff pairs per
    # cell instead of one matrix per player, which neither matched the table
    # above nor formed a prisoner's dilemma. Confirm the table's values are
    # the intended ones.
    row_payoffs = [[-6, 0], [-12, -3]]
    col_payoffs = [[-6, -12], [0, -3]]
    return pyspiel.create_matrix_game(
        "prisonners_dilemma", "Prisoners Dilemma",
        row_names, col_names, row_payoffs, col_payoffs)
def _matching_pennies_easy():
    """Creates the zero-sum Matching Pennies game as a 2x2 matrix game."""
    # ZERO-SUM
    # Each cell is (row payoff, column payoff):
    #            Heads    Tails
    #   Heads    -1,1     1,-1
    #   Tails    1,-1     -1,1
    sides = ("Heads", "Tails")
    row_payoffs = [[-1, 1], [1, -1]]
    # Zero-sum: the column player receives the negation of each entry.
    col_payoffs = [[-u for u in row] for row in row_payoffs]
    return pyspiel.create_matrix_game(
        "matching_pennies", "Matching Pennies",
        list(sides), list(sides), row_payoffs, col_payoffs)
def _biased_rock_paper_scissors_easy():
    """Creates a zero-sum, biased Rock Paper Scissors 3x3 matrix game."""
    # ZERO-SUM
    # Each cell is (row payoff, column payoff):
    #              Rock     Paper    Scissors
    #   Rock       0,0      -3,3     1,-1
    #   Paper      3,-3     0,0      -2,2
    #   Scissors   -1,1     2,-2     0,0
    row_names = ["Rock", "Paper", "Scissors"]
    col_names = ["Rock", "Paper", "Scissors"]
    row_payoffs = [
        [0, -3, 1],
        [3, 0, -2],
        [-1, 2, 0],
    ]
    col_payoffs = [
        [0, 3, -1],
        [-3, 0, 2],
        [1, -2, 0],
    ]
    return pyspiel.create_matrix_game(
        "biased_rock_paper_scissors", "Biased Rock Paper Scissors",
        row_names, col_names, row_payoffs, col_payoffs)
def matrix_rps_biased_phaseplot(size=None, fig=None):
    """Plots the phase plot of a biased Rock Paper Scissors game.

    Args:
      size: subplot position passed to `fig.add_subplot`; defaults to 111.
      fig: figure to draw into; a new 10x10 figure is created when None.

    Returns:
      A pair `(subplot, game)`: the subplot the quiver plot was drawn on, and
      the matrix game created from the two payoff matrices.
    """
    if fig is None:
        fig = plt.figure(figsize=(10, 10))
    if size is None:
        size = 111
    assert isinstance(fig, plt.Figure)

    row_payoffs = [[0, -1, 2], [1, 0, -1], [-2, 1, 0]]
    col_payoffs = [[0, 1, -2], [-1, 0, 1], [2, -1, 0]]
    payoff_tensor = np.array([row_payoffs, col_payoffs])

    # Single-population replicator dynamics over the payoff tensor.
    dyn = dynamics.SinglePopulationDynamics(payoff_tensor, dynamics.replicator)
    sub = fig.add_subplot(size, projection="3x3")
    sub.quiver(dyn)
    sub.set_title("Phaseplot Rock Paper Scissors")

    game = pyspiel.create_matrix_game(payoff_tensor[0], payoff_tensor[1])
    return sub, game
def test_rock_paper_scissors(self):
    """Standard RPS: both solutions are uniform and both values are zero."""
    row_payoffs = [[0.0, -1.0, 1.0], [1.0, 0.0, -1.0], [-1.0, 1.0, 0.0]]
    col_payoffs = [[0.0, 1.0, -1.0], [-1.0, 0.0, 1.0], [1.0, -1.0, 0.0]]
    game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    p0_sol, p1_sol, p0_sol_val, p1_sol_val = (
        lp_solver.solve_zero_sum_matrix_game(game))

    self.assertEqual(len(p0_sol), 3)
    self.assertEqual(len(p1_sol), 3)
    uniform = 1.0 / 3.0
    for p0_prob, p1_prob in zip(p0_sol, p1_sol):
        self.assertAlmostEqual(p0_prob, uniform)
        self.assertAlmostEqual(p1_prob, uniform)
    self.assertAlmostEqual(p0_sol_val, 0.0)
    self.assertAlmostEqual(p1_sol_val, 0.0)
def lp_solve(meta_games, checkpoint_dir=None):
    """Solves a two-player zero-sum meta game with the LP solver.

    Args:
      meta_games: sequence of payoff matrices, one per player; each element
        must provide `.tolist()`.
      checkpoint_dir: not used by this function.

    Returns:
      A list of two renormalized, flattened strategies, one per player.

    Raises:
      NotImplementedError: if `meta_games` does not hold exactly two payoff
        matrices.
    """
    meta_games = [x.tolist() for x in meta_games]
    if len(meta_games) != 2:
        raise NotImplementedError(
            "nash_strategy solver works only for 2p zero-sum "
            "games, but was invoked for a {} player game".format(
                len(meta_games)))
    nash_prob_1, nash_prob_2, _, _ = solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(*meta_games))
    return [
        renormalize(np.array(nash_prob_1).reshape(-1)),
        renormalize(np.array(nash_prob_2).reshape(-1)),
    ]
def main(_):
    """Solves a biased RPS game with the LP solver and runs a dominance check.

    Prints each player's value and policy, player 0's probability at index 1,
    and the result of a weak-dominance query that asks for the mixture.
    """
    # Built-in games can be solved the same way, e.g.:
    #   lp_solver.solve_zero_sum_matrix_game(
    #       pyspiel.load_matrix_game("matrix_mp"))
    #   lp_solver.solve_zero_sum_matrix_game(
    #       pyspiel.load_matrix_game("matrix_rps"))
    row_payoffs = [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]]
    col_payoffs = [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]]
    game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    p0_sol, p1_sol, p0_sol_val, p1_sol_val = (
        lp_solver.solve_zero_sum_matrix_game(game))
    print("p0 val = {}, policy = {}".format(p0_sol_val, p0_sol))
    print("p1 val = {}, policy = {}".format(p1_sol_val, p1_sol))
    print(p0_sol[1])

    payoff_matrix = [[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]]
    mixture = lp_solver.is_dominated(
        0, payoff_matrix, 0, lp_solver.DOMINANCE_WEAK, return_mixture=True)
    print(mixture)
def test_asymmetric_pure_nonzero_val(self):
    """2x3 game with a pure equilibrium and a nonzero value."""
    #        c0      c1      c2
    # r0 |  2, -2 |  1, -1 |  5, -5
    # r1 | -3,  3 | -4,  4 | -2,  2
    #
    # Pure eq (r0,c1) for a value of (1, -1)
    # 2nd row is dominated, and then second player chooses 2nd col.
    row_payoffs = [[2.0, 1.0, 5.0], [-3.0, -4.0, -2.0]]
    col_payoffs = [[-2.0, -1.0, -5.0], [3.0, 4.0, 2.0]]
    game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    p0_sol, p1_sol, p0_sol_val, p1_sol_val = (
        lp_solver.solve_zero_sum_matrix_game(game))

    self.assertLen(p0_sol, 2)
    self.assertLen(p1_sol, 3)
    # Row player puts all mass on r0.
    for idx, expected in enumerate((1.0, 0.0)):
        self.assertAlmostEqual(p0_sol[idx], expected)
    # Column player puts all mass on c1 (only c0 and c1 are checked).
    for idx, expected in enumerate((0.0, 1.0)):
        self.assertAlmostEqual(p1_sol[idx], expected)
    self.assertAlmostEqual(p0_sol_val, 1.0)
    self.assertAlmostEqual(p1_sol_val, -1.0)
def test_biased_rock_paper_scissors(self):
    """Biased RPS: both players mix (1/16, 10/16, 5/16) with value zero."""
    # See sec 6.2 of Bosansky et al. 2016. Algorithms for Computing Strategies
    # in Two-Player Simultaneous Move Games
    # http://mlanctot.info/files/papers/aij-2psimmove.pdf
    row_payoffs = [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]]
    col_payoffs = [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]]
    game = pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    p0_sol, p1_sol, p0_sol_val, p1_sol_val = (
        lp_solver.solve_zero_sum_matrix_game(game))

    self.assertLen(p0_sol, 3)
    self.assertLen(p1_sol, 3)
    expected_probs = (1.0 / 16.0, 10.0 / 16.0, 5.0 / 16.0)
    for idx, expected in enumerate(expected_probs):
        self.assertAlmostEqual(p0_sol[idx], expected, places=4)
        self.assertAlmostEqual(p1_sol[idx], expected, places=4)
    self.assertAlmostEqual(p0_sol_val, 0.0)
    self.assertAlmostEqual(p1_sol_val, 0.0)
def nash_strategy(solver):
    """Returns nash distribution on meta game matrix.

    This method only works for two player zero-sum games.

    Args:
      solver: GenPSROSolver instance. Its `get_meta_game` attribute may be
        either the meta game itself (e.g. a property) or a zero-argument
        method returning it.

    Returns:
      Nash distribution on strategies: a list of two renormalized, flattened
      strategies, one per player.
    """
    meta_games = solver.get_meta_game
    if callable(meta_games):
        # Exposed as a method rather than a property: call it to get the
        # payoffs. Without this, the negation below would be applied to a
        # bound method and fail.
        meta_games = meta_games()
    if not isinstance(meta_games, list):
        # A single matrix is treated as player 0's payoffs; player 1 gets the
        # negated matrix.
        meta_games = [meta_games, -meta_games]
    meta_games = [x.tolist() for x in meta_games]
    nash_prob_1, nash_prob_2, _, _ = lp_solver.solve_zero_sum_matrix_game(
        pyspiel.create_matrix_game(*meta_games))
    return [
        renormalize(np.array(nash_prob_1).reshape(-1)),
        renormalize(np.array(nash_prob_2).reshape(-1)),
    ]
def nash_solver(meta_games, solver="gambit", mode="one", gambit_path=None, lrsnash_path=None):
    """
    Solver for NE.

    Dispatches to one of several Nash-equilibrium back ends depending on
    `solver` and `mode`.

    :param meta_games: meta-games in PSRO; indexed per player, with
        meta_games[0] used as the row player's payoffs and meta_games[1] as
        the column player's payoffs.
    :param solver: options "gambit", "nashpy", "linear", "lrsnash", "replicator".
    :param mode: options "all", "one", "pure"
    :param gambit_path: forwarded to gambit_solve when solver is "gambit".
    :param lrsnash_path: path to lrsnash solver.
    :return: depends on the branch taken:
        - "gambit": whatever gambit_solve returns;
        - "replicator": a one-element list holding the replicator_dynamics
          result;
        - mode "pure" (game with more than one row and column): whatever
          pure_ne_solve returns;
        - "linear": a list of two renormalized, flattened strategies;
        - otherwise: the list of equilibria (mode "all") or the first
          equilibrium (mode "one").
    :raises ValueError: for an unknown solver, or for a mode other than
        "all"/"one" once the equilibria have been computed.

    WARNING (original author's note): opening up a subprocess in every
    iteration eventually leads the os to block the subprocess. Not usable.

    NOTE(review): when the game has a single row or column, that check wins
    over mode == "pure", so mode "pure" then ends in the final
    ValueError("Please choose a valid mode.") -- confirm whether intended.
    NOTE(review): if no equilibrium is found, mode "one" indexes an empty
    list and raises IndexError after the "no equilibrium!" warning.
    """
    num_players = len(meta_games)
    if solver == "gambit":
        return gambit_solve(meta_games, mode, gambit_path=gambit_path)
    elif solver == "replicator":
        return [replicator_dynamics(meta_games)]
    else:
        # All remaining back ends are two-player only.
        assert num_players == 2

        num_rows, num_cols = np.shape(meta_games[0])
        row_payoffs, col_payoffs = meta_games[0], meta_games[1]

        if num_rows == 1 or num_cols == 1:
            # One player has a single strategy: enumerate every pairing of
            # one-hot row and column strategies.
            equilibria = itertools.product(np.eye(num_rows), np.eye(num_cols))
        elif mode == 'pure':
            return pure_ne_solve(meta_games)
        elif solver == "linear":
            meta_games = [x.tolist() for x in meta_games]
            nash_prob_1, nash_prob_2, _, _ = (
                lp_solver.solve_zero_sum_matrix_game(
                    pyspiel.create_matrix_game(*meta_games)))
            return [
                renormalize(np.array(nash_prob_1).reshape(-1)),
                renormalize(np.array(nash_prob_2).reshape(-1))
            ]
        elif solver == "lrsnash":
            logging.info("Using lrsnash solver.")
            equilibria = lrs_solve(row_payoffs, col_payoffs, lrsnash_path)
        elif solver == "nashpy":
            if mode == "all":
                logging.info("Using nashpy vertex enumeration.")
                equilibria = nashpy.Game(row_payoffs, col_payoffs).vertex_enumeration()
            else:
                logging.info("Using nashpy Lemke-Howson solver.")
                equilibria = lemke_howson_solve(row_payoffs, col_payoffs)
        else:
            raise ValueError("Please choose a valid NE solver.")

        equilibria = iter(equilibria)
        # Check that there is at least one equilibrium: pull the first item
        # and chain it back in front so nothing is lost.
        try:
            equilibria = itertools.chain([next(equilibria)], equilibria)
        except StopIteration:
            logging.warning("degenerate game!")
            # Degenerate game: fall back to support enumeration.
            equilibria = nashpy.Game(row_payoffs, col_payoffs).support_enumeration()
            try:
                equilibria = itertools.chain([next(equilibria)], equilibria)
            except StopIteration:
                # Left empty here; the mode dispatch below deals with it.
                logging.warning("no equilibrium!")

        equilibria = list(equilibria)
        if mode == 'all':
            return equilibria
        elif mode == 'one':
            return equilibria[0]
        else:
            raise ValueError("Please choose a valid mode.")
def gen():
    """Yields one (row strategy, column strategy) pair from the LP solver.

    The game handed to the solver is built from the payoff differences, so
    each player's utility is its payoff minus the opponent's.
    """
    row_advantage = row_payoffs - col_payoffs
    col_advantage = col_payoffs - row_payoffs
    game = pyspiel.create_matrix_game(row_advantage, col_advantage)
    p0_sol, p1_sol, _, _ = lp_solver.solve_zero_sum_matrix_game(game)
    # Drop axis 1 of each solution before yielding.
    row_strategy = np.squeeze(p0_sol, 1)
    col_strategy = np.squeeze(p1_sol, 1)
    yield (row_strategy, col_strategy)
def solve_subgame(subgame_payoffs):
    """Solves the subgame using OpenSpiel's LP solver.

    Args:
      subgame_payoffs: payoff arguments unpacked into
        `pyspiel.create_matrix_game`.

    Returns:
      A list `[p0, p1]` of the two solutions as arrays, each divided by its
      own sum.
    """
    game = pyspiel.create_matrix_game(*subgame_payoffs)
    p0_sol, p1_sol, _, _ = lp_solver.solve_zero_sum_matrix_game(game)
    p0_sol = np.asarray(p0_sol)
    p1_sol = np.asarray(p1_sol)
    normalized = []
    for strategy in (p0_sol, p1_sol):
        normalized.append(strategy / strategy.sum())
    return normalized
def value_iteration(game, depth_limit, threshold, cyclic_game=False):
    """Solves for the optimal value function of a game.

    For small games only! Runs value iteration until the largest change of any
    state value in a sweep is no greater than `threshold`. Works for
    sequential 1-player games or 2-player zero-sum games, with or without
    chance nodes.

    Args:
      game: The game to analyze, as returned by `load_game`.
      depth_limit: How deeply to analyze the game tree. Negative means no
        limit, 0 means root-only, etc.
      threshold: Maximum error for state values.
      cyclic_game: set to True if the game has cycles (from state A we can get
        to state B, and from state B we can get back to state A).

    Returns:
      A `dict` with string keys and float values, mapping string encoding of
      states to the values of those states.
    """
    assert game.num_players() in (1, 2), (
        "Game must be a 1-player or 2-player game")
    if game.num_players() == 2:
        assert game.get_type().utility == pyspiel.GameType.Utility.ZERO_SUM, (
            "2-player games must be zero sum games")

    # Must be perfect information or one-shot (not imperfect information).
    assert (
        game.get_type().information == pyspiel.GameType.Information.ONE_SHOT
        or game.get_type().information
        == pyspiel.GameType.Information.PERFECT_INFORMATION)

    # Value iteration is expected to be used with perfect information games,
    # in which `str` is assumed to display the state of the game.
    states = get_all_states.get_all_states(
        game,
        depth_limit,
        True,
        False,
        to_string=str,
        stop_if_encountered=cyclic_game)
    values = {}
    transitions = {}
    _initialize_maps(states, values, transitions)

    min_utility = game.min_utility()
    max_delta = threshold + 1  # A value larger than threshold
    while max_delta > threshold:
        max_delta = 0
        for key, state in states.items():
            if state.is_terminal():
                continue

            if state.is_simultaneous_node():
                # Simultaneous node. Assemble a matrix game from the child
                # utilities and solve it using a matrix game solver. Rows are
                # player 0's actions, columns are player 1's actions.
                row_utils = [
                    [
                        sum(prob * values[nxt]
                            for nxt, prob in transitions[(key, a0, a1)])
                        for a1 in state.legal_actions(1)
                    ]
                    for a0 in state.legal_actions(0)
                ]
                col_utils = [[-q for q in q_row] for q_row in row_utils]
                stage_game = pyspiel.create_matrix_game(row_utils, col_utils)
                new_value = lp_solver.solve_zero_sum_matrix_game(stage_game)[2]
            else:
                # Regular decision node: player 0 maximizes, any other
                # current player minimizes.
                player = state.current_player()
                if player == 0:
                    new_value = min_utility
                    choose = max
                else:
                    new_value = -min_utility
                    choose = min
                for action in state.legal_actions():
                    outcomes = transitions[(key, action)]
                    q_value = sum(
                        prob * values[nxt] for nxt, prob in outcomes)
                    new_value = choose(new_value, q_value)

            # Values are updated in place, so later states in the same sweep
            # already see this new value.
            max_delta = max(abs(values[key] - new_value), max_delta)
            values[key] = new_value

    return values
def _even_easier_create_game():
    """Creates a 2x2 game from the two payoff matrices alone, without names."""
    row_payoffs = [[-1, 1], [1, -1]]
    col_payoffs = [[1, -1], [-1, 1]]
    return pyspiel.create_matrix_game(row_payoffs, col_payoffs)
def _easy_create_game():
    """Creates Matching Pennies with the `create_matrix_game` helper.

    Passes short and long names, row and column action names, and the two
    players' payoff matrices.
    """
    short_name = "matching_pennies"
    long_name = "Matching Pennies"
    row_names = ["Heads", "Tails"]
    col_names = ["Heads", "Tails"]
    row_payoffs = [[-1, 1], [1, -1]]
    col_payoffs = [[1, -1], [-1, 1]]
    return pyspiel.create_matrix_game(
        short_name, long_name, row_names, col_names, row_payoffs, col_payoffs)
def get_game(game_name):
    """Resolves the special name "matrix_bots" to a 2x2 matrix game.

    Args:
      game_name: a `pyspiel.MatrixGame` or any other value (typically a game
        name).

    Returns:
      A newly created 2x2 matrix game when `game_name` equals "matrix_bots";
      otherwise `game_name` itself, unchanged.
    """
    # A MatrixGame instance is returned as-is without being compared to the
    # special name.
    if isinstance(game_name, pyspiel.MatrixGame):
        return game_name
    if game_name == "matrix_bots":
        row_payoffs = [[3, 0], [0, 2]]
        col_payoffs = [[2, 0], [0, 3]]
        return pyspiel.create_matrix_game(row_payoffs, col_payoffs)
    return game_name
def _import_data_create_game():
    """Creates a game via imported payoff data.

    Loads the soccer payoff table from disk, rescales every entry `x` to
    `2 * x - 1`, and uses the rescaled matrix as the row player's payoffs and
    its transpose as the column player's payoffs.
    """
    relative_path = "open_spiel/data/paper_data/response_graph_ucb/soccer.txt"
    # NOTE(review): the meaning of the second argument (2) is defined by
    # file_utils.find_file -- confirm there.
    payoff_file = file_utils.find_file(relative_path, 2)
    raw_payoffs = np.loadtxt(payoff_file)
    payoffs = raw_payoffs * 2 - 1
    return pyspiel.create_matrix_game(payoffs, payoffs.T)