def main(unused_arg):
  # Construct meta-game payoff tables
  # payoff_tables = get_kuhn_poker_data()
  payoff_tables = [
      np.array([[1.1, -10.0], [1.0, -1.0], [-1.0, 1.0]]),
      np.array([[-1.1, 10.0], [-1.0, 1.0], [1.0, -1.0]])
  ]
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                payoffs_are_hpt_format)

  # Run AlphaRank
  rhos, rho_m, pi, _, _ = alpharank.compute(payoff_tables, alpha=1e1)

  # Report & plot results
  alpharank.print_results(
      payoff_tables, payoffs_are_hpt_format, rhos=rhos, rho_m=rho_m, pi=pi)
  utils.print_rankings_table(payoff_tables, pi, strat_labels)
  m_network_plotter = alpharank_visualizer.NetworkPlot(
      payoff_tables, rhos, rho_m, pi, strat_labels, num_top_profiles=8)
  m_network_plotter.compute_and_draw_network()
def test_plot_pi_vs_alpha(self, mock_plt):
  # Construct game
  game = pyspiel.load_matrix_game("matrix_rps")
  payoff_tables = utils.game_payoffs_array(game)
  _, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables)
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)

  # Compute alpharank
  alpha = 1e2
  _, _, pi, num_profiles, num_strats_per_population = (
      alpharank.compute(payoff_tables, alpha=alpha))
  strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                payoffs_are_hpt_format)
  num_populations = len(payoff_tables)

  # Construct synthetic pi-vs-alpha history
  pi_list = np.empty((num_profiles, 0))
  alpha_list = []
  for _ in range(2):
    pi_list = np.append(pi_list, np.reshape(pi, (-1, 1)), axis=1)
    alpha_list.append(alpha)

  # Test plotting code (via pyplot mocking to prevent plot pop-up)
  alpharank_visualizer.plot_pi_vs_alpha(
      pi_list.T,
      alpha_list,
      num_populations,
      num_strats_per_population,
      strat_labels,
      num_strats_to_label=0)
  self.assertTrue(mock_plt.show.called)
def test_constant_sum_transition_matrix(self):
  """Tests closed-form transition matrix computation for constant-sum case."""
  game = pyspiel.load_matrix_game("matrix_rps")
  payoff_tables = utils.game_payoffs_array(game)

  # Checks if the game is symmetric and runs single-population analysis if so
  _, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables)
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)

  m = 20
  alpha = 0.1

  # Case 1) General-sum game computation (slower)
  game_is_constant_sum = False
  use_local_selection_model = False
  payoff_sum = None
  c1, rhos1 = alpharank._get_singlepop_transition_matrix(
      payoff_tables[0], payoffs_are_hpt_format, m, alpha,
      game_is_constant_sum, use_local_selection_model, payoff_sum)

  # Case 2) Constant-sum closed-form computation (faster)
  game_is_constant_sum, payoff_sum = utils.check_is_constant_sum(
      payoff_tables[0], payoffs_are_hpt_format)
  c2, rhos2 = alpharank._get_singlepop_transition_matrix(
      payoff_tables[0], payoffs_are_hpt_format, m, alpha,
      game_is_constant_sum, use_local_selection_model, payoff_sum)

  # Ensure both cases match
  np.testing.assert_array_almost_equal(c1, c2)
  np.testing.assert_array_almost_equal(rhos1, rhos2)
def test_constant_sum_checker(self):
  """Tests if verification of constant-sum game is correct."""
  game = pyspiel.load_matrix_game("matrix_rps")
  payoff_tables = utils.game_payoffs_array(game)
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  game_is_constant_sum, payoff_sum = utils.check_is_constant_sum(
      payoff_tables[0], payoffs_are_hpt_format)
  self.assertTrue(game_is_constant_sum)
  self.assertEqual(payoff_sum, 0.)
def suggest_alpha(payoff_tables, tol=.1):
  """Suggests an alpha for use in alpha-rank.

  The suggested alpha is approximately the smallest possible alpha such that
  the ranking has 'settled out'. It is calculated as
  -ln(tol)/min_gap_between_payoffs.

  The logic behind this settling out is that the fixation probabilities can be
  expanded as a series, and the relative size of each term in this series
  changes with alpha. As alpha gets larger and larger, one of the terms in
  this series comes to dominate, and this causes the ranking to settle down.
  Just how fast this domination happens is easy to calculate, and this
  function uses it to estimate the alpha by which the ranking has settled.

  You can find further discussion at the PR:
  https://github.com/deepmind/open_spiel/pull/403

  Args:
    payoff_tables: List of game payoff tables, one for each agent identity.
      Each payoff_table may be either a numpy array, or a
      _PayoffTableInterface object.
    tol: the desired gap between the first and second terms in the fixation
      probability expansion. A smaller tolerance leads to a larger alpha, and
      a 'more settled out' ranking.

  Returns:
    A suggested alpha.
  """
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  num_strats_per_population = utils.get_num_strats_per_population(
      payoff_tables, payoffs_are_hpt_format)
  num_profiles = utils.get_num_profiles(num_strats_per_population)
  gap = np.inf
  for id_row_profile in range(num_profiles):
    row_profile = utils.get_strat_profile_from_id(num_strats_per_population,
                                                  id_row_profile)
    next_profile_gen = utils.get_valid_next_profiles(num_strats_per_population,
                                                     row_profile)
    for index_population_that_changed, col_profile in next_profile_gen:
      payoff_table_k = payoff_tables[index_population_that_changed]
      f_r = _get_payoff(payoff_table_k, payoffs_are_hpt_format, col_profile,
                        index_population_that_changed)
      f_s = _get_payoff(payoff_table_k, payoffs_are_hpt_format, row_profile,
                        index_population_that_changed)
      if f_r > f_s:
        gap = min(gap, f_r - f_s)
  return -np.log(tol) / gap
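# Usage sketch (an illustration, not part of the original module): feed the
# suggested alpha directly into compute(). The two-population zero-sum
# rock-paper-scissors payoffs below are an assumption chosen for demonstration.
def _example_suggest_alpha():
  rps = np.array([[0., -1., 1.],
                  [1., 0., -1.],
                  [-1., 1., 0.]])
  payoff_tables = [rps, -rps]  # One payoff table per population
  alpha = suggest_alpha(payoff_tables, tol=.1)  # ~ -ln(0.1)/1 here
  _, _, pi, _, _ = compute(payoff_tables, alpha=alpha)
  return alpha, pi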
def __init__(self,
             payoff_tables,
             rhos,
             rho_m,
             pi,
             state_labels,
             num_top_profiles=None):
  """Initializes a network plotting object.

  Args:
    payoff_tables: List of game payoff tables, one for each agent identity.
      Each payoff_table may be either a 2D numpy array, or a
      _PayoffTableInterface object.
    rhos: Fixation probabilities.
    rho_m: Neutral fixation probability.
    pi: Stationary distribution of the fixation Markov chain defined by rhos.
    state_labels: Labels corresponding to Markov states. For the
      single-population case, state_labels should be a list of pure strategy
      names. For the multi-population case, it should be a dict with
      (key, value) pairs of the form
      (population index, list of strategy names).
    num_top_profiles: Set to an integer k to show only the graph nodes
      corresponding to the top k elements of the stationary distribution, or
      None to show all.
  """
  self.fig = plt.figure(figsize=(11.5, 11.5))
  self.num_populations = len(payoff_tables)
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  self.num_strats_per_population = utils.get_num_strats_per_population(
      payoff_tables, payoffs_are_hpt_format)
  self.rhos = rhos
  self.rho_m = rho_m
  self.pi = pi
  self.num_profiles = len(pi)
  self.state_labels = state_labels
  self.first_run = True
  self.num_top_profiles = num_top_profiles

  if self.num_top_profiles:
    # More than the total number of strats requested for plotting
    if self.num_top_profiles > self.num_profiles:
      self.num_top_profiles = self.num_profiles
    # Skip the bottom num_profiles-k stationary strategies.
    self.nodes_to_skip = list(
        self.pi.argsort()[:self.num_profiles - self.num_top_profiles])
  else:
    self.nodes_to_skip = []

  self._reset_cycle_counter()
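# Illustrative sketch (assumed helper, mirrors the docstring above): in the
# single-population case, state_labels is a plain list of pure strategy names,
# one per Markov state. The rhos/rho_m/pi inputs are assumed to come from a
# prior alpharank.compute() call, as in main() above.
def _example_networkplot_singlepop(payoff_tables, rhos, rho_m, pi):
  state_labels = ['Rock', 'Paper', 'Scissors']  # One label per pure strategy
  plotter = NetworkPlot(payoff_tables, rhos, rho_m, pi, state_labels,
                        num_top_profiles=None)  # None plots all nodes
  plotter.compute_and_draw_network()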
def compute_and_report_alpharank(payoff_tables,
                                 m=50,
                                 alpha=100,
                                 verbose=False,
                                 num_top_strats_to_print=8):
  """Computes and visualizes Alpha-Rank outputs.

  Args:
    payoff_tables: List of game payoff tables, one for each agent identity.
      Each payoff_table may be either a numpy array, or a
      _PayoffTableInterface object.
    m: Finite population size.
    alpha: Fermi distribution temperature parameter.
    verbose: Set to True to print intermediate results.
    num_top_strats_to_print: Number of top strategies to print.

  Returns:
    pi: AlphaRank stationary distribution/rankings.
  """
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  rhos, rho_m, pi, _, _ = compute(payoff_tables, m=m, alpha=alpha)
  strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                payoffs_are_hpt_format)
  if verbose:
    print_results(payoff_tables, payoffs_are_hpt_format, pi=pi)
  utils.print_rankings_table(
      payoff_tables,
      pi,
      strat_labels,
      num_top_strats_to_print=num_top_strats_to_print)
  m_network_plotter = NetworkPlot(
      payoff_tables, rhos, rho_m, pi, strat_labels, num_top_profiles=8)
  m_network_plotter.compute_and_draw_network()
  return pi
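# Minimal usage sketch (an assumption; the payoff values are illustrative
# only): a single call produces the rankings, the printed table, and the
# response-graph network plot for a single-population symmetric game.
def _example_compute_and_report():
  payoff_tables = [np.array([[0., -1., 1.],
                             [1., 0., -1.],
                             [-1., 1., 0.]])]
  return compute_and_report_alpharank(payoff_tables, alpha=1e2, verbose=True)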
def sweep_pi_vs_alpha(payoff_tables,
                      strat_labels=None,
                      warm_start_alpha=None,
                      m=50,
                      rtol=1e-5,
                      atol=1e-8):
  """Computes stationary distribution, pi, for a range of selection intensities.

  The range of selection intensities is defined in alpha_list and corresponds
  to the temperature of the Fermi selection function.

  Args:
    payoff_tables: List of game payoff tables, one for each agent identity.
      Each payoff_table may be either a numpy array, or a
      _PayoffTableInterface object.
    strat_labels: Human-readable strategy labels. See
      get_strat_profile_labels() in utils.py for formatting details.
    warm_start_alpha: Initial value of alpha to use.
    m: AlphaRank population size.
    rtol: The relative tolerance parameter for np.allclose calls.
    atol: The absolute tolerance parameter for np.allclose calls.

  Returns:
    pi: AlphaRank stationary distribution.
    alpha: The AlphaRank selection-intensity level resulting from the sweep.
    fig: The figure of the pi-vs-alpha sweep plot.
  """
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  num_populations = len(payoff_tables)
  num_strats_per_population = utils.get_num_strats_per_population(
      payoff_tables, payoffs_are_hpt_format)

  if num_populations == 1:
    num_profiles = num_strats_per_population[0]
  else:
    num_profiles = utils.get_num_profiles(num_strats_per_population)

  assert (strat_labels is None or isinstance(strat_labels, dict) or
          (len(strat_labels) == num_profiles))

  pi_list = np.empty((num_profiles, 0))
  alpha_list = []
  num_iters = 0
  alpha_mult_factor = 2.

  if warm_start_alpha is not None:
    alpha = warm_start_alpha
    alpharank_succeeded_once = False
  else:
    alpha = 1e-4  # Reasonable default for most games, can be user-overridden

  while True:
    try:
      _, _, pi, _, _ = compute(payoff_tables, alpha=alpha, m=m)
      pi_list = np.append(pi_list, np.reshape(pi, (-1, 1)), axis=1)
      alpha_list.append(alpha)
      # Stop when pi converges
      if num_iters > 0 and np.allclose(pi, pi_list[:, num_iters - 1], rtol,
                                       atol):
        break
      alpha *= alpha_mult_factor
      num_iters += 1
      alpharank_succeeded_once = True
    except ValueError as _:
      if warm_start_alpha is not None and not alpharank_succeeded_once:
        # When warm_start_alpha is used, there's a chance that the initial
        # warm_start_alpha is too large and causes exceptions due to the
        # Markov transition matrix being reducible. So keep decreasing until
        # a single success occurs.
        alpha /= 2
      elif not np.allclose(pi_list[:, -1], pi_list[:, -2], rtol, atol):
        # Sweep stopped due to multiple stationary distributions, but pi had
        # not converged due to the alpha scaling being too large.
        alpha /= alpha_mult_factor
        alpha_mult_factor = (alpha_mult_factor + 1.) / 2.
        alpha *= alpha_mult_factor
      else:
        break

  if strat_labels is None:
    strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                  payoffs_are_hpt_format)

  fig = plot_pi_vs_alpha(
      pi_list.T,
      alpha_list,
      num_populations,
      num_strats_per_population,
      strat_labels,
      num_strats_to_label=10)

  return pi, alpha, fig
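# Usage sketch (illustrative; the payoff pair is an assumption): sweep alpha
# until the ranking settles, then reuse the resulting alpha on later runs via
# warm_start_alpha to skip the early, small-alpha iterations.
def _example_sweep_pi_vs_alpha():
  rps = np.array([[0., -1., 1.],
                  [1., 0., -1.],
                  [-1., 1., 0.]])
  pi, alpha, _ = sweep_pi_vs_alpha([rps, -rps])
  return pi, alpha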
def compute(payoff_tables,
            m=50,
            alpha=100,
            use_local_selection_model=True,
            verbose=False,
            use_inf_alpha=False,
            inf_alpha_eps=0.01):
  """Computes the finite population stationary statistics.

  Args:
    payoff_tables: List of game payoff tables, one for each agent identity.
      Each payoff_table may be either a numpy array, or a
      _PayoffTableInterface object.
    m: Finite population size.
    alpha: Fermi distribution temperature parameter.
    use_local_selection_model: Enable local evolutionary selection model,
      which considers fitness against the current opponent only, rather than
      the global population state.
    verbose: Set to True to print intermediate results.
    use_inf_alpha: Use infinite-alpha alpharank model.
    inf_alpha_eps: Noise term to use in infinite-alpha alpharank model.

  Returns:
    rhos: Matrix of strategy-to-strategy fixation probabilities.
    rho_m: Neutral fixation probability.
    pi: Finite population stationary distribution.
    num_profiles: Number of strategy profiles (i.e., Markov chain states).
    num_strats_per_population: Number of strategies for each population.
  """
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  num_populations = len(payoff_tables)
  num_strats_per_population = utils.get_num_strats_per_population(
      payoff_tables, payoffs_are_hpt_format)

  # Handles the trivial case of a Markov chain with one state
  if np.array_equal(num_strats_per_population,
                    np.ones(len(num_strats_per_population))):
    rhos = np.asarray([[1]])
    rho_m = 1. / m if not use_inf_alpha else 1
    num_profiles = 1
    pi = np.asarray([1.])
    return rhos, rho_m, pi, num_profiles, num_strats_per_population

  if verbose:
    print('Constructing c matrix')
    print('num_strats_per_population:', num_strats_per_population)

  if num_populations == 1:
    # Use fast closed-form analysis for constant-sum single-population games
    game_is_constant_sum, payoff_sum = utils.check_is_constant_sum(
        payoff_tables[0], payoffs_are_hpt_format)
    if verbose:
      print('game_is_constant_sum:', game_is_constant_sum, 'payoff sum: ',
            payoff_sum)
    # Single-population/symmetric game just uses the first player's payoffs
    c, rhos = _get_singlepop_transition_matrix(
        payoff_tables[0],
        payoffs_are_hpt_format,
        m,
        alpha,
        game_is_constant_sum,
        use_local_selection_model,
        payoff_sum,
        use_inf_alpha=use_inf_alpha,
        inf_alpha_eps=inf_alpha_eps)
    num_profiles = num_strats_per_population[0]
  else:
    c, rhos = _get_multipop_transition_matrix(
        payoff_tables,
        payoffs_are_hpt_format,
        m,
        alpha,
        use_inf_alpha=use_inf_alpha,
        inf_alpha_eps=inf_alpha_eps)
    num_profiles = utils.get_num_profiles(num_strats_per_population)

  pi = _get_stationary_distr(c)

  rho_m = 1. / m if not use_inf_alpha else 1  # Neutral fixation probability
  if verbose:
    print_results(payoff_tables, payoffs_are_hpt_format, rhos, rho_m, c, pi)

  return rhos, rho_m, pi, num_profiles, num_strats_per_population
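# Usage sketch (an illustration, not part of the module): run AlphaRank on
# rock-paper-scissors loaded through pyspiel, treating the symmetric game as a
# single population, mirroring the flow of the tests above. Assumes `pyspiel`
# and `utils` are imported as elsewhere in this file.
def _example_compute():
  game = pyspiel.load_matrix_game('matrix_rps')
  payoff_tables = utils.game_payoffs_array(game)
  _, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables)
  _, _, pi, _, _ = compute(payoff_tables, m=50, alpha=1e2)
  return pi  # By symmetry, uniform over R/P/S for this game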
def sweep_pi_vs_epsilon(payoff_tables,
                        strat_labels=None,
                        warm_start_epsilon=None,
                        visualize=False,
                        return_epsilon=False,
                        min_iters=10,
                        max_iters=100,
                        min_epsilon=1e-14,
                        num_strats_to_label=10,
                        legend_sort_clusters=False):
  """Computes infinite-alpha distribution for a range of perturbations.

  The range of response graph perturbations is defined in epsilon_list.

  Note that min_iters and max_iters are necessary as it may sometimes appear
  that the stationary distribution has converged for a game in the first few
  iterations, where in reality a sufficiently smaller epsilon is needed for
  the distribution to first diverge, then reconverge. This behavior is
  dependent on both the payoff structure and bounds, so the parameters
  min_iters and max_iters can be used to fine-tune this.

  Args:
    payoff_tables: List of game payoff tables, one for each agent identity.
      Each payoff_table may be either a numpy array, or a
      _PayoffTableInterface object.
    strat_labels: Human-readable strategy labels. See
      get_strat_profile_labels() in utils.py for formatting details.
    warm_start_epsilon: Initial value of epsilon to use.
    visualize: Plot the sweep results.
    return_epsilon: Whether to return the final epsilon used.
    min_iters: The minimum number of sweep iterations.
    max_iters: The maximum number of sweep iterations.
    min_epsilon: The minimum value of epsilon to be tested, at which point the
      sweep terminates (if not converged already).
    num_strats_to_label: Number of strats to label in the legend.
    legend_sort_clusters: If true, strategies in the same cluster are sorted
      in the legend according to orderings for earlier alpha values. Primarily
      for visualization purposes! Rankings for lower alpha values should be
      interpreted carefully.

  Returns:
    pi: AlphaRank stationary distribution.
    epsilon: The AlphaRank transition matrix noise level resulting from the
      sweep.
  """
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  num_populations = len(payoff_tables)
  num_strats_per_population = utils.get_num_strats_per_population(
      payoff_tables, payoffs_are_hpt_format)

  if num_populations == 1:
    num_profiles = num_strats_per_population[0]
  else:
    num_profiles = utils.get_num_profiles(num_strats_per_population)

  assert (strat_labels is None or isinstance(strat_labels, dict) or
          (len(strat_labels) == num_profiles))

  pi_list = np.empty((num_profiles, 0))
  pi, alpha, m = None, None, None  # Unused in infinite-alpha regime
  epsilon_list = []
  epsilon_pi_hist = {}
  num_iters = 0

  epsilon_mult_factor = 0.5
  alpharank_succeeded_once = False

  if warm_start_epsilon is not None:
    epsilon = warm_start_epsilon
  else:
    epsilon = 0.5

  while True:
    try:
      pi_prev = pi
      _, _, pi, _, _ = compute(
          payoff_tables,
          m=m,
          alpha=alpha,
          use_inf_alpha=True,
          inf_alpha_eps=epsilon)
      epsilon_pi_hist[epsilon] = pi
      # Stop when pi converges
      if num_iters > min_iters and np.allclose(pi, pi_prev):
        break

      epsilon *= epsilon_mult_factor
      num_iters += 1
      alpharank_succeeded_once = True
      assert num_iters < max_iters, ('Alpharank stationary distr. not found '
                                     'after {} iterations of pi_vs_epsilon '
                                     'sweep'.format(num_iters))
    except ValueError as _:
      print('Error: ', _, epsilon, min_epsilon)
      # Case where epsilon has been decreased beyond desirable limits, but no
      # distribution was found.
      assert epsilon >= min_epsilon, ('AlphaRank stationary distr. not found '
                                      '& epsilon < min_epsilon.')
      # Case where epsilon >= min_epsilon, but it is still small enough to
      # cause exceptions due to precision issues. So increase it.
      epsilon /= epsilon_mult_factor

      # Case where alpharank_succeeded_once (i.e., epsilon_list and pi_list
      # have at least one entry), and a) has not converged yet and b) failed
      # on this instance due to epsilon being too small. I.e., the rate of
      # decrease of epsilon is too high.
      if alpharank_succeeded_once:
        epsilon_mult_factor = (epsilon_mult_factor + 1.) / 2.
        epsilon *= epsilon_mult_factor

  epsilon_list, pi_list = zip(*[
      (epsilon, epsilon_pi_hist[epsilon])
      for epsilon in sorted(epsilon_pi_hist.keys(), reverse=True)
  ])
  pi_list = np.asarray(pi_list)

  if visualize:
    if strat_labels is None:
      strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                    payoffs_are_hpt_format)
    alpharank_visualizer.plot_pi_vs_alpha(
        pi_list.T,
        epsilon_list,
        num_populations,
        num_strats_per_population,
        strat_labels,
        num_strats_to_label=num_strats_to_label,
        legend_sort_clusters=legend_sort_clusters,
        xlabel=r'Infinite-AlphaRank Noise $\epsilon$')

  if return_epsilon:
    return pi_list[-1], epsilon_list[-1]
  else:
    return pi_list[-1]
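# Usage sketch (illustrative; the payoff pair is an assumption): the
# infinite-alpha sweep needs only the payoff tables; pass return_epsilon=True
# to also recover the final noise level.
def _example_sweep_pi_vs_epsilon():
  rps = np.array([[0., -1., 1.],
                  [1., 0., -1.],
                  [-1., 1., 0.]])
  pi, epsilon = sweep_pi_vs_epsilon([rps, -rps], return_epsilon=True)
  return pi, epsilon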
      payoff_tables[k][tuple(df_agents.iloc[i])] = row[k]
  return payoff_tables, strat_labels


if __name__ == '__main__':
  csv_path = "alpha_rank_all_matches_simple.csv"

  # Import the csv
  df = pd.read_csv(csv_path, index_col=False)

  # Convert to payoff_tables
  payoff_tables, strat_labels = payoff_tables_from_df(df)
  # payoff_tables = heuristic_payoff_table.from_elo_scores(
  #     [1286, 1322, 1401, 1440, 1457, 1466, 1470])

  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  # strat_labels = utils.get_strat_profile_labels(payoff_tables,
  #                                               payoffs_are_hpt_format)

  # Run AlphaRank
  rhos, rho_m, pi, _, _ = alpharank.compute(payoff_tables, alpha=1e-1)

  # Report & plot results
  alpharank.print_results(
      payoff_tables, payoffs_are_hpt_format, rhos=rhos, rho_m=rho_m, pi=pi)
  utils.print_rankings_table(payoff_tables, pi, strat_labels)