Python get_num_strats_per_population 예제들, open_spiel.python.egt.utils.get_num_strats_per_population Python 예제들

예제 #1

0

파일 보기

def get_joint_policies_from_id_list(payoff_tables, policies, profile_id_list):
    """Maps integer profile IDs to the joint policies they identify.

    Args:
      payoff_tables: List of payoff tables, one per player.
      policies: Per-player policy collections; policies[k][i] is the i-th
        policy available to player k.
      profile_id_list: List of integer IDs, each identifying one joint policy.
        The original documentation states these are the IDs produced by
        get_strategy_profile_ids().

    Returns:
      A list with one entry per ID in profile_id_list, in the same order. Each
      entry is a joint policy, i.e. a list holding one policy per player.
    """
    strategy_counts = alpharank_utils.get_num_strats_per_population(
        payoff_tables, payoffs_are_hpt_format=False)
    # The payoff tables and the policy collections must describe the same
    # strategy space, otherwise decoded indices would not line up.
    np.testing.assert_array_equal(
        strategy_counts,
        [len(player_policies) for player_policies in policies])
    player_indices = range(len(policies))

    def joint_policy_for(profile_id):
        # Decode the integer ID into one strategy index per player, then pick
        # each player's policy at that index.
        profile = alpharank_utils.get_strat_profile_from_id(
            strategy_counts, profile_id)
        return [policies[player][profile[player]]
                for player in player_indices]

    return [joint_policy_for(profile_id) for profile_id in profile_id_list]

예제 #2

0

파일 보기

def get_alpharank_marginals(payoff_tables, pi):
    """Collapses joint AlphaRank masses into per-player marginal masses.

    Args:
      payoff_tables: List of meta-game payoff tables for a K-player game, where
        each table has dim [n_strategies_player_1 x ... x n_strategies_player_K].
        These payoff tables may be asymmetric.
      pi: The vector of joint rankings as computed by alpharank. Element i
        belongs to the strategy profile whose integer ID is i (the ID scheme of
        alpharank_utils.get_id_from_strat_profile()).

    Returns:
      When there is exactly one payoff table, `pi` itself, unchanged.
      Otherwise a list of np.arrays, one per player, where the k-th array has
      one entry per strategy of player k holding the summed mass of every
      joint profile in which player k uses that strategy.
    """
    num_populations = len(payoff_tables)

    # A single population has no joint structure to marginalize over.
    if num_populations == 1:
        return pi

    strategy_counts = alpharank_utils.get_num_strats_per_population(
        payoff_tables, payoffs_are_hpt_format=False)
    total_profiles = alpharank_utils.get_num_profiles(strategy_counts)

    pi_marginals = [np.zeros(count) for count in strategy_counts]
    for profile_id in range(total_profiles):
        profile = alpharank_utils.get_strat_profile_from_id(
            strategy_counts, profile_id)
        # Credit this profile's mass to the strategy each player uses in it.
        for player in range(num_populations):
            pi_marginals[player][profile[player]] += pi[profile_id]
    return pi_marginals

예제 #3

0

파일 보기

def get_game_for_sampler(game_name):
    """Returns pre-processed game data for ResponseGraphUCB examples.

    Args:
      game_name: One of 'bernoulli', 'soccer', 'kuhn_poker_2p',
        'kuhn_poker_3p' or 'kuhn_poker_4p'.

    Returns:
      A game sampler built from the selected game's mean payoffs: a
      ZeroSumBernoulliGameSampler for 'bernoulli' and 'soccer', and a
      BernoulliGameSampler for the Kuhn poker variants.

    Raises:
      ValueError: If game_name is not one of the supported names.
    """
    # pylint: disable=invalid-name
    if game_name == 'bernoulli':
        M = get_payoffs_bernoulli_game()
        strategy_spaces = [2, 2]
        G = ZeroSumBernoulliGameSampler(strategy_spaces,
                                        means=M,
                                        payoff_bounds=[-1., 1.])
    elif game_name == 'soccer':
        M = get_soccer_data()
        M = M * 2. - 1  # Convert to zero-sum
        # The strategy space is read off the single-player table before it is
        # stacked with its transpose into one table per player.
        strategy_spaces = np.shape(M)
        M = np.asarray([M, M.T])
        G = ZeroSumBernoulliGameSampler(strategy_spaces,
                                        means=M,
                                        payoff_bounds=[np.min(M),
                                                       np.max(M)])
    elif game_name in ('kuhn_poker_2p', 'kuhn_poker_3p', 'kuhn_poker_4p'):
        # The player count is the digit in the '<n>p' suffix of the name.
        num_players = int(game_name[-2])
        M = get_kuhn_poker_data(num_players, iterations=2)
        strategy_spaces = egt_utils.get_num_strats_per_population(M, False)
        G = BernoulliGameSampler(strategy_spaces,
                                 means=M,
                                 payoff_bounds=[np.min(M),
                                                np.max(M)])
    else:
        # Build one message string; passing the pieces as separate arguments
        # makes the exception render as a tuple instead of a sentence.
        raise ValueError('Game {} not implemented!'.format(game_name))
    # pylint: enable=invalid-name
    return G

예제 #4

0

파일 보기

def suggest_alpha(payoff_tables, tol=.1):
    """Suggests an alpha for use in alpha-rank.

    The suggestion is roughly the smallest alpha at which the ranking has
    'settled out', computed as -ln(tol) / min_gap_between_payoffs.

    Rationale (from the original authors): the fixation probabilities can be
    expanded as a series whose terms change in relative size with alpha. As
    alpha grows, one term comes to dominate and the ranking settles down. How
    quickly that happens is easy to calculate, and it is used here to estimate
    the alpha by which the ranking has settled.

    Further discussion is in the PR:

    https://github.com/deepmind/open_spiel/pull/403

    Args:
      payoff_tables: List of game payoff tables, one for each agent identity.
        Each payoff_table may be either a numpy array, or a
        _PayoffTableInterface object.
      tol: the desired gap between the first and second terms in the fixation
        probability expansion. A smaller tolerance leads to a larger alpha, and
        a 'more settled out' ranking.

    Returns:
      A suggested alpha. The gap starts at infinity and is only lowered by
      strictly improving deviations, so if none exists the division is by
      infinity.
    """
    is_hpt = utils.check_payoffs_are_hpt(payoff_tables)
    strategy_counts = utils.get_num_strats_per_population(
        payoff_tables, is_hpt)
    total_profiles = utils.get_num_profiles(strategy_counts)

    def payoff_for(population, profile):
        # Payoff received by `population` when `profile` is played.
        return _get_payoff(payoff_tables[population], is_hpt, profile,
                           population)

    smallest_gain = np.inf
    for profile_id in range(total_profiles):
        current = utils.get_strat_profile_from_id(strategy_counts, profile_id)
        deviations = utils.get_valid_next_profiles(strategy_counts, current)

        for population, deviated in deviations:
            payoff_after = payoff_for(population, deviated)
            payoff_before = payoff_for(population, current)
            # Only strictly improving deviations contribute to the gap.
            if payoff_after > payoff_before:
                smallest_gain = min(smallest_gain,
                                    payoff_after - payoff_before)

    return -np.log(tol) / smallest_gain

예제 #5

0

파일 보기

파일: alpharank_visualizer.py 프로젝트: maxiaoba/rlkit

  def __init__(self,
               payoff_tables,
               rhos,
               rho_m,
               pi,
               state_labels,
               num_top_profiles=None):
    """Sets up the state used to plot the network.

    Args:
      payoff_tables: List of game payoff tables, one for each agent identity.
        Each payoff_table may be either a 2D numpy array, or a
        _PayoffTableInterface object.
      rhos: Fixation probabilities.
      rho_m: Neutral fixation probability.
      pi: Stationary distribution of fixation Markov chain defined by rhos.
        Must support len() and .argsort().
      state_labels: Labels corresponding to Markov states. For the
        single-population case, a list of pure strategy names. For the
        multi-population case, a dict with (key, value) pairs
        (population index, list of strategy names).
      num_top_profiles: Set to an int to show only the graph nodes
        corresponding to the top k elements of the stationary distribution, or
        None to show all.
    """
    self.fig = plt.figure(figsize=(11.5, 11.5))
    self.num_populations = len(payoff_tables)
    is_hpt = utils.check_payoffs_are_hpt(payoff_tables)
    self.num_strats_per_population = utils.get_num_strats_per_population(
        payoff_tables, is_hpt)
    self.rhos, self.rho_m, self.pi = rhos, rho_m, pi
    self.num_profiles = len(pi)
    self.state_labels = state_labels
    self.first_run = True
    self.num_top_profiles = num_top_profiles

    if not self.num_top_profiles:
      # Nothing requested (None or 0): every node is drawn.
      self.nodes_to_skip = []
    else:
      # Never ask for more profiles than exist.
      self.num_top_profiles = min(self.num_top_profiles, self.num_profiles)
      # argsort() is ascending, so the leading entries are the profiles with
      # the least stationary mass; those are the ones to hide.
      num_hidden = self.num_profiles - self.num_top_profiles
      self.nodes_to_skip = list(self.pi.argsort()[:num_hidden])

    self._reset_cycle_counter()

예제 #6

0

파일 보기

파일: heuristic_payoff_table.py 프로젝트: rahulpandeycs/openspiel-games

def from_matrix_game(matrix_game):
    """Builds a PayoffTable from a symmetric 2-player matrix game.

    Args:
      matrix_game: The payoff matrix corresponding to a 2-player symmetric
        game. Must be a numpy array.

    Returns:
      A PayoffTable with one entry per distinct strategy distribution.

    Raises:
      ValueError: If matrix_game is not a numpy array.
    """
    if not isinstance(matrix_game, np.ndarray):
        raise ValueError(
            "The matrix game should be a numpy array, not a {}".format(
                type(matrix_game)))

    strategy_counts = utils.get_num_strats_per_population(
        payoff_tables=[matrix_game], payoffs_are_hpt_format=False)
    # Exactly two populations with the same number of strategies are required.
    assert len(strategy_counts) == 2
    assert strategy_counts[0] == strategy_counts[1]
    num_strategies = strategy_counts[0]

    total_profiles = utils.get_num_profiles(strategy_counts)
    table = PayoffTable(num_players=2, num_strategies=num_strategies)

    # Fill the table one strategy profile at a time.
    for profile_id in range(total_profiles):
        profile = utils.get_strat_profile_from_id(strategy_counts, profile_id)
        table_key = tuple(table.get_distribution_from_profile(profile))
        # In a symmetric game several profiles map to the same distribution
        # and payoffs, so skip entries an earlier profile already filled.
        if not table.item_is_uninitialized(table_key):
            continue

        first, second = profile[0], profile[1]
        payoffs = np.zeros(num_strategies)
        payoffs[first] = matrix_game[first, second]
        payoffs[second] = matrix_game[second, first]
        table[table_key] = payoffs

    return table

예제 #7

0

파일 보기

def get_strategy_profile_ids(payoff_tables):
    """Returns the range of integer IDs covering every strategy profile.

    Args:
      payoff_tables: List of payoff tables, passed to
        alpharank_utils.get_num_strats_per_population() as non-HPT payoffs.

    Returns:
      range(num_profiles), where num_profiles comes from
      alpharank_utils.get_num_profiles().
    """
    strategy_counts = alpharank_utils.get_num_strats_per_population(
        payoff_tables, payoffs_are_hpt_format=False)
    total_profiles = alpharank_utils.get_num_profiles(strategy_counts)
    return range(total_profiles)

예제 #8

0

파일 보기

파일: alpharank.py 프로젝트: maxiaoba/rlkit

def sweep_pi_vs_alpha(payoff_tables,
                      strat_labels=None,
                      warm_start_alpha=None,
                      m=50,
                      rtol=1e-5,
                      atol=1e-8):
    """Computes stationary distribution, pi, for range of selection intensities.

    Alpha is swept upward (multiplied by a factor that starts at 2) until two
    consecutive stationary distributions agree within (rtol, atol), or until
    compute() raises ValueError after the distribution has already converged.
    The alphas that succeeded are collected in alpha_list, and the sweep is
    always plotted through plot_pi_vs_alpha().

    Args:
      payoff_tables: List of game payoff tables, one for each agent identity.
        Each payoff_table may be either a numpy array, or a
        _PayoffTableInterface object.
      strat_labels: Human-readable strategy labels. See
        get_strat_profile_labels() in utils.py for formatting details. If None,
        labels are generated with utils.get_strat_profile_labels().
      warm_start_alpha: Initial value of alpha to use. If None, the sweep
        starts at 1e-4.
      m: AlphaRank population size.
      rtol: The relative tolerance parameter for np.allclose calls.
      atol: The absolute tolerance parameter for np.allclose calls.

    Returns:
      pi: The stationary distribution from the last successful compute() call.
      alpha: The value of alpha when the sweep loop exited.
      fig: The value returned by plot_pi_vs_alpha() for the sweep.
    """

    payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
    num_populations = len(payoff_tables)
    num_strats_per_population =\
      utils.get_num_strats_per_population(payoff_tables, payoffs_are_hpt_format)

    if num_populations == 1:
        num_profiles = num_strats_per_population[0]
    else:
        num_profiles = utils.get_num_profiles(num_strats_per_population)

    # Labels may be omitted, given as a dict, or given as one entry per profile.
    assert strat_labels is None or isinstance(strat_labels, dict)\
        or (len(strat_labels) == num_profiles)

    # One column is appended to pi_list per successful compute() call.
    pi_list = np.empty((num_profiles, 0))
    alpha_list = []
    num_iters = 0
    alpha_mult_factor = 2.

    if warm_start_alpha is not None:
        alpha = warm_start_alpha
        # Only bound on the warm-start path; the except branch below checks
        # `warm_start_alpha is not None` first, so it is never read unbound.
        alpharank_succeeded_once = False
    else:
        alpha = 1e-4  # Reasonable default for most games, can be user-overridden

    while 1:
        try:
            _, _, pi, _, _ = compute(payoff_tables, alpha=alpha, m=m)
            pi_list = np.append(pi_list, np.reshape(pi, (-1, 1)), axis=1)
            alpha_list.append(alpha)
            # Stop when pi converges. The newest column sits at index
            # num_iters, so num_iters - 1 is the previous successful result.
            if num_iters > 0 and np.allclose(pi, pi_list[:, num_iters - 1],
                                             rtol, atol):
                break
            alpha *= alpha_mult_factor
            num_iters += 1
            alpharank_succeeded_once = True
        except ValueError as _:
            if warm_start_alpha is not None and not alpharank_succeeded_once:
                # When warm_start_alpha is used, there's a chance that
                # the initial warm_start_alpha is too large and causes exceptions due to
                # the Markov transition matrix being reducible. So keep decreasing until
                # a single success occurs.
                alpha /= 2
            # NOTE(review): this indexes the last two columns of pi_list; if
            # fewer than two compute() calls have succeeded when ValueError is
            # raised, the indexing itself raises IndexError -- confirm whether
            # that can happen in practice.
            elif not np.allclose(pi_list[:, -1], pi_list[:, -2], rtol, atol):
                # Sweep stopped due to multiple stationary distributions, but pi had
                # not converged due to the alpha scaling being too large.
                # Undo the last scaling step, move the factor halfway towards
                # 1, and retry with the gentler step.
                alpha /= alpha_mult_factor
                alpha_mult_factor = (alpha_mult_factor + 1.) / 2.
                alpha *= alpha_mult_factor
            else:
                # The last two distributions already agree: treat as converged.
                break

    if strat_labels is None:
        strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                      payoffs_are_hpt_format)
    # pi_list is transposed so each row holds the distribution for one alpha.
    fig = plot_pi_vs_alpha(pi_list.T,
                           alpha_list,
                           num_populations,
                           num_strats_per_population,
                           strat_labels,
                           num_strats_to_label=10)

    return pi, alpha, fig

예제 #9

0

파일 보기

def compute(payoff_tables,
            m=50,
            alpha=100,
            use_local_selection_model=True,
            verbose=False,
            use_inf_alpha=False,
            inf_alpha_eps=0.01):
    """Computes the finite population stationary statistics.

    Args:
      payoff_tables: List of game payoff tables, one for each agent identity.
        Each payoff_table may be either a numpy array, or a
        _PayoffTableInterface object.
      m: Finite population size.
      alpha: Fermi distribution temperature parameter.
      use_local_selection_model: Enable local evolutionary selection model,
        which considers fitness against the current opponent only, rather than
        the global population state.
      verbose: Set to True to print intermediate results.
      use_inf_alpha: Use infinite-alpha alpharank model.
      inf_alpha_eps: Noise term to use in infinite-alpha alpharank model.

    Returns:
      rhos: Matrix of strategy-to-strategy fixation probabilities.
      rho_m: Neutral fixation probability (1 in the infinite-alpha model,
        1 / m otherwise).
      pi: Finite population stationary distribution.
      num_profiles: Number of strategy profiles (states of the Markov chain).
      num_strats_per_population: Number of strategies of each population.
    """
    is_hpt = utils.check_payoffs_are_hpt(payoff_tables)
    num_populations = len(payoff_tables)
    num_strats_per_population = utils.get_num_strats_per_population(
        payoff_tables, is_hpt)

    def neutral_fixation_probability():
        # Only divides by m outside the infinite-alpha model.
        return 1 if use_inf_alpha else 1. / m

    # Handles the trivial case of Markov chain with one state: every
    # population has exactly one strategy.
    only_one_state = np.array_equal(
        num_strats_per_population, np.ones(len(num_strats_per_population)))
    if only_one_state:
        return (np.asarray([[1]]), neutral_fixation_probability(),
                np.asarray([1.]), 1, num_strats_per_population)

    if verbose:
        print('Constructing c matrix')
        print('num_strats_per_population:', num_strats_per_population)

    if num_populations != 1:
        transition_matrix, rhos = _get_multipop_transition_matrix(
            payoff_tables,
            is_hpt,
            m,
            alpha,
            use_inf_alpha=use_inf_alpha,
            inf_alpha_eps=inf_alpha_eps)
        num_profiles = utils.get_num_profiles(num_strats_per_population)
    else:
        # Use fast closed-form analysis for constant-sum single-population
        # games.
        game_is_constant_sum, payoff_sum = utils.check_is_constant_sum(
            payoff_tables[0], is_hpt)
        if verbose:
            print('game_is_constant_sum:', game_is_constant_sum,
                  'payoff sum: ', payoff_sum)
        # Single-population/symmetric game just uses the first player's
        # payoffs.
        transition_matrix, rhos = _get_singlepop_transition_matrix(
            payoff_tables[0],
            is_hpt,
            m,
            alpha,
            game_is_constant_sum,
            use_local_selection_model,
            payoff_sum,
            use_inf_alpha=use_inf_alpha,
            inf_alpha_eps=inf_alpha_eps)
        num_profiles = num_strats_per_population[0]

    pi = _get_stationary_distr(transition_matrix)
    rho_m = neutral_fixation_probability()

    if verbose:
        print_results(payoff_tables, is_hpt, rhos, rho_m, transition_matrix,
                      pi)

    return rhos, rho_m, pi, num_profiles, num_strats_per_population

예제 #10

0

파일 보기

def sweep_pi_vs_epsilon(payoff_tables,
                        strat_labels=None,
                        warm_start_epsilon=None,
                        visualize=False,
                        return_epsilon=False,
                        min_iters=10,
                        max_iters=100,
                        min_epsilon=1e-14,
                        num_strats_to_label=10,
                        legend_sort_clusters=False):
    """Computes infinite-alpha distribution for a range of perturbations.

    Epsilon is swept downward (multiplied by a factor that starts at 0.5) and
    the stationary distribution found for each epsilon is recorded.

    Note that min_iters and max_iters is necessary as it may sometimes appear
    the stationary distribution has converged for a game in the first few
    iterations, where in reality a sufficiently smaller epsilon is needed for
    the distribution to first diverge, then reconverge. This behavior is
    dependent on both the payoff structure and bounds, so the parameters
    min_iters and max_iters can be used to fine-tune this.

    Args:
      payoff_tables: List of game payoff tables, one for each agent identity.
        Each payoff_table may be either a numpy array, or a
        _PayoffTableInterface object.
      strat_labels: Human-readable strategy labels. See
        get_strat_profile_labels() in utils.py for formatting details.
      warm_start_epsilon: Initial value of epsilon to use. If None, the sweep
        starts at 0.5.
      visualize: Plot the sweep results.
      return_epsilon: Whether to return the final epsilon used.
      min_iters: the minimum number of sweep iterations.
      max_iters: the maximum number of sweep iterations.
      min_epsilon: the minimum value of epsilon to be tested, at which point
        the sweep terminates (if not converged already).
      num_strats_to_label: Number of strats to label in legend
      legend_sort_clusters: If true, strategies in the same cluster are sorted
        in the legend according to orderings for earlier alpha values.
        Primarily for visualization purposes! Rankings for lower alpha values
        should be interpreted carefully.

    Returns:
      pi: AlphaRank stationary distribution recorded for the smallest epsilon
        that succeeded.
      epsilon: That smallest epsilon. Only returned (as the second element of
        a tuple) when return_epsilon is True.

    Raises:
      AssertionError: If strat_labels has an unexpected length, if max_iters
        is reached, or if compute() fails with epsilon below min_epsilon.
    """
    payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
    num_populations = len(payoff_tables)
    num_strats_per_population = utils.get_num_strats_per_population(
        payoff_tables, payoffs_are_hpt_format)

    if num_populations == 1:
        num_profiles = num_strats_per_population[0]
    else:
        num_profiles = utils.get_num_profiles(num_strats_per_population)

    assert (strat_labels is None or isinstance(strat_labels, dict)
            or (len(strat_labels) == num_profiles))

    pi, alpha, m = None, None, None  # Unused in infinite-alpha regime
    # Maps each epsilon that succeeded to the distribution found for it.
    epsilon_pi_hist = {}
    num_iters = 0

    epsilon_mult_factor = 0.5
    alpharank_succeeded_once = False

    epsilon = 0.5 if warm_start_epsilon is None else warm_start_epsilon

    while True:
        try:
            pi_prev = pi
            _, _, pi, _, _ = compute(payoff_tables,
                                     m=m,
                                     alpha=alpha,
                                     use_inf_alpha=True,
                                     inf_alpha_eps=epsilon)
            epsilon_pi_hist[epsilon] = pi
            # Stop when pi converges
            if num_iters > min_iters and np.allclose(pi, pi_prev):
                break

            epsilon *= epsilon_mult_factor
            num_iters += 1
            alpharank_succeeded_once = True
            # The fragments are joined with explicit spaces; adjacent string
            # literals are concatenated with nothing in between.
            assert num_iters < max_iters, (
                'Alpharank stationary distr. not found '
                'after {} iterations of pi_vs_epsilon '
                'sweep'.format(num_iters))

        except ValueError as err:
            print('Error: ', err, epsilon, min_epsilon)
            # Case where epsilon has been decreased beyond desirable limits
            # but no distribution found.
            assert epsilon >= min_epsilon, (
                'AlphaRank stationary distr. not found & '
                'epsilon < min_epsilon.')
            # Case where epsilon >= min_epsilon, but still small enough that
            # it causes exceptions due to precision issues. So increase it.
            epsilon /= epsilon_mult_factor

            # Case where alpharank_succeeded_once (i.e., epsilon_pi_hist has
            # at least one entry), and a) has not converged yet and b) failed
            # on this instance due to epsilon being too small. I.e., the rate
            # of decreasing of epsilon is too high, so move the factor halfway
            # towards 1 and retry with the gentler step.
            if alpharank_succeeded_once:
                epsilon_mult_factor = (epsilon_mult_factor + 1.) / 2.
                epsilon *= epsilon_mult_factor

    # Order the history from largest to smallest epsilon, so the last entry
    # is the result for the smallest epsilon that succeeded.
    epsilon_list, pi_list = zip(
        *[(eps, epsilon_pi_hist[eps])
          for eps in sorted(epsilon_pi_hist.keys(), reverse=True)])
    pi_list = np.asarray(pi_list)

    if visualize:
        if strat_labels is None:
            strat_labels = utils.get_strat_profile_labels(
                payoff_tables, payoffs_are_hpt_format)
        alpharank_visualizer.plot_pi_vs_alpha(
            pi_list.T,
            epsilon_list,
            num_populations,
            num_strats_per_population,
            strat_labels,
            num_strats_to_label=num_strats_to_label,
            legend_sort_clusters=legend_sort_clusters,
            xlabel=r'Infinite-AlphaRank Noise $\epsilon$')

    if return_epsilon:
        return pi_list[-1], epsilon_list[-1]
    else:
        return pi_list[-1]

예제 #11

0

파일 보기

def _get_multipop_transition_matrix(payoff_tables,
                                    payoffs_are_hpt_format,
                                    m,
                                    alpha,
                                    use_inf_alpha=False,
                                    inf_alpha_eps=0.1):
    """Gets Markov transition matrix for multipopulation games.

    Args:
      payoff_tables: List of payoff tables, indexed by population.
      payoffs_are_hpt_format: Boolean forwarded to the payoff helpers to
        indicate the payoff table format.
      m: Population size, forwarded to _get_rho_sr_multipop().
      alpha: Selection intensity, forwarded to _get_rho_sr_multipop().
      use_inf_alpha: Use infinite-alpha alpharank model.
      inf_alpha_eps: Noise term (epsilon) used in infinite-alpha alpharank
        model.

    Returns:
      c: The [num_profiles x num_profiles] transition matrix.
      rhos: The matching matrix of fixation probabilities; left at zero when
        use_inf_alpha is True.
    """
    strategy_counts = utils.get_num_strats_per_population(
        payoff_tables, payoffs_are_hpt_format)
    num_profiles = utils.get_num_profiles(strategy_counts)

    # Every off-diagonal transition weight is scaled by eta.
    eta = 1. / (np.sum(strategy_counts - 1))

    c = np.zeros((num_profiles, num_profiles))
    rhos = np.zeros((num_profiles, num_profiles))

    def payoff_for(population, profile):
        # Payoff of the population that changed strategy under `profile`.
        return _get_payoff(payoff_tables[population],
                           payoffs_are_hpt_format,
                           profile,
                           k=population)

    def inf_alpha_weight(population, source, target):
        # Transition weight source -> target in the infinite-alpha model.
        payoff_target = payoff_for(population, target)
        payoff_source = payoff_for(population, source)
        if np.isclose(payoff_target, payoff_source, atol=1e-14):
            return eta * 0.5
        if payoff_target > payoff_source:
            # Transition to the target strategy since its payoff is higher,
            # but remove some small amount of mass, inf_alpha_eps, to keep the
            # chain irreducible.
            return eta * (1 - inf_alpha_eps)
        # Transition with very small probability
        return eta * inf_alpha_eps

    for source_id in range(num_profiles):
        source = utils.get_strat_profile_from_id(strategy_counts, source_id)
        neighbours = utils.get_valid_next_profiles(strategy_counts, source)

        for population, target in neighbours:
            target_id = utils.get_id_from_strat_profile(
                strategy_counts, target)
            if use_inf_alpha:
                c[source_id, target_id] = inf_alpha_weight(
                    population, source, target)
                continue
            rhos[source_id, target_id] = _get_rho_sr_multipop(
                payoff_table_k=payoff_tables[population],
                payoffs_are_hpt_format=payoffs_are_hpt_format,
                k=population,
                m=m,
                r=target,
                s=source,
                alpha=alpha)
            c[source_id, target_id] = eta * rhos[source_id, target_id]

        # Special case of self-transition: whatever mass the row has not
        # assigned to other profiles stays on the diagonal.
        c[source_id, source_id] = 1 - sum(c[source_id, :])

    return c, rhos

예제 #12

0

파일 보기

def _get_singlepop_transition_matrix(payoff_table,
                                     payoffs_are_hpt_format,
                                     m,
                                     alpha,
                                     game_is_constant_sum,
                                     use_local_selection_model,
                                     payoff_sum,
                                     use_inf_alpha=False,
                                     inf_alpha_eps=0.1):
    """Gets the Markov transition matrix for a single-population game.

    Args:
      payoff_table: A payoff table.
      payoffs_are_hpt_format: Boolean indicating whether payoff_table is a
        _PayoffTableInterface object (AKA Heuristic Payoff Table or HPT), or a
        numpy array. True indicates HPT format, False indicates numpy array.
      m: Total number of agents in the k-th population.
      alpha: Fermi distribution temperature parameter.
      game_is_constant_sum: Boolean indicating if the game is constant sum.
      use_local_selection_model: Enable local evolutionary selection model,
        which considers fitness against the current opponent only, rather than
        the global population state.
      payoff_sum: The payoff sum if the game is constant sum, or None
        otherwise.
      use_inf_alpha: Use infinite-alpha alpharank model.
      inf_alpha_eps: Noise term (epsilon) used in infinite-alpha alpharank
        model.

    Returns:
      c: The [num_strats x num_strats] Markov transition matrix.
      rhos: The matching matrix of fixation probabilities; left at zero when
        use_inf_alpha is True, and always zero on the diagonal.
    """
    num_strats = utils.get_num_strats_per_population(
        [payoff_table], payoffs_are_hpt_format)[0]

    c = np.zeros((num_strats, num_strats))
    rhos = np.zeros((num_strats, num_strats))

    # Rows are the current strategy, columns the strategy moved to.
    for current in range(num_strats):
        for candidate in range(num_strats):
            if candidate == current:
                # The diagonal is filled in after the row is complete.
                continue

            # Computed here rather than up front so that it is never
            # evaluated when there is only one strategy.
            eta = 1. / (num_strats - 1)

            if not use_inf_alpha:
                rhos[current, candidate] = _get_rho_sr(
                    payoff_table, payoffs_are_hpt_format, m, candidate,
                    current, alpha, game_is_constant_sum,
                    use_local_selection_model, payoff_sum)
                c[current, candidate] = eta * rhos[current, candidate]
                continue

            # Payoff of candidate when played against current
            payoff_candidate = _get_payoff(payoff_table,
                                           payoffs_are_hpt_format,
                                           strat_profile=[candidate, current],
                                           k=0)
            # Payoff of current when played against candidate
            payoff_current = _get_payoff(payoff_table,
                                         payoffs_are_hpt_format,
                                         strat_profile=[current, candidate],
                                         k=0)
            if np.isclose(payoff_candidate, payoff_current, atol=1e-14):
                weight = 0.5
            elif payoff_candidate > payoff_current:
                # Transition to candidate since its payoff is higher, but
                # remove some small amount of mass, inf_alpha_eps, to keep the
                # chain irreducible.
                weight = 1 - inf_alpha_eps
            else:
                # Transition with very small probability
                weight = inf_alpha_eps
            c[current, candidate] = eta * weight

        # Diagonal: the mass the row has not assigned to other strategies.
        c[current, current] = 1 - sum(c[current, :])

    return c, rhos