def test_plot_pi_vs_alpha(self, mock_plt):
        # Construct game
        game = pyspiel.load_matrix_game("matrix_rps")
        payoff_tables = utils.game_payoffs_array(game)
        _, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables)
        payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)

        # Compute alpharank
        alpha = 1e2
        _, _, pi, num_profiles, num_strats_per_population =\
            alpharank.compute(payoff_tables, alpha=alpha)
        strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                      payoffs_are_hpt_format)
        num_populations = len(payoff_tables)

        # Construct synthetic pi-vs-alpha history
        pi_list = np.empty((num_profiles, 0))
        alpha_list = []
        for _ in range(2):
            pi_list = np.append(pi_list, np.reshape(pi, (-1, 1)), axis=1)
            alpha_list.append(alpha)

        # Test plotting code (via pyplot mocking to prevent plot pop-up)
        alpharank_visualizer.plot_pi_vs_alpha(pi_list.T,
                                              alpha_list,
                                              num_populations,
                                              num_strats_per_population,
                                              strat_labels,
                                              num_strats_to_label=0)
        self.assertTrue(mock_plt.show.called)
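The snippets above and below omit their imports. A header along the following lines, assuming the standard open_spiel package layout, should cover them (the later examples are function definitions from inside the alpharank module itself and therefore call compute, print_results, NetworkPlot, and plot_pi_vs_alpha without a module prefix):

import numpy as np

import pyspiel
from open_spiel.python.egt import alpharank
from open_spiel.python.egt import alpharank_visualizer
from open_spiel.python.egt import utils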
Example 2
def main(unused_arg):
  # Construct meta-game payoff tables
  payoff_tables = get_kuhn_poker_data()
  payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
  strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                payoffs_are_hpt_format)

  # Run AlphaRank
  rhos, rho_m, pi, _, _ = alpharank.compute(payoff_tables, alpha=1e2)

  # Report & plot results
  alpharank.print_results(
      payoff_tables, payoffs_are_hpt_format, rhos=rhos, rho_m=rho_m, pi=pi)
  utils.print_rankings_table(payoff_tables, pi, strat_labels)
  m_network_plotter = alpharank_visualizer.NetworkPlot(
      payoff_tables, rhos, rho_m, pi, strat_labels, num_top_profiles=8)
  m_network_plotter.compute_and_draw_network()
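In the OpenSpiel example scripts, a main like this is launched via absl. A minimal entry point, assuming get_kuhn_poker_data is defined earlier in the same script, would be:

from absl import app

if __name__ == '__main__':
  app.run(main)  # absl forwards the remaining command-line args to main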
Example 3
def compute_and_report_alpharank(payoff_tables,
                                 m=50,
                                 alpha=100,
                                 verbose=False,
                                 num_top_strats_to_print=8):
    """Computes and visualizes Alpha-Rank outputs.

  Args:
    payoff_tables: List of game payoff tables, one for each agent identity. Each
      payoff_table may be either a numpy array, or a _PayoffTableInterface
      object.
    m: Finite population size.
    alpha: Fermi distribution temperature parameter.
    verbose: Set to True to print intermediate results.
    num_top_strats_to_print: Number of top strategies to print.

  Returns:
    pi: AlphaRank stationary distribution/rankings.
  """
    payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
    rhos, rho_m, pi, _, _ = compute(payoff_tables, m=m, alpha=alpha)
    strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                  payoffs_are_hpt_format)

    if verbose:
        print_results(payoff_tables, payoffs_are_hpt_format, pi=pi)

    utils.print_rankings_table(payoff_tables,
                               pi,
                               strat_labels,
                               num_top_strats_to_print=num_top_strats_to_print)
    m_network_plotter = NetworkPlot(payoff_tables,
                                    rhos,
                                    rho_m,
                                    pi,
                                    strat_labels,
                                    num_top_profiles=8)
    m_network_plotter.compute_and_draw_network()
    return pi
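A minimal usage sketch, borrowing the matrix rock-paper-scissors setup from the first example (the game name and helper calls are assumptions based on that test, not part of this function):

game = pyspiel.load_matrix_game("matrix_rps")
payoff_tables = utils.game_payoffs_array(game)  # two 3x3 tables -> 9 profiles
pi = compute_and_report_alpharank(payoff_tables, alpha=1e2, verbose=True)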
Example 4
def sweep_pi_vs_alpha(payoff_tables,
                      strat_labels=None,
                      warm_start_alpha=None,
                      m=50,
                      rtol=1e-5,
                      atol=1e-8):
    """Computes stationary distribution, pi, for range of selection intensities.

  The range of selection intensities is defined in alpha_list and corresponds
  to the temperature of the Fermi selection function.

  Args:
    payoff_tables: List of game payoff tables, one for each agent identity. Each
      payoff_table may be either a numpy array, or a _PayoffTableInterface
      object.
    strat_labels: Human-readable strategy labels. See get_strat_profile_labels()
      in utils.py for formatting details.
    warm_start_alpha: Initial value of alpha to use.
    m: AlphaRank population size.
    rtol: The relative tolerance parameter for np.allclose calls.
    atol: The absolute tolerance parameter for np.allclose calls.

  Returns:
   pi: AlphaRank stationary distribution.
   alpha: The AlphaRank selection-intensity level resulting from the sweep.
   fig: The figure returned by the plot_pi_vs_alpha call.
  """

    payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
    num_populations = len(payoff_tables)
    num_strats_per_population =\
      utils.get_num_strats_per_population(payoff_tables, payoffs_are_hpt_format)

    if num_populations == 1:
        num_profiles = num_strats_per_population[0]
    else:
        num_profiles = utils.get_num_profiles(num_strats_per_population)

    assert (strat_labels is None or isinstance(strat_labels, dict)
            or (len(strat_labels) == num_profiles))

    pi_list = np.empty((num_profiles, 0))
    alpha_list = []
    num_iters = 0
    alpha_mult_factor = 2.

    if warm_start_alpha is not None:
        alpha = warm_start_alpha
        alpharank_succeeded_once = False
    else:
        alpha = 1e-4  # Reasonable default for most games, can be user-overridden

    while True:
        try:
            _, _, pi, _, _ = compute(payoff_tables, alpha=alpha, m=m)
            pi_list = np.append(pi_list, np.reshape(pi, (-1, 1)), axis=1)
            alpha_list.append(alpha)
            # Stop when pi converges
            if num_iters > 0 and np.allclose(pi, pi_list[:, num_iters - 1],
                                             rtol, atol):
                break
            alpha *= alpha_mult_factor
            num_iters += 1
            alpharank_succeeded_once = True
        except ValueError as _:
            if warm_start_alpha is not None and not alpharank_succeeded_once:
                # When warm_start_alpha is used, there's a chance that
                # the initial warm_start_alpha is too large and causes exceptions due to
                # the Markov transition matrix being reducible. So keep decreasing until
                # a single success occurs.
                alpha /= 2
            elif not np.allclose(pi_list[:, -1], pi_list[:, -2], rtol, atol):
                # Sweep stopped due to multiple stationary distributions, but pi had
                # not converged due to the alpha scaling being too large.
                alpha /= alpha_mult_factor
                alpha_mult_factor = (alpha_mult_factor + 1.) / 2.
                alpha *= alpha_mult_factor
            else:
                break

    if strat_labels is None:
        strat_labels = utils.get_strat_profile_labels(payoff_tables,
                                                      payoffs_are_hpt_format)
    fig = plot_pi_vs_alpha(pi_list.T,
                           alpha_list,
                           num_populations,
                           num_strats_per_population,
                           strat_labels,
                           num_strats_to_label=10)

    return pi, alpha, fig
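A usage sketch, reusing the payoff_tables built in the previous sketch; note the function internally asks plot_pi_vs_alpha to label up to 10 strategies, which is assumed to degrade gracefully when fewer profiles exist:

pi, alpha, fig = sweep_pi_vs_alpha(payoff_tables, m=50)
print('pi converged at selection intensity alpha =', alpha)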
Example 5
def sweep_pi_vs_epsilon(payoff_tables,
                        strat_labels=None,
                        warm_start_epsilon=None,
                        visualize=False,
                        return_epsilon=False,
                        min_iters=10,
                        max_iters=100,
                        min_epsilon=1e-14,
                        num_strats_to_label=10,
                        legend_sort_clusters=False):
    """Computes infinite-alpha distribution for a range of perturbations.

  The range of response graph perturbations is defined in epsilon_list.

  Note that min_iters and max_iters are necessary, as it may sometimes appear
  that the stationary distribution has converged for a game in the first few
  iterations, whereas in reality a sufficiently smaller epsilon is needed for
  the distribution to first diverge, then reconverge. This behavior depends on
  both the payoff structure and bounds, so the parameters min_iters and
  max_iters can be used to fine-tune this.

  Args:
    payoff_tables: List of game payoff tables, one for each agent identity.
      Each payoff_table may be either a numpy array, or a
      _PayoffTableInterface object.
    strat_labels: Human-readable strategy labels. See get_strat_profile_labels()
      in utils.py for formatting details.
    warm_start_epsilon: Initial value of epsilon to use.
    visualize: Plot the sweep results.
    return_epsilon: Whether to return the final epsilon used.
    min_iters: the minimum number of sweep iterations.
    max_iters: the maximum number of sweep iterations.
    min_epsilon: the minimum value of epsilon to be tested, at which point the
      sweep terminates (if not converged already).
    num_strats_to_label: Number of strats to label in legend
    legend_sort_clusters: If true, strategies in the same cluster are sorted in
      the legend according to orderings for earlier alpha values. Primarily for
      visualization purposes! Rankings for lower alpha values should be
      interpreted carefully.

  Returns:
   pi: AlphaRank stationary distribution.
   epsilon: The AlphaRank transition matrix noise level resulting from sweep.
  """
    payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables)
    num_populations = len(payoff_tables)
    num_strats_per_population = utils.get_num_strats_per_population(
        payoff_tables, payoffs_are_hpt_format)

    if num_populations == 1:
        num_profiles = num_strats_per_population[0]
    else:
        num_profiles = utils.get_num_profiles(num_strats_per_population)

    assert (strat_labels is None or isinstance(strat_labels, dict)
            or (len(strat_labels) == num_profiles))

    pi_list = np.empty((num_profiles, 0))
    pi, alpha, m = None, None, None  # Unused in infinite-alpha regime
    epsilon_list = []
    epsilon_pi_hist = {}
    num_iters = 0

    epsilon_mult_factor = 0.5
    alpharank_succeeded_once = False

    if warm_start_epsilon is not None:
        epsilon = warm_start_epsilon
    else:
        epsilon = 0.5

    while True:
        try:
            pi_prev = pi
            _, _, pi, _, _ = compute(payoff_tables,
                                     m=m,
                                     alpha=alpha,
                                     use_inf_alpha=True,
                                     inf_alpha_eps=epsilon)
            epsilon_pi_hist[epsilon] = pi
            # Stop when pi converges
            if num_iters > min_iters and np.allclose(pi, pi_prev):
                break

            epsilon *= epsilon_mult_factor
            num_iters += 1
            alpharank_succeeded_once = True
            assert num_iters < max_iters, (
                'AlphaRank stationary distr. not found '
                'after {} iterations of pi_vs_epsilon '
                'sweep'.format(num_iters))

        except ValueError as _:
            print('Error: ', _, epsilon, min_epsilon)
            # Case where epsilon has been decreased beyond desirable limits but no
            # distribution found.
            assert epsilon >= min_epsilon, (
                'AlphaRank stationary distr. not found & '
                'epsilon < min_epsilon.')
            # Case where epsilon >= min_epsilon, but epsilon is still small enough
            # that it causes exceptions due to precision issues. So increase it.
            epsilon /= epsilon_mult_factor

            # Case where alpharank_succeeded_once (i.e., epsilon_list and pi_list have
            # at least one entry), and a) has not converged yet and b) failed on this
            # instance due to epsilon being too small. I.e., the rate of decreasing
            # of epsilon is too high.
            if alpharank_succeeded_once:
                epsilon_mult_factor = (epsilon_mult_factor + 1.) / 2.
                epsilon *= epsilon_mult_factor

    epsilon_list, pi_list = zip(
        *[(epsilon, epsilon_pi_hist[epsilon])
          for epsilon in sorted(epsilon_pi_hist.keys(), reverse=True)])
    pi_list = np.asarray(pi_list)

    if visualize:
        if strat_labels is None:
            strat_labels = utils.get_strat_profile_labels(
                payoff_tables, payoffs_are_hpt_format)
        alpharank_visualizer.plot_pi_vs_alpha(
            pi_list.T,
            epsilon_list,
            num_populations,
            num_strats_per_population,
            strat_labels,
            num_strats_to_label=num_strats_to_label,
            legend_sort_clusters=legend_sort_clusters,
            xlabel=r'Infinite-AlphaRank Noise $\epsilon$')

    if return_epsilon:
        return pi_list[-1], epsilon_list[-1]
    else:
        return pi_list[-1]
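A matching sketch for the infinite-alpha sweep, again reusing payoff_tables from the earlier sketches:

pi, epsilon = sweep_pi_vs_epsilon(
    payoff_tables, visualize=True, return_epsilon=True)
print('final response-graph noise epsilon =', epsilon)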