Python get_strat_profile_from_id 예제들, open_spiel.python.egt.utils.get_strat_profile_from_id Python 예제들

예제 #1

0

파일 보기

def get_joint_policies_from_id_list(payoff_tables, policies, profile_id_list):
    """Maps integer profile IDs to the joint policies they identify.

    Args:
      payoff_tables: List of payoff tables, one per player.
      policies: A list of policies, one per player; policies[k][j] is looked
        up as the j-th policy of player k.
      profile_id_list: List of integer IDs, each corresponding to a joint
        policy. These integers correspond to those in
        get_strategy_profile_ids().

    Returns:
      A list with one entry per ID in profile_id_list, in the same order.
      Each entry is a joint policy, i.e. a list holding one policy per player.
    """
    strategy_counts = alpharank_utils.get_num_strats_per_population(
        payoff_tables, payoffs_are_hpt_format=False)
    # The payoff tables and the policy lists must describe the same number of
    # strategies for every player, otherwise the ID decoding is meaningless.
    np.testing.assert_array_equal(
        strategy_counts, [len(player_policies) for player_policies in policies])
    player_indices = range(len(policies))

    def _joint_policy_for(profile_id):
        # Decode the integer ID into one strategy index per player, then pick
        # the matching policy object for each player.
        profile = alpharank_utils.get_strat_profile_from_id(
            strategy_counts, profile_id)
        return [policies[k][profile[k]] for k in player_indices]

    return [_joint_policy_for(profile_id) for profile_id in profile_id_list]

예제 #2

0

파일 보기

파일: utils_test.py 프로젝트: DailyActie/AI_RL_APP-open_spiel

    def test_id_profile_mapping(self):
        """Checks that profile-to-ID and ID-to-profile lookups are inverses."""

        num_strats_per_population = np.asarray([4, 4, 4, 9])
        expected_num_ids = np.prod(num_strats_per_population)

        # One range of strategy indices per population; their Cartesian
        # product enumerates every pure strategy profile.
        per_population_choices = [
            range(count) for count in num_strats_per_population
        ]

        observed_ids = []
        for profile in itertools.product(*per_population_choices):
            encoded = utils.get_id_from_strat_profile(
                num_strats_per_population, profile)
            observed_ids.append(encoded)

            # Backward mapping: decoding the ID must give back the profile.
            decoded = utils.get_strat_profile_from_id(
                num_strats_per_population, encoded)
            np.testing.assert_array_equal(profile, decoded)

        # Forward mapping: IDs come out as 0, 1, 2, ... in enumeration order.
        np.testing.assert_array_equal(observed_ids, range(expected_num_ids))

예제 #3

0

파일 보기

def get_alpharank_marginals(payoff_tables, pi):
    """Computes per-player marginal strategy masses from joint rankings pi.

    Args:
      payoff_tables: List of meta-game payoff tables for a K-player game,
        where each table has dim
        [n_strategies_player_1 x ... x n_strategies_player_K]. These payoff
        tables may be asymmetric.
      pi: The vector of joint rankings as computed by alpharank. Each element
        i corresponds to a unique integer ID representing a given strategy
        profile, with profile_to_id mappings provided by
        alpharank_utils.get_id_from_strat_profile().

    Returns:
      pi_marginals: List of np.arrays of player-wise marginal strategy masses,
        where the k-th player's np.array has shape [n_strategies_player_k].
        When payoff_tables holds a single table, pi itself is returned
        unchanged rather than a list.
    """
    num_populations = len(payoff_tables)

    # With a single population the joint ranking already is the marginal.
    if num_populations == 1:
        return pi

    strategy_counts = alpharank_utils.get_num_strats_per_population(
        payoff_tables, payoffs_are_hpt_format=False)
    total_profiles = alpharank_utils.get_num_profiles(strategy_counts)

    marginals = [np.zeros(count) for count in strategy_counts]
    for profile_id in range(total_profiles):
        profile = alpharank_utils.get_strat_profile_from_id(
            strategy_counts, profile_id)
        # Credit this profile's mass to the strategy each player uses in it.
        for player in range(num_populations):
            marginals[player][profile[player]] += pi[profile_id]
    return marginals

예제 #4

0

파일 보기

def suggest_alpha(payoff_tables, tol=.1):
    """Suggests an alpha for use in alpha-rank.

    The suggested alpha is approximately the smallest possible alpha such that
    the ranking has 'settled out'. It is calculated as
    -ln(tol)/min_gap_between_payoffs.

    The logic behind this settling out is that the fixation probabilities can
    be expanded as a series, and the relative size of each term in this series
    changes with alpha. As alpha gets larger and larger, one of the terms in
    this series comes to dominate, and this causes the ranking to settle
    down. Just how fast this domination happens is easy to calculate, and this
    function uses it to estimate the alpha by which the ranking has settled.

    You can find further discussion at the PR:

    https://github.com/deepmind/open_spiel/pull/403

    Args:
      payoff_tables: List of game payoff tables, one for each agent identity.
        Each payoff_table may be either a numpy array, or a
        _PayoffTableInterface object.
      tol: the desired gap between the first and second terms in the fixation
        probability expansion. A smaller tolerance leads to a larger alpha,
        and a 'more settled out' ranking.

    Returns:
      A suggested alpha. If no profile has a strictly better neighbouring
      profile, the gap stays at infinity and the result is -ln(tol)/inf.
    """
    is_hpt = utils.check_payoffs_are_hpt(payoff_tables)

    strategy_counts = utils.get_num_strats_per_population(
        payoff_tables, is_hpt)
    total_profiles = utils.get_num_profiles(strategy_counts)

    # Smallest strictly positive payoff improvement seen over all
    # (profile, neighbouring profile) pairs.
    smallest_gain = np.inf
    for profile_id in range(total_profiles):
        current_profile = utils.get_strat_profile_from_id(
            strategy_counts, profile_id)
        neighbours = utils.get_valid_next_profiles(strategy_counts,
                                                   current_profile)

        for changed_population, neighbour_profile in neighbours:
            table = payoff_tables[changed_population]
            neighbour_payoff = _get_payoff(table, is_hpt, neighbour_profile,
                                           changed_population)
            current_payoff = _get_payoff(table, is_hpt, current_profile,
                                         changed_population)
            # Only improving deviations contribute to the gap.
            if neighbour_payoff > current_payoff:
                smallest_gain = min(smallest_gain,
                                    neighbour_payoff - current_payoff)

    return -np.log(tol) / smallest_gain

예제 #5

0

파일 보기

파일: heuristic_payoff_table.py 프로젝트: rahulpandeycs/openspiel-games

def from_matrix_game(matrix_game):
    """Builds a PayoffTable from a symmetric 2-player matrix game.

    Args:
      matrix_game: The payoff matrix corresponding to a 2-player symmetric
        game. Must be a numpy array.

    Returns:
      A PayoffTable with num_players=2 whose entries are filled from
      matrix_game.

    Raises:
      ValueError: If matrix_game is not a numpy array.
      AssertionError: If the matrix does not describe exactly two populations
        with the same number of strategies.
    """
    if not isinstance(matrix_game, np.ndarray):
        message = "The matrix game should be a numpy array, not a {}".format(
            type(matrix_game))
        raise ValueError(message)

    strategy_counts = utils.get_num_strats_per_population(
        payoff_tables=[matrix_game], payoffs_are_hpt_format=False)
    assert len(strategy_counts) == 2
    assert strategy_counts[0] == strategy_counts[1]
    num_strategies = strategy_counts[0]

    total_profiles = utils.get_num_profiles(strategy_counts)
    table = PayoffTable(num_players=2, num_strategies=num_strategies)

    # Fill the table one strategy profile at a time.
    for profile_id in range(total_profiles):
        profile = utils.get_strat_profile_from_id(strategy_counts, profile_id)
        key = tuple(table.get_distribution_from_profile(profile))
        # In a symmetric game several profiles share one distribution (and
        # hence one table entry); skip entries a previous profile already set.
        if not table.item_is_uninitialized(key):
            continue
        first, second = profile[0], profile[1]
        payoffs = np.zeros(num_strategies)
        payoffs[first] = matrix_game[first, second]
        payoffs[second] = matrix_game[second, first]
        table[key] = payoffs

    return table

예제 #6

0

파일 보기

파일: alpharank_visualizer.py 프로젝트: BlueBerryBread/MyOpenSpiel

    def _draw_network(self):
        """Draws the NetworkX graph self.g onto the current matplotlib figure.

        Nodes are coloured from self.node_colors and edges from
        self.edge_colors, both using the Blues colormap. With a single
        population each node is annotated with its label and its mass from
        self.pi; with several populations each node instead gets a small
        ring of sub-nodes, one per population, labelled with that
        population's strategy in the profile.
        """
        plt.clf()

        single_population = self.num_populations == 1
        multi_population = self.num_populations > 1

        # Multi-population nodes are larger so the sub-node ring fits inside.
        if single_population:
            node_sizes, node_border_width = 5000, 1.
        else:
            node_sizes, node_border_width = 15000, 3.

        vmin = 0
        vmax = np.max(self.pi) + 0.1

        nx.draw_networkx_nodes(self.g,
                               self.pos,
                               node_size=node_sizes,
                               node_color=self.node_colors,
                               edgecolors="k",
                               cmap=plt.cm.Blues,
                               vmin=vmin,
                               vmax=vmax,
                               linewidths=node_border_width)

        nx.draw_networkx_edges(self.g,
                               self.pos,
                               node_size=node_sizes,
                               arrowstyle="->",
                               arrowsize=10,
                               edge_color=self.edge_colors,
                               edge_cmap=plt.cm.Blues,
                               width=5)

        nx.draw_networkx_edge_labels(self.g,
                                     self.pos,
                                     edge_labels=self.edge_labels)

        if multi_population:
            subnode_separation = 0.1
            # Template graph with one node per population; it is re-drawn
            # inside every profile node below.
            subgraph = nx.Graph()
            subgraph.add_nodes_from(range(self.num_populations))

        for node_id in self.g:
            x, y = self.pos[node_id]
            if single_population:
                node_text = ("$\\pi_{" + self.state_labels[node_id] + "}=$" +
                             str(np.round(self.pi[node_id], decimals=2)))
            else:
                # No text for multi-population case as plot gets messy
                node_text = ""
            txt = plt.text(x,
                           y,
                           node_text,
                           horizontalalignment="center",
                           verticalalignment="center",
                           fontsize=12)
            # White outline keeps the label readable on dark nodes.
            txt.set_path_effects(
                [PathEffects.withStroke(linewidth=3, foreground="w")])

            if multi_population:
                sub_pos = nx.circular_layout(subgraph)
                subnode_labels = {}
                strat_profile = utils.get_strat_profile_from_id(
                    self.num_strats_per_population, node_id)
                for i_population in subgraph.nodes():
                    i_strat = strat_profile[i_population]
                    subnode_labels[i_population] = (
                        "$s^{" + str(i_population + 1) + "}=" +
                        self.state_labels[i_population][i_strat] + "$")
                    # Adjust the node positions generated by NetworkX's
                    # circular_layout(), such that the node for the 1st
                    # strategy starts on the left.
                    sub_pos[i_population] = (
                        -sub_pos[i_population] * subnode_separation +
                        self.pos[node_id])
                nx.draw(subgraph,
                        pos=sub_pos,
                        with_labels=True,
                        width=0.,
                        node_color="w",
                        labels=subnode_labels,
                        node_size=2500)

예제 #7

0

파일 보기

파일: alpharank_visualizer.py 프로젝트: BlueBerryBread/MyOpenSpiel

def plot_pi_vs_alpha(pi_list,
                     alpha_list,
                     num_populations,
                     num_strats_per_population,
                     strat_labels,
                     num_strats_to_label,
                     plot_semilogx=True,
                     xlabel=r"Ranking-intensity $\alpha$",
                     ylabel=r"Strategy mass in stationary distribution $\pi$"):
    """Plots stationary distributions, pi, against selection intensities, alpha.

    One line is drawn per strategy profile. The legend lists the top
    num_strats_to_label profiles (by final mass) followed by a single "..."
    entry standing in for the rest. Pie markers and a rectangle on the far
    right highlight clusters of profiles that end with similar mass.

    Args:
      pi_list: Stationary distributions, pi. Indexed as a 2-D array
        pi_list[alpha_index, profile_id]; the last row is treated as the
        final ranking.
      alpha_list: List of selection intensities, alpha.
      num_populations: The number of populations.
      num_strats_per_population: List of the number of strategies per
        population.
      strat_labels: Human-readable strategy labels.
      num_strats_to_label: The number of top strategies to label in the
        legend.
      plot_semilogx: Boolean set to enable/disable semilogx plot.
      xlabel: Plot xlabel.
      ylabel: Plot ylabel.
    """

    # Cluster strategies for which the stationary distribution has similar masses
    masses_to_strats = utils.cluster_strats(pi_list[-1, :])

    # Set colors
    num_strat_profiles = np.shape(pi_list)[1]
    num_strats_to_label = min(num_strats_to_label, num_strat_profiles)
    cmap = plt.get_cmap("Paired")
    colors = [cmap(i) for i in np.linspace(0, 1, num_strat_profiles)]

    # Plots stationary distribution vs. alpha series
    plt.figure(facecolor="w")
    axes = plt.gca()

    # Every series, including the blank legend placeholder, goes through the
    # same plotting function. Calling plt.semilogx for the placeholder would
    # switch the x-axis to log scale even when plot_semilogx is False.
    plot_series = plt.semilogx if plot_semilogx else plt.plot

    legend_line_objects = []
    legend_labels = []

    num_strats_printed = 0
    add_legend_entries = True
    # Visit clusters from highest to lowest final mass so the legend lists
    # the top-ranked profiles first.
    for _, strats in sorted(masses_to_strats.items(), reverse=True):
        for profile_id in strats:
            if num_populations == 1:
                strat_profile = profile_id
            else:
                strat_profile = utils.get_strat_profile_from_id(
                    num_strats_per_population, profile_id)

            series = plot_series(alpha_list,
                                 pi_list[:, profile_id],
                                 color=colors[profile_id],
                                 linewidth=2)

            if add_legend_entries:
                if num_strats_printed >= num_strats_to_label:
                    # Placeholder blank series for remaining entries.
                    # np.nan is used because the np.NaN alias was removed in
                    # NumPy 2.0.
                    series = plot_series(np.nan, np.nan, "-", color="none")
                    label = "..."
                    add_legend_entries = False
                else:
                    label = utils.get_label_from_strat_profile(
                        num_populations, strat_profile, strat_labels)
                legend_labels.append(label)
                legend_line_objects.append(series[0])
            num_strats_printed += 1

    # Plots pie charts on far right of figure to indicate clusters of strategies
    # with identical rank
    for mass, strats in masses_to_strats.items():
        _draw_pie(axes,
                  ratios=[1 / len(strats)] * len(strats),
                  colors=[colors[i] for i in strats],
                  x_center=alpha_list[-1],
                  y_center=mass,
                  size=200,
                  clip_on=False,
                  zorder=10)

    # Axes ymax set slightly above highest stationary distribution mass
    max_mass = np.amax(pi_list)
    axes_y_max = np.ceil(
        10. * max_mass) / 10  # Round upward to nearest first decimal
    axes_y_max = np.clip(axes_y_max, 0., 1.)

    # Plots a rectangle highlighting the rankings on the far right of the figure
    final_masses = pi_list[-1, :]
    box_x_min = alpha_list[-1] * 0.7
    box_y_min = np.min(final_masses) - 0.05 * axes_y_max
    width = 0.7 * alpha_list[-1]
    height = np.max(final_masses) - np.min(
        final_masses) + 0.05 * axes_y_max * 2
    axes.add_patch(
        patches.Rectangle((box_x_min, box_y_min),
                          width,
                          height,
                          edgecolor="b",
                          facecolor=(1, 0, 0, 0),
                          clip_on=False,
                          linewidth=5,
                          zorder=20))

    # Plot formatting
    axes.set_xlim(np.min(alpha_list), np.max(alpha_list))
    axes.set_ylim([0.0, axes_y_max])
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_axisbelow(
        True)  # Axes appear below data series in terms of zorder

    # Legend on the right side of the current axis
    box = axes.get_position()
    axes.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    axes.legend(legend_line_objects,
                legend_labels,
                loc="center left",
                bbox_to_anchor=(1.05, 0.5))
    plt.grid()
    plt.show()

예제 #8

0

파일 보기

def _get_multipop_transition_matrix(payoff_tables,
                                    payoffs_are_hpt_format,
                                    m,
                                    alpha,
                                    use_inf_alpha=False,
                                    inf_alpha_eps=0.1):
    """Gets Markov transition matrix for multipopulation games.

    Args:
      payoff_tables: List of payoff tables, indexed by the population whose
        strategy changes in a transition.
      payoffs_are_hpt_format: Flag forwarded to the utils helpers and to the
        payoff / rho lookups.
      m: Forwarded to _get_rho_sr_multipop (finite-alpha case only).
      alpha: Forwarded to _get_rho_sr_multipop (finite-alpha case only).
      use_inf_alpha: If True, transition probabilities are set directly from
        payoff comparisons instead of from fixation probabilities.
      inf_alpha_eps: Small mass used in the infinite-alpha case to keep the
        chain irreducible.

    Returns:
      A tuple (c, rhos) of [num_profiles x num_profiles] arrays. c is the
      transition matrix; rhos holds the values returned by
      _get_rho_sr_multipop and is left at zero when use_inf_alpha is True.
    """

    strategy_counts = utils.get_num_strats_per_population(
        payoff_tables, payoffs_are_hpt_format)
    total_profiles = utils.get_num_profiles(strategy_counts)

    eta = 1. / (np.sum(strategy_counts - 1))

    c = np.zeros((total_profiles, total_profiles))
    rhos = np.zeros((total_profiles, total_profiles))

    for row_id in range(total_profiles):
        row_profile = utils.get_strat_profile_from_id(strategy_counts, row_id)
        neighbours = utils.get_valid_next_profiles(strategy_counts,
                                                   row_profile)

        for changed_population, col_profile in neighbours:
            col_id = utils.get_id_from_strat_profile(strategy_counts,
                                                     col_profile)
            table_k = payoff_tables[changed_population]

            if not use_inf_alpha:
                # Finite alpha: scale the fixation probability by eta.
                rhos[row_id, col_id] = _get_rho_sr_multipop(
                    payoff_table_k=table_k,
                    payoffs_are_hpt_format=payoffs_are_hpt_format,
                    k=changed_population,
                    m=m,
                    r=col_profile,
                    s=row_profile,
                    alpha=alpha)
                c[row_id, col_id] = eta * rhos[row_id, col_id]
                continue

            payoff_col = _get_payoff(table_k,
                                     payoffs_are_hpt_format,
                                     col_profile,
                                     k=changed_population)
            payoff_row = _get_payoff(table_k,
                                     payoffs_are_hpt_format,
                                     row_profile,
                                     k=changed_population)
            if np.isclose(payoff_col, payoff_row, atol=1e-14):
                # Equal payoffs: move with half of the available mass.
                weight = 0.5
            elif payoff_col > payoff_row:
                # Transition to col strategy since its payoff is higher than
                # row strategy, but remove some small amount of mass,
                # inf_alpha_eps, to keep the chain irreducible
                weight = 1 - inf_alpha_eps
            else:
                # Transition with very small probability
                weight = inf_alpha_eps
            c[row_id, col_id] = eta * weight

        # Special case of self-transition: whatever mass is left in the row.
        c[row_id, row_id] = 1 - sum(c[row_id, :])

    return c, rhos