Exemple #1
0
  def __init__(self, num_players, num_strategies, initialize_payoff_table=True):
    """Builds a heuristic payoff table keyed by strategy distribution.

    Rows are stored in an OrderedDict mapping each distribution to its payoff
    vector. See `NumpyPayoffTable` for the description of the heuristic
    payoff table itself.

    Args:
      num_players: The number of players in the game.
      num_strategies: The number of strategies an individual could play.
      initialize_payoff_table: When `True`, one row is created for every
        distribution produced by `utils.distribute(num_players,
        num_strategies)`, each holding a nan-filled payoff vector of length
        `num_strategies`. When `False`, the table starts with no rows.
    """
    super(PayoffTable, self).__init__()
    self.is_hpt = True
    self._num_players = num_players
    self._num_strategies = num_strategies
    self._payoff_table = collections.OrderedDict()

    if not initialize_payoff_table:
      return

    # Every row starts out as an all-nan payoff vector.
    for profile in utils.distribute(self._num_players, self._num_strategies):
      self._payoff_table[profile] = np.full(self._num_strategies, np.nan)
    def test_distribution_equivalent_implementation(self, num_items,
                                                    num_slots):
        """Checks `utils.distribute` against `_generate_prob_profiles`.

        Both results are passed through `utils.sort_rows_lexicographically`
        before being compared, so the order in which each implementation
        emits its rows does not affect the outcome.

        Args:
          num_items: The number of items to distribute.
          num_slots: The number of slots the items are distributed over.
        """
        # Materialize the rows before stacking: np.vstack expects a sequence
        # (list/tuple), and non-sequence iterables such as generators are
        # rejected by recent NumPy releases.
        rows = list(utils.distribute(num_items, num_slots, normalize=False))
        distribution = np.vstack(rows)

        other_implementation = _generate_prob_profiles(num_items, num_slots)
        np.testing.assert_array_equal(
            utils.sort_rows_lexicographically(distribution),
            utils.sort_rows_lexicographically(other_implementation))
 def test_construction(self, num_players, num_strategies):
     """Checks that a freshly built payoff table holds only nan payoffs.

     The expected array places every distribution from `utils.distribute`
     next to an all-nan payoff block, and is compared with the array
     obtained by calling the table.

     Args:
       num_players: The number of players used to build the table.
       num_strategies: The number of strategies used to build the table.
     """
     logging.info("Testing payoff table construction.")
     table = heuristic_payoff_table.PayoffTable(num_players, num_strategies)

     # One row is expected per way of distributing the players.
     row_count = utils.n_choose_k(num_players + num_strategies - 1,
                                  num_players)
     all_distributions = np.array(
         list(utils.distribute(num_players, num_strategies)))
     nan_payoffs = np.full((int(row_count), num_strategies), np.nan)
     expected = np.concatenate([all_distributions, nan_payoffs], axis=1)

     np.testing.assert_array_equal(expected, table())
 def test_distribution(self, num_items, num_slots, normalize):
     """Checks the basic invariants of `utils.distribute`.

     Verifies the number of distributions, that none is repeated, that each
     one sums to the expected total and that no entry is negative.

     Args:
       num_items: The number of items to distribute.
       num_slots: The number of slots the items are distributed over.
       normalize: Forwarded to `utils.distribute`; when truthy each
         distribution is expected to sum to 1 instead of `num_items`.
     """
     distribution = list(utils.distribute(num_items, num_slots, normalize))
     # Correct length.
     # See https://en.wikipedia.org/wiki/Stars_and_bars_%28combinatorics%29.
     self.assertLen(distribution,
                    utils.n_choose_k(num_items + num_slots - 1, num_items))
     # No duplicates.
     self.assertLen(distribution, len(set(distribution)))
     expected_sum = 1 if normalize else num_items
     for d in distribution:
         # Compare the sum explicitly: assertTrue(a, b) would treat `b` as the
         # failure message and never check it. The "almost equal" form
         # tolerates float rounding in the normalized case.
         self.assertAlmostEqual(expected_sum, sum(d))
         self.assertTrue((np.asarray(d) >= 0).all())
def from_elo_scores(elo_ratings, num_agents=2):
    """Builds the heuristic payoff table of Elo win probabilities.

    Args:
      elo_ratings: The Elo scores vector, of length [num_strategies].
      num_agents: The number of agents. Only 2 agents are supported for now.

    Returns:
      A `NumpyPayoffTable` with one row per distribution produced by
      `utils.distribute(num_agents, num_strategies, normalize=False)`. Each
      row is the distribution followed by the payoffs. When two different
      strategies are used, the first gets the probability returned by
      `_compute_win_probability_from_elo` and the second gets its
      complement; when a single strategy is used, it gets 0.5.

    Raises:
      ValueError: If `num_agents != 2`.
    """
    if num_agents != 2:
        raise ValueError(
            "Only 2 agents are supported, because we need to compute "
            "the win probability and that can only be computed with "
            "2 players.")

    num_strategies = len(elo_ratings)
    rows = []

    for team in utils.distribute(num_agents, num_strategies, normalize=False):
        payoffs = np.zeros([num_strategies])
        # np.nonzero returns a tuple with one index array per dimension; the
        # first entry holds the indices along the first axis.
        used = np.nonzero(team)[0]
        assert used.ndim == 1

        used_count = len(used)
        if used_count == 1:
            # Both agents play the same strategy.
            payoffs[used[0]] = 0.5
        elif used_count > 1:
            first, second = used
            win_probability = _compute_win_probability_from_elo(
                elo_ratings[first], elo_ratings[second])
            payoffs[first] = win_probability
            payoffs[second] = 1 - win_probability
        else:
            assert False, "Impossible case, we have at least one strategy used."

        rows.append(np.hstack([team, payoffs]))

    return NumpyPayoffTable(np.vstack(rows))