def __init__(self, num_players, num_strategies, initialize_payoff_table=True):
    """Creates a heuristic payoff table, optionally pre-filled with nan rows.

    A heuristic payoff table encodes payoffs from various strategy profiles.
    See `NumpyPayoffTable` for the description of the heuristic payoff table.

    The table is stored as an `OrderedDict` mapping each distribution to its
    payoff vector (one entry per strategy).

    Args:
      num_players: The number of players in the game.
      num_strategies: The number of strategies an individual could play.
      initialize_payoff_table: If `True`, a nan payoff vector is created for
        every distribution returned by `utils.distribute`. If `False`, the
        table starts with no rows at all.
    """
    super(PayoffTable, self).__init__()
    self.is_hpt = True
    self._num_players = num_players
    self._num_strategies = num_strategies
    self._payoff_table = collections.OrderedDict()

    if not initialize_payoff_table:
        # Caller asked for an empty table; rows will be added later.
        return

    # One nan-filled payoff vector per possible distribution of players
    # over strategies, inserted in the order `utils.distribute` produces.
    for distribution in utils.distribute(self._num_players,
                                         self._num_strategies):
        self._payoff_table[distribution] = np.full(self._num_strategies,
                                                   np.nan)
def test_distribution_equivalent_implementation(self, num_items, num_slots):
    """Checks `utils.distribute` against `_generate_prob_profiles`.

    Both implementations are compared as 2-D arrays whose rows have been
    sorted lexicographically, so the order in which each implementation
    emits its rows does not affect the result.

    Args:
      num_items: Number of items to distribute.
      num_slots: Number of slots the items are distributed into.
    """
    # `np.vstack` expects a sequence of arrays. Materialize the result
    # first (as the sibling tests do) so this also works if
    # `utils.distribute` hands back a lazy iterable; for a real sequence
    # the `list(...)` call is a harmless copy.
    rows = list(utils.distribute(num_items, num_slots, normalize=False))
    distribution = np.vstack(rows)
    other_implementation = _generate_prob_profiles(num_items, num_slots)

    np.testing.assert_array_equal(
        utils.sort_rows_lexicographically(distribution),
        utils.sort_rows_lexicographically(other_implementation))
def test_construction(self, num_players, num_strategies):
    """Checks that a freshly built `PayoffTable` holds only nan payoffs.

    The expected table has one row per distribution from
    `utils.distribute`, each row being the distribution followed by
    `num_strategies` nan payoff entries. It is compared against the value
    returned by calling the table object.

    Args:
      num_players: Number of players used to build the table.
      num_strategies: Number of strategies used to build the table.
    """
    logging.info("Testing payoff table construction.")
    table = heuristic_payoff_table.PayoffTable(num_players, num_strategies)

    # Number of rows the table is expected to contain.
    expected_row_count = utils.n_choose_k(num_players + num_strategies - 1,
                                          num_players)
    all_distributions = list(utils.distribute(num_players, num_strategies))
    distribution_block = np.array(all_distributions)
    nan_payoff_block = np.full((int(expected_row_count), num_strategies),
                               np.nan)

    expected = np.concatenate((distribution_block, nan_payoff_block), axis=1)
    actual = table()
    # assert_array_equal treats nan entries in matching positions as equal.
    np.testing.assert_array_equal(expected, actual)
def test_distribution(self, num_items, num_slots, normalize):
    """Checks the count, uniqueness, sum and sign of `utils.distribute` output.

    Args:
      num_items: Number of items to distribute.
      num_slots: Number of slots the items are distributed into.
      normalize: Forwarded to `utils.distribute`; when true each
        distribution is expected to sum to 1 instead of `num_items`.
    """
    distribution = list(utils.distribute(num_items, num_slots, normalize))

    # Correct length.
    # See https://en.wikipedia.org/wiki/Stars_and_bars_%28combinatorics%29.
    self.assertLen(distribution,
                   utils.n_choose_k(num_items + num_slots - 1, num_items))

    # No duplicates.
    self.assertLen(distribution, len(set(distribution)))

    expected_sum = 1 if normalize else num_items
    for d in distribution:
        # Compare the sum to its expected value. `assertTrue(a, b)` would
        # treat `b` as the failure message and never compare the two.
        # The tolerance absorbs float rounding when `normalize` is set;
        # unnormalized integer sums still have to match exactly.
        self.assertAlmostEqual(expected_sum, sum(d), places=6)
        # Every entry must be non-negative.
        self.assertTrue((np.asarray(d) >= 0).all())
def from_elo_scores(elo_ratings, num_agents=2):
    """Builds the heuristic payoff table induced by a vector of Elo scores.

    For every distribution of the two agents over the strategies, the payoff
    of each strategy in use is its win probability (computed from the Elo
    ratings) against the other strategy in use. When both agents play the
    same strategy, that strategy receives a payoff of 0.5.

    Args:
      elo_ratings: The elo scores vector of length [num_strategies].
      num_agents: The number of agents. Only 2 agents are supported for now.

    Returns:
      A `NumpyPayoffTable` whose rows are each distribution followed by the
      corresponding per-strategy payoffs.

    Raises:
      ValueError: If `num_agents != 2`.
    """
    if num_agents != 2:
        raise ValueError(
            "Only 2 agents are supported, because we need to compute "
            "the win probability and that can only be computed with "
            "2 players.")

    num_strategies = len(elo_ratings)
    team_profiles = utils.distribute(num_agents, num_strategies,
                                     normalize=False)

    table_rows = []
    for profile in team_profiles:
        payoffs = np.zeros([num_strategies])
        # np.nonzero returns a tuple holding one index array per dimension
        # of its input. The profile is expected to be 1-D, so the first
        # entry holds the indices of the strategies in use.
        used = np.nonzero(profile)[0]
        assert len(used.shape) == 1
        num_used = len(used)

        if num_used == 1:
            # Both agents play the same strategy.
            payoffs[used[0]] = 0.5
        elif num_used > 1:
            # Two different strategies meet; their payoffs sum to one.
            first, second = used
            win_probability = _compute_win_probability_from_elo(
                elo_ratings[first], elo_ratings[second])
            payoffs[first] = win_probability
            payoffs[second] = 1 - win_probability
        else:
            assert False, "Impossible case, we have at least one strategy used."

        table_rows.append(np.hstack([profile, payoffs]))

    return NumpyPayoffTable(np.vstack(table_rows))