Ejemplo n.º 1
0
 def test_exploitability_is_zero_on_nash(self, alpha):
     """Assert ~0 exploitability for the Kuhn poker equilibrium at `alpha`.

     Args:
       alpha: Value forwarded to `data.kuhn_nash_equilibrium` to pick the
         equilibrium policy under test.
     """
     # See also the analogous test in:
     # open_spiel/python/algorithms/exploitability_test.py
     kuhn_poker = pyspiel.load_game("kuhn_poker")
     equilibrium = data.kuhn_nash_equilibrium(alpha=alpha)
     self.assertAlmostEqual(
         0, exploitability.exploitability(kuhn_poker, equilibrium))
 def test_2p_nash_conv(self):
   """Check NashConv of several fixed policies on Kuhn and Leduc poker.

   Each case is a (game, policy, expected NashConv) triple; the value
   returned by `exploitability.nash_conv` must match the expected one to
   `assertAlmostEqual` precision.
   """
   # Note: The first-action policy is "AlwaysFold".
   kuhn_poker = pyspiel.load_game("kuhn_poker")
   leduc_poker = pyspiel.load_game("leduc_poker")
   test_parameters = [
       (kuhn_poker, policy.UniformRandomPolicy(kuhn_poker),
        0.9166666666666666),
       (kuhn_poker, policy.FirstActionPolicy(kuhn_poker), 2.),
       (kuhn_poker, data.kuhn_nash_equilibrium(alpha=0.2), 0.),
       (leduc_poker, policy.FirstActionPolicy(leduc_poker), 2.),
       (leduc_poker, policy.UniformRandomPolicy(leduc_poker),
        4.747222222222222),
   ]
   for game, test_policy, expected_value in test_parameters:
     # subTest keeps the remaining cases running after a failure and labels
     # the failing case, instead of aborting at the first bad assertion.
     with self.subTest(
         game=str(game),
         policy=type(test_policy).__name__,
         expected_value=expected_value):
       self.assertAlmostEqual(
           exploitability.nash_conv(game, test_policy), expected_value)
Ejemplo n.º 3
0
 def test_kuhn_poker_optimal(self):
     """Assert the alpha=0.2 Kuhn equilibrium policy has ~0 NashConv."""
     kuhn_poker = pyspiel.load_game("kuhn_poker")
     equilibrium = data.kuhn_nash_equilibrium(alpha=0.2)
     nash_conv = exploitability.nash_conv(kuhn_poker, equilibrium)
     self.assertAlmostEqual(nash_conv, 0)
Ejemplo n.º 4
0
class ExploitabilityTest(parameterized.TestCase):
    """Tests for exploitability, NashConv and best response on poker games."""

    def test_exploitability_on_kuhn_poker_uniform_random(self):
        """Exploitability of uniform random play should be half its NashConv."""
        # NashConv of the uniform random policy from (found on Google books):
        # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5
        expected_nash_conv = 11 / 12
        kuhn = pyspiel.load_game("kuhn_poker")
        uniform = policy.UniformRandomPolicy(kuhn)
        actual = exploitability.exploitability(kuhn, uniform)
        self.assertAlmostEqual(actual, expected_nash_conv / 2)

    def test_kuhn_poker_uniform_random_best_response_pid0(self):
        """Check player 0's best-response actions against uniform random."""
        kuhn = pyspiel.load_game("kuhn_poker")
        uniform = policy.UniformRandomPolicy(kuhn)
        expected_actions = {
            "0": 1,  # Bet in case opponent folds when winning
            "1": 1,  # Bet in case opponent folds when winning
            "2": 0,  # Both equally good (we return the lowest action)
            # Some of these will never happen under the best-response
            # policy, but we have computed best-response actions anyway.
            "0pb": 0,  # Fold - we're losing
            "1pb": 1,  # Call - we're 50-50
            "2pb": 1,  # Call - we've won
        }
        results = exploitability.best_response(kuhn, uniform, player_id=0)
        self.assertEqual(results["best_response_action"], expected_actions)
        self.assertGreater(results["nash_conv"], 0.1)

    def test_kuhn_poker_uniform_random_best_response_pid1(self):
        """Check player 1's best-response actions against uniform random."""
        kuhn = pyspiel.load_game("kuhn_poker")
        uniform = policy.UniformRandomPolicy(kuhn)
        expected_actions = {
            # Bet is always best
            "0p": 1,
            "1p": 1,
            "2p": 1,
            # Call unless we know we're beaten
            "0b": 0,
            "1b": 1,
            "2b": 1,
        }
        results = exploitability.best_response(kuhn, uniform, player_id=1)
        self.assertEqual(results["best_response_action"], expected_actions)
        self.assertGreater(results["nash_conv"], 0.1)

    def test_kuhn_poker_uniform_random(self):
        """NashConv of uniform random play in Kuhn poker should be 11/12."""
        # NashConv of the uniform random policy from (found on Google books):
        # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5
        kuhn = pyspiel.load_game("kuhn_poker")
        uniform = policy.UniformRandomPolicy(kuhn)
        nash_conv = exploitability.nash_conv(kuhn, uniform)
        self.assertAlmostEqual(nash_conv, 11 / 12)

    def test_kuhn_poker_always_fold(self):
        """NashConv of the first-action policy in Kuhn poker should be 2."""
        kuhn = pyspiel.load_game("kuhn_poker")
        first_action = policy.FirstActionPolicy(kuhn)
        nash_conv = exploitability.nash_conv(kuhn, first_action)
        self.assertAlmostEqual(nash_conv, 2)

    def test_kuhn_poker_optimal(self):
        """NashConv of the alpha=0.2 Kuhn equilibrium policy should be ~0."""
        kuhn = pyspiel.load_game("kuhn_poker")
        equilibrium = data.kuhn_nash_equilibrium(alpha=0.2)
        nash_conv = exploitability.nash_conv(kuhn, equilibrium)
        self.assertAlmostEqual(nash_conv, 0)

    def test_leduc_poker_uniform_random(self):
        """NashConv of uniform random play in Leduc poker should be ~4.7472."""
        # NashConv taken from independent implementations
        leduc = pyspiel.load_game("leduc_poker")
        uniform = policy.UniformRandomPolicy(leduc)
        nash_conv = exploitability.nash_conv(leduc, uniform)
        self.assertAlmostEqual(nash_conv, 4.747222222222222)

    def test_leduc_poker_always_fold(self):
        """NashConv of the first-action policy in Leduc poker should be 2."""
        leduc = pyspiel.load_game("leduc_poker")
        first_action = policy.FirstActionPolicy(leduc)
        nash_conv = exploitability.nash_conv(leduc, first_action)
        self.assertAlmostEqual(nash_conv, 2)

    # Values for uniform policies taken from
    # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5
    # (including multiplayer games below). However, the value for Leduc
    # against the uniform policy is wrong in the paper. This has been
    # verified in a number of independent code bases. The 4.7472 value is
    # correct.
    # Value for AlwaysFold is trivial: if you always fold, you win 0 chips,
    # but if you switch to AlwaysBet, you win 1 chip every time if playing
    # against a player who always folds.
    @parameterized.parameters(
        ("kuhn_poker", policy.UniformRandomPolicy, 0.9166666666666666),
        ("kuhn_poker", policy.FirstActionPolicy, 2.),
        ("kuhn_poker", lambda _: data.kuhn_nash_equilibrium(alpha=0.2), 0.),
        ("leduc_poker", policy.FirstActionPolicy, 2.),
        ("leduc_poker", policy.UniformRandomPolicy, 4.7472222222222),
    )
    def test_2p_nash_conv(self, game_name, policy_func, expected):
        """Check NashConv of `policy_func(game)` against `expected`.

        Args:
          game_name: Name passed to `pyspiel.load_game`.
          policy_func: Callable that builds the policy under test from the
            loaded game.
          expected: NashConv value the policy should produce.
        """
        game = pyspiel.load_game(game_name)
        policy_under_test = policy_func(game)
        nash_conv = exploitability.nash_conv(game, policy_under_test)
        self.assertAlmostEqual(nash_conv, expected)

    @parameterized.parameters(3, 4)
    def test_kuhn_poker_uniform_random_nash_conv(self, num_players):
        """NashConv of uniform random play exceeds 2 with 3 or 4 players."""
        game = pyspiel.load_game("kuhn_poker", {"players": num_players})
        uniform = policy.UniformRandomPolicy(game)
        nash_conv = exploitability.nash_conv(game, uniform)
        self.assertGreater(nash_conv, 2)

    @parameterized.parameters(
        ("kuhn_poker", 2),
        ("kuhn_poker", 3),
        ("kuhn_poker", 4),
    )
    def test_python_same_as_cpp_for_multiplayer_uniform_random_nash_conv(
            self, game_name, num_players):
        """Python and pyspiel NashConv agree for a default TabularPolicy."""
        game = pyspiel.load_game(game_name, {"players": num_players})

        # TabularPolicy defaults to being a uniform random policy.
        tabular = policy.TabularPolicy(game)
        python_value = exploitability.nash_conv(game, tabular)
        policy_as_dict = policy_utils.policy_to_dict(tabular, game)
        cpp_value = pyspiel.nash_conv(game, policy_as_dict)
        self.assertAlmostEqual(python_value, cpp_value)

    def test_cpp_python_cfr_kuhn(self):
        """Python and pyspiel NashConv agree on a CFR average policy."""
        num_iterations = 100
        kuhn = pyspiel.load_game("kuhn_poker")
        cfr_solver = pyspiel.CFRSolver(kuhn)
        for _ in range(num_iterations):
            cfr_solver.evaluate_and_update_policy()
        average_policy = cfr_solver.tabular_average_policy()
        cpp_value = pyspiel.nash_conv(kuhn, average_policy)
        # Convert the pyspiel policy so the Python implementation can score it.
        converted = policy.pyspiel_policy_to_python_policy(
            kuhn, average_policy)
        python_value = exploitability.nash_conv(kuhn, converted)
        self.assertAlmostEqual(python_value, cpp_value)