def test_kuhn_poker_always_pass_p0(self):
    """Checks action values vs. best response when player 0 always passes.

    Evaluates `FirstActionPolicy` on Kuhn poker with the
    `action_value_vs_best_response.Calculator` for the six information
    states listed below, then compares the exploitability, the per-action
    values, the counterfactual reach probabilities and the player reach
    probabilities with fixed expected numbers.
    """
    kuhn = pyspiel.load_game("kuhn_poker")
    calculator = action_value_vs_best_response.Calculator(kuhn)
    always_pass = policy.FirstActionPolicy(kuhn)
    info_states = ["0", "1", "2", "0pb", "1pb", "2pb"]

    expl_value, action_values, cf_reach, reach = calculator(
        0, always_pass, info_states)

    # One [pass, bet] row per entry of `info_states`, in the same order.
    expected_action_values = [
        # Opening decision. Passing always loses: pass-pass against the
        # opponent's K, pass-bet-pass otherwise. Betting always wins: the
        # opponent's best response is to pass, because this state is
        # unreachable and ties are broken in favour of the lowest action.
        [-1, 1],  # 0
        [-1, 1],  # 1
        [-1, 1],  # 2
        # We passed and the opponent bet into us. The opponent can hold
        # either J or Q here (K passes because of the tie-break rules), so
        # calling with Q or K is guaranteed to win.
        [-1, -2],  # 0pb
        [-1, 2],  # 1pb
        [-1, 2],  # 2pb
    ]
    expected_cf_reach = [1 / 3, 1 / 3, 1 / 3, 1 / 6, 1 / 6, 1 / 3]
    expected_reach = [1.0] * 6

    self.assertAlmostEqual(expl_value, 1.0)
    np.testing.assert_allclose(action_values, expected_action_values)
    np.testing.assert_allclose(cf_reach, expected_cf_reach)
    # The expected vector is deliberately passed first, as in the original
    # assertion, so the relative tolerance is computed exactly as before.
    np.testing.assert_allclose(expected_reach, reach)
 def test_2p_nash_conv(self):
   """Compares `exploitability.nash_conv` with fixed values for each case.

   Covers Kuhn and Leduc poker with uniform-random and first-action
   policies, plus the Kuhn Nash equilibrium built with `alpha=0.2`.
   """
   kuhn = pyspiel.load_game("kuhn_poker")
   leduc = pyspiel.load_game("leduc_poker")

   # Each case is (game, policy under test, expected NashConv).
   # Note: FirstActionPolicy plays the role of an "AlwaysFold" policy.
   cases = (
       (kuhn, policy.UniformRandomPolicy(kuhn), 0.9166666666666666),
       (kuhn, policy.FirstActionPolicy(kuhn), 2.0),
       (kuhn, data.kuhn_nash_equilibrium(alpha=0.2), 0.0),
       (leduc, policy.FirstActionPolicy(leduc), 2.0),
       (leduc, policy.UniformRandomPolicy(leduc), 4.747222222222222),
   )

   for game, candidate_policy, expected_nash_conv in cases:
     measured = exploitability.nash_conv(game, candidate_policy)
     self.assertAlmostEqual(measured, expected_nash_conv)
예제 #3
0
class CommonTest(parameterized.TestCase):
    """Runs the shared `test_policy_on_game` check on Leduc poker policies."""

    @parameterized.parameters(
        policy.TabularPolicy(_LEDUC_POKER),
        policy.UniformRandomPolicy(_LEDUC_POKER),
        policy.FirstActionPolicy(_LEDUC_POKER),
    )
    def test_policy_on_leduc(self, policy_object):
        """Applies `test_policy_on_game` to each Python policy object."""
        test_policy_on_game(self, _LEDUC_POKER, policy_object)

    @parameterized.named_parameters(
        ("pyspiel.UniformRandom", pyspiel.UniformRandomPolicy(_LEDUC_POKER)),
    )
    def test_cpp_policies_on_leduc(self, policy_object):
        """Applies `test_policy_on_game` to the policies built via pyspiel."""
        test_policy_on_game(self, _LEDUC_POKER, policy_object)
예제 #4
0
 def test_kuhn_poker_always_fold(self):
     """Expects NashConv of 2 for the first-action policy on Kuhn poker."""
     kuhn = pyspiel.load_game("kuhn_poker")
     nash_conv = exploitability.nash_conv(kuhn, policy.FirstActionPolicy(kuhn))
     self.assertAlmostEqual(nash_conv, 2)
예제 #5
0
  def test_kuhn_poker_always_pass_p0(self):
    """Checks the tree-walk action values when player 0 always passes.

    Player 0 uses the tabular form of `FirstActionPolicy` and player 1 uses a
    default `TabularPolicy` (named uniform in the original test). The
    calculator output is compared with fixed expected tables for the action
    values, counterfactual reach probabilities, player reach probabilities
    and the reach-weighted action values.
    """
    kuhn = pyspiel.load_game("kuhn_poker")
    tree_walk = action_value.TreeWalkCalculator(kuhn)
    uniform = policy.TabularPolicy(kuhn)
    always_pass = policy.FirstActionPolicy(kuhn).to_tabular()
    result = tree_walk([always_pass, uniform], always_pass)

    # Columns are [pass, bet]: action 0 == Pass, action 1 == Bet.
    # Rows of zeros belong to states that are never reached, so the expected
    # value of those nodes is undefined.
    expected_action_values = np.array([
        # Player 0 states
        [-1.0, -0.5],  # '0'
        [-1.0, -2.0],  # '0pb'
        [-0.5, 0.5],  # '1'
        [-1.0, 0.0],  # '1pb'
        [0.0, 1.5],  # '2'
        [-1.0, 2.0],  # '2pb'
        # Player 1 states
        [0.0, 1.0],  # '1p'
        [0, 0],  # Unreachable
        [1.0, 1.0],  # '2p'
        [0, 0],  # Unreachable
        [-1.0, 1.0],  # '0p'
        [0, 0],  # Unreachable
    ])

    expected_cf_reach = np.array([
        # Player 0 states
        1 / 3,  # '0'
        1 / 6,  # '0pb'
        1 / 3,  # '1'
        1 / 6,  # '1pb'
        1 / 3,  # '2'
        1 / 6,  # '2pb'
        # Player 1 states
        1 / 3,  # '1p'
        0.0,  # '1b': zero because player 0 always plays pass
        1 / 3,  # '2p'
        0.0,  # '2b': zero because player 0 always plays pass
        1 / 3,  # '0p'
        0.0,  # '0b': zero because player 0 always plays pass
    ])

    # Player reach probabilities are all one even though player 0 only ever
    # passes: the nodes player 0 cannot reach are terminal (e.g. 'x x b b p'
    # has a player 0 reach of 0), and terminal nodes do not appear among the
    # tabular policy states.
    expected_player_reach = [1.0] * 12

    # Same row order as the tables above; each row equals the matching
    # action-value row scaled by that state's counterfactual reach.
    expected_weighted_values = np.array([
        [-1 / 3, -1 / 6],
        [-1 / 6, -1 / 3],
        [-1 / 6, 1 / 6],
        [-1 / 6, 0.0],
        [0.0, 0.5],
        [-1 / 6, 1 / 3],
        [0.0, 1 / 3],
        [0.0, 0.0],
        [1 / 3, 1 / 3],
        [0.0, 0.0],
        [-1 / 3, 1 / 3],
        [0.0, 0.0],
    ])

    np.testing.assert_array_almost_equal(
        expected_action_values, result.action_values)
    np.testing.assert_array_almost_equal(
        expected_cf_reach, result.counterfactual_reach_probs)
    np.testing.assert_array_equal(
        expected_player_reach, result.player_reach_probs)
    np.testing.assert_array_almost_equal(
        expected_weighted_values, result.sum_cfr_reach_by_action_value)
예제 #6
0
def first_action_policy_n_player(seq_game):
    """Builds a first-action policy for the given game.

    Args:
      seq_game: Game object forwarded unchanged to
        `policy_module.FirstActionPolicy`.

    Returns:
      The object produced by `policy_module.FirstActionPolicy(seq_game)`.
    """
    first_action_policy = policy_module.FirstActionPolicy(seq_game)
    return first_action_policy