def test_kuhn_poker_always_pass_p0(self):
  """Checks action values vs. best response when player 0 always passes."""
  kuhn = pyspiel.load_game("kuhn_poker")
  calculator = action_value_vs_best_response.Calculator(kuhn)
  exploitability_value, action_values, cf_reach_probs, reach_probs = calculator(
      0, policy.FirstActionPolicy(kuhn),
      ["0", "1", "2", "0pb", "1pb", "2pb"])
  self.assertAlmostEqual(exploitability_value, 1.)
  expected_action_values = [
      # Opening bet. If we pass, we always lose (pass-pass with op's K,
      # otherwise pass-bet-pass).
      # If we bet, we always win (because op's best response is to pass,
      # because this is an unreachable state and we break ties in favour
      # of the lowest action).
      [-1, 1],
      [-1, 1],
      [-1, 1],
      # We pass, opp bets into us. This can be either J or Q (K will pass
      # because of the tie-break rules).
      # So we are guaranteed to be winning with Q or K.
      [-1, -2],  # 0pb
      [-1, 2],  # 1pb
      [-1, 2],  # 2pb
  ]
  np.testing.assert_allclose(action_values, expected_action_values)
  np.testing.assert_allclose(
      cf_reach_probs, [1 / 3, 1 / 3, 1 / 3, 1 / 6, 1 / 6, 1 / 3])
  np.testing.assert_allclose([1.] * 6, reach_probs)
def test_2p_nash_conv(self):
  """Checks NashConv values for known policies on Kuhn and Leduc poker."""
  # Note: The first action test_policy is "AlwaysFold".
  kuhn_poker = pyspiel.load_game("kuhn_poker")
  leduc_poker = pyspiel.load_game("leduc_poker")
  cases = [
      (kuhn_poker, policy.UniformRandomPolicy(kuhn_poker),
       0.9166666666666666),
      (kuhn_poker, policy.FirstActionPolicy(kuhn_poker), 2.),
      (kuhn_poker, data.kuhn_nash_equilibrium(alpha=0.2), 0.),
      (leduc_poker, policy.FirstActionPolicy(leduc_poker), 2.),
      (leduc_poker, policy.UniformRandomPolicy(leduc_poker),
       4.747222222222222),
  ]
  for game, evaluated_policy, expected_nash_conv in cases:
    self.assertAlmostEqual(
        exploitability.nash_conv(game, evaluated_policy),
        expected_nash_conv)
class CommonTest(parameterized.TestCase):
  """Conformance checks run against several policy implementations on Leduc."""

  @parameterized.parameters([
      policy.TabularPolicy(_LEDUC_POKER),
      policy.UniformRandomPolicy(_LEDUC_POKER),
      policy.FirstActionPolicy(_LEDUC_POKER),
  ])
  def test_policy_on_leduc(self, policy_object):
    # Runs the shared policy test harness on each Python-side policy type.
    test_policy_on_game(self, _LEDUC_POKER, policy_object)

  @parameterized.named_parameters([
      ("pyspiel.UniformRandom", pyspiel.UniformRandomPolicy(_LEDUC_POKER)),
  ])
  def test_cpp_policies_on_leduc(self, policy_object):
    # Same harness, but for a C++-backed (pyspiel) policy object.
    test_policy_on_game(self, _LEDUC_POKER, policy_object)
def test_kuhn_poker_always_fold(self):
  """NashConv of the first-action (always fold/pass) policy on Kuhn is 2."""
  kuhn = pyspiel.load_game("kuhn_poker")
  fold_policy = policy.FirstActionPolicy(kuhn)
  self.assertAlmostEqual(exploitability.nash_conv(kuhn, fold_policy), 2)
def test_kuhn_poker_always_pass_p0(self):
  """Tree-walk action values on Kuhn when player 0 always passes.

  Player 1 uses a uniform tabular policy; all four outputs of the
  calculator (action values, counterfactual reach probabilities, player
  reach probabilities and CFR-reach-weighted action values) are checked
  against hand-computed fixtures.
  """
  game = pyspiel.load_game("kuhn_poker")
  calc = action_value.TreeWalkCalculator(game)
  uniform_policy = policy.TabularPolicy(game)
  always_pass_policy = policy.FirstActionPolicy(game).to_tabular()
  returned_values = calc([always_pass_policy, uniform_policy],
                         always_pass_policy)
  # Action 0 == Pass. Action 1 == Bet
  # Some values are 0 because the states are not reached, thus the expected
  # value of that node is undefined.
  np.testing.assert_array_almost_equal(
      np.asarray([
          # Player 0 states
          [-1.0, -0.5],  # '0'
          [-1.0, -2.0],  # '0pb'
          [-0.5, 0.5],  # '1'
          [-1.0, 0.0],  # '1pb'
          [0.0, 1.5],  # '2'
          [-1.0, 2.0],  # '2pb'
          # Player 1 states
          [0.0, 1.0],  # '1p'
          [0, 0],  # Unreachable
          [1.0, 1.0],  # '2p'
          [0, 0],  # Unreachable
          [-1.0, 1.0],  # '0p'
          [0, 0],  # Unreachable
      ]), returned_values.action_values)
  np.testing.assert_array_almost_equal(
      np.asarray([
          # Player 0 states
          1 / 3,  # '0'
          1 / 6,  # '0pb'
          1 / 3,  # '1'
          1 / 6,  # '1pb'
          1 / 3,  # '2'
          1 / 6,  # '2pb'
          # Player 1 states
          1 / 3,  # '1p'
          0.0,  # '1b': zero because player 0 always play pass
          1 / 3,  # 2p'
          0.0,  # '2b': zero because player 0 always play pass
          1 / 3,  # '0p'
          0.0,  # '0b': zero because player 0 always play pass
      ]), returned_values.counterfactual_reach_probs)
  # The reach probabilities are always one, even though we have player 0
  # who only plays pass, because the unreachable nodes for player 0 are
  # terminal nodes: e.g. 'x x b b p' has a player 0 reach of 0, but it is
  # a terminal node, thus it does not appear in the tabular policy
  # states.
  np.testing.assert_array_equal(
      [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
      returned_values.player_reach_probs)
  # Action values weighted by the counterfactual reach probability of each
  # state, in the same state order as above.
  np.testing.assert_array_almost_equal(
      np.asarray([
          np.array([-1 / 3, -1 / 6]),
          np.array([-1 / 6, -1 / 3]),
          np.array([-1 / 6, 1 / 6]),
          np.array([-1 / 6, 0.]),
          np.array([0., 0.5]),
          np.array([-1 / 6, 1 / 3]),
          np.array([0., 1 / 3]),
          np.array([0., 0.]),
          np.array([1 / 3, 1 / 3]),
          np.array([0., 0.]),
          np.array([-1 / 3, 1 / 3]),
          np.array([0., 0.])
      ]), returned_values.sum_cfr_reach_by_action_value)
def first_action_policy_n_player(seq_game):
  """Returns a policy that always picks the first legal action in `seq_game`."""
  first_action_policy = policy_module.FirstActionPolicy(seq_game)
  return first_action_policy