Beispiel #1
0
    def test_cpp_and_python_implementations_are_identical(self, game_name):
        """Checks that the Python and pyspiel best responses agree.

        For every player, a best response to the uniform random policy is
        built with both `best_response.BestResponsePolicy` and
        `pyspiel.TabularBestResponse`, and the two are compared on each
        enumerated state at which that player acts.

        Args:
            game_name: Name handed to `pyspiel.load_game`.
        """
        game = pyspiel.load_game(game_name)

        uniform_py = policy.UniformRandomPolicy(game)
        uniform_cpp = pyspiel.UniformRandomPolicy(game)

        # Terminal and chance states are excluded; the returned mapping is
        # keyed by each state's information state string.
        states_by_key = get_all_states.get_all_states(
            game,
            depth_limit=-1,
            include_terminals=False,
            include_chance_states=False,
            to_string=lambda s: s.information_state_string())

        for player in range(game.num_players()):
            br_py = best_response.BestResponsePolicy(game, player, uniform_py)
            tabular_br = pyspiel.TabularBestResponse(game, player, uniform_cpp)
            br_cpp = tabular_br.get_best_response_policy()

            for state in states_by_key.values():
                # Only states where `player` is the one to act are compared.
                if state.current_player() == player:
                    py_probs = br_py.action_probabilities(state)
                    cpp_probs = br_cpp.action_probabilities(state)
                    # TODO(b/141737795): Decide what to do about this.
                    # Zero-probability entries are dropped from the pyspiel
                    # result before comparing it with the Python result.
                    cpp_nonzero = {
                        action: prob
                        for action, prob in cpp_probs.items()
                        if prob != 0
                    }
                    self.assertEqual(py_probs, cpp_nonzero)
Beispiel #2
0
 def test_exploitability_uniform_random_cc(self):
     """Verifies `pyspiel.exploitability` for the uniform random policy.

     The expected exploitability is half of the hard-coded NashConv value
     of 11/12.
     """
     kuhn = pyspiel.load_game("python_kuhn_poker")
     uniform = pyspiel.UniformRandomPolicy(kuhn)
     nash_conv = 11 / 12
     expected_exploitability = nash_conv / 2
     actual_exploitability = pyspiel.exploitability(kuhn, uniform)
     self.assertAlmostEqual(actual_exploitability, expected_exploitability)
Beispiel #3
0
    def test_cpp_to_python_policy(self):
        """Checks a converted pyspiel uniform policy is uniform everywhere.

        The pyspiel uniform random policy for "kuhn_poker" is converted with
        `policy.policy_from_pyspiel_policy`; for every key of a
        `TabularPolicy`'s `state_lookup`, the converted policy must assign
        probability 0.5 to each of actions 0 and 1.
        """
        game = pyspiel.load_game("kuhn_poker")
        cpp_uniform = pyspiel.UniformRandomPolicy(game)
        converted = policy.policy_from_pyspiel_policy(cpp_uniform)

        expected_probs = {0: 0.5, 1: 0.5}
        info_states = policy.TabularPolicy(game).state_lookup.keys()
        for info_state_str in info_states:
            actual_probs = converted.action_probabilities(info_state_str)
            self.assertEqual(expected_probs, actual_probs)
Beispiel #4
0
class CommonTest(parameterized.TestCase):
    """Parameterized tests that run `test_policy_on_game` on `_LEDUC_POKER`.

    The policy objects listed in the decorators below are constructed once,
    when this class body is executed, and each one is then passed to the
    decorated test method as `policy_object`.

    NOTE(review): `_LEDUC_POKER` and `test_policy_on_game` are defined
    elsewhere in the file; presumably a loaded Leduc poker game and a shared
    assertion helper -- confirm against their definitions.
    """

    @parameterized.parameters([
        policy.TabularPolicy(_LEDUC_POKER),
        policy.UniformRandomPolicy(_LEDUC_POKER),
        policy.FirstActionPolicy(_LEDUC_POKER),
    ])
    def test_policy_on_leduc(self, policy_object):
        """Runs the shared policy check on each `policy` module policy."""
        test_policy_on_game(self, _LEDUC_POKER, policy_object)

    @parameterized.named_parameters([
        ("pyspiel.UniformRandom", pyspiel.UniformRandomPolicy(_LEDUC_POKER)),
    ])
    def test_cpp_policies_on_leduc(self, policy_object):
        """Runs the shared policy check on the pyspiel uniform policy."""
        test_policy_on_game(self, _LEDUC_POKER, policy_object)