Ejemplo n.º 1
0
    def test_cpp_python_best_response_oracle(self, game_name, num_players):
        """Checks that the "py" and "cpp" oracle backends agree.

        Loads `game_name` with `num_players` players, builds one
        BestResponseOracle per backend, calls both with the same training
        parameters, and asserts that the returned policies assign the same
        probability to every action in every enumerated state.

        Args:
          game_name: Name of the game, forwarded to `pyspiel.load_game`.
          num_players: Number of players; passed as the "players" game
            parameter and used to size the per-player lists.
        """
        game = pyspiel.load_game(
            game_name, {"players": pyspiel.GameParameter(num_players)})
        all_states, _ = best_response.compute_states_and_info_states_if_none(
            game, all_states=None, state_to_information_state=None)

        # One single-policy population per player, each played with
        # probability 1.
        population = [[policy.TabularPolicy(game).__copy__()]
                      for _ in range(num_players)]
        mixture_weights = [[1.] for _ in range(num_players)]

        # Python-backed oracle: constructed without a game.
        py_oracle = best_response_oracle.BestResponseOracle(
            best_response_backend="py")

        # C++-backed oracle: the game is supplied at construction time.
        # NOTE(review): presumably so the oracle can build and cache its
        # best-response structures up front -- confirm in BestResponseOracle.
        cpp_oracle = best_response_oracle.BestResponseOracle(
            game=game, best_response_backend="cpp")

        # One request per player. Every request references the same
        # population and weight objects; only "current_player" varies.
        training_params = []
        for player_id in range(num_players):
            request = {
                "total_policies": population,
                "current_player": player_id,
                "probabilities_of_playing_policies": mixture_weights,
            }
            training_params.append([request])

        py_best_rep = py_oracle(game, training_params)
        cpp_best_rep = cpp_oracle(game, training_params)

        # Compare both backends state by state, using the policy returned
        # for the player to move in that state.
        for state in all_states.values():
            mover = state.current_player()
            py_probs = py_best_rep[mover][0].action_probabilities(state)
            cpp_probs = cpp_best_rep[mover][0].action_probabilities(state)

            # Walk the python actions first, then the cpp ones, so an action
            # listed by only one backend is still compared (against 0.0).
            for action in [*py_probs.keys(), *cpp_probs.keys()]:
                self.assertEqual(py_probs.get(action, 0.0),
                                 cpp_probs.get(action, 0.0))
Ejemplo n.º 2
0
def init_br_responder(env):
    """Builds the tabular best-response oracle and the initial agents.

    Args:
      env: Object exposing a `game` attribute, which is used to create the
        tabular policy and the oracle.

    Returns:
      A pair `(oracle, agents)`: `oracle` is a BestResponseOracle built from
      `env.game` and a fresh TabularPolicy; `agents` is a list of
      `FLAGS.n_players` copies of that policy.
    """
    base_policy = policy.TabularPolicy(env.game)
    responder = best_response_oracle.BestResponseOracle(
        game=env.game, policy=base_policy)

    # Each agent gets its own copy of the policy instead of sharing the
    # instance handed to the oracle.
    initial_agents = []
    for _ in range(FLAGS.n_players):
        initial_agents.append(base_policy.__copy__())

    return responder, initial_agents