Example #1
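This test (apparently from OpenSpiel's tests for noisy_policy) enumerates the states of a simultaneous-move game and checks that NoisyPolicy perturbs only the action probabilities of the player it was built for, while every other player's probabilities stay identical to the underlying uniform random policy. Assuming the standard open_spiel package layout, the imports these snippets rely on would look roughly like this (the aliases are an assumption, not shown in the original):

import time  # only needed by compute_regret_policy in Example #3

import pyspiel
from open_spiel.python import policy as openspiel_policy
from open_spiel.python.algorithms import get_all_states
from open_spiel.python.algorithms import noisy_policy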
    def test_simultaneous_game_noisy_policy(self, game_name):
        game = pyspiel.load_game(game_name)

        policy = openspiel_policy.UniformRandomPolicy(game)

        all_states = get_all_states.get_all_states(
            game,
            depth_limit=10,
            include_terminals=False,
            include_chance_states=False,
            to_string=lambda s: s.history_str())

        for current_player in range(game.num_players()):
            noise = noisy_policy.NoisyPolicy(policy,
                                             player_id=current_player,
                                             alpha=0.5,
                                             beta=10.)
            for state in all_states.values():
                if state.current_player() == pyspiel.PlayerId.SIMULTANEOUS:
                    for player_id in range(game.num_players()):
                        if player_id != current_player:
                            self.assertEqual(
                                policy.action_probabilities(state, player_id),
                                noise.action_probabilities(state, player_id))
                        else:
                            self.assertNotEqual(
                                policy.action_probabilities(state, player_id),
                                noise.action_probabilities(state, player_id))
Example #2
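The turn-based counterpart of Example #1: the whole game tree is enumerated (depth_limit=-1), states are keyed by their information-state string, and nodes where current_player() is negative (chance, terminal, or simultaneous) are skipped. Only the states at which the targeted player acts should have different probabilities under the noisy policy.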
    def test_cpp_and_python_implementations_are_identical(self, game_name):
        game = pyspiel.load_game(game_name)

        policy = openspiel_policy.UniformRandomPolicy(game)

        all_states = get_all_states.get_all_states(
            game,
            depth_limit=-1,
            include_terminals=False,
            include_chance_states=False,
            to_string=lambda s: s.information_state_string())

        for current_player in range(game.num_players()):
            noise = noisy_policy.NoisyPolicy(policy,
                                             player_id=current_player,
                                             alpha=0.5,
                                             beta=10.)
            for state in all_states.values():
                if state.current_player() < 0:
                    continue

                if state.current_player() != current_player:
                    self.assertEqual(policy.action_probabilities(state),
                                     noise.action_probabilities(state))
                else:
                    self.assertNotEqual(policy.action_probabilities(state),
                                        noise.action_probabilities(state))
Example #3
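A helper that estimates a crude regret for `policy`: it compares the sampled expected value of the policy against a handful of noisy deviations for player 0 and returns the largest improvement found, together with the wall-clock time spent. The `get_expected_value` helper it calls is not part of this excerpt.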
def compute_regret_policy(game,
                          policy,
                          num_random_policy_tested=10,
                          num_sample=100):
    time_tick = time.time()
    expected_value_policy = get_expected_value(game, policy, num_sample)
    worse_regret = 0
    for _ in range(num_random_policy_tested):
        noisy_n_policy = noisy_policy.NoisyPolicy(policy, player_id=0, alpha=1)
        expected_value_noise = get_expected_value(game,
                                                  noisy_n_policy,
                                                  num_sample,
                                                  player=0)
        approximate_regret = expected_value_noise - expected_value_policy
        worse_regret = max(worse_regret, approximate_regret)
    return worse_regret, time.time() - time_tick
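Since get_expected_value is not shown above, here is a minimal Monte-Carlo sketch of what such a helper could look like (hypothetical code, assuming a turn-based OpenSpiel game and the standard policy API; it is not the original implementation):

import numpy as np

def get_expected_value(game, policy, num_sample, player=0):
    # Hypothetical sketch: Monte-Carlo estimate of `player`'s expected return
    # when every player samples actions from `policy`.
    total_return = 0.0
    for _ in range(num_sample):
        state = game.new_initial_state()
        while not state.is_terminal():
            if state.is_chance_node():
                outcomes, probs = zip(*state.chance_outcomes())
            else:
                outcomes, probs = zip(*policy.action_probabilities(state).items())
            action = np.random.choice(outcomes, p=probs)
            state.apply_action(int(action))
        total_return += state.returns()[player]
    return total_return / num_sample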
Example #4
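A copy_with_noise method, apparently from a best-response policy class, that wraps the current policy in a NoisyPolicy using the mixture described in its docstring.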
    def copy_with_noise(self, alpha=0.0, beta=0.0):
        """Copies this policy and adds noise, making it a Noisy Best Response.

    The policy's new probabilities P' on each state s become
    P'(s) = alpha * epsilon + (1-alpha) * P(s)

    With P the former policy's probabilities, and epsilon ~ Softmax(beta *
    Uniform)

    Args:
      alpha: First mixture component
      beta: Softmax 1/temperature component

    Returns:
      Noisy copy of best response.
    """
        return noisy_policy.NoisyPolicy(self, alpha, beta, self.all_states)
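Purely as an illustration of the docstring's formula (not the library implementation), the mixture for a single state could be computed like this, where probs is the vector of the original action probabilities:

import numpy as np

def mix_with_noise(probs, alpha, beta, rng=np.random):
    # epsilon ~ Softmax(beta * Uniform): softmax over i.i.d. uniform draws scaled by beta.
    noise = rng.uniform(size=len(probs))
    epsilon = np.exp(beta * noise)
    epsilon /= epsilon.sum()
    # P' = alpha * epsilon + (1 - alpha) * P
    return alpha * epsilon + (1 - alpha) * np.asarray(probs)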