コード例 #1
0
ファイル: cfr_br.py プロジェクト: BlueBerryBread/MyOpenSpiel
  def _compute_best_responses(self):
    """Refreshes the stored best response of every player.

    Wraps the solver's per-infostate policy lookup in a
    `policy.PolicyFromCallable` and, for each player id of the game, stores
    the result of `exploitability.best_response` against that policy in
    `self._best_responses`.
    """

    def _policy_for_state(state):
      # The infostate policy is looked up by the state's information state.
      return self._get_infostate_policy(state.information_state())

    current_policy = policy.PolicyFromCallable(self._game, _policy_for_state)

    num_players = self._game.num_players()
    for player_id in range(num_players):
      best_response = exploitability.best_response(
          self._game, current_policy, player_id)
      self._best_responses[player_id] = best_response
コード例 #2
0
ファイル: cfr_br.py プロジェクト: julianhartmann1/HCII
  def _compute_best_responses(self):
    """Recomputes and stores a best response for every player.

    Builds a tabular policy from the solver's per-infostate policy lookup
    (keyed by the state's information state string) and, for each player id of
    the game, stores the result of `exploitability.best_response` against that
    policy in `self._best_responses`.
    """

    def _state_policy(state):
      # Infostate policies are keyed by the information state string.
      return self._get_infostate_policy(state.information_state_string())

    tabular_policy = policy.tabular_policy_from_callable(
        self._game, _state_policy)

    for pid in range(self._game.num_players()):
      response = exploitability.best_response(self._game, tabular_policy, pid)
      self._best_responses[pid] = response
コード例 #3
0
 def compute_best_reponses(self):
   """Recomputes each player's best response to the current joint policy.

   For every player index below `self._num_players`, the best-response policy
   is stored in `self._best_responses` and, when `self._oracles` is not None,
   also appended to `self._oracles[i]`.
   """
   for player in range(self._num_players):
     # Build the joint policy from the current policies and wrap it so the
     # best-response computation can be run against it.
     joint_fn = _joint_policy(self._policies)
     opponents = policy.PolicyFromCallable(self._game, joint_fn)
     br_info = exploitability.best_response(self._game, opponents, player)

     # Convert the returned best-response actions into a policy via the helper.
     br_policy = _full_best_response_policy(br_info["best_response_action"])
     self._best_responses[player] = br_policy

     if self._oracles is None:
       continue
     self._oracles[player].append(br_policy)
コード例 #4
0
 def test_kuhn_poker_uniform_random_best_response_pid1(self):
     """Checks player 1's best response to a uniform random Kuhn poker policy."""
     expected_actions = {
         # Betting is always the best action here.
         "0p": 1,
         "1p": 1,
         "2p": 1,
         # Call, except when we know we are beaten.
         "0b": 0,
         "1b": 1,
         "2b": 1,
     }
     kuhn = pyspiel.load_game("kuhn_poker")
     uniform_policy = policy.UniformRandomPolicy(kuhn)
     br = exploitability.best_response(kuhn, uniform_policy, player_id=1)
     self.assertEqual(br["best_response_action"], expected_actions)
     self.assertGreater(br["nash_conv"], 0.1)
コード例 #5
0
 def test_kuhn_poker_uniform_random_best_response_pid0(self):
     """Checks player 0's best response to a uniform random Kuhn poker policy."""
     expected_actions = {
         "0": 1,  # Bet in case the opponent folds when winning
         "1": 1,  # Bet in case the opponent folds when winning
         "2": 0,  # Both are equally good (the lowest action is returned)
         # Some of these are never reached under the best-response policy,
         # but best-response actions have been computed for them anyway.
         "0pb": 0,  # Fold - we are losing
         "1pb": 1,  # Call - we are 50-50
         "2pb": 1,  # Call - we have won
     }
     kuhn = pyspiel.load_game("kuhn_poker")
     uniform_policy = policy.UniformRandomPolicy(kuhn)
     br = exploitability.best_response(kuhn, uniform_policy, player_id=0)
     self.assertEqual(br["best_response_action"], expected_actions)
     self.assertGreater(br["nash_conv"], 0.1)
コード例 #6
0
                    print(f"saving to: {save_prefix + '_infostates.npy'}")
                    np.save(save_prefix + '_infostates',
                            np.array(cfr_infostates))

    if algorithm == 'cfr':
        solver = cfr.CFRSolver(game)
        run(solver, iterations)
    elif algorithm == 'xfp':
        solver = fictitious_play.XFPSolver(game)
        run(solver, iterations)
    elif algorithm == 'xdo':
        brs = []
        info_test = []
        for i in range(2):
            br_info = exploitability.best_response(
                game,
                cfr.CFRSolver(game).average_policy(), i)
            full_br_policy = _full_best_response_policy(
                br_info["best_response_action"])
            info_sets = br_info['info_sets']
            info_test.append(info_sets)
            brs.append(full_br_policy)
        br_list = [brs]
        start_time = time.time()
        xdo_times = []
        xdo_exps = []
        xdo_episodes = []
        xdo_infostates = []

        br_conv_threshold = starting_br_conv_threshold