def _run_test(self, player_class):
        """Runs player_class under several budgets and checks the iterations used.

        Every scenario installs self._assert_num_iterations, partially applied
        with the expected iteration count, as the merge_scoring_info_func and
        then asks a freshly built player for the actions and scores.
        """

        def _check(view, permutations, iterations, expected_total, reallocate):
            # Builds a single-process player with the given budget and runs it
            # once on the given game view.
            budget = MctsPlayerOptions(
                num_processes=1,
                max_permutations=permutations,
                max_iterations=iterations,
                merge_scoring_info_func=functools.partial(
                    self._assert_num_iterations, expected_total),
                reallocate_computational_budget=reallocate)
            mcts = player_class(view.next_player, False, budget)
            mcts.get_actions_and_scores(view)

        # Start of a new game: 10 permutations of 10 iterations each.
        _check(GameState.new(random_seed=0).next_player_view(),
               permutations=10,
               iterations=10,
               expected_total=10 * 10,
               reallocate=False)

        # There is one card left in the talon, the opponent played already a card
        # from their hand, so there are only 4 unknown cards in the opponent's hand.
        # This means there are only 4 permutations possible.
        duck_puzzle_view = get_game_view_for_duck_puzzle()

        # Without reallocating the computational budget, the player runs 4
        # permutations of 100 iterations each.
        _check(duck_puzzle_view,
               permutations=100,
               iterations=100,
               expected_total=4 * 100,
               reallocate=False)

        # When reallocating the computational budget, the player runs 4
        # permutations of 2500 iterations each, but 5784 are enough to simulate the
        # entire game tree.
        _check(duck_puzzle_view,
               permutations=100,
               iterations=100,
               expected_total=5784,
               reallocate=True)

        # If max_iterations is None, the computational budget is unlimited, so
        # reallocate_computational_budget has no effect.
        for reallocate in (False, True):
            _check(duck_puzzle_view,
                   permutations=100,
                   iterations=None,
                   expected_total=5784,
                   reallocate=reallocate)
def main():
    """Generates the data for a non-cheating player and plots the results.

    The player budget is 2500 iterations and 40 permutations, with
    select_best_child enabled; the data is generated with constant_budget=True.
    """
    is_cheater = False
    budget = MctsPlayerOptions(select_best_child=True,
                               max_permutations=40,
                               max_iterations=2500)
    _generate_data(is_cheater, budget, constant_budget=True)
    _plot_results(is_cheater)
def iterations_for_closing_the_talon():
    """Measures the iterations needed to fully simulate CloseTheTalon.

    The MctsPlayer is run on a series of game states (one per seed) to determine
    the average number of iterations required to fully simulate the
    CloseTheTalon action. Game states in which the algorithm decided it is not
    worth fully simulating the action within the given budget end up with a
    null iteration; they are counted and reported in the plot title. For the
    remaining game states, the iteration after which the action was fully
    simulated is summarized and plotted as a histogram.

    The results are written to the "data" folder next to this file, as a CSV
    file and as a PNG image.
    """
    num_seeds = 1000
    options = MctsPlayerOptions(num_processes=1, max_iterations=10000)
    evaluate_seed = functools.partial(
        _min_iteration_to_fully_simulate_closing_the_talon,
        options=options,
        after_n_tricks=5)
    seeds = list(range(num_seeds))

    # Evaluate the seeds in parallel, using four worker processes.
    with multiprocessing.Pool(processes=4) as pool:
        rows = pool.map(evaluate_seed, seeds)

    dataframe = DataFrame(rows, columns=["seed", "iteration"])
    output_base = os.path.join(os.path.dirname(__file__), "data",
                               "iterations_for_closing_the_talon")
    # noinspection PyTypeChecker
    dataframe.to_csv(f"{output_base}.csv", index=False)
    dataframe.iteration.hist()
    print(dataframe.iteration.describe())

    # Rows with a null iteration are the seeds that were not fully simulated.
    missing_count = len(dataframe[dataframe.iteration.isnull()])
    missing_pct = 100.0 * missing_count / num_seeds
    title = ("In %.0f%% of the cases (%s out of %s),\n"
             "closing the talon was not fully simulated." %
             (missing_pct, missing_count, num_seeds))
    plt.title(title)
    plt.suptitle("Iterations required to fully simulate closing the talon")
    plt.xlabel("Iterations")
    plt.tight_layout()
    plt.savefig(f"{output_base}.png")
 def test_max_iterations_less_than_available_actions():
     """Requests an action with a budget of a single iteration.

     With only one iteration, not all of the root node's children will be
     expanded; the test passes if requesting the next action does not raise.
     """
     tiny_budget = MctsPlayerOptions(num_processes=1,
                                     max_permutations=10,
                                     max_iterations=1)
     initial_state = GameState.new(random_seed=0)
     player = CythonMctsPlayer(initial_state.next_player, False, tiny_budget)
     player.request_next_action(initial_state.next_player_view())
Ejemplo n.º 5
0
def main():
    """Generates and plots the data for one fixed set of player options.

    The options use 667 iterations, 150 permutations, save_rewards enabled and
    lower_ci_bound_on_raw_rewards as the merge_scoring_info_func.
    """
    player_options = MctsPlayerOptions(
        merge_scoring_info_func=lower_ci_bound_on_raw_rewards,
        save_rewards=True,
        max_permutations=150,
        max_iterations=667)
    _generate_data(player_options)
    _plot_results(player_options)
 def _play_against_another_mcts_player_until_the_end(
         self, game_state) -> GameState:
     """Plays game_state to the end against a second player of the same class.

     The opponent is an instance of the same class as self._mcts_player, playing
     as PlayerId.TWO with max_iterations=None. Each action is printed before it
     is executed. The opponent is cleaned up even if the game loop raises.

     Returns the game state reached once the game is over.
     """
     opponent: Optional[MctsPlayer] = None
     try:
         opponent_class = self._mcts_player.__class__
         opponent_options = MctsPlayerOptions(max_iterations=None)
         # The Cython player gets its options restricted to a single process.
         if opponent_class == CythonMctsPlayer:
             opponent_options.num_processes = 1
         opponent = opponent_class(PlayerId.TWO, options=opponent_options)
         both_players = PlayerPair(self._mcts_player, opponent)
         while not game_state.is_game_over:
             player_to_move = both_players[game_state.next_player]
             view = game_state.next_player_view()
             action = player_to_move.request_next_action(view)
             print(f"{game_state.next_player}: {action}")
             game_state = action.execute(game_state)
     finally:
         if opponent is not None:
             opponent.cleanup()
     return game_state
def mcts_ci_widths_and_permutations_across_multiple_game_states(
        options: MctsPlayerOptions, num_samples: int, num_game_states: int,
        keep_total_budget_constant: bool):
    """Plots the confidence-interval widths for several permutation counts.

    For each seed in range(num_game_states), num_samples times, and for each
    permutation count in a fixed list, the player is run step by step on a new
    game and the width of the score confidence interval (score_upp - score_low)
    is recorded for every action at the last iteration, ordered by descending
    score. The rows are saved to a CSV file and the widths of the best action
    are drawn as a box plot grouped by the number of permutations.

    Note: `options` is modified in place. max_permutations is overwritten on
    every run, and so is max_iterations when keep_total_budget_constant is True.

    Args:
      options: The player options; the initial max_iterations and
        max_permutations determine the total budget.
      num_samples: How many times each configuration is evaluated per seed.
      num_game_states: How many seeds (game states) are evaluated.
      keep_total_budget_constant: If True, max_iterations is set to the total
        budget divided (integer division) by the number of permutations.
    """
    permutation_counts = [20, 50, 100, 200, 500, 1000]
    num_columns = 8  # "Permutations", "BestAction" and "Action #2".."Action #7".
    total_budget = options.max_iterations * options.max_permutations
    rows = []
    for seed in range(num_game_states):
        print(f"Evaluating on GameState.new(random_seed={seed})")
        for _ in range(num_samples):
            for permutations in permutation_counts:
                options.max_permutations = permutations
                if keep_total_budget_constant:
                    options.max_iterations = total_budget // permutations
                steps = run_mcts_player_step_by_step(
                    GameState.new(random_seed=seed).next_player_view(),
                    options, options.max_iterations)
                steps["ci_width"] = steps["score_upp"] - steps["score_low"]
                # Keep only the last iteration, best action first.
                is_last_iteration = steps.iteration.eq(steps.iteration.max())
                final_scores = steps[is_last_iteration].sort_values(
                    "score", ascending=False)
                row = [permutations]
                row.extend(final_scores["ci_width"].values)
                # Pad with NaN when there are fewer actions than columns.
                row.extend([np.nan] * (num_columns - len(row)))
                rows.append(tuple(row))
    column_names = ["Permutations", "BestAction"]
    column_names += [f"Action #{i}" for i in range(2, 8)]
    dataframe = DataFrame(rows, columns=column_names)
    csv_path = "mcts_ci_widths_and_perm_across_game_states.csv"
    # noinspection PyTypeChecker
    dataframe.to_csv(csv_path, index=False)
    # dataframe = pandas.read_csv(csv_path)
    dataframe[["Permutations", "BestAction"]].boxplot(by=["Permutations"])
    plt.xticks(rotation=45, ha='right')
    plt.gcf().set_size_inches((5, 5))
    if keep_total_budget_constant:
        title = f"Total budget: {total_budget} iterations"
    else:
        title = f"Constant iterations: {options.max_iterations} iterations"
    plt.title(title)
    plt.tight_layout()
    plt.savefig("mcts_ci_widths_and_perm_across_game_states.png")
Ejemplo n.º 8
0
def _get_player_closure(game_state: GameState, iterations: int,
                        max_permutations: int) -> Tuple[Closure, Closure]:
    """Builds a single-process, non-cheating CythonMctsPlayer for game_state.

    Args:
      game_state: The game state; the player is created for its next player.
      iterations: The value used for max_iterations.
      max_permutations: The value used for max_permutations.

    Returns:
      A pair of callables: the first one requests the next action for the
      next player's view of game_state, the second one is the player's cleanup
      method.
    """
    player_options = MctsPlayerOptions(num_processes=1,
                                       max_iterations=iterations,
                                       max_permutations=max_permutations)
    player = CythonMctsPlayer(game_state.next_player,
                              cheater=False,
                              options=player_options)

    def _request_action():
        player.request_next_action(game_state.next_player_view())

    return _request_action, player.cleanup
def get_overlap_for_multiple_game_states():
    """Collects and plots the overlap data for 1000 seeds.

    Each seed is processed by _get_overlap_for_seed in a pool of four worker
    processes and the resulting dataframes are concatenated. The combined data
    is saved as a CSV file in the "data" folder next to this file, a summary is
    printed, and a figure with two plots is saved next to the CSV file:
      * a scatter plot of heuristic_visits against max_visits, with a fitted
        regression line;
      * a histogram of heuristic_rank.
    """
    num_seeds = 1000
    options = MctsPlayerOptions(use_heuristic=True,
                                max_permutations=150,
                                max_iterations=667,
                                num_processes=1)

    # Process the game states and extract the data.
    process_seed = functools.partial(_get_overlap_for_seed, options=options)
    with multiprocessing.Pool(processes=4) as pool:
        per_seed_frames = pool.map(process_seed, list(range(num_seeds)))
    dataframe = pd.concat(per_seed_frames, ignore_index=True)

    # Save to CSV.
    output_base = os.path.join(os.path.dirname(__file__), "data",
                               "overlap_between_mcts_and_heuristic")
    # noinspection PyTypeChecker
    dataframe.to_csv(f"{output_base}.csv", index=False)

    # Compute additional metrics (after saving, so they are not in the CSV).
    visits_gap = dataframe["max_visits"] - dataframe["heuristic_visits"]
    dataframe["diff_visits"] = visits_gap
    dataframe["diff_visits_pct"] = \
      dataframe["diff_visits"] / dataframe["max_visits"]

    # Print results.
    print(dataframe.describe())

    # Generate plots.
    fig, axes = plt.subplots(nrows=2, ncols=1, squeeze=False)
    scatter_ax = axes[0, 0]
    rank_ax = axes[1, 0]
    dataframe.plot.scatter(x="max_visits",
                           y="heuristic_visits",
                           alpha=0.02,
                           ax=scatter_ax)
    # First-degree polynomial fit of heuristic_visits as a function of
    # max_visits.
    coefficients = np.polyfit(dataframe["max_visits"],
                              dataframe["heuristic_visits"], 1)
    dataframe["reg"] = np.poly1d(coefficients)(dataframe["max_visits"])
    scatter_ax.plot(dataframe.max_visits, dataframe.reg, color="r")
    rank_ax.hist(dataframe.heuristic_rank,
                 bins=list(range(8)),
                 density=True,
                 rwidth=0.9,
                 align="left")
    rank_ax.set_xlabel("Rank of the action deemed best by the HeuristicPlayer")
    rank_ax.set_ylabel("Fraction of nodes")
    fig.set_size_inches(5, 5)
    plt.suptitle("Overlap between Heuristic and Mcts")
    plt.tight_layout()
    plt.savefig(f"{output_base}.png")
Ejemplo n.º 10
0
def _get_algorithm_closure(game_state: GameState,
                           iterations: int) -> Tuple[Closure, Closure]:
    """Builds a single-process, cheating CythonMctsPlayer for game_state.

    Args:
      game_state: The game state; the player is created for its next player and
        receives the game state itself when asked for an action.
      iterations: The value used for max_iterations.

    Returns:
      A pair of callables: the first one requests the next action for
      game_state, the second one is the player's cleanup method.
    """
    player_options = MctsPlayerOptions(
        num_processes=1,
        max_iterations=iterations,
        max_permutations=10)  # Won't matter if cheater=True
    player = CythonMctsPlayer(game_state.next_player,
                              cheater=True,
                              options=player_options)

    def _request_action():
        player.request_next_action(game_state)

    return _request_action, player.cleanup
def main():
    """Runs the variance and the confidence-interval-width experiments.

    Both experiments share the same options object (single process, 1000
    iterations, 100 permutations, select_best_child and save_rewards enabled)
    and take one sample per game state.
    """
    num_samples = 1
    is_cheater = False
    player_options = MctsPlayerOptions(save_rewards=True,
                                       select_best_child=True,
                                       max_permutations=100,
                                       max_iterations=1000,
                                       num_processes=1)
    initial_state = GameState.new(random_seed=0)
    mcts_variance(initial_state, is_cheater, player_options, num_samples)
    mcts_ci_widths_and_permutations_across_multiple_game_states(
        options=player_options,
        num_samples=num_samples,
        num_game_states=30,
        keep_total_budget_constant=True)
Ejemplo n.º 12
0
def num_threads_and_time(class_under_test, options: MctsPlayerOptions):
  """Times request_next_action() for different values of num_processes.

  For each seed in range(NUM_SEEDS) and each thread count in [1, 2, 4, 6, 8], a
  player of type class_under_test is created and the average duration of one
  request_next_action() call is measured with timeit. The measurements are
  saved as a CSV file in the "data" folder next to this file and plotted (one
  line per seed plus the average per thread count) to a PNG file in the same
  folder.

  Note: `options.num_processes` is overwritten in place for every measurement.

  Args:
    class_under_test: The player class to instantiate; it is called with the
      next player's ID, cheater=False and the options.
    options: The player options used for all the measurements.
  """
  # pylint: disable=too-many-locals,cell-var-from-loop
  data = []
  for seed in range(NUM_SEEDS):
    game_state = GameState.new(random_seed=seed)
    for num_threads in [1, 2, 4, 6, 8]:
      options.num_processes = num_threads
      mcts = class_under_test(game_state.next_player, cheater=False,
                              options=options)
      # Release the player even if the measurement raises, so that a failure
      # does not leak whatever resources the player holds.
      try:
        timer = timeit.Timer(
          lambda: mcts.request_next_action(game_state.next_player_view()))
        # autorange() picks the number of calls itself and returns it together
        # with the total time taken.
        number, time_taken = timer.autorange()
      finally:
        mcts.cleanup()
      duration_sec = time_taken / number
      logging.info("Mcts took %.5f seconds using %d threads (seed=%s)",
                   duration_sec, num_threads, seed)
      data.append((seed, num_threads, duration_sec))

  # Save the dataframe with the timing info.
  dataframe = DataFrame(data, columns=["seed", "num_threads", "duration_sec"])
  folder = os.path.join(os.path.dirname(__file__), "data")
  csv_path = os.path.join(folder, "num_threads_and_time.csv")
  # noinspection PyTypeChecker
  dataframe.to_csv(csv_path, index=False)

  # Plot the timing data obtained.
  for seed in sorted(dataframe.seed.drop_duplicates()):
    filtered_dataframe = dataframe[dataframe["seed"].eq(seed)]
    plt.plot(filtered_dataframe.num_threads, filtered_dataframe.duration_sec,
             label=None, alpha=0.5)
    plt.scatter(filtered_dataframe.num_threads, filtered_dataframe.duration_sec,
                s=10)
  mean = dataframe.groupby("num_threads").mean().sort_index()
  plt.plot(mean.index, mean.duration_sec, label="Average", color="r",
           linewidth=3)
  plt.grid(which="both", linestyle="--")
  plt.legend(loc=0)
  plt.xlabel("Number of threads")
  plt.ylabel("Duration (seconds)")
  plt.title(f"{class_under_test.__name__}: " +
            f"{options.max_permutations} permutations x " +
            f"{options.max_iterations} iterations on\n" +
            cpuinfo.get_cpu_info()["brand_raw"])
  plt.savefig(os.path.join(folder, "num_threads_and_time.png"))
Ejemplo n.º 13
0
def evaluate_bummerl(
        bummerl: Bummerl,
        bummerl_id: str = "0",
        options: Optional[MctsPlayerOptions] = None) -> EvalResults:
    """Evaluates every completed game of a bummerl with two CythonMctsPlayers.

    The games are evaluated in order by evaluate_game, which receives the
    bummerl score accumulated over the games that precede the one being
    evaluated.

    Args:
      bummerl: The bummerl whose completed_games are evaluated.
      bummerl_id: An identifier forwarded to evaluate_game.
      options: The options for both players. If not provided, a single-process
        configuration with 150 permutations, 667 iterations and
        average_score_with_tiebreakers is used.

    Returns:
      The concatenated results of evaluate_game for all the completed games.
    """
    if not options:
        options = MctsPlayerOptions(
            merge_scoring_info_func=average_score_with_tiebreakers,
            max_iterations=667,
            max_permutations=150,
            num_processes=1)
    player_one = CythonMctsPlayer(PlayerId.ONE, False, options)
    player_two = CythonMctsPlayer(PlayerId.TWO, False, options)
    players = PlayerPair(player_one, player_two)
    running_score = PlayerPair(0, 0)
    all_results = []
    for game_id, game in enumerate(bummerl.completed_games):
        all_results += evaluate_game(game, players, running_score, bummerl_id,
                                     game_id)
        # Add this game's points only after it was evaluated.
        game_points = game.game_state.game_points
        running_score.one += game_points.one
        running_score.two += game_points.two
    return all_results
Ejemplo n.º 14
0
def _main():
    """Evaluates the game pickled in ../autosave_game.pickle and prints the results.

    The game is evaluated with two single-process CythonMctsPlayers, starting
    from a bummerl score of 0-0.
    """
    # Alternative entry point, kept for reference: evaluate a whole bummerl.
    # filename = "../autosave_bummerl.pickle"
    # with open(filename, "rb") as input_file:
    #   bummerl = pickle.load(input_file)
    # results = evaluate_bummerl(bummerl)
    # print_eval_results(results, None)

    player_options = MctsPlayerOptions(
        merge_scoring_info_func=average_score_with_tiebreakers,
        max_iterations=667,
        max_permutations=150,
        num_processes=1)
    player_one = CythonMctsPlayer(PlayerId.ONE, False, player_options)
    player_two = CythonMctsPlayer(PlayerId.TWO, False, player_options)
    players = PlayerPair(player_one, player_two)
    initial_score = PlayerPair(0, 0)
    # NOTE: pickle.load() must only be used on trusted files.
    with open("../autosave_game.pickle", "rb") as input_file:
        game = pickle.load(input_file)
    results = evaluate_game(game, players, initial_score, "0", "0")
    print_eval_results(results, None)
 def setUp(self) -> None:
     """Creates a single-process CythonMctsPlayer that merges with count_visits."""
     player_options = MctsPlayerOptions(num_processes=1,
                                        merge_scoring_info_func=count_visits,
                                        max_iterations=None)
     self._mcts_player = CythonMctsPlayer(PlayerId.ONE,
                                          options=player_options)
Ejemplo n.º 16
0
def _main():
  """Times CythonMctsPlayer with 100 permutations and 4000 iterations."""
  budget = MctsPlayerOptions(max_permutations=100, max_iterations=4000)
  num_threads_and_time(CythonMctsPlayer, budget)
Ejemplo n.º 17
0
    [action for action in game.actions if action.player_id == PlayerId.ONE])
  action_counter = 0
  fig, ax = plt.subplots(nrows=num_actions, ncols=2, squeeze=False)
  for action in game.actions:
    if action.player_id == PlayerId.ONE:
      # cheater = False
      dataframe = run_mcts_player_step_by_step(game_state.next_player_view(),
                                               options,
                                               iterations_step=100,
                                               game_points=game_points)
      _plot_data(dataframe, "score", ax[action_counter, 0], _hlines_for_scores)
      # cheater = True
      dataframe = run_mcts_player_step_by_step(game_state, options,
                                               iterations_step=100,
                                               game_points=game_points)
      _plot_data(dataframe, "score", ax[action_counter, 1], _hlines_for_scores)
      action_counter += 1
    game_state = action.execute(game_state)
  fig.set_size_inches(20, 5 * num_actions)
  fig.suptitle(f"Debug game: dealer={game.dealer}, seed={game.seed}")
  plt.tight_layout()
  plt.savefig("debug_game.png")


if __name__ == "__main__":
  # Script entry point. main_wrapper receives a callable, so debug_game() and
  # its options are only evaluated when main_wrapper invokes the lambda.
  # The options use a budget of 667 * 4 iterations and 150 permutations, with
  # save_rewards disabled.
  # NOTE(review): the meaning of the second positional argument (5) depends on
  # debug_game's signature, which is not fully visible here - confirm.
  main_wrapper(
    lambda: debug_game("bummerl_4180_2_done.pickle", 5,
                       MctsPlayerOptions(max_iterations=667 * 4,
                                         max_permutations=150,
                                         save_rewards=False)))
 def setUp(self) -> None:
     """Creates the MctsPlayer under test, with max_iterations=None."""
     unlimited_budget = MctsPlayerOptions(max_iterations=None)
     self._mcts_player = MctsPlayer(PlayerId.ONE, options=unlimited_budget)
def _min_iterations_to_find_the_best_action(
        num_game_states: int = 100,
        cheater: bool = True,
        num_samples_per_game_state: int = 10,
        options: Optional[MctsPlayerOptions] = None):
    """Computes how many iterations are needed until the best action is found.

    For every sample, the function looks for the moment when an action becomes
    the best action (rank 1) and remains the best action until the last
    recorded iteration. It measures this for num_game_states different game
    states, saves the measurements as a CSV file in _folder and plots a
    histogram (with a KDE overlay) to a PNG file in the same folder.

    Args:
      num_game_states: The number of seeds (game states) to evaluate.
      cheater: Within this function it only selects the suffix of the output
        file names and of the plot title.
      num_samples_per_game_state: How many times each game state is evaluated.
      options: The player options; defaults to MctsPlayerOptions().
    """
    options = options or MctsPlayerOptions()
    rows = []
    for seed in range(num_game_states):
        for sample_index in range(num_samples_per_game_state):
            game_state = GameState.new(dealer=PlayerId.ONE, random_seed=seed)
            history = run_mcts_and_collect_data(game_state, options)
            is_top_ranked = history["rank"].eq(1)
            at_last_iteration = history.iteration.eq(history.iteration.max())
            # The action(s) ranked first at the very last iteration.
            best_actions = history[at_last_iteration & is_top_ranked].action
            top_ranked = history[is_top_ranked]
            # Iterations at which one of the final best actions was ranked first.
            agreeing_iterations = top_ranked[top_ranked.action.isin(
                best_actions)].iteration.values
            iterations_with_other_best_actions = [
                iteration
                for iteration in history.iteration.drop_duplicates()
                if iteration not in agreeing_iterations
            ]
            # The last iteration at which a different action was on top; zero if
            # the final best action was on top at every recorded iteration.
            if iterations_with_other_best_actions:
                found_at_iteration = max(iterations_with_other_best_actions)
            else:
                found_at_iteration = 0
            best_action = min(best_actions)
            logging.info("Best action for seed %s: %s, found at iteration %s",
                         seed, best_action, found_at_iteration)
            rows.append((seed, sample_index, best_action, found_at_iteration))
    dataframe = DataFrame(
        rows, columns=["seed", "sample_index", "action", "iteration"])
    if cheater:
        suffix = "_cheater"
        title_suffix = ""
    else:
        suffix = f"_{options.max_permutations}perm"
        title_suffix = f" ({options.max_permutations} permutations)"
    output_base = f"min_iterations_to_find_the_best_action{suffix}"
    csv_path = os.path.join(_folder, f"{output_base}.csv")
    # noinspection PyTypeChecker
    dataframe.to_csv(csv_path, index=False)
    # dataframe = pandas.read_csv(csv_path)
    fig, ax = plt.subplots()
    kde_ax = ax.twinx()
    dataframe.iteration.hist(color="b", linewidth=3, ax=ax)
    dataframe.iteration.plot(kind="kde",
                             label="Overall",
                             color="r",
                             linewidth=3,
                             ax=kde_ax)
    plt.legend(loc=0)
    plt.xlabel("Iterations")
    plt.title(
        f"Number of iterations until the best action is found{title_suffix}")
    fig.set_size_inches(10, 5)
    fig.savefig(os.path.join(_folder, f"{output_base}.png"))
    logging.info("Overall results: %s", dataframe.iteration.describe())
    logging.info("Value counts: %s", dataframe.iteration.value_counts())
 def setUp(self) -> None:
     """Creates an in-process MctsPlayer with select_best_child enabled."""
     # Run in-process so that code coverage sees this code-path.
     in_process_options = MctsPlayerOptions(num_processes=1,
                                            select_best_child=True,
                                            max_iterations=None)
     self._mcts_player = MctsPlayer(PlayerId.ONE,
                                    options=in_process_options)
 def setUp(self) -> None:
     """Creates a single-process CythonMctsPlayer that saves the rewards."""
     player_options = MctsPlayerOptions(num_processes=1,
                                        save_rewards=True,
                                        select_best_child=True,
                                        max_iterations=None)
     self._mcts_player = CythonMctsPlayer(PlayerId.ONE,
                                          options=player_options)
 def test_cannot_instantiate_with_multi_threading(self) -> None:
     """CythonMctsPlayer must raise a ValueError when num_processes is 10."""
     multi_process_options = MctsPlayerOptions(num_processes=10,
                                               max_iterations=None)
     with self.assertRaisesRegex(ValueError, "10 threads"):
         CythonMctsPlayer(PlayerId.ONE, options=multi_process_options)
 def test_cannot_instantiate_with_save_rewards(self) -> None:
     """MctsPlayer must raise a ValueError when save_rewards is enabled."""
     save_rewards_options = MctsPlayerOptions(save_rewards=True,
                                              max_iterations=None)
     expected_message = "save_rewards is not supported"
     with self.assertRaisesRegex(ValueError, expected_message):
         MctsPlayer(PlayerId.ONE, options=save_rewards_options)
 def setUp(self) -> None:
     """Creates a single-process CythonMctsPlayer using best_action_frequency."""
     player_options = MctsPlayerOptions(
         num_processes=1,
         merge_scoring_info_func=best_action_frequency,
         max_iterations=None)
     self._mcts_player = CythonMctsPlayer(PlayerId.ONE,
                                          options=player_options)
 def setUp(self) -> None:
     """Creates a single-process CythonMctsPlayer merging UCBs by weighted average."""
     player_options = MctsPlayerOptions(
         num_processes=1,
         merge_scoring_info_func=merge_ucbs_using_weighted_average,
         max_iterations=None)
     self._mcts_player = CythonMctsPlayer(PlayerId.ONE,
                                          options=player_options)
Ejemplo n.º 26
0
    lambda player_id: HeuristicPlayer(
        player_id, HeuristicPlayerOptions(trump_for_marriage=False)),
    "HeuristicNoAvoidDirectLoss":
    lambda player_id: HeuristicPlayer(
        player_id, HeuristicPlayerOptions(avoid_direct_loss=False)),
    "HeuristicWithTrumpControl":
    lambda player_id: HeuristicPlayer(
        player_id, HeuristicPlayerOptions(trump_control=True)),

    # Same permutations, different iterations
    "MctsPlayer30perm10000iter":
    lambda player_id: CythonMctsPlayer(
        player_id, False,
        MctsPlayerOptions(num_processes=1,
                          max_permutations=30,
                          max_iterations=10000,
                          select_best_child=True,
                          exploration_param=math.sqrt(2))),
    "MctsPlayer30perm5000iter":
    lambda player_id: CythonMctsPlayer(
        player_id, False,
        MctsPlayerOptions(num_processes=1,
                          max_permutations=30,
                          max_iterations=5000,
                          select_best_child=True,
                          exploration_param=math.sqrt(2))),
    "MctsPlayer30perm2500iter":
    lambda player_id: CythonMctsPlayer(
        player_id, False,
        MctsPlayerOptions(num_processes=1,
                          max_permutations=30,