def _run_test(self, player_class):
    """Run `player_class` under several budget configurations.

    For every configuration the expected number is bound to
    `self._assert_num_iterations` and installed as the player's
    `merge_scoring_info_func`, then `get_actions_and_scores()` is called on
    the game view under test.
    """

    def check(view, max_permutations, max_iterations, reallocate, expected):
        # Build the options for one scenario and run the player once.
        scoring_hook = functools.partial(self._assert_num_iterations,
                                         expected)
        scenario_options = MctsPlayerOptions(
            num_processes=1,
            max_permutations=max_permutations,
            max_iterations=max_iterations,
            merge_scoring_info_func=scoring_hook,
            reallocate_computational_budget=reallocate)
        mcts_player = player_class(view.next_player, False, scenario_options)
        mcts_player.get_actions_and_scores(view)

    check(GameState.new(random_seed=0).next_player_view(),
          max_permutations=10, max_iterations=10, reallocate=False,
          expected=10 * 10)

    # There is one card left in the talon, the opponent played already a card
    # from their hand, so there are only 4 unknown cards in the opponent's
    # hand. This means there are only 4 permutations possible.
    duck_puzzle_view = get_game_view_for_duck_puzzle()

    # Without reallocating the computational budget, the player runs 4
    # permutations of 100 iterations each.
    check(duck_puzzle_view, max_permutations=100, max_iterations=100,
          reallocate=False, expected=4 * 100)

    # When reallocating the computational budget, the player runs 4
    # permutations of 2500 iterations each, but 5784 are enough to simulate
    # the entire game tree.
    check(duck_puzzle_view, max_permutations=100, max_iterations=100,
          reallocate=True, expected=5784)

    # If max_iterations is None, the computational budget is unlimited, so
    # reallocate_computational_budget has no effect.
    for reallocate in (False, True):
        check(duck_puzzle_view, max_permutations=100, max_iterations=None,
              reallocate=reallocate, expected=5784)
def main():
    """Generate the data for a non-cheating player, then plot the results."""
    use_cheater = False
    mcts_options = MctsPlayerOptions(
        select_best_child=True,
        max_permutations=40,
        max_iterations=2500,
    )
    _generate_data(use_cheater, mcts_options, constant_budget=True)
    _plot_results(use_cheater)
def iterations_for_closing_the_talon():
    """
    Measure how many iterations the MctsPlayer needs to fully simulate the
    CloseTheTalon action.

    The player is run on a series of game states (one per seed). Game states
    in which the algorithm decided it is not worth fully simulating the
    CloseTheTalon action within the given budget are discarded; for the
    remaining ones, the iteration after which the action was fully simulated
    is recorded. The results are saved as a CSV file and as a histogram next
    to this module, in the "data" folder.
    """
    seed_count = 1000
    player_options = MctsPlayerOptions(num_processes=1, max_iterations=10000)
    evaluate_seed = functools.partial(
        _min_iteration_to_fully_simulate_closing_the_talon,
        options=player_options,
        after_n_tricks=5)
    with multiprocessing.Pool(processes=4) as pool:
        rows = pool.map(evaluate_seed, list(range(seed_count)))
    results = DataFrame(rows, columns=["seed", "iteration"])

    output_base = os.path.join(os.path.dirname(__file__), "data",
                               "iterations_for_closing_the_talon")
    # noinspection PyTypeChecker
    results.to_csv(f"{output_base}.csv", index=False)

    results.iteration.hist()
    print(results.iteration.describe())

    # Rows without an iteration are the seeds where closing the talon was
    # never fully simulated.
    not_simulated_mask = results.iteration.isnull()
    not_simulated_count = len(results[not_simulated_mask])
    not_simulated_pct = 100.0 * not_simulated_count / seed_count
    plt.title("In %.0f%% of the cases (%s out of %s),\n"
              "closing the talon was not fully simulated." %
              (not_simulated_pct, not_simulated_count, seed_count))
    plt.suptitle("Iterations required to fully simulate closing the talon")
    plt.xlabel("Iterations")
    plt.tight_layout()
    plt.savefig(f"{output_base}.png")
def test_max_iterations_less_than_available_actions():
    """Request an action with a budget of a single iteration."""
    # Here not all of the root node's children will be expanded.
    tiny_budget = MctsPlayerOptions(
        num_processes=1,
        max_permutations=10,
        max_iterations=1,
    )
    initial_state = GameState.new(random_seed=0)
    player = CythonMctsPlayer(initial_state.next_player, False, tiny_budget)
    player.request_next_action(initial_state.next_player_view())
def main():
    """Generate the data and plot the results for one fixed configuration."""
    mcts_options = MctsPlayerOptions(
        merge_scoring_info_func=lower_ci_bound_on_raw_rewards,
        save_rewards=True,
        max_permutations=150,
        max_iterations=667,
    )
    _generate_data(mcts_options)
    _plot_results(mcts_options)
def _play_against_another_mcts_player_until_the_end(
        self, game_state) -> GameState:
    """Play `game_state` to the end against a second player of the same class.

    The opponent is created as PlayerId.TWO with an unlimited iteration
    budget and is always cleaned up, even if playing the game fails. Every
    action is printed as it is played. Returns the final game state.
    """
    opponent: Optional[MctsPlayer] = None
    try:
        opponent_options = MctsPlayerOptions(max_iterations=None)
        opponent_class = self._mcts_player.__class__
        if opponent_class == CythonMctsPlayer:
            opponent_options.num_processes = 1
        opponent = opponent_class(PlayerId.TWO, options=opponent_options)
        both_players = PlayerPair(self._mcts_player, opponent)
        while True:
            if game_state.is_game_over:
                break
            player_to_move = both_players[game_state.next_player]
            chosen_action = player_to_move.request_next_action(
                game_state.next_player_view())
            print(f"{game_state.next_player}: {chosen_action}")
            game_state = chosen_action.execute(game_state)
    finally:
        if opponent is not None:
            opponent.cleanup()
    return game_state
def mcts_ci_widths_and_permutations_across_multiple_game_states(
        options: MctsPlayerOptions, num_samples: int, num_game_states: int,
        keep_total_budget_constant: bool):
    """Collect and plot CI widths for several permutation counts.

    For each seed in `range(num_game_states)`, and `num_samples` times per
    seed, the step-by-step MCTS run is executed with 20, 50, 100, 200, 500
    and 1000 permutations. The confidence-interval width
    (`score_upp - score_low`) of every action at the last iteration is
    recorded, ordered by descending score. The rows are saved to
    "mcts_ci_widths_and_perm_across_game_states.csv" and a box plot of the
    best action's CI width per permutation count is saved to the matching
    ".png" file, both in the current working directory.

    Args:
        options: Base player options. They are not modified; the function
            works on a shallow copy.
        num_samples: Number of repetitions per game state.
        num_game_states: Number of seeds (game states) to evaluate.
        keep_total_budget_constant: If true, `max_iterations` is scaled so
            that `max_iterations * max_permutations` stays (up to integer
            division) equal to the budget implied by `options`.
    """
    # Local import so this block does not depend on the module's import list.
    import copy

    # Work on a private copy: max_permutations/max_iterations are overwritten
    # below and the caller's options object must not change as a side effect.
    options = copy.copy(options)

    data = []
    total_budget = options.max_iterations * options.max_permutations
    for seed in range(num_game_states):
        print(f"Evaluating on GameState.new(random_seed={seed})")
        for _ in range(num_samples):
            for permutations in [20, 50, 100, 200, 500, 1000]:
                options.max_permutations = permutations
                if keep_total_budget_constant:
                    options.max_iterations = total_budget // permutations
                dataframe = run_mcts_player_step_by_step(
                    GameState.new(random_seed=seed).next_player_view(),
                    options, options.max_iterations)
                dataframe["ci_width"] = \
                    dataframe["score_upp"] - dataframe["score_low"]
                # Only keep the final iteration, best action first.
                dataframe = dataframe[dataframe.iteration.eq(
                    dataframe.iteration.max())].sort_values(
                        "score", ascending=False)
                ci_widths = [permutations] + list(
                    dataframe["ci_width"].values)
                # Pad to the fixed number of columns used below.
                while len(ci_widths) < 8:
                    ci_widths.append(np.nan)
                data.append(tuple(ci_widths))

    dataframe = DataFrame(data,
                          columns=["Permutations", "BestAction"] +
                          [f"Action #{i}" for i in range(2, 8)])
    csv_path = "mcts_ci_widths_and_perm_across_game_states.csv"
    # noinspection PyTypeChecker
    dataframe.to_csv(csv_path, index=False)
    # dataframe = pandas.read_csv(csv_path)

    dataframe[["Permutations", "BestAction"]].boxplot(by=["Permutations"])
    plt.xticks(rotation=45, ha='right')
    plt.gcf().set_size_inches((5, 5))
    if keep_total_budget_constant:
        plt.title(f"Total budget: {total_budget} iterations")
    else:
        plt.title(
            f"Constant iterations: {options.max_iterations} iterations")
    plt.tight_layout()
    plt.savefig("mcts_ci_widths_and_perm_across_game_states.png")
def _get_player_closure(game_state: GameState, iterations: int,
                        max_permutations: int) -> Tuple[Closure, Closure]:
    """Return a (run, cleanup) pair for a non-cheating CythonMctsPlayer.

    The first closure requests the next action for the next player's view of
    `game_state`; the second one is the player's `cleanup` method.
    """
    player_options = MctsPlayerOptions(
        num_processes=1,
        max_iterations=iterations,
        max_permutations=max_permutations,
    )
    player = CythonMctsPlayer(game_state.next_player,
                              cheater=False,
                              options=player_options)

    def _request_action():
        player.request_next_action(game_state.next_player_view())

    return _request_action, player.cleanup
def get_overlap_for_multiple_game_states():
    """Gather the per-seed overlap data, save it and plot it.

    `_get_overlap_for_seed` is evaluated for 1000 seeds in a pool of four
    worker processes. The concatenated result is written to a CSV file in
    the "data" folder next to this module, summary statistics are printed,
    and a two-panel figure (scatter plot with a linear fit, plus a histogram
    of `heuristic_rank`) is saved under the same base name.
    """
    seed_count = 1000
    mcts_options = MctsPlayerOptions(num_processes=1,
                                     max_iterations=667,
                                     max_permutations=150,
                                     use_heuristic=True)

    # Process the game states and extract the data.
    overlap_for_seed = functools.partial(_get_overlap_for_seed,
                                         options=mcts_options)
    with multiprocessing.Pool(processes=4) as pool:
        per_seed_frames = pool.map(overlap_for_seed, list(range(seed_count)))
    overlap = pd.concat(per_seed_frames, ignore_index=True)

    # Save to CSV.
    output_base = os.path.join(os.path.dirname(__file__), "data",
                               "overlap_between_mcts_and_heuristic")
    # noinspection PyTypeChecker
    overlap.to_csv(f"{output_base}.csv", index=False)

    # Compute additional metrics.
    overlap["diff_visits"] = overlap["max_visits"] - overlap["heuristic_visits"]
    overlap["diff_visits_pct"] = overlap["diff_visits"] / overlap["max_visits"]

    # Print results.
    print(overlap.describe())

    # Generate plots.
    fig, axes = plt.subplots(nrows=2, ncols=1, squeeze=False)
    scatter_ax = axes[0, 0]
    rank_ax = axes[1, 0]

    overlap.plot.scatter(x="max_visits",
                         y="heuristic_visits",
                         alpha=0.02,
                         ax=scatter_ax)
    coefficients = np.polyfit(overlap["max_visits"],
                              overlap["heuristic_visits"], 1)
    fitted_line = np.poly1d(coefficients)
    overlap["reg"] = fitted_line(overlap["max_visits"])
    scatter_ax.plot(overlap.max_visits, overlap.reg, color="r")

    rank_ax.hist(overlap.heuristic_rank,
                 bins=list(range(8)),
                 density=True,
                 rwidth=0.9,
                 align="left")
    rank_ax.set_xlabel("Rank of the action deemed best by the HeuristicPlayer")
    rank_ax.set_ylabel("Fraction of nodes")

    fig.set_size_inches(5, 5)
    plt.suptitle("Overlap between Heuristic and Mcts")
    plt.tight_layout()
    plt.savefig(f"{output_base}.png")
def _get_algorithm_closure(game_state: GameState,
                           iterations: int) -> Tuple[Closure, Closure]:
    """Return a (run, cleanup) pair for a cheating CythonMctsPlayer.

    The first closure requests the next action directly on `game_state`;
    the second one is the player's `cleanup` method.
    """
    player_options = MctsPlayerOptions(
        num_processes=1,
        max_iterations=iterations,
        max_permutations=10,  # Won't matter if cheater=True
    )
    player = CythonMctsPlayer(game_state.next_player,
                              cheater=True,
                              options=player_options)

    def _request_action():
        player.request_next_action(game_state)

    return _request_action, player.cleanup
def main():
    """Run the variance analysis, then the CI-width/permutations analysis."""
    sample_count = 1
    use_cheater = False
    mcts_options = MctsPlayerOptions(
        num_processes=1,
        max_permutations=100,
        max_iterations=1000,
        save_rewards=True,
        select_best_child=True,
    )
    initial_state = GameState.new(random_seed=0)
    mcts_variance(initial_state, use_cheater, mcts_options, sample_count)
    mcts_ci_widths_and_permutations_across_multiple_game_states(
        options=mcts_options,
        num_samples=sample_count,
        num_game_states=30,
        keep_total_budget_constant=True)
def num_threads_and_time(class_under_test, options: MctsPlayerOptions):
    # pylint: disable=too-many-locals,cell-var-from-loop
    """Time `request_next_action()` for several `num_processes` values.

    For every seed in `range(NUM_SEEDS)` and every thread count in
    [1, 2, 4, 6, 8], a player of type `class_under_test` is created and
    timed with `timeit.Timer.autorange()`. The timings are saved to
    "data/num_threads_and_time.csv" next to this module and plotted (one
    line per seed plus the average) to "data/num_threads_and_time.png".

    Args:
        class_under_test: Player class; instantiated as
            `class_under_test(player_id, cheater=False, options=...)`.
        options: Base player options. They are not modified; each run uses
            a shallow copy with `num_processes` overridden.
    """
    # Local import so this block does not depend on the module's import list.
    import copy

    data = []
    for seed in range(NUM_SEEDS):
        game_state = GameState.new(random_seed=seed)
        for num_threads in [1, 2, 4, 6, 8]:
            # Copy instead of mutating the caller's options object.
            run_options = copy.copy(options)
            run_options.num_processes = num_threads
            mcts = class_under_test(game_state.next_player,
                                    cheater=False,
                                    options=run_options)
            try:
                timer = timeit.Timer(lambda: mcts.request_next_action(
                    game_state.next_player_view()))
                number, time_taken = timer.autorange()
            finally:
                # Release the player's resources even if the timing fails.
                mcts.cleanup()
            duration_sec = time_taken / number
            logging.info("Mcts took %.5f seconds using %d threads (seed=%s)",
                         duration_sec, num_threads, seed)
            data.append((seed, num_threads, duration_sec))

    # Save the dataframe with the timing info.
    dataframe = DataFrame(data,
                          columns=["seed", "num_threads", "duration_sec"])
    folder = os.path.join(os.path.dirname(__file__), "data")
    csv_path = os.path.join(folder, "num_threads_and_time.csv")
    # noinspection PyTypeChecker
    dataframe.to_csv(csv_path, index=False)

    # Plot the timing data obtained.
    for seed in sorted(dataframe.seed.drop_duplicates()):
        filtered_dataframe = dataframe[dataframe["seed"].eq(seed)]
        plt.plot(filtered_dataframe.num_threads,
                 filtered_dataframe.duration_sec,
                 label=None,
                 alpha=0.5)
        plt.scatter(filtered_dataframe.num_threads,
                    filtered_dataframe.duration_sec,
                    s=10)
    mean = dataframe.groupby("num_threads").mean().sort_index()
    plt.plot(mean.index,
             mean.duration_sec,
             label="Average",
             color="r",
             linewidth=3)
    plt.grid(which="both", linestyle="--")
    plt.legend(loc=0)
    plt.xlabel("Number of threads")
    plt.ylabel("Duration (seconds)")
    plt.title(f"{class_under_test.__name__}: " +
              f"{options.max_permutations} permutations x " +
              f"{options.max_iterations} iterations on\n" +
              cpuinfo.get_cpu_info()["brand_raw"])
    plt.savefig(os.path.join(folder, "num_threads_and_time.png"))
def evaluate_bummerl(
        bummerl: Bummerl,
        bummerl_id: str = "0",
        options: Optional[MctsPlayerOptions] = None) -> EvalResults:
    """Evaluate every completed game of `bummerl` and collect the results.

    Two CythonMctsPlayers (one per player id) sharing the same options are
    used for all games. The score passed to `evaluate_game` is the bummerl
    score before the game being evaluated; it is updated with the game's
    points afterwards.

    Args:
        bummerl: The bummerl whose `completed_games` are evaluated.
        bummerl_id: Identifier forwarded to `evaluate_game`.
        options: Player options; a default configuration is used when this
            is falsy.

    Returns:
        The concatenated results of `evaluate_game` for all games.
    """
    if not options:
        options = MctsPlayerOptions(
            num_processes=1,
            max_permutations=150,
            max_iterations=667,
            merge_scoring_info_func=average_score_with_tiebreakers)
    player_one = CythonMctsPlayer(PlayerId.ONE, False, options)
    player_two = CythonMctsPlayer(PlayerId.TWO, False, options)
    players = PlayerPair(player_one, player_two)

    running_score = PlayerPair(0, 0)
    all_results = []
    for game_id, game in enumerate(bummerl.completed_games):
        game_results = evaluate_game(game, players, running_score, bummerl_id,
                                     game_id)
        all_results.extend(game_results)
        running_score.one += game.game_state.game_points.one
        running_score.two += game.game_state.game_points.two
    return all_results
def _main():
    """Evaluate the game pickled in "../autosave_game.pickle" and print it."""
    # Alternative: evaluate a whole autosaved bummerl instead of one game.
    # filename = "../autosave_bummerl.pickle"
    # with open(filename, "rb") as input_file:
    #   bummerl = pickle.load(input_file)
    # results = evaluate_bummerl(bummerl)
    # print_eval_results(results, None)

    mcts_options = MctsPlayerOptions(
        num_processes=1,
        max_permutations=150,
        max_iterations=667,
        merge_scoring_info_func=average_score_with_tiebreakers)
    player_one = CythonMctsPlayer(PlayerId.ONE, False, mcts_options)
    player_two = CythonMctsPlayer(PlayerId.TWO, False, mcts_options)
    players = PlayerPair(player_one, player_two)
    initial_score = PlayerPair(0, 0)
    with open("../autosave_game.pickle", "rb") as input_file:
        game = pickle.load(input_file)
    results = evaluate_game(game, players, initial_score, "0", "0")
    print_eval_results(results, None)
def setUp(self) -> None:
    """Create a CythonMctsPlayer that merges scoring info with count_visits."""
    self._mcts_player = CythonMctsPlayer(
        PlayerId.ONE,
        options=MctsPlayerOptions(
            num_processes=1,
            merge_scoring_info_func=count_visits,
            max_iterations=None,
        ),
    )
def _main():
    """Time the CythonMctsPlayer with 100 permutations x 4000 iterations."""
    benchmark_options = MctsPlayerOptions(
        max_permutations=100,
        max_iterations=4000,
    )
    num_threads_and_time(CythonMctsPlayer, benchmark_options)
[action for action in game.actions if action.player_id == PlayerId.ONE]) action_counter = 0 fig, ax = plt.subplots(nrows=num_actions, ncols=2, squeeze=False) for action in game.actions: if action.player_id == PlayerId.ONE: # cheater = False dataframe = run_mcts_player_step_by_step(game_state.next_player_view(), options, iterations_step=100, game_points=game_points) _plot_data(dataframe, "score", ax[action_counter, 0], _hlines_for_scores) # cheater = True dataframe = run_mcts_player_step_by_step(game_state, options, iterations_step=100, game_points=game_points) _plot_data(dataframe, "score", ax[action_counter, 1], _hlines_for_scores) action_counter += 1 game_state = action.execute(game_state) fig.set_size_inches(20, 5 * num_actions) fig.suptitle(f"Debug game: dealer={game.dealer}, seed={game.seed}") plt.tight_layout() plt.savefig("debug_game.png") if __name__ == "__main__": main_wrapper( lambda: debug_game("bummerl_4180_2_done.pickle", 5, MctsPlayerOptions(max_iterations=667 * 4, max_permutations=150, save_rewards=False)))
def setUp(self) -> None:
    """Create an MctsPlayer without an iteration limit."""
    self._mcts_player = MctsPlayer(
        PlayerId.ONE,
        options=MctsPlayerOptions(max_iterations=None),
    )
def _min_iterations_to_find_the_best_action(
        num_game_states: int = 100,
        cheater: bool = True,
        num_samples_per_game_state: int = 10,
        options: Optional[MctsPlayerOptions] = None):
    """
    Compute the number of iterations required until the best action seems to
    be found.

    It looks for the moment when an action becomes the best action and it
    remains the best action even if we continue to run up to max_iterations
    iterations. It measures this for num_game_states different game states
    (num_samples_per_game_state samples each), saves the measurements to a
    CSV file and plots a histogram of them.

    Args:
        num_game_states: Number of seeds (game states) to evaluate.
        cheater: Only used here to pick the output file suffix and the plot
            title.
        num_samples_per_game_state: Number of runs per game state.
        options: Player options; defaults to `MctsPlayerOptions()`.
    """
    options = options or MctsPlayerOptions()
    data = []
    for seed in range(num_game_states):
        for sample_index in range(num_samples_per_game_state):
            game_state = GameState.new(dealer=PlayerId.ONE, random_seed=seed)
            dataframe = run_mcts_and_collect_data(game_state, options)
            last_iteration = dataframe.iteration.max()
            best_actions = dataframe[
                dataframe.iteration.eq(last_iteration) &
                dataframe["rank"].eq(1)].action
            best_actions_per_iteration = dataframe[dataframe["rank"].eq(1)]
            # Iterations at which one of the final best actions was already
            # ranked first. Computed once (it does not depend on the loop
            # below) and stored in a set for constant-time membership tests.
            iterations_with_final_best_actions = set(
                best_actions_per_iteration[
                    best_actions_per_iteration.action.isin(
                        best_actions)].iteration.tolist())
            iterations_with_other_best_actions = [
                iteration
                for iteration in dataframe.iteration.drop_duplicates()
                if iteration not in iterations_with_final_best_actions
            ]
            # 0 means the final best action was ranked first all along.
            found_at_iteration = max(iterations_with_other_best_actions,
                                     default=0)
            best_action = min(best_actions)
            logging.info("Best action for seed %s: %s, found at iteration %s",
                         seed, best_action, found_at_iteration)
            data.append((seed, sample_index, best_action, found_at_iteration))

    dataframe = DataFrame(
        data, columns=["seed", "sample_index", "action", "iteration"])
    suffix = "_cheater" if cheater else f"_{options.max_permutations}perm"
    csv_path = os.path.join(
        _folder, f"min_iterations_to_find_the_best_action{suffix}.csv")
    # noinspection PyTypeChecker
    dataframe.to_csv(csv_path, index=False)
    # dataframe = pandas.read_csv(csv_path)

    fig, ax = plt.subplots()
    ax2 = ax.twinx()
    dataframe.iteration.hist(color="b", linewidth=3, ax=ax)
    dataframe.iteration.plot(kind="kde",
                             label="Overall",
                             color="r",
                             linewidth=3,
                             ax=ax2)
    plt.legend(loc=0)
    plt.xlabel("Iterations")
    title_suffix = \
        "" if cheater else f" ({options.max_permutations} permutations)"
    plt.title(
        f"Number of iterations until the best action is found{title_suffix}")
    fig.set_size_inches(10, 5)
    fig.savefig(
        os.path.join(_folder,
                     f"min_iterations_to_find_the_best_action{suffix}.png"))
    logging.info("Overall results: %s", dataframe.iteration.describe())
    logging.info("Value counts: %s", dataframe.iteration.value_counts())
def setUp(self) -> None:
    """Create an MctsPlayer with select_best_child enabled."""
    self._mcts_player = MctsPlayer(
        PlayerId.ONE,
        options=MctsPlayerOptions(
            # Run in-process so that code coverage sees this code-path.
            num_processes=1,
            select_best_child=True,
            max_iterations=None,
        ),
    )
def setUp(self) -> None:
    """Create a CythonMctsPlayer with select_best_child and save_rewards."""
    self._mcts_player = CythonMctsPlayer(
        PlayerId.ONE,
        options=MctsPlayerOptions(
            num_processes=1,
            save_rewards=True,
            select_best_child=True,
            max_iterations=None,
        ),
    )
def test_cannot_instantiate_with_multi_threading(self) -> None:
    """CythonMctsPlayer must reject options that request 10 processes."""
    multi_process_options = MctsPlayerOptions(
        num_processes=10,
        max_iterations=None,
    )
    expected_error = self.assertRaisesRegex(ValueError, "10 threads")
    with expected_error:
        CythonMctsPlayer(PlayerId.ONE, options=multi_process_options)
def test_cannot_instantiate_with_save_rewards(self) -> None:
    """MctsPlayer must reject options that enable save_rewards."""
    save_rewards_options = MctsPlayerOptions(
        save_rewards=True,
        max_iterations=None,
    )
    expected_error = self.assertRaisesRegex(ValueError,
                                            "save_rewards is not supported")
    with expected_error:
        MctsPlayer(PlayerId.ONE, options=save_rewards_options)
def setUp(self) -> None:
    """Create a CythonMctsPlayer that merges with best_action_frequency."""
    self._mcts_player = CythonMctsPlayer(
        PlayerId.ONE,
        options=MctsPlayerOptions(
            num_processes=1,
            merge_scoring_info_func=best_action_frequency,
            max_iterations=None,
        ),
    )
def setUp(self) -> None:
    """Create a CythonMctsPlayer using merge_ucbs_using_weighted_average."""
    self._mcts_player = CythonMctsPlayer(
        PlayerId.ONE,
        options=MctsPlayerOptions(
            num_processes=1,
            merge_scoring_info_func=merge_ucbs_using_weighted_average,
            max_iterations=None,
        ),
    )
lambda player_id: HeuristicPlayer( player_id, HeuristicPlayerOptions(trump_for_marriage=False)), "HeuristicNoAvoidDirectLoss": lambda player_id: HeuristicPlayer( player_id, HeuristicPlayerOptions(avoid_direct_loss=False)), "HeuristicWithTrumpControl": lambda player_id: HeuristicPlayer( player_id, HeuristicPlayerOptions(trump_control=True)), # Same permutations, different iterations "MctsPlayer30perm10000iter": lambda player_id: CythonMctsPlayer( player_id, False, MctsPlayerOptions(num_processes=1, max_permutations=30, max_iterations=10000, select_best_child=True, exploration_param=math.sqrt(2))), "MctsPlayer30perm5000iter": lambda player_id: CythonMctsPlayer( player_id, False, MctsPlayerOptions(num_processes=1, max_permutations=30, max_iterations=5000, select_best_child=True, exploration_param=math.sqrt(2))), "MctsPlayer30perm2500iter": lambda player_id: CythonMctsPlayer( player_id, False, MctsPlayerOptions(num_processes=1, max_permutations=30,