def play_against_minimax(self, color, game_count):
    """
    Evaluates the agent's network against a minimax opponent.

    The minimax player follows an optimal strategy, so the best mean score
    the agent can reach against it is 0.5.

    :param color:       the color the agent plays
    :param game_count:  the number of games that are played
    :return:            the mean score against the minimax player
                        (0: lose, 0.5: draw, 1: win)
    """
    # the agent is built first, then its opponent
    agent = tournament.QNetPlayer(self.network)
    opponent = tournament.MinimaxPlayer()

    # the agent keeps the same color for all games
    return tournament.play_one_color(game_count, agent, color, opponent)
def net_vs_minimax(net, game_count, mcts_sim_count, c_puct, temp, color=None):
    """
    Plays the alpha zero network against a minimax player and reports the network's score.

    :param net:             alpha zero network
    :param game_count:      total number of games to play
    :param mcts_sim_count:  number of monte carlo simulations
    :param c_puct:          constant that controls the exploration
    :param temp:            the temperature
    :param color:           the color of the network; if None the games are
                            played with tournament.play_match instead of
                            tournament.play_one_color
    :return:                score of the network
    """
    network_player = tournament.AlphaZeroPlayer(net, c_puct, mcts_sim_count, temp)
    opponent = tournament.MinimaxPlayer()

    # a fixed color was requested: every game is played with that color
    if color is not None:
        return tournament.play_one_color(game_count, network_player, color, opponent)

    # no color given: let play_match decide how the games are played
    return tournament.play_match(game_count, network_player, opponent)
# --- random vs random ----------------------------------------------------
# player1 is scored once as white and once as black
game_count = 1000
player1, player2 = tournament.RandomPlayer(), tournament.RandomPlayer()

white_score = tournament.play_one_color(game_count, player1, CONST.WHITE, player2)
message = "white score for random vs random: {}".format(white_score)
logger.info(message)

black_score = tournament.play_one_color(game_count, player1, CONST.BLACK, player2)
message = "black score for random vs random: {}".format(black_score)
logger.info(message)


# --- minimax vs minimax --------------------------------------------------
# sanity check: the score of two minimax players is expected to be 0.5
game_count = 100
player1, player2 = tournament.MinimaxPlayer(), tournament.MinimaxPlayer()

player1_score = tournament.play_match(game_count, player1, player2)
message = "minimax vs minimax score: {}".format(player1_score)
logger.info(message)


# --- minimax vs random ---------------------------------------------------
# player1 is the minimax player here, so both scores are from its perspective
game_count = 1000
player1, player2 = tournament.MinimaxPlayer(), tournament.RandomPlayer()

white_score = tournament.play_one_color(game_count, player1, CONST.WHITE, player2)
message = "white score for minimax vs random: {}".format(white_score)
logger.info(message)

black_score = tournament.play_one_color(game_count, player1, CONST.BLACK, player2)
message = "black score for minimax vs random: {}".format(black_score)
logger.info(message)