def run(config=None):
    """Play a human-vs-AlphaZero game in the console.

    Parameters
    ----------
    config : optional
        Pre-loaded model configuration. When None, the bundled
        resnet_6_6_4 model parameters are loaded from disk.
    """
    if config is None:  # BUGFIX: `is None`, not `== None` (PEP 8 identity test)
        config = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                             only_load_param=True)
    try:
        board = Board(width=config.board_width,
                      height=config.board_height,
                      n_in_row=config.n_in_row)
        game = Game(board)

        # --------------- human VS AI ----------------
        # setup which Network to use based on the net_params
        best_policy = PolicyValueNet(config.board_width, config.board_height,
                                     Network=config.network,
                                     net_params=config.policy_param)
        # set larger nplays for better performance; add_noise=True keeps some
        # randomness in the root so the AI does not repeat the same game
        mcts_player = AlphaZeroPlayer(best_policy.predict,
                                      c_puct=config.c_puct,
                                      nplays=100,
                                      add_noise=True)

        # uncomment the following line to play with pure MCTS
        # mcts_player2 = RolloutPlayer(nplays=1000, c_puct=config.c_puct)

        # human player, input your move in the format: 2,3
        human = HumanPlayer()

        # set who_first=0 for human first
        game.start_game(human, mcts_player, who_first=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def evaluate(self, n_games=10):
    """Play ``n_games`` against a fixed opponent to monitor training progress.

    Returns the win ratio of the current player, counting a tie as half
    a win. This is only a progress probe, not a full-strength match.
    """
    # Player under evaluation: the latest network-guided MCTS player.
    current_mcts_player = AlphaZeroPlayer(
        self.policy_value_net.predict,
        c_puct=self.config.c_puct,
        nplays=self.config.n_playout)

    if self.config.evaluate_opponent != 'Pure':
        # Opponent is the best AlphaZero player stored so far.
        print("Begin evaluation, Opponent is AlphaZeroMCTSPlayer")
        opponent_mcts_player = load_current_best_player(
            self.config.cur_best_alphazero_store_filename)
    else:
        # Opponent is a plain rollout-based MCTS player.
        print("Begin evaluation, Opponent is RolloutMCTSPlayer")
        opponent_mcts_player = RolloutPlayer(
            c_puct=5, nplays=self.config.pure_mcts_playout_num)

    win_cnt = defaultdict(int)
    for game_idx in range(n_games):
        print("evaluate game %d" % game_idx)
        # Alternate who moves first so neither side keeps a fixed advantage.
        outcome = self.config.game.start_game(
            current_mcts_player, opponent_mcts_player,
            who_first=game_idx % 2, is_shown=0)
        win_cnt[outcome] += 1

    # NOTE(review): assumes start_game returns 1 for a current-player win,
    # 2 for a loss and -1 for a tie — confirm against Game.start_game.
    wins, losses, ties = win_cnt[1], win_cnt[2], win_cnt[-1]
    win_ratio = 1.0 * (wins + 0.5 * ties) / n_games
    print("num_playouts:{}, win: {}, lose: {}, tie:{}".format(
        self.config.pure_mcts_playout_num, wins, losses, ties))
    return win_ratio
def __init__(self, config=None):
    """Initialize the pipeline: configuration, network wrapper and
    the self-play MCTS player."""
    # params of the board and the game; fall back to a default Config
    self.config = config if config else Config()

    # Network wrapper around the policy-value model.
    self.policy_value_net = PolicyValueNet(
        self.config.board_width,
        self.config.board_height,
        net_params=self.config.policy_param,
        Network=self.config.network)

    # Hand the network's predict function to MCTS so the tree search
    # is guided by the neural network during self-play.
    self.mcts_player = AlphaZeroPlayer(
        self.policy_value_net.predict,
        c_puct=self.config.c_puct,
        nplays=self.config.n_playout,
        is_selfplay=True)
def load_player_from_file(file_name, add_noise=True, nplays=1200):
    """Reconstruct the stored best AlphaZero player from ``file_name``.

    ``add_noise=True`` keeps Dirichlet noise at the search root so the
    loaded player does not replay the exact same game every time;
    ``nplays`` defaults to a fairly strong 1200 simulations per move.
    """
    stored_config = load_config(file_name, only_load_param=False)
    # setup which Network to use based on the net_params
    policy = PolicyValueNet(
        stored_config.board_width,
        stored_config.board_height,
        Network=stored_config.network,
        net_params=stored_config.policy_param)
    return AlphaZeroPlayer(
        policy.predict,
        c_puct=stored_config.c_puct,
        nplays=nplays,
        add_noise=add_noise)
def __init__(self, config=None):
    """Initialize the pipeline: configuration, policy-value network and
    the self-play MCTS player.

    Parameters
    ----------
    config : optional
        Training configuration; a default ``Config()`` is created when
        omitted.
    """
    # params of the board and the game
    self.config = config if config else Config()

    # Old serialized configs predate the use_gpu flag; default it to off.
    # BUGFIX: patch the attribute on self.config (the object actually used
    # below) — the original called setattr on the `config` parameter, which
    # is None when the default Config() path was taken.
    if not hasattr(self.config, "use_gpu"):
        setattr(self.config, "use_gpu", False)  # compatible with old version config

    # Network wrapper
    self.policy_value_net = PolicyValueNet(
        self.config.board_width,
        self.config.board_height,
        net_params=self.config.policy_param,
        Network=self.config.network,
        use_gpu=self.config.use_gpu)

    # forward the reference of policy_value_net's predict function,
    # used to guide the MCTS simulations during self-play
    self.mcts_player = AlphaZeroPlayer(
        self.policy_value_net.predict,
        c_puct=self.config.c_puct,
        nplays=self.config.n_playout,
        is_selfplay=True)
def run(config=None):
    """Launch a visualized AlphaZero-vs-human game.

    The game loop runs in a worker thread while the GUI event loop
    (``game.show()``) stays on the main thread.

    Parameters
    ----------
    config : optional
        Pre-loaded model configuration. When None, the bundled
        resnet_6_6_4 model parameters are loaded from disk.
    """
    if config is None:  # BUGFIX: `is None`, not `== None` (PEP 8 identity test)
        config = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                             only_load_param=True)
    try:
        board = Board(width=config.board_width,
                      height=config.board_height,
                      n_in_row=config.n_in_row)

        # --------------------1.set player: alphazero VS human---------------------#
        # setup which Network to use based on the net_params
        best_policy = PolicyValueNet(config.board_width, config.board_height,
                                     Network=config.network,
                                     net_params=config.policy_param)
        # set larger nplays for better performance
        player1 = AlphaZeroPlayer(best_policy.predict,
                                  c_puct=config.c_puct,
                                  nplays=1000)
        # uncomment the following line to play with pure MCTS
        # player2 = RolloutPlayer(nplays=1000, c_puct=config.c_puct)
        player2 = HumanPlayer()

        # --------------------2.set order---------------------#
        who_first = 0  # 0 means player1 first, otherwise player2 first

        # --------------------3.start game--------------------#
        game = Game(board, is_visualize=True)
        t = threading.Thread(target=game.start_game,
                             args=(player1, player2, who_first))
        t.start()
        game.show()
    # BUGFIX: was a bare `except:` that silently swallowed every exception
    # (hiding real setup/runtime bugs); only Ctrl-C should mean "quit",
    # matching the console version of run().
    except KeyboardInterrupt:
        print('\n\rquit')