def run(config=None):
    """Play a human-vs-AlphaZero game in the console.

    Parameters
    ----------
    config : optional
        Pre-loaded model configuration. When None, the bundled
        resnet_6_6_4 model parameters are loaded from disk.
    """
    if config is None:  # BUGFIX: `is None`, not `== None` (PEP 8 identity test)
        config = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                             only_load_param=True)
    try:
        board = Board(width=config.board_width,
                      height=config.board_height,
                      n_in_row=config.n_in_row)
        game = Game(board)

        # --------------- human VS AI ----------------
        # setup which Network to use based on the net_params
        best_policy = PolicyValueNet(config.board_width, config.board_height,
                                     Network=config.network,
                                     net_params=config.policy_param)
        # set larger nplays for better performance; add_noise=True keeps some
        # randomness in the root so the AI does not repeat the same game
        mcts_player = AlphaZeroPlayer(best_policy.predict,
                                      c_puct=config.c_puct,
                                      nplays=100,
                                      add_noise=True)

        # uncomment the following line to play with pure MCTS
        # mcts_player2 = RolloutPlayer(nplays=1000, c_puct=config.c_puct)

        # human player, input your move in the format: 2,3
        human = HumanPlayer()

        # set who_first=0 for human first
        game.start_game(human, mcts_player, who_first=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def evaluate(self, n_games=10):
    """Play ``n_games`` against a fixed opponent to monitor training progress.

    Returns the win ratio of the current player, counting a tie as half
    a win. This is only a progress probe, not a full-strength match.
    """
    # Player under evaluation: the latest network-guided MCTS player.
    current_mcts_player = AlphaZeroPlayer(
        self.policy_value_net.predict,
        c_puct=self.config.c_puct,
        nplays=self.config.n_playout)

    if self.config.evaluate_opponent != 'Pure':
        # Opponent is the best AlphaZero player stored so far.
        print("Begin evaluation, Opponent is AlphaZeroMCTSPlayer")
        opponent_mcts_player = load_current_best_player(
            self.config.cur_best_alphazero_store_filename)
    else:
        # Opponent is a plain rollout-based MCTS player.
        print("Begin evaluation, Opponent is RolloutMCTSPlayer")
        opponent_mcts_player = RolloutPlayer(
            c_puct=5, nplays=self.config.pure_mcts_playout_num)

    win_cnt = defaultdict(int)
    for game_idx in range(n_games):
        print("evaluate game %d" % game_idx)
        # Alternate who moves first so neither side keeps a fixed advantage.
        outcome = self.config.game.start_game(
            current_mcts_player, opponent_mcts_player,
            who_first=game_idx % 2, is_shown=0)
        win_cnt[outcome] += 1

    # NOTE(review): assumes start_game returns 1 for a current-player win,
    # 2 for a loss and -1 for a tie — confirm against Game.start_game.
    wins, losses, ties = win_cnt[1], win_cnt[2], win_cnt[-1]
    win_ratio = 1.0 * (wins + 0.5 * ties) / n_games
    print("num_playouts:{}, win: {}, lose: {}, tie:{}".format(
        self.config.pure_mcts_playout_num, wins, losses, ties))
    return win_ratio
def __init__(self, config=None):
    """Initialize the pipeline: configuration, network wrapper and
    the self-play MCTS player."""
    # params of the board and the game; fall back to a default Config
    self.config = config if config else Config()

    # Network wrapper around the policy-value model.
    self.policy_value_net = PolicyValueNet(
        self.config.board_width,
        self.config.board_height,
        net_params=self.config.policy_param,
        Network=self.config.network)

    # Hand the network's predict function to MCTS so the tree search
    # is guided by the neural network during self-play.
    self.mcts_player = AlphaZeroPlayer(
        self.policy_value_net.predict,
        c_puct=self.config.c_puct,
        nplays=self.config.n_playout,
        is_selfplay=True)
def load_player_from_file(file_name, add_noise=True, nplays=1200):
    """Reconstruct the stored best AlphaZero player from ``file_name``.

    ``add_noise=True`` keeps Dirichlet noise at the search root so the
    loaded player does not replay the exact same game every time;
    ``nplays`` defaults to a fairly strong 1200 simulations per move.
    """
    stored_config = load_config(file_name, only_load_param=False)
    # setup which Network to use based on the net_params
    policy = PolicyValueNet(
        stored_config.board_width,
        stored_config.board_height,
        Network=stored_config.network,
        net_params=stored_config.policy_param)
    return AlphaZeroPlayer(
        policy.predict,
        c_puct=stored_config.c_puct,
        nplays=nplays,
        add_noise=add_noise)
def __init__(self, config=None):
    """Initialize the pipeline: configuration, policy-value network and
    the self-play MCTS player.

    Parameters
    ----------
    config : optional
        Training configuration; a default ``Config()`` is created when
        omitted.
    """
    # params of the board and the game
    self.config = config if config else Config()

    # Old serialized configs predate the use_gpu flag; default it to off.
    # BUGFIX: patch the attribute on self.config (the object actually used
    # below) — the original called setattr on the `config` parameter, which
    # is None when the default Config() path was taken.
    if not hasattr(self.config, "use_gpu"):
        setattr(self.config, "use_gpu", False)  # compatible with old version config

    # Network wrapper
    self.policy_value_net = PolicyValueNet(
        self.config.board_width,
        self.config.board_height,
        net_params=self.config.policy_param,
        Network=self.config.network,
        use_gpu=self.config.use_gpu)

    # forward the reference of policy_value_net's predict function,
    # used to guide the MCTS simulations during self-play
    self.mcts_player = AlphaZeroPlayer(
        self.policy_value_net.predict,
        c_puct=self.config.c_puct,
        nplays=self.config.n_playout,
        is_selfplay=True)
def run(config=None):
    """Launch a visualized AlphaZero-vs-human game.

    The game loop runs in a worker thread while the GUI event loop
    (``game.show()``) stays on the main thread.

    Parameters
    ----------
    config : optional
        Pre-loaded model configuration. When None, the bundled
        resnet_6_6_4 model parameters are loaded from disk.
    """
    if config is None:  # BUGFIX: `is None`, not `== None` (PEP 8 identity test)
        config = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                             only_load_param=True)
    try:
        board = Board(width=config.board_width,
                      height=config.board_height,
                      n_in_row=config.n_in_row)

        # --------------------1.set player: alphazero VS human---------------------#
        # setup which Network to use based on the net_params
        best_policy = PolicyValueNet(config.board_width, config.board_height,
                                     Network=config.network,
                                     net_params=config.policy_param)
        # set larger nplays for better performance
        player1 = AlphaZeroPlayer(best_policy.predict,
                                  c_puct=config.c_puct,
                                  nplays=1000)
        # uncomment the following line to play with pure MCTS
        # player2 = RolloutPlayer(nplays=1000, c_puct=config.c_puct)
        player2 = HumanPlayer()

        # --------------------2.set order---------------------#
        who_first = 0  # 0 means player1 first, otherwise player2 first

        # --------------------3.start game--------------------#
        game = Game(board, is_visualize=True)
        t = threading.Thread(target=game.start_game,
                             args=(player1, player2, who_first))
        t.start()
        game.show()
    # BUGFIX: was a bare `except:` that silently swallowed every exception
    # (hiding real setup/runtime bugs); only Ctrl-C should mean "quit",
    # matching the console version of run().
    except KeyboardInterrupt:
        print('\n\rquit')