Example #1
    def __init__(self, init_model=None, transfer_model=None):
        self.resnet_block = 19  # number of residual blocks in the ResNet
        # params of the board and the game
        self.board_width = 11
        self.board_height = 11
        self.n_in_row = 5
        self.board = Board(width=self.board_width,
                           height=self.board_height,
                           n_in_row=self.n_in_row)
        self.game = Game(self.board)
        # training params
        self.learn_rate = 1e-3
        self.n_playout = 400  # number of MCTS simulations per move
        self.c_puct = 5  # exploration constant in the PUCT formula
        self.buffer_size = 500000  # replay buffer (memory) size
        self.batch_size = 512  # mini-batch size for training
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1  # self-play games collected per training iteration
        self.check_freq = 50  # evaluate the current policy every check_freq iterations
        self.game_batch_num = 50000000  # total number of self-play iterations to run
        self.best_win_ratio = 0.0
        # number of simulations used by the pure MCTS player, which serves
        # as the opponent for evaluating the trained policy
        self.pure_mcts_playout_num = 200
        if (init_model is not None) and os.path.exists(init_model + ".index"):
            # start training from an initial policy-value net
            self.policy_value_net = PolicyValueNet(
                self.board_width,
                self.board_height,
                block=self.resnet_block,
                init_model=init_model,
                cuda=True,
            )
        elif (transfer_model
              is not None) and os.path.exists(transfer_model + ".index"):
            # start training from a pre-trained policy-value net
            self.policy_value_net = PolicyValueNet(
                self.board_width,
                self.board_height,
                block=self.resnet_block,
                transfer_model=transfer_model,
                cuda=True,
            )
        else:
            # start training from a new policy-value net
            self.policy_value_net = PolicyValueNet(self.board_width,
                                                   self.board_height,
                                                   block=self.resnet_block,
                                                   cuda=True)

        self.mcts_player = MCTSPlayer(
            policy_value_function=self.policy_value_net.policy_value_fn_random,
            action_fc=self.policy_value_net.action_fc_test,
            evaluation_fc=self.policy_value_net.evaluation_fc2_test,
            c_puct=self.c_puct,
            n_playout=self.n_playout,
            is_selfplay=True,
        )
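For context, a minimal usage sketch of this constructor. The class name TrainPipeline and the run() entry point are assumptions, since the snippet only shows the __init__ body.

if __name__ == '__main__':
    # start from scratch; pass init_model or transfer_model to resume or fine-tune
    training_pipeline = TrainPipeline()
    training_pipeline.run()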
Example #2
def run(start_player=0, is_shown=1):
    # run a Gomoku game with the AI
    # you can set up human vs AI or AI vs AI
    n = 5
    width, height = 15, 15
    # model_file = 'model_15_15_5/best_policy.model'
    # width, height = 6, 6
    # model_file = 'model/best_policy.model'
    # width, height = 11, 11
    # model_file = 'model/best_policy.model'
    model_file = 'training/model_best/policy.model'
    # model_file = 'training/best_policy.model'
    p = os.getcwd()
    model_file = path.join(p, model_file)

    board = Board(width=width, height=height, n_in_row=n)
    game = Game(board)

    mcts_player = MCTS_pure(5, 8000)  # pure MCTS baseline player (not used in the GUI game below)

    best_policy = PolicyValueNet(board_width=width,
                                 board_height=height,
                                 block=19,
                                 init_model=model_file,
                                 cuda=True)

    # alpha_zero vs alpha_zero

    # best_policy.save_numpy(best_policy.network_all_params)
    # best_policy.load_numpy(best_policy.network_oppo_all_params)
    alpha_zero_player = MCTSPlayer(
        policy_value_function=best_policy.policy_value_fn_random,
        action_fc=best_policy.action_fc_test,
        evaluation_fc=best_policy.evaluation_fc2_test,
        c_puct=5,
        n_playout=400,
        is_selfplay=False)

    # alpha_zero_player_oppo = MCTSPlayer(policy_value_function=best_policy.policy_value_fn_random,
    #                                     action_fc=best_policy.action_fc_test_oppo,
    #                                     evaluation_fc=best_policy.evaluation_fc2_test_oppo,
    #                                     c_puct=5,
    #                                     n_playout=400,
    #                                     is_selfplay=False)

    # human player, input your move in the format: 2,3
    # set start_player=0 for human first
    # play in the terminal without a GUI

    # human = Human()
    # win = game.start_play(human, alpha_zero_player, start_player=start_player, is_shown=is_shown,print_prob=True)
    # return win

    # play in GUI
    game.start_play_with_UI(alpha_zero_player)  # Play with alpha zero
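A typical entry point for this script might look like the sketch below; the defaults mirror run()'s signature, and the guard itself is an assumption not shown in the snippet.

if __name__ == '__main__':
    run(start_player=0, is_shown=1)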
    def __init__(self):
        # def run(start_player=0,is_shown=1):
        self.start_player = 0
        self.is_shown = 1
        menu = Gui_menu()

        # run a Gomoku game with the AI
        # you can set up human vs AI or AI vs AI
        self.n = 5  # rule: five in a row (omok)

        if menu.rule == 11:
            width, height = 11, 11  # board width and height
            model_file = 'model_11_11_5/best_policy.model'
        elif menu.rule == 15:
            width, height = 15, 15  # board width and height
            model_file = 'model_15_15_5/best_policy.model'

        p = os.getcwd()  # get the current working directory
        model_file = path.join(p, model_file)  # join it with the model file name

        board = Board(width=width, height=height, n_in_row=self.n)  # game board
        game = Game(board)

        mcts_player = MCTS_pure(5, 400)  # pure MCTS baseline player (not used in the GUI game below)

        best_policy = PolicyValueNet(board_width=width,
                                     board_height=height,
                                     block=19,
                                     init_model=model_file,
                                     cuda=True)

        # alpha_zero vs alpha_zero

        # best_policy.save_numpy(best_policy.network_all_params)
        # best_policy.load_numpy(best_policy.network_oppo_all_params)
        alpha_zero_player = MCTSPlayer(
            policy_value_function=best_policy.policy_value_fn_random,
            action_fc=best_policy.action_fc_test,
            evaluation_fc=best_policy.evaluation_fc2_test,
            c_puct=5,
            n_playout=400,
            is_selfplay=False)

        # alpha_zero_player_oppo = MCTSPlayer(policy_value_function=best_policy.policy_value_fn_random,
        #                                     action_fc=best_policy.action_fc_test_oppo,
        #                                     evaluation_fc=best_policy.evaluation_fc2_test_oppo,
        #                                     c_puct=5,
        #                                     n_playout=400,
        #                                     is_selfplay=False)

        # human player, input your move in the format: 2,3
        # set start_player=0 for human first
        # play in the terminal without a GUI

        # human = Human()
        # win = game.start_play(human, alpha_zero_player, start_player=start_player, is_shown=is_shown,print_prob=True)
        # return win

        # play in GUI
        game.start_play_with_UI(alpha_zero_player)
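For completeness, a sketch of launching this GUI wrapper. The class name GomokuGUI is hypothetical, since only the __init__ body is shown; the menu dialog and the GUI game loop run entirely inside the constructor.

if __name__ == '__main__':
    GomokuGUI()  # hypothetical class name; Gui_menu selection and the GUI game start inside __init__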
    def __init__(self, init_model=None, transfer_model=None):
        self.game_count = 0  # count of total games played
        self.resnet_block = 19  # number of residual blocks in the ResNet
        # params of the board and the game
        self.board_width = 11
        self.board_height = 11
        self.n_in_row = 5
        self.board = Board(
            width=self.board_width, height=self.board_height, n_in_row=self.n_in_row
        )
        self.game = Game(self.board)
        # training params
        self.learn_rate = 1e-3
        self.n_playout = 400  # number of MCTS simulations per move
        self.c_puct = 5  # exploration constant in the PUCT formula
        self.buffer_size = 500000
        # replay buffer (memory) size; should be larger for a bigger board.
        # In the paper it stores 500,000 games; here, with an 11x11 board,
        # it holds only around 2,000 games.
        self.batch_size = 512  # mini-batch size for training
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1  # self-play games collected per training iteration
        self.game_batch_num = 10000000  # total number of self-play iterations to run

        # number of simulations used by the pure MCTS player, which serves
        # as the opponent for evaluating the trained policy
        # (only for monitoring training progress)
        self.pure_mcts_playout_num = 200
        # record the win ratio against pure MCTS;
        # once the win ratio reaches 1.0, the pure MCTS playout number
        # is increased by 100 and the win ratio is reset to 0
        self.best_win_ratio = 0.0

        # GPU settings
        # configure GPU usage carefully, depending on your GPUs' and CPUs' memory;
        # 'rank' is the per-process index (e.g. an MPI rank) set elsewhere in the module
        if rank in {0, 1, 2}:
            cuda = True
        elif rank in range(10, 30):
            cuda = True
            os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
            os.environ["CUDA_VISIBLE_DEVICES"] = "1"
        else:
            cuda = False

        # cuda = True
        if (init_model is not None) and os.path.exists(init_model + ".index"):
            # start training from an initial policy-value net
            self.policy_value_net = PolicyValueNet(
                self.board_width,
                self.board_height,
                block=self.resnet_block,
                init_model=init_model,
                cuda=cuda,
            )
        elif (transfer_model is not None) and os.path.exists(transfer_model + ".index"):
            # start training from a pre-trained policy-value net
            self.policy_value_net = PolicyValueNet(
                self.board_width,
                self.board_height,
                block=self.resnet_block,
                transfer_model=transfer_model,
                cuda=cuda,
            )
        else:
            # start training from a new policy-value net
            self.policy_value_net = PolicyValueNet(
                self.board_width, self.board_height, block=self.resnet_block, cuda=cuda
            )

        self.mcts_player = MCTSPlayer(
            policy_value_function=self.policy_value_net.policy_value_fn_random,
            action_fc=self.policy_value_net.action_fc_test,
            evaluation_fc=self.policy_value_net.evaluation_fc2_test,
            c_puct=self.c_puct,
            n_playout=self.n_playout,
            is_selfplay=True,
        )
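The GPU-setting branch above reads a module-level rank. A sketch of how that value is commonly obtained with mpi4py follows; using mpi4py here is an assumption based on the rank-based branching, not something shown in the snippet.

from mpi4py import MPI  # assumption: rank is an MPI process rank

comm = MPI.COMM_WORLD
rank = comm.Get_rank()  # per-process index used above to decide GPU placement
size = comm.Get_size()  # total number of self-play / training processes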