Esempio n. 1
0
def run(start_player=0, is_shown=1):
    """Set up a 15x15 five-in-a-row (gomoku) game and play it in the GUI.

    Builds the board and game, loads the policy-value network from
    ``training/model_best/policy.model`` (resolved against the current
    working directory), wraps it in an MCTS player and hands that player
    to ``Game.start_play_with_UI``.

    Args:
        start_player: Not used by the GUI path below. Kept so existing
            callers keep working and for the terminal mode sketched in
            the comment near the end of this function.
        is_shown: Not used by the GUI path below; see ``start_player``.
    """
    n = 5
    width, height = 15, 15

    # NOTE: the model path is relative to the current working directory,
    # so the script has to be launched from the project root.
    # Other checkpoints that have been used here, for reference:
    #   'model_15_15_5/best_policy.model'  (15x15)
    #   'model/best_policy.model'          (6x6 or 11x11)
    #   'training/best_policy.model'
    model_file = path.join(os.getcwd(), 'training/model_best/policy.model')

    board = Board(width=width, height=height, n_in_row=n)
    game = Game(board)

    # NOTE(review): block=19 is presumably the number of residual blocks
    # in the network and must match the saved checkpoint -- confirm.
    best_policy = PolicyValueNet(board_width=width,
                                 board_height=height,
                                 block=19,
                                 init_model=model_file,
                                 cuda=True)

    # For AI-vs-AI play, a second MCTSPlayer can be built from the
    # ``*_oppo`` counterparts (best_policy.action_fc_test_oppo and
    # best_policy.evaluation_fc2_test_oppo) after copying the weights with
    # best_policy.save_numpy(...) / best_policy.load_numpy(...).
    alpha_zero_player = MCTSPlayer(
        best_policy,
        model_file,
        policy_value_function=best_policy.policy_value_fn_random,
        action_fc=best_policy.action_fc_test,
        evaluation_fc=best_policy.evaluation_fc2_test,
        c_puct=5,
        n_playout=400,
        is_selfplay=False)

    # To play in the terminal without the GUI (moves are typed as "2,3";
    # start_player=0 lets the human move first), use instead:
    #   human = Human()
    #   win = game.start_play(human, alpha_zero_player,
    #                         start_player=start_player,
    #                         is_shown=is_shown, print_prob=True)
    #   return win

    # Play in the GUI against the AlphaZero player.
    game.start_play_with_UI(alpha_zero_player)
Esempio n. 2
0
    def __init__(self):
        """Ask for the board size via the menu, then start a GUI game vs. the AI.

        Creates a ``Gui_menu`` and reads its ``rule`` attribute to choose
        between an 11x11 and a 15x15 board. It then loads the matching
        policy-value network (path resolved against the current working
        directory), wraps it in an MCTS player and hands that player to
        ``Game.start_play_with_UI``.

        Raises:
            ValueError: If ``menu.rule`` is neither 11 nor 15. Previously
                this surfaced as an ``UnboundLocalError`` further down.
        """
        self.start_player = 0
        self.is_shown = 1
        menu = Gui_menu()

        self.n = 5  # stones in a row needed to win (five-in-a-row rule)

        if menu.rule == 11:
            width, height = 11, 11  # board width and height
            model_file = 'model_11_11_5/best_policy.model'
        elif menu.rule == 15:
            width, height = 15, 15  # board width and height
            model_file = 'model_15_15_5/best_policy.model'
        else:
            # Fail early with a clear message instead of leaving width,
            # height and model_file unbound.
            raise ValueError(
                'Unsupported board rule {!r}; expected 11 or 15'.format(
                    menu.rule))

        # The model path is relative to the current working directory.
        model_file = path.join(os.getcwd(), model_file)

        board = Board(width=width, height=height, n_in_row=self.n)  # game board
        game = Game(board)

        # NOTE(review): block=19 is presumably the number of residual
        # blocks in the network and must match the checkpoint -- confirm.
        best_policy = PolicyValueNet(board_width=width,
                                     board_height=height,
                                     block=19,
                                     init_model=model_file,
                                     cuda=True)

        # For AI-vs-AI play, a second MCTSPlayer can be built from the
        # ``*_oppo`` counterparts (best_policy.action_fc_test_oppo and
        # best_policy.evaluation_fc2_test_oppo) after copying the weights
        # with best_policy.save_numpy(...) / best_policy.load_numpy(...).
        alpha_zero_player = MCTSPlayer(
            policy_value_function=best_policy.policy_value_fn_random,
            action_fc=best_policy.action_fc_test,
            evaluation_fc=best_policy.evaluation_fc2_test,
            c_puct=5,
            n_playout=400,
            is_selfplay=False)

        # To play in the terminal without the GUI (moves are typed as
        # "2,3"; start_player=0 lets the human move first), use instead:
        #   human = Human()
        #   win = game.start_play(human, alpha_zero_player,
        #                         start_player=self.start_player,
        #                         is_shown=self.is_shown, print_prob=True)

        # Play in the GUI against the AlphaZero player.
        game.start_play_with_UI(alpha_zero_player)