Esempio n. 1
0
def MCTS_Search(board, player, num_reads, n_net):
    """Run ``num_reads`` MCTS simulations from ``board`` and return the root.

    Every simulation picks a leaf with ``root.select_leaf()``, evaluates it
    with ``n_net`` and backpropagates the returned value. A leaf whose
    position is finished (terminal board, or no legal moves for the side to
    move) is only backpropagated; any other leaf is first expanded with the
    network's prior probabilities.

    Args:
        board: Position to search from, in the format used by ``checkers``.
        player: Side to move at ``board``.
        num_reads: Number of simulations to run.
        n_net: Callable that takes ``checkers.get_state2(board, player)`` and
            returns ``(child_prior_prob, value)``. The priors must support
            ``.cpu().detach().numpy()`` (presumably a torch tensor).

    Returns:
        The root ``Node`` after all simulations have been run.
    """
    root = Node(board,
                checkers.get_all_moves(board, player),
                player,
                move=None,
                parent=ParentRootNode())
    for _ in range(num_reads):
        leaf = root.select_leaf()
        child_prior_prob, value = n_net(
            checkers.get_state2(leaf.board, leaf.player))
        # The finished-game test has to look at the selected leaf: the root
        # board is the same on every simulation, so testing it says nothing
        # about the position that was actually reached.
        game_over = (
            checkers.isTerminal(leaf.board)
            or len(checkers.get_all_moves(leaf.board, leaf.player)) == 0
        )
        if game_over:
            print("Finished Game")
            leaf.backpropagate(value)
            leaf.print_tree()
        else:
            # Flatten the priors to a 1-D numpy array before expanding.
            child_prior_prob = (
                child_prior_prob.cpu().detach().numpy().reshape(-1))
            leaf.expand_and_evaluate(child_prior_prob)
            leaf.backpropagate(value)

    root.print_tree()
    return root
Esempio n. 2
0
def MCTS_self_play(nnet, num_games, s_index, iteration):
    """Play ``num_games`` self-play games and return the collected samples.

    For every move an MCTS search (``roll_out`` simulations) is run, the
    policy from ``get_policy`` is recorded together with the position, and
    the move with the highest policy entry is played. A game stops when
    ``checkers.isTerminal`` reports ``True``, when the side to move has no
    legal moves, or when the move counter reaches 150.

    Args:
        nnet: Network handed to ``MCTS_Search``.
        num_games: Number of games to play.
        s_index: First game index; only shifts the range that is iterated.
        iteration: Unused by this function.

    Returns:
        A list of ``[state, policy, value]`` samples over all games, where
        ``state`` is ``checkers.get_state2(board, player)``. The first sample
        of each game is stored with value 0; all others carry the game's
        final value (-1 if player 1 ran out of moves, 1 if player 2 did,
        0 otherwise).
    """
    samples = []
    for _ in tqdm(range(s_index, num_games + s_index)):
        board = checkers.initial_board(board_size, board_size)
        player = 1
        history = []
        value = 0
        temperature = 1
        move_count = 0
        while checkers.isTerminal(board, player) is not True:
            root = MCTS_Search(board, player, roll_out, nnet)
            policy = get_policy(root, temperature)
            history.append([board, player, policy])

            # Play the move with the largest policy entry.
            chosen = root.possible_moves[np.argmax(policy)]
            board = checkers.apply_move(
                root.board, chosen[0], chosen[1], root.player)
            player = checkers.switch_player(player)

            if len(checkers.get_all_moves(board, player)) == 0:
                # Player 1 is White; the outcome is keyed on the side that
                # has no legal move left.
                value = -1 if player == 1 else (1 if player == 2 else 0)
                break
            if move_count == 150:
                value = 0
                break
            move_count += 1

        samples.extend(
            [checkers.get_state2(state, mover), pi, 0 if idx == 0 else value]
            for idx, (state, mover, pi) in enumerate(history)
        )
    return samples
Esempio n. 3
0
def MCTS_self_play(nnet, num_games, s_index, iteration):
    """Play ``num_games`` self-play games and save each game's samples.

    For every move an MCTS search with 500 simulations is run, the policy
    from ``get_policy`` is recorded together with the position, and the move
    with the highest policy entry is played. The value ``t`` passed to
    ``get_policy`` is 1 at first and 0.1 once more than 15 moves have been
    played. A game stops when ``checkers.isTerminal`` reports a finished
    position, when the side to move has no legal moves, or when the move
    counter reaches 150.

    Each game is written with ``save_data`` to
    ``MCTS_iteration-<iteration>_game-<index>.p`` as a list of
    ``[board, player, policy, value]`` entries; the first entry of a game
    is stored with value 0.

    Args:
        nnet: Network handed to ``MCTS_Search``.
        num_games: Number of games to play.
        s_index: Index of the first game, used in the output filenames.
        iteration: Iteration number, used in the output filenames.

    Returns:
        None.
    """
    for itt in tqdm(range(s_index, num_games + s_index)):
        board = checkers.initial_board(board_size, board_size)
        player = 1
        data = []
        value = 0
        num_moves = 0
        t = 1
        # Keep playing while the game is NOT over. Looping on a truthy
        # isTerminal() would skip the game entirely on a fresh board.
        while not checkers.isTerminal(board):
            if num_moves > 15:
                t = 0.1
            root = MCTS_Search(board, player, 500, nnet)
            policy = get_policy(root, t)
            data.append([board, player, policy])
            chosen = root.possible_moves[np.argmax(policy)]
            board = checkers.apply_move(root.board, chosen[0], chosen[1],
                                        root.player)
            player = checkers.switch_player(player)
            if len(checkers.get_all_moves(board, player)) == 0:
                # Player == 1 means White pieces.
                # NOTE(review): the sign convention here (+1 when player 1
                # has no moves) is kept as found; confirm it matches what the
                # training code expects.
                if player == 1:
                    value = 1
                elif player == 2:
                    value = -1
                else:
                    value = 0
                # The game is decided; stop instead of searching a position
                # that has no legal moves.
                break
            if num_moves == 150:
                value = 0
                break
            num_moves += 1

        data_x = [
            [s, pl, po, 0 if ind == 0 else value]
            for ind, (s, pl, po) in enumerate(data)
        ]
        filename = "MCTS_iteration-{:d}_game-{:d}.p".format(iteration, itt)
        save_data(filename, data_x)
Esempio n. 4
0
def MCTS_Play_WithRandom(nnet, num_games):
    """Play MCTS (player 1) against a random mover and count the results.

    Player 1 chooses the move with the highest entry of the policy returned
    by ``get_policy`` after an MCTS search with ``roll_out`` simulations;
    the other player plays ``checkers.get_random_move``. A game stops when
    ``checkers.isTerminal`` reports ``True``, when the side to move has no
    legal moves, or when the move counter reaches 200.

    Args:
        nnet: Network handed to ``MCTS_Search``.
        num_games: Number of games to play.

    Returns:
        ``(wins, draws)``: games in which player 2 was left without a legal
        move, and games stopped by the 200-move limit.
    """
    wins = 0
    draws = 0
    for _ in tqdm(range(num_games)):
        board = checkers.initial_board(board_size, board_size)
        player = 1
        temperature = 1
        plies = 0
        while checkers.isTerminal(board, player) is not True:
            if player != 1:
                # Opponent: uniformly random legal move.
                random_move = checkers.get_random_move(board, player)
                board = checkers.apply_move(
                    board, random_move[0], random_move[1], player)
            else:
                # MCTS side: play the move with the largest policy entry.
                root = MCTS_Search(board, player, roll_out, nnet)
                policy = get_policy(root, temperature)
                chosen = root.possible_moves[np.argmax(policy)]
                board = checkers.apply_move(
                    root.board, chosen[0], chosen[1], root.player)

            player = checkers.switch_player(player)
            if len(checkers.get_all_moves(board, player)) == 0:
                # Player 1 is White; it wins when player 2 cannot move.
                if player == 2:
                    wins += 1
                break
            if plies == 200:
                draws += 1
                break
            plies += 1
    return wins, draws