예제 #1
0
def PlayGame(opponent):
    """Run the interactive game loop against the selected kind of opponent.

    The module-level ``board`` is reset to a fresh position and drawn, then
    the branch matching ``opponent`` runs:

    * ``"Human"``   - both sides are driven through the GUI; the loop only
      waits on ``cont`` between turns and ends after one finished game.
    * ``"Rand"``    - player 2 plays a uniformly random legal move.
    * ``"MinMax"``  - player 2 plays the move whose resulting position has
      the lowest ``np.sum(Functions.boardToNN(...))``.
    * ``"Network"`` - player 2 plays the move whose resulting position has
      the lowest network output; weights are restored from
      ``./save/NetworkPlayer`` when that checkpoint can be loaded.

    Any other value only resets and redraws the board.

    NOTE(review): the three computer branches loop on
    ``Functions.GameOver(board) == 0`` and the board is re-initialised after
    every finished game, so they appear to keep starting new games
    indefinitely - confirm this is intended.

    Args:
        opponent: one of the strings listed above.
    """
    global board
    board = Functions.BoardInit()
    player = 1
    UpdateBoard(player)

    if opponent == "Human":
        player = randint(1, 2)
        gamePlaying = 1
        while gamePlaying != 0:
            if Functions.GameOver(board) == 1:
                gamePlaying = 0
                _AnnounceResult(board)
                board = Functions.BoardInit()

            # Block until `cont` changes; presumably the GUI click handler
            # sets it once a move has been entered - TODO confirm.
            cont.set(0)
            window.wait_variable(cont)
            player = Functions.nextPlayer(board, player)

            time.sleep(0.25)

    elif opponent == "Rand":
        while Functions.GameOver(board) == 0:
            player = randint(1, 2)
            _PlayOneGame(player, _PickRandomMove)

    elif opponent == "MinMax":
        while Functions.GameOver(board) == 0:
            player = randint(1, 2)
            UpdateBoard(player)
            _PlayOneGame(
                player,
                lambda b, p: _PickLowestScoringMove(b, p, np.sum))

    elif opponent == "Network":
        ops.reset_default_graph()
        numInp = 65  # the number of inputs for the neural network
        numLabel = 1  # the number of inputs for the labels

        # Create placeholders for input and label
        inp, label = ml.placeholders(numInp, numLabel)

        # Initialise parameters
        parameters = ml.initialiseParameters()

        # Build the neural network
        out = ml.network(inp, parameters)

        modelPath = "./save/NetworkPlayer"
        # Initialise all the variables
        init = tf.global_variables_initializer()

        saver = tf.train.Saver()

        def networkScore(nnBoard):
            # Evaluated in the default session, i.e. inside the `with` below.
            return out.eval(feed_dict={inp: nnBoard})

        with tf.Session() as sess:
            sess.run(init)
            # Load the saved network, or fall back to the fresh initialisation.
            # Only ordinary errors are treated as "no checkpoint"; Ctrl-C and
            # interpreter exit still propagate.
            try:
                saver.restore(sess, modelPath)
            except Exception:
                print("Initialiing")
            while Functions.GameOver(board) == 0:
                board = Functions.BoardInit()
                player = randint(1, 2)
                UpdateBoard(player)
                _PlayOneGame(
                    player,
                    lambda b, p: _PickLowestScoringMove(b, p, networkScore))


def _AnnounceResult(board):
    """Print the winner of a finished game from the disc-count difference."""
    score = Functions.BlackWhiteCount(board)
    if score > 0:
        winner = 1
    elif score < 0:
        winner = 2
    else:
        winner = 0
    if winner != 0:
        print("Game Over! The winner is player ", winner)
    else:
        print("It's a tie!")


def _PickRandomMove(board, player):
    """Return a random legal ``(y, x)`` for ``player``, or None if none exist.

    The debug prints of the candidate list and the pick are kept as-is.
    """
    movesAvailable = Functions.MovesAvailable(board, player)
    print("movesAvailable", movesAvailable)
    if not movesAvailable:
        return None
    numPicked = randint(0, len(movesAvailable) - 1)
    print("numPicked", numPicked)
    movePicked = movesAvailable[numPicked]
    print("movePicked from movesAvailable", movePicked[0], movePicked[1])
    return int(movePicked[0]), int(movePicked[1])


def _PickLowestScoringMove(board, player, scoreBoard):
    """Return the legal ``(y, x)`` whose resulting position scores lowest.

    Each candidate is played on a copy of ``board`` and the copy, encoded by
    ``Functions.boardToNN``, is passed to ``scoreBoard``. The first move with
    the strictly lowest score wins ties. Returns None when there is no legal
    move, so the caller can skip the move instead of playing at (0, 0).
    """
    bestMove = None
    bestPred = None
    for move in Functions.MovesAvailable(board, player):
        x = int(move[1])
        y = int(move[0])
        testBoard = Functions.MakeMove(Functions.BoardCopy(board), y, x,
                                       player)
        pred = scoreBoard(Functions.boardToNN(testBoard, player))
        if bestPred is None or pred < bestPred:
            bestPred = pred
            bestMove = (y, x)
    return bestMove


def _PlayOneGame(player, chooseMove):
    """Play human (player 1) against computer (player 2) until a game ends.

    ``chooseMove(board, player)`` must return ``(y, x)`` or None. The
    module-level ``board`` is updated in place of a return value.
    """
    global board
    gamePlaying = 1
    while gamePlaying != 0:
        UpdateBoard(player)

        if Functions.GameOver(board) == 1:
            gamePlaying = 0
            _AnnounceResult(board)
            board = Functions.BoardInit()
            # The rest of this iteration still runs on the fresh board, as
            # the loop only stops at its next condition check.

        # Result unused; call retained in case MovesAvailable has side
        # effects - TODO confirm and remove.
        Functions.MovesAvailable(board, player)

        cont.set(0)
        if player == 1:
            # Presumably released by the GUI once the human has moved.
            window.wait_variable(cont)
        player = Functions.nextPlayer(board, player)

        time.sleep(0.25)
        # Let the computer move for as long as it is player 2's turn.
        while player == 2:
            move = chooseMove(board, player)
            if move is not None:
                moveY, moveX = move
                board = Functions.MakeMove(board, moveY, moveX, player)
            player = Functions.nextPlayer(board, player)
예제 #2
0
def training(opponent, batches):
    """Train the value network by playing games against ``opponent``.

    Player 1 is always the network. For each batch a new session is opened,
    the checkpoint at ``./save/NetworkPlayer`` is restored if possible,
    ``batchSize`` games are played, and one Adam step is run on every
    position recorded during the batch plus its colour-reversed copy.

    Labels follow the scheme of the original code: the label of a position
    is the network output for the next position times ``discountRate``, and
    the final position of a game is labelled with
    ``Functions.BlackWhiteCount(board)``.

    When the side to move has no legal move the turn is passed with
    ``Functions.nextPlayer`` and nothing is recorded, so boards and labels
    stay aligned one-to-one.

    NOTE(review): saving the checkpoint is disabled below, so the update made
    in one batch is not carried over to the next - confirm this is intended.

    Args:
        opponent: ``"Rand"`` (random mover) or ``"Network"`` (self-play, where
            player 2 picks the lowest network output).
        batches: number of batches to run.

    Raises:
        ValueError: if ``opponent`` is not a supported value. Previously such
            a value made the game loop spin forever on player 2's turn.
    """
    if opponent not in ("Rand", "Network"):
        raise ValueError(
            "unsupported opponent %r; expected 'Rand' or 'Network'" %
            (opponent, ))

    import Functions
    import MachineLearning as ml
    import tensorflow as tf
    import numpy as np
    import time
    from random import random, randint

    tf.reset_default_graph()
    lr = 0.0001  # the learning rate
    numInp = 65  # the number of inputs for the neural network
    numLabel = 1  # the number of inputs for the labels
    batchSize = 50  # the size of the batches
    discountRate = 0.99  # the discount rate for temporal difference learning

    # Create placeholders for input and label
    inp, label = ml.placeholders(numInp, numLabel)

    # Initialise parameters
    parameters = ml.initialiseParameters()

    # Build the neural network
    out = ml.network(inp, parameters)

    # Cost function for use in training
    cost = ml.computeCost(out, label)

    # Training operation
    optimiser = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)

    player1 = "Network"
    player2 = opponent
    # Initialise all the variables
    init = tf.global_variables_initializer()

    progstart = time.time()
    saver = tf.train.Saver()
    expRate = 0.2  # probability of playing a random exploratory move
    p1wins = 0  # number of games player 1 has won overall
    p2wins = 0  # number of games player 2 has won overall
    modelPath = "./save/NetworkPlayer"

    def evaluate(nnBoard):
        """Return the network output for an encoded board (default session)."""
        return out.eval(feed_dict={inp: nnBoard})

    def appendLabel(labels, value):
        """Return ``labels`` with one more column holding ``value``."""
        cell = np.asarray(value, dtype=float).reshape(1, 1)
        return np.concatenate((labels, cell), axis=1)

    def scoreMove(board, player, y, x):
        """Network output for the position after ``player`` plays (y, x)."""
        testBoard = Functions.MakeMove(Functions.BoardCopy(board), y, x,
                                       player)
        return evaluate(Functions.boardToNN(testBoard, player))

    def chooseNetworkMove(board, player, preferHigh):
        """Pick ``(y, x, pred)`` for a network player, or None without moves.

        With probability ``1 - expRate`` the best-scoring move is taken
        (highest output when ``preferHigh``, otherwise lowest; the first move
        wins ties). Otherwise a uniformly random legal move is taken.
        """
        movesAvailable = Functions.MovesAvailable(board, player)
        if random() > expRate:
            best = None
            for move in movesAvailable:
                x = int(move[1])
                y = int(move[0])
                pred = scoreMove(board, player, y, x)
                if best is None:
                    better = True
                elif preferHigh:
                    better = pred > best[2]
                else:
                    better = pred < best[2]
                if better:
                    best = (y, x, pred)
            return best
        if movesAvailable:
            move = movesAvailable[randint(0, len(movesAvailable) - 1)]
            x = int(move[1])
            y = int(move[0])
            return (y, x, scoreMove(board, player, y, x))
        return None

    def chooseRandomMove(board, player):
        """Pick a random legal ``(y, x, None)``, or None without moves."""
        movesAvailable = Functions.MovesAvailable(board, player)
        if not movesAvailable:
            return None
        move = movesAvailable[randint(0, len(movesAvailable) - 1)]
        return (int(move[0]), int(move[1]), None)

    i = 1
    while i <= batches:
        # Every batch starts with one placeholder sample: the opening
        # position encoded for player 1, labelled 0.
        board = Functions.BoardInit()
        boardArray = Functions.boardToNN(board, 1)
        labelArray = np.zeros((1, 1))
        batchp1wins = 0
        batchp2wins = 0
        with tf.Session() as sess:
            sess.run(init)
            # Load the saved network, or fall back to the fresh initialisation.
            # Only ordinary errors count as "no checkpoint"; Ctrl-C and
            # interpreter exit still propagate.
            try:
                saver.restore(sess, modelPath)
            except Exception:
                print("Initialising")
            start = time.time()
            for _ in range(batchSize):
                # 1 or 2, which determines the starting player
                player = randint(1, 2)
                board = Functions.BoardInit()
                nnBoard = Functions.boardToNN(board, player)
                boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                # Labels of this game; column k belongs to the board recorded
                # one step earlier, so the opening board gets the first label.
                gameLabels = np.empty((1, 0))

                while Functions.GameOver(board) == 0:
                    if player == 1:
                        # Player 1 (network) looks for the highest output.
                        choice = chooseNetworkMove(board, player, True)
                    elif player2 == "Rand":
                        choice = chooseRandomMove(board, player)
                    else:
                        # Player 2 (network) looks for the lowest output.
                        choice = chooseNetworkMove(board, player, False)

                    if choice is None:
                        # No legal move: pass without recording anything.
                        player = Functions.nextPlayer(board, player)
                        continue

                    moveY, moveX, pred = choice
                    board = Functions.MakeMove(board, moveY, moveX, player)
                    nnBoard = Functions.boardToNN(board, player)
                    if pred is None:
                        # Random mover: score the position it produced.
                        pred = evaluate(nnBoard)
                    # label = output * discount rate
                    gameLabels = appendLabel(gameLabels, pred * discountRate)
                    boardArray = np.concatenate((boardArray, nnBoard), axis=1)

                    if Functions.GameOver(board) == 1:
                        # The final position is labelled with the result.
                        gameLabels = appendLabel(
                            gameLabels, Functions.BlackWhiteCount(board))
                    else:
                        player = Functions.nextPlayer(board, player)

                labelArray = np.concatenate((labelArray, gameLabels), axis=1)
                Winner = Functions.BlackWhiteCount(board)
                if Winner > 0:
                    batchp1wins += 1
                    p1wins += 1
                elif Winner < 0:
                    batchp2wins += 1
                    p2wins += 1

            print("batch: ", i, " out of ", batches)

            print("player 1 (", player1, ") wins ", batchp1wins)
            print("player 2 (", player2, ") wins ", batchp2wins)
            # Add the colour-reversed copy of the data: a position where
            # player 2 is guaranteed to win is one where player 1 is
            # guaranteed to win once every piece is reversed.
            labelArrayOpp = Functions.ReverseArray(labelArray)
            boardArrayOpp = Functions.ReverseArray(boardArray)
            labelArray = np.concatenate((labelArray, labelArrayOpp), axis=1)
            boardArray = np.concatenate((boardArray, boardArrayOpp), axis=1)

            # Train the neural network on the whole batch in one step
            _, boardCost = sess.run([optimiser, cost],
                                    feed_dict={
                                        inp: boardArray,
                                        label: labelArray
                                    })
            print(boardCost)
            # NOTE(review): saving is disabled in the original:
            #   savePath = saver.save(sess, modelPath)
            end = time.time()
            print("batch time(secs) ", end - start)

        i += 1

    progend = time.time()
    print("total games: ", batches * batchSize)
    print("player 1 (", player1, ") wins ", p1wins)
    print("player 2 (", player2, ") wins ", p2wins)
    print("total time(secs) ", progend - progstart)