def simulateChildren(mct, board, player, myPlayer, numSims, verbose=False):
    """Score every child of ``board`` with random playouts; return the best.

    Args:
        mct: Dict mapping ``genFullKey(board, player)`` keys to accumulated
            scores. It is updated in place.
        board: Position whose children are evaluated.
        player: Player handed to ``tt.listNextBoards`` to generate children.
        myPlayer: Player from whose point of view scores are computed.
        numSims: Number of playouts ``simulate`` runs for each child.
        verbose: When true, iterate the children through a ``tqdm`` bar.

    Returns:
        The highest accumulated score among the children. When ``board``
        already has a winner or has no children, the end-board score scaled
        by ``numSims ** 2`` is stored in ``mct`` and returned instead.
    """
    key = genFullKey(board, player)
    if key not in mct:
        mct[key] = 0

    nextBoards = tt.listNextBoards(board, player)
    winner = tt.getWinner(board)

    if winner or not nextBoards:
        # Score with the real winner: a full board without a winner is a tie
        # and must not be reported as a win for whoever moved last.
        score = scoreEndBoard(board, winner, myPlayer)
        score = score * math.pow(numSims, 2)
        mct[key] = score
        return score

    #   add next boards to mct
    for nextBoard in (tqdm(nextBoards) if verbose else nextBoards):
        simChildrenInner(mct, nextBoard, player, myPlayer, numSims)

    #   pick the highest score and return that
    return max(mct[genFullKey(nextBoard, tt.togglePlayer(player))]
               for nextBoard in nextBoards)
def simChildrenInner(mct, nextBoard, player, myPlayer, numSims):
    """Run ``numSims`` playouts from ``nextBoard`` and add the total to ``mct``.

    The entry is keyed by ``genFullKey(nextBoard, tt.togglePlayer(player))``
    and is created with a value of 0 when it does not exist yet.
    """
    mover = tt.togglePlayer(player)
    entryKey = genFullKey(nextBoard, mover)

    # Make sure the entry exists before the playouts run.
    mct.setdefault(entryKey, 0)

    playoutTotal = simulate(numSims, nextBoard, mover, myPlayer)
    mct[entryKey] = mct[entryKey] + playoutTotal
def minimax_inner(count, board, player, myPlayer, prevBoards, alpha, beta):
    """Recursively score ``board`` with minimax and alpha-beta cut-offs.

    Args:
        count: One-element list; ``count[0]`` is incremented for every board
            that is evaluated rather than served from ``prevBoards``.
        board: Position to score.
        player: Compared with ``myPlayer`` to choose between maximizing and
            minimizing; children are generated for ``tt.togglePlayer(player)``.
            NOTE(review): this looks like "the player who made the last move"
            -- confirm against the ``minimax`` wrapper, which is not visible.
        myPlayer: Player from whose point of view ``scoreEndBoard`` scores.
        prevBoards: Dict of ``tt.hash(board)`` -> score, shared across the
            whole search and updated in place.
        alpha: Best score found so far for the maximizing side.
        beta: Best score found so far for the minimizing side.

    Returns:
        The score of ``board``: a cached value, an end-board score, or the
        best child score found before a cut-off.
    """
    hashKey = tt.hash(board)
    # Reuse a score stored earlier for the same hash.
    # NOTE(review): the key ignores alpha/beta, and stored scores may come
    # from searches that were cut off early -- confirm this is acceptable.
    if hashKey in prevBoards:
        return prevBoards[hashKey]

    count[0] += 1

    winner = tt.getWinner(board)
    if winner:
        score = scoreEndBoard(board, winner, myPlayer)
        # Not cached here; the calling frame stores the returned score.
        # prevBoards[hashKey] = score
        return score
    elif tt.noMoreMoves(board):
        # No winner and no moves left: ``winner`` is falsy at this point.
        score = scoreEndBoard(board, winner, myPlayer)
        # prevBoards[hashKey] = score
        return score
    else:
        nextBoards = tt.listNextBoards(board, tt.togglePlayer(player))
        if player == myPlayer:  #   maximizing next moves
            bestScore = -math.inf
            for nextBoard in nextBoards:
                # Cut-off: the remaining children are skipped once the
                # window is empty.
                if beta <= alpha:
                    break
                score = minimax_inner(count,
                                      nextBoard,
                                      player=tt.togglePlayer(player),
                                      myPlayer=myPlayer,
                                      prevBoards=prevBoards,
                                      alpha=alpha,
                                      beta=beta)
                # Remember the child's score for later lookups.
                prevBoards[tt.hash(nextBoard)] = score
                if score > bestScore:
                    bestScore = score
                    alpha = bestScore
            return bestScore
        else:  #   minimizing next moves
            bestScore = math.inf
            for nextBoard in nextBoards:
                # Same cut-off test as in the maximizing branch.
                if beta <= alpha:
                    break
                score = minimax_inner(count,
                                      nextBoard,
                                      player=tt.togglePlayer(player),
                                      myPlayer=myPlayer,
                                      prevBoards=prevBoards,
                                      alpha=alpha,
                                      beta=beta)
                # Remember the child's score for later lookups.
                prevBoards[tt.hash(nextBoard)] = score
                if score < bestScore:
                    bestScore = score
                    beta = bestScore
            return bestScore
예제 #4
0
def test(net, epochs):
    """Play ``epochs`` games against a random opponent and tally the results.

    The network is switched to eval mode. On its turn the network's output is
    masked so occupied squares can never be selected, and the highest-scoring
    remaining square is played. The opponent plays a uniformly random empty
    square.

    Args:
        net: Model called with a ``(1, 1, 18)`` tensor; its output is viewed
            as 9 values, one per square.
        epochs: Number of games to play.

    Returns:
        Tuple ``(numWins, numLosses, numTies)`` from the network's side.
    """
    net.eval()

    numWins = 0
    numLosses = 0
    numTies = 0

    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)

        board = np.zeros(shape=(3, 3))

        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)

        while not winner and movesLeft:
            if player == computersPlayer:
                #   generate a move
                oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                output = net(oneHot)

                #   mask out invalid moves; -inf (rather than a finite
                #   constant) guarantees an occupied square never wins the
                #   argmax, whatever range the network outputs
                invalidMoves = board.flatten() > 0
                maskedOutput = output.clone().view(9)
                maskedOutput[invalidMoves] = float("-inf")
                _, index = maskedOutput.max(0)

                #   apply the move; convert the 0-d tensor to a plain int
                #   before using it as a NumPy index
                move = int(index)
                board = board.flatten()
                board[move] = computersPlayer
                board = board.reshape(3, 3)

            else:   #   opponents turn
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)

            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:   #   winner == False
            numTies += 1

    return numWins, numLosses, numTies
def playGame():
    """Play one interactive console game against the minimax computer player.

    The computer plays as player 2 and moves first. The human enters moves as
    ``'y x'`` (row digit, one separator character, column digit). Malformed,
    out-of-range, or occupied squares are rejected and the prompt repeats.
    """
    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = 2  #random.randint(1,2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")
    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            board = pickBestNextBoard(board, player, computersPlayer)
            player = tt.togglePlayer(player)
        elif player == tt.togglePlayer(computersPlayer):
            validMove = False
            while not validMove:
                move = input("input move of form 'y x' ")
                #   validate move: a parse failure or an out-of-range index
                #   is treated as an invalid move instead of crashing
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value; ``is not 0`` tests object identity.
                    validMove = board[y][x] == 0
                except (ValueError, IndexError):
                    validMove = False
                if not validMove:
                    print("!!!INVALID MOVE!!!")
                    continue
                board[y][x] = tt.togglePlayer(computersPlayer)
                player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)

    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
def expand(mct, board, player):
    """Add every child of ``board`` to ``mct`` with zeroed ``n``/``v`` stats.

    Children are keyed by ``genFullKey(child, tt.togglePlayer(player))``.
    Keys already present are left untouched.
    """
    for child in tt.listNextBoards(board, player):
        childKey = genFullKey(child, tt.togglePlayer(player))
        #   there might be convergent branches, so never overwrite an entry
        mct.setdefault(childKey, {'n': 0, 'v': 0})
예제 #7
0
def scoreEndBoard(board, winner, myPlayer):
    """Score a finished game from ``myPlayer``'s point of view.

    Returns 1 when ``winner`` is falsy, -10 when the winner is
    ``tt.togglePlayer(myPlayer)``, 10 when the winner is ``myPlayer``, and
    ``None`` for any other value. ``board`` is not inspected.
    """
    if not winner:
        return 1
    if winner == tt.togglePlayer(myPlayer):
        return -10
    return 10 if winner == myPlayer else None
예제 #8
0
def train(qTables, numGames, alpha, tryHard):
    """Play ``numGames`` games against a random opponent, updating ``qTables``.

    ``tryHard`` is raised linearly after every game so that it reaches 1
    after ``numGames`` games. After each game the final score and the moves
    the computer made are passed to ``updateQTable`` together with ``alpha``.
    """
    growthPerGame = (1 - tryHard) / numGames

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        player = 2
        keysSoFar, movesSoFar = [], []
        computersPlayer = random.randint(1, 2)

        # A fresh board always allows at least one move, so test the end
        # conditions after each move instead of before it.
        while True:
            if player == computersPlayer:
                chosen = pickBestNextMove(qTables, keysSoFar, board, player,
                                          computersPlayer, tryHard)
                movesSoFar.append(chosen)
            else:
                chosen = random.choice(tt.listEmpties(board))
            tt.applyMove(player, chosen, board)
            player = tt.togglePlayer(player)

            winner = tt.getWinner(board)
            boardFull = tt.noMoreMoves(board)
            if winner or boardFull:
                break

        finalScore = scoreEndBoard(board, winner, computersPlayer)
        updateQTable(finalScore, qTables, keysSoFar, movesSoFar, alpha)
        tryHard += growthPerGame
예제 #9
0
def oneHotTicTacToe(board, computersPlayer):
    """Encode ``board`` as a flat float32 tensor of two occupancy masks.

    The first mask marks squares equal to ``computersPlayer``. The second
    marks squares equal to ``tt.togglePlayer(computersPlayer)``. Each mask is
    flattened and the two are concatenated in that order.
    """
    mine = np.where(board == computersPlayer, 1, 0).flatten()
    theirs = np.where(board == tt.togglePlayer(computersPlayer), 1, 0).flatten()
    return torch.tensor(np.concatenate((mine, theirs)), dtype=torch.float32)
def pickBestNextMove(mct, board, player):
    """Return the child board of ``board`` with the highest score in ``mct``.

    Scores are looked up under ``genFullKey(child, tt.togglePlayer(player))``.
    The first child with the strictly highest score wins. ``None`` is
    returned when there are no children or no score exceeds ``-inf``.
    """
    champion = None
    championScore = -math.inf
    for candidate in tt.listNextBoards(board, player):
        candidateScore = mct[genFullKey(candidate, tt.togglePlayer(player))]
        if candidateScore > championScore:
            champion, championScore = candidate, candidateScore
    return champion
def minimaxGenBoardScores(inBoard, myPlayer, inPlayer):
    """Compute minimax scores for every board reachable from ``inBoard``.

    Uses an explicit stack instead of recursion. A board stays on the stack
    until all of its children have scores, then receives the max (when its
    ``player`` equals ``myPlayer``) or the min of the child scores.

    Args:
        inBoard: Starting position.
        myPlayer: Player from whose point of view end boards are scored.
        inPlayer: Player passed to ``tt.listNextBoards`` for ``inBoard``.

    Returns:
        Dict mapping ``tt.hash(board)`` to the board's score.
    """
    stack = [{'board': inBoard, 'player': inPlayer}]
    boardScores = {}

    while stack:
        args = stack[-1]
        board = args['board']
        player = args['player']
        boardKey = tt.hash(board)

        #   base case, end board
        winner = tt.getWinner(board)
        if winner:
            boardScores[boardKey] = tt.scoreEndBoard(board, winner, myPlayer)
            print("###########")
            tt.printBoard(board)
            print(boardScores[boardKey])
            stack.pop()
            continue
        if tt.noMoreMoves(board):
            boardScores[boardKey] = tt.scoreEndBoard(board, winner, myPlayer)
            stack.pop()
            continue

        #   nobody won yet, and there are more moves
        nextBoards = tt.listNextBoards(board, player)
        unscored = [
            nextBoard for nextBoard in nextBoards
            if tt.hash(nextBoard) not in boardScores
        ]
        if unscored:
            # Score the children first; this board is revisited afterwards.
            for nextBoard in unscored:
                stack.append({
                    'board': nextBoard,
                    'player': tt.togglePlayer(player),
                })
            continue

        # Look up each child's own score. The loop variable must be the one
        # that is hashed, otherwise every entry repeats a single child.
        scores = [boardScores[tt.hash(nextBoard)] for nextBoard in nextBoards]
        if player == myPlayer:  #   maximizing
            boardScores[boardKey] = max(scores)
        else:
            boardScores[boardKey] = min(scores)
        stack.pop()

    return boardScores
def test(mct, numGames, numSims):
    """Play ``numGames`` games of the Monte Carlo player vs. a random player.

    On the computer's turn the children of the current board are scored with
    ``simulateChildren`` and the board returned by ``pickBestNextMove``
    becomes the new position. The opponent plays a random empty square.

    Returns:
        Tuple ``(numWins, numLosses, numTies)`` from the computer's side.
    """
    outcomes = {"win": 0, "loss": 0, "tie": 0}

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        player = 2
        computersPlayer = random.randint(1, 2)

        # The opening position always permits a move, so check for the end
        # of the game after each move.
        while True:
            if player == computersPlayer:
                simulateChildren(mct, board, player, computersPlayer, numSims)
                board = pickBestNextMove(mct, board, player)
            else:
                randomMove = random.choice(tt.listEmpties(board))
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)

            winner = tt.getWinner(board)
            boardFull = tt.noMoreMoves(board)
            if winner or boardFull:
                break

        if winner == computersPlayer:
            outcomes["win"] += 1
        elif winner == tt.togglePlayer(computersPlayer):
            outcomes["loss"] += 1
        else:  #   tie
            outcomes["tie"] += 1

    return outcomes["win"], outcomes["loss"], outcomes["tie"]
예제 #13
0
def pickBestMove(nextMoves, player, computersPlayer):
    """Pick the most promising entry of ``nextMoves`` from network outputs.

    Each candidate is encoded and run through ``forward``. The last element
    of the session's ``'outputs'`` is read as ``(loss, tie, win)``.
    Candidates where win dominates are preferred (highest win), then those
    where tie beats loss (highest tie), otherwise the lowest loss.

    Returns:
        Dict with keys ``"move"``, ``"score"`` and ``"trainingSession"``, or
        ``None`` when no candidate is selected.
    """
    encoded = [
        oneHotTicTacToe(candidate, tt.togglePlayer(player), computersPlayer)
        for candidate in nextMoves
    ]
    sessions = [forward(network, vector, dropout=True) for vector in encoded]

    goodMoves, okayMoves, badMoves = [], [], []
    for candidate, session in zip(nextMoves, sessions):
        score = session['outputs'][-1]
        loss, tie, win = score[0], score[1], score[2]

        entry = {"move": candidate,
                 "score": score,
                 "trainingSession": session}

        if win > tie and win > loss:
            goodMoves.append(entry)
        elif tie > loss:
            okayMoves.append(entry)
        else:
            badMoves.append(entry)

    # Choose the pool to search, the score column to compare, the starting
    # threshold, and whether larger or smaller values are better.
    if goodMoves:
        pool, column, threshold, largerIsBetter = goodMoves, 2, -100000, True
    elif okayMoves:
        pool, column, threshold, largerIsBetter = okayMoves, 1, -100000, True
    else:  # only bad moves :(
        pool, column, threshold, largerIsBetter = badMoves, 0, 100000, False

    bestMove = None
    for entry in pool:
        value = entry["score"][column]
        if largerIsBetter:
            improved = value > threshold
        else:
            improved = value < threshold
        if improved:
            bestMove = entry
            threshold = value
    return bestMove
예제 #14
0
def test(qTables, numGames, tryHard=1.0):
    """Play ``numGames`` games of the Q-table player vs. a random player.

    The computer's side is drawn at random for every game. Its moves come
    from ``pickBestNextMove``; the opponent plays a random empty square.
    ``qTables`` is not updated by this function itself.

    Returns:
        Tuple ``(numWins, numLosses, numTies)`` from the computer's side.
    """
    outcomes = {"win": 0, "loss": 0, "tie": 0}

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        player = 2
        keysSoFar, movesSoFar = [], []
        computersPlayer = random.randint(1, 2)

        # A new board always has a legal move, so the end-of-game test can
        # run after each move.
        while True:
            if player == computersPlayer:
                chosen = pickBestNextMove(qTables, keysSoFar, board, player,
                                          computersPlayer, tryHard)
                movesSoFar.append(chosen)
            else:
                chosen = random.choice(tt.listEmpties(board))
            tt.applyMove(player, chosen, board)
            player = tt.togglePlayer(player)

            winner = tt.getWinner(board)
            boardFull = tt.noMoreMoves(board)
            if winner or boardFull:
                break

        if winner == computersPlayer:
            outcomes["win"] += 1
        elif winner == tt.togglePlayer(computersPlayer):
            outcomes["loss"] += 1
        else:  #   tie
            outcomes["tie"] += 1

    return outcomes["win"], outcomes["loss"], outcomes["tie"]
def pickBestNextBoard(board, player, myPlayer):
    """Return the child of ``board`` with the highest ``minimax`` score.

    Children are generated for ``myPlayer``. Each is scored by ``minimax``
    with ``player`` toggled. The number of boards evaluated (accumulated by
    ``minimax`` in a shared one-element list) is printed at the end.
    """
    evaluated = [0]  # mutable counter shared with minimax

    winner = None
    winnerScore = -10000
    for candidate in tqdm(tt.listNextBoards(board, myPlayer)):
        candidateScore = minimax(evaluated,
                                 candidate,
                                 player=tt.togglePlayer(player),
                                 myPlayer=myPlayer)
        if candidateScore > winnerScore:
            winner, winnerScore = candidate, candidateScore

    print("\ntotal boards evaluated: " + str(evaluated[0]))
    return winner
def simulate(numSimulations, board, player, myPlayer):
    """Run random playouts from ``board`` and return the summed end scores.

    Every playout starts from a deep copy of ``board`` with ``player`` to
    move. Random empty squares are played, alternating sides, until there is
    a winner or no moves remain. The result is scored with ``scoreEndBoard``
    from ``myPlayer``'s point of view. ``board`` itself is never modified.
    """
    pristine = copy.deepcopy(board)
    total = 0

    for _ in range(numSimulations):
        playout = copy.deepcopy(pristine)
        mover = player

        while True:
            winner = tt.getWinner(playout)
            boardFull = tt.noMoreMoves(playout)
            if winner or boardFull:
                break
            randomMove = random.choice(tt.listEmpties(playout))
            tt.applyMove(mover, randomMove, playout)
            mover = tt.togglePlayer(mover)

        total += scoreEndBoard(playout, winner, myPlayer)

    return total
예제 #17
0
def playGame():
    """Train or load the Q-tables, benchmark them, then exit.

    With ``saveQTables`` set, the tables are trained and pickled to
    ``fileName``; otherwise they are loaded from it. The tables are then
    tested over 1000 games against a random opponent, the tallies are
    printed, and ``quit()`` ends the process.

    NOTE: everything after ``quit()`` (the interactive game) is currently
    unreachable. It is kept so the call can be removed to play by hand.
    """
    saveQTables = False
    fileName = 'qTables.pickle'

    qTables = {}

    keysSoFar = []
    movesSoFar = []
    tryHard = 0
    alpha = 0.9
    numTrials = 1000000

    # ``with`` closes the file even if training, dumping or loading raises.
    if saveQTables:
        train(qTables, numTrials, alpha, tryHard)
        with open(fileName, 'wb') as f:
            pickle.dump(qTables, f, pickle.HIGHEST_PROTOCOL)
    else:
        # NOTE: unpickling can execute arbitrary code; only load a file that
        # this program wrote itself.
        with open(fileName, 'rb') as f:
            qTables = pickle.load(f)

    numWins, numLosses, numTies = test(qTables, 1000)
    print("VS RANDOM OPPONENT...")
    print("numWins:" + str(numWins))
    print("numLosses:" + str(numLosses))
    print("numTies:" + str(numTies))
    quit()

    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = random.randint(1, 2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")
    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            bestMove = pickBestNextMove(qTables,
                                        keysSoFar,
                                        board,
                                        player,
                                        computersPlayer,
                                        tryHard=1.0,
                                        verbose=True)
            movesSoFar.append(bestMove)
            tt.applyMove(player, bestMove, board)
            player = tt.togglePlayer(player)
        elif player == tt.togglePlayer(computersPlayer):
            validMove = False
            while not validMove:
                move = input("input move of form 'y x' ")
                #   validate move: a parse failure or an out-of-range index
                #   is treated as an invalid move instead of crashing
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value; ``is not 0`` tests object identity.
                    validMove = board[y][x] == 0
                except (ValueError, IndexError):
                    validMove = False
                if not validMove:
                    print("!!!INVALID MOVE!!!")
                    continue
                board[y][x] = tt.togglePlayer(computersPlayer)
                player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)

    score = scoreEndBoard(board, winner, computersPlayer)
    updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
    for key in keysSoFar:
        pprint(key)
        pprint(qTables[key])

    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
예제 #18
0
def test(net, criterion, optimizer, epochs):
    """Play ``epochs`` games against a random opponent and tally the results.

    The network's top-scoring square is played when it is empty. Otherwise
    one optimisation step is taken towards the mask of empty squares and the
    network is queried again, so this function does update the network.

    Returns:
        Tuple ``(numWins, numLosses, numTies)`` from the network's side.
    """
    numInvalidMoves = 0
    outcomes = {"win": 0, "loss": 0, "tie": 0}

    optimizer.zero_grad()

    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        board = np.zeros(shape=(3, 3))

        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)

        while not winner and movesLeft:
            if player != computersPlayer:
                #   opponents turn
                randomMove = random.choice(tt.listEmpties(board))
                tt.applyMove(player, randomMove, board)
            else:
                # Keep querying the network until it proposes an empty square.
                while True:
                    netInput = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(netInput)
                    _, index = output.view(9).max(0)

                    if board.flatten()[index] == 0:
                        #   valid: apply the move
                        cells = board.flatten()
                        cells[index] = computersPlayer
                        board = cells.reshape(3, 3)
                        break

                    #   invalid move: nudge the network towards empty squares
                    numInvalidMoves += 1
                    optimizer.zero_grad()
                    emptyMask = np.where(board == 0, 1, 0)
                    target = torch.tensor(emptyMask, dtype=torch.float).view(1, 1, 9)
                    criterion(output, target).backward()
                    optimizer.step()
            player = tt.togglePlayer(player)

            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)

        if winner == computersPlayer:
            outcomes["win"] += 1
        elif winner == tt.togglePlayer(computersPlayer):
            outcomes["loss"] += 1
        else:   #   winner == False
            outcomes["tie"] += 1

    return outcomes["win"], outcomes["loss"], outcomes["tie"]
예제 #19
0
def trainAgainstSelf(net, criterion, optimizer, epochs):
    """Train ``net`` by letting it play both sides of ``epochs`` games.

    Each turn the board is encoded from the mover's point of view, occupied
    squares are masked out, and the best remaining square is played. After
    the game every recorded move is trained towards the final score as seen
    by the side that made it: first side A (``computersPlayer``), then side B.
    """
    net.train()

    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)

        optimizer.zero_grad()

        board = np.zeros(shape=(3, 3))

        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)

        # (move, output) pairs recorded separately for the two sides.
        recordsA = []
        recordsB = []
        while not winner and movesLeft:
            isSideA = player == computersPlayer
            if isSideA:
                mover = computersPlayer
            else:
                mover = tt.togglePlayer(computersPlayer)

            #   generate a move
            output = net(oneHotTicTacToe(board, mover).view(1, 1, 18))

            #   mask out invalid moves
            occupied = np.where(board.flatten() > 0, True, False)
            ranked = output.clone().view(9)
            ranked[occupied] = -10
            _, move = ranked.max(0)

            #   apply the move
            cells = board.flatten()
            cells[move] = mover
            board = cells.reshape(3, 3)

            #   store for later
            (recordsA if isSideA else recordsB).append((move, output))

            player = tt.togglePlayer(player)

            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)

        #   train each side on the end score seen from its own perspective
        for side, records in (("A", recordsA), ("B", recordsB)):
            if side == "A":
                perspective = computersPlayer
            else:
                perspective = tt.togglePlayer(computersPlayer)
            score = scoreEndBoard(board, winner, perspective)

            for move, output in records:
                target = output.clone().view(9)
                target[move] = score
                target = target.view(1, 1, 9)

                optimizer.zero_grad()
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
예제 #20
0
def train(net, criterion, optimizer, epochs):
    """Train ``net`` over ``epochs`` games against a random opponent.

    The network plays its best unoccupied square each turn. After the game,
    its moves are revisited from last to first. Each is trained towards the
    final score multiplied by a factor that shrinks by 0.9 per step back.
    """
    net.train()
    dilutionFactor = 0.9

    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)

        optimizer.zero_grad()

        board = np.zeros(shape=(3, 3))

        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)

        history = []  # (move, output) pairs in the order they were played
        while not winner and movesLeft:
            if player == computersPlayer:
                #   generate a move
                output = net(oneHotTicTacToe(board, computersPlayer).view(1, 1, 18))

                #   mask out invalid moves
                occupied = np.where(board.flatten() > 0, True, False)
                ranked = output.clone().view(9)
                ranked[occupied] = -10
                _, move = ranked.max(0)

                #   apply the move
                cells = board.flatten()
                cells[move] = computersPlayer
                board = cells.reshape(3, 3)

                #   store for later
                history.append((move, output))
            else:   #   opponents turn
                randomMove = random.choice(tt.listEmpties(board))
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)

            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)

        #   get end score of game
        score = scoreEndBoard(board, winner, computersPlayer)

        # Walk the moves backwards so later moves get the larger share.
        totalDilutant = 1.0
        for move, output in reversed(history):
            totalDilutant *= dilutionFactor
            target = output.clone().view(9)
            target[move] = score * totalDilutant
            target = target.view(1, 1, 9)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
예제 #21
0
def playWithUser(net, online=True):
    """Play interactive console games against ``net`` in an endless loop.

    The computer plays as player 2 and moves first. When the network
    proposes an occupied square, one optimisation step is taken towards the
    mask of empty squares and it is asked again. The human enters moves as
    ``'y x'``; malformed, out-of-range, or occupied squares are rejected and
    the prompt repeats.

    Args:
        net: Model called with a ``(1, 1, 18)`` tensor; its output is viewed
            as 9 values, one per square.
        online: When true, the network is trained on its own moves towards
            the final score after every game.

    NOTE: ``genBoard``, ``optimizer`` and ``criterion`` are not parameters;
    they are resolved from the enclosing module scope.
    """
    while True:
        board = genBoard()
        movesLeft = True
        winner = False
        player = 2
        computersPlayer = 2 #random.randint(1,2)

        print()
        print("NEW GAME")
        if computersPlayer == 2:
            print("COMPUTER GOES FIRST...")

        moves = []
        outputs = []
        while movesLeft and not winner:
            if player == 2:
                print("X's Turn")
            else: # player == 1
                print("O's Turn")
            tt.printBoard(board)

            if player == computersPlayer:
                move = None
                moveValid = False
                while not moveValid:
                    #   generate a move
                    oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(oneHot)
                    values, index = output.view(9).max(0)
                    if board.flatten()[index] == 0: #   if move is valid
                        moveValid = True

                        #   apply the move
                        move = index
                        board = board.flatten()
                        board[move] = computersPlayer
                        board = board.reshape(3, 3)

                        #   store for later
                        moves.append(move)
                        outputs.append(output)
                    else:   #   invalid move, prime the whip
                        print("invalid move")
                        optimizer.zero_grad()
                        validMoves = np.where(board == 0, 1, 0)
                        target = torch.tensor(validMoves, dtype=torch.float).view(1, 1, 9)
                        loss = criterion(output, target)
                        loss.backward()
                        optimizer.step()
            elif player == tt.togglePlayer(computersPlayer):
                validMove = False
                while not validMove:
                    move = input("input move of form 'y x' ")
                    #   validate move: a parse failure or an out-of-range
                    #   index is treated as an invalid move instead of
                    #   ending the session with an exception
                    try:
                        y = int(move[0])
                        x = int(move[2])
                        validMove = bool(board[y][x] == 0)
                    except (ValueError, IndexError):
                        validMove = False
                    if not validMove:
                        print("!!!INVALID MOVE!!!")
                        continue
                    board[y][x] = tt.togglePlayer(computersPlayer)
            player = tt.togglePlayer(player)

            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        tt.printBoard(board)

        if online:
            # Train every recorded move towards the final score of the game.
            score = scoreEndBoard(board, winner, computersPlayer)
            for move, output in zip(moves, outputs):
                target = output.clone().view(9)
                target[move] = score
                target = target.view(1, 1, 9)

                optimizer.zero_grad()
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

        if winner:
            if winner == 2:
                print("WINNER: X")
            else: # winner == 1
                print("WINNER: O")
        else:
            print("TIE")
def playGame():
    """Play one interactive console game against the Monte Carlo player.

    With ``saveMCTree`` set, the score table is pre-computed from the empty
    board, pickled to ``fileName``, and the process exits. Otherwise the
    table is loaded from that file and a game is played. The computer
    refines the table with ``numSimsOnline`` playouts per child before each
    of its moves. The human enters moves as ``'y x'``; malformed,
    out-of-range, or occupied squares are rejected and the prompt repeats.
    """
    mct = {}
    board = tt.genBoard()
    player = 2
    computersPlayer = 2
    numSimsPreGame = 100000
    numSimsOnline = 100

    saveMCTree = False
    fileName = 'mct.pickle'

    # ``with`` closes the file even if dumping or loading raises.
    if saveMCTree:
        simulateChildren(mct,
                         board,
                         player,
                         computersPlayer,
                         numSimsPreGame,
                         verbose=True)
        with open(fileName, 'wb') as f:
            pickle.dump(mct, f, pickle.HIGHEST_PROTOCOL)
        quit()
    else:
        # NOTE: unpickling can execute arbitrary code; only load a file that
        # this program wrote itself.
        with open(fileName, 'rb') as f:
            mct = pickle.load(f)

    #   w 0.6, t 0.11, l 0.3

    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = random.randint(1, 2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")
    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            simulateChildren(mct,
                             board,
                             player,
                             computersPlayer,
                             numSimsOnline,
                             verbose=True)
            board = pickBestNextMove(mct, board, player)
        elif player == tt.togglePlayer(computersPlayer):
            validMove = False
            while not validMove:
                move = input("input move of form 'y x' ")
                #   validate move: a parse failure or an out-of-range index
                #   is treated as an invalid move instead of crashing
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value; ``is not 0`` tests object identity.
                    validMove = board[y][x] == 0
                except (ValueError, IndexError):
                    validMove = False
                if not validMove:
                    print("!!!INVALID MOVE!!!")
                    continue
                board[y][x] = tt.togglePlayer(computersPlayer)
        player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)

    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
예제 #23
0
        if verbose and i % logInterval == 0:
            print("NEW GAME")
        while(movesLeft and not winner):
            if verbose and i % logInterval == 0:
                if player == 2:
                    print("X's Turn")
                else: # player == 1
                    print("O's Turn")
                tt.printBoard(board)

            nextMoves = tt.listNextBoards(board, player)
            if makeRandomMoves:
                randomMove = random.randint(0, len(nextMoves)-1)
                randomMove = nextMoves[randomMove]
                oneHot = oneHotTicTacToe(randomMove, tt.togglePlayer(player), computersPlayer)
                trainingSession = forward(network, oneHot, dropout=True)
                bestMove = randomMove
            else:
                bestMoveDict = pickBestMove(nextMoves, player, computersPlayer)
                bestMove = bestMoveDict['move']
                score = bestMoveDict['score']
                trainingSession = bestMoveDict['trainingSession']

            if player == 1:
                playerOneTrainingSessions.append(trainingSession)
            else: # player == 2
                playerTwoTrainingSessions.append(trainingSession)

            board = bestMove
            player = tt.togglePlayer(player)
예제 #24
0
def train(net, criterion, optimizer, epochs):
    """Train ``net`` over ``epochs`` games against a random opponent.

    The network's top-scoring square is played when it is empty. Otherwise
    one optimisation step is taken towards the mask of empty squares and the
    network is queried again. After each game every move the network made is
    trained towards the final score from its point of view.
    """
    numInvalidMoves = 0
    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)

        optimizer.zero_grad()

        board = np.zeros(shape=(3, 3))

        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)

        history = []  # (move, output) pairs for the network's valid moves
        while not winner and movesLeft:
            if player != computersPlayer:
                #   opponents turn
                randomMove = random.choice(tt.listEmpties(board))
                tt.applyMove(player, randomMove, board)
            else:
                # Keep querying the network until it proposes an empty square.
                while True:
                    netInput = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(netInput)
                    _, index = output.view(9).max(0)

                    if board.flatten()[index] == 0:
                        #   valid: apply the move and remember it
                        cells = board.flatten()
                        cells[index] = computersPlayer
                        board = cells.reshape(3, 3)
                        history.append((index, output))
                        break

                    #   invalid move: nudge the network towards empty squares
                    numInvalidMoves += 1
                    optimizer.zero_grad()
                    emptyMask = np.where(board == 0, 1, 0)
                    target = torch.tensor(emptyMask, dtype=torch.float).view(1, 1, 9)
                    criterion(output, target).backward()
                    optimizer.step()
            player = tt.togglePlayer(player)

            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)

        #   get end score of game
        score = scoreEndBoard(board, winner, computersPlayer)
        for move, output in history:
            target = output.clone().view(9)
            target[move] = score
            target = target.view(1, 1, 9)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()