Code Example #1
def train(qTables, numGames, alpha, tryHard):
    #   tryHard is the probability of exploiting the Q-table rather than exploring;
    #   it grows linearly toward 1.0 over the course of training
    tryHardGrowth = (1 - tryHard) / numGames

    for i in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2
        keysSoFar = []
        movesSoFar = []

        computersPlayer = random.randint(1, 2)
        while (movesLeft and not winner):
            if player == computersPlayer:
                bestMove = pickBestNextMove(qTables, keysSoFar, board, player,
                                            computersPlayer, tryHard)
                movesSoFar.append(bestMove)
                tt.applyMove(player, bestMove, board)
            else:
                moves = tt.listEmpties(board)
                randomMove = random.choice(moves)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)

            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        score = scoreEndBoard(board, winner, computersPlayer)
        updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
        tryHard = tryHard + tryHardGrowth
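
The training loop above relies on two helpers that are not shown in these excerpts: scoreEndBoard, which turns a finished board into a scalar reward for the agent, and updateQTable, which writes that reward back into every Q-table visited during the game. The following is only a minimal sketch of what they might look like, assuming a reward of +1 for a win, -1 for a loss, 0 for a tie, and a simple blended update weighted by alpha; it is not the original implementation.

#   Hypothetical sketch, not the original implementation.
def scoreEndBoard(board, winner, myPlayer):
    if winner == myPlayer:
        return 1.0          # the agent won
    elif winner:            # the other player won
        return -1.0
    return 0.0              # tie

def updateQTable(score, qTables, keysSoFar, movesSoFar, alpha):
    #   blend the end-of-game score into every (state, move) pair the agent visited
    for key, move in zip(keysSoFar, movesSoFar):
        x, y = move[0], move[1]
        qTable = qTables[key]
        qTable[y][x] = (1 - alpha) * qTable[y][x] + alpha * score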
Code Example #2
def test(net, epochs):
    net.eval()

    numWins = 0
    numLosses = 0
    numTies = 0

    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1,2)

        board = np.zeros(shape = (3, 3))
        # board = np.random.randint(low = 0, high = 3, size = (3, 3))

        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)

        while(not winner and movesLeft):
            if player == computersPlayer:
                #   generate a move (no gradients are needed when only evaluating)
                oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                with torch.no_grad():
                    output = net(oneHot)

                #   mask out invalid moves with a large negative sentinel
                invalidMoves = np.where(board.flatten() > 0, True, False)
                maskedOutput = output.clone().view(9)
                maskedOutput[invalidMoves] = -10
                values, index = maskedOutput.max(0)

                #   apply the move
                move = index.item()
                board = board.flatten()
                board[move] = computersPlayer
                board = board.reshape(3, 3)
                        
            else:   #   opponents turn
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)

            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)
        
        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:   #   winner == False
            numTies += 1

    return numWins, numLosses, numTies
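
oneHotTicTacToe is called here and in the training code below but is not defined in these excerpts. Since its result is reshaped to 18 values, it presumably encodes the 3x3 board as two stacked 9-cell planes, one for the given player's marks and one for the opponent's. The sketch below is an assumption along those lines, not the original function.

#   Hypothetical sketch: encode the board as an 18-element float tensor,
#   first 9 entries = the player's own marks, last 9 = the opponent's marks.
def oneHotTicTacToe(board, player):
    flat = board.flatten()
    mine = np.where(flat == player, 1.0, 0.0)
    theirs = np.where((flat != player) & (flat != 0), 1.0, 0.0)
    return torch.tensor(np.concatenate([mine, theirs]), dtype=torch.float)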
Code Example #3
def pickBestNextMove(qTables,
                     keysSoFar,
                     board,
                     player,
                     myPlayer,
                     tryHard,
                     verbose=False):
    boardKey = (tt.hash(board), player)
    empties = tt.listEmpties(board)

    if boardKey in qTables:
        keysSoFar.append(boardKey)
        qTable = qTables[boardKey]

        if verbose:
            pprint(qTable)

        tryHardOrNot = random.random()
        if tryHardOrNot > tryHard:
            return random.choice(empties)
        else:
            greatest = -math.inf
            bestEmpty = None
            for empty in empties:
                x = empty[0]
                y = empty[1]
                val = qTable[y][x]
                if val > greatest:
                    greatest = val
                    bestEmpty = empty
            return bestEmpty

        #   FUTURE FEATURE:
        #   #   take into account the tryhard number
    else:
        qTables[boardKey] = genQTable()
        keysSoFar.append(boardKey)
        return random.choice(empties)
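
Two helpers used above are not shown: tt.hash, which turns a board into a hashable key, and genQTable, which initialises the Q-values for a newly seen state. A minimal sketch of genQTable, assuming it simply returns a 3x3 grid of zeros matching the qTable[y][x] indexing used above:

#   Hypothetical sketch, assuming Q-values start at zero for every cell.
def genQTable():
    return [[0.0 for _ in range(3)] for _ in range(3)]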
Code Example #4
def test(mct, numGames, numSims):
    numWins = 0
    numTies = 0
    numLosses = 0

    for i in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2

        computersPlayer = random.randint(1, 2)
        while (movesLeft and not winner):
            if player == computersPlayer:
                simulateChildren(mct, board, player, computersPlayer, numSims)
                bestBoard = pickBestNextMove(mct, board, player)
                # print("################")
                # tt.printBoard(board)
                # tt.printBoard(bestBoard)
                # print("BESTMOVE")
                board = bestBoard
            else:
                moves = tt.listEmpties(board)
                randomMove = random.choice(moves)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)

            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:  #   tie
            numTies += 1

    return numWins, numLosses, numTies
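
A possible way to exercise this test harness, assuming the Monte Carlo tree is just a dictionary that simulateChildren populates (its exact structure is not shown in these excerpts):

#   Hypothetical usage: the tree/cache starts empty and is filled as games are played.
mct = {}
wins, losses, ties = test(mct, numGames=1000, numSims=25)
print(f"wins={wins} losses={losses} ties={ties}")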
Code Example #5
def simulate(numSimulations, board, player, myPlayer):
    originBoard = copy.deepcopy(board)
    originPlayer = player
    totalScore = 0
    for i in range(numSimulations):
        simBoard = copy.deepcopy(originBoard)
        simPlayer = originPlayer

        winner = tt.getWinner(simBoard)
        movesLeft = not tt.noMoreMoves(simBoard)
        while (movesLeft and not winner):
            moves = tt.listEmpties(simBoard)
            randomMove = random.choice(moves)
            tt.applyMove(simPlayer, randomMove, simBoard)
            simPlayer = tt.togglePlayer(simPlayer)

            winner = tt.getWinner(simBoard)
            movesLeft = not tt.noMoreMoves(simBoard)

        score = scoreEndBoard(simBoard, winner, myPlayer)
        totalScore += score

    return totalScore
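
simulateChildren and the MCT-flavoured pickBestNextMove used in Code Example #4 are not included in these excerpts. Based on how they are called, a plausible reading is that simulateChildren runs simulate (above) from every child position reachable in one move and caches the total scores, while pickBestNextMove returns the child board with the best cached score. The sketch below follows that reading and is an assumption, not the original code; it keys the cache on tt.hash of the child board.

#   Hypothetical sketch of the Monte Carlo helpers used in Code Example #4.
def simulateChildren(mct, board, player, myPlayer, numSims):
    for empty in tt.listEmpties(board):
        childBoard = copy.deepcopy(board)
        tt.applyMove(player, empty, childBoard)
        #   roll out numSims random games from this child and cache the total score
        mct[tt.hash(childBoard)] = simulate(numSims, childBoard,
                                            tt.togglePlayer(player), myPlayer)

def pickBestNextMove(mct, board, player):
    bestBoard = None
    bestScore = -math.inf
    for empty in tt.listEmpties(board):
        childBoard = copy.deepcopy(board)
        tt.applyMove(player, empty, childBoard)
        score = mct.get(tt.hash(childBoard), -math.inf)
        if score > bestScore:
            bestScore = score
            bestBoard = childBoard
    return bestBoard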
Code Example #6
def test(qTables, numGames, tryHard=1.0):
    numWins = 0
    numTies = 0
    numLosses = 0

    for i in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2
        keysSoFar = []
        movesSoFar = []

        computersPlayer = random.randint(1, 2)
        while (movesLeft and not winner):
            if player == computersPlayer:
                bestMove = pickBestNextMove(qTables, keysSoFar, board, player,
                                            computersPlayer, tryHard)
                movesSoFar.append(bestMove)
                tt.applyMove(player, bestMove, board)
            else:
                moves = tt.listEmpties(board)
                randomMove = random.choice(moves)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)

            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:  #   tie
            numTies += 1

    return numWins, numLosses, numTies
Code Example #7
def train(net, criterion, optimizer, epochs):
    net.train()

    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1,2)

        optimizer.zero_grad()

        board = np.zeros(shape = (3, 3))

        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)

        gameDuration = 0

        moves = []
        outputs = []
        while(not winner and movesLeft):
            if player == computersPlayer:
                #   generate a move
                oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                output = net(oneHot)

                #   mask out invalid moves
                invalidMoves = np.where( board.flatten() > 0, True, False)
                maskedOutput = output.clone().view(9)
                maskedOutput[invalidMoves] = -10
                values, index = maskedOutput.max(0)

                #   apply the move
                move = index
                board = board.flatten()
                board[move] = computersPlayer
                board = board.reshape(3, 3)
                        
                #   store for later
                moves.append(move)
                outputs.append(output)

            else:   #   opponents turn
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            gameDuration += 1

            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)
        
        #   get end score of game
        score = scoreEndBoard(board, winner, computersPlayer)
        # gameDurationMultiplier = 1.0 - gameDuration / 10
        # gameDurationMultiplier = gameDurationMultiplier * 0.9
        #   propagate the end-of-game score back through the agent's moves,
        #   discounting earlier moves more heavily
        dilutionFactor = 0.9
        totalDilutant = 1.0
        for moveIdx, move in reversed(list(enumerate(moves))):
            totalDilutant *= dilutionFactor
            output = outputs[moveIdx]
            #   detach so the target is treated as a constant by the loss
            target = output.detach().clone().view(9)
            target[move] = score * totalDilutant
            target = target.view(1, 1, 9)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
Code Example #8
def test(net, criterion, optimizer, epochs):
    numInvalidMoves = 0

    numWins = 0
    numLosses = 0
    numTies = 0

    optimizer.zero_grad()

    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1,2)

        board = np.zeros(shape = (3, 3))
        # board = np.random.randint(low = 0, high = 3, size = (3, 3))

        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)

        while(not winner and movesLeft):
            if player == computersPlayer:
                move = None
                moveValid = False
                while not moveValid:
                    #   generate a move
                    oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(oneHot)
                    values, index = output.view(9).max(0)
                    if board.flatten()[index] == 0: #   if move is valid
                        moveValid = True

                        #   apply the move
                        move = index.item()
                        board = board.flatten()
                        board[move] = computersPlayer
                        board = board.reshape(3, 3)
                        
                    else:   #   invalid move, prime the whip
                        # print("invalid move")
                        numInvalidMoves += 1
                        optimizer.zero_grad()
                        validMoves = np.where(board == 0, 1, 0)
                        target = torch.tensor(validMoves, dtype=torch.float).view(1, 1, 9)
                        loss = criterion(output, target)
                        loss.backward()
                        optimizer.step()
            else:   #   opponents turn
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)

            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)
        
        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:   #   winner == False
            numTies += 1

    return numWins, numLosses, numTies
Code Example #9
def train(net, criterion, optimizer, epochs):
    numInvalidMoves = 0
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1,2)

        optimizer.zero_grad()

        board = np.zeros(shape = (3, 3))
        # board = np.random.randint(low = 0, high = 3, size = (3, 3))

        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)

        moves = []
        outputs = []
        while(not winner and movesLeft):
            if player == computersPlayer:
                move = None
                moveValid = False
                while not moveValid:
                    #   generate a move
                    oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(oneHot)
                    values, index = output.view(9).max(0)
                    if board.flatten()[index] == 0: #   if move is valid
                        moveValid = True

                        #   apply the move
                        move = index.item()
                        board = board.flatten()
                        board[move] = computersPlayer
                        board = board.reshape(3, 3)

                        #   store for later
                        moves.append(move)
                        outputs.append(output)
                    else:   #   invalid move, prime the whip
                        # print("invalid move")
                        numInvalidMoves += 1
                        optimizer.zero_grad()
                        validMoves = np.where(board == 0, 1, 0)
                        target = torch.tensor(validMoves, dtype=torch.float).view(1, 1, 9)
                        loss = criterion(output, target)
                        loss.backward()
                        optimizer.step()
            else:   #   opponents turn
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)

            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)
        
        #   get end score of game and propagate it back to each stored move
        score = scoreEndBoard(board, winner, computersPlayer)
        for moveIdx, move in enumerate(moves):
            output = outputs[moveIdx]
            #   detach so the target is treated as a constant by the loss
            target = output.detach().clone().view(9)
            target[move] = score
            target = target.view(1, 1, 9)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
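
The network net, the loss criterion, and the optimizer are passed in from outside and their definitions are not part of these excerpts. The input is always reshaped to (1, 1, 18) and the output to 9 values, so any module with that interface fits. The following is only an assumed minimal setup for trying the train/test functions above, not the author's actual architecture.

#   Hypothetical setup: a small fully connected net with an 18-in / 9-out interface.
import torch
import torch.nn as nn

class TicTacToeNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(18, 64),
            nn.ReLU(),
            nn.Linear(64, 9),
        )

    def forward(self, x):
        return self.layers(x)

net = TicTacToeNet()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

train(net, criterion, optimizer, epochs=10000)
wins, losses, ties = test(net, criterion, optimizer, epochs=1000)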