def simulateChildren(mct, board, player, myPlayer, numSims, verbose=False):
    """Run random playouts for every successor of `board` and return the best total.

    Args:
        mct: dict keyed by `genFullKey(board, player)` holding accumulated
            playout scores; updated in place.
        board: position to expand.
        player: side whose moves are enumerated by `tt.listNextBoards`.
        myPlayer: side from whose point of view scores are computed.
        numSims: number of playouts per successor.
        verbose: when true, iterate the successors through `tqdm`.

    Returns:
        The highest accumulated score among the successors. If the board
        already has a winner or has no successors, returns the end-board
        score multiplied by ``numSims ** 2`` (also stored in `mct`).
    """
    key = genFullKey(board, player)
    if key not in mct:
        mct[key] = 0
    nextBoards = tt.listNextBoards(board, player)
    winner = tt.getWinner(board)

    if winner or not nextBoards:
        # Score with the actual winner so that a drawn board receives the tie
        # score instead of being counted as a win for the previous mover.
        score = scoreEndBoard(board, winner, myPlayer) * math.pow(numSims, 2)
        mct[key] = score
        return score

    # add next boards to mct
    successors = tqdm(nextBoards) if verbose else nextBoards
    for nextBoard in successors:
        simChildrenInner(mct, nextBoard, player, myPlayer, numSims)

    # pick the highest score and return that
    opponent = tt.togglePlayer(player)
    return max(mct[genFullKey(nextBoard, opponent)] for nextBoard in nextBoards)
def simChildrenInner(mct, nextBoard, player, myPlayer, numSims):
    """Add the summed score of `numSims` random playouts to `nextBoard`'s entry.

    The entry is keyed by `genFullKey(nextBoard, <toggled player>)` and is
    created with value 0 when it does not exist yet. `mct` is mutated in
    place; nothing is returned.
    """
    opponent = tt.togglePlayer(player)
    entryKey = genFullKey(nextBoard, opponent)
    mct.setdefault(entryKey, 0)
    # give each one a bunch of game sims
    mct[entryKey] += simulate(numSims, nextBoard, opponent, myPlayer)
def minimax_inner(count, board, player, myPlayer, prevBoards, alpha, beta):
    """Recursively score `board` by minimax with alpha-beta pruning and a score cache.

    Args:
        count: one-element list; `count[0]` is incremented once per board
            that is evaluated (cache hits are not counted).
        board: position to score.
        player: compared with `myPlayer` to choose the maximizing or the
            minimizing branch. Successors are generated for
            `tt.togglePlayer(player)` -- presumably `player` is the side that
            moved last; TODO confirm against the caller.
        myPlayer: side the score is computed for.
        prevBoards: dict mapping `tt.hash(board)` to a previously computed
            score; read and written in place.
        alpha: current lower bound used for pruning.
        beta: current upper bound used for pruning.

    Returns:
        The score from `scoreEndBoard` for finished boards, otherwise the best
        child score for the branch that was taken.
    """
    hashKey = tt.hash(board)
    # Cache hit: reuse the stored score without evaluating the board again.
    # NOTE(review): the key is the board hash only; the player and the
    # alpha/beta window are not part of it -- confirm this is intended.
    if hashKey in prevBoards:
        return prevBoards[hashKey]
    count[0] += 1
    winner = tt.getWinner(board)
    if winner:
        score = scoreEndBoard(board, winner, myPlayer)
        # prevBoards[hashKey] = score
        return score
    elif tt.noMoreMoves(board):
        # No winner and no moves left: `winner` is falsy here.
        score = scoreEndBoard(board, winner, myPlayer)
        # prevBoards[hashKey] = score
        return score
    else:
        nextBoards = tt.listNextBoards(board, tt.togglePlayer(player))
        if player == myPlayer:
            # maximizing next moves
            bestScore = -math.inf
            for nextBoard in nextBoards:
                # Stop exploring siblings once the window is closed.
                if beta <= alpha:
                    break
                score = minimax_inner(count, nextBoard, player=tt.togglePlayer(player), myPlayer=myPlayer, prevBoards=prevBoards, alpha=alpha, beta=beta)
                # The child's score is cached by the parent, under the child's hash.
                # NOTE(review): a score obtained after a pruning break is
                # cached the same way as a fully searched one -- verify.
                prevBoards[tt.hash(nextBoard)] = score
                if score > bestScore:
                    bestScore = score
                    alpha = bestScore
            return bestScore
        else:
            # minimizing next moves
            bestScore = math.inf
            for nextBoard in nextBoards:
                # Stop exploring siblings once the window is closed.
                if beta <= alpha:
                    break
                score = minimax_inner(count, nextBoard, player=tt.togglePlayer(player), myPlayer=myPlayer, prevBoards=prevBoards, alpha=alpha, beta=beta)
                prevBoards[tt.hash(nextBoard)] = score
                if score < bestScore:
                    bestScore = score
                    beta = bestScore
            return bestScore
def test(net, epochs):
    """Play `epochs` games of the network against a random opponent.

    The network is put in eval mode. On its turn the already occupied cells
    are masked to -10 and the highest remaining output is played; the
    opponent picks uniformly among `tt.listEmpties(board)`.

    Returns:
        Tuple ``(numWins, numLosses, numTies)`` from the network's side.
    """
    net.eval()
    wins = losses = ties = 0
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)

        while not winner and movesLeft:
            if player != computersPlayer:
                # opponents turn
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                # generate a move
                encoded = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                output = net(encoded)
                # mask out invalid moves
                occupied = board.flatten() > 0
                masked = output.clone().view(9)
                masked[occupied] = -10
                _, move = masked.max(0)
                # apply the move
                cells = board.flatten()
                cells[move] = computersPlayer
                board = cells.reshape(3, 3)

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        if winner == computersPlayer:
            wins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            losses += 1
        else:
            # winner == False
            ties += 1
    return wins, losses, ties
def playGame():
    """Play one console game of tic-tac-toe against the minimax player.

    The computer is player 2 (X) and moves first; its moves come from
    `pickBestNextBoard`. The human types a move as 'y x' and is asked again
    until the input names an empty cell. The final board and the result are
    printed.
    """
    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = 2  # random.randint(1,2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            board = pickBestNextBoard(board, player, computersPlayer)
            player = tt.togglePlayer(player)
        elif player == tt.togglePlayer(computersPlayer):
            while True:
                move = input("input move of form 'y x' ")
                # validate move: malformed text or an out-of-range cell is
                # treated like an occupied cell instead of crashing.
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value; an identity test against the literal
                    # 0 only works by accident for some int objects.
                    occupied = board[y][x] != 0
                except (ValueError, IndexError):
                    occupied = True
                if not occupied:
                    break
                print("!!!INVALID MOVE!!!")
            board[y][x] = tt.togglePlayer(computersPlayer)
            player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)
    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
def expand(mct, board, player):
    """Register every successor of `board` in `mct` with zeroed statistics.

    Each successor is keyed by `genFullKey(successor, <toggled player>)`.
    Existing entries are left untouched, because different branches can reach
    the same position. `mct` is mutated in place; nothing is returned.
    """
    for successor in tt.listNextBoards(board, player):
        successorKey = genFullKey(successor, tt.togglePlayer(player))
        # there might be convergent branches
        mct.setdefault(successorKey, {'n': 0, 'v': 0})
def scoreEndBoard(board, winner, myPlayer):
    """Score a finished game from `myPlayer`'s point of view.

    Returns 1 when `winner` is falsy (no winner), -10 when the winner is the
    toggled `myPlayer`, 10 when the winner is `myPlayer`, and None for any
    other value. `board` is not inspected.
    """
    if not winner:
        return 1
    if winner == tt.togglePlayer(myPlayer):
        return -10
    if winner == myPlayer:
        return 10
    return None
def train(qTables, numGames, alpha, tryHard):
    """Train the Q-tables by playing `numGames` games against a random opponent.

    Args:
        qTables: Q-table storage passed to `pickBestNextMove` and
            `updateQTable`.
        numGames: number of games to play; nothing happens when it is not
            positive.
        alpha: value forwarded to `updateQTable`.
        tryHard: starting value forwarded to `pickBestNextMove`; it is raised
            by the same step after every game so that it reaches 1 after
            `numGames` games.

    Returns:
        None.
    """
    if numGames <= 0:
        # Nothing to play; also avoids dividing by zero below.
        return

    tryHardGrowth = (1 - tryHard) / numGames
    for i in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2
        keysSoFar = []
        movesSoFar = []
        computersPlayer = random.randint(1, 2)

        while movesLeft and not winner:
            if player == computersPlayer:
                bestMove = pickBestNextMove(qTables, keysSoFar, board, player,
                                            computersPlayer, tryHard)
                movesSoFar.append(bestMove)
                tt.applyMove(player, bestMove, board)
            else:
                moves = tt.listEmpties(board)
                randomMove = random.choice(moves)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        # Feed the final result back into the moves made during this game.
        score = scoreEndBoard(board, winner, computersPlayer)
        updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
        tryHard = tryHard + tryHardGrowth
def oneHotTicTacToe(board, computersPlayer):
    """Encode `board` as a float32 tensor of two concatenated occupancy planes.

    The first plane marks cells equal to `computersPlayer`, the second marks
    cells equal to the toggled player; both are flattened before being joined.
    """
    opponent = tt.togglePlayer(computersPlayer)
    planes = (
        np.where(board == computersPlayer, 1, 0).flatten(),
        np.where(board == opponent, 1, 0).flatten(),
    )
    return torch.tensor(np.concatenate(planes), dtype=torch.float32)
def pickBestNextMove(mct, board, player):
    """Return the successor of `board` with the highest score stored in `mct`.

    Scores are looked up under `genFullKey(successor, <toggled player>)`. The
    first successor with a strictly greater score wins ties. Returns None
    when there are no successors or no score exceeds negative infinity.
    """
    chosen = None
    topScore = -math.inf
    for candidate in tt.listNextBoards(board, player):
        candidateScore = mct[genFullKey(candidate, tt.togglePlayer(player))]
        if candidateScore > topScore:
            topScore, chosen = candidateScore, candidate
    return chosen
def minimaxGenBoardScores(inBoard, myPlayer, inPlayer):
    """Compute minimax scores for every position reachable from `inBoard`.

    The search is iterative: an explicit stack replaces recursion. A position
    stays on the stack until all of its successors have a score, and is then
    scored with the max (when the side to move is `myPlayer`) or the min of
    its successors' scores.

    Args:
        inBoard: starting position.
        myPlayer: side the scores are computed for.
        inPlayer: side to move in `inBoard`.

    Returns:
        dict mapping `tt.hash(board)` to the board's score.
    """
    boardScores = {}
    stack = [{'board': inBoard, 'player': inPlayer}]
    while stack:
        frame = stack[-1]
        board = frame['board']
        player = frame['player']
        boardKey = tt.hash(board)

        # base case, end board
        winner = tt.getWinner(board)
        if winner:
            boardScores[boardKey] = tt.scoreEndBoard(board, winner, myPlayer)
            print("###########")
            tt.printBoard(board)
            print(boardScores[boardKey])
            stack.pop()
        elif tt.noMoreMoves(board):
            boardScores[boardKey] = tt.scoreEndBoard(board, winner, myPlayer)
            stack.pop()
        else:
            # nobody won yet, and there are more moves
            nextBoards = tt.listNextBoards(board, player)
            unscored = [
                nextBoard for nextBoard in nextBoards
                if tt.hash(nextBoard) not in boardScores
            ]
            if unscored:
                # Score the missing successors first; this frame is revisited
                # once they have been popped.
                nextPlayer = tt.togglePlayer(player)
                stack.extend(
                    {'board': nextBoard, 'player': nextPlayer}
                    for nextBoard in unscored
                )
            else:
                # Look up each successor's own score; the loop variable must
                # be the one that is hashed.
                scores = [
                    boardScores[tt.hash(nextBoard)] for nextBoard in nextBoards
                ]
                choose = max if player == myPlayer else min
                boardScores[boardKey] = choose(scores)
                stack.pop()
    return boardScores
def test(mct, numGames, numSims):
    """Play `numGames` games of the Monte Carlo player against a random opponent.

    On the computer's turn `simulateChildren` refreshes `mct` with `numSims`
    playouts per successor and `pickBestNextMove` selects the next board. The
    opponent picks uniformly among `tt.listEmpties(board)`.

    Returns:
        Tuple ``(numWins, numLosses, numTies)`` from the computer's side.
    """
    wins, ties, losses = 0, 0, 0
    for i in tqdm(range(numGames)):
        board = tt.genBoard()
        player = 2
        computersPlayer = random.randint(1, 2)
        winner = False
        movesLeft = True

        while movesLeft and not winner:
            if player != computersPlayer:
                openCells = tt.listEmpties(board)
                tt.applyMove(player, random.choice(openCells), board)
            else:
                simulateChildren(mct, board, player, computersPlayer, numSims)
                board = pickBestNextMove(mct, board, player)
            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        if winner == computersPlayer:
            wins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            losses += 1
        else:
            # tie
            ties += 1
    return wins, losses, ties
def pickBestMove(nextMoves, player, computersPlayer):
    """Evaluate every candidate board with the network and return the best one.

    Each candidate is run through `forward(network, ..., dropout=True)`; the
    last element of the session's 'outputs' is read as (loss, tie, win)
    values. Candidates where win is the largest value are preferred (highest
    win), then candidates where tie beats loss (highest tie), and otherwise
    the candidate with the lowest loss value.

    Returns:
        dict with keys "move", "score" and "trainingSession" for the chosen
        candidate, or None when nothing qualifies (e.g. `nextMoves` is empty).
    """
    encodings = [
        oneHotTicTacToe(candidate, tt.togglePlayer(player), computersPlayer)
        for candidate in nextMoves
    ]
    sessions = [forward(network, encoding, dropout=True) for encoding in encodings]

    winning, drawing, losing = [], [], []
    for candidate, session in zip(nextMoves, sessions):
        outcome = session['outputs'][-1]
        lossValue, tieValue, winValue = outcome[0], outcome[1], outcome[2]
        entry = {"move": candidate, "score": outcome, "trainingSession": session}
        if winValue > tieValue and winValue > lossValue:
            winning.append(entry)
        elif tieValue > lossValue:
            drawing.append(entry)
        else:
            losing.append(entry)

    # Choose the pool, the score column to compare, the starting threshold
    # and the direction of the comparison.
    if winning:
        pool, column, threshold = winning, 2, -100000
        improves = lambda new, old: new > old
    elif drawing:
        pool, column, threshold = drawing, 1, -100000
        improves = lambda new, old: new > old
    else:
        # only bad moves :(
        pool, column, threshold = losing, 0, 100000
        improves = lambda new, old: new < old

    chosen = None
    for entry in pool:
        value = entry["score"][column]
        if improves(value, threshold):
            chosen, threshold = entry, value
    return chosen
def test(qTables, numGames, tryHard=1.0):
    """Play `numGames` games of the Q-table player against a random opponent.

    The computer's moves come from `pickBestNextMove`; the opponent picks
    uniformly among `tt.listEmpties(board)`. The Q-tables are not updated by
    this function itself.

    Returns:
        Tuple ``(numWins, numLosses, numTies)`` from the computer's side.
    """
    wins, ties, losses = 0, 0, 0
    for i in tqdm(range(numGames)):
        board = tt.genBoard()
        player = 2
        keysSoFar, movesSoFar = [], []
        computersPlayer = random.randint(1, 2)
        winner = False
        movesLeft = True

        while movesLeft and not winner:
            if player != computersPlayer:
                openCells = tt.listEmpties(board)
                tt.applyMove(player, random.choice(openCells), board)
            else:
                chosenMove = pickBestNextMove(qTables, keysSoFar, board, player,
                                              computersPlayer, tryHard)
                movesSoFar.append(chosenMove)
                tt.applyMove(player, chosenMove, board)
            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        if winner == computersPlayer:
            wins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            losses += 1
        else:
            # tie
            ties += 1
    return wins, losses, ties
def pickBestNextBoard(board, player, myPlayer):
    """Return the successor of `board` that `minimax` scores highest.

    Successors are generated for `myPlayer`. The first one with a strictly
    greater score wins ties, and None is returned when no successor scores
    above -10000. The number of boards evaluated is printed afterwards.
    """
    evaluated = [0]  # mutable counter filled in by minimax
    chosen, topScore = None, -10000
    for candidate in tqdm(tt.listNextBoards(board, myPlayer)):
        candidateScore = minimax(evaluated, candidate,
                                 player=tt.togglePlayer(player),
                                 myPlayer=myPlayer)
        if candidateScore > topScore:
            topScore, chosen = candidateScore, candidate
    print("\ntotal boards evaluated: " + str(evaluated[0]))
    return chosen
def simulate(numSimulations, board, player, myPlayer):
    """Play `numSimulations` random games from `board` and sum their end scores.

    Every game starts from a fresh deep copy of the position with `player` to
    move, and both sides pick uniformly among `tt.listEmpties`. The input
    board is never modified.

    Returns:
        Sum of `scoreEndBoard(..., myPlayer)` over all games (0 when no game
        is played).
    """
    startBoard = copy.deepcopy(board)
    total = 0
    for _ in range(numSimulations):
        playBoard = copy.deepcopy(startBoard)
        mover = player
        winner = tt.getWinner(playBoard)
        movesLeft = not tt.noMoreMoves(playBoard)
        while movesLeft and not winner:
            openCells = tt.listEmpties(playBoard)
            tt.applyMove(mover, random.choice(openCells), playBoard)
            mover = tt.togglePlayer(mover)
            winner = tt.getWinner(playBoard)
            movesLeft = not tt.noMoreMoves(playBoard)
        total += scoreEndBoard(playBoard, winner, myPlayer)
    return total
def playGame():
    """Load (or train and save) the Q-tables, benchmark them, then play a console game.

    With `saveQTables` false the tables are read from 'qTables.pickle';
    otherwise they are trained and written to that file. The tables are then
    tested over 1000 games against a random opponent and the result printed.

    NOTE: the benchmark section ends with `quit()`, so the interactive game
    that follows it is not reached as the function is written. That switch is
    kept as found; delete the `quit()` call to play.
    """
    saveQTables = False
    fileName = 'qTables.pickle'
    qTables = {}
    keysSoFar = []
    movesSoFar = []
    tryHard = 0
    alpha = 0.9
    numTrials = 1000000

    if saveQTables:
        train(qTables, numTrials, alpha, tryHard)
        with open(fileName, 'wb') as f:
            pickle.dump(qTables, f, pickle.HIGHEST_PROTOCOL)
    else:
        # SECURITY: unpickling can execute arbitrary code; only load a file
        # that this program produced itself.
        with open(fileName, 'rb') as f:
            qTables = pickle.load(f)

    numWins, numLosses, numTies = test(qTables, 1000)
    print("VS RANDOM OPPONENT...")
    print("numWins:" + str(numWins))
    print("numLosses:" + str(numLosses))
    print("numTies:" + str(numTies))
    quit()

    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = random.randint(1, 2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            bestMove = pickBestNextMove(qTables, keysSoFar, board, player,
                                        computersPlayer, tryHard=1.0,
                                        verbose=True)
            movesSoFar.append(bestMove)
            tt.applyMove(player, bestMove, board)
            player = tt.togglePlayer(player)
        elif player == tt.togglePlayer(computersPlayer):
            while True:
                move = input("input move of form 'y x' ")
                # validate move: malformed text or an out-of-range cell is
                # treated like an occupied cell instead of crashing.
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value; an identity test against the literal
                    # 0 only works by accident for some int objects.
                    occupied = board[y][x] != 0
                except (ValueError, IndexError):
                    occupied = True
                if not occupied:
                    break
                print("!!!INVALID MOVE!!!")
            board[y][x] = tt.togglePlayer(computersPlayer)
            player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)

    # Learn from the finished game, then show the table rows that were used.
    score = scoreEndBoard(board, winner, computersPlayer)
    updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
    for key in keysSoFar:
        pprint(key)
        pprint(qTables[key])

    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
def test(net, criterion, optimizer, epochs):
    """Play `epochs` games against a random opponent, penalising illegal moves.

    On its turn the network's highest output is taken as the move. When that
    cell is occupied, one optimisation step is taken towards a target that is
    1 on empty cells and 0 elsewhere, and the network is asked again. So this
    "test" does update the network. The number of illegal moves is counted
    but not returned.

    Returns:
        Tuple ``(numWins, numLosses, numTies)`` from the network's side.
    """
    numInvalidMoves = 0
    wins = losses = ties = 0
    optimizer.zero_grad()
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)

        while not winner and movesLeft:
            if player != computersPlayer:
                # opponents turn
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                while True:
                    # generate a move
                    encoded = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(encoded)
                    _, move = output.view(9).max(0)
                    if board.flatten()[move] == 0:
                        # move is valid: apply it
                        cells = board.flatten()
                        cells[move] = computersPlayer
                        board = cells.reshape(3, 3)
                        break
                    # invalid move, prime the whip
                    numInvalidMoves += 1
                    optimizer.zero_grad()
                    legalMask = np.where(board == 0, 1, 0)
                    target = torch.tensor(legalMask, dtype=torch.float).view(1, 1, 9)
                    loss = criterion(output, target)
                    loss.backward()
                    optimizer.step()

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        if winner == computersPlayer:
            wins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            losses += 1
        else:
            # winner == False
            ties += 1
    return wins, losses, ties
def trainAgainstSelf(net, criterion, optimizer, epochs):
    """Train the network by letting it play both sides for `epochs` games.

    Each side's board is encoded from its own point of view, occupied cells
    are masked to -10 and the highest remaining output is played. After the
    game, every recorded output is pushed towards a copy of itself whose
    played cell is set to that side's end score. Side A (`computersPlayer`)
    is trained first, then side B. Returns nothing.
    """
    def fit(moves, outputs, score):
        # One optimisation step per recorded move, in playing order.
        for move, output in zip(moves, outputs):
            target = output.clone().view(9)
            target[move] = score
            target = target.view(1, 1, 9)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    net.train()
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)
        movesA, outputsA = [], []
        movesB, outputsB = [], []

        while not winner and movesLeft:
            isSideA = player == computersPlayer
            mover = computersPlayer if isSideA else tt.togglePlayer(computersPlayer)

            # generate a move
            output = net(oneHotTicTacToe(board, mover).view(1, 1, 18))
            # mask out invalid moves
            occupied = board.flatten() > 0
            masked = output.clone().view(9)
            masked[occupied] = -10
            _, move = masked.max(0)

            # apply the move
            cells = board.flatten()
            cells[move] = mover
            board = cells.reshape(3, 3)

            # store for later
            if isSideA:
                movesA.append(move)
                outputsA.append(output)
            else:
                movesB.append(move)
                outputsB.append(output)

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        # get end score of game, once per side
        fit(movesA, outputsA, scoreEndBoard(board, winner, computersPlayer))
        fit(movesB, outputsB,
            scoreEndBoard(board, winner, tt.togglePlayer(computersPlayer)))
def train(net, criterion, optimizer, epochs):
    """Train the network over `epochs` games against a random opponent.

    On its turn occupied cells are masked to -10 and the highest remaining
    output is played. After the game the recorded moves are replayed from
    last to first. Each output is pushed towards a copy of itself whose
    played cell is set to the end score times a factor, which starts at 0.9
    for the last move and is multiplied by 0.9 for every earlier move.
    Returns nothing.
    """
    net.train()
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)
        gameDuration = 0  # only consumed by the commented-out multiplier below
        moves, outputs = [], []

        while not winner and movesLeft:
            if player != computersPlayer:
                # opponents turn
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                # generate a move
                output = net(oneHotTicTacToe(board, computersPlayer).view(1, 1, 18))
                # mask out invalid moves
                occupied = board.flatten() > 0
                masked = output.clone().view(9)
                masked[occupied] = -10
                _, move = masked.max(0)
                # apply the move
                cells = board.flatten()
                cells[move] = computersPlayer
                board = cells.reshape(3, 3)
                # store for later
                moves.append(move)
                outputs.append(output)

            player = tt.togglePlayer(player)
            gameDuration += 1
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        # get end score of game
        score = scoreEndBoard(board, winner, computersPlayer)
        # gameDurationMultiplier = 1.0 - gameDuration / 10
        # gameDurationMultiplier = gameDurationMultiplier * 0.9
        dilutionFactor = 0.9
        totalDilutant = 1.0
        for move, output in zip(reversed(moves), reversed(outputs)):
            totalDilutant *= dilutionFactor
            target = output.clone().view(9)
            target[move] = score * totalDilutant
            target = target.view(1, 1, 9)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
def playWithUser(net, online=True):
    """Play console games against the network forever, optionally learning online.

    The computer is player 2 (X) and moves first. When its highest output
    points at an occupied cell, one optimisation step is taken towards the
    mask of empty cells and it is asked again. With `online` true, every
    recorded output is pushed towards the end score of the game afterwards.

    NOTE(review): `optimizer`, `criterion` and `genBoard` are not parameters
    and are read from the enclosing module -- confirm they are defined there.
    The outer loop has no exit condition.
    """
    while True:
        board = genBoard()
        movesLeft = True
        winner = False
        player = 2
        computersPlayer = 2  # random.randint(1,2)
        print()
        print("NEW GAME")
        if computersPlayer == 2:
            print("COMPUTER GOES FIRST...")
        moves, outputs = [], []

        while movesLeft and not winner:
            print("X's Turn" if player == 2 else "O's Turn")
            tt.printBoard(board)

            if player == computersPlayer:
                while True:
                    # generate a move
                    encoded = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(encoded)
                    _, move = output.view(9).max(0)
                    if board.flatten()[move] == 0:
                        # move is valid: apply it
                        cells = board.flatten()
                        cells[move] = computersPlayer
                        board = cells.reshape(3, 3)
                        # store for later
                        moves.append(move)
                        outputs.append(output)
                        break
                    # invalid move, prime the whip
                    print("invalid move")
                    optimizer.zero_grad()
                    legalMask = np.where(board == 0, 1, 0)
                    target = torch.tensor(legalMask, dtype=torch.float).view(1, 1, 9)
                    loss = criterion(output, target)
                    loss.backward()
                    optimizer.step()
            elif player == tt.togglePlayer(computersPlayer):
                while True:
                    move = input("input move of form 'y x' ")
                    y = int(move[0])
                    x = int(move[2])
                    # validate move
                    if board[y][x] == 0:
                        break
                    print("!!!INVALID MOVE!!!")
                board[y][x] = tt.togglePlayer(computersPlayer)

            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        tt.printBoard(board)

        if online:
            score = scoreEndBoard(board, winner, computersPlayer)
            for move, output in zip(moves, outputs):
                target = output.clone().view(9)
                target[move] = score
                target = target.view(1, 1, 9)
                optimizer.zero_grad()
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

        if not winner:
            print("TIE")
        elif winner == 2:
            print("WINNER: X")
        else:
            # winner == 1
            print("WINNER: O")
def playGame():
    """Play one console game of tic-tac-toe against the Monte Carlo player.

    With `saveMCTree` true the score table is built with `numSimsPreGame`
    playouts per opening move, written to 'mct.pickle', and the program quits.
    Otherwise the table is loaded from that file and a game is played. On the
    computer's turn the table is refreshed with `numSimsOnline` playouts per
    successor. The human types moves as 'y x' and is asked again until the
    input names an empty cell.
    """
    mct = {}
    board = tt.genBoard()
    player = 2
    computersPlayer = 2
    numSimsPreGame = 100000
    numSimsOnline = 100
    saveMCTree = False
    fileName = 'mct.pickle'

    if saveMCTree:
        simulateChildren(mct, board, player, computersPlayer, numSimsPreGame,
                         verbose=True)
        with open(fileName, 'wb') as f:
            pickle.dump(mct, f, pickle.HIGHEST_PROTOCOL)
        quit()
    else:
        # SECURITY: unpickling can execute arbitrary code; only load a file
        # that this program produced itself.
        with open(fileName, 'rb') as f:
            mct = pickle.load(f)

    # Benchmark against a random opponent (disabled); the recorded result was
    # w 0.6, t 0.11, l 0.3:
    # mct = {}
    # numTrials = 100
    # numWins, numLosses, numTies = test(mct, numTrials, numSimsOnline)

    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = random.randint(1, 2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            simulateChildren(mct, board, player, computersPlayer, numSimsOnline,
                             verbose=True)
            board = pickBestNextMove(mct, board, player)
        elif player == tt.togglePlayer(computersPlayer):
            while True:
                move = input("input move of form 'y x' ")
                # validate move: malformed text or an out-of-range cell is
                # treated like an occupied cell instead of crashing.
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value; an identity test against the literal
                    # 0 only works by accident for some int objects.
                    occupied = board[y][x] != 0
                except (ValueError, IndexError):
                    occupied = True
                if not occupied:
                    break
                print("!!!INVALID MOVE!!!")
            board[y][x] = tt.togglePlayer(computersPlayer)

        player = tt.togglePlayer(player)
        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)
    score = scoreEndBoard(board, winner, computersPlayer)  # currently unused

    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
if verbose and i % logInterval == 0: print("NEW GAME") while(movesLeft and not winner): if verbose and i % logInterval == 0: if player == 2: print("X's Turn") else: # player == 1 print("O's Turn") tt.printBoard(board) nextMoves = tt.listNextBoards(board, player) if makeRandomMoves: randomMove = random.randint(0, len(nextMoves)-1) randomMove = nextMoves[randomMove] oneHot = oneHotTicTacToe(randomMove, tt.togglePlayer(player), computersPlayer) trainingSession = forward(network, oneHot, dropout=True) bestMove = randomMove else: bestMoveDict = pickBestMove(nextMoves, player, computersPlayer) bestMove = bestMoveDict['move'] score = bestMoveDict['score'] trainingSession = bestMoveDict['trainingSession'] if player == 1: playerOneTrainingSessions.append(trainingSession) else: # player == 2 playerTwoTrainingSessions.append(trainingSession) board = bestMove player = tt.togglePlayer(player)
def train(net, criterion, optimizer, epochs):
    """Train the network over `epochs` games against a random opponent.

    On its turn the network's highest output is taken as the move. When that
    cell is occupied, one optimisation step is taken towards a target that is
    1 on empty cells and 0 elsewhere, and the network is asked again. After
    the game every recorded output is pushed towards a copy of itself whose
    played cell is set to the end score. The illegal-move count is kept
    locally and not returned. Returns nothing.
    """
    numInvalidMoves = 0
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)
        history = []  # (move, network output) for every move the network made

        while not winner and movesLeft:
            if player != computersPlayer:
                # opponents turn
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                while True:
                    # generate a move
                    encoded = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(encoded)
                    _, move = output.view(9).max(0)
                    if board.flatten()[move] == 0:
                        # move is valid: apply it and store it for later
                        cells = board.flatten()
                        cells[move] = computersPlayer
                        board = cells.reshape(3, 3)
                        history.append((move, output))
                        break
                    # invalid move, prime the whip
                    numInvalidMoves += 1
                    optimizer.zero_grad()
                    legalMask = np.where(board == 0, 1, 0)
                    target = torch.tensor(legalMask, dtype=torch.float).view(1, 1, 9)
                    loss = criterion(output, target)
                    loss.backward()
                    optimizer.step()

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        # get end score of game
        score = scoreEndBoard(board, winner, computersPlayer)
        for move, output in history:
            target = output.clone().view(9)
            target[move] = score
            target = target.view(1, 1, 9)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()