def train(qTables, numGames, alpha, tryHard):
    # anneal exploration so tryHard reaches 1.0 (pure exploitation) by the last game
    tryHardGrowth = (1 - tryHard) / numGames
    for i in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2
        keysSoFar = []
        movesSoFar = []
        computersPlayer = random.randint(1, 2)
        while (movesLeft and not winner):
            if player == computersPlayer:
                # agent's turn: consult (and grow) the Q-tables
                bestMove = pickBestNextMove(qTables, keysSoFar, board, player,
                                            computersPlayer, tryHard)
                movesSoFar.append(bestMove)
                tt.applyMove(player, bestMove, board)
            else:
                # opponent plays uniformly at random
                moves = tt.listEmpties(board)
                randomMove = random.choice(moves)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)
        # score the finished game and push that reward back into every
        # (state, move) pair the agent visited
        score = scoreEndBoard(board, winner, computersPlayer)
        updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
        tryHard = tryHard + tryHardGrowth
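# The helpers updateQTable and scoreEndBoard are defined elsewhere in this
# project. The sketch below only illustrates the kind of update the training
# loop above relies on, assuming each Q-table is a 3x3 grid indexed as
# qTable[y][x] and that moves are (x, y) tuples; it is not the project's
# actual implementation.
def updateQTableSketch(score, qTables, keysSoFar, movesSoFar, alpha):
    # walk the visited states and nudge the value of each chosen move
    # toward the final game score by the learning rate alpha
    for boardKey, move in zip(keysSoFar, movesSoFar):
        x, y = move
        qTable = qTables[boardKey]
        qTable[y][x] = qTable[y][x] + alpha * (score - qTable[y][x])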
def test(net, epochs):
    net.eval()
    numWins = 0
    numLosses = 0
    numTies = 0
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        board = np.zeros(shape=(3, 3))
        # board = np.random.randint(low = 0, high = 3, size = (3, 3))
        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)
        while (not winner and movesLeft):
            if player == computersPlayer:
                # generate a move
                oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                output = net(oneHot)
                # mask out invalid moves
                invalidMoves = np.where(board.flatten() > 0, True, False)
                maskedOutput = output.clone().view(9)
                maskedOutput[invalidMoves] = -10
                values, index = maskedOutput.max(0)
                # apply the move
                move = index
                board = board.flatten()
                board[move] = computersPlayer
                board = board.reshape(3, 3)
            else:
                # opponents turn
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)
        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:  # winner == False
            numTies += 1
    return numWins, numLosses, numTies
def pickBestNextMove(qTables, keysSoFar, board, player, myPlayer, tryHard, verbose=False):
    boardKey = (tt.hash(board), player)
    empties = tt.listEmpties(board)
    if boardKey in qTables:
        keysSoFar.append(boardKey)
        qTable = qTables[boardKey]
        if verbose:
            pprint(qTable)
        tryHardOrNot = random.random()
        if tryHardOrNot > tryHard:
            return random.choice(empties)
        else:
            greatest = -math.inf
            bestEmpty = None
            for empty in empties:
                x = empty[0]
                y = empty[1]
                val = qTable[y][x]
                if val > greatest:
                    greatest = val
                    bestEmpty = empty
            return bestEmpty
            # FUTURE FEATURE:
            #
            # take into account the tryhard number
    else:
        qTables[boardKey] = genQTable()
        keysSoFar.append(boardKey)
        return random.choice(empties)
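# genQTable is also defined elsewhere. A minimal sketch, assuming a fresh
# Q-table is just a 3x3 grid of zeros matching the qTable[y][x] indexing used
# in pickBestNextMove above (hypothetical, not the project's actual code).
def genQTableSketch():
    # one value per board cell, all initially neutral
    return [[0.0 for x in range(3)] for y in range(3)]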
def test(mct, numGames, numSims):
    numWins = 0
    numTies = 0
    numLosses = 0
    for i in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2
        computersPlayer = random.randint(1, 2)
        while (movesLeft and not winner):
            if player == computersPlayer:
                simulateChildren(mct, board, player, computersPlayer, numSims)
                bestBoard = pickBestNextMove(mct, board, player)
                # print("################")
                # tt.printBoard(board)
                # tt.printBoard(bestBoard)
                # print("BESTMOVE")
                board = bestBoard
            else:
                moves = tt.listEmpties(board)
                randomMove = random.choice(moves)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)
        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:  # tie
            numTies += 1
    return numWins, numLosses, numTies
def simulate(numSimulations, board, player, myPlayer):
    # play numSimulations purely random games from this position and return
    # the summed end-of-game score from myPlayer's point of view
    originBoard = copy.deepcopy(board)
    originPlayer = player
    totalScore = 0
    for i in range(numSimulations):
        simBoard = copy.deepcopy(originBoard)
        simPlayer = originPlayer
        winner = tt.getWinner(simBoard)
        movesLeft = not tt.noMoreMoves(simBoard)
        while (movesLeft and not winner):
            # both sides move at random during a rollout
            moves = tt.listEmpties(simBoard)
            randomMove = random.choice(moves)
            tt.applyMove(simPlayer, randomMove, simBoard)
            simPlayer = tt.togglePlayer(simPlayer)
            winner = tt.getWinner(simBoard)
            movesLeft = not tt.noMoreMoves(simBoard)
        score = scoreEndBoard(simBoard, winner, myPlayer)
        totalScore += score
    return totalScore
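# scoreEndBoard is shared by the Q-learning, MCTS, and network trainers but is
# not shown in this section. A minimal sketch of what such a scorer might look
# like, assuming a win for myPlayer is worth +1, a loss -1, and a draw 0
# (hypothetical values; the project's real reward scale may differ).
def scoreEndBoardSketch(board, winner, myPlayer):
    if winner == myPlayer:
        return 1.0        # our agent won
    elif winner:          # a non-False winner that isn't us means we lost
        return -1.0
    else:
        return 0.0        # no winner: a tied board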
def test(qTables, numGames, tryHard=1.0):
    numWins = 0
    numTies = 0
    numLosses = 0
    for i in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2
        keysSoFar = []
        movesSoFar = []
        computersPlayer = random.randint(1, 2)
        while (movesLeft and not winner):
            if player == computersPlayer:
                bestMove = pickBestNextMove(qTables, keysSoFar, board, player,
                                            computersPlayer, tryHard)
                movesSoFar.append(bestMove)
                tt.applyMove(player, bestMove, board)
            else:
                moves = tt.listEmpties(board)
                randomMove = random.choice(moves)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)
        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:  # tie
            numTies += 1
    return numWins, numLosses, numTies
def train(net, criterion, optimizer, epochs):
    net.train()
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)
        gameDuration = 0
        moves = []
        outputs = []
        while (not winner and movesLeft):
            if player == computersPlayer:
                # generate a move
                oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                output = net(oneHot)
                # mask out invalid moves
                invalidMoves = np.where(board.flatten() > 0, True, False)
                maskedOutput = output.clone().view(9)
                maskedOutput[invalidMoves] = -10
                values, index = maskedOutput.max(0)
                # apply the move
                move = index
                board = board.flatten()
                board[move] = computersPlayer
                board = board.reshape(3, 3)
                # store for later
                moves.append(move)
                outputs.append(output)
            else:
                # opponents turn
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            gameDuration += 1
            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)
        # get end score of game
        score = scoreEndBoard(board, winner, computersPlayer)
        # gameDurationMultiplier = 1.0 - gameDuration / 10
        # gameDurationMultiplier = gameDurationMultiplier * 0.9
        dilutionFactor = 0.9
        totalDilutant = 1.0
        for i, move in reversed(list(enumerate(moves))):
            totalDilutant *= dilutionFactor
            output = outputs[i]
            target = output.clone().view(9)
            target[move] = score * totalDilutant
            target = target.view(1, 1, 9)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
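# oneHotTicTacToe is defined elsewhere; the 18-element view(1, 1, 18) above
# suggests two 3x3 planes, one for the current player's marks and one for the
# opponent's. A minimal sketch under that assumption (hypothetical, not the
# project's actual encoding):
def oneHotTicTacToeSketch(board, player):
    mine = np.where(board == player, 1.0, 0.0)
    theirs = np.where(board == tt.togglePlayer(player), 1.0, 0.0)
    return torch.tensor(np.concatenate([mine.flatten(), theirs.flatten()]),
                        dtype=torch.float)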
def test(net, criterion, optimizer, epochs):
    numInvalidMoves = 0
    numWins = 0
    numLosses = 0
    numTies = 0
    optimizer.zero_grad()
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        board = np.zeros(shape=(3, 3))
        # board = np.random.randint(low = 0, high = 3, size = (3, 3))
        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)
        while (not winner and movesLeft):
            if player == computersPlayer:
                move = None
                moveValid = False
                while not moveValid:
                    # generate a move
                    oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(oneHot)
                    values, index = output.view(9).max(0)
                    if board.flatten()[index] == 0:
                        # if move is valid
                        moveValid = True
                        # apply the move
                        move = index
                        board = board.flatten()
                        board[move] = computersPlayer
                        board = board.reshape(3, 3)
                    else:
                        # invalid move, prime the whip
                        # print("invalid move")
                        numInvalidMoves += 1
                        optimizer.zero_grad()
                        validMoves = np.where(board == 0, 1, 0)
                        target = torch.tensor(validMoves, dtype=torch.float).view(1, 1, 9)
                        loss = criterion(output, target)
                        loss.backward()
                        optimizer.step()
            else:
                # opponents turn
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)
        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:  # winner == False
            numTies += 1
    return numWins, numLosses, numTies
def train(net, criterion, optimizer, epochs):
    numInvalidMoves = 0
    for i in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros(shape=(3, 3))
        # board = np.random.randint(low = 0, high = 3, size = (3, 3))
        movesLeft = np.any(np.where(board == 0, 1, 0))
        winner = tt.getWinner(board)
        moves = []
        outputs = []
        while (not winner and movesLeft):
            if player == computersPlayer:
                move = None
                moveValid = False
                while not moveValid:
                    # generate a move
                    oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(oneHot)
                    values, index = output.view(9).max(0)
                    if board.flatten()[index] == 0:
                        # if move is valid
                        moveValid = True
                        # apply the move
                        move = index
                        board = board.flatten()
                        board[move] = computersPlayer
                        board = board.reshape(3, 3)
                        # store for later
                        moves.append(move)
                        outputs.append(output)
                    else:
                        # invalid move, prime the whip
                        # print("invalid move")
                        numInvalidMoves += 1
                        optimizer.zero_grad()
                        validMoves = np.where(board == 0, 1, 0)
                        target = torch.tensor(validMoves, dtype=torch.float).view(1, 1, 9)
                        loss = criterion(output, target)
                        loss.backward()
                        optimizer.step()
            else:
                # opponents turn
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            movesLeft = np.any(np.where(board == 0, 1, 0))
            winner = tt.getWinner(board)
        # get end score of game
        score = scoreEndBoard(board, winner, computersPlayer)
        for i, move in enumerate(moves):
            output = outputs[i]
            target = output.clone().view(9)
            target[move] = score
            target = target.view(1, 1, 9)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
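# How this trainer and the matching tester above might be wired together.
# The network class itself isn't shown in this section, so the tiny model,
# the MSE loss, the SGD learning rate, and the epoch counts below are all
# assumptions made for illustration; the only constraints taken from the code
# above are the 18-value one-hot input and the 9-value output.
import torch.nn as nn
import torch.optim as optim

net = nn.Sequential(        # hypothetical stand-in for the project's network
    nn.Linear(18, 36),
    nn.ReLU(),
    nn.Linear(36, 9),
)
criterion = nn.MSELoss()    # assumed loss; any regression criterion fits the calls above
optimizer = optim.SGD(net.parameters(), lr=0.01)

train(net, criterion, optimizer, epochs=10000)
wins, losses, ties = test(net, criterion, optimizer, epochs=1000)
print(wins, losses, ties)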