def train(qTables, numGames, alpha, tryHard):
    """Fill ``qTables`` by playing ``numGames`` games against a random opponent.

    Each game starts with player 2 to move; the computer is randomly assigned
    to be player 1 or 2. The computer's moves come from ``pickBestNextMove``
    and the opponent picks uniformly among the empty cells. When the game
    ends, the final board is scored with ``scoreEndBoard`` and the result is
    handed to ``updateQTable`` together with the keys and moves of that game.

    Args:
        qTables: Q-table container, updated in place by ``updateQTable``.
        numGames: Number of games to play. Zero or a negative value is a
            no-op.
        alpha: Passed through to ``updateQTable`` (presumably the learning
            rate -- confirm against that helper).
        tryHard: Starting value handed to ``pickBestNextMove``; it is raised
            linearly after every game so that it reaches 1 after the last one.
    """
    if numGames <= 0:
        # Nothing to play; also avoids dividing by zero in the schedule below.
        return

    # Linear schedule: tryHard + numGames * tryHardGrowth == 1.
    tryHardGrowth = (1 - tryHard) / numGames

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2  # player 2 always opens
        keysSoFar = []
        movesSoFar = []
        computersPlayer = random.randint(1, 2)

        while movesLeft and not winner:
            if player == computersPlayer:
                bestMove = pickBestNextMove(
                    qTables, keysSoFar, board, player, computersPlayer, tryHard
                )
                movesSoFar.append(bestMove)
                tt.applyMove(player, bestMove, board)
            else:
                # Opponent plays a uniformly random empty cell.
                moves = tt.listEmpties(board)
                randomMove = random.choice(moves)
                tt.applyMove(player, randomMove, board)

            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        score = scoreEndBoard(board, winner, computersPlayer)
        updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
        tryHard = tryHard + tryHardGrowth
def test(net, epochs):
    """Evaluate ``net`` over ``epochs`` games against a random opponent.

    Every game starts on an empty 3x3 board with player 2 to move; the
    computer is randomly assigned to be player 1 or 2. On its turn the network
    is fed the one-hot encoded board and the highest-valued *empty* cell is
    played. The opponent picks uniformly among the empty cells.

    Args:
        net: Model called as ``net(x)`` with a ``(1, 1, 18)`` input; its
            output is viewed as 9 per-cell values.
        epochs: Number of games to play.

    Returns:
        Tuple ``(numWins, numLosses, numTies)`` from the computer's point of
        view.
    """
    # Function-scope import: the block only needs torch for no_grad().
    import torch

    net.eval()
    numWins = 0
    numLosses = 0
    numTies = 0

    for _ in tqdm(range(epochs)):
        player = 2  # player 2 always opens
        computersPlayer = random.randint(1, 2)
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)

        while not winner and movesLeft:
            if player == computersPlayer:
                oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                # Pure inference: no autograd graph is needed here.
                with torch.no_grad():
                    output = net(oneHot)

                # Occupied cells must never win the argmax. -inf is used
                # because a finite sentinel can still exceed the net's outputs.
                occupied = board.flatten() > 0
                maskedOutput = output.clone().view(9)
                maskedOutput[occupied] = float("-inf")
                _, index = maskedOutput.max(0)

                # Apply the move on a flat copy, then restore the 3x3 shape.
                move = int(index)
                flat = board.flatten()
                flat[move] = computersPlayer
                board = flat.reshape(3, 3)
            else:
                # Opponent plays a uniformly random empty cell.
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        if winner == computersPlayer:
            numWins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            numLosses += 1
        else:
            # No winner: the board filled up.
            numTies += 1

    return numWins, numLosses, numTies
def test(mct, numGames, numSims):
    """Score the tree-search player over ``numGames`` games vs. a random opponent.

    Player 2 always moves first and the computer is randomly assigned to be
    player 1 or 2. On the computer's turn ``simulateChildren`` is run with
    ``numSims`` and the board returned by ``pickBestNextMove`` replaces the
    current one. The opponent plays a uniformly random empty cell.

    Returns:
        Tuple ``(wins, losses, ties)`` from the computer's point of view.
    """
    wins = 0
    ties = 0
    losses = 0

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        mover = 2
        computer = random.randint(1, 2)

        # A fresh game always has at least one move, so test the end
        # condition after each move rather than before.
        while True:
            if mover != computer:
                empties = tt.listEmpties(board)
                tt.applyMove(mover, random.choice(empties), board)
            else:
                simulateChildren(mct, board, mover, computer, numSims)
                board = pickBestNextMove(mct, board, mover)

            mover = tt.togglePlayer(mover)
            winner = tt.getWinner(board)
            boardFull = tt.noMoreMoves(board)
            if winner or boardFull:
                break

        if winner == computer:
            wins += 1
        elif winner == tt.togglePlayer(computer):
            losses += 1
        else:
            # Nobody won: count it as a tie.
            ties += 1

    return wins, losses, ties
def simulate(numSimulations, board, player, myPlayer):
    """Sum the end scores of ``numSimulations`` random playouts from ``board``.

    Every playout starts from a private copy of ``board`` with ``player`` to
    move, and both sides play uniformly random empty cells until
    ``tt.getWinner`` reports a winner or ``tt.noMoreMoves`` reports a full
    board. The finished board is scored with ``scoreEndBoard`` for
    ``myPlayer``. The caller's ``board`` is never modified.

    Returns:
        The sum of the playout scores (``0`` when no playout is run).
    """
    snapshot = copy.deepcopy(board)
    total = 0

    for _ in range(numSimulations):
        rollout = copy.deepcopy(snapshot)
        mover = player

        while True:
            winner = tt.getWinner(rollout)
            boardFull = tt.noMoreMoves(rollout)
            if winner or boardFull:
                break
            candidates = tt.listEmpties(rollout)
            tt.applyMove(mover, random.choice(candidates), rollout)
            mover = tt.togglePlayer(mover)

        total += scoreEndBoard(rollout, winner, myPlayer)

    return total
def test(qTables, numGames, tryHard=1.0):
    """Score the Q-table player over ``numGames`` games vs. a random opponent.

    Player 2 always moves first and the computer is randomly assigned to be
    player 1 or 2. The computer's moves come from ``pickBestNextMove`` with the
    given ``tryHard``; the opponent plays a uniformly random empty cell. The
    Q-tables are not updated by this function itself.

    Returns:
        Tuple ``(wins, losses, ties)`` from the computer's point of view.
    """
    wins = 0
    ties = 0
    losses = 0

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        mover = 2
        visitedKeys = []
        chosenMoves = []
        computer = random.randint(1, 2)

        # A fresh game always has at least one move, so test the end
        # condition after each move rather than before.
        while True:
            if mover != computer:
                empties = tt.listEmpties(board)
                tt.applyMove(mover, random.choice(empties), board)
            else:
                choice = pickBestNextMove(
                    qTables, visitedKeys, board, mover, computer, tryHard
                )
                chosenMoves.append(choice)
                tt.applyMove(mover, choice, board)

            mover = tt.togglePlayer(mover)
            winner = tt.getWinner(board)
            boardFull = tt.noMoreMoves(board)
            if winner or boardFull:
                break

        if winner == computer:
            wins += 1
        elif winner == tt.togglePlayer(computer):
            losses += 1
        else:
            # Nobody won: count it as a tie.
            ties += 1

    return wins, losses, ties
def playGame():
    """Benchmark the Q-table player and, if reached, play an interactive game.

    Depending on the hard-coded ``saveQTables`` flag the Q-tables are either
    trained from scratch and pickled to ``qTables.pickle`` or loaded from that
    file. The tables are then scored over 1000 games against a random opponent
    and the tallies are printed.

    NOTE(review): ``quit()`` is called right after the benchmark, so the
    interactive human-vs-computer game that follows never runs as written.
    Confirm that this is intended before removing it.
    """
    saveQTables = False
    fileName = 'qTables.pickle'
    qTables = {}
    keysSoFar = []
    movesSoFar = []
    tryHard = 0
    alpha = 0.9
    numTrials = 1000000

    if saveQTables:
        train(qTables, numTrials, alpha, tryHard)
        with open(fileName, 'wb') as f:
            pickle.dump(qTables, f, pickle.HIGHEST_PROTOCOL)
    else:
        # SECURITY: unpickling can execute arbitrary code; only load a file
        # that this program wrote itself.
        with open(fileName, 'rb') as f:
            qTables = pickle.load(f)

    numWins, numLosses, numTies = test(qTables, 1000)
    print("VS RANDOM OPPONENT...")
    print("numWins:" + str(numWins))
    print("numLosses:" + str(numLosses))
    print("numTies:" + str(numTies))
    quit()

    # ---- interactive game (unreachable while the quit() above is present) ----
    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2  # player 2 (X) always opens
    computersPlayer = random.randint(1, 2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:
            # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            bestMove = pickBestNextMove(
                qTables, keysSoFar, board, player, computersPlayer,
                tryHard=1.0, verbose=True,
            )
            movesSoFar.append(bestMove)
            tt.applyMove(player, bestMove, board)
            player = tt.togglePlayer(player)
        elif player == tt.togglePlayer(computersPlayer):
            # Keep prompting until the human names an empty, in-range cell.
            while True:
                move = input("input move of form 'y x' ")
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value: `is not 0` is an identity test and
                    # misfires for anything but a cached small int.
                    cellIsFree = board[y][x] == 0
                except (ValueError, IndexError):
                    # Too short, non-numeric, or off the board.
                    cellIsFree = False
                if cellIsFree:
                    break
                print("!!!INVALID MOVE!!!")

            board[y][x] = tt.togglePlayer(computersPlayer)
            player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)

    # Feed the finished game back into the Q-tables and show what was touched.
    score = scoreEndBoard(board, winner, computersPlayer)
    updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
    for key in keysSoFar:
        pprint(key)
        pprint(qTables[key])

    if winner:
        if winner == 2:
            print("WINNER: X")
        else:
            # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
def train(net, criterion, optimizer, epochs):
    """Train ``net`` on ``epochs`` games against a random opponent.

    Every game starts on an empty 3x3 board with player 2 to move; the
    computer is randomly assigned to be player 1 or 2. On its turn the network
    is fed the one-hot encoded board and the highest-valued *empty* cell is
    played. After the game the final board is scored with ``scoreEndBoard``
    and the network is updated once per computer move, last move first. The
    target for each update is the network's own output with the played cell
    replaced by ``score * 0.9 ** k``, where ``k`` is 1 for the last move, 2
    for the one before, and so on.

    Args:
        net: Model called as ``net(x)`` with a ``(1, 1, 18)`` input and an
            output viewable as ``(1, 1, 9)``.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer over the parameters of ``net``.
        epochs: Number of games to play.
    """
    net.train()
    dilutionFactor = 0.9  # per-move decay applied to the final score

    for _ in tqdm(range(epochs)):
        player = 2  # player 2 always opens
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)
        moves = []
        inputs = []

        while not winner and movesLeft:
            if player == computersPlayer:
                oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                # This pass only selects the move; the pass that is trained on
                # is recomputed after the game, so detach it here.
                output = net(oneHot).detach()

                # Occupied cells must never win the argmax. -inf is used
                # because a finite sentinel can still exceed the net's outputs.
                occupied = board.flatten() > 0
                maskedOutput = output.clone().view(9)
                maskedOutput[occupied] = float("-inf")
                _, index = maskedOutput.max(0)

                # Apply the move on a flat copy, then restore the 3x3 shape.
                move = int(index)
                flat = board.flatten()
                flat[move] = computersPlayer
                board = flat.reshape(3, 3)

                # Remember what the net saw and what it played.
                moves.append(move)
                inputs.append(oneHot)
            else:
                # Opponent plays a uniformly random empty cell.
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        # Get the end score of the game.
        score = scoreEndBoard(board, winner, computersPlayer)

        totalDilutant = 1.0
        for oneHot, move in zip(reversed(inputs), reversed(moves)):
            totalDilutant *= dilutionFactor
            optimizer.zero_grad()
            # Re-run the forward pass with the current weights. Calling
            # backward() on a graph built before an earlier optimizer.step()
            # trips autograd's in-place modification check.
            output = net(oneHot)
            # The target is a constant: the current prediction with only the
            # played cell moved toward the diluted score.
            target = output.detach().clone().view(9)
            target[move] = score * totalDilutant
            loss = criterion(output, target.view(1, 1, 9))
            loss.backward()
            optimizer.step()
def test(net, criterion, optimizer, epochs):
    """Play ``epochs`` games against a random opponent and tally the results.

    Every game starts on an empty 3x3 board with player 2 to move; the
    computer is randomly assigned to be player 1 or 2. On its turn the network
    is queried until its top-valued cell is empty. Each time it picks an
    occupied cell, one optimizer step is taken toward a 0/1 map of the empty
    cells, so this function DOES update the network's weights.

    Returns:
        Tuple ``(wins, losses, ties)`` from the computer's point of view.
    """
    invalidCount = 0
    wins = 0
    losses = 0
    ties = 0
    optimizer.zero_grad()

    for _ in tqdm(range(epochs)):
        mover = 2
        computer = random.randint(1,2)
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)

        while movesLeft and not winner:
            if mover != computer:
                # Opponent's turn: uniformly random empty cell.
                empties = tt.listEmpties(board)
                tt.applyMove(mover, random.choice(empties), board)
            else:
                while True:
                    encoded = oneHotTicTacToe(board, computer).view(1, 1, 18)
                    prediction = net(encoded)
                    _, choice = prediction.view(9).max(0)

                    cells = board.flatten()
                    if cells[choice] == 0:
                        # Legal move: play it and leave the retry loop.
                        cells[choice] = computer
                        board = cells.reshape(3, 3)
                        break

                    # Illegal move: push the outputs toward the empty-cell map.
                    invalidCount += 1
                    optimizer.zero_grad()
                    openCells = np.where(board == 0, 1, 0)
                    wanted = torch.tensor(openCells, dtype=torch.float).view(1, 1, 9)
                    penalty = criterion(prediction, wanted)
                    penalty.backward()
                    optimizer.step()

            mover = tt.togglePlayer(mover)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        if winner == computer:
            wins += 1
        elif winner == tt.togglePlayer(computer):
            losses += 1
        else:
            # No winner: the board filled up.
            ties += 1

    return wins, losses, ties
def train(net, criterion, optimizer, epochs):
    """Train ``net`` on ``epochs`` games against a random opponent.

    Every game starts on an empty 3x3 board with player 2 to move; the
    computer is randomly assigned to be player 1 or 2. On its turn the network
    is queried until its top-valued cell is empty. Each time it picks an
    occupied cell, one optimizer step is taken toward a 0/1 map of the empty
    cells. After the game the final board is scored with ``scoreEndBoard`` and
    the network is updated once per computer move, in playing order. The
    target for each update is the network's own output with the played cell
    replaced by the score.

    Args:
        net: Model called as ``net(x)`` with a ``(1, 1, 18)`` input and an
            output viewable as ``(1, 1, 9)``.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer over the parameters of ``net``.
        epochs: Number of games to play.
    """
    for _ in tqdm(range(epochs)):
        player = 2  # player 2 always opens
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros(shape=(3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)
        moves = []
        inputs = []

        while not winner and movesLeft:
            if player == computersPlayer:
                moveValid = False
                while not moveValid:
                    oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(oneHot)
                    _, index = output.view(9).max(0)
                    move = int(index)

                    flat = board.flatten()
                    if flat[move] == 0:
                        # Legal move: apply it and restore the 3x3 shape.
                        moveValid = True
                        flat[move] = computersPlayer
                        board = flat.reshape(3, 3)
                        # Remember what the net saw and what it played.
                        moves.append(move)
                        inputs.append(oneHot)
                    else:
                        # Illegal move: push the outputs toward the empty-cell
                        # map and ask again.
                        optimizer.zero_grad()
                        validMoves = np.where(board == 0, 1, 0)
                        target = torch.tensor(validMoves, dtype=torch.float).view(1, 1, 9)
                        loss = criterion(output, target)
                        loss.backward()
                        optimizer.step()
            else:
                # Opponent plays a uniformly random empty cell.
                empties = tt.listEmpties(board)
                randomMove = random.choice(empties)
                tt.applyMove(player, randomMove, board)

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        # Get the end score of the game.
        score = scoreEndBoard(board, winner, computersPlayer)

        for oneHot, move in zip(inputs, moves):
            optimizer.zero_grad()
            # Re-run the forward pass with the current weights. Calling
            # backward() on a graph built before an earlier optimizer.step()
            # (mid-game or in this loop) trips autograd's in-place
            # modification check.
            output = net(oneHot)
            # The target is a constant: the current prediction with only the
            # played cell replaced by the game score.
            target = output.detach().clone().view(9)
            target[move] = score
            loss = criterion(output, target.view(1, 1, 9))
            loss.backward()
            optimizer.step()