def test(net, epochs):
    """Evaluate `net` against a random opponent and tally the results.

    Plays `epochs` games on a fresh 3x3 numpy board. Player 2 always moves
    first and the network is randomly assigned to play as 1 or 2. On its
    turn the network's 9 outputs are masked so occupied cells cannot win
    the arg-max; the opponent picks uniformly among the empty cells.

    Returns a tuple ``(numWins, numLosses, numTies)`` from the network's
    point of view.
    """
    net.eval()
    tally = {'win': 0, 'loss': 0, 'tie': 0}

    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        board = np.zeros((3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)

        while not winner and movesLeft:
            if player != computersPlayer:
                # opponent: uniformly random empty cell
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                # network: score all nine cells from the one-hot encoded board
                output = net(oneHotTicTacToe(board, computersPlayer).view(1, 1, 18))

                # push occupied cells far below any legal move
                occupied = board.flatten() > 0
                masked = output.clone().view(9)
                masked[occupied] = -10
                _, move = masked.max(0)

                # write the chosen cell into a flat copy, then restore 3x3
                cells = board.flatten()
                cells[move] = computersPlayer
                board = cells.reshape(3, 3)

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        if winner == computersPlayer:
            tally['win'] += 1
        elif winner == tt.togglePlayer(computersPlayer):
            tally['loss'] += 1
        else:
            # no winner: the board filled up
            tally['tie'] += 1

    return tally['win'], tally['loss'], tally['tie']
def handler(x, y, g, p1, p2, b, eve):
    """
    Handle a click on the board cell at (x, y).

    If the clicked cell is empty it is marked with the current player's
    symbol. Then either the computer replies immediately (when the current
    player is not named 'computer', the other player is, and the game is
    not finished) or the turn passes to the other player. Afterwards the
    buttons are redrawn and, if the game is finished, the board is disabled
    and the result is shown in a message box.

    Reads and may rebind the module-level ``current`` player.

    :param x: first index into ``g`` of the clicked cell
    :param y: second index into ``g`` of the clicked cell
    :param g: the game situation, indexed as ``g[x][y]``; '' marks an empty cell
    :param p1: player
    :param p2: player
    :param b: list of buttons
    :param eve: the event (not used in the body)
    """
    global current
    spec = Player.get_spec(current)
    # Only an empty cell can be played; a click on a taken cell changes nothing
    # but still falls through to the redraw / finished check below.
    if g[x][y] == '':
        g[x][y] = spec
        if Player.get_name(current) != 'computer' and Player.get_name(
                switch_player(
                    current, p1, p2)) == "computer" and not tictactoe.isFinished(g):
            # Computer answers right away; `current` is left unchanged here.
            # This rebinds the local name `g` only.
            # NOTE(review): whether minmax.IA also mutates the grid in place
            # cannot be seen from here - confirm against its definition.
            g = minmax.IA(g, switch_player(current, p1, p2), current, 10, tictactoe)
        else:
            current = switch_player(current, p1, p2)
    __redraw(g, b)
    if tictactoe.isFinished(g):
        # NOTE(review): `g` is passed twice to getWinner - confirm this matches
        # the expected signature.
        winner = tictactoe.getWinner(g, g, switch_player(current, p1, p2))
        _disable(g, b)
        msg = finalState(winner)
        msgbox.showinfo("finished", msg)
def simulateChildren(mct, board, player, myPlayer, numSims, verbose=False):
    """Simulate every child of `board` and return the best child score.

    `mct` is a dict keyed by ``genFullKey(board, player)``; the entry for
    this position is created with 0 if missing.

    * Terminal position (a winner exists or there are no next boards):
      the end-board score, scaled by ``numSims ** 2``, is stored under this
      position's key and returned.
    * Otherwise ``simChildrenInner`` is run on each next board (with a tqdm
      progress bar when `verbose` is true) and the highest score recorded
      in `mct` for those children is returned.
    """
    key = genFullKey(board, player)
    mct.setdefault(key, 0)

    nextBoards = tt.listNextBoards(board, player)
    winner = tt.getWinner(board)

    if winner or not nextBoards:
        # terminal: weight the outcome like numSims**2 simulated results
        terminalScore = scoreEndBoard(board, tt.togglePlayer(player), myPlayer)
        terminalScore = terminalScore * math.pow(numSims, 2)
        mct[key] = terminalScore
        return terminalScore

    # expand each child; only the iteration wrapper differs when verbose
    children = tqdm(nextBoards) if verbose else nextBoards
    for child in children:
        simChildrenInner(mct, child, player, myPlayer, numSims)

    # children are keyed with the other player to move
    opponent = tt.togglePlayer(player)
    childScores = [mct[genFullKey(child, opponent)] for child in nextBoards]
    childScores.sort()
    return childScores[-1]
def train(qTables, numGames, alpha, tryHard):
    """Train the Q-tables by playing `numGames` games against a random opponent.

    Player 2 always moves first; the learner is randomly assigned to play
    as 1 or 2 each game. After every game the final board is scored from
    the learner's point of view and ``updateQTable`` is applied to the
    keys/moves recorded during that game.

    :param qTables: Q-table store, passed to ``pickBestNextMove`` and
        ``updateQTable`` (updated in place by those helpers - presumably a
        dict; confirm against their definitions)
    :param numGames: number of training games; nothing happens if <= 0
    :param alpha: learning rate forwarded to ``updateQTable``
    :param tryHard: initial value forwarded to ``pickBestNextMove``; it is
        increased linearly after each game so that it reaches 1 after the
        last one
    :return: None
    """
    # Guard: the growth step divides by numGames, which used to raise
    # ZeroDivisionError for numGames == 0 even though there is nothing to do.
    if numGames <= 0:
        return

    tryHardGrowth = (1 - tryHard) / numGames
    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2
        # keysSoFar is handed to pickBestNextMove, which presumably records
        # the visited state keys in it - verify against that helper.
        keysSoFar = []
        movesSoFar = []
        computersPlayer = random.randint(1, 2)

        while movesLeft and not winner:
            if player == computersPlayer:
                bestMove = pickBestNextMove(qTables, keysSoFar, board, player,
                                            computersPlayer, tryHard)
                movesSoFar.append(bestMove)
                tt.applyMove(player, bestMove, board)
            else:
                # opponent plays a uniformly random empty cell
                randomMove = random.choice(tt.listEmpties(board))
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        score = scoreEndBoard(board, winner, computersPlayer)
        updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
        tryHard = tryHard + tryHardGrowth
def minimaxGenBoardScores(inBoard, myPlayer, inPlayer):
    """Compute minimax scores for every board reachable from `inBoard`.

    Iterative (explicit stack) depth-first minimax. A board is scored once
    all of its successor boards have scores: the maximum of the child
    scores when the player to move is `myPlayer`, otherwise the minimum.
    Finished boards (a winner, or no moves left) are scored with
    ``tt.scoreEndBoard``.

    :param inBoard: the starting board
    :param myPlayer: the player whose point of view the scores take
    :param inPlayer: the player to move on `inBoard`
    :return: dict mapping ``tt.hash(board)`` to that board's score
    """
    boardScores = {}
    stack = [{'board': inBoard, 'player': inPlayer}]

    while stack:
        # Peek only: a frame is popped once its own score has been stored.
        args = stack[-1]
        board = args['board']
        player = args['player']
        maximizing = player == myPlayer

        # base case, end board
        winner = tt.getWinner(board)
        if winner:
            boardScores[tt.hash(board)] = tt.scoreEndBoard(
                board, winner, myPlayer)
            # diagnostic output for every won board (kept from the original)
            print("###########")
            tt.printBoard(board)
            print(boardScores[tt.hash(board)])
            stack.pop()
        elif tt.noMoreMoves(board):
            boardScores[tt.hash(board)] = tt.scoreEndBoard(
                board, winner, myPlayer)
            stack.pop()
        else:
            # nobody won yet, and there are more moves
            nextBoards = tt.listNextBoards(board, player)
            allPresent = True
            for nextBoard in nextBoards:
                if tt.hash(nextBoard) not in boardScores:
                    # child not scored yet: schedule it and revisit this
                    # frame after the children have been processed
                    allPresent = False
                    stack.append({
                        'board': nextBoard,
                        'player': tt.togglePlayer(player),
                    })
            if allPresent:
                # Fix: iterate with `nextBoard`. The original comprehension
                # looped over `board` while reading the leaked `nextBoard`,
                # so every entry was the score of the last child.
                scores = [
                    boardScores[tt.hash(nextBoard)] for nextBoard in nextBoards
                ]
                if maximizing:
                    boardScores[tt.hash(board)] = max(scores)
                else:
                    boardScores[tt.hash(board)] = min(scores)
                stack.pop()

    return boardScores
def minimax_inner(count, board, player, myPlayer, prevBoards, alpha, beta):
    """Recursively score `board` with minimax and alpha-beta cut-offs.

    :param count: mutable sequence; ``count[0]`` is incremented once for
        every board that is not answered from the cache
    :param board: the board to score
    :param player: compared with `myPlayer` to choose between maximizing
        and minimizing; children are generated for ``togglePlayer(player)``
    :param myPlayer: the player whose point of view the score takes
    :param prevBoards: cache mapping ``tt.hash(board)`` to a score; read at
        the top and filled with the score of every child searched here
    :param alpha: best score found so far for the maximizing side
    :param beta: best score found so far for the minimizing side
    :return: the score of `board`
    """
    hashKey = tt.hash(board)
    # Cache hit: reuse the stored score without counting the board.
    if hashKey in prevBoards:
        return prevBoards[hashKey]
    count[0] += 1
    winner = tt.getWinner(board)
    if winner:
        score = scoreEndBoard(board, winner, myPlayer)
        # prevBoards[hashKey] = score
        return score
    elif tt.noMoreMoves(board):
        # Full board without a winner; `winner` is falsy here.
        score = scoreEndBoard(board, winner, myPlayer)
        # prevBoards[hashKey] = score
        return score
    else:
        # NOTE(review): successors are listed for togglePlayer(player), which
        # suggests `player` is the side that moved last - confirm with callers.
        nextBoards = tt.listNextBoards(board, tt.togglePlayer(player))
        if player == myPlayer:
            # maximizing next moves
            bestScore = -math.inf
            for nextBoard in nextBoards:
                # cut-off: the remaining children are not searched
                if beta <= alpha:
                    break
                score = minimax_inner(count,
                                      nextBoard,
                                      player=tt.togglePlayer(player),
                                      myPlayer=myPlayer,
                                      prevBoards=prevBoards,
                                      alpha=alpha,
                                      beta=beta)
                # NOTE(review): a child that was cut off returns its best
                # score so far, and that value is cached here - confirm
                # caching such values is intended.
                prevBoards[tt.hash(nextBoard)] = score
                if score > bestScore:
                    bestScore = score
                    alpha = bestScore
            return bestScore
        else:
            # minimizing next moves
            bestScore = math.inf
            for nextBoard in nextBoards:
                # cut-off: the remaining children are not searched
                if beta <= alpha:
                    break
                score = minimax_inner(count,
                                      nextBoard,
                                      player=tt.togglePlayer(player),
                                      myPlayer=myPlayer,
                                      prevBoards=prevBoards,
                                      alpha=alpha,
                                      beta=beta)
                prevBoards[tt.hash(nextBoard)] = score
                if score < bestScore:
                    bestScore = score
                    beta = bestScore
            return bestScore
def simulate(numSimulations, board, player, myPlayer):
    """Run random playouts from `board` and return the summed end scores.

    Each of the `numSimulations` playouts starts from a deep copy of the
    given position (the caller's board is never modified), alternates
    uniformly random moves beginning with `player`, and stops when a winner
    appears or no moves are left. The final board of each playout is scored
    with ``scoreEndBoard`` from `myPlayer`'s point of view.

    Returns 0 when `numSimulations` is 0.
    """
    snapshot = copy.deepcopy(board)
    scoreSum = 0

    for _ in range(numSimulations):
        playBoard = copy.deepcopy(snapshot)
        mover = player

        while True:
            winner = tt.getWinner(playBoard)
            boardFull = tt.noMoreMoves(playBoard)
            if boardFull or winner:
                break
            # random move for the side to play, then hand over the turn
            choice = random.choice(tt.listEmpties(playBoard))
            tt.applyMove(mover, choice, playBoard)
            mover = tt.togglePlayer(mover)

        scoreSum += scoreEndBoard(playBoard, winner, myPlayer)

    return scoreSum
def playGame():
    """Play one interactive console game against the minimax computer player.

    The computer plays as 2 (shown as X) and moves first; the human plays
    the other side (shown as O) and enters moves as ``'y x'`` (two single
    digits separated by one character). Invalid input - unparsable text,
    coordinates outside the board, or an occupied cell - is rejected and
    the prompt is repeated. The board is printed before every turn and at
    the end, followed by the result.
    """
    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = 2  # random.randint(1,2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            board = pickBestNextBoard(board, player, computersPlayer)
            player = tt.togglePlayer(player)
        elif player == tt.togglePlayer(computersPlayer):
            validMove = False
            while not validMove:
                move = input("input move of form 'y x' ")
                # validate move
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value: `is not 0` tests object identity and
                    # is unreliable for ints (always true for numpy scalars).
                    cellTaken = board[y][x] != 0
                except (ValueError, IndexError):
                    # non-numeric / too short input, or off-board coordinates
                    print("!!!INVALID MOVE!!!")
                    continue
                if cellTaken:
                    print("!!!INVALID MOVE!!!")
                    continue
                validMove = True
                board[y][x] = tt.togglePlayer(computersPlayer)
            player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)
    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
def test(mct, numGames, numSims):
    """Pit the Monte-Carlo-tree player against a random opponent.

    Plays `numGames` games. Player 2 always moves first and the computer is
    randomly assigned to play as 1 or 2. On its turn the computer runs
    ``simulateChildren`` with `numSims` simulations and takes the board
    chosen by ``pickBestNextMove``; the opponent plays a uniformly random
    empty cell.

    Returns ``(numWins, numLosses, numTies)`` from the computer's side.
    """
    wins = losses = ties = 0

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        player = 2
        computersPlayer = random.randint(1, 2)

        # a game always has at least one move, so test the end condition
        # after each move
        while True:
            if player != computersPlayer:
                randomMove = random.choice(tt.listEmpties(board))
                tt.applyMove(player, randomMove, board)
            else:
                simulateChildren(mct, board, player, computersPlayer, numSims)
                board = pickBestNextMove(mct, board, player)

            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            boardFull = tt.noMoreMoves(board)
            if winner or boardFull:
                break

        if winner == computersPlayer:
            wins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            losses += 1
        else:
            # nobody won
            ties += 1

    return wins, losses, ties
def test(qTables, numGames, tryHard=1.0):
    """Pit the Q-table player against a random opponent.

    Plays `numGames` games without updating the tables in this function.
    Player 2 always moves first and the computer is randomly assigned to
    play as 1 or 2. The computer's moves come from ``pickBestNextMove``
    (called with `tryHard`); the opponent plays a uniformly random empty
    cell.

    Returns ``(numWins, numLosses, numTies)`` from the computer's side.
    """
    results = {'wins': 0, 'losses': 0, 'ties': 0}

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        player = 2
        visitedKeys = []
        chosenMoves = []
        computersPlayer = random.randint(1, 2)

        # a game always has at least one move, so test the end condition
        # after each move
        while True:
            if player != computersPlayer:
                randomMove = random.choice(tt.listEmpties(board))
                tt.applyMove(player, randomMove, board)
            else:
                bestMove = pickBestNextMove(qTables, visitedKeys, board,
                                            player, computersPlayer, tryHard)
                chosenMoves.append(bestMove)
                tt.applyMove(player, bestMove, board)

            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            boardFull = tt.noMoreMoves(board)
            if winner or boardFull:
                break

        if winner == computersPlayer:
            results['wins'] += 1
        elif winner == tt.togglePlayer(computersPlayer):
            results['losses'] += 1
        else:
            # nobody won
            results['ties'] += 1

    return results['wins'], results['losses'], results['ties']
def playGame():
    """Train or load the Q-tables, benchmark them, then play a console game.

    With ``saveQTables`` set, the tables are trained for ``numTrials``
    games and pickled to ``fileName``; otherwise they are loaded from that
    file. The tables are then tested over 1000 games against a random
    opponent and the tally is printed.

    NOTE(review): the unconditional ``quit()`` after the benchmark exits the
    interpreter, so the interactive game below it never runs as written.
    It is kept because removing it would change what the script does.

    In the interactive part the human enters moves as ``'y x'`` (two single
    digits separated by one character); unparsable input, off-board
    coordinates and occupied cells are rejected and the prompt repeats.
    """
    saveQTables = False
    fileName = 'qTables.pickle'
    qTables = {}
    keysSoFar = []
    movesSoFar = []
    tryHard = 0
    alpha = 0.9
    numTrials = 1000000

    if saveQTables:
        train(qTables, numTrials, alpha, tryHard)
        with open(fileName, 'wb') as f:
            pickle.dump(qTables, f, pickle.HIGHEST_PROTOCOL)
    else:
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # load table files produced by this program.
        with open(fileName, 'rb') as f:
            qTables = pickle.load(f)

    numWins, numLosses, numTies = test(qTables, 1000)
    print("VS RANDOM OPPONENT...")
    print("numWins:" + str(numWins))
    print("numLosses:" + str(numLosses))
    print("numTies:" + str(numTies))
    quit()

    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = random.randint(1, 2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            bestMove = pickBestNextMove(qTables, keysSoFar, board, player,
                                        computersPlayer, tryHard=1.0,
                                        verbose=True)
            movesSoFar.append(bestMove)
            tt.applyMove(player, bestMove, board)
            player = tt.togglePlayer(player)
        elif player == tt.togglePlayer(computersPlayer):
            validMove = False
            while not validMove:
                move = input("input move of form 'y x' ")
                # validate move
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value: `is not 0` tests object identity and
                    # is unreliable for ints (always true for numpy scalars).
                    cellTaken = board[y][x] != 0
                except (ValueError, IndexError):
                    # non-numeric / too short input, or off-board coordinates
                    print("!!!INVALID MOVE!!!")
                    continue
                if cellTaken:
                    print("!!!INVALID MOVE!!!")
                    continue
                validMove = True
                board[y][x] = tt.togglePlayer(computersPlayer)
            player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)

    # learn from the finished game, then show the table rows that were used
    score = scoreEndBoard(board, winner, computersPlayer)
    updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
    for key in keysSoFar:
        pprint(key)
        pprint(qTables[key])

    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
def test(net, criterion, optimizer, epochs):
    """Play `epochs` games against a random opponent and tally the results.

    Player 2 always moves first and the network is randomly assigned to
    play as 1 or 2. The network's arg-max output is used as its move.
    Unlike a pure evaluation, an arg-max that points at an occupied cell
    triggers one optimizer step towards the 0/1 map of empty cells, and the
    network is asked again until it picks a free cell.

    Returns ``(numWins, numLosses, numTies)`` from the network's side.
    """
    numInvalidMoves = 0
    wins = losses = ties = 0
    optimizer.zero_grad()

    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        board = np.zeros((3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)

        while not winner and movesLeft:
            if player != computersPlayer:
                # opponent: uniformly random empty cell
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                while True:
                    # ask the network for its preferred cell
                    output = net(oneHotTicTacToe(board, computersPlayer).view(1, 1, 18))
                    _, index = output.view(9).max(0)

                    if board.flatten()[index] == 0:
                        # free cell: play it
                        cells = board.flatten()
                        cells[index] = computersPlayer
                        board = cells.reshape(3, 3)
                        break

                    # occupied cell: nudge the output towards the free cells
                    numInvalidMoves += 1
                    optimizer.zero_grad()
                    validMoves = np.where(board == 0, 1, 0)
                    target = torch.tensor(validMoves, dtype=torch.float).view(1, 1, 9)
                    loss = criterion(output, target)
                    loss.backward()
                    optimizer.step()

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        if winner == computersPlayer:
            wins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            losses += 1
        else:
            # no winner: the board filled up
            ties += 1

    return wins, losses, ties
def playGame():
    """Build or load the Monte-Carlo tree, then play a console game.

    With ``saveMCTree`` set, the tree is built from the empty board with
    ``numSimsPreGame`` simulations, pickled to ``fileName`` and the program
    exits. Otherwise the tree is loaded from that file and an interactive
    game starts: the computer is randomly assigned to play as 1 or 2
    (2 is shown as X and moves first), refines the tree with
    ``numSimsOnline`` simulations before each of its moves, and the human
    enters moves as ``'y x'`` (two single digits separated by one
    character). Unparsable input, off-board coordinates and occupied cells
    are rejected and the prompt repeats.
    """
    mct = {}
    board = tt.genBoard()
    player = 2
    computersPlayer = 2
    numSimsPreGame = 100000
    numSimsOnline = 100
    saveMCTree = False
    fileName = 'mct.pickle'

    if saveMCTree:
        simulateChildren(mct, board, player, computersPlayer, numSimsPreGame,
                         verbose=True)
        with open(fileName, 'wb') as f:
            pickle.dump(mct, f, pickle.HIGHEST_PROTOCOL)
        quit()
    else:
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # load tree files produced by this program.
        with open(fileName, 'rb') as f:
            mct = pickle.load(f)

    # Recorded benchmark vs a random opponent (100 games, fresh tree):
    # w 0.6, t 0.11, l 0.3

    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = random.randint(1, 2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            simulateChildren(mct, board, player, computersPlayer,
                             numSimsOnline, verbose=True)
            board = pickBestNextMove(mct, board, player)
        elif player == tt.togglePlayer(computersPlayer):
            validMove = False
            while not validMove:
                move = input("input move of form 'y x' ")
                # validate move
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value: `is not 0` tests object identity and
                    # is unreliable for ints (always true for numpy scalars).
                    cellTaken = board[y][x] != 0
                except (ValueError, IndexError):
                    # non-numeric / too short input, or off-board coordinates
                    print("!!!INVALID MOVE!!!")
                    continue
                if cellTaken:
                    print("!!!INVALID MOVE!!!")
                    continue
                validMove = True
                board[y][x] = tt.togglePlayer(computersPlayer)

        player = tt.togglePlayer(player)
        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)
    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
def train(net, criterion, optimizer, epochs):
    """Train `net` by playing `epochs` games against a random opponent.

    Player 2 always moves first and the network is randomly assigned to
    play as 1 or 2. Two kinds of updates happen:

    * during a game, an arg-max that points at an occupied cell triggers
      one optimizer step towards the 0/1 map of empty cells, and the
      network is asked again until it picks a free cell;
    * after a game, for every move the network played, the stored output
      is trained towards a copy of itself in which the played cell is
      replaced by the game's end score.

    NOTE(review): the stored outputs were computed before later optimizer
    steps changed the parameters - confirm that back-propagating through
    them is intended and supported by the torch version in use.
    """
    numInvalidMoves = 0

    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros((3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)
        playedMoves = []
        playedOutputs = []

        while not winner and movesLeft:
            if player != computersPlayer:
                # opponent: uniformly random empty cell
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                while True:
                    # ask the network for its preferred cell
                    output = net(oneHotTicTacToe(board, computersPlayer).view(1, 1, 18))
                    _, index = output.view(9).max(0)

                    if board.flatten()[index] == 0:
                        # free cell: play it and remember it for the
                        # end-of-game update
                        cells = board.flatten()
                        cells[index] = computersPlayer
                        board = cells.reshape(3, 3)
                        playedMoves.append(index)
                        playedOutputs.append(output)
                        break

                    # occupied cell: nudge the output towards the free cells
                    numInvalidMoves += 1
                    optimizer.zero_grad()
                    validMoves = np.where(board == 0, 1, 0)
                    target = torch.tensor(validMoves, dtype=torch.float).view(1, 1, 9)
                    loss = criterion(output, target)
                    loss.backward()
                    optimizer.step()

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        # get end score of game
        score = scoreEndBoard(board, winner, computersPlayer)

        for move, output in zip(playedMoves, playedOutputs):
            # only the played cell differs between output and target
            target = output.clone().view(9)
            target[move] = score
            target = target.view(1, 1, 9)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
bestMove = randomMove else: bestMoveDict = pickBestMove(nextMoves, player, computersPlayer) bestMove = bestMoveDict['move'] score = bestMoveDict['score'] trainingSession = bestMoveDict['trainingSession'] if player == 1: playerOneTrainingSessions.append(trainingSession) else: # player == 2 playerTwoTrainingSessions.append(trainingSession) board = bestMove player = tt.togglePlayer(player) winner = tt.getWinner(board) movesLeft = not tt.noMoreMoves(board) # do backprop here playerOneTruth = None playerTwoTruth = None if winner == False: truth = np.array([0.0, 1.0, 0.0]) playerOneTruth = truth playerTwoTruth = truth elif winner == 1: playerOneTruth = np.array([0.0, 0.0, 1.0]) playerTwoTruth = np.array([1.0, 0.0, 0.0]) else: # winner == 2 playerTwoTruth = np.array([0.0, 0.0, 1.0]) playerOneTruth = np.array([1.0, 0.0, 0.0])
def trainAgainstSelf(net, criterion, optimizer, epochs):
    """Train `net` through self-play for `epochs` games.

    The same network plays both sides. Player 2 always moves first;
    ``computersPlayer`` (random 1 or 2) only decides which side is
    recorded as side A. Every move is the arg-max of the network output
    after occupied cells are masked to -10.

    After each game both sides are trained in turn, side A first: for each
    move a side played, its stored output is trained towards a copy of
    itself in which the played cell is replaced by the end score from that
    side's point of view.
    """
    net.train()

    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros((3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)
        movesA, outputsA = [], []
        movesB, outputsB = [], []

        while not winner and movesLeft:
            # which side is acting, and which mark it places
            isSideA = player == computersPlayer
            mover = computersPlayer if isSideA else tt.togglePlayer(computersPlayer)

            # generate a move
            output = net(oneHotTicTacToe(board, mover).view(1, 1, 18))

            # mask out invalid moves
            occupied = board.flatten() > 0
            masked = output.clone().view(9)
            masked[occupied] = -10
            _, move = masked.max(0)

            # apply the move
            cells = board.flatten()
            cells[move] = mover
            board = cells.reshape(3, 3)

            # store for later
            if isSideA:
                movesA.append(move)
                outputsA.append(output)
            else:
                movesB.append(move)
                outputsB.append(output)

            player = tt.togglePlayer(player)
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        # train side A, then side B, each with its own view of the result
        sides = (
            (movesA, outputsA, computersPlayer),
            (movesB, outputsB, tt.togglePlayer(computersPlayer)),
        )
        for sideMoves, sideOutputs, sidePlayer in sides:
            score = scoreEndBoard(board, winner, sidePlayer)
            for move, output in zip(sideMoves, sideOutputs):
                # only the played cell differs between output and target
                target = output.clone().view(9)
                target[move] = score
                target = target.view(1, 1, 9)
                optimizer.zero_grad()
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
def playWithUser(net, online=True):
    """Play console games against `net` forever, optionally learning online.

    Each round starts a fresh board. The computer plays as 2 (shown as X)
    and moves first; the human plays the other side and enters moves as
    ``'y x'`` (two single digits separated by one character). Unparsable
    input, off-board coordinates and occupied cells are rejected and the
    prompt repeats.

    When the network's arg-max points at an occupied cell it is trained one
    step towards the 0/1 map of empty cells and asked again. When `online`
    is true, after each game every stored output is trained towards a copy
    of itself in which the played cell is replaced by the game's end score.

    NOTE(review): ``criterion`` and ``optimizer`` are not parameters; they
    are read as module-level names and must exist before this is called.
    ``genBoard`` is likewise called unqualified (not ``tt.genBoard``) -
    confirm which one is intended.

    :param net: the network; called with a ``(1, 1, 18)`` one-hot board
    :param online: train on the finished game when true
    """
    while True:
        board = genBoard()
        movesLeft = True
        winner = False
        player = 2
        computersPlayer = 2  # random.randint(1,2)

        print()
        print("NEW GAME")
        if computersPlayer == 2:
            print("COMPUTER GOES FIRST...")

        moves = []
        outputs = []

        while movesLeft and not winner:
            if player == 2:
                print("X's Turn")
            else:  # player == 1
                print("O's Turn")
            tt.printBoard(board)

            if player == computersPlayer:
                moveValid = False
                while not moveValid:
                    # generate a move
                    oneHot = oneHotTicTacToe(board, computersPlayer).view(1, 1, 18)
                    output = net(oneHot)
                    values, index = output.view(9).max(0)

                    if board.flatten()[index] == 0:  # if move is valid
                        moveValid = True
                        # apply the move on a flat copy, then restore 3x3
                        move = index
                        board = board.flatten()
                        board[move] = computersPlayer
                        board = board.reshape(3, 3)
                        # store for later
                        moves.append(move)
                        outputs.append(output)
                    else:
                        # invalid move: train towards the map of empty cells
                        print("invalid move")
                        optimizer.zero_grad()
                        validMoves = np.where(board == 0, 1, 0)
                        target = torch.tensor(validMoves, dtype=torch.float).view(1, 1, 9)
                        loss = criterion(output, target)
                        loss.backward()
                        optimizer.step()
            elif player == tt.togglePlayer(computersPlayer):
                validMove = False
                while not validMove:
                    move = input("input move of form 'y x' ")
                    # validate move
                    try:
                        y = int(move[0])
                        x = int(move[2])
                        cellTaken = not board[y][x] == 0
                    except (ValueError, IndexError):
                        # non-numeric / too short input, or off-board
                        # coordinates: ask again instead of crashing
                        print("!!!INVALID MOVE!!!")
                        continue
                    if cellTaken:
                        print("!!!INVALID MOVE!!!")
                        continue
                    validMove = True
                    board[y][x] = tt.togglePlayer(computersPlayer)

            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        tt.printBoard(board)

        if online:
            score = scoreEndBoard(board, winner, computersPlayer)
            for move, output in zip(moves, outputs):
                # only the played cell differs between output and target
                target = output.clone().view(9)
                target[move] = score
                target = target.view(1, 1, 9)
                optimizer.zero_grad()
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

        if winner:
            if winner == 2:
                print("WINNER: X")
            else:  # winner == 1
                print("WINNER: O")
        else:
            print("TIE")
def train(net, criterion, optimizer, epochs):
    """Train `net` against a random opponent with a time-diluted reward.

    Plays `epochs` games. Player 2 always moves first and the network is
    randomly assigned to play as 1 or 2. Its move is the arg-max of the
    output after occupied cells are masked to -10.

    After each game the network's moves are replayed from last to first.
    For each one the stored output is trained towards a copy of itself in
    which the played cell is replaced by ``score * 0.9 ** k``, where k is 1
    for the last move, 2 for the one before it, and so on.
    """
    net.train()

    for _ in tqdm(range(epochs)):
        player = 2
        computersPlayer = random.randint(1, 2)
        optimizer.zero_grad()
        board = np.zeros((3, 3))
        movesLeft = np.any(board == 0)
        winner = tt.getWinner(board)
        gameDuration = 0
        playedMoves = []
        playedOutputs = []

        while not winner and movesLeft:
            if player != computersPlayer:
                # opponent: uniformly random empty cell
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                # generate a move
                output = net(oneHotTicTacToe(board, computersPlayer).view(1, 1, 18))

                # mask out invalid moves
                occupied = board.flatten() > 0
                masked = output.clone().view(9)
                masked[occupied] = -10
                _, move = masked.max(0)

                # apply the move on a flat copy, then restore 3x3
                cells = board.flatten()
                cells[move] = computersPlayer
                board = cells.reshape(3, 3)

                # store for later
                playedMoves.append(move)
                playedOutputs.append(output)

            player = tt.togglePlayer(player)
            gameDuration += 1
            movesLeft = np.any(board == 0)
            winner = tt.getWinner(board)

        # get end score of game
        score = scoreEndBoard(board, winner, computersPlayer)

        # the reward fades by this factor for every step back in time
        dilutionFactor = 0.9
        totalDilutant = 1.0
        for move, output in zip(reversed(playedMoves), reversed(playedOutputs)):
            totalDilutant *= dilutionFactor
            # only the played cell differs between output and target
            target = output.clone().view(9)
            target[move] = score * totalDilutant
            target = target.view(1, 1, 9)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()