def simulateCompetitiveGame(self, playouts):
    """Play one self-play game driven by the search tree and print its PGN.

    The tree stored on ``self`` is cleared first.  On every ply
    ``competitivePlayoutsFromPosition`` is run from the current position and
    the child with the highest ``PUCT_Algorithm`` score (exploration
    constant 0) is played.  Each move and board is printed as the game goes.

    Args:
        playouts: Forwarded unchanged to ``competitivePlayoutsFromPosition``.
    """
    gameRecord = chess.pgn.Game()
    headerValues = (
        ("Event", "Simulated Competitive Game"),
        ("Site", "Nayoung's Home"),
        ("Date", datetime.datetime.today().strftime('%Y-%m-%d %H:%M')),
        ("Round", "1"),
        ("White", "Network: " + self.nameOfNetwork),
        ("Black", "Network: " + self.nameOfNetwork),
        ("Variant", "Crazyhouse"),
    )
    for tag, value in headerValues:
        gameRecord.headers[tag] = value

    # Start from an empty MCTS tree.
    # dictionary: position string -> row index ("directory") in the lists below.
    self.dictionary = {}
    # Each of these holds one entry per stored position.
    self.childrenMoveNames = []     # move names of the children
    self.childrenStateSeen = []     # numpy array per position
    self.childrenStateWin = []      # numpy array per position
    self.childrenNNEvaluation = []  # numpy array per position

    sim = ChessEnvironment()
    while sim.result == 2:
        # Grow the tree from the current position, then pick the best child.
        self.competitivePlayoutsFromPosition(playouts, sim)
        directory = self.dictionary[sim.boardToString()]
        visits = self.childrenStateSeen[directory]
        scores = PUCT_Algorithm(
            self.childrenStateWin[directory],
            visits,
            0,
            np.sum(visits),
            self.childrenNNEvaluation[directory])
        move = self.childrenMoveNames[directory][np.argmax(scores)]
        print(move)

        sim.makeMove(move)
        sim.gameResult()

        # The first move hangs off the game root, later ones off the last node.
        played = chess.Move.from_uci(move)
        if sim.plies == 1:
            node = gameRecord.add_variation(played)
        else:
            node = node.add_variation(played)
        print(sim.board)

    resultTags = {1: "1-0", 0: "1/2-1/2", -1: "0-1"}
    if sim.result in resultTags:
        gameRecord.headers["Result"] = resultTags[sim.result]
    print(gameRecord)
def predictions(outputs):
    """Replay a game from the start position using one policy output per ply.

    For each entry of ``outputs`` the legal move with the highest
    ``ActionToArray.moveEvaluations`` score is played on a fresh
    ``ChessEnvironment``.  Once the game has ended (``result != 2``) the
    remaining entries are answered with the UCI null move ``'0000'``.

    Args:
        outputs: Sequence of policy outputs, one per ply, in game order.

    Returns:
        A list of move strings with the same length as ``outputs``.
    """
    listOfMoves = []
    newBoard = ChessEnvironment()
    for output in outputs:
        if newBoard.result != 2:
            listOfMoves.append('0000')
            continue
        # Build the legal-move list once and reuse it for scoring and lookup.
        legalMoves = ActionToArray.legalMovesForState(newBoard.arrayBoard,
                                                      newBoard.board)
        evaluationScores = ActionToArray.moveEvaluations(
            legalMoves, newBoard.arrayBoard, output)
        move = legalMoves[np.argmax(evaluationScores)]
        newBoard.makeMove(move)
        newBoard.gameResult()
        listOfMoves.append(move)
    return listOfMoves
#print("found index") return i return None dir_path = os.path.dirname(os.path.realpath(__file__)) print(dir_path) # PARAMETERS ENGINE_DEPTH = 8 ENGINE_PLAYOUTS = 0 NOISE_INITIAL = 0.5 NOISE_DECAY = 1.22 ESTIMATED_NPS = 6 board = ChessEnvironment() model = MCTS('/Users/gordon/Documents/CrazyhouseRL/New Networks/(MCTS)(12X256|16|8)(GPU)64fish.pt', ENGINE_DEPTH) while True: command = input("") if command == "uci": print("id name 64\nid author Gordon Chi") print("option name UCI_Variant type combo default crazyhouse var crazyhouse") print("uciok") elif command.startswith("setoption"): settings = command[10:] if settings.__contains__("ENGINE_PLAYOUTS"): settings = int(settings[16:]) ENGINE_PLAYOUTS = settings elif settings.__contains__("ENGINE_DEPTH"): settings = int(settings[13:])
def playout(
        self,
        round,
        explorationConstant=2**0.5,  # lower? will test more.
        notFromBeginning=False,
        arrayBoard=0,
        pythonBoard=0,
        plies=0,
        wCap=0,
        bCap=0,
        actuallyAPawn=0,
        noise=True,
        printPGN=True):
    """Run one depth-limited playout and back its outcome up into the tree.

    Moves are chosen with ``PUCT_Algorithm`` until the game ends or
    ``self.DEPTH_VALUE`` plies have been played.  Unseen positions are first
    added to the tree with ``addPositionToMCTS`` using the policy head of
    ``self.neuralNet``.  If the game is still open at the depth limit the
    leaf is scored with ``ValueEvaluation.positionEval``.

    Args:
        round: Not used in this body.
        explorationConstant: Passed to ``PUCT_Algorithm``.
        notFromBeginning: When true, start from the state given by the
            following six arguments instead of a fresh ``ChessEnvironment``.
        arrayBoard, pythonBoard, plies, wCap, bCap, actuallyAPawn: Copied
            onto the working environment when ``notFromBeginning`` is set.
        noise: When true, a ply-dependent noise constant is passed to
            ``noiseEvals``; otherwise 0.
        printPGN: Not used in this body.
    """
    whiteParentStateDictionary, whiteStateSeen, whiteStateWin = [], [], []
    blackParentStateDictionary, blackStateSeen, blackStateWin = [], [], []

    tempBoard = ChessEnvironment()
    if notFromBeginning:
        tempBoard.arrayBoard = arrayBoard
        tempBoard.board = pythonBoard
        tempBoard.plies = plies
        tempBoard.whiteCaptivePieces = wCap
        tempBoard.blackCaptivePieces = bCap
        tempBoard.actuallyAPawn = actuallyAPawn
        tempBoard.updateNumpyBoards()

    depth = 0
    while tempBoard.result == 2 and depth < self.DEPTH_VALUE:
        depth += 1
        position = tempBoard.boardToString()

        if position not in self.dictionary:
            # Unseen state: query the network and create a tree entry.
            state = torch.from_numpy(tempBoard.boardToState())
            action = torch.zeros(1)
            data = DoubleHeadDataset(state, action, action)
            testLoader = torch.utils.data.DataLoader(dataset=data,
                                                     batch_size=1,
                                                     shuffle=False)
            device = torch.device(
                'cuda' if torch.cuda.is_available() else 'cpu')
            start = time.time()
            for images, irrelevant1, irrelevant2 in testLoader:
                images = images.to(device)
                outputs = self.neuralNet(images)[0]
            end = time.time()
            print("BLAH:", end - start)
            self.addPositionToMCTS(
                tempBoard.boardToString(),
                ActionToArray.legalMovesForState(tempBoard.arrayBoard,
                                                 tempBoard.board),
                tempBoard.arrayBoard, outputs)

            # The selection below reads the last row of the tree lists.
            directory = len(self.childrenStateSeen) - 1
            if noise:
                noiseConstant = 0.15 / (1 * (1 + tempBoard.plies))  # should decrease this...
            else:
                noiseConstant = 0
            canSearch = len(self.childrenStateWin) > 0
        else:
            # Known state: look up its row in the tree.
            directory = self.dictionary[position]
            if noise:
                noiseConstant = 0.6 / (2.5 * (1 + tempBoard.plies))
            else:
                noiseConstant = 0
            canSearch = True

        # Find and make the preferred move.
        if canSearch:
            visits = self.childrenStateSeen[directory]
            _, index = (PUCT_Algorithm(
                self.childrenStateWin[directory],
                visits,
                explorationConstant,
                torch.sum(visits),
                self.childrenValueEval[directory],
                noiseEvals(self.childrenPolicyEval[directory],
                           noiseConstant))).max(0)
        else:
            index = 0
        move = self.childrenMoveNames[directory][index]
        tempBoard.makeMove(move)

        # One-hot marker of the chosen child, aligned with the row's moves.
        actionVector = torch.zeros(len(self.childrenMoveNames[directory]))
        actionVector[index] = 1

        # Record the choice for the side that just moved.
        if tempBoard.plies % 2 == 1:  # white has moved.
            whiteParentStateDictionary.append(position)
            whiteStateSeen.append(actionVector)
        else:  # black has moved
            blackParentStateDictionary.append(position)
            blackStateSeen.append(actionVector)
        tempBoard.gameResult()

    if tempBoard.result == 1:  # white victory
        whiteStateWin.extend(whiteStateSeen)
        blackStateWin.extend(chosen * 0 for chosen in blackStateSeen)
    if tempBoard.result == -1:  # black victory
        whiteStateWin.extend(chosen * 0 for chosen in whiteStateSeen)
        blackStateWin.extend(blackStateSeen)
    if tempBoard.result == 0:  # 'tis a tie
        whiteStateWin.extend(chosen * 0.5 for chosen in whiteStateSeen)
        blackStateWin.extend(chosen * 0.5 for chosen in blackStateSeen)
    if tempBoard.result == 2:  # game isn't played to very end
        winRate = ValueEvaluation.positionEval(tempBoard, self.neuralNet)
        # if depth is not divisible by two then win rate is of opponent
        if depth % 2 != 0:
            winRate = 1 - winRate
        # White is the evaluated side when plies and depth share parity.
        if tempBoard.plies % 2 == depth % 2:
            whiteFactor, blackFactor = winRate, 1 - winRate
        else:
            whiteFactor, blackFactor = 1 - winRate, winRate
        whiteStateWin.extend(chosen * whiteFactor for chosen in whiteStateSeen)
        blackStateWin.extend(chosen * blackFactor for chosen in blackStateSeen)

    # now, add the information into the MCTS database.
    perSide = (
        (whiteParentStateDictionary, whiteStateSeen, whiteStateWin),
        (blackParentStateDictionary, blackStateSeen, blackStateWin),
    )
    for parents, seenList, winList in perSide:
        for i, seen in enumerate(seenList):
            directory = self.dictionary[parents[i]]
            self.childrenStateSeen[directory] = (
                self.childrenStateSeen[directory] + seen)
            self.childrenStateWin[directory] = (
                self.childrenStateWin[directory] + winList[i])
torch.sum(self.childrenStateSeen[self.dictionary[ sim.boardToString()]])) testing = False if testing: # TESTING legalMoves = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] nnPolicyPredictions = torch.rand(10) won = torch.rand(10) seen = torch.rand(10) seen[4] = 0 nnValuePredictions = torch.rand(10) final = PUCT_Algorithm(won, seen, 2**0.5, torch.sum(seen), nnValuePredictions, noiseEvals(nnPolicyPredictions, 0.5)) # THIS FINDS INDEX OF TOP VALUE _, indice = PUCT_Algorithm(won, seen, 2**0.5, torch.sum(seen), nnValuePredictions, noiseEvals(nnPolicyPredictions, 0.5)).max(0) print(indice) print(legalMoves[indice]) MCTS = True if MCTS: board = ChessEnvironment() search = MCTSTensor( '/Users/gordon/Documents/CrazyhouseRL/New Networks/(MCTS)(8X256|8|8)(GPU)64fish.pt', 3) search.competitivePlayoutsFromPosition(20, board)
def trainNetwork(states, outputMoves, EPOCHS=10000, BATCH_SIZE=1000, LR=0.001,
                 loadDirectory='none.pt', saveDirectory='network1.pt',
                 OUTPUT_ARRAY_LEN=4504, THRESHOLD_FOR_SAVE=100):
    """Train a ``ChessConvNet`` policy network on (state, move) pairs.

    After every epoch the whole data set is evaluated in one batch and the
    argmax accuracy is printed.  Training stops early once the accuracy
    reaches ``THRESHOLD_FOR_SAVE``.  The model is always saved at the end.

    Args:
        states: numpy array of input states.
        outputMoves: numpy array of target move vectors, one row per state.
        EPOCHS: Maximum number of passes over the data.
        BATCH_SIZE: Mini-batch size used for training.
        LR: Adam learning rate.
        loadDirectory: Path of a saved model to resume from.
        saveDirectory: Path the model is written to.
        OUTPUT_ARRAY_LEN: Width of the network output / move vector.
        THRESHOLD_FOR_SAVE: Accuracy (in percent) that ends training early.
    """
    boards = torch.from_numpy(states)
    actions = torch.from_numpy(outputMoves)

    data = MyDataset(boards, actions)
    trainLoader = torch.utils.data.DataLoader(dataset=data,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True)
    # One single batch holding every sample, used for the accuracy check.
    testLoader = torch.utils.data.DataLoader(dataset=data,
                                             batch_size=len(boards),
                                             shuffle=False)

    model = ChessConvNet(OUTPUT_ARRAY_LEN).double()
    # NOTE(review): torch.load unpickles the file, so loadDirectory must be
    # a trusted path.
    try:
        model = torch.load(loadDirectory)
    except Exception:
        # Best effort: keep the freshly initialised model, but let
        # KeyboardInterrupt / SystemExit propagate (the bare except did not).
        print("Pretrained NN model not found!")

    criterion = nn.PoissonNLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    total_step = len(trainLoader)
    answers = np.argmax(actions.numpy(), axis=1)

    for epoch in range(EPOCHS):
        # The evaluation below switches to eval mode; switch back so that
        # every epoch (not just the first) trains in training mode.
        model.train()
        for i, (images, labels) in enumerate(trainLoader):
            images = images.to('cpu')
            labels = labels.to('cpu')

            # Forward pass
            predictedMoves = model(images)
            loss = criterion(predictedMoves, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, EPOCHS, i + 1, total_step, loss.item()))

        # Test the model (batchnorm uses moving mean/variance in eval mode).
        model.eval()
        thresholdReached = False
        with torch.no_grad():
            for images, labels in testLoader:
                images = images.to('cpu')
                labels = labels.to('cpu')
                predictedMoves = model(images)
                _, predicted = torch.max(predictedMoves.data, 1)

                # print expectations vs reality
                print("MAX", np.amax(predictedMoves.numpy()))
                print("MIN", np.amin(predictedMoves.numpy()))
                print(predicted.numpy())
                print(answers)

                correct = (predicted.numpy() == answers).sum()
                acc = 100 * (correct / len(answers))
                print("argmax prediction: ", acc, "% correct.")

                if epoch % 2000 == 100:
                    # Periodically replay the predicted moves on a board to
                    # compare the decoded move with the best-scored legal one.
                    newBoard = ChessEnvironment()
                    rows = predictedMoves.numpy()
                    for j in range(len(rows)):
                        if newBoard.result == 2:
                            move = ActionToArray.moveArrayToString(
                                rows[j].reshape((1, OUTPUT_ARRAY_LEN)),
                                newBoard.arrayBoard, newBoard.board,
                                newBoard.whiteCaptivePieces,
                                newBoard.blackCaptivePieces, newBoard.plies)
                            print("NN PREDICTED MOVE: ", move)

                            legalMoves = ActionToArray.legalMovesForState(
                                newBoard.arrayBoard, newBoard.board)
                            evaluationScores = ActionToArray.moveEvaluations(
                                legalMoves, newBoard.arrayBoard,
                                predictedMoves[j])
                            print(" = " + legalMoves[np.argmax(evaluationScores)])
                            newBoard.makeMove(move)
                            newBoard.gameResult()
                        else:
                            print(newBoard.gameStatus)

                    print(newBoard.board)
                    newBoard.gameResult()
                    print(newBoard.boardToString())
                    print(newBoard.gameStatus)

                if acc >= THRESHOLD_FOR_SAVE:
                    torch.save(model, saveDirectory)
                    print("Updated!")
                    thresholdReached = True

        if thresholdReached:
            # Stop instead of idling through the remaining epochs.
            break

    # make sure it saves the model regardless.
    torch.save(model, saveDirectory)
    print("Updated!")
def moveValueEvaluationsNew(legalMoves, board, network):
    """Score every legal move with the value head of ``network``.

    Each move is played on a private copy of ``board``; the resulting states
    are evaluated in batches of 128 and the results of all batches are
    concatenated, so the output has one entry per move.

    Args:
        legalMoves: Sequence of move strings playable from ``board``.
        board: The ``ChessEnvironment`` to evaluate from (not modified).
        network: Model whose output at index 1 is the value head.

    Returns:
        1-D numpy array with one value in [0, 1] per move, in input order.
        An empty array is returned for an empty move list.
    """
    if len(legalMoves) == 0:
        return np.zeros(0)

    # make the input vector
    positions = np.zeros((len(legalMoves), 15, 8, 8))
    for i, move in enumerate(legalMoves):
        tempBoard = copy.deepcopy(board)

        # import the game board
        evalBoard = ChessEnvironment()
        evalBoard.arrayBoard = tempBoard.arrayBoard
        evalBoard.board = tempBoard.board
        evalBoard.plies = tempBoard.plies
        evalBoard.whiteCaptivePieces = tempBoard.whiteCaptivePieces
        evalBoard.blackCaptivePieces = tempBoard.blackCaptivePieces
        evalBoard.actuallyAPawn = tempBoard.actuallyAPawn
        evalBoard.updateNumpyBoards()
        evalBoard.makeMove(move)
        evalBoard.updateNumpyBoards()
        positions[i] = evalBoard.boardToState()

    positions = torch.from_numpy(positions)
    # Filler targets; the dataset wants them but they are never read.
    nullAction = torch.from_numpy(np.zeros(len(positions)))
    testSet = DoubleHeadDataset(positions, nullAction, nullAction)
    generatePredic = torch.utils.data.DataLoader(dataset=testSet,
                                                 batch_size=128,
                                                 shuffle=False)

    # Same inference mode as moveValueEvaluation / objectivePositionEval.
    network.eval()
    batchValues = []
    with torch.no_grad():
        for images, labels1, labels2 in generatePredic:
            # Keep every batch: more than 128 moves span several batches.
            batchValues.append(network(images)[1].detach().numpy().flatten())
    output = np.concatenate(batchValues)

    # so far, output gives a winning probability from -1 to 1, 1 for white,
    # -1 for black. We want to scale this to a value between 0 and 1.
    output = (output / 2) + 0.5

    # if plies is divisible by 2, then black has just moved, which means that
    # our evaluation should be for black. If plies is not, then white has just
    # moved, which means that our evaluation should be for white.
    # Every evaluated board is one move past ``board``, so the last one
    # stands for all of them.
    turn = evalBoard.plies % 2
    if turn == 0:
        output = 1 - output
    return output
plt.figure(figsize=(10, 10)) for idx, filt in enumerate(blocks[h]): plt.subplot(16, 16, idx + 1) plt.imshow(filt[i, :, :], cmap="gray") title = "Kernels in Block " + str(int(h+1)) + ", Part " + str(int(i+1)) plt.gcf().canvas.set_window_title(title) plt.axis('off') saveFolder = 'Visualization of Network/'+networkName+'/Block ' + str(int(h+1)) saveDirec = 'Visualization of Network/'+networkName+'/Block ' + str(int(h+1)) + '/' + title if not os.path.exists(saveFolder): os.makedirs(saveFolder) plt.savefig(saveDirec) plt.close() board = ChessEnvironment() representation = board.boardToState() """ # PRINT BOARD AND ITS REPRESENTATION plt.figure(figsize=(4, 4)) for idx, filt in enumerate(representation[0]): #print(filt[0, :, :]) plt.subplot(5, 3, idx + 1) plt.imshow(filt[:, :], cmap="gray") plt.axis('off') plt.show() """
def NetworkCompetitionWhite(bestNet, playouts, round="1"):
    """Play an interactive game: ``bestNet`` as White against a human as Black.

    On White's turn the move comes either from the search tree (when
    ``playouts > 0``) or directly from the policy head.  On Black's turn a
    UCI move is read from standard input until a legal one is entered.  The
    board, both pockets and finally the PGN are printed.

    Args:
        bestNet: Search object exposing ``neuralNet``, ``dictionary`` and the
            ``children*`` lists used below.
        playouts: Number of playouts per move; 0 uses the raw policy.
        round: Value written into the PGN "Round" header.
    """
    PGN = chess.pgn.Game()
    PGN.headers["Event"] = "Neural Network Comparison Test"
    PGN.headers["Site"] = "Cozy Computer Lounge"
    PGN.headers["Date"] = datetime.datetime.today().strftime('%Y-%m-%d %H:%M')
    PGN.headers["Round"] = round
    PGN.headers["White"] = "Network: " + bestNet.nameOfNetwork
    PGN.headers["Black"] = "You"
    PGN.headers["Variant"] = "crazyhouse"

    sim = ChessEnvironment()
    while sim.result == 2:
        noiseVal = 0.0 / (10 * (sim.plies // 2 + 1))
        if sim.plies % 2 == 0:
            if playouts > 0:
                start = time.time()
                bestNet.competitivePlayoutsFromPosition(playouts, sim)
                end = time.time()
                print(end - start)
            else:
                position = sim.boardToString()
                if position not in bestNet.dictionary:
                    # Policy-only mode: store just the move names and priors.
                    image = torch.from_numpy(sim.boardToState())
                    outputs = bestNet.neuralNet(image)[0]
                    legalMoves = ActionToArray.legalMovesForState(
                        sim.arrayBoard, sim.board)
                    bestNet.dictionary[position] = len(bestNet.dictionary)
                    policy = ActionToArray.moveEvaluations(
                        legalMoves, sim.arrayBoard, outputs)
                    bestNet.childrenMoveNames.append(legalMoves)
                    bestNet.childrenPolicyEval.append(policy)

            directory = bestNet.dictionary[sim.boardToString()]
            if playouts > 0:
                index = np.argmax(
                    MCTSCrazyhouse.PUCT_Algorithm(
                        bestNet.childrenStateWin[directory],
                        bestNet.childrenStateSeen[directory], 1,
                        np.sum(bestNet.childrenStateSeen[directory]),
                        bestNet.childrenValueEval[directory],
                        MCTSCrazyhouse.noiseEvals(
                            bestNet.childrenPolicyEval[directory], noiseVal)))
            else:
                index = np.argmax(
                    MCTSCrazyhouse.noiseEvals(
                        bestNet.childrenPolicyEval[directory], noiseVal))
            move = bestNet.childrenMoveNames[directory][index]
            # Fall back to the first legal move if the stored one is illegal.
            if chess.Move.from_uci(move) not in sim.board.legal_moves:
                move = ActionToArray.legalMovesForState(
                    sim.arrayBoard, sim.board)[0]

            # Evaluate the position once and reuse it for both printouts.
            networkWinRate = 100 * ValueEvaluation.positionEval(
                sim, bestNet.neuralNet)
            print("-----")
            print(move)
            print("Win Probability: {:.4f} %".format(networkWinRate))
            if playouts > 0 and bestNet.childrenStateSeen[directory][index] > 0:
                mctsWinRate = 100 * bestNet.childrenStateWin[directory][
                    index] / bestNet.childrenStateSeen[directory][index]
                print("MCTS Win Probability: {:.4f} %".format(mctsWinRate))
                totalWinRate = (networkWinRate + mctsWinRate) / 2
                print("Total Win Probability: {:.4f} %".format(totalWinRate))
            print("-----")
            sim.makeMove(move)
            sim.gameResult()
        else:
            legal = False
            while not legal:
                move = input("Enter move: ")
                try:
                    legal = (len(move) in (4, 5) and
                             chess.Move.from_uci(move) in sim.board.legal_moves)
                except ValueError:
                    # from_uci rejects malformed text such as "help";
                    # treat it like any other illegal move.
                    legal = False
                if not legal:
                    print("Illegal move! Try again:")
            print(move)
            sim.makeMove(move)
            sim.gameResult()

        # The first move hangs off the game root, later ones off the last node.
        if sim.plies == 1:
            node = PGN.add_variation(chess.Move.from_uci(move))
        else:
            node = node.add_variation(chess.Move.from_uci(move))

        print(sim.board)
        print("WHITE POCKET")
        print(sim.whiteCaptivePieces)
        print("BLACK POCKET")
        print(sim.blackCaptivePieces)

    if sim.result == 1:
        PGN.headers["Result"] = "1-0"
    if sim.result == 0:
        PGN.headers["Result"] = "1/2-1/2"
    if sim.result == -1:
        PGN.headers["Result"] = "0-1"

    print(PGN)
def playout(
        self,
        round,
        explorationConstant=0.15,  # lower? will test more.
        notFromBeginning=False,
        arrayBoard=0,
        pythonBoard=0,
        plies=0,
        wCap=0,
        bCap=0,
        actuallyAPawn=0,
        noise=True,
        printPGN=True):
    """Play one game to its end with PUCT selection and update the tree.

    Unseen positions are added to the tree via ``addPositionToMCTS`` using
    the output of ``self.neuralNet``.  After the game every visited
    (position, move) pair has its seen and win counters increased.

    Args:
        round: Value written into the PGN "Round" header.
        explorationConstant: Passed to ``PUCT_Algorithm``.
        notFromBeginning: When true, start from the state given by the
            following six arguments instead of a fresh ``ChessEnvironment``.
        arrayBoard, pythonBoard, plies, wCap, bCap, actuallyAPawn: Copied
            onto the working environment when ``notFromBeginning`` is set.
        noise: When true, a ply-dependent noise constant is passed to
            ``noiseEvals``; otherwise 0.
        printPGN: When true, record and print a PGN and the final board.
    """
    if printPGN:
        PGN = chess.pgn.Game()
        PGN.headers["Event"] = "Playout"
        PGN.headers["Site"] = "Nayoung's Home"
        PGN.headers["Date"] = datetime.datetime.today().strftime('%Y-%m-%d')
        PGN.headers["Round"] = round
        PGN.headers["White"] = "Network: " + self.nameOfNetwork
        PGN.headers["Black"] = "Network: " + self.nameOfNetwork
        PGN.headers["Variant"] = "Crazyhouse"
        if notFromBeginning:
            # Record the supplied start position so the moves added below
            # are legal from the PGN root.
            PGN.setup(pythonBoard)
        # Start at the root so the first recorded move always has a parent,
        # including when the playout begins mid-game.
        node = PGN

    whiteParentStateDictionary = []
    whiteStateSeen = []
    whiteStateWin = []

    blackParentStateDictionary = []
    blackStateSeen = []
    blackStateWin = []

    tempBoard = ChessEnvironment()
    if notFromBeginning:
        tempBoard.arrayBoard = arrayBoard
        tempBoard.board = pythonBoard
        tempBoard.plies = plies
        tempBoard.whiteCaptivePieces = wCap
        tempBoard.blackCaptivePieces = bCap
        tempBoard.actuallyAPawn = actuallyAPawn
        tempBoard.updateNumpyBoards()

    while tempBoard.result == 2:
        position = tempBoard.boardToString()
        if noise:
            noiseConstant = 0.6 / (2.5 * (1 + tempBoard.plies))
        else:
            noiseConstant = 0

        if position not in self.dictionary:
            # Create a new entry in the tree, if the state is not seen before.
            state = torch.from_numpy(tempBoard.boardToState())
            # Filler target; the dataset wants it but it is never read.
            nullAction = torch.from_numpy(np.zeros((1, 4504)))
            testSet = MyDataset(state, nullAction)
            generatePredic = torch.utils.data.DataLoader(
                dataset=testSet, batch_size=len(state), shuffle=False)
            with torch.no_grad():
                for images, labels in generatePredic:
                    outputs = self.neuralNet(images)
                    self.addPositionToMCTS(
                        tempBoard.boardToString(),
                        ActionToArray.legalMovesForState(
                            tempBoard.arrayBoard, tempBoard.board),
                        tempBoard.arrayBoard, outputs)

            # The selection below reads the last row of the tree lists.
            directory = len(self.childrenStateSeen) - 1
            if len(self.childrenStateWin) > 0:
                index = np.argmax(
                    PUCT_Algorithm(
                        self.childrenStateWin[directory],
                        self.childrenStateSeen[directory],
                        explorationConstant,
                        np.sum(self.childrenStateSeen[directory]),
                        noiseEvals(self.childrenNNEvaluation[directory],
                                   noiseConstant)))
            else:
                index = 0
            move = self.childrenMoveNames[directory][index]

            if chess.Move.from_uci(move) not in tempBoard.board.legal_moves:
                print("Not legal move.")
                # Fall back to the first stored move and credit that move,
                # not the rejected one, in the statistics below.
                index = 0
                move = self.childrenMoveNames[directory][index]
        else:
            # find the directory of the move
            directory = self.dictionary[position]
            index = np.argmax(
                PUCT_Algorithm(
                    self.childrenStateWin[directory],
                    self.childrenStateSeen[directory],
                    explorationConstant,
                    np.sum(self.childrenStateSeen[directory]),
                    noiseEvals(self.childrenNNEvaluation[directory],
                               noiseConstant)))
            move = self.childrenMoveNames[directory][index]

        tempBoard.makeMove(move)

        # One-hot marker of the chosen child, aligned with the row's moves.
        actionVector = np.zeros(len(self.childrenMoveNames[directory]))
        actionVector[index] = 1

        if printPGN:
            node = node.add_variation(chess.Move.from_uci(move))

        # add this into the actions chosen.
        if tempBoard.plies % 2 == 1:  # white has moved.
            whiteParentStateDictionary.append(position)
            whiteStateSeen.append(actionVector)
        else:  # black has moved
            blackParentStateDictionary.append(position)
            blackStateSeen.append(actionVector)

        tempBoard.gameResult()

    if tempBoard.result == 1:  # white victory
        whiteStateWin.extend(whiteStateSeen)
        blackStateWin.extend(chosen * 0 for chosen in blackStateSeen)
        if printPGN:
            PGN.headers["Result"] = "1-0"
    if tempBoard.result == -1:  # black victory
        whiteStateWin.extend(chosen * 0 for chosen in whiteStateSeen)
        blackStateWin.extend(blackStateSeen)
        if printPGN:
            PGN.headers["Result"] = "0-1"
    if tempBoard.result == 0:  # 'tis a tie
        whiteStateWin.extend(chosen * 0.5 for chosen in whiteStateSeen)
        blackStateWin.extend(chosen * 0.5 for chosen in blackStateSeen)
        if printPGN:
            PGN.headers["Result"] = "1/2-1/2"

    # Print PGN and final state
    if printPGN:
        print("PGN: ")
        print(PGN)

    # now, add the information into the MCTS database.
    for i in range(len(whiteStateSeen)):
        directory = self.dictionary[whiteParentStateDictionary[i]]
        self.childrenStateSeen[directory] = self.childrenStateSeen[
            directory] + whiteStateSeen[i]
        self.childrenStateWin[directory] = self.childrenStateWin[
            directory] + whiteStateWin[i]
    for i in range(len(blackStateSeen)):
        directory = self.dictionary[blackParentStateDictionary[i]]
        self.childrenStateSeen[directory] = self.childrenStateSeen[
            directory] + blackStateSeen[i]
        self.childrenStateWin[directory] = self.childrenStateWin[
            directory] + blackStateWin[i]

    if printPGN:
        print(tempBoard.board)
        # NOTE(review): printSize() is treated as part of the verbose output;
        # confirm it should not also run when printPGN is False.
        self.printSize()
def simulateTrainingGame(self, playouts, round="1"):
    """Play one self-play training game and return its per-move records.

    On every ply ``trainingPlayoutsFromPosition`` is run, the child with the
    best noisy PUCT score is played, and the pre-move state, a one-hot
    vector of the chosen child and the list of candidate move names are
    stored for the side to move.  After the game the one-hot vectors are
    scaled by the result to form the win records.

    Args:
        playouts: Forwarded to ``trainingPlayoutsFromPosition``.
        round: Value written into the PGN "Round" header.

    Returns:
        ``(parentStates, statesSeen, statesWin, statesNames)`` with White's
        records first and Black's records after them.
    """
    gameRecord = chess.pgn.Game()
    headerValues = (
        ("Event", "Simulated Training Game"),
        ("Site", "Nayoung's Home"),
        ("Date", datetime.datetime.today().strftime('%Y-%m-%d %H:%M')),
        ("Round", round),
        ("White", "Network: " + self.nameOfNetwork),
        ("Black", "Network: " + self.nameOfNetwork),
        ("Variant", "Crazyhouse"),
    )
    for tag, value in headerValues:
        gameRecord.headers[tag] = value

    whiteParentState = np.zeros(1)
    whiteStateSeen, whiteStateWin, whiteStateNames = [], [], []
    blackParentState = np.zeros(1)
    blackStateSeen, blackStateWin, blackStateNames = [], [], []

    sim = ChessEnvironment()
    while sim.result == 2:
        self.trainingPlayoutsFromPosition(playouts, sim)
        directory = self.dictionary[sim.boardToString()]
        moveNames = self.childrenMoveNames[directory]
        visits = self.childrenStateSeen[directory]
        scores = PUCT_Algorithm(
            self.childrenStateWin[directory],
            visits,
            0.22,  # 0.25-0.30 guarantees diversity
            np.sum(visits),
            noiseEvals(self.childrenNNEvaluation[directory],
                       1.35 / (6 * ((sim.plies // 2) + 1))))
        index = np.argmax(scores)
        move = moveNames[index]

        actionVector = np.zeros(len(moveNames))
        actionVector[index] = 1

        # Store the pre-move state for the side to move; the first state of
        # each colour replaces the placeholder, later ones are appended.
        ply = sim.plies
        snapshot = sim.boardToState()
        if ply % 2 == 0:
            if ply == 0:
                whiteParentState = snapshot
            else:
                whiteParentState = np.concatenate((whiteParentState, snapshot))
            whiteStateSeen.append(actionVector)
            whiteStateNames.append(moveNames)
        else:
            if ply == 1:
                blackParentState = snapshot
            else:
                blackParentState = np.concatenate((blackParentState, snapshot))
            blackStateSeen.append(actionVector)
            blackStateNames.append(moveNames)

        sim.makeMove(move)
        sim.gameResult()
        print(sim.board)

        # The first move hangs off the game root, later ones off the last node.
        played = chess.Move.from_uci(move)
        if sim.plies == 1:
            node = gameRecord.add_variation(played)
        else:
            node = node.add_variation(played)

    if sim.result == 1:
        gameRecord.headers["Result"] = "1-0"
        whiteStateWin.extend(whiteStateSeen)
        blackStateWin.extend(chosen * 0 for chosen in blackStateSeen)
    elif sim.result == 0:
        gameRecord.headers["Result"] = "1/2-1/2"
        whiteStateWin.extend(chosen * 0.5 for chosen in whiteStateSeen)
        blackStateWin.extend(chosen * 0.5 for chosen in blackStateSeen)
    elif sim.result == -1:
        gameRecord.headers["Result"] = "0-1"
        whiteStateWin.extend(chosen * 0 for chosen in whiteStateSeen)
        blackStateWin.extend(blackStateSeen)
    elif sim.result == 2:
        whiteStateWin.extend(chosen * 1 for chosen in whiteStateSeen)
        blackStateWin.extend(chosen * 1 for chosen in blackStateSeen)

    parentStates = np.concatenate((whiteParentState, blackParentState))
    statesSeen = whiteStateSeen + blackStateSeen
    statesWin = whiteStateWin + blackStateWin
    statesNames = whiteStateNames + blackStateNames

    print(gameRecord)
    return parentStates, statesSeen, statesWin, statesNames
def moveValueEvaluation(move, board, network):
    """Evaluate the position reached by playing ``move`` on ``board``.

    The move is made on a private copy, the value head of ``network`` is
    queried, and its output is rescaled with ``x / 2 + 0.5``.  The result is
    flipped to ``1 - x`` when the ply count after the move is even.

    Args:
        move: Move string to play.
        board: ``ChessEnvironment`` to start from (not modified).
        network: Model whose output at index 1 is the value head.

    Returns:
        The rescaled evaluation as a scalar.
    """
    snapshot = copy.deepcopy(board)

    # Rebuild an independent environment from the copied fields.
    evalBoard = ChessEnvironment()
    for field in ("arrayBoard", "board", "plies", "whiteCaptivePieces",
                  "blackCaptivePieces", "actuallyAPawn"):
        setattr(evalBoard, field, getattr(snapshot, field))
    evalBoard.updateNumpyBoards()

    # make temporary move
    evalBoard.makeMove(move)

    state = evalBoard.boardToState()
    filler = torch.from_numpy(np.zeros(1))  # required by the dataset, unused
    loader = torch.utils.data.DataLoader(
        dataset=DoubleHeadDataset(state, filler, filler),
        batch_size=len(state),
        shuffle=False)

    with torch.no_grad():
        for images, labels1, labels2 in loader:
            network.eval()
            rawValue = (network(images)[1].numpy())[0][0]

            # The original note describes the raw value as running from -1
            # (black) to 1 (white); map it onto 0..1.
            output = (rawValue / 2) + 0.5

            # Even ply count after the move: black has just moved, so the
            # value is reported from black's side.
            if evalBoard.plies % 2 == 0:
                output = 1 - output

    return output
def objectivePositionEval(board, network):
    """Return a signed evaluation of ``board`` from the value head.

    The value-head output is rescaled with ``x / 2 + 0.5``, transformed with
    ``4 * arctanh(x - 0.500001)``, capped at 1 from above, and negated when
    the ply count is odd.

    Args:
        board: ``ChessEnvironment`` to evaluate (not modified).
        network: Model whose output at index 1 is the value head.

    Returns:
        The transformed evaluation as a scalar.
    """
    snapshot = copy.deepcopy(board)

    # Rebuild an independent environment from the copied fields.
    evalBoard = ChessEnvironment()
    for field in ("arrayBoard", "board", "plies", "whiteCaptivePieces",
                  "blackCaptivePieces", "actuallyAPawn"):
        setattr(evalBoard, field, getattr(snapshot, field))
    evalBoard.updateNumpyBoards()

    state = evalBoard.boardToState()
    filler = torch.from_numpy(np.zeros(1))  # required by the dataset, unused
    loader = torch.utils.data.DataLoader(
        dataset=DoubleHeadDataset(state, filler, filler),
        batch_size=len(state),
        shuffle=False)

    with torch.no_grad():
        for images, labels1, labels2 in loader:
            network.eval()
            rawValue = (network(images)[1].numpy())[0][0]

            # The original note describes the raw value as running from -1
            # (black) to 1 (white); map it onto 0..1.
            whiteWinRate = (rawValue / 2) + 0.5

            # Convert the win rate into a pawn-like score.
            # NOTE(review): only the upper side is capped; confirm whether a
            # matching lower bound of -1 was intended.
            output = min(1, 4 * np.arctanh(whiteWinRate - 0.500001))

            if evalBoard.plies % 2 == 1:
                output = -output

    return output
# so far, output gives a winning probability from -1 to 1, 1 for white, -1 for black. We want to scale this to # a value between 0 and 1. output = (output / 2) + 0.5 # now we have an evaluation from 0 to 1. Now we have to scale this to a probability # for either black or white depending on who moves next. turn = evalBoard.plies % 2 # if plies is not divisible by 2, then it is black to move. if turn == 1: output = 1 - output # now, let's return our evaluation # print(output) return output testing = False if testing: hi = ChessEnvironment() hi.printBoard() moves = ActionToArray.legalMovesForState(hi.arrayBoard, hi.board) print(moves) network = torch.load("New Networks/1712-finalnet.pt") evaluations = moveValueEvaluations(moves, hi, network) print(evaluations) eval = positionEval(hi, network) print(eval)
for move in game.mainline_moves(): board.push_uci(move.uci()) singleGame.append(move.uci()) listOfMoves.append(singleGame) listOfResults.append(result) print(pgnGames[g]) except: print("", end="") inList = [] outList = [] actionList = [] actionMagList = [] for j in range(len(listOfMoves)): board = ChessEnvironment() for i in range(len(listOfMoves[j])): state = ActionToArray.boardToBinaryArray(board.boardToState()) value = listOfResults[j] action = ActionToArray.moveArray(listOfMoves[j][i], board.arrayBoard) if i % 2 == 0: if value == 1: mag = 1 elif value == 0: mag = 0.5 else: mag = 0.2 else: if value == -1: mag = 1 elif value == 0:
def _selectCompetitionMove(net, sim, playouts, noiseVal):
    """Choose the move `net` plays in the current position of `sim`.

    With `playouts > 0` the network's search is run first and the move is
    picked by PUCT over the stored statistics.  With `playouts == 0` the
    position is evaluated once by the policy output (and cached in the
    network's tables if it has not been seen) and the move is picked from the
    noised policy alone.

    :param net: search object exposing `neuralNet`, `dictionary` and the
        `children*` tables used below.
    :param sim: the live game environment; it is not advanced here.
    :param playouts: number of playouts per move; 0 disables the search.
    :param noiseVal: noise magnitude forwarded to `MCTSCrazyhouse.noiseEvals`.
    :return: the chosen move as a UCI string.
    """
    if playouts > 0:
        net.competitivePlayoutsFromPosition(playouts, sim)
    else:
        position = sim.boardToString()
        if position not in net.dictionary:
            state = torch.from_numpy(sim.boardToState())
            nullAction = torch.from_numpy(np.zeros(1))  # filler labels, ignored here
            testSet = DoubleHeadDataset(state, nullAction, nullAction)
            generatePredic = torch.utils.data.DataLoader(dataset=testSet, batch_size=len(state), shuffle=False)
            with torch.no_grad():
                for images, _labels1, _labels2 in generatePredic:
                    net.neuralNet.eval()
                    outputs = net.neuralNet(images)[0]
                    # Register the position under the next free index and
                    # store its legal moves alongside their policy scores.
                    legalMoves = ActionToArray.legalMovesForState(sim.arrayBoard, sim.board)
                    net.dictionary[position] = len(net.dictionary)
                    policy = ActionToArray.moveEvaluations(legalMoves, sim.arrayBoard, outputs)
                    net.childrenMoveNames.append(legalMoves)
                    net.childrenPolicyEval.append(policy)

    directory = net.dictionary[sim.boardToString()]
    if playouts > 0:
        index = np.argmax(
            MCTSCrazyhouse.PUCT_Algorithm(net.childrenStateWin[directory],
                                          net.childrenStateSeen[directory],
                                          1,
                                          np.sum(net.childrenStateSeen[directory]),
                                          net.childrenValueEval[directory],
                                          MCTSCrazyhouse.noiseEvals(net.childrenPolicyEval[directory], noiseVal))
        )
    else:
        index = np.argmax(MCTSCrazyhouse.noiseEvals(net.childrenPolicyEval[directory], noiseVal))

    move = net.childrenMoveNames[directory][index]
    # Safety net: if the stored move is not legal on the real board, fall back
    # to the first legal move instead of crashing the game.
    if chess.Move.from_uci(move) not in sim.board.legal_moves:
        move = ActionToArray.legalMovesForState(sim.arrayBoard, sim.board)[0]
    return move


def NetworkCompetitionWhite(bestNet, testingNet, playouts, round="1"):
    """Play one game with `bestNet` as White against `testingNet` as Black.

    The game is recorded as a PGN, which is printed when the game ends.

    :param bestNet: search object that moves on even plies (White).
    :param testingNet: search object that moves on odd plies (Black).
    :param playouts: playouts per move; 0 means pick moves from the policy only.
    :param round: value written to the PGN "Round" header.
    :return: the score from `testingNet`'s point of view -- 1 if Black won,
        0.5 for a draw, 0 if White won.
    """
    score = 0
    PGN = chess.pgn.Game()
    PGN.headers["Event"] = "Neural Network Comparison Test"
    PGN.headers["Site"] = "Cozy Computer Lounge"
    PGN.headers["Date"] = datetime.datetime.today().strftime('%Y-%m-%d %H:%M')
    PGN.headers["Round"] = round
    PGN.headers["White"] = "Network: " + bestNet.nameOfNetwork
    PGN.headers["Black"] = "Network: " + testingNet.nameOfNetwork
    PGN.headers["Variant"] = "crazyhouse"

    sim = ChessEnvironment()
    while sim.result == 2:
        # Noise shrinks as the game goes on: 1/2 on move one, 1/4 on move two, ...
        noiseVal = 1.0 / (2 * (sim.plies // 2 + 1))
        mover = bestNet if sim.plies % 2 == 0 else testingNet
        move = _selectCompetitionMove(mover, sim, playouts, noiseVal)

        sim.makeMove(move)
        sim.gameResult()

        # The first move hangs off the game root; later moves extend the line.
        if sim.plies == 1:
            node = PGN.add_variation(chess.Move.from_uci(move))
        else:
            node = node.add_variation(chess.Move.from_uci(move))

    if sim.result == 1:
        PGN.headers["Result"] = "1-0"
    elif sim.result == 0:
        PGN.headers["Result"] = "1/2-1/2"
        score = 0.5
    elif sim.result == -1:
        PGN.headers["Result"] = "0-1"
        score = 1

    print(PGN)
    return score
import sys import torch.utils.data as data_utils from MCTSCrazyhouse import MCTS import MCTSCrazyhouse import copy import chess.variant import chess.pgn import chess import time import ValueEvaluation import os dir_path = os.path.dirname(os.path.realpath(__file__)) print(dir_path) board = ChessEnvironment() model = MCTS('/Users/gordon/Documents/CrazyhouseRL/New Networks/smallnet.pt', 3) playouts = 0 while True: command = input("") if command == "uci": print("id name 64\nid author Gordon Chi\nuciok") elif command.startswith("setoption"): settings = command[10:] if settings.__contains__("playouts"): settings = int(settings[9:]) playouts = settings elif settings.__contains__("depth"): settings = int(settings[6:])
def simulateTrainingGame(self, playouts, round="1"):
    """Play one self-play game with this network on both sides.

    With `playouts == 0` each move is chosen from the noised policy output
    alone; otherwise training playouts are run from the position and the move
    is chosen by PUCT over the stored statistics.  The finished game is
    printed and returned.

    :param playouts: playouts per move; 0 disables the search.
    :param round: value written to the PGN "Round" header.
    :return: the `chess.pgn.Game` holding the moves and the result header.
    """
    PGN = chess.pgn.Game()
    PGN.headers["Event"] = "Simulated Training Game"
    PGN.headers["Site"] = "Cozy Computer Lounge"
    PGN.headers["Date"] = datetime.datetime.today().strftime('%Y-%m-%d %H:%M')
    PGN.headers["Round"] = round
    PGN.headers["White"] = "Network: " + self.nameOfNetwork
    PGN.headers["Black"] = "Network: " + self.nameOfNetwork
    PGN.headers["Variant"] = "crazyhouse"

    sim = ChessEnvironment()
    while sim.result == 2:
        if playouts == 0:
            position = sim.boardToString()
            if position not in self.dictionary:
                state = torch.from_numpy(sim.boardToState())
                nullAction = torch.from_numpy(np.zeros(1))  # filler labels, ignored here
                testSet = DoubleHeadDataset(state, nullAction, nullAction)
                generatePredic = torch.utils.data.DataLoader(dataset=testSet, batch_size=len(state), shuffle=False)
                with torch.no_grad():
                    for images, _labels1, _labels2 in generatePredic:
                        outputs = self.neuralNet(images)[0]
                        # Register the position under the next free index and
                        # cache its policy scores and legal moves.
                        self.dictionary[sim.boardToString()] = len(self.dictionary)
                        policy = ActionToArray.moveEvaluations(
                            ActionToArray.legalMovesForState(sim.arrayBoard, sim.board),
                            sim.arrayBoard, outputs)
                        self.childrenPolicyEval.append(policy)
                        self.childrenMoveNames.append(
                            ActionToArray.legalMovesForState(sim.arrayBoard, sim.board))

            directory = self.dictionary[sim.boardToString()]
            # Noise decays with the move number.
            index = np.argmax(noiseEvals(self.childrenPolicyEval[directory],
                                         3.0 / (6 * ((sim.plies // 2) + 1))))
            move = self.childrenMoveNames[directory][index]
        else:
            self.trainingPlayoutsFromPosition(playouts, sim)
            position = sim.boardToString()
            if position not in self.dictionary:
                state = torch.from_numpy(sim.boardToState())
                action = torch.from_numpy(np.zeros(1))
                data = DoubleHeadDataset(state, action, action)
                testLoader = torch.utils.data.DataLoader(dataset=data, batch_size=1, shuffle=False)
                device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                # Inference only: match the playouts == 0 branch and keep
                # autograd from tracking this forward pass.
                with torch.no_grad():
                    for images, _irrelevant1, _irrelevant2 in testLoader:
                        images = images.to(device)
                        outputs = self.neuralNet(images)[0]
                self.addPositionToMCTS(sim.boardToString(),
                                       ActionToArray.legalMovesForState(sim.arrayBoard, sim.board),
                                       sim.arrayBoard, outputs, sim)

            directory = self.dictionary[sim.boardToString()]
            index = np.argmax(
                PUCT_Algorithm(self.childrenStateWin[directory],
                               self.childrenStateSeen[directory],
                               2**0.5,  # 0.25-0.30 guarantees diversity
                               np.sum(self.childrenStateSeen[directory]),
                               self.childrenValueEval[directory],
                               noiseEvals(self.childrenPolicyEval[directory],
                                          2.1 / (7 * ((sim.plies // 2) + 1))))
            )
            move = self.childrenMoveNames[directory][index]

        sim.makeMove(move)
        sim.gameResult()

        # The first move hangs off the game root; later moves extend the line.
        if sim.plies == 1:
            node = PGN.add_variation(chess.Move.from_uci(move))
        else:
            node = node.add_variation(chess.Move.from_uci(move))

    if sim.result == 1:
        PGN.headers["Result"] = "1-0"
    elif sim.result == 0:
        PGN.headers["Result"] = "1/2-1/2"
    elif sim.result == -1:
        PGN.headers["Result"] = "0-1"

    print(PGN)
    return PGN
listOfMoves.append(singleGame) print(pgnGames[i]) except: print("", end="") f = open("Training Data/201805games2000.txt", "w+") for i in range(len(listOfMoves)): print(listOfMoves[i], ",") f.write(str(listOfMoves[i]) + ",\n") f.close() inList = [] outList = [] for j in range(len(listOfMoves)): board = ChessEnvironment() for i in range(len(listOfMoves[j])): state = board.boardToState() action = ActionToArray.moveArray(listOfMoves[j][i], board.arrayBoard) for k in range(320, 384): action[0][k] = 0 if board.board.legal_moves.count() != len( ActionToArray.legalMovesForState(board.arrayBoard, board.board)): print("ERROR!") board.makeMove(listOfMoves[j][i]) # add it to database inList.append(state) outList.append(np.argmax(action))
def run(self):
    """Evaluate the position reached after `self.move` and store the result.

    Works on a deep copy of `self.board`: the copied fields are moved onto a
    fresh ChessEnvironment, `self.move` is played there, and the second output
    of the model is read for the resulting position.  The value is mapped from
    the author's documented [-1, 1] range onto [0, 1] and, when the ply count
    after the move is even, flipped with `1 - value`.

    Both `network` (the model) and `evaluation` (the result container) are
    names resolved outside this method; the result is written to
    `evaluation[self.index]` and nothing is returned.
    """
    model = network

    # Copy the board, then move the copied fields onto a new environment.
    snapshot = copy.deepcopy(self.board)
    scratch = ChessEnvironment()
    for field in ("arrayBoard", "board", "plies", "whiteCaptivePieces",
                  "blackCaptivePieces", "actuallyAPawn"):
        setattr(scratch, field, getattr(snapshot, field))
    scratch.updateNumpyBoards()

    # Play the candidate move on the scratch environment only.
    scratch.makeMove(self.move)

    features = torch.from_numpy(scratch.boardToState())
    raw = model(features)[1].detach().numpy()[0][0]

    # Map the raw value onto [0, 1].
    score = raw / 2 + 0.5

    # Per the original author's note: an even ply count means Black has just
    # moved, so the score is flipped to be from Black's side; otherwise White
    # has just moved and the score is kept as is.
    if scratch.plies % 2 == 0:
        score = 1 - score

    evaluation[self.index] = score