def main():
    import checkers
    import moveEvaluation.ConvolutionStack

    print("generateMoveStatistics.py main()")
    authority = checkers.Authority()
    playerList = authority.PlayersList()
    #neuralNetwork = moveEvaluation.ConvolutionStack.Net()
    #neuralNetwork.Load("/home/sebastien/projects/DeepReinforcementLearning/outputs/Net_(6,1,8,8)_[(5,32),(5,32),(5,32)]_(4,1,8,8)_checkers_48.pth")
    keepNumberOfMovesBeforeEndGame = 3
    numberOfPositions = 3
    numberOfGamesForEvaluation = 5
    softMaxTemperatureForSelfPlayEvaluation = 0.1
    epsilon = 0.1
    maximumNumberOfMovesForFullGameSimulation = 100
    maximumNumberOfMovesForEndGameSimulation = 10

    positionMovesStatistics = GenerateEndGameStatistics(
        playerList,
        authority,
        None,  # neuralNetwork,
        keepNumberOfMovesBeforeEndGame,
        numberOfPositions,
        numberOfGamesForEvaluation,
        softMaxTemperatureForSelfPlayEvaluation,
        epsilon,
        maximumNumberOfMovesForFullGameSimulation,
        maximumNumberOfMovesForEndGameSimulation
    )
    print("positionMovesStatistics = \n{}".format(positionMovesStatistics))
def main(): print("autoencoder > position.py main()") import checkers authority = checkers.Authority() positionTensorShape = authority.PositionTensorShape() autoencoder = Net(positionTensorShape, [(3, 32, 2), (3, 64, 2)], 100) inputPositionTensor = authority.InitialPosition() outputTensor = autoencoder(inputPositionTensor.unsqueeze(0))
def main(): print("ConvolutionStack.py main()") import autoencoder.position import checkers autoencoderNet = autoencoder.position.Net() autoencoderNet.Load( '/home/sebastien/projects/DeepReinforcementLearning/autoencoder/outputs/AutoencoderNet_(6,1,8,8)_[(5,16,2),(5,32,2)]_200_checkersAutoencoder_44.pth' ) actionValuePyramid = BuildAnActionValuePyramidFromAnAutoencoder( autoencoderNet, [512, 256], (4, 1, 8, 8)) print(actionValuePyramid) authority = checkers.Authority() inputTensor = authority.InitialPosition() outputTensor = actionValuePyramid(inputTensor.unsqueeze(0)) #print ("outputTensor = {}".format(outputTensor)) actionValuePyramid.Save('./', 'test3')
def main(): print("learnCheckersAutoencoder.py main()") authority = checkers.Authority() positionTensorShape = authority.PositionTensorShape() playerList = authority.PlayersList() if args.startWithNeuralNetwork is not None: neuralNetwork = position.Net() neuralNetwork.Load(args.startWithNeuralNetwork) for name, p in neuralNetwork.named_parameters(): logging.info("layer: {}".format(name)) if "layer_0" in name or "layer_1" in name: logging.info("Setting p.requires_grad = False") p.requires_grad = False else: neuralNetwork = position.Net( positionTensorShape, bodyStructure=[(5, 16, 2), (5, 32, 2)], #, (3, 64, 2)],#, (5, 16), (5, 16)], numberOfLatentVariables=200) # Create the optimizer optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, neuralNetwork.parameters()), lr=args.learningRate, betas=(0.5, 0.999)) # Loss function loss = torch.nn.BCEWithLogitsLoss(pos_weight=torch.Tensor( [args.positiveCaseWeight])) # torch.nn.MSELoss() # Initial learning rate learningRate = args.learningRate # Output monitoring file epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'), "w", buffering=1) # Flush the buffer at each line epochLossFile.write("epoch,trainingLoss,validationLoss,errorRate\n") for epoch in range(1, args.numberOfEpochs + 1): logging.info("Epoch {}".format(epoch)) # Set the neural network to training mode neuralNetwork.train() # Generate positions minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions( epoch) maximumNumberOfMovesForPositions = numpy.random.randint( args.maximumNumberOfMovesForPositions ) # Between 0 and args.maximumNumberOfMovesForPositions logging.info("Generating {} training random positions".format( args.numberOfPositionsForTraining)) trainingPositionsList = GenerateRandomPositions( args.numberOfPositionsForTraining, playerList, authority, maximumNumberOfMovesForPositions) logging.info("Generating {} validation random positions".format( args.numberOfPositionsForValidation)) validationPositionsList = GenerateRandomPositions( args.numberOfPositionsForValidation, playerList, authority, maximumNumberOfMovesForPositions) trainingLossSum = 0.0 minibatchIndicesList = utilities.MinibatchIndices( len(trainingPositionsList), args.minibatchSize) logging.info("Going through the minibatch") for minibatchNdx in range(len(minibatchIndicesList)): print('.', end='', flush=True) minibatchPositions = [] for index in minibatchIndicesList[minibatchNdx]: #logging.debug("len(positionStatisticsList[{}]) = {}".format(index, len(positionStatisticsList[index]))) minibatchPositions.append(trainingPositionsList[index]) minibatchPositionsTensor = utilities.MinibatchTensor( minibatchPositions) minibatchTargetPositionsTensor = utilities.MinibatchTensor( minibatchPositions) # Autoencoder => target output = input optimizer.zero_grad() # Forward pass outputTensor = neuralNetwork(minibatchPositionsTensor) # Calculate the error and backpropagate trainingLoss = loss(outputTensor, minibatchTargetPositionsTensor) #logging.debug("trainingLoss.item() = {}".format(trainingLoss.item())) trainingLoss.backward() trainingLossSum += trainingLoss.item() # Move in the gradient descent direction optimizer.step() averageTrainingLoss = trainingLossSum / len(minibatchIndicesList) # Compute the validation loss neuralNetwork.eval() validationPositionsTensor = utilities.MinibatchTensor( validationPositionsList) validationOutputTensor = neuralNetwork(validationPositionsTensor) validationLoss = loss(validationOutputTensor, validationPositionsTensor ).item() # Autoencoder => 
target output = input # Compare the output tensor converted to one-hot with the target oneHotValidationOutputTensor = position.ConvertToOneHotPositionTensor( validationOutputTensor) numberOfErrors = torch.nonzero(validationPositionsTensor.long() - oneHotValidationOutputTensor).shape[0] errorRate = numberOfErrors / max( torch.nonzero(validationPositionsTensor).shape[0], 0) print(" * ") logging.info( "Epoch {}: averageTrainingLoss = {}\tvalidationLoss = {}\terrorRate = {}" .format(epoch, averageTrainingLoss, validationLoss, errorRate)) epochLossFile.write( str(epoch) + ',' + str(averageTrainingLoss) + ',' + str(validationLoss) + ',' + str(errorRate) + '\n') # Save the neural network neuralNetwork.Save(args.outputDirectory, 'checkersAutoencoder_' + str(epoch)) # Update the learning rate learningRate = learningRate * args.learningRateExponentialDecay utilities.adjust_lr(optimizer, learningRate)
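
# Hedged sketch (assumption, not the repo's implementation): utilities.adjust_lr is
# called above but its body is not shown in this excerpt.  A common PyTorch
# implementation of this kind of helper simply overwrites the learning rate of every
# parameter group, as below; the name adjust_lr_sketch marks it as illustrative only.
def adjust_lr_sketch(optimizer, learningRate):
    for param_group in optimizer.param_groups:
        param_group['lr'] = learningRate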
        positionsList.append(positionTensor)
        #movesList.append(chosenMove)
        numberOfPlayedMoves += 1
        player = playerList[numberOfPlayedMoves % 2]
    return positionsList, winner


if __name__ == '__main__':
    print("expectedMoveValues.py __main__")
    import checkers
    import moveEvaluation.ConvolutionStack

    authority = checkers.Authority()
    playersList = authority.PlayersList()
    neuralNetwork = moveEvaluation.ConvolutionStack.Net()
    neuralNetwork.Load(
        "/home/sebastien/projects/DeepReinforcementLearning/outputs/Net_(6,1,8,8)_[(5,32),(5,32),(5,32)]_(4,1,8,8)_checkers_48.pth"
    )
    softMaxTemperatureForSelfPlayEvaluation = 0.1
    epsilon = 0.1
    maximumNumberOfMoves = 200
    startingPosition = None
    positionsList, winner = SimulateAGame(
        playersList, authority, neuralNetwork,
        softMaxTemperatureForSelfPlayEvaluation,
def main():
    logging.info("learnCheckers.py main()")

    authority = checkers.Authority()
    positionTensorShape = authority.PositionTensorShape()
    moveTensorShape = authority.MoveTensorShape()
    playerList = authority.PlayersList()

    if args.startWithNeuralNetwork is not None:
        neuralNetwork = moveEvaluation.ConvolutionStack.Net()
        neuralNetwork.Load(args.startWithNeuralNetwork)
    else:
        """neuralNetwork = moveEvaluation.ConvolutionStack.Net(
            positionTensorShape,
            [(5, 32), (5, 32), (5, 32)],
            moveTensorShape
        )
        """
        autoencoderNet = autoencoder.position.Net()
        autoencoderNet.Load(
            '/home/sebastien/projects/DeepReinforcementLearning/moveEvaluation/autoencoder/outputs/AutoencoderNet_(6,1,8,8)_[(5,16,2),(5,32,2)]_200_checkersAutoencoder_44.pth'
        )
        neuralNetwork = moveEvaluation.ConvolutionStack.BuildAnActionValuePyramidFromAnAutoencoder(
            autoencoderNet, [512, 360], (4, 1, 8, 8))
        # Freeze the encoding layers inherited from the autoencoder
        for name, p in neuralNetwork.named_parameters():
            logging.info("layer: {}".format(name))
            if "encoding" in name:
                logging.info("Setting p.requires_grad = False")
                p.requires_grad = False

    print("main(): neuralNetwork = {}".format(neuralNetwork))

    # Create the optimizer
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        neuralNetwork.parameters()),
                                 lr=args.learningRate,
                                 betas=(0.5, 0.999))

    # Loss function
    loss = torch.nn.MSELoss()

    # Initial learning rate
    learningRate = args.learningRate

    # Output monitoring file
    epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'),
                         "w",
                         buffering=1)  # Flush the buffer at each line
    epochLossFile.write(
        "epoch,averageActionValuesTrainingLoss,averageRewardAgainstRandomPlayer,winRate,drawRate,lossRate\n"
    )

    # Save the initial neural network, and write its score against a random player
    neuralNetwork.Save(args.outputDirectory, 'checkers_0')
    neuralNetwork.eval()
    (averageRewardAgainstRandomPlayer, winRate, drawRate, lossRate, losingGamePositionsListList) = \
        expectedMoveValues.AverageRewardAgainstARandomPlayerKeepLosingGames(
            playerList,
            authority,
            neuralNetwork,
            args.chooseHighestProbabilityIfAtLeast,
            True,
            softMaxTemperature=0.1,
            numberOfGames=12,
            moveChoiceMode='SemiExhaustiveMiniMax',
            numberOfGamesForMoveEvaluation=0,  # ignored by SoftMax
            depthOfExhaustiveSearch=1,
            numberOfTopMovesToDevelop=7
        )
    logging.info(
        "main(): averageRewardAgainstRandomPlayer = {}; winRate = {}; drawRate = {}; lossRate = {}"
        .format(averageRewardAgainstRandomPlayer, winRate, drawRate, lossRate))
    epochLossFile.write('0' + ',' + '-' + ',' +
                        str(averageRewardAgainstRandomPlayer) + ',' +
                        str(winRate) + ',' + str(drawRate) + ',' +
                        str(lossRate) + '\n')

    softMaxTemperatureForSelfPlayEvaluation = args.softMaxTemperatureForSelfPlayEvaluation
    losingGamesAgainstRandomPlayerPositionsList = []

    for epoch in range(1, args.numberOfEpochs + 1):
        logging.info("Epoch {}".format(epoch))
        # Set the neural network to training mode
        neuralNetwork.train()

        # Generate positions
        minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions(epoch)
        maximumNumberOfMovesForInitialPositions = args.maximumNumberOfMovesForInitialPositions
        logging.info("Generating positions...")
        if epoch % 3 == -1:  # Disabled branch: this condition is never true
            positionStatisticsList = generateMoveStatistics.GenerateMoveStatisticsMultiprocessing(
                playerList,
                authority,
                neuralNetwork,
                args.proportionOfRandomInitialPositions,
                (minimumNumberOfMovesForInitialPositions,
                 maximumNumberOfMovesForInitialPositions),
                16,  # args.numberOfInitialPositions,
                args.numberOfGamesForEvaluation,
                softMaxTemperatureForSelfPlayEvaluation,
                args.epsilon,
                args.depthOfExhaustiveSearch,
                args.chooseHighestProbabilityIfAtLeast,
                [],  # losingGamesAgainstRandomPlayerPositionsList,
                args.numberOfProcesses)
        else:
            positionStatisticsList = generateMoveStatistics.GenerateMoveStatisticsWithMiniMax(
                playerList,
                authority,
                neuralNetwork,
                (minimumNumberOfMovesForInitialPositions,
                 maximumNumberOfMovesForInitialPositions),
                args.numberOfInitialPositions,
                args.depthOfExhaustiveSearch,
                [],  # losingGamesAgainstRandomPlayerPositionsList
            )

        # Add end games
        logging.info("Generating end games...")
        keepNumberOfMovesBeforeEndGame = 3
        numberOfEndGamePositions = 64
        numberOfGamesForEndGameEvaluation = 15
        maximumNumberOfMovesForFullGameSimulation = args.maximumNumberOfMovesForInitialPositions
        maximumNumberOfMovesForEndGameSimulation = 10
        endGamePositionsStatisticsList = generateMoveStatistics.GenerateEndGameStatistics(
            playerList,
            authority,
            neuralNetwork,
            keepNumberOfMovesBeforeEndGame,
            numberOfEndGamePositions,
            numberOfGamesForEndGameEvaluation,
            softMaxTemperatureForSelfPlayEvaluation,
            args.epsilon,
            maximumNumberOfMovesForFullGameSimulation,
            maximumNumberOfMovesForEndGameSimulation,
        )
        #logging.debug("len(positionStatisticsList) = {}; len(endGamePositionsStatisticsList) = {}".format(len(positionStatisticsList), len(endGamePositionsStatisticsList)))
        positionStatisticsList += endGamePositionsStatisticsList
        #logging.debug("After +=: len(positionStatisticsList) = {}".format(len(positionStatisticsList)))

        actionValuesLossSum = 0.0
        numberOfSubEpochs = 100
        for subEpochNdx in range(numberOfSubEpochs):
            minibatchIndicesList = utilities.MinibatchIndices(
                len(positionStatisticsList), args.minibatchSize)

            logging.info("Going through the minibatch")
            for minibatchNdx in range(len(minibatchIndicesList)):
                print('.', end='', flush=True)
                minibatchPositions = []
                minibatchTargetActionValues = []
                minibatchLegalMovesMasks = []
                for index in minibatchIndicesList[minibatchNdx]:
                    #logging.debug("len(positionStatisticsList[{}]) = {}".format(index, len(positionStatisticsList[index])))
                    minibatchPositions.append(positionStatisticsList[index][0])
                    averageValue = positionStatisticsList[index][1]
                    #- args.numberOfStandardDeviationsBelowAverageForValueEstimate * positionStatisticsList[index][2]
                    legalMovesMask = positionStatisticsList[index][3]
                    averageValue = averageValue * legalMovesMask.float()
                    minibatchTargetActionValues.append(averageValue)
                    minibatchLegalMovesMasks.append(legalMovesMask)
                minibatchPositionsTensor = utilities.MinibatchTensor(minibatchPositions)
                minibatchTargetActionValuesTensor = utilities.MinibatchTensor(
                    minibatchTargetActionValues)

                optimizer.zero_grad()
                # Forward pass
                outputActionValuesTensor = neuralNetwork(minibatchPositionsTensor)

                # Mask the output action values with the legal moves mask
                for maskNdx in range(len(minibatchLegalMovesMasks)):
                    outputActionValues = outputActionValuesTensor[maskNdx].clone()
                    legalMovesMask = minibatchLegalMovesMasks[maskNdx]
                    maskedOutputActionValues = outputActionValues * legalMovesMask.float()
                    outputActionValuesTensor[maskNdx] = maskedOutputActionValues

                # Calculate the error and backpropagate
                # Create a tensor with the list of legal moves masks
                minibatchLegalMovesMasksTensor = torch.zeros(outputActionValuesTensor.shape)
                for maskNdx in range(len(minibatchLegalMovesMasks)):
                    minibatchLegalMovesMasksTensor[maskNdx] = minibatchLegalMovesMasks[maskNdx]
                standardDeviationOfLegalValues = utilities.StandardDeviationOfLegalValues(
                    outputActionValuesTensor, minibatchLegalMovesMasksTensor)
                #logging.debug("standardDeviationOfLegalValues = {}".format(standardDeviationOfLegalValues))
                actionValuesLoss = loss(
                    outputActionValuesTensor, minibatchTargetActionValuesTensor
                ) - standardDeviationAlpha * standardDeviationOfLegalValues

                try:
                    actionValuesLoss.backward()
                    actionValuesLossSum += actionValuesLoss.item()
                    # Move in the gradient descent direction
                    optimizer.step()
                except Exception as exc:
                    msg = "Caught exception: {}".format(exc)
                    print(msg)
                    logging.error(msg)
                    print('X', end='', flush=True)

        averageActionValuesTrainingLoss = actionValuesLossSum / (
            len(minibatchIndicesList) * numberOfSubEpochs)

        print(" * ")
        logging.info("Epoch {}: averageActionValuesTrainingLoss = {}".format(
            epoch, averageActionValuesTrainingLoss))

        # Update the learning rate
        learningRate = learningRate * args.learningRateExponentialDecay
        utilities.adjust_lr(optimizer, learningRate)

        # Save the neural network
        #modelParametersFilename = os.path.join(args.outputDirectory, "neuralNet_connect4_" + str(epoch) + '.pth')
        #torch.save(neuralNetwork.state_dict(), modelParametersFilename)
        neuralNetwork.Save(args.outputDirectory, 'checkers_' + str(epoch))

        neuralNetwork.eval()
        if epoch % 200 == -1:  # Disabled branch: this condition is never true
            moveChoiceMode = 'ExpectedMoveValuesThroughSelfPlay'
            numberOfGames = 100
            depthOfExhaustiveSearch = 2
            monitoringSoftMaxTemperature = 0.1
        else:
            moveChoiceMode = 'SemiExhaustiveMiniMax'
            numberOfGames = 12
            depthOfExhaustiveSearch = 1
            numberOfTopMovesToDevelop = 7
        (averageRewardAgainstRandomPlayer, winRate, drawRate, lossRate, losingGamePositionsListList) = \
            expectedMoveValues.AverageRewardAgainstARandomPlayerKeepLosingGames(
                playerList,
                authority,
                neuralNetwork,
                args.chooseHighestProbabilityIfAtLeast,
                True,
                softMaxTemperature=softMaxTemperatureForSelfPlayEvaluation,
                numberOfGames=numberOfGames,
                moveChoiceMode=moveChoiceMode,
                numberOfGamesForMoveEvaluation=41,  # ignored by SoftMax
                depthOfExhaustiveSearch=depthOfExhaustiveSearch,
                numberOfTopMovesToDevelop=numberOfTopMovesToDevelop
            )
        logging.info(
            "averageRewardAgainstRandomPlayer = {}; winRate = {}; drawRate = {}; lossRate = {}"
            .format(averageRewardAgainstRandomPlayer, winRate, drawRate, lossRate))

        # Collect the positions from losing games
        losingGamesAgainstRandomPlayerPositionsList = []
        for (losingGamePositionsList, firstPlayer) in losingGamePositionsListList:
            for positionNdx in range(len(losingGamePositionsList) - 1):
                if firstPlayer == playerList[0]:
                    # Keep even positions
                    if positionNdx % 2 == 0:
                        losingGamesAgainstRandomPlayerPositionsList.append(
                            losingGamePositionsList[positionNdx])
                else:  # firstPlayer == playerList[1] -> Keep odd positions
                    if positionNdx % 2 == 1:
                        losingGamesAgainstRandomPlayerPositionsList.append(
                            losingGamePositionsList[positionNdx])

        epochLossFile.write(
            str(epoch) + ',' + str(averageActionValuesTrainingLoss) + ',' +
            str(averageRewardAgainstRandomPlayer) + ',' + str(winRate) + ',' +
            str(drawRate) + ',' + str(lossRate) + '\n')

    """initialPosition = authority.InitialPosition()
def main(): print("gameArena.py main()") # Create the game authority if args.game == 'tictactoe': authority = tictactoe.Authority() elif args.game == 'connect4': authority = connect4.Authority() elif args.game == 'checkers': authority = checkers.Authority() else: raise NotImplementedError("main(): unknown game '{}'".format( args.game)) playersList = authority.PlayersList() positionTensorShape = authority.PositionTensorShape() moveTensorShape = authority.MoveTensorShape() #if type(ast.literal_eval(args.neuralNetwork)) is list: # Neural networks ensemble if args.neuralNetwork is not None and args.neuralNetwork.startswith( '[') and args.neuralNetwork.endswith( ']'): # List => neural networks ensemble committeeMembersList = [] for neuralNetworkFilepath in ast.literal_eval(args.neuralNetwork): committeeMember = moveEvaluation.ConvolutionStack.Net() committeeMember.Load(neuralNetworkFilepath) committeeMembersList.append(committeeMember) neuralNetwork = moveEvaluation.netEnsemble.Committee( committeeMembersList) else: # Single neural network neuralNetwork = moveEvaluation.ConvolutionStack.Net( positionTensorShape, ast.literal_eval(args.networkBodyArchitecture), moveTensorShape) if args.neuralNetwork is not None: neuralNetwork.Load(args.neuralNetwork) winner = None numberOfPlayedMoves = 0 player = playersList[numberOfPlayedMoves % 2] positionTensor = authority.InitialPosition() humanPlayerTurn = 0 if args.opponentPlaysFirst: humanPlayerTurn = 1 """moveTensor = AskTheNeuralNetworkToChooseAMove( playersList, authority, neuralNetwork, args.chooseHighestProbabilityIfAtLeast, positionTensor, args.numberOfGamesForMoveEvaluation, args.softMaxTemperature, epsilon=0, displayExpectedMoveValues=args.displayExpectedMoveValues, depthOfExhaustiveSearch=args.depthOfExhaustiveSearch) """ moveTensor = SemiExhaustiveMinimaxHighestValue( playersList, authority, neuralNetwork, positionTensor, epsilon=0, maximumDepthOfSemiExhaustiveSearch=args.depthOfExhaustiveSearch, numberOfTopMovesToDevelop=args.numberOfTopMovesToDevelop, displayExpectedMoveValues=args.displayExpectedMoveValues, ) positionTensor, winner = authority.Move(positionTensor, playersList[0], moveTensor) numberOfPlayedMoves = 1 player = playersList[numberOfPlayedMoves % 2] authority.Display(positionTensor) while winner is None: print("numberOfPlayedMoves % 2 = {}; humanPlayerTurn = {}".format( numberOfPlayedMoves % 2, humanPlayerTurn)) if numberOfPlayedMoves % 2 == humanPlayerTurn: inputIsLegal = False while not inputIsLegal: try: userInput = input( "Your move ('?' 
to get the legal moves mask, 'positionTensor' to get the position tensor): " ) if userInput == "?": legalMovesMask = authority.LegalMovesMask( positionTensor, player) print("legalMovesMask = \n{}".format(legalMovesMask)) inputIsLegal = False elif userInput == "positionTensor": print("positionTensor = \n{}".format(positionTensor)) else: positionTensor, winner = authority.MoveWithString( positionTensor, player, userInput) inputIsLegal = True except ValueError as e: print("Caught exception '{}'.\nTry again".format(e)) numberOfPlayedMoves += 1 player = playersList[numberOfPlayedMoves % 2] authority.Display(positionTensor) else: # Neural network turn if player is playersList[1]: positionTensor = authority.SwapPositions( positionTensor, playersList[0], playersList[1]) startTime = time.time() """moveTensor = AskTheNeuralNetworkToChooseAMove( playersList, authority, neuralNetwork, args.chooseHighestProbabilityIfAtLeast, positionTensor, args.numberOfGamesForMoveEvaluation, args.softMaxTemperature, epsilon=0, displayExpectedMoveValues=args.displayExpectedMoveValues, depthOfExhaustiveSearch=args.depthOfExhaustiveSearch) """ moveTensor = SemiExhaustiveMinimaxHighestValue( playersList, authority, neuralNetwork, positionTensor, epsilon=0, maximumDepthOfSemiExhaustiveSearch=args. depthOfExhaustiveSearch, numberOfTopMovesToDevelop=args.numberOfTopMovesToDevelop, displayExpectedMoveValues=args.displayExpectedMoveValues, ) endTime = time.time() decisionTime = endTime - startTime print("decisionTime = {}".format(decisionTime)) positionTensor, winner = authority.Move(positionTensor, playersList[0], moveTensor) if player is playersList[1]: positionTensor = authority.SwapPositions( positionTensor, playersList[0], playersList[1]) if winner is playersList[0] and player is playersList[1]: winner = playersList[1] numberOfPlayedMoves += 1 player = playersList[numberOfPlayedMoves % 2] authority.Display(positionTensor) if winner == 'draw': print("Draw!") else: print("{} won!".format(winner))
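
# Hedged sketch (assumption): the argument parser for this script is not shown in this
# excerpt.  The attributes below are the ones actually read by main() above; the flag
# names follow those attributes, but the types and defaults are illustrative guesses only.
def parse_arguments_sketch():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--game', default='checkers', help="'tictactoe', 'connect4' or 'checkers'")
    parser.add_argument('--neuralNetwork', default=None, help="Filepath, or a list of filepaths for an ensemble")
    parser.add_argument('--networkBodyArchitecture', default='[(5, 32), (5, 32), (5, 32)]')
    parser.add_argument('--opponentPlaysFirst', action='store_true')
    parser.add_argument('--depthOfExhaustiveSearch', type=int, default=1)
    parser.add_argument('--numberOfTopMovesToDevelop', type=int, default=7)
    parser.add_argument('--displayExpectedMoveValues', action='store_true')
    return parser.parse_args()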