def main():
    logging.info("learnTicTacToeWithDecoderRandomForest.py main()")

    authority = tictactoe.Authority()
    positionTensorShape = authority.PositionTensorShape()
    moveTensorShape = authority.MoveTensorShape()
    playerList = authority.PlayersList()

    if args.startWithNeuralNetwork is not None:
        raise NotImplementedError(
            "main(): Starting with a neural network is not implemented...")
    else:
        if args.startWithAutoencoder is not None:
            autoencoderNet = autoencoder.position.Net()
            autoencoderNet.Load(args.startWithAutoencoder)
            decoderRandomForest = Decoder.BuildARandomForestDecoderFromAnAutoencoder(
                autoencoderNet, args.maximumNumberOfTrees, args.treesMaximumDepth)
            decoderRandomForest.SetEvaluationMode('mean')
            print("main(): decoderRandomForest.encodingBodyStructureSeq = {}".format(
                decoderRandomForest.encodingBodyStructureSeq))
        else:
            # Fail fast: decoderRandomForest would otherwise be undefined below
            raise ValueError(
                "main(): startWithAutoencoder was not provided; cannot build the decoder random forest")

    """# Create the optimizer
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, neuralNetwork.parameters()),
                                 lr=args.learningRate,
                                 betas=(0.5, 0.999))

    # Loss function
    loss = torch.nn.MSELoss()

    # Initial learning rate
    learningRate = args.learningRate
    """

    # Output monitoring file
    epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'),
                         "w", buffering=1)  # Flush the buffer at each line
    epochLossFile.write(
        "epoch,trainingMSE,validationMSE,averageRewardAgainstRandomPlayer,winRate,drawRate,lossRate\n")

    # First games against a random player, before any training
    (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer,
     numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer(
        decoderRandomForest, authority, 30)
    numberOfGames = numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws
    winRate = numberOfWinsForEvaluator / numberOfGames
    lossRate = numberOfWinsForRandomPlayer / numberOfGames
    drawRate = numberOfDraws / numberOfGames
    logging.info(
        "Against a random player, winRate = {}; drawRate = {}; lossRate = {}".format(
            winRate, drawRate, lossRate))
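    # Record the untrained baseline as epoch 0. The training and validation MSE
    # columns are written as '-' since no learning has happened yet.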
    epochLossFile.write('0' + ',' + '-' + ',' + '-' + ',' + str(winRate - lossRate) + ',' +
                        str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n')

    for epoch in range(1, args.numberOfEpochs + 1):
        logging.info("Epoch {}".format(epoch))

        # Generate starting positions
        minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions(epoch)
        maximumNumberOfMovesForInitialPositions = args.maximumNumberOfMovesForInitialPositions
        logging.info("Generating positions...")
        startingPositionsList = Predictor.SimulateRandomGames(
            authority, minimumNumberOfMovesForInitialPositions,
            maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForTraining)
        startingPositionsTensor = StartingPositionsTensor(startingPositionsList)

        logging.info("Evaluating expected reward for each starting position...")
        expectedRewardsList = ExpectedRewardsList(
            authority, decoderRandomForest, startingPositionsList,
            args.numberOfSimulations, playerList[1], args.epsilon)
        #print ("expectedRewardsList = {}".format(expectedRewardsList))
        expectedRewardsTensor = ExpectedRewardsTensor(expectedRewardsList)

        logging.info("Learning from the examples...")
        decoderRandomForest.LearnFromMinibatch(startingPositionsTensor, expectedRewardsTensor)

        afterLearningTrainingPredictionsList = decoderRandomForest.Value(startingPositionsTensor)
        afterLearningTrainingPredictionsTensor = ExpectedRewardsTensor(
            afterLearningTrainingPredictionsList)
        trainingMSE = torch.nn.functional.mse_loss(
            afterLearningTrainingPredictionsTensor, expectedRewardsTensor).item()
        logging.info("trainingMSE = {}".format(trainingMSE))

        # Test on validation positions
        logging.info("Generating validation positions...")
        validationStartingPositionsList = Predictor.SimulateRandomGames(
            authority, minimumNumberOfMovesForInitialPositions,
            maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForValidation)
        validationStartingPositionsTensor = StartingPositionsTensor(
            validationStartingPositionsList)

        logging.info("Evaluating expected reward for each validation starting position...")
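        # The validation targets are built by the same simulation-based estimate as
        # the training targets, but on positions the forest did not learn from, so
        # validationMSE measures generalization rather than memorization.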
        validationExpectedRewardsList = ExpectedRewardsList(
            authority, decoderRandomForest, validationStartingPositionsList,
            args.numberOfSimulations, playerList[1], args.epsilon)
        validationExpectedRewardsTensor = ExpectedRewardsTensor(validationExpectedRewardsList)
        currentValidationPredictionList = decoderRandomForest.Value(
            validationStartingPositionsTensor)
        currentValidationPredictionTensor = ExpectedRewardsTensor(
            currentValidationPredictionList)
        validationMSE = torch.nn.functional.mse_loss(
            currentValidationPredictionTensor, validationExpectedRewardsTensor).item()
        logging.info("validationMSE = {}".format(validationMSE))

        # Play against a random player
        (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer,
         numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer(
            decoderRandomForest, authority, 30)
        numberOfGames = numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws
        winRate = numberOfWinsForEvaluator / numberOfGames
        lossRate = numberOfWinsForRandomPlayer / numberOfGames
        drawRate = numberOfDraws / numberOfGames
        logging.info(
            "Against a random player, winRate = {}; drawRate = {}; lossRate = {}".format(
                winRate, drawRate, lossRate))

        epochLossFile.write(
            str(epoch) + ',' + str(trainingMSE) + ',' + str(validationMSE) + ',' +
            str(winRate - lossRate) + ',' + str(winRate) + ',' + str(drawRate) + ',' +
            str(lossRate) + '\n')

        # Save the forest trained so far
        filepath = os.path.join(args.outputDirectory, 'tictactoe_' + str(epoch) + '.bin')
        decoderRandomForest.Save(filepath)
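# Assumed entry point, not shown in this excerpt: the script presumably parses
# its command-line arguments into the module-level `args` (e.g. with argparse)
# before main() runs.
if __name__ == '__main__':
    main()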