def TrainEpoch(self, e):
    """Run one training step for both player models and return their losses.

    A batch of ``self.batch`` games is created with ``Game.GetRandomGame(e)``
    and split by the player whose turn it is. The targets are whatever
    ``self.PlayGames(games)`` returns. ``self.modelO`` is trained and then
    evaluated on the positions where O is to move, ``self.modelX`` on the
    positions where X is to move.

    Diagnostics: when ``e % 3 == 0`` one example position per player is
    printed together with the model prediction and the target; when
    ``e % 20 == 0`` a fresh ``Game()`` is played through ``self.PlayGame``
    with every position and prediction printed.

    Args:
        e: Epoch number. Forwarded to ``Game.GetRandomGame`` and used to
            schedule the diagnostic output.

    Returns:
        Tuple ``(trainingLossO, trainingLossX)`` holding the values returned
        by ``evaluate`` for the O model and the X model respectively.
    """
    games = [Game.GetRandomGame(e) for _ in range(self.batch)]

    # Side-to-move and the input vectors are captured before PlayGames runs,
    # same as the original ordering (PlayGames presumably advances the games
    # -- TODO confirm).
    players = [g.GetCurrentPlayer() for g in games]
    OIndexes = [i for i, p in enumerate(players) if p == Game.O]
    XIndexes = [i for i, p in enumerate(players) if p == Game.X]

    xInput = np.stack([g.AsVector() for g in games], axis=0)
    # Assumes yTrue supports integer-list indexing like a NumPy array.
    yTrue = self.PlayGames(games)

    boardShape = (Game.ROW_COUNT, Game.COL_COUNT)
    sides = (
        ("O", self.modelO, OIndexes),
        ("X", self.modelX, XIndexes),
    )

    losses = []
    for label, wrapper, indexes in sides:
        trainedModel: Model = wrapper.GetModel()
        # NOTE(review): if `indexes` is empty this still hands a zero-length
        # batch to train_on_batch/evaluate -- confirm the model tolerates it.
        xSide = xInput[indexes]
        ySide = yTrue[indexes]
        trainedModel.train_on_batch(x=xSide, y=ySide)
        loss = trainedModel.evaluate(xSide, ySide)
        print("model%s: training_loss: %f" % (label, loss))
        losses.append(loss)

    trainingLossO, trainingLossX = losses
    print("total loss: %f" % (trainingLossO + trainingLossX))

    if e % 3 == 0:
        for label, wrapper, indexes in sides:
            print("Example of %s: " % label)
            if not indexes:
                # A random batch may contain no position for this player;
                # indexing [0] here used to raise IndexError.
                print("(no %s-to-move game in this batch)" % label)
                continue
            first = indexes[0]
            exampleGame = games[first]
            print(exampleGame)
            print(np.resize(self.GetPrediction(wrapper, exampleGame), boardShape))
            print(np.resize(yTrue[first], boardShape))

    if e % 20 == 0:
        print("Game play: ")

        def GameCallback(g):
            # Show the position and the prediction of whichever model is to move.
            print("----------------")
            print(g)
            model = {
                Game.O: self.modelO,
                Game.X: self.modelX,
            }[g.GetCurrentPlayer()]
            print(np.reshape(self.GetPrediction(model, g), boardShape))

        self.PlayGame(Game(), GameCallback)

    return trainingLossO, trainingLossX
def RandomGames():
    """Return one game produced by ``Game.GetRandomGame()``.

    Asserts that the game's winner equals ``Game.EMPTY`` (presumably meaning
    the game is still undecided -- confirm against ``Game``) before handing
    it back.
    """
    candidate = Game.GetRandomGame()
    # Sanity check only; like any assert it is skipped under ``python -O``.
    assert candidate.GetWinner() == Game.EMPTY
    return candidate