Exemple #1
0
    def TrainEpoch(self, e):
        """Train both player models on one batch of random games.

        Draws ``self.batch`` games via ``Game.GetRandomGame(e)``, splits them by
        the player whose turn it is, and trains ``self.modelO`` / ``self.modelX``
        each on its own share of the batch.  Targets come from
        ``self.PlayGames(games)``.

        Every 3rd epoch one example per player is printed (game, prediction,
        target); every 20th epoch a whole game starting from ``Game()`` is
        played with per-move predictions printed.

        Args:
            e: Epoch number; forwarded to ``Game.GetRandomGame`` and used to
                decide when the diagnostic printouts happen.

        Returns:
            Tuple ``(trainingLossO, trainingLossX)``.  A side that has no games
            in this batch is not trained and its loss is reported as NaN.
        """
        games = [Game.GetRandomGame(e) for _ in range(self.batch)]

        # Players and inputs are captured BEFORE PlayGames runs, in case playing
        # the games changes their state.
        players = [g.GetCurrentPlayer() for g in games]
        OIndexes = [i for i, p in enumerate(players) if p == Game.O]
        XIndexes = [i for i, p in enumerate(players) if p == Game.X]

        xInput = np.stack([g.AsVector() for g in games], axis=0)
        # NOTE(review): yTrue is indexed with lists of ints below, so it is
        # presumably a numpy array with one row per game -- confirm in PlayGames.
        yTrue = self.PlayGames(games)

        def TrainSide(label, wrapper, indexes):
            # One training step on the games at `indexes`, then report the loss.
            if not indexes:
                # A random batch can contain no game for this player; training or
                # evaluating on an empty slice is meaningless, so skip it.
                print("%s: no games in this batch, skipping training" % label)
                return float("nan")
            trainedModel : Model = wrapper.GetModel()
            trainedModel.train_on_batch(x=xInput[indexes], y=yTrue[indexes])
            # NOTE(review): the "%f" below assumes evaluate() returns a scalar,
            # i.e. the model was compiled without extra metrics -- confirm.
            loss = trainedModel.evaluate(xInput[indexes], yTrue[indexes])
            print("%s: training_loss: %f" % (label, loss))
            return loss

        trainingLossO = TrainSide("modelO", self.modelO, OIndexes)
        trainingLossX = TrainSide("modelX", self.modelX, XIndexes)

        print("total loss: %f" % (trainingLossO + trainingLossX))

        boardShape = (Game.ROW_COUNT, Game.COL_COUNT)

        def ShowExample(title, wrapper, indexes):
            # Print the first game of this side with its prediction and target.
            print(title)
            if not indexes:
                print("(no such game in this batch)")
                return
            first = indexes[0]
            exampleGame = games[first]
            print(exampleGame)
            # np.resize (not reshape) is kept from the original: it never raises,
            # but repeats/truncates data if the size does not match the board.
            print(np.resize(self.GetPrediction(wrapper, exampleGame), boardShape))
            print(np.resize(yTrue[first], boardShape))

        if e % 3 == 0:
            ShowExample("Example of O: ", self.modelO, OIndexes)
            ShowExample("Example of X: ", self.modelX, XIndexes)

        if e % 20 == 0:
            print("Game play: ")
            modelByPlayer = { Game.O : self.modelO, Game.X : self.modelX }

            def GameCallback(g):
                # Passed to PlayGame; prints the game and the prediction of the
                # model belonging to the player whose turn it is.
                print("----------------")
                print(g)
                model = modelByPlayer[g.GetCurrentPlayer()]
                print(np.reshape(self.GetPrediction(model, g), boardShape))

            self.PlayGame(Game(), GameCallback)

        return trainingLossO, trainingLossX
Exemple #2
0
 def RandomGames():
     """Return one game from ``Game.GetRandomGame()``.

     Asserts that the game's winner is ``Game.EMPTY`` before returning it.
     """
     candidate = Game.GetRandomGame()
     # Sanity check only: asserts are stripped when Python runs with -O.
     assert candidate.GetWinner() == Game.EMPTY
     return candidate