class RLTTTPlayer(TTTPlayer):
    """Tic-tac-toe player that chooses moves using a learned state-value model.

    Every empty cell is scored by asking ``self.learningAlgo`` for the value
    of the board state that would result from playing there. Most of the time
    the best-scoring cell is played; the rest of the time a random empty cell
    is played instead.
    """

    def __init__(self, learningModel=None):
        """Create the player.

        Args:
            learningModel: Optional learning model to use (for example one
                whose values were already loaded from disk). When omitted, a
                fresh ``TableLearning()`` is created, so existing no-argument
                construction keeps working unchanged.
        """
        if learningModel is None:
            learningModel = TableLearning()
        self.learningAlgo = learningModel
        super(RLTTTPlayer, self).__init__()

    def printValues(self):
        """Delegate to the learning model's ``printValues``."""
        self.learningAlgo.printValues()

    def testNextMove(self, state, i, j):
        """Return the state string obtained by placing this player at (i, j).

        Args:
            state: Current board state as a sequence of cell markers; it is
                copied, not modified.
            i: Row of the cell to fill.
            j: Column of the cell to fill.

        Returns:
            The cells joined into a single string, with the cell at flat index
            ``3 * i + j`` replaced by ``self.player``.
        """
        cells = list(state)
        cells[3 * i + j] = self.player
        return ''.join(cells)

    def makeNextMove(self):
        """Play one move on ``self.board`` if the board is still active.

        Returns:
            The board state as it was *before* the move was made.
        """
        previousState = self.board.getBoardState()
        if not self.isBoardActive():
            return previousState

        emptyPlaces = self.board.getEmptyBoardPlaces()
        # Draw the fallback random move first and the coin flip second, so the
        # sequence of random numbers consumed matches the original code.
        pickOne = random.choice(emptyPlaces)
        if random.uniform(0, 1) < 0.8:
            # With probability 0.8 play the best-valued move; the remaining
            # 0.2 keeps the random move picked above.
            moveChoices = {}
            for (i, j) in emptyPlaces:
                possibleNextState = self.testNextMove(previousState, i, j)
                moveChoices[(i, j)] = self.learningAlgo.getBoardStateValue(
                    self.player, self.board, possibleNextState)
            pickOne = max(moveChoices, key=moveChoices.get)
        self.board.makeMove(self.player, pickOne[0], pickOne[1])
        return previousState

    def learnFromMove(self, prevBoardState):
        """Pass the previous state and current board to the learning model.

        Args:
            prevBoardState: Board state captured before the last move, as
                returned by ``makeNextMove``.
        """
        self.learningAlgo.learnFromMove(self.player, self.board,
                                        prevBoardState)
def playUltimateForTraining():
    """Train the ultimate-TTT learning player and print model file sizes.

    Runs 40 rounds of ``GameSequence(1000, ...)`` between an ``RLUTTTPlayer``
    and a ``RandomUTTTPlayer``. After each round the learner is saved to a
    temporary JSON file and the size of that file in bytes is recorded. The
    recorded sizes are printed one per line once all rounds are done.

    The temporary file is removed even if a round or a save raises, so a
    failed run does not leave ``temp_learning.json`` behind.
    """
    learningModel = TableLearning()
    learningPlayer = RLUTTTPlayer(learningModel)
    randomPlayer = RandomUTTTPlayer()
    results = []
    tempFileName = 'temp_learning.json'
    try:
        for _ in range(40):
            games = GameSequence(1000, learningPlayer, randomPlayer,
                                 BoardClass=UTTTBoard,
                                 BoardDecisionClass=UTTTBoardDecision)
            # The returned win percentage is not used here; only the growth
            # of the saved model file is tracked.
            games.playGamesAndGetWinPercent()
            learningPlayer.saveLearning(tempFileName)
            results.append(os.path.getsize(tempFileName))
        print('\n'.join(map(str, results)))
    finally:
        # The file may not exist yet if the very first round failed.
        if os.path.exists(tempFileName):
            os.remove(tempFileName)
def __init__(self, learningModel=None):
    """Create the player.

    Args:
        learningModel: Optional learning model to use (for example one whose
            values were already loaded from disk). When omitted, a fresh
            ``TableLearning()`` is created, so existing no-argument
            construction keeps working unchanged.
    """
    if learningModel is None:
        learningModel = TableLearning()
    # Set the model before the base-class initialiser runs, as the original
    # code did.
    self.learningAlgo = learningModel
    super(RLTTTPlayer, self).__init__()
from player import RLTTTPlayer, TTTPlayer, RealTTTPlayer from board import TTTBoardDecision, BoxState, TTTBoard from learning import NNLearning, TableLearning from random import randint learningModel = TableLearning(TTTBoardDecision) learningModel.loadLearning("FinalTableModel.json") player1 = RLTTTPlayer(learningModel) player2 = RealTTTPlayer() board = TTTBoard() BoardDecisionClass = TTTBoardDecision() player1.startNewGame() player2.startNewGame() playOrder = randint(0, 1) while board.getBoardDecision() == BoardDecisionClass.ACTIVE: player1.setBoard(board, BoxState.PLAYER_X) player2.setBoard(board, BoxState.PLAYER_O) if playOrder == 0 and board.getBoardDecision( ) == BoardDecisionClass.ACTIVE: pState1 = player1.makeNextMove() board.printBoard() if board.getBoardDecision() == BoardDecisionClass.ACTIVE: inpplay = input("Select position (1-9): ") pState2 = player2.makeNextMove(BoxState.PLAYER_O, int(inpplay) - 1) if playOrder == 1 and board.getBoardDecision( ) == BoardDecisionClass.ACTIVE: pState1 = player1.makeNextMove() player1.finishGame() player2.finishGame() board.printBoard() if board.getBoardDecision() == TTTBoardDecision.DRAW:
for placeOnBoard in emptyPlaces: possibleNextState = self.testNextMove( previousState, boardLocation, placeOnBoard) moveChoices[(tuple(boardLocation), placeOnBoard )] = self.learningAlgo.getBoardStateValue( self.player, self.board, possibleNextState) (chosenBoard, pickOne) = max(moveChoices, key=moveChoices.get) else: chosenBoard = random.choice(activeBoardLocations) emptyPlaces = self.board.getEmptyBoardPlaces(chosenBoard) pickOne = random.choice(emptyPlaces) self.board.makeMove(self.player, chosenBoard, pickOne) return previousState def learnFromMove(self, prevBoardState): self.learningAlgo.learnFromMove(self.player, self.board, prevBoardState) def saveLearning(self, filename): self.learningAlgo.saveLearning(filename) def loadLearning(self, filename): self.learningAlgo.loadLearning(filename) if __name__ == '__main__': board = UTTTBoard() player1 = RandomUTTTPlayer() player2 = RLUTTTPlayer(TableLearning(UTTTBoardDecision))