class LearningBoard(TicTacToeBoard):
    """Tic-tac-toe board wrapped with a (reward, episode_done) step interface for RL training."""

    # Reward signal constants returned to the learner.
    winning_action_reward = 1.0
    loosing_action_reward = -0.5  # NOTE(review): "loosing" spelling kept — external code may reference this attribute name
    neutral_action_reward = 0.0
    prohibited_action_reward = -1.0

    def MakeMoveWithReward(self, action):
        """Play flat action 0-8 on the 3x3 grid; return (reward, episode_finished)."""
        target_row, target_col = divmod(action, 3)
        if self.SiteIsOccupied(target_row, target_col):
            # Illegal move: punish and terminate the episode.
            return self.prohibited_action_reward, True
        if self.MakeMove(target_row, target_col):
            # MakeMove truthy return signals this move won the game.
            return self.winning_action_reward, True
        if self.GameEnded():
            # Game over without a winning move (presumably a draw — confirm in TicTacToeBoard).
            return self.neutral_action_reward, True
        return self.neutral_action_reward, False


store_path = "TicTacToeNewTraining"

# Network: 18 inputs, 9 outputs (one per square), learning rate 0.05, three hidden layers of 50.
TicTacToeANN = ANN.TrainingNetwork(18, 9, 0.05, [50, 50, 50])

if __name__ == '__main__':
    board = LearningBoard()
    # Alternative training mode kept for reference:
    # TicTacToeANN.Training_Episodic_Decorrelated_Batches(board, store_path, 500, 50, 5, TicTacToeANN.SARSA_Episodic)
    TicTacToeANN.Training_Episodic_Single_Matches_Reverse(
        board, store_path, 25000, TicTacToeANN.SARSA_Episodic_Single_Game)
class LearningFourInARow(FourInARowBoard):
    """Connect-four board wrapped with a (reward, episode_done) step interface for RL training."""

    # Reward signal constants returned to the learner.
    winning_action_reward = 1.0
    loosing_action_reward = -0.5  # NOTE(review): "loosing" spelling kept — external code may reference this attribute name
    neutral_action_reward = 0.0
    prohibited_action_reward = -1.0

    def MakeMoveWithReward(self, action):
        """Drop a piece into column `action`; return (reward, episode_finished)."""
        if self.ColumnIsFull(action):
            # Illegal move: punish and terminate the episode.
            return self.prohibited_action_reward, True
        if self.MakeMove(action):
            # MakeMove truthy return signals this move won the game.
            return self.winning_action_reward, True
        if self.GameEnded():
            # Game over without a winning move (presumably a draw — confirm in FourInARowBoard).
            return self.neutral_action_reward, True
        return self.neutral_action_reward, False


store_path = "FourInARowNewTraining"

# Network: 84 inputs, 7 outputs (one per column), learning rate 0.05, three hidden layers of 50.
FourInARowANN = ANN.TrainingNetwork(84, 7, 0.05, [50, 50, 50])

if __name__ == '__main__':
    board = LearningFourInARow()
    FourInARowANN.Training_Episodic_Single_Matches_Reverse(
        board, store_path, 10000, FourInARowANN.Q_Learning_Episodic_Single_Game)
    # Alternative training mode kept for reference:
    # FourInARowANN.Training_Episodic_Decorrelated_Batches(board, store_path, 50, 50, 5, FourInARowANN.Q_Learning_Episodic)
elif np.all(self.current_position == self.goal): self.current_position = np.array([row, column]) return self.winning_action_reward, True else: self.current_position = np.array([row, column]) return self.neutral_action_reward, False def GetState(self): return self.current_position def ResetGame(self): self.current_position = self.maze.start store_path = "MazeNewTraining" MazeANN = ANN.TrainingNetwork(2, 4, 0.05, [50, 50, 50], discount=0.95) def PrintLearnedPath(name): runner = MazeRunner(name) game_ended = False saver = ANN.tf.train.Saver() path = "Trainings/" + store_path with ANN.tf.Session() as sess: saver.restore(sess, path) count = 0 while not game_ended: if runner.maze.hight * runner.maze.width < count: break count += 1 current_state = runner.GetState()