Example #1

import ANN  # project-local module providing TrainingNetwork (assumed import path)
from TicTacToeBoard import TicTacToeBoard  # assumed import path for the board class


class LearningBoard(TicTacToeBoard):

    # Reward scheme: win +1, illegal move -1, everything else 0. The "loosing"
    # attribute keeps its original (misspelled) name because it is not
    # referenced in this snippet and may be read by the training code.
    winning_action_reward = 1.0
    loosing_action_reward = -0.5
    neutral_action_reward = 0.0
    prohibited_action_reward = -1.0

    def MakeMoveWithReward(self, action):
        """Apply action (0-8, row-major) and return (reward, episode_done)."""
        row = action // 3
        column = action % 3

        if self.SiteIsOccupied(row, column):
            # Illegal move: penalise and end the episode immediately.
            return self.prohibited_action_reward, True
        elif self.MakeMove(row, column):
            # MakeMove reports whether this move won the game.
            return self.winning_action_reward, True
        elif self.GameEnded():
            # Board is full with no winner: a draw ends the episode.
            return self.neutral_action_reward, True
        else:
            return self.neutral_action_reward, False


store_path = "TicTacToeNewTraining"
# 18 inputs (presumably two 9-cell planes, one per player), 9 actions (one per
# square), learning rate 0.05, three hidden layers of 50 units each.
TicTacToeANN = ANN.TrainingNetwork(18, 9, 0.05, [50, 50, 50])

if __name__ == '__main__':
    board = LearningBoard()
    # Alternative trainer: decorrelated batches instead of single matches.
    # TicTacToeANN.Training_Episodic_Decorrelated_Batches(
    #     board, store_path, 500, 50, 5, TicTacToeANN.SARSA_Episodic)
    TicTacToeANN.Training_Episodic_Single_Matches_Reverse(
        board, store_path, 25000, TicTacToeANN.SARSA_Episodic_Single_Game)
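
SARSA_Episodic_Single_Game belongs to the project's ANN module and its
internals are not shown here. For reference only, here is a minimal tabular
sketch of the on-policy SARSA update the trainer is named after (every name
below is illustrative, not the module's API):

import numpy as np

def sarsa_update(Q, s, a, r, s_next, a_next, done, alpha=0.05, gamma=1.0):
    # On-policy TD target: bootstrap from the action the policy will
    # actually take next (a_next), not from the greedy action.
    target = r if done else r + gamma * Q[s_next, a_next]
    Q[s, a] += alpha * (target - Q[s, a])
    return Q

Here Q would be a NumPy array indexed by (state, action); the episodic trainer
above replaces the table with the TicTacToeANN network.
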
Example #2

import ANN  # project-local module providing TrainingNetwork (assumed import path)
from FourInARowBoard import FourInARowBoard  # assumed import path for the board class


class LearningFourInARow(FourInARowBoard):

    # Same reward scheme as the tic-tac-toe board above.
    winning_action_reward = 1.0
    loosing_action_reward = -0.5
    neutral_action_reward = 0.0
    prohibited_action_reward = -1.0

    def MakeMoveWithReward(self, action):
        """Drop a piece into column action (0-6) and return (reward, episode_done)."""
        if self.ColumnIsFull(action):
            # Illegal move: penalise and end the episode immediately.
            return self.prohibited_action_reward, True
        elif self.MakeMove(action):
            # MakeMove reports whether this move won the game.
            return self.winning_action_reward, True
        elif self.GameEnded():
            # Board is full with no winner: a draw ends the episode.
            return self.neutral_action_reward, True
        else:
            return self.neutral_action_reward, False


store_path = "FourInARowNewTraining"
# 84 inputs (presumably two 6x7 planes, one per player), 7 actions (one per
# column), learning rate 0.05, three hidden layers of 50 units each.
FourInARowANN = ANN.TrainingNetwork(84, 7, 0.05, [50, 50, 50])

if __name__ == '__main__':
    board = LearningFourInARow()
    FourInARowANN.Training_Episodic_Single_Matches_Reverse(
        board, store_path, 10000,
        FourInARowANN.Q_Learning_Episodic_Single_Game)
    # Alternative trainer: decorrelated batches instead of single matches.
    # FourInARowANN.Training_Episodic_Decorrelated_Batches(
    #     board, store_path, 50, 50, 5, FourInARowANN.Q_Learning_Episodic)
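
Four-in-a-row trains with Q-learning instead of SARSA. In tabular form the only
change is the target, which bootstraps off-policy from the greedy next action
rather than the action actually taken (again an illustrative sketch, not the
ANN module's code):

import numpy as np

def q_learning_update(Q, s, a, r, s_next, done, alpha=0.05, gamma=1.0):
    # Off-policy TD target: max over next actions, independent of the
    # behaviour policy that generated the transition.
    target = r if done else r + gamma * np.max(Q[s_next])
    Q[s, a] += alpha * (target - Q[s, a])
    return Q

Note that a_next drops out of the signature: exploration still happens when
acting, but the update always assumes greedy continuation.
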
Example #3
        # ... (earlier branches of MakeMoveWithReward are not shown in this excerpt)
        elif np.all(np.array([row, column]) == self.goal):
            # Stepping onto the goal cell wins the episode.
            self.current_position = np.array([row, column])
            return self.winning_action_reward, True
        else:
            self.current_position = np.array([row, column])
            return self.neutral_action_reward, False

    def GetState(self):
        # The (row, column) position is the 2-component input the network sees.
        return self.current_position

    def ResetGame(self):
        self.current_position = self.maze.start


store_path = "MazeNewTraining"
# 2 inputs (the runner's row and column), 4 actions (presumably one per movement
# direction), and an explicit discount factor of 0.95.
MazeANN = ANN.TrainingNetwork(2, 4, 0.05, [50, 50, 50], discount=0.95)
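# Unlike the game boards, the maze pays a reward only on reaching the goal, so
# the discount is what steers the policy toward short paths: a goal reward of
# 1.0 that lies k steps ahead is worth 0.95**k now. Illustration:
#     for k in (1, 5, 10, 20):
#         print(k, "steps ->", round(0.95 ** k, 3))  # 0.95, 0.774, 0.599, 0.358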


def PrintLearnedPath(name):
    runner = MazeRunner(name)
    game_ended = False
    # Restore the trained weights saved under "Trainings/" + store_path.
    saver = ANN.tf.train.Saver()
    path = "Trainings/" + store_path
    with ANN.tf.Session() as sess:
        saver.restore(sess, path)
        count = 0
        while not game_ended:
            # Guard against a looping policy: a valid path never visits more
            # cells than the maze contains. ("hight" is the Maze attribute name.)
            if runner.maze.hight * runner.maze.width < count:
                break
            count += 1
            current_state = runner.GetState()