import autograd
import autograd.numpy as np   # autograd's numpy wrapper so elementwise_grad can differentiate the activations
import pandas as pd

# NOTE: the SGD optimiser class used by training() is expected to be importable from elsewhere in this project.


class Neural_Network:
    def __init__(self, number_of_nodes, active_fn, cost_function, pol_order=None,
                 regularization=('none', 1e-15), log=True):
        """
        Initialize a NN.
        number_of_nodes:
            - list of the number of nodes per layer, including input and output layer
            - at least the number of input and output nodes
        active_fn:
            - list of activation functions (one per layer after the input layer)
            - strings 'sigmoid', 'tanh', 'relu' and 'softmax' are supported
        cost_function:
            - either 'mse' or 'classification' (cross entropy)
        regularization:
            - regularization scheme for the cost function: ('l1' or 'l2', strength)
        log:
            - creates a table of information and keeps track of the evolution of the NN during training

        Methods:
        - feedforward: calculates the output of the NN; data shape (#features, #samples)
        - training: trains the NN using the SGD class and backpropagation
        """
        self.pol_order = pol_order
        self.nodes = number_of_nodes
        self.layers = len(number_of_nodes)
        # initialize biases, shape (#nodes l, 1)
        self.biases = [np.random.randn(i, 1) for i in self.nodes[1:]]
        # initialize weights, shape (#nodes l+1, #nodes l), scaled by 1/sqrt(#nodes l)
        self.weights = [np.random.randn(i, j) / np.sqrt(j)
                        for j, i in zip(self.nodes[:-1], self.nodes[1:])]

        # set up the list of activation functions; a single string applies one function to every layer
        if active_fn == 'sigmoid':
            self.functions = [Neural_Network.sigmoid_act for i in range(0, self.layers - 1)]
        elif active_fn == 'tanh':
            self.functions = [Neural_Network.tanh_act for i in range(0, self.layers - 1)]
        else:
            d = {'sigmoid': Neural_Network.sigmoid_act, 'tanh': Neural_Network.tanh_act,
                 'softmax': Neural_Network.softmax_act, 'relu': Neural_Network.relu_act}
            self.functions = [d[name] for name in active_fn]
        # derivatives of the layer activation functions
        self.functions_prime = [autograd.elementwise_grad(l, 1) for l in self.functions]

        self.reg = regularization
        self.cost_mse = False
        # set up the cost function
        if cost_function == 'classification':
            self.cost_function = Neural_Network.cross_entropy
            self.functions[self.layers - 2] = Neural_Network.softmax_act
            self.has_acc = True
        if cost_function == 'mse':
            self.cost_mse = True
            self.cost_function = Neural_Network.mse
            self.has_acc = False

        self.log = False
        if log:
            self.log = True
            self.call = 0
            # create topology mapping
            self.mapping = str(self.nodes[0])
            for i in range(1, self.layers):
                self.mapping += ' : ' + str(self.nodes[i])
                if type(active_fn) == list:
                    self.mapping += '_' + active_fn[i - 1]
                else:
                    self.mapping += '_' + active_fn
            columns = ["number of layers", "nodes per layer", "epoch", "batch size",
                       "learning rate", "initial learning rate", "momentum parameter",
                       "lambda", "stopping tol", "cost", "accuracy", "data set"]
            if pol_order:
                columns.append("pol order")
            self.toi = pd.DataFrame(columns=columns)

    def feedforward(self, data):
        '''
        Feed the input data forward: the activation a of each layer is calculated
        and fed in again as the input of the next layer, and so on, until we reach
        the output layer L.
        '''
        data = np.copy(data)
        self.activations = [data]
        self.z = [0]
        a = data
        for weight, bias, function in zip(self.weights, self.biases, self.functions):
            z = np.matmul(weight, a) + bias
            self.z.append(z)
            a = function(self, z)
            self.activations.append(a)
        return a
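    # A minimal sketch of what feedforward() computes per layer (shapes as used in this class):
    #   z^l = W^l a^(l-1) + b^l    with W^l of shape (#nodes l, #nodes l-1) and b^l of shape (#nodes l, 1)
    #   a^l = f^l(z^l)             with a^l of shape (#nodes l, #samples)
    # starting from a^0 = data of shape (#features, #samples); the output-layer activation a^L is returned.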
    def __backpropagation(self, f_z, target):
        '''
        Description:
        Backpropagation minimises the error and calculates the gradient for each
        layer, working backwards from the last layer L. In this way, weights which
        contribute to large errors can be updated after a feed forward. (Hidden
        layers and the output layer have to be treated differently; how depends on
        the dimensions of f_z.)
        ---------------------------------------
        Parameters:
        - f_z: mini-batch input X, shape (#features, #samples); fed forward to obtain the activations a^l
        - target: mini-batch targets Y
        - self.lmbd is the penalty on the weights
        ----------------------------------------
        '''
        f_z = np.copy(f_z)
        target = np.copy(target)
        Neural_Network.feedforward(self, f_z)
        # set all inputs for the cost function
        self.gradient.weights = (self, self.biases[self.layers - 2], target)
        self.gradient.run_minibatch((f_z, target), update_weight=False)
        delta = self.gradient.delta  # contains learning rate and momentum
        # keep copies of the current weights and biases before adjustment
        current_weights = [np.copy(w) for w in self.weights]
        current_biases = [np.copy(b) for b in self.biases]
        # loop backwards through the layers
        for i in reversed(range(1, self.layers)):
            self.activations[i - 1] = np.mean(self.activations[i - 1], axis=1, keepdims=True)
            delta_W = np.matmul(delta, self.activations[i - 1].T)
            if self.lmbd > 0.0:
                # lambda is the penalty on the weights (alternatively 1/n, taking the mean)
                delta_W += self.lmbd * current_weights[i - 1]
            self.weights[i - 1] = current_weights[i - 1] - delta_W
            self.biases[i - 1] = current_biases[i - 1] - delta
            if i > 1:
                a_prime = (self.functions_prime[i - 1](self, self.z[i - 1])).mean(axis=1, keepdims=True)
                delta = np.matmul(current_weights[i - 1].T, delta) * a_prime

    def training(self, data, target, epochs, mini_batch_size, eta=0.5, eta_schedule=('decay', 0.1),
                 momentum=True, gamma=0.1, lmbd=0.1, tolerance=1e-3, test_data=None, validation_data=None):
        """
        Train the NN.
        data: shape (#samples, #features)
        target: shape (#samples, #output nodes)
        eta: learning rate
        eta_schedule: (scheme, cycles), scheme is 'decay' or 'const'; if 'decay' the time is multiplied by cycles
        momentum, gamma: set momentum to True, gamma is the strength of the momentum (gamma = 0 equals momentum = False)
        lmbd: fraction of the old weights taken into the change (weight penalty)
        test_data/validation_data: tuple (input, output); input shape (#samples, #features), output shape (#samples, #output nodes)
        """
        data = np.copy(data)
        target = np.copy(target)
        self.gradient = SGD(self.cost_function, epochs=epochs, mini_batch_size=mini_batch_size,
                            learning_rate=eta, adaptive_learning_rate=eta_schedule[0],
                            momentum=momentum, m0=gamma)
        self.lmbd = lmbd
        best_accuracy = 0.0
        samples = data.shape[0]
        num_mini_batches = samples // mini_batch_size
        self.init_eta = eta
        self.tolerance = tolerance
        for self.epoch in range(epochs):
            # run the minibatches
            for mini_batch_data, mini_batch_target in self.gradient.creat_mini_batch(data, target, num_mini_batches):
                Neural_Network.feedforward(self, mini_batch_data.T)
                # call backpropagation to find the new gradient
                Neural_Network.__backpropagation(self, mini_batch_data.T, mini_batch_target.T)
            self.gradient.time += float(eta_schedule[1])  # update time for decay

            # calculate the cost of the epoch
            Neural_Network.__epoch_output(self, data, target, name='train')
            if test_data is not None:
                Neural_Network.__epoch_output(self, *test_data, name='test')
            # check the accuracy (classification); uses the test-set accuracy stored by __epoch_output
            if self.has_acc == True:
                if self.accuracy > best_accuracy:
                    best_accuracy = self.accuracy
                    best_weights = [np.copy(w) for w in self.weights]
                if Neural_Network.accuracy_test(self) == True:
                    break
            # check the MSE (regression)
            if self.cost_mse == True:
                if Neural_Network.cost_test(self) == True:
                    break
        # after training, set the weights to the best weights
        if self.has_acc:
            self.weights = best_weights
        if validation_data is not None:
            Neural_Network.__epoch_output(self, *validation_data, name='validation')
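    # Sketch of the per-layer update that __backpropagation() applies during training,
    # assuming the SGD helper's delta already includes the learning rate and momentum:
    #   dW^l        = delta^l (a^(l-1))^T + lmbd * W^l
    #   W^l        <- W^l - dW^l,    b^l <- b^l - delta^l
    #   delta^(l-1) = (W^l)^T delta^l * f'(z^(l-1))     (activations averaged over the mini-batch)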
    def classification_accuracy(self, prediction, y):
        # prediction shape (#output nodes, #samples), y shape (#samples, #output nodes), one-hot encoded
        prediction = prediction.T
        prediction = np.argmax(prediction, axis=1)
        y = np.argmax(y, axis=1)
        return len(prediction[prediction == y]) / len(y)

    def sigmoid_act(self, z):
        return 1.0 / (1.0 + np.exp(-z))

    def tanh_act(self, z):
        return np.tanh(z)

    def softmax_act(self, z):
        # normalize over the output nodes (axis 0); the denominator broadcasts over the samples
        denom = np.sum(np.exp(z), axis=0)  # shape (#samples,)
        return np.exp(z) / denom

    def relu_act(self, z):
        return np.where(z > 0, z, 0)

    def epoch_cost(self, f_z, target):
        cost = 0.0
        a = Neural_Network.feedforward(self, f_z)
        cost += self.cost_function(self, self.biases[self.layers - 2], target)
        return cost, a

    def cross_entropy(self, b, y):
        z = np.matmul(self.weights[self.layers - 2], self.activations[self.layers - 2]) + b
        a = self.functions[self.layers - 2](self, z)
        ret = -np.sum(np.where(y == 1, np.log(a), 0)) / y.shape[1]
        if self.reg[0] == 'l1':
            ret -= float(self.reg[1]) * np.sum(np.abs(b), axis=1).mean()
        if self.reg[0] == 'l2':
            ret -= float(self.reg[1]) * np.linalg.norm(b, axis=1).mean()
        return ret

    def mse(self, b, y):
        z = np.matmul(self.weights[self.layers - 2], self.activations[self.layers - 2]) + b
        a = self.functions[self.layers - 2](self, z)
        res = a - y
        ret = np.dot(res[0], res[0]) / len(y)
        if self.reg[0] == 'l1':
            ret -= float(self.reg[1]) * np.sum(np.abs(b), axis=1).mean()
        if self.reg[0] == 'l2':
            ret -= float(self.reg[1]) * np.linalg.norm(b, axis=1).mean()
        return ret

    # make the table of information and print the cost/accuracy of the current epoch
    def __epoch_output(self, data, target, name='test'):
        data = np.copy(data)
        target = np.copy(target)
        print('Current epoch: ', self.epoch)
        cost, a = Neural_Network.epoch_cost(self, data.T, target.T)
        print('The %s cost is: %.4f' % (name, cost))
        if self.has_acc == True:
            accuracy = Neural_Network.classification_accuracy(self, a, target)
            print('The %s accuracy is: %.4f' % (name, accuracy))
            # store the current test accuracy
            if name == 'test':
                self.accuracy = accuracy
        else:
            accuracy = np.nan
        if self.log:
            row = {"number of layers": self.layers, "nodes per layer": self.mapping,
                   "epoch": self.epoch, "batch size": self.gradient.mini_batch_size,
                   "learning rate": self.gradient.gamma, "initial learning rate": self.init_eta,
                   "momentum parameter": self.gradient.m0, "lambda": self.lmbd,
                   "stopping tol": self.tolerance, "cost": cost, "accuracy": accuracy,
                   "data set": name}
            if self.pol_order:
                row["pol order"] = self.pol_order
            temp = pd.DataFrame(row, index=[self.call])
            self.toi = pd.concat([self.toi, temp])
            self.call += 1

    # check whether the accuracy has stopped changing
    def accuracy_test(self):
        '''
        Keeps track of the accuracy of the past five epochs. If the standard
        deviation of the past five is less than the tolerance, the epoch loop
        is broken and the learning stops (requires log=True).
        returns: True or False
        '''
        if self.epoch > 5:
            mask = self.toi['data set'] == 'test'
            acc_array = self.toi[mask]['accuracy'].to_numpy()
            std_acc = np.std(acc_array[-5:])
            if self.tolerance > std_acc:
                return True
            else:
                return False
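    # Early-stopping sketch (shared by accuracy_test above and cost_test below):
    # training halts once std( last five logged test-set values ) < tolerance,
    # i.e. when the monitored quantity has effectively plateaued.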
    # check whether the cost has stopped changing
    def cost_test(self):
        '''
        Keeps track of the cost of the past five epochs. If the standard
        deviation of the past five is less than the tolerance, the epoch loop
        is broken and the learning stops (requires log=True).
        returns: True or False
        '''
        if self.epoch > 5:
            mask = self.toi['data set'] == 'test'
            cost_array = self.toi[mask]['cost'].to_numpy()
            std_cost = np.std(cost_array[-5:])
            if self.tolerance > std_cost:
                return True
            else:
                return False
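

# Minimal usage sketch. Assumptions: the SGD optimiser class used by training() is
# available in this project, data is shaped (#samples, #features) and targets are
# one-hot encoded as (#samples, #output nodes); the layer sizes and hyperparameters
# below are illustrative only.
if __name__ == "__main__":
    np.random.seed(1)
    X = np.random.randn(200, 4)                      # 200 samples, 4 features
    labels = (X[:, 0] + X[:, 1] > 0).astype(int)     # toy binary classification problem
    Y = np.eye(2)[labels]                            # one-hot targets, shape (200, 2)

    nn = Neural_Network(number_of_nodes=[4, 8, 2],
                        active_fn=['sigmoid', 'softmax'],
                        cost_function='classification',
                        regularization=('l2', 1e-4))
    nn.training(X, Y, epochs=10, mini_batch_size=20, eta=0.1,
                test_data=(X, Y))                    # test data is needed for the accuracy tracking
    prediction = nn.feedforward(X.T)                 # feedforward expects shape (#features, #samples)
    print('final accuracy:', nn.classification_accuracy(prediction, Y))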