Python SGDの例、SGD.SGD Pythonの例

コード例 #1

0

ファイルを表示

ファイル: Simulation.py プロジェクト: pidzso/Collaborative-filtering

def run_sgd(source, steps, learn_rate, reg_rate):
    """Run one SGD matrix-factorisation pass and append its duration to a log.

    A fresh ``SGD`` object loads the ratings from ``source``; the call to its
    ``mf`` method is timed and the elapsed time is appended to the file
    ``OUT_<dataIN>_<rateIN>`` (``dataIN`` and ``rateIN`` come from the
    enclosing module).
    """
    factoriser = SGD()
    factoriser.get_rates(source)

    # Only the factorisation itself is timed, not the loading of the ratings.
    began = time()
    factoriser.mf(steps, learn_rate, reg_rate)
    elapsed = time() - began

    log_name = str("OUT_" + dataIN + "_" + rateIN)
    report = "All:\t" + str(elapsed) + "\n"
    with open(log_name, "a") as log_file:
        log_file.write(report)

コード例 #2

0

ファイルを表示

def jobman(state, channel):
    """Build the datasets, the biRNN model and the SGD trainer, then run training.

    ``state`` supplies every hyper-parameter that is read below ('path',
    'pca', 'subset', 'seqlen', 'overlap', 'nhids', 'seed', 'bs');
    ``channel`` is handed to ``MainLoop`` untouched.
    """
    def load_split(which):
        # Both splits share every option except the split name.
        return ListSequences(path=state['path'],
                             pca=state['pca'],
                             subset=state['subset'],
                             which=which,
                             one_hot=False,
                             nbits=32)

    # Only the training split is converted to the dense format.
    train_data = load_split('train').export_dense_format(
        sequence_length=state['seqlen'],
        overlap=state['overlap'])
    valid_data = load_split('valid')

    # Output and input sizes are derived from the dense training data.
    n_outputs = numpy.max(train_data.data_y) + 1
    n_inputs = train_data.data_x.shape[-1]
    model = biRNN(nhids=state['nhids'],
                  nouts=n_outputs,
                  nins=n_inputs,
                  activ=TT.nnet.sigmoid,
                  seed=state['seed'],
                  bs=state['bs'],
                  seqlen=state['seqlen'])

    optimiser = SGD(model, state, train_data)

    loop = MainLoop(train_data, valid_data, None, model, optimiser, state,
                    channel)
    loop.main()

コード例 #3

0

ファイルを表示

ファイル: main_nat.py プロジェクト: vd114/galatea

 def __init__(self, options, channel):
     """
     Load the data, build the model and the optimisation algorithm, and
     initialise the timing buffers.

     options: a dictionary that contains all the configuration; the keys
              read here are 'data', 'model', 'algo' and 'loopIters', and
              'validscore' is written.
     channel: jobman channel (may be None, in which case nothing is saved)

     Raises ValueError for an unknown 'model' or 'algo' value and
     NotImplementedError for the 'hf' / 'hf_jacobi' algorithms.
     """
     # Step 0. Load data
     # Parenthesised single-argument print behaves the same on Python 2 and 3.
     print('Loading data')
     data = numpy.load(options['data'])
     self.options = options
     self.channel = channel
     # Step 1. Construct Model
     print('Constructing Model')
     if options['model'] == 'mlp':
         model = mlp(options, channel, data)
     elif options['model'] == 'daa':
         model = daa(options, channel, data)
     else:
         # Previously fell through to an UnboundLocalError on `model`.
         raise ValueError('Unknown model: %r' % (options['model'],))
     self.model = model
     print('Constructing algo')
     # Step 2. Construct optimization technique
     if options['algo'] == 'natSGD_basic':
         algo = natSGD(options, channel, data, model)
     elif options['algo'] == 'natSGD_jacobi':
         algo = natSGD_jacobi(options, channel, data, model)
     elif options['algo'] == 'natSGD_ls':
         algo = natSGD_linesearch(options, channel, data, model)
     elif options['algo'] == 'natNCG':
         algo = natNCG(options, channel, data, model)
     elif options['algo'] == 'krylov':
         algo = KrylovDescent(options, channel, data, model)
     elif options['algo'] in ('hf', 'hf_jacobi'):
         # `NotImplemented` is a comparison sentinel, not an exception;
         # raising it produces a TypeError instead of the intended error.
         raise NotImplementedError(
             "algo %r is not implemented" % (options['algo'],))
     elif options['algo'] == 'sgd':
         algo = SGD(options, channel, data, model)
     else:
         # Previously fell through to an UnboundLocalError on `algo`.
         raise ValueError('Unknown algo: %r' % (options['algo'],))
     self.algo = algo
     # Large initial value for the validation score stored in the options.
     self.options['validscore'] = 1e20
     # One row per loop iteration; 13 and 2 float32 slots per row.
     self.train_timing = numpy.zeros((options['loopIters'], 13),
                                     dtype='float32')
     self.valid_timing = numpy.zeros((options['loopIters'], 2),
                                     dtype='float32')
     if self.channel is not None:
         self.channel.save()
     self.start_time = time.time()
     self.batch_start_time = time.time()

コード例 #4

0

ファイルを表示

 def __init__(self, model, loss):
     """Initialise the instance by delegating to ``SGD.__init__``.

     ``model`` and ``loss`` are forwarded unchanged; no additional state is
     set up here.
     """
     SGD.__init__(self, model, loss)

コード例 #5

0

ファイルを表示

ファイル: MomentumSGD.py プロジェクト: Irene-Li/susyML

 def __init__(self, model, loss):
     """Initialise the instance by delegating to ``SGD.__init__``.

     ``model`` and ``loss`` are forwarded unchanged; no additional state is
     set up here.
     """
     SGD.__init__(self, model, loss)

コード例 #6

0

ファイルを表示

def otimizacao(U, X, tipo, metodo):
    """Optimise the control vector ``U`` and plot the resulting trajectory.

    Parameters:
    - U: control variables; the loop rebuilds it from rows 0..4, column 0 of
      the array returned by ``setLimites``.
    - X: passed unchanged to the trajectory, optimiser and plotting helpers.
    - tipo: 1 runs the optimisation and plots the best solution found; any
      other value only plots the trajectory for the given ``U``.
    - metodo: update rule used at every iteration: 0 = SGD,
      1 = SGD with momentum, 2 = Nesterov accelerated gradient, 3 = Adagrad.
      Any other value leaves ``U`` to be changed only by ``setLimites``.

    Returns None. When the initial objective is already below the stopping
    tolerance a message is printed and nothing is plotted.
    """
    if tipo != 1:
        # No optimisation requested: just show the trajectory.
        plotarTrajetoria(U, X)
        return

    # Stopping tolerance for the objective function.
    tolerancia = 1 * 10**(-10)

    # Global settings: maximum number of gradient iterations.
    maxNGrad = GlobalVariables().getMaxNGrad()

    # Best values found so far; the best objective starts at 1.
    fm = 1
    UM = U

    # Auxiliary state for the optimisers.
    # NOTE(review): both are 4x1 while U is rebuilt with 5 rows below --
    # confirm against the optimiser helpers.
    vt = np.zeros((4, 1))  # velocity used by momentum / NAG
    Grad = np.zeros((4, 1))  # accumulator used by Adagrad

    pa, pb, pc, _, _ = trajetoria(U, X)
    fo = funcaoObjetivo(pa, pb, pc)
    if fo < tolerancia:
        print("Valores já otimizados")
        return

    for j in range(1, maxNGrad):
        # One update step with the selected method.
        if metodo == 0:
            U = SGD(U, X)
        elif metodo == 1:
            U, vt = SGDMomento(U, X, vt)
        elif metodo == 2:
            U, vt = NAG(U, X, vt)
        elif metodo == 3:
            U, Grad = adagrad(U, X, Grad)

        # Apply the lower and upper limits, then rebuild U as a 5x1 column
        # from the limited values.
        U0 = setLimites(U)
        U = np.array([[U0[k, 0]] for k in range(5)])

        # Objective value for the trajectory produced by the updated U.
        pa, pb, pc, _, _ = trajetoria(U, X)
        fo = funcaoObjetivo(pa, pb, pc)

        # Keep the best result: fm only changes when fo improves on it, so
        # larger objective values are ignored.
        if fo < fm:
            fm = fo
            UM = U

        # Stopping condition. The original comment describes it as the
        # moment the CoM projection on the xy plane practically coincides
        # with the midpoint of the two legs (LH phase of the walk).
        if fo < tolerancia:
            break

        # Print this iteration's result to the console.
        imprimirConsole(j, [U, fo])

    # Print the final (best) result to the console.
    print('************************************************************')
    print('Melhor Solução: ')
    imprimirConsole(0, [UM, fm])
    print('************************************************************')

    # Show the trajectory of the best solution.
    plotarTrajetoria(UM, X)

コード例 #7

0

ファイルを表示

ファイル: SparseLinear.py プロジェクト: mahkons/Lottery-ticket-hypothesis

        self.bias = torch.Tensor(out_features)
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
        self.bias = Parameter(self.bias)

    def forward(self, input):
        """Return ``input`` times the transposed sparse weight, plus the bias.

        The product is computed as ``weight @ input.T`` with
        ``torch.sparse.mm`` and transposed back before the bias is added.
        """
        projected = torch.sparse.mm(self.weight, input.T)
        return projected.T + self.bias


# Manual smoke test: one optimiser step on a small SparseLinear layer.
if __name__ == "__main__":
    # A single sample with two features, fed to a 2 -> 3 layer.
    x = torch.tensor([[1, 2]], dtype=torch.float)
    a = SparseLinear(2, 3)
    #  b = Parameter(torch.tensor([[1, 2], [-1, -2]], dtype=torch.float).to_sparse())

    # Alternative optimisers that were tried and left disabled:
    #  optimizer = optim.SparseAdam(a.parameters(), lr=1e-3)
    #  optimizer = optim.Adam(a.parameters())
    # SGD comes from the project-local module `SGD`, not from torch.optim.
    from SGD import SGD

    optimizer = SGD(a.parameters(), lr=1e-3)
    #  loss = a(x).sum()
    # Scalar loss: the sum of all layer outputs.
    loss = a(x).sum()
    loss.backward()
    optimizer.step()

    # Print the weight, add its gradient in place (autograd tracking is
    # disabled for that update), then print the weight again.
    print(a.weight)
    with torch.no_grad():
        a.weight.add_(a.weight.grad)
    print(a.weight)

コード例 #8

0

ファイルを表示

ファイル: Neural_Network.py プロジェクト: lasse-steinnes/MachineLearning_Project2

class Neural_Network:
    """Fully connected feed-forward network trained through an external SGD helper.

    Weights and biases are kept as Python lists with one array per layer
    transition. Activation functions are stored as plain functions taking
    ``(self, z)`` and are always called with the network passed explicitly.
    """

    def __init__(self, number_of_nodes, active_fn, cost_function, pol_order = None, regularization =('none', 1e-15), log = True):
        """
        Initialize a NN
        number_of_nodes: - list with the number of nodes per layer, including
                           the input and the output layer
        active_fn:       - list of activation names, one per layer transition,
                           or a single name that is used for every layer
                         - 'sigmoid', 'tanh', 'relu' and 'softmax' are supported
        cost_function:   - either 'mse' or 'classification' (cross entropy);
                           'classification' forces softmax on the output layer
        pol_order:       - optional value that is only written to the log table
        regularization:  - (scheme, strength) with scheme 'l1' or 'l2'; any
                           other scheme disables the penalty
        log:             - create a table of information (``self.toi``) that
                           tracks the evolution of the NN during training

        Methods:
        -feedforward:   calculate the output of the NN for data of shape
                        (#features, #samples)
        -training:      train the NN using the SGD class and backpropagation
        """
        self.pol_order = pol_order
        self.nodes = number_of_nodes
        self.layers = len(number_of_nodes)

        # Biases of shape (#nodes l, 1) are drawn before the weights so the
        # sequence of random draws stays the same for a seeded generator.
        self.biases = [np.random.randn(i, 1) for i in self.nodes[1:]]
        # Weights of shape (#nodes l+1, #nodes l), scaled by 1/sqrt(fan-in).
        self.weights = [np.random.randn(i, j)/np.sqrt(j) for j, i in zip(self.nodes[:-1], self.nodes[1:])]

        activations = {'sigmoid': Neural_Network.sigmoid_act,
                       'tanh': Neural_Network.tanh_act,
                       'softmax': Neural_Network.softmax_act,
                       'relu': Neural_Network.relu_act}
        single_name = isinstance(active_fn, str)
        if single_name:
            # One name for all layers; every supported name is accepted here.
            self.functions = [activations[active_fn]] * (self.layers - 1)
        else:
            self.functions = [activations[name] for name in active_fn]

        # Derivatives with respect to z (argument 1) of each activation.
        # NOTE(review): these are built before the softmax override below, so
        # the last entry still differentiates the requested activation --
        # confirm this is intended.
        self.functions_prime = [autograd.elementwise_grad(fn, 1) for fn in self.functions]
        self.reg = regularization
        self.cost_mse = False

        # Set up the cost function.
        if cost_function == 'classification':
            self.cost_function = Neural_Network.cross_entropy
            self.functions[self.layers - 2] = Neural_Network.softmax_act
            self.has_acc = True
        elif cost_function == 'mse':
            self.cost_mse = True
            self.cost_function = Neural_Network.mse
            self.has_acc = False

        self.log = bool(log)
        if self.log:
            self.call = 0
            # Topology string, e.g. "4 : 8_tanh : 3_tanh".
            parts = [str(self.nodes[0])]
            for i in range(1, self.layers):
                name = active_fn if single_name else active_fn[i-1]
                parts.append(str(self.nodes[i]) + '_' + name)
            self.mapping = ' : '.join(parts)

            columns = ["number of layers", "nodes per layer",
                       "epoch", "batch size",
                       "learning rate", "initial learning rate",
                       "momentum parameter", "lambda", "stopping tol",
                       "cost", "accuracy", "data set"]
            if pol_order:
                # Must match the key written by __epoch_output (the original
                # column name carried a stray leading space).
                columns.append("pol order")
            self.toi = pd.DataFrame(columns=columns)

    def feedforward(self, data):
        '''
        Propagate ``data`` through every layer and return the activation of
        the output layer.

        The input and the activation of every layer are stored in
        ``self.activations``; the pre-activations are stored in ``self.z``
        (index 0 is a placeholder for the input layer).
        '''
        data = np.copy(data)
        self.activations = [data]
        self.z = [0]
        a = data
        for weight, bias, function in zip(self.weights, self.biases, self.functions):
            z = np.matmul(weight, a) + bias
            self.z.append(z)
            a = function(self, z)
            self.activations.append(a)
        return a

    def __backpropagation(self, f_z, target):
        '''
        Run one backward pass over a mini batch and update every layer's
        weights and biases in place.

        Parameters:
        - f_z: input of the mini batch, fed through ``feedforward`` first
        - target: expected output for the mini batch

        The output-layer delta is read from ``self.gradient.delta`` after
        ``run_minibatch``; according to the original comment it already
        contains learning rate and momentum. ``self.lmbd`` is the penalty on
        the weights.
        '''
        f_z = np.copy(f_z)
        target = np.copy(target)
        self.feedforward(f_z)
        # Set all inputs for the cost function.
        self.gradient.weights = (self, self.biases[self.layers - 2], target)
        self.gradient.run_minibatch((f_z, target), update_weight=False)
        delta = self.gradient.delta

        # Snapshot of the parameters before adjustment. Copied per layer
        # because the layers have different shapes and cannot be stacked
        # into one array.
        current_weights = [np.copy(w) for w in self.weights]
        current_biases = [np.copy(b) for b in self.biases]

        # Loop through the layers, last to first.
        for i in reversed(range(1, self.layers)):
            # Stored activations are replaced by their mean over the samples.
            self.activations[i-1] = np.mean(self.activations[i-1], axis=1, keepdims=True)
            delta_W = np.matmul(delta, self.activations[i-1].T)
            if self.lmbd > 0.0:
                delta_W += self.lmbd * current_weights[i-1]

            self.weights[i-1] = current_weights[i-1] - delta_W
            self.biases[i-1] = current_biases[i-1] - delta

            if i > 1:
                a_prime = (self.functions_prime[i-1](self, self.z[i-1])).mean(axis=1, keepdims=True)
                delta = np.matmul(current_weights[i-1].T, delta) * a_prime

    def training(self, data, target, epochs, mini_batch_size,
            eta = 0.5, eta_schedule = ('decay',0.1),
            momentum = True, gamma = 0.1,
            lmbd = 0.1, tolerance = 1e-3,
            test_data = None,
            validation_data = None):
        """
        Train the NN.
        data shape (#samples, #features)
        target shape (#samples, #output nodes)
        eta: learning rate
        eta_schedule: (scheme, cycles); the scheme is handed to SGD as
                      adaptive_learning_rate and ``cycles`` is added to the
                      SGD time after every epoch
        momentum, gamma: switch for momentum and its strength (passed to SGD
                      as ``momentum`` and ``m0``)
        lmbd: fraction of the old weights taken into the weight change
        tolerance: threshold used by accuracy_test / cost_test to stop early
        test_data/validation_data: (input, output); input shape
                      (#samples, #features), output shape
                      (#samples, #output nodes)

        For classification, the weights with the best test accuracy are
        restored after training; if no test accuracy above zero was recorded
        the final weights are kept.
        """
        data = np.copy(data)
        target = np.copy(target)
        self.gradient = SGD( self.cost_function, epochs = epochs, mini_batch_size = mini_batch_size,
                learning_rate = eta, adaptive_learning_rate = eta_schedule[0],
                momentum = momentum, m0 = gamma)

        self.lmbd = lmbd
        self.init_eta = eta
        self.tolerance = tolerance
        num_mini_batches = data.shape[0] // mini_batch_size
        best_accuracy = 0.0
        best_weights = None

        for self.epoch in range(epochs):
            # Run the mini batches.
            for mini_batch_data, mini_batch_target in self.gradient.creat_mini_batch(data, target, num_mini_batches):
                self.feedforward(mini_batch_data.T)
                self.__backpropagation(mini_batch_data.T, mini_batch_target.T)

            # Update the time used for the learning-rate decay.
            self.gradient.time += float(eta_schedule[1])

            # Cost (and accuracy) of the epoch.
            self.__epoch_output(data, target, name='train')
            if test_data is not None:
                self.__epoch_output(*test_data, name='test')

            if self.has_acc:
                # self.accuracy is only refreshed by a 'test' evaluation.
                if test_data is not None and self.accuracy > best_accuracy:
                    best_accuracy = self.accuracy
                    best_weights = [np.copy(w) for w in self.weights]
                if self.accuracy_test():
                    break

            if self.cost_mse:
                if self.cost_test():
                    break

        # After training, restore the best weights when some were recorded.
        if self.has_acc and best_weights is not None:
            self.weights = best_weights

        if validation_data is not None:
            self.__epoch_output(*validation_data, name='validation')

    def classification_accuracy(self, prediction, y):
        """Return the fraction of samples whose arg-max class matches ``y``.

        ``prediction`` is transposed first, so it is expected with one column
        per sample, while ``y`` has one row per sample.
        """
        predicted = np.argmax(prediction.T, axis=1)
        expected = np.argmax(y, axis=1)
        return len(predicted[predicted == expected])/len(expected)

    def sigmoid_act(self, z):
        """Logistic sigmoid of ``z``."""
        return 1.0/(1.0 + np.exp(-z))

    def tanh_act(self, z):
        """Hyperbolic tangent of ``z``."""
        return np.tanh(z)

    def softmax_act(self, z):
        """Softmax of ``z`` normalised over axis 0."""
        exp_z = np.exp(z)
        # The sum over axis 0 broadcasts back over the rows of exp_z.
        return exp_z/np.sum(exp_z, axis=0)

    def relu_act(self, z):
        """Rectified linear unit: ``z`` where positive, otherwise 0."""
        return np.where(z > 0, z, 0)

    def epoch_cost(self, f_z, target):
        """Feed ``f_z`` forward and return ``(cost, output activation)``."""
        a = self.feedforward(f_z)
        cost = 0.0 + self.cost_function(self, self.biases[self.layers - 2], target)
        return cost, a

    def _regularization_term(self, b):
        """Return the value subtracted from the cost for the configured scheme.

        'l1' uses the mean of the row-wise absolute sums of ``b``, 'l2' the
        mean of the row-wise norms; any other scheme gives 0.0.
        """
        if self.reg[0] == 'l1':
            return float(self.reg[1]) * np.sum(np.abs(b), axis=1).mean()
        if self.reg[0] == 'l2':
            return float(self.reg[1]) * np.linalg.norm(b, axis=1).mean()
        return 0.0

    def cross_entropy(self, b, y):
        """Cross entropy of the output layer, recomputed with bias ``b``.

        Uses the last stored hidden activation, so ``feedforward`` must have
        been called before.
        NOTE(review): the regularisation term is subtracted from the cost,
        as in the original code -- confirm the sign.
        """
        z = np.matmul(self.weights[self.layers - 2], self.activations[self.layers - 2]) + b
        a = self.functions[self.layers - 2](self, z)
        ret = - np.sum(np.where(y==1, np.log(a), 0))/y.shape[1]
        ret = ret - self._regularization_term(b)
        return ret

    def mse(self, b, y):
        """Squared error of the first output row, divided by ``len(y)``.

        Uses the last stored hidden activation, so ``feedforward`` must have
        been called before.
        NOTE(review): the regularisation term is subtracted from the cost,
        as in the original code -- confirm the sign.
        """
        z = np.matmul(self.weights[self.layers - 2], self.activations[self.layers - 2]) + b
        a = self.functions[self.layers - 2](self, z)
        res = a - y
        ret = np.dot(res[0], res[0])/len(y)
        ret = ret - self._regularization_term(b)
        return ret

    def __epoch_output(self, data, target, name='test'):
        """Print the cost/accuracy on ``data`` and append a row to the log table.

        ``name`` labels the data set; the accuracy of the 'test' set is also
        stored in ``self.accuracy``. Without an accuracy measure the string
        'Nan' is logged instead.
        """
        data = np.copy(data)
        target = np.copy(target)
        print('Current epoch: ', self.epoch)
        cost, a = self.epoch_cost(data.T, target.T)
        print('The %s cost is: %.4f' % (name, cost))
        if self.has_acc:
            accuracy = self.classification_accuracy(a, target)
            print('The %s accuracy is : %.4f' % (name, accuracy))
            # Store the current test accuracy.
            if name == 'test':
                self.accuracy = accuracy
        else:
            accuracy = 'Nan'

        if self.log:
            row = {"number of layers": self.layers, "nodes per layer": self.mapping,
                   "epoch": self.epoch, "batch size": self.gradient.mini_batch_size,
                   "learning rate": self.gradient.gamma, "initial learning rate": self.init_eta,
                   "momentum parameter": self.gradient.m0,
                   "lambda": self.lmbd, "stopping tol": self.tolerance,
                   "cost": cost, "accuracy": accuracy, "data set": name}
            if self.pol_order:
                row["pol order"] = self.pol_order
            entry = pd.DataFrame(row, index=[self.call])
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent operation.
            self.toi = pd.concat([self.toi, entry])
            self.call += 1

    def _recent_std_below_tolerance(self, column):
        """Return True when the last five logged 'test' values of ``column``
        have a standard deviation below ``self.tolerance``.

        Always False during the first epochs (``self.epoch <= 5``), when
        logging is disabled, or when no 'test' rows have been logged.
        """
        if self.epoch <= 5 or not self.log:
            return False
        is_test_row = self.toi['data set'] == 'test'
        recent = self.toi[is_test_row][column].to_numpy()[-5:]
        if recent.size == 0:
            return False
        return bool(self.tolerance > np.std(recent))

    def accuracy_test(self):
        '''
        Keep track of the accuracy of the past five epochs.
        If the standard deviation of the past five test accuracies is less
        than the tolerance, the epoch loop is broken and the learning stops.

        returns: True or False
        '''
        return self._recent_std_below_tolerance('accuracy')

    def cost_test(self):
        '''
        Keep track of the cost of the past five epochs.
        If the standard deviation of the past five test costs is less than
        the tolerance, the epoch loop is broken and the learning stops.

        returns: True or False
        '''
        return self._recent_std_below_tolerance('cost')

コード例 #9

0

ファイルを表示

ファイル: Neural_Network.py プロジェクト: lasse-steinnes/MachineLearning_Project2

    def training(self, data, target, epochs, mini_batch_size,
            eta = 0.5, eta_schedule = ('decay',0.1),
            momentum = True, gamma = 0.1,
            lmbd = 0.1, tolerance = 1e-3,
            test_data = None,
            validation_data = None):
        """
        Train the NN.
        data shape (#samples, #features)
        target shape (#samples, #output nodes)
        eta: learning rate
        eta_schedule: (scheme, cycles); the scheme is handed to SGD as
                      adaptive_learning_rate and ``cycles`` is added to the
                      SGD time after every epoch
        momentum, gamma: switch for momentum and its strength (passed to SGD
                      as ``momentum`` and ``m0``)
        lmbd: fraction of the old weights taken into the weight change
        tolerance: stored as ``self.tolerance`` for the early-stopping checks
        test_data/validation_data: (input, output); input shape
                      (#samples, #features), output shape
                      (#samples, #output nodes)

        For classification, the weights with the best test accuracy are
        restored after training; if no test accuracy above zero was recorded
        the final weights are kept.
        """
        data = np.copy(data)
        target = np.copy(target)
        self.gradient = SGD( self.cost_function, epochs = epochs, mini_batch_size = mini_batch_size,
                learning_rate = eta, adaptive_learning_rate = eta_schedule[0],
                momentum = momentum, m0 = gamma)

        self.lmbd = lmbd
        self.init_eta = eta
        self.tolerance = tolerance
        num_mini_batches = data.shape[0] // mini_batch_size
        best_accuracy = 0.0
        # Stays None until a test accuracy above zero has been seen; this
        # avoids an unbound name when the weights are restored below.
        best_weights = None

        for self.epoch in range(epochs):
            # Run the mini batches.
            for mini_batch_data, mini_batch_target in self.gradient.creat_mini_batch(data, target, num_mini_batches):
                self.feedforward(mini_batch_data.T)
                # Backpropagation updates the weights and biases in place.
                self.__backpropagation(mini_batch_data.T, mini_batch_target.T)

            # Update the time used for the learning-rate decay.
            self.gradient.time += float(eta_schedule[1])

            # Cost (and accuracy) of the epoch.
            self.__epoch_output(data, target, name='train')
            if test_data is not None:
                self.__epoch_output(*test_data, name='test')

            if self.has_acc:
                # self.accuracy is only refreshed by a 'test' evaluation, so
                # it is consulted only when test data was supplied.
                if test_data is not None and self.accuracy > best_accuracy:
                    best_accuracy = self.accuracy
                    # Copied per layer: the layers have different shapes and
                    # cannot be stacked into a single array.
                    best_weights = [np.copy(w) for w in self.weights]
                if self.accuracy_test():
                    break

            if self.cost_mse:
                if self.cost_test():
                    break

        # After training, restore the best weights when some were recorded.
        if self.has_acc and best_weights is not None:
            self.weights = best_weights

        if validation_data is not None:
            self.__epoch_output(*validation_data, name='validation')