def run_sgd(source, steps, learn_rate, reg_rate):
    """Time one ``SGD().mf(...)`` run and append the duration to a log file.

    A fresh ``SGD`` object is fed ``source`` through ``get_rates`` before the
    timed ``mf(steps, learn_rate, reg_rate)`` call. Only the ``mf`` call is
    timed. The elapsed seconds are appended as an ``All:`` line to the file
    ``OUT_<dataIN>_<rateIN>``, where ``dataIN`` and ``rateIN`` are names
    resolved outside this function.
    """
    runner = SGD()
    runner.get_rates(source)

    began = time()
    runner.mf(steps, learn_rate, reg_rate)
    elapsed = time() - began

    log_name = str("OUT_" + dataIN + "_" + rateIN)
    with open(log_name, "a") as log_file:
        log_file.write("All:\t" + str(elapsed) + "\n")
def jobman(state, channel):
    """Build the data sets, a ``biRNN`` model and an ``SGD`` optimiser, then run the main loop.

    state: configuration mapping; the keys read here are 'path', 'pca',
        'subset', 'seqlen', 'overlap', 'nhids', 'seed' and 'bs'.
    channel: passed through unchanged to ``MainLoop``.
    """
    # Settings shared by the training and validation sequence loaders.
    shared = dict(path=state['path'],
                  pca=state['pca'],
                  subset=state['subset'],
                  one_hot=False,
                  nbits=32)

    # Only the training split is exported to the dense format.
    train_sequences = ListSequences(which='train', **shared)
    train_data = train_sequences.export_dense_format(
        sequence_length=state['seqlen'],
        overlap=state['overlap'])
    valid_data = ListSequences(which='valid', **shared)

    # Output size is the largest training label plus one; input size is the
    # last axis of the training inputs.
    n_outputs = numpy.max(train_data.data_y) + 1
    n_inputs = train_data.data_x.shape[-1]
    model = biRNN(nhids=state['nhids'],
                  nouts=n_outputs,
                  nins=n_inputs,
                  activ=TT.nnet.sigmoid,
                  seed=state['seed'],
                  bs=state['bs'],
                  seqlen=state['seqlen'])

    algo = SGD(model, state, train_data)
    loop = MainLoop(train_data, valid_data, None, model, algo, state, channel)
    loop.main()
def __init__(self, options, channel):
    """Load the data, build the model and its optimiser, and start the timers.

    options: a dictionary containing all the configuration. The keys read
        here are 'data', 'model', 'algo' and 'loopIters'; 'validscore' is
        written back into it.
    channel: jobman channel, or None. When it is not None,
        ``channel.save()`` is called once construction is complete.

    Raises:
        ValueError: if options['model'] or options['algo'] is not one of
            the values handled below.
        NotImplementedError: for the 'hf' and 'hf_jacobi' algorithms.
    """
    # Step 0. Load data
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Loading data')
    data = numpy.load(options['data'])
    self.options = options
    self.channel = channel

    # Step 1. Construct Model
    print('Constructing Model')
    model_name = options['model']
    if model_name == 'mlp':
        model = mlp(options, channel, data)
    elif model_name == 'daa':
        model = daa(options, channel, data)
    else:
        # Previously an unknown name surfaced later as an UnboundLocalError.
        raise ValueError('Unknown model: %r' % (model_name,))
    self.model = model

    # Step 2. Construct optimization technique
    # Kept as an if/elif chain so that only the selected optimiser name has
    # to be resolvable at run time.
    print('Constructing algo')
    algo_name = options['algo']
    if algo_name == 'natSGD_basic':
        algo = natSGD(options, channel, data, model)
    elif algo_name == 'natSGD_jacobi':
        algo = natSGD_jacobi(options, channel, data, model)
    elif algo_name == 'natSGD_ls':
        algo = natSGD_linesearch(options, channel, data, model)
    elif algo_name == 'natNCG':
        algo = natNCG(options, channel, data, model)
    elif algo_name == 'krylov':
        algo = KrylovDescent(options, channel, data, model)
    elif algo_name in ('hf', 'hf_jacobi'):
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it produces a TypeError instead of the intended error.
        raise NotImplementedError(
            'Algorithm %r is not implemented' % (algo_name,))
    elif algo_name == 'sgd':
        algo = SGD(options, channel, data, model)
    else:
        raise ValueError('Unknown algo: %r' % (algo_name,))
    self.algo = algo

    # Large initial validation score stored in the options dictionary.
    self.options['validscore'] = 1e20
    # One row per loop iteration: 13 training columns, 2 validation columns.
    self.train_timing = numpy.zeros((options['loopIters'], 13),
                                    dtype='float32')
    self.valid_timing = numpy.zeros((options['loopIters'], 2),
                                    dtype='float32')
    if self.channel is not None:
        self.channel.save()
    self.start_time = time.time()
    self.batch_start_time = time.time()
def __init__(self, model, loss):
    """Initialise the instance by delegating to ``SGD.__init__``.

    model, loss: forwarded unchanged to ``SGD.__init__``; their meaning is
        defined by that class.
    """
    # Explicit base-class call rather than super(); nothing else is set up here.
    SGD.__init__(self, model, loss)
def otimizacao(U, X, tipo, metodo):
    """Optimise the control vector ``U`` iteratively, or only plot its trajectory.

    When ``tipo`` is 1 the function repeatedly updates ``U`` with the
    optimiser selected by ``metodo``, clamps it with ``setLimites``,
    evaluates the objective through ``trajetoria`` / ``funcaoObjetivo`` and
    remembers the best ``U`` seen. The best solution is printed and its
    trajectory plotted at the end. For any other ``tipo`` the trajectory of
    the given ``U`` is plotted directly.

    U: control variables, indexed as ``U[k, 0]`` for k = 0..4.
    X: passed through to the trajectory, optimiser and plotting helpers.
    tipo: 1 to run the optimisation, anything else to only plot.
    metodo: 0 = SGD, 1 = SGD with momentum, 2 = Nesterov accelerated
        gradient, 3 = Adagrad. Other values leave ``U`` untouched by an
        optimiser step.

    Returns None. If the starting point already meets the stopping
    tolerance, a message is printed and nothing is plotted.
    """
    # NOTE(review): the source indentation was lost; the nesting below is
    # reconstructed from the statement order and should be confirmed.
    if tipo != 1:
        # Just show the trajectory.
        plotarTrajetoria(U, X)
        return

    # Global settings.
    settings = GlobalVariables()
    max_iterations = settings.getMaxNGrad()

    # Initial control variables (read but not used before being reassigned).
    u1, u2, u3, u4, u5 = (U[k, 0] for k in range(5))

    # Stopping threshold for the objective value.
    tolerance = 1 * 10**(-10)

    # Best values so far: the best objective starts at 1, so only objective
    # values below 1 are ever recorded as the best.
    fo = 1
    fm = fo
    UM = U

    # Auxiliary state carried between optimiser steps.
    vt = np.zeros((4, 1))    # used by momentum SGD and NAG
    Grad = np.zeros((4, 1))  # used by Adagrad

    [pa, pb, pc, M, ponto] = trajetoria(U, X)
    fo = funcaoObjetivo(pa, pb, pc)
    if fo < tolerance:
        print("Valores já otimizados")
        return

    for step in range(1, max_iterations, 1):
        # One update with the selected optimiser.
        if metodo == 0:
            # Stochastic gradient descent.
            U = SGD(U, X)
        elif metodo == 1:
            # SGD with momentum.
            [U, vt] = SGDMomento(U, X, vt)
        elif metodo == 2:
            # Nesterov accelerated gradient.
            [U, vt] = NAG(U, X, vt)
        elif metodo == 3:
            # Adagrad.
            [U, Grad] = adagrad(U, X, Grad)

        # Apply the lower and upper limits to U, then rebuild the control
        # vector as a 5x1 column from the limited values.
        U0 = setLimites(U)
        u1 = U0[0, 0]
        u2 = U0[1, 0]
        u3 = U0[2, 0]
        u4 = U0[3, 0]
        u5 = U0[4, 0]
        U = np.array([[u1], [u2], [u3], [u4], [u5]])

        # Objective value: U is fed to the trajectory computation and the
        # resulting points are scored.
        [pa, pb, pc, M, ponto] = trajetoria(U, X)
        fo = funcaoObjetivo(pa, pb, pc)

        # Keep the best result; objective values that do not improve on the
        # stored best are ignored.
        if fo < fm:
            fm = fo
            UM = U

        # Stopping condition. According to the original notes this is
        # reached when the projection of the CoM on the xy plane practically
        # coincides with the midpoint of the two legs, in the LH phase of
        # the walk.
        if fo < tolerance:
            break

        # Print this iteration's result to the console.
        imprimirConsole(step, [U, fo])

    # Print the final result to the console.
    print('************************************************************')
    print('Melhor Solução: ')
    imprimirConsole(0, [UM, fm])
    print('************************************************************')

    # Show the trajectory of the best solution.
    plotarTrajetoria(UM, X)
self.bias = torch.Tensor(out_features) fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) bound = 1 / math.sqrt(fan_in) init.uniform_(self.bias, -bound, bound) self.bias = Parameter(self.bias) def forward(self, input): return torch.sparse.mm(self.weight, input.T).T + self.bias if __name__ == "__main__": x = torch.tensor([[1, 2]], dtype=torch.float) a = SparseLinear(2, 3) # b = Parameter(torch.tensor([[1, 2], [-1, -2]], dtype=torch.float).to_sparse()) # optimizer = optim.SparseAdam(a.parameters(), lr=1e-3) # optimizer = optim.Adam(a.parameters()) from SGD import SGD optimizer = SGD(a.parameters(), lr=1e-3) # loss = a(x).sum() loss = a(x).sum() loss.backward() optimizer.step() print(a.weight) with torch.no_grad(): a.weight.add_(a.weight.grad) print(a.weight)
class Neural_Network:
    """Fully connected feed-forward network trained through an external ``SGD`` helper.

    Methods:
        feedforward: calculate the output of the network for data of shape
            (#features, #samples).
        training: train the network using the ``SGD`` class and
            backpropagation.
    """

    # Columns of the log table (``self.toi``); "pol order" is appended when
    # a polynomial order is given.
    _LOG_COLUMNS = ("number of layers", "nodes per layer", "epoch",
                    "batch size", "learning rate", "initial learning rate",
                    "momentum parameter", "lambda", "stopping tol", "cost",
                    "accuracy", "data set")

    def __init__(self, number_of_nodes, active_fn, cost_function, pol_order = None, regularization =('none', 1e-15), log = True):
        """Initialize the network.

        number_of_nodes:
            list with the number of nodes per layer, including the input
            and output layer (so at least two entries).
        active_fn:
            either one activation name used for every layer, or a list with
            one name per layer; 'sigmoid', 'tanh', 'relu' and 'softmax' are
            supported.
        cost_function:
            'mse', or 'classification' for cross entropy. For
            'classification' the last layer's activation is replaced by
            softmax.
        pol_order:
            optional value that is only recorded in the log table.
        regularization:
            (scheme, strength) for the cost function; scheme is 'l1' or
            'l2', any other value disables it.
        log:
            when true, a table of information tracks the evolution of the
            network during training.

        Raises KeyError for an unsupported activation name.
        """
        self.pol_order = pol_order
        self.nodes = number_of_nodes
        self.layers = len(number_of_nodes)
        # Biases have shape (#nodes l, 1). They are drawn before the weights
        # so a seeded run reproduces the same numbers as before.
        self.biases = [np.random.randn(i, 1) for i in self.nodes[1:]]
        # Weights have shape (#nodes l+1, #nodes l), scaled by 1/sqrt(fan-in).
        self.weights = [np.random.randn(i, j) / np.sqrt(j)
                        for j, i in zip(self.nodes[:-1], self.nodes[1:])]

        activations = {'sigmoid': Neural_Network.sigmoid_act,
                       'tanh': Neural_Network.tanh_act,
                       'softmax': Neural_Network.softmax_act,
                       'relu': Neural_Network.relu_act}
        if isinstance(active_fn, str):
            # A single name applies to every layer. This used to work only
            # for 'sigmoid' and 'tanh'; other names were iterated character
            # by character and failed with a KeyError.
            self.functions = [activations[active_fn]] * (self.layers - 1)
        else:
            self.functions = [activations[name] for name in active_fn]
        # Derivatives of the layer activations with respect to z (argument 1).
        self.functions_prime = [autograd.elementwise_grad(fn, 1)
                                for fn in self.functions]

        self.reg = regularization
        self.cost_mse = False
        # Set up the cost function.
        if cost_function == 'classification':
            self.cost_function = Neural_Network.cross_entropy
            self.functions[self.layers - 2] = Neural_Network.softmax_act
            self.has_acc = True
        if cost_function == 'mse':
            self.cost_mse = True
            self.cost_function = Neural_Network.mse
            self.has_acc = False

        self.log = bool(log)
        self.call = 0  # running row index of the log table

        # Topology description, e.g. "2 : 3_tanh : 1_tanh".
        self.mapping = str(self.nodes[0])
        for i in range(1, self.layers):
            self.mapping += ' : ' + str(self.nodes[i])
            if isinstance(active_fn, str):
                self.mapping += '_' + active_fn
            else:
                self.mapping += '_' + active_fn[i-1]

        columns = list(Neural_Network._LOG_COLUMNS)
        if pol_order:
            # Must match the key written by __epoch_output exactly; the old
            # " pol order" (leading space) produced a second, empty column.
            columns.append("pol order")
        self.toi = pd.DataFrame(columns=columns)

    def feedforward(self, data):
        '''
        Propagate ``data`` (shape (#features, #samples)) through every layer.

        Each layer computes z = W a + b and a = f(z); the activation of one
        layer is the input of the next. All z and a values are stored in
        ``self.z`` and ``self.activations`` and the output of the last layer
        is returned.
        '''
        data = np.copy(data)
        self.activations = [data]
        self.z = [0]  # placeholder so z[l] lines up with activations[l]
        a = data
        for weight, bias, function in zip(self.weights, self.biases, self.functions):
            z = np.matmul(weight, a) + bias
            self.z.append(z)
            # The activations are stored as plain functions, hence the
            # explicit ``self`` argument.
            a = function(self, z)
            self.activations.append(a)
        return a

    def __backpropagation(self, f_z, target):
        '''
        Update all weights and biases from one mini batch.

        A forward pass is run, ``self.gradient`` (the SGD helper) computes
        the output-layer delta, which according to the original notes already
        contains learning rate and momentum, and the delta is then propagated
        backwards from the last layer to the first.

        Parameters:
        - f_z: mini-batch input, shape (#features, #samples)
        - target: mini-batch target, shape (#output nodes, #samples)
        '''
        f_z = np.copy(f_z)
        target = np.copy(target)
        self.feedforward(f_z)
        # Arguments the SGD helper passes to the cost function.
        self.gradient.weights = (self, self.biases[self.layers - 2], target)
        self.gradient.run_minibatch((f_z, target), update_weight=False)
        delta = self.gradient.delta
        # Snapshot the parameters before adjusting them. Layers have
        # different shapes, so each array is copied on its own; np.copy on
        # the whole ragged list fails on current NumPy.
        current_weights = [np.copy(w) for w in self.weights]
        current_biases = [np.copy(b) for b in self.biases]
        # Loop through the layers from the output backwards.
        for i in reversed(range(1, self.layers)):
            # Mini-batch mean of the layer input.
            self.activations[i-1] = np.mean(self.activations[i-1], axis=1, keepdims=True)
            delta_W = np.matmul(delta, self.activations[i-1].T)
            if self.lmbd > 0.0:
                # lambda is the penalty on the weights.
                delta_W += self.lmbd * current_weights[i-1]
            self.weights[i-1] = current_weights[i-1] - delta_W
            self.biases[i-1] = current_biases[i-1] - delta
            if i > 1:
                a_prime = (self.functions_prime[i-1](self, self.z[i-1])).mean(axis=1, keepdims=True)
                delta = np.matmul(current_weights[i-1].T, delta) * a_prime

    def training(self, data, target, epochs, mini_batch_size, eta = 0.5, eta_schedule = ('decay',0.1), momentum = True, gamma = 0.1, lmbd = 0.1, tolerance = 1e-3, test_data = None, validation_data = None):
        """Train the network.

        data: shape (#samples, #features)
        target: shape (#samples, #output nodes)
        epochs, mini_batch_size: forwarded to the SGD helper
        eta: learning rate
        eta_schedule: (scheme, cycles); scheme is 'decay' or 'const', and
            the helper's time is advanced by ``cycles`` per step
        momentum, gamma: enable momentum and set its strength
        lmbd: fraction of the old weights added to the weight change
        tolerance: early-stopping threshold used by accuracy_test/cost_test
        test_data / validation_data: (input, output) pairs with input shape
            (#samples, #features) and output shape (#samples, #output nodes)

        For classification the weights of the epoch with the best test
        accuracy are restored at the end; this needs ``test_data``.
        """
        data = np.copy(data)
        target = np.copy(target)
        self.gradient = SGD(self.cost_function,
                            epochs=epochs,
                            mini_batch_size=mini_batch_size,
                            learning_rate=eta,
                            adaptive_learning_rate=eta_schedule[0],
                            momentum=momentum,
                            m0=gamma)
        self.lmbd = lmbd
        best_accuracy = 0.0
        # Stays None until a test accuracy beats best_accuracy, so the
        # restore step below can never hit an unbound name.
        best_weights = None
        samples = data.shape[0]
        num_mini_batches = samples // mini_batch_size
        self.init_eta = eta
        self.tolerance = tolerance
        for self.epoch in range(epochs):
            # Run the mini batches.
            for mini_batch_data, mini_batch_target in self.gradient.creat_mini_batch(data, target, num_mini_batches):
                self.feedforward(mini_batch_data.T)
                self.__backpropagation(mini_batch_data.T, mini_batch_target.T)
                # Update time for decay.
                # NOTE(review): the source indentation was lost; advancing
                # once per mini batch (not per epoch) is assumed - confirm.
                self.gradient.time += float(eta_schedule[1]) * 1
            # Calculate the cost of the epoch.
            self.__epoch_output(data, target, name='train')
            if test_data is not None:
                self.__epoch_output(*test_data, name='test')
            if self.has_acc:
                # self.accuracy only exists once test data has been scored.
                if test_data is not None and self.accuracy > best_accuracy:
                    best_accuracy = self.accuracy
                    best_weights = [np.copy(w) for w in self.weights]
                if self.accuracy_test():
                    break
            if self.cost_mse:
                if self.cost_test():
                    break
        # After training, set the weights to the best weights.
        # NOTE(review): only the weights are restored, the biases keep their
        # last-epoch values - confirm this is intended.
        if self.has_acc and best_weights is not None:
            self.weights = best_weights
        if validation_data is not None:
            self.__epoch_output(*validation_data, name='validation')

    def classification_accuracy(self, prediction, y):
        """Return the fraction of samples whose arg-max prediction matches ``y``.

        prediction: shape (#classes, #samples); y: shape (#samples, #classes).
        """
        prediction = prediction.T
        prediction = np.argmax(prediction, axis=1)
        y = np.argmax(y, axis=1)
        return len(prediction[prediction == y]) / len(y)

    def sigmoid_act(self, z):
        """Logistic sigmoid, element-wise."""
        return 1.0 / (1.0 + np.exp(-z))

    def tanh_act(self, z):
        """Hyperbolic tangent, element-wise."""
        return np.tanh(z)

    def softmax_act(self, z):
        """Softmax over axis 0 (the node axis), one distribution per sample."""
        # Subtracting the per-column maximum leaves the result unchanged
        # mathematically but keeps exp() from overflowing to inf/NaN.
        shifted = np.exp(z - np.max(z, axis=0, keepdims=True))
        return shifted / np.sum(shifted, axis=0, keepdims=True)

    def relu_act(self, z):
        """Rectified linear unit, element-wise."""
        return np.where(z > 0, z, 0)

    def epoch_cost(self, f_z, target):
        """Run a forward pass on ``f_z`` and return (cost, network output)."""
        cost = 0.0
        a = self.feedforward(f_z)
        cost += self.cost_function(self, self.biases[self.layers - 2], target)
        return cost, a

    def cross_entropy(self, b, y):
        """Cross-entropy cost of the output layer evaluated with bias ``b``.

        Uses the activations stored by the last forward pass; ``y`` has
        shape (#output nodes, #samples). The optional l1/l2 term is computed
        on ``b`` and subtracted.
        """
        z = np.matmul(self.weights[self.layers - 2], self.activations[self.layers - 2]) + b
        a = self.functions[self.layers - 2](self, z)
        ret = - np.sum(np.where(y == 1, np.log(a), 0)) / y.shape[1]
        if self.reg[0] == 'l1':
            ret -= float(self.reg[1]) * np.sum(np.abs(b), axis=1).mean()
        if self.reg[0] == 'l2':
            ret -= float(self.reg[1]) * np.linalg.norm(b, axis=1).mean()
        return ret

    def mse(self, b, y):
        """Squared-error cost of the output layer evaluated with bias ``b``.

        Uses the activations stored by the last forward pass. Only the first
        output row enters the sum, which is divided by ``len(y)``. The
        optional l1/l2 term is computed on ``b`` and subtracted.
        """
        z = np.matmul(self.weights[self.layers - 2], self.activations[self.layers - 2]) + b
        a = self.functions[self.layers - 2](self, z)
        res = a - y
        ret = np.dot(res[0], res[0]) / len(y)
        if self.reg[0] == 'l1':
            ret -= float(self.reg[1]) * np.sum(np.abs(b), axis=1).mean()
        if self.reg[0] == 'l2':
            ret -= float(self.reg[1]) * np.linalg.norm(b, axis=1).mean()
        return ret

    def __epoch_output(self, data, target, name='test'):
        """Print cost (and accuracy) for one data set and log them.

        data: shape (#samples, #features); target: shape (#samples,
        #output nodes); name: label stored in the "data set" column. The
        accuracy of the 'test' set is kept in ``self.accuracy``.
        """
        data = np.copy(data)
        target = np.copy(target)
        print('Current epoch: ', self.epoch)
        cost, a = self.epoch_cost(data.T, target.T)
        print('The %s cost is: %.4f' % (name, cost))
        if self.has_acc:
            accuracy = self.classification_accuracy(a, target)
            print('The %s accuracy is : %.4f' % (name, accuracy))
            # Store the current test accuracy.
            if name == 'test':
                self.accuracy = accuracy
        else:
            accuracy = 'Nan'
        if not self.log:
            return
        row = {"number of layers": self.layers,
               "nodes per layer": self.mapping,
               "epoch": self.epoch,
               "batch size": self.gradient.mini_batch_size,
               "learning rate": self.gradient.gamma,
               "initial learning rate": self.init_eta,
               "momentum parameter": self.gradient.m0,
               "lambda": self.lmbd,
               "stopping tol": self.tolerance,
               "cost": cost,
               "accuracy": accuracy,
               "data set": name}
        if self.pol_order:
            row["pol order"] = self.pol_order
        entry = pd.DataFrame(row, index=[self.call])
        # DataFrame.append was removed in pandas 2.0. The first row replaces
        # the empty table so concat never sees an all-empty frame.
        self.toi = entry if self.toi.empty else pd.concat([self.toi, entry])
        self.call += 1

    def _has_plateaued(self, column):
        """Return True when the last five logged 'test' values of ``column`` vary less than the tolerance."""
        if self.epoch <= 5:
            return False
        is_test = self.toi['data set'] == 'test'
        recent = self.toi[is_test][column].to_numpy(dtype=float)[-5:]
        if recent.size == 0:
            # Nothing logged for the test set: there is nothing to compare.
            return False
        return bool(self.tolerance > np.std(recent))

    def accuracy_test(self):
        '''
        Keep track of the test accuracy of the past five epochs.

        After epoch 5, if the standard deviation of the last five logged
        test accuracies is less than the tolerance, the epoch loop should be
        broken and learning stops.

        returns: True or False
        '''
        return self._has_plateaued('accuracy')

    def cost_test(self):
        '''
        Keep track of the test cost of the past five epochs.

        After epoch 5, if the standard deviation of the last five logged
        test costs is less than the tolerance, the epoch loop should be
        broken and learning stops.

        returns: True or False
        '''
        return self._has_plateaued('cost')
def training(self, data, target, epochs, mini_batch_size, eta = 0.5, eta_schedule = ('decay',0.1), momentum = True, gamma = 0.1, lmbd = 0.1, tolerance = 1e-3, test_data = None, validation_data = None):
    """Train the network.

    data: shape (#samples, #features)
    target: shape (#samples, #output nodes)
    epochs, mini_batch_size: forwarded to the SGD helper
    eta: learning rate
    eta_schedule: (scheme, cycles); scheme is 'decay' or 'const', and the
        helper's time is advanced by ``cycles`` per step
    momentum, gamma: enable momentum and set its strength
    lmbd: fraction of the old weights added to the weight change
    tolerance: early-stopping threshold stored on the instance
    test_data / validation_data: (input, output) pairs with input shape
        (#samples, #features) and output shape (#samples, #output nodes)

    When ``self.has_acc`` is set, the weights of the epoch with the best
    test accuracy are restored at the end; this needs ``test_data``.
    """
    data = np.copy(data)
    target = np.copy(target)
    self.gradient = SGD(self.cost_function,
                        epochs=epochs,
                        mini_batch_size=mini_batch_size,
                        learning_rate=eta,
                        adaptive_learning_rate=eta_schedule[0],
                        momentum=momentum,
                        m0=gamma)
    self.lmbd = lmbd
    best_accuracy = 0.0
    # Stays None until a test accuracy beats best_accuracy, so the restore
    # step below can never hit an unbound name.
    best_weights = None
    samples = data.shape[0]
    num_mini_batches = samples // mini_batch_size
    self.init_eta = eta
    self.tolerance = tolerance
    for self.epoch in range(epochs):
        # Run the mini batches.
        for mini_batch_data, mini_batch_target in self.gradient.creat_mini_batch(data, target, num_mini_batches):
            Neural_Network.feedforward(self, mini_batch_data.T)
            # Calls backpropagation to find the new gradient.
            Neural_Network.__backpropagation(self, mini_batch_data.T, mini_batch_target.T)
            # Update time for decay.
            # NOTE(review): the source indentation was lost; advancing once
            # per mini batch (not per epoch) is assumed - confirm.
            self.gradient.time += float(eta_schedule[1]) * 1
        # Calculate the cost of the epoch.
        Neural_Network.__epoch_output(self, data, target, name='train')
        if test_data is not None:
            Neural_Network.__epoch_output(self, *test_data, name='test')
        # Accuracy-based bookkeeping and early stopping.
        if self.has_acc:
            # self.accuracy is only read when test data was supplied; without
            # it the old code failed on a missing attribute.
            if test_data is not None and self.accuracy > best_accuracy:
                best_accuracy = self.accuracy
                # Layers have different shapes, so copy each array on its
                # own; np.copy on the ragged list fails on current NumPy.
                best_weights = [np.copy(w) for w in self.weights]
            if Neural_Network.accuracy_test(self):
                break
        # Cost-based early stopping.
        if self.cost_mse:
            if Neural_Network.cost_test(self):
                break
    # After training, set the weights to the best weights.
    # NOTE(review): only the weights are restored, the biases keep their
    # last-epoch values - confirm this is intended.
    if self.has_acc and best_weights is not None:
        self.weights = best_weights
    if validation_data is not None:
        Neural_Network.__epoch_output(self, *validation_data, name='validation')