def phi(self, a):
    """Compute the phi value when an alpha 'a' parameter is passed.

    Parameters
    ----------
    a : scalar
        Alpha parameter.
    """
    w_a = restore_w_to_model(self.model, self.w + a * self.d)
    phia = metrics.mse_reg(self.Y, self.optimizer.forward(w_a, self.X),
                           self.model, w_a)
    return phia
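# phi(a) above is the usual one-dimensional restriction of the regularized MSE
# along the current search direction, phi(a) = f(w + a*d), which the Wolfe line
# search probes at different step sizes. A standalone toy sketch of the same
# idea (not isanet code; 'toy_loss' and 'make_phi' are illustrative names):
import numpy as np

def toy_loss(w):
    # simple convex quadratic, stands in for the regularized MSE
    return 0.5 * np.dot(w, w)

def make_phi(w, d):
    # phi(a) = f(w + a*d): the objective restricted to the line through w along d
    return lambda a: toy_loss(w + a * d)

phi_toy = make_phi(np.array([1.0, -2.0]), np.array([0.5, 1.0]))
assert phi_toy(0.0) == toy_loss(np.array([1.0, -2.0]))   # phi(0) is the current loss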
def derphi(self, a):
    """Compute the dot product between the gradient and the direction
    when an alpha 'a' parameter is passed::

        phips = g_a^T*d

    Parameters
    ----------
    a : scalar
        Alpha parameter.
    """
    w_a = self.w + a * self.d
    l_w_a = restore_w_to_model(self.model, w_a)
    g_a = make_vector(
        self.optimizer.backpropagation(self.model, l_w_a, self.X, self.Y))
    phips = np.asscalar(np.dot(g_a.T, self.d))
    return phips
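# derphi(a) is the directional derivative of phi, i.e. phi'(a) = g(w + a*d)^T d:
# the gradient at the probed point projected onto the search direction. A quick
# finite-difference sanity check of that identity on a toy quadratic
# (standalone sketch, not isanet code; 'toy_loss'/'toy_grad' are illustrative):
import numpy as np

def toy_loss(w):
    return 0.5 * np.dot(w, w)

def toy_grad(w):
    return w  # gradient of 0.5*||w||^2

w = np.array([1.0, -2.0])
d = np.array([0.5, 1.0])
a, eps = 0.3, 1e-6

analytic = np.dot(toy_grad(w + a * d), d)  # g_a^T d, as computed by derphi
numeric = (toy_loss(w + (a + eps) * d) - toy_loss(w + (a - eps) * d)) / (2 * eps)
assert abs(analytic - numeric) < 1e-4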
def step(self, model, X, Y, verbose):
    """Implements the NCG step update method.

    Parameters
    ----------
    model : isanet.model.MLP
        Specify the Multilayer Perceptron object to optimize.

    X : array-like of shape (n_samples, n_features)
        The input data.

    Y : array-like of shape (n_samples, n_output)
        The target values.

    verbose : integer, default=0
        Controls the verbosity: the higher, the more messages.

    Returns
    -------
    float
        The gradient norm.
    """
    w = make_vector(model.weights)
    if not model.is_fitted and self.epoch == 0:
        beta = 0
        self.__g = make_vector(
            self.backpropagation(model, model.weights, X, Y))
        d = -self.__g
        phi0 = metrics.mse_reg(Y, model.predict(X), model, model.weights)
    else:
        # compute beta
        beta = self.__fbeta(self.__g, self.__past_g, self.__past_ng,
                            self.__past_d)
        self.restart += 1
        if self.max_restart is not None and (self.restart == self.max_restart):
            self.restart = 0
            beta = 0
        if beta != 0:
            d = -self.__g + beta * self.__past_d
        else:
            d = -self.__g
        phi0 = model.history["loss_mse_reg"][-1]

    norm_g = np.linalg.norm(self.__g)
    self.__past_ng = norm_g
    self.__past_g = self.__g
    self.__past_d = d
    derphi0 = np.asscalar(np.dot(self.__g.T, d))

    phi = phi_function(model, self, w, X, Y, d)
    ls_verbose = False
    if verbose >= 3:
        ls_verbose = True
    alpha, ls_log = line_search_wolfe(phi=phi.phi, derphi=phi.derphi,
                                      phi0=phi0, old_phi0=self.__old_phi0,
                                      derphi0=derphi0, c1=self.c1, c2=self.c2,
                                      maxiter=self.ln_maxiter,
                                      verbose=ls_verbose)
    self.__old_phi0 = phi0
    self.__g = phi.get_last_g()

    w += alpha * d
    model.weights = restore_w_to_model(model, w)

    if verbose >= 2:
        print("| beta: {} | alpha: {} | ng: {} | ls conv: {}, it: {}, "
              "time: {:4.4f} | zoom used: {}, conv: {}, it: {}|".format(
                  beta, alpha, norm_g, ls_log["ls_conv"], ls_log["ls_it"],
                  ls_log["ls_time"], ls_log["zoom_used"], ls_log["zoom_conv"],
                  ls_log["zoom_it"]))

    self.__append_history(beta, alpha, norm_g, ls_log)
    return norm_g
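# self.__fbeta computes the conjugacy coefficient from the current gradient, the
# previous gradient, the previous gradient norm and the previous direction.
# Which formula the optimizer actually uses depends on how it was configured;
# the functions below are standard textbook choices with the same call
# signature, sketched purely for illustration (not isanet's own definitions):
import numpy as np

def beta_fr(g, past_g, past_ng, past_d):
    # Fletcher-Reeves: ||g||^2 / ||g_old||^2
    return np.dot(g.T, g).item() / (past_ng ** 2)

def beta_pr_plus(g, past_g, past_ng, past_d):
    # Polak-Ribiere with the usual max(., 0) restart safeguard
    return max(np.dot(g.T, g - past_g).item() / (past_ng ** 2), 0.0)

def beta_hs_plus(g, past_g, past_ng, past_d):
    # Hestenes-Stiefel, likewise clipped at zero
    y = g - past_g
    return max(np.dot(g.T, y).item() / np.dot(past_d.T, y).item(), 0.0)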
def step(self, model, X, Y, verbose):
    """Implements the LBFGS step update method.

    Parameters
    ----------
    model : isanet.model.MLP
        Specify the Multilayer Perceptron object to optimize.

    X : array-like of shape (n_samples, n_features)
        The input data.

    Y : array-like of shape (n_samples, n_output)
        The target values.

    verbose : integer, default=0
        Controls the verbosity: the higher, the more messages.

    Returns
    -------
    float
        The gradient norm.
    """
    current_batch_size = X.shape[0]
    w = make_vector(model.weights)
    g = make_vector(self.backpropagation(model, model.weights, X, Y))
    norm_g = np.linalg.norm(g)
    phi0 = metrics.mse_reg(Y, model.predict(X), model, model.weights)

    if not model.is_fitted and self.epoch == 0:
        d = -g
    else:
        self.__y[-1] = g - self.__y[-1]
        gamma = np.dot(self.__s[-1].T, self.__y[-1]) / np.dot(
            self.__y[-1].T, self.__y[-1])
        H0 = gamma
        d = -self.__compute_search_dir(g, H0, self.__s, self.__y)
        curvature_condition = np.dot(self.__s[-1].T, self.__y[-1])
        if curvature_condition <= 1e-8:
            print("curvature condition: {}".format(curvature_condition))
            raise Exception("Curvature condition is negative")

    phi = phi_function(model, self, w, X, Y, d)
    ls_verbose = False
    if verbose >= 3:
        ls_verbose = True
    alpha, ls_log = line_search_wolfe(phi=phi.phi, derphi=phi.derphi,
                                      phi0=phi0, old_phi0=self.__old_phi0,
                                      c1=self.c1, c2=self.c2,
                                      verbose=ls_verbose)
    self.__old_phi0 = phi0

    delta = alpha * d
    w += delta
    model.weights = restore_w_to_model(model, w)
    # l_w1 = restore_w_to_model(model, w1)
    # for i in range(0, len(model.weights)):
    #     regularizer = model.kernel_regularizer[i]*current_batch_size/self.tot_n_patterns
    #     weights_decay = 2*regularizer*model.weights[i]
    #     # weights_decay[0,:] = 0   # In ML the bias should not be regularized
    #     model.weights[i] = l_w1[i] - weights_decay

    if (len(self.__s) == self.m and len(self.__y) == self.m):
        self.__s.pop(0)
        self.__y.pop(0)
    # w_new - w_old = w_old + alpha*d - w_old = alpha*d = delta
    self.__s.append(delta)          # delta = w_new - w_old
    self.__y.append(g)

    if verbose >= 2:
        print("| alpha: {} | ng: {} | ls conv: {}, it: {}, time: {:4.4f} | "
              "zoom used: {}, conv: {}, it: {}|".format(
                  alpha, norm_g, ls_log["ls_conv"], ls_log["ls_it"],
                  ls_log["ls_time"], ls_log["zoom_used"], ls_log["zoom_conv"],
                  ls_log["zoom_it"]))

    self.__append_history(alpha, norm_g, ls_log)
    return norm_g
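# self.__compute_search_dir(g, H0, s, y) is not shown here; the standard way to
# obtain H*g from a limited memory of (s, y) pairs is the L-BFGS two-loop
# recursion. A standalone sketch with the same argument order, assuming H0 is
# the scalar gamma computed above (illustrative, not isanet's implementation):
import numpy as np

def two_loop_recursion(g, H0, s_list, y_list):
    # returns an approximation of H_k * g built from the stored curvature pairs
    q = g.copy()
    stack = []
    for s, y in zip(reversed(s_list), reversed(y_list)):   # newest pair first
        rho = 1.0 / np.dot(y.T, s).item()
        a = rho * np.dot(s.T, q).item()
        stack.append((a, rho, s, y))
        q = q - a * y
    r = H0 * q                                             # H0 = gamma*I applied as a scalar
    for a, rho, s, y in reversed(stack):                   # oldest pair first
        b = rho * np.dot(y.T, r).item()
        r = r + s * (a - b)
    return r

# with no stored pairs the recursion reduces to H0 * g
g = np.array([[1.0], [2.0]])
assert np.allclose(two_loop_recursion(g, 1.0, [], []), g)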
def step(self, model, X, Y, verbose):
    """Implements the LBFGS step update method.

    Parameters
    ----------
    model : isanet.model.MLP
        Specify the Multilayer Perceptron object to optimize.

    X : array-like of shape (n_samples, n_features)
        The input data.

    Y : array-like of shape (n_samples, n_output)
        The target values.

    verbose : integer, default=0
        Controls the verbosity: the higher, the more messages.

    Returns
    -------
    float
        The gradient norm.
    """
    w = make_vector(model.weights)
    if not model.is_fitted and self.epoch == 0:
        self.__g = make_vector(
            self.backpropagation(model, model.weights, X, Y))
        d = -self.__g
        phi0 = metrics.mse_reg(Y, model.predict(X), model, model.weights)
    else:
        gamma = np.dot(self.__s[-1].T, self.__y[-1]) / np.dot(
            self.__y[-1].T, self.__y[-1])
        H0 = gamma
        d = -self.__compute_search_dir(self.__g, H0, self.__s, self.__y)
        phi0 = model.history["loss_mse_reg"][-1]
        curvature_condition = np.dot(self.__s[-1].T, self.__y[-1])
        if curvature_condition <= 1e-8:
            print("curvature condition: {}".format(curvature_condition))
            raise Exception("Curvature condition is negative")

    norm_g = np.linalg.norm(self.__g)
    phi = phi_function(model, self, w, X, Y, d)
    ls_verbose = False
    if verbose >= 3:
        ls_verbose = True
    alpha, ls_log = line_search_wolfe(phi=phi.phi, derphi=phi.derphi,
                                      phi0=phi0, old_phi0=self.__old_phi0,
                                      c1=self.c1, c2=self.c2,
                                      verbose=ls_verbose)
    self.__old_phi0 = phi0
    new_g = phi.get_last_g()

    delta = alpha * d
    w += delta
    model.weights = restore_w_to_model(model, w)

    if (len(self.__s) == self.m and len(self.__y) == self.m):
        self.__s.pop(0)
        self.__y.pop(0)
    # w_new - w_old = w_old + alpha*d - w_old = alpha*d = delta
    self.__s.append(delta)          # delta = w_new - w_old
    self.__y.append(new_g - self.__g)
    self.__g = new_g

    if verbose >= 2:
        print("| alpha: {} | ng: {} | ls conv: {}, it: {}, time: {:4.4f} | "
              "zoom used: {}, conv: {}, it: {}|".format(
                  alpha, norm_g, ls_log["ls_conv"], ls_log["ls_it"],
                  ls_log["ls_time"], ls_log["zoom_used"], ls_log["zoom_conv"],
                  ls_log["zoom_it"]))

    self.__append_history(alpha, norm_g, ls_log)
    return norm_g
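# The scaling gamma = (s^T y)/(y^T y) used by both LBFGS variants is the usual
# choice for the initial inverse Hessian H0 = gamma*I, and s^T y > 0 is the
# curvature condition that keeps the quasi-Newton approximation positive
# definite (hence the check and exception above). A runnable toy illustration
# on a convex quadratic (standalone sketch, not isanet code):
import numpy as np

A = np.diag([2.0, 0.5])                    # Hessian of the toy objective 0.5 * w^T A w
w_old = np.array([[1.0], [-2.0]])
w_new = np.array([[0.4], [-0.8]])

s = w_new - w_old                          # step taken:      s = w_new - w_old
y = np.dot(A, w_new) - np.dot(A, w_old)    # gradient change: y = g_new - g_old

curvature = np.dot(s.T, y).item()
gamma = curvature / np.dot(y.T, y).item()

assert curvature > 1e-8                    # curvature condition holds on a convex problem
assert gamma > 0.0                         # so H0 = gamma*I is positive definite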