Example #1
    def phi(self, a):
        """Compute the phi value when an alpha 'a' parameter is passed

        Parameters
        ----------
        a : scalar
            alpha parameter
        """
        w_a = restore_w_to_model(self.model, self.w + a * self.d)
        phia = metrics.mse_reg(self.Y, self.optimizer.forward(w_a, self.X),
                               self.model, w_a)
        return phia
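
In Example #1, phi is the one-dimensional restriction of the regularized MSE objective along the current search direction, i.e. phi(a) = f(w + a*d). A minimal, self-contained sketch of the same idea on an illustrative quadratic (the objective and names below are not part of isanet):

import numpy as np

def make_phi(f, w, d):
    """Restrict f to the ray w + a*d, giving a scalar function of the step size a."""
    return lambda a: f(w + a * d)

# Illustrative quadratic objective: f(w) = 0.5 * w^T A w - b^T w
A = np.array([[3.0, 1.0], [1.0, 2.0]])
b = np.array([1.0, 1.0])
f = lambda w: 0.5 * w @ A @ w - b @ w

w = np.zeros(2)
d = b - A @ w                # steepest-descent direction at w
phi = make_phi(f, w, d)
print(phi(0.0), phi(0.5))    # objective value at two trial step sizes
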
Example #2
    def derphi(self, a):
        """Compute the dot producte between the gradient and 
        the direction when an alpha 'a' parameter is passed::

                phips = g_a^T*d

        Parameters
        ----------
        a : scalar
            alpha parameter
        """
        w_a = self.w + a * self.d
        l_w_a = restore_w_to_model(self.model, w_a)
        g_a = make_vector(
            self.optimizer.backpropagation(self.model, l_w_a, self.X, self.Y))
        phips = np.dot(g_a.T, self.d).item()
        return phips
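
Example #2 returns the directional derivative g(w + a*d)^T d, i.e. the slope of phi at step size a, which the Wolfe line search needs for its curvature condition. A quick sanity check (a sketch, not isanet code) is to compare it against a central finite difference of phi:

import numpy as np

# Same illustrative quadratic as before: f(w) = 0.5 * w^T A w - b^T w
A = np.array([[3.0, 1.0], [1.0, 2.0]])
b = np.array([1.0, 1.0])
f = lambda w: 0.5 * w @ A @ w - b @ w
grad = lambda w: A @ w - b

w = np.zeros(2)
d = -grad(w)                             # descent direction at w
phi = lambda a: f(w + a * d)
derphi = lambda a: grad(w + a * d) @ d   # g(w + a*d)^T d

eps, a = 1e-6, 0.3
fd = (phi(a + eps) - phi(a - eps)) / (2 * eps)
print(abs(derphi(a) - fd))               # agreement up to roundoff
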
Example #3
    def step(self, model, X, Y, verbose):
        """Implements the NCG step update method.

        Parameters
        ----------
        model : isanet.model.MLP
            The Multilayer Perceptron object to optimize.

        X : array-like of shape (n_samples, n_features)
            The input data.

        Y : array-like of shape (n_samples, n_output)
            The target values.

        verbose : integer, default=0
            Controls the verbosity: the higher, the more messages.

        Returns
        -------
            float
                The gradient norm.

        """
        w = make_vector(model.weights)

        if not model.is_fitted and self.epoch == 0:
            beta = 0
            self.__g = make_vector(
                self.backpropagation(model, model.weights, X, Y))
            d = -self.__g
            phi0 = metrics.mse_reg(Y, model.predict(X), model, model.weights)
        else:
            # compute the conjugacy coefficient beta
            beta = self.__fbeta(self.__g, self.__past_g, self.__past_ng,
                                self.__past_d)
            self.restart += 1
            if self.max_restart is not None and (self.restart
                                                 == self.max_restart):
                self.restart = 0
                beta = 0
            if beta != 0:
                d = -self.__g + beta * self.__past_d
            else:
                d = -self.__g
            phi0 = model.history["loss_mse_reg"][-1]

        norm_g = np.linalg.norm(self.__g)

        self.__past_ng = norm_g
        self.__past_g = self.__g
        self.__past_d = d
        derphi0 = np.dot(self.__g.T, d).item()

        phi = phi_function(model, self, w, X, Y, d)
        ls_verbose = verbose >= 3
        alpha, ls_log = line_search_wolfe(phi=phi.phi,
                                          derphi=phi.derphi,
                                          phi0=phi0,
                                          old_phi0=self.__old_phi0,
                                          derphi0=derphi0,
                                          c1=self.c1,
                                          c2=self.c2,
                                          maxiter=self.ln_maxiter,
                                          verbose=ls_verbose)

        self.__old_phi0 = phi0
        self.__g = phi.get_last_g()

        w += alpha * d
        model.weights = restore_w_to_model(model, w)

        if verbose >= 2:
            print(
                "| beta: {} | alpha: {} | ng: {} | ls conv: {}, it: {}, time: {:4.4f} | zoom used: {}, conv: {}, it: {}|"
                .format(beta, alpha, norm_g, ls_log["ls_conv"],
                        ls_log["ls_it"], ls_log["ls_time"],
                        ls_log["zoom_used"], ls_log["zoom_conv"],
                        ls_log["zoom_it"]))
        self.__append_history(beta, alpha, norm_g, ls_log)
        return norm_g
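
The conjugacy coefficient __fbeta used above is not shown in this example. Two common formulas that fit the same argument list (current gradient, previous gradient, its norm, previous direction) are Fletcher-Reeves and Polak-Ribiere+; the sketch below assumes column-vector gradients as produced by make_vector and only illustrates what such a callback might compute:

import numpy as np

def fbeta_fr(g, past_g, past_ng, past_d):
    """Fletcher-Reeves: beta = ||g||^2 / ||past_g||^2."""
    return np.dot(g.T, g).item() / (past_ng ** 2)

def fbeta_pr_plus(g, past_g, past_ng, past_d):
    """Polak-Ribiere+: beta = max(0, g^T (g - past_g) / ||past_g||^2)."""
    beta = np.dot(g.T, g - past_g).item() / (past_ng ** 2)
    return max(0.0, beta)

With the max(0, .) clipping, a vanishing coefficient falls back to steepest descent, which is exactly the beta == 0 branch of the step above.
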
Example #4
    def step(self, model, X, Y, verbose):
        """Implements the LBFGS step update method.

        Parameters
        ----------
        model : isanet.model.MLP
            The Multilayer Perceptron object to optimize.

        X : array-like of shape (n_samples, n_features)
            The input data.

        Y : array-like of shape (n_samples, n_output)
            The target values.

        verbose : integer, default=0
            Controls the verbosity: the higher, the more messages.

        Returns
        -------
            float
                The gradient norm.

        """

        current_batch_size = X.shape[0]

        w = make_vector(model.weights)
        g = make_vector(self.backpropagation(model, model.weights, X, Y))
        norm_g = np.linalg.norm(g)
        phi0 = metrics.mse_reg(Y, model.predict(X), model, model.weights)

        if not model.is_fitted and self.epoch == 0:
            d = -g
        else:
            self.__y[-1] = g - self.__y[-1]
            gamma = np.dot(self.__s[-1].T, self.__y[-1]) / np.dot(
                self.__y[-1].T, self.__y[-1])
            H0 = gamma
            d = -self.__compute_search_dir(g, H0, self.__s, self.__y)
            curvature_condition = np.dot(self.__s[-1].T, self.__y[-1])
            if curvature_condition <= 1e-8:
                print("curvature condition: {}".format(curvature_condition))
                raise Exception("Curvature condition s^T y is not positive")

        phi = phi_function(model, self, w, X, Y, d)
        ls_verbose = verbose >= 3
        alpha, ls_log = line_search_wolfe(phi=phi.phi,
                                          derphi=phi.derphi,
                                          phi0=phi0,
                                          old_phi0=self.__old_phi0,
                                          c1=self.c1,
                                          c2=self.c2,
                                          verbose=ls_verbose)

        self.__old_phi0 = phi0
        delta = alpha * d
        w += delta
        model.weights = restore_w_to_model(model, w)

        # l_w1 = restore_w_to_model(model, w1)
        # for i in range(0, len(model.weights)):
        #     regularizer = model.kernel_regularizer[i]*current_batch_size/self.tot_n_patterns
        #     weights_decay = 2*regularizer*model.weights[i]
        #     # weights_decay[0,:] = 0 # In ML the bias should not be regularized
        #     model.weights[i] = l_w1[i] - weights_decay

        if (len(self.__s) == self.m and len(self.__y) == self.m):
            self.__s.pop(0)
            self.__y.pop(0)
        # w_new - w_old = w_old + alpha*d - w_old = alpha*d = delta
        self.__s.append(delta)  # delta = w_new - w_old
        self.__y.append(g)
        if verbose >= 2:
            print(
                "| alpha: {} | ng: {} | ls conv: {}, it: {}, time: {:4.4f} | zoom used: {}, conv: {}, it: {}|"
                .format(alpha, norm_g, ls_log["ls_conv"], ls_log["ls_it"],
                        ls_log["ls_time"], ls_log["zoom_used"],
                        ls_log["zoom_conv"], ls_log["zoom_it"]))
        self.__append_history(alpha, norm_g, ls_log)
        return norm_g
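
The helper __compute_search_dir in Examples #4 and #5 is not shown; the standard way to realize it is the L-BFGS two-loop recursion (Nocedal and Wright, Algorithm 7.4). The sketch below assumes, as in the code above, that s and y are lists of column vectors ordered oldest to newest and that H0 is the scalar gamma scaling the initial inverse-Hessian approximation:

import numpy as np

def compute_search_dir(g, H0, s, y):
    """Two-loop recursion: approximates H^{-1} g from the stored (s, y) pairs."""
    rhos = [1.0 / np.dot(yi.T, si).item() for si, yi in zip(s, y)]
    q = g.copy()
    alphas = []
    # first loop: newest pair to oldest pair
    for si, yi, rho in zip(reversed(s), reversed(y), reversed(rhos)):
        a = rho * np.dot(si.T, q).item()
        alphas.append(a)
        q = q - a * yi
    r = H0 * q                                   # initial approximation H0 = gamma * I
    # second loop: oldest pair to newest pair
    for si, yi, rho, a in zip(s, y, rhos, reversed(alphas)):
        b = rho * np.dot(yi.T, r).item()
        r = r + (a - b) * si
    return r                                     # the search direction is then d = -r

With this convention, d = -compute_search_dir(g, H0, s, y) is the usual quasi-Newton direction, matching the sign used in the step methods above.
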
Example #5
    def step(self, model, X, Y, verbose):
        """Implements the LBFGS step update method.

        Parameters
        ----------
        model : isanet.model.MLP
            The Multilayer Perceptron object to optimize.

        X : array-like of shape (n_samples, n_features)
            The input data.

        Y : array-like of shape (n_samples, n_output)
            The target values.

        verbose : integer, default=0
            Controls the verbosity: the higher, the more messages.

        Returns
        -------
            float
                The gradient norm.

        """

        w = make_vector(model.weights)

        if not model.is_fitted and self.epoch == 0:
            self.__g = make_vector(
                self.backpropagation(model, model.weights, X, Y))
            d = -self.__g
            phi0 = metrics.mse_reg(Y, model.predict(X), model, model.weights)
        else:
            gamma = np.dot(self.__s[-1].T, self.__y[-1]) / np.dot(
                self.__y[-1].T, self.__y[-1])
            H0 = gamma
            d = -self.__compute_search_dir(self.__g, H0, self.__s, self.__y)
            phi0 = model.history["loss_mse_reg"][-1]
            curvature_condition = np.dot(self.__s[-1].T, self.__y[-1])
            if curvature_condition <= 1e-8:
                print("curvature condition: {}".format(curvature_condition))
                raise Exception("Curvature condition s^T y is not positive")

        norm_g = np.linalg.norm(self.__g)

        phi = phi_function(model, self, w, X, Y, d)
        ls_verbose = verbose >= 3
        alpha, ls_log = line_search_wolfe(phi=phi.phi,
                                          derphi=phi.derphi,
                                          phi0=phi0,
                                          old_phi0=self.__old_phi0,
                                          c1=self.c1,
                                          c2=self.c2,
                                          verbose=ls_verbose)

        self.__old_phi0 = phi0
        new_g = phi.get_last_g()

        delta = alpha * d
        w += delta
        model.weights = restore_w_to_model(model, w)

        if (len(self.__s) == self.m and len(self.__y) == self.m):
            self.__s.pop(0)
            self.__y.pop(0)
        # w_new - w_old = w_old + alpha*d - w_old = alpha*d = delta
        self.__s.append(delta)  # delta = w_new - w_old
        self.__y.append(new_g - self.__g)
        self.__g = new_g

        if verbose >= 2:
            print(
                "| alpha: {} | ng: {} | ls conv: {}, it: {}, time: {:4.4f} | zoom used: {}, conv: {}, it: {}|"
                .format(alpha, norm_g, ls_log["ls_conv"], ls_log["ls_it"],
                        ls_log["ls_time"], ls_log["zoom_used"],
                        ls_log["zoom_conv"], ls_log["zoom_it"]))
        self.__append_history(alpha, norm_g, ls_log)
        return norm_g
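
Example #5 differs from Example #4 mainly in how the gradient is obtained: instead of running backpropagation again at the start of the next step, it reuses the gradient the Wolfe line search already evaluated at the accepted point via phi.get_last_g(). A minimal sketch of that caching pattern (class and attribute names here are illustrative, not isanet's phi_function):

import numpy as np

class CachedPhi:
    """Sketch: remembers the gradient evaluated at the last trial point so the
    caller can reuse it after the line search instead of recomputing it."""

    def __init__(self, f, grad, w, d):
        # f: objective, grad: gradient function; w, d: 1-D numpy arrays
        self.f, self.grad, self.w, self.d = f, grad, w, d
        self._last_g = None

    def phi(self, a):
        return self.f(self.w + a * self.d)

    def derphi(self, a):
        self._last_g = self.grad(self.w + a * self.d)   # cache gradient at w + a*d
        return float(np.dot(self._last_g, self.d))

    def get_last_g(self):
        return self._last_g
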