Example #1
    def run(self, XY, wc=None):
        """A NIPALS implementation for sparse PLS regresison.

        Parameters
        ----------
        XY : List of two numpy arrays. XY[0] is n-by-p and XY[1] is n-by-q. The
                independent and dependent variables.

        wc : List of numpy arrays. The start vectors.

        Returns
        -------
        w : Numpy array, p-by-1. The weight vector of X.

        c : Numpy array, q-by-1. The weight vector of Y.
        """
        X = XY[0]
        Y = XY[1]

        n, p = X.shape

        l1_1 = penalties.L1(l=self.l[0])
        l1_2 = penalties.L1(l=self.l[1])

        if wc is not None:
            w_new = wc[0]
        else:
            # Initialise w from the column of Y with the largest sum of
            # squares.
            maxi = np.argmax(np.sum(Y ** 2, axis=0))
            u = Y[:, [maxi]]
            w_new = np.dot(X.T, u)
            w_new *= 1.0 / maths.norm(w_new)

        for i in range(self.max_iter):
            w = w_new

            # Update the Y weight vector and, if requested, sparsify and
            # normalise it.
            c = np.dot(Y.T, np.dot(X, w))
            if self.penalise_y:
                c = l1_2.prox(c)  # Soft thresholding.
                normc = maths.norm(c)
                if normc > consts.TOLERANCE:
                    c *= 1.0 / normc

            # Update the X weight vector, sparsify and normalise it.
            w_new = np.dot(X.T, np.dot(Y, c))
            w_new = l1_1.prox(w_new)  # Soft thresholding.
            normw = maths.norm(w_new)
            if normw > consts.TOLERANCE:
                w_new *= 1.0 / normw

            # Stop when the relative change in the X weights is small enough.
            if maths.norm(w_new - w) / maths.norm(w) < self.eps:
                break

        self.num_iter = i

        return w_new, c
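The run method above relies on its surrounding class (self.l, self.penalise_y, self.max_iter, self.eps) and on the library helpers penalties.L1, maths.norm and consts.TOLERANCE. As a rough, self-contained illustration of the same alternating updates, here is a minimal NumPy sketch; the names sparse_pls_nipals and soft_threshold, the tolerance and all parameter defaults are hypothetical stand-ins, not part of the library above.

import numpy as np


def soft_threshold(v, l):
    # Hypothetical stand-in for penalties.L1(l=...).prox: the proximal
    # operator of the L1 penalty.
    return np.sign(v) * np.maximum(0.0, np.abs(v) - l)


def sparse_pls_nipals(X, Y, l_w=0.1, l_c=0.1, max_iter=100, eps=5e-8,
                      tol=5e-8, penalise_y=True):
    # Mirrors the alternating updates of the run method above in plain NumPy.
    maxi = np.argmax(np.sum(Y ** 2, axis=0))
    u = Y[:, [maxi]]
    w_new = np.dot(X.T, u)
    w_new /= np.linalg.norm(w_new)
    for _ in range(max_iter):
        w = w_new
        c = np.dot(Y.T, np.dot(X, w))
        if penalise_y:
            c = soft_threshold(c, l_c)
            normc = np.linalg.norm(c)
            if normc > tol:
                c /= normc
        w_new = soft_threshold(np.dot(X.T, np.dot(Y, c)), l_w)
        normw = np.linalg.norm(w_new)
        if normw > tol:
            w_new /= normw
        if np.linalg.norm(w_new - w) / np.linalg.norm(w) < eps:
            break
    return w_new, c


rng = np.random.RandomState(42)
X, Y = rng.randn(50, 10), rng.randn(50, 3)
w, c = sparse_pls_nipals(X, Y)
print(w.ravel())
print(c.ravel())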
Example #2
    def run(self, X, y, beta=None):
        """Find the minimiser of the associated function, starting at beta.

        Parameters
        ----------
        X : Numpy array, shape n-by-p. The matrix X with independent
                variables.

        y : Numpy array, shape n-by-1. The response variable y.

        beta : Numpy array. Optional starting point.
        """
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)
        if self.info_requested(Info.time):
            t = []
        if self.info_requested(Info.fvalue):
            f = []
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        n, p = X.shape

        if beta is None:
            beta = self.start_vector.get_weights(p)
        else:
            beta = beta.copy()

        function = functions.CombinedFunction()
        function.add_loss(functions.losses.LinearRegression(X, y, mean=False))
        function.add_prox(penalties.L1(l=self.l))

        # Precompute the column-wise sums of squares, i.e. the diagonal of
        # X'X.
        xTx = np.sum(X ** 2.0, axis=0)
        if self.mean:
            xTx *= 1.0 / float(n)

        for i in range(1, self.max_iter + 1):

            if self.info_requested(Info.time):
                tm = utils.time_cpu()

            # The incremental residual update accumulates floating point
            # error, so recompute the residual from scratch. This may not be
            # necessary at every iteration.
            y_Xbeta = y - np.dot(X, beta)

            betaold = beta.copy()
            for j in range(p):

                xj = X[:, [j]]
                betaj = beta[j, 0]

                if xTx[j] < consts.TOLERANCE:  # Avoid division-by-zero.
                    bj = 0.0
                else:
                    bj = np.dot(xj.T, y_Xbeta + xj * betaj)[0, 0]
                    if self.mean:
                        bj /= float(n)

                    if j < self.penalty_start:
                        # Unpenalised variables (e.g. intercept columns).
                        bj = bj / xTx[j]
                    else:
                        # Soft thresholding.
                        bj = np.sign(bj) \
                                * max(0.0, (abs(bj) - self.l) / xTx[j])

                y_Xbeta -= xj * (bj - betaj)  # Update the residual y - X.beta.
                beta[j] = bj  # Save result.

            if self.info_requested(Info.time):
                t.append(utils.time_cpu() - tm)
            if self.info_requested(Info.fvalue):
                f_ = self._f(y_Xbeta, y, beta)
                f.append(f_)

            if maths.norm(beta - betaold) < self.eps \
                    and i >= self.min_iter:

                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                break

        self.num_iter = i
        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, i)
        if self.info_requested(Info.time):
            self.info_set(Info.time, t)
        if self.info_requested(Info.fvalue):
            self.info_set(Info.fvalue, f)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        return beta
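The inner sweep above is the classic coordinate-descent update for the L1-penalised least squares (lasso) problem: each coefficient is refit against the current residual and then soft thresholded. As a rough, self-contained illustration, here is a minimal NumPy sketch of the same sweep; the names lasso_cd and soft_threshold and the tolerance are hypothetical, the Info bookkeeping is dropped, and unpenalised leading columns (penalty_start) are not handled.

import numpy as np


def soft_threshold(b, l):
    # Scalar soft thresholding, as in the inner loop above (hypothetical
    # helper).
    return np.sign(b) * max(0.0, abs(b) - l)


def lasso_cd(X, y, l=0.1, max_iter=100, eps=5e-8, mean=False):
    # Cyclic coordinate descent for 0.5 * ||y - X.beta||^2 + l * ||beta||_1,
    # mirroring the sweep in the run method above.
    n, p = X.shape
    beta = np.zeros((p, 1))
    xTx = np.sum(X ** 2.0, axis=0)  # Diagonal of X'X.
    if mean:
        xTx /= float(n)
    for _ in range(max_iter):
        y_Xbeta = y - np.dot(X, beta)  # Reset the residual each sweep.
        betaold = beta.copy()
        for j in range(p):
            xj = X[:, [j]]
            betaj = beta[j, 0]
            if xTx[j] < 5e-8:  # Avoid division by zero.
                bj = 0.0
            else:
                bj = np.dot(xj.T, y_Xbeta + xj * betaj)[0, 0]
                if mean:
                    bj /= float(n)
                bj = soft_threshold(bj, l) / xTx[j]
            y_Xbeta -= xj * (bj - betaj)  # Keep the residual consistent.
            beta[j] = bj
        if np.linalg.norm(beta - betaold) < eps:
            break
    return beta


rng = np.random.RandomState(0)
X = rng.randn(100, 20)
beta_true = np.zeros((20, 1))
beta_true[:3] = 1.0
y = np.dot(X, beta_true) + 0.01 * rng.randn(100, 1)
print(lasso_cd(X, y, l=0.5).ravel().round(2))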