Example #1
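# Assumed module-level context for this snippet: numpy and scikit-learn helpers.
# enet_coordinate_descent is a cd_fast-style Cython solver (scikit-learn's
# private one or a local copy), and _find_basis is a project-local helper.
import numpy as np
from sklearn.utils import check_random_state
from sklearn.metrics.pairwise import polynomial_kernel
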
    def update_Z(self, X, y, verbose=False, sample_weight=None):
        """Greedy CD solver for the quadratic term of a factorization machine.

        Solves 0.5 ||y - <Z, XX'>||^2_2 + ||Z||_*

        Z implicitly stored as P'ΛP
        """
        n_samples, n_features = X.shape
        rng = check_random_state(self.random_state)
        P = self.P_
        lams = self.lams_
        old_loss = np.inf
        max_rank = self.max_rank
        if max_rank is None:
            max_rank = n_features


        for _ in range(self.max_iter_inner):
            if self.rank_ >= max_rank:
                break
            residual = self.predict_quadratic(X) - y  # could optimize
            if sample_weight is not None:
                residual *= sample_weight
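            # Greedy atom selection: _find_basis picks a new rank-one direction
            # from the current (weighted) residual; for trace-norm objectives
            # this is the leading eigenvector of X' diag(residual) X.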
            p = _find_basis(X, residual, **self.eigsh_kwargs)
            P.append(p)
            lams.append(0.)

            # refit
            refit_target = y.copy()
            # K[i, j] = (x_i @ P[j]) ** 2
            K = polynomial_kernel(X, np.array(P), degree=2, gamma=1, coef0=0)
            if sample_weight is not None:
                refit_target *= np.sqrt(sample_weight)
                K *= np.sqrt(sample_weight)[:, np.newaxis]
            K = np.asfortranarray(K)  # the CD solver iterates over columns
            lams_init = np.array(lams, dtype=np.double)

            # minimizes 0.5 * ||y - K @ lams||_2^2 + beta * ||lams||_1
            lams, _, _, _ = enet_coordinate_descent(lams_init,
                                                    self.beta,
                                                    0,
                                                    K,
                                                    refit_target,
                                                    max_iter=self.refit_iter,
                                                    tol=self.tol,
                                                    rng=rng,
                                                    random=0,
                                                    positive=0)
            # drop atoms zeroed out by the L1 refit; this keeps the rank bounded
            P = [p for p, lam in zip(P, lams) if np.abs(lam) > 0]
            lams = [lam for lam in lams if np.abs(lam) > 0]
            self.rank_ = len(lams)
            self.quadratic_trace_ = np.sum(np.abs(lams))  # trace-norm surrogate

            predict_quadratic = self.predict_quadratic(X, P, lams)
            residual = y - predict_quadratic  # y is already shifted
            loss = self._loss(residual, sample_weight=sample_weight)

            if verbose > 0:
                rms = np.sqrt(np.mean((residual)**2))
                print("rank={} loss={}, RMSE={}".format(self.rank_, loss, rms))

            if np.abs(old_loss - loss) < self.tol:
                break

            old_loss = loss
        self.P_ = P
        self.lams_ = lams
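
The refit step above is an ordinary Lasso in the eigenvalues: column j of K holds the squared projections (P[j] @ x_i)**2, and for unit-norm atoms ||lams||_1 stands in for the trace norm of Z. A rough standalone sketch of that step, using scikit-learn's public Lasso in place of the Cython solver (X, P, y, beta here are stand-in data; sklearn scales the squared loss by 1/(2*n_samples), hence the adjusted alpha):

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
X = rng.randn(50, 10)    # (n_samples, n_features)
P = rng.randn(3, 10)     # current atoms, one per row
y = rng.randn(50)

K = (X @ P.T) ** 2       # same as polynomial_kernel(X, P, degree=2, gamma=1, coef0=0)

beta = 0.1
# Lasso minimizes 1/(2n) * ||y - K w||^2 + alpha * ||w||_1, so alpha = beta / n
# matches 0.5 * ||y - K w||^2 + beta * ||w||_1 up to a factor of n.
lams = Lasso(alpha=beta / len(y), fit_intercept=False).fit(K, y).coef_
print(lams)  # atoms with a zero coefficient would be pruned from P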
Example #3
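# Assumed imports for this (older scikit-learn era) snippet; center_data and
# cd_fast were scikit-learn internals at the time.
import warnings
import numpy as np
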
    def _dense_fit(self, X, y, Xy=None, coef_init=None):

        # copy was done in fit if necessary
        X, y, X_mean, y_mean, X_std = center_data(
            X, y, self.fit_intercept, self.normalize, copy=False)

        if y.ndim == 1:
            y = y[:, np.newaxis]
        if Xy is not None and Xy.ndim == 1:
            Xy = Xy[:, np.newaxis]

        n_samples, n_features = X.shape
        n_targets = y.shape[1]
        
        precompute = self.precompute
        if hasattr(precompute, '__array__') and (
                not np.allclose(X_mean, np.zeros(n_features))
                or not np.allclose(X_std, np.ones(n_features))):
            # a user-supplied Gram matrix is stale once the data has been
            # centered or scaled, so fall back to recomputing it (and Xy)
            precompute = 'auto'
            Xy = None

        coef_ = self._init_coef(coef_init, n_features, n_targets)
        dual_gap_ = np.empty(n_targets)
        eps_ = np.empty(n_targets)

        l1_reg = self.alpha * self.l1_ratio * n_samples
        l2_reg = 0.0  # pure L1 here; would be self.alpha * (1.0 - self.l1_ratio) * n_samples
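        # The n_samples factor is needed because cd_fast minimizes
        # 0.5 * ||y - X w||_2^2 + l1_reg * ||w||_1 + 0.5 * l2_reg * ||w||_2^2,
        # while the user-facing objective divides the squared loss by n_samples.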

        # precompute the Gram matrix when it pays off (n_samples > n_features)
        if hasattr(precompute, '__array__'):
            Gram = precompute
        elif precompute is True or (precompute == 'auto' and
                                    n_samples > n_features):
            Gram = np.dot(X.T, X)
        else:
            Gram = None
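        # Working from Gram = X' X lets each coordinate-descent sweep operate
        # on n_features x n_features quantities instead of the full design
        # matrix, which is the win when n_samples > n_features.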
        
        for k in range(n_targets):  # one coordinate-descent solve per target
            if Gram is None:
                coef_[k, :], dual_gap_[k], eps_[k] = \
                    cd_fast.enet_coordinate_descent(
                        coef_[k, :], l1_reg, l2_reg, X, y[:, k], self.max_iter,
                        self.tol, True)
            else:
                Gram = Gram.copy()
                if Xy is None:
                    this_Xy = np.dot(X.T, y[:, k])
                else:
                    this_Xy = Xy[:, k]
                coef_[k, :], dual_gap_[k], eps_[k] = \
                    cd_fast.enet_coordinate_descent_gram(
                        coef_[k, :], l1_reg, l2_reg, Gram, this_Xy, y[:, k],
                        self.max_iter, self.tol, True)

            if dual_gap_[k] > eps_[k]:
                warnings.warn('Objective did not converge for target %d, '
                              'you might want to increase the number of '
                              'iterations' % k)

        self.coef_, self.dual_gap_, self.eps_ = (np.squeeze(a) for a in
                                                 (coef_, dual_gap_, eps_))
        self._set_intercept(X_mean, y_mean, X_std)

        # return self for chaining fit and predict calls
        return self
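
For reference, the Cython routine cd_fast.enet_coordinate_descent used above performs cyclic coordinate descent on 0.5 * ||y - X w||_2^2 + l1_reg * ||w||_1 + 0.5 * l2_reg * ||w||_2^2. A minimal NumPy sketch of that update rule (not the library's optimized implementation, and without its duality-gap check):

import numpy as np

def enet_coordinate_descent_sketch(X, y, l1_reg, l2_reg, max_iter=1000, tol=1e-6):
    """Cyclic CD for 0.5*||y - Xw||^2 + l1_reg*||w||_1 + 0.5*l2_reg*||w||^2."""
    n_samples, n_features = X.shape
    w = np.zeros(n_features)
    residual = y.copy()                # equals y - X @ w while w == 0
    col_sq = (X ** 2).sum(axis=0)      # per-column squared norms
    for _ in range(max_iter):
        w_max = d_w_max = 0.0
        for j in range(n_features):
            if col_sq[j] == 0.0:
                continue
            w_old = w[j]
            # correlation of column j with the partial residual (excluding j)
            rho = X[:, j] @ residual + col_sq[j] * w_old
            # soft-thresholding update for the elastic-net coordinate problem
            w[j] = np.sign(rho) * max(abs(rho) - l1_reg, 0.0) / (col_sq[j] + l2_reg)
            if w[j] != w_old:
                residual -= X[:, j] * (w[j] - w_old)
            w_max = max(w_max, abs(w[j]))
            d_w_max = max(d_w_max, abs(w[j] - w_old))
        if w_max == 0.0 or d_w_max / w_max < tol:
            break
    return w

The Gram variant called in the n_samples > n_features branch applies the same update rule but works from the cached X' X and X' y, so each sweep never touches the full design matrix.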