Example #1
def test_primal_dual_relationship():
    y = y_diabetes.reshape(-1, 1)
    coef = _solve_cholesky(X_diabetes, y, alpha=[1e-2])
    K = np.dot(X_diabetes, X_diabetes.T)
    dual_coef = _solve_cholesky_kernel(K, y, alpha=[1e-2])
    coef2 = np.dot(X_diabetes.T, dual_coef).T
    assert_array_almost_equal(coef, coef2)
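The test above checks the primal-dual relationship of ridge regression: the primal coefficients equal X.T times the dual coefficients returned by the kernel solver. As a minimal, self-contained sketch of the system the helper solves, assuming the private-module import path of recent scikit-learn releases (older releases expose it as sklearn.linear_model.ridge):

import numpy as np
from sklearn.linear_model._ridge import _solve_cholesky_kernel  # private helper; path assumed

rng = np.random.RandomState(0)
X = rng.randn(5, 3)
y = rng.randn(5, 1)
K = X @ X.T                          # linear-kernel Gram matrix
alpha = np.atleast_1d(1e-2)

# The dual system behind the test above: (K + alpha * I) dual_coef = y.
dual_coef = _solve_cholesky_kernel(K, y, alpha)
expected = np.linalg.solve(K + alpha[0] * np.eye(5), y)
np.testing.assert_allclose(dual_coef, expected, rtol=1e-6)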
Example #2
def test_primal_dual_relationship():
    y = y_diabetes.reshape(-1, 1)
    coef = _solve_cholesky(X_diabetes, y, alpha=[1e-2])
    K = np.dot(X_diabetes, X_diabetes.T)
    dual_coef = _solve_cholesky_kernel(K, y, alpha=[1e-2])
    coef2 = np.dot(X_diabetes.T, dual_coef).T
    assert_array_almost_equal(coef, coef2)
Example #3
def test_ridge_sample_weights_in_feature_space():
    """Check that Cholesky solver in feature space applies sample_weights
    correctly.
    """

    rng = np.random.RandomState(42)

    n_samples_list = [5, 6, 7] * 2
    n_features_list = [7, 6, 5] * 2
    n_targets_list = [1, 1, 1, 2, 2, 2]
    noise = 1.
    alpha = 2.
    alpha = np.atleast_1d(alpha)

    for n_samples, n_features, n_targets in zip(n_samples_list,
                                                n_features_list,
                                                n_targets_list):
        X = rng.randn(n_samples, n_features)
        beta = rng.randn(n_features, n_targets)
        Y = X.dot(beta)
        Y_noisy = Y + rng.randn(*Y.shape) * np.sqrt((Y ** 2).sum(0)) * noise

        K = X.dot(X.T)
        sample_weights = 1. + (rng.randn(n_samples) ** 2) * 10

        coef_sample_space = _solve_cholesky_kernel(K, Y_noisy, alpha,
                                                   sample_weight=sample_weights)

        coef_feature_space = _solve_cholesky(X, Y_noisy, alpha,
                                             sample_weight=sample_weights)

        assert_array_almost_equal(X.T.dot(coef_sample_space),
                                  coef_feature_space.T)
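The identity that this loop checks can be reproduced in a few lines against an explicit weighted primal solve. A self-contained sketch, assuming the private-module import path of recent scikit-learn and that the helper applies sample weights as weighted kernel ridge, which is exactly what the test asserts:

import numpy as np
from sklearn.linear_model._ridge import _solve_cholesky_kernel  # path assumed

rng = np.random.RandomState(0)
X = rng.randn(6, 4)
y = rng.randn(6, 1)
w = 1.0 + rng.rand(6)                     # positive sample weights
alpha = 2.0

# Weighted primal ridge: (X^T W X + alpha I) coef = X^T W y
W = np.diag(w)
coef = np.linalg.solve(X.T @ W @ X + alpha * np.eye(4), X.T @ W @ y)

# Weighted dual (kernel) ridge through the helper; mapping back with X^T
# should recover the primal coefficients, as in the test above.
dual_coef = _solve_cholesky_kernel(X @ X.T, y, np.atleast_1d(alpha),
                                   sample_weight=w)
np.testing.assert_allclose(X.T @ dual_coef, coef, rtol=1e-6)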
Example #4
def test_ridge_sample_weights_in_feature_space():
    """Check that Cholesky solver in feature space applies sample_weights
    correctly.
    """

    rng = np.random.RandomState(42)

    n_samples_list = [5, 6, 7] * 2
    n_features_list = [7, 6, 5] * 2
    n_targets_list = [1, 1, 1, 2, 2, 2]
    noise = 1.
    alpha = 2.
    alpha = np.atleast_1d(alpha)

    for n_samples, n_features, n_targets in zip(n_samples_list,
                                                n_features_list,
                                                n_targets_list):
        X = rng.randn(n_samples, n_features)
        beta = rng.randn(n_features, n_targets)
        Y = X.dot(beta)
        Y_noisy = Y + rng.randn(*Y.shape) * np.sqrt((Y**2).sum(0)) * noise

        K = X.dot(X.T)
        sample_weights = 1. + (rng.randn(n_samples)**2) * 10

        coef_sample_space = _solve_cholesky_kernel(
            K, Y_noisy, alpha, sample_weight=sample_weights)

        coef_feature_space = _solve_cholesky(X,
                                             Y_noisy,
                                             alpha,
                                             sample_weight=sample_weights)

        assert_array_almost_equal(X.T.dot(coef_sample_space),
                                  coef_feature_space.T)
Example #5
    def predict(self, X):
        Weights_S = np.multiply(self.alphas.ravel(), np.sum(self.E_mat,
                                                            axis=1))
        Weights_all = np.hstack(
            (np.ones(self.n_aux_samples), (Weights_S / np.max(Weights_S))))
        Ysp = np.vstack((self.Y_aux_, self.y_fit_))
        Xsp = np.vstack((self.X_aux_, self.X_fit_))
        KK = self._get_kernel(Xsp)
        Kx = self._get_kernel(Xsp, X)

        self.a_prediction = _solve_cholesky_kernel(KK,
                                                   Ysp,
                                                   self.lmbd,
                                                   sample_weight=Weights_all)

        y_prediction = np.dot(Kx.T, self.a_prediction)

        return y_prediction
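Stripped of the estimator-specific sample weighting and auxiliary-sample stacking, the prediction above is the standard kernel-ridge rule y_pred = K(X_train, X_test).T @ dual_coef. A minimal sketch with an RBF kernel (private import path again assumed):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.linear_model._ridge import _solve_cholesky_kernel  # path assumed

rng = np.random.RandomState(0)
X_train = rng.randn(30, 2)
X_test = rng.randn(5, 2)
y_train = np.sin(X_train[:, :1])

K = rbf_kernel(X_train)                    # (30, 30) training Gram matrix
Kx = rbf_kernel(X_train, X_test)           # (30, 5) train/test cross-kernel
dual_coef = _solve_cholesky_kernel(K, y_train, np.atleast_1d(0.1))
y_pred = Kx.T @ dual_coef                  # (5, 1) predictions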
Example #6
    def fit(self, X, y=None, sample_weight=None):
        """Fit Kernel Ridge regression model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values

        sample_weight : float or numpy array of shape [n_samples]
            Individual weights for each sample, ignored if None is passed.

        Returns
        -------
        self : returns an instance of self.
        """
        # Convert data
        X, y = check_X_y(X, y, multi_output=True)

        n_samples = X.shape[0]
        K = self._get_kernel(X)
        alpha = np.atleast_1d(self.alpha)

        ravel = False
        if len(y.shape) == 1:
            y = y.reshape(-1, 1)
            ravel = True

        copy = self.kernel == "precomputed"
        self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha,
                                                 sample_weight,
                                                 copy)
        if ravel:
            self.dual_coef_ = self.dual_coef_.ravel()

        self.X_fit_ = X

        return self
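This fit method is very close to the one behind scikit-learn's public KernelRidge estimator, which also solves its dual system with _solve_cholesky_kernel. For comparison, a minimal usage sketch of the public API:

import numpy as np
from sklearn.kernel_ridge import KernelRidge

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = X @ rng.randn(3) + 0.1 * rng.randn(50)

model = KernelRidge(alpha=1.0, kernel="rbf", gamma=0.5).fit(X, y)
print(model.dual_coef_.shape)      # (50,): one dual coefficient per training sample
print(model.predict(X[:5]))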
Example #7
    def fit(self, X, y=None, sample_weight=None):
        """Fit Kernel Ridge regression model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values

        sample_weight : float or numpy array of shape [n_samples]
            Individual weights for each sample, ignored if None is passed.

        Returns
        -------
        self : returns an instance of self.
        """
        # Convert data
        X, y = check_X_y(X, y, multi_output=True)

        n_samples = X.shape[0]
        K = self._get_kernel(X)
        alpha = np.atleast_1d(self.alpha)

        ravel = False
        if len(y.shape) == 1:
            y = y.reshape(-1, 1)
            ravel = True

        copy = self.kernel == "precomputed"
        self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha, sample_weight,
                                                 copy)
        if ravel:
            self.dual_coef_ = self.dual_coef_.ravel()

        self.X_fit_ = X

        return self
Example #8
def enet_kernel_learning_admm2(
        K, y, lamda=0.01, beta=0.01, rho=1., max_iter=100, verbose=0, rtol=1e-4,
        tol=1e-4, return_n_iter=True, update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||y_i - alpha_i * sum_{k=1}^{n_k} w_k * K_{ik}||^2
        + lamda ||w||_1 + beta sum_{j=1}^{c_i}||alpha_j||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]

    u = [np.zeros(K[j].shape[1]) for j in range(n_patients)]
    u_1 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)

    x_old = [np.zeros(K[0].shape[1]) for j in range(n_patients)]
    w_1_old = w_1.copy()
    # w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update x
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        x = [prox_laplacian(y[j] + rho * (A[j].T.dot(alpha[j]) - u[j]), rho / 2.)
             for j in range(n_patients)]

        # update alpha
        # solve (AtA + 2I)^-1 (Aty) with A = wK
        KK = [rho * A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [rho * A[j].dot(x[j] + u[j]) for j in range(n_patients)]
        alpha = [_solve_cholesky_kernel(
            KK[j], yy[j][..., None], 2 * beta).ravel() for j in range(n_patients)]
        # equivalent to alpha_dot_K
        # solve (sum(AtA) + 2*rho I)^-1 (sum(Aty) + rho(w1+w2-u1-u2))
        # with A = K * alpha
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(A[j].dot(x[j] + u[j]) for j in range(n_patients))
        yy += w_1 - u_1
        coef = _solve_cholesky_kernel(KK, yy[..., None], 1).ravel()

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        # w_2 = prox_laplacian(coef + u_2, beta / rho)

        # update residuals
        alpha_coef_K = [
            alpha[j].dot(K[j].T.dot(coef)) for j in range(n_patients)]
        residuals = [x[j] - alpha_coef_K[j] for j in range(n_patients)]
        u = [u[j] + residuals[j] for j in range(n_patients)]
        u_1 += coef - w_1

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(coef - w_1) +
            sum(squared_norm(residuals[j]) for j in range(n_patients)))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) +
            sum(squared_norm(x[j] - x_old[j]) for j in range(n_patients)))

        obj = objective_admm2(x, y, alpha, lamda, beta, w_1)
        check = convergence(
            obj=obj, rnorm=rnorm, snorm=snorm,
            e_pri=np.sqrt(coef.size + sum(
                x[j].size for j in range(n_patients))) * tol + rtol * max(
                    np.sqrt(squared_norm(coef) + sum(squared_norm(
                        alpha_coef_K[j]) for j in range(n_patients))),
                    np.sqrt(squared_norm(w_1) + sum(squared_norm(
                        x[j]) for j in range(n_patients)))),
            e_dual=np.sqrt(coef.size + sum(
                x[j].size for j in range(n_patients))) * tol + rtol * rho * (
                    np.sqrt(squared_norm(u_1) + sum(squared_norm(
                        u[j]) for j in range(n_patients)))))

        w_1_old = w_1.copy()
        x_old = [x[j].copy() for j in range(n_patients)]

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u = [u[j] * (rho / rho_new) for j in range(n_patients)]
        u_1 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
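The ADMM loop above calls a soft_thresholding helper that is not shown here. As a hedged sketch, this is the usual elementwise l1 proximal operator used for the w_1 update (the project's own implementation may differ in details):

import numpy as np

def soft_thresholding(v, threshold):
    # Prox of threshold * ||.||_1: shrink each entry towards zero by `threshold`.
    return np.sign(v) * np.maximum(np.abs(v) - threshold, 0.0)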
Example #9
def enet_kernel_learning_admm(
        K, y, lamda=0.01, beta=0.01, rho=1., max_iter=100, verbose=0, rtol=1e-4,
        tol=1e-4, return_n_iter=True, update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||alpha_i * w * K_i - y_i||^2 + lamda ||w||_1
        + beta ||w||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    u_1 = np.zeros(n_kernels)
    u_2 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)
    w_2 = np.zeros(n_kernels)

    w_1_old = w_1.copy()
    w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update alpha
        # solve (AtA + 2I)^-1 (Aty) with A = wK
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        KK = [A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [y[j].dot(A[j]) for j in range(n_patients)]

        alpha = [_solve_cholesky_kernel(
            KK[j], yy[j][..., None], 2).ravel() for j in range(n_patients)]
        # alpha = [_solve_cholesky_kernel(
        #     K_dot_coef[j], y[j][..., None], 0).ravel() for j in range(n_patients)]

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        w_2 = prox_laplacian(coef + u_2, beta / rho)

        # equivalent to alpha_dot_K
        # solve (sum(AtA) + 2*rho I)^-1 (sum(Aty) + rho(w1+w2-u1-u2))
        # with A = K * alpha
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(y[j].dot(A[j].T) for j in range(n_patients))
        yy += rho * (w_1 + w_2 - u_1 - u_2)

        coef = _solve_cholesky_kernel(KK, yy[..., None], 2 * rho).ravel()

        # update residuals
        u_1 += coef - w_1
        u_2 += coef - w_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(squared_norm(coef - w_1) + squared_norm(coef - w_2))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) + squared_norm(w_2 - w_2_old))

        obj = objective_admm(K, y, alpha, lamda, beta, coef, w_1, w_2)

        check = convergence(
            obj=obj, rnorm=rnorm, snorm=snorm,
            e_pri=np.sqrt(2 * coef.size) * tol + rtol * max(
                np.sqrt(squared_norm(coef) + squared_norm(coef)),
                np.sqrt(squared_norm(w_1) + squared_norm(w_2))),
            e_dual=np.sqrt(2 * coef.size) * tol + rtol * rho * (
                np.sqrt(squared_norm(u_1) + squared_norm(u_2))))

        w_1_old = w_1.copy()
        w_2_old = w_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u_1 *= rho / rho_new
        u_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
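update_rho is another helper these ADMM loops assume. A common choice, sketched here under the assumption that it follows the residual-balancing rule of Boyd et al. (grow rho when the primal residual dominates, shrink it when the dual residual dominates; the project's version may differ):

def update_rho(rho, rnorm, snorm, iteration=None, mu=10.0, tau_inc=2.0, tau_dec=2.0):
    # Residual balancing: keep rnorm and snorm within a factor mu of each other.
    if rnorm > mu * snorm:
        return rho * tau_inc
    if snorm > mu * rnorm:
        return rho / tau_dec
    return rho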
Example #10
File: lasso.py  Project: slipguru/mt-mkl
def lasso_kernel_admm(K,
                      y,
                      lamda=0.01,
                      rho=1.,
                      max_iter=100,
                      verbose=0,
                      rtol=1e-4,
                      tol=1e-4,
                      return_n_iter=True,
                      update_rho_options=None,
                      sample_weight=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||y_i - alpha_i * sum_{k=1}^{n_k} w_k * K_{ik}||^2
        + lamda ||w||_1 + beta sum_{j=1}^{c_i}||alpha_j||_2^2
    """
    n_kernels, n_samples, n_features = K.shape
    coef = np.ones(n_kernels)

    # alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]
    # u = [np.zeros(K[j].shape[1]) for j in range(n_patients)]
    w_1 = coef.copy()
    u_1 = np.zeros(n_kernels)

    # x_old = [np.zeros(K[0].shape[1]) for j in range(n_patients)]
    w_1_old = w_1.copy()
    Y = y[:, None].dot(y[:, None].T)

    checks = []
    for iteration_ in range(max_iter):
        # update w
        KK = 2 * np.tensordot(K, K.T, axes=([1, 2], [0, 1]))
        yy = 2 * np.tensordot(Y, K, axes=([0, 1], [1, 2]))
        yy += rho * (w_1 - u_1)
        coef = _solve_cholesky_kernel(KK, yy[..., None], rho).ravel()

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        # w_2 = prox_laplacian(coef + u_2, beta / rho)

        u_1 += coef - w_1

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(squared_norm(coef - w_1))
        snorm = rho * np.sqrt(squared_norm(w_1 - w_1_old))

        obj = lasso_objective(Y, coef, K, w_1, lamda)
        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(coef.size) * tol + rtol *
            max(np.sqrt(squared_norm(coef)), np.sqrt(squared_norm(w_1))),
            e_dual=np.sqrt(coef.size) * tol + rtol * rho *
            (np.sqrt(squared_norm(u_1))))

        w_1_old = w_1.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u_1 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
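Two more helpers appear in the diagnostics of these loops: squared_norm (as in sklearn.utils.extmath, the squared Euclidean/Frobenius norm of a flattened array) and convergence, which is presumably a small namedtuple-like record. Hedged sketches of both, with the field layout inferred from the "% check" print format above:

from collections import namedtuple
import numpy as np

def squared_norm(x):
    # Squared Euclidean (Frobenius) norm of a flattened array.
    x = np.ravel(x)
    return float(np.dot(x, x))

# Assumed field layout, inferred from the attribute accesses and print above.
convergence = namedtuple("convergence", ["obj", "rnorm", "snorm", "e_pri", "e_dual"])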
Example #11
def enet_kernel_learning_admm2(K,
                               y,
                               lamda=0.01,
                               beta=0.01,
                               rho=1.,
                               max_iter=100,
                               verbose=0,
                               rtol=1e-4,
                               tol=1e-4,
                               return_n_iter=True,
                               update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||y_i - alpha_i * sum_{k=1}^{n_k} w_k * K_{ik}||^2
        + lamda ||w||_1 + beta sum_{j=1}^{c_i}||alpha_j||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]

    u = [np.zeros(K[j].shape[1]) for j in range(n_patients)]
    u_1 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)

    x_old = [np.zeros(K[0].shape[1]) for j in range(n_patients)]
    w_1_old = w_1.copy()
    # w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update x
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        x = [
            prox_laplacian(y[j] + rho * (A[j].T.dot(alpha[j]) - u[j]),
                           rho / 2.) for j in range(n_patients)
        ]

        # update alpha
        # solve (AtA + 2I)^-1 (Aty) with A = wK
        KK = [rho * A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [rho * A[j].dot(x[j] + u[j]) for j in range(n_patients)]
        alpha = [
            _solve_cholesky_kernel(KK[j], yy[j][..., None], 2 * beta).ravel()
            for j in range(n_patients)
        ]
        # equivalent to alpha_dot_K
        # solve (sum(AtA) + 2*rho I)^-1 (sum(Aty) + rho(w1+w2-u1-u2))
        # with A = K * alpha
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(A[j].dot(x[j] + u[j]) for j in range(n_patients))
        yy += w_1 - u_1
        coef = _solve_cholesky_kernel(KK, yy[..., None], 1).ravel()

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        # w_2 = prox_laplacian(coef + u_2, beta / rho)

        # update residuals
        alpha_coef_K = [
            alpha[j].dot(K[j].T.dot(coef)) for j in range(n_patients)
        ]
        residuals = [x[j] - alpha_coef_K[j] for j in range(n_patients)]
        u = [u[j] + residuals[j] for j in range(n_patients)]
        u_1 += coef - w_1

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(coef - w_1) +
            sum(squared_norm(residuals[j]) for j in range(n_patients)))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) +
            sum(squared_norm(x[j] - x_old[j]) for j in range(n_patients)))

        obj = objective_admm2(x, y, alpha, lamda, beta, w_1)
        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(coef.size + sum(x[j].size
                                          for j in range(n_patients))) * tol +
            rtol * max(
                np.sqrt(
                    squared_norm(coef) + sum(
                        squared_norm(alpha_coef_K[j])
                        for j in range(n_patients))),
                np.sqrt(
                    squared_norm(w_1) +
                    sum(squared_norm(x[j]) for j in range(n_patients)))),
            e_dual=np.sqrt(coef.size + sum(x[j].size
                                           for j in range(n_patients))) * tol +
            rtol * rho * (np.sqrt(
                squared_norm(u_1) +
                sum(squared_norm(u[j]) for j in range(n_patients)))))

        w_1_old = w_1.copy()
        x_old = [x[j].copy() for j in range(n_patients)]

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u = [u[j] * (rho / rho_new) for j in range(n_patients)]
        u_1 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
Example #12
def enet_kernel_learning_admm(K,
                              y,
                              lamda=0.01,
                              beta=0.01,
                              rho=1.,
                              max_iter=100,
                              verbose=0,
                              rtol=1e-4,
                              tol=1e-4,
                              return_n_iter=True,
                              update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||alpha_i * w * K_i - y_i||^2 + lamda ||w||_1
        + beta ||w||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    u_1 = np.zeros(n_kernels)
    u_2 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)
    w_2 = np.zeros(n_kernels)

    w_1_old = w_1.copy()
    w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update alpha
        # solve (AtA + 2I)^-1 (Aty) with A = wK
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        KK = [A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [y[j].dot(A[j]) for j in range(n_patients)]

        alpha = [
            _solve_cholesky_kernel(KK[j], yy[j][..., None], 2).ravel()
            for j in range(n_patients)
        ]
        # alpha = [_solve_cholesky_kernel(
        #     K_dot_coef[j], y[j][..., None], 0).ravel() for j in range(n_patients)]

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        w_2 = prox_laplacian(coef + u_2, beta / rho)

        # equivalent to alpha_dot_K
        # solve (sum(AtA) + 2*rho I)^-1 (sum(Aty) + rho(w1+w2-u1-u2))
        # with A = K * alpha
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(y[j].dot(A[j].T) for j in range(n_patients))
        yy += rho * (w_1 + w_2 - u_1 - u_2)

        coef = _solve_cholesky_kernel(KK, yy[..., None], 2 * rho).ravel()

        # update residuals
        u_1 += coef - w_1
        u_2 += coef - w_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(squared_norm(coef - w_1) + squared_norm(coef - w_2))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) + squared_norm(w_2 - w_2_old))

        obj = objective_admm(K, y, alpha, lamda, beta, coef, w_1, w_2)

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(2 * coef.size) * tol +
            rtol * max(np.sqrt(squared_norm(coef) + squared_norm(coef)),
                       np.sqrt(squared_norm(w_1) + squared_norm(w_2))),
            e_dual=np.sqrt(2 * coef.size) * tol + rtol * rho *
            (np.sqrt(squared_norm(u_1) + squared_norm(u_2))))

        w_1_old = w_1.copy()
        w_2_old = w_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u_1 *= rho / rho_new
        u_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
Example #13
    def fit(self, X, y=None, sample_weight=None):
        """Fit Kernel Ridge regression model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values

        sample_weight : float or array-like of shape [n_samples]
            Individual weights for each sample, ignored if None is passed.

        Returns
        -------
        self : returns an instance of self.
        """
        # Convert data
        X, y = check_X_y(X,
                         y,
                         accept_sparse=("csr", "csc"),
                         multi_output=True,
                         y_numeric=True)
        if sample_weight is not None and not isinstance(sample_weight, float):
            sample_weight = check_array(sample_weight, ensure_2d=False)
        if self.kernel_mat is None:
            self.kernel_mat = self._get_kernel(X,
                                               nystroem_kernel=self.nystroem)
        alpha = np.atleast_1d(
            self.alpha)  # XXX not needed anymore for KTBoost?

        ravel = False
        if len(y.shape) == 1:
            y = y.reshape(-1, 1)
            ravel = True

        if self.nystroem:  # XXX maybe this can be done better (without the need for copying)
            y = y[self.component_indices].copy()
            if sample_weight is not None:
                sample_weight = sample_weight[self.component_indices].copy()
            X = X[self.component_indices].copy()

        if self.sparse:
            if self.solve_kernel is None:
                # Need to copy since, for the weighted case, the matrix gets modified.
                K = self.kernel_mat.copy()
                self.dual_coef_ = _solve_cholesky_kernel_sparse(
                    K, y, alpha, sample_weight)
            else:
                self.dual_coef_ = self.solve_kernel(y)
        else:
            if self.solve_kernel is None:
                # Need to copy since, for the weighted case, the matrix gets modified.
                self.dual_coef_ = _solve_cholesky_kernel(
                    self.kernel_mat, y, alpha, sample_weight, copy=True)
            else:
                self.dual_coef_ = self.solve_kernel.dot(y)

        if ravel:
            self.dual_coef_ = self.dual_coef_.ravel()

        self.X_fit_ = X

        return self