Ejemplo n.º 1
0
def test_csr_sparse_center_data():
    # Test output format of sparse_center_data, when input is csr
    X, y = make_regression()
    X[X < 2.5] = 0.0
    csr = sparse.csr_matrix(X)
    csr_, y, _, _, _ = sparse_center_data(csr, y, True)
    assert_equal(csr_.getformat(), 'csr')
Ejemplo n.º 2
0
def test_csr_sparse_center_data():
    # Test output format of sparse_center_data, when input is csr
    X, y = make_regression()
    X[X < 2.5] = 0.0
    csr = sparse.csr_matrix(X)
    csr_, y, _, _, _ = sparse_center_data(csr, y, True)
    assert_equal(csr_.getformat(), 'csr')
Ejemplo n.º 3
0
 def _preprocess_data(X,
                      y,
                      fit_intercept,
                      normalize,
                      copy=True,
                      return_mean=False):
     if not return_mean:
         return center_data(X, y, fit_intercept, normalize, copy=False)
     else:
         return sparse_center_data(X, y, fit_intercept, normalize)
Ejemplo n.º 4
0
def test_sparse_center_data():
    n_samples = 200
    n_features = 2
    rng = check_random_state(0)
    # random_state not supported yet in sparse.rand
    X = sparse.rand(n_samples, n_features, density=.5)  # , random_state=rng
    X = X.tolil()
    y = rng.rand(n_samples)
    XA = X.toarray()
    # XXX: currently scaled to variance=n_samples
    expected_X_std = np.std(XA, axis=0) * np.sqrt(X.shape[0])

    Xt, yt, X_mean, y_mean, X_std = sparse_center_data(X,
                                                       y,
                                                       fit_intercept=False,
                                                       normalize=False)
    assert_array_almost_equal(X_mean, np.zeros(n_features))
    assert_array_almost_equal(y_mean, 0)
    assert_array_almost_equal(X_std, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y)

    Xt, yt, X_mean, y_mean, X_std = sparse_center_data(X,
                                                       y,
                                                       fit_intercept=True,
                                                       normalize=False)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_std, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))

    Xt, yt, X_mean, y_mean, X_std = sparse_center_data(X,
                                                       y,
                                                       fit_intercept=True,
                                                       normalize=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_std, expected_X_std)
    assert_array_almost_equal(Xt.A, XA / expected_X_std)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))
Ejemplo n.º 5
0
def test_sparse_center_data():
    n_samples = 200
    n_features = 2
    rng = check_random_state(0)
    # random_state not supported yet in sparse.rand
    X = sparse.rand(n_samples, n_features, density=.5)  # , random_state=rng
    X = X.tolil()
    y = rng.rand(n_samples)
    XA = X.toarray()
    # XXX: currently scaled to variance=n_samples
    expected_X_std = np.std(XA, axis=0) * np.sqrt(X.shape[0])

    Xt, yt, X_mean, y_mean, X_std = sparse_center_data(X, y,
                                                       fit_intercept=False,
                                                       normalize=False)
    assert_array_almost_equal(X_mean, np.zeros(n_features))
    assert_array_almost_equal(y_mean, 0)
    assert_array_almost_equal(X_std, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y)

    Xt, yt, X_mean, y_mean, X_std = sparse_center_data(X, y,
                                                       fit_intercept=True,
                                                       normalize=False)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_std, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))

    Xt, yt, X_mean, y_mean, X_std = sparse_center_data(X, y,
                                                       fit_intercept=True,
                                                       normalize=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_std, expected_X_std)
    assert_array_almost_equal(Xt.A, XA / expected_X_std)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))
Ejemplo n.º 6
0
    def _sparse_fit(self, X, y, Xy=None, coef_init=None):

        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y have incompatible shapes.\n" +
                             "Note: Sparse matrices cannot be indexed w/" +
                             "boolean masks (use `indices=True` in CV).")

        # NOTE: we are explicitly not centering the data the naive way to
        # avoid breaking the sparsity of X
        X_data, y, X_mean, y_mean, X_std = sparse_center_data(
            X, y, self.fit_intercept, self.normalize)

        if y.ndim == 1:
            y = y[:, np.newaxis]

        n_samples, n_features = X.shape[0], X.shape[1]
        n_targets = y.shape[1]

        coef_ = self._init_coef(coef_init, n_features, n_targets)
        dual_gap_ = np.empty(n_targets)
        eps_ = np.empty(n_targets)

        l1_reg = self.alpha * self.l1_ratio* n_samples
     
        l2_reg =0.0 #self.alpha * (1.0 - self.l1_ratio) * n_samples

        for k in xrange(n_targets):
            coef_[k, :], dual_gap_[k], eps_[k] = \
                cd_fast.sparse_enet_coordinate_descent(
                    coef_[k, :], l1_reg, l2_reg, X_data, X.indices,
                    X.indptr, y[:, k], X_mean / X_std,
                    self.max_iter, self.tol, True)

            if dual_gap_[k] > eps_[k]:
                warnings.warn('Objective did not converge for ' +
                              'target %d, you might want' % k +
                              ' to increase the number of iterations')

        self.coef_, self.dual_gap_, self.eps_ = (np.squeeze(a) for a in
                                                 (coef_, dual_gap_, eps_))
        self._set_intercept(X_mean, y_mean, X_std)

        # return self for chaining fit and predict calls
        return self
Ejemplo n.º 7
0
def test_deprecation_center_data():
    n_samples = 200
    n_features = 2

    w = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)

    param_grid = product([True, False], [True, False], [True, False],
                         [None, w])

    for (fit_intercept, normalize, copy, sample_weight) in param_grid:

        XX = X.copy()  # such that we can try copy=False as well

        X1, y1, X1_mean, X1_var, y1_mean = \
            center_data(XX, y, fit_intercept=fit_intercept,
                        normalize=normalize, copy=copy,
                        sample_weight=sample_weight)

        XX = X.copy()

        X2, y2, X2_mean, X2_var, y2_mean = \
            _preprocess_data(XX, y, fit_intercept=fit_intercept,
                             normalize=normalize, copy=copy,
                             sample_weight=sample_weight)

        assert_array_almost_equal(X1, X2)
        assert_array_almost_equal(y1, y2)
        assert_array_almost_equal(X1_mean, X2_mean)
        assert_array_almost_equal(X1_var, X2_var)
        assert_array_almost_equal(y1_mean, y2_mean)

    # Sparse cases
    X = sparse.csr_matrix(X)

    for (fit_intercept, normalize, copy, sample_weight) in param_grid:

        X1, y1, X1_mean, X1_var, y1_mean = \
            center_data(X, y, fit_intercept=fit_intercept, normalize=normalize,
                        copy=copy, sample_weight=sample_weight)

        X2, y2, X2_mean, X2_var, y2_mean = \
            _preprocess_data(X, y, fit_intercept=fit_intercept,
                             normalize=normalize, copy=copy,
                             sample_weight=sample_weight, return_mean=False)

        assert_array_almost_equal(X1.toarray(), X2.toarray())
        assert_array_almost_equal(y1, y2)
        assert_array_almost_equal(X1_mean, X2_mean)
        assert_array_almost_equal(X1_var, X2_var)
        assert_array_almost_equal(y1_mean, y2_mean)

    for (fit_intercept, normalize) in product([True, False], [True, False]):

        X1, y1, X1_mean, X1_var, y1_mean = \
            sparse_center_data(X, y, fit_intercept=fit_intercept,
                               normalize=normalize)

        X2, y2, X2_mean, X2_var, y2_mean = \
            _preprocess_data(X, y, fit_intercept=fit_intercept,
                             normalize=normalize, return_mean=True)

        assert_array_almost_equal(X1.toarray(), X2.toarray())
        assert_array_almost_equal(y1, y2)
        assert_array_almost_equal(X1_mean, X2_mean)
        assert_array_almost_equal(X1_var, X2_var)
        assert_array_almost_equal(y1_mean, y2_mean)
Ejemplo n.º 8
0
def test_deprecation_center_data():
    n_samples = 200
    n_features = 2

    w = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)

    param_grid = product([True, False], [True, False], [True, False],
                         [None, w])

    for (fit_intercept, normalize, copy, sample_weight) in param_grid:

        XX = X.copy()  # such that we can try copy=False as well

        X1, y1, X1_mean, X1_var, y1_mean = \
            center_data(XX, y, fit_intercept=fit_intercept,
                        normalize=normalize, copy=copy,
                        sample_weight=sample_weight)

        XX = X.copy()

        X2, y2, X2_mean, X2_var, y2_mean = \
            _preprocess_data(XX, y, fit_intercept=fit_intercept,
                             normalize=normalize, copy=copy,
                             sample_weight=sample_weight)

        assert_array_almost_equal(X1, X2)
        assert_array_almost_equal(y1, y2)
        assert_array_almost_equal(X1_mean, X2_mean)
        assert_array_almost_equal(X1_var, X2_var)
        assert_array_almost_equal(y1_mean, y2_mean)

    # Sparse cases
    X = sparse.csr_matrix(X)

    for (fit_intercept, normalize, copy, sample_weight) in param_grid:

        X1, y1, X1_mean, X1_var, y1_mean = \
            center_data(X, y, fit_intercept=fit_intercept, normalize=normalize,
                        copy=copy, sample_weight=sample_weight)

        X2, y2, X2_mean, X2_var, y2_mean = \
            _preprocess_data(X, y, fit_intercept=fit_intercept,
                             normalize=normalize, copy=copy,
                             sample_weight=sample_weight, return_mean=False)

        assert_array_almost_equal(X1.toarray(), X2.toarray())
        assert_array_almost_equal(y1, y2)
        assert_array_almost_equal(X1_mean, X2_mean)
        assert_array_almost_equal(X1_var, X2_var)
        assert_array_almost_equal(y1_mean, y2_mean)

    for (fit_intercept, normalize) in product([True, False], [True, False]):

        X1, y1, X1_mean, X1_var, y1_mean = \
            sparse_center_data(X, y, fit_intercept=fit_intercept,
                               normalize=normalize)

        X2, y2, X2_mean, X2_var, y2_mean = \
            _preprocess_data(X, y, fit_intercept=fit_intercept,
                             normalize=normalize, return_mean=True)

        assert_array_almost_equal(X1.toarray(), X2.toarray())
        assert_array_almost_equal(y1, y2)
        assert_array_almost_equal(X1_mean, X2_mean)
        assert_array_almost_equal(X1_var, X2_var)
        assert_array_almost_equal(y1_mean, y2_mean)