def test_incr_mean_variance_axis_ignore_nan(axis, sparse_constructor):
    old_means = np.array([535., 535., 535., 535.])
    old_variances = np.array([4225., 4225., 4225., 4225.])
    old_sample_count = np.array([2, 2, 2, 2], dtype=np.int64)

    X = sparse_constructor(
        np.array([[170, 170, 170, 170],
                  [430, 430, 430, 430],
                  [300, 300, 300, 300]]))

    X_nan = sparse_constructor(
        np.array([[170, np.nan, 170, 170],
                  [np.nan, 170, 430, 430],
                  [430, 430, np.nan, 300],
                  [300, 300, 300, np.nan]]))

    # we avoid creating specific data for axis 0 and 1: transposing the data
    # is enough.
    if axis:
        X = X.T
        X_nan = X_nan.T

    # take a copy of the old statistics since they are modified in place.
    X_means, X_vars, X_sample_count = incr_mean_variance_axis(
        X, axis, old_means.copy(), old_variances.copy(),
        old_sample_count.copy())
    X_nan_means, X_nan_vars, X_nan_sample_count = incr_mean_variance_axis(
        X_nan, axis, old_means.copy(), old_variances.copy(),
        old_sample_count.copy())

    assert_allclose(X_nan_means, X_means)
    assert_allclose(X_nan_vars, X_vars)
    assert_allclose(X_nan_sample_count, X_sample_count)
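
# Standalone hedged sketch (not part of the test module above) of the
# NaN-ignoring behaviour the test exercises: starting from zeroed running
# statistics, incr_mean_variance_axis on data containing NaNs is expected to
# match np.nanmean / np.nanvar computed column-wise on the dense array,
# assuming a scikit-learn version with NaN support in the sparse helpers.
import numpy as np
import scipy.sparse as sp
from numpy.testing import assert_allclose
from sklearn.utils.sparsefuncs import incr_mean_variance_axis

X_demo = sp.csr_matrix(np.array([[170., np.nan, 170.],
                                 [np.nan, 170., 430.],
                                 [430., 430., np.nan]]))
n_features_demo = X_demo.shape[1]
mean, var, n = incr_mean_variance_axis(
    X_demo, axis=0,
    last_mean=np.zeros(n_features_demo),
    last_var=np.zeros(n_features_demo),
    last_n=np.zeros(n_features_demo, dtype=np.int64))
assert_allclose(mean, np.nanmean(X_demo.toarray(), axis=0))
assert_allclose(var, np.nanvar(X_demo.toarray(), axis=0))
assert_allclose(n, [2, 2, 2])  # NaNs are not counted as observed samples
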
def test_incr_mean_variance_axis_dim_mismatch(sparse_constructor):
    """Check that we raise proper error when axis=1 and the dimension mismatch.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/18655
    """
    n_samples, n_features = 60, 4
    rng = np.random.RandomState(42)
    X = sparse_constructor(rng.rand(n_samples, n_features))

    last_mean = np.zeros(n_features)
    last_var = np.zeros_like(last_mean)
    last_n = np.zeros(last_mean.shape, dtype=np.int64)

    kwargs = dict(last_mean=last_mean, last_var=last_var, last_n=last_n)
    mean0, var0, _ = incr_mean_variance_axis(X, axis=0, **kwargs)
    assert_allclose(np.mean(X.toarray(), axis=0), mean0)
    assert_allclose(np.var(X.toarray(), axis=0), var0)

    # ValueError is raised for axis=1 because last_mean has size n_features
    # instead of n_samples
    with pytest.raises(ValueError):
        incr_mean_variance_axis(X, axis=1, **kwargs)

    # test inconsistent shapes of last_mean, last_var, last_n
    kwargs = dict(last_mean=last_mean[:-1], last_var=last_var, last_n=last_n)
    with pytest.raises(ValueError):
        incr_mean_variance_axis(X, axis=0, **kwargs)
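
# Hedged companion sketch for the dimension check above (illustrative, not
# part of the test module): with axis=1 the running statistics are per-row,
# so last_mean / last_var / last_n must have length n_samples rather than
# n_features for the call to be accepted.
import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs import incr_mean_variance_axis

rng_demo = np.random.RandomState(42)
n_samples_demo, n_features_demo = 60, 4
X_demo = sp.csr_matrix(rng_demo.rand(n_samples_demo, n_features_demo))

mean1, var1, n1 = incr_mean_variance_axis(
    X_demo, axis=1,
    last_mean=np.zeros(n_samples_demo),
    last_var=np.zeros(n_samples_demo),
    last_n=np.zeros(n_samples_demo, dtype=np.int64))
np.testing.assert_allclose(mean1, X_demo.toarray().mean(axis=1))
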
def test_incr_mean_variance_axis_weighted_axis1(Xw, X, weights,
                                                sparse_constructor, dtype):
    axis = 1
    Xw_sparse = sparse_constructor(Xw).astype(dtype)
    X_sparse = sparse_constructor(X).astype(dtype)

    last_mean = np.zeros(np.shape(Xw)[0], dtype=dtype)
    last_var = np.zeros_like(last_mean, dtype=dtype)
    last_n = np.zeros_like(last_mean, dtype=np.int64)
    means0, vars0, n_incr0 = incr_mean_variance_axis(X=X_sparse,
                                                     axis=axis,
                                                     last_mean=last_mean,
                                                     last_var=last_var,
                                                     last_n=last_n,
                                                     weights=None)

    means_w0, vars_w0, n_incr_w0 = incr_mean_variance_axis(X=Xw_sparse,
                                                           axis=axis,
                                                           last_mean=last_mean,
                                                           last_var=last_var,
                                                           last_n=last_n,
                                                           weights=weights)

    assert means_w0.dtype == dtype
    assert vars_w0.dtype == dtype
    assert n_incr_w0.dtype == dtype

    means_simple, vars_simple = mean_variance_axis(X=X_sparse, axis=axis)

    assert_array_almost_equal(means0, means_w0)
    assert_array_almost_equal(means0, means_simple)
    assert_array_almost_equal(vars0, vars_w0)
    assert_array_almost_equal(vars0, vars_simple)
    assert_array_almost_equal(n_incr0, n_incr_w0)

    # check second round for incremental
    means1, vars1, n_incr1 = incr_mean_variance_axis(X=X_sparse,
                                                     axis=axis,
                                                     last_mean=means0,
                                                     last_var=vars0,
                                                     last_n=n_incr0,
                                                     weights=None)

    means_w1, vars_w1, n_incr_w1 = incr_mean_variance_axis(X=Xw_sparse,
                                                           axis=axis,
                                                           last_mean=means_w0,
                                                           last_var=vars_w0,
                                                           last_n=n_incr_w0,
                                                           weights=weights)

    assert_array_almost_equal(means1, means_w1)
    assert_array_almost_equal(vars1, vars_w1)
    assert_array_almost_equal(n_incr1, n_incr_w1)

    assert means_w1.dtype == dtype
    assert vars_w1.dtype == dtype
    assert n_incr_w1.dtype == dtype
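
# Hedged sketch of the relationship the Xw / X / weights fixtures above are
# presumed to encode: integer sample weights act like repeating the weighted
# samples. For axis=1 the samples are columns, so weighting column j by w[j]
# should match an unweighted call on data whose column j is repeated w[j]
# times. Illustrative only; every name below is local to this sketch.
import numpy as np
import scipy.sparse as sp
from numpy.testing import assert_allclose
from sklearn.utils.sparsefuncs import incr_mean_variance_axis

Xw_demo = np.array([[1., 0., 3.],
                    [0., 2., 0.]])
weights_demo = np.array([1., 2., 1.])
X_rep = np.repeat(Xw_demo, weights_demo.astype(int), axis=1)

def _zero_stats(size):
    # fresh arrays for each call, since the statistics are updated in place
    return dict(last_mean=np.zeros(size), last_var=np.zeros(size),
                last_n=np.zeros(size, dtype=np.int64))

n_rows = Xw_demo.shape[0]
mean_w, var_w, n_w = incr_mean_variance_axis(
    sp.csr_matrix(Xw_demo), axis=1, weights=weights_demo,
    **_zero_stats(n_rows))
mean_r, var_r, n_r = incr_mean_variance_axis(
    sp.csr_matrix(X_rep), axis=1, weights=None, **_zero_stats(n_rows))

assert_allclose(mean_w, mean_r)
assert_allclose(var_w, var_r)
assert_allclose(n_w, n_r)
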
def test_incr_mean_variance_no_new_n():
    # check the behaviour when we update the variance with an empty matrix
    axis = 0
    X1 = sp.random(5, 1, density=0.8, random_state=0).tocsr()
    X2 = sp.random(0, 1, density=0.8, random_state=0).tocsr()
    last_mean, last_var = np.zeros(X1.shape[1]), np.zeros(X1.shape[1])
    last_n = np.zeros(X1.shape[1], dtype=np.int64)
    last_mean, last_var, last_n = incr_mean_variance_axis(
        X1, axis, last_mean, last_var, last_n)
    # update the statistics with an empty matrix, whose contribution should
    # be ignored
    updated_mean, updated_var, updated_n = incr_mean_variance_axis(
        X2, axis, last_mean, last_var, last_n)
    assert_allclose(updated_mean, last_mean)
    assert_allclose(updated_var, last_var)
    assert_allclose(updated_n, last_n)
def test_incr_mean_variance_axis_equivalence_mean_variance(X1, X2):
    # non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/16448
    # check that computing the incremental mean and variance is equivalent to
    # computing the mean and variance on the stacked dataset.
    axis = 0
    last_mean, last_var = np.zeros(X1.shape[1]), np.zeros(X1.shape[1])
    last_n = np.zeros(X1.shape[1], dtype=np.int64)
    updated_mean, updated_var, updated_n = incr_mean_variance_axis(
        X1, axis, last_mean, last_var, last_n)
    updated_mean, updated_var, updated_n = incr_mean_variance_axis(
        X2, axis, updated_mean, updated_var, updated_n)
    X = sp.vstack([X1, X2])
    assert_allclose(updated_mean, np.nanmean(X.toarray(), axis=axis))
    assert_allclose(updated_var, np.nanvar(X.toarray(), axis=axis))
    assert_allclose(updated_n, np.count_nonzero(~np.isnan(X.toarray()), axis=0))
def test_incr_mean_variance_n_float():
    # check the behaviour when last_n is just a number
    axis = 0
    X = sp.random(5, 2, density=0.8, random_state=0).tocsr()
    last_mean, last_var = np.zeros(X.shape[1]), np.zeros(X.shape[1])
    last_n = 0
    _, _, new_n = incr_mean_variance_axis(
        X, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
    )
    assert_allclose(new_n, np.full(X.shape[1], X.shape[0]))
    def partial_fit(self, X, y=None):
        """Online computation of mean and std on X for later scaling.
        All of X is processed as a single batch. This is intended for cases
        when `fit` is not feasible due to very large number of `n_samples`
        or because X is read from a continuous stream.

        The algorithm for incremental mean and std is given in Equation 1.5a,b
        in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. "Algorithms
        for computing the sample variance: Analysis and recommendations."
        The American Statistician 37.3 (1983): 242-247:

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to compute the mean and standard deviation
            used for later scaling along the features axis.

        y: Passthrough for ``Pipeline`` compatibility.
        """
        X = check_array(X,
                        accept_sparse=('csr', 'csc'),
                        copy=self.copy,
                        ensure_2d=False,
                        warn_on_dtype=True,
                        estimator=self,
                        dtype=FLOAT_DTYPES)

        # Even in the case of `with_mean=False`, we update the mean anyway
        # This is needed for the incremental computation of the var
        # See incr_mean_variance_axis and _incremental_mean_and_var

        if not sparse.issparse(X):
            return super(SparseScaler, self).partial_fit(X)

        if self.with_std:
            # First pass
            if not hasattr(self, 'n_samples_seen_'):
                self.mean_, self.var_ = mean_variance_axis(X, axis=0)
                n = X.shape[0]
                self.n_samples_seen_ = n
            # Next passes
            else:
                self.mean_, self.var_, self.n_samples_seen_ = \
                    incr_mean_variance_axis(X, axis=0,
                                            last_mean=self.mean_,
                                            last_var=self.var_,
                                            last_n=self.n_samples_seen_)

        if self.with_std:
            self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_))
        else:
            self.scale_ = None

        return self
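
# Hedged, self-contained sketch of the Chan/Golub/LeVeque combine step that
# the docstring above cites (Equations 1.5a,b): two batches are merged from
# their counts, means and sums of squared deviations. This illustrates the
# arithmetic only; it is not the code path the library helpers execute.
import numpy as np

def combine_mean_var(n_a, mean_a, var_a, n_b, mean_b, var_b):
    """Merge per-feature statistics of two batches (population variance)."""
    n = n_a + n_b
    delta = mean_b - mean_a
    mean = mean_a + delta * n_b / n                # combined mean (cf. Eq. 1.5a)
    m2 = (var_a * n_a + var_b * n_b
          + delta ** 2 * n_a * n_b / n)            # combined sum of squared
                                                   # deviations (cf. Eq. 1.5b)
    return n, mean, m2 / n

rng_demo = np.random.RandomState(0)
A, B = rng_demo.rand(30, 4), rng_demo.rand(50, 4)
n, mean, var = combine_mean_var(len(A), A.mean(axis=0), A.var(axis=0),
                                len(B), B.mean(axis=0), B.var(axis=0))
stacked = np.vstack([A, B])
assert np.allclose(mean, stacked.mean(axis=0))
assert np.allclose(var, stacked.var(axis=0))
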
def test_mean_variance_illegal_axis():
    X, _ = make_classification(5, 4, random_state=0)
    # Sparsify the array a little bit
    X[0, 0] = 0
    X[2, 1] = 0
    X[4, 3] = 0
    X_csr = sp.csr_matrix(X)
    with pytest.raises(ValueError):
        mean_variance_axis(X_csr, axis=-3)
    with pytest.raises(ValueError):
        mean_variance_axis(X_csr, axis=2)
    with pytest.raises(ValueError):
        mean_variance_axis(X_csr, axis=-1)

    with pytest.raises(ValueError):
        incr_mean_variance_axis(X_csr,
                                axis=-3,
                                last_mean=None,
                                last_var=None,
                                last_n=None)

    with pytest.raises(ValueError):
        incr_mean_variance_axis(X_csr,
                                axis=2,
                                last_mean=None,
                                last_var=None,
                                last_n=None)

    with pytest.raises(ValueError):
        incr_mean_variance_axis(X_csr,
                                axis=-1,
                                last_mean=None,
                                last_var=None,
                                last_n=None)
def test_incr_mean_variance_axis():
    for axis in [0, 1]:
        rng = np.random.RandomState(0)
        n_features = 50
        n_samples = 10
        data_chunks = [
            rng.randint(0, 2, size=n_features) for i in range(n_samples)
        ]

        # default params for incr_mean_variance
        last_mean = np.zeros(n_features)
        last_var = np.zeros_like(last_mean)
        last_n = 0

        # Test errors
        X = np.array(data_chunks[0])
        X = np.atleast_2d(X)
        X_lil = sp.lil_matrix(X)
        X_csr = sp.csr_matrix(X_lil)
        assert_raises(TypeError, incr_mean_variance_axis, axis, last_mean,
                      last_var, last_n)
        assert_raises(TypeError, incr_mean_variance_axis, axis, last_mean,
                      last_var, last_n)
        assert_raises(TypeError, incr_mean_variance_axis, X_lil, axis,
                      last_mean, last_var, last_n)

        # Test _incr_mean_and_var with a 1 row input
        X_means, X_vars = mean_variance_axis(X_csr, axis)
        X_means_incr, X_vars_incr, n_incr = \
            incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)  # X.shape[axis] picks # samples

        X_csc = sp.csc_matrix(X_lil)
        X_means, X_vars = mean_variance_axis(X_csc, axis)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        # Test _incremental_mean_and_var with whole data
        X = np.vstack(data_chunks)
        X_lil = sp.lil_matrix(X)
        X_csr = sp.csr_matrix(X_lil)
        X_csc = sp.csc_matrix(X_lil)

        expected_dtypes = [(np.float32, np.float32), (np.float64, np.float64),
                           (np.int32, np.float64), (np.int64, np.float64)]

        for input_dtype, output_dtype in expected_dtypes:
            for X_sparse in (X_csr, X_csc):
                X_sparse = X_sparse.astype(input_dtype)
                X_means, X_vars = mean_variance_axis(X_sparse, axis)
                X_means_incr, X_vars_incr, n_incr = \
                    incr_mean_variance_axis(X_sparse, axis, last_mean,
                                            last_var, last_n)
                assert_equal(X_means_incr.dtype, output_dtype)
                assert_equal(X_vars_incr.dtype, output_dtype)
                assert_array_almost_equal(X_means, X_means_incr)
                assert_array_almost_equal(X_vars, X_vars_incr)
                assert_equal(X.shape[axis], n_incr)
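
# Small hedged illustration of the dtype rule encoded by expected_dtypes
# above: float32 input is expected to keep float32 statistics, while integer
# input is promoted to float64. Standalone sketch, separate from the test.
import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs import incr_mean_variance_axis

X_int = sp.csr_matrix(np.arange(12, dtype=np.int64).reshape(4, 3))
mean_i, var_i, _ = incr_mean_variance_axis(
    X_int, axis=0, last_mean=np.zeros(3), last_var=np.zeros(3),
    last_n=np.zeros(3, dtype=np.int64))
print(mean_i.dtype, var_i.dtype)   # expected: float64 float64

X_f32 = sp.csr_matrix(np.arange(12, dtype=np.float32).reshape(4, 3))
mean_f, var_f, _ = incr_mean_variance_axis(
    X_f32, axis=0,
    last_mean=np.zeros(3, dtype=np.float32),
    last_var=np.zeros(3, dtype=np.float32),
    last_n=np.zeros(3, dtype=np.int64))
print(mean_f.dtype, var_f.dtype)   # expected: float32 float32
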
def test_incr_mean_variance_axis():
    for axis in [0, 1]:
        rng = np.random.RandomState(0)
        n_features = 50
        n_samples = 10
        data_chunks = [
            rng.randint(0, 2, size=n_features) for i in range(n_samples)
        ]

        # default params for incr_mean_variance
        last_mean = np.zeros(n_features)
        last_var = np.zeros_like(last_mean)
        last_n = 0

        # Test errors
        X = np.array(data_chunks[0])
        X = np.atleast_2d(X)
        X_lil = sp.lil_matrix(X)
        X_csr = sp.csr_matrix(X_lil)
        assert_raises(TypeError, incr_mean_variance_axis, axis, last_mean,
                      last_var, last_n)
        assert_raises(TypeError, incr_mean_variance_axis, axis, last_mean,
                      last_var, last_n)
        assert_raises(TypeError, incr_mean_variance_axis, X_lil, axis,
                      last_mean, last_var, last_n)

        # Test _incr_mean_and_var with a 1 row input
        X_means, X_vars = mean_variance_axis(X_csr, axis)
        X_means_incr, X_vars_incr, n_incr = \
            incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)  # X.shape[axis] picks # samples

        X_csc = sp.csc_matrix(X_lil)
        X_means, X_vars = mean_variance_axis(X_csc, axis)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        # Test _incremental_mean_and_var with whole data
        X = np.vstack(data_chunks)
        X_lil = sp.lil_matrix(X)
        X_csr = sp.csr_matrix(X_lil)
        X_means, X_vars = mean_variance_axis(X_csr, axis)
        X_means_incr, X_vars_incr, n_incr = \
            incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        X_csc = sp.csc_matrix(X_lil)
        X_means, X_vars = mean_variance_axis(X_csc, axis)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        # All data but as float
        X = X.astype(np.float32)
        X_csr = X_csr.astype(np.float32)
        X_means, X_vars = mean_variance_axis(X_csr, axis)
        X_means_incr, X_vars_incr, n_incr = \
            incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        X_csc = X_csc.astype(np.float32)
        X_means, X_vars = mean_variance_axis(X_csc, axis)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)
def test_incr_mean_variance_axis():
    for axis in [0, 1]:
        rng = np.random.RandomState(0)
        n_features = 50
        n_samples = 10
        data_chunks = [rng.randint(0, 2, size=n_features)
                       for i in range(n_samples)]

        # default params for incr_mean_variance
        last_mean = np.zeros(n_features)
        last_var = np.zeros_like(last_mean)
        last_n = np.zeros_like(last_mean, dtype=np.int64)

        # Test errors
        X = np.array(data_chunks[0])
        X = np.atleast_2d(X)
        X_lil = sp.lil_matrix(X)
        X_csr = sp.csr_matrix(X_lil)
        assert_raises(TypeError, incr_mean_variance_axis, axis,
                      last_mean, last_var, last_n)
        assert_raises(TypeError, incr_mean_variance_axis, axis,
                      last_mean, last_var, last_n)
        assert_raises(TypeError, incr_mean_variance_axis, X_lil, axis,
                      last_mean, last_var, last_n)

        # Test _incr_mean_and_var with a 1 row input
        X_means, X_vars = mean_variance_axis(X_csr, axis)
        X_means_incr, X_vars_incr, n_incr = \
            incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)  # X.shape[axis] picks # samples

        X_csc = sp.csc_matrix(X_lil)
        X_means, X_vars = mean_variance_axis(X_csc, axis)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        # Test _incremental_mean_and_var with whole data
        X = np.vstack(data_chunks)
        X_lil = sp.lil_matrix(X)
        X_csr = sp.csr_matrix(X_lil)
        X_csc = sp.csc_matrix(X_lil)

        expected_dtypes = [(np.float32, np.float32),
                           (np.float64, np.float64),
                           (np.int32, np.float64),
                           (np.int64, np.float64)]

        for input_dtype, output_dtype in expected_dtypes:
            for X_sparse in (X_csr, X_csc):
                X_sparse = X_sparse.astype(input_dtype)
                last_mean = last_mean.astype(output_dtype)
                last_var = last_var.astype(output_dtype)
                X_means, X_vars = mean_variance_axis(X_sparse, axis)
                X_means_incr, X_vars_incr, n_incr = \
                    incr_mean_variance_axis(X_sparse, axis, last_mean,
                                            last_var, last_n)
                assert_equal(X_means_incr.dtype, output_dtype)
                assert_equal(X_vars_incr.dtype, output_dtype)
                assert_array_almost_equal(X_means, X_means_incr)
                assert_array_almost_equal(X_vars, X_vars_incr)
                assert_equal(X.shape[axis], n_incr)
    def partial_fit(self, X, y=None):
        """
        Performs online computation of mean and standard deviation on X for later scaling.
        All of X is processed as a single batch.
        This is intended for cases when `fit` is
        not feasible due to very large number of `n_samples`
        or because X is read from a continuous stream.

        The algorithm for incremental mean
        and std is given in Equation 1.5a,b in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. "Algorithms
        for computing the sample variance: Analysis and recommendations."
        The American Statistician 37.3 (1983): 242-247

        :param X: Data matrix to scale.
        :type X: numpy.ndarray, shape [n_samples, n_features]
        :param y: Passthrough for Scikit-learn ``Pipeline`` compatibility.
        :type y: None
        :return: Fitted object.
        :rtype: pyChemometrics.ChemometricsScaler

        """

        X = check_array(X,
                        accept_sparse=('csr', 'csc'),
                        copy=self.copy,
                        estimator=self,
                        dtype=FLOAT_DTYPES)

        # Even in the case of `with_mean=False`, we update the mean anyway
        # This is needed for the incremental computation of the var
        # See incr_mean_variance_axis and _incremental_mean_and_var

        if sparse.issparse(X):
            if self.with_mean:
                raise ValueError(
                    "Cannot center sparse matrices: pass `with_mean=False` "
                    "instead. See docstring for motivation and alternatives.")
            if self.with_std:
                # First pass
                if not hasattr(self, 'n_samples_seen_'):
                    self.mean_, self.var_ = mean_variance_axis(X, axis=0)
                    self.n_samples_seen_ = X.shape[0]
                # Next passes
                else:
                    self.mean_, self.var_, self.n_samples_seen_ = \
                        incr_mean_variance_axis(X, axis=0,
                                                last_mean=self.mean_,
                                                last_var=self.var_,
                                                last_n=self.n_samples_seen_)
            else:
                self.mean_ = None
                self.var_ = None
        else:
            # First pass
            if not hasattr(self, 'n_samples_seen_'):
                self.mean_ = .0
                self.n_samples_seen_ = 0
                if self.with_std:
                    self.var_ = .0
                else:
                    self.var_ = None

            self.mean_, self.var_, self.n_samples_seen_ = \
                _incremental_mean_and_var(X, self.mean_, self.var_,
                                          self.n_samples_seen_)

        if self.with_std:
            self.scale_ = _handle_zeros_in_scale(numpy.sqrt(
                self.var_))**self.scale_power
        else:
            self.scale_ = None

        return self
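
# Hedged illustration of the `scale_power` generalisation used above: the
# per-feature scale is the standard deviation raised to `scale_power`, so
# (under the conventional chemometrics reading) 1.0 gives unit-variance
# scaling, 0.5 gives Pareto scaling and 0.0 reduces to mean centering only.
# Standalone sketch, independent of the ChemometricsScaler class.
import numpy as np

X_demo = np.array([[1., 10.], [2., 30.], [3., 50.]])
std = X_demo.std(axis=0)
for power in (1.0, 0.5, 0.0):
    scale = std ** power        # scale_power = 0 -> scale of 1, centering only
    X_scaled = (X_demo - X_demo.mean(axis=0)) / scale
    print(power, X_scaled.std(axis=0))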