Beispiel #1
0
def test_iterative_imputer_early_stopping():
    rng = np.random.RandomState(0)
    n = 50
    d = 5
    A = rng.rand(n, 1)
    B = rng.rand(1, d)
    X = np.dot(A, B)
    nan_mask = rng.rand(n, d) < 0.5
    X_missing = X.copy()
    X_missing[nan_mask] = np.nan

    imputer = IterativeImputer(max_iter=100,
                               tol=1e-2,
                               sample_posterior=False,
                               verbose=1,
                               random_state=rng)
    X_filled_100 = imputer.fit_transform(X_missing)
    assert len(imputer.imputation_sequence_) == d * imputer.n_iter_

    imputer = IterativeImputer(max_iter=imputer.n_iter_,
                               sample_posterior=False,
                               verbose=1,
                               random_state=rng)
    X_filled_early = imputer.fit_transform(X_missing)
    assert_allclose(X_filled_100, X_filled_early, atol=1e-7)

    imputer = IterativeImputer(max_iter=100,
                               tol=0,
                               sample_posterior=False,
                               verbose=1,
                               random_state=rng)
    imputer.fit(X_missing)
    assert imputer.n_iter_ == imputer.max_iter
Beispiel #2
0
def test_iterative_imputer_one_feature(X):
    # check we exit early when there is a single feature
    imputer = IterativeImputer().fit(X)
    assert imputer.n_iter_ == 0
    imputer = IterativeImputer()
    imputer.fit([[1], [2]])
    assert imputer.n_iter_ == 0
    imputer.fit([[1], [np.nan]])
    assert imputer.n_iter_ == 0
Beispiel #3
0
def test_iterative_imputer_verbose():
    rng = np.random.RandomState(0)

    n = 100
    d = 3
    X = _sparse_random_matrix(n, d, density=0.10, random_state=rng).toarray()
    imputer = IterativeImputer(missing_values=0, max_iter=1, verbose=1)
    imputer.fit(X)
    imputer.transform(X)
    imputer = IterativeImputer(missing_values=0, max_iter=1, verbose=2)
    imputer.fit(X)
    imputer.transform(X)
Beispiel #4
0
def test_iterative_imputer_skip_non_missing(skip_complete):
    # check the imputing strategy when missing data are present in the
    # testing set only.
    # taken from: https://github.com/scikit-learn/scikit-learn/issues/14383
    rng = np.random.RandomState(0)
    X_train = np.array([
        [5, 2, 2, 1],
        [10, 1, 2, 7],
        [3, 1, 1, 1],
        [8, 4, 2, 2]
    ])
    X_test = np.array([
        [np.nan, 2, 4, 5],
        [np.nan, 4, 1, 2],
        [np.nan, 1, 10, 1]
    ])
    imputer = IterativeImputer(
        initial_strategy='mean', skip_complete=skip_complete, random_state=rng
    )
    X_test_est = imputer.fit(X_train).transform(X_test)
    if skip_complete:
        # impute with the initial strategy: 'mean'
        assert_allclose(X_test_est[:, 0], np.mean(X_train[:, 0]))
    else:
        assert_allclose(X_test_est[:, 0], [11, 7, 12], rtol=1e-4)
Beispiel #5
0
def test_iterative_imputer_no_missing():
    rng = np.random.RandomState(0)
    X = rng.rand(100, 100)
    X[:, 0] = np.nan
    m1 = IterativeImputer(max_iter=10, random_state=rng)
    m2 = IterativeImputer(max_iter=10, random_state=rng)
    pred1 = m1.fit(X).transform(X)
    pred2 = m2.fit_transform(X)
    # should exclude the first column entirely
    assert_allclose(X[:, 1:], pred1)
    # fit and fit_transform should both be identical
    assert_allclose(pred1, pred2)
Beispiel #6
0
def test_iterative_imputer_transform_stochasticity():
    pytest.importorskip("scipy", minversion="0.17.0")
    rng1 = np.random.RandomState(0)
    rng2 = np.random.RandomState(1)
    n = 100
    d = 10
    X = _sparse_random_matrix(n, d, density=0.10,
                             random_state=rng1).toarray()

    # when sample_posterior=True, two transforms shouldn't be equal
    imputer = IterativeImputer(missing_values=0,
                               max_iter=1,
                               sample_posterior=True,
                               random_state=rng1)
    imputer.fit(X)

    X_fitted_1 = imputer.transform(X)
    X_fitted_2 = imputer.transform(X)

    # sufficient to assert that the means are not the same
    assert np.mean(X_fitted_1) != pytest.approx(np.mean(X_fitted_2))

    # when sample_posterior=False, and n_nearest_features=None
    # and imputation_order is not random
    # the two transforms should be identical even if rng are different
    imputer1 = IterativeImputer(missing_values=0,
                                max_iter=1,
                                sample_posterior=False,
                                n_nearest_features=None,
                                imputation_order='ascending',
                                random_state=rng1)

    imputer2 = IterativeImputer(missing_values=0,
                                max_iter=1,
                                sample_posterior=False,
                                n_nearest_features=None,
                                imputation_order='ascending',
                                random_state=rng2)
    imputer1.fit(X)
    imputer2.fit(X)

    X_fitted_1a = imputer1.transform(X)
    X_fitted_1b = imputer1.transform(X)
    X_fitted_2 = imputer2.transform(X)

    assert_allclose(X_fitted_1a, X_fitted_1b)
    assert_allclose(X_fitted_1a, X_fitted_2)