Beispiel #1
0
def test_imputation_deletion_warning(strategy):
    X = np.ones((3, 5))
    X[:, 0] = np.nan

    with pytest.warns(UserWarning, match="Deleting"):
        imputer = SimpleImputer(strategy=strategy, verbose=True)
        imputer.fit_transform(X)
Beispiel #2
0
def test_imputation_error_invalid_strategy(strategy):
    X = np.ones((3, 5))
    X[0, 0] = np.nan

    with pytest.raises(ValueError, match=str(strategy)):
        imputer = SimpleImputer(strategy=strategy)
        imputer.fit_transform(X)
Beispiel #3
0
def test_imputation_constant_error_invalid_type(X_data, missing_value):
    # Verify that exceptions are raised on invalid fill_value type
    X = np.full((3, 5), X_data, dtype=float)
    X[0, 0] = missing_value

    with pytest.raises(ValueError, match="imputing numerical"):
        imputer = SimpleImputer(missing_values=missing_value,
                                strategy="constant",
                                fill_value="x")
        imputer.fit_transform(X)
Beispiel #4
0
def test_imputation_shape(strategy):
    # Verify the shapes of the imputed matrix for different strategies.
    X = np.random.randn(10, 2)
    X[::2] = np.nan

    imputer = SimpleImputer(strategy=strategy)
    X_imputed = imputer.fit_transform(sparse.csr_matrix(X))
    assert X_imputed.shape == (10, 2)
    X_imputed = imputer.fit_transform(X)
    assert X_imputed.shape == (10, 2)

    iterative_imputer = IterativeImputer(initial_strategy=strategy)
    X_imputed = iterative_imputer.fit_transform(X)
    assert X_imputed.shape == (10, 2)
Beispiel #5
0
def test_imputation_constant_integer():
    # Test imputation using the constant strategy on integers
    X = np.array([[-1, 2, 3, -1], [4, -1, 5, -1], [6, 7, -1, -1],
                  [8, 9, 0, -1]])

    X_true = np.array([[0, 2, 3, 0], [4, 0, 5, 0], [6, 7, 0, 0], [8, 9, 0, 0]])

    imputer = SimpleImputer(missing_values=-1,
                            strategy="constant",
                            fill_value=0)
    X_trans = imputer.fit_transform(X)

    assert_array_equal(X_trans, X_true)
Beispiel #6
0
def test_imputation_constant_float(array_constructor):
    # Test imputation using the constant strategy on floats
    X = np.array([[np.nan, 1.1, 0, np.nan], [1.2, np.nan, 1.3, np.nan],
                  [0, 0, np.nan, np.nan], [1.4, 1.5, 0, np.nan]])

    X_true = np.array([[-1, 1.1, 0, -1], [1.2, -1, 1.3, -1], [0, 0, -1, -1],
                       [1.4, 1.5, 0, -1]])

    X = array_constructor(X)

    X_true = array_constructor(X_true)

    imputer = SimpleImputer(strategy="constant", fill_value=-1)
    X_trans = imputer.fit_transform(X)

    assert_allclose_dense_sparse(X_trans, X_true)
Beispiel #7
0
def test_simple_imputation_add_indicator_sparse_matrix(arr_type):
    X_sparse = arr_type([[np.nan, 1, 5], [2, np.nan, 1], [6, 3, np.nan],
                         [1, 2, 9]])
    X_true = np.array([
        [3., 1., 5., 1., 0., 0.],
        [2., 2., 1., 0., 1., 0.],
        [6., 3., 5., 0., 0., 1.],
        [1., 2., 9., 0., 0., 0.],
    ])

    imputer = SimpleImputer(missing_values=np.nan, add_indicator=True)
    X_trans = imputer.fit_transform(X_sparse)

    assert sparse.issparse(X_trans)
    assert X_trans.shape == X_true.shape
    assert_allclose(X_trans.toarray(), X_true)
Beispiel #8
0
def test_imputation_constant_object(marker):
    # Test imputation using the constant strategy on objects
    X = np.array([[marker, "a", "b", marker], ["c", marker, "d", marker],
                  ["e", "f", marker, marker], ["g", "h", "i", marker]],
                 dtype=object)

    X_true = np.array(
        [["missing", "a", "b", "missing"], ["c", "missing", "d", "missing"],
         ["e", "f", "missing", "missing"], ["g", "h", "i", "missing"]],
        dtype=object)

    imputer = SimpleImputer(missing_values=marker,
                            strategy="constant",
                            fill_value="missing")
    X_trans = imputer.fit_transform(X)

    assert_array_equal(X_trans, X_true)
Beispiel #9
0
def test_imputation_most_frequent_pandas(dtype):
    # Test imputation using the most frequent strategy on pandas df
    pd = pytest.importorskip("pandas")

    f = io.StringIO("Cat1,Cat2,Cat3,Cat4\n"
                    ",i,x,\n"
                    "a,,y,\n"
                    "a,j,,\n"
                    "b,j,x,")

    df = pd.read_csv(f, dtype=dtype)

    X_true = np.array(
        [["a", "i", "x"], ["a", "j", "y"], ["a", "j", "x"], ["b", "j", "x"]],
        dtype=object)

    imputer = SimpleImputer(strategy="most_frequent")
    X_trans = imputer.fit_transform(df)

    assert_array_equal(X_trans, X_true)
Beispiel #10
0
def test_imputation_mean_median_error_invalid_type(strategy, dtype):
    X = np.array([["a", "b", 3], [4, "e", 6], ["g", "h", 9]], dtype=dtype)

    with pytest.raises(ValueError, match="non-numeric data"):
        imputer = SimpleImputer(strategy=strategy)
        imputer.fit_transform(X)