Example #1
0
def test_imputing_all_missing_values_categorical(missing):
    """Assert that all missing values are imputed in categorical columns."""
    X = [[missing, "a", "a"], ["b", "c", missing], ["b", "a", "c"],
         ["c", "a", "a"]]
    y = [1, 1, 0, 0]
    imputer = Imputer(strat_cat="most_frequent")
    X, y = imputer.fit_transform(X, y)
    assert X.isna().sum().sum() == 0
Example #2
0
def test_imputing_all_missing_values_numeric(missing):
    """Assert that all missing values are imputed in numeric columns."""
    X = [[missing, 1, 1], [2, 5, 2], [4, missing, 1], [2, 1, 1]]
    y = [1, 1, 0, 0]
    imputer = Imputer(strat_num="mean")
    imputer.missing.append(99)
    X, y = imputer.fit_transform(X, y)
    assert X.isna().sum().sum() == 0
Example #3
0
def test_cols_too_many_nans():
    """Assert that columns with too many missing values are dropped."""
    X = X_bin.copy()
    for i in range(5):  # Add 5 cols with all NaN values
        X["col " + str(i)] = [np.nan for _ in range(X.shape[0])]
    impute = Imputer(strat_num="mean",
                     strat_cat="most_frequent",
                     min_frac_cols=0.5)
    X, y = impute.fit_transform(X, y_bin)
    assert len(X.columns) == 30  # Original number of columns
    assert X.isna().sum().sum() == 0
Example #4
0
def test_rows_too_many_nans():
    """Assert that rows with too many missing values are dropped."""
    X = X_bin.copy()
    for i in range(5):  # Add 5 rows with all NaN values
        X.loc[len(X)] = [np.nan for _ in range(X.shape[1])]
    y = [np.random.randint(2) for _ in range(len(X))]
    impute = Imputer(strat_num="mean",
                     strat_cat="most_frequent",
                     min_frac_rows=0.5)
    X, y = impute.fit_transform(X, y)
    assert len(X) == 569  # Original size
    assert X.isna().sum().sum() == 0
Example #5
0
def test_imputing_non_numeric_most_frequent():
    """Assert that the most_frequent strategy for non-numerical works."""
    imputer = Imputer(strat_cat="most_frequent")
    X, y = imputer.fit_transform(X10_sn, y10)
    assert X.iloc[0, 2] == "d"
    assert X.isna().sum().sum() == 0
Example #6
0
def test_imputing_non_numeric_drop():
    """Assert that the drop strategy for non-numerical works."""
    imputer = Imputer(strat_cat="drop")
    X, y = imputer.fit_transform(X10_sn, y10)
    assert len(X) == 9
    assert X.isna().sum().sum() == 0
Example #7
0
def test_imputing_non_numeric_string():
    """Assert that imputing a string for non-numerical values works."""
    imputer = Imputer(strat_cat="missing")
    X, y = imputer.fit_transform(X10_sn, y10)
    assert X.iloc[0, 2] == "missing"
    assert X.isna().sum().sum() == 0
Example #8
0
def test_imputing_numeric_most_frequent():
    """Assert that imputing the most_frequent for numerical values works."""
    imputer = Imputer(strat_num="most_frequent")
    X, y = imputer.fit_transform(X10_nan, y10)
    assert X.iloc[0, 0] == 3
    assert X.isna().sum().sum() == 0
Example #9
0
def test_imputing_numeric_mean():
    """Assert that imputing the mean for numerical values works."""
    imputer = Imputer(strat_num="mean")
    X, y = imputer.fit_transform(X10_nan, y10)
    assert X.iloc[0, 0] == pytest.approx(2.577778, rel=1e-6, abs=1e-12)
    assert X.isna().sum().sum() == 0
Example #10
0
def test_imputing_numeric_number():
    """Assert that imputing a number for numerical values works."""
    imputer = Imputer(strat_num=3.2)
    X, y = imputer.fit_transform(X10_nan, y10)
    assert X.iloc[0, 0] == 3.2
    assert X.isna().sum().sum() == 0
Example #11
0
def test_imputing_numeric_drop():
    """Assert that imputing drop for numerical values works."""
    imputer = Imputer(strat_num="drop")
    X, y = imputer.fit_transform(X10_nan, y10)
    assert len(X) == 8
    assert X.isna().sum().sum() == 0
Example #12
0
def test_imputer_is_fitted():
    """Assert that an error is raised if the instance is not fitted."""
    pytest.raises(NotFittedError, Imputer().transform, X_bin, y_bin)
Example #13
0
def test_invalid_min_frac_cols():
    """Assert that an error is raised for invalid min_frac_cols."""
    imputer = Imputer(min_frac_cols=5.2)
    pytest.raises(ValueError, imputer.fit, X_bin, y_bin)
Example #14
0
def test_strat_num_parameter():
    """Assert that the strat_num parameter is set correctly."""
    imputer = Imputer(strat_num="invalid")
    pytest.raises(ValueError, imputer.fit, X_bin, y_bin)
Example #15
0
def test_load_data_with_no_trainer():
    """Assert that an error is raised when data is provided without a trainer."""
    Imputer().save(FILE_DIR + "imputer")
    pytest.raises(TypeError, ATOMLoader, FILE_DIR + "imputer", data=(X_bin, ))