def test_non_numeric_errors(non_numeric_df):
    """Numeric-only strategies must raise ValueError on non-numeric columns.

    For each (column, strategy) pair, both ``fit_transform`` and ``fit`` are
    expected to raise a ``ValueError`` whose message names the strategy.
    """
    X = non_numeric_df

    # (column, strategy, expected error message)
    cases = (
        ('A', "mean", "Cannot use mean strategy with non-numeric data"),
        ('B', "median", "Cannot use median strategy with non-numeric data"),
    )

    for column, strategy, message in cases:
        strategies = {column: {"impute_strategy": strategy}}

        # A fresh imputer is built for each call so no state carries over.
        with pytest.raises(ValueError, match=message):
            PerColumnImputer(impute_strategies=strategies).fit_transform(X)

        with pytest.raises(ValueError, match=message):
            PerColumnImputer(impute_strategies=strategies).fit(X)
def test_transform_drop_all_nan_columns():
    """``transform`` drops all-NaN columns and leaves the input untouched.

    After fitting with the ``most_frequent`` strategy on every column, the
    transformed output is expected to contain only the columns that had at
    least one observed value, with their missing entries imputed.
    """
    X = pd.DataFrame({
        "all_nan": [np.nan, np.nan, np.nan],
        "some_nan": [np.nan, 1, 0],
        "another_col": [0, 1, 2],
    })
    strategies = {
        'all_nan': {"impute_strategy": "most_frequent"},
        'some_nan': {"impute_strategy": "most_frequent"},
        'another_col': {"impute_strategy": "most_frequent"},
    }

    transformer = PerColumnImputer(impute_strategies=strategies)
    transformer.fit(X)

    X_expected = pd.DataFrame({
        "some_nan": [0, 1, 0],
        "another_col": [0, 1, 2],
    })

    # The other tests in this file treat the imputer output as a woodwork
    # DataTable (they call ``.to_dataframe()`` on it), and
    # ``assert_frame_equal`` only accepts pandas DataFrames, so convert first.
    X_t = transformer.transform(X).to_dataframe()
    assert_frame_equal(X_expected, X_t, check_dtype=False)

    # The caller's frame must not have been modified by fit/transform.
    assert_frame_equal(
        X,
        pd.DataFrame({
            "all_nan": [np.nan, np.nan, np.nan],
            "some_nan": [np.nan, 1, 0],
            "another_col": [0, 1, 2],
        }))
def test_transform_drop_all_nan_columns_empty():
    """An all-NaN input yields an empty frame and is itself left unchanged.

    Exercised twice: once through ``fit_transform`` and once through
    ``fit`` followed by ``transform``, each with a freshly built imputer.
    """
    def all_nan_row():
        # Single-row frame whose three columns are all missing.
        return pd.DataFrame([[np.nan, np.nan, np.nan]])

    def new_imputer():
        # NOTE(review): the strategy key is the string '0' while the frame's
        # default column labels are integers -- confirm this is intended.
        return PerColumnImputer(
            impute_strategies={'0': {"impute_strategy": "most_frequent"}})

    X = all_nan_row()

    # Path 1: fit_transform in one step.
    imputer = new_imputer()
    assert imputer.fit_transform(X).to_dataframe().empty
    assert_frame_equal(X, all_nan_row())

    # Path 2: separate fit and transform.
    imputer = new_imputer()
    imputer.fit(X)
    assert imputer.transform(X).to_dataframe().empty
    assert_frame_equal(X, all_nan_row())
def test_per_column_imputer_woodwork_custom_overrides_returned_by_components(X_df, has_nan):
    """A logical type overridden on column 0 is preserved by the imputer.

    For each candidate logical type, a DataTable is built from ``X_df`` with
    column 0 forced to that type. Types for which DataTable construction
    raises ``TypeError`` are skipped. The transformed output must be a
    DataTable whose logical types equal ``{0: logical_type}``.
    """
    if has_nan:
        # Blank out the last row of the first column.
        X_df.iloc[-1, 0] = np.nan
    y = pd.Series([1, 2, 1])

    for logical_type in (Integer, Double, Categorical, NaturalLanguage, Boolean):
        try:
            X = ww.DataTable(X_df, logical_types={0: logical_type})
        except TypeError:
            # This override cannot be applied to the data; try the next one.
            continue
        else:
            imputer = PerColumnImputer()
            imputer.fit(X, y)
            transformed = imputer.transform(X, y)
            assert isinstance(transformed, ww.DataTable)
            assert transformed.logical_types == {0: logical_type}
def test_fit_transform():
    """``fit`` + ``transform`` and ``fit_transform`` give the same imputed data.

    Column ``A`` holds 2, 4, 6 and one missing value; with the ``median``
    strategy the missing entry should be filled with 4. Both code paths are
    checked against that expected frame and against each other.
    """
    X = pd.DataFrame([[2], [4], [6], [np.nan]])
    X_expected = pd.DataFrame([[2], [4], [6], [4]])
    X.columns = ['A']
    X_expected.columns = ['A']
    strategies = {'A': {"impute_strategy": "median"}}

    # The other tests in this file treat the imputer output as a woodwork
    # DataTable (they call ``.to_dataframe()`` on it), and
    # ``assert_frame_equal`` only accepts pandas DataFrames, so convert first.
    transformer = PerColumnImputer(impute_strategies=strategies)
    transformer.fit(X)
    X_t = transformer.transform(X).to_dataframe()

    transformer = PerColumnImputer(impute_strategies=strategies)
    X_fit_transform = transformer.fit_transform(X).to_dataframe()

    # Previously X_expected was built but never asserted against, so the
    # imputed values themselves went unchecked. dtype is ignored because
    # the expected frame is integer-typed and the imputed one may not be.
    assert_frame_equal(X_expected, X_t, check_dtype=False)
    assert_frame_equal(X_t, X_fit_transform, check_dtype=False)