def test_cols_set():
    """
    Check that a set passed as ``cols`` is rejected.

    The call supplies a set literal instead of a list and the test expects
    an AssertionError whose message matches "Cols must be a list!".
    """
    expected_failure = pytest.raises(AssertionError,
                                     match="Cols must be a list!")
    with expected_failure:
        pyedahelper.fast_missing_impute(
            df=test_data,
            method="mean",
            cols={"R", "Python"},
        )
def test_mode_fail():
    """
    Check that ``method="mode"`` raises when a column has no mode.

    Column "c" of the shared test data is requested; presumably it has no
    usable mode (verify against the fixture definition).
    """
    # NOTE(review): Exception is the broadest catchable type here, so any
    # failure inside the call satisfies this test. Narrow it once the
    # concrete exception type raised by the library is confirmed.
    expected_failure = pytest.raises(Exception)
    with expected_failure:
        pyedahelper.fast_missing_impute(
            df=test_data,
            method="mode",
            cols=["c"],
        )
def test_method_format():
    """
    Check that an unsupported ``method`` name is rejected.

    "avg" is passed as the method and the test expects an AssertionError
    whose message matches "Not a valid method!".
    """
    expected_failure = pytest.raises(AssertionError,
                                     match="Not a valid method!")
    with expected_failure:
        pyedahelper.fast_missing_impute(
            df=test_data,
            method="avg",
            cols=["a", "b"],
        )
def test_one_method():
    """
    Check that a non-string ``method`` (a list of methods) is rejected.

    A list of two method names is passed and the test expects an
    AssertionError whose message matches "Method must be a string!".
    """
    expected_failure = pytest.raises(AssertionError,
                                     match="Method must be a string!")
    with expected_failure:
        pyedahelper.fast_missing_impute(
            df=test_data,
            method=["mean", "median"],
            cols="a",
        )
def test_cols_allstr():
    """
    Check that a ``cols`` list holding a non-string element is rejected.

    The list mixes strings with the integer 3 and the test expects an
    AssertionError whose message matches "Columns must be a list of strings".
    """
    expected_failure = pytest.raises(
        AssertionError,
        match="Columns must be a list of strings",
    )
    with expected_failure:
        pyedahelper.fast_missing_impute(
            df=test_data,
            method="median",
            cols=["a", 3, "c"],
        )
def test_non_numeric():
    """
    Check that a numeric-only method is rejected for a non-numeric column.

    Column "e" is requested with ``method="mean"``; the test expects an
    AssertionError stating that only 'remove' or 'mode' may be used with
    non-numeric columns.
    """
    expected_message = ("With non-numeric columns, can only use "
                        "method = 'remove' or 'mode'")
    expected_failure = pytest.raises(AssertionError, match=expected_message)
    with expected_failure:
        pyedahelper.fast_missing_impute(
            df=test_data,
            method="mean",
            cols=["e"],
        )
def test_cols_in_df():
    """
    Check that a column name missing from the data frame is rejected.

    "fake_col" is included in ``cols`` and the test expects an
    AssertionError saying that one or more column names are not in the
    data frame.
    """
    expected_message = ("One or more of the column names"
                        " are not in the data frame!")
    expected_failure = pytest.raises(AssertionError, match=expected_message)
    with expected_failure:
        pyedahelper.fast_missing_impute(
            df=test_data,
            method="mean",
            cols=["a", "c", "fake_col"],
        )
def test_remove_method():
    """
    Check the result of ``method="remove"`` on columns "a" and "e".

    The selected columns must contain no missing values afterwards,
    column "f" must still report 4 missing values, the row count must
    shrink and the column count must stay the same.
    """
    result = pyedahelper.fast_missing_impute(
        df=test_data,
        method="remove",
        cols=["a", "e"],
    )

    # Columns that were asked to be cleaned must be free of NAs.
    for cleaned_col in ("a", "e"):
        assert result[cleaned_col].isna().sum() == 0

    # A column that was not selected keeps its missing values.
    assert result["f"].isna().sum() == 4

    rows_after = result.shape[0]
    rows_before = test_data.shape[0]
    # Dropping rows with NAs must reduce the number of rows.
    assert rows_after < rows_before

    cols_after = result.shape[1]
    cols_before = test_data.shape[1]
    # The number of columns must not change.
    assert cols_after == cols_before
def test_mode():
    """
    Check the result of ``method="mode"`` on columns "a" and "e".

    The selected columns must contain no missing values afterwards, the
    checked entries must equal the mode of the original column, column "d"
    must keep its missing-value count and the overall shape must be
    unchanged.
    """
    imputed = pyedahelper.fast_missing_impute(
        df=test_data,
        method="mode",
        cols=["a", "e"],
    )

    # Columns that were imputed must be free of NAs.
    for filled_col in ("a", "e"):
        assert imputed[filled_col].isna().sum() == 0

    # A column that was not selected should remain unchanged.
    na_after = imputed["d"].isna().sum()
    na_before = test_data["d"].isna().sum()
    assert na_after == na_before

    # Spot-check one entry per imputed column against the column's mode.
    assert imputed["a"][5] == mode(test_data["a"])
    assert imputed["e"][1] == mode(test_data["e"])

    # Dimensions shouldn't change.
    assert imputed.shape == test_data.shape
def test_mean():
    """
    Check the result of ``method="mean"`` on columns "a" and "c".

    The selected columns must contain no missing values afterwards, the
    checked entries must equal the mean of the original column, column "b"
    must keep its missing-value count and the overall shape must be
    unchanged.
    """
    sample_mean = pyedahelper.fast_missing_impute(
        df=test_data,
        method="mean",
        cols=["a", "c"],
    )

    # Columns that were imputed must be free of NAs.
    assert sample_mean["a"].isna().sum() == 0
    assert sample_mean["c"].isna().sum() == 0

    # A column that was not selected should remain unchanged.
    assert sample_mean["b"].isna().sum() == test_data["b"].isna().sum()

    # Means are floating-point values; compare with a tolerance so that a
    # last-bit rounding difference in how the mean is computed does not
    # produce a spurious failure.
    assert sample_mean["a"][5] == pytest.approx(test_data["a"].mean())
    assert sample_mean["c"][2] == pytest.approx(test_data["c"].mean())

    # Dimensions should not change.
    assert sample_mean.shape == test_data.shape
def test_df_assert():
    """
    Check that a non-data-frame ``df`` argument is rejected.

    ``test_list`` is passed as ``df`` and the test expects an
    AssertionError whose message matches "Data must be a data frame!".
    """
    expected_failure = pytest.raises(AssertionError,
                                     match="Data must be a data frame!")
    with expected_failure:
        pyedahelper.fast_missing_impute(
            df=test_list,
            method="mean",
            cols="a",
        )