def test_find_low_var_nan():
    """Run ``find_low_var`` on a frame that has an all-NaN column.

    The frame has three columns of length 10: ``x`` is entirely NaN,
    ``y`` is ``0..9`` and ``z`` is the constant ``1``. The test expects
    ``x`` and ``z`` to be returned, in either order.
    """
    # dataset containing NaN values
    frame_with_nan = pd.DataFrame(
        {
            "x": [np.nan] * 10,
            "y": list(range(10)),
            "z": [1] * 10,
        }
    )

    flagged = feature_selection.find_low_var(frame_with_nan)

    # The order of the returned names is not fixed, so accept both.
    assert flagged in (["x", "z"], ["z", "x"])
def test_find_low_var_threshold():
    """Check that ``find_low_var`` honours an explicit ``threshold``.

    Two columns of 1000 normally distributed samples are generated:
    ``x`` with standard deviation 0.5 and ``y`` with standard deviation
    1.0. The threshold passed is twice the nominal variance of ``x``,
    and the test expects only ``x`` to be returned.
    """
    # NOTE(review): a test with this exact name is defined again further
    # down in this file; the later definition rebinds the name, so one of
    # the two copies is redundant.
    sigma = 0.5
    var = sigma**2

    # Use a seeded generator so the sampled data is identical on every
    # run and the test outcome cannot vary between runs.
    rng = np.random.default_rng(0)
    x = rng.normal(loc=1, scale=sigma, size=1000)
    y = rng.normal(loc=1, scale=1.0, size=1000)
    df2 = pd.DataFrame({"x": x, "y": y})

    out = feature_selection.find_low_var(df2, threshold=var * 2)
    assert out == ["x"]
def test_find_low_var_threshold():
    """Check that ``find_low_var`` honours an explicit ``threshold``.

    Two columns of 1000 normally distributed samples are generated:
    ``x`` with standard deviation 0.5 and ``y`` with standard deviation
    1.0. The threshold passed is twice the nominal variance of ``x``,
    and the test expects only ``x`` to be returned.
    """
    # NOTE(review): a test with this exact name is also defined earlier
    # in this file; this later definition rebinds the name, so one of the
    # two copies is redundant.
    sigma = 0.5
    var = sigma**2

    # Use a seeded generator so the sampled data is identical on every
    # run and the test outcome cannot vary between runs.
    rng = np.random.default_rng(0)
    x = rng.normal(loc=1, scale=sigma, size=1000)
    y = rng.normal(loc=1, scale=1.0, size=1000)
    df2 = pd.DataFrame({"x": x, "y": y})

    out = feature_selection.find_low_var(df2, threshold=var * 2)
    assert out == ["x"]
def test_find_low_var_errors():
    """Check that ``find_low_var`` raises ``ValueError`` for a plain list.

    The argument is column ``"a"`` of the module-level ``df`` converted
    to a Python list with ``tolist()``.
    """
    # Build the argument outside the ``pytest.raises`` block so that only
    # the call under test can satisfy the expected ``ValueError``.
    not_a_frame = df["a"].tolist()

    with pytest.raises(ValueError):
        feature_selection.find_low_var(not_a_frame)
def test_find_low_var():
    """Run ``find_low_var`` with default arguments on the module-level ``df``.

    The test expects exactly one name to be returned, ``"b"``.
    """
    expected = ["b"]

    flagged = feature_selection.find_low_var(df)

    assert len(flagged) == len(expected)
    assert flagged == expected