def test_na_vals_reg_plot_raise_warning():
    """Check that regression plots warn about missing target values.

    Every 50th target value of the diabetes data is set to NaN, then
    ``plot``, ``plot_regression_continuous`` and
    ``plot_regression_categorical`` are each expected to emit a
    ``UserWarning`` saying missing values were removed.
    """
    X, y = load_diabetes(return_X_y=True)
    X = pd.DataFrame(X)
    # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
    y[::50] = np.nan
    X['target_col'] = y
    scatter_alpha = _get_scatter_alpha('auto', X['target_col'])
    scatter_size = _get_scatter_size('auto', X['target_col'])

    # Same message is expected from all three entry points.
    expected_warning = ("Missing values in target_col have "
                        "been removed for regression")

    with pytest.warns(UserWarning, match=expected_warning):
        plot(X, 'target_col')
    with pytest.warns(UserWarning, match=expected_warning):
        plot_regression_continuous(X, 'target_col',
                                   scatter_alpha=scatter_alpha,
                                   scatter_size=scatter_size)
    with pytest.warns(UserWarning, match=expected_warning):
        plot_regression_categorical(X, 'target_col',
                                    scatter_alpha=scatter_alpha,
                                    scatter_size=scatter_size)
def test_plot_regression_continuous_with_target_outliers():
    """Check that a single target outlier triggers the drop warning.

    Builds 200 rows of random integer data, adds one row whose target is
    0, and expects ``plot_regression_continuous`` to emit a
    ``UserWarning`` reporting one dropped outlier.
    """
    df = pd.DataFrame(
        data={
            "feature": np.random.randint(low=1, high=100, size=200),
            # target values are bound between 50 and 100
            "target": np.random.randint(low=50, high=100, size=200)
        })
    # append single outlier record with target value 0
    # (DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported replacement)
    outlier = pd.DataFrame([{"feature": 50, "target": 0}])
    df = pd.concat([df, outlier], ignore_index=True)

    with pytest.warns(
        UserWarning,
        match="Dropped 1 outliers in column target."
    ):
        plot_regression_continuous(df, 'target')
def test_plot_wrong_target_type():
    """Check that plot functions reject a target of the wrong kind.

    With the ``make_blobs`` labels as target, both regression plots must
    raise ``ValueError`` matching "need continuous".  After the target is
    replaced by feature column 0, both classification plots must raise
    ``ValueError`` matching "need categorical".
    """
    features, labels = make_blobs()
    frame = pd.DataFrame(features)

    # Target taken from the blob labels: regression plots must refuse it.
    frame['target'] = labels
    regression_plots = (plot_regression_categorical,
                        plot_regression_continuous)
    for regression_plot in regression_plots:
        with pytest.raises(ValueError, match="need continuous"):
            regression_plot(frame, 'target')

    # Target copied from a feature column: classification plots must
    # refuse it.
    frame['target'] = frame[0]
    classification_plots = (plot_classification_categorical,
                            plot_classification_continuous)
    for classification_plot in classification_plots:
        with pytest.raises(ValueError, match="need categorical"):
            classification_plot(frame, 'target')
def test_label_truncation():
    """Check axis-label truncation and the ``truncate_labels`` switch.

    First the labels of the first axes are expected to be shortened to
    17 characters plus "...".  Then, with ``truncate_labels=False``, the
    labels must equal the full column names.
    """
    a = ('a_really_long_name_that_would_mess_up_the_layout_a_lot'
         '_by_just_being_very_long')
    b = ('the_target_that_has_an_equally_long_name_which_would_'
         'mess_up_everything_as_well_but_in_different_places')
    df = pd.DataFrame({a: np.random.uniform(0, 1, 1000)})
    df[b] = df[a] + np.random.uniform(0, 0.1, 1000)

    res = plot_regression_continuous(df, target_col=b)
    assert res[0, 0].get_ylabel() == 'the_target_that_h...'
    assert res[0, 0].get_xlabel() == 'a_really_long_nam...'

    set_config(truncate_labels=False)
    try:
        res = plot_regression_continuous(df, target_col=b)
        assert res[0, 0].get_ylabel() == b
        assert res[0, 0].get_xlabel() == a
    finally:
        # Always restore the setting so a failure here cannot leak the
        # changed global config into other tests.
        # NOTE(review): assumes True is the default value - confirm.
        set_config(truncate_labels=True)
def test_plot_regression_with_target_outliers():
    """Check outlier handling across the regression plotting functions.

    Builds 200 rows of random integer data plus one row whose target is
    0.  Both ``plot_regression_continuous`` and
    ``plot_regression_categorical`` are expected to warn that one outlier
    was dropped, and ``plot`` is expected to return three items whose
    first axes starts its x ticks at 40.
    """
    df = pd.DataFrame(
        data={
            "feature": np.random.randint(low=1, high=100, size=200),
            # target values are bound between 50 and 100
            "target": np.random.randint(low=50, high=100, size=200)
        })
    # append single outlier record with target value 0
    # (DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported replacement)
    outlier = pd.DataFrame([{"feature": 50, "target": 0}])
    df = pd.concat([df, outlier], ignore_index=True)

    with pytest.warns(
        UserWarning,
        match="Dropped 1 outliers in column target."
    ):
        plot_regression_continuous(df, target_col='target')

    with pytest.warns(
        UserWarning,
        match="Dropped 1 outliers in column target."
    ):
        plot_regression_categorical(df, target_col='target')

    res = plot(df, target_col='target')
    assert len(res) == 3
    ax = res[0]
    # ensure outlier at 0 was removed
    assert ax.get_xticks()[0] == 40