def test_float_classification_target():
    """Check that plotting works for classification with a float target."""
    # check we can plot even if we do classification with a float target
    X, y = make_blobs()
    data = pd.DataFrame(X)
    # ``np.float`` was merely an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin produces the same float64 dtype.
    data['target'] = y.astype(float)
    types = detect_types(data)
    # the float-valued target must still be flagged as categorical
    assert types.categorical['target']
    plot_supervised(data, 'target')
    # same with "actual float" - we need to specify classification for that
    data['target'] = y.astype(float) + .2
    plot_supervised(data, 'target', type_hints={'target': 'categorical'})
    plt.close("all")
def test_type_hints(add, feature_type, target_type):
    """Check that explicit type hints drive what ``plot_supervised`` draws.

    ``add`` is added to the single random integer feature, ``feature_type``
    is the hint passed for that feature (column ``0``) and ``target_type``
    is the hint passed for the ``'target'`` column.
    """
    frame = pd.DataFrame(np.random.randint(4, size=100)) + add
    frame['target'] = np.random.uniform(size=100)
    hints = {0: feature_type, 'target': target_type}
    plot_supervised(frame, type_hints=hints, target_col='target')

    # the figure title has to mention the hinted feature type
    title = plt.gcf()._suptitle.get_text()
    assert feature_type.capitalize() in title

    # 'target' shows up as an axis label iff the target is continuous
    current_axes = plt.gca()
    axis_labels = current_axes.get_ylabel() + current_axes.get_xlabel()
    target_is_continuous = target_type == 'continuous'
    assert ('target' in axis_labels) == target_is_continuous
    plt.close("all")
def test_plots_smoke(continuous_features, categorical_features, task):
    """Smoke-test ``plot_supervised`` on synthetic mixed-type data.

    Parameters
    ----------
    continuous_features : int
        Number of continuous columns to generate.
    categorical_features : int
        Number of columns to generate and discretize into categories.
    task : str
        When equal to ``"classification"`` the target is binned into
        discrete classes; otherwise it is left continuous.
    """
    n_samples = 100
    X_cont, y_cont = make_regression(n_samples=n_samples,
                                     n_features=continuous_features,
                                     n_informative=min(continuous_features, 2))
    X_cat, y_cat = make_regression(n_samples=n_samples,
                                   n_features=categorical_features,
                                   n_informative=min(categorical_features, 2))
    if X_cat.shape[1] > 0:
        X_cat = KBinsDiscretizer(encode='ordinal').fit_transform(X_cat)
    cont_columns = ["asdf_%d_cont" % i for i in range(continuous_features)]
    df_cont = pd.DataFrame(X_cont, columns=cont_columns)
    if categorical_features > 0:
        cat_columns = ["asdf_%d_cat" % i for i in range(categorical_features)]
        df_cat = pd.DataFrame(X_cat, columns=cat_columns).astype('int')
        df_cat = df_cat.astype("category")
        X_df = pd.concat([df_cont, df_cat], axis=1)
    else:
        X_df = df_cont
    assert (X_df.shape[1] == continuous_features + categorical_features)
    X_clean = clean(X_df.copy())
    y = y_cont + y_cat
    if X_df.shape[1] == 0:
        # no features at all: fall back to a purely random target
        y = np.random.uniform(size=n_samples)
    if task == "classification":
        # bin the continuous target into classes at the given percentiles
        y = np.digitize(y, np.percentile(y, [5, 10, 60, 85]))
    X_clean['target'] = y
    if task == "classification":
        X_clean['target'] = X_clean['target'].astype('category')
    types = detect_types(X_clean)
    # for every column, the name of the type with the largest indicator
    column_types = types.T.idxmax()
    # The Series is indexed by column name, so use explicit positional
    # indexing; integer keys through plain ``[]`` rely on a deprecated
    # positional fallback in pandas.
    assert np.all(column_types.iloc[:continuous_features] == 'continuous')
    assert np.all(column_types.iloc[continuous_features:-1] == 'categorical')
    # the target was appended last
    if task == "classification":
        assert column_types.iloc[-1] == 'categorical'
    else:
        assert column_types.iloc[-1] == 'continuous'

    plot_supervised(X_clean, 'target')
    plt.close("all")
def test_plot_target_low_card_int():
    """Smoke-test ``plot_supervised`` on a subsample of the digits data."""
    data = load_digits()
    df = data_df_from_bunch(data)
    # only every 10th row is plotted
    plot_supervised(df[::10], target_col='target')
    # Close the figures like the other plotting tests do, so they do not
    # accumulate across the test session.
    plt.close("all")