Ejemplo n.º 1
0
def test_pairplot_iris():
    """Pairplot of iris yields a 4x4 axes grid plus duplicated diagonals."""
    iris_df = data_df_from_bunch(load_iris())
    grid = pairplot(iris_df, target_col='target')
    assert grid.shape == (4, 4)
    # The diagonal axes are duplicated so sharex/sharey can be used,
    # which adds 4 extra axes on top of the 4x4 grid.
    assert len(plt.gcf().get_axes()) == 4 * 4 + 4
Ejemplo n.º 2
0
def test_plot_classification_continuous():
    """End-to-end checks of plot_classification_continuous on MiceProtein."""
    bunch = fetch_openml('MiceProtein')
    mice_df = data_df_from_bunch(bunch)

    # Univariate plots only: a single figure showing the top-10 features.
    figs = plot_classification_continuous(mice_df,
                                          target_col='target',
                                          plot_pairwise=False)
    assert len(figs) == 1
    assert len(figs[0].get_axes()) == 10

    # Six features (the last column being the target) is the minimum
    # that still produces histograms.
    figs = plot_classification_continuous(mice_df.iloc[:, -7:],
                                          target_col='target',
                                          plot_pairwise=False)
    assert len(figs) == 1
    assert len(figs[0].get_axes()) == 6

    # With only 5 features a full pairplot is drawn instead.
    figs = plot_classification_continuous(mice_df.iloc[:, -6:],
                                          target_col='target',
                                          plot_pairwise=False)
    assert len(figs) == 1
    # Each diagonal cell carries a twin axis: 5x5 grid plus 5 twins.
    assert len(figs[0].get_axes()) == 5 * 5 + 5

    # Default call additionally produces pairwise, PCA and LDA figures.
    figs = plot_classification_continuous(mice_df,
                                          target_col='target',
                                          random_state=42)
    # univariate, pairwise, pca, lda
    assert len(figs) == 4

    # Univariate figure: known feature ordering; bar plots never set a ylabel.
    ax = figs[0].get_axes()
    assert len(ax) == 10
    assert ax[0].get_xlabel() == "SOD1_N"
    assert ax[0].get_ylabel() == ""

    # Pairwise figure (known result).
    ax = figs[1].get_axes()
    assert len(ax) == 4
    assert ax[0].get_xlabel() == "SOD1_N"
    assert ax[0].get_ylabel() == 'S6_N'

    # PCA figure (known result).
    ax = figs[2].get_axes()
    assert len(ax) == 4
    assert ax[0].get_xlabel() == "PCA 1"
    assert ax[0].get_ylabel() == 'PCA 5'

    # LDA figure (known result).
    ax = figs[3].get_axes()
    assert len(ax) == 4
    assert ax[0].get_xlabel() == "LDA 0"
    assert ax[0].get_ylabel() == 'LDA 1'
Ejemplo n.º 3
0
def test_data_df_from_bunch():
    """data_df_from_bunch keeps every row and the original class labels."""
    bunch = fetch_openml('MiceProtein')
    frame = data_df_from_bunch(bunch)
    assert len(frame) == len(bunch.data)
    expected_classes = [
        'c-CS-m', 'c-SC-m', 'c-CS-s', 'c-SC-s', 't-CS-m', 't-SC-m', 't-CS-s',
        't-SC-s'
    ]
    assert all(frame.target.unique() == expected_classes)
Ejemplo n.º 4
0
def test_regression_boston():
    """SimpleRegressor fits boston both as a DataFrame and as numpy arrays."""
    bunch = load_boston()
    frame = data_df_from_bunch(bunch)
    SimpleRegressor().fit(frame, target_col='target')

    # Plain numpy arrays must be accepted as well.
    SimpleRegressor().fit(bunch.data, bunch.target)
Ejemplo n.º 5
0
def test_digits_type_hints():
    """clean() honors per-feature 'continuous' hints on the digits data."""
    bunch = load_digits()

    # Older scikit-learn bunches have no feature_names attribute;
    # fall back to generated x0..x63 names in that case.
    names = getattr(bunch, 'feature_names',
                    ['x%d' % i for i in range(bunch.data.shape[1])])

    digits_df = data_df_from_bunch(bunch)
    cleaned = clean(digits_df, type_hints=dict.fromkeys(names, 'continuous'))
    # 64 pixel columns plus the target column survive cleaning.
    assert cleaned.shape[1] == 65
Ejemplo n.º 6
0
def test_plot_target_low_card_int():
    """plot() handles a low-cardinality integer target (digits labels)."""
    digits_df = data_df_from_bunch(load_digits())
    # Subsample every 10th row to keep the smoke test fast.
    plot(digits_df[::10], target_col='target')
Ejemplo n.º 7
0
def test_digits_type_hints():
    """Marking all 64 pixel columns continuous leaves 65 columns after clean."""
    digits_df = data_df_from_bunch(load_digits())
    hints = {"x{}".format(col): 'continuous' for col in range(64)}
    cleaned = clean(digits_df, type_hints=hints)
    assert cleaned.shape[1] == 65
Ejemplo n.º 8
0
def test_pairplot_iris():
    """Smoke test: pairplot runs on the iris frame without raising."""
    pairplot(data_df_from_bunch(load_iris()), target_col='target')
Ejemplo n.º 9
0
"""
Wine Classification Dataset Visualization
==========================================
"""
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from dabl import plot_supervised
from dabl.utils import data_df_from_bunch

wine_bunch = load_wine()
wine_df = data_df_from_bunch(wine_bunch)

plot_supervised(wine_df, 'target')
plt.show()
Ejemplo n.º 10
0
    
    # print data(feature)shape
    wine.data.shape
    # print the wine data features (top 5 records)
    print (wine.data[0:5])
    print (wine.target)
    #Count number of observation in each class
    for i in set(wine.target):
        print('Class', i, ' -> ', list(wine.target).count(i))
        
explore()

# Gaussian Naive Bayes: fit on the training split, then score the test split.
from sklearn.naive_bayes import GaussianNB as GNB

model = GNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report classification accuracy as a percentage.
from sklearn import metrics

accuracy = metrics.accuracy_score(y_test, y_pred)
print(f"Accuracy: {100*accuracy:.3f}%")

# Visualize the wine data against its target with dabl.
import matplotlib.pyplot as plt
from dabl import plot
from dabl.utils import data_df_from_bunch

plot(data_df_from_bunch(wine), 'target')
plt.show()