Esempio n. 1
0
def test_categorical_data_digits():
    """Smoke-test MixedNB in all-categorical mode on the digits dataset.

    The test makes no assertions; it passes as long as fitting and scoring
    complete without raising. GaussianNB is exercised on the full dataset,
    MixedNB only on the first 1440 samples.
    """
    features, labels = load_digits(return_X_y=True)

    # Reference run with scikit-learn's GaussianNB; the score is discarded.
    GaussianNB().fit(features, labels).score(features, labels)

    # MixedNB is restricted to the leading 1440 rows for both fit and score.
    head_features = features[:1440]
    head_labels = labels[:1440]

    # `max_categories` is an array of 64 entries, each equal to 17.
    categories_per_feature = np.repeat(17, 64)
    classifier = MixedNB(categorical_features='all',
                         max_categories=categories_per_feature)
    classifier.fit(head_features, head_labels)
    classifier.score(head_features, head_labels)
Esempio n. 2
0
def test_continuous_data_digits():
    """Check that default MixedNB scores like GaussianNB on the digits data.

    Both classifiers are fitted and scored on the full dataset; the two
    scores must agree within the default ``np.isclose`` tolerance.
    """
    features, labels = load_digits(return_X_y=True)

    # Baseline: scikit-learn's GaussianNB (`fit` returns the estimator).
    expected_score = GaussianNB().fit(features, labels).score(features, labels)

    # Candidate: MixedNB constructed with no arguments.
    candidate = MixedNB()
    candidate.fit(features, labels)
    actual_score = candidate.score(features, labels)

    # Argument order is kept: np.isclose scales its tolerance by the second one.
    assert np.isclose(expected_score, actual_score)
Esempio n. 3
0
def test_continuous_data_wine():
    """Check that default MixedNB scores like GaussianNB on the wine data.

    Both classifiers are fitted and scored on the full dataset; the two
    scores must agree within the default ``np.isclose`` tolerance.
    """
    features, labels = load_wine(return_X_y=True)

    # Baseline: scikit-learn's GaussianNB (`fit` returns the estimator).
    expected_score = GaussianNB().fit(features, labels).score(features, labels)

    # Candidate: MixedNB constructed with no arguments.
    candidate = MixedNB()
    candidate.fit(features, labels)
    actual_score = candidate.score(features, labels)

    # Argument order is kept: np.isclose scales its tolerance by the second one.
    assert np.isclose(expected_score, actual_score)
Esempio n. 4
0
def test_continuous_data_breast_cancer():
    """Check that default MixedNB scores like GaussianNB on breast-cancer data.

    Both classifiers are fitted and scored on the full dataset; the two
    scores must agree within the default ``np.isclose`` tolerance.
    """
    features, labels = load_breast_cancer(return_X_y=True)

    # Baseline: scikit-learn's GaussianNB (`fit` returns the estimator).
    expected_score = GaussianNB().fit(features, labels).score(features, labels)

    # Candidate: MixedNB constructed with no arguments.
    candidate = MixedNB()
    candidate.fit(features, labels)
    actual_score = candidate.score(features, labels)

    # Argument order is kept: np.isclose scales its tolerance by the second one.
    assert np.isclose(expected_score, actual_score)
Esempio n. 5
0
def test_categorical_data_simple():
    """Smoke-test MixedNB on the bundled example data.

    The test makes no assertions; it passes as long as fitting and scoring
    complete without raising.
    """
    features, labels = load_example()

    # Passed positionally, exactly as MixedNB's first constructor argument.
    first_argument = [0, 1]
    classifier = MixedNB(first_argument)
    classifier.fit(features, labels)
    classifier.score(features, labels)
# Positional indices of the columns treated as categorical. Defined once so
# the label-encoding step and the MixedNB configuration below always agree.
categorical_columns = [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                       19, 20, 21, 22, 23, 24, 26, 28, 30, 31, 32, 33, 34, 35]

# Change categorical variables into numerical variables.
# `X` and `y` are expected to be defined earlier in the script.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
# `apply` calls fit_transform once per column, so every categorical column
# gets its own integer coding.
X.iloc[:, categorical_columns] = (
    X.iloc[:, categorical_columns].apply(LabelEncoder().fit_transform)
)

# Split the dataset into training dataset and test dataset
# (80/20 split, stratified on the encoded target, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=1234, stratify=y)

# Build a Bayesian Classification Model and predict the type using the test data.
gnb = MixedNB(categorical_features=categorical_columns)
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Calculate the accuracy
accuracy = gnb.score(X_test, y_test)
print('Accuracy: {0:.2f}'.format(accuracy))

# Build a confusion matrix (kept in `cm`; only the classification report is printed)
cm = metrics.confusion_matrix(y_test, y_pred)
print(metrics.classification_report(y_test, y_pred))


# Bayesian classification for the PCA dataset
# Load dataset
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection  import train_test_split
from sklearn import metrics
import statsmodels.api as sm
Esempio n. 7
0
# ## MixedNB with digits dataset
# ### using categorical naive bayes

# Load the required modules
import numpy as np
from sklearn.datasets import load_digits
from sklearn.naive_bayes import GaussianNB
from mixed_naive_bayes import MixedNB

# Fetch the digits dataset; attribute access on the returned object is
# equivalent to indexing it with 'data' / 'target'
digits = load_digits()
X, y = digits.data, digits.target

# Baseline: `sklearn`'s GaussianNB (`fit` returns the fitted estimator)
gaussian_nb = GaussianNB().fit(X, y)
gaussian_nb_score = gaussian_nb.score(X, y)

# Our classifier, with every feature treated as categorical;
# `max_categories` is an array of 64 entries, each equal to 17
max_categories = np.repeat(17, 64)
mixed_nb = MixedNB(categorical_features='all', max_categories=max_categories)
mixed_nb.fit(X, y)
mixed_nb_score = mixed_nb.score(X, y)

# Print both scores on separate lines, GaussianNB first
print(gaussian_nb_score, mixed_nb_score, sep='\n')