import numpy as np
from sklearn.datasets import load_breast_cancer, load_digits, load_wine
from sklearn.naive_bayes import GaussianNB

# load_example is assumed to come from the same package as MixedNB
from mixed_naive_bayes import MixedNB, load_example


def test_categorical_data_digits():
    digits = load_digits()
    X = digits['data']
    y = digits['target']

    gaussian_nb = GaussianNB()
    gaussian_nb.fit(X, y)
    gaussian_nb.score(X, y)

    mixed_nb = MixedNB(categorical_features='all',
                       max_categories=np.repeat(17, 64))
    mixed_nb.fit(X[:1440], y[:1440])
    mixed_nb.score(X[:1440], y[:1440])
def test_continuous_data_digits():
    digits = load_digits()
    X = digits['data']
    y = digits['target']

    gaussian_nb = GaussianNB()
    gaussian_nb.fit(X, y)
    gaussian_nb_score = gaussian_nb.score(X, y)

    mixed_nb = MixedNB()
    mixed_nb.fit(X, y)
    mixed_nb_score = mixed_nb.score(X, y)

    assert np.isclose(gaussian_nb_score, mixed_nb_score)
def test_continuous_data_wine():
    wine = load_wine()
    X = wine['data']
    y = wine['target']

    gaussian_nb = GaussianNB()
    gaussian_nb.fit(X, y)
    gaussian_nb_score = gaussian_nb.score(X, y)

    mixed_nb = MixedNB()
    mixed_nb.fit(X, y)
    mixed_nb_score = mixed_nb.score(X, y)

    assert np.isclose(gaussian_nb_score, mixed_nb_score)
def test_continuous_data_breast_cancer():
    breast_cancer = load_breast_cancer()
    X = breast_cancer['data']
    y = breast_cancer['target']

    gaussian_nb = GaussianNB()
    gaussian_nb.fit(X, y)
    gaussian_nb_score = gaussian_nb.score(X, y)

    mixed_nb = MixedNB()
    mixed_nb.fit(X, y)
    mixed_nb_score = mixed_nb.score(X, y)

    assert np.isclose(gaussian_nb_score, mixed_nb_score)
def test_categorical_data_simple():
    X, y = load_example()

    mixed_nb = MixedNB([0, 1])
    mixed_nb.fit(X, y)
    mixed_nb.score(X, y)
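# The tests above cover all-continuous and all-categorical inputs. Below is a
# hedged sketch (not part of the original suite) of a test for genuinely mixed
# data on a small synthetic dataset; it assumes, as in
# test_categorical_data_simple, that MixedNB takes the categorical column
# indices via categorical_features.
def test_mixed_data_synthetic():
    # Column 0 is categorical (label-encoded as 0-2), column 1 is continuous.
    X = np.array([[0, 1.2],
                  [1, 0.7],
                  [2, 3.1],
                  [0, 2.5],
                  [1, 0.9],
                  [2, 2.8]])
    y = np.array([0, 0, 1, 1, 0, 1])

    mixed_nb = MixedNB(categorical_features=[0])
    mixed_nb.fit(X, y)
    assert 0.0 <= mixed_nb.score(X, y) <= 1.0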
# Required imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
from mixed_naive_bayes import MixedNB

# X is assumed to be a pandas DataFrame of mixed categorical/continuous
# features and y the class labels, loaded earlier in the example.

# Change categorical variables into numerical variables
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
categorical_columns = [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                       19, 20, 21, 22, 23, 24, 26, 28, 30, 31, 32, 33, 34, 35]
X.iloc[:, categorical_columns] = X.iloc[:, categorical_columns].apply(
    LabelEncoder().fit_transform)

# Split the dataset into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=1234, stratify=y)

# Build a Bayesian classification model and predict on the test data
gnb = MixedNB(categorical_features=categorical_columns)
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Calculate the accuracy
accuracy = gnb.score(X_test, y_test)
print('Accuracy: {0:.2f}'.format(accuracy))

# Build a confusion matrix
cm = metrics.confusion_matrix(y_test, y_pred)
print(metrics.classification_report(y_test, y_pred))

# Bayesian for PCA dataset
# Load dataset
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import metrics
import statsmodels.api as sm
# ## MixedNB with digits dataset
# ### using categorical naive bayes

# Load the required modules
import numpy as np
from sklearn.datasets import load_digits
from sklearn.naive_bayes import GaussianNB
from mixed_naive_bayes import MixedNB

# Load the digits dataset
digits = load_digits()
X = digits['data']
y = digits['target']

# Fit to `sklearn`'s GaussianNB
gaussian_nb = GaussianNB()
gaussian_nb.fit(X, y)
gaussian_nb_score = gaussian_nb.score(X, y)

# Fit to our classifier, treating each of the 64 pixels as a categorical
# feature with 17 possible values (pixel intensities 0-16)
mixed_nb = MixedNB(categorical_features='all',
                   max_categories=np.repeat(17, 64))
mixed_nb.fit(X, y)
mixed_nb_score = mixed_nb.score(X, y)

print(gaussian_nb_score)
print(mixed_nb_score)
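# As an optional cross-check (an added illustration, not part of the original
# example), one can fit scikit-learn's own CategoricalNB on the same data,
# since every pixel is being treated as a categorical feature here. The scores
# need not match exactly, because the smoothing defaults of MixedNB and
# CategoricalNB may differ.
from sklearn.naive_bayes import CategoricalNB

categorical_nb = CategoricalNB()
categorical_nb.fit(X, y)
print(categorical_nb.score(X, y))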