def test_continuous_data_iris():
    """Sanity check: on an all-continuous dataset (iris), MixedNB must
    produce exactly the same predictions as sklearn's GaussianNB."""
    dataset = load_iris()
    features = dataset['data']
    labels = dataset['target']

    # Reference model: sklearn's Gaussian naive Bayes.
    reference = GaussianNB()
    reference.fit(features, labels)
    reference_pred = reference.predict(features)

    # Candidate model: our MixedNB with no categorical features declared.
    candidate = MixedNB()
    candidate.fit(features, labels)
    candidate_pred = candidate.predict(features)

    assert (candidate_pred == reference_pred).all()
# Cross-validation result recorded from a previous run (kept for reference):
#   scores = cross_val_score(best_clf, SX_train.astype(float), y_train.astype(int),
#                            n_jobs=16, cv=StratifiedKFold(5))
#   accuracy = scores.mean(); sd = scores.std()
#   -> 0.8330165782220578 +/- 0.029993214054194695

# Fit the optimised model on the full training set (same dtypes as CV above).
best_clf.fit(SX_train.astype(float), y_train.astype(int))
SX_test.to_csv(Output_name + 'SX_test.csv')

# Persist the fitted model to disk.
filename = Output_name + 'Naive_Bayes_finalized_model.mod'
with open(filename, "wb") as file:
    pickle.dump(best_clf, file)

### Evaluate performance on the training set ###
# Fix: the model was fitted on float-cast features, so predict on the same
# dtype; predicting on the raw frame risked dtype-dependent behaviour.
y_train_pred = best_clf.predict(SX_train.astype(float))

# Confusion matrix, per-class report, and balanced accuracy.
cm_train = confusion_matrix(y_train, y_train_pred)
print(cm_train)
train_report = classification_report(y_train, y_train_pred)
print(train_report)
balanced_accuracy = balanced_accuracy_score(y_train, y_train_pred)
print(balanced_accuracy)

# Sensitivity (recall of the positive class) = TP / (TP + FN),
# taken from row 1 of the confusion matrix (true label == 1).
sensitivity = cm_train[1, 1] / (cm_train[1, 0] + cm_train[1, 1])
print(sensitivity)
# Divide the dataset into target y ('Severity') and features X.
y = pro2['Severity']
X = pro2.iloc[:, 1:]

# Indices of the categorical feature columns. Defined ONCE so the label
# encoding step and the MixedNB configuration cannot drift out of sync
# (previously this 29-element list was duplicated verbatim in both places).
CATEGORICAL_COLS = [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                    19, 20, 21, 22, 23, 24, 26, 28, 30, 31, 32, 33, 34, 35]

# Change categorical variables into numerical variables.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
X.iloc[:, CATEGORICAL_COLS] = X.iloc[:, CATEGORICAL_COLS].apply(
    LabelEncoder().fit_transform)

# Split into training and test sets (20% held out, stratified on y).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=1234, stratify=y)

# Build a Bayesian classification model and predict on the test data.
gnb = MixedNB(categorical_features=CATEGORICAL_COLS)
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Calculate the accuracy on the held-out test set.
accuracy = gnb.score(X_test, y_test)
print('Accuracy: {0:.2f}'.format(accuracy))

# Build a confusion matrix and print the per-class report.
cm = metrics.confusion_matrix(y_test, y_pred)
print(metrics.classification_report(y_test, y_pred))

# Bayesian for PCA dataset
# Load dataset
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
Run benchmarks on toy datasets provided by sklearn. This is to ensure our implementation of Gaussian Naive Bayes is the same as sklearn's. """ from sklearn.datasets import load_iris, load_digits, \ load_wine, load_breast_cancer from sklearn.naive_bayes import GaussianNB from mixed_naive_bayes import MixedNB for load_data in [load_iris, load_digits, load_wine, load_breast_cancer]: print(f"--- {''.join(load_data.__name__.split('_')[1:])} ---") dataset = load_data() X = dataset['data'] y = dataset['target'] gaussian_nb = GaussianNB() gaussian_nb.fit(X, y) gaussian_nb_pred = gaussian_nb.predict(X) mixed_nb = MixedNB() mixed_nb.fit(X, y) mixed_nb_pred = mixed_nb.predict(X) print(f"GaussianNB: {gaussian_nb.score(X,y)}") print(f"MixedNB : {mixed_nb.score(X,y)}")