예제 #1
0
def test_continuous_data_iris():
    """Check that MixedNB and GaussianNB predict the same labels on iris.

    Both classifiers are constructed with default arguments, fitted on the
    full iris dataset, and asked to predict that same data. The test passes
    only if every predicted label matches.
    """
    bunch = load_iris()
    features, labels = bunch['data'], bunch['target']

    # Reference predictions from GaussianNB.
    reference_model = GaussianNB()
    reference_model.fit(features, labels)
    expected = reference_model.predict(features)

    # Predictions from the implementation under test.
    candidate_model = MixedNB()
    candidate_model.fit(features, labels)
    actual = candidate_model.predict(features)

    predictions_match = (actual == expected).all()
    assert predictions_match
    # NOTE(review): the statements from here to the end of this indented run
    # use names that are not defined anywhere in the visible source
    # (best_clf, SX_train, SX_test, y_train, Output_name, pickle,
    # confusion_matrix, classification_report, balanced_accuracy_score).
    # This looks like a fragment of a separate model-training script --
    # confirm where these names come from before relying on it.

    # Disabled cross-validation of best_clf, kept for reference:
    #scores = cross_val_score(best_clf, SX_train.astype(float), y_train.astype(int), n_jobs=16, cv=StratifiedKFold(5))
    #accuracy = scores.mean()
    #sd= (scores.std())
    # 0.8330165782220578 +/- 0.029993214054194695
    # (presumably the accuracy +/- sd recorded from a previous run -- TODO confirm)

    # Fit optimised model on the training features cast to float and the
    # training labels cast to int.
    best_clf.fit(SX_train.astype(float),y_train.astype(int))
    # Write the test feature table to '<Output_name>SX_test.csv'.
    SX_test.to_csv(Output_name+'SX_test.csv')


    # Serialise the fitted model to disk with pickle.
    filename = Output_name+'Naive_Bayes_finalized_model.mod'
    with open( filename, "wb") as file:
        pickle.dump(best_clf, file)

    ### Evaluate performance on the training set ###
    # NOTE(review): fit() above received SX_train.astype(float), but predict()
    # is given SX_train without the cast -- confirm this difference is intended.
    y_train_pred = best_clf.predict(SX_train)

    # Print confusion matrix of training labels vs. training predictions
    cm_train = confusion_matrix(y_train, y_train_pred)
    print(cm_train)


    # Print the classification report for the training set
    train_report = classification_report(y_train, y_train_pred)
    print (train_report)

    # Print the balanced accuracy on the training set
    balanced_accuracy = balanced_accuracy_score(y_train, y_train_pred)
    print(balanced_accuracy)

    # Ratio of entry [1,1] to the sum of row 1 of the confusion matrix.
    # NOTE(review): assumes confusion_matrix is sklearn's (rows = true labels,
    # columns = predicted labels), which makes this TP / (FN + TP) for the
    # class at index 1 -- TODO confirm.
    sensitivity =  cm_train[1,1]/(cm_train[1,0]+cm_train[1,1])
    print(sensitivity)
# Divide the dataset into the target (y) and the features (X).
# NOTE(review): assumes 'Severity' is column 0 of pro2, so that iloc[:, 1:]
# leaves it out of the features -- confirm against where pro2 is built.
y = pro2['Severity']
# Take an explicit copy: the encoding below assigns into X, and assigning into
# a slice of pro2 is chained assignment (SettingWithCopyWarning, and unclear
# whether pro2 itself gets modified).
X = pro2.iloc[:, 1:].copy()

# Positional indices (within X) of the categorical feature columns. Defined
# once so the label-encoding step and the model's `categorical_features`
# argument cannot drift apart.
categorical_cols = [
    0, 1, 2, 3, 4,
    9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    26, 28, 30, 31, 32, 33, 34, 35,
]

# Change categorical variables into numerical variables.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
# fit_transform re-fits the encoder on every column it is applied to, so each
# categorical column gets its own independent integer coding.
X.iloc[:, categorical_cols] = X.iloc[:, categorical_cols].apply(LabelEncoder().fit_transform)

# Split the dataset into training dataset and test dataset
# (80/20, stratified on the encoded target, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=1234, stratify=y
)

# Build a Bayesian Classification Model and predict the type using the test data.
gnb = MixedNB(categorical_features=categorical_cols)
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Calculate the accuracy
accuracy = gnb.score(X_test, y_test)
print('Accuracy: {0:.2f}'.format(accuracy))

# Build a confusion matrix (stored in `cm`, not printed) and print the
# per-class classification report.
cm = metrics.confusion_matrix(y_test, y_pred)
print(metrics.classification_report(y_test, y_pred))


#Bayesian for PCA dataset
# Load dataset
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
예제 #4
0
"""
Run benchmarks on toy datasets provided by sklearn.
This is to ensure our implementation of Gaussian Naive
Bayes is the same as sklearn's.
"""

from sklearn.datasets import load_iris, load_digits, \
    load_wine, load_breast_cancer
from sklearn.naive_bayes import GaussianNB
from mixed_naive_bayes import MixedNB

# For each sklearn toy dataset: fit GaussianNB and MixedNB on the full data,
# predict on that same data, and print both scores on it.
for load_data in (load_iris, load_digits, load_wine, load_breast_cancer):

    # Header is the loader's name without its first '_'-separated piece and
    # with the remaining pieces concatenated (load_breast_cancer -> breastcancer).
    _, *name_parts = load_data.__name__.split('_')
    dataset_name = ''.join(name_parts)
    print(f"--- {dataset_name} ---")

    dataset = load_data()
    X, y = dataset['data'], dataset['target']

    # sklearn's Gaussian Naive Bayes.
    gaussian_nb = GaussianNB()
    gaussian_nb.fit(X, y)
    gaussian_nb_pred = gaussian_nb.predict(X)

    # MixedNB constructed with default arguments.
    mixed_nb = MixedNB()
    mixed_nb.fit(X, y)
    mixed_nb_pred = mixed_nb.predict(X)

    gaussian_score = gaussian_nb.score(X, y)
    print(f"GaussianNB: {gaussian_score}")
    mixed_score = mixed_nb.score(X, y)
    print(f"MixedNB   : {mixed_score}")