Example no. 1
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB

from mixed_naive_bayes import MixedNB


def test_continuous_data_iris():
    # On purely continuous data, MixedNB should behave exactly like GaussianNB
    iris = load_iris()
    X = iris['data']
    y = iris['target']

    gaussian_nb = GaussianNB()
    gaussian_nb.fit(X, y)
    gaussian_nb_pred = gaussian_nb.predict(X)

    mixed_nb = MixedNB()
    mixed_nb.fit(X, y)
    mixed_nb_pred = mixed_nb.predict(X)

    assert (mixed_nb_pred == gaussian_nb_pred).all()
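A stricter variant of this test could compare predicted probabilities rather than labels. The sketch below is an assumption-laden addition: it presumes MixedNB mirrors sklearn's predict_proba API, and it uses a loose tolerance since the two implementations may differ in numerical details.

import numpy as np

def test_continuous_proba_iris():
    iris = load_iris()
    X = iris['data']
    y = iris['target']

    # Compare full class-probability matrices, not just the argmax labels
    gaussian_proba = GaussianNB().fit(X, y).predict_proba(X)
    mixed_proba = MixedNB().fit(X, y).predict_proba(X)

    assert np.allclose(gaussian_proba, mixed_proba, atol=1e-3)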


# Assumes best_clf, SX_train, SX_test, y_train and Output_name are defined
# earlier in the pipeline (e.g. by a hyperparameter search and a data split).
import pickle

from sklearn.metrics import (balanced_accuracy_score, classification_report,
                             confusion_matrix)

# Cross-validation scores recorded for the optimised model:
# scores = cross_val_score(best_clf, SX_train.astype(float), y_train.astype(int),
#                          n_jobs=16, cv=StratifiedKFold(5))
# accuracy = scores.mean()
# sd = scores.std()
# 0.8330165782220578 +/- 0.029993214054194695

# Fit optimised model
best_clf.fit(SX_train.astype(float), y_train.astype(int))
SX_test.to_csv(Output_name + 'SX_test.csv')

# Serialise the fitted model to disk
filename = Output_name + 'Naive_Bayes_finalized_model.mod'
with open(filename, "wb") as file:
    pickle.dump(best_clf, file)

### Evaluate performance on the training set ###
y_train_pred = best_clf.predict(SX_train.astype(float))

# Print confusion matrix
cm_train = confusion_matrix(y_train, y_train_pred)
print(cm_train)

train_report = classification_report(y_train, y_train_pred)
print(train_report)

balanced_accuracy = balanced_accuracy_score(y_train, y_train_pred)
print(balanced_accuracy)

# Sensitivity (recall of the positive class) from the confusion matrix
sensitivity = cm_train[1, 1] / (cm_train[1, 0] + cm_train[1, 1])
print(sensitivity)
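
To reuse the serialised model later, it can be loaded back and scored on held-out data. This is a minimal sketch, assuming the files written above exist on disk and that y_test is available from the same split; none of these lines appear in the original pipeline.

import pickle

import pandas as pd

# Load the pickled classifier and the saved test features back from disk
with open(Output_name + 'Naive_Bayes_finalized_model.mod', "rb") as file:
    loaded_clf = pickle.load(file)
SX_test_loaded = pd.read_csv(Output_name + 'SX_test.csv', index_col=0)

# Score the reloaded model on the held-out test set (y_test assumed available)
y_test_pred = loaded_clf.predict(SX_test_loaded.astype(float))
print(confusion_matrix(y_test, y_test_pred))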


# Assumes pro2 is a DataFrame loaded earlier, with 'Severity' as the target.
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from mixed_naive_bayes import MixedNB

# Divide the dataset into y and X
y = pro2['Severity']
X = pro2.iloc[:, 1:]

# Indices of the categorical columns, reused for encoding and for MixedNB
cat_cols = [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            21, 22, 23, 24, 26, 28, 30, 31, 32, 33, 34, 35]

# Change categorical variables into numerical variables
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
X.iloc[:, cat_cols] = X.iloc[:, cat_cols].apply(LabelEncoder().fit_transform)

# Split the dataset into training dataset and test dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234, stratify=y)

# Build a Bayesian classification model and predict the class on the test data
gnb = MixedNB(categorical_features=cat_cols)
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Calculate the accuracy
accuracy = gnb.score(X_test, y_test)
print('Accuracy: {0:.2f}'.format(accuracy))

# Build a confusion matrix
cm = metrics.confusion_matrix(y_test, y_pred)
print(cm)
print(metrics.classification_report(y_test, y_pred))
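
Since the Severity classes may be imbalanced, plain accuracy can be misleading. As a hedged follow-up (standard sklearn/numpy, not part of the original snippet), balanced accuracy and a row-normalised confusion matrix expose per-class behaviour:

import numpy as np

# Balanced accuracy averages recall over classes, so a majority class
# cannot dominate the score the way it can with plain accuracy
print('Balanced accuracy: {0:.2f}'.format(
    metrics.balanced_accuracy_score(y_test, y_pred)))

# Each row of the normalised confusion matrix sums to 1 and shows the
# recall profile of one true class
cm_normalised = cm / cm.sum(axis=1, keepdims=True)
print(np.round(cm_normalised, 2))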


# Bayesian classifier for the PCA dataset
# Load dataset
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
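
The PCA snippet breaks off after its imports. The following is a minimal sketch of what a PCA-then-GaussianNB pipeline could look like; the file name pca_dataset.csv, the pca_df and X_proj names, and the choice of 10 components are all hypothetical, introduced only for illustration.

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

# Hypothetical: load a dataset prepared for PCA (file name assumed)
pca_df = pd.read_csv('pca_dataset.csv')
y_pca = pca_df.iloc[:, 0]
X_pca = pca_df.iloc[:, 1:]

# Project onto the leading principal components, then fit GaussianNB
X_proj = PCA(n_components=10).fit_transform(X_pca)
X_tr, X_te, y_tr, y_te = train_test_split(
    X_proj, y_pca, test_size=0.2, random_state=1234, stratify=y_pca)

gnb_pca = GaussianNB().fit(X_tr, y_tr)
print('PCA + GaussianNB accuracy: {0:.2f}'.format(gnb_pca.score(X_te, y_te)))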

"""
Run benchmarks on toy datasets provided by sklearn.
This is to ensure our implementation of Gaussian Naive
Bayes is the same as sklearn's.
"""

from sklearn.datasets import (load_iris, load_digits,
                              load_wine, load_breast_cancer)
from sklearn.naive_bayes import GaussianNB
from mixed_naive_bayes import MixedNB

for load_data in [load_iris, load_digits, load_wine,
                  load_breast_cancer]:

    print(f"--- {''.join(load_data.__name__.split('_')[1:])} ---")

    dataset = load_data()

    X = dataset['data']
    y = dataset['target']

    gaussian_nb = GaussianNB()
    gaussian_nb.fit(X, y)
    gaussian_nb_pred = gaussian_nb.predict(X)

    mixed_nb = MixedNB()
    mixed_nb.fit(X, y)
    mixed_nb_pred = mixed_nb.predict(X)

    print(f"GaussianNB: {gaussian_nb.score(X,y)}")
    print(f"MixedNB   : {mixed_nb.score(X,y)}")