Exemple #1
0
 def setUp(self):
     """Create the classifier instances used by the tests.

     Builds four ``NaiveBayes`` objects (multinomial, Bernoulli, and the
     multinomial model with the ``cnb`` and ``wcnb`` flags set) plus one
     ``MultinomialNB`` and one ``BernoulliNB`` reference model.
     """
     # NOTE(review): the ``sk`` prefix suggests the reference models come from
     # scikit-learn -- confirm against the file's imports.
     self.mnb = NaiveBayes(multinomial=True)
     self.skmnb = MultinomialNB()
     self.bnb = NaiveBayes(bernoulli=True)
     self.skbnb = BernoulliNB()
     self.cnb = NaiveBayes(multinomial=True, cnb=True)
     self.wcnb = NaiveBayes(multinomial=True, wcnb=True)
def naive_bayes_metrics(x, y, x_labels, y_labels):
    """Run the Gaussian Naive Bayes evaluation and collect its results.

    The four arguments are forwarded unchanged to ``NaiveBayes``. The model's
    cross-validated predictions are displayed via ``show_basic_metrics`` and
    ``plot_confusion_matrix`` and summarised via ``count_basic_metrics``.

    Returns:
        A one-element list whose single entry is
        ``[label, acc, f1, predictions, confusion_matrix]``, where ``acc`` and
        ``f1`` are the two values returned by ``count_basic_metrics``
        (presumably accuracy and F1 score -- confirm in ``NaiveBayes``).
    """
    title = 'Gaussian Naive Bayes'
    model = NaiveBayes(x, y, x_labels, y_labels)

    # The misspelling "navie" is part of the model's method name.
    predictions = model.crossval_gaussian_navie_bayes()

    model.show_basic_metrics(predictions, label=title)
    confusion = model.plot_confusion_matrix(predictions, label=title)
    first_metric, second_metric = model.count_basic_metrics(predictions)

    result_row = [title, first_metric, second_metric, predictions, confusion]
    return [result_row]
Exemple #3
0
class NBTest(unittest.TestCase):
    """Check the project's ``NaiveBayes`` variants against reference models.

    The tests rely on module-level fixtures defined outside this class:
    ``X_count`` / ``X_count_test``, ``X_tfidf`` / ``X_tfidf_test``,
    ``train_targets`` and ``test_targets``.
    """

    def setUp(self):
        """Create fresh, unfitted models before every test."""
        # NOTE(review): the ``sk`` prefix suggests the reference models come
        # from scikit-learn -- confirm against the file's imports.
        self.mnb = NaiveBayes(multinomial=True)
        self.skmnb = MultinomialNB()
        self.bnb = NaiveBayes(bernoulli=True)
        self.skbnb = BernoulliNB()
        self.cnb = NaiveBayes(multinomial=True, cnb=True)
        self.wcnb = NaiveBayes(multinomial=True, wcnb=True)

    def test_count_vectorized(self):
        """Multinomial model must score exactly like the reference on X_count."""
        self.mnb.fit(X_count, train_targets)
        self.skmnb.fit(X_count, train_targets)
        self.assertEqual(self.mnb.score(X_count_test, test_targets),
                         self.skmnb.score(X_count_test, test_targets))

    def test_tfidf_vectorized(self):
        """Multinomial model must score exactly like the reference on X_tfidf."""
        self.mnb.fit(X_tfidf, train_targets)
        self.skmnb.fit(X_tfidf, train_targets)
        self.assertEqual(self.mnb.score(X_tfidf_test, test_targets),
                         self.skmnb.score(X_tfidf_test, test_targets))

    def test_cnb(self):
        """The ``cnb`` model's score may trail the multinomial score by < 0.1."""
        self.cnb.fit(X_count, train_targets)
        self.mnb.fit(X_count, train_targets)
        cnb_score = self.cnb.score(X_count_test, test_targets)
        mnb_score = self.mnb.score(X_count_test, test_targets)
        # Parenthesised single-argument print produces the same output on
        # Python 2 and is valid syntax on Python 3.
        print("CNB: {},   MNB: {}".format(cnb_score, mnb_score))
        # assertGreater is not stripped under ``python -O`` and reports both
        # operands on failure, unlike a bare ``assert``.
        self.assertGreater(cnb_score - mnb_score, -0.1)

    def test_wcnb(self):
        """The ``wcnb`` model's score may trail the multinomial score by < 0.5."""
        self.wcnb.fit(X_count, train_targets)
        self.mnb.fit(X_count, train_targets)
        wcnb_score = self.wcnb.score(X_count_test, test_targets)
        mnb_score = self.mnb.score(X_count_test, test_targets)
        print("WCNB: {},   MNB: {}".format(wcnb_score, mnb_score))
        self.assertGreater(wcnb_score - mnb_score, -0.5)
from misc.constants import *


def model_output(classification_model: ClassificationModel, tune_start, tune_end):
    """Tune, fit and evaluate a model, then write its report to the workbook.

    Args:
        classification_model: The model to run; it must provide ``tune``,
            ``fit``, ``evaluate_model``, ``name`` and ``report_dict``.
        tune_start: First argument forwarded to ``tune``.
        tune_end: Second argument forwarded to ``tune``.
    """
    model = classification_model

    # Pipeline order matters: tune, then fit, then evaluate.
    model.tune(tune_start, tune_end)
    model.fit()
    model.evaluate_model()

    # The writer is created from the model's name and receives its report dict.
    ExcelWriter(model.name).edit_sheet(model.report_dict)


if __name__ == '__main__':
    # Remove any workbook left at WORK_BOOK_PATH before the models are run.
    if os.path.exists(WORK_BOOK_PATH):
        os.remove(WORK_BOOK_PATH)

    # Parse the input and convert samples and labels to arrays.
    input_parser = InputParser()
    data_tuple = input_parser.get_samples_and_labels()
    samples = np.array(data_tuple[0])
    labels = np.array(data_tuple[1])

    # Fixed random_state keeps the train/test partition reproducible.
    training_samples, test_samples, training_labels, test_labels = train_test_split(
        samples, labels, test_size=TEST_SIZE, random_state=0)

    # Every model is constructed from the same four partitions; only the
    # tuning bounds differ (None, None where no bounds are supplied).
    partitions = (training_samples, test_samples, training_labels, test_labels)
    model_runs = (
        (NaiveBayes, None, None),
        (KNearest, MIN_N_NEIGHBOUR, MAX_N_NEIGHBOUR),
        (RandomForest, MIN_N_ESTIMATE, MAX_N_ESTIMATE),
        (DecisionTree, None, None),
        (AdaBoost, MIN_N_ESTIMATE, MAX_N_ESTIMATE),
    )
    for model_class, lower_bound, upper_bound in model_runs:
        model_output(model_class(*partitions), lower_bound, upper_bound)
Exemple #5
0
"""Prediction on the data set"""
import pandas as pd

from classifiers.naive_bayes import NaiveBayes
from classifiers.util import train_test_split, confusion_matrix, accuracy_score

# Load the processed table; "Outcome" is the target column and every other
# column is used as a feature.
data = pd.read_csv("./dataset/processed.csv")
features = data.drop("Outcome", axis=1).values
targets = data["Outcome"].values

# Split into training and test partitions.
# NOTE(review): test_size=20 is interpreted by classifiers.util.train_test_split
# -- confirm whether it means a row count or a percentage.
partitions = train_test_split(features, targets, test_size=20)
X_train, X_test, y_train, y_test = partitions

# Build the classifier from the training data, then fit it on the same data.
nb = NaiveBayes(X_train, y_train)
nb.fit(X_train, y_train)

# Predict on the held-out partition and report the accuracy as a percentage.
y_predictions = nb.predict(X_test)
print(f"The accuracy score :: {accuracy_score(y_predictions, y_test) * 100} %")

# Report the confusion matrix of true labels against predictions.
print("Confusion Matrix ::")
print(f"{confusion_matrix(y_test, y_predictions)}")