def Naive_random_accuracy(X_train, X_test, y_train, y_test):
    """
    Prints the hold-out accuracy of a Naive Bayes classifier.

    A fresh MyNaiveBayesClassifier is fitted on the training split and asked
    to predict the test split. Both label lists are then handed to
    myutils.convert_mpg_to_categorical before the accuracy is computed with
    get_accuracy and printed together with the error rate.

    Args:
        X_train: (list of list) training instances
        X_test: (list of list) instances to classify
        y_train: (list) labels parallel to X_train
        y_test: (list) true labels parallel to X_test
    Returns:
        None, the results are only printed
    """
    # train on the training split, then classify the held-out split
    classifier = MyNaiveBayesClassifier()
    classifier.fit(X_train=X_train, y_train=y_train)
    predictions = classifier.predict(X_test)

    # the helper's return value is ignored here
    # NOTE(review): presumably it converts the list in place -- confirm
    for labels in (predictions, y_test):
        myutils.convert_mpg_to_categorical(labels)

    accuracy = get_accuracy(predictions, y_test)

    banner = "==========================================="
    print(banner)
    print("STEP #2: Predictive Accuracy")
    print(banner)
    print("Random Subsample (k=10, 2:1 Train/Test)")
    print("Naive Bayes: accuracy = ", accuracy, " error rate = ", 1 - accuracy)
def perform_Naive_cross_validation(X_train2, X_train_folds, X_test_folds, y_train2):
    """
    Performs cross validation on the passed in folds for Naive Bayes.

    For every fold a fresh MyNaiveBayesClassifier is fitted on the instances
    named by the train indices and used to predict the instances named by the
    test indices. The fold lists are only read: earlier versions replaced the
    indices inside X_train_folds / X_test_folds with the instances themselves,
    which silently corrupted the caller's folds for any later use.

    Args:
        X_train2: (list of list) all instances, indexed by the fold indices
        X_train_folds: (list of lists) training indices, one list per fold
        X_test_folds: (list of lists) test indices, one list per fold
        y_train2: (list) labels parallel to X_train2
    Returns:
        y_predict: (list) predicted labels, fold after fold
        y_test: (list) parallel list of actual labels
    """
    y_predict = []
    y_test = []

    for fold_number, test_indices in enumerate(X_test_folds):
        train_indices = X_train_folds[fold_number]

        # materialise the fold into new lists instead of writing the
        # instances back into the caller's index lists
        X_test = [X_train2[index] for index in test_indices]
        y_test.extend(y_train2[index] for index in test_indices)
        X_train = [X_train2[index] for index in train_indices]
        y_train = [y_train2[index] for index in train_indices]

        # fit on this fold's training part and collect its predictions
        Naive = MyNaiveBayesClassifier()
        Naive.fit(X_train=X_train, y_train=y_train)
        y_predict.extend(Naive.predict(X_test))

    return y_predict, y_test
def test_naive_bayes_classifier_fit():
    """
    Checks the priors and posteriors produced by fit() on three datasets.

    The training data and the expected tables are fixtures defined outside
    this function. A single classifier instance is re-fitted for each dataset.
    """
    classifier = MyNaiveBayesClassifier()

    def refit_and_check(X, y, expected_priors, expected_posteriors):
        # fit() is expected to replace whatever the previous fit stored
        classifier.fit(X, y)
        assert expected_priors == classifier.priors
        assert expected_posteriors == classifier.posteriors

    # 8 instance dataset from class
    refit_and_check(class_x_train, class_y_train,
                    class_priors, class_posteriors)

    # iPhone dataset from the reading quiz
    refit_and_check(iphone_x_train, iphone_y_train,
                    iphone_priors, iphone_posteriors)

    # train dataset from the textbook
    refit_and_check(train_x_train, train_y_train,
                    train_priors, train_posteriors)
def test_naive_bayes_classifier_predict():
    """
    Checks predict() on three tables whose last column is the class label.

    attr_table, iphone_table and train_table are defined outside this
    function. The same classifier instance is re-fitted for every table.
    """
    myNaiveBayes = MyNaiveBayesClassifier()

    def split_table(table):
        # last column is the label, everything before it is an attribute
        labels = [row[-1] for row in table]
        features = [row[:-1] for row in table]
        return features, labels

    # Test Case 1: example traced out in class
    features, labels = split_table(attr_table)
    myNaiveBayes.fit(features, labels)
    unseen = [[1, 5]]
    predicted = myNaiveBayes.predict(unseen)
    assert predicted == ["yes"]

    # Test Case 2: RQ5 example
    features, labels = split_table(iphone_table)
    myNaiveBayes.fit(features, labels)
    unseen = [[2, 2, "fair"], [1, 1, "excellent"]]
    predicted = myNaiveBayes.predict(unseen)
    assert predicted == ["yes", "no"]

    # Test Case 3: Bramer 3.2 train dataset
    features, labels = split_table(train_table)
    myNaiveBayes.fit(features, labels)
    unseen = [
        ["weekday", "winter", "high", "heavy"],
        ["weekday", "summer", "high", "heavy"],
        ["sunday", "summer", "normal", "slight"],
    ]
    predicted = myNaiveBayes.predict(unseen)
    assert predicted == ["very late", "on time", "on time"]
def test_naive_bayes_classifier_fit():
    """
    Checks the numeric priors and posteriors stored by fit() on three tables.

    After each fit the numeric part of classifier.priors and
    classifier.posteriors is pulled out with get_subtable() and compared to
    the expected values with np.allclose. The comparison is positional, so
    the expected rows must be listed in the order the classifier stores them.
    attr_table, iphone_table and train_table are defined outside this
    function and carry the class label in their last column.
    """
    myNaiveBayes = MyNaiveBayesClassifier()

    def fit_and_compare(table, true_priors, true_posteriors):
        # split every row into attributes and trailing label, then fit
        labels = [row[-1] for row in table]
        features = [row[:-1] for row in table]
        myNaiveBayes.fit(features, labels)

        # only the second element returned by get_subtable() is compared
        _, pred_priors = myNaiveBayes.priors.get_subtable(0, 'end', 1, 'end')
        _, pred_posteriors = myNaiveBayes.posteriors.get_subtable(
            0, 'end', 2, 'end')
        assert np.allclose(true_priors, pred_priors)
        assert np.allclose(true_posteriors, pred_posteriors)

    # Test Case 1: example traced out in class
    fit_and_compare(
        attr_table,
        [[(5 / 8)], [(3 / 8)]],
        [[(4 / 5), (2 / 3)],
         [(1 / 5), (1 / 3)],
         [(2 / 5), (2 / 3)],
         [(3 / 5), (1 / 3)]],
    )

    # Test Case 2: RQ5 example
    fit_and_compare(
        iphone_table,
        [[(5 / 15)], [(10 / 15)]],
        [[0.6, 0.2],
         [0.4, 0.8],
         [0.4, 0.3],
         [0.4, 0.4],
         [0.2, 0.3],
         [0.4, 0.7],
         [0.6, 0.3]],
    )

    # Test Case 3: Bramer 3.2 train dataset (four class labels)
    fit_and_compare(
        train_table,
        [[(14 / 20)], [(2 / 20)], [(3 / 20)], [(1 / 20)]],
        [[0.6428571428571429, 0.5, 1.0, 0.0],
         [0.14285714285714285, 0.5, 0.0, 1.0],
         [0.14285714285714285, 0.0, 0.0, 0.0],
         [0.07142857142857142, 0.0, 0.0, 0.0],
         [0.2857142857142857, 0.0, 0.0, 1.0],
         [0.14285714285714285, 1.0, 0.6666666666666666, 0.0],
         [0.42857142857142855, 0.0, 0.0, 0.0],
         [0.14285714285714285, 0.0, 0.3333333333333333, 0.0],
         [0.35714285714285715, 0.0, 0.0, 0.0],
         [0.2857142857142857, 0.5, 0.3333333333333333, 1.0],
         [0.35714285714285715, 0.5, 0.6666666666666666, 0.0],
         [0.35714285714285715, 0.5, 0.3333333333333333, 0.0],
         [0.5714285714285714, 0.0, 0.0, 0.0],
         [0.07142857142857142, 0.5, 0.6666666666666666, 1.0]],
    )
# Build a Naive Bayes model of collision type from the collisions table and
# pickle it to 'driving_bayes.p'.

# attribute columns used as features
weather = collisions_data.get_column('WEATHER')
road_condition = collisions_data.get_column('ROADCOND')
light_condition = collisions_data.get_column('LIGHTCOND')
junction_type = collisions_data.get_column('JUNCTIONTYPE')
severity = collisions_data.get_column('SEVERITYDESC')

# one feature row per table row, in table order
X_train = [[
    weather[i], road_condition[i], light_condition[i], junction_type[i],
    severity[i]
] for i in range(len(weather))]
y_train = collisions_data.get_column('COLLISIONTYPE')

# Drop every instance whose label is 'Unknown'. The rows are filtered into
# new lists: deleting from the lists while enumerating them skips the element
# that follows each deletion, so runs of 'Unknown' labels were left behind.
_known_pairs = [(row, label) for row, label in zip(X_train, y_train)
                if label != 'Unknown']
X_train = [row for row, _ in _known_pairs]
y_train = [label for _, label in _known_pairs]

# stratified 10-fold split, then resolve the folds through the project helper
strattrain_folds, strattest_folds = myevaluation.stratified_kfold_cross_validation(
    X_train, y_train, 10)
strat_xtrain, strat_ytrain, strat_xtest, strat_ytest = myutils.get_from_folds(
    X_train, y_train, strattrain_folds, strattest_folds)

myb = MyNaiveBayesClassifier()
myb.fit(strat_xtrain, strat_ytrain)

packaged_object = myb
# pickle packaged object; the with-block closes the file even if dump() raises
with open('driving_bayes.p', 'wb') as outfile:
    pickle.dump(packaged_object, outfile)
import os

# Pickling (object serialization) saves a Python object to a binary file;
# unpickling (de-serialization) loads it from the file back into memory.
# This script prepares the object that is meant to be pickled.

# load the red wine data; "quality" is the label column
table = MyPyTable().load_from_file(
    os.path.join("input_files", "winequality-red.csv"))
y_col = table.get_column("quality", False)
x_cols = table.drop_col("quality")

# classifier that will be packaged
testcase = MyNaiveBayesClassifier()

# the split yields fold INDEXES (per the original note), which getInstances
# then resolves against x_cols / y_col
X_train, X_test = myevaluation.stratified_kfold_cross_validation(
    x_cols, y_col, n_splits=10)
X_train, X_test, y_train, y_test = myutils.getInstances(
    X_train, X_test, x_cols, y_col)

# only the first fold is used: the loop is left right after the first fit
for i, fold in enumerate(X_train):
    train, test = myutils.normalize_values(fold, X_test[i])
    testcase.fit(train, y_train[i])
    break

packaged_object = testcase
# pickle packaged_object
def test_naive_bayes_classifier_predict():
    """
    Checks predict() on the in-class, RQ5 iPhone and Bramer 3.2 datasets.

    The iPhone and train tables carry the class label in their last column.
    The label column is read through MyPyTable / myutils.get_mypycol and the
    classifier is fitted on the remaining columns only, so the training rows
    have the same attributes as the instances passed to predict().
    """
    # 8 instance dataset traced out in class
    train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]
    nb = MyNaiveBayesClassifier()
    nb.fit(train, y)
    pred = nb.predict([[1, 5]])
    # the original author left an unexplained "TODO: fix this" on this check
    assert pred == ["yes"]

    # RQ5 (fake) iPhone purchases dataset
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    mypy = MyPyTable(iphone_col_names, iphone_table)
    y2 = myutils.get_mypycol(mypy, "buys_iphone")
    # train on the attributes only; the label must not be fed in as a feature
    X2 = [row[:-1] for row in iphone_table]
    nb2 = MyNaiveBayesClassifier()
    nb2.fit(X2, y2)
    pred2 = nb2.predict([[1, 2, "fair"]])
    assert pred2 == ["yes"]

    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]
    mypy2 = MyPyTable(train_col_names, train_table)
    y3 = myutils.get_mypycol(mypy2, "class")
    X3 = [row[:-1] for row in train_table]
    nb3 = MyNaiveBayesClassifier()
    nb3.fit(X3, y3)
    pred3 = nb3.predict([["weekday", "winter", "high", "heavy"]])
    # "cancelled" never occurs on a weekday in this table, so its score is 0;
    # the largest product of prior and conditionals belongs to "very late"
    # (0.15 * 1 * 2/3 * 1/3 * 2/3)
    assert pred3 == ["very late"]
def test_naive_bayes_classifier_fit():
    """
    Checks the priors / posteriors stored by fit() on three datasets.

    The expected structures follow what the assertions below compare against:
    priors is a list of [label, probability] pairs and posteriors is a list
    of [attribute index, [label, [value, probability], ...], ...] entries.
    """
    # 8 instance dataset traced out in class
    train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]
    nb = MyNaiveBayesClassifier()
    nb.fit(train, y)
    assert nb.priors == [["yes", 5 / 8], ["no", 3 / 8]]
    assert nb.posteriors == [
        [0, ['yes', ['1', 0.8], ['2', 0.2]], ['no', ['1', 2/3], ['2', 1/3]]],
        [1, ['yes', ['5', 0.4], ['6', 0.6]], ['no', ['5', 2/3], ['6', 1/3]]],
    ]

    # RQ5 (fake) iPhone purchases dataset
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    mypy = MyPyTable(iphone_col_names, iphone_table)
    y2 = myutils.get_mypycol(mypy, "buys_iphone")
    nb2 = MyNaiveBayesClassifier()
    # NOTE(review): the rows passed here still contain the label column
    # (attribute index 3 in nb2_posts below) -- confirm that is intended
    nb2.fit(iphone_table, y2)
    assert nb2.priors == [["no", 1 / 3], ["yes", 2 / 3]]
    # expected posteriors kept from the original; the comparison is disabled
    nb2_posts = [[
        0, ['no', ['1', 3 / 15], ['2', 2 / 15]],
        ['yes', ['1', 2 / 15], ['2', 8 / 15]]
    ], [
        1, ['no', ['3', 2 / 15], ['2', 2 / 15], ['1', 2 / 3]],
        ['yes', ['3', 3 / 15], ['2', 4 / 15], ['1', 3 / 15]]
    ], [
        2, ['no', ['fair', 2 / 15], ['excellent', 3 / 15]],
        ['yes', ['fair', 7 / 15], ['excellent', 3 / 15]]
    ], [
        3, ['no', ['no', 1 / 3], ['yes', 0.0]],
        ['yes', ['no', 0.0], ['yes', 2 / 3]]
    ]]
    # assert nb2.posteriors == nb2_posts

    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]
    mypy2 = MyPyTable(train_col_names, train_table)
    y3 = myutils.get_mypycol(mypy2, "class")
    nb3 = MyNaiveBayesClassifier()
    # fit on the train table itself: the 20 labels in y3 belong to its rows,
    # not to the 15 iPhone rows that were passed here before
    nb3.fit(train_table, y3)
    # priors are checked by label with a small tolerance, so neither the
    # order of the pairs nor float round-off can break the comparison
    expected_priors = {
        "on time": 14 / 20,
        "late": 2 / 20,
        "very late": 3 / 20,
        "cancelled": 1 / 20,
    }
    fitted_priors = {label: prob for label, prob in nb3.priors}
    assert set(fitted_priors) == set(expected_priors)
    for label, prob in expected_priors.items():
        assert abs(fitted_priors[label] - prob) < 1e-9
def test_naive_bayes_classifier_predict():
    """
    Checks Naive Bayes predictions on three fixture datasets.

    Training data, test instances and expected labels are fixtures defined
    outside this function. The ZeroR and random classifiers at the end are
    only fitted and asked to predict; their output is not asserted.
    """
    test_predict = MyNaiveBayesClassifier()

    def refit_and_predict(X, y, unseen):
        # the same classifier object is re-fitted for every dataset
        test_predict.fit(X, y)
        return test_predict.predict(unseen)

    # class dataset
    class_predicted = refit_and_predict(class_x_train, class_y_train,
                                        class_test)
    assert class_actuals == class_predicted

    # iphone dataset
    iphone_predicted = refit_and_predict(iphone_x_train, iphone_y_train,
                                         iphone_test)
    assert iphone_actuals == iphone_predicted

    # train dataset
    train_predicted = refit_and_predict(train_x_train, train_y_train,
                                        train_test)
    assert train_actuals == train_predicted

    # MyZeroRClassifier: run only, nothing is checked
    another = MyZeroRClassifier()
    another.fit(iphone_x_train, iphone_y_train)
    another_class = another.predict(iphone_test)

    # MyRandomClassifier: run only, nothing is checked
    another2 = MyRandomClassifier()
    another2.fit(iphone_x_train, iphone_y_train)
    another_class2 = another2.predict(iphone_test)
def test_naive_bayes_classifier_predict():
    """
    Checks predict() on the in-class, iPhone and train datasets.

    Every case builds a new classifier from inline attribute rows and a
    parallel label list, then compares predictions element by element.
    """
    ### Test #1: 8 instance in-class dataset ###
    col_names = ["att1", "att2"]
    features = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5],
                [1, 6]]
    labels = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]
    classifier = MyNaiveBayesClassifier()
    classifier.fit(features, labels)
    predicted = classifier.predict([[1, 5]])
    expected = ["yes"]
    assert predicted[0] == expected[0]

    ### Test #2: iPhone purchases dataset ###
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    features = [[2, 3, "fair"], [2, 2, "fair"], [2, 1, "fair"],
                [2, 1, "excellent"], [2, 1, "excellent"], [1, 2, "fair"],
                [1, 1, "fair"], [2, 2, "fair"], [1, 2, "excellent"],
                [2, 2, "excellent"], [2, 3, "fair"], [2, 2, "excellent"],
                [2, 3, "fair"], [1, 3, "fair"], [1, 3, "excellent"]]
    labels = [
        "yes", "yes", "yes", "no", "yes", "no", "yes", "yes", "yes", "yes",
        "yes", "no", "yes", "no", "no"
    ]
    classifier = MyNaiveBayesClassifier()
    classifier.fit(features, labels)
    # the two instances are predicted in separate calls
    predicted = classifier.predict([[2, 2, "fair"]])
    expected = ["yes"]
    assert predicted[0] == expected[0]
    predicted = classifier.predict([[1, 1, "excellent"]])
    expected = ["no"]
    assert predicted[0] == expected[0]

    ### Test #3: train dataset ###
    train_col_names = ["day", "season", "wind", "rain", "class"]
    features = [["weekday", "spring", "none", "none"],
                ["weekday", "winter", "none", "slight"],
                ["weekday", "winter", "none", "slight"],
                ["weekday", "winter", "high", "heavy"],
                ["saturday", "summer", "normal", "none"],
                ["weekday", "autumn", "normal", "none"],
                ["holiday", "summer", "high", "slight"],
                ["sunday", "summer", "normal", "none"],
                ["weekday", "winter", "high", "heavy"],
                ["weekday", "summer", "none", "slight"],
                ["saturday", "spring", "high", "heavy"],
                ["weekday", "summer", "high", "slight"],
                ["saturday", "winter", "normal", "none"],
                ["weekday", "summer", "high", "none"],
                ["weekday", "winter", "normal", "heavy"],
                ["saturday", "autumn", "high", "slight"],
                ["weekday", "autumn", "none", "heavy"],
                ["holiday", "spring", "normal", "slight"],
                ["weekday", "spring", "normal", "none"],
                ["weekday", "spring", "normal", "slight"]]
    labels = [
        "on time", "on time", "on time", "late", "on time", "very late",
        "on time", "on time", "very late", "on time", "cancelled", "on time",
        "late", "on time", "very late", "on time", "on time", "on time",
        "on time", "on time"
    ]
    classifier = MyNaiveBayesClassifier()
    classifier.fit(features, labels)
    predicted = classifier.predict([["weekday", "winter", "high", "heavy"],
                                    ["saturday", "spring", "normal",
                                     "slight"]])
    expected = ["very late", "on time"]
    for position, wanted in enumerate(expected):
        assert predicted[position] == wanted
def test_naive_bayes_classifier_fit():
    """
    Checks the priors and posterior tables stored by fit() on three datasets.

    Expected posteriors are one table per attribute: a header row
    ["Attributes", <labels...>] followed by one row per attribute value.
    Every float in the expected tables is rounded to 3 decimals before the
    comparison. Cases 2 and 3 used to call round() without storing the
    result, so their rounding loops did nothing; all three cases now share
    one helper that writes the rounded value back.
    """

    def round_floats(tables):
        # round in place so the expected tables match 3-decimal posteriors
        for table in tables:
            for row in table:
                for position, cell in enumerate(row):
                    if isinstance(cell, float):
                        row[position] = round(cell, 3)

    ###Test #1###
    col_names = ["att1", "att2"]
    X_train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y_train = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]
    Naive = MyNaiveBayesClassifier()
    Naive.fit(X_train, y_train)
    prior_ans = [5 / 8, 3 / 8]
    post_ans = [[["Attributes", "yes", "no"], [1, 4 / 5, 2 / 3],
                 [2, 1 / 5, 1 / 3]],
                [["Attributes", "yes", "no"], [5, 2 / 5, 2 / 3],
                 [6, 3 / 5, 1 / 3]]]
    round_floats(post_ans)
    # this case is positional: priors and every posterior cell must sit at
    # the same index as in the expected answer
    for i in range(len(prior_ans)):
        assert np.isclose(Naive.priors[i], prior_ans[i])
    for i in range(len(post_ans)):
        for j in range(len(post_ans[i])):
            for k in range(len(post_ans[i][j])):
                n = Naive.posteriors[i][j][k]
                ans = post_ans[i][j][k]
                assert n == ans

    ###Test #2###
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    X_train = [[2, 3, "fair"], [2, 2, "fair"], [2, 1, "fair"],
               [2, 1, "excellent"], [2, 1, "excellent"], [1, 2, "fair"],
               [1, 1, "fair"], [2, 2, "fair"], [1, 2, "excellent"],
               [2, 2, "excellent"], [2, 3, "fair"], [2, 2, "excellent"],
               [2, 3, "fair"], [1, 3, "fair"], [1, 3, "excellent"]]
    y_train = [
        "yes", "yes", "yes", "no", "yes", "no", "yes", "yes", "yes", "yes",
        "yes", "no", "yes", "no", "no"
    ]
    Naive = MyNaiveBayesClassifier()
    Naive.fit(X_train, y_train)
    prior_ans = [10 / 15, 5 / 15]
    post_ans = [[["Attributes", "yes", "no"], [1, 2 / 10, 3 / 5],
                 [2, 8 / 10, 2 / 5]],
                [["Attributes", "yes", "no"], [1, 3 / 10, 1 / 5],
                 [2, 4 / 10, 2 / 5], [3, 3 / 10, 2 / 5]],
                [["Attributes", "yes", "no"], ["fair", 7 / 10, 2 / 5],
                 ["excellent", 3 / 10, 3 / 5]]]
    round_floats(post_ans)
    # cases 2 and 3 are order-independent: each expected prior / row only
    # has to appear somewhere in what the classifier stored
    for i in range(len(prior_ans)):
        assert prior_ans[i] in Naive.priors
    for i in range(len(post_ans)):
        for j in range(len(post_ans[i])):
            assert post_ans[i][j] in Naive.posteriors[i]

    ###Test #3###
    train_col_names = ["day", "season", "wind", "rain", "class"]
    X_train = [["weekday", "spring", "none", "none"],
               ["weekday", "winter", "none", "slight"],
               ["weekday", "winter", "none", "slight"],
               ["weekday", "winter", "high", "heavy"],
               ["saturday", "summer", "normal", "none"],
               ["weekday", "autumn", "normal", "none"],
               ["holiday", "summer", "high", "slight"],
               ["sunday", "summer", "normal", "none"],
               ["weekday", "winter", "high", "heavy"],
               ["weekday", "summer", "none", "slight"],
               ["saturday", "spring", "high", "heavy"],
               ["weekday", "summer", "high", "slight"],
               ["saturday", "winter", "normal", "none"],
               ["weekday", "summer", "high", "none"],
               ["weekday", "winter", "normal", "heavy"],
               ["saturday", "autumn", "high", "slight"],
               ["weekday", "autumn", "none", "heavy"],
               ["holiday", "spring", "normal", "slight"],
               ["weekday", "spring", "normal", "none"],
               ["weekday", "spring", "normal", "slight"]]
    y_train = [
        "on time", "on time", "on time", "late", "on time", "very late",
        "on time", "on time", "very late", "on time", "cancelled", "on time",
        "late", "on time", "very late", "on time", "on time", "on time",
        "on time", "on time"
    ]
    Naive = MyNaiveBayesClassifier()
    Naive.fit(X_train, y_train)
    prior_ans = [0.7, 0.1, 0.15, 0.05]
    post_ans = [[['Attributes', 'on time', 'late', 'very late', 'cancelled'],
                 ['weekday', 0.643, 0.5, 1.0, 0.0],
                 ['saturday', 0.143, 0.5, 0.0, 1.0],
                 ['holiday', 0.143, 0.0, 0.0, 0.0],
                 ['sunday', 0.071, 0.0, 0.0, 0.0]],
                [['Attributes', 'on time', 'late', 'very late', 'cancelled'],
                 ['spring', 0.286, 0.0, 0.0, 1.0],
                 ['winter', 0.143, 1.0, 0.667, 0.0],
                 ['summer', 0.429, 0.0, 0.0, 0.0],
                 ['autumn', 0.143, 0.0, 0.333, 0.0]],
                [['Attributes', 'on time', 'late', 'very late', 'cancelled'],
                 ['none', 0.357, 0.0, 0.0, 0.0],
                 ['high', 0.286, 0.5, 0.333, 1.0],
                 ['normal', 0.357, 0.5, 0.667, 0.0]],
                [['Attributes', 'on time', 'late', 'very late', 'cancelled'],
                 ['none', 0.357, 0.5, 0.333, 0.0],
                 ['slight', 0.571, 0.0, 0.0, 0.0],
                 ['heavy', 0.071, 0.5, 0.667, 1.0]]]
    round_floats(post_ans)
    for i in range(len(prior_ans)):
        assert prior_ans[i] in Naive.priors
    for i in range(len(post_ans)):
        for j in range(len(post_ans[i])):
            assert post_ans[i][j] in Naive.posteriors[i]
def test_naive_bayes_classifier_predict():
    """
    Checks predict() on the in-class, RQ5 iPhone and Bramer 3.2 datasets.

    Every table carries the class label in its last column; it is split into
    attribute rows and a label list before fitting. Predictions are compared
    as whole lists, so a missing or extra prediction fails the test instead
    of being skipped by an index loop. The Bramer case, which used to stop
    after fit(), now predicts and asserts as well.
    """

    def split_labels(table):
        # attributes are every column but the last, the label is the last
        return [row[:-1] for row in table], [row[-1] for row in table]

    # 8 instance dataset traced out in class (att1, att2, class)
    testData = [[1, 5, 'yes'], [2, 6, 'yes'], [1, 5, 'no'], [1, 5, 'no'],
                [1, 6, 'yes'], [2, 6, 'no'], [1, 5, 'yes'], [1, 6, 'yes']]
    X_test = [[1, 5]]
    expected = ['yes']
    features, allClasses = split_labels(testData)
    testNB = MyNaiveBayesClassifier()
    testNB.fit(features, allClasses)
    predictions = testNB.predict(X_test)
    assert list(predictions) == expected

    # RQ5 (fake) iPhone purchases dataset
    # columns: standing, job_status, credit_rating, buys_iphone
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    X_test = [[2, 2, 'fair'], [1, 1, 'excellent']]
    expected = ['yes', 'no']
    features, allClasses = split_labels(iphone_table)
    testNB = MyNaiveBayesClassifier()
    testNB.fit(features, allClasses)
    predictions = testNB.predict(X_test)
    assert list(predictions) == expected

    # Bramer 3.2 train dataset
    # columns: day, season, wind, rain, class
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]
    # "very late" has the largest prior * conditionals product for this
    # instance (0.15 * 1 * 2/3 * 1/3 * 2/3)
    X_test = [["weekday", "winter", "high", "heavy"]]
    expected = ["very late"]
    features, allClasses = split_labels(train_table)
    testNB = MyNaiveBayesClassifier()
    testNB.fit(features, allClasses)
    predictions = testNB.predict(X_test)
    assert list(predictions) == expected
def test_naive_bayes_classifier_fit():
    """
    Checks the priors and posteriors stored by fit() on three datasets.

    Priors are compared positionally with np.allclose. Posteriors are
    compared by exact equality against a nested dict keyed as
    {attribute index: {attribute value: {class label: probability}}}.
    Every table carries the class label in its last column.
    """

    def split_labels(table):
        # separate each row into its attributes and its trailing label
        return [row[:-1] for row in table], [row[-1] for row in table]

    # 8 instance in-class dataset
    classifier = MyNaiveBayesClassifier()
    truePriors = [3 / 8, 5 / 8]
    truePost = {
        0: {
            1: {'no': 2 / 3, 'yes': 0.8},
            2: {'no': 1 / 3, 'yes': 0.2},
        },
        1: {
            5: {'no': 0.6666666666666666, 'yes': 0.4},
            6: {'no': 0.3333333333333333, 'yes': 0.6},
        },
    }
    testData = [[1, 5, 'yes'], [2, 6, 'yes'], [1, 5, 'no'], [1, 5, 'no'],
                [1, 6, 'yes'], [2, 6, 'no'], [1, 5, 'yes'], [1, 6, 'yes']]
    features, labels = split_labels(testData)
    classifier.fit(features, labels)
    assert np.allclose(classifier.priors, truePriors)
    assert classifier.posteriors == truePost

    # RQ5 (fake) iPhone purchases dataset
    classifier = MyNaiveBayesClassifier()
    truePriors = [0.333, 0.667]
    truePost = {
        0: {
            1: {'no': 0.6, 'yes': 0.2},
            2: {'no': 0.4, 'yes': 0.8},
        },
        1: {
            1: {'no': 0.2, 'yes': 0.3},
            2: {'no': 0.4, 'yes': 0.4},
            3: {'no': 0.4, 'yes': 0.3},
        },
        2: {
            'excellent': {'no': 0.6, 'yes': 0.3},
            'fair': {'no': 0.4, 'yes': 0.7},
        },
    }
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    features, labels = split_labels(iphone_table)
    classifier.fit(features, labels)
    assert np.allclose(classifier.priors, truePriors)
    assert classifier.posteriors == truePost

    # Bramer 3.2 train dataset
    classifier = MyNaiveBayesClassifier()
    truePriors = [0.05, 0.1, 0.7, 0.15]
    truePost = {
        0: {
            'holiday': {'cancelled': 0.0, 'late': 0.0,
                        'on time': 0.14285714285714285, 'very late': 0.0},
            'saturday': {'cancelled': 1.0, 'late': 0.5,
                         'on time': 0.14285714285714285, 'very late': 0.0},
            'sunday': {'cancelled': 0.0, 'late': 0.0,
                       'on time': 0.07142857142857142, 'very late': 0.0},
            'weekday': {'cancelled': 0.0, 'late': 0.5,
                        'on time': 0.6428571428571429, 'very late': 1.0},
        },
        1: {
            'autumn': {'cancelled': 0.0, 'late': 0.0,
                       'on time': 0.14285714285714285,
                       'very late': 0.3333333333333333},
            'spring': {'cancelled': 1.0, 'late': 0.0,
                       'on time': 0.2857142857142857, 'very late': 0.0},
            'summer': {'cancelled': 0.0, 'late': 0.0,
                       'on time': 0.42857142857142855, 'very late': 0.0},
            'winter': {'cancelled': 0.0, 'late': 1.0,
                       'on time': 0.14285714285714285,
                       'very late': 0.6666666666666666},
        },
        2: {
            'high': {'cancelled': 1.0, 'late': 0.5,
                     'on time': 0.2857142857142857,
                     'very late': 0.3333333333333333},
            'none': {'cancelled': 0.0, 'late': 0.0,
                     'on time': 0.35714285714285715, 'very late': 0.0},
            'normal': {'cancelled': 0.0, 'late': 0.5,
                       'on time': 0.35714285714285715,
                       'very late': 0.6666666666666666},
        },
        3: {
            'heavy': {'cancelled': 1.0, 'late': 0.5,
                      'on time': 0.07142857142857142,
                      'very late': 0.6666666666666666},
            'none': {'cancelled': 0.0, 'late': 0.5,
                     'on time': 0.35714285714285715,
                     'very late': 0.3333333333333333},
            'slight': {'cancelled': 0.0, 'late': 0.0,
                       'on time': 0.5714285714285714, 'very late': 0.0},
        },
    }
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]
    features, labels = split_labels(train_table)
    classifier.fit(features, labels)
    assert np.allclose(classifier.priors, truePriors)
    assert classifier.posteriors == truePost