def Naive_random_accuracy(X_train,X_test,y_train,y_test):
    """ Gets accuracies for Naive Bayes CLassifier for passed in values using train/test/split
    Args:
        X_train: (list of list) X_train for Naive Bayes classifier
        X_test: (list of list) X_tests for Naive Bayes classifier
        y_train: (list) y_train for Naive Bayes classifier
        y_test: (list) y_test to compare to for Naive Bayes classifier
      
    """

    #creates new Naive Bayes classifier
    Naive = MyNaiveBayesClassifier()

    #fits the Naive Bayes classifier
    Naive.fit(X_train = X_train, y_train = y_train)

    #gets predictions from the Naive Bayes classifier
    y_predicted = Naive.predict(X_test)
    
    myutils.convert_mpg_to_categorical(y_predicted)
    myutils.convert_mpg_to_categorical(y_test)

    #gets accuracy for the Naive Bayes classifier
    acc = get_accuracy(y_predicted, y_test)
    print("===========================================")
    print("STEP #2: Predictive Accuracy")
    print("===========================================")
    print("Random Subsample (k=10, 2:1 Train/Test)")
    print("Naive Bayes: accuracy = ", acc, " error rate = ", 1 - acc)
def perform_Naive_cross_validation(X_train2, X_train_folds, X_test_folds, y_train2):
    """ performs cross validation on the passed in folds for Naive Bayes
    Args:
        X_train2: (list of list) initial X_train values
        X_train_folds (list of lists) folds of X_train indices
        y_train2: (list) initial y_train for Knn classifier
        X_test_folds (list of lists) folds of X_test indices
    
    returns: 
        y_predict: (list) y_predicted values
        y_test: (list) paralel list of y_actual values
    """
    X_train = []
    X_test = []
    y_predicted = []
    y_train = []
    y_test = []
    y_predict = []

    #loops through each X_test fold
    for i in range(len(X_test_folds)):
        X_test = []
        X_train = []
        y_train = []

        #creates X_test and y_test from the fold's indices (without mutating the fold)
        for curr_index in X_test_folds[i]:
            X_test.append(X_train2[curr_index])
            y_test.append(y_train2[curr_index])

        #creates X_train and y_train from the fold's indices
        for curr_index in X_train_folds[i]:
            X_train.append(X_train2[curr_index])
            y_train.append(y_train2[curr_index])
        
        #fits and tests Naive Bayes on each fold and appends its predictions to y_predicted
        Naive = MyNaiveBayesClassifier()
        Naive.fit(X_train = X_train, y_train = y_train)
        y_predicted.append(Naive.predict(X_test))

    #converts y_predicted to 1d list
    for i in range(len(y_predicted)):
        for j in range(len(y_predicted[i])):
            y_predict.append(y_predicted[i][j])

    return y_predict, y_test
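
# Hedged illustration of the fold format this function expects: each fold is a list of
# row indices into X_train2 / y_train2 (an assumption based on the docstring above, not
# the project's actual myevaluation implementation). A hand-rolled round-robin split:
def make_kfold_indices_sketch(n_samples, n_splits):
    """Assigns row indices to test folds round-robin; train folds are the complements."""
    test_folds = [[] for _ in range(n_splits)]
    for idx in range(n_samples):
        test_folds[idx % n_splits].append(idx)
    train_folds = [[idx for idx in range(n_samples) if idx not in fold]
                   for fold in test_folds]
    return train_folds, test_folds

# e.g. make_kfold_indices_sketch(6, 3) gives test folds [[0, 3], [1, 4], [2, 5]] and the
# matching train folds, which could be passed in as X_train_folds / X_test_folds above.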
Example 3
def test_naive_bayes_classifier_fit():
    # Test with 8 instance dataset from class
    test_fit = MyNaiveBayesClassifier()
    test_fit.fit(class_x_train, class_y_train)

    assert class_priors == test_fit.priors
    assert class_posteriors == test_fit.posteriors

    # Test Iphone dataset from Reading Quiz
    test_fit.fit(iphone_x_train, iphone_y_train)

    assert iphone_priors == test_fit.priors
    assert iphone_posteriors == test_fit.posteriors

    # Test train dataset from textbook
    test_fit.fit(train_x_train, train_y_train)

    assert train_priors == test_fit.priors
    assert train_posteriors == test_fit.posteriors
Example 4
def test_naive_bayes_classifier_predict():
    myNaiveBayes = MyNaiveBayesClassifier()

    # Test Case 1: Example traced out in class
    y_train, X_train = [], []
    for inst in attr_table:
        y_train.append(inst[-1])
        X_train.append(inst[:-1])
    myNaiveBayes.fit(X_train, y_train)
    # Get the prediction for the given test value(s)
    X_test = [[1, 5]]
    y_pred = myNaiveBayes.predict(X_test)
    assert y_pred == ["yes"]

    # Test Case 2: RQ5 Example
    y_train, X_train = [], []
    for inst in iphone_table:
        y_train.append(inst[-1])
        X_train.append(inst[:-1])
    myNaiveBayes.fit(X_train, y_train)
    # Get the prediction for the given test value(s)
    X_test = [[2, 2, "fair"], [1, 1, "excellent"]]
    y_pred = myNaiveBayes.predict(X_test)
    assert y_pred == ["yes", "no"]

    # Test Case 3: Bramer 3.2 Train Dataset
    y_train, X_train = [], []
    for inst in train_table:
        y_train.append(inst[-1])
        X_train.append(inst[:-1])
    myNaiveBayes.fit(X_train, y_train)
    # Get the prediction for the given test value(s)
    X_test = [["weekday", "winter", "high", "heavy"],
              ["weekday", "summer", "high", "heavy"],
              ["sunday", "summer", "normal", "slight"]]
    y_pred = myNaiveBayes.predict(X_test)
    assert y_pred == ["very late", "on time", "on time"]
Example 5
def test_naive_bayes_classifier_fit():
    myNaiveBayes = MyNaiveBayesClassifier()

    # Test Case 1: Example traced out in class
    y_train, X_train = [], []
    for inst in attr_table:
        y_train.append(inst[-1])
        X_train.append(inst[:-1])
    myNaiveBayes.fit(X_train, y_train)
    # Assert against the priors and posteriors
    true_priors = [[(5 / 8)], [(3 / 8)]]  # yes, no (8-instance class dataset)
    true_posteriors = [[(4 / 5), (2 / 3)], [(1 / 5), (1 / 3)],
                       [(2 / 5), (2 / 3)], [(3 / 5), (1 / 3)]]
    _, pred_priors = myNaiveBayes.priors.get_subtable(0, 'end', 1, 'end')
    _, pred_posteriors = myNaiveBayes.posteriors.get_subtable(
        0, 'end', 2, 'end')

    assert np.allclose(true_priors, pred_priors)
    assert np.allclose(true_posteriors, pred_posteriors)

    # Test Case 2: RQ5 Example
    y_train, X_train = [], []
    for inst in iphone_table:
        y_train.append(inst[-1])
        X_train.append(inst[:-1])
    myNaiveBayes.fit(X_train, y_train)
    # Assert against the priors and posteriors
    true_priors = [[(5 / 15)], [(10 / 15)]]  # buys_iphone=no, buys_iphone=yes
    true_posteriors = [[0.6, 0.2], [0.4, 0.8], [0.4, 0.3], [0.4, 0.4],
                       [0.2, 0.3], [0.4, 0.7], [0.6, 0.3]]
    _, pred_priors = myNaiveBayes.priors.get_subtable(0, 'end', 1, 'end')
    _, pred_posteriors = myNaiveBayes.posteriors.get_subtable(
        0, 'end', 2, 'end')

    assert np.allclose(true_priors, pred_priors)
    assert np.allclose(true_posteriors, pred_posteriors)

    # Test Case 3: Bramer 3.2 Train Dataset
    y_train, X_train = [], []
    for inst in train_table:
        y_train.append(inst[-1])
        X_train.append(inst[:-1])
    myNaiveBayes.fit(X_train, y_train)
    # Assert against the priors and posteriors
    true_priors = [[(14 / 20)], [(2 / 20)], [(3 / 20)],
                   [(1 / 20)]]  # on time, late, very late, cancelled
    true_posteriors = [[0.6428571428571429, 0.5, 1.0, 0.0],
                       [0.14285714285714285, 0.5, 0.0, 1.0],
                       [0.14285714285714285, 0.0, 0.0, 0.0],
                       [0.07142857142857142, 0.0, 0.0, 0.0],
                       [0.2857142857142857, 0.0, 0.0, 1.0],
                       [0.14285714285714285, 1.0, 0.6666666666666666, 0.0],
                       [0.42857142857142855, 0.0, 0.0, 0.0],
                       [0.14285714285714285, 0.0, 0.3333333333333333, 0.0],
                       [0.35714285714285715, 0.0, 0.0, 0.0],
                       [0.2857142857142857, 0.5, 0.3333333333333333, 1.0],
                       [0.35714285714285715, 0.5, 0.6666666666666666, 0.0],
                       [0.35714285714285715, 0.5, 0.3333333333333333, 0.0],
                       [0.5714285714285714, 0.0, 0.0, 0.0],
                       [0.07142857142857142, 0.5, 0.6666666666666666, 1.0]]
    _, pred_priors = myNaiveBayes.priors.get_subtable(0, 'end', 1, 'end')
    _, pred_posteriors = myNaiveBayes.posteriors.get_subtable(
        0, 'end', 2, 'end')

    assert np.allclose(true_priors, pred_priors)
    assert np.allclose(true_posteriors, pred_posteriors)
Example 6
weather = collisions_data.get_column('WEATHER')
road_condition = collisions_data.get_column('ROADCOND')
light_condition = collisions_data.get_column('LIGHTCOND')
junction_type = collisions_data.get_column('JUNCTIONTYPE')
severity = collisions_data.get_column('SEVERITYDESC')

X_train = [[
    weather[i], road_condition[i], light_condition[i], junction_type[i],
    severity[i]
] for i in range(len(weather))]
y_train = collisions_data.get_column('COLLISIONTYPE')

# drop rows whose collision type is 'Unknown'; filtering into new lists avoids the
# skipped-element bug of deleting from a list while iterating over it
X_train = [row for row, val in zip(X_train, y_train) if val != 'Unknown']
y_train = [val for val in y_train if val != 'Unknown']

strattrain_folds, strattest_folds = myevaluation.stratified_kfold_cross_validation(
    X_train, y_train, 10)
strat_xtrain, strat_ytrain, strat_xtest, strat_ytest = myutils.get_from_folds(
    X_train, y_train, strattrain_folds, strattest_folds)
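
# Hedged sketch of the index-to-instance mapping that myutils.get_from_folds presumably
# performs (an assumption from the parameter names; the project's real helper may differ):
def folds_to_instances_sketch(X, y, train_folds, test_folds):
    """Flattens folds of row indices into train/test instance and label lists."""
    X_tr = [X[i] for fold in train_folds for i in fold]
    y_tr = [y[i] for fold in train_folds for i in fold]
    X_te = [X[i] for fold in test_folds for i in fold]
    y_te = [y[i] for fold in test_folds for i in fold]
    return X_tr, y_tr, X_te, y_te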

myb = MyNaiveBayesClassifier()
myb.fit(strat_xtrain, strat_ytrain)

packaged_object = myb
# pickle packaged object
outfile = open('driving_bayes.p', 'wb')
pickle.dump(packaged_object, outfile)
outfile.close()
import os

# "pickle" an object (AKA object serialization)
# save a Python object to a binary file

# "unpickle" an object (AKA object de-serialization)
# load a Python object from a binary file (back into memory)
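
# A minimal sketch of the "unpickle" step described above, assuming driving_bayes.p was
# written by the pickling code in the previous example:
def load_pickled_classifier_sketch(path="driving_bayes.p"):
    """Loads a previously pickled classifier object back into memory."""
    import pickle
    with open(path, "rb") as infile:
        return pickle.load(infile)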

# Get data from csv file
table = MyPyTable().load_from_file(
    os.path.join("input_files", "winequality-red.csv"))
y_col = table.get_column("quality", False)
x_cols = table.drop_col("quality")

# Use Naive Bayes to classify
testcase = MyNaiveBayesClassifier()

# returns folds of row indices into x_cols / y_col
X_train, X_test = myevaluation.stratified_kfold_cross_validation(x_cols,
                                                                 y_col,
                                                                 n_splits=10)
X_train, X_test, y_train, y_test = myutils.getInstances(
    X_train, X_test, x_cols, y_col)

for i, fold in enumerate(X_train):
    train, test = myutils.normalize_values(X_train[i], X_test[i])
    testcase.fit(train, y_train[i])
    break

packaged_object = testcase
# pickle packaged_object
Example 8
def test_naive_bayes_classifier_predict():
    train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]

    nb = MyNaiveBayesClassifier()
    nb.fit(train, y)

    pred = nb.predict([[1, 5]])

    assert pred == ["yes"]  # TODO: fix this
    # RQ5 (fake) iPhone purchases dataset
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    mypy = MyPyTable(iphone_col_names, iphone_table)
    y2 = myutils.get_mypycol(mypy, "buys_iphone")
    nb2 = MyNaiveBayesClassifier()
    nb2.fit(iphone_table, y2)
    pred2 = nb2.predict([[1, 2, "fair"]])

    assert pred2 == ["yes"]

    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]
    mypy2 = MyPyTable(train_col_names, train_table)
    y3 = myutils.get_mypycol(mypy2, "class")
    nb3 = MyNaiveBayesClassifier()
    nb3.fit(train_table, y3)
    pred3 = nb3.predict([["weekday", "winter", "high", "heavy"]])

    assert pred3 == ["cancelled"]
Example 9
def test_naive_bayes_classifier_fit():
    train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]

    nb = MyNaiveBayesClassifier()
    nb.fit(train, y)
    assert nb.priors == [["yes", 5 / 8], ["no", 3 / 8]]
    assert nb.posteriors == [[0, ['yes', ['1', 0.8], ['2', 0.2]], ['no', ['1', 2/3], ['2', 1/3]]], \
                            [1, ['yes', ['5', 0.4], ['6', 0.6]], ['no', ['5', 2/3], ['6', 1/3]]]]

    # RQ5 (fake) iPhone purchases dataset
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    mypy = MyPyTable(iphone_col_names, iphone_table)
    y2 = myutils.get_mypycol(mypy, "buys_iphone")
    nb2 = MyNaiveBayesClassifier()
    nb2.fit(iphone_table, y2)
    assert nb2.priors == [["no", 1 / 3], ["yes", 2 / 3]]
    nb2_posts = [[
        0, ['no', ['1', 3 / 15], ['2', 2 / 15]],
        ['yes', ['1', 2 / 15], ['2', 8 / 15]]
    ],
                 [
                     1, ['no', ['3', 2 / 15], ['2', 2 / 15], ['1', 2 / 3]],
                     ['yes', ['3', 3 / 15], ['2', 4 / 15], ['1', 3 / 15]]
                 ],
                 [
                     2, ['no', ['fair', 2 / 15], ['excellent', 3 / 15]],
                     ['yes', ['fair', 7 / 15], ['excellent', 3 / 15]]
                 ],
                 [
                     3, ['no', ['no', 1 / 3], ['yes', 0.0]],
                     ['yes', ['no', 0.0], ['yes', 2 / 3]]
                 ]]
    # assert nb2.posteriors == nb2_posts
    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]
    mypy2 = MyPyTable(train_col_names, train_table)
    y3 = myutils.get_mypycol(mypy2, "class")
    nb3 = MyNaiveBayesClassifier()
    nb3.fit(train_table, y3)
Example 10
def test_naive_bayes_classifier_predict():
    # Setting up object to fit and predict class dataset
    test_predict = MyNaiveBayesClassifier()
    test_predict.fit(class_x_train, class_y_train)
    class_predicted = test_predict.predict(class_test)

    assert class_actuals == class_predicted

    # Setting up object to fit and predict iphone dataset
    test_predict.fit(iphone_x_train, iphone_y_train)
    iphone_predicted = test_predict.predict(iphone_test)

    assert iphone_actuals == iphone_predicted

    # Setting up object to fit and predict train dataset
    test_predict.fit(train_x_train, train_y_train)
    train_predicted = test_predict.predict(train_test)

    assert train_actuals == train_predicted

    # MyZeroRClassifier
    another = MyZeroRClassifier()
    another.fit(iphone_x_train, iphone_y_train)
    another_class = another.predict(iphone_test)
    #print(another_class)

    # MyRandomClassifier
    another2 = MyRandomClassifier()
    another2.fit(iphone_x_train, iphone_y_train)
    another_class2 = another2.predict(iphone_test)
Example 11
def test_naive_bayes_classifier_predict():

    ###Test #1###
    col_names = ["att1", "att2"]
    X_train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y_train = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]

    Naive = MyNaiveBayesClassifier()
    Naive.fit(X_train, y_train)
    y_predict = Naive.predict([[1, 5]])
    y_actual = ["yes"]

    assert y_predict[0] == y_actual[0]
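
    # Why "yes" is expected for [1, 5]: hand computation using the standard naive Bayes
    # rule score(class) = P(class) * product of P(att_value | class) on the data above
    score_yes = (5 / 8) * (4 / 5) * (2 / 5)   # = 0.200
    score_no = (3 / 8) * (2 / 3) * (2 / 3)    # ~= 0.167
    assert score_yes > score_no               # so the classifier should predict "yes"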

    ###Test #2###

    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    X_train = [[2, 3, "fair"], [2, 2, "fair"], [2, 1, "fair"],
               [2, 1, "excellent"], [2, 1, "excellent"], [1, 2, "fair"],
               [1, 1, "fair"], [2, 2, "fair"], [1, 2, "excellent"],
               [2, 2, "excellent"], [2, 3, "fair"], [2, 2, "excellent"],
               [2, 3, "fair"], [1, 3, "fair"], [1, 3, "excellent"]]
    y_train = [
        "yes", "yes", "yes", "no", "yes", "no", "yes", "yes", "yes", "yes",
        "yes", "no", "yes", "no", "no"
    ]

    Naive = MyNaiveBayesClassifier()
    Naive.fit(X_train, y_train)

    y_predict = Naive.predict([[2, 2, "fair"]])
    y_actual = ["yes"]
    assert y_predict[0] == y_actual[0]

    y_predict = Naive.predict([[1, 1, "excellent"]])
    y_actual = ["no"]
    assert y_predict[0] == y_actual[0]

    ###Test #3###

    train_col_names = ["day", "season", "wind", "rain", "class"]
    X_train = [["weekday", "spring", "none", "none"],
               ["weekday", "winter", "none", "slight"],
               ["weekday", "winter", "none", "slight"],
               ["weekday", "winter", "high", "heavy"],
               ["saturday", "summer", "normal", "none"],
               ["weekday", "autumn", "normal", "none"],
               ["holiday", "summer", "high", "slight"],
               ["sunday", "summer", "normal", "none"],
               ["weekday", "winter", "high", "heavy"],
               ["weekday", "summer", "none", "slight"],
               ["saturday", "spring", "high", "heavy"],
               ["weekday", "summer", "high", "slight"],
               ["saturday", "winter", "normal", "none"],
               ["weekday", "summer", "high", "none"],
               ["weekday", "winter", "normal", "heavy"],
               ["saturday", "autumn", "high", "slight"],
               ["weekday", "autumn", "none", "heavy"],
               ["holiday", "spring", "normal", "slight"],
               ["weekday", "spring", "normal", "none"],
               ["weekday", "spring", "normal", "slight"]]

    y_train = [
        "on time", "on time", "on time", "late", "on time", "very late",
        "on time", "on time", "very late", "on time", "cancelled", "on time",
        "late", "on time", "very late", "on time", "on time", "on time",
        "on time", "on time"
    ]

    Naive = MyNaiveBayesClassifier()
    Naive.fit(X_train, y_train)

    y_predict = Naive.predict([["weekday", "winter", "high", "heavy"],
                               ["saturday", "spring", "normal", "slight"]])
    y_actual = ["very late", "on time"]
    for i in range(len(y_actual)):
        assert y_predict[i] == y_actual[i]
Example 12
def test_naive_bayes_classifier_fit():

    ###Test #1###
    col_names = ["att1", "att2"]
    X_train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y_train = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]

    Naive = MyNaiveBayesClassifier()
    Naive.fit(X_train, y_train)

    prior_ans = [5 / 8, 3 / 8]
    post_ans = [[["Attributes", "yes", "no"], [1, 4 / 5, 2 / 3],
                 [2, 1 / 5, 1 / 3]],
                [["Attributes", "yes", "no"], [5, 2 / 5, 2 / 3],
                 [6, 3 / 5, 1 / 3]]]
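
    # Where these fractions come from -- a verification sketch using only the standard
    # library (an illustration of the counting, not the project's MyNaiveBayesClassifier):
    from collections import Counter

    def naive_bayes_counts_sketch(X, y):
        """Priors are class frequencies; posteriors are per-class value frequencies per column."""
        class_counts = Counter(y)                      # e.g. {"yes": 5, "no": 3}
        priors = {label: count / len(y) for label, count in class_counts.items()}
        posteriors = []                                # one {class: {value: prob}} dict per column
        for col in range(len(X[0])):
            col_probs = {}
            for label in class_counts:
                values = [X[i][col] for i in range(len(X)) if y[i] == label]
                col_probs[label] = {val: cnt / len(values)
                                    for val, cnt in Counter(values).items()}
            posteriors.append(col_probs)
        return priors, posteriors

    sketch_priors, sketch_posteriors = naive_bayes_counts_sketch(X_train, y_train)
    assert np.isclose(sketch_priors["yes"], 5 / 8)             # matches prior_ans[0]
    assert np.isclose(sketch_posteriors[0]["yes"][1], 4 / 5)   # matches post_ans[0][1][1]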

    for i in range(len(post_ans)):
        for j in range(len(post_ans[i])):
            for k in range(len(post_ans[i][j])):
                if (isinstance(post_ans[i][j][k], float)):
                    post_ans[i][j][k] = round(post_ans[i][j][k], 3)

    for i in range(len(prior_ans)):
        assert np.isclose(Naive.priors[i], prior_ans[i])

    for i in range(len(post_ans)):
        for j in range(len(post_ans[i])):
            for k in range(len(post_ans[i][j])):
                n = Naive.posteriors[i][j][k]
                ans = post_ans[i][j][k]
                assert n == ans

    ###Test #2###

    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    X_train = [[2, 3, "fair"], [2, 2, "fair"], [2, 1, "fair"],
               [2, 1, "excellent"], [2, 1, "excellent"], [1, 2, "fair"],
               [1, 1, "fair"], [2, 2, "fair"], [1, 2, "excellent"],
               [2, 2, "excellent"], [2, 3, "fair"], [2, 2, "excellent"],
               [2, 3, "fair"], [1, 3, "fair"], [1, 3, "excellent"]]
    y_train = [
        "yes", "yes", "yes", "no", "yes", "no", "yes", "yes", "yes", "yes",
        "yes", "no", "yes", "no", "no"
    ]

    Naive = MyNaiveBayesClassifier()
    Naive.fit(X_train, y_train)

    prior_ans = [10 / 15, 5 / 15]
    post_ans = [[["Attributes", "yes", "no"], [1, 2 / 10, 3 / 5],
                 [2, 8 / 10, 2 / 5]],
                [["Attributes", "yes", "no"], [1, 3 / 10, 1 / 5],
                 [2, 4 / 10, 2 / 5], [3, 3 / 10, 2 / 5]],
                [["Attributes", "yes", "no"], ["fair", 7 / 10, 2 / 5],
                 ["excellent", 3 / 10, 3 / 5]]]
    for i in range(len(post_ans)):
        for j in range(len(post_ans[i])):
            for k in range(len(post_ans[i][j])):
                if (isinstance(post_ans[i][j][k], float)):
                    post_ans[i][j][k] = round(post_ans[i][j][k], 3)

    for i in range(len(prior_ans)):
        assert prior_ans[i] in Naive.priors

    for i in range(len(post_ans)):
        for j in range(len(post_ans[i])):
            assert post_ans[i][j] in Naive.posteriors[i]

    ###Test #3###

    train_col_names = ["day", "season", "wind", "rain", "class"]
    X_train = [["weekday", "spring", "none", "none"],
               ["weekday", "winter", "none", "slight"],
               ["weekday", "winter", "none", "slight"],
               ["weekday", "winter", "high", "heavy"],
               ["saturday", "summer", "normal", "none"],
               ["weekday", "autumn", "normal", "none"],
               ["holiday", "summer", "high", "slight"],
               ["sunday", "summer", "normal", "none"],
               ["weekday", "winter", "high", "heavy"],
               ["weekday", "summer", "none", "slight"],
               ["saturday", "spring", "high", "heavy"],
               ["weekday", "summer", "high", "slight"],
               ["saturday", "winter", "normal", "none"],
               ["weekday", "summer", "high", "none"],
               ["weekday", "winter", "normal", "heavy"],
               ["saturday", "autumn", "high", "slight"],
               ["weekday", "autumn", "none", "heavy"],
               ["holiday", "spring", "normal", "slight"],
               ["weekday", "spring", "normal", "none"],
               ["weekday", "spring", "normal", "slight"]]

    y_train = [
        "on time", "on time", "on time", "late", "on time", "very late",
        "on time", "on time", "very late", "on time", "cancelled", "on time",
        "late", "on time", "very late", "on time", "on time", "on time",
        "on time", "on time"
    ]

    Naive = MyNaiveBayesClassifier()
    Naive.fit(X_train, y_train)

    prior_ans = [0.7, 0.1, 0.15, 0.05]
    post_ans = [[['Attributes', 'on time', 'late', 'very late', 'cancelled'],
                 ['weekday', 0.643, 0.5, 1.0, 0.0],
                 ['saturday', 0.143, 0.5, 0.0, 1.0],
                 ['holiday', 0.143, 0.0, 0.0, 0.0],
                 ['sunday', 0.071, 0.0, 0.0, 0.0]],
                [['Attributes', 'on time', 'late', 'very late', 'cancelled'],
                 ['spring', 0.286, 0.0, 0.0, 1.0],
                 ['winter', 0.143, 1.0, 0.667, 0.0],
                 ['summer', 0.429, 0.0, 0.0, 0.0],
                 ['autumn', 0.143, 0.0, 0.333, 0.0]],
                [['Attributes', 'on time', 'late', 'very late', 'cancelled'],
                 ['none', 0.357, 0.0, 0.0, 0.0],
                 ['high', 0.286, 0.5, 0.333, 1.0],
                 ['normal', 0.357, 0.5, 0.667, 0.0]],
                [['Attributes', 'on time', 'late', 'very late', 'cancelled'],
                 ['none', 0.357, 0.5, 0.333, 0.0],
                 ['slight', 0.571, 0.0, 0.0, 0.0],
                 ['heavy', 0.071, 0.5, 0.667, 1.0]]]

    for i in range(len(post_ans)):
        for j in range(len(post_ans[i])):
            for k in range(len(post_ans[i][j])):
                if (isinstance(post_ans[i][j][k], float)):
                    post_ans[i][j][k] = round(post_ans[i][j][k], 3)

    for i in range(len(prior_ans)):
        assert prior_ans[i] in Naive.priors

    for i in range(len(post_ans)):
        for j in range(len(post_ans[i])):
            assert post_ans[i][j] in Naive.posteriors[i]
Example 13
def test_naive_bayes_classifier_predict():

    testNB = MyNaiveBayesClassifier()
    truePriors = [3 / 8, 5 / 8]
    X_test = [[1, 5]]
    expected = ['yes']
    testData = [[1, 5, 'yes'], [2, 6, 'yes'], [1, 5, 'no'], [1, 5, 'no'],
                [1, 6, 'yes'], [2, 6, 'no'], [1, 5, 'yes'], [1, 6, 'yes']]

    allClasses = []
    for row in testData:
        allClasses.append(row.pop())

    testNB.fit(testData, allClasses)
    predictions = testNB.predict(X_test)

    assert predictions[0] == 'yes'

    testNB = MyNaiveBayesClassifier()

    # RQ5 (fake) iPhone purchases dataset
    truePriors = [0.333, 0.667]
    X_test = [[2, 2, 'fair'], [1, 1, 'excellent']]
    expected = ['yes', 'no']
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    allClasses = []
    for row in iphone_table:
        allClasses.append(row.pop())

    testNB.fit(iphone_table, allClasses)
    predictions = testNB.predict(X_test)

    for i in range(len(predictions)):
        assert predictions[i] == expected[i]

    testNB = MyNaiveBayesClassifier()
    truePriors = [0.05, 0.1, 0.7, 0.15]
    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]

    allClasses = []
    for row in train_table:
        allClasses.append(row.pop())

    testNB.fit(train_table, allClasses)
Example 14
def test_naive_bayes_classifier_fit():

    testNB = MyNaiveBayesClassifier()
    truePriors = [3 / 8, 5 / 8]
    truePost = {
        0: {
            1: {
                'no': 2 / 3,
                'yes': 0.8
            },
            2: {
                'no': 1 / 3,
                'yes': 0.2
            }
        },
        1: {
            5: {
                'no': 0.6666666666666666,
                'yes': 0.4
            },
            6: {
                'no': 0.3333333333333333,
                'yes': 0.6
            }
        }
    }

    testData = [[1, 5, 'yes'], [2, 6, 'yes'], [1, 5, 'no'], [1, 5, 'no'],
                [1, 6, 'yes'], [2, 6, 'no'], [1, 5, 'yes'], [1, 6, 'yes']]

    allClasses = []
    for row in testData:
        allClasses.append(row.pop())

    testNB.fit(testData, allClasses)

    assert np.allclose(testNB.priors, truePriors)
    assert testNB.posteriors == truePost

    testNB = MyNaiveBayesClassifier()

    # RQ5 (fake) iPhone purchases dataset
    truePriors = [0.333, 0.667]
    truePost = {
        0: {
            1: {
                'no': 0.6,
                'yes': 0.2
            },
            2: {
                'no': 0.4,
                'yes': 0.8
            }
        },
        1: {
            1: {
                'no': 0.2,
                'yes': 0.3
            },
            2: {
                'no': 0.4,
                'yes': 0.4
            },
            3: {
                'no': 0.4,
                'yes': 0.3
            }
        },
        2: {
            'excellent': {
                'no': 0.6,
                'yes': 0.3
            },
            'fair': {
                'no': 0.4,
                'yes': 0.7
            }
        }
    }
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    allClasses = []
    for row in iphone_table:
        allClasses.append(row.pop())

    testNB.fit(iphone_table, allClasses)

    assert np.allclose(testNB.priors, truePriors)
    assert testNB.posteriors == truePost

    testNB = MyNaiveBayesClassifier()
    truePriors = [0.05, 0.1, 0.7, 0.15]
    truePost = {
        0: {
            'holiday': {
                'cancelled': 0.0,
                'late': 0.0,
                'on time': 0.14285714285714285,
                'very late': 0.0
            },
            'saturday': {
                'cancelled': 1.0,
                'late': 0.5,
                'on time': 0.14285714285714285,
                'very late': 0.0
            },
            'sunday': {
                'cancelled': 0.0,
                'late': 0.0,
                'on time': 0.07142857142857142,
                'very late': 0.0
            },
            'weekday': {
                'cancelled': 0.0,
                'late': 0.5,
                'on time': 0.6428571428571429,
                'very late': 1.0
            }
        },
        1: {
            'autumn': {
                'cancelled': 0.0,
                'late': 0.0,
                'on time': 0.14285714285714285,
                'very late': 0.3333333333333333
            },
            'spring': {
                'cancelled': 1.0,
                'late': 0.0,
                'on time': 0.2857142857142857,
                'very late': 0.0
            },
            'summer': {
                'cancelled': 0.0,
                'late': 0.0,
                'on time': 0.42857142857142855,
                'very late': 0.0
            },
            'winter': {
                'cancelled': 0.0,
                'late': 1.0,
                'on time': 0.14285714285714285,
                'very late': 0.6666666666666666
            }
        },
        2: {
            'high': {
                'cancelled': 1.0,
                'late': 0.5,
                'on time': 0.2857142857142857,
                'very late': 0.3333333333333333
            },
            'none': {
                'cancelled': 0.0,
                'late': 0.0,
                'on time': 0.35714285714285715,
                'very late': 0.0
            },
            'normal': {
                'cancelled': 0.0,
                'late': 0.5,
                'on time': 0.35714285714285715,
                'very late': 0.6666666666666666
            }
        },
        3: {
            'heavy': {
                'cancelled': 1.0,
                'late': 0.5,
                'on time': 0.07142857142857142,
                'very late': 0.6666666666666666
            },
            'none': {
                'cancelled': 0.0,
                'late': 0.5,
                'on time': 0.35714285714285715,
                'very late': 0.3333333333333333
            },
            'slight': {
                'cancelled': 0.0,
                'late': 0.0,
                'on time': 0.5714285714285714,
                'very late': 0.0
            }
        }
    }
    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]

    allClasses = []
    for row in train_table:
        allClasses.append(row.pop())

    testNB.fit(train_table, allClasses)

    assert np.allclose(testNB.priors, truePriors)
    assert testNB.posteriors == truePost