예제 #1
0
def test_naive_bayes_classifier_predict():
    """Check MyNaiveBayesClassifier.predict on three small categorical datasets.

    Each scenario fits a fresh classifier and asserts the predicted label for
    one unseen instance. The tables below store the class label as the last
    value of every row, so that value is sliced off before the rows are used
    as training features.
    """
    # In-class example: two attributes, labels given as a parallel list.
    train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]

    nb = MyNaiveBayesClassifier()
    nb.fit(train, y)

    pred = nb.predict([[1, 5]])

    assert pred == ["yes"]

    # RQ5 (fake) iPhone purchases dataset
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    mypy = MyPyTable(iphone_col_names, iphone_table)
    y2 = myutils.get_mypycol(mypy, "buys_iphone")
    # Train on the attribute columns only; leaving the label inside X would
    # leak the answer into the feature set.
    iphone_X = [row[:-1] for row in iphone_table]
    nb2 = MyNaiveBayesClassifier()
    nb2.fit(iphone_X, y2)
    pred2 = nb2.predict([[1, 2, "fair"]])

    assert pred2 == ["yes"]

    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]
    mypy2 = MyPyTable(train_col_names, train_table)
    y3 = myutils.get_mypycol(mypy2, "class")
    train_X = [row[:-1] for row in train_table]
    nb3 = MyNaiveBayesClassifier()
    nb3.fit(train_X, y3)
    pred3 = nb3.predict([["weekday", "winter", "high", "heavy"]])

    # Naive Bayes scores (prior * product of conditionals) for this instance:
    #   on time   ~ 0.0013
    #   late      = 0.0125
    #   very late ~ 0.0222   <- largest
    #   cancelled = 0        (no cancelled training row is a weekday)
    assert pred3 == ["very late"]
예제 #2
0
def test_naive_bayes_classifier_predict():
    """Fit one MyNaiveBayesClassifier on three datasets and check predictions.

    The same classifier instance is re-fitted for the class, iPhone and train
    datasets in turn; after every fit the predictions for the matching test
    instances must equal the expected labels. The ZeroR and random baseline
    classifiers are then fitted and queried on the iPhone data without any
    assertion on what they return.
    """
    nb_model = MyNaiveBayesClassifier()

    # (training features, training labels, test instances, expected labels)
    scenarios = (
        (class_x_train, class_y_train, class_test, class_actuals),
        (iphone_x_train, iphone_y_train, iphone_test, iphone_actuals),
        (train_x_train, train_y_train, train_test, train_actuals),
    )
    for features, labels, queries, wanted in scenarios:
        nb_model.fit(features, labels)
        produced = nb_model.predict(queries)
        assert wanted == produced

    # Baselines (MyZeroRClassifier, then MyRandomClassifier): only exercised,
    # their output is not checked.
    for baseline_cls in (MyZeroRClassifier, MyRandomClassifier):
        baseline = baseline_cls()
        baseline.fit(iphone_x_train, iphone_y_train)
        baseline.predict(iphone_test)
def Naive_random_accuracy(X_train,X_test,y_train,y_test):
    """Fit a Naive Bayes classifier on a train/test split and print its accuracy.

    Args:
        X_train: (list of list) training instances for the classifier
        X_test: (list of list) instances to predict
        y_train: (list) labels parallel to X_train
        y_test: (list) actual labels parallel to X_test

    Prints the report banner followed by the accuracy and error rate.
    Nothing is returned.
    """
    # Train a fresh Naive Bayes classifier and predict the held-out instances.
    model = MyNaiveBayesClassifier()
    model.fit(X_train = X_train, y_train = y_train)
    predictions = model.predict(X_test)

    # NOTE(review): the return values are discarded, so this only has an
    # effect if convert_mpg_to_categorical works in place - confirm in myutils.
    for labels in (predictions, y_test):
        myutils.convert_mpg_to_categorical(labels)

    # Compare predictions with the actual labels and report the result.
    accuracy = get_accuracy(predictions, y_test)
    banner = (
        "===========================================",
        "STEP #2: Predictive Accuracy",
        "===========================================",
        "Random Subsample (k=10, 2:1 Train/Test)",
    )
    for banner_line in banner:
        print(banner_line)
    print("Naive Bayes: accuracy = ", accuracy, " error rate = ", 1 - accuracy)
def perform_Naive_cross_validation(X_train2, X_train_folds, X_test_folds, y_train2):
    """Run Naive Bayes over pre-computed cross-validation folds.

    For every fold a fresh MyNaiveBayesClassifier is fitted on the instances
    selected by the fold's train indices and asked to predict the instances
    selected by its test indices.

    Args:
        X_train2: (list of list) all instances; fold entries index into it
        X_train_folds: (list of lists) per-fold indices of training instances
        X_test_folds: (list of lists) per-fold indices of test instances
        y_train2: (list) labels parallel to X_train2

    Returns:
        y_predict: (list) predicted labels for all folds, in fold order
        y_test: (list) parallel list of the actual labels

    The fold lists are only read. Earlier versions overwrote the indices in
    X_train_folds / X_test_folds with the instances themselves, which made the
    folds unusable for any later call.
    """
    y_predict = []
    y_test = []

    for fold_number, test_indices in enumerate(X_test_folds):
        train_indices = X_train_folds[fold_number]

        # Materialise this fold's instances and labels in fresh lists so the
        # caller's index lists stay untouched.
        X_test = [X_train2[index] for index in test_indices]
        y_test.extend(y_train2[index] for index in test_indices)
        X_train = [X_train2[index] for index in train_indices]
        y_train = [y_train2[index] for index in train_indices]

        # Fit on the fold's training part and collect its predictions directly
        # into the flat result list.
        classifier = MyNaiveBayesClassifier()
        classifier.fit(X_train = X_train, y_train = y_train)
        y_predict.extend(classifier.predict(X_test))

    return y_predict, y_test
예제 #5
0
def test_naive_bayes_classifier_predict():
    """Re-fit one classifier on three label-last tables and check predictions.

    attr_table, iphone_table and train_table keep the class label as the last
    value of each row; the rows are split into features and labels before
    every fit.
    """

    def split_features_and_labels(table):
        # Last value of each row is the label, everything before it is X.
        features = [row[:-1] for row in table]
        labels = [row[-1] for row in table]
        return features, labels

    classifier = MyNaiveBayesClassifier()

    # Test Case 1: Example traced out in class
    features, labels = split_features_and_labels(attr_table)
    classifier.fit(features, labels)
    predicted = classifier.predict([[1, 5]])
    assert predicted == ["yes"]

    # Test Case 2: RQ5 Example
    features, labels = split_features_and_labels(iphone_table)
    classifier.fit(features, labels)
    predicted = classifier.predict([[2, 2, "fair"], [1, 1, "excellent"]])
    assert predicted == ["yes", "no"]

    # Test Case 3: Bramer 3.2 Train Dataset
    features, labels = split_features_and_labels(train_table)
    classifier.fit(features, labels)
    queries = [
        ["weekday", "winter", "high", "heavy"],
        ["weekday", "summer", "high", "heavy"],
        ["sunday", "summer", "normal", "slight"],
    ]
    predicted = classifier.predict(queries)
    assert predicted == ["very late", "on time", "on time"]
예제 #6
0
def test_naive_bayes_classifier_predict():
    """Check predictions of MyNaiveBayesClassifier on three inline datasets.

    Every scenario builds its own classifier, fits it on the literal training
    data below and compares the predicted labels element by element with the
    expected ones.
    """

    ### Test #1: two-attribute in-class example ###
    col_names = ["att1", "att2"]
    features = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    labels = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]

    model = MyNaiveBayesClassifier()
    model.fit(features, labels)
    predicted = model.predict([[1, 5]])
    assert predicted[0] == "yes"

    ### Test #2: iPhone purchases dataset ###
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    features = [
        [2, 3, "fair"], [2, 2, "fair"], [2, 1, "fair"],
        [2, 1, "excellent"], [2, 1, "excellent"], [1, 2, "fair"],
        [1, 1, "fair"], [2, 2, "fair"], [1, 2, "excellent"],
        [2, 2, "excellent"], [2, 3, "fair"], [2, 2, "excellent"],
        [2, 3, "fair"], [1, 3, "fair"], [1, 3, "excellent"],
    ]
    labels = [
        "yes", "yes", "yes", "no", "yes", "no", "yes", "yes", "yes", "yes",
        "yes", "no", "yes", "no", "no",
    ]

    model = MyNaiveBayesClassifier()
    model.fit(features, labels)

    # Each instance is predicted with its own call, one at a time.
    single_queries = (
        ([2, 2, "fair"], "yes"),
        ([1, 1, "excellent"], "no"),
    )
    for instance, wanted in single_queries:
        predicted = model.predict([instance])
        assert predicted[0] == wanted

    ### Test #3: Bramer train dataset ###
    train_col_names = ["day", "season", "wind", "rain", "class"]
    features = [
        ["weekday", "spring", "none", "none"],
        ["weekday", "winter", "none", "slight"],
        ["weekday", "winter", "none", "slight"],
        ["weekday", "winter", "high", "heavy"],
        ["saturday", "summer", "normal", "none"],
        ["weekday", "autumn", "normal", "none"],
        ["holiday", "summer", "high", "slight"],
        ["sunday", "summer", "normal", "none"],
        ["weekday", "winter", "high", "heavy"],
        ["weekday", "summer", "none", "slight"],
        ["saturday", "spring", "high", "heavy"],
        ["weekday", "summer", "high", "slight"],
        ["saturday", "winter", "normal", "none"],
        ["weekday", "summer", "high", "none"],
        ["weekday", "winter", "normal", "heavy"],
        ["saturday", "autumn", "high", "slight"],
        ["weekday", "autumn", "none", "heavy"],
        ["holiday", "spring", "normal", "slight"],
        ["weekday", "spring", "normal", "none"],
        ["weekday", "spring", "normal", "slight"],
    ]
    labels = [
        "on time", "on time", "on time", "late", "on time", "very late",
        "on time", "on time", "very late", "on time", "cancelled", "on time",
        "late", "on time", "very late", "on time", "on time", "on time",
        "on time", "on time",
    ]

    model = MyNaiveBayesClassifier()
    model.fit(features, labels)

    predicted = model.predict([
        ["weekday", "winter", "high", "heavy"],
        ["saturday", "spring", "normal", "slight"],
    ])
    expected = ["very late", "on time"]
    # Index into the predictions so a too-short result still raises.
    for position, wanted in enumerate(expected):
        assert predicted[position] == wanted
예제 #7
0
def test_naive_bayes_classifier_predict():

    testNB = MyNaiveBayesClassifier()
    truePriors = [3 / 8, 5 / 8]
    X_test = [[1, 5]]
    expected = ['yes']
    testData = [[1, 5, 'yes'], [2, 6, 'yes'], [1, 5, 'no'], [1, 5, 'no'],
                [1, 6, 'yes'], [2, 6, 'no'], [1, 5, 'yes'], [1, 6, 'yes']]

    allClasses = []
    for row in testData:
        allClasses.append(row.pop())

    testNB.fit(testData, allClasses)
    predictions = testNB.predict(X_test)

    assert predictions[0] == 'yes'

    testNB = MyNaiveBayesClassifier()

    # RQ5 (fake) iPhone purchases dataset
    truePriors = [0.333, 0.667]
    X_test = [[2, 2, 'fair'], [1, 1, 'excellent']]
    expected = ['yes', 'no']
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    allClasses = []
    for row in iphone_table:
        allClasses.append(row.pop())

    testNB.fit(iphone_table, allClasses)
    predictions = testNB.predict(X_test)

    for i in range(len(predictions)):
        assert predictions[i] == expected[i]

    testNB = MyNaiveBayesClassifier()
    truePriors = [0.05, 0.1, 0.7, 0.15]
    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]

    allClasses = []
    for row in train_table:
        allClasses.append(row.pop())

    testNB.fit(train_table, allClasses)