def random_accuracy(X_train, X_test, y_train, y_test, lin_X_train, lin_X_test, lin_y_train, lin_y_test):
    """Print hold-out accuracy for a KNN classifier and a simple linear regressor.

    Each model is fit on its own training split, predicts its own test
    split, is scored with ``get_accuracy``, and the accuracy / error rate
    of both models is printed. Nothing is returned.

    Args:
        X_train: (list of list) training instances for the KNN classifier
        X_test: (list of list) test instances for the KNN classifier
        y_train: (list) training labels for the KNN classifier
        y_test: (list) true labels the KNN predictions are compared to
        lin_X_train: (list of list) training instances for the linear regressor
        lin_X_test: (list of list) test instances for the linear regressor
        lin_y_train: (list) training targets for the linear regressor
        lin_y_test: (list) true values the linear regressor predictions are compared to
    """
    # Instantiate both models (the KNN classifier uses 10 neighbors).
    neighbors_model = MyKNeighborsClassifier(n_neighbors=10)
    regression_model = MySimpleLinearRegressor()

    # Train each model on its own split.
    neighbors_model.fit(X_train=X_train, y_train=y_train)
    regression_model.fit(X_train=lin_X_train, y_train=lin_y_train)

    # Predict on the held-out instances.
    knn_predictions = neighbors_model.predict(X_test)
    regression_predictions = regression_model.predict(lin_X_test)

    # Score the predictions against the true labels.
    knn_accuracy = get_accuracy(knn_predictions, y_test)
    regression_accuracy = get_accuracy(regression_predictions, lin_y_test)

    # Report.
    rule = "==========================================="
    print(rule)
    print("STEP 3: Predictive Accuracy")
    print(rule)
    print("Random Subsample (k=10, 2:1 Train/Test)")
    print("Linear Regression: accuracy = ", regression_accuracy,
          " error rate = ", 1 - regression_accuracy)
    print("k Nearest Neighbors: accuracy = ", knn_accuracy,
          " error rate = ", 1 - knn_accuracy)
Ejemplo n.º 2
0
def test_kneighbors_classifier_predict():
    """Check MyKNeighborsClassifier.predict on two small hand-checked datasets."""
    # Case 1: four training points, one query point.
    small_X = [[7, 7], [7, 4], [3, 4], [1, 4]]
    small_y = ["bad", "bad", "good", "good"]
    small_model = MyKNeighborsClassifier()
    small_model.fit(small_X, small_y)
    small_result = small_model.predict([[3, 7]])
    assert small_result == ["good"]

    # Case 2: eight training points, one query point.
    larger_X = [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]]
    larger_y = ["no", "yes", "no", "no", "yes", "no", "yes", "yes"]
    larger_model = MyKNeighborsClassifier()
    larger_model.fit(larger_X, larger_y)
    larger_result = larger_model.predict([[2, 3]])
    # TODO (carried over from the original author): fix this
    assert larger_result == ["yes"]
Ejemplo n.º 3
0
def test_kneighbors_classifier_kneighbors():
    """Check the neighbor indices returned by kneighbors() on two small datasets."""
    # Case 1: four training points; only the returned indices are checked.
    small_X = [[7, 7], [7, 4], [3, 4], [1, 4]]
    small_y = ["bad", "bad", "good", "good"]
    small_model = MyKNeighborsClassifier()
    small_model.fit(small_X, small_y)
    _, small_indices = small_model.kneighbors([[3, 7]])
    assert small_indices == [[2, 3, 0]]

    # Case 2: eight training points.
    larger_X = [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]]
    larger_y = ["no", "yes", "no", "no", "yes", "no", "yes", "yes"]
    larger_model = MyKNeighborsClassifier()
    larger_model.fit(larger_X, larger_y)
    _, larger_indices = larger_model.kneighbors([[2, 3]])
    # TODO (carried over from the original author): fix this
    assert larger_indices == [[0, 4, 6]]
Ejemplo n.º 4
0
def test_kneighbors_classifier_predict():
    """Verify predict() against three known answers.

    Cases, in order:
    1. The 4 instance training set traced in class, asserted against the desk check.
    2. The 8 instance training set from ClassificationFun/main.py, asserted
       against the in-class result.
    3. Bramer 3.6 self-assessment exercise 2, asserted against the solution
       in Bramer Appendix E.
    """
    classifier = MyKNeighborsClassifier()

    # Case 1
    classifier.fit([[7, 7], [7, 4], [3, 4], [1, 4]],
                   ["Bad", "Bad", "Good", "Good"])
    assert classifier.predict([[3, 7]]) == ["Good"]

    # Case 2: the same classifier object is refit on the new data.
    classifier.fit(
        [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]],
        ["no", "yes", "no", "no", "yes", "no", "yes", "yes"])
    assert classifier.predict([[2, 3]]) == ["yes"]

    # Case 3: a fresh classifier that looks at 5 neighbors.
    classifier = MyKNeighborsClassifier(n_neighbors=5)
    bramer_X = [
        [0.8, 6.3], [1.4, 8.1], [2.1, 7.4], [2.6, 14.3], [6.8, 12.6],
        [8.8, 9.8], [9.2, 11.6], [10.8, 9.6], [11.8, 9.9], [12.4, 6.5],
        [12.8, 1.1], [14, 19.9], [14.2, 18.5], [15.6, 17.4], [15.8, 12.2],
        [16.6, 6.7], [17.4, 4.5], [18.2, 6.9], [19, 3.4], [19.6, 11.1],
    ]
    bramer_y = [
        "-", "-", "-", "+", "-", "+", "-", "+", "+", "+",
        "-", "-", "-", "-", "-", "+", "+", "+", "-", "+",
    ]
    classifier.fit(bramer_X, bramer_y)
    assert classifier.predict([[9.1, 11]]) == ["+"]
Ejemplo n.º 5
0
def test_kneighbors_classifier_kneighbors():
    """Verify kneighbors() distances and indices against three known answers.

    Cases, in order:
    1. The 4 instance training set traced in class (columns normalized first),
       asserted against the desk check.
    2. The 8 instance training set from ClassificationFun/main.py, asserted
       against the in-class result.
    3. Bramer 3.6 self-assessment exercise 2, asserted against the solution
       in Bramer Appendix E.
    """
    classifier = MyKNeighborsClassifier()

    # Case 1: train and test columns are normalized before fitting.
    raw_train = [[7, 7], [7, 4], [3, 4], [1, 4]]
    labels = ["Bad", "Bad", "Good", "Good"]
    raw_query = [[3, 7]]
    scaled_train, scaled_query = myutils.normalize_train_and_test_sets(
        raw_train, raw_query)
    classifier.fit(scaled_train, labels)

    found_dists, found_indices = classifier.kneighbors(scaled_query)
    assert np.allclose(found_indices, [[0, 2, 3]])
    expected_dists = [[(2 / 3), 1.00, np.sqrt(1 + (1 / 3)**2)]]
    assert np.allclose(found_dists, expected_dists)

    # Case 2: raw (un-normalized) values, same classifier object refit.
    raw_train = [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]]
    labels = ["no", "yes", "no", "no", "yes", "no", "yes", "yes"]
    classifier.fit(raw_train, labels)
    found_dists, found_indices = classifier.kneighbors([[2, 3]])
    # Expected neighbors as (x, y, label, index, distance):
    #   (3, 2, 'no', 0, 1.4142135623730951)
    #   (1, 2, 'yes', 4, 1.4142135623730951)
    #   (0, 3, 'yes', 6, 2.0)
    assert np.allclose(found_indices, [[0, 4, 6]])
    expected_dists = [[1.4142135623730951, 1.4142135623730951, 2.0]]
    assert np.allclose(found_dists, expected_dists)

    # Case 3: fresh classifier with 5 neighbors.
    classifier = MyKNeighborsClassifier(n_neighbors=5)
    raw_train = [
        [0.8, 6.3], [1.4, 8.1], [2.1, 7.4], [2.6, 14.3], [6.8, 12.6],
        [8.8, 9.8], [9.2, 11.6], [10.8, 9.6], [11.8, 9.9], [12.4, 6.5],
        [12.8, 1.1], [14, 19.9], [14.2, 18.5], [15.6, 17.4], [15.8, 12.2],
        [16.6, 6.7], [17.4, 4.5], [18.2, 6.9], [19, 3.4], [19.6, 11.1],
    ]
    labels = [
        "-", "-", "-", "+", "-", "+", "-", "+", "+", "+",
        "-", "-", "-", "-", "-", "+", "+", "+", "-", "+",
    ]
    classifier.fit(raw_train, labels)
    found_dists, found_indices = classifier.kneighbors([[9.1, 11]])
    assert np.allclose(found_indices, [[6, 5, 7, 4, 8]])
    # The reference distances are only known to 3 decimals, hence atol.
    expected_dists = [[0.608, 1.237, 2.202, 2.802, 2.915]]
    assert np.allclose(found_dists, expected_dists, atol=0.001)
Ejemplo n.º 6
0
def predict():
    """Predict a song genre from feature values given as query parameters.

    Reads eight feature values from ``request.args`` (each defaults to 5
    when absent), loads ``tcc_ceds_music.csv``, passes it through
    ``myutils.get_even_classifier_instances`` and
    ``myutils.categorize_values``, fits a ``MyKNeighborsClassifier`` on the
    eight feature columns against the genre column, and predicts the genre
    of the requested instance.

    If fitting-time data is fine but prediction raises, the error is printed
    and a random genre from a fixed fallback list is returned instead, so
    the endpoint always answers with status 200.

    Returns:
        tuple: ``(jsonify({"prediction": ...}), 200)``.
    """
    # Only these eight features feed the model; other lyric-topic parameters
    # (dating, violence, world_life, ..., age) are intentionally not read.
    # NOTE(review): values supplied in the query string are presumably
    # strings while the defaults are ints -- confirm the classifier and the
    # categorized training data can handle both.
    sadness = request.args.get("sadness", 5)
    feelings = request.args.get("feelings", 5)
    danceability = request.args.get("danceability", 5)
    loudness = request.args.get("loudness", 5)
    # The query key keeps its historical spelling ("accousticness") so that
    # existing callers are unaffected; only the local name is corrected.
    acousticness = request.args.get("accousticness", 5)
    instrumentalness = request.args.get("instrumentalness", 5)
    valence = request.args.get("valence", 5)
    energy = request.args.get("energy", 5)

    # get data to fit
    table = mpt.MyPyTable().load_from_file("tcc_ceds_music.csv")

    new_table = myutils.get_even_classifier_instances(table)
    # The genre labels are taken before the table values are categorized.
    genre_col = myutils.get_column(new_table.data, new_table.column_names,
                                   "genre")
    new_table = myutils.categorize_values(new_table)

    feature_names = [
        "sadness", "feelings", "danceability", "loudness", "acousticness",
        "instrumentalness", "valence", "energy",
    ]
    # Columns are collected feature-by-feature, then transposed into rows.
    X = myutils.transpose(
        [new_table.get_column(name) for name in feature_names])

    # create knn classifier
    knn_classifier = MyKNeighborsClassifier()
    knn_classifier.fit(X, genre_col)
    try:
        print("sadness:", sadness)
        prediction = knn_classifier.predict([[
            sadness, feelings, danceability, loudness, acousticness,
            instrumentalness, valence, energy
        ]])
        print(prediction)
    except Exception as err:  # best-effort endpoint: fall back, but say why
        print("feelings:", feelings)
        print("in except block")
        print("prediction failed:", repr(err))
        prediction = None

    if prediction is not None:
        return jsonify({"prediction": prediction}), 200

    # Fallback: pick a genre at random. random.choice cannot index past the
    # end of the list, unlike randint(0, len(...)) whose upper bound is
    # inclusive.
    # NOTE(review): "raggae" looks like a misspelling of "reggae"; left
    # unchanged because it is a value returned to clients -- confirm.
    results_array = [
        "pop", "hip hop", "rock", "blues", "country", "jazz", "raggae"
    ]
    return jsonify({"prediction": random.choice(results_array)}), 200
Ejemplo n.º 7
0
def test_kneighbors_classifier_kneighbors():
    """Check kneighbors() distances and indices on three datasets.

    The returned distances are rounded to 3 decimals before comparison
    because the expected values are stated to 3 decimals.
    """
    # DataSet 1: normalized values, all 4 neighbors requested.
    x_train1 = [[7, 7], [7, 4], [3, 4], [1, 4]]
    y_train1 = ["bad", "bad", "good", "good"]
    x_test1 = [[3, 7]]
    x_train1_final, x_test1_final = myutils.normalize(x_train1, x_test1)
    test_kneighbors1 = MyKNeighborsClassifier(4)
    test_kneighbors1.fit(x_train1_final, y_train1)
    distances1, indices1 = test_kneighbors1.kneighbors(x_test1_final)
    # Round row by row so each row's own length is used.
    distances1 = [[round(dist, 3) for dist in row] for row in distances1]
    check_distance1 = [[.667, 1.00, 1.054, 1.202]]
    check_indices1 = [[0, 2, 3, 1]]
    assert np.allclose(distances1, check_distance1)
    assert np.allclose(indices1, check_indices1)

    # DataSet 2: raw values, default number of neighbors.
    x_train2 = [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]]
    y_train2 = ["no", "yes", "no", "no", "yes", "no", "yes", "yes"]
    x_test2 = [[2, 3]]
    test_kneighbors2 = MyKNeighborsClassifier()
    test_kneighbors2.fit(x_train2, y_train2)
    distances2, indices2 = test_kneighbors2.kneighbors(x_test2)
    distances2 = [[round(dist, 3) for dist in row] for row in distances2]
    check_distance2 = [[1.414, 1.414, 2.00]]
    check_indices2 = [[0, 4, 6]]
    assert np.allclose(distances2, check_distance2)
    assert np.allclose(indices2, check_indices2)

    # DataSet 3: all 20 training instances requested as neighbors.
    x_train3 = [[0.8, 6.4], [1.4, 8.1], [2.1, 7.4], [2.6, 14.3], [6.8, 12.6],
                [8.8, 9.8], [9.2, 11.6], [10.8, 9.6], [11.8, 9.9], [12.4, 6.5],
                [12.8, 1.1], [14.0, 19.9], [14.2, 18.5], [15.6, 17.4],
                [15.8, 12.2], [16.6, 6.7], [17.4, 4.5], [18.2, 6.9],
                [19.0, 3.4], [19.6, 11.1]]
    y_train3 = [
        "-", "-", "-", "+", "-", "+", "-", "+", "+", "+", "-", "-", "-", "-",
        "-", "+", "+", "+", "-", "+"
    ]
    x_test3 = [[9.1, 11.0]]
    test_kneighbors3 = MyKNeighborsClassifier(20)
    test_kneighbors3.fit(x_train3, y_train3)
    distances3, indices3 = test_kneighbors3.kneighbors(x_test3)
    distances3 = [[round(dist, 3) for dist in row] for row in distances3]

    check_distance3 = [[
        0.608, 1.237, 2.202, 2.802, 2.915, 5.580, 6.807, 7.290, 7.871, 8.228,
        8.645, 9.070, 9.122, 9.489, 9.981, 10.160, 10.500, 10.542, 10.569,
        12.481
    ]]
    check_indices3 = [[
        6, 5, 7, 4, 8, 9, 14, 3, 2, 1, 15, 12, 13, 0, 17, 11, 19, 16, 10, 18
    ]]
    assert np.allclose(distances3, check_distance3)
    assert np.allclose(indices3, check_indices3)
Ejemplo n.º 8
0
def test_kneighbors_classifier_predict():
    """Check predict() on three datasets with the default number of neighbors.

    Each dataset's prediction is asserted against its expected label; without
    these assertions the test would pass regardless of what predict() returns.
    """
    # DataSet1:
    x_train1 = [[7, 7], [7, 4], [3, 4], [1, 4]]
    y_train1 = ["bad", "bad", "good", "good"]
    x_test1 = [[3, 7]]
    test_kneighbors1 = MyKNeighborsClassifier()
    test_kneighbors1.fit(x_train1, y_train1)
    y_predict1 = test_kneighbors1.predict(x_test1)
    check_predict1 = ["good"]
    assert y_predict1 == check_predict1

    # DataSet2:
    x_train2 = [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]]
    y_train2 = ["no", "yes", "no", "no", "yes", "no", "yes", "yes"]
    x_test2 = [[2, 3]]
    test_kneighbors2 = MyKNeighborsClassifier()
    test_kneighbors2.fit(x_train2, y_train2)
    y_predict2 = test_kneighbors2.predict(x_test2)
    check_predict2 = ["yes"]
    assert y_predict2 == check_predict2

    # DataSet3:
    x_train3 = [[0.8, 6.4], [1.4, 8.1], [2.1, 7.4], [2.6, 14.3], [6.8, 12.6],
                [8.8, 9.8], [9.2, 11.6], [10.8, 9.6], [11.8, 9.9], [12.4, 6.5],
                [12.8, 1.1], [14.0, 19.9], [14.2, 18.5], [15.6, 17.4],
                [15.8, 12.2], [16.6, 6.7], [17.4, 4.5], [18.2, 6.9],
                [19.0, 3.4], [19.6, 11.1]]
    y_train3 = [
        "-", "-", "-", "+", "-", "+", "-", "+", "+", "+", "-", "-", "-", "-",
        "-", "+", "+", "+", "-", "+"
    ]
    x_test3 = [[9.1, 11.0]]
    test_kneighbors3 = MyKNeighborsClassifier()
    test_kneighbors3.fit(x_train3, y_train3)
    y_predict3 = test_kneighbors3.predict(x_test3)
    check_predict3 = ["+"]
    assert y_predict3 == check_predict3
Ejemplo n.º 9
0
def test_kneighbors_classifier_kneighbors():
    """Check kneighbors() element by element on three datasets.

    Every returned distance and index is compared with np.isclose against
    the expected value at the same position.
    """
    ### Test #1 ###
    model = MyKNeighborsClassifier()
    model.fit([[1, 1], [1, 0], [.33, 0], [0, 0]],
              ["bad", "bad", "good", "good"])
    distances, indices = model.kneighbors([[.33, 1]])
    expected_dist = [[0.67, 1, 1.05304]]
    expected_idx = [[0, 2, 3]]

    for row_no, row in enumerate(distances):
        for col_no, value in enumerate(row):
            print(value)
            assert np.isclose(value, expected_dist[row_no][col_no])

    for row_no, row in enumerate(indices):
        for col_no, value in enumerate(row):
            assert np.isclose(value, expected_idx[row_no][col_no])

    ### Test #2 ### (same classifier object, refit)
    model.fit(
        [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]],
        ["no", "yes", "no", "no", "yes", "no", "yes", "yes"])
    distances, indices = model.kneighbors([[2, 3]])
    expected_dist = [[1.4142135623730951, 1.4142135623730951, 2.0]]
    expected_idx = [[0, 4, 6]]

    for row_no, row in enumerate(distances):
        for col_no, value in enumerate(row):
            assert np.isclose(value, expected_dist[row_no][col_no])

    for row_no, row in enumerate(indices):
        for col_no, value in enumerate(row):
            assert np.isclose(value, expected_idx[row_no][col_no])

    ### Test #3 ### (fresh classifier with 5 neighbors)
    model = MyKNeighborsClassifier(n_neighbors=5)
    train_points = [
        [.8, 6.3], [1.4, 8.1], [2.1, 7.4], [2.6, 14.3], [6.8, 12.6],
        [8.8, 9.8], [9.2, 11.6], [10.8, 9.6], [11.8, 9.9], [12.4, 6.5],
        [12.8, 1.1], [14.0, 19.9], [14.2, 18.5], [15.6, 17.4],
        [15.8, 12.2], [16.6, 6.7], [17.4, 4.5], [17.2, 6.9],
        [19.0, 3.4], [19.6, 11.1],
    ]
    train_labels = [
        "-", "-", "-", "+", "-", "+", "-", "+", "+", "+",
        "-", "-", "-", "-", "-", "+", "+", "+", "-", "+",
    ]
    model.fit(train_points, train_labels)
    distances, indices = model.kneighbors([[9.1, 11]])
    print(distances)
    print(indices)

    expected_dist = [[
        0.6082762530298216, 1.2369316876852974, 2.202271554554525,
        2.8017851452243794, 2.9154759474226513
    ]]
    expected_idx = [[6, 5, 7, 4, 8]]

    for row_no, row in enumerate(distances):
        for col_no, value in enumerate(row):
            assert np.isclose(value, expected_dist[row_no][col_no])

    for row_no, row in enumerate(indices):
        for col_no, value in enumerate(row):
            assert np.isclose(value, expected_idx[row_no][col_no])
Ejemplo n.º 10
0
def test_kneighbors_classifier_predict():
    """Check predict() on three datasets, comparing each predicted label in turn."""
    ### Test #1 ###
    model = MyKNeighborsClassifier()
    model.fit([[1, 1], [1, 0], [.33, 0], [0, 0]],
              ["bad", "bad", "good", "good"])
    predicted_labels = model.predict([[0.33, 1]])
    expected_labels = ["good"]

    for position, label in enumerate(predicted_labels):
        assert label == expected_labels[position]

    ### Test #2 ### (same classifier object, refit)
    model.fit(
        [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]],
        ["no", "yes", "no", "no", "yes", "no", "yes", "yes"])
    predicted_labels = model.predict([[2, 3]])
    expected_labels = ["yes"]

    for position, label in enumerate(predicted_labels):
        assert label == expected_labels[position]

    ### Test #3 ### (fresh classifier with 5 neighbors)
    model = MyKNeighborsClassifier(n_neighbors=5)
    train_points = [
        [.8, 6.3], [1.4, 8.1], [2.1, 7.4], [2.6, 14.3], [6.8, 12.6],
        [8.8, 9.8], [9.2, 11.6], [10.8, 9.6], [11.8, 9.9], [12.4, 6.5],
        [12.8, 1.1], [14.0, 19.9], [14.2, 18.5], [15.6, 17.4],
        [15.8, 12.2], [16.6, 6.7], [17.4, 4.5], [17.2, 6.9],
        [19.0, 3.4], [19.6, 11.1],
    ]
    train_labels = [
        "-", "-", "-", "+", "-", "+", "-", "+", "+", "+",
        "-", "-", "-", "-", "-", "+", "+", "+", "-", "+",
    ]
    model.fit(train_points, train_labels)
    predicted_labels = model.predict([[9.1, 11]])
    expected_labels = ["+"]

    for position, label in enumerate(predicted_labels):
        assert label == expected_labels[position]
Ejemplo n.º 11
0
def test_kneighbors_classifier_predict():
    """Check predict() on three datasets.

    The classifier's n_neighbors, X_train and y_train attributes are set
    directly instead of calling fit().
    """
    # case 1
    model = MyKNeighborsClassifier()
    points = [[1, 1], [1, 0], [0.33, 0], [0, 0]]
    labels = ["bad", "bad", "good", "good"]
    query = [[0.33, 1]]

    model.n_neighbors = 3
    model.X_train = points
    model.y_train = labels

    wanted = ['good']
    for position, label in enumerate(model.predict(query)):
        assert label == wanted[position]

    # case 2
    model = MyKNeighborsClassifier()
    points = [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]]
    labels = ["no", "yes", "no", "no", "yes", "no", "yes", "yes"]
    query = [[2, 3]]

    model.n_neighbors = 3
    model.X_train = points
    model.y_train = labels

    wanted = ['yes']
    for position, label in enumerate(model.predict(query)):
        assert label == wanted[position]

    # case 3 (labels are the integers -1 / 1)
    model = MyKNeighborsClassifier()
    points = [
        [0.8, 6.3], [1.4, 8.1], [2.1, 7.4], [2.6, 14.3], [6.8, 12.6],
        [8.8, 9.8], [9.2, 11.6], [10.8, 9.6], [11.8, 9.9], [12.4, 6.5],
        [12.8, 1.1], [14.0, 19.9], [14.2, 18.5], [15.6, 17.4],
        [15.8, 12.2], [16.6, 6.7], [17.4, 4.5], [18.2, 6.9], [19.0, 3.4],
        [19.6, 11.1],
    ]
    labels = [
        -1, -1, -1, 1, -1, 1, -1, 1, 1, 1,
        -1, -1, -1, -1, -1, 1, 1, 1, -1, 1,
    ]

    model.n_neighbors = 5
    model.X_train = points
    model.y_train = labels
    query = [[9.1, 11.0]]

    wanted = [1]
    for position, label in enumerate(model.predict(query)):
        assert label == wanted[position]
Ejemplo n.º 12
0
def test_kneighbors_classifier_kneighbors():
    """Check kneighbors() distances and indices on three datasets.

    The classifier's n_neighbors, X_train and y_train attributes are set
    directly instead of calling fit(). Expected distances are only stated
    to 3 decimals, so distance comparisons use an absolute tolerance of
    0.001; with np.allclose's default tolerance an exact (unrounded)
    distance such as 1.41421... would not match 1.414.
    """
    dist_tolerance = 0.001  # precision of the reference distances

    testKNN = MyKNeighborsClassifier()

    train = [[1, 1], [1, 0], [0.33, 0], [0, 0]]
    train_labels = ["bad", "bad", "good", "good"]
    test = [[0.33, 1]]

    testKNN.n_neighbors = 3
    testKNN.X_train = train
    testKNN.y_train = train_labels

    threeDist, threeIndices = testKNN.kneighbors(test)
    expectedDist = [0.670, 1.000, 1.053]
    expectedInd = [0, 2, 3]

    assert np.allclose(threeDist, expectedDist, atol=dist_tolerance)
    assert np.allclose(threeIndices, expectedInd)

    # case 2
    testKNN = MyKNeighborsClassifier()
    train = [[3, 2], [6, 6], [4, 1], [4, 4], [1, 2], [2, 0], [0, 3], [1, 6]]
    train_labels = ["no", "yes", "no", "no", "yes", "no", "yes", "yes"]
    test = [[2, 3]]

    testKNN.n_neighbors = 3
    testKNN.X_train = train
    testKNN.y_train = train_labels

    threeDist, threeIndices = testKNN.kneighbors(test)
    expectedDist = [1.414, 1.414, 2.000]
    expectedInd = [0, 4, 6]

    assert np.allclose(threeDist, expectedDist, atol=dist_tolerance)
    assert np.allclose(threeIndices, expectedInd)

    # case 3
    testKNN = MyKNeighborsClassifier()
    train = [[0.8, 6.3], [1.4, 8.1], [2.1, 7.4], [2.6, 14.3], [6.8, 12.6],
             [8.8, 9.8], [9.2, 11.6], [10.8, 9.6], [11.8, 9.9], [12.4, 6.5],
             [12.8, 1.1], [14.0, 19.9], [14.2, 18.5], [15.6, 17.4],
             [15.8, 12.2], [16.6, 6.7], [17.4, 4.5], [18.2, 6.9], [19.0, 3.4],
             [19.6, 11.1]]
    train_labels = [
        -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, -1, -1, -1, -1, -1, 1, 1, 1, -1, 1
    ]

    testKNN.n_neighbors = 5
    testKNN.X_train = train
    testKNN.y_train = train_labels
    test = [[9.1, 11.0]]

    fiveDist, fiveIndices = testKNN.kneighbors(test)
    expectedDist = [0.608, 1.237, 2.202, 2.802, 2.915]
    expectedInd = [6, 5, 7, 4, 8]

    assert np.allclose(fiveDist, expectedDist, atol=dist_tolerance)
    assert np.allclose(fiveIndices, expectedInd)
Ejemplo n.º 13
0
import pickle

# Script: load the movies table, derive a binary "profitted" label
# (gross - budget >= 0), fit a KNN classifier on selected feature columns
# and pickle the fitted classifier to 'movies_tree.p'.

# Importing the data and table and cols
movies_fname = os.path.join("input_data", "movies.csv")
# movie_data = MyPyTable().load_from_file_no_encode(movies_fname)
movies_table = MyPyTable().load_from_file(movies_fname, encode='cp1252')

# Getting profit
# Fetch each column once instead of once per row inside the comprehension.
gross_values = movies_table.get_column('gross')
budget_values = movies_table.get_column('budget')
gross_profit = [
    gross_values[i] - budget_values[i]
    for i in range(len(movies_table.data))
]
# 1 when the movie at least broke even, 0 when it lost money.
profitted = [0 if gross < 0 else 1 for gross in gross_profit]
movies_table.add_column(profitted, 'profitted')

# fit the KNN algorithm to the movies data
kn_class = MyKNeighborsClassifier()
feature_cols = [
    'budget', 'votes', 'genre', 'rating', 'score', 'star', 'director', 'writer'
]
features = movies_table.get_key_columns(feature_cols)
outcomes = profitted
kn_class.fit(features, outcomes)

packaged_object = kn_class

# pickle packaged object; the context manager closes the file even if
# pickle.dump raises.
with open('movies_tree.p', 'wb') as outfile:
    pickle.dump(packaged_object, outfile)