Example #1
# (e)
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

lda      = LinearDiscriminantAnalysis()
lda_pred = lda.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, lda_pred), index=['y=0', 'y=1'], columns=['y_pred=0', 'y_pred=1']))
print('accuracy: ', accuracy_score(y_test, lda_pred))  # 62.5% (accuracy_score returns accuracy, not error rate)

# (f) 
qda      = QuadraticDiscriminantAnalysis()
qda_pred = qda.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, qda_pred), index=['y=0', 'y=1'], columns=['y_pred=0', 'y_pred=1']))
print('accuracy: ', accuracy_score(y_test, qda_pred))  # 58.7%

# (g)
knn      = KNeighborsClassifier(n_neighbors=1)
knn_pred = knn.fit(x_train, y_train).predict(x_test)
print('accuracy: ', accuracy_score(y_test, knn_pred))  # 49%
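# Side note: KNN is distance-based, so features on larger scales dominate the
# metric. A minimal sketch of a scaled variant (StandardScaler/make_pipeline
# are additions for illustration, not part of the original exercise):
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

knn_scaled      = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=1))
knn_scaled_pred = knn_scaled.fit(x_train, y_train).predict(x_test)
print('accuracy (scaled): ', accuracy_score(y_test, knn_scaled_pred))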

# (h): Logistic regression and LDA perform best on this test set.

# (i)
# KNN: try a few values of k and record the test error rate for each
error_rate = np.array([])
k_value    = np.array([])
for i in (5, 10, 20):
    knn        = KNeighborsClassifier(n_neighbors=i)
    knn_pred   = knn.fit(x_train, y_train).predict(x_test)
    k_value    = np.append(k_value, i)
    error_rate = np.append(error_rate, 1 - accuracy_score(y_test, knn_pred))

best_k = int(k_value[error_rate.argmin()])  # k with the lowest test error
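# Caveat: choosing k by test-set error leaks test information into model
# selection. A minimal sketch of a cleaner alternative using 5-fold
# cross-validation on the training data (cross_val_score and the candidate
# k values here are assumptions, not part of the original exercise):
from sklearn.model_selection import cross_val_score

cv_scores = []
for i in (5, 10, 20):
    knn = KNeighborsClassifier(n_neighbors=i)
    cv_scores.append(cross_val_score(knn, x_train, y_train, cv=5).mean())
best_k_cv = (5, 10, 20)[int(np.argmax(cv_scores))]  # k with the best CV accuracy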
Example #2
        print(
            "------------------------------------------------------------------------"
        )
        print("calculating DECISION TREE please wait...")
        dt = DecisionTreeClassifier(random_state=0)
        dt.fit(fea_train, np.array(labels_train))

        dt_pred = dt.predict(fea_test)
        calculate_result(np.array(labels_test), dt_pred)

        print(
            "------------------------------------------------------------------------"
        )
        print("calculating KNN please waite...")
        knn = KNeighborsClassifier(n_neighbors=5)
        knn.fit(fea_train, np.array(labels_train))

        knn_pred = knn.predict(fea_test)
        calculate_result(np.array(labels_test), knn_pred)

        print(
            "------------------------------------------------------------------------"
        )
        print("calculating Nueral networks please waite...")
        paramters = L_layer_model(fea_train.T,
                                  np.array(labels_train).T, [50, 100, 50, 10])
        nnpred = NN_predict(fea_test.T, np.array(labels_test).T, paramters)
        calculate_result(np.array(labels_test), np.array(nnpred))

    print(
        "========================================================================="
    )
    print("Test optimal model on test set")
    plt.ylabel('Error Rate')

    print("the misclassification error for each k value is : ",
          np.round(MSE, 3))

    return nearest_k
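
# calculate_result is defined elsewhere in the original script. A plausible
# minimal sketch of such a helper (an assumption, not the author's actual
# implementation), printing the usual classification metrics:
from sklearn import metrics

def calculate_result(actual, pred):
    # weighted averages so the helper also works for multi-class labels
    print('accuracy : %.3f' % metrics.accuracy_score(actual, pred))
    print('precision: %.3f' % metrics.precision_score(actual, pred, average='weighted'))
    print('recall   : %.3f' % metrics.recall_score(actual, pred, average='weighted'))
    print('f1-score : %.3f' % metrics.f1_score(actual, pred, average='weighted'))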


query = list(range(1, 50))  # candidate k values (n_neighbors must be >= 1)

optimal_k = find_optimal_k(X_train, Y_train, query)

# K-NN with optimal K
knn = KNeighborsClassifier(n_neighbors=optimal_k)
knn.fit(X_train, Y_train)
pred = knn.predict(X_test)
"""
Model Evaluation
"""

# Confusion Matrix
plt.figure()
cm = confusion_matrix(Y_test, pred)
class_label = ["negative", "positive"]
df_cm_test = pd.DataFrame(cm, index=class_label, columns=class_label)
sns.heatmap(df_cm_test, annot=True, fmt="d")
plt.title("Confusion Matrix for Test Data")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")

# classification report
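# The snippet ends at the comment above; a likely continuation, assuming
# sklearn.metrics.classification_report (an assumption, not shown in the
# original):
from sklearn.metrics import classification_report

print(classification_report(Y_test, pred, target_names=class_label))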