def generate_svm_analysis() -> None:
    """Run the SVM analysis on the phishing data, then on the vocal data.

    For each dataset, in order:

    1. split it 80/20 with ``train_test_split``;
    2. call ``hyperSVM`` and ``hyperSVMPenalty`` with the split and a plot
       title;
    3. obtain ``C`` from ``SVMGridSearchCV`` on the training portion;
    4. build an RBF-kernel ``SVC`` with that ``C`` and ``random_state=100``;
    5. pass the estimator to ``plot_learning_curve`` (training portion only)
       and to ``final_classifier_evaluation`` (both portions).

    Returns nothing. The values returned by ``plot_learning_curve`` are not
    used here.
    """
    # Both datasets are loaded up front, phishing first, as before.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # One row per dataset:
    # (features, labels, kernel-curve title, penalty-curve title,
    #  learning-curve title).
    # Titles are kept exactly as they were, including "Penalty(C)" without a
    # space in the vocal row.
    experiments = (
        (
            X_p,
            Y_p,
            "Model Complexity Curve for SVM (Phishing Data)\nHyperparameter : Kernel Function",
            "Model Complexity Curve for SVM (Phishing Data)\nHyperparameter : Penalty (C)",
            "SVM Phishing Data",
        ),
        (
            X_v,
            Y_v,
            "Model Complexity Curve for SVM (Vocal Data)\nHyperparameter : Kernel Function",
            "Model Complexity Curve for SVM (Vocal Data)\nHyperparameter : Penalty(C)",
            "SVM Vocal Data",
        ),
    )

    for features, labels, kernel_title, penalty_title, curve_title in experiments:
        # NOTE(review): no random_state is passed to the split, so it is
        # presumably not reproducible between runs - confirm this is intended.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        hyperSVM(X_train, y_train, X_test, y_test, title=kernel_title)
        hyperSVMPenalty(X_train, y_train, X_test, y_test, title=penalty_title)

        # The grid search only sees the training portion.
        C_val = SVMGridSearchCV(X_train, y_train)
        estimator = SVC(C=C_val, kernel='rbf', random_state=100)

        plot_learning_curve(estimator, X_train, y_train, title=curve_title)
        final_classifier_evaluation(estimator, X_train, X_test, y_train, y_test)
def generate_knn_analysis() -> None:
    """Run the kNN analysis on the phishing data, then on the vocal data.

    For each dataset, in order:

    1. split it 80/20 with ``train_test_split``;
    2. call ``hyperKNN`` with the split and a plot title;
    3. build a ``KNeighborsClassifier`` with ``n_neighbors=3`` and
       ``n_jobs=-1`` (no grid search is run in this function);
    4. pass the estimator to ``plot_learning_curve`` (training portion only)
       and to ``final_classifier_evaluation`` (both portions).

    Returns nothing. The values returned by ``plot_learning_curve`` are not
    used here.
    """
    # Both datasets are loaded up front, phishing first, as before.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # One row per dataset:
    # (features, labels, complexity-curve title, learning-curve title).
    experiments = (
        (
            X_p,
            Y_p,
            "Model Complexity Curve for kNN (Phishing Data)\nHyperparameter : No. Neighbors",
            "kNN Phishing Data",
        ),
        (
            X_v,
            Y_v,
            "Model Complexity Curve for kNN (Vocal Data)\nHyperparameter : No. Neighbors",
            "kNN Vocal Data",
        ),
    )

    for features, labels, complexity_title, curve_title in experiments:
        # NOTE(review): no random_state is passed to the split, so it is
        # presumably not reproducible between runs - confirm this is intended.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        hyperKNN(X_train, y_train, X_test, y_test, title=complexity_title)

        # A fresh estimator per dataset, with the neighbour count fixed at 3.
        estimator = KNeighborsClassifier(n_neighbors=3, n_jobs=-1)

        plot_learning_curve(estimator, X_train, y_train, title=curve_title)
        final_classifier_evaluation(estimator, X_train, X_test, y_train, y_test)
def generate_neural_network_analysis() -> None:
    """Run the neural-network analysis on the phishing data, then the vocal data.

    For each dataset, in order:

    1. split it 80/20 with ``train_test_split``;
    2. call ``hyperNN`` and ``hyperNNLR`` with the split and a plot title;
    3. obtain the hidden-unit count and learning rate from ``NNGridSearchCV``
       on the training portion;
    4. build a single-hidden-layer ``MLPClassifier`` (``adam`` solver,
       ``logistic`` activation, ``random_state=100``) with those values;
    5. pass the estimator to ``plot_learning_curve`` (training portion only)
       and to ``final_classifier_evaluation`` (both portions).

    Returns nothing. The values returned by ``plot_learning_curve`` are not
    used here.
    """
    # Both datasets are loaded up front, phishing first, as before.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # One row per dataset:
    # (features, labels, hidden-units title, learning-rate title,
    #  learning-curve title).
    experiments = (
        (
            X_p,
            Y_p,
            "Model Complexity Curve for NN (Phishing Data)\nHyperparameter : No. Hidden Units",
            "Model Complexity Curve for NN (Phishing Data)\nHyperparameter : Learning Rate",
            "Neural Net Phishing Data",
        ),
        (
            X_v,
            Y_v,
            "Model Complexity Curve for NN (Vocal Data)\nHyperparameter : No. Hidden Units",
            "Model Complexity Curve for NN (Vocal Data)\nHyperparameter : Learning Rate",
            "Neural Net Vocal Data",
        ),
    )

    for features, labels, units_title, rate_title, curve_title in experiments:
        # NOTE(review): no random_state is passed to the split, so it is
        # presumably not reproducible between runs - confirm this is intended.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        hyperNN(X_train, y_train, X_test, y_test, title=units_title)
        hyperNNLR(X_train, y_train, X_test, y_test, title=rate_title)

        # The grid search only sees the training portion.
        h_units, learn_rate = NNGridSearchCV(X_train, y_train)
        estimator = MLPClassifier(
            hidden_layer_sizes=(h_units, ),
            solver='adam',
            activation='logistic',
            learning_rate_init=learn_rate,
            random_state=100)

        plot_learning_curve(estimator, X_train, y_train, title=curve_title)
        final_classifier_evaluation(estimator, X_train, X_test, y_train, y_test)
def generate_boosting_analysis() -> None:
    """Run the boosted-tree analysis on the phishing data, then the vocal data.

    For each dataset, in order:

    1. split it 80/20 with ``train_test_split``;
    2. call ``hyperBoost`` with the split, the positional arguments ``3`` and
       ``50``, and a plot title;
    3. obtain ``max_depth``, ``min_samples_leaf``, ``n_estimators`` and
       ``learning_rate`` from ``BoostedGridSearchCV`` on the training portion;
    4. build a ``GradientBoostingClassifier`` with those values and
       ``random_state=100``;
    5. pass the estimator to ``plot_learning_curve`` (training portion only)
       and to ``final_classifier_evaluation`` (both portions).

    Returns nothing. The values returned by ``plot_learning_curve`` are not
    used here.
    """
    # Both datasets are loaded up front, phishing first, as before.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # One row per dataset:
    # (features, labels, complexity-curve title, learning-curve title).
    experiments = (
        (
            X_p,
            Y_p,
            "Model Complexity Curve for Boosted Tree (Phishing Data)\nHyperparameter : No. Estimators",
            "Boosted Tree Phishing Data",
        ),
        (
            X_v,
            Y_v,
            "Model Complexity Curve for Boosted Tree (Vocal Data)\nHyperparameter : No. Estimators",
            "Boosted Tree Vocal Data",
        ),
    )

    for features, labels, complexity_title, curve_title in experiments:
        # NOTE(review): no random_state is passed to the split, so it is
        # presumably not reproducible between runs - confirm this is intended.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        hyperBoost(X_train, y_train, X_test, y_test, 3, 50, title=complexity_title)

        # Leaf-size bounds handed to the grid search: 0.5% and 5% of the
        # training-set size.
        # NOTE(review): the lower bound rounds to 0 for training sets of
        # roughly 100 rows or fewer - confirm the grid search tolerates that.
        start_leaf_n = round(0.005 * len(X_train))
        end_leaf_n = round(0.05 * len(X_train))

        max_depth, min_samples_leaf, n_est, learn_rate = BoostedGridSearchCV(
            start_leaf_n, end_leaf_n, X_train, y_train)
        estimator = GradientBoostingClassifier(
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            n_estimators=n_est,
            learning_rate=learn_rate,
            random_state=100)

        plot_learning_curve(estimator, X_train, y_train, title=curve_title)
        final_classifier_evaluation(estimator, X_train, X_test, y_train, y_test)
def generate_decision_tree_analysis() -> None:
    """Run the decision-tree analysis on the phishing data, then the vocal data.

    For each dataset, in order:

    1. split it 80/20 with ``train_test_split``;
    2. call ``hyperTree`` and ``hyperTreeSampleLeaf`` with the split and a
       plot title;
    3. obtain ``max_depth`` and ``min_samples_leaf`` from ``TreeGridSearchCV``
       on the training portion;
    4. build a ``DecisionTreeClassifier`` with those values,
       ``criterion='entropy'`` and ``random_state=100``;
    5. pass the estimator to ``plot_learning_curve`` (training portion only)
       and to ``final_classifier_evaluation`` (both portions).

    Returns nothing. The values returned by ``plot_learning_curve`` are not
    used here.
    """
    # Both datasets are loaded up front, phishing first, as before.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # One row per dataset:
    # (features, labels, max-depth title, min-sample-leaf title,
    #  learning-curve title).
    experiments = (
        (
            X_p,
            Y_p,
            "Model Complexity Curve for Decision Tree (Phishing Data)\nHyperparameter : Tree Max Depth",
            "Model Complexity Curve for Decision Tree (Phishing Data)\nHyperparameter : Min Sample Leaf",
            "Decision Tree Phishing Data",
        ),
        (
            X_v,
            Y_v,
            "Model Complexity Curve for Decision Tree (Vocal Data)\nHyperparameter : Tree Max Depth",
            "Model Complexity Curve for Decision Tree (Vocal Data)\nHyperparameter : Min Sample Leaf",
            "Decision Tree Vocal Data",
        ),
    )

    for features, labels, depth_title, leaf_title, curve_title in experiments:
        # NOTE(review): no random_state is passed to the split, so it is
        # presumably not reproducible between runs - confirm this is intended.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        hyperTree(X_train, y_train, X_test, y_test, title=depth_title)
        hyperTreeSampleLeaf(X_train, y_train, X_test, y_test, title=leaf_title)

        # Leaf-size bounds handed to the grid search: 0.5% and 5% of the
        # training-set size.
        # NOTE(review): the lower bound rounds to 0 for training sets of
        # roughly 100 rows or fewer - confirm the grid search tolerates that.
        max_depth, min_samples_leaf = TreeGridSearchCV(
            round(0.005 * len(X_train)),
            round(0.05 * len(X_train)),
            X_train,
            y_train)
        estimator = DecisionTreeClassifier(
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            random_state=100,
            criterion='entropy')

        plot_learning_curve(estimator, X_train, y_train, title=curve_title)
        final_classifier_evaluation(estimator, X_train, X_test, y_train, y_test)