Ejemplo n.º 1
0
def generate_svm_analysis():
    """Run the SVM experiment sequence on the phishing and vocal datasets.

    For each dataset, in order (phishing first, then vocal):

    1. Hold out 20% of the samples with ``train_test_split``.
    2. Call ``hyperSVM`` and ``hyperSVMPenalty`` on the split, titled as
       model-complexity curves for the kernel function and the penalty C.
    3. Take ``C`` from ``SVMGridSearchCV`` (run on the training split) and
       build an RBF-kernel ``SVC`` with ``random_state=100``.
    4. Pass that estimator to ``plot_learning_curve`` and
       ``final_classifier_evaluation``.

    Returns:
        None. The values returned by ``plot_learning_curve`` are not used.
    """
    # Both datasets are loaded up front, before any processing starts.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # One entry per dataset: (features, labels, kernel-curve title,
    # penalty-curve title, learning-curve title). The titles are written out
    # in full rather than templated because the two penalty titles differ in
    # spacing ("Penalty (C)" vs "Penalty(C)") and are kept exactly as they were.
    runs = (
        (
            X_p,
            Y_p,
            "Model Complexity Curve for SVM (Phishing Data)\nHyperparameter : Kernel Function",
            "Model Complexity Curve for SVM (Phishing Data)\nHyperparameter : Penalty (C)",
            "SVM Phishing Data",
        ),
        (
            X_v,
            Y_v,
            "Model Complexity Curve for SVM (Vocal Data)\nHyperparameter : Kernel Function",
            "Model Complexity Curve for SVM (Vocal Data)\nHyperparameter : Penalty(C)",
            "SVM Vocal Data",
        ),
    )

    for features, labels, kernel_title, penalty_title, curve_title in runs:
        # NOTE(review): the split is not seeded here, so it differs between
        # runs unless the global NumPy seed is fixed elsewhere - confirm
        # whether that is intended given the estimator uses random_state=100.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        hyperSVM(X_train, y_train, X_test, y_test, title=kernel_title)
        hyperSVMPenalty(X_train, y_train, X_test, y_test, title=penalty_title)

        # The grid search only sees the training split.
        C_val = SVMGridSearchCV(X_train, y_train)
        estimator = SVC(C=C_val, kernel='rbf', random_state=100)

        plot_learning_curve(estimator, X_train, y_train, title=curve_title)
        final_classifier_evaluation(estimator, X_train, X_test, y_train,
                                    y_test)
Ejemplo n.º 2
0
def generate_knn_analysis():
    """Run the kNN experiment sequence on the phishing and vocal datasets.

    For each dataset, in order (phishing first, then vocal):

    1. Hold out 20% of the samples with ``train_test_split``.
    2. Call ``hyperKNN`` on the split, titled as a model-complexity curve
       for the number of neighbors.
    3. Build a ``KNeighborsClassifier`` with ``n_neighbors=3`` and
       ``n_jobs=-1`` (the neighbor count is fixed here, not searched).
    4. Pass that estimator to ``plot_learning_curve`` and
       ``final_classifier_evaluation``.

    Returns:
        None. The values returned by ``plot_learning_curve`` are not used.
    """
    # Both datasets are loaded up front, before any processing starts.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # (features, labels, dataset name as it appears in the plot titles)
    runs = (
        (X_p, Y_p, "Phishing"),
        (X_v, Y_v, "Vocal"),
    )

    for features, labels, name in runs:
        # NOTE(review): the split is not seeded here, so it differs between
        # runs unless the global NumPy seed is fixed elsewhere.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        hyperKNN(
            X_train,
            y_train,
            X_test,
            y_test,
            title=f"Model Complexity Curve for kNN ({name} Data)\n"
            "Hyperparameter : No. Neighbors",
        )

        estimator = KNeighborsClassifier(n_neighbors=3, n_jobs=-1)
        plot_learning_curve(estimator, X_train, y_train,
                            title=f"kNN {name} Data")
        final_classifier_evaluation(estimator, X_train, X_test, y_train,
                                    y_test)
Ejemplo n.º 3
0
def generate_neural_network_analysis():
    """Run the neural-network experiment sequence on both datasets.

    For each dataset, in order (phishing first, then vocal):

    1. Hold out 20% of the samples with ``train_test_split``.
    2. Call ``hyperNN`` and ``hyperNNLR`` on the split, titled as
       model-complexity curves for the number of hidden units and the
       learning rate.
    3. Take the hidden-unit count and learning rate from ``NNGridSearchCV``
       (run on the training split) and build a single-hidden-layer
       ``MLPClassifier`` (adam solver, logistic activation,
       ``random_state=100``).
    4. Pass that estimator to ``plot_learning_curve`` and
       ``final_classifier_evaluation``.

    Returns:
        None. The values returned by ``plot_learning_curve`` are not used.
    """
    # Both datasets are loaded up front, before any processing starts.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # (features, labels, dataset name as it appears in the plot titles)
    runs = (
        (X_p, Y_p, "Phishing"),
        (X_v, Y_v, "Vocal"),
    )

    for features, labels, name in runs:
        # NOTE(review): the split is not seeded here, so it differs between
        # runs unless the global NumPy seed is fixed elsewhere.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        curve_prefix = f"Model Complexity Curve for NN ({name} Data)\n"
        hyperNN(
            X_train,
            y_train,
            X_test,
            y_test,
            title=curve_prefix + "Hyperparameter : No. Hidden Units",
        )
        hyperNNLR(
            X_train,
            y_train,
            X_test,
            y_test,
            title=curve_prefix + "Hyperparameter : Learning Rate",
        )

        # The grid search only sees the training split.
        h_units, learn_rate = NNGridSearchCV(X_train, y_train)
        estimator = MLPClassifier(hidden_layer_sizes=(h_units, ),
                                  solver='adam',
                                  activation='logistic',
                                  learning_rate_init=learn_rate,
                                  random_state=100)

        plot_learning_curve(estimator, X_train, y_train,
                            title=f"Neural Net {name} Data")
        final_classifier_evaluation(estimator, X_train, X_test, y_train,
                                    y_test)
Ejemplo n.º 4
0
def generate_boosting_analysis():
    """Run the boosted-tree experiment sequence on both datasets.

    For each dataset, in order (phishing first, then vocal):

    1. Hold out 20% of the samples with ``train_test_split``.
    2. Call ``hyperBoost`` on the split, titled as a model-complexity curve
       for the number of estimators.
    3. Call ``BoostedGridSearchCV`` on the training split with leaf-size
       bounds of 0.5% and 5% of the training-set size, and build a
       ``GradientBoostingClassifier`` (``random_state=100``) from the
       returned max depth, min samples per leaf, estimator count and
       learning rate.
    4. Pass that estimator to ``plot_learning_curve`` and
       ``final_classifier_evaluation``.

    Returns:
        None. The values returned by ``plot_learning_curve`` are not used.
    """
    # Both datasets are loaded up front, before any processing starts.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # (features, labels, dataset name as it appears in the plot titles)
    runs = (
        (X_p, Y_p, "Phishing"),
        (X_v, Y_v, "Vocal"),
    )

    for features, labels, name in runs:
        # NOTE(review): the split is not seeded here, so it differs between
        # runs unless the global NumPy seed is fixed elsewhere.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        # The positional 3 and 50 are forwarded unchanged; their meaning is
        # defined by hyperBoost, which is not visible from this function.
        hyperBoost(
            X_train,
            y_train,
            X_test,
            y_test,
            3,
            50,
            title=f"Model Complexity Curve for Boosted Tree ({name} Data)\n"
            "Hyperparameter : No. Estimators",
        )

        # Leaf-size bounds handed to the grid search: 0.5% and 5% of the
        # number of training samples, rounded to whole samples.
        n_train = len(X_train)
        start_leaf_n = round(0.005 * n_train)
        end_leaf_n = round(0.05 * n_train)
        max_depth, min_samples_leaf, n_est, learn_rate = BoostedGridSearchCV(
            start_leaf_n, end_leaf_n, X_train, y_train)

        estimator = GradientBoostingClassifier(
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            n_estimators=n_est,
            learning_rate=learn_rate,
            random_state=100)

        plot_learning_curve(estimator, X_train, y_train,
                            title=f"Boosted Tree {name} Data")
        final_classifier_evaluation(estimator, X_train, X_test, y_train,
                                    y_test)
Ejemplo n.º 5
0
def generate_decision_tree_analysis():
    """Run the decision-tree experiment sequence on both datasets.

    For each dataset, in order (phishing first, then vocal):

    1. Hold out 20% of the samples with ``train_test_split``.
    2. Call ``hyperTree`` and ``hyperTreeSampleLeaf`` on the split, titled
       as model-complexity curves for the tree max depth and the min
       sample leaf.
    3. Call ``TreeGridSearchCV`` on the training split with leaf-size
       bounds of 0.5% and 5% of the training-set size, and build an
       entropy-criterion ``DecisionTreeClassifier`` (``random_state=100``)
       from the returned max depth and min samples per leaf.
    4. Pass that estimator to ``plot_learning_curve`` and
       ``final_classifier_evaluation``.

    Returns:
        None. The values returned by ``plot_learning_curve`` are not used.
    """
    # Both datasets are loaded up front, before any processing starts.
    X_p, Y_p = get_phishing_data()
    X_v, Y_v = get_vocal_data()

    # (features, labels, dataset name as it appears in the plot titles)
    runs = (
        (X_p, Y_p, "Phishing"),
        (X_v, Y_v, "Vocal"),
    )

    for features, labels, name in runs:
        # NOTE(review): the split is not seeded here, so it differs between
        # runs unless the global NumPy seed is fixed elsewhere.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(labels), test_size=0.20)

        curve_prefix = (
            f"Model Complexity Curve for Decision Tree ({name} Data)\n")
        hyperTree(
            X_train,
            y_train,
            X_test,
            y_test,
            title=curve_prefix + "Hyperparameter : Tree Max Depth",
        )
        hyperTreeSampleLeaf(
            X_train,
            y_train,
            X_test,
            y_test,
            title=curve_prefix + "Hyperparameter : Min Sample Leaf",
        )

        # Leaf-size bounds handed to the grid search: 0.5% and 5% of the
        # number of training samples, rounded to whole samples.
        n_train = len(X_train)
        max_depth, min_samples_leaf = TreeGridSearchCV(
            round(0.005 * n_train), round(0.05 * n_train), X_train, y_train)

        estimator = DecisionTreeClassifier(max_depth=max_depth,
                                           min_samples_leaf=min_samples_leaf,
                                           random_state=100,
                                           criterion='entropy')

        plot_learning_curve(estimator, X_train, y_train,
                            title=f"Decision Tree {name} Data")
        final_classifier_evaluation(estimator, X_train, X_test, y_train,
                                    y_test)