def NeuralNetworkModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search an MLP classifier, score it, and persist the best model.

    splitData: when truthy, X_test/y_test are scored as validation data;
    otherwise the validation metrics are reported as zeros.
    """
    search = GridSearchCV(
        MLPClassifier(alpha=1e-4, max_iter=1000),
        param_grid={
            'hidden_layer_sizes': [(4, 6), (5, 7), (8, 10)],
            'activation': ['tanh', 'relu'],
            'learning_rate': ['constant', 'invscaling'],
        },
        scoring=['roc_auc', 'f1', 'accuracy'],
        refit='roc_auc')
    search.fit(X_train, y_train.ravel())
    clf = search.best_estimator_

    # Hold-out metrics only exist when the caller actually split the data.
    if splitData:
        val_metrics = tuple(getMetrics(y_test, clf.predict(X_test)))
    else:
        val_metrics = (0, 0, 0, 0, 0)

    # Training-set metrics: (accuracy, precision, recall, auc, f1).
    metrics = tuple(getMetrics(y_train, clf.predict(X_train)))

    logAndSave(name_of_model="NeuralNetworkGS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
def LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search a class-weighted logistic regression and persist the best fit.

    splitData: when truthy, X_test/y_test are scored as validation data;
    otherwise the validation metrics are reported as zeros.
    """
    estimator = LogisticRegression(solver='liblinear',
                                   multi_class='ovr',
                                   class_weight={0: 0.7, 1: 1.5})
    search = GridSearchCV(estimator,
                          param_grid={
                              'penalty': ['l1', 'l2'],
                              'C': [0.01, .09, 1, 5, 25, 50, 100],
                          },
                          scoring=['roc_auc', 'f1', 'accuracy'],
                          refit='roc_auc')
    search.fit(X_train, y_train.ravel())
    clf = search.best_estimator_

    # Hold-out metrics only exist when the caller actually split the data.
    if splitData:
        val_metrics = tuple(getMetrics(y_test, clf.predict(X_test)))
    else:
        val_metrics = (0, 0, 0, 0, 0)

    # Training-set metrics: (accuracy, precision, recall, auc, f1).
    metrics = tuple(getMetrics(y_train, clf.predict(X_train)))

    logAndSave(name_of_model="LogisticRegressionGS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
예제 #3
0
def AdaBoostModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search an AdaBoost-of-SVC classifier and persist the best fit.

    The grid tunes the underlying SVC (kernel, C, degree) through the
    ``base_estimator__`` parameter prefix. splitData: when truthy,
    X_test/y_test are scored as validation data; otherwise the validation
    metrics are reported as zeros.
    """
    booster = AdaBoostClassifier(base_estimator=SVC(), algorithm='SAMME')
    search = GridSearchCV(
        booster,
        param_grid={
            'base_estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'base_estimator__C': [x / 10 for x in range(1, 11)],
            'base_estimator__degree': list(range(3, 5)),
        },
        scoring=['roc_auc', 'f1', 'accuracy'],
        refit='roc_auc')
    search.fit(X_train, y_train.ravel())
    clf = search.best_estimator_

    # Hold-out metrics only exist when the caller actually split the data.
    if splitData:
        val_metrics = tuple(getMetrics(y_test, clf.predict(X_test)))
    else:
        val_metrics = (0, 0, 0, 0, 0)

    # NOTE(review): train predictions are reshaped to a column vector here,
    # unlike the sibling models — presumably what getMetrics expects; verify.
    train_preds = clf.predict(X_train).reshape(-1, 1)
    metrics = tuple(getMetrics(y_train, train_preds))

    logAndSave(name_of_model="AdaBoostGS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
예제 #4
0
def LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test):
    """Fit a fixed L1 class-weighted logistic regression and persist it.

    splitData: when truthy, X_test/y_test are scored (and printed via
    printMetrics) as validation data; otherwise the validation metrics
    are reported as zeros.
    """
    clf = LogisticRegression(penalty='l1',
                             solver='liblinear',
                             multi_class='ovr',
                             class_weight={0: 0.7, 1: 1.5})
    clf.fit(X_train, y_train.ravel())

    if splitData:
        test_preds = clf.predict(X_test)
        printMetrics(y_test, test_preds)
        val_metrics = tuple(getMetrics(y_test, test_preds))
    else:
        val_metrics = (0, 0, 0, 0, 0)

    # Training-set metrics: (accuracy, precision, recall, auc, f1).
    metrics = tuple(getMetrics(y_train, clf.predict(X_train)))

    logAndSave(name_of_model="LogisticRegression",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
def XGBClassifierModelV2(X_train, X_test, y_train, y_test):
    """Grid-search a multi-class XGBoost classifier and persist the best fit.

    Unlike the binary variants in this file, this always scores X_test/y_test
    and uses weighted multi-class scorers, refitting on weighted F1.
    """
    multi_class = True
    search = GridSearchCV(
        xgb.XGBClassifier(objective="multi:softmax", eval_metric="mlogloss"),
        param_grid={
            'learning_rate': [x / 10 for x in range(1, 5)],
            'max_depth': list(range(10, 21, 1)),
        },
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0)
    search.fit(X_train, y_train)
    clf = search.best_estimator_

    # Validation metrics on the hold-out set.
    val_metrics = tuple(
        getMetrics(y_test, clf.predict(X_test), multi_class=multi_class))

    # Training-set metrics: (accuracy, precision, recall, auc, f1).
    metrics = tuple(
        getMetrics(y_train, clf.predict(X_train), multi_class=multi_class))

    logAndSave(name_of_model="XGBClassifierModelV2GS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
def XGBClassifierModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search a binary XGBoost classifier and persist the best fit.

    splitData: when truthy, X_test/y_test are scored as validation data;
    otherwise the validation metrics are reported as zeros.
    """
    clf = xgb.XGBClassifier(objective="binary:logistic", eval_metric="auc")
    grid_values = {
        'learning_rate': [x / 10 for x in range(1, 11)],
        'max_depth': list(range(10, 21, 1)),
        # BUG FIX: the key was 'gamma ' (trailing space), which makes
        # GridSearchCV raise "Invalid parameter" at fit time.
        'gamma': [x / 10 for x in range(1, 11)]
    }
    grid_clf_acc = GridSearchCV(clf,
                                param_grid=grid_values,
                                scoring=['roc_auc', 'f1', 'accuracy'],
                                refit='roc_auc')
    grid_clf_acc.fit(X_train, y_train.ravel())
    clf = grid_clf_acc.best_estimator_

    # Hold-out metrics only exist when the caller actually split the data.
    if splitData:
        y_preds = clf.predict(X_test)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    # Training-set metrics: (accuracy, precision, recall, auc, f1).
    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)

    logAndSave(name_of_model="XGBClassifierGS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
예제 #7
0
def AdaBoostModel(splitData, X_train, X_test, y_train, y_test):
    """Fit a fixed 100-estimator AdaBoost-of-SVC classifier and persist it.

    splitData: when truthy, X_test/y_test are scored (and printed via
    printMetrics) as validation data; otherwise the validation metrics
    are reported as zeros.
    """
    clf = AdaBoostClassifier(base_estimator=SVC(),
                             n_estimators=100,
                             algorithm='SAMME')
    clf.fit(X_train, y_train.ravel())

    if splitData:
        test_preds = clf.predict(X_test)
        printMetrics(y_test, test_preds)
        val_metrics = tuple(getMetrics(y_test, test_preds))
    else:
        val_metrics = (0, 0, 0, 0, 0)

    # NOTE(review): train predictions are reshaped to a column vector here,
    # unlike most sibling models — presumably what getMetrics expects; verify.
    train_preds = clf.predict(X_train).reshape(-1, 1)
    metrics = tuple(getMetrics(y_train, train_preds))

    logAndSave(name_of_model="AdaBoost",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
예제 #8
0
def RandomForestModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search a random forest classifier and persist the best fit.

    splitData: when truthy, X_test/y_test are scored as validation data;
    otherwise the validation metrics are reported as zeros.
    """
    search = GridSearchCV(
        RandomForestClassifier(n_estimators=100, max_depth=11),
        param_grid={
            'n_estimators': list(range(100, 501, 50)),
            'criterion': ['gini', 'entropy'],
            'max_depth': list(range(10, 21, 1)),
        },
        scoring=['roc_auc', 'f1', 'accuracy'],
        refit='roc_auc')
    search.fit(X_train, y_train.ravel())
    clf = search.best_estimator_

    # Hold-out metrics only exist when the caller actually split the data.
    if splitData:
        val_metrics = tuple(getMetrics(y_test, clf.predict(X_test)))
    else:
        val_metrics = (0, 0, 0, 0, 0)

    # Training-set metrics: (accuracy, precision, recall, auc, f1).
    metrics = tuple(getMetrics(y_train, clf.predict(X_train)))

    logAndSave(name_of_model="RandomForestClassifierGS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
예제 #9
0
def XGBClassifierModel(splitData, X_train, X_test, y_train, y_test):
    """Fit a binary XGBoost classifier with default hyperparameters and persist it.

    splitData: when truthy, X_test/y_test are scored (and printed via
    printMetrics) as validation data; otherwise the validation metrics
    are reported as zeros.
    """
    clf = xgb.XGBClassifier(objective="binary:logistic", eval_metric="auc")
    clf.fit(X_train, y_train.ravel())

    if splitData:
        test_preds = clf.predict(X_test)
        printMetrics(y_test, test_preds)
        val_metrics = tuple(getMetrics(y_test, test_preds))
    else:
        val_metrics = (0, 0, 0, 0, 0)

    # Training-set metrics: (accuracy, precision, recall, auc, f1).
    metrics = tuple(getMetrics(y_train, clf.predict(X_train)))

    logAndSave(name_of_model="XGBClassifier",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
예제 #10
0
def RandomForestModel(splitData, X_train, X_test, y_train, y_test):
    """Fit a fixed-depth random forest classifier and persist it.

    splitData: when truthy, X_test/y_test are scored (and printed via
    printMetrics) as validation data; otherwise the validation metrics
    are reported as zeros.
    """
    clf = RandomForestClassifier(max_depth=14)
    clf.fit(X_train, y_train.ravel())

    if splitData:
        test_preds = clf.predict(X_test)
        printMetrics(y_test, test_preds)
        val_metrics = tuple(getMetrics(y_test, test_preds))
    else:
        val_metrics = (0, 0, 0, 0, 0)

    # Training-set metrics: (accuracy, precision, recall, auc, f1).
    metrics = tuple(getMetrics(y_train, clf.predict(X_train)))

    logAndSave(name_of_model="RandomForestClassifier",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
예제 #11
0
                                       min_samples_leaf=9,
                                       min_samples_split=18,
                                       n_estimators=100)),
    StackingEstimator(
        estimator=MLPClassifier(alpha=0.001, learning_rate_init=1.0)),
    StackingEstimator(
        estimator=GradientBoostingClassifier(learning_rate=0.5,
                                             max_depth=4,
                                             max_features=0.6500000000000001,
                                             min_samples_leaf=9,
                                             min_samples_split=19,
                                             n_estimators=100,
                                             subsample=0.9500000000000001)),
    BernoulliNB(alpha=10.0, fit_prior=False))
# Fix random state for all the steps in the exported pipeline so runs are
# reproducible (walks every pipeline step and sets its `random_state`).
set_param_recursive(exported_pipeline.steps, 'random_state', 101)

# Fit the pipeline (assembled above — presumably a TPOT-exported stack,
# given the model name below) on the training split.
exported_pipeline.fit(X_train, y_train)
# Training-set metrics: (accuracy, precision, recall, auc, f1).
y_preds = exported_pipeline.predict(X_train)
acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)

# Hold-out (validation) metrics on the test split.
y_preds = exported_pipeline.predict(X_test)
val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(y_test, y_preds)
val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)

metrics = (acc, pre, recall, auc, f1)
logAndSave(name_of_model="TPOT_Classifier",
           clf=exported_pipeline,
           metrics=metrics,
           val_metrics=val_metrics)