plt.yticks(fontsize=14)
ax2.set_xlim(0,1)
ax2.set_ylim(0,1)
plt.legend(['RF', 'GLM', 'No-Skill'], loc='lower left', fontsize=16)
plt.tight_layout()
plt.show()
#fig.savefig('S:/Dehydration_stroke/Team Emerald/scripts/Michael Working Scripts/Michael Figures/EPI1Full.png')
'''

################################
######## RF ROC Curves #########
# Random forest fitted on the preprocessed training split
# (X_train_p / y_train_p), with explicitly pinned hyper-parameters.
rf_total = Classifiers.RandomForestModel(
    params=dict(
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=3,
        min_samples_leaf=1,
        max_features=2,
        verbose=0,
    )
)
rf_total.fit(X_train_p, y_train_p)

# predict() hands back three values, unpacked here as the raw predictions,
# the predictions and a score.
(
    rf_total_raw_preds,
    rf_total_preds,
    rf_total_score,
) = rf_total.predict(X_test_p, y_test_p)

# Only column 1 of the raw predictions is passed to metrics()
# (presumably the positive-class probability -- confirm in Classifiers).
rf_total_fig = Classifiers.metrics(
    y_test_p,
    rf_total_raw_preds[:, 1],
    rf_total_preds,
)

rf_vt = Classifiers.RandomForestModel(
    params={
        'n_estimators': 100,
Example #2
0
# Randomized hyper-parameter search for an XGBoost classifier.
# `criterion` is a scikit-learn tree option, not an XGBoost parameter; passing
# it only produces an "unused parameter" warning from the booster, so it is
# not supplied here. `max_depth=None` leaves the depth at the library default.
xg = XGBClassifier(n_estimators=100, max_depth=None)
# 5-fold cross-validation over candidates sampled from hyperparameter_xg;
# n_jobs=-1 requests all available processors.
randomizedsearch = RandomizedSearchCV(xg, hyperparameter_xg, cv=5, n_jobs=-1)
# fit() result is kept under its own name because later code reads
# best_params_ from it.
best_model_xg = randomizedsearch.fit(X_train, y_train)
print("\n\n\nXGB BEST PARAMS:\n\n\n")
best_params_xg = best_model_xg.best_params_
print(best_params_xg)


########################################
######## Print Original Results ########
# Random forest seeded with the tuned values in best_params_rf; the four
# keywords listed after it override any tuned value with the same key.
rf_anova = Classifiers.RandomForestModel(
    params=dict(
        best_params_rf,
        n_estimators=100,
        criterion="gini",
        max_depth=None,
        verbose=0,
    )
)
rf_anova.fit(X_train_p, y_train_p)

# predict() returns three values: raw predictions, predictions and a score.
(
    rf_anova_raw_preds,
    rf_anova_preds,
    rf_anova_score,
) = rf_anova.predict(X_test_p, y_test_p)


# Unpack the seven values get_auc_pr() returns for the rf_anova predictions.
# The whole raw prediction array is passed (no column is selected here).
(
    auc_rf,
    pr_auc,
    fpr,
    tpr,
    roc_thresholds,
    recalls,
    precisions,
) = get_auc_pr(y_test_p, rf_anova_raw_preds)

glm_anova = Classifiers.LogisticRegressionModel(
# Apply the fitted selector `sfm` to the test features.
X_test_new = sfm.transform(X_test)

print("Shape of X data RF fr: ", X_train_new.shape)

# Convert the label containers to NumPy arrays and drop length-1 axes.
y_train_p = y_train.to_numpy().squeeze()
y_test_p = y_test.to_numpy().squeeze()

# Scale and normalize the selected features.
# NOTE(review): scale() is called separately on the train and test matrices,
# so each split is presumably standardized with its own statistics rather
# than the training statistics -- confirm this is intended.
X_train_p = preprocessing.scale(X_train_new)
X_test_p = preprocessing.scale(X_test_new)

# Fit a random forest on the reduced, scaled training features.
rf = Classifiers.RandomForestModel(
    params=dict(
        n_estimators=100,
        criterion="gini",
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        max_features="sqrt",
        verbose=0,
    )
)
rf.fit(X_train_p, y_train_p)

# Save model to disk.
# The file is opened in a `with` block so the handle is flushed and closed as
# soon as the dump finishes (or fails), instead of being left open until
# garbage collection.
filename = ('S:/Dehydration_stroke/Team Emerald/Working GitHub Directories/'
            'Michael/stroke-hemodynamics/Aim 2/Models/FullModelResults/UpdatedResults/TEST24hr_model_rf.sav')
with open(filename, 'wb') as model_file:
    pickle.dump(rf, model_file)

# Run the fitted forest on the test split; predict() returns three values,
# unpacked as raw predictions, predictions and a score.
(
    rf_raw_preds,
    rf_preds,
    rf_score,
) = rf.predict(X_test_p, y_test_p)

# Unpack the seven values get_auc_pr() returns for those raw predictions.
(
    auc_rf,
    pr_auc_rf,
    fpr,
    tpr,
    roc_thresholds,
    recalls,
    precisions,
) = get_auc_pr(y_test_p, rf_raw_preds)