def run_random_forest(rf_clf, training, testing, feature_cols, outcome_col):
    """Fit ``rf_clf`` on the training rows and evaluate it on the test rows.

    Args:
        rf_clf: Unfitted estimator exposing ``fit(X, y)`` and
            ``predict_proba(X)`` (presumably a scikit-learn
            ``RandomForestClassifier`` -- confirm against callers).
        training: Table indexed by column label whose selections expose
            ``.values`` (presumably a pandas DataFrame).
        testing: Same kind of table as ``training``, holding held-out rows.
        feature_cols: Column labels used as predictors.
        outcome_col: Column label of the target variable.

    Returns:
        A 3-tuple ``(fitted_rf_model, rf_diagnostics, predicted_rf_probs)``:
        the object returned by ``rf_clf.fit``, the result of
        ``get_diagnostics(..., 'rf')`` on the test rows, and a list holding
        the second entry of every ``predict_proba`` row for the test rows.
    """
    X_train = training[feature_cols].values
    Y_train = training[outcome_col].values
    X_test = testing[feature_cols].values
    # Select the test outcome once, before fitting, so a missing column is
    # reported before the estimator is modified by fit().
    test_outcomes = testing[outcome_col]

    fitted_rf_model = rf_clf.fit(X_train, Y_train)

    rf_diagnostics = get_diagnostics(
        test_outcomes, testing[feature_cols], fitted_rf_model, 'rf')

    # Keep only the second probability column of each row.
    # NOTE(review): presumably the positive class of a binary outcome;
    # the column order follows the estimator's ``classes_`` -- confirm.
    predicted_rf_probs = [
        row[1] for row in fitted_rf_model.predict_proba(X_test)
    ]

    return fitted_rf_model, rf_diagnostics, predicted_rf_probs
def run_logistic_regression(training, testing, feature_cols, outcome_col):
    """Fit a statsmodels logit on the training rows and score the test rows.

    Side effect: an ``'intercept'`` column filled with 1 is added *in place*
    to ``training`` and/or ``testing`` when they do not already have one.

    Args:
        training: Table supporting ``.columns``, column selection and column
            assignment (presumably a pandas DataFrame).
        testing: Same kind of table as ``training``, holding held-out rows.
        feature_cols: Iterable of predictor column labels. ``'intercept'``
            may be included or omitted; it is used exactly once either way.
        outcome_col: Column label of the target variable.

    Returns:
        A 3-tuple ``(fitted_logit_model, logit_diagnostics,
        predicted_logit_probs)``: the result of ``sm.Logit(...).fit()``, the
        result of ``get_diagnostics(..., model_type='logit')`` on the test
        rows, and the output of ``fitted_logit_model.predict`` for the test
        rows.
    """
    # The model only uses the columns it is handed, so the constant term has
    # to exist as a real column in both tables.
    for frame in (training, testing):
        if 'intercept' not in frame.columns:
            frame['intercept'] = 1

    # list() accepts any iterable of labels (tuple, Index, ...), and the
    # membership test keeps the constant from being selected twice, which
    # would make the design matrix perfectly collinear.
    intercept_feature_cols = list(feature_cols)
    if 'intercept' not in intercept_feature_cols:
        intercept_feature_cols.append('intercept')

    logit = sm.Logit(training[outcome_col], training[intercept_feature_cols])
    fitted_logit_model = logit.fit()

    logit_diagnostics = get_diagnostics(
        testing[outcome_col],
        testing[intercept_feature_cols],
        fitted_logit_model,
        model_type='logit',
    )
    predicted_logit_probs = fitted_logit_model.predict(
        testing[intercept_feature_cols])

    return fitted_logit_model, logit_diagnostics, predicted_logit_probs