コード例 #1
0
def run_random_forest(rf_clf, training, testing, feature_cols, outcome_col):
    """
    Fit ``rf_clf`` on the training rows and evaluate it on the testing rows.

    The classifier is fitted on the ``.values`` arrays of
    ``training[feature_cols]`` and ``training[outcome_col]``. Diagnostics are
    whatever ``get_diagnostics`` returns when given the testing outcome, the
    testing feature frame, the fitted model and the tag ``'rf'``.

    Returns fitted_rf_model, diagnostics, predicted_rf_probs, where
    predicted_rf_probs is a list holding element ``[1]`` of every row that
    ``predict_proba`` yields for the testing features (presumably the
    probability of the second class -- confirm against the classifier's
    class ordering).
    """
    train_features = training[feature_cols].values
    test_features = testing[feature_cols].values
    train_labels = training[outcome_col].values
    # Resolve the testing outcome up front so a missing column fails before
    # any time is spent fitting the model.
    test_outcome = testing[outcome_col]

    fitted_rf_model = rf_clf.fit(train_features, train_labels)
    rf_diagnostics = get_diagnostics(
        test_outcome, testing[feature_cols], fitted_rf_model, 'rf'
    )

    # Keep only column 1 of each per-row probability vector.
    class_one_probs = [
        row[1] for row in fitted_rf_model.predict_proba(test_features)
    ]

    return fitted_rf_model, rf_diagnostics, class_one_probs
コード例 #2
0
def run_logistic_regression(training, testing, feature_cols, outcome_col):
    """
    Fit a logit model (``sm.Logit``) on ``training`` and score ``testing``.

    Side effect: a constant ``'intercept'`` column equal to 1 is added
    in place to ``training`` and/or ``testing`` when the frame does not
    already have a column with that name, so the caller's frames are
    modified.

    ``feature_cols`` may be any iterable of column names; it is copied and
    never modified. ``'intercept'`` is appended to the copy only if it is not
    already listed, so the design matrix never contains the constant twice.

    Returns fitted_logit_model, logit_diagnostics, predicted_logit_probs,
    where logit_diagnostics is the result of ``get_diagnostics`` on the
    testing data and predicted_logit_probs is the fitted model's ``predict``
    output for the testing design matrix.
    """
    # The model is built without an automatic constant, so supply one
    # explicitly on both frames (training first, then testing).
    for frame in (training, testing):
        if 'intercept' not in frame.columns:
            frame['intercept'] = 1

    # list(...) accepts tuples / pandas Index objects, where ``+ [...]`` would
    # raise or concatenate strings element-wise. The membership check avoids a
    # duplicated, perfectly collinear intercept column.
    intercept_feature_cols = list(feature_cols)
    if 'intercept' not in intercept_feature_cols:
        intercept_feature_cols.append('intercept')

    test_design = testing[intercept_feature_cols]

    logit = sm.Logit(training[outcome_col], training[intercept_feature_cols])
    fitted_logit_model = logit.fit()
    logit_diagnostics = get_diagnostics(
        testing[outcome_col], test_design, fitted_logit_model,
        model_type='logit',
    )
    predicted_logit_probs = fitted_logit_model.predict(test_design)

    return fitted_logit_model, logit_diagnostics, predicted_logit_probs