# Tail of the grid-search section, intentionally left disabled:
#         n_jobs = 3)
# cv_rf.fit(training_set, class_set)
# print('Best Parameters using grid search: \n',
#       cv_rf.best_params_)
# end = time.time()
# print('Time taken in grid search: {0: .2f}'\
#       .format(end - start))

# Test Set Calculations -------------------------------------
# The test error rate is the complement of the test-set accuracy.
test_error_rate_rf = 1 - accuracy_rf

# Confusion matrix produced by the project helper from the test labels
# and the model's predictions.
test_crosstb = hf.create_conf_mat(test_class_set, predictions_rf)

# Print variable importance.
# NOTE(review): elsewhere in this file the helper is called as
# hf.variable_importance(fit_rf) and its result is indexed like a dict;
# confirm this two-argument call matches the helper's signature.
hf.variable_importance(importances_rf, indices_rf)

# Cross validation on the training data; the helper prints its own results.
print('Cross Validation:')
hf.cross_val_metrics(
    fit_rf,
    training_set,
    class_set,
    print_results=True,
)

print('Confusion Matrix:')
print(test_crosstb, '\n')

# Parenthesised calls replace the original backslash line continuations.
print(
    "Here is our mean accuracy on the test set:\n {0: 0.3f}".format(
        accuracy_rf
    )
)
print(
    "The test error rate for our model is:\n {0: .3f}".format(
        test_error_rate_rf
    )
)
# The test error rate is the complement of the test-set accuracy.
test_error_rate_RF = 1 - accuracy_RF

# ROC Curve stuff
# roc_curve takes (y_true, y_score): the true labels come first and the
# model output second. The earlier call passed them the other way round,
# which treats the predictions as ground truth.
# NOTE(review): predictions_RF appears to hold the model's predictions for
# the test set; if these are hard class labels rather than scores or
# probabilities the curve has a single operating point -- confirm whether
# predicted probabilities should be used instead.
fpr2, tpr2, _ = roc_curve(test_class_set, predictions_RF)
auc_rf = auc(fpr2, tpr2)

# Uncomment to save your model as a pickle object!
# joblib.dump(fit_RF, 'pickle_models/model_rf.pkl')

if __name__ == '__main__':
    # Print model parameters
    print(fit_RF)

    # Report feature importances (text summary, then plot) via the helpers.
    hf.variable_importance(import_rf, ind_rf)
    hf.variable_importance_plot(import_rf_desc, ind_rf)

    # NOTE(review): the banner text is reproduced exactly as it appears in
    # this copy of the file; it looks like a multi-line banner that was
    # flattened onto one line -- confirm the intended line breaks.
    print(''' ############################################ ## HYPERPARAMETER OPTIMIZATION ## ############################################ ''')

    print("Note: Remove commented code to see this section")

    # Adjacent string literals replace the backslash continuations that
    # sat inside the original literal; the text, including the whitespace
    # those continuation lines contributed, is unchanged.
    print(
        "chosen parameters: {'bootstrap': True, 'criterion': 'entropy', "
        " 'max_depth': 4}"
        " \nElapsed time of optimization: 189.949 seconds"
    )
# Build the classifier with a fixed random state for reproducibility.
fit_rf = RandomForestClassifier(random_state=42)

# Apply the best parameters given by the grid search.
fit_rf.set_params(
    criterion='gini',
    max_features='log2',
    max_depth=3,
    n_estimators=400,
)

# Fit the model on the training data.
fit_rf.fit(training_set, class_set)

# Tree Specific -------------------------------------------------
# Feature importances come back from the project helper as a mapping;
# the two entries read here are kept as separate module-level names.
var_imp_rf = hf.variable_importance(fit_rf)
importances_rf = var_imp_rf['importance']
indices_rf = var_imp_rf['index']

if __name__ == '__main__':
    # Print model parameters ------------------------------------
    print(fit_rf, '\n')

    # Compute metrics on the test set through the shared helper -------
    fit_dict_rf = produce_model_metrics(
        fit_rf,
        test_set,
        test_class_set,
        'rf',
    )

    # Pull the individual results out of the metrics dictionary.
    predictions_rf = fit_dict_rf['predictions']
    accuracy_rf = fit_dict_rf['accuracy']