def rf_grid(df_train):
    """Grid-search a random forest and plot the mean test score.

    The estimator comes from ``nfl.rf(df_train, 'Pos')`` and is searched over
    ``n_estimators`` in ``range(1, 420, 20)`` and both split criteria. The
    search is run with ``save=True`` and its result is handed to
    ``nfl.display_parameter_curve``.

    Args:
        df_train: Training data passed through to the ``nfl`` helpers
            (presumably a DataFrame with a ``'Pos'`` target column --
            confirm against ``nfl``).
    """
    search_space = {
        "n_estimators": range(1, 420, 20),
        "criterion": ["gini", "entropy"],
    }
    forest = nfl.rf(df_train, 'Pos')
    results = nfl.grid_search(df_train, forest, 'Pos', search_space, save=True)

    # One curve per criterion, test score against number of estimators.
    nfl.display_parameter_curve(
        results,
        'param_n_estimators',
        'mean_test_score',
        '# Estimators',
        'Mean Test Score',
        'param_criterion',
    )
def rf_grid_maxfeature(df_train, n_runs=10):
    """Repeat a ``max_features`` grid search and plot the averaged scores.

    The random forest from ``nfl.rf(df_train, 'Pos')`` is searched with
    ``n_estimators=220`` and ``criterion='gini'`` held fixed while
    ``max_features`` sweeps ``range(1, df_train.shape[1] - 1)``. The search
    is repeated ``n_runs`` times, the results are averaged per
    ``max_features`` value, and the mean test and train scores are plotted.

    Args:
        df_train: Training data passed through to the ``nfl`` helpers; must
            expose ``.shape`` (presumably a DataFrame with a ``'Pos'``
            target column -- confirm against ``nfl``).
        n_runs: Number of times the grid search is repeated before
            averaging. Defaults to 10.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1.
    """
    # Local import keeps this function self-contained regardless of how the
    # module imports pandas.
    import pandas as pd

    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    RF = nfl.rf(df_train, 'Pos')
    params = {
        "n_estimators": [220],
        "criterion": ["gini"],
        "max_features": range(1, df_train.shape[1] - 1, 1),
    }

    # Collect every run and concatenate once. DataFrame.append no longer
    # exists in pandas >= 2.0, and wrapping it in a catch-all handler would
    # silently keep only the last run instead of averaging.
    runs = []
    for i in range(n_runs):
        print(i)
        runs.append(nfl.grid_search(df_train, RF, 'Pos', params, save=False))
    df_cv = pd.concat(runs, ignore_index=True)

    # numeric_only=True skips non-numeric result columns, which would make
    # mean() raise on pandas >= 2.0.
    df_cv = (
        df_cv.groupby(['param_max_features'])
        .mean(numeric_only=True)
        .reset_index()
    )

    # The x-axis is labelled after the parameter actually being swept.
    nfl.display_parameter_curve(df_cv, 'param_max_features',
                                'mean_test_score', 'Max Features',
                                'Mean Test Score')
    nfl.display_parameter_curve(df_cv, 'param_max_features',
                                'mean_train_score', 'Max Features',
                                'Mean Train Score')
def svm_grid(df_train):
    """Grid-search a support vector machine and plot test and train scores.

    The estimator comes from ``nfl.svm(df_train, 'Pos')`` and is searched
    over ``C`` in powers of ten from 1e-6 to 1e4 and over the ``linear`` and
    ``rbf`` kernels. The search is run with ``save=True``.

    Args:
        df_train: Training data passed through to the ``nfl`` helpers
            (presumably a DataFrame with a ``'Pos'`` target column --
            confirm against ``nfl``).
    """
    exponents = np.arange(-6, 5, 1)
    search_space = {
        "C": np.power(10.0, exponents),
        "kernel": ["linear", "rbf"],
    }
    classifier = nfl.svm(df_train, 'Pos')
    results = nfl.grid_search(df_train, classifier, 'Pos', search_space,
                              save=True)

    # Test curve first, then train curve. The trailing True is forwarded
    # unchanged (presumably a log-scale switch for C -- confirm in nfl).
    curves = (
        ('mean_test_score', 'Mean Test Score'),
        ('mean_train_score', 'Mean Train Score'),
    )
    for score_column, score_label in curves:
        nfl.display_parameter_curve(
            results,
            'param_C',
            score_column,
            'Penalty Param',
            score_label,
            'param_kernel',
            True,
        )
def dt_grid(df_train):
    """Grid-search a decision tree and plot test and train scores.

    The estimator comes from ``nfl.decision_tree(df_train, 'Pos')`` and is
    searched over ``min_samples_leaf`` in ``range(1, 40)`` and both split
    criteria. The search is run with ``save=True``.

    Args:
        df_train: Training data passed through to the ``nfl`` helpers
            (presumably a DataFrame with a ``'Pos'`` target column --
            confirm against ``nfl``).
    """
    search_space = {
        "min_samples_leaf": range(1, 40, 1),
        "criterion": ["gini", "entropy"],
    }
    tree = nfl.decision_tree(df_train, 'Pos')
    results = nfl.grid_search(df_train, tree, 'Pos', search_space, save=True)

    # (score column, x-axis label, y-axis label); test curve is drawn first.
    curves = (
        ('mean_test_score', 'Min Samples Leaf', 'Mean Test Score'),
        ('mean_train_score', 'Min Sample Leaf', 'Mean Train Score'),
    )
    for score_column, x_label, y_label in curves:
        nfl.display_parameter_curve(
            results,
            'param_min_samples_leaf',
            score_column,
            x_label,
            y_label,
            'param_criterion',
        )
def rf_grid_est(df_train, n_runs=1):
    """Repeat an ``n_estimators`` grid search and plot the averaged scores.

    The random forest from ``nfl.rf(df_train, 'Pos')`` is searched over
    ``n_estimators`` in ``range(1, 420, 20)`` and both split criteria. The
    search is repeated ``n_runs`` times, the results are averaged per
    (``n_estimators``, ``criterion``) pair, and the mean test and train
    scores are plotted with one curve per criterion.

    Args:
        df_train: Training data passed through to the ``nfl`` helpers
            (presumably a DataFrame with a ``'Pos'`` target column --
            confirm against ``nfl``).
        n_runs: Number of times the grid search is repeated before
            averaging. Defaults to 1.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1.
    """
    # Local import keeps this function self-contained regardless of how the
    # module imports pandas.
    import pandas as pd

    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    RF = nfl.rf(df_train, 'Pos')
    params = {
        "n_estimators": range(1, 420, 20),
        "criterion": ["gini", "entropy"],
    }

    # Collect every run and concatenate once. DataFrame.append no longer
    # exists in pandas >= 2.0, and wrapping it in a catch-all handler would
    # silently keep only the last run instead of averaging.
    runs = []
    for i in range(n_runs):
        print(i)
        runs.append(nfl.grid_search(df_train, RF, 'Pos', params, save=False))
    df_cv = pd.concat(runs, ignore_index=True)

    # Group by criterion as well: the plots below draw one curve per
    # 'param_criterion', so that column must survive the aggregation and the
    # gini / entropy scores must not be averaged together.
    # numeric_only=True skips non-numeric result columns, which would make
    # mean() raise on pandas >= 2.0.
    df_cv = (
        df_cv.groupby(['param_n_estimators', 'param_criterion'])
        .mean(numeric_only=True)
        .reset_index()
    )

    nfl.display_parameter_curve(df_cv, 'param_n_estimators',
                                'mean_test_score', '# Estimators',
                                'Mean Test Score', 'param_criterion')
    nfl.display_parameter_curve(df_cv, 'param_n_estimators',
                                'mean_train_score', '# Estimators',
                                'Mean Train Score', 'param_criterion')