def rf_grid(df_train):
    """Grid-search a random forest on ``df_train`` and plot the test scores.

    The search covers ``n_estimators`` in ``range(1, 420, 20)`` crossed with
    the ``gini`` and ``entropy`` criteria, using ``'Pos'`` as the target
    column name handed to the ``nfl`` helpers. The search is invoked with
    ``save=True``.

    Args:
        df_train: Training data passed through to ``nfl.rf`` and
            ``nfl.grid_search``.
    """
    forest = nfl.rf(df_train, 'Pos')
    search_space = dict(
        n_estimators=range(1, 420, 20),
        criterion=["gini", "entropy"],
    )
    cv_results = nfl.grid_search(
        df_train, forest, 'Pos', search_space, save=True)

    # Mean test score against the number of estimators; 'param_criterion'
    # is passed as the trailing column argument.
    nfl.display_parameter_curve(
        cv_results,
        'param_n_estimators',
        'mean_test_score',
        '# Estimators',
        'Mean Test Score',
        'param_criterion',
    )
def rf_grid_maxfeature(df_train):
    """Average repeated random-forest grid searches over ``max_features``.

    Runs the same grid search ten times (``n_estimators`` fixed at 220,
    criterion fixed at ``gini``, ``max_features`` swept over
    ``range(1, df_train.shape[1] - 1)``), concatenates the per-run results,
    averages the numeric columns per ``max_features`` value and plots the
    mean test and mean train scores.

    Args:
        df_train: Training data passed through to ``nfl.rf`` and
            ``nfl.grid_search``; its column count bounds the sweep.
    """
    # Local import so the block does not rely on a module-level pandas alias.
    import pandas as pd

    RF = nfl.rf(df_train, 'Pos')
    params = {
        "n_estimators": [220],
        "criterion": ["gini"],
        # NOTE(review): the upper bound is exclusive, so the largest value
        # tried is shape[1] - 2 -- confirm this is the intended maximum.
        "max_features": range(1, df_train.shape[1] - 1, 1)
    }

    # Collect every run explicitly. The previous bare ``except`` around
    # ``DataFrame.append`` (removed in pandas 2.0) hid real errors and, on
    # current pandas, silently kept only the last run.
    runs = []
    for i in range(10):
        print(i)
        runs.append(nfl.grid_search(df_train, RF, 'Pos', params, save=False))
    df_cv = pd.concat(runs)

    # numeric_only=True drops non-numeric columns instead of raising on
    # them (pandas >= 2.0 no longer drops them implicitly).
    df_cv = (df_cv.groupby(['param_max_features'])
             .mean(numeric_only=True)
             .reset_index())

    # The x axis is max_features, so label it as such.
    nfl.display_parameter_curve(df_cv, 'param_max_features', 'mean_test_score',
                                'Max Features', 'Mean Test Score')
    nfl.display_parameter_curve(df_cv, 'param_max_features',
                                'mean_train_score', 'Max Features',
                                'Mean Train Score')
def svm_grid(df_train):
    """Grid-search a support vector machine and plot test and train scores.

    The penalty ``C`` takes the powers of ten from 1e-6 through 1e4 and is
    crossed with the ``linear`` and ``rbf`` kernels, using ``'Pos'`` as the
    target column name handed to the ``nfl`` helpers. The search is invoked
    with ``save=True``.

    Args:
        df_train: Training data passed through to ``nfl.svm`` and
            ``nfl.grid_search``.
    """
    classifier = nfl.svm(df_train, 'Pos')
    exponents = np.arange(-6, 5, 1)
    search_space = dict(
        C=np.power(10.0, exponents),
        kernel=["linear", "rbf"],
    )
    cv_results = nfl.grid_search(
        df_train, classifier, 'Pos', search_space, save=True)

    # One curve for the test score, then one for the train score.
    # NOTE(review): the trailing True is presumably a log-scale switch for
    # the C axis -- confirm against nfl.display_parameter_curve.
    curves = (
        ('mean_test_score', 'Mean Test Score'),
        ('mean_train_score', 'Mean Train Score'),
    )
    for score_column, score_label in curves:
        nfl.display_parameter_curve(
            cv_results, 'param_C', score_column,
            'Penalty Param', score_label,
            'param_kernel', True)
def dt_grid(df_train):
    """Grid-search a decision tree and plot test and train scores.

    The search covers ``min_samples_leaf`` in ``range(1, 40)`` crossed with
    the ``gini`` and ``entropy`` criteria, using ``'Pos'`` as the target
    column name handed to the ``nfl`` helpers. The search is invoked with
    ``save=True``.

    Args:
        df_train: Training data passed through to ``nfl.decision_tree`` and
            ``nfl.grid_search``.
    """
    DT = nfl.decision_tree(df_train, 'Pos')
    params = {
        "min_samples_leaf": range(1, 40, 1),
        "criterion": ["gini", "entropy"]
    }
    df_cv = nfl.grid_search(df_train, DT, 'Pos', params, save=True)

    # Both plots share the same x-axis label; the train plot previously
    # read 'Min Sample Leaf', inconsistent with the test plot.
    x_label = 'Min Samples Leaf'
    nfl.display_parameter_curve(df_cv, 'param_min_samples_leaf',
                                'mean_test_score', x_label,
                                'Mean Test Score', 'param_criterion')
    nfl.display_parameter_curve(df_cv, 'param_min_samples_leaf',
                                'mean_train_score', x_label,
                                'Mean Train Score', 'param_criterion')
def rf_grid_est(df_train):
    """Average repeated random-forest grid searches over ``n_estimators``.

    Runs the grid search (``n_estimators`` in ``range(1, 420, 20)`` crossed
    with the ``gini`` and ``entropy`` criteria) once per repetition,
    concatenates the results, averages the numeric columns per
    (``n_estimators``, ``criterion``) pair and plots the mean test and mean
    train scores. The repetition count is currently 1.

    Args:
        df_train: Training data passed through to ``nfl.rf`` and
            ``nfl.grid_search``.
    """
    # Local import so the block does not rely on a module-level pandas alias.
    import pandas as pd

    RF = nfl.rf(df_train, 'Pos')
    params = {
        "n_estimators": range(1, 420, 20),
        "criterion": ["gini", "entropy"]
    }

    # Collect every run explicitly. The previous bare ``except`` around
    # ``DataFrame.append`` (removed in pandas 2.0) hid real errors and, on
    # current pandas, would silently keep only the last run.
    runs = []
    for i in range(1):
        print(i)
        runs.append(nfl.grid_search(df_train, RF, 'Pos', params, save=False))
    df_cv = pd.concat(runs)

    # Group by criterion as well: the plots below use 'param_criterion' as
    # a column, and averaging gini and entropy rows together would both
    # blur the curves and drop that column from the result.
    # numeric_only=True skips non-numeric columns instead of raising.
    df_cv = (df_cv.groupby(['param_n_estimators', 'param_criterion'])
             .mean(numeric_only=True)
             .reset_index())

    nfl.display_parameter_curve(df_cv, 'param_n_estimators', 'mean_test_score',
                                '# Estimators', 'Mean Test Score',
                                'param_criterion')
    nfl.display_parameter_curve(df_cv, 'param_n_estimators',
                                'mean_train_score', '# Estimators',
                                'Mean Train Score', 'param_criterion')