def stacking_gaussian(var_smoothing=1e-9):
    # Stack SVM, XGBoost and random forest (combined through their predicted
    # probabilities) and use GaussianNB as the final estimator.
    estimadores = [('svm', svm()), ('xgboost', xgboost()),
                   ('random_forest', random_forest())]
    cv = utils.kfold_for_cross_validation()
    stacking = StackingClassifier(
        estimators=estimadores,
        final_estimator=GaussianNB(var_smoothing=var_smoothing),
        stack_method="predict_proba",
        cv=cv)
    return stacking
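
# A minimal usage sketch (assumption: X, y and the builders svm, xgboost,
# random_forest and utils.kfold_for_cross_validation are defined earlier in
# the notebook): score the stacked model with cross-validation.

# +
from sklearn.model_selection import cross_val_score

stacking_model = stacking_gaussian()
stacking_scores = cross_val_score(stacking_model, X, y, scoring='roc_auc',
                                  cv=utils.kfold_for_cross_validation(), n_jobs=-1)
print(stacking_scores.mean())
# -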
# - We search for hyperparameters that improve on the defaults using GridSearchCV for both NB models

# #### Hyperparameters

pipeline_gaussian = Pipeline([("preprocessor", pp.PreprocessingGaussianNB1()),
                              ("model", GaussianNB())])
pipeline_categorical = Pipeline([("preprocessor", pp.PreprocessingCategoricalNB1()),
                                 ("model", CategoricalNB())])

# +
from sklearn.model_selection import GridSearchCV
params = {'model__alpha': np.arange(1, 10, 1)}

cv = utils.kfold_for_cross_validation()
# Uncomment to run GridSearchCV
gscv_categorical = GridSearchCV(pipeline_categorical, params, scoring='roc_auc',
                                n_jobs=-1, cv=cv, return_train_score=True).fit(X, y)
print(gscv_categorical.best_score_)
print(gscv_categorical.best_params_)

# +
params = {'model__var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-3, 5e-3, 1e-2, 3e-2, 5e-2, 0.1, 0.3]}

cv = utils.kfold_for_cross_validation()
# Uncomment to run GridSearchCV
gscv_gaussian = GridSearchCV(pipeline_gaussian, params, scoring='roc_auc',
                             n_jobs=-1, cv=cv, return_train_score=True).fit(X, y)
print(gscv_gaussian.best_score_)
print(gscv_gaussian.best_params_)
# -
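
# Both searches were run with return_train_score=True, so the gap between the
# train and validation scores of the best configuration can be checked for
# overfitting. A minimal inspection sketch (assumes the fitted gscv_* objects
# above):

# +
import pandas as pd

for name, gscv in [("categorical", gscv_categorical), ("gaussian", gscv_gaussian)]:
    results = pd.DataFrame(gscv.cv_results_)
    best_row = results.loc[gscv.best_index_]
    print(name, best_row['mean_train_score'], best_row['mean_test_score'])
# -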