def execute():
    """Optimize an XGBoost model while searching over 18 `imblearn` resampling techniques."""
    # The active `Environment` dictates how every Experiment/OptPro below is conducted
    env = Environment(
        train_dataset=get_imbalanced_dataset(),
        results_path="HyperparameterHunterAssets",
        target_column="target",
        metrics=["roc_auc_score", "accuracy_score"],
        cv_type="KFold",
        cv_params=dict(n_splits=5, random_state=7),
    )

    # Since this is HyperparameterHunter, after all, we'll throw in some classic hyperparameter
    # optimization just for fun. If you're like most people and you think it's absurd to test
    # 18 different `imblearn` techniques, feel free to comment out some `EngineerStep`s below
    opt_0 = ET(iterations=20, random_state=32)
    opt_0.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            n_estimators=Integer(50, 900),
            learning_rate=Real(0.0001, 0.9),
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear"]),
        ),
        # A single optional `Categorical` step: the optimizer picks one resampler (or none)
        feature_engineer=FeatureEngineer(
            [
                Categorical(
                    [
                        EngineerStep(resample_smote_tomek, stage="intra_cv"),
                        EngineerStep(over_sample_random, stage="intra_cv"),
                        EngineerStep(over_sample_smote, stage="intra_cv"),
                        EngineerStep(under_sample_random, stage="intra_cv"),
                        EngineerStep(under_sample_cluster_centroids, stage="intra_cv"),
                        EngineerStep(under_sample_tomek_links, stage="intra_cv"),
                        #################### GROUP 2 (EXTENDED) ####################
                        EngineerStep(resample_smote_enn, stage="intra_cv"),
                        EngineerStep(over_sample_ADASYN, stage="intra_cv"),
                        EngineerStep(over_sample_BorderlineSMOTE, stage="intra_cv"),
                        EngineerStep(over_sample_SVMSMOTE, stage="intra_cv"),
                        EngineerStep(under_sample_NearMiss, stage="intra_cv"),
                        EngineerStep(under_sample_CondensedNearestNeighbour, stage="intra_cv"),
                        EngineerStep(under_sample_OneSidedSelection, stage="intra_cv"),
                        EngineerStep(under_sample_NeighbourhoodCleaningRule, stage="intra_cv"),
                        EngineerStep(under_sample_EditedNearestNeighbours, stage="intra_cv"),
                        EngineerStep(under_sample_RepeatedEditedNearestNeighbour, stage="intra_cv"),
                        EngineerStep(under_sample_AllKNN, stage="intra_cv"),
                        EngineerStep(under_sample_InstanceHardnessThreshold, stage="intra_cv"),
                    ],
                    optional=True,
                )
            ]
        ),
    )
    opt_0.go()
def hyperparameter_space(self, param_space=None): space = dict(is_unbalance=True, learning_rate=Real(0.01, 0.3), num_boost_round=Integer(50, 500), num_leaves=Integer(31, 255), min_child_weight=Real(0.1, 10), colsample_bytree=Real(0.5, 1.), subsample=Real(0.5, 1.), reg_alpha=Real(0.01, 0.1), reg_lambda=Real(0.01, 0.1)) if param_space: return param_space else: return space
def _build_penta_cat_int(input_shape): model = Sequential([ Dense( Integer(50, 100), kernel_initializer=Categorical( ["lecun_uniform", "lecun_normal", "glorot_normal"]), input_shape=input_shape, activation=Categorical( ["elu", "selu", "softsign", "relu", "tanh", "sigmoid"]), ), Dropout(0.5), Dense( 1, kernel_initializer=Categorical( ["lecun_uniform", "lecun_normal", "glorot_normal"]), activation=Categorical( ["elu", "selu", "softsign", "relu", "tanh", "sigmoid"]), ), ]) model.compile( optimizer=Categorical([ "sgd", "rmsprop", "adagrad", "adadelta", "adam", "adamax", "nadam" ]), loss="binary_crossentropy", metrics=["accuracy"], ) return model
def _execute(): env = Environment( train_dataset=get_breast_cancer_data(), results_path="HyperparameterHunterAssets", target_column="diagnosis", metrics=["roc_auc_score"], cv_type=StratifiedKFold, cv_params=dict(n_splits=10, shuffle=True, random_state=32), runs=2, ) optimizer = BayesianOptPro(iterations=10, read_experiments=True, random_state=None) optimizer.forge_experiment( model_initializer=XGBClassifier, model_init_params=dict( max_depth=Integer(2, 20), learning_rate=Real(0.0001, 0.5), n_estimators=200, subsample=0.5, booster=Categorical(["gbtree", "gblinear", "dart"]), ), model_extra_params=dict(fit=dict(eval_metric=Categorical(["auc", "rmse", "mae"]))), ) optimizer.go()
def _execute(): env = Environment( train_dataset=get_breast_cancer_data(), root_results_path='HyperparameterHunterAssets', target_column='diagnosis', metrics_map=['roc_auc_score'], cross_validation_type=StratifiedKFold, cross_validation_params=dict(n_splits=10, shuffle=True, random_state=32), runs=1, ) optimizer = RandomForestOptimization( iterations=100, read_experiments=True, ) optimizer.set_experiment_guidelines( model_initializer=LGBMClassifier, model_init_params=dict(boosting_type=Categorical(['gbdt', 'dart']), num_leaves=Integer(5, 20), max_depth=-1, min_child_samples=5, subsample=0.5), ) optimizer.go()
def _execute(): env = Environment( train_dataset=get_toy_classification_data(), root_results_path='HyperparameterHunterAssets', metrics_map=['roc_auc_score'], cross_validation_type='StratifiedKFold', cross_validation_params=dict(n_splits=5, shuffle=True, random_state=32), runs=1, ) optimizer = GradientBoostedRegressionTreeOptimization( iterations=10, read_experiments=True, random_state=None, ) optimizer.set_experiment_guidelines( model_initializer=CatBoostClassifier, model_init_params=dict(iterations=100, eval_metric=Categorical( ['Logloss', 'Accuracy', 'AUC'], transform='onehot'), learning_rate=Real(low=0.0001, high=0.5), depth=Integer(4, 7), save_snapshot=False), ) optimizer.go() print('')
def _execute(): #################### Environment #################### env = Environment( train_dataset=get_breast_cancer_data(target="target"), root_results_path="HyperparameterHunterAssets", metrics_map=["roc_auc_score"], cross_validation_type="StratifiedKFold", cross_validation_params=dict(n_splits=5, shuffle=True, random_state=32), ) #################### Experimentation #################### experiment = CVExperiment( model_initializer=KerasClassifier, model_init_params=dict(build_fn=_build_fn_experiment), model_extra_params=dict(callbacks=[ReduceLROnPlateau(patience=5)], batch_size=32, epochs=10, verbose=0), ) #################### Optimization #################### optimizer = BayesianOptimization(iterations=10) optimizer.set_experiment_guidelines( model_initializer=KerasClassifier, model_init_params=dict(build_fn=_build_fn_optimization), model_extra_params=dict( callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))], batch_size=Categorical([32, 64], transform="onehot"), epochs=10, verbose=0, ), ) optimizer.go()
def _execute(): env = Environment( train_dataset=get_breast_cancer_data(), root_results_path='HyperparameterHunterAssets', target_column='diagnosis', metrics_map=['roc_auc_score'], cross_validation_type=StratifiedKFold, cross_validation_params=dict(n_splits=10, shuffle=True, random_state=32), runs=2, ) optimizer = BayesianOptimization(iterations=100, read_experiments=True, random_state=None) optimizer.set_experiment_guidelines( model_initializer=XGBClassifier, model_init_params=dict( max_depth=Integer(2, 20), learning_rate=Real(0.0001, 0.5), n_estimators=200, subsample=0.5, booster=Categorical(['gbtree', 'gblinear', 'dart']), ), model_extra_params=dict( fit=dict( eval_metric=Categorical(['auc', 'rmse', 'mae']) ) ), ) optimizer.go()
def execute():
    """This is going to be a very simple example to illustrate what exactly
    HyperparameterHunter does, and how it revolutionizes hyperparameter optimization."""
    # Step 1: an `Environment` defines how Experiments (and optimization) will be conducted
    env = Environment(
        train_dataset=get_breast_cancer_data(target="target"),
        root_results_path="HyperparameterHunterAssets",
        metrics_map=["roc_auc_score"],
        cross_validation_type="StratifiedKFold",
        cross_validation_params=dict(n_splits=10, shuffle=True, random_state=32),
    )

    # Step 2: conduct an `Experiment` - the active `Environment`'s settings are picked up
    # automatically, so there is no boilerplate to fit models or record results
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear", max_depth=3),
    )
    # The `Environment`'s `root_results_path` directory now has files describing `experiment`

    # Step 3: hyperparameter optimization - first pick an `OptimizationProtocol`
    opt = BayesianOptimization(verbose=1)

    # Declare which hyperparameters to optimize - note it looks just like `experiment` above
    opt.set_experiment_guidelines(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",  # Constant guideline - not searched
            max_depth=Integer(2, 10),  # A space to search, instead of the int used above
        ),
    )
    # The `max_depth` range above includes the `max_depth=3` used by `experiment` earlier
    opt.go()

    # Verify `experiment` was found by `opt` and used as learning material
    # (the console also shows `experiment`'s saved files seeding optimization)
    assert experiment.experiment_id in [s[2] for s in opt.similar_experiments]

    # Remember the id of the experiment `opt` just conducted...
    last_experiment_id = opt.current_experiment.experiment_id

    # ...then run `opt` again: the second round learns from both the original
    # `experiment` and the first optimization round
    opt.go()
    assert experiment.experiment_id in [s[2] for s in opt.similar_experiments]
    assert last_experiment_id in [s[2] for s in opt.similar_experiments]
def _build_fn_glorot_normal_1(input_shape): # `"glorot_normal"` model = Sequential( [ Dense(Integer(50, 100), input_shape=input_shape), Dense(1, kernel_initializer="glorot_normal"), ] ) model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]) return model
def _build_fn_orthogonal_i_6(input_shape): # `Orthogonal(gain=Real(0.6, 1.6))` model = Sequential( [ Dense(Integer(50, 100), input_shape=input_shape), Dense(1, kernel_initializer=Orthogonal(gain=Real(0.6, 1.6))), ] ) model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]) return model
def _build_fn_categorical_4(input_shape): # `Categorical(["glorot_normal", Orthogonal(gain=1)])` model = Sequential( [ Dense(Integer(50, 100), input_shape=input_shape), Dense(1, kernel_initializer=Categorical(["glorot_normal", Orthogonal(gain=1)])), ] ) model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]) return model
def test_reg_categorical_integer_ok(env_boston_regression, hh_assets, opt_pro): """Identical to `test_reg_categorical_ok`, except `Integer` added to show cooperation""" opt = opt_pro(iterations=3, random_state=32, n_initial_points=1) opt.forge_experiment( model_initializer=AdaBoostRegressor, model_init_params=dict(loss=Categorical( ["linear", "square", "exponential"]), n_estimators=Integer(10, 40)), ) opt.go()
def opt_pro(optimization_protocol): opt = optimization_protocol(iterations=3, random_state=32, n_initial_points=1) opt.forge_experiment( model_initializer=XGBRegressor, model_init_params=dict( max_depth=Integer(2, 10), n_estimators=Integer(50, 300), learning_rate=Real(0.1, 0.9), subsample=0.5, booster=Categorical(["gbtree", "gblinear"]), ), model_extra_params=dict(fit=dict( eval_metric=Categorical(["rmse", "mae"]))), feature_engineer=FeatureEngineer( [Categorical([nothing_transform], optional=True)]), ) opt.go() return opt
def test_reg_engineer_integer_ok(env_boston_regression, hh_assets, opt_pro): """Identical to `test_reg_engineer`, except `Integer` dimension added to show that everything is fine now. Problem limited to not only `BayesianOptPro`, but also exclusively `Categorical` search spaces""" opt = opt_pro(iterations=3, random_state=32, n_initial_points=1) opt.forge_experiment( model_initializer=AdaBoostRegressor, model_init_params=dict(n_estimators=Integer(10, 40)), feature_engineer=FeatureEngineer( [Categorical([standard_scale, min_max_scale, normalize])]), ) opt.go()
def opt_keras_0(): optimizer = DummySearch(iterations=2) optimizer.set_experiment_guidelines( model_initializer=KerasClassifier, model_init_params=dict(build_fn=_build_fn_optimization), model_extra_params=dict( callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))], batch_size=Categorical([32, 64], transform="onehot"), epochs=10, verbose=0, ), ) optimizer.go()
def opt_dtc_0(): optimizer = ExtraTreesOptimization(iterations=2, random_state=1337) optimizer.set_experiment_guidelines( model_initializer=DecisionTreeClassifier, model_init_params=dict( criterion="gini", min_samples_split=Integer(2, 5), splitter=Categorical(["best", "random"]), min_weight_fraction_leaf=Real(0.0, 0.1), ), ) optimizer.go() yield optimizer
def opt_regressor(): optimizer = DummyOptPro(iterations=1) optimizer.forge_experiment( model_initializer=KerasRegressor, model_init_params=_build_fn_regressor, model_extra_params=dict( callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))], batch_size=Categorical([32, 64], transform="onehot"), epochs=10, verbose=0, ), ) optimizer.go()
def do_optimization(): optimizer = BayesianOptPro(iterations=5, random_state=1337) optimizer.forge_experiment( model_initializer=XGBClassifier, model_init_params=dict( objective="reg:linear", max_depth=Integer(2, 20), learning_rate=Real(0.0001, 0.5), subsample=0.5, booster=Categorical(["gbtree", "dart"]), ), ) optimizer.go()
def opt_xgb_0(): optimizer = RandomForestOptimization(iterations=2, random_state=1337) optimizer.set_experiment_guidelines( model_initializer=XGBClassifier, model_init_params=dict( objective="reg:linear", max_depth=Integer(2, 20), learning_rate=Real(0.0001, 0.5), subsample=0.5, booster=Categorical(["gbtree", "dart"]), ), ) optimizer.go() yield optimizer
def test_sentinels_optimization(env_0): optimizer = GBRT(iterations=2) optimizer.set_experiment_guidelines( model_initializer=XGBClassifier, model_init_params=dict(objective="reg:linear", max_depth=Integer(2, 20), subsample=0.5), model_extra_params=dict(fit=dict( eval_set=get_all_sentinels(env_0), early_stopping_rounds=5, eval_metric=Categorical(["auc", "mae"]), )), ) optimizer.go()
def test_reg_engineer_categorical_integer_ok(env_boston_regression, hh_assets, opt_pro): """Identical to `test_reg_engineer_categorical`, except `Integer` added to demonstrate that all `OptPro`s can optimize with `FeatureEngineer` if space is not exclusively `Categorical`""" opt = opt_pro(iterations=3, random_state=32, n_initial_points=1) opt.forge_experiment( model_initializer=AdaBoostRegressor, model_init_params=dict(loss=Categorical( ["linear", "square", "exponential"]), n_estimators=Integer(10, 40)), feature_engineer=FeatureEngineer( [Categorical([standard_scale, min_max_scale, normalize])]), ) opt.go()
def execute(): #################### Environment #################### env = Environment( train_dataset=get_iris_data(), results_path="HyperparameterHunterAssets", target_column="species", metrics=["hamming_loss"], cv_params=dict(n_splits=5, random_state=32), ) #################### Experiment #################### # Just a reference for normal `class_weight` usage outside of optimization CVExperiment(RandomForestClassifier, dict(n_estimators=10, class_weight={ 0: 1, 1: 1, 2: 1 })) #################### Optimization #################### opt = BayesianOptPro(iterations=10, random_state=32) opt.forge_experiment( model_initializer=RandomForestClassifier, model_init_params=dict( # Weight values for each class can be optimized with `Categorical`/`Integer` class_weight={ 0: Categorical([1, 3]), 1: Categorical([1, 4]), 2: Integer(1, 9), # You can also use `Integer` for low/high ranges }, criterion=Categorical(["gini", "entropy"]), n_estimators=Integer(5, 100), ), ) opt.go()
def test_space_rvs(): """Test that calling `Space.rvs` returns expected values. This is specifically aimed at ensuring `Categorical` instances containing strings produce the entire string, rather than the first character, for example""" space = Space([Integer(50, 100), Categorical(["glorot_normal", "orthogonal"])]) sample_0 = space.rvs(random_state=32) sample_1 = space.rvs(n_samples=1, random_state=32) sample_2 = space.rvs(n_samples=2, random_state=32) sample_3 = space.rvs(n_samples=3, random_state=32) assert sample_0 == [[73, "glorot_normal"]] assert sample_1 == [[73, "glorot_normal"]] assert sample_2 == [[73, "glorot_normal"], [93, "orthogonal"]] assert sample_3 == [[73, "glorot_normal"], [93, "glorot_normal"], [55, "orthogonal"]]
def _build_fn_regressor(input_shape): model = Sequential( [ Dense(100, activation="relu", input_shape=input_shape), Dense(Integer(40, 60), activation="relu", kernel_initializer="glorot_normal"), Dropout(Real(0.2, 0.7)), Dense(1, activation=Categorical(["relu", "sigmoid"]), kernel_initializer="orthogonal"), ] ) model.compile( optimizer=Categorical(["adam", "rmsprop"]), loss="mean_absolute_error", metrics=["mean_absolute_error"], ) return model
def opt_svc_0(request): optimizer = BayesianOptimization(target_metric=request.param, iterations=2, random_state=32) optimizer.set_experiment_guidelines( model_initializer=SVC, model_init_params=dict( C=Real(0.9, 1.1), kernel=Categorical(["linear", "poly", "rbf"]), max_iter=Integer(50, 125), tol=1e-3, ), ) optimizer.go() yield optimizer assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))
def space_fixture(): dimensions = [ Real(0.1, 0.9), Categorical(["foo", "bar", "baz"]), Integer(12, 18) ] locations = [ ("model_init_params", "a"), ("model_init_params", "b", "c"), ("model_extra_params", "e"), ] for i in range(len(dimensions)): setattr(dimensions[i], "location", locations[i]) return Space(dimensions)
def _build_fn_optimization(input_shape): model = Sequential([ Dense(Integer(50, 150), kernel_initializer='uniform', input_shape=input_shape, activation='relu'), Dropout(Real(0.2, 0.7)), Dense(1, kernel_initializer='uniform', activation=Categorical(['sigmoid', 'relu'])), ]) model.compile( optimizer=Categorical(['adam', 'rmsprop']), loss='binary_crossentropy', metrics=['accuracy'], ) return model
def _build_fn_optimization(input_shape): model = Sequential([ Dense( Integer(50, 150), kernel_initializer="uniform", input_shape=input_shape, activation="relu", ), Dropout(Real(0.2, 0.7)), Dense(1, kernel_initializer="uniform", activation=Categorical(["sigmoid", "relu"])), ]) model.compile(optimizer=Categorical(["adam", "rmsprop"]), loss="binary_crossentropy", metrics=["accuracy"]) return model
def opt_lgb_0(request): optimizer = BayesianOptimization(target_metric=request.param, iterations=2, random_state=32) optimizer.set_experiment_guidelines( model_initializer=LGBMClassifier, model_init_params=dict( boosting_type=Categorical(["gbdt", "dart"]), num_leaves=Integer(2, 8), max_depth=5, min_child_samples=1, subsample=Real(0.4, 0.7), verbose=-1, ), ) optimizer.go() yield optimizer assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))