Example #1
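
# The snippets on this page assume HyperparameterHunter's top-level names are already imported.
# A minimal preamble along the following lines should cover most of them (exact import paths may
# vary by library version, and model initializers such as XGBClassifier, KerasClassifier, SVC,
# LGBMClassifier, Ridge, and RandomForestClassifier come from their own packages):
from hyperparameter_hunter import Environment, CVExperiment, BayesianOptPro
from hyperparameter_hunter import Real, Integer, Categorical, FeatureEngineer, EngineerStep
from hyperparameter_hunter.utils.learning_utils import get_breast_cancer_data, get_iris_data
from xgboost import XGBClassifier
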
def execute():
    """This is going to be a very simple example to illustrate what exactly HyperparameterHunter does, and how it revolutionizes
    hyperparameter optimization."""

    # Start by creating an `Environment` - This is where you define how Experiments (and optimization) will be conducted
    env = Environment(
        train_dataset=get_breast_cancer_data(target="target"),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=10, shuffle=True, random_state=32),
    )

    # Now, conduct an `Experiment`
    # This tells HyperparameterHunter to use the settings in the active `Environment` to train a model with these hyperparameters
    experiment = CVExperiment(model_initializer=XGBClassifier,
                              model_init_params=dict(objective="reg:linear",
                                                     max_depth=3))

    # That's it. No annoying boilerplate code to fit models and record results
    # Now, the `Environment`'s `results_path` directory will contain new files describing the Experiment just conducted

    # Time for the fun part. We'll set up some hyperparameter optimization by first defining the `OptimizationProtocol` we want
    optimizer = BayesianOptPro(verbose=1)

    # Now we're going to say which hyperparameters we want to optimize.
    # Notice how this looks just like our `experiment` above
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",  # We're setting this as a constant guideline - Not one to optimize
            max_depth=Integer(2, 10),  # Instead of using an int like the `experiment` above, we provide a space to search
        ),
    )
    # Notice that our range for `max_depth` includes the `max_depth=3` value we used in our `experiment` earlier

    optimizer.go()  # Now, we go

    assert experiment.experiment_id in [
        _[2] for _ in optimizer.similar_experiments
    ]
    # Here we're verifying that the `experiment` we conducted first was found by `optimizer` and used as learning material
    # You can also see via the console that we found `experiment`'s saved files, and used it to start optimization

    last_experiment_id = optimizer.current_experiment.experiment_id
    # Let's save the id of the experiment that was just conducted by `optimizer`

    optimizer.go()  # Now, we'll start up `optimizer` again...

    # And we can see that this second optimization round learned from both our first `experiment` and our first optimization round
    assert experiment.experiment_id in [
        _[2] for _ in optimizer.similar_experiments
    ]
    assert last_experiment_id in [_[2] for _ in optimizer.similar_experiments]

Example #2
def _execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path="HyperparameterHunterAssets",
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=10, shuffle=True, random_state=32),
        runs=2,
    )

    optimizer = BayesianOptPro(iterations=10, read_experiments=True, random_state=None)

    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            n_estimators=200,
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear", "dart"]),
        ),
        model_extra_params=dict(fit=dict(eval_metric=Categorical(["auc", "rmse", "mae"]))),
    )

    optimizer.go()

Example #3
def _execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_breast_cancer_data(target="target"),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    #################### Experimentation ####################
    experiment = CVExperiment(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_experiment),
        model_extra_params=dict(callbacks=[ReduceLROnPlateau(patience=5)],
                                batch_size=32,
                                epochs=10,
                                verbose=0),
    )

    #################### Optimization ####################
    optimizer = BayesianOptPro(iterations=10)
    optimizer.forge_experiment(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_optimization),
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))],
            batch_size=Categorical([32, 64], transform="onehot"),
            epochs=10,
            verbose=0,
        ),
    )
    optimizer.go()
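
# `_build_fn_experiment` and `_build_fn_optimization` are defined elsewhere in that example's
# module and are not shown on this page. As a rough, hypothetical sketch (the layer sizes and
# names below are illustrative assumptions, not the library's own definitions), an optimization
# `build_fn` simply returns a compiled Keras model whose tunable values are declared with
# `Integer`/`Real`/`Categorical` so the OptPro can search them:
from keras.layers import Dense, Dropout
from keras.models import Sequential


def _build_fn_optimization(input_shape):
    model = Sequential([
        Dense(Integer(50, 150), input_shape=input_shape, activation="relu"),  # Searched layer size
        Dropout(Real(0.2, 0.7)),  # Searched dropout rate
        Dense(1, activation="sigmoid"),
    ])
    model.compile(
        optimizer=Categorical(["adam", "rmsprop"]),  # Searched optimizer choice
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model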

Example #4
def fe_optimizer(request):
    if request.param is not None:
        request.param = FeatureEngineer(request.param)
    opt = BayesianOptPro()
    opt.forge_experiment(model_initializer=Ridge,
                         model_init_params={},
                         feature_engineer=request.param)
    opt.go()
    return opt

Example #5
def do_optimization():
    optimizer = BayesianOptPro(iterations=5, random_state=1337)
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            subsample=0.5,
            booster=Categorical(["gbtree", "dart"]),
        ),
    )
    optimizer.go()

Example #6
def opt_svc_0(request):
    optimizer = BayesianOptPro(target_metric=request.param,
                               iterations=2,
                               random_state=32)
    optimizer.forge_experiment(
        model_initializer=SVC,
        model_init_params=dict(
            C=Real(0.9, 1.1),
            kernel=Categorical(["linear", "poly", "rbf"]),
            max_iter=Integer(50, 125),
            tol=1e-3,
        ),
    )
    optimizer.go()
    yield optimizer

    assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))

Example #7
def opt_lgb_0(request):
    optimizer = BayesianOptPro(target_metric=request.param,
                               iterations=2,
                               random_state=32)
    optimizer.forge_experiment(
        model_initializer=LGBMClassifier,
        model_init_params=dict(
            boosting_type=Categorical(["gbdt", "dart"]),
            num_leaves=Integer(2, 8),
            n_estimators=10,
            max_depth=5,
            min_child_samples=1,
            subsample=Real(0.4, 0.7),
            verbose=-1,
        ),
    )
    optimizer.go()
    yield optimizer

    assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))

Example #8
def execute():
    train_df, holdout_df = prep_data()

    env = Environment(
        train_dataset=train_df,
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        target_column=[f"target_{_}" for _ in range(10)],  # 10 classes (one-hot-encoded output)
        holdout_dataset=holdout_df,
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=3, shuffle=True, random_state=True),
    )

    exp = CVExperiment(KerasClassifier, build_fn_exp,
                       dict(batch_size=64, epochs=10, verbose=1))

    opt = BayesianOptPro(iterations=10, random_state=32)
    opt.forge_experiment(KerasClassifier, build_fn_opt,
                         dict(batch_size=64, epochs=10, verbose=0))
    opt.go()

Example #9
def test_similar_experiments_unordered():
    """Check that an experiment with a single `EngineerStep` is considered "similar" by an
    Optimization Protocol, with two `optional` `EngineerStep`s, where the second step is identical
    to the single step used by the standalone experiment. As of v3.0.0alpha2, this is expected to
    fail because the otherwise identical engineer steps occur at different indexes in
    `FeatureEngineer.steps` for the experiment and the OptPro. The experiment has `sqr_sum_feature`
    at index=0, while the same step in the OptPro is at index=1. Note that the step index in OptPro
    is still 1 despite the fact that the other step immediately preceding it is `optional`"""
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path=assets_dir,
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    exp = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear",
                               subsample=0.5,
                               max_depth=3),
        feature_engineer=FeatureEngineer([EngineerStep(sqr_sum_feature)]),
    )

    opt = BayesianOptPro(iterations=1)
    opt.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear",
                               subsample=0.5,
                               max_depth=3),
        feature_engineer=FeatureEngineer([
            Categorical([standard_scale, normalize, min_max_scale],
                        optional=True),
            Categorical([sqr_sum_feature], optional=True),
        ]),
    )
    opt.go()

    assert exp.experiment_id in [_[2] for _ in opt.similar_experiments]
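
# The `EngineerStep` functions used above (`sqr_sum_feature`, `standard_scale`, `normalize`,
# `min_max_scale`) are defined elsewhere. A hedged sketch of two of them, assuming the usual
# HyperparameterHunter convention that a step requests its inputs by argument name and returns
# the modified data:
import numpy as np
from sklearn.preprocessing import StandardScaler


def standard_scale(train_inputs, non_train_inputs):
    # Fit the scaler on the training fold only, then transform both splits
    scaler = StandardScaler()
    train_inputs[train_inputs.columns] = scaler.fit_transform(train_inputs.values)
    non_train_inputs[non_train_inputs.columns] = scaler.transform(non_train_inputs.values)
    return train_inputs, non_train_inputs


def sqr_sum_feature(all_inputs):
    # Engineer a single extra column: the square root of each row's sum of squares
    all_inputs["sqr_sum"] = all_inputs.agg(
        lambda row: np.sqrt(np.sum([np.square(_) for _ in row])), axis="columns"
    )
    return all_inputs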

Example #10
def execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_iris_data(),
        results_path="HyperparameterHunterAssets",
        target_column="species",
        metrics=["hamming_loss"],
        cv_params=dict(n_splits=5, random_state=32),
    )

    #################### Experiment ####################
    # Just a reference for normal `class_weight` usage outside of optimization
    CVExperiment(RandomForestClassifier, dict(n_estimators=10, class_weight={0: 1, 1: 1, 2: 1}))

    #################### Optimization ####################
    opt = BayesianOptPro(iterations=10, random_state=32)
    opt.forge_experiment(
        model_initializer=RandomForestClassifier,
        model_init_params=dict(
            # Weight values for each class can be optimized with `Categorical`/`Integer`
            class_weight={
                0: Categorical([1, 3]),
                1: Categorical([1, 4]),
                2: Integer(1, 9),  # You can also use `Integer` for low/high ranges
            },
            criterion=Categorical(["gini", "entropy"]),
            n_estimators=Integer(5, 100),
        ),
    )
    opt.go()

Example #11
def test_categorical_tuple_match(env_digits):
    """Test that optimization of a `Categorical` space, whose values are tuples can be performed
    and that saved results from such a space are correctly identified as similar Experiments"""
    model_extra_params = dict(batch_size=32, epochs=3, verbose=0, shuffle=True)
    exp_0 = CVExperiment(KerasClassifier, build_fn_digits_exp, model_extra_params)

    #################### First OptPro ####################
    opt_0 = BayesianOptPro(iterations=1, random_state=32, n_initial_points=1)
    opt_0.forge_experiment(KerasClassifier, build_fn_digits_opt, model_extra_params)
    opt_0.go()
    assert len(opt_0.similar_experiments) == 1  # Should match `exp_0`

    #################### Second OptPro ####################
    opt_1 = BayesianOptPro(iterations=1, random_state=32, n_initial_points=1)
    opt_1.forge_experiment(KerasClassifier, build_fn_digits_opt, model_extra_params)
    opt_1.go()
    assert len(opt_1.similar_experiments) == 2  # Should match `exp_0` and `opt_0`
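
# `build_fn_digits_exp`/`build_fn_digits_opt` are not shown here; the point of the test is that
# `Categorical` can search over tuple-valued choices. A hypothetical fragment illustrating that
# idea (the layer layout and `input_shape` handling are assumptions, not the test's actual code):
from keras.layers import Conv2D, Dense, Flatten
from keras.models import Sequential


def build_fn_digits_opt(input_shape):
    model = Sequential([
        # `kernel_size` is optimized over tuple choices
        Conv2D(32, kernel_size=Categorical([(1, 1), (3, 3)]), activation="relu", input_shape=input_shape),
        Flatten(),
        Dense(10, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    return model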

Example #12
# Start by creating an `Environment` - This is where you define how Experiments (and optimization) will be conducted
env = Environment(
    train_dataset=get_breast_cancer_data(target='target'),
    results_path='HyperparameterHunterAssets',
    metrics=['roc_auc_score'],
    cv_type='StratifiedKFold',
    cv_params=dict(n_splits=10, shuffle=True, random_state=32),
)

# Now, conduct an `Experiment`
# This tells HyperparameterHunter to use the settings in the active `Environment` to train a model with these hyperparameters
experiment = CVExperiment(model_initializer=XGBClassifier,
                          model_init_params=dict(objective='reg:linear',
                                                 max_depth=3))

# That's it. No annoying boilerplate code to fit models and record results
# Now, the `Environment`'s `results_path` directory will contain new files describing the Experiment just conducted

# Time for the fun part. We'll set up some hyperparameter optimization by first defining the `OptPro` (Optimization Protocol) we want
optimizer = BayesianOptPro(verbose=1)

# Now we're going to say which hyperparameters we want to optimize.
# Notice how this looks just like our `experiment` above
optimizer.forge_experiment(
    model_initializer=XGBClassifier,
    model_init_params=dict(
        objective='reg:linear',  # We're setting this as a constant guideline - Not one to optimize
        max_depth=Integer(2, 10),  # Instead of using an int like the `experiment` above, we provide a space to search
    ))
# Notice that our range for `max_depth` includes the `max_depth=3` value we used in our `experiment` earlier

optimizer.go()  # Now, we go

#################### 1. Hyperparameter Experimentation ####################
experiment = CVExperiment(
    model_initializer=XGBClassifier,
    model_init_params=dict(objective="reg:linear", max_depth=3, n_estimators=100, subsample=0.5),
    model_extra_params=dict(
        fit=dict(
            eval_set=[
                (env.train_input, env.train_target),
                (env.validation_input, env.validation_target),
            ],
            early_stopping_rounds=5,
            eval_metric="mae",
        )
    ),
)

# And/or...
#################### 2. Hyperparameter Optimization ####################
optimizer = BayesianOptPro(iterations=30, random_state=1337)
optimizer.forge_experiment(
    model_initializer=XGBClassifier,
    model_init_params=dict(
        objective="reg:linear",
        max_depth=Integer(2, 20),
        learning_rate=Real(0.0001, 0.5),
        subsample=0.5,
        booster=Categorical(["gbtree", "dart"]),
    ),
    model_extra_params=dict(
        fit=dict(
            eval_set=[
                (env.train_input, env.train_target),
                (env.validation_input, env.validation_target),
            ],