Code Example #1
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path="HyperparameterHunterAssets",
        metrics_map=["roc_auc_score"],
        cross_validation_type=RepeatedStratifiedKFold,
        cross_validation_params=dict(n_splits=3, n_repeats=2, random_state=32),
        do_full_save=do_full_save,
    )

    experiment_0 = CVExperiment(model_initializer=XGBClassifier,
                                model_init_params=dict(subsample=0.01))
    # Pro Tip: By setting XGBoost's subsample ridiculously low, we can get bad scores on purpose

    # Upon completion of this Experiment, we see a warning that not all result files will be saved
    # This is because the final score of the Experiment was below our threshold of 0.75
    # Specifically, we skipped saving prediction files (OOF, holdout, test, or in-fold), and the heartbeat file

    # What still got saved is the Experiment's: key information, leaderboard position, and description file
    # These are saved to allow us to use the information for future hyperparameter optimization, and detect repeated Experiments
    # Additionally, the Experiment's script backup is saved, but that's because it's one of the first things that happens
    # For even finer control over what gets saved, use `do_full_save` together with `file_blacklist`

    # Now, let's perform another Experiment that does a bit better than our intentionally miserable one
    experiment_1 = CVExperiment(model_initializer=XGBClassifier,
                                model_init_params=dict(subsample=0.5))
Code Example #2
def test_feature_engineer_list_experiment_equality(env_boston, steps_0,
                                                   steps_1):
    """Test that the `feature_engineer` attribute constructed by
    :class:`~hyperparameter_hunter.experiments.CVExperiment` is the same whether it was given a
    list as input, or a :class:`~hyperparameter_hunter.feature_engineering.FeatureEngineer`"""
    exp_0 = CVExperiment(Ridge, feature_engineer=steps_0)
    exp_1 = CVExperiment(Ridge, feature_engineer=FeatureEngineer(steps_1))
    assert exp_0.feature_engineer == exp_1.feature_engineer

    # Repeat above, but switch which steps are wrapped in `FeatureEngineer`
    exp_2 = CVExperiment(Ridge, feature_engineer=steps_1)
    exp_3 = CVExperiment(Ridge, feature_engineer=FeatureEngineer(steps_0))
    assert exp_2.feature_engineer == exp_3.feature_engineer
Code Example #3
def prepped_experiment(request):
    """Build a partially prepared :class:`~hyperparameter_hunter.experiments.CVExperiment` instance

    Specifically, automatic execution is disabled via `auto_start=False`, then the following methods
    are called:

    1. :meth:`~hyperparameter_hunter.experiments.BaseExperiment.preparation_workflow`,
    2. :meth:`~hyperparameter_hunter.experiments.BaseExperiment._initialize_random_seeds`, and
    3. :meth:`~hyperparameter_hunter.experiments.BaseExperiment.on_exp_start`, which initializes the
       four :mod:`~hyperparameter_hunter.data.datasets` classes, then performs pre-CV feature
       engineering

    Notes
    -----
    Directly calling `on_exp_start` is ok in this test because after calling
    `_initialize_random_seeds`, `BaseExperiment` calls `execute`, which is implemented by
    `BaseCVExperiment`, and only calls `cross_validation_workflow`, whose first task is to call
    `on_exp_start`. So nothing gets skipped in between"""
    #################### Build `feature_engineer` ####################
    feature_engineer = FeatureEngineer(steps=request.param)

    #################### Partially Prepare `CVExperiment` ####################
    experiment = CVExperiment(
        model_initializer=AdaBoostClassifier,
        model_init_params=dict(),
        feature_engineer=feature_engineer,
        auto_start=False,
    )
    experiment.preparation_workflow()
    # noinspection PyProtectedMember
    experiment._initialize_random_seeds()
    experiment.on_exp_start()

    return experiment
Code Example #4
def test_feature_engineer_list_experiment_inequality(env_boston, steps_0,
                                                     steps_1):
    """Test that the `feature_engineer` attribute constructed by
    :class:`~hyperparameter_hunter.experiments.CVExperiment` is NOT the same when given a list as
    input vs. a :class:`~hyperparameter_hunter.feature_engineering.FeatureEngineer` when the two are
    actually different. This is an insanity test to make sure that the related test in this module,
    :func:`test_feature_engineer_list_experiment_equality`, is not simply equating everything"""
    exp_0 = CVExperiment(Ridge, feature_engineer=steps_0)
    exp_1 = CVExperiment(Ridge, feature_engineer=FeatureEngineer(steps_1))
    assert exp_0.feature_engineer != exp_1.feature_engineer

    # Repeat above, but switch which steps are wrapped in `FeatureEngineer`
    exp_2 = CVExperiment(Ridge, feature_engineer=steps_1)
    exp_3 = CVExperiment(Ridge, feature_engineer=FeatureEngineer(steps_0))
    assert exp_2.feature_engineer != exp_3.feature_engineer
Code Example #5
def _execute():
    # To start, take a look at "examples/environment_params.json" - This is the file we're giving our Environment below
    # In this file, we can define a bunch of default Environment parameters that we don't want to always explicitly provide

    # It works really well for things that won't be changing often, like the following:
    # - `root_results_path`, which we probably never want to change, so all our results go to one place;
    # - `target_column`, which will probably be a constant for your data
    # - `metrics_map`, if you're not using any fancy metrics, and you already know what you want
    # - `file_blacklist`, if you're angry at me for adding that one result file that's always useless
    # Other parameters, whose default values you may want to change more often, can still be passed directly to `Environment` to override the file's defaults (as we do with `cross_validation_params` below)

    env = Environment(
        train_dataset=get_breast_cancer_data(),  # If your dataset is a str path, you can even add it to environment_params
        environment_params_path="./environment_params.json",  # Use this file for parameters not explicitly given
        cross_validation_params=dict(
            n_splits=5, shuffle=True, random_state=32
        ),  # Here we decide to override our default values
    )

    print(env.root_results_path)
    print(env.target_column)
    print(env.metrics_map)
    print(env.cross_validation_type)
    print(env.runs)
    print(env.file_blacklist)  # This includes some other values too, but you can ignore them
    # All of the above are from `environment_params_path`
    print(env.cross_validation_params)  # This is the value we provided above, rather than our `environment_params_path` default

    experiment = CVExperiment(model_initializer=KNeighborsClassifier, model_init_params={})
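# The "./environment_params.json" file referenced above is not shown. Judging by the
# attributes printed in `_execute`, it defines defaults for keys like those below.
# A hedged sketch of generating such a file -- the specific values are assumptions:
import json

default_env_params = {
    "root_results_path": "HyperparameterHunterAssets",
    "target_column": "diagnosis",
    "metrics_map": ["roc_auc_score"],
    "cross_validation_type": "StratifiedKFold",
    "runs": 1,
    "file_blacklist": ["script_backup"],
}

with open("./environment_params.json", "w") as f:
    json.dump(default_env_params, f, indent=4)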
Code Example #6
def engineer_experiment(request):
    """`CVExperiment` fixture that supports provision of a `feature_engineer` through `request`"""
    feature_engineer = FeatureEngineer(steps=request.param)
    experiment = CVExperiment(
        model_initializer=Ridge, model_init_params=dict(), feature_engineer=feature_engineer
    )
    return experiment
Code Example #7
def _execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_breast_cancer_data(target="target"),
        root_results_path="HyperparameterHunterAssets",
        metrics_map=["roc_auc_score"],
        cross_validation_type="StratifiedKFold",
        cross_validation_params=dict(n_splits=5, shuffle=True,
                                     random_state=32),
    )

    #################### Experimentation ####################
    experiment = CVExperiment(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_experiment),
        model_extra_params=dict(callbacks=[ReduceLROnPlateau(patience=5)],
                                batch_size=32,
                                epochs=10,
                                verbose=0),
    )

    #################### Optimization ####################
    optimizer = BayesianOptimization(iterations=10)
    optimizer.set_experiment_guidelines(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_optimization),
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))],
            batch_size=Categorical([32, 64], transform="onehot"),
            epochs=10,
            verbose=0,
        ),
    )
    optimizer.go()
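# `_build_fn_experiment` and `_build_fn_optimization` are defined elsewhere in the
# original script. Below is a hedged sketch of plausible definitions, assuming a simple
# Keras binary classifier; the layer sizes and the `input_shape` parameter are
# assumptions. In the optimization variant, fixed values become search dimensions.
from keras.layers import Dense, Dropout
from keras.models import Sequential

def _build_fn_experiment(input_shape):
    model = Sequential([
        Dense(100, activation="relu", input_shape=input_shape),
        Dropout(0.5),
        Dense(1, activation="sigmoid"),
    ])
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

def _build_fn_optimization(input_shape):
    model = Sequential([
        Dense(Integer(50, 150), activation="relu", input_shape=input_shape),
        Dropout(Categorical([0.3, 0.5])),
        Dense(1, activation="sigmoid"),
    ])
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model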
Code Example #8
def execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_boston_data(),
        results_path="HyperparameterHunterAssets",
        holdout_dataset=get_holdout_data,
        target_column="DIS",
        metrics=["r2_score", "median_absolute_error"],
        cv_type="KFold",
        cv_params=dict(n_splits=10, random_state=1),
    )

    #################### CVExperiment ####################
    exp_0 = CVExperiment(
        model_initializer=Ridge,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([quantile_transform]),
    )

    #################### Optimization ####################
    # `opt_0` recognizes `exp_0`'s `feature_engineer` and its results as valid learning material
    # This is because `opt_0` marks the engineer step functions omitted by `exp_0` as `optional=True`
    opt_0 = DummyOptPro(iterations=10)
    opt_0.forge_experiment(
        model_initializer=Ridge,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([
            Categorical([quantile_transform, log_transform], optional=True),
            Categorical([standard_scale, standard_scale_BAD], optional=True),
            Categorical([square_sum_feature], optional=True),
        ]),
    )
    opt_0.go()
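# The engineer step functions used above (`quantile_transform`, `log_transform`,
# `standard_scale`, etc.) are defined elsewhere. A hedged sketch of one such step,
# assuming HyperparameterHunter's convention of naming step-function parameters after
# the dataset groups they operate on (the parameter names here are assumptions):
from sklearn.preprocessing import StandardScaler

def standard_scale(train_inputs, non_train_inputs):
    # Fit on the train inputs only, then apply the same scaling to all other inputs
    scaler = StandardScaler()
    train_inputs[train_inputs.columns] = scaler.fit_transform(train_inputs.values)
    non_train_inputs[non_train_inputs.columns] = scaler.transform(non_train_inputs.values)
    return train_inputs, non_train_inputs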
Code Example #9
def execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path="HyperparameterHunterAssets",
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    experiment = CVExperiment(
        model_initializer=KerasClassifier,
        model_init_params=build_fn,
        model_extra_params=dict(
            callbacks=[
                ModelCheckpoint(
                    filepath=os.path.abspath("foo_checkpoint"), save_best_only=True, verbose=1
                ),
                ReduceLROnPlateau(patience=5),
            ],
            batch_size=32,
            epochs=10,
            verbose=0,
            shuffle=True,
        ),
    )
Code Example #10
def execute():
    """This is going to be a very simple example to illustrate what exactly HyperparameterHunter does, and how it revolutionizes
    hyperparameter optimization."""

    # Start by creating an `Environment` - This is where you define how Experiments (and optimization) will be conducted
    env = Environment(
        train_dataset=get_breast_cancer_data(target="target"),
        root_results_path="HyperparameterHunterAssets",
        metrics_map=["roc_auc_score"],
        cross_validation_type="StratifiedKFold",
        cross_validation_params=dict(n_splits=10,
                                     shuffle=True,
                                     random_state=32),
    )

    # Now, conduct an `Experiment`
    # This tells HyperparameterHunter to use the settings in the active `Environment` to train a model with these hyperparameters
    experiment = CVExperiment(model_initializer=XGBClassifier,
                              model_init_params=dict(objective="reg:linear",
                                                     max_depth=3))

    # That's it. No annoying boilerplate code to fit models and record results
    # Now, the `Environment`'s `root_results_path` directory will contain new files describing the Experiment just conducted

    # Time for the fun part. We'll set up some hyperparameter optimization by first defining the `OptimizationProtocol` we want
    optimizer = BayesianOptimization(verbose=1)

    # Now we're going to say which hyperparameters we want to optimize.
    # Notice how this looks just like our `experiment` above
    optimizer.set_experiment_guidelines(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",  # We're setting this as a constant guideline - Not one to optimize
            max_depth=Integer(2, 10),  # Instead of using an int like the `experiment` above, we provide a space to search
        ),
    )
    # Notice that our range for `max_depth` includes the `max_depth=3` value we used in our `experiment` earlier

    optimizer.go()  # Now, we go

    assert experiment.experiment_id in [
        _[2] for _ in optimizer.similar_experiments
    ]
    # Here we're verifying that the `experiment` we conducted first was found by `optimizer` and used as learning material
    # You can also see via the console that we found `experiment`'s saved files, and used it to start optimization

    last_experiment_id = optimizer.current_experiment.experiment_id
    # Let's save the id of the experiment that was just conducted by `optimizer`

    optimizer.go()  # Now, we'll start up `optimizer` again...

    # And we can see that this second optimization round learned from both our first `experiment` and our first optimization round
    assert experiment.experiment_id in [
        _[2] for _ in optimizer.similar_experiments
    ]
    assert last_experiment_id in [_[2] for _ in optimizer.similar_experiments]
Code Example #11
def exp_lambda_cb(lambda_cbs):
    """Return a `CVExperiment` with `lambda_cbs` as `callbacks`

    Parameters
    ----------
    lambda_cbs: `LambdaCallback`, list of `LambdaCallback`, or None
        LambdaCallback values passed to the `CVExperiment`'s `callbacks` kwarg"""
    return CVExperiment(AdaBoostRegressor, callbacks=lambda_cbs)
Code Example #12
def test_sentinels_experiment(env_0):
    # noinspection PyUnusedLocal
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear", max_depth=3, subsample=0.5),
        model_extra_params=dict(
            fit=dict(eval_set=get_all_sentinels(env_0), early_stopping_rounds=5, eval_metric="mae")
        ),
    )
Code Example #13
def test_do_not_validate(env_boston):
    exp = CVExperiment(
        model_initializer=Ridge,
        model_init_params={},
        feature_engineer=FeatureEngineer([standard_scale], do_validate=False),
    )

    for step in exp.feature_engineer.steps:
        assert step.original_hashes == {}
        assert step.updated_hashes == {}
Code Example #14
def test_inverse_type_error(env_boston):
    """Test that an error is raised if an `EngineerStep` function returns an extra value that is
    not a function or class instance. Extra return values are used for inverse transformations"""
    with pytest.raises(TypeError,
                       match="`inversion` must be callable, or class with .*"):
        exp = CVExperiment(
            model_initializer=Ridge,
            model_init_params={},
            feature_engineer=FeatureEngineer([bad_quantile_transform]),
        )
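# `bad_quantile_transform` is defined elsewhere in the test module. Per the docstring,
# it must return an extra value that is neither a callable nor a class instance, which
# is what triggers the TypeError. A hedged sketch (parameter names are assumptions):
from sklearn.preprocessing import QuantileTransformer

def bad_quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="normal")
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)
    non_train_targets[non_train_targets.columns] = transformer.transform(non_train_targets.values)
    # The extra return value should be callable (or a class) for inverse transformation;
    # returning a plain string is what makes this step "bad"
    return train_targets, non_train_targets, "not an inversion"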
Code Example #15
def experiment_fixture(request):
    #################### Build `feature_engineer` ####################
    feature_engineer = FeatureEngineer(steps=request.param)

    #################### Execute `CVExperiment` ####################
    experiment = CVExperiment(
        model_initializer=AdaBoostClassifier,
        model_init_params=dict(),
        feature_engineer=feature_engineer,
    )
    return experiment
Code Example #16
def exp_lgb_0():
    return CVExperiment(
        model_initializer=LGBMClassifier,
        model_init_params=dict(
            boosting_type="gbdt",
            num_leaves=5,
            max_depth=5,
            min_child_samples=1,
            subsample=0.5,
            verbose=-1,
        ),
    )
Code Example #17
def engineer_experiment(request):
    """`CVExperiment` fixture that supports provision of a `feature_engineer` through `request`

    Parameters
    ----------
    request: Object
        If `request` has a "param" attribute, it must be a list of feature engineering steps to
        provide to :class:`~hyperparameter_hunter.feature_engineering.FeatureEngineer`"""
    feature_engineer = FeatureEngineer(steps=getattr(request, "param", None))
    experiment = CVExperiment(model_initializer=SVC,
                              model_init_params=dict(),
                              feature_engineer=feature_engineer)
    return experiment
Code Example #18
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear", max_depth=3, n_estimators=100, subsample=0.5
        ),
    )
Code Example #19
def _execute():
    env = Environment(
        train_dataset=prep_data(),
        root_results_path="HyperparameterHunterAssets",
        metrics_map=["roc_auc_score"],
        target_column=[f"target_{_}" for _ in range(10)],
        cross_validation_type="StratifiedKFold",
        cross_validation_params=dict(n_splits=10, shuffle=True, random_state=True),
    )

    experiment = CVExperiment(
        model_initializer=KerasClassifier,
        model_init_params=build_fn,
        model_extra_params=dict(batch_size=32, epochs=10, verbose=0, shuffle=True),
    )
Code Example #20
def experiment_prep_fixture(request):
    #################### Build `feature_engineer` ####################
    feature_engineer = FeatureEngineer(steps=request.param)

    #################### Partially Prepare `CVExperiment` ####################
    experiment = CVExperiment(
        model_initializer=AdaBoostClassifier,
        model_init_params=dict(),
        feature_engineer=feature_engineer,
        auto_start=False,
    )
    experiment.preparation_workflow()
    # noinspection PyProtectedMember
    experiment._initialize_random_seeds()
    # noinspection PyProtectedMember
    experiment._initial_preprocessing()

    return experiment
Code Example #21
def _execute():
    env = Environment(
        train_dataset=prep_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=3, shuffle=True, random_state=True),
    )

    experiment = CVExperiment(
        model_initializer=KerasClassifier,
        model_init_params=build_fn,
        model_extra_params=dict(batch_size=32,
                                epochs=3,
                                verbose=0,
                                shuffle=True),
    )
Code Example #22
def test_categorical_tuple_match(env_digits):
    """Test that optimization of a `Categorical` space, whose values are tuples can be performed
    and that saved results from such a space are correctly identified as similar Experiments"""
    model_extra_params = dict(batch_size=32, epochs=3, verbose=0, shuffle=True)
    exp_0 = CVExperiment(KerasClassifier, build_fn_digits_exp, model_extra_params)

    #################### First OptPro ####################
    opt_0 = BayesianOptPro(iterations=1, random_state=32, n_initial_points=1)
    opt_0.forge_experiment(KerasClassifier, build_fn_digits_opt, model_extra_params)
    opt_0.go()
    assert len(opt_0.similar_experiments) == 1  # Should match `exp_0`

    #################### Second OptPro ####################
    opt_1 = BayesianOptPro(iterations=1, random_state=32, n_initial_points=1)
    opt_1.forge_experiment(KerasClassifier, build_fn_digits_opt, model_extra_params)
    opt_1.go()
    assert len(opt_1.similar_experiments) == 2  # Should match `exp_0` and `opt_0`
Code Example #23
def test_optional_step_matching_by_exp(env_boston, es_0, es_1, es_2):
    """Test that the result of an Experiment is correctly matched by an OptPro with all-`optional`
    `EngineerStep` dimensions"""
    feature_engineer = [_ for _ in [es_0, es_1, es_2] if _ is not None]
    exp_0 = CVExperiment(XGBRegressor, feature_engineer=feature_engineer)

    opt_0 = ExtraTreesOptPro(iterations=1, random_state=32)
    opt_0.forge_experiment(
        XGBRegressor,
        feature_engineer=[
            Categorical([es_a], optional=True),
            Categorical([es_b, es_c], optional=True),
            Categorical([es_d, es_e], optional=True),
        ],
    )
    opt_0.get_ready()

    # Assert `opt_0` matched with `exp_0`
    assert len(opt_0.similar_experiments) == 1
Code Example #24
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        # Both `holdout_dataset`, and `train_dataset` can be any of the following: pandas.DataFrame, filepath, or None
        # If a filepath is provided, it will be passed to :meth:`pandas.read_csv`.
        # In addition to the above types, `holdout_dataset` can also be provided as a callable (see above :func:`get_holdout_set`)
        holdout_dataset=get_holdout_set,
        test_dataset=get_toy_classification_data(),
        # By default, `holdout_dataset` will be scored with the provided metrics, just like OOF predictions
        # However, you can provide the additional `metrics_params` kwarg to specify which metrics are calculated for each dataset
        # See the documentation in :class:`environment.Environment` and :class:`metrics.ScoringMixIn` for more information
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    experiment = CVExperiment(model_initializer=XGBClassifier,
                              model_init_params=dict(subsample=0.5))
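# `get_holdout_set` (referenced in the comments above) is defined earlier in the
# original script. As a callable, `holdout_dataset` presumably receives the train
# DataFrame and the target column name and returns a (train, holdout) pair. A hedged
# sketch under those assumptions:
from sklearn.model_selection import train_test_split

def get_holdout_set(train, target_column):
    train, holdout = train_test_split(train, test_size=0.3, stratify=train[target_column], random_state=32)
    return train, holdout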
Code Example #25
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=5, n_repeats=2, random_state=32),
        runs=2,
        # Just instantiate `Environment` with your list of callbacks, and go about business as usual
        experiment_callbacks=[printer_callback(), confusion_matrix_oof()],
        # In addition to `printer_callback` made above, we're also adding the `confusion_matrix_oof` callback
        # This, and other callbacks, can be found in `hyperparameter_hunter.callbacks.recipes`
    )

    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params={},
        model_extra_params=dict(fit=dict(verbose=False)),
    )
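# `printer_callback` ("made above") is defined in the original script, and
# `confusion_matrix_oof` comes from `hyperparameter_hunter.callbacks.recipes`. Below is
# a hedged sketch of what `printer_callback` might look like via `lambda_callback`; the
# hook keyword (`on_run_end`) and argument name (`last_evaluation_results`) are assumptions.
from hyperparameter_hunter import lambda_callback

def printer_callback():
    def _print_results(last_evaluation_results):
        print(last_evaluation_results)

    return lambda_callback(on_run_end=_print_results)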
Code Example #26
def execute():
    train_df, holdout_df = prep_data()

    env = Environment(
        train_dataset=train_df,
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        target_column=[f"target_{_}" for _ in range(10)
                       ],  # 10 classes (one-hot-encoded output)
        holdout_dataset=holdout_df,
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=3, shuffle=True, random_state=True),
    )

    exp = CVExperiment(KerasClassifier, build_fn_exp,
                       dict(batch_size=64, epochs=10, verbose=1))

    opt = BayesianOptPro(iterations=10, random_state=32)
    opt.forge_experiment(KerasClassifier, build_fn_opt,
                         dict(batch_size=64, epochs=10, verbose=0))
    opt.go()
Code Example #27
def test_similar_experiments_unordered():
    """Check that an experiment with a single `EngineerStep` is considered "similar" by an
    Optimization Protocol, with two `optional` `EngineerStep`s, where the second step is identical
    to the single step used by the standalone experiment. As of v3.0.0alpha2, this is expected to
    fail because the otherwise identical engineer steps occur at different indexes in
    `FeatureEngineer.steps` for the experiment and the OptPro. The experiment has `sqr_sum_feature`
    at index=0, while the same step in the OptPro is at index=1. Note that the step index in OptPro
    is still 1 despite the fact that the other step immediately preceding it is `optional`"""
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path=assets_dir,
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    exp = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear",
                               subsample=0.5,
                               max_depth=3),
        feature_engineer=FeatureEngineer([EngineerStep(sqr_sum_feature)]),
    )

    opt = BayesianOptPro(iterations=1)
    opt.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear",
                               subsample=0.5,
                               max_depth=3),
        feature_engineer=FeatureEngineer([
            Categorical([standard_scale, normalize, min_max_scale],
                        optional=True),
            Categorical([sqr_sum_feature], optional=True),
        ]),
    )
    opt.go()

    assert exp.experiment_id in [_[2] for _ in opt.similar_experiments]
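# `sqr_sum_feature` (and the scaling steps offered to the OptPro) are defined elsewhere.
# Judging by its name, it presumably appends a single row-wise squared-sum feature. A
# hedged sketch, assuming the `all_inputs` parameter naming convention and an arbitrary
# root-mean-square formula:
import numpy as np

def sqr_sum_feature(all_inputs):
    all_inputs["sqr_sum"] = all_inputs.apply(lambda row: np.sqrt(np.mean(np.square(row))), axis=1)
    return all_inputs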
Code Example #28
def test_predictor_holdout_iris():
    G.priority_callbacks = (DummyExperimentPredictorHoldout, )

    #################### Set Up Environment ####################
    env = Environment(
        train_dataset=get_iris_data(),
        results_path=assets_dir,
        holdout_dataset=get_holdout,
        target_column="species",
        metrics=dict(f1=lambda t, p: f1_score(t, p, average="micro"),
                     hamming_loss="hamming_loss"),
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    #################### Perform Experiment ####################
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="multi:softprob",
            max_depth=1,
            n_estimators=300,
            learning_rate=0.02,
            min_child_weight=6,
            gamma=0.07,
            colsample_bytree=0.31,
        ),
        model_extra_params=dict(fit=dict(
            eval_set=[
                (env.train_input, env.train_target),
                (env.validation_input, env.validation_target),
            ],
            early_stopping_rounds=20,
            eval_metric="merror",
        )),
    )

    G.priority_callbacks = tuple()
Code Example #29
def test_predictor_holdout_breast_cancer():
    G.priority_callbacks = (DummyExperimentPredictorHoldout, )

    #################### Set Up Environment ####################
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path=assets_dir,
        holdout_dataset=get_holdout,
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    #################### Perform Experiment ####################
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=3,
            n_estimators=100,
            learning_rate=0.02,
            min_child_weight=6,
            gamma=0.07,
            colsample_bytree=0.31,
        ),
        model_extra_params=dict(fit=dict(
            eval_set=[
                (env.train_input, env.train_target),
                (env.validation_input, env.validation_target),
            ],
            early_stopping_rounds=5,
            eval_metric="mae",
        )),
    )

    G.priority_callbacks = tuple()
Code Example #30
def execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_iris_data(),
        results_path="HyperparameterHunterAssets",
        target_column="species",
        metrics=["hamming_loss"],
        cv_params=dict(n_splits=5, random_state=32),
    )

    #################### Experiment ####################
    # Just a reference for normal `class_weight` usage outside of optimization
    CVExperiment(RandomForestClassifier, dict(n_estimators=10, class_weight={0: 1, 1: 1, 2: 1}))

    #################### Optimization ####################
    opt = BayesianOptPro(iterations=10, random_state=32)
    opt.forge_experiment(
        model_initializer=RandomForestClassifier,
        model_init_params=dict(
            # Weight values for each class can be optimized with `Categorical`/`Integer`
            class_weight={
                0: Categorical([1, 3]),
                1: Categorical([1, 4]),
                2: Integer(1, 9),  # You can also use `Integer` for low/high ranges
            },
            criterion=Categorical(["gini", "entropy"]),
            n_estimators=Integer(5, 100),
        ),
    )
    opt.go()