def execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_boston_data(),
        results_path="HyperparameterHunterAssets",
        holdout_dataset=get_holdout_data,
        target_column="DIS",
        metrics=["r2_score", "median_absolute_error"],
        cv_type="KFold",
        cv_params=dict(n_splits=10, random_state=1),
    )

    #################### CVExperiment ####################
    exp_0 = CVExperiment(
        model_initializer=Ridge,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([quantile_transform]),
    )

    #################### Optimization ####################
    # `opt_0` recognizes `exp_0`'s `feature_engineer` and its results as valid learning material
    # This is because `opt_0` marks the engineer step functions omitted by `exp_0` as `optional=True`
    opt_0 = DummyOptPro(iterations=10)
    opt_0.forge_experiment(
        model_initializer=Ridge,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([
            Categorical([quantile_transform, log_transform], optional=True),
            Categorical([standard_scale, standard_scale_BAD], optional=True),
            Categorical([square_sum_feature], optional=True),
        ]),
    )
    opt_0.go()
Exemple #2
0
def env_boston():
    return Environment(
        train_dataset=get_boston_data(),
        results_path=assets_dir,
        target_column="DIS",
        metrics=["r2_score"],
        cv_type="KFold",
        cv_params=dict(n_splits=3, random_state=1),
    )
def env_boston_regression():
    env = Environment(
        train_dataset=get_boston_data(),
        results_path=assets_dir,
        target_column="DIS",
        metrics=["median_absolute_error"],
        cv_type="KFold",
        cv_params=dict(n_splits=3, random_state=1),
    )
    return env
def env_boston():
    return Environment(
        train_dataset=get_boston_data(),
        results_path=assets_dir,
        holdout_dataset=get_holdout_data,
        target_column="DIS",
        metrics=["r2_score", "median_absolute_error"],
        cv_type="KFold",
        cv_params=dict(n_splits=3, random_state=1),
        runs=1,
        verbose=1,
    )
def env_boston(request):
    """Environment fixture using the Boston regression dataset. Parametrizes `holdout_dataset`, so
    all tests using this fixture will be run twice: once with no `holdout_dataset`, and once with a
    `holdout_dataset` constructed using SKLearn's `train_test_split`"""
    return Environment(
        train_dataset=get_boston_data(),
        results_path=assets_dir,
        target_column="DIS",
        metrics=["r2_score"],
        holdout_dataset=request.param,
        cv_type="KFold",
        cv_params=dict(n_splits=3, random_state=1),
    )