def execute():
    """Run one Ridge `CVExperiment`, then a `DummyOptPro` search over optional engineer steps.

    An `Environment` is set up first, followed by a single experiment whose
    `FeatureEngineer` contains only `quantile_transform`. A ten-iteration
    `DummyOptPro` is then forged with three optional `Categorical` engineer steps
    and started via `go()`.
    """
    #################### Environment ####################
    environment_settings = {
        "train_dataset": get_boston_data(),
        "results_path": "HyperparameterHunterAssets",
        # The function object itself is handed over; it is not called here
        "holdout_dataset": get_holdout_data,
        "target_column": "DIS",
        "metrics": ["r2_score", "median_absolute_error"],
        "cv_type": "KFold",
        "cv_params": {"n_splits": 10, "random_state": 1},
    }
    env = Environment(**environment_settings)

    #################### CVExperiment ####################
    exp_0 = CVExperiment(
        model_initializer=Ridge,
        model_init_params={},
        feature_engineer=FeatureEngineer([quantile_transform]),
    )

    #################### Optimization ####################
    # `opt_0` recognizes `exp_0`'s `feature_engineer` and its results as valid learning material
    # This is because `opt_0` marks the engineer step functions omitted by `exp_0` as `optional=True`
    opt_0 = DummyOptPro(iterations=10)

    # Each step is optional, so the search may also skip it entirely
    transform_step = Categorical([quantile_transform, log_transform], optional=True)
    scaling_step = Categorical([standard_scale, standard_scale_BAD], optional=True)
    feature_step = Categorical([square_sum_feature], optional=True)
    search_engineer = FeatureEngineer([transform_step, scaling_step, feature_step])

    opt_0.forge_experiment(
        model_initializer=Ridge,
        model_init_params={},
        feature_engineer=search_engineer,
    )
    opt_0.go()
def env_boston():
    """Return an `Environment` built on `get_boston_data()` with target column "DIS".

    The environment is configured with the "r2_score" metric and "KFold"
    cross-validation (`n_splits=3`, `random_state=1`); results go to `assets_dir`.
    """
    settings = {
        "train_dataset": get_boston_data(),
        "results_path": assets_dir,
        "target_column": "DIS",
        "metrics": ["r2_score"],
        "cv_type": "KFold",
        "cv_params": {"n_splits": 3, "random_state": 1},
    }
    return Environment(**settings)
def env_boston_regression():
    """Return an `Environment` built on `get_boston_data()` scored by "median_absolute_error".

    Uses "DIS" as the target column and "KFold" cross-validation
    (`n_splits=3`, `random_state=1`); results go to `assets_dir`.
    """
    settings = {
        "train_dataset": get_boston_data(),
        "results_path": assets_dir,
        "target_column": "DIS",
        "metrics": ["median_absolute_error"],
        "cv_type": "KFold",
        "cv_params": {"n_splits": 3, "random_state": 1},
    }
    return Environment(**settings)
def env_boston():
    """Return an `Environment` built on `get_boston_data()` with a holdout dataset and two metrics.

    Configured with target column "DIS", the "r2_score" and
    "median_absolute_error" metrics, "KFold" cross-validation (`n_splits=3`,
    `random_state=1`), `runs=1` and `verbose=1`; results go to `assets_dir`.
    """
    settings = {
        "train_dataset": get_boston_data(),
        "results_path": assets_dir,
        # The function object itself is handed over; it is not called here
        "holdout_dataset": get_holdout_data,
        "target_column": "DIS",
        "metrics": ["r2_score", "median_absolute_error"],
        "cv_type": "KFold",
        "cv_params": {"n_splits": 3, "random_state": 1},
        "runs": 1,
        "verbose": 1,
    }
    return Environment(**settings)
def env_boston(request):
    """Environment fixture using the Boston regression dataset.

    `holdout_dataset` is taken from `request.param`. The fixture is meant to be
    parametrized so that every test using it runs twice: once with no
    `holdout_dataset`, and once with a `holdout_dataset` constructed using
    SKLearn's `train_test_split`.
    """
    settings = {
        "train_dataset": get_boston_data(),
        "results_path": assets_dir,
        "target_column": "DIS",
        "metrics": ["r2_score"],
        # Value supplied by the fixture's parametrization
        "holdout_dataset": request.param,
        "cv_type": "KFold",
        "cv_params": {"n_splits": 3, "random_state": 1},
    }
    return Environment(**settings)