def execute():
    env = Environment(
        train_dataset=get_imbalanced_dataset(),
        results_path="HyperparameterHunterAssets",
        target_column="target",
        metrics=["roc_auc_score", "accuracy_score"],
        cv_type="KFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=7),  # shuffle=True so random_state takes effect
    )

    # Since this is HyperparameterHunter, after all, we'll throw in some classic hyperparameter
    #   optimization just for fun. If you're like most people and you think it's absurd to test
    #   18 different `imblearn` techniques, feel free to comment out some `EngineerStep`s below

    opt_0 = ET(iterations=20, random_state=32)
    opt_0.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            n_estimators=Integer(50, 900),
            learning_rate=Real(0.0001, 0.9),
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear"]),
        ),
        feature_engineer=FeatureEngineer([
            Categorical(
                [
                    #################### GROUP 1 ####################
                    EngineerStep(resample_smote_tomek, stage="intra_cv"),
                    EngineerStep(over_sample_random, stage="intra_cv"),
                    EngineerStep(over_sample_smote, stage="intra_cv"),
                    EngineerStep(under_sample_random, stage="intra_cv"),
                    EngineerStep(under_sample_cluster_centroids,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_tomek_links, stage="intra_cv"),
                    #################### GROUP 2 (EXTENDED) ####################
                    EngineerStep(resample_smote_enn, stage="intra_cv"),
                    EngineerStep(over_sample_ADASYN, stage="intra_cv"),
                    EngineerStep(over_sample_BorderlineSMOTE,
                                 stage="intra_cv"),
                    EngineerStep(over_sample_SVMSMOTE, stage="intra_cv"),
                    EngineerStep(under_sample_NearMiss, stage="intra_cv"),
                    EngineerStep(under_sample_CondensedNearestNeighbour,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_OneSidedSelection,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_NeighbourhoodCleaningRule,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_EditedNearestNeighbours,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_RepeatedEditedNearestNeighbour,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_AllKNN, stage="intra_cv"),
                    EngineerStep(under_sample_InstanceHardnessThreshold,
                                 stage="intra_cv"),
                ],
                optional=True,
            )
        ]),
    )
    opt_0.go()
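# The `imblearn` step functions referenced above are defined elsewhere. Below is a minimal,
#   hypothetical sketch of what one of them might look like, assuming the `(train_inputs,
#   train_targets)` `EngineerStep` signature and pandas DataFrames; `RandomOverSampler` is real
#   `imblearn`, but this wrapper is an illustration, not the original definition
import pandas as pd
from imblearn.over_sampling import RandomOverSampler


def over_sample_random(train_inputs, train_targets):
    # Resample only the training fold (hence `stage="intra_cv"` above), then wrap the resampled
    #   arrays back into DataFrames so column names are preserved
    sampler = RandomOverSampler(random_state=32)
    X, y = sampler.fit_resample(train_inputs.values, train_targets.values.ravel())
    train_inputs = pd.DataFrame(X, columns=train_inputs.columns)
    train_targets = pd.DataFrame(y, columns=train_targets.columns)
    return train_inputs, train_targets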
def _execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=StratifiedKFold,
        cross_validation_params=dict(n_splits=10, shuffle=True, random_state=32),
        runs=2,
    )

    optimizer = BayesianOptimization(iterations=100, read_experiments=True, random_state=None)

    optimizer.set_experiment_guidelines(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            n_estimators=200,
            subsample=0.5,
            booster=Categorical(['gbtree', 'gblinear', 'dart']),
        ),
        model_extra_params=dict(
            fit=dict(
                eval_metric=Categorical(['auc', 'rmse', 'mae'])
            )
        ),
    )

    optimizer.go()
def _execute():
    env = Environment(
        train_dataset=get_toy_classification_data(target='diagnosis'),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=RepeatedStratifiedKFold,
        cross_validation_params=dict(n_splits=5, n_repeats=2, random_state=32),
    )

    optimizer = ExtraTreesOptimization(
        iterations=10,
        read_experiments=True,
        random_state=None,
    )

    optimizer.set_experiment_guidelines(
        model_initializer=RGFClassifier,
        model_init_params=dict(max_leaf=1000,
                               algorithm=Categorical(
                                   ['RGF', 'RGF_Opt', 'RGF_Sib']),
                               l2=Real(0.01, 0.3),
                               normalize=Categorical([True, False]),
                               learning_rate=Real(0.3, 0.7),
                               loss=Categorical(['LS', 'Expo', 'Log', 'Abs'])),
    )

    optimizer.go()
def execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_boston_data(),
        results_path="HyperparameterHunterAssets",
        holdout_dataset=get_holdout_data,
        target_column="DIS",
        metrics=["r2_score", "median_absolute_error"],
        cv_type="KFold",
        cv_params=dict(n_splits=10, shuffle=True, random_state=1),  # shuffle=True so random_state takes effect
    )

    #################### CVExperiment ####################
    exp_0 = CVExperiment(
        model_initializer=Ridge,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([quantile_transform]),
    )

    #################### Optimization ####################
    # `opt_0` recognizes `exp_0`'s `feature_engineer` and its results as valid learning material
    # This is because `opt_0` marks the engineer step functions omitted by `exp_0` as `optional=True`
    opt_0 = DummyOptPro(iterations=10)
    opt_0.forge_experiment(
        model_initializer=Ridge,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([
            Categorical([quantile_transform, log_transform], optional=True),
            Categorical([standard_scale, standard_scale_BAD], optional=True),
            Categorical([square_sum_feature], optional=True),
        ]),
    )
    opt_0.go()
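# `quantile_transform` and the other engineer steps referenced above are defined elsewhere. A
#   minimal, hypothetical sketch of a compatible step is below, assuming the
#   `(train_inputs, non_train_inputs)` signature so the transformer is fit on training data only;
#   `QuantileTransformer` is standard scikit-learn
from sklearn.preprocessing import QuantileTransformer


def quantile_transform(train_inputs, non_train_inputs):
    # Fit on the training fold, then apply the same fitted transformer to the
    #   validation/holdout inputs to avoid leakage
    transformer = QuantileTransformer(output_distribution="normal")
    train_inputs[train_inputs.columns] = transformer.fit_transform(train_inputs)
    non_train_inputs[non_train_inputs.columns] = transformer.transform(non_train_inputs)
    return train_inputs, non_train_inputs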
def _execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path="HyperparameterHunterAssets",
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=10, shuffle=True, random_state=32),
        runs=2,
    )

    optimizer = BayesianOptPro(iterations=10, read_experiments=True, random_state=None)

    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            n_estimators=200,
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear", "dart"]),
        ),
        model_extra_params=dict(fit=dict(eval_metric=Categorical(["auc", "rmse", "mae"]))),
    )

    optimizer.go()
def _build_penta_cat_int(input_shape):
    model = Sequential([
        Dense(
            Integer(50, 100),
            kernel_initializer=Categorical(
                ["lecun_uniform", "lecun_normal", "glorot_normal"]),
            input_shape=input_shape,
            activation=Categorical(
                ["elu", "selu", "softsign", "relu", "tanh", "sigmoid"]),
        ),
        Dropout(0.5),
        Dense(
            1,
            kernel_initializer=Categorical(
                ["lecun_uniform", "lecun_normal", "glorot_normal"]),
            activation=Categorical(
                ["elu", "selu", "softsign", "relu", "tanh", "sigmoid"]),
        ),
    ])
    model.compile(
        optimizer=Categorical([
            "sgd", "rmsprop", "adagrad", "adadelta", "adam", "adamax", "nadam"
        ]),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model
def test_reg_engineer_categorical(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate that `BayesianOptPro` breaks with multiple `Categorical`s when `FeatureEngineer`
    is included in the dimensions"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(
            loss=Categorical(["linear", "square", "exponential"])),
        feature_engineer=FeatureEngineer(
            [Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
def build_fn_digits_opt(input_shape=-1):
    model = Sequential(
        [
            Reshape((8, 8, -1), input_shape=(64,)),
            Conv2D(32, kernel_size=Categorical([(3, 3), (5, 5)]), activation="relu"),
            MaxPooling2D(pool_size=Categorical([(2, 2), (3, 3)])),
            Dropout(0.5),
            Flatten(),
            Dense(1, activation="sigmoid"),
        ]
    )
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model
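# A build function like the one above only searches anything once it is handed to an OptPro. A
#   hypothetical usage sketch, mirroring the `KerasClassifier` pattern used elsewhere in these
#   examples (the optimizer choice and extra params here are illustrative):
def _optimize_digits():
    opt = BayesianOptPro(iterations=10, random_state=32)
    opt.forge_experiment(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=build_fn_digits_opt),
        model_extra_params=dict(batch_size=32, epochs=10, verbose=0),
    )
    opt.go()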
def test_reg_engineer_categorical_integer_ok(env_boston_regression, hh_assets,
                                             opt_pro):
    """Identical to `test_reg_engineer_categorical`, except `Integer` added to demonstrate that all
    `OptPro`s can optimize with `FeatureEngineer` if space is not exclusively `Categorical`"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(loss=Categorical(
            ["linear", "square", "exponential"]),
                               n_estimators=Integer(10, 40)),
        feature_engineer=FeatureEngineer(
            [Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
def _build_tri_cat_real(input_shape):
    model = Sequential([
        Dense(90,
              input_shape=input_shape,
              activation=Categorical(["elu", "selu", "relu"])),
        Dropout(Real(0.2, 0.7)),
        Dense(1,
              activation=Categorical(
                  ["selu", "softsign", "relu", "tanh", "sigmoid"])),
    ])
    model.compile(optimizer=Categorical(["adam", "rmsprop"]),
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model
def _build_fn_regressor(input_shape):
    model = Sequential(
        [
            Dense(100, activation="relu", input_shape=input_shape),
            Dense(Integer(40, 60), activation="relu", kernel_initializer="glorot_normal"),
            Dropout(Real(0.2, 0.7)),
            Dense(1, activation=Categorical(["relu", "sigmoid"]), kernel_initializer="orthogonal"),
        ]
    )
    model.compile(
        optimizer=Categorical(["adam", "rmsprop"]),
        loss="mean_absolute_error",
        metrics=["mean_absolute_error"],
    )
    return model
def _execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path='HyperparameterHunterAssets',
        metrics_map=['roc_auc_score'],
        cross_validation_type='StratifiedKFold',
        cross_validation_params=dict(n_splits=5, shuffle=True,
                                     random_state=32),
        runs=1,
    )

    optimizer = GradientBoostedRegressionTreeOptimization(
        iterations=10,
        read_experiments=True,
        random_state=None,
    )

    optimizer.set_experiment_guidelines(
        model_initializer=CatBoostClassifier,
        model_init_params=dict(iterations=100,
                               eval_metric=Categorical(
                                   ['Logloss', 'Accuracy', 'AUC'],
                                   transform='onehot'),
                               learning_rate=Real(low=0.0001, high=0.5),
                               depth=Integer(4, 7),
                               save_snapshot=False),
    )

    optimizer.go()

    print('')
def _execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_breast_cancer_data(target="target"),
        root_results_path="HyperparameterHunterAssets",
        metrics_map=["roc_auc_score"],
        cross_validation_type="StratifiedKFold",
        cross_validation_params=dict(n_splits=5, shuffle=True,
                                     random_state=32),
    )

    #################### Experimentation ####################
    experiment = CVExperiment(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_experiment),
        model_extra_params=dict(callbacks=[ReduceLROnPlateau(patience=5)],
                                batch_size=32,
                                epochs=10,
                                verbose=0),
    )

    #################### Optimization ####################
    optimizer = BayesianOptimization(iterations=10)
    optimizer.set_experiment_guidelines(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_optimization),
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))],
            batch_size=Categorical([32, 64], transform="onehot"),
            epochs=10,
            verbose=0,
        ),
    )
    optimizer.go()
def _execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=StratifiedKFold,
        cross_validation_params=dict(n_splits=10,
                                     shuffle=True,
                                     random_state=32),
        runs=1,
    )

    optimizer = RandomForestOptimization(
        iterations=100,
        read_experiments=True,
    )
    optimizer.set_experiment_guidelines(
        model_initializer=LGBMClassifier,
        model_init_params=dict(boosting_type=Categorical(['gbdt', 'dart']),
                               num_leaves=Integer(5, 20),
                               max_depth=-1,
                               min_child_samples=5,
                               subsample=0.5),
    )
    optimizer.go()
    def hyperparameter_space(self, param_space=None):
        """Return `param_space` if given; otherwise, return the default search space below"""
        space = dict(
            is_unbalance=True,
            learning_rate=Real(0.01, 0.3),
            num_boost_round=Categorical(np.arange(50, 500, 20)),
            num_leaves=Categorical(np.arange(31, 256, 4)),
            min_child_weight=Real(0.1, 10),
            colsample_bytree=Real(0.5, 1.0),
            subsample=Real(0.5, 1.0),
            reg_alpha=Real(0.01, 0.1),
            reg_lambda=Real(0.01, 0.1),
        )
        return param_space if param_space else space
class ChoiceMMNormalizeSS:
    functions = Categorical([min_max_scale, normalize, standard_scale])
    engineers = Categorical([
        EngineerStep(min_max_scale),
        EngineerStep(normalize),
        EngineerStep(standard_scale)
    ])
    o_functions = Categorical([min_max_scale, normalize, standard_scale],
                              optional=True)
    o_engineers = Categorical(
        [
            EngineerStep(min_max_scale),
            EngineerStep(normalize),
            EngineerStep(standard_scale)
        ],
        optional=True,
    )
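# The class attributes above read like reusable search-space fragments. A hypothetical sketch of
#   how one might be dropped into a `FeatureEngineer` (the surrounding setup is illustrative):
def _use_choice_fragment():
    opt = DummyOptPro(iterations=5)
    opt.forge_experiment(
        model_initializer=Ridge,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([ChoiceMMNormalizeSS.o_engineers]),
    )
    opt.go()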
def _build_fn_optimization(input_shape):
    model = Sequential([
        Dense(Integer(50, 150),
              kernel_initializer='uniform',
              input_shape=input_shape,
              activation='relu'),
        Dropout(Real(0.2, 0.7)),
        Dense(1,
              kernel_initializer='uniform',
              activation=Categorical(['sigmoid', 'relu'])),
    ])
    model.compile(
        optimizer=Categorical(['adam', 'rmsprop']),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model
def _build_fn_categorical_4(input_shape):  # `Categorical(["glorot_normal", Orthogonal(gain=1)])`
    model = Sequential(
        [
            Dense(Integer(50, 100), input_shape=input_shape),
            Dense(1, kernel_initializer=Categorical(["glorot_normal", Orthogonal(gain=1)])),
        ]
    )
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model
def test_optional_step_matching_by_exp(env_boston, es_0, es_1, es_2):
    """Test that the result of an Experiment is correctly matched by an OptPro with all-`optional`
    `EngineerStep` dimensions"""
    feature_engineer = [_ for _ in [es_0, es_1, es_2] if _ is not None]
    exp_0 = CVExperiment(XGBRegressor, feature_engineer=feature_engineer)

    opt_0 = ExtraTreesOptPro(iterations=1, random_state=32)
    opt_0.forge_experiment(
        XGBRegressor,
        feature_engineer=[
            Categorical([es_a], optional=True),
            Categorical([es_b, es_c], optional=True),
            Categorical([es_d, es_e], optional=True),
        ],
    )
    opt_0.get_ready()

    # Assert `opt_0` matched with `exp_0`
    assert len(opt_0.similar_experiments) == 1
def test_reg_categorical_ok(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate that all `OptPro`s are fine with exclusively-`Categorical` space that doesn't
    include `FeatureEngineer`"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(
            loss=Categorical(["linear", "square", "exponential"])),
    )
    opt.go()
def test_reg_categorical_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_categorical_ok`, except `Integer` added to show cooperation"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(loss=Categorical(
            ["linear", "square", "exponential"]),
                               n_estimators=Integer(10, 40)),
    )
    opt.go()
def opt_pro(optimization_protocol):
    opt = optimization_protocol(iterations=3,
                                random_state=32,
                                n_initial_points=1)
    opt.forge_experiment(
        model_initializer=XGBRegressor,
        model_init_params=dict(
            max_depth=Integer(2, 10),
            n_estimators=Integer(50, 300),
            learning_rate=Real(0.1, 0.9),
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear"]),
        ),
        model_extra_params=dict(fit=dict(
            eval_metric=Categorical(["rmse", "mae"]))),
        feature_engineer=FeatureEngineer(
            [Categorical([nothing_transform], optional=True)]),
    )
    opt.go()
    return opt
def test_reg_engineer(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate problem with `BayesianOptPro` specifically - same configuration is fine with all
    other `OptPro`s"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer(
            [Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
def test_reg_engineer_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_engineer`, except an `Integer` dimension is added to show that
    everything is fine now. The problem is confined to `BayesianOptPro` combined with exclusively
    `Categorical` search spaces"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(n_estimators=Integer(10, 40)),
        feature_engineer=FeatureEngineer(
            [Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
def test_similar_experiments_unordered():
    """Check that an experiment with a single `EngineerStep` is considered "similar" by an
    Optimization Protocol, with two `optional` `EngineerStep`s, where the second step is identical
    to the single step used by the standalone experiment. As of v3.0.0alpha2, this is expected to
    fail because the otherwise identical engineer steps occur at different indexes in
    `FeatureEngineer.steps` for the experiment and the OptPro. The experiment has `sqr_sum_feature`
    at index=0, while the same step in the OptPro is at index=1. Note that the step index in OptPro
    is still 1 despite the fact that the other step immediately preceding it is `optional`"""
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path=assets_dir,
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    exp = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear",
                               subsample=0.5,
                               max_depth=3),
        feature_engineer=FeatureEngineer([EngineerStep(sqr_sum_feature)]),
    )

    opt = BayesianOptPro(iterations=1)
    opt.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear",
                               subsample=0.5,
                               max_depth=3),
        feature_engineer=FeatureEngineer([
            Categorical([standard_scale, normalize, min_max_scale],
                        optional=True),
            Categorical([sqr_sum_feature], optional=True),
        ]),
    )
    opt.go()

    assert exp.experiment_id in [_[2] for _ in opt.similar_experiments]
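# `sqr_sum_feature` is not defined in this snippet. A minimal, hypothetical sketch of a compatible
#   step, assuming the `(all_inputs)` signature and a per-row root-sum-of-squares feature (the
#   exact formula is an assumption):
import numpy as np


def sqr_sum_feature(all_inputs):
    # Append a feature holding the square root of the sum of squares of each row's values
    all_inputs["sqr_sum"] = all_inputs.apply(
        lambda row: np.sqrt(np.sum(np.square(row))), axis=1
    )
    return all_inputs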
def opt_dtc_0():
    optimizer = ExtraTreesOptimization(iterations=2, random_state=1337)
    optimizer.set_experiment_guidelines(
        model_initializer=DecisionTreeClassifier,
        model_init_params=dict(
            criterion="gini",
            min_samples_split=Integer(2, 5),
            splitter=Categorical(["best", "random"]),
            min_weight_fraction_leaf=Real(0.0, 0.1),
        ),
    )
    optimizer.go()
    yield optimizer
def opt_keras_0():
    optimizer = DummySearch(iterations=2)
    optimizer.set_experiment_guidelines(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_optimization),
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))],
            batch_size=Categorical([32, 64], transform="onehot"),
            epochs=10,
            verbose=0,
        ),
    )
    optimizer.go()
def do_optimization():
    optimizer = BayesianOptPro(iterations=5, random_state=1337)
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            subsample=0.5,
            booster=Categorical(["gbtree", "dart"]),
        ),
    )
    optimizer.go()
def opt_regressor():
    optimizer = DummyOptPro(iterations=1)
    optimizer.forge_experiment(
        model_initializer=KerasRegressor,
        model_init_params=_build_fn_regressor,
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))],
            batch_size=Categorical([32, 64], transform="onehot"),
            epochs=10,
            verbose=0,
        ),
    )
    optimizer.go()