Code Example #1
def execute():
    env = Environment(
        train_dataset=get_imbalanced_dataset(),
        results_path="HyperparameterHunterAssets",
        target_column="target",
        metrics=["roc_auc_score", "accuracy_score"],
        cv_type="KFold",
        cv_params=dict(n_splits=5, random_state=7),
    )

    # Since this is HyperparameterHunter, after all, we'll throw in some classic hyperparameter
    #   optimization just for fun. If you're like most people and you think it's absurd to test
    #   18 different `imblearn` techniques, feel free to comment out some `EngineerStep`s below

    opt_0 = ET(iterations=20, random_state=32)
    opt_0.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            n_estimators=Integer(50, 900),
            learning_rate=Real(0.0001, 0.9),
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear"]),
        ),
        feature_engineer=FeatureEngineer([
            Categorical(
                [
                    EngineerStep(resample_smote_tomek, stage="intra_cv"),
                    EngineerStep(over_sample_random, stage="intra_cv"),
                    EngineerStep(over_sample_smote, stage="intra_cv"),
                    EngineerStep(under_sample_random, stage="intra_cv"),
                    EngineerStep(under_sample_cluster_centroids,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_tomek_links, stage="intra_cv"),
                    #################### GROUP 2 (EXTENDED) ####################
                    EngineerStep(resample_smote_enn, stage="intra_cv"),
                    EngineerStep(over_sample_ADASYN, stage="intra_cv"),
                    EngineerStep(over_sample_BorderlineSMOTE,
                                 stage="intra_cv"),
                    EngineerStep(over_sample_SVMSMOTE, stage="intra_cv"),
                    EngineerStep(under_sample_NearMiss, stage="intra_cv"),
                    EngineerStep(under_sample_CondensedNearestNeighbour,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_OneSidedSelection,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_NeighbourhoodCleaningRule,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_EditedNearestNeighbours,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_RepeatedEditedNearestNeighbour,
                                 stage="intra_cv"),
                    EngineerStep(under_sample_AllKNN, stage="intra_cv"),
                    EngineerStep(under_sample_InstanceHardnessThreshold,
                                 stage="intra_cv"),
                ],
                optional=True,
            )
        ]),
    )
    opt_0.go()
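
The resampling `EngineerStep` functions referenced above are defined elsewhere in the source. As a minimal sketch of what one might look like (an assumption - `imblearn` provides the samplers, and HyperparameterHunter passes data to a step function based on its argument names):

# Hypothetical sketch of one resampling step - not the source's actual helper.
# Depending on versions, you may need to rebuild DataFrames from the arrays
# that `fit_resample` returns.
from imblearn.combine import SMOTETomek

def resample_smote_tomek(train_inputs, train_targets):
    sampler = SMOTETomek(random_state=32)
    train_inputs, train_targets = sampler.fit_resample(train_inputs, train_targets)
    return train_inputs, train_targets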
Code Example #2
    def hyperparameter_space(self, param_space=None):
        """Return `param_space` if one was given; otherwise, return the default
        LightGBM search space defined below"""
        space = dict(
            is_unbalance=True,
            learning_rate=Real(0.01, 0.3),
            num_boost_round=Integer(50, 500),
            num_leaves=Integer(31, 255),
            min_child_weight=Real(0.1, 10),
            colsample_bytree=Real(0.5, 1.0),
            subsample=Real(0.5, 1.0),
            reg_alpha=Real(0.01, 0.1),
            reg_lambda=Real(0.01, 0.1),
        )
        return param_space if param_space else space
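
The keys above are LightGBM parameters, so if this method feeds a HyperparameterHunter-style optimizer (an assumption - the surrounding class is not shown), the returned dict could be passed directly as `model_init_params`, mixing the constant `is_unbalance=True` with searchable `Real`/`Integer` dimensions:

# Hypothetical usage - `wrapper` is an instance of the class defining
# `hyperparameter_space`, and `optimizer` is any OptPro-style optimizer:
optimizer.forge_experiment(
    model_initializer=LGBMClassifier,
    model_init_params=wrapper.hyperparameter_space(),
)
optimizer.go()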
Code Example #3
def _build_penta_cat_int(input_shape):
    model = Sequential([
        Dense(
            Integer(50, 100),
            kernel_initializer=Categorical(
                ["lecun_uniform", "lecun_normal", "glorot_normal"]),
            input_shape=input_shape,
            activation=Categorical(
                ["elu", "selu", "softsign", "relu", "tanh", "sigmoid"]),
        ),
        Dropout(0.5),
        Dense(
            1,
            kernel_initializer=Categorical(
                ["lecun_uniform", "lecun_normal", "glorot_normal"]),
            activation=Categorical(
                ["elu", "selu", "softsign", "relu", "tanh", "sigmoid"]),
        ),
    ])
    model.compile(
        optimizer=Categorical([
            "sgd", "rmsprop", "adagrad", "adadelta", "adam", "adamax", "nadam"
        ]),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model
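
A build function like this is not called directly; it is handed to an optimizer via `build_fn`, as in Code Examples #7 and #16. A sketch under that assumption:

# Plausible wiring for `_build_penta_cat_int` (mirrors Code Examples #7 and #16):
optimizer = BayesianOptPro(iterations=10)
optimizer.forge_experiment(
    model_initializer=KerasClassifier,
    model_init_params=dict(build_fn=_build_penta_cat_int),
    model_extra_params=dict(batch_size=32, epochs=10, verbose=0),
)
optimizer.go()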
Code Example #4
def _execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path="HyperparameterHunterAssets",
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=10, shuffle=True, random_state=32),
        runs=2,
    )

    optimizer = BayesianOptPro(iterations=10, read_experiments=True, random_state=None)

    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            n_estimators=200,
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear", "dart"]),
        ),
        model_extra_params=dict(fit=dict(eval_metric=Categorical(["auc", "rmse", "mae"]))),
    )

    optimizer.go()
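
This snippet omits its imports. Based on the names it uses, a plausible header (an assumption - the original file's imports are not shown) would be:

from hyperparameter_hunter import Environment, BayesianOptPro, Real, Integer, Categorical
from hyperparameter_hunter.utils.learning_utils import get_breast_cancer_data
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier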
Code Example #5
def _execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=StratifiedKFold,
        cross_validation_params=dict(n_splits=10,
                                     shuffle=True,
                                     random_state=32),
        runs=1,
    )

    optimizer = RandomForestOptimization(
        iterations=100,
        read_experiments=True,
    )
    optimizer.set_experiment_guidelines(
        model_initializer=LGBMClassifier,
        model_init_params=dict(boosting_type=Categorical(['gbdt', 'dart']),
                               num_leaves=Integer(5, 20),
                               max_depth=-1,
                               min_child_samples=5,
                               subsample=0.5),
    )
    optimizer.go()
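
Note that this example uses HyperparameterHunter's older argument names (`root_results_path`, `metrics_map`, `cross_validation_type`, `cross_validation_params`) and `set_experiment_guidelines`, where Code Examples #1 and #4 use the renamed equivalents (`results_path`, `metrics`, `cv_type`, `cv_params`, and `forge_experiment`). A sketch of the same `Environment` under the newer names:

# The same Environment with the renamed arguments used in Code Examples #1 and
# #4 - a sketch, not a guaranteed drop-in for every library version:
env = Environment(
    train_dataset=get_breast_cancer_data(),
    results_path="HyperparameterHunterAssets",
    target_column="diagnosis",
    metrics=["roc_auc_score"],
    cv_type=StratifiedKFold,
    cv_params=dict(n_splits=10, shuffle=True, random_state=32),
    runs=1,
)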
Code Example #6
def _execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path='HyperparameterHunterAssets',
        metrics_map=['roc_auc_score'],
        cross_validation_type='StratifiedKFold',
        cross_validation_params=dict(n_splits=5, shuffle=True,
                                     random_state=32),
        runs=1,
    )

    optimizer = GradientBoostedRegressionTreeOptimization(
        iterations=10,
        read_experiments=True,
        random_state=None,
    )

    optimizer.set_experiment_guidelines(
        model_initializer=CatBoostClassifier,
        model_init_params=dict(iterations=100,
                               eval_metric=Categorical(
                                   ['Logloss', 'Accuracy', 'AUC'],
                                   transform='onehot'),
                               learning_rate=Real(low=0.0001, high=0.5),
                               depth=Integer(4, 7),
                               save_snapshot=False),
    )

    optimizer.go()

    print('')
Code Example #7
def _execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_breast_cancer_data(target="target"),
        root_results_path="HyperparameterHunterAssets",
        metrics_map=["roc_auc_score"],
        cross_validation_type="StratifiedKFold",
        cross_validation_params=dict(n_splits=5, shuffle=True,
                                     random_state=32),
    )

    #################### Experimentation ####################
    experiment = CVExperiment(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_experiment),
        model_extra_params=dict(callbacks=[ReduceLROnPlateau(patience=5)],
                                batch_size=32,
                                epochs=10,
                                verbose=0),
    )

    #################### Optimization ####################
    optimizer = BayesianOptimization(iterations=10)
    optimizer.set_experiment_guidelines(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_optimization),
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))],
            batch_size=Categorical([32, 64], transform="onehot"),
            epochs=10,
            verbose=0,
        ),
    )
    optimizer.go()
Code Example #8
def _execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=StratifiedKFold,
        cross_validation_params=dict(n_splits=10, shuffle=True, random_state=32),
        runs=2,
    )

    optimizer = BayesianOptimization(iterations=100, read_experiments=True, random_state=None)

    optimizer.set_experiment_guidelines(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            n_estimators=200,
            subsample=0.5,
            booster=Categorical(['gbtree', 'gblinear', 'dart']),
        ),
        model_extra_params=dict(
            fit=dict(
                eval_metric=Categorical(['auc', 'rmse', 'mae'])
            )
        ),
    )

    optimizer.go()
Code Example #9
def execute():
    """This is going to be a very simple example to illustrate what exactly HyperparameterHunter does, and how it revolutionizes
    hyperparameter optimization."""

    # Start by creating an `Environment` - This is where you define how Experiments (and optimization) will be conducted
    env = Environment(
        train_dataset=get_breast_cancer_data(target="target"),
        root_results_path="HyperparameterHunterAssets",
        metrics_map=["roc_auc_score"],
        cross_validation_type="StratifiedKFold",
        cross_validation_params=dict(n_splits=10,
                                     shuffle=True,
                                     random_state=32),
    )

    # Now, conduct an `Experiment`
    # This tells HyperparameterHunter to use the settings in the active `Environment` to train a model with these hyperparameters
    experiment = CVExperiment(model_initializer=XGBClassifier,
                              model_init_params=dict(objective="reg:linear",
                                                     max_depth=3))

    # That's it. No annoying boilerplate code to fit models and record results
    # Now, the `Environment`'s `root_results_path` directory will contain new files describing the Experiment just conducted

    # Time for the fun part. We'll set up some hyperparameter optimization by first defining the `OptimizationProtocol` we want
    optimizer = BayesianOptimization(verbose=1)

    # Now we're going to say which hyperparameters we want to optimize.
    # Notice how this looks just like our `experiment` above
    optimizer.set_experiment_guidelines(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            # We're setting `objective` as a constant guideline - not one to optimize
            objective="reg:linear",
            # Instead of using an int like the `experiment` above, we provide a space to search
            max_depth=Integer(2, 10),
        ),
    )
    # Notice that our range for `max_depth` includes the `max_depth=3` value we used in our `experiment` earlier

    optimizer.go()  # Now, we go

    assert experiment.experiment_id in [
        _[2] for _ in optimizer.similar_experiments
    ]
    # Here we're verifying that the `experiment` we conducted first was found by `optimizer` and used as learning material
    # You can also see via the console that we found `experiment`'s saved files, and used it to start optimization

    last_experiment_id = optimizer.current_experiment.experiment_id
    # Let's save the id of the experiment that was just conducted by `optimizer`

    optimizer.go()  # Now, we'll start up `optimizer` again...

    # And we can see that this second optimization round learned from both our first `experiment` and our first optimization round
    assert experiment.experiment_id in [
        _[2] for _ in optimizer.similar_experiments
    ]
    assert last_experiment_id in [_[2] for _ in optimizer.similar_experiments]
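
The walkthrough above also omits its imports; a plausible header (an assumption) would be:

from hyperparameter_hunter import Environment, CVExperiment, BayesianOptimization, Integer
from hyperparameter_hunter.utils.learning_utils import get_breast_cancer_data
from xgboost import XGBClassifier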
Code Example #10
def _build_fn_glorot_normal_1(input_shape):  # `"glorot_normal"`
    model = Sequential(
        [
            Dense(Integer(50, 100), input_shape=input_shape),
            Dense(1, kernel_initializer="glorot_normal"),
        ]
    )
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model
Code Example #11
def _build_fn_orthogonal_i_6(input_shape):  # `Orthogonal(gain=Real(0.6, 1.6))`
    model = Sequential(
        [
            Dense(Integer(50, 100), input_shape=input_shape),
            Dense(1, kernel_initializer=Orthogonal(gain=Real(0.6, 1.6))),
        ]
    )
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model
Code Example #12
def _build_fn_categorical_4(input_shape):  # `Categorical(["glorot_normal", Orthogonal(gain=1)])`
    model = Sequential(
        [
            Dense(Integer(50, 100), input_shape=input_shape),
            Dense(1, kernel_initializer=Categorical(["glorot_normal", Orthogonal(gain=1)])),
        ]
    )
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model
Code Example #13
def test_reg_categorical_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_categorical_ok`, except `Integer` added to show cooperation"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(
            loss=Categorical(["linear", "square", "exponential"]),
            n_estimators=Integer(10, 40),
        ),
    )
    opt.go()
Code Example #14
def opt_pro(optimization_protocol):
    opt = optimization_protocol(iterations=3,
                                random_state=32,
                                n_initial_points=1)
    opt.forge_experiment(
        model_initializer=XGBRegressor,
        model_init_params=dict(
            max_depth=Integer(2, 10),
            n_estimators=Integer(50, 300),
            learning_rate=Real(0.1, 0.9),
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear"]),
        ),
        model_extra_params=dict(fit=dict(
            eval_metric=Categorical(["rmse", "mae"]))),
        feature_engineer=FeatureEngineer(
            [Categorical([nothing_transform], optional=True)]),
    )
    opt.go()
    return opt
Code Example #15
def test_reg_engineer_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_engineer`, except `Integer` dimension added to show that everything is
    fine now. Problem limited to not only `BayesianOptPro`, but also exclusively `Categorical`
    search spaces"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(n_estimators=Integer(10, 40)),
        feature_engineer=FeatureEngineer(
            [Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
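
For reference, the `test_reg_engineer` mentioned in the docstring is not shown in this listing; per that docstring, it would differ only by lacking the `Integer` dimension - a reconstruction, not the verbatim test:

# Reconstructed sketch of `test_reg_engineer` - the exclusively-`Categorical`
# space that used to be problematic:
def test_reg_engineer(env_boston_regression, hh_assets, opt_pro):
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        feature_engineer=FeatureEngineer(
            [Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()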
Code Example #16
def opt_keras_0():
    optimizer = DummySearch(iterations=2)
    optimizer.set_experiment_guidelines(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_optimization),
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))],
            batch_size=Categorical([32, 64], transform="onehot"),
            epochs=10,
            verbose=0,
        ),
    )
    optimizer.go()
Code Example #17
def opt_dtc_0():
    optimizer = ExtraTreesOptimization(iterations=2, random_state=1337)
    optimizer.set_experiment_guidelines(
        model_initializer=DecisionTreeClassifier,
        model_init_params=dict(
            criterion="gini",
            min_samples_split=Integer(2, 5),
            splitter=Categorical(["best", "random"]),
            min_weight_fraction_leaf=Real(0.0, 0.1),
        ),
    )
    optimizer.go()
    yield optimizer
Code Example #18
def opt_regressor():
    optimizer = DummyOptPro(iterations=1)
    optimizer.forge_experiment(
        model_initializer=KerasRegressor,
        model_init_params=_build_fn_regressor,
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))],
            batch_size=Categorical([32, 64], transform="onehot"),
            epochs=10,
            verbose=0,
        ),
    )
    optimizer.go()
Code Example #19
def do_optimization():
    optimizer = BayesianOptPro(iterations=5, random_state=1337)
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            subsample=0.5,
            booster=Categorical(["gbtree", "dart"]),
        ),
    )
    optimizer.go()
Code Example #20
def opt_xgb_0():
    optimizer = RandomForestOptimization(iterations=2, random_state=1337)
    optimizer.set_experiment_guidelines(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            subsample=0.5,
            booster=Categorical(["gbtree", "dart"]),
        ),
    )
    optimizer.go()
    yield optimizer
Code Example #21
def test_sentinels_optimization(env_0):
    optimizer = GBRT(iterations=2)
    optimizer.set_experiment_guidelines(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear",
                               max_depth=Integer(2, 20),
                               subsample=0.5),
        model_extra_params=dict(fit=dict(
            eval_set=get_all_sentinels(env_0),
            early_stopping_rounds=5,
            eval_metric=Categorical(["auc", "mae"]),
        )),
    )
    optimizer.go()
Code Example #22
def test_reg_engineer_categorical_integer_ok(env_boston_regression, hh_assets,
                                             opt_pro):
    """Identical to `test_reg_engineer_categorical`, except `Integer` added to demonstrate that all
    `OptPro`s can optimize with `FeatureEngineer` if space is not exclusively `Categorical`"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(
            loss=Categorical(["linear", "square", "exponential"]),
            n_estimators=Integer(10, 40),
        ),
        feature_engineer=FeatureEngineer(
            [Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
Code Example #23
def execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_iris_data(),
        results_path="HyperparameterHunterAssets",
        target_column="species",
        metrics=["hamming_loss"],
        cv_params=dict(n_splits=5, random_state=32),
    )

    #################### Experiment ####################
    # Just a reference for normal `class_weight` usage outside of optimization
    CVExperiment(
        RandomForestClassifier,
        dict(n_estimators=10, class_weight={0: 1, 1: 1, 2: 1}),
    )

    #################### Optimization ####################
    opt = BayesianOptPro(iterations=10, random_state=32)
    opt.forge_experiment(
        model_initializer=RandomForestClassifier,
        model_init_params=dict(
            # Weight values for each class can be optimized with `Categorical`/`Integer`
            class_weight={
                0: Categorical([1, 3]),
                1: Categorical([1, 4]),
                2: Integer(1, 9),  # You can also use `Integer` for low/high ranges
            },
            criterion=Categorical(["gini", "entropy"]),
            n_estimators=Integer(5, 100),
        ),
    )
    opt.go()
Code Example #24
def test_space_rvs():
    """Test that calling `Space.rvs` returns expected values. This is specifically
    aimed at ensuring `Categorical` instances containing strings produce the entire
    string, rather than the first character, for example"""
    space = Space([Integer(50, 100), Categorical(["glorot_normal", "orthogonal"])])

    sample_0 = space.rvs(random_state=32)
    sample_1 = space.rvs(n_samples=1, random_state=32)
    sample_2 = space.rvs(n_samples=2, random_state=32)
    sample_3 = space.rvs(n_samples=3, random_state=32)

    assert sample_0 == [[73, "glorot_normal"]]
    assert sample_1 == [[73, "glorot_normal"]]
    assert sample_2 == [[73, "glorot_normal"], [93, "orthogonal"]]
    assert sample_3 == [[73, "glorot_normal"], [93, "glorot_normal"], [55, "orthogonal"]]
Code Example #25
def _build_fn_regressor(input_shape):
    model = Sequential(
        [
            Dense(100, activation="relu", input_shape=input_shape),
            Dense(Integer(40, 60), activation="relu", kernel_initializer="glorot_normal"),
            Dropout(Real(0.2, 0.7)),
            Dense(1, activation=Categorical(["relu", "sigmoid"]), kernel_initializer="orthogonal"),
        ]
    )
    model.compile(
        optimizer=Categorical(["adam", "rmsprop"]),
        loss="mean_absolute_error",
        metrics=["mean_absolute_error"],
    )
    return model
Code Example #26
def opt_svc_0(request):
    optimizer = BayesianOptimization(target_metric=request.param, iterations=2, random_state=32)
    optimizer.set_experiment_guidelines(
        model_initializer=SVC,
        model_init_params=dict(
            C=Real(0.9, 1.1),
            kernel=Categorical(["linear", "poly", "rbf"]),
            max_iter=Integer(50, 125),
            tol=1e-3,
        ),
    )
    optimizer.go()
    yield optimizer

    assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))
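
`opt_svc_0` is a pytest fixture: `request.param` implies parametrization, and the assertion after `yield` runs as teardown. A sketch of the decorator such a fixture needs (the parameter values here are assumptions):

import pytest

# Hypothetical parametrization - the actual values are not shown in the source;
# the `or "roc_auc"` fallback suggests one of them is None:
@pytest.fixture(params=[None, "f1_score"])
def opt_svc_0(request):
    ...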
Code Example #27
def space_fixture():
    dimensions = [
        Real(0.1, 0.9),
        Categorical(["foo", "bar", "baz"]),
        Integer(12, 18)
    ]
    locations = [
        ("model_init_params", "a"),
        ("model_init_params", "b", "c"),
        ("model_extra_params", "e"),
    ]

    for dimension, location in zip(dimensions, locations):
        dimension.location = location

    return Space(dimensions)
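
Each `location` tuple records where its dimension lives within an experiment's parameters (e.g. under `model_init_params` or `model_extra_params`), so samples drawn from the `Space` can be mapped back onto concrete hyperparameters. A hedged usage sketch, reusing the `rvs` behavior shown in Code Example #24:

# Hypothetical test consuming the fixture - `rvs` returns a list of samples,
# each holding one value per dimension (see Code Example #24):
def test_space_locations(space_fixture):
    (sample,) = space_fixture.rvs(random_state=32)
    assert len(sample) == 3  # one value each for "a", "b"/"c", and "e"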
Code Example #28
def _build_fn_optimization(input_shape):
    model = Sequential([
        Dense(Integer(50, 150),
              kernel_initializer='uniform',
              input_shape=input_shape,
              activation='relu'),
        Dropout(Real(0.2, 0.7)),
        Dense(1,
              kernel_initializer='uniform',
              activation=Categorical(['sigmoid', 'relu'])),
    ])
    model.compile(
        optimizer=Categorical(['adam', 'rmsprop']),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model
Code Example #29
def _build_fn_optimization(input_shape):
    model = Sequential([
        Dense(
            Integer(50, 150),
            kernel_initializer="uniform",
            input_shape=input_shape,
            activation="relu",
        ),
        Dropout(Real(0.2, 0.7)),
        Dense(1,
              kernel_initializer="uniform",
              activation=Categorical(["sigmoid", "relu"])),
    ])
    model.compile(optimizer=Categorical(["adam", "rmsprop"]),
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model
Code Example #30
def opt_lgb_0(request):
    optimizer = BayesianOptimization(target_metric=request.param,
                                     iterations=2,
                                     random_state=32)
    optimizer.set_experiment_guidelines(
        model_initializer=LGBMClassifier,
        model_init_params=dict(
            boosting_type=Categorical(["gbdt", "dart"]),
            num_leaves=Integer(2, 8),
            max_depth=5,
            min_child_samples=1,
            subsample=Real(0.4, 0.7),
            verbose=-1,
        ),
    )
    optimizer.go()
    yield optimizer

    assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))