def _execute():
    """Build an `Environment`, then run a `RandomForestOptimization` over `LGBMClassifier`.

    The environment takes its training data from `get_breast_cancer_data()`, targets the
    "diagnosis" column, scores with "roc_auc_score", and cross-validates using a shuffled,
    10-split `StratifiedKFold`. The optimizer is given 100 iterations and searches over
    `boosting_type` and `num_leaves`; the remaining model parameters are held fixed.
    """
    cv_settings = {"n_splits": 10, "shuffle": True, "random_state": 32}
    environment = Environment(
        train_dataset=get_breast_cancer_data(),
        root_results_path="HyperparameterHunterAssets",
        target_column="diagnosis",
        metrics_map=["roc_auc_score"],
        cross_validation_type=StratifiedKFold,
        cross_validation_params=cv_settings,
        runs=1,
    )

    # NOTE(review): `read_experiments=True` presumably lets the optimizer reuse saved
    # experiment results - confirm against the library documentation
    optimizer = RandomForestOptimization(iterations=100, read_experiments=True)

    # Only `boosting_type` and `num_leaves` are search dimensions; the rest are constants
    search_space = {
        "boosting_type": Categorical(["gbdt", "dart"]),
        "num_leaves": Integer(5, 20),
        "max_depth": -1,
        "min_child_samples": 5,
        "subsample": 0.5,
    }
    optimizer.set_experiment_guidelines(
        model_initializer=LGBMClassifier,
        model_init_params=search_space,
    )
    optimizer.go()
# Example no. 2
# 0
def opt_xgb_0():
    """Run a two-iteration `RandomForestOptimization` on `XGBClassifier` and yield it.

    This is a generator: the optimizer is configured, `go()` is called, and the finished
    optimizer is then yielded exactly once.

    NOTE(review): the yield-once shape suggests this was a test fixture whose decorator is
    not visible here - confirm against the original module.
    """
    optimizer = RandomForestOptimization(iterations=2, random_state=1337)

    # NOTE(review): "reg:linear" is a regression objective paired with a classifier -
    # confirm this is intended before changing it
    search_space = {
        "objective": "reg:linear",
        "max_depth": Integer(2, 20),
        "learning_rate": Real(0.0001, 0.5),
        "subsample": 0.5,
        "booster": Categorical(["gbtree", "dart"]),
    }
    optimizer.set_experiment_guidelines(
        model_initializer=XGBClassifier,
        model_init_params=search_space,
    )

    optimizer.go()
    yield optimizer
# Experiments 4-11: one `CVExperiment` per classifier, each given empty `model_init_params`
experiment_4 = CVExperiment(model_initializer=DecisionTreeClassifier, model_init_params={})
experiment_5 = CVExperiment(model_initializer=RandomForestClassifier, model_init_params={})
experiment_6 = CVExperiment(model_initializer=AdaBoostClassifier, model_init_params={})
experiment_7 = CVExperiment(model_initializer=GradientBoostingClassifier, model_init_params={})
experiment_8 = CVExperiment(model_initializer=GaussianNB, model_init_params={})
experiment_9 = CVExperiment(model_initializer=LinearDiscriminantAnalysis, model_init_params={})
experiment_10 = CVExperiment(model_initializer=QuadraticDiscriminantAnalysis, model_init_params={})
experiment_11 = CVExperiment(model_initializer=MLPClassifier, model_init_params={})
# Of course, SKLearn has many more algorithms than those shown here, but I think you get the idea

# Notice that in all the above experiments, we gave `CVExperiment` `model_init_params={}`.
# Passing an empty dict tells it to use the default hyperparameters for the `model_initializer`, which it'll figure out on its own.

#################### 2. Hyperparameter Optimization ####################
# We're just going to do optimization on one of the algorithms used above (`AdaBoostClassifier`);
# ... although, HyperparameterHunter can certainly do consecutive optimization rounds.

# Notice below that `optimizer` correctly identifies `experiment_6` as being the only saved
# ... experiment it can learn from because it's optimizing `AdaBoostClassifier`.

# Search `AdaBoostClassifier` hyperparameters for 12 iterations with a fixed random state
optimizer = RandomForestOptimization(random_state=42, iterations=12)
optimizer.set_experiment_guidelines(
    model_initializer=AdaBoostClassifier,
    model_init_params={
        "n_estimators": Integer(25, 100),
        "learning_rate": Real(0.5, 1.0),
        "algorithm": Categorical(["SAMME", "SAMME.R"]),
    },
)
optimizer.go()
# Example no. 4
# 0
    cross_validation_params=dict(n_splits=5, random_state=32),
)

# Now that HyperparameterHunter has an active `Environment`, we can do two things:
#################### 1. Perform Experiments ####################
# Define one experiment using fixed (non-searched) `LGBMClassifier` hyperparameters
experiment = CVExperiment(
    model_initializer=LGBMClassifier,
    model_init_params={
        "boosting_type": "gbdt",
        "num_leaves": 31,
        "max_depth": -1,
        "subsample": 0.5,
    },
)

# And/or...
#################### 2. Hyperparameter Optimization ####################
# Search `boosting_type`, `num_leaves`, and `subsample` for 10 iterations; `max_depth` stays fixed
optimizer = RandomForestOptimization(random_state=32, iterations=10)
optimizer.set_experiment_guidelines(
    model_initializer=LGBMClassifier,
    model_init_params={
        "boosting_type": Categorical(["gbdt", "dart"]),
        "num_leaves": Integer(10, 40),
        "max_depth": -1,
        "subsample": Real(0.3, 0.7),
    },
)
optimizer.go()

# Notice that `optimizer` recognizes that the hyperparameters of our earlier `experiment` fit
# inside the search space/guidelines set for `optimizer`.

# Then, when optimization is started, it automatically learns from `experiment`'s results.
# Now that HyperparameterHunter has an active `Environment`, we can do two things:
#################### 1. Perform Experiments ####################
# Define one experiment with fixed `LGBMClassifier` hyperparameters, plus extra `fit` arguments
# naming the feature columns and the categorical feature columns taken from `train_df`
experiment = CVExperiment(
    model_initializer=LGBMClassifier,
    model_init_params={
        "boosting_type": "gbdt",
        "num_leaves": 31,
        "max_depth": -1,
        "subsample": 0.5,
    },
    model_extra_params={
        "fit": {
            # Every column except the last
            "feature_name": train_df.columns.values[:-1].tolist(),
            # Columns from index 11 up to, but not including, the last
            "categorical_feature": train_df.columns.values[11:-1].tolist(),
        },
    },
)

# And/or...
#################### 2. Hyperparameter Optimization ####################
# Configure a 10-iteration search over `boosting_type`, `num_leaves`, and `subsample`, passing
# extra `fit` arguments built from the columns of `train_df`
optimizer = RandomForestOptimization(random_state=32, iterations=10)
optimizer.set_experiment_guidelines(
    model_initializer=LGBMClassifier,
    model_init_params={
        "boosting_type": Categorical(["gbdt", "dart"]),
        "num_leaves": Integer(10, 40),
        "max_depth": -1,
        "subsample": Real(0.3, 0.7),
    },
    model_extra_params={
        "fit": {
            # Every column except the last
            "feature_name": train_df.columns.values[:-1].tolist(),
            # Columns from index 11 up to, but not including, the last
            "categorical_feature": train_df.columns.values[11:-1].tolist(),
        },
    },
)