def env_3():
    def printer_callback():
        def printer_helper(_rep, _fold, _run, last_evaluation_results):
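            # Print the current repetition / fold / run indices along with the most recent evaluation results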
            print(f"{_rep}.{_fold}.{_run}   {last_evaluation_results}")

        return lambda_callback(
            on_experiment_start=printer_helper,
            on_experiment_end=printer_helper,
            on_repetition_start=printer_helper,
            on_repetition_end=printer_helper,
            on_fold_start=printer_helper,
            on_fold_end=printer_helper,
            on_run_start=printer_helper,
            on_run_end=printer_helper,
        )

    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        metrics=["roc_auc_score"],
        holdout_dataset=get_toy_classification_data(),
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
        runs=2,
        experiment_callbacks=[
            printer_callback(),
            confusion_matrix_oof(),
            confusion_matrix_holdout(),
        ],
    )
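
# NOTE: The snippets in this collection omit their imports and shared fixtures. A rough, assumed setup
# (the exact module paths are a best guess, not shown in the snippets themselves) would be something like:
#
#     from hyperparameter_hunter import Environment, CVExperiment, Real, Integer, Categorical, lambda_callback
#     from hyperparameter_hunter.callbacks.recipes import confusion_matrix_oof, confusion_matrix_holdout
#     from hyperparameter_hunter.utils.learning_utils import get_toy_classification_data
#     from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold
#     from xgboost import XGBClassifier
#     from catboost import CatBoostClassifier
#     from rgf.sklearn import RGFClassifier
#
# Some snippets use an older HyperparameterHunter API (`CrossValidationExperiment`, `root_results_path`,
# `metrics_map`, `cross_validation_type`, `cross_validation_params`), while others use the newer names
# (`CVExperiment`, `results_path`, `metrics`, `cv_type`, `cv_params`). `assets_dir` in the fixtures is
# assumed to be a results directory defined elsewhere in the test suite.
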
def env_2():
    # noinspection PyUnusedLocal
    def get_holdout_set(train, target_column):
        return train, train.copy()

    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        holdout_dataset=get_holdout_set,
        test_dataset=get_toy_classification_data(),
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=3, shuffle=True, random_state=32),
    )
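
# The toy `get_holdout_set` above simply mirrors the training data. A more realistic holdout callable,
# assuming the same contract (it receives the train DataFrame and the target column name, and returns a
# `(train, holdout)` pair), might look like this sketch built on scikit-learn's `train_test_split`:
from sklearn.model_selection import train_test_split


def stratified_holdout_set(train, target_column):
    # Hold out 20% of the rows, stratified on the target so the class balance is preserved
    train_split, holdout_split = train_test_split(
        train, test_size=0.2, stratify=train[target_column], random_state=32
    )
    return train_split, holdout_split
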
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path="HyperparameterHunterAssets",
        metrics_map=["roc_auc_score"],
        cross_validation_type=RepeatedStratifiedKFold,
        cross_validation_params=dict(n_splits=3, n_repeats=2, random_state=32),
        do_full_save=do_full_save,
    )

    experiment_0 = CVExperiment(model_initializer=XGBClassifier,
                                model_init_params=dict(subsample=0.01))
    # Pro Tip: By setting XGBoost's subsample ridiculously low, we can get bad scores on purpose

    # Upon completion of this Experiment, we see a warning that not all result files will be saved
    # This is because the final score of the Experiment was below our threshold of 0.75
    # Specifically, we skipped saving prediction files (OOF, holdout, test, or in-fold), and the heartbeat file

    # What does still get saved: the Experiment's key information, leaderboard position, and description file
    # These are saved so the information can be used for future hyperparameter optimization and to detect repeated Experiments
    # Additionally, the Experiment's script backup is saved, but that's because it's one of the first things that happens
    # For even finer control over what gets saved, use `do_full_save` together with `file_blacklist`

    # Now, let's perform another Experiment that does a bit better than our intentionally miserable one
    experiment_1 = CVExperiment(model_initializer=XGBClassifier,
                                model_init_params=dict(subsample=0.5))
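
# For reference, `do_full_save` is passed to `Environment` above but not defined in this snippet.
# Consistent with the 0.75 threshold described in the comments (and with the `env_0` fixture later in this
# collection), it would look like this:
def do_full_save(experiment_result):
    # Only perform a full result save when the Experiment's final OOF ROC-AUC clears 0.75
    return experiment_result["final_evaluations"]["oof"]["roc_auc_score"] > 0.75
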
def _execute():
    env = Environment(
        train_dataset=get_toy_classification_data(target='diagnosis'),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=RepeatedStratifiedKFold,
        cross_validation_params=dict(n_splits=5, n_repeats=2, random_state=32),
    )

    optimizer = ExtraTreesOptimization(
        iterations=10,
        read_experiments=True,
        random_state=None,
    )

    optimizer.set_experiment_guidelines(
        model_initializer=RGFClassifier,
        model_init_params=dict(max_leaf=1000,
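                               # `max_leaf` stays fixed at 1000, while the `Real`/`Categorical` values
                               # below mark the dimensions the optimizer will search over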
                               algorithm=Categorical(
                                   ['RGF', 'RGF_Opt', 'RGF_Sib']),
                               l2=Real(0.01, 0.3),
                               normalize=Categorical([True, False]),
                               learning_rate=Real(0.3, 0.7),
                               loss=Categorical(['LS', 'Expo', 'Log', 'Abs'])),
    )

    optimizer.go()
Example #5
def _execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path='HyperparameterHunterAssets',
        metrics_map=['roc_auc_score'],
        cross_validation_type='StratifiedKFold',
        cross_validation_params=dict(n_splits=5, shuffle=True,
                                     random_state=32),
        runs=1,
    )

    optimizer = GradientBoostedRegressionTreeOptimization(
        iterations=10,
        read_experiments=True,
        random_state=None,
    )

    optimizer.set_experiment_guidelines(
        model_initializer=CatBoostClassifier,
        model_init_params=dict(iterations=100,
                               eval_metric=Categorical(
                                   ['Logloss', 'Accuracy', 'AUC'],
                                   transform='onehot'),
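                               # `transform='onehot'` above presumably one-hot encodes the `eval_metric`
                               # choices for the surrogate model, as in scikit-optimize's `Categorical`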
                               learning_rate=Real(low=0.0001, high=0.5),
                               depth=Integer(4, 7),
                               save_snapshot=False),
    )

    optimizer.go()

    print('')
Example #6
def env_fixture_1():
    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=None,
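        # `results_path=None` presumably means no result files are written to disk for this fixture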
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )
Example #7
def env_0():
    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        metrics=["roc_auc_score"],
        cv_type="RepeatedStratifiedKFold",
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
    )
Example #8
def env_fixture_0():
    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path="hyperparameter_hunter/__TEST__HyperparameterHunterAssets__",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )
Example #9
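# The `holdout_dataset` callable referenced below is the `get_holdout_set` shown in an earlier snippet;
# repeated here so this example stands alone:
def get_holdout_set(train, target_column):
    return train, train.copy()
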
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        # Both `holdout_dataset` and `train_dataset` can be any of the following: pandas.DataFrame, filepath, or None
        # If a filepath is provided, it will be passed to :meth:`pandas.read_csv`.
        # In addition to the above types, `holdout_dataset` can also be provided as a callable (see above :func:`get_holdout_set`)
        holdout_dataset=get_holdout_set,
        test_dataset=get_toy_classification_data(),
        # By default, `holdout_dataset` will be scored with the provided metrics, just like OOF predictions
        # However, you can provide the additional `metrics_params` kwarg to specify which metrics are calculated for each dataset
        # See the documentation in :class:`environment.Environment` and :class:`metrics.ScoringMixIn` for more information
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    experiment = CVExperiment(model_initializer=XGBClassifier,
                              model_init_params=dict(subsample=0.5))
Example #10
def env_0():
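    # Only perform a full result save (predictions, heartbeat file, etc.) when the Experiment's
    # final OOF ROC-AUC exceeds 0.75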
    def do_full_save(experiment_result):
        return experiment_result["final_evaluations"]["oof"]["roc_auc_score"] > 0.75

    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
        do_full_save=do_full_save,
    )
Example #11
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear", max_depth=3, n_estimators=100, subsample=0.5
        ),
    )
Example #12
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path='HyperparameterHunterAssets',
        metrics_map=['roc_auc_score'],
        cross_validation_type='StratifiedKFold',
        cross_validation_params=dict(n_splits=5, shuffle=True,
                                     random_state=32),
    )

    experiment = CrossValidationExperiment(model_initializer=XGBClassifier,
                                           model_init_params=dict(
                                               objective='reg:linear',
                                               max_depth=3,
                                               n_estimators=100,
                                               subsample=0.5))
Example #13
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(target='diagnosis'),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=RepeatedStratifiedKFold,
        cross_validation_params=dict(n_splits=5, n_repeats=2, random_state=32),
    )

    experiment = CrossValidationExperiment(
        model_initializer=RGFClassifier,
        model_init_params=dict(max_leaf=1000,
                               algorithm='RGF',
                               min_samples_leaf=10),
    )
Example #14
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path='HyperparameterHunterAssets',
        metrics_map=['roc_auc_score'],
        cross_validation_type=RepeatedStratifiedKFold,
        cross_validation_params=dict(n_splits=5, n_repeats=2, random_state=32),
        runs=2,
        # Just instantiate `Environment` with your list of callbacks, and go about business as usual
        experiment_callbacks=[printer_callback()],
    )

    experiment = CrossValidationExperiment(
        model_initializer=XGBClassifier,
        model_init_params={},
        model_extra_params=dict(fit=dict(verbose=False)),
    )
Example #15
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=5, n_repeats=2, random_state=32),
        runs=2,
        # Just instantiate `Environment` with your list of callbacks, and go about business as usual
        experiment_callbacks=[printer_callback(),
                              confusion_matrix_oof()],
        # In addition to the `printer_callback` defined above, we also add the `confusion_matrix_oof` callback
        # This, and other callbacks, can be found in `hyperparameter_hunter.callbacks.recipes`
    )

    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params={},
        model_extra_params=dict(fit=dict(verbose=False)),
    )
Example #16
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(target='diagnosis'),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=StratifiedKFold,
        cross_validation_params=dict(n_splits=5, shuffle=True,
                                     random_state=32),
        runs=2,
        # file_blacklist='ALL',
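        # (Uncommenting `file_blacklist='ALL'` above should stop result files from being written for this Experiment)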
    )

    experiment = CrossValidationExperiment(
        model_initializer=CatBoostClassifier,
        model_init_params=dict(iterations=500,
                               learning_rate=0.01,
                               depth=7,
                               allow_writing_files=False),
        # NOTE: `model_init_params` can contain any of the many kwargs accepted by :meth:`CatBoostClassifier.__init__`
        model_extra_params=dict(fit=dict(verbose=True)))