def execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_boston_data(),
        results_path="HyperparameterHunterAssets",
        holdout_dataset=get_holdout_data,
        target_column="DIS",
        metrics=["r2_score", "median_absolute_error"],
        cv_type="KFold",
        cv_params=dict(n_splits=10, random_state=1),
    )

    #################### CVExperiment ####################
    exp_0 = CVExperiment(
        model_initializer=Ridge,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([quantile_transform]),
    )

    #################### Optimization ####################
    # `opt_0` recognizes `exp_0`'s `feature_engineer` and its results as valid learning material
    # This is because `opt_0` marks the engineer step functions omitted by `exp_0` as `optional=True`
    opt_0 = DummyOptPro(iterations=10)
    opt_0.forge_experiment(
        model_initializer=Ridge,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([
            Categorical([quantile_transform, log_transform], optional=True),
            Categorical([standard_scale, standard_scale_BAD], optional=True),
            Categorical([square_sum_feature], optional=True),
        ]),
    )
    opt_0.go()
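# Illustrative sketch only: the engineer step functions referenced above (`quantile_transform`,
# `log_transform`, `standard_scale`, etc.) are defined elsewhere in this example script. A
# target-transforming step might look roughly like the following, requesting datasets by
# parameter name and returning the fitted transformer as an extra value so that predictions can
# later be inverted. The exact body here is an assumption, not the library's definition.
from sklearn.preprocessing import QuantileTransformer


def quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="normal")
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets)
    non_train_targets[non_train_targets.columns] = transformer.transform(non_train_targets)
    return train_targets, non_train_targets, transformer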
def test_feature_engineer_list_experiment_equality(env_boston, steps_0, steps_1):
    """Test that the `feature_engineer` attribute constructed by
    :class:`~hyperparameter_hunter.experiments.CVExperiment` is the same whether it was given a
    list as input, or a :class:`~hyperparameter_hunter.feature_engineering.FeatureEngineer`"""
    exp_0 = CVExperiment(Ridge, feature_engineer=steps_0)
    exp_1 = CVExperiment(Ridge, feature_engineer=FeatureEngineer(steps_1))
    assert exp_0.feature_engineer == exp_1.feature_engineer

    # Repeat above, but switch which steps are wrapped in `FeatureEngineer`
    exp_2 = CVExperiment(Ridge, feature_engineer=steps_1)
    exp_3 = CVExperiment(Ridge, feature_engineer=FeatureEngineer(steps_0))
    assert exp_2.feature_engineer == exp_3.feature_engineer
def test_feature_engineer_list_optimization_equality(env_boston, steps_0, steps_1):
    """Test that the `feature_engineer` attribute constructed by an OptPro is the same whether
    given a list as input, or a
    :class:`~hyperparameter_hunter.feature_engineering.FeatureEngineer`"""
    opt_0, opt_1, opt_2, opt_3 = GBRT(), GBRT(), GBRT(), GBRT()

    opt_0.forge_experiment(Ridge, feature_engineer=steps_0)
    opt_1.forge_experiment(Ridge, feature_engineer=FeatureEngineer(steps_1))
    assert opt_0.feature_engineer == opt_1.feature_engineer

    # Repeat above, but switch which steps are wrapped in `FeatureEngineer`
    opt_2.forge_experiment(Ridge, feature_engineer=steps_1)
    opt_3.forge_experiment(Ridge, feature_engineer=FeatureEngineer(steps_0))
    assert opt_2.feature_engineer == opt_3.feature_engineer
def test_feature_engineer_list_experiment_inequality(env_boston, steps_0, steps_1):
    """Test that the `feature_engineer` attribute constructed by
    :class:`~hyperparameter_hunter.experiments.CVExperiment` is NOT the same when given a list as
    input vs. a :class:`~hyperparameter_hunter.feature_engineering.FeatureEngineer` when the two
    are actually different. This is a sanity check to make sure that the related test in this
    module, :func:`test_feature_engineer_list_experiment_equality`, is not simply equating
    everything"""
    exp_0 = CVExperiment(Ridge, feature_engineer=steps_0)
    exp_1 = CVExperiment(Ridge, feature_engineer=FeatureEngineer(steps_1))
    assert exp_0.feature_engineer != exp_1.feature_engineer

    # Repeat above, but switch which steps are wrapped in `FeatureEngineer`
    exp_2 = CVExperiment(Ridge, feature_engineer=steps_1)
    exp_3 = CVExperiment(Ridge, feature_engineer=FeatureEngineer(steps_0))
    assert exp_2.feature_engineer != exp_3.feature_engineer
def execute():
    env = Environment(
        train_dataset=get_imbalanced_dataset(),
        results_path="HyperparameterHunterAssets",
        target_column="target",
        metrics=["roc_auc_score", "accuracy_score"],
        cv_type="KFold",
        cv_params=dict(n_splits=5, random_state=7),
    )

    # Since this is HyperparameterHunter, after all, we'll throw in some classic hyperparameter
    # optimization just for fun. If you're like most people and you think it's absurd to test
    # 18 different `imblearn` techniques, feel free to comment out some `EngineerStep`s below
    opt_0 = ET(iterations=20, random_state=32)
    opt_0.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            n_estimators=Integer(50, 900),
            learning_rate=Real(0.0001, 0.9),
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear"]),
        ),
        feature_engineer=FeatureEngineer([
            Categorical(
                [
                    EngineerStep(resample_smote_tomek, stage="intra_cv"),
                    EngineerStep(over_sample_random, stage="intra_cv"),
                    EngineerStep(over_sample_smote, stage="intra_cv"),
                    EngineerStep(under_sample_random, stage="intra_cv"),
                    EngineerStep(under_sample_cluster_centroids, stage="intra_cv"),
                    EngineerStep(under_sample_tomek_links, stage="intra_cv"),
                    #################### GROUP 2 (EXTENDED) ####################
                    EngineerStep(resample_smote_enn, stage="intra_cv"),
                    EngineerStep(over_sample_ADASYN, stage="intra_cv"),
                    EngineerStep(over_sample_BorderlineSMOTE, stage="intra_cv"),
                    EngineerStep(over_sample_SVMSMOTE, stage="intra_cv"),
                    EngineerStep(under_sample_NearMiss, stage="intra_cv"),
                    EngineerStep(under_sample_CondensedNearestNeighbour, stage="intra_cv"),
                    EngineerStep(under_sample_OneSidedSelection, stage="intra_cv"),
                    EngineerStep(under_sample_NeighbourhoodCleaningRule, stage="intra_cv"),
                    EngineerStep(under_sample_EditedNearestNeighbours, stage="intra_cv"),
                    EngineerStep(under_sample_RepeatedEditedNearestNeighbour, stage="intra_cv"),
                    EngineerStep(under_sample_AllKNN, stage="intra_cv"),
                    EngineerStep(under_sample_InstanceHardnessThreshold, stage="intra_cv"),
                ],
                optional=True,
            )
        ]),
    )
    opt_0.go()
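# Illustrative sketch only: each of the resampling steps above is defined elsewhere in this
# example. One of them might look roughly like the following, applying imblearn's
# `RandomOverSampler` to the training fold only (hence `stage="intra_cv"`). The function body
# and the DataFrame re-wrapping are assumptions, not the example's actual definition.
import pandas as pd
from imblearn.over_sampling import RandomOverSampler


def over_sample_random(train_inputs, train_targets):
    sampler = RandomOverSampler(random_state=32)
    new_inputs, new_targets = sampler.fit_resample(train_inputs, train_targets.values.ravel())
    train_inputs = pd.DataFrame(new_inputs, columns=train_inputs.columns)
    train_targets = pd.DataFrame(new_targets, columns=train_targets.columns)
    return train_inputs, train_targets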
def prepped_experiment(request):
    """Build a partially prepared :class:`~hyperparameter_hunter.experiments.CVExperiment`
    instance. Specifically, automatic execution is disabled via `auto_start=False`, then the
    following methods are called:

    1. :meth:`~hyperparameter_hunter.experiments.BaseExperiment.preparation_workflow`,
    2. :meth:`~hyperparameter_hunter.experiments.BaseExperiment._initialize_random_seeds`, and
    3. :meth:`~hyperparameter_hunter.experiments.BaseExperiment.on_exp_start`, which initializes
       the four :mod:`~hyperparameter_hunter.data.datasets` classes, then performs pre-CV
       feature engineering

    Notes
    -----
    Directly calling `on_exp_start` is ok in this test because, after calling
    `_initialize_random_seeds`, `BaseExperiment` calls `execute`, which is implemented by
    `BaseCVExperiment` and only calls `cross_validation_workflow`, whose first task is to call
    `on_exp_start`. So nothing gets skipped in between"""
    #################### Build `feature_engineer` ####################
    feature_engineer = FeatureEngineer(steps=request.param)

    #################### Partially Prepare `CVExperiment` ####################
    experiment = CVExperiment(
        model_initializer=AdaBoostClassifier,
        model_init_params=dict(),
        feature_engineer=feature_engineer,
        auto_start=False,
    )
    experiment.preparation_workflow()
    # noinspection PyProtectedMember
    experiment._initialize_random_seeds()
    experiment.on_exp_start()
    return experiment
def test_validate_fe_steps_error_candidate_too_big(candidate, template, candidate_step_cast):
    """Test that `IncompatibleCandidateError` is raised by `validate_fe_steps` when `candidate`
    has more steps than `template`. See `test_validate_fe_steps` for parameter descriptions"""
    with pytest.raises(IncompatibleCandidateError):
        validate_fe_steps(candidate_step_cast(candidate), FeatureEngineer(template))
def test_feature_engineer_list_optimization_inequality(env_boston, steps_0, steps_1):
    """Test that the `feature_engineer` attribute constructed by an OptPro is NOT the same when
    given a list as input vs. a
    :class:`~hyperparameter_hunter.feature_engineering.FeatureEngineer` when the two are actually
    different. This is a sanity check to make sure that the related test in this module,
    :func:`test_feature_engineer_list_optimization_equality`, is not simply equating everything"""
    opt_0, opt_1, opt_2, opt_3 = GBRT(), GBRT(), GBRT(), GBRT()

    opt_0.forge_experiment(Ridge, feature_engineer=steps_0)
    opt_1.forge_experiment(Ridge, feature_engineer=FeatureEngineer(steps_1))
    assert opt_0.feature_engineer != opt_1.feature_engineer

    # Repeat above, but switch which steps are wrapped in `FeatureEngineer`
    opt_2.forge_experiment(Ridge, feature_engineer=steps_1)
    opt_3.forge_experiment(Ridge, feature_engineer=FeatureEngineer(steps_0))
    assert opt_2.feature_engineer != opt_3.feature_engineer
def test_validate_fe_steps_error_categorical_mismatch(candidate, template, candidate_step_cast):
    """Test that `IncompatibleCandidateError` is raised by `validate_fe_steps` when `candidate`
    has a step that does not fit in a `Categorical` step in `template`. See
    `test_validate_fe_steps` for parameter descriptions"""
    with pytest.raises(IncompatibleCandidateError):
        validate_fe_steps(candidate_step_cast(candidate), FeatureEngineer(template))
def test_validate_fe_steps(candidate, template, expected, candidate_step_cast):
    """Test that `validate_fe_steps` produces the `expected` output

    Parameters
    ----------
    candidate: List
        `candidate` value given to :func:`~hyperparameter_hunter.result_reader.validate_fe_steps`
    template: List
        `template` value given to :func:`~hyperparameter_hunter.result_reader.validate_fe_steps`
    expected: List
        Output expected from invoking `validate_fe_steps` with `candidate` and `template`
    candidate_step_cast: Callable
        Fixture (see :func:`candidate_step_cast`) applied to `candidate` before invoking
        `validate_fe_steps`"""
    actual = validate_fe_steps(candidate_step_cast(candidate), FeatureEngineer(template))
    # Because `actual` is going to be a list of `EngineerStep`/`RejectedOptional`, `expected` must
    # also be passed through a `FeatureEngineer` to convert each function to an `EngineerStep`
    assert actual == FeatureEngineer(expected).steps
def test_validate_fe_steps_error_concrete_missing(candidate, template, candidate_step_cast):
    """Test that `IncompatibleCandidateError` is raised by `validate_fe_steps` when `candidate`
    is missing a concrete (non-`Categorical`) step in `template`. See `test_validate_fe_steps`
    for parameter descriptions"""
    with pytest.raises(IncompatibleCandidateError):
        validate_fe_steps(candidate_step_cast(candidate), FeatureEngineer(template))
def fe_optimizer(request):
    if request.param is not None:
        request.param = FeatureEngineer(request.param)
    opt = BayesianOptPro()
    opt.forge_experiment(
        model_initializer=Ridge, model_init_params={}, feature_engineer=request.param
    )
    opt.go()
    return opt
def test_do_not_validate(env_boston):
    exp = CVExperiment(
        model_initializer=Ridge,
        model_init_params={},
        feature_engineer=FeatureEngineer([standard_scale], do_validate=False),
    )
    for step in exp.feature_engineer.steps:
        assert step.original_hashes == {}
        assert step.updated_hashes == {}
def test_inverse_type_error(env_boston):
    """Test that an error is raised if an `EngineerStep` function returns an extra value that is
    not a function or class instance. Extra return values are used for inverse transformations"""
    with pytest.raises(TypeError, match="`inversion` must be callable, or class with .*"):
        exp = CVExperiment(
            model_initializer=Ridge,
            model_init_params={},
            feature_engineer=FeatureEngineer([bad_quantile_transform]),
        )
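# Illustrative sketch only: `bad_quantile_transform` is defined elsewhere in this test module.
# It presumably mirrors a valid target-transforming step, except that its extra return value
# (the slot reserved for the inverse transformer) is not callable, which should trigger the
# `TypeError` asserted above. The body below is an assumption:
from sklearn.preprocessing import QuantileTransformer


def bad_quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="normal")
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets)
    non_train_targets[non_train_targets.columns] = transformer.transform(non_train_targets)
    return train_targets, non_train_targets, "not callable"  # Invalid `inversion` value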
def experiment_fixture(request):
    #################### Build `feature_engineer` ####################
    feature_engineer = FeatureEngineer(steps=request.param)

    #################### Execute `CVExperiment` ####################
    experiment = CVExperiment(
        model_initializer=AdaBoostClassifier,
        model_init_params=dict(),
        feature_engineer=feature_engineer,
    )
    return experiment
def test_reg_engineer(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate problem with `BayesianOptPro` specifically - same configuration is fine with
    all other `OptPro`s"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
def test_reg_engineer_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_engineer`, except an `Integer` dimension is added to show that
    everything is fine now. The problem is limited not only to `BayesianOptPro`, but also to
    exclusively-`Categorical` search spaces"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(n_estimators=Integer(10, 40)),
        feature_engineer=FeatureEngineer([Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
def test_reg_engineer_categorical(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate that `BayesianOptPro` breaks with multiple `Categorical`s when
    `FeatureEngineer` is included in the dimensions"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(loss=Categorical(["linear", "square", "exponential"])),
        feature_engineer=FeatureEngineer([Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
def test_similar_experiments_unordered():
    """Check that an experiment with a single `EngineerStep` is considered "similar" by an
    Optimization Protocol whose `FeatureEngineer` has two `optional` `EngineerStep`s, where the
    second step is identical to the single step used by the standalone experiment

    As of v3.0.0alpha2, this is expected to fail because the otherwise identical engineer steps
    occur at different indexes in `FeatureEngineer.steps` for the experiment and the OptPro. The
    experiment has `sqr_sum_feature` at index=0, while the same step in the OptPro is at index=1.
    Note that the step index in the OptPro is still 1 despite the fact that the other step
    immediately preceding it is `optional`"""
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path=assets_dir,
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    exp = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear", subsample=0.5, max_depth=3),
        feature_engineer=FeatureEngineer([EngineerStep(sqr_sum_feature)]),
    )

    opt = BayesianOptPro(iterations=1)
    opt.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear", subsample=0.5, max_depth=3),
        feature_engineer=FeatureEngineer([
            Categorical([standard_scale, normalize, min_max_scale], optional=True),
            Categorical([sqr_sum_feature], optional=True),
        ]),
    )
    opt.go()

    assert exp.experiment_id in [_[2] for _ in opt.similar_experiments]
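# Illustrative sketch only: `sqr_sum_feature` is defined elsewhere in this test module. Judging
# by its name, it likely appends an engineered column such as the row-wise sum of squared
# feature values; the column name and exact formula below are assumptions:
def sqr_sum_feature(all_inputs):
    all_inputs["sqr_sum"] = (all_inputs ** 2).sum(axis="columns")
    return all_inputs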
def test_1():
    train_inputs, train_targets, holdout_inputs, holdout_targets = get_pima_data()

    feature_engineer = FeatureEngineer()
    feature_engineer.add_step(set_nan_0)
    assert feature_engineer._steps[-1].name == "set_nan_0"
    feature_engineer.add_step(impute_negative_one_0)
    assert feature_engineer._steps[-1].name == "impute_negative_one_0"

    feature_engineer(
        "pre_cv", train_inputs=train_inputs.copy(), holdout_inputs=holdout_inputs.copy()
    )

    expected_train_inputs = [
        [1, 85, 66, 29, -1, 26.6, 0.351, 31],
        [8, 183, 64, -1, -1, 23.3, 0.672, 32],
        [1, 89, 66, 23, 94, 28.1, 0.167, 21],
        [0, 137, 40, 35, 168, 43.1, 2.288, 33],
    ]
    expected_holdout_inputs = [[6, 148, 72, 35, -1, 33.6, 0.627, 50]]

    assert_array_almost_equal(feature_engineer.datasets["train_inputs"], expected_train_inputs)
    assert_array_almost_equal(feature_engineer.datasets["holdout_inputs"], expected_holdout_inputs)
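# Illustrative sketch only: `set_nan_0` and `impute_negative_one_0` are defined elsewhere in
# this test module. Judging by the expected values above (zeros in measurement columns such as
# insulin end up as -1), they likely behave roughly as follows; the exact column selection is an
# assumption:
import numpy as np


def set_nan_0(all_inputs):
    # Replace zeros (which denote missing measurements in the Pima dataset) with NaN
    all_inputs.iloc[:, 1:6] = all_inputs.iloc[:, 1:6].replace(0, np.nan)
    return all_inputs


def impute_negative_one_0(all_inputs):
    # Fill the NaNs created by `set_nan_0` with -1
    all_inputs = all_inputs.fillna(-1)
    return all_inputs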
def engineer_experiment(request):
    """`CVExperiment` fixture that supports provision of a `feature_engineer` through `request`

    Parameters
    ----------
    request: Object
        If `request` has a "param" attribute, it must be a list of feature engineering steps to
        provide to :class:`~hyperparameter_hunter.feature_engineering.FeatureEngineer`"""
    feature_engineer = FeatureEngineer(steps=getattr(request, "param", None))
    experiment = CVExperiment(
        model_initializer=SVC, model_init_params=dict(), feature_engineer=feature_engineer
    )
    return experiment
def test_reg_engineer_categorical_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_engineer_categorical`, except `Integer` added to demonstrate that
    all `OptPro`s can optimize with `FeatureEngineer` if space is not exclusively `Categorical`"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(
            loss=Categorical(["linear", "square", "exponential"]),
            n_estimators=Integer(10, 40),
        ),
        feature_engineer=FeatureEngineer([Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go()
def experiment_prep_fixture(request):
    #################### Build `feature_engineer` ####################
    feature_engineer = FeatureEngineer(steps=request.param)

    #################### Partially Prepare `CVExperiment` ####################
    experiment = CVExperiment(
        model_initializer=AdaBoostClassifier,
        model_init_params=dict(),
        feature_engineer=feature_engineer,
        auto_start=False,
    )
    experiment.preparation_workflow()
    # noinspection PyProtectedMember
    experiment._initialize_random_seeds()
    # noinspection PyProtectedMember
    experiment._initial_preprocessing()
    return experiment
def opt_pro(optimization_protocol):
    opt = optimization_protocol(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=XGBRegressor,
        model_init_params=dict(
            max_depth=Integer(2, 10),
            n_estimators=Integer(50, 300),
            learning_rate=Real(0.1, 0.9),
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear"]),
        ),
        model_extra_params=dict(fit=dict(eval_metric=Categorical(["rmse", "mae"]))),
        feature_engineer=FeatureEngineer([Categorical([nothing_transform], optional=True)]),
    )
    opt.go()
    return opt
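# Illustrative sketch only: `nothing_transform` is defined elsewhere in this test module. As an
# identity step, it presumably returns its inputs unchanged, along with a no-op inversion so the
# step still satisfies the `inversion` requirements. The body below is an assumption:
def nothing_transform(train_targets, non_train_targets):
    return train_targets, non_train_targets, lambda d: d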
def test_2():
    train_inputs, train_targets, holdout_inputs, holdout_targets = get_pima_data()

    feature_engineer = FeatureEngineer()
    feature_engineer.add_step(set_nan_0)
    feature_engineer.add_step(impute_negative_one_0)
    feature_engineer.add_step(standard_scale_0)

    feature_engineer(
        "pre_cv", train_inputs=train_inputs.copy(), holdout_inputs=holdout_inputs.copy()
    )

    expected_train_inputs = [
        [-0.468521, -0.962876, 0.636364, 0.548821, -0.929624, -0.48321, -0.618238, 0.363422],
        [1.717911, 1.488081, 0.454545, -1.646464, -0.929624, -0.917113, -0.235491, 0.571092],
        [-0.468521, -0.862837, 0.636364, 0.109764, 0.408471, -0.285982, -0.837632, -1.713275],
        [-0.780869, 0.337632, -1.727273, 0.987878, 1.450776, 1.686305, 1.691360, 0.778761],
    ]
    expected_holdout_inputs = [
        [1.093216, 0.612739, 1.181818, 0.987878, -0.929624, 0.437190, -0.289147, 4.309145]
    ]

    assert_array_almost_equal(feature_engineer.datasets["train_inputs"], expected_train_inputs)
    assert_array_almost_equal(feature_engineer.datasets["holdout_inputs"], expected_holdout_inputs)
def test_validate_fe_steps_error_categorical_missing(
    candidate, template, candidate_suffix, template_suffix, candidate_step_cast
):
    """Test that `IncompatibleCandidateError` is raised by `validate_fe_steps` when `candidate`
    is missing a non-`optional` `Categorical` step in `template`

    Parameters
    ----------
    candidate: List
        `candidate` value given to :func:`~hyperparameter_hunter.result_reader.validate_fe_steps`
    template: List
        `template` value given to :func:`~hyperparameter_hunter.result_reader.validate_fe_steps`
    candidate_suffix: List
        Additional steps to append to the end of `candidate` before invoking `validate_fe_steps`
    template_suffix: List
        Additional steps to append to the end of `template` before invoking `validate_fe_steps`"""
    with pytest.raises(IncompatibleCandidateError):
        validate_fe_steps(
            candidate_step_cast(candidate + candidate_suffix),
            FeatureEngineer(template + template_suffix),
        )
def es_d(all_inputs):
    return all_inputs


def es_e(all_inputs):
    return all_inputs


##################################################
# Fixtures
##################################################
@pytest.fixture(
    params=[FeatureEngineer, lambda _: FeatureEngineer(_).get_key_data()["steps"]],
    ids=["EngineerSteps", "step_dicts"],
)
def candidate_step_cast(request):
    """Processing method applied to `candidate` to produce the candidate steps passed to
    :func:`~hyperparameter_hunter.result_reader.validate_fe_steps`. May be either 1) instantiation
    as a `FeatureEngineer` (which is how `template` is processed), or 2) the result of invoking
    :meth:`~hyperparameter_hunter.feature_engineering.FeatureEngineer.get_key_data` on the former,
    then taking its "steps" value. The second method produces a list of `EngineerStep`-like dicts,
    which more closely resembles a candidate retrieved from a saved Experiment result description
    file"""
    return request.param


##################################################
def execute(): env = Environment( train_dataset="data/train.csv", test_dataset="data/test.csv", results_path="HyperparameterHunterAssets", target_column="target", metrics=dict(gini=gini_normalized_c), id_column="id", cv_type=StratifiedKFold, cv_params=dict(n_splits=5, shuffle=True, random_state=15), do_predict_proba=1, to_csv_params=dict( index=False), # Drops index from final prediction files ) exp = CVExperiment( model_initializer=XGBClassifier, model_init_params=dict( n_estimators=200, max_depth=4, objective="binary:logistic", learning_rate=0.1, subsample=0.8, colsample_bytree=0.8, gamma=1, reg_alpha=0, reg_lambda=1, nthread=2, ), model_extra_params=dict( eval_set=[ (env.train_input, env.train_target), (env.validation_input, env.validation_target), ], eval_metric=gini_xgb, early_stopping_rounds=None, verbose=False, ), feature_engineer=FeatureEngineer([ feature_combinations, EngineerStep(upsample_train_data, stage="intra_cv") ]), feature_selector=[ "ps_car_13", # : 1571.65 / shadow 609.23 "ps_reg_03", # : 1408.42 / shadow 511.15 "ps_ind_05_cat", # : 1387.87 / shadow 84.72 "ps_ind_03", # : 1219.47 / shadow 230.55 "ps_ind_15", # : 922.18 / shadow 242.00 "ps_reg_02", # : 920.65 / shadow 267.50 "ps_car_14", # : 798.48 / shadow 549.58 "ps_car_12", # : 731.93 / shadow 293.62 "ps_car_01_cat", # : 698.07 / shadow 178.72 "ps_car_07_cat", # : 694.53 / shadow 36.35 "ps_ind_17_bin", # : 620.77 / shadow 23.15 "ps_car_03_cat", # : 611.73 / shadow 50.67 "ps_reg_01", # : 598.60 / shadow 178.57 "ps_car_15", # : 593.35 / shadow 226.43 "ps_ind_01", # : 547.32 / shadow 154.58 "ps_ind_16_bin", # : 475.37 / shadow 34.17 "ps_ind_07_bin", # : 435.28 / shadow 28.92 "ps_car_06_cat", # : 398.02 / shadow 212.43 "ps_car_04_cat", # : 376.87 / shadow 76.98 "ps_ind_06_bin", # : 370.97 / shadow 36.13 "ps_car_09_cat", # : 214.12 / shadow 81.38 "ps_car_02_cat", # : 203.03 / shadow 26.67 "ps_ind_02_cat", # : 189.47 / shadow 65.68 "ps_car_11", # : 173.28 / shadow 76.45 "ps_car_05_cat", # : 172.75 / shadow 62.92 "ps_calc_09", # : 169.13 / shadow 129.72 "ps_calc_05", # : 148.83 / shadow 120.68 "ps_ind_08_bin", # : 140.73 / shadow 27.63 "ps_car_08_cat", # : 120.87 / shadow 28.82 "ps_ind_09_bin", # : 113.92 / shadow 27.05 "ps_ind_04_cat", # : 107.27 / shadow 37.43 "ps_ind_18_bin", # : 77.42 / shadow 25.97 "ps_ind_12_bin", # : 39.67 / shadow 15.52 "ps_ind_14", # : 37.37 / shadow 16.65 "ps_car_11_cat", # Very nice spot from Tilii : https://www.kaggle.com/tilii7 ], )
def feature_engineer(request):
    return FeatureEngineer(steps=request.param)
def fe_experiment(request):
    if request.param is not None:
        request.param = FeatureEngineer(request.param)
    return CVExperiment(
        model_initializer=Ridge, model_init_params={}, feature_engineer=request.param
    )