예제 #1
0
def test_1():
    """Run `set_nan_0` then `impute_negative_one_0` and check the results.

    Each step is registered on a fresh `FeatureEngineer`, and the name of
    the most recently added step is verified right after registration. The
    engineer is then invoked with "pre_cv" on copies of the Pima inputs and
    its train/holdout datasets are compared with hard-coded expectations.
    """
    # Only the input arrays are needed; the target arrays are discarded.
    train_inputs, _, holdout_inputs, _ = get_pima_data()

    engineer = FeatureEngineer()
    named_steps = (
        (set_nan_0, "set_nan_0"),
        (impute_negative_one_0, "impute_negative_one_0"),
    )
    for step, step_name in named_steps:
        engineer.add_step(step)
        # The step that was just added must be the last one registered.
        assert engineer._steps[-1].name == step_name

    # Copies are passed so the arrays from `get_pima_data` are not handed
    # to the engineer directly.
    engineer(
        "pre_cv",
        train_inputs=train_inputs.copy(),
        holdout_inputs=holdout_inputs.copy(),
    )

    expected_by_dataset = (
        (
            "train_inputs",
            [
                [1, 85, 66, 29, -1, 26.6, 0.351, 31],
                [8, 183, 64, -1, -1, 23.3, 0.672, 32],
                [1, 89, 66, 23, 94, 28.1, 0.167, 21],
                [0, 137, 40, 35, 168, 43.1, 2.288, 33],
            ],
        ),
        (
            "holdout_inputs",
            [[6, 148, 72, 35, -1, 33.6, 0.627, 50]],
        ),
    )
    for dataset_key, expected_values in expected_by_dataset:
        assert_array_almost_equal(engineer.datasets[dataset_key],
                                  expected_values)
예제 #2
0
def test_2():
    """Run `set_nan_0`, `impute_negative_one_0` and `standard_scale_0`.

    The three steps are registered in that order on a fresh
    `FeatureEngineer`, which is then invoked with "pre_cv" on copies of the
    Pima inputs. The resulting train/holdout datasets are compared with
    hard-coded expected values.
    """
    # Only the input arrays are needed; the target arrays are discarded.
    train_inputs, _, holdout_inputs, _ = get_pima_data()

    engineer = FeatureEngineer()
    for step in (set_nan_0, impute_negative_one_0, standard_scale_0):
        engineer.add_step(step)

    # Copies are passed so the arrays from `get_pima_data` are not handed
    # to the engineer directly.
    engineer(
        "pre_cv",
        train_inputs=train_inputs.copy(),
        holdout_inputs=holdout_inputs.copy(),
    )

    expected_by_dataset = (
        (
            "train_inputs",
            [
                [-0.468521, -0.962876, 0.636364, 0.548821,
                 -0.929624, -0.48321, -0.618238, 0.363422],
                [1.717911, 1.488081, 0.454545, -1.646464,
                 -0.929624, -0.917113, -0.235491, 0.571092],
                [-0.468521, -0.862837, 0.636364, 0.109764,
                 0.408471, -0.285982, -0.837632, -1.713275],
                [-0.780869, 0.337632, -1.727273, 0.987878,
                 1.450776, 1.686305, 1.691360, 0.778761],
            ],
        ),
        (
            "holdout_inputs",
            [
                [1.093216, 0.612739, 1.181818, 0.987878,
                 -0.929624, 0.437190, -0.289147, 4.309145],
            ],
        ),
    )
    for dataset_key, expected_values in expected_by_dataset:
        assert_array_almost_equal(engineer.datasets[dataset_key],
                                  expected_values)