Ejemplo n.º 1
0
def test_default():
    """Fit a default ``BinningProcess`` and inspect the "mean radius" result.

    Also exercises the error paths of ``get_binned_variable``: the integer
    ``1`` must raise ``TypeError`` and the name "new_variable" must raise
    ``ValueError``.
    """
    binning_process = BinningProcess(variable_names)
    binning_process.fit(X, y, check_input=True)

    # Error paths first: an integer argument, then a name that is
    # presumably not among variable_names.
    with raises(TypeError):
        binning_process.get_binned_variable(1)

    with raises(ValueError):
        binning_process.get_binned_variable("new_variable")

    # Reference values the fitted binning is compared against.
    expected_splits = [
        11.42500019, 12.32999992, 13.09499979,
        13.70499992, 15.04500008, 16.92500019,
    ]
    expected_iv = 5.04392547

    binned = binning_process.get_binned_variable("mean radius")
    assert binned.status == "OPTIMAL"
    assert binned.splits == approx(expected_splits, rel=1e-6)

    # The table is built explicitly before its ``iv`` attribute is read.
    table = binned.binning_table
    table.build()
    assert table.iv == approx(expected_iv, rel=1e-6)
Ejemplo n.º 2
0
def test_fit_params():
    """Fit with per-variable parameters for "mean radius" and check splits.

    ``max_n_bins`` is set to 4 for "mean radius" only; the test then checks
    the solver status and that no more than 4 split points were produced.
    """
    max_bins = 4
    per_variable_params = {"mean radius": {"max_n_bins": max_bins}}

    binning_process = BinningProcess(
        variable_names=variable_names,
        binning_fit_params=per_variable_params,
    )
    binning_process.fit(X, y)

    binned = binning_process.get_binned_variable("mean radius")
    assert binned.status == "OPTIMAL"

    # NOTE(review): this bounds the number of split points by the value
    # given as max_n_bins; a tighter bound may be intended - confirm.
    assert len(binned.splits) <= max_bins
Ejemplo n.º 3
0
def test_default_transform_multiclass():
    """Compare the process transform with a standalone multiclass binning.

    Fits a ``BinningProcess`` on the ``load_wine`` data, checks that the
    binning stored for the first feature is a ``MulticlassOptimalBinning``,
    and verifies that column 5 of the process output matches a
    ``MulticlassOptimalBinning`` fitted directly on that column.
    """
    wine = load_wine()
    feature_names = wine.feature_names
    features = wine.data
    target = wine.target

    binning_process = BinningProcess(feature_names)
    binning_process.fit(features, target)
    transformed = binning_process.transform(features)

    fitted = binning_process.get_binned_variable(feature_names[0])
    assert isinstance(fitted, MulticlassOptimalBinning)

    # Re-fit a single column on its own and compare with the process output.
    column = 5
    single_feature = features[:, column]
    standalone = MulticlassOptimalBinning()
    standalone.fit(single_feature, target)
    assert standalone.transform(single_feature) == approx(
        transformed[:, column], rel=1e-6)
Ejemplo n.º 4
0
def test_default_transform_continuous():
    """Compare the process transform with a standalone continuous binning.

    Fits a ``BinningProcess`` on the ``load_boston`` data, checks that the
    binning stored for the first feature is a ``ContinuousOptimalBinning``,
    and verifies that column 5 of the ``metric="mean"`` output matches a
    ``ContinuousOptimalBinning`` fitted directly on that column.
    """
    # NOTE(review): load_boston is no longer shipped by recent scikit-learn
    # releases - confirm the pinned version still provides it.
    boston = load_boston()
    feature_names = boston.feature_names
    features = boston.data
    target = boston.target

    binning_process = BinningProcess(feature_names)
    binning_process.fit(features, target)
    transformed = binning_process.transform(features, metric="mean")

    fitted = binning_process.get_binned_variable(feature_names[0])
    assert isinstance(fitted, ContinuousOptimalBinning)

    # Re-fit a single column on its own and compare with the process output.
    column = 5
    single_feature = features[:, column]
    standalone = ContinuousOptimalBinning()
    standalone.fit(single_feature, target)
    standalone_output = standalone.transform(single_feature, metric="mean")
    assert standalone_output == approx(transformed[:, column], rel=1e-6)
Ejemplo n.º 5
0
def test_default_pandas():
    """Fit a default ``BinningProcess`` on a pandas ``DataFrame``.

    A plain ``dict`` input must raise ``TypeError``; fitting on the
    ``DataFrame`` itself is then checked against reference values for the
    "mean radius" variable.
    """
    frame = pd.DataFrame(data.data, columns=data.feature_names)
    binning_process = BinningProcess(variable_names)

    # The dict form of the frame is rejected when check_input=True.
    with raises(TypeError):
        binning_process.fit(frame.to_dict(), y, check_input=True)

    binning_process.fit(frame, y, check_input=True)

    # Reference values the fitted binning is compared against.
    expected_splits = [
        11.42500019, 12.32999992, 13.09499979,
        13.70499992, 15.04500008, 16.92500019,
    ]
    expected_iv = 5.04392547

    binned = binning_process.get_binned_variable("mean radius")
    assert binned.status == "OPTIMAL"
    assert binned.splits == approx(expected_splits, rel=1e-6)

    # The table is built explicitly before its ``iv`` attribute is read.
    table = binned.binning_table
    table.build()
    assert table.iv == approx(expected_iv, rel=1e-6)