def test_default():
    """Solve a sketch built from ``df``/``y`` and check "mean radius".

    The binned variable must report an ``"OPTIMAL"`` status and its binning
    table ``iv`` must match the reference value within 1% relative tolerance.
    """
    sketch = BinningProcessSketch(variable_names)
    sketch.add(df, y)
    sketch.solve()

    binned = sketch.get_binned_variable("mean radius")
    assert binned.status == "OPTIMAL"

    # The table has to be built before its ``iv`` is read.
    binned.binning_table.build()
    expected_iv = approx(5.04392547, rel=1e-2)
    assert binned.binning_table.iv == expected_iv
def test_default_merge():
    """Merge two sketches fed with disjoint row ranges and check the result.

    The first 200 rows go to one sketch and the remaining rows to another;
    after merging the second into the first and solving, the "mean radius"
    binning must be ``"OPTIMAL"`` with the same reference ``iv`` (1% relative
    tolerance) used by the single-sketch test.
    """
    split_at = 200

    head_sketch = BinningProcessSketch(variable_names)
    tail_sketch = BinningProcessSketch(variable_names)

    head_sketch.add(df.iloc[:split_at, :], y[:split_at])
    tail_sketch.add(df.iloc[split_at:, :], y[split_at:])

    # Fold the second partition into the first, then solve once.
    head_sketch.merge(tail_sketch)
    head_sketch.solve()

    binned = head_sketch.get_binned_variable("mean radius")
    assert binned.status == "OPTIMAL"

    binned.binning_table.build()
    expected_iv = approx(5.04392547, rel=1e-2)
    assert binned.binning_table.iv == expected_iv
def test_information():
    """Exercise ``information()`` before and after solving.

    Checks that ``solve()`` without data and ``information()`` before solving
    raise ``NotFittedError``, that a negative ``print_level`` raises
    ``ValueError``, and that print levels 0, 1 and 2 run without error.
    """
    sketch = BinningProcessSketch(variable_names)

    # No data has been added yet.
    with raises(NotFittedError):
        sketch.solve()

    sketch.add(df, y)

    # Data added but not solved.
    with raises(NotFittedError):
        sketch.information()

    sketch.solve()

    with raises(ValueError):
        sketch.information(print_level=-1)

    for level in (0, 1, 2):
        sketch.information(print_level=level)
def test_params():
    """Check that invalid constructor arguments are rejected.

    Each case constructs a ``BinningProcessSketch`` with one bad setting (or
    one inconsistent pair of settings) and expects the listed exception type.
    """
    # (expected exception, keyword arguments layered over variable_names=[])
    invalid_cases = (
        (TypeError, {"variable_names": 1}),
        (ValueError, {"max_n_prebins": -2}),
        (ValueError, {"min_n_bins": -2}),
        (ValueError, {"max_n_bins": -2.2}),
        (ValueError, {"min_n_bins": 3, "max_n_bins": 2}),
        (ValueError, {"min_bin_size": 0.6}),
        (ValueError, {"max_bin_size": -0.6}),
        (ValueError, {"min_bin_size": 0.5, "max_bin_size": 0.3}),
        (ValueError, {"max_pvalue": 1.1}),
        (ValueError, {"max_pvalue_policy": "new_policy"}),
        (TypeError, {"selection_criteria": []}),
        (TypeError, {"categorical_variables": {}}),
        (TypeError, {"categorical_variables": [1, 2]}),
        (TypeError, {"special_codes": {1, 2, 3}}),
        (ValueError, {"split_digits": 9}),
        (TypeError, {"binning_fit_params": [1, 2]}),
        (TypeError, {"binning_transform_params": [1, 2]}),
        (TypeError, {"verbose": 1}),
    )

    for expected_error, overrides in invalid_cases:
        # A fresh empty list is used unless the case overrides variable_names.
        kwargs = {"variable_names": [], **overrides}
        with raises(expected_error):
            BinningProcessSketch(**kwargs)
def test_default_transform():
    """Check ``transform()`` error handling and its default output.

    Verifies that transforming before solving raises ``NotFittedError``, that
    passing ``df.values`` raises ``TypeError`` and an unknown metric raises
    ``ValueError``, and that the default transform of "mean radius" matches
    the ``"woe"`` transform of a standalone ``OptimalBinningSketch`` fed with
    the same column.
    """
    column = "mean radius"

    sketch = BinningProcessSketch(variable_names)
    sketch.add(df, y)

    with raises(NotFittedError):
        sketch.transform(df, metric="woe")

    sketch.solve()

    with raises(TypeError):
        sketch.transform(df.values, metric="woe")

    with raises(ValueError):
        sketch.transform(df, metric="new_woe")

    transformed = sketch.transform(df)

    # Reference: a single-variable sketch on the same column and target.
    reference = OptimalBinningSketch()
    values = df[column]
    reference.add(values, y)
    reference.solve()

    reference_woe = reference.transform(values, metric="woe")
    assert reference_woe == approx(transformed[column], rel=1e-6)