コード例 #1
0
def test_default_transform():
    """Exercise ``BinningProcessSketch.transform`` errors and its default metric.

    The test expects ``transform`` to raise ``NotFittedError`` before
    ``solve()`` has been called, ``TypeError`` when given ``df.values``
    instead of ``df``, and ``ValueError`` for the metric ``"new_woe"``.
    It then checks that the default transform of the "mean radius" column
    agrees (rel=1e-6) with the ``"woe"`` transform of a standalone
    ``OptimalBinningSketch`` built from the same column.
    """
    bpsketch = BinningProcessSketch(variable_names)
    bpsketch.add(df, y)

    # Transforming before solve() must be rejected.
    with raises(NotFittedError):
        bpsketch.transform(df, metric="woe")

    bpsketch.solve()

    # The calls below are expected to raise, so their results are not bound
    # to a name: such an assignment would never take effect.
    with raises(TypeError):
        bpsketch.transform(df.values, metric="woe")

    with raises(ValueError):
        bpsketch.transform(df, metric="new_woe")

    X_transform = bpsketch.transform(df)

    # Reference: a single-variable sketch fitted on the same column.
    optb = OptimalBinningSketch()
    x = df["mean radius"]
    optb.add(x, y)
    optb.solve()

    assert optb.transform(x, metric="woe") == approx(
        X_transform["mean radius"], rel=1e-6)
コード例 #2
0
def test_numerical_default_tdigest():
    """Solve a t-digest sketch (eps=1e-4) and check its binning-table metrics.

    Asserts an "OPTIMAL" status, then compares IV, Gini, Jensen-Shannon and
    quality score against fixed reference values.
    """
    sketch = OptimalBinningSketch(sketch="t-digest", eps=1e-4)
    sketch.add(x, y)
    sketch.solve()
    assert sketch.status == "OPTIMAL"

    table = sketch.binning_table

    # build() is called before reading iv, as in the usual table workflow.
    table.build()
    assert table.iv == approx(5.04392547, rel=1e-2)

    # analysis() is called before reading gini / js / quality_score.
    table.analysis()
    assert table.gini == approx(0.87541620, rel=1e-2)
    assert table.js == approx(0.39378376, rel=1e-2)
    # NOTE(review): with an expected value of 0.0 the relative tolerance
    # contributes nothing; only approx's default absolute tolerance applies.
    # Confirm that an (almost) exact zero is what is intended here.
    assert table.quality_score == approx(0.0, rel=1e-2)
コード例 #3
0
def test_categorical_default_user_splits():
    """Solve a categorical sketch with the MIP solver on a small inline sample.

    NOTE(review): despite the test name, no ``user_splits`` argument is
    passed to the constructor here; only ``cat_cutoff`` is set.
    """
    # Categorical feature values for the inline sample.
    categories = np.array([
        'Working', 'State servant', 'Working', 'Working', 'Working',
        'State servant', 'Commercial associate', 'State servant', 'Pensioner',
        'Working', 'Working', 'Pensioner', 'Working', 'Working', 'Working',
        'Working', 'Working', 'Working', 'Working', 'State servant', 'Working',
        'Commercial associate', 'Working', 'Pensioner', 'Working', 'Working',
        'Working', 'Working', 'State servant', 'Working',
        'Commercial associate', 'Working', 'Working', 'Commercial associate',
        'State servant', 'Working', 'Commercial associate', 'Working',
        'Pensioner', 'Working', 'Commercial associate', 'Working', 'Working',
        'Pensioner', 'Working', 'Working', 'Pensioner', 'Working',
        'State servant', 'Working', 'State servant', 'Commercial associate',
        'Working', 'Commercial associate', 'Pensioner', 'Working', 'Pensioner',
        'Working', 'Working', 'Working', 'Commercial associate', 'Working',
        'Pensioner', 'Working', 'Commercial associate', 'Commercial associate',
        'State servant', 'Working', 'Commercial associate',
        'Commercial associate', 'Commercial associate', 'Working', 'Working',
        'Working', 'Commercial associate', 'Working', 'Commercial associate',
        'Working', 'Working', 'Pensioner', 'Working', 'Pensioner', 'Working',
        'Working', 'Pensioner', 'Working', 'State servant', 'Working',
        'Working', 'Working', 'Working', 'Working', 'Commercial associate',
        'Commercial associate', 'Commercial associate', 'Working',
        'Commercial associate', 'Working', 'Working', 'Pensioner'
    ],
                          dtype=object)

    # Binary target values passed alongside the feature.
    target = np.array([
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 0
    ])

    options = {
        "dtype": "categorical",
        "solver": "mip",
        "cat_cutoff": 0.1,
        "verbose": True,
    }
    sketch = OptimalBinningSketch(**options)
    sketch.add(categories, target)
    sketch.solve()

    assert sketch.status == "OPTIMAL"
コード例 #4
0
def test_verbose():
    """Check that a sketch created with ``verbose=True`` still solves to OPTIMAL."""
    sketch = OptimalBinningSketch(verbose=True)
    sketch.add(x, y)
    sketch.solve()

    assert sketch.status == "OPTIMAL"
コード例 #5
0
def test_information():
    """Exercise ``information()`` before/after solving and across print levels.

    Expects ``NotFittedError`` before the sketch is solved and ``ValueError``
    for ``print_level=-1``; levels 0, 1 and 2 are called without assertions.
    The "mip" solver is also run once at level 2.
    """
    cp_sketch = OptimalBinningSketch(solver="cp")

    # Requesting information on an unsolved sketch must be rejected.
    with raises(NotFittedError):
        cp_sketch.information()

    cp_sketch.add(x, y)
    cp_sketch.solve()

    with raises(ValueError):
        cp_sketch.information(print_level=-1)

    # Valid print levels: these calls only need to complete without raising.
    for level in (0, 1, 2):
        cp_sketch.information(print_level=level)

    mip_sketch = OptimalBinningSketch(solver="mip")
    mip_sketch.add(x, y)
    mip_sketch.solve()
    mip_sketch.information(print_level=2)
コード例 #6
0
def test_params():
    """Check that invalid ``OptimalBinningSketch`` constructor arguments raise.

    Each entry pairs the exception type the test expects with the keyword
    arguments that should trigger it. Entries are checked in order, and the
    first one that does not raise as expected fails the test.
    """
    invalid_params = [
        (TypeError, {"name": 1}),
        (ValueError, {"dtype": "nominal"}),
        (ValueError, {"sketch": "new_sketch"}),
        (ValueError, {"eps": -1e-2}),
        (ValueError, {"K": -3}),
        (ValueError, {"solver": "new_solver"}),
        (ValueError, {"divergence": "new_divergence"}),
        (ValueError, {"max_n_prebins": -2}),
        (ValueError, {"min_n_bins": -2}),
        (ValueError, {"max_n_bins": -2.2}),
        (ValueError, {"min_n_bins": 3, "max_n_bins": 2}),
        (ValueError, {"min_bin_size": 0.6}),
        (ValueError, {"max_bin_size": -0.6}),
        (ValueError, {"min_bin_size": 0.5, "max_bin_size": 0.3}),
        (ValueError, {"min_bin_n_nonevent": -2}),
        (ValueError, {"max_bin_n_nonevent": -2}),
        (ValueError, {"min_bin_n_nonevent": 3, "max_bin_n_nonevent": 2}),
        (ValueError, {"min_bin_n_event": -2}),
        (ValueError, {"max_bin_n_event": -2}),
        (ValueError, {"min_bin_n_event": 3, "max_bin_n_event": 2}),
        (ValueError, {"monotonic_trend": "new_trend"}),
        (ValueError, {"min_event_rate_diff": 1.1}),
        (ValueError, {"max_pvalue": 1.1}),
        (ValueError, {"max_pvalue_policy": "new_policy"}),
        (ValueError, {"gamma": -0.2}),
        (ValueError, {"cat_cutoff": -0.2}),
        (TypeError, {"cat_heuristic": 1}),
        (TypeError, {"special_codes": {1, 2, 3}}),
        (ValueError, {"split_digits": 9}),
        (ValueError, {"mip_solver": "new_solver"}),
        (ValueError, {"time_limit": -2}),
        (TypeError, {"verbose": 1}),
    ]

    for expected_error, kwargs in invalid_params:
        with raises(expected_error):
            OptimalBinningSketch(**kwargs)
コード例 #7
0
def test_numerical_default_tdigest_merge():
    """Merge two t-digest sketches fed disjoint slices, then check metrics.

    The data is split at index 200; each half goes to its own sketch, the
    second sketch is merged into the first, and the merged sketch is solved.
    The metric checks use the same reference values and tolerances as the
    single-sketch t-digest test.
    """
    merged = OptimalBinningSketch(sketch="t-digest", eps=1e-4)
    other = OptimalBinningSketch(sketch="t-digest", eps=1e-4)

    split = 200
    merged.add(x[:split], y[:split])
    other.add(x[split:], y[split:])
    merged.merge(other)

    merged.solve()
    assert merged.status == "OPTIMAL"

    table = merged.binning_table

    table.build()
    assert table.iv == approx(5.04392547, rel=1e-2)

    table.analysis()
    assert table.gini == approx(0.87541620, rel=1e-2)
    assert table.js == approx(0.39378376, rel=1e-2)
    # NOTE(review): with an expected value of 0.0 the relative tolerance
    # contributes nothing; only approx's default absolute tolerance applies.
    # Confirm that an (almost) exact zero is what is intended here.
    assert table.quality_score == approx(0.0, rel=1e-2)