def test_default():
    """Fit with default settings and check the "mean radius" binning.

    Also checks that ``get_binned_variable`` raises ``TypeError`` for an
    integer argument and ``ValueError`` for the name "new_variable".
    """
    expected_splits = [11.42500019, 12.32999992, 13.09499979, 13.70499992,
                       15.04500008, 16.92500019]

    binning_process = BinningProcess(variable_names)
    binning_process.fit(X, y, check_input=True)

    # Invalid lookups: wrong argument type first, then an unexpected name.
    with raises(TypeError):
        binning_process.get_binned_variable(1)

    with raises(ValueError):
        binning_process.get_binned_variable("new_variable")

    mean_radius = binning_process.get_binned_variable("mean radius")
    assert mean_radius.status == "OPTIMAL"
    assert mean_radius.splits == approx(expected_splits, rel=1e-6)

    # Build the binning table, then compare its information value.
    mean_radius.binning_table.build()
    assert mean_radius.binning_table.iv == approx(5.04392547, rel=1e-6)
def test_fit_params():
    """Check that per-variable fit parameters constrain the fitted binning.

    "mean radius" is fitted with ``max_n_bins`` set through
    ``binning_fit_params``; the resulting binning must be optimal and must
    not exceed that number of bins.
    """
    max_n_bins = 4
    binning_fit_params = {"mean radius": {"max_n_bins": max_n_bins}}

    process = BinningProcess(variable_names=variable_names,
                             binning_fit_params=binning_fit_params)
    process.fit(X, y)

    optb = process.get_binned_variable("mean radius")
    assert optb.status == "OPTIMAL"

    # k split points delimit k + 1 bins, so the bound is on the bin count.
    # Comparing len(splits) directly against max_n_bins would still accept
    # a binning with one bin too many.
    assert len(optb.splits) + 1 <= max_n_bins
def test_default_transform_multiclass():
    """Compare the process transform with a standalone multiclass binning.

    Fits a ``BinningProcess`` on the dataset returned by ``load_wine``,
    checks that the first variable is binned with
    ``MulticlassOptimalBinning``, and checks that the transformed column 5
    matches a ``MulticlassOptimalBinning`` fitted on that column alone.
    """
    column = 5

    wine = load_wine()
    feature_names = wine.feature_names
    features = wine.data
    target = wine.target

    binning_process = BinningProcess(feature_names)
    binning_process.fit(features, target)
    transformed = binning_process.transform(features)

    first_binning = binning_process.get_binned_variable(feature_names[0])
    assert isinstance(first_binning, MulticlassOptimalBinning)

    # Reference: the same binning class fitted directly on one column.
    standalone = MulticlassOptimalBinning()
    values = features[:, column]
    standalone.fit(values, target)

    assert standalone.transform(values) == approx(transformed[:, column],
                                                  rel=1e-6)
def test_default_transform_continuous():
    """Compare the process transform with a standalone continuous binning.

    Fits a ``BinningProcess`` on the dataset returned by ``load_boston``,
    checks that the first variable is binned with
    ``ContinuousOptimalBinning``, and checks that column 5 transformed with
    ``metric="mean"`` matches a ``ContinuousOptimalBinning`` fitted on that
    column alone.
    """
    column = 5

    # NOTE(review): if this is sklearn.datasets.load_boston, it was removed
    # in scikit-learn 1.2 -- confirm the import source / pinned version.
    boston = load_boston()
    feature_names = boston.feature_names
    features = boston.data
    target = boston.target

    binning_process = BinningProcess(feature_names)
    binning_process.fit(features, target)
    transformed = binning_process.transform(features, metric="mean")

    first_binning = binning_process.get_binned_variable(feature_names[0])
    assert isinstance(first_binning, ContinuousOptimalBinning)

    # Reference: the same binning class fitted directly on one column.
    standalone = ContinuousOptimalBinning()
    values = features[:, column]
    standalone.fit(values, target)

    assert standalone.transform(values, metric="mean") == approx(
        transformed[:, column], rel=1e-6)
def test_default_pandas():
    """Fit from a pandas DataFrame and check the "mean radius" binning.

    Also checks that passing the frame converted to a plain dict raises
    ``TypeError`` when ``check_input=True``.
    """
    expected_splits = [11.42500019, 12.32999992, 13.09499979, 13.70499992,
                       15.04500008, 16.92500019]

    frame = pd.DataFrame(data.data, columns=data.feature_names)
    binning_process = BinningProcess(variable_names)

    # A dict is not an accepted input container.
    with raises(TypeError):
        binning_process.fit(frame.to_dict(), y, check_input=True)

    binning_process.fit(frame, y, check_input=True)

    mean_radius = binning_process.get_binned_variable("mean radius")
    assert mean_radius.status == "OPTIMAL"
    assert mean_radius.splits == approx(expected_splits, rel=1e-6)

    # Build the binning table, then compare its information value.
    mean_radius.binning_table.build()
    assert mean_radius.binning_table.iv == approx(5.04392547, rel=1e-6)