def test_summary_get_support():
    """Exercise ``summary`` and ``get_support`` before and after fitting.

    Before ``fit`` both methods must raise ``ValueError``. After fitting on
    the breast cancer data with ``min_iv=0.1`` and ``max_iv=0.6``, the
    support is checked in its three forms: boolean mask, indices and names.

    NOTE(review): another function with this exact name is defined later in
    the visible file; at import time the later definition rebinds the name,
    so this version would not be collected by pytest -- confirm which one
    should remain.
    """
    dataset = load_breast_cancer()
    variable_names = dataset.feature_names
    X, y = dataset.data, dataset.target

    process = BinningProcess(variable_names, min_iv=0.1, max_iv=0.6)

    # Neither accessor is usable on an unfitted process.
    with raises(ValueError):
        process.summary()

    with raises(ValueError):
        process.get_support()

    process.fit(X, y, check_input=True)

    assert isinstance(process.summary(), pd.DataFrame)

    # ``indices`` and ``names`` cannot both be requested at once.
    with raises(ValueError):
        process.get_support(indices=True, names=True)

    expected_indices = [9, 11, 14, 18, 19, 29]
    # 30-element boolean mask that is True exactly at the expected indices.
    expected_mask = [position in expected_indices for position in range(30)]
    expected_names = [
        'mean fractal dimension',
        'texture error',
        'smoothness error',
        'symmetry error',
        'fractal dimension error',
        'worst fractal dimension',
    ]

    assert all(process.get_support() == expected_mask)
    assert process.get_support(indices=True) == approx(expected_indices)
    assert all(process.get_support(names=True) == expected_names)
def test_summary_get_support():
    """Exercise ``summary`` and ``get_support`` with IV selection criteria.

    Before ``fit`` both methods must raise ``ValueError``. After fitting on
    the breast cancer data with an ``"iv"`` selection criterion (min 0.1,
    max 0.6, strategy "highest", top 10), the support is checked in its
    three forms: boolean mask, indices and names.

    NOTE(review): an earlier function with this exact name is visible in
    this file; this later definition is the one left bound at module level,
    so the earlier one is shadowed -- confirm that is intended.
    """
    dataset = load_breast_cancer()
    variable_names = dataset.feature_names
    X, y = dataset.data, dataset.target

    selection_criteria = {
        "iv": {"min": 0.1, "max": 0.6, "strategy": "highest", "top": 10},
    }
    process = BinningProcess(
        variable_names=variable_names,
        selection_criteria=selection_criteria,
    )

    # Neither accessor is usable on an unfitted process.
    with raises(ValueError):
        process.summary()

    with raises(ValueError):
        process.get_support()

    process.fit(X, y, check_input=True)

    assert isinstance(process.summary(), pd.DataFrame)

    # ``indices`` and ``names`` cannot both be requested at once.
    with raises(ValueError):
        process.get_support(indices=True, names=True)

    selected_positions = [9, 11, 14, 18, 19, 29]
    # 30-element boolean mask that is True exactly at the selected positions.
    selected_mask = [i in selected_positions for i in range(30)]
    selected_names = [
        'mean fractal dimension',
        'texture error',
        'smoothness error',
        'symmetry error',
        'fractal dimension error',
        'worst fractal dimension',
    ]

    assert all(process.get_support() == selected_mask)
    assert process.get_support(indices=True) == approx(selected_positions)
    assert all(process.get_support(names=True) == selected_names)
def test_categorical_variables():
    """Check that a declared categorical variable is reported as such.

    Fits a ``BinningProcess`` with ``categorical_variables=["CHAS"]`` and
    asserts that the summary row named "CHAS" has dtype "categorical".

    NOTE(review): the origin of ``load_boston`` is not visible here. If it
    is ``sklearn.datasets.load_boston``, that loader was removed in
    scikit-learn 1.2 -- confirm the import source.
    """
    dataset = load_boston()
    variable_names = dataset.feature_names
    X, y = dataset.data, dataset.target

    process = BinningProcess(variable_names, categorical_variables=["CHAS"])
    process.fit(X, y, check_input=True)

    summary = process.summary()
    # Select the "dtype" cell of the row whose name is "CHAS".
    chas_dtype = summary.loc[summary.name == "CHAS", "dtype"].values[0]
    assert chas_dtype == "categorical"
def test_auto_modes():
    """Compare the summed ``iv`` column across trend modes and solvers.

    Four ``BinningProcess`` instances are fitted on the same DataFrame. They
    differ only in the per-variable ``monotonic_trend`` ("auto" or
    "auto_heuristic") and ``solver`` ("mip" or "cp"). The sum of the ``iv``
    column of each summary must be equal between the two trend modes for
    each solver, and between the two solvers for the "auto" trend.

    NOTE(review): ``data``, ``variable_names`` and ``y`` are not bound inside
    this function; presumably they are module-level names defined outside
    the visible code -- confirm.
    """
    df = pd.DataFrame(data.data, columns=data.feature_names)

    def build_fit_params(trend, solver):
        # The same settings are applied to every variable.
        return {
            v: {"monotonic_trend": trend, "solver": solver}
            for v in data.feature_names
        }

    modes = [
        ("auto", "mip"),
        ("auto_heuristic", "mip"),
        ("auto", "cp"),
        ("auto_heuristic", "cp"),
    ]

    # Build all parameter dicts first, then all processes, then fit them.
    all_fit_params = [build_fit_params(trend, solver) for trend, solver in modes]
    processes = [
        BinningProcess(variable_names, binning_fit_params=fit_params)
        for fit_params in all_fit_params
    ]
    for process in processes:
        process.fit(df, y)

    def total_iv(process):
        return process.summary().iv.sum()

    mip_auto, mip_heuristic, cp_auto, cp_heuristic = processes

    assert total_iv(mip_auto) == total_iv(mip_heuristic)
    assert total_iv(cp_auto) == total_iv(cp_heuristic)
    assert total_iv(mip_auto) == total_iv(cp_auto)