def test_default():
    """Check the default scorecard built on the breast-cancer dataset.

    An unknown table style must raise ``ValueError``. With the "summary"
    style, the per-variable minimum and maximum points are summed over all
    variables and compared against reference values.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names

    df = pd.DataFrame(data.data, columns=variable_names)
    df["target"] = data.target

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()
    scorecard = Scorecard(
        target="target",
        binning_process=binning_process,
        estimator=estimator,
    ).fit(df)

    # The call is expected to raise, so its result is not bound to a name.
    with raises(ValueError):
        scorecard.table(style="new")

    sct = scorecard.table(style="summary")

    # String aliases are used instead of np.min/np.max: passing NumPy
    # callables to groupby.agg is deprecated in pandas >= 2.1.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(-43.65762593147646, rel=1e-6)
    assert sc_max == approx(42.69694657427327, rel=1e-6)
def test_default_continuous():
    """Check the default scorecard for a continuous target.

    A ``LinearRegression`` scorecard is fitted on the Boston dataset. With
    the "detailed" table style, the per-variable minimum and maximum points
    are summed over all variables and compared against reference values.
    """
    # NOTE(review): load_boston was removed in scikit-learn 1.2, so this test
    # only runs against older releases. The reference values below depend on
    # this dataset, so swapping it requires recomputing them.
    data = load_boston()
    variable_names = data.feature_names

    df = pd.DataFrame(data.data, columns=variable_names)
    df["target"] = data.target

    binning_process = BinningProcess(variable_names)
    estimator = LinearRegression()
    scorecard = Scorecard(
        target="target",
        binning_process=binning_process,
        estimator=estimator,
    ).fit(df)

    sct = scorecard.table(style="detailed")

    # String aliases are used instead of np.min/np.max: passing NumPy
    # callables to groupby.agg is deprecated in pandas >= 2.1.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(-15.813545796848476, rel=1e-6)
    assert sc_max == approx(85.08156623609487, rel=1e-6)
def buildScoreCard(df, features, labelCol):
    """Fit a Huber-regression scorecard on ``df`` and plot score vs. target.

    The fitted scorecard's information report and its "summary" table are
    printed. A scatter of the scores against ``df[labelCol]`` is then drawn,
    with the scorecard predictions as a line, and shown via ``plt.show()``.

    Parameters
    ----------
    df : data passed to ``fit``, ``score`` and ``predict`` and indexed by
        ``labelCol`` (presumably a pandas DataFrame -- confirm with callers).
    features : variable names handed to ``BinningProcess``.
    labelCol : name of the target column in ``df``.

    Returns
    -------
    None
    """
    huber = HuberRegressor(max_iter=200)
    binner = BinningProcess(features)

    # NOTE(review): scaling_method is None while scaling_method_params is
    # supplied -- confirm whether a "min_max" scaling was intended here.
    card = Scorecard(
        binning_process=binner,
        target=labelCol,
        estimator=huber,
        scaling_method=None,
        scaling_method_params={"min": 0, "max": 100},
        reverse_scorecard=True,
    )
    card.verbose = True
    card.fit(df, check_input=False)
    card.information(print_level=2)

    summary_table = card.table(style="summary")
    print(summary_table)

    points = card.score(df)
    predicted = card.predict(df)
    observed = df[labelCol]

    plt.scatter(points, observed, alpha=0.01, label="Average profit")
    plt.plot(
        points,
        predicted,
        label="Huber regression",
        linewidth=2,
        color="orange",
    )
    plt.ylabel("Average profit value (unit=100,000)")
    plt.xlabel("Score")
    plt.legend()
    plt.show()
def test_scaling_method_min_max():
    """Check that "min_max" scaling maps the score range onto [300, 850].

    The per-variable minimum and maximum points of the "summary" table are
    summed over all variables; the sums must match the requested "min" and
    "max" scaling parameters.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names

    df = pd.DataFrame(data.data, columns=variable_names)
    df["target"] = data.target

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()
    scaling_method_params = {"min": 300, "max": 850}

    scorecard = Scorecard(
        target="target",
        binning_process=binning_process,
        estimator=estimator,
        scaling_method="min_max",
        scaling_method_params=scaling_method_params,
    ).fit(df)

    sct = scorecard.table(style="summary")

    # String aliases are used instead of np.min/np.max: passing NumPy
    # callables to groupby.agg is deprecated in pandas >= 2.1.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(300, rel=1e-6)
    assert sc_max == approx(850, rel=1e-6)
def test_scaling_method_pdo_odd():
    """Check the score range produced by "pdo_odds" scaling.

    The scaling uses ``pdo=20``, ``scorecard_points=600`` and odds computed
    as the inverse of the mean target. The per-variable minimum and maximum
    points of the "summary" table are summed over all variables and compared
    against reference values.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names

    df = pd.DataFrame(data.data, columns=variable_names)
    df["target"] = data.target

    odds = 1 / data.target.mean()

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()
    scaling_method_params = {"pdo": 20, "odds": odds, "scorecard_points": 600}

    scorecard = Scorecard(
        target="target",
        binning_process=binning_process,
        estimator=estimator,
        scaling_method="pdo_odds",
        scaling_method_params=scaling_method_params,
    ).fit(df)

    sct = scorecard.table(style="summary")

    # String aliases are used instead of np.min/np.max: passing NumPy
    # callables to groupby.agg is deprecated in pandas >= 2.1.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(-612.2266586867094, rel=1e-6)
    assert sc_max == approx(1879.4396115559216, rel=1e-6)
def test_rounding():
    """Check "min_max" scaling with ``rounding=True``.

    The scorecard is fitted from separate ``X`` and ``y`` with non-integer
    "min"/"max" scaling parameters (200.52 and 850.66). The per-variable
    minimum and maximum points of the "summary" table, summed over all
    variables, are expected to be 201 and 851.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names

    X = pd.DataFrame(data.data, columns=variable_names)
    y = data.target

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()
    scaling_method_params = {"min": 200.52, "max": 850.66}

    scorecard = Scorecard(
        binning_process=binning_process,
        estimator=estimator,
        scaling_method="min_max",
        scaling_method_params=scaling_method_params,
        rounding=True,
    ).fit(X, y)

    sct = scorecard.table(style="summary")

    # String aliases are used instead of np.min/np.max: passing NumPy
    # callables to groupby.agg is deprecated in pandas >= 2.1.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(201, rel=1e-6)
    assert sc_max == approx(851, rel=1e-6)
def test_rounding_pdo_odds():
    """Check "pdo_odds" scaling with ``rounding=True``.

    The scorecard is fitted from separate ``X`` and ``y`` using ``pdo=20``,
    ``scorecard_points=600`` and odds computed as the inverse of the mean
    target. The per-variable minimum and maximum points of the "summary"
    table, summed over all variables, are expected to be -612 and 1880.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names

    X = pd.DataFrame(data.data, columns=variable_names)
    y = data.target

    odds = 1 / data.target.mean()

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()
    scaling_method_params = {"pdo": 20, "odds": odds, "scorecard_points": 600}

    scorecard = Scorecard(
        binning_process=binning_process,
        estimator=estimator,
        scaling_method="pdo_odds",
        scaling_method_params=scaling_method_params,
        rounding=True,
    ).fit(X, y)

    sct = scorecard.table(style="summary")

    # String aliases are used instead of np.min/np.max: passing NumPy
    # callables to groupby.agg is deprecated in pandas >= 2.1.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(-612, rel=1e-6)
    assert sc_max == approx(1880, rel=1e-6)