def test_verbose():
    """Fit a Scorecard built with ``verbose=True`` while capturing stdout.

    Whatever ``fit`` prints is redirected into
    ``tests/test_scorecard_verbose.txt``. The test makes no assertion on the
    captured text; it only requires that fitting completes.
    """
    dataset = load_breast_cancer()
    columns = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=columns)
    df["target"] = dataset.target

    scorecard = Scorecard(
        target="target",
        binning_process=BinningProcess(columns),
        estimator=LogisticRegression(),
        verbose=True,
    )

    log_path = "tests/test_scorecard_verbose.txt"
    # Send everything printed during fit() to the file instead of the console.
    with open(log_path, "w") as log_file, redirect_stdout(log_file):
        scorecard.fit(df)
def test_scaling_method_params_continuous_pdo_odds():
    """Expect ValueError for a LinearRegression scorecard using "pdo_odds".

    The estimator, the binning process and the scorecard are all created
    inside the ``raises`` block together with the ``fit`` call, so a
    ValueError from any of those steps satisfies the check.

    NOTE(review): ``load_boston`` was removed in scikit-learn 1.2; confirm
    the pinned scikit-learn version still provides it.
    """
    boston = load_boston()
    columns = boston.feature_names
    df = pd.DataFrame(boston.data, columns=columns)
    df["target"] = boston.target

    with raises(ValueError):
        regressor = LinearRegression()
        binning = BinningProcess(columns)
        scorecard = Scorecard(
            target="target",
            binning_process=binning,
            estimator=regressor,
            scaling_method="pdo_odds",
            scaling_method_params={},
        )
        scorecard.fit(df)
def test_scaling_params():
    """Expect ValueError for four scaling method / parameter combinations.

    Every combination is built and fitted inside its own
    ``raises(ValueError)`` block, sharing one binning process and one
    logistic-regression estimator.
    """
    dataset = load_breast_cancer()
    columns = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=columns)
    df["target"] = dataset.target

    binning_process = BinningProcess(columns)
    estimator = LogisticRegression()

    # (scaling_method, scaling_method_params) pairs that must be rejected.
    rejected_cases = [
        ("pdo_odds", {"pdo": 20}),
        ("pdo_odds", {"pdo": 20, "odds": -2, "scorecard_points": -22}),
        ("min_max", {"min": "a", "max": 600}),
        ("min_max", {"min": 900, "max": 600}),
    ]

    for method, params in rejected_cases:
        with raises(ValueError):
            scorecard = Scorecard(
                target="target",
                binning_process=binning_process,
                estimator=estimator,
                scaling_method=method,
                scaling_method_params=params,
            )
            scorecard.fit(df)
def test_input_separate_target():
    """Expect ValueError when fitting with a target array that contains a 4.

    The first element of the breast-cancer target array is overwritten with
    4, and the features ``X`` and target ``y`` are passed to ``fit`` as
    separate arguments.

    This test has its own name, distinct from ``test_input`` (the variant
    that stores the target as a DataFrame column). Two functions sharing one
    name in a module means only the last definition is collected by pytest.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names
    X = pd.DataFrame(data.data, columns=variable_names)

    y = data.target
    y[0] = 4  # overwrite the first label in place with the value 4

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()

    with raises(ValueError):
        scorecard = Scorecard(binning_process=binning_process,
                              estimator=estimator)
        scorecard.fit(X, y)
def test_input():
    """Expect ValueError when the DataFrame's target column contains a 4.

    The first breast-cancer label is overwritten with 4 before the labels
    are attached to the DataFrame as the ``"target"`` column.
    """
    dataset = load_breast_cancer()
    columns = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=columns)

    labels = dataset.target
    labels[0] = 4  # modified in place, then stored as the target column
    df["target"] = labels

    binning = BinningProcess(columns)
    classifier = LogisticRegression()

    with raises(ValueError):
        scorecard = Scorecard(
            target="target",
            binning_process=binning,
            estimator=classifier,
        )
        scorecard.fit(df)
def test_estimator_not_coef():
    """Expect RuntimeError when fitting with a RandomForestClassifier.

    The scorecard is constructed outside the ``raises`` block; only the
    ``fit(X, y)`` call is expected to raise.
    """
    from sklearn.ensemble import RandomForestClassifier

    dataset = load_breast_cancer()
    columns = dataset.feature_names
    X = pd.DataFrame(dataset.data, columns=columns)
    y = dataset.target

    scorecard = Scorecard(
        binning_process=BinningProcess(columns),
        estimator=RandomForestClassifier(),
    )

    with raises(RuntimeError):
        scorecard.fit(X, y)
def buildScoreCard(df, features, labelCol):
    """Fit a Huber-regression scorecard on ``df``, report it and plot it.

    Parameters
    ----------
    df
        Data passed to ``Scorecard.fit``, ``score`` and ``predict`` and
        indexed with ``labelCol`` (presumably a pandas DataFrame -- confirm
        against callers).
    features
        Variable names handed to ``BinningProcess``.
    labelCol
        Name of the target column; used as the scorecard ``target`` and as
        the y-values of the scatter plot.

    Returns
    -------
    None
        The function prints the scorecard information and summary table and
        shows a matplotlib figure.
    """
    # NOTE(review): scaling_method is None while scaling_method_params is
    # supplied -- confirm whether the min/max values have any effect.
    scorecard = Scorecard(
        binning_process=BinningProcess(features),
        target=labelCol,
        estimator=HuberRegressor(max_iter=200),
        scaling_method=None,
        scaling_method_params={"min": 0, "max": 100},
        reverse_scorecard=True,
    )
    # Verbosity is switched on after construction, before fitting.
    scorecard.verbose = True
    scorecard.fit(df, check_input=False)

    scorecard.information(print_level=2)
    summary = scorecard.table(style="summary")
    print(summary)

    score = scorecard.score(df)
    y_pred = scorecard.predict(df)

    # Observed target values against the score, with the fitted prediction
    # drawn on top.
    plt.scatter(score, df[labelCol], alpha=0.01, label="Average profit")
    plt.plot(score, y_pred, label="Huber regression", linewidth=2,
             color="orange")
    plt.ylabel("Average profit value (unit=100,000)")
    plt.xlabel("Score")
    plt.legend()
    plt.show()
def test_information():
    """Exercise ``Scorecard.information`` before and after fitting.

    * Before ``fit``: calling ``information()`` must raise NotFittedError.
    * After ``fit``: ``print_level=-1`` must raise ValueError.
    * Print levels 0, 1 and 2 are then written, in that order, to
      ``tests/test_scorecard_information.txt`` via stdout redirection.
    """
    dataset = load_breast_cancer()
    columns = dataset.feature_names
    X = pd.DataFrame(dataset.data, columns=columns)
    y = dataset.target

    scorecard = Scorecard(
        binning_process=BinningProcess(columns),
        estimator=LogisticRegression(),
    )

    with raises(NotFittedError):
        scorecard.information()

    scorecard.fit(X, y)

    with raises(ValueError):
        scorecard.information(print_level=-1)

    report_path = "tests/test_scorecard_information.txt"
    with open(report_path, "w") as report, redirect_stdout(report):
        for level in (0, 1, 2):
            scorecard.information(print_level=level)
def test_predict_score():
    """Check predict / predict_proba / score before and after fitting.

    Before ``fit`` each of the three methods must raise NotFittedError.
    After fitting a "min_max"-scaled scorecard (min 300.12, max 850.66) the
    first five predictions, class-1 probabilities and scores are compared
    against stored reference values.
    """
    dataset = load_breast_cancer()
    columns = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=columns)
    df["target"] = dataset.target

    binning_process = BinningProcess(columns)
    estimator = LogisticRegression()
    scorecard = Scorecard(
        target="target",
        binning_process=binning_process,
        estimator=estimator,
        scaling_method="min_max",
        scaling_method_params={"min": 300.12, "max": 850.66},
    )

    # None of the prediction methods may be used on an unfitted scorecard.
    with raises(NotFittedError):
        scorecard.predict(df)

    with raises(NotFittedError):
        scorecard.predict_proba(df)

    with raises(NotFittedError):
        scorecard.score(df)

    scorecard.fit(df)

    pred = scorecard.predict(df)
    pred_proba = scorecard.predict_proba(df)
    score = scorecard.score(df)

    expected_pred = [0, 0, 0, 0, 0]
    expected_proba = [1.15260206e-06, 9.79035720e-06, 7.52481206e-08,
                      1.12438599e-03, 9.83145644e-06]
    expected_score = [652.16590046, 638.52659074, 669.56413105,
                      608.27744027, 638.49988325]

    assert pred[:5] == approx(expected_pred)
    assert pred_proba[:5, 1] == approx(expected_proba, rel=1e-6)
    assert score[:5] == approx(expected_score, rel=1e-6)
def test_params():
    """Expect TypeError / ValueError for a series of bad Scorecard arguments.

    Each case overrides one or two keyword arguments of an otherwise valid
    configuration; the scorecard is constructed and fitted inside the
    ``raises`` block for the exception listed with the case.
    """
    dataset = load_breast_cancer()
    columns = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=columns)
    df["target"] = dataset.target

    binning_process = BinningProcess(columns)
    estimator = LogisticRegression()

    # Valid baseline arguments shared by every case.
    baseline = {
        "target": "target",
        "binning_process": binning_process,
        "estimator": estimator,
    }

    # (expected exception, keyword overrides applied on top of the baseline)
    cases = [
        (TypeError, {"target": 1}),
        (TypeError, {"binning_process": estimator}),
        (TypeError, {"estimator": binning_process}),
        (ValueError, {"scaling_method": "new_method",
                      "scaling_method_params": dict()}),
        (ValueError, {"scaling_method": "min_max",
                      "scaling_method_params": None}),
        (TypeError, {"scaling_method": "min_max",
                     "scaling_method_params": []}),
        (TypeError, {"intercept_based": 1}),
        (TypeError, {"reverse_scorecard": 1}),
        (TypeError, {"rounding": 1}),
        (TypeError, {"verbose": 1}),
    ]

    for expected_error, overrides in cases:
        kwargs = dict(baseline, **overrides)
        with raises(expected_error):
            scorecard = Scorecard(**kwargs)
            scorecard.fit(df)