Ejemplo n.º 1
0
def test_verbose():
    """Fit a verbose scorecard with stdout captured to a file.

    A ``Scorecard`` is built with ``verbose=True`` on the breast cancer data
    and fitted while stdout is redirected into
    ``tests/test_scorecard_verbose.txt``. The test makes no assertions; it
    only requires that ``fit`` completes without raising.
    """
    dataset = load_breast_cancer()
    features = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=features)
    df["target"] = dataset.target

    scorecard = Scorecard(
        target="target",
        binning_process=BinningProcess(features),
        estimator=LogisticRegression(),
        verbose=True,
    )

    # One ``with`` statement both opens the log file and redirects stdout.
    with open("tests/test_scorecard_verbose.txt", "w") as log, \
            redirect_stdout(log):
        scorecard.fit(df)
Ejemplo n.º 2
0
def test_scaling_method_params_continuous_pdo_odds():
    """Expect ``ValueError`` for ``pdo_odds`` scaling with a linear regressor.

    The scorecard is configured on the Boston data with a
    ``LinearRegression`` estimator, ``scaling_method="pdo_odds"`` and an
    empty ``scaling_method_params`` dict.
    """
    boston = load_boston()
    columns = boston.feature_names
    df = pd.DataFrame(boston.data, columns=columns)
    df["target"] = boston.target

    with raises(ValueError):
        # Construction stays inside the block, so an error raised either at
        # construction time or at fit time satisfies the expectation.
        regressor = LinearRegression()
        binner = BinningProcess(columns)
        Scorecard(
            target="target",
            binning_process=binner,
            estimator=regressor,
            scaling_method="pdo_odds",
            scaling_method_params={},
        ).fit(df)
def test_input():
    """Expect ``ValueError`` when fitting on a target whose first label is 4."""
    dataset = load_breast_cancer()
    features = dataset.feature_names
    X = pd.DataFrame(dataset.data, columns=features)
    y = dataset.target
    # Overwrite one label in place. Presumably this makes the otherwise
    # binary target invalid for the scorecard -- confirm against Scorecard.fit.
    y[0] = 4

    binner = BinningProcess(features)
    classifier = LogisticRegression()

    with raises(ValueError):
        Scorecard(binning_process=binner, estimator=classifier).fit(X, y)
Ejemplo n.º 4
0
def test_input():
    """Expect ``ValueError`` when the ``target`` column's first label is 4."""
    dataset = load_breast_cancer()
    features = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=features)

    labels = dataset.target
    # Overwrite one label in place before attaching it to the frame.
    # Presumably this makes the otherwise binary target invalid -- confirm
    # against Scorecard.fit.
    labels[0] = 4
    df["target"] = labels

    binner = BinningProcess(features)
    classifier = LogisticRegression()

    with raises(ValueError):
        Scorecard(
            target="target", binning_process=binner, estimator=classifier,
        ).fit(df)
def test_estimator_not_coef():
    """Expect ``RuntimeError`` when fitting with a ``RandomForestClassifier``.

    NOTE(review): the test name suggests the failure is caused by the
    estimator not exposing coefficients -- confirm against Scorecard.fit.
    """
    from sklearn.ensemble import RandomForestClassifier

    dataset = load_breast_cancer()
    features = dataset.feature_names
    X = pd.DataFrame(dataset.data, columns=features)
    y = dataset.target

    # The scorecard is constructed outside the ``raises`` block; only the
    # ``fit`` call is expected to fail.
    scorecard = Scorecard(
        binning_process=BinningProcess(features),
        estimator=RandomForestClassifier(),
    )

    with raises(RuntimeError):
        scorecard.fit(X, y)
Ejemplo n.º 6
0
def test_params():
    """Check that ``Counterfactual`` rejects invalid arguments.

    Every case builds a ``Counterfactual`` and calls ``fit`` on ``X_binary``
    inside a ``raises`` block for the expected exception type.
    """
    # No scorecard at all.
    with raises(TypeError):
        Counterfactual(scorecard=None).fit(X_binary)

    # A scorecard that has been constructed but never fitted.
    with raises(NotFittedError):
        unfitted = Scorecard(
            binning_process=BinningProcess(feature_names_binary),
            estimator=LogisticRegression(),
        )
        Counterfactual(scorecard=unfitted).fit(X_binary)

    # Invalid keyword values combined with the shared ``scorecard_binary``.
    invalid_kwargs = (
        (TypeError, {"special_missing": 1}),
        (ValueError, {"n_jobs": -1}),
        (TypeError, {"verbose": 1}),
    )
    for expected_error, kwargs in invalid_kwargs:
        with raises(expected_error):
            Counterfactual(scorecard_binary, **kwargs).fit(X_binary)
Ejemplo n.º 7
0
def test_default_continuous():
    """Check the point range of a default scorecard on a continuous target.

    Fits a ``Scorecard`` with a ``LinearRegression`` estimator on the Boston
    data, then sums the per-variable minimum and maximum ``Points`` of the
    detailed table and compares both sums against reference values.
    """
    data = load_boston()
    variable_names = data.feature_names
    df = pd.DataFrame(data.data, columns=variable_names)
    df["target"] = data.target

    binning_process = BinningProcess(variable_names)
    estimator = LinearRegression()

    scorecard = Scorecard(target="target", binning_process=binning_process,
                          estimator=estimator).fit(df)

    sct = scorecard.table(style="detailed")
    # Aggregate by name: passing the NumPy callables ``np.min``/``np.max`` to
    # ``agg`` is deprecated in recent pandas and emits a FutureWarning.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(-15.813545796848476, rel=1e-6)
    assert sc_max == approx(85.08156623609487, rel=1e-6)
Ejemplo n.º 8
0
def buildScoreCard(df, features, labelCol):
    """Fit a Huber-regression scorecard on ``df`` and plot score vs. label.

    Parameters
    ----------
    df
        Data passed to ``fit``, ``score`` and ``predict``; it is also indexed
        with ``df[labelCol]`` for the scatter plot.
    features
        Variable names handed to ``BinningProcess``.
    labelCol
        Name of the target column, passed to ``Scorecard`` as ``target``.

    Returns
    -------
    None
        The function prints the scorecard information and summary table and
        shows a matplotlib figure.
    """
    card = Scorecard(
        binning_process=BinningProcess(features),
        target=labelCol,
        estimator=HuberRegressor(max_iter=200),
        scaling_method=None,
        scaling_method_params={"min": 0, "max": 100},
        reverse_scorecard=True,
    )
    # Verbosity is switched on after construction, before fitting.
    card.verbose = True
    card.fit(df, check_input=False)

    card.information(print_level=2)
    print(card.table(style="summary"))

    scores = card.score(df)
    predictions = card.predict(df)

    # Observed labels as a faint scatter, model predictions as a line.
    plt.scatter(scores, df[labelCol], alpha=0.01, label="Average profit")
    plt.plot(scores, predictions, label="Huber regression", linewidth=2,
             color="orange")
    plt.xlabel("Score")
    plt.ylabel("Average profit value (unit=100,000)")
    plt.legend()
    plt.show()
Ejemplo n.º 9
0
def test_default():
    """Check the point range of a default scorecard on a binary target.

    Fits a ``Scorecard`` with ``LogisticRegression`` on the breast cancer
    data, verifies that an unknown table style raises ``ValueError``, then
    sums the per-variable minimum and maximum ``Points`` of the summary table
    and compares both sums against reference values.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names
    df = pd.DataFrame(data.data, columns=variable_names)
    df["target"] = data.target

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()

    scorecard = Scorecard(target="target", binning_process=binning_process,
                          estimator=estimator).fit(df)

    # The call is expected to raise, so its result is not bound to a name.
    with raises(ValueError):
        scorecard.table(style="new")

    sct = scorecard.table(style="summary")
    # Aggregate by name: passing the NumPy callables ``np.min``/``np.max`` to
    # ``agg`` is deprecated in recent pandas and emits a FutureWarning.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(-43.65762593147646, rel=1e-6)
    assert sc_max == approx(42.69694657427327, rel=1e-6)
def _fit_scorecard(target_dtype, X_train, y_train):
    """Build a scorecard for the given target type and fit it.

    Parameters
    ----------
    target_dtype
        ``"binary"`` selects ``LogisticRegression``; any other value selects
        ``LinearRegression``.
    X_train
        Training data; its ``columns`` are used as the binning variable names.
    y_train
        Training target passed to ``fit``.

    Returns
    -------
    The value returned by ``Scorecard.fit``.
    """
    estimator = (LogisticRegression() if target_dtype == "binary"
                 else LinearRegression())
    binning_process = BinningProcess(list(X_train.columns))

    return Scorecard(binning_process=binning_process,
                     estimator=estimator).fit(X_train, y_train)
Ejemplo n.º 11
0
def test_scaling_method_min_max():
    """Check that ``min_max`` scaling maps the point range onto [300, 850].

    Fits a ``Scorecard`` with ``scaling_method="min_max"`` on the breast
    cancer data and asserts that the summed per-variable minimum and maximum
    ``Points`` of the summary table equal the configured bounds.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names
    df = pd.DataFrame(data.data, columns=variable_names)
    df["target"] = data.target

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()

    scaling_method_params = {"min": 300, "max": 850}

    scorecard = Scorecard(target="target", binning_process=binning_process,
                          estimator=estimator, scaling_method="min_max",
                          scaling_method_params=scaling_method_params).fit(df)

    sct = scorecard.table(style="summary")
    # Aggregate by name: passing the NumPy callables ``np.min``/``np.max`` to
    # ``agg`` is deprecated in recent pandas and emits a FutureWarning.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(300, rel=1e-6)
    assert sc_max == approx(850, rel=1e-6)
Ejemplo n.º 12
0
def test_scaling_method_pdo_odd():
    """Check the point range produced by ``pdo_odds`` scaling.

    Fits a ``Scorecard`` with ``scaling_method="pdo_odds"`` on the breast
    cancer data, using ``odds = 1 / mean(target)``, and compares the summed
    per-variable minimum and maximum ``Points`` against reference values.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names
    df = pd.DataFrame(data.data, columns=variable_names)
    df["target"] = data.target
    odds = 1 / data.target.mean()

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()

    scaling_method_params = {"pdo": 20, "odds": odds, "scorecard_points": 600}

    scorecard = Scorecard(target="target", binning_process=binning_process,
                          estimator=estimator, scaling_method="pdo_odds",
                          scaling_method_params=scaling_method_params).fit(df)

    sct = scorecard.table(style="summary")
    # Aggregate by name: passing the NumPy callables ``np.min``/``np.max`` to
    # ``agg`` is deprecated in recent pandas and emits a FutureWarning.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(-612.2266586867094, rel=1e-6)
    assert sc_max == approx(1879.4396115559216, rel=1e-6)
def test_rounding():
    """Check the point range of a rounded ``min_max`` scorecard.

    Fits a ``Scorecard`` with ``rounding=True`` and non-integer ``min_max``
    bounds (200.52, 850.66) on the breast cancer data, then asserts that the
    summed per-variable minimum and maximum ``Points`` are 201 and 851.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names
    X = pd.DataFrame(data.data, columns=variable_names)
    y = data.target

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()

    scaling_method_params = {"min": 200.52, "max": 850.66}

    scorecard = Scorecard(binning_process=binning_process,
                          estimator=estimator, scaling_method="min_max",
                          scaling_method_params=scaling_method_params,
                          rounding=True).fit(X, y)

    sct = scorecard.table(style="summary")
    # Aggregate by name: passing the NumPy callables ``np.min``/``np.max`` to
    # ``agg`` is deprecated in recent pandas and emits a FutureWarning.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(201, rel=1e-6)
    assert sc_max == approx(851, rel=1e-6)
def test_rounding_pdo_odds():
    """Check the point range of a rounded ``pdo_odds`` scorecard.

    Fits a ``Scorecard`` with ``rounding=True`` and ``pdo_odds`` scaling on
    the breast cancer data, using ``odds = 1 / mean(target)``, then compares
    the summed per-variable minimum and maximum ``Points`` against reference
    values.
    """
    data = load_breast_cancer()
    variable_names = data.feature_names
    X = pd.DataFrame(data.data, columns=variable_names)
    y = data.target
    odds = 1 / data.target.mean()

    binning_process = BinningProcess(variable_names)
    estimator = LogisticRegression()

    scaling_method_params = {"pdo": 20, "odds": odds, "scorecard_points": 600}

    scorecard = Scorecard(binning_process=binning_process,
                          estimator=estimator, scaling_method="pdo_odds",
                          scaling_method_params=scaling_method_params,
                          rounding=True).fit(X, y)

    sct = scorecard.table(style="summary")
    # Aggregate by name: passing the NumPy callables ``np.min``/``np.max`` to
    # ``agg`` is deprecated in recent pandas and emits a FutureWarning.
    sc_min, sc_max = sct.groupby("Variable").agg(
        {"Points": ["min", "max"]}).sum()

    assert sc_min == approx(-612, rel=1e-6)
    assert sc_max == approx(1880, rel=1e-6)
Ejemplo n.º 15
0
def test_predict_score():
    """Check ``predict``, ``predict_proba`` and ``score`` before and after fit.

    Before fitting, each of the three methods must raise ``NotFittedError``.
    After fitting on the breast cancer data with ``min_max`` scaling, the
    first five outputs of each method are compared against reference values.
    """
    dataset = load_breast_cancer()
    features = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=features)
    df["target"] = dataset.target

    scorecard = Scorecard(
        target="target",
        binning_process=BinningProcess(features),
        estimator=LogisticRegression(),
        scaling_method="min_max",
        scaling_method_params={"min": 300.12, "max": 850.66},
    )

    # Every inference method must refuse to run on an unfitted scorecard.
    for method_name in ("predict", "predict_proba", "score"):
        with raises(NotFittedError):
            getattr(scorecard, method_name)(df)

    scorecard.fit(df)
    labels = scorecard.predict(df)
    probabilities = scorecard.predict_proba(df)
    scores = scorecard.score(df)

    assert labels[:5] == approx([0, 0, 0, 0, 0])

    assert probabilities[:5, 1] == approx(
        [1.15260206e-06, 9.79035720e-06, 7.52481206e-08, 1.12438599e-03,
         9.83145644e-06], rel=1e-6)

    assert scores[:5] == approx([652.16590046, 638.52659074, 669.56413105,
                                 608.27744027, 638.49988325], rel=1e-6)
Ejemplo n.º 16
0
def test_scaling_params():
    """Expect ``ValueError`` for several invalid ``scaling_method_params``.

    Each case constructs a ``Scorecard`` with the given scaling method and
    parameters and fits it on the breast cancer data inside a
    ``raises(ValueError)`` block.
    """
    dataset = load_breast_cancer()

    features = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=features)
    df["target"] = dataset.target

    binning_process = BinningProcess(features)
    estimator = LogisticRegression()

    # (scaling_method, scaling_method_params) pairs, in the order checked.
    invalid_settings = (
        ("pdo_odds", {"pdo": 20}),
        ("pdo_odds", {"pdo": 20, "odds": -2, "scorecard_points": -22}),
        ("min_max", {"min": "a", "max": 600}),
        ("min_max", {"min": 900, "max": 600}),
    )

    for method, params in invalid_settings:
        with raises(ValueError):
            Scorecard(
                target="target",
                binning_process=binning_process,
                estimator=estimator,
                scaling_method=method,
                scaling_method_params=params,
            ).fit(df)
Ejemplo n.º 17
0
from optbinning.exceptions import CounterfactualsFoundWarning
from optbinning.scorecard import Counterfactual

from sklearn.datasets import load_breast_cancer
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from tests.datasets import load_boston

# Module-level fixtures: the code below runs at import time and fits two
# scorecards that are bound to module-level names.

# Binary-target fixture built from the breast cancer data.
data = load_breast_cancer()
feature_names_binary = data.feature_names
X_binary = pd.DataFrame(data.data, columns=feature_names_binary)
y_binary = data.target

# Scorecard with a LogisticRegression estimator, fitted on (X_binary, y_binary).
scorecard_binary = Scorecard(
    binning_process=BinningProcess(feature_names_binary),
    estimator=LogisticRegression()).fit(X_binary, y_binary)

# Continuous-target fixture built from the Boston data. ``data`` is rebound
# here, so from this point on it no longer refers to the breast cancer data.
data = load_boston()
feature_names_continuous = data.feature_names
X_continuous = pd.DataFrame(data.data, columns=feature_names_continuous)
y_continuous = data.target

# Scorecard with a LinearRegression estimator, fitted on
# (X_continuous, y_continuous).
scorecard_continuous = Scorecard(
    binning_process=BinningProcess(feature_names_continuous),
    estimator=LinearRegression()).fit(X_continuous, y_continuous)


def test_params():
    """Expect ``TypeError`` when a ``Counterfactual`` without a scorecard is fitted."""
    with raises(TypeError):
        cf = Counterfactual(scorecard=None)
        # Parameter validation may only happen at fit time, so construct and
        # fit inside the same block; constructing alone might never raise.
        cf.fit(X_binary)
Ejemplo n.º 18
0
def test_information():
    """Exercise ``Scorecard.information`` before and after fitting.

    An unfitted scorecard must raise ``NotFittedError``, and a negative
    ``print_level`` must raise ``ValueError``. Print levels 0, 1 and 2 are
    then emitted with stdout redirected into
    ``tests/test_scorecard_information.txt``.
    """
    dataset = load_breast_cancer()
    features = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=features)
    df["target"] = dataset.target

    scorecard = Scorecard(
        target="target",
        binning_process=BinningProcess(features),
        estimator=LogisticRegression(),
    )

    with raises(NotFittedError):
        scorecard.information()

    scorecard.fit(df)

    with raises(ValueError):
        scorecard.information(print_level=-1)

    # One ``with`` statement both opens the report file and redirects stdout.
    with open("tests/test_scorecard_information.txt", "w") as report, \
            redirect_stdout(report):
        for level in (0, 1, 2):
            scorecard.information(print_level=level)
Ejemplo n.º 19
0
def test_params():
    """Check that ``Scorecard`` rejects invalid constructor arguments.

    Each case overrides one or more keyword arguments of an otherwise valid
    configuration, then constructs and fits the scorecard on the breast
    cancer data inside a ``raises`` block for the expected exception type.
    """
    dataset = load_breast_cancer()
    features = dataset.feature_names
    df = pd.DataFrame(dataset.data, columns=features)
    df["target"] = dataset.target

    binning_process = BinningProcess(features)
    estimator = LogisticRegression()

    # Valid baseline; every case below replaces or adds keyword arguments.
    valid_kwargs = {
        "target": "target",
        "binning_process": binning_process,
        "estimator": estimator,
    }

    # (expected exception, keyword overrides), in the order checked.
    invalid_cases = (
        (TypeError, {"target": 1}),
        (TypeError, {"binning_process": estimator}),
        (TypeError, {"estimator": binning_process}),
        (ValueError, {"scaling_method": "new_method",
                      "scaling_method_params": {}}),
        (ValueError, {"scaling_method": "min_max",
                      "scaling_method_params": None}),
        (TypeError, {"scaling_method": "min_max",
                     "scaling_method_params": []}),
        (TypeError, {"intercept_based": 1}),
        (TypeError, {"reverse_scorecard": 1}),
        (TypeError, {"rounding": 1}),
        (TypeError, {"verbose": 1}),
    )

    for expected_error, overrides in invalid_cases:
        with raises(expected_error):
            Scorecard(**{**valid_kwargs, **overrides}).fit(df)