Beispiel #1
0
def test_card_without_combiner():
    """Fit a ScoreCard with only a WOE transformer and check one score.

    No combiner is passed to the card; the score at index 404 of the
    prediction on ``df`` is compared against a pinned reference value.
    """
    woe_encoder = WOETransformer()
    encoded = woe_encoder.fit_transform(df, target)

    scorecard = ScoreCard(transer=woe_encoder)
    scorecard.fit(encoded, target)

    # Sub-scores are requested as in the original call but not inspected.
    score, _sub_scores = scorecard.predict(df, return_sub=True)

    expected = pytest.approx(460.9789823549386, FUZZ_THRESHOLD)
    assert score[404] == expected
Beispiel #2
0
def test_card_combiner_str_not_match():
    """Fitting a card fails when the combiner rules for 'C' were overridden.

    The exported combiner rules get a hand-written entry for column 'C',
    are loaded into a fresh Combiner, and the resulting card is expected
    to raise on ``fit`` with a message naming 'C'.
    """
    rules = combiner.export()
    rules['C'] = [['A'], ['B'], ['C']]
    altered_combiner = Combiner().load(rules)

    woe_encoder = WOETransformer()
    binned = altered_combiner.transform(df)
    encoded = woe_encoder.fit_transform(binned, target)

    scorecard = ScoreCard(combiner=altered_combiner, transer=woe_encoder)

    # will raise an exception when fitting a card
    with pytest.raises(Exception) as exc_info:
        scorecard.fit(encoded, target)

    assert "'C' is not matched" in str(exc_info.value)
Beispiel #3
0
def test_card_combiner_str_not_match():
    """Creating a card fails when the combiner rules for 'C' were overridden.

    The exported combiner rules get a hand-written entry for column 'C'
    and are set on a fresh Combiner. With an already fitted model passed
    in, constructing the ScoreCard itself is expected to raise with a
    message naming 'C'.
    """
    rules = combiner.export()
    rules['C'] = [['A'], ['B'], ['C']]
    altered_combiner = Combiner().set_rules(rules)

    woe_encoder = WOETransformer()
    binned = altered_combiner.transform(df)
    encoded = woe_encoder.fit_transform(binned, target)

    fitted_model = LogisticRegression()
    fitted_model.fit(encoded, target)

    # will raise an exception when create a card
    with pytest.raises(Exception) as exc_info:
        ScoreCard(
            combiner=altered_combiner,
            transer=woe_encoder,
            model=fitted_model,
        )

    assert "'C' is not matched" in str(exc_info.value)
Beispiel #4
0
    'avg_cur_bal': [6515.0, 10622.0, 19486.0, 36453.0]
}
# Apply the hand-tuned bin rules, then bin both splits with labelled bins.
comb.set_rules(rules)
train_b = comb.transform(train_s, labels=True)
test_b = comb.transform(test_s, labels=True)

# Plot every column except the split marker and the target itself.
plot_columns = columns[~columns.isin(['split', 'loan_status'])]
for feature in plot_columns:
    # Pair the binned feature with the target taken from the training frame.
    plot_frame = pd.concat(
        [train_b[feature], train_s['loan_status']],
        axis=1,
    )
    bin_plot(plot_frame, x=feature, target='loan_status')
    plt.show()
print('分箱完成', '\n' * 2)

# ----------------------------------------------------------------------------------------------------------------------------------------------------
# WOE encoding
# The WOE mapping is fitted on the binned training set only and then applied
# unchanged to the binned test set. Re-fitting on the test set would derive
# the encoding from test labels (leakage) and would put the two splits on
# different WOE scales.
print('WOE编码'.center(62, '—'))
WOE = WOETransformer()
train_w = WOE.fit_transform(train_b, y='loan_status')
# NOTE(review): 'loan_status' has no fitted rule, so transform is expected to
# pass it through untouched for the later selection step — confirm.
test_w = WOE.transform(test_b)
print('WOE编码完成', '\n' * 2)
# Persist both encoded frames without the index column.
train_w.to_csv('train_w.csv', index=False)
test_w.to_csv('test_w.csv', index=False)

# ----------------------------------------------------------------------------------------------------------------------------------------------------
# Feature selection B (second screening round)
print('特征第二次筛选'.center(60, '—'))
# Reload the WOE-encoded frames from the CSV files written by the WOE step,
# rebinding train_w / test_w to the on-disk versions.
train_w = pd.read_csv('train_w.csv')
test_w = pd.read_csv('test_w.csv')

train_s2, drops = select(train_w,
                         target='loan_status',
                         iv=0.005,
                         corr=0.8,
Beispiel #5
0
    },
    'B': {
        ','.join(list('ABCD')): 200,
        ','.join(list('EF')): 400,
        'else': 500,
    },
    'C': {
        'A': 200,
        'B': 100,
    },
}

# Module-level fixtures built once at import time; the statements depend on
# each other in this order (bins -> woe -> fitted card).
combiner = Combiner()
bins = combiner.fit_transform(df, target, n_bins=5)
# Fit the WOE transformer on the binned frame and keep the encoded result.
woe_transer = WOETransformer()
woe = woe_transer.fit_transform(bins, target)

# create a score card
card = ScoreCard(
    combiner=combiner,
    transer=woe_transer,
)
card.fit(woe, target)

# Passed as the second positional argument of pytest.approx, i.e. its
# relative tolerance.
FUZZ_THRESHOLD = 1e-4
# Reference score wrapped in pytest.approx so it can be compared with `==`.
TEST_SCORE = pytest.approx(453.58, FUZZ_THRESHOLD)


def test_proba_to_score():
    model = LogisticRegression()
    model.fit(woe, target)