예제 #1
0
def test_card_combiner_str_not_match():
    """Fitting a ScoreCard should fail when the combiner rule for 'C' is overridden.

    The rules exported from the module-level ``combiner`` are copied, the
    entry for column 'C' is replaced by three single-value groups, and a new
    Combiner is loaded from them. The error raised by ``ScoreCard.fit`` is
    expected to mention that 'C' is not matched.
    """
    overridden_rules = combiner.export()
    overridden_rules['C'] = [['A'], ['B'], ['C']]
    custom_combiner = Combiner().load(overridden_rules)

    binned = custom_combiner.transform(df)
    transformer = WOETransformer()
    woe_frame = transformer.fit_transform(binned, target)

    scorecard = ScoreCard(combiner=custom_combiner, transer=transformer)

    # The failure is expected at fit time, not at construction time.
    with pytest.raises(Exception) as exc_info:
        scorecard.fit(woe_frame, target)

    message = str(exc_info.value)
    assert '\'C\' is not matched' in message
예제 #2
0
def test_card_combiner_str_not_match():
    """Constructing a ScoreCard should fail when the combiner rule for 'C' is overridden.

    The rules exported from the module-level ``combiner`` are copied, the
    entry for column 'C' is replaced by three single-value groups, and a new
    Combiner is configured with them. A LogisticRegression is fitted on the
    resulting WOE frame and handed to ScoreCard; the error raised while
    creating the card is expected to mention that 'C' is not matched.
    """
    overridden_rules = combiner.export()
    overridden_rules['C'] = [['A'], ['B'], ['C']]
    custom_combiner = Combiner().set_rules(overridden_rules)

    binned = custom_combiner.transform(df)
    transformer = WOETransformer()
    woe_frame = transformer.fit_transform(binned, target)

    classifier = LogisticRegression()
    classifier.fit(woe_frame, target)

    # The failure is expected while the card is being created, so the
    # constructed object is never bound to a name.
    with pytest.raises(Exception) as exc_info:
        ScoreCard(
            combiner=custom_combiner,
            transer=transformer,
            model=classifier,
        )

    message = str(exc_info.value)
    assert '\'C\' is not matched' in message
예제 #3
0
def test_combiner_labels_with_empty():
    """Check the label produced for row 2 of column 'D' with ``empty_separate=True``.

    A Combiner is fitted on ``df`` with 4 bins and ``empty_separate`` enabled,
    then ``df`` is transformed with ``labels=True``; the cell at row label 2,
    column 'D' must equal '4.nan'.
    """
    fit_options = {'n_bins': 4, 'empty_separate': True}
    fitted = Combiner().fit(df, 'target', **fit_options)

    labelled = fitted.transform(df, labels=True)

    cell = labelled.loc[2, 'D']
    assert cell == '4.nan'
예제 #4
0
def test_combiner_labels():
    """Check the label produced for row 451 of column 'A'.

    A Combiner is fitted on ``df`` against ``target`` with default options,
    then ``df`` is transformed with ``labels=True``; the cell at row label
    451, column 'A' must equal '3.[3 ~ 4)'.
    """
    fitted = Combiner().fit(df, target)

    labelled = fitted.transform(df, labels=True)

    cell = labelled.loc[451, 'A']
    assert cell == '3.[3 ~ 4)'
예제 #5
0
        bin_plot(data_c, x=i, target=target)
        plt.show()


# combine(train_s, target='loan_status', columns=columns, exclude=['loan_status'])

# Fit the binning rules on the training split only (method='chi', each bin
# holding at least 10% of the samples), then override a few columns with
# hand-picked rules.
comb.fit(train_s, y='loan_status', method='chi', min_samples=0.1)
rules = {
    # Categorical column: each inner list is one bin of raw category values.
    'emp_length': [['< 1 year'], ['1 year', '2 years', '3 years'],
                   ['4 years', '5 years', '6 years', '7 years', '8 years'],
                   ['9 years', '10+ years']],
    # Numeric columns: lists of split points.
    'percent_bc_gt_75': [11.1, 25.9, 52.0],
    'avg_cur_bal': [6515.0, 10622.0, 19486.0, 36453.0]
}
comb.set_rules(rules)

# Apply the same fitted rules to both splits so they share bin definitions.
train_b = comb.transform(train_s, labels=True)
test_b = comb.transform(test_s, labels=True)

# Plot every binned training feature against the target, skipping the
# bookkeeping column 'split' and the target itself.
for i in columns[~columns.isin(['split', 'loan_status'])]:
    data_i = pd.concat([train_b[i], train_s['loan_status']], axis=1)
    bin_plot(data_i, x=i, target='loan_status')
    plt.show()
print('分箱完成', '\n' * 2)

# ----------------------------------------------------------------------------------------------------------------------------------------------------
# WOE encoding
print('WOE编码'.center(62, '—'))
WOE = WOETransformer()
train_w = WOE.fit_transform(train_b, y='loan_status')
# Encode the test split with the WOE mapping learned from the training split.
# Re-fitting here (fit_transform) would overwrite the training mapping and
# derive the test encoding from the test labels, i.e. leak the target.
test_w = WOE.transform(test_b)
print('WOE编码完成', '\n' * 2)
train_w.to_csv('train_w.csv', index=False)