def test_card_without_combiner():
    """Fit a ScoreCard given only a WOE transformer and check one score.

    No combiner is passed to ``ScoreCard``; the card is fitted on the
    WOE-encoded frame and then asked to predict on the raw ``df``.
    """
    woe_encoder = WOETransformer()
    encoded = woe_encoder.fit_transform(df, target)

    scorecard = ScoreCard(transer=woe_encoder)
    scorecard.fit(encoded, target)

    # With return_sub=True the call is unpacked into two values; only the
    # first one (the score) is checked here.
    scores, _sub = scorecard.predict(df, return_sub=True)

    expected = pytest.approx(460.9789823549386, FUZZ_THRESHOLD)
    assert scores[404] == expected
Exemple #2
0
def replace_with_woe(train_data, test_data, exclude_var, target='target'):
    """Replace object-dtype columns with WOE values fitted on the training data.

    For every column of ``train_data`` that is object-dtype, is not listed in
    ``exclude_var`` and is not the target column itself, a ``WOETransformer``
    is fitted on the training column against ``train_data[target]`` and then
    applied to that column in both frames. The encoded columns are stored as
    ``float64``.

    Both frames are modified in place and are also returned.

    Args:
        train_data: Training DataFrame; must contain the ``target`` column.
        test_data: DataFrame to encode with the mapping learned on
            ``train_data``; must contain every column that gets encoded.
        exclude_var: Collection of column names to leave untouched.
        target: Name of the label column in ``train_data``.

    Returns:
        The tuple ``(train_data, test_data)``.
    """
    for var in train_data.columns.tolist():
        # Never encode the label against itself, and honour the exclusions.
        if var == target or var in exclude_var:
            continue
        if not is_object_dtype(train_data[var]):
            continue

        woe = WOETransformer().fit(train_data[var], train_data[target])

        # ``astype`` returns a new object rather than casting in place, so
        # its result has to be assigned back for the cast to take effect.
        train_data[var] = woe.transform(train_data[var])
        train_data[var] = train_data[var].astype('float64')
        test_data[var] = woe.transform(test_data[var])
        test_data[var] = test_data[var].astype('float64')
    return train_data, test_data
def test_card_combiner_str_not_match():
    """Fitting a card must fail when the combiner rules for 'C' are altered.

    The exported combiner rules get a replacement entry for feature 'C',
    are loaded into a fresh ``Combiner``, and the resulting bins are
    WOE-encoded. ``ScoreCard.fit`` is then expected to raise with a message
    mentioning that 'C' is not matched.
    """
    altered_rules = combiner.export()
    altered_rules['C'] = [['A'], ['B'], ['C']]
    altered_combiner = Combiner().load(altered_rules)

    binned = altered_combiner.transform(df)
    woe_encoder = WOETransformer()
    encoded = woe_encoder.fit_transform(binned, target)

    scorecard = ScoreCard(combiner=altered_combiner, transer=woe_encoder)

    # The exception is expected at fit time, not at construction.
    with pytest.raises(Exception) as exc_info:
        scorecard.fit(encoded, target)

    assert "'C' is not matched" in str(exc_info.value)
def test_woe_transformer_load():
    """Load a rules mapping into a WOETransformer and inspect ``_rules``.

    After loading, index 1 of ``_rules['A']['woe']`` is expected to be 0.2.
    """
    feature_a_rules = {1: 0.1, 2: 0.2, 3: 0.3}
    loaded = WOETransformer().load({'A': feature_a_rules})

    stored_woe = loaded._rules['A']['woe']
    assert stored_woe[1] == 0.2
Exemple #5
0
def test_card_combiner_str_not_match():
    """Creating a card must fail when the combiner rules for 'C' are altered.

    The exported combiner rules get a replacement entry for feature 'C' and
    are applied to a fresh ``Combiner`` via ``set_rules``. A logistic
    regression is fitted on the WOE-encoded bins, and constructing a
    ``ScoreCard`` from these parts is expected to raise with a message
    mentioning that 'C' is not matched.
    """
    altered_rules = combiner.export()
    altered_rules['C'] = [['A'], ['B'], ['C']]
    altered_combiner = Combiner().set_rules(altered_rules)

    binned = altered_combiner.transform(df)
    woe_encoder = WOETransformer()
    encoded = woe_encoder.fit_transform(binned, target)

    classifier = LogisticRegression()
    classifier.fit(encoded, target)

    # The exception is expected while the card is being constructed.
    with pytest.raises(Exception) as exc_info:
        ScoreCard(
            combiner=altered_combiner,
            transer=woe_encoder,
            model=classifier,
        )

    assert "'C' is not matched" in str(exc_info.value)
def test_woe_transformer_export():
    """Export the rules of a transformer fitted on ``df`` and check 'C'[1]."""
    fitted = WOETransformer().fit(df, target)
    exported = fitted.export()

    rules_for_c = exported['C']
    assert rules_for_c[1] == 0
def test_woe_transformer_export_single():
    """Export the rules of a transformer fitted on the single ``feature``.

    The exported mapping is looked up under ``DEFAULT_NAME``.
    """
    fitted = WOETransformer().fit(feature, target)
    exported = fitted.export()

    default_rules = exported[DEFAULT_NAME]
    assert default_rules[5] == 0.3938235330926786
def test_woe_transformer_exclude():
    """Run fit_transform with ``exclude='A'`` and check column 'A' at row 451.

    The expected value 3 is presumably the original, un-encoded entry.
    """
    encoder = WOETransformer()
    transformed = encoder.fit_transform(df, target, exclude='A')
    assert transformed.loc[451, 'A'] == 3
def test_woe_transformer_select_dtypes():
    """Run fit_transform with ``select_dtypes='object'`` and check 'A'[451].

    The expected value 3 is presumably the original, un-encoded entry.
    """
    encoder = WOETransformer()
    transformed = encoder.fit_transform(df, target, select_dtypes='object')
    assert transformed.loc[451, 'A'] == 3
def test_woe_transformer_frame():
    """WOE-encode the whole frame and check the cell at row 451, column 1."""
    encoder = WOETransformer()
    transformed = encoder.fit_transform(df, target)

    cell = transformed.iloc[451, 1]
    assert cell == -0.2198594761130199
def test_woe_transformer_with_unknown_group():
    """Transform the value 'Z' with ``default='min'`` after fitting on str_feat.

    'Z' is presumably a group not present in ``str_feat``.
    """
    fitted = WOETransformer().fit(str_feat, target)
    encoded = fitted.transform(['Z'], default='min')

    first = encoded[0]
    assert first == -0.2198594761130199
def test_woe_transformer_with_str():
    """WOE-encode ``str_feat`` and check the encoded value at index 451."""
    encoder = WOETransformer()
    encoded = encoder.fit_transform(str_feat, target)
    assert encoded[451] == -0.2198594761130199
def test_woe_transformer():
    """WOE-encode ``feature`` and check the encoded value at index 451."""
    encoder = WOETransformer()
    encoded = encoder.fit_transform(feature, target)
    assert encoded[451] == -0.17061154127869285
Exemple #14
0
    'percent_bc_gt_75': [11.1, 25.9, 52.0],
    'avg_cur_bal': [6515.0, 10622.0, 19486.0, 36453.0]
}
# Apply the binning rules to both splits, keeping bin labels.
comb.set_rules(rules)
train_b = comb.transform(train_s, labels=True)
test_b = comb.transform(test_s, labels=True)
# Plot each binned training feature against 'loan_status'; the 'split' and
# 'loan_status' columns themselves are skipped.
for i in columns[~columns.isin(['split', 'loan_status'])]:
    data_i = pd.concat([train_b[i], train_s['loan_status']], axis=1)
    bin_plot(data_i, x=i, target='loan_status')
    plt.show()
print('分箱完成', '\n' * 2)

# ----------------------------------------------------------------------------------------------------------------------------------------------------
# WOE encoding
print('WOE编码'.center(62, '—'))
WOE = WOETransformer()
train_w = WOE.fit_transform(train_b, y='loan_status')
# Encode the test set with the mapping learned on the training set. Refitting
# on the test set would use test labels and give the two frames inconsistent
# encodings.
test_w = WOE.transform(test_b)
print('WOE编码完成', '\n' * 2)
train_w.to_csv('train_w.csv', index=False)
test_w.to_csv('test_w.csv', index=False)

# ----------------------------------------------------------------------------------------------------------------------------------------------------
# Feature selection B
print('特征第二次筛选'.center(60, '—'))
# Reload the encoded frames from the CSV files written above.
train_w = pd.read_csv('train_w.csv')
test_w = pd.read_csv('test_w.csv')

train_s2, drops = select(train_w,
                         target='loan_status',
                         iv=0.005,
        'nan': 500,
    },
    'B': {
        ','.join(list('ABCD')): 200,
        ','.join(list('EF')): 400,
        'else': 500,
    },
    'C': {
        'A': 200,
        'B': 100,
    },
}

# Module-level fixtures: bin the features of `df`, WOE-encode the bins, then
# fit a score card on the encoded frame.
combiner = Combiner()
bins = combiner.fit_transform(df, target, n_bins=5)
woe_transer = WOETransformer()
woe = woe_transer.fit_transform(bins, target)

# create a score card
card = ScoreCard(
    combiner=combiner,
    transer=woe_transer,
)
card.fit(woe, target)

# FUZZ_THRESHOLD is passed as the second positional argument of
# pytest.approx, i.e. its relative tolerance.
FUZZ_THRESHOLD = 1e-4
TEST_SCORE = pytest.approx(453.58, FUZZ_THRESHOLD)


def test_proba_to_score():
    model = LogisticRegression()