def test_card_without_combiner():
    """Fit a ScoreCard that has only a WOE transformer and pin one score.

    The card is built without a combiner, fitted on the WOE-encoded frame,
    and the score at index 404 is compared to a recorded value within
    ``FUZZ_THRESHOLD``.
    """
    woe_transformer = WOETransformer()
    encoded = woe_transformer.fit_transform(df, target)

    scorecard = ScoreCard(transer=woe_transformer)
    scorecard.fit(encoded, target)

    # The sub-scores are requested but only the total score is checked.
    prediction = scorecard.predict(df, return_sub=True)
    score, sub = prediction

    expected = pytest.approx(460.9789823549386, FUZZ_THRESHOLD)
    assert score[404] == expected
def replace_with_woe(train_data, test_data, exclude_var, target='target'):
    """Replace object-dtype columns with WOE values fitted on the training set.

    Every column of ``train_data`` that is not listed in ``exclude_var``, is
    not the ``target`` column, and has object dtype is encoded with a
    ``WOETransformer`` fitted on that training column against
    ``train_data[target]``. The same fitted transformer is then applied to
    the column of the same name in ``test_data``.

    Both frames are modified in place and are also returned.

    Args:
        train_data: Training DataFrame; must contain the ``target`` column.
        test_data: DataFrame holding the same feature columns to encode.
        exclude_var: Collection of column names that must be left untouched.
        target: Name of the label column in ``train_data``.

    Returns:
        The tuple ``(train_data, test_data)``.
    """
    for var in train_data.columns.tolist():
        # Never encode the label itself: overwriting it would corrupt the
        # fit of every column processed afterwards.
        if var == target or var in exclude_var:
            continue
        if not is_object_dtype(train_data[var]):
            continue

        woe = WOETransformer().fit(train_data[var], train_data[target])

        train_data[var] = woe.transform(train_data[var])
        test_data[var] = woe.transform(test_data[var])

        # ``astype`` returns a new Series, so the result has to be assigned
        # back for the cast to take effect.
        train_data[var] = train_data[var].astype('float64')
        test_data[var] = test_data[var].astype('float64')

    return train_data, test_data
def test_card_combiner_str_not_match():
    """Fitting a card must fail when the combiner rules for 'C' do not match.

    The exported combiner rules are altered for column 'C', loaded into a
    fresh ``Combiner``, and used to build the bins and WOE values. Fitting a
    ``ScoreCard`` with that combiner is expected to raise, and the message
    must mention that 'C' is not matched.
    """
    rules = combiner.export()
    rules['C'] = [['A'], ['B'], ['C']]

    mismatched = Combiner().load(rules)
    bins = mismatched.transform(df)

    woe_transer = WOETransformer()
    woe = woe_transer.fit_transform(bins, target)

    card = ScoreCard(
        combiner=mismatched,
        transer=woe_transer,
    )

    with pytest.raises(Exception) as e:
        # will raise an exception when fitting a card
        card.fit(woe, target)

    # Checked after the block exits: statements placed after the raising
    # call inside the ``with`` body would never execute.
    assert '\'C\' is not matched' in str(e.value)
def test_woe_transformer_load():
    """Load a rules dict into a WOETransformer and inspect its stored rules.

    Asserts that, for feature 'A', element 1 of the ``'woe'`` entry in the
    transformer's ``_rules`` equals 0.2.
    """
    feature_rules = {1: 0.1, 2: 0.2, 3: 0.3}
    rules = {'A': feature_rules}

    loaded = WOETransformer().load(rules)

    stored_woe = loaded._rules['A']['woe']
    assert stored_woe[1] == 0.2
def test_card_combiner_str_not_match():
    """Creating a card must fail when the combiner rules for 'C' do not match.

    The exported combiner rules are altered for column 'C', applied to a
    fresh ``Combiner`` through ``set_rules``, and used to build the bins and
    WOE values. A ``LogisticRegression`` is fitted on the WOE frame, and
    constructing a ``ScoreCard`` from these parts is expected to raise with
    a message mentioning that 'C' is not matched.
    """
    rules = combiner.export()
    rules['C'] = [['A'], ['B'], ['C']]

    mismatched = Combiner().set_rules(rules)
    bins = mismatched.transform(df)

    woe_transer = WOETransformer()
    woe = woe_transer.fit_transform(bins, target)

    model = LogisticRegression()
    model.fit(woe, target)

    with pytest.raises(Exception) as e:
        # will raise an exception when create a card
        ScoreCard(
            combiner=mismatched,
            transer=woe_transer,
            model=model,
        )

    # Checked after the block exits: statements placed after the raising
    # call inside the ``with`` body would never execute.
    assert '\'C\' is not matched' in str(e.value)
def test_woe_transformer_export():
    """Export the rules of a transformer fitted on the frame.

    Asserts that entry 1 of the exported rules for column 'C' equals 0.
    """
    fitted = WOETransformer().fit(df, target)
    exported = fitted.export()

    column_rules = exported['C']
    assert column_rules[1] == 0
def test_woe_transformer_export_single():
    """Export the rules of a transformer fitted on a single feature.

    The exported rules are looked up under ``DEFAULT_NAME`` and entry 5 is
    compared to a recorded value.
    """
    fitted = WOETransformer().fit(feature, target)
    exported = fitted.export()

    expected = 0.3938235330926786
    assert exported[DEFAULT_NAME][5] == expected
def test_woe_transformer_exclude():
    """Run fit_transform with column 'A' passed as ``exclude``.

    Asserts that the value at row label 451 of column 'A' in the result
    equals 3.
    """
    transformer = WOETransformer()
    result = transformer.fit_transform(df, target, exclude='A')

    value = result.loc[451, 'A']
    assert value == 3
def test_woe_transformer_select_dtypes():
    """Run fit_transform with ``select_dtypes='object'``.

    Asserts that the value at row label 451 of column 'A' in the result
    equals 3.
    """
    transformer = WOETransformer()
    result = transformer.fit_transform(df, target, select_dtypes='object')

    value = result.loc[451, 'A']
    assert value == 3
def test_woe_transformer_frame():
    """Run fit_transform on the whole frame and pin one encoded cell.

    Asserts that the cell at positional row 451, positional column 1 of the
    result equals a recorded value.
    """
    transformer = WOETransformer()
    result = transformer.fit_transform(df, target)

    expected = -0.2198594761130199
    assert result.iloc[451, 1] == expected
def test_woe_transformer_with_unknown_group():
    """Transform the value 'Z' with ``default='min'`` after fitting.

    The transformer is fitted on ``str_feat``; the first element of the
    transformed output for ``['Z']`` is compared to a recorded value.
    """
    fitted = WOETransformer().fit(str_feat, target)

    unknown = ['Z']
    result = fitted.transform(unknown, default='min')

    expected = -0.2198594761130199
    assert result[0] == expected
def test_woe_transformer_with_str():
    """Run fit_transform on ``str_feat`` and pin the value at index 451."""
    transformer = WOETransformer()
    encoded = transformer.fit_transform(str_feat, target)

    expected = -0.2198594761130199
    assert encoded[451] == expected
def test_woe_transformer():
    """Run fit_transform on ``feature`` and pin the value at index 451."""
    transformer = WOETransformer()
    encoded = transformer.fit_transform(feature, target)

    expected = -0.17061154127869285
    assert encoded[451] == expected
'percent_bc_gt_75': [11.1, 25.9, 52.0], 'avg_cur_bal': [6515.0, 10622.0, 19486.0, 36453.0] } comb.set_rules(rules) train_b = comb.transform(train_s, labels=True) test_b = comb.transform(test_s, labels=True) for i in columns[~columns.isin(['split', 'loan_status'])]: data_i = pd.concat([train_b[i], train_s['loan_status']], axis=1) bin_plot(data_i, x=i, target='loan_status') plt.show() print('分箱完成', '\n' * 2) # ---------------------------------------------------------------------------------------------------------------------------------------------------- # WOE编码 print('WOE编码'.center(62, '—')) WOE = WOETransformer() train_w = WOE.fit_transform(train_b, y='loan_status') test_w = WOE.fit_transform(test_b, y='loan_status') print('WOE编码完成', '\n' * 2) train_w.to_csv('train_w.csv', index=False) test_w.to_csv('test_w.csv', index=False) # ---------------------------------------------------------------------------------------------------------------------------------------------------- # 特征筛选B print('特征第二次筛选'.center(60, '—')) train_w = pd.read_csv('train_w.csv') test_w = pd.read_csv('test_w.csv') train_s2, drops = select(train_w, target='loan_status', iv=0.005,
'nan': 500, }, 'B': { ','.join(list('ABCD')): 200, ','.join(list('EF')): 400, 'else': 500, }, 'C': { 'A': 200, 'B': 100, }, } combiner = Combiner() bins = combiner.fit_transform(df, target, n_bins=5) woe_transer = WOETransformer() woe = woe_transer.fit_transform(bins, target) # create a score card card = ScoreCard( combiner=combiner, transer=woe_transer, ) card.fit(woe, target) FUZZ_THRESHOLD = 1e-4 TEST_SCORE = pytest.approx(453.58, FUZZ_THRESHOLD) def test_proba_to_score(): model = LogisticRegression()