Example #1
0
#     X_bin_data = pd.concat([X_bin_data,bin_res], axis = 1)
#==============================================================================

# Generate scores: substitute the values in bin_res_data through the dict_code
# mapping (both are defined outside this excerpt).
score_data = bin_res_data.replace(dict_code)
# Row-wise total over all columns, stored in a new "score_sum" column.
score_data["score_sum"] = score_data.sum(axis = 1)

# Append the y column from loan_best_banning next to the scores (column-wise concat).
scorcarde_data = pd.concat([score_data, loan_best_banning['y']], axis =1)

# filter_iv is a project helper; element [1] of its result is written to Excel
# and kept as score_group.
# NOTE(review): presumably a per-group IV table built with 10 groups - confirm.
# The output path is an absolute, machine-specific Windows path.
iv_score_sum = step01_feature_engine.filter_iv(scorcarde_data, group=10)
iv_score_sum[1].to_excel(r"F:\TS\offline_model\02_DataProcess\05_result_score\model_result6_Jan.xlsx")
score_group = iv_score_sum[1]

# Draw plots to look at the distribution of the total score.
step06_draw_plot.drawHistogram(scorcarde_data['score_sum'])
v_feat = ['score_sum']
step02_modle_plot.prob_density(scorcarde_data, v_feat)

# The bare string below is a no-op expression used as a section label ("scorecard").
'''评分卡'''
fig = plt.figure()
fig.set(alpha=0.2)  # set the figure's alpha parameter
#plt.subplot2grid((2,3),(1,0), colspan=2)
# Kernel density estimate of score_sum, drawn separately for y == 0 and y == 1.
scorcarde_data.score_sum[scorcarde_data.y == 0].plot(kind='kde')
scorcarde_data.score_sum[scorcarde_data.y == 1].plot(kind='kde')
plt.xlabel(u"score_sum")  # x-axis label
plt.ylabel(u"density")
plt.title(u"Distribution of score_sum")
plt.legend((u'good', u'bad'),loc='best')  # labels follow plot order: y == 0 first, then y == 1

# A KS value > 0.2 can be taken to mean the model has reasonably good predictive accuracy.
1.0     104
7.5%
'''

# Run the project's null-filling helper on lf.
# NOTE(review): the result is bound to `a`, but every step below keeps using
# `lf` - confirm whether fill_null_data modifies lf in place or whether `a`
# was meant to be used afterwards.
a = step01_feature_engine.fill_null_data(lf)
#==============================================================================
# Plotting
# Bar chart for every object-dtype column of lf.
objectColumns = lf.select_dtypes(include=["object"]).columns
var = lf[objectColumns].columns
for i in var:
    step06_draw_plot.drawBar(lf[i])

# Histogram for every float-dtype column of lf. The name objectColumns is
# reused here even though it now holds the float columns.
objectColumns = lf.select_dtypes(include=["float"]).columns
var = lf[objectColumns].columns
for i in var:
    step06_draw_plot.drawHistogram(lf[i])

# Same-value check, run with ratiolimit=0.931; the first returned element
# (lf2) is what the following steps operate on.
# NOTE(review): presumably drops features dominated by a single value - confirm in the helper.
lf2, feature_primaryvalue_ratio = step01_feature_engine.select_primaryvalue_ratio(
    lf, ratiolimit=0.931)

# Print the string-typed variables.
step01_feature_engine.check_feature_binary(lf2)

# Inspect the distribution of each discrete value.
step01_feature_engine.watch_obj(lf2)

# 构建mapping,对有序变量进行转换
mapping_dict1 = {
    "var1": {
        "无": 0,
Example #3
0
    '薪资发放方式',
    '子女个数',
    '贷款申请期限',
    'group_level',
    'risk_level',
]
# Draw a bar chart for each df4 column named in var.
# NOTE(review): var is presumably the list literal that ends just above; its
# opening line is not visible in this excerpt.
for i in var:
    step06_draw_plot.drawBar(df4[i])

# Columns of df4 to plot as histograms (names kept verbatim; they are dataframe keys).
v_feat = [
    '申请贷款金额', '工作年限', '年龄', '月收入', '月其他收入', '社保公积基数', 'score', '贷款月还', '信用卡月还',
    '准贷记卡月还', '其他负债', '简版汇总负债总计', '征信负债率', '信用卡使用率', '计算年收入', '计算负债率1',
    '计算负债率2'
]
# Draw a histogram for each of those columns.
for i in v_feat:
    step06_draw_plot.drawHistogram(df4[i])


def prob_density(data, v_feat):
    """Plot each selected feature's distribution split by the ``y`` label.

    One subplot is drawn per column selected by ``data[v_feat]``, stacked
    vertically in a single tall figure. On each subplot two ``sns.distplot``
    overlays are drawn: rows where ``data["y"] == 1`` (50 bins) and rows
    where ``data["y"] == 0`` (100 bins).

    The grid was previously fixed at 20 rows, so a 21st feature raised
    ``IndexError`` on ``gs[i]``. The grid and the figure height now grow
    when more than 20 features are requested; for 20 or fewer features the
    layout is exactly as before (20 rows, 16 x 80 figure).

    Args:
        data: DataFrame holding the feature columns and a ``"y"`` column.
        v_feat: List of column names to plot.
    """
    # Title font is loaded from a hard-coded Windows font file (SimSun).
    # NOTE(review): presumably so non-ASCII feature names render - confirm.
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=10)

    # Materialise the column names once so the grid can be sized from them.
    columns = list(data[v_feat])
    # Never fewer than the original 20 rows, so existing plots keep their layout.
    n_rows = max(20, len(columns))

    plt.figure(figsize=(16, n_rows * 4))
    gs = gridspec.GridSpec(n_rows, 1)
    for i, cn in enumerate(columns):
        ax = plt.subplot(gs[i])
        # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
        # kept here so the rendered output does not change.
        sns.distplot(data[cn][data["y"] == 1], bins=50)
        sns.distplot(data[cn][data["y"] == 0], bins=100)
        ax.set_xlabel('')
        ax.set_title('histogram of feature: ' + str(cn), fontproperties=font)