def test_PSI_frame():
    """Check the PSI reported for column ``'B'`` when two frames are compared.

    ``PSI`` is called with ``test_df`` and ``base_df`` plus a per-column
    ``combiner`` mapping; the value for ``'B'`` is checked against a
    previously recorded reference number.
    """
    import math

    result = PSI(
        test_df,
        base_df,
        combiner={
            'A': [0.3, 0.5, 0.7],
            'B': [0.4, 0.8],
        },
    )
    # Compare with a tolerance: the last bits of a float result can differ
    # between platforms and numeric library versions.
    assert math.isclose(result['B'], 0.014528279995858708, rel_tol=1e-9)
def test_PSI_return_frame():
    """Check the detail frame returned by ``PSI`` when ``return_frame=True``.

    With ``return_frame=True`` the call is unpacked into two values; only the
    second one (the frame) is inspected here, at row label ``4`` of its
    ``'test'`` column.
    """
    import math

    _, frame = PSI(
        test_df,
        base_df,
        combiner={
            'A': [0.3, 0.5, 0.7],
            'B': [0.4, 0.8],
        },
        return_frame=True,
    )
    # Compare with a tolerance rather than exact float equality.
    assert math.isclose(frame.loc[4, 'test'], 0.38, rel_tol=1e-9)
def var_psi_chart(train_data, test_data, var_list, month, q=10):
    """Build a variable-by-time-window table of PSI values.

    The training set is used as the expected distribution; ``test_data`` is
    split into time windows with ``test_data.groupby(month)`` and, for every
    variable, ``PSI(window[var], train_data[var])`` is evaluated per window.

    Args:
        train_data: Frame holding the reference (training) sample.
        test_data: Frame holding the sample to monitor; must be groupable
            by ``month``.
        var_list: Names of the variables (columns) to inspect.
        month: Grouping key passed to ``test_data.groupby``.
        q: Not used by this function; kept so existing callers that pass
            it keep working.

    Returns:
        A DataFrame indexed by variable name (index name ``'var'``) with one
        column per window label. Windows with fewer than 50 rows are
        skipped. If no window qualifies, or ``var_list`` is empty, an empty
        DataFrame is returned instead of letting ``pd.concat`` fail on an
        empty list.
    """
    min_rows = 50

    # The grouping does not depend on the variable, so do it once up front
    # instead of re-splitting test_data for every entry of var_list.
    windows = [
        (label, window)
        for label, window in test_data.groupby(month)
        if len(window) >= min_rows
    ]
    if not windows:
        return pd.DataFrame()

    rows = []
    for var in tqdm(var_list):
        cells = []
        for label, window in windows:
            # One-row, one-column frame: row = variable, column = window.
            # PSI is assumed to return a scalar for two Series inputs.
            cell = pd.DataFrame(index=pd.Index([var], name='var'))
            cell[label] = PSI(window[var], train_data[var])
            cells.append(cell)
        rows.append(pd.concat(cells, axis=1))

    if not rows:
        return pd.DataFrame()
    return pd.concat(rows, axis=0)
def test_PSI():
    """Check the PSI of ``feature`` against ``base_feature`` with a list combiner.

    The result is compared with a previously recorded reference number.
    """
    import math

    result = PSI(feature, base_feature, combiner=[0.3, 0.5, 0.7])
    # Compare with a tolerance: exact float equality is fragile across
    # platforms and numeric library versions.
    assert math.isclose(result, 0.018630024627491467, rel_tol=1e-9)
# Print precision/recall for the train and test splits, then draw the ROC plot.
print('precision_train:{:.3f} | precision_test:{:.3f}'.format(
    precision_train, precision_test))
print(
    'Recall_train:{:.3f} | Recall_test:{:.3f}'.format(Recall_train, Recall_test),
    '\n' * 2)
vs.roc(fpr_train, tpr_train, fpr_test, tpr_test)

# ----------------------------------------------------------------------------
# Reports
print('报告'.center(62, '—'))

# EDA report: overview of the variables that took part in model training.
EDA = toad.detect(X_train)
EDA.to_csv('EDA.csv')
print('EDA done!')

# PSI report.
# Bind the result to its own name: assigning it back to ``PSI`` would
# overwrite the function being called (and, inside a function body, would
# raise UnboundLocalError before the call even runs).
psi_report = PSI(X_train, X_test)
psi_report.to_csv('PSI.csv')
print('PSI done!')

# KS reports for the train and test scores.
KS_train = KS_bucket(y_train_prob, y_train, bucket=20, method='quantile')
KS_test = KS_bucket(y_test_prob, y_test, bucket=20, method='quantile')
KS_train.to_csv('KS_train.csv')
KS_test.to_csv('KS_test.csv')
print('KS done!')

# Total elapsed time; ``start`` is set earlier, outside this fragment.
end = time.time()
print('总耗时:{:.2f}'.format((end - start)))