예제 #1
0
def test_PSI_frame():
    """Check the PSI value reported for column 'B' when whole frames are compared.

    ``PSI`` is called with ``test_df`` and ``base_df`` plus a ``combiner``
    mapping that supplies a list of numbers for each of the columns 'A' and
    'B'; the result is then looked up by column name.
    """
    # Local import keeps this test self-contained.
    import math

    result = PSI(
        test_df,
        base_df,
        combiner={
            'A': [0.3, 0.5, 0.7],
            'B': [0.4, 0.8],
        },
    )

    # Compare with a tight tolerance instead of `==`: the last bits of a
    # floating-point result may differ between platforms/library versions.
    assert math.isclose(result['B'], 0.014528279995858708, rel_tol=1e-9)
예제 #2
0
def test_PSI_return_frame():
    """Check one cell of the detail frame returned with ``return_frame=True``.

    With ``return_frame=True`` the call yields two values; the second one is
    indexed here with ``.loc[4, 'test']``.
    """
    # Local import keeps this test self-contained.
    import math

    result, frame = PSI(
        test_df,
        base_df,
        combiner={
            'A': [0.3, 0.5, 0.7],
            'B': [0.4, 0.8],
        },
        return_frame=True,
    )

    # Tolerance-based comparison: the stored value is a computed float, so
    # exact equality with the literal 0.38 is not guaranteed everywhere.
    assert math.isclose(frame.loc[4, 'test'], 0.38, rel_tol=1e-9)
예제 #3
0
def var_psi_chart(train_data, test_data, var_list, month, q=10,
                  min_samples=50):
    """Monitor variable stability across time windows via PSI.

    The training set is used as the expected (base) distribution.
    ``test_data`` is split with ``test_data.groupby(month)`` and, for every
    variable in ``var_list``, ``PSI(period_slice[var], train_data[var])`` is
    evaluated for each period that is large enough.

    Args:
        train_data: Table providing the base column for each variable
            (accessed as ``train_data[var]``).
        test_data: Table to monitor; it is grouped by ``month`` and each
            group is accessed as ``group[var]``.
        var_list: Iterable of variable (column) names to inspect.
        month: Grouping key handed to ``test_data.groupby``.
        q: Not used by this function; kept so existing callers keep working.
        min_samples: Periods with fewer rows than this are skipped.
            Defaults to 50, the previously hard-coded threshold.

    Returns:
        A DataFrame with one row per variable (index named ``'var'``) and one
        column per retained period. When no period is large enough, or
        ``var_list`` is empty, the frame is empty instead of ``pd.concat``
        raising ``ValueError`` on an empty list.
    """
    # Which periods qualify depends only on the group size, not on the
    # variable, so split the test set once instead of once per variable.
    periods = [
        (label, chunk)
        for label, chunk in test_data.groupby(month)
        if len(chunk) >= min_samples
    ]
    labels = [label for label, _ in periods]

    names = []
    rows = []
    for name in tqdm(var_list):
        base = train_data[name]
        names.append(name)
        rows.append([PSI(chunk[name], base) for _, chunk in periods])

    return pd.DataFrame(
        rows,
        index=pd.Index(names, name='var'),
        columns=labels,
    )
예제 #4
0
def test_PSI():
    """Check the PSI of ``feature`` against ``base_feature`` for a list combiner."""
    # Local import keeps this test self-contained.
    import math

    result = PSI(feature, base_feature, combiner=[0.3, 0.5, 0.7])

    # Use a tolerance rather than `==`; exact float equality is brittle
    # across platforms and numeric library versions.
    assert math.isclose(result, 0.018630024627491467, rel_tol=1e-9)
예제 #5
0
# Print precision / recall for the train and test sets, then draw the ROC
# curves for both.
print('precision_train:{:.3f} | precision_test:{:.3f}'.format(
    precision_train, precision_test))
print(
    'Recall_train:{:.3f} | Recall_test:{:.3f}'.format(Recall_train,
                                                      Recall_test), '\n' * 2)
vs.roc(fpr_train, tpr_train, fpr_test, tpr_test)

# ----------------------------------------------------------------------------------------------------------------------------------------------------
# Reports
print('报告'.center(62, '—'))

# EDA report: overview of the variables that take part in model training
EDA = toad.detect(X_train)
EDA.to_csv('EDA.csv')
print('EDA done!')

# PSI
# Bind the result to its own name: the original `PSI = PSI(...)` replaced the
# PSI function with its return value, so any later PSI(...) call would fail.
psi_report = PSI(X_train, X_test)
psi_report.to_csv('PSI.csv')
print('PSI done!')

# KS report: 20 quantile buckets for each of the train and test scores
KS_train = KS_bucket(y_train_prob, y_train, bucket=20, method='quantile')
KS_test = KS_bucket(y_test_prob, y_test, bucket=20, method='quantile')
KS_train.to_csv('KS_train.csv')
KS_test.to_csv('KS_test.csv')
print('KS done!')

# Report the time elapsed since `start` (set earlier in the script).
end = time.time()
print('总耗时:{:.2f}'.format((end - start)))