# --- Resample the training set, fit, and evaluate -----------------------------
# Balance classes by random oversampling before fitting.
# NOTE(review): fit_sample() was renamed fit_resample() in imbalanced-learn
# >= 0.4 -- upgrade this call when the environment allows.
sm = RandomOverSampler()
X_train_res, y_train_res = sm.fit_sample(X_train, y_train)
model.fit(X_train_res, y_train_res)

#%%
# Evaluate on the held-out test set.
y_pred = model.predict(X_test)
acc_score = model.score(X_test, y_test)
print(f'The accuracy score is: {acc_score}')
rec_score = recall_score(y_test, y_pred)
print(f'The recall score is: {rec_score}')

#%%
#mm.model_report_card(model, X_train_res, y_train_res, X_test, y_test, normalize=False)
# Side-by-side confusion matrices (normalized and raw counts) for the test set.
fig, ax = plt.subplots(1, 2)
fig.suptitle(f'accuracy score: {acc_score:.3f}, recall score: {rec_score:.3f}', fontsize=24)
mm.plot_confusion_matrix(y_test, y_pred, ax=ax[0], normalize=True)
mm.plot_confusion_matrix(y_test, y_pred, ax=ax[1], normalize=False)

#%%
# Evaluate on last week's data.
y_pred = model.predict(X_lastweek)
# BUG FIX: score must compare predictions against the true labels
# (y_lastweek), not against y_pred itself -- the original
# model.score(X_lastweek, y_pred) trivially reports 1.0.
acc_score = model.score(X_lastweek, y_lastweek)
print(f'The accuracy score is: {acc_score}')
rec_score = recall_score(y_lastweek, y_pred)
print(f'The recall score is: {rec_score}')

#%%
#mm.model_report_card(model, X_train_res, y_train_res, X_test, y_test, normalize=False)
# Same confusion-matrix pair for last week's data.
fig, ax = plt.subplots(1, 2)
fig.suptitle(f'accuracy score: {acc_score:.3f}, recall score: {rec_score:.3f}', fontsize=24)
mm.plot_confusion_matrix(y_lastweek, y_pred, ax=ax[0], normalize=True)
mm.plot_confusion_matrix(y_lastweek, y_pred, ax=ax[1], normalize=False)
# --- Minibatch training with per-chunk encoding and resampling ----------------
# NOTE(review): chunks are indexed from 1, so rows [0:chunksize] are never
# seen here -- confirm chunk 0 was trained in an earlier cell, otherwise this
# is an off-by-one.
for n in tqdm(np.arange(1, 127)):
    X_train_chunky = X_train[n * chunksize:(n + 1) * chunksize]
    y_train_chunky = y_train[n * chunksize:(n + 1) * chunksize]
    # Target-encode this chunk, then oversample it before the partial fit.
    X_train_chunky = loo.fit_transform(X_train_chunky, y_train_chunky)
    X_train_res, y_train_res = sm.fit_sample(X_train_chunky, y_train_chunky)
    # classes= must cover every label, since each chunk may miss some.
    model.partial_fit(X_train_res, y_train_res, classes=np.unique(y))

# Persist the incrementally trained model.
# NOTE(review): original flattened source is ambiguous about whether this dump
# ran inside the loop (checkpoint per chunk) or once after it -- saved once
# here; confirm against the intended checkpointing behavior.
dump(model, 'SGD_model_minibatch_200k_127.joblib')

#%%
# Display metrics for model performance.
y_pred = model.predict(X_test)
acc_score = model.score(X_test, y_test)
print(f'The accuracy score is: {acc_score}')
# BUG FIX: the second metric is recall, but the original stored it in
# acc_score and printed it with an "accuracy" label -- use a distinct
# variable and the correct message.
rec_score = recall_score(y_test, y_pred)
print(f'The recall score is: {rec_score}')

#%%
#mm.model_report_card(model, X_train_res, y_train_res, X_test, y_test, normalize=False)
# Confusion matrices: normalized, then raw counts.
fig, ax = plt.subplots()
mm.plot_confusion_matrix(y_test, y_pred, ax=ax, normalize=True)
fig, ax = plt.subplots()
mm.plot_confusion_matrix(y_test, y_pred, ax=ax, normalize=False)

#%%
#pca = PCA(n_components = 0.9)
#X_pca = pca.fit_transform(X_train)
#print(X_pca.shape)
##fig, ax = plt.subplots()
##ax.scatter(X_pca[:,0],X_pca[:,1], c=y_train_res)

#%%
#fn = 'df_full.gzip'
#lf.temp_save(fname=os.path.join(data_directory,fn), df=df)
# prob_y_train = logit_result.predict()

# pd.np was deprecated in pandas 0.25 and removed in 2.0 -- use numpy
# directly (local import keeps this cell self-contained).
import numpy as np

# Test data: build the design matrix (intercept + selected features).
X_test_metric = sm.add_constant(X_test[params.index[1:]])
# Predicted probabilities for the test set.
prob_y_test = logit_result.predict(X_test_metric)
# Threshold probabilities at 0.5 to obtain class labels.
# BUG FIX: replaced pd.np.where with np.where (pandas.np shim removed).
label_pred_test = np.where(prob_y_test > 0.5, 1, 0)

# ROC curve
plot_roc_curve(prob_y_test, y_test)
# KS table & KS curve
ks_stattable, _ = ks_stats(prob_y_test, y_test)
# Lift chart & Lorenz curve
# lift_lorenz(prob_y_test, y_test)
# Build the confusion matrix
plot_confusion_matrix(y_test, label_pred_test, labels=[0, 1])

"""
Generate the scorecard
"""
method = 4
var_list = list(params.index)
# '参数估计' is the DataFrame column holding the fitted coefficients
# (runtime key -- must stay as-is).
est = params['参数估计']

# Earlier scorecard-generation variant, kept for reference:
# method = 4
# # Strip the BIN and WOE suffixes from the parameter index names
# params.index = [k.replace("_BIN", "") for k in params.index]
# params.index = [k.replace("_WOE", "") for k in params.index]
# paramsEst = params['参数估计']
# var_list = list(paramsEst.index)[1:]
#