def run(kf, data, model, label='flag_y'):
    """
    Cross-validate ``model`` on ``data`` and report the fold-averaged metrics.

    For every ``(train_index, test_index)`` pair produced by ``kf.split(data)``
    the model is fitted on the train rows and scored on both partitions with
    ``calc_ks``, ``calc_auc``, ``calc_gini`` and ``calc_psi``. The per-fold
    ``default`` score is ``1.8 * ks_test - 0.8 * abs(ks_train - ks_test)``.
    The fitted model of the fold with the highest ``default`` score is passed
    to ``dump_pkl`` and that fold's train/test rows are passed to
    ``write_data`` as ``'train.fea'`` and ``'test.fea'``.

    :param kf: splitter whose ``split(data)`` yields pairs of positional
        row indices (presumably a scikit-learn style K-fold splitter --
        confirm against the caller)
    :param data: pandas-style DataFrame containing the feature columns and
        the ``label`` column
    :param model: estimator exposing ``fit(X, y)`` and ``predict_proba(X)``
    :param label: name of the target column in ``data``
    :return: None; the averaged metrics are logged at debug level and sent
        through ``nni.report_final_result``
    """
    # Local import keeps this block self-contained; it is only needed to
    # snapshot the fitted estimator of each fold.
    import copy

    # The feature/target split does not depend on the fold, so do it once.
    features = data.drop(columns=label)
    target = data[label]

    # Per-fold values; key order here is the key order of the reported dict.
    history = {
        'gini_train': [],
        'gini_test': [],
        'auc_train': [],
        'auc_test': [],
        'ks_train': [],
        'ks_test': [],
        'psi': [],
        'default': [],
    }
    models = []

    # Cross-validation
    kf_list = list(kf.split(data))
    for train_index, test_index in kf_list:
        # Training
        X_train, y_train = features.iloc[train_index], target.iloc[train_index]
        X_test, y_test = features.iloc[test_index], target.iloc[test_index]
        model.fit(X_train, y_train)
        # Column 1 of predict_proba is used as the score
        # (presumably the positive-class probability -- confirm).
        y_pred = model.predict_proba(X_train)[:, 1]
        y_test_pred = model.predict_proba(X_test)[:, 1]

        # Compute metrics
        ks_train, ks_test = calc_ks(y_pred, y_train), calc_ks(y_test_pred, y_test)
        auc_train, auc_test = calc_auc(y_pred, y_train), calc_auc(y_test_pred, y_test)
        gini_train, gini_test = calc_gini(y_pred, y_train), calc_gini(y_test_pred, y_test)
        psi = calc_psi(y_pred, y_test_pred)

        history['gini_train'].append(gini_train)
        history['gini_test'].append(gini_test)
        history['auc_train'].append(auc_train)
        history['auc_test'].append(auc_test)
        history['ks_train'].append(ks_train)
        history['ks_test'].append(ks_test)
        history['psi'].append(psi)
        history['default'].append(1.8 * ks_test - 0.8 * abs(ks_train - ks_test))

        # Store a snapshot of the fitted estimator. Appending ``model`` itself
        # would make every entry alias the same object, which the next fold's
        # ``fit`` overwrites, so the "best" model would always be the last one.
        models.append(copy.deepcopy(model))

    # Aggregate results: mean over folds, as plain Python floats.
    metrics = {name: float(np.mean(values)) for name, values in history.items()}

    # Persist the best model for this set of hyper-parameters.
    best_model_idx = int(np.argmax(history['default']))
    dump_pkl(models[best_model_idx])

    # Write out the train/test partitions of the best fold.
    best_train_index, best_test_index = kf_list[best_model_idx][0], kf_list[best_model_idx][1]
    train = data.iloc[best_train_index]
    write_data(train, 'train.fea')
    test = data.iloc[best_test_index]
    write_data(test, 'test.fea')

    LOG.debug(metrics)
    nni.report_final_result(metrics)
def run(X_train, X_test, y_train, y_test, model):
    """
    Fit ``model`` on the train split, score both splits and report to NNI.

    NOTE(review): this file also contains an earlier ``run(kf, data, model,
    label)`` definition; if both live in the same module this one replaces
    it at import time -- confirm which is intended.

    :param X_train: train features
    :param X_test: test features
    :param y_train: train label
    :param y_test: test label
    :param model: estimator exposing ``fit`` and ``predict_proba``
    :return: None; the metrics dict is logged at debug level and passed to
        ``nni.report_final_result``
    """
    # Training
    model.fit(X_train, y_train)

    # Column 1 of predict_proba is used as the score for each split.
    train_proba = model.predict_proba(X_train)
    train_scores = train_proba[:, 1]
    test_proba = model.predict_proba(X_test)
    test_scores = test_proba[:, 1]

    # Compute metrics (KS, AUC, Gini for each split, then PSI between the
    # train and test score distributions).
    ks_on_train = calc_ks(train_scores, y_train)
    ks_on_test = calc_ks(test_scores, y_test)
    auc_on_train = calc_auc(train_scores, y_train)
    auc_on_test = calc_auc(test_scores, y_test)
    gini_on_train = calc_gini(train_scores, y_train)
    gini_on_test = calc_gini(test_scores, y_test)
    stability = calc_psi(train_scores, test_scores)

    # Aggregate results; 'default' rewards test KS and penalises the
    # train/test KS gap.
    metrics = dict(
        gini_train=gini_on_train,
        gini_test=gini_on_test,
        auc_train=auc_on_train,
        auc_test=auc_on_test,
        ks_train=ks_on_train,
        ks_test=ks_on_test,
        psi=stability,
        default=1.8 * ks_on_test - 0.8 * abs(ks_on_train - ks_on_test),
    )

    dump_pkl(model)
    LOG.debug(metrics)
    nni.report_final_result(metrics)