import numpy as np
import progressbar
from sklearn.metrics import roc_auc_score


def train():
    history_score = []
    max_score = -1
    for i in range(num_round):
        fetches = [model.optimizer, model.loss]
        if batch_size > 0:
            # mini-batch training
            ls = []
            bar = progressbar.ProgressBar()
            print('[%d]\ttraining...' % i)
            for j in bar(range(int(train_size / batch_size + 1))):
                feat_ids, feat_vals, label = slice_libsvm(train_data, j * batch_size, batch_size)
                _, l = model.run_step(fetches, feat_ids, feat_vals, label)
                ls.append(l)
        elif batch_size == -1:
            # batch_size == -1 means full-batch training
            feat_ids, feat_vals, label = slice_libsvm(train_data)
            _, l = model.run_step(fetches, feat_ids, feat_vals, label)
            ls = [l]
        # score the training set in chunks of 10000 examples
        train_preds = []
        print('[%d]\tevaluating...' % i)
        bar = progressbar.ProgressBar()
        for j in bar(range(int(train_size / 10000 + 1))):
            feat_ids, feat_vals, label = slice_libsvm(train_data, j * 10000, 10000)
            preds = model.run_step(model.pred_prob, feat_ids, feat_vals, label)
            train_preds.extend(preds)
        # score the test set in chunks of 10000 examples
        test_preds = []
        bar = progressbar.ProgressBar()
        for j in bar(range(int(test_size / 10000 + 1))):
            feat_ids, feat_vals, label = slice_libsvm(test_data, j * 10000, 10000)
            preds = model.run_step(model.pred_prob, feat_ids, feat_vals, label)
            test_preds.extend(preds)
        train_true = [e[2] for e in train_data]
        test_true = [e[2] for e in test_data]
        train_score = roc_auc_score(train_true, train_preds)
        test_score = roc_auc_score(test_true, test_preds)
        # trprecision/trrecall are computed but only the test-side
        # precision/recall are logged below
        trprecision, trrecall, tracc = calScore(train_true, train_preds)
        teprecision, terecall, teacc = calScore(test_true, test_preds)
        print('[%d]\tloss: %f\ttrain-auc: %f\teval-auc: %f\t\tprecision: %f\trecall: %f\ttrain-acc: %f\ttest-acc: %f'
              % (i, np.mean(ls), train_score, test_score, teprecision, terecall, tracc, teacc))
        history_score.append(test_score)
        # checkpoint whenever the eval AUC improves
        if test_score > max_score:
            model.save_model(FLAGS.model_dir)
            max_score = test_score
        # early stopping: quit once the eval AUC has not improved
        # for early_stop_round consecutive rounds
        if i > min_round and i > early_stop_round:
            if (np.argmax(history_score) == i - early_stop_round
                    and history_score[-1] - history_score[-early_stop_round] < 1e-5):
                print('early stop\nbest iteration:\n[%d]\teval-auc: %f'
                      % (np.argmax(history_score), np.max(history_score)))
                model.save_model(FLAGS.model_dir)
                break
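# calScore is called above but not defined in this section. The sketch below
# is a hypothetical reference implementation, assuming it thresholds the
# predicted probabilities at 0.5 and returns (precision, recall, accuracy);
# the project's actual helper may differ, and this is only needed if calScore
# is not already defined elsewhere in the repo.
def calScore(y_true, y_prob, threshold=0.5):
    y_pred = [1 if p >= threshold else 0 for p in y_prob]
    tp = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 1)
    fp = sum(1 for t, p in zip(y_true, y_pred) if t == 0 and p == 1)
    fn = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 0)
    tn = len(y_true) - tp - fp - fn
    precision = tp / float(tp + fp) if tp + fp else 0.0
    recall = tp / float(tp + fn) if tp + fn else 0.0
    acc = (tp + tn) / float(len(y_true)) if y_true else 0.0
    return precision, recall, acc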
def export_features():
    train_data = read_libsvm(FLAGS.data_dir)
    feat_ids, feat_vals, label = slice_libsvm(train_data)
    # forward pass up to the intermediate feature layer (model.fea_out)
    fea = model.run_step(model.fea_out, feat_ids, feat_vals, label)
    # write one libsvm-style line per example: "<label> 1:v1 2:v2 ..."
    with open(FLAGS.feature_dir, 'w') as f:
        for i in range(len(fea)):
            f.write(str(label[i]) + ' ')  # label
            for j in range(len(fea[i])):  # features
                f.write(str(j + 1) + ':' + str(round(fea[i][j], 3)) + ' ')
            f.write('\n')
    print('export feature done, file: %s' % FLAGS.feature_dir)
    exit()
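# read_libsvm / slice_libsvm are assumed to be defined elsewhere in the repo.
# Based on how they are used above (records indexed as e[0] = feat_ids,
# e[1] = feat_vals, e[2] = label, and whole-dataset slicing when start/size
# are omitted), a minimal sketch of the slicing helper could look like this;
# the real implementation may differ.
def slice_libsvm(data, start=0, size=-1):
    chunk = data if size == -1 else data[start:start + size]
    feat_ids = [e[0] for e in chunk]
    feat_vals = [e[1] for e in chunk]
    labels = [e[2] for e in chunk]
    return feat_ids, feat_vals, labels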