def XGBoost():
    """Train an XGBoost binary classifier, save it, and predict the test set.

    Uses the module-level ``X_train``, ``y_train`` and ``X_test``. The booster
    is trained for 200 rounds with the ``binary:logistic`` objective, written
    to ``xgb_time.model``, and then applied to ``X_test``.

    Returns:
        The result of ``model.predict`` on the test matrix, or ``None`` when
        any step raised an exception (``'die'`` is printed in that case).
    """
    params = {
        'objective': 'binary:logistic',
        'eta': 0.08,
        'colsample_bytree': 0.886,
        'min_child_weight': 1.1,
        'max_depth': 7,
        'subsample': 0.886,
        'gamma': 0.1,
        'lambda': 10,
        # NOTE(review): 'verbose_eval' looks like an xgb.train() argument
        # rather than a booster parameter -- confirm it has any effect here.
        'verbose_eval': True,
        'eval_metric': 'auc',
        'scale_pos_weight': 6,
        'seed': 201703,
        # NOTE(review): 'missing' is presumably meant for xgb.DMatrix(...,
        # missing=-1); left here unchanged so trained models stay comparable.
        'missing': -1,
    }
    try:
        xgbtrain = xgb.DMatrix(X_train, y_train)
        xgbtest = xgb.DMatrix(X_test)
        model = xgb.train(params, xgbtrain, num_boost_round=200)
        # Saving is a method of the trained booster, not of the xgboost module.
        model.save_model('xgb_time.model')
        return model.predict(xgbtest)
    except Exception:
        # Best-effort behaviour kept from the original: report and return None.
        # Narrowed from a bare ``except`` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        print('die')
        return None
def update(self, score, xgb):
    """Remember ``xgb`` as the best model if ``score`` beats the stored one.

    A score is considered better when it is strictly lower than
    ``self.best_score``. On improvement the score, parameters and model are
    stored on ``self``, the parameters are written through
    ``self._write_json`` and the model is saved via ``xgb.save_model``;
    both file names embed characters 2..5 of ``str(-1 * score)``.

    Args:
        score: Score of the candidate model (lower is better).
        xgb: Candidate model exposing ``get_params()`` and ``save_model(path)``.
    """
    # Parameters are fetched unconditionally, before the comparison.
    current_params = xgb.get_params()

    if not (self.best_score > score):
        return

    tag = str(-1 * score)[2:6]
    self.best_score = score
    self.best_params = current_params
    self.best_model = xgb

    params_path = "tmp/best_params_{}.json".format(tag)
    model_path = "tmp/best_model_{}.xgb".format(tag)
    self._write_json(self.best_params, params_path)
    xgb.save_model(model_path)

    message = "best model updated: score: {}, params: {}".format(
        score, current_params)
    print(message)
def run_cv(x_train, x_test, y_train, y_test):
    """Fit an ``XGBooster`` on the training data, save it, and predict the test set.

    Calls ``conf.xgb_config()`` on the module-level ``conf``, fits
    ``XGBooster(conf)`` on ``(x_train, y_train)``, prints the data shapes,
    the elapsed training time and the best AUC / round returned by ``fit``,
    saves the booster, and finally hands the best model to ``xgb_predict``
    together with a timestamped CSV path under ``../result/``.

    Args:
        x_train: Training features.
        x_test: Test features passed to ``xgb_predict``.
        y_train: Training labels.
        y_test: Test labels passed to ``xgb_predict``.
    """
    conf.xgb_config()
    tic = time.time()
    print('X_train.shape={}, X_test.shape = {}'.format(
        np.shape(x_train), np.shape(x_test)))

    # Named ``booster`` so the local cannot shadow an ``xgb`` module alias.
    booster = XGBooster(conf)
    # The third element of fit()'s result is not used here.
    best_auc, best_round, _cv_rounds, best_model = booster.fit(x_train, y_train)
    print('Training time cost {}s'.format(time.time() - tic))
    booster.save_model()
    print('best_auc = {}, best_round = {}'.format(best_auc, best_round))

    # Define the timestamp locally so the result path never depends on a
    # ``now`` that may not exist at module level.
    now = time.strftime('%Y-%m-%d %H:%M')
    result_saved_path = '../result/result_{}-{:.4f}.csv'.format(now, best_auc)
    xgb_predict(best_model, x_test, y_test, save_result_path=result_saved_path)
'learning_rate':0.05, 'seed':2017, 'nthread':12, 'silent': 1 } #plst+=[('eval_metric','auc')] #evallist=[(x_val,'eval'),(x_train,'train')] num_round=3000 plst=list(params.items()) plst+= [('eval_metric', 'auc')] evallist = [(xgb_val, 'eval'), (xgb_train, 'train')] xgb=xgb.train(params,xgb_train,num_boost_round=num_round) print("save model") xgb.save_model('./model/model4.txt') print('开始预测') preds_sub=xgb.predict(xgb_test) test_tobepredicted['Ki']=preds_sub test_tobepredicted.to_csv('./result/result4.csv',index=False) # with open("result1.csv", "w") as f: # sys.stdout = f # print "Protein_ID,Molecule_ID" # for index, Protein_ID in enumerate(user_ids): # print "{},{}".format(userid, y[index])