def _encode_university(frame):
    """Return a copy of ``frame`` with 'university' mapped through ``university_dict``.

    Replacements are applied one key at a time, in dictionary order, so the
    result matches a sequential ``column[column == key] = value`` loop.
    """
    encoded = frame.copy()
    # Work on a standalone object-dtype Series: this avoids chained indexing
    # (a silent no-op under pandas Copy-on-Write) and lets replacement values
    # of a different type than the original labels be stored.
    column = encoded['university'].astype(object)
    for key, value in university_dict.items():
        column[column == key] = value
    encoded['university'] = column
    return encoded


def train_lowerq(features):
    """Fit a regression model for ``lowerq`` and predict it for the 'polyu' rows.

    Rows whose ``university`` is not ``'polyu'`` form the training set; rows
    whose ``university`` is ``'polyu'`` form the test set. The ``university``
    column of both sets is encoded through the module-level
    ``university_dict`` and the ``lowerq`` / ``upperq`` columns are dropped
    before the remaining columns are handed to the model.

    Args:
        features: pandas DataFrame containing at least the columns
            ``university``, ``lowerq``, ``upperq`` and ``average``.
            It is not modified.

    Returns:
        A 2-tuple ``(get_lowerq(pred_lowerq, test_average), lowerq)`` where
        ``pred_lowerq`` is the model prediction for the test rows,
        ``test_average`` is the test rows' ``average`` column and ``lowerq``
        is the array of training targets.
    """
    train_data = _encode_university(features[features['university'] != 'polyu'])
    test_data = _encode_university(features[features['university'] == 'polyu'])

    # Targets / auxiliary values are extracted before the columns are dropped.
    lowerq = np.array(list(train_data['lowerq'].values))
    test_average = np.array(list(test_data['average'].values))

    dropped_columns = ['lowerq', 'upperq']
    # ``DataFrame.get_values()`` was removed in pandas 1.0; ``.values`` yields
    # the same ndarray and works on old and new pandas alike.
    train_matrix = train_data.drop(columns=dropped_columns).values
    test_matrix = test_data.drop(columns=dropped_columns).values

    regression_model = SBBTree(
        params=regression_params,
        stacking_num=1,
        bagging_num=5,
        bagging_test_size=0.2,
        num_boost_round=10000,
        early_stopping_rounds=200,
    )
    regression_model.fit(train_matrix, lowerq)
    pred_lowerq = regression_model.predict(test_matrix)

    return get_lowerq(pred_lowerq, test_average), lowerq
# Hyper-parameters handed to SBBTree (presumably LightGBM training
# parameters -- confirm against the SBBTree implementation).
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': {'l2', 'auc'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0,
}

# ---------------------------------------------------------------
# Train: regression model predicting next month's purchase count.
# ---------------------------------------------------------------

# Feature columns and the name of the label column to fit / fill in.
train_features = TrainFeatures.TrainColumns
train_label_BuyNum = 'Label_30_101_BuyNum'

# Training matrix and targets, plus the matrix to predict for.
X = TrainFeatures.data_BuyOrNot_FirstTime[train_features].values
y = TrainFeatures.data_BuyOrNot_FirstTime[train_label_BuyNum].values
X_pred = PredFeatures.data_BuyOrNot_FirstTime[train_features].values

model = SBBTree(
    params=params,
    stacking_num=5,
    bagging_num=3,
    bagging_test_size=0.33,
    num_boost_round=10000,
    early_stopping_rounds=200,
)
model.fit(X, y)

# Store the predictions on the prediction frame under the label column.
PredFeatures.data_BuyOrNot_FirstTime[train_label_BuyNum] = model.predict(X_pred)
'num_leaves': 64, 'learning_rate': 0.05, 'feature_fraction': 0.9, 'bagging_fraction': 0.8, 'bagging_freq': 5, 'verbose': 0, 'seed': 2016, 'min_child_weight': 1.5, 'lambda_l2': 10, 'scale_pos_weight': 20 } ############################################################### model = SBBTree(params=params, stacking_num=5, bagging_num=3, bagging_test_size=0.33, num_boost_round=10000, early_stopping_rounds=200, train_features=train_features) # train 下个月购买次数预测 回归模型 train_label_BuyNum = 'label_30_101_BuyNums' train_features = train_features X = train_data[train_features].values y = train_data[train_label_BuyNum].values X_pred = test_data[train_features].values X_valid = valid_data[train_features].values