def test_sparse_matrix_inputs():
    """The stacked regressor must accept both dense arrays and CSR sparse
    matrices, reaching the expected (version-dependent) training MSE."""
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf,
                                random_state=42)

    # dense
    stack.fit(X1, y).predict(X1)
    mse = 0.21
    got = np.mean((stack.predict(X1) - y)**2)
    assert round(got, 2) == mse, got

    # sparse
    stack.fit(sparse.csr_matrix(X1), y)
    # FIX: the original had two branches (< 0.21 and < 0.22) that assigned the
    # same expected value — collapsed into a single condition.
    if Version(sklearn_version) < Version("0.22"):
        expected_value = 0.20
    else:
        expected_value = 0.21
    got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y)**2)
    assert round(got, 2) == expected_value, got
def test_sparse_matrix_inputs():
    """Dense and CSR-sparse feature matrices should both be accepted and
    reach the expected training MSE (sparse expectation is version-dependent)."""
    base_models = [
        SVR(kernel='linear', gamma='auto'),
        LinearRegression(),
        Ridge(random_state=1),
    ]
    stack = StackingCVRegressor(regressors=base_models,
                                meta_regressor=SVR(kernel='rbf', gamma='auto'),
                                random_state=42)

    # dense path
    stack.fit(X1, y).predict(X1)
    dense_mse = np.mean((stack.predict(X1) - y) ** 2)
    assert round(dense_mse, 2) == 0.21, dense_mse

    # sparse path — older sklearn releases give a slightly different result
    stack.fit(sparse.csr_matrix(X1), y)
    if Version(sklearn_version) < Version("0.21"):
        expected_value = 0.20
    else:
        expected_value = 0.19
    sparse_mse = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
    assert round(sparse_mse, 2) == expected_value, sparse_mse
def build_lasso():
    """Train a stacked ensemble on the house-price data, print the 3-fold
    cross-validation RMSE, and write the submission CSV."""
    train_df, test_df = load_data()
    combined_df = pd.concat((train_df.loc[:, 'MSSubClass':'SaleCondition'],
                             test_df.loc[:, 'MSSubClass':'SaleCondition']))

    # feature engineering
    config_categorical_features(combined_df)
    # combined_df = extract_common_features(combined_df)
    log_transform_features(combined_df)
    combined_df = normalize_numerical_features(combined_df)
    combined_df = one_hot_encoding(combined_df)
    missing_value_fill(combined_df)

    n_train = train_df.shape[0]
    X_train = combined_df[:n_train]
    X_test = combined_df[n_train:]
    y = np.log1p(train_df["SalePrice"])

    # base models
    lasso_model = Lasso(alpha=0.0005, max_iter=1000)
    lgb_model = lgb.LGBMRegressor(lambda_l2=0,
                                  learning_rate=0.05,
                                  min_child_samples=4,
                                  n_estimators=500,
                                  num_leaves=10)
    xgb_model = xgb.XGBRegressor(max_depth=2, n_estimators=360)
    rf_model = RandomForestRegressor(max_depth=50,
                                     max_features=None,
                                     min_samples_leaf=4,
                                     n_estimators=50)

    # the random forest doubles as the meta-learner
    meta_model = rf_model
    model = StackingCVRegressor(
        regressors=(lasso_model, lgb_model, xgb_model, rf_model),
        meta_regressor=meta_model,
        use_features_in_secondary=True)

    features = np.array(X_train)
    targets = np.array(y)
    model.fit(features, targets)
    cv_scores = cross_val_score(model, features, targets, cv=3,
                                scoring="neg_mean_squared_error")
    print("cross_validation_rmse:", np.mean(np.sqrt(-cv_scores)))

    # model prediction
    stack_preds = np.expm1(model.predict(np.array(X_test)))
    solution = pd.DataFrame({"id": test_df.Id, "SalePrice": stack_preds})
    solution.to_csv("./house_price/submission_stack_v1.csv", index=False)
def AlgoSCR(df_train, df_trainY, m1, m2, m3, m4, m5):
    """Stack the five supplied regressors (m1 also serves as the
    meta-regressor), report CV and in-sample RMS, and return the fitted model."""
    stacked = StackingCVRegressor(regressors=(m1, m2, m3, m4, m5),
                                  meta_regressor=m1,
                                  use_features_in_secondary=True)
    rmsle_cv(stacked, df_train, df_trainY)
    stacked.fit(df_train, df_trainY)
    in_sample = stacked.predict(df_train)
    rms = np.around(sqrt(mean_squared_error(df_trainY, in_sample)), decimals=7)
    print("rms value of same set: ", rms)
    return stacked
def test_predict_meta_features():
    """The number of predictions must equal the number of test samples."""
    meta = SVR(kernel='rbf')
    level_one = [LinearRegression(), Ridge(random_state=1)]
    stregr = StackingCVRegressor(regressors=level_one, meta_regressor=meta)
    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
    stregr.fit(X_train, y_train)
    preds = stregr.predict(X_test)
    assert preds.shape[0] == X_test.shape[0]
def test_sparse_matrix_inputs():
    """Dense and CSR inputs should both reach a training MSE of 0.20."""
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear'), LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=SVR(kernel='rbf'))

    # dense input
    stack.fit(X1, y).predict(X1)
    dense_error = np.mean((stack.predict(X1) - y) ** 2)
    assert round(dense_error, 2) == 0.20

    # sparse input
    stack.fit(sparse.csr_matrix(X1), y)
    sparse_error = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
    assert round(sparse_error, 2) == 0.20
def test_different_models():
    """Stacking heterogeneous base models should reproduce MSE 0.21 on X1."""
    base = [SVR(kernel='linear'), LinearRegression(), Ridge(random_state=1)]
    stack = StackingCVRegressor(regressors=base,
                                meta_regressor=SVR(kernel='rbf'))
    stack.fit(X1, y).predict(X1)
    error = np.mean((stack.predict(X1) - y) ** 2)
    assert round(error, 2) == 0.21
def test_multivariate():
    """Two-feature input X2 should give a training MSE of 0.19."""
    base = [SVR(kernel='linear'), LinearRegression(), Ridge(random_state=1)]
    stack = StackingCVRegressor(regressors=base,
                                meta_regressor=SVR(kernel='rbf'))
    stack.fit(X2, y).predict(X2)
    expected = 0.19
    observed = round(np.mean((stack.predict(X2) - y) ** 2), 2)
    assert observed == expected, '%f != %f' % (observed, expected)
def test_multivariate_class():
    """Multi-output stacking (multi_output=True) on the 2-D target y2.

    NOTE(review): ``LinearRegression(normalize=True)`` was deprecated in
    scikit-learn 1.0 and removed in 1.2 — this test will raise a TypeError on
    modern releases; confirm the pinned sklearn version before relying on it.
    """
    lr = LinearRegression()
    ridge = Ridge(random_state=1)
    meta = LinearRegression(normalize=True)
    stregr = StackingCVRegressor(regressors=[lr, ridge],
                                 meta_regressor=meta,
                                 multi_output=True,
                                 random_state=0)
    stregr.fit(X2, y2).predict(X2)
    mse = 0.13  # expected mean squared error, rounded to two decimals
    got = np.mean((stregr.predict(X2) - y2) ** 2.)
    assert round(got, 2) == mse, got
def test_sparse_matrix_inputs_with_features_in_secondary():
    """With use_features_in_secondary=True, dense and CSR inputs both reach
    a training MSE of 0.20."""
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear', gamma='auto'),
                    LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=SVR(kernel='rbf', gamma='auto'),
        random_state=42,
        use_features_in_secondary=True)

    expected = 0.20

    # dense
    stack.fit(X1, y).predict(X1)
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == expected, got

    # sparse
    stack.fit(sparse.csr_matrix(X1), y)
    got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
    assert round(got, 2) == expected, got
def test_sparse_matrix_inputs_with_features_in_secondary():
    """Feeding original features to the meta-regressor must work for dense
    arrays and CSR sparse matrices alike (MSE 0.20 in each case)."""
    base_models = [SVR(kernel='linear', gamma='auto'),
                   LinearRegression(),
                   Ridge(random_state=1)]
    stack = StackingCVRegressor(regressors=base_models,
                                meta_regressor=SVR(kernel='rbf', gamma='auto'),
                                random_state=42,
                                use_features_in_secondary=True)

    def fit_and_score(features):
        # Refit on `features` and return the in-sample mean squared error.
        stack.fit(features, y)
        return np.mean((stack.predict(features) - y)**2)

    dense_mse = fit_and_score(X1)
    assert round(dense_mse, 2) == 0.20, dense_mse

    sparse_mse = fit_and_score(sparse.csr_matrix(X1))
    assert round(sparse_mse, 2) == 0.20, sparse_mse
def test_use_features_in_secondary():
    """cv=3 with use_features_in_secondary=True should give MSE 0.2 on X1."""
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear'), LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=SVR(kernel='rbf'),
        cv=3,
        use_features_in_secondary=True)
    stack.fit(X1, y).predict(X1)
    expected = 0.2
    observed = round(np.mean((stack.predict(X1) - y) ** 2), 2)
    assert observed == expected, '%f != %f' % (observed, expected)
def test_internals():
    """Internal sanity check: with five identical OLS base models the
    meta-features are all equal, so the fitted meta-regressor should put all
    weight in the intercept-free mean (zero coefficients, zero intercept)."""
    lr = LinearRegression()
    regressors = [lr, lr, lr, lr, lr]
    cv = 10
    # FIX: pass the `regressors` variable instead of a duplicated literal list,
    # so the list and the final length assertion cannot drift apart.
    stack = StackingCVRegressor(regressors=regressors,
                                meta_regressor=lr,
                                cv=cv)
    stack.fit(X3, y2)
    assert stack.predict(X3).mean() == y2.mean()
    assert stack.meta_regr_.intercept_ == 0.0
    assert stack.meta_regr_.coef_[0] == 0.0
    assert stack.meta_regr_.coef_[1] == 0.0
    assert stack.meta_regr_.coef_[2] == 0.0
    assert len(stack.regr_) == len(regressors)
def test_sample_weight():
    """A weighted fit reaches MSE 0.21 and must produce predictions that
    differ measurably from an unweighted fit."""
    base = [SVR(kernel='linear', gamma='auto'),
            LinearRegression(),
            Ridge(random_state=1)]
    stack = StackingCVRegressor(regressors=base,
                                meta_regressor=SVR(kernel='rbf', gamma='auto'),
                                cv=KFold(4, shuffle=True, random_state=7))

    weighted_pred = stack.fit(X1, y, sample_weight=w).predict(X1)
    expected_mse = 0.21  # 0.20770
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == expected_mse, \
        "Expected %.2f, but got %.5f" % (expected_mse, got)

    unweighted_pred = stack.fit(X1, y).predict(X1)
    maxdiff = np.max(np.abs(weighted_pred - unweighted_pred))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
def test_sample_weight():
    """sample_weight must influence the fit: the weighted model hits MSE 0.21
    and its predictions deviate from the unweighted model's by > 1e-3."""
    svr_meta = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear', gamma='auto'),
                    LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=svr_meta,
        cv=KFold(4, shuffle=True, random_state=7))

    stack.fit(X1, y, sample_weight=w)
    pred1 = stack.predict(X1)
    mse = 0.21  # 0.20770
    got = np.mean((stack.predict(X1) - y)**2)
    assert round(got, 2) == mse, "Expected %.2f, but got %.5f" % (mse, got)

    stack.fit(X1, y)
    pred2 = stack.predict(X1)
    maxdiff = np.max(np.abs(pred1 - pred2))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
class Model:
    """Two-stage model: a stacked (Lasso + Ridge -> Ridge) regressor for the
    target column, plus a separate Lasso for the constraint columns."""

    def __init__(self, lasso_target_params=LASSO_TARGET_PARAMS,
                 ridge_target_params=RIDGE_TARGET_PARAMS,
                 lasso_constraints_params=LASSO_CONSTRAINTS_PARAMS,
                 constraint_columns=CONSTRAINT_COLUMNS,
                 target_column=TARGET_COLUMN):
        self.target_estimator = StackingCVRegressor(
            regressors=(Lasso(**lasso_target_params),
                        Ridge(**ridge_target_params)),
            meta_regressor=Ridge(alpha=0.01))
        self.constraints_estimator = Lasso(**lasso_constraints_params)
        self.constraint_columns = constraint_columns
        self.target_column = target_column
        # Feature columns seen at fit time; captured so predict() selects the
        # same columns in the same order.
        self.test_columns = None

    def fit(self, data):
        '''
        Feed dataframe
        '''
        train, labels = generate_train_data(data)
        self.target_estimator.fit(train.values,
                                  labels[self.target_column].values)
        self.constraints_estimator.fit(train.values,
                                       labels[self.constraint_columns].values)
        self.test_columns = train.columns

    def predict(self, data):
        """Return a DataFrame with predicted constraint columns and the
        predicted target column."""
        predicted_constraints = self.constraints_estimator.predict(
            data[self.test_columns].values)
        predicted_data = {
            self.constraint_columns[i]: constraint
            for i, constraint in enumerate(predicted_constraints.T)
        }
        # BUG FIX: use the instance's configured target column (as fit() does)
        # rather than the module-level TARGET_COLUMN constant, so a custom
        # target_column passed to __init__ is honored.
        predicted_data[self.target_column] = self.target_estimator.predict(
            data[self.test_columns].values)
        return pandas.DataFrame(predicted_data)
# NOTE(review): this chunk opens mid-call — the constructor these keyword
# arguments belong to (an extra-trees model, judging by `et_model` below)
# starts before the visible region.
                              min_samples_split=25,
                              min_samples_leaf=35,
                              max_features=150)

# results = cross_val_score(et_model, train, y_train, cv=5, scoring='r2')
# print("ET score: %.4f (%.4f)" % (results.mean(), results.std()))

# Stack the tuned pipelines/models under an ElasticNet meta-regressor.
stack = StackingCVRegressor(
    #meta_regressor=Ridge(alpha=10),
    meta_regressor=ElasticNet(l1_ratio=0.1, alpha=1.5),
    regressors=(svm_pipe, en_pipe, xgb_pipe, rf_model, lgbm_model))
    #regressors=(svm_pipe, en_pipe, xgb_pipe, rf_model))

# cv_pred = cross_val_predict(stack, train, y_train, cv=5)
# print("R2 score: %.4f" % r2_score(y_train, cv_pred))
# exit()

## R2 score: 0.5600 (en_pipe, rf_model)
## R2 score: 0.5601 (svm_pipe, en_pipe, xgb_pipe, rf_model, et_model)
## R2 score: 0.5605 (svm_pipe, en_pipe, xgb_pipe, rf_model, et_model, lgbm_model)
## R2 score: 0.5618 (svm_pipe, en_pipe, xgb_pipe, rf_model, lgbm_model)

# Fit on the full training set and write the submission file.
stack.fit(train, y_train)
y_test = stack.predict(test)

df_sub = pd.DataFrame({'ID': id_test, 'y': y_test})
df_sub.to_csv('mercedes_submissions/ensemble.csv', index=False)

##
## https://www.kaggle.com/eaturner/stacking-em-up/output
##
# NOTE(review): chunk opens mid-call — the pipeline constructor wrapping
# RobustScaler + ElasticNet starts before the visible region.
                          RobustScaler(),
                          ElasticNet(max_iter=1e7, alpha=0.0004, l1_ratio=0.9))

score = rmsle_cv(elasticnet)
print(f"ElasticNet score: {score.mean():.4f} ({score.std():.4f})")
e = elasticnet.fit(train, y_train)
predictions = elasticnet.predict(test)
# expm1 presumably undoes a log1p target transform — confirm upstream.
EN = np.expm1(predictions)

# Stacked ensemble; NOTE(review): `lgb` is used both as a base regressor and
# as the meta-regressor, so it appears to be an estimator instance here.
stack_gen = StackingCVRegressor(regressors=(ridge, lasso, elasticnet, lgb,
                                            gbr, svr),
                                meta_regressor=lgb,
                                use_features_in_secondary=True)
score = rmsle_cv(stack_gen)
print(f"Stack score: {score.mean():.4f} ({score.std():.4f})")
sg = stack_gen.fit(train, y_train)
predictions = stack_gen.predict(test)
STACK = np.expm1(predictions)

# Per-row blend: keep the plain XGB prediction for extreme prices, otherwise
# take a fixed weighted mix of all model predictions (overwrites LASSO).
for i in range(LASSO.size):
    if LASSO[i] < 55000 or LASSO[i] > 500000:
        LASSO[i] = XGB[i]
    else:
        LASSO[i] = .1 * XGB[i] + .05 * LGB[i] + .05 * LASSO[i] + \
            .2 * SVR[i] + .05 * GBR[i] + .25 * RIDGE[i] + .05 * EN[i] + .25 * STACK[i]

submission = pd.DataFrame()
submission['Id'] = test_ID
submission['SalePrice'] = LASSO
submission.head()
# NOTE(review): the format() call below is cut off at the chunk boundary.
submission.to_csv(r'../submissions/{}_{}.csv'.format(
    os.path.basename(sys.argv[0])[:-3],
# First-level ensemble: three tuned regressors stacked under plain OLS.
# NOTE(review): model1 and model2 are defined before the visible region.
model3 = make_regressor(clf='lgb')
lr = LinearRegression()
stack_one = StackingCVRegressor(regressors=[model1, model2, model3],
                                meta_regressor=lr,
                                cv=5,
                                verbose=0,
                                store_train_meta_features=True,
                                random_state=SEED)
stack_one.fit(X, y, groups=groups)

## Return ordinal predictions
pr1 = stack_one.predict(X)
optR = OptimizedRounder()
coefs_one = get_coef(stack_one, X, y)
#print(coefs_one)
#one_preds = optR.predict(pr1.reshape(-1, ), coefs_one)
#print(np.round(qwk(y, one_preds), 3))

# lr.fit(stack_one.train_meta_features_, y)
# p1 = lr.predict(stack_one.predict_meta_features(X))

# Define models for the second ensemble (using group k-fold)
model4 = make_regressor(clf='ngb')
model5 = make_regressor(clf='cat')
# Fit each model on the full training set and print its in-sample RMSLE.
# NOTE(review): in-sample scores indicate fit quality only, not generalization.
print(datetime.now(), 'GBMR: ', end="")
GBMR.fit(X_train, y)
print(rmsle(y, GBMR.predict(X_train)))

print(datetime.now(), 'LGBMR: ', end="")
LGBMR.fit(X_train, y)
print(rmsle(y, LGBMR.predict(X_train)))

print(datetime.now(), 'XGBR: ', end="")
XGBR.fit(X_train, y)
print(rmsle(y, XGBR.predict(X_train)))

# The stacking model is fed plain ndarrays rather than DataFrames.
print(datetime.now(), 'STACKING: ', end="")
STACKING.fit(np.array(X_train), np.array(y))
print(rmsle(y, STACKING.predict(np.array(X_train))))

#%%
print('''
################################################################################################
# FINISH
################################################################################################
''')


def Arithmetic_Blending(X):
    # Fixed-weight blend of the fitted models: 10% ridge, 35% LightGBM,
    # 55% stacked ensemble. Ridge and the stack expect ndarray input.
    return (
        (0.1 * RIDGE_MODEL.predict(np.array(X))) + \
        (0.35 * LGBMR.predict(X)) + \
        (0.55 * STACKING.predict(np.array(X)))#+ \
    )


print(datetime.now(), 'RMSLE score on train data:')
# Base learners for the stacking comparison; deliberately shallow/fast settings.
rf = RandomForestRegressor(n_estimators=50, max_depth=5, random_state=2018,
                           n_jobs=8)
xgb = XGBRegressor(n_estimators=50, learning_rate=0.75, random_state=2018,
                   n_jobs=8)
lgb = LGBMRegressor(n_estimators=50, learning_rate=0.75, random_state=2018,
                    n_jobs=8)
svr = SVR(kernel='rbf', gamma='auto')
lr = LinearRegression(n_jobs=8)
models = [rf, xgb, lgb, svr]

# Hand-rolled stacking helper (project-local StackingModels) in regression
# mode, with linear regression as the meta-model.
y_pred_self = StackingModels(models=models, meta_model=lr, X_train=X_train,
                             X_test=X_test, y_train=y_train, use_probas=False,
                             task_mode='reg')
mse = mean_squared_error(y_test, y_pred_self)
print('MyModel: MSE = {:.6f}'.format(mse))

# mlxtend's reference implementation with the same models, for comparison.
stack_reg = StackingCVRegressor(regressors=models, meta_regressor=lr,
                                cv=5).fit(X_train, y_train)
y_pred_mxltend = stack_reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred_mxltend)
print('Mlxtend: MSE = {:.6f}'.format(mse))
# NOTE(review): chunk opens mid-call — the estimator receiving
# random_state=666 is constructed before the visible region.
                random_state=666)

# 10-fold CV comparison of the individual models against the stack.
for clf, label in zip([lr, lasso, ridge, xgboost, catboost, gbm, stack], [
        'LR', 'Lasso', 'Ridge', 'XGBoost', 'CatBoost', 'LightGBM',
        'StackingCVRegressor'
]):
    scores = cross_val_score(clf, X_train, Y_train, cv=10,
                             scoring='neg_root_mean_squared_error')
    print("Neg. RMSE Score: %0.3f (+/- %0.3f) [%s]" %
          (scores.mean(), scores.std(), label))

stack.fit(X_train, Y_train)
Y_pred = stack.predict(x_test)
# expm1 presumably undoes a log1p target transform — confirm preprocessing.
print(
    'Accuracy Stacked Regressor (RMSLE):',
    np.sqrt(metrics.mean_squared_log_error(np.expm1(y_test),
                                           np.expm1(Y_pred))))

#####PREDICT TEST DATA AND EXPORT FOR UPLOAD
predict_export = pd.DataFrame()
predict_export['Id'] = test['Id']
prediction_catboost = cb_reg.predict(test.drop('Id', axis=1))
prediction_catboost = np.expm1(prediction_catboost)
predict_export['SalePrice'] = prediction_catboost
predict_export.to_csv('submission_cb.csv', index=False)

# NOTE(review): a second export begins here and is cut off at the chunk end.
predict_export = pd.DataFrame()
predict_export['Id'] = test['Id']
# NOTE(review): chunk opens mid-dict — the opening of this parameter dict
# (presumably `other_params`) lies before the visible region.
    'gamma': 0,
    'reg_alpha': 0,
    'reg_lambda': 1
}

# Grid-search an XGBoost regressor over `cv_params` (defined above, not visible).
model = xgb.XGBRegressor(**other_params)
mgb = GridSearchCV(estimator=model,
                   param_grid=cv_params,
                   scoring='neg_mean_squared_error',
                   cv=5,
                   verbose=1)
mgb.fit(train_X, train_Y)
# The two prints report (in Chinese) the best parameters and best score.
print('参数的最佳取值:{0}'.format(mgb.best_params_))
print('最佳模型得分:{0}'.format(-mgb.best_score_))
myxgb = mgb.best_estimator_

# ---- model ensembling (original banner: 模型融合) ----
stack = StackingCVRegressor(regressors=[myGBR, myxgb],
                            meta_regressor=LinearRegression(),
                            use_features_in_secondary=True,
                            cv=5)
stack.fit(train_X, train_Y)
pred_Y = stack.predict(test_X)
print(mean_squared_error(test_Y, pred_Y))

# Final prediction on the submission set and export.
Y_pred = stack.predict(test)
results = pd.DataFrame(Y_pred, columns=['target'])
results.to_csv("results.txt", index=False, header=False)
print("over")
#mgb = GridSearchCV(estimator=model, param_grid=cv_params, scoring='neg_mean_squared_error', cv=5, verbose=1) #mgb.fit(train_X, train_Y) #print('参数的最佳取值:{0}'.format(mgb.best_params_)) #print('最佳模型得分:{0}'.format(-mgb.best_score_)) #myxgb = mgb.best_estimator_ myxgb = xgb.XGBRegressor(**other_params) ###############################--模型融合--###################################### stack = StackingCVRegressor(regressors=[myxgb, myRFR, mylgb], meta_regressor=bayes, use_features_in_secondary=True, cv=8) stack.fit(train_X, train_Y) pred_Y = stack.predict(test_X) mse = mean_squared_error(test_Y, pred_Y) print('mse: %.10f' % mse) folds = KFold(n_splits=7, shuffle=True, random_state=2019) #mean = [] #for fold, (i, j) in enumerate(folds.split(train_X1, train_Y1)): # print("fold {}".format(fold+1)) # trn_X, trn_Y = train_X1[i], train_Y1[i] # tsn_X, tsn_Y = train_X1[j], train_Y1[j] # # stack = stack # stack.fit(trn_X, trn_Y) # pred_Y = stack.predict(tsn_X) # mse = mean_squared_error(tsn_Y, pred_Y) # print('mse: %.10f' % mse)
# NOTE(review): chunk opens mid-call — the constructor receiving max_depth=50
# (the `rf` model, presumably) starts before the visible region.
                                max_depth=50)

# Stacked ensemble with `xgb` as the meta-learner; the base features are also
# fed to the secondary model.
avg = StackingCVRegressor(regressors=(lightgbm, grb, svr, krr, rf),
                          meta_regressor=xgb,
                          use_features_in_secondary=True)


def rmsle(y, y_pred):
    """Root mean squared error of y vs y_pred (called RMSLE here —
    presumably the target is already log-transformed; confirm upstream)."""
    return np.sqrt(mean_squared_error(y, y_pred))


x = np.array(X)
Y = np.array(y)
avg.fit(x, Y)
y_pred = avg.predict(x)
rmsle(y, y_pred)  # in-sample score; return value is discarded

Predict = avg.predict(np.array(test_df.drop('Price', axis=1)))

# Converting price back to original scale and making it integer
Predict = np.exp(Predict)
Predict = Predict.astype(int)

#########################################################################3#
import lightgbm as lgb

# NOTE(review): this constructor call is cut off at the chunk boundary.
model_lgb = lgb.LGBMRegressor(objective='regression',
                              num_leaves=5,
                              learning_rate=0.05,
                              n_estimators=720,
# NOTE(review): the first three statements reference `name`, `model`, and
# `start`, so they are most likely the tail of a model-evaluation loop whose
# header lies before the visible region.
score = rmsle_cv(model)
end = time.time()
print("{}: {:.6f}, {:.4f} in {:.3f} s".format(name, score.mean(), score.std(),
                                              end - start))

# Candidate base models; the last entry (BayesianRidge) is the meta-learner.
sel_models = [
    Lasso(alpha=0.0005, random_state=1, max_iter=10000),
    ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3),
    KernelRidge(alpha=0.9, kernel='polynomial', degree=3, coef0=2.5),
    GradientBoostingRegressor(n_estimators=3000,
                              learning_rate=0.05,
                              max_depth=4,
                              max_features='sqrt',
                              min_samples_leaf=15,
                              min_samples_split=10,
                              loss='huber',
                              random_state=5),
    SVR(gamma=0.0004, kernel='rbf', C=13, epsilon=0.009),
    BayesianRidge(),
]

np.random.seed(42)  # presumably for reproducible CV shuffling — confirm
stack = StackingCVRegressor(regressors=sel_models[:5],
                            meta_regressor=sel_models[5])
start = time.time()
score = rmsle_cv(stack)
end = time.time()
print("Stacked : {:.6f}, (+/-) {:.4f} in {:.3f} s".format(
    score.mean(), score.std(), end - start))

# Fit on the full training data and export exp-transformed predictions.
stack.fit(train, target.values)
pred = np.exp(stack.predict(test))
result = pd.DataFrame({'Id': test_ID, 'SalePrice': pred})
result.to_csv("submission.csv", index=False)
Y_pred_linear = sc.inverse_transform(lr.predict(X_test)) #Evaluating print("\n\nLinear Regression SCORE : ", score(Y_pred_linear, actual_cost)) ''' #-------------------------------------------------------------------------------- #Stacking Ensemble Regression ########################################################################### #Importing and Initializing the Regressor from mlxtend.regressor import StackingCVRegressor #Initializing Level One Regressorsxgbr = XGBRegressor() #rf = RandomForestRegressor(n_estimators=100, random_state=1) #lr = LinearRegression() #Stacking the various regressors initialized before stack = StackingCVRegressor(regressors=(xgbr, rf), meta_regressor=xgbr, use_features_in_secondary=True) #Fitting the data stack.fit(X_train, Y_train) #Predicting the Test set results y_pred_ense = sc.inverse_transform(stack.predict(X_test)) #Evaluating print("\n\nStackingCVRegressor SCORE : ", score(y_pred_ense, actual_cost))
#meta regressor with the SalePrice being predictor values stack_gen = StackingCVRegressor(regressors=(ridge, elasticnet, lasso, xgboost, lightgbm), meta_regressor=xgboost, use_features_in_secondary=True) stackX = np.array(X) stacky = np.array(y) #Fit the models elasticnet.fit(X, y) lasso.fit(X, y) ridge.fit(X, y) xgboost.fit(X, y) lightgbm.fit(X, y) stack_gen.fit(stackX, stacky) #We take a weighted average of our predictions, what this does adds some variance at the expense of losing a little bias stack_preds = ((0.2 * elasticnet.predict(test_data)) + (0.1 * lasso.predict(test_data)) + (0.1 * ridge.predict(test_data)) + (0.2 * xgboost.predict(test_data)) + (0.1 * lightgbm.predict(test_data)) + (0.3 * stack_gen.predict(test_data))) #Create The Submission sub = pd.DataFrame() sub['Id'] = test_ids sub['SalePrice'] = np.expm1(stack_preds) sub.to_csv('submission.csv', index=False)
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

# Use only the first 100 samples of the Boston housing data for speed.
x, y = boston_housing_data()
x = x[:100]
y = y[:100]

# Split the data set (original comment: 划分数据集)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Initialize the base models (original comment: 初始化基模型)
lr = LinearRegression()
svr_lin = SVR(kernel='linear', gamma='auto')
ridge = Ridge(random_state=2019, )
lasso = Lasso()
models = [lr, svr_lin, ridge, lasso]

# 5-fold CV score of each base model on the training split.
print("base model")
for model in models:
    score = cross_val_score(model, x_train, y_train, cv=5)
    print(score.mean(), "+/-", score.std())

# Stack the base models with Lasso as the meta-regressor.
sclf = StackingCVRegressor(regressors=models, meta_regressor=lasso)

# Train the stacked regressor (original comment: 训练回归器)
print("stacking model")
score = cross_val_score(sclf, x_train, y_train, cv=5)
print(score.mean(), "+/-", score.std())
sclf.fit(x_train, y_train)
pred = sclf.predict(x_test)
print("loss is {}".format(mean_squared_error(y_test, pred)))
# Grid-search the XGBoost hyper-parameters in `cv_params` (defined above,
# not visible in this chunk).
model = xgb.XGBRegressor(**other_params)
mgb = GridSearchCV(estimator=model,
                   param_grid=cv_params,
                   scoring='neg_mean_squared_error',
                   cv=5,
                   verbose=1)
mgb.fit(train_X, train_Y)
# The two prints report (in Chinese) the best parameters and best score.
print('参数的最佳取值:{0}'.format(mgb.best_params_))
print('最佳模型得分:{0}'.format(-mgb.best_score_))
myxgb = mgb.best_estimator_

# Baseline: evaluate the tuned XGB model on its own before stacking.
stack = myxgb
stack.fit(train_X, train_Y)
Y_pred = stack.predict(test_X)
print(mean_squared_error(test_Y, Y_pred))

# ---- model ensembling (original banner: 模型融合) ----
stack = StackingCVRegressor(regressors=[myxgb, mylgb],
                            meta_regressor=LinearRegression(),
                            use_features_in_secondary=True,
                            cv=5)
stack.fit(train_X, train_Y)
pred_Y = stack.predict(test_X)
print(mean_squared_error(test_Y, pred_Y))

# Predict on the full (PCA-transformed, presumably) feature set and export.
Y_pred = stack.predict(X1_pca)
results = pd.DataFrame(Y_pred, columns=['target'])
results.to_csv("results.txt", index=False, header=False)
print("over")