def fit(self, x, y):
    """Fit the stacking ensemble.

    Builds every base regressor named in ``self.listModelName``, applies any
    user-supplied parameter ranges from ``self.dict_para``, then stacks the
    fitted base models under the meta-regressor chosen by ``self.meta_reg``.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        Training features.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Training targets.

    Raises
    ------
    ValueError
        If ``self.meta_reg`` is not a supported meta-regressor name.
    """
    x_train = np.array(x)
    # ravel() handles plain Python lists as well as (n, 1) arrays; the
    # original `np.array(y).reshape(y.shape[0],)` crashed when y had no
    # .shape attribute.
    y_train = np.array(y).ravel()

    # Supported base-model identifiers.
    basic_reg = ['linear', 'ridge', 'lasso', 'ElasticNet', 'pls', 'svr',
                 'knn', 'dt', 'rf', 'adaBoost', 'gbm', 'xgb']

    model_list = []
    for model_name in self.listModelName:
        if model_name in basic_reg:
            reg = reg_model(model_name, isGridSearch=self.isGridSearch)
            # Apply a user-defined parameter range when one was provided.
            if model_name in self.dict_para:
                reg.set_parameters(self.dict_para[model_name])
            # Fit on the normalized arrays — the original passed the raw
            # x/y here, inconsistent with the stacker call below.
            reg.fit(x_train, y_train)
            model_list.append(reg.reg_model)
            self.train_model[model_name] = reg

    if self.meta_reg == 'linear':
        meta_reg = linear_model.LinearRegression()
    elif self.meta_reg == 'ridge':
        meta_reg = linear_model.Ridge()
    else:
        # The original fell through with meta_reg unbound, producing an
        # opaque NameError; fail loudly with the actual problem instead.
        raise ValueError("unsupported meta regressor: %r" % self.meta_reg)

    self.stack = StackingRegressor(regressors=model_list,
                                   meta_regressor=meta_reg)
    self.stack.fit(x_train, y_train)
def test_features_in_secondary():
    """MSE shifts depending on whether the meta-regressor also sees
    the raw input features."""
    base_models = [SVR(kernel='linear'),
                   LinearRegression(),
                   Ridge(random_state=0),
                   RandomForestRegressor(random_state=2)]
    meta = SVR(kernel='rbf')

    # Meta-regressor also receives the original features.
    stack = StackingRegressor(regressors=base_models,
                              meta_regressor=meta,
                              use_features_in_secondary=True)
    stack.fit(X1, y).predict(X1)
    got = np.mean((stack.predict(X1) - y) ** 2)
    print(got)
    assert round(got, 2) == 0.14

    # Meta-regressor receives only the base predictions.
    stack = StackingRegressor(regressors=base_models,
                              meta_regressor=meta,
                              use_features_in_secondary=False)  # dense
    stack.fit(X1, y).predict(X1)
    got = np.mean((stack.predict(X1) - y) ** 2)
    print(got)
    assert round(got, 2) == 0.12
def test_get_coeff_fail():
    """coef_ is only available when the meta-regressor exposes coefficients;
    an RBF-kernel SVR does not, so the attribute access must raise."""
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[ridge, lr],
                               meta_regressor=svr_rbf)
    stregr = stregr.fit(X1, y)
    # Bug fix: the original read stregr.coef_ unguarded, so the test itself
    # crashed with AttributeError instead of asserting the expected failure
    # (compare the guarded sibling test in this suite).
    with pytest.raises(AttributeError):
        stregr.coef_
class Blend:
    """Stacking blend of SVM, random-forest and XGBoost base regressors
    under a linear meta-regressor."""

    def __init__(self, x_train, x_test, y_train):
        # Drop bookkeeping columns that must not reach the models.
        x_train.drop(['Unnamed: 0', 'PromoInterval', 'Date'],
                     axis=1, inplace=True)
        x_test.drop(['Unnamed: 0', 'Id', 'PromoInterval', 'Date'],
                    axis=1, inplace=True)
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train['Sales'].values

    def blending(self):
        """Build and fit the stacked ensemble; returns the fitted stacker."""
        base_models = [model.svm_regressor(),
                       model.randomforest_regressor(),
                       model.xgb_regressor()]
        self.blend = StackingRegressor(regressors=base_models,
                                       meta_regressor=LinearRegression())
        self.blend.fit(self.x_train, self.y_train)
        return self.blend

    def score(self):
        """10-fold cross-validation scores of the fitted blend."""
        # scoring='neg_mean_squared_error'
        return cross_val_score(self.blend, X=self.x_train, y=self.y_train,
                               cv=10, verbose=2)

    def prediction(self):
        """Predict on the held-out features and invert the log1p transform."""
        return np.expm1(self.blend.predict(self.x_test))
class RegressorBlender:
    """Stacking blend of SVM, random-forest and XGBoost base regressors
    under a linear meta-regressor; test labels are optional."""

    def __init__(self, x_train, x_test, y_train, y_test=None):
        # Drop bookkeeping columns that must not reach the models.
        x_train.drop(['Unnamed: 0', 'Id'], axis=1, inplace=True)
        x_test.drop(['Unnamed: 0', 'Id'], axis=1, inplace=True)
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train['y'].values
        # Bug fix: the original guarded on `self.y_train is not None`,
        # which is always true, so passing y_test=None crashed on
        # y_test['y'].  Guard on y_test itself and always define the
        # attribute so later reads cannot raise AttributeError.
        self.y_test = y_test['y'].values if y_test is not None else None

    def reg_blend(self):
        """Build and fit the stacked ensemble; returns the fitted stacker."""
        mete_reg = LinearRegression()
        reg1 = model.svm_regressor()
        reg2 = model.randomforest_regressor()
        reg3 = model.xgb_regressor()
        self.blend = StackingRegressor(regressors=[reg1, reg2, reg3],
                                       meta_regressor=mete_reg)
        self.blend.fit(self.x_train, self.y_train)
        return self.blend

    def score(self):
        """10-fold cross-validation scores of the fitted blend."""
        scores = cross_val_score(self.blend, X=self.x_train, y=self.y_train,
                                 cv=10, verbose=2)
        return scores

    def prediction(self):
        """Predict on the held-out feature matrix."""
        y_pred = self.blend.predict(self.x_test)
        return y_pred
def stacking(self):
    """Stack RF / SVR / GBDT / LightGBM / XGBoost base models under an
    XGBoost meta-regressor, print the test-set Pearson correlation and
    return the fitted stacker."""
    from sklearn.svm import SVR
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import RobustScaler, MinMaxScaler
    from sklearn.preprocessing import StandardScaler
    from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
    from xgboost import XGBRegressor
    import lightgbm as lgb
    from lightgbm import LGBMRegressor
    import xgboost as xgb
    from mlxtend.regressor import StackingRegressor
    import scipy as sc

    # Base learners — hyperparameter values preserved from the original
    # experiment configuration.
    lasso = make_pipeline(SVR(kernel='rbf', C=2.8, gamma=2))
    rf = make_pipeline(RandomForestRegressor(random_state=590,
                                             n_estimators=6))
    GBoost = GradientBoostingRegressor(
        n_estimators=500, learning_rate=0.01, max_depth=12,
        max_features='sqrt', min_samples_leaf=15, min_samples_split=97,
        loss='ls', random_state=200)
    model_xgb = xgb.XGBRegressor(
        colsample_bytree=0.4603, gamma=10, learning_rate=0.01,
        max_depth=11, min_child_weight=1.7817, n_estimators=500,
        reg_alpha=0.01, reg_lambda=5, subsample=0.5213, silent=1,
        seed=1024, nthread=-1)
    model_lgb = LGBMRegressor(
        objective='regression', num_leaves=5, learning_rate=0.05,
        n_estimators=550, max_bin=25, bagging_fraction=1, bagging_freq=5,
        feature_fraction=0.7, feature_fraction_seed=9, bagging_seed=9,
        min_data_in_leaf=42, min_sum_hessian_in_leaf=40)

    # The XGBoost model doubles as the meta-regressor.
    stregr = StackingRegressor(
        regressors=[rf, lasso, GBoost, model_lgb, model_xgb],
        meta_regressor=model_xgb)
    stregr.fit(self.X_train, self.y_train)
    print("the model is staking and the test's pearsonr is: ",
          sc.stats.pearsonr(self.y_test, stregr.predict(self.X_test))[0])
    return stregr
def test_multivariate_class():
    """Multi-output regression through a normalizing linear meta-model."""
    stregr = StackingRegressor(
        regressors=[LinearRegression(), Ridge(random_state=1)],
        meta_regressor=LinearRegression(normalize=True))
    stregr.fit(X2, y2).predict(X2)
    got = np.mean((stregr.predict(X2) - y2) ** 2)
    assert round(got, 3) == 0.122
def blending(self):
    """Fit a stacking ensemble of the three base regressors under a
    linear meta-regressor and return the fitted stacker."""
    base_models = [model.svm_regressor(),
                   model.randomforest_regressor(),
                   model.xgb_regressor()]
    self.blend = StackingRegressor(regressors=base_models,
                                   meta_regressor=LinearRegression())
    self.blend.fit(self.x_train, self.y_train)
    return self.blend
def __init__(self, regressors, meta_regressor, verbose=0,
             store_train_meta_features=False, refit=True):
    # Thin wrapper: forward the stacking parameters positionally to the
    # mlxtend-derived base class, then run the wrapper base's own setup.
    # Both parents are initialized explicitly (no cooperative super()).
    _StackingRegressor.__init__(self, regressors, meta_regressor, verbose,
                                store_train_meta_features, refit)
    BaseWrapperReg.__init__(self)
def test_get_coeff():
    """With a Ridge meta-regressor the stacker exposes its coefficients."""
    ensemble = StackingRegressor(
        regressors=[SVR(kernel='linear'), LinearRegression()],
        meta_regressor=Ridge(random_state=1))
    ensemble.fit(X1, y)
    expect = np.array([0.4874216, 0.45518317])
    assert_almost_equal(ensemble.coef_, expect)
def test_get_intercept():
    """The meta-regressor's intercept is surfaced on the stacker."""
    ensemble = StackingRegressor(
        regressors=[SVR(kernel='linear'), LinearRegression()],
        meta_regressor=Ridge(random_state=1))
    ensemble.fit(X1, y)
    assert round(ensemble.intercept_, 2) == 0.02
def test_predict_meta_features():
    """predict() yields exactly one value per test sample."""
    stacker = StackingRegressor(
        regressors=[LinearRegression(), Ridge(random_state=1)],
        meta_regressor=SVR(kernel='rbf'))
    X_train, X_test, y_train, y_test = train_test_split(X2, y,
                                                        test_size=0.3)
    stacker.fit(X_train, y_train)
    predictions = stacker.predict(X_test)
    assert predictions.shape[0] == X_test.shape[0]
def test_get_coeff_fail():
    """An RBF-kernel SVR meta-regressor has no coef_, so access raises."""
    stacker = StackingRegressor(
        regressors=[Ridge(random_state=1), LinearRegression()],
        meta_regressor=SVR(kernel='rbf', gamma='auto'))
    with pytest.raises(AttributeError):
        stacker = stacker.fit(X1, y)
        r = stacker.coef_
        assert r
def test_get_intercept():
    """Intercept of the Ridge meta-regressor, checked to three decimals."""
    stacker = StackingRegressor(
        regressors=[SVR(kernel='linear'), LinearRegression()],
        meta_regressor=Ridge(random_state=1))
    stacker.fit(X1, y)
    assert round(stacker.intercept_, 3) == 0.024
def test_weight_unsupported_meta():
    # meta regressor with no support for
    # sample_weight should raise error
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    lasso = Lasso(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=lasso)
    # Bug fix: the comment promises an error but the original call was
    # unguarded, so the expected TypeError crashed the test instead of
    # being asserted (the guarded sibling test wraps this same call).
    with pytest.raises(TypeError):
        stregr.fit(X1, y, sample_weight=w).predict(X1)
def test_multivariate_class():
    """Multi-target fit/predict round-trips through the linear meta-model."""
    regressors = [LinearRegression(), Ridge(random_state=1)]
    meta_model = LinearRegression(normalize=True)
    stregr = StackingRegressor(regressors=regressors,
                               meta_regressor=meta_model)
    stregr.fit(X2, y2).predict(X2)
    mse_expected = 0.122
    mse_actual = np.mean((stregr.predict(X2) - y2) ** 2)
    assert round(mse_actual, 3) == mse_expected
def test_different_models():
    """Heterogeneous base models stacked under an RBF SVR."""
    bases = [SVR(kernel='linear', gamma='auto'),
             LinearRegression(),
             Ridge(random_state=1)]
    stacker = StackingRegressor(regressors=bases,
                                meta_regressor=SVR(kernel='rbf',
                                                   gamma='auto'))
    stacker.fit(X1, y).predict(X1)
    mse_actual = np.mean((stacker.predict(X1) - y) ** 2)
    assert round(mse_actual, 2) == 0.21
def test_weight_unsupported_regressor():
    # including regressor that does not support
    # sample_weight should raise error
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    lasso = Lasso(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge, lasso],
                               meta_regressor=svr_rbf)
    # Bug fix: assert the promised TypeError instead of letting it
    # propagate out of the test (mirrors the guarded sibling test in
    # this suite).
    with pytest.raises(TypeError):
        stregr.fit(X1, y, sample_weight=w).predict(X1)
def test_train_meta_features_():
    """store_train_meta_features=True records one meta-feature row per
    training sample."""
    stacker = StackingRegressor(
        regressors=[LinearRegression(), Ridge(random_state=1)],
        meta_regressor=SVR(kernel='rbf'),
        store_train_meta_features=True)
    X_train, X_test, y_train, y_test = train_test_split(X2, y,
                                                        test_size=0.3)
    stacker.fit(X_train, y_train)
    assert stacker.train_meta_features_.shape[0] == X_train.shape[0]
def test_multivariate_class():
    """Two-decimal MSE check for the multi-output case."""
    stregr = StackingRegressor(
        regressors=[LinearRegression(), Ridge(random_state=1)],
        meta_regressor=LinearRegression(normalize=True))
    stregr.fit(X2, y2).predict(X2)
    got = np.mean((stregr.predict(X2) - y2) ** 2.)
    # there seems to be an issue with the following test on Windows
    # sometimes via Appveyor
    assert round(got, 2) == 0.12, got
def test_multivariate():
    """Three linear-ish base models under an RBF SVR on the X2 matrix."""
    bases = [SVR(kernel='linear'),
             LinearRegression(),
             Ridge(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=SVR(kernel='rbf'))
    stregr.fit(X2, y).predict(X2)
    mse_actual = np.mean((stregr.predict(X2) - y) ** 2)
    assert round(mse_actual, 2) == 0.22
def test_different_models():
    """Three-decimal MSE for the mixed-model stack on X1."""
    bases = [SVR(kernel='linear'),
             LinearRegression(),
             Ridge(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=SVR(kernel='rbf'))
    stregr.fit(X1, y).predict(X1)
    mse_actual = np.mean((stregr.predict(X1) - y) ** 2)
    assert round(mse_actual, 3) == 0.214
def test_multivariate():
    """Three-decimal MSE on the multivariate feature matrix."""
    bases = [SVR(kernel='linear'),
             LinearRegression(),
             Ridge(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=SVR(kernel='rbf'))
    stregr.fit(X2, y).predict(X2)
    mse_actual = np.mean((stregr.predict(X2) - y) ** 2)
    assert round(mse_actual, 3) == 0.218
def test_weight_unsupported_meta():
    # A meta-regressor that cannot accept sample_weight must surface
    # a TypeError when weights are passed to fit().
    bases = [SVR(kernel='linear', gamma='auto'),
             LinearRegression(),
             Ridge(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=Lasso(random_state=1))
    with pytest.raises(TypeError):
        stregr.fit(X1, y, sample_weight=w).predict(X1)
def test_weight_ones():
    # sample weight of ones should produce equivalent outcome as no weight
    bases = [SVR(kernel='linear'),
             LinearRegression(),
             Ridge(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=SVR(kernel='rbf'))
    unweighted = stregr.fit(X1, y).predict(X1)
    weighted = stregr.fit(X1, y, sample_weight=np.ones(40)).predict(X1)
    maxdiff = np.max(np.abs(unweighted - weighted))
    assert maxdiff < 1e-3, "max diff is %.4f" % maxdiff
def test_get_params():
    """The top-level get_params namespace lists each component estimator
    plus the stacker-level settings."""
    model = StackingRegressor(
        regressors=[Ridge(random_state=1), LinearRegression()],
        meta_regressor=SVR(kernel='rbf'))
    top_level = sorted(list({name.split('__')[0]
                             for name in model.get_params().keys()}))
    expected = ['linearregression', 'meta-svr', 'meta_regressor',
                'regressors', 'ridge', 'store_train_meta_features',
                'verbose']
    assert top_level == expected, top_level
def test_weight_ones():
    # sample weight of ones should produce equivalent outcome as no weight
    bases = [SVR(kernel='linear', gamma='auto'),
             LinearRegression(),
             Ridge(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=SVR(kernel='rbf',
                                                  gamma='auto'))
    plain = stregr.fit(X1, y).predict(X1)
    unit_weighted = stregr.fit(X1, y, sample_weight=np.ones(40)).predict(X1)
    maxdiff = np.max(np.abs(plain - unit_weighted))
    assert maxdiff < 1e-3, "max diff is %.4f" % maxdiff
def test_multivariate_class():
    """Multi-output stack under a normalizing linear meta-regressor."""
    base_models = [LinearRegression(), Ridge(random_state=1)]
    stregr = StackingRegressor(
        regressors=base_models,
        meta_regressor=LinearRegression(normalize=True))
    stregr.fit(X2, y2).predict(X2)
    got = np.mean((stregr.predict(X2) - y2) ** 2.)
    # there seems to be an issue with the following test on Windows
    # sometimes via Appveyor
    assert round(got, 2) == 0.12, got
def mlx_reg_1(self):
    """Stack the linear, random-forest and lasso base models under a
    RandomForest meta-regressor; returns test-set predictions."""
    linear, _ = self.linear_regr()
    forest, _ = self.random_forest_regr()
    lasso, _ = self.lasso_regr()
    meta = RandomForestRegressor(ccp_alpha=0.1, max_features="auto",
                                 n_estimators=30)
    sclf = StackingRegresorMLX(regressors=[linear, forest, lasso],
                               meta_regressor=meta)
    sclf.fit(self.x_train, self.y_train)
    return sclf.predict(self.x_test)
def test_weight_unsupported_meta():
    # A KNN meta-regressor has no sample_weight support, so fitting
    # with weights must raise a TypeError.
    bases = [SVR(kernel='linear', gamma='auto'),
             LinearRegression(),
             Ridge(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=KNeighborsRegressor())
    with pytest.raises(TypeError):
        stregr.fit(X1, y, sample_weight=w).predict(X1)
def regressionStacking(df):
    """Train a stacking regressor (random forest + lasso + MLP under a
    linear meta-regressor) on the supplied frame.

    Returns (criterion_df, predict_result) from predictResultOutput and
    persists the fitted model to 'stacking.model'.
    """
    # StackingRegressor inputdata type is ndarray
    X_train, X_test, y_train, y_test = trainDataSplit(df)

    randomforest_regressor = RandomForestRegressor()
    # lightgbm's native API is not scikit-learn compatible, so mlxtend
    # cannot stack it — the experimental lightgbm branch stays disabled.
    lasso_regressor = Lasso()
    dnn_regressor = MLPRegressor()
    linearRegression_regressor = LinearRegression()

    stacking_regressor = StackingRegressor(
        regressors=[randomforest_regressor, lasso_regressor, dnn_regressor],
        meta_regressor=linearRegression_regressor)
    # Bug fix: the original called fit(X_train, X_train), passing the
    # feature matrix as the target; fit against the labels instead.
    stacking_regressor.fit(X_train, y_train)

    y_pred = stacking_regressor.predict(X_test)
    criterion_df, predict_result = predictResultOutput(
        stacking_regressor, X_test, y_test, y_pred)
    # save model
    joblib.dump(stacking_regressor, 'stacking.model')
    return criterion_df, predict_result
def sbg_mlxtend_ensamble(iterate):
    """Fit a StackingRegressor (SGD + random forest under a Ridge
    meta-regressor) on a train/test split seeded by *iterate*, log the
    error statistics for the Memory target, and return a tuple of
    (mean squared error, mean absolute deviation, MAPE).

    NOTE(review): some estimator parameters here were removed in later
    scikit-learn releases (SGDRegressor ``n_iter``, Lasso ``normalize``)
    — confirm the pinned scikit-learn version before upgrading.
    """
    # Offset the seed so successive calls use distinct splits.
    iterate += 501
    # Candidate estimators; only sgd_reg and rndm_frst are actually
    # stacked below, the rest are left over from experimentation.
    lin_mod = linear_model.LinearRegression()
    bsn_rdg = linear_model.BayesianRidge()
    elstc_nt = ElasticNet(alpha=0.2, l1_ratio=1)
    ridge = Ridge(alpha=0.01, tol=0.1, solver='sag')
    svr_rbf = svm.SVR(kernel='rbf', C=1e3, gamma=0.1)
    sgd_reg = linear_model.SGDRegressor(penalty='l2', alpha=0.001,
                                        n_iter=1000)
    lasso_reg = linear_model.Lasso(alpha=1, max_iter=3000,
                                   normalize='True', selection='random',
                                   tol=0.001)
    rndm_frst = RandomForestRegressor(max_depth=5, n_estimators=10)
    stregr = StackingRegressor(regressors=[sgd_reg, rndm_frst],
                               meta_regressor=ridge)
    # Split the module-level frames, then scale: fit the shared scaler
    # `sc` on the training fold only and apply it to the test fold.
    X_train, X_test, y_train, y_test = train_test_split(
        df_X, df_Y2, test_size=0.20, random_state=iterate)
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    stregr.fit(X_train, y_train)
    y_pred = stregr.predict(X_test)
    #print("Mean Squared Error: %.4f"
    #      % np.mean((y_pred - y_test.values) ** 2))
    #print('Variance Score: %.4f' % stregr.score(X_test, y_test.values))
    # Held-out error statistics.
    dev_Memory = abs(y_pred - y_test.values)
    mean_dev = np.mean(dev_Memory)  # mean absolute error
    # RMSE despite the "mse" name.
    mse_Memory = np.sqrt(np.sum(dev_Memory**2) / dev_Memory.size)
    mape = np.mean(dev_Memory / y_test.values)  # mean abs. pct. error
    max_pe = np.max(dev_Memory)  # largest absolute deviation
    max_ne = np.max(np.negative(dev_Memory))  # max of negated deviations
    # Predicted vs. actual side by side for the log record.
    new_data1 = pd.DataFrame(y_pred)
    new_data2 = pd.DataFrame(y_test.values)
    new_data = pd.merge(new_data1, new_data2,
                        left_index=True, right_index=True)
    filename12 = r'C:\Users\epatdeb\AlphaCANDI\SBG_Rawinput_1.6\latest\Logs\AlphaCandi17_MlxEnsmbl_Memory.log'
    logging.basicConfig(filename=filename12, level=logging.DEBUG)
    logging.info(
        "tensor_bp sbg_mlxtend_ensamble iter:%s \n \n y_pred/y_test: \n %s \n mae:%s mse:%s mape:%s max_pe:%s max_ne:%s",
        iterate, new_data, mean_dev, mse_Memory, mape, max_pe, max_ne)
    logging.shutdown()
    return mean_squared_error(y_test, y_pred), mean_dev, mape
def test_weight_unsupported_regressor():
    # A base regressor without sample_weight support (KNN) must make
    # a weighted fit raise TypeError.
    bases = [SVR(kernel='linear', gamma='auto'),
             LinearRegression(),
             Ridge(random_state=1),
             KNeighborsRegressor()]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=SVR(kernel='rbf',
                                                  gamma='auto'))
    with pytest.raises(TypeError):
        stregr.fit(X1, y, sample_weight=w).predict(X1)
def test_weight_unsupported_regressor():
    # A base regressor without sample_weight support (Lasso) must make
    # a weighted fit raise TypeError.
    bases = [SVR(kernel='linear', gamma='auto'),
             LinearRegression(),
             Ridge(random_state=1),
             Lasso(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=SVR(kernel='rbf',
                                                  gamma='auto'))
    with pytest.raises(TypeError):
        stregr.fit(X1, y, sample_weight=w).predict(X1)
def test_features_in_secondary():
    """MSE differs depending on whether raw features also feed the
    meta-regressor."""
    bases = [SVR(kernel='linear', gamma='auto'),
             LinearRegression(),
             Ridge(random_state=0),
             RandomForestRegressor(n_estimators=10, random_state=2)]
    meta = SVR(kernel='rbf', gamma='auto')

    # (use_features_in_secondary, expected two-decimal MSE); the second
    # configuration is the plain dense stack.
    for use_secondary, expected in ((True, 0.14), (False, 0.12)):
        stack = StackingRegressor(regressors=bases,
                                  meta_regressor=meta,
                                  use_features_in_secondary=use_secondary)
        stack.fit(X1, y).predict(X1)
        got = np.mean((stack.predict(X1) - y) ** 2)
        print(got)
        assert round(got, 2) == expected
def test_sample_weight():
    """Weighted and unweighted fits must differ measurably."""
    bases = [SVR(kernel='linear', gamma='auto'),
             LinearRegression(),
             Ridge(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=SVR(kernel='rbf',
                                                  gamma='auto'))
    weighted = stregr.fit(X1, y, sample_weight=w).predict(X1)
    assert round(np.mean((stregr.predict(X1) - y) ** 2), 2) == 0.22
    # make sure that this is not equivalent to the model with no weight
    plain = stregr.fit(X1, y).predict(X1)
    maxdiff = np.max(np.abs(weighted - plain))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
def test_predictions_from_sparse_matrix():
    """Fitting on a CSR matrix scores the same as fitting densely."""
    stregr = StackingRegressor(
        regressors=[SVR(kernel='linear'), LinearRegression()],
        meta_regressor=Ridge(random_state=1))
    # dense fit first, then the same data as a sparse matrix
    for training_X in (X1, sparse.csr_matrix(X1)):
        stregr.fit(training_X, y)
        print(stregr.score(X1, y))
        assert round(stregr.score(X1, y), 2) == 0.61
def test_sample_weight():
    """Sample weights must change the fitted model's predictions."""
    bases = [SVR(kernel='linear'),
             LinearRegression(),
             Ridge(random_state=1)]
    stregr = StackingRegressor(regressors=bases,
                               meta_regressor=SVR(kernel='rbf'))
    weighted = stregr.fit(X1, y, sample_weight=w).predict(X1)
    assert round(np.mean((stregr.predict(X1) - y) ** 2), 2) == 0.22
    # make sure that this is not equivalent to the model with no weight
    plain = stregr.fit(X1, y).predict(X1)
    maxdiff = np.max(np.abs(weighted - plain))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
def train_model(X_train, y_train):
    """Stack LinearSVR / LinearRegression / Ridge / LGBM under a
    LinearSVR meta-model, print the training RMSE, and return the
    fitted stacker."""
    bases = [LinearSVR(), LinearRegression(), Ridge(), LGBMRegressor()]
    stacker = StackingRegressor(regressors=bases,
                                meta_regressor=LinearSVR())
    stacker.fit(X_train, y_train)
    score = get_rmse_score(stacker.predict(X_train), y_train)
    print("RMSE Score train: %.4f" % score)
    return stacker
def Gbc():
    """Stack an AdaBoost regressor, a gradient-boosting classifier and a
    linear SVR under an RBF SVR, predict on the validation split, and
    evaluate the strategy.

    NOTE(review): mixing a classifier into a StackingRegressor is
    unusual — confirm GradientBoostingClassifier output is intended
    as a base "regressor" here.
    """
    from sklearn.ensemble import GradientBoostingClassifier, AdaBoostRegressor
    from mlxtend.regressor import StackingRegressor
    from sklearn.svm import SVR

    adaboost = AdaBoostRegressor()
    # Bug fix: the original had `lr = LogisticRegression` (missing
    # parentheses), binding the class instead of an instance; since the
    # name was never used, it and its import are removed entirely.
    gb = GradientBoostingClassifier()
    svr = SVR(kernel='linear')
    svr_rbf = SVR(kernel='rbf')

    regressors = [svr, adaboost, gb]
    stregr = StackingRegressor(regressors=regressors,
                               meta_regressor=svr_rbf)
    stregr.fit(X_train, y_train)
    outpred = stregr.predict(X_valid)
    evaluate_strategy(outpred)
def test_get_params():
    """Top-level parameter names include every component estimator."""
    reg = StackingRegressor(regressors=[Ridge(random_state=1),
                                        LinearRegression()],
                            meta_regressor=SVR(kernel='rbf'))
    prefixes = sorted(list({key.split('__')[0]
                            for key in reg.get_params().keys()}))
    assert prefixes == ['linearregression', 'meta-svr', 'meta_regressor',
                        'regressors', 'ridge',
                        'store_train_meta_features', 'verbose'], prefixes
def test_predictions_from_sparse_matrix():
    """Dense and CSR training inputs yield the same R^2 score."""
    stregr = StackingRegressor(
        regressors=[SVR(kernel='linear', gamma='auto'),
                    LinearRegression()],
        meta_regressor=Ridge(random_state=1))
    # dense first, then the identical data in sparse form
    for training_X in (X1, sparse.csr_matrix(X1)):
        stregr.fit(training_X, y)
        print(stregr.score(X1, y))
        assert round(stregr.score(X1, y), 2) == 0.61
def train(self, X, y):
    """Train a Ridge + GradientBoosting stack blended by linear regression.

    The Ridge alpha is chosen by cross-validation; the blend is fitted on
    the full data, evaluated on a 25% hold-out, and returned.  Errors are
    reported after np.exp, so targets are presumably log-transformed —
    TODO confirm with the caller.
    """
    features = X
    labels = y
    # test train split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, test_size=0.25, random_state=4)

    # Ridge: pick alpha by cross-validation, then refit at that alpha.
    # (The original also had a bare `regcv.alpha_` expression statement
    # with no effect; it is removed.)
    regcv = linear_model.RidgeCV(
        alphas=[0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75])
    regcv.fit(features, labels)
    reg = linear_model.Ridge(alpha=regcv.alpha_)
    reg.fit(features, labels)

    # GB
    params = {'n_estimators': 100, 'max_depth': 5, 'min_samples_split': 2,
              'learning_rate': 0.1, 'loss': 'ls'}
    gbr = ensemble.GradientBoostingRegressor(**params)
    gbr.fit(features, labels)

    # blended model
    meta = linear_model.LinearRegression()
    blender = StackingRegressor(regressors=[reg, gbr], meta_regressor=meta)
    _ = blender.fit(features, labels)
    y_pred = blender.predict(X_test)

    # Bug fix: the original used Python 2 `print` statements, which are a
    # SyntaxError under Python 3 (the rest of this file uses print()).
    print("***** TRAINING STATS ********")
    scores = cross_val_score(blender, features, labels, cv=10)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    mean_diff = np.mean(np.abs(np.exp(Y_test) - np.exp(y_pred)))
    p_mean_diff = np.mean(mean_diff / np.exp(Y_test))
    print("Mean Error:\t %.0f/%0.3f%%" % (mean_diff, p_mean_diff * 100))
    print("***** TRAINING STATS ********")
    return blender
def test_weight_unsupported_with_no_weight():
    # pass no weight to regressors with no weight support
    # should not be a problem
    lasso = Lasso(random_state=1)
    bases = [SVR(kernel='linear', gamma='auto'),
             LinearRegression(),
             Ridge(random_state=1)]
    # Lasso as an extra base regressor...
    stregr = StackingRegressor(regressors=bases + [lasso],
                               meta_regressor=SVR(kernel='rbf',
                                                  gamma='auto'))
    stregr.fit(X1, y).predict(X1)
    # ...and Lasso as the meta-regressor.
    stregr = StackingRegressor(regressors=bases, meta_regressor=lasso)
    stregr.fit(X1, y).predict(X1)