Example #1
def test_unsupported_meta_regressor():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    lasso = Lasso()
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=lasso)

    with pytest.raises(TypeError):
        stack.fit(X1, y, sample_weight=w).predict(X1)

def test_predict_meta_features():
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf')
    ridge = Ridge(random_state=1)
    stregr = StackingCVRegressor(regressors=[lr, ridge],
                                 meta_regressor=svr_rbf)
    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
    stregr.fit(X_train, y_train)
    test_meta_features = stregr.predict(X_test)
    assert test_meta_features.shape[0] == X_test.shape[0]
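
Despite its name, the test above only exercises predict; StackingCVRegressor also exposes a predict_meta_features method that returns the base-model predictions fed to the meta-regressor. A short additional check in the same style (an illustration, not part of the original test):

test_meta = stregr.predict_meta_features(X_test)
assert test_meta.shape == (X_test.shape[0], 2)  # one column per base regressor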

def test_train_meta_features_():
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf')
    ridge = Ridge(random_state=1)
    stregr = StackingCVRegressor(regressors=[lr, ridge],
                                 meta_regressor=svr_rbf,
                                 store_train_meta_features=True)
    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
    stregr.fit(X_train, y_train)
    train_meta_features = stregr.train_meta_features_
    assert train_meta_features.shape[0] == X_train.shape[0]
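
train_meta_features_ holds the out-of-fold predictions used to fit the meta-regressor, one column per base regressor; a stricter shape check (an addition, not in the original test) would be:

assert stregr.train_meta_features_.shape == (X_train.shape[0], 2)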

def test_different_models():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf)
    stack.fit(X1, y).predict(X1)
    mse = 0.21
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse

def test_multivariate():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf)
    stack.fit(X2, y).predict(X2)
    mse = 0.19
    got = np.mean((stack.predict(X2) - y) ** 2)
    assert round(got, 2) == mse, '%f != %f' % (round(got, 2), mse)

def test_use_features_in_secondary():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf,
                                cv=3,
                                use_features_in_secondary=True)
    stack.fit(X1, y).predict(X1)
    mse = 0.2
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, '%f != %f' % (round(got, 2), mse)
Example #7
def test_weight_ones():
    # sample_weight = None and sample_weight = ones
    # should give the same result, provided that the
    # randomness of the models is controlled
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf,
                                cv=KFold(5, shuffle=True, random_state=5))
    pred1 = stack.fit(X1, y).predict(X1)
    pred2 = stack.fit(X1, y, sample_weight=np.ones(40)).predict(X1)
    assert np.max(np.abs(pred1 - pred2)) < 1e-3

def test_internals():
    lr = LinearRegression()
    regressors = [lr, lr, lr, lr, lr]
    cv = 10
    stack = StackingCVRegressor(regressors=regressors,
                                meta_regressor=lr,
                                cv=cv)
    stack.fit(X3, y2)
    assert stack.predict(X3).mean() == y2.mean()
    assert stack.meta_regr_.intercept_ == 0.0
    assert stack.meta_regr_.coef_[0] == 0.0
    assert stack.meta_regr_.coef_[1] == 0.0
    assert stack.meta_regr_.coef_[2] == 0.0
    assert len(stack.regr_) == len(regressors)
Example #9
def test_sample_weight():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf,
                                cv=KFold(4, shuffle=True, random_state=7))
    pred1 = stack.fit(X1, y, sample_weight=w).predict(X1)
    mse = 0.21  # 0.20770
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, "Expected %.2f, but got %.5f" % (mse, got)
    pred2 = stack.fit(X1, y).predict(X1)
    maxdiff = np.max(np.abs(pred1 - pred2))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
Example #10
def test_sparse_matrix_inputs():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf,
                                random_state=42)

    # dense
    stack.fit(X1, y).predict(X1)
    mse = 0.21
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, got

    # sparse
    stack.fit(sparse.csr_matrix(X1), y)

    if Version(sklearn_version) < Version("0.21"):
        expected_value = 0.20
    else:
        expected_value = 0.19

    got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
    assert round(got, 2) == expected_value, got

def test_get_params():
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf')
    ridge = Ridge(random_state=1)
    stregr = StackingCVRegressor(regressors=[ridge, lr],
                                 meta_regressor=svr_rbf)

    got = sorted(list({s.split('__')[0] for s in stregr.get_params().keys()}))
    expect = ['cv',
              'linearregression',
              'meta-svr',
              'meta_regressor',
              'regressors',
              'ridge',
              'shuffle',
              'store_train_meta_features',
              'use_features_in_secondary']
    assert got == expect, got
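
The estimator__parameter keys above are what make the whole stack tunable with scikit-learn's GridSearchCV; a small sketch built on the names listed in the test (grid values are illustrative):

from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(estimator=stregr,
                    param_grid={'ridge__alpha': [0.1, 1.0],
                                'meta-svr__C': [0.1, 1.0, 10.0]},
                    cv=3,
                    refit=True)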
Example #12
def test_weight_unsupported_with_no_weight():
    # should be okay since we do not pass weight
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    lasso = Lasso()
    stack = StackingCVRegressor(regressors=[svr_lin, lr, lasso],
                                meta_regressor=ridge)
    stack.fit(X1, y).predict(X1)

    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=lasso)
    stack.fit(X1, y).predict(X1)
Example #13
def test_sparse_matrix_inputs_with_features_in_secondary():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf,
                                random_state=42,
                                use_features_in_secondary=True)

    # dense
    stack.fit(X1, y).predict(X1)
    mse = 0.20
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, got

    # sparse
    stack.fit(sparse.csr_matrix(X1), y)
    mse = 0.20
    got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
    assert round(got, 2) == mse, got
Example #14
cols100 = [i for i, j in enumerate(var) if j in tmp]

pipe4 = make_pipeline(ColumnSelector(cols=tuple(cols100)),
                      RandomForestRegressor(n_estimators=100, n_jobs=-1))

#cols137 = [i for i,j in enumerate(var) if j in var]

#pipe5 = make_pipeline(ColumnSelector(cols=tuple(cols137)),
#                      RandomForestRegressor(n_estimators=100,n_jobs=-1))


lasso = Lasso(alpha=0.5)
stack1 = StackingCVRegressor(regressors=(pipe1, pipe2, pipe3, pipe4),
                             use_features_in_secondary=True,
                             meta_regressor=lasso)
tr_x = tr.loc[:, var]
#tr_x2 = tr.loc[:, var2]
tr_y = tr.loc[:, 'elect_down']
#tr_y2 = tr.loc[:, 'square_elect_down']
te_N = te.loc[te.typhoon == 'NESATANDHAITANG', var]
te_M = te.loc[te.typhoon == 'MEGI', var]
stack1.fit(tr_x.values, tr_y.values)

Nes = stack1.predict(te_N.values)  # stack1 is the fitted model above
Meg = stack1.predict(te_M.values)

test = pd.read_csv('/Users/charlie/Desktop/Taipower/data/submit.csv')
#test.NesatAndHaitang = Nes *(602539/2471910.1150000058)
#test.Megi = Meg *(4180000/3816284.0750000146)
Example #15
# XGBoost Regressor
xgboost = XGBRegressor()

# Ridge Regressor
ridge_alphas = [0.1, 0.3, 1, 3, 5, 10, 15, 18, 20, 30, 50, 75, 100]
ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=ridge_alphas, cv=kf))

# Gradient Boosting Regressor
gbr = GradientBoostingRegressor()

# Random Forest Regressor
rf = RandomForestRegressor()

# Stack up all the models above, optimized using xgboost
stack_gen = StackingCVRegressor(regressors=(xgboost, lightgbm, ridge, gbr, rf),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)
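
cv_rmse is used below but not defined in this snippet. A minimal helper sketch, assuming the kf splitter defined above and training arrays X and y (the arrays are assumptions, not from the snippet):

import numpy as np
from sklearn.model_selection import cross_val_score

def cv_rmse(model, X=X, y=y):
    # k-fold RMSE via scikit-learn's negated-MSE scorer (hypothetical helper)
    return np.sqrt(-cross_val_score(model, X, y,
                                    scoring="neg_mean_squared_error", cv=kf))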

# Scores

scores = {}

score = cv_rmse(lightgbm)
print("lightgbm: {:.4f} ({:.4f})".format(score.mean(), score.std()))
scores['lgb'] = (score.mean(), score.std())

score = cv_rmse(xgboost)
print("xgboost: {:.4f} ({:.4f})".format(score.mean(), score.std()))
scores['xgb'] = (score.mean(), score.std())

score = cv_rmse(ridge)
Example #16
    def stacked_model(self):
        # note: assigning to self.stacked_model shadows this method on the instance
        self.stacked_model = StackingCVRegressor(
            regressors=(self.ridge, self.lasso, self.elasticnet,
                        self.gradient_boost, self.lightgbm, self.xgboost),
            meta_regressor=self.xgboost,
            use_features_in_secondary=True)
Example #17
score_dict['AdaBoost'] = diff
diff

# In[381]:

from mlxtend.regressor import StackingCVRegressor
best_stack = MultiOutputRegressor(
    StackingCVRegressor(
        regressors=(KNeighborsRegressor(
            n_neighbors=best_param_nn['n_neighbors'],
            algorithm=best_param_nn['algorithm']),
                    xgb.XGBRegressor(
                        n_estimators=best_param_xgb['estimator__n_estimators'],
                        eta=best_param_xgb['estimator__eta'],
                        gamma=best_param_xgb['estimator__gamma'],
                        max_depth=best_param_xgb['estimator__max_depth'])),
        meta_regressor=RandomForestRegressor(
            n_estimators=best_param_rf['n_estimators'],
            criterion=best_param_rf['criterion'],
            max_leaf_nodes=best_param_rf['max_leaf_nodes'],
            n_jobs=best_param_rf['n_jobs'],
            warm_start=best_param_rf['warm_start']),
        n_jobs=-1,
        refit=False))
best_stack.fit(x_train[:100], y_train[:100])

# In[382]:

pred_y = best_stack.predict(x_test)
diff = mean_absolute_error(y_test, pred_y)
score_dict['StackingCVRegressor'] = diff
Example #18
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import ElasticNet
from mlxtend.regressor import StackingCVRegressor
from sklearn.model_selection import GridSearchCV
import pandas as pd

rf = RandomForestRegressor()
en = ElasticNet()
gbr = GradientBoostingRegressor()
etr = ExtraTreesRegressor()
ada = AdaBoostRegressor()

stack = StackingCVRegressor(regressors=(en, gbr, etr, ada),
                            meta_regressor=rf,
                            random_state=4)
grid = GridSearchCV(estimator=stack,
                    param_grid={'meta_regressor__n_estimators': [10, 100]},
                    cv=10,
                    refit=True)

grid.fit(trainpc, y)
grid_predict = grid.predict(testpc)
grid_test = pd.DataFrame(grid_predict)  # grid_test was undefined; wrap the predictions instead

# model fitting with KerasRegressor

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
Example #19
XGBR = XGBRegressor(
                    learning_rate=0.01, 
                    n_estimators=3500,
                    max_depth=3, 
                    min_child_weight=0,
                    gamma=0, 
                    subsample=0.7,
                    colsample_bytree=0.7,
                    scale_pos_weight=1, 
                    reg_alpha=0.00006,
                    seed=100)
STACKING = StackingCVRegressor(regressors=(RIDGE, BRIDGE, LASSO,  # ELASTICNET,
                                           GBMR, LGBMR, XGBR),
                               meta_regressor=XGBR,
                               use_features_in_secondary=True,
                               random_state=0)

print(datetime.now(), 'RIDGE: ',end="")
RIDGE_MODEL = RIDGE.fit(X_train, y)
print(rmsle(y, RIDGE_MODEL.predict(X_train)))

print(datetime.now(), 'BRIDGE: ',end="")
BRIDGE.fit(X_train, y)
print(rmsle(y, BRIDGE.predict(X_train)))

print(datetime.now(), 'LASSO: ',end="")
LASSO_MODEL = LASSO.fit(X_train, y)
print(rmsle(y, LASSO_MODEL.predict(X_train)))
Example #20
                             max_depth=5,
                             learning_rate=0.03,
                             colsample_bytree=0.8,
                             subsample=0.7,
                             booster='gbtree')
xgb_cols = [
    'weather', 'atemp', 'humidity', 'windspeed', 'holiday', 'workingday',
    'Hour', 'week_day', 'Year', 'Day', 'season'
]

params = {'depth': 6, 'learning_rate': 0.05, 'iterations': 150}
cat_model = CatBoostRegressor(**params)  # apply the params defined above (they were unused)
cat_model.fit(train[xgb_cols], train['registered_log'])

lr = LinearRegression()
streg_model = StackingCVRegressor(
    regressors=[cat_model, rf_model, gbm_model, xgb_model], meta_regressor=lr)
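
streg_model is defined above but never evaluated in this snippet; a hypothetical scoring call in the same style as the base-model evaluations below (reusing the snippet's own log_rmsle scorer and train frame) might be:

scores_r_stack = cross_val_score(streg_model,
                                 train[xgb_cols],
                                 train['registered_log'],
                                 cv=5,
                                 scoring=make_scorer(log_rmsle,
                                                     greater_is_better=False))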

scores_casual_cat = cross_val_score(cat_model,
                                    train[xgb_cols],
                                    train['casual_log'],
                                    cv=5,
                                    scoring=make_scorer(
                                        log_rmsle, greater_is_better=False))
scores_r_cat = cross_val_score(cat_model,
                               train[xgb_cols],
                               train['registered_log'],
                               cv=5,
                               scoring=make_scorer(log_rmsle,
                                                   greater_is_better=False))

scores_casual_xgb = cross_val_score(xgb_model,
Example #21
                   random_state=1,
                   activation='tanh',
                   max_iter=10000, )

lasso = Lasso(max_iter=5000, alpha=0.001, random_state=SEED)
enet = ElasticNet(random_state=SEED, alpha=0.001)
ridge = Ridge(alpha=1, random_state=SEED)
rf = RandomForestRegressor(n_estimators=1024,
                           bootstrap=True,
                           max_features='auto',
                           min_samples_leaf=1,
                           min_samples_split=2,
                           random_state=SEED, )
xgb = GradientBoostingRegressor(random_state=SEED, n_estimators=1024, learning_rate=0.05, )
stack = StackingCVRegressor(regressors=(ridge, lasso, rf, xgb, enet, mlp),
                            meta_regressor=lasso, verbose=1,
                            n_jobs=2, use_features_in_secondary=True)

# clf_label_zip = zip([ ridge, lasso, rf, xgb, mlp, stack], ['Ridge', 'Lasso','Random Forest', 'xgb',
#                                                                           'mlp', 'StackingClassifier'])

clf_label_zip = [(ridge, 'Ridge'), (lasso, 'Lasso')]
def get_statistic():
    df = pd.read_csv(root_path + in_file, index_col='index')
    # ageing only
    # df.rename(columns = {'SPICE1':'label'}, inplace = True)
    # pred_results = []
    results = []
    for i in range(6):
        # for i in [0]:
        if i == 0:
Example #22
    'reg_alpha': 0,
    'reg_lambda': 1
}

#model = xgb.XGBRegressor(**other_params)
#mgb = GridSearchCV(estimator=model, param_grid=cv_params, scoring='neg_mean_squared_error', cv=5, verbose=1)
#mgb.fit(train_X, train_Y)
#print('Best parameter values: {0}'.format(mgb.best_params_))
#print('Best model score: {0}'.format(-mgb.best_score_))
#myxgb = mgb.best_estimator_

myxgb = xgb.XGBRegressor(**other_params)

###############################-- Model stacking --######################################
stack = StackingCVRegressor(regressors=[myxgb, myRFR, mylgb],
                            meta_regressor=bayes,
                            use_features_in_secondary=True,
                            cv=8)

stack.fit(train_X, train_Y)
pred_Y = stack.predict(test_X)
mse = mean_squared_error(test_Y, pred_Y)
print('mse: %.10f' % mse)
folds = KFold(n_splits=7, shuffle=True, random_state=2019)

#mean = []
#for fold, (i, j) in enumerate(folds.split(train_X1, train_Y1)):
#    print("fold {}".format(fold+1))
#    trn_X, trn_Y = train_X1[i], train_Y1[i]
#    tsn_X, tsn_Y = train_X1[j], train_Y1[j]
#
#    stack = stack
Example #23
r = ridge.fit(train, y_train)
predictions = ridge.predict(test)
RIDGE = np.expm1(predictions)

elasticnet = make_pipeline(
    RobustScaler(), ElasticNet(max_iter=1e7, alpha=0.0004, l1_ratio=0.9))
score = rmsle_cv(elasticnet)
print(f"ElasticNet score: {score.mean():.4f} ({score.std():.4f})")

e = elasticnet.fit(train, y_train)
predictions = elasticnet.predict(test)
EN = np.expm1(predictions)

stack_gen = StackingCVRegressor(regressors=(ridge, lasso, elasticnet, lgb, gbr,
                                            svr),
                                meta_regressor=lgb,
                                use_features_in_secondary=True)
score = rmsle_cv(stack_gen)
print(f"Stack score: {score.mean():.4f} ({score.std():.4f})")
sg = stack_gen.fit(train, y_train)
predictions = stack_gen.predict(test)
STACK = np.expm1(predictions)

for i in range(LASSO.size):
    if LASSO[i] < 55000 or LASSO[i] > 500000:
        LASSO[i] = XGB[i]
    else:
        LASSO[i] = .1 * XGB[i] + .05 * LGB[i] + .05 * LASSO[i] + \
                   .2 * SVR[i] + .05 * GBR[i] + .25 * RIDGE[i] + .05 * EN[i] + .25 * STACK[i]

submission = pd.DataFrame()
Example #24
                              verbose=-1)

# Stack model
baggingmodel_lasso = BaggingRegressor(base_estimator=lasso)
baggingmodel_ENet = BaggingRegressor(base_estimator=ENet)
# baggingmodel_KRR = BaggingRegressor(base_estimator=KRR)
baggingmodel_svr = BaggingRegressor(base_estimator=svr)
baggingmodel_GBoost = BaggingRegressor(base_estimator=GBoost)

baggingmodel_lgb = BaggingRegressor(base_estimator=model_lgb)
baggingmodel_xgb = BaggingRegressor(base_estimator=model_xgb)
stackmodel = make_pipeline(
    imp_median,
    StackingCVRegressor(regressors=(lasso, ENet, GBoost, model_xgb, model_lgb),
                        meta_regressor=model_xgb,
                        use_features_in_secondary=True))

# TestModel(stackmodel, 1, 0.20,True)
#stackmodel: RMSLE: 0.012 | MAPE: 7.785
#baggingmodel_xgb: RMSLE: 0.012 | MAPE: 7.482
#baggingmodel_lgb:RMSLE: 0.012 | MAPE: 7.568
#baggingmodel_GBoost:
#GBoost:

# sbaggingmodel_lasso = MakePrediction(baggingmodel_lasso);print("sbaggingmodel_lasso done")
# sbaggingmodel_ENet = MakePrediction(baggingmodel_ENet);print("baggingmodel_ENet done")
# sbaggingmodel_KRR = MakePrediction(baggingmodel_KRR);print("baggingmodel_KRR done")
# sbaggingmodel_svr = MakePrediction(baggingmodel_svr);print("baggingmodel_svr done")
sGBoost = MakePrediction(baggingmodel_GBoost)
print("baggingmodel_GBoost done")
Example #25
                                min_samples_split=10,
                                loss='huber',
                                random_state=0)

rf = RandomForestRegressor(n_estimators=1200,
                           max_depth=15,
                           min_samples_split=5,
                           min_samples_leaf=5,
                           max_features=None,
                           oob_score=True,
                           random_state=0,
                           n_jobs=-1)

# Stack up all the models above, optimized using xgboost
stack_gen = StackingCVRegressor(regressors=(lasso, xgboost, lightgbm, svr,
                                            ridge, gbr, rf),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)


def elapsed_time(time_start, time_stop):
    print('Elapsed time:', timedelta(seconds=round(time_stop - time_start, 0)))


#%% MODELLING : GET BASELINE CROSS-VALIDATION SCORES - MANDATORY

scores = {}

print('Baseline Cross-Validation Scores (RMSLE)')

print('lasso')
t_start = perf_counter()
Example #26
elasticnet_l1ratios = [0.8, 0.85, 0.9, 0.95, 1]
#lasso
lasso_alphas = [5e-5, 1e-4, 5e-4, 1e-3]
#ridge
ridge_alphas = [13.5, 14, 14.5, 15, 15.5]


MODELS = {
    "elasticnet": make_pipeline(RobustScaler(),
                                ElasticNetCV(max_iter=1e7, alphas=elasticnet_alphas,
                                             l1_ratio=elasticnet_l1ratios)),
    "lasso": make_pipeline(RobustScaler(),
                           LassoCV(max_iter=1e7, alphas=lasso_alphas,
                                   random_state=42)),
    "ridge": make_pipeline(RobustScaler(), RidgeCV(alphas=ridge_alphas)),
    "gradb": GradientBoostingRegressor(n_estimators=6000, learning_rate=0.01,
                                       max_depth=4, max_features='sqrt',
                                       min_samples_leaf=15, min_samples_split=10,
                                       loss='huber', random_state=42),
    "svr": make_pipeline(RobustScaler(),
                         SVR(C=20, epsilon=0.008, gamma=0.0003)),
    "xgboost": XGBRegressor(learning_rate=0.01, n_estimators=6000,
                            max_depth=3, min_child_weight=0,
                            gamma=0, subsample=0.7,
                            colsample_bytree=0.7,
                            objective='reg:squarederror', nthread=-1,
                            scale_pos_weight=1, seed=27,
                            reg_alpha=0.00006, random_state=42)}

MODELS_stack = StackingCVRegressor(regressors=(MODELS['elasticnet'], MODELS['gradb'],
                                               MODELS['lasso'], MODELS['ridge'],
                                               MODELS['svr'], MODELS['xgboost']),
                                   meta_regressor=MODELS['xgboost'],
                                   use_features_in_secondary=True)
Example #27
rf = RandomForestRegressor(max_depth=15,
                           min_samples_split=5,
                           min_samples_leaf=5,
                           max_features=None,
                           oob_score=True)

ridge_alphas = [1e-3, 1e-2, 0.1, 0.3, 1, 3, 5, 10, 15, 18, 20, 30, 50, 75, 100]  # duplicates removed
ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=ridge_alphas, normalize=True, cv=cv))

lasso_alphas = np.logspace(-10, 0.1, 140)
lasso = make_pipeline(RobustScaler(), LassoCV(alphas=lasso_alphas, normalize=True, tol=0.1, cv=cv))

# Stacking the models
stack = StackingCVRegressor(regressors=(lgbm, rf, svr, ridge, lasso),
                            meta_regressor=lgbm,
                            use_features_in_secondary=True)


# Define a scoring system
def mse(y, y_pred):
    return mean_squared_error(y, y_pred)


def cv_mse(model, X, y):
    # the negated scorer yields MSE (not RMSE), so name the result accordingly
    mse = -cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=cv)
    return mse


# Test the models accuracy
all_models = {'Random Forest': rf,
Example #28
#
# Extra Trees
#

et_model = ExtraTreesRegressor(n_estimators=100,
                               n_jobs=4,
                               min_samples_split=25,
                               min_samples_leaf=35,
                               max_features=150)

# results = cross_val_score(et_model, train, y_train, cv=5, scoring='r2')
# print("ET score: %.4f (%.4f)" % (results.mean(), results.std()))

stack = StackingCVRegressor(  #meta_regressor=Ridge(alpha=10),
    meta_regressor=ElasticNet(l1_ratio=0.1, alpha=1.5),
    regressors=(svm_pipe, en_pipe, xgb_pipe, rf_model, lgbm_model))
#regressors=(svm_pipe, en_pipe, xgb_pipe, rf_model))

# cv_pred = cross_val_predict(stack, train, y_train, cv=5)
# print("R2 score: %.4f" % r2_score(y_train, cv_pred))
# exit()

## R2 score: 0.5600 (en_pipe, rf_model)
## R2 score: 0.5601 (svm_pipe, en_pipe, xgb_pipe, rf_model, et_model)
## R2 score: 0.5605 (svm_pipe, en_pipe, xgb_pipe, rf_model, et_model, lgbm_model)
## R2 score: 0.5618 (svm_pipe, en_pipe, xgb_pipe, rf_model, lgbm_model)

stack.fit(train, y_train)

y_test = stack.predict(test)
Example #29
            ('get', PipeExtractor(size_features)),
        ])),
        ('means', Pipeline([
            ('get', PipeExtractor(mean_features)),
        ])),
        ('medians', Pipeline([
            ('get', PipeExtractor(median_features)),
        ])),
    ])),
])

p31 = make_pipeline(pipe3, lgbr2)
p32 = make_pipeline(pipe3, lr1)

p8 = StackingCVRegressor(
    regressors=[p11, p12, p13, p21, p22, p23],
    meta_regressor=lassoCV, cv=5)

pipe = p8

mode = 'Submit'

if mode == 'Val':
    cv = cross_val_score(pipe, train_df, y_train, cv=5)

    print("R^2 Score: %0.4f (+/- %0.3f) [%s]" % (
        cv.mean(), cv.std(), pipe.__class__))

elif mode == 'Grid':

    params = {
Example #30
# The class head was cut off here; AveragingRegressor is used below, so a minimal
# constructor is restored from context.
class AveragingRegressor:
    def __init__(self, regressors):
        self.regressors = regressors

    def fit(self, X, y):
        for regr in self.regressors:
            regr.fit(X, y)
        return self

    def predict(self, X):
        self.predictions = np.column_stack([regr.predict(X)
                                            for regr in self.regressors])
        return np.mean(self.predictions, axis=1)

lasso = Lasso(alpha=0.01)
ridge = Ridge(alpha=0.1)
en = ElasticNet(alpha=0.0005, l1_ratio=0.5)
svr = SVR(C=1500, epsilon=7)
rf = RandomForestRegressor(n_estimators=75, min_samples_leaf=6, min_samples_split=2, max_depth=4, max_features=5)
gbm = GradientBoostingRegressor(subsample=0.7, min_samples_leaf=6)
xgbm = xgb.sklearn.XGBRegressor(n_estimators=75, colsample_bytree=0.8,
                                max_depth=2, subsample=0.5)
stacked = StackingCVRegressor(regressors=(lasso, ridge, en, xgbm, svr, rf),
                              meta_regressor=Lasso(alpha=128), cv=5,
                              use_features_in_secondary=True)
stack_nofeats = StackingCVRegressor(regressors=(lasso, ridge, en, xgbm, svr, rf),
                                    meta_regressor=Lasso(alpha=15), cv=5)
average = AveragingRegressor((lasso, ridge, en, svr, rf, gbm, xgbm, stacked))

# lasso.fit(X, y)
# ridge.fit(X, y)
# stack_nofeats.fit(X,y)
# stacked.fit(X,y)
# exit()

# import seaborn as sns
# import matplotlib.pyplot as plt
#
# average.fit(X,y)
# average.predict(X)
# sns.pairplot(pd.DataFrame(average.predictions))
Example #31
rf = RandomForestRegressor(n_estimators=50,
                           max_depth=5,
                           random_state=2018,
                           n_jobs=8)
xgb = XGBRegressor(n_estimators=50,
                   learning_rate=0.75,
                   random_state=2018,
                   n_jobs=8)
lgb = LGBMRegressor(n_estimators=50,
                    learning_rate=0.75,
                    random_state=2018,
                    n_jobs=8)
svr = SVR(kernel='rbf', gamma='auto')
lr = LinearRegression(n_jobs=8)
models = [rf, xgb, lgb, svr]

y_pred_self = StackingModels(models=models,
                             meta_model=lr,
                             X_train=X_train,
                             X_test=X_test,
                             y_train=y_train,
                             use_probas=False,
                             task_mode='reg')
mse = mean_squared_error(y_test, y_pred_self)
print('MyModel:  MSE = {:.6f}'.format(mse))

stack_reg = StackingCVRegressor(regressors=models, meta_regressor=lr,
                                cv=5).fit(X_train, y_train)
y_pred_mlxtend = stack_reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred_mlxtend)
print('Mlxtend:  MSE = {:.6f}'.format(mse))
Example #32
                           seed=27,
                           reg_alpha=0.00006)

# using gradient boosting algorithm
gbr = GradientBoostingRegressor(n_estimators=3000,
                                learning_rate=0.05,
                                max_depth=4,
                                max_features='sqrt',
                                min_samples_leaf=15,
                                min_samples_split=10,
                                loss='huber',
                                random_state=42)

# using ensemble
stack_gen = StackingCVRegressor(regressors=(lasso, gbr, svr),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)

# training our methods
score = cv_rmse(lasso)
print("LASSO: {:.4f} ({:.4f})\n".format(score.mean(), score.std()), )

score = cv_rmse(svr)
print("SVR: {:.4f} ({:.4f})\n".format(score.mean(), score.std()), )

score = cv_rmse(gbr)
print("gbr: {:.4f} ({:.4f})\n".format(score.mean(), score.std()), )

# fitting ensemble of algorithms
print('stack_gen')
stack_gen_model = stack_gen.fit(np.array(train), np.array(y_label))
Example #33
                                min_samples_split=10,
                                loss='huber',
                                random_state=42)

# Random Forest Regressor
rf = RandomForestRegressor(n_estimators=1200,
                           max_depth=15,
                           min_samples_split=5,
                           min_samples_leaf=5,
                           max_features=None,
                           oob_score=True,
                           random_state=42)

# Stack up all the models above, optimized using xgboost
stack_gen = StackingCVRegressor(regressors=(xgboost, lightgbm, svr, ridge, gbr,
                                            rf),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)

scores = {}

score = cv_rmse(lightgbm)
print("lightgbm: {:.4f} ({:.4f})".format(score.mean(), score.std()))
scores['lgb'] = (score.mean(), score.std())

## RANDOM FOREST CLASSIFIER
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
rfc = RandomForestClassifier(n_estimators=10, criterion="entropy")
rfc.fit(x_train, y_train)
y_pred = rfc.predict(x_test)

cm = confusion_matrix(y_test, y_pred)
Example #34
# Support Vector Regressor
svr = make_pipeline(RobustScaler(), SVR(C=20, epsilon=0.008, gamma=0.0003))

# Gradient Boosting Regressor
gbr = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.01, max_depth=4,
                                max_features='sqrt', min_samples_leaf=15,
                                min_samples_split=10, loss='huber', random_state=42)

gbr2 = GradientBoostingRegressor(n_estimators=1500, learning_rate=0.01, max_depth=1,
                                 max_features='sqrt', min_samples_leaf=15,
                                 min_samples_split=10, loss='huber', random_state=42)

# Random Forest Regressor
rf = RandomForestRegressor(n_estimators=1200, max_depth=15, min_samples_split=5,
                           min_samples_leaf=5, max_features=None, oob_score=True,
                           random_state=42)

rf2 = RandomForestRegressor(n_estimators=1200, max_depth=5, min_samples_split=5,
                            min_samples_leaf=5, max_features=None, oob_score=True,
                            random_state=42)

# Stack up all the models above, optimized using xgboost
stack_gen = StackingCVRegressor(regressors=(xgboost, lightgbm, svr, ridge, gbr,
                                            gbr2, xgboost2),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)

"""# **MAE scores for several different maching learning algorithms**"""

# Title
scores = {}


score = cv_rmse(lightgbm)
print("lightgbm: {:.4f} ({:.4f})".format(score.mean(), score.std()))
scores['lgb'] = (score.mean(), score.std())

score = cv_rmse(xgboost)
print("xgboost: {:.4f} ({:.4f})".format(score.mean(), score.std()))
scores['xgb'] = (score.mean(), score.std())
Example #35
    'gamma': 0,
    'reg_alpha': 0,
    'reg_lambda': 1
}

model = xgb.XGBRegressor(**other_params)
mgb = GridSearchCV(estimator=model,
                   param_grid=cv_params,
                   scoring='neg_mean_squared_error',
                   cv=5,
                   verbose=1)
mgb.fit(train_X, train_Y)
print('Best parameter values: {0}'.format(mgb.best_params_))
print('Best model score: {0}'.format(-mgb.best_score_))
myxgb = mgb.best_estimator_

##############################-- Model stacking --######################################
stack = StackingCVRegressor(regressors=[myGBR, myxgb],
                            meta_regressor=LinearRegression(),
                            use_features_in_secondary=True,
                            cv=5)

stack.fit(train_X, train_Y)
pred_Y = stack.predict(test_X)
print(mean_squared_error(test_Y, pred_Y))

Y_pred = stack.predict(test)
results = pd.DataFrame(Y_pred, columns=['target'])
results.to_csv("results.txt", index=False, header=False)
print("over")
Example #36
                 n_estimators=3460,
                 max_depth=3,
                 min_child_weight=0,
                 gamma=0,
                 subsample=0.7,
                 colsample_bytree=0.7,
                 objective='reg:linear',
                 nthread=4,
                 scale_pos_weight=1,
                 seed=27,
                 reg_alpha=0.00006))

# We use a stacked model: the predictions of our base regressors act as input to
# the meta-regressor, with SalePrice as the target values.
stack_gen = StackingCVRegressor(regressors=(ridge, elasticnet, lasso, xgboost,
                                            lightgbm),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)

stackX = np.array(X)
stacky = np.array(y)

#Fit the models
elasticnet.fit(X, y)
lasso.fit(X, y)
ridge.fit(X, y)
xgboost.fit(X, y)
lightgbm.fit(X, y)
stack_gen.fit(stackX, stacky)

# We take a weighted average of our predictions; this adds some variance at the expense of a little bias.
stack_preds = ((0.2 * elasticnet.predict(test_data)) +
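
The blend above is cut off mid-expression and the remaining weights are not recoverable from this snippet; a hypothetical completion illustrating the pattern (all weights after the first are invented and chosen to sum to 1):

stack_preds = ((0.2 * elasticnet.predict(test_data)) +
               (0.2 * lasso.predict(test_data)) +
               (0.1 * ridge.predict(test_data)) +
               (0.2 * xgboost.predict(test_data)) +
               (0.3 * stack_gen.predict(np.array(test_data))))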
Example #37
catboost = cbr.CatBoostRegressor(random_state=666,
                                 n_estimators=5676,
                                 l2_leaf_reg=4.819227931494778,
                                 learning_rate=0.017224166221196126,
                                 max_depth=5,
                                 silent=True)
gbm = lgb.LGBMRegressor(n_estimators=565,
                        objective='regression',
                        learning_rate=0.11517897606077306,
                        max_depth=2,
                        min_data_in_leaf=2,
                        num_leaves=222,
                        random_state=666)

stack = StackingCVRegressor(regressors=(lr, lasso, ridge, xgboost, catboost,
                                        gbm),
                            meta_regressor=lasso,
                            random_state=666)

for clf, label in zip([lr, lasso, ridge, xgboost, catboost, gbm, stack], [
        'LR', 'Lasso', 'Ridge', 'XGBoost', 'CatBoost', 'LightGBM',
        'StackingCVRegressor'
]):
    scores = cross_val_score(clf,
                             X_train,
                             Y_train,
                             cv=10,
                             scoring='neg_root_mean_squared_error')
    print("Neg. RMSE Score: %0.3f (+/- %0.3f) [%s]" %
          (scores.mean(), scores.std(), label))

stack.fit(X_train, Y_train)
Example #38
x_train, y_train = processData.get_training_data()

# param_alpha = np.arange(1e-4, 1e-3, 1e-4)
param_alpha = [0.1, 1, 10]
# param_alpha = [0.1]

# The StackingCVRegressor uses scikit-learn's check_cv
# internally, which doesn't support a random seed. Thus
# NumPy's random seed needs to be specified explicitly for
# deterministic behavior.
RANDOM_SEED = 33
np.random.seed(RANDOM_SEED)
stregr = StackingCVRegressor(
    regressors=[Ridge(), Lasso()],
    # meta_regressor=ElasticNet(),
    meta_regressor=RandomForestRegressor(random_state=RANDOM_SEED),
    use_features_in_secondary=True)

# param_n_estimators = [100, 1000, 10000]
param_n_estimators = [100]
# elastic_net_param_alpha = np.arange(1e-4, 1e-3, 1e-4)
# elastic_net_param_l1_ratio = np.arange(0.1, 1.0, 0.1)
# param_grid = {'ridge__alpha': param_alpha, 'lasso__alpha': param_alpha,
#                               'meta-elasticnet__alpha': elastic_net_param_alpha,
#                               'meta-elasticnet__l1_ratio': elastic_net_param_l1_ratio,
#                               'meta-elasticnet__max_iter':[1000]
#                               }
param_grid = {
    'ridge__alpha': param_alpha,
    'lasso__alpha': param_alpha,
Example #39
# Ridge Regression : made robust to outliers
ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=alphas_alt, cv=kfolds))

# LASSO Regression : made robust to outliers
lasso = make_pipeline(
    RobustScaler(),
    LassoCV(max_iter=1e7, alphas=alphas2, random_state=42, cv=kfolds))

# Elastic Net Regression : made robust to outliers
elasticnet = make_pipeline(
    RobustScaler(),
    ElasticNetCV(max_iter=1e7, alphas=e_alphas, cv=kfolds, l1_ratio=e_l1ratio))

stack_gen = StackingCVRegressor(regressors=(ridge, elasticnet, lightgbm),
                                meta_regressor=elasticnet,
                                use_features_in_secondary=True)

# store models, scores and prediction values
models = {
    'Ridge': ridge,
    'Lasso': lasso,
    'ElasticNet': elasticnet,
    'lightgbm': lightgbm,
    'xgboost': xgboost
}
predictions = {}
scores = {}

#Training the models
for name, model in models.items():
Example #40
                         verbose=-1,
                         # min_data_in_leaf=2,
                         # min_sum_hessian_in_leaf=11
                         )

xgboost = XGBRegressor(learning_rate=0.01, n_estimators=3460,
                       max_depth=3, min_child_weight=0,
                       gamma=0, subsample=0.7,
                       colsample_bytree=0.7,
                       objective='reg:linear', nthread=-1,
                       scale_pos_weight=1, seed=27,
                       reg_alpha=0.00006)

# stack
stack_gen = StackingCVRegressor(regressors=(ridge, lasso, elasticnet,
                                            gbr, xgboost, lightgbm),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)

print('TEST score on CV')

score = cv_rmse(ridge)
print("Kernel Ridge score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()), datetime.now(), )

score = cv_rmse(lasso)
print("Lasso score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()), datetime.now(), )

score = cv_rmse(elasticnet)
print("ElasticNet score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()), datetime.now(), )

score = cv_rmse(svr)
print("SVR score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()), datetime.now(), )
Example #41
print("start")
dataset = wholeDataset.values
print(dataset.shape)
X = dataset[0:1460, 2:276]
Y = trainDataset['SalePrice']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

xgbclassifier = xgb.XGBRegressor(colsample_bytree=0.7,
                 gamma=0,
                 min_child_weight=1.5,
                 n_estimators=1000,                                                                    
                 reg_alpha=0.75,
                 reg_lambda=0.45,
                 subsample=0.6,
                 seed=42,
                 max_features=220)  # note: max_features is not an XGBoost parameter
xgbclassifier.fit(X_train, Y_train)

rfclassifier = RandomForestRegressor(n_estimators=1000, max_features=220)
rfclassifier.fit(X_train, Y_train)

stack_gen = StackingCVRegressor(regressors=(xgbclassifier, rfclassifier),
                                meta_regressor=xgbclassifier,
                                use_features_in_secondary=True)
stack_gen.fit(X_train, Y_train)


print("Score rf", rfclassifier.score(X_test, Y_test))
print("Score xgb", xgbclassifier.score(X_test, Y_test))
print("Score stack", stack_gen.score(X_test, Y_test))
Example #42
                                min_samples_split=10,
                                loss='huber',
                                random_state=42)  

# Random Forest Regressor
rf = RandomForestRegressor(n_estimators=1200,  # 1200 trees
                           max_depth=15,  # maximum tree depth
                           min_samples_split=5,
                           min_samples_leaf=5,
                           max_features=None,
                           oob_score=True,
                           random_state=42)

# Stack up all the models above, optimized using xgboost
stack_gen = StackingCVRegressor(regressors=(xgboost, lightgbm, svr, ridge, gbr, rf),
                                meta_regressor=xgboost,  # second-level meta-learner is xgboost
                                use_features_in_secondary=True)


# In[273]:


"""
Train models

Get cross validation scores for each model

单独使用lightgbm k 折交叉验证训练, 看看各个模型的预测分数为多少,用于查看各个模型的预测表现
"""

Example #43
rf = RandomForestRegressor(random_state=rand, n_estimators=10)
rf = model_train(rf, X_matrix_train, y_train_new)
model_eval(rf, X_matrix_train, y_train_new)
model_eval(rf, X_matrix_test, y_test_new)

# In[ ]:

import random

# random.seed() returns None, so "rand = random.seed(9001)" left random_state
# unset; keep the seed value itself and seed the module with it instead.
rand = 9001
random.seed(rand)
ridge = Ridge(random_state=rand)
lasso = Lasso(random_state=rand)
rf = RandomForestRegressor(random_state=rand)

stack = StackingCVRegressor(regressors=(lasso, ridge),
                            meta_regressor=rf,
                            random_state=rand,
                            use_features_in_secondary=True)

params = {'lasso__alpha': [0.1, 1.0, 2.0], 'ridge__alpha': [0.1, 1.0, 2.0]}  # unused; the grid below supplies its own param_grid

grid = GridSearchCV(estimator=stack,
                    param_grid={
                        'lasso__alpha': [x / 5.0 for x in range(1, 2)],
                        'ridge__alpha': [x / 20.0 for x in range(1, 2)],
                    },
                    cv=2,
                    refit=True)

X = np.hstack(X_matrix_train)
grid.fit(X, y_train_new)