Example #1
# Likely from the mlxtend test suite; X1 and y are assumed to be module-level test fixtures.
from scipy import sparse
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from mlxtend.regressor import StackingRegressor


def test_predictions_from_sparse_matrix():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr], meta_regressor=ridge)

    # dense
    stregr.fit(X1, y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61

    # sparse
    stregr.fit(sparse.csr_matrix(X1), y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61
Example #2
# Same test as Example #1, with gamma set explicitly (presumably to keep the
# expected 0.61 score stable across scikit-learn's change of the SVR gamma default).
def test_predictions_from_sparse_matrix():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr],
                               meta_regressor=ridge)

    # dense
    stregr.fit(X1, y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61

    # sparse
    stregr.fit(sparse.csr_matrix(X1), y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61
Example #3
# Follows the mlxtend StackingRegressor user-guide example; X and y (a small
# 1-D regression sample) are assumed to be defined above.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from mlxtend.regressor import StackingRegressor

lr = LinearRegression()
svr_lin = SVR(kernel='linear')
ridge = Ridge(random_state=1)
svr_rbf = SVR(kernel='rbf')

stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                           meta_regressor=svr_rbf)

# Training the stacking regressor

stregr.fit(X, y)
stregr.predict(X)

# Evaluate and visualize the fit

print("Mean Squared Error: %.4f" % np.mean((stregr.predict(X) - y)**2))
print('Variance Score: %.4f' % stregr.score(X, y))

with plt.style.context('seaborn-whitegrid'):
    plt.scatter(X, y, c='lightgray')
    plt.plot(X, stregr.predict(X), c='darkgreen', lw=2)

plt.show()

# Example 2 - Stacked Regression and GridSearch

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso

# Initializing models

lr = LinearRegression()
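
The excerpt stops here; what follows is a minimal sketch of how the grid search over the stacked model could continue, based on the pattern in the mlxtend user guide and assuming X and y from above (the exact parameter-name prefixes, e.g. 'lasso__alpha' and 'meta_regressor__C', vary slightly across mlxtend versions):

lasso = Lasso(random_state=1)
ridge = Ridge(random_state=1)
svr_rbf = SVR(kernel='rbf')

stregr = StackingRegressor(regressors=[lasso, lr, ridge],
                           meta_regressor=svr_rbf)

# Keys combine the lower-cased estimator name (or meta_regressor) with its parameter.
params = {'lasso__alpha': [0.1, 1.0, 10.0],
          'ridge__alpha': [0.1, 1.0, 10.0],
          'meta_regressor__C': [0.1, 1.0, 10.0]}

grid = GridSearchCV(estimator=stregr, param_grid=params, cv=5, refit=True)
grid.fit(X, y)
print(grid.best_params_, grid.best_score_)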
Example #4
# Assumes: import lightgbm as lgb, import xgboost as xgb, plus Ridge and StackingRegressor
# imported as in the earlier examples; X_train, y_train, X_val, y_val are prepared upstream.
gbm_penetration_rate = lgb.LGBMRegressor(
    n_estimators=200,
    subsample_freq=1,
    subsample=0.8,
    colsample_bytree=0.8,
    learning_rate=0.05,
    max_depth=8,
    num_leaves=256,
    objective='xentropy',
    device='gpu',
)

# Note: subsample_freq and num_leaves are LightGBM parameters; XGBRegressor does not
# use them (recent XGBoost versions only emit a "might not be used" warning).
xgb_penetration_rate = xgb.XGBRegressor(n_estimators=200,
                                        subsample_freq=1,
                                        subsample=0.7,
                                        colsample_bytree=0.7,
                                        learning_rate=0.1,
                                        max_depth=8,
                                        num_leaves=256,
                                        objective='reg:logistic',
                                        n_jobs=-1)

meta_reg = Ridge()

stregr = StackingRegressor(
    regressors=[gbm_penetration_rate, xgb_penetration_rate],
    meta_regressor=meta_reg)

stregr.fit(X_train, y_train[:, 0])
print(1 - stregr.score(X_val, y_val[:, 0]))  # score() is R^2, so this prints 1 - R^2
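
A note on the objectives above: LightGBM's 'xentropy' and XGBoost's 'reg:logistic' both expect targets in [0, 1]. If the penetration-rate column is not already a fraction, a rescaling along these lines would be needed before fitting (a sketch; the slicing of y_train follows the code above):

import numpy as np

# Hypothetical min-max rescaling of the target into [0, 1].
rate = y_train[:, 0].astype(float)
rate01 = (rate - rate.min()) / (rate.max() - rate.min())

stregr.fit(X_train, rate01)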
Example #5
# TF1-era reproducibility setup; assumes `from keras import backend as K`, a
# tf.Session `sess`, `import numpy as np` and `import random as rn` above.
K.set_session(sess)
np.random.seed(7)
rn.seed(7)

from mlxtend.regressor import StackingRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

rf = RandomForestRegressor(n_estimators=54, max_depth=None, random_state=8)
ext = ExtraTreesRegressor(n_estimators=584,
                          min_samples_split=2,
                          random_state=8)


# Assumes the classic Keras imports: Sequential, Dense, BatchNormalization and
# KerasRegressor (keras.wrappers.scikit_learn).
def create_model():
    model = Sequential()
    model.add(Dense(540, input_dim=8, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
    return model


nn = KerasRegressor(build_fn=create_model, epochs=32, batch_size=32, verbose=0)
clf = StackingRegressor(regressors=[nn, ext], meta_regressor=rf)

scores = []
for train, test in kfold.split(X, y):
    clf.fit(X[train], y[train])
    score = clf.score(X[test], y[test])
    print(score)
    scores.append(score)
print("%.3f%% (+/- %.3f)" % (np.mean(scores), np.std(scores)))
Example #6
# grid_search is a user-defined helper that tunes and returns a fitted model;
# elastic_net, lasso, ridge and support_vector_regressor are assumed to be fitted above.
XGBoost = grid_search(train_X, train_Y, xgb.XGBRegressor())
light_GBM = grid_search(train_X, train_Y, lgb.LGBMRegressor())

stacked_regression = StackingRegressor(
    regressors=[elastic_net, lasso, ridge, support_vector_regressor,
                XGBoost, light_GBM],
    meta_regressor=support_vector_regressor)

stacked_regression.fit(train_X, train_Y)

stacked = stacked_regression.predict(test_X)

# Blend the individual predictions (weights sum to 1.0); expm1 suggests the
# models were trained on a log1p-transformed target.
ensembled = np.expm1((0.1 * elastic_net.predict(test_X)) +
                     (0.2 * lasso.predict(test_X)) +
                     (0.1 * ridge.predict(test_X)) +
                     (0.1 * support_vector_regressor.predict(test_X)) +
                     (0.2 * XGBoost.predict(test_X)) +
                     (0.1 * light_GBM.predict(test_X)) + (0.2 * stacked))

print(stacked_regression.score(train_X, train_Y))
"""
Export submission data
"""
submission = pd.DataFrame({
    'Id':
    test_X.index + (len(train_X_bf) - len(train_X) + 1),
    'SalePrice':
    ensembled
})
submission.to_csv('data/submission.csv', index=False)
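
The np.expm1 applied to the blended predictions implies the models were trained on log1p-transformed sale prices; a sketch of the corresponding transform at training time (raw_train is a hypothetical DataFrame holding the original prices):

import numpy as np

# Hypothetical target transform matching the expm1 above.
train_Y = np.log1p(raw_train['SalePrice'])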
Example #7
# dt_estimator and rf_estimator (the other first-stage models) are assumed to be defined
# above; `ensemble` and `tree` are the sklearn.ensemble / sklearn.tree modules.
gbm_estimator = ensemble.GradientBoostingRegressor(random_state=100,
                                                   n_estimators=100,
                                                   max_features=3,
                                                   max_depth=5,
                                                   learning_rate=0.05)
stage1_models = [dt_estimator, rf_estimator, gbm_estimator]
stage2_model = tree.DecisionTreeRegressor(random_state=100)

stacked_model = StackingRegressor(regressors=stage1_models,
                                  meta_regressor=stage2_model)

stacked_model.fit(X_train1, y_train)
# StackingRegressor has no grid_scores_/best_params_/best_score_ attributes -- those
# belong to a grid-search object (see the GridSearchCV sketch at the end of this example).
print(stacked_model.score(X_train1, y_train))

stacked_model.fit(X_train1, y_train)  # refitting; redundant since the model is already fitted above
stacked_model.predict(X_train1)

X_test = total_data1[train_data.shape[0]:]
X_test.shape
X1 = X_test.select_dtypes(include=['number']).columns
X_test1 = X_test[X1]
X_test1.shape
X_test1.info()
X_test1 = X_test1.drop(['bought', 'sold', 'libor_rate', 'return'],
                       axis=1,
                       inplace=False)

X_test1.shape
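
As noted above, grid_scores_, best_params_ and best_score_ come from a search object rather than from StackingRegressor. A minimal sketch of wrapping the stacked model in GridSearchCV to obtain them (the parameter grid is illustrative; valid keys can be listed with stacked_model.get_params().keys()):

from sklearn.model_selection import GridSearchCV

param_grid = {'gradientboostingregressor__max_depth': [3, 5, 7]}  # illustrative grid
search = GridSearchCV(stacked_model, param_grid=param_grid, cv=5)
search.fit(X_train1, y_train)
print(search.best_params_)
print(search.best_score_)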
Example #8
# Assumes: numpy as np, DecisionTreeRegressor, LinearRegression, Ridge, StackingRegressor,
# mean_squared_error and mean_absolute_error are imported above; mgt.changepath is a user
# helper that temporarily switches the working directory.

# Load the train/test split
with mgt.changepath(path):
    npzfile = np.load('mydata.npz')
    X_train, X_test = npzfile['X_train'], npzfile['X_test']
    y_train, y_test = npzfile['y_train'], npzfile['y_test']

# Create the first-layer models
reg_dt = DecisionTreeRegressor(min_samples_leaf=11, min_samples_split=33, random_state=500)
reg_lr = LinearRegression(normalize=True)  # note: `normalize` was removed in scikit-learn 1.2
reg_ridge = Ridge(random_state=500)

# Instantiate the 2nd-layer regressor
reg_meta = LinearRegression()

# Build the Stacking regressor
reg_stack = StackingRegressor(
    regressors=[reg_dt, reg_lr, reg_ridge],
    meta_regressor=reg_meta)
reg_stack.fit(X_train, y_train)

# Evaluate the performance on the test set using the MAE metric
pred = reg_stack.predict(X_test)
stacks = reg_stack.score(X_test, y_test)  # R^2 on the test set
rmseS = np.sqrt(mean_squared_error(y_test, pred))
print('MAE: {:.3f}'.format(mean_absolute_error(y_test, pred)))
print('RMSE (Stacking): {:.3f}, R^2 Score: {:.2f}'.format(rmseS, stacks), **sp)  # `sp`: print() options defined elsewhere
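
For comparison, the same metric can be computed for one of the first-layer models on its own, which shows what the stacked combination adds (a sketch using the objects defined above):

from sklearn.metrics import mean_absolute_error

# Fit and score a single first-layer model for reference.
reg_dt.fit(X_train, y_train)
print('MAE (decision tree alone): {:.3f}'.format(
    mean_absolute_error(y_test, reg_dt.predict(X_test))))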
Example #9
# The opening of this call is truncated in the excerpt; `reg` is presumably the
# xgb.XGBRegressor created above (it is later passed to xgb.plot_importance).
reg.fit(
    X_train,
    y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    early_stopping_rounds=50,  # stop if 50 consecutive rounds pass without a decrease in error
    verbose=False)  # change verbose to True if you want to watch it train

predict = reg.predict(X_test)
print("R-squared value: ", reg.score(X_test, y_test))
# Best R-squared value I have so far is: 0.9866523109744227

xgb.plot_importance(reg)
plt.show()

lr = LinearRegression()

stregr = StackingRegressor(regressors=[lr, reg], meta_regressor=reg)
stregr = stregr.fit(X_train, y_train)
print('Variance Score: %.4f' % stregr.score(X_train, y_train))

dump(stregr, 'classical_model.joblib')  # requires: from joblib import dump

# RFE Classical Model
estimator = reg
selector = RFE(estimator, n_features_to_select=3, step=1)
selector = selector.fit(X, y)
print("Feature Ranking: ", selector.ranking_)

# TODO: Serialize the classical model so that it doesn't retrain,
# then load it up and try a date from the training data to see if it works.
#######support vector########
from sklearn.svm import SVR
regressor_s = SVR(kernel='linear')
regressor_s.fit(X_train, y_train)

#####stacking

from mlxtend.regressor import StackingRegressor
from mlxtend.data import boston_housing_data

# regressor, GBoost and regressor_r are models fitted earlier in the script; the
# meta_regressor should be an estimator instance (e.g. an XGBRegressor), not the xgboost module.
stregr = StackingRegressor(regressors=[regressor, GBoost, regressor_r],
                           meta_regressor=xgb)
stregr.fit(X_train, y_train)
y_pred = stregr.predict(X_test)
print('RMSE for Stacked Regression is {:.4f}'.format(sqrt(mean_squared_error(y_test, y_pred))))
stregr.score(X_test, y_test)

# Assumes the classic Keras imports: Sequential, Dense, and `import keras` for the optimizer.
model = Sequential()
model.add(Dense(200, input_dim=220, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))
model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adadelta())  # mean squared error loss
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=66, batch_size=32, verbose=2)
y_pred = model.predict(X_test)
print('RMSE for Neural Network is {:.4f}'.format(sqrt(mean_squared_error(y_test, y_pred))))
model.evaluate(X_test, y_test, verbose=0)  # Sequential has no .score(); evaluate() returns the loss

plt.style.use('ggplot')
plt.plot(y_pred, y_test, 'ro')
plt.xlabel('Predictions', fontsize=15)
plt.ylabel('Reality', fontsize=15)
plt.title('Predictions vs. reality on the test set', fontsize=15)
plt.show()