# Example 1
def stacking(X, y, k_cv):
    """Cross-validate a stacking regressor (KRR + SVR + ElasticNet).

    Runs k_cv-fold cross-validation, printing the per-fold training
    score and test score (square roots of the r_2 metric) together
    with the running mean.

    Parameters
    ----------
    X : 2-D array of shape (n_samples, n_features)
    y : 1-D array of targets, aligned with the rows of X
    k_cv : int
        Number of cross-validation folds.

    Returns
    -------
    list of float
        Per-fold sqrt(r_2) test scores (also printed as they accrue).
    """
    res = []
    estimators = [('krr', KernelRidge(kernel="cosine", alpha=0.001)),
                  ('svr', SVR(C=2000, gamma=0.001)),
                  ("enet",
                   ElasticNet(alpha=0.00001, l1_ratio=0.0005, max_iter=10000))]
    reg = StackingRegressor(estimators=estimators,
                            n_jobs=15,
                            final_estimator=LinearRegression())
    kfold = KFold(n_splits=k_cv, shuffle=True, random_state=0)
    # Iterate the splitter directly instead of calling __next__() by hand.
    # KFold.split only looks at the length of its argument, so splitting
    # on X (the conventional choice) yields the same folds as split(y).
    for trainval_index, test_index in kfold.split(X):
        X_trainval = X[trainval_index, :]
        y_trainval = y[trainval_index]
        X_test = X[test_index, :]
        y_test = y[test_index]
        reg.fit(X_trainval, y_trainval)
        # sqrt of the training-fold R^2, printed for monitoring.
        print((reg.score(X_trainval, y_trainval))**0.5)
        test_pre = reg.predict(X_test)
        # Compute the fold score once (the original evaluated r_2 twice).
        fold_score = r_2(y_test, test_pre)**0.5
        print("accuracy: ", fold_score)
        res.append(fold_score)
        # Label typo fixed ("acacuracy" -> "accuracy").
        print("mean accuracy: ", np.array(res).mean())
    print("mean accuracy: ", np.array(res).mean())
    return res
# Example 2
def stacking_qtlmas(X_trainval, y_trainval, X_test, y_test):
    """Fit a stacking regressor on QTLMAS data and predict on X_test.

    Base learners are kernel ridge, SVR and elastic net; a linear
    regression blends their predictions.  The square root of the
    training R^2 is printed as a fit diagnostic.

    Parameters
    ----------
    X_trainval, y_trainval : training features and targets
    X_test : features to predict on
    y_test : unused; kept for signature compatibility with callers

    Returns
    -------
    ndarray of predictions for X_test.
    """
    # (Removed an unused `res = []` accumulator from the original.)
    estimators = [('krr', KernelRidge(kernel="cosine", alpha=0.005)),
                  ('svr', SVR(C=2500, gamma=0.001)),
                  ("enet",
                   ElasticNet(alpha=0.00001, l1_ratio=0.0005, max_iter=10000))]
    reg = StackingRegressor(estimators=estimators,
                            n_jobs=15,
                            final_estimator=LinearRegression())

    reg.fit(X_trainval, y_trainval)
    # sqrt of the training R^2, printed for monitoring.
    print((reg.score(X_trainval, y_trainval))**0.5)
    return reg.predict(X_test)
# Evaluate the fitted stacking model on the held-out test set.
# NOTE(review): stack_test_pred, stack_train_mse, stack_val_mse, ytest,
# rfreg, elastic and stack are all defined earlier, outside this excerpt.
stack_test_mse = mean_squared_error(ytest, stack_test_pred)

# Report RMSE (sqrt of MSE) for the train / validation / test splits.
print("RMSE using StackRegressor:\t{}\t{}\t{}\n".format(
    np.sqrt(stack_train_mse), np.sqrt(stack_val_mse), np.sqrt(stack_test_mse)))

# Plot the first 300 actual vs. predicted values.
df_rf = pd.DataFrame({'Actual': ytest, 'Predicted': stack_test_pred})
fig1 = pp.figure(figsize=(8, 6))
df_rf.head(n=300).plot()
pp.legend()
pp.title("StackRegressor Actual v/s Predicted Annual Rainfall")
pp.xticks(())  # hide axis ticks; sample index carries no meaning here
pp.yticks(())
pp.show()

# Test-set scores for each fitted model (R^2 for regressors).
print(rfreg.score(Xtest, ytest), elastic.score(Xtest, ytest),
      stack.score(Xtest, ytest))

# CLASSIFICATION & CLUSTERING METHODS

# Data splitting: seasonal rainfall totals as features, region as target.
X = np.array(raindf[['JAN-FEB', 'MAR-MAY', 'JUN-SEP', 'OCT-DEC']])
y = np.array(raindf['REGION'])

# Encode the string region labels as integers.
# NOTE(review): le.fit followed by le.fit_transform fits the encoder
# twice; the first call is redundant (but harmless).
le = preprocessing.LabelEncoder()
le.fit(y)
y = le.fit_transform(y)

# Project the four seasonal features onto two principal components.
Xreduced = PCA(n_components=2).fit_transform(X)
results = pd.DataFrame(Xreduced, columns=['pca1', 'pca2'])

# NOTE(review): this call is truncated in the excerpt; the remaining
# train_test_split arguments are not visible here.
Xtrain, Xtest, ytrain, ytest = model_selection.train_test_split(Xreduced,
# Example 4
# Step 1: Load the Boston housing data (features X, target y).
X, y = load_boston(return_X_y=True)

# Step 2: Hold out 20% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=40)

# Step 3: Train a stacking ensemble — KNN and gradient-boosting base
# learners blended by a ridge meta-learner on 5-fold predictions.
regression = StackingRegressor(estimators=[
    ('knn',
     KNeighborsRegressor(n_neighbors=4,
                         weights='distance',
                         leaf_size=1,
                         metric='manhattan')),
    ('dt', GradientBoostingRegressor(max_depth=3, n_estimators=220))
],
                               final_estimator=Ridge(random_state=40),
                               cv=5,
                               n_jobs=-1)
regression.fit(X_train, y_train)
# Step 4: Evaluate — R^2 (via .score) and RMSE on both splits.
score_train = regression.score(X_train, y_train)
score_test = regression.score(X_test, y_test)
pred_train = regression.predict(X_train)
pred_test = regression.predict(X_test)
# mean_squared_error is symmetric, so the (pred, true) argument order
# here still yields the correct value.
rmse_train = np.sqrt(metrics.mean_squared_error(pred_train, y_train))
rmse_test = np.sqrt(metrics.mean_squared_error(pred_test, y_test))
print('RMSE:{:.2f}/{:.2f}'.format(rmse_train, rmse_test))
print('R2Score:{:.2f}/{:.2f}'.format(score_train, score_test))
# Example 5
# Load the Boston data and split (train_test_split default: 25% test).
from sklearn.datasets import load_boston
X, y = load_boston(return_X_y=True)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# NOTE(review): `reg` is defined outside this excerpt.
reg.fit(X_train, y_train)

y_pred = reg.predict(X_test)

# Visually compare the first 30 true targets with the predictions.
plt.figure()
plt.plot(y_test[:30], 'gd', label='Original')
plt.plot(y_pred[:30], 'b^', label='Stacking Regressor')
plt.show()

from sklearn.metrics import r2_score
print('R2 score: {:.2f}'.format(r2_score(y_test, y_pred)))

# For multiple stacking layers: the final estimator of the outer stack
# is itself a StackingRegressor, giving a two-level ensemble.
final_layer = StackingRegressor(estimators=[
    ('rf', RandomForestRegressor(random_state=42)),
    ('gbrt', GradientBoostingRegressor(random_state=42))
],
                                final_estimator=RidgeCV())
multi_layer_regressor = StackingRegressor(estimators=[
    ('ridge', RidgeCV()), ('lasso', LassoCV(random_state=42)),
    ('svr', SVR(C=1, gamma=1e-6, kernel='rbf'))
],
                                          final_estimator=final_layer)
multi_layer_regressor.fit(X_train, y_train)

print('R2 score: {:.2f}'.format(multi_layer_regressor.score(X_test, y_test)))
# Example 6
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor, StackingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import RidgeCV

# Load the prepared dataset and drop extreme scores (>= 5).
dataset = pd.read_pickle("data/final_dataset.pkl")
dataset = dataset[dataset.score < 5]

# Features: every column except the target and identifier columns
# (errors="ignore" tolerates datasets that lack the id columns).
X = dataset.drop(["score", "author_id", "tweet_id"], axis=1,
                 errors="ignore").to_numpy()
y = dataset[["score"]].values.ravel()

# Train a Stacking regressor: random-forest and histogram gradient
# boosting base learners blended by a RidgeCV meta-learner.
print("#################### STACKING ####################")
estimators = [("Random Forest",
               RandomForestRegressor(random_state=14, n_jobs=-1,
                                     max_depth=13)),
              ("Gradient Boosting",
               HistGradientBoostingRegressor(random_state=14,
                                             l2_regularization=1))]
model = StackingRegressor(estimators=estimators,
                          final_estimator=RidgeCV(),
                          n_jobs=-1)
model.fit(X, y)

# Print the evaluation results.
# Fix: a regressor's .score() is R^2, not a mean-squared error, so the
# original "MSError" label was misleading.
print("R2 score", model.score(X, y))  # 0.6033506428465925
print(model)
print()
# Use a context manager so the pickle file handle is always closed.
with open("data/final_best_model_stacking.pkl", 'wb') as model_file:
    pickle.dump(model, model_file)