def model_intrv3(Y_train, X_train, Y_test, X_test, Targ):
    global reslts
    global metrs
    import pandas as pd
    import numpy as np
    from sklearn.metrics import mean_squared_error
    from xgboost.sklearn import XGBRegressor

    # XGBRegressor has no 'loss' parameter (that belongs to scikit-learn's
    # GradientBoostingRegressor); the squared-error objective is used instead.
    model = XGBRegressor(n_estimators=200, learning_rate=0.05, max_depth=4,
                         random_state=0, subsample=0.9, colsample_bytree=1.0,
                         objective='reg:squarederror').fit(X_train, Y_train)
    pred_Yxgb = model.predict(X_test)
    mse = mean_squared_error(Y_test, pred_Yxgb)
    # RMSE normalized by the target mean (an alternative is max(Targ))
    nRMSE = np.sqrt(mse) / Targ.mean()
    Yts_pd = pd.DataFrame({'Yts': Y_test, 'Ypd': pred_Yxgb})
    print(mse, nRMSE)
    metrs = {'mse': mse, 'nRMSE': nRMSE}
    reslts = {'Ypred': pred_Yxgb, 'Yts_pd': Yts_pd}
    return {'Yts_pd': Yts_pd, 'mse': mse, 'nRMSE': nRMSE}
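# Usage sketch for model_intrv3 on synthetic data (illustration only; the
# frame and column names below are hypothetical, not part of the pipeline):
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X_demo = pd.DataFrame(rng.rand(500, 4), columns=['f0', 'f1', 'f2', 'f3'])
y_demo = 3 * X_demo['f0'] - 2 * X_demo['f1'] + 0.1 * rng.rand(500)
Xtr, Xts, ytr, yts = train_test_split(X_demo, y_demo, test_size=0.3,
                                      random_state=0)
out = model_intrv3(ytr, Xtr, yts, Xts, Targ=y_demo)
print(out['nRMSE'])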
def run_xgb(output_train, df_X_train, df_Y_train, output_test, df_X_test, df_Y_test):
    from sklearn.model_selection import GridSearchCV  # needed for the search below

    xgb_estimator = XGBRegressor()
    param_grid = {
        'nthread': [4],  # with hyperthreading, xgboost may become slower
        'objective': ['reg:squarederror'],  # 'reg:linear' is the deprecated alias
        'learning_rate': [0.03, 0.05, 0.07],  # the so-called `eta` value
        'max_depth': [5, 6, 7],
        'min_child_weight': [4],
        'subsample': [0.7],
        'colsample_bytree': [0.7],
        'n_estimators': [30],
    }
    xgb_grid = GridSearchCV(xgb_estimator, param_grid)
    xgb_grid.fit(df_X_train, df_Y_train.cnt)
    opt_pars = xgb_grid.best_params_

    # Refit with the best parameters found by the grid search
    xgb_opt = XGBRegressor(random_state=1).set_params(**opt_pars)
    xgb_opt.fit(df_X_train, df_Y_train.cnt)
    r2_train = xgb_opt.score(df_X_train, df_Y_train.cnt)
    r2_test = xgb_opt.score(df_X_test, df_Y_test.cnt)
    result = df_proc.compare_results("XGBoost", xgb_opt, output_train,
                                     df_X_train, output_test, df_X_test)
    return {"r2": [r2_train, r2_test],
            "R2": [result[1], result[2]],
            "plot": result[0]}
def R2_estimator(_x_train, _x_test, _y_train, _y_test):
    train_score = []
    test_score = []
    for n in range(1, 100):
        # Note: min_samples_split is a scikit-learn tree parameter; the
        # XGBoost equivalent used here is min_child_weight.
        reg = XGBRegressor(n_estimators=n, min_child_weight=7, max_depth=5)
        reg.fit(_x_train, _y_train)
        train_score.append(reg.score(_x_train, _y_train))
        test_score.append(reg.score(_x_test, _y_test))
        print(n, 'estimators done!')
    plt.plot(range(1, 100), train_score, color='skyblue', label='training')
    plt.plot(range(1, 100), test_score, color='red', label='testing')
    plt.legend()
    plt.xlabel('number of estimators')
    plt.ylabel('$R^2$ score')
    plt.show()
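# A cheaper alternative sketch: rather than refitting one model per value of
# n_estimators, a single fit can record a per-round metric via eval_set and
# evals_result(). This reports RMSE per boosting round instead of R^2
# (assumes xgboost >= 1.6, where eval_metric is a constructor argument).
def rmse_per_round(_x_train, _x_test, _y_train, _y_test, n_rounds=99):
    reg = XGBRegressor(n_estimators=n_rounds, min_child_weight=7,
                       max_depth=5, eval_metric='rmse')
    reg.fit(_x_train, _y_train,
            eval_set=[(_x_train, _y_train), (_x_test, _y_test)],
            verbose=False)
    history = reg.evals_result()
    plt.plot(history['validation_0']['rmse'], label='training')
    plt.plot(history['validation_1']['rmse'], label='testing')
    plt.legend()
    plt.xlabel('number of estimators')
    plt.ylabel('RMSE')
    plt.show()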
def xg_boost(_features, _x_train, _x_test, _y_train, _y_test,
             store=True, load=False, silent=False):
    if load:
        reg_xgb = joblib.load('XGB')
    else:
        reg_xgb = XGBRegressor(n_estimators=200, min_child_weight=7,
                               max_depth=5, n_jobs=-1,
                               verbosity=0)  # 'silent' is deprecated
        reg_xgb.fit(_x_train, _y_train)
        if store:
            # Dump under the same name the load branch expects
            joblib.dump(reg_xgb, 'XGB')
    score = reg_xgb.score(_x_test, _y_test)
    print("\nXG Boosting:")
    print('Training Accuracy:\t', reg_xgb.score(_x_train, _y_train))
    print('Testing Accuracy:\t', score)
    if not silent:
        print('\nImportance for each:')
        importance = []
        for i in range(len(_features)):
            importance.append([_features[i], reg_xgb.feature_importances_[i]])
        importance.sort(key=lambda x: x[1], reverse=True)
        for each in importance:
            print(each[0] + ':\t', each[1])
    return reg_xgb, score
def XGBoostPredictor(X_train, y_train, X_test, y_test):
    from math import sqrt
    from sklearn.metrics import mean_squared_error, r2_score

    # Fit XGB regressor
    xboost = XGBRegressor(n_estimators=200)
    xboost.fit(X_train, y_train)
    xgb_score = xboost.score(X_test, y_test)

    # Predict
    xboost_pred = xboost.predict(X_test)
    xgboostRMSE = sqrt(mean_squared_error(y_test, xboost_pred))
    print("Root mean squared error: %.2f" % xgboostRMSE)
    print('R-squared for XGBoost: %.2f' % r2_score(y_test, xboost_pred))
    plt.scatter(y_test, xboost_pred)
    plt.xlabel('Measured')
    plt.ylabel('Predicted')
    plt.title('XGBoost Predicted vs Actual')
    plt.show()
    chart_regression(xboost_pred, y_test, 'XGBoost Predictor')
    return xgb_score, xgboostRMSE
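# chart_regression is not defined in this snippet; a minimal sketch of a
# common implementation (expected vs. predicted, sorted by the expected
# value) is assumed here:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def chart_regression(pred, y, title):
    t = pd.DataFrame({'pred': pred, 'y': np.asarray(y).flatten()})
    t = t.sort_values(by='y')
    plt.plot(t['y'].to_list(), label='expected')
    plt.plot(t['pred'].to_list(), label='prediction')
    plt.ylabel('output')
    plt.title(title)
    plt.legend()
    plt.show()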
def xgb(X_train, X_test, y_train, y_test):
    mod = XGBRegressor(learning_rate=0.2, objective='reg:squarederror')
    estimators = np.arange(1, 200, 10)
    scores = []
    estim = []
    for n in estimators:
        mod.set_params(n_estimators=n)
        mod.fit(X_train, y_train)
        scores.append(mod.score(X_test, y_test))
        estim.append(n)
    xdf = pd.DataFrame({'Estimator': estim, 'Score': scores})
    # Pick the (first) estimator count with the highest test score
    best = int(xdf.loc[xdf['Score'].idxmax(), 'Estimator'])
    xgbr = XGBRegressor(n_estimators=best, learning_rate=0.2,
                        objective='reg:squarederror')
    xgbr.fit(X_train, y_train)
    return xgbr
                                                    datasets.target,
                                                    train_size=0.8,
                                                    random_state=104)  #2

# model = GradientBoostingRegressor(max_depth=4)
model = XGBRegressor(n_jobs=-1)  #3
# Note: use_label_encoder and the 'mlogloss' metric apply to classification;
# for a regressor, 'rmse' is an appropriate eval metric.
model.fit(x_train, y_train, eval_metric='rmse')  #4

acc = model.score(x_test, y_test)
print(model.feature_importances_)
print('acc : ', acc)

'''
def plot_feature_importances_dataset(model):
    n_features = datasets.data.shape[1]
    plt.barh(np.arange(n_features), model.feature_importances_, align='center')
    plt.yticks(np.arange(n_features), datasets.feature_names)
    plt.xlabel("Feature Importances")
    plt.ylim(-1, n_features)

plot_feature_importances_dataset(model)
'''
# > XGBoost (Extreme Gradient Boosting) belongs to a family of boosting
# > algorithms and uses the gradient boosting (GBM) framework at its core.
# > It is an optimized distributed gradient boosting library. But wait,
# > what is boosting? Well, keep on reading.

# In[ ]:

# Initialize model
from xgboost.sklearn import XGBRegressor

XGB_Regressor = XGBRegressor()

# Fit the model on our data
XGB_Regressor.fit(X_train, y_train)

# In[ ]:

# Score model
XGB_Regressor.score(X_train, y_train)

# <a id="76"></a> <br>
# ## 7-6 LassoCV
# Lasso linear model with iterative fitting along a regularization path.
# The best model is selected by cross-validation.

# In[ ]:

lasso = LassoCV()

# In[ ]:

# Fit the model on our data
lasso.fit(X_train, y_train)
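# In[ ]:

# Held-out evaluation sketch: scoring on the training data, as above, tends
# to overstate performance. Assuming X_test/y_test exist from the same
# train/test split, the test-set R^2 is the more informative number:
print('XGBRegressor R^2 (test):', XGB_Regressor.score(X_test, y_test))
print('LassoCV R^2 (test):', lasso.score(X_test, y_test))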
}
xg = XGBRegressor(random_state=96,
                  objective='reg:squarederror')  # 'reg:linear' is deprecated
gridsearch = GridSearchCV(xg, param_grid=grid, cv=5)
gridsearch.fit(X_train, y_train)
print(gridsearch.best_score_)
print(gridsearch.best_params_)

xgb = XGBRegressor(random_state=96, objective='reg:squarederror',
                   min_child_weight=6, n_estimators=1000, max_depth=7,
                   colsample_bytree=0.6)
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_test)
accuracy = xgb.score(X_test, y_test)
print('Accuracy: ' + str(np.round(accuracy * 100, 2)) + '%')
print(mean_absolute_error(y_test, xgb_pred))
print(mean_squared_error(y_test, xgb_pred))
print(np.sqrt(mean_squared_error(y_test, xgb_pred)))

lgb = LGBMRegressor(objective='regression')
lgb.fit(X_train, y_train)
lgb_pred = lgb.predict(X_test)
accuracy = lgb.score(X_test, y_test)
print('Accuracy: ' + str(np.round(accuracy * 100, 2)) + '%')
print(mean_absolute_error(y_test, lgb_pred))
print(mean_squared_error(y_test, lgb_pred))
print(np.sqrt(mean_squared_error(y_test, lgb_pred)))

from yellowbrick.regressor import ResidualsPlot
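# A minimal usage sketch for the ResidualsPlot import above (standard
# yellowbrick API): wrap the estimator, fit on train, score on test, draw.
visualizer = ResidualsPlot(XGBRegressor(random_state=96,
                                        objective='reg:squarederror'))
visualizer.fit(X_train, y_train)   # fit the wrapped regressor
visualizer.score(X_test, y_test)   # compute residuals on the test split
visualizer.show()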
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import urllib.request as urllib2
from bs4 import BeautifulSoup
import json

df = pd.read_csv('dataset_cleaned.csv')
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]  # drop leftover index columns
y = df['Value']
X = df.drop(['Value'], axis=1)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3,
                                                    random_state=1234)

from xgboost.sklearn import XGBRegressor
xboost = XGBRegressor(n_estimators=200)
xboost.fit(X_train, y_train)
xgb_score = xboost.score(X_test, y_test)
print(xgb_score)
def getTunedXGBoostModel(X_train, Y_train, X_test, Y_test):
    """Greedy stage-wise tuning: each hyperparameter is grid-searched in turn,
    and the best value found is fixed before tuning the next one."""
    stages = [
        ('n_estimators', list(range(20, 101, 10))),
        ('min_child_weight', list(range(1, 12))),
        ('max_depth', list(range(3, 12))),
        ('gamma', [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]),
        ('subsample', [0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]),
        ('colsample_bytree', [0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]),
        ('reg_alpha', [1e-5, 1e-2, 0.1, 1, 100]),
        ('reg_lambda', [1e-5, 1e-2, 0.1, 1, 100]),
    ]
    best = {}
    for name, values in stages:
        # note: the old `iid` argument was removed in scikit-learn 0.24
        gsearch = GridSearchCV(
            estimator=XGBRegressor(learning_rate=0.1, random_state=10, **best),
            param_grid={name: values}, n_jobs=4, cv=5)
        gsearch.fit(X_train, Y_train.values.ravel())
        best[name] = gsearch.best_params_[name]

    print("\n----------------------------------")
    print("Tuned XGBoost params:")
    for name, value in best.items():
        print(name + ":", value)
    print("----------------------------------\n")

    # Proportionally decrease the learning rate while increasing the number of
    # estimators, and pick the best combination on the held-out test set.
    n_estimators = best.pop('n_estimators')
    candidates = [(0.01, n_estimators * 10),
                  (0.05, n_estimators * 2),
                  (0.1, n_estimators),
                  (0.2, n_estimators // 2)]
    best_model, best_score = None, float('-inf')
    for lr, n_est in candidates:
        model = XGBRegressor(learning_rate=lr, n_estimators=n_est,
                             random_state=10, **best)
        model.fit(X_train, Y_train.values.ravel())
        score = model.score(X_test, Y_test)
        if score > best_score:
            best_model, best_score = model, score
    return best_model
text = "Accuracy: " + str(accuracy_dt) + "\nMean Squared Error: " + str(mse_dt) + "\nMean Absolute Error: " + str(mae_dt) output_file.write(text) scatter_plot(target_test, predicted_sales) plt.savefig('images/predict/dt.png') plt.close() ### XGB REGRESSOR ### xgbr = XGBRegressor(objective='reg:linear', nthread= 4, n_estimators= 500, max_depth= 6, learning_rate= 0.5) xb = xgbr.fit(other_train,target_train) predicted_sales = xgbr.predict(other_test) mae_xgbr = round(mean_absolute_error(target_test, predicted_sales),3) mse_xgbr = round(mean_squared_error(target_test, predicted_sales),3) accuracy_xgbr = round(xgbr.score(other_test, target_test),3) #write results to output file output_file.write("\n------------------------------------\n") output_file.write("XGB REGRESSOR STATISTICS:\n") output_file.write("------------------------------------\n") text = "Accuracy: " + str(accuracy_xgbr) + "\nMean Squared Error: " + str(mse_xgbr) + "\nMean Absolute Error: " + str(mae_xgbr) output_file.write(text) scatter_plot(target_test, predicted_sales) plt.savefig('images/predict/xgbr.png') plt.close() output_file.close()
xgb_reg.fit(X_train, y_train)

# In[440]:

y_pred = xgb_reg.predict(X_test)
xgb_mse = mean_squared_error(y_test, y_pred)
xgb_rmse = np.sqrt(xgb_mse)
xgb_rmse

# In[442]:

xgb_reg.score(X_test, y_test)

# In[444]:

# Offline I used cv=8
from sklearn.model_selection import cross_val_score
scores = cross_val_score(xgb_reg, X_test, y_test,
                         scoring="neg_mean_squared_error", cv=2)
rmse_scores = np.sqrt(-scores)
display_scores(rmse_scores)
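# display_scores is not defined in this snippet; a minimal sketch matching
# its usual role (summarizing cross-validation RMSE scores) is assumed:
def display_scores(scores):
    print("Scores:", scores)
    print("Mean:", scores.mean())
    print("Standard deviation:", scores.std())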
                        n_estimators=160,
                        max_depth=6,
                        min_child_weight=3,
                        gamma=0,
                        subsample=0.7,
                        colsample_bytree=0.7,
                        nthread=4,
                        scale_pos_weight=1,
                        seed=27)
grid = GridSearchCV(estimator=xgb_best, param_grid=param_test, cv=5)
grid.fit(source_X, source_y)
print(grid.cv_results_)  # grid_scores_ was removed from scikit-learn; cv_results_ replaces it
print(grid.best_estimator_)

xgb_best.fit(train_X, train_y)
print(xgb_best.score(test_X, test_y))

xgb_param = xgb_best.get_xgb_params()
# Note: 'auc' is a classification metric; for a 'count' regression target,
# 'rmse' would be the usual choice here.
xgb.cv(xgb_param, xgtrain, num_boost_round=5000, nfold=15,
       metrics=['auc'], early_stopping_rounds=50, stratified=True, seed=1301)

full_xy = pd.concat([source_X, source_y], axis=1)
target = 'count'
# Refit Xtreme Gradient Boosting
xgb_tree = XGBRegressor(max_depth=2,
                        learning_rate=0.10777777777777778,
                        n_estimators=300,
                        min_child_weight=5,
                        colsample_bytree=1,
                        gamma=0,
                        reg_lambda=1,
                        reg_alpha=0.30000000000000004,
                        subsample=2/3,
                        random_state=0)
xgb_tree.fit(x_train, y_train)

# Training R sq: 95.65%
print('Training Score:', xgb_tree.score(x_train, y_train))
# Training RMSE: .08271
print('Training RMSE:', math.sqrt(mean_squared_error(y_train, xgb_tree.predict(x_train))))
# Test R sq: 91.03%
print('Test Score:', xgb_tree.score(x_test, y_test))
# Test RMSE: .12411
print('Test RMSE:', math.sqrt(mean_squared_error(y_test, xgb_tree.predict(x_test))))

##############################################################################
# Random Forest Test R sq: 89.25%, RMSE: 0.13589
rf_test_pred = rf_tree.predict(house_test_x)
rf_test_pred = np.exp(rf_test_pred)  # back-transform from the log scale
fit_params={"early_stopping_rounds":20, "eval_metric" : 'rmse', "eval_set" : [(X_val, y_val.reshape(-1))], 'verbose': 1, } model.fit(X_train, y_train.reshape(-1), **fit_params) """# Métrica $$ \textrm{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^{n} \left(\frac{\hat{y}_i - y_i}{y_i}\right)^2} $$ """ score = model.score(X_val, y_val) if log_output: y_pred_train = np.exp(model.predict(X_train)*max_log_y) y_pred = np.exp(model.predict(X_val)*max_log_y) y_pred_test = np.exp(model.predict(X_test)*max_log_y) else: y_pred_train = model.predict(X_train)*y_std + y_mean y_pred = model.predict(X_val)*y_std + y_mean y_pred_test = model.predict(X_test)*y_std + y_mean # Train train_RMSE = np.sqrt((((df_train['Sales'].values - y_pred_train)/df_train['Sales'].values)**2).sum()/len(y_pred_train)) # Validación val_RMSE = np.sqrt((((df_val['Sales'].values - y_pred)/df_val['Sales'].values)**2).sum()/len(y_pred))
xbr.fit(x_train, y_train)
predict = xbr.predict(x_test)

# from xgboost import plot_importance
# plot_importance(xbr)
# plt.show()

print('************************** XBR Evaluation ********************')
x_test['predict'] = predict
rmse = np.sqrt(mean_squared_error(y_test, predict))
print("RMSE", rmse)
print("MAE", mean_absolute_error(y_test, predict))
print("r2_score", r2_score(y_test, predict))
print(xbr.score(x_train, y_train))

# xbr.plot_importance(model)
# plt.rcParams['figure.figsize'] = [5, 5]
# plt.show()

# print("********************* XGB MODEL *******************")
# print("best_ntree_limit", model.best_ntree_limit)
# print("best_score", model.best_score)
# print("best_iteration", model.best_iteration)
# dtest = xgb.DMatrix(x_test, label=y_test, feature_names=list(x_test.columns))
# y_pred = model.predict(dtest)
# x_test['pred'] = y_pred
# # x_test['predict'] = predict
# print(x_test.to_csv(os.path.dirname(__file__)+'/xtest_results.csv'))