Example no. 1
    def fit_xgboost_regression(self):
        if self.X_val is not None:
            # Fold the validation split back into the training data before the search.
            X_train_aux = pd.concat([pd.DataFrame(self.X_train), pd.DataFrame(self.X_val)])
            y_train_aux = pd.concat([pd.Series(self.y_train), pd.Series(self.y_val)],
                                    ignore_index=True)
        else:
            X_train_aux = self.X_train
            y_train_aux = self.y_train

        xgbreg = XGBRegressor(n_jobs=-1)  # the original passed `nthreads`, which XGBoost ignores
        params = {
            "max_depth": [i for i in range(5, 55, 5)],
            "learning_rate": [0.001, 0.01, 0.1],
            "gamma": [i for i in range(1, 20)],
            "n_estimators": [i * 10 for i in range(5, 55, 5)]
        }
        self.gs_xgboost = RandomizedSearchCV(xgbreg, params, n_jobs=-1, verbose=2)
        self.gs_xgboost.fit(X_train_aux, y_train_aux)

        # best_estimator_ is already refit on the full data, so no second fit is needed
        self.xgboost_reg_model = self.gs_xgboost.best_estimator_

        return self.xgboost_reg_model
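
A brief usage sketch for the method above, hypothetical since the surrounding class is not shown (the attribute names follow the snippet; the rest is illustrative):

# `reg` stands for an instance of the (unshown) host class.
model = reg.fit_xgboost_regression()
print(reg.gs_xgboost.best_params_)    # combination chosen by RandomizedSearchCV
y_hat = model.predict(reg.X_train)    # best_estimator_ is a fitted XGBRegressor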
Example no. 2
def generate_XGB_model(train_df):
    train_df.drop(['conversionTime'], axis=1, inplace=True)
    print('Train And Fix Missing App Count Value...')
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')
    # print('Train And Fix Missing Age Value...')
    # train_df, xgb_age = train_model_for_age(train_df)
    # joblib.dump(xgb_age, 'XGB_age.model')
    train_df.drop(['marriageStatus', 'haveBaby', 'sitesetID', 'positionType'],
                  axis=1,
                  inplace=True)
    print('Done')
    print(train_df.info())
    print(train_df.describe())
    print(train_df.isnull().sum())
    train_np = train_df.to_numpy()  # as_matrix() was removed in pandas 1.0
    y = train_np[:, 0]
    X = train_np[:, 1:]
    print('Train Xgboost Model...')
    start_time = datetime.datetime.now()
    xgb_clf = XGBRegressor(n_estimators=100,
                           max_depth=6,
                           objective="binary:logistic")
    xgb_clf.fit(X, y)
    end_time = datetime.datetime.now()
    print('Training Done..., Time Cost: %d' % (end_time - start_time).seconds)
    model_df = pd.DataFrame({
        'columns': list(train_df.columns)[1:],
        'values': xgb_clf.feature_importances_
    })
    print(model_df)
    return xgb_clf
Example no. 3
def FI_xgb_sklearn():
    X, y = load_traindata(encodetype='le')
    cols = list(X.columns)

    rndcol = np.random.randn(X.shape[0])
    X = np.column_stack((X, rndcol))
    cols.append('random')

    xgb1 = XGBRegressor(learning_rate=0.01,
                        n_estimators=3320,
                        max_depth=3,
                        min_child_weight=4,
                        colsample_bytree=0.8,
                        subsample=0.8,
                        importance_type='total_gain',
                        objective='reg:linear',
                        n_jobs=-1,
                        # the deprecated `seed` and `silent` arguments are
                        # dropped; random_state controls the RNG
                        random_state=0)

    xgb1.fit(X, y)

    imp = sorted(list(zip(cols, xgb1.feature_importances_)),
                 key=lambda t: abs(t[1]),
                 reverse=True)
    imp = pd.DataFrame(imp, columns=['Feature', 'Importance'])
    rnd_idx = np.argwhere(imp['Feature'] == 'random')[0][0]
    print(imp.iloc[:rnd_idx + 1, :])
    return imp
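
The random column above acts as a noise probe: any feature ranked at or below it is indistinguishable from noise. A minimal follow-up sketch (the selection step is an assumption, not part of the original):

imp = FI_xgb_sklearn()
rnd_idx = np.argwhere(imp['Feature'] == 'random')[0][0]
selected = imp['Feature'].iloc[:rnd_idx].tolist()  # features that outrank the probe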
Example no. 4
    def skl_cv(self):
        logging.info("{0}: running grid search".format(self.now_time()))
        if self.model == 'C':
            grid_search = GridSearchCV(estimator=self.rf,
                                       param_grid=self.cv_param,
                                       scoring='accuracy')
            grid_search.fit(self.X_train, self.Y_train)
            logging.info("{0}: best parameters: {1}".format(
                self.now_time(), grid_search.best_params_))
            logging.info("{0}: best accuracy: {1}".format(
                self.now_time(), grid_search.best_score_))
            self.rf = XGBClassifier(
                n_estimators=grid_search.best_params_['n_estimators'],
                max_depth=grid_search.best_params_['max_depth'],
                min_child_weight=grid_search.best_params_['min_child_weight'],
                gamma=grid_search.best_params_['gamma'],
                learning_rate=grid_search.best_params_['learning_rate'])

        elif self.model == 'R':
            grid_search = GridSearchCV(estimator=self.rf,
                                       param_grid=self.cv_param,
                                       scoring='neg_mean_absolute_error')

            grid_search.fit(self.X_train, self.Y_train)
            logging.info("{0}: best parameters: {1}".format(
                self.now_time(), grid_search.best_params_))
            # the original log line said "R squared", but the scorer is negative MAE
            logging.info("{0}: best score (neg MAE): {1}".format(
                self.now_time(), grid_search.best_score_))
            self.rf = XGBRegressor(
                n_estimators=grid_search.best_params_['n_estimators'],
                max_depth=grid_search.best_params_['max_depth'],
                min_child_weight=grid_search.best_params_['min_child_weight'],
                gamma=grid_search.best_params_['gamma'],
                learning_rate=grid_search.best_params_['learning_rate'])
Example no. 5
    def fit_model(self, data, target, test):
        clf = XGBRegressor(learning_rate=self.learning_rate,
                           n_estimators=self.n_estimators,
                           max_depth=self.max_depth,
                           min_child_weight=self.min_child_weight,
                           gamma=self.gamma,
                           subsample=self.subsample,
                           colsample_bytree=self.colsample_bytree,
                           objective=self.objective,
                           nthread=self.nthread,
                           scale_pos_weight=self.scale_pos_weight,
                           reg_alpha=self.reg_alpha,
                           reg_lambda=self.reg_lambda,
                           seed=self.seed)
        data = np.array(data).astype(float)
        scaler = MinMaxScaler()
        data = scaler.fit_transform(data)
        test = scaler.transform(test)
        # scale the target with its own scaler (the original refit the feature
        # scaler on the target); sklearn expects a 2-D array here
        target_scaler = MinMaxScaler()
        target = target_scaler.fit_transform(np.asarray(target).reshape(-1, 1)).ravel()

        clf.fit(data, target)
        # leaf indices from the trained booster become extra features
        new_feature = clf.apply(data)
        new_test = clf.apply(test)
        X_train_new = pd.DataFrame(self.mergeToOne(pd.DataFrame(data), new_feature))
        X_test_new = pd.DataFrame(self.mergeToOne(pd.DataFrame(test), new_test))
        return X_train_new, target, X_test_new
Example no. 6
    def __init__(self,
                 mean_model_params=None,
                 upper_quantile_params=None,
                 lower_quantile_params=None):
        # None defaults avoid sharing mutable dicts between instances
        mean_model_params = dict(mean_model_params or {})
        upper_quantile_params = upper_quantile_params or {
            'alpha': 0.95, 'delta': 1.0, 'thresh': 1.0, 'variance': 1.0
        }
        lower_quantile_params = lower_quantile_params or {
            'alpha': 0.05, 'delta': 1.0, 'thresh': 1.0, 'variance': 1.0
        }
        self.mean_model_params = mean_model_params
        self.upper_quantile_params = upper_quantile_params
        self.lower_quantile_params = lower_quantile_params
        self.gb = XGBRegressor(**mean_model_params)
        mean_model_params.pop('alpha', None)  # 'alpha' belongs to the quantile models only
        upper_quantile_params_combined = {**mean_model_params, **upper_quantile_params}
        lower_quantile_params_combined = {**mean_model_params, **lower_quantile_params}
        self.gb_quantile_upper = XGBQuantileRegressor(
            **upper_quantile_params_combined)
        self.gb_quantile_lower = XGBQuantileRegressor(
            **lower_quantile_params_combined)
        self.upper_alpha = upper_quantile_params['alpha']
        self.lower_alpha = lower_quantile_params['alpha']
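
A hypothetical usage sketch for the wrapper above. XGBQuantileRegressor is a custom class from the source project, and the fit/predict interface is assumed to mirror scikit-learn:

ens = QuantileGB()  # hypothetical name for the class defining __init__ above
for m in (ens.gb, ens.gb_quantile_lower, ens.gb_quantile_upper):
    m.fit(X_train, y_train)
y_mean = ens.gb.predict(X_test)
y_low = ens.gb_quantile_lower.predict(X_test)   # ~5th percentile (alpha=0.05)
y_high = ens.gb_quantile_upper.predict(X_test)  # ~95th percentile (alpha=0.95)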
Example no. 7
File: base.py Project: sbkhosh/dax
def set_grid_search(regrs, X_train, y_train, reg):
    if regrs == 'tree':
        random_grid = build_grid_tree()
        prms = grid_search(reg, X_train, y_train, random_grid)
        reg_prms = DecisionTreeRegressor(max_features=prms['max_features'],
                                         max_depth=prms['max_depth'],
                                         min_samples_split=prms['min_samples_split'],
                                         max_leaf_nodes=prms['max_leaf_nodes'],
                                         min_samples_leaf=prms['min_samples_leaf'])
    elif regrs == 'forest':
        random_grid = build_grid_rf()
        prms = grid_search(reg, X_train, y_train, random_grid)
        reg_prms = RandomForestRegressor(n_estimators=prms['n_estimators'],
                                         max_features=prms['max_features'],
                                         max_depth=prms['max_depth'],
                                         min_samples_split=prms['min_samples_split'],
                                         min_samples_leaf=prms['min_samples_leaf'],
                                         n_jobs=-1)
    elif regrs == 'xgbr':
        random_grid = build_grid_xgbr()
        prms = grid_search(reg, X_train, y_train, random_grid)
        reg_prms = XGBRegressor(learning_rate=prms['learning_rate'],
                                max_depth=prms['max_depth'],
                                min_child_weight=prms['min_child_weight'],
                                n_estimators=prms['n_estimators'],
                                subsample=prms['subsample'],
                                n_jobs=-1)
    elif regrs == 'nn':
        random_grid = build_grid_nn()
        prms = grid_search(reg, X_train, y_train, random_grid)
        reg_prms = MLPRegressor(hidden_layer_sizes=prms['hidden_layer_sizes'],
                                activation=prms['activation'],
                                solver=prms['solver'],
                                alpha=prms['alpha'],
                                learning_rate_init=prms['learning_rate_init'],
                                learning_rate=prms['learning_rate'],
                                max_iter=prms['max_iter'],
                                tol=prms['tol'],
                                momentum=prms['momentum'],
                                beta_1=prms['beta_1'],
                                beta_2=prms['beta_2'],
                                n_iter_no_change=prms['n_iter_no_change'])
    else:
        raise ValueError("unknown regressor type: %s" % regrs)
    return reg_prms
Example no. 8
    def train(self, X_train, X_test, y_train, y_test):
        '''
        Trains the XGBoost model on the given train/test split.
        The fitted model is pickled to generated/gxboost_model.pickle,
        and evaluation metrics are reported via self.evaluate.
        :param X_train, X_test, y_train, y_test: train/test split
        :return: None
        '''
        print('Training is starting...')
        eval_set = [(X_train, y_train), (X_test, y_test)]

        self.model = XGBRegressor(max_depth=7,
                                  objective='reg:squarederror',
                                  gamma=0,
                                  learning_rate=0.03,
                                  subsample=1,
                                  colsample_bytree=0.9,
                                  min_child_weight=10)

        self.model.fit(X_train,
                       y_train,
                       eval_set=eval_set,
                       eval_metric="rmse",
                       early_stopping_rounds=500)

        with open('generated/gxboost_model.pickle', 'wb') as file:
            pickle.dump(self.model, file)

        self.evaluate(y_test, X_test)
Example no. 9
    def grid_search(self, X_train, X_test, y_train, y_test):
        grid_param = {
            'max_depth': [n for n in range(2, 10)],
            'gamma': np.arange(0, 0.5, 0.1),
            'learning_rate': [0.0001, 0.001, 0.01, 0.1],
            'subsample': np.arange(0.5, 0.9, 0.1),
            'colsample_bytree': np.arange(0.5, 0.9, 0.1),
            'min_child_weight': [1, 3, 5, 7]
        }

        model = XGBRegressor(max_depth=7,
                             objective='reg:squarederror',
                             gamma=0,
                             learning_rate=0.03,
                             subsample=1,
                             colsample_bytree=0.9,
                             min_child_weight=10)

        gd_sr = GridSearchCV(estimator=model,
                             param_grid=grid_param,
                             scoring='neg_mean_squared_error',
                             cv=5,
                             n_jobs=-1)

        gd_sr.fit(X_train, y_train)

        best_parameters = gd_sr.best_params_
        print(best_parameters)
Example no. 10
def xgbregressor(x_train, y_train, x_test):

    xgb_reg = XGBRegressor()
    parameters = {'nthread': [4],
                  'objective': ['reg:squarederror'],  # 'reg:linear' is deprecated
                  'learning_rate': [.07],
                  'max_depth': [7],
                  'min_child_weight': [4],
                  'subsample': [0.7],
                  'colsample_bytree': [0.7],
                  'n_estimators': [12]}

    clf_xgbreg = GridSearchCV(xgb_reg,
                              parameters,
                              n_jobs=5,
                              cv=2,
                              verbose=True)

    clf_xgbreg.fit(x_train, y_train)

    # print(clf_xgbreg.best_params_)

    preds = clf_xgbreg.predict(x_train)  # the original called an undefined clf_RF here

    y_test_pred = clf_xgbreg.predict(x_test)

    print(y_test_pred)

    print(pd.DataFrame(y_test_pred).describe())

    return preds
Example no. 11
def over_sample(train, test, feat):
    predictors = [x for x in train.columns if x not in ['ID', 'y']]
    groups = list(train[feat].unique())
    result = None
    for name in groups:
        train_temp = pd.concat([train, train[train[feat] == name]])
        test_temp = test[test[feat] == name]
        model = XGBRegressor(max_depth=4,
                             learning_rate=0.0045,
                             n_estimators=1250,
                             silent=True,
                             objective='reg:linear',
                             nthread=-1,
                             min_child_weight=1,
                             max_delta_step=0,
                             subsample=0.93,
                             seed=27)
        model.fit(train_temp[predictors], train_temp['y'])
        pred = model.predict(test_temp[predictors])
        if result is None:
            result = pd.DataFrame({'ID': test_temp['ID'].values, 'y': pred})
        else:
            result = pd.concat([
                result,
                pd.DataFrame({
                    'ID': test_temp['ID'].values,
                    'y': pred
                })
            ])
    result.sort_values('ID', inplace=True)

    return result
Example no. 12
def get_estimator():

    drop_cols = [
        'CODGEO', 'LIBGEO', 'REG', 'DEP', 'Code Nuance', 'Code du département'
    ]
    base_cols = [
        'Orientation Economique', 'SEG Croissance POP', 'Urbanité Ruralité',
        'Dynamique Démographique BV', 'Environnement Démographique',
        'Fidélité', 'SYN MEDICAL', 'Seg Dyn Entre',
        'SEG Environnement Démographique Obsolète', 'Seg Cap Fiscale',
        'DYN SetC', 'CP', 'MED14', 'Nb Femme', 'Nb Homme'
    ]

    base_transformer = FunctionTransformer(_preprocessor, validate=False)

    base_transformer = make_pipeline(base_transformer,
                                     SimpleImputer(strategy='most_frequent'))

    preprocessor = ColumnTransformer(
        transformers=[
            ('base', base_transformer, base_cols),
            ('drop cols', 'drop', drop_cols),
        ],
        remainder='passthrough')  # remainder='drop' or 'passthrough'

    regressor = XGBRegressor()

    pipeline = Pipeline(steps=[('preprocessing',
                                preprocessor), ('Regressor', regressor)])

    return pipeline
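
A minimal usage sketch for the pipeline above, with X a DataFrame containing the listed columns and y the target (both placeholders):

pipeline = get_estimator()
pipeline.fit(X, y)            # imputation, column dropping and boosting in one call
y_pred = pipeline.predict(X)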
Example no. 13
    def model_xgb_search(self, X, Y):
        # train_x, valid_x, train_y, valid_y = train_test_split(X, Y, test_size=0.1, random_state=0)  # split into train and validation sets
        print('model_xgb_search start')
        xgb_model = XGBRegressor()

        # cv_split = ShuffleSplit(n_splits=5, train_size=0.7, test_size=0.2)
        # param_grid = dict(
        #     max_depth=[2],
        #     min_child_weight=[1, 2, 3, 4, 5, 6],
        #     gamma=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
        #     learning_rate=np.linspace(0.03, 1, 10),
        #     n_estimators=[50, 100, 200, 400],
        #     num_class=[2],
        #     objective=['multi:softmax']
        # )
        param_grid = dict(
            max_depth=[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],  # best found: 3
            learning_rate=np.linspace(0.03, 0.1, 5),
            n_estimators=[100, 200, 300],  # best found: 200
        )
        start = time.time()
        # KFold rather than StratifiedKFold: stratification needs discrete
        # class labels, which a regression target does not have
        cv_split = KFold(n_splits=5, shuffle=True)
        grid = GridSearchCV(xgb_model, param_grid, cv=cv_split)
        grid_result = grid.fit(X, Y)
        print("Best: %f using params: %s estimator: %s" % (
            grid_result.best_score_, grid_result.best_params_, grid_result.best_estimator_))
        print('GridSearchCV process use %.2f seconds' % (time.time() - start))
        print("Save model to " + self.model_path)
        dump(grid_result, self.model_path)
        print('end=======')
Example no. 14
    def xgboostmodel(self):
        df = pd.read_csv(datafile, encoding='utf-8', index_col=0)
        print(df.shape)
        traindata = df.iloc[:, :].values
        x = traindata[:, :-1]
        y = traindata[:, -1]
        x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7)
        if self.params is None:
            params = {'max_depth': 80, 'n_estimators': 512, 'learning_rate': 0.01}
        else:
            params = self.params
        # the original built `params` but then hard-coded different values
        # (max_depth=128, n_estimators=768) and passed an invalid `silence`
        # keyword; here the dict is actually used
        raw_model = XGBRegressor(**params)
        raw_model.fit(x_train, y_train)
        raw_model.save_model(self.model_file)
        pred = raw_model.predict(x_test)
        self.true = y_test
        self.pred = pred
        self.show_save_figure(fig_path=self.fig_path,
                              modelname=self.job_name,
                              detal_idx=500)
        t_mean = self.cal_mean(self.true)
        p_mean = self.cal_mean(self.pred)
        self.save_result(self.result_path, true_mean=t_mean, pred_mean=p_mean)
Example no. 15
    def fit_model_split(self, X_train, y_train, X_test, y_test):
        # X_train_1 trains the booster; X_train_2 is merged with the booster's
        # leaf-index features to form the new training set
        X_train_1, X_train_2, y_train_1, y_train_2 = train_test_split(
            X_train, y_train, test_size=0.6, random_state=0)
        clf = XGBRegressor(learning_rate=self.learning_rate,
                           n_estimators=self.n_estimators,
                           max_depth=self.max_depth,
                           min_child_weight=self.min_child_weight,
                           gamma=self.gamma,
                           subsample=self.subsample,
                           colsample_bytree=self.colsample_bytree,
                           objective=self.objective,
                           nthread=self.nthread,
                           scale_pos_weight=self.scale_pos_weight,
                           reg_alpha=self.reg_alpha,
                           reg_lambda=self.reg_lambda,
                           seed=self.seed)
        clf.fit(X_train_1, y_train_1)
        new_feature = clf.apply(X_train_2)
        X_train_new2 = self.mergeToOne(X_train_2, new_feature)
        new_feature_test = clf.apply(X_test)
        X_test_new = self.mergeToOne(X_test, new_feature_test)
        print("Training set is 40% smaller than before")
        return X_train_new2, y_train_2, X_test_new, y_test
Example no. 16
def model_intrv3(Y_train, X_train, Y_test, X_test, Targ):
    global reslts
    global metrs
    import pandas as pd
    import numpy as np
    from sklearn.metrics import mean_squared_error
    from xgboost.sklearn import XGBRegressor
    # `loss='ls'` in the original is a sklearn GradientBoosting argument,
    # not an XGBoost one, so it is dropped here
    model = XGBRegressor(n_estimators=200,
                         learning_rate=0.05,
                         max_depth=4,
                         random_state=0,
                         subsample=0.9,
                         colsample_bytree=1.0).fit(X_train, Y_train)
    print(model.score(X_test, Y_test))  # R^2 on the test set

    pred_Yxgb = model.predict(X_test)
    mse = mean_squared_error(Y_test, pred_Yxgb)
    nRMSE = np.sqrt(mse) / Targ.mean()
    # nRMSE = np.sqrt(mse) / max(Targ)
    Yts_pd = pd.DataFrame({'Yts': Y_test, 'Ypd': pred_Yxgb})
    print(mse, nRMSE)
    metrs = {'mse': mse, 'nRMSE': nRMSE}
    reslts = {'Ypred': pred_Yxgb, 'Yts_pd': Yts_pd}
    return {'Yts_pd': Yts_pd, 'mse': mse, 'nRMSE': nRMSE}
Example no. 17
    def fit_model(self, X_train, y_train, X_test, y_test):
        clf = XGBRegressor(learning_rate=self.learning_rate,
                           n_estimators=self.n_estimators,
                           max_depth=self.max_depth,
                           min_child_weight=self.min_child_weight,
                           gamma=self.gamma,
                           subsample=self.subsample,
                           colsample_bytree=self.colsample_bytree,
                           objective=self.objective,
                           nthread=self.nthread,
                           scale_pos_weight=self.scale_pos_weight,
                           reg_alpha=self.reg_alpha,
                           reg_lambda=self.reg_lambda,
                           seed=self.seed)
        clf.fit(X_train, y_train)
        # append the booster's leaf indices as extra features
        new_feature = clf.apply(X_train)
        X_train_new = self.mergeToOne(X_train, new_feature)
        new_feature_test = clf.apply(X_test)
        X_test_new = self.mergeToOne(X_test, new_feature_test)
        print("Training set sample number remains the same")
        return X_train_new, y_train, X_test_new, y_test
Example no. 18
def train_xgb(df_preprocessed, df_target):
    """
    Train an XGBoost Regressor on the data
    :param df_preprocessed: features
    :param df_target: target
    :return: a tuple of best estimator and best estimator score
    """
    xgb_reg = XGBRegressor(
        nthread=4,
        objective='reg:linear',
        learning_rate=0.02,  # so called `eta` value
        max_depth=10,
        min_child_weight=1,
        gamma=3,
        subsample=1.0,
        colsample_bytree=0.35)

    param_grid = {'n_estimators': [1000]}

    gridsearch_xgb = GridSearchCV(xgb_reg,
                                  param_grid,
                                  cv=3,
                                  verbose=1,
                                  n_jobs=-1,
                                  scoring='neg_mean_squared_error')
    gridsearch_xgb.fit(df_preprocessed, df_target)

    # save the model to disk
    # xgb_filename = r'models\xgboost_model.sav'
    # pickle.dump(gridsearch_xgb, open(xgb_filename, 'wb'))
    print(np.sqrt(-gridsearch_xgb.best_score_))

    return gridsearch_xgb.best_estimator_, np.sqrt(-gridsearch_xgb.best_score_)
Example no. 19
    def def_model(self, parameters: dict = None):
        model = XGBRegressor()

        if parameters is not None:
            model.set_params(**parameters)

        self._model = model
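
For reference, set_params is equivalent to passing the same keyword arguments to the constructor, which is what the pattern above relies on; a short illustrative sketch:

model = XGBRegressor()
model.set_params(n_estimators=300, max_depth=4)  # same effect as XGBRegressor(n_estimators=300, max_depth=4)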
Example no. 20
def XGB_reg_evaluation(individual, evaluation_method='roll_win'):
    '''
    evaluation_method : can be 'roll_win' or 'mse'
    '''

    if evaluation_method == 'roll_win':
        trainNumber = individual[6]  # number of training samples
        param = {
            'eta': individual[0],
            'silent': True,
            'objective': "reg:linear",
            'nthread': -1,
            'min_child_weight': individual[1],
            'max_depth': individual[2],
            'subsample': individual[3],
            'colsample_bylevel': individual[4],
            'seed': 0
        }
        roll_win_mseValue = 0
        for i in range(N_validation):  # xrange in the original (Python 2)
            trainingX = trainX[(trainNum - (i + 1) * window - trainNumber):(trainNum - (i + 1) * window), :]
            trainingY = trainY[(trainNum - (i + 1) * window - trainNumber):(trainNum - (i + 1) * window)]
            testingX = trainX[(trainNum - (i + 1) * window):(trainNum - i * window), :]
            testingY = trainY[(trainNum - (i + 1) * window):(trainNum - i * window)]
            dtrain = xgb.DMatrix(data=trainingX, label=trainingY)
            bst = xgb.train(params=param,
                            dtrain=dtrain,
                            num_boost_round=individual[5])
            testingX = xgb.DMatrix(testingX)
            roll_win_mseValue += sum(
                (testingY - bst.predict(testingX))**2) / window
        roll_win_mseValue /= N_validation
        return (roll_win_mseValue, )

    if evaluation_method == 'mse':
        # cross-validation evaluation
        kf = KFold(n_splits=N_splits)
        cv_mseValue = 0
        fc = XGBRegressor(learning_rate=individual[0],
                          n_estimators=individual[5],
                          silent=True,
                          objective="reg:linear",
                          nthread=-1,
                          gamma=0,
                          min_child_weight=individual[1],
                          max_depth=individual[2],
                          subsample=individual[3],
                          colsample_bylevel=individual[4],
                          seed=0)
        for train, test in kf.split(trainX):
            fc.fit(trainX[train, :], trainY[train])
            cv_mseValue += sum(
                (trainY[test] - fc.predict(trainX[test, :]))**2) / len(test)
        cv_mseValue = cv_mseValue / N_splits
        return (cv_mseValue, )

    raise ValueError("There is no evaluation method for %s" % evaluation_method)
Example no. 21
    def xgboost_single_pred(self):

        x_train = self.x_train
        y_train = self.y_train

        x_test = self.x_test
        y_test = self.y_test

        self.y_pred_all_xgb = []
        y_train = list(y_train)
        xgboost_clf = XGBRegressor(learning_rate=0.1, n_estimators=75)

        # expanding-window one-step-ahead forecast: refit, predict one row,
        # then fold the observed row back into the training set
        for i in range(len(x_test)):
            xgboost_clf.fit(x_train, y_train)
            x_test_one = x_test.iloc[i:i + 1]
            y_test_one = xgboost_clf.predict(x_test_one)
            self.y_pred_all_xgb.append(list(y_test_one)[0])
            # DataFrame.append was removed in pandas 2.0; use pd.concat instead
            x_train = pd.concat([x_train, x_test_one])
            y_train.append(y_test[i])

        xgboost_mse = mean_squared_error(self.y_test, self.y_pred_all_xgb)
        xgboost_rmse = np.sqrt(xgboost_mse)
        y_pred_all_xgb = pd.DataFrame(list(self.y_pred_all_xgb))
        ratio_single_xgb = pd.DataFrame(list(self.y_test)) / y_pred_all_xgb
        return xgboost_rmse, y_pred_all_xgb, ratio_single_xgb
Example no. 22
    def __train_model(self, features):
        combo_list = [
            ['available_year_avg', 'min_nights_year_avg', 'price_year_avg']
        #     ['available_winter_avg', 'min_nights_winter_avg', 'price_winter_avg'],
        #     ['available_spring_avg', 'min_nights_spring_avg', 'price_spring_avg'],
        #     ['available_summer_avg', 'min_nights_summer_avg', 'price_summer_avg']
        ]
        for combo in combo_list:
            X_base = features.drop([
                'price_year_avg', 'price_winter_avg', 'price_spring_avg', 'price_summer_avg', 'price_fall_avg',
                'available_year_avg', 'available_winter_avg', 'available_spring_avg', 'available_summer_avg', 'available_fall_avg',
                'min_nights_year_avg', 'min_nights_winter_avg', 'min_nights_spring_avg', 'min_nights_summer_avg', 'min_nights_fall_avg'
            ], axis=1)
            X_base[combo[0]] = features[combo[0]]
            X_base[combo[1]] = features[combo[1]]
            y = features[combo[2]]
            X_train, X_test, y_train, y_test = train_test_split(X_base, y, test_size=.25, random_state=42, shuffle=True)

            model = XGBRegressor(
                objective='reg:squarederror',
                learning_rate=0.1,
                max_depth=8,
                n_estimators=200,
                n_jobs=-1
            )  # `cv` is not an XGBRegressor parameter, so it is dropped here
            model.fit(X_train, y_train)
            self.logger.info('Gradient boost model:')
            self.logger.info(f'Target label: {combo[2]}')
            self.logger.info(f'R^2: {model.score(X_test, y_test)}')
            self.logger.info(f'MAE: {mean_absolute_error(y_test, model.predict(X_test))}')
        # return after the loop so every combo in combo_list gets trained
        return model
Example no. 23
    def _build(self, **config):
        """
        build the models and initialize.
        :param config: hyper parameters for building the model
        :return:
        """
        self.set_params(**config)
        if self.model_type == "regressor":
            self.model = XGBRegressor(n_estimators=self.n_estimators, max_depth=self.max_depth,
                                      n_jobs=self.n_jobs, tree_method=self.tree_method,
                                      random_state=self.random_state,
                                      learning_rate=self.learning_rate,
                                      min_child_weight=self.min_child_weight, seed=self.seed,
                                      subsample=self.subsample,
                                      colsample_bytree=self.colsample_bytree,
                                      gamma=self.gamma, reg_alpha=self.reg_alpha,
                                      reg_lambda=self.reg_lambda, verbosity=self.verbosity)
        elif self.model_type == "classifier":
            self.model = XGBClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
                                       n_jobs=self.n_jobs, tree_method=self.tree_method,
                                       random_state=self.random_state,
                                       learning_rate=self.learning_rate,
                                       min_child_weight=self.min_child_weight, seed=self.seed,
                                       subsample=self.subsample,
                                       colsample_bytree=self.colsample_bytree,
                                       gamma=self.gamma, reg_alpha=self.reg_alpha,
                                       objective='binary:logistic',
                                       reg_lambda=self.reg_lambda, verbosity=self.verbosity)
        else:
            raise ValueError("model_type can only be \"regressor\" or \"classifier\"")

        self.model_init = True
Example no. 24
def cbd_model(cbd_df, cbd_finalinput):
    '''
    function that creates model from the cbd dataframe and returns the predicted
    number of crimes for the next three days
    '''

    X_cbd=cbd_df[['year', 'month', 'day', 'tmax', 'tmin', 'consumer_price_index',
       'gdp_millions_2007', 'seasonally_adjusted_unemployment',
       'unadjusted_unemployment', 'Possession, cocaine ',
       'Heroin, possession ', 'Heroin Price Canada',
       'day_segment_1200pm-1159pm', 'day_of_week_Monday',
       'day_of_week_Saturday', 'day_of_week_Sunday', 'day_of_week_Thursday',
       'day_of_week_Tuesday', 'day_of_week_Wednesday']]
    y_cbd=cbd_df['number_of_crimes']


    scaler = StandardScaler()
    scaler.fit(X_cbd)  # Don't cheat - fit only on training data
    X_cbd = scaler.transform(X_cbd)
    cbd_input_scaled = scaler.transform(cbd_finalinput)
    # The original pasted a full repr with every default spelled out; only
    # the values that matter are kept here.
    xgb = XGBRegressor(max_depth=3, learning_rate=0.1, n_estimators=100,
                       objective='reg:linear', random_state=0)
    xgb.fit(X_cbd,y_cbd)
    predict_cbd=xgb.predict(cbd_input_scaled)

    return predict_cbd
Example no. 25
def get_ntree():
    rmse_t_total, rmse_v_total = [], []
    for ntree in range(10, 500, 10):
        xgb_base = XGBRegressor(objective='reg:linear',
                                n_estimators=ntree,
                                random_state=1234,
                                silent=0,
                                booster='gbtree',
                                eval_metric='rmse')
        rmse_t_1, rmse_v_1 = [], []
        print('current ntree = %s' % ntree)
        for train, test in get_cv(y=y_train, n_splits=5, random_state=42):
            X_t, y_t = X_train[train], y_train[train]
            X_v, y_v = X_train[test], y_train[test]
            xgb_base.fit(X_t, y_t)
            y_t_pre = xgb_base.predict(X_t)
            y_v_pre = xgb_base.predict(X_v)
            rmse_t_each = np.sqrt(mean_squared_error(y_t, y_t_pre))
            rmse_v_each = np.sqrt(mean_squared_error(y_v, y_v_pre))
            rmse_t_1.append(rmse_t_each)
            rmse_v_1.append(rmse_v_each)
        rmse_t = np.mean(rmse_t_1)
        rmse_v = np.mean(rmse_v_1)
        rmse_t_total.append(rmse_t)
        rmse_v_total.append(rmse_v)

    return rmse_t_total, rmse_v_total
Example no. 26
def train_first_test(experiment_name, x_train, y_train, features):
    global file_loc
    file_loc = 'data/' + experiment_name + '/'
    from xgboost.sklearn import XGBRegressor
    import scipy.stats as st

    one_to_left = st.beta(10, 1)
    from_zero_positive = st.expon(0, 50)

    params = {
        "n_estimators": st.randint(3, 15),
        "max_depth": st.randint(3, 40),
        "learning_rate": st.uniform(0.05, 0.4),
        "colsample_bytree": one_to_left,
        "subsample": one_to_left,
        "gamma": st.uniform(0, 10),
        'reg_alpha': from_zero_positive,
        "min_child_weight": from_zero_positive,
    }
    #xgbreg = XGBRegressor(nthreads=-1)
    xgbreg = XGBRegressor()

    from sklearn.model_selection import RandomizedSearchCV
    gs = RandomizedSearchCV(xgbreg, params, n_jobs=1)
    gs.fit(x_train, y_train)

    joblib.dump(gs.best_estimator_, file_loc + 'clf_bestmodel.pkl')
    return gs.best_estimator_
Example no. 27
    def __init__(self, nb_classes, bags=1, param=None):
        import xgboost as xgb
        from xgboost.sklearn import XGBRegressor

        param = param or {}  # avoid sharing a mutable default dict
        self.nb_classes = nb_classes
        self.objective = param.get('objective', 'reg:linear')
        self.nthread = param.get('nthread',-1)
        self.n_estimators = param.get('n_estimators',10)
        self.max_depth = param.get('max_depth', 6)
        self.learning_rate = param.get('learning_rate', 0.3)
        self.colsample_bytree = param.get('colsample_bytree', 1.0)
        self.subsample = param.get('subsample', 1.0)
        self.missing = param.get('missing', None)
        self.seed = param.get('seed', 0)
        self.bags = bags
        self.bags_models = tuple()
        self.train_y = None
        for bag in range(self.bags):
            models = tuple()
            for k in range(self.nb_classes):
                model = XGBRegressor(objective = self.objective, nthread = self.nthread, seed = self.seed + bag,
                                     n_estimators = self.n_estimators, missing = self.missing,
                                     max_depth = self.max_depth, learning_rate = self.learning_rate,
                                     colsample_bytree = self.colsample_bytree, subsample = self.subsample)
                models = models + (model,)
            self.bags_models = self.bags_models + (models, )
Example no. 28
def learn_model(X_train, y_train, X_valid, y_valid):
    t1 = time()
    model = XGBRegressor(max_depth=7, n_estimators=500)
    model.fit(X_train, y_train, eval_metric="rmse", eval_set=[(X_train, y_train), (X_valid, y_valid)], verbose=True, early_stopping_rounds=10)
    t2 = time()
    print('Total of training time: ', t2 - t1)
    return model
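
A hypothetical call for learn_model above, assuming arrays X and y and scikit-learn for the split:

from sklearn.model_selection import train_test_split

X_tr, X_va, y_tr, y_va = train_test_split(X, y, test_size=0.2, random_state=0)
model = learn_model(X_tr, y_tr, X_va, y_va)
print(model.best_iteration)  # set when early stopping triggers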
Example no. 29
def tun_reg_alpha(reg_alpha_range, param_data_path, train_x, train_y):
    '''
    tune the reg_alpha param in xgboost;
    get the best value and save it to the file for further tuning

    :param reg_alpha_range: the range of reg_alpha you want to test

    :param param_data_path: default './../data/param_data.pkl'

    :return: void
    '''
    # get the newest params first
    param_dict = get_param_data(param_data_path=param_data_path)

    print("Tuning regularisation parameter reg_alpha")
    param_test1 = {'reg_alpha': reg_alpha_range}
    # `iid` was removed from GridSearchCV in scikit-learn 0.24, so it is not passed here
    gsearch1 = GridSearchCV(estimator=XGBRegressor(**param_dict),
                            param_grid=param_test1,
                            scoring='neg_mean_squared_error',
                            cv=5)
    gsearch1.fit(X=train_x, y=train_y)

    # show the results (grid_scores_ was replaced by cv_results_ in scikit-learn 0.20)
    for mean, params in zip(gsearch1.cv_results_['mean_test_score'],
                            gsearch1.cv_results_['params']):
        print(mean, params)
    print("best_params_ and best_score_:")
    print(gsearch1.best_params_, gsearch1.best_score_)

    # store the tuned value and save
    param_dict['reg_alpha'] = gsearch1.best_params_['reg_alpha']

    save_param_data(param_dict=param_dict, param_data_path=param_data_path)
Example no. 30
def search_best_parameters(X, y):

    xgb_grid = {
        'n_estimators': [80, 100, 120],
        'max_depth': [3, 4, 5],
        'learning_rate': [0.1, 0.2, 0.5],
        'booster': ['gbtree', 'gblinear', 'dart'],
        'gamma': [0, 0.2, 0.5],
        'subsample': [0.5, 0.8],
        'reg_alpha': [0.2, 0.3, 0.5],
        'reg_lambda': [0.5, 0.8, 1],
        'colsample_bytree': [1, 0.8, 0.5],
        'colsample_bylevel': [1, 0.8, 0.5],
        'colsample_bynode': [1, 0.8, 0.5],
        'random_state': [77]
    }

    xgb_gridsearch = GridSearchCV(XGBRegressor(),
                                  xgb_grid,
                                  n_jobs=-1,
                                  verbose=True,
                                  scoring='r2')

    xgb_gridsearch.fit(X, y)
    print(f"best parameters: {xgb_gridsearch.best_params_}")