def test_regressor_evals_result(loop):  # noqa
    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop):
            a = dxgb.XGBRegressor()
            X2 = da.from_array(X, 5)
            y2 = da.from_array(y, 5)
            a.fit(X2, y2, eval_metric="rmse", eval_set=[(X, y)])
            evals_result = a.evals_result()

            b = xgb.XGBRegressor()
            b.fit(X, y, eval_metric="rmse", eval_set=[(X, y)])

            assert_eq(evals_result, b.evals_result())
def test_regressor(loop):  # noqa
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop):
            a = dxgb.XGBRegressor()
            X2 = da.from_array(X, 5)
            y2 = da.from_array(y, 5)
            a.fit(X2, y2)
            p1 = a.predict(X2)

            b = xgb.XGBRegressor()
            b.fit(X, y)

            assert_eq(p1, b.predict(X))
def test_regressor(xgboost_loop):  # noqa
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=xgboost_loop):
            a = dxgb.XGBRegressor()
            X2 = da.from_array(X, 5)
            y2 = da.from_array(y, 5)
            weight1 = da.from_array(weight, 5)
            a.fit(X2, y2, sample_weight=weight1)
            p1 = a.predict(X2)

            b = xgb.XGBRegressor()
            b.fit(X, y, sample_weight=weight)

            np.testing.assert_array_almost_equal(
                a.feature_importances_, b.feature_importances_
            )
            assert_eq(p1, b.predict(X))
def main():
    preprocessor = ps.preprocess()
    X_train, X_test, y_train, y_test = preprocessor.cleaning()

    param_grid = {
        'objective': ['binary:logistic'],
        'nround': [1000],
        'max_depth': [8]
    }

    estimator = dxgb.XGBRegressor()
    grid_search = GridSearchCV(estimator, param_grid, verbose=2, cv=2, n_jobs=-1)

    client = Client(processes=False)
    start_time = time.time()
    with joblib.parallel_backend("dask"):
        grid_search.fit(X_train, y_train)
    end_time = time.time()

    grid_search.predict(X_test)
    # Report elapsed time rather than the raw end timestamp.
    print("time taken by GridSearchCV with XGBRegressor is %d seconds"
          % (end_time - start_time))
    client.shutdown()
def test_regressor_with_early_stopping(loop):  # noqa
    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop):
            a = dxgb.XGBRegressor()
            X2 = da.from_array(X, 5)
            y2 = da.from_array(y, 5)
            a.fit(
                X2,
                y2,
                early_stopping_rounds=4,
                eval_metric="rmse",
                eval_set=[(X, y)],
            )
            p1 = a.predict(X2)

            b = xgb.XGBRegressor()
            b.fit(X, y, early_stopping_rounds=4, eval_metric="rmse", eval_set=[(X, y)])

            assert_eq(p1, b.predict(X))
            assert_eq(a.best_score, b.best_score)
def test_validation_weights_xgbregressor(loop):  # noqa
    from sklearn.datasets import make_regression
    from sklearn.metrics import mean_squared_error

    # prepare training and test data
    X, y = make_regression(n_samples=2000, random_state=42)

    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop):
            X_train, X_test = X[:1600], X[1600:]
            y_train, y_test = y[:1600], y[1600:]

            dX_train = da.from_array(X_train)
            dy_train = da.from_array(y_train)
            dX_test = da.from_array(X_test)

            reg = dxgb.XGBRegressor()
            reg.fit(
                dX_train,
                dy_train,
                # sample_weight=weights_train,
            )
            preds = reg.predict(dX_test)

            rng = np.random.RandomState(0)
            weights_train = 100.0 + rng.rand(len(X_train))
            weights_train = da.from_array(weights_train)
            weights_test = 100.0 + rng.rand(len(X_test))

            reg.fit(
                dX_train,
                dy_train,
                sample_weight=weights_train,
                sample_weight_eval_set=[weights_test],
            )
            preds2 = reg.predict(dX_test)

            err = mean_squared_error(preds, y_test)
            err2 = mean_squared_error(preds2, y_test)
            assert err != err2
y_train = y_train.to_dask_array(lengths=True)

print("scaling")
#scaler = StandardScaler()
#scaler.fit(X_train)
#scaled_data = scaler.transform(X_train)
#X_test = scaler.transform(X_test)

print("training")

# In[ ]:

base_model = dxgb.XGBRegressor(objective='reg:squarederror',
                               tree_method='hist',
                               verbosity=3,
                               n_jobs=-1,
                               n_estimators=1000,
                               learning_rate=0.010,
                               max_depth=0,
                               max_leaves=4,
                               grow_policy='lossguide')

with joblib.parallel_backend('dask'):
    base_model.fit(X_train, y_train.flatten())

#base_model.save_model('base_line_no_max_deph_lr_%f_%i.model'%(lr,leaves))
#
predictions = base_model.predict(X_test)
predictions = client.persist(predictions)
#
#print("########")
#print("R^2:", r2_score(y_test.compute(), predictions.compute()))
#print("MAE:", mean_absolute_error(y_test.compute(), predictions.compute()))
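# The evaluation step above is left commented out. Below is a minimal sketch of
# that step, assuming `predictions` and `y_test` are dask arrays as in the
# script; the helper name `report_scores` is illustrative, not part of the
# original code.

from sklearn.metrics import r2_score, mean_absolute_error


def report_scores(y_true_dask, y_pred_dask):
    # Materialize both dask arrays on the client, since scikit-learn's
    # metric functions expect in-memory numpy arrays.
    y_true = y_true_dask.compute()
    y_pred = y_pred_dask.compute()
    print("R^2:", r2_score(y_true, y_pred))
    print("MAE:", mean_absolute_error(y_true, y_pred))


# Example usage with the arrays produced above:
# report_scores(y_test, predictions)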