def test_weights_regressor():
    """Check weighted average regression prediction on boston dataset."""
    reg1 = DummyRegressor(strategy='mean')
    reg2 = DummyRegressor(strategy='median')
    reg3 = DummyRegressor(strategy='quantile', quantile=.2)
    ereg = VotingRegressor([('mean', reg1), ('median', reg2),
                            ('quantile', reg3)], weights=[1, 2, 10])

    X_r_train, X_r_test, y_r_train, y_r_test = \
        train_test_split(X_r, y_r, test_size=.25)

    reg1_pred = reg1.fit(X_r_train, y_r_train).predict(X_r_test)
    reg2_pred = reg2.fit(X_r_train, y_r_train).predict(X_r_test)
    reg3_pred = reg3.fit(X_r_train, y_r_train).predict(X_r_test)
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    avg = np.average(np.asarray([reg1_pred, reg2_pred, reg3_pred]),
                     axis=0, weights=[1, 2, 10])
    assert_almost_equal(ereg_pred, avg, decimal=2)

    ereg_weights_none = VotingRegressor([('mean', reg1), ('median', reg2),
                                         ('quantile', reg3)], weights=None)
    ereg_weights_equal = VotingRegressor([('mean', reg1), ('median', reg2),
                                          ('quantile', reg3)],
                                         weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
def plot_voting_regressor():
    X, y = load_diabetes(return_X_y=True)

    # Train the base regressors
    reg1 = GradientBoostingRegressor(random_state=1)
    reg2 = RandomForestRegressor(random_state=1)
    reg3 = LinearRegression()
    reg1.fit(X, y)
    reg2.fit(X, y)
    reg3.fit(X, y)

    ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
    ereg.fit(X, y)

    # Make predictions on the first 20 samples
    xt = X[:20]
    pred1 = reg1.predict(xt)
    pred2 = reg2.predict(xt)
    pred3 = reg3.predict(xt)
    pred4 = ereg.predict(xt)

    # Plot the results
    plt.figure()
    plt.plot(pred1, 'gd', label='GradientBoostingRegressor')
    plt.plot(pred2, 'b^', label='RandomForestRegressor')
    plt.plot(pred3, 'ys', label='LinearRegression')
    plt.plot(pred4, 'r*', ms=10, label='VotingRegressor')

    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.ylabel('predicted')
    plt.xlabel('training samples')
    plt.legend(loc="best")
    plt.title('Regressor predictions and their average')
    plt.show()
def train(features: List[str], target_col: str,
          train_: pd.DataFrame, valid_: pd.DataFrame):
    # target_col = "burn_area"
    # date_split = "2013-01-01"
    # train_all = get_training_dataset()
    # train_ = train_all.loc[train_all.date < date_split]
    # valid_ = train_all.loc[train_all.date > date_split]
    X_train, y_train = train_[features], train_[target_col]
    X_valid, y_valid = valid_[features], valid_[target_col]

    xgb_model = xgb.XGBRegressor(
        n_estimators=300,
        max_depth=3,
        colsample_bytree=0.5,
        objective="reg:squarederror",
    )
    xgb_model.fit(X_train, y_train)

    cat_model = CatBoostRegressor(iterations=300, depth=5,
                                  learning_rate=0.1, loss_function="RMSE")
    cat_model.fit(X_train, y_train, eval_set=(X_valid, y_valid), plot=True)

    lgb_model = lgb.LGBMRegressor(n_estimators=100, max_depth=8,
                                  num_leaves=6, objective="regression")
    lgb_model.fit(X_train, y_train)

    voting_regressor = VotingRegressor([("xgb", xgb_model),
                                        ("cat", cat_model),
                                        ("lgb", lgb_model)])
    # voting_regressor = VotingRegressor([('xgb', xgb_model), ('lgb', lgb_model)])
    voting_regressor.fit(X_train, y_train)
    return voting_regressor
def _get_voter(self, mode, estimators, weights=None):
    if self.configs['fit']['train_mode'] == 'clf':
        if mode == 'average':
            voting = 'soft'
        elif mode == 'vote':
            voting = 'hard'
        else:
            raise ValueError(f'Unsupported ensemble mode: {mode}')
        voter = VotingClassifier(
            estimators=estimators, voting=voting,
            weights=weights, n_jobs=-1)
    elif self.configs['fit']['train_mode'] == 'reg':
        # VotingRegressor always averages; there is no hard-voting analogue
        # for regression.
        if mode == 'average':
            voter = VotingRegressor(
                estimators=estimators, weights=weights, n_jobs=-1)
        else:
            raise ValueError(
                f'Unsupported ensemble mode for regression: {mode}')
    return voter
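# A minimal usage sketch for _get_voter (not from the original repo): the
# host object and its configs layout are assumptions for illustration; the
# method only reads configs['fit']['train_mode'], so a SimpleNamespace
# stand-in is enough. The weights are made-up example values.
from types import SimpleNamespace
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

host = SimpleNamespace(configs={'fit': {'train_mode': 'reg'}})
voter = _get_voter(host, mode='average',
                   estimators=[('lr', LinearRegression()),
                               ('rf', RandomForestRegressor(n_estimators=50))],
                   weights=[1, 2])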
def test_model_voting_regression(self):
    # Could not find an implementation for the node Sum:Sum(8)
    model = VotingRegressor([
        ('lr', LinearRegression()),
        ('dt', SGDRegressor())])
    model, X = fit_regression_model(model)
    model_onnx = convert_sklearn(
        model, "voting regression",
        [("input", DoubleTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    dump_data_and_model(
        X.astype(np.float64), model, model_onnx,
        basename="SklearnVotingRegressorDouble",
        comparable_outputs=[0])
def initialize(method, coef=None):
    regressor = None
    if method == "linear_regression":
        regressor = linear_model.LinearRegression(normalize=True)
    elif method == "elastic_net":
        regressor = linear_model.ElasticNet(normalize=True)
    elif method == "bayesian_ridge":
        regressor = linear_model.BayesianRidge(normalize=True)
    elif method == "Support_Vector_Machine":
        regressor = svm.SVR()
    elif method == "Decision_tree":
        regressor = tree.DecisionTreeRegressor()
    elif method == "KNN":
        regressor = KNeighborsRegressor()
    elif method == "Gaussian":
        # GaussianNB is a classifier; scikit-learn has no naive Bayes
        # regressor, so this branch only makes sense for classification.
        regressor = GaussianNB()
    elif method == "Random_Forest":
        if coef is None:
            regressor = RandomForestRegressor(n_estimators=30)
        else:
            regressor = RandomForestRegressor(n_estimators=coef)
    elif "Random_Forest" in method:
        # Parse a trailing tree count, e.g. "Random_Forest50" -> 50
        trees = method.split("t")[1]
        regressor = RandomForestRegressor(n_estimators=int(trees))
    elif method == "ensemble":
        r1 = initialize("linear_regression")
        r2 = initialize("Random_Forest")
        r3 = initialize("bayesian_ridge")
        if coef is None:
            regressor = VotingRegressor([('lr', r1), ('rf', r2), ('br', r3)])
        else:
            regressor = VotingRegressor(
                estimators=[('lr', r1), ('rf', r2), ('br', r3)],
                weights=coef)
    return regressor
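# Illustrative calls into initialize; the method strings follow the branches
# above, and the coef values are made-up examples, not values from the
# original code.
lr = initialize("linear_regression")
rf = initialize("Random_Forest", coef=100)    # RandomForestRegressor, 100 trees
ens = initialize("ensemble", coef=[1, 2, 1])  # weighted VotingRegressor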
def _regress():
    # ------------ Regression ------------
    # k-nearest neighbours
    knnr = KNeighborsRegressor()
    # linear model (the regression counterpart of logistic classification)
    lr = LinearRegression()
    # svm
    svr = LinearSVR()
    # neural network
    mlpr = MLPRegressor()
    # xgboost
    xgbr = XGBRegressor()
    # voting ensemble
    votec = VotingRegressor(
        estimators=[('knnr', knnr), ('lr', lr), ('svr', svr),
                    ('mlpr', mlpr), ('xgbr', xgbr)])
    votec = votec.fit(xtr, ytr_encoded)
    y_pred = votec.predict(xte)
    print()
    print(mean_squared_error(y_true=yte, y_pred=y_pred))
    print()
def regression_modeling(data):
    '''Models the response rate with Voting Regression'''
    # Scaling the data
    scaled_data = preprocessing.StandardScaler().fit_transform(data)

    # Creating the train-test split
    X = scaled_data[:, 0:8]
    y = scaled_data[:, 8]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    # Voting Regression
    reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
    reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
    reg3 = LinearRegression()
    ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2),
                                       ('lr', reg3)])
    ereg = ereg.fit(X_train, y_train)
    y_hat_ereg = ereg.predict(X_test)
    r2_ereg = r2_score(y_test, y_hat_ereg)
    return r2_ereg
def run_ensemble_run(self, model_name='Ensemble'):
    reg1 = SVR(C=10, kernel="rbf", epsilon=0.1, gamma='auto')
    reg2 = KNeighborsRegressor(n_neighbors=11)
    reg3 = RandomForestRegressor(n_estimators=100)
    # Combine all three base models; a single-estimator VotingRegressor
    # would just reproduce the random forest and leave reg1/reg2 unused.
    model = VotingRegressor([('SVR', reg1), ('KNN', reg2), ('RF', reg3)])
    model.fit(self.X_train, self.Y_train)
    self.evaluate_regression(self.Y_train, model.predict(self.X_train),
                             self.dates_train, model_name + '-OnTrain',
                             slicer=1)
    self.evaluate_regression(self.Y_test, model.predict(self.X_test),
                             self.dates_test, model_name + '-OnTest',
                             slicer=1)
def ensemble_of_best_params_xgb_reg(self, max_evals):
    best_params = self.params_to_ensemble(fn_name='xgb_reg', space=xgb_para,
                                          algo=tpe.suggest,
                                          max_evals=max_evals)
    # One XGBoost model per tuned hyperparameter set, named by index
    models_to_voting = {}
    for i in range(len(best_params)):
        reg = xgb.XGBRegressor(**best_params[i])
        models_to_voting[str(i)] = reg
    model_ensemble = VotingRegressor([
        (name, model) for name, model in models_to_voting.items()
    ])
    return model_ensemble, best_params
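# Illustrative shape of best_params (hyperparameter values are made up):
# params_to_ensemble is assumed to return one keyword dict per tuned XGBoost
# model, which the loop above turns into named VotingRegressor members.
best_params = [
    {'n_estimators': 400, 'max_depth': 5, 'learning_rate': 0.05},
    {'n_estimators': 250, 'max_depth': 7, 'learning_rate': 0.10},
]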
def test_notfitted():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call \'fit\'"
           " with appropriate arguments before using this method.")
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict_proba, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.transform, X)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.predict, X_r)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.transform, X_r)
def get_model(param: dict) -> BaseEstimator:
    model_name = param.pop('name')
    if model_name == 'xgb':
        return XGBRegressor(**param[model_name])
    elif model_name == 'lgb':
        return LGBMRegressor(**param[model_name])
    elif model_name == 'cb':
        return CatBoostRegressor(**param[model_name])
    elif model_name == 'rf':
        return RandomForestRegressor(**param[model_name])
    elif model_name == 'svm':
        return make_pipeline(StandardScaler(), SVR(**param[model_name]))
    elif model_name == 'knn':
        return make_pipeline(StandardScaler(),
                             KNeighborsRegressor(**param[model_name]))
    elif model_name == 'mlp':
        return make_pipeline(StandardScaler(),
                             MLPRegressor(**param[model_name]))
    elif model_name == 'vote':
        return VotingRegressor(estimators=[
            ('svm', get_model(dict(param, name='svm'))),
            ('rf', get_model(dict(param, name='rf'))),
            ('lgb', get_model(dict(param, name='lgb'))),
            ('knn', get_model(dict(param, name='knn'))),
        ])
    elif model_name == 'stack':
        model = SuperLearner(scorer=mean_squared_error, random_state=132)
        model.add([
            get_model(dict(param, name='svm')),
            get_model(dict(param, name='rf')),
            get_model(dict(param, name='lgb')),
            get_model(dict(param, name='knn')),
        ])
        model.add_meta(GradientBoostingRegressor(random_state=22))
        return model
    elif model_name == 'sk_stack':
        return StackingRegressor(
            estimators=[
                ('svm', get_model(dict(param, name='svm'))),
                ('rf', get_model(dict(param, name='rf'))),
                ('lgb', get_model(dict(param, name='lgb'))),
                ('knn', get_model(dict(param, name='knn'))),
            ],
            final_estimator=GradientBoostingRegressor(random_state=42)
        )
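# Illustrative parameter layout for get_model (the hyperparameter values are
# made up for the example): the dict carries a 'name' key plus one sub-dict
# of keyword arguments per model the chosen branch may recurse into.
param = {
    'name': 'vote',
    'svm': {'C': 10.0, 'epsilon': 0.1},
    'rf': {'n_estimators': 200, 'max_depth': 8},
    'lgb': {'n_estimators': 300, 'num_leaves': 31},
    'knn': {'n_neighbors': 7},
}
voting_model = get_model(param)  # VotingRegressor over the four sub-models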
def ensemble_pipe(self, pipes):
    """Create a mean ensemble pipe where individual pipes feed into
    a mean voting ensemble model.

    Args:
        pipes (list): List of pipes that will have their outputs averaged

    Returns:
        Pipeline: Pipeline object with multiple pipes feeding a Voting object
    """
    ests = []
    for i, p in enumerate(pipes):
        ests.append((f'p{i}', p))

    if self.model_obj == 'reg':
        ensemble = VotingRegressor(estimators=ests)
    elif self.model_obj == 'class':
        ensemble = VotingClassifier(estimators=ests)

    return Pipeline([('ensemble', ensemble)])
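# A minimal usage sketch for ensemble_pipe (not from the original repo): the
# host object is a stand-in exposing only the model_obj attribute the method
# reads, and the two input pipelines are illustrative.
from types import SimpleNamespace
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor

host = SimpleNamespace(model_obj='reg')
pipes = [
    Pipeline([('scale', StandardScaler()), ('ridge', Ridge())]),
    Pipeline([('rf', RandomForestRegressor(n_estimators=100))]),
]
voting_pipe = ensemble_pipe(host, pipes)  # Pipeline wrapping a VotingRegressor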
def define_models():
    # linear regression
    reg_model = LinearRegression()
    xgb_model = XGBRegressor(colsample_bytree=0.6, gamma=0.7, max_depth=4,
                             objective='reg:squarederror')
    ada = AdaBoostRegressor(random_state=0, n_estimators=100)
    rf = make_pipeline(
        MinMaxScaler(),
        RandomForestRegressor(bootstrap=True,
                              max_features=0.15000000000000002,
                              min_samples_leaf=6, min_samples_split=16,
                              n_estimators=100))
    # rf = RandomForestRegressor(bootstrap=True,
    #                            max_features=0.15000000000000002,
    #                            min_samples_leaf=6, min_samples_split=16,
    #                            n_estimators=100)
    # svr = SVR(C=1.0, epsilon=0.2)
    er = VotingRegressor([('rf', rf), ('xgb_model', xgb_model)])
    return [reg_model, xgb_model, ada, rf, er]
def get_estimator():
    data_merger = FunctionTransformer(_merge_external_data)
    date_encoder = FunctionTransformer(_encode_dates)
    date_cols = ["DateOfDeparture"]

    categorical_encoder = make_pipeline(
        SimpleImputer(strategy="constant", fill_value="missing"),
        OneHotEncoder(handle_unknown="ignore"))
    categorical_cols = [
        "Arrival", "Departure", "day", "weekday", "holidays",
        "week", "n_days"
    ]

    preprocessor = make_column_transformer(
        (categorical_encoder, categorical_cols))

    # Best parameters RandomForest
    n_estimators_rf = 1400
    min_samples_split_rf = 2
    min_samples_leaf_rf = 2
    max_features_rf = 'auto'
    max_depth_rf = 70
    bootstrap_rf = True

    # Best parameters SVR
    C_svr = 100
    gamma_svr = 0.01
    kernel_svr = 'rbf'

    rf = RandomForestRegressor(n_estimators=n_estimators_rf,
                               max_depth=max_depth_rf,
                               max_features=max_features_rf,
                               min_samples_split=min_samples_split_rf,
                               min_samples_leaf=min_samples_leaf_rf,
                               bootstrap=bootstrap_rf,
                               n_jobs=-1)
    svr = SVR(C=C_svr, gamma=gamma_svr, kernel=kernel_svr)

    regressor_voting = VotingRegressor(estimators=[('rf', rf), ('svr', svr)])

    return make_pipeline(data_merger, date_encoder, preprocessor,
                         regressor_voting)
def get_estimator():
    '''Returns pipeline with the model to be used on the train data.'''
    # CatBoostRegressor
    boost_reg = CatBoostRegressor(n_estimators=5000, learning_rate=0.05,
                                  max_depth=6, verbose=False)
    # Add the regressor to the pre-processing pipeline. list.append returns
    # None, so keep a reference to the pipeline and append separately.
    pipeline_boost = preprocessor('Boost')
    pipeline_boost.steps.append(('model', boost_reg))

    # Neural network
    nn_reg = KerasRegressor(build_fn=model.nn_model, epochs=60,
                            batch_size=16, verbose=False)
    KerasRegressor._estimator_type = "regressor"
    # Add the regressor to the pre-processing pipeline
    pipeline_nn = preprocessor('NN')
    pipeline_nn.steps.append(('model', nn_reg))

    # Voting regressor
    regressor = VotingRegressor(estimators=[('boost', pipeline_boost),
                                            ('nn', pipeline_nn)])
    return regressor
def get_regressor(i):
    regressor = linear_model.LinearRegression()  # fallback for unknown keys
    if i == 'linear':
        regressor = linear_model.LinearRegression()
    elif i == 'svr':
        regressor = svm.SVR()
    elif i == 'knn':
        regressor = KNeighborsRegressor()
    elif i == 'gradient_boost':
        regressor = GradientBoostingRegressor()
    elif i == 'decision_tree':
        regressor = tree.DecisionTreeRegressor()
    elif i == 'random_forest':
        regressor = RandomForestRegressor()
    elif i == 'mlp':
        regressor = MLPRegressor(random_state=1, max_iter=500)
    elif i == 'voting':
        regr = GradientBoostingRegressor()
        regr2 = tree.DecisionTreeRegressor()
        regressor = VotingRegressor(estimators=[('gb', regr), ('dt', regr2)])
    return regressor
def steam_learning_voting(data, NUM_FOLDS):
    """
    Voting regressor that combines different types of regressors to try
    and overcome their individual weaknesses.
    """
    X = data[["positive_ratings_", "negative_ratings_", "owners_",
              "average_playtime_", "median_playtime_"]]
    y = data[["price_"]]

    kfold = KFold(n_splits=NUM_FOLDS)
    gradient_boosting_model = GradientBoostingRegressor(random_state=1,
                                                        n_estimators=20)
    random_forest_model = RandomForestRegressor(random_state=1,
                                                n_estimators=20)
    linear_regression_model = linear_model.LinearRegression()
    voting_model = VotingRegressor(
        estimators=[('gb', gradient_boosting_model),
                    ('rf', random_forest_model),
                    ('lr', linear_regression_model)])

    mse_scorer = make_scorer(mean_squared_error)
    results = cross_val_score(voting_model, X, y.values.ravel(),
                              scoring=mse_scorer, cv=kfold)
    print(f"Voting - MSE Array: {results}")
    final_results = f"Voting - Mean MSE over {NUM_FOLDS} folds: {np.mean(results)}"
    print(final_results)
    return final_results
def test_notfitted():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call \'fit\'"
           " with appropriate arguments before using this estimator.")
    with pytest.raises(NotFittedError, match=msg % 'VotingClassifier'):
        eclf.predict(X)
    with pytest.raises(NotFittedError, match=msg % 'VotingClassifier'):
        eclf.predict_proba(X)
    with pytest.raises(NotFittedError, match=msg % 'VotingClassifier'):
        eclf.transform(X)
    with pytest.raises(NotFittedError, match=msg % 'VotingRegressor'):
        ereg.predict(X_r)
    with pytest.raises(NotFittedError, match=msg % 'VotingRegressor'):
        ereg.transform(X_r)
def esmld():
    r1 = LinearRegression()
    # r2 = RandomForestRegressor(n_estimators=10, random_state=1)
    r3 = SVR(kernel='rbf')
    er = VotingRegressor([
        ('lr', r1),
        # ('rf', r2),
        ('svr_rbf', r3)
    ])
    # Fit once and reuse the predictions instead of refitting the
    # ensemble before every predict call.
    er.fit(X_train, y_train)
    y_pred = er.predict(X_test)
    st.write('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))
    st.write('Mean Squared Error:', mean_squared_error(y_test, y_pred))
    st.write('Root Mean Squared Error:',
             np.sqrt(mean_squared_error(y_test, y_pred)))
    print(y_pred)
    st.title(y_pred)
def main():
    df = read_df()
    # df = pd.DataFrame(df)
    x_name = "BindLevel"
    y_name = "Rank"
    X = df[x_name].values
    y = df[y_name].values

    regression(LinearRegression(), x_name, y_name, df)
    regression(Ridge(alpha=.5), x_name, y_name, df)
    regression(neighbors.KNeighborsRegressor(), x_name, y_name, df)
    regression(DecisionTreeRegressor(random_state=0), x_name, y_name, df)
    # regression(RANSACRegressor(random_state=0), x_name, y_name, df)
    regression(VotingRegressor([
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(n_estimators=10, random_state=1))
    ]), x_name, y_name, df)

    # Selecting columns
    dataset = df[['BindLevel', 'Gl', 'Gp', 'Ip', 'Mixcr']]
    k_neihgbours(dataset)
def _get_base_ensembler(self, models):
    # When wrapping models in an ensemble, the ensemble owns the
    # parallelism, so force n_jobs=1 on each individual model.
    for model in models:
        try:
            model[1].n_jobs = 1
        except AttributeError:
            pass

        # Ensemble-of-ensembles case: also reset the nested estimators
        if hasattr(model[1], 'estimators'):
            for estimator in model[1].estimators:
                try:
                    estimator.n_jobs = 1
                except AttributeError:
                    pass

    if self.spec['problem_type'] == 'regression':
        return VotingRegressor(models, n_jobs=self.spec['n_jobs'])
    return VotingClassifier(models, voting='soft',
                            n_jobs=self.spec['n_jobs'])
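# A minimal usage sketch for _get_base_ensembler (the spec layout is an
# assumption based on the keys the method reads; the models are
# illustrative): the result is a VotingRegressor that runs its members in
# parallel while each member runs single-threaded.
from types import SimpleNamespace
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

host = SimpleNamespace(spec={'problem_type': 'regression', 'n_jobs': 4})
models = [('lr', LinearRegression()),
          ('rf', RandomForestRegressor(n_estimators=100, n_jobs=-1))]
ensembler = _get_base_ensembler(host, models)  # rf's n_jobs is forced to 1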
def __init__(self, x_train, y_train, test_split_available=False,
             test_size=0.1, shuffle=True, number_of_estimator=10,
             estimator=None, estimators=None, random_state=None):
    if test_split_available:
        self.x_train, self.x_test, self.y_train, self.y_test = \
            train_test_split(x_train, y_train, test_size=test_size,
                             shuffle=shuffle, random_state=random_state)
    else:
        self.x_test = x_train
        self.y_test = y_train
        self.x_train = x_train
        self.y_train = y_train

    self.y_predict_test = {}
    self.y_predict_train = {}

    # Catalogue of base models keyed by short name (note that
    # LogisticRegression is a classifier)
    self.models = {'svr': SVR(),
                   'knn': KNeighborsRegressor(),
                   'tree': DecisionTreeRegressor(),
                   'logistic': LogisticRegression(),
                   'linear': LinearRegression(),
                   'ridge': Ridge(),
                   'ridgecv': RidgeCV(),
                   'lasso': Lasso(),
                   'lassolars': LassoLars(alpha=0.1),
                   'bayesian': BayesianRidge(),
                   'ElasticNet': ElasticNet(),
                   'TheilSenRegressor': TheilSenRegressor(),
                   'ARDRegression': ARDRegression(),
                   'RANSACRegressor': RANSACRegressor(),
                   'HuberRegressor': HuberRegressor(),
                   'randomForest': RandomForestRegressor(n_estimators=50),
                   'boost': AdaBoostRegressor(random_state=0,
                                              n_estimators=100)}

    # Base model for bagging; guard against the default of None
    self.estimator = self.models[estimator] if estimator is not None else None

    # (name, model) pairs for the voting ensemble
    estimators_list = [(name, self.models[name])
                       for name in (estimators or [])]

    # The ensemble models depend on the selections above, so they are
    # added to the catalogue last
    self.models['bagging'] = BaggingRegressor(
        base_estimator=self.estimator,
        n_estimators=number_of_estimator,
        max_features=0.8)
    self.models['voting'] = VotingRegressor(estimators=estimators_list)
def test_notfitted():
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()),
                    ("lr2", LogisticRegression())],
        voting="soft",
    )
    ereg = VotingRegressor([("dr", DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call 'fit'"
           " with appropriate arguments before using this estimator.")
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.predict(X)
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.predict_proba(X)
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.transform(X)
    with pytest.raises(NotFittedError, match=msg % "VotingRegressor"):
        ereg.predict(X_r)
    with pytest.raises(NotFittedError, match=msg % "VotingRegressor"):
        ereg.transform(X_r)
# Ensemble of KNN models with different k -> 0.06736
# Final setup: without ensemble -> 0.063695, with ensemble -> 0.0635
model = neighbors.KNeighborsRegressor(n_neighbors=best_k,
                                      algorithm='kd_tree',
                                      weights='distance')
model2 = neighbors.KNeighborsRegressor(n_neighbors=int(best_k / 2),
                                       algorithm='kd_tree',
                                       weights='distance')
model3 = neighbors.KNeighborsRegressor(n_neighbors=best_k * 2,
                                       algorithm='kd_tree',
                                       weights='distance')
model4 = neighbors.KNeighborsRegressor(n_neighbors=best_k - 2,
                                       algorithm='kd_tree',
                                       weights='distance')
model5 = neighbors.KNeighborsRegressor(n_neighbors=best_k + 2,
                                       algorithm='kd_tree',
                                       weights='distance')

ensemble = VotingRegressor([('m1', model), ('m2', model2), ('m3', model3),
                            ('m4', model4), ('m5', model5)],
                           weights=[1, 1, 1, 1, 1])
ensemble.fit(x_train, y_train)
# model.fit(x_train, y_train)

pred = ensemble.predict(x_test)  # make predictions on the test set
error = mean_absolute_error(y_test, pred)  # calculate error
r2 = r2_score(y_test, pred)
print('MAE: ', error)
print('R2: ', r2)
error_RMSE = math.sqrt(mean_squared_error(y_test, pred))
print('RMSE value is:', error_RMSE)
def model_to_test():
    return VotingRegressor([
        ('lr', LinearRegression()),
        ('dt', DecisionTreeRegressor()),
    ])
from sklearn.neighbors import KNeighborsRegressor

knn = KNeighborsRegressor(algorithm='brute')
knn.fit(X_train, y_train)
knn.score(X_train, y_train)
knn.score(X_test, y_test)

# VotingRegressor
from sklearn.ensemble import VotingRegressor

reg1 = GradientBoostingRegressor()
reg2 = RandomForestRegressor()
reg3 = LinearRegression()
reg4 = DecisionTreeRegressor()
reg5 = KNeighborsRegressor()
reg6 = AdaBoostRegressor()

ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2)])
ereg = ereg.fit(X_train, y_train)
ereg.score(X_train, y_train)
ereg.score(X_test, y_test)

# Predict values with the voting ensemble and compare them to y_test
vote_pred = ereg.predict(X_test)

# Mean absolute error in $
mae = mean_absolute_error(y_test, vote_pred)
print("The mean absolute error is: $", mae)

# Checking r^2
from sklearn.metrics import r2_score
print("r_Score:", r2_score(y_test, vote_pred))
pred2 = regr2.predict(X_test2).round(0)
RFRMSE2 = mse(y_test2, pred2)  # holds the MSE; the RMSE is printed below
print(RFRMSE2)
print("Average error on new number of hospitalizations per day:",
      round(RFRMSE2 ** 0.5, 0))

print("XGBoost Regressor Model")
xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X_train2, y_train2)
pred3 = xgb_model.predict(X_test2).round(0)
RFRMSE3 = mse(y_test2, pred3)
print("Average error on new number of hospitalizations per day:",
      round(RFRMSE3 ** 0.5, 0))
print(RFRMSE3)

print("VotingRegressor")
ensemble = VotingRegressor(estimators=[("rf", regr2), ("gbr", model),
                                       ("dtr", ETregr), ("xgbr", xgb_model)])
ensemble.fit(X_train2, y_train2)
predvot = ensemble.predict(X_test2).round(0)
MSE5 = mse(y_test2, predvot)
print("Average error on new number of hospitalizations per day:",
      round(MSE5 ** 0.5, 0))
print(MSE5)

print("VotingRegressor2")
ensemble2 = VotingRegressor(estimators=[("rf", regr), ("gbr", model)])
ensemble2.fit(X_train2, y_train2)
predvot2 = ensemble2.predict(X_test2).round(0)
MSE6 = mse(y_test2, predvot2)
print("Average error on new number of hospitalizations per day:",
      round(MSE6 ** 0.5, 0))
build_auto(GBDTLMRegressor(RandomForestRegressor(n_estimators=7, max_depth=6, random_state=13), LinearRegression()), "GBDTLMAuto")
build_auto(GBDTLMRegressor(XGBRFRegressor(n_estimators=17, max_depth=6, random_state=13), ElasticNet(random_state=13)), "XGBRFLMAuto")
build_auto(GradientBoostingRegressor(init=None, random_state=13), "GradientBoostingAuto")
build_auto(HistGradientBoostingRegressor(max_iter=31, random_state=13), "HistGradientBoostingAuto")
build_auto(HuberRegressor(), "HuberAuto")
build_auto(LarsCV(cv=3), "LarsAuto")
build_auto(LassoCV(cv=3, random_state=13), "LassoAuto")
build_auto(LassoLarsCV(cv=3), "LassoLarsAuto")
build_auto(LinearRegression(), "LinearRegressionAuto")
build_auto(BaggingRegressor(LinearRegression(), max_features=0.75, random_state=13), "LinearRegressionEnsembleAuto")
build_auto(OrthogonalMatchingPursuitCV(cv=3), "OMPAuto")
build_auto(RandomForestRegressor(n_estimators=10, min_samples_leaf=3, random_state=13), "RandomForestAuto", flat=True)
build_auto(RidgeCV(), "RidgeAuto")
build_auto(StackingRegressor([("ridge", Ridge(random_state=13)), ("lasso", Lasso(random_state=13))], final_estimator=GradientBoostingRegressor(n_estimators=7, random_state=13)), "StackingEnsembleAuto")
build_auto(TheilSenRegressor(n_subsamples=31, random_state=13), "TheilSenAuto")
build_auto(VotingRegressor([("dt", DecisionTreeRegressor(random_state=13)), ("knn", KNeighborsRegressor()), ("lr", LinearRegression())], weights=[3, 1, 2]), "VotingEnsembleAuto")
build_auto(XGBRFRegressor(n_estimators=31, max_depth=6, random_state=13), "XGBRFAuto")

if "Auto" in datasets:
    build_auto(TransformedTargetRegressor(DecisionTreeRegressor(random_state=13)), "TransformedDecisionTreeAuto")
    build_auto(TransformedTargetRegressor(LinearRegression(), func=numpy.log, inverse_func=numpy.exp), "TransformedLinearRegressionAuto")

def build_auto_isotonic(regressor, auto_isotonic_X, name):
    pipeline = PMMLPipeline([
        ("regressor", regressor)
    ])
    pipeline.fit(auto_isotonic_X, auto_y)
    pipeline.verify(auto_isotonic_X.sample(frac=0.05, random_state=13))
    store_pkl(pipeline, name)
    mpg = DataFrame(pipeline.predict(auto_isotonic_X), columns=["mpg"])
    store_csv(mpg, name)
res, lgb_m = run_regression(lgb_m, t_list, 'LightGBM_grid_search', res)
params['lgb_grid'] = lgb_m.get_params()

xgb_m = grid_fit(xgb_m, xgb_range, t_list)
res, xgb_m = run_regression(xgb_m, t_list, 'XGBoost_grid_search', res)
params['xgb_grid'] = xgb_m.get_params()

# Grid over the voting ensemble's sub-estimator parameters
vr_range = {
    'rf__max_depth': [18, 22],
    'lgb__n_estimators': [32, 40],
    'lgb__num_leaves': [30, 40],
    'xgb__n_estimators': [480, 520]
}
lgb_init['learning_rate'] = 0.15
xgb_init['learning_rate'] = 0.1
rf_init['min_samples_split'] = 10

rf, lgb_m, xgb_m = regen_model(rf_params, lgb_params, xgb_params)
hybrid_m = VotingRegressor([('rf', rf), ('lgb', lgb_m), ('xgb', xgb_m)])
res, hybrid_m = run_regression(hybrid_m, t_list, 'hybrid_regression', res)

rf, lgb_m, xgb_m = regen_model(rf_init, lgb_init, xgb_init)
hybrid_m = VotingRegressor([('rf', rf), ('lgb', lgb_m), ('xgb', xgb_m)])
hybrid_m = grid_fit(hybrid_m, vr_range, t_list)
res, hybrid_m = run_regression(hybrid_m, t_list,
                               'hybrid_regression_grid_search', res)
params['vr_grid'] = {
    name: est.get_params()
    for name, est in hybrid_m.get_params()['estimators']
}

rf, lgb_m, xgb_m = regen_model(rf_params, lgb_params, xgb_params)
# xgb_m serves as the final estimator rather than a stacked base model
stack_m = StackingRegressor(estimators=[('rf', rf), ('lgb', lgb_m)],
                            final_estimator=xgb_m)
res, stack_m = run_regression(stack_m, t_list, 'stack_generation', res)

rf, lgb_m, xgb_m = regen_model(rf_init, lgb_init, xgb_params)