def test_weights_regressor():
    """Check weighted average regression prediction on boston dataset."""
    weights = [1, 2, 10]
    base = [('mean', DummyRegressor(strategy='mean')),
            ('median', DummyRegressor(strategy='median')),
            ('quantile', DummyRegressor(strategy='quantile', quantile=.2))]
    ereg = VotingRegressor(base, weights=weights)
    X_r_train, X_r_test, y_r_train, y_r_test = train_test_split(
        X_r, y_r, test_size=.25)
    # The ensemble prediction must equal the weighted average of the
    # individual members' predictions.
    individual_preds = [est.fit(X_r_train, y_r_train).predict(X_r_test)
                        for _, est in base]
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)
    avg = np.average(np.asarray(individual_preds), axis=0, weights=weights)
    assert_almost_equal(ereg_pred, avg, decimal=2)
    # weights=None must behave exactly like uniform weights.
    ereg_weights_none = VotingRegressor(base, weights=None)
    ereg_weights_equal = VotingRegressor(base, weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
def run_ensemble_run(self, model_name='Ensemble'):
    """Fit a voting ensemble and report its train/test regression scores.

    Fix: the original constructed SVR and KNN regressors but passed only the
    random forest to the VotingRegressor, leaving reg1/reg2 unused and the
    "Ensemble" voting over a single model. All three members now vote.

    Parameters:
        model_name: label prefix used when reporting evaluation results.
    """
    reg1 = SVR(C=10, kernel='rbf', epsilon=0.1, gamma='auto')
    reg2 = KNeighborsRegressor(n_neighbors=11)
    reg3 = RandomForestRegressor(n_estimators=100)
    model = VotingRegressor([('SVR', reg1), ('KNN', reg2), ('RF', reg3)])
    model.fit(self.X_train, self.Y_train)
    # Evaluate on both splits so over/under-fitting is visible side by side.
    self.evaluate_regression(self.Y_train, model.predict(self.X_train),
                             self.dates_train, model_name + '-OnTrain',
                             slicer=1)
    self.evaluate_regression(self.Y_test, model.predict(self.X_test),
                             self.dates_test, model_name + '-OnTest',
                             slicer=1)
def trail_main():
    """Train every configured model on each training dataframe, print test
    metrics, then fit and score a VotingRegressor over all fitted models.

    Fix: removed the unused locals `n_folds` and `scores_dict`.
    """
    train_path = 'data/assign3_students_train.txt'
    test_path = 'data/assign3_students_test.txt'
    train_data = read_process_data(train_path)
    test_data = read_process_data(test_path)
    models_dict = get_models()
    # Fitted models keyed by '<dataframe>_<model>' (kept for inspection).
    learned_models_dict = {}
    for df_key, df_val in train_data.items():
        X_train, X_test, y_train, y_test = get_final_score_tts(
            df_val.copy(), test_data[df_key].copy(), n_best=15)
        voting_list = []
        for model_key, model_val in models_dict.items():
            model = model_val.fit(X_train, y_train)
            name = f'{df_key}_{model_key}'
            learned_models_dict[name] = model
            voting_list.append((name, model))
            print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")
            print(f"{name}, Test MSE ",
                  mean_squared_error(y_test, model.predict(X_test)))
            print(f"{name}, Test Score", model.score(X_test, y_test))
            print('=' * 75, '\n')
        # Ensemble over all models fitted for this dataframe.
        model = VotingRegressor(voting_list)
        model = model.fit(X_train, y_train)
        print('=' * 75, '\n')
        print(f"{df_key}, Voting Test MSE = ",
              mean_squared_error(y_test, model.predict(X_test)))
        print(f"{df_key}, Voting Test Score", model.score(X_test, y_test))
        print('=' * 75, '\n\n')
def voting_predictions(data, base_models, val=True):
    """Fit a VotingRegressor over `base_models` and return test predictions
    back-transformed out of log space.

    Bug fix: the original applied np.exp once unconditionally and then a
    second time inside the `if val:` branch, silently squaring the
    back-transform whenever validation was enabled. exp is now applied
    exactly once regardless of `val`.

    Parameters:
        data: object accepted by data_copy(); unpacks to (Xtrain, Xtest, y).
        base_models: iterable of (model_class, params_dict) pairs.
        val: when True, also run k-fold cross-validation on the ensemble.
    """
    data = data_copy(data)
    Xtrain, Xtest, y = data
    vote_params = []
    for index, base_model in enumerate(base_models):
        name = 'model' + str(index)
        model = base_model[0](**base_model[1])
        vote_params.append((name, model))
    votemodel = VotingRegressor(vote_params)
    votemodel.fit(Xtrain, y)
    y_pred = votemodel.predict(Xtest)
    if val:
        k_fold_crossval(data, model=votemodel)
    # Back-transform from log space exactly once.
    y_pred = np.exp(y_pred)
    return y_pred
def voting_regressor(self):
    """Fit a GBR/RF/LR voting ensemble and persist the model and its test
    predictions under a directory named after the member keys.

    Fix: removed the unused `idx` from enumerate and the trailing-underscore
    name that was trimmed with `[:-1]`; the directory name is now built
    directly with '_'.join.
    """
    estimators_num = 10
    regs = {
        'GBR': GradientBoostingRegressor(
            random_state=1, n_estimators=estimators_num),
        'RF': RandomForestRegressor(
            random_state=1, n_estimators=estimators_num, n_jobs=-1),
        'LR': LinearRegression(),
    }
    ereg = VotingRegressor(estimators=list(regs.items()), n_jobs=-1)
    ereg.fit(self.X_train, self.y_train)
    y_pred = ereg.predict(self.X_test)
    root_dir = ('/Users/lujingze/Programming/SWFusion/'
                'regression/tc/lightgbm/model/')
    # e.g. '<root>/GBR_RF_LR/'
    ereg_dir = f'{root_dir}{"_".join(regs)}/'
    os.makedirs(ereg_dir, exist_ok=True)
    dump(ereg, f'{ereg_dir}voting_model.joblib')
    with open(f'{ereg_dir}test_pred.pkl', 'wb') as f:
        pickle.dump(y_pred, f)
def rainfall_runoff(precip_file, delineated_file, discharge_file, plot_fname):
    """Fit a bagging+forest voting regressor to basin runoff and save a
    residual scatter plot to `plot_fname`.

    Precipitation and delineation inputs must be .mat files; discharge is
    read from the first sheet of an .xls workbook.
    """
    precip_mat = loadmat(precip_file)['basin_daily_precipitation']
    basin_mat_delineated = loadmat(delineated_file)['basin_mat_delineated']
    # Missing discharge readings become zeros.
    discharge_df = pd.ExcelFile(discharge_file).parse(0).fillna(0)
    basin_num = 5
    voting_reg = VotingRegressor([
        ('br', BaggingRegressor(n_estimators=100, random_state=50)),
        ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ])
    X, y = get_data(discharge_df, basin_num, precip_mat,
                    basin_mat_delineated, False)
    voting_reg.fit(X, y)
    y_pred = voting_reg.predict(X)
    # Residual plot: predicted value vs (prediction - observation).
    plt.scatter(y_pred, y_pred - y, c='r')
    plt.title("Runoff prediction data using a voting-regressor")
    plt.xlabel("Predicted Output")
    plt.ylabel("Error in prediction")
    print(plot_fname)
    plt.savefig(plot_fname)
def reg_fit_predict(self, x_train, x_test, y_train, y_test, est_name,
                    report_flg=True):
    """Fit the named estimator, predict on the test split, optionally print
    a score report, and register the fitted model.

    'vote' builds a VotingRegressor over the previously registered
    estimators. Bug fix: the original passed a `dict_items` view as the
    `estimators` argument; scikit-learn requires a list of
    (name, estimator) tuples, so the view is now materialized.

    Parameters:
        est_name: key into base_regression_estimators, or 'vote'.
        report_flg: when True, print regression scores via reg_score_report.
    """
    if est_name == 'vote':
        if len(self.regression_estimators) > 1:
            print(self.regression_estimators.items())
            model = VotingRegressor(
                estimators=list(self.regression_estimators.items()))
        else:
            print('Caution: No models')
            return
    else:
        model = self.base_regression_estimators[est_name]
    model.fit(x_train, y_train)
    # predict test data
    y_pred = model.predict(x_test)
    # report scores
    if report_flg:
        self.reg_score_report(y_test, y_pred)
    # add model to dict
    self.regression_estimators[est_name] = model
    return
def vote_prediction(X_train, X_test, y_train, y_test, alpha, l1_ratio,
                    n_estimators, max_depth, c, gamma):
    """Fit a random-forest + RBF-SVR voting ensemble and evaluate it on the
    test split.

    `alpha` and `l1_ratio` are accepted for interface compatibility (they
    belonged to a disabled ElasticNet member) but are currently unused.
    """
    print("******************* VOTING ******************", end="\n\n")
    forest = RandomForestRegressor(n_estimators=n_estimators,
                                   max_depth=max_depth)
    svr = SVR(kernel='rbf', C=c, gamma=gamma)
    voting_regressor = VotingRegressor(estimators=[
        ('rf', forest),
        ('svr', svr),
    ])
    voting_regressor = voting_regressor.fit(X_train, y_train)
    y_pred = voting_regressor.predict(X_test)
    evaluate('Voting', y_test, y_pred, write_predictions=True)
    print("\n*********************************************", end="\n\n")
def model_fit_save(train_x, train_y, test_x, test_y):
    """Train a LinearRegression + RBF-SVR voting ensemble, print test-set
    error metrics, and persist the fitted model to 'model.pkl'."""
    ensemble = VotingRegressor([
        ('lr', LinearRegression()),
        ('svr_rbf', SVR(kernel='rbf')),
    ])
    ensemble.fit(train_x, train_y)
    # Evaluate on the held-out split.
    predictions = ensemble.predict(test_x)
    mse = mean_squared_error(test_y, predictions)
    print('Mean Absolute Error:', mean_absolute_error(test_y, predictions))
    print('Mean Squared Error:', mse)
    print('Root Mean Squared Error:', np.sqrt(mse))
    # Persist the fitted ensemble for later reuse.
    joblib.dump(ensemble, 'model.pkl')
def ensemble_lgb_regressor(self):
    """Load previously saved per-loss models, combine them in a voting
    ensemble, fit on the training split, and persist test predictions.

    Bug fixes:
      * `er_dir` was referenced but never defined (guaranteed NameError).
      * `er_name` was truncated twice (`[:-1]` at build time and again in
        os.makedirs), dropping the last character of the directory name.
      * predict() returns an ndarray, which has no `.to_pickle()`; the
        predictions are now written with pickle.
    """
    try:
        root_dir = ('/Users/lujingze/Programming/SWFusion/'
                    'regression/tc/lightgbm/model/')
        model_dir = {
            'SG-FL': (f"""{root_dir}na_101.845662_fl_smogn_"""
                      f"""final_threshold_square_2/"""),
            'MSE': f'{root_dir}na_2.188733/',
        }
        name_parts = []
        estimators = []
        for name, out_dir in model_dir.items():
            name_parts.append(name)
            # Each model directory must contain exactly one saved Bunch
            # for this basin.
            save_file = [f for f in os.listdir(out_dir)
                         if f.endswith('.pkl')
                         and f.startswith(f'{self.basin}')]
            if len(save_file) != 1:
                self.logger.error('Count of Bunch is not ONE')
                exit(1)
            with open(f'{out_dir}{save_file[0]}', 'rb') as f:
                best_result = pickle.load(f)
            estimators.append((name, best_result.model))
        er_name = '_'.join(name_parts)
        er = VotingRegressor(estimators)
        er.fit(self.X_train, self.y_train)
        er_dir = f'{root_dir}{er_name}/'
        os.makedirs(er_dir, exist_ok=True)
        y_pred = er.predict(self.X_test)
        with open(f'{er_dir}y_pred.pkl', 'wb') as f:
            pickle.dump(y_pred, f)
    except Exception as msg:
        # Deliberate drop into the debugger on any failure (dev workflow).
        breakpoint()
        exit(msg)
def voting_compile_fit(self):
    """Fit the VotingRegressor n_repetition times and keep the best run.

    Each repetition refits a fresh ensemble on the training split; the run
    with the lowest test MSE becomes self.voting_reg.
    """
    prev_mse = 0
    for i in range(self.n_repetition):
        candidate = VotingRegressor(estimators=self.reg_models)
        candidate.fit(self.X_train, self.y_train.values.ravel())
        y_pred = candidate.predict(self.X_test)
        mse = mean_squared_error(self.y_test, y_pred)
        print(i + 1, ". ", "Voting_reg", mse / 1000000)
        # First run always becomes the incumbent; later runs must beat it.
        if i == 0 or mse < prev_mse:
            self.voting_reg = candidate
            prev_mse = mse
def train(self):
    """Generate training data, fit a weighted KNN/RF/AdaBoost voting
    regressor, persist it, and return the normalized MAE (percent).

    Fixes: removed the dead `x = None ... regressor = None` initializations
    (every name was unconditionally reassigned) and closed the pickle file
    handle with a context manager instead of leaking it.
    """
    self.gripperjack = self.gripperjack[0]
    self.location = self.location[0]
    generator = pg.generator_factory(self.type)
    self.df: pd.DataFrame = generator.generate(self.gripperjack,
                                               self.location, 1)
    print(self.df.columns)
    self.df = self.df.drop(columns=['Timestamp']).dropna()
    print('DATAFRAME IS LOADED IN')
    # 'next' is the prediction target; the remaining columns are features.
    y = self.df.pop('next')
    x = self.df
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, shuffle=True)
    r = [('K Neighbour Regressor',
          KNeighborsRegressor(n_neighbors=15, n_jobs=5, leaf_size=50)),
         ('Random Forrest Regressor',
          RandomForestRegressor(n_estimators=200, n_jobs=5)),
         ('Ada Regressor',
          AdaBoostRegressor(n_estimators=100, learning_rate=0.1))]
    # The random forest dominates the vote; the other two act as mild
    # regularizers.
    regressor = VotingRegressor(r, weights=[0.1, 1, 0.1])
    regressor.fit(x_train, y_train)
    print('===================')
    print('SCORE X/Y TEST')
    print(regressor.score(x_test, y_test))
    dump_location = 'Recources\\regressor_dumps\\' + self.type + '\\' + str(
        self.gripperjack) + '\\' + self.location
    print('==================')
    print('ACCURACY')
    y_pred = regressor.predict(x_test)
    mae = metrics.mean_absolute_error(y_test, y_pred)
    # MAE as a percentage of the target's full range.
    mape = (mae / (y.max() - y.min())) * 100
    print('MAE')
    print(mae)
    print('MAPE')
    print(mape)
    if not os.path.exists(dump_location):
        os.makedirs(dump_location)
    with open(dump_location + '\\regressor.sav', 'wb') as f:
        pickle.dump(regressor, f)
    return mape
def test_notfitted():
    """Unfitted voting estimators must raise NotFittedError on any use."""
    eclf = VotingClassifier(
        estimators=[('lr1', LogisticRegression()),
                    ('lr2', LogisticRegression())],
        voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call 'fit'"
           " with appropriate arguments before using this estimator.")
    # Every public inference method must refuse to run before fit().
    for method in (eclf.predict, eclf.predict_proba, eclf.transform):
        with pytest.raises(NotFittedError, match=msg % 'VotingClassifier'):
            method(X)
    for method in (ereg.predict, ereg.transform):
        with pytest.raises(NotFittedError, match=msg % 'VotingRegressor'):
            method(X_r)
class VotingAggregationMethod(AggregationMethod):
    """Aggregation method that averages five heterogeneous regressors via a
    VotingRegressor."""

    def __init__(self, config, train_values, train_labels, test_values,
                 logger):
        super().__init__(config, train_values, train_labels, test_values,
                         logger)
        # Fix: the SVR member was mislabeled 'random_forest' (while the
        # actual RandomForestRegressor was labeled 'rf'); each label now
        # names the estimator it holds.
        self.model = VotingRegressor([
            ('svr', SVR(kernel='rbf', gamma=0.1)),
            ('krr', KernelRidge(kernel='rbf', gamma=0.1)),
            ('ada', AdaBoostRegressor()),
            ('rf', RandomForestRegressor()),
            ('et', ExtraTreesRegressor())
        ])

    def train_model(self, values, labels):
        """Fit the ensemble and return its predictions on the training set."""
        self.model = self.model.fit(values, labels)
        return self.model.predict(values)

    def test_model(self, values):
        """Predict with the (already fitted) ensemble."""
        return self.model.predict(values)
def test_notfitted():
    """Using a voting estimator before fit() must raise NotFittedError."""
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()),
                    ("lr2", LogisticRegression())],
        voting="soft",
    )
    ereg = VotingRegressor([("dr", DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call 'fit'"
           " with appropriate arguments before using this estimator.")
    classifier_msg = msg % "VotingClassifier"
    regressor_msg = msg % "VotingRegressor"
    # Classifier: all three inference entry points must fail.
    for call in (eclf.predict, eclf.predict_proba, eclf.transform):
        with pytest.raises(NotFittedError, match=classifier_msg):
            call(X)
    # Regressor: both inference entry points must fail.
    for call in (ereg.predict, ereg.transform):
        with pytest.raises(NotFittedError, match=regressor_msg):
            call(X_r)
def train_voting_regressor(algos):
    """Fit a VotingRegressor over `algos` ((name, estimator) pairs) using
    the module-level train split and return (model, R^2, MAE) on the
    module-level test split."""
    ensemble = VotingRegressor(algos)
    ensemble.fit(X_train, y_train)
    predictions = ensemble.predict(X_test1)
    return (ensemble,
            r2_score(y_test1, predictions),
            mean_absolute_error(y_test1, predictions))
def regression_modeling(data, model):
    """Scale `data`, split it 70/30, and evaluate the requested model.

    Parameters:
        data: array-like whose first 8 columns are features and 9th is the
            target.
        model: one of 'all', 'lasso', 'decision_tree', 'ridge', 'svm',
            'voting'.

    Returns R^2 only for 'voting'; other branches print their score and
    return None (unchanged from the original contract).

    Perf fix: the original fitted four models unconditionally on every
    call; they are now fitted only when model == 'all'.
    """
    # Scaling the data
    scaled_data = preprocessing.StandardScaler().fit_transform(data)
    # Creating train-test
    X = scaled_data[:, 0:8]
    y = scaled_data[:, 8]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)
    if model == 'all':
        models = [
            ['linear', linear_model.Lasso(alpha=0.1).fit(X_train, y_train)],
            ['decision_tree',
             DecisionTreeRegressor(random_state=0).fit(X_train, y_train)],
            ['ridge', linear_model.Ridge(alpha=.5).fit(X_train, y_train)],
            ['svm',
             svm.SVR(kernel='rbf', gamma='auto').fit(X_train, y_train)]]
        for m in models:
            y_predicted = m[1].predict(X_test)
            r2 = r2_score(y_test, y_predicted)
            print("{}: {}".format(m[0], r2))
    if model == "lasso":
        lasso_reg = linear_model.Lasso(alpha=0.1).fit(X_train, y_train)
        y_hat_lasso = lasso_reg.predict(X_test)
        r2_lasso = r2_score(y_test, y_hat_lasso)
        print("R^2 score for Lasso:", r2_lasso)
    elif model == "decision_tree":
        dt_reg = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
        y_hat_dt = dt_reg.predict(X_test)
        r2_dt_reg = r2_score(y_test, y_hat_dt)
        print("R^2 score for Decision tree:", r2_dt_reg)
    elif model == "ridge":
        ridge_reg = linear_model.Ridge(alpha=.5).fit(X_train, y_train)
        y_hat_ridge = ridge_reg.predict(X_test)
        r2_ridge = r2_score(y_test, y_hat_ridge)
        print("R^2 score for Ridge:", r2_ridge)
    elif model == "svm":
        svm_reg = svm.SVR(kernel='rbf').fit(X_train, y_train)
        y_hat_svm = svm_reg.predict(X_test)
        r2_svm = r2_score(y_test, y_hat_svm)
        print("R^2 score for RBF SVM:", r2_svm)
    elif model == "voting":
        reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
        reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
        reg3 = LinearRegression()
        ereg = VotingRegressor(
            estimators=[('gb', reg1), ('rf', reg2), ('lr', reg3)])
        ereg = ereg.fit(X_train, y_train)
        y_hat_ereg = ereg.predict(X_test)
        r2_ereg = r2_score(y_test, y_hat_ereg)
        return r2_ereg
def get_training_goals(X, y, X_test):
    """Fit a weighted voting ensemble of three degree-3 polynomial
    regressors on (X, y) and return its predictions for X_test."""
    # The gradient-boosting member carries most of the vote.
    members = [
        ('rf_ploy', forest_polynomialregression(degree=3)),
        ('gb_ploy', gb_polynomialregression(degree=3)),
        ('ls_ploy', ls_polynomialregression(degree=3)),
    ]
    voting_reg = VotingRegressor(estimators=members, weights=[0.2, 0.6, 0.2])
    voting_reg.fit(X, y)
    return voting_reg.predict(X_test)
def vote_prediction_standalone(X_train, X_test, y_train, y_test):
    """Vote over hyperparameter-searched RF, ElasticNet, and SVR models and
    evaluate on the test split.

    Bug fix: the original built a RandomizedSearchCV over the ElasticNet
    (`model_cv`) but then voted with the raw, untuned `enet`, leaving the
    searcher unused. The tuned searcher is now the voting member.
    """
    print("******************* VOTING ******************", end="\n\n")
    # Randomized search over the random forest.
    param_dist = {'n_estimators': range(10, 320), 'max_depth': range(2, 50)}
    forest = RandomForestRegressor()
    rscv = RandomizedSearchCV(forest, param_dist, cv=10, n_iter=100,
                              scoring=scoring, n_jobs=JOBS, verbose=1)
    # Grid search over the RBF SVR.
    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    gammas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
    param_grid = {'C': Cs, 'gamma': gammas}
    svr = GridSearchCV(SVR(kernel='rbf'), param_grid, cv=10,
                       scoring=scoring, n_jobs=JOBS, verbose=1)
    # Randomized search over the elastic net.
    param_dist_en = {'alpha': stats.expon(0, 1),
                     'l1_ratio': stats.expon(0, 1)}
    enet = ElasticNet()
    model_cv = RandomizedSearchCV(enet, param_dist_en, cv=10, n_iter=100,
                                  scoring=scoring, n_jobs=JOBS, verbose=1)
    voting_regressor = VotingRegressor(estimators=[
        ('rf', rscv),
        ('enet', model_cv),
        ('svr', svr)
    ])
    voting_regressor = voting_regressor.fit(X_train, y_train)
    y_pred = voting_regressor.predict(X_test)
    evaluate('Voting', y_test, y_pred, write_predictions=True)
    print("\n*********************************************", end="\n\n")
def steam_voting_predict_learned(data):
    """
    Runs the voting model with the values to predict already being in the model.
    """
    feature_cols = ["positive_ratings_", "negative_ratings_", "owners_",
                    "average_playtime_", "median_playtime_"]
    features = data[feature_cols]
    target = data[["price_"]]
    ensemble = VotingRegressor(estimators=[
        ('gb', GradientBoostingRegressor(random_state=1, n_estimators=20)),
        ('rf', RandomForestRegressor(random_state=1, n_estimators=20)),
        ('lr', linear_model.LinearRegression())])
    # Fit and predict on the same rows: this measures in-sample error.
    ensemble.fit(features, target.values.ravel())
    mse = mean_squared_error(target, ensemble.predict(features))
    return np.mean(mse)
def steam_best_model_test(data):
    """
    Fits the best model with 90% of our data then predicts on the remaining
    10%. This simulates a "Real world situation"
    """
    feature_cols = ["positive_ratings_", "negative_ratings_", "owners_",
                    "average_playtime_", "median_playtime_"]
    X_train, X_test, y_train, y_test = train_test_split(
        data[feature_cols], data[["price_"]], test_size=0.1, random_state=2)
    ensemble = VotingRegressor(estimators=[
        ('gb', GradientBoostingRegressor(random_state=1, n_estimators=20)),
        ('rf', RandomForestRegressor(random_state=1, n_estimators=20)),
        ('lr', linear_model.LinearRegression())])
    ensemble.fit(X_train, y_train.values.ravel())
    # Held-out MSE; np.mean is a no-op on the scalar but kept for parity.
    mse = mean_squared_error(y_test, ensemble.predict(X_test))
    return np.mean(mse)
def get_flow(precip_file, delineated_file, discharge_file, D, T,
             file_name_b4_reg, file_name_after_reg):
    """Produce DQT plots before and after replacing observed discharge with
    regression-predicted discharge.

    Parameters:
        precip_file / delineated_file: .mat inputs (daily precipitation and
            watershed delineation matrices).
        discharge_file: .xls discharge observations (first sheet is used).
        D, T: the D-day / T-year parameters of the DQT statistic.
        file_name_b4_reg / file_name_after_reg: output plot file names.
    """
    # give precipitation data and delineated watershed data as input
    # inputs should be .mat only
    precip_mat = loadmat(precip_file)['basin_daily_precipitation']
    basin_mat_delineated = loadmat(delineated_file)['basin_mat_delineated']
    print(basin_mat_delineated.shape)
    # read discharge data as .xls input
    discharge_df = pd.ExcelFile(discharge_file)
    discharge_df = discharge_df.parse(0)
    discharge_df = discharge_df.fillna(0)  # Replace the nan values with 0's
    # Tag every row with its calendar year for the later range filter.
    all_datetimes = discharge_df['Date']
    all_years = list(map(
        lambda datetime_obj: int(datetime_obj.date().strftime("%Y")),
        all_datetimes))
    # NOTE(review): list(set(...)) has arbitrary order, yet years_list[0]
    # and years_list[-1] below are used as the min/max year — confirm the
    # input years happen to hash in order, or sort this list.
    years_list = list(set(all_years))
    discharge_df["Year"] = all_years
    # num days is D and num_years is T in the DQT format
    # D,T are USER INPUTS
    num_days = int(D)
    num_years = int(T)
    # First plot: DQT computed from the raw observations.
    gather_dqt_plot(0, discharge_df, years_list, num_days, num_years,
                    file_name_b4_reg)
    basin_num = 5
    reg1 = RandomForestRegressor(n_estimators=100, random_state=42)
    reg4 = BaggingRegressor(n_estimators=100, random_state=50)
    voting_reg = VotingRegressor([('br', reg4), ('rf', reg1)])
    X, y = get_data(discharge_df, basin_num, precip_mat,
                    basin_mat_delineated, False)
    voting_reg.fit(X, y)
    # Restrict to the observed year range, then predict discharge for it.
    new_discharge_df = deepcopy(discharge_df)
    new_discharge_df = new_discharge_df[
        (new_discharge_df["Year"] >= years_list[0])
        & (new_discharge_df["Year"] <= years_list[-1])]
    print(len(discharge_df['Year']), len(new_discharge_df["Year"]))
    X, y = get_data(new_discharge_df, basin_num, precip_mat,
                    basin_mat_delineated, True)
    y_pred = voting_reg.predict(X)
    new_discharge_df["New_Discharge"] = y_pred
    # Second plot: DQT computed from the predicted discharge.
    gather_dqt_plot(1, new_discharge_df, years_list, num_days, num_years,
                    file_name_after_reg)
def test_onnxt_iris_voting_regressor(self):
    """The ONNX export of a VotingRegressor must reproduce scikit-learn's
    predictions to 6 decimals."""
    iris = load_iris()
    X, y = iris.data, iris.target
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, __ = train_test_split(X, y, random_state=11)
    clr = VotingRegressor(estimators=[
        ('lr', LinearRegression()),
        ('dt', DecisionTreeRegressor(max_depth=2))])
    clr.fit(X_train, y_train)
    # Keep only the first and last four test rows to keep the check small.
    X_test = X_test.astype(numpy.float32)
    X_test = numpy.vstack([X_test[:4], X_test[-4:]])
    expected = clr.predict(X_test).astype(numpy.float32)
    model_def = to_onnx(clr, X_train.astype(numpy.float32))
    oinf = OnnxInference(model_def, runtime='python')
    onnx_out = oinf.run({'X': X_test})
    regs = DataFrame(onnx_out['variable']).values
    self.assertEqualArray(expected, regs.ravel(), decimal=6)
def voting(self, X, y, models, select=False):
    """ Voting Regressor """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1, test_size=0.35)
    regressor = VotingRegressor(models)
    regressor.fit(X_train, y_train.ravel())
    # Undo the target scaling so MAE is reported in original units.
    predictions = self.y_scaler.inverse_transform(regressor.predict(X_test))
    actual = self.y_scaler.inverse_transform(y_test)
    mae = mean_absolute_error(actual, predictions)
    self.plot_results(actual, predictions, 'Voting Regressor', mae, select)
    return regressor
def reg_fit_predict_cv(self, est_name, x=None, y=None, report_flg=True):
    """5-fold cross-validated fit/predict for the named estimator; prints
    per-metric means and returns the last fold's predictions.

    'vote' builds a VotingRegressor over the previously registered
    estimators. Bug fix: the original passed a `dict_items` view as the
    `estimators` argument; scikit-learn requires a list of
    (name, estimator) tuples, so the view is now materialized.

    Note: `report_flg` is accepted for interface parity with
    reg_fit_predict but is not used in the CV variant (unchanged).
    """
    if est_name == 'vote':
        if len(self.regression_estimators) > 1:
            print(self.regression_estimators.items())
            model = VotingRegressor(
                estimators=list(self.regression_estimators.items()))
        else:
            print('Caution: No models')
            return
    else:
        model = self.base_regression_estimators[est_name]
    scores_list = {
        'mae': [],
        'mae_mean': [],
        'rmse': [],
        'rmse_mean': [],
    }
    for train_idx, test_idx in KFold(n_splits=5).split(x, y):
        x_train = x.loc[train_idx, :]
        y_train = y[train_idx]
        x_test = x.loc[test_idx, :]
        y_test = y[test_idx]
        model.fit(x_train, y_train)
        # predict test data
        y_pred = model.predict(x_test)
        scores = self.calc_reg_scores(y_test, y_pred)
        for k in scores_list.keys():
            scores_list[k].append(scores[k])
    for k in scores_list.keys():
        print(k + ': %.4f' % np.mean(scores_list[k]))
    # Returns only the final fold's predictions (matches original behavior).
    return y_pred
class BindingModel:
    """Thin wrapper around a four-member VotingRegressor (RF, linear,
    Bayesian ridge, gradient boosting)."""

    # Placeholder replaced by the ensemble in __init__.
    model = None

    def __init__(self, n_jobs=-1, verbose=False, random_state=None):
        members = [
            ('rf', RandomForestRegressor(max_depth=3,
                                         random_state=random_state)),
            ('lr', LinearRegression()),
            ('br', BayesianRidge()),
            ('gb', GradientBoostingRegressor(max_depth=4,
                                             random_state=random_state)),
        ]
        self.estimators = members
        self.model = VotingRegressor(members, n_jobs=n_jobs, verbose=verbose)

    def fit(self, X, y):
        """Fit the underlying ensemble."""
        self.model.fit(X, y)

    def predict(self, X):
        """Predict with the underlying ensemble."""
        return self.model.predict(X)
def plot_voting_regressor():
    """Fit GBR/RF/LR and their voting average on the diabetes dataset and
    plot each model's predictions for the first 20 samples."""
    X, y = load_diabetes(return_X_y=True)
    # Train classifiers individually and as an ensemble.
    reg1 = GradientBoostingRegressor(random_state=1)
    reg2 = RandomForestRegressor(random_state=1)
    reg3 = LinearRegression()
    for reg in (reg1, reg2, reg3):
        reg.fit(X, y)
    ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
    ereg.fit(X, y)
    # Predict the first 20 training samples with every model.
    xt = X[:20]
    plt.figure()
    plt.plot(reg1.predict(xt), 'gd', label='GradientBoostingRegressor')
    plt.plot(reg2.predict(xt), 'b^', label='RandomForestRegressor')
    plt.plot(reg3.predict(xt), 'ys', label='LinearRegression')
    # The ensemble gets larger star markers so the average stands out.
    plt.plot(ereg.predict(xt), 'r*', ms=10, label='VotingRegressor')
    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.ylabel('predicted')
    plt.xlabel('training samples')
    plt.legend(loc="best")
    plt.title('Regressor predictions and their average')
    plt.show()
def _regress():
    """Fit a five-member voting ensemble on the module-level xtr /
    ytr_encoded arrays and print the test-set MSE."""
    #------------Regression------------
    #knn
    knnr = KNeighborsRegressor()
    #logistic
    # NOTE(review): LogisticRegression is a classifier; inside a
    # VotingRegressor its predict() output (class labels) is averaged with
    # the regressors' continuous outputs — confirm this is intentional.
    lr = LogisticRegression()
    #svm
    svr = LinearSVR()
    #nn
    mlpr = MLPRegressor()
    #xgboost
    xgbr = XGBRegressor()
    #voting
    votec = VotingRegressor(
        estimators=[('knnr', knnr), ('lr', lr), ('svr', svr), ('mlpr', mlpr),
                    ('xgbr', xgbr)])
    votec = votec.fit(xtr, ytr_encoded)
    y_pred = votec.predict(xte)
    print()
    # NOTE(review): the model is trained on ytr_encoded but scored against
    # the raw yte — verify both targets are on the same scale.
    print(mean_squared_error(y_true=yte, y_pred=y_pred))
    print()
def regression_modeling(data):
    '''Models the response rate with Voting Regression'''
    # Standardize features and target together, then split 70/30.
    scaled = preprocessing.StandardScaler().fit_transform(data)
    features, target = scaled[:, 0:8], scaled[:, 8]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=42)
    # Average a boosted ensemble, a bagged ensemble, and a linear baseline.
    ensemble = VotingRegressor(estimators=[
        ('gb', GradientBoostingRegressor(random_state=1, n_estimators=10)),
        ('rf', RandomForestRegressor(random_state=1, n_estimators=10)),
        ('lr', LinearRegression())])
    ensemble = ensemble.fit(X_train, y_train)
    return r2_score(y_test, ensemble.predict(X_test))
#Ensemble, different k -> 0.06736 ## final setup without ensemble-> 0.063695 , with -> 0.0635.. model = neighbors.KNeighborsRegressor(n_neighbors=best_k, algorithm='kd_tree', weights='distance') model2 = neighbors.KNeighborsRegressor(n_neighbors=int(best_k / 2), algorithm='kd_tree', weights='distance') model3 = neighbors.KNeighborsRegressor(n_neighbors=best_k * 2, algorithm='kd_tree', weights='distance') model4 = neighbors.KNeighborsRegressor(n_neighbors=best_k - 2, algorithm='kd_tree', weights='distance') model5 = neighbors.KNeighborsRegressor(n_neighbors=best_k + 2, algorithm='kd_tree', weights='distance') ensemble = VotingRegressor([('m1', model), ('m2', model2), ('m3', model3), ('m4', model4), ('m5', model5)], weights=[1, 1, 1, 1, 1]) ensemble.fit(x_train, y_train) # model.fit(x_train, y_train) pred = ensemble.predict(x_test) #make prediction on test set error = mean_absolute_error(y_test, pred) #calculate err r2 = r2_score(y_test, pred) print('MAE: ', error) print('R2: ', r2) error_RMSE = math.sqrt(mean_squared_error(y_test, pred)) #calculate err print('RMSE value is:', error_RMSE)