def trail_main():
    """Fit every configured model on every training dataframe, print test
    metrics for each, then fit and evaluate a VotingRegressor ensemble
    built from the same fitted models."""
    n_folds = 10  # kept for parity with the original script (currently unused)
    train_path = 'data/assign3_students_train.txt'
    test_path = 'data/assign3_students_test.txt'
    train_data = read_process_data(train_path)
    test_data = read_process_data(test_path)
    models_dict = get_models()
    scores_dict = {}  # kept for parity with the original script (currently unused)
    learned_models_dict = {}
    for df_key, df_val in train_data.items():
        # Per-dataframe split, keeping the 15 best features.
        X_train, X_test, y_train, y_test = get_final_score_tts(
            df_val.copy(), test_data[df_key].copy(), n_best=15)
        voting_list = []
        for model_key, model_val in models_dict.items():
            fitted = model_val.fit(X_train, y_train)
            name = f'{df_key}_{model_key}'
            learned_models_dict[name] = fitted
            voting_list.append((name, fitted))
            print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")
            print(f"{name}, Test MSE ", mean_squared_error(y_test, fitted.predict(X_test)))
            print(f"{name}, Test Score", fitted.score(X_test, y_test))
            print('=' * 75, '\n')
        # Ensemble over every individual model fitted above.
        model = VotingRegressor(voting_list)
        model = model.fit(X_train, y_train)
        print('=' * 75, '\n')
        print(f"{df_key}, Voting Test MSE = ", mean_squared_error(y_test, model.predict(X_test)))
        print(f"{df_key}, Voting Test Score", model.score(X_test, y_test))
        print('=' * 75, '\n\n')
def train(self):
    """Generate the dataframe for this gripper jack / location, fit a weighted
    voting ensemble (KNN + random forest + AdaBoost), persist it to disk and
    return the range-normalised MAE as a percentage.
    """
    self.gripperjack = self.gripperjack[0]
    self.location = self.location[0]
    generator = pg.generator_factory(self.type)
    self.df: pd.DataFrame = generator.generate(self.gripperjack, self.location, 1)
    print(self.df.columns)
    self.df = self.df.drop(columns=['Timestamp']).dropna()
    print('DATAFRAME IS LOADED IN')
    # 'next' is the prediction target; every remaining column is a feature.
    # (The dead `x = None` / `y_train = None` pre-initialisations were removed.)
    y = self.df.pop('next')
    x = self.df
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                        shuffle=True)
    estimators = [
        ('K Neighbour Regressor',
         KNeighborsRegressor(n_neighbors=15, n_jobs=5, leaf_size=50)),
        ('Random Forrest Regressor',
         RandomForestRegressor(n_estimators=200, n_jobs=5)),
        ('Ada Regressor',
         AdaBoostRegressor(n_estimators=100, learning_rate=0.1)),
    ]
    # The random forest dominates the vote; KNN and AdaBoost act as mild correctors.
    regressor = VotingRegressor(estimators, weights=[0.1, 1, 0.1])
    regressor.fit(x_train, y_train)
    print('===================')
    print('SCORE X/Y TEST')
    print(regressor.score(x_test, y_test))
    # FIX: build the dump path portably instead of hard-coding Windows '\\'
    # separators (the original broke on non-Windows hosts).
    dump_location = os.path.join('Recources', 'regressor_dumps', self.type,
                                 str(self.gripperjack), self.location)
    print('==================')
    print('ACCURACY')
    y_pred = regressor.predict(x_test)
    mae = metrics.mean_absolute_error(y_test, y_pred)
    # NOTE(review): this is MAE normalised by the target range, not true MAPE.
    mape = (mae / (y.max() - y.min())) * 100
    print('MAE')
    print(mae)
    print('MAPE')
    print(mape)
    # FIX: exist_ok avoids the check-then-create race of the original.
    os.makedirs(dump_location, exist_ok=True)
    # FIX: context manager so the pickle file handle is always closed
    # (the original passed an open() directly to pickle.dump and leaked it).
    with open(os.path.join(dump_location, 'regressor.sav'), 'wb') as fh:
        pickle.dump(regressor, fh)
    return mape
def voting():
    """Grid-search a decision tree and a random forest, then evaluate a
    weighted VotingRegressor over the two tuned models.

    Relies on module-level X_train/Y_train/X_test/Y_test.
    """
    # Decision-tree grid: criterion and depth.
    # NOTE(review): 'mse'/'mae' were renamed 'squared_error'/'absolute_error'
    # in scikit-learn >= 1.0 and removed in 1.2 -- confirm the pinned version.
    tuned_parameters = [{
        'criterion': ['mse', 'mae'],
        'max_depth': np.arange(1, 10)
    }]
    dtr = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5)
    # Random-forest grid: split threshold and ensemble size.
    tuned_parameters = {
        'min_samples_split': [3, 6, 9],
        'n_estimators': [10, 50, 100]
    }
    rfr = GridSearchCV(RandomForestRegressor(), param_grid=tuned_parameters,
                       cv=5)
    # Voting ensemble: forest votes count double.
    voting_reg = VotingRegressor(estimators=[('dtr_reg', dtr),
                                             ('rfr_reg', rfr)],
                                 weights=[1, 2])
    voting_reg.fit(X_train, Y_train)
    # BUG FIX: this is the R^2 on the TEST split; the original bound it to a
    # variable misleadingly called `train_score` under a comment claiming
    # "mean accuracy of testing predictions" twice.
    test_score = voting_reg.score(X_test, Y_test)
    print("Accuracy score for final voting= " + str(round(test_score, 4)))
# Extra-trees and XGBoost baselines alongside the previously created models.
reg6 = ExtraTreesRegressor()
reg8 = XGBRegressor()

# Fit everything in the original order (order matters for shared RNG state).
for estimator in (reg8, reg1, reg2, reg3, ereg, reg4, reg5, reg6):
    estimator.fit(X_train, y_train)

# Held-out R^2 for every model, in the original report order.
for label, estimator in (
        ("GradientBoostingRegressor:", reg1),
        ("RandomForestRegressor:", reg2),
        ("LinearRegression:", reg3),
        ("VotingRegressor:", ereg),
        ("AdaBoostRegressor:", reg4),
        ("BaggingRegressor:", reg5),
        ("ExtraTreesRegressor:", reg6),
        ("XGBRegressor:", reg8),
):
    print(label, estimator.score(X_test, y_test))

# Extra MAE report for the XGBoost model alone.
XGBpredictions = reg8.predict(X_test)
MAE = mean_absolute_error(y_test, XGBpredictions)
print('XGBoost validation MAE = ', MAE)
xx = []
# KNN baseline using brute-force neighbour search.
# BUG FIX: the original constructed KNeighborsRegressor() and immediately
# overwrote it with this one -- the dead duplicate was removed.
knn = KNeighborsRegressor(algorithm='brute')
knn.fit(X_train, y_train)
knn.score(X_train, y_train)  # value displayed only when run as a notebook cell
knn.score(X_test, y_test)

# voting
from sklearn.ensemble import VotingRegressor
reg1 = GradientBoostingRegressor(n_estimators=42)
reg2 = RandomForestRegressor(n_estimators=70)
reg3 = LinearRegression()
reg4 = DecisionTreeRegressor()  # NOTE(review): built but never added to the ensemble
# Ensemble of gradient boosting and plain linear regression only.
ereg = VotingRegressor(estimators=[('gb', reg1), ('lr', reg3)])
ereg = ereg.fit(X_train, y_train)
ereg.score(X_train, y_train)
ereg.score(X_test, y_test)
# predict values from voting method, compare to y_test
vote_pred = ereg.predict(X_test)
# checking r^2
from sklearn.metrics import r2_score
print("r_Score:", r2_score(y_test, vote_pred))
from sklearn.model_selection import cross_val_score
cv = ShuffleSplit(n_splits=10, test_size=0.4, random_state=42)
# BUG FIX: GradientBoostingRegressor and RandomForestRegressor were used below
# but never imported (NameError at runtime).
from sklearn.ensemble import (GradientBoostingRegressor,
                              RandomForestRegressor, VotingRegressor)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

df_weekly = pd.read_csv('data//tech_df')
# BUG FIX: DataFrame.replace is not in-place; the original discarded the
# result, so +/-inf values survived into training. Assign via inplace=True.
df_weekly.replace([np.inf, -np.inf], np.nan, inplace=True)
df_weekly.dropna(inplace=True)
df_weekly.drop(columns='Unnamed: 0', inplace=True)
print(df_weekly.columns)

# Technical-indicator features; target is next period's close % change.
features = df_weekly[[
    'open', 'high', 'low', 'close', 'adx', 'aroon', 'macd', 'rsi', 'stoch',
    'obv', 'ma_50', 'current_close_pct_change'
]].values
labels = df_weekly['future_close_pct_change'].values

X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                    test_size=0.2,
                                                    random_state=42)
reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
reg3 = LinearRegression()
ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2), ('lr', reg3)])
ereg = ereg.fit(X_train, y_train)
print(ereg.score(X_test, y_test))
def _report_and_dump(model, row_tag, pkl_path):
    # Print test R^2, compute RMSE/MAE, log one CSV row and pickle the model.
    # Returns (predictions, rmse, mae) so the caller can keep the globals
    # the rest of the script expects.
    print(model.score(X_test, y_test))
    preds = model.predict(X_test)
    rmse_val = np.sqrt(mean_squared_error(y_test, preds))
    mae_val = mean_absolute_error(y_test, preds)
    print("RMSE: ", rmse_val)
    print("MAE: ", mae_val)
    w.writerow([row_tag, rmse_val, mae_val, "none"])
    with open(pkl_path, 'wb') as f:
        pickle.dump(model, f)
    return preds, rmse_val, mae_val


# 0.9939952063199885
y_preds, rmse, mae = _report_and_dump(rnd_regressor, "rnd_4", 'rnd_4.pkl')

# Voting ensemble of a fresh random forest and a gradient-boosted model.
rnd_regressor = RandomForestRegressor(n_estimators=10, random_state=0)
gbrt = GradientBoostingRegressor(random_state=0, learning_rate=0.1)
voting = VotingRegressor(estimators=[('rf', rnd_regressor), ('gb', gbrt)])
voting.fit(X_train, y_train)
y_preds, rmse, mae = _report_and_dump(voting, "voting_4", 'voting_4.pkl')

# Standard-scaled feature copies for the models that follow.
scaler = "sc"
sc = StandardScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)
# Train and report an MLP regressor, then a four-model voting ensemble.
print("Neural Network Regressor:")
model_neural = MLPRegressor(hidden_layer_sizes=(5, 4), activation='logistic',
                            solver='lbfgs', max_iter=36000)
model_neural.fit(X_train, y_train)
print(model_neural.score(X_train, y_train))
print(model_neural.score(X_valid, y_valid))
# BUG FIX: the original predicted with model_kneighbors here (copy-paste),
# so the printed r2 belonged to a different model entirely.
y_pred = model_neural.predict(X_valid)
print(r2_score(y_valid, y_pred))
print("")

print("Voting Regressor:")
model_voting = VotingRegressor([
    ('neighbors', KNeighborsRegressor(50)),
    ('forest', RandomForestRegressor(n_estimators=100, min_samples_leaf=20)),
    ('svr', SVR(kernel='rbf')),
    ('neural', MLPRegressor(hidden_layer_sizes=(4, 5), activation='logistic',
                            solver='lbfgs', max_iter=50000)),
])
model_voting.fit(X_train, y_train)
print(model_voting.score(X_train, y_train))
print(model_voting.score(X_valid, y_valid))
# BUG FIX: same copy-paste -- report r2 for the voting model itself.
y_pred = model_voting.predict(X_valid)
print(r2_score(y_valid, y_pred))
print("")
def modeling_compare(X, y):
    """Fit a battery of regression models on (X, y) and return a DataFrame
    with one row per model: label, fitted estimator object and R^2.

    NOTE(review): scores are computed on the training data itself, so they
    measure fit quality, not generalisation.
    """
    import pandas as pd
    import numpy as np
    from sklearn.linear_model import (LinearRegression, Ridge, RidgeCV,
                                      ElasticNet, PoissonRegressor, Lasso,
                                      SGDRegressor)
    from sklearn.model_selection import RepeatedKFold
    from sklearn.ensemble import RandomForestRegressor, VotingRegressor
    # Required on older sklearn versions where HistGradientBoostingRegressor
    # was still experimental.
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401
    from sklearn.ensemble import HistGradientBoostingRegressor
    from sklearn.neural_network import MLPClassifier

    models_lab = [
        'Linear Regression', 'Ridge', 'Ridge with tuning hyperparameters',
        'Elastic Net', 'Random Forest', 'Poisson Regression',
        'Gradient Boosting regression', 'Lasso',
        'Stochastic Gradient Descent', 'Neural Network', 'Voting Regression'
    ]

    reg1 = LinearRegression().fit(X, y)
    reg2 = Ridge().fit(X, y)
    # BUG FIX: the original called bare `arange(...)` without importing it
    # (NameError); use np.arange. The dead Ridge(alpha=0.2) fit, the unused
    # `grid` dict and the duplicate `cv` assignment were removed.
    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
    reg3 = RidgeCV(alphas=np.arange(0, 1, 0.01), cv=cv,
                   scoring='neg_mean_absolute_error').fit(X, y)
    reg4 = ElasticNet().fit(X, y)
    reg5 = RandomForestRegressor().fit(X, y)
    reg6 = PoissonRegressor().fit(X, y)  # requires non-negative targets
    reg7 = HistGradientBoostingRegressor(loss='poisson',
                                         learning_rate=.01).fit(X, y)
    reg8 = Lasso().fit(X, y)
    # NOTE(review): 'squared_loss' was renamed 'squared_error' in sklearn >= 1.0
    # and removed in 1.2 -- confirm the pinned version before changing.
    reg9 = SGDRegressor(loss='squared_loss', penalty='l2').fit(X, y)
    # NOTE(review): MLPClassifier is a *classifier* in a regression comparison;
    # it only fits if y holds discrete class labels -- confirm against callers.
    reg10 = MLPClassifier(solver='lbfgs', alpha=1e-5,
                          hidden_layer_sizes=(17, 10), random_state=1).fit(X, y)

    # Voting ensemble over all regressors (the neural net is excluded).
    ereg = VotingRegressor(estimators=[
        ('lr', reg1), ('rd', reg2), ('rs', reg3), ('en', reg4), ('rf', reg5),
        ('pr', reg6), ('gb', reg7), ('ls', reg8), ('gd', reg9)
    ]).fit(X, y)

    models_obj = [reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9,
                  reg10, ereg]
    score = [m.score(X, y) for m in models_obj]

    score_df = pd.DataFrame()
    score_df['models_lab'] = models_lab
    score_df['models_obj'] = models_obj
    score_df['score'] = score
    return score_df
# BUG FIX: load_boston was called below but never imported (NameError).
# NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm the
# pinned version, or migrate to another dataset.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression


def load_data():
    """Load Boston housing, standardise features and target, split 80/20.

    Returns ((x_train, y_train), (x_test, y_test)).
    """
    x, y = load_boston(return_X_y=True)
    x = StandardScaler().fit_transform(x)
    # FIX: ravel back to 1-D after scaling -- sklearn regressors expect a flat
    # target vector (the original passed a (n, 1) column and triggered shape
    # warnings / inconsistent estimator behaviour).
    y = StandardScaler().fit_transform(y.reshape(-1, 1)).ravel()
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                        random_state=42)
    return (x_train, y_train), (x_test, y_test)


if __name__ == '__main__':
    (x_train, y_train), (x_test, y_test) = load_data()
    model1 = LinearRegression(n_jobs=-1)
    model2 = RandomForestRegressor(n_estimators=10, random_state=1, n_jobs=-1)
    model3 = LinearRegression(n_jobs=-1)
    # Estimators as (name, estimator) tuples -- the conventional sklearn form
    # (the original passed lists inside a tuple).
    model = VotingRegressor(estimators=[('model1', model1),
                                        ('model2', model2),
                                        ('model3', model3)])
    model.fit(x_train, y_train)
    print(model.score(x_test, y_test))
def train_algs(self):
    """Train the user-selected algorithms without cross validation.

    Splits self.raw_data into train/test with a random row mask sized from
    self.percent_train, fits each algorithm named in self.algorithms,
    reports its test score via streamlit, and optionally combines the
    fitted models with a voting or stacking meta-model.
    """
    st.subheader("Results")
    self.chosen_models_names = []
    self.chosen_models = []
    if len(self.algorithms) == 0:
        st.warning('You should select at least one algorithm')
        return
    # Features are every column except the output column.
    X = self.raw_data.drop(self.out_col, axis=1)
    y = self.raw_data[self.out_col]
    # Random boolean mask: roughly percent_train% of rows go to training.
    msk = np.random.rand(len(X)) < self.percent_train / 100
    X_train = X[msk]
    X_test = X[~msk]
    Y_train = y[msk]
    Y_test = y[~msk]
    # Fit each selected algorithm and remember (name, model) for meta-models.
    for alg in self.algorithms:
        if alg == 'LinearSVR':
            from sklearn.svm import LinearSVR
            svc = LinearSVR()
            svc.fit(X_train, Y_train)
            st.write("LinearSVR score", svc.score(X_test, Y_test))
            self.chosen_models_names.append('LinearSVR')
            self.chosen_models.append(svc)
        elif alg == 'RidgeCV':
            from sklearn.linear_model import RidgeCV
            rid = RidgeCV()
            rid.fit(X_train, Y_train)
            st.write("RidgeCV score", rid.score(X_test, Y_test))
            self.chosen_models_names.append('RidgeCV')
            self.chosen_models.append(rid)
        elif alg == 'Random Forest Regressor':
            from sklearn.ensemble import RandomForestRegressor
            rfc = RandomForestRegressor()
            rfc.fit(X_train, Y_train)
            st.write("rfc score", rfc.score(X_test, Y_test))
            self.chosen_models_names.append('Random Forest Regressor')
            self.chosen_models.append(rfc)
        elif alg == 'Adaboost':
            from sklearn.ensemble import AdaBoostRegressor
            ada = AdaBoostRegressor()
            ada.fit(X_train, Y_train)
            st.write("ada score", ada.score(X_test, Y_test))
            self.chosen_models_names.append('Adaboost')
            self.chosen_models.append(ada)
        elif alg == 'XGBoost':
            import xgboost as xgb
            # NOTE: this rebinding shadows the xgboost module with the estimator.
            xgb = xgb.XGBRegressor(n_estimators=300)
            xgb.fit(X_train, Y_train, verbose=0)
            st.write("xgb score", xgb.score(X_test, Y_test))
            self.chosen_models_names.append('XGBoost')
            self.chosen_models.append(xgb)
    if self.meta_model_check:
        if self.meta_model_type == "voting":
            from sklearn.ensemble import VotingRegressor
            # Voting ensemble over every model fitted above.
            stack = VotingRegressor(estimators=list(
                zip(self.chosen_models_names, self.chosen_models)))
            stack.fit(X_train, Y_train)
            st.write("voting score", stack.score(X_test, Y_test))
        else:
            from sklearn.ensemble import StackingRegressor
            if self.meta_model == "GradientBoostingRegressor":
                from sklearn.ensemble import GradientBoostingRegressor
                stack = StackingRegressor(
                    estimators=list(
                        zip(self.chosen_models_names, self.chosen_models)),
                    final_estimator=GradientBoostingRegressor())
            elif self.meta_model == "RandomForestRegressor":
                from sklearn.ensemble import RandomForestRegressor
                stack = StackingRegressor(
                    estimators=list(
                        zip(self.chosen_models_names, self.chosen_models)),
                    final_estimator=RandomForestRegressor())
            # NOTE(review): `stack` is unbound if self.meta_model matches
            # neither branch -- presumably the UI restricts the choices;
            # confirm against the caller.
            stack.fit(X_train, Y_train)
            st.write("stack score", stack.score(X_test, Y_test))
# Base learners: LassoCV, a DART-boosted XGBoost model and a tuned forest.
clf1 = LassoCV()
clf2 = xgb.XGBRegressor(n_jobs=6,
                        objective='reg:squarederror',
                        booster='dart',
                        training=True,
                        colsample_bytree=0.5,
                        learning_rate=0.25,
                        max_depth=5,
                        n_estimators=25)
clf3 = RandomForestRegressor(criterion='mse',
                             max_depth=None,
                             max_features='log2',
                             min_samples_split=5,
                             random_state=23)

# Unweighted voting ensemble over the three base learners.
eclf1 = VotingRegressor([('lasso', clf1), ('xgb', clf2), ('randf', clf3)])
eclf1 = eclf1.fit(X_train, Y_train)

# Training metrics
predicted_eclf1 = eclf1.predict(X_train)
rmse = np.sqrt(mean_squared_error(Y_train, predicted_eclf1))
train_score = eclf1.score(X_train, Y_train)
print(f'Training RMSE: {rmse} ')
print(f'Training R2 score: {train_score} ')

# Test metrics
predicted_eclf1 = eclf1.predict(X_test)
rmse = np.sqrt(mean_squared_error(Y_test, predicted_eclf1))
test_score = eclf1.score(X_test, Y_test)
print(f'Testing RMSE: {rmse} ')
print(f'Testing R2 score: {test_score} ')
# %%
# Random forest
from sklearn.ensemble import RandomForestRegressor

randForr = RandomForestRegressor(max_depth=32, random_state=0, n_jobs=4)

# %%
# Ensemble of the best models: a linear SGD learner plus the random forest.
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import SGDRegressor

sgd = SGDRegressor(max_iter=1000, tol=1e-3)
estimators = [('SVR', sgd), ('RF', randForr)]
# The forest's vote is weighted slightly below the SGD model's.
ensemble = VotingRegressor(estimators, weights=[1, 0.87], n_jobs=4)
ensemble.fit(X_train, y_train)
print(ensemble.score(X_test, y_test))

# %%
# ENSEMBLE METHOD

# In[78]:

from sklearn.ensemble import VotingRegressor

# In[79]:

# Individual held-out scores of the tuned base models.
for tag, mdl in (('knn', knn_best), ('rf', rfc_best), ('log_reg', logmodel)):
    print(f'{tag}: {mdl.score(X_test, y_test)}')

# In[80]:

# Only the two strongest models are combined.
estimators = [('knn', knn_best), ('rf', rfc_best)]

# In[81]:

ensemble = VotingRegressor(estimators)

# In[82]:

ensemble.fit(X_train, y_train)

# In[83]:

ensemble.score(X_test, y_test)

# In[ ]:
# Tail of an image-drawing call begun before this chunk: binary colormap,
# no interpolation smoothing.
     cmap=plt.cm.binary, interpolation='nearest')
im.set_clim(0, 16)  # clamp the colour scale to the 0-16 pixel value range
ax[0, 5].set_title('Selection from the input data')
plt.show()

# In[30]:

# Sweep the number of trees (1, 3, ..., 49) on a fixed 70/30 shuffle of the
# digits data and record held-out accuracy for each forest size.
Performance = []
N = digits.data.shape[0]
ind = np.random.permutation(N)  # one shared permutation for every forest size
for i in range(1, 50, 2):
    clf = RandomForestClassifier(n_estimators=i, max_depth=6, random_state=0)
    clf = clf.fit(digits.data[ind[:int(N * 0.7)], :],
                  digits.target[ind[:int(N * 0.7)]])
    Performance.append(clf.score(digits.data[ind[int(N * 0.7):], :],
                                 digits.target[ind[int(N * 0.7):]]))
Perform = np.array(Performance)
plt.plot(np.arange(1, 50, 2), Perform)
plt.xlabel('Número de árboles')
plt.ylabel('Accuracy')
plt.grid()
plt.show()

# Variable importances according to the last model trained in the loop above.

# In[70]:

plt.bar(np.arange(digits.data.shape[1]), clf.feature_importances_)
# Lasso baseline: fit and keep train/validation R^2.
model_Lasso.fit(x_train, y_train)
score = model_Lasso.score(x_train, y_train)
score_val = model_Lasso.score(x_val, y_val)

# Random forest: fit, score (values displayed only in a notebook cell),
# and 3-fold cross-validate.
model_RF = RandomForestRegressor(n_estimators=40)
model_RF.fit(x_train, y_train)
model_RF.score(x_train, y_train)
model_RF.score(x_val, y_val)
cross_val_score(model_RF, x_train, y_train, cv=3)

# Plot the forest's feature importances, least to most important.
importances = model_RF.feature_importances_
indices = np.argsort(importances)
plt.figure(1, figsize=(50, 50))
plt.title('Feature Importances')
plt.barh(range(len(indices)), importances[indices], color='b', align='center')
plt.yticks(range(len(indices)), x_train.columns[indices])
plt.xlabel('Relative Importance')

# Gradient boosting model.
model_Gb = GradientBoostingRegressor(learning_rate=0.1, n_estimators=150)
model_Gb.fit(x_train, y_train)
model_Gb.score(x_train, y_train)

# Weighted voting ensemble: boosting dominates (0.6) over forest and lasso.
from sklearn.ensemble import VotingRegressor
ensemble_model = VotingRegressor(estimators=[('gb', model_Gb),
                                             ('Rf', model_RF),
                                             ('LR', model_Lasso)],
                                 weights=[0.6, 0.2, 0.2])
# NOTE(review): fitted first on x_train1 / scored on x_val1, then refitted on
# the full data_train1 before predicting on data_test1 -- confirm this
# two-stage fit is intentional.
ensemble_model.fit(x_train1, y_train)
ensemble_model.score(x_val1, y_val)
ensemble_model.fit(np.array(data_train1), data_label)
ensemble_model.score(np.array(data_train1), data_label)
y_pred = ensemble_model.predict(np.array(data_test1))
# KNN baseline using brute-force neighbour search.
knn = KNeighborsRegressor(algorithm='brute')
knn.fit(X_train, y_train)
knn.score(X_train, y_train)
knn.score(X_test, y_test)

# votingRegressor
from sklearn.ensemble import VotingRegressor
reg1 = GradientBoostingRegressor()
reg2 = RandomForestRegressor()
reg3 = LinearRegression()
reg4 = DecisionTreeRegressor()  # NOTE(review): built but never used below
reg5 = KNeighborsRegressor()    # NOTE(review): built but never used below
reg6 = AdaBoostRegressor()      # NOTE(review): built but never used below
# Ensemble of gradient boosting and the random forest only.
ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2)])
ereg = ereg.fit(X_train, y_train)
ereg.score(X_train, y_train)
ereg.score(X_test, y_test)
# predict values from voting method, compare to y_test
vote_pred = ereg.predict(X_test)
# NOTE(review): despite its name, `mse` holds the mean ABSOLUTE error in $.
mse = mean_absolute_error(y_test, vote_pred)
print("The mean absolute error is:$", mse)
# checking r^2
from sklearn.metrics import r2_score
print("r_Score:", r2_score(y_test, vote_pred))
print('Randomforest Model Score: ', random_forest.score(X_test, y_test))
    # Tail of a builder function begun before this chunk: compile with
    # Adam + MSE and return the model for KerasRegressor.
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


def build_model3():
    """Build a small 13-input MLP (32 -> 16 -> 1) compiled with Adam/MSE."""
    model = Sequential()
    model.add(Dense(32, activation='relu', input_shape=(13, )))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


if __name__ == '__main__':
    (x_train, y_train), (x_test, y_test) = load_data()
    # Wrap each Keras builder for sklearn; _estimator_type is forced to
    # "regressor" -- presumably because the KerasRegressor wrapper in use
    # does not advertise itself as one (confirm against the installed version).
    model1 = KerasRegressor(build_fn=build_model1, epochs=100, batch_size=64)
    model1._estimator_type = "regressor"
    model2 = KerasRegressor(build_fn=build_model2, epochs=100, batch_size=64)
    model2._estimator_type = "regressor"
    model3 = KerasRegressor(build_fn=build_model3, epochs=100, batch_size=64)
    model3._estimator_type = "regressor"
    cls = VotingRegressor(estimators=[('model1', model1), ('model2', model2),
                                      ('model3', model3)])
    cls.fit(x_train, y_train)
    # NOTE(review): joblib writes a pickle; the '.h5' extension is misleading.
    joblib.dump(cls, "sklearn-regressor.h5")
    print("score: ", cls.score(x_test, y_test))