def ExtraTreeGS(X_train, X_test, y_train, y_test):
    reg = ExtraTreeRegressor()
    # "mse"/"mae" were renamed in scikit-learn 1.0 and removed in 1.2.
    grid_values = {
        'criterion': ["squared_error", "absolute_error"],
        'max_depth': list(range(20, 25)),
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)

    # best_estimator_ is already refit on the full training set (refit='r2'),
    # so no extra fit() call is needed.
    reg = grid_reg.best_estimator_
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="ExtraTreeGS", best_params=best_params)
    logSave(nameOfModel="ExtraTreeGS", reg=reg, metrics=metrics, val_metrics=val_metrics)
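# printMetrics, getMetrics, saveBestParams, and logSave are project helpers not
# shown in this snippet. A minimal sketch of what they might look like, assuming
# dict-based metrics and joblib persistence; names and formats here are
# assumptions, not the project's actual implementation.
import json
import joblib
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def getMetrics(y_true, y_pred):
    # Collect the three scores used throughout these snippets.
    return {
        "mse": mean_squared_error(y_true, y_pred),
        "mae": mean_absolute_error(y_true, y_pred),
        "r2": r2_score(y_true, y_pred),
    }

def printMetrics(y_true, y_pred):
    for name, value in getMetrics(y_true, y_pred).items():
        print(f"{name}: {value:.4f}")

def saveBestParams(nameOfModel, best_params):
    with open(f"{nameOfModel}_best_params.json", "w") as f:
        json.dump(best_params, f)

def logSave(nameOfModel, reg, metrics, val_metrics):
    # Persist the fitted estimator together with its train/validation metrics.
    joblib.dump({"model": reg, "train": metrics, "val": val_metrics},
                f"{nameOfModel}.joblib")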
def ExtraTreeRegressorPrediction(train_X, train_y, test_X, valid_X, valid_y):
    etr = ExtraTreeRegressor()
    etr.fit(train_X, train_y)
    result = etr.predict(test_X)
    valid_ypred = etr.predict(valid_X)
    valid_mape = mape_loss(valid_y, valid_ypred)
    print('the mape score of ExtraTreeRegressor in valid set is:', valid_mape)
    return result
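# mape_loss is not defined in the snippet above; a minimal sketch, assuming the
# usual mean-absolute-percentage-error definition and nonzero targets.
import numpy as np

def mape_loss(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true, dtype=float), np.asarray(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true))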
def ExtraTree(X_train, X_test, y_train, y_test):
    reg = ExtraTreeRegressor()
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="ExtraTree", reg=reg, metrics=metrics, val_metrics=val_metrics)
# In[849]:
ETR

# In[856]:
ETR.fit(x, y)

# In[857]:
ETR_prediction = ETR.predict(x_test)
plt.plot(ETR_prediction[0], label='prediction')
plt.plot(y_test.iloc[0], label='real')
plt.legend()

# In[858]:
print('mean_absolute_error', mean_absolute_error(y_test, ETR_prediction))
print('mean_squared_error', mean_squared_error(y_test, ETR_prediction))
print("Tomorrow's forecast:", ETR_prediction[0][-1])

# In[859]:
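# The cell that originally defined ETR is not shown. A plausible setup, assuming
# a multi-output ExtraTreeRegressor (ETR_prediction[0] is plotted as a whole
# series, so the target must be 2-D); this is hypothetical, not the notebook's
# actual cell.
from sklearn.tree import ExtraTreeRegressor

ETR = ExtraTreeRegressor(random_state=42)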
clf = DecisionTreeRegressor(max_depth=None, min_samples_split=2, random_state=0).fit(X, y)
clfE = ExtraTreeRegressor(max_depth=None, min_samples_split=2, random_state=0).fit(X, y)

scores = cross_val_score(clf, X, y, cv=5)
scoresE = cross_val_score(clfE, X, y, cv=5)
print('Training Decision', scores.mean())
print('Training Extra', scoresE.mean())

unseen = cross_val_score(clf, testX, testy, cv=5)
unseenE = cross_val_score(clfE, testX, testy, cv=5)
print('New Data Decision', unseen.mean())
print('New Data Extra', unseenE.mean())

defaultPrdict = clf.predict(testX)
#defaultPrdictLog = clf.predict_proba(testX)
extraPrdict = clfE.predict(testX)
#extraPrdictLog = clfE.predict_proba(testX)
defaultTrain = clf.predict(X)
extraTrain = clfE.predict(X)
#print(defaultPrdictLog)
print(clfE.n_outputs_)
print(X.shape)
print(extraPrdict)
print(defaultPrdict)
print(testy)

dCompare = [(list(defaultPrdict[i]).index(1), list(testy[i]).index(1))
            for i in range(len(testy))]
eCompare = [(list(extraPrdict[i]).index(1) if 1 in list(extraPrdict[i]) else None,
             list(testy[i]).index(1)) for i in range(len(testy))]
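# The .index(1) comparisons above break whenever the regressor never outputs an
# exact 1 for a row. An equivalent, more robust formulation with np.argmax,
# sketched under the assumption that testy rows are one-hot.
import numpy as np

# Compare the predicted vs. true class per row via argmax; unlike .index(1),
# this never fails on fractional outputs.
dCompare = list(zip(np.argmax(defaultPrdict, axis=1), np.argmax(testy, axis=1)))
eCompare = list(zip(np.argmax(extraPrdict, axis=1), np.argmax(testy, axis=1)))
accuracy = np.mean(np.argmax(extraPrdict, axis=1) == np.argmax(testy, axis=1))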
#svr = SVR()
#svr.fit(X_train, y_train)
#print("Test set score:{:.2f}".format(svr.score(X_test, y_test)))
#print("Best score on train set:{:.2f}".format(svr.best_score_))
#y_pred = svr.predict(X_test)

'''grid-searched ExtraTreeRegressor'''
gbm = ExtraTreeRegressor()
gbm = GridSearchCV(gbm,
                   param_grid={"min_samples_leaf": [1, 4, 8, 16, 32],
                               'min_samples_split': [4, 10, 20, 100],
                               'max_depth': [2, 8, 16, 32]},
                   cv=6)
gbm.fit(X_train, y_train)
y_pred = gbm.predict(X_test)

# eval
print("MSE:", metrics.mean_squared_error(y_test, y_pred))
print("Test set score:{:.2f}".format(gbm.score(X_test, y_test)))
#print("AUC Score (Train): %f" % metrics.roc_auc_score(y_test, y_pred))

fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.plot([y_test.min(), y_test.max()], [y_pred.min(), y_pred.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()
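# After fitting, GridSearchCV exposes the winning configuration; a short usage
# example (best_params_ and best_score_ exist on any fitted GridSearchCV).
print("Best params:", gbm.best_params_)
print("Best CV score: {:.2f}".format(gbm.best_score_))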
from sklearn.ensemble import RandomForestRegressor

rf_model = RandomForestRegressor(n_estimators=700, random_state=42)
rf_model.fit(x_train, y_train)
y_predict = rf_model.predict(x_test)
r2_score(y_test, y_predict.ravel())

# ### ExtraTreeRegressor

# In[85]:
from sklearn.tree import ExtraTreeRegressor

extratree_model = ExtraTreeRegressor(random_state=42)
extratree_model.fit(x_train, y_train)
y_predict = extratree_model.predict(x_test)
r2_score(y_test, y_predict.ravel())

# ### Result
#
# Of the models tried, RandomForestRegressor performs best, with an R² score of 90.66%.

# In[86]:
# Save the best model with pickle so it can be reused later
import pickle

pickle.dump(rf_model, open('model.pkl', 'wb'))
model = pickle.load(open('model.pkl', 'rb'))
ri_MakingLT_prepared, ri_MakingLT_labels, test_size=0.20, random_state=42)

# Split the training data into a reduced training set and a validation set
ri_MakingLT_prepared_train_re, ri_MakingLT_prepared_train_val, ri_MakingLT_labels_train_re, ri_MakingLT_labels_train_val = train_test_split(
    ri_MakingLT_prepared_train, ri_MakingLT_labels_train, test_size=0.25, random_state=42)

###**ExtraTreeRegressor**###
# Train an **ExtraTreeRegressor** model (the single-tree variant from
# sklearn.tree, not the ExtraTreesRegressor ensemble from sklearn.ensemble)
from sklearn.tree import ExtraTreeRegressor

Et_tree_reg = ExtraTreeRegressor(max_depth=11, random_state=42)
Et_tree_reg.fit(ri_MakingLT_prepared_train, ri_MakingLT_labels_train)
ri_MakingLT_predicted = Et_tree_reg.predict(ri_MakingLT_prepared_test)

from sklearn.metrics import mean_squared_error
Et_tree_reg_mse = mean_squared_error(ri_MakingLT_labels_test, ri_MakingLT_predicted)
Et_tree_reg_rmse = np.sqrt(Et_tree_reg_mse)
print(Et_tree_reg_rmse)

from sklearn.metrics import mean_absolute_error
Et_tree_reg_mae = mean_absolute_error(ri_MakingLT_labels_test, ri_MakingLT_predicted)
print(Et_tree_reg_mae)

Et_tree_reg_mape = (np.abs((ri_MakingLT_predicted - ri_MakingLT_labels_test)
                           / ri_MakingLT_labels_test).mean(axis=0))
print(Et_tree_reg_mape)
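# The hand-rolled MAPE above matches scikit-learn's built-in metric (available
# since version 0.24); an equivalent call, sketched for comparison.
from sklearn.metrics import mean_absolute_percentage_error

# Same value as the manual np.abs(...).mean() computation above.
Et_tree_reg_mape = mean_absolute_percentage_error(ri_MakingLT_labels_test,
                                                  ri_MakingLT_predicted)
print(Et_tree_reg_mape)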
random_state=42)

# Split the training data into a reduced training set and a validation set
ri_PaintingLT_prepared_train_re, ri_PaintingLT_prepared_train_val, ri_PaintingLT_labels_train_re, ri_PaintingLT_labels_train_val = train_test_split(
    ri_PaintingLT_prepared_train, ri_PaintingLT_labels_train, test_size=0.25, random_state=42)

###**ExtraTreeRegressor**###
# Train an **ExtraTreeRegressor** model (the single-tree variant from
# sklearn.tree, not the ExtraTreesRegressor ensemble from sklearn.ensemble)
from sklearn.tree import ExtraTreeRegressor

Et_tree_reg = ExtraTreeRegressor(max_depth=12, random_state=42)
Et_tree_reg.fit(ri_PaintingLT_prepared_train, ri_PaintingLT_labels_train)
ri_PaintingLT_predicted = Et_tree_reg.predict(ri_PaintingLT_prepared_test)

from sklearn.metrics import mean_squared_error
Et_tree_reg_mse = mean_squared_error(ri_PaintingLT_labels_test, ri_PaintingLT_predicted)
Et_tree_reg_rmse = np.sqrt(Et_tree_reg_mse)
print(Et_tree_reg_rmse)

from sklearn.metrics import mean_absolute_error
Et_tree_reg_mae = mean_absolute_error(ri_PaintingLT_labels_test, ri_PaintingLT_predicted)
print(Et_tree_reg_mae)

Et_tree_reg_mape = (np.abs((ri_PaintingLT_predicted - ri_PaintingLT_labels_test)
                           / ri_PaintingLT_labels_test).mean(axis=0))
from math import *
import pandas as pd
import numpy as np
from sklearn.tree import ExtraTreeRegressor
import matplotlib.pyplot as plt
import re, os

data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']

clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)
print(clf.score(x, y))

t = np.arange(0.0, 31.0)
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
name_folder = folder.split("/")[6]

train_data = np.array(pd.read_csv('train_data.csv', sep=';'))
test_data = np.array(pd.read_csv('test_data.csv', sep=';'))
train_labels = np.array(pd.read_csv('train_labels.csv', sep=';'))
test_labels = np.array(pd.read_csv('test_labels.csv', sep=';'))

inicio = time.time()

# import the extra-tree regressor
from sklearn.tree import ExtraTreeRegressor

# train the model on the dataset
regression = ExtraTreeRegressor().fit(train_data, train_labels)

# predict
predictions_labels = regression.predict(test_data)

fim = time.time()
df_time = pd.DataFrame({'Execution Time:': [fim - inicio]})
output_path = os.path.join(
    '/home/isadorasalles/Documents/Regressao/extra_tree', 'time_' + name_folder)
df_time.to_csv(output_path, sep=';')

from sklearn import metrics
df_metrics = pd.DataFrame({
    'Mean Absolute Error':
    [metrics.mean_absolute_error(test_labels, predictions_labels)],
    'Mean Squared Error':
    [metrics.mean_squared_error(test_labels, predictions_labels)]
})
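# The original snippet cuts off at the metrics DataFrame. Mirroring how df_time
# is written above, the metrics were presumably saved the same way; a sketch
# under that assumption (the 'metrics_' filename prefix is a guess).
metrics_path = os.path.join(
    '/home/isadorasalles/Documents/Regressao/extra_tree', 'metrics_' + name_folder)
df_metrics.to_csv(metrics_path, sep=';')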
from sklearn.tree import DecisionTreeRegressor

# Define model. Specify a number for random_state to ensure same results each run
dt = DecisionTreeRegressor(random_state=1)
# Fit model
dt.fit(X_train, y_train)
dt_prediction = dt.predict(X_test)
dt_score = accuracy_score(y_test, dt_prediction)
print(dt_score)

from sklearn.tree import ExtraTreeRegressor

# Define model. Specify a number for random_state to ensure same results each run
etr = ExtraTreeRegressor(random_state=1)
# Fit model
etr.fit(X_train, y_train)
etr_prediction = etr.predict(X_test)
etr_score = accuracy_score(y_test, etr_prediction)
print(etr_score)

X_train = df_train.drop("Survived", axis=1)
y_train = df_train["Survived"]
X_train = X_train.drop("PassengerId", axis=1)
X_test = df_test.drop("PassengerId", axis=1)

xgboost = xgb.XGBClassifier(max_depth=3, n_estimators=300,
                            learning_rate=0.05).fit(X_train, y_train)
Y_pred = xgboost.predict(X_test)

submission = pd.DataFrame({
    "PassengerId": df_test["PassengerId"],
    "Survived": Y_pred
})
submission.to_csv('submission.csv', index=False)
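# accuracy_score only works above if the regressors happen to emit exact 0/1
# values; on fractional outputs it raises a "continuous targets" ValueError.
# Rounding first (or using a regression metric) is safer; a sketch:
import numpy as np
from sklearn.metrics import accuracy_score, r2_score

# Round continuous predictions to the nearest class before scoring.
etr_score = accuracy_score(y_test, np.rint(etr_prediction))
print(etr_score, r2_score(y_test, etr_prediction))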
def etr(x_train, y_train, x_test):
    model = ExtraTreeRegressor()
    model.fit(x_train, y_train)  # fit the extra-tree regression model
    predicted = model.predict(x_test)
    return predicted
evs_t = []
r2_t = []
for tr_i, ts_i in rkf.split(data):
    print(i, j, k, c)
    train, test = data.iloc[tr_i], data.iloc[ts_i]
    train_x = train.drop(columns=['Rainfall'])
    train_y = train['Rainfall']
    test_x = test.drop(columns=['Rainfall'])
    test_y = test['Rainfall']
    # "mse" was renamed to "squared_error" in scikit-learn 1.0+.
    model = ExtraTreeRegressor(criterion='squared_error', splitter='best',
                               max_depth=i, min_samples_leaf=j,
                               min_samples_split=k)
    model.fit(train_x, train_y)
    ts_p = model.predict(test_x)
    mse_t.append(mse(test_y, ts_p))
    rmse_t.append(rmse(test_y, ts_p))
    mae_t.append(mae(test_y, ts_p))
    mdae_t.append(mdae(test_y, ts_p))
    evs_t.append(evs(test_y, ts_p))
    r2_t.append(r2(test_y, ts_p))
    c += 1

dep_f.append(i)
saml_f.append(j)
sams_f.append(k)
mse_f.append(np.mean(mse_t))
rmse_f.append(np.mean(rmse_t))
mae_f.append(np.mean(mae_t))
mdae_f.append(np.mean(mdae_t))
evs_f.append(np.mean(evs_t))
# Convert the data to vectors (numpy arrays) for model training
et_X1 = et_m_Inputdata.values
et_Y1 = et_m_Outputdata.values

# Split into training and test data
et_X1_train, et_X1_test, et_Y1_train, et_Y1_test = train_test_split(
    et_X1, et_Y1, test_size=0.33, random_state=42)

########################################################################################################################
# Build the ExtraTree model
making_extratree_model = ExtraTreeRegressor(max_depth=10, random_state=42)
making_extratree_model.fit(et_X1_train, et_Y1_train)

et_m_predicted = making_extratree_model.predict(et_X1_test)
et_m_predicted[et_m_predicted < 0] = 0

# Reshape the predictions from [1, n] to [n, 1]
et_length_x1test = len(et_X1_test)
et_m_predicted = et_m_predicted.reshape(et_length_x1test, 1)

# Evaluate model performance
et_m_mae = abs(et_m_predicted - et_Y1_test).mean(axis=0)
et_m_mape = (np.abs((et_m_predicted - et_Y1_test) / et_Y1_test).mean(axis=0))
et_m_rmse = np.sqrt(((et_m_predicted - et_Y1_test) ** 2).mean(axis=0))
et_m_rmsle = np.sqrt(
    (((np.log(et_m_predicted + 1) - np.log(et_Y1_test + 1)) ** 2).mean(axis=0)))

print(et_m_mae)
print(et_m_mape)
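# The hand-rolled numpy metrics above can be cross-checked against sklearn's
# implementations; a sketch, assuming the (n, 1) column-vector shapes set up
# above (predictions are already clipped to be non-negative, so the log metric
# is safe).
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error

print(mean_absolute_error(et_Y1_test, et_m_predicted))              # MAE
print(np.sqrt(mean_squared_error(et_Y1_test, et_m_predicted)))      # RMSE
print(np.sqrt(mean_squared_log_error(et_Y1_test, et_m_predicted)))  # RMSLE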
def predict_extra_tree(train_X, train_Y, test, param=30):
    # min_samples_split must be >= 2 in scikit-learn, and "mse" was renamed
    # to "squared_error" in 1.0+.
    clf = ExtraTreeRegressor(min_samples_leaf=param,
                             min_samples_split=2,
                             criterion='squared_error')
    clf.fit(train_X, train_Y)
    preds = clf.predict(test)
    return preds
n = X.shape[1]
int_scores = {}
ext_scores = {}
for i in range(1, n + 1):
    int_score_tmp1 = inf
    ext_score_tmp1 = inf
    # Exhaustively try every feature subset of size i.
    for features in combinations(range(n), i):
        X_cuted = X[:, features]
        int_score_tmp2 = inf
        ext_score_tmp2 = inf
        for train_index, test_index in cv.split(X_cuted):
            X_train, X_test = X_cuted[train_index], X_cuted[test_index]
            y_train, y_test = y[train_index], y[test_index]
            alg.fit(X_train, y_train)
            # Internal (training) error for this fold.
            y_pred = alg.predict(X_train)
            error = mean_squared_error(y_train, y_pred)
            int_score_tmp2 = min(int_score_tmp2, error)
            # External (held-out) error for this fold.
            y_pred = alg.predict(X_test)
            error = mean_squared_error(y_test, y_pred)
            ext_score_tmp2 = min(ext_score_tmp2, error)
        int_score_tmp1 = min(int_score_tmp1, int_score_tmp2)
        ext_score_tmp1 = min(ext_score_tmp1, ext_score_tmp2)
    # Best error achievable with any subset of size i.
    int_scores[i] = int_score_tmp1
    ext_scores[i] = ext_score_tmp1
print(int_scores, ext_scores)
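# alg, cv, X, and y come from elided context; a minimal setup under which the
# loop above runs, assuming an ExtraTreeRegressor and K-fold splits (the random
# data is purely illustrative).
from itertools import combinations
from math import inf

import numpy as np
from sklearn.model_selection import KFold
from sklearn.tree import ExtraTreeRegressor
from sklearn.metrics import mean_squared_error

alg = ExtraTreeRegressor(random_state=42)
cv = KFold(n_splits=5, shuffle=True, random_state=42)
X = np.random.rand(100, 4)
y = np.random.rand(100)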
class ExtraTreeClass:
    """
    Name : ExtraTreeRegressor
    Attribute : None
    Method : predict, predict_by_cv, save_model
    """

    def __init__(self):
        # Algorithm name
        self._name = 'extratree'
        # Base path
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
        # Suppress warning messages
        warnings.filterwarnings('ignore')
        # Load the raw data
        data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv",
                           sep=",", encoding="utf-8")
        # Split into training and test data
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)
        # Training split
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test split
        self._x_test, self._y_test = self.preprocessing(data[self._y])
        # Declare the model
        self._model = ExtraTreeRegressor()
        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        # Features
        x = []
        # Labels
        y = []
        # Window size (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])
        for i in range(len(temps)):
            if i < base_interval:
                continue
            y.append(temps[i])
            xa = []
            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # Plain prediction
    def predict(self, save_img=False, show_chart=False):
        # Predict
        y_pred = self._model.predict(self._x_test)
        # Score
        score = r2_score(self._y_test, y_pred)
        # Report (tree models have no coef_/intercept_, so this branch is skipped)
        if hasattr(self._model, 'coef_') and hasattr(self._model, 'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')
        print(f'Score = {score}')
        # Optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)
        # Predictions & score
        return [list(y_pred), score]

    # CV prediction (Cross Validation)
    def predict_by_cv(self):
        # For regression, implement cross validation to fit the actual project
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        pass

    # Save or refresh the model
    def save_model(self, renew=False):
        if not renew:
            # First save
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # Replace the existing model, archiving the old one with a timestamp
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(
                    self._f_path + f'/model/{self._name}_rg.pkl',
                    self._f_path +
                    f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        # Figure size
        plt.figure(figsize=(15, 10), dpi=100)
        # Labels
        plt.plot(self._y_test, c='r')
        # Predictions
        plt.plot(data, c='b')
        # Save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')
        # Show the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
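# predict_by_cv above is left as a stub; one way it could be filled in, using
# cross_val_score (a sketch, not the project's implementation):
from sklearn.model_selection import cross_val_score

def predict_by_cv(self, folds=5):
    # Mean R^2 over K folds of the training data.
    scores = cross_val_score(self._model, self._x_train, self._y_train,
                             cv=folds, scoring='r2')
    return scores.mean()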
#X1 = preprocessing.normalize(X1)
X = list(zip(*X1))
Y = cols[11]
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=rn)
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)
#print(y_test)

lin_reg_mod = ExtraTreeRegressor()
lin_reg_mod.fit(X_train, y_train)
pred = lin_reg_mod.predict(X_test)
#print(pred)
#print(y_test)

test_set_r2 = r2_score(y_test, pred)
#print(test_set_r2)
tr2 += test_set_r2
#abs_er = mean_absolute_error(y_test, pred)
#tabse += abs_er

# Absolute percentage error per sample
temp = []
for (i, j) in zip(y_test, pred):
    t = (abs(i - j)) / float(i)
    temp.append(t)
#print(temp)
#print(np.median(temp))
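# The element-wise loop above can be vectorized with numpy; an equivalent
# sketch that produces the same per-sample values.
import numpy as np

temp = np.abs(np.asarray(y_test, dtype=float) - np.asarray(pred)) / np.asarray(y_test, dtype=float)
print(np.median(temp))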
# from sklearn.model_selection import GridSearchCV
# param_grid = [
#     {"max_depth": list(range(1, 100))}
# ]
# grid = GridSearchCV(estimator=ExtraTreeRegressor(random_state=42),
#                     param_grid=param_grid, verbose=2, cv=10)
# grid_result = grid.fit(BL_LT_prepared_train, BL_LT_labels_train)
# print('Best Score: ', grid_result.best_score_)
# print('Best Params: ', grid_result.best_params_)

####################################################################################################################
#                                            ExtraTreeRegressor                                                   #
####################################################################################################################
Et_tree_reg = ExtraTreeRegressor(max_depth=13, random_state=42)
Et_tree_reg.fit(BL_LT_prepared_train, BL_LT_labels_train)
BL_LT_predicted = Et_tree_reg.predict(BL_LT_prepared_test)

Et_tree_mse = mean_squared_error(BL_LT_labels_test, BL_LT_predicted)
Et_tree_rmse = np.sqrt(Et_tree_mse)
# print(Et_tree_rmse)

Et_tree_mae = mean_absolute_error(BL_LT_labels_test, BL_LT_predicted)
# print(Et_tree_mae)

Et_tree_mape = (np.abs((BL_LT_predicted - BL_LT_labels_test)
                       / BL_LT_labels_test).mean(axis=0))
# print("Et_tree: " + str(Et_tree_mape))

Et_tree_rmsle = np.sqrt(mean_squared_log_error(BL_LT_labels_test, BL_LT_predicted))
# print(Et_tree_rmsle)