Example #1
def ExtraTreeGS(X_train, X_test, y_train, y_test):
    reg = ExtraTreeRegressor()
    grid_values = {
        'criterion': ["squared_error", "absolute_error"],  # "mse"/"mae" in scikit-learn < 1.0
        'max_depth': list(range(20, 25))
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="ExtraTreeGS", best_params=best_params)
    logSave(nameOfModel="ExtraTreeGS",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
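
The helpers printMetrics, getMetrics, saveBestParams, and logSave are not shown in this example. A minimal sketch of what they might look like, assuming scikit-learn metrics and simple file-based persistence (all names and paths below are assumptions):

import json
import joblib
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def getMetrics(y_true, y_pred):
    # Collect the same scores the grid search above optimizes.
    return {
        "mse": mean_squared_error(y_true, y_pred),
        "mae": mean_absolute_error(y_true, y_pred),
        "r2": r2_score(y_true, y_pred),
    }

def printMetrics(y_true, y_pred):
    for name, value in getMetrics(y_true, y_pred).items():
        print(f"{name}: {value:.4f}")

def saveBestParams(nameOfModel, best_params):
    # Persist the winning grid-search parameters as JSON (hypothetical path).
    with open(f"{nameOfModel}_best_params.json", "w") as f:
        json.dump(best_params, f, indent=2)

def logSave(nameOfModel, reg, metrics, val_metrics):
    # Store the fitted estimator and its train/validation metrics (hypothetical layout).
    joblib.dump(reg, f"{nameOfModel}.pkl")
    with open(f"{nameOfModel}_metrics.json", "w") as f:
        json.dump({"train": metrics, "validation": val_metrics}, f, indent=2)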
Example #2
def ExtraTreeRegressorPrediction(train_X, train_y, test_X, valid_X, valid_y):
    etr = ExtraTreeRegressor()
    etr.fit(train_X, train_y)

    result = etr.predict(test_X)

    valid_ypred = etr.predict(valid_X)

    valid_mape = mape_loss(valid_y, valid_ypred)

    print('the mape score of ExtraTreeRegressor on the validation set is:', valid_mape)
    return result
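
mape_loss is not defined in this snippet. A plausible implementation, assuming MAPE reported in percent (the name and scaling are assumptions):

import numpy as np

def mape_loss(y_true, y_pred):
    # Mean absolute percentage error, in percent; assumes y_true contains no zeros.
    y_true, y_pred = np.asarray(y_true, dtype=float), np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100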
Example #3
def ExtraTree(X_train, X_test, y_train, y_test):
    reg = ExtraTreeRegressor()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="ExtraTree",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
Example #4
# In[849]:


from sklearn.tree import ExtraTreeRegressor
ETR = ExtraTreeRegressor()  # assumed definition; the original cell only displayed ETR


# In[856]:


ETR.fit(x, y)


# In[857]:


ETR_prediction = ETR.predict(x_test)
plt.plot(ETR_prediction[0], label='prediction')
plt.plot(y_test.iloc[0], label='real')


# In[858]:


print('mean_absolute_error', mean_absolute_error(y_test, ETR_prediction))
print('mean_squared_error', mean_squared_error(y_test, ETR_prediction))
print('Forecast for tomorrow:', ETR_prediction[0][-1])



Example #5
clf = DecisionTreeRegressor(max_depth=None, min_samples_split=2, random_state=0).fit(X, y)
clfE = ExtraTreeRegressor(max_depth=None, min_samples_split=2, random_state=0).fit(X,y)

scores = cross_val_score(clf, X, y, cv = 5)
scoresE = cross_val_score(clfE, X, y, cv = 5)
print('Training Decision',scores.mean())
print('Training Extra', scoresE.mean())

unseen = cross_val_score(clf, testX, testy, cv = 5)
unseenE = cross_val_score(clfE, testX, testy, cv = 5)
print('New Data Decision', unseen.mean())
print('New Data Extra', unseenE.mean())

defaultPrdict = clf.predict(testX)
#defaultPrdictLog = clf.predict_proba(testX)
extraPrdict = clfE.predict(testX)
#extraPrdictLog = clfE.predict_proba(testX)
defaultTrain = clf.predict(X)
extraTrain = clfE.predict(X)

#print(defaultPrdictLog)

print(clfE.n_outputs_)

print(X.shape)
print(extraPrdict)
print(defaultPrdict)
print(testy)

dCompare = [(list(defaultPrdict[i]).index(1), list(testy[i]).index(1)) for i in range(len(testy))]
eCompare = [(list(extraPrdict[i]).index(1) if 1 in list(extraPrdict[i]) else None, list(testy[i]).index(1))
            for i in range(len(testy))]
Example #6
#svr = SVR()
#svr.fit(X_train,y_train)
#print("Test set score:{:.2f}".format(svr.score(X_test,y_test)))
#print("Best score on train set:{:.2f}".format(svr.best_score_))
#y_pred = svr.predict(X_test)
'''grid-searched ExtraTreeRegressor'''  # the original label said 'lgb'; the model below is an extra tree

gbm = ExtraTreeRegressor()

gbm = GridSearchCV(gbm,
                   param_grid={"min_samples_leaf": [1, 4, 8, 16, 32],
                               "min_samples_split": [4, 10, 20, 100],
                               "max_depth": [2, 8, 16, 32]},
                   cv=6)

gbm.fit(X_train, y_train)
y_pred = gbm.predict(X_test)
# eval
print("MSE:", metrics.mean_squared_error(y_test, y_pred))
print("Test set score:{:.2f}".format(gbm.score(X_test, y_test)))
#print("AUC Score (Train): %f" % metrics.roc_auc_score(y_test, y_pred))

fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.plot([y_test.min(), y_test.max()],
        [y_pred.min(), y_pred.max()],
        'k--',
        lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()
Example #7
from math import *
import pandas as pd
import numpy as np
from sklearn.tree import ExtraTreeRegressor
import matplotlib.pyplot as plt
import re,os
data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']
clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)
print(clf.score(x, y))
t = np.arange(0.0, 31.0)
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
Example #8

from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor(n_estimators=700, random_state=42)
rf_model.fit(x_train, y_train)
y_predict = rf_model.predict(x_test)
r2_score(y_test, y_predict.ravel())


# ### ExtraTreeRegressor

# In[85]:


from sklearn.tree import ExtraTreeRegressor
extratree_model = ExtraTreeRegressor(random_state=42)
extratree_model.fit(x_train, y_train)
y_predict = extratree_model.predict(x_test)
r2_score(y_test, y_predict.ravel())


# ### Result
# 
# So from here we can conclude that, of the models tried, RandomForestRegressor performs best, with an R² score of 90.66%, which is very good.

# In[86]:


# Using pickle we will save our model so that we can use it further
import pickle
pickle.dump(extratree_model, open('model.pkl', 'wb'))
model = pickle.load(open('model.pkl', 'rb'))
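# A quick usage check of the reloaded model (a sketch; assumes x_test and
# y_test from the split above are still in scope):
reloaded_prediction = model.predict(x_test)
print('r2 of reloaded model:', r2_score(y_test, reloaded_prediction))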
Example #9

# Leading call reconstructed from the variable names used below
ri_MakingLT_prepared_train, ri_MakingLT_prepared_test, ri_MakingLT_labels_train, ri_MakingLT_labels_test = train_test_split(
    ri_MakingLT_prepared, ri_MakingLT_labels, test_size=0.20, random_state=42)

# Split the training data further into a real training set and a validation set
ri_MakingLT_prepared_train_re, ri_MakingLT_prepared_train_val, ri_MakingLT_labels_train_re, ri_MakingLT_labels_train_val = train_test_split(
    ri_MakingLT_prepared_train,
    ri_MakingLT_labels_train,
    test_size=0.25,
    random_state=42)

###**ExtraTreeRegressor**###

# Train the **ExtraTreeRegressor** model (sklearn.tree.ExtraTreeRegressor is the single-tree variant, not the ExtraTreesRegressor ensemble)
from sklearn.tree import ExtraTreeRegressor
Et_tree_reg = ExtraTreeRegressor(max_depth=11, random_state=42)
Et_tree_reg.fit(ri_MakingLT_prepared_train, ri_MakingLT_labels_train)
ri_MakingLT_predicted = Et_tree_reg.predict(ri_MakingLT_prepared_test)

from sklearn.metrics import mean_squared_error
Et_tree_reg_mse = mean_squared_error(ri_MakingLT_labels_test,
                                     ri_MakingLT_predicted)
Et_tree_reg_rmse = np.sqrt(Et_tree_reg_mse)
print(Et_tree_reg_rmse)

from sklearn.metrics import mean_absolute_error
Et_tree_reg_mae = mean_absolute_error(ri_MakingLT_labels_test,
                                      ri_MakingLT_predicted)
print(Et_tree_reg_mae)

Et_tree_reg_mape = (np.abs((ri_MakingLT_predicted - ri_MakingLT_labels_test) /
                           ri_MakingLT_labels_test).mean(axis=0))
print(Et_tree_reg_mape)
Example #10

# Leading call reconstructed from the variable names used below
ri_PaintingLT_prepared_train, ri_PaintingLT_prepared_test, ri_PaintingLT_labels_train, ri_PaintingLT_labels_test = train_test_split(
    ri_PaintingLT_prepared, ri_PaintingLT_labels, test_size=0.20, random_state=42)

# Split the training data further into a real training set and a validation set
ri_PaintingLT_prepared_train_re, ri_PaintingLT_prepared_train_val, ri_PaintingLT_labels_train_re, ri_PaintingLT_labels_train_val = train_test_split(
    ri_PaintingLT_prepared_train,
    ri_PaintingLT_labels_train,
    test_size=0.25,
    random_state=42)

###**ExtraTreeRegressor**###

# Train the **ExtraTreeRegressor** model (sklearn.tree.ExtraTreeRegressor is the single-tree variant, not the ExtraTreesRegressor ensemble)
from sklearn.tree import ExtraTreeRegressor
Et_tree_reg = ExtraTreeRegressor(max_depth=12, random_state=42)
Et_tree_reg.fit(ri_PaintingLT_prepared_train, ri_PaintingLT_labels_train)
ri_PaintingLT_predicted = Et_tree_reg.predict(ri_PaintingLT_prepared_test)

from sklearn.metrics import mean_squared_error
Et_tree_reg_mse = mean_squared_error(ri_PaintingLT_labels_test,
                                     ri_PaintingLT_predicted)
Et_tree_reg_rmse = np.sqrt(Et_tree_reg_mse)
print(Et_tree_reg_rmse)

from sklearn.metrics import mean_absolute_error
Et_tree_reg_mae = mean_absolute_error(ri_PaintingLT_labels_test,
                                      ri_PaintingLT_predicted)
print(Et_tree_reg_mae)

Et_tree_reg_mape = (np.abs(
    (ri_PaintingLT_predicted - ri_PaintingLT_labels_test) /
    ri_PaintingLT_labels_test).mean(axis=0))
Example #11
from math import *
import pandas as pd
import numpy as np
from sklearn.tree import ExtraTreeRegressor
import matplotlib.pyplot as plt
import re, os
data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']
clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)
print(clf.score(x, y))
t = np.arange(0.0, 31.0)
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
Example #12
    name_folder = folder.split("/")[6]
    train_data = np.array(pd.read_csv('train_data.csv', sep=';'))
    test_data = np.array(pd.read_csv('test_data.csv', sep=';'))
    train_labels = np.array(pd.read_csv('train_labels.csv', sep=';'))
    test_labels = np.array(pd.read_csv('test_labels.csv', sep=';'))

    inicio = time.time()

    # import the extra tree regressor (the original comment said "random forest")
    from sklearn.tree import ExtraTreeRegressor

    # train the model on the dataset
    regression = ExtraTreeRegressor().fit(train_data, train_labels)

    # predict
    predictions_labels = regression.predict(test_data)

    fim = time.time()
    df_time = pd.DataFrame({'Execution Time:': [fim - inicio]})

    output_path = os.path.join(
        '/home/isadorasalles/Documents/Regressao/extra_tree',
        'time_' + name_folder)
    df_time.to_csv(output_path, sep=';')

    from sklearn import metrics

    df_metrics = pd.DataFrame({
        'Mean Absolute Error':
        [metrics.mean_absolute_error(test_labels, predictions_labels)],
        'Mean Squared Error':
        [metrics.mean_squared_error(test_labels, predictions_labels)]})
Example #13
from sklearn.tree import DecisionTreeRegressor

# Define model. Specify a number for random_state to ensure same results each run
dt = DecisionTreeRegressor(random_state=1)

# Fit model
dt.fit(X_train, y_train)
dt_prediction = dt.predict(X_test)
# accuracy_score works here only because a fully grown tree on 0/1 labels predicts exact 0/1 values
dt_score = accuracy_score(y_test, dt_prediction)
print(dt_score)
from sklearn.tree import ExtraTreeRegressor
# Define model. Specify a number for random_state to ensure same results each run
etr = ExtraTreeRegressor(random_state=1)
# Fit model
etr.fit(X_train, y_train)
etr_prediction = etr.predict(X_test)
etr_score = accuracy_score(y_test, etr_prediction)
print(etr_score)
X_train = df_train.drop("Survived", axis=1)
y_train = df_train["Survived"]
X_train = X_train.drop("PassengerId", axis=1)
X_test = df_test.drop("PassengerId", axis=1)
xgboost = xgb.XGBClassifier(max_depth=3, n_estimators=300,
                            learning_rate=0.05).fit(X_train, y_train)
Y_pred = xgboost.predict(X_test)
submission = pd.DataFrame({
    "PassengerId": df_test["PassengerId"],
    "Survived": Y_pred
})
submission.to_csv('submission.csv', index=False)
Example #14
def etr(x_train, y_train, x_test):
    model = ExtraTreeRegressor()
    model.fit(x_train, y_train)  # fit the extra-tree regressor
    predicted = model.predict(x_test)
    return predicted
Example #15
evs_t = []
r2_t = []
for tr_i, ts_i in rkf.split(data):
    print(i, j, k, c)
    train, test = data.iloc[tr_i], data.iloc[ts_i]
    train_x = train.drop(columns=['Rainfall'])
    train_y = train['Rainfall']
    test_x = test.drop(columns=['Rainfall'])
    test_y = test['Rainfall']
    model = ExtraTreeRegressor(criterion='squared_error',  # 'mse' in scikit-learn < 1.0
                               splitter='best',
                               max_depth=i,
                               min_samples_leaf=j,
                               min_samples_split=k)
    model.fit(train_x, train_y)
    ts_p = model.predict(test_x)
    mse_t.append(mse(test_y, ts_p))
    rmse_t.append(rmse(test_y, ts_p))
    mae_t.append(mae(test_y, ts_p))
    mdae_t.append(mdae(test_y, ts_p))
    evs_t.append(evs(test_y, ts_p))
    r2_t.append(r2(test_y, ts_p))
    c += 1
    dep_f.append(i)
    saml_f.append(j)
    sams_f.append(k)
    mse_f.append(np.mean(mse_t))
    rmse_f.append(np.mean(rmse_t))
    mae_f.append(np.mean(mae_t))
    mdae_f.append(np.mean(mdae_t))
    evs_f.append(np.mean(evs_t))
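
The short metric names used above (mse, rmse, mae, mdae, evs, r2) are not defined in this fragment; they plausibly alias scikit-learn metrics, for example (the aliases are assumptions):

import numpy as np
from sklearn.metrics import (mean_squared_error as mse,
                             mean_absolute_error as mae,
                             median_absolute_error as mdae,
                             explained_variance_score as evs,
                             r2_score as r2)

def rmse(y_true, y_pred):
    # Root mean squared error, derived from the mean squared error.
    return np.sqrt(mse(y_true, y_pred))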
Example #16
# Convert the data to arrays for building the model
et_X1 = et_m_Inputdata.values
et_Y1 = et_m_Outputdata.values

# Split into training and test data
et_X1_train, et_X1_test, et_Y1_train, et_Y1_test = train_test_split(
    et_X1, et_Y1, test_size=0.33, random_state=42)

########################################################################################################################
# Build the ExtraTree model
making_extratree_model = ExtraTreeRegressor(max_depth=10, random_state=42)

making_extratree_model.fit(et_X1_train, et_Y1_train)

et_m_predicted = making_extratree_model.predict(et_X1_test)
et_m_predicted[et_m_predicted < 0] = 0

# Reshape the prediction array from [1, n] to [n, 1]
et_length_x1test = len(et_X1_test)
et_m_predicted = et_m_predicted.reshape(et_length_x1test, 1)

# Check the performance of the trained model
et_m_mae = abs(et_m_predicted - et_Y1_test).mean(axis=0)
et_m_mape = (np.abs((et_m_predicted - et_Y1_test) / et_Y1_test).mean(axis=0))
et_m_rmse = np.sqrt(((et_m_predicted - et_Y1_test)**2).mean(axis=0))
et_m_rmsle = np.sqrt(
    (((np.log(et_m_predicted + 1) - np.log(et_Y1_test + 1))**2).mean(axis=0)))

print(et_m_mae)
print(et_m_mape)
Example #17
def predict_extra_tree(train_X, train_Y, test, param=30):
    # min_samples_split must be >= 2 in scikit-learn; 'mse' is 'squared_error' since 1.0
    clf = ExtraTreeRegressor(min_samples_leaf=param, min_samples_split=2,
                             criterion='squared_error')
    clf.fit(train_X, train_Y)
    preds = clf.predict(test)
    return preds
Example #18
    n = X.shape[1]

    int_scores = {}
    ext_scores = {}

    for i in range(1, n + 1):
        int_score_tmp1 = inf
        ext_score_tmp1 = inf
        for features in combinations(range(n), i):
            X_cuted = X[:, features]
            int_score_tmp2 = inf
            ext_score_tmp2 = inf
            for train_index, test_index in cv.split(X_cuted):
                X_train, X_test = X_cuted[train_index], X_cuted[test_index]
                y_train, y_test = y[train_index], y[test_index]

                alg.fit(X_train, y_train)
                y_pred = alg.predict(X_train)
                error = mean_squared_error(y_train, y_pred)
                int_score_tmp2 = min(int_score_tmp2, error)

                y_pred = alg.predict(X_test)
                error = mean_squared_error(y_test, y_pred)
                ext_score_tmp2 = min(ext_score_tmp2, error)
            int_score_tmp1 = min(int_score_tmp1, int_score_tmp2)
            ext_score_tmp1 = min(ext_score_tmp1, ext_score_tmp2)
        int_scores[i] = int_score_tmp1
        ext_scores[i] = ext_score_tmp1

    print(int_scores, ext_scores)
Example #19
class ExtraTreeClass:
    """
    Name      : ExtraTreeRegressor
    Attribute : None
    Method    : predict, predict_by_cv, save_model
    """
    def __init__(self):
        # Algorithm name
        self._name = 'extratree'

        # Base path
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir))

        # Suppress warning messages
        warnings.filterwarnings('ignore')

        # Load the raw data
        data = pd.read_csv(self._f_path +
                           "/regression/resource/regression_sample.csv",
                           sep=",",
                           encoding="utf-8")

        # Split into training and test data
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Training data
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test data
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model
        self._model = ExtraTreeRegressor()

        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        # Features
        x = []
        # Labels
        y = []
        # Window size (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])

        for i in range(len(temps)):
            if i < base_interval:
                continue
            y.append(temps[i])

            xa = []

            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # Standard prediction
    def predict(self, save_img=False, show_chart=False):
        # Predict
        y_pred = self._model.predict(self._x_test)

        # Score
        score = r2_score(self._y_test, y_pred)

        # Report coefficients if the model exposes them
        if hasattr(self._model, 'coef_') and hasattr(self._model,
                                                     'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')

        print(f'Score = {score}')

        # Optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)

        # Predicted values & score
        return [list(y_pred), score]

    # CV prediction (cross-validation)
    def predict_by_cv(self):
        # For regression, implement cross-validation as appropriate for the actual project
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        pass

    # Save or refresh the model
    def save_model(self, renew=False):
        # Save the model
        if not renew:
            # First save
            joblib.dump(self._model,
                        self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # Replace the existing model, archiving the old file with a timestamp
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(
                    self._f_path + f'/model/{self._name}_rg.pkl',
                    self._f_path +
                    f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model,
                        self._f_path + f'/model/{self._name}_rg.pkl')

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        # Figure size
        plt.figure(figsize=(15, 10), dpi=100)

        # Ground truth
        plt.plot(self._y_test, c='r')

        # Predictions
        plt.plot(data, c='b')

        # Save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')

        # Show the chart (optional)
        if show_chart:
            plt.show()
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
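
A minimal usage sketch for this class (assuming the module-level imports and the CSV layout the constructor expects are in place):

model = ExtraTreeClass()        # loads the data and fits the model on construction
preds, score = model.predict()  # predicts on the held-out years and prints the R2 score
model.save_model()              # persists the model to <base path>/model/extratree_rg.pkl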
Example #20
    #X1 = preprocessing.normalize(X1)
    X = list(zip(*X1))
    Y = cols[11]

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=rn)
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_test = np.array(X_test)
    y_test = np.array(y_test)

    #print(y_test)

    lin_reg_mod = ExtraTreeRegressor()
    
    lin_reg_mod.fit(X_train, y_train)  
    pred = lin_reg_mod.predict(X_test)
    #print(pred)
    #print(y_test)
    test_set_r2 = r2_score(y_test, pred)
    #print(test_set_r2)
    tr2 += test_set_r2
    
    #abs_er = mean_absolute_error(y_test, pred)
    #tabse+=abs_er

    temp = []
    for (i,j) in zip(y_test, pred):
        t = (abs(i-j))/float(i)
        temp.append(t)
    #print(temp)
    #print(np.median(temp))
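    # Vectorized NumPy equivalent of the percentage-error loop above
    # (a sketch; assumes y_test contains no zeros):
    temp = np.abs(y_test - pred) / np.abs(y_test)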
Example #21
# from sklearn.model_selection import GridSearchCV
# param_grid = [
#   { "max_depth":list(range(1, 100))}
#  ]
# grid = GridSearchCV(estimator=ExtraTreeRegressor(random_state=42), param_grid=param_grid, verbose=2, cv=10)
# grid_result = grid.fit(BL_LT_prepared_train,BL_LT_labels_train)
# print('Best Score: ', grid_result.best_score_)
# print('Best Params: ', grid_result.best_params_)


####################################################################################################################
                                              # ExtraTreeRegressor #
####################################################################################################################
Et_tree_reg = ExtraTreeRegressor(max_depth=13, random_state=42)
Et_tree_reg.fit(BL_LT_prepared_train, BL_LT_labels_train)
BL_LT_predicted = Et_tree_reg.predict(BL_LT_prepared_test)


Et_tree_mse = mean_squared_error(BL_LT_labels_test, BL_LT_predicted)
Et_tree_rmse = np.sqrt(Et_tree_mse)
# print(Et_tree_rmse)

Et_tree_mae = mean_absolute_error(BL_LT_labels_test, BL_LT_predicted)
# print(Et_tree_mae)

Et_tree_mape = (np.abs((BL_LT_predicted - BL_LT_labels_test) / BL_LT_labels_test).mean(axis=0))
# print("Et_tree: "+str(Et_tree_mape))

Et_tree_rmsle = np.sqrt(mean_squared_log_error(BL_LT_labels_test, BL_LT_predicted))
# print(Et_tree_rmsle)
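
A small helper consolidating the four error metrics computed above into a single call (a sketch; the function name is an assumption):

def report_errors(y_true, y_pred):
    # RMSE, MAE, MAPE, and RMSLE, matching the calculations above.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    mape = np.abs((y_pred - y_true) / y_true).mean(axis=0)
    rmsle = np.sqrt(mean_squared_log_error(y_true, y_pred))
    return rmse, mae, mape, rmsle

# print(report_errors(BL_LT_labels_test, BL_LT_predicted))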