Ejemplo n.º 1
0
def ExtraTreeGS(X_train, X_test, y_train, y_test):
    """Grid-search an ExtraTreeRegressor, then report and log its metrics.

    A 2-fold ``GridSearchCV`` over ``criterion`` and ``max_depth`` is run on
    the training data.  Because ``refit='r2'`` is set, the search itself
    refits the best candidate on the whole training set, so
    ``best_estimator_`` is used as-is (fitting it again would only replace it
    with a different random tree, since no ``random_state`` is fixed).

    Metrics are printed and collected for the held-out data and for the
    training data; the best parameters, the model and both metric sets are
    handed to ``saveBestParams`` and ``logSave``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets used for the search.
    X_test, y_test
        Held-out features and targets used for the validation metrics.
    """
    reg = ExtraTreeRegressor()
    # NOTE(review): "mse"/"mae" were renamed "squared_error"/"absolute_error"
    # in newer scikit-learn releases -- confirm against the installed version.
    grid_values = {
        'criterion': ["mse", "mae"],
        'max_depth': list(range(20, 25))
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)

    # Already refit on the full training set by GridSearchCV (refit='r2').
    reg = grid_reg.best_estimator_

    # Held-out data: print first, then collect the validation metrics.
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    # Training data: collect the metrics, then print them.
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="ExtraTreeGS", best_params=best_params)
    logSave(nameOfModel="ExtraTreeGS",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
Ejemplo n.º 2
0
 def fit(x, y):
     """Train a depth-limited ExtraTreeRegressor on ``x``/``y`` and return it."""
     tree_params = {
         'max_depth': 10,
         'max_features': 100,
         'min_impurity_decrease': 1e-6,
     }
     # fit() hands back the estimator itself, so it can be returned directly.
     return ExtraTreeRegressor(**tree_params).fit(x, y)
Ejemplo n.º 3
0
def get_regressor(training_set):
    """
    Estimate the value function V: S -> R with a regression tree.

    ``training_set`` holds (state, score) tuples; it is transposed into one
    sequence of states and one of scores, which are passed to ``fit``.
    Returns the fitted regressor.
    """
    model = ExtraTreeRegressor()
    columns = zip(*training_set)
    # fit() returns the estimator itself.
    return model.fit(*columns)
Ejemplo n.º 4
0
def get_regressor(training_set):
    """
    Fit a regressor approximating the value function (V: S -> R).

    The training set is a collection of (state, score) tuples.  Transposing
    it yields the positional arguments for ``fit``.
    """
    regressor = ExtraTreeRegressor()
    fit_args = tuple(zip(*training_set))
    regressor.fit(*fit_args)
    return regressor
Ejemplo n.º 5
0
def ExtraTreeRegressorPrediction(train_X, train_y, test_X, valid_X, valid_y):
    """Fit an ExtraTreeRegressor, report its validation MAPE, predict the test set.

    Parameters
    ----------
    train_X, train_y
        Data the tree is fitted on.
    test_X
        Features to predict; these predictions are the return value.
    valid_X, valid_y
        Validation data, used only to print the ``mape_loss`` score.

    Returns
    -------
    The predictions of the fitted tree for ``test_X``.
    """
    etr = ExtraTreeRegressor()
    etr.fit(train_X, train_y)

    result = etr.predict(test_X)

    valid_ypred = etr.predict(valid_X)
    valid_mape = mape_loss(valid_y, valid_ypred)

    # One pre-formatted argument in parentheses: the original Python 2 print
    # statement is a SyntaxError on Python 3, while this form prints the same
    # text on both.
    print(' the mape score of ExtraTreeRegressor in valid set is : %s'
          % (valid_mape,))
    return result
Ejemplo n.º 6
0
def build_lonely_tree_regressor(X, y, max_features, max_depth,
                                min_samples_split):
    """Fit a single ExtraTreeRegressor with the given limits and return it."""
    tree = ExtraTreeRegressor(
        max_depth=max_depth,
        max_features=max_features,
        min_samples_split=min_samples_split,
    )
    # fit() returns the estimator, which is what callers receive.
    return tree.fit(X, y)
Ejemplo n.º 7
0
def test_extra_tree_reg():
    """Check that a converted ExtraTreeRegressor predicts like the original.

    Two binary targets derived from the iris labels (class 0 vs rest, class 2
    vs rest) are each fitted at three depth settings.  For every fitted tree
    the converted estimator, fed plain Python lists, must produce predictions
    of the same shape and (numerically) the same values.
    """
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    targets = [(y == label).astype(int) for label in (0, 2)]

    for y_ in targets:
        for max_depth in (5, 10, None):
            clf = ExtraTreeRegressor(max_depth=max_depth, random_state=5)
            clf.fit(X, y_)
            clf_ = convert_estimator(clf)

            # Only the reference prediction runs with warnings silenced.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                scores = clf.predict(X)
            scores_ = clf_.predict(X_)

            assert np.allclose(scores.shape, shape(scores_))
            assert np.allclose(scores, scores_, equal_nan=True)
Ejemplo n.º 8
0
def ExtraTree(X_train, X_test, y_train, y_test):
    """Fit a default ExtraTreeRegressor and print/log its metrics.

    Metrics are printed and collected for the held-out data first, then for
    the training data; the model and both metric sets go to ``logSave``.
    """
    reg = ExtraTreeRegressor()
    reg.fit(X_train, y_train)

    # Held-out data: print, then collect the validation metrics.
    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training data: collect, then print.
    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    logSave(
        nameOfModel="ExtraTree",
        reg=reg,
        metrics=metrics,
        val_metrics=val_metrics,
    )
Ejemplo n.º 9
0
    def fit(self, X, y=None, **fit_params):
        """Grow one completely random tree on X.

        The tree is trained against synthetic Gaussian targets, so the
        supplied ``y`` plays no part in the fit.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        y : None, default=None
            Ignored; accepted only for API compatibility.

        fit_params : dict, optional
            Extra keyword arguments forwarded to the underlying tree's
            ``fit``.

        Returns
        -------
        self : estimator
            The instance itself, with ``tree_`` set.

        """
        X = check_array(X)
        rng = check_random_state(self.random_state)
        # One standard-normal pseudo-target per row, drawn before the same
        # generator is handed to the tree.
        pseudo_targets = rng.randn(X.shape[0])

        tree_kwargs = dict(
            min_samples_leaf=self.min_samples_leaf,
            max_leaf_nodes=self.max_leaf_nodes,
            max_features=1,  # a single candidate feature per split
            splitter='random',
            random_state=rng,
        )
        fitted = ExtraTreeRegressor(**tree_kwargs).fit(
            X, pseudo_targets, **fit_params)
        self.tree_ = fitted.tree_
        return self
# Split the prepared data into training (80%) and test (20%) sets.
ri_MakingLT_prepared_train, ri_MakingLT_prepared_test, ri_MakingLT_labels_train, ri_MakingLT_labels_test = train_test_split(
    ri_MakingLT_prepared, ri_MakingLT_labels, test_size=0.20, random_state=42)

# Split the training data again into an actual-training part (_re) and a
# validation part (_val).
ri_MakingLT_prepared_train_re, ri_MakingLT_prepared_train_val, ri_MakingLT_labels_train_re, ri_MakingLT_labels_train_val = train_test_split(
    ri_MakingLT_prepared_train,
    ri_MakingLT_labels_train,
    test_size=0.25,
    random_state=42)

### ExtraTreeRegressor ###

# Train the ExtraTreeRegressor model (the single-tree estimator from
# sklearn.tree, not the ExtraTreesRegressor ensemble).
# NOTE(review): the fit below uses the full training split, not the *_train_re
# subset, so the validation part is not used in this section -- confirm.
from sklearn.tree import ExtraTreeRegressor
Et_tree_reg = ExtraTreeRegressor(max_depth=11, random_state=42)
Et_tree_reg.fit(ri_MakingLT_prepared_train, ri_MakingLT_labels_train)
ri_MakingLT_predicted = Et_tree_reg.predict(ri_MakingLT_prepared_test)

# Root mean squared error on the test set.
from sklearn.metrics import mean_squared_error
Et_tree_reg_mse = mean_squared_error(ri_MakingLT_labels_test,
                                     ri_MakingLT_predicted)
Et_tree_reg_rmse = np.sqrt(Et_tree_reg_mse)
print(Et_tree_reg_rmse)

# Mean absolute error on the test set.
from sklearn.metrics import mean_absolute_error
Et_tree_reg_mae = mean_absolute_error(ri_MakingLT_labels_test,
                                      ri_MakingLT_predicted)
print(Et_tree_reg_mae)

# Mean absolute percentage error, as a fraction (not multiplied by 100).
# NOTE(review): divides by the test labels, so a label of 0 would produce
# inf/nan -- confirm the labels are never zero.
Et_tree_reg_mape = (np.abs((ri_MakingLT_predicted - ri_MakingLT_labels_test) /
                           ri_MakingLT_labels_test).mean(axis=0))
Ejemplo n.º 11
0
# Standardise every feature column.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics leak into the training features -- confirm intended.
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)

new_X = pd.DataFrame(scaled_X, columns=X.columns)
# Call head(); the bare attribute access in the original did nothing.
new_X.head()

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(new_X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

# R^2 score on the training data
from sklearn.tree import ExtraTreeRegressor
model = ExtraTreeRegressor()
model.fit(X_train, y_train)
print(model.score(X_train, y_train))

# R^2 score on the test data.  The model fitted on the training split is
# reused: refitting on X_test and scoring on X_test (as the original did)
# only measures how well a tree memorises the test rows.
print(model.score(X_test, y_test))

# Feature importances of the fitted tree; plot the five largest.
print(model.feature_importances_)
imp_feat = pd.Series(model.feature_importances_, index=X.columns)
imp_feat.nlargest(5).plot(kind='barh')
plt.show()

from sklearn.linear_model import LinearRegression
lm = LinearRegression()
Ejemplo n.º 12
0
from math import *
import pandas as pd
import numpy as np
from sklearn.tree import ExtraTreeRegressor
import matplotlib.pyplot as plt
import re,os
# Load the data set: 'temp' and 'street' are the inputs, 'ice' the target.
data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']

# Fit a single extra tree and predict the same rows it was trained on.
clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)

# Parenthesised single argument: the Python 2 print statement used originally
# is a SyntaxError on Python 3; this form prints the same on both.
print(clf.score(x, y))

# One x position per row.  The original hard-coded 31 points
# (np.arange(0.0, 31.0)), which only plots when the CSV has exactly 31 rows;
# for such a file this yields the identical array.
t = np.arange(len(data), dtype=float)
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
Ejemplo n.º 13
0
	def Build_MapMean_Model(self):
		"""Fit an ExtraTreeRegressor on the map features and dump it.

		Trains on ``self.MapFeature_list`` / ``self.MapMean_list``, prints the
		feature importances, and passes the model to ``self.Dump_Model`` under
		'Model/MapMean.model'.
		"""
		map_mean_model = ExtraTreeRegressor()

		map_mean_model.fit(self.MapFeature_list, self.MapMean_list)
		# Parenthesised single argument: the Python 2 print statement is a
		# SyntaxError on Python 3; this form prints the same on both.
		print(map_mean_model.feature_importances_)
		self.Dump_Model('Model/MapMean.model', map_mean_model)
Ejemplo n.º 14
0
from pandas import read_csv
from sklearn.tree import ExtraTreeRegressor
# Read the CSV and work on its raw value matrix
dataframe = read_csv('useformodel.csv')
array = dataframe.values

# Columns 0-25 are the inputs; column 26 is the target
X, Y = array[:, :26], array[:, 26]

# Fit a seeded tree and print how much each input column contributed
model = ExtraTreeRegressor(random_state=0).fit(X, Y)
print(model.feature_importances_)
Ejemplo n.º 15
0
# Score of the previously fitted `dt` model on the training data.
dt.score(X_train, y_train)

# <a id="79"></a> <br>
# ## 7-9 ExtraTreeRegressor

# In[ ]:

from sklearn.tree import ExtraTreeRegressor

# Default-parameter extra tree; no random_state is set.
dtr = ExtraTreeRegressor()

# In[ ]:

# Fit model
dtr.fit(X_train, y_train)

# In[ ]:

# Score the fitted model on the same training data it was fitted on
dtr.score(X_train, y_train)

# -----------------
# <a id="8"></a> <br>
# ## 8- Conclusion
# This kernel is not complete yet. I will try to cover all parts of the ML process with a variety of Python packages. I know there are still some problems, so I hope to get your feedback to improve it.

# You can follow me on:
# <br>
# > ###### [ GitHub](https://github.com/mjbahmani)
# <br>
Ejemplo n.º 16
0
# In[848]:


# Default-parameter extra tree regressor.
ETR = ExtraTreeRegressor()


# In[849]:


# Bare expression: shows the estimator when run as a notebook cell.
ETR


# In[856]:


# Fit on x / y (defined in earlier cells).
ETR.fit(x, y)


# In[857]:


# Predict the test set and plot the first prediction against the first row of
# y_test.
# NOTE(review): indexing with [0] / .iloc[0] suggests multi-output targets
# (one curve per sample) -- confirm.
ETR_prediction = ETR.predict(x_test)
plt.plot(ETR_prediction[0], label = 'prediction')
plt.plot(y_test.iloc[0], label = 'real')


# In[858]:


# Error metrics over the whole test set.
print('mean_absolute_error', mean_absolute_error(y_test, ETR_prediction))
print('mean_squared_error', mean_squared_error(y_test, ETR_prediction))
 # Per-fold score lists for one (i, j, k) parameter combination.
 # NOTE(review): mse_t, rmse_t, mae_t, the *_f lists, i, j, k, c, rkf and data
 # are defined before this fragment -- not visible here.
 mdae_t = []
 evs_t = []
 r2_t = []
 for tr_i, ts_i in rkf.split(data):
     print(i, j, k, c)
     # 'Rainfall' is the target column; every other column is a feature.
     train, test = data.iloc[tr_i], data.iloc[ts_i]
     train_x = train.drop(columns=['Rainfall'])
     train_y = train['Rainfall']
     test_x = test.drop(columns=['Rainfall'])
     test_y = test['Rainfall']
     # NOTE(review): criterion='mse' was renamed 'squared_error' in newer
     # scikit-learn releases -- confirm against the installed version.
     model = ExtraTreeRegressor(criterion='mse',
                                splitter='best',
                                max_depth=i,
                                min_samples_leaf=j,
                                min_samples_split=k)
     model.fit(train_x, train_y)
     ts_p = model.predict(test_x)
     # Score this fold.
     mse_t.append(mse(test_y, ts_p))
     rmse_t.append(rmse(test_y, ts_p))
     mae_t.append(mae(test_y, ts_p))
     mdae_t.append(mdae(test_y, ts_p))
     evs_t.append(evs(test_y, ts_p))
     r2_t.append(r2(test_y, ts_p))
     c += 1
     # NOTE(review): as indented here, the summary appends below run once per
     # fold (running means), not once per parameter combination -- confirm the
     # indentation was not lost when this snippet was extracted.
     dep_f.append(i)
     saml_f.append(j)
     sams_f.append(k)
     mse_f.append(np.mean(mse_t))
     rmse_f.append(np.mean(rmse_t))
     mae_f.append(np.mean(mae_t))
     mdae_f.append(np.mean(mdae_t))
Ejemplo n.º 18
0
from math import *
import pandas as pd
import numpy as np
from sklearn.tree import ExtraTreeRegressor
import matplotlib.pyplot as plt
import re, os
# Inputs are the 'temp' and 'street' columns; the target is 'ice'.
data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']

# Train one extra tree and predict the rows it was trained on.
clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)

# The original Python 2 print statement is a SyntaxError on Python 3; a
# single parenthesised argument prints identically on both.
print(clf.score(x, y))

# X axis with one point per row instead of the hard-coded 31
# (np.arange(0.0, 31.0)); identical for a 31-row file, and files of any other
# length no longer fail in plt.plot with a length mismatch.
t = np.arange(len(data), dtype=float)
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
Ejemplo n.º 19
0
# Standardise every column of df in place.
# NOTE(review): this includes column 0, which is used as the target below --
# confirm that scaling the target is intended.
sc=StandardScaler()
df.iloc[:,:]=sc.fit_transform(df.iloc[:,:])

# Feature Selection
# Univariate selection: f_regression score of every feature (columns 1..)
# against the target (column 0); k='all' keeps all features.
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression,chi2
best_features=SelectKBest(score_func=f_regression,k='all')
best_features.fit(df.iloc[:,1:],df.iloc[:,0])
feature_scores=pd.DataFrame(best_features.scores_,index=df.iloc[:,1:].columns)
feature_scores.plot(kind='barh')

# Feature Selection
# Tree-based: feature importances of a single ExtraTreeRegressor fitted on
# the same features/target (no random_state is set).
from sklearn.tree import ExtraTreeRegressor
regressor=ExtraTreeRegressor()
regressor.fit(df.iloc[:,1:],df.iloc[:,0])
importance_score=pd.Series(regressor.feature_importances_,index=df.iloc[:,1:].columns)
importance_score.plot(kind='barh')

# Segregating feature & target columns
x=df.iloc[:,1:]
y=df.iloc[:,0]

# Modelling: 70/30 train/test split with a fixed seed
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=0)

# Ridge Regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
Ejemplo n.º 20
0
class ExtraTreeClass:
    """
    Name      : ExtraTreeRegressor
    Attribute : None
    Method    : predict, predict_by_cv, predict_by_gs, save_model

    Loads the regression sample CSV, splits it by year into training
    (<= 2017) and test (>= 2018) rows, and fits an ExtraTreeRegressor that
    predicts a temperature reading from the seven readings before it.
    """
    def __init__(self):
        # Algorithm name (used in the saved model's file name)
        self._name = 'extratree'

        # Base path: parent of the directory that contains this file
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir))

        # Suppress warning messages
        warnings.filterwarnings('ignore')

        # Load the raw data
        data = pd.read_csv(self._f_path +
                           "/regression/resource/regression_sample.csv",
                           sep=",",
                           encoding="utf-8")

        # Row masks separating training rows from test rows
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Training samples
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test samples
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model
        self._model = ExtraTreeRegressor()

        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        """Build sliding-window samples from the ``temperature`` column.

        Every reading from the 8th onwards becomes a label; its feature
        vector is the seven readings immediately before it, oldest first.

        Returns
        -------
        (x, y) : tuple of lists
            ``x`` is a list of 7-element lists, ``y`` the matching labels.
            Both are empty when there are seven readings or fewer.
        """
        # Window length (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])

        x = [temps[i - base_interval:i]
             for i in range(base_interval, len(temps))]
        y = temps[base_interval:]
        return x, y

    # Plain prediction
    def predict(self, save_img=False, show_chart=False):
        """Predict the test set, print the R^2 score and optionally chart it.

        Parameters
        ----------
        save_img : bool
            When true, the prediction chart is written via
            ``save_chart_image``.
        show_chart : bool
            Forwarded to ``save_chart_image``; only has an effect together
            with ``save_img``.

        Returns
        -------
        list
            ``[predictions as a list, r2 score]``.
        """
        # Predict
        y_pred = self._model.predict(self._x_test)

        # Score
        score = r2_score(self._y_test, y_pred)

        # Report coefficients when the model exposes them
        if hasattr(self._model, 'coef_') and hasattr(self._model,
                                                     'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')

        print(f'Score = {score}')

        # Save the chart image if requested
        if save_img:
            self.save_chart_image(y_pred, show_chart)

        # Predictions & score
        return [list(y_pred), score]

    # Cross-validation prediction
    def predict_by_cv(self):
        """Placeholder: always returns False.

        Cross validation for regression is meant to be implemented per
        project.
        """
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        """Placeholder: not implemented, returns None."""
        pass

    # Save or renew the model
    def save_model(self, renew=False):
        """Dump the fitted model to ``<base>/model/<name>_rg.pkl``.

        With ``renew=True`` an existing model file is first renamed to a
        timestamped name so that it is kept rather than overwritten.
        """
        model_path = self._f_path + f'/model/{self._name}_rg.pkl'
        if renew and os.path.isfile(model_path):
            # Keep the previous model under a timestamped file name
            os.rename(
                model_path,
                self._f_path +
                f'/model/{str(self._name) + str(time.time())}_rg.pkl')
        joblib.dump(self._model, model_path)

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        """Plot test labels (red) against ``data`` (blue) and save a PNG.

        The image path is relative to the current working directory.
        """
        # Figure size
        plt.figure(figsize=(15, 10), dpi=100)

        # Labels
        plt.plot(self._y_test, c='r')

        # Predictions
        plt.plot(data, c='b')

        # Save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')

        # Show the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        # __init__ can fail part-way (e.g. the CSV is missing), in which case
        # some attributes were never set; only drop the ones that exist so
        # the finalizer itself never raises AttributeError.
        for attr in ('_x_train', '_x_test', '_y_train', '_y_test', '_x',
                     '_y', '_model'):
            self.__dict__.pop(attr, None)
# Split the frame: every column but the last is a feature, the last column is
# the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1].values
# Make y a single-column 2-D array so it can be passed to the imputer below.
y = y.reshape(-1, 1)

# Handle missing values (recorded as 0 in pm2.5): replace them with the mean
# of the remaining values.
from sklearn.impute import SimpleImputer
im = SimpleImputer(missing_values=0, strategy='mean')
im = im.fit(y)
y = im.transform(y)

# Feature selection: importances from a single ExtraTreeRegressor
from sklearn.tree import ExtraTreeRegressor
model = ExtraTreeRegressor()
model.fit(X, y)
print(model.feature_importances_)
feat_imp = pd.Series(model.feature_importances_, index=X.columns)
feat_imp.nlargest(5).plot(
    kind='barh')  # plot the 5 features with the highest importance for pm2.5
plt.show()

# No random_state is given, so the 70/30 split differs between runs.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# NOTE(review): y is still a column vector of shape (n, 1) here; scikit-learn
# may warn that a 1-D target was expected -- consider ravel() after imputing.
from sklearn.ensemble import RandomForestRegressor
reg = RandomForestRegressor()
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

print('r^2 value of train set:', reg.score(X_train, y_train))
Ejemplo n.º 22
0
def predict_extra_tree(train_X, train_Y, test, param=30):
    """Fit an ExtraTreeRegressor and return its predictions for ``test``.

    Parameters
    ----------
    train_X, train_Y
        Training features and targets.
    test
        Features to predict.
    param : int, default 30
        ``min_samples_leaf`` of the tree.

    Returns
    -------
    The predictions for ``test``.
    """
    # min_samples_split=2 is the smallest integer current scikit-learn
    # accepts; it is equivalent to the original value of 1, because a node
    # holding a single sample can never be split.  The criterion is left at
    # its default (mean squared error) instead of the string 'mse', which
    # newer releases renamed to 'squared_error'.
    clf = ExtraTreeRegressor(min_samples_leaf=param, min_samples_split=2)
    clf.fit(train_X, train_Y)
    preds = clf.predict(test)
    return preds
Ejemplo n.º 23
0
	def Build_MapMean_Model(self):
		"""Fit an ExtraTreeRegressor on the map features, dump it, print importances.

		Trains on ``self.MapFeature_list`` / ``self.MapMean_list``, stores the
		model through ``self.Dump_Model`` under 'Model/MapMean.model', then
		prints the feature importances.
		"""
		MapMean = ExtraTreeRegressor()
		MapMean.fit(self.MapFeature_list, self.MapMean_list)
		self.Dump_Model('Model/MapMean.model', MapMean)
		# Parenthesised single argument: the Python 2 print statement is a
		# SyntaxError on Python 3; this form prints the same on both.
		print(MapMean.feature_importances_)
def build_lonely_tree_regressor(X, y, max_features, max_depth, min_samples_split):
	"""Return one ExtraTreeRegressor fitted on X/y with the given limits."""
	settings = {
		'max_features': max_features,
		'max_depth': max_depth,
		'min_samples_split': min_samples_split,
	}
	# fit() returns the estimator itself.
	return ExtraTreeRegressor(**settings).fit(X, y)
Ejemplo n.º 25
0
def _plot_model_comparison(curves, outfile):
    """Draw the three-panel model comparison figure and save it to ``outfile``.

    ``curves`` is a sequence of ``(z, mean, rms, frac, fmt, label)`` tuples,
    one per model.  Panel 1 plots ``mean`` against ``z`` (with legend), panel
    2 plots the positive ``rms`` values and panel 3 the positive ``frac``
    values, both on a log y axis.  Reads the module-level ``dcrLevel``.
    """
    fig, (sp1, sp2, sp3) = plt.subplots(3, 1, sharex=True, figsize=(16, 12))

    for z, mean, _, _, fmt, label in curves:
        sp1.plot(z, mean, fmt, label=label)

    # Only strictly positive values are drawn in the two lower panels
    # (presumably because both are switched to a log scale below).
    for z, _, rms, _, fmt, _ in curves:
        keep = np.where(rms > 0.)
        sp2.plot(z[keep], rms[keep], fmt)
    ymin = sp2.get_ylim()[0]
    sp2.set_ylim(max(1e-7, ymin), 1e-1)

    for z, _, _, frac, fmt, _ in curves:
        keep = np.where(frac > 0.)
        sp3.plot(z[keep], frac[keep], fmt)
    ymin = sp3.get_ylim()[0]
    sp3.set_ylim(max(1e-7, ymin), 1.1)

    sp1.legend(loc=2, fancybox=True)
    sp1.set_title("Mean refraction residual (arcsec)", weight="bold")
    sp2.set_ylabel("RMS residual (arcsec)", weight="bold")
    sp3.set_ylabel("f_tot with dR>%.3f" % (dcrLevel), weight="bold")
    sp3.set_xlabel("Zenith distance (deg)", weight="bold")
    sp1.axhline(y=0, c='k', linestyle='--', alpha=0.5)
    sp2.axhline(y=dcrLevel, c='k', linestyle='--', alpha=0.5)
    sp3.axhline(y=0.01, c='k', linestyle='--', alpha=0.5)
    sp2.semilogy()
    sp3.semilogy()
    plt.savefig(outfile)


def doregress(X_train, y_train, n_train, X_test, y_test, n_test, band, fnames):
    """Fit several regressors and compare their residual statistics in plots.

    A linear regression, an ExtraTreeRegressor, a DecisionTreeRegressor and a
    RandomForestRegressor are fitted on the training data; each tree model is
    fitted both unweighted and with sample weights derived from ``n_train``.
    Every tree model is passed to ``doplot``, and two summary figures
    (unweighted and weighted models, each against the linear baseline) are
    saved as ``R_<band>_<extension>.png`` and
    ``R_<band>_<extension>_weight.png``.

    Relies on the module-level names ``z_test`` and ``dcrLevel`` and on the
    helpers ``summstats`` and ``doplot``.

    Parameters
    ----------
    X_train, y_train : training features and targets.
    n_train : per-sample values used to build the sample weights.
    X_test, y_test, n_test : test data handed to ``summstats`` / ``doplot``.
    band : band name used in plot titles and file names.
    fnames : forwarded to ``doplot``.
    """
    lin = LinearRegression()
    lin.fit(X_train, y_train)
    lres = lin.predict(X_test) - y_test
    zl, ml, sl, fl = summstats(z_test, lres, n_test)

    # Some of these appear to be unstable, i.e. feature importance changes.
    # Add further extensions ("B", "C", ...) to the tuple to repeat the
    # experiment under different output names.
    for extension in ("A",):
        # Pre-formatted single argument: prints the same on Python 2 and 3
        # (the original print statement is a SyntaxError on Python 3).
        print("# Regressing %s" % (extension,))

        xtr = ExtraTreeRegressor()
        xtr.fit(X_train, y_train)
        zx, mx, sx, fx = doplot(xtr, X_test, y_test, z_test, n_test, fnames, "%s-band ExtraTreeRegressor" % (band), "R_%s_%s_ext.png" % (band, extension))

        xtrw = ExtraTreeRegressor()
        xtrw.fit(X_train, y_train, sample_weight=np.log10(n_train))
        zxw, mxw, sxw, fxw = doplot(xtrw, X_test, y_test, z_test, n_test, fnames, "%s-band weighted ExtraTreeRegressor" % (band), "R_%s_%s_ext_weight.png" % (band, extension))

        ####

        tree = DecisionTreeRegressor()
        tree.fit(X_train, y_train)
        zt, mt, st, ft = doplot(tree, X_test, y_test, z_test, n_test, fnames, "%s-band DecisionTreeRegressor" % (band), "R_%s_%s_tree.png" % (band, extension))

        treew = DecisionTreeRegressor()
        treew.fit(X_train, y_train, sample_weight=np.log10(n_train))
        ztw, mtw, stw, ftw = doplot(treew, X_test, y_test, z_test, n_test, fnames, "%s-band weighted DecisionTreeRegressor" % (band), "R_%s_%s_tree_weight.png" % (band, extension))

        ####
        # NOTE(review): the forest is weighted by n_train directly, while the
        # single trees above use log10(n_train) -- confirm this is intended.
        weights = n_train
        nt = 50

        rfr = RandomForestRegressor(n_estimators=nt)
        rfr.fit(X_train, y_train)
        zr, mr, sr, fr = doplot(rfr, X_test, y_test, z_test, n_test, fnames, "%s-band RandomForestRegressor" % (band), "R_%s_%s_%d_rfr.png" % (band, extension, nt))

        rfrw = RandomForestRegressor(n_estimators=nt)
        rfrw.fit(X_train, y_train, sample_weight=weights)
        zrw, mrw, srw, frw = doplot(rfrw, X_test, y_test, z_test, n_test, fnames, "%s-band weighted RandomForestRegressor" % (band), "R_%s_%s_%d_rfr_weight.png" % (band, extension, nt))
        # Median and 0.741 * interquartile range of the fractions for the
        # unweighted and the weighted forest.
        print("RF %d : %.5e +/- %.5e vs weighted %.5e +/- %.5e" % (
            nt,
            np.median(fr), 0.741 * (np.percentile(fr, 75) - np.percentile(fr, 25)),
            np.median(frw), 0.741 * (np.percentile(frw, 75) - np.percentile(frw, 25))))

        ####

        # Compare all unweighted models against the linear baseline
        _plot_model_comparison(
            [
                (zl, ml, sl, fl, "r-", "LinearRegression"),
                (zt, mt, st, ft, "b-", "DecisionTreeRegressor"),
                (zr, mr, sr, fr, "g-", "RandomForestRegressor"),
                (zx, mx, sx, fx, "m-", "ExtraTreeRegressor"),
            ],
            "R_%s_%s.png" % (band, extension))

        ###

        # Same comparison for the weighted models
        _plot_model_comparison(
            [
                (zl, ml, sl, fl, "r-", "LinearRegression"),
                (ztw, mtw, stw, ftw, "b-", "DecisionTreeRegressor weighted"),
                (zrw, mrw, srw, frw, "g-", "RandomForestRegressor weighted"),
                (zxw, mxw, sxw, fxw, "m-", "ExtraTreeRegressor weighted"),
            ],
            "R_%s_%s_weight.png" % (band, extension))
                                                    random_state=42)
"""##Model Selection"""

from sklearn.ensemble import RandomForestRegressor

# Baseline: default random forest, scored on the held-out data.
rfr = RandomForestRegressor()
rfr.fit(X_train, y_train)
r2_score(y_test, rfr.predict(X_test))

mean_squared_error(y_test, rfr.predict(X_test))

X_train.columns.shape
# NOTE(review): `forest` is only assigned two lines below; run top to bottom
# this line raises NameError unless `forest` already exists from an earlier
# cell -- confirm the intended cell order.
forest.feature_importances_.shape

# Despite its name, `forest` is a single ExtraTreeRegressor, not an ensemble.
forest = ExtraTreeRegressor()
forest.fit(X_train, y_train)
importances = forest.feature_importances_

# Feature indices sorted by decreasing importance.
indices = np.argsort(importances)[::-1]

# Print the feature ranking
print("Feature ranking:")

for f in range(X_train.shape[1]):
    print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

# Plot the impurity-based feature importances of the fitted tree
plt.figure()
plt.title("Feature importances")
plt.bar(range(X_train.shape[1]),
        importances[indices],
Ejemplo n.º 27
0
# Target: the 'MakingLT' column, kept as a one-column DataFrame.
et_m_Outputdata = et_MakingLT[['MakingLT']]

# Convert the data to arrays for building the model
et_X1 = et_m_Inputdata.values
et_Y1 = et_m_Outputdata.values

# Split into training data and test data
et_X1_train, et_X1_test, et_Y1_train, et_Y1_test = train_test_split(
    et_X1, et_Y1, test_size=0.33, random_state=42)

########################################################################################################################
# Build the ExtraTree model
making_extratree_model = ExtraTreeRegressor(max_depth=10, random_state=42)

making_extratree_model.fit(et_X1_train, et_Y1_train)

et_m_predicted = making_extratree_model.predict(et_X1_test)
# Clamp negative predictions to 0.
et_m_predicted[et_m_predicted < 0] = 0

# Reshape the predictions into an (n, 1) column so they line up with
# et_Y1_test, which comes from a one-column DataFrame.
et_length_x1test = len(et_X1_test)
et_m_predicted = et_m_predicted.reshape(et_length_x1test, 1)

# Check the model's performance: MAE, MAPE (as a fraction), RMSE and RMSLE.
# NOTE(review): MAPE divides by et_Y1_test, so a target of 0 yields inf/nan --
# confirm the targets are never zero.
et_m_mae = abs(et_m_predicted - et_Y1_test).mean(axis=0)
et_m_mape = (np.abs((et_m_predicted - et_Y1_test) / et_Y1_test).mean(axis=0))
et_m_rmse = np.sqrt(((et_m_predicted - et_Y1_test)**2).mean(axis=0))
et_m_rmsle = np.sqrt(
    (((np.log(et_m_predicted + 1) - np.log(et_Y1_test + 1))**2).mean(axis=0)))
Ejemplo n.º 28
0
def getModel(x, y):
    """Return a default ExtraTreeRegressor fitted on ``x`` and ``y``."""
    # Saving the model with joblib (to './model/et') is currently disabled.
    # fit() returns the estimator itself.
    return ExtraTreeRegressor().fit(x, y)
Ejemplo n.º 29
0
# R^2 of the previously fitted random forest on the test data.
res5 = forest_reg.score(X_test, y_test)
print('forest_reg: ', res5)

# Each estimator below is fitted exactly once.  The original fitted every
# model twice in a row, which doubles the training time and, for the unseeded
# models, merely replaces the first fit with another random one.
grad_reg = GradientBoostingRegressor(n_estimators=500)
grad_reg.fit(X_train, y_train)
res6 = grad_reg.score(X_test, y_test)
print('grad_reg: ', res6)

ada_reg = AdaBoostRegressor(n_estimators=200)
ada_reg.fit(X_train, y_train)
res7 = ada_reg.score(X_test, y_test)
print('ada_reg: ', res7)

decision_reg = DecisionTreeRegressor(random_state=333,
                                     min_samples_leaf=3,
                                     max_leaf_nodes=5)
decision_reg.fit(X_train, y_train)
res8 = decision_reg.score(X_test, y_test)
print('decision_reg: ', res8)

extraTree_reg = ExtraTreeRegressor(random_state=333,
                                   min_samples_leaf=3,
                                   max_leaf_nodes=5)
extraTree_reg.fit(X_train, y_train)
res9 = extraTree_reg.score(X_test, y_test)
print('extraTree_reg: ', res9)
Ejemplo n.º 30
0
	def Build_MapMean_Model(self):
		"""Fit an ExtraTreeRegressor on the map features and dump the model.

		Uses ``self.MapFeature_list`` as inputs and ``self.MapMean_list`` as
		targets; the fitted model goes to ``self.Dump_Model`` under
		'Model/MapMean.model'.
		"""
		# fit() returns the estimator itself.
		fitted_model = ExtraTreeRegressor().fit(self.MapFeature_list, self.MapMean_list)
		self.Dump_Model('Model/MapMean.model', fitted_model)
from sklearn.ensemble import RandomForestRegressor
rf_model=RandomForestRegressor(n_estimators=700,random_state=42)
rf_model.fit(x_train,y_train)
y_predict=rf_model.predict(x_test)
r2_score(y_test,y_predict.ravel())


# ### ExtraTreeRegressor

# In[85]:


from sklearn.tree import ExtraTreeRegressor
extratree_model=ExtraTreeRegressor(random_state=42)
extratree_model.fit(x_train,y_train)
y_predict=extratree_model.predict(x_test)
r2_score(y_test,y_predict.ravel())


# ### Result
#
# From this we can conclude that, out of the models tried, the RandomForestRegressor works well, with 90.66% accuracy, which is very good.

# In[86]:


# Save the model with pickle so that it can be reused later.
# NOTE(review): the text above names the random forest as the best model, but
# the object pickled here is extratree_model -- confirm which one should be
# saved.
import pickle
# Context managers close the file handles; the original left both open.
with open('model.pkl','wb') as model_file:
    pickle.dump(extratree_model,model_file)
with open('model.pkl','rb') as model_file:
    model=pickle.load(model_file)
Ejemplo n.º 32
0
 def ExtraTreesModel(self, train_x, train_y):
     """Announce the training run, then return a fitted ExtraTreeRegressor."""
     print('begin train ExtraTrees')
     # fit() returns the estimator itself.
     return ExtraTreeRegressor().fit(train_x, train_y)
    # Number of feature columns.
    # NOTE(review): X, y, cv, alg and inf come from the enclosing function,
    # which is not visible in this fragment.
    n = X.shape[1]

    # Best (lowest) MSE found per subset size: on the training folds
    # ("int") and on the test folds ("ext").
    int_scores = {}
    ext_scores = {}

    # Exhaustive search over every feature subset of every size 1..n.
    for i in range(1, n + 1):
        int_score_tmp1 = inf
        ext_score_tmp1 = inf
        for features in combinations(range(n), i):
            X_cuted = X[:, features]
            int_score_tmp2 = inf
            ext_score_tmp2 = inf
            for train_index, test_index in cv.split(X_cuted):
                X_train, X_test = X_cuted[train_index], X_cuted[test_index]
                y_train, y_test = y[train_index], y[test_index]

                # Error on the data the model was fitted on.
                alg.fit(X_train, y_train)
                y_pred = alg.predict(X_train)
                error = mean_squared_error(y_train, y_pred)
                int_score_tmp2 = min(int_score_tmp2, error)

                # Error on the held-out fold.
                # NOTE(review): min() keeps the best single fold rather than
                # the mean over folds -- confirm this is intended.
                y_pred = alg.predict(X_test)
                error = mean_squared_error(y_test, y_pred)
                ext_score_tmp2 = min(ext_score_tmp2, error)
            int_score_tmp1 = min(int_score_tmp1, int_score_tmp2)
            ext_score_tmp1 = min(ext_score_tmp1, ext_score_tmp2)
        int_scores[i] = int_score_tmp1
        ext_scores[i] = ext_score_tmp1

    print(int_scores, ext_scores)
Ejemplo n.º 34
0
def getExtraTreeModel(x, y):
    """Fit a default ExtraTreeRegressor on ``x``/``y`` and return it."""
    model = ExtraTreeRegressor()
    # fit() returns the estimator itself.
    return model.fit(x, y)
Ejemplo n.º 35
0
    # The first 11 entries of cols are the feature columns; entry 11 is the
    # target.
    # NOTE(review): cols, rn and tr2 are defined outside this fragment.
    X1 = cols[0:11]
    #X1 = preprocessing.normalize(X1)
    # Transpose the columns into one tuple per sample.
    X = list(zip(*X1))
    Y = cols[11]

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state=rn) 
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_test = np.array(X_test)
    y_test = np.array(y_test)

    #print(y_test)

    # Despite its name, lin_reg_mod is an ExtraTreeRegressor.
    lin_reg_mod = ExtraTreeRegressor()
    
    lin_reg_mod.fit(X_train, y_train)  
    pred = lin_reg_mod.predict(X_test)
    #print(pred)
    #print(y_test)
    test_set_r2 = r2_score(y_test, pred)
    #print(test_set_r2)
    # Accumulate the R^2 of this run into the running total.
    tr2+=test_set_r2
    
    #abs_er = mean_absolute_error(y_test, pred)
    #tabse+=abs_er

    # Relative absolute error of every test prediction.
    # NOTE(review): divides by the true value, so a target of 0 raises
    # ZeroDivisionError or yields inf -- confirm targets are non-zero.
    temp = []
    for (i,j) in zip(y_test, pred):
        t = (abs(i-j))/float(i)
        temp.append(t)
    #print(temp)
from sklearn.tree import DecisionTreeRegressor

# Define model. Specify a number for random_state to ensure same results each run
dt = DecisionTreeRegressor(random_state=1)

# Fit model
dt.fit(X_train, y_train)
dt_prediction = dt.predict(X_test)
# NOTE(review): accuracy_score is a classification metric; it only works here
# if the regressor's predictions are exact class labels -- confirm, or use a
# regression metric instead.
dt_score = accuracy_score(y_test, dt_prediction)
print(dt_score)
from sklearn.tree import ExtraTreeRegressor
# Define model. No random_state is given here, so results can differ between runs
etr = ExtraTreeRegressor()
# Fit model
etr.fit(X_train, y_train)
etr_prediction = etr.predict(X_test)
# NOTE(review): same concern as above about accuracy_score on a regressor.
etr_score = accuracy_score(y_test, etr_prediction)
print(etr_score)
# Features: every training column except the target 'Survived' and the
# 'PassengerId' identifier.
X_train = df_train.drop("Survived", axis=1)
y_train = df_train["Survived"]
X_train = X_train.drop("PassengerId", axis=1)
# The test frame only has the identifier removed.
X_test = df_test.drop("PassengerId", axis=1)
# Fit the classifier on the training data in one chained call.
xgboost = xgb.XGBClassifier(max_depth=3, n_estimators=300,
                            learning_rate=0.05).fit(X_train, y_train)
Y_pred = xgboost.predict(X_test)
# Submission file: one row per test passenger with the predicted label.
submission = pd.DataFrame({
    "PassengerId": df_test["PassengerId"],
    "Survived": Y_pred
})
submission.to_csv('submission.csv', index=False)
 def extra_tree_regressor(self):
     """Fit an ExtraTreeRegressor on the preprocessed split and report it.

     The split comes from ``self.preprocessing()``; the test predictions are
     handed to ``self.printing`` under the label 'Extra Tree'.
     """
     x_train, x_test, y_train, y_test = self.preprocessing()
     model = ExtraTreeRegressor()
     fitted = model.fit(x_train, y_train)
     predictions = fitted.predict(x_test)
     self.printing(y_test, predictions, 'Extra Tree')
Ejemplo n.º 38
0
import matplotlib.pyplot as plt
import seaborn as sb

# Load the combined data set and drop every row that has a missing value.
df=pd.read_csv('Data/Real-Data/Real_combine.csv')

df=df.dropna()

# Exploratory views: pairwise scatter plots and the correlation matrix.
sb.pairplot(df)
df.corr()

# Features are all columns but the last; the last column is the target.
X=df.iloc[:,:-1]
y=df.iloc[:,-1]

# Feature importances from a single ExtraTreeRegressor (no random_state set);
# the five largest are plotted.
from sklearn.tree import ExtraTreeRegressor
et=ExtraTreeRegressor()
et.fit(X,y)
print(et.feature_importances_)
feat_imp=pd.Series(et.feature_importances_,index=X.columns)
feat_imp.nlargest(5).plot(kind='barh')
plt.show()

# 70/30 split; no random_state, so the split differs between runs.
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30)

from keras.models import Sequential
from keras.layers import Dense

regressor=Sequential()
# Add the input layer and the first hidden layer (128 ReLU units)
regressor.add(Dense(units=128,kernel_initializer='normal',input_dim=X_train.shape[1],activation='relu'))