Example No. 1
import pandas as pd
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
df.iloc[:, :] = sc.fit_transform(df.iloc[:, :])

# Feature Selection
# Univariate selection
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression, chi2
best_features = SelectKBest(score_func=f_regression, k='all')
best_features.fit(df.iloc[:, 1:], df.iloc[:, 0])
feature_scores = pd.DataFrame(best_features.scores_, index=df.iloc[:, 1:].columns)
feature_scores.plot(kind='barh')

# Tree-based feature importance
from sklearn.tree import ExtraTreeRegressor
regressor = ExtraTreeRegressor()
regressor.fit(df.iloc[:, 1:], df.iloc[:, 0])
importance_score = pd.Series(regressor.feature_importances_, index=df.iloc[:, 1:].columns)
importance_score.plot(kind='barh')

# Segregating feature & target columns
x = df.iloc[:, 1:]
y = df.iloc[:, 0]

# Modelling
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

# Ridge Regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
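
# The snippet ends right after the imports; a minimal sketch of how the Ridge
# grid search would typically continue (the alpha grid and the scoring metric
# below are assumptions, not part of the original):
ridge_params = {'alpha': [0.01, 0.1, 1.0, 10.0, 100.0]}
ridge_grid = GridSearchCV(Ridge(), ridge_params, scoring='neg_mean_squared_error', cv=5)
ridge_grid.fit(x_train, y_train)
print(ridge_grid.best_params_, ridge_grid.score(x_test, y_test))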
Example No. 2
from pandas import read_csv
from sklearn.tree import ExtraTreeRegressor
# load data
dataframe = read_csv('useformodel.csv')
array = dataframe.values

X = array[:, 0:26]
Y = array[:, 26]
# feature extraction
model = ExtraTreeRegressor(random_state=0)
model.fit(X, Y)
print(model.feature_importances_)
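
# For readability, the raw importances array can be paired with the feature
# names (assuming, as above, that the first 26 columns are the features):
import pandas as pd
importances = pd.Series(model.feature_importances_, index=dataframe.columns[:26])
print(importances.sort_values(ascending=False))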
Example No. 3
# (The snippet begins mid-statement; presumably the one-hot-encoded inputs
# are being concatenated, e.g. via pd.concat.)
et_m_Inputdata = pd.concat(
    (et_m_Material_one_hot_encoded, et_m_Making_Co_one_hot_encoded),
    axis=1)

et_m_Outputdata = et_MakingLT[['MakingLT']]

# Convert the data to vectors (NumPy arrays) to build the model
et_X1 = et_m_Inputdata.values
et_Y1 = et_m_Outputdata.values

# Split into training data and test data
et_X1_train, et_X1_test, et_Y1_train, et_Y1_test = train_test_split(
    et_X1, et_Y1, test_size=0.33, random_state=42)

########################################################################################################################
# Build the ExtraTree model
making_extratree_model = ExtraTreeRegressor(max_depth=10, random_state=42)

making_extratree_model.fit(et_X1_train, et_Y1_train)

et_m_predicted = making_extratree_model.predict(et_X1_test)
et_m_predicted[et_m_predicted < 0] = 0

# Reshape the prediction array from shape (n,) to (n, 1)
et_length_x1test = len(et_X1_test)
et_m_predicted = et_m_predicted.reshape(et_length_x1test, 1)

# Check the trained model's performance
et_m_mae = abs(et_m_predicted - et_Y1_test).mean(axis=0)
et_m_mape = (np.abs((et_m_predicted - et_Y1_test) / et_Y1_test).mean(axis=0))
et_m_rmse = np.sqrt(((et_m_predicted - et_Y1_test)**2).mean(axis=0))
# The original line is cut off; completed here with the standard RMSLE formula.
et_m_rmsle = np.sqrt(
    ((np.log1p(et_m_predicted) - np.log1p(et_Y1_test))**2).mean(axis=0))
Example No. 4
def pass_arguments(self, kwargs):
    super().__init__(ExtraTreeRegressor(**kwargs))
Example No. 5
tree = DecisionTreeClassifier()
tree = tree.fit(X_train, Y_train)
Y1 = tree.predict(X_train)
Y2 = tree.predict(X_test)
print(accuracy_score(Y_train, Y1))  #1.0
print(accuracy_score(Y_test, Y2))  #0.8791208791208791

# Note: accuracy_score counts exact label matches, so the regressor scores
# below are only meaningful because the target takes discrete values.
tree = DecisionTreeRegressor()
tree = tree.fit(X_train, Y_train)
Y1 = tree.predict(X_train)
Y2 = tree.predict(X_test)
print(accuracy_score(Y_train, Y1)) #1.0
print(accuracy_score(Y_test, Y2)) #0.8571428571428571

tree = ExtraTreeClassifier()
tree = tree.fit(X_train, Y_train)
Y1 = tree.predict(X_train)
Y2 = tree.predict(X_test)
print(accuracy_score(Y_train, Y1)) #1.0
print(accuracy_score(Y_test, Y2)) #0.7472527472527473

tree = ExtraTreeRegressor()
tree = tree.fit(X_train, Y_train)
Y1 = tree.predict(X_train)
Y2 = tree.predict(X_test)
print(accuracy_score(Y_train, Y1)) #1.0
print(accuracy_score(Y_test, Y2)) #0.7912087912087912



Example No. 6
def getExtraTreeModel(x, y):
    et = ExtraTreeRegressor()
    et.fit(x, y)
    return et
Example No. 7
def get_models(methods=()):
    models = dict()
    # linear models
    if 'LinearRegression' in methods:
        models['lr'] = LinearRegression()

    alpha = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    if 'Lasso' in methods:
        for a in alpha:
            models['lasso-' + str(a)] = Lasso(alpha=a)
    if 'Ridge' in methods:
        for a in alpha:
            models['ridge-' + str(a)] = Ridge(alpha=a)
    if 'ElasticNet' in methods:
        for a1 in alpha:
            for a2 in alpha:
                name = 'en-' + str(a1) + '-' + str(a2)
                models[name] = ElasticNet(alpha=a1, l1_ratio=a2)
    if 'HuberRegressor' in methods:
        models['huber'] = HuberRegressor()
    if 'Lars' in methods:
        models['lars'] = Lars()
    if 'LassoLars' in methods:
        models['llars'] = LassoLars()
    if 'PassiveAggressiveRegressor' in methods:
        models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    if 'RANSACRegressor' in methods:
        models['ransac'] = RANSACRegressor()
    if 'SGDRegressor' in methods:
        models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    if 'TheilSenRegressor' in methods:
        models['theil'] = TheilSenRegressor()
    # non-linear models
    if 'KNeighborsRegressor' in methods:
        n_neighbors = range(1, 21)
        for k in n_neighbors:
            models['knn-' + str(k)] = KNeighborsRegressor(n_neighbors=k)
    if 'DecisionTreeRegressor' in methods:
        models['cart'] = DecisionTreeRegressor()
    if 'ExtraTreeRegressor' in methods:
        models['extra'] = ExtraTreeRegressor()
    if 'SVR' in methods:
        models['svml'] = SVR(kernel='linear')
        models['svmp'] = SVR(kernel='poly')
        c_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        for c in c_values:
            models['svmr' + str(c)] = SVR(C=c)
    # ensemble models
    n_trees = 100
    if 'AdaBoostRegressor' in methods:
        models['ada'] = AdaBoostRegressor(n_estimators=n_trees)
    if 'BaggingRegressor' in methods:
        models['bag'] = BaggingRegressor(n_estimators=n_trees)
    if 'RandomForestRegressor' in methods:
        models['rf'] = RandomForestRegressor(n_estimators=n_trees)
    if 'ExtraTreesRegressor' in methods:
        models['et'] = ExtraTreesRegressor(n_estimators=n_trees)
    if 'GradientBoostingRegressor' in methods:
        models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
    print('Defined %d models' % len(models))
    return models
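
# A possible driver for the factory above (x, y and the chosen method list
# are assumptions; any scoring metric could be substituted):
from sklearn.model_selection import cross_val_score
models = get_models(methods=['LinearRegression', 'Ridge', 'ExtraTreeRegressor'])
for name, model in models.items():
    scores = cross_val_score(model, x, y, cv=5, scoring='neg_mean_absolute_error')
    print('%s: %.3f' % (name, scores.mean()))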
Example No. 8
    ('RANSACRegressor', lambda: RANSACRegressor()),
    ('SGDRegressor', lambda: SGDRegressor()),
    # Way too slow.
    #('TheilSenRegressor', lambda: TheilSenRegressor()),

    # Neighbors.
    ('KNeighborsRegressor', lambda: KNeighborsRegressor()),
    # Predicts NaN, infinity, or values that are too large.
    #('RadiusNeighborsRegressor', lambda: RadiusNeighborsRegressor()),

    # Neural network.
    # Increase max_iter to avoid Warning about non-convergence within max_iter.
    ('MLPRegressor', lambda: MLPRegressor(max_iter=1000)),

    # Support vector machine.
    ('SVR', lambda: SVR()),
    ('LinearSVR', lambda: LinearSVR()),
    ('NuSVR', lambda: NuSVR()),

    # Tree.
    ('DecisionTreeRegressor', lambda: DecisionTreeRegressor()),
    ('ExtraTreeRegressor', lambda: ExtraTreeRegressor()),
])

# Regressors that do not support the sample_weight optional fit() argument.
REGRESSORS_NOT_SUPPORTING_SAMPLE_WEIGHT = set([
    'PLSRegression', 'GaussianProcessRegressor', 'PassiveAggressiveRegressor',
    'RandomizedLogisticRegression', 'SGDRegressor', 'TheilSenRegressor',
    'KNeighborsRegressor', 'MLPRegressor'
])
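
# A sketch of how such a set is typically consulted at fit time (this helper
# is an assumption for illustration, not part of the original module):
def fit_regressor(name, regressor, X, y, sample_weight=None):
    # Pass sample_weight only to regressors whose fit() accepts it.
    if sample_weight is not None and name not in REGRESSORS_NOT_SUPPORTING_SAMPLE_WEIGHT:
        regressor.fit(X, y, sample_weight=sample_weight)
    else:
        regressor.fit(X, y)
    return regressor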
Example No. 9
y_train = train_value.iloc[:, -1]

test_value = df[df['date'] >= '2020-09-01']

x_test = test_value.iloc[:, 1:-1]
y_test = test_value.iloc[:, -1]

# print(x_train.shape, y_train.shape) #(321035, 6) (321035,)
# print(x_test.shape, y_test.shape)   #(16707, 6) (16707,)

kfold = KFold(n_splits=5, shuffle=True)

# Training loop
scalers = [MinMaxScaler(), StandardScaler()]
models = [DecisionTreeRegressor(), RandomForestRegressor(), BaggingRegressor(),
          ExtraTreeRegressor(), ExtraTreesRegressor()]
# , KNeighborsRegressor()
# BaggingRegressor, DecisionTreeRegressor, ExtraTreeRegressor, ExtraTreesRegressor, HistGradientBoostingRegressor, RandomForestRegressor

result_list = []
for i in models:
    print(i, '   :')

    #2. Build the model
    model = i

    scores = cross_val_score(model, x_train, y_train, cv=kfold)
    print('scores : ', scores)

    #3. Train
    model.fit(x_train, y_train)
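
    #4. Evaluate (a hedged continuation: the original snippet is cut off after
    # fit(); scoring the hold-out split with r2_score is an assumption)
    from sklearn.metrics import r2_score
    print('test r2 : ', r2_score(y_test, model.predict(x_test)))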
Example No. 10
print(Lscore.mean())
plt.plot(Lscore)
#%% SVM
svc = svm.SVC(C=1, kernel='linear')
SVCscore = cross_val_score(svc, X, Y_label, cv=k_fold, scoring='accuracy')
print(SVCscore.mean())
plt.plot(SVCscore)

#%% Regression Task

x_train, x_test, y_train, y_test = train_test_split(X,
                                                    Y_value,
                                                    test_size=0.33,
                                                    shuffle=False)

models = [("RF", ExtraTreeRegressor(random_state=0)),
          ("LR", LinearRegression(n_jobs=-1))]

for m in models:
    m[1].fit(x_train, y_train)

    # Make an array of predictions on the test set
    pred = m[1].predict(x_test)

    # Output the hit-rate and the confusion matrix for each model
    print("%s:\n%0.6f" % (m[0], m[1].score(x_test, y_test)))

    result = pd.DataFrame(index=y_test.index)
    result['y_pred'] = pred
    result['y_test'] = y_test
    #Linscore=cross_val_score(LinRe,X,Y_value, cv=k_fold, scoring= 'r2')
Example No. 11
    # KNN regression
    from sklearn import neighbors
    model_knn = neighbors.KNeighborsRegressor()

    # Random forest regression
    from sklearn import ensemble
    model_random_forest = ensemble.RandomForestRegressor(n_estimators=20)

    # AdaBoost regression
    from sklearn import ensemble
    model_adaboost = ensemble.AdaBoostRegressor(n_estimators=50)

    # GBRT (gradient-boosted regression tree) regression
    from sklearn import ensemble
    model_gradient_boost = ensemble.GradientBoostingRegressor(n_estimators=100)

    # Bagging regression
    from sklearn.ensemble import BaggingRegressor
    model_bagging = BaggingRegressor()

    # ExtraTree (extremely randomized tree) regression
    from sklearn.tree import ExtraTreeRegressor
    model_extratree = ExtraTreeRegressor()

    # Ridge regression
    model_ridge = linear_model.Ridge(alpha=0.01)

    # Plot the regression curve
    plot_regression(model_svr, x_data, y_data)
    #plot_decision(model_decisiontree_regression, x_data,y_data)
Example No. 12
def regression(X, Y, method='svm'):
    '''
    Train a regressor of the chosen type.
    '''
    print("======= Start training the regressor ======")
    print('Selected method:', method)
    if method == 'svm':
        clf = svm.SVR(gamma='auto')

    # Method selection
    # 1. Decision tree regression
    if method == 'tree':
        from sklearn import tree
        clf = tree.DecisionTreeRegressor()

    # 2. Linear regression
    if method == 'linear':
        from sklearn.linear_model import LinearRegression
        clf = LinearRegression()

    # 3. SVM regression (handled above)

    # 4. kNN regression
    if method == 'knn':
        from sklearn import neighbors
        clf = neighbors.KNeighborsRegressor()

    # 5. Random forest regression
    if method == 'RFR':
        from sklearn import ensemble
        clf = ensemble.RandomForestRegressor(n_estimators=20)  # 20 trees
    if method == 'Adaboost':
        # 6. AdaBoost regression
        from sklearn import ensemble
        clf = ensemble.AdaBoostRegressor(n_estimators=50)  # 50 trees
    if method == 'GBR':
        # 7. GBRT regression
        from sklearn import ensemble
        clf = ensemble.GradientBoostingRegressor(n_estimators=100)  # 100 trees
    if method == 'Bag':
        # 8. Bagging regression
        from sklearn import ensemble
        clf = ensemble.BaggingRegressor()
    if method == 'ETR':
        # 9. ExtraTree (extremely randomized tree) regression
        from sklearn.tree import ExtraTreeRegressor
        clf = ExtraTreeRegressor()

    if method == 'MLP':
        from sklearn.neural_network import MLPRegressor
        clf = MLPRegressor(solver='adam', alpha=1e-5,
                           hidden_layer_sizes=(100, 4), random_state=1)

    clf.fit(X, Y)

    print("========== Training complete =========")

    return clf
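
# A brief usage sketch for the selector above (X and Y are assumed to be
# prepared feature and target arrays):
clf = regression(X, Y, method='ETR')
y_hat = clf.predict(X)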
Example No. 13
# 6. AdaBoost regression
from sklearn import ensemble
model_adaboost_regressor = ensemble.AdaBoostRegressor(n_estimators=50)  # 50 trees

# 7. GBRT regression
from sklearn import ensemble
model_gradient_boosting_regressor = ensemble.GradientBoostingRegressor(n_estimators=100)  # 100 trees

# 8. Bagging regression
from sklearn import ensemble
model_bagging_regressor = ensemble.BaggingRegressor()

# 9. ExtraTree (extremely randomized tree) regression
from sklearn.tree import ExtraTreeRegressor
model_extra_tree_regressor = ExtraTreeRegressor()

# 10. Polynomial regression
model_Polynomial = make_pipeline(PolynomialFeatures(3), Ridge())

# 11. Gaussian process regression
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
kernel = DotProduct() + WhiteKernel()
model_GaussianProcessRegressor = GaussianProcessRegressor(kernel=kernel,random_state=0)


# Based on the premium rate, predict the price for different conversion prices
def pricingcb_different_delta(model,convert_value,pb,convertdelta):
    data_inconvert=data[(data['convertdelta'] ==0 )]
    data_outconvert = data[(data['convertdelta'] > 0)]
Example No. 14
    def get_algorithm(self):
        '''
        Inputs:
            algorithm (string)  - Name of the regressor to run.  Follows Sklearn naming conventions.
                                    Available keys: ARDRegression | AdaBoostRegressor | BaggingRegressor | BayesianRidge | CCA
                                                    DecisionTreeRegressor | ElasticNet | ExtraTreeRegressor
                                                    ExtraTreesRegressor | GaussianProcessRegressor | GradientBoostingRegressor
                                                    HuberRegressor | KNeighborsRegressor | KernelRidge | Lars | Lasso
                                                    LassoLars | LinearRegression | LinearSVR | MLPRegressor | NuSVR | 
                                                    OrthogonalMatchingPursuit | PLSCanonical | PLSRegression | 
                                                    PassiveAggressiveRegressor | RANSACRegressor | RandomForestRegressor | 
                                                    Ridge | SGDRegressor | SVR | TheilSenRegressor | TransformedTargetRegressor

                                    Currently not supporting: ElasticNetCV | LarsCV | LassoCV | LassoLarsCV | LassoLarsIC | 
                                                    MultiTaskElasticNet | MultiTaskElasticNetCV | MultiTaskLasso | MultiTaskLassoCV |
                                                    OrthogonalMatchingPursuitCV | RidgeCV | RadiusNeighborsRegressor
        Outputs:
            algorithm (object) - An instance of the requested sklearn regressor, or None if the name is not recognized.

        Notes:
            Scoring Metrics: https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
        '''
        if (self.algorithmName == "ARDRegression"): algorithm = ARDRegression()
        elif (self.algorithmName == "AdaBoostRegressor"):
            algorithm = AdaBoostRegressor()
        elif (self.algorithmName == "BaggingRegressor"):
            algorithm = BaggingRegressor()
        elif (self.algorithmName == "BayesianRidge"):
            algorithm = BayesianRidge()
        elif (self.algorithmName == "CCA"):
            algorithm = CCA()
        elif (self.algorithmName == "DecisionTreeRegressor"):
            algorithm = DecisionTreeRegressor()
        elif (self.algorithmName == "ElasticNet"):
            algorithm = ElasticNet()
        elif (self.algorithmName == "ExtraTreeRegressor"):
            algorithm = ExtraTreeRegressor()
        elif (self.algorithmName == "ExtraTreesRegressor"):
            algorithm = ExtraTreesRegressor()
        elif (self.algorithmName == "GaussianProcessRegressor"):
            algorithm = GaussianProcessRegressor()
        elif (self.algorithmName == "GradientBoostingRegressor"):
            algorithm = GradientBoostingRegressor()
        elif (self.algorithmName == "HuberRegressor"):
            algorithm = HuberRegressor()
        elif (self.algorithmName == "KNeighborsRegressor"):
            algorithm = KNeighborsRegressor()
        elif (self.algorithmName == "KernelRidge"):
            algorithm = KernelRidge()
        elif (self.algorithmName == "Lars"):
            algorithm = Lars()
        elif (self.algorithmName == "Lasso"):
            algorithm = Lasso()
        elif (self.algorithmName == "LassoLars"):
            algorithm = LassoLars()
        elif (self.algorithmName == "LinearRegression"):
            algorithm = LinearRegression()
        elif (self.algorithmName == "LinearSVR"):
            algorithm = LinearSVR()
        elif (self.algorithmName == "MLPRegressor"):
            algorithm = MLPRegressor()
        elif (self.algorithmName == "NuSVR"):
            algorithm = NuSVR()
        elif (self.algorithmName == "OrthogonalMatchingPursuit"):
            algorithm = OrthogonalMatchingPursuit()
        elif (self.algorithmName == "PLSCanonical"):
            algorithm = PLSCanonical()
        elif (self.algorithmName == "PLSRegression"):
            algorithm = PLSRegression()
        elif (self.algorithmName == "PassiveAggressiveRegressor"):
            algorithm = PassiveAggressiveRegressor()
        elif (self.algorithmName == "RANSACRegressor"):
            algorithm = RANSACRegressor()
        elif (self.algorithmName == "RandomForestRegressor"):
            algorithm = RandomForestRegressor()
        elif (self.algorithmName == "Ridge"):
            algorithm = Ridge()
        elif (self.algorithmName == "SGDRegressor"):
            algorithm = SGDRegressor()
        elif (self.algorithmName == "SVR"):
            algorithm = SVR()
        elif (self.algorithmName == "TheilSenRegressor"):
            algorithm = TheilSenRegressor()
        elif (self.algorithmName == "TransformedTargetRegressor"):
            algorithm = TransformedTargetRegressor()
        else:
            return None

        return algorithm
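
# The long if/elif chain above could equivalently be driven by a lookup
# table; a minimal sketch covering a few of the supported names (the table
# and helper below are illustrative assumptions, not part of the original):
REGRESSOR_FACTORIES = {
    "ARDRegression": ARDRegression,
    "ExtraTreeRegressor": ExtraTreeRegressor,
    "RandomForestRegressor": RandomForestRegressor,
    "Ridge": Ridge,
}

def get_algorithm_from_table(name):
    factory = REGRESSOR_FACTORIES.get(name)
    return factory() if factory is not None else None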
Example No. 15
def models_ML(cluster):
    models = dict()
    n_trees = 100
    #    random_state=42
    # Parameters for RandomizedSearchCV
    lr_param = {
        "fit_intercept": [True, False],
        "normalize": [False],
        "copy_X": [True, False]
    }
    knn_param = {
        "n_neighbors": [2, 3, 4, 5, 6, 7, 8],
        "metric": ["euclidean", "cityblock"]
    }
    dtree_param = {
        "max_depth": [3, None],
        "min_samples_leaf": sp_randint(1, 11),
        "criterion": ["mse"],
        "splitter": ["best", "random"],
        "max_features": ["auto", "sqrt", None]
    }
    lasso_param = {
        "alpha": [0.02, 0.024, 0.025, 0.026, 0.03],
        "fit_intercept": [True, False],
        "normalize": [True, False],
        "selection": ["random"]
    }
    ridge_param = {
        "alpha": [200, 230, 250, 265, 270, 275, 290, 300, 500],
        "fit_intercept": [True, False],
        "normalize": [True, False],
        "solver": ["auto"]
    }
    elas_param = {
        "alpha": list(np.logspace(-5, 2, 8)),
        "l1_ratio": [.2, .4, .6, .8],
        "fit_intercept": [True, False],
        "normalize": [True, False],
        "precompute": [True, False]
    }
    #    g = [pow(2,-15),pow(2,-14),pow(2,-13),pow(2,-12),pow(2,-11),pow(2,-10),pow(2,-9),pow(2,-8),pow(2,-7),pow(2,-6),pow(2,-5),pow(2,-4),pow(2,-3),pow(2,-2),pow(2,-1),pow(1,0),pow(2,1),pow(2,2),pow(2,3)]
    #    c=[pow(2,-5),pow(2,-4),pow(2,-3),pow(2,-2),pow(2,-1),pow(1,0),pow(2,1),pow(2,2),pow(2,3),pow(2,4),pow(2,5),pow(2,6),pow(2,7),pow(2,8),pow(2,9),pow(2,10),pow(2,11),pow(2,12),pow(2,13),pow(2,14),pow(2,15)]
    #    svr_param={"C":c,"gamma":g,"kernel":["rbf","sigmoid"]}
    #    gb_param={"n_estimators":[1, 2, 4, 8, 16, 32, 64, 100, 200],"max_depths":list(np.linspace(1, 32, 32, endpoint=True)),"min_samples_splits":list(np.linspace(0.1, 1.0, 10, endpoint=True)),"min_samples_leaf":list(np.linspace(0.1,0.5,5, endpoint=True)),"max_features":list(range(1,5))}

    if cluster in [1, 4, 7, 10, 13, 16, 19, 22, 25]:
        # Highly sparse data: tree-based algorithms
        models['ada'] = AdaBoostRegressor(n_estimators=n_trees,
                                          random_state=42)
        models['bag'] = BaggingRegressor(n_estimators=n_trees)
        models['rf'] = RandomForestRegressor(n_estimators=n_trees,
                                             random_state=42)
        models['et'] = ExtraTreesRegressor(n_estimators=n_trees,
                                           random_state=42)
        models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
    elif cluster == 2:
        models['llars'] = LassoLars()
        models['knn'] = KNeighborsRegressor(n_neighbors=7)
        models['et'] = ExtraTreesRegressor(n_estimators=n_trees,
                                           random_state=42)
        models['rf'] = RandomForestRegressor(n_estimators=n_trees,
                                             random_state=42)
    elif cluster == 3:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['cart'] = RandomizedSearchCV(DecisionTreeRegressor(),
                                            dtree_param,
                                            n_jobs=1,
                                            n_iter=100,
                                            random_state=42)
        models['rf'] = RandomForestRegressor(n_estimators=n_trees,
                                             random_state=42)
        models['et'] = ExtraTreesRegressor(n_estimators=n_trees,
                                           random_state=42)
    elif cluster == 5:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['huber'] = HuberRegressor()
        models['pa'] = PassiveAggressiveRegressor(max_iter=1000,
                                                  tol=1e-3,
                                                  random_state=42)
        models['extra'] = ExtraTreeRegressor(random_state=42)
        models['svmr'] = SVR()
    elif cluster == 6:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['huber'] = HuberRegressor()
        models['svmr'] = SVR()
        models['rf'] = RandomForestRegressor(n_estimators=n_trees,
                                             random_state=42)
    elif cluster == 8:
        models['llars'] = LassoLars()
        models['svmr'] = SVR()
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['huber'] = HuberRegressor()
        models['et'] = ExtraTreesRegressor(n_estimators=n_trees,
                                           random_state=42)
    elif cluster == 9:
        models['cart'] = RandomizedSearchCV(DecisionTreeRegressor(),
                                            dtree_param,
                                            n_jobs=1,
                                            n_iter=100)
        models['bag'] = BaggingRegressor(n_estimators=n_trees)
        models['rf'] = RandomForestRegressor(n_estimators=n_trees)
        models['et'] = ExtraTreesRegressor(n_estimators=n_trees)
    elif cluster == 11:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['en'] = RandomizedSearchCV(ElasticNet(),
                                          elas_param,
                                          scoring='neg_mean_squared_error',
                                          n_jobs=1,
                                          n_iter=100,
                                          cv=10,
                                          random_state=42)
        models['extra'] = ExtraTreeRegressor(random_state=42)
        models['ada'] = AdaBoostRegressor(n_estimators=n_trees,
                                          random_state=42)
    elif cluster == 12:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['ada'] = AdaBoostRegressor(n_estimators=n_trees,
                                          random_state=42)
        models['knn'] = KNeighborsRegressor(n_neighbors=3)
        models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
    elif cluster == 14:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['lasso'] = RandomizedSearchCV(Lasso(),
                                             lasso_param,
                                             n_jobs=1,
                                             n_iter=100)
        models['cart'] = RandomizedSearchCV(DecisionTreeRegressor(),
                                            dtree_param,
                                            n_jobs=1,
                                            n_iter=100)
        models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
    elif cluster == 15:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['en'] = RandomizedSearchCV(ElasticNet(),
                                          elas_param,
                                          scoring='neg_mean_squared_error',
                                          n_jobs=1,
                                          n_iter=100,
                                          cv=10,
                                          random_state=42)
        models['huber'] = HuberRegressor()
        models['extra'] = ExtraTreeRegressor(random_state=42)
        models['ada'] = AdaBoostRegressor(n_estimators=n_trees,
                                          random_state=42)
    elif cluster == 17:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['ada'] = AdaBoostRegressor(n_estimators=n_trees,
                                          random_state=42)
        models['extra'] = ExtraTreeRegressor(random_state=42)
        models['bag'] = BaggingRegressor(n_estimators=n_trees, random_state=42)
    elif cluster == 18:
        models['lasso'] = RandomizedSearchCV(Lasso(),
                                             lasso_param,
                                             n_jobs=1,
                                             n_iter=100,
                                             random_state=42)
        models['ridge'] = RandomizedSearchCV(Ridge(),
                                             ridge_param,
                                             n_jobs=1,
                                             n_iter=100,
                                             random_state=42)
        models['cart'] = RandomizedSearchCV(DecisionTreeRegressor(),
                                            dtree_param,
                                            n_jobs=1,
                                            n_iter=100,
                                            random_state=42)
        models['extra'] = ExtraTreeRegressor(random_state=42)
    elif cluster == 20:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['huber'] = HuberRegressor()
        models['cart'] = RandomizedSearchCV(DecisionTreeRegressor(),
                                            dtree_param,
                                            n_jobs=1,
                                            n_iter=100,
                                            random_state=42)
        models['bag'] = BaggingRegressor(n_estimators=n_trees)
        models['ada'] = AdaBoostRegressor(n_estimators=n_trees,
                                          random_state=42)
        models['et'] = ExtraTreesRegressor(n_estimators=n_trees,
                                           random_state=42)
    elif cluster == 21:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['lasso'] = RandomizedSearchCV(Lasso(),
                                             lasso_param,
                                             n_jobs=1,
                                             n_iter=100,
                                             random_state=42)
        models['svmr'] = SVR()
        models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
    elif cluster == 23:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['huber'] = HuberRegressor()
        models['bag'] = BaggingRegressor(n_estimators=n_trees)
        models['svmr'] = SVR()
        models['ada'] = AdaBoostRegressor(n_estimators=n_trees,
                                          random_state=42)
    elif cluster == 24:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['lasso'] = RandomizedSearchCV(Lasso(),
                                             lasso_param,
                                             n_jobs=1,
                                             n_iter=100,
                                             random_state=42)
        models['pa'] = PassiveAggressiveRegressor(max_iter=1000,
                                                  tol=1e-3,
                                                  random_state=42)
        models['extra'] = ExtraTreeRegressor(random_state=42)
        models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
    elif cluster == 26:
        models['lr'] = RandomizedSearchCV(LinearRegression(),
                                          lr_param,
                                          n_jobs=1,
                                          random_state=42)
        models['lasso'] = RandomizedSearchCV(Lasso(),
                                             lasso_param,
                                             n_jobs=1,
                                             n_iter=100,
                                             random_state=42)
        models['en'] = RandomizedSearchCV(ElasticNet(),
                                          elas_param,
                                          scoring='neg_mean_squared_error',
                                          n_jobs=1,
                                          n_iter=100,
                                          cv=10,
                                          random_state=42)
        models['extra'] = ExtraTreeRegressor(random_state=42)
        models['ada'] = AdaBoostRegressor(n_estimators=n_trees,
                                          random_state=42)
    elif cluster == 27:
        models['svmr'] = SVR()
        models['knn'] = KNeighborsRegressor(n_neighbors=3)
        models['bag'] = BaggingRegressor(n_estimators=n_trees)
        models['cart'] = RandomizedSearchCV(DecisionTreeRegressor(),
                                            dtree_param,
                                            n_jobs=1,
                                            n_iter=100,
                                            random_state=42)
        models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)

    return models
Example No. 16
try_different_method(model)

## SVR regression
from sklearn import svm

model = svm.SVR()
try_different_method(model)

## KNN regression
from sklearn import neighbors

model = neighbors.KNeighborsRegressor()
try_different_method(model)

## AdaBoost regression
from sklearn import ensemble

model = ensemble.AdaBoostRegressor(n_estimators=50)
try_different_method(model)

## GBRT regression
from sklearn import ensemble

model = ensemble.GradientBoostingRegressor(n_estimators=100)
try_different_method(model)

## ExtraTree (extremely randomized tree) regression
from sklearn.tree import ExtraTreeRegressor

model = ExtraTreeRegressor()
try_different_method(model)
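
# try_different_method() is never defined in this excerpt; a plausible
# minimal implementation, inferred from how it is called above (the
# train/test variables and the plotting details are assumptions):
import numpy as np
import matplotlib.pyplot as plt

def try_different_method(model):
    model.fit(x_train, y_train)
    score = model.score(x_test, y_test)
    result = model.predict(x_test)
    plt.plot(np.arange(len(result)), y_test, 'go-', label='true value')
    plt.plot(np.arange(len(result)), result, 'ro-', label='predict value')
    plt.title('score: %f' % score)
    plt.legend()
    plt.show()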
Example No. 17
def loadData():
    train = pd.read_csv('train.csv')
    test = pd.read_csv('test.csv')
    #id=train['Id']
    #train['Id']
    #print(train.head(3))
    # Log-transform so the numeric target is closer to a normal distribution
    train["SalePrice"] = np.log1p(train["SalePrice"])
    label = train['SalePrice']
    del train['SalePrice']

    # Merge the training and test sets
    train = pd.concat([train, test])
    # Index is the column name, value is the dtype
    # Numeric features
    numeric_feats = train.dtypes[train.dtypes != "object"].index
    skewed_feats = train[numeric_feats].apply(
        lambda x: skew(x.dropna()))  # compute skewness
    skewed_feats = skewed_feats[skewed_feats > 0.75]  # log-transform features with skewness > 0.75
    skewed_feats = skewed_feats.index
    train[skewed_feats] = np.log1p(train[skewed_feats])
    # One-hot (dummy) encode the categorical features
    train = pd.get_dummies(train)
    # Fill missing values with the column means
    train = train.fillna(train.mean())
    test = train[train['Id'] >= 1461]
    train = train[train['Id'] < 1461]
    del train['Id']
    sub = test[['Id']]
    del test['Id']

    # Model selection
    X_train, X_test, Y_train, Y_test = train_test_split(train,
                                                        label,
                                                        test_size=0.33)

    regs = [
        ['LassoCV', LassoCV(alphas=[1, 0.1, 0.001, 0.0005])],
        ['LinearRegression', LinearRegression()],
        ['Ridge', Ridge()],
        ['ElasticNet', ElasticNet()],
        ['RANSACRegressor', RANSACRegressor()],
        ['HuberRegressor', HuberRegressor()],
        ['DecisionTreeRegressor',
         DecisionTreeRegressor()],
        ['ExtraTreeRegressor', ExtraTreeRegressor()],
        ['AdaBoostRegressor',
         AdaBoostRegressor(n_estimators=150)],
        ['ExtraTreesRegressor',
         ExtraTreesRegressor(n_estimators=150)],
        [
            'GradientBoostingRegressor',
            GradientBoostingRegressor(n_estimators=150)
        ],
        ['RandomForestRegressor',
         RandomForestRegressor(n_estimators=150)],
        [
            'XGBRegressor',
            XGBRegressor(n_estimators=360, max_depth=2, learning_rate=0.1)
        ],
    ]

    preds = []
    for reg_name, reg in regs:
        print(reg_name)
        reg.fit(X_train, Y_train)
        y_pred = reg.predict(X_test)
        if np.sum(y_pred < 0) > 0:
            print('y_pred has ' + str(
                np.sum(y_pred < 0)
            ) + " negative values; we replace them with the median of y_pred")
            y_pred[y_pred < 0] = np.median(y_pred)
        score = np.sqrt(mean_squared_error(np.log(y_pred), np.log(Y_test)))
        print()
        preds.append([reg_name, y_pred])

    final_results = []
    for comb_len in range(1, len(regs) + 1):
        print("Model num:" + str(comb_len))
        results = []
        for comb in itertools.combinations(preds, comb_len):
            # Pick one combination of models; e.g. with comb_len=2, comb is (['Lasso', y_pred], ['Ridge', y_pred])
            pred_sum = 0
            model_name = []
            for reg_name, pre in comb:
                pred_sum += pre
                model_name.append(reg_name)
            pred_sum /= comb_len
            model_name = '+'.join(model_name)
            score = np.sqrt(
                mean_squared_error(np.log(np.expm1(pred_sum)),
                                   np.log(np.expm1(Y_test))))
            results.append([model_name, score])
        # Report the score of each blended model
        results = sorted(results, key=lambda x: x[1])
        for model_name, score in results:
            print(model_name + ":" + str(score))
        print()
        final_results.append(results[0])

    print("best set of models")
    print()
    for i in final_results:
        print(i)

    # Chosen models
    result = 0
    choose_model = [
        LassoCV(alphas=[1, 0.1, 0.001, 0.0005]),
        GradientBoostingRegressor(n_estimators=150),
        XGBRegressor(n_estimators=360, max_depth=2, learning_rate=0.1)
    ]
    for model in choose_model:
        reg = model.fit(train, label)
        pre = reg.predict(test)
        result += pre
    result /= 3

    # Write the submission file
    result = np.expm1(result)
    sub['SalePrice'] = result
    rows = [[int(x[0]), x[1]] for x in sub.values]
    with open("submission.csv", 'w', newline='') as f:  # Python 3: text mode with newline=''
        writer = csv.writer(f)
        writer.writerow(['Id', 'SalePrice'])
        for i in range(len(rows)):
            writer.writerow(rows[i])
Example No. 18

# (The snippet begins mid-statement, inside a train_test_split call.)
                                                    test_size=0.50,
                                                    random_state=42)
"""##Model Selection"""

from sklearn.ensemble import RandomForestRegressor

rfr = RandomForestRegressor()
rfr.fit(X_train, y_train)
r2_score(y_test, rfr.predict(X_test))

mean_squared_error(y_test, rfr.predict(X_test))

forest = ExtraTreeRegressor()
forest.fit(X_train, y_train)
importances = forest.feature_importances_

X_train.columns.shape
forest.feature_importances_.shape

indices = np.argsort(importances)[::-1]

# Print the feature ranking
print("Feature ranking:")

for f in range(X_train.shape[1]):
    print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

# Plot the impurity-based feature importances of the forest
plt.figure()
plt.title("Feature importances")
plt.bar(range(X_train.shape[1]),
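
# The snippet is cut off mid-call; the familiar sklearn feature-importance
# example it follows would typically finish the plot along these lines (the
# remaining arguments are an assumption):
#         importances[indices], align="center")
# plt.xticks(range(X_train.shape[1]), indices)
# plt.show()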
Example No. 19
print('Done formatting')

#14 Day Period Tests

X, y = formatData(dailyOpen, dailyClose, dailyHigh, dailyLow, dailyVolume)
testX, testy = dropOut(X, y)

print("done")

X = preprocessing.scale(np.asarray(X))
X_scale = preprocessing.scale(X)
y = np.asarray(y)
testX = preprocessing.scale(np.asarray(testX))
testy = np.asarray(testy)
clf = DecisionTreeRegressor(max_depth=None, min_samples_split=2, random_state=0).fit(X, y)
clfE = ExtraTreeRegressor(max_depth=None, min_samples_split=2, random_state=0).fit(X, y)

scores = cross_val_score(clf, X, y, cv=5)
scoresE = cross_val_score(clfE, X, y, cv=5)
print('Training Decision',scores.mean())
print('Training Extra', scoresE.mean())

unseen = cross_val_score(clf, testX, testy, cv=5)
unseenE = cross_val_score(clfE, testX, testy, cv=5)
print('New Data Decision', unseen.mean())
print('New Data Extra', unseenE.mean())

defaultPredict = clf.predict(testX)
#defaultPredictLog = clf.predict_proba(testX)
extraPredict = clfE.predict(testX)
#extraPredictLog = clfE.predict_proba(testX)
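
# Note: the predict_proba() calls above are commented out because tree
# regressors, unlike classifiers, do not implement predict_proba.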
Example No. 20
def extra_tree_regressor(self):
    x_train, x_test, y_train, y_test = self.preprocessing()
    model = ExtraTreeRegressor()
    y_pred = model.fit(x_train, y_train).predict(x_test)
    self.printing(y_test, y_pred, 'Extra Tree')
Example No. 21
def SVR_train(*data):
    X, Y = data
    #### 3.1 Decision tree regression ####
    from sklearn import tree
    model_DecisionTreeRegressor = tree.DecisionTreeRegressor()
    #### 3.2 Linear regression ####
    from sklearn import linear_model
    model_LinearRegression = linear_model.LinearRegression()
    #### 3.3 SVM regression ####
    from sklearn import svm
    model_SVR = svm.SVR()
    model_SVR2 = svm.SVR(kernel='rbf', C=100, gamma=0.1)
    #### 3.4 KNN regression ####
    from sklearn import neighbors
    model_KNeighborsRegressor = neighbors.KNeighborsRegressor()
    #### 3.5 Random forest regression ####
    from sklearn import ensemble
    model_RandomForestRegressor = ensemble.RandomForestRegressor(
        n_estimators=20)  # 20 trees
    #### 3.6 AdaBoost regression ####
    from sklearn import ensemble
    model_AdaBoostRegressor = ensemble.AdaBoostRegressor(
        n_estimators=50)  # 50 trees
    #### 3.7 GBRT regression ####
    from sklearn import ensemble
    model_GradientBoostingRegressor = ensemble.GradientBoostingRegressor(
        n_estimators=100)  # 100 trees
    #### 3.8 Bagging regression ####
    from sklearn.ensemble import BaggingRegressor
    model_BaggingRegressor = BaggingRegressor()
    #### 3.9 ExtraTree (extremely randomized tree) regression ####
    from sklearn.tree import ExtraTreeRegressor
    model_ExtraTreeRegressor = ExtraTreeRegressor()

    # Create the (parametrised) models
    # print("Hit Rates/Confusion Matrices:\n")
    models = [
        ("model_DecisionTreeRegressor", model_DecisionTreeRegressor),
        ("model_LinearRegression", model_LinearRegression),
        (
            "model_SVR",
            model_SVR2  #model_SVR
        ),
        ("model_KNeighborsRegressor", model_KNeighborsRegressor),
        ("model_RandomForestRegressor", model_RandomForestRegressor),
        ("model_AdaBoostRegressor", model_AdaBoostRegressor),
        ("model_GradientBoostingRegressor", model_GradientBoostingRegressor),
        ("model_BaggingRegressor", model_BaggingRegressor),
        ("model_ExtraTreeRegressor", model_ExtraTreeRegressor)
    ]

    for m in models:

        #X = X.reset_index(drop=True)
        #print(X)
        # y = y.reset_index(drop=True)
        # print(y)

        from sklearn.model_selection import KFold
        kf = KFold(n_splits=2, shuffle=False)

        for train_index, test_index in kf.split(X):
            # print(train_index, test_index)
            # print(X.loc[[0,1,2]])

            X_train, X_test, y_train, y_test = X[train_index], X[
                test_index], Y[train_index], Y[
                    test_index]  # X_train/y_train are this fold's training set; X_test/y_test are the validation set
            #print(X_test, y_test)
            #print(X_train, y_train)
            print('======================================')

            import datetime
            starttime = datetime.datetime.now()

            print("正在训练%s模型:" % m[0])
            m[1].fit(X_train, y_train)

            # Make an array of predictions on the test set
            pred = m[1].predict(X_test)

            # Output the hit-rate and the confusion matrix for each model
            score = m[1].score(X_test, y_test)
            print("%s:\n%0.3f" % (m[0], m[1].score(X_test, y_test)))
            # print("%s\n" % confusion_matrix(y_test, pred, labels=[-1.0, 1.0]))#labels=["ant", "bird", "cat"]

            from sklearn.metrics import r2_score
            r2 = r2_score(y_test, pred)
            print('r2: ', r2)

            endtime = datetime.datetime.now()
            print('%s training and prediction time in seconds:' % m[0], (endtime - starttime).seconds)

            #result = m[1].predict(X_test)
            import matplotlib.pyplot as plt
            plt.figure()
            plt.plot(np.arange(len(pred)), y_test, 'go-', label='true value')
            plt.plot(np.arange(len(pred)), pred, 'ro-', label='predict value')
            plt.title('score: %f' % score)
            plt.legend()
            plt.show()
Example No. 22
def getModel(x, y):
    et = ExtraTreeRegressor()
    et.fit(x, y)
    #joblib.dump(et, './model/et')  # save the model
    return et
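
# To actually persist the model, the commented-out joblib.dump() line above
# would also need "import joblib" (assuming joblib is installed).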
Example No. 23
print('mean_absolute_error', mean_absolute_error(y_test, DTR_prediction))
print('mean_squared_error', mean_squared_error(y_test, DTR_prediction))


# # HOMEWORK

# In[847]:


from sklearn.tree import ExtraTreeRegressor


# In[848]:


ETR = ExtraTreeRegressor()


# In[849]:


ETR


# In[856]:


ETR.fit(x, y)


# In[857]:
Example No. 24
    build_auto(GradientBoostingRegressor(n_estimators=31, random_state=13),
               "GradientBoostingAuto")
    build_auto(IsolationForest(n_estimators=31, random_state=13),
               "IsolationForestAuto")
    build_auto(
        LGBMRegressor(objective="regression", n_estimators=31,
                      random_state=13), "LightGBMAuto")
    build_auto(LinearRegression(), "LinearRegressionAuto")
    build_auto(RandomForestRegressor(n_estimators=17, random_state=13),
               "RandomForestAuto",
               compact=False,
               flat=False)
    build_auto(
        VotingRegressor(estimators=[
            ("major", DecisionTreeRegressor(max_depth=8, random_state=13)),
            ("minor", ExtraTreeRegressor(max_depth=5, random_state=13))
        ],
                        weights=[0.7, 0.3]), "VotingEnsembleAuto")
    build_auto(
        XGBRegressor(objective="reg:squarederror",
                     n_estimators=31,
                     random_state=13), "XGBoostAuto")

sparsify("Auto")

auto_X, auto_y = load_auto("AutoNA")

if ("Auto" in datasets) or ("AutoNA" in datasets):
    build_auto(
        LGBMRegressor(objective="regression", n_estimators=31,
                      random_state=13), "LightGBMAutoNA")
Example No. 25

 # (Fragment: this loop body comes from inside a hyper-parameter sweep over
 # max_depth i, min_samples_leaf j, and min_samples_split k.)
 mse_t = []
 rmse_t = []
 mae_t = []
 mdae_t = []
 evs_t = []
 r2_t = []
 for tr_i, ts_i in rkf.split(data):
     print(i, j, k, c)
     train, test = data.iloc[tr_i], data.iloc[ts_i]
     train_x = train.drop(columns=['Rainfall'])
     train_y = train['Rainfall']
     test_x = test.drop(columns=['Rainfall'])
     test_y = test['Rainfall']
     model = ExtraTreeRegressor(criterion='mse',
                                splitter='best',
                                max_depth=i,
                                min_samples_leaf=j,
                                min_samples_split=k)
     model.fit(train_x, train_y)
     ts_p = model.predict(test_x)
     mse_t.append(mse(test_y, ts_p))
     rmse_t.append(rmse(test_y, ts_p))
     mae_t.append(mae(test_y, ts_p))
     mdae_t.append(mdae(test_y, ts_p))
     evs_t.append(evs(test_y, ts_p))
     r2_t.append(r2(test_y, ts_p))
     c += 1
     dep_f.append(i)
     saml_f.append(j)
     sams_f.append(k)
     mse_f.append(np.mean(mse_t))
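
 # Note: the metric shorthands used above (mse, rmse, mae, mdae, evs, r2) are
 # not defined in this excerpt; plausible aliases built on sklearn.metrics:
 # from sklearn.metrics import (mean_squared_error as mse,
 #                              mean_absolute_error as mae,
 #                              median_absolute_error as mdae,
 #                              explained_variance_score as evs,
 #                              r2_score as r2)
 # rmse = lambda y, p: np.sqrt(mse(y, p))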
Example No. 26
from sklearn import ensemble  # 6

model_GradientBoostingRegressor = ensemble.GradientBoostingRegressor()
model_heads.append("Gradient Boosting Regression\t")
models.append(model_GradientBoostingRegressor)

from sklearn.ensemble import BaggingRegressor  # 7

model_BaggingRegressor = BaggingRegressor()
model_heads.append("Bagging Regression\t\t\t\t")
models.append(model_BaggingRegressor)

from sklearn.tree import ExtraTreeRegressor  # 8

model_ExtraTreeRegressor = ExtraTreeRegressor()
model_heads.append("ExtraTree Regression\t\t\t")
models.append(model_ExtraTreeRegressor)

import xgboost as xgb  # 9

model_XGBoostRegressor = xgb.XGBRegressor()
model_heads.append("XGBoost Regression\t\t\t\t")
models.append(model_XGBoostRegressor)
##########Model Adding Ends###########


def load_data(x_path='./X_train.csv',
              y_path='./y_train.csv',
              x_test_path='./X_test.csv'):
    """
Example No. 27
    def get_models(self, list_chosen):
        """Generate a library of base learners
        (Prophet works only if the data have the target in a pandas column named 'y' and a feature column with the time data named 'ds')
        :param list_chosen: list with the names of the models to load
        :return: models, a dictionary with as index the name of the models, as elements the models"""

        linreg = LinearRegression(normalize=True, fit_intercept=True)
        dtr = DecisionTreeRegressor(random_state=self.SEED,
                                    min_samples_split=(0.018),
                                    min_samples_leaf=(0.007),
                                    max_depth=25)
        svrr = SVR(kernel='linear', epsilon=5)
        br = BaggingRegressor(n_estimators=350,
                              max_samples=0.9,
                              max_features=0.7,
                              bootstrap=False,
                              random_state=self.SEED)
        ada = AdaBoostRegressor(n_estimators=7,
                                loss='exponential',
                                learning_rate=0.01,
                                random_state=self.SEED)
        rf = RandomForestRegressor(n_estimators=1000,
                                   max_depth=30,
                                   max_leaf_nodes=1000,
                                   random_state=self.SEED)
        gbr = GradientBoostingRegressor(n_estimators=1000,
                                        learning_rate=0.01,
                                        random_state=self.SEED)
        xgbr1 = xgb.XGBRegressor(random_state=self.SEED)
        mdl = LGBMRegressor(n_estimators=1000, learning_rate=0.01)
        las = Lasso()
        rid = Ridge()
        en = ElasticNet()
        huber = HuberRegressor(max_iter=2000)
        lasl = LassoLars(max_iter=2000, eps=1, alpha=0.5, normalize=False)
        pa = PassiveAggressiveRegressor(C=1,
                                        max_iter=4000,
                                        random_state=self.SEED)
        sgd = SGDRegressor(max_iter=2000, tol=1e-3)
        knn = KNeighborsRegressor(n_neighbors=20)
        ex = ExtraTreeRegressor()
        exs = ExtraTreesRegressor(n_estimators=1000)
        pro = Prophet(changepoint_prior_scale=0.01)

        models_temp = {
            'BaggingRegressor': br,
            'RandomForestRegressor': rf,
            'GradientBoostingRegressor': gbr,
            'XGBRegressor': xgbr1,
            'LGBMRegressor': mdl,
            'ExtraTreesRegressor': exs,
            'LinearRegression': linreg,
            'SVR': svrr,
            'AdaBoostRegressor': ada,
            'LassoLars': lasl,
            'PassiveAggressiveRegressor': pa,
            'SGDRegressor': sgd,
            'DecisionTreeRegressor': dtr,
            'lasso': las,
            'ridge': rid,
            'ElasticNet': en,
            'HuberRegressor': huber,
            'KNeighborsRegressor': knn,
            'ExtraTreeRegressor': ex,
            'Prophet': pro
        }

        models = dict()
        for model in list_chosen:
            if model in models_temp:
                models[model] = models_temp[model]
        return models
Example No. 28
    ])

BL_LT_prepared = full_pipeline.fit_transform(BL_LT)  # run preprocessing

BL_LT_prepared_train, \
BL_LT_prepared_test, \
BL_LT_labels_train, \
BL_LT_labels_test = train_test_split(
    BL_LT_prepared, BL_LT_labels, test_size=0.10, random_state=42)  # split train:test at a 9:1 ratio
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.svm import SVR


ada_et_tree_reg = AdaBoostRegressor(
    ExtraTreeRegressor(max_depth=200, random_state=42), n_estimators=60,
    learning_rate=0.5, random_state=42)
ada_et_tree_reg.fit(BL_LT_prepared_train, BL_LT_labels_train)   # train the AdaBoostRegressor(ExtraTreeRegressor) model
BL_LT_predicted = ada_et_tree_reg.predict(BL_LT_prepared_test)  # evaluate AdaBoostRegressor(ExtraTreeRegressor) on the test set
ada_et_tree_mape_sub = (np.abs((BL_LT_predicted - BL_LT_labels_test) / BL_LT_labels_test).mean(axis=0))  # compute the AdaBoostRegressor(ExtraTreeRegressor) MAPE

ada_svr_reg = AdaBoostRegressor(
    SVR(C=1000, degree=3, kernel='rbf'), n_estimators=30,
    learning_rate=0.1, random_state=42)
ada_svr_reg.fit(BL_LT_prepared_train, BL_LT_labels_train)  # train the AdaBoostRegressor(SVR) model
BL_LT_predicted = ada_svr_reg.predict(BL_LT_prepared_test)  # evaluate AdaBoostRegressor(SVR) on the test set
ada_svr_mape_sub = (np.abs((BL_LT_predicted - BL_LT_labels_test) / BL_LT_labels_test).mean(axis=0))  # compute the AdaBoostRegressor(SVR) MAPE


from sklearn.svm import SVR
svm_rbf_reg = SVR(C=10, cache_size=200, coef0=0.0, degree=3,
Example No. 29

def build_voting_tree_regressor(X, y, max_features, max_depth, min_samples_split):
    clf = ExtraTreeRegressor(max_features=max_features, max_depth=max_depth, min_samples_split=min_samples_split)
    clf = clf.fit(X, y)
    return clf
Example No. 30
def test_extra_tree_regressor(self):
    model = ExtraTreeRegressor()
    dump_single_regression(model)
    dump_multiple_regression(model)