def __init__(self):
    # Algorithm name
    self._name = 'larscv'
    # Base path
    self._f_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
    # Suppress warning messages
    warnings.filterwarnings('ignore')
    # Load the original data
    data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv", sep=",", encoding="utf-8")
    # Boolean masks for splitting into training and test data
    self._x = (data["year"] <= 2017)
    self._y = (data["year"] >= 2018)
    # Training data split
    self._x_train, self._y_train = self.preprocessing(data[self._x])
    # Test data split
    self._x_test, self._y_test = self.preprocessing(data[self._y])
    # Declare the model
    self._model = LarsCV(normalize=False)
    # Train the model
    self._model.fit(self._x_train, self._y_train)
def train_regression_model(X, y, model_type='elastic cv', cv=3, extra_params=None):
    """Wrapper function to train various regression models with X, y input;
    extra_params can be passed to override any default parameters."""
    extra_params = extra_params or {}  # avoid a mutable default argument
    model_type = model_type.lower()
    if model_type == 'linear':
        model = LinearRegression(fit_intercept=True)
    elif model_type == 'elastic cv':
        model = ElasticNetCV(cv=cv)
    elif model_type == 'omp cv':
        model = OrthogonalMatchingPursuitCV(cv=cv)
    elif model_type == 'lars cv':
        model = LarsCV(cv=cv)
    elif model_type == 'ridge cv':
        model = RidgeCV(cv=cv)
    elif model_type == 'full lightgbm':
        # Train_Light_GBM fits internally, so return it directly
        model = Train_Light_GBM(X, y, int_cv=cv, regression=True, **extra_params)
        return model
    else:
        raise ValueError(f'Unknown model_type: {model_type}')
    model.fit(X, y)
    return model
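# A minimal usage sketch (an assumption, not part of the original source): it
# presumes the wrapper above and its scikit-learn estimators are in scope, and
# uses synthetic data purely for illustration.
from sklearn.datasets import make_regression

X_demo, y_demo = make_regression(n_samples=300, n_features=8, noise=2.0, random_state=0)
lars_model = train_regression_model(X_demo, y_demo, model_type='lars cv', cv=5)
print(lars_model.alpha_)  # regularization strength chosen by cross-validation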
def larscv():
    X, y = make_regression(n_samples=200, n_features=10, noise=4.0, random_state=0)
    reg = LarsCV(cv=2).fit(X, y)
    print(reg.score(X, y))
    print(X[:, 0].shape, y.shape)
    plt.plot(X[:, 0], y)
    plt.scatter(X[:, 0], y)
    plt.show()
def test_model_lars_cv(self):
    model, X = fit_regression_model(LarsCV())
    model_onnx = convert_sklearn(
        model, "lars",
        [("input", FloatTensorType([None, X.shape[1]]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(X, model, model_onnx, basename="SklearnLarsCV-Dec4")
def get_model_by_name(model_name):
    return {
        'Linear Regression': LinearRegression(),
        'Lars CV': LarsCV(cv=10),
        'Lasso CV': LassoCV(cv=10),
        'Ridge CV': RidgeCV(cv=10),
        'Elastic Net CV': ElasticNetCV(cv=10),
        'Orthogonal Matching Pursuit CV': OrthogonalMatchingPursuitCV(cv=10),
        'Decision Tree Regressor': DecisionTreeRegressor(max_depth=3),
    }[model_name]
def Lars_regression(self, X_train, y_train, X_test, y_test):
    my_cv = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)
    best_model = LarsCV(cv=my_cv, n_jobs=-1)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return best_model, mse, mae, r2
def ResultsLARS(DataSet, Y):
    X_train, X_test, y_train, y_test = train_test_split(DataSet, Y, train_size=0.75)
    LAR_cv = LarsCV(normalize=True)
    LAR_model = LAR_cv.fit(X_train, y_train)
    LAR_prediction = LAR_model.predict(X_test)
    LAR_mae = np.mean(np.abs(y_test - LAR_prediction))
    LAR_coefs = dict(
        zip(['Intercept'] + DataSet.columns.tolist(),
            np.round(
                np.concatenate((LAR_model.intercept_, LAR_model.coef_),
                               axis=None), 3)))
    print('Least Angle Regression MAE: {}'.format(np.round(LAR_mae, 3)))
    print('Least Angle Regression coefficients: {}'.format(LAR_coefs))
    del LAR_coefs['Intercept']
    DictionaryPlot(LAR_coefs, 'Least Angle Regression')
def _larscv(*, train, test, x_predict=None, metrics, fit_intercept=True,
            verbose=False, max_iter=500, normalize=True, precompute='auto',
            cv=None, max_n_alphas=1000, n_jobs=None,
            eps=2.220446049250313e-16, copy_X=True):
    """For more info visit :
    https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LarsCV.html#sklearn.linear_model.LarsCV
    """
    model = LarsCV(fit_intercept=fit_intercept, verbose=verbose, max_iter=max_iter,
                   normalize=normalize, precompute=precompute, cv=cv,
                   max_n_alphas=max_n_alphas, n_jobs=n_jobs, eps=eps, copy_X=copy_X)
    model.fit(train[0], train[1])
    model_name = 'LarsCV'
    y_hat = model.predict(test[0])
    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    elif metrics == 'mae':
        accuracy = _mae(test[1], y_hat)
    else:
        # previously, an unknown metrics value left `accuracy` undefined
        raise ValueError(f'Unknown metrics: {metrics}')
    if x_predict is None:
        return (model_name, accuracy, None)
    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
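# Side note (an assumption about scikit-learn versioning, not from the snippet
# above): the `normalize` argument forwarded here was deprecated in scikit-learn
# 1.0 and removed in 1.2. A minimal sketch of the usual replacement, scaling
# explicitly in a pipeline:
from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X_demo, y_demo = make_regression(n_samples=200, n_features=10, noise=4.0, random_state=0)
pipe = make_pipeline(StandardScaler(), LarsCV(cv=5))
pipe.fit(X_demo, y_demo)
print(pipe[-1].alpha_)  # alpha chosen by the cross-validated LARS step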
def train_regression_model(X, y, model_type='elastic cv', cv=3):
    # default changed from 'elastic', which matched no branch and left `model` undefined
    if model_type == 'linear':
        model = LinearRegression(fit_intercept=True)
    elif model_type == 'elastic cv':
        model = ElasticNetCV(cv=cv)
    elif model_type == 'omp cv':
        model = OrthogonalMatchingPursuitCV(cv=cv)
    elif model_type == 'lars cv':
        model = LarsCV(cv=cv)
    elif model_type == 'ridge cv':
        model = RidgeCV(cv=cv)
    elif model_type == 'simple xgboost':
        model = XGBRegressor()
    elif model_type == 'simple lightgbm':
        model = LGBMRegressor()
    elif model_type == 'full lightgbm':
        # train_light_gbm_regressor fits internally, so return it directly
        model = train_light_gbm_regressor(X, y, cv, n_params=10, test_size=.2)
        return model
    model.fit(X, y)
    return model
def fit_linear_model(basis_matrix, train_vals, solver_type, **kwargs):
    solvers = {
        'lasso_lars': LassoLarsCV(cv=kwargs['cv']).fit,
        'lasso': LassoCV(cv=kwargs['cv']).fit,
        'lars': LarsCV(cv=kwargs['cv']).fit,
        'omp': OrthogonalMatchingPursuitCV(cv=kwargs['cv'], verbose=5).fit
    }
    assert train_vals.ndim == 2
    if solver_type in solvers:
        fit = solvers[solver_type]
        res = fit(basis_matrix, train_vals[:, 0])
    else:
        msg = f'Solver type {solver_type} not supported\n'
        msg += 'Supported solvers are:\n'
        for key in solvers.keys():
            msg += f'\t{key}\n'
        raise Exception(msg)
    cv_score = res.score(basis_matrix, train_vals[:, 0])
    coef = res.coef_[:, np.newaxis]
    coef[0] = res.intercept_  # basis column 0 is assumed to be the constant term
    return coef, cv_score
def check_w(w=[12, 24, 36, 48, 60]):
    '''
    Robustness check for w_min; saves the prediction results (Avew window)
    and the out-of-sample R-squared.

    Parameters
    ----------
    w: possible w_min values (list)
    '''
    for w_min in w:
        # linear ML prediction
        pre1 = linear_prediction(RidgeCV(), w_min=w_min, window_type="Avew")
        pre2 = linear_prediction(LassoCV(cv=5), w_min=w_min, window_type="Avew")
        pre3 = linear_prediction(ElasticNetCV(cv=5), w_min=w_min, window_type="Avew")
        pre4 = linear_prediction(LarsCV(cv=5), w_min=w_min, window_type="Avew")
        pre5 = linear_prediction(OrthogonalMatchingPursuitCV(cv=5), w_min=w_min, window_type="Avew")
        pre6 = MR(w_min=w_min, window_type="Avew")
        all_pre = pd.DataFrame({
            'Kitchen Sink': pre6,
            "ridge": pre1,
            "lasso": pre2,
            "elasticnet": pre3,
            "lars": pre4,
            "OMP": pre5,
        })
        all_pre['FC'] = all_pre.iloc[:, 1:].mean(axis=1)
        # save the prediction results
        # (path segments kept verbatim; they mean "robustness check" / "prediction results")
        all_pre.to_csv(
            os.path.join(path, "稳健性检验", "w_min", "预测结果",
                         "w_min=" + str(w_min) + ".csv"))
        # R2 test
        R2_test(all_pre, name="w_min=" + str(w_min) + ".csv")
        # then you need to move the result on your own
def ridge_regression(self, **kwargs):
    if self._regression_type == 'lasso':
        self.ridgereg = LassoCV(max_iter=50000)
        # self.ridgereg = LassoCV(max_iter=1e5, cv=10)
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'ard':
        self.ridgereg = ARDRegression()
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'elastic':
        self.ridgereg = ElasticNetCV(cv=10)
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'lars':
        self.ridgereg = LarsCV(cv=10)
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'lassolars':
        self.ridgereg = LassoLarsCV(cv=5)
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'ordinary':
        self.ridgereg = LinearRegression()
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'ridge':
        self.ridgereg = RidgeCV()
        self.ridgereg.fit(self.data, self.Y)
           data['y'])  # data.iloc[:, 0:13]
print(model.coef_)       # coefficients of each feature
print(model.intercept_)

# In[4]:
from sklearn.linear_model import Lars  # least angle regression

model1 = Lars(n_nonzero_coefs=7)
model1.fit(data.iloc[:, 0:13], data['y'])
print(model1.coef_)      # coefficients of each feature

# In[5]:
# Find the most suitable alpha
from sklearn.linear_model import LarsCV  # cross-validated least angle regression model

model1 = LarsCV()
model1.fit(data.iloc[:, 0:13], data['y'])
print(model1.coef_)      # coefficients of each feature
print(model1.alpha_)

# In[6]:
from sklearn.linear_model import LassoCV  # cross-validated Lasso regression model

model1 = LassoCV()
model1.fit(data.iloc[:, 0:13], data['y'])
print(model1.coef_)      # coefficients of each feature
print(model1.alpha_)

# In[8]:
from sklearn.linear_model import Lasso  # AdaptiveLasso could not be found
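# A short self-contained sketch (synthetic data, an illustration rather than the
# notebook's dataset) comparing the alphas chosen by the two cross-validated
# models used above:
from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV, LassoCV

X_demo, y_demo = make_regression(n_samples=200, n_features=13, noise=4.0, random_state=0)
print(LarsCV(cv=5).fit(X_demo, y_demo).alpha_)
print(LassoCV(cv=5, random_state=0).fit(X_demo, y_demo).alpha_)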
        'PLSRegression 2D',
        make_pipeline(
            StandardScaler(), PCA(n_components=0.95),
            PolynomialFeatures(2, interaction_only=True, include_bias=True),
            PLSRegression())))
models.append(
    ModelClass('LinearRegressor',
               make_pipeline(StandardScaler(), LinearRegression())))
models.append(
    ModelClass('HuberRegressor',
               make_pipeline(StandardScaler(), HuberRegressor())))
models.append(
    ModelClass('Lars', make_pipeline(LarsCV(cv=cv_inner, normalize=True))))
models.append(
    ModelClass('LassoLarsCV', LassoLarsCV(cv=cv_inner, normalize=True)))
models.append(ModelClass('LassoLarsIC', make_pipeline(LassoLarsIC())))
models.append(
    ModelClass('BayesianRidge',
               make_pipeline(StandardScaler(), BayesianRidge())))
models.append(
    ModelClass(
        'ElasticNet kBest std',
        make_pipeline(
            StandardScaler(),
            SelectKBest(mutual_info_regression, k=6),
        np.round(
            np.concatenate(
                (elastic_net_model.intercept_, elastic_net_model.coef_),
                axis=None), 3)))
print('Elastic Net MSE: {}'.format(np.round(elastic_net_mae, 3)))
print('Elastic Net coefficients:', elastic_net_coefs)

##############################################################################
###################### LEAST ANGLE REGRESSION ################################
##############################################################################
print(
    "##############################################################################"
)
print("LEAST ANGLE REGRESSION")
LAR_cv = LarsCV(normalize=True)
LAR_model = LAR_cv.fit(X_train, y_train)
LAR_prediction = LAR_model.predict(X_test)
LAR_mse = mean_squared_error(y_test, LAR_prediction)  # renamed from LAR_mae: this is the MSE
LAR_coefs = dict(
    zip(['Intercept'] + data.columns.tolist()[:-1],
        np.round(
            np.concatenate((LAR_model.intercept_, LAR_model.coef_),
                           axis=None), 3)))
print('Least Angle Regression MSE: {}'.format(np.round(LAR_mse, 3)))
print('Least Angle Regression coefficients:', LAR_coefs)

##############################################################################
################## PRINCIPAL COMPONENTS REGRESSION ###########################
##############################################################################
classifiers = [
    SVC(kernel="rbf", probability=True),
    SVC(kernel='linear', probability=True),
    SVC(kernel='sigmoid', probability=True),
    SVC(kernel='poly', probability=True, degree=3),
    SVC(kernel='poly', probability=True, degree=4),
    SVC(kernel='poly', probability=True, degree=5),
    DecisionTreeClassifier(),
    KNeighborsClassifier(),
    GaussianNB(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    QuadraticDiscriminantAnalysis(),
    LinearDiscriminantAnalysis(),
    # note: several entries below (ElasticNetCV, LarsCV, LassoCV, LassoLarsCV,
    # the MultiTask variants, OrthogonalMatchingPursuitCV) are regressors,
    # not classifiers, despite the list name
    ElasticNetCV(max_iter=10000),
    LarsCV(),
    LassoCV(max_iter=10000),
    LassoLarsCV(),
    LogisticRegressionCV(scoring=multi_class_log_loss),
    MultiTaskElasticNetCV(),
    MultiTaskLassoCV(),
    OrthogonalMatchingPursuitCV(),
    RidgeClassifierCV()
]
algorithm = 17
if len(sys.argv) > 1:
    algorithm = int(sys.argv[1])
name = names[algorithm]
clf = classifiers[algorithm]
output_file_name = output_file_names[algorithm] + file_identifier
def default_model_create(self, x, y):
    self.model = LarsCV(cv=self.cv)
    return True
def __init__(self, y, x, saz=False, work_days=False, country=None, transf=None):
    index = x.index
    k = x.shape[1]

    if freq_df(x) == 'M':
        self.frequency = 12
    elif freq_df(x) == 'Q':
        self.frequency = 4

    if saz:
        # Include the monthly dummies for the regressions
        d_months = pd.get_dummies(index.month, prefix='D_M', prefix_sep='')
        d_months.index = index
        x = pd.concat([x, d_months], axis=1)

    if work_days == True:
        # Include the working-days variable
        wd = workdays_series(country, transf=transf)
        x = pd.concat([x, wd], axis=1, join='inner')

    fit_int = False
    norm_X = False
    parallel = 1
    cv_method = TimeSeriesSplit(self.frequency)
    el_net_l1_ratio = [.1, .5, .7, .9, .95, .99, 1]
    bag_n_estimators = [5, 10, 20, 50]
    adab_n_estimators = [10, 50, 100]
    adab_learn_rate = [0.1, 0.2, 0.5]
    arima_n_models = 50

    self.y = y
    self.x = x
    self.ar_elem = self.__check_ar_elem()

    models_par = {}
    models_npar = {}

    models_par['mlp_reg'] = GridSearchCV(
        make_pipeline(StandardScaler(), MLPRegressor()),
        param_grid={
            'mlpregressor__hidden_layer_sizes':
            [(round(.66 * k), round(.33 * k)),
             (round(.75 * k), round(.50 * k), round(.25 * k)),
             (round(.80 * k), round(.60 * k), round(.40 * k), round(.20 * k))]
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['gp_reg'] = GridSearchCV(
        make_pipeline(StandardScaler(),
                      GaussianProcessRegressor(normalize_y=norm_X)),
        param_grid={
            'gaussianprocessregressor__kernel': [
                WhiteKernel(), ConstantKernel(), RBF(), Matern(),
                RationalQuadratic(), DotProduct()
            ]
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['ridgecv'] = make_pipeline(
        StandardScaler(),
        RidgeCV(fit_intercept=fit_int, normalize=norm_X, cv=cv_method))
    models_par['bay_rid'] = make_pipeline(
        StandardScaler(),
        BayesianRidge(fit_intercept=fit_int, normalize=norm_X))
    models_par['lassocv'] = make_pipeline(
        StandardScaler(),
        LassoCV(fit_intercept=fit_int, normalize=norm_X,
                n_jobs=parallel, cv=cv_method))
    models_par['laslrscv'] = make_pipeline(
        StandardScaler(),
        LassoLarsCV(fit_intercept=fit_int, normalize=norm_X,
                    n_jobs=parallel, cv=cv_method))
    models_par['larscv'] = make_pipeline(
        StandardScaler(),
        LarsCV(fit_intercept=fit_int, normalize=norm_X,
               n_jobs=parallel, cv=cv_method))
    models_par['elasnet'] = make_pipeline(
        StandardScaler(),
        ElasticNetCV(l1_ratio=el_net_l1_ratio, fit_intercept=fit_int,
                     normalize=norm_X, n_jobs=parallel, cv=cv_method))
    models_par['hub_reg'] = GridSearchCV(
        make_pipeline(StandardScaler(),
                      HuberRegressor(fit_intercept=fit_int)),
        param_grid={
            'huberregressor__epsilon': [1.1, 1.2, 1.35],
            'huberregressor__alpha': [0.0001, 0.01, 0.1, 0.3]
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['ort_purs'] = make_pipeline(
        StandardScaler(),
        OrthogonalMatchingPursuitCV(fit_intercept=fit_int, normalize=norm_X,
                                    n_jobs=parallel, cv=cv_method))
    models_par['ard_reg'] = make_pipeline(
        StandardScaler(),
        ARDRegression(fit_intercept=fit_int, normalize=norm_X))
    models_par['sgd_reg'] = GridSearchCV(
        make_pipeline(StandardScaler(),
                      SGDRegressor(fit_intercept=fit_int, shuffle=False)),
        param_grid={
            'sgdregressor__l1_ratio': el_net_l1_ratio,
            'sgdregressor__loss':
            ['squared_loss', 'huber', 'epsilon_insensitive']
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['pas_agg'] = make_pipeline(
        StandardScaler(),
        PassiveAggressiveRegressor(fit_intercept=fit_int, shuffle=False))
    models_par['lin_all'] = make_pipeline(
        StandardScaler(),
        LinearRegression(fit_intercept=fit_int, normalize=norm_X,
                         n_jobs=parallel))
    models_par['ols1'] = make_pipeline(
        StandardScaler(),
        SelectFromModel(DecisionTreeRegressor(), prefit=False),
        LinearRegression())
    models_par['ols2'] = make_pipeline(
        StandardScaler(),
        SelectFromModel(ElasticNetCV(l1_ratio=el_net_l1_ratio,
                                     fit_intercept=fit_int,
                                     normalize=norm_X,
                                     n_jobs=parallel,
                                     cv=cv_method),
                        prefit=False),
        LinearRegression())
    models_par['ols3'] = make_pipeline(
        StandardScaler(),
        SelectFromModel(LarsCV(fit_intercept=fit_int,
                               normalize=norm_X,
                               n_jobs=parallel,
                               cv=cv_method),
                        prefit=False),
        LinearRegression())
    models_par['ols4'] = make_pipeline(
        StandardScaler(),
        SelectFromModel(BayesianRidge(fit_intercept=fit_int,
                                      normalize=norm_X),
                        prefit=False),
        LinearRegression())
    models_par['ols5'] = GridSearchCV(
        make_pipeline(StandardScaler(), PCA(), LinearRegression()),
        param_grid={'pca__n_components': [1, 2, 3, 4, 5]},
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['d_tree'] = make_pipeline(StandardScaler(),
                                         DecisionTreeRegressor())
    models_par['rand_for'] = GridSearchCV(
        make_pipeline(StandardScaler(), RandomForestRegressor()),
        param_grid={'randomforestregressor__n_estimators': [10, 50, 100]},
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['bag1'] = GridSearchCV(
        make_pipeline(StandardScaler(),
                      BaggingRegressor(max_samples=0.5, max_features=0.5)),
        param_grid={'baggingregressor__n_estimators': bag_n_estimators},
        cv=cv_method,
        refit=True)
    models_par['bag2'] = GridSearchCV(
        make_pipeline(
            StandardScaler(),
            BaggingRegressor(LinearRegression(fit_intercept=fit_int,
                                              normalize=norm_X,
                                              n_jobs=parallel),
                             max_samples=0.5,
                             max_features=0.5)),
        param_grid={'baggingregressor__n_estimators': bag_n_estimators},
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['bag3'] = GridSearchCV(
        make_pipeline(
            StandardScaler(),
            BaggingRegressor(PassiveAggressiveRegressor(fit_intercept=fit_int,
                                                        shuffle=False),
                             max_samples=0.5,
                             max_features=0.5)),
        param_grid={'baggingregressor__n_estimators': bag_n_estimators},
        cv=cv_method,
        refit=True)
    models_par['bag4'] = GridSearchCV(
        make_pipeline(
            StandardScaler(),
            BaggingRegressor(ARDRegression(fit_intercept=fit_int,
                                           normalize=norm_X),
                             max_samples=0.5,
                             max_features=0.5)),
        param_grid={'baggingregressor__n_estimators': bag_n_estimators},
        cv=cv_method,
        refit=True)
    models_par['bag5'] = GridSearchCV(
        make_pipeline(
            StandardScaler(),
            BaggingRegressor(OrthogonalMatchingPursuit(fit_intercept=fit_int,
                                                       normalize=norm_X),
                             max_samples=0.5,
                             max_features=0.5)),
        param_grid={'baggingregressor__n_estimators': bag_n_estimators},
        cv=cv_method,
        refit=True)
    models_par['ada1'] = GridSearchCV(
        make_pipeline(StandardScaler(), AdaBoostRegressor()),
        param_grid={
            'adaboostregressor__n_estimators': adab_n_estimators,
            'adaboostregressor__learning_rate': adab_learn_rate
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['ada2'] = GridSearchCV(
        make_pipeline(
            StandardScaler(),
            AdaBoostRegressor(
                LinearRegression(fit_intercept=fit_int,
                                 normalize=norm_X,
                                 n_jobs=parallel))),
        param_grid={
            'adaboostregressor__n_estimators': adab_n_estimators,
            'adaboostregressor__learning_rate': adab_learn_rate
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['ada3'] = GridSearchCV(
        make_pipeline(
            StandardScaler(),
            AdaBoostRegressor(
                PassiveAggressiveRegressor(fit_intercept=fit_int,
                                           shuffle=False))),
        param_grid={
            'adaboostregressor__n_estimators': adab_n_estimators,
            'adaboostregressor__learning_rate': adab_learn_rate
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['ada4'] = GridSearchCV(
        make_pipeline(
            StandardScaler(),
            AdaBoostRegressor(
                ARDRegression(fit_intercept=fit_int, normalize=norm_X))),
        param_grid={
            'adaboostregressor__n_estimators': adab_n_estimators,
            'adaboostregressor__learning_rate': adab_learn_rate
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['ada5'] = GridSearchCV(
        make_pipeline(
            StandardScaler(),
            AdaBoostRegressor(
                OrthogonalMatchingPursuit(fit_intercept=fit_int,
                                          normalize=norm_X))),
        param_grid={
            'adaboostregressor__n_estimators': adab_n_estimators,
            'adaboostregressor__learning_rate': adab_learn_rate
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)
    models_par['g_boost'] = GridSearchCV(
        make_pipeline(StandardScaler(), GradientBoostingRegressor()),
        param_grid={
            'gradientboostingregressor__n_estimators': adab_n_estimators,
            'gradientboostingregressor__learning_rate': adab_learn_rate
        },
        cv=cv_method,
        refit=True,
        n_jobs=parallel)

    norm_lin = pipe_transf([('std', StandardScaler()),
                            ('regression', LinearRegression())])
    models_par['rfecv'] = RFECV(estimator=norm_lin,
                                step=1,
                                cv=cv_method,
                                scoring='r2')

    models_npar['arima1'] = make_pipeline(
        DropFeatures(self.ar_elem), StandardScaler(),
        SelectFromModel(DecisionTreeRegressor(), prefit=False),
        S_avg_arima(freq=self.frequency, n_models=arima_n_models))
    models_npar['arima2'] = make_pipeline(
        DropFeatures(self.ar_elem), StandardScaler(),
        SelectFromModel(ElasticNetCV(l1_ratio=el_net_l1_ratio,
                                     n_jobs=parallel,
                                     fit_intercept=fit_int,
                                     normalize=norm_X,
                                     cv=cv_method),
                        prefit=False),
        S_avg_arima(freq=self.frequency, n_models=arima_n_models))
    models_npar['arima3'] = make_pipeline(
        DropFeatures(self.ar_elem), StandardScaler(),
        SelectFromModel(LarsCV(fit_intercept=fit_int,
                               normalize=norm_X,
                               n_jobs=parallel,
                               cv=cv_method),
                        prefit=False),
        S_avg_arima(freq=self.frequency, n_models=arima_n_models))
    models_npar['arima4'] = make_pipeline(
        DropFeatures(self.ar_elem), StandardScaler(),
        SelectFromModel(BayesianRidge(fit_intercept=fit_int,
                                      normalize=norm_X),
                        prefit=False),
        S_avg_arima(freq=self.frequency, n_models=arima_n_models))
    models_npar['arima5'] = S_avg_arima(freq=self.frequency,
                                        use_X=False,
                                        n_models=arima_n_models)

    self.models_par = models_par
    self.models_npar = models_npar
}, {
    'name': 'LLCV',
    'mdl': LassoLarsCV(max_n_alphas=1000)
}, {
    'name': 'LLaic',
    'mdl': LassoLarsIC(criterion='aic')
}, {
    'name': 'ENCV',
    'mdl': ElasticNetCV(n_alphas=100)
}, {
    'name': 'LarsCV',
    'mdl': LarsCV(max_n_alphas=1000)
}, {
    'name': 'LR',
    'mdl': LinearRegression()
}, {
    'name': 'ARDR',
    'mdl': ARDRegression()
}, {
    'name': 'BYR',
    'mdl': BayesianRidge()
},
]
n_splits = (len(df_general) - 50)
models = {}
models[1] = make_pipeline(
    StandardScaler(),
    SelectFromModel(DecisionTreeRegressor(),
                    prefit=False)).fit(df_general, ibc)
models[2] = make_pipeline(
    StandardScaler(),
    SelectFromModel(ElasticNetCV(normalize=False,
                                 cv=TimeSeriesSplit(n_splits)),
                    prefit=False)).fit(df_general, ibc)
models[3] = make_pipeline(
    StandardScaler(),
    SelectFromModel(LarsCV(normalize=False, cv=TimeSeriesSplit(n_splits)),
                    prefit=False)).fit(df_general, ibc)
models[4] = make_pipeline(
    StandardScaler(),
    SelectFromModel(BayesianRidge(normalize=False),
                    prefit=False)).fit(df_general, ibc)
models[5] = make_pipeline(
    StandardScaler(),
    RFECV(LinearRegression(),
          cv=TimeSeriesSplit(n_splits))).fit(df_general, ibc)
models[6] = make_pipeline(StandardScaler(),
                          SelectKBest(mutual_info_regression,
                                      1)).fit(df_general, ibc)
models[7] = make_pipeline(StandardScaler(),
                          SelectKBest(mutual_info_regression,
                                      3)).fit(df_general, ibc)
# Fit
feat_selector = feat_selector.fit(X, y)

# Print support and ranking
print(feat_selector.support_)  # False means the feature can be eliminated
print(feat_selector.ranking_)  # ranking
print(X.columns)

################## Use LarsCV for hyperparameter optimization (wrapper)
# LARS starts with one variable and increases its coefficient until the residual
# is as correlated with some other variable as it is with the starting variable;
# that variable is then added to the active set, and the coefficients move in the
# joint least-squares direction of the active set, iterating. It acts as a
# feature selection method because some coefficients end up exactly zero.

# Instantiate
lars_mod = LarsCV(cv=5, normalize=False)

# Fit
feat_selector = lars_mod.fit(X, y)

# Print r-squared score and estimated alpha
print(lars_mod.score(X, y))
print(lars_mod.alpha_)

################# Using a RandomForestRegressor for feature selection (tree-based methods)
# How feature importance is calculated: build the trees, then take one feature
# variable, permute it randomly (shuffle), and rerun the observations through
# the trees. The percentage increase in the misclassification rate gives the
# feature importance.
# https://link.springer.com/article/10.1023/A:1010933404324

# Instantiate
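# A minimal self-contained sketch of the selection idea described above: after
# fitting LarsCV, features whose coefficients are exactly zero can be dropped
# (synthetic data; the variable names are illustrative, not from the original).
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV

X_demo, y_demo = make_regression(n_samples=200, n_features=20, n_informative=5,
                                 noise=1.0, random_state=0)
lars_sel = LarsCV(cv=5).fit(X_demo, y_demo)
kept = np.flatnonzero(lars_sel.coef_)
print(len(kept), "features kept:", kept)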
    store_pkl(pipeline, name)
    mpg = DataFrame(pipeline.predict(auto_X), columns = ["mpg"])
    store_csv(mpg, name)

if "Auto" in datasets:
    build_auto(AdaBoostRegressor(DecisionTreeRegressor(random_state = 13, min_samples_leaf = 5), random_state = 13, n_estimators = 17), "AdaBoostAuto")
    build_auto(ARDRegression(normalize = True), "BayesianARDAuto")
    build_auto(BayesianRidge(normalize = True), "BayesianRidgeAuto")
    build_auto(DecisionTreeRegressor(random_state = 13, min_samples_leaf = 2), "DecisionTreeAuto", compact = False)
    build_auto(BaggingRegressor(DecisionTreeRegressor(random_state = 13, min_samples_leaf = 5), random_state = 13, n_estimators = 3, max_features = 0.5), "DecisionTreeEnsembleAuto")
    build_auto(DummyRegressor(strategy = "median"), "DummyAuto")
    build_auto(ElasticNetCV(random_state = 13), "ElasticNetAuto")
    build_auto(ExtraTreesRegressor(random_state = 13, min_samples_leaf = 5), "ExtraTreesAuto")
    build_auto(GradientBoostingRegressor(random_state = 13, init = None), "GradientBoostingAuto")
    build_auto(HuberRegressor(), "HuberAuto")
    build_auto(LarsCV(), "LarsAuto")
    build_auto(LassoCV(random_state = 13), "LassoAuto")
    build_auto(LassoLarsCV(), "LassoLarsAuto")
    build_auto(OptimalLGBMRegressor(objective = "regression", n_estimators = 17, num_iteration = 11), "LGBMAuto", num_iteration = 11)
    build_auto(LinearRegression(), "LinearRegressionAuto")
    build_auto(BaggingRegressor(LinearRegression(), random_state = 13, max_features = 0.75), "LinearRegressionEnsembleAuto")
    build_auto(OrthogonalMatchingPursuitCV(), "OMPAuto")
    build_auto(RandomForestRegressor(random_state = 13, min_samples_leaf = 3), "RandomForestAuto", flat = True)
    build_auto(RidgeCV(), "RidgeAuto")
    build_auto(TheilSenRegressor(n_subsamples = 15, random_state = 13), "TheilSenAuto")
    build_auto(OptimalXGBRegressor(objective = "reg:linear", ntree_limit = 31), "XGBAuto", ntree_limit = 31)

if "Auto" in datasets:
    build_auto(TransformedTargetRegressor(DecisionTreeRegressor(random_state = 13)), "TransformedDecisionTreeAuto")
    build_auto(TransformedTargetRegressor(LinearRegression(), func = numpy.log, inverse_func = numpy.exp), "TransformedLinearRegressionAuto")
    clf = BaseEstimator()
    res = explain_weights(clf, vec=vec)
    assert 'BaseEstimator' in res.error
    for expl in format_as_all(res, clf):
        assert 'Error' in expl
        assert 'BaseEstimator' in expl
    with pytest.raises(TypeError):
        explain_weights(clf, unknown_argument=True)


@pytest.mark.parametrize(['reg'], [
    [ElasticNet(random_state=42)],
    [ElasticNetCV(random_state=42)],
    [HuberRegressor()],
    [Lars()],
    [LarsCV(max_n_alphas=10)],
    [Lasso(random_state=42)],
    [LassoCV(random_state=42)],
    [LassoLars(alpha=0.01)],
    [LassoLarsCV(max_n_alphas=10)],
    [LassoLarsIC()],
    [OrthogonalMatchingPursuit(n_nonzero_coefs=10)],
    [OrthogonalMatchingPursuitCV()],
    [PassiveAggressiveRegressor(C=0.1, random_state=42)],
    [Ridge(random_state=42)],
    [RidgeCV()],
    [SGDRegressor(random_state=42)],
    [LinearRegression()],
    [LinearSVR(random_state=42)],
    [TheilSenRegressor(random_state=42)],
])
def main():
    # let's create a folder with a unique name to store results
    folderName = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + "-regression"
    if not os.path.exists(folderName):
        os.makedirs(folderName)

    # initialize logging
    common.initialize_logging(folderName)

    regressorsList = [
        # human-designed regressors
        [HumanRegressor("y = a_0 + a_1 * x + a_2 * x**2 + a_3 * x**3",
                        map_variables_to_features={"x": 0}), "HumanRegressor"],
        [PolynomialRegressor(2), "PolynomialRegressor2"],
        #[PolynomialRegressor(3), "PolynomialRegressor3"],

        # keras neural network
        #[ANNRegressor(epochs=500, batch_size=32, layers=[16,4]), "KerasRegressor8-4"],
        #[ANNRegressor(epochs=700, batch_size=32, layers=[16,8]), "KerasRegressor16-8"],

        # cross decomposition
        [PLSRegression(), "PLSRegression"],

        # ensemble
        [AdaBoostRegressor(), "AdaBoostRegressor"],
        [BaggingRegressor(), "BaggingRegressor"],
        [BaggingRegressor(n_estimators=100), "BaggingRegressor_100"],
        [BaggingRegressor(n_estimators=300), "BaggingRegressor_300"],
        [ExtraTreesRegressor(), "ExtraTreesRegressor"],
        [GradientBoostingRegressor(), "GradientBoostingRegressor"],
        [RandomForestRegressor(), "RandomForestRegressor"],
        [RandomForestRegressor(n_estimators=100), "RandomForestRegressor_100"],
        [RandomForestRegressor(n_estimators=300), "RandomForestRegressor_300"],

        # isotonic
        #[IsotonicRegression(), "IsotonicRegression"], # apparently wants "X" as a 1d array

        # kernel ridge
        [KernelRidge(), "KernelRidge"],

        # linear
        #[ARDRegression(), "ARDRegression"], # takes too much time to train
        [BayesianRidge(), "BayesianRidge"],
        [ElasticNetCV(), "ElasticNetCV"],
        [LarsCV(), "LarsCV"],
        [LassoCV(), "LassoCV"],
        [LinearRegression(), "LinearRegression"],
        [PassiveAggressiveRegressor(), "PassiveAggressiveRegressor"],

        # neighbors
        [KNeighborsRegressor(), "KNeighborsRegressor"],
        [RadiusNeighborsRegressor(), "RadiusNeighborsRegressor"],

        # neural networks
        #[BernoulliRBM(), "BernoulliRBM"], # has a different interface, no "predict"

        # svm
        [SVR(), "SVR"],
        [LinearSVR(), "LinearSVR"],
        [NuSVR(), "NuSVR"],

        # tree
        [DecisionTreeRegressor(), "DecisionTreeRegressor (max depth 10)"],
        [ExtraTreeRegressor(), "ExtraTreeRegressor"],

        # generalized additive models
        [LinearGAM(n_splines=20), "LinearGAM(n_splines=20)"],

        # gaussian processes
        [GaussianProcessRegressor(kernel=DotProduct() + WhiteKernel()),
         "GaussianProcessRegressor"],
    ]

    X = y = X_train = X_test = y_train = y_test = variablesX = variablesY = None
    numberOfSplits = 10  # TODO change number of splits from command line

    if True:
        # this is just a dumb benchmark
        X, y, variablesX, variablesY = common.loadEasyBenchmark()

    if False:
        X, y, variablesX, variablesY = common.loadChristianQuestionnaireRegression()

    if False:
        X, y, variablesX, variablesY = common.loadYongShiDataCalibration2("TIMBER")

    if False:
        X, y, variablesX, variablesY = common.loadLaurentBouvierNewData()

    if False:
        X, y, variablesX, variablesY = common.loadYongShiDataCalibration()

    if False:
        from sklearn.datasets import load_linnerud
        X, y = load_linnerud(return_X_y=True)

    if False:
        X, y, variablesX, variablesY = common.loadYingYingData()

    if False:
        X, y, variablesX, variablesY = common.loadCleaningDataGermanSpecific()
        #X, y, variablesX, variablesY = common.loadCleaningDataGerman()

    if False:
        X, y, variablesX, variablesY = common.loadInsects()

    if False:
        X, y, variablesX, variablesY = common.loadMilkProcessPipesDimensionalAnalysis()
        #X, y, variablesX, variablesY = common.loadMilkProcessPipes()

    if False:  # ecosystem services
        X, y, variablesX, variablesY = common.loadEcosystemServices()

    if False:
        X, y, variablesX, variablesY = common.loadMarcoSoil()

    if False:
        # load dataset
        X, y = common.loadEureqaRegression()
        # randomly split between training and test
        #X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    if False:
        # load dataset
        X_train, X_test, y_train, y_test = common.loadBiscuitExample()
        logging.info("X_train: " + str(X_train.shape))
        logging.info("X_test: " + str(X_test.shape))
        logging.info("y_train: " + str(y_train.shape))
        logging.info("y_test: " + str(y_test.shape))
        # in this particular case, I create the "global" X and y by putting together the two arrays
        X = np.append(X_train, X_test, axis=0)
        y = np.append(y_train, y_test, axis=0)

    if False:
        # load dataset
        X_train, X_test, y_train, y_test = common.loadAromoptiExample()
        logging.info("X_train: " + str(X_train.shape))
        logging.info("X_test: " + str(X_test.shape))
        logging.info("y_train: " + str(y_train.shape))
        logging.info("y_test: " + str(y_test.shape))
        # in this particular case, I create the "global" X and y by putting together the two arrays
        X = np.append(X_train, X_test, axis=0)
        y = np.append(y_train, y_test, axis=0)

    logging.info("Regressing %d output variables, in function of %d input variables..."
                 % (y.shape[1], X.shape[1]))

    # if the names of the variables are not specified, let's specify them!
    if variablesY is None:
        variablesY = ["y" + str(i) for i in range(0, len(y[0]))]
    if variablesX is None:
        variablesX = ["X" + str(i) for i in range(0, len(X[0]))]

    performances = dict()

    for variableIndex, variableY in enumerate(variablesY):
        logging.info("** Now evaluating models for variable \"%s\"... **" % variableY)

        # obtain data
        y_ = y[:, variableIndex].ravel()

        # assume here that you will have train/test indexes instead;
        # it's also easier for the plots, as we do not face the issue
        # of duplicate values (e.g. same value with two indexes)
        rs = ShuffleSplit(n_splits=numberOfSplits, random_state=42)
        #rs = LeaveOneOut()

        # initialize performance dictionary of arrays
        performances[variableY] = dict()
        for regressor, regressorName in regressorsList:
            performances[variableY][regressorName] = dict()
            performances[variableY][regressorName]["r^2"] = []
            performances[variableY][regressorName]["e.v"] = []
            performances[variableY][regressorName]["mse"] = []
            performances[variableY][regressorName]["mae"] = []
            performances[variableY][regressorName]["predicted"] = []

        # this is used to store all values of each fold, in order; maybe there's a smarter way to do it
        foldPointsInOrder = []

        # and now, for every regressor
        for foldIndex, indexes in enumerate(rs.split(X)):
            train_index, test_index = indexes
            X_train = X[train_index]
            y_train = y_[train_index]
            X_test = X[test_index]
            y_test = y_[test_index]

            # normalize
            logging.info("Normalizing data...")
            scalerX = StandardScaler()
            scalerY = StandardScaler()
            X_train = scalerX.fit_transform(X_train)
            X_test = scalerX.transform(X_test)
            # this "reshape/ravel" here is just to avoid warnings, it has no true effect on data
            y_train = scalerY.fit_transform(y_train.reshape(-1, 1)).ravel()
            y_test = scalerY.transform(y_test.reshape(-1, 1)).ravel()

            # now, we store points of the folder in order of how they appear
            foldPointsInOrder.extend(list(scalerY.inverse_transform(y_test)))

            for regressorIndex, regressorData in enumerate(regressorsList):
                regressor = regressorData[0]
                regressorName = regressorData[1]

                logging.info("Fold #%d/%d: training regressor #%d/%d \"%s\""
                             % (foldIndex + 1, numberOfSplits, regressorIndex + 1,
                                len(regressorsList), regressorName))

                try:
                    regressor.fit(X_train, y_train)
                    y_test_predicted = regressor.predict(X_test)
                    r2Test = r2_score(y_test, y_test_predicted)
                    mseTest = mean_squared_error(y_test, y_test_predicted)
                    maeTest = mean_absolute_error(y_test, y_test_predicted)
                    varianceTest = explained_variance_score(y_test, y_test_predicted)

                    logging.info("R^2 score (test): %.4f" % r2Test)
                    logging.info("EV score (test): %.4f" % varianceTest)
                    logging.info("MSE score (test): %.4f" % mseTest)
                    logging.info("MAE score (test): %.4f" % maeTest)

                    # add performance to the list of performances
                    performances[variableY][regressorName]["r^2"].append(r2Test)
                    performances[variableY][regressorName]["e.v"].append(varianceTest)
                    performances[variableY][regressorName]["mse"].append(mseTest)
                    performances[variableY][regressorName]["mae"].append(maeTest)

                    # also record the predictions, to be used later in a global figure
                    performances[variableY][regressorName]["predicted"].extend(
                        list(scalerY.inverse_transform(y_test_predicted)))

                    try:
                        import matplotlib.pyplot as plt

                        # plotting first figure, with points 'x' and 'o'
                        y_predicted = regressor.predict(scalerX.transform(X))  # 'X' was never wholly rescaled before
                        y_train_predicted = regressor.predict(X_train)

                        plt.figure()
                        plt.scatter(train_index, y_train, c="gray", label="training data")
                        plt.scatter(test_index, y_test, c="green", label="test data")
                        plt.plot(np.arange(len(y_predicted)), y_predicted, 'x', c="red", label="regression")
                        plt.xlabel("order of data samples")
                        plt.ylabel("target")
                        plt.title(regressorName + ", R^2=%.4f (test)" % r2Test)
                        plt.legend()
                        logging.info("Saving figure...")
                        plt.savefig(os.path.join(folderName, regressorName + "-" + variableY + "-fold-" + str(foldIndex + 1) + ".pdf"))
                        plt.close()

                        # plotting second figure, with everything close to a middle line
                        plt.figure()
                        plt.plot(y_train, y_train_predicted, 'r.', label="training set")  # points
                        plt.plot(y_test, y_test_predicted, 'go', label="test set")  # points
                        plt.plot([min(y_train.min(), y_test.min()),
                                  max(y_train.max(), y_test.max())],
                                 [min(y_train_predicted.min(), y_test_predicted.min()),
                                  max(y_train_predicted.max(), y_test_predicted.max())],
                                 'k--')  # line
                        plt.xlabel("measured")
                        plt.ylabel("predicted")
                        plt.title(regressorName + " measured vs predicted, " + variableY)
                        plt.legend(loc='best')
                        plt.savefig(os.path.join(folderName, regressorName + "-" + variableY + "-fold-" + str(foldIndex + 1) + "-b.pdf"))
                        plt.close()

                        # also, save ordered list of features
                        featuresByImportance = relativeFeatureImportance(regressor)

                        # if list exists, write feature importance to disk
                        # TODO horrible hack here, to avoid issues with GAM
                        if len(featuresByImportance) > 0 and "GAM" not in regressorName:
                            featureImportanceFileName = regressorName + "-" + variableY + "-featureImportance-fold" + str(foldIndex) + ".csv"
                            with open(os.path.join(folderName, featureImportanceFileName), "w") as fp:
                                fp.write("feature,importance\n")
                                for featureImportance, featureIndex in featuresByImportance:
                                    fp.write(variablesX[int(featureIndex)] + "," + str(featureImportance) + "\n")

                    except ImportError:
                        logging.info("Cannot import matplotlib. Skipping plots...")

                except Exception as e:
                    logging.info("Regressor \"" + regressorName + "\" failed on variable \"" + variableY + "\":" + str(e))

    logging.info("Final summary:")
    with open(os.path.join(folderName, "00_summary.txt"), "w") as fp:
        for variableY in variablesY:
            logging.info("For variable \"" + variableY + "\"")
            fp.write("For variable: " + variableY + " = f(" + variablesX[0])
            for i in range(1, len(variablesX)):
                fp.write("," + variablesX[i])
            fp.write(")\n")

            # create a list from the dictionary and sort it
            sortedPerformances = sorted(
                [(performances[variableY][regressorName], regressorName)
                 for regressorName in performances[variableY]],
                key=lambda x: np.mean(x[0]["r^2"]),
                reverse=True)

            for regressorData in sortedPerformances:
                regressorName = regressorData[1]
                regressorScore = regressorData[0]

                r2Mean = np.mean(regressorScore["r^2"])
                r2std = np.std(regressorScore["r^2"])
                varianceMean = np.mean(regressorScore["e.v"])
                varianceStd = np.std(regressorScore["e.v"])
                mseMean = np.mean(regressorScore["mse"])
                mseStd = np.std(regressorScore["mse"])
                maeMean = np.mean(regressorScore["mae"])
                maeStd = np.std(regressorScore["mae"])

                logging.info("\t- %s, R^2=%.4f (std=%.4f), Explained Variance=%.4f (std=%.4f), MSE=%.4f (std=%.4f), MAE=%.4f (std=%.4f)"
                             % (regressorName, r2Mean, r2std, varianceMean, varianceStd, mseMean, mseStd, maeMean, maeStd))
                fp.write("\t- %s, R^2=%.4f (std=%.4f), Explained Variance=%.4f (std=%.4f), MSE=%.4f (std=%.4f), MAE=%.4f (std=%.4f)\n"
                         % (regressorName, r2Mean, r2std, varianceMean, varianceStd, mseMean, mseStd, maeMean, maeStd))
                fp.write("\t\t- R^2:" + str(["%.4f" % x for x in regressorScore["r^2"]]) + "\n")
                fp.write("\t\t- E.V.:" + str(["%.4f" % x for x in regressorScore["e.v"]]) + "\n")
                fp.write("\t\t- MSE:" + str(["%.4f" % x for x in regressorScore["mse"]]) + "\n")
                fp.write("\t\t- MAE:" + str(["%.4f" % x for x in regressorScore["mae"]]) + "\n")

                # also, plot a "global" graph
                # issue here, if a regressor fails, you have incongruent matrixes: a check is in order
                # TODO also, the plot looks really bad if some values are negative; turn everything to absolute values?
                if len(foldPointsInOrder) == len(regressorScore["predicted"]):
                    fig = plt.figure()
                    ax = fig.add_subplot(111)
                    #bottom_left_corner = [min(foldPointsInOrder), max(foldPointsInOrder)]
                    #top_right_corner = [min(regressorScore["predicted"]), max(regressorScore["predicted"])]
                    x_bottom_top = [0, max(foldPointsInOrder)]
                    y_bottom_top = [0, max(foldPointsInOrder)]
                    ax.plot(foldPointsInOrder, regressorScore["predicted"], 'g.')  # points
                    ax.plot(x_bottom_top, y_bottom_top, 'k--', label="1:1")  # line
                    ax.plot(x_bottom_top, [y_bottom_top[0] * 1.20, y_bottom_top[1] * 1.20], 'r--', label="20% error")
                    ax.plot(x_bottom_top, [y_bottom_top[0] * 0.80, y_bottom_top[1] * 0.80], 'r--')
                    ax.set_title(regressorName + " measured vs predicted, " + variableY + " (all test)")
                    ax.set_xlabel("measured")
                    ax.set_ylabel("predicted")
                    ax.legend(loc='best')
                    plt.savefig(os.path.join(folderName, regressorName + "-" + variableY + "-global-b.png"))
                    plt.close(fig)
# - Hold out half of the data and train a LARS model

# Variable definition
# --- number of training samples
train_n = 100

# Create an instance and fit
# --- limit the number of nonzero coefficients to 12
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])

# Create an instance and fit
# --- limit the number of nonzero coefficients to 500 (the default)
lars_500 = Lars(n_nonzero_coefs=500)
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

# Mean squared error
np.mean(
    np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))

# 3 LARS as feature selection ---------------------------------------------------------------------

# Create an instance
lcv = LarsCV()

# Fit
lcv.fit(reg_data, reg_target)

# Nonzero coefficients
np.sum(lcv.coef_ != 0)
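# A self-contained variant of the comparison above, with synthetic data standing
# in for the notebook's reg_data/reg_target (the shapes are assumptions):
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV

reg_data_demo, reg_target_demo = make_regression(
    n_samples=200, n_features=500, n_informative=10, noise=2.0, random_state=0)
lcv_demo = LarsCV(cv=5).fit(reg_data_demo, reg_target_demo)
print(np.sum(lcv_demo.coef_ != 0))  # number of nonzero coefficients chosen by CV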
X_learning = df_all_data[:train_index]
X_test = df_all_data[train_index:]

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LarsCV, Lasso, LassoCV, ElasticNet, ElasticNetCV
from sklearn.linear_model import LassoLars, LassoLarsCV, Ridge, RidgeCV
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
import xgboost as xgb

models = []
models.append(("LrE", LinearRegression()))
models.append(("RidCV", RidgeCV()))
models.append(("LarCV", LarsCV()))
models.append(("LasCV", LassoCV()))
models.append(("ElNCV", ElasticNetCV()))
models.append(("LaLaCV", LassoLarsCV()))
models.append(("XGB", xgb.XGBRegressor()))

kfold = KFold(n_splits=10)


def getCVResult(models, X_learning, Y_learning):
    for name, model in models:
        cv_results = cross_val_score(model, X_learning, Y_learning,
                                     scoring='neg_mean_squared_error',
def GetAllModelsForComparison(X_train, Y_train):
    # Note: the original dict literal repeated several keys (e.g. 'BaseEstimator',
    # 'ClassifierMixin', 'SGDClassifier', 'Parallel'); duplicates silently
    # overwrite earlier entries, so each key is listed once here.
    models = {
        'ARDRegression': ARDRegression(),
        'BayesianRidge': BayesianRidge(),
        'ElasticNet': ElasticNet(),
        'ElasticNetCV': ElasticNetCV(),
        'Hinge': Hinge(),
        #'Huber': Huber(),
        'HuberRegressor': HuberRegressor(),
        'Lars': Lars(),
        'LarsCV': LarsCV(),
        'Lasso': Lasso(),
        'LassoCV': LassoCV(),
        'LassoLars': LassoLars(),
        'LassoLarsCV': LassoLarsCV(),
        'LinearRegression': LinearRegression(),
        'Log': Log(),
        'LogisticRegression': LogisticRegression(),
        'LogisticRegressionCV': LogisticRegressionCV(),
        'ModifiedHuber': ModifiedHuber(),
        'MultiTaskElasticNet': MultiTaskElasticNet(),
        'MultiTaskElasticNetCV': MultiTaskElasticNetCV(),
        'MultiTaskLasso': MultiTaskLasso(),
        'MultiTaskLassoCV': MultiTaskLassoCV(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'OrthogonalMatchingPursuitCV': OrthogonalMatchingPursuitCV(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),
        'Perceptron': Perceptron(),
        'RANSACRegressor': RANSACRegressor(),
        #'RandomizedLasso': RandomizedLasso(),
        #'RandomizedLogisticRegression': RandomizedLogisticRegression(),
        'Ridge': Ridge(),
        'RidgeCV': RidgeCV(),
        'RidgeClassifier': RidgeClassifier(),
        'SGDClassifier': SGDClassifier(),
        'SGDRegressor': SGDRegressor(),
        'SquaredLoss': SquaredLoss(),
        'TheilSenRegressor': TheilSenRegressor(),
        'BaseEstimator': BaseEstimator(),
        'ClassifierMixin': ClassifierMixin(),
        'LinearClassifierMixin': LinearClassifierMixin(),
        'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis(),
        'QuadraticDiscriminantAnalysis': QuadraticDiscriminantAnalysis(),
        'StandardScaler': StandardScaler(),
        'TransformerMixin': TransformerMixin(),
        'KernelRidge': KernelRidge(),
        'RegressorMixin': RegressorMixin(),
        'LinearSVC': LinearSVC(),
        'LinearSVR': LinearSVR(),
        'NuSVC': NuSVC(),
        'NuSVR': NuSVR(),
        'OneClassSVM': OneClassSVM(),
        'SVC': SVC(),
        'SVR': SVR(),
        #'BallTree': BallTree(),
        #'DistanceMetric': DistanceMetric(),
        #'KDTree': KDTree(),
        'KNeighborsClassifier': KNeighborsClassifier(),
        'KNeighborsRegressor': KNeighborsRegressor(),
        'KernelDensity': KernelDensity(),
        #'LSHForest': LSHForest(),
        'LocalOutlierFactor': LocalOutlierFactor(),
        'NearestCentroid': NearestCentroid(),
        'NearestNeighbors': NearestNeighbors(),
        'RadiusNeighborsClassifier': RadiusNeighborsClassifier(),
        'RadiusNeighborsRegressor': RadiusNeighborsRegressor(),
        #'GaussianProcess': GaussianProcess(),
        'GaussianProcessRegressor': GaussianProcessRegressor(),
        'GaussianProcessClassifier': GaussianProcessClassifier(),
        'CCA': CCA(),
        'PLSCanonical': PLSCanonical(),
        'PLSRegression': PLSRegression(),
        'PLSSVD': PLSSVD(),
        #'ABCMeta': ABCMeta(),
        #'BaseDiscreteNB': BaseDiscreteNB(),
        #'BaseNB': BaseNB(),
        'BernoulliNB': BernoulliNB(),
        'GaussianNB': GaussianNB(),
        'LabelBinarizer': LabelBinarizer(),
        'MultinomialNB': MultinomialNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'DecisionTreeRegressor': DecisionTreeRegressor(),
        'ExtraTreeClassifier': ExtraTreeClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'BaggingClassifier': BaggingClassifier(),
        'BaggingRegressor': BaggingRegressor(),
        #'BaseEnsemble': BaseEnsemble(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'IsolationForest': IsolationForest(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RandomForestRegressor': RandomForestRegressor(),
        'RandomTreesEmbedding': RandomTreesEmbedding(),
        #'VotingClassifier': VotingClassifier(),
        'MetaEstimatorMixin': MetaEstimatorMixin(),
        #'OneVsOneClassifier': OneVsOneClassifier(),
        #'OneVsRestClassifier': OneVsRestClassifier(),
        #'OutputCodeClassifier': OutputCodeClassifier(),
        'Parallel': Parallel(),
        #'ClassifierChain': ClassifierChain(),
        #'MultiOutputClassifier': MultiOutputClassifier(),
        #'MultiOutputEstimator': MultiOutputEstimator(),
        #'MultiOutputRegressor': MultiOutputRegressor(),
        'LabelPropagation': LabelPropagation(),
        'LabelSpreading': LabelSpreading(),
        'IsotonicRegression': IsotonicRegression(),
        'BernoulliRBM': BernoulliRBM(),
        'MLPClassifier': MLPClassifier(),
        'MLPRegressor': MLPRegressor()
    }
    return models
uu = uu[:, :100]
X = np.transpose((t_grid.flatten(), x_grid.flatten()))
y = uu.reshape((uu.size, 1))

noise_level = 0.0
y_noisy = y + noise_level * np.std(y) * np.random.randn(y[:, 0].size, 1)

number_of_samples = 20000
idx = np.random.permutation(y.shape[0])
X_train = torch.tensor(X[idx, :][:number_of_samples],
                       dtype=torch.float32,
                       requires_grad=True)
y_train = torch.tensor(y_noisy[idx, :][:number_of_samples],
                       dtype=torch.float32)

estimator = LarsCV(fit_intercept=False)
config = {
    'n_in': 2,
    'hidden_dims': [20, 20, 20, 20, 20, 20, 20],
    'n_out': 1,
    'library_function': library_1D_in,
    'library_args': {
        'poly_order': 1,
        'diff_order': 4
    },
    'sparsity_estimator': estimator
}

model = DeepModDynamic(**config)

optimizer = torch.optim.Adam(model.network_parameters(),
                             betas=(0.99, 0.99),
if "Auto" in datasets: build_auto(AdaBoostRegressor(DecisionTreeRegressor(min_samples_leaf = 5, random_state = 13), random_state = 13, n_estimators = 17), "AdaBoostAuto") build_auto(ARDRegression(normalize = True), "BayesianARDAuto") build_auto(BayesianRidge(normalize = True), "BayesianRidgeAuto") build_auto(DecisionTreeRegressor(min_samples_leaf = 2, random_state = 13), "DecisionTreeAuto", compact = False) build_auto(BaggingRegressor(DecisionTreeRegressor(min_samples_leaf = 5, random_state = 13), n_estimators = 3, max_features = 0.5, random_state = 13), "DecisionTreeEnsembleAuto") build_auto(DummyRegressor(strategy = "median"), "DummyAuto") build_auto(ElasticNetCV(cv = 3, random_state = 13), "ElasticNetAuto") build_auto(ExtraTreesRegressor(n_estimators = 10, min_samples_leaf = 5, random_state = 13), "ExtraTreesAuto") build_auto(GBDTLMRegressor(RandomForestRegressor(n_estimators = 7, max_depth = 6, random_state = 13), LinearRegression()), "GBDTLMAuto") build_auto(GBDTLMRegressor(XGBRFRegressor(n_estimators = 17, max_depth = 6, random_state = 13), ElasticNet(random_state = 13)), "XGBRFLMAuto") build_auto(GradientBoostingRegressor(init = None, random_state = 13), "GradientBoostingAuto") build_auto(HistGradientBoostingRegressor(max_iter = 31, random_state = 13), "HistGradientBoostingAuto") build_auto(HuberRegressor(), "HuberAuto") build_auto(LarsCV(cv = 3), "LarsAuto") build_auto(LassoCV(cv = 3, random_state = 13), "LassoAuto") build_auto(LassoLarsCV(cv = 3), "LassoLarsAuto") build_auto(LinearRegression(), "LinearRegressionAuto") build_auto(BaggingRegressor(LinearRegression(), max_features = 0.75, random_state = 13), "LinearRegressionEnsembleAuto") build_auto(OrthogonalMatchingPursuitCV(cv = 3), "OMPAuto") build_auto(RandomForestRegressor(n_estimators = 10, min_samples_leaf = 3, random_state = 13), "RandomForestAuto", flat = True) build_auto(RidgeCV(), "RidgeAuto") build_auto(StackingRegressor([("ridge", Ridge(random_state = 13)), ("lasso", Lasso(random_state = 13))], final_estimator = GradientBoostingRegressor(n_estimators = 7, random_state = 13)), "StackingEnsembleAuto") build_auto(TheilSenRegressor(n_subsamples = 31, random_state = 13), "TheilSenAuto") build_auto(VotingRegressor([("dt", DecisionTreeRegressor(random_state = 13)), ("knn", KNeighborsRegressor()), ("lr", LinearRegression())], weights = [3, 1, 2]), "VotingEnsembleAuto") build_auto(XGBRFRegressor(n_estimators = 31, max_depth = 6, random_state = 13), "XGBRFAuto") if "Auto" in datasets: build_auto(TransformedTargetRegressor(DecisionTreeRegressor(random_state = 13)), "TransformedDecisionTreeAuto") build_auto(TransformedTargetRegressor(LinearRegression(), func = numpy.log, inverse_func = numpy.exp), "TransformedLinearRegressionAuto")
print(regr.alpha_)
print(regr.intercept_)

plt.scatter(X[:, 0], y, color='black')
plt.scatter(X[:, 0], pred, color='red')
plt.show()

#%% Least Angle Regression (LARS):
# Lars: fit_intercept, verbose, normalize
# LarsCV: fit_intercept, verbose, normalize, cv
from sklearn.linear_model import LarsCV, Lars
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt

X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
reg = LarsCV(cv=5).fit(X, y)
reg.score(X, y)
reg.alpha_

pred = reg.predict(X[:, ])
plt.scatter(X[:, 0], y, color='black')
plt.scatter(X[:, 0], pred, color='red')
plt.show()

reg2 = Lars().fit(X, y)
reg2.score(X, y)
reg2.alphas_  # was reg2.alpha_: plain Lars exposes the alphas_ path, not a CV-selected alpha_
pred = reg2.predict(X[:, ])

#%% LassoLars: alpha, fit_intercept, normalize
# LassoLarsCV: alpha, fit_intercept, normalize, cv