Example #1

if __name__ == "__main__":
    np.random.seed(0)
    N = 9
    x = np.linspace(0, 6, N) + np.random.randn(N)
    x = np.sort(x)
    y = x**2 - 4 * x - 3 + np.random.randn(N)
    x.shape = -1, 1
    y.shape = -1, 1

    model_1 = Pipeline([('poly', PolynomialFeatures()),
                        ('linear', LinearRegression(fit_intercept=False))])
    model_2 = Pipeline([('poly', PolynomialFeatures()),
                        ('linear',
                         RidgeCV(alphas=np.logspace(-3, 2, 100),
                                 fit_intercept=False))])
    model_3 = Pipeline([('poly', PolynomialFeatures()),
                        ('linear',
                         LassoCV(alphas=np.logspace(-3, 2, 100),
                                 fit_intercept=False))])
    models = model_1, model_2, model_3
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False
    np.set_printoptions(suppress=True)

    plt.figure(figsize=(8, 11), facecolor='w')
    d_pool = np.arange(1, N, 1)  # polynomial degrees to try
    m = d_pool.size
    clrs = []  # colors
    for c in np.linspace(16711680, 255, m):
        clrs.append('#%06x' % int(c))  # '%x' needs an int, so cast the float
Example #2
def main():
    houses = fetch_california_housing()
    digits = datasets.load_iris()

    data = houses.data
    names = houses.feature_names
    target = houses.target
    #Q1
    #DistPlots for all 8 features, individually
    #sns.distplot(data[:,0], axlabel=names[0])
    #sns.distplot(data[:,1], axlabel=names[1])
    #sns.distplot(data[:,2], axlabel=names[2])
    #sns.distplot(data[:,3], axlabel=names[3])
    #sns.distplot(data[:,4], axlabel=names[4])
    #sns.distplot(data[:,5], axlabel=names[5])
    #sns.distplot(data[:,6], axlabel=names[6])
    #sns.distplot(data[:,7], axlabel=names[7])

    #Target DistPlot
    #sns.distplot(houses.target, axlabel='Target')

    test = max(data[:, 2])
    test2 = max(data[:, 5])

    housingDF = pd.DataFrame(data=data, columns=names)
    #All 8 DistPlots together
    #fig1 = housingDF.hist(bins=40, figsize=(9, 6))

    print("")
    print("Dependency on Targets: ")
    clf = GradientBoostingRegressor(n_estimators=100,
                                    max_depth=4,
                                    learning_rate=0.1,
                                    loss='huber',
                                    random_state=1)
    clf.fit(data, target)
    feat = [0, 1, 2, 3, 4, 5, 6, 7]
    '''fig, axs = plot_partial_dependence(clf, data, feat, feature_names=names,n_jobs=3, grid_resolution=50)
    fig.suptitle('Dependence of the target on each feature: ')
    plt.subplots_adjust(top=0.9, wspace=0.6, hspace=0.6)
    plt.show()'''
    #fig, axs = plot_partial_dependence()

    #Q3
    X_train, X_test, y_train, y_test = train_test_split(data, target)

    #linear regression
    lin = LinearRegression().fit(X_train, y_train)
    print("Linear Score: ", lin.score(X_test, y_test))

    #Ridge regression w/ CV
    rid = RidgeCV().fit(X_train, y_train)
    print("Ridge Score: ", rid.score(X_test, y_test))

    #Lasso regression w/ CV
    lasso = LassoCV().fit(X_train, y_train)
    print("Lasso Score: ", lasso.score(X_test, y_test))

    #Elastic Net regression w/ CV
    ela = ElasticNetCV().fit(X_train, y_train)
    print("ElasticNet Score: ", ela.score(X_test, y_test))

    #Using StandardScaler
    scaler = StandardScaler()
    dataSTD = scaler.fit_transform(data, target)
    X_train2, X_test2, y_train2, y_test2 = train_test_split(dataSTD, target)
    print("")
    print("With Standardization:")

    #linear regression STD
    lin = LinearRegression().fit(X_train2, y_train2)
    print("Linear Score: ", lin.score(X_test2, y_test2))

    #Ridge regression w/ CV STD
    rid = RidgeCV().fit(X_train2, y_train2)
    print("Ridge Score: ", rid.score(X_test2, y_test2))

    #Lasso regression w/ CV STD
    lasso = LassoCV().fit(X_train2, y_train2)
    print("Lasso Score: ", lasso.score(X_test2, y_test2))

    #Elastic Net regression w/ CV STD
    ela = ElasticNetCV().fit(X_train2, y_train2)
    print("ElasticNet Score: ", ela.score(X_test2, y_test2))

    #Q4
    print("")
    estimator = Ridge()
    paramsR = {
        'alpha':
        [25, 10, 4, 2, 1.0, 0.8, 0.5, 0.3, 0.2, 0.1, 0.05, 0.02, 0.01],
        'fit_intercept': [True, False],
    }
    gsCVR = GridSearchCV(estimator, paramsR)
    param_range = np.logspace(-3, 7, 200)
    train_scores, test_scores = validation_curve(Ridge(),
                                                 data,
                                                 target,
                                                 "alpha",
                                                 param_range=param_range,
                                                 cv=5)
    test_scores_mean = np.mean(test_scores, axis=1)

    plt.title("Validation Curve with Ridge")
    plt.xlabel(r"$\alpha$")
    plt.ylabel("Score")
    plt.ylim(0.0, 1.1)
    lw = 2
    plt.semilogx(param_range,
                 test_scores_mean,
                 label="Cross-validation score",
                 color="navy",
                 lw=lw)
    plt.legend(loc="best")
    plt.show()

    alphas = np.logspace(-3, 7, 200)

    coefs = []
    for a in alphas:
        ridge = Ridge(alpha=a, fit_intercept=False)
        ridge.fit(data, target)
        coefs.append(ridge.coef_)

    ax = plt.gca()

    ax.plot(alphas, coefs)
    ax.set_xscale('log')
    ax.set_xlim(ax.get_xlim()[::-1])
    plt.xlabel('alpha')
    plt.ylabel('weights')
    plt.title('Ridge coefficients of each feature')
    plt.axis('tight')
    plt.legend()
    plt.show()

    gsCVR.fit(X_train, y_train)
    #print(gsCVR.best_params_)
    rid = Ridge(alpha=25, fit_intercept=True).fit(X_train, y_train)
    print("Ridge Score(w/ best parameters): ", rid.score(X_test, y_test))
    estimator = LassoCV()
    paramsL = {
        'cv': [3, 4, 5, 6],
        'fit_intercept': [True, False],
        'normalize': [True, False],
        'precompute': [True, False]
    }
    gsCVL = GridSearchCV(estimator, paramsL)
    gsCVL.fit(X_train, y_train)
    #print(gsCVL.best_params_)
    param_range = np.logspace(-7, 3, 200)
    train_scores, test_scores = validation_curve(Lasso(),
                                                 data,
                                                 target,
                                                 "alpha",
                                                 param_range=param_range,
                                                 cv=5)
    test_scores_mean = np.mean(test_scores, axis=1)

    plt.title("Validation Curve with Lasso")
    plt.xlabel(r"$\alpha$")
    plt.ylabel("Score")
    plt.ylim(0.0, 1.1)
    lw = 2
    plt.semilogx(param_range,
                 test_scores_mean,
                 label="Cross-validation score",
                 color="navy",
                 lw=lw)
    plt.legend(loc="best")
    plt.show()
    '''alphas = np.logspace(-7, 3, 200)
    
    coefs = []
    for a in alphas:
        lasso1 = Lasso(alpha=a, fit_intercept=False)
        lasso1.fit(data, target)
        coefs.append(lasso1.coef_)
    
    ax = plt.gca()
    
    ax.plot(alphas, coefs)
    ax.set_xscale('log')
    ax.set_xlim(ax.get_xlim()[::-1])
    plt.xlabel('alpha')
    plt.ylabel('weights')
    plt.title('Lasso coefficients of each feature')
    plt.axis('tight')
    plt.legend()
    plt.show()'''

    las = LassoCV(cv=3, fit_intercept=True, normalize=True,
                  precompute=True).fit(X_train, y_train)
    print("Lasso Score(w/ best parameters): ", las.score(X_test, y_test))
    estimator = ElasticNetCV()
    paramsL = {
        'cv': [3, 4, 5, 6],
        'normalize': [True, False],
        'precompute': [True, False]
    }
    gsCVE = GridSearchCV(estimator, paramsL)
    gsCVE.fit(X_train, y_train)

    train_scores, test_scores = validation_curve(ElasticNet(),
                                                 data,
                                                 target,
                                                 "alpha",
                                                 param_range=param_range,
                                                 cv=3)
    test_scores_mean = np.mean(test_scores, axis=1)

    plt.title("Validation Curve with ElasticNet")
    plt.xlabel(r"$\alpha$")
    plt.ylabel("Score")
    plt.ylim(0.0, 1.1)
    lw = 2
    plt.semilogx(param_range,
                 test_scores_mean,
                 label="Cross-validation score",
                 color="navy",
                 lw=lw)
    plt.legend(loc="best")
    plt.show()
    '''alphas = np.logspace(-7, 3, 200)
    
    coefs = []
    for a in alphas:
        eN1 = ElasticNet(alpha=a, fit_intercept=False)
        eN1.fit(data, target)
        coefs.append(eN1.coef_)
    
    ax = plt.gca()
    
    ax.plot(alphas, coefs)
    ax.set_xscale('log')
    ax.set_xlim(ax.get_xlim()[::-1])
    plt.xlabel('alpha')
    plt.ylabel('weights')
    plt.title('ElasticNet coefficients of each feature')
    plt.axis('tight')
    plt.legend()
    plt.show()'''

    #print(gsCVE.best_params_)
    en = ElasticNetCV(cv=3, normalize=False,
                      precompute=True).fit(X_train, y_train)
    print("ElasticNet Score(w/ best parameters): ", en.score(X_test, y_test))
Example #3

from sklearn.linear_model import RidgeCV

X = pd.DataFrame(housevalue.data)
y = housevalue.target
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,
                                                y,
                                                test_size=0.3,
                                                random_state=420)
ft.recovery_index([Xtrain, Xtest])

# In[]:
# RidgeCV cross-validation; by default alpha is chosen from the built-in
# leave-one-out errors, and .score() later reports R^2
Ridge_ = RidgeCV(
    alphas=np.arange(1, 1001, 100)
    #,scoring="neg_mean_squared_error"  # optional alternative scoring
    ,
    store_cv_values=True
    #,cv=5  # default is leave-one-out CV; the literature recommends it for ridge regression
).fit(Xtrain, Ytrain)

# In[]:
# Held-out (non-CV) result of the ridge model selected by cross-validation, used for prediction
Ridge_.score(Xtest, Ytest)  # this method only reports R^2

# In[]:
# Inspect all of the cross-validation values stored while fitting RidgeCV
# Leave-one-out CV:
# 14448 rows: one per sample (with LOO the number of folds equals the sample size)
# 10 columns: one per candidate alpha
Ridge_.cv_values_.shape  # (14448, 10); average over the rows to get one value per alpha
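
# A small follow-up sketch (an assumption, not part of the original snippet):
# with scoring unset, cv_values_ holds leave-one-out squared errors, so the
# best alpha is the one whose column has the smallest mean.
mean_errors = Ridge_.cv_values_.mean(axis=0)       # one value per candidate alpha
best_idx = mean_errors.argmin()                    # argmax instead if a scorer were set
print(np.arange(1, 1001, 100)[best_idx], Ridge_.alpha_)  # the two should agree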
Example #4

    def fit_transform(self, X, y=None):
        """Fits the imputer on X and return the transformed X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Input data, where "n_samples" is the number of samples and
            "n_features" is the number of features.

        y : ignored.

        Returns
        -------
        Xt : array-like, shape (n_samples, n_features)
             The imputed input data.
        """
        self.random_state_ = getattr(self, "random_state_",
                                     check_random_state(self.random_state))

        if self.n_iter < 0:
            raise ValueError(
                "'n_iter' should be a positive integer. Got {} instead."
                .format(self.n_iter))

        if self.predictor is None:
            if self.sample_posterior:
                from sklearn.linear_model import BayesianRidge
                self._predictor = BayesianRidge()
            else:
                from sklearn.linear_model import RidgeCV
                # including a very small alpha to approximate OLS
                self._predictor = RidgeCV(alphas=np.array([1e-5, 0.1,  1, 10]))
        else:
            self._predictor = clone(self.predictor)

        if hasattr(self._predictor, 'random_state'):
            self._predictor.random_state = self.random_state_

        self._min_value = np.nan if self.min_value is None else self.min_value
        self._max_value = np.nan if self.max_value is None else self.max_value

        self.initial_imputer_ = None
        X, Xt, mask_missing_values = self._initial_imputation(X)

        if self.n_iter == 0:
            return Xt

        # order in which to impute
        # note this is probably too slow for large feature data (d > 100000)
        # and a better way would be good.
        # see: https://goo.gl/KyCNwj and subsequent comments
        ordered_idx = self._get_ordered_idx(mask_missing_values)
        self.n_features_with_missing_ = len(ordered_idx)

        abs_corr_mat = self._get_abs_corr_mat(Xt)

        # impute data
        n_samples, n_features = Xt.shape
        self.imputation_sequence_ = []
        if self.verbose > 0:
            print("[IterativeImputer] Completing matrix with shape %s"
                  % (X.shape,))
        start_t = time()
        for i_rnd in range(self.n_iter):
            if self.imputation_order == 'random':
                ordered_idx = self._get_ordered_idx(mask_missing_values)

            for feat_idx in ordered_idx:
                neighbor_feat_idx = self._get_neighbor_feat_idx(n_features,
                                                                feat_idx,
                                                                abs_corr_mat)
                Xt, predictor = self._impute_one_feature(
                    Xt, mask_missing_values, feat_idx, neighbor_feat_idx,
                    predictor=None, fit_mode=True)
                predictor_triplet = ImputerTriplet(feat_idx,
                                                   neighbor_feat_idx,
                                                   predictor)
                self.imputation_sequence_.append(predictor_triplet)

            if self.verbose > 0:
                print('[IterativeImputer] Ending imputation round '
                      '%d/%d, elapsed time %0.2f'
                      % (i_rnd + 1, self.n_iter, time() - start_t))

        Xt[~mask_missing_values] = X[~mask_missing_values]
        return Xt
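
# Usage sketch (an assumption, not part of the snippet above): the released
# scikit-learn counterpart of this class is IterativeImputer, whose public API
# uses `estimator`/`max_iter` instead of `predictor`/`n_iter`.
#
#   import numpy as np
#   from sklearn.experimental import enable_iterative_imputer  # noqa: F401
#   from sklearn.impute import IterativeImputer
#   from sklearn.linear_model import RidgeCV
#
#   X = np.array([[1.0, 2.0], [3.0, np.nan], [np.nan, 6.0], [8.0, 9.0]])
#   imp = IterativeImputer(estimator=RidgeCV(alphas=[1e-5, 0.1, 1.0, 10.0]),
#                          max_iter=10, random_state=0)
#   print(imp.fit_transform(X))  # missing entries filled by regression estimates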
Example #5
                       gamma=0.6,
                       subsample=0.7,
                       colsample_bytree=0.7,
                       objective='reg:linear',
                       nthread=-1,
                       scale_pos_weight=1,
                       seed=27,
                       reg_alpha=0.00006,
                       random_state=42)

# Ridge Regressor
ridge_alphas = [
    1e-15, 1e-10, 1e-8, 9e-4, 7e-4, 5e-4, 3e-4, 1e-4, 1e-3, 5e-2, 1e-2, 0.1,
    0.3, 1, 3, 5, 10, 15, 18, 20, 30, 50, 75, 100
]
ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=ridge_alphas, cv=kf))

# Support Vector Regressor
svr = make_pipeline(RobustScaler(), SVR(C=20, epsilon=0.008, gamma=0.0003))

# Gradient Boosting Regressor
gbr = GradientBoostingRegressor(n_estimators=6000,
                                learning_rate=0.01,
                                max_depth=4,
                                max_features='sqrt',
                                min_samples_leaf=15,
                                min_samples_split=10,
                                loss='huber',
                                random_state=42)

# Random Forest Regressor
Example #6
# In[23]:


print(lin_rmse, len(Intersection(seg_lin, Test_seg_test))/len(Test_seg_test))


# In[ ]:


# Import necessary modules
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import cross_val_score


alpha_space = np.logspace(-8, 0, 10)
reg_CV = RidgeCV(alphas=alpha_space, cv=5)
reg_CV.fit(X_train, y_train)


# In[ ]:


reg_CV.alpha_
reg_CV.coef_


# In[ ]:



# Ridge regression
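

# A possible continuation sketch (an assumption; the original cell is cut off):
# refit a plain Ridge at the alpha RidgeCV selected above and sanity-check it
# with the cross_val_score import from this example.
from sklearn.linear_model import Ridge

ridge_best = Ridge(alpha=reg_CV.alpha_).fit(X_train, y_train)
print(cross_val_score(ridge_best, X_train, y_train, cv=5).mean())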
Example #7
def QuickML_Ensembling(X_train, y_train, X_test, y_test='', modeltype='Regression', Boosting_Flag=False,
                            scoring='', verbose=0):
    """
    Quickly builds and runs multiple models for a clean data set(only numerics).
    """
    start_time = time.time()
    seed = 99
    if len(X_train) <= 100000 or X_train.shape[1] < 50:
        NUMS = 100
        FOLDS = 5
    else:
        NUMS = 200
        FOLDS = 10
    ## create Voting models
    estimators = []
    if modeltype == 'Regression':
        if scoring == '':
            scoring = 'neg_mean_squared_error'
        scv = ShuffleSplit(n_splits=FOLDS,random_state=seed)
        if Boosting_Flag is None:
            model5 = BaggingRegressor(DecisionTreeRegressor(random_state=seed),
                                        n_estimators=NUMS,random_state=seed)
            results1 = model5.fit(X_train,y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics1 = rmse(results1, y_test).mean()
            else:
                metrics1 = 0
            estimators.append(('Bagging1',model5, metrics1))
        else:
            model5 = LassoLarsCV(cv=scv)
            results1 = model5.fit(X_train,y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics1 = rmse(results1, y_test).mean()
            else:
                metrics1 = 0
            estimators.append(('LassoLarsCV Regression',model5, metrics1))
        model6 = LassoCV(alphas=np.logspace(-10,-1,50), cv=scv,random_state=seed)
        results2 = model6.fit(X_train,y_train).predict(X_test)
        if not isinstance(y_test, str):
            metrics2 = rmse(results2, y_test).mean()
        else:
            metrics2 = 0
        estimators.append(('LassoCV Regularization',model6, metrics2))
        model7 = RidgeCV(alphas=np.logspace(-10,-1,50), cv=scv)
        results3 = model7.fit(X_train,y_train).predict(X_test)
        if not isinstance(y_test, str):
            metrics3 = rmse(results3, y_test).mean()
        else:
            metrics3 = 0
        estimators.append(('RidgeCV Regression',model7, metrics3))
        ## Create an ensemble model ####
        if Boosting_Flag:
            model8 = BaggingRegressor(DecisionTreeRegressor(random_state=seed),
                                        n_estimators=NUMS,random_state=seed)
            results4 = model8.fit(X_train,y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics4 = rmse(results4, y_test).mean()
            else:
                metrics4 = 0
            estimators.append(('Bagging2',model8, metrics4))
        else:
            model8 = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(
                        min_samples_leaf=2, max_depth=1, random_state=seed),
                        n_estimators=NUMS, random_state=seed)
            results4 = model8.fit(X_train,y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics4 = rmse(results4, y_test).mean()
            else:
                metrics4 = 0
            estimators.append(('Boosting',model8, metrics4))
        estimators_list = [(tuples[0],tuples[1]) for tuples in estimators]
        estimator_names = [tuples[0] for tuples in estimators]
        if verbose >= 2:
            print('QuickML_Ensembling Model results:')
            print('    %s = %0.4f \n    %s = %0.4f\n    %s = %0.4f \n    %s = %0.4f' %(estimator_names[0], metrics1,
                    estimator_names[1], metrics2, estimator_names[2], metrics3, estimator_names[3], metrics4))
    else:
        if scoring == '':
            scoring = 'accuracy'
        scv = StratifiedKFold(n_splits=FOLDS,random_state=seed)
        if Boosting_Flag is None:
            model5 = ExtraTreesClassifier(n_estimators=NUMS,min_samples_leaf=2,random_state=seed)
            results1 = model5.fit(X_train,y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics1 = accu(results1, y_test).mean()
            else:
                metrics1 = 0
            estimators.append(('Bagging',model5, metrics1))
        else:
            model5 = LogisticRegressionCV(Cs=np.linspace(0.01,100,20),cv=scv,scoring=scoring,
                                          random_state=seed)
            results1 = model5.fit(X_train,y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics1 = accu(results1, y_test).mean() 
            else:
                metrics1 = 0
            estimators.append(('Logistic Regression',model5, metrics1))
        model6 = LinearDiscriminantAnalysis()
        results2 = model6.fit(X_train,y_train).predict(X_test)
        if not isinstance(y_test, str):
            metrics2 = accu(results2, y_test).mean()
        else:
            metrics2 = 0
        estimators.append(('Linear Discriminant',model6, metrics2))
        if modeltype == 'Binary_Classification':
            float_cols = X_train.columns[(X_train.dtypes==float).values].tolist()
            int_cols = X_train.columns[(X_train.dtypes==int).values].tolist()
            if (X_train[float_cols+int_cols]<0).astype(int).sum().sum() > 0:
                model7 = DecisionTreeClassifier(max_depth=5)
            else:
                model7 = GaussianNB()
        else:
            float_cols = X_train.columns[(X_train.dtypes==float).values].tolist()
            int_cols = X_train.columns[(X_train.dtypes==int).values].tolist()
            if (X_train[float_cols+int_cols]<0).astype(int).sum().sum() > 0:
                model7 = DecisionTreeClassifier(max_depth=5)
            else:
                model7 = MultinomialNB()
        results3 = model7.fit(X_train,y_train).predict(X_test)
        if not isinstance(y_test, str):
            metrics3 = accu(results3, y_test).mean()
        else:
            metrics3 = 0
        estimators.append(('Naive Bayes',model7, metrics3))
        if Boosting_Flag:
            #### If the Boosting_Flag is True, it means Boosting model is present. So choose a Bagging here.
            model8 = ExtraTreesClassifier(n_estimators=NUMS,min_samples_leaf=2,random_state=seed)
            results4 = model8.fit(X_train,y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics4 = accu(results4, y_test).mean()
            else:
                metrics4 = 0
            estimators.append(('Bagging',model8, metrics4))
        else:
            ## Create an ensemble model ####
            model8 = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
                                    random_state=seed, max_depth=1, min_samples_leaf=2
                                    ), n_estimators=NUMS, random_state=seed)
            results4 = model8.fit(X_train,y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics4 = accu(results4, y_test).mean()
            else:
                metrics4 = 0
            estimators.append(('Boosting',model8, metrics4))
        estimators_list = [(tuples[0],tuples[1]) for tuples in estimators]
        estimator_names = [tuples[0] for tuples in estimators]
        if not isinstance(y_test, str):
            if verbose >= 2:
                print('QuickML_Ensembling Model results:')
                print('    %s = %0.4f \n    %s = %0.4f\n    %s = %0.4f \n    %s = %0.4f' %(estimator_names[0], metrics1,
                        estimator_names[1], metrics2, estimator_names[2], metrics3, estimator_names[3], metrics4))
        else:
            if verbose >= 1:
                print('QuickML_Ensembling completed.')
    stacks = np.c_[results1,results2,results3,results4]
    if verbose == 1:
        print('    Time taken for Ensembling: %0.1f seconds' %(time.time()-start_time))
    return estimator_names, stacks
#########################################################
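
# Usage sketch (hypothetical data; assumes the sklearn imports and the
# rmse()/accu() helpers that QuickML_Ensembling relies on are already defined
# in this module):
import numpy as np
import pandas as pd

X_demo = pd.DataFrame(np.random.randn(500, 8))
y_demo = X_demo.sum(axis=1) + 0.1 * np.random.randn(500)
X_tr, X_te, y_tr, y_te = X_demo[:400], X_demo[400:], y_demo[:400], y_demo[400:]
names, stacked_preds = QuickML_Ensembling(X_tr, y_tr, X_te, y_te,
                                          modeltype='Regression',
                                          Boosting_Flag=False, verbose=2)
# stacked_preds holds one column of test-set predictions per base model.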
Example #8
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: 0.8259674898630861
exported_pipeline = make_pipeline(
    StackingEstimator(
        estimator=GradientBoostingRegressor(alpha=0.75,
                                            learning_rate=0.5,
                                            loss="quantile",
                                            max_depth=3,
                                            max_features=0.6500000000000001,
                                            min_samples_leaf=17,
                                            min_samples_split=2,
                                            n_estimators=100,
                                            subsample=0.5)), RidgeCV())

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #9
# The parameters inside the models can be varied
params = {
    'n_estimators': 500,
    'max_depth': 4,
    'min_samples_split': 5,
    'learning_rate': 0.01,
    'loss': 'ls'
}
GB_model = GradientBoostingRegressor(**params)
lin_model = Lasso(alpha=0.005, random_state=0)
RF_model = RandomForestRegressor(n_estimators=400, random_state=0)
estimators = [('Random Forest', RF_model), ('Lasso', lin_model),
              ('Gradient Boosting', GB_model)]
stacking_regressor = StackingRegressor(estimators=estimators,
                                       final_estimator=RidgeCV())

#6)Compare the performance

# capture all variables in a list
# except the target and the ID

train_vars = [var for var in X_train.columns if var not in ['Id', 'SalePrice']]

# create scaler

scaler = MinMaxScaler()

#  fit  the scaler to the train set

scaler.fit(X_train[train_vars])
Example #10

            # Take the mean of the predictions of the cross validation set
            blend_test[:, j] = blend_test_j.mean(1)      
            print ('Clf_%d Mean norm. Gini = %0.5f (%0.5f)' % (j, cv_results[j,].mean(), cv_results[j,].std()))

    end_time = datetime.now()
    time_taken = end_time - start_time
    print ("Time taken for pre-blending calculations: ", time_taken)

    print ("CV-Results", cv_results)
    
    # Start blending!    
    print ("Blending models.")

    alphas = [0.0001, 0.005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 50.0, 100.0, 500.0, 1000.0]
    
    bclf = RidgeCV(alphas=alphas, normalize=True, cv=5)
    bclf.fit(blend_train, Y_dev)       
    print ("Ridge Best alpha = ", bclf.alpha_)
    
    # Predict now
    Y_test_predict = bclf.predict(blend_test)
    
    if (DEVELOP):
        score1 = metrics.mean_absolute_error(Y_test, Y_test_predict)
        score = normalized_gini(Y_test, Y_test_predict)
        print ('Ridge MAE = %s normalized Gini = %s' % (score1, score))
else: # Submit! and generate solution
    score = cv_results.mean()      
    print ('Avg. CV-Score = %s' % (score))
    #generate solution
    submission = pd.DataFrame({"Id": testidx, "Hazard": Y_test_predict})
Example #11
datas = new_df.dropna(how='any')  # drop every row that contains any missing value

X = datas[names]
Y = datas[quality]
Y = Y.ravel()  # flatten Y into a 1-D array

# 3. Pipelines
# Build the list of models
models = [
    Pipeline([
        ('Poly', PolynomialFeatures()),  # construct the polynomial features
        ('Linear', LinearRegression())  # linear regression
    ]),
    Pipeline([
        ('Poly', PolynomialFeatures()),
        ('Linear', RidgeCV(alphas=np.logspace(-4, 2, 20))
         )  # RidgeCV model; alphas are the candidate regularization strengths
    ]),
    Pipeline([
        ('Poly', PolynomialFeatures()),
        ('Linear', LassoCV(alphas=np.logspace(-4, 2, 20)))  # LassoCV model
    ]),
    Pipeline([
        ('Poly', PolynomialFeatures()),
        ('Linear',
         ElasticNetCV(alphas=np.logspace(-4, 2, 20),
                      l1_ratio=np.linspace(0, 1, 5)))  # ElasticNetCV model
    ])
]

# 4. Split the data
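
# A minimal continuation sketch (an assumption; the original snippet is cut off
# here): split the data, then fit each pipeline at a fixed polynomial degree
# and compare the test-set R^2.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25,
                                                     random_state=0)
for model in models:
    model.set_params(Poly__degree=3)
    model.fit(x_train, y_train)
    print(model.steps[-1][1].__class__.__name__, model.score(x_test, y_test))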
Example #12

                                             max_depth=1,
                                             min_child_weight=3,
                                             n_estimators=100,
                                             n_jobs=1,
                                             objective="reg:squarederror",
                                             subsample=0.9500000000000001,
                                             verbosity=0)), MinMaxScaler(),
    StackingEstimator(estimator=SGDRegressor(alpha=0.01,
                                             eta0=0.01,
                                             fit_intercept=False,
                                             l1_ratio=0.0,
                                             learning_rate="constant",
                                             loss="huber",
                                             penalty="elasticnet",
                                             power_t=0.0)),
    StackingEstimator(estimator=LinearSVR(
        C=25.0, dual=True, epsilon=0.01, loss="epsilon_insensitive",
        tol=0.001)), FeatureAgglomeration(affinity="l2", linkage="average"),
    SelectPercentile(score_func=f_regression, percentile=6),
    StackingEstimator(estimator=SGDRegressor(alpha=0.001,
                                             eta0=0.01,
                                             fit_intercept=False,
                                             l1_ratio=0.0,
                                             learning_rate="constant",
                                             loss="epsilon_insensitive",
                                             penalty="elasticnet",
                                             power_t=10.0)), RidgeCV())

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #13
        elif (est == "BayesianRidge"):
            alpha_1 = [1e-6, 1e-5, 1e-7]
            alpha_2 = [1e-6, 1e-5, 1e-7]
            lambda_1 = [1e-6, 1e-5, 1e-7]
            lambda_2 = [1e-6, 1e-5, 1e-7]
            param_grid = {
                'alpha_1': alpha_1,
                'alpha_2': alpha_2,
                'lambda_1': lambda_1,
                'lambda_2': lambda_2
            }
            grid_search = GridSearchCV(BayesianRidge(), param_grid, cv=5)
            grid_search.fit(df_pos_train[features], df_pos_train[target])

        elif (est == "Ridge"):
            grid_search = RidgeCV().fit(df_pos_train[features],
                                        df_pos_train['FD points'])

        elif (est == "SVM"):
            C = [50]
            gamma = [0.3]
            param_grid = {'C': C, 'gamma': gamma}
            grid_search = GridSearchCV(SVC(), param_grid, cv=5)
            grid_search.fit(df_pos_train[features], df_pos_train['FD points'])

        else:
            print(est)
            print("Cannot find the algorithm")
            exit()

        train_rmse = np.sqrt(np.mean( (df_pos_train['FD points'] - \
                    grid_search.predict(df_pos_train[features]))**2.0 ))
Example #14

print('R2 Score',r2[2])
print('The root mean square error',np.sqrt(mean_squared_error(y_test,y_pred3)),'\n')
rmse.append(np.sqrt(mean_squared_error(y_test,y_pred3)))

##### Artificial Neural Networks
nn = MLPRegressor(hidden_layer_sizes=(3, 40), activation='relu', solver='adam',
                  learning_rate='adaptive', max_iter=10000,
                  learning_rate_init=0.01, alpha=0.01)
nn.fit(x_train,y_train)
y_pred4=nn.predict(x_test)
print('Artificial Neural Network')
r2.append(r2_score(y_test,y_pred4))
print('R2 Score',r2[3])
print('The root mean square error',np.sqrt(mean_squared_error(y_test,y_pred4)),'\n')
rmse.append(np.sqrt(mean_squared_error(y_test,y_pred4)))

### Ridge regression
rir= RidgeCV(alphas=[0.001,0.01,0.1,1,2,5,10,15,20,30], fit_intercept = False)
rir.fit(x_train,y_train)
y_pred5=rir.predict(x_test)
print('Ridge Regression')
r2.append(r2_score(y_test,y_pred5))
print('R2 Score',r2[4])
print('The root mean square error',np.sqrt(mean_squared_error(y_test,y_pred5)),'\n')
rmse.append(np.sqrt(mean_squared_error(y_test,y_pred5)))

##### LASSO Regression
lar = LassoCV(alphas=np.linspace(0,5,100))
lar.fit(x_train,y_train)
y_pred6=lar.predict(x_test)
print('Lasso Regression')
r2.append(r2_score(y_test,y_pred6))
print('R2 Score',r2[5])
Example #15
np.save("npy/X", X)

################# FINAL RIDGE REGRESSION PART #################
print("FINAL RIDGE REGRESSION BEGIN")

##### LOADING
X = np.load("npy/X.npy")
pred_array = np.load("npy/pred_array.npy")

##### EXPANSION + STANDARDIZATION
X = standardize(expansion(X, 4))[0]
pred_array = standardize(expansion(pred_array, 4))[0]

##### ACTUAL RIDGE REGRESSION
y = df_3.Prediction.values  # reference values, still taken from the 30% split of the original data
clf = RidgeCV(alphas=np.linspace(10**-8, 1, 100), cv=10)
clf = clf.fit(X, y)

print(clf.coef_)
print(clf.alpha_)

##### PREDICTION
pred=clf.predict(pred_array)

################# OUTPUT CREATION AND ROUNDING #################

final_array=np.rint(pred)
final_array[np.where(final_array>5)]=5
final_array[np.where(final_array<1)]=1

df2.Prediction = final_array
Example #16
                                             n_estimators=100,
                                             n_jobs=1,
                                             objective="reg:squarederror",
                                             subsample=0.9500000000000001,
                                             verbosity=0)), MinMaxScaler(),
    StackingEstimator(estimator=SGDRegressor(alpha=0.01,
                                             eta0=0.01,
                                             fit_intercept=False,
                                             l1_ratio=0.0,
                                             learning_rate="constant",
                                             loss="huber",
                                             penalty="elasticnet",
                                             power_t=0.0)),
    StackingEstimator(estimator=LinearSVR(
        C=25.0, dual=True, epsilon=0.01, loss="epsilon_insensitive",
        tol=0.001)), FeatureAgglomeration(affinity="l2", linkage="average"),
    SelectPercentile(score_func=f_regression, percentile=6),
    StackingEstimator(estimator=ExtraTreesRegressor(bootstrap=False,
                                                    max_features=0.8,
                                                    min_samples_leaf=19,
                                                    min_samples_split=10,
                                                    n_estimators=400)),
    StackingEstimator(estimator=LinearSVR(C=20.0,
                                          dual=True,
                                          epsilon=1.0,
                                          loss="squared_epsilon_insensitive",
                                          tol=0.1)), RidgeCV())

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Example #17

X_train = train[:, :(n_pixels + 1) // 2]
# Lower half of the faces
y_train = train[:, n_pixels // 2:]
X_test = test[:, :(n_pixels + 1) // 2]
y_test = test[:, n_pixels // 2:]

# Fit estimators
ESTIMATORS = {
    "Extra trees":
    ExtraTreesRegressor(n_estimators=10, max_features=32, random_state=0),
    "K-nn":
    KNeighborsRegressor(),
    "Linear regression":
    LinearRegression(),
    "Ridge":
    RidgeCV(),
    "decision tree 10-50":
    DecisionTreeRegressor(max_depth=10, max_features=50),
    "decision tree 20-50":
    DecisionTreeRegressor(max_depth=20, max_features=50),
    "decision tree 20-25":
    DecisionTreeRegressor(max_depth=20, max_features=25),
    "Random 10-50":
    RandomForestRegressor(n_estimators=10, max_depth=10, max_features=50),
    "Random 20-50":
    RandomForestRegressor(n_estimators=10, max_depth=20, max_features=50),
    "Random 20-25":
    RandomForestRegressor(n_estimators=10, max_depth=20, max_features=25),
}

y_test_predict = dict()
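
# A likely continuation sketch (an assumption, mirroring the usual pattern for
# this kind of benchmark): fit every estimator on the upper face halves and
# predict the lower halves of the test faces.
for name, estimator in ESTIMATORS.items():
    estimator.fit(X_train, y_train)
    y_test_predict[name] = estimator.predict(X_test)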
Example #18
ns_pred = [0 for _ in range(len(y_test))]
ns_fpr, ns_tpr, _ = roc_curve(y_test, ns_pred)
plt.plot(ns_fpr, ns_tpr, linestyle="--", label="No skill")
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend()

plt.figure(6)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend()
plt.show()

# %%

ridge = RidgeCV()
ridge.fit(X_train, y_train)
# ridge_pred = ridge.predict(X_test)

for xy in [(X_train, y_train, "Training"), (X_test, y_test, "Testing")]:
    # part 1
    print(xy[2])
    X_ = xy[0]
    y_true = xy[1]
    y_predp = ridge.predict(xy[0])
    y_pred = np.where(y_predp < 0.5, 0, 1)
    # y_predp = ridge.predict_proba(xy[0])[:, 1]
    print(f"Log loss: {log_loss(y_true, y_predp):.3f}")
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.3f}")
    print(f"RMSE: {mean_squared_error(y_true, y_predp, squared=False):.3f}")
Example #19
def RidgeRegressionCV():
    return Pipeline([('std_scaler', StandardScaler()),
                     ('ridge_reg', RidgeCV(cv=10))])
Example #20
X_test = test.values
y = pd.read_csv(path + "train.csv", index_col=0, usecols=['id', 'loss']).values

alphas = (.03, .1, .3, 1, 3, 10)
shifts = [200]
k_features = (1, 15)
# alphas = (.1,1,10)
# shifts=np.linspace(100,400,7)
# k_features=(1,10)


def scorer(model, X, y):
    return -mean_absolute_error(np.exp(model.predict(X)), np.exp(y))


lr = RidgeCV(alphas=alphas, fit_intercept=False, scoring=scorer)

bestscore = -np.inf
bestsfs = None
bestshift = None
for shift in shifts:
    sfs = SFS(lr,
              k_features=k_features,
              forward=True,
              floating=False,
              scoring=scorer,
              cv=kftune)
    sfs.fit(np.log(X + shift), np.log(y + shift))
    print(shift, sfs.k_score_, len(sfs.k_feature_idx_))
    if sfs.k_score_ > bestscore:
        bestscore = sfs.k_score_
Example #21
    return np.sqrt(mean_squared_error(y, y_pred))


def cv_rmse(model, X=X):
    rmse = np.sqrt(-cross_val_score(
        model, X, y, scoring="neg_mean_squared_error", cv=kfolds))
    return (rmse)


alphas_alt = [14.5, 14.6, 14.7, 14.8, 14.9, 15, 15.1, 15.2, 15.3, 15.4, 15.5]
alphas2 = [
    5e-05, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008
]
e_alphas = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007]
e_l1ratio = [0.8, 0.85, 0.9, 0.95, 0.99, 1]
ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=alphas_alt, cv=kfolds))
lasso = make_pipeline(
    RobustScaler(),
    LassoCV(max_iter=int(1e7), alphas=alphas2, random_state=42, cv=kfolds))
elasticnet = make_pipeline(
    RobustScaler(),
    ElasticNetCV(max_iter=int(1e7), alphas=e_alphas, cv=kfolds, l1_ratio=e_l1ratio))
svr = make_pipeline(RobustScaler(), SVR(
    C=20,
    epsilon=0.008,
    gamma=0.0003,
))
gbr = GradientBoostingRegressor(n_estimators=3000,
                                learning_rate=0.05,
                                max_depth=4,
                                max_features='sqrt',
Example #22

#import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator

# NOTE: Make sure that the class is labeled 'target' in the data file
#tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
#features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = x_train, x_test, y_train, y_test #\
#            train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was:-782999502.5452403
model = make_pipeline(
    StackingEstimator(estimator=RidgeCV()),
    RandomForestRegressor(bootstrap=False, max_features=0.4, min_samples_leaf=5, min_samples_split=12, n_estimators=100)
)

model.fit(training_features, training_target)
results = model.predict(testing_features)
#print(results)


y_pred = model.predict(x_test[:10,])
print (y_pred)
print (y_test[:10])
print('Stacked pipeline regression score is %f (training)' % model.score(x_train, y_train))
print('Stacked pipeline regression score is %f (test)' % model.score(x_test, y_test))  # ~84%

Example #23
from scipy.stats import boxcox_normmax  # computes the optimal Box-Cox transform coefficient lmbda
for i in skew_index:
    all_x[i] = boxcox1p(
        all_x[i],
        boxcox_normmax(all_x[i] + 1))  # inv_boxcox(y, lmbda) can be used to undo the Box-Cox transform

# create some new nonlinear features from the existing linear features

# convert categorical variables into dummy variables
all_x = pd.get_dummies(all_x).reset_index(drop=True)

# split back into the training and test sets
train_x = all_x.iloc[:len(train_y), :]
test_x = all_x.iloc[len(train_y):, :]

# train the model
from sklearn.linear_model import Ridge, RidgeCV, ElasticNetCV, LassoCV, LassoLarsCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import cross_val_score

# or skip the plots and use clf = LassoLarsCV(cv=5).fit(train_x, train_y) directly
clf = RidgeCV(cv=5).fit(train_x, train_y)
#clf = LassoLarsCV(cv=5).fit(train_x, train_y)
#clf = ElasticNetCV(cv=5).fit(train_x, train_y)
y_pred = clf.predict(test_x)

import math
output = pd.DataFrame({'Id': test_ID, 'SalePrice': math.e**y_pred})
output.to_csv('submission.csv', index=False)
Example #24
    covs_ts = np.zeros((n_sub, n_fb, (p * (p + 1)) // 2))
    for fb in range(n_fb):
        covs_ts[:, fb, :] = TangentSpace(metric="wasserstein").fit(
            covs[:, fb, :, :]).transform(covs[:, fb, :, :])
    return covs_ts


file_covs = op.join(cfg.path_outputs, 'covs_allch_oas.float32.h5')
covs_allch = mne.externals.h5io.read_hdf5(file_covs)  # (sub, fb, ch, ch)

info = np.load(op.join(cfg.path_data, 'info_allch.npy'), allow_pickle=True).item()
picks = mne.pick_types(info, meg=meg)

covs = proj_covs_common(covs_allch, picks, scale=scale, rank=rank, reg=reg)
X = proj_covs_ts(covs)
X = X.reshape(len(X), -1)

info = pd.read_csv(op.join(cfg.path_data, 'participants.csv'))
subjects = [d['subject'] for d in covs_allch if 'subject' in d]
y = info.set_index('Observations').age.loc[subjects]

ridge = make_pipeline(StandardScaler(),
                      RidgeCV(alphas=np.logspace(-3, 5, 100)))
score = -cross_val_score(ridge,
                         X,
                         y,
                         cv=cv,
                         scoring="neg_mean_absolute_error",
                         n_jobs=n_jobs,
                         verbose=True)
Example #25

def interpolation_function(word_time, fine_time, vectors, i):
    P = phi(word_time, word_time, best_eps[i])
    r = RidgeCV(alphas=alpha_vals, fit_intercept=False, store_cv_values=True)
    r.fit(P, vectors[:, i])
    interp_P = phi(word_time, fine_time, best_eps[i])
    return i, r.coef_, r.predict(interp_P), r.alpha_
Example #26
from sklearn.linear_model import Ridge, RidgeCV

### 1. load data set...

### 2. standardize data (rescale to zero-mean and unit-variance)

### 3. choose alpha and fit model

# in this case, alpha is the regularization strength, and reduces the variance of the estimate.
# RidgeCV = ridge regression with the alpha chosen by [C]ross-[V]alidation over candidate values
regr_cv = RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0], normalize=True)

# decide what the best value for alpha is
model_cv = regr_cv.fit(X=trainX, y=trainYclass)
print("optimal alpha:", model_cv.alpha_)

### 4. score model on test data

# for scoring: returns the `coefficient of determination`
# CoD: R^2, the proportion of variance in the dependent variable that is predictable from the independent variables.
#  e.g. a score of .46 means that 46% of the variability of the dependent variable has been accounted for
print('ridge score:', model_cv.score(X=testX, y=testYclass))
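
# A small illustration sketch (an assumption, reusing testX/testYclass from
# above): R^2 = 1 - SS_res / SS_tot, which is what .score() computes here.
import numpy as np

pred = model_cv.predict(testX)
ss_res = np.sum((testYclass - pred) ** 2)
ss_tot = np.sum((testYclass - np.mean(testYclass)) ** 2)
print('manual R^2:', 1 - ss_res / ss_tot)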
Example #27
def ridgeCV(y_ts, df_norm, keys=None, kwrgs_model=None):
    '''
    X contains all precursor data, incl train and test
    X_train, y_train are split up by TrainIsTrue
    Prediction is made for the whole timeseries
    '''
    #%%
    if keys is None:
        no_data_col = ['TrainIsTrue', 'RV_mask', 'fit_model_mask']
        keys = df_norm.columns
        keys = [k for k in keys if k not in no_data_col]
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    # warnings.filterwarnings("ignore", category=FutureWarning)

    if kwrgs_model is None:
        # use Bram settings
        kwrgs_model = {'fit_intercept': True, 'alphas': (.01, .1, 1.0, 10.0)}

    # find parameters for gridsearch optimization
    kwrgs_gridsearch = {
        k: i
        for k, i in kwrgs_model.items() if type(i) == list
    }
    # only the constant parameters are kept
    kwrgs = kwrgs_model.copy()
    [kwrgs.pop(k) for k in kwrgs_gridsearch.keys()]
    if 'feat_sel' in kwrgs:
        feat_sel = kwrgs.pop('feat_sel')
    else:
        feat_sel = None

    # Get training years
    x_fit_mask, y_fit_mask, x_pred_mask, y_pred_mask = utils.get_masks(df_norm)

    X = df_norm[keys]
    X = X.dropna(axis='columns')  # drop only nan columns
    # X = add_constant(X)
    X_train = X[x_fit_mask.values]
    X_pred = X[x_pred_mask.values]

    RV_fit = y_ts['ts'].loc[y_fit_mask.index]  # y_fit may be shortened
    # because X_test was used to predict y_train due to lag, hence train-test
    # leakage.

    # y_ts dates may no longer align with x_fit  y_fit masks
    y_fit_mask = df_norm['TrainIsTrue'].loc[y_fit_mask.index].values
    y_train = RV_fit[y_fit_mask].squeeze()

    # if y_pred_mask is not None:
    #     y_dates = RV_fit[y_pred_mask.values].index
    # else:
    # y_dates = RV_fit.index

    X = X_train

    # # Create stratified random shuffle which keeps together years as blocks.
    kwrgs_cv = ['kfold', 'seed']
    kwrgs_cv = {k: i for k, i in kwrgs.items() if k in kwrgs_cv}
    [kwrgs.pop(k) for k in kwrgs_cv.keys()]
    if len(kwrgs_cv) >= 1:
        cv = utils.get_cv_accounting_for_years(y_train, **kwrgs_cv)
        kwrgs['store_cv_values'] = False
    else:
        cv = None
        kwrgs['store_cv_values'] = True
    model = RidgeCV(cv=cv, **kwrgs)

    if feat_sel is not None:
        if feat_sel['model'] is None:
            feat_sel['model'] = model
        model, new_features, rfecv = utils.feature_selection(
            X_train, y_train.values, **feat_sel)
        X_pred = X_pred[new_features]
    else:
        model.fit(X_train, y_train)

    y_pred = model.predict(X_pred)

    prediction = pd.DataFrame(y_pred, index=y_pred_mask.index, columns=[0])
    model.X_pred = X_pred
    model.name = 'Ridge Regression'
    #%%
    return prediction, model
Example #28

ridgereg = Ridge(alpha=0.1, normalize=True)
ridgereg.fit(X_train, y_train)
y_pred = ridgereg.predict(X_test)
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

# examine the coefficients
print(ridgereg.coef_)

# create an array of alpha values
alpha_range = 10.**np.arange(-2, 3)
alpha_range

# select the best alpha with RidgeCV
from sklearn.linear_model import RidgeCV
ridgeregcv = RidgeCV(alphas=alpha_range,
                     normalize=True,
                     scoring='neg_mean_squared_error')
ridgeregcv.fit(X_train, y_train)
ridgeregcv.alpha_

# predict method uses the best alpha value
y_pred = ridgeregcv.predict(X_test)
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

# Lasso regression
# try alpha=0.001 and examine coefficients
from sklearn.linear_model import Lasso
lassoreg = Lasso(alpha=0.001, normalize=True)
lassoreg.fit(X_train, y_train)
print(lassoreg.coef_)
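
# A natural next step (a sketch, not from the original tutorial): let LassoCV
# pick alpha from the same candidate range instead of fixing alpha=0.001.
from sklearn.linear_model import LassoCV
lassoregcv = LassoCV(alphas=alpha_range, cv=5)
lassoregcv.fit(X_train, y_train)
print(lassoregcv.alpha_)
y_pred = lassoregcv.predict(X_test)
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))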
Example #29
#
# In machine-learning practice, Ridge Regression is more often used with
# non-negligible regularization.
#
# Above, we limited this regularization to a very small amount.
# Regularization improves the conditioning of the problem and reduces the
# variance of the estimates. RidgeCV applies cross validation in order to
# determine which value of the regularization parameter (`alpha`) is best
# suited for prediction.

from sklearn.linear_model import RidgeCV

model = make_pipeline(
    preprocessor,
    TransformedTargetRegressor(
        regressor=RidgeCV(alphas=np.logspace(-10, 10, 21)),
        func=np.log10,
        inverse_func=sp.special.exp10,
    ),
)

_ = model.fit(X_train, y_train)

# %%
# First we check which value of :math:`\alpha` has been selected.

model[-1].regressor_.alpha_

# %%
# Then we check the quality of the predictions.
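
# A plausible check (a sketch; the original snippet is cut off here): score the
# fitted pipeline on held-out data, assuming X_test/y_test exist as in the
# surrounding example.
from sklearn.metrics import median_absolute_error

y_pred = model.predict(X_test)
print("R^2 on the test set:", model.score(X_test, y_test))
print("Median absolute error:", median_absolute_error(y_test, y_pred))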
Example #30
        estimator=KNeighborsRegressor(n_neighbors=48, p=1, weights="uniform")),
    StackingEstimator(estimator=XGBRegressor(learning_rate=0.001,
                                             max_depth=1,
                                             min_child_weight=3,
                                             n_estimators=100,
                                             n_jobs=1,
                                             objective="reg:squarederror",
                                             subsample=0.9500000000000001,
                                             verbosity=0)), MinMaxScaler(),
    StackingEstimator(estimator=SGDRegressor(alpha=0.01,
                                             eta0=0.01,
                                             fit_intercept=False,
                                             l1_ratio=0.0,
                                             learning_rate="constant",
                                             loss="huber",
                                             penalty="elasticnet",
                                             power_t=0.0)),
    StackingEstimator(estimator=LinearSVR(
        C=25.0, dual=True, epsilon=0.1, loss="epsilon_insensitive",
        tol=0.0001)), FeatureAgglomeration(affinity="l2", linkage="average"),
    StackingEstimator(estimator=ExtraTreesRegressor(bootstrap=False,
                                                    max_features=0.8,
                                                    min_samples_leaf=19,
                                                    min_samples_split=10,
                                                    n_estimators=400)),
    ZeroCount(), FeatureAgglomeration(affinity="manhattan",
                                      linkage="complete"), RidgeCV())

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)